Page MenuHomec4science

ghost.c
No OneTemporary

File Metadata

Created
Sun, Jun 9, 23:56
#ifdef TESSEL
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <mpi.h>
#include "allvars.h"
#include "proto.h"
/*! \file ghost.c
* \brief
*
*/
#ifdef PERIODIC
static double boxSize, boxHalf;
#ifdef LONG_X
static double boxSize_X, boxHalf_X;
#else
#define boxSize_X boxSize
#define boxHalf_X boxHalf
#endif
#ifdef LONG_Y
static double boxSize_Y, boxHalf_Y;
#else
#define boxSize_Y boxSize
#define boxHalf_Y boxHalf
#endif
#ifdef LONG_Z
static double boxSize_Z, boxHalf_Z;
#else
#define boxSize_Z boxSize
#define boxHalf_Z boxHalf
#endif
#endif
static int ifirstGhost,nGhostToAdd;
/*! This function computes the local density for each active SPH particle,
* the number of neighbours in the current smoothing radius, and the
* divergence and curl of the velocity field. The pressure is updated as
* well. If a particle with its smoothing region is fully inside the
* local domain, it is not exported to the other processors. The function
* also detects particles that have a number of neighbours outside the
* allowed tolerance range. For these particles, the smoothing length is
* adjusted accordingly, and the density computation is executed again.
* Note that the smoothing length is not allowed to fall below the lower
* bound set by MinGasHsml.
*/
void ghost(int mode)
{
long long ntot, ntotleft;
int *noffset, *nbuffer, *nsend, *nsend_local, *numlist, *ndonelist;
int i, j, n, ndone, npleft, maxfill, source, iter = 0;
int level, ngrp, sendTask, recvTask, place, nexport;
double dt_entr, tstart, tend, tstart_ngb = 0, tend_ngb = 0;
double sumt, sumcomm, timengb, sumtimengb;
double timecomp = 0, timeimbalance = 0, timecommsumm = 0, sumimbalance;
MPI_Status status;
int completeness;
#ifdef DETAILED_CPU_OUTPUT_IN_DENSITY
double *timengblist;
double *timecomplist;
double *timecommsummlist;
double *timeimbalancelist;
#endif
#ifdef DETAILED_CPU
double t0=0,t1=0;
#endif
#ifdef PERIODIC
boxSize = All.BoxSize;
boxHalf = 0.5 * All.BoxSize;
#ifdef LONG_X
boxHalf_X = boxHalf * LONG_X;
boxSize_X = boxSize * LONG_X;
#endif
#ifdef LONG_Y
boxHalf_Y = boxHalf * LONG_Y;
boxSize_Y = boxSize * LONG_Y;
#endif
#ifdef LONG_Z
boxHalf_Z = boxHalf * LONG_Z;
boxSize_Z = boxSize * LONG_Z;
#endif
#endif
#ifdef DETAILED_CPU
if (mode==1)
t0 = second();
#endif
noffset = malloc(sizeof(int) * NTask); /* offsets of bunches in common list */
nbuffer = malloc(sizeof(int) * NTask);
nsend_local = malloc(sizeof(int) * NTask);
nsend = malloc(sizeof(int) * NTask * NTask);
ndonelist = malloc(sizeof(int) * NTask);
for(n = 0, NumPTUpdate = 0; n < N_gas; n++)
{
P[n].IsDone = 0;
P[n].IsAdded = 0;
NumPTUpdate++;
}
numlist = malloc(NTask * sizeof(int) * NTask);
MPI_Allgather(&NumPTUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
for(i = 0, ntot = 0; i < NTask; i++)
ntot += numlist[i];
free(numlist);
/* we will repeat the whole thing for those particles where we didn't
* find enough neighbours
*/
do
{
i = 0; /* beginn with this index */
ntotleft = ntot; /* particles left for all tasks together */
nGhostToAdd=0;
ifirstGhost=NumPart+NumgPart;
while(ntotleft > 0)
{
for(j = 0; j < NTask; j++)
nsend_local[j] = 0;
/* do local particles and prepare export list */
tstart = second();
for(nexport = 0, ndone = 0; i < N_gas && nexport < All.BunchSizeGhost - NTask; i++)
if(P[i].IsDone == 0)
{
ndone++;
for(j = 0; j < NTask; j++)
Exportflag[j] = 0;
ghost_evaluate(i, 0);
for(j = 0; j < NTask; j++)
{
if(Exportflag[j])
{
GhostDataIn[nexport].Pos[0] = P[i].Pos[0];
GhostDataIn[nexport].Pos[1] = P[i].Pos[1];
GhostDataIn[nexport].Pos[2] = P[i].Pos[2];
GhostDataIn[nexport].Index = i;
GhostDataIn[nexport].rSearch = P[i].rSearch;
nexport++;
nsend_local[j]++;
}
}
}
tend = second();
timecomp += timediff(tstart, tend);
qsort(GhostDataIn, nexport, sizeof(struct ghostdata_in), ghost_compare_key);
for(j = 1, noffset[0] = 0; j < NTask; j++)
noffset[j] = noffset[j - 1] + nsend_local[j - 1];
tstart = second();
MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);
tend = second();
timeimbalance += timediff(tstart, tend);
/* now do the particles that need to be exported */
for(level = 1; level < (1 << PTask); level++)
{
tstart = second();
for(j = 0; j < NTask; j++)
nbuffer[j] = 0;
for(ngrp = level; ngrp < (1 << PTask); ngrp++)
{
maxfill = 0;
for(j = 0; j < NTask; j++)
{
if((j ^ ngrp) < NTask)
if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
}
if(maxfill >= All.BunchSizeGhost)
break;
sendTask = ThisTask;
recvTask = ThisTask ^ ngrp;
if(recvTask < NTask)
{
if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
{
/* get the particles */
MPI_Sendrecv(&GhostDataIn[noffset[recvTask]],
nsend_local[recvTask] * sizeof(struct ghostdata_in), MPI_BYTE,
recvTask, TAG_DENS_A,
&GhostDataGet[nbuffer[ThisTask]],
nsend[recvTask * NTask + ThisTask] * sizeof(struct ghostdata_in),
MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, &status);
}
}
for(j = 0; j < NTask; j++)
if((j ^ ngrp) < NTask)
nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
}
tend = second();
timecommsumm += timediff(tstart, tend);
tstart = second();
for(j = 0; j < nbuffer[ThisTask]; j++)
ghost_evaluate(j, 1);
tend = second();
timecomp += timediff(tstart, tend);
/* do a block to explicitly measure imbalance */
tstart = second();
MPI_Barrier(MPI_COMM_WORLD);
tend = second();
timeimbalance += timediff(tstart, tend);
/* get the result */
tstart = second();
for(j = 0; j < NTask; j++)
nbuffer[j] = 0;
for(ngrp = level; ngrp < (1 << PTask); ngrp++)
{
maxfill = 0;
for(j = 0; j < NTask; j++)
{
if((j ^ ngrp) < NTask)
if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
}
if(maxfill >= All.BunchSizeGhost)
break;
sendTask = ThisTask;
recvTask = ThisTask ^ ngrp;
if(recvTask < NTask)
{
if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
{
/* send the results */
MPI_Sendrecv(&GhostDataResult[nbuffer[ThisTask]],
nsend[recvTask * NTask + ThisTask] * sizeof(struct ghostdata_out),
MPI_BYTE, recvTask, TAG_DENS_B,
&GhostDataPartialResult[noffset[recvTask]],
nsend_local[recvTask] * sizeof(struct ghostdata_out),
MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, &status);
/* add the result to the particles */
for(j = 0; j < nsend_local[recvTask]; j++)
{
source = j + noffset[recvTask];
place = GhostDataIn[source].Index;
// this is needed if we want to get some values
//SphP[place].Value = GhostDataPartialResult[source].Value;
}
}
}
for(j = 0; j < NTask; j++)
if((j ^ ngrp) < NTask)
nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
}
tend = second();
timecommsumm += timediff(tstart, tend);
level = ngrp - 1;
}
MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
for(j = 0; j < NTask; j++)
ntotleft -= ndonelist[j];
}
/* do final operations on results */
tstart = second();
/* add all ghost particles to the tesselation */
AddGhostPoints(ifirstGhost,nGhostToAdd);
for(i = 0, npleft = 0; i < N_gas; i++)
{
if(!P[i].IsDone)
{
completeness = CheckCompletenessForThisPoint(i);
/* now check whether we had enough neighbours */
if (completeness!=0)
{
/* need to redo this particle */
npleft++;
}
else
P[i].IsDone = 1; /* Mark as inactive */
}
}
tend = second();
timecomp += timediff(tstart, tend);
numlist = malloc(NTask * sizeof(int) * NTask);
MPI_Allgather(&npleft, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
for(i = 0, ntot = 0; i < NTask; i++)
ntot += numlist[i];
free(numlist);
if(ntot > 0)
{
if(iter == 0)
tstart_ngb = second();
iter++;
if(iter > 0 && ThisTask == 0)
{
printf("ngb tessel iteration %d: need to repeat for %d%09d particles.\n", iter,
(int) (ntot / 1000000000), (int) (ntot % 1000000000));
fflush(stdout);
}
if(iter > MAXITER)
{
printf("failed to converge in neighbour iteration in density()\n");
fflush(stdout);
endrun(1155);
}
}
else
tend_ngb = second();
}
while(ntot > 0);
/* mark as active again */
//for(i = 0; i < NumPart; i++)
// P[i].IsDone = 0;
free(ndonelist);
free(nsend);
free(nsend_local);
free(nbuffer);
free(noffset);
/* collect some timing information */
if(iter > 0)
timengb = timediff(tstart_ngb, tend_ngb);
else
timengb = 0;
MPI_Reduce(&timengb, &sumtimengb, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&timecomp, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&timecommsumm, &sumcomm, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
}
#define NGB_PERIODIC_SIGN_X(x) (xtmp=(x),(xtmp>boxHalf_X)?(-1.0):((xtmp<-boxHalf_X)?(1.0):0))
#define NGB_PERIODIC_SIGN_Y(x) (xtmp=(x),(xtmp>boxHalf_Y)?(-1.0):((xtmp<-boxHalf_Y)?(1.0):0))
#define NGB_PERIODIC_SIGN_Z(x) (xtmp=(x),(xtmp>boxHalf_Z)?(-1.0):((xtmp<-boxHalf_Z)?(1.0):0))
#define ADD_TO_QUADRANT(i,qflag)(1<<i | qflag)
#define QUADRANT_IS_EMPTY(i,qflag)(((1<<i & qflag)==0)?1:0)
#define QUADRANT(x,y,z)( x + 3*y + 9*z )
/*! This function represents the core of the SPH density computation. The
* target particle may either be local, or reside in the communication
* buffer.
*/
void ghost_evaluate(int target, int mode)
{
int j, n, startnode, numngb, numngb_inbox;
double h, h2, hinv, hinv3, hinv4;
double dx, dy, dz, r, r2;
FLOAT *pos;
FLOAT posj[3];
int phase=0;
#ifdef PERIODIC
double xtmp;
double sgnx,sgny,sgnz;
int quadrant;
#endif
if(mode == 0)
{
pos = P[target].Pos;
h = P[target].rSearch;
}
else
{
pos = GhostDataGet[target].Pos;
h = GhostDataGet[target].rSearch;
}
h2 = h * h;
hinv = 1.0 / h;
#ifndef TWODIMS
hinv3 = hinv * hinv * hinv;
#else
hinv3 = hinv * hinv / boxSize_Z;
#endif
hinv4 = hinv3 * hinv;
startnode = All.MaxPart;
numngb = 0;
do
{
numngb_inbox = ngb_treefind_variable_for_tessel(&pos[0], h, phase, &startnode);
for(n = 0; n < numngb_inbox; n++)
{
j = Ngblist[n];
#ifdef PERIODIC
sgnx = NGB_PERIODIC_SIGN_X(P[j].Pos[0]-pos[0]);
sgny = NGB_PERIODIC_SIGN_Y(P[j].Pos[1]-pos[1]);
sgnz = NGB_PERIODIC_SIGN_Z(P[j].Pos[2]-pos[2]);
quadrant = QUADRANT((int)(sgnx+1),(int)(sgny+1),(int)(sgnz+1));
#endif
/* check if a particle is already in a quadrant */
//printf("%g %g %g\n",sgnx+1,sgny+1,sgnz+1);
//printf("quadrant=%d %d\n",quadrant,QUADRANT_IS_EMPTY(quadrant,P[j].IsAdded));
if( QUADRANT_IS_EMPTY(quadrant,P[j].IsAdded) )
{
/* add offset */
posj[0] = P[j].Pos[0] + boxSize_X*sgnx;
posj[1] = P[j].Pos[1] + boxSize_Y*sgny;
posj[2] = P[j].Pos[2] + boxSize_Z*sgnz;
dx = pos[0] - posj[0];
dy = pos[1] - posj[1];
dz = pos[2] - posj[2];
r2 = dx * dx + dy * dy + dz * dz;
if(r2 < h2)
{
/* we can add a point j to the tesselation */
if (NumgPart+NumgPart>=All.MaxPart)
{
printf("NumgPart+NumgPart %d >= MaxPart %d !!!\n",NumgPart+NumgPart,All.MaxPart);
endrun(-123);
}
else
{
//printf("this point is added %g %g %g (%g %g %g)\n",posj[0],posj[1],posj[2],P[j].Pos[0],P[j].Pos[1],P[j].Pos[2]);
P[j].IsAdded = ADD_TO_QUADRANT(quadrant,P[j].IsAdded);
P[NumPart+NumgPart].Pos[0] = posj[0];
P[NumPart+NumgPart].Pos[1] = posj[1];
P[NumPart+NumgPart].Pos[2] = posj[2];
P[NumPart+NumgPart].Mass = P[j].Mass;
P[NumPart+NumgPart].ID = NumPart+NumgPart; /* this is no longer needed */
P[NumPart+NumgPart].iPref = j;
//printf("this point is added %d %g %g %g (%g %g %g)\n",NumPart+NumgPart,P[NumPart+NumgPart].Pos[0],P[NumPart+NumgPart].Pos[1],P[NumPart+NumgPart].Pos[2],posj[0],posj[1],posj[2]);
NumgPart++;
nGhostToAdd++;
}
}
}
}
}
while(startnode >= 0);
if(mode == 0)
{
//SphP[target].Value = value;
}
else
{
//GhostDataResult[target].Value = value;
}
}
/*! This routine is a comparison kernel used in a sort routine to group
* particles that are exported to the same processor.
*/
int ghost_compare_key(const void *a, const void *b)
{
if(((struct ghostdata_in *) a)->Task < (((struct ghostdata_in *) b)->Task))
return -1;
if(((struct ghostdata_in *) a)->Task > (((struct ghostdata_in *) b)->Task))
return +1;
return 0;
}
#endif

Event Timeline