Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F88741966
forcetree.c
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Oct 20, 11:20
Size
80 KB
Mime Type
text/x-c
Expires
Tue, Oct 22, 11:20 (2 d)
Engine
blob
Format
Raw Data
Handle
21811017
Attached To
rGEAR Gear
forcetree.c
View Options
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <mpi.h>
#include "allvars.h"
#include "proto.h"
/*! \file forcetree.c
* \brief gravitational tree and code for Ewald correction
*
* This file contains the computation of the gravitational force by means
* of a tree. The type of tree implemented is a geometrical oct-tree,
* starting from a cube encompassing all particles. This cube is
* automatically found in the domain decomposition, which also splits up
* the global "top-level" tree along node boundaries, moving the particles
* of different parts of the tree to separate processors. Tree nodes can
* be dynamically updated in drift/kick operations to avoid having to
* reconstruct the tree every timestep.
*/
/*! auxialiary variable used to set-up non-recursive walk */
static int last;
/*! length of lock-up table for short-range force kernel in TreePM algorithm */
#define NTAB 1000
/*! variables for short-range lookup table */
static float tabfac, shortrange_table[NTAB], shortrange_table_potential[NTAB];
/*! toggles after first tree-memory allocation, has only influence on log-files */
static int first_flag = 0;
#ifdef PERIODIC
/*! Macro that maps a distance to the nearest periodic neighbour */
#define NEAREST(x) (((x)>boxhalf)?((x)-boxsize):(((x)<-boxhalf)?((x)+boxsize):(x)))
/*! Size of 3D lock-up table for Ewald correction force */
#define EN 64
/*! 3D lock-up table for Ewald correction to force and potential. Only one
* octant is stored, the rest constructed by using the symmetry
*/
static FLOAT fcorrx[EN + 1][EN + 1][EN + 1];
static FLOAT fcorry[EN + 1][EN + 1][EN + 1];
static FLOAT fcorrz[EN + 1][EN + 1][EN + 1];
static FLOAT potcorr[EN + 1][EN + 1][EN + 1];
static double fac_intp;
#endif
/*! This function is a driver routine for constructing the gravitational
* oct-tree, which is done by calling a small number of other functions.
*/
int force_treebuild(int npart)
{
Numnodestree = force_treebuild_single(npart);
force_update_pseudoparticles();
force_flag_localnodes();
TimeOfLastTreeConstruction = All.Time;
return Numnodestree;
}
/*! Constructs the gravitational oct-tree.
*
* The index convention for accessing tree nodes is the following: the
* indices 0...NumPart-1 reference single particles, the indices
* All.MaxPart.... All.MaxPart+nodes-1 reference tree nodes. `Nodes_base'
* points to the first tree node, while `nodes' is shifted such that
* nodes[All.MaxPart] gives the first tree node. Finally, node indices
* with values 'All.MaxPart + MaxNodes' and larger indicate "pseudo
* particles", i.e. multipole moments of top-level nodes that lie on
* different CPUs. If such a node needs to be opened, the corresponding
* particle must be exported to that CPU. The 'Extnodes' structure
* parallels that of 'Nodes'. Its information is only needed for the SPH
* part of the computation. (The data is split onto these two structures
* as a tuning measure. If it is merged into 'Nodes' a somewhat bigger
* size of the nodes also for gravity would result, which would reduce
* cache utilization slightly.
*/
int force_treebuild_single(int npart)
{
int i, j, subnode = 0, parent, numnodes;
int nfree, th, nn, no;
struct NODE *nfreep;
double lenhalf, epsilon;
peanokey key;
/* create an empty root node */
nfree = All.MaxPart; /* index of first free node */
nfreep = &Nodes[nfree]; /* select first node */
nfreep->len = DomainLen;
for(j = 0; j < 3; j++)
nfreep->center[j] = DomainCenter[j];
for(j = 0; j < 8; j++)
nfreep->u.suns[j] = -1;
numnodes = 1;
nfreep++;
nfree++;
/* create a set of empty nodes corresponding to the top-level domain
* grid. We need to generate these nodes first to make sure that we have a
* complete top-level tree which allows the easy insertion of the
* pseudo-particles at the right place
*/
force_create_empty_nodes(All.MaxPart, 0, 1, 0, 0, 0, &numnodes, &nfree);
/* if a high-resolution region in a global tree is used, we need to generate
* an additional set empty nodes to make sure that we have a complete
* top-level tree for the high-resolution inset
*/
nfreep = &Nodes[nfree];
parent = -1; /* note: will not be used below before it is changed */
/* now we insert all particles */
for(i = 0; i < npart; i++)
{
/* the softening is only used to check whether particles are so close
* that the tree needs not to be refined further
*/
epsilon = All.ForceSoftening[P[i].Type];
key = peano_hilbert_key((P[i].Pos[0] - DomainCorner[0]) * DomainFac,
(P[i].Pos[1] - DomainCorner[1]) * DomainFac,
(P[i].Pos[2] - DomainCorner[2]) * DomainFac, BITS_PER_DIMENSION);
no = 0;
while(TopNodes[no].Daughter >= 0)
no = TopNodes[no].Daughter + (key - TopNodes[no].StartKey) / (TopNodes[no].Size / 8);
no = TopNodes[no].Leaf;
th = DomainNodeIndex[no];
while(1)
{
if(th >= All.MaxPart) /* we are dealing with an internal node */
{
subnode = 0;
if(P[i].Pos[0] > Nodes[th].center[0])
subnode += 1;
if(P[i].Pos[1] > Nodes[th].center[1])
subnode += 2;
if(P[i].Pos[2] > Nodes[th].center[2])
subnode += 4;
nn = Nodes[th].u.suns[subnode];
if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */
{
parent = th;
th = nn;
}
else
{
/* here we have found an empty slot where we can attach
* the new particle as a leaf.
*/
Nodes[th].u.suns[subnode] = i;
break; /* done for this particle */
}
}
else
{
/* We try to insert into a leaf with a single particle. Need
* to generate a new internal node at this point.
*/
Nodes[parent].u.suns[subnode] = nfree;
nfreep->len = 0.5 * Nodes[parent].len;
lenhalf = 0.25 * Nodes[parent].len;
if(subnode & 1)
nfreep->center[0] = Nodes[parent].center[0] + lenhalf;
else
nfreep->center[0] = Nodes[parent].center[0] - lenhalf;
if(subnode & 2)
nfreep->center[1] = Nodes[parent].center[1] + lenhalf;
else
nfreep->center[1] = Nodes[parent].center[1] - lenhalf;
if(subnode & 4)
nfreep->center[2] = Nodes[parent].center[2] + lenhalf;
else
nfreep->center[2] = Nodes[parent].center[2] - lenhalf;
nfreep->u.suns[0] = -1;
nfreep->u.suns[1] = -1;
nfreep->u.suns[2] = -1;
nfreep->u.suns[3] = -1;
nfreep->u.suns[4] = -1;
nfreep->u.suns[5] = -1;
nfreep->u.suns[6] = -1;
nfreep->u.suns[7] = -1;
subnode = 0;
if(P[th].Pos[0] > nfreep->center[0])
subnode += 1;
if(P[th].Pos[1] > nfreep->center[1])
subnode += 2;
if(P[th].Pos[2] > nfreep->center[2])
subnode += 4;
#ifndef NOTREERND
if(nfreep->len < 1.0e-3 * epsilon)
{
/* seems like we're dealing with particles at identical (or extremely close)
* locations. Randomize subnode index to allow tree construction. Note: Multipole moments
* of tree are still correct, but this will only happen well below gravitational softening
* length-scale anyway.
*/
subnode = (int) (8.0 * get_random_number((0xffff & P[i].ID) + P[i].GravCost));
P[i].GravCost += 1;
if(subnode >= 8)
subnode = 7;
}
#endif
nfreep->u.suns[subnode] = th;
th = nfree; /* resume trying to insert the new particle at
* the newly created internal node
*/
numnodes++;
nfree++;
nfreep++;
if((numnodes) >= MaxNodes)
{
printf("task %d: maximum number %d of tree-nodes reached.\n", ThisTask, MaxNodes);
printf("for particle %d\n", i);
dump_particles();
endrun(1);
}
}
}
}
/* insert the pseudo particles that represent the mass distribution of other domains */
force_insert_pseudo_particles();
/* now compute the multipole moments recursively */
last = -1;
force_update_node_recursive(All.MaxPart, -1, -1);
if(last >= All.MaxPart)
{
if(last >= All.MaxPart + MaxNodes) /* a pseudo-particle */
Nextnode[last - MaxNodes] = -1;
else
Nodes[last].u.d.nextnode = -1;
}
else
Nextnode[last] = -1;
return numnodes;
}
/*! This function recursively creates a set of empty tree nodes which
* corresponds to the top-level tree for the domain grid. This is done to
* ensure that this top-level tree is always "complete" so that we can
* easily associate the pseudo-particles of other CPUs with tree-nodes at
* a given level in the tree, even when the particle population is so
* sparse that some of these nodes are actually empty.
*/
void force_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z, int *nodecount,
int *nextfree)
{
int i, j, k, n, sub, count;
if(TopNodes[topnode].Daughter >= 0)
{
for(i = 0; i < 2; i++)
for(j = 0; j < 2; j++)
for(k = 0; k < 2; k++)
{
sub = 7 & peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits);
count = i + 2 * j + 4 * k;
Nodes[no].u.suns[count] = *nextfree;
Nodes[*nextfree].len = 0.5 * Nodes[no].len;
Nodes[*nextfree].center[0] = Nodes[no].center[0] + (2 * i - 1) * 0.25 * Nodes[no].len;
Nodes[*nextfree].center[1] = Nodes[no].center[1] + (2 * j - 1) * 0.25 * Nodes[no].len;
Nodes[*nextfree].center[2] = Nodes[no].center[2] + (2 * k - 1) * 0.25 * Nodes[no].len;
for(n = 0; n < 8; n++)
Nodes[*nextfree].u.suns[n] = -1;
if(TopNodes[TopNodes[topnode].Daughter + sub].Daughter == -1)
DomainNodeIndex[TopNodes[TopNodes[topnode].Daughter + sub].Leaf] = *nextfree;
*nextfree = *nextfree + 1;
*nodecount = *nodecount + 1;
if((*nodecount) >= MaxNodes)
{
printf("task %d: maximum number %d of tree-nodes reached.\n", ThisTask, MaxNodes);
printf("in create empty nodes\n");
dump_particles();
endrun(11);
}
force_create_empty_nodes(*nextfree - 1, TopNodes[topnode].Daughter + sub,
bits + 1, 2 * x + i, 2 * y + j, 2 * z + k, nodecount, nextfree);
}
}
}
/*! this function inserts pseudo-particles which will represent the mass
* distribution of the other CPUs. Initially, the mass of the
* pseudo-particles is set to zero, and their coordinate is set to the
* center of the domain-cell they correspond to. These quantities will be
* updated later on.
*/
void force_insert_pseudo_particles(void)
{
int i, index, subnode, nn, th;
for(i = 0; i < NTopleaves; i++)
{
index = DomainNodeIndex[i];
DomainMoment[i].mass = 0;
#ifdef STELLAR_FLUX
DomainMoment[i].starlum = 0;
#endif
DomainMoment[i].s[0] = Nodes[index].center[0];
DomainMoment[i].s[1] = Nodes[index].center[1];
DomainMoment[i].s[2] = Nodes[index].center[2];
}
for(i = 0; i < NTopleaves; i++)
{
if(i < DomainMyStart || i > DomainMyLast)
{
th = All.MaxPart; /* select index of first node in tree */
while(1)
{
if(th >= All.MaxPart) /* we are dealing with an internal node */
{
if(th >= All.MaxPart + MaxNodes)
endrun(888); /* this can't be */
subnode = 0;
if(DomainMoment[i].s[0] > Nodes[th].center[0])
subnode += 1;
if(DomainMoment[i].s[1] > Nodes[th].center[1])
subnode += 2;
if(DomainMoment[i].s[2] > Nodes[th].center[2])
subnode += 4;
nn = Nodes[th].u.suns[subnode];
if(nn >= 0) /* ok, something is in the daughter slot already, need to continue */
{
th = nn;
}
else
{
/* here we have found an empty slot where we can
* attach the pseudo particle as a leaf
*/
Nodes[th].u.suns[subnode] = All.MaxPart + MaxNodes + i;
break; /* done for this pseudo particle */
}
}
else
{
endrun(889); /* this can't be */
}
}
}
}
}
/*! this routine determines the multipole moments for a given internal node
* and all its subnodes using a recursive computation. The result is
* stored in the Nodes[] structure in the sequence of this tree-walk.
*
* Note that the bitflags-variable for each node is used to store in the
* lowest bits some special information: Bit 0 flags whether the node
* belongs to the top-level tree corresponding to the domain
* decomposition, while Bit 1 signals whether the top-level node is
* dependent on local mass.
*
* If UNEQUALSOFTENINGS is set, bits 2-4 give the particle type with
* the maximum softening among the particles in the node, and bit 5
* flags whether the node contains any particles with lower softening
* than that.
*/
void force_update_node_recursive(int no, int sib, int father)
{
int j, jj, p, pp, nextsib, suns[8];
FLOAT hmax;
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
int maxsofttype, diffsoftflag;
#else
FLOAT maxsoft;
#endif
#endif
struct particle_data *pa;
double s[3], vs[3], mass;
#ifdef STELLAR_FLUX
double starlum;
#endif
if(no >= All.MaxPart && no < All.MaxPart + MaxNodes) /* internal node */
{
for(j = 0; j < 8; j++)
suns[j] = Nodes[no].u.suns[j]; /* this "backup" is necessary because the nextnode entry will
overwrite one element (union!) */
if(last >= 0)
{
if(last >= All.MaxPart)
{
if(last >= All.MaxPart + MaxNodes) /* a pseudo-particle */
Nextnode[last - MaxNodes] = no;
else
Nodes[last].u.d.nextnode = no;
}
else
Nextnode[last] = no;
}
last = no;
mass = 0;
#ifdef STELLAR_FLUX
starlum = 0;
#endif
s[0] = 0;
s[1] = 0;
s[2] = 0;
vs[0] = 0;
vs[1] = 0;
vs[2] = 0;
hmax = 0;
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
maxsofttype = 7;
diffsoftflag = 0;
#else
maxsoft = 0;
#endif
#endif
for(j = 0; j < 8; j++)
{
if((p = suns[j]) >= 0)
{
/* check if we have a sibling on the same level */
for(jj = j + 1; jj < 8; jj++)
if((pp = suns[jj]) >= 0)
break;
if(jj < 8) /* yes, we do */
nextsib = pp;
else
nextsib = sib;
force_update_node_recursive(p, nextsib, no);
if(p >= All.MaxPart) /* an internal node or pseudo particle */
{
if(p >= All.MaxPart + MaxNodes) /* a pseudo particle */
{
/* nothing to be done here because the mass of the
* pseudo-particle is still zero. This will be changed
* later.
*/
}
else
{
mass += Nodes[p].u.d.mass;
#ifdef STELLAR_FLUX
starlum += Nodes[p].starlum;
#endif
s[0] += Nodes[p].u.d.mass * Nodes[p].u.d.s[0];
s[1] += Nodes[p].u.d.mass * Nodes[p].u.d.s[1];
s[2] += Nodes[p].u.d.mass * Nodes[p].u.d.s[2];
vs[0] += Nodes[p].u.d.mass * Extnodes[p].vs[0];
vs[1] += Nodes[p].u.d.mass * Extnodes[p].vs[1];
vs[2] += Nodes[p].u.d.mass * Extnodes[p].vs[2];
if(Extnodes[p].hmax > hmax)
hmax = Extnodes[p].hmax;
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
diffsoftflag |= (Nodes[p].u.d.bitflags >> 5) & 1;
if(maxsofttype == 7)
{
maxsofttype = (Nodes[p].u.d.bitflags >> 2) & 7;
}
else
{
if(((Nodes[p].u.d.bitflags >> 2) & 7) != 7)
{
if(All.ForceSoftening[((Nodes[p].u.d.bitflags >> 2) & 7)] >
All.ForceSoftening[maxsofttype])
{
maxsofttype = ((Nodes[p].u.d.bitflags >> 2) & 7);
diffsoftflag = 1;
}
else
{
if(All.ForceSoftening[((Nodes[p].u.d.bitflags >> 2) & 7)] <
All.ForceSoftening[maxsofttype])
diffsoftflag = 1;
}
}
}
#else
if(Nodes[p].maxsoft > maxsoft)
maxsoft = Nodes[p].maxsoft;
#endif
#endif
}
}
else /* a particle */
{
pa = &P[p];
mass += pa->Mass;
#ifdef STELLAR_FLUX
starlum += pa->Mass*All.HeatingPeLMRatio[pa->Type];
#endif
s[0] += pa->Mass * pa->Pos[0];
s[1] += pa->Mass * pa->Pos[1];
s[2] += pa->Mass * pa->Pos[2];
vs[0] += pa->Mass * pa->Vel[0];
vs[1] += pa->Mass * pa->Vel[1];
vs[2] += pa->Mass * pa->Vel[2];
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
if(maxsofttype == 7)
{
maxsofttype = pa->Type;
}
else
{
if(All.ForceSoftening[pa->Type] > All.ForceSoftening[maxsofttype])
{
maxsofttype = pa->Type;
diffsoftflag = 1;
}
else
{
if(All.ForceSoftening[pa->Type] < All.ForceSoftening[maxsofttype])
diffsoftflag = 1;
}
}
#else
if(pa->Type == 0)
{
if(SphP[p].Hsml > maxsoft)
maxsoft = SphP[p].Hsml;
}
else
{
if(All.ForceSoftening[pa->Type] > maxsoft)
maxsoft = All.ForceSoftening[pa->Type];
}
#endif
#endif
if(pa->Type == 0)
if(SphP[p].Hsml > hmax)
hmax = SphP[p].Hsml;
}
}
}
if(mass)
{
s[0] /= mass;
s[1] /= mass;
s[2] /= mass;
vs[0] /= mass;
vs[1] /= mass;
vs[2] /= mass;
}
else
{
s[0] = Nodes[no].center[0];
s[1] = Nodes[no].center[1];
s[2] = Nodes[no].center[2];
}
Nodes[no].u.d.s[0] = s[0];
Nodes[no].u.d.s[1] = s[1];
Nodes[no].u.d.s[2] = s[2];
Nodes[no].u.d.mass = mass;
#ifdef STELLAR_FLUX
Nodes[no].starlum = starlum;
#endif
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
Nodes[no].u.d.bitflags = 4 * maxsofttype + 32 * diffsoftflag;
#else
Nodes[no].u.d.bitflags = 0;
Nodes[no].maxsoft = maxsoft;
#endif
#else
Nodes[no].u.d.bitflags = 0;
#endif
Extnodes[no].vs[0] = vs[0];
Extnodes[no].vs[1] = vs[1];
Extnodes[no].vs[2] = vs[2];
Extnodes[no].hmax = hmax;
Nodes[no].u.d.sibling = sib;
Nodes[no].u.d.father = father;
}
else /* single particle or pseudo particle */
{
if(last >= 0)
{
if(last >= All.MaxPart)
{
if(last >= All.MaxPart + MaxNodes) /* a pseudo-particle */
Nextnode[last - MaxNodes] = no;
else
Nodes[last].u.d.nextnode = no;
}
else
Nextnode[last] = no;
}
last = no;
if(no < All.MaxPart) /* only set it for single particles */
Father[no] = father;
}
}
/*! This function updates the multipole moments of the pseudo-particles
* that represent the mass distribution on different CPUs. For that
* purpose, it first exchanges the necessary data, and then updates the
* top-level tree accordingly. The detailed implementation of these two
* tasks is done in separate functions.
*/
void force_update_pseudoparticles(void)
{
force_exchange_pseudodata();
force_treeupdate_pseudos();
}
/*! This function communicates the values of the multipole moments of the
* top-level tree-nodes of the domain grid. This data can then be used to
* update the pseudo-particles on each CPU accordingly.
*/
void force_exchange_pseudodata(void)
{
int i, no;
MPI_Status status;
int level, sendTask, recvTask;
for(i = DomainMyStart; i <= DomainMyLast; i++)
{
no = DomainNodeIndex[i];
/* read out the multipole moments from the local base cells */
DomainMoment[i].s[0] = Nodes[no].u.d.s[0];
DomainMoment[i].s[1] = Nodes[no].u.d.s[1];
DomainMoment[i].s[2] = Nodes[no].u.d.s[2];
DomainMoment[i].vs[0] = Extnodes[no].vs[0];
DomainMoment[i].vs[1] = Extnodes[no].vs[1];
DomainMoment[i].vs[2] = Extnodes[no].vs[2];
DomainMoment[i].mass = Nodes[no].u.d.mass;
#ifdef STELLAR_FLUX
DomainMoment[i].starlum = Nodes[no].starlum;
#endif
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
DomainMoment[i].bitflags = Nodes[no].u.d.bitflags;
#else
DomainMoment[i].maxsoft = Nodes[no].maxsoft;
#endif
#endif
}
/* share the pseudo-particle data accross CPUs */
for(level = 1; level < (1 << PTask); level++)
{
sendTask = ThisTask;
recvTask = ThisTask ^ level;
if(recvTask < NTask)
MPI_Sendrecv(&DomainMoment[DomainStartList[sendTask]],
(DomainEndList[sendTask] - DomainStartList[sendTask] + 1) * sizeof(struct DomainNODE),
MPI_BYTE, recvTask, TAG_DMOM,
&DomainMoment[DomainStartList[recvTask]],
(DomainEndList[recvTask] - DomainStartList[recvTask] + 1) * sizeof(struct DomainNODE),
MPI_BYTE, recvTask, TAG_DMOM, MPI_COMM_WORLD, &status);
}
}
/*! This function updates the top-level tree after the multipole moments of
* the pseudo-particles have been updated.
*/
void force_treeupdate_pseudos(void)
{
int i, k, no;
FLOAT sold[3], vsold[3], snew[3], vsnew[3], massold, massnew, mm;
#ifdef STELLAR_FLUX
FLOAT starlumold, starlumnew, starmm;
#endif
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
int maxsofttype, diffsoftflag;
#else
FLOAT maxsoft;
#endif
#endif
for(i = 0; i < NTopleaves; i++)
if(i < DomainMyStart || i > DomainMyLast)
{
no = DomainNodeIndex[i];
for(k = 0; k < 3; k++)
{
sold[k] = Nodes[no].u.d.s[k];
vsold[k] = Extnodes[no].vs[k];
}
massold = Nodes[no].u.d.mass;
#ifdef STELLAR_FLUX
starlumold = Nodes[no].starlum;
#endif
for(k = 0; k < 3; k++)
{
snew[k] = DomainMoment[i].s[k];
vsnew[k] = DomainMoment[i].vs[k];
}
massnew = DomainMoment[i].mass;
#ifdef STELLAR_FLUX
starlumnew = DomainMoment[i].starlum;
#endif
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
maxsofttype = (DomainMoment[i].bitflags >> 2) & 7;
diffsoftflag = (DomainMoment[i].bitflags >> 5) & 1;
#else
maxsoft = DomainMoment[i].maxsoft;
#endif
#endif
do
{
mm = Nodes[no].u.d.mass + massnew - massold;
#ifdef STELLAR_FLUX
starmm = Nodes[no].starlum + starlumnew - starlumold;
#endif
for(k = 0; k < 3; k++)
{
if(mm > 0)
{
Nodes[no].u.d.s[k] =
(Nodes[no].u.d.mass * Nodes[no].u.d.s[k] + massnew * snew[k] - massold * sold[k]) / mm;
Extnodes[no].vs[k] =
(Nodes[no].u.d.mass * Extnodes[no].vs[k] + massnew * vsnew[k] -
massold * vsold[k]) / mm;
}
}
Nodes[no].u.d.mass = mm;
#ifdef STELLAR_FLUX
Nodes[no].starlum = starmm;
#endif
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
diffsoftflag |= (Nodes[no].u.d.bitflags >> 5) & 1;
if(maxsofttype == 7)
maxsofttype = (Nodes[no].u.d.bitflags >> 2) & 7;
else
{
if(((Nodes[no].u.d.bitflags >> 2) & 7) != 7)
{
if(All.ForceSoftening[((Nodes[no].u.d.bitflags >> 2) & 7)] >
All.ForceSoftening[maxsofttype])
{
maxsofttype = ((Nodes[no].u.d.bitflags >> 2) & 7);
diffsoftflag = 1;
}
else
{
if(All.ForceSoftening[((Nodes[no].u.d.bitflags >> 2) & 7)] <
All.ForceSoftening[maxsofttype])
diffsoftflag = 1;
}
}
}
Nodes[no].u.d.bitflags = (Nodes[no].u.d.bitflags & 3) + 4 * maxsofttype + 32 * diffsoftflag;
#else
if(Nodes[no].maxsoft < maxsoft)
Nodes[no].maxsoft = maxsoft;
maxsoft = Nodes[no].maxsoft;
#endif
#endif
no = Nodes[no].u.d.father;
}
while(no >= 0);
}
}
/*! This function flags nodes in the top-level tree that are dependent on
* local particle data.
*/
void force_flag_localnodes(void)
{
int no, i;
/* mark all top-level nodes */
for(i = 0; i < NTopleaves; i++)
{
no = DomainNodeIndex[i];
while(no >= 0)
{
if((Nodes[no].u.d.bitflags & 1))
break;
Nodes[no].u.d.bitflags |= 1;
no = Nodes[no].u.d.father;
}
}
/* mark top-level nodes that contain local particles */
for(i = DomainMyStart; i <= DomainMyLast; i++)
{
/*
if(DomainMoment[i].mass > 0)
*/
{
no = DomainNodeIndex[i];
while(no >= 0)
{
if((Nodes[no].u.d.bitflags & 2))
break;
Nodes[no].u.d.bitflags |= 2;
no = Nodes[no].u.d.father;
}
}
}
}
/*! This function updates the side-length of tree nodes in case the tree is
* not reconstructed, but only drifted. The grouping of particles to tree
* nodes is not changed in this case, but some tree nodes may need to be
* enlarged because particles moved out of their original bounds.
*/
void force_update_len(void)
{
int i, no;
MPI_Status status;
int level, sendTask, recvTask;
force_update_node_len_local();
/* first update the side-lengths of all local nodes */
for(i = DomainMyStart; i <= DomainMyLast; i++)
{
no = DomainNodeIndex[i];
DomainTreeNodeLen[i] = Nodes[no].len;
}
for(level = 1; level < (1 << PTask); level++)
{
sendTask = ThisTask;
recvTask = ThisTask ^ level;
if(recvTask < NTask)
MPI_Sendrecv(&DomainTreeNodeLen[DomainStartList[sendTask]],
(DomainEndList[sendTask] - DomainStartList[sendTask] + 1) * sizeof(FLOAT),
MPI_BYTE, recvTask, TAG_NODELEN,
&DomainTreeNodeLen[DomainStartList[recvTask]],
(DomainEndList[recvTask] - DomainStartList[recvTask] + 1) * sizeof(FLOAT),
MPI_BYTE, recvTask, TAG_NODELEN, MPI_COMM_WORLD, &status);
}
/* Finally, we update the top-level tree. */
force_update_node_len_toptree();
}
/*! This function recursively enlarges nodes such that they always contain
* all their daughter nodes and daughter particles.
*/
void force_update_node_len_local(void)
{
int i, p, k, no;
FLOAT dist, distmax;
for(i = 0; i < NumPart; i++)
{
no = Father[i];
for(k = 0, distmax = 0; k < 3; k++)
{
dist = P[i].Pos[k] - Nodes[no].center[k];
if(dist < 0)
dist = -dist;
if(dist > distmax)
distmax = dist;
}
if(distmax + distmax > Nodes[no].len)
{
Nodes[no].len = distmax + distmax;
p = Nodes[no].u.d.father;
while(p >= 0)
{
distmax = Nodes[p].center[0] - Nodes[no].center[0];
if(distmax < 0)
distmax = -distmax;
distmax = distmax + distmax + Nodes[no].len;
if(0.999999 * distmax > Nodes[p].len)
{
Nodes[p].len = distmax;
no = p;
p = Nodes[p].u.d.father;
}
else
break;
}
}
}
}
/*! This function recursively enlarges nodes of the top-level tree such
* that they always contain all their daughter nodes.
*/
void force_update_node_len_toptree(void)
{
int i, no, p;
FLOAT distmax;
for(i = 0; i < NTopleaves; i++)
if(i < DomainMyStart || i > DomainMyLast)
{
no = DomainNodeIndex[i];
if(Nodes[no].len < DomainTreeNodeLen[i])
Nodes[no].len = DomainTreeNodeLen[i];
p = Nodes[no].u.d.father;
while(p >= 0)
{
distmax = Nodes[p].center[0] - Nodes[no].center[0];
if(distmax < 0)
distmax = -distmax;
distmax = distmax + distmax + Nodes[no].len;
if(0.999999 * distmax > Nodes[p].len)
{
Nodes[p].len = distmax;
no = p;
p = Nodes[p].u.d.father;
}
else
break;
}
}
}
/*! This function updates the hmax-values in tree nodes that hold SPH
* particles. These values are needed to find all neighbors in the
* hydro-force computation. Since the Hsml-values are potentially changed
* in the SPH-denity computation, force_update_hmax() should be carried
* out just before the hydrodynamical SPH forces are computed, i.e. after
* density().
*/
void force_update_hmax(void)
{
int i, no;
MPI_Status status;
int level, sendTask, recvTask;
force_update_node_hmax_local();
for(i = DomainMyStart; i <= DomainMyLast; i++)
{
no = DomainNodeIndex[i];
DomainHmax[i] = Extnodes[no].hmax;
}
/* share the hmax-data of the pseudo-particles accross CPUs */
for(level = 1; level < (1 << PTask); level++)
{
sendTask = ThisTask;
recvTask = ThisTask ^ level;
if(recvTask < NTask)
MPI_Sendrecv(&DomainHmax[DomainStartList[sendTask]],
(DomainEndList[sendTask] - DomainStartList[sendTask] + 1) * sizeof(FLOAT),
MPI_BYTE, recvTask, TAG_HMAX,
&DomainHmax[DomainStartList[recvTask]],
(DomainEndList[recvTask] - DomainStartList[recvTask] + 1) * sizeof(FLOAT),
MPI_BYTE, recvTask, TAG_HMAX, MPI_COMM_WORLD, &status);
}
force_update_node_hmax_toptree();
}
/*! This routine updates the hmax-values of local tree nodes.
*/
void force_update_node_hmax_local(void)
{
int i, p, no;
for(i = 0; i < N_gas; i++)
{
no = Father[i];
if(SphP[i].Hsml > Extnodes[no].hmax)
{
Extnodes[no].hmax = SphP[i].Hsml;
p = Nodes[no].u.d.father;
while(p >= 0)
{
if(Extnodes[no].hmax > Extnodes[p].hmax)
{
Extnodes[p].hmax = Extnodes[no].hmax;
no = p;
p = Nodes[p].u.d.father;
}
else
break;
}
}
}
}
/*! This function recursively sets the hmax-values of the top-level tree.
*/
void force_update_node_hmax_toptree(void)
{
int i, no, p;
for(i = 0; i < NTopleaves; i++)
if(i < DomainMyStart || i > DomainMyLast)
{
no = DomainNodeIndex[i];
if(Extnodes[no].hmax < DomainHmax[i])
Extnodes[no].hmax = DomainHmax[i];
p = Nodes[no].u.d.father;
while(p >= 0)
{
if(Extnodes[no].hmax > Extnodes[p].hmax)
{
Extnodes[p].hmax = Extnodes[no].hmax;
no = p;
p = Nodes[p].u.d.father;
}
else
break;
}
}
}
/*! This routine computes the gravitational force for a given local
* particle, or for a particle in the communication buffer. Depending on
* the value of TypeOfOpeningCriterion, either the geometrical BH
* cell-opening criterion, or the `relative' opening criterion is used.
*/
int force_treeevaluate(int target, int mode, double *ewaldcountsum)
{
struct NODE *nop = 0;
int no, ninteractions, ptype;
double r2, dx, dy, dz, mass, r, fac, u, h, h_inv, h3_inv;
double acc_x, acc_y, acc_z, pos_x, pos_y, pos_z, aold;
#if defined(UNEQUALSOFTENINGS) && !defined(ADAPTIVE_GRAVSOFT_FORGAS)
int maxsofttype;
#endif
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
double soft = 0;
#endif
#ifdef PERIODIC
double boxsize, boxhalf;
boxsize = All.BoxSize;
boxhalf = 0.5 * All.BoxSize;
#endif
#ifdef STELLAR_FLUX
double flux=0,starlum=0;
int type=0;
double hf;
#endif
acc_x = 0;
acc_y = 0;
acc_z = 0;
ninteractions = 0;
if(mode == 0)
{
pos_x = P[target].Pos[0];
pos_y = P[target].Pos[1];
pos_z = P[target].Pos[2];
ptype = P[target].Type;
aold = All.ErrTolForceAcc * P[target].OldAcc;
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
soft = SphP[target].Hsml;
#endif
#ifdef STELLAR_FLUX
type = P[target].Type;
#endif
}
else
{
pos_x = GravDataGet[target].u.Pos[0];
pos_y = GravDataGet[target].u.Pos[1];
pos_z = GravDataGet[target].u.Pos[2];
#ifdef UNEQUALSOFTENINGS
ptype = GravDataGet[target].Type;
#else
ptype = P[0].Type;
#endif
aold = All.ErrTolForceAcc * GravDataGet[target].w.OldAcc;
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
soft = GravDataGet[target].Soft;
#endif
#ifdef STELLAR_FLUX
type = GravDataGet[target].Type;
#endif
}
#ifndef UNEQUALSOFTENINGS
h = All.ForceSoftening[ptype];
h_inv = 1.0 / h;
h3_inv = h_inv * h_inv * h_inv;
#endif
no = All.MaxPart; /* root node */
while(no >= 0)
{
if(no < All.MaxPart) /* single particle */
{
/* the index of the node is the index of the particle */
/* observe the sign */
dx = P[no].Pos[0] - pos_x;
dy = P[no].Pos[1] - pos_y;
dz = P[no].Pos[2] - pos_z;
mass = P[no].Mass;
#ifdef STELLAR_FLUX
starlum = P[no].Mass*All.HeatingPeLMRatio[P[no].Type];
#endif
}
else
{
if(no >= All.MaxPart + MaxNodes) /* pseudo particle */
{
if(mode == 0)
{
Exportflag[DomainTask[no - (All.MaxPart + MaxNodes)]] = 1;
}
no = Nextnode[no - MaxNodes];
continue;
}
nop = &Nodes[no];
dx = nop->u.d.s[0] - pos_x;
dy = nop->u.d.s[1] - pos_y;
dz = nop->u.d.s[2] - pos_z;
mass = nop->u.d.mass;
#ifdef STELLAR_FLUX
starlum = nop->starlum;
#endif
}
#ifdef PERIODIC
dx = NEAREST(dx);
dy = NEAREST(dy);
dz = NEAREST(dz);
#endif
r2 = dx * dx + dy * dy + dz * dz;
if(no < All.MaxPart)
{
#ifdef UNEQUALSOFTENINGS
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
h = soft;
else
h = All.ForceSoftening[ptype];
if(P[no].Type == 0)
{
if(h < SphP[no].Hsml)
h = SphP[no].Hsml;
}
else
{
if(h < All.ForceSoftening[P[no].Type])
h = All.ForceSoftening[P[no].Type];
}
#else
h = All.ForceSoftening[ptype];
if(h < All.ForceSoftening[P[no].Type])
h = All.ForceSoftening[P[no].Type];
#endif
#endif
no = Nextnode[no];
}
else /* we have an internal node. Need to check opening criterion */
{
if(mode == 1)
{
if((nop->u.d.bitflags & 3) == 1) /* if it's a top-level node
* which does not contain
* local particles we can
* continue to do a short-cut */
{
no = nop->u.d.sibling;
continue;
}
}
if(All.ErrTolTheta) /* check Barnes-Hut opening criterion */
{
if(nop->len * nop->len > r2 * All.ErrTolTheta * All.ErrTolTheta)
{
/* open cell */
no = nop->u.d.nextnode;
continue;
}
}
else /* check relative opening criterion */
{
if(mass * nop->len * nop->len > r2 * r2 * aold)
{
/* open cell */
no = nop->u.d.nextnode;
continue;
}
/* check in addition whether we lie inside the cell */
if(fabs(nop->center[0] - pos_x) < 0.60 * nop->len)
{
if(fabs(nop->center[1] - pos_y) < 0.60 * nop->len)
{
if(fabs(nop->center[2] - pos_z) < 0.60 * nop->len)
{
no = nop->u.d.nextnode;
continue;
}
}
}
}
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
h = All.ForceSoftening[ptype];
maxsofttype = (nop->u.d.bitflags >> 2) & 7;
if(maxsofttype == 7) /* may only occur for zero mass top-level nodes */
{
if(mass > 0)
endrun(986);
no = nop->u.d.nextnode;
continue;
}
else
{
if(h < All.ForceSoftening[maxsofttype])
{
h = All.ForceSoftening[maxsofttype];
if(r2 < h * h)
{
if(((nop->u.d.bitflags >> 5) & 1)) /* bit-5 signals that there are particles of different softening in the node */
{
no = nop->u.d.nextnode;
continue;
}
}
}
}
#else
if(ptype == 0)
h = soft;
else
h = All.ForceSoftening[ptype];
if(h < nop->maxsoft)
{
h = nop->maxsoft;
if(r2 < h * h)
{
no = nop->u.d.nextnode;
continue;
}
}
#endif
#endif
no = nop->u.d.sibling; /* ok, node can be used */
if(mode == 1)
{
if(((nop->u.d.bitflags) & 1)) /* Bit 0 signals that this node belongs to top-level tree */
continue;
}
}
r = sqrt(r2);
if(r >= h)
fac = mass / (r2 * r);
else
{
#ifdef UNEQUALSOFTENINGS
h_inv = 1.0 / h;
h3_inv = h_inv * h_inv * h_inv;
#endif
u = r * h_inv;
if(u < 0.5)
fac = mass * h3_inv * (10.666666666667 + u * u * (32.0 * u - 38.4));
else
fac =
mass * h3_inv * (21.333333333333 - 48.0 * u +
38.4 * u * u - 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u));
}
acc_x += dx * fac;
acc_y += dy * fac;
acc_z += dz * fac;
#ifdef STELLAR_FLUX
if (type==0) /* gas particle */
{
hf = All.SofteningTable[type];
flux += starlum / (4*PI) / (r2 + hf*hf) ;
//if ((target==0)&&(starlum!=0))
// printf("flux flux=%g\n",flux);
//if ((target==0)&&(starlum!=0))
// printf("flux r=%g starlum=%g mass=%g h=%g no=%d\n",sqrt(r2),starlum,mass,hf,no);
//if(r >= hf)
// flux += starlum/All.HeatingPeLMRatio/ (4*PI) / r2 ;
//else
// flux += starlum/All.HeatingPeLMRatio/ (4*PI) / (r2 + hf*hf) ;
}
#endif
ninteractions++;
}
/* store result at the proper place */
if(mode == 0)
{
P[target].GravAccel[0] = acc_x;
P[target].GravAccel[1] = acc_y;
P[target].GravAccel[2] = acc_z;
#ifdef STELLAR_FLUX
if (P[target].Type==0) /* only for gas */
SphP[target].EnergyFlux = flux;
#endif
P[target].GravCost = ninteractions;
}
else
{
GravDataResult[target].u.Acc[0] = acc_x;
GravDataResult[target].u.Acc[1] = acc_y;
GravDataResult[target].u.Acc[2] = acc_z;
#ifdef STELLAR_FLUX
GravDataResult[target].EnergyFlux = flux;
#endif
GravDataResult[target].w.Ninteractions = ninteractions;
}
#ifdef PERIODIC
*ewaldcountsum += force_treeevaluate_ewald_correction(target, mode, pos_x, pos_y, pos_z, aold);
#endif
return ninteractions;
}
#ifdef PMGRID
/*! In the TreePM algorithm, the tree is walked only locally around the
* target coordinate. Tree nodes that fall outside a box of half
* side-length Rcut= RCUT*ASMTH*MeshSize can be discarded. The short-range
* potential is modified by a complementary error function, multiplied
* with the Newtonian form. The resulting short-range suppression compared
* to the Newtonian force is tabulated, because looking up from this table
* is faster than recomputing the corresponding factor, despite the
* memory-access panelty (which reduces cache performance) incurred by the
* table.
*/
int force_treeevaluate_shortrange(int target, int mode)
{
struct NODE *nop = 0;
int no, ptype, ninteractions, tabindex;
double r2, dx, dy, dz, mass, r, fac, u, h, h_inv, h3_inv;
double acc_x, acc_y, acc_z, pos_x, pos_y, pos_z, aold;
double eff_dist;
double rcut, asmth, asmthfac, rcut2, dist;
#if defined(UNEQUALSOFTENINGS) && !defined(ADAPTIVE_GRAVSOFT_FORGAS)
int maxsofttype;
#endif
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
double soft = 0;
#endif
#ifdef PERIODIC
double boxsize, boxhalf;
boxsize = All.BoxSize;
boxhalf = 0.5 * All.BoxSize;
#endif
acc_x = 0;
acc_y = 0;
acc_z = 0;
ninteractions = 0;
if(mode == 0)
{
pos_x = P[target].Pos[0];
pos_y = P[target].Pos[1];
pos_z = P[target].Pos[2];
ptype = P[target].Type;
aold = All.ErrTolForceAcc * P[target].OldAcc;
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
soft = SphP[target].Hsml;
#endif
}
else
{
pos_x = GravDataGet[target].u.Pos[0];
pos_y = GravDataGet[target].u.Pos[1];
pos_z = GravDataGet[target].u.Pos[2];
#ifdef UNEQUALSOFTENINGS
ptype = GravDataGet[target].Type;
#else
ptype = P[0].Type;
#endif
aold = All.ErrTolForceAcc * GravDataGet[target].w.OldAcc;
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
soft = GravDataGet[target].Soft;
#endif
}
rcut = All.Rcut[0];
asmth = All.Asmth[0];
#ifdef PLACEHIGHRESREGION
if(((1 << ptype) & (PLACEHIGHRESREGION)))
{
rcut = All.Rcut[1];
asmth = All.Asmth[1];
}
#endif
rcut2 = rcut * rcut;
asmthfac = 0.5 / asmth * (NTAB / 3.0);
#ifndef UNEQUALSOFTENINGS
h = All.ForceSoftening[ptype];
h_inv = 1.0 / h;
h3_inv = h_inv * h_inv * h_inv;
#endif
no = All.MaxPart; /* root node */
while(no >= 0)
{
if(no < All.MaxPart)
{
/* the index of the node is the index of the particle */
dx = P[no].Pos[0] - pos_x;
dy = P[no].Pos[1] - pos_y;
dz = P[no].Pos[2] - pos_z;
#ifdef PERIODIC
dx = NEAREST(dx);
dy = NEAREST(dy);
dz = NEAREST(dz);
#endif
r2 = dx * dx + dy * dy + dz * dz;
mass = P[no].Mass;
#ifdef UNEQUALSOFTENINGS
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
h = soft;
else
h = All.ForceSoftening[ptype];
if(P[no].Type == 0)
{
if(h < SphP[no].Hsml)
h = SphP[no].Hsml;
}
else
{
if(h < All.ForceSoftening[P[no].Type])
h = All.ForceSoftening[P[no].Type];
}
#else
h = All.ForceSoftening[ptype];
if(h < All.ForceSoftening[P[no].Type])
h = All.ForceSoftening[P[no].Type];
#endif
#endif
no = Nextnode[no];
}
else /* we have an internal node */
{
if(no >= All.MaxPart + MaxNodes) /* pseudo particle */
{
if(mode == 0)
{
Exportflag[DomainTask[no - (All.MaxPart + MaxNodes)]] = 1;
}
no = Nextnode[no - MaxNodes];
continue;
}
nop = &Nodes[no];
if(mode == 1)
{
if((nop->u.d.bitflags & 3) == 1) /* if it's a top-level node
* which does not contain
* local particles we can
* continue at this point
*/
{
no = nop->u.d.sibling;
continue;
}
}
mass = nop->u.d.mass;
dx = nop->u.d.s[0] - pos_x;
dy = nop->u.d.s[1] - pos_y;
dz = nop->u.d.s[2] - pos_z;
#ifdef PERIODIC
dx = NEAREST(dx);
dy = NEAREST(dy);
dz = NEAREST(dz);
#endif
r2 = dx * dx + dy * dy + dz * dz;
if(r2 > rcut2)
{
/* check whether we can stop walking along this branch */
eff_dist = rcut + 0.5 * nop->len;
#ifdef PERIODIC
dist = NEAREST(nop->center[0] - pos_x);
#else
dist = nop->center[0] - pos_x;
#endif
if(dist < -eff_dist || dist > eff_dist)
{
no = nop->u.d.sibling;
continue;
}
#ifdef PERIODIC
dist = NEAREST(nop->center[1] - pos_y);
#else
dist = nop->center[1] - pos_y;
#endif
if(dist < -eff_dist || dist > eff_dist)
{
no = nop->u.d.sibling;
continue;
}
#ifdef PERIODIC
dist = NEAREST(nop->center[2] - pos_z);
#else
dist = nop->center[2] - pos_z;
#endif
if(dist < -eff_dist || dist > eff_dist)
{
no = nop->u.d.sibling;
continue;
}
}
if(All.ErrTolTheta) /* check Barnes-Hut opening criterion */
{
if(nop->len * nop->len > r2 * All.ErrTolTheta * All.ErrTolTheta)
{
/* open cell */
no = nop->u.d.nextnode;
continue;
}
}
else /* check relative opening criterion */
{
if(mass * nop->len * nop->len > r2 * r2 * aold)
{
/* open cell */
no = nop->u.d.nextnode;
continue;
}
/* check in addition whether we lie inside the cell */
if(fabs(nop->center[0] - pos_x) < 0.60 * nop->len)
{
if(fabs(nop->center[1] - pos_y) < 0.60 * nop->len)
{
if(fabs(nop->center[2] - pos_z) < 0.60 * nop->len)
{
no = nop->u.d.nextnode;
continue;
}
}
}
}
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
h = All.ForceSoftening[ptype];
maxsofttype = (nop->u.d.bitflags >> 2) & 7;
if(maxsofttype == 7) /* may only occur for zero mass top-level nodes */
{
if(mass > 0)
endrun(987);
no = nop->u.d.nextnode;
continue;
}
else
{
if(h < All.ForceSoftening[maxsofttype])
{
h = All.ForceSoftening[maxsofttype];
if(r2 < h * h)
{
if(((nop->u.d.bitflags >> 5) & 1)) /* bit-5 signals that there are particles of different softening in the node */
{
no = nop->u.d.nextnode;
continue;
}
}
}
}
#else
if(ptype == 0)
h = soft;
else
h = All.ForceSoftening[ptype];
if(h < nop->maxsoft)
{
h = nop->maxsoft;
if(r2 < h * h)
{
no = nop->u.d.nextnode;
continue;
}
}
#endif
#endif
no = nop->u.d.sibling; /* ok, node can be used */
if(mode == 1)
{
if(((nop->u.d.bitflags) & 1)) /* Bit 0 signals that this node belongs to top-level tree */
continue;
}
}
r = sqrt(r2);
if(r >= h)
fac = mass / (r2 * r);
else
{
#ifdef UNEQUALSOFTENINGS
h_inv = 1.0 / h;
h3_inv = h_inv * h_inv * h_inv;
#endif
u = r * h_inv;
if(u < 0.5)
fac = mass * h3_inv * (10.666666666667 + u * u * (32.0 * u - 38.4));
else
fac =
mass * h3_inv * (21.333333333333 - 48.0 * u +
38.4 * u * u - 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u));
}
tabindex = (int) (asmthfac * r);
if(tabindex < NTAB)
{
fac *= shortrange_table[tabindex];
acc_x += dx * fac;
acc_y += dy * fac;
acc_z += dz * fac;
ninteractions++;
}
}
/* store result at the proper place */
if(mode == 0)
{
P[target].GravAccel[0] = acc_x;
P[target].GravAccel[1] = acc_y;
P[target].GravAccel[2] = acc_z;
P[target].GravCost = ninteractions;
}
else
{
GravDataResult[target].u.Acc[0] = acc_x;
GravDataResult[target].u.Acc[1] = acc_y;
GravDataResult[target].u.Acc[2] = acc_z;
GravDataResult[target].w.Ninteractions = ninteractions;
}
return ninteractions;
}
#endif
#ifdef PERIODIC
/*! This function computes the Ewald correction, and is needed if periodic
* boundary conditions together with a pure tree algorithm are used. Note
* that the ordinary tree walk does not carry out this correction directly
* as it was done in Gadget-1.1. Instead, the tree is walked a second
* time. This is actually faster because the "Ewald-Treewalk" can use a
* different opening criterion than the normal tree walk. In particular,
* the Ewald correction is negligible for particles that are very close,
* but it is large for particles that are far away (this is quite
* different for the normal direct force). So we can here use a different
* opening criterion. Sufficient accuracy is usually obtained if the node
* length has dropped to a certain fraction ~< 0.25 of the
* BoxLength. However, we may only short-cut the interaction list of the
* normal full Ewald tree walk if we are sure that the whole node and all
* daughter nodes "lie on the same side" of the periodic boundary,
* i.e. that the real tree walk would not find a daughter node or particle
* that was mapped to a different nearest neighbour position when the tree
* walk would be further refined.
*/
int force_treeevaluate_ewald_correction(int target, int mode, double pos_x, double pos_y, double pos_z,
double aold)
{
struct NODE *nop = 0;
int no, cost;
double dx, dy, dz, mass, r2;
int signx, signy, signz;
int i, j, k, openflag;
double u, v, w;
double f1, f2, f3, f4, f5, f6, f7, f8;
double acc_x, acc_y, acc_z;
double boxsize, boxhalf;
boxsize = All.BoxSize;
boxhalf = 0.5 * All.BoxSize;
acc_x = 0;
acc_y = 0;
acc_z = 0;
cost = 0;
no = All.MaxPart;
while(no >= 0)
{
if(no < All.MaxPart) /* single particle */
{
/* the index of the node is the index of the particle */
/* observe the sign */
dx = P[no].Pos[0] - pos_x;
dy = P[no].Pos[1] - pos_y;
dz = P[no].Pos[2] - pos_z;
mass = P[no].Mass;
}
else
{
if(no >= All.MaxPart + MaxNodes) /* pseudo particle */
{
if(mode == 0)
{
Exportflag[DomainTask[no - (All.MaxPart + MaxNodes)]] = 1;
}
no = Nextnode[no - MaxNodes];
continue;
}
nop = &Nodes[no];
dx = nop->u.d.s[0] - pos_x;
dy = nop->u.d.s[1] - pos_y;
dz = nop->u.d.s[2] - pos_z;
mass = nop->u.d.mass;
}
dx = NEAREST(dx);
dy = NEAREST(dy);
dz = NEAREST(dz);
if(no < All.MaxPart)
no = Nextnode[no];
else /* we have an internal node. Need to check opening criterion */
{
openflag = 0;
r2 = dx * dx + dy * dy + dz * dz;
if(All.ErrTolTheta) /* check Barnes-Hut opening criterion */
{
if(nop->len * nop->len > r2 * All.ErrTolTheta * All.ErrTolTheta)
{
openflag = 1;
}
}
else /* check relative opening criterion */
{
if(mass * nop->len * nop->len > r2 * r2 * aold)
{
openflag = 1;
}
else
{
if(fabs(nop->center[0] - pos_x) < 0.60 * nop->len)
{
if(fabs(nop->center[1] - pos_y) < 0.60 * nop->len)
{
if(fabs(nop->center[2] - pos_z) < 0.60 * nop->len)
{
openflag = 1;
}
}
}
}
}
if(openflag)
{
/* now we check if we can avoid opening the cell */
u = nop->center[0] - pos_x;
if(u > boxhalf)
u -= boxsize;
if(u < -boxhalf)
u += boxsize;
if(fabs(u) > 0.5 * (boxsize - nop->len))
{
no = nop->u.d.nextnode;
continue;
}
u = nop->center[1] - pos_y;
if(u > boxhalf)
u -= boxsize;
if(u < -boxhalf)
u += boxsize;
if(fabs(u) > 0.5 * (boxsize - nop->len))
{
no = nop->u.d.nextnode;
continue;
}
u = nop->center[2] - pos_z;
if(u > boxhalf)
u -= boxsize;
if(u < -boxhalf)
u += boxsize;
if(fabs(u) > 0.5 * (boxsize - nop->len))
{
no = nop->u.d.nextnode;
continue;
}
/* if the cell is too large, we need to refine
* it further
*/
if(nop->len > 0.20 * boxsize)
{
/* cell is too large */
no = nop->u.d.nextnode;
continue;
}
}
no = nop->u.d.sibling; /* ok, node can be used */
if(mode == 1)
{
if((nop->u.d.bitflags & 1)) /* Bit 0 signals that this node belongs to top-level tree */
continue;
}
}
/* compute the Ewald correction force */
if(dx < 0)
{
dx = -dx;
signx = +1;
}
else
signx = -1;
if(dy < 0)
{
dy = -dy;
signy = +1;
}
else
signy = -1;
if(dz < 0)
{
dz = -dz;
signz = +1;
}
else
signz = -1;
u = dx * fac_intp;
i = (int) u;
if(i >= EN)
i = EN - 1;
u -= i;
v = dy * fac_intp;
j = (int) v;
if(j >= EN)
j = EN - 1;
v -= j;
w = dz * fac_intp;
k = (int) w;
if(k >= EN)
k = EN - 1;
w -= k;
/* compute factors for trilinear interpolation */
f1 = (1 - u) * (1 - v) * (1 - w);
f2 = (1 - u) * (1 - v) * (w);
f3 = (1 - u) * (v) * (1 - w);
f4 = (1 - u) * (v) * (w);
f5 = (u) * (1 - v) * (1 - w);
f6 = (u) * (1 - v) * (w);
f7 = (u) * (v) * (1 - w);
f8 = (u) * (v) * (w);
acc_x += mass * signx * (fcorrx[i][j][k] * f1 +
fcorrx[i][j][k + 1] * f2 +
fcorrx[i][j + 1][k] * f3 +
fcorrx[i][j + 1][k + 1] * f4 +
fcorrx[i + 1][j][k] * f5 +
fcorrx[i + 1][j][k + 1] * f6 +
fcorrx[i + 1][j + 1][k] * f7 + fcorrx[i + 1][j + 1][k + 1] * f8);
acc_y += mass * signy * (fcorry[i][j][k] * f1 +
fcorry[i][j][k + 1] * f2 +
fcorry[i][j + 1][k] * f3 +
fcorry[i][j + 1][k + 1] * f4 +
fcorry[i + 1][j][k] * f5 +
fcorry[i + 1][j][k + 1] * f6 +
fcorry[i + 1][j + 1][k] * f7 + fcorry[i + 1][j + 1][k + 1] * f8);
acc_z += mass * signz * (fcorrz[i][j][k] * f1 +
fcorrz[i][j][k + 1] * f2 +
fcorrz[i][j + 1][k] * f3 +
fcorrz[i][j + 1][k + 1] * f4 +
fcorrz[i + 1][j][k] * f5 +
fcorrz[i + 1][j][k + 1] * f6 +
fcorrz[i + 1][j + 1][k] * f7 + fcorrz[i + 1][j + 1][k + 1] * f8);
cost++;
}
/* add the result at the proper place */
if(mode == 0)
{
P[target].GravAccel[0] += acc_x;
P[target].GravAccel[1] += acc_y;
P[target].GravAccel[2] += acc_z;
P[target].GravCost += cost;
}
else
{
GravDataResult[target].u.Acc[0] += acc_x;
GravDataResult[target].u.Acc[1] += acc_y;
GravDataResult[target].u.Acc[2] += acc_z;
GravDataResult[target].w.Ninteractions += cost;
}
return cost;
}
#endif
/*! This routine computes the gravitational potential by walking the
* tree. The same opening criteria is used as for the gravitational force
* walk.
*/
void force_treeevaluate_potential(int target, int mode)
{
struct NODE *nop = 0;
int no, ptype;
double r2, dx, dy, dz, mass, r, u, h, h_inv, wp;
double pot, pos_x, pos_y, pos_z, aold;
#if defined(UNEQUALSOFTENINGS) && !defined(ADAPTIVE_GRAVSOFT_FORGAS)
int maxsofttype;
#endif
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
double soft = 0;
#endif
#ifdef PERIODIC
double boxsize, boxhalf;
boxsize = All.BoxSize;
boxhalf = 0.5 * All.BoxSize;
#endif
pot = 0;
if(mode == 0)
{
pos_x = P[target].Pos[0];
pos_y = P[target].Pos[1];
pos_z = P[target].Pos[2];
ptype = P[target].Type;
aold = All.ErrTolForceAcc * P[target].OldAcc;
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
soft = SphP[target].Hsml;
#endif
}
else
{
pos_x = GravDataGet[target].u.Pos[0];
pos_y = GravDataGet[target].u.Pos[1];
pos_z = GravDataGet[target].u.Pos[2];
#ifdef UNEQUALSOFTENINGS
ptype = GravDataGet[target].Type;
#else
ptype = P[0].Type;
#endif
aold = All.ErrTolForceAcc * GravDataGet[target].w.OldAcc;
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
soft = GravDataGet[target].Soft;
#endif
}
#ifndef UNEQUALSOFTENINGS
h = All.ForceSoftening[ptype];
h_inv = 1.0 / h;
#endif
no = All.MaxPart;
while(no >= 0)
{
if(no < All.MaxPart) /* single particle */
{
/* the index of the node is the index of the particle */
/* observe the sign */
dx = P[no].Pos[0] - pos_x;
dy = P[no].Pos[1] - pos_y;
dz = P[no].Pos[2] - pos_z;
mass = P[no].Mass;
}
else
{
if(no >= All.MaxPart + MaxNodes) /* pseudo particle */
{
if(mode == 0)
{
Exportflag[DomainTask[no - (All.MaxPart + MaxNodes)]] = 1;
}
no = Nextnode[no - MaxNodes];
continue;
}
nop = &Nodes[no];
dx = nop->u.d.s[0] - pos_x;
dy = nop->u.d.s[1] - pos_y;
dz = nop->u.d.s[2] - pos_z;
mass = nop->u.d.mass;
}
#ifdef PERIODIC
dx = NEAREST(dx);
dy = NEAREST(dy);
dz = NEAREST(dz);
#endif
r2 = dx * dx + dy * dy + dz * dz;
if(no < All.MaxPart)
{
#ifdef UNEQUALSOFTENINGS
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
h = soft;
else
h = All.ForceSoftening[ptype];
if(P[no].Type == 0)
{
if(h < SphP[no].Hsml)
h = SphP[no].Hsml;
}
else
{
if(h < All.ForceSoftening[P[no].Type])
h = All.ForceSoftening[P[no].Type];
}
#else
h = All.ForceSoftening[ptype];
if(h < All.ForceSoftening[P[no].Type])
h = All.ForceSoftening[P[no].Type];
#endif
#endif
no = Nextnode[no];
}
else /* we have an internal node. Need to check opening criterion */
{
if(mode == 1)
{
if((nop->u.d.bitflags & 3) == 1) /* if it's a top-level node
* which does not contain
* local particles we can make
* a short-cut
*/
{
no = nop->u.d.sibling;
continue;
}
}
if(All.ErrTolTheta) /* check Barnes-Hut opening criterion */
{
if(nop->len * nop->len > r2 * All.ErrTolTheta * All.ErrTolTheta)
{
/* open cell */
no = nop->u.d.nextnode;
continue;
}
}
else /* check relative opening criterion */
{
if(mass * nop->len * nop->len > r2 * r2 * aold)
{
/* open cell */
no = nop->u.d.nextnode;
continue;
}
if(fabs(nop->center[0] - pos_x) < 0.60 * nop->len)
{
if(fabs(nop->center[1] - pos_y) < 0.60 * nop->len)
{
if(fabs(nop->center[2] - pos_z) < 0.60 * nop->len)
{
no = nop->u.d.nextnode;
continue;
}
}
}
}
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
h = All.ForceSoftening[ptype];
maxsofttype = (nop->u.d.bitflags >> 2) & 7;
if(maxsofttype == 7) /* may only occur for zero mass top-level nodes */
{
if(mass > 0)
endrun(988);
no = nop->u.d.nextnode;
continue;
}
else
{
if(h < All.ForceSoftening[maxsofttype])
{
h = All.ForceSoftening[maxsofttype];
if(r2 < h * h)
{
if(((nop->u.d.bitflags >> 5) & 1)) /* bit-5 signals that there are particles of different softening in the node */
{
no = nop->u.d.nextnode;
continue;
}
}
}
}
#else
if(ptype == 0)
h = soft;
else
h = All.ForceSoftening[ptype];
if(h < nop->maxsoft)
{
h = nop->maxsoft;
if(r2 < h * h)
{
no = nop->u.d.nextnode;
continue;
}
}
#endif
#endif
no = nop->u.d.sibling; /* node can be used */
if(mode == 1)
{
if(((nop->u.d.bitflags) & 1)) /* Bit 0 signals that this node belongs to top-level tree */
continue;
}
}
r = sqrt(r2);
if(r >= h)
pot -= mass / r;
else
{
#ifdef UNEQUALSOFTENINGS
h_inv = 1.0 / h;
#endif
u = r * h_inv;
if(u < 0.5)
wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6));
else
wp =
-3.2 + 0.066666666667 / u + u * u * (10.666666666667 +
u * (-16.0 + u * (9.6 - 2.133333333333 * u)));
pot += mass * h_inv * wp;
}
#ifdef PERIODIC
pot += mass * ewald_pot_corr(dx, dy, dz);
#endif
}
/* store result at the proper place */
if(mode == 0)
P[target].Potential = pot;
else
GravDataResult[target].u.Potential = pot;
}
#ifdef PMGRID
/*! This function computes the short-range potential when the TreePM
* algorithm is used. This potential is the Newtonian potential, modified
* by a complementary error function.
*/
void force_treeevaluate_potential_shortrange(int target, int mode)
{
struct NODE *nop = 0;
int no, ptype, tabindex;
double r2, dx, dy, dz, mass, r, u, h, h_inv, wp;
double pot, pos_x, pos_y, pos_z, aold;
double eff_dist, fac, rcut, asmth, asmthfac;
double dxx, dyy, dzz;
#if defined(UNEQUALSOFTENINGS) && !defined(ADAPTIVE_GRAVSOFT_FORGAS)
int maxsofttype;
#endif
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
double soft = 0;
#endif
#ifdef PERIODIC
double boxsize, boxhalf;
boxsize = All.BoxSize;
boxhalf = 0.5 * All.BoxSize;
#endif
pot = 0;
if(mode == 0)
{
pos_x = P[target].Pos[0];
pos_y = P[target].Pos[1];
pos_z = P[target].Pos[2];
ptype = P[target].Type;
aold = All.ErrTolForceAcc * P[target].OldAcc;
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
soft = SphP[target].Hsml;
#endif
}
else
{
pos_x = GravDataGet[target].u.Pos[0];
pos_y = GravDataGet[target].u.Pos[1];
pos_z = GravDataGet[target].u.Pos[2];
#ifdef UNEQUALSOFTENINGS
ptype = GravDataGet[target].Type;
#else
ptype = P[0].Type;
#endif
aold = All.ErrTolForceAcc * GravDataGet[target].w.OldAcc;
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
soft = GravDataGet[target].Soft;
#endif
}
rcut = All.Rcut[0];
asmth = All.Asmth[0];
#ifdef PLACEHIGHRESREGION
if(((1 << ptype) & (PLACEHIGHRESREGION)))
{
rcut = All.Rcut[1];
asmth = All.Asmth[1];
}
#endif
asmthfac = 0.5 / asmth * (NTAB / 3.0);
#ifndef UNEQUALSOFTENINGS
h = All.ForceSoftening[ptype];
h_inv = 1.0 / h;
#endif
no = All.MaxPart;
while(no >= 0)
{
if(no < All.MaxPart) /* single particle */
{
/* the index of the node is the index of the particle */
/* observe the sign */
dx = P[no].Pos[0] - pos_x;
dy = P[no].Pos[1] - pos_y;
dz = P[no].Pos[2] - pos_z;
mass = P[no].Mass;
}
else
{
if(no >= All.MaxPart + MaxNodes) /* pseudo particle */
{
if(mode == 0)
{
Exportflag[DomainTask[no - (All.MaxPart + MaxNodes)]] = 1;
}
no = Nextnode[no - MaxNodes];
continue;
}
nop = &Nodes[no];
dx = nop->u.d.s[0] - pos_x;
dy = nop->u.d.s[1] - pos_y;
dz = nop->u.d.s[2] - pos_z;
mass = nop->u.d.mass;
}
#ifdef PERIODIC
dx = NEAREST(dx);
dy = NEAREST(dy);
dz = NEAREST(dz);
#endif
r2 = dx * dx + dy * dy + dz * dz;
if(no < All.MaxPart)
{
#ifdef UNEQUALSOFTENINGS
#ifdef ADAPTIVE_GRAVSOFT_FORGAS
if(ptype == 0)
h = soft;
else
h = All.ForceSoftening[ptype];
if(P[no].Type == 0)
{
if(h < SphP[no].Hsml)
h = SphP[no].Hsml;
}
else
{
if(h < All.ForceSoftening[P[no].Type])
h = All.ForceSoftening[P[no].Type];
}
#else
h = All.ForceSoftening[ptype];
if(h < All.ForceSoftening[P[no].Type])
h = All.ForceSoftening[P[no].Type];
#endif
#endif
no = Nextnode[no];
}
else /* we have an internal node. Need to check opening criterion */
{
/* check whether we can stop walking along this branch */
if(no >= All.MaxPart + MaxNodes) /* pseudo particle */
{
if(mode == 0)
{
Exportflag[DomainTask[no - (All.MaxPart + MaxNodes)]] = 1;
}
no = Nextnode[no - MaxNodes];
continue;
}
if(mode == 1)
{
if((nop->u.d.bitflags & 3) == 1) /* if it's a top-level node which does not contain local particles */
{
no = nop->u.d.sibling;
continue;
}
}
eff_dist = rcut + 0.5 * nop->len;
dxx = nop->center[0] - pos_x; /* observe the sign ! */
dyy = nop->center[1] - pos_y; /* this vector is -y in my thesis notation */
dzz = nop->center[2] - pos_z;
#ifdef PERIODIC
dxx = NEAREST(dxx);
dyy = NEAREST(dyy);
dzz = NEAREST(dzz);
#endif
if(dxx < -eff_dist || dxx > eff_dist)
{
no = nop->u.d.sibling;
continue;
}
if(dyy < -eff_dist || dyy > eff_dist)
{
no = nop->u.d.sibling;
continue;
}
if(dzz < -eff_dist || dzz > eff_dist)
{
no = nop->u.d.sibling;
continue;
}
if(All.ErrTolTheta) /* check Barnes-Hut opening criterion */
{
if(nop->len * nop->len > r2 * All.ErrTolTheta * All.ErrTolTheta)
{
/* open cell */
no = nop->u.d.nextnode;
continue;
}
}
else /* check relative opening criterion */
{
if(mass * nop->len * nop->len > r2 * r2 * aold)
{
/* open cell */
no = nop->u.d.nextnode;
continue;
}
if(fabs(nop->center[0] - pos_x) < 0.60 * nop->len)
{
if(fabs(nop->center[1] - pos_y) < 0.60 * nop->len)
{
if(fabs(nop->center[2] - pos_z) < 0.60 * nop->len)
{
no = nop->u.d.nextnode;
continue;
}
}
}
}
#ifdef UNEQUALSOFTENINGS
#ifndef ADAPTIVE_GRAVSOFT_FORGAS
h = All.ForceSoftening[ptype];
maxsofttype = (nop->u.d.bitflags >> 2) & 7;
if(maxsofttype == 7) /* may only occur for zero mass top-level nodes */
{
if(mass > 0)
endrun(989);
no = nop->u.d.nextnode;
continue;
}
else
{
if(h < All.ForceSoftening[maxsofttype])
{
h = All.ForceSoftening[maxsofttype];
if(r2 < h * h)
{
/* bit-5 signals that there are particles of
* different softening in the node
*/
if(((nop->u.d.bitflags >> 5) & 1))
{
no = nop->u.d.nextnode;
continue;
}
}
}
}
#else
if(ptype == 0)
h = soft;
else
h = All.ForceSoftening[ptype];
if(h < nop->maxsoft)
{
h = nop->maxsoft;
if(r2 < h * h)
{
no = nop->u.d.nextnode;
continue;
}
}
#endif
#endif
no = nop->u.d.sibling; /* node can be used */
if(mode == 1)
{
if(((nop->u.d.bitflags) & 1)) /* Bit 0 signals that this node belongs to top-level tree */
continue;
}
}
r = sqrt(r2);
tabindex = (int) (r * asmthfac);
if(tabindex < NTAB)
{
fac = shortrange_table_potential[tabindex];
if(r >= h)
pot -= fac * mass / r;
else
{
#ifdef UNEQUALSOFTENINGS
h_inv = 1.0 / h;
#endif
u = r * h_inv;
if(u < 0.5)
wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6));
else
wp =
-3.2 + 0.066666666667 / u + u * u * (10.666666666667 +
u * (-16.0 + u * (9.6 - 2.133333333333 * u)));
pot += fac * mass * h_inv * wp;
}
}
}
/* store result at the proper place */
if(mode == 0)
P[target].Potential = pot;
else
GravDataResult[target].u.Potential = pot;
}
#endif
/*! This function allocates the memory used for storage of the tree and of
* auxiliary arrays needed for tree-walk and link-lists. Usually,
* maxnodes approximately equal to 0.7*maxpart is sufficient to store the
* tree for up to maxpart particles.
*/
void force_treeallocate(int maxnodes, int maxpart)
{
int i;
size_t bytes;
double allbytes = 0;
double u;
MaxNodes = maxnodes;
if(!(Nodes_base = malloc(bytes = (MaxNodes + 1) * sizeof(struct NODE))))
{
printf("failed to allocate memory for %d tree-nodes (%g MB).\n", MaxNodes, bytes / (1024.0 * 1024.0));
endrun(3);
}
allbytes += bytes;
if(!(Extnodes_base = malloc(bytes = (MaxNodes + 1) * sizeof(struct extNODE))))
{
printf("failed to allocate memory for %d tree-extnodes (%g MB).\n", MaxNodes,
bytes / (1024.0 * 1024.0));
endrun(3);
}
allbytes += bytes;
Nodes = Nodes_base - All.MaxPart;
Extnodes = Extnodes_base - All.MaxPart;
if(!(Nextnode = malloc(bytes = (maxpart + MAXTOPNODES) * sizeof(int))))
{
printf("Failed to allocate %d spaces for 'Nextnode' array (%g MB)\n", maxpart + MAXTOPNODES,
bytes / (1024.0 * 1024.0));
exit(0);
}
allbytes += bytes;
if(!(Father = malloc(bytes = (maxpart) * sizeof(int))))
{
printf("Failed to allocate %d spaces for 'Father' array (%g MB)\n", maxpart, bytes / (1024.0 * 1024.0));
exit(0);
}
allbytes += bytes;
if(first_flag == 0)
{
first_flag = 1;
if(ThisTask == 0)
printf("\nAllocated %g MByte for BH-tree. %d\n\n", allbytes / (1024.0 * 1024.0),
sizeof(struct NODE) + sizeof(struct extNODE));
tabfac = NTAB / 3.0;
for(i = 0; i < NTAB; i++)
{
u = 3.0 / NTAB * (i + 0.5);
shortrange_table[i] = erfc(u) + 2.0 * u / sqrt(M_PI) * exp(-u * u);
shortrange_table_potential[i] = erfc(u);
}
}
}
/*! This function frees the memory allocated for the tree, i.e. it frees
* the space allocated by the function force_treeallocate().
*/
void force_treefree(void)
{
free(Father);
free(Nextnode);
free(Extnodes_base);
free(Nodes_base);
}
/*! This function does the force computation with direct summation for the
* specified particle in the communication buffer. This can be useful for
* debugging purposes, in particular for explicit checks of the force
* accuracy.
*/
#ifdef FORCETEST
int force_treeevaluate_direct(int target, int mode)
{
double epsilon;
double h, h_inv, dx, dy, dz, r, r2, u, r_inv, fac;
int i, ptype;
double pos_x, pos_y, pos_z;
double acc_x, acc_y, acc_z;
#ifdef PERIODIC
double fcorr[3];
#endif
#ifdef PERIODIC
double boxsize, boxhalf;
boxsize = All.BoxSize;
boxhalf = 0.5 * All.BoxSize;
#endif
acc_x = 0;
acc_y = 0;
acc_z = 0;
if(mode == 0)
{
pos_x = P[target].Pos[0];
pos_y = P[target].Pos[1];
pos_z = P[target].Pos[2];
ptype = P[target].Type;
}
else
{
pos_x = GravDataGet[target].u.Pos[0];
pos_y = GravDataGet[target].u.Pos[1];
pos_z = GravDataGet[target].u.Pos[2];
#ifdef UNEQUALSOFTENINGS
ptype = GravDataGet[target].Type;
#else
ptype = P[0].Type;
#endif
}
for(i = 0; i < NumPart; i++)
{
epsilon = dmax(All.ForceSoftening[P[i].Type], All.ForceSoftening[ptype]);
h = epsilon;
h_inv = 1 / h;
dx = P[i].Pos[0] - pos_x;
dy = P[i].Pos[1] - pos_y;
dz = P[i].Pos[2] - pos_z;
#ifdef PERIODIC
while(dx > boxhalf)
dx -= boxsize;
while(dy > boxhalf)
dy -= boxsize;
while(dz > boxhalf)
dz -= boxsize;
while(dx < -boxhalf)
dx += boxsize;
while(dy < -boxhalf)
dy += boxsize;
while(dz < -boxhalf)
dz += boxsize;
#endif
r2 = dx * dx + dy * dy + dz * dz;
r = sqrt(r2);
u = r * h_inv;
if(u >= 1)
{
r_inv = 1 / r;
fac = P[i].Mass * r_inv * r_inv * r_inv;
}
else
{
if(u < 0.5)
fac = P[i].Mass * h_inv * h_inv * h_inv * (10.666666666667 + u * u * (32.0 * u - 38.4));
else
fac =
P[i].Mass * h_inv * h_inv * h_inv * (21.333333333333 -
48.0 * u + 38.4 * u * u -
10.666666666667 * u * u *
u - 0.066666666667 / (u * u * u));
}
acc_x += dx * fac;
acc_y += dy * fac;
acc_z += dz * fac;
#ifdef PERIODIC
if(u > 1.0e-5)
{
ewald_corr(dx, dy, dz, fcorr);
acc_x += P[i].Mass * fcorr[0];
acc_y += P[i].Mass * fcorr[1];
acc_z += P[i].Mass * fcorr[2];
}
#endif
}
if(mode == 0)
{
P[target].GravAccelDirect[0] = acc_x;
P[target].GravAccelDirect[1] = acc_y;
P[target].GravAccelDirect[2] = acc_z;
}
else
{
GravDataResult[target].u.Acc[0] = acc_x;
GravDataResult[target].u.Acc[1] = acc_y;
GravDataResult[target].u.Acc[2] = acc_z;
}
return NumPart;
}
#endif
/*! This function dumps some of the basic particle data to a file. In case
* the tree construction fails, it is called just before the run
* terminates with an error message. Examination of the generated file may
* then give clues to what caused the problem.
*/
void dump_particles(void)
{
FILE *fd;
char buffer[200];
int i;
sprintf(buffer, "particles%d.dat", ThisTask);
fd = fopen(buffer, "w");
my_fwrite(&NumPart, 1, sizeof(int), fd);
for(i = 0; i < NumPart; i++)
my_fwrite(&P[i].Pos[0], 3, sizeof(FLOAT), fd);
for(i = 0; i < NumPart; i++)
my_fwrite(&P[i].Vel[0], 3, sizeof(FLOAT), fd);
for(i = 0; i < NumPart; i++)
my_fwrite(&P[i].ID, 1, sizeof(int), fd);
fclose(fd);
}
#ifdef PERIODIC
/*! This function initializes tables with the correction force and the
* correction potential due to the periodic images of a point mass located
* at the origin. These corrections are obtained by Ewald summation. (See
* Hernquist, Bouchet, Suto, ApJS, 1991, 75, 231) The correction fields
* are used to obtain the full periodic force if periodic boundaries
* combined with the pure tree algorithm are used. For the TreePM
* algorithm, the Ewald correction is not used.
*
* The correction fields are stored on disk once they are computed. If a
* corresponding file is found, they are loaded from disk to speed up the
* initialization. The Ewald summation is done in parallel, i.e. the
* processors share the work to compute the tables if needed.
*/
void ewald_init(void)
{
int i, j, k, beg, len, size, n, task, count;
double x[3], force[3];
char buf[200];
FILE *fd;
if(ThisTask == 0)
{
printf("initialize Ewald correction...\n");
fflush(stdout);
}
#ifdef DOUBLEPRECISION
sprintf(buf, "ewald_spc_table_%d_dbl.dat", EN);
#else
sprintf(buf, "ewald_spc_table_%d.dat", EN);
#endif
#ifdef ONLY_MASTER_READ_EWALD
/* the master check if the file exists */
int is_ewald_file=0;
if(ThisTask == 0)
{
if((fd = fopen(buf, "r")))
{
printf("\n(%d) reading Ewald tables from file `%s'\n", ThisTask,buf);
fflush(stdout);
my_fread(&fcorrx[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
my_fread(&fcorry[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
my_fread(&fcorrz[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
my_fread(&potcorr[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
fclose(fd);
is_ewald_file=1;
}
else
is_ewald_file=0;
}
MPI_Bcast(&is_ewald_file, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (is_ewald_file)
{
#ifdef DOUBLEPRECISION
MPI_Bcast(&fcorrx[0][0][0], (EN + 1) * (EN + 1) * (EN + 1), MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(&fcorry[0][0][0], (EN + 1) * (EN + 1) * (EN + 1), MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(&fcorrz[0][0][0], (EN + 1) * (EN + 1) * (EN + 1), MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(&potcorr[0][0][0], (EN + 1) * (EN + 1) * (EN + 1), MPI_DOUBLE, 0, MPI_COMM_WORLD);
#else
MPI_Bcast(&fcorrx[0][0][0], (EN + 1) * (EN + 1) * (EN + 1), MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(&fcorry[0][0][0], (EN + 1) * (EN + 1) * (EN + 1), MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(&fcorrz[0][0][0], (EN + 1) * (EN + 1) * (EN + 1), MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(&potcorr[0][0][0], (EN + 1) * (EN + 1) * (EN + 1), MPI_FLOAT, 0, MPI_COMM_WORLD);
#endif
}
#else
if((fd = fopen(buf, "r")))
{
if(ThisTask == 0)
{
printf("\nreading Ewald tables from file `%s'\n", buf);
fflush(stdout);
}
my_fread(&fcorrx[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
my_fread(&fcorry[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
my_fread(&fcorrz[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
my_fread(&potcorr[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
fclose(fd);
}
#endif
else
{
if(ThisTask == 0)
{
printf("\nNo Ewald tables in file `%s' found.\nRecomputing them...\n", buf);
fflush(stdout);
}
/* ok, let's recompute things. Actually, we do that in parallel. */
size = (EN + 1) * (EN + 1) * (EN + 1) / NTask;
beg = ThisTask * size;
len = size;
if(ThisTask == (NTask - 1))
len = (EN + 1) * (EN + 1) * (EN + 1) - beg;
for(i = 0, count = 0; i <= EN; i++)
for(j = 0; j <= EN; j++)
for(k = 0; k <= EN; k++)
{
n = (i * (EN + 1) + j) * (EN + 1) + k;
if(n >= beg && n < (beg + len))
{
if(ThisTask == 0)
{
if((count % (len / 20)) == 0)
{
printf("%4.1f percent done\n", count / (len / 100.0));
fflush(stdout);
}
}
x[0] = 0.5 * ((double) i) / EN;
x[1] = 0.5 * ((double) j) / EN;
x[2] = 0.5 * ((double) k) / EN;
ewald_force(i, j, k, x, force);
fcorrx[i][j][k] = force[0];
fcorry[i][j][k] = force[1];
fcorrz[i][j][k] = force[2];
if(i + j + k == 0)
potcorr[i][j][k] = 2.8372975;
else
potcorr[i][j][k] = ewald_psi(x);
count++;
}
}
for(task = 0; task < NTask; task++)
{
beg = task * size;
len = size;
if(task == (NTask - 1))
len = (EN + 1) * (EN + 1) * (EN + 1) - beg;
#ifdef DOUBLEPRECISION
MPI_Bcast(&fcorrx[0][0][beg], len, MPI_DOUBLE, task, MPI_COMM_WORLD);
MPI_Bcast(&fcorry[0][0][beg], len, MPI_DOUBLE, task, MPI_COMM_WORLD);
MPI_Bcast(&fcorrz[0][0][beg], len, MPI_DOUBLE, task, MPI_COMM_WORLD);
MPI_Bcast(&potcorr[0][0][beg], len, MPI_DOUBLE, task, MPI_COMM_WORLD);
#else
MPI_Bcast(&fcorrx[0][0][beg], len, MPI_FLOAT, task, MPI_COMM_WORLD);
MPI_Bcast(&fcorry[0][0][beg], len, MPI_FLOAT, task, MPI_COMM_WORLD);
MPI_Bcast(&fcorrz[0][0][beg], len, MPI_FLOAT, task, MPI_COMM_WORLD);
MPI_Bcast(&potcorr[0][0][beg], len, MPI_FLOAT, task, MPI_COMM_WORLD);
#endif
}
if(ThisTask == 0)
{
printf("\nwriting Ewald tables to file `%s'\n", buf);
fflush(stdout);
if((fd = fopen(buf, "w")))
{
my_fwrite(&fcorrx[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
my_fwrite(&fcorry[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
my_fwrite(&fcorrz[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
my_fwrite(&potcorr[0][0][0], sizeof(FLOAT), (EN + 1) * (EN + 1) * (EN + 1), fd);
fclose(fd);
}
}
}
fac_intp = 2 * EN / All.BoxSize;
for(i = 0; i <= EN; i++)
for(j = 0; j <= EN; j++)
for(k = 0; k <= EN; k++)
{
potcorr[i][j][k] /= All.BoxSize;
fcorrx[i][j][k] /= All.BoxSize * All.BoxSize;
fcorry[i][j][k] /= All.BoxSize * All.BoxSize;
fcorrz[i][j][k] /= All.BoxSize * All.BoxSize;
}
if(ThisTask == 0)
{
printf("initialization of periodic boundaries finished.\n");
fflush(stdout);
}
}
/*! This function looks up the correction force due to the infinite number
* of periodic particle/node images. We here use trilinear interpolation
* to get it from the precomputed tables, which contain one octant
* around the target particle at the origin. The other octants are
* obtained from it by exploiting the symmetry properties.
*/
#ifdef FORCETEST
void ewald_corr(double dx, double dy, double dz, double *fper)
{
int signx, signy, signz;
int i, j, k;
double u, v, w;
double f1, f2, f3, f4, f5, f6, f7, f8;
if(dx < 0)
{
dx = -dx;
signx = +1;
}
else
signx = -1;
if(dy < 0)
{
dy = -dy;
signy = +1;
}
else
signy = -1;
if(dz < 0)
{
dz = -dz;
signz = +1;
}
else
signz = -1;
u = dx * fac_intp;
i = (int) u;
if(i >= EN)
i = EN - 1;
u -= i;
v = dy * fac_intp;
j = (int) v;
if(j >= EN)
j = EN - 1;
v -= j;
w = dz * fac_intp;
k = (int) w;
if(k >= EN)
k = EN - 1;
w -= k;
f1 = (1 - u) * (1 - v) * (1 - w);
f2 = (1 - u) * (1 - v) * (w);
f3 = (1 - u) * (v) * (1 - w);
f4 = (1 - u) * (v) * (w);
f5 = (u) * (1 - v) * (1 - w);
f6 = (u) * (1 - v) * (w);
f7 = (u) * (v) * (1 - w);
f8 = (u) * (v) * (w);
fper[0] = signx * (fcorrx[i][j][k] * f1 +
fcorrx[i][j][k + 1] * f2 +
fcorrx[i][j + 1][k] * f3 +
fcorrx[i][j + 1][k + 1] * f4 +
fcorrx[i + 1][j][k] * f5 +
fcorrx[i + 1][j][k + 1] * f6 +
fcorrx[i + 1][j + 1][k] * f7 + fcorrx[i + 1][j + 1][k + 1] * f8);
fper[1] = signy * (fcorry[i][j][k] * f1 +
fcorry[i][j][k + 1] * f2 +
fcorry[i][j + 1][k] * f3 +
fcorry[i][j + 1][k + 1] * f4 +
fcorry[i + 1][j][k] * f5 +
fcorry[i + 1][j][k + 1] * f6 +
fcorry[i + 1][j + 1][k] * f7 + fcorry[i + 1][j + 1][k + 1] * f8);
fper[2] = signz * (fcorrz[i][j][k] * f1 +
fcorrz[i][j][k + 1] * f2 +
fcorrz[i][j + 1][k] * f3 +
fcorrz[i][j + 1][k + 1] * f4 +
fcorrz[i + 1][j][k] * f5 +
fcorrz[i + 1][j][k + 1] * f6 +
fcorrz[i + 1][j + 1][k] * f7 + fcorrz[i + 1][j + 1][k + 1] * f8);
}
#endif
/*! This function looks up the correction potential due to the infinite
* number of periodic particle/node images. We here use tri-linear
* interpolation to get it from the precomputed table, which contains
* one octant around the target particle at the origin. The other
* octants are obtained from it by exploiting symmetry properties.
*/
double ewald_pot_corr(double dx, double dy, double dz)
{
int i, j, k;
double u, v, w;
double f1, f2, f3, f4, f5, f6, f7, f8;
if(dx < 0)
dx = -dx;
if(dy < 0)
dy = -dy;
if(dz < 0)
dz = -dz;
u = dx * fac_intp;
i = (int) u;
if(i >= EN)
i = EN - 1;
u -= i;
v = dy * fac_intp;
j = (int) v;
if(j >= EN)
j = EN - 1;
v -= j;
w = dz * fac_intp;
k = (int) w;
if(k >= EN)
k = EN - 1;
w -= k;
f1 = (1 - u) * (1 - v) * (1 - w);
f2 = (1 - u) * (1 - v) * (w);
f3 = (1 - u) * (v) * (1 - w);
f4 = (1 - u) * (v) * (w);
f5 = (u) * (1 - v) * (1 - w);
f6 = (u) * (1 - v) * (w);
f7 = (u) * (v) * (1 - w);
f8 = (u) * (v) * (w);
return potcorr[i][j][k] * f1 +
potcorr[i][j][k + 1] * f2 +
potcorr[i][j + 1][k] * f3 +
potcorr[i][j + 1][k + 1] * f4 +
potcorr[i + 1][j][k] * f5 +
potcorr[i + 1][j][k + 1] * f6 + potcorr[i + 1][j + 1][k] * f7 + potcorr[i + 1][j + 1][k + 1] * f8;
}
/*! This function computes the potential correction term by means of Ewald
* summation.
*/
double ewald_psi(double x[3])
{
double alpha, psi;
double r, sum1, sum2, hdotx;
double dx[3];
int i, n[3], h[3], h2;
alpha = 2.0;
for(n[0] = -4, sum1 = 0; n[0] <= 4; n[0]++)
for(n[1] = -4; n[1] <= 4; n[1]++)
for(n[2] = -4; n[2] <= 4; n[2]++)
{
for(i = 0; i < 3; i++)
dx[i] = x[i] - n[i];
r = sqrt(dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]);
sum1 += erfc(alpha * r) / r;
}
for(h[0] = -4, sum2 = 0; h[0] <= 4; h[0]++)
for(h[1] = -4; h[1] <= 4; h[1]++)
for(h[2] = -4; h[2] <= 4; h[2]++)
{
hdotx = x[0] * h[0] + x[1] * h[1] + x[2] * h[2];
h2 = h[0] * h[0] + h[1] * h[1] + h[2] * h[2];
if(h2 > 0)
sum2 += 1 / (M_PI * h2) * exp(-M_PI * M_PI * h2 / (alpha * alpha)) * cos(2 * M_PI * hdotx);
}
r = sqrt(x[0] * x[0] + x[1] * x[1] + x[2] * x[2]);
psi = M_PI / (alpha * alpha) - sum1 - sum2 + 1 / r;
return psi;
}
/*! This function computes the force correction term (difference between full
* force of infinite lattice and nearest image) by Ewald summation.
*/
void ewald_force(int iii, int jjj, int kkk, double x[3], double force[3])
{
double alpha, r2;
double r, val, hdotx, dx[3];
int i, h[3], n[3], h2;
alpha = 2.0;
for(i = 0; i < 3; i++)
force[i] = 0;
if(iii == 0 && jjj == 0 && kkk == 0)
return;
r2 = x[0] * x[0] + x[1] * x[1] + x[2] * x[2];
for(i = 0; i < 3; i++)
force[i] += x[i] / (r2 * sqrt(r2));
for(n[0] = -4; n[0] <= 4; n[0]++)
for(n[1] = -4; n[1] <= 4; n[1]++)
for(n[2] = -4; n[2] <= 4; n[2]++)
{
for(i = 0; i < 3; i++)
dx[i] = x[i] - n[i];
r = sqrt(dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]);
val = erfc(alpha * r) + 2 * alpha * r / sqrt(M_PI) * exp(-alpha * alpha * r * r);
for(i = 0; i < 3; i++)
force[i] -= dx[i] / (r * r * r) * val;
}
for(h[0] = -4; h[0] <= 4; h[0]++)
for(h[1] = -4; h[1] <= 4; h[1]++)
for(h[2] = -4; h[2] <= 4; h[2]++)
{
hdotx = x[0] * h[0] + x[1] * h[1] + x[2] * h[2];
h2 = h[0] * h[0] + h[1] * h[1] + h[2] * h[2];
if(h2 > 0)
{
val = 2.0 / ((double) h2) * exp(-M_PI * M_PI * h2 / (alpha * alpha)) * sin(2 * M_PI * hdotx);
for(i = 0; i < 3; i++)
force[i] -= h[i] * val;
}
}
}
#endif
Event Timeline
Log In to Comment