gravtree.c
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sat, May 25, 02:15

gravtree.c
View Options

	#include <stdlib.h>
	#include <string.h>
	#include <math.h>
	#include <float.h>
	#include <mpi.h>

	#include "allvars.h"
	#include "proto.h"

	/*! \file gravtree.c
	* \brief main driver routines for gravitational (short-range) force computation
	*
	* This file contains the code for the gravitational force computation by
	* means of the tree algorithm. To this end, a tree force is computed for
	* all active local particles, and particles are exported to other
	* processors if needed, where they can receive additional force
	* contributions. If the TreePM algorithm is enabled, the force computed
	* will only be the short-range part.
	*/

	/*! This function computes the gravitational forces for all active
	* particles. If needed, a new tree is constructed, otherwise the
	* dynamically updated tree is used. Particles are only exported to other
	* processors when really needed, thereby allowing a good use of the
	* communication buffer.
	*/
	void gravity_tree(void)
	{
	long long ntot;
	int numnodes, nexportsum = 0;
	int i, j, iter = 0;
	int numnodeslist, maxnumnodes, nexport, numlist, nrecv, ndonelist;
	double tstart, tend, timetree = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance;
	double ewaldcount;
	double costtotal, ewaldtot, costtreelist, ewaldlist;
	double maxt, sumt, timetreelist, timecommlist;
	double fac, plb, plb_max, sumcomm;

	#ifndef NOGRAVITY
	int noffset, nbuffer, nsend, nsend_local;
	long long ntotleft;
	int ndone, maxfill, ngrp;
	int k, place;
	int level, sendTask, recvTask;
	double ax, ay, az;
	MPI_Status status;
	#endif

	/* set new softening lengths */
	if(All.ComovingIntegrationOn)
	set_softenings();


	/* contruct tree if needed */
	tstart = second();
	if(TreeReconstructFlag)
	{
	if(ThisTask == 0)
	printf("Tree construction.\n");

	force_treebuild(NumPart);

	TreeReconstructFlag = 0;

	if(ThisTask == 0)
	printf("Tree construction done.\n");
	}
	tend = second();
	All.CPU_TreeConstruction += timediff(tstart, tend);

	costtotal = ewaldcount = 0;

	/* Note: 'NumForceUpdate' has already been determined in find_next_sync_point_and_drift() */
	numlist = malloc(NTask * sizeof(int) * NTask);
	MPI_Allgather(&NumForceUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
	for(i = 0, ntot = 0; i < NTask; i++)
	ntot += numlist[i];
	free(numlist);


	#ifndef NOGRAVITY
	if(ThisTask == 0)
	printf("Begin tree force.\n");


	#ifdef SELECTIVE_NO_GRAVITY
	for(i = 0; i < NumPart; i++)
	if(((1 << P[i].Type) & (SELECTIVE_NO_GRAVITY)))
	P[i].Ti_endstep = -P[i].Ti_endstep - 1;
	#endif


	noffset = malloc(sizeof(int) * NTask); /* offsets of bunches in common list */
	nbuffer = malloc(sizeof(int) * NTask);
	nsend_local = malloc(sizeof(int) * NTask);
	nsend = malloc(sizeof(int) * NTask * NTask);
	ndonelist = malloc(sizeof(int) * NTask);

	i = 0; /* beginn with this index */
	ntotleft = ntot; /* particles left for all tasks together */

	while(ntotleft > 0)
	{
	iter++;

	for(j = 0; j < NTask; j++)
	nsend_local[j] = 0;

	/* do local particles and prepare export list */
	tstart = second();
	for(nexport = 0, ndone = 0; i < NumPart && nexport < All.BunchSizeForce - NTask; i++)
	if(P[i].Ti_endstep == All.Ti_Current)
	{
	ndone++;

	for(j = 0; j < NTask; j++)
	Exportflag[j] = 0;
	#ifndef PMGRID
	costtotal += force_treeevaluate(i, 0, &ewaldcount);
	#else
	costtotal += force_treeevaluate_shortrange(i, 0);
	#endif
	for(j = 0; j < NTask; j++)
	{
	if(Exportflag[j])
	{
	for(k = 0; k < 3; k++)
	GravDataGet[nexport].u.Pos[k] = P[i].Pos[k];
	#ifdef UNEQUALSOFTENINGS
	GravDataGet[nexport].Type = P[i].Type;
	#ifdef ADAPTIVE_GRAVSOFT_FORGAS
	if(P[i].Type == 0)
	GravDataGet[nexport].Soft = SphP[i].Hsml;
	#endif
	#endif
	GravDataGet[nexport].w.OldAcc = P[i].OldAcc;
	GravDataIndexTable[nexport].Task = j;
	GravDataIndexTable[nexport].Index = i;
	GravDataIndexTable[nexport].SortIndex = nexport;
	nexport++;
	nexportsum++;
	nsend_local[j]++;
	}
	}
	}
	tend = second();
	timetree += timediff(tstart, tend);

	qsort(GravDataIndexTable, nexport, sizeof(struct gravdata_index), grav_tree_compare_key);

	for(j = 0; j < nexport; j++)
	GravDataIn[j] = GravDataGet[GravDataIndexTable[j].SortIndex];

	for(j = 1, noffset[0] = 0; j < NTask; j++)
	noffset[j] = noffset[j - 1] + nsend_local[j - 1];

	tstart = second();

	MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);

	tend = second();
	timeimbalance += timediff(tstart, tend);

	/* now do the particles that need to be exported */

	for(level = 1; level < (1 << PTask); level++)
	{
	tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeForce)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* get the particles */
	MPI_Sendrecv(&GravDataIn[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct gravdata_in), MPI_BYTE,
	recvTask, TAG_GRAV_A,
	&GravDataGet[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in), MPI_BYTE,
	recvTask, TAG_GRAV_A, MPI_COMM_WORLD, &status);
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);


	tstart = second();
	for(j = 0; j < nbuffer[ThisTask]; j++)
	{
	#ifndef PMGRID
	costtotal += force_treeevaluate(j, 1, &ewaldcount);
	#else
	costtotal += force_treeevaluate_shortrange(j, 1);
	#endif
	}
	tend = second();
	timetree += timediff(tstart, tend);

	tstart = second();
	MPI_Barrier(MPI_COMM_WORLD);
	tend = second();
	timeimbalance += timediff(tstart, tend);

	/* get the result */
	tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeForce)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;
	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* send the results */
	MPI_Sendrecv(&GravDataResult[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in),
	MPI_BYTE, recvTask, TAG_GRAV_B,
	&GravDataOut[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct gravdata_in),
	MPI_BYTE, recvTask, TAG_GRAV_B, MPI_COMM_WORLD, &status);

	/* add the result to the particles */
	for(j = 0; j < nsend_local[recvTask]; j++)
	{
	place = GravDataIndexTable[noffset[recvTask] + j].Index;

	for(k = 0; k < 3; k++)
	P[place].GravAccel[k] += GravDataOut[j + noffset[recvTask]].u.Acc[k];

	P[place].GravCost += GravDataOut[j + noffset[recvTask]].w.Ninteractions;
	}
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	level = ngrp - 1;
	}

	MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
	for(j = 0; j < NTask; j++)
	ntotleft -= ndonelist[j];
	}

	free(ndonelist);
	free(nsend);
	free(nsend_local);
	free(nbuffer);
	free(noffset);

	/* now add things for comoving integration */

	#ifndef PERIODIC
	#ifndef PMGRID
	if(All.ComovingIntegrationOn)
	{
	fac = 0.5 * All.Hubble * All.Hubble * All.Omega0 / All.G;

	for(i = 0; i < NumPart; i++)
	if(P[i].Ti_endstep == All.Ti_Current)
	for(j = 0; j < 3; j++)
	P[i].GravAccel[j] += fac * P[i].Pos[j];
	}
	#endif
	#endif

	for(i = 0; i < NumPart; i++)
	if(P[i].Ti_endstep == All.Ti_Current)
	{
	#ifdef PMGRID
	ax = P[i].GravAccel[0] + P[i].GravPM[0] / All.G;
	ay = P[i].GravAccel[1] + P[i].GravPM[1] / All.G;
	az = P[i].GravAccel[2] + P[i].GravPM[2] / All.G;
	#else
	ax = P[i].GravAccel[0];
	ay = P[i].GravAccel[1];
	az = P[i].GravAccel[2];
	#endif
	P[i].OldAcc = sqrt(ax * ax + ay * ay + az * az);
	}


	if(All.TypeOfOpeningCriterion == 1)
	All.ErrTolTheta = 0; /* This will switch to the relative opening criterion for the following force computations */

	/* muliply by G */
	for(i = 0; i < NumPart; i++)
	if(P[i].Ti_endstep == All.Ti_Current)
	for(j = 0; j < 3; j++)
	P[i].GravAccel[j] *= All.G;


	/* Finally, the following factor allows a computation of a cosmological simulation
	with vacuum energy in physical coordinates */
	#ifndef PERIODIC
	#ifndef PMGRID
	if(All.ComovingIntegrationOn == 0)
	{
	fac = All.OmegaLambda * All.Hubble * All.Hubble;

	for(i = 0; i < NumPart; i++)
	if(P[i].Ti_endstep == All.Ti_Current)
	for(j = 0; j < 3; j++)
	P[i].GravAccel[j] += fac * P[i].Pos[j];
	}
	#endif
	#endif

	#ifdef SELECTIVE_NO_GRAVITY
	for(i = 0; i < NumPart; i++)
	if(P[i].Ti_endstep < 0)
	P[i].Ti_endstep = -P[i].Ti_endstep - 1;
	#endif

	if(ThisTask == 0)
	printf("tree is done.\n");

	#else /* gravity is switched off */

	for(i = 0; i < NumPart; i++)
	if(P[i].Ti_endstep == All.Ti_Current)
	for(j = 0; j < 3; j++)
	P[i].GravAccel[j] = 0;

	#endif


	/* Now the force computation is finished */

	/* gather some diagnostic information */

	timetreelist = malloc(sizeof(double) * NTask);
	timecommlist = malloc(sizeof(double) * NTask);
	costtreelist = malloc(sizeof(double) * NTask);
	numnodeslist = malloc(sizeof(int) * NTask);
	ewaldlist = malloc(sizeof(double) * NTask);
	nrecv = malloc(sizeof(int) * NTask);

	numnodes = Numnodestree;

	MPI_Gather(&costtotal, 1, MPI_DOUBLE, costtreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Gather(&numnodes, 1, MPI_INT, numnodeslist, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Gather(&timetree, 1, MPI_DOUBLE, timetreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Gather(&timecommsumm, 1, MPI_DOUBLE, timecommlist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Gather(&NumPart, 1, MPI_INT, nrecv, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Gather(&ewaldcount, 1, MPI_DOUBLE, ewaldlist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Reduce(&nexportsum, &nexport, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
	MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

	if(ThisTask == 0)
	{
	All.TotNumOfForces += ntot;

	//fprintf(FdTimings, "Step= %d t= %g dt= %g \n", All.NumCurrentTiStep, All.Time, All.TimeStep);
	//fprintf(FdTimings, "Nf= %d%09d total-Nf= %d%09d ex-frac= %g iter= %d\n",
	// (int) (ntot / 1000000000), (int) (ntot % 1000000000),
	// (int) (All.TotNumOfForces / 1000000000), (int) (All.TotNumOfForces % 1000000000),
	// nexport / ((double) ntot), iter);
	/* note: on Linux, the 8-byte integer could be printed with the format identifier "%qd", but doesn't work on AIX */

	fac = NTask / ((double) All.TotNumPart);

	for(i = 0, maxt = timetreelist[0], sumt = 0, plb_max = 0,
	maxnumnodes = 0, costtotal = 0, sumcomm = 0, ewaldtot = 0; i < NTask; i++)
	{
	costtotal += costtreelist[i];

	sumcomm += timecommlist[i];

	if(maxt < timetreelist[i])
	maxt = timetreelist[i];
	sumt += timetreelist[i];

	plb = nrecv[i] * fac;

	if(plb > plb_max)
	plb_max = plb;

	if(numnodeslist[i] > maxnumnodes)
	maxnumnodes = numnodeslist[i];

	ewaldtot += ewaldlist[i];
	}
	//fprintf(FdTimings, "work-load balance: %g max=%g avg=%g PE0=%g\n",
	// maxt / (sumt / NTask), maxt, sumt / NTask, timetreelist[0]);
	//fprintf(FdTimings, "particle-load balance: %g\n", plb_max);
	//fprintf(FdTimings, "max. nodes: %d, filled: %g\n", maxnumnodes,
	// maxnumnodes / (All.TreeAllocFactor * All.MaxPart));
	//fprintf(FdTimings, "part/sec=%g \| %g ia/part=%g (%g)\n", ntot / (sumt + 1.0e-20),
	// ntot / (maxt * NTask), ((double) (costtotal)) / ntot, ((double) ewaldtot) / ntot);
	//fprintf(FdTimings, "\n");
	//
	//fflush(FdTimings);

	All.CPU_TreeWalk += sumt / NTask;
	All.CPU_Imbalance += sumimbalance / NTask;
	All.CPU_CommSum += sumcomm / NTask;
	}

	free(nrecv);
	free(ewaldlist);
	free(numnodeslist);
	free(costtreelist);
	free(timecommlist);
	free(timetreelist);
	}


	#ifdef PY_INTERFACE
	/*! This function computes the gravitational forces for all active
	* particles. If needed, a new tree is constructed, otherwise the
	* dynamically updated tree is used. Particles are only exported to other
	* processors when really needed, thereby allowing a good use of the
	* communication buffer.
	*/
	void gravity_tree_sub(void)
	{
	long long ntot;
	int numnodes, nexportsum = 0;
	int i, j, iter = 0;
	int numnodeslist, maxnumnodes, nexport, numlist, nrecv, ndonelist;
	double tstart, tend, timetree = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance;
	double ewaldcount;
	double costtotal, ewaldtot, costtreelist, ewaldlist;
	double maxt, sumt, timetreelist, timecommlist;
	double fac, plb, plb_max, sumcomm;

	#ifndef NOGRAVITY
	int noffset, nbuffer, nsend, nsend_local;
	long long ntotleft;
	int ndone, maxfill, ngrp;
	int k, place;
	int level, sendTask, recvTask;
	double ax, ay, az;
	MPI_Status status;
	#endif

	/* set new softening lengths */
	if(All.ComovingIntegrationOn)
	set_softenings();


	/* contruct tree if needed */
	tstart = second();
	if(TreeReconstructFlag)
	{
	if(ThisTask == 0)
	printf("Tree construction.\n");

	force_treebuild(NumPart);

	TreeReconstructFlag = 0;

	if(ThisTask == 0)
	printf("Tree construction done.\n");
	}
	tend = second();
	All.CPU_TreeConstruction += timediff(tstart, tend);

	costtotal = ewaldcount = 0;

	/* Note: 'NumForceUpdate' has already been determined in find_next_sync_point_and_drift() */
	numlist = malloc(NTask * sizeof(int) * NTask);
	MPI_Allgather(&NumPartQ, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
	for(i = 0, ntot = 0; i < NTask; i++)
	ntot += numlist[i];
	free(numlist);


	#ifndef NOGRAVITY
	if(ThisTask == 0)
	printf("Begin tree force.\n");


	#ifdef SELECTIVE_NO_GRAVITY
	for(i = 0; i < NumPartQ; i++)
	if(((1 << Q[i].Type) & (SELECTIVE_NO_GRAVITY)))
	Q[i].Ti_endstep = -Q[i].Ti_endstep - 1;
	#endif


	noffset = malloc(sizeof(int) * NTask); /* offsets of bunches in common list */
	nbuffer = malloc(sizeof(int) * NTask);
	nsend_local = malloc(sizeof(int) * NTask);
	nsend = malloc(sizeof(int) * NTask * NTask);
	ndonelist = malloc(sizeof(int) * NTask);

	i = 0; /* beginn with this index */
	ntotleft = ntot; /* particles left for all tasks together */

	while(ntotleft > 0)
	{
	iter++;

	for(j = 0; j < NTask; j++)
	nsend_local[j] = 0;

	/* do local particles and prepare export list */
	tstart = second();
	for(nexport = 0, ndone = 0; i < NumPartQ && nexport < All.BunchSizeForce - NTask; i++)
	//if(Q[i].Ti_endstep == All.Ti_Current)
	{
	ndone++;

	for(j = 0; j < NTask; j++)
	Exportflag[j] = 0;
	#ifndef PMGRID
	costtotal += force_treeevaluate_sub(i, 0, &ewaldcount);
	#else
	costtotal += force_treeevaluate_shortrange_sub(i, 0);
	#endif
	for(j = 0; j < NTask; j++)
	{
	if(Exportflag[j])
	{
	for(k = 0; k < 3; k++)
	GravDataGet[nexport].u.Pos[k] = Q[i].Pos[k];
	#ifdef UNEQUALSOFTENINGS
	GravDataGet[nexport].Type = Q[i].Type;
	#ifdef ADAPTIVE_GRAVSOFT_FORGAS
	if(Q[i].Type == 0)
	GravDataGet[nexport].Soft = SphQ[i].Hsml;
	#endif
	#endif
	GravDataGet[nexport].w.OldAcc = Q[i].OldAcc;
	GravDataIndexTable[nexport].Task = j;
	GravDataIndexTable[nexport].Index = i;
	GravDataIndexTable[nexport].SortIndex = nexport;
	nexport++;
	nexportsum++;
	nsend_local[j]++;
	}
	}
	}
	tend = second();
	timetree += timediff(tstart, tend);

	qsort(GravDataIndexTable, nexport, sizeof(struct gravdata_index), grav_tree_compare_key);

	for(j = 0; j < nexport; j++)
	GravDataIn[j] = GravDataGet[GravDataIndexTable[j].SortIndex];

	for(j = 1, noffset[0] = 0; j < NTask; j++)
	noffset[j] = noffset[j - 1] + nsend_local[j - 1];

	tstart = second();

	MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);

	tend = second();
	timeimbalance += timediff(tstart, tend);

	/* now do the particles that need to be exported */

	for(level = 1; level < (1 << PTask); level++)
	{
	tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeForce)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* get the particles */
	MPI_Sendrecv(&GravDataIn[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct gravdata_in), MPI_BYTE,
	recvTask, TAG_GRAV_A,
	&GravDataGet[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in), MPI_BYTE,
	recvTask, TAG_GRAV_A, MPI_COMM_WORLD, &status);
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);


	tstart = second();
	for(j = 0; j < nbuffer[ThisTask]; j++)
	{
	#ifndef PMGRID
	costtotal += force_treeevaluate_sub(j, 1, &ewaldcount);
	#else
	costtotal += force_treeevaluate_shortrange_sub(j, 1);
	#endif
	}
	tend = second();
	timetree += timediff(tstart, tend);

	tstart = second();
	MPI_Barrier(MPI_COMM_WORLD);
	tend = second();
	timeimbalance += timediff(tstart, tend);

	/* get the result */
	tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeForce)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;
	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* send the results */
	MPI_Sendrecv(&GravDataResult[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in),
	MPI_BYTE, recvTask, TAG_GRAV_B,
	&GravDataOut[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct gravdata_in),
	MPI_BYTE, recvTask, TAG_GRAV_B, MPI_COMM_WORLD, &status);

	/* add the result to the particles */
	for(j = 0; j < nsend_local[recvTask]; j++)
	{
	place = GravDataIndexTable[noffset[recvTask] + j].Index;

	for(k = 0; k < 3; k++)
	Q[place].GravAccel[k] += GravDataOut[j + noffset[recvTask]].u.Acc[k];

	Q[place].GravCost += GravDataOut[j + noffset[recvTask]].w.Ninteractions;
	}
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	level = ngrp - 1;
	}

	MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
	for(j = 0; j < NTask; j++)
	ntotleft -= ndonelist[j];
	}

	free(ndonelist);
	free(nsend);
	free(nsend_local);
	free(nbuffer);
	free(noffset);

	/* now add things for comoving integration */

	#ifndef PERIODIC
	#ifndef PMGRID
	if(All.ComovingIntegrationOn)
	{
	fac = 0.5 * All.Hubble * All.Hubble * All.Omega0 / All.G;

	for(i = 0; i < NumPartQ; i++)
	//if(Q[i].Ti_endstep == All.Ti_Current)
	for(j = 0; j < 3; j++)
	Q[i].GravAccel[j] += fac * Q[i].Pos[j];
	}
	#endif
	#endif

	for(i = 0; i < NumPartQ; i++)
	//if(Q[i].Ti_endstep == All.Ti_Current)
	{
	#ifdef PMGRID
	ax = Q[i].GravAccel[0] + Q[i].GravPM[0] / All.G;
	ay = Q[i].GravAccel[1] + Q[i].GravPM[1] / All.G;
	az = Q[i].GravAccel[2] + Q[i].GravPM[2] / All.G;
	#else
	ax = Q[i].GravAccel[0];
	ay = Q[i].GravAccel[1];
	az = Q[i].GravAccel[2];
	#endif
	Q[i].OldAcc = sqrt(ax * ax + ay * ay + az * az);
	}


	if(All.TypeOfOpeningCriterion == 1)
	All.ErrTolTheta = 0; /* This will switch to the relative opening criterion for the following force computations */

	/* muliply by G */
	for(i = 0; i < NumPartQ; i++)
	//if(Q[i].Ti_endstep == All.Ti_Current)
	for(j = 0; j < 3; j++)
	Q[i].GravAccel[j] *= All.G;


	/* Finally, the following factor allows a computation of a cosmological simulation
	with vacuum energy in physical coordinates */
	#ifndef PERIODIC
	#ifndef PMGRID
	if(All.ComovingIntegrationOn == 0)
	{
	fac = All.OmegaLambda * All.Hubble * All.Hubble;

	for(i = 0; i < NumPartQ; i++)
	//if(Q[i].Ti_endstep == All.Ti_Current)
	for(j = 0; j < 3; j++)
	Q[i].GravAccel[j] += fac * Q[i].Pos[j];
	}
	#endif
	#endif

	#ifdef SELECTIVE_NO_GRAVITY
	for(i = 0; i < NumPartQ; i++)
	//if(Q[i].Ti_endstep < 0)
	Q[i].Ti_endstep = -Q[i].Ti_endstep - 1;
	#endif

	if(ThisTask == 0)
	printf("tree is done.\n");

	#else /* gravity is switched off */

	for(i = 0; i < NumPartQ; i++)
	//if(Q[i].Ti_endstep == All.Ti_Current)
	for(j = 0; j < 3; j++)
	Q[i].GravAccel[j] = 0;

	#endif


	/* Now the force computation is finished */

	/* gather some diagnostic information */

	timetreelist = malloc(sizeof(double) * NTask);
	timecommlist = malloc(sizeof(double) * NTask);
	costtreelist = malloc(sizeof(double) * NTask);
	numnodeslist = malloc(sizeof(int) * NTask);
	ewaldlist = malloc(sizeof(double) * NTask);
	nrecv = malloc(sizeof(int) * NTask);

	numnodes = Numnodestree;

	MPI_Gather(&costtotal, 1, MPI_DOUBLE, costtreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Gather(&numnodes, 1, MPI_INT, numnodeslist, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Gather(&timetree, 1, MPI_DOUBLE, timetreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Gather(&timecommsumm, 1, MPI_DOUBLE, timecommlist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Gather(&NumPartQ, 1, MPI_INT, nrecv, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Gather(&ewaldcount, 1, MPI_DOUBLE, ewaldlist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Reduce(&nexportsum, &nexport, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
	MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

	if(ThisTask == 0)
	{
	All.TotNumOfForces += ntot;

	//fprintf(FdTimings, "Step= %d t= %g dt= %g \n", All.NumCurrentTiStep, All.Time, All.TimeStep);
	//fprintf(FdTimings, "Nf= %d%09d total-Nf= %d%09d ex-frac= %g iter= %d\n",
	// (int) (ntot / 1000000000), (int) (ntot % 1000000000),
	// (int) (All.TotNumOfForces / 1000000000), (int) (All.TotNumOfForces % 1000000000),
	// nexport / ((double) ntot), iter);
	/* note: on Linux, the 8-byte integer could be printed with the format identifier "%qd", but doesn't work on AIX */

	fac = NTask / ((double) All.TotNumPart);

	for(i = 0, maxt = timetreelist[0], sumt = 0, plb_max = 0,
	maxnumnodes = 0, costtotal = 0, sumcomm = 0, ewaldtot = 0; i < NTask; i++)
	{
	costtotal += costtreelist[i];

	sumcomm += timecommlist[i];

	if(maxt < timetreelist[i])
	maxt = timetreelist[i];
	sumt += timetreelist[i];

	plb = nrecv[i] * fac;

	if(plb > plb_max)
	plb_max = plb;

	if(numnodeslist[i] > maxnumnodes)
	maxnumnodes = numnodeslist[i];

	ewaldtot += ewaldlist[i];
	}
	//fprintf(FdTimings, "work-load balance: %g max=%g avg=%g PE0=%g\n",
	// maxt / (sumt / NTask), maxt, sumt / NTask, timetreelist[0]);
	//fprintf(FdTimings, "particle-load balance: %g\n", plb_max);
	//fprintf(FdTimings, "max. nodes: %d, filled: %g\n", maxnumnodes,
	// maxnumnodes / (All.TreeAllocFactor * All.MaxPart));
	//fprintf(FdTimings, "part/sec=%g \| %g ia/part=%g (%g)\n", ntot / (sumt + 1.0e-20),
	// ntot / (maxt * NTask), ((double) (costtotal)) / ntot, ((double) ewaldtot) / ntot);
	//fprintf(FdTimings, "\n");
	//
	//fflush(FdTimings);

	All.CPU_TreeWalk += sumt / NTask;
	All.CPU_Imbalance += sumimbalance / NTask;
	All.CPU_CommSum += sumcomm / NTask;
	}

	free(nrecv);
	free(ewaldlist);
	free(numnodeslist);
	free(costtreelist);
	free(timecommlist);
	free(timetreelist);
	}
	#endif


	/*! This function sets the (comoving) softening length of all particle
	* types in the table All.SofteningTable[...]. We check that the physical
	* softening length is bounded by the Softening-MaxPhys values.
	*/
	void set_softenings(void)
	{
	int i;

	if(All.ComovingIntegrationOn)
	{
	if(All.SofteningGas * All.Time > All.SofteningGasMaxPhys)
	All.SofteningTable[0] = All.SofteningGasMaxPhys / All.Time;
	else
	All.SofteningTable[0] = All.SofteningGas;

	if(All.SofteningHalo * All.Time > All.SofteningHaloMaxPhys)
	All.SofteningTable[1] = All.SofteningHaloMaxPhys / All.Time;
	else
	All.SofteningTable[1] = All.SofteningHalo;

	if(All.SofteningDisk * All.Time > All.SofteningDiskMaxPhys)
	All.SofteningTable[2] = All.SofteningDiskMaxPhys / All.Time;
	else
	All.SofteningTable[2] = All.SofteningDisk;

	if(All.SofteningBulge * All.Time > All.SofteningBulgeMaxPhys)
	All.SofteningTable[3] = All.SofteningBulgeMaxPhys / All.Time;
	else
	All.SofteningTable[3] = All.SofteningBulge;

	if(All.SofteningStars * All.Time > All.SofteningStarsMaxPhys)
	All.SofteningTable[4] = All.SofteningStarsMaxPhys / All.Time;
	else
	All.SofteningTable[4] = All.SofteningStars;

	if(All.SofteningBndry * All.Time > All.SofteningBndryMaxPhys)
	All.SofteningTable[5] = All.SofteningBndryMaxPhys / All.Time;
	else
	All.SofteningTable[5] = All.SofteningBndry;
	}
	else
	{
	All.SofteningTable[0] = All.SofteningGas;
	All.SofteningTable[1] = All.SofteningHalo;
	All.SofteningTable[2] = All.SofteningDisk;
	All.SofteningTable[3] = All.SofteningBulge;
	All.SofteningTable[4] = All.SofteningStars;
	All.SofteningTable[5] = All.SofteningBndry;
	}

	for(i = 0; i < 6; i++)
	All.ForceSoftening[i] = 2.8 * All.SofteningTable[i];

	All.MinGasHsml = All.MinGasHsmlFractional * All.ForceSoftening[0];
	}


	/*! This function is used as a comparison kernel in a sort routine. It is
	* used to group particles in the communication buffer that are going to
	* be sent to the same CPU.
	*/
	int grav_tree_compare_key(const void a, const void b)
	{
	if(((struct gravdata_index ) a)->Task < (((struct gravdata_index ) b)->Task))
	return -1;

	if(((struct gravdata_index ) a)->Task > (((struct gravdata_index ) b)->Task))
	return +1;

	return 0;
	}

gravtree.cNo OneTemporaryActions

File Metadata

gravtree.cView Options

Event Timeline

gravtree.c
No OneTemporary
Actions

gravtree.c
View Options