sph.c
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sun, Oct 20, 05:48

sph.c
View Options

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <math.h>
	#include <mpi.h>
	#include <gsl/gsl_math.h>
	#include "allvars.h"
	#include "proto.h"


	#ifdef PY_INTERFACE


	/*! \file hydra.c
	* \brief Computation of SPH forces and rate of entropy generation
	*
	* This file contains the "second SPH loop", where the SPH forces are
	* computed, and where the rate of change of entropy due to the shock heating
	* (via artificial viscosity) is computed.
	*/


	static double hubble_a, atime, hubble_a2, fac_mu, fac_vsic_fix, a3inv, fac_egy;

	#ifdef PERIODIC
	static double boxSize, boxHalf;

	#ifdef LONG_X
	static double boxSize_X, boxHalf_X;
	#else
	#define boxSize_X boxSize
	#define boxHalf_X boxHalf
	#endif
	#ifdef LONG_Y
	static double boxSize_Y, boxHalf_Y;
	#else
	#define boxSize_Y boxSize
	#define boxHalf_Y boxHalf
	#endif
	#ifdef LONG_Z
	static double boxSize_Z, boxHalf_Z;
	#else
	#define boxSize_Z boxSize
	#define boxHalf_Z boxHalf
	#endif
	#endif




	/*! This is a comparison kernel for a sort routine, which is used to group
	* particles that are going to be exported to the same CPU.
	*/
	int sph_compare_key(const void a, const void b)
	{
	if(((struct sphdata_in ) a)->Task < (((struct sphdata_in ) b)->Task))
	return -1;
	if(((struct sphdata_in ) a)->Task > (((struct sphdata_in ) b)->Task))
	return +1;
	return 0;
	}



	/*! This function is the driver routine for the calculation of hydrodynamical
	* force and rate of change of entropy due to shock heating for all active
	* particles .
	*/
	void sph_orig(void)
	{
	long long ntot, ntotleft;
	int i, j, k, n, ngrp, maxfill, source, ndone;
	int nbuffer, noffset, nsend_local, nsend, numlist, ndonelist;
	int level, sendTask, recvTask, nexport, place;
	double soundspeed_i;
	double tstart, tend, sumt, sumcomm;
	double timecomp = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance;
	MPI_Status status;

	#ifdef PERIODIC
	boxSize = All.BoxSize;
	boxHalf = 0.5 * All.BoxSize;
	#ifdef LONG_X
	boxHalf_X = boxHalf * LONG_X;
	boxSize_X = boxSize * LONG_X;
	#endif
	#ifdef LONG_Y
	boxHalf_Y = boxHalf * LONG_Y;
	boxSize_Y = boxSize * LONG_Y;
	#endif
	#ifdef LONG_Z
	boxHalf_Z = boxHalf * LONG_Z;
	boxSize_Z = boxSize * LONG_Z;
	#endif
	#endif

	if(All.ComovingIntegrationOn)
	{
	/* Factors for comoving integration of hydro */
	hubble_a = All.Omega0 / (All.Time * All.Time * All.Time)
	+ (1 - All.Omega0 - All.OmegaLambda) / (All.Time * All.Time) + All.OmegaLambda;

	hubble_a = All.Hubble * sqrt(hubble_a);
	hubble_a2 = All.Time * All.Time * hubble_a;

	fac_mu = pow(All.Time, 3 * (GAMMA - 1) / 2) / All.Time;

	fac_egy = pow(All.Time, 3 * (GAMMA - 1));

	fac_vsic_fix = hubble_a * pow(All.Time, 3 * GAMMA_MINUS1);

	a3inv = 1 / (All.Time * All.Time * All.Time);
	atime = All.Time;
	}
	else
	hubble_a = hubble_a2 = atime = fac_mu = fac_vsic_fix = a3inv = fac_egy = 1.0;


	/* `NumSphUpdate' gives the number of particles on this processor that want a force update */
	for(n = 0, NumSphUpdate = 0; n < N_gas; n++)
	{
	if(P[n].Ti_endstep == All.Ti_Current)
	NumSphUpdate++;
	}

	numlist = malloc(NTask * sizeof(int) * NTask);
	MPI_Allgather(&NumSphUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
	for(i = 0, ntot = 0; i < NTask; i++)
	ntot += numlist[i];
	free(numlist);


	noffset = malloc(sizeof(int) * NTask); /* offsets of bunches in common list */
	nbuffer = malloc(sizeof(int) * NTask);
	nsend_local = malloc(sizeof(int) * NTask);
	nsend = malloc(sizeof(int) * NTask * NTask);
	ndonelist = malloc(sizeof(int) * NTask);


	i = 0; /* first particle for this task */
	ntotleft = ntot; /* particles left for all tasks together */

	while(ntotleft > 0)
	{
	for(j = 0; j < NTask; j++)
	nsend_local[j] = 0;

	/* do local particles and prepare export list */
	tstart = second();
	for(nexport = 0, ndone = 0; i < N_gas && nexport < All.BunchSizeHydro - NTask; i++)
	if(P[i].Ti_endstep == All.Ti_Current)
	{
	ndone++;

	for(j = 0; j < NTask; j++)
	Exportflag[j] = 0;

	sph_orig_evaluate(i, 0);

	for(j = 0; j < NTask; j++)
	{
	if(Exportflag[j])
	{
	for(k = 0; k < 3; k++)
	{
	HydroDataIn[nexport].Pos[k] = P[i].Pos[k];
	HydroDataIn[nexport].Vel[k] = SphP[i].VelPred[k];
	}
	HydroDataIn[nexport].Hsml = SphP[i].Hsml;
	HydroDataIn[nexport].Mass = P[i].Mass;
	HydroDataIn[nexport].DhsmlDensityFactor = SphP[i].DhsmlDensityFactor;
	HydroDataIn[nexport].Density = SphP[i].Density;
	HydroDataIn[nexport].Pressure = SphP[i].Pressure;
	HydroDataIn[nexport].Timestep = P[i].Ti_endstep - P[i].Ti_begstep;

	/* calculation of F1 */
	soundspeed_i = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density);
	HydroDataIn[nexport].F1 = fabs(SphP[i].DivVel) /
	(fabs(SphP[i].DivVel) + SphP[i].CurlVel +
	0.0001 * soundspeed_i / SphP[i].Hsml / fac_mu);

	HydroDataIn[nexport].Index = i;
	HydroDataIn[nexport].Task = j;
	nexport++;
	nsend_local[j]++;
	}
	}
	}
	tend = second();
	timecomp += timediff(tstart, tend);

	qsort(HydroDataIn, nexport, sizeof(struct hydrodata_in), sph_compare_key);

	for(j = 1, noffset[0] = 0; j < NTask; j++)
	noffset[j] = noffset[j - 1] + nsend_local[j - 1];

	tstart = second();

	MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);

	tend = second();
	timeimbalance += timediff(tstart, tend);



	/* now do the particles that need to be exported */

	for(level = 1; level < (1 << PTask); level++)
	{
	tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeHydro)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* get the particles */
	MPI_Sendrecv(&HydroDataIn[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct hydrodata_in), MPI_BYTE,
	recvTask, TAG_HYDRO_A,
	&HydroDataGet[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct hydrodata_in), MPI_BYTE,
	recvTask, TAG_HYDRO_A, MPI_COMM_WORLD, &status);
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	/* now do the imported particles */
	tstart = second();
	for(j = 0; j < nbuffer[ThisTask]; j++)
	sph_orig_evaluate(j, 1);
	tend = second();
	timecomp += timediff(tstart, tend);

	/* do a block to measure imbalance */
	tstart = second();
	MPI_Barrier(MPI_COMM_WORLD);
	tend = second();
	timeimbalance += timediff(tstart, tend);

	/* get the result */
	tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeHydro)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* send the results */
	MPI_Sendrecv(&HydroDataResult[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct hydrodata_out),
	MPI_BYTE, recvTask, TAG_HYDRO_B,
	&HydroDataPartialResult[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct hydrodata_out),
	MPI_BYTE, recvTask, TAG_HYDRO_B, MPI_COMM_WORLD, &status);

	/* add the result to the particles */
	for(j = 0; j < nsend_local[recvTask]; j++)
	{
	source = j + noffset[recvTask];
	place = HydroDataIn[source].Index;

	for(k = 0; k < 3; k++)
	SphP[place].HydroAccel[k] += HydroDataPartialResult[source].Acc[k];

	SphP[place].DtEntropy += HydroDataPartialResult[source].DtEntropy;

	if(SphP[place].MaxSignalVel < HydroDataPartialResult[source].MaxSignalVel)
	SphP[place].MaxSignalVel = HydroDataPartialResult[source].MaxSignalVel;
	}
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	level = ngrp - 1;
	}

	MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
	for(j = 0; j < NTask; j++)
	ntotleft -= ndonelist[j];
	}

	free(ndonelist);
	free(nsend);
	free(nsend_local);
	free(nbuffer);
	free(noffset);



	/* do final operations on results */
	tstart = second();

	for(i = 0; i < N_gas; i++)
	if(P[i].Ti_endstep == All.Ti_Current)
	{
	SphP[i].DtEntropy = GAMMA_MINUS1 / (hubble_a2 pow(SphP[i].Density, GAMMA_MINUS1));
	#ifdef SPH_BND_PARTICLES
	if(P[i].ID == 0)
	{
	SphP[i].DtEntropy = 0;
	for(k = 0; k < 3; k++)
	SphP[i].HydroAccel[k] = 0;
	}
	#endif
	}

	tend = second();
	timecomp += timediff(tstart, tend);

	/* collect some timing information */

	MPI_Reduce(&timecomp, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
	MPI_Reduce(&timecommsumm, &sumcomm, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
	MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

	if(ThisTask == 0)
	{
	All.CPU_HydCompWalk += sumt / NTask;
	All.CPU_HydCommSumm += sumcomm / NTask;
	All.CPU_HydImbalance += sumimbalance / NTask;
	}
	}


	/*! This function is the 'core' of the SPH force computation. A target
	* particle is specified which may either be local, or reside in the
	* communication buffer.
	*/
	void sph_orig_evaluate(int target, int mode)
	{
	int j, k, n, timestep, startnode, numngb;
	FLOAT pos, vel;
	FLOAT mass, h_i, dhsmlDensityFactor, rho, pressure, f1, f2;
	double acc[3], dtEntropy, maxSignalVel;
	double dx, dy, dz, dvx, dvy, dvz;
	double h_i2, hinv, hinv4;
	double p_over_rho2_i, p_over_rho2_j, soundspeed_i, soundspeed_j;
	double hfc, dwk_i, vdotr, vdotr2, visc, mu_ij, rho_ij, vsig;
	double h_j, dwk_j, r, r2, u, hfc_visc;
	int phase=0;
	#ifndef NOVISCOSITYLIMITER
	double dt;
	#endif

	if(mode == 0)
	{
	pos = P[target].Pos;
	vel = SphP[target].VelPred;
	h_i = SphP[target].Hsml;
	mass = P[target].Mass;
	dhsmlDensityFactor = SphP[target].DhsmlDensityFactor;
	rho = SphP[target].Density;
	pressure = SphP[target].Pressure;
	timestep = P[target].Ti_endstep - P[target].Ti_begstep;
	soundspeed_i = sqrt(GAMMA * pressure / rho);
	f1 = fabs(SphP[target].DivVel) /
	(fabs(SphP[target].DivVel) + SphP[target].CurlVel +
	0.0001 * soundspeed_i / SphP[target].Hsml / fac_mu);
	}
	else
	{
	pos = HydroDataGet[target].Pos;
	vel = HydroDataGet[target].Vel;
	h_i = HydroDataGet[target].Hsml;
	mass = HydroDataGet[target].Mass;
	dhsmlDensityFactor = HydroDataGet[target].DhsmlDensityFactor;
	rho = HydroDataGet[target].Density;
	pressure = HydroDataGet[target].Pressure;
	timestep = HydroDataGet[target].Timestep;
	soundspeed_i = sqrt(GAMMA * pressure / rho);
	f1 = HydroDataGet[target].F1;
	}


	/* initialize variables before SPH loop is started */
	acc[0] = acc[1] = acc[2] = dtEntropy = 0;
	maxSignalVel = 0;

	p_over_rho2_i = pressure / (rho * rho) * dhsmlDensityFactor;
	h_i2 = h_i * h_i;

	/* Now start the actual SPH computation for this particle */
	startnode = All.MaxPart;
	do
	{
	//numngb = ngb_treefind_pairs(&pos[0], h_i, &startnode);
	numngb = ngb_treefind_pairs(&pos[0], h_i, phase, &startnode);

	for(n = 0; n < numngb; n++)
	{
	j = Ngblist[n];

	dx = pos[0] - P[j].Pos[0];
	dy = pos[1] - P[j].Pos[1];
	dz = pos[2] - P[j].Pos[2];

	#ifdef PERIODIC /* find the closest image in the given box size */
	if(dx > boxHalf_X)
	dx -= boxSize_X;
	if(dx < -boxHalf_X)
	dx += boxSize_X;
	if(dy > boxHalf_Y)
	dy -= boxSize_Y;
	if(dy < -boxHalf_Y)
	dy += boxSize_Y;
	if(dz > boxHalf_Z)
	dz -= boxSize_Z;
	if(dz < -boxHalf_Z)
	dz += boxSize_Z;
	#endif
	r2 = dx * dx + dy * dy + dz * dz;
	h_j = SphP[j].Hsml;
	if(r2 < h_i2 \|\| r2 < h_j * h_j)
	{
	r = sqrt(r2);
	if(r > 0)
	{
	p_over_rho2_j = SphP[j].Pressure / (SphP[j].Density * SphP[j].Density);
	soundspeed_j = sqrt(GAMMA * p_over_rho2_j * SphP[j].Density);
	dvx = vel[0] - SphP[j].VelPred[0];
	dvy = vel[1] - SphP[j].VelPred[1];
	dvz = vel[2] - SphP[j].VelPred[2];
	vdotr = dx * dvx + dy * dvy + dz * dvz;

	if(All.ComovingIntegrationOn)
	vdotr2 = vdotr + hubble_a2 * r2;
	else
	vdotr2 = vdotr;

	if(r2 < h_i2)
	{
	hinv = 1.0 / h_i;
	#ifndef TWODIMS
	hinv4 = hinv * hinv * hinv * hinv;
	#else
	hinv4 = hinv * hinv * hinv / boxSize_Z;
	#endif
	u = r * hinv;
	if(u < 0.5)
	dwk_i = hinv4 * u * (KERNEL_COEFF_3 * u - KERNEL_COEFF_4);
	else
	dwk_i = hinv4 * KERNEL_COEFF_6 * (1.0 - u) * (1.0 - u);
	}
	else
	{
	dwk_i = 0;
	}

	if(r2 < h_j * h_j)
	{
	hinv = 1.0 / h_j;
	#ifndef TWODIMS
	hinv4 = hinv * hinv * hinv * hinv;
	#else
	hinv4 = hinv * hinv * hinv / boxSize_Z;
	#endif
	u = r * hinv;
	if(u < 0.5)
	dwk_j = hinv4 * u * (KERNEL_COEFF_3 * u - KERNEL_COEFF_4);
	else
	dwk_j = hinv4 * KERNEL_COEFF_6 * (1.0 - u) * (1.0 - u);
	}
	else
	{
	dwk_j = 0;
	}

	if(soundspeed_i + soundspeed_j > maxSignalVel)
	maxSignalVel = soundspeed_i + soundspeed_j;

	if(vdotr2 < 0) /* ... artificial viscosity */
	{
	mu_ij = fac_mu * vdotr2 / r; /* note: this is negative! */

	vsig = soundspeed_i + soundspeed_j - 3 * mu_ij;

	if(vsig > maxSignalVel)
	maxSignalVel = vsig;

	rho_ij = 0.5 * (rho + SphP[j].Density);
	f2 =
	fabs(SphP[j].DivVel) / (fabs(SphP[j].DivVel) + SphP[j].CurlVel +
	0.0001 * soundspeed_j / fac_mu / SphP[j].Hsml);

	visc = 0.25 * All.ArtBulkViscConst * vsig * (-mu_ij) / rho_ij * (f1 + f2);

	/* .... end artificial viscosity evaluation */
	#ifndef NOVISCOSITYLIMITER
	/* make sure that viscous acceleration is not too large */
	dt = imax(timestep, (P[j].Ti_endstep - P[j].Ti_begstep)) * All.Timebase_interval;
	if(dt > 0 && (dwk_i + dwk_j) < 0)
	{
	visc = dmin(visc, 0.5 * fac_vsic_fix * vdotr2 /
	(0.5 * (mass + P[j].Mass) * (dwk_i + dwk_j) * r * dt));
	}
	#endif
	}
	else
	visc = 0;

	p_over_rho2_j *= SphP[j].DhsmlDensityFactor;

	hfc_visc = 0.5 * P[j].Mass * visc * (dwk_i + dwk_j) / r;

	hfc = hfc_visc + P[j].Mass * (p_over_rho2_i * dwk_i + p_over_rho2_j * dwk_j) / r;

	acc[0] -= hfc * dx;
	acc[1] -= hfc * dy;
	acc[2] -= hfc * dz;
	dtEntropy += 0.5 * hfc_visc * vdotr2;
	}
	}
	}
	}
	while(startnode >= 0);

	/* Now collect the result at the right place */
	if(mode == 0)
	{
	for(k = 0; k < 3; k++)
	SphP[target].HydroAccel[k] = acc[k];
	SphP[target].DtEntropy = dtEntropy;
	SphP[target].MaxSignalVel = maxSignalVel;
	}
	else
	{
	for(k = 0; k < 3; k++)
	HydroDataResult[target].Acc[k] = acc[k];
	HydroDataResult[target].DtEntropy = dtEntropy;
	HydroDataResult[target].MaxSignalVel = maxSignalVel;
	}
	}









	/*! This function is the driver routine for the calculation of hydrodynamical
	* force and rate of change of entropy due to shock heating for all active
	* particles .
	*/
	void sph(void)
	{
	long long ntot, ntotleft;
	int i, j, k, n, ngrp, maxfill, source, ndone;
	int nbuffer, noffset, nsend_local, nsend, numlist, ndonelist;
	int level, sendTask, recvTask, nexport, place;
	double soundspeed_i;
	double tstart, tend, sumt, sumcomm;
	double timecomp = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance;
	MPI_Status status;

	#ifdef PERIODIC
	boxSize = All.BoxSize;
	boxHalf = 0.5 * All.BoxSize;
	#ifdef LONG_X
	boxHalf_X = boxHalf * LONG_X;
	boxSize_X = boxSize * LONG_X;
	#endif
	#ifdef LONG_Y
	boxHalf_Y = boxHalf * LONG_Y;
	boxSize_Y = boxSize * LONG_Y;
	#endif
	#ifdef LONG_Z
	boxHalf_Z = boxHalf * LONG_Z;
	boxSize_Z = boxSize * LONG_Z;
	#endif
	#endif

	if(All.ComovingIntegrationOn)
	{
	/* Factors for comoving integration of hydro */
	hubble_a = All.Omega0 / (All.Time * All.Time * All.Time)
	+ (1 - All.Omega0 - All.OmegaLambda) / (All.Time * All.Time) + All.OmegaLambda;

	hubble_a = All.Hubble * sqrt(hubble_a);
	hubble_a2 = All.Time * All.Time * hubble_a;

	fac_mu = pow(All.Time, 3 * (GAMMA - 1) / 2) / All.Time;

	fac_egy = pow(All.Time, 3 * (GAMMA - 1));

	fac_vsic_fix = hubble_a * pow(All.Time, 3 * GAMMA_MINUS1);

	a3inv = 1 / (All.Time * All.Time * All.Time);
	atime = All.Time;
	}
	else
	hubble_a = hubble_a2 = atime = fac_mu = fac_vsic_fix = a3inv = fac_egy = 1.0;


	/* `NumSphUpdate' gives the number of particles on this processor that want a force update */
	for(n = 0, NumSphUpdate = 0; n < N_gas; n++)
	{

	SphP[n].ObsMoment0=0;
	SphP[n].ObsMoment1=0;
	SphP[n].GradObservable[0]=0;
	SphP[n].GradObservable[1]=0;
	SphP[n].GradObservable[2]=0;

	P[n].Ti_endstep = All.Ti_Current;
	if(P[n].Ti_endstep == All.Ti_Current)
	NumSphUpdate++;
	}

	numlist = malloc(NTask * sizeof(int) * NTask);
	MPI_Allgather(&NumSphUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
	for(i = 0, ntot = 0; i < NTask; i++)
	ntot += numlist[i];
	free(numlist);


	noffset = malloc(sizeof(int) * NTask); /* offsets of bunches in common list */
	nbuffer = malloc(sizeof(int) * NTask);
	nsend_local = malloc(sizeof(int) * NTask);
	nsend = malloc(sizeof(int) * NTask * NTask);
	ndonelist = malloc(sizeof(int) * NTask);


	i = 0; /* first particle for this task */
	ntotleft = ntot; /* particles left for all tasks together */

	while(ntotleft > 0)
	{
	for(j = 0; j < NTask; j++)
	nsend_local[j] = 0;

	/* do local particles and prepare export list */
	//tstart = second();
	for(nexport = 0, ndone = 0; i < N_gas && nexport < All.BunchSizeSph - NTask; i++)
	if(P[i].Ti_endstep == All.Ti_Current)
	{
	ndone++;

	for(j = 0; j < NTask; j++)
	Exportflag[j] = 0;

	sph_evaluate(i, 0);

	for(j = 0; j < NTask; j++)
	{
	if(Exportflag[j])
	{
	for(k = 0; k < 3; k++)
	{
	SphDataIn[nexport].Pos[k] = P[i].Pos[k];
	SphDataIn[nexport].Vel[k] = SphP[i].VelPred[k];
	}
	SphDataIn[nexport].Hsml = SphP[i].Hsml;
	SphDataIn[nexport].Density = SphP[i].Density;
	SphDataIn[nexport].DhsmlDensityFactor = SphP[i].DhsmlDensityFactor;
	//SphDataIn[nexport].Mass = P[i].Mass;
	//SphDataIn[nexport].DhsmlDensityFactor = SphP[i].DhsmlDensityFactor;
	//SphDataIn[nexport].Density = SphP[i].Density;
	//SphDataIn[nexport].Pressure = SphP[i].Pressure;
	//SphDataIn[nexport].Timestep = P[i].Ti_endstep - P[i].Ti_begstep;

	//SphDataIn[nexport].ObsMoment0 = 0;
	//SphDataIn[nexport].ObsMoment1 = 0;
	SphDataIn[nexport].Observable = SphP[i].Observable;

	SphDataIn[nexport].Index = i;
	SphDataIn[nexport].Task = j;
	nexport++;
	nsend_local[j]++;
	}
	}
	}
	//tend = second();
	//timecomp += timediff(tstart, tend);

	qsort(SphDataIn, nexport, sizeof(struct sphdata_in), sph_compare_key);

	for(j = 1, noffset[0] = 0; j < NTask; j++)
	noffset[j] = noffset[j - 1] + nsend_local[j - 1];

	//tstart = second();

	MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);

	//tend = second();
	//timeimbalance += timediff(tstart, tend);

	/* now do the particles that need to be exported */

	for(level = 1; level < (1 << PTask); level++)
	{
	//tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeSph)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* get the particles */
	MPI_Sendrecv(&SphDataIn[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct sphdata_in), MPI_BYTE,
	recvTask, TAG_HYDRO_A,
	&SphDataGet[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct sphdata_in), MPI_BYTE,
	recvTask, TAG_HYDRO_A, MPI_COMM_WORLD, &status);
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	/* now do the imported particles */
	tstart = second();
	for(j = 0; j < nbuffer[ThisTask]; j++)
	sph_evaluate(j, 1);
	tend = second();
	timecomp += timediff(tstart, tend);

	/* do a block to measure imbalance */
	tstart = second();
	MPI_Barrier(MPI_COMM_WORLD);
	tend = second();
	timeimbalance += timediff(tstart, tend);

	/* get the result */
	tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeSph)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* send the results */
	MPI_Sendrecv(&SphDataResult[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct sphdata_out),
	MPI_BYTE, recvTask, TAG_HYDRO_B,
	&SphDataPartialResult[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct sphdata_out),
	MPI_BYTE, recvTask, TAG_HYDRO_B, MPI_COMM_WORLD, &status);

	/* add the result to the particles */
	for(j = 0; j < nsend_local[recvTask]; j++)
	{
	source = j + noffset[recvTask];
	place = SphDataIn[source].Index;


	SphP[place].ObsMoment0 += SphDataPartialResult[source].ObsMoment0;
	SphP[place].ObsMoment1 += SphDataPartialResult[source].ObsMoment1;
	SphP[place].GradObservable[0] += SphDataPartialResult[source].GradObservable[0];
	SphP[place].GradObservable[1] += SphDataPartialResult[source].GradObservable[1];
	SphP[place].GradObservable[2] += SphDataPartialResult[source].GradObservable[2];


	}
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	level = ngrp - 1;
	}

	MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
	for(j = 0; j < NTask; j++)
	ntotleft -= ndonelist[j];
	}

	free(ndonelist);
	free(nsend);
	free(nsend_local);
	free(nbuffer);
	free(noffset);

	/* do final operations on results */
	//tstart = second();

	for(i = 0; i < N_gas; i++)
	if(P[i].Ti_endstep == All.Ti_Current)
	{
	//SphP[i].Observable = SphP[i].ObsMoment1/SphP[i].ObsMoment0;
	SphP[i].Observable = SphP[i].ObsMoment1;
	}


	}


	/*! This function is the 'core' of the SPH force computation. A target
	* particle is specified which may either be local, or reside in the
	* communication buffer.
	*/
	void sph_evaluate(int target, int mode)
	{

	int j, n, startnode, numngb;
	double h_i, fac, hinv, hinv3, hinv4;
	FLOAT rho,pressure,dhsmlDensityFactor;
	double p_over_rho2_i, p_over_rho2_j;
	double divv, wk_i, dwk_i, wk_j, dwk_j;
	double dx, dy, dz, r, r2, u, mass_j;
	double dvx, dvy, dvz, rotv[3];
	double weighted_numngb, dhsmlrho;
	double h_i2,h_j,h_j2;
	FLOAT pos, vel;
	FLOAT mom1,mom0,gradx,grady,gradz,observable;
	int phase=0;

	#ifndef NOVISCOSITYLIMITER
	double dt;
	#endif

	if(mode == 0)
	{
	pos = P[target].Pos;
	vel = SphP[target].VelPred;
	h_i = SphP[target].Hsml;
	rho = SphP[target].Density;
	dhsmlDensityFactor = SphP[target].DhsmlDensityFactor;
	//mom0 = SphP[target].ObsMoment0;
	//mom1 = SphP[target].ObsMoment1;
	observable = SphP[target].Observable;
	}
	else
	{
	pos = SphDataGet[target].Pos;
	vel = SphDataGet[target].Vel;
	h_i = SphDataGet[target].Hsml;
	rho = SphDataGet[target].Density;
	dhsmlDensityFactor = SphDataGet[target].DhsmlDensityFactor;
	//mom0 = SphDataGet[target].ObsMoment0;
	//mom1 = SphDataGet[target].ObsMoment1;
	observable = SphDataGet[target].Observable;
	}


	mom0 = mom1 = gradx = grady = gradz = 0;


	divv = rotv[0] = rotv[1] = rotv[2] = 0;
	weighted_numngb = 0;
	dhsmlrho = 0;

	startnode = All.MaxPart;
	numngb = 0;

	//p_over_rho2_i = pressure / (rho * rho) * dhsmlDensityFactor;
	p_over_rho2_i = observable / (rho * rho);
	h_i2 = h_i * h_i;


	do
	{
	numngb = ngb_treefind_pairs(&pos[0], h_i, phase, &startnode);





	for(n = 0; n < numngb; n++)
	{
	j = Ngblist[n];

	dx = pos[0] - P[j].Pos[0];
	dy = pos[1] - P[j].Pos[1];
	dz = pos[2] - P[j].Pos[2];

	#ifdef PERIODIC /* find the closest image in the given box size */
	if(dx > boxHalf_X)
	dx -= boxSize_X;
	if(dx < -boxHalf_X)
	dx += boxSize_X;
	if(dy > boxHalf_Y)
	dy -= boxSize_Y;
	if(dy < -boxHalf_Y)
	dy += boxSize_Y;
	if(dz > boxHalf_Z)
	dz -= boxSize_Z;
	if(dz < -boxHalf_Z)
	dz += boxSize_Z;
	#endif
	r2 = dx * dx + dy * dy + dz * dz;
	h_j = SphP[j].Hsml;
	if(r2 < h_i2 \|\| r2 < h_j * h_j)
	{
	r = sqrt(r2);
	if(r > 0)
	{
	//p_over_rho2_j = SphP[j].Pressure / (SphP[j].Density * SphP[j].Density);
	p_over_rho2_j = SphP[j].Observable / (SphP[j].Density * SphP[j].Density);



	if(r2 < h_i2)
	{
	hinv = 1.0 / h_i;
	#ifndef TWODIMS
	hinv4 = hinv * hinv * hinv * hinv;
	#else
	hinv4 = hinv * hinv * hinv / boxSize_Z;
	#endif
	u = r * hinv;
	if(u < 0.5)
	{
	wk_i = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
	dwk_i = hinv4 * u * (KERNEL_COEFF_3 * u - KERNEL_COEFF_4);
	}
	else
	{
	wk_i = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
	dwk_i = hinv4 * KERNEL_COEFF_6 * (1.0 - u) * (1.0 - u);
	}
	}
	else
	{
	dwk_i = 0;
	}

	if(r2 < h_j * h_j)
	{
	hinv = 1.0 / h_j;
	#ifndef TWODIMS
	hinv4 = hinv * hinv * hinv * hinv;
	#else
	hinv4 = hinv * hinv * hinv / boxSize_Z;
	#endif
	u = r * hinv;
	if(u < 0.5)
	{
	wk_j = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
	dwk_j = hinv4 * u * (KERNEL_COEFF_3 * u - KERNEL_COEFF_4);
	}
	else
	{
	wk_j = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
	dwk_j = hinv4 * KERNEL_COEFF_6 * (1.0 - u) * (1.0 - u);
	}
	}
	else
	{
	dwk_j = 0;
	}





	//p_over_rho2_j *= SphP[j].DhsmlDensityFactor;


	mass_j = P[j].Mass;

	mom1 += mass_j(SphP[j].Observable)/(SphP[j].Density) wk_i;
	mom0 += mass_j /(SphP[j].Density) * wk_i;


	if (r==0)
	fac = 0;
	else
	{

	p_over_rho2_j = SphP[j].Observable / (SphP[j].Density * SphP[j].Density);

	// very simple way of computing a gradient
	//fac = mass_j * (observable-SphP[j].Observable) /(SphP[j].Density) * dwk_i / r;

	// conservative form (Gadget-2)
	//fac = rho* (P[j].Mass * (p_over_rho2_idhsmlDensityFactor dwk_i + p_over_rho2_jSphP[j].DhsmlDensityFactor dwk_j) / r);

	// classical form
	//fac = rho(P[j].Mass (p_over_rho2_i + p_over_rho2_j ) *dwk_i / r);
	//fac = rho(P[j].Mass (p_over_rho2_i + p_over_rho2_j ) 0.5(dwk_i+dwk_j) / r);


	// zero order term
	//fac = observable * rho ( P[j].Mass (1/(rhorho) + 1/(SphP[j].DensitySphP[j].Density) ) 0.5(dwk_i+dwk_j));
	fac = ( P[j].Mass * (1/(rhorho) + 1/(SphP[j].DensitySphP[j].Density) ) 0.5(dwk_i+dwk_j));

	// first order term
	//fac =
	//fac = P[j].Mass * 1/(SphP[j].DensitySphP[j].Density) xxx 0.5(dwk_i+dwk_j);

	}

	gradx += fac * dx;
	grady += fac * dy;
	gradz += fac * dz;




	}
	}
	}
	}
	while(startnode >= 0);





	/* Now collect the result at the right place */
	if(mode == 0)
	{
	SphP[target].ObsMoment0 = mom0;
	SphP[target].ObsMoment1 = mom1;
	SphP[target].GradObservable[0] = gradx;
	SphP[target].GradObservable[1] = grady;
	SphP[target].GradObservable[2] = gradz;
	}
	else
	{
	SphDataResult[target].ObsMoment0 = mom0;
	SphDataResult[target].ObsMoment1 = mom1;
	SphDataResult[target].GradObservable[0] = gradx;
	SphDataResult[target].GradObservable[1] = grady;
	SphDataResult[target].GradObservable[2] = gradz;
	}
	}

















	/*! This function is the driver routine for the calculation of hydrodynamical
	* force and rate of change of entropy due to shock heating for all active
	* particles .
	*/
	void sph_sub(void)
	{
	long long ntot, ntotleft;
	int i, j, k, n, ngrp, maxfill, source, ndone;
	int nbuffer, noffset, nsend_local, nsend, numlist, ndonelist;
	int level, sendTask, recvTask, nexport, place;
	double soundspeed_i;
	double tstart, tend, sumt, sumcomm;
	double timecomp = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance;
	MPI_Status status;

	#ifdef PERIODIC
	boxSize = All.BoxSize;
	boxHalf = 0.5 * All.BoxSize;
	#ifdef LONG_X
	boxHalf_X = boxHalf * LONG_X;
	boxSize_X = boxSize * LONG_X;
	#endif
	#ifdef LONG_Y
	boxHalf_Y = boxHalf * LONG_Y;
	boxSize_Y = boxSize * LONG_Y;
	#endif
	#ifdef LONG_Z
	boxHalf_Z = boxHalf * LONG_Z;
	boxSize_Z = boxSize * LONG_Z;
	#endif
	#endif

	if(All.ComovingIntegrationOn)
	{
	/* Factors for comoving integration of hydro */
	hubble_a = All.Omega0 / (All.Time * All.Time * All.Time)
	+ (1 - All.Omega0 - All.OmegaLambda) / (All.Time * All.Time) + All.OmegaLambda;

	hubble_a = All.Hubble * sqrt(hubble_a);
	hubble_a2 = All.Time * All.Time * hubble_a;

	fac_mu = pow(All.Time, 3 * (GAMMA - 1) / 2) / All.Time;

	fac_egy = pow(All.Time, 3 * (GAMMA - 1));

	fac_vsic_fix = hubble_a * pow(All.Time, 3 * GAMMA_MINUS1);

	a3inv = 1 / (All.Time * All.Time * All.Time);
	atime = All.Time;
	}
	else
	hubble_a = hubble_a2 = atime = fac_mu = fac_vsic_fix = a3inv = fac_egy = 1.0;


	/* `NumSphUpdate' gives the number of particles on this processor that want a force update */
	for(n = 0, NumSphUpdate = 0; n < N_gasQ; n++)
	{

	SphQ[n].ObsMoment0=0;
	SphQ[n].ObsMoment1=0;

	Q[n].Ti_endstep = All.Ti_Current;
	if(Q[n].Ti_endstep == All.Ti_Current)
	NumSphUpdate++;
	}

	numlist = malloc(NTask * sizeof(int) * NTask);
	MPI_Allgather(&NumSphUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
	for(i = 0, ntot = 0; i < NTask; i++)
	ntot += numlist[i];
	free(numlist);


	noffset = malloc(sizeof(int) * NTask); /* offsets of bunches in common list */
	nbuffer = malloc(sizeof(int) * NTask);
	nsend_local = malloc(sizeof(int) * NTask);
	nsend = malloc(sizeof(int) * NTask * NTask);
	ndonelist = malloc(sizeof(int) * NTask);


	i = 0; /* first particle for this task */
	ntotleft = ntot; /* particles left for all tasks together */

	while(ntotleft > 0)
	{
	for(j = 0; j < NTask; j++)
	nsend_local[j] = 0;

	/* do local particles and prepare export list */
	//tstart = second();
	for(nexport = 0, ndone = 0; i < N_gasQ && nexport < All.BunchSizeSph - NTask; i++)
	if(Q[i].Ti_endstep == All.Ti_Current)
	{
	ndone++;

	for(j = 0; j < NTask; j++)
	Exportflag[j] = 0;

	sph_evaluate_sub(i, 0);

	for(j = 0; j < NTask; j++)
	{
	if(Exportflag[j])
	{
	for(k = 0; k < 3; k++)
	{
	SphDataIn[nexport].Pos[k] = Q[i].Pos[k];
	SphDataIn[nexport].Vel[k] = SphQ[i].VelPred[k];
	}
	SphDataIn[nexport].Hsml = SphQ[i].Hsml;
	//SphDataIn[nexport].Mass = Q[i].Mass;
	//SphDataIn[nexport].DhsmlDensityFactor = SphQ[i].DhsmlDensityFactor;
	//SphDataIn[nexport].Density = SphQ[i].Density;
	//SphDataIn[nexport].Pressure = SphQ[i].Pressure;
	//SphDataIn[nexport].Timestep = Q[i].Ti_endstep - Q[i].Ti_begstep;

	SphDataIn[nexport].ObsMoment0 = Q[i].Mass;
	SphDataIn[nexport].ObsMoment1 = Q[i].Mass;


	SphDataIn[nexport].Index = i;
	SphDataIn[nexport].Task = j;
	nexport++;
	nsend_local[j]++;
	}
	}
	}
	//tend = second();
	//timecomp += timediff(tstart, tend);

	qsort(SphDataIn, nexport, sizeof(struct sphdata_in), sph_compare_key);

	for(j = 1, noffset[0] = 0; j < NTask; j++)
	noffset[j] = noffset[j - 1] + nsend_local[j - 1];

	//tstart = second();

	MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);

	//tend = second();
	//timeimbalance += timediff(tstart, tend);

	/* now do the particles that need to be exported */

	for(level = 1; level < (1 << PTask); level++)
	{
	//tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeSph)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* get the particles */
	MPI_Sendrecv(&SphDataIn[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct sphdata_in), MPI_BYTE,
	recvTask, TAG_HYDRO_A,
	&SphDataGet[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct sphdata_in), MPI_BYTE,
	recvTask, TAG_HYDRO_A, MPI_COMM_WORLD, &status);
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	/* now do the imported particles */
	tstart = second();
	for(j = 0; j < nbuffer[ThisTask]; j++)
	sph_evaluate_sub(j, 1);
	tend = second();
	timecomp += timediff(tstart, tend);

	/* do a block to measure imbalance */
	tstart = second();
	MPI_Barrier(MPI_COMM_WORLD);
	tend = second();
	timeimbalance += timediff(tstart, tend);

	/* get the result */
	tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeSph)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* send the results */
	MPI_Sendrecv(&SphDataResult[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct sphdata_out),
	MPI_BYTE, recvTask, TAG_HYDRO_B,
	&SphDataPartialResult[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct sphdata_out),
	MPI_BYTE, recvTask, TAG_HYDRO_B, MPI_COMM_WORLD, &status);

	/* add the result to the particles */
	for(j = 0; j < nsend_local[recvTask]; j++)
	{
	source = j + noffset[recvTask];
	place = SphDataIn[source].Index;


	SphQ[place].ObsMoment0 += SphDataPartialResult[source].ObsMoment0;
	SphQ[place].ObsMoment1 += SphDataPartialResult[source].ObsMoment1;


	}
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	level = ngrp - 1;
	}

	MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
	for(j = 0; j < NTask; j++)
	ntotleft -= ndonelist[j];
	}

	free(ndonelist);
	free(nsend);
	free(nsend_local);
	free(nbuffer);
	free(noffset);

	/* do final operations on results */
	//tstart = second();

	for(i = 0; i < N_gasQ; i++)
	if(Q[i].Ti_endstep == All.Ti_Current)
	{
	SphQ[i].Observable = SphQ[i].ObsMoment1/SphQ[i].ObsMoment0;
	}


	}


	/*! This function is the 'core' of the SPH force computation. A target
	* particle is specified which may either be local, or reside in the
	* communication buffer.
	*/
	void sph_evaluate_sub(int target, int mode)
	{

	int j, n, startnode, numngb, numngb_inbox;
	double h, h2, fac, hinv, hinv3, hinv4;
	double rho, divv, wk, dwk;
	double dx, dy, dz, r, r2, u, mass_j;
	double dvx, dvy, dvz, rotv[3];
	double weighted_numngb, dhsmlrho;
	FLOAT pos, vel;
	FLOAT mom1,mom0;
	int phase=0;


	#ifndef NOVISCOSITYLIMITER
	double dt;
	#endif

	if(mode == 0)
	{
	pos = Q[target].Pos;
	vel = SphQ[target].VelPred;
	h = SphQ[target].Hsml;
	mom0 = SphQ[target].ObsMoment0;
	mom1 = SphQ[target].ObsMoment1;
	}
	else
	{
	pos = SphDataGet[target].Pos;
	vel = SphDataGet[target].Vel;
	h = SphDataGet[target].Hsml;
	mom0 = SphDataGet[target].ObsMoment0;
	mom1 = SphDataGet[target].ObsMoment1;
	}



	h2 = h * h;
	hinv = 1.0 / h;
	#ifndef TWODIMS
	hinv3 = hinv * hinv * hinv;
	#else
	hinv3 = hinv * hinv / boxSize_Z;
	#endif
	hinv4 = hinv3 * hinv;

	rho = divv = rotv[0] = rotv[1] = rotv[2] = 0;
	weighted_numngb = 0;
	dhsmlrho = 0;

	startnode = All.MaxPart;
	numngb = 0;


	do
	{
	numngb_inbox = ngb_treefind_variable(&pos[0], h, phase, &startnode);

	for(n = 0; n < numngb_inbox; n++)
	{
	j = Ngblist[n];


	dx = pos[0] - P[j].Pos[0];
	dy = pos[1] - P[j].Pos[1];
	dz = pos[2] - P[j].Pos[2];

	#ifdef PERIODIC /* now find the closest image in the given box size */
	if(dx > boxHalf_X)
	dx -= boxSize_X;
	if(dx < -boxHalf_X)
	dx += boxSize_X;
	if(dy > boxHalf_Y)
	dy -= boxSize_Y;
	if(dy < -boxHalf_Y)
	dy += boxSize_Y;
	if(dz > boxHalf_Z)
	dz -= boxSize_Z;
	if(dz < -boxHalf_Z)
	dz += boxSize_Z;
	#endif
	r2 = dx * dx + dy * dy + dz * dz;

	if(r2 < h2)
	{
	numngb++;

	r = sqrt(r2);

	u = r * hinv;

	if(u < 0.5)
	{
	wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
	}
	else
	{
	wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
	}

	mom1 += P[j].Mass(SphP[j].Observable)/(SphP[j].Density) wk;
	mom0 += P[j].Mass /(SphP[j].Density) * wk;

	}
	}
	}
	while(startnode >= 0);


	/* Now collect the result at the right place */
	if(mode == 0)
	{
	SphQ[target].ObsMoment0 = mom0;
	SphQ[target].ObsMoment1 = mom1;
	}
	else
	{
	SphDataResult[target].ObsMoment0 = mom0;
	SphDataResult[target].ObsMoment1 = mom1;
	}
	}





	/*! This function is the driver routine for the calculation of hydrodynamical
	* force and rate of change of entropy due to shock heating for all active
	* particles .
	*/
	void sph_NgbsW(void)
	{
	long long ntot, ntotleft;
	int i, j, k, n, ngrp, maxfill, source, ndone;
	int nbuffer, noffset, nsend_local, nsend, numlist, ndonelist;
	int level, sendTask, recvTask, nexport, place;
	double soundspeed_i;
	double tstart, tend, sumt, sumcomm;
	double timecomp = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance;
	MPI_Status status;

	#ifdef PERIODIC
	boxSize = All.BoxSize;
	boxHalf = 0.5 * All.BoxSize;
	#ifdef LONG_X
	boxHalf_X = boxHalf * LONG_X;
	boxSize_X = boxSize * LONG_X;
	#endif
	#ifdef LONG_Y
	boxHalf_Y = boxHalf * LONG_Y;
	boxSize_Y = boxSize * LONG_Y;
	#endif
	#ifdef LONG_Z
	boxHalf_Z = boxHalf * LONG_Z;
	boxSize_Z = boxSize * LONG_Z;
	#endif
	#endif

	if(All.ComovingIntegrationOn)
	{
	/* Factors for comoving integration of hydro */
	hubble_a = All.Omega0 / (All.Time * All.Time * All.Time)
	+ (1 - All.Omega0 - All.OmegaLambda) / (All.Time * All.Time) + All.OmegaLambda;

	hubble_a = All.Hubble * sqrt(hubble_a);
	hubble_a2 = All.Time * All.Time * hubble_a;

	fac_mu = pow(All.Time, 3 * (GAMMA - 1) / 2) / All.Time;

	fac_egy = pow(All.Time, 3 * (GAMMA - 1));

	fac_vsic_fix = hubble_a * pow(All.Time, 3 * GAMMA_MINUS1);

	a3inv = 1 / (All.Time * All.Time * All.Time);
	atime = All.Time;
	}
	else
	hubble_a = hubble_a2 = atime = fac_mu = fac_vsic_fix = a3inv = fac_egy = 1.0;


	/* `NumSphUpdate' gives the number of particles on this processor that want a force update */
	for(n = 0, NumSphUpdate = 0; n < N_gasQ; n++)
	{

	SphQ[n].ObsMoment0=0;
	SphQ[n].ObsMoment1=0;

	Q[n].Ti_endstep = All.Ti_Current;
	if(Q[n].Ti_endstep == All.Ti_Current)
	NumSphUpdate++;
	}

	numlist = malloc(NTask * sizeof(int) * NTask);
	MPI_Allgather(&NumSphUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
	for(i = 0, ntot = 0; i < NTask; i++)
	ntot += numlist[i];
	free(numlist);


	noffset = malloc(sizeof(int) * NTask); /* offsets of bunches in common list */
	nbuffer = malloc(sizeof(int) * NTask);
	nsend_local = malloc(sizeof(int) * NTask);
	nsend = malloc(sizeof(int) * NTask * NTask);
	ndonelist = malloc(sizeof(int) * NTask);


	i = 0; /* first particle for this task */
	ntotleft = ntot; /* particles left for all tasks together */

	while(ntotleft > 0)
	{
	for(j = 0; j < NTask; j++)
	nsend_local[j] = 0;

	/* do local particles and prepare export list */
	//tstart = second();
	for(nexport = 0, ndone = 0; i < N_gasQ && nexport < All.BunchSizeSph - NTask; i++)
	if(Q[i].Ti_endstep == All.Ti_Current)
	{
	ndone++;

	for(j = 0; j < NTask; j++)
	Exportflag[j] = 0;

	sph_evaluate_NgbsW(i, 0);

	for(j = 0; j < NTask; j++)
	{
	if(Exportflag[j])
	{
	for(k = 0; k < 3; k++)
	{
	SphDataIn[nexport].Pos[k] = Q[i].Pos[k];
	SphDataIn[nexport].Vel[k] = SphQ[i].VelPred[k];
	}
	SphDataIn[nexport].Hsml = SphQ[i].Hsml;
	//SphDataIn[nexport].Mass = Q[i].Mass;
	//SphDataIn[nexport].DhsmlDensityFactor = SphQ[i].DhsmlDensityFactor;
	//SphDataIn[nexport].Density = SphQ[i].Density;
	//SphDataIn[nexport].Pressure = SphQ[i].Pressure;
	//SphDataIn[nexport].Timestep = Q[i].Ti_endstep - Q[i].Ti_begstep;

	SphDataIn[nexport].ObsMoment0 = Q[i].Mass;
	SphDataIn[nexport].ObsMoment1 = Q[i].Mass;


	SphDataIn[nexport].Index = i;
	SphDataIn[nexport].Task = j;
	nexport++;
	nsend_local[j]++;
	}
	}
	}
	//tend = second();
	//timecomp += timediff(tstart, tend);

	qsort(SphDataIn, nexport, sizeof(struct sphdata_in), sph_compare_key);

	for(j = 1, noffset[0] = 0; j < NTask; j++)
	noffset[j] = noffset[j - 1] + nsend_local[j - 1];

	//tstart = second();

	MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);

	//tend = second();
	//timeimbalance += timediff(tstart, tend);

	/* now do the particles that need to be exported */

	for(level = 1; level < (1 << PTask); level++)
	{
	//tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeSph)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* get the particles */
	MPI_Sendrecv(&SphDataIn[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct sphdata_in), MPI_BYTE,
	recvTask, TAG_HYDRO_A,
	&SphDataGet[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct sphdata_in), MPI_BYTE,
	recvTask, TAG_HYDRO_A, MPI_COMM_WORLD, &status);
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	/* now do the imported particles */
	tstart = second();
	for(j = 0; j < nbuffer[ThisTask]; j++)
	sph_evaluate_NgbsW(j, 1);
	tend = second();
	timecomp += timediff(tstart, tend);

	/* do a block to measure imbalance */
	tstart = second();
	MPI_Barrier(MPI_COMM_WORLD);
	tend = second();
	timeimbalance += timediff(tstart, tend);

	/* get the result */
	tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeSph)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* send the results */
	MPI_Sendrecv(&SphDataResult[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct sphdata_out),
	MPI_BYTE, recvTask, TAG_HYDRO_B,
	&SphDataPartialResult[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct sphdata_out),
	MPI_BYTE, recvTask, TAG_HYDRO_B, MPI_COMM_WORLD, &status);

	/* add the result to the particles */
	for(j = 0; j < nsend_local[recvTask]; j++)
	{
	source = j + noffset[recvTask];
	place = SphDataIn[source].Index;


	SphQ[place].ObsMoment0 += SphDataPartialResult[source].ObsMoment0;
	SphQ[place].ObsMoment1 += SphDataPartialResult[source].ObsMoment1;


	}
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	level = ngrp - 1;
	}

	MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
	for(j = 0; j < NTask; j++)
	ntotleft -= ndonelist[j];
	}

	free(ndonelist);
	free(nsend);
	free(nsend_local);
	free(nbuffer);
	free(noffset);

	/* do final operations on results */
	//tstart = second();

	for(i = 0; i < N_gasQ; i++)
	if(Q[i].Ti_endstep == All.Ti_Current)
	{
	SphQ[i].Observable = SphQ[i].ObsMoment1/SphQ[i].ObsMoment0;
	}


	}


	/*! This function is the 'core' of the SPH force computation. A target
	* particle is specified which may either be local, or reside in the
	* communication buffer.
	*/
	void sph_evaluate_NgbsW(int target, int mode)
	{

	int j, n, startnode, numngb, numngb_inbox;
	double h, h2, fac, hinv, hinv3, hinv4;
	double rho, divv, wk, dwk;
	double dx, dy, dz, r, r2, u, mass_j;
	double dvx, dvy, dvz, rotv[3];
	double weighted_numngb, dhsmlrho;
	FLOAT pos, vel;
	FLOAT mom1,mom0;
	int phase=0;


	#ifndef NOVISCOSITYLIMITER
	double dt;
	#endif

	if(mode == 0)
	{
	pos = Q[target].Pos;
	vel = SphQ[target].VelPred;
	h = SphQ[target].Hsml;
	mom0 = SphQ[target].ObsMoment0;
	mom1 = SphQ[target].ObsMoment1;
	}
	else
	{
	pos = SphDataGet[target].Pos;
	vel = SphDataGet[target].Vel;
	h = SphDataGet[target].Hsml;
	mom0 = SphDataGet[target].ObsMoment0;
	mom1 = SphDataGet[target].ObsMoment1;
	}



	h2 = h * h;
	hinv = 1.0 / h;
	#ifndef TWODIMS
	hinv3 = hinv * hinv * hinv;
	#else
	hinv3 = hinv * hinv / boxSize_Z;
	#endif
	hinv4 = hinv3 * hinv;

	rho = divv = rotv[0] = rotv[1] = rotv[2] = 0;
	weighted_numngb = 0;
	dhsmlrho = 0;

	startnode = All.MaxPart;
	numngb = 0;


	do
	{
	numngb_inbox = ngb_treefind_variable(&pos[0], h, phase, &startnode);

	for(n = 0; n < numngb_inbox; n++)
	{
	j = Ngblist[n];


	dx = pos[0] - P[j].Pos[0];
	dy = pos[1] - P[j].Pos[1];
	dz = pos[2] - P[j].Pos[2];

	#ifdef PERIODIC /* now find the closest image in the given box size */
	if(dx > boxHalf_X)
	dx -= boxSize_X;
	if(dx < -boxHalf_X)
	dx += boxSize_X;
	if(dy > boxHalf_Y)
	dy -= boxSize_Y;
	if(dy < -boxHalf_Y)
	dy += boxSize_Y;
	if(dz > boxHalf_Z)
	dz -= boxSize_Z;
	if(dz < -boxHalf_Z)
	dz += boxSize_Z;
	#endif
	r2 = dx * dx + dy * dy + dz * dz;

	if(r2 < h2)
	{
	numngb++;


	r = sqrt(r2);

	u = r * hinv;

	if(u < 0.5)
	{
	wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
	}
	else
	{
	wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
	}

	mom1 += SphP[j].Observable * wk;
	mom0 += wk;


	}
	}
	}
	while(startnode >= 0);


	/* Now collect the result at the right place */
	if(mode == 0)
	{
	SphQ[target].ObsMoment0 = mom0;
	SphQ[target].ObsMoment1 = mom1;
	}
	else
	{
	SphDataResult[target].ObsMoment0 = mom0;
	SphDataResult[target].ObsMoment1 = mom1;
	}
	}





	/*! This function is the driver routine for the calculation of hydrodynamical
	* force and rate of change of entropy due to shock heating for all active
	* particles .
	*/
	void sph_thermal_conductivity(void)
	{
	long long ntot, ntotleft;
	int i, j, k, n, ngrp, maxfill, source, ndone;
	int nbuffer, noffset, nsend_local, nsend, numlist, ndonelist;
	int level, sendTask, recvTask, nexport, place;
	double soundspeed_i;
	double tstart, tend, sumt, sumcomm;
	double timecomp = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance;
	MPI_Status status;

	#ifdef PERIODIC
	boxSize = All.BoxSize;
	boxHalf = 0.5 * All.BoxSize;
	#ifdef LONG_X
	boxHalf_X = boxHalf * LONG_X;
	boxSize_X = boxSize * LONG_X;
	#endif
	#ifdef LONG_Y
	boxHalf_Y = boxHalf * LONG_Y;
	boxSize_Y = boxSize * LONG_Y;
	#endif
	#ifdef LONG_Z
	boxHalf_Z = boxHalf * LONG_Z;
	boxSize_Z = boxSize * LONG_Z;
	#endif
	#endif

	if(All.ComovingIntegrationOn)
	{
	/* Factors for comoving integration of hydro */
	hubble_a = All.Omega0 / (All.Time * All.Time * All.Time)
	+ (1 - All.Omega0 - All.OmegaLambda) / (All.Time * All.Time) + All.OmegaLambda;

	hubble_a = All.Hubble * sqrt(hubble_a);
	hubble_a2 = All.Time * All.Time * hubble_a;

	fac_mu = pow(All.Time, 3 * (GAMMA - 1) / 2) / All.Time;

	fac_egy = pow(All.Time, 3 * (GAMMA - 1));

	fac_vsic_fix = hubble_a * pow(All.Time, 3 * GAMMA_MINUS1);

	a3inv = 1 / (All.Time * All.Time * All.Time);
	atime = All.Time;
	}
	else
	hubble_a = hubble_a2 = atime = fac_mu = fac_vsic_fix = a3inv = fac_egy = 1.0;


	/* `NumSphUpdate' gives the number of particles on this processor that want a force update */
	for(n = 0, NumSphUpdate = 0; n < N_gasQ; n++)
	{

	SphQ[n].ObsMoment0=0;
	SphQ[n].ObsMoment1=0;

	Q[n].Ti_endstep = All.Ti_Current;
	if(Q[n].Ti_endstep == All.Ti_Current)
	NumSphUpdate++;
	}

	numlist = malloc(NTask * sizeof(int) * NTask);
	MPI_Allgather(&NumSphUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
	for(i = 0, ntot = 0; i < NTask; i++)
	ntot += numlist[i];
	free(numlist);


	noffset = malloc(sizeof(int) * NTask); /* offsets of bunches in common list */
	nbuffer = malloc(sizeof(int) * NTask);
	nsend_local = malloc(sizeof(int) * NTask);
	nsend = malloc(sizeof(int) * NTask * NTask);
	ndonelist = malloc(sizeof(int) * NTask);


	i = 0; /* first particle for this task */
	ntotleft = ntot; /* particles left for all tasks together */

	while(ntotleft > 0)
	{
	for(j = 0; j < NTask; j++)
	nsend_local[j] = 0;

	/* do local particles and prepare export list */
	//tstart = second();
	for(nexport = 0, ndone = 0; i < N_gasQ && nexport < All.BunchSizeSph - NTask; i++)
	if(Q[i].Ti_endstep == All.Ti_Current)
	{
	ndone++;

	for(j = 0; j < NTask; j++)
	Exportflag[j] = 0;

	sph_evaluate_thermal_conductivity(i, 0);

	for(j = 0; j < NTask; j++)
	{
	if(Exportflag[j])
	{
	for(k = 0; k < 3; k++)
	{
	SphDataIn[nexport].Pos[k] = Q[i].Pos[k];
	SphDataIn[nexport].Vel[k] = SphQ[i].VelPred[k];
	}
	SphDataIn[nexport].Hsml = SphQ[i].Hsml;
	//SphDataIn[nexport].Mass = Q[i].Mass;
	//SphDataIn[nexport].DhsmlDensityFactor = SphQ[i].DhsmlDensityFactor;
	//SphDataIn[nexport].Density = SphQ[i].Density;
	//SphDataIn[nexport].Pressure = SphQ[i].Pressure;
	//SphDataIn[nexport].Timestep = Q[i].Ti_endstep - Q[i].Ti_begstep;

	SphDataIn[nexport].ObsMoment0 = Q[i].Mass;
	SphDataIn[nexport].ObsMoment1 = Q[i].Mass;


	SphDataIn[nexport].Index = i;
	SphDataIn[nexport].Task = j;
	nexport++;
	nsend_local[j]++;
	}
	}
	}
	//tend = second();
	//timecomp += timediff(tstart, tend);

	qsort(SphDataIn, nexport, sizeof(struct sphdata_in), sph_compare_key);

	for(j = 1, noffset[0] = 0; j < NTask; j++)
	noffset[j] = noffset[j - 1] + nsend_local[j - 1];

	//tstart = second();

	MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);

	//tend = second();
	//timeimbalance += timediff(tstart, tend);

	/* now do the particles that need to be exported */

	for(level = 1; level < (1 << PTask); level++)
	{
	//tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeSph)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* get the particles */
	MPI_Sendrecv(&SphDataIn[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct sphdata_in), MPI_BYTE,
	recvTask, TAG_HYDRO_A,
	&SphDataGet[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct sphdata_in), MPI_BYTE,
	recvTask, TAG_HYDRO_A, MPI_COMM_WORLD, &status);
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	/* now do the imported particles */
	tstart = second();
	for(j = 0; j < nbuffer[ThisTask]; j++)
	sph_evaluate_thermal_conductivity(j, 1);
	tend = second();
	timecomp += timediff(tstart, tend);

	/* do a block to measure imbalance */
	tstart = second();
	MPI_Barrier(MPI_COMM_WORLD);
	tend = second();
	timeimbalance += timediff(tstart, tend);

	/* get the result */
	tstart = second();
	for(j = 0; j < NTask; j++)
	nbuffer[j] = 0;
	for(ngrp = level; ngrp < (1 << PTask); ngrp++)
	{
	maxfill = 0;
	for(j = 0; j < NTask; j++)
	{
	if((j ^ ngrp) < NTask)
	if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
	maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
	}
	if(maxfill >= All.BunchSizeSph)
	break;

	sendTask = ThisTask;
	recvTask = ThisTask ^ ngrp;

	if(recvTask < NTask)
	{
	if(nsend[ThisTask * NTask + recvTask] > 0 \|\| nsend[recvTask * NTask + ThisTask] > 0)
	{
	/* send the results */
	MPI_Sendrecv(&SphDataResult[nbuffer[ThisTask]],
	nsend[recvTask * NTask + ThisTask] * sizeof(struct sphdata_out),
	MPI_BYTE, recvTask, TAG_HYDRO_B,
	&SphDataPartialResult[noffset[recvTask]],
	nsend_local[recvTask] * sizeof(struct sphdata_out),
	MPI_BYTE, recvTask, TAG_HYDRO_B, MPI_COMM_WORLD, &status);

	/* add the result to the particles */
	for(j = 0; j < nsend_local[recvTask]; j++)
	{
	source = j + noffset[recvTask];
	place = SphDataIn[source].Index;


	SphQ[place].ObsMoment0 += SphDataPartialResult[source].ObsMoment0;
	SphQ[place].ObsMoment1 += SphDataPartialResult[source].ObsMoment1;


	}
	}
	}

	for(j = 0; j < NTask; j++)
	if((j ^ ngrp) < NTask)
	nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
	}
	tend = second();
	timecommsumm += timediff(tstart, tend);

	level = ngrp - 1;
	}

	MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
	for(j = 0; j < NTask; j++)
	ntotleft -= ndonelist[j];
	}

	free(ndonelist);
	free(nsend);
	free(nsend_local);
	free(nbuffer);
	free(noffset);

	/* do final operations on results */
	//tstart = second();

	for(i = 0; i < N_gasQ; i++)
	if(Q[i].Ti_endstep == All.Ti_Current)
	{
	SphQ[i].Observable = SphQ[i].ObsMoment1/SphQ[i].ObsMoment0;
	}


	}


	/*! This function is the 'core' of the SPH force computation. A target
	* particle is specified which may either be local, or reside in the
	* communication buffer.
	*/
	void sph_evaluate_thermal_conductivity(int target, int mode)
	{

	int j, n, startnode, numngb, numngb_inbox;
	double h, h2, fac, hinv, hinv3, hinv4;
	double rho, divv, wk, dwk;
	double dx, dy, dz, r, r2, u, mass_j;
	double dvx, dvy, dvz, rotv[3];
	double weighted_numngb, dhsmlrho;
	FLOAT pos, vel;
	FLOAT mom1,mom0;
	int phase=0;


	#ifndef NOVISCOSITYLIMITER
	double dt;
	#endif

	if(mode == 0)
	{
	pos = Q[target].Pos;
	vel = SphQ[target].VelPred;
	h = SphQ[target].Hsml;
	mom0 = SphQ[target].ObsMoment0;
	mom1 = SphQ[target].ObsMoment1;
	}
	else
	{
	pos = SphDataGet[target].Pos;
	vel = SphDataGet[target].Vel;
	h = SphDataGet[target].Hsml;
	mom0 = SphDataGet[target].ObsMoment0;
	mom1 = SphDataGet[target].ObsMoment1;
	}



	h2 = h * h;
	hinv = 1.0 / h;
	#ifndef TWODIMS
	hinv3 = hinv * hinv * hinv;
	#else
	hinv3 = hinv * hinv / boxSize_Z;
	#endif
	hinv4 = hinv3 * hinv;

	rho = divv = rotv[0] = rotv[1] = rotv[2] = 0;
	weighted_numngb = 0;
	dhsmlrho = 0;

	startnode = All.MaxPart;
	numngb = 0;


	do
	{
	numngb_inbox = ngb_treefind_variable(&pos[0], h, phase, &startnode);

	for(n = 0; n < numngb_inbox; n++)
	{
	j = Ngblist[n];


	dx = pos[0] - P[j].Pos[0];
	dy = pos[1] - P[j].Pos[1];
	dz = pos[2] - P[j].Pos[2];

	#ifdef PERIODIC /* now find the closest image in the given box size */
	if(dx > boxHalf_X)
	dx -= boxSize_X;
	if(dx < -boxHalf_X)
	dx += boxSize_X;
	if(dy > boxHalf_Y)
	dy -= boxSize_Y;
	if(dy < -boxHalf_Y)
	dy += boxSize_Y;
	if(dz > boxHalf_Z)
	dz -= boxSize_Z;
	if(dz < -boxHalf_Z)
	dz += boxSize_Z;
	#endif
	r2 = dx * dx + dy * dy + dz * dz;

	if(r2 < h2)
	{
	numngb++;

	r = sqrt(r2);

	u = r * hinv;

	if(u < 0.5)
	{
	wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
	}
	else
	{
	wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
	}

	mom1 += P[j].Mass(SphP[j].Observable)/(SphP[j].Density) wk;
	mom0 += P[j].Mass /(SphP[j].Density) * wk;

	}
	}
	}
	while(startnode >= 0);


	/* Now collect the result at the right place */
	if(mode == 0)
	{
	SphQ[target].ObsMoment0 = mom0;
	SphQ[target].ObsMoment1 = mom1;
	}
	else
	{
	SphDataResult[target].ObsMoment0 = mom0;
	SphDataResult[target].ObsMoment1 = mom1;
	}
	}



	#endif /PY_INTERFACE/

sph.cNo OneTemporaryActions

File Metadata

sph.cView Options

Event Timeline

sph.c
No OneTemporary
Actions

sph.c
View Options