Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F83526033
pm_periodic.c
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Sep 17, 15:11
Size
33 KB
Mime Type
text/x-c
Expires
Thu, Sep 19, 15:11 (2 d)
Engine
blob
Format
Raw Data
Handle
20850304
Attached To
R195 SCITAS application benchmarks
pm_periodic.c
View Options
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <mpi.h>
/*! \file pm_periodic.c
* \brief routines for periodic PM-force computation
*/
#ifdef PMGRID
#ifdef PERIODIC
#ifdef NOTYPEPREFIXFFTW
#include <rfftw_mpi.h>
#else
#ifdef DOUBLEPRECISION_FFTW
#include <drfftw_mpi.h>
/* double precision FFTW */
#else
#include <srfftw_mpi.h>
#endif
#endif
#include "allvars.h"
#include "proto.h"
#define PMGRID2 (2*(PMGRID/2 + 1))
static
rfftwnd_mpi_plan
fft_forward_plan
,
fft_inverse_plan
;
static
int
slab_to_task
[
PMGRID
];
static
int
*
slabs_per_task
;
static
int
*
first_slab_of_task
;
static
int
*
meshmin_list
,
*
meshmax_list
;
static
int
slabstart_x
,
nslab_x
,
slabstart_y
,
nslab_y
,
smallest_slab
;
static
int
fftsize
,
maxfftsize
;
static
fftw_real
*
rhogrid
,
*
forcegrid
,
*
workspace
;
static
fftw_complex
*
fft_of_rhogrid
;
static
FLOAT
to_slab_fac
;
/*! This routines generates the FFTW-plans to carry out the parallel FFTs
* later on. Some auxiliary variables are also initialized.
*/
void
pm_init_periodic
(
void
)
{
int
i
;
int
slab_to_task_local
[
PMGRID
];
All
.
Asmth
[
0
]
=
ASMTH
*
All
.
BoxSize
/
PMGRID
;
All
.
Rcut
[
0
]
=
RCUT
*
All
.
Asmth
[
0
];
/* Set up the FFTW plan files. */
fft_forward_plan
=
rfftw3d_mpi_create_plan
(
MPI_COMM_WORLD
,
PMGRID
,
PMGRID
,
PMGRID
,
FFTW_REAL_TO_COMPLEX
,
FFTW_ESTIMATE
|
FFTW_IN_PLACE
);
fft_inverse_plan
=
rfftw3d_mpi_create_plan
(
MPI_COMM_WORLD
,
PMGRID
,
PMGRID
,
PMGRID
,
FFTW_COMPLEX_TO_REAL
,
FFTW_ESTIMATE
|
FFTW_IN_PLACE
);
/* Workspace out the ranges on each processor. */
rfftwnd_mpi_local_sizes
(
fft_forward_plan
,
&
nslab_x
,
&
slabstart_x
,
&
nslab_y
,
&
slabstart_y
,
&
fftsize
);
for
(
i
=
0
;
i
<
PMGRID
;
i
++
)
slab_to_task_local
[
i
]
=
0
;
for
(
i
=
0
;
i
<
nslab_x
;
i
++
)
slab_to_task_local
[
slabstart_x
+
i
]
=
ThisTask
;
MPI_Allreduce
(
slab_to_task_local
,
slab_to_task
,
PMGRID
,
MPI_INT
,
MPI_SUM
,
MPI_COMM_WORLD
);
MPI_Allreduce
(
&
nslab_x
,
&
smallest_slab
,
1
,
MPI_INT
,
MPI_MIN
,
MPI_COMM_WORLD
);
slabs_per_task
=
malloc
(
NTask
*
sizeof
(
int
));
MPI_Allgather
(
&
nslab_x
,
1
,
MPI_INT
,
slabs_per_task
,
1
,
MPI_INT
,
MPI_COMM_WORLD
);
if
(
ThisTask
==
0
)
{
for
(
i
=
0
;
i
<
NTask
;
i
++
)
printf
(
"Task=%d FFT-Slabs=%d
\n
"
,
i
,
slabs_per_task
[
i
]);
}
first_slab_of_task
=
malloc
(
NTask
*
sizeof
(
int
));
MPI_Allgather
(
&
slabstart_x
,
1
,
MPI_INT
,
first_slab_of_task
,
1
,
MPI_INT
,
MPI_COMM_WORLD
);
meshmin_list
=
malloc
(
3
*
NTask
*
sizeof
(
int
));
meshmax_list
=
malloc
(
3
*
NTask
*
sizeof
(
int
));
to_slab_fac
=
PMGRID
/
All
.
BoxSize
;
MPI_Allreduce
(
&
fftsize
,
&
maxfftsize
,
1
,
MPI_INT
,
MPI_MAX
,
MPI_COMM_WORLD
);
}
/*! This function allocates the memory neeed to compute the long-range PM
* force. Three fields are used, one to hold the density (and its FFT, and
* then the real-space potential), one to hold the force field obtained by
* finite differencing, and finally a workspace field, which is used both as
* workspace for the parallel FFT, and as buffer for the communication
* algorithm used in the force computation.
*/
void
pm_init_periodic_allocate
(
int
dimprod
)
{
static
int
first_alloc
=
1
;
int
dimprodmax
;
double
bytes_tot
=
0
;
size_t
bytes
;
MPI_Allreduce
(
&
dimprod
,
&
dimprodmax
,
1
,
MPI_INT
,
MPI_MAX
,
MPI_COMM_WORLD
);
/* allocate the memory to hold the FFT fields */
if
(
!
(
rhogrid
=
(
fftw_real
*
)
malloc
(
bytes
=
fftsize
*
sizeof
(
fftw_real
))))
{
printf
(
"failed to allocate memory for `FFT-rhogrid' (%g MB).
\n
"
,
bytes
/
(
1024.0
*
1024.0
));
endrun
(
1
);
}
bytes_tot
+=
bytes
;
if
(
!
(
forcegrid
=
(
fftw_real
*
)
malloc
(
bytes
=
imax
(
fftsize
,
dimprodmax
)
*
sizeof
(
fftw_real
))))
{
printf
(
"failed to allocate memory for `FFT-forcegrid' (%g MB).
\n
"
,
bytes
/
(
1024.0
*
1024.0
));
endrun
(
1
);
}
bytes_tot
+=
bytes
;
if
(
!
(
workspace
=
(
fftw_real
*
)
malloc
(
bytes
=
imax
(
maxfftsize
,
dimprodmax
)
*
sizeof
(
fftw_real
))))
{
printf
(
"failed to allocate memory for `FFT-workspace' (%g MB).
\n
"
,
bytes
/
(
1024.0
*
1024.0
));
endrun
(
1
);
}
bytes_tot
+=
bytes
;
if
(
first_alloc
==
1
)
{
first_alloc
=
0
;
if
(
ThisTask
==
0
)
printf
(
"
\n
Allocated %g MByte for FFT data.
\n\n
"
,
bytes_tot
/
(
1024.0
*
1024.0
));
}
fft_of_rhogrid
=
(
fftw_complex
*
)
&
rhogrid
[
0
];
}
/*! This routine frees the space allocated for the parallel FFT algorithm.
*/
void
pm_init_periodic_free
(
void
)
{
/* allocate the memory to hold the FFT fields */
free
(
workspace
);
free
(
forcegrid
);
free
(
rhogrid
);
}
/*! Calculates the long-range periodic force given the particle positions
* using the PM method. The force is Gaussian filtered with Asmth, given in
* mesh-cell units. We carry out a CIC charge assignment, and compute the
* potenial by Fourier transform methods. The potential is finite differenced
* using a 4-point finite differencing formula, and the forces are
* interpolated tri-linearly to the particle positions. The CIC kernel is
* deconvolved. Note that the particle distribution is not in the slab
* decomposition that is used for the FFT. Instead, overlapping patches
* between local domains and FFT slabs are communicated as needed.
*/
void
pmforce_periodic
(
void
)
{
double
k2
,
kx
,
ky
,
kz
,
smth
;
double
dx
,
dy
,
dz
;
double
fx
,
fy
,
fz
,
ff
;
double
asmth2
,
fac
,
acc_dim
;
int
i
,
j
,
slab
,
level
,
sendTask
,
recvTask
;
int
x
,
y
,
z
,
xl
,
yl
,
zl
,
xr
,
yr
,
zr
,
xll
,
yll
,
zll
,
xrr
,
yrr
,
zrr
,
ip
,
dim
;
int
slab_x
,
slab_y
,
slab_z
;
int
slab_xx
,
slab_yy
,
slab_zz
;
int
meshmin
[
3
],
meshmax
[
3
],
sendmin
,
sendmax
,
recvmin
,
recvmax
;
int
rep
,
ncont
,
cont_sendmin
[
2
],
cont_sendmax
[
2
],
cont_recvmin
[
2
],
cont_recvmax
[
2
];
int
dimx
,
dimy
,
dimz
,
recv_dimx
,
recv_dimy
,
recv_dimz
;
MPI_Status
status
;
if
(
ThisTask
==
0
)
{
printf
(
"Starting periodic PM calculation.
\n
"
);
fflush
(
stdout
);
}
force_treefree
();
asmth2
=
(
2
*
M_PI
)
*
All
.
Asmth
[
0
]
/
All
.
BoxSize
;
asmth2
*=
asmth2
;
fac
=
All
.
G
/
(
M_PI
*
All
.
BoxSize
);
/* to get potential */
fac
*=
1
/
(
2
*
All
.
BoxSize
/
PMGRID
);
/* for finite differencing */
/* first, establish the extension of the local patch in the PMGRID */
for
(
j
=
0
;
j
<
3
;
j
++
)
{
meshmin
[
j
]
=
PMGRID
;
meshmax
[
j
]
=
0
;
}
for
(
i
=
0
;
i
<
NumPart
;
i
++
)
{
for
(
j
=
0
;
j
<
3
;
j
++
)
{
slab
=
to_slab_fac
*
P
[
i
].
Pos
[
j
];
if
(
slab
>=
PMGRID
)
slab
=
PMGRID
-
1
;
if
(
slab
<
meshmin
[
j
])
meshmin
[
j
]
=
slab
;
if
(
slab
>
meshmax
[
j
])
meshmax
[
j
]
=
slab
;
}
}
MPI_Allgather
(
meshmin
,
3
,
MPI_INT
,
meshmin_list
,
3
,
MPI_INT
,
MPI_COMM_WORLD
);
MPI_Allgather
(
meshmax
,
3
,
MPI_INT
,
meshmax_list
,
3
,
MPI_INT
,
MPI_COMM_WORLD
);
dimx
=
meshmax
[
0
]
-
meshmin
[
0
]
+
2
;
dimy
=
meshmax
[
1
]
-
meshmin
[
1
]
+
2
;
dimz
=
meshmax
[
2
]
-
meshmin
[
2
]
+
2
;
pm_init_periodic_allocate
((
dimx
+
4
)
*
(
dimy
+
4
)
*
(
dimz
+
4
));
for
(
i
=
0
;
i
<
dimx
*
dimy
*
dimz
;
i
++
)
workspace
[
i
]
=
0
;
for
(
i
=
0
;
i
<
NumPart
;
i
++
)
{
slab_x
=
to_slab_fac
*
P
[
i
].
Pos
[
0
];
if
(
slab_x
>=
PMGRID
)
slab_x
=
PMGRID
-
1
;
dx
=
to_slab_fac
*
P
[
i
].
Pos
[
0
]
-
slab_x
;
slab_x
-=
meshmin
[
0
];
slab_xx
=
slab_x
+
1
;
slab_y
=
to_slab_fac
*
P
[
i
].
Pos
[
1
];
if
(
slab_y
>=
PMGRID
)
slab_y
=
PMGRID
-
1
;
dy
=
to_slab_fac
*
P
[
i
].
Pos
[
1
]
-
slab_y
;
slab_y
-=
meshmin
[
1
];
slab_yy
=
slab_y
+
1
;
slab_z
=
to_slab_fac
*
P
[
i
].
Pos
[
2
];
if
(
slab_z
>=
PMGRID
)
slab_z
=
PMGRID
-
1
;
dz
=
to_slab_fac
*
P
[
i
].
Pos
[
2
]
-
slab_z
;
slab_z
-=
meshmin
[
2
];
slab_zz
=
slab_z
+
1
;
workspace
[(
slab_x
*
dimy
+
slab_y
)
*
dimz
+
slab_z
]
+=
P
[
i
].
Mass
*
(
1.0
-
dx
)
*
(
1.0
-
dy
)
*
(
1.0
-
dz
);
workspace
[(
slab_x
*
dimy
+
slab_yy
)
*
dimz
+
slab_z
]
+=
P
[
i
].
Mass
*
(
1.0
-
dx
)
*
dy
*
(
1.0
-
dz
);
workspace
[(
slab_x
*
dimy
+
slab_y
)
*
dimz
+
slab_zz
]
+=
P
[
i
].
Mass
*
(
1.0
-
dx
)
*
(
1.0
-
dy
)
*
dz
;
workspace
[(
slab_x
*
dimy
+
slab_yy
)
*
dimz
+
slab_zz
]
+=
P
[
i
].
Mass
*
(
1.0
-
dx
)
*
dy
*
dz
;
workspace
[(
slab_xx
*
dimy
+
slab_y
)
*
dimz
+
slab_z
]
+=
P
[
i
].
Mass
*
(
dx
)
*
(
1.0
-
dy
)
*
(
1.0
-
dz
);
workspace
[(
slab_xx
*
dimy
+
slab_yy
)
*
dimz
+
slab_z
]
+=
P
[
i
].
Mass
*
(
dx
)
*
dy
*
(
1.0
-
dz
);
workspace
[(
slab_xx
*
dimy
+
slab_y
)
*
dimz
+
slab_zz
]
+=
P
[
i
].
Mass
*
(
dx
)
*
(
1.0
-
dy
)
*
dz
;
workspace
[(
slab_xx
*
dimy
+
slab_yy
)
*
dimz
+
slab_zz
]
+=
P
[
i
].
Mass
*
(
dx
)
*
dy
*
dz
;
}
for
(
i
=
0
;
i
<
fftsize
;
i
++
)
/* clear local density field */
rhogrid
[
i
]
=
0
;
for
(
level
=
0
;
level
<
(
1
<<
PTask
);
level
++
)
/* note: for level=0, target is the same task */
{
sendTask
=
ThisTask
;
recvTask
=
ThisTask
^
level
;
if
(
recvTask
<
NTask
)
{
/* check how much we have to send */
sendmin
=
2
*
PMGRID
;
sendmax
=
-
1
;
for
(
slab_x
=
meshmin
[
0
];
slab_x
<
meshmax
[
0
]
+
2
;
slab_x
++
)
if
(
slab_to_task
[
slab_x
%
PMGRID
]
==
recvTask
)
{
if
(
slab_x
<
sendmin
)
sendmin
=
slab_x
;
if
(
slab_x
>
sendmax
)
sendmax
=
slab_x
;
}
if
(
sendmax
==
-
1
)
sendmin
=
0
;
/* check how much we have to receive */
recvmin
=
2
*
PMGRID
;
recvmax
=
-
1
;
for
(
slab_x
=
meshmin_list
[
3
*
recvTask
];
slab_x
<
meshmax_list
[
3
*
recvTask
]
+
2
;
slab_x
++
)
if
(
slab_to_task
[
slab_x
%
PMGRID
]
==
sendTask
)
{
if
(
slab_x
<
recvmin
)
recvmin
=
slab_x
;
if
(
slab_x
>
recvmax
)
recvmax
=
slab_x
;
}
if
(
recvmax
==
-
1
)
recvmin
=
0
;
if
((
recvmax
-
recvmin
)
>=
0
||
(
sendmax
-
sendmin
)
>=
0
)
/* ok, we have a contribution to the slab */
{
recv_dimx
=
meshmax_list
[
3
*
recvTask
+
0
]
-
meshmin_list
[
3
*
recvTask
+
0
]
+
2
;
recv_dimy
=
meshmax_list
[
3
*
recvTask
+
1
]
-
meshmin_list
[
3
*
recvTask
+
1
]
+
2
;
recv_dimz
=
meshmax_list
[
3
*
recvTask
+
2
]
-
meshmin_list
[
3
*
recvTask
+
2
]
+
2
;
if
(
level
>
0
)
{
MPI_Sendrecv
(
workspace
+
(
sendmin
-
meshmin
[
0
])
*
dimy
*
dimz
,
(
sendmax
-
sendmin
+
1
)
*
dimy
*
dimz
*
sizeof
(
fftw_real
),
MPI_BYTE
,
recvTask
,
TAG_PERIODIC_A
,
forcegrid
,
(
recvmax
-
recvmin
+
1
)
*
recv_dimy
*
recv_dimz
*
sizeof
(
fftw_real
),
MPI_BYTE
,
recvTask
,
TAG_PERIODIC_A
,
MPI_COMM_WORLD
,
&
status
);
}
else
{
memcpy
(
forcegrid
,
workspace
+
(
sendmin
-
meshmin
[
0
])
*
dimy
*
dimz
,
(
sendmax
-
sendmin
+
1
)
*
dimy
*
dimz
*
sizeof
(
fftw_real
));
}
for
(
slab_x
=
recvmin
;
slab_x
<=
recvmax
;
slab_x
++
)
{
slab_xx
=
(
slab_x
%
PMGRID
)
-
first_slab_of_task
[
ThisTask
];
if
(
slab_xx
>=
0
&&
slab_xx
<
slabs_per_task
[
ThisTask
])
{
for
(
slab_y
=
meshmin_list
[
3
*
recvTask
+
1
];
slab_y
<=
meshmax_list
[
3
*
recvTask
+
1
]
+
1
;
slab_y
++
)
{
slab_yy
=
slab_y
;
if
(
slab_yy
>=
PMGRID
)
slab_yy
-=
PMGRID
;
for
(
slab_z
=
meshmin_list
[
3
*
recvTask
+
2
];
slab_z
<=
meshmax_list
[
3
*
recvTask
+
2
]
+
1
;
slab_z
++
)
{
slab_zz
=
slab_z
;
if
(
slab_zz
>=
PMGRID
)
slab_zz
-=
PMGRID
;
rhogrid
[
PMGRID
*
PMGRID2
*
slab_xx
+
PMGRID2
*
slab_yy
+
slab_zz
]
+=
forcegrid
[((
slab_x
-
recvmin
)
*
recv_dimy
+
(
slab_y
-
meshmin_list
[
3
*
recvTask
+
1
]))
*
recv_dimz
+
(
slab_z
-
meshmin_list
[
3
*
recvTask
+
2
])];
}
}
}
}
}
}
}
/* Do the FFT of the density field */
rfftwnd_mpi
(
fft_forward_plan
,
1
,
rhogrid
,
workspace
,
FFTW_TRANSPOSED_ORDER
);
/* multiply with Green's function for the potential */
for
(
y
=
slabstart_y
;
y
<
slabstart_y
+
nslab_y
;
y
++
)
for
(
x
=
0
;
x
<
PMGRID
;
x
++
)
for
(
z
=
0
;
z
<
PMGRID
/
2
+
1
;
z
++
)
{
if
(
x
>
PMGRID
/
2
)
kx
=
x
-
PMGRID
;
else
kx
=
x
;
if
(
y
>
PMGRID
/
2
)
ky
=
y
-
PMGRID
;
else
ky
=
y
;
if
(
z
>
PMGRID
/
2
)
kz
=
z
-
PMGRID
;
else
kz
=
z
;
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
if
(
k2
>
0
)
{
smth
=
-
exp
(
-
k2
*
asmth2
)
/
k2
;
/* do deconvolution */
fx
=
fy
=
fz
=
1
;
if
(
kx
!=
0
)
{
fx
=
(
M_PI
*
kx
)
/
PMGRID
;
fx
=
sin
(
fx
)
/
fx
;
}
if
(
ky
!=
0
)
{
fy
=
(
M_PI
*
ky
)
/
PMGRID
;
fy
=
sin
(
fy
)
/
fy
;
}
if
(
kz
!=
0
)
{
fz
=
(
M_PI
*
kz
)
/
PMGRID
;
fz
=
sin
(
fz
)
/
fz
;
}
ff
=
1
/
(
fx
*
fy
*
fz
);
smth
*=
ff
*
ff
*
ff
*
ff
;
/* end deconvolution */
ip
=
PMGRID
*
(
PMGRID
/
2
+
1
)
*
(
y
-
slabstart_y
)
+
(
PMGRID
/
2
+
1
)
*
x
+
z
;
fft_of_rhogrid
[
ip
].
re
*=
smth
;
fft_of_rhogrid
[
ip
].
im
*=
smth
;
}
}
if
(
slabstart_y
==
0
)
fft_of_rhogrid
[
0
].
re
=
fft_of_rhogrid
[
0
].
im
=
0.0
;
/* Do the FFT to get the potential */
rfftwnd_mpi
(
fft_inverse_plan
,
1
,
rhogrid
,
workspace
,
FFTW_TRANSPOSED_ORDER
);
/* Now rhogrid holds the potential */
/* construct the potential for the local patch */
dimx
=
meshmax
[
0
]
-
meshmin
[
0
]
+
6
;
dimy
=
meshmax
[
1
]
-
meshmin
[
1
]
+
6
;
dimz
=
meshmax
[
2
]
-
meshmin
[
2
]
+
6
;
for
(
level
=
0
;
level
<
(
1
<<
PTask
);
level
++
)
/* note: for level=0, target is the same task */
{
sendTask
=
ThisTask
;
recvTask
=
ThisTask
^
level
;
if
(
recvTask
<
NTask
)
{
/* check how much we have to send */
sendmin
=
2
*
PMGRID
;
sendmax
=
-
PMGRID
;
for
(
slab_x
=
meshmin_list
[
3
*
recvTask
]
-
2
;
slab_x
<
meshmax_list
[
3
*
recvTask
]
+
4
;
slab_x
++
)
if
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
==
sendTask
)
{
if
(
slab_x
<
sendmin
)
sendmin
=
slab_x
;
if
(
slab_x
>
sendmax
)
sendmax
=
slab_x
;
}
if
(
sendmax
==
-
PMGRID
)
sendmin
=
sendmax
+
1
;
/* check how much we have to receive */
recvmin
=
2
*
PMGRID
;
recvmax
=
-
PMGRID
;
for
(
slab_x
=
meshmin
[
0
]
-
2
;
slab_x
<
meshmax
[
0
]
+
4
;
slab_x
++
)
if
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
==
recvTask
)
{
if
(
slab_x
<
recvmin
)
recvmin
=
slab_x
;
if
(
slab_x
>
recvmax
)
recvmax
=
slab_x
;
}
if
(
recvmax
==
-
PMGRID
)
recvmin
=
recvmax
+
1
;
if
((
recvmax
-
recvmin
)
>=
0
||
(
sendmax
-
sendmin
)
>=
0
)
/* ok, we have a contribution to the slab */
{
recv_dimx
=
meshmax_list
[
3
*
recvTask
+
0
]
-
meshmin_list
[
3
*
recvTask
+
0
]
+
6
;
recv_dimy
=
meshmax_list
[
3
*
recvTask
+
1
]
-
meshmin_list
[
3
*
recvTask
+
1
]
+
6
;
recv_dimz
=
meshmax_list
[
3
*
recvTask
+
2
]
-
meshmin_list
[
3
*
recvTask
+
2
]
+
6
;
ncont
=
1
;
cont_sendmin
[
0
]
=
sendmin
;
cont_sendmax
[
0
]
=
sendmax
;
cont_sendmin
[
1
]
=
sendmax
+
1
;
cont_sendmax
[
1
]
=
sendmax
;
cont_recvmin
[
0
]
=
recvmin
;
cont_recvmax
[
0
]
=
recvmax
;
cont_recvmin
[
1
]
=
recvmax
+
1
;
cont_recvmax
[
1
]
=
recvmax
;
for
(
slab_x
=
sendmin
;
slab_x
<=
sendmax
;
slab_x
++
)
{
if
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
!=
ThisTask
)
{
/* non-contiguous */
cont_sendmax
[
0
]
=
slab_x
-
1
;
while
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
!=
ThisTask
)
slab_x
++
;
cont_sendmin
[
1
]
=
slab_x
;
ncont
++
;
}
}
for
(
slab_x
=
recvmin
;
slab_x
<=
recvmax
;
slab_x
++
)
{
if
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
!=
recvTask
)
{
/* non-contiguous */
cont_recvmax
[
0
]
=
slab_x
-
1
;
while
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
!=
recvTask
)
slab_x
++
;
cont_recvmin
[
1
]
=
slab_x
;
if
(
ncont
==
1
)
ncont
++
;
}
}
for
(
rep
=
0
;
rep
<
ncont
;
rep
++
)
{
sendmin
=
cont_sendmin
[
rep
];
sendmax
=
cont_sendmax
[
rep
];
recvmin
=
cont_recvmin
[
rep
];
recvmax
=
cont_recvmax
[
rep
];
/* prepare what we want to send */
if
(
sendmax
-
sendmin
>=
0
)
{
for
(
slab_x
=
sendmin
;
slab_x
<=
sendmax
;
slab_x
++
)
{
slab_xx
=
((
slab_x
+
PMGRID
)
%
PMGRID
)
-
first_slab_of_task
[
ThisTask
];
for
(
slab_y
=
meshmin_list
[
3
*
recvTask
+
1
]
-
2
;
slab_y
<
meshmax_list
[
3
*
recvTask
+
1
]
+
4
;
slab_y
++
)
{
slab_yy
=
(
slab_y
+
PMGRID
)
%
PMGRID
;
for
(
slab_z
=
meshmin_list
[
3
*
recvTask
+
2
]
-
2
;
slab_z
<=
meshmax_list
[
3
*
recvTask
+
2
]
+
4
;
slab_z
++
)
{
slab_zz
=
(
slab_z
+
PMGRID
)
%
PMGRID
;
forcegrid
[((
slab_x
-
sendmin
)
*
recv_dimy
+
(
slab_y
-
(
meshmin_list
[
3
*
recvTask
+
1
]
-
2
)))
*
recv_dimz
+
slab_z
-
(
meshmin_list
[
3
*
recvTask
+
2
]
-
2
)]
=
rhogrid
[
PMGRID
*
PMGRID2
*
slab_xx
+
PMGRID2
*
slab_yy
+
slab_zz
];
}
}
}
}
if
(
level
>
0
)
{
MPI_Sendrecv
(
forcegrid
,
(
sendmax
-
sendmin
+
1
)
*
recv_dimy
*
recv_dimz
*
sizeof
(
fftw_real
),
MPI_BYTE
,
recvTask
,
TAG_PERIODIC_B
,
workspace
+
(
recvmin
-
(
meshmin
[
0
]
-
2
))
*
dimy
*
dimz
,
(
recvmax
-
recvmin
+
1
)
*
dimy
*
dimz
*
sizeof
(
fftw_real
),
MPI_BYTE
,
recvTask
,
TAG_PERIODIC_B
,
MPI_COMM_WORLD
,
&
status
);
}
else
{
memcpy
(
workspace
+
(
recvmin
-
(
meshmin
[
0
]
-
2
))
*
dimy
*
dimz
,
forcegrid
,
(
recvmax
-
recvmin
+
1
)
*
dimy
*
dimz
*
sizeof
(
fftw_real
));
}
}
}
}
}
dimx
=
meshmax
[
0
]
-
meshmin
[
0
]
+
2
;
dimy
=
meshmax
[
1
]
-
meshmin
[
1
]
+
2
;
dimz
=
meshmax
[
2
]
-
meshmin
[
2
]
+
2
;
recv_dimx
=
meshmax
[
0
]
-
meshmin
[
0
]
+
6
;
recv_dimy
=
meshmax
[
1
]
-
meshmin
[
1
]
+
6
;
recv_dimz
=
meshmax
[
2
]
-
meshmin
[
2
]
+
6
;
for
(
dim
=
0
;
dim
<
3
;
dim
++
)
/* Calculate each component of the force. */
{
/* get the force component by finite differencing the potential */
/* note: "workspace" now contains the potential for the local patch, plus a suffiently large buffer region */
for
(
x
=
0
;
x
<
meshmax
[
0
]
-
meshmin
[
0
]
+
2
;
x
++
)
for
(
y
=
0
;
y
<
meshmax
[
1
]
-
meshmin
[
1
]
+
2
;
y
++
)
for
(
z
=
0
;
z
<
meshmax
[
2
]
-
meshmin
[
2
]
+
2
;
z
++
)
{
xrr
=
xll
=
xr
=
xl
=
x
;
yrr
=
yll
=
yr
=
yl
=
y
;
zrr
=
zll
=
zr
=
zl
=
z
;
switch
(
dim
)
{
case
0
:
xr
=
x
+
1
;
xrr
=
x
+
2
;
xl
=
x
-
1
;
xll
=
x
-
2
;
break
;
case
1
:
yr
=
y
+
1
;
yl
=
y
-
1
;
yrr
=
y
+
2
;
yll
=
y
-
2
;
break
;
case
2
:
zr
=
z
+
1
;
zl
=
z
-
1
;
zrr
=
z
+
2
;
zll
=
z
-
2
;
break
;
}
forcegrid
[(
x
*
dimy
+
y
)
*
dimz
+
z
]
=
fac
*
((
4.0
/
3
)
*
(
workspace
[((
xl
+
2
)
*
recv_dimy
+
(
yl
+
2
))
*
recv_dimz
+
(
zl
+
2
)]
-
workspace
[((
xr
+
2
)
*
recv_dimy
+
(
yr
+
2
))
*
recv_dimz
+
(
zr
+
2
)])
-
(
1.0
/
6
)
*
(
workspace
[((
xll
+
2
)
*
recv_dimy
+
(
yll
+
2
))
*
recv_dimz
+
(
zll
+
2
)]
-
workspace
[((
xrr
+
2
)
*
recv_dimy
+
(
yrr
+
2
))
*
recv_dimz
+
(
zrr
+
2
)]));
}
/* read out the forces */
for
(
i
=
0
;
i
<
NumPart
;
i
++
)
{
slab_x
=
to_slab_fac
*
P
[
i
].
Pos
[
0
];
if
(
slab_x
>=
PMGRID
)
slab_x
=
PMGRID
-
1
;
dx
=
to_slab_fac
*
P
[
i
].
Pos
[
0
]
-
slab_x
;
slab_x
-=
meshmin
[
0
];
slab_xx
=
slab_x
+
1
;
slab_y
=
to_slab_fac
*
P
[
i
].
Pos
[
1
];
if
(
slab_y
>=
PMGRID
)
slab_y
=
PMGRID
-
1
;
dy
=
to_slab_fac
*
P
[
i
].
Pos
[
1
]
-
slab_y
;
slab_y
-=
meshmin
[
1
];
slab_yy
=
slab_y
+
1
;
slab_z
=
to_slab_fac
*
P
[
i
].
Pos
[
2
];
if
(
slab_z
>=
PMGRID
)
slab_z
=
PMGRID
-
1
;
dz
=
to_slab_fac
*
P
[
i
].
Pos
[
2
]
-
slab_z
;
slab_z
-=
meshmin
[
2
];
slab_zz
=
slab_z
+
1
;
acc_dim
=
forcegrid
[(
slab_x
*
dimy
+
slab_y
)
*
dimz
+
slab_z
]
*
(
1.0
-
dx
)
*
(
1.0
-
dy
)
*
(
1.0
-
dz
);
acc_dim
+=
forcegrid
[(
slab_x
*
dimy
+
slab_yy
)
*
dimz
+
slab_z
]
*
(
1.0
-
dx
)
*
dy
*
(
1.0
-
dz
);
acc_dim
+=
forcegrid
[(
slab_x
*
dimy
+
slab_y
)
*
dimz
+
slab_zz
]
*
(
1.0
-
dx
)
*
(
1.0
-
dy
)
*
dz
;
acc_dim
+=
forcegrid
[(
slab_x
*
dimy
+
slab_yy
)
*
dimz
+
slab_zz
]
*
(
1.0
-
dx
)
*
dy
*
dz
;
acc_dim
+=
forcegrid
[(
slab_xx
*
dimy
+
slab_y
)
*
dimz
+
slab_z
]
*
(
dx
)
*
(
1.0
-
dy
)
*
(
1.0
-
dz
);
acc_dim
+=
forcegrid
[(
slab_xx
*
dimy
+
slab_yy
)
*
dimz
+
slab_z
]
*
(
dx
)
*
dy
*
(
1.0
-
dz
);
acc_dim
+=
forcegrid
[(
slab_xx
*
dimy
+
slab_y
)
*
dimz
+
slab_zz
]
*
(
dx
)
*
(
1.0
-
dy
)
*
dz
;
acc_dim
+=
forcegrid
[(
slab_xx
*
dimy
+
slab_yy
)
*
dimz
+
slab_zz
]
*
(
dx
)
*
dy
*
dz
;
P
[
i
].
GravPM
[
dim
]
=
acc_dim
;
}
}
pm_init_periodic_free
();
force_treeallocate
(
All
.
TreeAllocFactor
*
All
.
MaxPart
,
All
.
MaxPart
);
All
.
NumForcesSinceLastDomainDecomp
=
1
+
All
.
TotNumPart
*
All
.
TreeDomainUpdateFrequency
;
if
(
ThisTask
==
0
)
{
printf
(
"done PM.
\n
"
);
fflush
(
stdout
);
}
}
/*! Calculates the long-range potential using the PM method. The potential is
* Gaussian filtered with Asmth, given in mesh-cell units. We carry out a CIC
* charge assignment, and compute the potenial by Fourier transform
* methods. The CIC kernel is deconvolved.
*/
void
pmpotential_periodic
(
void
)
{
double
k2
,
kx
,
ky
,
kz
,
smth
;
double
dx
,
dy
,
dz
;
double
fx
,
fy
,
fz
,
ff
;
double
asmth2
,
fac
;
int
i
,
j
,
slab
,
level
,
sendTask
,
recvTask
;
int
x
,
y
,
z
,
ip
;
int
slab_x
,
slab_y
,
slab_z
;
int
slab_xx
,
slab_yy
,
slab_zz
;
int
meshmin
[
3
],
meshmax
[
3
],
sendmin
,
sendmax
,
recvmin
,
recvmax
;
int
rep
,
ncont
,
cont_sendmin
[
2
],
cont_sendmax
[
2
],
cont_recvmin
[
2
],
cont_recvmax
[
2
];
int
dimx
,
dimy
,
dimz
,
recv_dimx
,
recv_dimy
,
recv_dimz
;
MPI_Status
status
;
if
(
ThisTask
==
0
)
{
printf
(
"Starting periodic PM calculation.
\n
"
);
fflush
(
stdout
);
}
asmth2
=
(
2
*
M_PI
)
*
All
.
Asmth
[
0
]
/
All
.
BoxSize
;
asmth2
*=
asmth2
;
fac
=
All
.
G
/
(
M_PI
*
All
.
BoxSize
);
/* to get potential */
force_treefree
();
/* first, establish the extension of the local patch in the PMGRID */
for
(
j
=
0
;
j
<
3
;
j
++
)
{
meshmin
[
j
]
=
PMGRID
;
meshmax
[
j
]
=
0
;
}
for
(
i
=
0
;
i
<
NumPart
;
i
++
)
{
for
(
j
=
0
;
j
<
3
;
j
++
)
{
slab
=
to_slab_fac
*
P
[
i
].
Pos
[
j
];
if
(
slab
>=
PMGRID
)
slab
=
PMGRID
-
1
;
if
(
slab
<
meshmin
[
j
])
meshmin
[
j
]
=
slab
;
if
(
slab
>
meshmax
[
j
])
meshmax
[
j
]
=
slab
;
}
}
MPI_Allgather
(
meshmin
,
3
,
MPI_INT
,
meshmin_list
,
3
,
MPI_INT
,
MPI_COMM_WORLD
);
MPI_Allgather
(
meshmax
,
3
,
MPI_INT
,
meshmax_list
,
3
,
MPI_INT
,
MPI_COMM_WORLD
);
dimx
=
meshmax
[
0
]
-
meshmin
[
0
]
+
2
;
dimy
=
meshmax
[
1
]
-
meshmin
[
1
]
+
2
;
dimz
=
meshmax
[
2
]
-
meshmin
[
2
]
+
2
;
pm_init_periodic_allocate
((
dimx
+
4
)
*
(
dimy
+
4
)
*
(
dimz
+
4
));
for
(
i
=
0
;
i
<
dimx
*
dimy
*
dimz
;
i
++
)
workspace
[
i
]
=
0
;
for
(
i
=
0
;
i
<
NumPart
;
i
++
)
{
slab_x
=
to_slab_fac
*
P
[
i
].
Pos
[
0
];
if
(
slab_x
>=
PMGRID
)
slab_x
=
PMGRID
-
1
;
dx
=
to_slab_fac
*
P
[
i
].
Pos
[
0
]
-
slab_x
;
slab_x
-=
meshmin
[
0
];
slab_xx
=
slab_x
+
1
;
slab_y
=
to_slab_fac
*
P
[
i
].
Pos
[
1
];
if
(
slab_y
>=
PMGRID
)
slab_y
=
PMGRID
-
1
;
dy
=
to_slab_fac
*
P
[
i
].
Pos
[
1
]
-
slab_y
;
slab_y
-=
meshmin
[
1
];
slab_yy
=
slab_y
+
1
;
slab_z
=
to_slab_fac
*
P
[
i
].
Pos
[
2
];
if
(
slab_z
>=
PMGRID
)
slab_z
=
PMGRID
-
1
;
dz
=
to_slab_fac
*
P
[
i
].
Pos
[
2
]
-
slab_z
;
slab_z
-=
meshmin
[
2
];
slab_zz
=
slab_z
+
1
;
workspace
[(
slab_x
*
dimy
+
slab_y
)
*
dimz
+
slab_z
]
+=
P
[
i
].
Mass
*
(
1.0
-
dx
)
*
(
1.0
-
dy
)
*
(
1.0
-
dz
);
workspace
[(
slab_x
*
dimy
+
slab_yy
)
*
dimz
+
slab_z
]
+=
P
[
i
].
Mass
*
(
1.0
-
dx
)
*
dy
*
(
1.0
-
dz
);
workspace
[(
slab_x
*
dimy
+
slab_y
)
*
dimz
+
slab_zz
]
+=
P
[
i
].
Mass
*
(
1.0
-
dx
)
*
(
1.0
-
dy
)
*
dz
;
workspace
[(
slab_x
*
dimy
+
slab_yy
)
*
dimz
+
slab_zz
]
+=
P
[
i
].
Mass
*
(
1.0
-
dx
)
*
dy
*
dz
;
workspace
[(
slab_xx
*
dimy
+
slab_y
)
*
dimz
+
slab_z
]
+=
P
[
i
].
Mass
*
(
dx
)
*
(
1.0
-
dy
)
*
(
1.0
-
dz
);
workspace
[(
slab_xx
*
dimy
+
slab_yy
)
*
dimz
+
slab_z
]
+=
P
[
i
].
Mass
*
(
dx
)
*
dy
*
(
1.0
-
dz
);
workspace
[(
slab_xx
*
dimy
+
slab_y
)
*
dimz
+
slab_zz
]
+=
P
[
i
].
Mass
*
(
dx
)
*
(
1.0
-
dy
)
*
dz
;
workspace
[(
slab_xx
*
dimy
+
slab_yy
)
*
dimz
+
slab_zz
]
+=
P
[
i
].
Mass
*
(
dx
)
*
dy
*
dz
;
}
for
(
i
=
0
;
i
<
fftsize
;
i
++
)
/* clear local density field */
rhogrid
[
i
]
=
0
;
for
(
level
=
0
;
level
<
(
1
<<
PTask
);
level
++
)
/* note: for level=0, target is the same task */
{
sendTask
=
ThisTask
;
recvTask
=
ThisTask
^
level
;
if
(
recvTask
<
NTask
)
{
/* check how much we have to send */
sendmin
=
2
*
PMGRID
;
sendmax
=
-
1
;
for
(
slab_x
=
meshmin
[
0
];
slab_x
<
meshmax
[
0
]
+
2
;
slab_x
++
)
if
(
slab_to_task
[
slab_x
%
PMGRID
]
==
recvTask
)
{
if
(
slab_x
<
sendmin
)
sendmin
=
slab_x
;
if
(
slab_x
>
sendmax
)
sendmax
=
slab_x
;
}
if
(
sendmax
==
-
1
)
sendmin
=
0
;
/* check how much we have to receive */
recvmin
=
2
*
PMGRID
;
recvmax
=
-
1
;
for
(
slab_x
=
meshmin_list
[
3
*
recvTask
];
slab_x
<
meshmax_list
[
3
*
recvTask
]
+
2
;
slab_x
++
)
if
(
slab_to_task
[
slab_x
%
PMGRID
]
==
sendTask
)
{
if
(
slab_x
<
recvmin
)
recvmin
=
slab_x
;
if
(
slab_x
>
recvmax
)
recvmax
=
slab_x
;
}
if
(
recvmax
==
-
1
)
recvmin
=
0
;
if
((
recvmax
-
recvmin
)
>=
0
||
(
sendmax
-
sendmin
)
>=
0
)
/* ok, we have a contribution to the slab */
{
recv_dimx
=
meshmax_list
[
3
*
recvTask
+
0
]
-
meshmin_list
[
3
*
recvTask
+
0
]
+
2
;
recv_dimy
=
meshmax_list
[
3
*
recvTask
+
1
]
-
meshmin_list
[
3
*
recvTask
+
1
]
+
2
;
recv_dimz
=
meshmax_list
[
3
*
recvTask
+
2
]
-
meshmin_list
[
3
*
recvTask
+
2
]
+
2
;
if
(
level
>
0
)
{
MPI_Sendrecv
(
workspace
+
(
sendmin
-
meshmin
[
0
])
*
dimy
*
dimz
,
(
sendmax
-
sendmin
+
1
)
*
dimy
*
dimz
*
sizeof
(
fftw_real
),
MPI_BYTE
,
recvTask
,
TAG_PERIODIC_C
,
forcegrid
,
(
recvmax
-
recvmin
+
1
)
*
recv_dimy
*
recv_dimz
*
sizeof
(
fftw_real
),
MPI_BYTE
,
recvTask
,
TAG_PERIODIC_C
,
MPI_COMM_WORLD
,
&
status
);
}
else
{
memcpy
(
forcegrid
,
workspace
+
(
sendmin
-
meshmin
[
0
])
*
dimy
*
dimz
,
(
sendmax
-
sendmin
+
1
)
*
dimy
*
dimz
*
sizeof
(
fftw_real
));
}
for
(
slab_x
=
recvmin
;
slab_x
<=
recvmax
;
slab_x
++
)
{
slab_xx
=
(
slab_x
%
PMGRID
)
-
first_slab_of_task
[
ThisTask
];
if
(
slab_xx
>=
0
&&
slab_xx
<
slabs_per_task
[
ThisTask
])
{
for
(
slab_y
=
meshmin_list
[
3
*
recvTask
+
1
];
slab_y
<=
meshmax_list
[
3
*
recvTask
+
1
]
+
1
;
slab_y
++
)
{
slab_yy
=
slab_y
;
if
(
slab_yy
>=
PMGRID
)
slab_yy
-=
PMGRID
;
for
(
slab_z
=
meshmin_list
[
3
*
recvTask
+
2
];
slab_z
<=
meshmax_list
[
3
*
recvTask
+
2
]
+
1
;
slab_z
++
)
{
slab_zz
=
slab_z
;
if
(
slab_zz
>=
PMGRID
)
slab_zz
-=
PMGRID
;
rhogrid
[
PMGRID
*
PMGRID2
*
slab_xx
+
PMGRID2
*
slab_yy
+
slab_zz
]
+=
forcegrid
[((
slab_x
-
recvmin
)
*
recv_dimy
+
(
slab_y
-
meshmin_list
[
3
*
recvTask
+
1
]))
*
recv_dimz
+
(
slab_z
-
meshmin_list
[
3
*
recvTask
+
2
])];
}
}
}
}
}
}
}
/* Do the FFT of the density field */
rfftwnd_mpi
(
fft_forward_plan
,
1
,
rhogrid
,
workspace
,
FFTW_TRANSPOSED_ORDER
);
/* multiply with Green's function for the potential */
for
(
y
=
slabstart_y
;
y
<
slabstart_y
+
nslab_y
;
y
++
)
for
(
x
=
0
;
x
<
PMGRID
;
x
++
)
for
(
z
=
0
;
z
<
PMGRID
/
2
+
1
;
z
++
)
{
if
(
x
>
PMGRID
/
2
)
kx
=
x
-
PMGRID
;
else
kx
=
x
;
if
(
y
>
PMGRID
/
2
)
ky
=
y
-
PMGRID
;
else
ky
=
y
;
if
(
z
>
PMGRID
/
2
)
kz
=
z
-
PMGRID
;
else
kz
=
z
;
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
if
(
k2
>
0
)
{
smth
=
-
exp
(
-
k2
*
asmth2
)
/
k2
*
fac
;
/* do deconvolution */
fx
=
fy
=
fz
=
1
;
if
(
kx
!=
0
)
{
fx
=
(
M_PI
*
kx
)
/
PMGRID
;
fx
=
sin
(
fx
)
/
fx
;
}
if
(
ky
!=
0
)
{
fy
=
(
M_PI
*
ky
)
/
PMGRID
;
fy
=
sin
(
fy
)
/
fy
;
}
if
(
kz
!=
0
)
{
fz
=
(
M_PI
*
kz
)
/
PMGRID
;
fz
=
sin
(
fz
)
/
fz
;
}
ff
=
1
/
(
fx
*
fy
*
fz
);
smth
*=
ff
*
ff
*
ff
*
ff
;
/* end deconvolution */
ip
=
PMGRID
*
(
PMGRID
/
2
+
1
)
*
(
y
-
slabstart_y
)
+
(
PMGRID
/
2
+
1
)
*
x
+
z
;
fft_of_rhogrid
[
ip
].
re
*=
smth
;
fft_of_rhogrid
[
ip
].
im
*=
smth
;
}
}
if
(
slabstart_y
==
0
)
fft_of_rhogrid
[
0
].
re
=
fft_of_rhogrid
[
0
].
im
=
0.0
;
/* Do the FFT to get the potential */
rfftwnd_mpi
(
fft_inverse_plan
,
1
,
rhogrid
,
workspace
,
FFTW_TRANSPOSED_ORDER
);
/* note: "rhogrid" now contains the potential */
dimx
=
meshmax
[
0
]
-
meshmin
[
0
]
+
6
;
dimy
=
meshmax
[
1
]
-
meshmin
[
1
]
+
6
;
dimz
=
meshmax
[
2
]
-
meshmin
[
2
]
+
6
;
for
(
level
=
0
;
level
<
(
1
<<
PTask
);
level
++
)
/* note: for level=0, target is the same task */
{
sendTask
=
ThisTask
;
recvTask
=
ThisTask
^
level
;
if
(
recvTask
<
NTask
)
{
/* check how much we have to send */
sendmin
=
2
*
PMGRID
;
sendmax
=
-
PMGRID
;
for
(
slab_x
=
meshmin_list
[
3
*
recvTask
]
-
2
;
slab_x
<
meshmax_list
[
3
*
recvTask
]
+
4
;
slab_x
++
)
if
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
==
sendTask
)
{
if
(
slab_x
<
sendmin
)
sendmin
=
slab_x
;
if
(
slab_x
>
sendmax
)
sendmax
=
slab_x
;
}
if
(
sendmax
==
-
PMGRID
)
sendmin
=
sendmax
+
1
;
/* check how much we have to receive */
recvmin
=
2
*
PMGRID
;
recvmax
=
-
PMGRID
;
for
(
slab_x
=
meshmin
[
0
]
-
2
;
slab_x
<
meshmax
[
0
]
+
4
;
slab_x
++
)
if
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
==
recvTask
)
{
if
(
slab_x
<
recvmin
)
recvmin
=
slab_x
;
if
(
slab_x
>
recvmax
)
recvmax
=
slab_x
;
}
if
(
recvmax
==
-
PMGRID
)
recvmin
=
recvmax
+
1
;
if
((
recvmax
-
recvmin
)
>=
0
||
(
sendmax
-
sendmin
)
>=
0
)
/* ok, we have a contribution to the slab */
{
recv_dimx
=
meshmax_list
[
3
*
recvTask
+
0
]
-
meshmin_list
[
3
*
recvTask
+
0
]
+
6
;
recv_dimy
=
meshmax_list
[
3
*
recvTask
+
1
]
-
meshmin_list
[
3
*
recvTask
+
1
]
+
6
;
recv_dimz
=
meshmax_list
[
3
*
recvTask
+
2
]
-
meshmin_list
[
3
*
recvTask
+
2
]
+
6
;
ncont
=
1
;
cont_sendmin
[
0
]
=
sendmin
;
cont_sendmax
[
0
]
=
sendmax
;
cont_sendmin
[
1
]
=
sendmax
+
1
;
cont_sendmax
[
1
]
=
sendmax
;
cont_recvmin
[
0
]
=
recvmin
;
cont_recvmax
[
0
]
=
recvmax
;
cont_recvmin
[
1
]
=
recvmax
+
1
;
cont_recvmax
[
1
]
=
recvmax
;
for
(
slab_x
=
sendmin
;
slab_x
<=
sendmax
;
slab_x
++
)
{
if
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
!=
ThisTask
)
{
/* non-contiguous */
cont_sendmax
[
0
]
=
slab_x
-
1
;
while
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
!=
ThisTask
)
slab_x
++
;
cont_sendmin
[
1
]
=
slab_x
;
ncont
++
;
}
}
for
(
slab_x
=
recvmin
;
slab_x
<=
recvmax
;
slab_x
++
)
{
if
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
!=
recvTask
)
{
/* non-contiguous */
cont_recvmax
[
0
]
=
slab_x
-
1
;
while
(
slab_to_task
[(
slab_x
+
PMGRID
)
%
PMGRID
]
!=
recvTask
)
slab_x
++
;
cont_recvmin
[
1
]
=
slab_x
;
if
(
ncont
==
1
)
ncont
++
;
}
}
for
(
rep
=
0
;
rep
<
ncont
;
rep
++
)
{
sendmin
=
cont_sendmin
[
rep
];
sendmax
=
cont_sendmax
[
rep
];
recvmin
=
cont_recvmin
[
rep
];
recvmax
=
cont_recvmax
[
rep
];
/* prepare what we want to send */
if
(
sendmax
-
sendmin
>=
0
)
{
for
(
slab_x
=
sendmin
;
slab_x
<=
sendmax
;
slab_x
++
)
{
slab_xx
=
((
slab_x
+
PMGRID
)
%
PMGRID
)
-
first_slab_of_task
[
ThisTask
];
for
(
slab_y
=
meshmin_list
[
3
*
recvTask
+
1
]
-
2
;
slab_y
<
meshmax_list
[
3
*
recvTask
+
1
]
+
4
;
slab_y
++
)
{
slab_yy
=
(
slab_y
+
PMGRID
)
%
PMGRID
;
for
(
slab_z
=
meshmin_list
[
3
*
recvTask
+
2
]
-
2
;
slab_z
<=
meshmax_list
[
3
*
recvTask
+
2
]
+
4
;
slab_z
++
)
{
slab_zz
=
(
slab_z
+
PMGRID
)
%
PMGRID
;
forcegrid
[((
slab_x
-
sendmin
)
*
recv_dimy
+
(
slab_y
-
(
meshmin_list
[
3
*
recvTask
+
1
]
-
2
)))
*
recv_dimz
+
slab_z
-
(
meshmin_list
[
3
*
recvTask
+
2
]
-
2
)]
=
rhogrid
[
PMGRID
*
PMGRID2
*
slab_xx
+
PMGRID2
*
slab_yy
+
slab_zz
];
}
}
}
}
if
(
level
>
0
)
{
MPI_Sendrecv
(
forcegrid
,
(
sendmax
-
sendmin
+
1
)
*
recv_dimy
*
recv_dimz
*
sizeof
(
fftw_real
),
MPI_BYTE
,
recvTask
,
TAG_PERIODIC_D
,
workspace
+
(
recvmin
-
(
meshmin
[
0
]
-
2
))
*
dimy
*
dimz
,
(
recvmax
-
recvmin
+
1
)
*
dimy
*
dimz
*
sizeof
(
fftw_real
),
MPI_BYTE
,
recvTask
,
TAG_PERIODIC_D
,
MPI_COMM_WORLD
,
&
status
);
}
else
{
memcpy
(
workspace
+
(
recvmin
-
(
meshmin
[
0
]
-
2
))
*
dimy
*
dimz
,
forcegrid
,
(
recvmax
-
recvmin
+
1
)
*
dimy
*
dimz
*
sizeof
(
fftw_real
));
}
}
}
}
}
dimx
=
meshmax
[
0
]
-
meshmin
[
0
]
+
2
;
dimy
=
meshmax
[
1
]
-
meshmin
[
1
]
+
2
;
dimz
=
meshmax
[
2
]
-
meshmin
[
2
]
+
2
;
recv_dimx
=
meshmax
[
0
]
-
meshmin
[
0
]
+
6
;
recv_dimy
=
meshmax
[
1
]
-
meshmin
[
1
]
+
6
;
recv_dimz
=
meshmax
[
2
]
-
meshmin
[
2
]
+
6
;
for
(
x
=
0
;
x
<
meshmax
[
0
]
-
meshmin
[
0
]
+
2
;
x
++
)
for
(
y
=
0
;
y
<
meshmax
[
1
]
-
meshmin
[
1
]
+
2
;
y
++
)
for
(
z
=
0
;
z
<
meshmax
[
2
]
-
meshmin
[
2
]
+
2
;
z
++
)
{
forcegrid
[(
x
*
dimy
+
y
)
*
dimz
+
z
]
=
workspace
[((
x
+
2
)
*
recv_dimy
+
(
y
+
2
))
*
recv_dimz
+
(
z
+
2
)];
}
/* read out the potential */
for
(
i
=
0
;
i
<
NumPart
;
i
++
)
{
slab_x
=
to_slab_fac
*
P
[
i
].
Pos
[
0
];
if
(
slab_x
>=
PMGRID
)
slab_x
=
PMGRID
-
1
;
dx
=
to_slab_fac
*
P
[
i
].
Pos
[
0
]
-
slab_x
;
slab_x
-=
meshmin
[
0
];
slab_xx
=
slab_x
+
1
;
slab_y
=
to_slab_fac
*
P
[
i
].
Pos
[
1
];
if
(
slab_y
>=
PMGRID
)
slab_y
=
PMGRID
-
1
;
dy
=
to_slab_fac
*
P
[
i
].
Pos
[
1
]
-
slab_y
;
slab_y
-=
meshmin
[
1
];
slab_yy
=
slab_y
+
1
;
slab_z
=
to_slab_fac
*
P
[
i
].
Pos
[
2
];
if
(
slab_z
>=
PMGRID
)
slab_z
=
PMGRID
-
1
;
dz
=
to_slab_fac
*
P
[
i
].
Pos
[
2
]
-
slab_z
;
slab_z
-=
meshmin
[
2
];
slab_zz
=
slab_z
+
1
;
P
[
i
].
Potential
+=
forcegrid
[(
slab_x
*
dimy
+
slab_y
)
*
dimz
+
slab_z
]
*
(
1.0
-
dx
)
*
(
1.0
-
dy
)
*
(
1.0
-
dz
);
P
[
i
].
Potential
+=
forcegrid
[(
slab_x
*
dimy
+
slab_yy
)
*
dimz
+
slab_z
]
*
(
1.0
-
dx
)
*
dy
*
(
1.0
-
dz
);
P
[
i
].
Potential
+=
forcegrid
[(
slab_x
*
dimy
+
slab_y
)
*
dimz
+
slab_zz
]
*
(
1.0
-
dx
)
*
(
1.0
-
dy
)
*
dz
;
P
[
i
].
Potential
+=
forcegrid
[(
slab_x
*
dimy
+
slab_yy
)
*
dimz
+
slab_zz
]
*
(
1.0
-
dx
)
*
dy
*
dz
;
P
[
i
].
Potential
+=
forcegrid
[(
slab_xx
*
dimy
+
slab_y
)
*
dimz
+
slab_z
]
*
(
dx
)
*
(
1.0
-
dy
)
*
(
1.0
-
dz
);
P
[
i
].
Potential
+=
forcegrid
[(
slab_xx
*
dimy
+
slab_yy
)
*
dimz
+
slab_z
]
*
(
dx
)
*
dy
*
(
1.0
-
dz
);
P
[
i
].
Potential
+=
forcegrid
[(
slab_xx
*
dimy
+
slab_y
)
*
dimz
+
slab_zz
]
*
(
dx
)
*
(
1.0
-
dy
)
*
dz
;
P
[
i
].
Potential
+=
forcegrid
[(
slab_xx
*
dimy
+
slab_yy
)
*
dimz
+
slab_zz
]
*
(
dx
)
*
dy
*
dz
;
}
pm_init_periodic_free
();
force_treeallocate
(
All
.
TreeAllocFactor
*
All
.
MaxPart
,
All
.
MaxPart
);
All
.
NumForcesSinceLastDomainDecomp
=
1
+
All
.
TotNumPart
*
All
.
TreeDomainUpdateFrequency
;
if
(
ThisTask
==
0
)
{
printf
(
"done PM-Potential.
\n
"
);
fflush
(
stdout
);
}
}
#endif
#endif
Event Timeline
Log In to Comment