\renewcommand{\FIGREP}{src/mpi/figures}

\section{Advanced MPI}

\intersec{izar}
\begin{frame}
  \frametitle{Advanced MPI}
  \framesubtitle{Goals of this section}
  \begin{itemize}
  \item Overview of more advanced functionalities
  \item Persistent communications
  \item Advanced collective communications
  \item Describing your own datatype
  \item Redefining communicators
  \item Associating a topology to a communicator
  \item Parallel I/O
  \item One-sided communications
  \end{itemize}
\end{frame}
\subsection{Persistent point-to-point}

\begin{frame}[fragile]
  \frametitle{Persistent communications}
  \begin{itemize}
  \item \cxxinline{MPI_Send_init} and \cxxinline{MPI_Recv_init} initialize the communication
  \item Same signature as non-blocking communications
  \item \cxxinline{MPI_Start} or \cxxinline{MPI_Startall} to start the communication
  \item Completion is checked the same way as for non-blocking communications
  \end{itemize}
\end{frame}
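\begin{frame}[fragile]
  \frametitle{Persistent communications}
  \framesubtitle{Sketch of a typical usage}
  A minimal sketch (ours, not from the lecture code); buffer, count, and peer are placeholders, and the receiving side is symmetric with \cxxinline{MPI_Recv_init}:
  \begin{cxxcode}{Example}
    MPI_Request request;
    // describe the communication once, outside the iteration loop
    MPI_Send_init(buf, count, MPI_DOUBLE, dest, tag, MPI_COMM_WORLD, &request);

    for (int step = 0; step < nsteps; ++step) {
      MPI_Start(&request);                   // start the pre-initialized send
      MPI_Wait(&request, MPI_STATUS_IGNORE); // completion as for non-blocking
    }

    MPI_Request_free(&request);              // release the persistent request
  \end{cxxcode}
\end{frame}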
\begin{frame}[exercise, fragile]
  \frametitle{Persistent communications}
  \begin{itemize}
  \item Replace the non-blocking communications in the Poisson code by persistent ones
  \end{itemize}
\end{frame}
\subsection{Advanced collective communications}

\subsubsection{V versions}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{V extension to \cxxinline{MPI\_Gather}}
  \begin{cxxcode}{Syntax}
    int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                    void *recvbuf, const int recvcounts[], const int displs[],
                    MPI_Datatype recvtype, int root, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{recvcounts} is now an array, one entry per rank
  \item \cxxinline{displs} array of displacements defining where to place the $i^{\mathrm{th}}$ received data
  \item receive different sizes per process
  \item receive in an array with strides
  \end{itemize}
\end{frame}
\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Gatherv semantic}
  \begin{cxxcode}{Semantic equivalent}
    // Every process
    MPI_Send(sendbuf, sendcount, sendtype, root, /*...*/);

    // On root process
    for(i = 0; i < nb_process; ++i)
      MPI_Recv(recvbuf + displs[i] * extent(recvtype), recvcounts[i], recvtype, i,
               /*...*/);
  \end{cxxcode}
\end{frame}
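\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Gatherv sketch}
  A minimal sketch (ours, not from the lecture code; assumes an initialized MPI and \cxxinline{<vector>}): every rank contributes \cxxinline{rank + 1} integers, gathered contiguously on root:
  \begin{cxxcode}{Example}
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int sendcount = rank + 1;          // different size per process
    std::vector<int> sendbuf(sendcount, rank);

    std::vector<int> recvcounts(size), displs(size), recvbuf;
    for (int i = 0, offset = 0; i < size; ++i) {
      recvcounts[i] = i + 1;
      displs[i] = offset;              // place block i right after block i-1
      offset += recvcounts[i];
    }
    if (rank == 0) recvbuf.resize((size * (size + 1)) / 2);

    MPI_Gatherv(sendbuf.data(), sendcount, MPI_INT, recvbuf.data(),
                recvcounts.data(), displs.data(), MPI_INT, 0, MPI_COMM_WORLD);
  \end{cxxcode}
\end{frame}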
\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{V extension to \cxxinline{MPI\_Scatter}}
  \begin{cxxcode}{Syntax}
    int MPI_Scatterv(const void *sendbuf, const int sendcounts[],
                     const int displs[], MPI_Datatype sendtype, void *recvbuf,
                     int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{sendcounts} is now an array, one entry per rank
  \item \cxxinline{displs} array of displacements defining where to take the data sent to the $i^{\mathrm{th}}$ process
  \item send different sizes per process
  \item send from an array with strides
  \end{itemize}
\end{frame}
\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Scatterv semantic}
  \begin{cxxcode}{Semantic equivalent}
    // On root process
    for(i = 0; i < nb_process; ++i)
      MPI_Send(sendbuf + displs[i] * extent(sendtype), sendcounts[i], sendtype, i,
               /*...*/);

    // Every process
    MPI_Recv(recvbuf, recvcount, recvtype, root, /*...*/);
  \end{cxxcode}
\end{frame}
\subsubsection{Non-blocking collective communications}

\begin{frame}[fragile]
  \frametitle{Non-blocking collective communications}
  \begin{itemize}
  \item \code{I} variant of the collective communications
  \item extra parameter \cxxinline{request}
  \item \cxxinline{MPI_Ibarrier}, \cxxinline{MPI_Ibcast}
  \item \cxxinline{MPI_Igather}, \cxxinline{MPI_Igatherv}, \cxxinline{MPI_Iscatter}, \cxxinline{MPI_Iscatterv}
  \item \cxxinline{MPI_Iallgather}, \cxxinline{MPI_Iallgatherv}, \cxxinline{MPI_Ialltoall}
  \item \cxxinline{MPI_Ireduce}, \cxxinline{MPI_Iallreduce}, \cxxinline{MPI_Iscan}, \cxxinline{MPI_Iexscan}
  \end{itemize}
\end{frame}
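\begin{frame}[fragile]
  \frametitle{Non-blocking collective communications}
  \framesubtitle{Sketch of a typical usage}
  A minimal sketch (ours, not from the lecture code): overlap a reduction with independent local work; \cxxinline{compute_local()} is a placeholder:
  \begin{cxxcode}{Example}
    double local = compute_local(); // local contribution of this rank
    double global = 0.;
    MPI_Request request;

    MPI_Iallreduce(&local, &global, 1, MPI_DOUBLE, MPI_SUM,
                   MPI_COMM_WORLD, &request);
    // ... do work that does not depend on global ...
    MPI_Wait(&request, MPI_STATUS_IGNORE); // global is now valid
  \end{cxxcode}
\end{frame}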
\subsubsection{Persistent collective communications}

\begin{frame}[fragile]
  \frametitle{Persistent collective communications}
  \begin{itemize}
  \item \code{\_init} variant of the collective communications
  \item extra parameter \cxxinline{request}
  \item \cxxinline{MPI_Barrier_init}, \cxxinline{MPI_Bcast_init}
  \item \cxxinline{MPI_Gather_init}, \cxxinline{MPI_Gatherv_init}, \cxxinline{MPI_Scatter_init}, \cxxinline{MPI_Scatterv_init}
  \item \cxxinline{MPI_Allgather_init}, \cxxinline{MPI_Allgatherv_init}, \cxxinline{MPI_Alltoall_init}
  \item \cxxinline{MPI_Reduce_init}, \cxxinline{MPI_Allreduce_init}, \cxxinline{MPI_Scan_init}, \cxxinline{MPI_Exscan_init}
  \end{itemize}
\end{frame}
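\begin{frame}[fragile]
  \frametitle{Persistent collective communications}
  \framesubtitle{Sketch of a typical usage}
  A minimal sketch (ours, not from the lecture code), assuming an MPI 4.0 library; note the extra \cxxinline{MPI_Info} parameter compared to the blocking call:
  \begin{cxxcode}{Example}
    MPI_Request request;
    MPI_Allreduce_init(&local, &global, 1, MPI_DOUBLE, MPI_SUM,
                       MPI_COMM_WORLD, MPI_INFO_NULL, &request);

    for (int step = 0; step < nsteps; ++step) {
      // update local in place between starts
      MPI_Start(&request);
      MPI_Wait(&request, MPI_STATUS_IGNORE);
    }

    MPI_Request_free(&request);
  \end{cxxcode}
\end{frame}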
\begin{frame}[exercise, fragile]
  \frametitle{Persistent collective}
  \begin{itemize}
  \item Replace the \cxxinline{MPI_Allreduce} by a persistent one
  \end{itemize}
\end{frame}
\subsection{Derived Datatypes}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Definition of a datatype}
  \begin{itemize}
  \item \cxxinline{MPI_Datatype} opaque type containing a \emph{Typemap}
    \begin{itemize}
    \item $Typemap = \{(type_{0}, disp_{0}), \dotsc, (type_{n-1}, disp_{n-1})\}$
    \item sequence of basic datatypes
    \item sequence of displacements (in bytes)
    \end{itemize}
  \item \code{extent} is the span from the first byte to the last one, with alignment requirements
    \begin{align*}
      lb(Typemap) &= \underset{j}{\min}(disp_{j}),\\
      ub(Typemap) &= \underset{j}{\max}(disp_{j} + \mathrm{sizeof}(type_{j})) + \epsilon \text{, and}\\
      extent(Typemap) &= ub(Typemap) - lb(Typemap)
    \end{align*}
    $\epsilon$ is there to account for alignment requirements
  \end{itemize}
\end{frame}
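\note{
  A concrete example (ours, not from the slides): on a typical x86-64 ABI,
  $Typemap = \{(\mathrm{double}, 0), (\mathrm{char}, 8)\}$ gives $lb = 0$ and
  $ub = 8 + 1 + 7 = 16$, where $\epsilon = 7$ pads the extent up to the 8-byte
  alignment of the double; hence $extent = 16$, the size of the corresponding
  C struct.
}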
\begin{frame}
  \frametitle{Derived Datatypes}
  \framesubtitle{Base datatypes}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_CHAR} & \cxxinline{char}\\
      \cxxinline{MPI_SHORT} & \cxxinline{signed short int}\\
      \cxxinline{MPI_INT} & \cxxinline{signed int}\\
      \cxxinline{MPI_LONG} & \cxxinline{signed long int}\\
      \cxxinline{MPI_LONG_LONG_INT} & \cxxinline{signed long long int}\\
      \cxxinline{MPI_LONG_LONG} & \cxxinline{signed long long int}\\
      \cxxinline{MPI_SIGNED_CHAR} & \cxxinline{signed char}\\
      \cxxinline{MPI_UNSIGNED_CHAR} & \cxxinline{unsigned char}\\
      \cxxinline{MPI_UNSIGNED_SHORT} & \cxxinline{unsigned short int}\\
      \cxxinline{MPI_UNSIGNED} & \cxxinline{unsigned int}\\
      \cxxinline{MPI_UNSIGNED_LONG} & \cxxinline{unsigned long int}\\
      \cxxinline{MPI_UNSIGNED_LONG_LONG} & \cxxinline{unsigned long long int}\\
      \bottomrule
    \end{tabular}
  \end{minipage}
  \hspace{1cm}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_FLOAT} & \cxxinline{float}\\
      \cxxinline{MPI_DOUBLE} & \cxxinline{double}\\
      \cxxinline{MPI_LONG_DOUBLE} & \cxxinline{long double}\\
      \cxxinline{MPI_WCHAR} & \cxxinline{wchar_t}\\
      \cxxinline{MPI_C_BOOL} & \cxxinline{_Bool}\\
      \cxxinline{MPI_INT8_T} & \cxxinline{int8_t}\\
      \cxxinline{MPI_INT16_T} & \cxxinline{int16_t}\\
      \cxxinline{MPI_INT32_T} & \cxxinline{int32_t}\\
      \cxxinline{MPI_INT64_T} & \cxxinline{int64_t}\\
      \cxxinline{MPI_UINT8_T} & \cxxinline{uint8_t}\\
      \cxxinline{MPI_UINT16_T} & \cxxinline{uint16_t}\\
      \cxxinline{MPI_UINT32_T} & \cxxinline{uint32_t}\\
      \cxxinline{MPI_UINT64_T} & \cxxinline{uint64_t}\\
      \bottomrule
    \end{tabular}
  \end{minipage}
\end{frame}
\begin{frame}
  \frametitle{Derived Datatypes}
  \framesubtitle{Base datatypes}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C++ datatype\\
      \midrule
      \cxxinline{MPI_CXX_BOOL} & \cxxinline{bool}\\
      \cxxinline{MPI_CXX_FLOAT_COMPLEX} & \cxxinline{std::complex<float>}\\
      \cxxinline{MPI_CXX_DOUBLE_COMPLEX} & \cxxinline{std::complex<double>}\\
      \cxxinline{MPI_CXX_LONG_DOUBLE_COMPLEX} & \cxxinline{std::complex<long double>}\\
      \bottomrule
    \end{tabular}
  \end{minipage}
  \hspace{1.8cm}
  \begin{minipage}{.3\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_AINT} & \cxxinline{MPI_Aint}\\
      \cxxinline{MPI_OFFSET} & \cxxinline{MPI_Offset}\\
      \cxxinline{MPI_COUNT} & \cxxinline{MPI_Count}\\
      \cxxinline{MPI_BYTE} & \\
      \cxxinline{MPI_PACKED} & \\
      \bottomrule
    \end{tabular}
  \end{minipage}
\end{frame}
\note{
  \begin{itemize}
  \item \cxxinline{MPI_CHAR} is a printable character, whereas \cxxinline{MPI_BYTE} is a type of exactly 8 bits that is not printable as a character
  \item \cxxinline{MPI_PACKED} is for pack/unpack
  \end{itemize}
}
\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Arrays}
  \begin{cxxcode}{Syntax}
    int MPI_Type_contiguous(int count, MPI_Datatype oldtype,
                            MPI_Datatype *newtype);
    int MPI_Type_vector(int count, int blocklength, int stride,
                        MPI_Datatype oldtype, MPI_Datatype *newtype);
  \end{cxxcode}
  \begin{itemize}
  \item arrays of contiguous elements, or of strided blocks of the same type
  \item \cxxinline{count}: number of repetitions (blocks)
  \item \cxxinline{blocklength}: number of elements per block
  \item \cxxinline{stride}: number of elements between the start of each block
  \end{itemize}
\end{frame}
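\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Arrays: sketch}
  A minimal sketch (ours, not from the lecture code): a type describing one column of an $n \times m$ row-major matrix of doubles; \cxxinline{column_t} is a hypothetical name:
  \begin{cxxcode}{Example}
    MPI_Datatype column_t;
    // n blocks of 1 element, block starts separated by m elements
    MPI_Type_vector(n, 1, m, MPI_DOUBLE, &column_t);
    MPI_Type_commit(&column_t); // commit/free are detailed later

    // e.g. send column j: MPI_Send(&a[0][j], 1, column_t, dest, tag, comm);

    MPI_Type_free(&column_t);
  \end{cxxcode}
\end{frame}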
\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Array variants}
  \begin{itemize}
  \item \cxxinline{MPI_Type_create_hvector}: same as \cxxinline{MPI_Type_vector} with \cxxinline{stride} expressed in bytes
  \item \cxxinline{MPI_Type_create_indexed_block}: same as \cxxinline{MPI_Type_vector} with an array of \cxxinline{displacements}
  \item \cxxinline{MPI_Type_create_hindexed_block}: same as \cxxinline{MPI_Type_create_indexed_block} with \cxxinline{displacements} in bytes
  \item \cxxinline{MPI_Type_indexed}: same as \cxxinline{MPI_Type_create_indexed_block} with an array of \cxxinline{blocklengths}
  \item \cxxinline{MPI_Type_create_hindexed}: same as \cxxinline{MPI_Type_indexed} with \cxxinline{displacements} in bytes
  \end{itemize}
\end{frame}
\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Structures}
  \begin{cxxcode}{Syntax}
    int MPI_Type_create_struct(int count, const int array_of_blocklengths[],
                               const MPI_Aint array_of_displacements[],
                               const MPI_Datatype array_of_types[],
                               MPI_Datatype *newtype);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{count}: number of repetitions (blocks)
  \item \cxxinline{array_of_blocklengths}: sizes per block
  \item \cxxinline{array_of_displacements}: displacements between blocks in bytes
  \item \cxxinline{array_of_types}: types contained in each block
  \end{itemize}
\end{frame}
\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Useful helper functions}
  \begin{itemize}
  \item \cxxinline{MPI_Get_address}: get the address of a variable
  \item \cxxinline{MPI_Aint_diff}: get the difference between 2 addresses
  \item \cxxinline{MPI_Aint_add}: get the sum of 2 addresses
  \item \cxxinline{MPI_Type_size}: get the size of a datatype
  \item \cxxinline{MPI_Type_get_extent}: get the lower bound and the extent of a type
  \item \cxxinline{MPI_Type_create_resized}: reset the lower bound and the extent of a type
  \end{itemize}
\end{frame}
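\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Structures: sketch}
  A minimal sketch (ours, not from the lecture code) combining \cxxinline{MPI_Type_create_struct} with the helper functions; \cxxinline{Particle} and \cxxinline{particle_t} are hypothetical names:
  \begin{cxxcode}{Example}
    struct Particle { int id; double pos[3]; };
    Particle p;

    MPI_Aint base, disp[2];
    MPI_Get_address(&p, &base);
    MPI_Get_address(&p.id, &disp[0]);
    MPI_Get_address(&p.pos, &disp[1]);
    disp[0] = MPI_Aint_diff(disp[0], base); // displacements relative to start
    disp[1] = MPI_Aint_diff(disp[1], base);

    int blocklengths[2] = {1, 3};
    MPI_Datatype types[2] = {MPI_INT, MPI_DOUBLE};
    MPI_Datatype tmp_t, particle_t;
    MPI_Type_create_struct(2, blocklengths, disp, types, &tmp_t);
    // resize so that arrays of Particle have the right extent
    MPI_Type_create_resized(tmp_t, 0, sizeof(Particle), &particle_t);
    MPI_Type_free(&tmp_t);
    MPI_Type_commit(&particle_t);
  \end{cxxcode}
\end{frame}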
\note{
  \begin{itemize}
  \item Prefer \cxxinline{MPI_Get_address} over \&
  \item if the extent is badly set, it is not possible to communicate multiple objects of the same datatype
  \end{itemize}
}
\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Commit/free}
  \begin{cxxcode}{Syntax}
    int MPI_Type_commit(MPI_Datatype *datatype);
    int MPI_Type_free(MPI_Datatype *datatype);
  \end{cxxcode}
  \begin{itemize}
  \item new datatypes should be committed before being usable in communications
  \item committed types need to be freed once they are not used anymore
  \end{itemize}
\end{frame}
\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Example}
  \cxxfile[title={mpi/datatypes.cc},
           minted options app={firstline=13, lastline=41, fontsize=\tiny}]{examples/mpi/datatypes.cc}
\end{frame}
\begin{frame}[fragile, exercise]
  \frametitle{Derived Datatypes}
  \framesubtitle{Send lines in the Poisson code}
  \begin{itemize}
  \item Create a \cxxinline{MPI_Datatype line_t} representing a line of data
  \item Exchange data of type \cxxinline{line_t} instead of \cxxinline{MPI_FLOAT}
  \end{itemize}
\end{frame}
\subsection{Pack/Unpack}

\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Pack}
  \begin{cxxcode}{Syntax}
    int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype,
                 void *outbuf, int outsize, int *position, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{inbuf}, \cxxinline{incount}, \cxxinline{datatype} correspond to the description of the data to pack
  \item \cxxinline{outbuf}, \cxxinline{outsize} description of the buffer where to pack
  \item \cxxinline{position} current position in the packing buffer
  \end{itemize}
\end{frame}
\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Unpack}
  \begin{cxxcode}{Syntax}
    int MPI_Unpack(const void *inbuf, int insize, int *position, void *outbuf,
                   int outcount, MPI_Datatype datatype, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{inbuf}, \cxxinline{insize} description of the buffer from which to unpack
  \item \cxxinline{position} current position in the unpacking buffer
  \item \cxxinline{outbuf}, \cxxinline{outcount}, and \cxxinline{datatype} correspond to the description of the data to unpack
  \end{itemize}
\end{frame}
\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Example}
  \cxxfile[title={mpi/pack\_unpack.cc},
           minted options app={firstline=26, lastline=39}]{examples/mpi/pack_unpack.cc}
\end{frame}
\subsection{Groups and Communicators}

\begin{frame}[containsverbatim]
  \frametitle{Groups and Communicators}
  \begin{itemize}
  \item a \code{communicator}:
    \begin{itemize}
    \item encapsulates a \code{context}, a \code{group}, a \code{virtual topology} and \code{attributes}
    \item two kinds: \code{intra-communicator} and \code{inter-communicator}
    \end{itemize}
  \item a \code{group}:
    \begin{itemize}
    \item ordered set of processes
    \item each process has a unique ID (rank within the group) and can belong to several different groups
    \item a group can be used to create a new communicator
    \end{itemize}
  \end{itemize}
\end{frame}
\note{
  \begin{itemize}
  \item \code{intra}: communications inside a group
  \item \code{inter}: communications between groups
  \end{itemize}
}
\begin{frame}[containsverbatim]
  \frametitle{Groups and Communicators}
  \framesubtitle{Creating new communicators}
  \begin{itemize}
  \item duplicating or splitting an existing one: \cxxinline{MPI_Comm_dup}, \cxxinline{MPI_Comm_split}
  \item creating a communicator from a group: \cxxinline{MPI_Comm_create}, \cxxinline{MPI_Comm_create_group}
  \item this requires creating groups:
    \begin{itemize}
    \item from a communicator: \cxxinline{MPI_Comm_group}
    \item with boolean operations: \cxxinline{MPI_Group_union}, \cxxinline{MPI_Group_intersection}, \cxxinline{MPI_Group_difference}
    \item by specifying ranks: \cxxinline{MPI_Group_incl}, \cxxinline{MPI_Group_excl}
    \end{itemize}
  \item destroy created objects: \cxxinline{MPI_Comm_free}, \cxxinline{MPI_Group_free}
  \end{itemize}
\end{frame}
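\begin{frame}[fragile]
  \frametitle{Groups and Communicators}
  \framesubtitle{Sketch: \cxxinline{MPI\_Comm\_split}}
  A minimal sketch (ours, not from the lecture code): group the ranks by parity; each sub-communicator renumbers its members from 0, and \cxxinline{parity_comm} is a hypothetical name:
  \begin{cxxcode}{Example}
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Comm parity_comm;
    int color = rank % 2; // 0: even ranks, 1: odd ranks
    MPI_Comm_split(MPI_COMM_WORLD, color, /* key */ rank, &parity_comm);

    int parity_rank;
    MPI_Comm_rank(parity_comm, &parity_rank);
    // collectives on parity_comm now involve only half the processes
    MPI_Comm_free(&parity_comm);
  \end{cxxcode}
\end{frame}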
\subsection{Virtual Topologies}

\begin{frame}
  \frametitle{Virtual Topologies}
  \begin{itemize}
  \item potential performance gain by mapping processes to the hardware
  \item helps for program readability
  \item types of topologies: Cartesian, Graph, Distributed Graph
  \item collective communications on neighborhoods
  \end{itemize}
\end{frame}
\note{Details only on the Cartesian one}
\begin{frame}[fragile]
  \frametitle{Virtual Topologies}
  \framesubtitle{Cartesian topology}
  \begin{cxxcode}{Syntax}
    int MPI_Cart_create(MPI_Comm comm_old, int ndims, const int dims[],
                        const int periods[], int reorder, MPI_Comm *comm_cart);
  \end{cxxcode}
  \begin{itemize}
  \item create a communicator with Cartesian information
  \item convenient functions:
    \begin{itemize}
    \item \cxxinline{MPI_Dims_create} helps creating a balanced distribution of processes
    \item \cxxinline{MPI_Cart_shift} helps determining the neighbors
    \item \cxxinline{MPI_Cart_rank} get the rank based on coordinates
    \item \cxxinline{MPI_Cart_coords} get coordinates based on rank
    \end{itemize}
  \end{itemize}
\end{frame}
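\begin{frame}[fragile]
  \frametitle{Virtual Topologies}
  \framesubtitle{Sketch: 2D Cartesian grid}
  A minimal sketch (ours, not from the lecture code): build a 2D grid and find the neighbors along the first dimension:
  \begin{cxxcode}{Example}
    int size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int dims[2] = {0, 0};    // let MPI choose a balanced (px, py)
    MPI_Dims_create(size, 2, dims);

    int periods[2] = {0, 0}; // non-periodic boundaries
    MPI_Comm cart_comm;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, /* reorder */ 1,
                    &cart_comm);

    // neighbors along dimension 0; MPI_PROC_NULL at the domain boundary
    int up, down;
    MPI_Cart_shift(cart_comm, /* dim */ 0, /* disp */ 1, &up, &down);
  \end{cxxcode}
\end{frame}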
\begin{frame}[fragile]
  \frametitle{Virtual Topologies}
  \framesubtitle{Neighborhood collectives}
  \begin{itemize}
  \item \cxxinline{MPI_Neighbor_allgather}: assuming we are on the process with rank $i$, gather data from every rank $j$ for which the edge $(j, i)$ exists, and send the same data to every rank $j$ for which the edge $(i, j)$ exists
  \item \cxxinline{MPI_Neighbor_alltoall}: compared to allgather, sends different data to each process $j$
  \item vector variants are available (\code{v})
  \item immediate variants are available (\code{I})
  \item persistent variants are available (\code{\_init})
  \item \cxxinline{MPI_Neighbor_alltoallw}, also available in all flavors: with the \code{w} variant, different datatypes are exchanged with the neighbors
  \end{itemize}
\end{frame}
\begin{frame}[exercise, fragile]
  \frametitle{Virtual Topologies}
  \begin{itemize}
  \item Rewrite the parallelism using a Cartesian communicator
  \item Use neighborhood collective communications
  \end{itemize}
\end{frame}
\subsection{Parallel I/O}

\begin{frame}[containsverbatim]
  \frametitle{Parallel I/O overview}
  \begin{itemize}
  \item I/O is often (if not always) the main bottleneck in a parallel application
  \item MPI provides a mechanism to read/write in parallel
  \end{itemize}
  \begin{center}
    \input{src/mpi/figures/parallelFS.tex}
  \end{center}
\end{frame}
\begin{frame}[containsverbatim]
  \frametitle{Introductory remarks}
  \begin{itemize}
  \item The MPI IO API works on your desktop/laptop
  \item Most of the large HPC systems have a \textbf{parallel file system} (like GPFS, Lustre, \emph{etc}.)
  \item If the file is distributed smartly on a parallel file system: performance increases
  \item MPI IO offers a high-level API to access a distributed file (no need to implement complex POSIX calls)
  \item \textbf{does not work with ASCII files}
  \item Most of the standard file formats support MPI IO (\emph{e.g.} HDF5, NetCDF, \emph{etc}.)
  \end{itemize}
\end{frame}
\begin{frame}[containsverbatim]
  \frametitle{Poisson so far}
  \begin{center}
    \input{src/mpi/figures/sofar.tex}
  \end{center}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Poisson ideal}
  \begin{center}
    \input{src/mpi/figures/sogoal.tex}
  \end{center}
\end{frame}
\begin{frame}[fragile]
  \frametitle{Open/Close a file in parallel}
  \begin{cxxcode}{Syntax}
    int MPI_File_open(MPI_Comm comm, const char *filename, int amode,
                      MPI_Info info, MPI_File *fh);
    int MPI_File_close(MPI_File *fh);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{comm}: the communicator that contains the writing/reading MPI processes
  \item \cxxinline{filename}: a file name
  \item \cxxinline{amode}: file access mode, \cxxinline{MPI_MODE_RDONLY}, \cxxinline{MPI_MODE_WRONLY}, \cxxinline{MPI_MODE_RDWR}, \cxxinline{MPI_MODE_CREATE}, \emph{etc}.
  \item \cxxinline{info}: file info object (\cxxinline{MPI_INFO_NULL} is a valid info)
  \item \cxxinline{fh}: file handle
  \end{itemize}
  \textbf{Collective calls!}
\end{frame}
\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Terminology}
  \begin{itemize}
  \item \code{etype} is the elementary type of the data of the parallel accessed file
  \item \code{offset} is a position in the file in terms of multiples of etypes
  \item \code{displacement} of a position within the file is the number of bytes from the beginning of the file
  \end{itemize}
  \begin{center}
    \includegraphics{src/mpi/figures/offset}
    %\input{day3/images/offset.tex}
  \end{center}
\end{frame}
\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Simple independent read/write}
  \begin{cxxcode}{Syntax}
    int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, int count,
                         MPI_Datatype datatype, MPI_Status *status);
    int MPI_File_write_at(MPI_File fh, MPI_Offset offset, const void *buf,
                          int count, MPI_Datatype datatype, MPI_Status *status);
  \end{cxxcode}
  \begin{itemize}
  \item can be used from a single process (or a group of processes)
  \item the \cxxinline{offset} in the file must be specified explicitly
  \item \cxxinline{count} elements of type \cxxinline{datatype} are read into, or written from, the \cxxinline{buf} buffer
  \end{itemize}
\end{frame}
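\begin{frame}[fragile]
  \frametitle{Parallel IO}
  \framesubtitle{Sketch: independent write with offsets}
  A minimal sketch (ours, not from the lecture code): each rank writes its block of \cxxinline{N} doubles at a rank-dependent offset; the file name, \cxxinline{rank}, \cxxinline{N} and \cxxinline{local_data} are placeholders:
  \begin{cxxcode}{Example}
    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, "data.bin",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

    // with the default view, offsets are expressed in bytes
    MPI_Offset offset = rank * N * sizeof(double);
    MPI_File_write_at(fh, offset, local_data, N, MPI_DOUBLE,
                      MPI_STATUS_IGNORE);

    MPI_File_close(&fh);
  \end{cxxcode}
\end{frame}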
\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{\code{view} by each process}
  \begin{cxxcode}{Syntax}
    int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
                          MPI_Datatype filetype, const char *datarep, MPI_Info info);
    int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, MPI_Datatype *etype,
                          MPI_Datatype *filetype, char *datarep);
  \end{cxxcode}
  \begin{itemize}
  \item initially, each process views the file as a linear byte stream, and each process views data in its own native representation
  \item \cxxinline{disp} is the displacement (defines the beginning of the data of the file that belongs to the process) in bytes
  \item \cxxinline{etype} is the unit of data access and positioning
  \item \cxxinline{filetype} is a single \cxxinline{etype} or a multiple of it
  \end{itemize}
\end{frame}
\begin{frame}[b,containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Setting up a \code{view}}
  \begin{center}
    \addimage[width=12cm]{\FIGREP/displacements}{2cm}{2cm}
  \end{center}
  (source: MPI 2.2 specifications)
\end{frame}
\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Simple independent read/write without offset}
  \begin{cxxcode}{Syntax}
    int MPI_File_read(MPI_File fh, void *buf, int count, MPI_Datatype datatype,
                      MPI_Status *status);
    int MPI_File_write(MPI_File fh, const void *buf, int count,
                       MPI_Datatype datatype, MPI_Status *status);
  \end{cxxcode}
\end{frame}
\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Collective read/write with/without offset}
  \begin{cxxcode}{Syntax}
    int MPI_File_write_all(MPI_File fh, const void *buf, int count,
                           MPI_Datatype datatype, MPI_Status *status);
    int MPI_File_read_all(MPI_File fh, void *buf, int count,
                          MPI_Datatype datatype, MPI_Status *status);
  \end{cxxcode}
\end{frame}
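\begin{frame}[fragile]
  \frametitle{Parallel IO}
  \framesubtitle{Sketch: collective write through a view}
  A minimal sketch (ours, not from the lecture code): each rank sets a view starting at its own displacement, then all ranks write collectively; \cxxinline{rank}, \cxxinline{N} and \cxxinline{local_data} are placeholders:
  \begin{cxxcode}{Example}
    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, "data.bin",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

    // beginning of this rank's data, in bytes
    MPI_Offset disp = rank * N * sizeof(double);
    MPI_File_set_view(fh, disp, MPI_DOUBLE, MPI_DOUBLE, "native",
                      MPI_INFO_NULL);

    // collective: every rank of the communicator must call it
    MPI_File_write_all(fh, local_data, N, MPI_DOUBLE, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);
  \end{cxxcode}
\end{frame}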
\subsection{One Sided}

\begin{frame}[containsverbatim]
  \frametitle{What we did not cover}
  \begin{itemize}
  \item One-sided communications
    \begin{itemize}
    \item \cxxinline{MPI_Put}, \cxxinline{MPI_Get}
    \item \cxxinline{MPI_Win_*}
    \item shared memory
    \end{itemize}
  \item Process management
    \begin{itemize}
    \item \cxxinline{MPI_Comm_spawn}
    \item communications on inter-communicators
    \end{itemize}
  \end{itemize}
\end{frame}
\begin{frame}[fragile,t]
  \frametitle{Parallelization of the Poisson code}
  \begin{minipage}{.45\linewidth}
    \centering
    \begin{overprint}
      \only<1>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_1}}
      \only<2>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_2}}
      \only<3>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_3}}
      \only<4->{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_4}}
    \end{overprint}
  \end{minipage}
  \begin{minipage}{.45\linewidth}
    \begin{overprint}
      \onslide<1>
      \begin{itemize}
      \item Parallelize the Poisson 2D problem using the Message Passing Interface (MPI)
      \end{itemize}
      \onslide<2>
      \begin{itemize}
      \item This time, we want to make a 2D domain decomposition using a Cartesian topology
      \item Use \code{MPI\_Dims\_create} and \code{MPI\_Cart\_create} to create a Cartesian topology
      \end{itemize}
      \onslide<3>
      \begin{itemize}
      \item The $p$ processes are split into $(p_{x}, p_{y})$ to make the Cartesian grid
      \item Each domain has size $(N/p_{x}, N/p_{y})$ (1 per process)
      \item Use \code{MPI\_Cart\_shift} to find the neighboring domains
      \end{itemize}
      \onslide<4>
      \begin{itemize}
      \item Adding \emph{ghost} lines before and after
      \item Use the \emph{ghost} lines to receive the missing local data
      \item You will need to define a new \textit{matrix column} datatype and update the \textit{matrix line} datatype
      \end{itemize}
      \onslide<5>
      \begin{itemize}
      \item Use the \code{MPI\_Neighbor\_alltoallw} routine
      \item You can use the number of iterations as a check
      \item Remove the \cxxinline{dump()} function to start
      \end{itemize}
    \end{overprint}
  \end{minipage}
\end{frame}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../../phys_743_parallel_programming"
%%% End: