
\renewcommand{\FIGREP}{src/mpi/figures}
\section{Advanced MPI}
\intersec{izar}
\begin{frame}
\frametitle{Advanced MPI}
\framesubtitle{Goals of this section}
\begin{itemize}
\item Overview of more advanced functionalities
\item Persistent communications
\item Advanced collective communications
\item Describing your own datatype
\item Redefining communicators
\item Associating a topology to a communicator
\item Parallel I/O
\item One sided communications
\end{itemize}
\end{frame}
\subsection{Persistent point to point}
\begin{frame}[fragile]
\frametitle{Persistent communications}
\framesubtitle{}
\begin{itemize}
\item \cxxinline{MPI_Send_init} \cxxinline{MPI_Recv_init}, initialize the communication
\item Same signature as non-blocking communications
\item \cxxinline{MPI_Start}, \cxxinline{MPI_Startall} to start the communication
\item Completion is checked the same way as for non-blocking
\end{itemize}
\end{frame}
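\begin{frame}[fragile]
\frametitle{Persistent communications}
\framesubtitle{Typical pattern}
A minimal sketch of the persistent pattern; the buffers, counts and the
\cxxinline{prev}/\cxxinline{next} ranks are placeholders
\begin{cxxcode}{Example (sketch)}
MPI_Request requests[2];
MPI_Send_init(send_buf, count, MPI_DOUBLE, next, 0, MPI_COMM_WORLD,
              &requests[0]);
MPI_Recv_init(recv_buf, count, MPI_DOUBLE, prev, 0, MPI_COMM_WORLD,
              &requests[1]);

for (int step = 0; step < n_steps; ++step) {
  MPI_Startall(2, requests); // start both communications
  // ... computation that can overlap the communications ...
  MPI_Waitall(2, requests, MPI_STATUSES_IGNORE);
}

MPI_Request_free(&requests[0]); // persistent requests must be freed explicitly
MPI_Request_free(&requests[1]);
\end{cxxcode}
\end{frame}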
\begin{frame}[exercise, fragile]
\frametitle{Persistent communications}
\framesubtitle{}
\begin{itemize}
\item Replace the non-blocking communications in the Poisson code by persistent ones
\end{itemize}
\end{frame}
\subsection{Advanced collective communications}
\subsubsection{V versions}
\begin{frame}[fragile]
\frametitle{Collective communications}
\framesubtitle{V extension to \cxxinline{MPI\_Gather}}
\begin{cxxcode}{Syntax}
int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, const int recvcounts[], const int displs[],
MPI_Datatype recvtype, int root, MPI_Comm comm);
\end{cxxcode}
\begin{itemize}
\item \cxxinline{recvcounts} is now an array, one entry per rank
\item \cxxinline{displs} is an array of displacements defining where to place the
data received from rank $i$
\item allows receiving a different amount of data from each process
\item allows receiving into an array with strides
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Collective communications}
\framesubtitle{Gatherv semantic}
\begin{cxxcode}{Semantic equivalent}
// Every process
MPI_Send(sendbuf, sendcount, sendtype, root, /*...*/);
// On root process
for(i = 0; i < nb_process; ++i)
  MPI_Recv(recvbuf + displs[i] * extent(recvtype), recvcounts[i], recvtype, i,
           /*...*/);
\end{cxxcode}
\end{frame}
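\begin{frame}[fragile]
\frametitle{Collective communications}
\framesubtitle{Gatherv usage}
A possible usage sketch; \cxxinline{rank}, \cxxinline{nb_process},
\cxxinline{root} and \cxxinline{comm} are assumed to be defined as usual
\begin{cxxcode}{Example (sketch)}
int local_n = rank + 1; // a different size on every rank (placeholder)
std::vector<double> local(local_n, rank);
std::vector<int> recvcounts(nb_process), displs(nb_process, 0);

// root needs to know how much data comes from every rank
MPI_Gather(&local_n, 1, MPI_INT, recvcounts.data(), 1, MPI_INT, root, comm);

std::vector<double> global;
if (rank == root) {
  for (int i = 1; i < nb_process; ++i)
    displs[i] = displs[i - 1] + recvcounts[i - 1];
  global.resize(displs.back() + recvcounts.back());
}

MPI_Gatherv(local.data(), local_n, MPI_DOUBLE, global.data(),
            recvcounts.data(), displs.data(), MPI_DOUBLE, root, comm);
\end{cxxcode}
\end{frame}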
\begin{frame}[fragile]
\frametitle{Collective communications}
\framesubtitle{V extension to \cxxinline{MPI\_Scatter}}
\begin{cxxcode}{Syntax}
int MPI_Scatterv(const void *sendbuf, const int sendcounts[],
const int displs[], MPI_Datatype sendtype, void *recvbuf,
int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
\end{cxxcode}
\begin{itemize}
\item \cxxinline{sendcounts} is now an array, one entry per rank
\item \cxxinline{displs} is an array of displacements defining where to take the
data sent to rank $i$
\item allows sending a different amount of data to each process
\item allows sending from an array with strides
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Collective communications}
\framesubtitle{Scatterv semantic}
\begin{cxxcode}{Semantic equivalent}
// On root process
for(i = 0; i < nb_process; ++i)
  MPI_Send(sendbuf + displs[i] * extent(sendtype), sendcounts[i], sendtype, i,
           /*...*/);
// Every process
MPI_Recv(recvbuf, recvcount, recvtype, root, /*...*/);
\end{cxxcode}
\end{frame}
\subsubsection{Non-blocking collective communications}
\begin{frame}[fragile]
\frametitle{Non-blocking collective communications}
\framesubtitle{}
\begin{itemize}
\item \code{I} variant of collective communications
\item extra parameter \cxxinline{request}
\item \cxxinline{MPI_Ibarrier}, \cxxinline{MPI_Ibcast}
\item \cxxinline{MPI_Igather}, \cxxinline{MPI_Igatherv},
\cxxinline{MPI_Iscatter}, \cxxinline{MPI_Iscatterv}
\item \cxxinline{MPI_Iallgather}, \cxxinline{MPI_Iallgatherv},
\cxxinline{MPI_Ialltoall}
\item \cxxinline{MPI_Ireduce}, \cxxinline{MPI_Iallreduce},
\cxxinline{MPI_Iscan}, \cxxinline{MPI_Iexscan}
\end{itemize}
\end{frame}
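\begin{frame}[fragile]
\frametitle{Non-blocking collective communications}
\framesubtitle{Example}
A minimal sketch overlapping a reduction with independent work;
\cxxinline{local_sum} is a placeholder
\begin{cxxcode}{Example (sketch)}
double local_sum = /* local contribution */ 0., global_sum = 0.;
MPI_Request request;
MPI_Iallreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM,
               MPI_COMM_WORLD, &request);

// ... computation that does not need global_sum ...

MPI_Wait(&request, MPI_STATUS_IGNORE); // global_sum is valid from here on
\end{cxxcode}
\end{frame}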
\subsubsection{Persistent collective communications}
\begin{frame}[fragile]
\frametitle{Persistent collective communications}
\framesubtitle{}
\begin{itemize}
\item \code{\_init} variant of collective communications
\item extra parameter \cxxinline{request}
\item \cxxinline{MPI_Barrier_init}, \cxxinline{MPI_Bcast_init}
\item \cxxinline{MPI_Gather_init}, \cxxinline{MPI_Gatherv_init},
\cxxinline{MPI_Scatter_init}, \cxxinline{MPI_Scatterv_init}
\item \cxxinline{MPI_Allgather_init}, \cxxinline{MPI_Allgatherv_init},
\cxxinline{MPI_Alltoall_init}
\item \cxxinline{MPI_Reduce_init}, \cxxinline{MPI_Allreduce_init},
\cxxinline{MPI_Scan_init}, \cxxinline{MPI_Exscan_init}
\end{itemize}
\end{frame}
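\begin{frame}[fragile]
\frametitle{Persistent collective communications}
\framesubtitle{Typical pattern}
A minimal sketch of the persistent pattern (MPI 4.0); the convergence loop
and the error variables are placeholders
\begin{cxxcode}{Example (sketch)}
double local_err = 1., global_err = 1.;
MPI_Request request;
MPI_Allreduce_init(&local_err, &global_err, 1, MPI_DOUBLE, MPI_SUM,
                   MPI_COMM_WORLD, MPI_INFO_NULL, &request);

while (global_err > 1e-6) {
  // ... compute and update local_err in place (the buffers are fixed) ...
  MPI_Start(&request);
  MPI_Wait(&request, MPI_STATUS_IGNORE);
}

MPI_Request_free(&request);
\end{cxxcode}
\end{frame}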
\begin{frame}[exercise, fragile]
\frametitle{Persistent collective}
\framesubtitle{}
\begin{itemize}
\item Replace the \cxxinline{MPI_Allreduce} by a persistent one
\end{itemize}
\end{frame}
\subsection{Derived Datatypes}
\begin{frame}[fragile]
\frametitle{Derived Datatypes}
\framesubtitle{Definition of a datatypes}
\begin{itemize}
\item \cxxinline{MPI_Datatype} opaque type containing a \emph{Typemap}
\begin{itemize}
\item $Typemap = \{(type_{0},disp_{0}), \dotsb, (type_{n - 1},disp_{n - 1})\}$
\item sequence of basic datatypes
\item sequence of displacements (in bytes)
\end{itemize}
\item \code{extent} is the span from the first byte to the last one, with alignment requirement
\begin{align*}
lb(Typemap) &= \min_{j}(disp_{j}),\\
ub(Typemap) &= \max_{j}(disp_{j} + \mathrm{sizeof}(type_{j})) + \epsilon, \text{ and}\\
extent(Typemap) &= ub(Typemap) - lb(Typemap)
\end{align*}
$\epsilon$ is there to account for alignment requirements
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Derived Datatypes}
\framesubtitle{Base datatypes}
\begin{minipage}{.45\linewidth}
\small
\begin{tabular}{ll}
\toprule
MPI datatype & C datatype\\
\midrule
\cxxinline{MPI_CHAR} & \cxxinline{char} \\
\cxxinline{MPI_SHORT} & \cxxinline{signed short int} \\
\cxxinline{MPI_INT} & \cxxinline{signed int} \\
\cxxinline{MPI_LONG} & \cxxinline{signed long int} \\
\cxxinline{MPI_LONG_LONG_INT} & \cxxinline{signed long long int} \\
\cxxinline{MPI_LONG_LONG} & \cxxinline{signed long long int} \\
\cxxinline{MPI_SIGNED_CHAR} & \cxxinline{signed char} \\
\cxxinline{MPI_UNSIGNED_CHAR} & \cxxinline{unsigned char} \\
\cxxinline{MPI_UNSIGNED_SHORT} & \cxxinline{unsigned short int} \\
\cxxinline{MPI_UNSIGNED} & \cxxinline{unsigned int} \\
\cxxinline{MPI_UNSIGNED_LONG} & \cxxinline{unsigned long int} \\
\cxxinline{MPI_UNSIGNED_LONG_LONG} & \cxxinline{unsigned long long int} \\
\bottomrule
\end{tabular}
\end{minipage}
\hspace{1cm}
\begin{minipage}{.45\linewidth}
\small
\begin{tabular}{ll}
\toprule
MPI datatype & C datatype\\
\midrule
\cxxinline{MPI_FLOAT} & \cxxinline{float} \\
\cxxinline{MPI_DOUBLE} & \cxxinline{double} \\
\cxxinline{MPI_LONG_DOUBLE} & \cxxinline{long double} \\
\cxxinline{MPI_WCHAR} & \cxxinline{wchar_t} \\
\cxxinline{MPI_C_BOOL} & \cxxinline{_Bool} \\
\cxxinline{MPI_INT8_T} & \cxxinline{int8_t} \\
\cxxinline{MPI_INT16_T} & \cxxinline{int16_t} \\
\cxxinline{MPI_INT32_T} & \cxxinline{int32_t} \\
\cxxinline{MPI_INT64_T} & \cxxinline{int64_t} \\
\cxxinline{MPI_UINT8_T} & \cxxinline{uint8_t} \\
\cxxinline{MPI_UINT16_T} & \cxxinline{uint16_t} \\
\cxxinline{MPI_UINT32_T} & \cxxinline{uint32_t} \\
\cxxinline{MPI_UINT64_T} & \cxxinline{uint64_t} \\
\bottomrule
\end{tabular}
\end{minipage}
\end{frame}
\begin{frame}
\frametitle{Derived Datatypes}
\framesubtitle{Base datatypes}
\begin{minipage}{.45\linewidth}
\small
\begin{tabular}{ll}
\toprule
MPI datatype & C++ datatype\\
\midrule
\cxxinline{MPI_CXX_BOOL} & \cxxinline{bool} \\
\cxxinline{MPI_CXX_FLOAT_COMPLEX} & \cxxinline{std::complex<float>} \\
\cxxinline{MPI_CXX_DOUBLE_COMPLEX} & \cxxinline{std::complex<double>} \\
\cxxinline{MPI_CXX_LONG_DOUBLE_COMPLEX} & \cxxinline{std::complex<long double>}\\
\bottomrule
\end{tabular}
\end{minipage}
\hspace{1.8cm}
\begin{minipage}{.3\linewidth}
\small
\begin{tabular}{ll}
\toprule
MPI datatype & C datatype\\
\midrule
\cxxinline{MPI_AINT} & \cxxinline{MPI_Aint} \\
\cxxinline{MPI_OFFSET} & \cxxinline{MPI_Offset} \\
\cxxinline{MPI_COUNT} & \cxxinline{MPI_Count} \\
\cxxinline{MPI_BYTE} & \\
\cxxinline{MPI_PACKED} & \\
\bottomrule
\end{tabular}
\end{minipage}
\end{frame}
\note{
\begin{itemize}
\item \cxxinline{MPI_CHAR} is a printable character whereas \cxxinline{MPI_BYTE} is a type of exactly 8 bits, not printable as a character
\item \cxxinline{MPI_PACKED} is used for pack/unpack
\end{itemize}
}
\begin{frame}[fragile]
\frametitle{Derived Datatypes}
\framesubtitle{Arrays}
\begin{cxxcode}{Syntax}
int MPI_Type_contiguous(int count, MPI_Datatype oldtype,
MPI_Datatype *newtype);
int MPI_Type_vector(int count, int blocklength, int stride,
MPI_Datatype oldtype, MPI_Datatype *newtype);
\end{cxxcode}
\begin{itemize}
\item array of contiguous elements, or of strided blocks of the same type
\item \cxxinline{count}: number of repetitions (blocks)
\item \cxxinline{blocklength}: number of elements per block
\item \cxxinline{stride}: number of elements between the starts of consecutive blocks
\end{itemize}
\end{frame}
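\begin{frame}[fragile]
\frametitle{Derived Datatypes}
\framesubtitle{Vector example}
A sketch describing one column of a row-major $N \times N$ matrix of
\cxxinline{double}; the names are placeholders, commit/free are detailed later
\begin{cxxcode}{Example (sketch)}
MPI_Datatype column_t;
// N blocks of 1 element, separated by N elements (one matrix row)
MPI_Type_vector(N, 1, N, MPI_DOUBLE, &column_t);
MPI_Type_commit(&column_t);

// send the j-th column in a single call (a is the flat N*N array)
MPI_Send(&a[j], 1, column_t, dest, tag, MPI_COMM_WORLD);

MPI_Type_free(&column_t);
\end{cxxcode}
\end{frame}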
\begin{frame}[fragile]
\frametitle{Derived Datatypes}
\framesubtitle{Array variants}
\begin{itemize}
\item \cxxinline{MPI_Type_create_hvector}: same as \cxxinline{MPI_Type_vector} with \cxxinline{stride} expressed in bytes
\item \cxxinline{MPI_Type_create_indexed_block}: same as \cxxinline{MPI_Type_vector} with an array of \cxxinline{displacements} (one per block)
\item \cxxinline{MPI_Type_create_hindexed_block}: same as \cxxinline{MPI_Type_create_indexed_block} with \cxxinline{displacements} in bytes
\item \cxxinline{MPI_Type_indexed}: same as \cxxinline{MPI_Type_create_indexed_block} with arrays of \cxxinline{blocklengths}
\item \cxxinline{MPI_Type_create_hindexed}: same as \cxxinline{MPI_Type_indexed} with \cxxinline{displacements} in bytes
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Derived Datatypes}
\framesubtitle{Structures}
\begin{cxxcode}{Syntax}
int MPI_Type_create_struct(int count, const int array_of_blocklengths[],
const MPI_Aint array_of_displacements[],
const MPI_Datatype array_of_types[], MPI_Datatype *newtype)
\end{cxxcode}
\begin{itemize}
\item \cxxinline{count}: number of blocks
\item \cxxinline{array_of_blocklengths}: number of elements per block
\item \cxxinline{array_of_displacements}: displacement of each block in bytes
\item \cxxinline{array_of_types}: type of the elements in each block
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Derived Datatypes}
\framesubtitle{Useful helper functions}
\begin{itemize}
\item \cxxinline{MPI_Get_address}: get the address of a variable
\item \cxxinline{MPI_Aint_diff}: get the difference between two addresses
\item \cxxinline{MPI_Aint_add}: add a displacement to an address
\item \cxxinline{MPI_Type_size}: get the size of a datatype
\item \cxxinline{MPI_Type_get_extent}: get the lower bound and the extent of a type
\item \cxxinline{MPI_Type_create_resized}: reset the lower bound and the extent of a type
\end{itemize}
\end{frame}
\note{
\begin{itemize}
\item Prefer \cxxinline{MPI_Get_address} over the \& operator
\item if the extent is set incorrectly, it is not possible to communicate
multiple objects of the same datatype
\end{itemize}
}
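\begin{frame}[fragile]
\frametitle{Derived Datatypes}
\framesubtitle{Structure example}
A sketch for a hypothetical \cxxinline{Particle} struct, using
\cxxinline{MPI_Get_address} and \cxxinline{MPI_Type_create_resized}
\begin{cxxcode}{Example (sketch)}
struct Particle { int id; double pos[3]; };
Particle p;

int blocklengths[2] = {1, 3};
MPI_Datatype types[2] = {MPI_INT, MPI_DOUBLE};
MPI_Aint displs[2], base;
MPI_Get_address(&p, &base);
MPI_Get_address(&p.id, &displs[0]);
MPI_Get_address(&p.pos, &displs[1]);
displs[0] = MPI_Aint_diff(displs[0], base);
displs[1] = MPI_Aint_diff(displs[1], base);

MPI_Datatype tmp_t, particle_t;
MPI_Type_create_struct(2, blocklengths, displs, types, &tmp_t);
// force the extent to sizeof(Particle) to account for padding
MPI_Type_create_resized(tmp_t, 0, sizeof(Particle), &particle_t);
MPI_Type_free(&tmp_t);
MPI_Type_commit(&particle_t);
\end{cxxcode}
\end{frame}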
\begin{frame}[fragile]
\frametitle{Derived Datatypes}
\framesubtitle{Commit/free}
\begin{cxxcode}{Syntax}
int MPI_Type_commit(MPI_Datatype *datatype);
int MPI_Type_free(MPI_Datatype *datatype);
\end{cxxcode}
\begin{itemize}
\item new datatypes should be committed before being usable in communications
\item committed types need to be freed once not used anymore
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Derived Datatypes}
\framesubtitle{Example}
\cxxfile[title={mpi/datatypes.cc},
minted options app={
firstline=13,
lastline=41,
fontsize=\tiny}]{examples/mpi/datatypes.cc}
\end{frame}
\begin{frame}[fragile, exercise]
\frametitle{Derived Datatypes}
\framesubtitle{Send lines in poisson code}
\begin{itemize}
\item Create a \cxxinline{MPI_Datatype line_t} representing a line of data
\item Exchange data of type \cxxinline{line_t} instead of \cxxinline{MPI_FLOAT}
\end{itemize}
\end{frame}
\subsection{Pack/Unpack}
\begin{frame}[fragile]
\frametitle{Pack/Unpack}
\framesubtitle{Pack}
\begin{cxxcode}{Syntax}
int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype,
void *outbuf, int outsize, int *position, MPI_Comm comm);
\end{cxxcode}
\begin{itemize}
\item \cxxinline{inbuf}, \cxxinline{incount}, \cxxinline{datatype} correspond to the description of data to pack
\item \cxxinline{outbuf}, \cxxinline{outsize} description of the buffer where to pack
\item \cxxinline{position} current position in the packing buffer
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Pack/Unpack}
\framesubtitle{Unpack}
\begin{cxxcode}{Syntax}
int MPI_Unpack(const void *inbuf, int insize, int *position, void *outbuf,
int outcount, MPI_Datatype datatype, MPI_Comm comm);
\end{cxxcode}
\begin{itemize}
\item \cxxinline{inbuf}, \cxxinline{insize} description of the buffer from which to unpack
\item \cxxinline{position} current position in the unpacking buffer
\item \cxxinline{outbuf}, \cxxinline{outcount}, and \cxxinline{datatype} correspond to the description of the data to unpack
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Pack/Unpack}
\framesubtitle{Example}
\cxxfile[title={mpi/pack\_unpack.cc},
minted options app={
firstline=26,
lastline=39
}]{examples/mpi/pack_unpack.cc}
\end{frame}
\subsection{Groups and Communicator}
\begin{frame}[containsverbatim]
\frametitle{Groups and Communicators}
\begin{itemize}
\item a \code{communicator}:
\begin{itemize}
\item Encapsulate a \code{context}, a \code{group}, a \code{virtual topology} and \code{attributes}
\item Two kinds: \code{intra-communicator} and \code{inter-communicator}
\end{itemize}
\item a \code{group}:
\begin{itemize}
\item ordered set of processes
\item each process has a unique ID (rank within the group) and can belong to several different groups
\item a group can be used to create a new communicator
\end{itemize}
\end{itemize}
\end{frame}
\note{
\begin{itemize}
\item \code{intra} communications inside a group
\item \code{inter} communications between groups
\end{itemize}
}
\begin{frame}[containsverbatim]
\frametitle{Groups and Communicators}
\framesubtitle{Creating new communicators}
\begin{itemize}
\item duplicating or splitting an existing one \cxxinline{MPI_Comm_dup}, \cxxinline{MPI_Comm_split}
\item creating communicator from a group \cxxinline{MPI_Comm_create}, \cxxinline{MPI_Comm_create_group}
\item need to create groups
\begin{itemize}
\item from a communicator \cxxinline{MPI_Comm_group}
\item boolean operations \cxxinline{MPI_Group_union},
\cxxinline{MPI_Group_intersection}, \cxxinline{MPI_Group_difference}
\item specifying ranks \cxxinline{MPI_Group_incl}, \cxxinline{MPI_Group_excl}
\end{itemize}
\item destroy created objects \cxxinline{MPI_Comm_free},
\cxxinline{MPI_Group_free}
\end{itemize}
\end{frame}
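\begin{frame}[fragile]
\frametitle{Groups and Communicators}
\framesubtitle{Example}
A sketch splitting \cxxinline{MPI_COMM_WORLD} into row communicators; the
grouping of 4 ranks per row is an arbitrary choice
\begin{cxxcode}{Example (sketch)}
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);

int color = rank / 4; // ranks with the same color share the new communicator
int key = rank;       // relative ordering inside the new communicator

MPI_Comm row_comm;
MPI_Comm_split(MPI_COMM_WORLD, color, key, &row_comm);

// collectives on row_comm only involve the ranks of the same row
MPI_Comm_free(&row_comm);
\end{cxxcode}
\end{frame}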
\subsection{Virtual Topologies}
\begin{frame}
\frametitle{Virtual Topologies}
\framesubtitle{}
\begin{itemize}
\item potential performance gain by mapping processes to the hardware
\item helps program readability
\item types of topologies: Cartesian, Graph, Distributed Graph
\item collective communication on neighborhoods
\end{itemize}
\end{frame}
\note{
Details are given only for the Cartesian topology
}
\begin{frame}[fragile]
\frametitle{Virtual Topologies}
\framesubtitle{Cartesian topology}
\begin{cxxcode}{Syntax}
int MPI_Cart_create(MPI_Comm comm_old, int ndims, const int dims[],
const int periods[], int reorder, MPI_Comm *comm_cart);
\end{cxxcode}
\begin{itemize}
\item create a communicator with cartesian information
\item convenient functions:
\begin{itemize}
\item \cxxinline{MPI_Dims_create} helps create a balanced distribution of processes
\item \cxxinline{MPI_Cart_shift} helps determine the neighbors
\item \cxxinline{MPI_Cart_rank} get the rank based on coordinates
\item \cxxinline{MPI_Cart_coords} get coordinates based on rank
\end{itemize}
\end{itemize}
\end{frame}
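\begin{frame}[fragile]
\frametitle{Virtual Topologies}
\framesubtitle{Cartesian example}
A sketch creating a periodic 2D grid; \cxxinline{psize} is assumed to hold the
number of processes in \cxxinline{MPI_COMM_WORLD}
\begin{cxxcode}{Example (sketch)}
int dims[2] = {0, 0}, periods[2] = {1, 1};
MPI_Dims_create(psize, 2, dims); // balanced px x py decomposition

MPI_Comm cart_comm;
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, /* reorder */ 1, &cart_comm);

int left, right, bottom, top;
MPI_Cart_shift(cart_comm, 0, 1, &left, &right); // neighbors in dimension 0
MPI_Cart_shift(cart_comm, 1, 1, &bottom, &top); // neighbors in dimension 1
\end{cxxcode}
\end{frame}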
\begin{frame}[fragile]
\frametitle{Virtual topology}
\framesubtitle{Neighborhoods collective}
\begin{itemize}
\item \cxxinline{MPI_Neighbor_allgather}: on the process of rank $i$,
gathers data from every rank $j$ for which the edge $(j, i)$ exists, and
sends the same data to every $j$ for which the edge $(i, j)$ exists
\item \cxxinline{MPI_Neighbor_alltoall}: compared to allgather, sends
different data to each process $j$
\item vector variants are available (\code{v})
\item immediate variants are available (\code{I})
\item persistent variants are available (\code{\_init})
\item \cxxinline{MPI_Neighbor_alltoall} also has a \code{w} flavor, where
different datatypes are exchanged with the neighbors
\end{itemize}
\end{frame}
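\begin{frame}[fragile]
\frametitle{Virtual topology}
\framesubtitle{Neighborhood collective example}
A sketch exchanging one value with each neighbor of the Cartesian
communicator created before; the send values are placeholders
\begin{cxxcode}{Example (sketch)}
// neighbor order for a Cartesian communicator:
// for each dimension, negative direction first, then positive direction
double send[4] = {0., 1., 2., 3.};
double recv[4];
MPI_Neighbor_alltoall(send, 1, MPI_DOUBLE, recv, 1, MPI_DOUBLE, cart_comm);
\end{cxxcode}
\end{frame}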
\begin{frame}[exercise, fragile]
\frametitle{Virtual topology}
\framesubtitle{}
\begin{itemize}
\item Rewrite the parallelism using a Cartesian communicator
\item Use neighbor collective communications
\end{itemize}
\end{frame}
\subsection{Parallel I/O}
\begin{frame}[containsverbatim]
\frametitle{Parallel I/O overview}
\begin{itemize}
\item I/O is often (if not always) the main bottleneck in a parallel application
\item MPI provides a mechanism to read/write in parallel
\end{itemize}
\begin{center}
\input{src/mpi/figures/parallelFS.tex}
\end{center}
\end{frame}
\begin{frame}[containsverbatim]
\frametitle{Introducing remarks}
\begin{itemize}
\item MPI IO API works on your desktop/laptop
\item Most of the large HPC systems have a \textbf{parallel file system}
(like GPFS, Lustre, \emph{etc}.)
\item If the file is distributed smartly on a parallel file system:
performance increases
\item MPI IO offers a high-level API to access a distributed file (no need
to implement complex POSIX calls)
\item \textbf{does not work with ASCII files}
\item Most standard file formats support MPI IO (\emph{e.g.} HDF5,
NetCDF, \emph{etc.})
\end{itemize}
\end{frame}
\begin{frame}[containsverbatim]
\frametitle{Poisson so far}
\begin{center}
\input{src/mpi/figures/sofar.tex}
\end{center}
\end{frame}
\begin{frame}[containsverbatim]
\frametitle{Poisson ideal}
\begin{center}
\input{src/mpi/figures/sogoal.tex}
\end{center}
\end{frame}
\begin{frame}[fragile]
\frametitle{Open/Close a file in parallel}
\begin{cxxcode}{Syntax}
int MPI_File_open(MPI_Comm comm, const char *filename, int amode,
MPI_Info info, MPI_File *fh);
int MPI_File_close(MPI_File *fh);
\end{cxxcode}
\begin{itemize}
\item \cxxinline{comm}: the communicator that contains the writing/reading MPI processes
\item \cxxinline{filename}: a file name
\item \cxxinline{amode}: file access mode, \cxxinline{MPI_MODE_RDONLY}, \cxxinline{MPI_MODE_WRONLY},
\cxxinline{MPI_MODE_RDWR}, \cxxinline{MPI_MODE_CREATE}, \emph{etc.}
\item \cxxinline{info}: file info object (\cxxinline{MPI_INFO_NULL} is a valid info)
\item \cxxinline{fh}: file handle
\end{itemize}
\textbf{These are collective calls!}
\end{frame}
\begin{frame}[containsverbatim]
\frametitle{Parallel IO}
\framesubtitle{Terminology}
\begin{itemize}
\item \code{etype} is the elementary datatype of the data in the file accessed in parallel
\item \code{offset} is a position in the file expressed as a multiple of etypes
\item \code{displacement} of a position within the file is its distance in bytes from the beginning of the file
\end{itemize}
\begin{center}
\includegraphics{src/mpi/figures/offset}
%\input{day3/images/offset.tex}
\end{center}
\end{frame}
\begin{frame}[containsverbatim]
\frametitle{Parallel IO}
\framesubtitle{Simple independent read/write}
\begin{cxxcode}{Syntax}
int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, int count,
MPI_Datatype datatype, MPI_Status *status);
int MPI_File_write_at(MPI_File fh, MPI_Offset offset, const void *buf,
int count, MPI_Datatype datatype, MPI_Status *status);
\end{cxxcode}
\begin{itemize}
\item Can be used from a single process or a group of processes
\item \cxxinline{offset} is the position in the file at which to access, in multiples of \cxxinline{etype} (bytes with the default view)
\item \cxxinline{count} elements of type \cxxinline{datatype} are read/written
\end{itemize}
\end{frame}
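\begin{frame}[fragile]
\frametitle{Parallel IO}
\framesubtitle{Independent write example}
A sketch where each rank writes its own block of \cxxinline{local_n} doubles
at a distinct offset; the file name and sizes are placeholders
\begin{cxxcode}{Example (sketch)}
MPI_File fh;
MPI_File_open(MPI_COMM_WORLD, "output.bin",
              MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

// with the default view the offset is expressed in bytes
MPI_Offset offset = rank * local_n * sizeof(double);
MPI_File_write_at(fh, offset, local.data(), local_n, MPI_DOUBLE,
                  MPI_STATUS_IGNORE);

MPI_File_close(&fh);
\end{cxxcode}
\end{frame}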
\begin{frame}[containsverbatim]
\frametitle{Parallel IO}
\framesubtitle{\code{view} by each process}
\begin{cxxcode}{Syntax}
int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
MPI_Datatype filetype, const char *datarep, MPI_Info info);
int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, MPI_Datatype *etype,
MPI_Datatype *filetype, char *datarep);
\end{cxxcode}
\begin{itemize}
\item initially, each process views the file as a linear byte stream and
sees the data in its own native representation
\item \cxxinline{disp} is the displacement in bytes (defines where the
data of the file that belongs to the process begins)
\item \cxxinline{etype} is the unit of data access and positioning
\item \cxxinline{filetype} is a single \cxxinline{etype} or a datatype derived from it
\end{itemize}
\end{frame}
\begin{frame}[b,containsverbatim]
\frametitle{Parallel IO}
\framesubtitle{Setting up a \code{view}}
\begin{center}
\addimage[width=12cm]{\FIGREP/displacements}{2cm}{2cm}
\end{center}
(source: MPI 2.2 specification)
\end{frame}
\begin{frame}[containsverbatim]
\frametitle{Parallel IO}
\framesubtitle{Simple independent read/write without offset}
\begin{cxxcode}{Syntax}
int MPI_File_read(MPI_File fh, void *buf, int count, MPI_Datatype datatype,
MPI_Status *status);
int MPI_File_write(MPI_File fh, const void *buf, int count,
MPI_Datatype datatype, MPI_Status *status);
\end{cxxcode}
\end{frame}
\begin{frame}[containsverbatim]
\frametitle{Parallel IO}
\framesubtitle{Collective read/write with/without offset}
\begin{cxxcode}{Syntax}
int MPI_File_write_all(MPI_File fh, const void *buf, int count,
MPI_Datatype datatype, MPI_Status *status);
int MPI_File_read_all(MPI_File fh, void *buf, int count,
MPI_Datatype datatype, MPI_Status *status);
\end{cxxcode}
\end{frame}
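\begin{frame}[fragile]
\frametitle{Parallel IO}
\framesubtitle{Collective write with a view}
The same output written through a per-process \code{view} and a collective
call; names and sizes are placeholders
\begin{cxxcode}{Example (sketch)}
MPI_File fh;
MPI_File_open(MPI_COMM_WORLD, "output.bin",
              MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

// each rank sees the file starting at its own displacement, in doubles
MPI_Offset disp = rank * local_n * sizeof(double);
MPI_File_set_view(fh, disp, MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);

MPI_File_write_all(fh, local.data(), local_n, MPI_DOUBLE, MPI_STATUS_IGNORE);

MPI_File_close(&fh);
\end{cxxcode}
\end{frame}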
\subsection{One Sided}
\begin{frame}[containsverbatim]
\frametitle{What we did not cover}
\begin{itemize}
\item One Sided communications
\begin{itemize}
\item \cxxinline{MPI_Put}, \cxxinline{MPI_Get}
\item \cxxinline{MPI_Win_*}
\item shared memory
\end{itemize}
\item Process management
\begin{itemize}
\item \cxxinline{MPI_Comm_spawn}
\item Communications on inter-communicators
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Parallelization of the poisson code}
\begin{minipage}{.45\linewidth}
\centering
\begin{overprint}
\only<1>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_1}}
\only<2>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_2}}
\only<3>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_3}}
\only<4->{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_4}}
\end{overprint}
\end{minipage}
\begin{minipage}{.45\linewidth}
\begin{overprint}
\onslide<1>
\begin{itemize}
\item Parallelize the Poisson 2D problem using the Message Passing
Interface (MPI)
\end{itemize}
\onslide<2>
\begin{itemize}
\item This time, we want to make a 2D domain decomposition using
Cartesian topology
\item Use \code{MPI\_Dims\_create} and \code{MPI\_Cart\_create} to create a Cartesian topology
\end{itemize}
\onslide<3>
\begin{itemize}
\item The $p$ processes are split into $(p_{x}, p_{y})$ to make the
Cartesian grid
\item Each domain has size $(N/p_{x}, N/p_{y})$ (1 per process)
\item Use \code{MPI\_Cart\_shift} to find the neighboring domains
\end{itemize}
\onslide<4>
\begin{itemize}
\item Add \emph{ghost} lines before and after the local domain
\item Use the \emph{ghost} lines to receive the missing local data
\item You will need to define a new \textit{matrix column} datatype and update the
\textit{matrix line} datatype
\end{itemize}
\onslide<5>
\begin{itemize}
\item Use the \code{MPI\_Neighbor\_alltoallw} routine
\item You can use the number of iterations as a check
\item Remove the \cxxinline{dump()} function to start
\end{itemize}
\end{overprint}
\end{minipage}
\end{frame}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../../phys_743_parallel_programming"
%%% End:
