diff --git a/src/mpi/mpi_advanced.tex b/src/mpi/mpi_advanced.tex
index 9140d23..aa740a8 100644
--- a/src/mpi/mpi_advanced.tex
+++ b/src/mpi/mpi_advanced.tex
@@ -1,783 +1,786 @@
\renewcommand{\FIGREP}{src/mpi/figures}

\section{Advanced MPI}
\intersec{izar}

\begin{frame}
  \frametitle{Advanced MPI}
  \framesubtitle{Goals of this section}
  \begin{itemize}
  \item Overview of more advanced functionalities
  \item Persistent communications
  \item Advanced collective communications
  \item Describing your own datatype
  \item Redefining communicators
  \item Associating a topology to a communicator
  \item Parallel I/O
  \item One-sided communications
  \end{itemize}
\end{frame}

\subsection{Persistent point to point}

\begin{frame}[fragile]
  \frametitle{Persistent communications}
  \framesubtitle{}
  \begin{itemize}
  \item \cxxinline{MPI_Send_init}, \cxxinline{MPI_Recv_init} initialize the communication
  \item Same signature as the non-blocking communications
  \item \cxxinline{MPI_Start}, \cxxinline{MPI_Startall} to start the communication
  \item Completion is checked the same way as for non-blocking communications
  \end{itemize}
\end{frame}

\begin{frame}[exercise, fragile]
  \frametitle{Persistent communications}
  \framesubtitle{}
  \begin{itemize}
  \item Replace the non-blocking communications in the Poisson code by persistent ones
  \end{itemize}
\end{frame}

\subsection{Advanced collective communications}

\subsubsection{V versions}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{V extension to \cxxinline{MPI\_Gather}}
  \begin{cxxcode}{Syntax}
    int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                    void *recvbuf, const int recvcounts[], const int displs[],
                    MPI_Datatype recvtype, int root, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{recvcounts} is now an array, one entry per rank
  \item \cxxinline{displs} array of displacements defining where to place the $i^{\mathrm{th}}$ received data
  \item receive different sizes per process
  \item receive in an array with strides
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Gatherv semantic}
  \begin{cxxcode}{Semantic equivalent}
    // Every process
    MPI_Send(sendbuf, sendcount, sendtype, root, /*...*/);

    // On root process
    for(i = 0; i < nb_process; ++i)
      MPI_Recv(recvbuf + displs[i] * extent(recvtype), recvcounts[i], recvtype,
               i, /*...*/);
  \end{cxxcode}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{V extension to \cxxinline{MPI\_Scatter}}
  \begin{cxxcode}{Syntax}
    int MPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[],
                     MPI_Datatype sendtype, void *recvbuf, int recvcount,
                     MPI_Datatype recvtype, int root, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{sendcounts} is now an array, one entry per rank
  \item \cxxinline{displs} array of displacements defining where to take the data sent to the $i^{\mathrm{th}}$ process
  \item send different sizes per process
  \item send from an array with strides
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Scatterv semantic}
  \begin{cxxcode}{Semantic equivalent}
    // On root process
    for(i = 0; i < nb_process; ++i)
      MPI_Send(sendbuf + displs[i] * extent(sendtype), sendcounts[i], sendtype,
               i, /*...*/);

    // Every process
    MPI_Recv(recvbuf, recvcount, recvtype, root, /*...*/);
  \end{cxxcode}
\end{frame}
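\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Gatherv sketch}
  A minimal sketch, not part of the course examples, of gathering a varying number of
  integers per rank on rank $0$; the buffer names and counts are purely illustrative and
  \cxxinline{std::vector} is assumed to be available.
  \begin{cxxcode}{Example (sketch)}
    int prank, psize;
    MPI_Comm_rank(MPI_COMM_WORLD, &prank);
    MPI_Comm_size(MPI_COMM_WORLD, &psize);

    // every rank contributes prank + 1 values
    std::vector<int> sendbuf(prank + 1, prank);

    std::vector<int> recvcounts(psize), displs(psize), recvbuf;
    if (prank == 0) {
      for (int i = 0; i < psize; ++i) {
        recvcounts[i] = i + 1;
        displs[i] = (i == 0) ? 0 : displs[i - 1] + recvcounts[i - 1];
      }
      recvbuf.resize(displs[psize - 1] + recvcounts[psize - 1]);
    }

    MPI_Gatherv(sendbuf.data(), prank + 1, MPI_INT,
                recvbuf.data(), recvcounts.data(), displs.data(), MPI_INT,
                0, MPI_COMM_WORLD);
  \end{cxxcode}
\end{frame}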
\subsubsection{Non-blocking collective communications}

\begin{frame}[fragile]
  \frametitle{Non-blocking collective communications}
  \framesubtitle{}
  \begin{itemize}
  \item \code{I} variant of the collective communications
  \item extra parameter \cxxinline{request}
  \item \cxxinline{MPI_Ibarrier}, \cxxinline{MPI_Ibcast}
  \item \cxxinline{MPI_Igather}, \cxxinline{MPI_Igatherv}, \cxxinline{MPI_Iscatter}, \cxxinline{MPI_Iscatterv}
  \item \cxxinline{MPI_Iallgather}, \cxxinline{MPI_Iallgatherv}, \cxxinline{MPI_Ialltoall}
  \item \cxxinline{MPI_Ireduce}, \cxxinline{MPI_Iallreduce}, \cxxinline{MPI_Iscan}, \cxxinline{MPI_Iexscan}
  \end{itemize}
\end{frame}

\subsubsection{Persistent collective communications}

\begin{frame}[fragile]
  \frametitle{Persistent collective communications}
  \framesubtitle{}
  \begin{itemize}
  \item \code{\_init} variant of the collective communications
  \item extra parameter \cxxinline{request}
  \item \cxxinline{MPI_Barrier_init}, \cxxinline{MPI_Bcast_init}
  \item \cxxinline{MPI_Gather_init}, \cxxinline{MPI_Gatherv_init}, \cxxinline{MPI_Scatter_init}, \cxxinline{MPI_Scatterv_init}
  \item \cxxinline{MPI_Allgather_init}, \cxxinline{MPI_Allgatherv_init}, \cxxinline{MPI_Alltoall_init}
  \item \cxxinline{MPI_Reduce_init}, \cxxinline{MPI_Allreduce_init}, \cxxinline{MPI_Scan_init}, \cxxinline{MPI_Exscan_init}
  \end{itemize}
\end{frame}
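\begin{frame}[fragile]
  \frametitle{Persistent collective communications}
  \framesubtitle{Sketch}
  A minimal sketch of the persistent pattern applied to \cxxinline{MPI_Allreduce_init}
  (an MPI 4.0 feature, so an MPI 4.0 library is assumed); the variable names, the
  iteration loop and the helper function are illustrative only.
  \begin{cxxcode}{Example (sketch)}
    double local_sum, global_sum;
    MPI_Request request;

    // initialize once: same arguments as MPI_Allreduce, plus info and request
    MPI_Allreduce_init(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM,
                       MPI_COMM_WORLD, MPI_INFO_NULL, &request);

    for (int iter = 0; iter < max_iters; ++iter) {
      local_sum = compute_local_contribution(); // illustrative helper

      MPI_Start(&request);                   // start this round of the reduction
      MPI_Wait(&request, MPI_STATUS_IGNORE); // complete it, global_sum is now valid
    }

    MPI_Request_free(&request); // release the persistent request
  \end{cxxcode}
\end{frame}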
\begin{frame}[exercise, fragile]
  \frametitle{Persistent collective}
  \framesubtitle{}
  \begin{itemize}
  \item Replace the \cxxinline{MPI_Allreduce} by a persistent one
  \end{itemize}
\end{frame}

\subsection{Derived Datatypes}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Definition of a datatype}
  \begin{itemize}
  \item \cxxinline{MPI_Datatype} opaque type containing a \emph{Typemap}
    \begin{itemize}
    \item $Typemap = \{(type_{0}, disp_{0}), \dotsc, (type_{n - 1}, disp_{n - 1})\}$
    \item sequence of basic datatypes
    \item sequence of displacements (in bytes)
    \end{itemize}
  \item \code{extent} is the span from the first byte to the last one, with alignment requirements
    \begin{align*}
      lb(Typemap) &= \underset{j}{\min}(disp_{j}),\\
      ub(Typemap) &= \underset{j}{\max}(disp_{j} + \mathrm{sizeof}(type_{j})) + \epsilon,\ \text{and}\\
      extent(Typemap) &= ub(Typemap) - lb(Typemap)
    \end{align*}
    $\epsilon$ is there to account for alignment requirements
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Derived Datatypes}
  \framesubtitle{Base datatypes}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_CHAR} & \cxxinline{char} \\
      \cxxinline{MPI_SHORT} & \cxxinline{signed short int} \\
      \cxxinline{MPI_INT} & \cxxinline{signed int} \\
      \cxxinline{MPI_LONG} & \cxxinline{signed long int} \\
      \cxxinline{MPI_LONG_LONG_INT} & \cxxinline{signed long long int} \\
      \cxxinline{MPI_LONG_LONG} & \cxxinline{signed long long int} \\
      \cxxinline{MPI_SIGNED_CHAR} & \cxxinline{signed char} \\
      \cxxinline{MPI_UNSIGNED_CHAR} & \cxxinline{unsigned char} \\
      \cxxinline{MPI_UNSIGNED_SHORT} & \cxxinline{unsigned short int} \\
      \cxxinline{MPI_UNSIGNED} & \cxxinline{unsigned int} \\
      \cxxinline{MPI_UNSIGNED_LONG} & \cxxinline{unsigned long int} \\
      \cxxinline{MPI_UNSIGNED_LONG_LONG} & \cxxinline{unsigned long long int} \\
      \bottomrule
    \end{tabular}
  \end{minipage}
  \hspace{1cm}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_FLOAT} & \cxxinline{float} \\
      \cxxinline{MPI_DOUBLE} & \cxxinline{double} \\
      \cxxinline{MPI_LONG_DOUBLE} & \cxxinline{long double} \\
      \cxxinline{MPI_WCHAR} & \cxxinline{wchar_t} \\
      \cxxinline{MPI_C_BOOL} & \cxxinline{_Bool} \\
      \cxxinline{MPI_INT8_T} & \cxxinline{int8_t} \\
      \cxxinline{MPI_INT16_T} & \cxxinline{int16_t} \\
      \cxxinline{MPI_INT32_T} & \cxxinline{int32_t} \\
      \cxxinline{MPI_INT64_T} & \cxxinline{int64_t} \\
      \cxxinline{MPI_UINT8_T} & \cxxinline{uint8_t} \\
      \cxxinline{MPI_UINT16_T} & \cxxinline{uint16_t} \\
      \cxxinline{MPI_UINT32_T} & \cxxinline{uint32_t} \\
      \cxxinline{MPI_UINT64_T} & \cxxinline{uint64_t} \\
      \bottomrule
    \end{tabular}
  \end{minipage}
\end{frame}

\begin{frame}
  \frametitle{Derived Datatypes}
  \framesubtitle{Base datatypes}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C++ datatype\\
      \midrule
      \cxxinline{MPI_CXX_BOOL} & \cxxinline{bool} \\
      \cxxinline{MPI_CXX_FLOAT_COMPLEX} & \cxxinline{std::complex<float>} \\
      \cxxinline{MPI_CXX_DOUBLE_COMPLEX} & \cxxinline{std::complex<double>} \\
      \cxxinline{MPI_CXX_LONG_DOUBLE_COMPLEX} & \cxxinline{std::complex<long double>}\\
      \bottomrule
    \end{tabular}
  \end{minipage}
  \hspace{1.8cm}
  \begin{minipage}{.3\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_AINT} & \cxxinline{MPI_Aint} \\
      \cxxinline{MPI_OFFSET} & \cxxinline{MPI_Offset} \\
      \cxxinline{MPI_COUNT} & \cxxinline{MPI_Count} \\
      \cxxinline{MPI_BYTE} & \\
      \cxxinline{MPI_PACKED} & \\
      \bottomrule
    \end{tabular}
  \end{minipage}
\end{frame}

\note{
  \begin{itemize}
  \item \cxxinline{MPI_CHAR} is a printable character, whereas \cxxinline{MPI_BYTE} is a type of exactly 8 bits that is not printable as a character
  \item \cxxinline{MPI_PACKED} is for pack/unpack
  \end{itemize}
}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Arrays}
  \begin{cxxcode}{Syntax}
    int MPI_Type_contiguous(int count, MPI_Datatype oldtype, MPI_Datatype *newtype);
    int MPI_Type_vector(int count, int blocklength, int stride,
                        MPI_Datatype oldtype, MPI_Datatype *newtype);
  \end{cxxcode}
  \begin{itemize}
  \item arrays of contiguous elements, or of strided blocks of the same type
  \item \cxxinline{count}: number of repetitions (blocks)
  \item \cxxinline{blocklength}: number of elements per block
  \item \cxxinline{stride}: number of elements between the starts of consecutive blocks
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Array variants}
  \begin{itemize}
  \item \cxxinline{MPI_Type_create_hvector}: same as \cxxinline{MPI_Type_vector} with \cxxinline{stride} expressed in bytes
  \item \cxxinline{MPI_Type_create_indexed_block}: same as \cxxinline{MPI_Type_vector} with an array of \cxxinline{displacements}
  \item \cxxinline{MPI_Type_create_hindexed_block}: same as \cxxinline{MPI_Type_create_indexed_block} with \cxxinline{displacements} in bytes
  \item \cxxinline{MPI_Type_indexed}: same as \cxxinline{MPI_Type_create_indexed_block} with an array of \cxxinline{blocklengths}
  \item \cxxinline{MPI_Type_create_hindexed}: same as \cxxinline{MPI_Type_indexed} with \cxxinline{displacements} in bytes
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Structures}
  \begin{cxxcode}{Syntax}
    int MPI_Type_create_struct(int count, const int array_of_blocklengths[],
                               const MPI_Aint array_of_displacements[],
                               const MPI_Datatype array_of_types[],
                               MPI_Datatype *newtype);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{count}: number of repetitions (blocks)
  \item \cxxinline{array_of_blocklengths}: sizes per block
  \item \cxxinline{array_of_displacements}: displacements between blocks in bytes
  \item \cxxinline{array_of_types}: types contained in each block
  \end{itemize}
\end{frame}
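\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Structure sketch}
  A minimal sketch, independent from the course examples, describing a hypothetical
  \cxxinline{Particle} structure with \cxxinline{MPI_Type_create_struct}; the helper
  functions and the commit/free calls are detailed on the next slides.
  \begin{cxxcode}{Example (sketch)}
    struct Particle { int id; double pos[3]; };
    Particle p;

    int blocklengths[2] = {1, 3};
    MPI_Datatype types[2] = {MPI_INT, MPI_DOUBLE};
    MPI_Aint displs[2], base;

    // compute the displacements of the members relative to the structure start
    MPI_Get_address(&p, &base);
    MPI_Get_address(&p.id, &displs[0]);
    MPI_Get_address(&p.pos, &displs[1]);
    displs[0] = MPI_Aint_diff(displs[0], base);
    displs[1] = MPI_Aint_diff(displs[1], base);

    MPI_Datatype particle_t;
    MPI_Type_create_struct(2, blocklengths, displs, types, &particle_t);
    MPI_Type_commit(&particle_t);

    // particle_t can now be used in communications, e.g.
    // MPI_Send(&p, 1, particle_t, dest, tag, MPI_COMM_WORLD);

    MPI_Type_free(&particle_t);
  \end{cxxcode}
\end{frame}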
\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Useful helper functions}
  \begin{itemize}
  \item \cxxinline{MPI_Get_address}: get the address of a variable
  \item \cxxinline{MPI_Aint_diff}: get the difference between 2 addresses
  \item \cxxinline{MPI_Aint_add}: get the sum of 2 addresses
  \item \cxxinline{MPI_Type_size}: get the size of a datatype
  \item \cxxinline{MPI_Type_get_extent}: get the lower bound and the extent of a type
  \item \cxxinline{MPI_Type_create_resized}: reset the lower bound and the extent of a type
  \end{itemize}
\end{frame}

\note{
  \begin{itemize}
  \item Prefer \cxxinline{MPI_Get_address} over \&
  \item If the extent is badly set, it is not possible to communicate multiple objects of the same datatype
  \end{itemize}
}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Commit/free}
  \begin{cxxcode}{Syntax}
    int MPI_Type_commit(MPI_Datatype *datatype);
    int MPI_Type_free(MPI_Datatype *datatype);
  \end{cxxcode}
  \begin{itemize}
  \item new datatypes must be committed before being usable in communications
  \item committed types need to be freed once they are not used anymore
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Example}
  \cxxfile[title={mpi/datatypes.cc},
           minted options app={
             firstline=13,
             lastline=41,
             fontsize=\tiny}]{examples/mpi/datatypes.cc}
\end{frame}

\begin{frame}[fragile, exercise]
  \frametitle{Derived Datatypes}
  \framesubtitle{Send lines in the Poisson code}
  \begin{itemize}
  \item Create a \cxxinline{MPI_Datatype line_t} representing a line of data
  \item Exchange data of type \cxxinline{line_t} instead of \cxxinline{MPI_FLOAT}
  \end{itemize}
\end{frame}

\subsection{Pack/Unpack}

\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Pack}
  \begin{cxxcode}{Syntax}
    int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype,
                 void *outbuf, int outsize, int *position, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{inbuf}, \cxxinline{incount}, \cxxinline{datatype} correspond to the description of the data to pack
  \item \cxxinline{outbuf}, \cxxinline{outsize} describe the buffer where to pack
  \item \cxxinline{position} is the current position in the packing buffer
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Unpack}
  \begin{cxxcode}{Syntax}
    int MPI_Unpack(const void *inbuf, int insize, int *position, void *outbuf,
                   int outcount, MPI_Datatype datatype, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{inbuf}, \cxxinline{insize} describe the buffer from which to unpack
  \item \cxxinline{position} is the current position in the unpacking buffer
  \item \cxxinline{outbuf}, \cxxinline{outcount}, and \cxxinline{datatype} correspond to the description of the data to unpack
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Example}
  \cxxfile[title={mpi/pack\_unpack.cc},
           minted options app={
             firstline=26,
             lastline=39
           }]{examples/mpi/pack_unpack.cc}
\end{frame}

\subsection{Groups and Communicators}

\begin{frame}[containsverbatim]
  \frametitle{Groups and Communicators}
  \begin{itemize}
  \item a \code{communicator}:
    \begin{itemize}
    \item encapsulates a \code{context}, a \code{group}, a \code{virtual topology} and \code{attributes}
    \item two kinds: \code{intra-communicator} and \code{inter-communicator}
    \end{itemize}
  \item a \code{group}:
    \begin{itemize}
    \item ordered set of processes
    \item each process has a unique ID (rank within the group) and can belong to several different groups
    \item a group can be used to create a new communicator
    \end{itemize}
  \end{itemize}
\end{frame}

\note{
  \begin{itemize}
  \item \code{intra}: communications inside a group
  \item \code{inter}: communications between groups
  \end{itemize}
}

\begin{frame}[containsverbatim]
  \frametitle{Groups and Communicators}
  \framesubtitle{Creating new communicators}
  \begin{itemize}
  \item duplicating or splitting an existing one: \cxxinline{MPI_Comm_dup}, \cxxinline{MPI_Comm_split}
  \item creating a communicator from a group: \cxxinline{MPI_Comm_create}, \cxxinline{MPI_Comm_create_group}
  \item this requires creating groups
    \begin{itemize}
    \item from a communicator: \cxxinline{MPI_Comm_group}
    \item with boolean operations: \cxxinline{MPI_Group_union}, \cxxinline{MPI_Group_intersection}, \cxxinline{MPI_Group_difference}
    \item by specifying ranks: \cxxinline{MPI_Group_incl}, \cxxinline{MPI_Group_excl}
    \end{itemize}
  \item destroy the created objects: \cxxinline{MPI_Comm_free}, \cxxinline{MPI_Group_free}
  \end{itemize}
\end{frame}
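\begin{frame}[fragile]
  \frametitle{Groups and Communicators}
  \framesubtitle{Splitting sketch}
  A minimal sketch of \cxxinline{MPI_Comm_split}; splitting the ranks into two halves
  and the communicator name are purely illustrative.
  \begin{cxxcode}{Example (sketch)}
    int prank, psize;
    MPI_Comm_rank(MPI_COMM_WORLD, &prank);
    MPI_Comm_size(MPI_COMM_WORLD, &psize);

    // processes with the same color end up in the same new communicator,
    // ordered by the key (here the original rank)
    int color = (prank < psize / 2) ? 0 : 1;
    MPI_Comm half_comm;
    MPI_Comm_split(MPI_COMM_WORLD, color, prank, &half_comm);

    int half_rank, half_size;
    MPI_Comm_rank(half_comm, &half_rank);
    MPI_Comm_size(half_comm, &half_size);

    // collectives on half_comm only involve the processes of the same half
    MPI_Comm_free(&half_comm);
  \end{cxxcode}
\end{frame}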
\subsection{Virtual Topologies}

\begin{frame}
  \frametitle{Virtual Topologies}
  \framesubtitle{}
  \begin{itemize}
  \item potential performance gain by mapping processes to the hardware
  \item helps for program readability
  \item types of topologies: Cartesian, Graph, Distributed Graph
  \item collective communications on neighborhoods
  \end{itemize}
\end{frame}

\note{
  Details only on the Cartesian one
}

\begin{frame}[fragile]
  \frametitle{Virtual Topologies}
  \framesubtitle{Cartesian topology}
  \begin{cxxcode}{Syntax}
    int MPI_Cart_create(MPI_Comm comm_old, int ndims, const int dims[],
                        const int periods[], int reorder, MPI_Comm *comm_cart);
  \end{cxxcode}
  \begin{itemize}
  \item creates a communicator with Cartesian information
  \item convenient functions:
    \begin{itemize}
    \item \cxxinline{MPI_Dims_create} helps creating a balanced distribution of processes
    \item \cxxinline{MPI_Cart_shift} helps determining the neighbors
    \item \cxxinline{MPI_Cart_rank} gets the rank based on the coordinates
    \item \cxxinline{MPI_Cart_coords} gets the coordinates based on the rank
    \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Virtual topology}
  \framesubtitle{Neighborhood collectives}
  \begin{itemize}
  \item \cxxinline{MPI_Neighbor_allgather}: assuming we are on the process of rank $i$, gathers data from every rank $j$ such that the edge $(j, i)$ exists, and sends the same data to every $j$ such that the edge $(i, j)$ exists
  \item \cxxinline{MPI_Neighbor_alltoall}: compared to allgather, sends different data to each process $j$
  \item vector variants are available (\code{v})
  \item immediate variants are available (\code{I})
  \item persistent variants are available (\code{\_init})
  \item \cxxinline{MPI_Neighbor_alltoallw} also exists in all flavors (\code{w}): different datatypes are exchanged with the neighbors
  \end{itemize}
\end{frame}
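\begin{frame}[fragile]
  \frametitle{Virtual Topologies}
  \framesubtitle{Cartesian sketch}
  A minimal sketch of a 2D Cartesian communicator; the variable names are illustrative,
  and a non-periodic grid with reordering enabled is assumed.
  \begin{cxxcode}{Example (sketch)}
    int psize;
    MPI_Comm_size(MPI_COMM_WORLD, &psize);

    int dims[2] = {0, 0};      // let MPI choose a balanced (px, py)
    int periods[2] = {0, 0};   // non-periodic in both directions
    MPI_Dims_create(psize, 2, dims);

    MPI_Comm cart_comm;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, /*reorder=*/1, &cart_comm);

    // neighbors along each dimension; MPI_PROC_NULL if there is no neighbor
    int top, bottom, left, right;
    MPI_Cart_shift(cart_comm, 0, 1, &top, &bottom);
    MPI_Cart_shift(cart_comm, 1, 1, &left, &right);

    int coords[2];
    MPI_Cart_coords(cart_comm, /*rank=*/0, 2, coords); // coordinates of rank 0
  \end{cxxcode}
\end{frame}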
\begin{frame}[exercise, fragile]
  \frametitle{Virtual topology}
  \framesubtitle{}
  \begin{itemize}
  \item Rewrite the parallelism using a Cartesian communicator
  \item Use neighborhood collective communications
  \end{itemize}
\end{frame}

\subsection{Parallel I/O}

\begin{frame}[containsverbatim]
  \frametitle{Parallel I/O overview}
  \begin{itemize}
  \item I/O is often (if not always) the main bottleneck in a parallel application
  \item MPI provides a mechanism to read/write in parallel
  \end{itemize}
  \begin{center}
    \input{src/mpi/figures/parallelFS.tex}
  \end{center}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Introductory remarks}
  \begin{itemize}
  \item The MPI IO API works on your desktop/laptop
  \item Most of the large HPC systems have a \textbf{parallel file system} (like GPFS, Lustre, \emph{etc}.)
  \item If the file is distributed smartly on a parallel file system: performance increases
  \item MPI IO offers a high-level API to access a distributed file (no need to implement complex POSIX calls)
  \item \textbf{does not work with ASCII files}
  \item Most of the standard file formats support MPI IO (\emph{e.g.} HDF5, NetCDF, \emph{etc}.)
  \end{itemize}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Poisson so far}
  \begin{center}
    \input{src/mpi/figures/sofar.tex}
  \end{center}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Poisson ideal}
  \begin{center}
    \input{src/mpi/figures/sogoal.tex}
  \end{center}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Open/Close a file in parallel}
  \begin{cxxcode}{Syntax}
    int MPI_File_open(MPI_Comm comm, const char *filename, int amode,
                      MPI_Info info, MPI_File *fh);
    int MPI_File_close(MPI_File *fh);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{comm}: the communicator that contains the writing/reading MPI processes
  \item \cxxinline{filename}: a file name
  \item \cxxinline{amode}: file access mode, \cxxinline{MPI_MODE_RDONLY}, \cxxinline{MPI_MODE_WRONLY}, \cxxinline{MPI_MODE_RDWR}, \cxxinline{MPI_MODE_CREATE}, \emph{etc}.
  \item \cxxinline{info}: file info object (\cxxinline{MPI_INFO_NULL} is a valid info)
  \item \cxxinline{fh}: file handle
  \end{itemize}
  \textbf{Collective calls!}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Terminology}
  \begin{itemize}
  \item \code{etype} is the elementary type of the data of the file accessed in parallel
  \item \code{offset} is a position in the file expressed as a multiple of etypes
  \item \code{displacement} of a position within the file is the number of bytes from the beginning of the file
  \end{itemize}
  \begin{center}
    \includegraphics{src/mpi/figures/offset}
    %\input{day3/images/offset.tex}
  \end{center}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Simple independent read/write}
  \begin{cxxcode}{Syntax}
    int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, int count,
                         MPI_Datatype datatype, MPI_Status *status);
    int MPI_File_write_at(MPI_File fh, MPI_Offset offset, const void *buf, int count,
                          MPI_Datatype datatype, MPI_Status *status);
  \end{cxxcode}
  \begin{itemize}
  \item can be used from a single process (or a group of processes)
  \item \cxxinline{offset} specifies where in the file the access starts
  \item \cxxinline{count} elements of type \cxxinline{datatype} are read/written from/to \cxxinline{buf}
  \end{itemize}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{\code{view} by each process}
  \begin{cxxcode}{Syntax}
    int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
                          MPI_Datatype filetype, const char *datarep, MPI_Info info);
    int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, MPI_Datatype *etype,
                          MPI_Datatype *filetype, char *datarep);
  \end{cxxcode}
  \begin{itemize}
  \item initially, each process views the file as a linear byte stream and views the data in its own native representation
  \item \cxxinline{disp} is the displacement in bytes (it defines the beginning of the part of the file that belongs to the process)
  \item \cxxinline{etype} is the unit of data access and positioning
  \item \cxxinline{filetype} is a single \cxxinline{etype} or a multiple of it
  \end{itemize}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Setting up a \code{view}}
  \begin{center}
    \input{day3/images/displacements.tex}
  \end{center}
  (source: MPI 2.2 specifications)
\end{frame}
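\begin{frame}[fragile]
  \frametitle{Parallel IO}
  \framesubtitle{View sketch}
  A minimal sketch where each process writes \cxxinline{count} doubles in a contiguous,
  rank-ordered layout; the file name, \cxxinline{count} and \cxxinline{local_data} are
  illustrative, and the collective \cxxinline{MPI_File_write_all} used here is
  introduced on the next slides.
  \begin{cxxcode}{Example (sketch)}
    int prank;
    MPI_Comm_rank(MPI_COMM_WORLD, &prank);

    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, "output.bin",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

    // each rank sees only its own block of the file, starting at disp bytes
    MPI_Offset disp = static_cast<MPI_Offset>(prank) * count * sizeof(double);
    MPI_File_set_view(fh, disp, MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);

    // collective write of the local buffer at the beginning of the view
    MPI_File_write_all(fh, local_data.data(), count, MPI_DOUBLE, MPI_STATUS_IGNORE);

    MPI_File_close(&fh);
  \end{cxxcode}
\end{frame}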
\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Simple independent read/write without offset}
  \begin{cxxcode}{Syntax}
    int MPI_File_read(MPI_File fh, void *buf, int count, MPI_Datatype datatype,
                      MPI_Status *status);
    int MPI_File_write(MPI_File fh, const void *buf, int count,
                       MPI_Datatype datatype, MPI_Status *status);
  \end{cxxcode}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Collective read/write with/without offset}
  \begin{cxxcode}{Syntax}
    int MPI_File_write_all(MPI_File fh, const void *buf, int count,
                           MPI_Datatype datatype, MPI_Status *status);
    int MPI_File_read_all(MPI_File fh, void *buf, int count,
                          MPI_Datatype datatype, MPI_Status *status);
  \end{cxxcode}
\end{frame}

\subsection{One Sided}

\begin{frame}[containsverbatim]
  \frametitle{What we did not cover}
  \begin{itemize}
  \item One-sided communications
    \begin{itemize}
    \item \cxxinline{MPI_Put}, \cxxinline{MPI_Get}
    \item \cxxinline{MPI_Win_*}
    \item shared memory
    \end{itemize}
  \item Process management
    \begin{itemize}
    \item \cxxinline{MPI_Comm_spawn}
    \item communications on inter-communicators
    \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}[fragile,t]
  \frametitle{Parallelization of the Poisson code}
  \begin{minipage}{.45\linewidth}
    \centering
    \begin{overprint}
      \only<1>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_1}}
      \only<2>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_2}}
      \only<3>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_3}}
      \only<4->{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_4}}
    \end{overprint}
  \end{minipage}
  \begin{minipage}{.45\linewidth}
    \begin{overprint}
      \onslide<1>
      \begin{itemize}
      \item Parallelize the Poisson 2D problem using the Message Passing Interface (MPI)
      \end{itemize}
      \onslide<2>
      \begin{itemize}
      \item This time, we want to make a 2D domain decomposition using a Cartesian topology
+     \item Use \code{MPI\_Dims\_create} and \code{MPI\_Cart\_create} to create a Cartesian topology
      \end{itemize}
      \onslide<3>
      \begin{itemize}
      \item The $p$ processes are split into $(p_{x}, p_{y})$ to make the Cartesian grid
      \item Each domain has size $(N/p_{x}, N/p_{y})$ (1 per process)
+     \item Use \code{MPI\_Cart\_shift} to find the neighboring domains
      \end{itemize}
      \onslide<4>
      \begin{itemize}
      \item Adding \emph{ghost} lines before and after
      \item Use the \emph{ghost} lines to receive the missing local data
+     \item You will need to define a new \textit{matrix column} datatype and update the
+       \textit{matrix line} datatype
      \end{itemize}
      \onslide<5>
      \begin{itemize}
-     \item Start using \cxxinline{MPI_Sendrecv} to implement the communications
+     \item Use the \code{MPI\_Neighbor\_alltoallw} routine
      \item You can use the number of iterations as a check
      \item Remove the \cxxinline{dump()} function to start
-     \item One it is working try to use \emph{non-blocking} communications
      \end{itemize}
    \end{overprint}
  \end{minipage}
\end{frame}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../../phys_743_parallel_programming"
%%% End: