diff --git a/notes.org b/notes.org
index 613a1a8..75c8ea5 100644
--- a/notes.org
+++ b/notes.org
@@ -1,89 +1,89 @@
* PHYS 743 - Parallel Programming
** General remarks
- QUESTION: Do we have time to visit INJ?
** Tentative agenda
*** Admin intro
**** Projects
*** Basic concepts
**** ssh, scp, rsync
**** Compilation
***** Modules
**** Debugging
*** Architecture
**** Cluster (MPI)
***** Clusters in general
***** At SCITAS
**** Multicore (OpenMP)
**** Singlecore (SIMD)
*** Optimization
**** Data access
**** Vectorization
**** Basic optimization techniques
*** Performance measurement
**** Key concepts
***** FLOPS, memory bandwidth
***** timing (speedup, scalings)
**** Profiling
**** Roofline
*** Shared memory (OpenMP) [13/13]
**** [X] Task parallelism
**** [X] OpenMP terminology / Read spec
**** [X] Fork-join / Omp parallel / Implicit barriers
**** [X] Exercise Hello World / SLURM
**** [X] Omp parallel for
**** [X] Exercise
**** [X] Omp critical (synchronization), atomic
**** [X] Barriers
**** [X] Omp private
**** [X] Omp reduction
**** [X] Collapse
**** [X] Work sharing constructs
**** [X] Exercise Poisson
*** Advanced [3/5]
**** [X] Schedule
**** [X] NUMA / pinning / first touch
**** [X] Race condition, accumulation in array (false sharing)
**** [-] OpenMP (new features not covered)
**** [-] (GPU)
-*** Distributed memory (MPI) basic [5/8]
+*** Distributed memory (MPI) basic [6/8]
**** [X] Introduction / Read spec
-**** [-] MPI enviroment / Hello world [1/3]
-***** [-] Print before init
+**** [X] MPI environment / Hello world [1/3]
+***** [?] Print before init
***** [X] Print rank
-***** [-] Print conditionaly rank
+***** [?] Print rank conditionally
**** [-] MPI terminology
**** [X] Point-to-point [2/2]
***** [X] Synchronous / Deadlock
****** example MPI_Send MPI_Recv
***** [X] Asynchronous / race condition
**** [X] Collective [3/3]
***** [X] Bcast
***** [X] Gather/scatter
***** [X] Reduce
**** [X] Advanced collective [3/3]
***** [X] All
***** [X] All to all
***** [X] Barrier
**** [-] MPI Fortran
***** [-] Bindings
***** [-] Asynchronous arrays
**** [X] Exercise Poisson
-*** Distributed memory (MPI) advanced [1/9]
+*** Distributed memory (MPI) advanced [7/9]
**** [X] Gather/Scatterv
-**** [-] Derived types
-**** [-] (un)Pack
-**** [-] Communicator
-**** [-] Topologies
+**** [X] Derived types
+**** [X] (un)Pack
+**** [X] Communicator
+**** [X] Topologies
**** [-] IO
**** [-] One-sided
-**** [-] Persistent
-**** [-] Non blocking collectives
+**** [X] Persistent
+**** [X] Non blocking collectives
*** Hybrid programming
**** Mpi init
**** Task/thread Repartition
**** MPI_Mprobe
*** Recap
*** Projects
*** Publicity for SCITAS
diff --git a/phys_743_parallel_programming.tex b/phys_743_parallel_programming.tex
index 3ac0237..476d265 100644
--- a/phys_743_parallel_programming.tex
+++ b/phys_743_parallel_programming.tex
@@ -1,278 +1,278 @@
\documentclass[8pt,aspectratio=169,notes]{beamer}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
\usepackage[most, minted]{tcolorbox}
\usepackage{xcolor}
\usepackage{graphicx}
\usepackage{fancyvrb}
\usepackage{tikz}
\usepackage{colortbl}
\usepackage{booktabs}
\usepackage[super]{nth}
\usepackage{amssymb}
\usepackage[binary-units=true]{siunitx}
\usepackage{pgfpages}
%\setbeameroption{show notes on second screen=left}
\usemintedstyle{emacs}
\makeatletter
% Define commands to select the folder where the Beamer theme lies
\def\beamer@calltheme#1#2#3{%
  \def\beamer@themelist{#2}
\@for\beamer@themename:=\beamer@themelist\do {\usepackage[{#1}]{\beamer@themelocation/#3\beamer@themename}}} \def\usefolder#1{ \def\beamer@themelocation{#1} } \def\beamer@themelocation{} % Patch Table of Content to insert fixed spaces between items instead of vfills \patchcmd{\beamer@sectionintoc} {\vfill} {\vskip\itemsep} {} {} % New counter for line numbers \newcounter{verbatim@cnt} % This is for color band on the linenos in listings \AtEndEnvironment{Verbatim}{% \stepcounter{verbatim@cnt}% \protected@write\@auxout{}{% \global\protect\@namedef{verbatim@numwidth@\the\c@verbatim@cnt}{% \ifnum\c@FancyVerbLine>999 7.5mm% \else\ifnum\c@FancyVerbLine>99 6mm% \else 4mm% \fi\fi }% }% } \def\minted@auto@numwidth#1{% \ifcsname verbatim@numwidth@\the\numexpr\c@verbatim@cnt#1\relax\endcsname \csname verbatim@numwidth@\the\numexpr\c@verbatim@cnt#1\relax\endcsname \else 4mm% \fi } \tcbset{bashstyle/.style={ colframe=black!70, listing engine=minted, listing only, minted style=colorful, minted language=console, size=fbox, breakable, enhanced, minted options={ autogobble=true, breaklines=true, breakbefore=., numbersep=2mm, }, }} \tcbset{cxx/.style={ colframe=black!70, listing engine=minted, listing only, minted style=emacs, minted language=C++, size=fbox, breakable, enhanced, minted options={ autogobble=true, linenos, breaklines=true, breakbefore=., numbersep=2mm, escapeinside=||, }, overlay={% \begin{tcbclipinterior} \fill[gray!25] (frame.south west) rectangle ([xshift=\dimexpr\minted@auto@numwidth{}\relax]frame.north west); \end{tcbclipinterior} }, % in "left", \c@verbatim@cnt is not stepped yet, hence the argument "+1" left=\dimexpr\minted@auto@numwidth{+1}\relax, }} % \EscMintinline[options]{}{} \def\EscMintinline{% \FVExtraRobustCommand \RobustEscMintinline \FVExtraUnexpandedReadOArgMArgEscVArg} \NewExpandableDocumentCommand \FVExtraUnexpandedReadOArgMArgEscVArg { o m m } {% \IfNoValueTF{#1} {\FVExtraAlwaysUnexpanded {\FVExtraUnexpandedReadOArgMArgEscVArg{#2}{#3}}} {\FVExtraAlwaysUnexpanded {\FVExtraUnexpandedReadOArgMArgEscVArg[#1]{#2}{#3}}}% } \newrobustcmd\RobustEscMintinline[2][]{% % similar to \mintinline \begingroup \setboolean{minted@isinline}{true}% \minted@configlang{#2}% \setkeys{minted@opt@cmd}{#1}% \minted@fvset \begingroup \@ifnextchar\bgroup {\FVExtraDetokenizeREscVArg{\minted@inline@iii}}% {\PackageError{minted}% {\string\EscMintinline\space delimiters must be paired curly braces in this context}% {Delimit argument with curly braces}}} \makeatother \newtcblisting{bashcode}{% colframe=black!70, width=\linewidth, bashstyle, } \newtcblisting{consoleoutput}{% colback=black, colupper=gray!50, colframe=black!70, listing engine=minted, listing only, minted style=monokai, minted language=console, size=fbox, breakable, enhanced, minted options={ autogobble=true, breaklines=true, breakbefore=., numbersep=2mm, }, % width=80ex, } \newtcblisting{cxxcode}[2][]{ cxx, title={#2}, #1, } \newtcbinputlisting{cxxfile}[2][]{% cxx, minted options app={ fontsize=\small, }, listing file={#2}, % width=80ex, #1 } \newcommand{\cxxinline}[1]{\EscMintinline{C++}{#1}} \newcommand{\cmd}[1]{\EscMintinline[style=colorful]{console}{#1}} %newmintinline[cmd]{console}{style=colorful,autogobble} \newcommand{\code}[1]{\texttt{\bf #1}} \DeclareSIUnit\flop{FLOP} \DeclareSIUnit\transfer{T} \DeclareSIUnit\cycle{c} \DeclareSIUnit\flops{\flop\per\second} \DeclareSIUnit\chf{CHF} \sisetup{per-mode=symbol} \sisetup{exponent-product = \cdot} \sisetup{group-separator={\mathrm{'}}} \definecolor{blue0}{HTML}{002255} 
\definecolor{blue1}{HTML}{003380} \definecolor{blue2}{HTML}{0044AA} \definecolor{blue3}{HTML}{0055D4} \definecolor{blue4}{HTML}{0066FF} \definecolor{blue5}{HTML}{2A7FFF} \definecolor{blue6}{HTML}{5599FF} \definecolor{blue7}{HTML}{80B3FF} \definecolor{blue8}{HTML}{AACCFF} \definecolor{blue9}{HTML}{D5E5FF} \definecolor{yellowbrown0}{HTML}{554400} \definecolor{yellowbrown1}{HTML}{806600} \definecolor{yellowbrown2}{HTML}{AA8800} \definecolor{yellowbrown3}{HTML}{D4AA00} \definecolor{yellowbrown4}{HTML}{FFCC00} \definecolor{yellowbrown5}{HTML}{FFD42A} \definecolor{yellowbrown6}{HTML}{FFDD55} \definecolor{yellowbrown7}{HTML}{FFE680} \definecolor{yellowbrown8}{HTML}{FFEEAA} \definecolor{yellowbrown9}{HTML}{FFF6D5} \definecolor{colShellBg}{HTML}{F5EDE4} \definecolor{links}{HTML}{2A1B81} \hypersetup{colorlinks,linkcolor=,urlcolor=links} \usefolder{scitas_theme} \usetheme{scitas} \newcommand{\FIGREP}{figures} \renewcommand{\arraystretch}{1.3} % Remove numbering from the ToC when it's spread on multiple frames \setbeamertemplate{frametitle continuation}{} \title{{\huge Parallel Programming}\\Single-core optimization, MPI, OpenMP, and hybrid programming} \author[N. Richart, E. Lanti]{Nicolas Richart \\ Emmanuel Lanti \\ {\scriptsize Course based on V. Keller's lecture notes}} \date{\nth{15} - \nth{19} of November 2021} \begin{document} \begin{frame}[plain] \titlepage \end{frame} % \section{Table of Contents} % \begin{frame}[allowframebreaks=0.8] % \frametitle{Table of Contents} % \tableofcontents%[hideallsubsections] % \end{frame} % Administration -%\input{src/admin/admin} +\input{src/admin/admin} % Single-core optimization -%\input{src/basic_concepts/basic_concepts} -%\input{src/cluster_architecture/cluster_architecture} -%\input{src/performance_measurement/performance_measurement} -%\input{src/optimization/optimization} +\input{src/basic_concepts/basic_concepts} +\input{src/cluster_architecture/cluster_architecture} +\input{src/performance_measurement/performance_measurement} +\input{src/optimization/optimization} % OpenMP \input{src/openmp/openmp} % MPI -%\input{src/mpi/mpi} -%\input{src/mpi/mpi_advanced} +\input{src/mpi/mpi} +\input{src/mpi/mpi_advanced} % Hybrid programming -%\input{src/hybrid/hybrid} +\input{src/hybrid/hybrid} % Recapitulation of the course %\input{src/recap/recap} % Project description %\input{src/projects/projects} \end{document} %%% Local Variables: %%% mode: latex %%% TeX-command-extra-options: "-shell-escape" %%% TeX-master: t %%% End: diff --git a/day3/images/offset.fig b/src/mpi/figures/offset.fig similarity index 100% rename from day3/images/offset.fig rename to src/mpi/figures/offset.fig diff --git a/day3/images/offset.pdf b/src/mpi/figures/offset.pdf similarity index 100% rename from day3/images/offset.pdf rename to src/mpi/figures/offset.pdf diff --git a/day3/images/offset.tex b/src/mpi/figures/offset.tex similarity index 100% rename from day3/images/offset.tex rename to src/mpi/figures/offset.tex diff --git a/day3/images/parallelFS.fig b/src/mpi/figures/parallelFS.fig similarity index 100% rename from day3/images/parallelFS.fig rename to src/mpi/figures/parallelFS.fig diff --git a/day3/images/parallelFS.pdf b/src/mpi/figures/parallelFS.pdf similarity index 100% rename from day3/images/parallelFS.pdf rename to src/mpi/figures/parallelFS.pdf diff --git a/day3/images/parallelFS.tex b/src/mpi/figures/parallelFS.tex similarity index 100% rename from day3/images/parallelFS.tex rename to src/mpi/figures/parallelFS.tex diff --git a/day3/images/sofar.fig 
b/src/mpi/figures/sofar.fig
similarity index 100%
rename from day3/images/sofar.fig
rename to src/mpi/figures/sofar.fig
diff --git a/day3/images/sofar.pdf b/src/mpi/figures/sofar.pdf
similarity index 100%
rename from day3/images/sofar.pdf
rename to src/mpi/figures/sofar.pdf
diff --git a/day3/images/sofar.tex b/src/mpi/figures/sofar.tex
similarity index 93%
rename from day3/images/sofar.tex
rename to src/mpi/figures/sofar.tex
index 5ddcc82..39d04ae 100644
--- a/day3/images/sofar.tex
+++ b/src/mpi/figures/sofar.tex
@@ -1,18 +1,18 @@
% Slide 210
\begin{tikzpicture}[xscale = 0.8]
\foreach \x/\c[count = \i from 0] in {0/blue8, 3/blue4, 6/yellowbrown4, 9/yellowbrown2} {
  \node[outer sep = 3pt, fill = \c!40!white, circle, draw, inner sep = 5pt] (\i) at (\x, 4) {\i};
  \fill[\c!40!white] (\x, 0) rectangle (\x + 3, 0.5);
}
\draw[step = 0.5cm] (0,0) grid (12, 0.5);
\draw[->] (1) to[bend left] (0);
\draw[->] (2) to[bend left] (0);
-\draw[->] (3) to[bend left] node[pos = 0.3, right, xshift = 0.5cm] {\texttt{MPI\_send(mypart, 0)}} (0);
+\draw[->] (3) to[bend left] node[pos = 0.3, right, xshift = 0.5cm] {\texttt{MPI\_Gather(mypart, 0)}} (0);
\draw[->] (0.north east) to[out = 70, in = 110, looseness = 2] (0.north west);
\draw[->] (0) to[bend right] node[midway, right] {\texttt{Write()}} (-0.05, 0.55);
\end{tikzpicture}
diff --git a/day3/images/sogoal.fig b/src/mpi/figures/sogoal.fig
similarity index 100%
rename from day3/images/sogoal.fig
rename to src/mpi/figures/sogoal.fig
diff --git a/day3/images/sogoal.pdf b/src/mpi/figures/sogoal.pdf
similarity index 100%
rename from day3/images/sogoal.pdf
rename to src/mpi/figures/sogoal.pdf
diff --git a/day3/images/sogoal.tex b/src/mpi/figures/sogoal.tex
similarity index 100%
rename from day3/images/sogoal.tex
rename to src/mpi/figures/sogoal.tex
diff --git a/src/mpi/mpi_advanced.tex b/src/mpi/mpi_advanced.tex
index bf39687..e36814f 100644
--- a/src/mpi/mpi_advanced.tex
+++ b/src/mpi/mpi_advanced.tex
@@ -1,829 +1,734 @@
\section{Advanced MPI}
\intersec{izar}

\begin{frame}
  \frametitle{Advanced MPI}
  \framesubtitle{Goals of this section}
  \begin{itemize}
  \item Overview of more advanced functionalities
  \item Persistent communications
  \item Advanced collective communications
  \item Describing your own datatype
  \item Redefining communicators
  \item Associating a topology to a communicator
  \item Parallel I/O
  \item One-sided communications
  \end{itemize}
\end{frame}

\subsection{Persistent point-to-point}

\begin{frame}[fragile]
  \frametitle{Persistent communications}
  \framesubtitle{}
  \begin{itemize}
  \item \cxxinline{MPI_Send_init} and \cxxinline{MPI_Recv_init} initialize the communication
  \item Same signature as the non-blocking communications
  \item \cxxinline{MPI_Start}, \cxxinline{MPI_Startall} to start the communication
  \item Completion is checked the same way as for non-blocking
  \end{itemize}
\end{frame}
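\begin{frame}[fragile]
  \frametitle{Persistent communications}
  \framesubtitle{Sketch}

  A minimal sketch of a repeated exchange with persistent requests; the
  buffers, counts, loop bound, and the neighbor ranks \cxxinline{prev} and
  \cxxinline{next} are assumed to exist:

  \begin{cxxcode}{Example (sketch)}
    MPI_Request requests[2];

    // initialize the communications once, before the iteration loop
    MPI_Send_init(sendbuf, count, MPI_DOUBLE, next, 0, MPI_COMM_WORLD, &requests[0]);
    MPI_Recv_init(recvbuf, count, MPI_DOUBLE, prev, 0, MPI_COMM_WORLD, &requests[1]);

    for (int step = 0; step < nsteps; ++step) {
      MPI_Startall(2, requests); // (re)start both communications
      // completion is checked as for non-blocking communications
      MPI_Waitall(2, requests, MPI_STATUSES_IGNORE);
    }

    MPI_Request_free(&requests[0]);
    MPI_Request_free(&requests[1]);
  \end{cxxcode}
\end{frame}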
\begin{frame}[exercise, fragile]
  \frametitle{Persistent communications}
  \framesubtitle{}
  \begin{itemize}
  \item Replace the non-blocking communications in the Poisson code with persistent ones
  \end{itemize}
\end{frame}

\subsection{Advanced collective communications}

\subsubsection{V versions}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{V extension to \cxxinline{MPI\_Gather}}
  \begin{cxxcode}{Syntax}
    int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                    void *recvbuf, const int recvcounts[], const int displs[],
                    MPI_Datatype recvtype, int root, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{recvcounts} is now an array, one entry per rank
  \item \cxxinline{displs} array of displacements defining where to place the $i^{\mathrm{th}}$ received data
  \item receive different sizes per process
  \item receive in an array with strides
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Gatherv semantic}
  \begin{cxxcode}{Semantic equivalent}
    // Every process
    MPI_Send(sendbuf, sendcount, sendtype, root, /*...*/);

    // On root process
    for(i = 0; i < nb_process; ++i)
      MPI_Recv(recvbuf + displs[i] * extent(recvtype), recvcounts[i],
               recvtype, i, /*...*/);
  \end{cxxcode}
\end{frame}
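\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Gatherv sketch}

  A small illustrative sketch (not part of the course examples) where rank $i$
  contributes $i + 1$ integers:

  \begin{cxxcode}{Example (sketch)}
    int rank, size; // needs <mpi.h> and <vector>
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    std::vector<int> sendbuf(rank + 1, rank); // rank i sends i + 1 values

    std::vector<int> recvcounts(size), displs(size);
    for (int i = 0, offset = 0; i < size; ++i) {
      recvcounts[i] = i + 1;  // what rank i contributes
      displs[i] = offset;     // where it lands in recvbuf
      offset += recvcounts[i];
    }

    std::vector<int> recvbuf(size * (size + 1) / 2); // only relevant on root
    MPI_Gatherv(sendbuf.data(), rank + 1, MPI_INT,
                recvbuf.data(), recvcounts.data(), displs.data(), MPI_INT,
                0, MPI_COMM_WORLD);
  \end{cxxcode}
\end{frame}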
\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{V extension to \cxxinline{MPI\_Scatter}}
  \begin{cxxcode}{Syntax}
    int MPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[],
                     MPI_Datatype sendtype, void *recvbuf, int recvcount,
                     MPI_Datatype recvtype, int root, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{sendcounts} is now an array, one entry per rank
  \item \cxxinline{displs} array of displacements defining where to take the $i^{\mathrm{th}}$ sent data
  \item send different sizes per process
  \item send from an array with strides
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Scatterv semantic}
  \begin{cxxcode}{Semantic equivalent}
    // On root process
    for(i = 0; i < nb_process; ++i)
      MPI_Send(sendbuf + displs[i] * extent(sendtype), sendcounts[i],
               sendtype, i, /*...*/);

    // Every process
    MPI_Recv(recvbuf, recvcount, recvtype, root, /*...*/);
  \end{cxxcode}
\end{frame}

\subsubsection{Non-blocking collective communications}

\begin{frame}[fragile]
  \frametitle{Non-blocking collective communications}
  \framesubtitle{}
  \begin{itemize}
  \item \code{I} variants of the collective communications
  \item extra parameter \cxxinline{request}
  \item \cxxinline{MPI_Ibarrier}, \cxxinline{MPI_Ibcast}
  \item \cxxinline{MPI_Igather}, \cxxinline{MPI_Igatherv}, \cxxinline{MPI_Iscatter}, \cxxinline{MPI_Iscatterv}
  \item \cxxinline{MPI_Iallgather}, \cxxinline{MPI_Iallgatherv}, \cxxinline{MPI_Ialltoall}
  \item \cxxinline{MPI_Ireduce}, \cxxinline{MPI_Iallreduce}, \cxxinline{MPI_Iscan}, \cxxinline{MPI_Iexscan}
  \end{itemize}
\end{frame}
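\begin{frame}[fragile]
  \frametitle{Non-blocking collective communications}
  \framesubtitle{Sketch}

  A minimal sketch overlapping an \cxxinline{MPI_Iallreduce} with computation
  that does not depend on its result (the variables are illustrative):

  \begin{cxxcode}{Example (sketch)}
    double local_sum = 0.; // per-rank partial result
    double global_sum;
    MPI_Request request;

    MPI_Iallreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM,
                   MPI_COMM_WORLD, &request);

    // ... work that does not depend on global_sum ...

    MPI_Wait(&request, MPI_STATUS_IGNORE); // global_sum is now valid
  \end{cxxcode}
\end{frame}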
\subsubsection{Persistent collective communications}

\begin{frame}[fragile]
  \frametitle{Persistent collective communications}
  \framesubtitle{}
  \begin{itemize}
  \item \code{\_init} variants of the collective communications
  \item extra parameter \cxxinline{request}
  \item \cxxinline{MPI_Barrier_init}, \cxxinline{MPI_Bcast_init}
  \item \cxxinline{MPI_Gather_init}, \cxxinline{MPI_Gatherv_init}, \cxxinline{MPI_Scatter_init}, \cxxinline{MPI_Scatterv_init}
  \item \cxxinline{MPI_Allgather_init}, \cxxinline{MPI_Allgatherv_init}, \cxxinline{MPI_Alltoall_init}
  \item \cxxinline{MPI_Reduce_init}, \cxxinline{MPI_Allreduce_init}, \cxxinline{MPI_Scan_init}, \cxxinline{MPI_Exscan_init}
  \end{itemize}
\end{frame}

\begin{frame}[exercise, fragile]
  \frametitle{Persistent collective}
  \framesubtitle{}
  \begin{itemize}
  \item Replace the \cxxinline{MPI_Allreduce} with a persistent one
  \end{itemize}
\end{frame}

\subsection{Derived Datatypes}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Definition of a datatype}
  \begin{itemize}
  \item \cxxinline{MPI_Datatype} opaque type containing a \emph{Typemap}
    \begin{itemize}
    \item $Typemap = \{(type_{0},disp_{0}), \dotsb, (type_{n - 1},disp_{n - 1})\}$
    \item sequence of basic datatypes
    \item sequence of displacements (in bytes)
    \end{itemize}
  \item \code{extent} is the span from the first byte to the last one, with alignment requirements
    \begin{align*}
      lb(Typemap) &= \underset{j}{min}(disp_{j}),\\
      ub(Typemap) &= \underset{j}{max}(disp_{j} + \mathrm{sizeof}(type_{j})) + \epsilon, \text{ and}\\
      extent(Typemap) &= ub(Typemap) - lb(Typemap)
    \end{align*}
    $\epsilon$ is there to account for alignment requirements
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Derived Datatypes}
  \framesubtitle{Base datatypes}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_CHAR} & \cxxinline{char} \\
      \cxxinline{MPI_SHORT} & \cxxinline{signed short int} \\
      \cxxinline{MPI_INT} & \cxxinline{signed int} \\
      \cxxinline{MPI_LONG} & \cxxinline{signed long int} \\
      \cxxinline{MPI_LONG_LONG_INT} & \cxxinline{signed long long int} \\
      \cxxinline{MPI_LONG_LONG} & \cxxinline{signed long long int} \\
      \cxxinline{MPI_SIGNED_CHAR} & \cxxinline{signed char} \\
      \cxxinline{MPI_UNSIGNED_CHAR} & \cxxinline{unsigned char} \\
      \cxxinline{MPI_UNSIGNED_SHORT} & \cxxinline{unsigned short int} \\
      \cxxinline{MPI_UNSIGNED} & \cxxinline{unsigned int} \\
      \cxxinline{MPI_UNSIGNED_LONG} & \cxxinline{unsigned long int} \\
      \cxxinline{MPI_UNSIGNED_LONG_LONG} & \cxxinline{unsigned long long int} \\
      \bottomrule
    \end{tabular}
  \end{minipage}
  \hspace{1cm}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_FLOAT} & \cxxinline{float} \\
      \cxxinline{MPI_DOUBLE} & \cxxinline{double} \\
      \cxxinline{MPI_LONG_DOUBLE} & \cxxinline{long double} \\
      \cxxinline{MPI_WCHAR} & \cxxinline{wchar_t} \\
      \cxxinline{MPI_C_BOOL} & \cxxinline{_Bool} \\
      \cxxinline{MPI_INT8_T} & \cxxinline{int8_t} \\
      \cxxinline{MPI_INT16_T} & \cxxinline{int16_t} \\
      \cxxinline{MPI_INT32_T} & \cxxinline{int32_t} \\
      \cxxinline{MPI_INT64_T} & \cxxinline{int64_t} \\
      \cxxinline{MPI_UINT8_T} & \cxxinline{uint8_t} \\
      \cxxinline{MPI_UINT16_T} & \cxxinline{uint16_t} \\
      \cxxinline{MPI_UINT32_T} & \cxxinline{uint32_t} \\
      \cxxinline{MPI_UINT64_T} & \cxxinline{uint64_t} \\
      \bottomrule
    \end{tabular}
  \end{minipage}
\end{frame}

\begin{frame}
  \frametitle{Derived Datatypes}
  \framesubtitle{Base datatypes}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C++ datatype\\
      \midrule
      \cxxinline{MPI_CXX_BOOL} & \cxxinline{bool} \\
      \cxxinline{MPI_CXX_FLOAT_COMPLEX} & \cxxinline{std::complex<float>} \\
      \cxxinline{MPI_CXX_DOUBLE_COMPLEX} & \cxxinline{std::complex<double>} \\
      \cxxinline{MPI_CXX_LONG_DOUBLE_COMPLEX} & \cxxinline{std::complex<long double>}\\
      \bottomrule
    \end{tabular}
  \end{minipage}
  \hspace{1.8cm}
  \begin{minipage}{.3\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_AINT} & \cxxinline{MPI_Aint} \\
      \cxxinline{MPI_OFFSET} & \cxxinline{MPI_Offset} \\
      \cxxinline{MPI_COUNT} & \cxxinline{MPI_Count} \\
      \cxxinline{MPI_BYTE} & \\
      \cxxinline{MPI_PACKED} & \\
      \bottomrule
    \end{tabular}
  \end{minipage}
\end{frame}

\note{
  \begin{itemize}
  \item \cxxinline{MPI_CHAR} is a printable character, where \cxxinline{MPI_BYTE} is a type of exactly 8 bits, not printable as a character
  \item \cxxinline{MPI_PACKED} for pack/unpack
  \end{itemize}
}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Arrays}
  \begin{cxxcode}{Syntax}
    int MPI_Type_contiguous(int count, MPI_Datatype oldtype, MPI_Datatype *newtype);

    int MPI_Type_vector(int count, int blocklength, int stride,
                        MPI_Datatype oldtype, MPI_Datatype *newtype);
  \end{cxxcode}
  \begin{itemize}
  \item arrays of contiguous elements, or of strided blocks, of the same type
  \item \cxxinline{count}: number of repetitions (blocks)
  \item \cxxinline{blocklength}: number of elements per block
  \item \cxxinline{stride}: number of elements between the start of each block
  \end{itemize}
\end{frame}
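\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Vector sketch}

  A small sketch (not part of the course examples) describing one column of a
  row-major $N \times N$ matrix with \cxxinline{MPI_Type_vector}:

  \begin{cxxcode}{Example (sketch)}
    const int N = 4;
    double a[N][N]; // row-major storage

    MPI_Datatype column_t;
    // N blocks of 1 element, strides of N elements between block starts
    MPI_Type_vector(N, 1, N, MPI_DOUBLE, &column_t);
    MPI_Type_commit(&column_t);

    // send the first column to rank 1 (illustrative destination and tag)
    MPI_Send(&a[0][0], 1, column_t, 1, 0, MPI_COMM_WORLD);

    MPI_Type_free(&column_t);
  \end{cxxcode}
\end{frame}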
\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Array variants}
  \begin{itemize}
  \item \cxxinline{MPI_Type_create_hvector}: same as \cxxinline{MPI_Type_vector} with \cxxinline{stride} expressed in bytes
  \item \cxxinline{MPI_Type_create_indexed_block}: same as \cxxinline{MPI_Type_vector} with an array of \cxxinline{displacements}
  \item \cxxinline{MPI_Type_create_hindexed_block}: same as \cxxinline{MPI_Type_create_indexed_block} with \cxxinline{displacements} in bytes
  \item \cxxinline{MPI_Type_indexed}: same as \cxxinline{MPI_Type_create_indexed_block} with an array of \cxxinline{blocklengths}
  \item \cxxinline{MPI_Type_create_hindexed}: same as \cxxinline{MPI_Type_indexed} with \cxxinline{displacements} in bytes
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Structures}
  \begin{cxxcode}{Syntax}
    int MPI_Type_create_struct(int count, const int array_of_blocklengths[],
                               const MPI_Aint array_of_displacements[],
                               const MPI_Datatype array_of_types[],
                               MPI_Datatype *newtype);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{count}: number of repetitions (blocks)
  \item \cxxinline{array_of_blocklengths}: sizes per block
  \item \cxxinline{array_of_displacements}: displacements between blocks in bytes
  \item \cxxinline{array_of_types}: types contained in each block
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Useful helper functions}
  \begin{itemize}
  \item \cxxinline{MPI_Get_address}: get the address of a variable
  \item \cxxinline{MPI_Aint_diff}: get the difference between 2 addresses
  \item \cxxinline{MPI_Aint_add}: get the sum of 2 addresses
  \item \cxxinline{MPI_Type_size}: get the size of a datatype
  \item \cxxinline{MPI_Type_get_extent}: get the lower bound and the extent of a type
  \item \cxxinline{MPI_Type_create_resized}: reset the lower bound and the extent of a type
  \end{itemize}
\end{frame}

\note{
  \begin{itemize}
  \item Prefer \cxxinline{MPI_Get_address} over \&
  \item if the extent is badly set, it may not be possible to communicate multiple objects of the same datatype
  \end{itemize}
}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Commit/free}
  \begin{cxxcode}{Syntax}
    int MPI_Type_commit(MPI_Datatype *datatype);

    int MPI_Type_free(MPI_Datatype *datatype);
  \end{cxxcode}
  \begin{itemize}
  \item new datatypes should be committed before being usable in communications
  \item committed types need to be freed once they are not used anymore
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Example}
  \cxxfile[title={mpi/datatypes.cc},
           minted options app={firstline=13, lastline=41, fontsize=\tiny}]{examples/mpi/datatypes.cc}
\end{frame}

\begin{frame}[fragile, exercise]
  \frametitle{Derived Datatypes}
  \framesubtitle{Send lines in the Poisson code}
  \begin{itemize}
  \item Create a \cxxinline{MPI_Datatype line_t} representing a line of data
  \item Exchange data of type \cxxinline{line_t} instead of \cxxinline{MPI_FLOAT}
  \end{itemize}
\end{frame}

\subsection{Pack/Unpack}

\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Pack}
  \begin{cxxcode}{Syntax}
    int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype,
                 void *outbuf, int outsize, int *position, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{inbuf}, \cxxinline{incount}, \cxxinline{datatype} correspond to the description of the data to pack
  \item \cxxinline{outbuf}, \cxxinline{outsize} describe the buffer where to pack
  \item \cxxinline{position} current position in the packing buffer
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Unpack}
  \begin{cxxcode}{Syntax}
    int MPI_Unpack(const void *inbuf, int insize, int *position, void *outbuf,
                   int outcount, MPI_Datatype datatype, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{inbuf}, \cxxinline{insize} describe the buffer from which to unpack
  \item \cxxinline{position} current position in the unpacking buffer
  \item \cxxinline{outbuf}, \cxxinline{outcount}, and \cxxinline{datatype} correspond to the description of the data to unpack
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Example}
  \cxxfile[title={mpi/pack\_unpack.cc},
           minted options app={firstline=26, lastline=39}]{examples/mpi/pack_unpack.cc}
\end{frame}

\subsection{Groups and Communicators}

\begin{frame}[containsverbatim]
  \frametitle{Groups and Communicators}
  \begin{itemize}
  \item a \code{communicator}:
    \begin{itemize}
    \item encapsulates a \code{context}, a \code{group}, a \code{virtual topology} and \code{attributes}
    \item two kinds: \code{intra-communicator} and \code{inter-communicator}
    \end{itemize}
  \item a \code{group}:
    \begin{itemize}
    \item ordered set of processes
    \item each process has a unique ID (rank within the group) and can belong to several different groups
    \item a group can be used to create a new communicator
    \end{itemize}
  \end{itemize}
\end{frame}

\note{
  \begin{itemize}
  \item \code{intra}: communications inside a group
  \item \code{inter}: communications between groups
  \end{itemize}
}

\begin{frame}[containsverbatim]
  \frametitle{Groups and Communicators}
  \framesubtitle{Creating new communicators}
  \begin{itemize}
  \item duplicating or splitting an existing one: \cxxinline{MPI_Comm_dup}, \cxxinline{MPI_Comm_split}
  \item creating a communicator from a group: \cxxinline{MPI_Comm_create}, \cxxinline{MPI_Comm_create_group}
  \item need to create groups
    \begin{itemize}
    \item from a communicator: \cxxinline{MPI_Comm_group}
    \item boolean operations: \cxxinline{MPI_Group_union}, \cxxinline{MPI_Group_intersection}, \cxxinline{MPI_Group_difference}
    \item specifying ranks: \cxxinline{MPI_Group_incl}, \cxxinline{MPI_Group_excl}
    \end{itemize}
  \item destroy created objects: \cxxinline{MPI_Comm_free}, \cxxinline{MPI_Group_free}
  \end{itemize}
\end{frame}

\subsection{Virtual Topologies}

\begin{frame}
  \frametitle{Virtual Topologies}
  \framesubtitle{}
  \begin{itemize}
  \item potential performance gain by mapping processes to the hardware
  \item helps with program readability
  \item types of topologies: Cartesian, graph, distributed graph
  \item collective communications on neighborhoods
  \end{itemize}
\end{frame}

\note{
  Details only on the Cartesian one
}

\begin{frame}[fragile]
  \frametitle{Virtual Topologies}
  \framesubtitle{Cartesian topology}
  \begin{cxxcode}{Syntax}
    int MPI_Cart_create(MPI_Comm comm_old, int ndims, const int dims[],
                        const int periods[], int reorder, MPI_Comm *comm_cart);
  \end{cxxcode}
  \begin{itemize}
  \item create a communicator with Cartesian information
  \item convenient functions:
    \begin{itemize}
    \item \cxxinline{MPI_Dims_create} helps create a balanced distribution of processes
    \item \cxxinline{MPI_Cart_shift} helps determine the neighbors
    \item \cxxinline{MPI_Cart_rank} get the rank based on coordinates
    \item \cxxinline{MPI_Cart_coords} get the coordinates based on the rank
    \end{itemize}
  \end{itemize}
\end{frame}
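\begin{frame}[fragile]
  \frametitle{Virtual Topologies}
  \framesubtitle{Cartesian sketch}

  A minimal sketch, assuming a periodic 2D grid and one value exchanged along
  the first dimension:

  \begin{cxxcode}{Example (sketch)}
    int size, dims[2] = {0, 0}, periods[2] = {1, 1};
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Dims_create(size, 2, dims); // balanced 2D decomposition

    MPI_Comm cart;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &cart);

    int source, dest;
    MPI_Cart_shift(cart, 0, 1, &source, &dest); // neighbors along dimension 0

    double sendval = 1., recvval;
    MPI_Sendrecv(&sendval, 1, MPI_DOUBLE, dest, 0,
                 &recvval, 1, MPI_DOUBLE, source, 0, cart, MPI_STATUS_IGNORE);

    MPI_Comm_free(&cart);
  \end{cxxcode}
\end{frame}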
\begin{frame}[fragile]
  \frametitle{Virtual topology}
  \framesubtitle{Neighborhood collectives}
  \begin{itemize}
  \item \cxxinline{MPI_Neighbor_allgather}: assuming we are on the process with rank $i$, gather data from every rank $j$ such that the edge $(j, i)$ exists, and send the same data to every $j$ where the edge $(i, j)$ exists
  \item \cxxinline{MPI_Neighbor_alltoall}: compared to allgather, sends different data to each process $j$
  \item vector variants are available (\code{v})
  \item immediate variants are available (\code{I})
  \item persistent variants are available (\code{\_init})
  \item \cxxinline{MPI_Neighbor_alltoallw} also exists in all flavors: different datatypes are exchanged with the neighbors
  \end{itemize}
\end{frame}

\begin{frame}[exercise, fragile]
  \frametitle{Virtual topology}
  \framesubtitle{}
  \begin{itemize}
  \item Rewrite the parallelism using a Cartesian communicator
  \item Use neighbor collective communications
  \end{itemize}
\end{frame}

\subsection{Parallel I/O}

\begin{frame}[containsverbatim]
  \frametitle{Parallel I/O overview}
  \begin{itemize}
  \item I/O is often (if not always) the main bottleneck in a parallel application
  \item MPI provides a mechanism to read/write in parallel
  \end{itemize}
  \begin{center}
-    \input{day3/images/parallelFS.tex}
+    \input{src/mpi/figures/parallelFS.tex}
  \end{center}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Introductory remarks}
  \begin{itemize}
  \item The MPI IO API works on your desktop/laptop
  \item Most of the large HPC systems have a \textbf{parallel file system} (like GPFS, Lustre, \emph{etc.})
  \item If the file is distributed smartly on a parallel file system, performance increases
  \item MPI IO offers a high-level API to access a distributed file (no need to implement complex POSIX calls)
  \item \textbf{does not work with ASCII files}
  \item Most of the standard file formats support MPI IO (\emph{e.g.} HDF5, NetCDF, \emph{etc.})
\end{itemize} \end{frame} \begin{frame}[containsverbatim] \frametitle{Poisson so far} \begin{center} - \input{day3/images/sofar.tex} + \input{src/mpi/figures/sofar.tex} \end{center} \end{frame} \begin{frame}[containsverbatim] \frametitle{Poisson ideal} \begin{center} - \input{day3/images/sogoal.tex} + \input{src/mpi/figures/sogoal.tex} \end{center} \end{frame} -% \begin{frame}[containsverbatim] -% \frametitle{Open/Close a file in parallel} -% \begin{itemize} -% \item {\verb+comm+ : the communicator that contains the writing/reading MPI processes} -% \item {\verb+*filename+ : a file name} -% \item {\verb+amode+ : file access mode (Read only \verb+MPI_MODE_RDONLY+, read/write \verb+MPI_MODE_RDWR+, create \verb+MPI_MODE_CREATE+, etc..)} -% \item {\verb+info+ : file info object} -% \item {\verb+*fh+ : file handle} -% \end{itemize} - -% \begin{lstlisting}[language=C,frame=lines] -% int MPI_File_open(MPI_Comm comm, const char *filename, int amode, MPI_Info info, MPI_File *fh) -% \end{lstlisting} - -% \begin{lstlisting}[language=C,frame=lines] -% int MPI_File_close(MPI_File *fh) -% \end{lstlisting} -% \textbf{Collective calls !!} -% \end{frame} - - -% \begin{frame}[containsverbatim] -% \frametitle{etype, offset and displacement} -% \begin{itemize} -% \item {\textbf{etype} is the elementary type of the data of the parallel accessed file} -% \item {\textbf{offset} is a position in the file in term of multiple of etypes} -% \item {\textbf{displacement} of a position within the file is the number of bytes from the beginning of the file} -% \end{itemize} -% \begin{center} -% \input{day3/images/offset.tex} -% \end{center} -% \end{frame} - - -% \begin{frame}[containsverbatim] -% \frametitle{Simple independent read/write} -% \begin{itemize} -% \item {Can be used from a single (or group) of processes} -% \item {The \verb+offset+ must be specified in the \verb+*buf+ buffer} -% \item {\verb+count+ elements of type \verb+datatype+ are written} -% \end{itemize} -% \begin{lstlisting}[language=C,frame=lines] -% int MPI_File_write_at(MPI_File fh, MPI_Offset offset, ROMIO_CONST void *buf, int count, MPI_Datatype datatype, MPI_Status *status) -% \end{lstlisting} -% \begin{lstlisting}[language=C,frame=lines] -% int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf,int count, MPI_Datatype datatype, MPI_Status *status) -% \end{lstlisting} -% \end{frame} - - -% \begin{frame}[containsverbatim] -% \frametitle{\texttt{view} by each process} -% \begin{itemize} -% \item {Initialy, each process view the file as a linear byte stream and each process views data in its own native representation} -% \item {this is changed using \verb+MPI_File_set_view+} -% \item {\verb+disp+ is the displacement (defines the beginning of the data of the file that belongs to the process) in bytes} -% \item {\verb+etype+ is the elementary type} -% \end{itemize} -% \begin{lstlisting}[language=C,frame=lines] -% int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, MPI_Datatype filetype, ROMIO_CONST char *datarep, MPI_Info info) -% \end{lstlisting} -% \begin{lstlisting}[language=C,frame=lines] -% int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, MPI_Datatype *etype, MPI_Datatype *filetype, char *datarep) -% \end{lstlisting} -% \end{frame} - -% \begin{frame}[containsverbatim] -% \frametitle{Setting up a \texttt{view}} -% \begin{center} -% \input{day3/images/displacements.tex} -% \end{center} -% (source : MPI 2.2 specifications) -% \end{frame} - -% \begin{frame}[containsverbatim] -% \frametitle{Simple independent 
read/write without offset} -% \begin{itemize} -% \item {the \texttt{view} is specified prior to the call } -% \end{itemize} -% \begin{lstlisting}[language=C,frame=lines] -% int MPI_File_write(MPI_File fh, ROMIO_CONST void *buf, int count, MPI_Datatype datatype, MPI_Status *status) -% \end{lstlisting} -% \begin{lstlisting}[language=C,frame=lines] -% int MPI_File_read(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status) -% \end{lstlisting} -% \end{frame} - - -% \begin{frame}[containsverbatim] -% \frametitle{Collective read/write with/without offset} -% \begin{itemize} -% \item {Same structure than Independent routines but with \verb+_all+ at the end } -% \item {for instance : } -% \end{itemize} -% \begin{lstlisting}[language=C,frame=lines] -% int MPI_File_write_all(MPI_File fh, ROMIO_CONST void *buf, int count, MPI_Datatype datatype, MPI_Status *status) -% \end{lstlisting} -% \end{frame} - -% \subsection{One Sided} - -% \begin{frame}[containsverbatim] -% \frametitle{One-sided communication} -% \begin{itemize} -% \item {A MPI process can access another MPI process's memory space directly (RMA)} -% \item {No explicit coordination between both processes} -% \item {explicit transfer, explicit synchronization} -% \item {Better performance} -% \end{itemize} -% \end{frame} - -% \begin{frame}[containsverbatim] -% \frametitle{One-sided communication} -% Initialization/Free (of the \textit{window} = window in memory) -% \begin{itemize} -% \item {\verb+MPI_Alloc_Mem()+, \verb+MPI_Free_Mem()+} -% \item {\verb+MPI_Win_Create()+, \verb+MPI_Win_Free()+} -% \end{itemize} -% Remote memory access -% \begin{itemize} -% \item {\verb+MPI_Put()+ (like send)} -% \item {\verb+MPI_Get()+ (like recv)} -% \item {\verb+MPI_Accumulate()+ (like reduce)} -% \end{itemize} -% Synchronization -% \begin{itemize} -% \item {\verb+MPI_Win_Fence()+} -% \item {\verb+MPI_Win_Post()+, \verb+MPI_Win_Start()+, \verb+MPI_Win_Complete()+, \verb+MPI_Win_Wait()+} -% \item {\verb+MPI_Win_Lock()+, \verb+MPI_Win_Unlock()+} -% \end{itemize} - -% \end{frame} - -% \begin{frame}[containsverbatim] -% \frametitle{Memory allocation} -% \begin{itemize} -% \item {allocate \verb+size+ of memory segments in bytes} -% \item {\verb+info+ can be used to provide directives that control the desired location of the allocated memory} -% \item {\verb+*baseptr+ is the pointer to the beginning of the memory segment} -% \end{itemize} - -% \begin{lstlisting}[language=C,frame=lines] -% int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr) -% \end{lstlisting} - -% \end{frame} - - - -% \begin{frame}[containsverbatim] -% \frametitle{Memory \texttt{window} creation} -% \begin{itemize} -% \item {A \verb+MPI_Win+ is an opaque object which can be reused to perform one-sided communication} -% \item {A \verb+window+ is a specified region in memory that can be accessed by another process} -% \end{itemize} - -% \begin{lstlisting}[language=C,frame=lines] -% int MPI_Win_create(void *base, MPI_Aint size, int disp_unit, MPI_Info info, MPI_Comm comm, MPI_Win *win) -% \end{lstlisting} - -% where \verb+base+ is the initial address of the region, of \verb+size+ length of size \verb+disp_unit+ in bytes. 
-
-% \end{frame}
-
-
-% \begin{frame}[containsverbatim]
-% \frametitle{\texttt{Put}/\texttt{Get} within the \texttt{window}}
-% \begin{itemize}
-% \item {close to an \verb+MPI_Send+ call with
-% \begin{itemize}
-% \item {\textit{what to send} : \verb+origin_addr+ start of the buffer of size \verb+origin_count+ of type \verb+origin_datatype+}
-% \item {\textit{to which process} : \verb+target_rank+ at the place \verb+target_count+ of type \verb+target_datatype+}
-% \item {\textit{in which context} : within the window \verb+win+}
-% \end{itemize}
-% }
-% % \item {}
-% \end{itemize}
-
-% \begin{lstlisting}[language=C,frame=lines]
-% int MPI_Put(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win)
-% \end{lstlisting}
-
-% \begin{lstlisting}[language=C,frame=lines]
-% int MPI_Get(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win)
-% \end{lstlisting}
-
-
-% \end{frame}
-
-
-% \begin{frame}[containsverbatim]
-% \frametitle{One-sided communications example}
-
-% \begin{lstlisting}[language=C,frame=lines]
-% MPI_Win win;
-% int *mem;
-% float x = 1.0;
-% MPI_Alloc_mem(size * sizeof(int), MPI_INFO_NULL, &mem);
-% MPI_Win_create(mem, size * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
-
-% // Write x at position 1 within process 0 's memory
-% MPI_Put(&x, 1, MPI_FLOAT, 0, rank, 1, MPI_INT, win);
-
-% MPI_Win_free(win);
-% MPI_Free_mem(mem);
-% \end{lstlisting}
-
-
-% \end{frame}
-
-
-% \begin{frame}[containsverbatim]
-% \frametitle{One-sided communications remarks}
-
-% \begin{itemize}
-% % \item {Three primitives : Put (like a send), Get (like a recv) and accumulate (like a reduction)}
-% % \item {synchronizations : fence / post-start-complete-wait / lock-unlock}
-% \item {Pay attention to the memory coherence}
-% \item {Can be dangerous : how a process knows if its data are in use/modified ?}
-% \item {MPI-3 provides new features : \begin{itemize}
-% \item cache-coherent windows,
-% \item new primitives \verb+MPI_Get_accumulate()+, \verb+MPI_Fetch_and_op()+, \verb+MPI_Compare_and_swap+,
-% \item requested-based primitives like \verb+MPI_R{put,get,accumulate,get_accumulate}+,
-% \item ``all''-versions of the synchronization routines : \verb+MPI_Win_{un}lock_all+, \verb+MPI_Win_flush{_all}+, \verb+MPI_Win_flush_local{_all}+
-% \end{itemize}
-% }
-% % \item {}
-% \end{itemize}
-% \end{frame}
+\begin{frame}[fragile]
+  \frametitle{Open/Close a file in parallel}
+
+  \begin{cxxcode}{Syntax}
+    int MPI_File_open(MPI_Comm comm, const char *filename, int amode,
+                      MPI_Info info, MPI_File *fh);
+
+    int MPI_File_close(MPI_File *fh);
+  \end{cxxcode}
+
+  \begin{itemize}
+  \item \cxxinline{comm}: the communicator that contains the writing/reading MPI processes
+  \item \cxxinline{filename}: a file name
+  \item \cxxinline{amode}: file access mode, \cxxinline{MPI_MODE_RDONLY}, \cxxinline{MPI_MODE_WRONLY},
+    \cxxinline{MPI_MODE_RDWR}, \cxxinline{MPI_MODE_CREATE}, \emph{etc.}
+  \item \cxxinline{info}: file info object (\cxxinline{MPI_INFO_NULL} is a valid info)
+  \item \cxxinline{fh}: file handle
+  \end{itemize}
+
+  \textbf{Collective calls!}
+\end{frame}
+
+
+\begin{frame}[containsverbatim]
+  \frametitle{Parallel IO}
+  \framesubtitle{Terminology}
+  \begin{itemize}
+  \item \code{etype} is the elementary type of the data of the parallel accessed file
+  \item \code{offset} is a position in the file in terms of multiples of the etype
+  \item the \code{displacement} of a position within the file is the number of bytes from the beginning of the file
+  \end{itemize}
+  \begin{center}
+    \includegraphics{src/mpi/figures/offset}
+  \end{center}
+\end{frame}
+
+
+\begin{frame}[containsverbatim]
+  \frametitle{Parallel IO}
+  \framesubtitle{Simple independent read/write}
+
+  \begin{cxxcode}{Syntax}
+    int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, int count,
+                         MPI_Datatype datatype, MPI_Status *status);
+
+    int MPI_File_write_at(MPI_File fh, MPI_Offset offset, const void *buf,
+                          int count, MPI_Datatype datatype, MPI_Status *status);
+  \end{cxxcode}
+
+  \begin{itemize}
+  \item Can be used from a single process (or a group of processes)
+  \item \cxxinline{offset} is the position in the file, in multiples of the etype, at which to start reading/writing
+  \item \cxxinline{count} elements of type \cxxinline{datatype} are read/written
+  \end{itemize}
+\end{frame}
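+
+
+\begin{frame}[fragile]
+  \frametitle{Parallel IO}
+  \framesubtitle{Sketch}
+
+  A minimal sketch, assuming each rank writes \cxxinline{N} doubles from a
+  hypothetical buffer \cxxinline{data} at a rank-dependent position (default
+  file view, so offsets are in bytes):
+
+  \begin{cxxcode}{Example (sketch)}
+    MPI_File fh;
+    // '+' combines the distinct mode bits, equivalent to a bitwise OR here
+    MPI_File_open(MPI_COMM_WORLD, "out.dat",
+                  MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
+
+    // each rank writes its block at a disjoint location in the file
+    MPI_Offset offset = rank * N * sizeof(double);
+    MPI_File_write_at(fh, offset, data, N, MPI_DOUBLE, MPI_STATUS_IGNORE);
+
+    MPI_File_close(&fh);
+  \end{cxxcode}
+\end{frame}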
+
+
+\begin{frame}[containsverbatim]
+  \frametitle{Parallel IO}
+  \framesubtitle{\code{view} by each process}
+
+  \begin{cxxcode}{Syntax}
+    int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
+                          MPI_Datatype filetype, const char *datarep, MPI_Info info);
+
+    int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, MPI_Datatype *etype,
+                          MPI_Datatype *filetype, char *datarep);
+  \end{cxxcode}
+
+  \begin{itemize}
+  \item initially, each process views the file as a linear byte stream, and each
+    process views data in its own native representation
+  \item \cxxinline{disp} is the displacement (defines the beginning of the
+    data of the file that belongs to the process) in bytes
+  \item \cxxinline{etype} is the unit of data access and positioning
+  \item \cxxinline{filetype} is a single \cxxinline{etype} or a multiple of it
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[containsverbatim]
+  \frametitle{Parallel IO}
+  \framesubtitle{Setting up a \code{view}}
+  \begin{center}
+    \input{src/mpi/figures/displacements.tex}
+  \end{center}
+  (source: MPI 2.2 specification)
+\end{frame}
+
+\begin{frame}[containsverbatim]
+  \frametitle{Parallel IO}
+  \framesubtitle{Simple independent read/write without offset}
+  \begin{cxxcode}{Syntax}
+    int MPI_File_read(MPI_File fh, void *buf, int count, MPI_Datatype datatype,
+                      MPI_Status *status);
+
+    int MPI_File_write(MPI_File fh, const void *buf, int count,
+                       MPI_Datatype datatype, MPI_Status *status);
+  \end{cxxcode}
+  \begin{itemize}
+  \item the \code{view} is specified prior to the call
+  \end{itemize}
+\end{frame}
+
+
+\begin{frame}[containsverbatim]
+  \frametitle{Parallel IO}
+  \framesubtitle{Collective read/write with/without offset}
+
+  \begin{cxxcode}{Syntax}
+    int MPI_File_write_all(MPI_File fh, const void *buf, int count,
+                           MPI_Datatype datatype, MPI_Status *status);
+
+    int MPI_File_read_all(MPI_File fh, void *buf, int count,
+                          MPI_Datatype datatype, MPI_Status *status);
+  \end{cxxcode}
+\end{frame}
+
+\subsection{One Sided}
+
+\begin{frame}[containsverbatim]
+  \frametitle{What we did not cover}
+  \begin{itemize}
+  \item One-sided communications
+    \begin{itemize}
+    \item \cxxinline{MPI_Put}, \cxxinline{MPI_Get}
+    \item \cxxinline{MPI_Win_*}
+    \item shared memory
+    \end{itemize}
+  \item Process management
+    \begin{itemize}
+    \item \cxxinline{MPI_Comm_spawn}
+    \item Communications on inter-communicators
+    \end{itemize}
+  \end{itemize}
+\end{frame}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../../phys_743_parallel_programming"
%%% End: