diff --git a/src/mpi/mpi_advanced.tex b/src/mpi/mpi_advanced.tex
index 9140d23..aa740a8 100644
--- a/src/mpi/mpi_advanced.tex
+++ b/src/mpi/mpi_advanced.tex
@@ -1,783 +1,786 @@
\renewcommand{\FIGREP}{src/mpi/figures}

\section{Advanced MPI}
\intersec{izar}

\begin{frame}
  \frametitle{Advanced MPI}
  \framesubtitle{Goals of this section}
  \begin{itemize}
  \item Overview of more advanced functionalities
  \item Persistent communications
  \item Advanced collective communications
  \item Describing your own datatype
  \item Redefining communicators
  \item Associating a topology to a communicator
  \item Parallel I/O
  \item One-sided communications
  \end{itemize}
\end{frame}

\subsection{Persistent point to point}

\begin{frame}[fragile]
  \frametitle{Persistent communications}
  \framesubtitle{}
  \begin{itemize}
  \item \cxxinline{MPI_Send_init}, \cxxinline{MPI_Recv_init} initialize the communication
  \item Same signature as the non-blocking communications
  \item \cxxinline{MPI_Start}, \cxxinline{MPI_Startall} to start the communication
  \item Completion is checked the same way as for non-blocking communications
  \end{itemize}
\end{frame}

\begin{frame}[exercise, fragile]
  \frametitle{Persistent communications}
  \framesubtitle{}
  \begin{itemize}
  \item Replace the non-blocking communications in the Poisson code by persistent ones
  \end{itemize}
\end{frame}

\subsection{Advanced collective communications}

\subsubsection{V versions}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{V extension to \cxxinline{MPI\_Gather}}
  \begin{cxxcode}{Syntax}
    int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                    void *recvbuf, const int recvcounts[], const int displs[],
                    MPI_Datatype recvtype, int root, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{recvcounts} is now an array, one entry per rank
  \item \cxxinline{displs} array of displacements defining where to place the $i^{\mathrm{th}}$ received data
  \item receive different sizes per process
  \item receive in an array with strides
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Gatherv semantic}
  \begin{cxxcode}{Semantic equivalent}
    // Every process
    MPI_Send(sendbuf, sendcount, sendtype, root, /*...*/);

    // On root process
    for(i = 0; i < nb_process; ++i)
      MPI_Recv(recvbuf + displs[i] * extent(recvtype), recvcounts[i], recvtype,
               i, /*...*/);
  \end{cxxcode}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{V extension to \cxxinline{MPI\_Scatter}}
  \begin{cxxcode}{Syntax}
    int MPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[],
                     MPI_Datatype sendtype, void *recvbuf, int recvcount,
                     MPI_Datatype recvtype, int root, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{sendcounts} is now an array, one entry per rank
  \item \cxxinline{displs} array of displacements defining where to take the data sent to the $i^{\mathrm{th}}$ process
  \item send different sizes per process
  \item send from an array with strides
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Scatterv semantic}
  \begin{cxxcode}{Semantic equivalent}
    // On root process
    for(i = 0; i < nb_process; ++i)
      MPI_Send(sendbuf + displs[i] * extent(sendtype), sendcounts[i], sendtype,
               i, /*...*/);

    // Every process
    MPI_Recv(recvbuf, recvcount, recvtype, root, /*...*/);
  \end{cxxcode}
\end{frame}
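\begin{frame}[fragile]
  \frametitle{Collective communications}
  \framesubtitle{Gatherv sketch}
  A minimal sketch, not part of the course examples, of gathering a varying number of
  integers per rank on rank $0$; the buffer names and counts are purely illustrative and
  \cxxinline{std::vector} is assumed to be available.
  \begin{cxxcode}{Example (sketch)}
    int prank, psize;
    MPI_Comm_rank(MPI_COMM_WORLD, &prank);
    MPI_Comm_size(MPI_COMM_WORLD, &psize);

    // every rank contributes prank + 1 values
    std::vector<int> sendbuf(prank + 1, prank);

    std::vector<int> recvcounts(psize), displs(psize), recvbuf;
    if (prank == 0) {
      for (int i = 0; i < psize; ++i) {
        recvcounts[i] = i + 1;
        displs[i] = (i == 0) ? 0 : displs[i - 1] + recvcounts[i - 1];
      }
      recvbuf.resize(displs[psize - 1] + recvcounts[psize - 1]);
    }

    MPI_Gatherv(sendbuf.data(), prank + 1, MPI_INT,
                recvbuf.data(), recvcounts.data(), displs.data(), MPI_INT,
                0, MPI_COMM_WORLD);
  \end{cxxcode}
\end{frame}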
\subsubsection{Non-blocking collective communications}

\begin{frame}[fragile]
  \frametitle{Non-blocking collective communications}
  \framesubtitle{}
  \begin{itemize}
  \item \code{I} variant of the collective communications
  \item extra parameter \cxxinline{request}
  \item \cxxinline{MPI_Ibarrier}, \cxxinline{MPI_Ibcast}
  \item \cxxinline{MPI_Igather}, \cxxinline{MPI_Igatherv}, \cxxinline{MPI_Iscatter}, \cxxinline{MPI_Iscatterv}
  \item \cxxinline{MPI_Iallgather}, \cxxinline{MPI_Iallgatherv}, \cxxinline{MPI_Ialltoall}
  \item \cxxinline{MPI_Ireduce}, \cxxinline{MPI_Iallreduce}, \cxxinline{MPI_Iscan}, \cxxinline{MPI_Iexscan}
  \end{itemize}
\end{frame}

\subsubsection{Persistent collective communications}

\begin{frame}[fragile]
  \frametitle{Persistent collective communications}
  \framesubtitle{}
  \begin{itemize}
  \item \code{\_init} variant of the collective communications
  \item extra parameter \cxxinline{request}
  \item \cxxinline{MPI_Barrier_init}, \cxxinline{MPI_Bcast_init}
  \item \cxxinline{MPI_Gather_init}, \cxxinline{MPI_Gatherv_init}, \cxxinline{MPI_Scatter_init}, \cxxinline{MPI_Scatterv_init}
  \item \cxxinline{MPI_Allgather_init}, \cxxinline{MPI_Allgatherv_init}, \cxxinline{MPI_Alltoall_init}
  \item \cxxinline{MPI_Reduce_init}, \cxxinline{MPI_Allreduce_init}, \cxxinline{MPI_Scan_init}, \cxxinline{MPI_Exscan_init}
  \end{itemize}
\end{frame}
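\begin{frame}[fragile]
  \frametitle{Persistent collective communications}
  \framesubtitle{Sketch}
  A minimal sketch of the persistent pattern applied to \cxxinline{MPI_Allreduce_init}
  (an MPI 4.0 feature, so an MPI 4.0 library is assumed); the variable names, the
  iteration loop and the helper function are illustrative only.
  \begin{cxxcode}{Example (sketch)}
    double local_sum, global_sum;
    MPI_Request request;

    // initialize once: same arguments as MPI_Allreduce, plus info and request
    MPI_Allreduce_init(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM,
                       MPI_COMM_WORLD, MPI_INFO_NULL, &request);

    for (int iter = 0; iter < max_iters; ++iter) {
      local_sum = compute_local_contribution(); // illustrative helper

      MPI_Start(&request);                   // start this round of the reduction
      MPI_Wait(&request, MPI_STATUS_IGNORE); // complete it, global_sum is now valid
    }

    MPI_Request_free(&request); // release the persistent request
  \end{cxxcode}
\end{frame}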
\begin{frame}[exercise, fragile]
  \frametitle{Persistent collective}
  \framesubtitle{}
  \begin{itemize}
  \item Replace the \cxxinline{MPI_Allreduce} by a persistent one
  \end{itemize}
\end{frame}

\subsection{Derived Datatypes}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Definition of a datatype}
  \begin{itemize}
  \item \cxxinline{MPI_Datatype} opaque type containing a \emph{Typemap}
    \begin{itemize}
    \item $Typemap = \{(type_{0}, disp_{0}), \dotsc, (type_{n - 1}, disp_{n - 1})\}$
    \item sequence of basic datatypes
    \item sequence of displacements (in bytes)
    \end{itemize}
  \item \code{extent} is the span from the first byte to the last one, with alignment requirements
    \begin{align*}
      lb(Typemap) &= \underset{j}{\min}(disp_{j}),\\
      ub(Typemap) &= \underset{j}{\max}(disp_{j} + \mathrm{sizeof}(type_{j})) + \epsilon,\ \text{and}\\
      extent(Typemap) &= ub(Typemap) - lb(Typemap)
    \end{align*}
    $\epsilon$ is there to account for alignment requirements
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Derived Datatypes}
  \framesubtitle{Base datatypes}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_CHAR} & \cxxinline{char} \\
      \cxxinline{MPI_SHORT} & \cxxinline{signed short int} \\
      \cxxinline{MPI_INT} & \cxxinline{signed int} \\
      \cxxinline{MPI_LONG} & \cxxinline{signed long int} \\
      \cxxinline{MPI_LONG_LONG_INT} & \cxxinline{signed long long int} \\
      \cxxinline{MPI_LONG_LONG} & \cxxinline{signed long long int} \\
      \cxxinline{MPI_SIGNED_CHAR} & \cxxinline{signed char} \\
      \cxxinline{MPI_UNSIGNED_CHAR} & \cxxinline{unsigned char} \\
      \cxxinline{MPI_UNSIGNED_SHORT} & \cxxinline{unsigned short int} \\
      \cxxinline{MPI_UNSIGNED} & \cxxinline{unsigned int} \\
      \cxxinline{MPI_UNSIGNED_LONG} & \cxxinline{unsigned long int} \\
      \cxxinline{MPI_UNSIGNED_LONG_LONG} & \cxxinline{unsigned long long int} \\
      \bottomrule
    \end{tabular}
  \end{minipage}
  \hspace{1cm}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_FLOAT} & \cxxinline{float} \\
      \cxxinline{MPI_DOUBLE} & \cxxinline{double} \\
      \cxxinline{MPI_LONG_DOUBLE} & \cxxinline{long double} \\
      \cxxinline{MPI_WCHAR} & \cxxinline{wchar_t} \\
      \cxxinline{MPI_C_BOOL} & \cxxinline{_Bool} \\
      \cxxinline{MPI_INT8_T} & \cxxinline{int8_t} \\
      \cxxinline{MPI_INT16_T} & \cxxinline{int16_t} \\
      \cxxinline{MPI_INT32_T} & \cxxinline{int32_t} \\
      \cxxinline{MPI_INT64_T} & \cxxinline{int64_t} \\
      \cxxinline{MPI_UINT8_T} & \cxxinline{uint8_t} \\
      \cxxinline{MPI_UINT16_T} & \cxxinline{uint16_t} \\
      \cxxinline{MPI_UINT32_T} & \cxxinline{uint32_t} \\
      \cxxinline{MPI_UINT64_T} & \cxxinline{uint64_t} \\
      \bottomrule
    \end{tabular}
  \end{minipage}
\end{frame}

\begin{frame}
  \frametitle{Derived Datatypes}
  \framesubtitle{Base datatypes}
  \begin{minipage}{.45\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C++ datatype\\
      \midrule
      \cxxinline{MPI_CXX_BOOL} & \cxxinline{bool} \\
      \cxxinline{MPI_CXX_FLOAT_COMPLEX} & \cxxinline{std::complex<float>} \\
      \cxxinline{MPI_CXX_DOUBLE_COMPLEX} & \cxxinline{std::complex<double>} \\
      \cxxinline{MPI_CXX_LONG_DOUBLE_COMPLEX} & \cxxinline{std::complex<long double>}\\
      \bottomrule
    \end{tabular}
  \end{minipage}
  \hspace{1.8cm}
  \begin{minipage}{.3\linewidth}
    \small
    \begin{tabular}{ll}
      \toprule
      MPI datatype & C datatype\\
      \midrule
      \cxxinline{MPI_AINT} & \cxxinline{MPI_Aint} \\
      \cxxinline{MPI_OFFSET} & \cxxinline{MPI_Offset} \\
      \cxxinline{MPI_COUNT} & \cxxinline{MPI_Count} \\
      \cxxinline{MPI_BYTE} & \\
      \cxxinline{MPI_PACKED} & \\
      \bottomrule
    \end{tabular}
  \end{minipage}
\end{frame}

\note{
  \begin{itemize}
  \item \cxxinline{MPI_CHAR} is a printable character, whereas \cxxinline{MPI_BYTE} is a type of exactly 8 bits that is not printable as a character
  \item \cxxinline{MPI_PACKED} is for pack/unpack
  \end{itemize}
}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Arrays}
  \begin{cxxcode}{Syntax}
    int MPI_Type_contiguous(int count, MPI_Datatype oldtype, MPI_Datatype *newtype);
    int MPI_Type_vector(int count, int blocklength, int stride,
                        MPI_Datatype oldtype, MPI_Datatype *newtype);
  \end{cxxcode}
  \begin{itemize}
  \item arrays of contiguous elements, or of strided blocks of the same type
  \item \cxxinline{count}: number of repetitions (blocks)
  \item \cxxinline{blocklength}: number of elements per block
  \item \cxxinline{stride}: number of elements between the starts of consecutive blocks
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Array variants}
  \begin{itemize}
  \item \cxxinline{MPI_Type_create_hvector}: same as \cxxinline{MPI_Type_vector} with \cxxinline{stride} expressed in bytes
  \item \cxxinline{MPI_Type_create_indexed_block}: same as \cxxinline{MPI_Type_vector} with an array of \cxxinline{displacements}
  \item \cxxinline{MPI_Type_create_hindexed_block}: same as \cxxinline{MPI_Type_create_indexed_block} with \cxxinline{displacements} in bytes
  \item \cxxinline{MPI_Type_indexed}: same as \cxxinline{MPI_Type_create_indexed_block} with an array of \cxxinline{blocklengths}
  \item \cxxinline{MPI_Type_create_hindexed}: same as \cxxinline{MPI_Type_indexed} with \cxxinline{displacements} in bytes
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Structures}
  \begin{cxxcode}{Syntax}
    int MPI_Type_create_struct(int count, const int array_of_blocklengths[],
                               const MPI_Aint array_of_displacements[],
                               const MPI_Datatype array_of_types[],
                               MPI_Datatype *newtype);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{count}: number of repetitions (blocks)
  \item \cxxinline{array_of_blocklengths}: sizes per block
  \item \cxxinline{array_of_displacements}: displacements between blocks in bytes
  \item \cxxinline{array_of_types}: types contained in each block
  \end{itemize}
\end{frame}
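\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Structure sketch}
  A minimal sketch, independent from the course examples, describing a hypothetical
  \cxxinline{Particle} structure with \cxxinline{MPI_Type_create_struct}; the helper
  functions and the commit/free calls are detailed on the next slides.
  \begin{cxxcode}{Example (sketch)}
    struct Particle { int id; double pos[3]; };
    Particle p;

    int blocklengths[2] = {1, 3};
    MPI_Datatype types[2] = {MPI_INT, MPI_DOUBLE};
    MPI_Aint displs[2], base;

    // compute the displacements of the members relative to the structure start
    MPI_Get_address(&p, &base);
    MPI_Get_address(&p.id, &displs[0]);
    MPI_Get_address(&p.pos, &displs[1]);
    displs[0] = MPI_Aint_diff(displs[0], base);
    displs[1] = MPI_Aint_diff(displs[1], base);

    MPI_Datatype particle_t;
    MPI_Type_create_struct(2, blocklengths, displs, types, &particle_t);
    MPI_Type_commit(&particle_t);

    // particle_t can now be used in communications, e.g.
    // MPI_Send(&p, 1, particle_t, dest, tag, MPI_COMM_WORLD);

    MPI_Type_free(&particle_t);
  \end{cxxcode}
\end{frame}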
\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Useful helper functions}
  \begin{itemize}
  \item \cxxinline{MPI_Get_address}: get the address of a variable
  \item \cxxinline{MPI_Aint_diff}: get the difference between 2 addresses
  \item \cxxinline{MPI_Aint_add}: get the sum of 2 addresses
  \item \cxxinline{MPI_Type_size}: get the size of a datatype
  \item \cxxinline{MPI_Type_get_extent}: get the lower bound and the extent of a type
  \item \cxxinline{MPI_Type_create_resized}: reset the lower bound and the extent of a type
  \end{itemize}
\end{frame}

\note{
  \begin{itemize}
  \item Prefer \cxxinline{MPI_Get_address} over \&
  \item If the extent is badly set, it is not possible to communicate multiple objects of the same datatype
  \end{itemize}
}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Commit/free}
  \begin{cxxcode}{Syntax}
    int MPI_Type_commit(MPI_Datatype *datatype);
    int MPI_Type_free(MPI_Datatype *datatype);
  \end{cxxcode}
  \begin{itemize}
  \item new datatypes must be committed before being usable in communications
  \item committed types need to be freed once they are not used anymore
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Derived Datatypes}
  \framesubtitle{Example}
  \cxxfile[title={mpi/datatypes.cc},
           minted options app={
             firstline=13,
             lastline=41,
             fontsize=\tiny}]{examples/mpi/datatypes.cc}
\end{frame}

\begin{frame}[fragile, exercise]
  \frametitle{Derived Datatypes}
  \framesubtitle{Send lines in the Poisson code}
  \begin{itemize}
  \item Create a \cxxinline{MPI_Datatype line_t} representing a line of data
  \item Exchange data of type \cxxinline{line_t} instead of \cxxinline{MPI_FLOAT}
  \end{itemize}
\end{frame}

\subsection{Pack/Unpack}

\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Pack}
  \begin{cxxcode}{Syntax}
    int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype,
                 void *outbuf, int outsize, int *position, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{inbuf}, \cxxinline{incount}, \cxxinline{datatype} correspond to the description of the data to pack
  \item \cxxinline{outbuf}, \cxxinline{outsize} describe the buffer where to pack
  \item \cxxinline{position} is the current position in the packing buffer
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Unpack}
  \begin{cxxcode}{Syntax}
    int MPI_Unpack(const void *inbuf, int insize, int *position, void *outbuf,
                   int outcount, MPI_Datatype datatype, MPI_Comm comm);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{inbuf}, \cxxinline{insize} describe the buffer from which to unpack
  \item \cxxinline{position} is the current position in the unpacking buffer
  \item \cxxinline{outbuf}, \cxxinline{outcount}, and \cxxinline{datatype} correspond to the description of the data to unpack
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Pack/Unpack}
  \framesubtitle{Example}
  \cxxfile[title={mpi/pack\_unpack.cc},
           minted options app={
             firstline=26,
             lastline=39
           }]{examples/mpi/pack_unpack.cc}
\end{frame}

\subsection{Groups and Communicators}

\begin{frame}[containsverbatim]
  \frametitle{Groups and Communicators}
  \begin{itemize}
  \item a \code{communicator}:
    \begin{itemize}
    \item encapsulates a \code{context}, a \code{group}, a \code{virtual topology} and \code{attributes}
    \item two kinds: \code{intra-communicator} and \code{inter-communicator}
    \end{itemize}
  \item a \code{group}:
    \begin{itemize}
    \item ordered set of processes
    \item each process has a unique ID (rank within the group) and can belong to several different groups
    \item a group can be used to create a new communicator
    \end{itemize}
  \end{itemize}
\end{frame}

\note{
  \begin{itemize}
  \item \code{intra}: communications inside a group
  \item \code{inter}: communications between groups
  \end{itemize}
}

\begin{frame}[containsverbatim]
  \frametitle{Groups and Communicators}
  \framesubtitle{Creating new communicators}
  \begin{itemize}
  \item duplicating or splitting an existing one: \cxxinline{MPI_Comm_dup}, \cxxinline{MPI_Comm_split}
  \item creating a communicator from a group: \cxxinline{MPI_Comm_create}, \cxxinline{MPI_Comm_create_group}
  \item this requires creating groups
    \begin{itemize}
    \item from a communicator: \cxxinline{MPI_Comm_group}
    \item with boolean operations: \cxxinline{MPI_Group_union}, \cxxinline{MPI_Group_intersection}, \cxxinline{MPI_Group_difference}
    \item by specifying ranks: \cxxinline{MPI_Group_incl}, \cxxinline{MPI_Group_excl}
    \end{itemize}
  \item destroy the created objects: \cxxinline{MPI_Comm_free}, \cxxinline{MPI_Group_free}
  \end{itemize}
\end{frame}
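\begin{frame}[fragile]
  \frametitle{Groups and Communicators}
  \framesubtitle{Splitting sketch}
  A minimal sketch of \cxxinline{MPI_Comm_split}; splitting the ranks into two halves
  and the communicator name are purely illustrative.
  \begin{cxxcode}{Example (sketch)}
    int prank, psize;
    MPI_Comm_rank(MPI_COMM_WORLD, &prank);
    MPI_Comm_size(MPI_COMM_WORLD, &psize);

    // processes with the same color end up in the same new communicator,
    // ordered by the key (here the original rank)
    int color = (prank < psize / 2) ? 0 : 1;
    MPI_Comm half_comm;
    MPI_Comm_split(MPI_COMM_WORLD, color, prank, &half_comm);

    int half_rank, half_size;
    MPI_Comm_rank(half_comm, &half_rank);
    MPI_Comm_size(half_comm, &half_size);

    // collectives on half_comm only involve the processes of the same half
    MPI_Comm_free(&half_comm);
  \end{cxxcode}
\end{frame}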
\subsection{Virtual Topologies}

\begin{frame}
  \frametitle{Virtual Topologies}
  \framesubtitle{}
  \begin{itemize}
  \item potential performance gain by mapping processes to the hardware
  \item helps for program readability
  \item types of topologies: Cartesian, Graph, Distributed Graph
  \item collective communications on neighborhoods
  \end{itemize}
\end{frame}

\note{
  Details only on the Cartesian one
}

\begin{frame}[fragile]
  \frametitle{Virtual Topologies}
  \framesubtitle{Cartesian topology}
  \begin{cxxcode}{Syntax}
    int MPI_Cart_create(MPI_Comm comm_old, int ndims, const int dims[],
                        const int periods[], int reorder, MPI_Comm *comm_cart);
  \end{cxxcode}
  \begin{itemize}
  \item creates a communicator with Cartesian information
  \item convenient functions:
    \begin{itemize}
    \item \cxxinline{MPI_Dims_create} helps creating a balanced distribution of processes
    \item \cxxinline{MPI_Cart_shift} helps determining the neighbors
    \item \cxxinline{MPI_Cart_rank} gets the rank based on the coordinates
    \item \cxxinline{MPI_Cart_coords} gets the coordinates based on the rank
    \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Virtual topology}
  \framesubtitle{Neighborhood collectives}
  \begin{itemize}
  \item \cxxinline{MPI_Neighbor_allgather}: assuming we are on the process of rank $i$, gathers data from every rank $j$ such that the edge $(j, i)$ exists, and sends the same data to every $j$ such that the edge $(i, j)$ exists
  \item \cxxinline{MPI_Neighbor_alltoall}: compared to allgather, sends different data to each process $j$
  \item vector variants are available (\code{v})
  \item immediate variants are available (\code{I})
  \item persistent variants are available (\code{\_init})
  \item \cxxinline{MPI_Neighbor_alltoallw} also exists in all flavors (\code{w}): different datatypes are exchanged with the neighbors
  \end{itemize}
\end{frame}
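\begin{frame}[fragile]
  \frametitle{Virtual Topologies}
  \framesubtitle{Cartesian sketch}
  A minimal sketch of a 2D Cartesian communicator; the variable names are illustrative,
  and a non-periodic grid with reordering enabled is assumed.
  \begin{cxxcode}{Example (sketch)}
    int psize;
    MPI_Comm_size(MPI_COMM_WORLD, &psize);

    int dims[2] = {0, 0};      // let MPI choose a balanced (px, py)
    int periods[2] = {0, 0};   // non-periodic in both directions
    MPI_Dims_create(psize, 2, dims);

    MPI_Comm cart_comm;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, /*reorder=*/1, &cart_comm);

    // neighbors along each dimension; MPI_PROC_NULL if there is no neighbor
    int top, bottom, left, right;
    MPI_Cart_shift(cart_comm, 0, 1, &top, &bottom);
    MPI_Cart_shift(cart_comm, 1, 1, &left, &right);

    int coords[2];
    MPI_Cart_coords(cart_comm, /*rank=*/0, 2, coords); // coordinates of rank 0
  \end{cxxcode}
\end{frame}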
\begin{frame}[exercise, fragile]
  \frametitle{Virtual topology}
  \framesubtitle{}
  \begin{itemize}
  \item Rewrite the parallelism using a Cartesian communicator
  \item Use neighborhood collective communications
  \end{itemize}
\end{frame}

\subsection{Parallel I/O}

\begin{frame}[containsverbatim]
  \frametitle{Parallel I/O overview}
  \begin{itemize}
  \item I/O is often (if not always) the main bottleneck in a parallel application
  \item MPI provides a mechanism to read/write in parallel
  \end{itemize}
  \begin{center}
    \input{src/mpi/figures/parallelFS.tex}
  \end{center}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Introductory remarks}
  \begin{itemize}
  \item The MPI IO API works on your desktop/laptop
  \item Most of the large HPC systems have a \textbf{parallel file system} (like GPFS, Lustre, \emph{etc}.)
  \item If the file is distributed smartly on a parallel file system: performance increases
  \item MPI IO offers a high-level API to access a distributed file (no need to implement complex POSIX calls)
  \item \textbf{does not work with ASCII files}
  \item Most of the standard file formats support MPI IO (\emph{e.g.} HDF5, NetCDF, \emph{etc}.)
  \end{itemize}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Poisson so far}
  \begin{center}
    \input{src/mpi/figures/sofar.tex}
  \end{center}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Poisson ideal}
  \begin{center}
    \input{src/mpi/figures/sogoal.tex}
  \end{center}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Open/Close a file in parallel}
  \begin{cxxcode}{Syntax}
    int MPI_File_open(MPI_Comm comm, const char *filename, int amode,
                      MPI_Info info, MPI_File *fh);
    int MPI_File_close(MPI_File *fh);
  \end{cxxcode}
  \begin{itemize}
  \item \cxxinline{comm}: the communicator that contains the writing/reading MPI processes
  \item \cxxinline{filename}: a file name
  \item \cxxinline{amode}: file access mode, \cxxinline{MPI_MODE_RDONLY}, \cxxinline{MPI_MODE_WRONLY}, \cxxinline{MPI_MODE_RDWR}, \cxxinline{MPI_MODE_CREATE}, \emph{etc}.
  \item \cxxinline{info}: file info object (\cxxinline{MPI_INFO_NULL} is a valid info)
  \item \cxxinline{fh}: file handle
  \end{itemize}
  \textbf{Collective calls!}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Terminology}
  \begin{itemize}
  \item \code{etype} is the elementary type of the data of the file accessed in parallel
  \item \code{offset} is a position in the file expressed as a multiple of etypes
  \item \code{displacement} of a position within the file is the number of bytes from the beginning of the file
  \end{itemize}
  \begin{center}
    \includegraphics{src/mpi/figures/offset}
    %\input{day3/images/offset.tex}
  \end{center}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Simple independent read/write}
  \begin{cxxcode}{Syntax}
    int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, int count,
                         MPI_Datatype datatype, MPI_Status *status);
    int MPI_File_write_at(MPI_File fh, MPI_Offset offset, const void *buf, int count,
                          MPI_Datatype datatype, MPI_Status *status);
  \end{cxxcode}
  \begin{itemize}
  \item can be used from a single process (or a group of processes)
  \item \cxxinline{offset} specifies where in the file the access starts
  \item \cxxinline{count} elements of type \cxxinline{datatype} are read/written from/to \cxxinline{buf}
  \end{itemize}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{\code{view} by each process}
  \begin{cxxcode}{Syntax}
    int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
                          MPI_Datatype filetype, const char *datarep, MPI_Info info);
    int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, MPI_Datatype *etype,
                          MPI_Datatype *filetype, char *datarep);
  \end{cxxcode}
  \begin{itemize}
  \item initially, each process views the file as a linear byte stream and views the data in its own native representation
  \item \cxxinline{disp} is the displacement in bytes (it defines the beginning of the part of the file that belongs to the process)
  \item \cxxinline{etype} is the unit of data access and positioning
  \item \cxxinline{filetype} is a single \cxxinline{etype} or a multiple of it
  \end{itemize}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Setting up a \code{view}}
  \begin{center}
    \input{day3/images/displacements.tex}
  \end{center}
  (source: MPI 2.2 specifications)
\end{frame}
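\begin{frame}[fragile]
  \frametitle{Parallel IO}
  \framesubtitle{View sketch}
  A minimal sketch where each process writes \cxxinline{count} doubles in a contiguous,
  rank-ordered layout; the file name, \cxxinline{count} and \cxxinline{local_data} are
  illustrative, and the collective \cxxinline{MPI_File_write_all} used here is
  introduced on the next slides.
  \begin{cxxcode}{Example (sketch)}
    int prank;
    MPI_Comm_rank(MPI_COMM_WORLD, &prank);

    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, "output.bin",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

    // each rank sees only its own block of the file, starting at disp bytes
    MPI_Offset disp = static_cast<MPI_Offset>(prank) * count * sizeof(double);
    MPI_File_set_view(fh, disp, MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);

    // collective write of the local buffer at the beginning of the view
    MPI_File_write_all(fh, local_data.data(), count, MPI_DOUBLE, MPI_STATUS_IGNORE);

    MPI_File_close(&fh);
  \end{cxxcode}
\end{frame}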
\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Simple independent read/write without offset}
  \begin{cxxcode}{Syntax}
    int MPI_File_read(MPI_File fh, void *buf, int count, MPI_Datatype datatype,
                      MPI_Status *status);
    int MPI_File_write(MPI_File fh, const void *buf, int count,
                       MPI_Datatype datatype, MPI_Status *status);
  \end{cxxcode}
\end{frame}

\begin{frame}[containsverbatim]
  \frametitle{Parallel IO}
  \framesubtitle{Collective read/write with/without offset}
  \begin{cxxcode}{Syntax}
    int MPI_File_write_all(MPI_File fh, const void *buf, int count,
                           MPI_Datatype datatype, MPI_Status *status);
    int MPI_File_read_all(MPI_File fh, void *buf, int count,
                          MPI_Datatype datatype, MPI_Status *status);
  \end{cxxcode}
\end{frame}

\subsection{One Sided}

\begin{frame}[containsverbatim]
  \frametitle{What we did not cover}
  \begin{itemize}
  \item One-sided communications
    \begin{itemize}
    \item \cxxinline{MPI_Put}, \cxxinline{MPI_Get}
    \item \cxxinline{MPI_Win_*}
    \item shared memory
    \end{itemize}
  \item Process management
    \begin{itemize}
    \item \cxxinline{MPI_Comm_spawn}
    \item communications on inter-communicators
    \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}[fragile,t]
  \frametitle{Parallelization of the Poisson code}
  \begin{minipage}{.45\linewidth}
    \centering
    \begin{overprint}
      \only<1>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_1}}
      \only<2>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_2}}
      \only<3>{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_3}}
      \only<4->{\includegraphics[width=.8\linewidth]{\FIGREP/grid_2D_4}}
    \end{overprint}
  \end{minipage}
  \begin{minipage}{.45\linewidth}
    \begin{overprint}
      \onslide<1>
      \begin{itemize}
      \item Parallelize the Poisson 2D problem using the Message Passing Interface (MPI)
      \end{itemize}
      \onslide<2>
      \begin{itemize}
      \item This time, we want to make a 2D domain decomposition using a Cartesian topology
+     \item Use \code{MPI\_Dims\_create} and \code{MPI\_Cart\_create} to create a Cartesian topology
      \end{itemize}
      \onslide<3>
      \begin{itemize}
      \item The $p$ processes are split into $(p_{x}, p_{y})$ to make the Cartesian grid
      \item Each domain has size $(N/p_{x}, N/p_{y})$ (1 per process)
+     \item Use \code{MPI\_Cart\_shift} to find the neighboring domains
      \end{itemize}
      \onslide<4>
      \begin{itemize}
      \item Adding \emph{ghost} lines before and after
      \item Use the \emph{ghost} lines to receive the missing local data
+     \item You will need to define a new \textit{matrix column} datatype and update the
+       \textit{matrix line} datatype
      \end{itemize}
      \onslide<5>
      \begin{itemize}
-     \item Start using \cxxinline{MPI_Sendrecv} to implement the communications
+     \item Use the \code{MPI\_Neighbor\_alltoallw} routine
      \item You can use the number of iterations as a check
      \item Remove the \cxxinline{dump()} function to start
-     \item One it is working try to use \emph{non-blocking} communications
      \end{itemize}
    \end{overprint}
  \end{minipage}
\end{frame}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../../phys_743_parallel_programming"
%%% End: