-% \begin{block}{A solution with the \texttt{reduction(...)} clause}
-
-% \begin{verbatim}
-% vec = (int*) malloc (size_vec*sizeof(int));
-% global_sum = 0;
-% #pragma omp parallel for reduction(+:global_sum)
-% for (i=0;i<size_vec;i++){
-% global_sum += vec[i];
-% }
-% \end{verbatim}
-% But other solutions exist!
-% \end{block}
-% \end{frame}
-
-
-% \begin{frame}[containsverbatim]
-% \frametitle{The \texttt{schedule} clause}
-
-% \begin{block}{}
-% Load-balancing
-% \end{block}
-
-% \begin{center}
-% \begin{tabular}{|l|l|}
-% \hline
-% \textbf{clause} & \textbf{behavior} \\
-% \hline
-% \hline
-% \textit{schedule(static [, chunk\_size])} &
-% iterations divided in chunks sized \\
-% & \textit{chunk\_size} assigned to threads in \\
-% & a round-robin fashion. \\
-% & If \textit{chunk\_size} not specified \\
-% & system decides. \\
-% \hline
-
-% \textit{schedule(dynamic [, chunk\_size])} &
-% iterations divided in chunks sized \\
-% & \textit{chunk\_size} assigned to threads \\
-% & when they request them until no \\
-% & chunk remains to be distributed. \\
-% & If \textit{chunk\_size} not specified \\
-% & default is 1. \\
-
-% \hline
-% \end{tabular}
-% \end{center}
-
-% \end{frame}
-
-
-% \begin{frame}[containsverbatim]
-% \frametitle{The \texttt{schedule} clause}
-
-% \begin{center}
-% \begin{tabular}{|l|l|}
-% \hline
-% \textbf{clause} & \textbf{behavior} \\
-% \hline
-% \hline
-% \textit{schedule(guided [, chunk\_size])} &
-% iterations divided in chunks sized \\
-% & \textit{chunk\_size} assigned to threads \\
-% & when they request them. Size of \\
-% & chunks is proportional to the \\
-% & remaining unassigned chunks. \\
-% % & If \textit{chunk\_size} not specified \\
-% % & default is 1. \\
-% & By default the chunk size is approx \\
-% & loop\_count/number\_of\_threads. \\
-
-% % By default the chunk size is approximately
-
-
-% \hline
-% \textit{schedule(auto)} &
-% The decision is delegated to the \\
-% & compiler and/or the runtime system \\
-
-% \hline
-% \textit{schedule(runtime)} &
-% The decision is delegated to the \\
-% & runtime system \\
-
-
-% \hline
-% \end{tabular}
-% \end{center}
-
-% \end{frame}
-
-
-% \begin{frame}
-% \frametitle{The \texttt{schedule} clause}
-% \begin{center}
-% {\input{day1/images/schedule-decision.tex}}
-% \end{center}
-% \end{frame}
-
-
-% \begin{frame}[containsverbatim]
-% \frametitle{A parallel \texttt{for} example}
-
-
-% \begin{block}{How to...}
-% ... parallelize the dense matrix multiplication $C = A B$ (triple for loop $C_{ij} = C_{ij} + A_{ik} B_{kj}$). What happens using different \texttt{schedule} clauses?
-% \begin{verbatim}
-% [DGEMM] Compute time [s] : 9.17780399322509E-002
-% [DGEMM] Performance [GF/s]: 2.17917053085506
-% [DGEMM] Verification : 2000000000.00000
-% \end{verbatim}
-% \end{block}
-
-% \end{frame}
-
-
-
-
-% \begin{frame}
-% \frametitle{The \texttt{collapse} clause}
-
-% \begin{block}{Intel view}
-% Use the collapse clause to increase the total number of iterations that will be partitioned across the available number of OMP threads by reducing the granularity of work to be done by each thread.
-
-% You can improve performance by avoiding use of the collapsed-loop indices (if possible) inside the collapse loop-nest (since the compiler has to recreate them from the collapsed loop-indices using divide/mod operations AND the uses are complicated enough that they don't get dead-code-eliminated as part of compiler optimizations)
-% It is mandatory that the \textit{\texttt{n-}}collapsed loops are perfectly nested and with a rectangular shape (nothing like \texttt{do i=1,N ... do j=1,f(i)}) and that their upper limits are ``small''.
% % \item{The directive must appear after the declaration of listed variables/common blocks}
% % \item{The values of data in the threadprivate variables of non-initial threads are guaranteed to persist between two consecutive active \texttt{parallel} regions if:
% % \begin{itemize}
% % \item{No nested \texttt{parallel} regions}
% % \item{Number of threads for both \texttt{parallel} regions is the same}
% % \item{\texttt{dyn-var} ICV is false for both \texttt{parallel} regions}
% % \end{itemize}
% % }
% % \item{A \texttt{threadprivate} variable is affected by a \texttt{copyin} clause if it appears in the list}
% % \item{A \texttt{threadprivate} variable is \textbf{NOT} affected by a \texttt{copyin} clause if it has the \texttt{allocatable} (not initially allocated) or the \texttt{pointer} (no initial association) attributes}
% % \end{itemize}
% % \end{exampleblock}
% %
% % \end{frame}
% % \begin{frame}[containsverbatim]
% % \frametitle{A \texttt{copyin} clause}
% %
% % \begin{exampleblock}{Properties}
% % \begin{itemize}
% % \item{The \texttt{copyin} clause provides a mechanism to copy the value of the master thread's \texttt{threadprivate} variable to the \texttt{threadprivate} variable of each other member of the team executing the \texttt{parallel}region. }
% % \item{If the original list item has the \texttt{POINTER} attribute, each copy receives the same association status of the master thread's copy as if by pointer assignment. }
% % \item{If the original list item does not have the \texttt{POINTER} attribute, each copy becomes defined with the value of the master thread's copy as if by intrinsic assignment, unless it has the allocation status of not currently allocated, in which case each copy will have the same status. }
% % \end{itemize}
% % \end{exampleblock}
% % \end{frame}
% % \begin{frame}[containsverbatim]
% % \frametitle{A \texttt{copyprivate} clause}
% %
% % \begin{exampleblock}{Properties}
% % \begin{itemize}
% % \item{The \texttt{copyprivate} clause provides a mechanism to use a private variable to broadcast a value from the data environment of one implicit task to the data environments of the other implicit tasks belonging to the \texttt{parallel} region.}
% % \item{To avoid race conditions, concurrent reads or updates of the list item must be synchronized with the update of the list item that occurs as a result of the \texttt{copyprivate} clause.}
% % \end{itemize}
% % \end{exampleblock}
% % \end{frame}
% \subsubsection{Nesting}
% \begin{frame}
% \frametitle{Nesting regions}
% \begin{exampleblock}{Nesting}
% It is possible to include parallel regions in a parallel region (i.e. nesting) under restrictions (cf. sec. 2.10, p.111, \textit{OpenMP: Specifications ver. 3.1})
% \end{exampleblock}
% \end{frame}
% \subsection{Runtime Library routines}
% \begin{frame}
% \frametitle{Runtime Library routines}
% \begin{exampleblock}{Usage}
% \begin{itemize}
% \item{The functions/subroutines are defined in the lib \texttt{libomp.so / libgomp.so}. Don't
% forget to include \texttt{\#include <omp.h>}}
% \item{These functions can be called anywhere in your programs}
% \end{itemize}
% \end{exampleblock}
% \begin{alertblock}{}
% \textbf{FIRST OPTIMIZE ON ONE CORE, THEN PARALLELIZE (the right algorithm)}
% \end{alertblock}
% \begin{center}
% \url{http://openmp.org/wp/openmp-compilers}
% \end{center}
% \end{frame}
% \subsection{What's new in 4.0 ?}
% \begin{frame}
% \frametitle{What's new with OpenMP 4.0 ?}
% \begin{itemize}
% \item{ Support for new devices (\verb+Intel Phi+, \verb+GPU+,...) with \verb+omp target+. Offloading on those devices. }
% \item{ Hardware agnostic}
% \item{ League of threads with \verb+omp teams+ and distribute a loop over the team with \verb+omp distribute+ }
% \item{ SIMD support for vectorization \verb+omp simd+ }
% \item{ Task management enhancements (cancellation of a task, groups of tasks, task-to-task synchro)}
% \item{ Set thread affinity with a more standard way than \verb+KMP_AFFINITY+ with the concepts of \verb+places+ (a thread, a core, a socket), \verb+policies+ (spread, close, master) and \verb+control settings+ the new clause \verb+proc_bind+}