Page MenuHomec4science

cluster_architecture.tex
No OneTemporary

File Metadata

Created
Tue, May 7, 09:59

cluster_architecture.tex

\renewcommand{\FIGREP}{src/cluster_architecture/figures}
\section{Cluster Architecture}
\label{sec:cluster_architecture}
\intersec{helvetios}
\begin{frame}[t]
\frametitle{Cluster Architecture}
\framesubtitle{}
\begin{itemize}
\item The goal of this section is to understand what's under the cluster's
hood
\item In order to take full advantage of your computer, you have to
understand how it works, what its limits are, etc.
\item We'll go from the cluster level down to the core level
\end{itemize}
\addimage[width=7cm]{\FIGREP/summit}{5.5cm}{0.5cm}
\end{frame}
\subsection{Cluster as a whole}
\label{sec:cluster}
\begin{frame}
\frametitle{Cluster Architecture}
\framesubtitle{General presentation}
\begin{minipage}{0.5\linewidth}
\begin{itemize}
\item An HPC cluster is composed of
\begin{itemize}
\item Login node(s)
\item Compute nodes
\item Storage system
\item High performance interconnect
\end{itemize}
\item The simulation data is written to the storage system. At SCITAS:
\begin{itemize}
\item \code{/home}: store source files, input data, small files
\item \code{/work}: collaboration space for a group
\item \code{/scratch}: temporary huge result files
\end{itemize}
Please note that only \code{/home} and \code{/work} have backups!
\code{/scratch} data can be erased at any moment!
\end{itemize}
\end{minipage}
\addimage[width=6cm]{\FIGREP/abstract_architecture}{9.5cm}{1.5cm}
\end{frame}
\note{
\begin{itemize}
\item The users connect to the login node
\item Backups on \code{/work} are paying
\end{itemize}
}
\subsection{Introduction to SLURM}
\begin{frame}
\frametitle{Introduction to SLURM}
\framesubtitle{}
\begin{minipage}{0.55\linewidth}
\begin{itemize}
\item Users do not run their calculations directly on the compute nodes
\item A \textit{scheduler} is used to ensure fair resource usage
\pause
\item At SCITAS, we use the SLURM scheduler
\pause
\item You submit your simulation and the resources you need to SLURM
\item SLURM stores it into a queue and assigns it a starting time
depending on many parameters
\item Your job may not start right away, and that is normal!
\end{itemize}
\end{minipage}
\onslide<1>\addimage[width=6cm]{\FIGREP/abstract_architecture}{9.5cm}{1.5cm}
\onslide<2->\addimage[width=6cm]{\FIGREP/abstract_architecture_slurm}{9.5cm}{1.5cm}
\end{frame}
\begin{frame}[fragile]
\frametitle{Introduction to SLURM}
\framesubtitle{How to submit simulations}
\textbf{To submit a job}
\begin{bashcode}
$> srun -A phys-743 --reservation phys-743 ./my_program
\end{bashcode}%$
\vfill
\begin{description}
\item[-A / -{}-account=<account>]: name of your SLURM account
\item[-{}-reservation=<reservation>]: name of your SLURM reservation
\item[-t / -{}-time=<HH:MM:SS>]: set a limit on the total run time of the job
\item[-N / -{}-nodes=<N>]: request that a minimum of $N$ nodes be allocated to the job
\item[-n / -{}-ntasks=<n>]: advise SLURM that this job will launch a maximum of
$n$ tasks
\item[-c / -{}-cpus-per-task=<ncpus>]: advise SLURM that the job will require \code{ncpus} CPUs per task
\item[-{}-mem=<size{[}units{]}>]: specify the memory required per node
\end{description}
\vfill
Need more help? Have a look at the \href{https://slurm.schedmd.com/sbatch.html}{documentation}
\end{frame}
\begin{frame}[fragile]
\frametitle{Introduction to SLURM}
\framesubtitle{How to submit simulations}
\textbf{Or you can put everything in a file called, e.g., \code{my\_simulation.job}}
\begin{bashcode}
#!/bin/bash -l
#SBATCH --account=phys-743
#SBATCH --reservation=phys-743
#SBATCH --time=01:10:00
#SBATCH --nodes=2
#SBATCH --ntasks=56
srun ./my_program
\end{bashcode}
and submit the job with
\begin{bashcode}
$> sbatch my_simulation.job
\end{bashcode}%$
\end{frame}
\begin{frame}[fragile]
\frametitle{Introduction to SLURM}
\framesubtitle{How to manage simulations}
\textbf{To list all your jobs}
\begin{bashcode}
$> squeue -u <username>
\end{bashcode}%$
\vfill
\textbf{To cancel a simulation}
\begin{bashcode}
$> scancel <jobid>
\end{bashcode}%$
The \code{<jobid>} can be found using \code{squeue}
\end{frame}
\begin{frame}[t]
\frametitle{Cluster Architecture}
\framesubtitle{A few numbers}
\textbf{Let's go back to Summit}
\begin{itemize}
\item Second most powerful HPC cluster in the world according to the
\href{https://www.top500.org/lists/top500/list/2021/06/}{Top500 June
2021 list}
\item It is composed of \num{4608} compute nodes
\item Power consumption of \SI{10096.00}{\kilo\watt}
% Consommation annuelle par habitant en Suisse ~7500kWh
% https://donnees.banquemondiale.org/indicator/EG.USE.ELEC.KH.PC
\item Equivalent to the consumption of a city with $\sim$\num{13000} inhabitants
% Prix du kWh a Lausanne ~20 centimes
\item In Lausanne, running Summit would cost $\sim$\SI{50000}{\chf\per\day} only for electricity!
\end{itemize}
\addimage[width=5cm]{\FIGREP/summit}{5.5cm}{1.0cm}
\end{frame}
\subsection{The compute node}
\label{sec:node}
\begin{frame}[t]
\frametitle{Cluster Architecture}
\framesubtitle{Let's dive into a compute node!}
\begin{itemize}
\item The compute node is the basic building block of a cluster
\item It is composed of one or more CPUs with RAM (memory) and possibly one
or more accelerators, e.g. GPUs
\item All the nodes are connected together with an interconnect
\end{itemize}
\addimage[width=4.5cm]{\FIGREP/node_architecture}{5.75cm}{1.0cm}
\end{frame}
\note{
\begin{itemize}
\item A compute node is like a personal computer on steroids
\end{itemize}
}
\subsection{The CPU}
\label{sec:cpu}
\begin{frame}[t]
\frametitle{Cluster Architecture}
\framesubtitle{Central processing unit}
\begin{itemize}
\item The CPU is the ``brain'' of the node
\item CPUs work in clock cycles; they are the ``heartbeat'' of the CPU
\item It is composed of cores and different levels of memories called caches
\item There are usually three levels of cache called L1, L2, and L3
\end{itemize}
\vspace{0.8cm}
\begin{table}
\hspace{5cm}
\scriptsize
\begin{tabular}{@{}llll@{}}
\toprule
\textbf{Event} & \textbf{Latency} & \textbf{Scaled} & \textbf{Capacity} \\
\midrule
1 CPU cycle & 0.1\,ns & 1\,s & -- \\
L1 cache access & 1\,ns & 10\,s & kB \\
L2 cache access & 1\,ns & 10\,s & MB \\
L3 cache access & 10\,ns & 1\,min & MB \\
RAM access & 100\,ns & 10\,min & GB \\
Solid-state disk access & 100\,$\mu$s & 10\,days & TB \\
Hard-disk drive access & 1--10\,ms & 1--12\,months & TB \\
\bottomrule
\end{tabular}
\end{table}
\addimage[width=4cm]{\FIGREP/cpu_architecture}{3cm}{1.5cm}
\end{frame}
\note{
\begin{itemize}
\item Caches are extremely fast memories that are used to hide the latency
of other memories (RAM, hard drive, etc.)
\item However, they are usually quite small compared to e.g. RAM, hard drive
\item L1 is the closest to the core, followed by L2 and L3
\item Some cache levels are private to a core, e.g. here L1 and L2
\end{itemize}
}
\subsection{Summary of SCITAS' clusters}
\begin{frame}[t]
\frametitle{Cluster Architecture}
\framesubtitle{Summary of SCITAS' clusters}
\onslide<2>{\begin{tikzpicture}[overlay,remember picture]
\begin{scope}[shift={(current page.south west)}]
\draw[red, thick] (2, 1) -- (6.5, 7);
\end{scope}
\end{tikzpicture}}
\begin{minipage}[t]{0.32\linewidth}
\begin{center}
\textbf{Fidis}
\end{center}
\begin{itemize}
\item CPU cluster
\item 336 nodes each with
\begin{itemize}
\item 2 Intel Xeon E5-2690 @\SI{2.6}{\giga\hertz} with 14 cores each
\item \SI{128}{\gibi\byte} of RAM
\end{itemize}
\item 72 nodes each with
\begin{itemize}
\item 2 Intel Xeon E5-2690 @\SI{2.6}{\giga\hertz} with 14 cores each
\item \SI{256}{\gibi\byte} of RAM
\end{itemize}
\item 216 nodes each with
\begin{itemize}
\item 2 Intel Xeon Gold 6132 @\SI{2.6}{\giga\hertz} with 14 cores each
\item \SI{192}{\gibi\byte} of RAM
\end{itemize}
\end{itemize}
\end{minipage}
\hfill
\begin{minipage}[t]{0.32\linewidth}
\begin{center}
\textbf{Helvetios}
\end{center}
\begin{itemize}
\item CPU cluster
\item 287 nodes each with
\begin{itemize}
\item 2 Intel Xeon Gold 6140 @\SI{2.3}{\giga\hertz} with 18 cores each
\item \SI{192}{\gibi\byte} of DDR4 RAM
\end{itemize}
\end{itemize}
\vspace{0.5cm}
\onslide<2>{\begin{minipage}[t]{1.0\linewidth}
\begin{center}
\textbf{Jed}
\end{center}
\begin{itemize}
\item CPU cluster
\item 419 nodes, 2 Intel Ice Lake Platinum with 36 cores each
\begin{itemize}
% TODO: replace XXX/YYY placeholders with the actual node counts
\item XXX nodes with \SI{512}{\gibi\byte} of DDR4 RAM
\item YYY nodes with \SI{1}{\tebi\byte} of DDR4 RAM
\end{itemize}
\end{itemize}
\end{minipage}}
\end{minipage}
\hfill
\begin{minipage}[t]{0.32\linewidth}
\begin{center}
\textbf{Izar}
\end{center}
\begin{itemize}
\item CPU + GPU cluster
\item 64 nodes each with
\begin{itemize}
\item 2 Intel Xeon Gold 6230 @\SI{2.1}{\giga\hertz} with 20 cores each
\item 2 NVIDIA V100 PCIe \SI{32}{\gibi\byte} GPUs
\item \SI{192}{\gibi\byte} of DDR4 RAM
\end{itemize}
\item 2 nodes each with
\begin{itemize}
\item 2 Intel Skylake @\SI{2.1}{\giga\hertz} with 20 cores each
\item 4 NVIDIA V100 SMX2 \SI{32}{\gibi\byte} GPUs
\item \SI{192}{\gibi\byte} of DDR4 RAM
\end{itemize}
\end{itemize}
\end{minipage}
\end{frame}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../../phys_743_parallel_programming"
%%% End:

Event Timeline