\documentclass[aspectratio=169]{beamer}
\def\stylepath{../styles}
\usepackage{\stylepath/com202}
\usepackage{pst-3dplot}
\usepackage{pst-tree}
\begin{document}
\intertitle{the JPEG image compression standard}
\begin{frame} \frametitle{A thought experiment}
\begin{itemize}
\item consider all possible $256\times 256$, 8~bpp ``images''
\item each image is 524,288~bits
\vspace{1em}
\item total number of possible images: $2^{524,288} \approx 10^{157,826}$
\item number of atoms in the universe: $10^{82}$
\end{itemize}
\end{frame}
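\begin{frame}[fragile] \frametitle{A thought experiment: checking the numbers}
a quick sanity check of the count above (a Python one-liner, standard library only):
\begin{verbatim}
import math

bits = 256 * 256 * 8           # 524,288 bits per image
print(bits * math.log10(2))    # ~157826.2, i.e. about 10^157826
\end{verbatim}
\end{frame}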
\begin{frame} \frametitle{Image compression}
\centering
Goal: reduce the storage required by an image
\vspace{1em}
\begin{itemize}
\item lossless compression: store the information exactly, just more compactly
\begin{itemize}
\item zip files, PNG
\item exploits patterns in bitstream (not image-specific)
\item mostly a problem for Information Theory
\end{itemize}
\item lossy compression: accept some degradation in return for much greater storage savings
\begin{itemize}
\item JPG, MP3
\item designed specifically for a class of signals (images, music)
\item relies on known ``blind spots'' in our perceptual systems
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Lossy image compression}
\begin{itemize}
\item exploit natural redundancy in images (e.g. a large patch of blue sky)
\item use psychovisual experiments to determine what impacts perceived quality
\item allocate most bits to the things that matter
\item hide the losses where they are hard to see
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Key ingredients of the JPEG standard}
\begin{enumerate}
\item compressing at block level
\item using a suitable transform (i.e., a change of basis)
\item smart quantization
\item entropy coding
\end{enumerate}
\end{frame}
\begin{frame} \frametitle{Compressing at pixel level}
\begin{columns}
\begin{column}{0.45\paperwidth}
\begin{itemize}
\item reduce the number of bits per pixel
\item equivalent to coarser quantization
\item the lowest we can go is one bit per pixel\\
i.e. pixels are either black or white
\end{itemize}
\end{column}
%
\begin{column}{0.45\paperwidth}
\begin{figure}
{
\begin{dspCP}[width=5cm,xticks=none,yticks=none]{0,511}{0,511}%
\dspImageFile{\imagePath/bpp1}%
\end{dspCP}}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Compressing at block level}
\begin{columns}
\begin{column}{0.45\paperwidth}
\begin{itemize}
\item split image into small blocks
\item keep only the average pixel value in each block (code sketch on the next slide)
\item shown here:
\begin{itemize}
\item $3\times 3$ blocks, i.e. 9 pixels each
\item average value coded using 8 bits
\item storage less than 1~bpp
\end{itemize}
\item where's the catch? zoom in to see
\end{itemize}
\end{column}
%
\begin{column}{0.45\paperwidth}
\begin{figure}
{
\begin{dspCP}[width=5cm,xticks=none,yticks=none]{0,511}{0,511}%
\dspImageFile{\imagePath/bpp9}%
\end{dspCP}}
\end{figure}
\end{column}
\end{columns}
\end{frame}
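\begin{frame}[fragile] \frametitle{Block averaging: a code sketch}
a minimal NumPy sketch of the block-averaging idea (an illustration, not JPEG itself; \texttt{img} is assumed grayscale, with sides divisible by the block size):
\begin{verbatim}
import numpy as np

def block_average(img, b=3):
    # group pixels into b-by-b blocks, keep only each block's mean
    h, w = img.shape
    means = img.reshape(h // b, b, w // b, b).mean(axis=(1, 3))
    # re-expand the means to the original size for display
    return np.repeat(np.repeat(means, b, axis=0), b, axis=1)
\end{verbatim}
storing one 8-bit mean per $3\times 3$ block costs $8/9 \approx 0.9$~bpp
\end{frame}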
\begin{frame} \frametitle{Transform coding}
\begin{columns}
\begin{column}{0.45\paperwidth}
A simple 1D example:
\begin{itemize}
\item signal encoded at $R$ bits per sample
\item storing $N$ samples requires $NR$ bits
\item look at the DFT: if many DFT values are almost zero we can discard them and store only the nonzero ones
\item many natural signals are sparse once properly transformed!
\end{itemize}
\end{column}
%
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{dspPlot}[width=5cm,height=2cm,yticks=none,xout=true]{0,31}{-1.1,1.1}
\moocStyle
\dspSignal{x 360 32 div 5 mul mul cos}
\end{dspPlot}
\begin{dspPlot}[width=5cm,height=2cm,yticks=none,xout=true]{0,31}{-1.1,1.1}
\moocStyle
\dspSignal{x 5 eq {1} {x 27 eq {1} {0} ifelse} ifelse}
\end{dspPlot}
\end{figure}
\end{column}
\end{columns}
\end{frame}
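\begin{frame}[fragile] \frametitle{Transform coding: a code sketch}
a sketch of the 1D example above: the cosine is dense in time but sparse in frequency, so only two DFT bins need to be stored:
\begin{verbatim}
import numpy as np

N = 32
x = np.cos(2 * np.pi * 5 * np.arange(N) / N)  # the signal above
X = np.fft.fft(x)

significant = np.abs(X) > 1e-9        # all other bins are ~0
print(np.flatnonzero(significant))    # [ 5 27]
\end{verbatim}
\end{frame}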
\begin{frame} \frametitle{Smart quantization}
Once we have a few nonzero transform coefficients:
\begin{itemize}
\item discard all the ``small'' coefficients
\item use a larger number of bits for the ``important'' coefficients
\begin{itemize}
\item using few bits is equivalent to a rounding error, e.g. $\pi \approx 3,\ 3.1,\ 3.14,\ 3.142$, etc. (sketch on the next slide)
\item ``importance'' is determined experimentally using human subjects
\end{itemize}
\end{itemize}
\end{frame}
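\begin{frame}[fragile] \frametitle{Quantization as rounding: a code sketch}
a small illustration of the trade-off above: a finer quantization step needs more bits but leaves a smaller rounding error:
\begin{verbatim}
c = 3.14159                      # some transform coefficient
for step in [1, 0.1, 0.01]:      # coarse to fine quantization
    q = round(c / step) * step
    print(step, q, abs(c - q))   # the error shrinks with the step
\end{verbatim}
\end{frame}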
\begin{frame} \frametitle{Entropy coding}
Smart quantization returns a series of numbers. How do we minimize storage?
\begin{itemize}
\item not all values occur with the same frequency
\item represent values with binary strings of different size
\item use short strings for frequent values and vice versa
\end{itemize}
\begin{figure}
\begin{center}
\includegraphics[height=4cm]{morse}%
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Key ingredients of the JPEG standard}
\begin{itemize}
\item compressing at block level: {\color{blue!80} split image into $8\times 8$ non-overlapping blocks}
\item using a suitable transform: {\color{blue!80}compute the Discrete Cosine Transform of each block}
\item smart quantization: {\color{blue!80} round DCT coefficients according to psychovisually-tuned tables}
\item entropy coding: {\color{blue!80} run-length and entropy coding}
\end{itemize}
\end{frame}
\begin{frame} \frametitle{The Discrete Cosine Transform (DCT)}
pretty much the same as the DFT:
\begin{itemize}
\item each coefficient is the inner product between the image block and a DCT basis vector (code sketch on the next slide)
\item basis vectors are a bit different from the DFT but still orthogonal
\item DCT coefficients are real-valued
\item DCT minimizes border effects
\end{itemize}
\end{frame}
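\begin{frame}[fragile] \frametitle{Computing the DCT: a code sketch}
a minimal sketch using SciPy's 2D type-II DCT (orthonormal variant, which matches JPEG's transform on $8\times 8$ blocks up to scaling conventions):
\begin{verbatim}
import numpy as np
from scipy.fft import dctn, idctn

block = np.random.randint(0, 256, (8, 8)).astype(float)
C = dctn(block - 128, norm='ortho')  # JPEG level-shifts by 128
back = idctn(C, norm='ortho') + 128  # orthogonal => invertible
print(np.allclose(back, block))      # True
\end{verbatim}
\texttt{C[0,0]} is proportional to the block's average gray level
\end{frame}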
\begin{frame} \frametitle{DCT basis vectors for an $8\times 8$ image}
\note<1>{\vspace{10em} Stress the fact that the transform, as a change of basis, is the correlation between the image block under analysis and each of the basis vectors. The resulting coefficients are the weights that measure the contribution to the patch of each DCT pattern. Also mention that the 1st coeff is the average graylevel}
\begin{figure}
\begin{center}
\begin{dspCP}[width=6cm,xticks=none,yticks=none]{0,511}{0,511}%
\dspImageFile{\imagePath/dctBasis}%
\end{dspCP}
\end{center}
\end{figure}
\end{frame}
% 20x20 blocks
\begin{frame} \frametitle{DCT coefficients of image blocks}
\begin{figure}
\begin{center}
\begin{dspCP}[width=6cm,xticks=none,yticks=none]{0,511}{0,511}%
\dspImageFile{\imagePath/dctDetail}%
\end{dspCP}
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Psychovisually-tuned quantization}
\begin{itemize}
\item most of the 64 DCT coefficients in each block are small and rounded to zero
\item coefficients with high visual impact are encoded first
\item remaining bit budget is allocated to the rest (table sketch on the next slide)
\end{itemize}
\end{frame}
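\begin{frame}[fragile] \frametitle{Quantization tables: a code sketch}
a sketch of table-driven quantization using the standard luminance table from Annex~K of the JPEG specification; high-frequency entries are larger, so those coefficients are rounded more aggressively:
\begin{verbatim}
import numpy as np

Q = np.array([[16, 11, 10, 16,  24,  40,  51,  61],
              [12, 12, 14, 19,  26,  58,  60,  55],
              [14, 13, 16, 24,  40,  57,  69,  56],
              [14, 17, 22, 29,  51,  87,  80,  62],
              [18, 22, 37, 56,  68, 109, 103,  77],
              [24, 35, 55, 64,  81, 104, 113,  92],
              [49, 64, 78, 87, 103, 121, 120, 101],
              [72, 92, 95, 98, 112, 100, 103,  99]])

def quantize(C):                # C: an 8x8 block of DCT values
    return np.round(C / Q).astype(int)
\end{verbatim}
\end{frame}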
\begin{frame} \frametitle{Impact of smart quantization at 0.2~bpp}
\note<1>{\vspace{10em} both images are encoded at the same bitrate of 0.2~bpp, the first one with a uniform quantization table for each DCT block, the second with the fixed JPEG quantization table}
\begin{figure}
\begin{center}
\begin{tabular}{cc}
\showImage{loUni}
&
\showImage{loOpt}
\\
uniform & tuned
\end{tabular}
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Run-length encoding}
The sequence of quantized DCT coefficients will contain a lot of zeros:
\begin{itemize}
\item encode sequences of zeros by their length\\
e.g. the sequence $[0, 0, 0, 0, 0, 12]$ becomes $[5, 12]$
\item maximize the chance of long zero sequences using a zigzag scan over the DCT values
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Zigzag scan}
\begin{figure}
\begin{center}
\begin{dspCP}[xticks=none,yticks=none]{0.5,8.5}{0.5,8.5}%
{\psset{linecolor=blue!80,linewidth=0.8pt}
\psline{->}(1,8)(2,8)(1,7)(1,6)(3,8)(4,8)
\psline{->}(4,8)(1,5)(1,4)(5,8)(6,8)
\psline{->}(6,8)(1,3)(1,2)(7,8)(8,8)(1,1)(2,1)(8,7)(8,6)(3,1)(4,1)(8,5)(8,4)(5,1)(6,1)(8,3)(8,2)(7,1)(8,1)
}
\multido{\n=1+1}{7}{%
\psline[linewidth=1pt](! 0.5 \n.0 0.5 add)(! 8.5 \n.0 0.5 add)
\psline[linewidth=1pt](! \n.0 0.5 add 0.5)(! \n.0 0.5 add 8.5)}
\multido{\n=1+1}{8}{%
\multido{\i=1+1}{8}{\pscircle*(\n,\i){2pt}}}
\pscircle*[linecolor=red](1,8){2pt} \end{dspCP}
\end{center}
\end{figure}
\end{frame}
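\begin{frame}[fragile] \frametitle{Zigzag scan: a code sketch}
one way to generate the scan order shown above: visit the anti-diagonals $i+j = 0, 1, \ldots, 14$ in turn, alternating direction on each:
\begin{verbatim}
def zigzag(n=8):
    # sort indices by anti-diagonal; flip direction on even ones
    return sorted(((i, j) for i in range(n) for j in range(n)),
                  key=lambda p: (p[0] + p[1],
                                 p[0] if (p[0] + p[1]) % 2 else -p[0]))

print(zigzag()[:6])  # [(0, 0), (0, 1), (1, 0), (2, 0), (1, 1), (0, 2)]
\end{verbatim}
\end{frame}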
\begin{frame} \frametitle{Example}
\[
\begin{bmatrix*}[r]
100 & -60 & 0 & 6 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
13 & -1 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
\end{bmatrix*}
\]
\vspace{1em}
100, -60, 0, 0, 0, 0, 6, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, -1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
\end{frame}
\begin{frame} \frametitle{Run-length encoding}
\[
\begin{bmatrix*}[r]
100 & -60 & 0 & 6 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
13 & -1 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
\end{bmatrix*}
\]
\vspace{1em}
\[
[100], \, [(0, 6),\, -60], \, [(4, 3),\, 6], \, [(3, 4),\, 13], \, [(8, 1),\, -1], [(0, 0)]
\]
\end{frame}
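\begin{frame}[fragile] \frametitle{Run-length encoding: a code sketch}
a sketch reproducing the encoding above: each nonzero AC value is paired with (zeros skipped, bits needed for its magnitude), and $(0,0)$ marks the end of the block (real JPEG additionally caps runs at 15):
\begin{verbatim}
def rle(ac):           # ac: the 63 AC values in zigzag order
    out, run = [], 0
    for v in ac:
        if v == 0:
            run += 1   # extend the current run of zeros
        else:
            out.append(((run, abs(v).bit_length()), v))
            run = 0
    out.append((0, 0)) # end-of-block marker
    return out
\end{verbatim}
\end{frame}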
\begin{frame} \frametitle{Entropy coding}
goal: minimize message length
\begin{itemize}
\item use prefix-free codes to avoid explicit separators between codewords (decoding sketch on the next slide)
\item assign short sequences to more frequent symbols
\item JPEG provides a ``general-purpose'' code but you can build your own
\item given the symbol frequencies, the Huffman algorithm builds an optimal prefix-free code
\end{itemize}
\end{frame}
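\begin{frame}[fragile] \frametitle{Prefix-free decoding: a code sketch}
why no separators are needed: no codeword is a prefix of another, so a greedy left-to-right scan is unambiguous (shown here with the code built on the next slides):
\begin{verbatim}
code = {'A': '0', 'B': '11', 'C': '100', 'D': '101'}
rev = {w: s for s, w in code.items()}

def decode(bits):
    out, buf = [], ''
    for b in bits:
        buf += b
        if buf in rev:   # a complete codeword: emit and reset
            out.append(rev[buf])
            buf = ''
    return ''.join(out)

print(decode('0111000101'))   # ABCAD
\end{verbatim}
\end{frame}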
\begin{frame} \frametitle{Example}
\begin{itemize}
\item four symbols: A, B, C, D
\item probability table:
\begin{center}
\begin{tabular}{lcl}
$p(A) = 0.38$ & ~~~~~~ & $p(B) = 0.32$ \\
& \\
$p(C) = 0.1$ & & $p(D) = 0.2$ \\
\end{tabular}
\end{center}
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Building the Huffman code}
\begin{figure}
\begin{center}
\pstree[radius=3pt,treemode=R,treefit=loose]{\Tcircle{0.30}}{%
\Tcircle{0.10}~{{\bf C}}^{0}
\Tcircle{0.20}~{{\bf D}}_{1}}
\end{center}
\end{figure}
\[
p(A) = 0.38 \quad p(B) = 0.32 {\color{red} \quad p(C) = 0.1 \quad p(D) = 0.2}
\]
\end{frame}
\begin{frame} \frametitle{Building the Huffman code}
\begin{figure}
\begin{center}
\pstree[radius=3pt,treemode=R,treefit=loose]{\Tcircle{0.62}}{%
\pstree{\Tcircle{0.30}^{0}}{%
\TC*~{\bf C}^{0}
\TC*~{\bf D}_{1}}
\Tcircle{0.32}~{\bf B}_{1}}
\end{center}
\end{figure}
\[
p(A) = 0.38 {\color{red} \quad p(B) = 0.32 \quad p(C+D) = 0.3}
\]
\end{frame}
\begin{frame} \frametitle{Building the Huffman code}
\begin{figure}
\begin{center}
\pstree[radius=3pt,treemode=R,treefit=loose]{\Tcircle{1.00}}{%
\Tcircle{0.38}~{\bf A}^{0}
\pstree{\Tcircle{0.62}^{1}}{%
\pstree{\TC*^{0}}{%
\TC*~{\bf C}^{0}
\TC*~{\bf D}_{1}}
\TC*~{\bf B}_{1}}}
\end{center}
\end{figure}
\[
{\color{red} p(A) = 0.38 \quad p(B+C+D) = 0.62}
\]
\end{frame}
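\begin{frame}[fragile] \frametitle{Huffman coding: a code sketch}
a compact heap-based sketch of the construction on the previous slides (the 0/1 labels can differ with tie-breaking, but here it reproduces the tree above exactly):
\begin{verbatim}
import heapq

def huffman(probs):
    heap = [(p, [(s, '')]) for s, p in probs.items()]
    heapq.heapify(heap)
    while len(heap) > 1:   # merge the two least probable nodes
        p0, c0 = heapq.heappop(heap)
        p1, c1 = heapq.heappop(heap)
        kids = [(s, '0' + w) for s, w in c0] + \
               [(s, '1' + w) for s, w in c1]
        heapq.heappush(heap, (p0 + p1, kids))
    return dict(heap[0][1])

print(huffman({'A': 0.38, 'B': 0.32, 'C': 0.1, 'D': 0.2}))
# {'A': '0', 'C': '100', 'D': '101', 'B': '11'}
\end{verbatim}
\end{frame}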
\begin{frame} \frametitle{Conclusions}
\begin{itemize}
\item JPEG is a very complex and comprehensive standard:
\begin{itemize}
\item lossless, lossy
\item color, B\&W
\item progressive encoding
\item HDR (12bpp) for medical imaging
\end{itemize}
\item JPEG is VERY good:
\begin{itemize}
\item a compression factor of 10:1 is virtually indistinguishable from the original
\item rates of 1~bpp for RGB images are acceptable (24:1 compression)
\end{itemize}
\item other important compression schemes:
\begin{itemize}
\item TIFF, JPEG2000
\item MPEG (MP3)
\end{itemize}
\end{itemize}
\end{frame}
\end{document}
