Page MenuHomec4science

ip.tex
No OneTemporary

File Metadata

Created
Thu, Mar 13, 11:33
\documentclass[aspectratio=169]{beamer}
\def\stylepath{../styles}
\usepackage{\stylepath/com303}
\usepackage{pst-3dplot}
\usepackage{pst-tree}
\begin{document}
\begin{frame} \frametitle{In the old, non-PC days...}
\begin{figure}
% \begin{pspicture}(0,0)(0,1)%
%\includegraphics[width=3cm,height=3cm]{lena}%
%\psframe[dimen=middle,linewidth=1.1pt](-0.01,0)(-.99,.99)
% \end{pspicture}
\showImage{lena}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Please meet ...}
\begin{figure}
\showImage{orig}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Digital images}
\begin{itemize}
\item two-dimensional signal $x[n_1, n_2]$, $n_1, n_2 \in \mathbb{Z}$
\item indices locate a point on a grid $\rightarrow$ pixel
\item grid is usually regularly spaced
\item values $x[n_1, n_2]$ refer to the pixel's appearance
\end{itemize}
\end{frame}
\begin{frame} \frametitle{2D signals: image representation}
\begin{columns}
\begin{column}{0.45\paperwidth}
\begin{itemize}
\item medium has a certain dynamic range (paper, screen)
\item image values are quantized (usually to 8 bits, or 256 levels)
\item the eye does the interpolation in space provided the pixel density is high enough
\end{itemize}
\end{column}
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{center}
\begin{dspCP}[width=5cm,xout=true,xlabel={$n_1$},ylabel={$n_2$}]{0,511}{0,511}%
\dspImageFile{\imagePath/baseImage}%
\end{dspCP}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{About dynamic ranges...}
\begin{columns}
\begin{column}{0.45\paperwidth}
Images:
\begin{itemize}
\item human eye: ~120dB
\item prints: 12dB to 36dB
\item LCD: 60dB
\item digital cinema: ~90dB
\end{itemize}
\end{column}
\begin{column}{0.45\paperwidth}
Sounds:
\begin{itemize}
\item human ear: 140dB
\item speech: ~40dB
\item vinyl, tape: ~50dB
\item CD: 96dB
\end{itemize}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Digital images: grayscale vs color}
\begin{itemize}
\item grayscale images: scalar pixel values
\item color images: multidimensional pixel values in a color space (RGB, HSV, YUV, etc)
\item we can consider the single components separately:
\end{itemize}
\setbeamercovered{invisible}
{
\begin{figure}
\raisebox{-1cm}{\showImage[2.5cm]{orig}} =
\raisebox{-1cm}{\showImage[2.5cm]{red}} +
\raisebox{-1cm}{\showImage[2.5cm]{green}} +
\raisebox{-1cm}{\showImage[2.5cm]{blue}}
\end{figure}}
\end{frame}
\begin{frame} \frametitle{Image processing}
From one to two dimensions...
\begin{itemize}
\item something still works
\item something breaks down
\item something is new
\end{itemize}
\end{frame}
\begin{frame} \frametitle{A thought experiment}
\begin{itemize}
\item consider all possible $256\times 256$, 8bpp ``images''
\item each image is 524,288~bits
\vspace{1em}
\item total number of possible images: $2^{524,288} \approx 10^{157,826}$
\item number of atoms in the universe: $10^{82}$
\item how many ``images'' are proper images?
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Image processing}
\centering
images are very specialized signals, \\
``designed'' for a very specific processing system: the human brain!
\vspace{2em}
visual semantics is extremely hard to deal with
\end{frame}
\begin{frame} \frametitle{Image processing}
What works:
\begin{itemize}
\item the maths: linearity, space-invariance, convolution
\item Fourier transform formulas
\item interpolation, sampling, quantization
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Image processing}
What breaks down:
\begin{itemize}
\item Fourier analysis less relevant
\item filter design hard, IIRs rare
\item linear, space-invariant operators only mildly useful because of their isotropy
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Image processing}
What's new:
\begin{itemize}
\item new manipulations: affine transforms
\item images are finite-support signals
\item images are usually available in their entirety $\rightarrow$ causality mostly irrelevant
\end{itemize}
\end{frame}
\begin{frame} \frametitle{2D signal processing: the basics}
A two-dimensional discrete-space signal:
\[
x[n_1, n_2], \qquad n_1, n_2 \in \mathbb{Z}
\]
\end{frame}
\intertitle{affine transforms}
\begin{frame} \frametitle{Affine transforms}
\centering
mapping $\mathbb{R}^2 \rightarrow \mathbb{R}^2$ that reshapes the coordinate system (in continuous space):
\[
\begin{bmatrix} t'_1 \\ t'_2 \end{bmatrix} =
\begin{bmatrix} a_{11} & a_{12} \\ a_{21} & a_{22} \end{bmatrix} \begin{bmatrix} t_1 \\ t_2 \end{bmatrix} - \begin{bmatrix} d_1 \\ d_2 \end{bmatrix}
\]
\vspace{2em}
\[
\begin{bmatrix} t'_1 \\ t'_2 \end{bmatrix} = \mathbf{A} \begin{bmatrix} t_1 \\ t_2 \end{bmatrix} - \mathbf{d}
\]
\end{frame}
\def\flag{\psline(-.5,-1)(-.5,1.2)(1,1.2)(0.5,0.7)(1,0.2)(-.5,0.2)}
\def\grayFlag{\psline[linecolor=lightgray](-.5,-1)(-.5,1.2)(1,1.2)(0.5,0.7)(1,0.2)(-.5,0.2)}
\begin{frame} \frametitle{Translation}
\setbeamercovered{invisible}
\begin{columns}
\begin{column}{0.45\paperwidth}
\begin{align*}
\mathbf{A} &= \begin{bmatrix} 1 & 0 \\ 0 & 1 \end{bmatrix} = \mathbf{I} \\
\mathbf{d} &= \begin{bmatrix} d_1 \\ d_2 \end{bmatrix}
\end{align*}
\end{column}
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{center}
\uncover<2>{$d_1 = -0.7, d_2 = -0.3$}
\begin{dspCP}[width=5cm,xticks=5,yticks=5,xout=true]{-3,3}{-3,3}
\moocStyle
\only<1>{\flag}
\only<2>{%
\grayFlag
\rput(0.7,0.3){\flag}}
\end{dspCP}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Scaling}
\setbeamercovered{invisible}
\begin{columns}
\begin{column}{0.45\paperwidth}
\begin{align*}
\mathbf{A} &= \begin{bmatrix} a_1 & 0 \\ 0 & a_2 \end{bmatrix} \\
\mathbf{d} &= 0
\end{align*}
\vspace{1em}
\only<2>{if $a_1 = a_2$ the {\em aspect ratio} is preserved}
\end{column}
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{center}
\only<1>{~}
\only<2>{$a_1 = a_2 = 1.5$}
\only<3>{$a_1 = 2, a_2 = 1$}
\begin{dspCP}[width=5cm,xticks=5,yticks=5,xout=true]{-3,3}{-3,3}
\moocStyle
\only<1>{\flag}
\only<2>{%
\grayFlag
\psscalebox{1.5}{\flag}}
\only<3>{%
\grayFlag
\psscalebox{2 1}{\flag}}
\end{dspCP}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Rotation}
\setbeamercovered{invisible}
\begin{columns}
\begin{column}{0.45\paperwidth}
\begin{align*}
\mathbf{A} &= \begin{bmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{bmatrix} \\
\mathbf{d} &= 0
\end{align*}
\end{column}
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{center}
\only<1>{~}
\only<2>{$\theta = -0.6\pi$}
\only<3>{$\theta = \pi$}
\begin{dspCP}[width=5cm,xticks=5,yticks=5,xout=true]{-3,3}{-3,3}
\moocStyle
\only<1>{\flag}
\only<2>{%
\grayFlag
\rput{110}{\flag}}
\only<3>{%
\grayFlag
\rput{180}{\flag}}
\end{dspCP}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Flips}
\setbeamercovered{invisible}
\begin{columns}
\begin{column}{0.45\paperwidth}
Horizontal:
\begin{align*}
\mathbf{A} &= \begin{bmatrix} -1 & 0 \\ 0 & 1 \end{bmatrix} \\
\mathbf{d} &= 0
\end{align*}
Vertical:
\begin{align*}
\mathbf{A} &= \begin{bmatrix} 1 & 0 \\ 0 & -1 \end{bmatrix} \\
\mathbf{d} &= 0
\end{align*}
\end{column}
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{center}
\begin{dspCP}[width=5cm,xticks=5,yticks=5,xout=true]{-3,3}{-3,3}
\moocStyle
\only<1>{\flag}
\only<2>{
\grayFlag
\psscalebox{-1 1}{\flag}}
\end{dspCP}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Shear}
\setbeamercovered{invisible}
\begin{columns}
\begin{column}{0.45\paperwidth}
Horizontal:
\begin{align*}
\mathbf{A} &= \begin{bmatrix} 1 & s \\ 0 & 1 \end{bmatrix} \\
\mathbf{d} &= 0
\end{align*}
Vertical:
\begin{align*}
\mathbf{A} &= \begin{bmatrix} 1 & 0 \\ s & 1 \end{bmatrix} \\
\mathbf{d} &= 0
\end{align*}
\end{column}
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{center}
\begin{dspCP}[width=5cm,xticks=5,yticks=5,xout=true]{-3,3}{-3,3}
\moocStyle
\only<1>{\flag}
\only<2>{
\grayFlag
\def\s{0.5 }
\psline(! -.5 -1 dup \s mul 3 -1 roll add exch)(! -.5 1.2 dup \s mul 3 -1 roll add exch)(! 1 1.2 dup \s mul 3 -1 roll add exch)(! 0.5 0.7 dup \s mul 3 -1 roll add exch)(! 1 0.2 dup \s mul 3 -1 roll add exch)(! -.5 0.2 dup \s mul 3 -1 roll add exch)}
\end{dspCP}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame}
\frametitle{Affine transforms in discrete-space}
\[
\begin{bmatrix} t'_1 \\ t'_2 \end{bmatrix} = \mathbf{A} \begin{bmatrix} n_1 \\ n_2 \end{bmatrix} - \mathbf{d} \quad {\color{red} \in \mathbb{R}^2 \neq \mathbb{Z}^2}
\]
\end{frame}
\begin{frame}
\frametitle{Solution for images}
\begin{itemize}
\item take each {\em output} coordinate $y[m_1, m_2]$
\item apply the {\em inverse} transform to $[m_1, m_2]$ and find the {\em source} coordinates:
\[
\begin{bmatrix} t_1 \\ t_2 \end{bmatrix} = \mathbf{A}^{-1} \begin{bmatrix} m_1 + d_1 \\ m_2 + d_2 \end{bmatrix};
\]
\item if source point not on source grid, write
\[
(t_1, t_2) = (\eta_1 + \tau_1, \eta_2 + \tau_2), \qquad \eta_{1,2} \in \mathbb{Z}, \quad 0 \le \tau_{1,2} < 1
\]
and interpolate from the surrounding original grid points
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Bilinear Interpolation}
\begin{figure}\label{bilinearInt}
\begin{center}
\begin{dspCP}[width=6cm,xticks=custom,yticks=custom,xout=true]{0.5,2.5}{0.5,2.5}
\only<2>{
\dspPointValue[linecolor=blue]{2,2}{$x[\eta_1+1,\eta_2+1]$}
\dspPointValue[linecolor=blue]{1,1}{$x[\eta_1,\eta_2]$}}
\only<3->{
\dspPointValue[linecolor=blue]{2,2}{}
\dspPointValue[linecolor=blue]{1,1}{}}
\only<2->{
\dspPointValue[linecolor=blue]{2,1}{}
\dspPointValue[linecolor=blue]{1,2}{}}
%
\only<1-2>{\dspPointValue[linecolor=red]{1.3,1.7}{$(t_1,t_2)$}}
\only<3->{\dspPointValue[linecolor=red]{1.3,1.7}{}}
%
\only<3->{\psline[linewidth=0.5pt]{<->}(1,1)(1.3,1)\dspText(1.14,0.9){$\tau_1$}}
\only<3,5->{\psline[linewidth=0.5pt]{<->}(1.3,1)(1.3,1.7)\dspText(1.4,1.3){$\tau_2$}}
%
\only<4->{
\psline[linewidth=0.5pt,linestyle=dotted](1,1)(2,1)
\psline[linewidth=0.5pt,linestyle=dotted](1,2)(2,2)
\dspPointValue[dotstyle=triangle*,linecolor=green]{1.3,1}{}
\dspPointValue[dotstyle=triangle*,linecolor=green]{1.3,2}{}}
%
\only<5->{
\psline[linewidth=0.5pt,linestyle=dotted](1.3,1)(1.3,2)}
%
\only<1>{
\dspCustomTicks[axis=x]{1 ~ 2 ~}
\dspCustomTicks[axis=y]{1 ~ 2 ~}}
\only<2->{
\dspCustomTicks[axis=x]{1 $\eta_1$ 2 $\eta_1+1$}
\dspCustomTicks[axis=y]{1 $\eta_2$ 2 $\eta_2+1$}}
\end{dspCP}
\end{center}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Bilinear Interpolation}
If we use a first-order interpolator:
\begin{align*}
y[m_1,m_2] &= (1-\tau_1)(1-\tau_2)x[\eta_1, \eta_2] + \tau_1(1-\tau_2)x[\eta_1+1, \eta_2] \\
& + (1-\tau_1)\tau_2x[\eta_1, \eta_2+1] + \tau_1\tau_2x[\eta_1+1, \eta_2+1]
\end{align*}
\end{frame}
\begin{frame}
\frametitle{Shearing}
\begin{figure}
\includegraphics[height=6cm]{\imagePath/shear.eps}%
\end{figure}
\end{frame}
\intertitle{image processing as signal processing}
\begin{frame} \frametitle{Basic 2D signals: the impulse}
\begin{columns}
\begin{column}{0.45\paperwidth}
\[
\delta[n_1, n_2] = \begin{cases}
1 & \mbox{if $n_1= n_2 = 0$} \\
0 & \mbox{otherwise.}
\end{cases}
\]
\end{column}
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{center}
\begin{dspCP}[width=5cm,xticks=5,yticks=5,xout=true]{-6,6}{-6,6}
\moocStyle
\dspPoints{0 0}
\end{dspCP}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Basic 2D signals: the rect}
\begin{columns}
\begin{column}{0.45\paperwidth}
\[
\rect\left(\frac{n_1}{2N_1}, \frac{n_2}{2N_2}\right) = \begin{cases}
1 & \parbox[t]{10em}{if $|n_1| < N_1$ \\ and $|n_2| < N_2$} \\ \\
0 & \mbox{otherwise;}
\end{cases}
\]
\end{column}
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{center}
\begin{dspCP}[width=5cm,xticks=5,yticks=5,xout=true]{-6,6}{-6,6}
\moocStyle
\dspPoints{-3 2 -2 2 -1 2 0 2 1 2 2 2 3 2
-3 1 -2 1 -1 1 0 1 1 1 2 1 3 1
-3 0 -2 0 -1 0 0 0 1 0 2 0 3 0
-3 -1 -2 -1 -1 -1 0 -1 1 -1 2 -1 3 -1
-3 -2 -2 -2 -1 -2 0 -2 1 -2 2 -2 3 -2 }
\end{dspCP}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Separability}
\[
x[n_1, n_2] = x_1[n_1]x_2[n_2]
\]
\end{frame}
\begin{frame} \frametitle{Separable signals}
\[
\delta[n_1, n_2] = \delta[n_1]\delta[n_2]
\]
\vspace{2em}
\[
\rect\left(\frac{n_1}{2N_1}, \frac{n_2}{2N_2}\right) = \rect\left( \frac{n_1}{2N_1} \right) \rect\left( \frac{n_2}{2N_2} \right).
\]
\end{frame}
\begin{frame} \frametitle{Nonseparable signal}
\begin{columns}
\begin{column}{0.45\paperwidth}
\[
x[n_1, n_2] = \begin{cases}
1 & \mbox{if $|n_1| + |n_2| < N$} \\
0 & \mbox{otherwise}
\end{cases}
\]
\end{column}
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{center}
\begin{dspCP}[width=5cm,xticks=5,yticks=5,xout=true]{-6,6}{-6,6}
\moocStyle
\dspPoints{0 2
-1 1 0 1 1 1
-2 0 -1 0 0 0 1 0 2 0
-1 -1 0 -1 1 -1
0 -2}
\end{dspCP}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Nonseparable signal}
\begin{columns}
\begin{column}{0.45\paperwidth}
\[
x[n_1, n_2] = \rect\left(\frac{n_1}{2N_1}, \frac{n_2}{2N_2}\right) - \rect\left(\frac{n_1}{2M_1}, \frac{n_2}{2M_2}\right)
\]
\end{column}
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{center}
\begin{dspCP}[width=5cm,xticks=5,yticks=5,xout=true]{-6,6}{-6,6}
\moocStyle
\dspPoints{-3 3 -2 3 -1 3 0 3 1 3 2 3 3 3
-3 2 -2 2 -1 2 0 2 1 2 2 2 3 2
-3 1 -2 1 2 1 3 1
-3 0 -2 0 2 0 3 0
-3 -1 -2 -1 2 -1 3 -1
-3 -2 -2 -2 -1 -2 0 -2 1 -2 2 -2 3 -2
-3 -3 -2 -3 -1 -3 0 -3 1 -3 2 -3 3 -3 }
\end{dspCP}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{2D DFT}
\[
X[k_1, k_2] = \sum_{n_1 = 0}^{N_1-1}\sum_{n_2=0}^{N_2-1} x[n_1,n_2] e^{-j\frac{2\pi}{N_1} n_1 k_1} e^{-j\frac{2\pi}{N_2} n_2 k_2}
\]
\vspace{1em}
\[
x[n_1,n_2] = \frac{1}{N_1 N_2}\sum_{k_1 = 0}^{N_1-1}\sum_{k_2=0}^{N_2-1} X[k_1, k_2] e^{j\frac{2\pi}{N_1} n_1 k_1} e^{j\frac{2\pi}{N_2} n_2 k_2}
\]
\end{frame}
\begin{frame}
\frametitle{2D-DFT Basis Vectors}
\centering
There are $N_1 N_2$ orthogonal basis vectors for an $N_1\times N_2$ image:
\[
w_{k_1,k_2}[n_1,n_2] = e^{j\frac{2\pi}{N_1} n_1 k_1} e^{j\frac{2\pi}{N_2} n_2 k_2}
\]
\vspace{1em}
for $n_1, k_1 = 0, 1, \ldots, N_1-1$ and $n_2, k_2 = 0, 1, \ldots, N_2-1$
\end{frame}
\def\basisImage#1#2{%
\begin{frame}
\frametitle{2D-DFT basis vectors for $k_1=#1, k_2=#2$ (real part)}
\begin{figure}
\begin{dspCP}[xticks=255,yticks=255,xout=true]{0,255}{0,255}%
\dspImageFile{\imagePath/2DDFT-#1-#2.eps}%
\end{dspCP}
\end{figure}
\end{frame}}
\basisImage{1}{0}
\basisImage{0}{1}
\basisImage{2}{0}
\basisImage{3}{0}
\basisImage{0}{3}
\basisImage{30}{0}
\basisImage{1}{1}
\basisImage{2}{7}
\basisImage{5}{250}
\basisImage{3}{230}
\begin{frame} \frametitle{2D DFT}
\psset{linecolor=darkred}
2D-DFT basis functions are separable, and so is the 2D-DFT:
\[
X[k_1, k_2] = %
\hlBox{t1}{green!30}{\displaystyle
\sum_{n_1 = 0}^{N_1-1}
\left[
\hlBox{t2}{blue!30}{\displaystyle
\sum_{n_2=0}^{N_2-1}
x[n_1,n_2] e^{-j\frac{2\pi}{N_2} n_2 k_2}}
\right]
e^{-j\frac{2\pi}{N_1} n_1 k_1}}
\]
\begin{itemize}
\item 1D-DFT along $n_2$ \rnode[rc]{T2}{(the columns)~~}
\item 1D-DFT along $n_1$ \rnode[rc]{T1}{(the rows)~~}
\end{itemize}
{\nccurve[angleB=-90]{->}{T2}{t2}}
{\nccurve[angleB=-45]{->}{T1}{t1}}
\end{frame}
\begin{frame}
\frametitle{How does a 2D-DFT look like?}
\begin{figure}
\begin{tabular}{cc}
\begin{dspCP}[width=5cm,xout=true]{0,511}{0,511}%
\dspImageFile{\imagePath/baseImage}%
\end{dspCP} &
\begin{dspCP}[width=5cm,xout=true]{0,511}{0,511}%
\dspImageFile{\imagePath/fft}%
\end{dspCP}
\end{tabular}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{DFT magnitude doesn't carry much information}
\note<1>{\vspace{10em} the following images are the reconstruction of the original image from the magnitude only and the phase only, respectively}
\begin{figure}
\showImage{magOnly}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{DFT phase, on the other hand...}
\begin{figure}
\showImage{phaseOnly}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Image frequency analysis}
\begin{itemize}
\item most of the information is contained in image's {\em edges}
\item edges are points of abrupt change in signal's values
\item edges are a ``space-domain'' feature $\rightarrow$ not captured by DFT's magnitude
\item phase alignment is important for reproducing edges
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Filters in 2D}
2D convolution:
\[
x[n_1, n_2] \ast h[n_1, n_2] = \sum_{k_1 = -\infty}^{\infty}\sum_{k_2 = -\infty}^{\infty} x[k_1,k_2]h[n_1-k_1,n_2-k_2]
\]
\end{frame}
\begin{frame} \frametitle{2D convolution for separable signals}
\centering
If $h[n_1, n_2] = h_1[n_1]h_2[n_2]$:
\begin{align*}
x[n_1, n_2] \ast h[n_1, n_2] &= \sum_{k_1 = -\infty}^{\infty}h_1[n_1-k_1] \sum_{k_2 = -\infty}^{\infty} x[k_1,k_2]h_2[n_2-k_2] \\
&= h_1[n_1] \ast (h_2[n_2] \ast x[n_1,n_2]).
\end{align*}
\end{frame}
\begin{frame} \frametitle{2D convolution for separable signals}
If $h[n_1, n_2]$ is an $M_1 \times M_2$ finite-support signal:
\begin{itemize}
\item non-separable convolution: $M_1 M_2$ operations per output sample
\item separable convolution: $M_1 + M_2$ operations per output sample!
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Everyday 2D filter types}
\begin{itemize}
\item you'll see mostly FIR (need to preserve edges)
\item separable if possible
\item filters will be odd-length and zero-centered (noncausal)
\item lowpass $\rightarrow$ image smoothing
\item highpass $\rightarrow$ enhancement, edge detection
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Moving Average}
\[
y[n_1,n_2] = \frac{1}{(2N+1)^2}\sum_{k_1 = -N}^{N}\sum_{k_2 = -N}^{N} x[n_1-k_1,n_2-k_2]
\]
\vspace{2em}
\[
h[n_1,n_2] = \frac{1}{(2N+1)^2}\mbox{rect}\left(\frac{n_1}{2N}, \frac{n_2}{2N}\right)
\]
\end{frame}
\begin{frame} \frametitle{Moving Average}
\[
h[n_1, n_2] = \frac{1}{9}\,
\begin{bmatrix}
1 & 1 & 1 \\ 1 & 1 & 1 \\ 1 & 1 & 1
\end{bmatrix}
\]
\end{frame}
\begin{frame} \frametitle{Moving Average}
\begin{figure}
\begin{center}
\begin{tabular}{cc}
\showImage{baseImage}
&
\showImage{MA11} \\
original & $11\times 11$ MA
\end{tabular}
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Moving Average}
\begin{figure}
\begin{center}
\begin{tabular}{cc}
\showImage{baseImage}
&
\showImage{MA51} \\
original & $51\times 51$ MA
\end{tabular}
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Gaussian Blur}
\centering
\[
h[n_1,n_2] = \frac{1}{2\pi\sigma^2}e^{-\frac{n_1^2+n_2^2}{2\sigma^2}}, \qquad |n_1,n_2| < N
\]
\vspace{2em}
with $N \approx 3\sigma$
\end{frame}
\def\var{5 }
\begin{frame} \frametitle{Gaussian Blur}
\begin{figure}
\begin{center}
\psset{unit=2.4mm}
\begin{pspicture}(-5,-5)(5,15)
\psset{Beta=15}
\psplotThreeD[hiddenLine=true,plotstyle=curve,drawStyle=yLines,% is the default anyway
yPlotpoints=29,xPlotpoints=29,linewidth=1pt](-14,14)(-14,14){%
x x mul y y mul add -2 \var \var mul mul div 2.71828 exch exp 10 mul}
\pstThreeDCoor[linecolor=darkgray,xMin=-15,xMax=16,nameX={$n_1$},yMin=-15,yMax=16,nameY={$n_2$},zMin=0,zMax=13,nameZ={$h[n_1,n_2]$}]
\end{pspicture}
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Gaussian Blur}
\begin{figure}
\begin{dspCP}[xout=true,xlabel={$\sigma=5,N=14$}]{-14,14}{-14,14}%
\dspImageFile{\imagePath/Gauss5}%
\end{dspCP}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Gaussian Blur}
\begin{figure}
\begin{center}
\begin{tabular}{cc}
\showImage{baseImage}
&
\showImage{GB11} \\
original & $\sigma=1.8, 11\times 11$ blur
\end{tabular}
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Gaussian Blur}
\begin{figure}
\begin{center}
\begin{tabular}{cc}
\showImage{baseImage}
&
\showImage{GB51} \\
original & $\sigma=8.7, 51\times 51$ blur
\end{tabular}
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Gaussian blur more ``photographic'' than moving average}
\begin{figure}
\begin{center}
\begin{tabular}{cc}
\showImage{MA11}
&
\showImage{GB11} \\
$11\times 11$ MA & $\sigma=1.8, 11\times 11$ blur
\end{tabular}
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Gaussian blur more ``photographic'' than moving average}
\begin{figure}
\begin{center}
\begin{tabular}{cc}
\showImage{MA51}
&
\showImage{GB51} \\
$51\times 51$ MA & $\sigma=8.7, 51\times 51$ blur
\end{tabular}
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Filters for edge detection}
What is an edge? Very complicated but, simplifying:
\begin{itemize}
\item points of ``discontinuity'' in intensity
\item points of inflection in intensity
\end{itemize}
\vspace{2em}
\begin{center}
\begin{tabular}{ccc}
\showImage[4cm]{bw}
& \hspace{1cm} &
\showImage[4cm]{2DDFT-1-0}
\end{tabular}
\end{center}
\end{frame}
\begin{frame} \frametitle{Sobel filter}
\centering
Goal: find points where derivative is large.
\vspace{2em}
\begin{align*}
\nabla f(t_1, t_2) &= \begin{bmatrix}
\displaystyle\frac{\partial f}{\partial t_1} &
\displaystyle\frac{\partial f}{\partial t_2}
\end{bmatrix}^T \\
&= \frac{\partial f}{\partial t_1}\begin{bmatrix} 1 \\ 0 \end{bmatrix} + \frac{\partial f}{\partial t_2}\begin{bmatrix} 0\\ 1 \end{bmatrix}
\end{align*}
\end{frame}
\begin{frame} \frametitle{Sobel filter}
\centering
approximating the first derivative on the discrete grid in a circularly symmetric way:
\[
\begin{bmatrix*}[r]
x_1 & x_2 & x_3 \\
x_4 & x & x_5 \\
x_6 & x_7 & x_8 \\
\end{bmatrix*}
\]
\vspace{1em}
\[
\mathbf{x}' \approx \sum_{i=1}^{8} \frac{x-x_i}{d(x, x_i)}\mathbf{x}_i
\]
\end{frame}
\begin{frame} \frametitle{Sobel filter}
\centering
\[
\begin{bmatrix*}[r]
x_1 & x_2 & x_3 \\
x_4 & x & x_5 \\
x_6 & x_7 & x_8 \\
\end{bmatrix*}
\]
\vspace{2em}
\[
d(x, x_i) = \begin{cases}
2 & \mbox{for $i=2, 4, 5, 7$} \\
4 & \mbox{for $i=1, 3, 6, 8$}
\end{cases}
\]
\[
\mathbf{x}_1 = \begin{bmatrix}-1 & 1\end{bmatrix}^T, \quad
\mathbf{x}_2 = \begin{bmatrix}0 & 1\end{bmatrix}^T, \quad
\ldots, \quad
\mathbf{x}_8 = \begin{bmatrix}1 & -1\end{bmatrix}^T
\]
\end{frame}
\begin{frame} \frametitle{Sobel filter}
\centering
\[
\begin{bmatrix*}[r]
x_1 & x_2 & x_3 \\
x_4 & x & x_5 \\
x_6 & x_7 & x_8 \\
\end{bmatrix*}
\]
\vspace{2em}
\[
4\nabla x \approx \begin{bmatrix}
x_3 - x_1 +2(x_4-x_8) +x_5 - x_7 \\ x_7-x_1 +2(x_6-x_2) + x_5-x_3
\end{bmatrix}
\]
\end{frame}
\begin{frame} \frametitle{Sobel filter}
approximation of the first derivative in the horizontal direction:
\[
s_h[n_1, n_2] =
\begin{bmatrix*}[r]
-1 & 0 & 1 \\
-2 & 0 & 2 \\
-1 & 0 & 1 \\
\end{bmatrix*}
\]
\vspace{2em}
approximation of the first derivative in the vertical direction:
\[
s_v[n_1, n_2] =
\begin{bmatrix*}[r]
-1 & -2 & 1 \\
0 & 0 & 0 \\
1 & 2 & 1 \\
\end{bmatrix*}
\]
\end{frame}
\begin{frame} \frametitle{Sobel filter}
\centering
filter is separable, e.g.:
\[
s_h[n_1, n_2] =
\begin{bmatrix*}[r]
1 \\
2 \\
1 \\
\end{bmatrix*}
\begin{bmatrix*}[r] -1 & 0 & 1 \end{bmatrix*}
\]
\vspace{2em}
horizontal gradient = vertical averaging followed by horizontal differentiation
\end{frame}
\begin{frame} \frametitle{Sobel filter}
\begin{figure}
\begin{center}
\begin{tabular}{cc}
\showImage{sobelO}
&
\showImage{sobelV} \\
horizontal Sobel filter & vertical Sobel filter
\end{tabular}
\end{center}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Sobel operator}
approximation for the square magnitude of the gradient:
\[
|\nabla x[n_1, n_2]|^2 = |s_h[n_1, n_2] \ast x[n_1, n_2]|^2 + |s_v[n_1, n_2]\ast x[n_1, n_2]|^2
\]
\vspace{2em}
(``operator'' because it's nonlinear)
\end{frame}
\begin{frame}
\frametitle{Gradient approximation for edge detection}
\begin{figure}
\begin{center}
\begin{tabular}{cc}
\showImage{sobel}
&
\showImage{sobelTh} \\
Sobel operator & thresholeded Sobel operator
\end{tabular}
\end{center}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Laplacian operator}
Laplacian of a function in continuous-space:
\[
\Delta f(t_1, t_2) = \frac{\partial^2 f}{\partial t_1^2} + \frac{\partial^2 f}{\partial t_2^2}
\]
\end{frame}
\begin{frame}
\frametitle{Laplacian operator}
approximating the Laplacian; start with a Taylor expansion
\[
f(t+\tau) = \sum_{n=0}^{\infty} \frac{f^{(n)}(t)}{n!}\tau^n
\]
\vspace{1em}
and compute the expansion in $(t+\tau)$ and $(t-\tau)$:
\begin{align*}
f(t+\tau) &= f(t) + f'(t)\tau + \frac{1}{2}f''(t)\tau^2 \\
f(t-\tau) &= f(t) - f'(t)\tau + \frac{1}{2}f''(t)\tau^2
\end{align*}
\end{frame}
\begin{frame}
\frametitle{Laplacian operator}
by rearranging terms:
\[
f''(t) = \frac{1}{\tau^2}(f(t-\tau) - 2f(t) + f(t+\tau))
\]
\vspace{1em}
which, on the discrete grid, is the FIR $h[n] = \begin{bmatrix*}[r] 1 & -2 & 1 \end{bmatrix*}$
\end{frame}
\begin{frame}
\frametitle{Laplacian}
summing the horizontal and vertical components:
\[
h[n_1, n_2] =
\begin{bmatrix*}[r]
0 & 1 & 0 \\
1 & -4 & 1 \\
0 & 1 & 0 \\
\end{bmatrix*}
\]
\end{frame}
\begin{frame}
If we use the diagonals too:
\frametitle{Laplacian}
\[
h[n_1, n_2] =
\begin{bmatrix*}[r]
1 & 1 & 1 \\
1 & -8 & 1 \\
1 & 1 & 1 \\
\end{bmatrix*}
\]
\end{frame}
\begin{frame}
\frametitle{Laplacian for Edge Detection}
\begin{figure}
\begin{center}
\begin{tabular}{cc}
\showImage{lapl}
&
\showImage{laplTh} \\
Laplacian operator & thresholded Laplacian operator
\end{tabular}
\end{center}
\end{figure}
\end{frame}
\intertitle{image compression: the JPEG standard}
\begin{frame} \frametitle{Image Compression}
Another approach:
\begin{itemize}
\item exploit natural redundancy in images
\item allocate bits for things that matter (e.g. edges)
\item use psychovisual experiments to find out what matters
\item some information is discarded: {\em lossy}\/ compression
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Key ingredients}
\begin{itemize}
\item compressing at block level
\item using a suitable transform (i.e., a change of basis)
\item smart quantization
\item entropy coding
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Compressing at pixel level}
\begin{columns}
\begin{column}{0.45\paperwidth}
\begin{itemize}
\item reduce number bits per pixel
\item equivalent to coarser quantization
\item in the limit, 1bpp
\end{itemize}
\end{column}
%
\begin{column}{0.45\paperwidth}
\begin{figure}
{
\begin{dspCP}[width=5cm,xticks=none,yticks=none]{0,511}{0,511}%
\dspImageFile{\imagePath/bpp1}%
\end{dspCP}}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Compressing at block level}
\begin{columns}
\begin{column}{0.5\paperwidth}
\begin{itemize}
\item divide the image in blocks
\item code the average value with 8 bits
\item $3\times 3$ blocks at 8 bits per block gives less than 1bpp
\end{itemize}
\end{column}
%
\begin{column}{0.5\paperwidth}
\begin{figure}
{
\begin{dspCP}[width=5cm,xticks=none,yticks=none]{0,511}{0,511}%
\dspImageFile{\imagePath/bpp9}%
\end{dspCP}}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Transform coding}
\begin{columns}
\begin{column}{0.45\paperwidth}
A simple example:
\begin{itemize}
\item take a DT signal, assume $R$ bits per sample
\item storing the signal requires $NR$ bits
\item now you take the DFT and it looks like this
\item in theory, we can just code the two nonzero DFT coefficients!
\end{itemize}
\end{column}
%
\begin{column}{0.45\paperwidth}
\begin{figure}
\begin{dspPlot}[width=5cm,height=2cm,yticks=none,xout=true]{0,31}{-1.1,1.1}
\moocStyle
\dspSignal{x 360 32 div 5 mul mul cos}
\end{dspPlot}
\begin{dspPlot}[width=5cm,height=2cm,yticks=none,xout=true]{0,31}{-1.1,1.1}
\moocStyle
\dspSignal{x 5 eq {1} {x 27 eq {1} {0} ifelse} ifelse}
\end{dspPlot}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{DCT basis vectors for an $8\times 8$ image}
\note<1>{\vspace{10em} Stress the fact that the transform, as a change of basis, is the correlation between the image block under analysis and each of the basis vectors. The resulting coefficients are the weights that measure the contribution to the patch of each DCT pattern. Also mention that the 1st coeff is the average graylevel}
\begin{figure}
\begin{center}
\begin{dspCP}[width=6cm,xticks=none,yticks=none]{0,511}{0,511}%
\dspImageFile{\imagePath/dctBasis}%
\end{dspCP}
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Smart quantization of the DCT coefficients}
\begin{itemize}
\item deadzone
\item variable step (fine to coarse)
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Standard Uniform Quantization}
\begin{columns}
\begin{column}{0.35\paperwidth}
\[
\hat{x}=\mbox{floor}(x)+0.5
\]
\end{column}
%
\begin{column}{0.55\paperwidth}
\begin{figure}
\begin{center}
\psset{unit=1.3cm}
\begin{pspicture}(-2.2,-2.2)(2.2,2.2)
\footnotesize
\psaxes[linewidth=1pt,tickwidth=0.5pt,Dx=1,Dy=1,showorigin=false](0,0)(-2.1,-2.1)(2.1,2.1)
\uput[90](0,2){$\hat{x}[n]$}
\uput[90](2.1,0){$x[n]$}
\multido{\n=1.5+-1.0}{4}{\pscircle*[linecolor=orange](0,\n){2pt}}
\psline[linecolor=blue,linestyle=dotted](-2,-2)(2,2)
\psplot[plotpoints=800,linewidth=1pt,linecolor=orange]{-1.99}{1.99}%
{x abs floor 0.5 add x abs x div mul}
\uput[90](! -2 0.5 add dup){\gray 11}
\uput[90](! -1 0.5 add dup){\gray 10}
\uput[90](! 0 0.5 add dup){\gray 00}
\uput[90](! 1 0.5 add dup){\gray 01}
\end{pspicture}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame} \frametitle{Quantizing a full-range signal}
\begin{center}
\begin{figure}
\begin{dspPlot}[xticks=none,sidegap=2]{0,32}{-1,1}
\moocStyle
\only<1-2>{\dspSignal{x 32 div 360 mul sin}}
\only<2->{\multido{\n=-1+0.25}{9}{\psline[linecolor=lightgray,linewidth=.4pt](-2,\n)(34,\n)}}
\only<3>{\dspSignal{x 32 div 360 mul sin 0.99 mul dup 4 mul cvi 4 div exch 0 ge {.125} {-.125} ifelse add}}
\end{dspPlot}
\end{figure}
\end{center}
\end{frame}
\begin{frame} \frametitle{Quantizing a small, noise-like signal}
\begin{center}
\begin{figure}
\begin{dspPlot}[xticks=none,sidegap=2]{0,32}{-1,1}
\moocStyle
\only<1-2>{\dspSignal{x 170 mul sin 0.05 mul}}
\only<2->{\multido{\n=-1+0.25}{9}{\psline[linecolor=lightgray,linewidth=.4pt](-2,\n)(34,\n)}}
\only<3>{\dspSignal{x 170 mul sin 0.1 mul dup 4 mul cvi 4 div exch 0 ge {.125} {-.125} ifelse add}}
\end{dspPlot}
\end{figure}
\end{center}
\end{frame}
\begin{frame} \frametitle{Deadzone Quantization}
\begin{columns}
\begin{column}{0.35\paperwidth}
\[
\hat{x}=\mbox{round}(x)
\]
\end{column}
%
\begin{column}{0.55\paperwidth}
\begin{figure}
\begin{center}
\psset{unit=1.3cm}
\begin{pspicture}(-2.2,-2.2)(2.2,2.2)
\footnotesize
\psaxes[linewidth=1pt,tickwidth=0.5pt,Dx=1,Dy=1,showorigin=false](0,0)(-2.1,-2.1)(2.1,2.1)
\uput[90](0,2){$\hat{x}[n]$}
\uput[90](2.1,0){$x[n]$}
\multido{\n=1+-1}{3}{\pscircle*[linecolor=orange](0,\n){2pt}}
\psline[linecolor=blue,linestyle=dotted](-2,-2)(2,2)
\psplot[plotpoints=800,linewidth=1pt,linecolor=orange]{-1.49}{1.49}{x round }
\uput[90](! -1 dup){\gray 10}
\uput*[90](! 0 dup){\gray 00}
\uput[90](! 1 dup){\gray 01}
\end{pspicture}
\end{center}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\def\sp{ }
\begin{frame} \frametitle{Quantizing a small, noise-like signal, take 2}
\begin{center}
\begin{figure}
\begin{dspPlot}[xticks=none,sidegap=2]{0,32}{-1,1}
\moocStyle
\only<1-2>{\dspSignal{x 170 mul sin 0.05 mul}}
\only<2->{\multido{\n=-1+0.25}{8}{\psline[linecolor=lightgray,linewidth=.4pt](!-2 \n \sp 0.125 add)(! 34 \n \sp 0.125 add)}}
\only<3>{\dspSignal{0}}
\end{dspPlot}
\end{figure}
\end{center}
\end{frame}
\begin{frame} \frametitle{Entropy coding}
\begin{itemize}
\item minimize the effort to encode a certain amount of information
\item associate short symbols to frequent values and vice-versa
\end{itemize}
\begin{figure}
\begin{center}
\includegraphics[height=4cm]{morse}%
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Key ingredients}
\begin{itemize}
\item compressing at block level: {\color{blue!80} split image into $8\times 8$ non-overlapping blocks}
\item using a suitable transform: {\color{blue!80}compute the DCT of each block}
\item smart quantization: {\color{blue!80}quantize DCT coefficients according to psycovisually-tuned tables}
\item entropy coding: {\color{blue!80}run-length encoding and Huffman coding}
\end{itemize}
\end{frame}
% 20x20 blocks
\begin{frame}
\frametitle{DCT coefficients of image blocks}
\begin{figure}
\begin{center}
\begin{dspCP}[width=6cm,xticks=none,yticks=none]{0,511}{0,511}%
\dspImageFile{\imagePath/dctDetail}%
\end{dspCP}
\end{center}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Smart quantization}
\begin{itemize}
\item most coefficients are negligible $\rightarrow$ captured by the deadzone
\item some coefficients have a higher visual impact
\item find out the critical coefficients by experimentation
\item use smaller quantization intervals (i.e. use more more bits) for the important coefficients
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Advantages of tuned quantization intervals at 0.2bpp}
\note<1>{\vspace{10em} both images are encoded at the same bitrate of 0.2bpp, the first one with a uniform quantization table for each DCT block, the second with the JPEG (fixed) quantization table)}
\begin{figure}
\begin{center}
\begin{tabular}{cc}
\showImage{loUni}
&
\showImage{loOpt}
\\
uniform & tuned
\end{tabular}
\end{center}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Zigzag scan}
\begin{figure}
\begin{center}
\begin{dspCP}[xticks=none,yticks=none]{0.5,8.5}{0.5,8.5}%
{\psset{linecolor=blue!80,linewidth=0.8pt}
\psline{->}(1,8)(2,8)(1,7)(1,6)(3,8)(4,8)
\psline{->}(4,8)(1,5)(1,4)(5,8)(6,8)
\psline{->}(6,8)(1,3)(1,2)(7,8)(8,8)(1,1)(2,1)(8,7)(8,6)(3,1)(4,1)(8,5)(8,4)(5,1)(6,1)(8,3)(8,2)(7,1)(8,1)
}
\multido{\n=1+1}{7}{%
\psline[linewidth=1pt](! 0.5 \n.0 0.5 add)(! 8.5 \n.0 0.5 add)
\psline[linewidth=1pt](! \n.0 0.5 add 0.5)(! \n.0 0.5 add 8.5)}
\multido{\n=1+1}{8}{%
\multido{\i=1+1}{8}{\pscircle*(\n,\i){2pt}}}
\pscircle*[linecolor=red](1,8){2pt} \end{dspCP}
\end{center}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Example}
\[
\begin{bmatrix*}[r]
100 & -60 & 0 & 6 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
13 & -1 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
\end{bmatrix*}
\]
\vspace{1em}
100, -60, 0, 0, 0, 0, 6, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
\end{frame}
\begin{frame}
\frametitle{Runlength encoding}
\[
\begin{bmatrix*}[r]
100 & -60 & 0 & 6 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
13 & -1 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
\end{bmatrix*}
\]
\vspace{1em}
\[
[100], \, [(0, 6),\, -60], \, [(4, 3),\, 6], \, [(3, 4),\, 13], \, [(8, 1),\, -1], [(0, 0)]
\]
\end{frame}
\begin{frame}
\frametitle{Entropy coding}
goal: minimize message length
\begin{itemize}
\item assign short sequences to more frequent symbols
\item the Huffman algorithm builds the optimal prefix-free code for a set of symbol probabilities
\item in JPEG, you can use a ``general-purpose'' Huffman code or build your own \\ (but then you pay a ``side-information'' price)
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Example}
\begin{itemize}
\item four symbols: A, B, C, D
\item probability table:
\begin{center}
\begin{tabular}{lcl}
$p(A) = 0.38$ & ~~~~~~ & $p(B) = 0.32$ \\
& \\
$p(C) = 0.1$ & & $p(D) = 0.2$ \\
\end{tabular}
\end{center}
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Building the Huffman code}
\begin{figure}
\begin{center}
\pstree[radius=3pt,treemode=R,treefit=loose]{\Tcircle{0.30}}{%
\Tcircle{0.10}~{{\bf C}}^{0}
\Tcircle{0.20}~{{\bf D}}_{1}}
\end{center}
\end{figure}
\[
p(A) = 0.38 \quad p(B) = 0.32 {\color{red} \quad p(C) = 0.1 \quad p(D) = 0.2}
\]
\end{frame}
\begin{frame}
\frametitle{Building the Huffman code}
\begin{figure}
\begin{center}
\pstree[radius=3pt,treemode=R,treefit=loose]{\Tcircle{0.62}}{%
\pstree{\Tcircle{0.30}^{0}}{%
\TC*~{\bf C}^{0}
\TC*~{\bf D}_{1}}
\Tcircle{0.32}~{\bf B}_{1}}
\end{center}
\end{figure}
\[
p(A) = 0.38 \color{red} \quad {\color{red} p(B) = 0.32 \quad p(C+D) = 0.3}
\]
\end{frame}
\begin{frame}
\frametitle{Huffman Coding}
\begin{figure}
\begin{center}
\pstree[radius=3pt,treemode=R,treefit=loose]{\Tcircle{1.00}}{%
\Tcircle{0.38}~{\bf A}^{0}
\pstree{\Tcircle{0.62}^{1}}{%
\pstree{\TC*^{0}}{%
\TC*~{\bf C}^{0}
\TC*~{\bf D}_{1}}
\TC*~{\bf B}_{1}}}
\end{center}
\end{figure}
\[
{\color{red} p(A) = 0.38 \quad p(B+C+D) = 0.62}
\]
\end{frame}
\begin{frame}
\frametitle{Conclusions}
\begin{itemize}
\item JPEG is a very complex and comprehensive standard:
\begin{itemize}
\item lossless, lossy
\item color, B\&W
\item progressive encoding
\item HDR (12bpp) for medical imaging
\end{itemize}
\item JPEG is VERY good:
\begin{itemize}
\item compression factor of 10:1 virtually indistinguishable
\item rates of 1bpp for RGB images acceptable (25:1 compression ratio)
\end{itemize}
\item other important compression schemes:
\begin{itemize}
\item TIFF, JPEG2000
\item MPEG (MP3)
\end{itemize}
\end{itemize}
\end{frame}
\end{document}

Event Timeline