Page MenuHomec4science

2_FFT.tex
No OneTemporary

File Metadata

Created
Thu, Mar 13, 08:15

2_FFT.tex

\documentclass[aspectratio=169]{beamer}
\def\stylepath{../styles}
\usepackage{\stylepath/com303}
\usepackage{comment}
\begin{document}
\def\sp{ }
%% Top iteration for labels split by dots
\def\topiter#1#2#3#4{%
\FPupn\t{#3 0.4 mul 1 max 0 trunc}
\multido{\n=0+1}{\t}{\FPupn\c{\n{} 1 #3 - - #2 + 0 trunc} #4}}
%% Bottom iteration for labels split by dots
\def\botiter#1#2#3#4{%
\FPupn\b{#3 0.3 mul 1.0 max 0 trunc}
\multido{\n=1+1}{\b}{\FPupn\c{#2 1 \n{} - + 0 trunc} #4}}
%% Write out labels of type x[n] on the left for a block
\def\slabelsL(#1,#2)#3#4{%
\topiter{#1}{#2}{#3}{\psdot(#1,\c)\rput[r]{0}(#1,\c){$#4[\n]$~~}}
\botiter{#1}{#2}{#3}{\psdot(#1,\c)\rput[r]{0}(#1,\c){$#4[N-\n]$~~}}}
%% Write out labels of type x[n] on the right for a block
\def\slabelsR(#1,#2)#3#4{%
\topiter{#1}{#2}{#3}{\psdot(!#1 2 add \c)\rput[l]{0}(!#1 2 add \c){~~$#4[\n]$}}
\botiter{#1}{#2}{#3}{\psdot(!#1 2 add \c)\rput[l]{0}(!#1 2 add \c){~~$#4[N\ifnum\n=0\relax\else-\n\fi]$}}}
%% plot the connecting lines out of a block (split by dots)
\def\sconn(#1,#2)#3{%
\topiter{#1}{#2}{#3}{\psline(#1,\c)(! #1 2 add \c\sp)}
\botiter{#1}{#2}{#3}{\psline(#1,\c)(! #1 2 add \c\sp)}
\psline[linestyle=dotted](! #1 1 add #2 #3 add 1 sub \t\sp sub)(! #1 1 add #2 \b\sp add)}
%% draw a block: (x,y){rot}{text}
\def\sblock(#1,#2)#3#4#5{%
\psframe(! #1 #2 0.2 sub)(! #1 2 add #2 #3 0.8 sub add)
\rput[B]{#4}(! #1 1 add #2 #3 1 sub 2 div add){#5}}
%% plot the connecting lines out of a block (full set)
\def\fconn(#1,#2)#3{%
\multido{\n=0+1}{#3}{\psline(!#1 #2 \n\sp add)(! #1 2 add #2 \n\sp add)}}
%% draw labels iterating over a comma separated list
\def\flabelsL(#1,#2)#3#4{%
\psForeach{\n}{#3}{\psdot(!#1 #2 \the\psLoopIndex\sp add)\rput[r]{0}(!#1 #2 \the\psLoopIndex\sp add){$#4[\n]$~~}}}
\def\flabelsR(#1,#2)#3#4{%
\psForeach{\n}{#3}{\psdot(!#1 #2 \the\psLoopIndex\sp add)\rput[l]{0}(!#1 #2 \the\psLoopIndex\sp add){~~$#4[\n]$}}}
%% butterfly
\def\butterfly#1#2#3{%
\def\off{0.4 }
\psline[linecolor=gray]{->}(! #1 \off add #2)(! #1 2 add \off sub #3)
\psline[linecolor=gray]{->}(! #1 \off add #3)(! #1 2 add \off sub #2)
\psdot(! #1 \off add #2)\psdot(! #1 \off add #3)
\psdot(! #1 2 add \off sub #3)\psdot(! #1 2 add \off sub #2)
\uput{0.2}[225]{0}(! #1 2 add \off sub #2){{\footnotesize $-1$}}}
%% connecting line in flow graph
\def\connS(#1,#2)#3#4{%
\psForeach{\n}{#4}{\psline{->}(!#1 #2 \the\psLoopIndex\sp add)(!#1 #3 add #2 \the\psLoopIndex\sp add)\psdot(!#1 #3 add #2 \the\psLoopIndex\sp add)\uput{0.2}[135]{0}(!#1 #3 add #2 \the\psLoopIndex\sp add){{\footnotesize $\n$}}}}
\begin{frame}
\frametitle{Overview}
\begin{itemize}
\item A bit of history: From Gauss to the fastest FFT in the west
\item Small DFT matrices
\item The Cooley-Tukey FFT
\item Decimation-in-Time FFT for length $2^N$ FFTs
\item Conclusions: There are FFTs for any length!
\end{itemize}
\end{frame}
%% Fourier
\begin{frame}
\frametitle{Fourier had the Fourier transform}
\begin{center}
\includegraphics[width=5cm]{fourier.eps}
\end{center}
\end{frame}
\begin{frame}
\frametitle{But Gauss had the FFT all along ;)}
\begin{center}
\includegraphics[width=5cm]{gauss.eps}
\end{center}
\end{frame}
\begin{frame}
\frametitle{History}
\begin{itemize}[<+->]
\item Gauss computes trigonometric series efficiently in 1805
\item Fourier invents Fourier series in 1807
\item People start computing Fourier series, and develop tricks
\item Good comes up with an algorithm in 1958
\item Cooley and Tukey (re)-discover the fast Fourier transform algorithm in 1965 for N a power of a prime
\item Winograd combines all methods to give the most efficient FFTs in 1978
\item Frigo and Johnson develop the FFTW in 1999
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{The DFT matrix}
\begin{itemize}[<+->]
\item $W_N = e^{-j\frac{2\pi}{N}}$ (or simply $W$ when $N$ is clear from the context)
\item powers of $N$ can be taken modulo $N$, since $W^N = 1$.
\item DFT Matrix of size $N$ by $N$:
\[
\mathbf{W} = \begin{bmatrix}
1 & 1 & 1 & 1 & \ldots & 1 \\
1 & W^{1} & W^{2} & W^{3} & \ldots & W^{N-1} \\
1 & W^{2} & W^{4} & W^{6} & \ldots & W^{2(N-1)} \\
& & & \ldots \\
1 & W^{N-1} & W^{2(N-1)} & W^{3(N-1)} & \ldots & W^{(N-1)^2}
\end{bmatrix}
\]
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Small DFT matrices: $N = 2$}
\[
\mathbf{W}_2 =
\begin{bmatrix}
1 & 1 \\
1 & -1
\end{bmatrix}
\]
\end{frame}
\begin{comment}
\begin{frame}
\frametitle{Small DFT matrices: $N = 3$}
\[
\mathbf{W}_3 =
\begin{bmatrix}
1 & 1 & 1 \\
1 & W & W^2 \\
1 & W^2 & W^4
\end{bmatrix}
=
\begin{bmatrix}
1 & 1 & 1 \\
1 & W & W^2 \\
1 & W^2 & W
\end{bmatrix}
=
\begin{bmatrix}
1 & 1 & 1 \\
1 & \frac{-1 - j\sqrt{3}}{2} & \frac{-1 + j\sqrt{3}}{2} \\
1 & \frac{-1 + j\sqrt{3}}{2} & \frac{-1 - j\sqrt{3}}{2}
\end{bmatrix}
\]
\end{frame}
\end{comment}
\begin{frame}
\frametitle{Small DFT matrices: $N = 4$}
\[
\mathbf{W}_4 =
\begin{bmatrix}
1 & 1 & 1 & 1 \\
1 & W & W^2& W^3 \\
1 & W^2 & W^4 & W^6 \\
1 & W^3 & W^6 & W^9
\end{bmatrix}
=
\begin{bmatrix}
1 & 1 & 1 & 1 \\
1 & W & W^2& W^3 \\
1 & W^2 & 1 & W^2\\
1 & W^3 & W^2 & W
\end{bmatrix}
=
\begin{bmatrix}
1 & 1 & 1 & 1 \\
1 & -j & -1 & j \\
1 & -1 & 1 & -1 \\
1 & j & -1 & -j
\end{bmatrix}
\]
\end{frame}
\begin{comment}
\begin{frame}
\frametitle{Small DFT matrices: $N = 5$}
\[
\mathbf{W}_5 =
\begin{bmatrix}
1 & 1 & 1 & 1 & 1\\
1 & W & W^2& W^3 & W^4 \\
1 & W^2 & W^4 & W^6 & W^8\\
1 & W^3 & W^6 & W^9 & W^{12}\\
1 & W^4 & W^8 & W^{12} & W^{16}\\
\end{bmatrix}
=
\begin{bmatrix}
1 & 1 & 1 & 1 & 1\\
1 & W & W^2& W^3 & W^4 \\
1 & W^2 & W^4 & W & W^3\\
1 & W^3 & W & W^4 & W^{2}\\
1 & W^4 & W^3 & W^2 & W\\
\end{bmatrix}
\]
\end{frame}
\begin{frame}
\frametitle{Small DFT matrices: $N = 6$}
\[
\mathbf{W}_6 =
\begin{bmatrix}
1 & 1 & 1 & 1 & 1 & 1\\
1 & W & W^2& W^3 & W^4 & W^5\\
1 & W^2 & W^4 & W^6 & W^8 & W^{10}\\
1 & W^3 & W^6 & W^9 & W^{12} & W^{15}\\
1 & W^4 & W^8 & W^{12} & W^{16} & W^{20}\\
1 & W^5 & W^{10} & W^{15} & W^{20} & W^{25}
\end{bmatrix}
=
\begin{bmatrix}
1 & 1 & 1 & 1 & 1 & 1\\
1 & W & W^2& W^3 & W^4 & W^5\\
1 & W^2 & W^4 & 1 & W^2 & W^{4}\\
1 & W^3 & 1 & W^3 & 1 & W^{3}\\
1 & W^4 & W^2 & 1 & W^{4} & W^{2}\\
1 & W^5 & W^{4} & W^{3} & W^{2} & W
\end{bmatrix}
\]
\end{frame}
\end{comment}
\def\ovn(#1,#2)#3#4{\rput{0}(#1,#2){\ovalnode{#3}{\parbox{12ex}{\centering #4}}}}
\def\arc#1#2#3{\ncarc{#1}{#2}\ncput*[nrot=:U]{#3}}
\def\rarc#1#2#3{\ncarc{#2}{#1}\ncput*[nrot=:D]{#3}}
\begin{frame}
\frametitle{ Divide et impera - Divide and Conquer (Julius Caesar)}
\centering
Divide and conquer is a standard attack for developing fast algorithms.
\vspace{1em}
\begin{figure}
\scalebox{0.7}{%
\psset{xunit=3.5cm,yunit=1.1cm}
\begin{pspicture}(0,0)(4,6)
\footnotesize
\ovn(0,3){A}{{\color{red} problem}}
\ovn(1,1){B1}{subproblem}
\ovn(1,5){B2}{subproblem}
\ovn(2,0){C1}{simple \\ subproblem}
\ovn(2,2){C2}{simple \\ subproblem}
\ovn(2,4){C3}{simple \\ subproblem}
\ovn(2,6){C4}{simple \\ subproblem}
\ovn(3,1){D1}{intermediate \\ solution}
\ovn(3,5){D2}{intermediate \\ solution}
\ovn(4,3){E}{{\color{green} solution}}
%
\rarc{A}{B1}{{\color{red} split}}
\arc{A}{B2}{{\color{red} split}}
\rarc{B1}{C1}{{\color{red} split}}
\arc{B1}{C2}{{\color{red} split}}
\rarc{B2}{C3}{{\color{red} split}}
\arc{B2}{C4}{{\color{red} split}}
\rarc{C1}{D1}{{\color{green} merge}}
\arc{C2}{D1}{{\color{green} merge}}
\rarc{C3}{D2}{{\color{green} merge}}
\arc{C4}{D2}{{\color{green} merge}}
\rarc{D1}{E}{{\color{green} merge}}
\arc{D2}{E}{{\color{green} merge}}
\end{pspicture}}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{ Divide and Conquer for DFT - One step}
\begin{center}
Recall: computing $\mathbf{X} = \mathbf{W}_N \ \mathbf{x}$ has complexity $O(N^2)$.
\end{center}
\vspace{1em}
Idea:
\begin{itemize}[<+->]
\item Assume $N$ even
\item Split the problem into two subproblems of size $N/2$; cost is $N^2 / 4$ each
\item \emph{If}\/ the cost to recover the full solution is linear $N\,\ldots$
\item $\quad\ldots$ the divide-and-conquer solution costs $N^2/2 + N$ for one step
\item For $N \geq 4$ this is better than $N^2$
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{ Divide and Conquer for DFT - One step}
\setbeamercovered{invisible}
Graphically
\begin{itemize}[<+->]
\item Split DFT input into 2 pieces of size $N/2$
\item Compute two DFT's of size $N/2$
\item Merge the two results
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{ Divide and Conquer for DFT - One step}
\begin{figure}
\scalebox{.5}{%
\psset{xunit=1.2cm,yunit=0.8cm,linewidth=2.5pt}
\begin{pspicture}(0,0)(13,15)
\Large
\sconn(0,0){16}\slabelsL(0,0){16}{x}\sblock(2,0){16}{90}{\Huge split}
\sconn(4,0){8}\sblock(6,0){8}{90}{\Huge DFT-$N/2$}\sconn(8,0){8}
\sconn(4,8){8}\sblock(6,8){8}{90}{\Huge DFT-$N/2$}\sconn(8,8){8}
\sblock(10,0){16}{90}{\Huge merge}\sconn(12,0){16}\slabelsR(12,0){16}{X}
\end{pspicture}}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{ Divide and Conquer for DFT - Multiple steps}
\begin{center}
Idea: if $N=2^K$, divide and conquer can be reapplied!
\end{center}
\begin{itemize}[<+->]
\item Cut the two problems of size $N/2$ into 4 problems of size $N/4$
\item Assume complexity to recover the full solution still linear, e.g. $N$ at each step
\item You can do this $\log_2 N - 1 = K-1$ times, until problem of size 2 is obtained
\item The divide-and-conquer solution has therefore complexity of order $ N \log_2 N $
\item For $N \geq 4$ this is much better than $N^2$!
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{ Divide and Conquer for DFT - Multiple steps}
Graphically
\begin{itemize}[<+->]
\item Split DFT input into 2, 4 and 8 pieces of sizes $N/2$, $N/4$ and $N/8$, respectively
\item Compute 8 DFT's of size $N/8$
\item Merge the results successively into DFT's of size $N/4$, $N/2$ and finally $N$
\end{itemize}
\end{frame}
\def\lb{\raisebox{-2mm}{\parbox{2em}{\centering\footnotesize DFT \\ $N/8$}}}
\begin{frame}
\frametitle{ Divide and Conquer for DFT - Multiple steps}
\begin{figure}
\scalebox{0.5}{%
\psset{xunit=0.75cm,yunit=0.8cm,linewidth=1.7pt}
\begin{pspicture}(0,0)(29,15)
\LARGE
\sconn(0,0){16}\slabelsL(0,0){16}{x}\sblock(2,0){16}{90}{split}
%
\sconn(4,0){8}\sblock(6,0){8}{90}{split}
\sconn(4,8){8}\sblock(6,8){8}{90}{split}
%
\sconn(8,0){4}\sblock(10,0){4}{90}{split}
\sconn(8,4){4}\sblock(10,4){4}{90}{split}
\sconn(8,8){4}\sblock(10,8){4}{90}{split}
\sconn(8,12){4}\sblock(10,12){4}{90}{split}
%
\sconn(12,0){2}\sblock(14,0){2}{0}{\lb} \sconn(16,0){2}
\sconn(12,2){2}\sblock(14,2){2}{0}{\lb}\sconn(16,2){2}
\sconn(12,4){2}\sblock(14,4){2}{0}{\lb}\sconn(16,4){2}
\sconn(12,6){2}\sblock(14,6){2}{0}{\lb}\sconn(16,6){2}
\sconn(12,8){2}\sblock(14,8){2}{0}{\lb}\sconn(16,8){2}
\sconn(12,10){2}\sblock(14,10){2}{0}{\lb}\sconn(16,10){2}
\sconn(12,12){2}\sblock(14,12){2}{0}{\lb}\sconn(16,12){2}
\sconn(12,14){2}\sblock(14,14){2}{0}{\lb}\sconn(16,14){2}
%
\sblock(18,0){4}{90}{merge}\sconn(20,0){4}
\sblock(18,4){4}{90}{merge}\sconn(20,4){4}
\sblock(18,8){4}{90}{merge}\sconn(20,8){4}
\sblock(18,12){4}{90}{merge}\sconn(20,12){4}
%
\sblock(22,0){8}{90}{merge}\sconn(24,0){8}
\sblock(22,8){8}{90}{merge}\sconn(24,8){8}
%
\sblock(26,0){16}{90}{merge}\sconn(28,0){16}\slabelsR(28,0){16}{x}
\end{pspicture}}
\end{figure}
\end{frame}
%\endMoocMod
% DIT
\begin{frame}
\frametitle{Divide and Conquer for DFT- Analysis of DIT}
%Recall the computation of the DFT on an input $x[n]$ of length $N$
\[
X[k] = \sum_{n = 0}^{N-1} x[n]\, W^{nk}_N, \qquad k = 0,1,\ldots,N-1, \ \ W_N = e^{-j\frac{2\pi}{N}}
\]
%with output $X[k]$ of length $N$
%\vspace{1em}
\pause
Idea (a good guess is half of the answer!):
\begin{itemize}[<+->]
\item break input into even and odd indexed terms (so-called ''decimation in time''):
\[
x[n], \quad n = 0,1,\ldots,N-1 \ \longrightarrow \ x[2n] \mbox{ and } x[2n+1], \quad n = 0, \ldots,N/2-1
\]
\item break output into first and second half
\[
X[k], \quad k = 0,1,\ldots,N-1 \ \longrightarrow \ X[k] \mbox{ and } X[k+N/2], \quad k = 0,\ldots, N/2-1
\]
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{ Divide and Conquer for DFT- Analysis of DIT}
\note<1>{write by hand on slide:\\
$W^2 = e^{-j\frac{4\pi}{N}} = e^{-j\frac{2\pi}{N/2}} = W_{N/2}$ \\
$W^{2nk} = W_{N/2}^{nk}$}
%Initial computation
%\[
% X[k] = \sum_{n = 0}^{N-1} x[n]\, W^{nk}, \qquad k = 0,1,\ldots,N-1, \ \ W = e^{-j\frac{2\pi}{N}}
%\]
%\pause
Consider even and odd inputs separately:
\begin{align*}
X[k] & = \displaystyle \sum_{n = 0}^{N/2-1} x[2n]\, W_N^{2nk} + \sum_{n = 0}^{N/2-1} x[2n+1]\, W_N^{(2n+1)k} \\ \pause
& = \displaystyle \sum_{n = 0}^{N/2-1} x[2n]\, W_N^{2nk} + \sum_{n = 0}^{N/2-1} x[2n+1]\, W_N^{2nk+k} \\ \pause
& = \displaystyle \sum_{n = 0}^{N/2-1} x[2n]\, W_{N/2}^{nk} + W^k \, \sum_{n = 0}^{N/2-1} x[2n+1]\, W_{N/2}^{nk} \\ \pause
& = \displaystyle X_A[k] + W_N^k \, X_B[k], \qquad k = 0,1,\ldots,N-1
\end{align*}
%where we used the fact that $W^2 = e^{-j\frac{4\pi}{N}} = e^{-j\frac{2\pi}{N/2}} = W_{N/2}$ and thus $W^{2nk} = %W_{N/2}^{nk}$.
\end{frame}
\begin{frame}
\frametitle{ Divide and Conquer for DFT- Analysis of DIT}
hmmm, we haven't gained much so far:
\vspace{1em}
\begin{itemize}[<+->]
\item both $X_A[k]$ and $X_B[k]$ require $N/2$ multiplications
\item multiplying the second DFT by $W_N^k$ requires another multiplication
\item to compute for all $k$ we need $N(N/2 + N/2 + 1) \approx N^2$
\item but here comes the trick!
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{ Divide and Conquer for DFT- Analysis of DIT}
\note<1>{Write out:$W^{N/2} = e^{-j\frac{N\pi}{N}} = -1$}
Consider now the first and second half of the outputs separately:
\begin{align*}
X[k] & = X_A[k] + W^k \, X_B[k] \\
& = \displaystyle \sum_{n = 0}^{N/2-1} x[2n]\, W_{N/2}^{nk} + W^k \, \sum_{n = 0}^{N/2-1} x[2n+1]\, W_{N/2}^{nk} \\
X[k+N/2] & = \sum_{n = 0}^{N/2-1} x[2n]\, W_{N/2}^{n(k+N/2)} + W_N^{k+N/2} \, \sum_{n = 0}^{N/2-1} x[2n+1]\, W_{N/2}^{n(k+N/2)} \\ \pause
& = \sum_{n = 0}^{N/2-1} x[2n]\, W_{N/2}^{nk} - W^k \, \sum_{n = 0}^{N/2-1} x[2n+1]\, W_{N/2}^{nk} \\ \pause
& = X_A[k] - W_N^k \, X_B[k], \qquad k = 0,1,\ldots,N/2-1
\end{align*}
%where we used the fact that $W^{N/2} = e^{-j\frac{N\pi}{N}} = -1$.
\end{frame}
\begin{frame}
\frametitle{ Divide and Conquer for DFT- Analysis of DIT}
so the trick is that we only need to compute for half the range of $k$:
\vspace{1em}
\begin{itemize}[<+->]
\item both $X_A[k]$ and $X_B[k]$ require $N/2$ multiplications
\item multiplying the second DFT by $W_N^k$ requires another multiplication
\item to compute for all $k$ we need $(N/2)(N/2 + N/2 + 1) \approx N^2/2$
\item the rest is just sums and differences
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Divide and Conquer for DFT- Analysis of DIT}
\begin{figure}
\psset{xunit=1.2cm,yunit=0.8cm,linewidth=1.5pt}
\begin{pspicture}(0,0)(7,7)
\flabelsL(0,0){7,5,3,1}{x}
\fconn(0,0){4}
\sblock(2,0){4}{0}{DFT-$N/2$}
\connS(4,0){1}{W^3_N,W^2_N,W^1_N,W^0_N}
\fconn(5,0){4}
\flabelsR(7,0){7,6,5,4}{X}
%
\flabelsL(0,4){6,4,2,0}{x}
\fconn(0,4){4}
\sblock(2,4){4}{0}{DFT-$N/2$}
\fconn(4,4){4}\fconn(5,4){4}
\flabelsR(7,4){3,2,1,0}{X}
%
\butterfly{4.6}{0}{4}
\butterfly{4.6}{1}{5}
\butterfly{4.6}{2}{6}
\butterfly{4.6}{3}{7}
\end{pspicture}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{ Divide and Conquer for DFT- Analysis of DIT}
So, what is the complexity now?
\vspace{1em}
\pause
\begin{itemize}[<+->]
\item Split DFT input into 2 pieces of size N/2: free!
\item Compute 2 DFT-N/2: twice $(N/2)^2$, or $N^2/2$
\item Merge the two results: multiplication by $N/2$ complex numbers $W^k$
\item Total: $N^2/2 + N/2$ which is indeed smaller than $N^2$ for any $N \geq 4$,
\item In general, about half the complexity of the initial problem!
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{ Divide and Conquer for DFT- Analysis of DIT}
So, what if we repeat the process?
\begin{itemize}[<+->]
\item Go until DFT-2, since that is trivial (sum and difference)
\item Requires $\log_2 N -1$ steps
\item Each step requires a merger of order $N/2$ multiplications and $N$ additions
\item Total: $(N/2) (\log_2 N -1)$ multiplications and $N \log_2 N$ additions
\end{itemize}
\vspace{1em}
\centering
\uncover<+->{Key Result: {\bf A DFT of size $N$ requires order $N \log_2 N$ operations!}}
\end{frame}
%\begin{comment}
\begin{frame}
\frametitle{ Matrix factorization view of DFT, N = 4}
\begin{itemize}[<+->]
\item Separate even and odd samples
\item Compute two DFT's of size 2 having output $X'[k]$ and $X"[k]$
\item Compute sum and difference of $X'[k]$ and $W^k X"[k]$
\end{itemize}
\vspace{1em}
\uncover<+->{
\[
\mathbf{W}_4 =
\begin{bmatrix}
1 & 1 & 1 & 1 \\
1 & j & -1 & -j \\
1 & -1 & 1 & -1 \\
1 & -j & -1 & j
\end{bmatrix}
=
\begin{bmatrix}
1 & 0 & 1 & 0 \\
0 & 1 & 0 & -j \\
1 & 0 & -1 & 0 \\
0 & 1 & 0 & j
\end{bmatrix}
\begin{bmatrix}
1 & 1 & 0 & 0 \\
1 & -1 & 0 & 0 \\
0 & 0 & 1 & 1 \\
0 & 0 & 1 & -1
\end{bmatrix}
\begin{bmatrix}
1 & 0 & 0 & 0 \\
0 & 0 & 1 & 0 \\
0 & 1 & 0 & 0 \\
0 & 0 & 0 & 1
\end{bmatrix}
\]
\vspace{1em}
This uses 8 additions and no multiplications!}
\end{frame}
\begin{frame}
\frametitle{ Matrix factorization view of DFT, N = 8, 1/8}
Now this is going to be big...
\vspace{1em}
Too big for a single slide!
\[
\mathbf{W}_8 =
\begin{bmatrix}
1 & 1 & 1 & 1 & \ldots & 1 \\
1 & W^{1} & W^{2} & W^{3} & \ldots & W^{7} \\
1 & W^{2} & W^{4} & W^{6} & \ldots & W^{14} \\
& & & \ldots \\
1 & W^{7} & W^{14} & W^{21} & \ldots & W^{49}
\end{bmatrix}
= \ldots
\]
\end{frame}
% \begin{frame}
% \frametitle{ Matrix factorization view of DFT, N = 8}
% \[
% \mathbf{W}_8 =
% \begin{bmatrix}
% 1 & 0 & 0 & 0 & 1 & 0 & 0 & 0\\
% 0 & 1 & 0 & 0 & 0 & 1 & 0 & 0\\
% 0 & 0 & 1 & 0 & 0 & 0 & 1 & 0\\
% 0 & 0 & 0 & 1 & 0 & 0 & 0 & 1\\
% 1 & 0 & 0 & 0 & -1 & 0 & 0 & 0\\
% 0 & 1 & 0 & 0 & 0 & -1 & 0 & 0\\
% 0 & 0 & 1 & 0 & 0 & 0 & -1 & 0\\
% 0 & 0 & 0 & 1 & 0 & 0 & 0 & -1
% \end{bmatrix}
% \]
% \[
% \begin{bmatrix}
% 1 & & & & & & & \\
% & 1 & & & & & & \\
% & & 1 & & & & & \\
% & & & 1 & & & & \\
% & & & & 1 & & & \\
% & & & & & W & & \\
% & & & & & & W^2 & \\
% & & & & & & & W^3
% \end{bmatrix}
% \begin{bmatrix}
% {\bf W_4} & {\bf 0_4} \\
% {\bf 0_4} & {\bf W_4}
% \end{bmatrix}
% \begin{bmatrix}
% 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\
% 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\
% 0 & 0 & 0 & 0 & 1 & 0 & 0 & 0\\
% 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0\\
% 0 & 1 & 0 & 0 & 0 & 0 & 0 & 0\\
% 0 & 0 & 0 & 1 & 0 & 0 & 0 & 0\\
% 0 & 0 & 0 & 0 & 0 & 1 & 0 & 0\\
% 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1
% \end{bmatrix}
% \]
%\end{frame}
\begin{frame}
\frametitle{ Matrix factorization view of DFT, N = 8, 2/8}
Step 1: separate even from odd indexed samples
Call this $ \mathbf{D}_8 $ for decimation of size 8
\[
\mathbf{D}_8 =
\begin{bmatrix}
1 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\
0 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\
0 & 0 & 0 & 0 & 1 & 0 & 0 & 0\\
0 & 0 & 0 & 0 & 0 & 0 & 1 & 0\\
0 & 1 & 0 & 0 & 0 & 0 & 0 & 0\\
0 & 0 & 0 & 1 & 0 & 0 & 0 & 0\\
0 & 0 & 0 & 0 & 0 & 1 & 0 & 0\\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 1
\end{bmatrix}
\]
\vspace{1em}
This requires no arithmetic operations!
\end{frame}
\begin{frame}
\frametitle{ Matrix factorization view of DFT, N = 8, 3/8}
Step 2: Compute two DFTs of size N/2 on the even and on the odd indexed samples
Each submatrix is $ \mathbf{W}_4 $, and the matrix is block diagonal, where ${\bf 0_4}$ stands for a matrix of 0's
\[
\begin{bmatrix}
{\bf W_4} & {\bf 0_4} \\
{\bf 0_4} & {\bf W_4}
\end{bmatrix}
=
\begin{bmatrix}
1 & 1 & 1 & 1 & & & & \\
1 & j & -1 & -j & & & & \\
1 & -1 & 1 & -1 & & & & \\
1 & -j & -1 & j & & & & \\
& & & &1 & 1 & 1 & 1 \\
& & & &1 & j & -1 & -j \\
& & & & 1 & -1 & 1 & -1\\
& & & &1 & -j & -1 & j
\end{bmatrix}
\]
\vspace{1em}
This requires two DFT-4, or a total of 16 additions!
\end{frame}
\begin{frame}
\frametitle{ Matrix factorization view of DFT, N = 8, 4/8}
Step 3: Multiply output of second DFT of size 4 by $W^k$
This is a diagonal matrix, with ${\bf I_4}$ for the identity of size 4,
\[
\begin{bmatrix}
{\bf I_4} & {\bf 0_4} \\
{\bf 0_4} & {\mathbf{\Lambda}_4}
\end{bmatrix}
=
\begin{bmatrix}
1 & & & & & & & \\
& 1 & & & & & & \\
& & 1 & & & & & \\
& & & 1 & & & & \\
& & & & 1 & & & \\
& & & & & W & & \\
& & & & & & W^2 & \\
& & & & & & & W^3
\end{bmatrix}
\mbox{where \ \ }
{\mathbf{\Lambda}_4}
=
\begin{bmatrix}
1 & & & \\
& W & & \\
& & W^2 & \\
& & & W^3
\end{bmatrix}
\]
\vspace{1em}
This requires 2 multiplications ($W^2 = -j$ is free)
\end{frame}
\begin{frame}
\frametitle{ Matrix factorization view of DFT, N = 8, 5/8}
Step 4: Recombine final output $X[k]$ and $X[k+N/2]$ by sum and difference, $ \mathbf{S}_8 $
\[
\mathbf{S}_8 =
\begin{bmatrix}
{\bf I_4} & {\bf I_4} \\
{\bf I_4} & -{\bf I_4}
\end{bmatrix}
=
\begin{bmatrix}
1 & 0 & 0 & 0 & 1 & 0 & 0 & 0\\
0 & 1 & 0 & 0 & 0 & 1 & 0 & 0\\
0 & 0 & 1 & 0 & 0 & 0 & 1 & 0\\
0 & 0 & 0 & 1 & 0 & 0 & 0 & 1\\
1 & 0 & 0 & 0 & -1 & 0 & 0 & 0\\
0 & 1 & 0 & 0 & 0 & -1 & 0 & 0\\
0 & 0 & 1 & 0 & 0 & 0 & -1 & 0\\
0 & 0 & 0 & 1 & 0 & 0 & 0 & -1
\end{bmatrix}
\]
\vspace{1em}
This requires 8 additions!
\end{frame}
\begin{frame}
\frametitle{ Matrix factorization view of DFT, N = 8, 6/8}
In total:
Product of 4 matrices
\[
\mathbf{W}_8 =
\begin{bmatrix}
{\bf I_4} & {\bf I_4} \\
{\bf I_4} & -{\bf I_4}
\end{bmatrix}
\cdot
\begin{bmatrix}
{\bf I_4} & {\bf 0_4} \\
{\bf 0_4} & {\mathbf{\Lambda}_4}
\end{bmatrix}
\cdot
\begin{bmatrix}
{\bf W_4} & {\bf 0_4} \\
{\bf 0_4} & {\bf W_4}
\end{bmatrix}
\cdot
\mathbf{D}_8
\]
\vspace{2em}
This requires 24 additions and 2 multiplications!
\end{frame}
%\end{comment}
\begin{frame}
\frametitle{Flowgraph view of FFT, $N =8$}
\begin{figure}
\psset{xunit=1.2cm,yunit=0.8cm,linewidth=1.5pt}
\begin{pspicture}(0,0)(10,7)
\flabelsL(0,0){7,3,5,1,6,2,4,0}{x}
\connS(0,0){1}{W^0_N,\sp,W^0_N,\sp,W^0_N,\sp,W^0_N,\sp}
\fconn(1,0){8}
\butterfly{1}{0}{1}\butterfly{1}{2}{3}\butterfly{1}{4}{5}\butterfly{1}{6}{7}
\connS(3,0){1}{W^2_N,W^0_N,\sp,\sp,W^2_N,W^0_N,\sp,\sp}
\fconn(4,0){8}
\butterfly{4}{0}{2}\butterfly{4}{1}{3}\butterfly{4}{4}{6}\butterfly{4}{5}{7}
\connS(6,0){1}{W^3_N,W^2_N,W^1_N,W^0_N,\sp,\sp,\sp,\sp}
\fconn(7,0){8}
\butterfly{7}{0}{4}\butterfly{7}{1}{5}\butterfly{7}{2}{6}\butterfly{7}{3}{7}
\flabelsR(9,0){7,6,5,4,3,2,1,0}{X}
\end{pspicture}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Flowgraph view of FFT, $N =8$}
\begin{figure}
\psset{xunit=1.2cm,yunit=0.8cm,linewidth=1.5pt}
\begin{pspicture}(0,0)(10,7)
\flabelsL(0,0){7,3,5,1,6,2,4,0}{x}
\connS(0,0){1}{\sp,\sp,\sp,\sp,\sp,\sp,\sp,\sp}
\fconn(1,0){8}
\butterfly{1}{0}{1}\butterfly{1}{2}{3}\butterfly{1}{4}{5}\butterfly{1}{6}{7}
\connS(3,0){1}{j,\sp,\sp,\sp,j,\sp,\sp,\sp}
\fconn(4,0){8}
\butterfly{4}{0}{2}\butterfly{4}{1}{3}\butterfly{4}{4}{6}\butterfly{4}{5}{7}
\connS(6,0){1}{W^3_8,j,W^1_8,\sp,\sp,\sp,\sp,\sp}
\fconn(7,0){8}
\butterfly{7}{0}{4}\butterfly{7}{1}{5}\butterfly{7}{2}{6}\butterfly{7}{3}{7}
\flabelsR(9,0){7,6,5,4,3,2,1,0}{X}
\end{pspicture}
\end{figure}
\end{frame}
%\begin{comment}
\begin{frame}
\frametitle{ Matrix factorization view of DFT, N = 8, 8/8}
Is this a big deal?
\begin{itemize}[<+->]
\item In image processing (e.g. digital photography) one takes block of 8 by 8 pixels
\item One computes a transform (called DCT) similar to a DFT
\item It has a fast algorithm inspired by what we just saw
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Some numbers}
\begin{itemize}[<+->]
\item Direct: $64^2 = 4096$ multiplications required (dominant cost, fixed point multiplications)
\item The transform can be computed in rows and columns separately, or 16 DFT's
\item The algorithm we saw has 2 multiplications, or a total of 32 multiplications
\item Saving of 2 orders of magnitude!
\end{itemize}
\end{frame}
%\end{comment}
\begin{frame}
\frametitle{Some examples}
image processing (JPEG compression)
\begin{itemize}[<+->]
\item image is divided into $8\times 8$-pixel blocks
\item DFT performed on rows and columns: 16 8-point DFTs
\item direct computation: $16\times 8^2 = 1024$ multiplications
\item FFT: $16\times 2 = 32$ multiplications
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Some examples}
audio processing (MP3 compression)
\begin{itemize}[<+->]
\item audio is split into 1152-point frames
\item direct DFT computation: $1.3\cdot 10^6$ multiplications
\item FFT: $1.1\cdot 10^4$ multiplications
\end{itemize}
\end{frame}
% remove the esoteric stuff
% \begin{frame}
% \frametitle{How about other lengths? To each their own!}
%
% \begin{itemize}[<+->]
% \item if $N = p^K$ with $p$ a prime, we can use an algorithm similar to that for $p=2$; this is the \textbf{Cooley-Tukey} algorithm (1965, but invented several times before)
% \item if $N=N_1 N_2$, with $N_1$ and $N_2$ coprime, we can reorder this into a two-dimensional DFT of size $N_1$ by $N_2$; this is the \textbf{Good-Thomas} algorithm (1958, 1963)
% \item if $N$ is prime, a mapping transforms the result into a convolution of size $N-1$, which can be solved by a DFT of size $N-1$; this is the \textbf{Rader} algorithm (1968)
% \item by combining all the tricks, and minimizing the number of multiplications, we have the \textbf{Winograd} FFT (1979)
% \end{itemize}
%\end{frame}
%\begin{frame}
% \frametitle{ How about other lengths? Example for $N = 5$}
% According to Rader's algorithm:
%
% Skip first and column and first row (trivial computation)
% \[
% \mathbf{W}_5 =
% \begin{bmatrix}
% 1 & 1 & 1 & 1 & 1\\
% 1 & W & W^2& W^3 & W^4 \\
% 1 & W^2 & W^4 & W & W^3\\
% 1 & W^3 & W & W^4 & W^{2}\\
% 1 & W^4 & W^3 & W^2 & W
% \end{bmatrix}
% \rightarrow
% \begin{bmatrix}
% W & W^2& W^3 & W^4 \\
% W^2 & W^4 & W & W^3\\
% W^3 & W & W^4 & W^{2}\\
% W^4 & W^3 & W^2 & W
% \end{bmatrix}
% =
% {\bf P}
% \begin{bmatrix}
% W & W^2& W^3 & W^4 \\
% W^4 & W & W^2& W^3 \\
% W^3 & W^4 & W & W^2\\
% W^2 & W^3 & W^4 & W
% \end{bmatrix}
% {\bf Q}
% \]
% where ${\bf P}$ and $ {\bf Q}$ are appropriate permutation matrices.
%
%
% This is now a circulant matrix, diagonizable by a DFT of size 4!
%\end{frame}
%\begin{frame}
% \frametitle{ How about other lengths? Example for $N = 6$}
% According to the Good-Thomas algorithm, this can be written as 3 DFT-2 and 2 DFT-3:
% \[
% \mathbf{W}_6 =
% {\bf P}
% \begin{bmatrix}
% 1 & 1 & & & & \\
% 1 & -1 & & & & \\
% & & 1 & 1 & & \\
% & & 1 & -1 & & \\
% & & & & 1 & 1 \\
% & & & & 1 & -1
% \end{bmatrix}
% {\bf Q}
% \begin{bmatrix}
% 1 & 1 & 1 & & &\\
% 1 & W & W^2 & & &\\
% 1 & W^2 & W & & & \\
% & & & 1 & 1 & 1 \\
% & & & 1 & W & W^2 \\
% & & & 1 & W^2 & W
% \end{bmatrix}
% {\bf R}
% \]
%
% where ${\bf P}$, $ {\bf Q}$ and $ {\bf R}$ are appropriate permutation matrices.
%
% Except for permutations (no arithmetic operation, just addressing), there is no cost!
%\end{frame}
\begin{frame}
\frametitle{ Conclusions}
Don't worry, be happy!
\begin{itemize}
\item The Cooley-Tukey is the most popular algorithm, mostly for $N=2^N$
\item Note that there is always a good FFT algorithm around the corner
\item Do not zero-pad to lengthen a vector to have a size equal to a power of 2
\item There are good packages out there (e.g. Fastest Fourier Transform in the West, SPIRAL)
\item It does make a BIG difference!
\end{itemize}
\vspace{2em}
\centering
\includegraphics[width=0.3\paperwidth]{FFTW.eps}
\end{frame}
\end{document}

Event Timeline