Page MenuHomec4science

rational_interpolation_method.tex
No OneTemporary

File Metadata

Created
Wed, May 1, 08:06

rational_interpolation_method.tex

\documentclass[10pt,a4paper]{article}
\usepackage[left=1in,right=1in,top=1in,bottom=1in]{geometry}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{hyperref}
\usepackage{xcolor}
\setlength{\parindent}{0pt}
\newcommand{\code}[1]{{\color{blue}\texttt{#1}}}
\newcommand{\footpath}[1]{\footnote{\path{#1}}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\C}{\mathbb{C}}
\newcommand{\I}{\mathcal{I}}
\DeclareMathOperator*{\argmin}{arg\,min}
\newcommand{\inner}[2]{\left\langle#1,#2\right\rangle_V}
\newcommand{\norm}[1]{\left\|#1\right\|_V}
\title{\bf The \code{RROMPy} rational interpolation method}
\author{D. Pradovera, CSQI, EPF Lausanne -- \texttt{davide.pradovera@epfl.ch}}
\date{}
\hypersetup{ pdftitle={The RROMPy rational interpolation method}, pdfauthor={Davide Pradovera} }
\begin{document}
\maketitle
\section*{Introduction}
This document provides an explanation for the numerical method provided by the class \code{Rational Interpolant}\footpath{./rrompy/reduction_methods/standard/rational_interpolant.py} and daughters, e.g. \code{Rational Interpolant Greedy}\footpath{./rrompy/reduction_methods/standard/greedy/rational_interpolant_greedy.py}, as well as most of the pivoted approximants\footpath{./rrompy/reduction_methods/pivoted/{,greedy/}rational_interpolant_*.py}.
We restrict the discussion to the single-parameter case, and most of the focus will be dedicated to the impact of the \code{functionalSolve} parameter, whose allowed values are
\begin{itemize}
\item \code{NORM} (default): see \ref{sec:norm}; allows for derivative information, i.e. repeated sample points.
\item \code{DOMINANT}: see \ref{sec:dominant}; allows for derivative information, i.e. repeated sample points.
\item \code{BARYCENTRIC\_NORM}: see \ref{sec:barycentricnorm}; does not allow for a Least Squares (LS) approach.
\item \code{BARYCENTRIC\_AVERAGE}: see \ref{sec:barycentricaverage}; does not allow for a Least Squares (LS) approach.
\end{itemize}
The main reference throughout the present document is \cite{Pradovera}.
\section{Aim of approximation}
We seek an approximation of $u:\C\to V$, with $(V,\inner{\cdot}{\cdot})$ a complex\footnote{The inner product is linear (resp. conjugate linear) in the first (resp. second) argument: $\inner{\alpha v}{\beta w}=\alpha\overline{\beta}\inner{v}{w}$.} Hilbert space (with induced norm $\norm{\cdot}$), of the form $\widehat{p}/\widehat{q}$, where $\widehat{p}:\C\to V$ and $\widehat{q}:\C\to\C$. For a given denominator $\widehat{q}$, the numerator $\widehat{p}$ is found by interpolation (possibly, LS or based on radial basis functions) of $\widehat{q}u$. Hence, here we focus on the computation of the denominator $\widehat{q}$.
Other than the choice of target function $u$, the parameters which affect the computation of $\widehat{q}$ are:
\begin{itemize}
\item $\code{mus}\subset\C$ ($\{\mu_j\}_{j=1}^S$ below); for all \code{functionalSolve} values but \code{NORM} and \code{DOMINANT}, the $S$ points must be distinct.
\item $\code{N}\in\N$ ($N$ below); for \code{BARYCENTRIC\_*}, $N$ must equal $S-1$.
\item $\code{polybasis0}\in\{\code{"CHEBYSHEV"}, \code{"LEGENDRE"}, \code{"MONOMIAL"}\}$; only for \code{NORM} and \code{DOMINANT}.
\end{itemize}
For simplicity, we will consider only the case of $S$ distinct sample points. One can deal with the case of confluent sample points by extending the standard (Lagrange) interpolation steps to Hermite-Lagrange ones.
The main motivation behind the method involves the modified approximation problem
\[u\approx\I^N\left(\Big(\big(\mu_j,\widehat{q}(\mu_j)u(\mu_j)\big)\Big)_{j=1}^S\right)\Big/\widehat{q},\]
where $\widehat{q}:\C\to\C$ is a polynomial of degree $\leq N<S$, and $\I^N:(\C\times V)^S\to\mathbb{P}^N(\C;V)$ is a (LS) polynomial interpolation operator, which maps $S$ samples of a function (which lie in $V$) to a polynomial of degree $\leq N$ with coefficients in $V$.
More precisely, let
\[\mathbb{P}^N(\C;W)=\left\{\mu\mapsto\sum_{i=0}^N\alpha_i\mu^i\ :\ \alpha_0,\ldots,\alpha_N\in W\right\},\]
with $W$ either $\C$ or $V$. We set
\[\I^N\left(\big((\mu_j,\psi_j)\big)_{j=1}^S\right)\bigg|_\mu=\argmin_{p\in\mathbb{P}^N(\C;V)}\sum_{j=1}^S\norm{p(\mu_j)-\psi_j}^2.\]
In \code{RROMPy}, we compute (LS-)interpolants by employing normal equations: given a basis $\{\phi_i\}_{i=0}^N$ of $\mathbb{P}^N(\C;\C)$, we expand
\[\I^N\left(\big((\mu_j,\psi_j)\big)_{j=1}^S\right)=\sum_{i=0}^Nc_i\phi_i\]
and observe that, for optimality, the coefficients $\{c_i\}_{i=0}^N\subset V$ must satisfy
\[\sum_{j=1}^S\sum_{l=0}^N\overline{\phi_i(\mu_j)}\phi_l(\mu_j)c_l=\sum_{j=1}^S\overline{\phi_i(\mu_j)}\psi_j\quad\forall i=0,\ldots,N,\]
i.e., in matrix form\footnote{The superscript ${}^H$ denotes adjunction (conjugate transposition), i.e. $A^H=\overline{A}^\top$.},
\[\underbrace{[c_i]_{i=0}^N}_{\in V^{N+1}}=\bigg(\underbrace{\Big[\overline{\phi_i(\mu_j)}\Big]_{i=0,j=1}^{N,S}}_{=:\Phi^H\in\C^{(N+1)\times S}}\underbrace{\Big[\phi_i(\mu_j)\Big]_{j=1,i=0}^{S,N}}_{=:\Phi\in\C^{S\times(N+1)}}\bigg)^{-1}\underbrace{\Big[\overline{\phi_i(\mu_j)}\Big]_{i=0,j=1}^{N,S}}_{=:\Phi^H\in\C^{(N+1)\times S}}\underbrace{[\psi_j]_{j=1}^S}_{\in V^{S}}.\]
In practice the polynomial basis $\{\phi_i\}_{i=0}^N$ is determined by the value of $\code{polybasis0}$:
\begin{itemize}
\item If $\code{polybasis0}=\code{"CHEBYSHEV"}$, then $\phi_k(\mu)=\mu^k$ for $k\in\{0,1\}$ and $\phi_k(\mu)=2\mu\phi_{k-1}(\mu)-\phi_{k-2}(\mu)$ for $k\geq2$.
\item If $\code{polybasis0}=\code{"LEGENDRE"}$, then $\phi_k(\mu)=\mu^k$ for $k\in\{0,1\}$ and $\phi_k(\mu)=(2-1/k)\mu\phi_{k-1}(\mu)-(1-1/k)\phi_{k-2}(\mu)$ for $k\geq2$.
\item If $\code{polybasis0}=\code{"MONOMIAL"}$, then $\phi_k(\mu)=\mu^k$ for $k\geq0$.
\end{itemize}
The actual denominator $\widehat{q}$ is found as
\begin{equation}\label{eq:glob1}
\widehat{q}=\argmin_{\substack{q\in\mathbb{P}^N(\C;\C)\\(\star)}}\norm{\frac{\textrm{d}^N}{\textrm{d}\mu^N}\I^N\left(\Big(\big(\mu_j,q(\mu_j)u(\mu_j)\big)\Big)_{j=1}^S\right)}
\end{equation}
where $(\star)$ is a normalization condition (which changes depending on \code{functionalSolve}) to exclude the trivial minimizer $\widehat{q}=0$.
Broadly speaking, the methods described differ in terms of the constraint $(\star)$, as well as of the degrees of freedom which are chosen to represent the denominator $q$.
\section{Polynomial coefficients as degrees of freedom}
If the polynomial basis $\{\phi_i\}_{i=0}^N$ is hierarchical (as the three ones above), then the $N$-th derivative of $\I^N$ coincides with the coefficient $c_N$, and we have
\begin{equation}\label{eq:poly1}
\widehat{q}=\argmin_{\substack{q\in\mathbb{P}^N(\C;\C)\\(\star)}}\norm{[\underbrace{0,\ldots,0}_{N},1]\big(\Phi^H\Phi\big)^{-1}\Phi^H[q(\mu_j)u(\mu_j)]_{j=1}^S}.
\end{equation}
Using the Kronecker delta ($\delta_{ij}=1$ if $i=j$ and $\delta_{ij}=0$ if $i\neq j$), the last term $[q(\mu_j)u(\mu_j)]_{j=1}^S\in V^S$ can be factored into
\begin{equation}\label{eq:poly2}
\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}[q(\mu_j)]_{j=1}^S=\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}\Phi[q_i]_{i=0}^N,
\end{equation}
where we have expanded the polynomial $q$ using the basis\footnote{In theory, nothing prevents us from using different bases for $\I^N$ and $q$, cf. \ref{sec:observations}.} $\{\phi_i\}_{i=0}^N$: $q(\mu)=\sum_{i=0}^Nq_i\phi_i(\mu)$, with coefficients $\{q_i\}_{i=0}^N\subset\C$.
Combining \eqref{eq:poly1} and \eqref{eq:poly2}, it is useful to consider the $(N+1)\times(N+1)$ Hermitian matrix with entries ($0\leq i,i'\leq N$)
\begin{equation}\label{eq:poly3}
G_{ii'}=\inner{\sum_{j'=1}^S\left(\big(\Phi^H\Phi\big)^{-1}\Phi^H\right)_{Nj'}(\Phi)_{j'i'}u(\mu_{j'})}{\sum_{j=1}^S\left(\big(\Phi^H\Phi\big)^{-1}\Phi^H\right)_{Nj}(\Phi)_{ji}u(\mu_j)}.
\end{equation}
If $(\star)$ is quadratic (resp. linear) in $[q_i]_{i=0}^N$, then we can cast the computation of the denominator as a quadratically (resp. linearly) constrained quadratic program involving $G$.
\subsection{Quadratic constraint}\label{sec:norm}
We constrain $[\widehat{q}_i]_{i=0}^N$ to have unit (Euclidean) norm. The resulting optimization problem can be cast as a minimal (normalized) eigenvector problem for $G$ in \eqref{eq:poly3}. More explicitly,
\[[\widehat{q}_i]_{i=0}^N=\argmin_{\substack{\mathbf{q}\in\C^{N+1}\\\|\mathbf{q}\|_2=1}}\mathbf{q}^HG\mathbf{q}.\]
\subsection{Linear constraint}\label{sec:dominant}
We constrain $\widehat{q}_N=1$, thus forcing $q$ to be monic, with degree exactly $N$. Given $G$ in \eqref{eq:poly3}, the resulting optimization problem can be solved rather easily as:
\[[\widehat{q}_i]_{i=0}^N=\frac{G^{-1}\mathbf{e}_{N+1}}{\mathbf{e}_{N+1}^\top G^{-1}\mathbf{e}_{N+1}},\quad\text{with }\mathbf{e}_{N+1}=[0,\ldots,0,1]^\top\in\C^{N+1}.\]
\section{Barycentric coefficients as degrees of freedom}\label{sec:bary}
Here we assume that the sample points are distinct, and such that $N=S-1$. We can choose for convenience a non-hierarchical basis, dependent on the sample points, for $q$ and $\I^N$, taking inspiration from barycentric interpolation:
\begin{equation}\label{eq:bary1}
\phi_i(\mu)=\prod_{\substack{j=1\\j\neq i+1}}^S(\mu-\mu_j).
\end{equation}
Since all elements of the basis are monic and of degree exactly $N$, the minimization problem can be cast as
\begin{equation}\label{eq:bary2}
\widehat{\mathbf{q}}=\argmin_{\substack{\mathbf{q}\in\C^{N+1}\\(\star)}}\norm{[\underbrace{1,\ldots,1}_{N+1}]\big(\Phi^H\Phi\big)^{-1}\Phi^H\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}\Phi\mathbf{q}\,}.
\end{equation}
At the same time, it is easy to see from \eqref{eq:bary1} that the Vandermonde-like matrix $\Phi$ is diagonal, so that
\[\big(\Phi^H\Phi\big)^{-1}\Phi^H\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}\Phi=\big(\Phi^H\Phi\big)^{-1}\Phi^H\Phi\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}=\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S},\]
and
\begin{equation}\label{eq:bary3}
\widehat{\mathbf{q}}=\argmin_{\substack{\mathbf{q}\in\C^{N+1}\\(\star)}}\norm{\sum_{i=0}^Nu(\mu_{i+1})q_i}.
\end{equation}
Considering \eqref{eq:bary3}, it is useful to define the $(N+1)\times(N+1)$ Hermitian (``snapshot Gramian'') matrix with entries ($0\leq i,i'\leq N$)
\begin{equation}\label{eq:bary4}
G_{ii'}=\inner{u(\mu_{i'+1})}{u(\mu_{i+1})}.
\end{equation}
So, once again, if $(\star)$ is quadratic (resp. linear) in $[q_i]_{i=0}^N$, then we can cast the computation of the denominator as a quadratically (resp. linearly) constrained quadratic program involving $G$.
Before specifying the kind of normalization enforced, it is important to make a remark on numerical stability. The basis in \eqref{eq:bary1} is actually just a ($i$-dependent) factor away from being the Lagrangian one (for which $\phi_i(\mu_j)$ would equal $\delta_{(i+1)j}$ instead of
\[\delta_{(i+1)j}\prod_{k\neq i+1}(\mu_j-\mu_k),\]
as it does in our case). As such, it is generally a bad idea to numerically evaluate $q$ starting from its expansion coefficients with respect to $\{\phi_i\}_{i=0}^N$. We get around this by exploiting the following trick, whose foundation is in \cite[Section 2.3.3]{Klein}: the roots of $\widehat{q}=\sum_{i=0}^N\widehat{q}_i\phi_i$ are the $N$ finite eigenvalues $\lambda$ of the generalized $(N+2)\times(N+2)$ eigenproblem
\begin{equation}
\textrm{Det}\left(\begin{bmatrix}
0 & \widehat{q}_0 & \cdots & \widehat{q}_N\\
1 & \mu_1 & & \\
\vdots & & \ddots & \\
1 & & & \mu_S
\end{bmatrix}-\begin{bmatrix}
0 & & & \\
& 1 & & \\
& & \ddots & \\
& & & 1
\end{bmatrix}\lambda\right)=0.
\end{equation}
This computation is numerically more stable than most other manipulations of a polynomial in the basis \eqref{eq:bary1}.
Once the roots of $\widehat{q}$ have been computed, one can either convert it to nodal form
\begin{equation}\label{eq:pole1}
\widehat{q}(\mu)\propto\prod_{i=1}^N(\mu-\widehat{\lambda}_i),
\end{equation}
or forgo using $\widehat{q}$ completely, in favor of a Heaviside-like approximation involving the newly computed roots $\{\widehat{\lambda}_i\}_{i=1}^N$ as poles:
\[
\frac{\widehat{p}(\mu)}{\widehat{q}(\mu)}\quad\rightsquigarrow\quad\sum_{i=1}^N\frac{\widehat{r}_i}{\mu-\widehat{\lambda}_i}+\widetilde{p}(\mu),
\]
with $\widetilde{p}$, e.g., a polynomial (of degree at most $S-N-1$) or a combination of radial basis functions. See the final paragraph in \ref{sec:observations} for a slightly more detailed motivation of why the Heaviside form of the approximant might be more useful than the standard rational one $\widehat{p}/\widehat{q}$ in practice.
\subsection{Quadratic constraint}\label{sec:barycentricnorm}
We constrain $[\widehat{q}_i]_{i=0}^N$ to have unit (Euclidean) norm. The resulting optimization problem can be cast as a minimal (normalized) eigenvector problem for $G$ in \eqref{eq:bary4}. More explicitly,
\[[\widehat{q}_i]_{i=0}^N=\argmin_{\substack{\mathbf{q}\in\C^{N+1}\\\|\mathbf{q}\|_2=1}}\mathbf{q}^HG\mathbf{q}.\]
\subsection{Linear constraint}\label{sec:barycentricaverage}
We constrain $\sum_{i=0}^N\widehat{q}_i=1$, so that the polynomial $\widehat{q}$ is monic. Given $G$ in \eqref{eq:bary4}, the resulting optimization problem can be solved rather easily as:
\[[\widehat{q}_i]_{i=0}^N=\frac{G^{-1}\mathbf{1}_{N+1}}{\mathbf{1}_{N+1}^\top G^{-1}\mathbf{1}_{N+1}},\quad\text{with }\mathbf{1}_{N+1}=[1,\ldots,1]^\top\in\C^{N+1}.\]
\section{Minor observations}\label{sec:observations}
\begin{itemize}
\item For \code{BARYCENTRYC\_*}, a specific choice of polynomial basis for $\I^N$ was used to diagonalize the functional. Under the assumptions that the sample points are distinct and that $N=S-1$, one can employ the quasi-Lagrangian basis \eqref{eq:bary1} to expand $\I^N$ in the other approaches as well, thus simplifying significantly the structure of \eqref{eq:glob1}:
\begin{equation*}
\widehat{q}=\argmin_{\substack{q\in\mathbb{P}^N(\C;\C)\\(\star)}}\norm{\sum_{j=1}^Sq(\mu_j)u(\mu_j)\prod_{\substack{j'=1\\j'\neq j}}^S\frac1{\mu_j-\mu_{j'}}}.
\end{equation*}
Numerically, this has repercussions on the computation of the term $\big(\Phi^H\Phi\big)^{-1}\Phi^H$ in \eqref{eq:poly3}.
\item In general, \code{NORM} and \code{BARYCENTRIC\_NORM} can be expected to be more numerically stable than \code{DOMINANT} and \code{BARYCENTRIC\_AVERAGE}, respectively. This is due to the fact that the normalization is enforced in a more numerically robust fashion.
\item If the snapshots are orthonormalized via \code{POD}\footpath{./rrompy/sampling/engines/sampling_engine_pod.py}, all the $V$-inner products (resp. norms) are recast as Euclidean inner products (resp. norms) involving the $R$ factor of the generalized ($V$-orthonormal) QR decomposition of the snapshots.
\item If a univariate rational surrogate is built in the scope of multivariate pole-matching-based pivoted approximation\footpath{./rrompy/reduction_methods/pivoted/*}, the rational approximant is converted into a Heaviside/nodal representation when different surrogates are combined. As such, the \code{BARYCENTRIC\_*} approach may be preferable to avoid extra computations, as well as additional round-off artifacts.
\end{itemize}
\begin{thebibliography}{1}
\bibitem{Pradovera}%
D.~Pradovera, Interpolatory rational model order reduction of parametric problems lacking uniform inf-sup stability, SIAM J. Numer. Anal. 58 (2020) 2265--2293. doi:10.1137/19M1269695.
\bibitem{Klein}%
G.~Klein, Applications of Linear Barycentric Rational Interpolation, PhD Thesis no. 1762, Universit\'e de Fribourg (2012).
\end{thebibliography}
\end{document}

Event Timeline