% c4science repository page metadata (non-LaTeX residue, commented out so the file compiles):
% rational_interpolation_method.tex -- created Fri, Mar 29, 09:09

\documentclass[10pt,a4paper]{article}
\usepackage[left=1in,right=1in,top=1in,bottom=1in]{geometry}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{hyperref}
\usepackage{xcolor}
\setlength{\parindent}{0pt}
\newcommand{\code}[1]{{\color{blue}\texttt{#1}}}
\newcommand{\footpath}[1]{\footnote{\path{#1}}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\C}{\mathbb{C}}
\newcommand{\I}{\mathcal{I}}
\DeclareMathOperator*{\argmin}{arg\,min}
\newcommand{\inner}[2]{\left\langle#1,#2\right\rangle_V}
\newcommand{\norm}[1]{\left\|#1\right\|_V}
\title{\bf The RROMPy rational interpolation method}
\author{D. Pradovera, CSQI, EPF Lausanne -- \texttt{davide.pradovera@epfl.ch}}
\date{}
\begin{document}
\maketitle
\section*{Introduction}
This document provides an explanation for the numerical method provided by the class \code{Rational Interpolant}\footpath{./rrompy/reduction_methods/standard/rational_interpolant.py} and daughters, e.g. \code{Rational Interpolant Greedy}\footpath{./rrompy/reduction_methods/standard/greedy/rational_interpolant_greedy.py}, as well as most of the pivoted approximants\footpath{./rrompy/reduction_methods/pivoted/{,greedy/}rational_interpolant_*.py}.
We restrict the discussion to the single-parameter case, and most of the focus will be dedicated to the impact of the \code{functionalSolve} parameter, whose allowed values are
\begin{itemize}
\item \code{NORM} (default): see \ref{sec:norm}; allows for derivative information, i.e. repeated sample points.
\item \code{DOMINANT}: see \ref{sec:dominant}; allows for derivative information, i.e. repeated sample points.
\item \code{BARYCENTRIC\_NORM}: see \ref{sec:barycentricnorm}; does not allow for a Least Squares (LS) approach.
\item \code{BARYCENTRIC\_AVERAGE}: see \ref{sec:barycentricaverage}; does not allow for a Least Squares (LS) approach.
\item \code{NODAL}: see \ref{sec:nodal}; iterative method.
\end{itemize}
The main reference throughout the present document is \cite{Pradovera}.
\section{Aim of approximation}
We seek an approximation of $u:\C\to V$, with $(V,\inner{\cdot}{\cdot})$ a complex\footnote{The inner product is linear (resp. conjugate linear) in the first (resp. second) argument: $\inner{\alpha v}{\beta w}=\alpha\overline{\beta}\inner{v}{w}$.} Hilbert space (with endowed norm $\norm{\cdot}$), of the form $\widehat{p}/\widehat{q}$, where $\widehat{p}:\C\to V$ and $\widehat{q}:\C\to\C$. For a given denominator $\widehat{q}$, the numerator $\widehat{p}$ is found by interpolation (possibly, LS or based on radial basis functions) of $\widehat{q}u$. Hence, here we focus on the computation of the denominator $\widehat{q}$.
Other than the choice of target function $u$, the parameters which affect the computation of $\widehat{q}$ are:
\begin{itemize}
\item $\code{mus}\subset\C$ ($\{\mu_j\}_{j=1}^S$ below); for all \code{functionalSolve} values but \code{NORM} and \code{DOMINANT}, the $S$ points must be distinct.
\item $\code{N}\in\N$ ($N$ below); for \code{BARYCENTRIC\_*}, $N$ must equal $S-1$.
\item $\code{polybasis0}\in\{\code{"CHEBYSHEV"}, \code{"LEGENDRE"}, \code{"MONOMIAL"}\}$; only for \code{NORM} and \code{DOMINANT}.
\end{itemize}
For simplicity, we will consider only the case of $S$ distinct sample points. One can deal with the case of confluent sample points by extending the standard (Lagrange) interpolation steps to Hermite-Lagrange ones.
The main motivation behind the method involves the modified approximation problem
\[u\approx\I^N\left(\Big(\big(\mu_j,\widehat{q}(\mu_j)u(\mu_j)\big)\Big)_{j=1}^S\right)\Big/\widehat{q},\]
where $\widehat{q}:\C\to\C$ is a polynomial of degree $\leq N$, and $\I^N:(\C\times V)^S\to\mathbb{P}^N(\C;V)$ is a (LS) polynomial interpolation operator, which maps $S$ samples of a function (which lie in $V$) to a polynomial of degree $N<S$ with coefficients in $V$.
More precisely, let
\[\mathbb{P}^N(\C;W)=\left\{\mu\mapsto\sum_{i=0}^N\alpha_i\mu^i\ :\ \alpha_0,\ldots,\alpha_N\in W\right\},\]
with $W$ either $\C$ or $V$. We set
\[\I^N\left(\big((\mu_j,\psi_j)\big)_{j=1}^S\right)\bigg|_\mu=\argmin_{p\in\mathbb{P}^N(\C;V)}\sum_{j=1}^S\norm{p(\mu_j)-\psi_j}^2.\]
In RROMPy, we compute (LS-)interpolants by employing normal equations: given a basis $\{\phi_i\}_{i=0}^N$ of $\mathbb{P}^N(\C;\C)$, we expand
\[\I^N\left(\big((\mu_j,\psi_j)\big)_{j=1}^S\right)=\sum_{i=0}^Nc_i\phi_i\]
and observe that, for optimality, the coefficients $\{c_i\}_{i=0}^N\subset V$ must satisfy
\[\sum_{j=1}^S\sum_{l=0}^N\overline{\phi_i(\mu_j)}\phi_l(\mu_j)c_l=\sum_{j=1}^S\overline{\phi_i(\mu_j)}\psi_j\quad\forall i=0,\ldots,N,\]
i.e., in matrix form\footnote{The superscript ${}^H$ denotes conjugate transposition, i.e. $A^H=\overline{A}^\top$.},
\[\underbrace{[c_i]_{i=0}^N}_{\in V^{N+1}}=\bigg(\underbrace{\Big[\overline{\phi_i(\mu_j)}\Big]_{i=0,j=1}^{N,S}}_{=:\Phi^H\in\C^{(N+1)\times S}}\underbrace{\Big[\phi_i(\mu_j)\Big]_{j=1,i=0}^{S,N}}_{=:\Phi\in\C^{S\times(N+1)}}\bigg)^{-1}\underbrace{\Big[\overline{\phi_i(\mu_j)}\Big]_{i=0,j=1}^{N,S}}_{=:\Phi^H\in\C^{(N+1)\times S}}\underbrace{[\psi_j]_{j=1}^S}_{\in V^{S}}.\]
In practice the polynomial basis $\{\phi_i\}_{i=0}^N$ is determined by the value of $\code{polybasis0}$:
\begin{itemize}
\item If $\code{polybasis0}=\code{"CHEBYSHEV"}$, then $\phi_k(\mu)=\mu^k$ for $k\in\{0,1\}$ and $\phi_k(\mu)=2\mu\phi_{k-1}(\mu)-\phi_{k-2}(\mu)$ for $k\geq2$.
\item If $\code{polybasis0}=\code{"LEGENDRE"}$, then $\phi_k(\mu)=\mu^k$ for $k\in\{0,1\}$ and $\phi_k(\mu)=(2-1/k)\mu\phi_{k-1}(\mu)-(1-1/k)\phi_{k-2}(\mu)$ for $k\geq2$.
\item If $\code{polybasis0}=\code{"MONOMIAL"}$, then $\phi_k(\mu)=\mu^k$ for $k\geq0$.
\end{itemize}
The actual denominator $\widehat{q}$ is found as
\begin{equation}\label{eq:glob1}
\widehat{q}=\argmin_{\substack{q\in\mathbb{P}^N(\C;\C)\\(\star)}}\norm{\frac{\mathrm{d}^N}{\mathrm{d}\mu^N}\I^N\left(\Big(\big(\mu_j,q(\mu_j)u(\mu_j)\big)\Big)_{j=1}^S\right)},
\end{equation}
where $(\star)$ is a normalization condition (which changes depending on \code{functionalSolve}) to exclude the trivial minimizer $\widehat{q}=0$.
Broadly speaking, the five methods described differ in terms of the constraint $(\star)$, as well as of the degrees of freedom which are chosen to represent the denominator $q$.
\section{Polynomial coefficients as degrees of freedom}
If the polynomial basis $\{\phi_i\}_{i=0}^N$ is hierarchical (as the three ones above), then the $N$-th derivative of $\I^N$ coincides with the coefficient $c_N$, and we have
\begin{equation}\label{eq:poly1}
\widehat{q}=\argmin_{\substack{q\in\mathbb{P}^N(\C;\C)\\(\star)}}\norm{[\underbrace{0,\ldots,0}_{N},1]\big(\Phi^H\Phi\big)^{-1}\Phi^H[q(\mu_j)u(\mu_j)]_{j=1}^S}.
\end{equation}
Using the Kronecker delta ($\delta_{ij}=1$ if $i=j$ and $\delta_{ij}=0$ if $i\neq j$), the last term $[q(\mu_j)u(\mu_j)]_{j=1}^S\in V^S$ can be factored into
\begin{equation}\label{eq:poly2}
\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}[q(\mu_j)]_{j=1}^S=\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}\Phi[q_i]_{i=0}^N,
\end{equation}
where we have expanded the polynomial $q$ using the basis\footnote{In theory, nothing prevents us from using different bases for $\I^N$ and $q$.} $\{\phi_i\}_{i=0}^N$: $q(\mu)=\sum_{i=0}^Nq_i\phi_i(\mu)$, with coefficients $\{q_i\}_{i=0}^N\subset\C$.
Combining \eqref{eq:poly1} and \eqref{eq:poly2}, it is useful to consider the $(N+1)\times(N+1)$ Hermitian matrix with entries ($0\leq i,i'\leq N$)
\begin{equation}\label{eq:poly3}
G_{ii'}=\inner{\sum_{j'=1}^S\left(\big(\Phi^H\Phi\big)^{-1}\Phi^H\right)_{Nj'}(\Phi)_{j'i'}u(\mu_{j'})}{\sum_{j=1}^S\left(\big(\Phi^H\Phi\big)^{-1}\Phi^H\right)_{Nj}(\Phi)_{ji}u(\mu_j)}.
\end{equation}
If $(\star)$ is quadratic (resp. linear) in $[q_i]_{i=0}^N$, then we can cast the computation of the denominator as a quadratically (resp. linearly) constrained quadratic program involving $G$.
\subsection{Quadratic constraint}\label{sec:norm}
We constrain $[\widehat{q}_i]_{i=0}^N$ to have unit (Euclidean) norm. The resulting optimization problem can be cast as a minimal (normalized) eigenvector problem for $G$ in \eqref{eq:poly3}. More explicitly,
\[[\widehat{q}_i]_{i=0}^N=\argmin_{\substack{\mathbf{q}\in\C^{N+1}\\\|\mathbf{q}\|_2=1}}\mathbf{q}^HG\mathbf{q}.\]
\subsection{Linear constraint}\label{sec:dominant}
We constrain $\widehat{q}_N=1$, thus forcing $q$ to be monic, with degree exactly $N$. Given $G$ in \eqref{eq:poly3}, the resulting optimization problem can be solved rather easily as:
\[[\widehat{q}_i]_{i=0}^N=\frac{G^{-1}\mathbf{e}_{N+1}}{\mathbf{e}_{N+1}^\top G^{-1}\mathbf{e}_{N+1}},\quad\text{with }\mathbf{e}_{N+1}=[0,\ldots,0,1]^\top\in\C^{N+1}.\]
\section{Barycentric coefficients as degrees of freedom}
Here we assume that the sample points are distinct, and such that $N=S-1$. We can choose for convenience a non-hierarchical basis, dependent on the sample points, for $q$ and $\I^N$, taking inspiration from barycentric interpolation:
\begin{equation}\label{eq:bary1}
\phi_i(\mu)=\prod_{\substack{j=1\\j\neq i+1}}^S(\mu-\mu_j).
\end{equation}
Since all elements of the basis are monic and of degree exactly $N$, the minimization problem can be cast as
\begin{equation}\label{eq:bary2}
\widehat{q}=\argmin_{\substack{q\in\mathbb{P}^N(\C;\C)\\(\star)}}\norm{[\underbrace{1,\ldots,1}_{N+1}]\big(\Phi^H\Phi\big)^{-1}\Phi^H\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}\Phi[q_i]_{i=0}^N}.
\end{equation}
At the same time, it is easy to see from \eqref{eq:bary1} that the Vandermonde-like matrix $\Phi$ is diagonal, so that
\[\big(\Phi^H\Phi\big)^{-1}\Phi^H\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}\Phi=\big(\Phi^H\Phi\big)^{-1}\Phi^H\Phi\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}=\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S},\]
and
\begin{equation}\label{eq:bary3}
\widehat{q}=\argmin_{\substack{\mathbf{q}\in\C^{N+1}\\(\star)}}\norm{\sum_{i=0}^Nu(\mu_{i+1})q_i}.
\end{equation}
Considering \eqref{eq:bary3}, it is useful to define the $(N+1)\times(N+1)$ Hermitian (``snapshot Gramian'') matrix with entries ($0\leq i,i'\leq N$)
\begin{equation}\label{eq:bary4}
G_{ii'}=\inner{u(\mu_{i'+1})}{u(\mu_{i+1})}.
\end{equation}
So, once again, if $(\star)$ is quadratic (resp. linear) in $[q_i]_{i=0}^N$, then we can cast the computation of the denominator as a quadratically (resp. linearly) constrained quadratic program involving $G$.
Before specifying the kind of normalization enforced, it is important to make a remark on numerical stability. The basis in \eqref{eq:bary1} is actually just a ($i$-dependent) factor away from being the Lagrangian one (for which $\phi_i(\mu_j)$ would equal $\delta_{(i+1)j}$ instead of $\delta_{(i+1)j}\prod_{k\neq i+1}(\mu_j-\mu_k)$ as it does in our case). As such, it is generally a bad idea to numerically evaluate $q$ starting from its expansion coefficients with respect to $\{\phi_i\}_{i=0}^N$. We get around this by exploiting the following trick, whose foundation is in \cite[Section 2.3.3]{Klein}: the roots of $\widehat{q}=\sum_{i=0}^N\widehat{q}_i\phi_i$ are the $N$ finite eigenvalues $\lambda$ of the generalized $(N+2)\times(N+2)$ eigenproblem
\begin{equation}
\textrm{det}\left(\begin{bmatrix}
0 & \widehat{q}_0 & \cdots & \widehat{q}_N\\
1 & \mu_1 & & \\
\vdots & & \ddots & \\
1 & & & \mu_S
\end{bmatrix}-\begin{bmatrix}
0 & & & \\
& 1 & & \\
& & \ddots & \\
& & & 1
\end{bmatrix}\lambda\right)=0.
\end{equation}
This computation is numerically more stable than most manipulations of a polynomial in the basis \eqref{eq:bary1}.
Once the roots of $\widehat{q}$ have been computed, one can either convert it to nodal form \eqref{eq:pole1} or forgo using $\widehat{q}$ completely, in favor of a Heaviside-like approximation involving the newly computed roots $\{\widehat{\lambda}_i\}_{i=1}^N$ as poles:
\[
\frac{\widehat{p}(\mu)}{\widehat{q}(\mu)}\quad\rightsquigarrow\quad\sum_{i=1}^N\frac{\widehat{r}_i}{\mu-\widehat{\lambda}_i}+\widetilde{p}(\mu),
\]
with $\widetilde{p}$, e.g., a polynomial (of degree at most $S-N-1$) or a combination of radial basis functions.
\subsection{Quadratic constraint}\label{sec:barycentricnorm}
We constrain $[\widehat{q}_i]_{i=0}^N$ to have unit (Euclidean) norm. The resulting optimization problem can be cast as a minimal (normalized) eigenvector problem for $G$ in \eqref{eq:bary4}. More explicitly,
\[[\widehat{q}_i]_{i=0}^N=\argmin_{\substack{\mathbf{q}\in\C^{N+1}\\\|\mathbf{q}\|_2=1}}\mathbf{q}^HG\mathbf{q}.\]
\subsection{Linear constraint}\label{sec:barycentricaverage}
We constrain $\sum_{i=0}^N\widehat{q}_i=1$, so that the polynomial $\widehat{q}$ is monic. Given $G$ in \eqref{eq:bary4}, the resulting optimization problem can be solved rather easily as:
\[[\widehat{q}_i]_{i=0}^N=\frac{G^{-1}\mathbf{1}_{N+1}}{\mathbf{1}_{N+1}^\top G^{-1}\mathbf{1}_{N+1}},\quad\text{with }\mathbf{1}_{N+1}=[1,\ldots,1]^\top\in\C^{N+1}.\]
\section{Poles as degrees of freedom}\label{sec:nodal}
Here we assume that the sample points are distinct. One can avoid expressing $q$ explicitly and work directly with its roots by employing a nodal form
\begin{equation}\label{eq:pole1}
q(\mu)=\prod_{i=1}^N(\mu-\lambda_i),
\end{equation}
with $\{\lambda_i\}_{i=1}^N\subset\C$. (It is important to note that we are constraining $q$ to have degree exactly $N$.)
The optimization problem that allows us to find the desired poles can now be cast as
\[\{\widehat{\lambda}_i\}_{i=1}^N=\argmin_{\{\lambda_i\}_{i=1}^N\subset\C}\norm{\frac{\mathrm{d}^N}{\mathrm{d}\mu^N}\I^N\left(\Big(\big(\mu_j,u(\mu_j)\prod_{i=1}^N(\mu_j-\lambda_i)\big)\Big)_{j=1}^S\right)},\]
which, if $\I^N$ is expressed via a hierarchical basis, is equivalent to
\begin{equation}\label{eq:pole2}
\{\widehat{\lambda}_i\}_{i=1}^N=\argmin_{\{\lambda_i\}_{i=1}^N\subset\C}\norm{[\underbrace{0,\ldots,0}_{N},1]\big(\Phi^H\Phi\big)^{-1}\Phi^H\Big[u(\mu_j)\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}\left[\prod_{i=1}^N(\mu_j-\lambda_i)\right]_{j=1}^S}^2
\end{equation}
(the outer square has been added for later convenience). This problem is nonconvex and highly nonlinear with respect to $\{\lambda_i\}_{i=1}^N\subset\C$, and an iterative method is necessary for its solution.
Two observations aid us here:
\begin{itemize}
\item The target functional has a relatively simple structure, allowing us to compute its Jacobian and Hessian with respect to the poles exactly. Explicitly, for all $\{\lambda_i\}_{i=1}^N\subset\C$ and $k=1,\ldots,N$, the partial derivative with respect to $\lambda_k$ at $\{\lambda_i\}_{i=1}^N\subset\C$ equals
\begin{equation}\label{eq:pole3}
-2\inner{\mathbf{y}^H\left[q(\mu_j)\right]_{j=1}^S}{\mathbf{y}^H\left[\frac{q(\mu_j)}{\mu_j-\lambda_k}\right]_{j=1}^S}
\end{equation}
where $q$ is given in \eqref{eq:pole1} and
\[\mathbf{y}=\Big[\overline{u(\mu_j)}\delta_{jj'}\Big]_{j=1,j'=1}^{S,S}\Phi\big(\Phi^H\Phi\big)^{-1}[\underbrace{0,\ldots,0}_{N},1]^\top\in V^S\]
does not change as the iterations proceed. Similarly, for all $\{\lambda_i\}_{i=1}^N\subset\C$ and $k,k'=1,\ldots,N$, the second order partial derivative with respect to $\lambda_k$ and $\lambda_{k'}$ at $\{\lambda_i\}_{i=1}^N\subset\C$ is
\begin{multline}\label{eq:pole4}
2\inner{\mathbf{y}^H\left[\frac{q(\mu_j)}{\mu_j-\lambda_{k'}}\right]_{j=1}^S}{\mathbf{y}^H\left[\frac{q(\mu_j)}{\mu_j-\lambda_k}\right]_{j=1}^S}+\\
+2(1-\delta_{kk'})\inner{\mathbf{y}^H\left[q(\mu_j)\right]_{j=1}^S}{\mathbf{y}^H\left[\frac{q(\mu_j)}{(\mu_j-\lambda_k)(\mu_j-\lambda_{k'})}\right]_{j=1}^S}.
\end{multline}
Hence, we can use Newton's method.
\item Due to the iterative nature of the solution strategy, an initial guess of the optimal poles is necessary. Luckily, we can employ any of the other four methods to this aim. More precisely, we apply \code{DOMINANT} as a preliminary step, compute the roots of the resulting $\widehat{q}$, and use them as initial guess for the Newton iterations.
\end{itemize}
As stopping criterion for Newton's method, we use the (relative) norm of the increment:
\[\sum_{i=1}^N\left|\lambda_i^{(\textrm{iter}+1)}-\lambda_i^{(\textrm{iter})}\right|^2\leq\underbrace{\textrm{tolerance}}_{\sim 10^{-10}}\sum_{i=1}^N\left|\lambda_i^{(\textrm{iter}+1)}\right|^2.\]
Since the initial guess is quite good, usually just a few (most often, 1) Newton iterations are necessary.
\section{Minor observations}
\begin{itemize}
\item For \code{BARYCENTRIC\_*}, a specific choice of polynomial basis for $\I^N$ was used to diagonalize the functional. Under the assumptions that the sample points are distinct and that $N=S-1$, one can employ the quasi-Lagrangian basis \eqref{eq:bary1} to expand $\I^N$ in the other approaches as well, thus simplifying significantly the structure of \eqref{eq:glob1}:
\begin{equation*}
\widehat{q}=\argmin_{\substack{q\in\mathbb{P}^N(\C;\C)\\(\star)}}\norm{\sum_{j=1}^Sq(\mu_j)u(\mu_j)\prod_{\substack{j'=1\\j'\neq j}}^S\frac1{\mu_j-\mu_{j'}}}.
\end{equation*}
Numerically, this has repercussions on the computation of the Gramian \eqref{eq:poly3} and of the vector $\mathbf{y}$ in \eqref{eq:pole3}.
\item In general, \code{NORM} and \code{BARYCENTRIC\_NORM} can be expected to be more numerically stable than \code{DOMINANT} and \code{BARYCENTRIC\_AVERAGE}, respectively. This is due to the fact that the normalization is enforced in a more numerically robust fashion.
\item If the snapshots are orthonormalized via \code{POD}\footpath{./rrompy/sampling/engines/sampling_engine_pod.py}, all the $V$-inner products (resp. norms) are recast as Euclidean inner products (resp. norms) involving the $R$ factor of the generalized ($V$-orthonormal) QR decomposition of the snapshots.
\item If a univariate rational surrogate is built in the scope of multivariate pivoted approximation\footpath{./rrompy/reduction_methods/pivoted/*}, the rational approximant is converted into a Heaviside/nodal representation when different surrogates are combined. As such, the \code{BARYCENTRIC\_*} or \code{NODAL} approaches may be preferable to avoid extra computations, as well as additional round-off artifacts.
\end{itemize}
\begin{thebibliography}{1}
\bibitem{Pradovera}%
D.~Pradovera, Interpolatory rational model order reduction of parametric problems lacking uniform inf-sup stability, SIAM J. Numer. Anal. 58 (2020) 2265--2293. doi:10.1137/19M1269695.
\bibitem{Klein}%
G.~Klein, Applications of Linear Barycentric Rational Interpolation, PhD Thesis no. 1762, Universit\'e de Fribourg (2012).
\end{thebibliography}
\end{document}

% Event Timeline (non-LaTeX page residue, commented out)