Page MenuHomec4science

appendix.tex
No OneTemporary

File Metadata

Created
Tue, Oct 8, 09:21

appendix.tex

\section{Proof of Lemma \ref{lem:smoothness}\label{sec:proof of smoothness lemma}}
\textbf{We assume Hessian exists. We shouldn't assume that for a strictly correct proof!}
Note that
\begin{align}
\mathcal{L}_{\beta}(x,y) = f(x) + \sum_{i=1}^m y_i A_i (x) + \frac{\b}{2} \sum_{i=1}^m (A_i(x))^2,
\end{align}
which implies that
\begin{align}
\nabla_x \mathcal{L}_\beta(x,y) & = \nabla f(x) + \sum_{i=1}^m y_i \nabla A_i(x) + \frac{\b}{2} \sum_{i=1}^m A_i(x) \nabla A_i(x) \nonumber\\
& = \nabla f(x) + DA(x)^\top y + \b DA(x)^\top A(x),
\end{align}
where $DA(x)$ is the Jacobian of $A$ at $x$. By taking another derivative with respect to $x$, we reach
\begin{align}
\nabla^2_x \mathcal{L}_\beta(x,y) & = \nabla^2 f(x) + \sum_{i=1}^m \left( y_i + \b A_i(x) \right) \nabla^2 A_i(x) + \b \sum_{i=1}^m \nabla A_i(x) \nabla A_i(x)^\top.
\end{align}
It follows that
\begin{align}
\|\nabla_x^2 \mathcal{L}_\beta(x,y)\| & \le \| \nabla^2 f(x) \|+ \max_i \| \nabla^2 A_i(x)\| \left (\|y\|_1+\b \|A(x)\|_1 \right) + \beta\sum_{i=1}^m \|\nabla A_i(x)\|^2 \nonumber\\
& \le \lambda_h+ \sqrt{m} \lambda_A \left (\|y\|+\b \|A(x)\| \right) + \b \|DA(x)\|^2_F.
\end{align}
For every $x$ such that $\|A(x)\|\le \rho$ and $\|x\|\le \rho'$, we conclude that
\begin{align}
\|\nabla_x^2 \mathcal{L}_\beta(x,y)\|
& \le \lambda_f + \sqrt{m}\lambda_A \left(\|y\| + \b\rho \right) + \b \max_{\|x\|\le \rho'}\|DA(x)\|_F^2,
\end{align}
which completes the proof of Lemma \ref{lem:smoothness}.
%\section{Proof of Lemma \ref{lem:11}\label{sec:proof of descent lemma}}
%
%Throughout, let
%\begin{align}
%G = G_{\b,\g}(x,y) = \frac{x-x^+}{\g},
%\end{align}
%for short.
%Suppose that $\|A(x)\|\le \rho$, $\|x\|\le \rho$, and similarly $\|A(x^+)\|\le \rho$, $\|x^+\|\le \rho'$. An application of Lemma \ref{lem:smoothness} yields that
%\begin{align}
%\L_\b(x^+,y)+g(x^+) & \le \L_\b(x,y)+ \langle x^+-x,\nabla_x \L_\b(x,y) \rangle
%+ \frac{\lambda_\b}{2} \|x^+ - x\|^2 + g(x^+) \nonumber\\
%& = \L_\b(x,y)-\g \langle G ,\nabla_x \L_\b(x,y) \rangle
%+ \frac{\g^2 \lambda_\b }{2} \|G\|^2 + g(x^+)
%\label{eq:descent pr 1}
%\end{align}
%Since $x^+ = P_g(x - \g \nabla_x \L_\b(x,y))$, we also have that
%\begin{align}
%\g (G - \nabla_x \L_\b(x,y)) = \xi \in \partial g(x^+).
%\label{eq:opt of prox}
%\end{align}
%By combining (\ref{eq:descent pr 1},\ref{eq:opt of prox}), we find that
%\begin{align}
%\L_\b(x^+,y)+g(x^+) &
%\le \L_\b(x,y) -\g \|G\|^2 + \g \langle G, \xi \rangle + \frac{\g^2 \lambda_\b}{2}\|G\|^2 + g(x^+) \nonumber\\
%& = \L_\b(x,y) -\g \|G\|^2 + \langle x- x^+ , \xi \rangle + \frac{\g^2 \lambda_\b}{2}\|G\|^2 + g(x^+) \nonumber\\
%& \le \L_\b(x,y) + g(x) - \g\left( 1-\frac{\g\lambda_\b}{2}\right) \|G\|^2,
%\end{align}
%where the last line above uses the convexity of $g$. Recalling that $\g\le 1/\lambda_\b$ completes the proof of Lemma \ref{lem:11}.
%
%
%\section{Proof of Lemma \ref{lem:eval Lipsc cte}\label{sec:proof of linesearch}}
%
%
%Recalling $x^+_{\gamma}$ in \eqref{eq:defn of gamma line search}, we note that
%\begin{equation}
%x^+_{\gamma} - x +\gamma \nabla_x \mathcal{L}_\beta(x,y) = -\xi \in -\partial g (x^+_{\gamma}).
%\label{eq:optimality of uplus}
%\end{equation}
%Lastly, $\gamma$ by definition in \eqref{eq:defn of gamma line search} satisfies
%\begin{align}
%& \mathcal{L}_{\beta}(x^+_{\gamma},y) + g(x_\g^+) \nonumber\\
% & \le \mathcal{L}_\beta(x,y) + \g \left\langle
%x^+_{\gamma} - x , \nabla_x \mathcal{L}_\beta (x,y)
%\right\rangle + \frac{1}{2\gamma}\|x^+_{\gamma} - x\|^2
%+ g(x_\g^+)
%\nonumber\\
%& = \mathcal{L}_\beta(x,y) + \left\langle
%x - x^+_{\gamma} ,\xi
%\right\rangle
%- \frac{1}{2\gamma}\|x^+_{\gamma} - x\|^2 + g(x_\g^+)\nonumber\\
%& \le \mathcal{L}_\beta(x,y)
%- \frac{1}{2\gamma}\|x^+_{\gamma} - x\|^2 + g(x) - g(x^+_\g)
%\qquad (\text{convexity of } g) \nonumber\\
%& = \mathcal{L}_\beta(x,y) - \frac{\gamma}{2} \|G_{\beta,\gamma}(x,y)\|^2 +g(x) - g(x^+_\g),
%\qquad \text{(see Definition \ref{def:grad map})}
%\end{align}
%which completes the proof of Lemma \ref{lem:eval Lipsc cte}.
%
%
%

Event Timeline