appendix.tex
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Tue, Oct 8, 09:21

appendix.tex
View Options

	\section{Proof of Lemma \ref{lem:smoothness}\label{sec:proof of smoothness lemma}}
	\textbf{We assume Hessian exists. We shouldn't assume that for a strictly correct proof!}
	Note that
	\begin{align}
	\mathcal{L}_{\beta}(x,y) = f(x) + \sum_{i=1}^m y_i A_i (x) + \frac{\b}{2} \sum_{i=1}^m (A_i(x))^2,
	\end{align}
	which implies that
	\begin{align}
	\nabla_x \mathcal{L}_\beta(x,y) & = \nabla f(x) + \sum_{i=1}^m y_i \nabla A_i(x) + \frac{\b}{2} \sum_{i=1}^m A_i(x) \nabla A_i(x) \nonumber\\
	& = \nabla f(x) + DA(x)^\top y + \b DA(x)^\top A(x),
	\end{align}
	where $DA(x)$ is the Jacobian of $A$ at $x$. By taking another derivative with respect to $x$, we reach
	\begin{align}
	\nabla^2_x \mathcal{L}_\beta(x,y) & = \nabla^2 f(x) + \sum_{i=1}^m \left( y_i + \b A_i(x) \right) \nabla^2 A_i(x) + \b \sum_{i=1}^m \nabla A_i(x) \nabla A_i(x)^\top.
	\end{align}
	It follows that
	\begin{align}
	\\|\nabla_x^2 \mathcal{L}_\beta(x,y)\\| & \le \\| \nabla^2 f(x) \\|+ \max_i \\| \nabla^2 A_i(x)\\| \left (\\|y\\|_1+\b \\|A(x)\\|_1 \right) + \beta\sum_{i=1}^m \\|\nabla A_i(x)\\|^2 \nonumber\\
	& \le \lambda_h+ \sqrt{m} \lambda_A \left (\\|y\\|+\b \\|A(x)\\| \right) + \b \\|DA(x)\\|^2_F.
	\end{align}
	For every $x$ such that $\\|A(x)\\|\le \rho$ and $\\|x\\|\le \rho'$, we conclude that
	\begin{align}
	\\|\nabla_x^2 \mathcal{L}_\beta(x,y)\\|
	& \le \lambda_f + \sqrt{m}\lambda_A \left(\\|y\\| + \b\rho \right) + \b \max_{\\|x\\|\le \rho'}\\|DA(x)\\|_F^2,
	\end{align}
	which completes the proof of Lemma \ref{lem:smoothness}.

	%\section{Proof of Lemma \ref{lem:11}\label{sec:proof of descent lemma}}
	%
	%Throughout, let
	%\begin{align}
	%G = G_{\b,\g}(x,y) = \frac{x-x^+}{\g},
	%\end{align}
	%for short.
	%Suppose that $\\|A(x)\\|\le \rho$, $\\|x\\|\le \rho$, and similarly $\\|A(x^+)\\|\le \rho$, $\\|x^+\\|\le \rho'$. An application of Lemma \ref{lem:smoothness} yields that
	%\begin{align}
	%\L_\b(x^+,y)+g(x^+) & \le \L_\b(x,y)+ \langle x^+-x,\nabla_x \L_\b(x,y) \rangle
	%+ \frac{\lambda_\b}{2} \\|x^+ - x\\|^2 + g(x^+) \nonumber\\
	%& = \L_\b(x,y)-\g \langle G ,\nabla_x \L_\b(x,y) \rangle
	%+ \frac{\g^2 \lambda_\b }{2} \\|G\\|^2 + g(x^+)
	%\label{eq:descent pr 1}
	%\end{align}
	%Since $x^+ = P_g(x - \g \nabla_x \L_\b(x,y))$, we also have that
	%\begin{align}
	%\g (G - \nabla_x \L_\b(x,y)) = \xi \in \partial g(x^+).
	%\label{eq:opt of prox}
	%\end{align}
	%By combining (\ref{eq:descent pr 1},\ref{eq:opt of prox}), we find that
	%\begin{align}
	%\L_\b(x^+,y)+g(x^+) &
	%\le \L_\b(x,y) -\g \\|G\\|^2 + \g \langle G, \xi \rangle + \frac{\g^2 \lambda_\b}{2}\\|G\\|^2 + g(x^+) \nonumber\\
	%& = \L_\b(x,y) -\g \\|G\\|^2 + \langle x- x^+ , \xi \rangle + \frac{\g^2 \lambda_\b}{2}\\|G\\|^2 + g(x^+) \nonumber\\
	%& \le \L_\b(x,y) + g(x) - \g\left( 1-\frac{\g\lambda_\b}{2}\right) \\|G\\|^2,
	%\end{align}
	%where the last line above uses the convexity of $g$. Recalling that $\g\le 1/\lambda_\b$ completes the proof of Lemma \ref{lem:11}.
	%
	%
	%\section{Proof of Lemma \ref{lem:eval Lipsc cte}\label{sec:proof of linesearch}}
	%
	%
	%Recalling $x^+_{\gamma}$ in \eqref{eq:defn of gamma line search}, we note that
	%\begin{equation}
	%x^+_{\gamma} - x +\gamma \nabla_x \mathcal{L}_\beta(x,y) = -\xi \in -\partial g (x^+_{\gamma}).
	%\label{eq:optimality of uplus}
	%\end{equation}
	%Lastly, $\gamma$ by definition in \eqref{eq:defn of gamma line search} satisfies
	%\begin{align}
	%& \mathcal{L}_{\beta}(x^+_{\gamma},y) + g(x_\g^+) \nonumber\\
	% & \le \mathcal{L}_\beta(x,y) + \g \left\langle
	%x^+_{\gamma} - x , \nabla_x \mathcal{L}_\beta (x,y)
	%\right\rangle + \frac{1}{2\gamma}\\|x^+_{\gamma} - x\\|^2
	%+ g(x_\g^+)
	%\nonumber\\
	%& = \mathcal{L}_\beta(x,y) + \left\langle
	%x - x^+_{\gamma} ,\xi
	%\right\rangle
	%- \frac{1}{2\gamma}\\|x^+_{\gamma} - x\\|^2 + g(x_\g^+)\nonumber\\
	%& \le \mathcal{L}_\beta(x,y)
	%- \frac{1}{2\gamma}\\|x^+_{\gamma} - x\\|^2 + g(x) - g(x^+_\g)
	%\qquad (\text{convexity of } g) \nonumber\\
	%& = \mathcal{L}_\beta(x,y) - \frac{\gamma}{2} \\|G_{\beta,\gamma}(x,y)\\|^2 +g(x) - g(x^+_\g),
	%\qquad \text{(see Definition \ref{def:grad map})}
	%\end{align}
	%which completes the proof of Lemma \ref{lem:eval Lipsc cte}.
	%
	%
	%

appendix.texNo OneTemporaryActions

File Metadata

appendix.texView Options

Event Timeline

appendix.tex
No OneTemporary
Actions

appendix.tex
View Options