
\section{ADMM}
To solve the equivalent formulation presented in Program \eqref{eq:minmax}, we first propose LinAL, a linearized augmented Lagrangian algorithm, detailed in Algorithm~\ref{Algo:2}.
\begin{algorithm}
\label{Algo:2}
Input: Parameters $\beta_1,\s_1,\rho,\rho',\rho'',\tau> 0$, initialization $x_{1}\in \RR^d$ with $\|A(x_1)\|\le \rho$ and $\|x_1\|\le \rho'$, dual initialization $y_0\in \RR^m$.
\\
For $k=1,2,\ldots$ and until convergence, execute\\
\begin{enumerate}
\item \textbf{(Update penalty weight)} Set
\begin{align}
\beta_k = \frac{\beta_{1}}{\log 2}\sqrt{k \log^2(k+1)} .
\end{align}
\item \textbf{(Line search)} Use the line search procedure in \eqref{eq:defn of gamma line search} with $x=x_k,y=y_k,\b=\b_k$ and let $\g_k = \g$ be the output.\\
\item \textbf{(Primal descent step)} Set $ x_{k+1} = P_{g }(x_{k} - \gamma_{k} \nabla \mathcal{L}_{\beta_{k}}(x_{k},y_{k}))$, where $\L_{\b_k}$ is the augmented Lagrangian and $P_g$ denotes the proximal operator, defined in (\ref{eq:Lagrangian},\ref{eq:defn of prox}), respectively.\\
\item \textbf{(Control)} If necessary, modify $x_{k+1}$ to ensure that $\|x_{k+1}\|\le \rho'$ or $\|x_{k+1}-x_k\|\le \rho''$.\\
\item \textbf{(Stopping criterion)} If $\g_k \|G_{\b_k,\g_k}(x_k,y_k)\|^2 + \s_k \|A(x_k)\|^2 \le \tau$, then quit and return $x_{k+1}$ as an approximate minimizer of Program \eqref{prob:01}. See \eqref{eq:grad map} for the definition of the gradient mapping. \\
\item \textbf{(Update dual step size)} Set
\begin{align}
\s_{k+1} = \s_{1} \min\left( {\frac{1}{\sqrt{k}}}, \frac{\|A(x_1)\| \log^2 2 }{\|A(x_k)\|k\log^2(k+1)} \right).
\end{align}\\
\item \textbf{(Dual ascent step)} Set $y_{k+1} = y_{k} + \sigma_{k+1}A(x_{k+1})$.
\end{enumerate}
\caption{LinAL for solving Program \eqref{prob:01}}
\end{algorithm}
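For concreteness, the following Python snippet is a minimal sketch of the loop above, assuming NumPy and omitting the control step; \texttt{grad\_lagrangian}, \texttt{prox\_g}, and \texttt{line\_search} are hypothetical placeholders for the maps defined in (\ref{eq:Lagrangian},\ref{eq:defn of prox},\ref{eq:defn of gamma line search}), and the gradient mapping is instantiated as $(x_k-x_{k+1})/\g_k$, a common convention; the precise definition is in \eqref{eq:grad map}.
\begin{verbatim}
import numpy as np

def linal(x, y, A, grad_lagrangian, prox_g, line_search,
          beta1, sigma1, tau, max_iter=1000):
    # Minimal sketch of LinAL; the oracle arguments are hypothetical
    # placeholders for the quantities defined in the text.
    sigma = sigma1
    normAx1 = np.linalg.norm(A(x))  # ||A(x_1)||, reused in step 6
    for k in range(1, max_iter + 1):
        # Step 1: penalty weight beta_k = (beta_1/log 2) sqrt(k) log(k+1).
        beta = beta1 / np.log(2) * np.sqrt(k) * np.log(k + 1)
        # Step 2: line search for the primal step size gamma_k.
        gamma = line_search(x, y, beta)
        # Step 3: linearized primal step, x_{k+1} = P_g(x_k - gamma grad L).
        x_new = prox_g(x - gamma * grad_lagrangian(x, y, beta), gamma)
        # Step 5: stopping criterion via the gradient mapping.
        G = (x - x_new) / gamma
        if (gamma * np.linalg.norm(G) ** 2
                + sigma * np.linalg.norm(A(x)) ** 2) <= tau:
            return x_new
        # Step 6: dual step size sigma_{k+1}; guard against ||A(x_k)|| = 0.
        normAx = max(np.linalg.norm(A(x)), 1e-12)
        sigma = sigma1 * min(1 / np.sqrt(k),
                             normAx1 * np.log(2) ** 2
                             / (normAx * k * np.log(k + 1) ** 2))
        # Step 7: dual ascent, y_{k+1} = y_k + sigma_{k+1} A(x_{k+1}).
        y = y + sigma * A(x_new)
        x = x_new
    return x
\end{verbatim}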
Slater's condition plays a key role in convex optimization, and we require a similar condition for the success of our algorithm.
\begin{definition}\label{defn:nonconvex slater} \textbf{(Nonconvex Slater's condition)}
Consider an interval $K=[k_0:k_1]$ and a sequence $\{x_k\}_{k=k_0}^{k_1}\subset \RR^d$. Let $S_K\subset \RR^d$ be a subspace such that
\begin{align}
S _{K} \supseteq \bigcup_{k\in K} P_{\partial g^*(x_{k+1})}\left( DA(x_{k+1})^\top A(x_{k+1}) \right),
\label{eq:defn of S defn}
\end{align}
and, to simplify the notation, let $S_{K}$ also denote an orthonormal basis for this subspace.
For $\rho,\rho'>0$, we set
\begin{align}
\nu_A :=
\begin{cases}
\underset{v,x}{\min} \, \frac{\left\| S_{K}^\top P_{\partial g^*(x)} ( DA(x)^\top v) \right\|}{\|v\|} \\
\|v\|\le \rho\\
\|x\|\le \rho',
\end{cases}
\label{eq:new slater defn}
\end{align}
and say that Program (\ref{prob:01}) satisfies the nonconvex Slater's condition if $\nu_A >0$.
\end{definition}
To gain some insight into Definition \ref{defn:nonconvex slater}, suppose that $f:\RR^d\rightarrow\RR$ is convex. Consider a nonempty and bounded convex set $C\subseteq\RR^d$ and let us restrict ourselves to the choice $g = 1_C:\RR^d\rightarrow\RR$, which takes $x\in \RR^d$ to
\begin{align}
g(x) = 1_C(x) =
\begin{cases}
0 & x \in C\\
\infty & x\notin C.
\end{cases}
\label{eq:indicator}
\end{align}
We also let $T_C(x)$ denote the tangent cone to $C$ at $x$, and let $P_{T_C(x)}:\RR^d\rightarrow\RR^d$ denote the orthogonal projection onto this tangent cone; note that, for this choice of $g$, the proximal operator $P_g$ reduces to the orthogonal projection onto $C$.
To simplify the discussion, we also set $S_K = \RR^d$. Then, \eqref{eq:new slater defn} can be written as
\begin{align}
\nu_A :=
\begin{cases}
\underset{v,x}{\min} \, \frac{\left\| P_{T_C(x)} A^\top v \right\|}{\|v\|} \\
\|v\|\le \rho\\
\|x\|\le \rho'
\end{cases}
= \begin{cases}
\underset{x}{\min} \, \eta_{m}\left( P_{T_C(x)} A^\top \right) \\
\|x\|\le \rho',
\end{cases}
\label{eq:nu for cvx}
\end{align}
where $\eta_{m}$ returns the $m$th largest singular value of its input. The nonconvex Slater condition here, namely, $\nu_A>0$, is closely related to the standard Slater's condition in convex optimization.
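As a minimal numerical illustration of \eqref{eq:nu for cvx}, assuming NumPy, the hypothetical helper below evaluates $\eta_m(P_S A^\top)$ in the special case where the tangent cone is a subspace $S$, so that the projection is linear. At an interior point of $C$ we have $T_C(x)=\RR^d$ and the quantity reduces to the smallest of the $m$ singular values of $A^\top$, which is positive for a generic $A$; projecting onto a subspace of dimension less than $m$ instead drives it, and hence $\nu_A$, to zero.
\begin{verbatim}
import numpy as np

def eta_m(A, S=None):
    # m-th largest singular value of P_S A^T, for A of shape (m, d) and S
    # a (d, r) matrix with orthonormal columns spanning a subspace tangent
    # cone; S = None encodes T_C(x) = R^d, e.g. at an interior point of C.
    M = A.T if S is None else S @ (S.T @ A.T)
    return np.linalg.svd(M, compute_uv=False)[A.shape[0] - 1]

rng = np.random.default_rng(0)
A = rng.standard_normal((3, 10))                   # generic, m = 3, d = 10
print(eta_m(A))                                    # positive: nu_A > 0
S = np.linalg.qr(rng.standard_normal((10, 2)))[0]  # 2-dim subspace of R^10
print(eta_m(A, S))                                 # ~0: rank(P_S A^T) <= 2
\end{verbatim}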
\begin{proposition}\label{prop:convex}
In Program (\ref{prob:01}), suppose that $f$ is a convex function, $g$ is as in (\ref{eq:indicator}), and $A:\RR^d\rightarrow\RR^m$ is a linear operator, represented by the matrix $A\in \RR^{m\times d}$. Assume also that Program (\ref{prob:01}) is feasible, namely, that there exists $x\in C$ such that $Ax=0$. In (\ref{eq:nu for cvx}), let us take $S_K =\RR^d$. Then the standard Slater's condition holds if the nonconvex Slater's condition ($\nu_A>0$) holds.
\end{proposition}
\begin{proof}
Suppose that the standard Slater's condition does not hold, namely, that
\begin{equation}
\Null(A)\cap \relint(C) = \emptyset,
\label{eq:no feas}
\end{equation}
where $\Null(A)$ and $\relint(C)$ denote the null space of the matrix $A$ and the relative interior of $C$, respectively.
By assumption, there exists $x_0\in C$ such that $Ax_0=0$. It follows from \eqref{eq:no feas} that $x_0\in \text{boundary}(C)$ and that $\Null(A)$ supports $C$ at $x_0$, namely, there exists a nonzero vector $a\in\row(A)$ such that
$\langle a,x \rangle \ge \langle a,x_0\rangle = 0$, for every $x\in C$,
where $\row(A)$ denotes the row space of the matrix $A$.
Consequently, $a$ belongs to the normal cone $N_C(x_0)$ of the set $C$ at $x_0$, so that
$\row(A)\cap N_C(x_0) \ne \{0\}$. That is, there exists a unit-norm vector $v$ such that $A^\top v\in N_C(x_0)$ and, by the Moreau decomposition,
$P_{T_C(x_0)}A^\top v=0$,
and, consequently,
$
\eta_{m}(P_{T_C(x_0)}A^\top) = 0
$.
Let us take $\rho'=\|x_0\|$ in \eqref{eq:nu for cvx}. We then conclude that $\nu_A=0$, namely, the nonconvex Slater's condition also does not hold, thereby completing the proof of Proposition \ref{prop:convex}.\hfill \qed
\end{proof}
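As a concrete illustration of Proposition~\ref{prop:convex}, take $d=2$, $m=1$, $A=[1 \;\, 0]$, and let $C$ be the unit ball centered at $(1,0)$. The only feasible point is $x_0=(0,0)$, which lies on the boundary of $C$, so the standard Slater's condition fails. At $x_0$, the tangent cone is the halfspace $T_C(x_0)=\{v\in\RR^2: v_1\ge 0\}$ and, taking $v=-1$, we find $P_{T_C(x_0)}A^\top v = P_{T_C(x_0)}\big((-1,0)\big) = 0$, so that $\nu_A=0$ in \eqref{eq:nu for cvx}, in agreement with the contrapositive of Proposition~\ref{prop:convex}.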
The nonconvex Slater's condition is necessary for the success of Algorithm~\ref{Algo:2}.
\begin{theorem}[LinAL]
\label{thm:main}
For a sufficiently large $k_0$, consider an integer interval $K=[k_0:k_1]$, and suppose that the nonconvex Slater's condition (Definition~\ref{defn:nonconvex slater}; see also Lemma~\ref{lem:bnd bnd Ak}) is in force. Suppose also that
\begin{align}
\inf_{k\ge k_0} f(x_k) + g(x_k) + \langle A(x_k) ,y_{k_0-1}\rangle
>- \infty,
\end{align}
and that $\rho$ is large enough such that (\ref{eq:suff cnd on rho final}) holds. For $\rho'>0$, in addition to the strong smoothness of $f$ and $A$ quantified in (\ref{eq:smoothness basic}), let us define
\begin{align}
\lambda'_f = \max_{\|x\|\le \rho'} \|\nabla f(x)\|,
\qquad
\lambda'_A = \max_{\|x\| \le \rho'} \|DA(x)\|,
\end{align}
to be the (restricted) Lipschitz constants of $f$ and $A$, respectively.
Moreover, suppose that
\begin{align}
\rho'' \le \frac{2\nu_A}{\lambda_A}.
\end{align}
Then the output of LinAL satisfies
\begin{align}
\min_{k\in K}\overline{\g} \|G_k\|^2 \sqrt{\frac{k_0 \log^2(k_0+1)}{k_1\log^2(k_1+1)}} + \|A(x_k)\|^2
\lesssim \frac{1}{k_1-k_0},
\end{align}
where $\overline{\gamma}$ is independent of $k_1$ and $\lesssim$ suppresses the dependence on all parameters except $k_1$. The exact expressions in terms of $\lambda_f,\lambda'_f,\lambda_A,\lambda'_A, \b_{k_0},\s_{k_0}, \nu_A,y_0$ are found in (\ref{eq:low bnd on gammas},\ref{eq:exact dependences}).
\end{theorem}
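To interpret Theorem~\ref{thm:main}, note that for $k_1=2k_0$ the factor $\sqrt{k_0\log^2(k_0+1)/(k_1\log^2(k_1+1))}$ is bounded below by a universal constant, so the guarantee loosely reads
\begin{align*}
\min_{k\in K}\, \overline{\g} \|G_k\|^2 + \|A(x_k)\|^2
\lesssim \frac{1}{k_0}.
\end{align*}
That is, roughly $1/\epsilon$ iterations of LinAL suffice to reach an iterate at which both the gradient mapping and the feasibility gap are small, in the sense that $\overline{\g}\|G_k\|^2+\|A(x_k)\|^2\le \epsilon$.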
