diff --git a/codes/Test_sparse_reg.m b/codes/Test_sparse_reg.m
new file mode 100644
index 0000000..4400452
--- /dev/null
+++ b/codes/Test_sparse_reg.m
@@ -0,0 +1,61 @@
+% TEST Sparse Regression: Main file for solving the sparse regression
+% problem with an inexact augmented Lagrangian method.
+%   minimize    (1/2)*||Ahat*w - b||^2,   Ahat = [A, -A]
+%   subject to: w = z.^2
+%               ||z||_2^2 <= tau
+%
+% Author: Mehmet Fatih Sahin
+% Date : 27.12.2018
+
+clearvars
+% close all
+
+vec = @(x) reshape(x, numel(x), 1);
+addpath(genpath('./functions/'));
+addpath(genpath('./helpers/'));
+
+%% Load data
+sparse_reg_load_data();
+
+Ahat = [A, -A];
+
+%% Initializations
+% The lifted variable is u = [w; z] with w, z in R^(2d). Note that the
+% solver passes 1/beta as the second argument of f_grad_u, so the factor
+% 1/(beta_) below equals the penalty parameter beta.
+f_grad_u = @(u, beta_, yc) [Ahat'*(Ahat*u(1:2*d) - b); zeros(2*d,1)] + 1/(beta_)*[u(1:2*d) - u(2*d+1:4*d).^2; -2*(u(1:2*d) - u(2*d+1:4*d).^2).*u(2*d+1:4*d)] + [yc; -2*yc.*u(2*d+1:4*d)];
+f_u = @(u) 1/2*norm(Ahat*u(1:2*d) - b, 'fro')^2;
+proj = @(u) [u(1:2*d); u(2*d+1:4*d)*min(1, sqrt(tau)/norm(u(2*d+1:4*d)))]; % project z onto ||z||_2 <= sqrt(tau)
+get_feas = @(u) u(1:2*d) - u(2*d+1:4*d).^2;
+gamma_rule = @(beta_, yc) 1e-2; % constant step-size rule (currently unused: the solver runs its own line search)
+
+%% Define algorithmic parameters and rules
+beta0 = 1e0;        % initial value for the penalty parameter
+sigma0 = beta0*1e2; % initial value for the dual step size
+clearvars update_params
+update_params = @(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec) update_params_nonadaptive(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec, sigma0, beta0);
+
+params.averaging = 0;
+params.iters = 1e4;   % number of iterations
+params.verbose = 1e3; % print progress every params.verbose iterations
+
+params.Uinit = 1*rand(4*d, 1); % random initialization for u = [w; z]
+
+params.ycinit = zeros(2*d,1);
+params.savehist = 1;
+params.startfeasible = 0; % start from a feasible point
+params.gamma = 2; % initial step size
+params.theta = 2; % line-search increase/decrease factor
+
+% Script: solves the reference problem with CVX and sets params.x_cvx
+sparse_reg_cvx()
+% params.x_cvx = x_cvx;
+
+%% Run the algorithm
+[Udl, output] = linearized_augmented_lagrangian_ls(f_u, f_grad_u, proj, update_params, ...
+    gamma_rule, get_feas, params);
+
+%% Plot results
+plot_results()
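+
+%% Optional sanity check (illustrative sketch, not part of the original
+%  script): recover x = w_plus - w_minus from the first 2*d entries of the
+%  last iterate and compare it with the CVX reference solution x_cvx.
+w_alm = Udl(1:2*d);
+x_alm = w_alm(1:d) - w_alm(d+1:2*d);
+fprintf('||x_alm - x_cvx||_2 = %.3e\n', norm(x_alm - x_cvx));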
diff --git a/codes/functions/linearized_augmented_lagrangian_ls.m b/codes/functions/linearized_augmented_lagrangian_ls.m
new file mode 100644
index 0000000..a0192e5
--- /dev/null
+++ b/codes/functions/linearized_augmented_lagrangian_ls.m
@@ -0,0 +1,168 @@
+function [Ucur, output] = linearized_augmented_lagrangian_ls(f_u, f_grad_u, proj, update_params, ...
+    gamma_rule, get_feas, params)
+
+% Nonconvex solver. This function solves a nonlinearly constrained
+% nonconvex problem with an inexact augmented Lagrangian method:
+%   min f(u), subject to A(u) = b, u \in \mathcal{C}
+% Augmented Lagrangian:
+%   L(u, beta, yc) = f(u) + <yc, A(u) - b> + beta/2*||A(u) - b||^2
+%
+% INPUT:  f_u:           Function handle to evaluate the objective value
+%         f_grad_u:      Function handle to evaluate the gradient of the
+%                        augmented Lagrangian. Accepts 3 inputs: u, beta, yc
+%         proj:          Projection operator onto the set C
+%         update_params: Function handle determining the update of
+%                        beta (penalty parameter) and sigma (dual step size)
+%         gamma_rule:    Function handle determining the step size
+%         get_feas:      Function handle which returns A(u) - b
+%         params:        Structure containing parameters for the
+%                        algorithm (iters, Uinit, savehist, etc.)
+% OUTPUT: Ucur:          Last iterate of the algorithm
+%         output:        Structure containing the history of the
+%                        iterates (times, feas, obj, grad_mapping, etc.)
+%
+% Author: Mehmet Fatih Sahin
+% Date : 28.12.2018
+
+tol = 1e-15;
+
+init_hist_track();
+init_iters();
+
+tic;
+
+if (params.verbose)
+    fprintf('Nonconvex Algorithm running ... \n');
+    fprintf('Iter.      |Feas       |Objective      |Grad Mapping \n');
+    fprintf('------------------------------------------------------------------\n');
+end
+
+current_obj = inf;
+prev_obj = inf;
+
+beta_old = beta_;
+sigma_old = sigma_;
+k_t = 1;
+feas_old = inf;
+% gamma_in = gamma_rule(1/beta_, yc);
+gamma_in = 1e-5;
+x_cvx = params.x_cvx;
+
+for ii = 1:params.iters
+
+    Uold = Ucur;
+
+    gradU = f_grad_u(Ucur, 1/beta_, yc);
+    f_lagrangian = @(u) f_u(u) + beta_/2*norm(get_feas(u),2)^2 + get_feas(u)'*yc;
+    [Ucur, gamma_out, nfeval] = linesearch(Uold, proj, f_lagrangian, gradU, gamma_in, theta);
+
+    % get the current feasibility (A(u) - b), update sigma and yc
+    feas_cur = get_feas(Ucur);
+
+    [beta_, sigma_, k_t] = update_params(norm(feas_cur), feas_old, k_t, ii, beta_old, sigma_old, output.sigma, output.beta);
+    beta_old = beta_;
+    sigma_old = sigma_;
+    feas_old = norm(feas_cur);
+    % update dual
+    yc = yc + sigma_ * feas_cur;
+    gamma_ = gamma_out;
+    gamma_in = gamma_out;
+
+    % average of the iterates
+    if params.averaging
+        Usum = Ucur + Usum;
+        ysum = yc + ysum;
+        Uav = 1/(ii+1)*Usum;
+        ycav = 1/(ii+1)*ysum;
+    else
+        Uav = Ucur;
+        ycav = yc;
+    end
+
+    % Stopping criteria
+    if (abs(current_obj-prev_obj) < tol && feas_old < 1e-6)% || ...
+        %(abs(f_lagrangian(Ucur) - f_lagrangian(Uold)) < 1e-15)
+        fprintf('Converged: Breaking...\n')
+        break;
+    end
+    prev_obj = current_obj;
+
+    track_hist();
+    verbosity();
+end
+
+if ii == params.iters
+    fprintf('Max iters reached: Breaking...\n\n')
+end
+
+    function [] = init_hist_track()
+        % Initialization
+        output.times = nan(params.iters, 1);
+        output.feas = nan(params.iters, 1);
+        output.obj = nan(params.iters, 1);
+        output.grad_mapping = nan(params.iters, 1);
+        output.sigma = nan(params.iters, 1);
+        output.beta = nan(params.iters, 1);
+        output.gamma = nan(params.iters, 1);
+        output.norm_dual = nan(params.iters, 1);
+        output.lagr = nan(params.iters, 1);
+        output.dist_ = nan(params.iters, 1);
+    end
+
+    function [] = track_hist()
+        % save the history
+        if (params.savehist == 1)
+            output.times(ii) = toc; tic;
+            output.feas(ii) = norm(get_feas(Uav));
+            output.obj(ii) = f_u(Uav);
+            output.grad_mapping(ii) = 1/gamma_^2 * norm(Ucur-Uold, 2)^2;
+            current_obj = output.obj(ii);
+            output.sigma(ii) = sigma_;
+            output.gamma(ii) = gamma_;
+            output.norm_dual(ii) = norm(yc);
+            output.beta(ii) = beta_;
+            output.lagr(ii) = f_lagrangian(Uold);
+            output.dist_(ii) = norm(Uav(1:size(Uav, 1)/2,1) - x_cvx(1:size(Uav, 1)/2,1), 'fro')^2;
+        end
+    end
+
+    function [] = verbosity()
+        if (params.verbose) && (mod(ii, params.verbose)==0 || ii == 1)
+            if ((mod(ii, 10*params.verbose)==0))
+                fprintf('Iter.      |Feas       |Objective      |Grad Mapping   |Nfeval \n');
+                fprintf('------------------------------------------------------------------\n');
+            end
+            str = sprintf('%d   %f   %f   %f   %d' ...
+                , ii, output.feas(ii), output.obj(ii), output.grad_mapping(ii), nfeval);
+            disp(str);
+        end
+    end
+
+    function [] = init_iters()
+        % initialize the iterations
+        Ucur = params.Uinit;
+        Ucur = Ucur./norm(Ucur, 'fro');
+        if params.startfeasible
+            % Row-normalize to start feasible (rule carried over from a
+            % max-cut setup; unused here since params.startfeasible = 0)
+            rowN = sqrt(sum(Ucur.^2,2));
+            Ucur = Ucur./(rowN*ones(1,size(Ucur,2)));
+        end
+
+        yc = params.ycinit; % center point to be updated
+        ysum = yc;
+        Usum = Ucur;
+        gamma_ = params.gamma;
+        theta = params.theta;
+
+        [beta_, sigma_] = update_params(0,0,0,0,0,0,0,0); % set initials
+    end
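+
+    % Note (added documentation, inferred from the call sites above):
+    % update_params is expected to return the penalty parameter beta_, the
+    % dual step size sigma_, and a counter k_t; calling it with all-zero
+    % arguments, as in init_iters, should return the initial beta0/sigma0.
+    % The implementation used by the test script, update_params_nonadaptive,
+    % is not included in this diff.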
+
+end
\ No newline at end of file
diff --git a/codes/helpers/linesearch.m b/codes/helpers/linesearch.m
new file mode 100644
index 0000000..1862ae9
--- /dev/null
+++ b/codes/helpers/linesearch.m
@@ -0,0 +1,59 @@
+function [unext, gamma_out, nfeval] = linesearch(ucur, proj, f_lagrangian, grad_cur, gamma_in, theta)
+
+% Line-search routine that computes the next iterate by adjusting the step
+% size until the projected gradient step
+%   u+ = proj(ucur - gamma*grad_cur)
+% satisfies the sufficient-decrease condition
+%   L(u+) <= L(ucur) + <grad_cur, u+ - ucur> + 1/(2*gamma)*||u+ - ucur||^2,
+% where L is the augmented Lagrangian
+%   L(u, beta, yc) = f(u) + <yc, A(u) - b> + beta/2*||A(u) - b||^2
+% of min f(u), subject to A(u) = b, u \in \mathcal{C}.
+%
+% INPUT:  ucur:         Current iterate
+%         proj:         Projection operator onto the set C
+%         f_lagrangian: Function handle returning the augmented Lagrangian value
+%         grad_cur:     Current gradient, same size as u
+%         gamma_in:     Initial step size for the line search
+%         theta:        Increase/decrease factor for the step size
+% OUTPUT: unext:        Next iterate
+%         gamma_out:    Step size after the line search
+%         nfeval:       Number of function evaluations
+%
+% Author: Mehmet Fatih Sahin
+% Date : 28.12.2018
+
+if gamma_in < 1e-10
+    gamma_in = 1e-2;
+end
+
+ls_max = 20; % maximum number of line-search steps
+gamma_ls = gamma_in;
+fl2 = 0; fl1 = 0; % fl2: first trial failed, shrinking; fl1: a step was accepted, expanding
+for k = 1:ls_max
+    u_in = proj(ucur - gamma_ls*grad_cur);
+    if 1% disabled early-exit test: abs(f_lagrangian(u_in) - f_lagrangian(ucur)) > 1e-15
+        if (f_lagrangian(u_in) > (f_lagrangian(ucur) + trace((u_in - ucur)'*grad_cur) ...
+                + 1/(2*gamma_ls)*norm(u_in - ucur, 2)^2))
+            if fl1 == 0
+                fl2 = 1;
+                gamma_ls = gamma_ls/theta;
+            else
+                % expansion failed: return the last accepted step size
+                gamma_out = gamma_ls/theta;
+                nfeval = k;
+                return;
+            end
+        else
+            if k == 1 || fl2 == 1
+                fl1 = 1;
+            end
+            gamma_ls = gamma_ls*theta;
+            unext = u_in;
+        end
+    else
+        gamma_out = gamma_ls;
+        unext = u_in;
+        nfeval = k;
+        return
+    end
+end
+% ls_max exhausted: return the last trial point, which may not satisfy
+% the sufficient-decrease condition
+nfeval = k;
+unext = u_in;
+gamma_out = gamma_ls;
+end
diff --git a/codes/helpers/sparse_reg/sparse_reg_cvx.m b/codes/helpers/sparse_reg/sparse_reg_cvx.m
new file mode 100644
index 0000000..e1e68cc
--- /dev/null
+++ b/codes/helpers/sparse_reg/sparse_reg_cvx.m
@@ -0,0 +1,16 @@
+cvx_begin
+    variable x_cvx(d)
+    minimize 1/2*sum_square(A*x_cvx - b)
+    subject to
+        norm(x_cvx,1) <= tau
+cvx_end
+
+opt_val = cvx_optval;
+
+% Map the CVX solution to the lifted variable u = [w; z]:
+% w = [x_plus; x_minus] and z = sqrt(w), so that w = z.^2 holds.
+tm1 = zeros(size(x_cvx));
+tm1(x_cvx < -1e-10) = x_cvx(x_cvx < -1e-10);
+tm2 = zeros(size(x_cvx));
+tm2(x_cvx > 1e-10) = x_cvx(x_cvx > 1e-10);
+qq = [tm2; abs(tm1)];
+params.x_cvx = [qq; sqrt(qq)];
+
diff --git a/codes/helpers/sparse_reg/sparse_reg_load_data.m b/codes/helpers/sparse_reg/sparse_reg_load_data.m
new file mode 100644
index 0000000..1460fef
--- /dev/null
+++ b/codes/helpers/sparse_reg/sparse_reg_load_data.m
@@ -0,0 +1,11 @@
+n = 200; % number of data points
+d = 100; % dimension
+k = 100; % sparsity level (k = d here, so the planted signal is fully dense)
+
+x = zeros(d,1);
+x(randperm(d,k)) = randn(k,1);
+
+A = randn(n, d);
+b = A*x;
+
+tau = 0.9*norm(x,1);
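+
+% Optional sanity check (illustrative addition, not in the original script):
+% tau = 0.9*||x||_1 places the planted signal strictly outside the feasible
+% L1 ball, so the constraint is active at the solution.
+assert(tau < norm(x, 1), 'tau should be strictly smaller than ||x||_1');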