diff --git a/codes/Test_clustering.m b/codes/Test_clustering.m index 314d55f..8b4d52a 100644 --- a/codes/Test_clustering.m +++ b/codes/Test_clustering.m @@ -1,66 +1,67 @@ % TEST CLUSTERING: Main test file for solving the clustering problem: % minimize < C,X > % subject to: Trace(X) = k % X positive-semi-definite % sum(X,2) = ones % X elementwise nonnegative % % SDP relaxation of clustering problem is solved with the preprocessed % data of MNIST digits[1]. % TODO(mfsahin): Add also synthetic data option and other real data options as % shown in [2] % % [1]: D. G. Mixon, S. Villar, and R. Ward. Clustering sub- gaussian % mixtures by semidefinite programming. In- formation and Inference: % A Journal of the IMA, 6 (4):389?415, 2017 % [2]: Mariano Tepper, Anirvan M. Sengupta, and Dmitri B. Chklovskii, % The surprising secret identity of the semidefinite relaxation of % k-means: manifold learning, CoRR abs/1706.06028 (2017) % % Author: Mehmet Fatih Sahin % Date : 02.11.2018 clearvars close all vec = @(x)reshape( x, numel( x ), 1 ); addpath(genpath('./helpers/')); addpath(genpath('./functions/')); %% clustering_load_data(); % NOTE(mfsahin): we might try normalizing C normC = normest(C); %% function % TODO(mfsahin): remove Aef2(u) - b2 condition since it is redundant. Aef = @(U) U*sum(U, 1)'; Aef2 = @(U) sum(U,1)*U'; r = 20; f_grad_u = @(U, beta_, yc) C*U + 1/beta_*( repmat(U*sum(U, 1)' - b, [1, r]).*repmat(sum(U,1), [N, 1]) + repmat((sum(U,1)*U' -b2)*U, [N,1]))... + repmat(yc(1:size(b,1)), [1, r]).*repmat(sum(U,1), [N, 1]) + repmat(yc(size(b,1)+1:2*size(b,1))'*U, [N,1]); f_u = @(u) sum(vec(u .* (C*u))); proj = @(u) clustering_proj(u, k); get_feas = @(u) [Aef(u) - b; (Aef2(u)-b2)']; gamma_rule = @(beta_, yc) .5*beta_/(2*N + beta_*normC + sqrt(2*N)*norm(beta_*yc-1)); %% Define algorithmic parameters and rules -beta0 = 1e-1; % initial value for the penalty parameter +beta0 = 1e-2; % initial value for the penalty parameter sigma0 = 1e2*beta0; % initial value for dual step size +%% -update_params = @(feas_cur, iter, beta_old, sigma_old) update_params(feas_cur, iter, beta_old, sigma_old, sigma0, beta0); +update_params = @(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec) update_params(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec, sigma0, beta0); params.averaging = 0; params.iters = 1e5;% Number of iterations params.verbose = 1e2; params.Uinit = randn(N, r); % In case a random initialization is used params.ycinit = zeros(2*size(b,1),1); params.savehist = 1; params.startfeasible = 0; % this option is for maxcut %% Run the algorithm [Udl, output] = linearized_augmented_lagrangian(f_u, f_grad_u, proj, update_params, ... gamma_rule, get_feas, params); %% Plot results opt_val = 77.190856515019890; plot_results(); \ No newline at end of file diff --git a/codes/Test_maxcut.m b/codes/Test_maxcut.m index eebd6ee..73be32c 100644 --- a/codes/Test_maxcut.m +++ b/codes/Test_maxcut.m @@ -1,62 +1,62 @@ % TEST MAXCUT: Main file for solving sdp relaxation of maxcut problem using % inexact augmented lagrangian method. % minimize % subject to: diag(X) = n % X positive-semi-definite % % Data: You may download the data from Gset-Uf Sparse Dateset Collection % The graph must be symmetric. % % Author: Mehmet Fatih Sahin % Date : 02.11.2018 clearvars close all vec = @(x)reshape( x, numel( x ), 1 ); addpath(genpath('./functions/')); addpath(genpath('./helpers/')); %% function settings dataNum = 2; % default data number if no input userinput = 1; % if enabled data is selected from the command line maxcut_load_data(); normA = 1; normC = normest(C); %% Initializations fprintf('Data size: %dx%d \n',n,n) f_grad_u = @(u, beta, yc) (1/beta).*bsxfun(@times, u, A_id(u, u) - y) + C*u + bsxfun(@times, u, yc); f_u = @(u) sum(vec(u .* (C*u))); proj = @(u) u; get_feas = @(u) A_id(u, u) - y; gamma_rule = @(beta_, yc) beta_/(2*beta_*normC +2*normA^2 + 2*norm((beta_*yc-y))); % rule for the stepsize: later add linesearch for that %% Define algorithmic parameters and rules r = 20; % target rank: you may set it sqrt(n) -beta0 = normC/5e0; % initial value for the penalty parameter -sigma0 = beta0*1e0; % initial value for dual step size +beta0 = normC/5e1; % initial value for the penalty parameter +sigma0 = beta0*1e2; % initial value for dual step size -update_params = @(feas_cur, iter, beta_old, sigma_old) update_params(feas_cur, iter, beta_old, sigma_old, sigma0, beta0); +update_params = @(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec) update_params(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec, sigma0, beta0); params.averaging = 0; -params.iters = 1e4;% Number of iterations +params.iters = 1e5;% Number of iterations params.verbose = 1e3; params.Uinit = rand(n, r); % In case a random initialization is used params.ycinit = zeros(size(y)); params.savehist = 1; params.startfeasible = 1; % start from a feasible point %% Run the algorithm [Udl, output] = linearized_augmented_lagrangian(f_u, f_grad_u, proj, update_params, ... gamma_rule, get_feas, params); %% Plot results maxcut_solve_cvx() plot_results() diff --git a/codes/functions/linearized_augmented_lagrangian.m b/codes/functions/linearized_augmented_lagrangian.m index b66234c..1362ec5 100644 --- a/codes/functions/linearized_augmented_lagrangian.m +++ b/codes/functions/linearized_augmented_lagrangian.m @@ -1,158 +1,161 @@ function [Ucur, output] = linearized_augmented_lagrangian(f_u, f_grad_u, proj, update_params, ... gamma_rule, get_feas, params) % Non-convex solver for sdp's. This function is used to solve a nonlinear % constrained nonconvex problem with an inexact augmented lagrangian method. % min f(u), subject to A(u) = b, u \in \mathcal{C} % Augmented Lagrangian: % L(u, beta, yc) = f(u) + 1/(2*beta)||A(u)-b||^2 + % % INPUT: f_u: function handle to evaluate the function value % f_grad_u: function handle to evaluate the gradient of augmented % lagrangian. Accepts 3 inputs: u, beta, yc % proj: Projection operator onto set C % update_params: function handle determining the update of % beta(penalty parameter) and sigma(dual step-size) % gamma_rule: function handle determining the step size % get_feas: function handle which returns (A(u) - b) % params: structure containing parameters for the % algorithm( maxiters, Uinit, tol, savehist etc.) % OUTPUT: Ucur: Last iterate of the algorithm % output: structure containing the history of the % iterates(times, feas, obj, grad_mapping etc.) % % Author: Mehmet Fatih Sahin % Date : 02.11.2018 % Update: 09.11.2018 (mfsahin) adaptive choice of sigma and beta tol = 1e-15; init_hist_track(); init_iters(); tic; if (params.verbose) fprintf('Nonconvex Algorithm running ... \n'); fprintf('Iter. |Feas |Objective |Grad Mapping \n'); fprintf('------------------------------------------------------------------\n'); end; current_obj = inf; prev_obj = inf; beta_old = beta_; sigma_old = sigma_; +k_t = 1; +feas_old = inf; for ii = 1:params.iters % TODO(mfsahin): add line search here gamma = gamma_rule(1/beta_, yc); Uold = Ucur; gradU = f_grad_u(Ucur, 1/beta_, yc); Ucur = Ucur - gamma * gradU; % projection onto set C Ucur = proj(Ucur); % get the current feasibility(A(u) - b), update sigma and yc feas_cur = get_feas(Ucur); - [beta_, sigma_] = update_params(norm(feas_cur), ii, beta_old, sigma_old); + [beta_, sigma_, k_t] = update_params(norm(feas_cur),feas_old, k_t, ii, beta_old, sigma_old, output.sigma, output.beta); beta_old = beta_; sigma_old = sigma_; - + feas_old = norm(feas_cur); % update dual yc = yc + sigma_ * feas_cur; % average of the iterates if params.averaging Usum = Ucur + Usum; ysum = yc + ysum; Uav = 1/(ii+1)*Usum; ycav = 1/(ii+1)*ysum; else Uav = Ucur; ycav = yc; end if abs(current_obj-prev_obj) < tol fprintf('Converged: Breaking...\n') break; end prev_obj = current_obj; track_hist(); verbosity(); end if ii == params.iters fprintf('Max iters reached: Breaking...\n\n') end function [] = init_hist_track() % Initialization output.times = nan(params.iters, 1); output.feas = nan(params.iters, 1); output.obj = nan(params.iters, 1); output.grad_mapping = nan(params.iters, 1); output.sigma = nan(params.iters, 1); + output.beta = nan(params.iters, 1); output.gamma = nan(params.iters, 1); output.norm_dual = nan(params.iters, 1); end function [] = track_hist() % save the history if (params.savehist == 1) output.times(ii) = toc; tic; output.feas(ii) = norm(get_feas(Uav)); output.obj(ii) = f_u(Uav); output.grad_mapping(ii) = 1/gamma^2 * norm(Ucur-Uold, 2)^2; current_obj = output.obj(ii); output.sigma(ii) = sigma_; output.gamma(ii) = gamma; output.norm_dual(ii) = norm(yc); output.beta(ii) = beta_; end; end function [] = verbosity() if (params.verbose) && (mod(ii, params.verbose)==0 || ii == 1) if ((mod(ii, 10*params.verbose)==0)) fprintf('Iter. |Feas |Objective |Grad Mapping \n'); fprintf('------------------------------------------------------------------\n'); end str = sprintf('%d %f %f %f '... %f'... , ii, output.feas(ii), output.obj(ii), output.grad_mapping(ii));%, norm_gf(ii)); disp(str); end end function [] = init_iters() % initialize the iterations Ucur = params.Uinit; Ucur = Ucur./norm(Ucur, 'fro'); if params.startfeasible % Start feasible for maxcut rowN = sqrt(sum(Ucur.^2,2)); Ucur = Ucur./(rowN*ones(1,size(Ucur,2))); end yc = params.ycinit; % center point to be updated ysum = yc; Usum = Ucur; - [beta_, sigma_] = update_params(0,0,0,0); % set initials + [beta_, sigma_] = update_params(0,0,0,0,0,0,0,0); % set initials end end \ No newline at end of file diff --git a/codes/helpers/plot_results.m b/codes/helpers/plot_results.m index f3fa952..9cd3acd 100644 --- a/codes/helpers/plot_results.m +++ b/codes/helpers/plot_results.m @@ -1,39 +1,39 @@ % Plotting function font_size = 20; lw = 2; figure('Position',[100,100,1140,270]), subplot(1,3,1) loglog(output.feas,'--r', 'Linewidth', lw), hold on, grid on title('Feasibility Gap','Interpreter', 'latex', 'FontSize', font_size); xlabel('iters', 'Interpreter', 'latex', 'FontSize', font_size); ylabel('$||A(u)-b||$', 'Interpreter', 'latex', 'FontSize', font_size); ax = gca; ax.XTick = 10.^(-20:10); subplot(1,3,2) loglog(output.grad_mapping, '--r', 'Linewidth', lw) hold on, grid on title('Gradient Mapping','Interpreter', 'latex', 'FontSize', font_size); xlabel('iters', 'Interpreter', 'latex', 'FontSize', font_size); ylabel('$|G(\gamma_k, u_k)|^2$', 'Interpreter', 'latex', 'FontSize', font_size); ax = gca; ax.XTick = 10.^(-20:10); subplot(1,3,3) loglog(abs(output.obj-opt_val)/abs(opt_val), '--r', 'Linewidth', lw) hold on, grid on title('Relative Objective Residual','Interpreter', 'latex', 'FontSize', font_size); xlabel('iters', 'Interpreter', 'latex', 'FontSize', font_size); ylabel('$\frac{ |f(u)-f^\star| }{ |f^\star| }$', 'Interpreter', 'latex', 'FontSize', font_size); ax = gca; ax.XTick = 10.^(-20:10); figure;loglog(output.beta,'r','Linewidth', lw);grid on;hold on;loglog(output.sigma, 'k','Linewidth', lw) -legend({'$\beta_{k}$', '$\sigma_{k}$'}, 'Interpreter', 'Latex', 'FontSize', font_size, 'Location', 'southeast') \ No newline at end of file +legend({'$\beta_{k}$', '$\sigma_{k}$'}, 'Interpreter', 'Latex', 'FontSize', font_size, 'Location', 'best') \ No newline at end of file diff --git a/codes/helpers/update_params.m b/codes/helpers/update_params.m index cdadda7..0a1e727 100644 --- a/codes/helpers/update_params.m +++ b/codes/helpers/update_params.m @@ -1,36 +1,44 @@ -function [beta_, sigma_] = update_params(feas_cur, iter, beta_old, sigma_old, sigma_0, beta_0) +function [beta_, sigma_, k_t] = update_params(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec, sigma_0, beta_0) % This function computes the parameters for the new iteration depending on % the predefined adaptive rule % INPUTS: feas_cur: current feasibility value % iter : current iteration number % sigma_0 : initial value for the dual step size % beta_0 : initial value for the penalty parameter % OUTPUTS: beta_ : updated penalty parameter % sigma_ : updated dual step size % % Author: Mehmet Fatih Sahin % Date: 09.11.2018 if iter<2 beta_ = beta_0; sigma_ = sigma_0; return end -if feas_cur > sigma_0/sqrt(iter) +if feas_cur > beta_0/sqrt(iter) beta_ = beta_old*sqrt(iter/(iter-1)); else beta_ = beta_old; end - +if feas_old <= sigma_0/sqrt(iter) && feas_cur > sigma_0/sqrt(iter) + k_t = iter; +end + if feas_cur <= sigma_0/iter sigma_ = sigma_old; elseif sigma_0/iter < feas_cur && feas_cur<=sigma_0/sqrt(iter) sigma_ = sigma_old*sqrt((iter-1)/iter); else - sigma_ = sigma_0/beta_0*(beta_-beta_old); +% sigma_ = sigma_0/beta_0*(beta_-beta_old); + if k_t == iter || k_t == 1 + sigma_ = sigma_old; + else + sigma_ = sigma_vec(k_t)/(beta_vec(k_t)-beta_vec(k_t-1))*(beta_-beta_old); + end end end \ No newline at end of file