diff --git a/codes/Test_clustering.m b/codes/Test_clustering.m
index 314d55f..8b4d52a 100644
--- a/codes/Test_clustering.m
+++ b/codes/Test_clustering.m
@@ -1,66 +1,67 @@
 % TEST CLUSTERING:  Main test file for solving the clustering problem:
 %                       minimize        < C,X >
 %                       subject to:     Trace(X) = k
 %                                       X positive-semi-definite
 %                                       sum(X,2) = ones
 %                                       X elementwise nonnegative
 %
 % SDP relaxation of clustering problem is solved with the preprocessed
 % data of MNIST digits[1]. 
 % TODO(mfsahin): Add also synthetic data option and other real data options as
 %                shown in [2]
 %
 % [1]: D. G. Mixon, S. Villar, and R. Ward. Clustering sub- gaussian
 %      mixtures by semidefinite programming. In- formation and Inference:
 %      A Journal of the IMA, 6 (4):389?415, 2017
 % [2]: Mariano Tepper, Anirvan M. Sengupta, and Dmitri B. Chklovskii,
 %      The surprising secret identity of the semidefinite relaxation of       
 %      k-means: manifold learning, CoRR abs/1706.06028 (2017)
 % 
 % Author: Mehmet Fatih Sahin
 % Date  : 02.11.2018
 
 clearvars
 close all
 
 vec = @(x)reshape( x, numel( x ), 1 );
 addpath(genpath('./helpers/'));
 addpath(genpath('./functions/'));
 %%
 clustering_load_data();
 % NOTE(mfsahin): we might try normalizing C
 normC = normest(C);
 %% function 
 % TODO(mfsahin): remove Aef2(u) - b2 condition since it is redundant.
 Aef = @(U) U*sum(U, 1)';
 Aef2 = @(U) sum(U,1)*U';
 
 r = 20;
 
 f_grad_u = @(U, beta_, yc) C*U + 1/beta_*( repmat(U*sum(U, 1)' - b, [1, r]).*repmat(sum(U,1), [N, 1]) + repmat((sum(U,1)*U' -b2)*U, [N,1]))...
         + repmat(yc(1:size(b,1)), [1, r]).*repmat(sum(U,1), [N, 1]) + repmat(yc(size(b,1)+1:2*size(b,1))'*U, [N,1]);
 f_u = @(u) sum(vec(u .* (C*u)));
 proj = @(u) clustering_proj(u, k);
 get_feas = @(u) [Aef(u) - b; (Aef2(u)-b2)'];
 gamma_rule = @(beta_, yc) .5*beta_/(2*N + beta_*normC + sqrt(2*N)*norm(beta_*yc-1));
 
 %% Define algorithmic parameters and rules
-beta0 = 1e-1; % initial value for the penalty parameter
+beta0 = 1e-2; % initial value for the penalty parameter
 sigma0 = 1e2*beta0; % initial value for dual step size
+%%
 
-update_params = @(feas_cur, iter, beta_old, sigma_old) update_params(feas_cur, iter, beta_old, sigma_old, sigma0, beta0);
+update_params = @(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec) update_params(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec, sigma0, beta0);
 
 params.averaging = 0;
 params.iters = 1e5;% Number of iterations
 params.verbose = 1e2;
 
 params.Uinit = randn(N, r);     % In case a random initialization is used
 params.ycinit = zeros(2*size(b,1),1);
 params.savehist = 1;
 params.startfeasible = 0; % this option is for maxcut
 %% Run the algorithm
 [Udl, output] = linearized_augmented_lagrangian(f_u, f_grad_u, proj, update_params, ...
     gamma_rule, get_feas, params);
 %% Plot results
 opt_val = 77.190856515019890;
 plot_results();
\ No newline at end of file
diff --git a/codes/Test_maxcut.m b/codes/Test_maxcut.m
index eebd6ee..73be32c 100644
--- a/codes/Test_maxcut.m
+++ b/codes/Test_maxcut.m
@@ -1,62 +1,62 @@
 % TEST MAXCUT: Main file for solving sdp relaxation of maxcut problem using
 %              inexact augmented lagrangian method.
 %                      minimize        <C, X>
 %                      subject to:     diag(X) = n
 %                                      X positive-semi-definite
 % 
 % Data: You may download the data from <a href="matlab: 
 % web('https://www.cise.ufl.edu/research/sparse/matrices/Gset/index.html')">Gset-Uf Sparse Dateset Collection</a>
 % The graph must be symmetric. 
 % 
 % Author: Mehmet Fatih Sahin
 % Date  : 02.11.2018
 
 
 clearvars
 close all
 
 vec = @(x)reshape( x, numel( x ), 1 );
 addpath(genpath('./functions/'));
 addpath(genpath('./helpers/'));
 %% function settings 
 dataNum = 2; % default data number if no input
 userinput = 1; % if enabled data is selected from the command line
 
 maxcut_load_data();
 
 normA = 1;
 normC = normest(C);
 
 %% Initializations
 fprintf('Data size: %dx%d \n',n,n)
 
 f_grad_u = @(u, beta, yc) (1/beta).*bsxfun(@times, u, A_id(u, u) - y) + C*u + bsxfun(@times, u, yc);
 f_u = @(u) sum(vec(u .* (C*u)));
 proj = @(u) u;
 get_feas = @(u) A_id(u, u) - y;
 gamma_rule = @(beta_, yc) beta_/(2*beta_*normC +2*normA^2 + 2*norm((beta_*yc-y))); % rule for the stepsize: later add linesearch for that
 
 %% Define algorithmic parameters and rules
 r = 20; % target rank: you may set it sqrt(n)
 
-beta0 = normC/5e0; % initial value for the penalty parameter
-sigma0 = beta0*1e0; % initial value for dual step size
+beta0 = normC/5e1; % initial value for the penalty parameter
+sigma0 = beta0*1e2; % initial value for dual step size
 
-update_params = @(feas_cur, iter, beta_old, sigma_old) update_params(feas_cur, iter, beta_old, sigma_old, sigma0, beta0);
+update_params = @(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec) update_params(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec, sigma0, beta0);
 
 params.averaging = 0;
-params.iters = 1e4;% Number of iterations
+params.iters = 1e5;% Number of iterations
 params.verbose = 1e3;
 
 params.Uinit = rand(n, r);     % In case a random initialization is used
 params.ycinit = zeros(size(y));
 params.savehist = 1;
 params.startfeasible = 1; % start from a feasible point
 
 %% Run the algorithm
 [Udl, output] = linearized_augmented_lagrangian(f_u, f_grad_u, proj, update_params, ...
     gamma_rule, get_feas, params);
 
 %% Plot results
 maxcut_solve_cvx()
 plot_results()
diff --git a/codes/functions/linearized_augmented_lagrangian.m b/codes/functions/linearized_augmented_lagrangian.m
index b66234c..1362ec5 100644
--- a/codes/functions/linearized_augmented_lagrangian.m
+++ b/codes/functions/linearized_augmented_lagrangian.m
@@ -1,158 +1,161 @@
 function [Ucur, output] = linearized_augmented_lagrangian(f_u, f_grad_u, proj, update_params, ...
     gamma_rule, get_feas, params)
 
 % Non-convex solver for sdp's. This function is used to solve a nonlinear
 % constrained nonconvex problem with an inexact augmented lagrangian method.
 %               min f(u), subject to A(u) = b, u \in \mathcal{C}
 % Augmented Lagrangian: 
 %           L(u, beta, yc) = f(u) + 1/(2*beta)||A(u)-b||^2 + <yc, b-A(u)>
 %
 % INPUT:  f_u:           function handle to evaluate the function value
 %         f_grad_u:      function handle to evaluate the gradient of augmented
 %                        lagrangian. Accepts 3 inputs: u, beta, yc
 %         proj:          Projection operator onto set C
 %         update_params: function handle determining the update of
 %                        beta(penalty parameter) and sigma(dual step-size)
 %         gamma_rule:    function handle determining the step size
 %         get_feas:      function handle which returns (A(u) - b)
 %         params:        structure containing parameters for the
 %                        algorithm( maxiters, Uinit, tol, savehist etc.)
 % OUTPUT: Ucur:       Last iterate of the algorithm
 %         output:     structure containing the history of the
 %                       iterates(times, feas, obj, grad_mapping etc.) 
 %
 % Author: Mehmet Fatih Sahin
 % Date  : 02.11.2018
 % Update: 09.11.2018 (mfsahin) adaptive choice of sigma and beta
 
 
 
 tol = 1e-15;
 
 init_hist_track();
 init_iters();
 
 tic;
 
 if (params.verbose)
     fprintf('Nonconvex Algorithm running ... \n');
     fprintf('Iter.            |Feas            |Objective             |Grad Mapping \n');
     fprintf('------------------------------------------------------------------\n');
 end;
 
 current_obj = inf;
 prev_obj = inf;
 
 beta_old = beta_;
 sigma_old = sigma_;
+k_t = 1;
+feas_old = inf;
 
 for ii = 1:params.iters
     
     
     % TODO(mfsahin): add line search here
     gamma = gamma_rule(1/beta_, yc);
     
     Uold = Ucur; 
 
     gradU    = f_grad_u(Ucur, 1/beta_, yc);
     Ucur     = Ucur - gamma * gradU;
     
     % projection onto set C
     Ucur = proj(Ucur);
     
     % get the current feasibility(A(u) - b), update sigma and yc
     feas_cur = get_feas(Ucur);
 
-    [beta_, sigma_] = update_params(norm(feas_cur), ii, beta_old, sigma_old);
+    [beta_, sigma_, k_t] = update_params(norm(feas_cur),feas_old, k_t, ii, beta_old, sigma_old, output.sigma, output.beta);
     beta_old = beta_;
     sigma_old = sigma_;
-    
+    feas_old = norm(feas_cur);
     % update dual
     yc      = yc + sigma_ * feas_cur; 
     
     
     % average of the iterates 
     if params.averaging
         Usum = Ucur + Usum;
         ysum = yc + ysum;
         Uav = 1/(ii+1)*Usum;
         ycav   = 1/(ii+1)*ysum;
     else
         Uav = Ucur;
         ycav = yc;
     end
     
     
     if abs(current_obj-prev_obj) < tol
         fprintf('Converged: Breaking...\n')
         break;
     end
     prev_obj = current_obj;
     
     track_hist();
     verbosity();
 end
 
 if ii == params.iters
     fprintf('Max iters reached: Breaking...\n\n')
 end
 
 
     function [] = init_hist_track()
         % Initialization
         output.times = nan(params.iters, 1);
         output.feas  = nan(params.iters, 1);
         output.obj   = nan(params.iters, 1);
         output.grad_mapping = nan(params.iters, 1);
         output.sigma   = nan(params.iters, 1);
+        output.beta   = nan(params.iters, 1);
         output.gamma   = nan(params.iters, 1);
         output.norm_dual = nan(params.iters, 1);
     end
 
     function [] = track_hist()
         % save the history
         if (params.savehist == 1)
             output.times(ii) = toc; tic;
             output.feas(ii)  = norm(get_feas(Uav));
             output.obj(ii)   = f_u(Uav);
             output.grad_mapping(ii) = 1/gamma^2 * norm(Ucur-Uold, 2)^2;
             current_obj = output.obj(ii);
             output.sigma(ii) = sigma_;
             output.gamma(ii)   = gamma;
             output.norm_dual(ii) = norm(yc);
             output.beta(ii)  = beta_;
         end;
     end
 
     function [] = verbosity()
         if (params.verbose) && (mod(ii, params.verbose)==0 || ii == 1)
             if ((mod(ii, 10*params.verbose)==0))
                 fprintf('Iter.            |Feas            |Objective             |Grad Mapping \n');
                 fprintf('------------------------------------------------------------------\n');
             end
             str = sprintf('%d            %f         %f             %f '...    %f'...
                 , ii, output.feas(ii), output.obj(ii), output.grad_mapping(ii));%, norm_gf(ii));
             disp(str);
         end
     end
 
     function [] = init_iters()
         % initialize the iterations 
 
         Ucur = params.Uinit;
         Ucur = Ucur./norm(Ucur, 'fro');
         if params.startfeasible
             % Start feasible for maxcut
             rowN = sqrt(sum(Ucur.^2,2));
             Ucur = Ucur./(rowN*ones(1,size(Ucur,2)));
         end
         
         yc = params.ycinit; % center point to be updated
         ysum = yc;
         Usum = Ucur;
         
-        [beta_, sigma_] = update_params(0,0,0,0); % set initials
+        [beta_, sigma_] = update_params(0,0,0,0,0,0,0,0); % set initials
     end
         
 
 end
\ No newline at end of file
diff --git a/codes/helpers/plot_results.m b/codes/helpers/plot_results.m
index f3fa952..9cd3acd 100644
--- a/codes/helpers/plot_results.m
+++ b/codes/helpers/plot_results.m
@@ -1,39 +1,39 @@
 % Plotting function
 
 font_size = 20;
 lw = 2;
 figure('Position',[100,100,1140,270]),
 subplot(1,3,1)
 loglog(output.feas,'--r', 'Linewidth', lw),
 hold on,
 grid on
 title('Feasibility Gap','Interpreter', 'latex', 'FontSize', font_size);
 xlabel('iters', 'Interpreter', 'latex', 'FontSize', font_size);
 ylabel('$||A(u)-b||$', 'Interpreter', 'latex', 'FontSize', font_size);
 ax = gca;
 ax.XTick = 10.^(-20:10);
 
 subplot(1,3,2)
 loglog(output.grad_mapping, '--r', 'Linewidth', lw)
 hold on,
 grid on
 title('Gradient Mapping','Interpreter', 'latex', 'FontSize', font_size);
 xlabel('iters', 'Interpreter', 'latex', 'FontSize', font_size);
 ylabel('$|G(\gamma_k, u_k)|^2$', 'Interpreter', 'latex', 'FontSize', font_size);
 ax = gca;
 ax.XTick = 10.^(-20:10);
 
 
 subplot(1,3,3)
 loglog(abs(output.obj-opt_val)/abs(opt_val), '--r', 'Linewidth', lw)
 hold on,
 grid on
 title('Relative Objective Residual','Interpreter', 'latex', 'FontSize', font_size);
 xlabel('iters', 'Interpreter', 'latex', 'FontSize', font_size);
 ylabel('$\frac{ |f(u)-f^\star| }{ |f^\star| }$', 'Interpreter', 'latex', 'FontSize', font_size);  
 ax = gca;
 ax.XTick = 10.^(-20:10);
 
 
 figure;loglog(output.beta,'r','Linewidth', lw);grid on;hold on;loglog(output.sigma, 'k','Linewidth', lw)
-legend({'$\beta_{k}$', '$\sigma_{k}$'}, 'Interpreter', 'Latex', 'FontSize', font_size, 'Location', 'southeast')
\ No newline at end of file
+legend({'$\beta_{k}$', '$\sigma_{k}$'}, 'Interpreter', 'Latex', 'FontSize', font_size, 'Location', 'best')
\ No newline at end of file
diff --git a/codes/helpers/update_params.m b/codes/helpers/update_params.m
index cdadda7..0a1e727 100644
--- a/codes/helpers/update_params.m
+++ b/codes/helpers/update_params.m
@@ -1,36 +1,44 @@
-function [beta_, sigma_] = update_params(feas_cur, iter, beta_old, sigma_old, sigma_0, beta_0)
+function [beta_, sigma_, k_t] = update_params(feas_cur, feas_old, k_t, iter, beta_old, sigma_old, sigma_vec, beta_vec, sigma_0, beta_0)
 % This function computes the parameters for the new iteration depending on
 % the predefined adaptive rule
 % INPUTS:  feas_cur: current feasibility value
 %          iter    : current iteration number
 %          sigma_0 : initial value for the dual step size
 %          beta_0  : initial value for the penalty parameter
 % OUTPUTS: beta_   : updated penalty parameter
 %          sigma_  : updated dual step size
 % 
 % Author: Mehmet Fatih Sahin
 % Date:   09.11.2018
 
 if iter<2
     beta_ = beta_0;
     sigma_ = sigma_0;
     return
 end
 
-if feas_cur > sigma_0/sqrt(iter)
+if feas_cur > beta_0/sqrt(iter)
     beta_ = beta_old*sqrt(iter/(iter-1));
 else
     beta_ = beta_old;
 end
 
-
+if feas_old <= sigma_0/sqrt(iter) && feas_cur > sigma_0/sqrt(iter) 
+    k_t = iter;
+end
+ 
 if feas_cur <= sigma_0/iter
     sigma_ = sigma_old;
 elseif sigma_0/iter < feas_cur && feas_cur<=sigma_0/sqrt(iter)
     sigma_ = sigma_old*sqrt((iter-1)/iter);
 else
-    sigma_ = sigma_0/beta_0*(beta_-beta_old);
+%     sigma_ = sigma_0/beta_0*(beta_-beta_old);
+    if k_t == iter || k_t == 1
+        sigma_ = sigma_old;
+    else
+        sigma_ = sigma_vec(k_t)/(beta_vec(k_t)-beta_vec(k_t-1))*(beta_-beta_old);
+    end
 end
         
 
 end
\ No newline at end of file