%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Authors: Thomas Bolton and Mert Inan
% Date: 03/08/2017 -
% Description:
% This script runs motion and behavior analyses on a set of subjects from
% the Human Connectome Project
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% 1. Loading some of the data

% SS contains the parameters for convergence of the probabilistic PCA
% algorithm
load SS

% Adds paths to cbrewer (generation of figures), graph construction
% entities, and PLS scripts (kindly shared by Daniela Zoeller, EPFL)
addpath(genpath(fullfile(pwd,'cbrewer')));
addpath(genpath(fullfile(pwd,'01_Graph_Construction')));
addpath(genpath(fullfile(pwd,'PLS corrado')));

% Do we want to plot stuff or not
is_plot = 1;

%% 2. Extracting motion data

% Reads the paths towards the actual motion data from the HCP
cd('Motion_Data');
d = dir;

% We start from index 3 to remove the ".." files from directory listing
d = d(3:end);

% Number of subjects to consider
n_subjects = length(d);

% Reading of the data for all subjects
for i = 1:n_subjects
    try
        
        i
        
        cd(d(i).name);
        cd('Motion');
        
        % Reads and modifies the text file to have 594 rows and to have proper
        % units, then copies it from read location to write location.
        movText = textread(fullfile(pwd,'Movement_Regressors.txt'));

        %Changes degrees to radians assuming a 50mm radius
        movText = movText(:,1:6);
        for l = 4:6
            movText(:,l) = 50*movText(:,l);
        end

        MOTION(:,:,i) = movText;
        
        cd('..');
        cd('..');
        
    catch
        warning(['Could not do it for the ',int2str(i), '. iteration.']);
        continue;
    end
end

cd('..');

% Here, MOTION is a 1200 x 6 x 224 matrix containing the information for
% all 6 motion parameters over 1200 frames for the 224 considered subjects

%% 3. Computing "too large movers"

% Threshold to use for scrubbing throughout
T_scrubbing = 0.3;

% Computes the instantaneous motion change from a frame to the next
DeltaMot = diff(MOTION,[],1);

% Framewise displacement over time is computed according to Power's
% definition (Power et al., NeuroImage, 2012)
FD = squeeze(sum(abs(DeltaMot),2));

% For different FD threshold choices, we compute which subjects would be
% kept of left out in the analyses
idx_a = 1;

% Will contain the results
FD_Matrix = [];

% Will mask which frames to keep or not to keep for a subject
TemporalMask = {};

% T_FD is the threshold above which we deem an instantaneous FD value to be
% too large
for T_FD = T_scrubbing
    
    for s = 1:n_subjects
        TemporalMask{idx_a}(:,s) = (FD(:,s) > T_FD);
    end
    
    idx_b = 1;
    
    % P_FD is the percentage of such values over a session to claim a
    % subject not good enough (in terms of fMRI data quality) to be kept
    for P_FD = 10:5:50
        % Percentage of frames above a given threshold and related mover indices
        Perc_scrub = sum(FD > T_FD,1)/size(FD,1)*100;
        IDX_MOVER = (Perc_scrub > P_FD);
        FD_Matrix(:,idx_a,idx_b) = IDX_MOVER;
        idx_b = idx_b + 1;
    end
    idx_a = idx_a + 1;
end
    
clear idx_a
clear idx_b

% % Computes the percentage of too large movers for each case
% vec = 1:n_subjects;
% idx = 1;
% for i = 1:9
%     for j = 1:9
%         Excluded_subjects{idx} = vec(logical(FD_Matrix(:,i,j)));
%         idx = idx + 1;
%         Perc_excluded_subjects(idx) = sum(logical(FD_Matrix(:,i,j)));
%     end
% end

% ScrubbedFrames = [];
% 
% % I will now retain only the frames that would be removed with scrubbing
% for T_FD = T_scrubbing
% 
%     TMask = (FD > T_FD);
%     
%     for s = 1:n_subjects
%         tmp_DM  = DeltaMot(:,:,s);
%         ScrubbedFrames = [ScrubbedFrames;tmp_DM(TMask(:,s),:)];
%     end
% end

% [IDX,Centroids] = kmeans(ScrubbedFrames,10,'Distance','sqeuclidean','Replicates',1000,'Start','sample','MaxIter',300);
% [Consensus] = CAP_ConsensusClustering({ScrubbedFrames'},2:30,'items',0.8,100,'cosine');


% I will find the threshold values that enable to yield exactly no frames
% (scrubbed) with [0 0 0 0 0 0]
idx_med = 1;

for Median_Threshold = 3:-0.01:0.1
    
    idx_med
    
    % Thresholding individual traces
    MaskXYZABG = zeros(1199,6,n_subjects);

    % Subject index
    s_index = [];

    % Thresholded params
    TParams = [];
    
    for s = 1:n_subjects

        % Selection of time x params matrix for a subject
        tmp = DeltaMot(:,:,s);

        % Thresholds
        THR = median(tmp) + Median_Threshold*std(tmp);
        THR2 = median(tmp) - Median_Threshold*std(tmp);

        % Thresholding
        for p = 1:6
            MaskXYZABG(tmp(:,p) > THR(:,p),p,s) = 1;
            MaskXYZABG(tmp(:,p) < THR2(:,p),p,s) = -1;
        end

        Test = MaskXYZABG(FD(:,s) > T_scrubbing,:,s);

        s_index = [s_index;s*ones(size(Test,1),1)];
        TParams = [TParams;Test];

    end
    
    N_FN(idx_med) = sum(ismember(TParams,[0 0 0 0 0 0],'rows')); 
   
    idx_med = idx_med + 1;
end

if is_plot
    figure;
    set(gcf,'color','w');
    plot(3:-0.01:0.1,N_FN/N_FN(1)*100,'color',[0.4 0.4 0.4],'LineWidth',2);
    set(gca,'Box','off');
end

% Value of 0.38 selected (other values: 1.1, 2)

Median_Threshold = 0.38;

% Thresholding individual traces
MaskXYZABG = zeros(1199,6,n_subjects);

% Subject index
s_index = [];

% Thresholded params
TParams = [];

for s = 1:n_subjects

    % Selection of time x params matrix for a subject
    tmp = DeltaMot(:,:,s);

    % Thresholds
    THR = median(tmp) + Median_Threshold*std(tmp);
    THR2 = median(tmp) - Median_Threshold*std(tmp);

    % Thresholding
    for p = 1:6
        MaskXYZABG(tmp(:,p) > THR(:,p),p,s) = 1;
        MaskXYZABG(tmp(:,p) < THR2(:,p),p,s) = -1;
    end

    Test = MaskXYZABG(FD(:,s) > T_scrubbing,:,s);

    s_index = [s_index;s*ones(size(Test,1),1)];
    TParams = [TParams;Test];
end


% I will create a counts vector with many many bars
Counts = zeros(3^6,n_subjects);
Counts_total = zeros(3^6,1);

% Label vector to situate myself


idx_counts = 1;

for x = -1:1
    for y = -1:1
        for z = -1:1
            for a = -1:1
                for b = -1:1
                    for g = -1:1
                        Counts_total(idx_counts) = sum(ismember(TParams,[x y z a b g],'rows'));
                        
                        for s = 1:n_subjects
                            Counts(idx_counts,s) = sum(ismember(MaskXYZABG(:,:,s),[x y z a b g],'rows'));
                        end
                        
                        Label{idx_counts} = [num2str(x),' ',num2str(y),' ',num2str(x),' ',num2str(a),' ',num2str(b),' ',num2str(g)];
                        
                        idx_counts = idx_counts + 1;
                    end
                end
            end
        end
    end
end

% Sorts according to counts
[AAA,IDX_counts] = sort(Counts_total,'descend');

test = median(Counts');

[BBB,IDX_counts2] = sort(test,'descend');

% Plots top configurations in descending order
for top = 1:50
    tmp_state(:,top) = str2num(Label{IDX_counts(top)});
end

if is_plot
    
    figure;
    set(gcf,'color','w');
    CM = cbrewer('div','RdBu',1000);
    imagesc(tmp_state);
    colormap(CM);
    caxis([-1.5 1.5]);
    set(gca,'Box','off');
    
    figure;
    set(gcf,'color','w');
    bar(Counts_total/sum(Counts_total)*100);
end


%% 4. Computing processed motion information (spatiotemporal features)

% Type of motion to keep in the analysis

% Threshold above which we "scrub" 

% Do we remove highest movers for the ensuing analyses? If so, what extent
% of remval do we pick?
is_removemovers = 0;
idx_removal = NaN;

% Am I going to split scrubbed and non-scrubbed data?
is_split = 1;

% number of time bins that we want to split the motion data into: 6 is the
% recommended value. A range can be given (e.g., n_TB = 4:8).
n_TB = 6;

% Number of nearest neighbours to consider for graph construction
n_NN = 10;

if is_removemovers
    DeltaMot = DeltaMot(:,:,~IDX_MOVER);
    n_subjects = size(DeltaMot,3);
end

% Duration of the individual bins to consider
samples_session = size(DeltaMot,1);

% Retains only the moments of positive/negative motion change; this is to verify
% that positive and negative-valued motion moments are equally frequent
DMPos = DeltaMot;
DMPos(DMPos < 0) = NaN;

DMNeg = DeltaMot;
DMNeg(DMNeg > 0) = NaN;

%%%%%%%%%%%% RUNNING ACROSS TIME DIVISIONS %%%%%%%%%%%%%%%

for n_timebins = n_TB

    % Number of samples to include in each temporal bin
    dur = floor(samples_session/n_timebins);

    % Time at which we stand (in number of samples) and associated index
    current_time = 1;
    idx_timebin = 1;
    
    % To fill with the spatiotemporal information
    Motion_Matrix = [];
    
    Motion_Matrix_Pos = [];
    Motion_Matrix_Neg = [];

    % As long as we still consider a time lying within the session
    % duration...
    while current_time <= n_timebins * dur

        % Separately for positive and negative instantaneous changes, samples
        % the data and sums
        for s = 1:n_subjects
            tmp_pos(s,:) = nanmean(DMPos(~TemporalMask{1}(current_time:current_time+dur-1,s),:,s));
            tmp_neg(s,:) = nanmean(abs(DMNeg(~TemporalMask{1}(current_time:current_time+dur-1,s),:,s)));
        end
       
        for s = 1:n_subjects

            % Data for a given subject, across all 6 parameters, for
            % non-scrubbed time points
            tmp_nonscrubbed(s,:) = mean(abs(DeltaMot(~TemporalMask{1}(current_time:current_time+dur-1,s),:,s)));
        end


        Motion_Matrix = [Motion_Matrix,tmp_nonscrubbed];

        % output data matrices are filled in appropriately: each column
        % highlights one spatiotemporal index
        
        Motion_Matrix_Pos = [Motion_Matrix_Pos,tmp_pos];
        Motion_Matrix_Neg = [Motion_Matrix_Neg,tmp_neg];

        current_time = current_time + dur;
        idx_timebin = idx_timebin + 1;
    end
    

    % Supplementary Figure 1: motion sign compensates between positive and
    % negative values across time points and motion parameters
    if is_plot
        figure;
        set(gcf,'color','w');

        tmp1 = mean(Motion_Matrix_Pos);
        tmp2 = mean(Motion_Matrix_Neg);

        tmp1S = std(Motion_Matrix_Pos);
        tmp2S = std(Motion_Matrix_Neg);

        for i = 1:n_timebins
            subplot(n_timebins,1,i);
            set(gca,'Box','off');

            bar(1:12,[tmp1(1+(i-1)*6),tmp2(1+(i-1)*6),tmp1(2+(i-1)*6),tmp2(2+(i-1)*6),...
                tmp1(3+(i-1)*6),tmp2(3+(i-1)*6),tmp1(4+(i-1)*6),tmp2(4+(i-1)*6),...
                tmp1(5+(i-1)*6),tmp2(5+(i-1)*6),tmp1(6+(i-1)*6),tmp2(6+(i-1)*6)]);

            hold on;

            errorbar(1:12,[tmp1(1+(i-1)*6),tmp2(1+(i-1)*6),tmp1(2+(i-1)*6),tmp2(2+(i-1)*6),...
                tmp1(3+(i-1)*6),tmp2(3+(i-1)*6),tmp1(4+(i-1)*6),tmp2(4+(i-1)*6),...
                tmp1(5+(i-1)*6),tmp2(5+(i-1)*6),tmp1(6+(i-1)*6),tmp2(6+(i-1)*6)],...
                [tmp1S(1+(i-1)*6),tmp2S(1+(i-1)*6),tmp1S(2+(i-1)*6),tmp2S(2+(i-1)*6),...
                tmp1S(3+(i-1)*6),tmp2S(3+(i-1)*6),tmp1S(4+(i-1)*6),tmp2S(4+(i-1)*6),...
                tmp1S(5+(i-1)*6),tmp2S(5+(i-1)*6),tmp1S(6+(i-1)*6),tmp2S(6+(i-1)*6)]);
        end
        
        % Statistical assessment of whether, for a given spatiotemporal
        % bin, positive and negative entries significantly differ
        for i = 1:36
            P_posvsneg(i) = ranksum(Motion_Matrix_Pos(:,i),Motion_Matrix_Neg(:,i));
        end
    end
    
    % At this stage, we observe that along X, there is
    % significantly more elevated motion changes in the negative direction
    % as compared to the positive one (across time). No significant 
    % differences for other cases

    
    %% 5. Representing mover subtypes
    
    % Creating a standardized version of Motion_PLS: negative values then
    % highlight lower motion extent as compared to the population average,
    % and vice versa for positive values
    MOTION_VAR = [Motion_Matrix];

    for i = 1:size(MOTION_VAR,2)
        MOTION_VAR(:,i) = (MOTION_VAR(:,i)-mean(MOTION_VAR(:,i)))./std(MOTION_VAR(:,i));
    end

    %%%%%%%%%%%% RUNNING ACROSS KNN VALUES %%%%%%%%%%%%%%%
    
    % Constructing a K-nearest neighbours graph (k=10 is a typical
    % starting guess in most data science problems from what I learned in my
    % classes)
    for knn = n_NN
        
        % Construction of the adjacency matrix reflecting my graph, using
        % cosine distance (similar results are obtained using euclidean
        % distance)
        W = construct_knn_graph(MOTION_VAR,knn,'cosine');

        % Then, I compute the first three dimensions of my new space in which I
        % express my data: first I compute the normalized Laplacian of my graph,
        % and second, I do an eigenvalue decomposition on this. X, Y and Z are the
        % second, third and fourth eigenvectors resulting from the process
        [X,Y,Z] = compute_non_linear_dim_reduction(W);

        % Of course, we want to find the optimal data parcellation, and so
        % we must derive an optimal number of clusters to use. For this, we
        % resort to Consensus Clustering (Monti et al., Machine learning, 2003)
        [Consensus] = CAP_ConsensusClustering({[X,Y,Z]'},2:17,'items',0.8,100,'cosine');
        
        % Supplementary Figure 2: consensus clustering matrices for K
        % ranging from 2 to 17
        if is_plot
            figure;
            set(gcf,'color','w');

            for j = 1:16

                subplot(4,4,j);
                set(gca,'Box');
                axis square

                imagesc(squeeze(Consensus(:,:,j)));

            end
        end

        % Calculates the quality metrics to determine the optimal value of
        % K: the inter-percentile interval (PAC) is used
        [~,Lorena] = ComputeClusteringQuality(Consensus,2:17);

        if is_plot
            figure; 
            bar(Lorena);
        end
    
        % Based on the output metrics, the optimal cluster number is K=4
        n_clusters = 4;

        % Clustering is performed with this optimal value
        [IDX,Centroids] = kmeans([X,Y,Z],n_clusters,'Distance','cosine','Replicates',10000,'Start','sample');

        % First, I construct 3D patches to depict the degree of motion along the X,
        % the Y and the Z directions in the scanner. Patches are at least 0.15 in
        % length in each dimension, localized at (X(i),Y(i),Z(i)). There are six
        % outputs for each subject because I specify six faces for each 3D
        % rectangle
        for i = 1:n_subjects
            [F1{i},F2{i},F3{i},F4{i},F5{i},F6{i}] = MakePatch(X(i),Y(i),Z(i),...
                (2+mean(MOTION_VAR(i,[1,7,13,19,25,31])))/150,(2+mean(MOTION_VAR(i,[1,7,13,19,25,31]+1)))/150,(2+mean(MOTION_VAR(i,[1,7,13,19,25,31]+2)))/150);
        end

        % Figure 1: representation of movers in nonlinearly reduced space
        % (paper figure and representation as cluster members)
        if is_plot
            
            figure;
            hold on;

            % Computation of the features to plot:
            % (1) RGB color of the patches, to indicate rotational motion
            Alpha = (mean(MOTION_VAR(:,[4,10,16,22,28,34]),2) - min(MOTION_VAR(:,[4,10,16,22,28,34]),[],2));
            Alpha = 3*Alpha;%/max(Alpha);
            Alpha(Alpha > 1) = 1;

            Beta = (mean(MOTION_VAR(:,[4,10,16,22,28,34]+1),2) - min(MOTION_VAR(:,[4,10,16,22,28,34]+1),[],2));
            Beta = 3*Beta;%/max(Beta);
            Beta(Beta > 1) = 1;

            Gamma = (mean(MOTION_VAR(:,[4,10,16,22,28,34]+2),2) - min(MOTION_VAR(:,[4,10,16,22,28,34]+2),[],2));
            Gamma = 3*Gamma;%/max(Gamma);
            Gamma(Gamma > 1) = 1;

            % (2) Edge color and width, to indicate temporal evolution of
            % motion

            % Creation of a matrix of regressors
            for s = 1:n_subjects

                X = zeros(6,2);
                X(:,1) = ones(6,1);
                X(:,2) = [1;2;3;4;5;6];

                Y = [mean(MOTION_VAR(s,1:6),2);mean(MOTION_VAR(s,7:12),2);...
                mean(MOTION_VAR(s,13:18),2);mean(MOTION_VAR(s,19:24),2);...
                mean(MOTION_VAR(s,25:30),2);mean(MOTION_VAR(s,31:36),2)];

                % Solves the linear model
                Beta_coefs(:,s) = pinv(X)*Y;
            end

            % We want to highlight the overall linear trend of motion as a
            % function of the baseline and the increase/decrease
            CM_edges = cbrewer('div','PuOr',1000);
             
            % Computes the edge properties
            Thick = Beta_coefs(2,:);
            Thick = Thick/max(abs(Thick));
            
            for i = 1:n_subjects
                
                % Computes the color of the patch
                RGB = [Alpha(i),Beta(i),Gamma(i)];
                
                if Thick(i) > 0
                    ECol = [208/255,0,40/255];
                else
                    ECol = [27/255 0 161/255];
                end
                
                patch(F1{i}(:,1),F1{i}(:,2),F1{i}(:,3),RGB,'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);
                patch(F2{i}(:,1),F2{i}(:,2),F2{i}(:,3),RGB,'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);
                patch(F3{i}(:,1),F3{i}(:,2),F3{i}(:,3),RGB,'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);
                patch(F4{i}(:,1),F4{i}(:,2),F4{i}(:,3),RGB,'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);
                patch(F5{i}(:,1),F5{i}(:,2),F5{i}(:,3),RGB,'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);
                patch(F6{i}(:,1),F6{i}(:,2),F6{i}(:,3),RGB,'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);

            end

            CM = cbrewer('div','RdBu',1000);
            colormap(flipud(CM));
            colormap(jet);

            grid on;
            xlabel('X');
            ylabel('Y');
            zlabel('Z');
            
            figure;
            hold on;

            for i = 1:n_subjects 
                
                if Thick(i) > 0
                    ECol = [208/255,0,40/255];
                else
                    ECol = [27/255 0 161/255];
                end
                
                patch(F1{i}(:,1),F1{i}(:,2),F1{i}(:,3),IDX(i),'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);
                patch(F2{i}(:,1),F2{i}(:,2),F2{i}(:,3),IDX(i),'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);
                patch(F3{i}(:,1),F3{i}(:,2),F3{i}(:,3),IDX(i),'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);
                patch(F4{i}(:,1),F4{i}(:,2),F4{i}(:,3),IDX(i),'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);
                patch(F5{i}(:,1),F5{i}(:,2),F5{i}(:,3),IDX(i),'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);
                patch(F6{i}(:,1),F6{i}(:,2),F6{i}(:,3),IDX(i),'LineWidth',realmin+9*abs(Thick(i)),'EdgeColor',ECol,'FaceAlpha',0.7,'EdgeAlpha',0.7);

            end

            CM = cbrewer('div','RdBu',1000);
            colormap(flipud(CM));
            colormap(jet);

            grid on;
            xlabel('X');
            ylabel('Y');
            zlabel('Z');
        end
        
        %% 6. Summary of motion along time and space
        
        % Constructs the time vector, which should have 6*n_tb entries made of
        % n_timebins integers
        pos_avg = zeros(1,n_timebins*6);
        
        for i = 1:length(pos_avg)
            pos_avg(i) = mod(i-1,n_timebins)+1;
        end
        
        % Constructs the position vector, which should have 6*n_tb entries
        % made of 6 integers
        t_avg = zeros(1,n_timebins*6);
        
        for i = 1:length(t_avg)
            t_avg(i) = ceil((i-0.5)/n_timebins);
        end
        
        % We now want to compute, for all subjects, representative values
        for subj = 1:n_subjects

            % Data of the considered subject, and associated cluster index
            tmp_subject = MOTION_VAR(subj,:);

            % Computing the time
            for t = 1:n_timebins
                Summary_time(subj,t) = mean(tmp_subject(t_avg == t));
            end

            for p = 1:6
                Summary_position(subj,p) = mean(tmp_subject(pos_avg == p));
            end
        end
        
        % Plotting of the information appropriately
        if is_plot
            
            figure;
            set(gcf,'color','w');

            % Along time
            to_plot = [];
            to_plot2 = [];

            for k = 1:n_clusters
                for t = 1:n_timebins

                    tmp = Summary_time(IDX == k,t);
                    to_plot = [to_plot,mean(tmp)];
                    to_plot2 = [to_plot2,std(tmp)];
                end
            end
            
            % Computes SEM for display
            to_plot2 = to_plot2/sqrt(n_subjects);

            subplot(2,1,1);
            bar(1:n_timebins*n_clusters,to_plot);
            hold on;
            errorbar(1:n_timebins*n_clusters,to_plot,to_plot2);
            xlabel('Time x Clusters');

            % Along space
            to_plot = [];
            to_plot2 = [];

            for k = 1:n_clusters
                for p = 1:6

                    tmp = Summary_position(IDX == k,p);
                    to_plot = [to_plot,mean(tmp)];
                    to_plot2 = [to_plot2,std(tmp)];
                end
            end
            to_plot2 = to_plot2/sqrt(n_subjects);

            subplot(2,1,2);
            bar(1:6*n_clusters,to_plot);
            hold on;
            errorbar(1:6*n_clusters,to_plot,to_plot2);
            xlabel('Space x Clusters');

%             to_plot = [];
%             to_plot2 = [];
% 
%             for k = 1:n_clusters
%                 tmp = MOTION_VAR(IDXB == k,:);
% 
%                 to_plot = [to_plot,mean(tmp)];
%                 to_plot2 = [to_plot2,std(tmp)];
%             end
% 
%             to_plot2 = to_plot2/sqrt(n_subjects);
% 
%             figure;
%             set(gcf,'color','w');
%             bar(1:n_clusters*n_timebins*6,to_plot);
%             hold on;
%             errorbar(1:n_clusters*n_timebins*6,to_plot,to_plot2);
            
        end
        
        % Interpretation: one group of big movers, one group of
        % 'translational movers', one group of 'rotational movers', one
        % group of 'no movers'
        
        % ANOVA assessment (time x space x group)
        
        % Creates the variables
        t_ANOVA = repmat(t_avg,1,n_subjects);
        p_ANOVA = repmat(pos_avg,1,n_subjects);
        
        g_ANOVA = [];
        Data_ANOVA = [];
        
        for i = 1:n_subjects
            g_ANOVA = [g_ANOVA,IDX(i)*ones(1,n_timebins*6)];
            Data_ANOVA = [Data_ANOVA,MOTION_VAR(i,:)];
        end
        
        [~,Tab,Stats] = anovan(Data_ANOVA,{t_ANOVA',p_ANOVA',g_ANOVA'},'model','full');
        
        for null = 1:1000
            tmp_idx = randperm(length(t_ANOVA));
            tmp_idx2 = randperm(length(t_ANOVA));
            
            [~,Tab,Stats] = anovan(Data_ANOVA,{t_ANOVA(tmp_idx)',p_ANOVA(tmp_idx2)',g_ANOVA'},'model','full','display','off');
            Tab = Tab(:,6);
            Tab_null(:,null) = [Tab{2:8}];
        end
        
        F_threshold = prctile(Tab_null,99,2);
        
    end
end

% Conclusions at this stage: movers can be subdivided into four separate
% groups:
%
% Group 1: strong movers regardless of time or space

%% 6a. Assigning from dataset two

% For all second sessions
for s = 1:n_subjects 
    SES2_Label(s) = Mot_KNN(Motion_Matrix,IDX);
end


%% 6b. Comparison between scrubbing and no scrubbing metrics

% Cluster assignments from the no scrubbing analysis will be plotted on
% reduced dimensionality data points from the scrubbing analysis

% Takes the counts data and only keeps the top 36
SortedCounts = Counts(IDX_counts2(1:50),:);

% Computes the graph
W2 = construct_knn_graph(SortedCounts',10,'cosine');

[Xc,Yc,Zc] = compute_non_linear_dim_reduction(W2);

[~,BBB] = pca(SortedCounts');

% Creates the color representation


figure;
scatter3(Xc,Yc,Zc,40,IDX,'filled','o');
colormap(jet);

figure;
scatter3(BBB(:,1),BBB(:,2),BBB(:,3),40,IDX,'filled','o');
colormap(jet);

%% 7. Preparation of the behavioural data

% We want to simplify our behavioral data into explanatory variables
Output = [];

% Loading the excel files containing the information of interest to work
% with
[~, ~, BEHAV] = xlsread('BEHAVIORAL_SHINEY.xlsx');

TITLE = BEHAV(1,1:end);

% Removes the title of each SM (first row)
BEHAV = BEHAV(2:225,1:end);

% Indices of non integer variables
idx_nn = [2,5,6];

% Reads through all entries to assess whether, for that variable, we
% have at least one float entry

% Zygosity
tmp = BEHAV(:,2);
tmp2 = zeros(size(tmp,1),1);

for i = 1:size(tmp,1)
    switch tmp{i}
        case 'NotTwin'
            tmp2(i) = 3;
        case 'MZ'
            tmp2(i) = 1;
        case 'NotMZ'
            tmp2(i) = 2;
        otherwise
            disp('::::::');
    end    
end


tmp = BEHAV(:,5);
tmp_race = zeros(size(tmp,1),1);

for i = 1:length(tmp)
    switch tmp{i}
        case 'White'
            tmp_race(i) = 1;
        case 'Asian/Nat. Hawaiian/Othr Pacific Is.'
            tmp_race(i) = 2;
        case 'Am. Indian/Alaskan Nat.'
            tmp_race(i) = 3;
        case 'Unknown or Not Reported'
            tmp_race(i) = 4;
        case 'Black or African Am.'
            tmp_race(i) = 5;
        case 'More than one'
            tmp_race(i) = 6;
        otherwise
            disp('::::::');
    end    
end

tmp = BEHAV(:,6);
tmp_ethn = zeros(size(tmp,1),1);

for i = 1:length(tmp)
    switch tmp{i}
        case 'Hispanic/Latino'
            tmp_ethn(i) = 1;
        case 'Not Hispanic/Latino'
            tmp_ethn(i) = 2;
        case 'Unknown or Not Reported'
            tmp_ethn(i) = 3;
        otherwise
            disp('::::::');
    end    
end

Output = [cell2mat(BEHAV(:,1)),tmp2,cell2mat(BEHAV(:,3:4)),tmp_race,tmp_ethn,cell2mat(BEHAV(:,7:end))];

if is_removemovers
    Output(IDX_MOVER,:) = [];
end

% Normalize each column (keeping 'non-existing values')
O2 = Output - repmat(nanmean(Output,1),n_subjects,1);
O2 = O2 ./ repmat(nanstd(O2,1),n_subjects,1);

%% 8. Conversion into a subset of components of interest

% For each separate type of information provided in the HCP behavioral
% assessment, we will derive solely one indicative feature through
% probabilistic PCA. By this mean, we will also estimate missing entries
% for most scores (all except the ones left alone to derive a feature;
% i.e., not combined)

% Handedness
PCA(:,1) = O2(:,7);
PCA_title{1} = 'Handedness';

% Education
% SSAGA_Educ, SSAGA_InSchool
[tmp_PC,tmp_CM] = Assess_PC(O2,[10,11],SS,[]);
PCA(:,2) = tmp_PC(:,1);
PCA_title{2} = 'Education';

% height, weight, BMI
[tmp_PC,tmp_CM] = Assess_PC(O2,[13:15],SS,[]);
PCA(:,3:4) = tmp_PC(:,1:2);
PCA_title{3} = 'BMI (bad)';
PCA_title{4} = 'BMI (good)';

% Hematocrit
[tmp_PC,tmp_CM] = Assess_PC(O2,[16,17],SS,[]);
PCA(:,5) = tmp_PC(:,1);
PCA_title{5} = 'Hematocrit';

% Blood pressure
[tmp_PC,tmp_CM] = Assess_PC(O2,[18,19],SS,[]);
PCA(:,6) = tmp_PC(:,1);
PCA_title{6} = 'Blood pressure';

% Thyroid hormone
PCA(:,7) = O2(:,20);
PCA_title{7} = 'Thyroid hormone';

% HbA1C
PCA(:,8) = O2(:,21);
PCA_title{8} = 'HbA1C';

% Sleep
[tmp_PC,tmp_CM] = Assess_PC(O2,[41:47],SS,[]);
PCA(:,9) = tmp_PC(:,1);
PCA_title{9} = 'Sleep';

% PicSeq
[tmp_PC,tmp_CM] = Assess_PC(O2,[48,49],SS,[]);
PCA(:,10) = tmp_PC(:,1);
PCA_title{10} = 'PicSeq';

% CardSort
[tmp_PC,tmp_CM] = Assess_PC(O2,[50,51],SS,[]);
PCA(:,11) = tmp_PC(:,1);
PCA_title{11} = 'CardSort';

% Flanker
[tmp_PC,tmp_CM] = Assess_PC(O2,[52,53],SS,[]);
PCA(:,12) = tmp_PC(:,1);
PCA_title{12} = 'Flanker';

% PMAT
[tmp_PC,tmp_CM] = Assess_PC(O2,[54:56],SS,[]);
PCA(:,13) = tmp_PC(:,1);
PCA_title{13} = 'PMAT';

% ReadEng
[tmp_PC,tmp_CM] = Assess_PC(O2,[57,58],SS,[]);
PCA(:,14) = tmp_PC(:,1);
PCA_title{14} = 'ReadEng';

% PicVocab
[tmp_PC,tmp_CM] = Assess_PC(O2,[59,60],SS,[]);
PCA(:,15) = tmp_PC(:,1);
PCA_title{15} = 'PicVocab';

% Language
[tmp_PC,tmp_CM] = Assess_PC(O2,[61:66],SS,[]);
PCA(:,16:17) = tmp_PC(:,1:2);
PCA_title{16} = 'Language (math perf.)';
PCA_title{17} = 'Language (time to reply)';

% PicVocab
[tmp_PC,tmp_CM] = Assess_PC(O2,[67,68],SS,[]);
PCA(:,18) = tmp_PC(:,1);
PCA_title{18} = 'ProcSpeed';

% DDISC
[tmp_PC,tmp_CM] = Assess_PC(O2,[69,70],SS,[]);
PCA(:,19) = tmp_PC(:,1);
PCA_title{19} = 'DDISC';

% VSPLOT
[tmp_PC,tmp_CM] = Assess_PC(O2,[71:73],SS,[]);
PCA(:,20:21) = tmp_PC(:,1:2);
PCA_title{20} = 'VSPLOT (OFF - Correct)';
PCA_title{21} = 'VSPLOT (RT)';

% SCPT
% WARNING: the first two components are sometimes switched (due to the
% randomness inherent to pPCA); check tmp_CM (correlation between
% individual scores and the PC)
[tmp_PC,tmp_CM] = Assess_PC(O2,[74:81],SS,4);
PCA(:,22:24) = tmp_PC(:,1:3);
PCA_title{22} = 'SCPT (Performance)';
PCA_title{23} = 'SCPT (Over-positive)';
PCA_title{24} = 'SCPT (RT)';

% IWRD
[tmp_PC,tmp_CM] = Assess_PC(O2,[82,83],SS,[]);
PCA(:,25) = tmp_PC(:,1);
PCA_title{25} = 'IWRD (Bad)';

% ListSort
[tmp_PC,tmp_CM] = Assess_PC(O2,[84,85],SS,[]);
PCA(:,26) = tmp_PC(:,1);
PCA_title{26} = 'ListSort';

% WM
[tmp_PC,tmp_CM] = Assess_PC(O2,[86:89],SS,[]);
PCA(:,27) = tmp_PC(:,1);
PCA_title{27} = 'WM (Bad)';

% Gambling
[tmp_PC,tmp_CM] = Assess_PC(O2,[90:97],SS,6);
PCA(:,28:30) = tmp_PC(:,1:3);
PCA_title{28} = 'Gambling (Conservative)';
PCA_title{29} = 'Gambling (Daring)';
PCA_title{30} = 'Gambling (Bad play)';

% Relational
[tmp_PC,tmp_CM] = Assess_PC(O2,[98:101],SS,[]);
PCA(:,31:32) = tmp_PC(:,1:2);
PCA_title{31} = 'Relational (RT)';
PCA_title{32} = 'Relational (Acc)';

% Endurance
[tmp_PC,tmp_CM] = Assess_PC(O2,[102,103],SS,[]);
PCA(:,33) = tmp_PC(:,1);
PCA_title{33} = 'Endurance';

% Gait speed
PCA(:,34) = O2(:,104);
PCA_title{34} = 'Gait speed';

% Dexterity
[tmp_PC,tmp_CM] = Assess_PC(O2,[105,106],SS,[]);
PCA(:,35) = tmp_PC(:,1);
PCA_title{35} = 'Dexterity';

% Strength
[tmp_PC,tmp_CM] = Assess_PC(O2,[107,108],SS,[]);
PCA(:,36) = tmp_PC(:,1);
PCA_title{36} = 'Strength';

% Emotion (global)
[tmp_PC,tmp_CM] = Assess_PC(O2,[109,110],SS,[]);
PCA(:,37) = tmp_PC(:,1);
PCA_title{37} = 'ER40 (Correct - RT)';

% Emotion
[tmp_PC,tmp_CM] = Assess_PC(O2,[111:115],SS,[]);
PCA(:,38) = tmp_PC(:,1);
PCA_title{38} = 'ER40 (Response to sad)';

% Negative emotions
[tmp_PC,tmp_CM] = Assess_PC(O2,[116:121],SS,[]);
PCA(:,39) = tmp_PC(:,1);
PCA_title{39} = 'Negative emotions';

% Positive emotion
[tmp_PC,tmp_CM] = Assess_PC(O2,[122:124],SS,[]);
PCA(:,40) = tmp_PC(:,1);
PCA_title{40} = 'Positive emotions';

% Feeling of being supported
[tmp_PC,tmp_CM] = Assess_PC(O2,[125:130],SS,[]);
PCA(:,41) = tmp_PC(:,1);
PCA_title{41} = 'Support';

% Stress/efficacy
[tmp_PC,tmp_CM] = Assess_PC(O2,[131,132],SS,[]);
PCA(:,42) = tmp_PC(:,1);
PCA_title{42} = 'Self efficacy - Stress';

% Emotion task
[tmp_PC,tmp_CM] = Assess_PC(O2,[135:138],SS,[]);
PCA(:,43:44) = tmp_PC(:,1:2);
PCA_title{43} = 'Emotion task (RT)';
PCA_title{44} = 'Emotion task (Acc)';

% ToM
[tmp_PC,tmp_CM] = Assess_PC(O2,[139:144],SS,[]);
PCA(:,45) = tmp_PC(:,1);
PCA_title{45} = 'ToM (Bad)';

% Noise
PCA(:,46) = O2(:,145);
PCA_title{46} = 'Noise';

% Odor
[tmp_PC,tmp_CM] = Assess_PC(O2,[146,147],SS,[]);
PCA(:,47) = tmp_PC(:,1);
PCA_title{47} = 'Odor';

% Pain Intensity
[tmp_PC,tmp_CM] = Assess_PC(O2,[148,149],SS,[]);
PCA(:,48) = tmp_PC(:,1);
PCA_title{48} = 'Pain intensity';

% Taste
[tmp_PC,tmp_CM] = Assess_PC(O2,[150,151],SS,[]);
PCA(:,49) = tmp_PC(:,1);
PCA_title{49} = 'Taste';

% Mars
[tmp_PC,tmp_CM] = Assess_PC(O2,[152:154],SS,[]);
PCA(:,50) = tmp_PC(:,1);
PCA_title{50} = 'Mars';

% NEOFAC
[tmp_PC,tmp_CM] = Assess_PC(O2,[155:159],SS,[]);
PCA(:,51:52) = tmp_PC(:,1:2);
PCA_title{51} = 'NEOFAC (Introverted)';
PCA_title{52} = 'NEOFAC (Daring)';

% Anxiety
[tmp_PC,tmp_CM] = Assess_PC(O2,[160,161,184,185],SS,[]);
PCA(:,53) = tmp_PC(:,1);
PCA_title{53} = 'Anxiety';

% Withdrawal
[tmp_PC,tmp_CM] = Assess_PC(O2,[162,163],SS,[]);
PCA(:,54) = tmp_PC(:,1);
PCA_title{54} = 'Withdrawal';

% Somatic
[tmp_PC,tmp_CM] = Assess_PC(O2,[164,165,186,187],SS,[]);
PCA(:,55) = tmp_PC(:,1);
PCA_title{55} = 'Somatic problems';

% Thought problems
[tmp_PC,tmp_CM] = Assess_PC(O2,[166,167],SS,[]);
PCA(:,56) = tmp_PC(:,1);
PCA_title{56} = 'Thought problems';

% Attention problems
[tmp_PC,tmp_CM] = Assess_PC(O2,[168,169],SS,[]);
PCA(:,57) = tmp_PC(:,1);
PCA_title{57} = 'Attention problems';

% Aggressiveness
[tmp_PC,tmp_CM] = Assess_PC(O2,[170,171],SS,[]);
PCA(:,58) = tmp_PC(:,1);
PCA_title{58} = 'Aggressiveness';

% Rule breaking
[tmp_PC,tmp_CM] = Assess_PC(O2,[172,173],SS,[]);
PCA(:,59) = tmp_PC(:,1);
PCA_title{59} = 'Rule breaking';

% Intrusive
[tmp_PC,tmp_CM] = Assess_PC(O2,[174,175],SS,[]);
PCA(:,60) = tmp_PC(:,1);
PCA_title{60} = 'Intrusive';

% Internalizing
[tmp_PC,tmp_CM] = Assess_PC(O2,[176,177],SS,[]);
PCA(:,61) = tmp_PC(:,1);
PCA_title{61} = 'Internalizing';

% Externalizing
[tmp_PC,tmp_CM] = Assess_PC(O2,[178,179],SS,[]);
PCA(:,62) = tmp_PC(:,1);
PCA_title{62} = 'Externalizing';

% Avoidance
[tmp_PC,tmp_CM] = Assess_PC(O2,[188,189],SS,[]);
PCA(:,63) = tmp_PC(:,1);
PCA_title{63} = 'Avoidance';

% ADHD
[tmp_PC,tmp_CM] = Assess_PC(O2,[190,191],SS,[]);
PCA(:,64) = tmp_PC(:,1);
PCA_title{64} = 'ADHD';

% Inattention
PCA(:,65) = O2(:,192);
PCA_title{65} = 'Inattention';

% Hyperresponsiveness
PCA(:,66) = O2(:,193);
PCA_title{66} = 'Hyperresponsiveness';

% ADHD
[tmp_PC,tmp_CM] = Assess_PC(O2,[194,195],SS,[]);
PCA(:,67) = tmp_PC(:,1);
PCA_title{67} = 'Antisocial';

% Alcohol
[tmp_PC,tmp_CM] = Assess_PC(O2,[199:205],SS,[]);
PCA(:,68) = tmp_PC(:,1);
PCA_title{68} = 'Alcohol';

% Tobacco
[tmp_PC,tmp_CM] = Assess_PC(O2,[206:209],SS,[]);
PCA(:,69) = tmp_PC(:,1);
PCA_title{69} = 'Tobacco';

% Tobacco
[tmp_PC,tmp_CM] = Assess_PC(O2,[210:216],SS,[]);
PCA(:,70) = tmp_PC(:,1);
PCA_title{70} = 'Drugs';

% At this stage, we have reduced the dimensionality of each of our behavioral 
% variable type of interest

%% 9. Further processing of the obtained data

% Some individual variables have been kept without undergoing PPCA; in such
% a case, we want to keep only the ones with few NaN entries

%%%% STEP 1

% We want to remove the columns that are no good (too many Nans)
T_nan = 5;

% Scores to remove
idx_toremove = [];

for SM = 1:size(PCA,2)
    
    tmp = PCA(:,SM);
    
    if sum(isnan(tmp)) > T_nan
        idx_toremove = [idx_toremove,SM];
    end
    
end

PCA(:,idx_toremove) = [];
PCA_title(idx_toremove) = [];


%%%% STEP 2

% Remove AM if max(y) > 100*mean(y) with (y) = (x - med(x))^2
idx_toremove = [];

for SM = 1:size(PCA,2)
    tmp = PCA(:,SM);
    
    y = (tmp-median(tmp)).^2;
    
    if max(y) > 100 * mean(y)
        idx_toremove = [idx_toremove,SM];
    end
end

PCA(:,idx_toremove) = [];
PCA_title(idx_toremove) = [];


%%%% STEP 3

% Remove if more than 95% of subjects show the same value
idx_toremove = [];

for SM = 1:size(PCA,2)
    tmp = PCA(:,SM);
    
    if sum(unique(tmp)) <= 11
        idx_toremove = [idx_toremove,SM];
    end
end

PCA(:,idx_toremove) = [];
PCA_title(idx_toremove) = [];


%%%% STEP 4

% Rank-based inverse Gaussian transformation
for SM = 1:size(PCA,2)
    rank = tiedrank(PCA(:,SM));
    p = rank / ( length(rank) + 1 ); %# +1 to avoid Inf for the max point
    PCA(:,SM) = norminv( p, 0, 1 );
end

% Number of missing values
P_missing = sum(sum(isnan(PCA)))/size(PCA,1)/size(PCA,2)*100;


%% 10. Univariate assessment of the derived anthropomorphic and 
% behavioral features link to motion

% We compute the Fisher score (and related null distribution when shuffling
% behvior and motion entries with respect to each other) for each feature
% of interest
[F,Fnull] = MOT_LinkBehavMotion(PCA,IDX);

% Bonferroni correction for the number of univariate assessments and
% subsequent derivation of significance thresholds
alpha = 5/size(PCA,2);
Fthresh = squeeze(prctile(Fnull,100-alpha,2));

% Another attempt using the proposal by Holmes and Nichols (Human Brain
% Mapping, 2001). We compute a distribution based on the max of each score
% null distribution and then, threshold that at 5%
tmp_F = max(Fnull,[],2);
F_Holmes = prctile(tmp_F,100-alpha);

% Plotting of univariate results
figure;
bar(F);

hold on;

for i = 1:size(PCA,2)
    plot([i-0.4,i+0.4],[Fthresh(i),Fthresh(i)],'color',[0.3 0.3 0.3],'LineWidth',1.5);
    %plot([i-0.4,i+0.4],[F_Holmes,F_Holmes],'color',[0.6 0.6 0.6],'LineWidth',1.5);
end

% Indices of the significant variables
idx_uni = [2,3,4,6];

% For each, post-hoc comparison across groups
for i = 1:length(idx_uni)
    
    tmp = PCA(:,idx_uni(i));
    
    p_12(i) = ranksum(tmp(IDX == 1),tmp(IDX == 2));
    p_13(i) = ranksum(tmp(IDX == 1),tmp(IDX == 3));
    p_14(i) = ranksum(tmp(IDX == 1),tmp(IDX == 4));
    p_23(i) = ranksum(tmp(IDX == 2),tmp(IDX == 3));
    p_24(i) = ranksum(tmp(IDX == 2),tmp(IDX == 4));
    p_34(i) = ranksum(tmp(IDX == 3),tmp(IDX == 4));
    
    Delta_12(i) = nanmean(tmp(IDX == 1)) - nanmean(tmp(IDX == 2));
    Delta_13(i) = nanmean(tmp(IDX == 1)) - nanmean(tmp(IDX == 3));
    Delta_14(i) = nanmean(tmp(IDX == 1)) - nanmean(tmp(IDX == 4));
    Delta_23(i) = nanmean(tmp(IDX == 2)) - nanmean(tmp(IDX == 3));
    Delta_24(i) = nanmean(tmp(IDX == 2)) - nanmean(tmp(IDX == 4));
    Delta_34(i) = nanmean(tmp(IDX == 3)) - nanmean(tmp(IDX == 4));
    
end

% Conclusion at this stage: we find significant differences between the
% "large movers" and the "low movers" clusters with univariate analysis. In
% each case, the larger movers will show worse BMI, higher blood pressure,
% and lower endurance abilities

%% 11. Combination of both sides with PLS

clear MOTION_VAR_trim
clear PCA_trim
clear X0
clear Y0
clear nSub
clear Y
clear X
clear R
clear U
clear S
clear V
clear Lx
clear perm_order
clear Xp
clear Yp
clear Rp
clear Up
clear Vp
clear Sp
clear rotatemat
clear permsamp
clear Lxp
clear sp
clear Xb
clear Yb
clear Sb
clear Vb
clear Ub
clear Ub_vect
clear Vb_vect


% Because we still had a small amount of NaN entries left, concerned
% subjects are removed prior to PLS
MOTION_VAR_trim = [Counts(IDX_counts2(1:50),:)'];
MOTION_VAR_trim(isnan(sum(PCA,2)),:) = [];

PCA_trim = PCA;
PCA_trim(isnan(sum(PCA,2)),:) = [];

Y0 = PCA_trim;
X0 = MOTION_VAR_trim;

% Number of subjects in the PLS
nSub = size(PCA_trim,1);

% normalization of X and Y
Y=myPLS_norm(Y0,1,ones(1,nSub),2);
X=myPLS_norm(X0,1,ones(1,nSub),2);

% Number of permutations to carry on to determine significant components
NUM_PERMS=10000;

% Number of times to bootstrap the data on
NUM_BOOTSTRAP=1000;

% Number of behavioral scores
NUM_BEHAV_SCORES = size(Y0,2);

% Covariance matrix computation
R=myPLS_cov(X,Y,1,ones(1,nSub));
    
% SVD on the covariance matrix
[U,S,V]=svd(R,'econ');

% We get the unitary basis in both sides and singular values, which give
% the weight of each singular component

% Number of latent variables
NUM_LVs=min(size(S));
    
% ICA convention: turn LVs such that max is positive
for iLV=1:NUM_LVs
    [~,maxID] = max(abs(U(:,iLV)));
    if U(maxID,iLV)<0
        U(:,iLV)=-U(:,iLV);
        V(:,iLV)=-V(:,iLV);
    end
end
    
% Latent variables (for motion parameters)
Lx=X*V;
    
% Permutation measures
fprintf('Permutation: ');
    
for iter=1:10000
    
    fprintf('%d ',iter);
    
    if ~mod(iter,20) 
        fprintf('\n'); 
    end
    
    % How to randomize the data
    perm_order=randperm(size(X,1));

    % Permuted motion data (untouched)
    Xp=X;

    % Permuted behavior data (shuffled)
    Yp=Y0(perm_order,:);
    
    % Renormalization
    Yp=myPLS_norm(Yp,1,ones(1,nSub),2);

    % "Null" correlation matrix and SVD
    Rp=myPLS_cov(Xp,Yp,1,ones(1,nSub));
    [Up,Sp,Vp]=svd(Rp,'econ');

    % Procrustes transform to fit the new U (permuted) to the old one, as
    % well as the singular values
    rotatemat = rri_bootprocrust(U, Up);
    Up = Up * Sp * rotatemat;
    Sp = sqrt(sum(Up.^2));
    
    % There can be changes of order so Procrustes needed; now it tells us
    % how things must be reordered (high-dimensionally) but how does it
    % impact singular values?
    % The trick is ti incorporate the singular values linked to noz yet
    % roated basis to the rotation, and recalculate the singular values
    % afterwards

    % Current null singular values are stored
    permsamp(:,iter)=Sp';

    % sp contains whether the current singular value exceeds the actual one
    % (real one)
    if iter==1
        sp= (Sp'>=diag(S));
    else
        sp = sp + (Sp'>=diag(S));
    end

    % Computation of permuted latent variable
    Lxp=X*Vp;

end

fprintf('\n');

% The larger the sprob entries, the larger the p-value
sprob = sp ./ (NUM_PERMS + 1);

% Plots a summarising figure
fig=figure('Position',[508,524,327,266]);
plot(diag(S),'ko-');
xlabel('Latent variables');
ylabel('Singular value');
hold on;
errorbar(1:NUM_LVs,mean(permsamp,2),std(permsamp,[],2),'r-');
legend({'observed','surrogates'});
for iter=1:NUM_LVs
    str{iter}=sprintf('%d (p=%5.4f)',iter,sprob(iter));
    fprintf('LV%s\n',str{iter});
end
axis([0.8 NUM_LVs+.2 0 max(diag(S))*1.2]);
set(gca,'XTick',1:NUM_LVs,'XTickLabel',str);
hold off;

% Computes the indices to use for bootstrapping (with replacement)
[boot_order]=rri_boot_order(nSub,1,1000);

% Runs across bootstrapping folds
for iter=1:1000
    
    fprintf('%d ',iter);
    if ~mod(iter,20)
        fprintf('\n');
    end
    
    % Takes bootstrapped motion data and normalizes it
    Xb=X0(boot_order(:,iter),:);
    Xb=myPLS_norm(Xb,1,ones(1,nSub),2);

    % Same for behavior
    Yb=Y0(boot_order(:,iter),:);
    Yb=myPLS_norm(Yb,1,ones(1,nSub),2);

    % Accordingly, computes R and the SVD
    Rb=myPLS_cov(Xb,Yb,1,ones(1,nSub));
    [Ub,Sb,Vb]=svd(Rb,'econ');

    % Procrustes transform as before to generate fitting U and V
    % bootstrapped variants
    rotatemat = rri_bootprocrust(U, Ub);
    rotatemat2 = rri_bootprocrust(V, Vb);
    
    % Full rotation
    Rot_full = (rotatemat + rotatemat2)/2;
    
    Vb = Vb * Rot_full;
    Ub = Ub * Rot_full;

    % Saves them all
    Ub_vect(iter,:,:) = Ub;
    Vb_vect(iter,:,:) = Vb;
end

% Selects the percentiles
U_PCT = prctile(Ub_vect,[1,99],1);
V_PCT = prctile(Vb_vect,[1,99],1);

% Computes standard deviation
U_STD = squeeze(std(Ub_vect,[],1));
V_STD = squeeze(std(Vb_vect,[],1));

clear Ub_vect
clear Vb_vect

% Computes z-scores
U_z = U./U_STD;
V_z = V./V_STD;

%% 12. PLS-related plots

% For all four first components, plots the spatiotemporal motion and the
% behavioral weights (PCs 1, 3 and 4 are significant)
% For the same components, also plots the linearly reduced spatiotemporal data
% with color as the PLS behavioral scores
idx_togetout = find(isnan(sum(PCA,2)));

% Type of
type = 'Scrub';

F1b = F1;
F2b = F2;
F3b = F3;
F4b = F4;
F5b = F5;
F6b = F6;

F1b(idx_togetout) = [];
F2b(idx_togetout) = [];
F3b(idx_togetout) = [];
F4b(idx_togetout) = [];
F5b(idx_togetout) = [];
F6b(idx_togetout) = [];

% Latent scores for behaviors for the subjects (how strongly a component is
% expressed in a subject)
Ly = Y*U;

for j = 1:5
    
    if strcmp(type,'Scrub')
    
        figure;
        set(gcf,'color','w');
        bar(1:size(U,1),U_z(:,j));

        figure;
        set(gcf,'color','w');
        bar(1:50,V_z(:,j));
        
        
    else
        
        % Computing the time
        for t = 1:n_timebins
            Summary_time_PLS(j,t) = mean(V_z(t_avg == t,j));
        end

        for p = 1:6
            Summary_position_PLS(j,p) = mean(V_z(pos_avg == p,j));
        end

    
        figure;
        set(gcf,'color','w');
        bar(1:size(U,1),U_z(:,j));

        figure;
        set(gcf,'color','w');
        subplot(2,1,1);
        bar(1:6,Summary_time_PLS(j,:));
        xlabel('Time');
        subplot(2,1,2);
        bar(1:6,Summary_position_PLS(j,:));
        xlabel('Position');

        
    end
    
    
    figure;
    hold on;

    for i = 1:nSub
        patch(F1b{i}(:,1),F1b{i}(:,2),F1b{i}(:,3),Ly(i,j));
        patch(F2b{i}(:,1),F2b{i}(:,2),F2b{i}(:,3),Ly(i,j));
        patch(F3b{i}(:,1),F3b{i}(:,2),F3b{i}(:,3),Ly(i,j));
        patch(F4b{i}(:,1),F4b{i}(:,2),F4b{i}(:,3),Ly(i,j));
        patch(F5b{i}(:,1),F5b{i}(:,2),F5b{i}(:,3),Ly(i,j));
        patch(F6b{i}(:,1),F6b{i}(:,2),F6b{i}(:,3),Ly(i,j));
    end

    CM = cbrewer('div','RdBu',100);
    colormap(flipud(CM));
    caxis([-7,7]);
    
    grid on;
    xlabel('X');
    ylabel('Y');
    zlabel('Z');
end

% For each component, also plots a "text representation" along a z-score
% axis (similar to Smith et al. 2015)
% PlotSmith(PCA_title,(U_z(:,1)),[3 30],3);
% PlotSmith(PCA_title,(U_z(:,2)),[3 15],3);
% PlotSmith(PCA_title,(U_z(:,3)),[3 15],3);
% PlotSmith(PCA_title,(U_z(:,4)),[3 15],3);