diff --git a/Matlab_Scripts/MOT_ANA_CompareClusters.m b/Matlab_Scripts/Functions/MOT_ANA_CompareClusters.m similarity index 100% rename from Matlab_Scripts/MOT_ANA_CompareClusters.m rename to Matlab_Scripts/Functions/MOT_ANA_CompareClusters.m diff --git a/Matlab_Scripts/MOT_ANA_CompureParameters.m b/Matlab_Scripts/Functions/MOT_ANA_CompureParameters.m similarity index 100% rename from Matlab_Scripts/MOT_ANA_CompureParameters.m rename to Matlab_Scripts/Functions/MOT_ANA_CompureParameters.m diff --git a/Matlab_Scripts/MOT_ANA_ComputeParameters.m b/Matlab_Scripts/Functions/MOT_ANA_ComputeParameters.m similarity index 100% rename from Matlab_Scripts/MOT_ANA_ComputeParameters.m rename to Matlab_Scripts/Functions/MOT_ANA_ComputeParameters.m diff --git a/Matlab_Scripts/MOT_ANA_ComputeParameters_DS.m b/Matlab_Scripts/Functions/MOT_ANA_ComputeParameters_DS.m similarity index 100% rename from Matlab_Scripts/MOT_ANA_ComputeParameters_DS.m rename to Matlab_Scripts/Functions/MOT_ANA_ComputeParameters_DS.m diff --git a/Matlab_Scripts/MOT_ANA_Reftopop.m b/Matlab_Scripts/Functions/MOT_ANA_Reftopop.m similarity index 100% rename from Matlab_Scripts/MOT_ANA_Reftopop.m rename to Matlab_Scripts/Functions/MOT_ANA_Reftopop.m diff --git a/Matlab_Scripts/MOT_ANA_SummariseMotion.m b/Matlab_Scripts/Functions/MOT_ANA_SummariseMotion.m similarity index 100% rename from Matlab_Scripts/MOT_ANA_SummariseMotion.m rename to Matlab_Scripts/Functions/MOT_ANA_SummariseMotion.m diff --git a/Matlab_Scripts/Functions/jUpperTriMatToVec.m b/Matlab_Scripts/Functions/jUpperTriMatToVec.m old mode 100755 new mode 100644 diff --git a/Matlab_Scripts/Script_Motion_Revisions.m b/Matlab_Scripts/Script_Main.m similarity index 99% rename from Matlab_Scripts/Script_Motion_Revisions.m rename to Matlab_Scripts/Script_Main.m index 5f60de9..a822d81 100644 --- a/Matlab_Scripts/Script_Motion_Revisions.m +++ b/Matlab_Scripts/Script_Main.m @@ -1,1546 +1,1543 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Author: Thomas Bolton % Date: 23/10/2019 % Description: % This script runs motion and behavior analyses on an extended set of % subjects from the Human Connectome Project, in an updated fashion to % answer the comments from the Reviewers %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% 1. Loading some of the data and setting paths % SS contains the parameters for convergence of the probabilistic PCA % algorithm load SS -% Adds paths to functions and utilities +% Adds paths sto functions and utilities addpath(genpath(fullfile(pwd,'GlobalUtilities'))); addpath(genpath(fullfile(pwd,'Functions'))); -addpath(genpath('/Users/TiBiUan/Desktop/Github/IBI_Application/HCP_General')); -addpath(genpath('/Users/TiBiUan/Desktop/Github/MOT_ANA/Behavioral_Data_Extended')); - % Do we want to plot stuff or not is_plot = 1; %% 2. Indices of interest with and without physiology % Loads the data of interest for us: motion and FD are available for 1112 % subjects (the ones in IsYeoS1200) load('Motion_Data.mat'); load('FramewiseDisplacement.mat'); % Loads the indices that will enable us to select the appropriate subjects load('ALL_INDICES.mat', 'IsYeoS1200') load('ALL_INDICES.mat', 'IsSES1Motion'); load('ALL_INDICES.mat', 'IsSES2Motion'); load('ALL_INDICES.mat', 'IsSES3Motion'); load('ALL_INDICES.mat', 'IsSES4Motion'); load('ALL_INDICES.mat', 'IsBehav5P'); load('ALL_INDICES.mat', 'IsRSAll'); load('IDX_PHYSIO'); % Making the data back to a "1206-subject" form SES = nan(1200,6,1206,4); SES(:,:,IsYeoS1200,1) = SES1; SES(:,:,IsYeoS1200,2) = SES2; SES(:,:,IsYeoS1200,3) = SES3; SES(:,:,IsYeoS1200,4) = SES4; FD = nan(1200,1206,4); FD(:,IsYeoS1200,1) = FD1; FD(:,IsYeoS1200,2) = FD2; FD(:,IsYeoS1200,3) = FD3; FD(:,IsYeoS1200,4) = FD4; % At this stage, NaN for the subjects for whom data is not available, and % for the others, the data is in % Behavioral data is available for all 1206 subjects with missing entries % for some load('Behavior.mat') load('Age.mat'); load('Handedness.mat'); load('Gender.mat'); % I want to select all the subjects for which behavior and all 4 motion % file data are available IDX_MOTANA = IsYeoS1200 & IsSES1Motion & IsSES2Motion &... IsSES3Motion & IsSES4Motion & IsBehav5P & IsRSAll; % In another set, I want to also select solely the subjects that have good % respiratory and cardiac data for the first two sessions IDX_MOTANA_PHYSIO = IsYeoS1200 & IsSES1Motion & IsSES2Motion &... IsSES3Motion & IsSES4Motion & IsBehav5P & IsRSAll & IDX_PHYSIO; IDX_MOTANA_NOPHYSIO = IDX_MOTANA & ~IDX_MOTANA_PHYSIO; %% 3. Selection of the data (all 951 subjects) % First, we select the data for the "main analyses" % Selection of motion data Motion_Data = squeeze(SES(:,:,IDX_MOTANA,:)); FD_Data = squeeze(FD(:,IDX_MOTANA,:)); % Selection of behavioral data Behavioral_Data = Behavior(IDX_MOTANA,:); n_subjects = sum(IDX_MOTANA); clear SES clear FD clear SES1 clear SES2 clear SES3 clear SES4 clear FD1 clear FD2 clear FD3 clear FD4 %% 4. Flagging scrubbed time points % Threshold to use for scrubbing (i.e., separating the scrubbed frames from % the non-scrubbed ones) T_scrubbing = 0.3; % Will mask which frames to keep or not to keep for a given subject TemporalMask_All = []; for s = 1:n_subjects for ses = 1:4 TemporalMask_All(:,s,ses) = GetTemporalMask(FD_Data(:,s,ses),T_scrubbing,1); end end %% 5. Computing processed motion information (spatiotemporal features) % number of time bins that we want to split the motion data into: 6 is the % recommended value. n_TB = 6; % Computation of the motion parameters: Motion_Matrix will contain the % results for session 1 [Motion_Matrix,Motion_Matrix_Pos,Motion_Matrix_Neg,Space_Labels,... Time_Labels,Session_Labels] = MOT_ANA_ComputeParameters(Motion_Data(:,:,:,1),TemporalMask_All(:,:,1),n_TB); % We do a similar process to get the results for session 2 [Motion_Matrix2] = MOT_ANA_ComputeParameters(Motion_Data(:,:,:,2),TemporalMask_All(:,:,2),n_TB); [Motion_Matrix_DS2] = MOT_ANA_ComputeParameters(downsample(Motion_Data(:,:,:,1),2),downsample(TemporalMask_All(:,:,1),2),n_TB); [Motion_Matrix_DS3] = MOT_ANA_ComputeParameters(downsample(Motion_Data(:,:,:,1),3),downsample(TemporalMask_All(:,:,1),3),n_TB); [Motion_Matrix_DS4] = MOT_ANA_ComputeParameters(downsample(Motion_Data(:,:,:,1),4),downsample(TemporalMask_All(:,:,1),4),n_TB); [Motion_Matrix_VAL,~,~,Space_Labels_red,Time_Labels_red] = MOT_ANA_ComputeParameters(downsample(Motion_Data(1:410,:,:,1),3),downsample(TemporalMask_All(1:410,:,1),3),2); %% 6. Conversion into "relative to population values" % Note that we use both session data in this step: indeed, we are % interested in keeping them in the same space [MOTION_VAR] = MOT_ANA_Reftopop([Motion_Matrix;Motion_Matrix2],'Popref'); [MOTION_VAR_ONLYSES1] = MOT_ANA_Reftopop([Motion_Matrix],'Popref'); [MOTION_VAR_ONLYSES2] = MOT_ANA_Reftopop([Motion_Matrix2],'Popref'); [MOTION_VAR_DS2] = MOT_ANA_Reftopop(Motion_Matrix_DS2,'Popref'); [MOTION_VAR_DS3] = MOT_ANA_Reftopop(Motion_Matrix_DS3,'Popref'); [MOTION_VAR_DS4] = MOT_ANA_Reftopop(Motion_Matrix_DS4,'Popref'); [MOTION_VAR_VAL] = MOT_ANA_Reftopop(Motion_Matrix_VAL,'Popref'); %% 7. Low dimensional representation of the spatiotemporal motion data: % determination of the optimal number of clusters for the combined data % 10 nearest neighbours for spectral clustering n_NN = 10; % Construction of the adjacency matrix reflecting my graph, using % cosine distance (similar results are obtained using euclidean % distance): note that the graph is constructed on both session data points W = construct_knn_graph(MOTION_VAR,n_NN,'cosine'); W_ONLYSES1 = construct_knn_graph(MOTION_VAR_ONLYSES1,n_NN,'cosine'); W_ONLYSES2 = construct_knn_graph(MOTION_VAR_ONLYSES2,n_NN,'cosine'); W_DS2 = construct_knn_graph(MOTION_VAR_DS2,n_NN,'cosine'); W_DS3 = construct_knn_graph(MOTION_VAR_DS3,n_NN,'cosine'); W_DS4 = construct_knn_graph(MOTION_VAR_DS4,n_NN,'cosine'); W_VAL = construct_knn_graph(MOTION_VAR_VAL,n_NN,'cosine'); % Then, I compute the first three dimensions of my new space in which I % express my data: first I compute the normalized Laplacian of my graph, % and second, I do an eigenvalue decomposition on this. X, Y and Z are the % second, third and fourth eigenvectors resulting from the process (that % is, the "low frequency information" along the graph that links similar % subjects together [X,Y,Z,E1,E2,E3] = compute_non_linear_dim_reduction(W); [X_ONLYSES1,Y_ONLYSES1,Z_ONLYSES1] = compute_non_linear_dim_reduction(W_ONLYSES1); [X_ONLYSES2,Y_ONLYSES2,Z_ONLYSES2] = compute_non_linear_dim_reduction(W_ONLYSES2); [X_DS2,Y_DS2,Z_DS2] = compute_non_linear_dim_reduction(W_DS2); [X_DS3,Y_DS3,Z_DS3] = compute_non_linear_dim_reduction(W_DS3); [X_DS4,Y_DS4,Z_DS4] = compute_non_linear_dim_reduction(W_DS4); [X_VAL,Y_VAL,Z_VAL] = compute_non_linear_dim_reduction(W_VAL); % Of course, we want to find the optimal data parcellation, and so % we must derive an optimal number of clusters to use. For this, we % resort to Consensus Clustering (Monti et al., Machine learning, 2003) % From here on, we separate session 1 and session 2 data, in the spirit of % conducting a replicability assessment [Consensus_SES1] = CAP_ConsensusClustering({[X(1:n_subjects),Y(1:n_subjects),Z(1:n_subjects)]'},2:50,'items',0.8,100,'cosine'); [Consensus_SES2] = CAP_ConsensusClustering({[X(n_subjects+1:end),Y(n_subjects+1:end),Z(n_subjects+1:end)]'},2:50,'items',0.8,100,'cosine'); [Consensus_ONLYSES1] = CAP_ConsensusClustering({[X_ONLYSES1,Y_ONLYSES1,Z_ONLYSES1]'},2:15,'items',0.8,100,'cosine'); [Consensus_ONLYSES2] = CAP_ConsensusClustering({[X_ONLYSES2,Y_ONLYSES2,Z_ONLYSES2]'},2:50,'items',0.8,100,'cosine'); [Consensus_DS2] = CAP_ConsensusClustering({[X_DS2,Y_DS2,Z_DS2]'},2:50,'items',0.8,100,'cosine'); [Consensus_DS3] = CAP_ConsensusClustering({[X_DS3,Y_DS3,Z_DS3]'},2:50,'items',0.8,100,'cosine'); [Consensus_DS4] = CAP_ConsensusClustering({[X_DS4,Y_DS4,Z_DS4]'},2:50,'items',0.8,100,'cosine'); [Consensus_VAL] = CAP_ConsensusClustering({[X_VAL,Y_VAL,Z_VAL]'},2:50,'items',0.8,100,'cosine'); %% 8. Closer assessment of consensus clustering results % Consensus clustering matrices for K ranging from 2 to 17, in both session % cases if is_plot figure; set(gcf,'color','w'); for j = 1:36 subplot(6,6,j); set(gca,'Box'); axis square imagesc(squeeze(Consensus_SES1(:,:,j))); end figure; set(gcf,'color','w'); for j = 1:36 subplot(6,6,j); set(gca,'Box'); axis square imagesc(squeeze(Consensus_SES2(:,:,j))); end end % Calculates the quality metrics to determine the optimal value of % K: the inter-percentile interval (PAC) is used [~,Lorena_SES1] = ComputeClusteringQuality(Consensus_SES1,2:50); [~,Lorena_SES2] = ComputeClusteringQuality(Consensus_SES2,2:50); [~,Lorena_ONLYSES1] = ComputeClusteringQuality(Consensus_ONLYSES1,2:15); [~,Lorena_ONLYSES2] = ComputeClusteringQuality(Consensus_ONLYSES2,2:50); [~,Lorena_DS2] = ComputeClusteringQuality(Consensus_DS2,2:50); [~,Lorena_DS3] = ComputeClusteringQuality(Consensus_DS3,2:50); [~,Lorena_DS4] = ComputeClusteringQuality(Consensus_DS4,2:50); [~,Lorena_VAL] = ComputeClusteringQuality(Consensus_VAL,2:50); % Assessment of the results if is_plot figure; subplot(2,1,1); bar(Lorena_SES1); subplot(2,1,2); bar(Lorena_SES2); figure; subplot(2,1,1); bar(Lorena_ONLYSES1); subplot(2,1,2); bar(Lorena_ONLYSES2); figure; bar(Lorena_VAL); figure; subplot(4,1,1); bar(Lorena_SES1); subplot(4,1,2); bar(Lorena_DS2); subplot(4,1,3); bar(Lorena_DS3); subplot(4,1,4); bar(Lorena_DS4); end %% 8. Analysis and displays for the chosen, whole data % Based on the output metrics, the optimal cluster number is n_clusters = 4; % Clustering is performed with this optimal value [IDX_SES1,Centroids_SES1] = kmeans([X(1:n_subjects),Y(1:n_subjects),Z(1:n_subjects)],n_clusters,'Distance',... 'cosine','Replicates',10000,'Start','sample'); n_clusters = 3; % Clustering is performed with this optimal value [IDX_SES2,Centroids_SES2] = kmeans([X(n_subjects+1:end),Y(n_subjects+1:end),Z(n_subjects+1:end)],n_clusters,'Distance',... 'cosine','Replicates',10000,'Start','sample'); % Figure 1: representation of movers in nonlinearly reduced space % (paper figure and representation as cluster members) if is_plot % Plotting of the whole data [F1,F2,F3,F4,F5,F6] = MOTANA_OMFGBBQPlotSimple(MOTION_VAR(1:n_subjects,:),X(1:n_subjects),Y(1:n_subjects),Z(1:n_subjects),n_subjects,1.5,500,5,Time_Labels,Space_Labels,'RGB',IDX_SES1,[]); [F1,F2,F3,F4,F5,F6] = MOTANA_OMFGBBQPlotSimple(MOTION_VAR(1:n_subjects,:),X(1:n_subjects),Y(1:n_subjects),Z(1:n_subjects),n_subjects,1.5,500,3,Time_Labels,Space_Labels,'Groups',IDX_SES1,[]); MOTANA_OMFGBBQPlotSimple(MOTION_VAR(n_subjects+1:end,:),X(n_subjects+1:end),Y(n_subjects+1:end),Z(n_subjects+1:end),n_subjects,1.5,500,5,Time_Labels,Space_Labels,'RGB',IDX_SES2,[]); MOTANA_OMFGBBQPlotSimple(MOTION_VAR(n_subjects+1:end,:),X(n_subjects+1:end),Y(n_subjects+1:end),Z(n_subjects+1:end),n_subjects,1.5,500,3,Time_Labels,Space_Labels,'Groups',IDX_SES2,[]); end % Gets 36-dimensional centroids n_clusters = 4; C1 = zeros(n_clusters,6*n_TB); tmp1 = MOTION_VAR(1:n_subjects,:); for k = 1:n_clusters C1(k,:) = mean(tmp1(IDX_SES1==k,:)); end n_clusters = 3; C2 = zeros(n_clusters,6*n_TB); tmp2 = MOTION_VAR(n_subjects+1:end,:); for k = 1:n_clusters C2(k,:) = mean(tmp2(IDX_SES2==k,:)); end % Clustering is performed with this optimal value [IDX_DS2] = kmeans([X_DS2,Y_DS2,Z_DS2],n_clusters,'Distance',... 'cosine','Replicates',10000,'Start','sample'); % Clustering is performed with this optimal value [IDX_DS3] = kmeans([X_DS3,Y_DS3,Z_DS3],n_clusters,'Distance',... 'cosine','Replicates',10000,'Start','sample'); % Clustering is performed with this optimal value [IDX_DS4] = kmeans([X_DS4,Y_DS4,Z_DS4],n_clusters,'Distance',... 'cosine','Replicates',10000,'Start','sample'); % Gets 36-dimensional centroids CDS2 = zeros(n_clusters,6*n_TB); CDS3 = zeros(n_clusters,6*n_TB); CDS4 = zeros(n_clusters,6*n_TB); for k = 1:n_clusters CDS2(k,:) = mean(MOTION_VAR_DS2(IDX_DS2==k,:)); CDS3(k,:) = mean(MOTION_VAR_DS3(IDX_DS3==k,:)); CDS4(k,:) = mean(MOTION_VAR_DS4(IDX_DS4==k,:)); end [IDX_VAL] = kmeans([X_VAL,Y_VAL,Z_VAL],n_clusters,'Distance',... 'cosine','Replicates',10000,'Start','sample'); C1_VAL = zeros(n_clusters,6*2); for k = 1:n_clusters C1_VAL(k,:) = mean(MOTION_VAR_VAL(IDX_VAL==k,:)); end %% 9. Summary of motion along time and space % Observation: we find similar clusters: although K=4 is a less "clear" % pick in the latter dataset case MOT_ANA_SummariseMotion(MOTION_VAR(1:n_subjects,:),IDX_SES1,Time_Labels,Space_Labels); MOT_ANA_SummariseMotion(MOTION_VAR(n_subjects+1:end,:),IDX_SES2,Time_Labels,Space_Labels); % Exact quantification of similarity in clusters needed here [RealCorrs,NullCors] = MOT_ANA_CompareClusters(C1,C2,'MSE'); %[RealCorrs,NullCors] = MOT_ANA_CompareClusters(C1,C2,'SpatialCorr'); %MOT_ANA_SummariseMotion(MOTION_VAR_DS2,IDX_DS2,Time_Labels,Space_Labels); MOT_ANA_SummariseMotion(MOTION_VAR_DS3,IDX_DS3,Time_Labels,Space_Labels); %MOT_ANA_SummariseMotion(MOTION_VAR_DS4,IDX_DS4,Time_Labels,Space_Labels); %[RealCorrs_DS2,NullCors_DS2] = MOT_ANA_CompareClusters(C1,CDS2); [RealCorrs,NullCors] = MOT_ANA_CompareClusters(C1,CDS3,'MSE'); %[RealCorrs_DS4,NullCors_DS4] = MOT_ANA_CompareClusters(C1,CDS4); MOT_ANA_SummariseMotion(MOTION_VAR_VAL,IDX_VAL,Time_Labels_red,Space_Labels_red); [RealCorrs,NullCors] = MOT_ANA_CompareClusters(C1_VAL,CDS3(:,1:12),'MSE'); %% 10. Associated three-way ANOVA % ANOVA assessment (time x space x group) % Creates the variables tmp = zeros(1,36); for t = 1:6 tmp(Time_Labels(t,:)) = t; end t_ANOVA = repmat(tmp,1,n_subjects); tmp = zeros(1,36); for p = 1:6 tmp(Space_Labels(p,:)) = p; end p_ANOVA = repmat(tmp,1,n_subjects); g_ANOVA = []; Data_ANOVA = []; for i = 1:n_subjects g_ANOVA = [g_ANOVA,IDX_SES1(i)*ones(1,n_TB*6)]; Data_ANOVA = [Data_ANOVA,MOTION_VAR(i,:)]; end [~,Tab,Stats] = anovan(Data_ANOVA,{t_ANOVA',p_ANOVA',g_ANOVA'},'model','full'); % How many times do we run the non-parametric case n_np = 10000; for null = 1:n_np tmp_idx = randperm(length(t_ANOVA)); tmp_idx2 = randperm(length(t_ANOVA)); tmp_idx3 = randperm(length(t_ANOVA)); [~,Tab,Stats] = anovan(Data_ANOVA,{t_ANOVA(tmp_idx)',p_ANOVA(tmp_idx2)',g_ANOVA(tmp_idx3)'},'model','full','display','off'); Tab = Tab(:,6); Tab_null(:,null) = [Tab{2:8}]; end F_threshold = prctile(Tab_null,99,2); clear W clear X clear Y clear Z clear E1 clear E2 clear E3 for s = 1:n_subjects X = zeros(6,2); X(:,1) = ones(6,1); X(:,2) = [1;2;3;4;5;6]; Y = [mean(MOTION_VAR(s,Time_Labels(1,:)),2);mean(MOTION_VAR(s,Time_Labels(2,:)),2);... mean(MOTION_VAR(s,Time_Labels(3,:)),2);mean(MOTION_VAR(s,Time_Labels(4,:)),2);... mean(MOTION_VAR(s,Time_Labels(5,:)),2);mean(MOTION_VAR(s,Time_Labels(6,:)),2)]; % Solves the linear model Beta_coefs(:,s) = pinv(X)*Y; end for s = 1:n_subjects XMOT(s) = mean(MOTION_VAR(s,Space_Labels(1,:)),2); YMOT(s) = mean(MOTION_VAR(s,Space_Labels(2,:)),2); ZMOT(s) = mean(MOTION_VAR(s,Space_Labels(3,:)),2); AlphaMOT(s) = mean(MOTION_VAR(s,Space_Labels(4,:)),2); BetaMOT(s) = mean(MOTION_VAR(s,Space_Labels(5,:)),2); GammaMOT(s) = mean(MOTION_VAR(s,Space_Labels(6,:)),2); end %% 11. Attempt of group fingerprinting as a function of spatiotemporal % motion properties X_KNN_Train = X(n_subjects+1:end); X_KNN_Train = X_KNN_Train(IDX_SES1 > 1); Y_KNN_Train = Y(n_subjects+1:end); Y_KNN_Train = Y_KNN_Train(IDX_SES1 > 1); Z_KNN_Train = Z(n_subjects+1:end); Z_KNN_Train = Z_KNN_Train(IDX_SES1 > 1); X_KNN_Test = X(1:n_subjects); X_KNN_Test = X_KNN_Test(IDX_SES1 > 1); Y_KNN_Test = Y(1:n_subjects); Y_KNN_Test = Y_KNN_Test(IDX_SES1 > 1); Z_KNN_Test = Z(1:n_subjects); Z_KNN_Test = Z_KNN_Test(IDX_SES1 > 1); C_grnd = IDX_SES2; C_grnd = C_grnd(IDX_SES1 > 1); % I will go for KNN = 10 as a reasonable choice for KNN = 5:5:300 KNN C_out = []; for s = 1:length(C_grnd) C_out(s) = MOTANA_KNN([X_KNN_Train,Y_KNN_Train,Z_KNN_Train]',C_grnd,[X_KNN_Test(s),Y_KNN_Test(s),... Z_KNN_Test(s)]',KNN,'sqeuclidean'); end C_out = C_out'; for c1 = 1:3 for c2 = 1:3 Acc_Reverse(c1,c2,KNN) = sum((C_grnd == c1) & (C_out == c2))/length(C_grnd)*100; end end end figure; hold on; for i = 1:3 subplot(4,1,i); plot(5:5:300,squeeze(Acc_Reverse(i,i,5:5:300)),'k','LineWidth',2); end subplot(4,1,4); plot(5:5:300,squeeze(Acc_Reverse(1,1,5:5:300))+squeeze(Acc_Reverse(2,2,5:5:300))+squeeze(Acc_Reverse(3,3,5:5:300)),'k'); % First question: what session can I predict best using SES1 as training ? CM = cbrewer('div','RdBu',1000); figure; imagesc(squeeze(Acc_Reverse(:,:,50))); colormap(flipud(CM)); ACC = squeeze(Acc_Reverse(:,:,50)); C_grnd = IDX_SES1; C_grnd = C_grnd(IDX_SES1 > 1); % I will go for KNN = 10 as a reasonable choice for KNN = 5:5:300 KNN C_out = []; for s = 1:length(C_grnd) C_out(s) = MOTANA_KNN([X_KNN_Test,Y_KNN_Test,Z_KNN_Test]',C_grnd,[X_KNN_Train(s),Y_KNN_Train(s),... Z_KNN_Train(s)]',KNN,'sqeuclidean'); end C_out = C_out'; for c1 = 2:4 for c2 = 2:4 Acc(c1,c2,KNN) = sum((C_grnd == c1) & (C_out == c2))/length(C_grnd)*100; end end end figure; hold on; for i = 1:3 subplot(4,1,i); plot(5:5:300,squeeze(Acc(i+1,i+1,5:5:300)),'k','LineWidth',2); end subplot(4,1,4); plot(5:5:300,squeeze(Acc(2,2,5:5:300))+squeeze(Acc(3,3,5:5:300))+squeeze(Acc(4,4,5:5:300)),'k'); % First question: what session can I predict best using SES1 as training ? CM = cbrewer('div','RdBu',1000); figure; imagesc(squeeze(Acc(2:4,2:4,50))); colormap(flipud(CM)); caxis([0 30]); figure; imagesc(squeeze(Acc(2:4,2:4,100))); colormap(flipud(CM)); caxis([0 30]); figure; imagesc(squeeze(Acc_Reverse(:,:,50))); colormap(flipud(CM)); caxis([0 30]); figure; imagesc(squeeze(Acc_Reverse(:,:,100))); colormap(flipud(CM)); caxis([0 30]); %% 12. Robustness of each individual feature % % I want to subsample 80% of subjects each time % n_subsamp = floor(0.8*n_subjects); % % for n = 1:100 % % n % % idx = randperm(n_subjects); % % % Clustering is performed with this optimal value % [IDXss(:,n),Centroidsss(:,:,n)] = kmeans(MOTION_VAR(idx(1:n_subsamp),:),[],'Distance',... % 'cosine','Start',repmat(Centroids,1,1,10000)); % end % % Ref = Centroids'; % % for n = 1 % Comp = squeeze(Centroidsss(:,:,n))'; % % figure; imagesc(corr(Ref,Comp)); % % end % % % Now, I want to assess what changes more or less % % for g = 1:5 % % tmp = squeeze(Centroidsss(g,:,:)); % MU(g,:) = mean(tmp,2); % STD(g,:) = std(tmp,[],2); % end % % figure; imagesc(MU./STD); %% 12. Preparation of the behavioural data (normalization of each column) Output = Behavioral_Data; % Normalize each column (keeping 'non-existing values') O2 = Output - repmat(nanmean(Output,1),n_subjects,1); O2 = O2 ./ repmat(nanstd(O2,1),n_subjects,1); %% 13. Conversion into a subset of components of interest % For each separate type of information provided in the HCP behavioral % assessment, we will derive solely one indicative feature through % probabilistic PCA. By this mean, we will also estimate missing entries % for most scores (all except the ones left alone to derive a feature; % i.e., not combined) % height, weight, BMI [tmp_PC,tmp_CM] = Assess_PC(O2,[1:3],SS,[]); PCA(:,1:2) = tmp_PC(:,1:2); PCA_title{1} = 'BMI (bad)'; PCA_title{2} = 'BMI (good)'; % Hematocrit [tmp_PC,tmp_CM] = Assess_PC(O2,[4,5],SS,[]); PCA(:,3) = tmp_PC(:,1); PCA_title{3} = 'Hematocrit'; % Blood pressure [tmp_PC,tmp_CM] = Assess_PC(O2,[6,7],SS,[]); PCA(:,4) = tmp_PC(:,1); PCA_title{4} = 'Blood pressure'; % Thyroid hormone PCA(:,5) = O2(:,8); PCA_title{5} = 'Thyroid hormone'; % HbA1C PCA(:,6) = O2(:,9); PCA_title{6} = 'HbA1C'; % MMSE PCA(:,7) = O2(:,10); PCA_title{7} = 'MMSE'; % Sleep [tmp_PC,tmp_CM] = Assess_PC(O2,[11:17],SS,[]); PCA(:,8) = tmp_PC(:,1); PCA_title{8} = 'Sleep'; % PicSeq [tmp_PC,tmp_CM] = Assess_PC(O2,[18,19],SS,[]); PCA(:,9) = tmp_PC(:,1); PCA_title{9} = 'PicSeq'; % CardSort [tmp_PC,tmp_CM] = Assess_PC(O2,[20,21],SS,[]); PCA(:,10) = tmp_PC(:,1); PCA_title{10} = 'CardSort'; % Flanker [tmp_PC,tmp_CM] = Assess_PC(O2,[22,23],SS,[]); PCA(:,11) = tmp_PC(:,1); PCA_title{11} = 'Flanker'; % PMAT [tmp_PC,tmp_CM] = Assess_PC(O2,[24:26],SS,[]); PCA(:,12) = tmp_PC(:,1); PCA_title{12} = 'PMAT'; % ReadEng [tmp_PC,tmp_CM] = Assess_PC(O2,[27,28],SS,[]); PCA(:,13) = tmp_PC(:,1); PCA_title{13} = 'ReadEng'; % PicVocab [tmp_PC,tmp_CM] = Assess_PC(O2,[29,30],SS,[]); PCA(:,14) = tmp_PC(:,1); PCA_title{14} = 'PicVocab'; % PicVocab [tmp_PC,tmp_CM] = Assess_PC(O2,[31,32],SS,[]); PCA(:,15) = tmp_PC(:,1); PCA_title{15} = 'ProcSpeed'; % DDISC [tmp_PC,tmp_CM] = Assess_PC(O2,[33,34],SS,[]); PCA(:,16) = tmp_PC(:,1); PCA_title{16} = 'DDISC'; % VSPLOT [tmp_PC,tmp_CM] = Assess_PC(O2,[35:37],SS,[]); PCA(:,17:18) = tmp_PC(:,1:2); PCA_title{17} = 'VSPLOT'; PCA_title{18} = 'VSPLOT (RT)'; % SCPT % WARNING: the first two components are sometimes switched (due to the % randomness inherent to pPCA); check tmp_CM (correlation between % individual scores and the PC) [tmp_PC,tmp_CM] = Assess_PC(O2,[38:45],SS,4); PCA(:,19:21) = tmp_PC(:,1:3); PCA_title{19} = 'SCPT (Performance)'; PCA_title{20} = 'SCPT (Over-positive)'; PCA_title{21} = 'SCPT (RT/Prudent)'; % IWRD [tmp_PC,tmp_CM] = Assess_PC(O2,[46,47],SS,[]); PCA(:,22) = tmp_PC(:,1); PCA_title{22} = 'IWRD'; % ListSort [tmp_PC,tmp_CM] = Assess_PC(O2,[48,49],SS,[]); PCA(:,23) = tmp_PC(:,1); PCA_title{23} = 'ListSort'; % Emotion (global) [tmp_PC,tmp_CM] = Assess_PC(O2,[50,51],SS,[]); PCA(:,24) = tmp_PC(:,1); PCA_title{24} = 'ER40 (Correct - RT)'; % Emotion [tmp_PC,tmp_CM] = Assess_PC(O2,[52:56],SS,[]); PCA(:,25) = tmp_PC(:,1); PCA_title{25} = 'ER40 (Response to sad)'; % Negative emotions [tmp_PC,tmp_CM] = Assess_PC(O2,[57:62],SS,[]); PCA(:,26) = tmp_PC(:,1); PCA_title{26} = 'Negative emotions'; % Positive emotion [tmp_PC,tmp_CM] = Assess_PC(O2,[63:65],SS,[]); PCA(:,27) = tmp_PC(:,1); PCA_title{27} = 'Positive emotions'; % Feeling of being supported [tmp_PC,tmp_CM] = Assess_PC(O2,[66:71],SS,[]); PCA(:,28) = tmp_PC(:,1); PCA_title{28} = 'Support (Negative)'; % Stress/efficacy [tmp_PC,tmp_CM] = Assess_PC(O2,[72,73],SS,[]); PCA(:,29) = tmp_PC(:,1); PCA_title{29} = 'Self efficacy - Stress'; % Emotion task [tmp_PC,tmp_CM] = Assess_PC(O2,[74:79],SS,[]); PCA(:,30:31) = tmp_PC(:,1:2); PCA_title{30} = 'Emotion task (RT)'; PCA_title{31} = 'Emotion task (Acc)'; % Gambling [tmp_PC,tmp_CM] = Assess_PC(O2,[80:87],SS,6); PCA(:,32:34) = tmp_PC(:,1:3); PCA_title{32} = 'Gambling (Conservative)'; PCA_title{33} = 'Gambling (Daring)'; PCA_title{34} = 'Gambling (Bad play)'; % Language [tmp_PC,tmp_CM] = Assess_PC(O2,[88:93],SS,[]); PCA(:,35) = tmp_PC(:,1); PCA_title{35} = 'Language (overall perf.)'; % Relational [tmp_PC,tmp_CM] = Assess_PC(O2,[94:97],SS,[]); PCA(:,36:37) = tmp_PC(:,1:2); PCA_title{36} = 'Relational (RT)'; PCA_title{37} = 'Relational (Acc)'; % ToM [tmp_PC,tmp_CM] = Assess_PC(O2,[98:103],SS,[]); PCA(:,38) = tmp_PC(:,1); PCA_title{38} = 'ToM'; % WM [tmp_PC,tmp_CM] = Assess_PC(O2,[104:107],SS,[]); PCA(:,39) = tmp_PC(:,1); PCA_title{39} = 'WM (Bad)'; % Endurance [tmp_PC,tmp_CM] = Assess_PC(O2,[108,109],SS,[]); PCA(:,40) = tmp_PC(:,1); PCA_title{40} = 'Endurance'; % Gait speed PCA(:,41) = O2(:,110); PCA_title{41} = 'Gait speed'; % Dexterity [tmp_PC,tmp_CM] = Assess_PC(O2,[111,112],SS,[]); PCA(:,42) = tmp_PC(:,1); PCA_title{42} = 'Dexterity'; % Strength [tmp_PC,tmp_CM] = Assess_PC(O2,[113,114],SS,[]); PCA(:,43) = tmp_PC(:,1); PCA_title{43} = 'Strength'; % NEOFAC [tmp_PC,tmp_CM] = Assess_PC(O2,[115:119],SS,[]); PCA(:,44:45) = tmp_PC(:,1:2); PCA_title{44} = 'NEOFAC (Introverted)'; PCA_title{45} = 'NEOFAC (Daring)'; % Noise PCA(:,46) = O2(:,120); PCA_title{46} = 'Noise'; % Odor [tmp_PC,tmp_CM] = Assess_PC(O2,[121,122],SS,[]); PCA(:,47) = tmp_PC(:,1); PCA_title{47} = 'Odor'; % Pain PCA(:,48) = O2(:,123); PCA_title{48} = 'Pain'; % Taste [tmp_PC,tmp_CM] = Assess_PC(O2,[124,125],SS,[]); PCA(:,49) = tmp_PC(:,1); PCA_title{49} = 'Taste'; % Mars [tmp_PC,tmp_CM] = Assess_PC(O2,[126:128],SS,[]); PCA(:,50) = tmp_PC(:,1); PCA_title{50} = 'Mars'; % Anxiety [tmp_PC,tmp_CM] = Assess_PC(O2,[129,130,131,132],SS,[]); PCA(:,51) = tmp_PC(:,1); PCA_title{51} = 'Anxiety'; % Withdrawal [tmp_PC,tmp_CM] = Assess_PC(O2,[133,134],SS,[]); PCA(:,52) = tmp_PC(:,1); PCA_title{52} = 'Withdrawal'; % Somatic [tmp_PC,tmp_CM] = Assess_PC(O2,[135,136,137,138],SS,[]); PCA(:,53) = tmp_PC(:,1); PCA_title{53} = 'Somatic problems'; % Thought problems [tmp_PC,tmp_CM] = Assess_PC(O2,[139,140],SS,[]); PCA(:,54) = tmp_PC(:,1); PCA_title{54} = 'Thought problems'; % Attention problems [tmp_PC,tmp_CM] = Assess_PC(O2,[141,142],SS,[]); PCA(:,55) = tmp_PC(:,1); PCA_title{55} = 'Attention problems'; % Aggressiveness [tmp_PC,tmp_CM] = Assess_PC(O2,[143,144],SS,[]); PCA(:,56) = tmp_PC(:,1); PCA_title{56} = 'Aggressiveness'; % Rule breaking [tmp_PC,tmp_CM] = Assess_PC(O2,[145,146],SS,[]); PCA(:,57) = tmp_PC(:,1); PCA_title{57} = 'Rule breaking'; % Intrusive [tmp_PC,tmp_CM] = Assess_PC(O2,[147,148],SS,[]); PCA(:,58) = tmp_PC(:,1); PCA_title{58} = 'Intrusive'; % Internalizing [tmp_PC,tmp_CM] = Assess_PC(O2,[149,150],SS,[]); PCA(:,59) = tmp_PC(:,1); PCA_title{59} = 'Internalizing'; % Externalizing [tmp_PC,tmp_CM] = Assess_PC(O2,[151,152],SS,[]); PCA(:,60) = tmp_PC(:,1); PCA_title{60} = 'Externalizing'; % Depression [tmp_PC,tmp_CM] = Assess_PC(O2,[153,154,155,156],SS,[]); PCA(:,61) = tmp_PC(:,1); PCA_title{61} = 'Depression'; % Avoidance [tmp_PC,tmp_CM] = Assess_PC(O2,[157,158],SS,[]); PCA(:,62) = tmp_PC(:,1); PCA_title{62} = 'Avoidance'; % ADHD [tmp_PC,tmp_CM] = Assess_PC(O2,[159,160],SS,[]); PCA(:,63) = tmp_PC(:,1); PCA_title{63} = 'ADHD'; % Inattention PCA(:,64) = O2(:,161); PCA_title{64} = 'Inattention'; % Hyperresponsiveness PCA(:,65) = O2(:,162); PCA_title{65} = 'Hyperresponsiveness'; % Antisocial [tmp_PC,tmp_CM] = Assess_PC(O2,[163,164],SS,[]); PCA(:,66) = tmp_PC(:,1); PCA_title{66} = 'Antisocial'; % Alcohol [tmp_PC,tmp_CM] = Assess_PC(O2,[165:171],SS,[]); PCA(:,67) = tmp_PC(:,1); PCA_title{67} = 'Alcohol'; % Tobacco [tmp_PC,tmp_CM] = Assess_PC(O2,[172:175],SS,[]); PCA(:,68) = tmp_PC(:,1); PCA_title{68} = 'Tobacco'; % Drugs [tmp_PC,tmp_CM] = Assess_PC(O2,[176:182],SS,[]); PCA(:,69) = tmp_PC(:,1); PCA_title{69} = 'Drugs'; % At this stage, we have reduced the dimensionality of each of our behavioral % variable type of interest %% 14. Further processing of the obtained data % Some individual variables have been kept without undergoing PPCA; in such % a case, we want to keep only the ones with few NaN entries %%%% STEP 1 % We want to remove the columns that are no good (too many Nans) T_nan = 5; % Scores to remove idx_toremove = []; for SM = 1:size(PCA,2) tmp = PCA(:,SM); if sum(isnan(tmp)) > T_nan idx_toremove = [idx_toremove,SM]; end end PCA(:,idx_toremove) = []; PCA_title(idx_toremove) = []; %%%% STEP 2 % Remove AM if max(y) > 100*mean(y) with (y) = (x - med(x))^2 idx_toremove = []; for SM = 1:size(PCA,2) tmp = PCA(:,SM); y = (tmp-median(tmp)).^2; if max(y) > 100 * mean(y) idx_toremove = [idx_toremove,SM]; end end PCA(:,idx_toremove) = []; PCA_title(idx_toremove) = []; %%%% STEP 3 % Remove if more than 50% of subjects show the same value idx_toremove = []; P_samevalsubj = 50; for SM = 1:size(PCA,2) tmp = PCA(:,SM); [~,tmp_n] = mode(tmp); if tmp_n >= floor(P_samevalsubj*n_subjects/100) idx_toremove = [idx_toremove,SM]; end end PCA(:,idx_toremove) = []; PCA_title(idx_toremove) = []; %%%% STEP 4 % Rank-based inverse Gaussian transformation for SM = 1:size(PCA,2) rank = tiedrank(PCA(:,SM)); p = rank / ( length(rank) + 1 ); %# +1 to avoid Inf for the max point PCA(:,SM) = norminv( p, 0, 1 ); end % Number of missing values P_missing = sum(sum(isnan(PCA)))/size(PCA,1)/size(PCA,2)*100; %% 15. Univariate assessment of the derived anthropomorphic and % behavioral features link to motion (whole dataset and separate assessments) % We compute the Fisher score (and related null distribution when shuffling % behvior and motion entries with respect to each other) for each feature % of interest [F,Fnull] = MOT_LinkBehavMotion(PCA,IDX_SES2); % Bonferroni correction for the number of univariate assessments and % subsequent derivation of significance thresholds alpha = 5/size(PCA,2); Fthresh = squeeze(prctile(Fnull,100-alpha,2)); % Another attempt using the proposal by Holmes and Nichols (Human Brain % Mapping, 2001). We compute a distribution based on the max of each score % null distribution and then, threshold that at 5% tmp_F = max(Fnull,[],2); F_Holmes = prctile(tmp_F,100-alpha); % Yet another attempt using FDR correction for i = 1:size(PCA,2) tmp = Fnull(i,:); tmpF = F(i); pval(i) = sum(tmp > tmpF)/length(tmp); end % FDR correction [h, crit_p]=fdr_bh(pval,0.05); % Determines the thresold level matching crit_p for i = 1:size(PCA,2) tmp = Fnull(i,:); F_FDR(i) = prctile(tmp,100 - 100*crit_p); end % Plotting of univariate results figure; bar(F); hold on; for i = 1:size(PCA,2) plot([i-0.4,i+0.4],[Fthresh(i),Fthresh(i)],'color',[0.3 0.3 0.3],'LineWidth',1.5); plot([i-0.4,i+0.4],[F_FDR(i),F_FDR(i)],'color',[0.6 0.6 0.6],'LineWidth',1.5); end %% % Indices of the significant variables idx_uni = [1,2,3,5,7,10,11,13,14,29,33,34]; % For each, post-hoc comparison across groups for i = 1:length(idx_uni) tmp = PCA(:,idx_uni(i)); [p_12(i),~,STATS_12{i}] = ranksum(tmp(IDX_SES1 == 1),tmp(IDX_SES1 == 2)); [p_13(i),~,STATS_13{i}] = ranksum(tmp(IDX_SES1 == 1),tmp(IDX_SES1 == 3)); [p_14(i),~,STATS_14{i}] = ranksum(tmp(IDX_SES1 == 1),tmp(IDX_SES1 == 4)); [p_23(i),~,STATS_23{i}] = ranksum(tmp(IDX_SES1 == 2),tmp(IDX_SES1 == 3)); [p_24(i),~,STATS_24{i}] = ranksum(tmp(IDX_SES1 == 2),tmp(IDX_SES1 == 4)); [p_34(i),~,STATS_34{i}] = ranksum(tmp(IDX_SES1 == 3),tmp(IDX_SES1 == 4)); Delta_12(i) = nanmean(tmp(IDX_SES1 == 1)) - nanmean(tmp(IDX_SES1 == 2)); Delta_13(i) = nanmean(tmp(IDX_SES1 == 1)) - nanmean(tmp(IDX_SES1 == 3)); Delta_14(i) = nanmean(tmp(IDX_SES1 == 1)) - nanmean(tmp(IDX_SES1 == 4)); Delta_23(i) = nanmean(tmp(IDX_SES1 == 2)) - nanmean(tmp(IDX_SES1 == 3)); Delta_24(i) = nanmean(tmp(IDX_SES1 == 2)) - nanmean(tmp(IDX_SES1 == 4)); Delta_34(i) = nanmean(tmp(IDX_SES1 == 3)) - nanmean(tmp(IDX_SES1 == 4)); % tmpf = figure; % set(gcf,'color','w'); % imagesc([0,Delta_12(i),Delta_13(i),Delta_14(i);... % 0,0,Delta_23(i),Delta_24(i);... % 0,0,0,Delta_34(i);... % 0,0,0,0]); % set(gca,'Box','off'); % CM = cbrewer('div','RdBu',1000); % colormap(flipud(CM)); % tmp_scale = max(abs([Delta_12(i),Delta_13(i),Delta_14(i),Delta_23(i),... % Delta_24(i),Delta_34(i)])); % % caxis([-1,1]); % axis off % axis square % colorbar % print(tmpf,'-depsc',['UNIV_',num2str(i)]); % close(tmpf); end %% 16. Combination of both sides with PLS % If I keep both session types together % Because we still had a small amount of NaN entries left, concerned % subjects are removed prior to PLS MOTION_VAR_trim = [MOTION_VAR(1:n_subjects,:),MOTION_VAR(n_subjects+1:end,:)]; MOTION_VAR_trim(isnan(sum(PCA,2)),:) = []; PCA_trim = PCA; PCA_trim(isnan(sum(PCA,2)),:) = []; [U,V,EV,sprob,BS_U,BS_V,Ub_vect,Vb_vect] = RunPLS(MOTION_VAR_trim,PCA_trim); % MOTION_VAR_trim_VAL = [MOTION_VAR_VAL(1:n_subjects,:)]; % MOTION_VAR_trim_VAL(isnan(sum(PCA,2)),:) = []; % % [U_VAL,V_VAL,EV_VAL,sprob_VAL,BS_U_VAL,BS_V_VAL,Ub_vect_VAL,Vb_vect_VAL] = RunPLS(MOTION_VAR_trim_VAL,PCA_trim); % % %% Preparing data for EN % % Motion_features = [X(1:n_subjects),Y(1:n_subjects),Z(1:n_subjects),... % X(n_subjects+1:end),Y(n_subjects+1:end),Z(n_subjects+1:end)]; % %% 17. PLS-related plots % Here, we plot the results of the PLS analysis % How many components do we want to plot ? n_comps = 4; % Latent scores for behaviors for the subjects (how strongly a component is % expressed in a subject) Lm = MOTION_VAR_trim*U; Lm = Lm(:,1:n_comps); % Latent scores for motion Lb = PCA_trim*V; Lb = Lb(:,1:n_comps); idx_togetout = find(isnan(sum(PCA,2))); % Trims the patches data to exclude subjects for whom NaN behavioral scores remain F1b = F1; F2b = F2; F3b = F3; F4b = F4; F5b = F5; F6b = F6; F1b(idx_togetout) = []; F2b(idx_togetout) = []; F3b(idx_togetout) = []; F4b(idx_togetout) = []; F5b(idx_togetout) = []; F6b(idx_togetout) = []; for j = 1:n_comps % Computing the time and space indices for my 36 values for t = 1:n_TB Summary_time_PLS(j,t) = mean(U(Time_Labels(t,:),j)); end for p = 1:6 Summary_position_PLS(j,p) = mean(U(Space_Labels(p,:),j)); end % Same for my bootstrapping folds for k = 1:1000 for t = 1:n_TB Summary_time_boot(j,t,k) = mean(squeeze(Ub_vect(k,Time_Labels(t,:),j))); end for p = 1:6 Summary_position_boot(j,p,k) = mean(squeeze(Ub_vect(k,Space_Labels(p,:),j))); end end % Computes standard deviation V_STD = squeeze(std(Vb_vect,[],1)); U_STDt = squeeze(std(Summary_time_boot,[],3)); U_STDs = squeeze(std(Summary_position_boot,[],3)); % Computes z-scores U_zt = Summary_time_PLS./U_STDt; U_zs = Summary_position_PLS./U_STDs; V_z = V./V_STD; if is_plot figure; set(gcf,'color','w'); bar(1:size(V,1),V_z(:,j)); set(gca,'Box','off'); figure; set(gcf,'color','w'); subplot(2,1,1); bar(1:6,U_zt(j,:)); set(gca,'Box','off'); subplot(2,1,2); bar(1:6,U_zs(j,:)); set(gca,'Box','off'); tmpf = figure; hold on; for i = 1:length(F1b) patch(F1b{i}(:,1),F1b{i}(:,2),F1b{i}(:,3),Lm(i,j)); patch(F2b{i}(:,1),F2b{i}(:,2),F2b{i}(:,3),Lm(i,j)); patch(F3b{i}(:,1),F3b{i}(:,2),F3b{i}(:,3),Lm(i,j)); patch(F4b{i}(:,1),F4b{i}(:,2),F4b{i}(:,3),Lm(i,j)); patch(F5b{i}(:,1),F5b{i}(:,2),F5b{i}(:,3),Lm(i,j)); patch(F6b{i}(:,1),F6b{i}(:,2),F6b{i}(:,3),Lm(i,j)); end CM = cbrewer('div','RdBu',100); colormap(flipud(CM)); tmp_scale = max(abs(Lm(:,j))); caxis([-22,22]); grid on; xlabel('X'); ylabel('Y'); zlabel('Z'); colorbar set(gcf,'color','w'); set(gca,'Box','off'); print(tmpf,'-dtiff',['FANCY_',num2str(j)]); close(tmpf); end end for j = 1:n_comps % Computing the time and space indices for my 36 values for t = 1:n_TB Summary_time_PLS(j,t) = mean(U(36+Time_Labels(t,:),j)); end for p = 1:6 Summary_position_PLS(j,p) = mean(U(36+Space_Labels(p,:),j)); end % Same for my bootstrapping folds for k = 1:1000 for t = 1:n_TB Summary_time_boot(j,t,k) = mean(squeeze(Ub_vect(k,36+Time_Labels(t,:),j))); end for p = 1:6 Summary_position_boot(j,p,k) = mean(squeeze(Ub_vect(k,36+Space_Labels(p,:),j))); end end % Computes standard deviation V_STD = squeeze(std(Vb_vect,[],1)); U_STDt = squeeze(std(Summary_time_boot,[],3)); U_STDs = squeeze(std(Summary_position_boot,[],3)); % Computes z-scores U_zt = Summary_time_PLS./U_STDt; U_zs = Summary_position_PLS./U_STDs; V_z = V./V_STD; if is_plot figure; set(gcf,'color','w'); bar(1:size(V,1),V_z(:,j)); set(gca,'Box','off'); figure; set(gcf,'color','w'); subplot(2,1,1); bar(1:6,U_zt(j,:)); set(gca,'Box','off'); subplot(2,1,2); bar(1:6,U_zs(j,:)); set(gca,'Box','off'); end end %% 18. Understanding how components relate to each other % Latent variable relationships for i = 1:n_comps figure; set(gcf,'color','w'); scatter(Lm(:,i),Lb(:,i),'k.'); set(gca,'Box','off'); end % Correlation between the different components CorrComp = corr(Lm,Lb); figure; set(gcf,'color','w'); imagesc(CorrComp); CM = cbrewer('div','RdBu',1000); colormap(flipud(CM)); caxis([-0.5 0.5]); % Correlation between subject-level expression of the components MotComp = corr(Lm); BehComp = corr(Lb); figure; set(gcf,'color','w'); imagesc(MotComp); CM = cbrewer('div','RdBu',1000); colormap(flipud(CM)); caxis([-0.5 0.5]); figure; set(gcf,'color','w'); imagesc(BehComp); CM = cbrewer('div','RdBu',1000); colormap(flipud(CM)); caxis([-0.5 0.5]); %% 19. Relating age, gender and motion (quantified as the traditional FD) % to the components % Creates the gender and age vectors that only conserve subjects without % NaNs Age2 = Age; Age2(isnan(sum(PCA,2)),:) = []; Gender2 = Gender; Gender2(isnan(sum(PCA,2)),:) = []; Handedness2 = Handedness; Handedness2(isnan(sum(PCA,2)),:) = []; % For age for i = 1:n_comps figure; set(gcf,'color','w'); scatter(Age2,Lm(:,i),'k.'); set(gca,'Box','off'); figure; set(gcf,'color','w'); scatter(Age2,Lb(:,i),'k.'); set(gca,'Box','off'); [R_age(i,1), p_age(i,1)] = AssessAgeEffect(Age2,Lm(:,i)); [R_age(i,2), p_age(i,2)] = AssessAgeEffect(Age2,Lb(:,i)); [R_age_abs(i,1), p_age_abs(i,1)] = AssessAgeEffect(Age2,abs(Lm(:,i))); [R_age_abs(i,2), p_age_abs(i,2)] = AssessAgeEffect(Age2,abs(Lb(:,i))); end % For handedness for i = 1:n_comps figure; set(gcf,'color','w'); scatter(Handedness2,Lm(:,i),'k.'); set(gca,'Box','off'); figure; set(gcf,'color','w'); scatter(Handedness2,Lb(:,i),'k.'); set(gca,'Box','off'); [R_HD(i,1), p_HD(i,1)] = AssessAgeEffect(Handedness2,Lm(:,i)); [R_HD(i,2), p_HD(i,2)] = AssessAgeEffect(Handedness2,Lb(:,i)); [R_HD_abs(i,1), p_HD_abs(i,1)] = AssessAgeEffect(abs(Handedness2),Lm(:,i)); [R_HD_abs(i,2), p_HD_abs(i,2)] = AssessAgeEffect(abs(Handedness2),Lb(:,i)); end % For gender for i = 1:n_comps figure; tmp_scale = max(abs(Lm(:,i))); tmp_scale2 = max(abs(Lb(:,i))); subplot(1,2,1); boxplot(Lm(Gender2==0,i)); ylim([-tmp_scale,tmp_scale]); subplot(1,2,2); boxplot(Lm(Gender2==1,i)); ylim([-tmp_scale,tmp_scale]); figure; subplot(1,2,1); boxplot(Lb(Gender2==0,i)); ylim([-tmp_scale2,tmp_scale2]); subplot(1,2,2); boxplot(Lb(Gender2==1,i)); ylim([-tmp_scale2,tmp_scale2]); tmpf = figure; COL{1} = [0.5 0.5 0.5]; COL{2} = [0 0 0]; MakeViolin(Lb(Gender==0,i)',gca,{''},'',COL,1,1); ylim([-8 8]); print(tmpf,'-depsc',['VIOLIN_GENDER']); close(tmpf); [t_gender(i,1), p_gender(i,1)] = AssessGenderEffect(Gender2,Lm(:,i)); [t_gender(i,2), p_gender(i,2)] = AssessGenderEffect(Gender2,Lb(:,i)); [t_gender_abs(i,1), p_gender_abs(i,1)] = AssessGenderEffect(Gender2,abs(Lm(:,i))); [t_gender_abs(i,2), p_gender_abs(i,2)] = AssessGenderEffect(Gender2,abs(Lb(:,i))); end % For Framewise displacement FD_Data2 = FD_Data; FD_Data2(:,isnan(sum(PCA,2)),:) = []; TemporalMaskAll2 = TemporalMask_All; TemporalMaskAll2(:,isnan(sum(PCA,2)),:) = []; % FD is size 1199 x 224; I want to compute the mean FD across non scrubbed % frames for s = 1:936 for ses = 1:4 FD_s(s,ses) = mean(FD_Data2(~TemporalMaskAll2(:,s,ses),s,ses)); FD_ns(s,ses) = mean(FD_Data2(:,s,ses)); end end for i = 1:n_comps for ses = 1:4 figure; set(gcf,'color','w'); scatter(FD_s(:,ses),Lm(:,i),'k.'); set(gca,'Box','off'); figure; set(gcf,'color','w'); scatter(FD_s(:,ses),Lb(:,i),'k.'); set(gca,'Box','off'); [R_FDs(i,ses,1), p_FDs(i,ses,1)] = AssessAgeEffect(FD_s(:,ses),Lm(:,i)); [R_FDs(i,ses,2), p_FDs(i,ses,2)] = AssessAgeEffect(FD_s(:,ses),Lb(:,i)); [R_FDs_abs(i,ses,1), p_FDs_abs(i,ses,1)] = AssessAgeEffect(FD_s(:,ses),abs(Lm(:,i))); [R_FDs_abs(i,ses,2), p_FDs_abs(i,ses,2)] = AssessAgeEffect(FD_s(:,ses),abs(Lb(:,i))); end end diff --git a/Matlab_Scripts/Script_Motion_Revision_Validation.m b/Matlab_Scripts/Script_Validation.m similarity index 100% rename from Matlab_Scripts/Script_Motion_Revision_Validation.m rename to Matlab_Scripts/Script_Validation.m diff --git a/Matlab_Scripts/jUpperTriMatToVec.m b/Matlab_Scripts/jUpperTriMatToVec.m deleted file mode 100644 index 693b942..0000000 --- a/Matlab_Scripts/jUpperTriMatToVec.m +++ /dev/null @@ -1,32 +0,0 @@ -function v=jUpperTriMatToVec(m,varargin) -% converts the upper-triangular part of a matrix to a vector -% -% IN: -% m: matrix -% offset: offset above leading diagonal, fed to triu function -% OUT: -% v: vector of upper-triangular values -% -% v1.0 Oct 2009 Jonas Richiardi -% - initial release -% v1.1 Feb 2015 Dimitri Van De Ville -% - faster version by single index - -switch nargin - case 1 - offset=1; - case 2 - offset=varargin{1}; -end - -% get indices of upper triangular part (Peter Acklam's trick) -%[m_i m_j] = find(triu(ones(size(m)), offset)); -idx = find(triu(ones(size(m)), offset)); - -v=m(idx); - -% copy to vector -%v=zeros(numel(m_i),1); -%for v_idx=1:numel(m_i) -% v(v_idx)=m(m_i(v_idx),m_j(v_idx)); -%end \ No newline at end of file diff --git a/Matlab_Scripts/munkres.m b/Matlab_Scripts/munkres.m deleted file mode 100644 index 58a30c8..0000000 --- a/Matlab_Scripts/munkres.m +++ /dev/null @@ -1,200 +0,0 @@ -function [assignment,cost] = munkres(costMat) -% MUNKRES Munkres (Hungarian) Algorithm for Linear Assignment Problem. -% -% [ASSIGN,COST] = munkres(COSTMAT) returns the optimal column indices, -% ASSIGN assigned to each row and the minimum COST based on the assignment -% problem represented by the COSTMAT, where the (i,j)th element represents the cost to assign the jth -% job to the ith worker. -% -% Partial assignment: This code can identify a partial assignment is a full -% assignment is not feasible. For a partial assignment, there are some -% zero elements in the returning assignment vector, which indicate -% un-assigned tasks. The cost returned only contains the cost of partially -% assigned tasks. - -% This is vectorized implementation of the algorithm. It is the fastest -% among all Matlab implementations of the algorithm. - -% Examples -% Example 1: a 5 x 5 example -%{ -[assignment,cost] = munkres(magic(5)); -disp(assignment); % 3 2 1 5 4 -disp(cost); %15 -%} -% Example 2: 400 x 400 random data -%{ -n=400; -A=rand(n); -tic -[a,b]=munkres(A); -toc % about 2 seconds -%} -% Example 3: rectangular assignment with inf costs -%{ -A=rand(10,7); -A(A>0.7)=Inf; -[a,b]=munkres(A); -%} -% Example 4: an example of partial assignment -%{ -A = [1 3 Inf; Inf Inf 5; Inf Inf 0.5]; -[a,b]=munkres(A) -%} -% a = [1 0 3] -% b = 1.5 -% Reference: -% "Munkres' Assignment Algorithm, Modified for Rectangular Matrices", -% http://csclab.murraystate.edu/bob.pilgrim/445/munkres.html - -% version 2.3 by Yi Cao at Cranfield University on 11th September 2011 - -assignment = zeros(1,size(costMat,1)); -cost = 0; - -validMat = costMat == costMat & costMat < Inf; -bigM = 10^(ceil(log10(sum(costMat(validMat))))+1); -costMat(~validMat) = bigM; - -% costMat(costMat~=costMat)=Inf; -% validMat = costMat0) - break - end - coverColumn = false(1,n); - coverColumn(starZ(starZ>0))=true; - coverRow = false(n,1); - primeZ = zeros(n,1); - [rIdx, cIdx] = find(dMat(~coverRow,~coverColumn)==bsxfun(@plus,minR(~coverRow),minC(~coverColumn))); - while 1 - %************************************************************************** - % STEP 4: Find a noncovered zero and prime it. If there is no starred - % zero in the row containing this primed zero, Go to Step 5. - % Otherwise, cover this row and uncover the column containing - % the starred zero. Continue in this manner until there are no - % uncovered zeros left. Save the smallest uncovered value and - % Go to Step 6. - %************************************************************************** - cR = find(~coverRow); - cC = find(~coverColumn); - rIdx = cR(rIdx); - cIdx = cC(cIdx); - Step = 6; - while ~isempty(cIdx) - uZr = rIdx(1); - uZc = cIdx(1); - primeZ(uZr) = uZc; - stz = starZ(uZr); - if ~stz - Step = 5; - break; - end - coverRow(uZr) = true; - coverColumn(stz) = false; - z = rIdx==uZr; - rIdx(z) = []; - cIdx(z) = []; - cR = find(~coverRow); - z = dMat(~coverRow,stz) == minR(~coverRow) + minC(stz); - rIdx = [rIdx(:);cR(z)]; - cIdx = [cIdx(:);stz(ones(sum(z),1))]; - end - if Step == 6 - % ************************************************************************* - % STEP 6: Add the minimum uncovered value to every element of each covered - % row, and subtract it from every element of each uncovered column. - % Return to Step 4 without altering any stars, primes, or covered lines. - %************************************************************************** - [minval,rIdx,cIdx]=outerplus(dMat(~coverRow,~coverColumn),minR(~coverRow),minC(~coverColumn)); - minC(~coverColumn) = minC(~coverColumn) + minval; - minR(coverRow) = minR(coverRow) - minval; - else - break - end - end - %************************************************************************** - % STEP 5: - % Construct a series of alternating primed and starred zeros as - % follows: - % Let Z0 represent the uncovered primed zero found in Step 4. - % Let Z1 denote the starred zero in the column of Z0 (if any). - % Let Z2 denote the primed zero in the row of Z1 (there will always - % be one). Continue until the series terminates at a primed zero - % that has no starred zero in its column. Unstar each starred - % zero of the series, star each primed zero of the series, erase - % all primes and uncover every line in the matrix. Return to Step 3. - %************************************************************************** - rowZ1 = find(starZ==uZc); - starZ(uZr)=uZc; - while rowZ1>0 - starZ(rowZ1)=0; - uZc = primeZ(rowZ1); - uZr = rowZ1; - rowZ1 = find(starZ==uZc); - starZ(uZr)=uZc; - end -end - -% Cost of assignment -rowIdx = find(validRow); -colIdx = find(validCol); -starZ = starZ(1:nRows); -vIdx = starZ <= nCols; -assignment(rowIdx(vIdx)) = colIdx(starZ(vIdx)); -pass = assignment(assignment>0); -pass(~diag(validMat(assignment>0,pass))) = 0; -assignment(assignment>0) = pass; -cost = trace(costMat(assignment>0,assignment(assignment>0))); - -function [minval,rIdx,cIdx]=outerplus(M,x,y) -ny=size(M,2); -minval=inf; -for c=1:ny - M(:,c)=M(:,c)-(x+y(c)); - minval = min(minval,min(M(:,c))); -end -[rIdx,cIdx]=find(M==minval); diff --git a/Data_of_Interest/HCPSubjectData.mat b/Results/Section_3.1/HCPSubjectData.mat similarity index 100% rename from Data_of_Interest/HCPSubjectData.mat rename to Results/Section_3.1/HCPSubjectData.mat