% PARSE_PROTEOMICS produces the plots necessary to analyze our proteomics data. % % Blanchoud Group, UNIFR % Simon Blanchoud % 18/05/2021 function parse_proteomics % Location of the reference files prot_file = '/home/blanchou/Documents/Projects/Techniques/MolecularBiology/Proteomics/proteinGroups_PP2101_normalized.csv'; % Gather the data of our manual measurements of water quality fid = fopen(prot_file, 'r'); if fid > 0 line = fgetl(fid); headers = strsplit(line, ','); ncol = length(headers)-1; data = NaN(5000, ncol); ids = cell(5000, 1); % Parse the CSV manually i = 1; line = fgetl(fid); while ischar(line) columns = strsplit(line, ',', 'COLLAPSEDELIMITERS', false); ids{i} = columns{1}; for j=1:ncol if (~isempty(columns{j+1})) data(i, j) = str2double(columns{j+1}); end end line = fgetl(fid); i = i + 1; end fclose(fid); data = data(1:i-1, :); ids = ids(1:i-1); end nprots = data(:,1); confs = data(:,2:4); conf = sum(confs, 2); conf(conf==0) = 1; valids = (nprots == 1) & all(confs > 1, 2) & any(confs == 3, 2); dprots = hist(nprots, [1:5]); dconfs = hist(confs, [0:3]); dvals = hist(confs(valids,:), [0:3]); figure; subplot(2,2,1); bar([1:5], dprots, 'facecolor', 'k'); xt = get(gca, 'xticklabel'); xt{end} = ['>' xt{end}]; set(gca, 'yscale', 'log', 'xticklabel', xt); title('Ambiguous peptide assignments') xlabel('Number of possible proteins per peptide'); ylabel('Peptide number'); subplot(2,2,2); bar([0:3], dconfs); legend(headers(3:5)) subplot(2,2,3); bar([0:3], dconfs); set(gca, 'yscale', 'log'); y = ylim(); title('Peptide detection over biological replica') xlabel('Number of detections per peptide'); ylabel('Peptide number'); subplot(2,2,4); bar([0:3], dvals); set(gca, 'yscale', 'log', 'ylim', y); title('Peptide detection of most robust peptides') xlabel('Number of detections per peptide'); ylabel('Peptide number'); disp('N. proteins') disp(dprots) disp('N. peptides') disp(dconfs) disp('N. robust') disp(dvals) keyboard data = data(:,5:end); figure; subplot(2,2,1); [up1,dn1,col] = volcano_plot(data(:,1), data(:,2), conf); axis([-12 12 0 25]) ylabel('-log_2 (p-value)') xlabel('-log_2 (fold changes)') title('Control - Takeover') subplot(2,2,2); [x,y]=meshgrid([1:3],[1:size(col,1)/3]); scatter(x,y,[],col, 'filled'); axis([-1 5 -1 11]) xlabel('Groups') ylabel('Confidence') title('colorcode legend') subplot(2,2,3); [up2,dn2] = volcano_plot(data(:,3), data(:,4), conf); axis([-12 12 0 25]) ylabel('-log_2 (p-value)') xlabel('-log_2 (fold changes)') title('Control - WBR 3 dpa') subplot(2,2,4); [up3,dn3] = volcano_plot(data(:,5), data(:,6), conf); axis([-12 12 0 25]) ylabel('-log_2 (p-value)') xlabel('-log_2 (fold changes)') title('Takeover - WBR 3 dpa') keyboard figure; subplot(2,2,1); [vup1,vdn1,col] = volcano_plot(data(:,1), data(:,2), conf, valids); axis([-12 12 0 25]) ylabel('-log_2 (p-value)') xlabel('-log_2 (fold changes)') title('Control - Takeover') subplot(2,2,2); [x,y]=meshgrid([1:3],[1:size(col,1)/3]); scatter(x,y,[],col, 'filled'); axis([-1 5 -1 11]) xlabel('Groups') ylabel('Confidence') title('colorcode legend') subplot(2,2,3); [vup2,vdn2] = volcano_plot(data(:,3), data(:,4), conf, valids); axis([-12 12 0 25]) ylabel('-log_2 (p-value)') xlabel('-log_2 (fold changes)') title('Control - WBR 3 dpa') subplot(2,2,4); [vup3,vdn3] = volcano_plot(data(:,5), data(:,6), conf, valids); axis([-12 12 0 25]) ylabel('-log_2 (p-value)') xlabel('-log_2 (fold changes)') title('Takeover - WBR 3 dpa') keyboard figure; vuu = vup2 & vup3; vud = vup2 & vdn3; vdu = vdn2 & vup3; vdd = vdn2 & vdn3; vun = vup2 & ~(vup3 | vdn3); vdn = vdn2 & ~(vup3 | vdn3); vnu = vup3 & ~(vup2 | vdn2); vnd = vdn3 & ~(vup2 | vdn2); vnn = valids & ~(vup2 | vdn2 | vup3 | vdn3); cpurp = brewermap(3,'Purples'); corag = brewermap(3,'Oranges'); subplot(2,2,1); hold on; scatter(0.5*(data(vnu,3)+data(vnu,5)),0.5*(data(vnu,4)+data(vnu,6)),[],corag(2,:), 'filled'); scatter(0.5*(data(vuu,3)+data(vuu,5)),0.5*(data(vuu,4)+data(vuu,6)),[],corag(3,:), 'filled'); scatter(0.5*(data(vdn,3)+data(vdn,5)),0.5*(data(vdn,4)+data(vdn,6)),[],cpurp(2,:), 'filled'); scatter(0.5*(data(vdd,3)+data(vdd,5)),0.5*(data(vdd,4)+data(vdd,6)),[],cpurp(3,:), 'filled'); axis([-12 12 0 25]) ylabel('-log_2 (p-value)') xlabel('-log_2 (fold changes)') title('Avg - WBR 3 dpa') subplot(2,2,2); scatter([2 2 1 1],[1 2 1 2],[],[corag(2:3,:); cpurp(2:3,:)], 'filled'); axis([0 3 0 3]) xlabel('Down - Up') ylabel('Not significant - significant') title('colorcode legend') subplot(2,2,3); hold on; scatter(data(vnu,6),data(vnu,4),[],corag(2,:), 'filled'); scatter(data(vuu,6),data(vuu,4),[],corag(3,:), 'filled'); scatter(data(vdn,6),data(vdn,4),[],cpurp(2,:), 'filled'); scatter(data(vdd,6),data(vdd,4),[],cpurp(3,:), 'filled'); axis([0 25 0 25]) xlabel('Takeover') ylabel('Control') title('p-value (-log_2)') subplot(2,2,4); hold on; scatter(data(vnu,5),data(vnu,3),[],corag(2,:), 'filled'); scatter(data(vuu,5),data(vuu,3),[],corag(3,:), 'filled'); scatter(data(vdn,5),data(vdn,3),[],cpurp(2,:), 'filled'); scatter(data(vdd,5),data(vdd,3),[],cpurp(3,:), 'filled'); axis([-12 12 -12 12]) xlabel('Takeover') ylabel('Control') title('Fold change (-log_2)') disp([sum(vuu) sum(vud) sum(vun); sum(vdu) sum(vdd) sum(vdn); sum(vnu) sum(vnd) sum(vnn)]) keyboard export_ids('./ups_ids.txt', ids(vuu | vnu), data(vuu | vnu, 3:6)); export_ids('./downs_ids.txt', ids(vdn | vdd), data(vdn | vdd, 3:6)); dlmwrite('./is_significant.csv', [vdd vdn vnu vuu]); keyboard return; end function export_ids(fname, ids, params) nparams = size(params, 2)/2; fid = fopen(fname, 'w'); for i=1:length(ids) fprintf(fid, '%s', ids{i}); for j=1:nparams fprintf(fid, ' (%f, %f)', params(i, 2*j-1), params(i, 2*j)); end fprintf(fid, '\n'); end fclose(fid); return; end function [ups, dwn, colors] = volcano_plot(fold, pval, conf, valids) if (nargin<4) valids = ones(size(conf)); end [indxs, junk, map] = unique(conf); nindx = length(indxs); cgray = brewermap(nindx+1,'Greys'); creds = brewermap(nindx+1,'Reds'); cblue = brewermap(nindx+1,'Blues'); colors = [cgray(2:end,:); creds(2:end,:); cblue(2:end,:)]; ups = (fold>1 & pval>-log2(0.05)) & valids; dwn = (fold<-1 & pval>-log2(0.05)) & valids; cindx = map + nindx*ups + 2*nindx*dwn; hold on; for i=1:3*nindx curr = (cindx == i) & valids; scatter(fold(curr), pval(curr), [], colors(i,:), 'filled'); end hold off; return; end