% PARSE_HUSBANDRY produces the plots necessary to analyze our husbandry setup. % % Blanchoud Group, UNIFR % Simon Blanchoud % 01/12/2020 function parse_husbandry(recompute) % Checkif we need to recompute everything if (nargin==0) recompute = false; else recompute = logical(recompute); end % Location of the reference files db_file = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/export_strains.csv'; water = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/Water_quality.csv'; counts = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/colonies_countings.csv'; microbiota = {'/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/microbiome-1.zip', ... '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/microbiome-2.zip', ... '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/microbiome-3.zip', ... '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/microbiome-4.zip'} ; feeding = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/Differential_feeding.csv'; water = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/Water_quality.csv'; apex_dir = '/home/blanchou/Code/Apex/Apex/'; tmpfiles = glob('/home/blanchou/Code/TmpData/*.csv'); strain_id = [131 143 144 145 177 178 191]; % Either recompute or gather the existing files if (recompute || length(tmpfiles) == 0) disp('Recomputing Apex data...') files = gather_apex_data(apex_dir); disp('done!') else files = tmpfiles; end % Parse the data from Apex and dispatch it on several panels hfig = figure;hold on; nsensors = length(files); sensor_label = cell(nsensors, 1); sensor_vals = NaN(nsensors, 2); plot_indx = [1 1 2 3 3 4 4 4 4 4 5]; % Read each file, extract its name and plot it accordingly for i=1:nsensors data = csvread(files{i}); [x, indxs] = sort(data(:,1)); [dir, sensor_label{i}, ext] = fileparts(files{i}); subplot(2,3, plot_indx(i)); hold on; plot(x, data(indxs,2)); sensor_vals(i,1) = mean(data(:,2)); sensor_vals(i,2) = std(data(:,2)); end % Gather the data of our manual measurements of water quality fid = fopen(water, 'r'); if fid > 0 line = fgetl(fid); headers = strsplit(line, ','); data = NaN(500, length(headers)); % Parse the CSV manually i = 1; line = fgetl(fid); while ischar(line) columns = strsplit(line, ',', 'COLLAPSEDELIMITERS', false); for j=1:length(columns) if (~isempty(columns{j})) val = columns{j}; if (j == 1) [curr_time, indx] = strptime(val, '%d/%m/%Y'); if (indx > length(val)) data(i, j) = mktime(curr_time); end else data(i, j) = str2double(val); end end end line = fgetl(fid); i = i + 1; end fclose(fid); data = data(1:i-1, :); end % Plot the various columns on top of the Apex data plot_indx2 = [6 6 6 1 3 5 6]; for i=2:size(data,2) subplot(2,3, plot_indx2(i-1)); hold on; plot(data(:,1), data(:,i)); end % Set the axis, legends and labels of all panels labels = [sensor_label; headers(2:end).']; plot_indxs = [plot_indx plot_indx2]; limx = xlim; limy = [0 300; 0 600; 0 50; 0 30; 7 9; 0 6]; % Set the limits and legends for i=1:6 subplot(2,3,i); xlim(limx); ylim(limy(i,:)); legend(labels(plot_indxs==i)); end % Format the axis into readable format t = get(gca, 'XTick'); tdate = cell(size(t)); for i=1:numel(t) tdate{i} = strftime('%m/%y', localtime(t(i))); end for i=1:6 hax = subplot(2,3,i); set(hax, 'XTickLabel', tdate); xlabel('date (mm/yy)'); end % Parse the strains CSV file strains = parse_strains_db(db_file); % Estimate the time range of our data today = mktime(localtime(time())); month = 60*60*24*365.25/12; [start, nindx] = min(strains(:,5)); nmonths = ceil((today - start) / month); if (nmonths>200) disp(strains(nindx,:)) error(['Some date issue with entry ' nindx ' !']); end nstrains = max(strains(:,1)); % Display all the strains of interest xrange = NaN; for s=1:length(strain_id) % Parse one single strain (strain_id) to show its lineage clones = strains(strains(:,1)==strain_id(s), :); % Infer death dates (today for living clones) missings = isnan(clones(:,end)); clones(missings, end) = clones(missings, end-1)+month; clones(missings & clones(:,4)>0, end) = today; % Plot the lifespan of each clone, and link it to its parent clone figure;hold on; for i=1:size(clones, 1) plot(clones(i,[end-1 end]), clones(i, [2 2]), 'r', 'LineWidth', 2); mom = find(abs(clones(1:i-1, 4))==clones(i,3), 1, 'last'); if (~isempty(mom)) plot([clones(i, end-1)-month/12 clones(i, end-1)], [clones(mom, 2) clones(i,2)], 'k'); % Here we manually fix a few links to unknown slides else if clones(i, 3) == 31 plot([clones(i, end-1)-month/12 clones(i, end-1)], [37 clones(i,2)], 'k'); elseif clones(i, 3) == 96 plot([clones(i, end-1)-month/12 clones(i, end-1)], [126 clones(i,2)], 'k'); elseif ~isnan(clones(i, 3)) disp(clones(i,[1 3])) end end end % Make sure all the lineages have the same temporal range if s>1 xlim(xrange); else xrange = xlim; end % Update the axes, labels and strain index xval = get(gca, 'XTick'); xdate = cell(size(xval)); for i=1:numel(xval) xdate{i} = strftime('%m/%y', localtime(xval(i))); end set(gca, 'XTickLabel', xdate); xlabel('date (mm/yy)'); title(strain_id(s)); end % Plot the population distribution per strain [strain_ids, mapping] = unique(strains(:,1)); npopulations = [mapping(1); diff(mapping)]; figure; hist(npopulations, [0:20]); axis([0 21 0 180]); xlabel('Number of clones per strain'); ylabel('Number of strains'); title('Strain propagation') % Bin the data to count the number of clones per strain over time populations = zeros(nmonths, nstrains); death = strains(:,6); death(strains(:,4) > 0) = today; % Count one "bin" for unknown death times lifespan = max(ceil((death - strains(:,5))/month), 1); ticks = (start-1)+[0:nmonths]*month; % Loop over each clone to add its lifespan over the count of its strain for i=1:size(strains, 1) id = strains(i, 1); new = strains(i, 5); % Add one count to the proper strain over the lifespan of the clone indx = find(new>ticks(1:end-1) & new<=ticks(2:end)); populations(indx+[0:lifespan(i)-1],id) = populations(indx+[0:lifespan(i)-1],id) + 1; end % Don't show ongoing periods populations = populations(1:end-1, :); ticks = ticks(1:end-2); % Display the statistics using stacked bars figure;bar(populations, 'stacked'); hold on; plot([0 nmonths], [128 128], 'k'); xlim([0 nmonths]); % Update the axes, labels and strain index pdate = cell(size(ticks)); for i=1:2:numel(ticks) pdate{i} = strftime('%m/%y', localtime(ticks(i))); end set(gca, 'XTick', [1:length(ticks)], 'XTickLabel', pdate); xlabel('date (mm/yy)'); ylabel('number of clones'); title('Husbandry population') hbar = colorbar(); t = get(hbar, 'YTick'); set(hbar, 'YTickLabel', cellstr(num2str(round(t.'*(nstrains-1))+1))) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Figures 5B, differential feeding %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Gather the data of our manual measurements of zooid numbers during differential feeding fid = fopen(feeding, 'r'); if fid > 0 % Discard the headers line = fgetl(fid); headers = strsplit(line, ','); zooids = NaN(500, length(headers)); feeds = ''; % Parse the CSV manually i = 1; line = fgetl(fid); while ischar(line) columns = strsplit(line, ',', 'COLLAPSEDELIMITERS', false); % Parse the splitted line for j=1:length(columns) if (~isempty(columns{j})) val = columns{j}; % First we have the feeding diet as a single character if (j == 1) indx = find(val == feeds); if isempty(indx) feeds = [feeds val]; indx = length(feeds); end zooids(i,j) = indx; % Third we have the date of the counting elseif (j == 3) [curr_time, indx] = strptime(val, '%d.%m.%y'); if (indx > length(val)) zooids(i, j) = mktime(curr_time); end % The other values are integer numbers else zooids(i, j) = str2double(val); end end end line = fgetl(fid); i = i + 1; end % Close the CSV file and discard the excess data fclose(fid); zooids = zooids(1:i-1, :); end % Check the data we have colonies = unique(zooids(:,2)); sampling = unique(zooids(:,3)); ncolonies = length(colonies); nsampling = length(sampling); % Preassign variables for the analysis conds = NaN(2, ncolonies); growth = NaN(nsampling-1, ncolonies); % Loop through each colony imaged during the experiment for i=1:ncolonies % Extract the ncounts and the feedings vals = zooids(zooids(:,2)==colonies(i), :); diets = unique(vals(:,1)); % Need to correct for the automatic sort done by unique ndiets = numel(diets); if (ndiets>1 && diets(1)~=vals(1,1)) diets = diets([2 1]); end % Compute the growth rates rates = vals(2:end,end) ./ vals(1:end-1, end); days = vals(2:end, 3); % Store the results growth(ismember(sampling(2:end), days), i) = rates; conds(1:ndiets, i) = diets; end % Date when feeding was changed change = mktime(strptime('09.10.20', '%d.%m.%y')); % Separate the feedings between the two conditions ndiets = numel(feeds); conds(2,:) = conds(2,:) + max(conds(1,:)); conds = conds((sampling(2:end)>change)+1, :); % Discard the transitions conds(~isfinite(growth)) = NaN; conds(sampling(1:end-1)==change, :) = NaN; % Assign variables for the analysis classes = unique(conds(isfinite(conds))); nclasses = numel(classes); stats = NaN(3, nclasses); pvals = NaN(nclasses, nclasses); % Check if the user has the t-test function has_ttest2 = (exist('ttest2')>0); if (~has_ttest2) warning('Computing the p-value using Student''s t-test requires the statistics package. Check the help of this function for further information.'); end % Loop through the various feeding conditions for i=1:nclasses % Isolate the growth rates p = growth(conds==classes(i)); % Compute the statistics stats(:, i) = [mean(p); std(p); numel(p)]; % And all the p-values if possible if (has_ttest2) for j=1:i [H, pvals(j,i)] = ttest2(p, growth(conds==classes(j))); end end end % Reorganize the order of the conditions indexes = [1 7 2 8 3 5 4 6]; % Display the results figure;hold on; bar(stats(1,indexes)); errorbar(stats(1,indexes),stats(2,indexes)./sqrt(stats(3,indexes)), '.'); % Update the labels and ranges xlim([0 9]); ylim([0 1.65]); set(gca, 'XTick', 1:8, 'XTickLabel', feeds(mod(indexes-1, 4)+1).'); xlabel('Feeding diet') ylabel('Growth rate') figure;hold on; for i=1:nclasses/2 subplot(nclasses/2,1,i);hold on cols = any(conds==i); data = (growth(:,cols)); mpts = nanmean(data, 2); spts = nanstd(data, 0, 2); plot(mpts) plot(mpts+spts, 'k'); plot(mpts-spts, 'k'); ylim([0 3.5]) end % Microbiome study countings = csvread(counts); countings(:,end+1) = 0; for i=1:length(microbiota) ROIs = ReadImageJROI(microbiota{i}); curr = (countings(:,1) == i); indxs = countings(curr, 2:end); indxs(:,end+1) = 0; for k=1:length(ROIs) if strncmp(ROIs{k}.strType, 'Point', 5) pts = ROIs{k}.vnSlices; s = max(pts); if isfinite(s) for l=1:s c = sum(pts==l); indxs(indxs(:,1) == l, end) = max(indxs(indxs(:,1) == l, end), c); end end end end countings(curr, end) = indxs(:, end); end [exps, junk, eix] = unique(countings(:,3)); [tpts, junk, tix] = unique(countings(:,4)); [slds, junk, six] = unique(countings(:,5)); nmeas = 6; nexps = length(exps); ntpts = length(tpts); vals = NaN(nmeas, ntpts, nexps); side = NaN(nmeas, nexps); for i=1:nexps slides = unique(slds(six(eix==i))); slides(slides==0) = []; for j=1:ntpts curr = (eix==i & tix==j); tmp = countings(curr, :); for k=1:length(slides) if j==1 sval = sum(tmp(tmp(:,5)==slides(k), 6:7), 1); side(k,i) = sval(1); else sval = tmp(tmp(:,5)==slides(k) & tmp(:,6)==side(k,i), 7); if (isempty(sval)) sval = sum(tmp(tmp(:,5)==slides(k), 7), 1); end end vals(k,j,i) = sval(end); end end end vals = bsxfun(@rdivide, vals, vals(:,1,:)); mval = squeeze(nanmean(vals)); sval = squeeze(nanstd(vals)); nnan = ~isnan(vals); Hsurv = NaN(ntpts, nexps); psurv = Hsurv; for j=1:ntpts for k=2:nexps [Hsurv(k,j),psurv(k,j)]=ttest2(vals(nnan(:,j,1),j,1), vals(nnan(:,j,k),j,k)); end end Hsurv = Hsurv+(psurv<0.01)+(psurv<0.001); figure;hold on; plot(tpts, mval); legend(cellstr(num2str(exps))) plot(tpts, mval+sval, 'k') plot(tpts, mval-sval, 'k') % Allow the user to interact with the variables keyboard return; end