% PARSE_HUSBANDRY produces the plots necessary to analyze our husbandry setup.
%
% Blanchoud Group, UNIFR
% Simon Blanchoud
% 01/12/2020
function parse_husbandry(recompute)

  % Checkif we need to recompute everything
  if (nargin==0)
    recompute = false;
  else
    recompute = logical(recompute);
  end

  % Location of the reference files
  db_file = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/export_strains.csv';
  water = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/Water_quality.csv';
  counts = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/colonies_countings.csv';
  microbiota = {'/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/microbiome-1.zip', ...
  	        '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/microbiome-2.zip', ...
  	        '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/microbiome-3.zip', ...
  	        '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/microbiome-4.zip'} ;
  feeding = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/Differential_feeding.csv';
  water = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/Water_quality.csv';
  apex_dir = '/home/blanchou/Code/Apex/Apex/';
  tmpfiles = glob('/home/blanchou/Code/TmpData/*.csv');
  strain_id = [131 143 144 145 177 178 191];

  % Either recompute or gather the existing files
  if (recompute || length(tmpfiles) == 0)
    disp('Recomputing Apex data...')
    files = gather_apex_data(apex_dir);
    disp('done!')
  else
    files = tmpfiles;
  end
  
  % Parse the data from Apex and dispatch it on several panels
  hfig = figure;hold on;
  nsensors = length(files);
  sensor_label = cell(nsensors, 1);
  sensor_vals = NaN(nsensors, 2);
  plot_indx = [1 1 2 3 3 4 4 4 4 4 5];

  % Read each file, extract its name and plot it accordingly
  for i=1:nsensors
    data = csvread(files{i});
    [x, indxs] = sort(data(:,1));
    [dir, sensor_label{i}, ext] = fileparts(files{i});

    subplot(2,3, plot_indx(i)); hold on;
    plot(x, data(indxs,2));

    sensor_vals(i,1) = mean(data(:,2));
    sensor_vals(i,2) = std(data(:,2));
  end

  % Gather the data of our manual measurements of water quality
  fid = fopen(water, 'r');
  if fid > 0
    line = fgetl(fid);
    headers = strsplit(line, ',');
    data = NaN(500, length(headers));

    % Parse the CSV manually
    i = 1;
    line = fgetl(fid);
    while ischar(line)
      columns = strsplit(line, ',', 'COLLAPSEDELIMITERS', false);

      for j=1:length(columns)
        if (~isempty(columns{j}))
          val = columns{j};
          if (j == 1)
            [curr_time, indx] = strptime(val, '%d/%m/%Y');
            if (indx > length(val))
              data(i, j) = mktime(curr_time);
            end
          else
            data(i, j) = str2double(val);
          end
        end
      end

      line = fgetl(fid);
      i = i + 1;
    end

    fclose(fid);
    data = data(1:i-1, :);
  end

  % Plot the various columns on top of the Apex data
  plot_indx2 = [6 6 6 1 3 5 6];
  for i=2:size(data,2)
    subplot(2,3, plot_indx2(i-1)); hold on;
    plot(data(:,1), data(:,i));
  end

  % Set the axis, legends and labels of all panels
  labels = [sensor_label; headers(2:end).'];
  plot_indxs = [plot_indx plot_indx2];
  limx = xlim;
  limy = [0 300; 0 600; 0 50; 0 30; 7 9; 0 6];

  % Set the limits and legends
  for i=1:6
    subplot(2,3,i);
    xlim(limx);
    ylim(limy(i,:));
    legend(labels(plot_indxs==i));
  end

  % Format the axis into readable format
  t = get(gca, 'XTick');
  tdate = cell(size(t));
  for i=1:numel(t)
    tdate{i} = strftime('%m/%y', localtime(t(i)));
  end
  for i=1:6
    hax = subplot(2,3,i);
    set(hax, 'XTickLabel', tdate);
    xlabel('date (mm/yy)');
  end

  % Parse the strains CSV file
  strains = parse_strains_db(db_file);

  % Estimate the time range of our data
  today = mktime(localtime(time()));
  month = 60*60*24*365.25/12;
  [start, nindx] = min(strains(:,5));
  nmonths = ceil((today - start) / month);
  if (nmonths>200)
    disp(strains(nindx,:))
    error(['Some date issue with entry ' nindx ' !']);
  end
  nstrains = max(strains(:,1));

  % Display all the strains of interest
  xrange = NaN;
  for s=1:length(strain_id)
    % Parse one single strain (strain_id) to show its lineage
    clones = strains(strains(:,1)==strain_id(s), :);

    % Infer death dates (today for living clones)
    missings = isnan(clones(:,end));
    clones(missings, end) = clones(missings, end-1)+month;
    clones(missings & clones(:,4)>0, end) = today;

    % Plot the lifespan of each clone, and link it to its parent clone
    figure;hold on;
    for i=1:size(clones, 1)
      plot(clones(i,[end-1 end]), clones(i, [2 2]), 'r', 'LineWidth', 2);
      mom = find(abs(clones(1:i-1, 4))==clones(i,3), 1, 'last');
      if (~isempty(mom))
        plot([clones(i, end-1)-month/12 clones(i, end-1)], [clones(mom, 2) clones(i,2)], 'k');

      % Here we manually fix a few links to unknown slides
      else
        if clones(i, 3) == 31
          plot([clones(i, end-1)-month/12 clones(i, end-1)], [37 clones(i,2)], 'k');
        elseif clones(i, 3) == 96
          plot([clones(i, end-1)-month/12 clones(i, end-1)], [126 clones(i,2)], 'k');
        elseif ~isnan(clones(i, 3))
          disp(clones(i,[1 3]))
        end
      end
    end

    % Make sure all the lineages have the same temporal range
    if s>1
      xlim(xrange);
    else
      xrange = xlim;
    end

    % Update the axes, labels and strain index
    xval = get(gca, 'XTick');
    xdate = cell(size(xval));
    for i=1:numel(xval)
      xdate{i} = strftime('%m/%y', localtime(xval(i)));
    end
    set(gca, 'XTickLabel', xdate);
    xlabel('date (mm/yy)');
    title(strain_id(s));
  end

  % Plot the population distribution per strain
  [strain_ids, mapping] = unique(strains(:,1));
  npopulations = [mapping(1); diff(mapping)];
  figure;
  hist(npopulations, [0:20]);
  axis([0 21 0 180]);
  xlabel('Number of clones per strain');
  ylabel('Number of strains');
  title('Strain propagation')

  % Bin the data to count the number of clones per strain over time
  populations = zeros(nmonths, nstrains);
  death = strains(:,6);
  death(strains(:,4) > 0) = today;

  % Count one "bin" for unknown death times
  lifespan = max(ceil((death - strains(:,5))/month), 1);
  ticks = (start-1)+[0:nmonths]*month;

  % Loop over each clone to add its lifespan over the count of its strain
  for i=1:size(strains, 1)
    id = strains(i, 1);
    new = strains(i, 5);

    % Add one count to the proper strain over the lifespan of the clone
    indx = find(new>ticks(1:end-1) & new<=ticks(2:end));
    populations(indx+[0:lifespan(i)-1],id) = populations(indx+[0:lifespan(i)-1],id) + 1; 
  end
  % Don't show ongoing periods
  populations = populations(1:end-1, :);
  ticks = ticks(1:end-2);

  % Display the statistics using stacked bars
  figure;bar(populations, 'stacked');
  hold on;
  plot([0 nmonths], [128 128], 'k');
  xlim([0 nmonths]);

  % Update the axes, labels and strain index
  pdate = cell(size(ticks));
  for i=1:2:numel(ticks)
    pdate{i} = strftime('%m/%y', localtime(ticks(i)));
  end
  set(gca, 'XTick', [1:length(ticks)], 'XTickLabel', pdate);
  xlabel('date (mm/yy)');
  ylabel('number of clones');
  title('Husbandry population')
  hbar = colorbar();
  t = get(hbar, 'YTick');
  set(hbar, 'YTickLabel', cellstr(num2str(round(t.'*(nstrains-1))+1)))

  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  % Figures 5B, differential feeding
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

  % Gather the data of our manual measurements of zooid numbers during differential feeding
  fid = fopen(feeding, 'r');
  if fid > 0
    % Discard the headers
    line = fgetl(fid);
    headers = strsplit(line, ',');
    zooids = NaN(500, length(headers));
    feeds = '';

    % Parse the CSV manually
    i = 1;
    line = fgetl(fid);
    while ischar(line)
      columns = strsplit(line, ',', 'COLLAPSEDELIMITERS', false);

      % Parse the splitted line
      for j=1:length(columns)
        if (~isempty(columns{j}))
          val = columns{j};

	  % First we have the feeding diet as a single character
	  if (j == 1)
	    indx = find(val == feeds);
	    if isempty(indx)
	      feeds = [feeds val];
	      indx = length(feeds);
	    end
	    zooids(i,j) = indx;

	  % Third we have the date of the counting
	  elseif (j == 3)
            [curr_time, indx] = strptime(val, '%d.%m.%y');
            if (indx > length(val))
              zooids(i, j) = mktime(curr_time);
            end

	  % The other values are integer numbers
          else
            zooids(i, j) = str2double(val);
          end
        end
      end

      line = fgetl(fid);
      i = i + 1;
    end

    % Close the CSV file and discard the excess data
    fclose(fid);
    zooids = zooids(1:i-1, :);
  end

  % Check the data we have
  colonies = unique(zooids(:,2));
  sampling = unique(zooids(:,3));
  ncolonies = length(colonies);
  nsampling = length(sampling);

  % Preassign variables for the analysis
  conds = NaN(2, ncolonies);
  growth = NaN(nsampling-1, ncolonies);

  % Loop through each colony imaged during the experiment
  for i=1:ncolonies

    % Extract the ncounts and the feedings
    vals = zooids(zooids(:,2)==colonies(i), :);
    diets = unique(vals(:,1));

    % Need to correct for the automatic sort done by unique
    ndiets = numel(diets);
    if (ndiets>1 && diets(1)~=vals(1,1))
      diets = diets([2 1]);
    end

    % Compute the growth rates
    rates = vals(2:end,end) ./ vals(1:end-1, end);
    days = vals(2:end, 3);

    % Store the results
    growth(ismember(sampling(2:end), days), i) = rates;
    conds(1:ndiets, i) = diets;
  end

  % Date when feeding was changed
  change = mktime(strptime('09.10.20', '%d.%m.%y'));

  % Separate the feedings between the two conditions
  ndiets = numel(feeds);
  conds(2,:) = conds(2,:) + max(conds(1,:));
  conds = conds((sampling(2:end)>change)+1, :);

  % Discard the transitions
  conds(~isfinite(growth)) = NaN;
  conds(sampling(1:end-1)==change, :) = NaN;

  % Assign variables for the analysis
  classes = unique(conds(isfinite(conds)));
  nclasses = numel(classes);
  stats = NaN(3, nclasses);
  pvals = NaN(nclasses, nclasses);

  % Check if the user has the t-test function
  has_ttest2 = (exist('ttest2')>0);
  if (~has_ttest2)
    warning('Computing the p-value using Student''s t-test requires the statistics package. Check the help of this function for further information.');
  end

  % Loop through the various feeding conditions
  for i=1:nclasses

    % Isolate the growth rates
    p = growth(conds==classes(i));

    % Compute the statistics
    stats(:, i) = [mean(p); std(p); numel(p)];

    % And all the p-values if possible
    if (has_ttest2)
      for j=1:i
        [H, pvals(j,i)] = ttest2(p, growth(conds==classes(j)));
      end
    end
  end

  % Reorganize the order of the conditions
  indexes = [1 7 2 8 3 5 4 6];

  % Display the results
  figure;hold on;
  bar(stats(1,indexes));
  errorbar(stats(1,indexes),stats(2,indexes)./sqrt(stats(3,indexes)), '.');

  % Update the labels and ranges
  xlim([0 9]);
  ylim([0 1.65]);
  set(gca, 'XTick', 1:8, 'XTickLabel', feeds(mod(indexes-1, 4)+1).');
  xlabel('Feeding diet')
  ylabel('Growth rate')

  figure;hold on;
  for i=1:nclasses/2
    subplot(nclasses/2,1,i);hold on
    cols = any(conds==i);
    data = (growth(:,cols));
    mpts = nanmean(data, 2);
    spts = nanstd(data, 0, 2);
    plot(mpts)
    plot(mpts+spts, 'k');
    plot(mpts-spts, 'k');
    ylim([0 3.5])
  end

  % Microbiome study
  countings = csvread(counts);
  countings(:,end+1) = 0;
  for i=1:length(microbiota)
    ROIs = ReadImageJROI(microbiota{i});

    curr = (countings(:,1) == i);
    indxs = countings(curr, 2:end);
    indxs(:,end+1) = 0;

    for k=1:length(ROIs)
      if strncmp(ROIs{k}.strType, 'Point', 5)
        pts = ROIs{k}.vnSlices;
        s = max(pts);

        if isfinite(s)
          for l=1:s
            c = sum(pts==l);
            indxs(indxs(:,1) == l, end) = max(indxs(indxs(:,1) == l, end), c);
          end
        end
      end
    end

    countings(curr, end) = indxs(:, end);
  end

  [exps, junk, eix] = unique(countings(:,3));
  [tpts, junk, tix] = unique(countings(:,4));
  [slds, junk, six] = unique(countings(:,5));

  nmeas = 6;
  nexps = length(exps);
  ntpts = length(tpts);
  vals = NaN(nmeas, ntpts, nexps);
  side = NaN(nmeas, nexps);

  for i=1:nexps
    slides = unique(slds(six(eix==i)));
    slides(slides==0) = [];
    for j=1:ntpts
      curr = (eix==i & tix==j);
      tmp = countings(curr, :);
      for k=1:length(slides)
        if j==1
          sval = sum(tmp(tmp(:,5)==slides(k), 6:7), 1);
          side(k,i) = sval(1);
        else
          sval = tmp(tmp(:,5)==slides(k) & tmp(:,6)==side(k,i), 7);
          if (isempty(sval))
            sval = sum(tmp(tmp(:,5)==slides(k), 7), 1);
          end
        end
        vals(k,j,i) = sval(end);
      end
    end
  end

  vals = bsxfun(@rdivide, vals, vals(:,1,:));
  mval = squeeze(nanmean(vals));
  sval = squeeze(nanstd(vals));
  nnan = ~isnan(vals);

  Hsurv = NaN(ntpts, nexps);
  psurv = Hsurv;
  for j=1:ntpts
    for k=2:nexps
      [Hsurv(k,j),psurv(k,j)]=ttest2(vals(nnan(:,j,1),j,1), vals(nnan(:,j,k),j,k));
    end
  end
  Hsurv = Hsurv+(psurv<0.01)+(psurv<0.001);

  figure;hold on;
  plot(tpts, mval);
  legend(cellstr(num2str(exps)))
  plot(tpts, mval+sval, 'k')
  plot(tpts, mval-sval, 'k')

  % Allow the user to interact with the variables
  keyboard

  return;
end