diff --git a/helpers/myttest.m b/helpers/myttest.m new file mode 100755 index 0000000..f79f525 --- /dev/null +++ b/helpers/myttest.m @@ -0,0 +1,44 @@ +function [H, pval] = myttest(values, indexes, tails) + + if (nargin == 1) + indexes = repmat([1:size(values,2)], size(values,1), 1); + indexes = indexes(:); + values = values(:); + tails = 'both'; + elseif (nargin == 2) + tails = 'both'; + end + + groups = unique(indexes(:)).'; + if (numel(groups) == 1) + [H,pval] = ttest(values(:)); + + return; + end + + ngroups = length(groups); + H = zeros(ngroups); + pval = H; + + vfinites = isfinite(values); + for i = 1:ngroups + validsi = vfinites & indexes==groups(i); + if (~any(validsi)) + H(i,:) = NaN; + pval(i,:) = NaN; + else + for j = 1:ngroups + validsj = vfinites & indexes==groups(j); + if (~any(validsj)) + H(i,j) = NaN; + pval(i,j) = NaN; + else + [H(i,j), pval(i,j)] = ttest2(values(validsi), values(validsj), 'alpha', 0.05, 'tail', tails); + end + end + end + end + H = H + (pval < 0.01) + (pval < 0.001); + + return; +end diff --git a/helpers/parse_strains_db.m b/helpers/parse_strains_db.m index 8f717c7..2e79c68 100644 --- a/helpers/parse_strains_db.m +++ b/helpers/parse_strains_db.m @@ -1,211 +1,247 @@ % PARSE_STRAINS_DB extracts from a SciNote export CSV file the clones' % history and relations. % % STRAINS = PARSE_STRAINS_DB(CSV_FILE) extracts STRAINS from CSV_FILE. % STRAINS is a matrix with the following format: % [ StrainID CloneID ParentSlide SlideID CreationDate DeathDate ] % % Blanchoud Group, UNIFR % Simon Blanchoud % 01/12/2020 function strains = parse_strains_db(fname) % Prepare the output strains = NaN(0,6); % Only process files if (isfile(fname)) fid = fopen(fname, 'r'); % Make sure we can read it if fid >= 0 % Process the header line line = fgetl(fid); headers = strsplit(line, ','); % Prepare a cell matrix to store the CSV file data = cell(0, length(headers)); % Process the CSV file line = fgetl(fid); while ischar(line) % In case there are \", then there is likely a problematic comma if (any(line == '"')) ids = find(line == '"'); ids = reshape(ids, 2, []); % We simply replace the commas with \; for i=1:size(ids, 2) tmpstr = line(ids(1,i):ids(2,i)); tmpstr(tmpstr==',') = ';'; line(ids(1,i):ids(2,i)) = tmpstr; end % And we delete the \" line(line == '"') = []; end % We simplify a bit the symbols used line(line=='.') = '/'; line(line==':') = ';'; % And we store the line of the CSV into a cell matrix data(end+1,:) = strsplit(line, ',', 'COLLAPSEDELIMITERS', false); % Continue the file line = fgetl(fid); end end % Assign the variables nclones = size(data,1); nchars = 1; strains = NaN(nclones, 6); % Loop through the CSV table for i=1:nclones % Make sure this is a proper strain data nstrain = data{i,1}; if (nstrain(1) ~= 'S') continue; end % Get the strain and clone IDs from the name strains(i,1) = str2double(nstrain(2:4)); strains(i,2) = str2double(nstrain(6:8)); % The parent ID parent = data{i, 3}; strains(i,3) = str2double(parent); + if isnan(strains(i,3)) + strains(i,3) = roman2double(parent); + end % The slide ID slide = data{i, 2}; strains(i,4) = str2double(slide); + if isnan(strains(i,4)) + strains(i,4) = roman2double(slide); + end % And the record creation date as initial creation date val = data{i, end}; [curr_time, indx] = strptime(val, '%m/%d/%Y %H;%M'); if (indx > length(val)) strains(i, 5) = mktime(curr_time); end % Now we process the comments % Start by splitting them comments = strsplit(data{i,4}, ';'); % We match the expression of interest once deads = regexpi(comments, '(dead)|(empty)|(sacrificed)|(removed)|(fixation)|(slipped)|(missing)|(died)|(death)|(virtual)|(isolated)|(deattached)'); - clone = regexpi(comments, '(subcloned)|(sub.cloned)|(positioned)|(repositioned)'); + clone = regexpi(comments, '(subcloned)|(sub.cloned)|(positioned)|(repositioned)|(moved)'); % Loop over each comment for j=1:length(comments) % Temporary variables in case of no match indx = NaN; num = NaN; is_date = false; % Loop over each word of the comment words = strsplit(comments{j}); for k=1:length(words) % Word cleanup to maximize date matching w = words{k}; if (~isempty(w) && w(1)=='(') w = w(2:end-1); end if (~isempty(w) && w(end)==')') w = w(1:end-1); end if (~isempty(w) && w(end)=='/') w = w(1:end-1); end % Short values might be number nchars = length(w); if (nchars<4) tmp = str2double(w); if (~isnan(tmp)) num = tmp; + else + tmp = roman2double(w); + if (~isnan(tmp)) + num = tmp; + end end % Longer ones could be dates, in several formats elseif (nchars>7) [curr_time, indx] = strptime(w, '%d/%m/%y'); if (indx <= nchars) [curr_time, indx] = strptime(w, '%d/%m/%Y'); if (indx <= nchars) [curr_time, indx] = strptime(w, '%m/%d/%Y'); if (indx <= nchars) [curr_time, indx] = strptime(w, '%m/%d/%y'); end end end is_date = (indx > nchars); end end % If we found a mention of colony death if (length(deads{j}>0)) % Store the death date if any found tmptime = mktime(curr_time); if (is_date && ~(strains(i,6) > tmptime)) strains(i,6) = mktime(curr_time); end % And the slide number if any if (~isnan(num) && strains(i,4)==0) strains(i,4) = -num; end % If it mentioned subcloning elseif length(clone{j}>0) % Store the date if it is anterior to the creation of the record tmptime = mktime(curr_time); if (is_date && strains(i,5) > tmptime) strains(i,5) = tmptime; end end end if (strains(i,1)==131 && strains(i,2)==-1) - disp(data(i,:)); - if (isnan(strains(i,6))) - disp(['-> ' strftime('%d/%m/%y', localtime(strains(i,5))) ' : NaN']); - else - disp(['-> ' strftime('%d/%m/%y', localtime(strains(i,5))) ' : ' strftime('%d/%m/%y', localtime(strains(i,6))) ]); - end - keyboard + disp(data(i,:)); + if (isnan(strains(i,6))) + disp(['-> ' strftime('%d/%m/%y', localtime(strains(i,5))) ' : NaN']); + else + disp(['-> ' strftime('%d/%m/%y', localtime(strains(i,5))) ' : ' strftime('%d/%m/%y', localtime(strains(i,6))) ]); + end + keyboard end end end % Clean and sort the output strains = strains(~isnan(strains(:,1)),:); [junk, indx] = sortrows(strains(:,1:2)); strains = strains(indx,:); % Try to infer a minimal death date based on subcloning information unknown = (strains(:,4)<0 & isnan(strains(:,6))); ids = abs(strains(unknown, [1 4 5])); subclones = strains(~isnan(strains(:,3)), [1 3 5]); % Loop over each missing value and check if some clones were subcloned from it alive = NaN(size(ids, 1), 1); for i=1:numel(alive) hits = (subclones(:,1)==ids(i,1) & subclones(:,2)==ids(i,2)); tmax = max(subclones(hits, 3)); % Make sure we get something valid if (~isempty(tmax) && ids(i,3) < tmax) alive(i) = tmax; end end % Copy the results strains(unknown, 6) = alive; return; end + +% Special conversion for the large slides with roman numbering. +% Switching back to arabic numbers should not be an issue as strains won't cross. +function val = roman2double(chars) + + val = NaN; + if (~isempty(chars)) + switch chars + case 'I' + val = 1; + case 'DDI' + val = 11; + case {'II', 'IIa'} + val = 2; + case 'III' + val = 3; + case 'IV' + val = 4; + case 'XV' + val = 15; + end + end + + return; +end diff --git a/libraries/ReadImageJROI.m b/libraries/ReadImageJROI.m index 5e779de..125df77 100755 --- a/libraries/ReadImageJROI.m +++ b/libraries/ReadImageJROI.m @@ -1,568 +1,568 @@ function [sROI] = ReadImageJROI(cstrFilenames) % ReadImageJROI - FUNCTION Read an ImageJ ROI into a matlab structure % % Usage: [sROI] = ReadImageJROI(strFilename) % [cvsROIs] = ReadImageJROI(cstrFilenames) % [cvsROIs] = ReadImageJROI(strROIArchiveFilename) % % This function reads the ImageJ binary ROI file format. % % 'strFilename' is the full path to a '.roi' file. A list of ROI files can be % passed as a cell array of filenames, in 'cstrFilenames'. An ImageJ ROI % archive can be access by providing a '.zip' filename in % 'strROIArchiveFilename'. Single ROIs are returned as matlab structures, with % variable fields depending on the ROI type. Multiple ROIs are returned as a % cell array of ROI structures. % % The field '.strName' is guaranteed to exist, and contains the ROI name (the % filename minus '.roi', or the name set for the ROI). % % The field '.strType' is guaranteed to exist, and defines the ROI type: % {'Rectangle', 'Oval', Line', 'Polygon', 'Freehand', 'Traced', 'PolyLine', % 'FreeLine', 'Angle', 'Point', 'NoROI'}. % % The field '.vnRectBounds' is guaranteed to exist, and defines the rectangular % bounds of the ROI: ['nTop', 'nLeft', 'nBottom', 'nRight']. % % The field '.nVersion' is guaranteed to exist, and defines the version number % of the ROI format. % % The field '.vnPosition' is guaranteed to exist. If the information is % defined within the ROI, this field will be a three-element vector % [nCPosition nZPosition nTPosition]. % % ROI types: % Rectangle: % .strType = 'Rectangle'; % .nArcSize - The arc size of the rectangle's rounded corners % % For a composite, 'shape' ROI: % .strSubtype = 'Shape'; % .vfShapeSegments - A long, complicated vector of complicated shape % segments. This vector is in the format passed to the % ImageJ ShapeROI constructor. I won't decode this for % you! :( % % Oval: % .strType = 'Oval'; % % Line: % .strType = 'Line'; % .vnLinePoints - The end points of the line ['nX1', 'nY1', 'nX2', 'nY2'] % % With arrow: % .strSubtype = 'Arrow'; % .bDoubleHeaded - Does the line have two arrowheads? % .bOutlined - Is the arrow outlined? % .nArrowStyle - The ImageJ style of the arrow (unknown interpretation) % .nArrowHeadSize - The size of the arrowhead (unknown units) % % Polygon: % .strType = 'Polygon'; % .mnCoordinates - An [Nx2] matrix, specifying the coordinates of % the polygon vertices. Each row is [nX nY]. % % Freehand: % .strType = 'Freehand'; % .mnCoordinates - An [Nx2] matrix, specifying the coordinates of % the polygon vertices. Each row is [nX nY]. % % Ellipse subtype: % .strSubtype = 'Ellipse'; % .vfEllipsePoints - A vector containing the ellipse control points: % [fX1 fY1 fX2 fY2]. % .fAspectRatio - The aspect ratio of the ellipse. % % Traced: % .strType = 'Traced'; % .mnCoordinates - An [Nx2] matrix, specifying the coordinates of % the line vertices. Each row is [nX nY]. % % PolyLine: % .strType = 'PolyLine'; % .mnCoordinates - An [Nx2] matrix, specifying the coordinates of % the line vertices. Each row is [nX nY]. % % FreeLine: % .strType = 'FreeLine'; % .mnCoordinates - An [Nx2] matrix, specifying the coordinates of % the line vertices. Each row is [nX nY]. % % Angle: % .strType = 'Angle'; % .mnCoordinates - An [Nx2] matrix, specifying the coordinates of % the angle vertices. Each row is [nX nY]. % % Point: % .strType = 'Point'; % .mfCoordinates - An [Nx2] matrix, specifying the coordinates of % the points. Each row is [fX fY]. % .vnCounters - An [Nx1] vector, specifying which counter is % associated with each point. May be empty. % .vnSlices - An [Nx1] vector, specifying on which plane each % point is on. These are specified as linear % indices, with the hyperstack arranged as % [Channels Z-slices T-slices]. If empty, then all % points are placed on the first slice. % % NoROI: % .strType = 'NoROI'; % % Additionally, ROIs from later versions (.nVersion >= 218) may have the % following fields: % % .nStrokeWidth - The width of the line stroke % .nStrokeColor - The encoded color of the stroke (ImageJ color format) % .nFillColor - The encoded fill color for the ROI (ImageJ color % format) % % If the ROI contains text: % .strSubtype = 'Text'; % .nFontSize - The desired font size % .nFontStyle - The style of the font (unknown format) % .strFontName - The name of the font to render the text with % .strText - A string containing the text % Author: Dylan Muir % Created: 9th August, 2011 % % 20170118 Added code to read slice position for point ROIs % 20141020 Added code to read 'header 2' fields; thanks to Luca Nocetti % 20140602 Bug report contributed by Samuel Barnes and Yousef Mazaheri % 20110810 Bug report contributed by Jean-Yves Tinevez % 20110829 Bug fix contributed by Benjamin Ricca % 20120622 Order of ROIs in a ROI set is now preserved % 20120703 Different way of reading zip file contents guarantees that ROI order % is preserved % % Copyright (c) 2011, 2012, 2013, 2014 Dylan Muir % % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 3 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % -- Constants bOpt_SubPixelResolution = 128; % -- Check arguments if (nargin < 1) disp('*** ReadImageJROI: Incorrect usage'); help ReadImageJROI; return; end % -- Check for a cell array of ROI filenames if (iscell(cstrFilenames)) % - Read each ROI in turn %cvsROI = cellfun(@ReadImageJROI, CellFlatten(cstrFilenames), 'UniformOutput', false); tmpROI = CellFlatten(cstrFilenames); sROI = {}; for i=1:length(tmpROI) cvsROI = ReadImageJROI(tmpROI{i}); sROI = [sROI; cvsROI]; end % - Return all ROIs %sROI = cvsROI; return; else % - This is not a cell string strFilename = cstrFilenames; clear cstrFilenames; end % -- Check for a zip file [nul, nul, strExt] = fileparts(strFilename); %#ok if (isequal(lower(strExt), '.zip')) % - get zp file contents cstrFilenames_short = listzipcontents_rois(strFilename); % - Unzip the file into a temporary directory strROIDir = tempname; unzip(strFilename, strROIDir); for (nFileIndex = 1:length(cstrFilenames_short)) cstrFilenames{1, nFileIndex} = [strROIDir '/' char(cstrFilenames_short(nFileIndex, 1))]; end % - Build ROIs for each file cvsROIs = ReadImageJROI(cstrFilenames); % - Clean up temporary directory delete([strROIDir filesep '*.roi']); rmdir(strROIDir); % - Return ROIs sROI = cvsROIs; return; end % -- Read ROI % -- Check file and open if (~exist(strFilename, 'file')) error('ReadImageJROI:FileNotFound', ... '*** ReadImageJROI: The file [%s] was not found.', strFilename); end fidROI = fopen(strFilename, 'r', 'ieee-be'); % -- Check file magic code strMagic = fread(fidROI, [1 4], '*char'); if (~isequal(strMagic, 'Iout')) error('ReadImageJROI:FormatError', ... '*** ReadImageJROI: The file was not an ImageJ ROI format.'); end % -- Read version sROI.nVersion = fread(fidROI, 1, 'int16'); % -- Read ROI type nTypeID = fread(fidROI, 1, 'uint8'); fseek(fidROI, 1, 'cof'); % Skip a byte % -- Read rectangular bounds sROI.vnRectBounds = fread(fidROI, [1 4], 'int16'); % -- Read number of coordinates nNumCoords = fread(fidROI, 1, 'uint16'); % -- Read the rest of the header vfLinePoints = fread(fidROI, 4, 'float32'); nStrokeWidth = fread(fidROI, 1, 'int16'); nShapeROISize = fread(fidROI, 1, 'uint32'); nStrokeColor = fread(fidROI, 1, 'uint32'); nFillColor = fread(fidROI, 1, 'uint32'); nROISubtype = fread(fidROI, 1, 'int16'); nOptions = fread(fidROI, 1, 'int16'); nArrowStyle = fread(fidROI, 1, 'uint8'); nArrowHeadSize = fread(fidROI, 1, 'uint8'); nRoundedRectArcSize = fread(fidROI, 1, 'int16'); sROI.nPosition = fread(fidROI, 1, 'uint32'); % -- Read the 'header 2' fields nHeader2Offset = fread(fidROI, 1, 'uint32'); if (nHeader2Offset > 0) && ~fseek(fidROI, nHeader2Offset+32+4, 'bof') % - Seek to start of header 2 fseek(fidROI, nHeader2Offset+4, 'bof'); % - Read fields sROI.vnPosition = fread(fidROI, 3, 'uint32')'; vnNameParams = fread(fidROI, 2, 'uint32')'; nOverlayLabelColor = fread(fidROI, 1, 'uint32'); %#ok nOverlayFontSize = fread(fidROI, 1, 'int16'); %#ok fseek(fidROI, 1, 'cof'); % Skip a byte nOpacity = fread(fidROI, 1, 'uint8'); %#ok nImageSize = fread(fidROI, 1, 'uint32'); %#ok fStrokeWidth = fread(fidROI, 1, 'float32'); %#ok vnROIPropertiesParams = fread(fidROI, 2, 'uint32')'; %#ok nCountersOffset = fread(fidROI, 1, 'uint32'); else sROI.vnPosition = []; vnNameParams = [0 0]; nOverlayLabelColor = []; %#ok nOverlayFontSize = []; %#ok nOpacity = []; %#ok nImageSize = []; %#ok fStrokeWidth = []; %#ok vnROIPropertiesParams = [0 0]; %#ok nCountersOffset = 0; end % -- Set ROI name if (isempty(vnNameParams) || any(vnNameParams == 0) || fseek(fidROI, sum(vnNameParams), 'bof')) [nul, sROI.strName] = fileparts(strFilename); %#ok else % - Try to read ROI name from header fseek(fidROI, vnNameParams(1), 'bof'); sROI.strName = fread(fidROI, vnNameParams(2), 'int16=>char')'; end % - Seek to get aspect ratio fseek(fidROI, 52, 'bof'); fAspectRatio = fread(fidROI, 1, 'float32'); % - Seek to after header fseek(fidROI, 64, 'bof'); % -- Build ROI switch nTypeID case 1 % - Rectangle sROI.strType = 'Rectangle'; sROI.nArcSize = nRoundedRectArcSize; if (nShapeROISize > 0) % - This is a composite shape ROI sROI.strSubtype = 'Shape'; if (nTypeID ~= 1) error('ReadImageJROI:FormatError', ... '*** ReadImageJROI: A composite ROI must be a Rectangle type.'); end % - Read shapes sROI.vfShapes = fread(fidROI, nShapeROISize, 'float32'); end case 2 % - Oval sROI.strType = 'Oval'; case 3 % - Line sROI.strType = 'Line'; sROI.vnLinePoints = round(vfLinePoints); if (nROISubtype == 2) % - This is an arrow line sROI.strSubtype = 'Arrow'; sROI.bDoubleHeaded = nOptions & 2; sROI.bOutlined = nOptions & 4; sROI.nArrowStyle = nArrowStyle; sROI.nArrowHeadSize = nArrowHeadSize; end case 0 % - Polygon sROI.strType = 'Polygon'; sROI.mnCoordinates = read_coordinates; case 7 % - Freehand sROI.strType = 'Freehand'; sROI.mnCoordinates = read_coordinates; if (nROISubtype == 3) % - This is an ellipse sROI.strSubtype = 'Ellipse'; sROI.vfEllipsePoints = vfLinePoints; sROI.fAspectRatio = fAspectRatio; end case 8 % - Traced sROI.strType = 'Traced'; sROI.mnCoordinates = read_coordinates; case 5 % - PolyLine sROI.strType = 'PolyLine'; sROI.mnCoordinates = read_coordinates; case 4 % - FreeLine sROI.strType = 'FreeLine'; sROI.mnCoordinates = read_coordinates; case 9 % - Angle sROI.strType = 'Angle'; sROI.mnCoordinates = read_coordinates; case 10 % - Point sROI.strType = 'Point'; [sROI.mfCoordinates, vnCounters] = read_coordinates; % - Set counters and [C Z T] positions if (isempty(vnCounters)) sROI.vnCounters = zeros(nNumCoords, 1); sROI.vnSlices = ones(nNumCoords, 1); else sROI.vnCounters = bitand(vnCounters, 255); - sROI.vnSlices = bitshift(vnCounters, -8, 'uint32'); + sROI.vnSlices = bitshift(vnCounters, -8, 32); end case 6 sROI.strType = 'NoROI'; otherwise error('ReadImageJROI:FormatError', ... '--- ReadImageJROI: The ROI file contains an unknown ROI type.'); end % -- Handle version >= 218 if (sROI.nVersion >= 218) sROI.nStrokeWidth = nStrokeWidth; sROI.nStrokeColor = nStrokeColor; sROI.nFillColor = nFillColor; sROI.bSplineFit = nOptions & 1; if (nROISubtype == 1) % - This is a text ROI sROI.strSubtype = 'Text'; % - Seek to after header fseek(fidROI, 64, 'bof'); sROI.nFontSize = fread(fidROI, 1, 'uint32'); sROI.nFontStyle = fread(fidROI, 1, 'uint32'); nNameLength = fread(fidROI, 1, 'uint32'); nTextLength = fread(fidROI, 1, 'uint32'); % - Read font name sROI.strFontName = fread(fidROI, nNameLength, 'uint16=>char'); % - Read text sROI.strText = fread(fidROI, nTextLength, 'uint16=>char'); end end % - Close the file fclose(fidROI); % --- END of ReadImageJROI FUNCTION --- function [mnCoordinates, vnCounters] = read_coordinates % - Check for sub-pixel resolution if bitand(nOptions, bOpt_SubPixelResolution) fseek(fidROI, 64 + 4*nNumCoords, 'bof'); % - Read X and Y coordinates vnX = fread(fidROI, [nNumCoords 1], 'single'); vnY = fread(fidROI, [nNumCoords 1], 'single'); else % - Read X and Y coords vnX = fread(fidROI, [nNumCoords 1], 'int16'); vnY = fread(fidROI, [nNumCoords 1], 'int16'); % - Trim at zero vnX(vnX < 0) = 0; vnY(vnY < 0) = 0; % - Offset by top left ROI bound vnX = vnX + sROI.vnRectBounds(2); vnY = vnY + sROI.vnRectBounds(1); end mnCoordinates = [vnX vnY]; % - Read counters, if present if (nCountersOffset ~= 0) fseek(fidROI, nCountersOffset, 'bof'); vnCounters = fread(fidROI, [nNumCoords 1], 'uint32'); else vnCounters = []; end end end % --- END of ReadImageJROI.m --- function [filelist] = listzipcontents_rois(zipFilename) % listzipcontents_rois - FUNCTION Read the file names in a zip file % % Usage: [filelist] = listzipcontents_rois(zipFilename) % - Import java libraries %import java.util.zip.*; %import java.io.*; % - Read file list via JAVA object filelist={}; %in = java.util.zip.ZipInputStream(java.io.FileInputStream(zipFilename)); in = javaObject('java.util.zip.ZipInputStream', javaObject('java.io.FileInputStream', zipFilename)); entry = javaMethod('getNextEntry', in); % - Filter ROI files %while (entry~=0) while (isjava(entry) && ~entry.equals(0)) %name = entry.getName; name = javaMethod('getName', entry); %if (name.endsWith('.roi')) if (strncmp(name(end-3:end),'.roi',4)) filelist = cat(1,filelist,char(name)); end; %entry = in.getNextEntry(); entry = javaMethod('getNextEntry', in); end; % - Close zip file %in.close(); javaMethod('close', in); end function [cellArray] = CellFlatten(varargin) % CellFlatten - FUNCTION Convert a list of items to a single level cell array % % Usage: [cellArray] = CellFlatten(arg1, arg2, ...) % % CellFlatten will convert a list of arguments into a single-level cell array. % If any argument is already a cell array, each cell will be concatenated to % 'cellArray' in a list. The result of this function is a single-dimensioned % cell array containing a cell for each individual item passed to CellFlatten. % The order of cell elements in the argument list is guaranteed to be % preserved. % % This function is useful when dealing with variable-length argument lists, % each item of which can also be a cell array of items. % Author: Dylan Muir % Created: 14th May, 2004 % -- Check arguments if (nargin == 0) disp('*** CellFlatten: Incorrect usage'); help CellFlatten; return; end % -- Convert arguments if (iscell(varargin{1})) cellArray = CellFlatten(varargin{1}{:}); else cellArray = varargin(1); end for (nIndexArg = 2:length(varargin)) %#ok if (iscell(varargin{nIndexArg})) cellReturn = CellFlatten(varargin{nIndexArg}{:}); cellArray = [cellArray cellReturn{:}]; %#ok else cellArray = [cellArray varargin{nIndexArg}]; %#ok end end % --- END of CellFlatten.m --- end diff --git a/process_results/parse_husbandry.m b/process_results/parse_husbandry.m index 97dac24..06dc573 100644 --- a/process_results/parse_husbandry.m +++ b/process_results/parse_husbandry.m @@ -1,213 +1,314 @@ % PARSE_HUSBANDRY produces the plots necessary to analyze our husbandry setup. % % Blanchoud Group, UNIFR % Simon Blanchoud % 01/12/2020 function parse_husbandry(recompute) % Checkif we need to recompute everything if (nargin==0) recompute = false; else recompute = logical(recompute); end % Location of the reference files - %db_file = '/home/blanchou/Documents/Projects/Culture/Long-term/export_2020-11-27_14-55-07_UTC.csv'; db_file = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/export_strains.csv'; - %water = '/home/blanchou/Documents/Projects/Culture/Long-term/Water_quality.csv'; water = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/Water_quality.csv'; + counts = '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/colonies_countings.csv'; + zooids = {'/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/microbiome-1.zip', ... + '/home/blanchou/Documents/Manuscripts/Tunicate_husbandry/data/microbiome/microbiome-2.zip'} ; apex_dir = '/home/blanchou/Code/Apex/Apex/'; tmpfiles = glob('/home/blanchou/Code/TmpData/*.csv'); - strain_id = 131; + strain_id = [131 143 144 145 177 178 191]; % Either recompute or gather the existing files if (recompute || length(tmpfiles) == 0) disp('Recomputing Apex data...') files = gather_apex_data(apex_dir); disp('done!') else files = tmpfiles; end + + % Microbiome study + countings = csvread(counts); + countings(:,end+1) = 0; + for i=1:length(zooids) + ROIs = ReadImageJROI(zooids{i}); - % Parse the data from Apexand dispatch it on several panels + curr = (countings(:,1) == i); + indxs = countings(curr, 2:end); + indxs(:,end+1) = 0; + + for k=1:length(ROIs) + if strncmp(ROIs{k}.strType, 'Point', 5) + pts = ROIs{k}.vnSlices; + s = max(pts); + c = sum(pts==s); + indxs(indxs(:,1) == s, end) = c; + end + end + + countings(curr, end) = indxs(:, end); + end + [exps, junk, eix] = unique(countings(:,3)); + [tpts, junk, tix] = unique(countings(:,4)); + [slds, junk, six] = unique(countings(:,5)); + nexps = length(exps); + ntpts = length(tpts); + vals = NaN(3, ntpts, nexps); + side = NaN(3, nexps); + + for i=1:nexps + slides = unique(slds(six(eix==i))); + slides(slides==0) = []; + for j=1:ntpts + curr = (eix==i & tix==j); + tmp = countings(curr, :); + for k=1:3 + if j==1 + sval = sum(tmp(tmp(:,5)==slides(k), 6:7), 1); + side(k,i) = sval(1); + else + sval = tmp(tmp(:,5)==slides(k) & tmp(:,6)==side(k,i), 7); + if (isempty(sval)) + sval = sum(tmp(tmp(:,5)==slides(k), 7), 1); + end + end + vals(k,j,i) = sval(end); + end + end + end + + vals = bsxfun(@rdivide, vals, vals(:,1,:)); + mval = squeeze(mean(vals)); + sval = squeeze(std(vals)); + + indx = repmat([1:nexps], 3, 1); + Hsurv = NaN(ntpts, nexps); + psurv = Hsurv; + for j=1:ntpts + for k=2:nexps + %v = squeeze(vals(:,j,:)); + %[H,p] = myttest(v(:),indx(:)); + %Hsurv(j,:) = H(1,:); + %psurv(j,:) = p(1,:); + + [Hsurv(k,j),psurv(k,j)]=ttest2(vals(:,j,1), vals(:,j,k)); + end + end + Hsurv = Hsurv+(psurv<0.01)+(psurv<0.001); + + figure;hold on; + plot(tpts, mval); + legend(cellstr(num2str(exps))) + plot(tpts, mval+sval, 'k') + plot(tpts, mval-sval, 'k') + keyboard + + % Parse the data from Apex and dispatch it on several panels hfig = figure;hold on; nsensors = length(files); sensor_label = cell(nsensors, 1); + sensor_vals = NaN(nsensors, 2); plot_indx = [1 1 2 3 3 4 4 4 4 4 5]; % Read each file, extract its name and plot it accordingly for i=1:nsensors data = csvread(files{i}); [x, indxs] = sort(data(:,1)); [dir, sensor_label{i}, ext] = fileparts(files{i}); subplot(2,3, plot_indx(i)); hold on; plot(x, data(indxs,2)); + + sensor_vals(i,1) = mean(data(:,2)); + sensor_vals(i,2) = std(data(:,2)); end % Gather the data of our manual measurements of water quality fid = fopen(water, 'r'); if fid > 0 line = fgetl(fid); headers = strsplit(line, ','); data = NaN(500, length(headers)); % Parse the CSV manually i = 1; line = fgetl(fid); while ischar(line) columns = strsplit(line, ',', 'COLLAPSEDELIMITERS', false); for j=1:length(columns) if (~isempty(columns{j})) val = columns{j}; if (j == 1) [curr_time, indx] = strptime(val, '%d/%m/%Y'); if (indx > length(val)) data(i, j) = mktime(curr_time); end else data(i, j) = str2double(val); end end end line = fgetl(fid); i = i + 1; end fclose(fid); data = data(1:i-1, :); end % Plot the various columns on top of the Apex data plot_indx2 = [6 6 6 1 3 5 6]; for i=2:size(data,2) subplot(2,3, plot_indx2(i-1)); hold on; plot(data(:,1), data(:,i)); end % Set the axis, legends and labels of all panels labels = [sensor_label; headers(2:end).']; plot_indxs = [plot_indx plot_indx2]; limx = xlim; limy = [0 300; 0 600; 0 50; 0 30; 7 9; 0 6]; % Set the limits and legends for i=1:6 subplot(2,3,i); xlim(limx); ylim(limy(i,:)); legend(labels(plot_indxs==i)); end % Format the axis into readable format t = get(gca, 'XTick'); tdate = cell(size(t)); for i=1:numel(t) tdate{i} = strftime('%m/%y', localtime(t(i))); end for i=1:6 hax = subplot(2,3,i); set(hax, 'XTickLabel', tdate); xlabel('date (mm/yy)'); end % Parse the strains CSV file strains = parse_strains_db(db_file); % Estimate the time range of our data today = mktime(localtime(time())); month = 60*60*24*365.25/12; [start, nindx] = min(strains(:,5)); nmonths = ceil((today - start) / month); if (nmonths>200) - disp(strins(nindx,:)) + disp(strains(nindx,:)) error(['Some date issue with entry ' nindx ' !']); end nstrains = max(strains(:,1)); + % Plot the population distribution per strain + [strain_ids, mapping] = unique(strains(:,1)); + npopulations = [mapping(1); diff(mapping)]; + figure; + hist(npopulations, [0:20]); + axis([0 21 0 180]); + xlabel('Number of clones'); + ylabel('Number of strains'); + % Bin the data to count the number of clones per strain over time populations = zeros(nmonths, nstrains); death = strains(:,6); death(strains(:,4) > 0) = today; % Count one "bin" for unknown death times lifespan = max(ceil((death - strains(:,5))/month), 1); ticks = (start-1)+[0:nmonths]*month; % Loop over each clone to add its lifespan over the count of its strain for i=1:size(strains, 1) id = strains(i, 1); new = strains(i, 5); % Add one count to the proper strain over the lifespan of the clone indx = find(new>ticks(1:end-1) & new<=ticks(2:end)); populations(indx+[0:lifespan(i)-1],id) = populations(indx+[0:lifespan(i)-1],id) + 1; end % Don't show ongoing periods populations = populations(1:end-1, :); ticks = ticks(1:end-2); % Display the statistics using stacked bars figure;bar(populations, 'stacked'); hold on; plot([0 nmonths], [128 128], 'k'); xlim([0 nmonths]); % Update the axes, labels and strain index pdate = cell(size(ticks)); for i=1:2:numel(ticks) pdate{i} = strftime('%m/%y', localtime(ticks(i))); end set(gca, 'XTick', [1:length(ticks)], 'XTickLabel', pdate); xlabel('date (mm/yy)'); ylabel('clone number'); title('Husbandry population') hbar = colorbar(); t = get(hbar, 'YTick'); set(hbar, 'YTickLabel', cellstr(num2str(round(t.'*(nstrains-1))+1))) + xrange = NaN; - % Parse one single strain (strain_id) to show its lineage - clones = strains(strains(:,1)==strain_id, :); + %Display all the strains of interest + for s=1:length(strain_id) + % Parse one single strain (strain_id) to show its lineage + clones = strains(strains(:,1)==strain_id(s), :); - % Infer death dates (today for living clones) - missings = isnan(clones(:,end)); - clones(missings, end) = clones(missings, end-1)+month; - clones(missings & clones(:,4)>0, end) = today; + % Infer death dates (today for living clones) + missings = isnan(clones(:,end)); + clones(missings, end) = clones(missings, end-1)+month; + clones(missings & clones(:,4)>0, end) = today; - % Plot the lifespan of each clone, and link it to its parent clone - figure;hold on; - for i=1:size(clones, 1) - plot(clones(i,[end-1 end]), clones(i, [2 2]), 'r', 'LineWidth', 2); - mom = find(abs(clones(1:i-1, 4))==clones(i,3), 1, 'last'); - if (~isempty(mom)) - plot([clones(i, end-1)-month/12 clones(i, end-1)], [clones(mom, 2) clones(i,2)], 'k'); + % Plot the lifespan of each clone, and link it to its parent clone + figure;hold on; + for i=1:size(clones, 1) + plot(clones(i,[end-1 end]), clones(i, [2 2]), 'r', 'LineWidth', 2); + mom = find(abs(clones(1:i-1, 4))==clones(i,3), 1, 'last'); + if (~isempty(mom)) + plot([clones(i, end-1)-month/12 clones(i, end-1)], [clones(mom, 2) clones(i,2)], 'k'); - % Here we manually fix a few links to unknown slides - else - if clones(i, 3) == 31 - plot([clones(i, end-1)-month/12 clones(i, end-1)], [37 clones(i,2)], 'k'); - elseif clones(i, 3) == 96 - plot([clones(i, end-1)-month/12 clones(i, end-1)], [126 clones(i,2)], 'k'); - elseif ~isnan(clones(i, 3)) - disp(clones(i,[1 3])) + % Here we manually fix a few links to unknown slides + else + if clones(i, 3) == 31 + plot([clones(i, end-1)-month/12 clones(i, end-1)], [37 clones(i,2)], 'k'); + elseif clones(i, 3) == 96 + plot([clones(i, end-1)-month/12 clones(i, end-1)], [126 clones(i,2)], 'k'); + elseif ~isnan(clones(i, 3)) + disp(clones(i,[1 3])) + end end end - end - % Update the axes, labels and strain index - xval = get(gca, 'XTick'); - xdate = cell(size(xval)); - for i=1:numel(xval) - xdate{i} = strftime('%m/%y', localtime(xval(i))); + if s>1 + xlim(xrange); + else + xrange = xlim; + end + + % Update the axes, labels and strain index + xval = get(gca, 'XTick'); + xdate = cell(size(xval)); + for i=1:numel(xval) + xdate{i} = strftime('%m/%y', localtime(xval(i))); + end + set(gca, 'XTickLabel', xdate); + xlabel('date (mm/yy)'); + ylabel('clone ID'); + title(strain_id(s)); end - set(gca, 'XTickLabel', xdate); - xlabel('date (mm/yy)'); - ylabel('clone ID'); keyboard return; end