% GATHER_APEX_DATA gathers all the data from APEX compressed archives
% into individual CSV files that can be loaded by Octave.
%
%   CSV_FILES = GATHER_APEX_DATA(FNAMES) unpacks every archive listed in
%   FNAMES into the folder 'TmpData' of the current directory, parses the
%   XML files found inside, and appends the probe readings of 'datalog'
%   documents to one CSV file per probe name (TmpData/<name>.csv). Each CSV
%   row is "<timestamp>,<value>", the timestamp being the output of mktime.
%
%   FNAMES can be a cell array of archive paths, the path of a single
%   archive, or the path of a folder, in which case all its '*.tgz' files
%   are processed.
%
%   CSV_FILES is the sorted list of unique CSV files that were written.
%
% Blanchoud Group, UNIFR
% Simon Blanchoud
% 29/11/2020
function csv_files = gather_apex_data(fnames)

  % Load the required packages (xmlread is provided by 'io')
  pkg load io

  % Format the input as a list of cells
  if ~iscell(fnames)
    if isempty(fnames)
      fnames = {};
    elseif (isfile(fnames))
      fnames = {fnames};
    elseif (isfolder(fnames))
      fnames = glob(fullfile(fnames, '*.tgz'));
    else
      % Fail early with a clear message instead of an indexing error later
      error(['Cannot find any file or folder at ' fnames]);
    end
  end

  % Prepare the temporary folder: CSV files are opened in append mode, so
  % leftovers from a previous run must be removed first
  tmpdir = fullfile(pwd, 'TmpData');
  if (length(glob(fullfile(tmpdir, '*.csv')))>0)
    delete(fullfile(tmpdir, '*.csv'));
  end

  % Prepare the output files
  csv_files = {};

  % Loop through all files to gather
  for i=1:length(fnames)

    % Extract the actual files
    files = unpack(fnames{i}, tmpdir, 'tgz');

    % Loop through those files
    for j=1:length(files)
      xml = fullfile(tmpdir, files{j});

      % Make sure this is an actual file
      if ~isfile(xml)
        continue;
      end

      % Just try loading it as an XML file, ignore other types
      try
        dom = xmlread(xml);
      catch ME
        disp(ME.message)
        continue;
      end

      % Get the content of the file
      content = dom.getDocumentElement();
      content.normalize();

      % Check which type of file this is
      root_name = content.getNodeName();
      switch root_name
        case 'datalog'
          node = 'record';
          target = 'probe';
        otherwise
          % Currently we only parse the datalog
          continue;
      end

      % Get the nodes that are actually useful
      nodes = content.getElementsByTagName(node);

      % And convert them to CSV
      new_files = convert_xml(nodes, target, tmpdir);
      csv_files = [csv_files; new_files(:)];
    end

    % Delete the temporary files
    % NOTE(review): this assumes the archive extracts into a subfolder of
    % tmpdir named after the archive (without extension) - confirm
    [~, fname] = fileparts(fnames{i});
    delete(fullfile(tmpdir, fname, '*'));
    rmdir(fullfile(tmpdir, fname));
  end

  % Remove duplicates
  csv_files = unique(csv_files);

  return;
end

% Here we physically copy the XML data into a CSV file
%
%   XML is the list of record nodes, TARGET the name of the child nodes
%   holding the data, and FDIR the folder in which the CSV files are
%   written. FILES lists the CSV files opened during this call.
%   Raises an error if a date cannot be parsed or a CSV cannot be opened.
function files = convert_xml(xml, target, fdir)

  % Some handlers for the files to be written
  files = {};
  fids = struct();

  % Make sure the files get closed even if an error is raised
  unwind_protect

    % We loop over all the nodes in the XML
    for i=1:xml.getLength()
      item = xml.item(i-1);

      % As in the original implementation, the node itself is used as the
      % list of its children
      nchild = item.getLength();

      % We first retrieve the date of this record, so that the data are
      % never written with a missing or outdated timestamp
      curr_time = NaN;
      for j=1:nchild
        node = item.item(j-1);
        if strcmp(node.getNodeName(), 'date')
          val = node.getTextContent();
          [tm, indx] = strptime(val, '%m/%d/%Y %H:%M:%S');

          % strptime must have consumed the whole string
          if (indx > length(val))
            curr_time = mktime(tm);
          else
            error(['Cannot interpret the time format ' val]);
          end
        end
      end

      if isnan(curr_time)
        warning('Skipping a record that does not contain any date');
        continue;
      end

      % We then extract the data from the nodes of the target type
      for j=1:nchild
        node = item.item(j-1);
        if ~strcmp(node.getNodeName(), target)
          continue;
        end

        % We get all the data from the child nodes
        content = node2cell(node.getChildNodes());

        % We loop through all the cells and copy the proper data
        name = '';
        val = NaN;
        for k=1:size(content, 1)
          switch content{k,1}
            case 'name'
              name = content{k,2};
            case 'value'
              val = str2double(content{k,2});
          end
        end

        % We need all the data to be able to write it
        if (isnan(val) || isempty(name))
          continue;
        end

        % We store the file handlers in a structure, which we
        % need to create if it isn't ready yet
        if (~isfield(fids, name))
          fname = fullfile(fdir, [name '.csv']);
          fid = fopen(fname, 'a');
          if (fid < 0)
            error(['Cannot create the proper CSV file at ' fname]);
          end

          % Only valid handlers are stored, so that all can be closed
          fids.(name) = fid;
          files{end+1} = fname;
        end

        % Actually write the data on disk
        fprintf(fids.(name), '%d,%f\n', curr_time, val);
      end
    end

  unwind_protect_cleanup

    % We close all the handlers
    fields = fieldnames(fids);
    for i=1:length(fields)
      fclose(fids.(fields{i}));
    end
  end_unwind_protect

  return;
end

% Here we extract the nodes into a cell matrix
%
%   DATA is an Nx2 cell matrix with one {name, text} row per node of NODES,
%   ignoring the nodes whose name is empty or starts with '#'.
function data = node2cell(nodes)

  % We populate a {name, text} structure
  data = cell(0,2);
  for i=1:nodes.getLength()
    node = nodes.item(i-1);
    node.normalize();

    % Get the two fields
    name = node.getNodeName();
    val = node.getTextContent();

    % If there is something, store it
    if (~isempty(name) && name(1)~='#')
      data{end+1, 1} = name;
      data{end, 2} = val;
    end
  end

  return;
end