% GATHER_APEX_DATA gathers all the data from APEX compressed archives
% into individual CSV files that can be loaded by Octave.
%
% Blanchoud Group, UNIFR
% Simon Blanchoud
% 29/11/2020
function csv_files = gather_apex_data(fnames)
  % Unpacks each archive into a temporary folder, parses every extracted XML
  % file whose root element is 'datalog', and appends its probe readings to
  % CSV files written in that same temporary folder.
  %
  % Input:
  %   fnames     either the path to one archive, the path to a folder (all
  %              its '*.tgz' files are processed), or a cell array of
  %              archive paths
  %
  % Output:
  %   csv_files  cell array with the unique paths of the CSV files written
  %
  % The temporary folder is 'TmpData' in the current working directory. Any
  % CSV file already present there is deleted first, because the CSV files
  % are opened in append mode by convert_xml.

  % load the required packages
  pkg load io

  % Format the input as a list of cells, and fail early with a readable
  % message instead of an indexing error further down
  if ~iscell(fnames)
    if ~ischar(fnames)
      error('gather_apex_data: FNAMES must be a file, a folder or a cell array of files');
    elseif (isfile(fnames))
      fnames = {fnames};
    elseif (isfolder(fnames))
      fnames = glob(fullfile(fnames, '*.tgz'));
    else
      error(['gather_apex_data: cannot find the file or folder ' fnames]);
    end
  end

  % Prepare the temporary folder by removing the CSV files of previous runs
  tmpdir = fullfile(pwd, 'TmpData');
  if ~isempty(glob(fullfile(tmpdir, '*.csv')))
    delete(fullfile(tmpdir, '*.csv'));
  end

  % Prepare the output files
  csv_files = {};

  % Loop through all files to gather
  for i=1:length(fnames)

    % Extract the actual files
    files = unpack(fnames{i}, tmpdir, 'tgz');

    % The extracted files are removed even if the conversion fails
    unwind_protect

      % Loop through those files
      for j=1:length(files)
        xml = fullfile(tmpdir, files{j});

        % Make sure this is an actual file (unpack also lists folders)
        if ~isfile(xml)
          continue;
        end

        % Just try loading it as an XML file, ignore other types
        try
          dom = xmlread(xml);
        catch ME
          disp(ME.message)
          continue;
        end

        % Get the content of the file
        content = dom.getDocumentElement();
        content.normalize();

        % Check which type of file this is
        type = content.getNodeName();
        switch type
          case 'datalog'
            node = 'record';
            target = 'probe';

          % Currently we only parse the datalog, so skip anything else
          otherwise
            continue;
        end

        % Get the nodes that are actually useful
        nodes = content.getElementsByTagName(node);

        % And convert them to CSV
        new_files = convert_xml(nodes, target, tmpdir);
        csv_files = [csv_files; new_files(:)];
      end

    unwind_protect_cleanup

      % Delete the temporary files. The archive is expected to extract into
      % a folder named after itself; if it did not, there is nothing to do.
      [~, fname] = fileparts(fnames{i});
      extracted = fullfile(tmpdir, fname);
      if isfolder(extracted)
        if ~isempty(glob(fullfile(extracted, '*')))
          delete(fullfile(extracted, '*'));
        end

        % Capture the status so that a folder that cannot be removed is
        % reported without aborting the whole gathering
        [status, msg] = rmdir(extracted);
        if ~status
          warning('gather_apex_data:cleanup', ...
                  'Cannot remove the temporary folder %s: %s', extracted, msg);
        end
      end
    end_unwind_protect
  end

  % Remove duplicates
  csv_files = unique(csv_files);

  return;
end

% CONVERT_XML appends the readings found in a list of XML nodes to CSV files.
%
% Inputs:
%   xml     DOM node list whose items are the records to parse
%   target  tag name of the child elements holding a reading
%   fdir    folder in which the CSV files are written
%
% Output:
%   files   cell array with the path of every CSV file opened by this call
%
% For each child element named TARGET, the text of its 'name' and 'value'
% children is read. If the value is numeric and the name is not empty, a
% "time,value" line is appended to <fdir>/<name>.csv, where time is the
% mktime value of the most recent 'date' element ('%m/%d/%Y %H:%M:%S').
% An error is raised if a date cannot be parsed or a file cannot be opened.
function files = convert_xml(xml, target, fdir)

  % Some handlers for the files to be written
  files = {};
  fids = struct();

  % No date has been read yet; readings found before one are skipped
  curr_time = NaN;
  warned = false;

  % The handlers are closed even if an error is raised while parsing
  unwind_protect

    % We loop over all the nodes in the XML
    for i=1:xml.getLength()
      children = xml.item(i-1).getChildNodes();

      % We loop over all the child nodes of each node
      for j=1:children.getLength()
        node = children.item(j-1);

        % We check which type of node this is
        name = node.getNodeName();
        switch name

          % We extract the data from the target type
          case target

            % We get all the data from the child nodes
            content = node2cell(node.getChildNodes());

            % We loop through all the cells and copy the proper data
            name = '';
            val = NaN;
            for k=1:size(content, 1)
              switch content{k,1}
                case 'name'
                  name = content{k,2};
                case 'value'
                  val = str2double(content{k,2});
              end
            end

            % If we got all the data we need, then we write it
            if (~isnan(val) && ~isempty(name))

              if isnan(curr_time)

                % Without a date the line cannot be ordered, so we skip it
                % and report the problem only once
                if ~warned
                  warning('convert_xml:nodate', ...
                          'Skipping readings found before any date element');
                  warned = true;
                end
              else

                % We store the file handlers in a structure, which we
                % need to create if it isn't ready yet. Only valid
                % handlers are stored so that the cleanup can close them.
                if (~isfield(fids, name))
                  fname = fullfile(fdir, [name '.csv']);
                  fid = fopen(fname, 'a');

                  if (fid < 0)
                    error(['Cannot create the proper CSV file at ' fname])
                  end

                  fids.(name) = fid;
                  files{end+1} = fname;
                end

                % Actually write the data on disk
                fprintf(fids.(name), '%d,%f\n', curr_time, val);
              end
            end

          % We store the date for proper ordering of the CSV
          case 'date'
            stamp = node.getTextContent();
            [parsed, indx] = strptime(stamp, '%m/%d/%Y %H:%M:%S');

            % The whole text must have been consumed by the format
            if (indx > length(stamp))
              curr_time = mktime(parsed);
            else
              error(['Cannot interpret the time format ' stamp]);
            end
        end
      end
    end

  unwind_protect_cleanup

    % We close all the handlers
    fields = fieldnames(fids);
    for i=1:length(fields)
      fclose(fids.(fields{i}));
    end
  end_unwind_protect

  return;
end

% NODE2CELL converts a DOM node list into an N-by-2 cell matrix.
%
% Each row is {node name, text content} of one node from the list, in list
% order. Nodes whose name is empty or starts with '#' are left out. Every
% node of the list is normalized in the process.
function data = node2cell(nodes)

  % Start from an empty {name, text} structure
  data = cell(0,2);

  count = nodes.getLength();
  pos = 0;
  while (pos < count)
    child = nodes.item(pos);
    pos = pos + 1;

    child.normalize();

    % Read both fields of the current node
    tag = child.getNodeName();
    txt = child.getTextContent();

    % Nothing to store for unnamed or '#'-prefixed nodes
    if (isempty(tag) || tag(1)=='#')
      continue;
    end

    data(end+1, :) = {tag, txt};
  end
end