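%% Import DICOM metadata from a text file.
% Script that dumps the header of one DICOM slice to a text file (via
% DICOMDISP) and then imports that text file into a table:
%
%    <CTdir>/metadata.txt
%
% The import code was originally generated against the following file:
%
%    Y:\data\P\4\1\P411-2584882\dicomInfo411-3.txt
%
% To extend the code to different selected data or a different text file,
% generate a function instead of a script.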

% Auto-generated by MATLAB on 2018/09/26 15:05:09

%% Dump the DICOM header of one slice to <CTdir>/metadata.txt.
% The case, CT folder and slice are hard-coded; edit the paths below to
% process a different acquisition.
basedir = '//lbovenus.epfl.ch/shoulder/data/';
casedir = [basedir 'P/4/1/P411-2584882/'];
CTdir = [casedir 'CT-P411-2584882-2/'];
dicomFileName=[CTdir 'dicom/P411-2584882_CA-soft.0001.dcm'];

% evalc captures the text that dicomdisp would otherwise print to the
% command window, so that it can be written to disk and parsed below.
dicomMetaData=evalc('dicomdisp(dicomFileName)');

% Fail early with a readable message if the output file cannot be created
% (e.g. network share not mounted or folder read-only); otherwise fprintf
% would fail later with an "invalid file identifier" error.
fid=fopen([CTdir 'metadata.txt'],'wt');
if fid < 0
    error('importDicomMetadata:cannotOpenOutput', ...
        'Could not open "%s" for writing.', [CTdir 'metadata.txt']);
end
fprintf(fid,'%s',dicomMetaData);
fclose(fid);


%% Initialize variables.
% The file parsed here is the one written from the dicomdisp output
% (the generated code originally pointed at
% 'Y:\data\P\4\1\P411-2584882\dicomInfo411-3.txt').
filename = [CTdir 'metadata.txt'];
delimiter = {'\t',' '};
startRow = 3; % the first startRow-1 lines are skipped as header lines

%% Read columns of data as text:
% For more information, see the TEXTSCAN documentation.
% Thirteen whitespace-delimited text columns, then the rest of the line.
formatSpec = '%s%s%s%s%s%s%s%s%s%s%s%s%s%[^\n\r]';

%% Open the text file.
fileID = fopen(filename,'r');
if fileID < 0
    % Without this check textscan would fail with a generic
    % "invalid file identifier" error that hides the real cause.
    error('importDicomMetadata:cannotOpenInput', ...
        'Could not open "%s" for reading.', filename);
end

%% Read columns of data according to the format.
% This call is based on the structure of the file used to generate this
% code. If an error occurs for a different file, try regenerating the code
% from the Import Tool.
dataArray = textscan(fileID, formatSpec, 'Delimiter', delimiter, 'MultipleDelimsAsOne', true, 'TextType', 'string', 'HeaderLines' ,startRow-1, 'ReturnOnError', false, 'EndOfLine', '\r\n');

%% Close the text file.
fclose(fileID);

%% Convert the contents of columns containing numeric text to numbers.
% Replace non-numeric text with NaN.

% raw holds one cell per (row, column) of the parsed text, excluding the
% final "rest of line" column; columns shorter than the first are padded
% with ''.
raw = repmat({''},length(dataArray{1}),length(dataArray)-1);
for col=1:length(dataArray)-1
    raw(1:length(dataArray{col}),col) = mat2cell(dataArray{col}, ones(length(dataArray{col}), 1));
end
numericData = NaN(size(dataArray{1},1),size(dataArray,2));

% Both patterns are loop-invariant, so they are built once here.
% regexstr splits a cell into a non-numeric prefix, the numeric text and a
% suffix; thousandsRegExp accepts commas only as thousands separators.
regexstr = '(?<prefix>.*?)(?<numbers>([-]*(\d+[\,]*)+[\.]{0,1}\d*[eEdD]{0,1}[-+]*\d*[i]{0,1})|([-]*(\d+[\,]*)*[\.]{1,1}\d+[eEdD]{0,1}[-+]*\d*[i]{0,1}))(?<suffix>.*)';
thousandsRegExp = '^\d+?(\,\d{3})*\.{0,1}\d*$';

% NOTE(review): column 3 ends up in the output as "Tag". If dicomdisp
% prints tags as "(gggg,eeee)" hexadecimal pairs -- TODO confirm against a
% metadata.txt file -- the comma is handled here as a thousands separator
% and tags containing hex letters become NaN. Importing the tag as text
% would avoid that, but changes the type of the output column.
for col=[1,2,3,5]
    % Converts text in the input cell array to numbers. Replaced non-numeric
    % text with NaN.
    rawData = dataArray{col};
    for row=1:size(rawData, 1)
        try
            % If nothing matches, result is empty and the field access
            % below throws; the catch then keeps the original text.
            result = regexp(rawData(row), regexstr, 'names');
            numbers = result.numbers;

            % Detected commas in non-thousand locations.
            invalidThousandsSeparator = false;
            if contains(numbers, ',')
                if isempty(regexp(numbers, thousandsRegExp, 'once'))
                    numbers = NaN;
                    invalidThousandsSeparator = true;
                end
            end
            % Convert numeric text to numbers.
            if ~invalidThousandsSeparator
                numbers = textscan(char(strrep(numbers, ',', '')), '%f');
                % Keep this assignment even though numericData is not used
                % afterwards: it throws when textscan returned no value (or
                % more than one), so that raw keeps its text instead of
                % receiving a non-scalar.
                numericData(row, col) = numbers{1};
                raw{row, col} = numbers{1};
            end
        catch
            % Not convertible: store the original text as char; it is
            % replaced by NaN when the numeric columns are extracted.
            raw{row, col} = rawData{row};
        end
    end
end


%% Split data into numeric and string columns.
% Parsed columns 1, 2, 3 and 5 are numeric; the remaining ones are text.
rawNumericColumns = raw(:, [1,2,3,5]);
rawStringColumns = string(raw(:, [4,6,7,8,9,10,11,12,13]));


%% Replace non-numeric cells with NaN
R = cellfun(@(x) ~isnumeric(x) && ~islogical(x),rawNumericColumns); % Find non-numeric cells
rawNumericColumns(R) = {NaN}; % Replace non-numeric cells

%% Make sure any text containing <undefined> is properly converted to an <undefined> categorical
% Only string columns 1, 4 and 5 are turned into categoricals below (VR,
% Name and Data), so those are the columns that must be cleaned. The
% previous list [1,2,3,6] skipped Name and Data and cleaned columns that
% are discarded.
for catIdx = [1,4,5]
    idx = (rawStringColumns(:, catIdx) == "<undefined>");
    rawStringColumns(idx, catIdx) = "";
end

%% Create output variable
% dicomMetaData previously held the raw dicomdisp text; from here on it is
% the parsed table, one row per line read from the metadata file.
dicomMetaData = table;
dicomMetaData.Location = cell2mat(rawNumericColumns(:, 1));
dicomMetaData.Level = cell2mat(rawNumericColumns(:, 2));
dicomMetaData.Tag = cell2mat(rawNumericColumns(:, 3));
dicomMetaData.VR = categorical(rawStringColumns(:, 1));
dicomMetaData.Size = cell2mat(rawNumericColumns(:, 4));
dicomMetaData.Name = categorical(rawStringColumns(:, 4));
dicomMetaData.Data = categorical(rawStringColumns(:, 5));

%% Clear temporary variables
% The path variables (basedir, casedir, CTdir, dicomFileName) and fid are
% intentionally left in the workspace, as before.
clearvars filename delimiter startRow formatSpec fileID dataArray ans raw col numericData rawData row regexstr result numbers invalidThousandsSeparator thousandsRegExp rawNumericColumns rawStringColumns R catIdx idx;