diff --git a/synthetic_data/data_generation/data_generation_2.m b/synthetic_data/data_generation/data_generation_2.m
new file mode 100644
index 00000000..a1a5a369
--- /dev/null
+++ b/synthetic_data/data_generation/data_generation_2.m
@@ -0,0 +1,83 @@
+% This script generates a synthetic dataset with three different
+% possibilities. In all cases we consider the three following functions
+%       f_1 = @(a) alpha * sin(a);
+%       f_2 = @(a) beta * cos(a);
+%       f_3 = @(a) gamma * sin(a) + delta * cos(a);
+% where alpha, beta, gamma and delta are scalars.
+% The three possibilities are :
+% 1) Generate train/test datasets by solving an ODE system of the form
+%       dX/dt = A*X + g(t)
+% where A is a N*N symmetric negative definite matrix and g is such that
+%       g(t) = b*t
+% where b is a random N*1 vector.
+% The labels are generated by taking for each output one of the above
+% functions at random, i.e for each input x_i and output p_i,
+%       p_i = f_j(x_i), for j in {1,2,3}
+%
+% 2) Generate train/test datasets in the same way as 1), except that A is
+% now symmetric positive definite. Apply the same mapping for the labels.
+%
+% 3) Generate train/test datasets in the same way as 2). This time the
+% labels are generated by evaluating the functions on a linear
+% combination of the input dataset.
+%
+% Also, before generating the labels, there is the possibility to add
+% Gaussian noise to the dataset X.
+
+clc
+clear all
+close all
+
+N = 3;              % Number of input/output parameters
+Nh = 70;            % Resolution of the grid for the ODE system
+N_samples = 2000;   % Number of samples that we want to create
+t0 = 0; tf = 5;     % Initial time and final time
+
+% Not called 'path', which would shadow the built-in path function
+data_path = '../';  % Path to the files
+
+
+%% Definition of different functions
+
+% Definition of the time linear part of the ODE
+b = rand(N, 1);
+g = @(t) b * t;
+
+
+%% Reading the matrix A of the ODE system
+
+% Choose the matrix to be spd or snd
+spd = 1;        % Boolean for the type of the matrix
+file_idx = 0;   % For reading files (not 'i', which is the imaginary unit)
+A = read_matrix_ODE(spd, N, file_idx);
+
+% System of ODEs. An anonymous function captures the value of A when it
+% is created, so this handle can only be built once A has been read.
+fun = @(t, X) A*X + g(t);
+
+% Choose to apply a linear combination or not
+lin_combin = 1;
+
+% Get the random linear combination matrix
+B = random_linear_combination(N, lin_combin);
+
+%% Files creation
+
+% Decide whether we add noise or not
+add_noise = 0;
+
+% Define the index for the control variable
+control_index = 2;%randi(N,1);
+
+% Create directory to write dataset
+name = 'files';
+% make directory 'files_N'
+make_directories(data_path, name, N)
+
+% Generate train set
+dataset_mode = 'train';   % not 'mode', which is a built-in function
+[X_train, t] = file_creation_2(A, B, g, t0, tf, N, Nh, N_samples, control_index, add_noise, data_path, dataset_mode);
+
+% Generate test set
+dataset_mode = 'test';
+[X_test, t] = file_creation_2(A, B, g, t0, tf, N, Nh, N_samples, control_index, add_noise, data_path, dataset_mode);
diff --git a/synthetic_data/data_generation/data_generation_2.m~ b/synthetic_data/data_generation/data_generation_2.m~
new file mode 100644
index 00000000..15236610
--- /dev/null
+++ b/synthetic_data/data_generation/data_generation_2.m~
@@ -0,0 +1,83 @@
+% This script generate a synthetic dataset with three different
+% possibilities. In any cases we consider the three following functions
+% f_1 = @(a) alpha * sin(a);
+% f_2 = @(a) beta * cos(a);
+% f_3 = @(a) gamma * sin(a) + delta * cos(a);
+% where alpha, beta, gamma and delta are scalars.
+% The three possibilities are :
+% 1) Generate train/test datasets by solving an ODE system of the form
+% dX/dt = A*X + g(t)
+% where A is a N*N symmetric negative definite matrix and g is such that
+% g(t) = b*t
+% where b is a random N*1 vector.
+% The labels are generated by taking for each output one of the above
+% functions at random, i.e for each input x_i and output p_i,
+% p_i = f_j(x_i), for j in {1,2,3}
+%
+% 2) Generate train/test datasets in the same way as 1), except that A is
+% now symmetric positive definite. Apply the same mapping for the labels.
+%
+% 3) Generate train/test datasets in the same way as 2). This time the
+% labels are generated by evaluating the functions on a linear
+% combination of the input dataset.
+%
+% Also, before generating the labels, there is the possibility to add
+% Gaussian noise to the dataset X.
+
+clc
+clear all
+close all
+
+N = 3; % Number of input/output parameters
+Nh = 70; % Resolution of the grid for the ODE system
+N_samples = 2000; % Number of samples that we want to create
+t0 = 0; tf = 5; % Initial time and final time
+
+path = '../'; % Path to the files
+
+
+%% Definition of different functions
+
+% Definition of the time linear part of the ODE
+b = rand(N, 1);
+g = @(t) b * t;
+
+% System of ODEs (NOTE(review): A has not been assigned yet at this point)
+fun = @(t, X) A*X + g(t);
+
+
+%% Reading the matrix A of the ODE system
+
+% Choose the matrix to be spd or snd
+spd = 0; % Boolean for the type of the matrix
+i = 0; % For reading files
+A = read_matrix_ODE(spd, N, i);
+
+% Choose to apply a linear combination or not
+lin_combin = 1;
+
+% Get the random linear combination matrix
+B = random_linear_combination(N, lin_combin);
+
+%% Files creation
+
+% Decide whether we add noise or not
+add_noise = 0;
+
+% Create directory to write dataset
+name = 'files';
+% make directory 'files_N'
+make_directories(path, name, N)
+
+% Generate train set (NOTE(review): control_index is never assigned in this backup copy)
+mode = 'train';
+[X_train, y_train, t] = file_creation_2(A, B, g, t0, tf, N, Nh, N_samples, control_index, add_noise,
path, mode);
+
+% Generate test set
+mode = 'test';
+[X_test, y_test, t] = file_creation_2(A, B, g, t0, tf, N, Nh, N_samples, control_index, add_noise, path, mode);
+
+
+
+
+
diff --git a/synthetic_data/data_generation/file_creation_2.m b/synthetic_data/data_generation/file_creation_2.m
new file mode 100644
index 00000000..bc084144
--- /dev/null
+++ b/synthetic_data/data_generation/file_creation_2.m
@@ -0,0 +1,97 @@
+function [X, t] = file_creation_2(A, B, g, t0, tf, N, Nh, N_samples, control_index, add_noise, path, mode)
+% Creates train/test dataset
+% Each sample is one solution of the ODE system. For sample number i
+% (i = 0, ..., N_samples-1), row control_index of the solution is written to
+%       strcat(path, 'files_', num2str(N), '/', mode, '/input/file_', num2str(i), '.txt')
+% and every other row to the file with the same name under '/target/'.
+%
+% Inputs :
+%   A             : Matrix of the ODE system
+%   B             : Matrix of linear combination (kept in the signature for
+%                   the callers, not used for the written data)
+%   g             : Linear part in the ODE system
+%   t0            : Initial time
+%   tf            : Final time
+%   N             : Number of input/output parameters
+%   Nh            : Resolution of the grid
+%   N_samples     : Number of samples desired
+%   control_index : Row of the solution that is written to the input files
+%   add_noise     : Boolean, for adding noise or not
+%   path          : path to the folder where we create the dataset
+%   mode          : 'test' or 'train'
+%
+% Outputs :
+%   X : Last generated sample ([] when N_samples is 0)
+%   t : time interval of the last sample ([] when N_samples is 0)
+
+% Keep the outputs defined even when no sample is generated
+X = [];
+t = [];
+
+% NOTE(review): the noise is drawn once, so every sample receives the same
+% realisation - confirm that this is intended.
+if add_noise
+    noise = random('Normal', 0, 1e-0, [N,Nh+1]);
+else
+    noise = 0;
+end
+
+% Common prefix of every file written by this call
+base_dir = strcat(path, 'files_', num2str(N), '/', mode);
+
+% Iterate over the samples
+for sample = 0:N_samples-1
+    % Generate random initial condition
+    y0 = randi(20,1)*rand(N,1);
+
+    % Solve the system of ODEs using Backward Euler
+    [t, X] = backward_euler(A, g, y0, t0, tf, Nh);
+
+    % Add noise
+    X = X + noise;
+
+    % Open the target and input files to write the solutions
+    target_name = strcat(base_dir, '/target/file_', num2str(sample), '.txt');
+    input_name = strcat(base_dir, '/input/file_', num2str(sample), '.txt');
+    fileID2 = fopen(target_name, 'w');
+    if fileID2 < 0
+        error('file_creation_2:cannotOpen', 'Cannot open %s for writing.', target_name);
+    end
+    fileID3 = fopen(input_name, 'w');
+    if fileID3 < 0
+        fclose(fileID2);
+        error('file_creation_2:cannotOpen', 'Cannot open %s for writing.', input_name);
+    end
+
+    for j = 1:N
+        % The format is recycled for every value of the row. It must be
+        % exactly '%f ': a character placed after the conversion (as in
+        % '%f1 ') would be appended to each number in the file.
+        if j == control_index
+            % Write in the files the input.
+            fprintf(fileID3, '%f ', X(j, 1:Nh+1));
+        else
+            % Write in the files the target.
+            fprintf(fileID2, '%f ', X(j, 1:Nh+1));
+        end
+
+        % One line per row in both files: the file that did not receive
+        % row j gets an empty line.
+        fprintf(fileID2,'\n');
+        fprintf(fileID3,'\n');
+    end
+    fclose(fileID2);
+    fclose(fileID3);
+end
+
+% Plot a sample of the inputs and the targets. The plot indexes the first
+% two target rows, so it is skipped when they (or the sample) do not exist.
+all_rows = 1:N;
+not_control_indexes = all_rows(all_rows ~= control_index);
+if N_samples > 0 && numel(not_control_indexes) >= 2
+    figure
+    plot(t, X(not_control_indexes(1), :), '-r', t, X(not_control_indexes(2), :), 'b-', t, X(control_index, :), 'c--');
+    title(mode)
+    legend('sample\_output1', 'sample\_output2', 'sample\_input1')
+    grid;
+end
diff --git a/synthetic_data/data_generation/file_creation_2.m~ b/synthetic_data/data_generation/file_creation_2.m~
new file mode 100644
index 00000000..927d676b
--- /dev/null
+++ b/synthetic_data/data_generation/file_creation_2.m~
@@ -0,0 +1,85 @@
+function [X, y, t] = file_creation_2(A, B, g, t0, tf, N, Nh, N_samples, control_index, add_noise, path, mode)
+% Creates train/test dataset
+% Inputs :
+% A : Matrix of the ODE system
+% B : Matrix of linear combination
+% g : Linear part in the ODE system
+% choices : choices for functions
+% t0 : Initial time
+% tf : Final time
+% N : Number of input/output parameters
+% Nh : Resolution of the grid
+% N_samples : Number of samples desired
+% add_noise : Boolean, for adding noise or not
+% path : path to the folder where we create the dataset
+% mode : 'test' or 'train'
+%
+% Outputs :
+% X : Dataset
+% y : Labels
+% t : time interval
+
+
+if add_noise
+    noise = random('Normal', 0, 1e-0, [N,Nh+1]);
+else
+    noise = 0;
+end
+
+% Iterate over the samples
+for i=0:N_samples-1
+    % Generate random initial condition
+    y0 = randi(20,1)*rand(N,1);
+
+    % Solve the system of ODEs using Backward Euler
+    [t, X] = backward_euler(A, g, y0, t0, tf, Nh);
+
+    % Add noise
+    X = X + noise;
+
+    % Apply the linear combination of the inputs
+    C = X' * B;
+    C = C';
+
+    % Open the target and input files to write the solutions
+    fileID2 = fopen(strcat(path, 'files_', num2str(N), '/',
mode, '/target/file_', num2str(i), '.txt'),'w');
+    fileID3 = fopen(strcat(path, 'files_', num2str(N), '/', mode, '/input/file_', num2str(i), '.txt'),'w');
+
+    for j = 1:N
+
+        % Row control_index goes to the target file, every other row to the input file
+        if j == control_index
+            % Write in the files...
+            for t_=1:Nh+1
+                % ... the target.
+                fprintf(fileID2, '%f ', X(j,t_));
+            end
+        else
+            % Write in the files...
+            for t_=1:Nh+1
+                % ... the input.
+                fprintf(fileID3, '%f ', X(j,t_));
+            end
+        end
+
+        fprintf(fileID2,'\n');
+        fprintf(fileID3,'\n');
+    end
+    fclose(fileID2);
+    fclose(fileID3);
+
+end
+
+not_control_indexes = [];
+for j = 1:N
+    if j ~= control_index
+        not_control_indexes = [not_control_indexes, j];
+% Plot a sample of the inputs and the targets (NOTE(review): the for/if above are not closed with 'end' in this backup copy)
+figure
+plot(t, X(not_control_indexes(1), :), '-r', t, X(2, :), 'b-', t, X(control_index, :), 'c--', t);
+title(mode)
+legend('sample\_input1', 'sample\_input2', 'sample\_label1', 'sample\_label2')
+grid;
+
+
+return
\ No newline at end of file