#Imports
import numpy as np
import pandas as pd

# Utility file
import ModelHandler as mh

# sklearn
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split


def main(handler):
    """Run leave-one-patient-out (LOPO) training/evaluation on CHB-MIT signals.

    For each patient: train on every other patient's windows (with a
    stratified validation split carved out of the training set), then
    evaluate the trained model separately on each of the held-out
    patient's recording files.

    Parameters
    ----------
    handler : ModelHandler
        Utility object providing ``NB_CHNS``, ``cut_signal_data()``,
        ``train()`` and ``evaluate()``.
    """
    # Paths / settings
    model_file = "../model/model.bin"
    data_folder = "../dataset/"
    signal_length = "1mn"

    # Processing
    validation_size = 0.15
    test_stride = 640  # Corresponds to 50% overlap, can be set to 1280 to have no overlap

    # Model hyper-parameters
    epochs = 1
    batch_size = 32
    learning_rate = 0.0075  # 0.00075

    # Raw signals are stored one flattened comma-separated signal per line.
    with open(data_folder + "signal_mit_" + signal_length + ".csv", "rb") as f:
        raw_lines = f.read().splitlines()
    y_data = np.loadtxt(data_folder + "labels_mit_" + signal_length + ".txt")
    info_data = np.loadtxt(data_folder + "infos_mit_" + signal_length + ".txt", dtype="str")

    # Parse each line explicitly (np.fromstring with sep= is deprecated and
    # was being fed bytes), then reshape the flat 1D signal into
    # (channels, samples_per_channel) since the CSV stores signals flattened.
    x_data = [np.array(line.decode().split(','), dtype=np.float64) for line in raw_lines]
    x_data = [sig.reshape((handler.NB_CHNS, len(sig) // handler.NB_CHNS)) for sig in x_data]

    # One DataFrame row per signal: label, patient id, file id, signal array.
    data = pd.concat([pd.Series(y_data),
                      pd.Series([info[0] for info in info_data]),
                      pd.Series([info[1] for info in info_data])], axis=1)
    data.columns = ["label", "patient", "file"]
    data["signal"] = ""
    # Assign the result — a bare .astype(object) is a no-op — so each cell
    # can hold a 2D ndarray.
    data["signal"] = data["signal"].astype(object)
    for i, sig in enumerate(x_data):
        data.at[i, "signal"] = sig

    patients = np.unique(data.patient)
    data.sort_values(["patient", "file", "label"], inplace=True)
    data = data.reset_index(drop=True)

    # Load seizure times; .copy() prevents SettingWithCopyWarning when the
    # "length" column is added to the filtered slice below.
    seizures = pd.read_csv(data_folder + "seizures.csv", delimiter='\t')
    seizures_ = seizures[seizures.Patient.isin(patients)].copy()
    seizures_["length"] = seizures_.end_seizure - seizures_.start_seizure

    for patient in patients:
        print("Patient: ", patient)
        patient_data = data[data.patient == patient]
        files = np.unique(patient_data.file)
        print(' ', len(files), ' files.')
        test_data = patient_data
        train_data = data.drop(patient_data.index)

        # Build train/test set by cutting each signal into 5-second pieces
        # with 50% overlap (test overlap controlled by stride_test).
        x_train, y_train, x_test, y_test = handler.cut_signal_data(
            train_data, test_data, stride_test=test_stride)

        # Shuffle, then carve out a class-stratified validation split.
        x_train, y_train = shuffle(x_train, y_train)
        x_train, x_val, y_train, y_val = train_test_split(
            x_train, y_train, test_size=validation_size, stratify=y_train)

        x_train = np.array(x_train).reshape((-1, handler.NB_CHNS, 1280, 1))
        x_val = np.array(x_val).reshape((-1, handler.NB_CHNS, 1280, 1))
        # One-hot encode the binary labels.
        y_train = np.eye(2)[np.array(y_train).astype(int)]
        y_val = np.eye(2)[np.array(y_val).astype(int)]

        handler.train(model_file, epochs, batch_size, learning_rate,
                      x_train, y_train, x_val, y_val)

        # Evaluate per recording file of the held-out patient; distinct loop
        # variable names avoid clobbering x_test/y_test and shadowing file().
        x_test_splitted = np.array_split(np.array(x_test), len(files))
        y_test_splitted = np.array_split(np.array(y_test), len(files))
        for x_te, y_te in zip(x_test_splitted, y_test_splitted):
            x_te = np.array(x_te).reshape((-1, handler.NB_CHNS, 1280, 1))
            y_te = np.eye(2)[np.array(y_te).astype(int)]
            handler.evaluate(model_file, x_te, y_te)


if __name__ == '__main__':
    model_handler = mh.ModelHandler()
    main(model_handler)