class ModelHandler:
    """Build, train and evaluate the EEG seizure-detection CNN with pyeddl.

    Besides the network itself, the class bundles the helpers used around it:
    cutting long recordings into fixed-size windows, plotting, and computing
    detection metrics with an "n positives out of the last m windows" rule.
    """

    NB_CHNS = 4
    L2_K = 0.1
    L2_B = 0.1
    L2_A = 0.0
    DROPOUT_RATE = 0.5

    # Samples per analysis window; the windows are described as 5-second
    # chunks, i.e. WINDOW_SAMPLES / SAMPLING_RATE_HZ seconds.
    WINDOW_SAMPLES = 1280
    SAMPLING_RATE_HZ = 256
    # Number of predictions during which no new alarm may be raised after one
    # has fired (the original comment asks for about one minute between
    # seizure onsets).
    REFRACTORY_STEPS = 23

    net = None

    def __init__(self):
        """Create the handler and immediately build the network."""
        print("Object constructor")
        self.model = self.get_model()

    def get_model(self):
        """Assemble and build the convolutional network.

        The input has shape ``[1, NB_CHNS, 1280]``.  Three convolutional
        stages (16, 32 and 32 filters, each followed by batch normalization
        and ReLU) feed a 64-unit dense layer and a 2-unit softmax output.
        Convolution and hidden dense layers are L2-regularized with ``L2_K``.

        Returns:
            The built pyeddl model (SGD with Nesterov momentum, cross-entropy
            loss, categorical accuracy, computing service ``CS_GPU([1])``).
        """
        in_ = Input([1, self.NB_CHNS, 1280])

        layer = in_
        layer = L2(GlorotUniform(Conv(layer, 16, [3, 5], [1, 1], "same")), self.L2_K)
        layer = BatchNormalization(layer, 0.99, 0.001)
        layer = Activation(layer, "relu")
        layer = MaxPool(layer, [1, 2], [1, 2], "same")

        layer = L2(GlorotUniform(Conv(layer, 32, [3, 3], [1, 1], "same")), self.L2_K)
        layer = BatchNormalization(layer, 0.99, 0.001)
        layer = Activation(layer, "relu")
        layer = MaxPool(layer, [1, 2], [1, 2], "same")

        # Strided convolution on an explicitly padded input.
        layer = L2(
            GlorotUniform(Conv(Pad(layer, [0, 1, 1, 0]), 32, [3, 3], [2, 2], "same")),
            self.L2_K,
        )
        layer = BatchNormalization(layer, 0.99, 0.001)
        layer = Activation(layer, "relu")
        layer = Dropout(layer, self.DROPOUT_RATE)

        layer = Flatten(layer)
        layer = L2(GlorotUniform(Dense(layer, 64)), self.L2_K)
        layer = Activation(layer, "relu")
        layer = Dropout(layer, self.DROPOUT_RATE)

        layer = GlorotUniform(Dense(layer, 2))
        layer = Activation(layer, "softmax")
        out_ = layer

        model = eddl.Model([in_], [out_])
        eddl.build(
            model,
            sgd(0.01, 0.9, 0.0, nesterov=True),
            ["cross_entropy"],
            ["categorical_accuracy"],
            eddl.CS_GPU([1]),
        )
        eddl.summary(model)
        return model

    def _cut_rows(self, frame, size, stride):
        """Window every signal of *frame* and repeat its label per window."""
        windows = []
        labels = []
        for row in frame.itertuples(index=False):
            chunks = self.sliding_window(row.signal, size, stride)
            windows += list(chunks)
            labels += list(np.ones(len(chunks)) * row.label)
        return windows, labels

    def cut_signal_data(self, train_set, test_set, size_train=1280, stride_train=640,
                        size_test=1280, stride_test=1280):
        """Cut the training and testing recordings into fixed-size windows.

        Args:
            train_set: Table whose rows expose ``signal`` and ``label`` (the
                callers pass pandas DataFrames with those columns).
            test_set: Same layout as *train_set*.
            size_train: Window length, in samples, for the training signals.
            stride_train: Step between training windows, in samples.
            size_test: Window length, in samples, for the testing signals.
            stride_test: Step between testing windows, in samples.

        Returns:
            ``(x_train, y_train, x_test, y_test)`` as plain lists; every
            window carries the label of the recording it was cut from.
        """
        x_train, y_train = self._cut_rows(train_set, size_train, stride_train)
        x_test, y_test = self._cut_rows(test_set, size_test, stride_test)
        return x_train, y_train, x_test, y_test

    def sliding_window(self, signal, size=1280, stride=640):
        """Split a 2-D signal into windows along its second axis.

        Args:
            signal: Array indexed as ``signal[:, start:stop]``; windows are
                taken along axis 1.
            size: Window length in samples (default 1280).
            stride: Step between window starts (default 640, i.e. 50% overlap
                with the default size).

        Returns:
            List of ``signal[:, i:i + size]`` slices.  Trailing samples that
            do not fill a whole window are dropped; a signal shorter than
            *size* yields an empty list.
        """
        n_samples = signal.shape[1]
        # Integer arithmetic avoids the float round-trip of the original
        # formula while producing the same count.
        num_of_chunks = max(0, (n_samples - size) // stride + 1)
        return [
            signal[:, start:start + size]
            for start in range(0, num_of_chunks * stride, stride)
        ]

    def prepare_standardplot(self, title, xlabel):
        """Create a 1x2 figure: log-scale loss on the left, accuracy on the right.

        Returns:
            ``(fig, ax1, ax2)``.
        """
        fig, (ax1, ax2) = plt.subplots(1, 2)
        fig.set_size_inches(12, 6)
        fig.suptitle(title)
        ax1.set_ylabel('binary cross entropy')
        ax1.set_xlabel(xlabel)
        ax1.set_yscale('log')
        ax2.set_ylabel('accuracy [% correct]')
        ax2.set_xlabel(xlabel)
        return fig, ax1, ax2

    def finalize_standardplot(self, fig, ax1, ax2):
        """Add legends to the axes that have labelled artists and tidy the layout."""
        for axis in (ax1, ax2):
            handles, labels = axis.get_legend_handles_labels()
            if len(labels) > 0:
                axis.legend(handles, labels)
        fig.tight_layout()
        plt.subplots_adjust(top=0.9)

    def plot_history(self, history, title):
        """Plot training/validation loss and accuracy curves.

        Args:
            history: Object whose ``history`` mapping holds the ``'loss'``,
                ``'val_loss'``, ``'acc'`` and ``'val_acc'`` series.
            title: Figure title.

        Returns:
            The matplotlib figure.
        """
        fig, ax1, ax2 = self.prepare_standardplot(title, 'epoch')
        ax1.plot(history.history['loss'], label="training")
        ax1.plot(history.history['val_loss'], label="validation")
        ax2.plot(history.history['acc'], label="training")
        ax2.plot(history.history['val_acc'], label="validation")
        self.finalize_standardplot(fig, ax1, ax2)
        return fig

    # Display model performance on test set
    def show_confusion_matrix(self, y_true_, y_pred_, title):
        """Print and display the confusion matrix, then the classification report.

        Predictions are rounded to the nearest integer before comparison.
        """
        print("Generating Confusion Matrix")
        rounded = np.rint(y_pred_)
        truth = np.array(y_true_)
        cm = confusion_matrix(y_pred=rounded, y_true=truth)
        print(cm)
        plt.imshow(cm, cmap="inferno_r")
        plt.title(title)
        plt.show()
        print(classification_report(y_pred=rounded, y_true=truth))

    # Calculate accuracy on test set
    def compute_accuracy(self, y_pred_, y_true_):
        """Return the fraction of rounded predictions that equal the labels.

        NOTE(review): the per-row ``np.max`` only yields the class label when
        *y_pred_* has a single column; for a 2-column softmax output it would
        be 1 for almost every row -- confirm what callers pass.
        """
        rounded = np.max(np.round(y_pred_), axis=1)
        return 1 - np.sum(np.abs(rounded - np.array(y_true_))) / len(y_true_)

    @staticmethod
    def _push_prediction(recent, prediction):
        """Shift *recent* one slot towards the end and store the newest flag.

        ``recent[0]`` becomes 1 when *prediction* equals 1 and 0 otherwise.
        Works for any window length >= 1 (the previous hand-written shift
        raised ``IndexError`` for a window of length 1).
        """
        # Copy the source slice: source and destination overlap.
        recent[1:] = recent[:-1].copy()
        recent[0] = 1 if prediction == 1 else 0

    def false_positive_rate(self, y_test, y_pred, detect_rule):
        """Count alarms raised on *y_pred* and express them per hour.

        An alarm fires when at least ``detect_rule[0]`` of the last
        ``detect_rule[1]`` predictions are positive.  After an alarm, the
        following ``REFRACTORY_STEPS`` predictions are ignored.

        Args:
            y_test: Unused; kept for interface compatibility.
            y_pred: 2-D array of model outputs, one row per window.
                NOTE(review): as in ``compute_accuracy``, the row-wise
                ``np.max`` presumes a single output column -- confirm.
            detect_rule: ``(required_positives, window_length)``.

        Returns:
            ``(fpr, fp, false_alarms)``: alarms per hour, number of alarms,
            and the prediction indices at which they fired.  The duration
            assumes each prediction covers ``WINDOW_SAMPLES`` new samples at
            ``SAMPLING_RATE_HZ``.
        """
        time_hr = len(y_pred) * self.WINDOW_SAMPLES / (self.SAMPLING_RATE_HZ * 60 * 60)
        preds = np.max(np.rint(y_pred), axis=1)

        required, window_length = detect_rule[0], detect_rule[1]
        recent = np.zeros(window_length)
        fp = 0
        alarm_triggered = False
        counter_alarm = self.REFRACTORY_STEPS
        false_alarms = []

        for idx, prediction in enumerate(preds):
            if counter_alarm == 0:
                alarm_triggered = False
            else:
                counter_alarm -= 1

            if alarm_triggered:
                # Still inside the refractory period: the window is frozen.
                continue

            self._push_prediction(recent, prediction)
            if np.sum(recent) >= required:
                fp += 1
                alarm_triggered = True
                counter_alarm = self.REFRACTORY_STEPS
                false_alarms.append(idx)

        fpr = fp / time_hr
        return fpr, fp, false_alarms

    # Compute detection time: first time to have 2 out of 3 segments being classified as ictal
    def compute_detect_time(self, preds, test, detect_rule):
        """Return the first index at which the detection rule is satisfied.

        Args:
            preds: Iterable of per-window predictions; a value equal to 1
                counts as positive.
            test: Unused; kept for interface compatibility.
            detect_rule: ``(required_positives, window_length)``.

        Returns:
            Index of the prediction that completes the rule, or ``-1`` when
            the rule is never met.
        """
        required, window_length = detect_rule[0], detect_rule[1]
        recent = np.zeros(window_length)
        for idx, prediction in enumerate(preds):
            self._push_prediction(recent, prediction)
            if np.sum(recent) >= required:
                return idx
        return -1

    def train(self, model_file, epochs, batch_size, learning_rate, x_train, y_train, x_val, y_val):
        """Train the model and save its weights to *model_file*.

        The arrays are converted to pyeddl tensors, the learning rate is set,
        and each epoch runs one ``eddl.fit`` pass followed by an evaluation
        on the validation data.
        """
        x_train = Tensor.fromarray(x_train)
        y_train = Tensor.fromarray(y_train)
        x_val = Tensor.fromarray(x_val)
        y_val = Tensor.fromarray(y_val)

        eddl.setlr(self.model, [learning_rate])

        for e in range(epochs):
            print("Real Epoch number: {} of {}".format(e + 1, epochs))
            # One epoch at a time so validation runs after every epoch.
            eddl.fit(self.model, [x_train], [y_train], batch_size, 1)
            eddl.evaluate(self.model, [x_val], [y_val])

        eddl.save(self.model, model_file)

    def evaluate(self, model_file, x_test, y_test):
        """Load weights from *model_file* and evaluate on the given test arrays."""
        eddl.load(self.model, model_file)
        x_test = Tensor.fromarray(x_test)
        y_test = Tensor.fromarray(y_test)
        eddl.evaluate(self.model, [x_test], [y_test])

    def save_to_onnx(self, model_file):
        """Export the current model to an ONNX file at *model_file*."""
        eddl.save_net_to_onnx_file(self.model, model_file)
if __name__ == "__main__":
    # Leave-one-patient-out training/evaluation driver: for every patient,
    # train on all other patients' recordings and evaluate on that patient's.

    # Misc
    temp_data_folder = "../temp_data/"
    model_file = "../model/model.bin"
    data_folder = "../dataset/"
    signal_length = "3mn"

    # Processing
    validation_size = 0.15
    test_stride = 640  # Corresponds to 50% overlap, can be set to 1280 to have no overlap
    window_size = 1280  # Samples per window fed to the network

    # Model
    epochs = 1
    batch_size = 32
    learning_rate = 0.0075  # 0.00075

    handler = mh.ModelHandler()

    # One recording per line, stored as a flat comma-separated list of values.
    with open(data_folder + "signal_mit_" + signal_length + ".csv", "rb") as file:
        x_data_ = file.read().splitlines()
    y_data = np.loadtxt(data_folder + "labels_mit_" + signal_length + ".txt")
    info_data = np.loadtxt(data_folder + "infos_mit_" + signal_length + ".txt", dtype="str")

    # Parse each line and reshape it to 2-D (NB_CHNS rows); the CSV stores
    # the recordings flattened to 1-D.
    x_data = [
        np.fromstring(sig, sep=',').reshape(handler.NB_CHNS, -1)
        for sig in x_data_
    ]

    # Create the pandas df
    data = pd.concat(
        [
            pd.Series(y_data),
            pd.Series([info[0] for info in info_data]),
            pd.Series([info[1] for info in info_data]),
        ],
        axis=1,
    )
    data.columns = ["label", "patient", "file"]
    data["signal"] = ""
    # astype returns a new Series; assign it so the column really is of
    # object dtype before arrays are stored cell by cell.
    data["signal"] = data["signal"].astype(object)
    for i, sig in enumerate(x_data):
        data.at[i, "signal"] = sig

    patients = np.unique(data.patient)
    data.sort_values(["patient", "file", "label"], inplace=True)
    data = data.reset_index(drop=True)

    # Load seizure times
    seizures = pd.read_csv(data_folder + "seizures.csv", delimiter='\t')
    # Work on an explicit copy so adding a column does not write through a
    # filtered view of `seizures`.
    seizures_ = seizures[seizures.Patient.isin(patients)].copy()
    seizures_["length"] = seizures_.end_seizure - seizures_.start_seizure

    # NOTE(review): the same handler (and therefore the same network weights)
    # is reused for every fold, so each fold continues training a model that
    # has already seen the current test patient in earlier folds. Confirm
    # whether the model should be rebuilt per patient.
    for patient in patients:
        print("Patient: ", patient)

        patient_data = data[data.patient == patient]
        files = np.unique(patient_data.file)
        print(' ', len(files), ' files.')

        test_data = patient_data
        train_data = data.drop(patient_data.index)

        # Build train/test set by cutting each signal in pieces of 5 seconds
        # (50% overlap for training by default).
        x_train, y_train, x_test, y_test = handler.cut_signal_data(
            train_data, test_data, stride_test=test_stride
        )

        # Shuffle, then hold out a stratified validation split.
        x_train, y_train = shuffle(x_train, y_train)
        x_train, x_val, y_train, y_val = train_test_split(
            x_train, y_train, test_size=validation_size, stratify=y_train
        )

        # NOTE(review): the network input is declared as [1, NB_CHNS, 1280]
        # while the arrays are shaped (N, NB_CHNS, 1280, 1); the element order
        # is the same for both shapes -- confirm that pyeddl accepts this.
        x_train = np.array(x_train)
        x_val = np.array(x_val)
        x_train = x_train.reshape((len(x_train), handler.NB_CHNS, window_size, 1))
        x_val = x_val.reshape((len(x_val), handler.NB_CHNS, window_size, 1))

        # One-hot encode the binary labels.
        y_train = np.eye(2)[np.array(y_train).astype(int)]
        y_val = np.eye(2)[np.array(y_val).astype(int)]

        handler.train(model_file, epochs, batch_size, learning_rate, x_train, y_train, x_val, y_val)

        # Testing in CV
        x_test = np.array(x_test)
        x_test = x_test.reshape((len(x_test), handler.NB_CHNS, window_size, 1))
        y_test = np.eye(2)[np.array(y_test).astype(int)]
        handler.evaluate(model_file, x_test, y_test)