diff --git a/CMakeLists.txt b/CMakeLists.txt
index 189a0c5..fec4d1f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,43 +1,43 @@
 cmake_minimum_required(VERSION 3.9.2)
 project(basic_lopo)
 
 SET(CMAKE_CXX_STANDARD 11)
 SET(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 if(NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE)
 endif(NOT CMAKE_BUILD_TYPE)
 
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -pedantic")
 SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
 
 option(test "Build tests." OFF)
 
-add_executable(basic_lopo.x src/main.cpp src/basicnet.cpp src/database.cpp src/dataitem.cpp)
+add_executable(basic_lopo.x src/main.cpp src/basicnet.cpp src/database.cpp src/dataitem.cpp src/measurement.cpp)
 
 find_package(Eigen3 3.3 REQUIRED)
 target_link_libraries(basic_lopo.x PUBLIC Eigen3::Eigen)
 
 set(eddl_DIR "/home/andrea/eddl/lib/cmake/eddl")
 find_package(eddl REQUIRED)
 target_link_libraries(basic_lopo.x PUBLIC eddl)
 
 if (test)
     enable_testing()
     find_package(GTest)
     if (NOT GTEST_FOUND)
         SET(GTEST_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/include)
         SET(GTEST_BOTH_LIBRARIES libgtest.a libgtest_main.a)
     endif(NOT GTEST_FOUND)
     include_directories(${GTEST_INCLUDE_DIRS})
     add_executable(testGenMot src/test_main.cpp src/motif.cpp src/site.cpp src/treatment.cpp)
     target_link_libraries(testGenMot ${GTEST_BOTH_LIBRARIES} pthread)
     add_test(Genomic_motifs testGenMot)
 endif(test)
 
 find_package(Doxygen)
 if (DOXYGEN_FOUND)
     add_custom_target(doc
         ${DOXYGEN_EXECUTABLE} ${CMAKE_SOURCE_DIR}/Doxyfile
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
         COMMENT "Generating API documentation with Doxygen" VERBATIM)
 endif(DOXYGEN_FOUND)
diff --git a/src/basicnet.cpp b/src/basicnet.cpp
index 508cb71..f018605 100644
--- a/src/basicnet.cpp
+++ b/src/basicnet.cpp
@@ -1,152 +1,163 @@
 #include <iostream>
 #include <vector>
 #include <eddl/apis/eddl.h>
 #include <eddl/tensor/tensor.h>
 
 #include "basicnet.h"
 
 #define NB_CHNS 4
 #define L2_K 0.1
 #define L2_B 0.1
 #define L2_A 0.0
 #define DROPOUT_RATE 0.5
 
 BasicNet::BasicNet()
 {
     using namespace eddl;
 
     /*
     #########################################################
     #########################################################
     #########################################################
     #########################################################
     # ZERO PADDING NOT AVAILABLE YET
     # REGULARIZERS ARE AVAILABLE, BUT AS SIMPLE LAYERS, NOT AS THE PARAMETERS
     #   KERNEL_REGULARIZER AND BIAS_REGULARIZER: WHERE DO I PUT THE LAYERS
     #   FOR THEM TO HAVE THE SAME EFFECT?
     # FUNCTION TRAIN UNDERWENT MAJOR (?) CHANGES: the fit function is much simpler than the Keras one
     # Maybe implemented in the future with "fine-grained training"
     */
 
     layer in_ = Input({1, NB_CHNS, 1280});
     layer l = in_;
 
     // l = ZeroPadding2D(l, {1,2});
     // l = Conv(l, 16, {3, 5}, data_format="channels_last", kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b));
     l = Conv(l, 16, {3, 5}, {1,2}, "same");    // padding='valid'
     l = BatchNormalization(l, 0.99, 0.001);
     l = Activation(l, "relu");
 
     l = MaxPool(l, {1,2}, {1,2}, "same");      // padding='valid'
 
     // l = ZeroPadding2D(l, {1,1});
     // l = Conv(l, 32, {3, 3}, {1,1}, data_format="channels_last", kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b));
     l = Conv(l, 32, {3, 3}, {1,1}, "same");    // padding='valid'
     l = BatchNormalization(l, 0.99, 0.001);
     l = Activation(l, "relu");
 
     l = MaxPool(l, {1,2}, {1,2}, "same");      // padding='valid'
 
     // l = ZeroPadding2D(l, {1,1});
     // l = Conv(l, 32, {3, 3}, {2,2}, data_format="channels_last", kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b));
     l = Conv(l, 32, {3, 3}, {2,2}, "same");    // padding='valid'
     l = BatchNormalization(l, 0.99, 0.001);
     l = Activation(l, "relu");
     l = Dropout(l, DROPOUT_RATE);
 
     l = Flatten(l);
     // l = Dense(l, 64, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b));
     l = Dense(l, 64);
     l = Activation(l, "relu");
     l = Dropout(l, DROPOUT_RATE);
     // l = Dense(l, 1, kernel_initializer='glorot_uniform', bias_initializer='zeros');
     l = Dense(l, 1);
     l = Activation(l, "sigmoid");
 
     layer out_ = l;
     net = Model({in_}, {out_});
 
     build(net,
           sgd(0.01, 0.9, 0.0, true),
           {"soft_cross_entropy"},
           {"categorical_accuracy"},
           CS_CPU(4, "full_mem"));
 
     summary(net);
 }
 
 void BasicNet::fit(Tensor* x, Tensor* y, int batch_size, int epochs)
 {
     x = Tensor::moveaxis(x, 3, 1);
+
+
+/* // If no pre-split validation set is provided, the split is done automatically
+   // here, but it may result in class imbalance for small datasets.
+    if(val_data==None):
+        eddl.fit(model, [x_train], [y_train], batch_size, epochs)
+    else:
+        val_data = [val_data[0], val_data[1]]
+        eddl.fit(model, [x_train], [y_train], batch_size, epochs)
+    return
+*/
+
     eddl::fit(net, {x}, {y}, batch_size, epochs);
 }
 
 void BasicNet::evaluate(Tensor* x, Tensor* y)
 {
     x = Tensor::moveaxis(x, 3, 1);
     eddl::evaluate(net, {x}, {y});
 }
 
 /*
 #Calculate accuracy on test set
 def compute_accuracy(y_pred_, y_true_):
     return 1 - np.sum(np.abs(np.max(np.round(y_pred_), axis=1) - np.array(y_true_)))/len(y_true_)
 
 def false_positive_rate(y_test, y_pred, detect_rule):
     time_hr = len(y_pred)*1280/(256*60*60)
     preds = np.max(np.rint(y_pred), axis=1)
     fp = 0
     x_ = np.zeros(detect_rule[1])
     alarm_triggered = False
     counter_alarm = 23  #Needs 1mn between seizure onsets
     false_alarms = []
     for idx, x in enumerate(preds):
         if(counter_alarm == 0):
             alarm_triggered = False
         else:
             counter_alarm -= 1
         if (alarm_triggered == False):
             for j, y in enumerate(x_[::-1]):
                 if(j == len(x_)-1):
                     x_[1] = x_[0]
                     x_[0] = 0
                 else:
                     x_[len(x_)-1-j] = x_[len(x_)-2-j]
             if(x==1):
                 x_[0] = 1
             if(np.sum(x_) >= detect_rule[0]):
                 fp += 1
                 alarm_triggered = True
                 counter_alarm = 23
                 false_alarms.append(idx)
     fpr = fp/time_hr
     return fpr, fp, false_alarms
 
 #Compute detection time: first time to have 2 out of 3 segments being classified as ictal
 def compute_detect_time(preds, test, detect_rule):
     x_ = np.zeros(detect_rule[1])
     for idx, x in enumerate(preds):
         for j, y in enumerate(x_[::-1]):
             if(j == len(x_)-1):
                 x_[1] = x_[0]
                 x_[0] = 0
             else:
                 x_[len(x_)-1-j] = x_[len(x_)-2-j]
         if(x==1):
             x_[0] = 1
         if(np.sum(x_) >= detect_rule[0]):
             return idx
     return -1
 */
\ No newline at end of file
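
A note on the open question in the constructor comment: in EDDL, regularizers are layer wrappers applied around the layer whose weights they should penalize. A minimal sketch of how the first Conv block could pick up the Keras kernel_regularizer=l2(L2_K) behaviour, under the assumption that eddl::L2(layer, float) penalizes all trainable parameters of the wrapped layer (EDDL does not distinguish kernel from bias here, so bias_regularizer has no separate equivalent):

    // Inside BasicNet's constructor (using namespace eddl): wrap the layer
    // whose weights should be penalized. Assumes L2(layer, float) regularizes
    // every trainable parameter of the wrapped layer, approximating
    // kernel_regularizer=l2(L2_K) from the Keras version.
    l = L2(Conv(l, 16, {3, 5}, {1, 2}, "same"), L2_K);
    l = BatchNormalization(l, 0.99, 0.001);
    l = Activation(l, "relu");
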
diff --git a/src/database.cpp b/src/database.cpp
index cf37607..36a5796 100644
--- a/src/database.cpp
+++ b/src/database.cpp
@@ -1,40 +1,40 @@
 #include <iostream>
 #include <fstream>
 #include <string>
 #include <algorithm>
 
 #include "database.h"
 
 // Dataset location and choice
 const std::string data_folder = "../dataset/";
 const std::string signal_length = "1mn";
 
 Database::Database()
 {
     std::ifstream info_file(data_folder + "infos_mit_" + signal_length + ".txt");
     std::ifstream label_file(data_folder + "labels_mit_" + signal_length + ".txt");
     std::ifstream signal_file(data_folder + "signal_mit_" + signal_length + ".csv");
 
     while(info_file.good() && label_file.good() && signal_file.good())
-        data.push_back(new DataItem(info_file, label_file, signal_file));
+        data.push_back(new Measurement(info_file, label_file, signal_file));
 
     // Deletes the last element, which is not correctly initialised because it was read after the EOF.
     delete data.back();
     data.pop_back();
 
     info_file.close();
     label_file.close();
     signal_file.close();
 
-    std::sort(data.begin(), data.end(), compare_ptr_DataItem);
+    std::sort(data.begin(), data.end(), compare_ptr_Measurement);
 }
 
 Database::~Database()
 {
     for(auto& data_item: data)
         delete data_item;
 }
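
The delete-the-last-element workaround above is correct, but the bad object can also be avoided up front by checking for EOF before constructing. A small sketch for Database's constructor, assuming the three files end without trailing blank lines:

    // Peek before committing to a read: no half-initialised Measurement
    // is ever constructed, so nothing has to be deleted afterwards.
    while(info_file.peek() != EOF && label_file.peek() != EOF
          && signal_file.peek() != EOF)
        data.push_back(new Measurement(info_file, label_file, signal_file));
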
diff --git a/src/database.h b/src/database.h
index 6ddb296..97465dc 100644
--- a/src/database.h
+++ b/src/database.h
@@ -1,16 +1,16 @@
 #include <vector>
 #include <string>
 #include <fstream>
-#include "dataitem.h"
+#include "measurement.h"
 
 
 class Database
 {
     public:
         Database();
         ~Database();
 
     private:
-        std::vector<DataItem*> data;
+        std::vector<Measurement*> data;
 };
\ No newline at end of file
diff --git a/src/dataitem.cpp b/src/dataitem.cpp
index e4d5b60..5b7002c 100644
--- a/src/dataitem.cpp
+++ b/src/dataitem.cpp
@@ -1,50 +1,14 @@
 #include <string>
 #include <fstream>
 #include <sstream>
 #include <vector>
 
 #include "dataitem.h"
 
-DataItem::DataItem(std::ifstream& info_file, std::ifstream& label_file, std::ifstream& signal_file)
+DataItem::DataItem(double y_def, const std::vector<double>& x_def):
+    y(y_def), x(x_def)
 {
-    info_file >> patient;
-    info_file >> id;
-    label_file >> label;
-
-    std::string line;
-    getline(signal_file, line);
-    std::stringstream line_stream(line);
-    float val;
-    while(line_stream >> val)
-    {
-        signal.push_back(val);
-        line_stream.get();
-    }
 }
-
-bool DataItem::operator < (const DataItem& d) const
-{
-    if(patient.compare(d.patient) < 0)
-        return true;
-    else if(patient.compare(d.patient) == 0)
-    {
-        if(id.compare(d.id) < 0)
-            return true;
-        else if(id == d.id)
-        {
-            if(label < d.label)
-                return true;
-        }
-    }
-
-    return false;
-}
-
-
-bool compare_ptr_DataItem(DataItem* p1, DataItem* p2)
-{
-    return *p1<*p2;
-}
diff --git a/src/dataitem.h b/src/dataitem.h
index a3b10bd..c9488e6 100644
--- a/src/dataitem.h
+++ b/src/dataitem.h
@@ -1,21 +1,14 @@
 #include <string>
 #include <fstream>
 #include <vector>
 
 
 class DataItem
 {
     public:
-        DataItem(std::ifstream& info_file, std::ifstream& label_file, std::ifstream& signal_file);
-
-        bool operator < (const DataItem& d) const;
-
+        DataItem(double y_def, const std::vector<double>& x_def);
+
     private:
-        std::string patient;
-        std::string id;
-        float label;
-        std::vector<float> signal;
-};
-
-
-bool compare_ptr_DataItem(DataItem* p1, DataItem* p2);
\ No newline at end of file
+        double y;
+        std::vector<double> x;
+};
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index 1b77b15..4fd7f8b 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,109 +1,131 @@
 #include <iostream>
 
 #include "database.h"
 #include "basicnet.h"
 
 #define NB_CHNS 4
 
 // Processing parameters
 const double validation_size = 0.15;
 const int test_stride = 640;    // Corresponds to 50% overlap, can be set to 1280 to have no overlap
 
 // Model parameters
 const double lr = 0.00075;
 const int epochs = 1;
 const int batch_size = 32;
 
 
 int main()
 {
+    std::cout << "[INFO] Reading Dataset..." << std::endl;
     Database database;
+    std::cout << "[INFO] Reading Dataset Completed" << std::endl;
+
+    std::cout << "[INFO] Building Net..." << std::endl;
     BasicNet basic_net;
+    std::cout << "[INFO] Building Net Completed" << std::endl;
+
+
+
+
+    // Count n. of unique patients
+
+    // Instantiate a results class
+
+    // for each patient:
+    //     write info
+    //     test_data = data of current patient
+    //     train_data = Database EXCEPT data of current patient
+    //     .......
+
+
+
+
     /*
     patients = np.unique(data.patient)
 
     #Load seizure times
    seizures = pd.read_csv(data_folder+"seizures.csv", delimiter='\t')
    seizures_ = seizures[seizures.Patient.isin(patients)]
    seizures_["length"] = seizures_.apply(lambda x: (x.end_seizure - x.start_seizure), axis=1)
 
    results = {}
 
    for patient in patients:
        print("Patient: ", patient)
        patient_data = data[data.patient == patient]
        files = np.unique(patient_data.file)
        print(' ', len(files), ' files.')
        test_data = patient_data
        train_data = data.drop(patient_data.index)
 
        #Build train/test sets by cutting each signal in pieces of 5 seconds, with 50% overlap
        x_train, y_train, x_test, y_test = cut_signal_data(train_data, test_data, stride_test=test_stride)
 
        #Shuffle and balance classes
        x_train, y_train = shuffle(x_train, y_train)
        x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=validation_size, stratify=y_train)
 
        x_train = np.array(x_train)
        x_val = np.array(x_val)
 
        // x_data->reshape_({x_data->shape[0], NB_CHNS, 1280, 1});
        x_train = x_train.reshape((len(x_train), NB_CHNS, 1280, 1))
        x_val = x_val.reshape((len(x_val), NB_CHNS, 1280, 1))
 
        #Create and train a new model
        model = getModel()
        es = EarlyStopping(monitor='val_loss', min_delta=0.005, patience=20, restore_best_weights=True)
        train(model, x_train, y_train, [x_val, y_val], epochs=epochs, batch_size=batch_size)
 
        x_test = np.array(x_test)
        lengths = [len(x[0]) for x in x_test]
 
        x_test_splitted = np.array_split(x_test, len(files))
        y_test_splitted = np.array_split(np.array(y_test), len(files))
 
        for x_test, y_test, file in zip(x_test_splitted, y_test_splitted, files):
            x_test = np.array(x_test)
            x_test = x_test.reshape((len(x_test), NB_CHNS, 1280, 1))
 
            predict(model, x_test, y_test)
            pred = model.predict(x_test) - 0.25
 
            #show_confusion_matrix(y_test, pred, str(patient) +' '+ str(file))
            print("Accuracy (segments) :", compute_accuracy(pred, y_test))
 
            detection_time = compute_detect_time(np.max(np.rint(pred), axis=1), y_test, [3,4])
            print("Detection time window (positive if >23): ", detection_time)
 
    if((detect_time > 23) and (detect_time <= 45)):
        detection_times.append(detect_time)
 
    plt.hist(detection_times, bins=23, range=[23,46])
    plt.xlabel("Per segments of 5 seconds")
    plt.show()
 
    detected = len(detection_times)/counter
    print("Detection accuracy: ", detected)
    */
 }
\ No newline at end of file
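
The TODO block in main() sketches the leave-one-patient-out loop in comments. One possible C++ shape for it is below; Sample is a hypothetical stand-in, since Measurement currently keeps patient/id/label private and the diff does not yet expose accessors:

    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    // Hypothetical record type; illustrative only.
    struct Sample { std::string patient; /* label, signal, ... */ };

    // Leave-one-patient-out split, as described in the comment block:
    // the current patient's records become the test set, the rest train.
    void lopo_split(const std::vector<Sample>& data)
    {
        std::set<std::string> patients;               // count unique patients
        for(const Sample& s : data)
            patients.insert(s.patient);
        std::cout << patients.size() << " unique patients" << std::endl;

        for(const std::string& patient : patients)
        {
            std::vector<Sample> test_data, train_data;
            for(const Sample& s : data)
                (s.patient == patient ? test_data : train_data).push_back(s);
            // ... cut into 5 s windows, train a fresh BasicNet, evaluate ...
        }
    }
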
diff --git a/src/dataitem.cpp b/src/measurement.cpp
similarity index 71%
copy from src/dataitem.cpp
copy to src/measurement.cpp
index e4d5b60..468e22f 100644
--- a/src/dataitem.cpp
+++ b/src/measurement.cpp
@@ -1,50 +1,50 @@
 #include <string>
 #include <fstream>
 #include <sstream>
 #include <vector>
 
-#include "dataitem.h"
+#include "measurement.h"
 
-DataItem::DataItem(std::ifstream& info_file, std::ifstream& label_file, std::ifstream& signal_file)
+Measurement::Measurement(std::ifstream& info_file, std::ifstream& label_file, std::ifstream& signal_file)
 {
     info_file >> patient;
     info_file >> id;
     label_file >> label;
 
     std::string line;
     getline(signal_file, line);
     std::stringstream line_stream(line);
-    float val;
+    double val;
     while(line_stream >> val)
     {
         signal.push_back(val);
         line_stream.get();
     }
 }
 
-bool DataItem::operator < (const DataItem& d) const
+bool Measurement::operator < (const Measurement& d) const
 {
     if(patient.compare(d.patient) < 0)
         return true;
     else if(patient.compare(d.patient) == 0)
     {
         if(id.compare(d.id) < 0)
             return true;
         else if(id == d.id)
         {
             if(label < d.label)
                 return true;
         }
     }
 
     return false;
 }
 
-bool compare_ptr_DataItem(DataItem* p1, DataItem* p2)
+bool compare_ptr_Measurement(Measurement* p1, Measurement* p2)
 {
     return *p1<*p2;
-}
+}
\ No newline at end of file
diff --git a/src/measurement.h b/src/measurement.h
new file mode 100644
index 0000000..cf18d42
--- /dev/null
+++ b/src/measurement.h
@@ -0,0 +1,21 @@
+#include <string>
+#include <fstream>
+#include <vector>
+
+
+class Measurement
+{
+    public:
+        Measurement(std::ifstream& info_file, std::ifstream& label_file, std::ifstream& signal_file);
+
+        bool operator < (const Measurement& d) const;
+
+    private:
+        std::string patient;
+        std::string id;
+        double label;
+        std::vector<double> signal;
+};
+
+
+bool compare_ptr_Measurement(Measurement* p1, Measurement* p2);
\ No newline at end of file
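
Measurement::operator< hand-rolls a three-key lexicographic comparison; std::tie from <tuple> expresses the same ordering with less room for error, should the class be touched again:

    #include <tuple>

    bool Measurement::operator < (const Measurement& d) const
    {
        // Lexicographic (patient, id, label) ordering, equivalent to the
        // chain of string::compare calls above.
        return std::tie(patient, id, label) < std::tie(d.patient, d.id, d.label);
    }
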
diff --git a/src/model_basic_lopo.py b/src/model_basic_lopo.py
index 736dbc5..257cbee 100644
--- a/src/model_basic_lopo.py
+++ b/src/model_basic_lopo.py
@@ -1,159 +1,150 @@
 #Imports
-import random, time, os
 import numpy as np
 from matplotlib import pyplot as plt
 import pandas as pd
-import json
 
 #Utility file
 from utils import *
 
-#Tensorflow
-# os.environ["CUDA_VISIBLE_DEVICES"]="1"
-# import tensorflow as tf
-# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
-# sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
-
-#keras
-# from keras.models import model_from_json
-# from keras.models import Sequential
-# from keras.layers import Dense, Dropout, Activation, Flatten
-# from keras.layers import Conv2D, BatchNormalization, Reshape, MaxPooling2D, ZeroPadding2D
-# from keras.utils import to_categorical
-# from keras.regularizers import l2
-# from keras.optimizers import SGD, Adam
-# from keras.callbacks import EarlyStopping
-
-import pyeddl._core.eddl as eddl
-import pyeddl._core.eddlT as eddlT
-
 #sklearn
 from sklearn.utils import shuffle
 from sklearn.metrics import confusion_matrix, classification_report, roc_curve, roc_auc_score
 from sklearn.model_selection import train_test_split
 
 from scipy.signal import spectrogram, stft
 
 #Misc
 data_folder = "../dataset/"
 signal_length = "1mn"
 
 #Processing
 validation_size = 0.15
 test_stride = 640   #Corresponds to 50% overlap, can be set to 1280 to have no overlap
 
 #Model
 lr = 0.00075
 epochs = 1
 batch_size = 32
 NB_CHNS = 4
 verbose = 1
 
 with open(data_folder+"signal_mit_"+signal_length+".csv", "rb") as file:
     x_data_ = file.read().splitlines()
 
 y_data = np.loadtxt(data_folder+"labels_mit_"+signal_length+".txt")
 info_data = np.loadtxt(data_folder+"infos_mit_"+signal_length+".txt", dtype="str")
 
 #Convert from string
 x_data = []
 for sig in x_data_:
     x_data.append(np.fromstring(sig, sep=','))
 
 #Reshape in 2D as data are 1D in csv file
 x_data = [np.reshape(np.array(x_data_i), (NB_CHNS, int(len(x_data_i)/NB_CHNS))) for x_data_i in x_data]
 
 #Create the pandas df
 data = pd.concat([pd.Series(y_data), pd.Series([info[0] for info in info_data]), pd.Series([info[1] for info in info_data])], axis=1)
 data.columns = ["label", "patient", "file"]
 data["signal"] = ""
 data["signal"].astype(object)
 for i, sig in enumerate(x_data):
     data.at[i, "signal"] = sig
 
 patients = np.unique(data.patient)
 data.sort_values(["patient", "file", "label"], inplace=True)
 data = data.reset_index(drop=True)
 
 #Load seizure times
 seizures = pd.read_csv(data_folder+"seizures.csv", delimiter='\t')
 seizures_ = seizures[seizures.Patient.isin(patients)]
 seizures_["length"] = seizures_.apply(lambda x: (x.end_seizure - x.start_seizure), axis=1)
 
 results = {}
 
 for patient in patients:
     print("Patient: ", patient)
     patient_data = data[data.patient == patient]
     files = np.unique(patient_data.file)
     print(' ', len(files), ' files.')
     test_data = patient_data
     train_data = data.drop(patient_data.index)
 
     #Build train/test sets by cutting each signal in pieces of 5 seconds, with 50% overlap
     x_train, y_train, x_test, y_test = cut_signal_data(train_data, test_data, stride_test=test_stride)
 
     #Shuffle and balance classes
     x_train, y_train = shuffle(x_train, y_train)
     x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=validation_size, stratify=y_train)
 
     x_train = np.array(x_train)
     x_val = np.array(x_val)
 
     x_train = x_train.reshape((len(x_train), NB_CHNS, 1280, 1))
     x_val = x_val.reshape((len(x_val), NB_CHNS, 1280, 1))
 
-    #Create and train new model
-    model = getModel()
-#    es = EarlyStopping(monitor='val_loss', min_delta=0.005, patience=20, restore_best_weights=True)
-    train(model, x_train, y_train, [x_val, y_val], verb=verbose, epochs=epochs, batch_size=batch_size)
+
+
+
+
+    # SAVE X_TRAIN, Y_TRAIN, X_VAL, Y_VAL TO .NPY FILE
+    # LAUNCH EXECUTABLE WITH ARGUMENT --TRAIN
+    # A MODEL WILL NEED TO BE SAVED BY THIS
+
+
+
     x_test = np.array(x_test)
     lengths = [len(x[0]) for x in x_test]
 
     x_test_splitted = np.array_split(x_test, len(files))
     y_test_splitted = np.array_split(np.array(y_test), len(files))
 
     for x_test, y_test, file in zip(x_test_splitted, y_test_splitted, files):
         x_test = np.array(x_test)
         x_test = x_test.reshape((len(x_test), NB_CHNS, 1280, 1))
 
-        predict(model, x_test, y_test)
+
+        # SAVE X_TEST, Y_TEST TO .NPY FILE
+        # LAUNCH EXECUTABLE WITH ARGUMENT --PREDICT
+
+
 #        pred = model.predict(x_test) - 0.25
 
         #show_confusion_matrix(y_test, pred, str(patient) +' '+ str(file))
 #        print("Accuracy (segments) :", compute_accuracy(pred, y_test))
 
 #        detection_time = compute_detect_time(np.max(np.rint(pred), axis=1), y_test, [3,4])
 #        print("Detection time window (positive if >23): ", detection_time)
 
 #    if((detect_time > 23) and (detect_time <= 45)):
 #        detection_times.append(detect_time)
 
 #plt.hist(detection_times, bins=23, range=[23,46])
 #plt.xlabel("Per segments of 5 seconds")
 #plt.show()
 
 #detected = len(detection_times)/counter
 #print("Detection accuracy: ", detected)
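
The capitalized comments above hand training and prediction off to the C++ executable ("LAUNCH EXECUTABLE WITH ARGUMENT --TRAIN / --PREDICT"). A minimal sketch of the matching dispatch in main.cpp; the flag names and the .npy exchange step are assumptions taken from these comments, not something the diff implements yet:

    #include <cstring>
    #include <iostream>

    int main(int argc, char* argv[])
    {
        if(argc > 1 && std::strcmp(argv[1], "--TRAIN") == 0)
        {
            // load x_train/y_train/x_val/y_val written by the Python side,
            // call BasicNet::fit, then save the model (format to be decided)
        }
        else if(argc > 1 && std::strcmp(argv[1], "--PREDICT") == 0)
        {
            // load x_test/y_test, reload the saved model, call evaluate
        }
        else
        {
            std::cerr << "usage: basic_lopo.x [--TRAIN|--PREDICT]" << std::endl;
            return 1;
        }
        return 0;
    }
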
diff --git a/src/utils.py b/src/utils.py
index e916c03..f21e7c5 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -1,268 +1,146 @@
-import random, time
 import numpy as np
 from matplotlib import pyplot as plt
-import pandas as pd
-import json
-
-#keras
-#from keras.models import model_from_json
-#from keras.models import Sequential
-#from keras.layers import Dense, Dropout, Activation, Flatten
-#from keras.layers import Conv2D, BatchNormalization, Reshape, MaxPooling2D, ZeroPadding2D
-#from keras.utils import to_categorical
-#from keras.regularizers import l2
-#from keras.optimizers import SGD, Adam
-#from keras.callbacks import EarlyStopping
-
-#pyeddl
-import pyeddl._core.eddl as eddl
-import pyeddl._core.eddlT as eddlT
-from pyeddl._core.eddl import BatchNormalization, L2, sgd, Input, Dense, Dropout, Activation, Reshape, MaxPool, Flatten, Conv
 
 #sklearn
-from sklearn.utils import shuffle
-from sklearn.metrics import confusion_matrix, classification_report, roc_curve, roc_auc_score
-from sklearn.model_selection import train_test_split
-
-from scipy.signal import spectrogram, stft
-
-NB_CHNS = 4
-l2_k, l2_b, l2_a = 0.1, 0.1, 0
-dropout_rate = 0.5
-
-def getModel():
-
-    #########################################################
-    #########################################################
-    #########################################################
-    #########################################################
-    # ZERO PADDING NOT AVAILABLE YET
-    # REGULARIZERS ARE AVAILABLE, BUT AS SIMPLE LAYERS, NOT AS THE PARAMETERS KERNEL_REGULARIZER AND BIAS_REGULARIZER: WHERE DO I PUT THE LAYERS FOR THEM TO HAVE THE SAME EFFECT?
-
-    # FUNCTION TRAIN UNDERWENT MAJOR (?) CHANGES: fit function is much simpler than the Keras one
-    # Maybe implemented in the future with "fine-grained training"
-
-    in_ = Input([1, NB_CHNS, 1280])
-    layer = in_
-
-#    layer = ZeroPadding2D(layer, (1,2))
-#    layer = Conv(layer, 16, [3, 5], data_format="channels_last", kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b))
-    layer = Conv(layer, 16, [3, 5], strides=[1,2], padding='same')   # padding='valid'
-    layer = BatchNormalization(layer, momentum=0.99, epsilon=0.001)
-    layer = Activation(layer, "relu")
-
-    layer = MaxPool(layer, (1,2), strides=[1,2], padding="same")     # padding='valid'
-
-#    layer = ZeroPadding2D(layer, (1,1))
-#    layer = Conv(layer, 32, [3, 3], strides=[1,1], data_format="channels_last", kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b))
-    layer = Conv(layer, 32, [3, 3], strides=[1,1], padding="same")   # padding='valid'
-    layer = BatchNormalization(layer, momentum=0.99, epsilon=0.001)
-    layer = Activation(layer, "relu")
-
-    layer = MaxPool(layer, (1,2), strides=[1,2], padding="same")     # padding='valid'
-
-#    layer = ZeroPadding2D(layer, (1,1))
-#    layer = Conv(layer, 32, [3, 3], strides=[2,2], data_format="channels_last", kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b))
-    layer = Conv(layer, 32, [3, 3], strides=[2,2], padding="same")   # padding='valid'
-    layer = BatchNormalization(layer, momentum=0.99, epsilon=0.001)
-    layer = Activation(layer, "relu")
-    layer = Dropout(layer, dropout_rate)
-
-    layer = Flatten(layer)
-#    layer = Dense(layer, 64, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b))
-    layer = Dense(layer, 64)
-    layer = Activation(layer, "relu")
-    layer = Dropout(layer, dropout_rate)
-#    layer = Dense(layer, 1, kernel_initializer='glorot_uniform', bias_initializer='zeros')
-    layer = Dense(layer, 1)
-    layer = Activation(layer, "sigmoid")
-
-    out_ = layer
+from sklearn.metrics import confusion_matrix, classification_report
 
-    model = eddl.Model([in_], [out_])
-
-    eddl.build(model,
-               sgd(0.01, 0.9, 0.0, nesterov=True),
-               ["soft_cross_entropy"],
-               ["categorical_accuracy"],
-               eddl.CS_CPU(4, 'full_mem'))
-
-    eddl.summary(model)
-
-    return model
 
 #Train_set and test_set are pandas dataframes where the signal is in the column labeled "signal" and the label in "label"
 def cut_signal_data(train_set, test_set, size_train=1280, stride_train=640, size_test=1280, stride_test=1280):
     x_train = []
     y_train = []
     x_test = []
     y_test = []
 
     #For each training seizure/pre-ictal signal
     for row in train_set.itertuples(index=False):
         signal = row.signal
         label = row.label
 
         #Cut signal in chunks of 5 seconds
         signals = sliding_window(signal, size_train, stride_train)
         labels = np.ones(len(signals))*label
         x_train += list(signals)
         y_train += list(labels)
 
     #For each testing seizure/pre-ictal signal
     for row in test_set.itertuples(index=False):
         signal = row.signal
         label = row.label
 
         #Cut signal in chunks of 5 seconds
         signals = sliding_window(signal, size_test, stride_test)
         labels = np.ones(len(signals))*label
         x_test += list(signals)
         y_test += list(labels)
 
     return x_train, y_train, x_test, y_test
 
 #Input: eeg signal; size: size of the window; stride: step
 #Default: 1280-sample windows with 50% overlap
 #Output: array of signal cuts according to the window size
 def sliding_window(signal, size=1280, stride=640):
     out = []
     numOfChunks = int(((signal.shape[1]-size)/stride)+1)
 
     for i in range(0, numOfChunks*stride, stride):
         out.append(signal[:, i:i+size])
 
     return out
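
sliding_window is one of the utilities that will need a native counterpart once the pipeline moves into the executable. A direct C++ translation, assuming a (channels x samples) signal stored as std::vector<std::vector<double>> (the diff currently stores one flat vector per Measurement, so this layout is an assumption):

    #include <vector>

    // Cut a (channels x samples) signal into windows of `size` samples,
    // stepping by `stride`; mirrors utils.py's sliding_window().
    std::vector<std::vector<std::vector<double>>> sliding_window(
        const std::vector<std::vector<double>>& signal,
        std::size_t size = 1280, std::size_t stride = 640)
    {
        std::vector<std::vector<std::vector<double>>> out;
        if(signal.empty() || signal[0].size() < size)
            return out;

        std::size_t num_chunks = (signal[0].size() - size) / stride + 1;
        for(std::size_t i = 0; i < num_chunks * stride; i += stride)
        {
            std::vector<std::vector<double>> window;
            for(const auto& channel : signal)
                window.emplace_back(channel.begin() + i, channel.begin() + i + size);
            out.push_back(std::move(window));
        }
        return out;
    }
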
-
-
-#Train data according to the keras.fit function
-def train(model, x_train, y_train, val_data=None, verb=1, epochs=50, batch_size=32):
-    #If no pre-split validation set is provided, it is done automatically here. But it may result in class imbalance for small datasets
-
-    # Changing data to channels first
-    x_train = np.moveaxis(x_train, 3, 1)
-
-    x_train = eddlT.create(x_train)
-    y_train = eddlT.create(y_train)
-
-    if(val_data==None):
-        eddl.fit(model, [x_train], [y_train], batch_size, epochs)
-    else:
-        val_data = [val_data[0], val_data[1]]
-        eddl.fit(model, [x_train], [y_train], batch_size, epochs)
-    return
-
-
-def predict(model, x_test, y_test):
-    # Changing data to channels first
-    x_test = np.moveaxis(x_test, 3, 1)
-
-    x_test = eddlT.create(x_test)
-    y_test = eddlT.create(y_test)
-
-    eddl.evaluate(model, [x_test], [y_test])
-    return
 
 def prepare_standardplot(title, xlabel):
     fig, (ax1, ax2) = plt.subplots(1, 2)
     fig.set_size_inches(12, 6)
     fig.suptitle(title)
     ax1.set_ylabel('binary cross entropy')
     ax1.set_xlabel(xlabel)
     ax1.set_yscale('log')
     ax2.set_ylabel('accuracy [% correct]')
     ax2.set_xlabel(xlabel)
     return fig, ax1, ax2
 
 def finalize_standardplot(fig, ax1, ax2):
     ax1handles, ax1labels = ax1.get_legend_handles_labels()
     if len(ax1labels) > 0:
         ax1.legend(ax1handles, ax1labels)
     ax2handles, ax2labels = ax2.get_legend_handles_labels()
     if len(ax2labels) > 0:
         ax2.legend(ax2handles, ax2labels)
     fig.tight_layout()
     plt.subplots_adjust(top=0.9)
 
 def plot_history(history, title):
     fig, ax1, ax2 = prepare_standardplot(title, 'epoch')
     ax1.plot(history.history['loss'], label="training")
     ax1.plot(history.history['val_loss'], label="validation")
     ax2.plot(history.history['acc'], label="training")
     ax2.plot(history.history['val_acc'], label="validation")
     finalize_standardplot(fig, ax1, ax2)
     return fig
 
 #Display model performance on test set
 def show_confusion_matrix(y_true_, y_pred_, title):
     print("Generating Confusion Matrix")
     cm = confusion_matrix(y_pred=np.rint(y_pred_), y_true=np.array(y_true_))
     print(cm)
     plt.imshow(cm, cmap="inferno_r")
     plt.title(title)
     plt.show()
     print(classification_report(y_pred=np.rint(y_pred_), y_true=np.array(y_true_)))
 
 #Calculate accuracy on test set
 def compute_accuracy(y_pred_, y_true_):
     return 1 - np.sum(np.abs(np.max(np.round(y_pred_), axis=1) - np.array(y_true_)))/len(y_true_)
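
compute_accuracy is a NumPy one-liner; an equivalent C++ helper for when scoring moves into the executable (a sketch over per-segment sigmoid outputs and 0/1 labels):

    #include <cmath>
    #include <vector>

    // Fraction of segments whose rounded prediction matches the label;
    // mirrors utils.py's compute_accuracy for a single-output model.
    double compute_accuracy(const std::vector<double>& y_pred,
                            const std::vector<double>& y_true)
    {
        double errors = 0.0;
        for(std::size_t i = 0; i < y_pred.size(); ++i)
            errors += std::fabs(std::round(y_pred[i]) - y_true[i]);
        return 1.0 - errors / y_true.size();
    }
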
 
 def false_positive_rate(y_test, y_pred, detect_rule):
     time_hr = len(y_pred)*1280/(256*60*60)
     preds = np.max(np.rint(y_pred), axis=1)
     fp = 0
     x_ = np.zeros(detect_rule[1])
     alarm_triggered = False
     counter_alarm = 23  #Needs 1mn between seizure onsets
     false_alarms = []
     for idx, x in enumerate(preds):
         if(counter_alarm == 0):
             alarm_triggered = False
         else:
             counter_alarm -= 1
         if (alarm_triggered == False):
             for j, y in enumerate(x_[::-1]):
                 if(j == len(x_)-1):
                     x_[1] = x_[0]
                     x_[0] = 0
                 else:
                     x_[len(x_)-1-j] = x_[len(x_)-2-j]
             if(x==1):
                 x_[0] = 1
             if(np.sum(x_) >= detect_rule[0]):
                 fp += 1
                 alarm_triggered = True
                 counter_alarm = 23
                 false_alarms.append(idx)
     fpr = fp/time_hr
     return fpr, fp, false_alarms
 
 #Compute detection time: first time to have 2 out of 3 segments being classified as ictal
 def compute_detect_time(preds, test, detect_rule):
     x_ = np.zeros(detect_rule[1])
     for idx, x in enumerate(preds):
         for j, y in enumerate(x_[::-1]):
             if(j == len(x_)-1):
                 x_[1] = x_[0]
                 x_[0] = 0
             else:
                 x_[len(x_)-1-j] = x_[len(x_)-2-j]
         if(x==1):
             x_[0] = 1
         if(np.sum(x_) >= detect_rule[0]):
             return idx
     return -1
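
compute_detect_time is the other metric main.cpp will eventually need natively. A C++ sketch of the same k-of-n rule (detect_rule = {k, n}): return the first segment index at which at least k of the last n predictions are ictal, or -1 if the seizure is never detected:

    #include <deque>
    #include <numeric>
    #include <vector>

    // preds holds per-segment 0/1 predictions; fires when k of the
    // last n segments (including the current one) are classified ictal.
    int compute_detect_time(const std::vector<int>& preds, int k, int n)
    {
        std::deque<int> window;
        for(std::size_t idx = 0; idx < preds.size(); ++idx)
        {
            window.push_back(preds[idx]);
            if(static_cast<int>(window.size()) > n)
                window.pop_front();
            if(std::accumulate(window.begin(), window.end(), 0) >= k)
                return static_cast<int>(idx);
        }
        return -1;
    }
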