diff --git a/.gitignore b/.gitignore
index f1e6d0a..6d0be7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 dataset/
 build/
 **/__pycache__
 **/pyeddl/
+**/eddl/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 24a6934..e2f76f0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,43 +1,43 @@
 cmake_minimum_required(VERSION 3.9.2)
 project(basic_lopo)
 
 SET(CMAKE_CXX_STANDARD 11)
 SET(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 if(NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE)
 endif(NOT CMAKE_BUILD_TYPE)
 
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -pedantic")
 SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
 
 option(test "Build tests." OFF)
 
-add_executable(basic_lopo.x src/main.cpp src/basicnet.cpp)
+add_executable(basic_lopo.x src/main.cpp src/basicnet.cpp src/database.cpp)
 
 find_package(Eigen3 3.3 REQUIRED)
 target_link_libraries(basic_lopo.x PUBLIC Eigen3::Eigen)
 
 set(eddl_DIR "/home/andrea/eddl/lib/cmake/eddl")
 find_package(eddl REQUIRED)
 target_link_libraries(basic_lopo.x PUBLIC eddl)
 
 if (test)
     enable_testing()
     find_package(GTest)
     if (NOT GTEST_FOUND)
         SET(GTEST_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/include)
         SET(GTEST_BOTH_LIBRARIES libgtest.a libgtest_main.a)
     endif(NOT GTEST_FOUND)
     include_directories(${GTEST_INCLUDE_DIRS})
     add_executable(testGenMot src/test_main.cpp src/motif.cpp src/site.cpp src/treatment.cpp)
     target_link_libraries(testGenMot ${GTEST_BOTH_LIBRARIES} pthread)
     add_test(Genomic_motifs testGenMot)
 endif(test)
 
 find_package(Doxygen)
 if (DOXYGEN_FOUND)
     add_custom_target(doc ${DOXYGEN_EXECUTABLE} ${CMAKE_SOURCE_DIR}/Doxyfile
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
         COMMENT "Generating API documentation with Doxygen" VERBATIM)
 endif(DOXYGEN_FOUND)
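Note: `eddl_DIR` is hardcoded to a local path, so this build is tied to one machine; passing `-Deddl_DIR=<path>` at configure time would avoid editing the file. As a sanity check that the EDDL found this way actually compiles and links, a throwaway program like the following could be built against the same target settings (hypothetical, not part of this patch; API usage mirrors the calls in src/basicnet.cpp below):

```cpp
#include <eddl/apis/eddl.h>

// Build and print a trivial model purely to verify that the EDDL
// library located through eddl_DIR compiles and links correctly.
int main()
{
    eddl::layer in  = eddl::Input({4});
    eddl::layer out = eddl::Dense(in, 1);
    eddl::model net = eddl::Model({in}, {out});
    eddl::build(net, eddl::sgd(0.01), {"mse"}, {"mse"}, eddl::CS_CPU());
    eddl::summary(net);
    return 0;
}
```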
diff --git a/src/basicnet.cpp b/src/basicnet.cpp
index c91f211..508cb71 100644
--- a/src/basicnet.cpp
+++ b/src/basicnet.cpp
@@ -1,104 +1,152 @@
 #include <iostream>
 #include <eddl/apis/eddl.h>
+#include <eddl/tensor/tensor.h>
 #include "basicnet.h"
 
-using namespace eddl;
 
 #define NB_CHNS 4
 
 #define L2_K 0.1
 #define L2_B 0.1
 #define L2_A 0.0
 
 #define DROPOUT_RATE 0.5
 
 
 BasicNet::BasicNet()
 {
+    using namespace eddl;
+
     /*
     #########################################################
     #########################################################
     #########################################################
     #########################################################
 
     # ZERO PADDING NOT AVAILABLE YET
 
    # REGULARIZERS ARE AVAILABLE, BUT AS SIMPLE LAYERS, NOT AS PARAMETERS KERNEL_REGULARIZER AND BIAS_REGULARIZER:
    # WHERE DO I PUT THE LAYERS FOR THEM TO HAVE THE SAME EFFECT?
 
    # FUNCTION TRAIN UNDERWENT MAJOR (?) CHANGES: the fit function is much simpler than the Keras one
    # Maybe implemented in the future with "fine-grained training"
    */
 
     layer in_ = Input({1, NB_CHNS, 1280});
     layer l = in_;
 
     // l = ZeroPadding2D(l, {1,2});
     // l = Conv(l, 16, {3, 5}, data_format="channels_last", kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b))
     l = Conv(l, 16, {3, 5}, {1,2}, "same");    // padding='valid'
     l = BatchNormalization(l, 0.99, 0.001);
     l = Activation(l, "relu");
     l = MaxPool(l, {1,2}, {1,2}, "same");      // padding='valid'
 
     // l = ZeroPadding2D(l, {1,1});
     // l = Conv(l, 32, {3, 3}, {1,1}, data_format="channels_last", kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b));
     l = Conv(l, 32, {3, 3}, {1,1}, "same");    // padding='valid'
     l = BatchNormalization(l, 0.99, 0.001);
     l = Activation(l, "relu");
     l = MaxPool(l, {1,2}, {1,2}, "same");      // padding='valid'
 
     // l = ZeroPadding2D(l, {1,1});
     // l = Conv(l, 32, {3, 3}, {2,2}, data_format="channels_last", kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b));
     l = Conv(l, 32, {3, 3}, {2,2}, "same");    // padding='valid'
     l = BatchNormalization(l, 0.99, 0.001);
     l = Activation(l, "relu");
 
     l = Dropout(l, DROPOUT_RATE);
     l = Flatten(l);
 
     // l = Dense(l, 64, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=L2(l2_k), bias_regularizer=L2(l2_b));
     l = Dense(l, 64);
     l = Activation(l, "relu");
     l = Dropout(l, DROPOUT_RATE);
 
     // l = Dense(l, 1, kernel_initializer='glorot_uniform', bias_initializer='zeros');
     l = Dense(l, 1);
     l = Activation(l, "sigmoid");
 
     layer out_ = l;
     net = Model({in_}, {out_});
 
     build(net, sgd(0.01, 0.9, 0.0, true), {"soft_cross_entropy"}, {"categorical_accuracy"}, CS_CPU(4, "full_mem"));
 
     summary(net);
 }
 
 
-void BasicNet::train(const vector<float> x, const vector<float> y, int batch_size, int epochs)
+void BasicNet::fit(Tensor* x, Tensor* y, int batch_size, int epochs)
 {
-    //////////////////////////////////////////
-    //////////////////////////////////////////
-    //////////////////////////////////////////
-    // HERE SHOULD MOVE AXIS?????????????????? can be done with moveaxis (tensor.h ?)
-
-//    tensor x_train = eddlT::create(x);
-//    tensor y_train = eddlT::create(y);
+    x = Tensor::moveaxis(x, 3, 1);
 
-//    fit(model, {x_train}, {y_train}, batch_size, epochs);
+    eddl::fit(net, {x}, {y}, batch_size, epochs);
 }
 
 
-void BasicNet::evaluate(const vector<float> x, const vector<float> y)
+void BasicNet::evaluate(Tensor* x, Tensor* y)
 {
-    //////////////////////////////////////////
-    //////////////////////////////////////////
-    //////////////////////////////////////////
-    // HERE SHOULD MOVE AXIS?????????????????? can be done with moveaxis (tensor.h ?)
-
-//    tensor x_test = eddlT::create(x);
-//    tensor y_test = eddlT::create(y);
+    x = Tensor::moveaxis(x, 3, 1);
 
-//    evaluate(model, {x_test}, {y_test});
+    eddl::evaluate(net, {x}, {y});
 }
+
+
+
+/*
+#Calculate accuracy on test set
+def compute_accuracy(y_pred_, y_true_):
+    return 1 - np.sum(np.abs(np.max(np.round(y_pred_), axis=1) - np.array(y_true_)))/len(y_true_)
+
+def false_positive_rate(y_test, y_pred, detect_rule):
+    time_hr = len(y_pred)*1280/(256*60*60)
+    preds = np.max(np.rint(y_pred), axis=1)
+    fp = 0
+    x_ = np.zeros(detect_rule[1])
+
+    alarm_triggered = False
+    counter_alarm = 23    #Needs 1 mn between seizure onsets
+    false_alarms = []
+
+    for idx, x in enumerate(preds):
+        if(counter_alarm == 0):
+            alarm_triggered = False
+        else:
+            counter_alarm -= 1
+
+        if (alarm_triggered == False):
+            for j, y in enumerate(x_[::-1]):
+                if(j == len(x_)-1):
+                    x_[1] = x_[0]
+                    x_[0] = 0
+                else:
+                    x_[len(x_)-1-j] = x_[len(x_)-2-j]
+            if(x==1):
+                x_[0]=1
+            if(np.sum(x_) >= detect_rule[0]):
+                fp+=1
+                alarm_triggered = True
+                counter_alarm = 23
+                false_alarms.append(idx)
+
+    fpr = fp/time_hr
+    return fpr, fp, false_alarms
+
+#Compute detection time: first time to have 2 out of 3 segments being classified as ictal
+def compute_detect_time(preds, test, detect_rule):
+    x_ = np.zeros(detect_rule[1])
+    for idx, x in enumerate(preds):
+        for j, y in enumerate(x_[::-1]):
+            if(j == len(x_)-1):
+                x_[1] = x_[0]
+                x_[0] = 0
+            else:
+                x_[len(x_)-1-j] = x_[len(x_)-2-j]
+        if(x==1):
+            x_[0]=1
+        if(np.sum(x_) >= detect_rule[0]):
+            return idx
+    return -1
+*/
\ No newline at end of file
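Note: the new `fit`/`evaluate` take the Keras-style channels-last tensors produced by the Python pipeline and convert them with `Tensor::moveaxis(x, 3, 1)` before calling into EDDL, so a `{N, 4, 1280, 1}` batch becomes `{N, 1, 4, 1280}` to match `Input({1, NB_CHNS, 1280})`. A minimal usage sketch with random data (shapes assumed from the layer definition, not tested):

```cpp
#include <eddl/tensor/tensor.h>
#include "basicnet.h"

int main()
{
    // 32 five-second segments, 4 channels x 1280 samples, stored
    // channels-last as in the Python script; BasicNet::fit moves
    // axis 3 to axis 1, giving {32, 1, 4, 1280}.
    Tensor* x = Tensor::randn({32, 4, 1280, 1});
    Tensor* y = Tensor::zeros({32, 1});

    BasicNet net;
    net.fit(x, y, 8, 1);   // batch_size = 8, epochs = 1
    net.evaluate(x, y);
    return 0;
}
```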
diff --git a/src/basicnet.h b/src/basicnet.h
index f9dddf4..26e3dc6 100644
--- a/src/basicnet.h
+++ b/src/basicnet.h
@@ -1,17 +1,17 @@
 #include <vector>
 #include <eddl/apis/eddl.h>
 #include <eddl/tensor/tensor.h>
 
 
 class BasicNet
 {
     public:
         BasicNet();
 
-        void train(const vector<float> x, const vector<float> y, int batch_size, int epochs);
+        void fit(Tensor* x, Tensor* y, int batch_size, int epochs);
 
-        void evaluate(const vector<float> x, const vector<float> y);
+        void evaluate(Tensor* x, Tensor* y);
 
     private:
         eddl::model net;
 };
\ No newline at end of file
diff --git a/src/database.cpp b/src/database.cpp
new file mode 100644
index 0000000..ff7bc73
--- /dev/null
+++ b/src/database.cpp
@@ -0,0 +1,85 @@
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+
+#include "database.h"
+
+
+// Dataset position and choice
+const std::string data_folder = "../dataset/";
+const std::string signal_length = "1mn";
+
+
+
+DataItem::DataItem(std::ifstream& info_file, std::ifstream& label_file, std::ifstream& signal_file)
+{
+    info_file >> patient;
+    info_file >> id;
+    label_file >> label;
+
+    std::string line;
+    getline(signal_file, line);
+    std::stringstream line_stream(line);
+    float val;
+    while(line_stream >> val)
+    {
+        signal.push_back(val);
+        line_stream.get();    // skip the comma separator
+    }
+}
+
+
+bool DataItem::operator < (const DataItem& d) const
+{
+    if(patient.compare(d.patient) < 0)
+        return true;
+    else if(patient.compare(d.patient) == 0)
+    {
+        if(id.compare(d.id) < 0)
+            return true;
+        else if(id == d.id)
+        {
+            if(label < d.label)
+                return true;
+        }
+    }
+
+    return false;
+}
+
+
+bool compare_ptr_DataItem(DataItem* p1, DataItem* p2)
+{
+    return *p1 < *p2;
+}
+
+
+Database::Database()
+{
+    std::ifstream info_file(data_folder + "infos_mit_" + signal_length + ".txt");
+    std::ifstream label_file(data_folder + "labels_mit_" + signal_length + ".txt");
+    std::ifstream signal_file(data_folder + "signal_mit_" + signal_length + ".csv");
+
+    while(info_file.good() && label_file.good() && signal_file.good())
+        data.push_back(new DataItem(info_file, label_file, signal_file));
+
+    // Delete the last element, which is not correctly initialised because it was constructed after EOF.
+    delete data.back();
+    data.pop_back();
+
+    info_file.close();
+    label_file.close();
+    signal_file.close();
+
+    std::sort(data.begin(), data.end(), compare_ptr_DataItem);
+}
+
+
+Database::~Database()
+{
+    for(auto& data_item: data)
+        delete data_item;
+}
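Note: `Database::Database()` deliberately constructs one `DataItem` past the last record (the streams only report failure after a read past EOF) and then deletes it. Checking the streams before constructing would avoid the dummy element; a possible alternative loop (a sketch, ignoring a possible trailing newline in the files):

```cpp
// Sketch: peek before constructing, so no dummy trailing item is created
// and the delete/pop_back cleanup becomes unnecessary.
while(info_file.peek() != EOF && label_file.peek() != EOF && signal_file.peek() != EOF)
    data.push_back(new DataItem(info_file, label_file, signal_file));
```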
diff --git a/src/database.h b/src/database.h
new file mode 100644
index 0000000..eb29862
--- /dev/null
+++ b/src/database.h
@@ -0,0 +1,29 @@
+#include <fstream>
+#include <string>
+#include <vector>
+
+
+class DataItem
+{
+    public:
+        DataItem(std::ifstream& info_file, std::ifstream& label_file, std::ifstream& signal_file);
+
+        bool operator < (const DataItem& d) const;
+
+    private:
+        std::string patient;
+        std::string id;
+        float label;
+        std::vector<float> signal;
+};
+
+
+class Database
+{
+    public:
+        Database();
+        ~Database();
+
+    private:
+        std::vector<DataItem*> data;
+};
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index 3019827..1b77b15 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,56 +1,109 @@
-#include <string>
-#include <eddl/apis/eddl.h>
-#include <eddl/tensor/tensor.h>
-
-
 #include <iostream>
+#include "database.h"
 #include "basicnet.h"
 
 #define NB_CHNS 4
 
-// Dataset position and choice
-const std::string data_folder = "../dataset/";
-const std::string signal_length = "1mn";
-
 // Processing parameters
 const double validation_size = 0.15;
 const int test_stride = 640;    // Corresponds to 50% overlap, can be set to 1280 to have no overlap
 
 // Model parameters
 const double lr = 0.00075;
 const int epochs = 1;
 const int batch_size = 32;
 
 
 int main()
 {
-    Tensor* x_data = Tensor::load_from_txt(data_folder + "signal_mit_" + signal_length + ".csv", ',', 0);
-    Tensor* y_data = Tensor::load_from_txt(data_folder + "labels_mit_" + signal_length + ".txt", ' ', 0);
-
-    x_data->reshape_({x_data->shape[0], NB_CHNS, (int)(x_data->shape[1]/NB_CHNS)});
+    Database database;
+    BasicNet basic_net;
 
 /*
-info_data = np.loadtxt(data_folder+"infos_mit_"+signal_length+".txt", dtype="str")
-
-
-#Create the pandas df
-data = pd.concat([pd.Series(y_data), pd.Series([info[0] for info in info_data]),pd.Series([info[1] for info in info_data])], axis=1)
-data.columns = ["label", "patient", "file"]
-data["signal"] = ""
-data["signal"].astype(object)
-for i,sig in enumerate(x_data):
-    data.at[i,"signal"] = sig
 
 patients = np.unique(data.patient)
-data.sort_values(["patient", "file", "label"], inplace=True)
-data = data.reset_index(drop=True)
 
 #Load seizure times
 seizures = pd.read_csv(data_folder+"seizures.csv", delimiter='\t')
 seizures_ = seizures[seizures.Patient.isin(patients)]
 seizures_["length"] = seizures_.apply(lambda x: (x.end_seizure - x.start_seizure), axis=1)
+
+results = {}
+
+for patient in patients:
+    print("Patient: ", patient)
+    patient_data = data[data.patient == patient]
+    files = np.unique(patient_data.file)
+    print(' ', len(files), ' files.')
+
+    test_data = patient_data
+    train_data = data.drop(patient_data.index)
+
+    #Build train/test set by cutting each signal in pieces of 5 seconds, with 50% overlap
+    x_train, y_train, x_test, y_test = cut_signal_data(train_data, test_data, stride_test=test_stride)
+
+    #Shuffle and balance classes
+    x_train, y_train = shuffle(x_train, y_train)
+    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=validation_size, stratify=y_train)
+    x_train = np.array(x_train)
+    x_val = np.array(x_val)
+    // x_data->reshape_({x_data->shape[0], NB_CHNS, 1280, 1});
+    x_train = x_train.reshape((len(x_train),NB_CHNS,1280, 1))
+    x_val = x_val.reshape((len(x_val),NB_CHNS,1280, 1))
+
+    #Create and train new model
+    model = getModel()
+    es = EarlyStopping(monitor='val_loss', min_delta=0.005, patience=20, restore_best_weights=True)
+    train(model, x_train, y_train, [x_val, y_val], epochs=epochs, batch_size=batch_size)
+
+    x_test = np.array(x_test)
+    lengths = [len(x[0]) for x in x_test]
+    x_test_splitted = np.array_split(x_test, len(files))
+    y_test_splitted = np.array_split(np.array(y_test), len(files))
+
+    for x_test, y_test, file in zip(x_test_splitted, y_test_splitted, files):
+        x_test = np.array(x_test)
+        x_test = x_test.reshape((len(x_test),NB_CHNS,1280, 1))
+        predict(model, x_test, y_test)
+        pred = model.predict(x_test) - 0.25
+        #show_confusion_matrix(y_test, pred, str(patient) +' '+ str(file))
+        print("Accuracy (segments) :", compute_accuracy(pred, y_test))
+        detection_time = compute_detect_time(np.max(np.rint(pred), axis=1), y_test, [3,4])
+        print("Detection time window (positive if 23<detect_time<=45): ", detection_time)
+
+    if((detect_time>23) and (detect_time<=45)):
+        detection_times.append(detect_time)
+plt.hist(detection_times, bins=23, range=[23,46])
+plt.xlabel("Per segments of 5 seconds")
+plt.show()
+
+detected = len(detection_times)/counter
+print("Detection accuracy: ", detected)
*/
 }
\ No newline at end of file
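Note: the commented block in `main()` is the leave-one-patient-out evaluation logic still to be ported from `model_basic_lopo.py`. The detection rule (`detect_rule = [m, n]`: fire when at least m of the last n segment predictions are ictal) translates almost mechanically to C++; a hedged sketch, with names and types of my choosing rather than anything in this patch:

```cpp
#include <numeric>
#include <vector>

// C++ sketch of the Python compute_detect_time: returns the index of the
// first segment where at least detect_rule[0] of the last detect_rule[1]
// rounded predictions equal 1 (ictal), or -1 if the rule never fires.
int compute_detect_time(const std::vector<int>& preds, const std::vector<int>& detect_rule)
{
    std::vector<int> window(detect_rule[1], 0);
    for(size_t idx = 0; idx < preds.size(); ++idx)
    {
        // Shift the window right by one and push the newest prediction in
        // front, mirroring the x_ shift-register in the Python version.
        for(size_t j = window.size() - 1; j > 0; --j)
            window[j] = window[j - 1];
        window[0] = preds[idx];

        if(std::accumulate(window.begin(), window.end(), 0) >= detect_rule[0])
            return (int)idx;
    }
    return -1;
}
```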
diff --git a/src/model_basic_lopo.py b/src/model_basic_lopo.py
index eb5d690..736dbc5 100644
--- a/src/model_basic_lopo.py
+++ b/src/model_basic_lopo.py
@@ -1,160 +1,159 @@
 #Imports
 import random, time, os
 import numpy as np
 from matplotlib import pyplot as plt
 import pandas as pd
 import json
 
 #Utility file
 from utils import *
 
 #Tensorflow
 # os.environ["CUDA_VISIBLE_DEVICES"]="1"
 # import tensorflow as tf
 # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
 # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
 
 #keras
 # from keras.models import model_from_json
 # from keras.models import Sequential
 # from keras.layers import Dense, Dropout, Activation, Flatten
 # from keras.layers import Conv2D, BatchNormalization, Reshape, MaxPooling2D, ZeroPadding2D
 # from keras.utils import to_categorical
 # from keras.regularizers import l2
 # from keras.optimizers import SGD, Adam
 # from keras.callbacks import EarlyStopping
 
 import pyeddl._core.eddl as eddl
 import pyeddl._core.eddlT as eddlT
 
 #sklearn
 from sklearn.utils import shuffle
 from sklearn.metrics import confusion_matrix, classification_report, roc_curve, roc_auc_score
 from sklearn.model_selection import train_test_split
 
 from scipy.signal import spectrogram, stft
 
 #Misc
 data_folder = "../dataset/"
 signal_length = "1mn"
 
 #processing
 validation_size = 0.15
 test_stride = 640    #Corresponds to 50% overlap, can be set to 1280 to have no overlap
 
 #Model
 lr = 0.00075
 epochs = 1
 batch_size = 32
 NB_CHNS = 4
 verbose = 1
 
 with open(data_folder+"signal_mit_"+signal_length+".csv", "rb") as file:
     x_data_ = file.read().splitlines()
 y_data = np.loadtxt(data_folder+"labels_mit_"+signal_length+".txt")
 info_data = np.loadtxt(data_folder+"infos_mit_"+signal_length+".txt", dtype="str")
 
 #Convert from string
 x_data = []
 for sig in x_data_:
     x_data.append(np.fromstring(sig, sep=','))
 
 #Reshape in 2D as data are 1D in csv file
 x_data = [np.reshape(np.array(x_data_i), (NB_CHNS,int(len(x_data_i)/NB_CHNS))) for x_data_i in x_data]
-
 #Create the pandas df
 data = pd.concat([pd.Series(y_data), pd.Series([info[0] for info in info_data]),pd.Series([info[1] for info in info_data])], axis=1)
 data.columns = ["label", "patient", "file"]
 data["signal"] = ""
 data["signal"].astype(object)
 for i,sig in enumerate(x_data):
     data.at[i,"signal"] = sig
 
 patients = np.unique(data.patient)
 data.sort_values(["patient", "file", "label"], inplace=True)
 data = data.reset_index(drop=True)
 
 #Load seizure times
 seizures = pd.read_csv(data_folder+"seizures.csv", delimiter='\t')
 seizures_ = seizures[seizures.Patient.isin(patients)]
 seizures_["length"] = seizures_.apply(lambda x: (x.end_seizure - x.start_seizure), axis=1)
 
 results = {}
 
 for patient in patients:
     print("Patient: ", patient)
     patient_data = data[data.patient == patient]
     files = np.unique(patient_data.file)
     print(' ', len(files), ' files.')
 
     test_data = patient_data
     train_data = data.drop(patient_data.index)
 
     #Build train/test set by cutting each signal in pieces of 5 seconds, with 50% overlap
     x_train, y_train, x_test, y_test = cut_signal_data(train_data, test_data, stride_test=test_stride)
 
     #Shuffle and balance classes
     x_train, y_train = shuffle(x_train, y_train)
     x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=validation_size, stratify=y_train)
     x_train = np.array(x_train)
     x_val = np.array(x_val)
     x_train = x_train.reshape((len(x_train),NB_CHNS,1280, 1))
     x_val = x_val.reshape((len(x_val),NB_CHNS,1280, 1))
 
     #Create and train new model
     model = getModel()
     # es = EarlyStopping(monitor='val_loss', min_delta=0.005, patience=20, restore_best_weights=True)
     train(model, x_train, y_train, [x_val, y_val], verb=verbose, epochs=epochs, batch_size=batch_size)
 
     x_test = np.array(x_test)
     lengths = [len(x[0]) for x in x_test]
     x_test_splitted = np.array_split(x_test, len(files))
     y_test_splitted = np.array_split(np.array(y_test), len(files))
 
     for x_test, y_test, file in zip(x_test_splitted, y_test_splitted, files):
         x_test = np.array(x_test)
         x_test = x_test.reshape((len(x_test),NB_CHNS,1280, 1))
         predict(model, x_test, y_test)
         # pred = model.predict(x_test) - 0.25
         #show_confusion_matrix(y_test, pred, str(patient) +' '+ str(file))
         # print("Accuracy (segments) :", compute_accuracy(pred, y_test))
         # detection_time = compute_detect_time(np.max(np.rint(pred), axis=1), y_test, [3,4])
         # print("Detection time window (positive if 23<detect_time<=45): ", detection_time)
 
 #    if((detect_time>23) and (detect_time<=45)):
 #        detection_times.append(detect_time)
 #plt.hist(detection_times, bins=23, range=[23,46])
 #plt.xlabel("Per segments of 5 seconds")
 #plt.show()
 
 #detected = len(detection_times)/counter
 #print("Detection accuracy: ", detected)
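Note: `compute_accuracy` in the commented evaluation code is one minus the mean absolute difference between rounded predictions and the 0/1 labels. For the C++ port it reduces to a short loop (a sketch, assuming one prediction per segment; the helper name mirrors the Python one but is not part of this patch):

```cpp
#include <cmath>
#include <vector>

// C++ sketch of the Python compute_accuracy: fraction of segments whose
// rounded prediction matches the 0/1 label.
double compute_accuracy(const std::vector<float>& y_pred, const std::vector<float>& y_true)
{
    double errors = 0.0;
    for(size_t i = 0; i < y_pred.size(); ++i)
        errors += std::fabs(std::round(y_pred[i]) - y_true[i]);
    return 1.0 - errors / y_true.size();
}
```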