Page MenuHomec4science

script_prepareDatasets.py
No OneTemporary

File Metadata

Created
Thu, May 2, 17:58

script_prepareDatasets.py

__author__ = "Una Pale"
__email__ = "una.pale at epfl.ch"
'''Script that does several things (comment out the parts that are not needed):
- converts edf files to csv files without any changes in data
- calculates statistics for each subject and its seizures (start, stop, duration)
- selects data and rearranges it in different ways to use later for feature calculation
- plots data for each subject after the rearrangements
- calculates features for each input file and saves them
'''
from parametersSetup import *
from VariousFunctionsLib import *
#########################################################################
###################################
#CHBMIT
# Dataset in use; its name is also the root folder for all outputs.
# Possible values: '01_CHBMIT', '01_iEEG_Bern'
Dataset = '01_CHBMIT'
createFolderIfNotExists(Dataset)
# Folder with the input EDF files
folderEDF = '../../../../databases/medical/chb-mit/edf/'
# Subject IDs '01' ... '24' as zero-padded, two-character strings
# (replace with an explicit list to process only a subset of subjects)
patients = ['%02d' % subjectIdx for subjectIdx in range(1, 25)]
#######################################################################
# CONVERT EDF TO CSV DATA
'''Converts the original EDF files and .seizure files to one .csv file per input file,
keeping only the channels of interest from DatasetPreprocessParams.channelNamesToKeep.
Each output file keeps the name of its input file, except that files with seizures get
the extension '_Ns.csv', where N is the number of seizures in that file.
'''
rawDataSubfolder = '/01_datasetProcessed_RawData'
folderOut = Dataset + rawDataSubfolder
extractEDFdataToCSV_originalData(folderEDF, folderOut, DatasetPreprocessParams, patients)
# EXPORTING INFORMATION ABOUT SEIZURE STRUCTURE PER FILE
'''Prints the start, end and duration of the seizures in each file.
'''
seizureInfoSubfolder = '/00_SeizureInfoOriginalData/'
folderOutStats = Dataset + seizureInfoSubfolder
analyseSeizureDurations(folderEDF, folderOutStats, patients)
# #######################################################################
# SELECT AND REARRANGE DATA
# ALL DATA - SEIZURE TO SEIZURE CUT
'''Runs only when DatasetPreprocessParams.FileRearangeAllData is 'AllData_StoS'.
Important parameters are DatasetPreprocessParams.PreIctalTimeToRemove and
DatasetPreprocessParams.PostIctalTimeToRemove; they determine how much data before and
after a seizure will be removed.
Both can be set to 0, in which case that data can still be removed later, after the
features are calculated.
'''
if DatasetPreprocessParams.FileRearangeAllData == 'AllData_StoS':
    # the output folder name carries the rearrangement mode
    folderOut = Dataset + '/01_datasetProcessed_' + DatasetPreprocessParams.FileRearangeAllData
    extractEDFdataToCSV_KeepAllData_StoSFiles(folderEDF, folderOut, DatasetPreprocessParams, patients)
# ALL DATA - FIXED SIZE FILES (e.g. 1h, 4h)
'''Runs only when DatasetPreprocessParams.FileRearangeAllData is 'AllData_FixedSize'.
Important parameters are:
DatasetPreprocessParams.FileLen - how long, in minutes, the output files should be
DatasetPreprocessParams.PreIctalTimeToRemove and DatasetPreprocessParams.PostIctalTimeToRemove -
they determine how much data before and after a seizure will be removed
'''
if DatasetPreprocessParams.FileRearangeAllData == 'AllData_FixedSize':
    # the output folder name carries the rearrangement mode and the file length in minutes
    folderOut = (Dataset + '/01_datasetProcessed_' + DatasetPreprocessParams.FileRearangeAllData
                 + '_' + str(DatasetPreprocessParams.FileLen) + 'min')
    extractEDFdataToCSV_KeepAllData_FixedSizeFiles(folderEDF, folderOut, DatasetPreprocessParams, patients)
# SUBSELECTION OF DATA - NON SEIZURE AROUND SEIZURE, FROM THE SAME FILE, ALSO FACTOR x MORE NON SEIZURE DATA THAN SEIZURE
'''Runs only when DatasetPreprocessParams.FileRearangeAllData is 'SubselData_NonSeizAroundSeiz'.
Important parameters are:
DatasetPreprocessParams.RatioNonSeizSeiz
DatasetPreprocessParams.PreIctalTimeToRemove and DatasetPreprocessParams.PostIctalTimeToRemove -
they determine how much data before and after a seizure will be removed
'''
if DatasetPreprocessParams.FileRearangeAllData == 'SubselData_NonSeizAroundSeiz':
    # the output folder name carries the rearrangement mode and the non-seizure/seizure factor
    folderOut = (Dataset + '/01_datasetProcessed_' + DatasetPreprocessParams.FileRearangeAllData
                 + '_Fact' + str(DatasetPreprocessParams.RatioNonSeizSeiz))
    extractEDFdataToCSV_KeepSubselectionOfData_NonSeizAroundSeiz(folderEDF, folderOut, DatasetPreprocessParams, patients)
# SUBSELECTION OF DATA - NON SEIZURE RANDOMLY SELECTED FROM NON SEIZURE FILES, ALSO FACTOR x MORE NON SEIZURE DATA THAN SEIZURE
'''Runs only when DatasetPreprocessParams.FileRearangeAllData is 'SubselData_NonSeizRandom'.
Important parameters are:
DatasetPreprocessParams.RatioNonSeizSeiz
DatasetPreprocessParams.PreIctalTimeToRemove and DatasetPreprocessParams.PostIctalTimeToRemove -
they determine how much data before and after a seizure will be removed
'''
if DatasetPreprocessParams.FileRearangeAllData == 'SubselData_NonSeizRandom':
    # the output folder name carries the rearrangement mode and the non-seizure/seizure factor
    folderOut = (Dataset + '/01_datasetProcessed_' + DatasetPreprocessParams.FileRearangeAllData
                 + '_Fact' + str(DatasetPreprocessParams.RatioNonSeizSeiz))
    extractEDFdataToCSV_KeepSubselectionOfData_NonSeizRandomlySelected(folderEDF, folderOut, DatasetPreprocessParams, patients)
############################################################################################
# PLOTTING RAW DATA LABELS (FOR CHECKING WITH ROLL BASE ONES)
'''
Plots the labels, appended one after another for each subject, to check the file rearranging.
'''
# folderOut is whichever folder was assigned last: the raw-data folder, unless one of the
# FileRearangeAllData branches matched and replaced it
plotRawDataLabelsPerSubject(folderOut, patients)
# # #########################################################################
# CALCULATE FEATURES FOR EACH FILE
''' Parameters are:
FeaturesUsedParams.winLen and FeaturesUsedParams.winStep
and which data preparation type (of the ones above) is used
'''
# NOTE(review): the '_Fact<RatioNonSeizSeiz>' suffix is added for every FileRearangeAllData
# value, including the 'AllData_*' modes whose input folder name has no such suffix - confirm
# that this is intended
rearrangeMode = DatasetPreprocessParams.FileRearangeAllData
nonSeizRatio = DatasetPreprocessParams.RatioNonSeizSeiz
# the output folders are created one level at a time, parent first
folderOutFeatures = Dataset + '/02_features_' + rearrangeMode + '_Fact' + str(nonSeizRatio)
createFolderIfNotExists(folderOutFeatures)
# second level: one subfolder per window length / window step combination
folderOutFeatures += '/WinLen' + str(FeaturesUsedParams.winLen) + '_' + str(FeaturesUsedParams.winStep)
createFolderIfNotExists(folderOutFeatures)
calculateFeaturesPerEachFile(folderOut, folderOutFeatures, DatasetPreprocessParams, FeaturesUsedParams, patients)

Event Timeline