diff --git a/MITAnnotation.py b/MITAnnotation.py
new file mode 100644
index 0000000..23296c6
--- /dev/null
+++ b/MITAnnotation.py
@@ -0,0 +1,214 @@
+# -*- coding: utf-8 -*-
+# pylint: disable-msg=E1101, E0102, E0202
+"""
+Created on Mon Feb 27 12:08:29 2012
+
+Module with definition of classes to work with annotations in the MIT format.
+
+@author: T. Teijeiro
+"""
+import struct
+
+class MITAnnotation(object):
+    """
+    This class represents an annotation in the MIT format. Currently only
+    standard 2-byte annotations are supported.
+    """
+
+    #We use slots instead of __dict__ to save memory space when a lot of
+    #annotations are created and managed.
+    __slots__ = ('code', 'time', 'subtype', 'chan', 'num', 'aux')
+
+    def __init__(self, code=0, time=0, subtype=0, chan=0, num=0, aux=None):
+        self.code = code
+        self.time = time
+        self.subtype = subtype
+        self.chan = chan
+        self.num = num
+        self.aux = aux
+
+    def __str__(self):
+        return '{0} {1} {2} {3} {4} {5}'.format(self.time, self.code,
+                                                self.subtype, self.chan,
+                                                self.num, repr(self.aux))
+
+    def __repr__(self):
+        return str(self)
+
+    def __lt__(self, other):
+        return self.time < other.time
+
+#MIT format special codes
+SKIP_TIME = 1023
+AUX_CODE = 63
+SKIP_CODE = 59
+NUM_CODE = 60
+SUB_CODE = 61
+CHN_CODE = 62
+
+
+def is_qrs_annotation(annot):
+    """
+    Checks if an annotation corresponds to a QRS annotation:
+    NORMAL, UNKNOWN, SVPB, FUSION, VESC, LEARN, AESC, NESC, SVESC, PVC,
+    BBB, LBBB, ABERR, RBBB, NPC, PACE, PFUS, APC, RONT.
+    """
+    return annot.code in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+                          25, 30, 34, 35, 38, 41)
+
+
+def read_annotations(path):
+    """
+    Reads an annotation file in the MIT format.
+    See: http://www.physionet.org/physiotools/wag/annot-5.htm
+
+    Parameters
+    ----------
+    path:
+        Path to the file containing the annotations.
+
+    Returns
+    -------
+    out:
+        List of annotation objects.
+    """
+    result = []
+    f = open(path, 'rb')
+    num = 0
+    chan = 0
+    displ = 0
+    while True:
+        bann = f.read(2)
+        if not bann:
+            break
+        (b0, b1) = struct.unpack('bb', bann)
+        A = (b1 & 0xff) >> 2
+        I = ((b1 & 0x03) << 8) | (0xff & b0)
+        #Special codes parsing
+        if A == SKIP_CODE and I == 0:
+            (b0, b1, b2, b3) = struct.unpack('4b', f.read(4))
+            displ = ((b1 << 24) | ((b0 & 0xff) << 16) |
+                     ((b3 & 0xff) << 8) | (b2 & 0xff))
+        elif A == NUM_CODE:
+            num = I
+            result[-1].num = num
+        elif A == SUB_CODE:
+            result[-1].subtype = I
+        elif A == CHN_CODE:
+            chan = I
+            result[-1].chan = chan
+        elif A == AUX_CODE:
+            result[-1].aux = f.read(I)
+            if I % 2 != 0:
+                f.read(1)
+        elif A == I == 0:
+            break
+        else:
+            result.append(MITAnnotation(code=A, time=I + displ,
+                                        chan=chan, num=num))
+            displ = 0
+    f.close()
+    #Now, for each annotation we put the absolute time
+    abs_time = 0
+    for annot in result:
+        abs_time += annot.time
+        annot.time = max(0, abs_time)
+    return result
+
+def save_annotations(annots, path):
+    """
+    Saves a list of annotations in a file, in the MIT format.
+    See: http://www.physionet.org/physiotools/wag/annot-5.htm
+
+    Parameters
+    ----------
+    annots: List of MITAnnotation objects to be saved. It is sorted before
+        the writing operation.
+    path: Path to the file where the list is saved.
+    """
+    annots = sorted(annots)
+    f = open(path, 'wb')
+    prev_time = 0
+    prev_num = 0
+    prev_chn = 0
+
+    for anot in annots:
+        rel_time = anot.time - prev_time
+        #If the distance is greater than 1023 (what we can express with 10
+        #bits), we should write a skip code in the file.
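+        #A standard annotation is one 16-bit word: the annotation code A in
+        #the upper 6 bits and the time increment I in the lower 10 bits,
+        #matching the unpacking in read_annotations above
+        #(A = (b1 & 0xff) >> 2, I = ((b1 & 0x03) << 8) | (b0 & 0xff)).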
+        if rel_time > SKIP_TIME:
+            #A = SKIP_CODE, I = 0; then a 4-byte PDP-11 long integer
+            f.write(struct.pack('>H', SKIP_CODE << 2))
+            f.write(struct.pack('<H', rel_time >> 16))
+            f.write(struct.pack('<H', rel_time & 0xFFFF))
+            rel_time = 0
+        #Annotation code and time increment
+        f.write(struct.pack('<H', (anot.code << 10) | rel_time))
+        #NUM, SUB, CHN and AUX fields are written only when they change or are set
+        if anot.num != prev_num:
+            f.write(struct.pack('<H', (NUM_CODE << 10) | anot.num))
+            prev_num = anot.num
+        if anot.subtype != 0:
+            f.write(struct.pack('<H', (SUB_CODE << 10) | anot.subtype))
+        if anot.chan != prev_chn:
+            f.write(struct.pack('<H', (CHN_CODE << 10) | anot.chan))
+            prev_chn = anot.chan
+        if anot.aux is not None:
+            f.write(struct.pack('<H', (AUX_CODE << 10) | len(anot.aux)))
+            f.write(anot.aux)
+            if len(anot.aux) % 2 != 0:
+                f.write(struct.pack('b', 0))
+        prev_time = anot.time
+    #The end of the annotation file is marked with A = I = 0
+    f.write(struct.pack('<H', 0))
+    f.close()
diff --git a/PerformanceMetricsLib.py b/PerformanceMetricsLib.py
new file mode 100644
--- /dev/null
+++ b/PerformanceMetricsLib.py
+import numpy as np
+
+def calculateStartsAndStops(labels):
+    ''' detects starts and stops of (seizure) events in a binary label vector '''
+    labels = np.squeeze(labels)
+    diffSig = np.diff(np.concatenate(([0], labels, [0])))
+    starts = np.where(diffSig == 1)[0]
+    stops = np.where(diffSig == -1)[0]
+    return [[starts[i], stops[i]] for i in range(len(starts))]
+
+def calc_TPAndFP_Mod(ref, hyp, toleranceFP_bef, toleranceFP_aft):
+    ''' for a pair of ref and hyp events decides on tp and fp, with some
+    tolerance allowed before and after the ref event '''
+    ## collect start and stop times from input arg events
+    start_ref = ref[0]
+    stop_ref = ref[1]
+    start_hyp = hyp[0]
+    stop_hyp = hyp[1]
+
+    ##### detect if hyp and ref have some overlap
+    tp = 0
+    # ref:       <--------------------->
+    # hyp:  <----------------
+    if (start_hyp <= start_ref and stop_hyp > start_ref - toleranceFP_bef):  #+ tolerance
+        tp = 1
+    # ref:  <--------------------->   |
+    # hyp:              <----------------
+    elif (start_hyp < stop_ref + toleranceFP_aft and stop_hyp >= stop_ref):  #- tolerance
+        tp = 1
+    # ref:  <--------------------->
+    # hyp:        <---->
+    elif (stop_hyp <= stop_ref and start_hyp >= start_ref):
+        tp = 1
+
+    #### detect fp
+    fp = 0
+    fp_bef = 0
+    fp_aft = 0
+    # ref:   |   <--------------------->   |
+    # hyp: <----------------
+    if (start_hyp < start_ref - toleranceFP_bef):
+        fp = fp + 1
+        fp_bef = 1
+    # ref:   |   <--------------------->   |
+    # hyp:                 ------------------>
+    if (stop_hyp > stop_ref + toleranceFP_aft):
+        fp = fp + 1
+        fp_aft = 1
+
+    return (tp, fp, fp_bef, fp_aft)
+
+def calc_TPAndFP(ref, hyp, toleranceFP_bef, toleranceFP_aft):
+    ''' for a pair of ref and hyp events decides on tp and fp '''
+    ## collect start and stop times from input arg events
+    start_ref = ref[0]
+    stop_ref = ref[1]
+    start_hyp = hyp[0]
+    stop_hyp = hyp[1]
+
+    ##### detect if hyp and ref have some overlap
+    tp = 0
+    # ref:       <--------------------->
+    # hyp:  <----------------
+    if (start_hyp <= start_ref and stop_hyp > start_ref):
+        tp = 1
+    # ref:  <--------------------->
+    # hyp:              <----------------
+    elif (start_hyp < stop_ref and stop_hyp >= stop_ref):
+        tp = 1
+    # ref:  <--------------------->
+    # hyp:        <---->
+    elif (stop_hyp <= stop_ref and start_hyp >= start_ref):
+        tp = 1
+
+    #### detect fp
+    fp = 0
+    fp_bef = 0
+    fp_aft = 0
+    # ref:   |   <--------------------->   |
+    # hyp: <----------------
+    if (start_hyp < start_ref - toleranceFP_bef):
+        fp = fp + 1
+        fp_bef = 1
+    # ref:   |   <--------------------->   |
+    # hyp:                 ------------------>
+    if (stop_hyp > stop_ref + toleranceFP_aft):
+        fp = fp + 1
+        fp_aft = 1
+    return (tp, fp, fp_bef, fp_aft)
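+# Example sketch (hypothetical numbers): for ref = (100, 200), hyp = (90, 150),
+# toleranceFP_bef = 20 and toleranceFP_aft = 30, the hyp event overlaps the
+# start of the ref event and stays within both tolerance margins, so
+# calc_TPAndFP_Mod returns (tp=1, fp=0, fp_bef=0, fp_aft=0); a hyp of (60, 150)
+# would additionally count an FP before the event, giving (1, 1, 1, 0).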
+def performance_episodes(predLab, trueLab, toleranceFP_bef, toleranceFP_aft):
+    '''
+    function that detects episodes in a stream of true labels and predictions,
+    finds overlaps and measures sensitivity, precision, F1 score and number of false positives
+    '''
+    totalTP = 0
+    totalFP = 0
+    #transform to events
+    predEvents = calculateStartsAndStops(predLab)
+    trueEvents = calculateStartsAndStops(trueLab)
+    #create flags for each event marking whether it has been used
+    flag_predEvents = np.zeros(len(predEvents))
+    flag_trueEvents = np.zeros(len(trueEvents))
+    flag_trueEventsFPAround = np.zeros(len(trueEvents))
+    # goes through ref events
+    if (len(trueEvents) == 0):
+        totalFP = len(predEvents)
+    else:
+        for etIndx, eTrue in enumerate(trueEvents):
+            for epIndx, ePred in enumerate(predEvents):
+                (tp0, fp0, fp_bef, fp_aft) = calc_TPAndFP_Mod(eTrue, ePred, toleranceFP_bef, toleranceFP_aft)
+
+                #if overlap detected (tp=1) and this ref seizure hasn't been used
+                # ref:  <----->    <----->   <----->        <-------------->
+                # hyp: <---------->  <---------->  <-------------->  <----->
+                if (tp0 == 1 and flag_trueEvents[etIndx] == 0 and flag_predEvents[epIndx] == 0):
+                    totalTP = totalTP + tp0
+                    totalFP = totalFP + fp0
+                    flag_trueEvents[etIndx] = 1
+                    flag_predEvents[epIndx] = 1  #1 means match
+                    if (fp0 == 2):
+                        flag_trueEventsFPAround[etIndx] = 2
+                    else:
+                        flag_trueEventsFPAround[etIndx] = fp_aft - fp_bef  #1 if it was after, or -1 if it was before
+                #if ref event was already matched and now we have some extra predicted seizures
+                # ref:  <------------------------------------------>
+                # hyp: <---------->  <-------------->  <----->
+                elif (tp0 == 1 and flag_trueEvents[etIndx] == 1 and flag_predEvents[epIndx] == 0):
+                    #totalTP = totalTP + tp0  #we already counted that one
+                    totalFP = totalFP + fp0  #ideally fp0 should be 0, but at the end we might have 1 fp
+                    #flag_trueEvents[etIndx] = 1  #it is already 1, so not needed again
+                    flag_predEvents[epIndx] = 2  #2 means overlapping but not the first match with a seizure
+                #if one big pred seizure covers more ref seizures
+                # ref: <---------->  <-------------->  <----->
+                # hyp: <------------------------------------------>
+                elif (tp0 == 1 and flag_trueEvents[etIndx] == 0 and flag_predEvents[epIndx] == 1):
+                    #totalTP = totalTP + tp0  # HERE WE NEED TO DECIDE WHETHER WE COUNT THEM AS TP OR NOT !!!!
+                    totalFP = totalFP + fp0  #we treat this as 1 FP
+                    if (flag_trueEventsFPAround[etIndx - 1] > 0 and fp_bef == 1):  #if the FP after the previous true seizure was already counted, do not count it again as an FP before this one
+                        totalFP = totalFP - 1
+                    flag_trueEvents[etIndx] = 0  #it has to stay unmatched
+                    #flag_predEvents[epIndx] = 1  #already matched
+                #if pred seizure was named FP in a pass with a previous seizure but now matches this seizure
+                elif (tp0 == 1 and flag_trueEvents[etIndx] == 0 and flag_predEvents[epIndx] == -1):
+                    totalTP = totalTP + tp0
+                    totalFP = totalFP - 1 + fp0  #remove fp from before
+                    flag_trueEvents[etIndx] = 1  #match seizure
+                    if (fp0 == 2):
+                        flag_trueEventsFPAround[etIndx] = 2
+                    else:
+                        flag_trueEventsFPAround[etIndx] = fp_aft - fp_bef  #1 if it was after, or -1 if it was before
+                    flag_predEvents[epIndx] = 1  #relabel this pred seizure
+                #if pred seizure was named FP in a pass with a previous seizure, now overlaps a seizure, but this seizure was already matched
+                elif (tp0 == 1 and flag_trueEvents[etIndx] == 1 and flag_predEvents[epIndx] == -1):
+                    #totalTP = totalTP + tp0  #we already counted that one
+                    totalFP = totalFP - 1 + fp0  #ideally fp0 should be 0, but at the end we might have 1 fp; remove the FP from before
+                    #flag_trueEvents[etIndx] = 1  #it is already 1, so not needed again
+                    flag_predEvents[epIndx] = 2  #2 means overlapping but not the first match with a seizure
+                #prediction not matched with any true seizure
+                elif (tp0 == 0 and flag_predEvents[epIndx] == 0):
+                    totalFP = totalFP + 1  #+fp0
+                    flag_predEvents[epIndx] = -1  #-1 means used as FP
+                elif (flag_predEvents[epIndx] == 2):  #already counted as part of a previous seizure, no need to count it again
+                    pass
+                elif (flag_predEvents[epIndx] == -1):  #already counted as FP, no need to count it again
+                    pass
+                elif (flag_trueEvents[etIndx] == 1 and flag_predEvents[epIndx] == 1):  #both already matched
+                    pass
+                elif (flag_trueEvents[etIndx] == 0 and flag_predEvents[epIndx] == 1):  #pred seizure was matched, true one not found yet
+                    pass
+                else:
+                    #flag_predEvents[epIndx] = 1
+                    print('ERROR: new case not covered')
+
+    #calculating final performance
+    numTrueSeiz = len(trueEvents)
+    numPredSeiz = len(predEvents)
+
+    numMissedSeiz = numTrueSeiz - np.sum(flag_trueEvents)
+
+    #precision = TP / numPredSeiz, but if everything is one big predicted seizure then things go wrong and the value would be > 1
+    if ((totalTP + totalFP) != 0):
+        precision = totalTP / (totalTP + totalFP)
+    else:
+        precision = 0  #np.nan  #if no seizures predicted (if there were really no seizures it will be np.nan later)
+
+    #sensitivity = TP / numTrueSeiz
+    if ((numTrueSeiz) != 0):
+        sensitivity = totalTP / numTrueSeiz
+    else:
+        sensitivity = np.nan  #IF NO TRUE SEIZURES, SENSITIVITY = NAN
+        precision = np.nan
+
+    if ((sensitivity + precision) != 0):
+        F1score = (2 * sensitivity * precision) / (sensitivity + precision)
+    else:
+        F1score = 0  #np.nan
+
+    #checkups
+    # if ((totalTP + totalFP) != numPredSeiz):
+    #     print('there are long pred seiz')
+    if ((numMissedSeiz + totalTP) != numTrueSeiz):
+        print('sth wrong with counting seizures')
+    if (totalFP < len(np.where(flag_predEvents == -1)[0])):
+        print('sth wrong with counting FP')
+    if (totalTP != len(np.where(flag_predEvents == 1)[0])):
+        print('sth wrong with counting seizures 2')
+
+    # #visualize
+    # xvalues = np.arange(0, len(trueLab), 1)
+    # fig1 = plt.figure(figsize=(16, 16), constrained_layout=False)
+    # gs = GridSpec(1, 1, figure=fig1)
+    # fig1.subplots_adjust(wspace=0.4, hspace=0.6)
+    # fig1.suptitle('True and pred labels')
+    # ax1 = fig1.add_subplot(gs[0, 0])
+    # ax1.plot(xvalues, trueLab, 'r')
+    # ax1.plot(xvalues, predLab * 0.9, 'k')
+    # ax1.set_xlabel('Time')
+    # ax1.legend(['True', 'Pred'])
+    # #calculate performance for duration to put in the title
+    # (sensitivity_duration, precision_duration, F1score_duration) = performance_duration(predLab, trueLab)
+    # ax1.set_title('EPISODES: sensitivity ' + str(sensitivity) + ' precision ' + str(precision) + ' F1score ' + str(F1score) + ' totalTP ' + str(totalTP) + ' totalFP ' + str(totalFP) + '\n' +
+    #               'DURATION: sensitivity ' + str(sensitivity_duration) + ' precision ' + str(precision_duration) + ' F1score ' + str(F1score_duration))
+    # ax1.grid()
+    # fig1.show()
+    # fig1.savefig(folderOut + '/' + figName)
+    # plt.close(fig1)
+
+    return (sensitivity, precision, F1score, totalFP)
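+# Worked example (hypothetical counts): with 3 true seizures of which 2 are
+# matched (totalTP=2) and 1 unmatched prediction (totalFP=1), this gives
+# sensitivity = 2/3, precision = 2/(2+1) = 2/3 and F1score = 2/3.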
+def performance_all9(predLab, trueLab, toleranceFP_bef, toleranceFP_aft, numLabelsPerHour):
+    ''' function that returns 9 different performance measures of prediction on epilepsy:
+    - on the level of seizure episodes (sensitivity, precision and F1 score)
+    - on the level of seizure duration, i.e. each sample (sens, prec, F1)
+    - combinations of F1 scores for episodes and duration (mean and gmean)
+    - number of false positives per day '''
+    (sensE, precisE, F1E, totalFP) = performance_episodes(predLab, trueLab, toleranceFP_bef, toleranceFP_aft)
+    (sensD, precisD, F1D) = performance_duration(predLab, trueLab)
+
+    #calculate combinations
+    F1DEmean = (F1D + F1E) / 2
+    F1DEgeoMean = np.sqrt(F1D * F1E)
+
+    #calculate numFP per day
+    #numLabelsPerHour = 60 * 60 / SegSymbParams.slidWindStepSec
+    timeDurOfLabels = len(trueLab) / numLabelsPerHour
+    if (timeDurOfLabels != 0):
+        numFPperHour = totalFP / timeDurOfLabels
+    else:
+        numFPperHour = np.nan
+    numFPperDay = numFPperHour * 24
+
+    if (sensE > 1.0 or precisE > 1.0 or F1E > 1.0 or sensD > 1.0 or precisD > 1.0 or F1D > 1.0 or F1DEmean > 1.0 or F1DEgeoMean > 1.0):
+        print('ERROR - perf measures impossibly big!')
+    # if (np.sum(trueLab) == 0):
+    #     print('No Seiz in file')
+
+    return (sensE, precisE, F1E, sensD, precisD, F1D, F1DEmean, F1DEgeoMean, numFPperDay)
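+# Example of the FP-rate arithmetic (hypothetical numbers): with a 4 s label
+# step, numLabelsPerHour = 3600/4 = 900; 45000 labels then cover 50 h, so
+# totalFP = 10 yields numFPperHour = 10/50 = 0.2 and numFPperDay = 4.8.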
+def smoothenLabels(prediction, seizureStableLenToTestIndx, seizureStablePercToTest, distanceBetweenSeizuresIndx):
+    ''' returns labels after two steps of postprocessing:
+    first, a moving window with voting - if more than a threshold of the labels in the window are 1, the final label is 1, otherwise 0;
+    second, merging seizures that are too close '''
+
+    #labels = labels.reshape(len(labels))
+    smoothLabelsStep1 = np.zeros((len(prediction)))
+    smoothLabelsStep2 = np.zeros((len(prediction)))
+    try:
+        a = int(seizureStableLenToTestIndx)
+    except:
+        print('error seizureStableLenToTestIndx')
+        print(seizureStableLenToTestIndx)
+    try:
+        a = int(len(prediction))
+    except:
+        print('error prediction')
+        print(prediction)
+    #first classifying as true 1 if at least GeneralParams.seizureStableLenToTest in a row are 1
+    for i in range(int(seizureStableLenToTestIndx), int(len(prediction))):
+        s = sum(prediction[i - seizureStableLenToTestIndx + 1: i + 1]) / seizureStableLenToTestIndx
+        try:
+            if (s >= seizureStablePercToTest):  #and prediction[i] == 1
+                smoothLabelsStep1[i] = 1
+        except:
+            print('error')
+    smoothLabelsStep2 = np.copy(smoothLabelsStep1)
+
+    #second part
+    prevSeizureEnd = -distanceBetweenSeizuresIndx
+    for i in range(1, len(prediction)):
+        if (smoothLabelsStep2[i] == 1 and smoothLabelsStep2[i - 1] == 0):  # new seizure started
+            # find end of the seizure
+            j = i
+            while (smoothLabelsStep2[j] == 1 and j < len(smoothLabelsStep2) - 1):
+                j = j + 1
+            #if the current seizure is too close to the previous one, merge them
+            if ((i - prevSeizureEnd) < distanceBetweenSeizuresIndx):  # seizure started but is too close to the previous one
+                #delete second seizure
+                #prevSeizureEnd = j
+                #[i:prevSeizureEnd] = np.zeros((prevSeizureEnd - i - 1))  #delete second seizure - this was before
+                #concatenate seizures
+                if (prevSeizureEnd < 0):  #if exactly the first seizure
+                    prevSeizureEnd = 0
+                smoothLabelsStep2[prevSeizureEnd:j] = np.ones((j - prevSeizureEnd))
+            prevSeizureEnd = j
+            i = prevSeizureEnd
+
+    return (smoothLabelsStep2, smoothLabelsStep1)
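+# Example (hypothetical settings): with seizureStableLenToTestIndx = 8 and
+# seizureStablePercToTest = 0.5, sample i of smoothLabelsStep1 becomes 1 only
+# if at least 4 of the last 8 predictions are 1; detections closer than
+# distanceBetweenSeizuresIndx samples are then merged into one episode.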
+def smoothenLabels_Bayes(prediction, probability, seizureStableLenToTestIndx, probThresh, distanceBetweenSeizuresIndx):
+    ''' returns labels after Bayes postprocessing:
+    calculates the cumulative probability of seizure and non-seizure over a window of size seizureStableLenToTestIndx;
+    if log(conf_pos / conf_neg) > probThresh then seizure '''
+
+    #'probability' has the probability of the predicted class, but Bayes needs the probability per target
+    probability_pos = np.copy(probability)
+    indxs = np.where(prediction == 0)[0]
+    probability_pos[indxs] = 1 - probability[indxs]  #keeps only the target '1''s probability
+
+    conf_pos = np.zeros((len(probability)))
+    conf_neg = np.zeros((len(probability)))
+
+    try:
+        a = int(seizureStableLenToTestIndx)
+    except:
+        print('error seizureStableLenToTestIndx')
+        print(seizureStableLenToTestIndx)
+    try:
+        a = int(len(prediction))
+    except:
+        print('error prediction')
+        print(prediction)
+
+    #initialize cumulative confidences over the first window
+    conf_pos[0:seizureStableLenToTestIndx] = probability_pos[0:seizureStableLenToTestIndx]
+    conf_neg[0:seizureStableLenToTestIndx] = 1 - probability_pos[0:seizureStableLenToTestIndx]
+    for i in range(int(seizureStableLenToTestIndx), int(len(prediction))):
+        probThisWind = probability_pos[i - seizureStableLenToTestIndx: i]
+        conf_pos[i] = np.prod(probThisWind)
+        conf_neg[i] = np.prod(1 - probThisWind)
+
+    conf = np.log((conf_pos + 0.00000001) / (conf_neg + 0.00000001))
+    smoothLabels = conf > probThresh
+    #second part: merging detections that are too close into one
+    smoothLabelsStep2 = np.copy(smoothLabels)
+    prevSeizureEnd = -distanceBetweenSeizuresIndx
+    for i in range(1, len(prediction)):
+        if (smoothLabelsStep2[i] == 1 and smoothLabelsStep2[i - 1] == 0):  # new seizure started
+            # find end of the seizure
+            j = i
+            while (smoothLabelsStep2[j] == 1 and j < len(smoothLabelsStep2) - 1):
+                j = j + 1
+            #if the current seizure is too close to the previous one, merge them
+            if ((i - prevSeizureEnd) < distanceBetweenSeizuresIndx):
+                #concatenate seizures
+                if (prevSeizureEnd < 0):  #if exactly the first seizure
+                    prevSeizureEnd = 0
+                smoothLabelsStep2[prevSeizureEnd:j] = np.ones((j - prevSeizureEnd))
+            prevSeizureEnd = j
+            i = prevSeizureEnd
+
+    return (smoothLabelsStep2)
+
+
+def calculatePerformanceAfterVariousSmoothing(predLabels, label, probabilityLabels, toleranceFP_bef, toleranceFP_aft, numLabelsPerHour, seizureStableLenToTestIndx, seizureStablePercToTest, distanceBetweenSeizuresIndx, bayesWind, probThresh):
+    numTypes = 4  #no smooth, movingAvrg, movingAvrg+merging, bayes
+    numPerf = 9
+    performancesAll = np.zeros((numPerf * numTypes))
+
+    # calculate different performance measures - no smoothing
+    performancesAll[: numPerf] = performance_all9(predLabels, label, toleranceFP_bef, toleranceFP_aft, numLabelsPerHour)
+
+    #smoothing using moving average and then merging
+    (yPred_SmoothOurStep2, yPred_SmoothOurStep1) = smoothenLabels(predLabels, seizureStableLenToTestIndx, seizureStablePercToTest, distanceBetweenSeizuresIndx)
+    performancesAll[numPerf: 2 * numPerf] = performance_all9(yPred_SmoothOurStep1, label, toleranceFP_bef, toleranceFP_aft, numLabelsPerHour)
+    performancesAll[2 * numPerf: 3 * numPerf] = performance_all9(yPred_SmoothOurStep2, label, toleranceFP_bef, toleranceFP_aft, numLabelsPerHour)
+
+    #bayes smoothing
+    yPred_SmoothBayes = smoothenLabels_Bayes(predLabels, probabilityLabels, bayesWind, probThresh, distanceBetweenSeizuresIndx)
+    performancesAll[3 * numPerf: 4 * numPerf] = performance_all9(yPred_SmoothBayes, label, toleranceFP_bef, toleranceFP_aft, numLabelsPerHour)
+
+    return (performancesAll, yPred_SmoothOurStep1, yPred_SmoothOurStep2, yPred_SmoothBayes)
+
+
+if __name__ == "__main__":
+    pass
diff --git a/VariousFunctionsLib.py b/VariousFunctionsLib.py
new file mode 100644
index 0000000..7c54e1c
--- /dev/null
+++ b/VariousFunctionsLib.py
@@ -0,0 +1,1380 @@
+#!~/anaconda3/bin python
+
+__author__ = "Una Pale"
+__credits__ = ["Una Pale", "Renato Zanetti"]
+__license__ = "LGPL"
+__version__ = "1.0"
+__email__ = "una.pale at epfl.ch"
+
+import os
+import glob
+import csv
+import math
+import numpy as np
+# import sklearn
+import multiprocessing as mp
+import time
+from math import ceil
+# from sklearn import metrics
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from matplotlib.gridspec import GridSpec
+import pywt
+import scipy
+import sys
+import pyedflib
+import MITAnnotation as MIT
+from sklearn import svm
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.ensemble import BaggingClassifier
+from sklearn.ensemble import AdaBoostClassifier
+import scipy.io
+# import random
+from PerformanceMetricsLib import *
+from scipy import signal
+# from scipy import interpolate
+import pandas as pd
+# import seaborn as sns
+# import wfdb
+from antropy import *
+
+
+########################################################
+#global variables for multi-core operation
+number_free_cores = 0
+n_cores_semaphore = 1
+
+
+def createFolderIfNotExists(folderOut):
+    ''' creates the folder if it doesn't already exist;
+    warns if the creation failed '''
+    if not os.path.exists(folderOut):
+        try:
+            os.mkdir(folderOut)
+        except OSError:
+            print("Creation of the directory %s failed" % folderOut)
+        # else:
+        #     print("Successfully created the directory %s " % folderOut)
+
+
+def calculateOtherMLfeatures_oneCh(X, ZeroCrossParams):
+    numFeat = 56  #54 from Sopic2018, plus LL and meanAmpl
+    lenSig = len(X)
+    segLenIndx = int(ZeroCrossParams.winLen * ZeroCrossParams.samplFreq)  # length of EEG segments 
in samples + slidWindStepIndx = int( ZeroCrossParams.winStep * ZeroCrossParams.samplFreq) # step of slidin window to extract segments in samples + index = np.arange(0, lenSig - segLenIndx, slidWindStepIndx).astype(int) + + featureValues=np.zeros((len(index), numFeat)) + for i in range(len(index)): + sig = X[index[i]:index[i] + segLenIndx] + feat54 = calculateMLfeatures_oneDataWindow_54(sig, ZeroCrossParams.samplFreq) + meanAmpl = np.mean(np.abs(sig)) + LL = np.mean(np.abs(np.diff(sig))) + featureValues[i, :] = np.hstack((meanAmpl, LL, feat54)) + return (featureValues) + + +def calculateMLfeatures_oneDataWindow_54(data, samplFreq): + ''' function that calculates various features relevant for epileptic seizure detection + from paper: D. Sopic, A. Aminifar, and D. Atienza, e-Glass: A Wearable System for Real-Time Detection of Epileptic Seizures, 2018 + but uses only features that are (can be) normalized + _54 means that return also absolute values of power bands + ''' + #some parameters + DWTfilterName = 'db4' # 'sym5' + DWTlevel = 7 + n1 = 2 #num dimensions for sample entropy + r1 = 0.2 # num of STD for sample entropy + r2 = 0.35 # num of STD for sample entropy + a = 2 # param for shannon, renyi and tsallis enropy + q = 2 # param for shannon, renyi and tsallis enropy + + #DWT + coeffs = pywt.wavedec(data, DWTfilterName, level=DWTlevel) + a7, d7, d6, d5, d4, d3, d2, d1= coeffs + + # #sample entropy (Obs: the function sampen2 takes too long for executing, hence it was substituted with sample_entropy) + # samp_1_d7_1 = sampen2(n1, r1 * np.std(d7), d7) + # samp_1_d6_1 = sampen2(n1, r1 * np.std(d6), d6) + # samp_2_d7_1 = sampen2(n1, r2 * np.std(d7), d7) + # samp_2_d6_1 = sampen2(n1, r2 * np.std(d6), d6) + + #sample entropy: only allows r=0.2, does we zeroed samp_2_d7_1 an samp_2_d6_1 (after first tests) + samp_1_d7_1 = sample_entropy(d7) + samp_1_d6_1 = sample_entropy(d6) + samp_2_d7_1 = 0 #sample entropy with r=0.35 was discarded for the use of 'antropy' library + samp_2_d6_1 = 0 + + #permutation entropy + perm_d7_3 = perm_entropy(d7, order=3, delay=1, normalize=True) # normalize=True instead of false as in paper + perm_d7_5 = perm_entropy(d7, order=5, delay=1, normalize=True) + perm_d7_7 = perm_entropy(d7, order=7, delay=1, normalize=True) + perm_d6_3 = perm_entropy(d6, order=3, delay=1, normalize=True) + perm_d6_5 = perm_entropy(d6, order=5, delay=1, normalize=True) + perm_d6_7 = perm_entropy(d6, order=7, delay=1, normalize=True) + perm_d5_3 = perm_entropy(d5, order=3, delay=1, normalize=True) + perm_d5_5 = perm_entropy(d5, order=5, delay=1, normalize=True) + perm_d5_7 = perm_entropy(d5, order=7, delay=1, normalize=True) + perm_d4_3 = perm_entropy(d4, order=3, delay=1, normalize=True) + perm_d4_5 = perm_entropy(d4, order=5, delay=1, normalize=True) + perm_d4_7 = perm_entropy(d4, order=7, delay=1, normalize=True) + perm_d3_3 = perm_entropy(d3, order=3, delay=1, normalize=True) + perm_d3_5 = perm_entropy(d3, order=5, delay=1, normalize=True) + perm_d3_7 = perm_entropy(d3, order=7, delay=1, normalize=True) + + #shannon renyi and tsallis entropy + (shannon_en_sig, renyi_en_sig, tsallis_en_sig) = sh_ren_ts_entropy(data, a, q) + (shannon_en_d7, renyi_en_d7, tsallis_en_d7) = sh_ren_ts_entropy(d7, a, q) + (shannon_en_d6, renyi_en_d6, tsallis_en_d6) = sh_ren_ts_entropy(d6, a, q) + (shannon_en_d5, renyi_en_d5, tsallis_en_d5) = sh_ren_ts_entropy(d5, a, q) + (shannon_en_d4, renyi_en_d4, tsallis_en_d4) = sh_ren_ts_entropy(d4, a, q) + (shannon_en_d3, renyi_en_d3, tsallis_en_d3) = sh_ren_ts_entropy(d3, a, q) 
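+    # Note (assumption - 256 Hz sampling as in the CHB-MIT recordings): the
+    # db4 decomposition levels then roughly cover d1: 64-128 Hz, d2: 32-64 Hz,
+    # d3: 16-32 Hz, d4: 8-16 Hz, d5: 4-8 Hz, d6: 2-4 Hz, d7: 1-2 Hz and
+    # a7: 0-1 Hz, which is why the entropy features above use the d7..d3
+    # sub-bands.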
+
+    #band power
+    p_tot = bandpower(data, samplFreq, 0, 45)
+    p_dc = bandpower(data, samplFreq, 0, 0.5)
+    p_mov = bandpower(data, samplFreq, 0.1, 0.5)
+    p_delta = bandpower(data, samplFreq, 0.5, 4)
+    p_theta = bandpower(data, samplFreq, 4, 8)
+    p_alfa = bandpower(data, samplFreq, 8, 13)
+    p_middle = bandpower(data, samplFreq, 12, 13)
+    p_beta = bandpower(data, samplFreq, 13, 30)
+    p_gamma = bandpower(data, samplFreq, 30, 45)
+    p_dc_rel = p_dc / p_tot
+    p_mov_rel = p_mov / p_tot
+    p_delta_rel = p_delta / p_tot
+    p_theta_rel = p_theta / p_tot
+    p_alfa_rel = p_alfa / p_tot
+    p_middle_rel = p_middle / p_tot
+    p_beta_rel = p_beta / p_tot
+    p_gamma_rel = p_gamma / p_tot
+
+    featuresAll = [samp_1_d7_1, samp_1_d6_1, samp_2_d7_1, samp_2_d6_1, perm_d7_3, perm_d7_5, perm_d7_7, perm_d6_3, perm_d6_5, perm_d6_7, perm_d5_3, perm_d5_5,
+                   perm_d5_7, perm_d4_3, perm_d4_5, perm_d4_7, perm_d3_3, perm_d3_5, perm_d3_7, shannon_en_sig, renyi_en_sig, tsallis_en_sig, shannon_en_d7, renyi_en_d7, tsallis_en_d7,
+                   shannon_en_d6, renyi_en_d6, tsallis_en_d6, shannon_en_d5, renyi_en_d5, tsallis_en_d5, shannon_en_d4, renyi_en_d4, tsallis_en_d4, shannon_en_d3, renyi_en_d3, tsallis_en_d3,
+                   p_dc_rel, p_mov_rel, p_delta_rel, p_theta_rel, p_alfa_rel, p_middle_rel, p_beta_rel, p_gamma_rel,
+                   p_dc, p_mov, p_delta, p_theta, p_alfa, p_middle, p_beta, p_gamma, p_tot]
+    return (featuresAll)
+
+
+def sh_ren_ts_entropy(x, a, q):
+    ''' function that calculates three different entropy measures from a given window:
+    shannon, renyi and tsallis entropy '''
+    p, bin_edges = np.histogram(x)
+    p = p / np.sum(p)
+    p = p[np.where(p > 0)]  # to exclude log(0)
+    shannon_en = -np.sum(p * np.log2(p))
+    renyi_en = np.log2(np.sum(pow(p, a))) / (1 - a)
+    tsallis_en = (1 - np.sum(pow(p, q))) / (q - 1)
+    return (shannon_en, renyi_en, tsallis_en)
+
+def bandpower(x, fs, fmin, fmax):
+    ''' function that calculates the energy of a specific frequency band of the FFT spectrum '''
+    f, Pxx = scipy.signal.periodogram(x, fs=fs)
+    ind_min = np.argmax(f > fmin) - 1
+    ind_max = np.argmax(f > fmax) - 1
+    return np.trapz(Pxx[ind_min: ind_max + 1], f[ind_min: ind_max + 1])
+
+def sampen2(dim, r, data):
+    ''' function that calculates sample entropy from a given window of data '''
+    epsilon = 0.001
+    N = len(data)
+    correl = np.zeros(2)
+    dataMat = np.zeros((dim + 1, N - dim))
+    for i in range(dim + 1):
+        dataMat[i, :] = data[i: N - dim + i]
+
+    for m in range(dim, dim + 2):
+        count = np.zeros(N - dim)
+        tempMat = dataMat[0:m, :]
+
+        for i in range(N - m):
+            #calculate distance, excluding the self-matching case
+            dist = np.max(np.abs(tempMat[:, i + 1: N - dim] - np.tile(tempMat[:, i], ((N - dim - i - 1), 1)).T), axis=0)
+            D = (dist < r)
+            count[i] = np.sum(D) / (N - dim - 1)
+
+        correl[m - dim] = np.sum(count) / (N - dim)
+
+    saen = np.log((correl[0] + epsilon) / (correl[1] + epsilon))
+    return saen
+
+def readEdfFile(fileName):
+    ''' reads an .edf file and returns data[numSamples, numCh], sampling frequency and names of channels '''
+    f = pyedflib.EdfReader(fileName)
+    n = f.signals_in_file
+    channelNames = f.getSignalLabels()
+    samplFreq = f.getSampleFrequency(0)
+    data = np.zeros((f.getNSamples()[0], n))
+    for i in np.arange(n):
+        data[:, i] = f.readSignal(i)
+    return (data, samplFreq, channelNames)
+
+def writeToCsvFile(data, labels, fileName):
+    outputName = fileName + '.csv'
+    myFile = open(outputName, 'w', newline='')
+    dataToWrite = np.column_stack((data, labels))
+    with myFile:
+        writer = csv.writer(myFile)
+        writer.writerows(dataToWrite)
+
+def writeToCompressedCsvFile(data, labels, 
fileName): + outputName= fileName+'.csv.gz' + dataToWrite=np.column_stack((data, labels)) + df = pd.DataFrame(data=dataToWrite) + df.to_csv(outputName, index=False, compression='gzip') + + +def extractEDFdataToCSV_originalData(parallelize, perc_cores, samedir, folderIn, folderOut, SigInfoParams, patients): + ''' converts data from edf format to csv + 20210705 UnaPale''' + global number_free_cores + print('Extracting .csv from CHB edf files') + if parallelize: + n_cores = mp.cpu_count() + n_cores = ceil(n_cores*perc_cores) + + if n_cores > len(patients): + n_cores = len(patients) + + print('Number of used cores: ' + str(n_cores)) + + pool = mp.Pool(n_cores) + number_free_cores = n_cores + + # cutting segments + for patIndx, pat in enumerate(patients): + # print('-- Patient:', pat) + PATIENT = pat if len(sys.argv) < 2 else '{0:02d}'.format(int(sys.argv[1])) + #number of Seiz and nonSeiz files + if samedir: + SeizFiles=sorted(glob.glob(f'{folderIn}chb{PATIENT}*.seizures')) + EDFNonSeizFiles=sorted(glob.glob(f'{folderIn}chb{PATIENT}*.edf')) + else: + SeizFiles=sorted(glob.glob(f'{folderIn}chb{PATIENT}/chb{PATIENT}*.seizures')) + EDFNonSeizFiles=sorted(glob.glob(f'{folderIn}chb{PATIENT}/chb{PATIENT}*.edf')) + + folderOutFiles = folderOut + 'chb' + pat + '/' + createFolderIfNotExists(folderOutFiles) + print('Patient: ' + pat + ' Number of files: ' + str(len(EDFNonSeizFiles))) + # create lists with just names, to be able to compare them + SeizFileNames = list() + for fIndx, f in enumerate(SeizFiles): + justName = os.path.split(f)[1][:-13] + if (fIndx == 0): + SeizFileNames = [justName] + else: + SeizFileNames.append(justName) + NonSeizFileNames = list() + NonSeizFileFullNames = list() + for fIndx, f in enumerate(EDFNonSeizFiles): + justName = os.path.split(f)[1][:-4] + if (justName not in SeizFileNames): + if (fIndx == 0): + NonSeizFileNames = [justName] + NonSeizFileFullNames = [f] + else: + NonSeizFileNames.append(justName) + NonSeizFileFullNames.append(f) + + if parallelize: + pool.apply_async(extractDataLabels_CHB, args=(patIndx, SeizFiles, NonSeizFileFullNames, SigInfoParams, folderOutFiles), callback=collect_result) + number_free_cores = number_free_cores -1 + if number_free_cores==0: + while number_free_cores==0: #synced in the callback + time.sleep(0.1) + pass + else: + extractDataLabels_CHB(patIndx, SeizFiles, NonSeizFileFullNames, SigInfoParams, folderOutFiles) + + if parallelize: + while number_free_cores < n_cores: #wait till all subjects have their data processed + time.sleep(0.1) + pass + pool.close() + pool.join() + +def extractDataLabels_CHB(patIndx, SeizFiles, NonSeizFileFullNames, SigInfoParams, folderOut): + + #EXPORT SEIZURE FILES + for fileIndx,fileName in enumerate(SeizFiles): + allGood=1 + + fileName0 = os.path.splitext(fileName)[0] # removing .seizures from the string + # here replaced reading .hea files with .edf reading to avoid converting !!! 
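+        # fileName0 is the .edf companion of the .seizures annotation file,
+        # so the raw signal and its seizure annotations share one base name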
(rec, samplFreq, channels) = readEdfFile(fileName0)
+        # take only the channels we need, in the correct order
+        try:
+            chToKeepAndInCorrectOrder = [channels.index(SigInfoParams.channels[i]) for i in range(len(SigInfoParams.channels))]
+        except:
+            print('Sth wrong with the channels in a file: ', fileName)
+            allGood = 0
+
+        if (allGood == 1):
+            newData = rec[1:, chToKeepAndInCorrectOrder]
+            (lenSig, numCh) = newData.shape
+            newLabel = np.zeros(lenSig)
+            # read times of seizures
+            szStart = [a for a in MIT.read_annotations(fileName) if a.code == 32]  # start marked with '[' (32)
+            szStop = [a for a in MIT.read_annotations(fileName) if a.code == 33]  # stop marked with ']' (33)
+            # for each seizure, mark the corresponding samples in the label vector
+            numSeizures = len(szStart)
+            for i in range(numSeizures):
+                seizureLen = szStop[i].time - szStart[i].time
+                newLabel[int(szStart[i].time):int(szStop[i].time)] = np.ones(seizureLen)
+
+            # saving to csv file - all seizures of a recording are saved to one file
+            pom, fileName1 = os.path.split(fileName0)
+            fileName2 = os.path.splitext(fileName1)[0]
+            fileName3 = folderOut + fileName2
+            writeToCompressedCsvFile(newData, newLabel, fileName3)
+
+    #EXPORT NON SEIZURE FILES
+    for fileIndx, fileName in enumerate(NonSeizFileFullNames):
+        allGood = 1
+
+        # here replaced reading .hea files with .edf reading to avoid converting !!!
+        (rec, samplFreq, channels) = readEdfFile(fileName)
+        # take only the channels we need, in the correct order
+        try:
+            chToKeepAndInCorrectOrder = [channels.index(SigInfoParams.channels[i]) for i in range(len(SigInfoParams.channels))]
+        except:
+            print('Sth wrong with the channels in a file: ', fileName)
+            allGood = 0
+
+        if (allGood == 1):
+            newData = rec[1:, chToKeepAndInCorrectOrder]
+            (lenSig, numCh) = newData.shape
+            newLabel = np.zeros(lenSig)
+
+            # saving to csv file
+            pom, fileName1 = os.path.split(fileName)
+            fileName2 = os.path.splitext(fileName1)[0]
+            fileName3 = folderOut + fileName2
+            writeToCompressedCsvFile(newData, newLabel, fileName3)
+
+    return (patIndx)
+
+
+def train_StandardML_moreModelsPossible(X_train, y_train, StandardMLParams):
+    #X_train0 = np.float32(X_train)
+    #y_train = np.float32(y_train)
+    #y_train = y_train.reshape((len(y_train),))
+    X_train0 = X_train
+    #replacing nan and inf values
+    #X_train0[np.where(np.isinf(X_train0))] = np.nan
+
+    if (np.size(X_train0, 0) == 0):
+        print('X train size 0 is 0!!', X_train0.shape, y_train.shape)
+    if (np.size(X_train0, 1) == 0):
+        print('X train size 1 is 0!!', X_train0.shape, y_train.shape)
+    col_mean = np.nanmean(X_train0, axis=0)
+    inds = np.where(np.isnan(X_train0))
+    X_train0[inds] = np.take(col_mean, inds[1])
+    # if still somewhere nan, replace with 0
+    X_train0[np.where(np.isnan(X_train0))] = 0
+    X_train = X_train0
+
+    #MLmodels.modelType = 'KNN'
+    if (StandardMLParams.modelType == 'KNN'):
+        model = KNeighborsClassifier(n_neighbors=StandardMLParams.KNN_n_neighbors, metric=StandardMLParams.KNN_metric)
+        model.fit(X_train, y_train)
+    elif (StandardMLParams.modelType == 'SVM'):
+        #model = svm.SVC(kernel=StandardMLParams.SVM_kernel, C=StandardMLParams.SVM_C, gamma=StandardMLParams.SVM_gamma)
+        model = svm.SVC(kernel=StandardMLParams.SVM_kernel, C=StandardMLParams.SVM_C, gamma=StandardMLParams.SVM_gamma, probability=True)  # when using bayes smoothing
+        model.fit(X_train, y_train)
+    elif (StandardMLParams.modelType == 'DecisionTree'):
+        if (StandardMLParams.DecisionTree_max_depth == 0):
+            model = DecisionTreeClassifier(random_state=0, 
criterion=StandardMLParams.DecisionTree_criterion, splitter=StandardMLParams.DecisionTree_splitter) + else: + model = DecisionTreeClassifier(random_state=0, criterion=StandardMLParams.DecisionTree_criterion, splitter=StandardMLParams.DecisionTree_splitter, max_depth=StandardMLParams.DecisionTree_max_depth) + model = model.fit(X_train, y_train) + elif (StandardMLParams.modelType=='RandomForest'): + if (StandardMLParams.DecisionTree_max_depth == 0): + model = RandomForestClassifier(random_state=0, n_estimators=StandardMLParams.RandomForest_n_estimators, criterion=StandardMLParams.DecisionTree_criterion, n_jobs=StandardMLParams.n_jobs) # min_samples_leaf=10 + else: + model = RandomForestClassifier(random_state=0, n_estimators=StandardMLParams.RandomForest_n_estimators, criterion=StandardMLParams.DecisionTree_criterion, n_jobs=StandardMLParams.n_jobs, max_depth=StandardMLParams.DecisionTree_max_depth ) # min_samples_leaf=10 + model = model.fit(X_train, y_train) + elif (StandardMLParams.modelType=='BaggingClassifier'): + if (StandardMLParams.Bagging_base_estimator=='SVM'): + model = BaggingClassifier(base_estimator=svm.SVC(kernel=StandardMLParams.SVM_kernel, C=StandardMLParams.SVM_C, gamma=StandardMLParams.SVM_gamma), n_estimators=StandardMLParams.Bagging_n_estimators,random_state=0) + elif (StandardMLParams.Bagging_base_estimator=='KNN'): + model = BaggingClassifier(base_estimator= KNeighborsClassifier(n_neighbors=StandardMLParams.KNN_n_neighbors, metric=StandardMLParams.KNN_metric), n_estimators=StandardMLParams.Bagging_n_estimators,random_state=0) + elif (StandardMLParams.Bagging_base_estimator == 'DecisionTree'): + model = BaggingClassifier(DecisionTreeClassifier(random_state=0, criterion=StandardMLParams.DecisionTree_criterion, splitter=StandardMLParams.DecisionTree_splitter), + n_estimators=StandardMLParams.Bagging_n_estimators, random_state=0) + model = model.fit(X_train, y_train) + elif (StandardMLParams.modelType=='AdaBoost'): + if (StandardMLParams.Bagging_base_estimator=='SVM'): + model = AdaBoostClassifier(base_estimator=svm.SVC(kernel=StandardMLParams.SVM_kernel, C=StandardMLParams.SVM_C, gamma=StandardMLParams.SVM_gamma), n_estimators=StandardMLParams.Bagging_n_estimators,random_state=0) + elif (StandardMLParams.Bagging_base_estimator=='KNN'): + model = AdaBoostClassifier(base_estimator= KNeighborsClassifier(n_neighbors=StandardMLParams.KNN_n_neighbors, metric=StandardMLParams.KNN_metric), n_estimators=StandardMLParams.Bagging_n_estimators,random_state=0) + elif (StandardMLParams.Bagging_base_estimator == 'DecisionTree'): + model = AdaBoostClassifier(DecisionTreeClassifier(random_state=0, criterion=StandardMLParams.DecisionTree_criterion, splitter=StandardMLParams.DecisionTree_splitter), + n_estimators=StandardMLParams.Bagging_n_estimators, random_state=0) + model = model.fit(X_train, y_train) + + return (model) + + + +def test_StandardML_moreModelsPossible_v3(data,trueLabels, model): + # number of clases + (unique_labels, counts) = np.unique(trueLabels, return_counts=True) + numLabels = len(unique_labels) + if (numLabels==1): #in specific case when in test set all the same label + numLabels=2 + + # #PREDICT LABELS + # X_test0 = np.float32(data) + # X_test0[np.where(np.isinf(X_test0))] = np.nan + # if (np.size(X_test0,0)==0): + # print('X test size 0 is 0!!', X_test0.shape) + # if (np.size(X_test0,1)==0): + # print('X test size 1 is 0!!', X_test0.shape) + # col_mean = np.nanmean(X_test0, axis=0) + # inds = np.where(np.isnan(X_test0)) + # X_test0[inds] = np.take(col_mean, inds[1]) 
+ # # if still somewhere nan replace with 0 + # X_test0[np.where(np.isnan(X_test0))] = 0 + # X_test=X_test0 + X_test = data + #calculate predictions + y_pred= model.predict(X_test) + y_probability = model.predict_proba(X_test) + + #pick only probability of predicted class + y_probability_fin=np.zeros(len(y_pred)) + indx=np.where(y_pred==1) + if (len(indx[0])!=0): + y_probability_fin[indx]=y_probability[indx,1] + else: + a=0 + print('no seiz predicted') + indx = np.where(y_pred == 0) + if (len(indx[0])!=0): + y_probability_fin[indx] = y_probability[indx,0] + else: + a=0 + print('no non seiz predicted') + + #calculate accuracy + diffLab=y_pred-trueLabels + indx=np.where(diffLab==0) + acc= len(indx[0])/len(trueLabels) + + # calculate performance and distances per class + accPerClass=np.zeros(numLabels) + distFromCorr_PerClass = np.zeros(numLabels) + distFromWrong_PerClass = np.zeros(numLabels) + for l in range(numLabels): + indx=np.where(trueLabels==l) + trueLabels_part=trueLabels[indx] + predLab_part=y_pred[indx] + diffLab = predLab_part - trueLabels_part + indx2 = np.where(diffLab == 0) + if (len(indx[0])==0): + accPerClass[l] = np.nan + else: + accPerClass[l] = len(indx2[0]) / len(indx[0]) + + return(y_pred, y_probability_fin, acc, accPerClass) + + +def calcHistogramValues_v2(sig, segmentedLabels, histbins): + '''takes one window of signal - all ch and labels, separates seiz and nonSeiz and + calculates histogram of values during seizure and non seizure ''' + numBins=int(histbins) + sig2 = sig[~np.isnan(sig)] + sig2 = sig2[np.isfinite(sig2)] + # maxValFeat=np.max(sig) + # binBorders=np.arange(0, maxValFeat+1, (maxValFeat+1)/numBins) + + # sig[sig == np.inf] = np.nan + indxs=np.where(segmentedLabels==0)[0] + nonSeiz = sig[indxs] + nonSeiz = nonSeiz[~np.isnan(nonSeiz)] + try: + nonSeiz_hist = np.histogram(nonSeiz, bins=numBins, range=(np.min(sig2), np.max(sig2))) + except: + print('Error with hist ') + + indxs = np.where(segmentedLabels == 1)[0] + Seiz = sig[indxs] + Seiz = Seiz[~np.isnan(Seiz)] + try: + Seiz_hist = np.histogram(Seiz, bins=numBins, range=(np.min(sig2), np.max(sig2))) + except: + print('Error with hist ') + + # normalizing values that are in percentage of total samples - to not be dependand on number of samples + nonSeiz_histNorm=[] + nonSeiz_histNorm.append(nonSeiz_hist[0]/len(nonSeiz)) + nonSeiz_histNorm.append(nonSeiz_hist[1]) + Seiz_histNorm=[] + Seiz_histNorm.append(Seiz_hist[0]/len(Seiz)) + Seiz_histNorm.append(Seiz_hist[1]) + # Seiz_hist[0] = Seiz_hist[0] / len(Seiz_allCh) + return( Seiz_histNorm, nonSeiz_histNorm) + +def concatenateDataFromFiles(fileNames): + dataAll = [] + for f, fileName in enumerate(fileNames): + print(os.path.split(fileName)[-1]) + data = readDataFromFile(fileName) + + if (dataAll == []): + dataAll = data + else: + dataAll = np.vstack((dataAll, data)) + + return dataAll + +def concatenateDataFromFiles_AllocateFirst(fileNames, nrows): + + data = readDataFromFile(fileNames[0]) #reads the first file to uncover the number of columns + + dataAll = np.zeros((nrows, data.shape[1])) + start=0 + for f, fileName in enumerate(fileNames): + print(os.path.split(fileName)[-1]) + data = readDataFromFile(fileName) + + dataAll[start:start+data.shape[0]] = data + start=start+data.shape[0] + # if (dataAll == []): + # dataAll = data + # else: + # dataAll = np.vstack((dataAll, data)) + + return dataAll + +def concatenateDataFromFiles_TSCVFilesInput(fileNames): + dataAll = [] + startIndxOfFiles=np.zeros(len(fileNames)) + for f, fileName in enumerate(fileNames): + data 
= readDataFromFile(fileName) + data= np.float32(data) + # reader = csv.reader(open(fileName, "r")) + # data0 = list(reader) + # data = np.array(data0).astype("float") + # separating to data and labels + # X = data[:, 0:-1] + # y = data[:, -1] + dataSource= np.ones(len(data[:, -1]))*f + + if (dataAll == []): + dataAll = data[:, 0:-1] + labelsAll = data[:, -1].astype(int) + # startIndxOfFiles[f]=0 + lenPrevFile=int(len(data[:, -1])) + startIndxOfFiles[f]=lenPrevFile + else: + dataAll = np.vstack((dataAll, data[:, 0:-1])) + labelsAll = np.hstack((labelsAll, data[:, -1].astype(int))) + # startIndxOfFiles[f]=int(lenPrevFile) + lenPrevFile = lenPrevFile+ len(data[:, -1]) + startIndxOfFiles[f] = int(lenPrevFile) + startIndxOfFiles = startIndxOfFiles.astype((int)) + return (dataAll, labelsAll, startIndxOfFiles) + + +def generateTSCVindexesFromAllFiles_storeTSCVLabels(dataset, GeneralParams, ZeroCrossFeatureParams, SegSymbParams, folderIn, folderOut, maxWinLen): + '''Reads the names of the files with labels, generating a vector with the indexes for the TSCV considering all data of a subject stacked + in one matrix. It also generate files with the labels for each CV.''' + + maxWinLenIndx=int((maxWinLen-SegSymbParams.segLenSec)/SegSymbParams.slidWindStepSec) + minFirsFileLen=int((3600-SegSymbParams.segLenSec)/SegSymbParams.slidWindStepSec*5) #number of windows of 1h x nHours min + for patIndx, pat in enumerate(GeneralParams.patients): + if dataset=='01_SWEC': + allFiles=np.sort(glob.glob(folderIn + 'ID' + pat + '/'+'ID' + pat + '*Labels.csv.gz')) + if dataset=='02_CHB': + allFiles = np.sort(glob.glob(folderIn +'chb' + pat+ '/chb' + pat + '*Labels.csv.gz')) + + # print(os.path.split(allFiles[0][:])[-1]) + #chb02_16+ commes first than chb02_16 when ordering files + if dataset=='02_CHB' and pat=='02': + if '+' in allFiles[15][:]: #chb02_16+ + cpy = allFiles[15] + allFiles[15] = allFiles[16] + allFiles[16] = cpy + + numCVThisSubj=0 + firstFileCreated=0 + startIndxOfFiles=[] + #LOAD ALL FILES ONE BY ONE + for fIndx, fileName in enumerate(allFiles): + print(os.path.split(fileName)[-1]) + + labels = readDataFromFile(fileName) + + pom, fileName1 = os.path.split(fileName) + fileNameOut = fileName1.split('_')[0] #for the labels output files to plot the TSCV sequence + + if 'chb17' in fileNameOut: #chb17 has files 'a', 'b', and 'c' + fileNameOut='chb17' + + eachSubjDataOutFolder = folderOut+ fileNameOut+'/' + createFolderIfNotExists(eachSubjDataOutFolder) #for each subject + + #if there is seizure in file find start and stops + if (np.sum(labels)!=0): + diffSig=np.diff(np.squeeze(labels)) + szStart=np.where(diffSig==1)[0] + szStop= np.where(diffSig == -1)[0] + + + if (firstFileCreated==0): #first file, append until at least one seizure + if (fIndx==0): + newLabel=labels + else: #appending to existing file + newLabel =np.vstack((newLabel,labels)) + + if (np.sum(newLabel)>0 and len(newLabel)>=minFirsFileLen): #at least min of hours and 1 seizure in the first file + firstFileCreated=1 + startIndxOfFiles = np.append(startIndxOfFiles, int(len(newLabel))) + + fileNameOut2 = eachSubjDataOutFolder + fileNameOut + '_cv' + str(numCVThisSubj).zfill(3) + '_Labels' + saveDataToFile(newLabel.reshape((-1,1)), fileNameOut2, 'gzip') + + numCVThisSubj = numCVThisSubj + 1 + + else: #not first file, just resave with different cv name + newLabel=labels + if len(newLabel) < 1.5*maxWinLenIndx: #up to 1.5x the target, we kepp the original file in full + startIndxOfFiles = np.append(startIndxOfFiles, startIndxOfFiles[-1] + 
int(len(newLabel)))
+
+                    fileNameOut2 = eachSubjDataOutFolder + fileNameOut + '_cv' + str(numCVThisSubj).zfill(3) + '_Labels'
+                    saveDataToFile(newLabel.reshape((-1,1)), fileNameOut2, 'gzip')
+
+                    numCVThisSubj = numCVThisSubj + 1
+                else:
+                    n_new_files = int(len(labels)/maxWinLenIndx)
+                    indxStart = 0
+                    dataMissing = maxWinLenIndx
+                    for i in range(n_new_files-1):
+                        #check if we would cut a seizure in half
+                        if (np.sum(labels) != 0):
+                            for s in range(len(szStart)):
+                                try:
+                                    if (szStart[s] < indxStart+dataMissing and szStop[s] > indxStart+dataMissing):  #cut would be where the seizure is
+                                        dataMissing = szStop[s] - indxStart  #move the cut to the end of the seizure
+                                except:
+                                    print('error')
+
+                        newLabel = labels[indxStart:indxStart+dataMissing, :]
+
+                        #finished this new file to save
+                        startIndxOfFiles = np.append(startIndxOfFiles, startIndxOfFiles[-1] + int(len(newLabel)))
+
+                        fileNameOut2 = eachSubjDataOutFolder + fileNameOut + '_cv' + str(numCVThisSubj).zfill(3) + '_Labels'
+                        saveDataToFile(newLabel.reshape((-1,1)), fileNameOut2, 'gzip')
+
+                        indxStart = indxStart + dataMissing  #start where we stopped
+                        numCVThisSubj = numCVThisSubj + 1
+
+                    #last part of the data of each file
+                    newLabel = labels[indxStart:len(labels), :]
+                    startIndxOfFiles = np.append(startIndxOfFiles, startIndxOfFiles[-1] + int(len(newLabel)))
+
+                    fileNameOut2 = eachSubjDataOutFolder + fileNameOut + '_cv' + str(numCVThisSubj).zfill(3) + '_Labels'
+                    saveDataToFile(newLabel.reshape((-1,1)), fileNameOut2, 'gzip')
+
+                    numCVThisSubj = numCVThisSubj + 1  #for the next file
+
+        startIndxOfFiles = startIndxOfFiles.astype((int))  #contains indexes of test data
+        fileNameOut2 = folderOut + fileNameOut + '/' + fileNameOut + '_TSCV_indexes.csv.gz'
+        np.savetxt(fileNameOut2, startIndxOfFiles)
+
+
+def concatenateFeatures_FirstFileNeedsSeizure_KeepOriginal(folderIn, folderOut, GeneralParams, SegSymbParams, SigInfoParams, patients, maxWinLen):
+    '''Concatenate data from files until the first seizure is included, and write it to another file. 
The rest of the files are copied to the cross-validation ones as + they are ''' + + createFolderIfNotExists(folderOut) + for patIndx, pat in enumerate(GeneralParams.patients): + print('Subj:'+ pat) + allFiles=np.sort(glob.glob(folderIn + '/chb' + pat + '/*chb' + pat + '*_OtherFeat.csv.gz')) + firstFileCreated=0 + numFilesThisSubj=0 + for fIndx, fileName in enumerate(allFiles): + # reader = csv.reader(open(fileName, "r")) + # data0 = np.array(list(reader)).astype("float") + data0 = readDataFromFile(fileName) + data=data0[:,0:-1] + label=data0[:,-1] + pom, fileName1 = os.path.split(fileName) + fileNameOut = os.path.splitext(fileName1)[0][0:5] + + if (firstFileCreated==0): #first file, append until at least one seizure + if (fIndx==0): + dataOut=data + labelOut=label + else: + dataOut=np.vstack((dataOut,data)) + labelOut = np.hstack((labelOut, label)) + if (np.sum(labelOut)>0 and fIndx>4): #at least 6 h or at least 1 seizure in first file + firstFileCreated=1 + fileNameOut2 = folderOut + '/' + fileNameOut + '_cv' + str(numFilesThisSubj).zfill(3) + # writeToCsvFile(dataOut, labelOut, fileNameOut2) + saveDataToFile(np.hstack((dataOut, labelOut.reshape((-1,1)))), fileNameOut2, 'gzip') + numFilesThisSubj = numFilesThisSubj + 1 + else: #not first file, just resave with different cv name + fileNameOut2 = folderOut + '/' + fileNameOut + '_cv' + str(numFilesThisSubj).zfill(3) + # writeToCsvFile(data, label, fileNameOut2) + saveDataToFile(np.hstack((data,label.reshape((-1,1)))), fileNameOut2, 'gzip') + numFilesThisSubj = numFilesThisSubj + 1 + + + +def concatenateFeatures_allDataInEqualWindows_FirstFileNeedsSeizure(parallelize, perc_cores, dataset, folderIn, folderOut, GeneralParams, SegSymbParams, ZeroCrossFeatureParams, maxWinLen): + ''' ''' + print('Reorganizing features files to TSCV approach') + global number_free_cores + + if parallelize: + n_cores = mp.cpu_count() + n_cores = ceil(n_cores*perc_cores) + + if n_cores > len(GeneralParams.patients): + n_cores = len(GeneralParams.patients) + + print('Number of used cores: ' + str(n_cores)) + + pool = mp.Pool(n_cores) + number_free_cores = n_cores + + + maxWinLenIndx=int((maxWinLen-SegSymbParams.segLenSec)/SegSymbParams.slidWindStepSec) + minFirsFileLen=int((3600-SegSymbParams.segLenSec)/SegSymbParams.slidWindStepSec*5) #number of windows of 1h x nHours min + for patIndx, pat in enumerate(GeneralParams.patients): + if dataset=='01_SWEC': + allFiles=np.sort(glob.glob(folderIn + 'ID' + pat + '/'+'ID' + pat + '*_OtherFeat.csv.gz')) + if dataset=='02_CHB': + allFiles = np.sort(glob.glob(folderIn +'chb' + pat+ '/chb' + pat + '*_OtherFeat.csv.gz')) + + # print(os.path.split(allFiles[0][:])[-1]) + #chb02_16+ commes first than chb02_16 when ordering files + if dataset=='02_CHB' and pat=='02': + if '+' in allFiles[15][:]: #chb02_16+ + cpy = allFiles[15] + allFiles[15] = allFiles[16] + allFiles[16] = cpy + + if parallelize: + pool.apply_async(concatenateFeatureFiles_SeizureInFirstFile_DivideBigFiles, args=(patIndx, allFiles, folderOut, maxWinLenIndx, ZeroCrossFeatureParams, minFirsFileLen), callback=collect_result) + number_free_cores = number_free_cores -1 + if number_free_cores==0: + while number_free_cores==0: #synced in the callback + time.sleep(0.1) + pass + else: + concatenateFeatureFiles_SeizureInFirstFile_DivideBigFiles(patIndx, allFiles, folderOut, maxWinLenIndx, ZeroCrossFeatureParams, minFirsFileLen) + + if parallelize: + while number_free_cores < n_cores: #wait till all subjects have their data processed + time.sleep(0.1) + pass + + pool.close() + 
pool.join()
+
+
+def concatenateFeatureFiles_EqualSize_SeizureInFirstFile(patIndx, allFiles, folderOut, maxWinLenIndx, ZeroCrossFeatureParams, minFirsFileLen):
+    indxStart = 0
+    dataMissing = maxWinLenIndx  #how much data we target to have per file
+    newFileToSave = 1
+    numFilesThisSubj = 0
+    firstFileCreated = 0
+    #LOAD ALL FILES ONE BY ONE
+    for fIndx, fileName in enumerate(allFiles):
+        dataOtherFeat = readDataFromFile(fileName)
+
+        fileName2 = fileName[0:-16] + 'ZCFeat.csv.gz'
+        dataZCFeat = readDataFromFile(fileName2)
+
+        numCh = int(len(dataZCFeat[0, :]) / (len(ZeroCrossFeatureParams.EPS_thresh_arr) + 1))
+        data = mergeFeatFromTwoMatrixes(dataOtherFeat, dataZCFeat, numCh)
+
+        fileName2 = fileName[0:-16] + 'Labels.csv.gz'
+        labels = readDataFromFile(fileName2)
+
+        pom, fileName1 = os.path.split(fileName)
+        fileNameOut = fileName1.split('_')[0]
+
+        print('Patient:', str(patIndx+1), ' ', fileName1)
+
+        #if there is a seizure in the file, find starts and stops
+        if (np.sum(labels) != 0):
+            diffSig = np.diff(np.squeeze(labels))
+            szStart = np.where(diffSig == 1)[0]
+            szStop = np.where(diffSig == -1)[0]
+
+        if (firstFileCreated == 0):  #first file, append until at least one seizure
+            if (fIndx == 0):
+                newData = data
+                newLabel = labels
+            else:  #appending to the existing file
+                newData = np.vstack((newData, data))
+                newLabel = np.vstack((newLabel, labels))
+
+            if (np.sum(newLabel) > 0 and len(newLabel) >= minFirsFileLen):  #at least minFirsFileLen windows and at least 1 seizure in the first file
+                firstFileCreated = 1
+                fileNameOut2 = folderOut + fileNameOut + '/' + fileNameOut + '_cv' + str(numFilesThisSubj).zfill(3)
+                # writeToCsvFile(dataOut, labelOut, fileNameOut2)
+                saveDataToFile(np.hstack((newData, newLabel.reshape((-1,1)))), fileNameOut2, 'gzip')
+
+                fileNameOut2 = folderOut + 'Labels' + '/' + fileNameOut + '_cv' + str(numFilesThisSubj).zfill(3) + '_Labels'
+                saveDataToFile(newLabel.reshape((-1,1)), fileNameOut2, 'gzip')
+
+                numFilesThisSubj = numFilesThisSubj + 1
+                del newData
+                del newLabel
+        else:  #not the first file, just resave with a different cv name
+            thisFileStillHasData = 1
+            while (thisFileStillHasData == 1):
+                #if enough data in the file
+                if (indxStart + dataMissing <= len(labels)):
+                    #in case we are about to create a new file but the data remaining after it would be
+                    #too little, the remainder is added to the current file
+                    rest = len(labels) - indxStart - dataMissing
+                    if rest < int(0.05*maxWinLenIndx) and newFileToSave == 1:
+                        dataMissing = len(labels)
+
+                    #check if we would cut a seizure in half
+                    if (np.sum(labels) != 0):
+                        for s in range(len(szStart)):
+                            try:
+                                if (szStart[s] < indxStart+dataMissing and szStop[s] > indxStart+dataMissing):  #cut would be where the seizure is
+                                    dataMissing = szStop[s] - indxStart  #move the cut to the end of the seizure
+                            except:
+                                print('error')
+
+                    if (newFileToSave == 1):
+                        newData = data[indxStart:indxStart+dataMissing, :]
+                        newLabel = labels[indxStart:indxStart+dataMissing, :]
+                    else:  #appending to the existing file
+                        newData = np.vstack((newData, data[indxStart:indxStart+dataMissing, :]))
+                        newLabel = np.vstack((newLabel, labels[indxStart:indxStart+dataMissing, :]))
+
+                    #finished this new file to save
+                    fileNameOut2 = folderOut + '/' + fileNameOut + '/' + fileNameOut + '_cv' + str(numFilesThisSubj).zfill(3)
+                    saveDataToFile(np.hstack((newData, newLabel)), fileNameOut2, 'gzip')
+
+                    fileNameOut2 = folderOut + 'Labels' + '/' + fileNameOut + '_cv' + str(numFilesThisSubj).zfill(3) + '_Labels'
+                    saveDataToFile(newLabel.reshape((-1,1)), fileNameOut2, 'gzip')
+
+                    numFilesThisSubj = numFilesThisSubj + 1
+                    newFileToSave = 1
+                    indxStart = indxStart + dataMissing  #start where we stopped
+                    dataMissing = maxWinLenIndx
thisFileStillHasData=1 + if indxStart < len(labels): + thisFileStillHasData=1 + else: + thisFileStillHasData=0 + indxStart = 0 + else: #not enough data in file + if (newFileToSave==1): + newData=data[indxStart:,:] #read until the end of the file + newLabel=labels[indxStart:,:] + else: #appending to existing file + newData= np.vstack((newData,data[indxStart:,:])) + newLabel =np.vstack((newLabel,labels[indxStart:,:])) + dataMissing = maxWinLenIndx - len(newLabel) #calculate how much data is missing + indxStart = 0 #in next file start from beginning + thisFileStillHasData=0 #this file has no more data, need to load new one + newFileToSave=0 + + + # save last file's remaining data + if (len(labels)- indxStart> int(0.05*maxWinLenIndx) and thisFileStillHasData): + fileNameOut2 = folderOut + '/' + fileNameOut + '/' + fileNameOut + '_cv' + str(numFilesThisSubj).zfill(3) + saveDataToFile(np.hstack((newData, newLabel)), fileNameOut2, 'gzip') + + fileNameOut2 = folderOut + 'Labels' + '/' + fileNameOut + '_cv' + str(numFilesThisSubj).zfill(3) + '_Labels' + saveDataToFile(newLabel.reshape((-1,1)), fileNameOut2, 'gzip') + + return patIndx + + +def concatenateFeatureFiles_SeizureInFirstFile_DivideBigFiles(patIndx, allFiles, folderOut, maxWinLenIndx, ZeroCrossFeatureParams, minFirsFileLen): + + numFilesThisSubj=0 + firstFileCreated=0 + #LOAD ALL FILES ONE BY ONE + for fIndx, fileName in enumerate(allFiles): + print(os.path.split(fileName)[-1]) + dataOtherFeat=readDataFromFile(fileName) + + fileName2=fileName[0:-16]+'ZCFeat.csv.gz' + dataZCFeat = readDataFromFile(fileName2) + + numCh=int(len(dataZCFeat[0, :])/(len(ZeroCrossFeatureParams.EPS_thresh_arr)+1)) + data= mergeFeatFromTwoMatrixes(dataOtherFeat, dataZCFeat, numCh) + + fileName2=fileName[0:-16]+'Labels.csv.gz' + labels = readDataFromFile(fileName2) + + pom, fileName1 = os.path.split(fileName) + fileNameOut = fileName1.split('_')[0] + + if 'chb17' in fileNameOut: #chb17 has files 'a', 'b', and 'c' + fileNameOut='chb17' + + eachSubjDataOutFolder = folderOut+fileNameOut + '/' + createFolderIfNotExists(eachSubjDataOutFolder) #for each subject + + #if there is seizure in file find start and stops + if (np.sum(labels)!=0): + diffSig=np.diff(np.squeeze(labels)) + szStart=np.where(diffSig==1)[0] + szStop= np.where(diffSig == -1)[0] + + + if (firstFileCreated==0): #first file, append until at least one seizure + if (fIndx==0): + newData=data + newLabel=labels + else: #appending to existing file + newData= np.vstack((newData,data)) + newLabel =np.vstack((newLabel,labels)) + + if (np.sum(newLabel)>0 and len(newLabel)>=minFirsFileLen): #at least min of hours and 1 seizure in the first file + firstFileCreated=1 + fileNameOut2 = eachSubjDataOutFolder + fileNameOut + '_cv' + str(numFilesThisSubj).zfill(3) + # writeToCsvFile(dataOut, labelOut, fileNameOut2) + saveDataToFile(np.hstack((newData, newLabel.reshape((-1,1)))), fileNameOut2, 'gzip') + + LabelsOutFolder = folderOut + 'Labels' + '/' + createFolderIfNotExists(LabelsOutFolder) #for each subject + + fileNameOut2 = LabelsOutFolder + fileNameOut + '_cv' + str(numFilesThisSubj).zfill(3) + '_Labels' + saveDataToFile(newLabel.reshape((-1,1)), fileNameOut2, 'gzip') + + numFilesThisSubj = numFilesThisSubj + 1 + del newData + del newLabel + else: #not first file, just resave with different cv name + if len(labels) < 1.5*maxWinLenIndx: #up to 1.5x the target, we kepp the original file in full + fileNameOut2 = eachSubjDataOutFolder + fileNameOut + '_cv' + str(numFilesThisSubj).zfill(3) + saveDataToFile(np.hstack((data, 
def mergeFeatFromTwoMatrixes(mat1, mat2, numCh): + numFeat1=int(len(mat1[0, :])/numCh) + numFeat2=int(len(mat2[0, :])/numCh) + numColPerCh=numFeat1+numFeat2 + matFinal=np.zeros((len(mat1[:,0]), numColPerCh*numCh)) + for ch in range(numCh): + matFinal[:, ch*numColPerCh : ch*numColPerCh + numFeat1]=mat1[ :, ch*numFeat1: (ch+1)*numFeat1] + matFinal[:, ch * numColPerCh + numFeat1 : ch * numColPerCh + numColPerCh ] = mat2[:, ch * numFeat2: (ch + 1) * numFeat2] + return matFinal + +def plotRawDataLabels(dataset, folderIn, GeneralParams,SegSymbParams): + + print('Print labels for all subjects extracted feature files') + + folderOut=folderIn +'LabelsInTime/' + createFolderIfNotExists(folderOut) + + seizureStableLenToTestIndx = int(GeneralParams.seizureStableLenToTest / SegSymbParams.slidWindStepSec) + seizureStablePercToTest = GeneralParams.seizureStablePercToTest + distanceBetweenSeizuresIndx = int(GeneralParams.distanceBetween2Seizures / SegSymbParams.slidWindStepSec) + numLabelsPerHour = 60 * 60 / SegSymbParams.slidWindStepSec + toleranceFP_bef = int(GeneralParams.toleranceFP_befSeiz / SegSymbParams.slidWindStepSec) + toleranceFP_aft = int(GeneralParams.toleranceFP_aftSeiz / SegSymbParams.slidWindStepSec) + + for patIndx, pat in enumerate(GeneralParams.patients): + + if dataset=='01_SWEC': + allFiles=np.sort(glob.glob(folderIn + 'ID' + pat + '/ID' + pat + '*_Labels*')) + if dataset=='02_CHB': + allFiles = np.sort(glob.glob(folderIn +'chb' + pat+ '/chb' + pat +'*_Labels*')) + + if dataset=='02_CHB' and pat=='02': + if '+' in allFiles[15][:]: #chb02_16+ + cpy = allFiles[15] + allFiles[15] = allFiles[16] + allFiles[16] = cpy + + #concatenating predictions so that predictions for one
seizure are based on train set with all seizures before it + for fIndx, fileName in enumerate(allFiles): + data=readDataFromFile(fileName) + if fIndx==0: + labels = np.squeeze(data) + testIndx=np.ones(len(data))*(fIndx+1) + else: + labels = np.hstack((labels, np.squeeze(data))) + testIndx= np.hstack((testIndx, np.ones(len(data))*(fIndx+1))) + + (yPred_SmoothOurStep2, yPred_SmoothOurStep1) = smoothenLabels(labels, seizureStableLenToTestIndx, seizureStablePercToTest, distanceBetweenSeizuresIndx) + + + #Plot predictions in time + fig1 = plt.figure(figsize=(16, 8), constrained_layout=False) + gs = GridSpec(4, 1, figure=fig1) + fig1.subplots_adjust(wspace=0.2, hspace=0.2) + xValues = np.arange(0, len(labels), 1) / (60*60*2) + ax1 = fig1.add_subplot(gs[0,0]) + ax1.plot(xValues, labels , 'r') + ax1.set_ylabel('TrueLabel') + ax1.set_title('Subj'+pat) + ax1.grid() + ax1 = fig1.add_subplot(gs[1, 0]) + ax1.plot(xValues, yPred_SmoothOurStep1, 'b') + ax1.set_ylabel('Step1') + ax1.grid() + ax1 = fig1.add_subplot(gs[2, 0]) + ax1.plot(xValues, yPred_SmoothOurStep2, 'm') + ax1.set_ylabel('Step2') + ax1.grid() + ax1 = fig1.add_subplot(gs[3, 0]) + ax1.plot(xValues, testIndx , 'k') + ax1.set_ylabel('FileNr') + ax1.grid() + ax1.set_xlabel('Time') + fig1.show() + fig1.savefig(folderOut + 'Subj' + pat + '_RawLabels.png', bbox_inches='tight') + plt.close(fig1) + + +def kl_divergence(p,q): + delta=0.000001 + deltaArr=np.ones(len(p))*delta + p=p+deltaArr + q=q+deltaArr + res=sum(p[i] * math.log2(p[i]/q[i]) for i in range(len(p))) + return res + +def js_divergence(p,q): + m=0.5* (p+q) + res=0.5* kl_divergence(p,m) +0.5* kl_divergence(q,m) + return (res) + + + +def saveDataAndLabelsToFile( data, labels, fileName, type): + outputName= fileName+'.csv' + df = pd.DataFrame(data=np.hstack((data, labels.reshape((-1, 1))))) + if (type=='gzip'): + df.to_csv(outputName + '.gz', index=False, compression='gzip') + else: + df.to_csv(outputName, index=False) + +def saveDataToFile( data, outputName, type): + if ('.csv' not in outputName): + outputName= outputName+'.csv' + df = pd.DataFrame(data=data) + if (type=='gzip'): + df.to_csv(outputName + '.gz', index=False, compression='gzip') + else: + df.to_csv(outputName, index=False) + +def readDataFromFile( inputName): + + if ('.h5' in inputName): + df = pd.read_hdf(inputName) + elif ('.csv.gz' in inputName): + df= pd.read_csv(inputName, compression='gzip') + else: + df= pd.read_csv(inputName) + data=df.to_numpy() + return (data) + + +def func_calculatePerformance_AppendingTestData_TSCV(folderIn, GeneralParams, SegSymbParams, dataset): + folderOut=folderIn +'PerformanceWithAppendedTests/' + createFolderIfNotExists(folderOut) + + AllSubjDiffPerf_test = np.zeros((len(GeneralParams.patients), 4* 9)) + seizureStableLenToTestIndx = int(GeneralParams.seizureStableLenToTest / SegSymbParams.slidWindStepSec) + seizureStablePercToTest = GeneralParams.seizureStablePercToTest + distanceBetweenSeizuresIndx = int(GeneralParams.distanceBetween2Seizures / SegSymbParams.slidWindStepSec) + numLabelsPerHour = 60 * 60 / SegSymbParams.slidWindStepSec + toleranceFP_bef = int(GeneralParams.toleranceFP_befSeiz / SegSymbParams.slidWindStepSec) + toleranceFP_aft = int(GeneralParams.toleranceFP_aftSeiz / SegSymbParams.slidWindStepSec) + for patIndx, pat in enumerate(GeneralParams.patients): + print('Patient: ' + pat) + filesAll = np.sort(glob.glob(folderIn + pat + '/Subj' + pat + '*TestPredictions.csv.gz')) + + numFiles=len(filesAll) + for cv in range(len(filesAll)): + fileName2 = 'Subj' + pat + '_cv' + 
str(cv).zfill(3) + print(fileName2) + data= readDataFromFile(filesAll[cv]) + dataSource_test = data[:, -1] + # indxs = np.where(dataSource_test == 1)[0] # 0 is the first next seizure after the trained one + if cv==0: + trueLabels_AllCV = data[:, 0] + probabLabels_AllCV=data[:,1] + predLabels_AllCV=data[ :,2] + #predictionsSmoothed=data[ indxs,3:-1] + dataSource_AllCV= data[ :,3] #np.ones(len(indxs))*(cv+1) + else: + trueLabels_AllCV = np.hstack((trueLabels_AllCV, data[:, 0])) + probabLabels_AllCV = np.hstack((probabLabels_AllCV, data[: ,1])) + predLabels_AllCV=np.hstack((predLabels_AllCV,data[:,2])) + #predictionsSmoothed = np.vstack((predictionsSmoothed,data[indxs, 3:-1])) + dataSource_AllCV= np.hstack((dataSource_AllCV, data[ :,3]))#np.ones(len(indxs))*(cv+1))) + + + (performanceTest, yPredTest_MovAvrgStep1, yPredTest_MovAvrgStep2,yPredTest_SmoothBayes) = calculatePerformanceAfterVariousSmoothing(predLabels_AllCV, trueLabels_AllCV,probabLabels_AllCV, + toleranceFP_bef, toleranceFP_aft, + numLabelsPerHour, + seizureStableLenToTestIndx, + seizureStablePercToTest, + distanceBetweenSeizuresIndx, + GeneralParams.bayesWind, + GeneralParams.bayesProbThresh) + dataToSave = np.vstack((trueLabels_AllCV, probabLabels_AllCV, predLabels_AllCV, yPredTest_MovAvrgStep1,yPredTest_MovAvrgStep2, yPredTest_SmoothBayes, dataSource_AllCV)).transpose() # added from which file is specific part of test set + outputName = folderOut + '/Subj' + pat + '_AppendedTest_Predictions.csv' + saveDataToFile(dataToSave, outputName, 'gzip') + + AllSubjDiffPerf_test[patIndx, :] =performanceTest + + #plot predictions in time + # Plot predictions in time + fig1 = plt.figure(figsize=(16, 8), constrained_layout=False) + gs = GridSpec(5, 1, figure=fig1) + fig1.subplots_adjust(wspace=0.2, hspace=0.2) + xValues = np.arange(0, len(trueLabels_AllCV), 1)*SegSymbParams.slidWindStepSec/3600 + ax1 = fig1.add_subplot(gs[0, 0]) + ax1.plot(xValues, predLabels_AllCV, 'k') + ax1.set_ylabel('NoSmooth') + ax1.set_title('Subj' + pat) + ax1.grid() + ax1 = fig1.add_subplot(gs[1, 0]) + ax1.plot(xValues, yPredTest_MovAvrgStep1 * 0.8, 'b') + ax1.plot(xValues, yPredTest_MovAvrgStep2, 'c') + ax1.set_ylabel('Step1&2') + ax1.grid() + ax1 = fig1.add_subplot(gs[2, 0]) + ax1.plot(xValues, yPredTest_SmoothBayes, 'm') + ax1.set_ylabel('Bayes') + ax1.grid() + ax1 = fig1.add_subplot(gs[3, 0]) + ax1.plot(xValues, trueLabels_AllCV, 'r') + ax1.set_ylabel('TrueLabel') + ax1.grid() + ax1 = fig1.add_subplot(gs[4, 0]) + ax1.plot(xValues, dataSource_AllCV, 'k') + ax1.set_ylabel('FileNr') + ax1.grid() + ax1.set_xlabel('Time (h)') + fig1.show() + fig1.savefig(folderOut + 'Subj' + pat + '_Appended_TestPredictions.png', bbox_inches='tight') + plt.close(fig1) + + # ------------------------------------------------------------------------------------------------ + font_size=11 + fig1 = plt.figure(figsize=(6, 3), constrained_layout=False) + gs = GridSpec(2, 1, figure=fig1) + fig1.subplots_adjust(wspace=0.2, hspace=0.2) + + xValues = np.arange(0, len(trueLabels_AllCV), 1)*SegSymbParams.slidWindStepSec/3600 + ax1 = fig1.add_subplot(gs[0, 0]) + ax1.plot(xValues, trueLabels_AllCV, color='g', alpha=0.75, linewidth=1) + ax1.set_ylabel('Label', fontsize=font_size) + # plt.yticks([0, 1], fontsize=font_size) + ax1.grid(axis='x', alpha=0.5, linestyle='--', linewidth=1) + ax1.spines['top'].set_visible(False) + ax1.spines['right'].set_visible(False) + ax1.spines['left'].set_visible(False) + ax1.spines['bottom'].set_visible(False) + ax1.set_xlim(left=0, right=np.max(xValues)) + # 
ax1.set_ylim(0, 1) + # ax1.legend('Predicted',fontsize=font_size-1) + plt.tick_params( + axis='both', # changes apply to the x-axis + which='both', # both major and minor ticks are affected + bottom=False, # ticks along the bottom edge are off + left=False, + labelbottom=False, # labels along the bottom edge are off + labelleft=False) + + ax1 = fig1.add_subplot(gs[1, 0]) + ax1.plot(xValues, yPredTest_SmoothBayes, color='r', alpha=0.75, linewidth=1) + ax1.set_ylabel('Predicted', fontsize=font_size) + + plt.yticks([0, 1], fontsize=font_size) + ax1.grid(axis='x', alpha=0.5, linestyle='--', linewidth=1) + ax1.spines['top'].set_visible(False) + ax1.spines['right'].set_visible(False) + ax1.spines['left'].set_visible(False) + # ax1.spines['bottom'].set_visible(False) + ax1.set_xlim(left=0, right=np.max(xValues)) + # ax1.set_ylim(0, 1) + + plt.tick_params( + axis='y', # changes apply to the x-axis + which='both', # both major and minor ticks are affected + left=False, # ticks along the bottom edge are off + labelleft=False) # labels along the bottom edge are off + # ax1.legend('Label',fontsize=font_size-1) + ax1.set_xlabel('Time (h)', fontsize=font_size) + # plt.xticks(xValues, xValues, fontsize=font_size) + + fig1.show() + + plt.savefig(folderOut+ dataset + '_Subj' + pat + '_Appended_TestPredictions_onlyBayes.pdf', bbox_inches='tight', dpi=200, format='pdf') + # fig1.savefig(folderOut + '/Subj' + pat + '_Appended_TestPredictions_onlyBayes.png', bbox_inches='tight') + plt.close(fig1) + + # SAVE PERFORMANCE MEASURES FOR ALL SUBJ + outputName = folderOut + 'AllSubj_AppendedTest_AllPerfMeasures.csv' + saveDataToFile(AllSubjDiffPerf_test, outputName, 'gzip') + + + + +def plotRearangedDataLabelsInTime(dataset, folderIn, GeneralParams,SegSymbParams): + folderOut=folderIn +'/LabelsInTime/' + createFolderIfNotExists(folderOut) + + print('Printing labels for TSCV reorganized files') + + seizureStableLenToTestIndx = int(GeneralParams.seizureStableLenToTest / SegSymbParams.slidWindStepSec) + seizureStablePercToTest = GeneralParams.seizureStablePercToTest + distanceBetweenSeizuresIndx = int(GeneralParams.distanceBetween2Seizures / SegSymbParams.slidWindStepSec) + numLabelsPerHour = 60 * 60 / SegSymbParams.slidWindStepSec + toleranceFP_bef = int(GeneralParams.toleranceFP_befSeiz / SegSymbParams.slidWindStepSec) + toleranceFP_aft = int(GeneralParams.toleranceFP_aftSeiz / SegSymbParams.slidWindStepSec) + + # CALCULATING SEIZURE INFORMATION FOR TABLE FOR PAPER + numSubj=len(GeneralParams.patients) + totalDataLenInHours=np.zeros((numSubj)) + testDataLenInHours=np.zeros((numSubj)) + firstFileDataLenInHours=np.zeros((numSubj)) + totalNumSeiz=np.zeros((numSubj)) + testNumSeiz=np.zeros((numSubj)) + + for patIndx, pat in enumerate(GeneralParams.patients): + if dataset=='01_SWEC': + allFiles=np.sort(glob.glob(folderIn + 'ID'+ pat +'/ID' + pat + '*_Labels*')) + if dataset=='02_CHB': + allFiles = np.sort(glob.glob(folderIn +'chb' + pat +'/chb' + pat +'*_Labels*')) + numFiles = len(allFiles) + + print('Pat = ', pat) + #concatinatin predictions so that predictions for one seizure are based on train set with all seizures before it + for fIndx, fileName in enumerate(allFiles): + data=readDataFromFile(fileName) + if fIndx==0: + labels = np.squeeze(data[:,-1]) + testIndx=np.ones(len(data[:,-1]))*(fIndx+1) + firstFileDataLenInHours[patIndx]= len(labels)/(2*60*60) + startIndx=np.where(np.diff(labels)==1)[0] + firstFileNumSeiz=len(startIndx) + else: + labels = np.hstack((labels, np.squeeze(data[:,-1]))) + testIndx = 
np.hstack((testIndx, np.ones(len(data[:,-1]))*(fIndx+1))) + + #Information for the paper table + totalDataLenInHours[patIndx] = len(labels)/(2*60*60) + startIndx=np.where(np.diff(labels)==1)[0] + totalNumSeiz[patIndx]=len(startIndx) + testNumSeiz[patIndx]=totalNumSeiz[patIndx]-firstFileNumSeiz + + #Plot predictions in time + fig1 = plt.figure(figsize=(16, 8), constrained_layout=False) + gs = GridSpec(2, 1, figure=fig1) + fig1.subplots_adjust(wspace=0.2, hspace=0.2) + xValues = np.arange(0, len(labels), 1) / (60*60*2) + ax1 = fig1.add_subplot(gs[0,0]) + ax1.plot(xValues, labels , 'r') + ax1.set_ylabel('TrueLabel') + ax1.set_title('Subj'+pat) + ax1.grid() + + ax1 = fig1.add_subplot(gs[1, 0]) + ax1.plot(xValues, testIndx , 'k') + ax1.set_ylabel('FileNr') + ax1.grid() + ax1.set_xlabel('Time [h]') + fig1.show() + fig1.savefig(folderOut + 'Subj' + pat + '_RawLabels.png', bbox_inches='tight') + plt.close(fig1) + + testDataLenInHours = totalDataLenInHours - firstFileDataLenInHours + dataToSave=np.vstack(( totalDataLenInHours, testDataLenInHours, totalNumSeiz, testNumSeiz )) + dataToSave=(dataToSave*10000).astype(int) /10000 + + outputName = folderOut + '/SeizureInfo_ForPaperTable' + saveDataToFile(dataToSave, outputName, 'csv') + + +#callback for the apply_async process parallelization +def collect_result(result): + global number_free_cores + global n_cores_semaphore + + while n_cores_semaphore==0: #block the callback in case of multiple concurrent accesses + pass + + if n_cores_semaphore: + n_cores_semaphore=0 + number_free_cores = number_free_cores+1 + n_cores_semaphore=1 + +if __name__ == "__main__": + pass + diff --git a/ZeroCrossLibrary.py b/ZeroCrossLibrary.py new file mode 100644 index 0000000..b514314 --- /dev/null +++ b/ZeroCrossLibrary.py @@ -0,0 +1,375 @@ +#!~/anaconda3/bin python + +'''script that includes all functions related to the use of the AZC feature''' + +__authors__ = "Una Pale, Renato Zanetti, and Tomas Teijeiro" +__license__ = "LGPL" +__version__ = "1.0" +__email__ = "{una.pale, renato.zaneti} at epfl.ch" + + +import os +import glob +# import csv +import matplotlib +matplotlib.use('Agg') #so that figures are not shown +from scipy.io import loadmat +from scipy.signal import decimate +from VariousFunctionsLib import * + +######################################################## +#global variables for multi-core operation +files_processed=[] +n_files_per_patient=[] +number_free_cores = 0 +number_free_cores_pat = 0 +n_cores_semaphore = 1 + + +########################################################################### +##defining functions needed +def polygonal_approx(arr, epsilon): + """ + Performs an optimized version of the Ramer-Douglas-Peucker algorithm assuming as an input + an array of single values, considered consecutive points, and **taking into account only the + vertical distances**. + """ + def max_vdist(arr, first, last): + """ + Obtains the distance and the index of the point in *arr* with maximum vertical distance to + the line delimited by the first and last indices. Returns a tuple (dist, index). + """ + if first == last: + return (0.0, first) + frg = arr[first:last+1] + leng = last-first+1 + dist = np.abs(frg - np.interp(np.arange(leng),[0, leng-1], [frg[0], frg[-1]])) + idx = np.argmax(dist) + return (dist[idx], first+idx) + + if epsilon <= 0.0: + raise ValueError('Epsilon must be > 0.0') + if len(arr) < 3: + return arr + result = set() + stack = [(0, len(arr) - 1)] + while stack: + first, last = stack.pop() + max_dist, idx = max_vdist(arr, first, last) + if max_dist > epsilon: + stack.extend([(first, idx),(idx, last)]) + else: + result.update((first, last)) + return np.array(sorted(result)) + +def zero_crossings(arr): + """Returns the positions of zero-crossings in the derivative of an array, as a binary vector""" + return np.diff(np.sign(np.diff(arr))) != 0 + + +def calculateMovingAvrgMeanWithUndersampling_v2(data, winLen, winStep): + lenSig=len(data) + index = np.arange(0, lenSig - winLen, winStep) + + segmData = np.zeros(len(index)) + for i in range(len(index)): #-1 + x = data[index[i]:index[i] + winLen] + segmData[i]=np.mean(x) + return(segmData)
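# A minimal usage sketch of the AZC idea with the two helpers above (illustration only,
# on a synthetic signal with an assumed epsilon; the real pipeline band-pass filters the
# EEG first and then averages the counts per analysis window, see readFileCalculateFeatures):
import numpy as np

fs = 256
t = np.arange(4 * fs) / fs
sig = np.sin(2 * np.pi * 3 * t) + 0.2 * np.random.randn(len(t))  # 3 Hz tone plus noise
# 1) simplify the signal, keeping only vertices that deviate more than epsilon vertically
keep = polygonal_approx(sig, epsilon=0.5)
sigApprox = np.interp(np.arange(len(sig)), keep, sig[keep])
# 2) count sign changes of the derivative (local extrema) accumulated over a 1 s window
azc_raw = np.convolve(zero_crossings(sig), np.ones(fs), mode='same')
azc_approx = np.convolve(zero_crossings(sigApprox), np.ones(fs), mode='same')
# the approximation suppresses small noisy oscillations, so azc_approx should
# typically stay below azc_raw
print(azc_raw.mean(), azc_approx.mean())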
def calculateFeaturesPerEachFile_CLFAndAZCFeatures_CHB(parallelize, perc_cores, EPS_thresh_arr,folderIn, folderOutFeatures, patients, SigInfoParams, ZeroCrossFeatureParams, calculateOtherFeat, calculateZCFeat): + global files_processed + global n_files_per_patient + global number_free_cores + + print('Extracting features from all subjects files') + + numFeat = len(EPS_thresh_arr) + 1 + + if parallelize: + n_cores = mp.cpu_count() + n_cores = ceil(n_cores*perc_cores) + print('Number of cores: ' + str(n_cores)) + + pool = mp.Pool(n_cores) + number_free_cores = n_cores + + files_processed=np.zeros(len(patients)) + n_files_per_patient = np.zeros(len(patients)) + + # go through all patients + for patIndx, pat in enumerate(patients): + filesIn=np.sort(glob.glob(folderIn + 'chb' + pat+ '/chb' + pat + '*.csv.gz')) #only the data files + numFiles=len(filesIn) + n_files_per_patient[patIndx]=numFiles + + print('Pat: ' + str(pat) + ' Nfiles: ' + str(numFiles)) + + folderOutFeaturesSub = folderOutFeatures + 'chb' + pat + '/' + + if not os.path.exists(folderOutFeaturesSub): + try: + os.mkdir(folderOutFeaturesSub) + except OSError: + print("Creation of the directory %s failed" % folderOutFeaturesSub) + + for fileIndx, file in enumerate(filesIn): + pom, fileName1 = os.path.split(file) + fileName2 = fileName1[:-7] + print('File: '+ fileName2 + ' NFilesprocessed: '+ str(files_processed[patIndx]) + ' Out of: ' + str(n_files_per_patient[patIndx])) + fileName3 = fileName2.split('_') + file_number_str = fileName3[1].zfill(3) #for file-reading proper ordering + + if '+' in fileName2 and pat=='02': #chb02_16+ + file_number_str = '0'+ file_number_str + + numFeat = len(EPS_thresh_arr) + 1 + + # reading data + data = readDataFromFile(file) + # separating to data and labels + X = data[:, SigInfoParams.chToKeep] + y = data[:, -1] + (lenData, numCh) = X.shape + labels = y[0:lenData - 2] + + index = np.arange(0, lenData - int(ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winLen), + int(ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winStep)) + + labelsSegm = calculateMovingAvrgMeanWithUndersampling_v2(labels, int( + ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winLen), int( + ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winStep)) + labelsSegm = (labelsSegm > 0.5) * 1 + + + if parallelize: + pool.apply_async(readFileCalculateFeatures, args=(patIndx, data, labelsSegm, index, numCh, numFeat, ZeroCrossFeatureParams, EPS_thresh_arr, calculateOtherFeat, calculateZCFeat, folderOutFeaturesSub, fileName3, file_number_str), callback=collect_result) + number_free_cores = number_free_cores -1 + if number_free_cores==0: + while number_free_cores==0: #synced in the callback + time.sleep(0.1) + pass + else: + readFileCalculateFeatures(patIndx, data, labelsSegm, index, numCh, numFeat, ZeroCrossFeatureParams, EPS_thresh_arr, calculateOtherFeat, calculateZCFeat, folderOutFeaturesSub, fileName3, file_number_str) + + if parallelize: + while number_free_cores < n_cores: #wait till all subjects have their data processed + time.sleep(0.1) + pass + pool.close() + pool.join()
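# How per-sample labels become per-window labels above: the mean label within each 4 s
# window (0.5 s step) is thresholded at 0.5, so a window is marked ictal only when more
# than half of its samples are ictal. A small self-contained check (illustrative values):
import numpy as np

fs, winLen, winStep = 256, 4, 0.5
labels = np.zeros(20 * fs); labels[6 * fs:9 * fs] = 1  # a 3 s seizure in 20 s of data
labelsSegm = calculateMovingAvrgMeanWithUndersampling_v2(
    labels, int(fs * winLen), int(fs * winStep))
labelsSegm = (labelsSegm > 0.5) * 1
print(np.where(labelsSegm == 1)[0])  # only windows whose majority overlaps the seizure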
def calculateFeaturesPerEachFile_CLFAndAZCFeatures_SWEC(parallelize, perc_cores, EPS_thresh_arr,folderIn, folderOutFeatures, patients, SigInfoParams, ZeroCrossFeatureParams, calculateOtherFeat, calculateZCFeat): + global files_processed + global n_files_per_patient + global number_free_cores + + print('Extracting features from all subjects files') + + numFeat = len(EPS_thresh_arr) + 1 + + diff_files_seiz=0 + + if parallelize: + n_cores = mp.cpu_count() + n_cores = ceil(n_cores*perc_cores) + print('Number of cores: ' + str(n_cores)) + + pool = mp.Pool(n_cores) + number_free_cores = n_cores + + files_processed=np.zeros(len(patients)) + n_files_per_patient = np.zeros(len(patients)) + + # go through all patients + for patIndx, pat in enumerate(patients): + filesIn=np.sort(glob.glob(folderIn + 'ID' + pat + '/'+ 'ID' + pat + '*h.mat')) #only the data files + numFiles=len(filesIn) + n_files_per_patient[patIndx]=numFiles + + print('Pat: ' + str(pat) + ' Nfiles: ' + str(numFiles)) + + f = loadmat(folderIn + 'ID' + pat + '/'+ 'ID' + pat +'_info.mat', simplify_cells=True) + seizure_begin = np.array(f['seizure_begin']) #given in seconds + seizure_end = np.array(f['seizure_end']) + fs = np.array(f['fs']).astype(int) + + file_index = (seizure_begin/3600 + 1).astype(int) #add 1 to map seizure start times to 1 h file indexes + print('file_index: ' + str(file_index)) + seiz_count=0 + + folderOutFeaturesSub = folderOutFeatures + 'ID' + pat + '/' + + if not os.path.exists(folderOutFeaturesSub): + try: + os.mkdir(folderOutFeaturesSub) + except OSError: + print("Creation of the directory %s failed" % folderOutFeaturesSub) + + for fileIndx, file in enumerate(filesIn): + pom, fileName1 = os.path.split(file) + fileName2 = os.path.splitext(fileName1)[0] + print('File: '+ fileName2 + ' NFilesprocessed: '+ str(files_processed[patIndx]) + ' Out of: ' + str(n_files_per_patient[patIndx])) + fileName3 = fileName2.split('_') + file_number_str = fileName3[1][:-1] + file_number = int(file_number_str) + + file_number_str = file_number_str.zfill(3) #for file-reading proper ordering + + numFeat = len(EPS_thresh_arr) + 1 + + # reading data + reader = loadmat(file, simplify_cells=True) + data0 = np.array(reader['EEG']).astype("float").T + + #downsample to the expected processing frequency + data = decimate(data0, int(fs/ZeroCrossFeatureParams.samplFreq), axis=0, zero_phase=True) + + (lenData, numCh) = data.shape + y=np.zeros((lenData,)) + + if diff_files_seiz: #previous file had a seizure which spans into the current file + y[old_beg:old_end]=1 + diff_files_seiz=0 + + idx_seiz = np.where(file_index==file_number)[0] + for i, v in enumerate(idx_seiz): + print('Seizure found at file: ' + str(file_index[v])) + beg = int((seizure_begin[v]%3600*fs)*(ZeroCrossFeatureParams.samplFreq/fs)) + end = int((seizure_end[v]%3600*fs)*(ZeroCrossFeatureParams.samplFreq/fs)) + + if end < beg: #seizure spans two files + diff_files_seiz = 1 + old_end = end + old_beg = 0 + end = int((3600*fs)*(ZeroCrossFeatureParams.samplFreq/fs)) + + y[beg:end]=1 + + + labels = y[0:lenData - 2] + # windowing the data: 'index' contains start:stop indexes + index = np.arange(0, lenData - int(ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winLen), + int(ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winStep)) + + #'labels' contains one value per datapoint. 'labelsSegm' contains one value per window (> 50% ictal data --> ictal label) + labelsSegm = calculateMovingAvrgMeanWithUndersampling_v2(labels, int( + ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winLen), int( + ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winStep)) + labelsSegm = (labelsSegm > 0.5) * 1 + + if parallelize: + pool.apply_async(readFileCalculateFeatures, args=(patIndx, data, labelsSegm, index, numCh, numFeat, ZeroCrossFeatureParams, EPS_thresh_arr, calculateOtherFeat, calculateZCFeat, folderOutFeaturesSub, fileName3, file_number_str), callback=collect_result) + number_free_cores = number_free_cores -1 + if number_free_cores==0: + while number_free_cores==0: #synced in the callback 'collect_result' + time.sleep(0.1) + pass + else: + readFileCalculateFeatures(patIndx, data, labelsSegm, index, numCh, numFeat, ZeroCrossFeatureParams, EPS_thresh_arr, calculateOtherFeat, calculateZCFeat, folderOutFeaturesSub, fileName3, file_number_str) + + if parallelize: + while number_free_cores < n_cores: #wait till all subjects have their data processed + time.sleep(0.1) + pass + + pool.close() + pool.join()
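# The SWEC labels above are built from seizure times given in seconds: each seizure is
# assigned to a 1 h file, and its within-hour offset is rescaled from the native rate fs
# to the processing rate. A compact sketch of that mapping (hypothetical numbers;
# to_file_sample is illustrative only, not part of this library):
def to_file_sample(t_sec, fs_native, fs_proc):
    """Map an absolute seizure time to (1-based file index, sample index in that file)."""
    file_idx = int(t_sec // 3600) + 1  # each file holds one hour of data
    sample = int((t_sec % 3600) * fs_native * (fs_proc / fs_native))
    return file_idx, sample

# a seizure starting 2 h 30 min into the recording, native 512 Hz decimated to 256 Hz
print(to_file_sample(9000, 512, 256))  # -> (3, 460800), i.e. file 3, sample 1800 s * 256 Hz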
def readFileCalculateFeatures(patIndx, data, labelsSegm, index, numCh, numFeat, ZeroCrossFeatureParams, EPS_thresh_arr, calculateOtherFeat, calculateZCFeat, folderOutFeaturesSub, fileName3, file_number_str): + + #band-pass filter 1-20 Hz, applied zero-phase + sos = signal.butter(4, [1, 20], 'bandpass', fs=ZeroCrossFeatureParams.samplFreq, output='sos') + allsigFilt = signal.sosfiltfilt(sos, data, axis=0) + del data + zeroCrossStandard = np.zeros((len(index), numCh)) + zeroCrossApprox = np.zeros((len(index), numCh)) + zeroCrossFeaturesAll = np.zeros((len(index), numFeat * numCh)) + for ch in range(numCh): + + # t=time.time() + sigFilt=allsigFilt[:,ch] + # calculate CLF + if (calculateOtherFeat==1): + featOther = calculateOtherMLfeatures_oneCh(np.copy(sigFilt), ZeroCrossFeatureParams) + if (ch == 0): + AllFeatures = featOther + else: + AllFeatures = np.hstack((AllFeatures, featOther)) + + #AZC + if (calculateZCFeat==1): + # Zero-crossing of the original signal, counted in 1-second continuous sliding window + # zeroCrossStandard[:,ch] = np.convolve(zero_crossings(sigFilt), np.ones(ZeroCrossFeatures.samplFreq), mode='same') + x = np.convolve(zero_crossings(sigFilt), np.ones(ZeroCrossFeatureParams.samplFreq), mode='same') + # zeroCrossStandard[:,ch] =calculateMovingAvrgMeanWithUndersampling_v2(x, ZeroCrossFeatureParams.samplFreq) + zeroCrossStandard[:, ch] = calculateMovingAvrgMeanWithUndersampling_v2(x, int( + ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winLen), int( + ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winStep)) + zeroCrossFeaturesAll[:, numFeat * ch] = zeroCrossStandard[:, ch] + + + for EPSthrIndx, EPSthr in enumerate(EPS_thresh_arr): + # Signal simplification at the given threshold, and zero crossing count in the same way + sigApprox = polygonal_approx(sigFilt, epsilon=EPSthr) + # axs[0].plot(sigApprox, sigFilt[sigApprox], alpha=0.6) + sigApproxInterp = np.interp(np.arange(len(sigFilt)), sigApprox, sigFilt[sigApprox]) + # 
zeroCrossApprox[:,ch] = np.convolve(zero_crossings(sigApproxInterp), np.ones(ZeroCrossFeatures.samplFreq), mode='same') + x = np.convolve(zero_crossings(sigApproxInterp), np.ones(ZeroCrossFeatureParams.samplFreq), + mode='same') + # zeroCrossApprox[:, ch] = calculateMovingAvrgMeanWithUndersampling_v2(x, ZeroCrossFeatureParams.samplFreq) + zeroCrossApprox[:, ch] = calculateMovingAvrgMeanWithUndersampling_v2(x, int( + ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winLen), int( + ZeroCrossFeatureParams.samplFreq * ZeroCrossFeatureParams.winStep)) + # save to matrix with all features that will be saved + zeroCrossFeaturesAll[:, numFeat * ch + EPSthrIndx + 1] = zeroCrossApprox[:, ch] + + if (calculateZCFeat == 1): + # save for this file Zero cross features + df = pd.DataFrame(data=zeroCrossFeaturesAll) + outputName = folderOutFeaturesSub + fileName3[0]+ '_' + file_number_str+ 'h_ZCFeat.csv.gz' + #to read from disk: df = pd.read_csv('dfsavename.csv.gz', compression='gzip') + df.to_csv(outputName, index=False, compression='gzip') + # np.savetxt(outputName, zeroCrossFeaturesAll, delimiter=",") + + if (calculateOtherFeat == 1): + # save for this file all other features + df = pd.DataFrame(data=AllFeatures) + outputName = folderOutFeaturesSub + fileName3[0]+ '_' + file_number_str+ 'h_OtherFeat.csv.gz' + df.to_csv(outputName, index=False, compression='gzip') + # np.savetxt(outputName, AllFeatures, delimiter=",") + + # save for this file labels + df = pd.DataFrame(data=labelsSegm) + outputName = folderOutFeaturesSub + fileName3[0]+ '_' + file_number_str+ 'h_Labels.csv.gz' + df.to_csv(outputName, index=False, compression='gzip') + # np.savetxt(outputName, labelsSegm, delimiter=",") + + return (patIndx) + + + +#callback for the apply_async process paralization +def collect_result(result): + global files_processed + global number_free_cores + global n_cores_semaphore + global n_files_per_patient + + while n_cores_semaphore==0: #block callback in case of multiple accesses + pass + + if n_cores_semaphore: + n_cores_semaphore=0 + files_processed[result] += 1 + number_free_cores = number_free_cores+1 + n_cores_semaphore=1 + +if __name__ == "__main__": + pass diff --git a/bash_run_python_variosPat.sh b/bash_run_python_variosPat.sh new file mode 100644 index 0000000..1eefcba --- /dev/null +++ b/bash_run_python_variosPat.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +for i in {1..18} #! e.g. of use: 1, {1,5,9,11,24}, and {1..18}. 
+do + python script_AZC_Classification.py $i & +done \ No newline at end of file diff --git a/parametersSetup.py b/parametersSetup.py new file mode 100644 index 0000000..69e94b6 --- /dev/null +++ b/parametersSetup.py @@ -0,0 +1,86 @@ +#!~/anaconda3/bin python + +__author__ = "Una Pale" +__credits__ = ["Una Pale", "Renato Zanetti"] +__license__ = "LGPL" +__version__ = "1.0" +__email__ = "una.pale at epfl.ch" + +''' file with all parameters''' +import numpy as np +import pickle + +class GeneralParams: + #Label smoothing with moving average + seizureStableLenToTest=5 #in seconds - window for performing label voting + seizureStablePercToTest=0.5 #fraction of 1 labels in the last seizureStableLenToTest values needed to keep label 1 + distanceBetween2Seizures=30 #in seconds - if seizures are closer than this they are merged + timeBeforeSeizureConsideredAsSeizure=30 #in seconds - a detection starting this soon before the true seizure is still considered correct + numFPperDayThr=1 #number of FP seizures per day considered acceptable (additional performance measure) + #Label smoothing with bayes + bayesWind=10 #original paper uses 5 windows + bayesProbThresh= 1.5 #smoothing with cumulative probabilities, threshold from Valentin's paper + #tolerance before and after a seizure in which detections are not counted as FP + toleranceFP_befSeiz=10 #in sec + toleranceFP_aftSeiz=30 #in sec + + patients=[] #on which subjects to train and test + plottingON=0 #determines whether some additional plots are plotted + PersGenApproach='personalized' #'personalized', 'generalized' approaches - generalized not used here + +class SigInfoParams: + #CHB-MIT: common channels among all subjects + channels = ['FP1-F7', 'F7-T7', 'T7-P7', 'P7-O1', 'FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1', + 'FP2-F4', 'F4-C4', 'C4-P4', 'P4-O2', 'FP2-F8', 'F8-T8', 'T8-P8', 'P8-O2', 'FZ-CZ', 'CZ-PZ'] + chToKeep=np.array([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]) #which channels to keep - after reordering them in the above order + samplFreq=256 #sampling frequency of data + +class SegSymbParams: + # window discretization + segLenSec=4 #length of discrete EEG windows on which to perform analysis + slidWindStepSec=0.5 #step of sliding window + labelVotingType='majority' #'majority', 'atLeastOne' or 'allOne' #defines how the final label of a segment is chosen + +class StandardMLParams: + modelType='DecisionTree' #'KNN', 'SVM', 'DecisionTree', 'RandomForest','BaggingClassifier','AdaBoost' + trainingDataResampling='NoResampling' #'NoResampling','ROS','RUS','TomekLinks','ClusterCentroids','SMOTE','SMOTEtomek' + samplingStrategy='default' # 'default' uses each resampler's default strategy; a custom value is currently implemented only for RUS + #KNN parameters + KNN_n_neighbors=5 + KNN_metric='euclidean' #'euclidean', 'manhattan' + #SVM parameters + SVM_kernel = 'linear' # 'linear', 'rbf','poly' + SVM_C = 1 # 1,100,1000 + SVM_gamma = 'auto' # 0 # 0,10,100 + #DecisionTree and random forest parameters + DecisionTree_criterion = 'gini' # 'gini', 'entropy' + DecisionTree_splitter = 'best' # 'best','random' + DecisionTree_max_depth = 0 # 0, 2, 5,10,20 + RandomForest_n_estimators = 100 #10,50, 100,250 + #Bagging, boosting classifier parameters + Bagging_base_estimator='SVM' #'SVM','KNN', 'DecisionTree' + Bagging_n_estimators = 100 # 10,50, 100,250 + n_jobs = 1
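# train_StandardML_moreModelsPossible (defined in VariousFunctionsLib.py) consumes the
# settings above. A minimal sketch of how the RandomForest branch could be instantiated
# with scikit-learn (illustrative only; build_model is not part of this repository):
from sklearn.ensemble import RandomForestClassifier

def build_model(params=StandardMLParams):
    if params.modelType == 'RandomForest':
        # DecisionTree_max_depth == 0 is taken here to mean 'no depth limit'
        max_depth = params.DecisionTree_max_depth if params.DecisionTree_max_depth > 0 else None
        return RandomForestClassifier(n_estimators=params.RandomForest_n_estimators,
                                      criterion=params.DecisionTree_criterion,
                                      max_depth=max_depth,
                                      n_jobs=params.n_jobs)
    raise NotImplementedError(params.modelType)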
+class FeaturesParams: + numStandardFeat=62 #56 features from Sopic2018 + 6 AZC features + featNorm='noNorm' #'Norm','noNorm' #feature normalization flag + + +class ZeroCrossFeatureParams: + EPS_thresh_arr = [16, 32, 64, 128, 256] #thresholds for AZC calculation + buttFilt_order=4 #Butterworth filter order + buttFilt_lfreq = 1 #low freq + buttFilt_hfreq=20 #high freq + samplFreq=256 #expected sampling frequency for the input signal + winLen=4 # data window length in sec + winStep=0.5 # window step in sec, i.e. the amount of new data per window + + +#SAVING SETUP once again, to update it if there is new info +with open('../PARAMETERS.pickle', 'wb') as f: + # pickle.dump([GeneralParams, SegSymbParams, SigInfoParams, EEGfreqBands, StandardMLParams, FeaturesParams, ZeroCrossFeatureParams, patients], f) + pickle.dump([GeneralParams, SegSymbParams, SigInfoParams, StandardMLParams, FeaturesParams, ZeroCrossFeatureParams], f) + + \ No newline at end of file diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..b55fecb --- /dev/null +++ b/readme.md @@ -0,0 +1,27 @@ +This repository includes the code used for the paper entitled *Approximate Zero-Crossing: a new interpretable, highly discriminative and low-complexity feature for EEG and iEEG seizure detection* + +License: LGPL + +We have used the Anaconda toolset, including the following packages (original and added later): + + - Python (v3.9.7) + - numpy (v1.20.3) + - pandas (v1.3.5) + - scikit-learn (v1.0.2) + - scipy (v1.7.3) + - seaborn (v0.11.2) + - matplotlib (v3.5.0) + - pyedflib (v0.1.25) + - antropy (v0.1.4) + - PyWavelets (v1.1.1) + +Script descriptions: + 1) `script_AZC_datasetPreProcessing.py`: **to be executed first**; performs feature extraction and data preparation for the TSCV execution. + 2) `script_AZC_FeatDivergence.py`: calculates the KL divergence and generates the related plots. + 3) `script_AZC_Classification.py`: used for seizure classification (TSCV approach). + 4) `script_AZC_ConsolidateResults.py`: consolidates the results and produces the related plots. + +All scripts require a few user settings before execution (e.g., the path of the folder containing the datasets) and also observe the parameters set in `parametersSetup.py`. + +It's important to note that the script `script_AZC_datasetPreProcessing.py` employs a pool of processing cores when the variable **parallelize** is set. Similar behaviour can be achieved for the `script_AZC_Classification.py` script, but in that case you should launch the execution via **bash** using the `bash_run_python_variosPat.sh` script. 
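The parallel mode described above follows one pattern throughout the repo: `mp.Pool.apply_async` with a completion callback that returns a core to a shared counter, while the submitting loop throttles itself when no core is free. A minimal self-contained sketch of that pattern (illustrative only; `process_file` and `on_done` are hypothetical names, not part of the repository):

import multiprocessing as mp
import time

number_free_cores = 0

def process_file(idx):
    time.sleep(0.1)  # stand-in for feature extraction on one file
    return idx

def on_done(result):
    global number_free_cores
    number_free_cores += 1  # free a core when a file finishes

if __name__ == '__main__':
    n_cores = 4
    number_free_cores = n_cores
    pool = mp.Pool(n_cores)
    for idx in range(10):
        pool.apply_async(process_file, args=(idx,), callback=on_done)
        number_free_cores -= 1
        while number_free_cores == 0:  # throttle until the callback frees a core
            time.sleep(0.1)
    while number_free_cores < n_cores:  # wait for the last tasks to finish
        time.sleep(0.1)
    pool.close()
    pool.join()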
+ diff --git a/script_AZC_Classification.py b/script_AZC_Classification.py new file mode 100644 index 0000000..e34a5a4 --- /dev/null +++ b/script_AZC_Classification.py @@ -0,0 +1,250 @@ +#!~/anaconda3/bin python + +__authors__ = "Una Pale and Renato Zanetti" +__license__ = "LGPL" +__version__ = "1.0" +__email__ = "{una.pale, renato.zaneti} at epfl.ch" + +''' script that performs seizure detection based on the TSCV approach''' + +from VariousFunctionsLib import * +from parametersSetup import * +from PerformanceMetricsLib import * + +import sys +import tracemalloc + +################################### +# Initial setup +################################### + +Dataset='01_SWEC' #'01_SWEC', '02_CHB' #select the dataset to be processed +Server = 1 #0 - local execution, 1- server execution +bash = 1 #using bash to launch script execution: patient as input, hence we can parellelize in more cores +datasetPreparationType='Filt_1to20Hz' # 'Raw', 'Filt_1to20Hz' + +if bash: + if int(str(sys.argv[1]))<10: + patients=['0'+str(sys.argv[1])] + else: + patients=[str(sys.argv[1])] + +# DEFINING INPUT/OUTPUT FOLDERS +if Dataset=='01_SWEC': + if Server: + folderInData = '/shares/eslfiler1/databases/medical/swec-ethz/ieeg/long-term-dataset/' + os.nice(20) #set low priority level for longer-running process + else: + folderInData = '../iEEG/' + + if bash==0: + patients =['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18'] + +if Dataset=='02_CHB': + if Server: + folderInData = '/shares/eslfiler1/databases/medical/chb-mit/edf/' #expect all subject's files in the same directory + os.nice(20) #set low priority level for longer-running process + else: + folderInData = '../chbmit/' + + if bash==0: + patients =['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24'] + +#folder that store partial results +if Server: + folderResults = '/shares/eslfiler1/scratch/ZeroCrossFeatEpilepsy/' +else: + folderResults = '../AZC_results/' + +# ########################################################################################### +#SETUPS +GeneralParams.patients= patients +GeneralParams.plottingON=0 +GeneralParams.PersGenApproach='personalized' #'personalized', 'generalized' +StandardMLParams.modelType = 'RandomForest' +FeaturesParams.numStandardFeat=62 #56 (Sopic2018) + 6 AZC + +############################### +## FEATURE SET +#features subset for evaluation +FeatSetNamesArray = ['StandardFeat'] #[ 'ZeroCross', 'StandardFeat'] + +# ############################### +# DEFINING INPUT/OUTPUT FOLDERS +windowSize=60*60 #files would have 1h data +folderOutTSCV=folderResults+Dataset+'/03_Features_' +datasetPreparationType+'_TSCVprep_'+str(windowSize) +'s_DivBig/' +folderOutFeatures= folderResults+Dataset+'/02_Features_' +datasetPreparationType + '/' +############################################################################################################### +# RUNNING FOR ALL COMBINATIONS OF FEATURES THAT ARE DEFINED ABOVE + +featNamesAll=np.array(['meanAmpl', 'LineLength', 'samp_1_d7_1', 'samp_1_d6_1', 'samp_2_d7_1', 'samp_2_d6_1', 'perm_d7_3', + 'perm_d7_5', 'perm_d7_7', 'perm_d6_3', 'perm_d6_5', 'perm_d6_7', 'perm_d5_3', 'perm_d5_5', 'perm_d5_7', + 'perm_d4_3', 'perm_d4_5', 'perm_d4_7', 'perm_d3_3', 'perm_d3_5', 'perm_d3_7', + 'shannon_en_sig', 'renyi_en_sig', 'tsallis_en_sig', 'shannon_en_d7', 'renyi_en_d7', 'tsallis_en_d7', + 'shannon_en_d6', 'renyi_en_d6', 'tsallis_en_d6', 'shannon_en_d5', 'renyi_en_d5', 'tsallis_en_d5', + 
'shannon_en_d4', 'renyi_en_d4', 'tsallis_en_d4', 'shannon_en_d3', 'renyi_en_d3', 'tsallis_en_d3', + 'p_dc_rel', 'p_mov_rel', 'p_delta_rel', 'p_theta_rel', 'p_alfa_rel', 'p_middle_rel', 'p_beta_rel', + 'p_gamma_rel', 'p_dc', 'p_mov', 'p_delta', 'p_theta', 'p_alfa', 'p_middle', 'p_beta', 'p_gamma', 'p_tot', + 'ZC_NoApprox', 'ZC_approx_16', 'ZC_approx_32', 'ZC_approx_64', 'ZC_approx_128', 'ZC_approx_256']) + +for fs in range( len(FeatSetNamesArray)): + FeatureSetName=FeatSetNamesArray[fs] + + if FeatureSetName=='All': + inportCLF = 1 + inportAZC = 1 + featNames = featNamesAll + elif FeatureSetName=='StandardFeat': + inportCLF = 1 + inportAZC = 0 + featNames = np.array(['meanAmpl', 'LineLength', 'samp_1_d7_1', 'samp_1_d6_1', 'samp_2_d7_1', 'samp_2_d6_1', 'perm_d7_3', + 'perm_d7_5', 'perm_d7_7', 'perm_d6_3', 'perm_d6_5', 'perm_d6_7', 'perm_d5_3', 'perm_d5_5', 'perm_d5_7', + 'perm_d4_3', 'perm_d4_5', 'perm_d4_7', 'perm_d3_3', 'perm_d3_5', 'perm_d3_7', + 'shannon_en_sig', 'renyi_en_sig', 'tsallis_en_sig', 'shannon_en_d7', 'renyi_en_d7', 'tsallis_en_d7', + 'shannon_en_d6', 'renyi_en_d6', 'tsallis_en_d6', 'shannon_en_d5', 'renyi_en_d5', 'tsallis_en_d5', + 'shannon_en_d4', 'renyi_en_d4', 'tsallis_en_d4', 'shannon_en_d3', 'renyi_en_d3', 'tsallis_en_d3', + 'p_dc_rel', 'p_mov_rel', 'p_delta_rel', 'p_theta_rel', 'p_alfa_rel', 'p_middle_rel', 'p_beta_rel', + 'p_gamma_rel', 'p_dc', 'p_mov', 'p_delta', 'p_theta', 'p_alfa', 'p_middle', 'p_beta', 'p_gamma', 'p_tot']) + + elif FeatureSetName == 'ZeroCross': + inportCLF = 0 + inportAZC = 1 + featNames = np.array(['ZC_NoApprox', 'ZC_approx_16', 'ZC_approx_32', 'ZC_approx_64', 'ZC_approx_128', 'ZC_approx_256']) + else: + inportCLF = 1 + inportAZC = 1 + featNames = np.array( + ['meanAmpl', 'LineLength', 'samp_1_d7_1', 'samp_1_d6_1', 'samp_2_d7_1', 'samp_2_d6_1', 'perm_d7_3', + 'perm_d7_5', 'perm_d7_7', 'perm_d6_3', 'perm_d6_5', 'perm_d6_7', 'perm_d5_3', 'perm_d5_5', 'perm_d5_7', + 'perm_d4_3', 'perm_d4_5', 'perm_d4_7', 'perm_d3_3', 'perm_d3_5', 'perm_d3_7', + 'shannon_en_sig', 'renyi_en_sig', 'tsallis_en_sig', 'shannon_en_d7', 'renyi_en_d7', 'tsallis_en_d7', + 'shannon_en_d6', 'renyi_en_d6', 'tsallis_en_d6', 'shannon_en_d5', 'renyi_en_d5', 'tsallis_en_d5', + 'shannon_en_d4', 'renyi_en_d4', 'tsallis_en_d4', 'shannon_en_d3', 'renyi_en_d3', 'tsallis_en_d3', + 'p_dc_rel', 'p_mov_rel', 'p_delta_rel', 'p_theta_rel', 'p_alfa_rel', 'p_middle_rel', 'p_beta_rel', + 'p_gamma_rel', 'p_dc', 'p_mov', 'p_delta', 'p_theta', 'p_alfa', 'p_middle', 'p_beta', 'p_gamma', 'p_tot', + 'ZC_NoApprox', 'ZC_approx_16', 'ZC_approx_32', 'ZC_approx_64', 'ZC_approx_128', 'ZC_approx_256']) + + + #creating output folder + folderOut = folderResults + Dataset + '/05_RF_TSCV/' + createFolderIfNotExists(folderOut) + folderOut = folderOut + datasetPreparationType +FeatureSetName +'/' + createFolderIfNotExists(folderOut) + + ############################################################################################ + seizureStableLenToTestIndx = int(GeneralParams.seizureStableLenToTest / SegSymbParams.slidWindStepSec) + seizureStablePercToTest = GeneralParams.seizureStablePercToTest + distanceBetweenSeizuresIndx = int(GeneralParams.distanceBetween2Seizures / SegSymbParams.slidWindStepSec) + numLabelsPerHour = 60 * 60 / SegSymbParams.slidWindStepSec + toleranceFP_bef = int(GeneralParams.toleranceFP_befSeiz / SegSymbParams.slidWindStepSec) + toleranceFP_aft = int(GeneralParams.toleranceFP_aftSeiz / SegSymbParams.slidWindStepSec) + tracemalloc.start() + for patIndx, pat in 
enumerate(GeneralParams.patients): + print('Patient:', pat) + #load all files only once and mark where each file starts + + if Dataset=='01_SWEC': + filesAll=np.sort(glob.glob(folderOutFeatures + 'ID' + pat + '/ID' + pat + '*Labels.csv.gz')) + labelsAll = concatenateDataFromFiles(filesAll).reshape(-1) + nrows = np.size(labelsAll) + + if inportCLF: + filesAll=np.sort(glob.glob(folderOutFeatures + 'ID' + pat + '/ID' + pat + '*OtherFeat.csv.gz')) + OtherFeat = concatenateDataFromFiles_AllocateFirst(filesAll, nrows) + + if inportAZC: + filesAll=np.sort(glob.glob(folderOutFeatures + 'ID' + pat + '/ID' + pat + '*ZCFeat.csv.gz')) + ZCFeat = concatenateDataFromFiles_AllocateFirst(filesAll, nrows) + + fileNameIndexes = folderOutTSCV + 'ID' + pat + '/ID' + pat + '_TSCV_indexes.csv' + + if Dataset=='02_CHB': + filesAll = np.sort(glob.glob(folderOutFeatures +'chb' + pat+ '/chb' + pat + '*Labels.csv.gz')) + #chb02_16+ comes before chb02_16 when sorting the files + if pat=='02' and '+' in filesAll[15][:]: #chb02_16+ + cpy = filesAll[15] + filesAll[15] = filesAll[16] + filesAll[16] = cpy + + labelsAll = concatenateDataFromFiles(filesAll).reshape(-1) + nrows = np.size(labelsAll) + if inportCLF: + filesAll = np.sort(glob.glob(folderOutFeatures +'chb' + pat+ '/chb' + pat + '*OtherFeat.csv.gz')) + if pat=='02' and '+' in filesAll[15][:]: #chb02_16+ + cpy = filesAll[15] + filesAll[15] = filesAll[16] + filesAll[16] = cpy + OtherFeat = concatenateDataFromFiles_AllocateFirst(filesAll, nrows) + + if inportAZC: + filesAll = np.sort(glob.glob(folderOutFeatures +'chb' + pat+ '/chb' + pat + '*ZCFeat.csv.gz')) + if pat=='02' and '+' in filesAll[15][:]: #chb02_16+ + cpy = filesAll[15] + filesAll[15] = filesAll[16] + filesAll[16] = cpy + ZCFeat = concatenateDataFromFiles_AllocateFirst(filesAll, nrows) + + fileNameIndexes = folderOutTSCV +'chb' + pat+ '/chb' + pat + '_TSCV_indexes.csv' + + folderOutPat = folderOut + pat + '/' + createFolderIfNotExists(folderOutPat) + #select the data to be used + startIndxOfFiles = np.loadtxt(fileNameIndexes).astype(int) #startIndxOfFiles holds the TSCV file boundaries, assuming all the subject's data is stacked in the same matrix + + if inportAZC and inportCLF: #in case we use both features + numCh=int(len(ZCFeat[0, :])/(len(ZeroCrossFeatureParams.EPS_thresh_arr)+1)) + dataAll= mergeFeatFromTwoMatrixes(OtherFeat, ZCFeat, numCh) + del OtherFeat #as all data is merged into another matrix, we can free the previously allocated memory + del ZCFeat + elif inportAZC: + dataAll = ZCFeat + else: + dataAll = OtherFeat + + n_cores = mp.cpu_count() + StandardMLParams.n_jobs = int(n_cores/24) #split the available cores among subjects, assuming all the patients are processed at once + + if StandardMLParams.n_jobs < 2: + StandardMLParams.n_jobs=2 #at least 2 cores allocated for training + + # remove nan and inf from the matrix + dataAll[np.where(np.isinf(dataAll))] = np.nan + col_mean = np.nanmean(dataAll, axis=0) + inds = np.where(np.isnan(dataAll)) + dataAll[inds] = np.take(col_mean, inds[1]) + # if still somewhere nan, replace with 0 + dataAll[np.where(np.isnan(dataAll))] = 0 + + + current, peak = tracemalloc.get_traced_memory() + print('peak memory (GB): ' + str(peak/1024**3)) + + numCV = len(startIndxOfFiles) - 1 #the first position is the beginning of the testing data and the last position is the length of the total input data + performanceTrain = np.zeros((numCV, 4*9)) # 4 smoothing variants (noSmooth, step1, step2, bayes) x 9 performance measures + performanceTest = np.zeros((numCV, 4*9)) # 4 smoothing variants x 9 performance measures + performanceTest_justNext = np.zeros(( numCV-1, 4*9)) # 4 smoothing variants x 9 performance measures
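# The loop below implements the TSCV scheme: for split cv, all data before
# startIndxOfFiles[cv] is the (expanding) training set and the single block ending at
# startIndxOfFiles[cv+1] is the test set. A toy illustration (boundary values made up):
import numpy as np

startIndxOfFiles_demo = np.array([100, 250, 400])  # hypothetical block boundaries
for cv in range(len(startIndxOfFiles_demo) - 1):
    train_rows = (0, startIndxOfFiles_demo[cv])                             # expanding train window
    test_rows = (startIndxOfFiles_demo[cv], startIndxOfFiles_demo[cv + 1])  # next block only
    print('cv', cv, 'train', train_rows, 'test', test_rows)
# cv 0: train rows [0, 100), test rows [100, 250); cv 1: train rows [0, 250), test rows [250, 400)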
+ for cv in range(numCV): + print('Pat', pat, 'CV', cv+1, 'Out of', numCV, 'CVs') + + # create train and test data + dataTest = dataAll[startIndxOfFiles[cv]:startIndxOfFiles[cv + 1], :] # the test data is the single file right after the training period + label_test = labelsAll[startIndxOfFiles[cv]:startIndxOfFiles[cv + 1]] + dataTrain = dataAll[0:startIndxOfFiles[cv], :] + label_train = labelsAll[0:startIndxOfFiles[cv]] + dataSource_test = (cv + 1) * np.ones( (startIndxOfFiles[cv + 1] - startIndxOfFiles[cv])) # value 1 means the file after the trained one + + # initializing the model and then training + MLmodel = train_StandardML_moreModelsPossible(dataTrain, label_train, StandardMLParams) + + #testing + (predLabels_test, probabLab_test, acc_test, accPerClass_test)= test_StandardML_moreModelsPossible_v3(dataTest,label_test, MLmodel) + + # save predictions + fileName2='Subj' + pat + '_cv'+str(cv).zfill(3) + dataToSave = np.vstack((label_test, probabLab_test, predLabels_test, dataSource_test)).transpose() # also record which file each part of the test set comes from + outputName = folderOutPat + fileName2 + '_TestPredictions.csv' + saveDataToFile(dataToSave, outputName, 'gzip') + + + diff --git a/script_AZC_ConsolidateResults.py b/script_AZC_ConsolidateResults.py new file mode 100644 index 0000000..a5ea7b8 --- /dev/null +++ b/script_AZC_ConsolidateResults.py @@ -0,0 +1,259 @@ +#!~/anaconda3/bin python + +__authors__ = "Una Pale and Renato Zanetti" +__license__ = "LGPL" +__version__ = "1.0" +__email__ = "{una.pale, renato.zaneti} at epfl.ch" + +''' script that consolidates the classification results obtained for both +datasets, hence the partial results should be available before its execution ''' + +from VariousFunctionsLib import * +from parametersSetup import * + + +################################### +# Initial setup +################################### + +Server = 1 #0 - local execution, 1- server execution +datasetPreparationType='Filt_1to20Hz' # 'Raw', 'Filt_1to20Hz' + +#folder that stores partial results +if Server: + folderResults = '/shares/eslfiler1/scratch/ZeroCrossFeatEpilepsy/' +else: + folderResults = '../AZC_results/' + + +#------------------------------------------------------------------------------------------------------ +#Consolidating results +#------------------------------------------------------------------------------------------------------ +#chb-mit +Dataset='02_CHB' +patients =['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24'] +GeneralParams.patients = patients + +folderOut = folderResults + Dataset + '/05_RF_TSCV/' + datasetPreparationType +'ZeroCross/' +func_calculatePerformance_AppendingTestData_TSCV(folderOut, GeneralParams, SegSymbParams, Dataset) + +folderOut = folderResults + Dataset + '/05_RF_TSCV/' + datasetPreparationType +'StandardFeat/' +func_calculatePerformance_AppendingTestData_TSCV(folderOut, GeneralParams, SegSymbParams, Dataset) + + +#------------------------------------------------------------------------------------------------------ +#swec-ethz +Dataset='01_SWEC' #02_CHBMIT, 01_iEEG_Bern +patients =['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18'] +GeneralParams.patients = patients + +folderOut = folderResults + Dataset + '/05_RF_TSCV/' + datasetPreparationType +'ZeroCross/' +func_calculatePerformance_AppendingTestData_TSCV(folderOut, GeneralParams,
SegSymbParams, Dataset) + +folderOut = folderResults + Dataset + '/05_RF_TSCV/' + datasetPreparationType +'StandardFeat/' +func_calculatePerformance_AppendingTestData_TSCV(folderOut, GeneralParams, SegSymbParams, Dataset) + +#--------------------------------------------------------------------------------------------------------- +#plots for paper +#--------------------------------------------------------------------------------------------------------- +font_size=11 +mksize=7 +fig1 = plt.figure(figsize=(15, 6), constrained_layout=False) +gs = GridSpec(2, 2, figure=fig1, width_ratios=[10, 7.5]) +fig1.subplots_adjust(wspace=0.15, hspace=0.4) + +#------------------------------------------------------------------------------------------------------- +#Subplot CHB - AZC +Dataset='02_CHB' +patients =['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24'] +GeneralParams.patients = patients + +folderOut = folderResults + Dataset + '/05_RF_TSCV/' + datasetPreparationType +'ZeroCross/' +folderOut=folderOut +'PerformanceWithAppendedTests/' +outputName = folderOut + 'AllSubj_AppendedTest_AllPerfMeasures.csv.gz' +AllSubjDiffPerf_test= readDataFromFile(outputName) + +f1 = np.zeros(len(GeneralParams.patients)) +recall = np.zeros(len(GeneralParams.patients)) +precision = np.zeros(len(GeneralParams.patients)) +far = np.zeros(len(GeneralParams.patients)) +for patIndx, pat in enumerate(GeneralParams.patients): + recall[patIndx] = AllSubjDiffPerf_test[patIndx, 27] + precision[patIndx] = AllSubjDiffPerf_test[patIndx, 28] + f1[patIndx] = AllSubjDiffPerf_test[patIndx, 29] + far[patIndx] = AllSubjDiffPerf_test[patIndx, 35] + +f1_azc_chb = f1 +recall_azc_chb = recall +precision_azc_chb = precision + +idx_sorted = np.argsort(-f1_azc_chb) #always ascending, so the minus is used to invert the order +p = np.arange(np.size(GeneralParams.patients)) +# xValues = GeneralParams.patients[idx_sorted] +ax1 = fig1.add_subplot(gs[0,0]) +ax1.plot(f1[idx_sorted]*100, '*', color='r', alpha=0.5, markersize=mksize) +ax1.plot(recall[idx_sorted]*100, '+', color='b', alpha=0.75, markersize=mksize) +ax1.plot(precision[idx_sorted]*100, '.', color='g', alpha=0.75, markersize=mksize) + +ax1.set_ylabel('%', fontsize=font_size) +# ax1.set_xlabel('Subjects', fontsize=font_size+1) +ax1.set_title('AZC - CHB-MIT', fontsize=font_size+2) +# ax1.set_xlabel('a) AZC on CHB-MIT', fontsize=font_size+1) +ax1.spines['top'].set_visible(False) +ax1.spines['right'].set_visible(False) +ax1.grid(alpha=0.5, linestyle='--', linewidth=1) +# plt.plot(xValues, far, 'o', color='r') +perfNames = ['F1= ' + str(np.mean(f1_azc_chb)*100)[0:4] + ' ± ' + str(np.std(f1_azc_chb)*100)[0:4],\ + 'Sens= ' + str(np.mean(recall_azc_chb)*100)[0:4] + ' ± ' + str(np.std(recall_azc_chb)*100)[0:4],\ + 'Prec= ' + str(np.mean(precision_azc_chb)*100)[0:4] + ' ± ' + str(np.std(precision_azc_chb)*100)[0:4]] +ax1.legend(perfNames,fontsize=font_size-1) +plt.xticks(p, p[idx_sorted]+1, fontsize=font_size) +plt.yticks([0, 20, 40, 60, 80, 100], fontsize=font_size) + + +#------------------------------------------------------------------------------------------------------- +#Subplot CHB - CLF +folderOut = folderResults + Dataset + '/05_RF_TSCV/' + datasetPreparationType +'StandardFeat/' +folderOut=folderOut +'PerformanceWithAppendedTests/' +outputName = folderOut + 'AllSubj_AppendedTest_AllPerfMeasures.csv.gz' +AllSubjDiffPerf_test= readDataFromFile(outputName) + +f1 = np.zeros(len(GeneralParams.patients)) +recall = 
np.zeros(len(GeneralParams.patients)) +precision = np.zeros(len(GeneralParams.patients)) +far = np.zeros(len(GeneralParams.patients)) +for patIndx, pat in enumerate(GeneralParams.patients): + recall[patIndx] = AllSubjDiffPerf_test[patIndx, 27] + precision[patIndx] = AllSubjDiffPerf_test[patIndx, 28] + f1[patIndx] = AllSubjDiffPerf_test[patIndx, 29] + far[patIndx] = AllSubjDiffPerf_test[patIndx, 35] + +f1_std_chb = f1 +recall_std_chb = recall +precision_std_chb = precision +# idx_sorted = np.argsort(-f1) #always ascending, so the minus is used to invert the order +# p = np.arange(np.size(GeneralParams.patients)) +# xValues = GeneralParams.patients[idx_sorted] +ax1 = fig1.add_subplot(gs[1,0]) +ax1.plot(f1[idx_sorted]*100, '*', color='r', alpha=0.5, markersize=mksize) +ax1.plot(recall[idx_sorted]*100, '+', color='b', alpha=0.75, markersize=mksize) +ax1.plot(precision[idx_sorted]*100, '.', color='g', alpha=0.75, markersize=mksize) + +# ax1.set_title('CHB-MIT') +ax1.set_ylabel('%', fontsize=font_size) +ax1.set_xlabel('Subjects', fontsize=font_size+1) +ax1.set_title('CLF - CHB-MIT', fontsize=font_size+2) +ax1.spines['top'].set_visible(False) +ax1.spines['right'].set_visible(False) +ax1.grid(alpha=0.5, linestyle='--', linewidth=1) +# plt.plot(xValues, far, 'o', color='r') +perfNames = ['F1= ' + str(np.mean(f1_std_chb)*100)[0:4] + ' ± ' + str(np.std(f1_std_chb)*100)[0:4],\ + 'Sens= ' + str(np.mean(recall_std_chb)*100)[0:4] + ' ± ' + str(np.std(recall_std_chb)*100)[0:4],\ + 'Prec= ' + str(np.mean(precision_std_chb)*100)[0:4] + ' ± ' + str(np.std(precision_std_chb)*100)[0:4]] +ax1.legend(perfNames,fontsize=font_size-1) +plt.xticks(p, p[idx_sorted]+1, fontsize=font_size) +plt.yticks([0, 20, 40, 60, 80, 100], fontsize=font_size) + + + +#------------------------------------------------------------------------------------------------------- +#Subplot SWEC - AZC + +Dataset='01_SWEC' +patients =['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18'] +GeneralParams.patients = patients +folderOut = folderResults + Dataset + '/05_RF_TSCV/' + datasetPreparationType +'ZeroCross/' +folderOut=folderOut +'PerformanceWithAppendedTests/' +outputName = folderOut + 'AllSubj_AppendedTest_AllPerfMeasures.csv.gz' +AllSubjDiffPerf_test= readDataFromFile(outputName) + +p = np.arange(np.size(GeneralParams.patients)) + +f1 = np.zeros(len(GeneralParams.patients)) +recall = np.zeros(len(GeneralParams.patients)) +precision = np.zeros(len(GeneralParams.patients)) +far = np.zeros(len(GeneralParams.patients)) +for patIndx, pat in enumerate(GeneralParams.patients): + recall[patIndx] = AllSubjDiffPerf_test[patIndx, 27] + precision[patIndx] = AllSubjDiffPerf_test[patIndx, 28] + f1[patIndx] = AllSubjDiffPerf_test[patIndx, 29] + far[patIndx] = AllSubjDiffPerf_test[patIndx, 35] + +f1_azc_ieeg = f1 +recall_azc_ieeg = recall +precision_azc_ieeg = precision +idx_sorted = np.argsort(-f1_azc_ieeg) #always ascending, so the minus is used to invert the order + +ax1 = fig1.add_subplot(gs[0,1]) +ax1.plot(f1[idx_sorted]*100, '*', color='r', alpha=0.5, markersize=mksize) +ax1.plot(recall[idx_sorted]*100, '+', color='b', alpha=0.75, markersize=mksize) +ax1.plot(precision[idx_sorted]*100, '.', color='g', alpha=0.75, markersize=mksize) +ax1.grid(alpha=0.5, linestyle='--', linewidth=1) +ax1.set_title('AZC - SWEC-ETHZ', fontsize=font_size+2) +# ax1.set_ylabel('%', fontsize=font_size) +# ax1.set_xlabel('Subjects', fontsize=font_size+1) +# ax1.set_title('SWEC-ETHZ') +ax1.spines['top'].set_visible(False) 
+
+
+#-------------------------------------------------------------------------------------------------------
+#Subplot SWEC - AZC
+
+Dataset='01_SWEC'
+patients =['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18']
+GeneralParams.patients = patients
+folderOut = folderResults + Dataset + '/05_RF_TSCV/' + datasetPreparationType +'ZeroCross/'
+folderOut=folderOut +'PerformanceWithAppendedTests/'
+outputName = folderOut + 'AllSubj_AppendedTest_AllPerfMeasures.csv.gz'
+AllSubjDiffPerf_test= readDataFromFile(outputName)
+
+p = np.arange(np.size(GeneralParams.patients))
+
+f1 = np.zeros(len(GeneralParams.patients))
+recall = np.zeros(len(GeneralParams.patients))
+precision = np.zeros(len(GeneralParams.patients))
+far = np.zeros(len(GeneralParams.patients))
+for patIndx, pat in enumerate(GeneralParams.patients):
+    recall[patIndx] = AllSubjDiffPerf_test[patIndx, 27]
+    precision[patIndx] = AllSubjDiffPerf_test[patIndx, 28]
+    f1[patIndx] = AllSubjDiffPerf_test[patIndx, 29]
+    far[patIndx] = AllSubjDiffPerf_test[patIndx, 35]
+
+f1_azc_ieeg = f1
+recall_azc_ieeg = recall
+precision_azc_ieeg = precision
+idx_sorted = np.argsort(-f1_azc_ieeg) #np.argsort sorts ascending, so the minus inverts the order
+
+ax1 = fig1.add_subplot(gs[0,1])
+ax1.plot(f1[idx_sorted]*100, '*', color='r', alpha=0.5, markersize=mksize)
+ax1.plot(recall[idx_sorted]*100, '+', color='b', alpha=0.75, markersize=mksize)
+ax1.plot(precision[idx_sorted]*100, '.', color='g', alpha=0.75, markersize=mksize)
+ax1.grid(alpha=0.5, linestyle='--', linewidth=1)
+ax1.set_title('AZC - SWEC-ETHZ', fontsize=font_size+2)
+# ax1.set_ylabel('%', fontsize=font_size)
+# ax1.set_xlabel('Subjects', fontsize=font_size+1)
+# ax1.set_title('SWEC-ETHZ')
+ax1.spines['top'].set_visible(False)
+ax1.spines['right'].set_visible(False)
+
+perfNames = ['F1= ' + str(np.mean(f1_azc_ieeg)*100)[0:4] + ' ± ' + str(np.std(f1_azc_ieeg)*100)[0:4],\
+             'Sens= ' + str(np.mean(recall_azc_ieeg)*100)[0:4] + ' ± ' + str(np.std(recall_azc_ieeg)*100)[0:4],\
+             'Prec= ' + str(np.mean(precision_azc_ieeg)*100)[0:4] + ' ± ' + str(np.std(precision_azc_ieeg)*100)[0:4]]
+ax1.legend(perfNames,fontsize=font_size-1)
+plt.xticks(p, p[idx_sorted]+1, fontsize=font_size)
+plt.yticks([0, 20, 40, 60, 80, 100], fontsize=font_size)
+
+
+#-------------------------------------------------------------------------------------------------------
+#Subplot SWEC - CLF
+folderOut = folderResults + Dataset + '/05_RF_TSCV/' + datasetPreparationType +'StandardFeat/'
+folderOut=folderOut +'PerformanceWithAppendedTests/'
+outputName = folderOut + 'AllSubj_AppendedTest_AllPerfMeasures.csv.gz'
+AllSubjDiffPerf_test= readDataFromFile(outputName)
+
+p = np.arange(np.size(GeneralParams.patients))
+
+f1 = np.zeros(len(GeneralParams.patients))
+recall = np.zeros(len(GeneralParams.patients))
+precision = np.zeros(len(GeneralParams.patients))
+far = np.zeros(len(GeneralParams.patients))
+for patIndx, pat in enumerate(GeneralParams.patients):
+    recall[patIndx] = AllSubjDiffPerf_test[patIndx, 27]
+    precision[patIndx] = AllSubjDiffPerf_test[patIndx, 28]
+    f1[patIndx] = AllSubjDiffPerf_test[patIndx, 29]
+    far[patIndx] = AllSubjDiffPerf_test[patIndx, 35]
+
+#NOTE: idx_sorted from the AZC panel above is reused on purpose, so subjects keep the same ordering in both SWEC-ETHZ panels
+# idx_sorted = np.argsort(-f1) #np.argsort sorts ascending, so the minus inverts the order
+f1_std_ieeg = f1
+recall_std_ieeg = recall
+precision_std_ieeg = precision
+
+ax1 = fig1.add_subplot(gs[1,1])
+ax1.plot(f1[idx_sorted]*100, '*', color='r', alpha=0.5, markersize=mksize)
+ax1.plot(recall[idx_sorted]*100, '+', color='b', alpha=0.75, markersize=mksize)
+ax1.plot(precision[idx_sorted]*100, '.', color='g', alpha=0.75, markersize=mksize)
+ax1.grid(alpha=0.5, linestyle='--', linewidth=1)
+# ax1.set_ylabel('%', fontsize=font_size)
+ax1.set_xlabel('Subjects', fontsize=font_size+1)
+ax1.set_title('CLF - SWEC-ETHZ', fontsize=font_size+2)
+# ax1.set_title('SWEC-ETHZ')
+ax1.spines['top'].set_visible(False)
+ax1.spines['right'].set_visible(False)
+
+perfNames = ['F1= ' + str(np.mean(f1_std_ieeg)*100)[0:4] + ' ± ' + str(np.std(f1_std_ieeg)*100)[0:4],\
+             'Sens= ' + str(np.mean(recall_std_ieeg)*100)[0:4] + ' ± ' + str(np.std(recall_std_ieeg)*100)[0:4],\
+             'Prec= ' + str(np.mean(precision_std_ieeg)*100)[0:4] + ' ± ' + str(np.std(precision_std_ieeg)*100)[0:4]]
+ax1.legend(perfNames,fontsize=font_size-1)
+plt.xticks(p, p[idx_sorted]+1, fontsize=font_size)
+plt.yticks([0, 20, 40, 60, 80, 100], fontsize=font_size)
+
+
+print('AZC- Sen > 0.7: ' + str((np.sum(recall_azc_chb>0.7) + np.sum(recall_azc_ieeg>0.7))/42)) #42 = 24 CHB-MIT + 18 SWEC-ETHZ subjects
+print('STD- Sen > 0.7: ' + str((np.sum(recall_std_chb>0.7) + np.sum(recall_std_ieeg>0.7))/42))
+
+print('AZC- F1 > 0.7: ' + str((np.sum(f1_azc_chb>0.7) + np.sum(f1_azc_ieeg>0.7))/42))
+print('STD - F1 > 0.7: ' + str((np.sum(f1_std_chb>0.7) + np.sum(f1_std_ieeg>0.7))/42))
+
+# plt.show()
+plt.savefig(folderResults+'AllSubj_Consolidate_PaperPerfMeasures.pdf', bbox_inches='tight', dpi=200, format='pdf')
+plt.close()
diff --git a/script_AZC_FeatDivergence.py b/script_AZC_FeatDivergence.py
new file mode 100644
index 0000000..d201a24
--- /dev/null
+++ b/script_AZC_FeatDivergence.py
@@ -0,0 +1,288 @@
+#!~/anaconda3/bin python
+
+__authors__ = "Una Pale and Renato Zanetti"
+__license__ = "LGPL"
+__version__ = "1.0"
+__email__ = "{una.pale, renato.zaneti} at epfl.ch"
+
+'''script that:
+- calculates KL
divergence values (KL_NSS - non-seizure to seizure, JS - Jensen-Shannon)
+- calculates them from raw feature values and saves them for each subject
+- calculates the average over all subjects and plots it in different ways
+'''
+
+from parametersSetup import *
+from VariousFunctionsLib import *
+
+from matplotlib.patches import Polygon
+
+###################################
+# Initial setup
+###################################
+
+Dataset='01_SWEC' #'01_SWEC', '02_CHB' #select the dataset to be processed
+Server = 1 #0 - local execution, 1 - server execution
+datasetPreparationType='Filt_1to20Hz' # 'Raw', 'Filt_1to20Hz'
+
+# DEFINING INPUT/OUTPUT FOLDERS
+if Dataset=='01_SWEC':
+    if Server:
+        folderInData = '/shares/eslfiler1/databases/medical/swec-ethz/ieeg/long-term-dataset/'
+        os.nice(20) #lower the process priority for this long-running job
+    else:
+        folderInData = '../iEEG/'
+    patients =['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18']
+
+
+if Dataset=='02_CHB':
+    if Server:
+        folderInData = '/shares/eslfiler1/databases/medical/chb-mit/edf/' #expects all subjects' files in the same directory
+        os.nice(20) #lower the process priority for this long-running job
+    else:
+        folderInData = '../chbmit/'
+    patients =['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24']
+
+#folder that stores partial results
+if Server:
+    folderResults = '/shares/eslfiler1/scratch/ZeroCrossFeatEpilepsy/'
+else:
+    folderResults = '../AZC_results/'
+
+folderOutFeatures= folderResults+Dataset+'/02_Features_' +datasetPreparationType + '/'
+folderDiverg= folderResults+Dataset+'/04_FeatureDivergence/'
+createFolderIfNotExists(folderDiverg)
+
+# ###########################################################################################
+#SETUPS
+GeneralParams.patients= patients
+GeneralParams.plottingON=0
+GeneralParams.PersGenApproach='personalized' #'personalized', 'generalized'
+StandardMLParams.modelType = 'RandomForest' # 'KNN', 'SVM', 'DecisionTree', 'RandomForest','BaggingClassifier','AdaBoost'
+
+FeaturesParams.numStandardFeat=62 #56 (Sopic2018) + 6 AZC
+
+numBins=100 #(30,100) bins used for the histogram calculation
+FeatNames=np.array(['meanAmpl', 'LineLength', 'samp_1_d7_1', 'samp_1_d6_1', 'samp_2_d7_1', 'samp_2_d6_1', 'perm_d7_3', 'perm_d7_5', 'perm_d7_7', 'perm_d6_3', 'perm_d6_5', 'perm_d6_7', 'perm_d5_3', 'perm_d5_5', 'perm_d5_7', 'perm_d4_3', 'perm_d4_5', 'perm_d4_7', 'perm_d3_3', 'perm_d3_5', 'perm_d3_7',
+                    'shannon_en_sig', 'renyi_en_sig', 'tsallis_en_sig', 'shannon_en_d7', 'renyi_en_d7', 'tsallis_en_d7', 'shannon_en_d6', 'renyi_en_d6', 'tsallis_en_d6', 'shannon_en_d5', 'renyi_en_d5', 'tsallis_en_d5', 'shannon_en_d4', 'renyi_en_d4', 'tsallis_en_d4', 'shannon_en_d3', 'renyi_en_d3', 'tsallis_en_d3',
+                    'p_dc_rel', 'p_mov_rel', 'p_delta_rel', 'p_theta_rel', 'p_alfa_rel', 'p_middle_rel', 'p_beta_rel', 'p_gamma_rel', 'p_dc', 'p_mov', 'p_delta', 'p_theta', 'p_alfa', 'p_middle', 'p_beta', 'p_gamma', 'p_tot',
+                    'zeroCrossNoApprox', 'ZC_approx_16', 'ZC_approx_32', 'ZC_approx_64', 'ZC_approx_128', 'ZC_approx_256' ])
+
+
+############################################################################################
+# CALCULATING KL DIVERGENCE
+numFeat=FeaturesParams.numStandardFeat* len(SigInfoParams.chToKeep)
+JSdiverg=np.zeros((len(GeneralParams.patients), numFeat))
+KLdiverg_NSS = np.zeros((len(GeneralParams.patients), numFeat))
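+#NOTE (illustrative): kl_divergence and js_divergence come from VariousFunctionsLib
+#and are not part of this diff; the sketches below only document their assumed
+#behaviour on the normalised histograms computed later, and are never called here.
+def _kl_divergence_sketch(p, q, eps=1e-12):
+    '''assumed behaviour of kl_divergence: D_KL(p||q) for two discrete
+    distributions of equal length; eps avoids log(0) and division by zero'''
+    p = np.asarray(p, dtype=float) + eps
+    q = np.asarray(q, dtype=float) + eps
+    return np.sum(p * np.log(p / q))
+
+def _js_divergence_sketch(p, q):
+    '''assumed behaviour of js_divergence: symmetric Jensen-Shannon divergence,
+    built from two KL terms against the mixture m'''
+    m = 0.5 * (np.asarray(p, dtype=float) + np.asarray(q, dtype=float))
+    return 0.5 * _kl_divergence_sketch(p, m) + 0.5 * _kl_divergence_sketch(q, m)
+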
+for patIndx, pat in enumerate(GeneralParams.patients):
+    if Dataset=='01_SWEC':
+        filesIn=np.sort(glob.glob(folderOutFeatures + 'ID' + pat + '/ID' + pat + '*_Labels.csv.gz'))
+    if Dataset=='02_CHB':
+        filesIn = np.sort(glob.glob(folderOutFeatures +'chb' + pat+ '/chb' + pat + '*_Labels.csv.gz'))
+
+    numFiles=len(filesIn)
+    print('-- Patient:', pat, 'NumFiles:', numFiles)
+
+    #load all files of that subject and concatenate feature values
+    for fIndx, fileName in enumerate(filesIn):
+        print('----- File:', fIndx)
+        #load labels
+        labels0 = readDataFromFile(fileName)
+        #load feature values
+        fileName2 = fileName[:-14] + '_OtherFeat.csv.gz'
+        OtherFeat0 = readDataFromFile(fileName2)
+        fileName2 = fileName[:-14] + '_ZCFeat.csv.gz'
+        ZCFeat0 = readDataFromFile(fileName2)
+
+        nch = np.int32((ZCFeat0.shape[1] + OtherFeat0.shape[1])/FeaturesParams.numStandardFeat)
+        AllFeat0=mergeFeatFromTwoMatrixes(OtherFeat0, ZCFeat0, nch)
+        if (fIndx==0):
+            LabelsAll=labels0
+            FeatAll=AllFeat0
+        else:
+            LabelsAll=np.vstack((LabelsAll, labels0))
+            FeatAll = np.vstack((FeatAll, AllFeat0))
+
+    #calculate histograms for seizure and non-seizure data
+    for f in range(numFeat):
+        (SeizHist, nonSeizHist) = calcHistogramValues_v2(FeatAll[:,f], LabelsAll,numBins)
+        KLdiverg_NSS[patIndx, f] = kl_divergence(nonSeizHist[0],SeizHist[0])
+        JSdiverg[patIndx,f] = js_divergence(nonSeizHist[0], SeizHist[0])
+
+    # save divergence values (re-written after every subject, so partial results are kept)
+    outputName = folderDiverg + 'AllSubj_KLdivergence_NSS.csv'
+    np.savetxt(outputName, KLdiverg_NSS, delimiter=",")
+    outputName = folderDiverg + 'AllSubj_JSdivergence.csv'
+    np.savetxt(outputName, JSdiverg, delimiter=",")
+
+############################################################################################
+############################################################################################
+# PLOTTING
+reader = csv.reader(open(folderDiverg + 'AllSubj_KLdivergence_NSS.csv', "r"))
+KLdiverg_NSS = np.array(list(reader)).astype("float")
+reader = csv.reader(open(folderDiverg + 'AllSubj_JSdivergence.csv', "r"))
+JSdiverg = np.array(list(reader)).astype("float")
+
+#analysing per channel
+KLdiverg_NSS_reshaped=np.reshape(KLdiverg_NSS, (len(GeneralParams.patients),-1,FeaturesParams.numStandardFeat ))
+KLdiverg_NSS_meanForCh=np.nanmean(KLdiverg_NSS_reshaped,1)
+KLdiverg_NSS_stdForCh=np.nanstd(KLdiverg_NSS_reshaped,1)
+JSdiverg_reshaped=np.reshape(JSdiverg, (len(GeneralParams.patients),-1,FeaturesParams.numStandardFeat ))
+JSdiverg_meanForCh=np.nanmean(JSdiverg_reshaped,1)
+JSdiverg_stdForCh=np.nanstd(JSdiverg_reshaped,1)
+
+####################################
+#PLOTTING KL DIVERGENCE PER SUBJECT
+fig1 = plt.figure(figsize=(16, 16), constrained_layout=False)
+gs = GridSpec(6, 4, figure=fig1)
+fig1.subplots_adjust(wspace=0.4, hspace=0.6)
+xValues = np.arange(0, FeaturesParams.numStandardFeat, 1)
+for p, pat in enumerate(GeneralParams.patients):
+    ax1 = fig1.add_subplot(gs[int(np.floor(p / 4)), np.mod(p, 4)])
+    ax1.errorbar(xValues, KLdiverg_NSS_meanForCh[p, :], yerr=KLdiverg_NSS_stdForCh[p, :], fmt='b', label='KL_NSS')
+    ax1.errorbar(xValues, JSdiverg_meanForCh[p, :], yerr=JSdiverg_stdForCh[p, :], fmt='m', label='JS')
+    ax1.legend()
+    ax1.set_xlabel('Feature')
+    ax1.set_ylabel('Divergence')
+    ax1.set_title('Subj ' + pat)
+    ax1.grid()
+fig1.show()
+fig1.savefig(folderDiverg + 'AllSubj_DifferentDivergenceMeasures_perSubj.png', bbox_inches='tight')
+plt.close(fig1)
+
+
+####################################
+# CALCULATING AVERAGE KL VALUES FOR ALL SUBJECTS
+KLdiverg_NSS_meanAllSubj=np.nanmean(KLdiverg_NSS_meanForCh,0)
+JSdiverg_meanAllSubj=np.nanmean(JSdiverg_meanForCh,0)
+KLdiverg_NSS_stdAllSubj=np.nanstd(KLdiverg_NSS_meanForCh,0)
+JSdiverg_stdAllSubj=np.nanstd(JSdiverg_meanForCh,0)
+# save averages over all subjects (mean in the first row, std in the second)
+outputName = folderDiverg + 'AllSubjAvrg_KLdivergence_NSS.csv'
+np.savetxt(outputName, np.vstack((KLdiverg_NSS_meanAllSubj, KLdiverg_NSS_stdAllSubj)), delimiter=",")
+outputName = folderDiverg + 'AllSubjAvrg_JSdivergence.csv'
+np.savetxt(outputName, np.vstack((JSdiverg_meanAllSubj, JSdiverg_stdAllSubj)), delimiter=",")
+
+#plotting
+fig1 = plt.figure(figsize=(16, 16), constrained_layout=False)
+gs = GridSpec(2, 1, figure=fig1)
+fig1.subplots_adjust(wspace=0.4, hspace=0.2)
+#fig1.suptitle('Feature ')
+xValues = np.arange(0, FeaturesParams.numStandardFeat, 1)
+ax1 = fig1.add_subplot(gs[0,0])
+ax1.errorbar(xValues, KLdiverg_NSS_meanAllSubj, yerr=KLdiverg_NSS_stdAllSubj, fmt='b', label='KL_NSS')
+#ax1.errorbar(xValues, JSdiverg_meanAllSubj, yerr=JSdiverg_stdAllSubj, fmt='k', label='JS')
+ax1.legend()
+ax1.set_xlabel('Feature')
+ax1.set_ylabel('Divergence')
+ax1.set_title('Kullback Leibler divergence')
+ax1.grid()
+ax1 = fig1.add_subplot(gs[1,0])
+ax1.errorbar(xValues, JSdiverg_meanAllSubj, yerr=JSdiverg_stdAllSubj, fmt='m', label='JS')
+ax1.legend()
+ax1.set_xlabel('Feature')
+ax1.set_ylabel('Divergence')
+ax1.set_title('Jensen Shannon divergence')
+ax1.grid()
+fig1.show()
+fig1.savefig(folderDiverg + 'AllSubj_DifferentDivergenceMeasures_avrgAllSubj.png', bbox_inches='tight')
+plt.close(fig1)
+
+#PLOTTING ONLY NSS WITH BOXPLOTS
+fig1 = plt.figure(figsize=(16, 16), constrained_layout=False)
+gs = GridSpec(2, 1, figure=fig1)
+fig1.subplots_adjust(wspace=0.4, hspace=0.2)
+#fig1.suptitle('Feature ')
+xValues = np.arange(0, FeaturesParams.numStandardFeat, 1)
+ax1 = fig1.add_subplot(gs[0,0])
+ax1.boxplot(KLdiverg_NSS_meanForCh, medianprops=dict(color='red', linewidth=2),
+            boxprops=dict(linewidth=2), capprops=dict(linewidth=2), whiskerprops=dict(linewidth=2), showfliers=False)
+ax1.set_xlabel('Feature')
+ax1.set_xticks(np.arange(1, FeaturesParams.numStandardFeat+1, 1))
+ax1.set_xticklabels(FeatNames, fontsize=10, rotation=45, ha='right', rotation_mode='anchor')
+ax1.set_ylabel('Divergence')
+ax1.set_title('Kullback Leibler divergence')
+ax1.grid()
+fig1.show()
+fig1.savefig(folderDiverg + 'AllSubj_KLdiverg_NSS_avrgAllSubj_BoxplotAllCh.png', bbox_inches='tight')
+fig1.savefig(folderDiverg + 'AllSubj_KLdiverg_NSS_avrgAllSubj_BoxplotAllCh.svg', bbox_inches='tight')
+plt.close(fig1)
+
+
+
+############################################################################################
+## RANK THE BEST FEATURES BASED ON KL AND JS DIVERGENCE, AND SAVE THE SORTED LISTS
+featNames=FeatNames #same list (and ordering) as FeatNames defined above
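+#NOTE (illustrative): np.argsort sorts in ascending order, so negating the
+#array below ranks features from most to least divergent, e.g.
+#   np.argsort(-np.array([0.1, 0.5, 0.3]))  ->  array([1, 2, 0])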
+indexSorted=np.argsort(-JSdiverg_meanAllSubj)
+ranking=np.arange(1,len(indexSorted)+1,1)
+dataAppend=np.vstack((ranking, indexSorted, featNames[indexSorted],JSdiverg_meanAllSubj[indexSorted])).transpose()
+FeaturesSorted_JS = pd.DataFrame(dataAppend, columns=['Ranking', 'FeatIndex', 'FeatName', 'JS_divergence'])
+fileOutputName=folderDiverg + '/AllSubj_FeaturesSorted_JS.csv'
+FeaturesSorted_JS.to_csv(fileOutputName)
+
+indexSorted=np.argsort(-KLdiverg_NSS_meanAllSubj)
+ranking=np.arange(1,len(indexSorted)+1,1)
+dataAppend=np.vstack((ranking, indexSorted, featNames[indexSorted],KLdiverg_NSS_meanAllSubj[indexSorted])).transpose()
+FeaturesSorted_KL_NSS = pd.DataFrame(dataAppend, columns=['Ranking', 'FeatIndex', 'FeatName', 'KL_NSS_divergence'])
+fileOutputName=folderDiverg + '/AllSubj_FeaturesSorted_KL_NSS.csv'
+FeaturesSorted_KL_NSS.to_csv(fileOutputName)
+
+
+#--------------------------------------------------------------------------------------------------------------------
+# PLOT FOR PAPER
+numFeat =FeaturesParams.numStandardFeat
+font_size=14
+fig1, ax1 = plt.subplots(figsize=(14, 7))
+
+fig1.canvas.set_window_title('KL divergence')
+fig1.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25)
+
+bp = ax1.boxplot(KLdiverg_NSS_meanForCh, medianprops=dict(color='k', linewidth=1), boxprops=dict(linewidth=0.5, color='k'),
+                 capprops=dict(linewidth=1), whiskerprops=dict(linewidth=1), showfliers=False, showcaps=False)
+
+box_colors = ['lightcoral','#FFA07A', 'papayawhip', '#BDFCC9', 'mediumseagreen', 'olive', '#33A1C9'] #mint=BDFCC9, peacock = 33A1C9
+num_colors = len(box_colors)
+num_boxes = [2,4,15,18,8,9,6] #number of features per feature type
+
+medians = np.empty(numFeat)
+n_used_box=0
+for l in range(num_colors):
+    for i in range(num_boxes[l]):
+        box = bp['boxes'][i+n_used_box]
+        boxX = []
+        boxY = []
+        for j in range(5):
+            boxX.append(box.get_xdata()[j])
+            boxY.append(box.get_ydata()[j])
+        box_coords = np.column_stack([boxX, boxY])
+        # fill each box with the colour of its feature group
+        ax1.add_patch(Polygon(box_coords, facecolor=box_colors[l]))
+
+    n_used_box = n_used_box + num_boxes[l]
+
+colorLegends = ['Time domain', 'Sample Entropy', 'Permutation Entropy', 'Shan_Ren_Tsa Entropy', 'Relative Power', 'Bandpower', 'AZC']
+ax1.legend(colorLegends,fontsize=font_size-2)
+
+ax1.set_xlabel('Features', fontsize=font_size+1)
+# ax1.set_xticks(np.arange(1, FeaturesParams.numStandardFeat+1, 1))
+ax1.set_xticks([])
+ax1.tick_params(axis='y', which='major', labelsize=font_size-1)
+# ax1.set_axisbelow(True)
+# ax1.set_xticklabels(FeatNames, fontsize=10, rotation=45, ha='right', rotation_mode='anchor')
+ax1.set_ylabel('KL Scores', fontsize=font_size+1)
+ax1.spines['top'].set_visible(False)
+ax1.spines['right'].set_visible(False)
+ax1.spines['bottom'].set_visible(False)
+ax1.spines['left'].set_visible(False)
+# ax1.set_title('Kullback Leibler divergence')
+ax1.grid(True, linestyle='-', which='major', color='lightgrey',
+         alpha=0.5)
+fig1.show()
+fig1.savefig(folderDiverg + '/'+ Dataset+ '_AllSubj_KLdiverg_NSS_avrgAllSubj_BoxplotAllCh.svg', bbox_inches='tight', dpi=200, format='svg')
+plt.close(fig1)
+
+
+
+
diff --git a/script_AZC_datasetPreProcessing.py b/script_AZC_datasetPreProcessing.py
new file mode 100644
index 0000000..901d321
--- /dev/null
+++ b/script_AZC_datasetPreProcessing.py
@@ -0,0 +1,99 @@
+#!~/anaconda3/bin python
+
+__authors__ = "Una Pale and Renato Zanetti"
+__license__ = "LGPL"
+__version__ = "1.0"
+__email__ = "{una.pale, renato.zaneti} at epfl.ch"
+
+'''script that:
+ - converts edf files to csv files without
changing the data (CHB-MIT only), adding the labels
+ - calculates features for each input file and saves them
+ - reorders feature data on a rolling basis (TSCV approach) and stores an index vector for the future train/test division
+ - plots labels in time (before/after TSCV reordering)
+'''
+
+from parametersSetup import *
+from ZeroCrossLibrary import *
+
+###################################
+# Initial setup
+###################################
+
+Dataset='01_SWEC' #'01_SWEC', '02_CHB' #select the dataset to be processed
+Server = 1 #0 - local execution, 1 - server execution
+parallelize=1 #choose whether to parallelize feature extraction (one file per core)
+perc_cores= 0.5 #percentage of cores targeted to be charged with data processing
+
+datasetPreparationType='Filt_1to20Hz' # 'Raw', 'Filt_1to20Hz'
+
+# DEFINING INPUT/OUTPUT FOLDERS
+if Dataset=='01_SWEC':
+    if Server:
+        folderInData = '/shares/eslfiler1/databases/medical/swec-ethz/ieeg/long-term-dataset/'
+        os.nice(20) #lower the process priority for this long-running job
+    else:
+        folderInData = '../iEEG/'
+    patients =['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18']
+
+if Dataset=='02_CHB':
+    if Server:
+        folderInData = '/shares/eslfiler1/databases/medical/chb-mit/edf/' #expects all subjects' files in the same directory
+        os.nice(20) #lower the process priority for this long-running job
+        samedir=1 #1 - in case all chb files are in the same directory
+    else:
+        folderInData = '../chbmit/'
+        samedir=0 #1 - in case all chb files are in the same directory
+    patients =['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24']
+
+#folder that stores partial results
+if Server:
+    folderResults = '/shares/eslfiler1/scratch/ZeroCrossFeatEpilepsy/'
+else:
+    folderResults = '../AZC_results/'
+
+createFolderIfNotExists(folderResults)
+createFolderIfNotExists( folderResults+Dataset)
+
+#folder for feature files
+folderOutFeatures= folderResults+Dataset+'/02_Features_' +datasetPreparationType + '/'
+createFolderIfNotExists(folderOutFeatures)
+
+###################################
+# Parameter updates with respect to the definitions in 'parametersSetup.py'
+GeneralParams.patients = patients
+
+# ############################################################################################
+#CHB-MIT edf files are converted to csv, adding an extra column for labels
+if Dataset=='02_CHB':
+    folderOutCSV= folderResults+Dataset+'/01_DataCSV_raw/'
+    createFolderIfNotExists(folderOutCSV)
+    # the next function assumes that all CHB-MIT edf files are in the same directory
+    extractEDFdataToCSV_originalData(parallelize, perc_cores, samedir, folderInData, folderOutCSV, SigInfoParams, patients)
+
+#########################################################################
+# CALCULATE FEATURES FOR EACH DATA FILE
+calculateCLFFeat = 1 #enable the calculation of each group of features
+calculateAZCFeat = 1
+if Dataset=='01_SWEC':
+    calculateFeaturesPerEachFile_CLFAndAZCFeatures_SWEC(parallelize, perc_cores, ZeroCrossFeatureParams.EPS_thresh_arr,folderInData, folderOutFeatures, GeneralParams.patients, SigInfoParams, ZeroCrossFeatureParams, calculateCLFFeat, calculateAZCFeat)
+
+if Dataset=='02_CHB':
+    calculateFeaturesPerEachFile_CLFAndAZCFeatures_CHB(parallelize, perc_cores, ZeroCrossFeatureParams.EPS_thresh_arr,folderOutCSV, folderOutFeatures, GeneralParams.patients, SigInfoParams, ZeroCrossFeatureParams, calculateCLFFeat, calculateAZCFeat)
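+
+#NOTE (illustrative, not the library code): 'parallelize' and 'perc_cores' are
+#assumed to drive a per-file worker pool inside the feature-extraction helpers
+#above; a minimal sketch of that idea, with hypothetical names, never called here:
+import multiprocessing as mp
+def _processAllFilesSketch(files, perc_cores, processOneFile):
+    '''hypothetical helper: processes one file per worker, using only the
+    requested fraction of the available cores'''
+    nWorkers = max(1, int(perc_cores * mp.cpu_count()))
+    with mp.Pool(nWorkers) as pool:
+        pool.map(processOneFile, files)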
+
+# Plotting data labels before reorganizing files for the TSCV approach
+plotRawDataLabels(Dataset, folderOutFeatures, GeneralParams,SegSymbParams)
+
+###########################################################################################
+# Reorganizing feature files for TSCV (first file containing min 6h of data and a seizure)
+windowSize=60*60 #target file length, in seconds of data
+folderOutTSCV=folderResults+Dataset+'/03_Features_' +datasetPreparationType+'_TSCVprep_'+str(windowSize) +'s_DivBig/'
+createFolderIfNotExists(folderOutTSCV)
+
+#Since CHB-MIT has big time gaps between some files of a subject, and some files hold more than 1 hour of data, we generate the indexes
+#for TSCV classification so that the first file contains a minimum of 5 hours of data and a seizure, trying to keep data grouped by origin
+#file and only subdividing the data of a file in case it is 1.5x bigger than the target 'windowSize'. Test data always originates from a
+#single file, so no data is merged from multiple files.
+generateTSCVindexesFromAllFiles_storeTSCVLabels(Dataset, GeneralParams, ZeroCrossFeatureParams, SegSymbParams, folderOutFeatures, folderOutTSCV, windowSize)
+
+plotRearangedDataLabelsInTime(Dataset, folderOutTSCV, GeneralParams,SegSymbParams)
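+
+#NOTE (illustrative sketch of the TSCV idea described above, with hypothetical
+#names; the actual index generation lives in generateTSCVindexesFromAllFiles_storeTSCVLabels):
+def _rollingOriginSplitsSketch(numFiles, firstTrainFiles):
+    '''hypothetical helper: in time-series cross-validation every test file is
+    predicted from all files that precede it in time, so training data only grows'''
+    for testIndx in range(firstTrainFiles, numFiles):
+        yield (np.arange(testIndx), testIndx)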