diff --git a/pre_proc/ECG_lvlCrossing.py b/pre_proc/ECG_lvlCrossing.py index 51fd261..986a467 100644 --- a/pre_proc/ECG_lvlCrossing.py +++ b/pre_proc/ECG_lvlCrossing.py @@ -1,199 +1,196 @@ """ Created on Fri Feb 22 09:07:30 2019 Simple script to perform a non-uniform subsampling of an ECG record in the MIT-BIH format, using the Wall-Danielsson algorithm. As output, it produces a 2-column csv file with the sample times and values. Dependencies: - numpy - tqdm (https://pypi.org/project/tqdm/) - wfdb-python (https://pypi.org/project/wfdb/) @authors: T. Teijeiro S.Zanoli """ import multiprocessing import os import shutil import sys from multiprocessing import Pool, Process import numpy as np import pandas as pd from tqdm import trange scripts = '../helper_scripts' if scripts not in sys.path: sys.path.insert(0,scripts) from time import time import matplotlib.pyplot as plt from csvManager import csvManager import pickle ''' with open(file_name_full,"rb") as f: data = pickle.load(f) file_out = os.path.join(out_dir,file_name.split(".")[0]+".pickle") with open(file_out, 'wb') as handle: pickle.dump(data_inverted_keys, handle, protocol=pickle.HIGHEST_PROTOCOL) ''' data_folder = "../data/extracted_data" out = "../data/level_crossing" log_dir = "../data/logs/" pos = False bits_data = 11 bits = range(1,bits_data+1,1) hister = 5 -def ADC(values, original_bits = 12, all_pos = True, nBits = 5, hist = 0): - """ - To write - """ +def ADC(beat, nBits, hist = 5, original_bits = 11): + #ADC stats delta = 2**original_bits dV = (delta)/(2**nBits) hist = hist/100*dV - if all_pos: - #max_val = delta - min_val = 0 - else: - #max_val = delta//2 - min_val = -delta//2 - lowTh = min_val + min_val = -delta//2 + events = {'t':[],'v':[], 'QRS_pos': beat['QRS_pos']} + #init value, first sample (we assume we always sample the nearest level at start) and ADC status + v_0 = beat['v'][0] + lowTh = min_val+((v_0-min_val)//dV)*dV highTh = lowTh + dV - index = [] - values_sampled = [] - for val,time in zip(values,range(len(values))): + events['t'].append(beat['t'][0]) + events['v'].append(int(lowTh if v_0-lowTh < highTh - v_0 else highTh)) + + for val,time in zip(beat['v'],beat['t']): + #print(f"Value: {val}, time: {time}, low_th = {lowTh - hist}, high_th = { highTh + hist}") if val > highTh + hist or val < lowTh - hist: - index.append(time) direction = 1 if val > highTh else -1 lowTh = min_val+((val-min_val)//dV)*dV #Delta from the bottom: (val-min_val)//dV*dV then compute the actual level summin min_val highTh = lowTh + dV - values_sampled.append(int(lowTh if direction == 1 else highTh)) - return index, values_sampled - + events['t'].append(time) + events['v'].append(int(lowTh if direction == 1 else highTh)) + return events def sample(file): ''' manager = csvManager() _,vals = manager.read(file) ''' with open(file,"rb") as f: data = pickle.load(f) vals = data['v'] print("------------------------- Sub-sampling file: {} -------------------------".format(file)) for b in bits: #print("\nFile: {}, subsampling with {} bits".format(file,b)) idxs, vals_samp= ADC(vals, original_bits = bits_data, all_pos = pos, nBits = b, hist = hister) compression = 1-len(vals_samp)/len(vals) #---------- SAVE BACK ---------- out_dir = os.path.join(out,str(b)+"bits") file_name = os.path.join(out_dir,os.path.basename(file)) data_out = {'t':idxs,'v':vals_samp} with open(file_name, 'wb') as handle: pickle.dump(data_out, handle, protocol=pickle.HIGHEST_PROTOCOL) #manager.write(idxs,vals_samp,file_name) log(file_name,b,compression) def log (source_file, bits, 
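# A minimal, self-contained sketch (assumed function name, toy signal and toy
# parameters) of the level-crossing sampling idea implemented by ADC() above:
# the full scale (2**original_bits) is split into 2**n_bits uniform bands and
# an event (time, level) is emitted only when the signal leaves the current
# band by more than the hysteresis margin.
import numpy as np

def level_crossing_sketch(values, n_bits=3, original_bits=11, hist_pct=5):
    full_scale = 2 ** original_bits              # signed input dynamic range
    dV = full_scale / (2 ** n_bits)              # width of one quantization band
    hist = hist_pct / 100 * dV                   # hysteresis margin around the band
    min_val = -full_scale // 2
    # start in the band containing the first sample and emit its nearest level
    low = min_val + ((values[0] - min_val) // dV) * dV
    high = low + dV
    events = [(0, low if values[0] - low < high - values[0] else high)]
    for t, v in enumerate(values):
        if v > high + hist or v < low - hist:    # the band (plus hysteresis) was left
            direction = 1 if v > high else -1
            low = min_val + ((v - min_val) // dV) * dV
            high = low + dV
            events.append((t, low if direction == 1 else high))
    return events

# Example: a 2 Hz toy oscillation sampled at 128 Hz produces only a few events.
toy = 800 * np.sin(2 * np.pi * 2 * np.arange(0, 1, 1 / 128))
print(level_crossing_sketch(toy, n_bits=3))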
compression): name = os.path.basename(source_file).split('.')[0] file_name = os.path.join(log_dir,str(bits)+"Bits") str_to_write = name + ": "+str(compression)+"\n" with open(file_name,"a") as f: f.write(str_to_write) def log_resume(): resume = {} if not os.path.isdir(log_dir): return for l in os.listdir(log_dir): if "resume" in l: continue bits = int(l[0:l.find("Bits")]) resume[bits] = {"avg":None,"std":None, "num_files":None} compressions = [] text = "" num_file = 0 with open(os.path.join(log_dir,l)) as f: text = f.readlines() for line in text: num_file += 1 compr = float(line[line.find(": ")+len(": "):]) compressions.append(compr) resume[bits]["avg"] = np.average(compressions) resume[bits]["std"] = np.std(compressions) resume[bits]["num_files"] = num_file with open(os.path.join(log_dir,"resume.txt"),"w") as f: keys = sorted(list(resume.keys())) for k in keys: line = "Bits: {}\t\tAvg: {}\tStD: {} (Total number of files:{})\n".format(str(k),resume[k]["avg"],resume[k]["std"],resume[k]["num_files"]) f.write(line) def process(multi = True,cores = 1): names = [os.path.join(data_folder,name) for name in os.listdir(data_folder)] if os.path.isdir(log_dir): shutil.rmtree(log_dir) os.mkdir(log_dir) if os.path.isdir(out): shutil.rmtree(out) os.mkdir(out) for b in bits: out_dir = os.path.join(out,str(b)+"bits") os.mkdir(out_dir) if multi: print(f"Executing level-crossing EB_sampling with {cores} cores...") used_cores = cores with Pool(used_cores) as pool: pool.map(sample, names) else: for arg in names: sample(arg) log_resume() def test_performances(): times = [] print("~"*85) print("Analyzing multicore performances in sampling signals with level crossing algorithm and saving in binary.\n" "Usefull? No, interesting, yes.") print("-"*85) print("-"*85) print("\nChecking single core (no library used)\n") start = time() process(multi=False) stop = time() print (f" Elapsed time for single core: {int((stop - start)//60)}:{int(((stop-start)-(stop - start)//60*60)//1)}\n\n") print("Using multicores...") for core_num in range(1,multiprocessing.cpu_count()): print("-"*85) print(f"Using {core_num} cores...") start = time() process(multi=True, cores=core_num) stop = time() times.append(start-stop) print (f" Elapsed time using {core_num} cores: {int((stop - start)//60)}:{int(((stop-start)-(stop - start)//60*60)//1)}\n\n") plt.figure() plt.plot(times) plt.savefig("../data/logs/perf.png") if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("--test", help="test multicore capabilities", action="store_true") parser.add_argument("--cores", help="Force used number of cores (default, half of the available ones") args = parser.parse_args() if args.test: print("TEST MULTICORE...") test_performances() else: if args.cores is not None: used_cores = int(args.cores) else: used_cores = multiprocessing.cpu_count()//2 process(multi=True, cores=used_cores) diff --git a/pre_proc/extract_data.py b/pre_proc/extract_data.py index 3fcd926..eef3885 100644 --- a/pre_proc/extract_data.py +++ b/pre_proc/extract_data.py @@ -1,110 +1,105 @@ import os import sys import matplotlib.pyplot as plt import numpy as np from scipy import interpolate import pandas as pd scripts = '../helper_scripts' if scripts not in sys.path: sys.path.insert(0,scripts) import multiprocessing from multiprocessing import Pool import MITAnnotation as MITA import wfdb from csvManager import csvManager import pickle ''' with open(file_name_full,"rb") as f: data = pickle.load(f) file_out = 
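# A short sketch (assumed name, toy input) of the uniform upsampling performed
# by upsample() in extract_data.py above: the original samples are placed on a
# grid with spacing `multiplier` and every integer position in between is
# filled by linear interpolation.
import numpy as np
from scipy import interpolate

def upsample_sketch(signal, multiplier):
    coarse_t = np.arange(len(signal)) * multiplier   # 0, m, 2m, ...
    fine_t = np.arange(coarse_t[-1] + 1)             # 0, 1, ..., (N-1)*m
    f = interpolate.interp1d(coarse_t, signal)
    return f(fine_t)

# Three samples upsampled by 4 become nine samples rising to 1.0 and back.
print(upsample_sketch([0.0, 1.0, 0.0], multiplier=4))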
os.path.join(out_dir,file_name.split(".")[0]+".pickle") with open(file_out, 'wb') as handle: pickle.dump(data_inverted_keys, handle, protocol=pickle.HIGHEST_PROTOCOL) ''' FREE_CORES = 0 data_folder = "../data/dataRaw" dest_data = "../data/extracted_data" dest_annotation = "../data/extracted_annotation/" userChannel = "ECG1" frequency_multiplier = 4 def upsample(signal,multiplier): upsampled = [] v = signal last_new_t = (len(v)-1)*multiplier t = np.arange(0,last_new_t+1,multiplier,dtype = np.int64) new_base = np.arange(0,last_new_t+1,1,dtype = np.int64) f = interpolate.interp1d(t, v) upsampled = f(new_base) return upsampled def extract_data(args): file_name = args[0] last_annot = args[1] print("working on file: {}".format(file_name)) single_name = os.path.basename(file_name) #Record reading rec = wfdb.rdrecord(file_name, channel_names=[userChannel], physical=False) #Input signal as a plain array v_orig = (rec.d_signal.reshape((1, -1))[0]).tolist() v = upsample(v_orig,frequency_multiplier)[0:last_annot+10] t = list(range(len(v)))[0:last_annot+10] file_out = os.path.join(dest_data,single_name+".pickle") data_out = {'t':t,'v':v} with open(file_out, 'wb') as handle: pickle.dump(data_out, handle, protocol=pickle.HIGHEST_PROTOCOL) - ''' - manager = csvManager() - manager.write(t,v,os.path.join(dest_data,single_name+".bin")) - ''' - def extract_annot(file_name): single_name = os.path.basename(file_name) file_source = file_name+".atr" file_dest = os.path.join(dest_annotation,single_name+".annot") times= [x.time*frequency_multiplier for x in MITA.read_annotations(file_source) if MITA.is_qrs_annotation(x)] df = pd.DataFrame(times) df.to_csv(file_dest,index= False) print("Extracted annotation: {}".format(file_name)) return times[-1] def process(multi=True, cores=1): # ------------ INIT ------------ if not os.path.isdir(dest_data): os.mkdir(dest_data) if not os.path.isdir(dest_annotation): os.mkdir(dest_annotation) # ------------ Extract DATA & ANNOTATIONS ------------ #find files: files = [] for x in os.listdir(data_folder): thisFile = os.path.join(data_folder,x) thisFileNoExt = os.path.splitext(thisFile)[0] if os.path.isfile(thisFile) and os.path.exists(thisFileNoExt+".hea"): files.append(thisFileNoExt) listOfFiles = list(set(files)) with Pool(cores) as pool: last_annot = pool.map(extract_annot, listOfFiles) pool.map(extract_data, zip(listOfFiles,last_annot)) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("--cores", help="Force used number of cores (default, half of the available ones") args = parser.parse_args() if args.cores is not None: used_cores = int(args.cores) else: used_cores = multiprocessing.cpu_count()//2 print(f"Extracting data with {used_cores} cores...") process(multi=True, cores=used_cores) \ No newline at end of file diff --git a/src/build_template.py b/src/build_template.py index a6266b1..2dffe5b 100644 --- a/src/build_template.py +++ b/src/build_template.py @@ -1,343 +1,292 @@ import os import pickle import sys import warnings from copy import copy import matplotlib.pyplot as plt import numpy as np from scipy.signal import medfilt as median_filter from sklearn.cluster import AffinityPropagation, MeanShift, estimate_bandwidth scripts = '../helper_scripts' if scripts not in sys.path: sys.path.insert(0,scripts) from dtw.dtw import dtw_std data_beats_dir = "../data/beats/" FREQ = 128 MULTIPLICATOR_FREQ = 4 MIN_TO_AVG = 5 def open_file(file_name, t_start_seconds = None, t_stop_seconds = None): ''' Data structure: File: | DICTIONARY: | 
--> Beat_annot: | --> lvl: | --> "t": [] | --> "v": [] ''' file_name_full = os.path.join(data_beats_dir,os.path.basename(file_name)) data = {} data_out = {} with open(file_name_full,"rb") as f: data = pickle.load(f) if t_start_seconds is None: t_start_seconds = list(data.keys())[0]*FREQ t_stop_seconds = list(data.keys())[-1]*FREQ for k in data.keys(): if k >= int(t_start_seconds*FREQ) and k <= int(t_stop_seconds*FREQ): data_out[k] = data[k][0] elif k > int(t_stop_seconds*FREQ): break return data_out def dtw_dist(v1,v2): dist = dtw_std(v1, v2) return dist def compute_affinity_matr(vectors): A = np.zeros((len(vectors),len(vectors))) #print(f"Size of the affinity matrix: {A.shape}") for i in range(len(vectors)): #print(i) for j in range(i,len(vectors)): #print(j) dist = -dtw_dist(vectors[i], vectors[j])**2 A[i,j] = dist A[j,i] = dist return A def min_max_normalization(vector): mi_v = min(vector) ma_v = max(vector) norm = (np.array(vector)-mi_v)/(ma_v-mi_v) norm -= np.average(norm) return list(norm) def get_significant_peak(signal): val = -1000 peak_max = None is_peak = lambda w,x,y,z: True if (wy) or (wz) else False for i in range(1,len(signal)-2): if is_peak(signal[i-1],signal[i],signal[i+1],signal[i+2]) and signal[i]> val: peak_max = i val = signal[i] return peak_max def get_beats(data, t_start_seconds = 0, t_stop_seconds = MIN_TO_AVG*60): beats = {} annotations = list(data.keys()) for annot in annotations[1:]: if annot >= int(t_start_seconds*FREQ) and annot <= int(t_stop_seconds*FREQ): beats[annot] = data[annot] elif annot > int(t_stop_seconds*FREQ): break return beats def get_r_peaks_idx(beats): r_times_idx = [] time_to_look = int(0.15*FREQ) #150 ms for annot in beats.keys(): idx = 0 signal_r_peaks = [] for i,t in enumerate(beats[annot]['t']): if t == annot - time_to_look: idx = i if t >= annot - time_to_look and t<= annot + time_to_look: signal_r_peaks.append(beats[annot]['v'][i]) peak_idx = get_significant_peak(signal_r_peaks)+idx r_times_idx.append(peak_idx) return r_times_idx def allign_beats(data, allignment_idxs, normalize = True): len_bef = len_aft = 1000 data_alligned = {} for annot, allignment_idx in zip(data.keys(),allignment_idxs): time = data[annot]['t'] this_len_bef = len(time[:allignment_idx]) this_len_aft = len(time[allignment_idx+1:]) if this_len_bef < len_bef: len_bef = this_len_bef if this_len_aft < len_aft: len_aft = this_len_aft for annot, allignment_idx in zip(data.keys(),allignment_idxs): new_t = data[annot]['t'][allignment_idx-len_bef:allignment_idx+len_aft+1] new_v = data[annot]['v'][allignment_idx-len_bef:allignment_idx+len_aft+1] if normalize: new_v = min_max_normalization(new_v) data_alligned[annot] = {'t':new_t,'v':new_v} return data_alligned def RMSE(v1,v2): v1_n = np.array(v1) v2_n = np.array(v2) return np.sqrt(np.mean((v1_n-v2_n)**2)) def compute_template_lowest_distance(beats): dist = np.zeros((len(beats),len(beats))) for i in range(len(beats)): for j in range(i+1,len(beats)): dist[i][j] = RMSE(beats[i],beats[j]) dist[j][i] = dist[i][j] dist = np.sum(dist, axis=0) idx = np.argmin(dist) return beats[idx] def build_template(file_analyzed, normalize = True, mode = 'average', t_start_seconds = 0, t_stop_seconds = MIN_TO_AVG*60): template = [] beats_alligned = [] data = open_file(file_analyzed) beats = get_beats(data, t_start_seconds = t_start_seconds, t_stop_seconds = t_stop_seconds) allignment_idxs = get_r_peaks_idx(beats) alligned_beats = allign_beats(beats, allignment_idxs, normalize=normalize) for annot in alligned_beats.keys(): 
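# A sketch of the clustering step built from compute_affinity_matr() above and
# consumed by multi_template() below: pairwise DTW distances become a
# precomputed affinity matrix (negative squared distance) fed to sklearn's
# AffinityPropagation.  The small dynamic-programming dtw() is only a plain
# stand-in for the repo's dtw_std helper, and the toy beats are assumptions.
import numpy as np
from sklearn.cluster import AffinityPropagation

def dtw(a, b):
    # classic O(len(a)*len(b)) DTW with absolute-difference local cost
    D = np.full((len(a) + 1, len(b) + 1), np.inf)
    D[0, 0] = 0.0
    for i in range(1, len(a) + 1):
        for j in range(1, len(b) + 1):
            cost = abs(a[i - 1] - b[j - 1])
            D[i, j] = cost + min(D[i - 1, j], D[i, j - 1], D[i - 1, j - 1])
    return D[len(a), len(b)]

base_t = np.linspace(0, np.pi, 50)
beats = [np.sin(base_t) * (1 + 0.03 * k) for k in range(4)] + \
        [np.abs(np.cos(base_t)) * (1 + 0.03 * k) for k in range(4)]

A = np.zeros((len(beats), len(beats)))
for i in range(len(beats)):
    for j in range(i, len(beats)):
        A[i, j] = A[j, i] = -dtw(beats[i], beats[j]) ** 2

af = AffinityPropagation(damping=0.7, affinity='precomputed',
                         preference=np.percentile(A, 5)).fit(A)
print(af.labels_, af.cluster_centers_indices_)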
beats_alligned.append(alligned_beats[annot]['v']) if mode == 'average': template = list(np.average(beats_alligned,axis=0)) elif mode == 'distance': template = compute_template_lowest_distance(beats_alligned) template_std = list(np.std(beats_alligned,axis=0)) return template, template_std, alligned_beats def is_odd(num): is_odd_num = True if num%2 == 0: is_odd_num = False return is_odd_num def is_even(num): return not is_odd(num) def euristic_smoothing_routine(signal,freq): ms = 24e-3 K = int(ms * freq) if is_even(K): if K > 4: K -= 1 else: K += 1 filtered = median_filter(signal,K) noise = np.array(signal) - filtered p_filtered = sum(filtered**2)/len(filtered) p_noise = sum(noise**2)/len(noise) SNR = p_filtered/p_noise smooth = False - if SNR > 100: + if SNR > 50: smooth = True return smooth def signal_processing_smoothing_routine(signal,freq): ''' For future development: TODO ''' return euristic_smoothing_routine(signal,freq) def is_smooth(signal, freq, mode = 2): ''' This methods can be implemented in two major ways: 1: Complex (fft, signal procesing) 2: Using an Euristic, assuming low snr and that the Noise can be isolated with median filtering: - Kernel size (left/right) = K, with a time lenght of K/(frequency*multiplicator factor) -> K = time_lenght * frequency * multiplicator factor ''' smooth = False if mode == 2: smooth = euristic_smoothing_routine(signal,freq) if mode == 1: smooth = signal_processing_smoothing_routine(signal,freq) return smooth -def multi_template(data, normalize = True, percentage_each_cluster = 7, freq = FREQ * MULTIPLICATOR_FREQ): +def multi_template(data, percentage_each_cluster = 7, freq = FREQ * MULTIPLICATOR_FREQ): ''' Output: raw_templates: list descriptor:{$lbl: {"center": C, "point": P, "dist": D, "dist_all_pt_cluster":[]}} ''' descriptor = {} # {$lbl: {"center": C, "point": P, "dist": D, "dist_all_pt_cluster":[]}} raw_templates = [] beats_v = [] #print(f"Computing templates: num vectors: {len(data)}") for annot in data.keys(): beats_v.append(min_max_normalization(data[annot]['v'])) #print(f"Computing templates: num vectors: {len(beats_v)}") affinity_matr = compute_affinity_matr(beats_v) pref = np.percentile(affinity_matr, 5) with warnings.catch_warnings(): warnings.simplefilter("ignore") af = AffinityPropagation(damping = 0.7, preference = pref, affinity = 'precomputed').fit(affinity_matr) cluster_centers_indices = af.cluster_centers_indices_ labels = af.labels_ unique, counts = np.unique(labels, return_counts=True) # These two lines functionalities are equivalents except that line 2 remove the labels cluster with -1 and is nicly packed # Everything work still with the first line lbl_counts = dict(zip(unique, counts)) #lbl_counts = dict(zip([lbl_positive for lbl_positive in unique if lbl_positive != -1], [counts[pos_lbl] for pos_lbl in range(len(unique)) if unique[pos_lbl] != -1])) for i,pt in enumerate(beats_v): lbl_this_pt = labels[i] # The number samples in a class need to be bigger than x% of all samples if lbl_this_pt == -1 or lbl_counts[lbl_this_pt] < int(len(labels)*percentage_each_cluster/100) or not is_smooth(pt,freq): continue cluster_center = beats_v[cluster_centers_indices[lbl_this_pt]] dist = dtw_dist(pt, cluster_center) if lbl_this_pt not in descriptor.keys(): descriptor[lbl_this_pt] = {"center": None, "point": None, "dist": np.inf, "dist_all_pt_cluster": []} if dist < descriptor[lbl_this_pt]['dist']: descriptor[lbl_this_pt] = {"center": cluster_center, "point": pt, "dist": dist, "dist_all_pt_cluster": []} 
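# A sketch (assumed names, toy signals) of the heuristic smoothness test used
# by is_smooth()/euristic_smoothing_routine() above: a ~24 ms median filter
# separates the beat from residual noise and the beat is accepted when the
# power ratio exceeds a threshold (50 in the patched code).
import numpy as np
from scipy.signal import medfilt

def looks_smooth(signal, freq, snr_threshold=50.0, window_s=24e-3):
    k = int(window_s * freq)
    if k % 2 == 0:                       # medfilt needs an odd kernel size
        k += 1
    filtered = medfilt(signal, k)
    noise = np.asarray(signal) - filtered
    p_signal = np.mean(filtered ** 2)
    p_noise = np.mean(noise ** 2)
    return bool(p_noise > 0 and p_signal / p_noise > snr_threshold)

freq = 128 * 4
t = np.arange(0, 1, 1 / freq)
clean = np.sin(2 * np.pi * 5 * t)
noisy = clean + 0.5 * np.random.randn(len(t))
print(looks_smooth(clean, freq), looks_smooth(noisy, freq))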
descriptor[lbl_this_pt]["dist_all_pt_cluster"].append(dist) lbls_kept = list(descriptor.keys()) descriptor_seq = {} for i,lbl in enumerate(lbls_kept): raw_templates.append(descriptor[lbl]["point"]) descriptor_seq[i] = descriptor[lbl] #make all the ids of the tamplates sequential print(f"Found labels and respective number of items (total items: {len(data)})\n{lbl_counts}\nof which, with more than {percentage_each_cluster}% associateb beats: {len(descriptor)}") return raw_templates,descriptor_seq - ''' - # - # Previous implementation - # - allignment_idxs = get_r_peaks_idx(beats) - alligned_beats = allign_beats(beats, allignment_idxs, normalize=normalize) - for annot in alligned_beats.keys(): - beats_alligned.append(alligned_beats[annot]['v']) - - # Find centroid clusters through mean shift - bandwidth = estimate_bandwidth(beats_alligned, quantile=0.06) - ms = MeanShift(bandwidth=bandwidth) - ms.fit(beats_alligned) - cluster_centers = ms.cluster_centers_ - lbls = ms.labels_ - unique, counts = np.unique(lbls, return_counts=True) - lbl_counts = dict(zip(unique, counts)) - - #Filter the found labels and find the nearest point to centroids - templates_descriptor = {} # {$lbl: {"center": C, "point": P, "dist": D, "dist_all_pt_cluster":[]}} - between_dists = [] - for i,pt in enumerate(beats_alligned): #------------------------- - lbl_this_pt = lbls[i] # The number samples in a class need to be bigger than x% of all samples #<-- UBER-IMPORTANT HERE | - if lbl_this_pt == -1 or lbl_counts[lbl_this_pt] < int(len(lbls)*percentage_each_cluster/100) or prune_template(pt): #------------------------- - continue - dist = np.linalg.norm(pt - cluster_centers[lbl_this_pt]) - if lbl_this_pt not in templates_descriptor.keys(): - #print("Added lable") - templates_descriptor[lbl_this_pt] = {"center": None, "point": None, "dist": np.inf, "dist_all_pt_cluster": []} - #print(f"label {lbl_this_pt} has now kes: {list(templates_descriptor[lbl_this_pt].keys())}") - if templates_descriptor[lbl_this_pt]['dist'] > dist: - templates_descriptor[lbl_this_pt] = {"center": cluster_centers[lbl_this_pt], "point": pt, "dist": dist, "dist_all_pt_cluster": []} - #print(f"Errors here, descriptor is: {templates_descriptor}") - templates_descriptor[lbl_this_pt]["dist_all_pt_cluster"].append(dist) - - lbls_kept = list(templates_descriptor.keys()) - for idx_lbl in range(len(lbls_kept)): - lbl = lbls_kept[idx_lbl] - raw_templates.append(templates_descriptor[lbl]["center"]) - center_orig = cluster_centers[lbl] - for idx_lbl_tgt in range(idx_lbl+1,len(lbls_kept)): - lbl_tgt = lbls_kept[idx_lbl_tgt] - center_tgt = cluster_centers[lbl_tgt] - between_dists.append(np.linalg.norm(center_orig - center_tgt)) - - descriptor["templates"] = copy(templates_descriptor) - descriptor["dist_between_clusters"] = {"avg": np.average(between_dists), "std": np.std(between_dists)} - ''' - - - if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("--file", help="Force to analyze one specific file instead of default one (first found)") parser.add_argument("--not_norm", help="Force to NOT normalize each beats", action="store_true") args = parser.parse_args() files = os.listdir(data_beats_dir) if args.file is not None: analyzed = list(filter(lambda string: True if args.file in string else False, files))[0] else: analyzed = files[0] if args.not_norm: normalize = False else: normalize = True template_dist,std,alligned = build_template(analyzed, normalize = normalize, mode = 'distance') template_avg,_,_ = 
build_template(analyzed, normalize = normalize, mode = 'average') plt.plot(template_dist, color = 'C0') plt.plot(template_avg, color = 'C3') plt.plot(np.array(template_dist)+np.array(std), color = "C1") plt.plot(np.array(template_dist)-np.array(std), color = "C1") for beat in alligned.keys(): plt.plot(alligned[beat]['v'],color = 'C2', alpha = 0.01) plt.figure() plt.plot(template_dist, color = 'C0') plt.plot(template_avg, color = 'C3') plt.show() diff --git a/src/multiple_templates_prog.py b/src/multiple_templates_prog.py index b3ff5f9..97dd226 100644 --- a/src/multiple_templates_prog.py +++ b/src/multiple_templates_prog.py @@ -1,812 +1,820 @@ import multiprocessing import os import pickle import shutil import sys from multiprocessing import Pool from time import time import matplotlib import matplotlib.pyplot as plt from numpy.core.numeric import base_repr # configure backend here matplotlib.use('SVG') import numpy as np from dtaidistance import dtw from numpy.core.fromnumeric import argmin scripts = '../helper_scripts' if scripts not in sys.path: sys.path.insert(0,scripts) import warnings from copy import copy from dtw.dtw import dtw_std from scipy import stats from scipy.interpolate import interp1d from scipy.signal.signaltools import fftconvolve from build_template import build_template, multi_template data_beats_dir = "../data/beats/" log_dir = "../data/beat_recon_logs_multi_prog_" FREQ = 128*4 SEC_TO_AVG = 5*60 BEAT_TO_PLOT = 2 LEVEL = [3,4,5,6,7,8] NUM_BEAT_ANALYZED = 50 LEN_DISTANCE_VECTOR = 80 LEN_DISTANCE_VECTOR_REF = 500 REF_NEW = 100 TEMPLATE_TYPE = 'distance' PERC_BINS = 15 CLUSTER_PERCENTAGE = 3 def RMSE(v1,v2): v1_n = np.array(v1) v2_n = np.array(v2) return np.sqrt(np.mean((v1_n-v2_n)**2)) def RMSE_warped_resampled(data_orig,data_warped,data_resampled): rmse = {} template = {'warp':None,'resamp':None,'warp_left':None,'resamp_left':None,'warp_right':None,'resamp_right':None} for beat in data_warped.keys(): rmse[beat] = copy(template) idx_beat = data_warped[beat]['t'].index(beat) l_resamp = data_resampled[beat]['v'][:idx_beat] r_resamp = data_resampled[beat]['v'][idx_beat+1:] l_warp = data_warped[beat]['v'][:idx_beat] r_warp = data_warped[beat]['v'][idx_beat+1:] l_orig = data_orig[beat]['v'][:idx_beat] r_orig = data_orig[beat]['v'][idx_beat+1:] rmse[beat]['warp'] = RMSE(data_orig[beat]['v'],data_warped[beat]['v']) rmse[beat]['resamp'] = RMSE(data_orig[beat]['v'],data_resampled[beat]['v']) rmse[beat]['warp_left'] = RMSE(l_orig,l_warp) rmse[beat]['resamp_left'] = RMSE(l_orig,l_resamp) rmse[beat]['warp_right'] = RMSE(r_orig,r_warp) rmse[beat]['resamp_right'] = RMSE(r_orig,r_resamp) return rmse def open_file(file_name, level, start_after = SEC_TO_AVG): ''' Data structure: File: | DICTIONARY: | --> Beat_annot: | --> "t": [] | --> "v": [] ''' file_name_full = os.path.join(data_beats_dir, str(level), os.path.basename(file_name)) data = {} with open(file_name_full,"rb") as f: data = pickle.load(f) for k in data.keys(): if k <= FREQ*start_after: data.pop(k) return data def get_beats_in_time_span(data, lvl = 0 ,t_start_seconds = 0, t_stop_seconds = SEC_TO_AVG): beats = {} for annot in data.keys(): if annot >= int(t_start_seconds*FREQ) and annot <= int(t_stop_seconds*FREQ): beats[annot] = data[annot] elif annot > int(t_stop_seconds*FREQ): break return beats def min_max_normalization(beats, params = None): params_out = {} normalized_beats = {} #print("\t",params) for beat in beats: vector = beats[beat]['v'] params_out[beat] = {} normalized_beats[beat] = {'t':beats[beat]['t'],'v':[]} if 
params is not None: mi_v = params[beat]['min'] ma_v = params[beat]['max'] avg = params[beat]['avg'] norm = (np.array(vector)-mi_v)/(ma_v-mi_v) norm -= avg normalized_beats[beat]['v'] = norm.tolist() else: mi_v = min(vector) ma_v = max(vector) norm = (np.array(vector)-mi_v)/(ma_v-mi_v) avg = np.average(norm) norm -= avg normalized_beats[beat]['v'] = norm.tolist() params_out[beat]['min'] = mi_v params_out[beat]['max'] = ma_v params_out[beat]['avg'] = avg return normalized_beats,params_out def resamp_one_signal(t,v,resample_type = 'linear', min_t = None, max_t = None): if resample_type == "linear": f = interp1d(t,v, bounds_error = False, fill_value = "extrapolate") elif resample_type == "flat": f = interp1d(t,v, kind = 'previous', bounds_error = False, fill_value = "extrapolate") if min_t is None: min_t = t[0] if max_t is None: max_t = t[-1] t_new = list(range(min_t,max_t+1)) v_new = f(t_new) return t_new,v_new def resample(data, resample_type = "linear", min_t = None, max_t = None): resampled_data = {"t":None,"v":None} t = data['t'] v = data['v'] t_r,v_r = resamp_one_signal(t,v,resample_type = resample_type, min_t = min_t, max_t = max_t) resampled_data['t'] = t_r resampled_data['v'] = v_r return resampled_data def change_sampling(vector, size_to_warp): resampled = [] if len(vector) < 3: #If the length of the vector is either 2 or 1 it will be mnorfed in a straight line and such line will be morfed equaly resampled = [vector[0]]*size_to_warp return resampled delta = abs(size_to_warp - len(vector)) # --> number of points to insert/delete #At least one more sample for each sample (except the last one)? --> insert k in each hole and recompute delta if delta >= len(vector)-1 and delta > 0: holes = len(vector)-1 k = delta // holes #Number of point for each hole time = range(0,len(vector)*(k+1),k+1) # [o1,i11,i12,...,i1k, o2,i21,i22,...,i2k, o3,..., o_fin] --> NOTE: last sampel is not taken # 0 k+1 2k+2 l*k +l --> Hence, we get to the exact end t_new = range(time[-1]+1) f = interp1d(time,vector, bounds_error = False, fill_value = "extrapolate") vector = list(f(t_new)) delta = abs(size_to_warp - len(vector)) if len(vector) == size_to_warp: return vector grad = np.gradient(vector) grad[-1] = max(grad) + 1 # --> we don't want to insert anything after the last sample grad_idxs = sorted(range(len(grad)), key = lambda idx: grad[idx]) idx_to_consider = grad_idxs[:delta] #print(delta, len(idx_to_consider), idx_to_consider) for i,sample in enumerate(vector): resampled.append(sample) if i in idx_to_consider: if size_to_warp < len(vector): resampled.pop() elif size_to_warp > len(vector): resampled.append((vector[i]+vector[i+1])/2) return resampled def segment_warp(segment): #print("--> ",len(segment['segment'])) #print("--> ",segment['length_to_warp']) resampled = np.array(change_sampling(segment['segment'], segment['length_to_warp'])) #print("--> ",len(resampled)) resampled += (segment['v_start'] - resampled[0]) m = (segment['v_stop'] - resampled[-1])/(segment['length_to_warp']-1) coef_to_sum = [m*x for x in range(segment['length_to_warp'])] resampled += coef_to_sum return list(resampled) def stitch_segments(segments): stitched = [] for i,segment in enumerate(segments): if i == len(segments)-1: stitched.extend(segment) else: stitched.extend(segment[0:-1]) return stitched def dtw_dist(v1,v2): with warnings.catch_warnings(): warnings.simplefilter("ignore") dist = dtw_std(v1, v2) return dist def warp(resamp_event, templates, events): #print(events) warped = {} segments = [] # --> {"segment":v, 
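# A sketch of what resamp_one_signal()/resample() above do: the sparse
# level-crossing events (t, v) are brought back to a uniform one-sample grid
# either with zero-order hold ("flat", kind='previous') or with linear
# interpolation.  The toy event list is an assumption.
import numpy as np
from scipy.interpolate import interp1d

events_t = [0, 3, 4, 9]
events_v = [0.0, 2.0, 3.0, 1.0]
grid = list(range(events_t[0], events_t[-1] + 1))

flat = interp1d(events_t, events_v, kind='previous',
                bounds_error=False, fill_value="extrapolate")(grid)
linear = interp1d(events_t, events_v,
                  bounds_error=False, fill_value="extrapolate")(grid)
print(flat)    # holds the last seen level until the next event
print(linear)  # straight lines between consecutive events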
"length_to_warp":length, "v_start":v_eb_0, "v_stop":v_eb_1} segment = {"segment":None, "length_to_warp":None, "v_start":None, "v_stop":None} segment_start = None event_start = None #chosen_template = [] #ad the first and last points of the resampled and truncated beat if not already in event if resamp_event['t'][0] not in events['t']: events['t'].insert(0, resamp_event['t'][0]) events['v'].insert(0, 0) #events['v'].insert(0, data['v'][0]) if resamp_event['t'][-1] not in events['t']: events['t'].insert(-1,resamp_event['t'][-1]) events['v'].insert(-1,0) #events['v'].insert(-1,resamp_event['v'][-1]) #print(events) #Apply DTW for matching resampled event to template v_src = resamp_event['v'] #This is how its actualy done on the library when calling 'warping_path' dist = float('inf') path = [] selected_template = [] disatances_vector = [] template_id = None for id in templates: t = templates[id]['point'] #dist_this_template, paths_this_template = dtw.warping_paths(v_src, t) with warnings.catch_warnings(): warnings.simplefilter("ignore") dist_this_template, _, path_this_template = dtw_std(v_src, t, dist_only=False) disatances_vector.append(dist_this_template) if dist_this_template < dist: dist = dist_this_template path = path_this_template selected_template = t template_id = id #path = dtw.best_path(paths) path = [(v1,v2) for v1,v2 in zip(path[0],path[1])] #Remove the "left" steps from the path and segment the template based on the events point prev_idx_src = path[-1][0]+1 for idx_src, idx_temp in path: if prev_idx_src == idx_src: continue prev_idx_src = idx_src for idx, t in enumerate(events['t']): if resamp_event['t'][idx_src] == t: if segment_start == None: segment_start = idx_temp event_start = idx else: #print(f"SEGMENT {events['t'][event_start]} - {events['t'][idx]} ({abs(events['t'][event_start] - events['t'][idx])})") segment['segment'] = np.array(selected_template[segment_start:idx_temp+1], dtype=np.float64) segment['length_to_warp'] = events['t'][idx] - events['t'][event_start] + 1 segment['v_start'] = events['v'][event_start] segment['v_stop'] = events['v'][idx] w = segment_warp(segment) #print(len(w)) segments.append(w) segment_start = idx_temp event_start = idx break segment_stitched = stitch_segments(segments) #print(len(segment_stitched), len(resamp_event['t'])) warped = {'t':resamp_event['t'],'v':segment_stitched} return dist, warped, disatances_vector, template_id def compute_new_templates(data, t_start, t_stop, old_templates, templates_info): beats_for_new_template = get_beats_in_time_span(data, t_start_seconds=t_start,t_stop_seconds=t_stop) _, new_templates_descriptor = multi_template(beats_for_new_template, percentage_each_cluster = CLUSTER_PERCENTAGE*2) lbls_new_templates = list(new_templates_descriptor.keys()) old_ids = list(old_templates.keys()) next_id = max(old_ids) + 1 cluster_representatives = {} new_template_set = {} print(f"\nNew template built, number of new templates:{len(lbls_new_templates)}\n") old_templates_kept = 0 old_templates_substituted = 0 new_templates_kept = 0 new_templates_substituted = 0 # We search which of the old templates can be considered clustered with the newly founded ones if len(lbls_new_templates) > 0: for id_old_template in old_templates: t_o = old_templates[id_old_template]['point'] dists = np.zeros((len(lbls_new_templates))) for k,t_n in enumerate(lbls_new_templates): dists[k] = dtw_dist(t_o,new_templates_descriptor[t_n]['center']) min_dist_pos = np.argmin(dists) min_val = dists[min_dist_pos] this_lbls_new_templates = 
lbls_new_templates[min_dist_pos] distances_intra_cluster = new_templates_descriptor[this_lbls_new_templates]['dist_all_pt_cluster'] dist_threshold = np.average(distances_intra_cluster)+np.std(distances_intra_cluster) if min_val < dist_threshold: if this_lbls_new_templates not in cluster_representatives.keys(): cluster_representatives[this_lbls_new_templates] = {"ids": [], "dists": []} cluster_representatives[this_lbls_new_templates]["ids"].append(id_old_template) cluster_representatives[this_lbls_new_templates]["dists"].append(min_val) else: #This templates minimum distances to the newly founded templates are too big to be considered the same cluster new_template_set[id_old_template] = old_templates[id_old_template] old_templates_kept += 1 # Now we check if the old templates, clustered with the newly founded one, are representative of the cluster of if we should use the new templates for local_new_id in lbls_new_templates: if local_new_id in cluster_representatives.keys(): connected_ids = cluster_representatives[local_new_id]["ids"] arg_min_dist = np.argmin(cluster_representatives[local_new_id]["dists"]) old_template_id = cluster_representatives[local_new_id]["ids"][arg_min_dist] old_template_dist = cluster_representatives[local_new_id]["dists"][arg_min_dist] # The old template is nearer to the found cluster center than the beat found by the clustering alg if old_template_dist < new_templates_descriptor[local_new_id]["dist"]: new_template_set[old_template_id] = old_templates[old_template_id] connected_ids.remove(old_template_id) templates_info[old_template_id]["connected_template"].extend(connected_ids) new_templates_substituted += 1 # The beat found by the clustering alg is nearer to the found cluster center than the old template else: new_template_set[next_id] = new_templates_descriptor[local_new_id] templates_info[next_id] = {"used": 0, "deceased": False, "connected_template": connected_ids} next_id += 1 old_templates_substituted += len(cluster_representatives[local_new_id]["ids"]) # The templates in this cluster that are not the center are now deceased for id_removed in connected_ids: templates_info[id_removed]["deceased"] = True else: new_template_set[next_id] = new_templates_descriptor[local_new_id] templates_info[next_id] = {"used": 0, "deceased": False, "connected_template": []} next_id += 1 new_templates_kept += 1 print(f"Old templates kept untuched: {old_templates_kept}/{len(old_templates)}") print(f"Old templates kept, representative of new clusters (but already present): {new_templates_substituted}/{len(old_templates)}") print(f"Old templates removed for old clusters (with new defined params): {len(old_templates) - (old_templates_kept+old_templates_substituted+new_templates_substituted)}/{len(old_templates)}") print(f"Old templates removed for new clusters: {old_templates_substituted}/{len(old_templates)}") print(f"\nNew templates kept untuched: {new_templates_kept}/{len(lbls_new_templates)}") print(f"New templates kept, representative of old clusters (with new params): {len(lbls_new_templates) - (new_templates_kept+new_templates_substituted)}/{len(lbls_new_templates)}") print(f"New templates removed for old clusters: {new_templates_substituted}/{len(lbls_new_templates)}") print(f"\nFinal lenght of the new tempalte set: {len(new_template_set)}") else: new_template_set = old_templates print("No new template found, keeping the previously computed ones") return new_template_set # Code to re-use for bounded number of template. 
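# A simplified sketch of the reconciliation rule in compute_new_templates()
# above: an old template joins a freshly found cluster only if its distance to
# the cluster centre is below mean+std of the intra-cluster distances, and the
# cluster then keeps whichever representative (old template or new medoid)
# lies closer to the centre; otherwise the old template is carried over as-is.
# Plain Euclidean distance stands in for DTW here and all names and data are
# assumptions; the bookkeeping of "connected"/"deceased" templates is omitted.
import numpy as np

def dist(a, b):
    return float(np.linalg.norm(np.asarray(a) - np.asarray(b)))

def reconcile(old_templates, new_clusters):
    kept = {}
    next_id = max(old_templates) + 1
    claimed = {}   # cluster id -> (closest old template id, its distance)
    for oid, tpl in old_templates.items():
        cid, d = min(((c, dist(tpl, new_clusters[c]['center'])) for c in new_clusters),
                     key=lambda x: x[1])
        intra = new_clusters[cid]['dist_all_pt_cluster']
        if d < np.mean(intra) + np.std(intra):      # close enough: joins cluster cid
            if cid not in claimed or d < claimed[cid][1]:
                claimed[cid] = (oid, d)
        else:                                       # far from every new cluster: keep as-is
            kept[oid] = tpl
    for cid, c in new_clusters.items():
        if cid in claimed and claimed[cid][1] < c['dist']:
            kept[claimed[cid][0]] = old_templates[claimed[cid][0]]  # old one still fits best
        else:
            kept[next_id] = c['point']                              # adopt the new medoid
            next_id += 1
    return kept

old = {0: [0.0, 1.0, 0.0]}
new = {0: {'center': [0.0, 1.1, 0.0], 'point': [0.0, 1.05, 0.0],
           'dist': 0.05, 'dist_all_pt_cluster': [0.05, 0.2, 0.3]}}
print(reconcile(old, new))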
First, let's try the unbunded version ''' # Decide which template to keep and which to discard if len(new_templates) < max_num_template: to_keep = np.argsort(dist_vector)[:max_num_template-len(new_templates)] to_discard = np.argsort(dist_vector)[max_num_template-len(new_templates):] for idx in to_keep: new_full_templates.append(templates[idx]) new_ids.append(id_templates[idx]) print(f"\tKept template: {idx}, dist: {dist_vector[idx]}:\t","|"*int(20*dist_vector[idx]/max_accum_dist)) for idx in to_discard: templates_info[id_templates[idx]]["deceased"]=True else: print(f"\tNo old templates kept") # Add the new templates for n,t in enumerate(new_templates): new_full_templates.append(t) new_ids.append(next_id+n) templates_info[next_id+n] = {"template": t, "used": 0, "deceased": False} ''' def reconstruct_beats(data_orig, data_lvl, templates_descriptor_original, lvl_number, start_after = SEC_TO_AVG, normalize = True, resample_type = 'linear', num_beats_analyzed = None, verbose = False): reconstructed = {} resamp = {} beat_seq_number = 0 distances = [] distances_ref = [] num_distances_out = 0 skip_until = start_after templates = copy(templates_descriptor_original) dist_vector = [0]*len(templates) time_info = {'beats_low_res_num': 0, 'tot_beats_num': 0} #Init templet info templates_info = {} #{$templet_id: {used: 0, deceased: False, "connected_template": []}} for id_t in range(len((templates))): templates_info[id_t] = {"used": 0, "deceased": False, "connected_template": []} #print(list(templates.keys()),templates_info) for beat in data_orig.keys(): t_beat = beat/FREQ time_info['tot_beats_num'] += 1 if t_beat < skip_until: continue time_info['beats_low_res_num'] += 1 if beat_seq_number >= num_beats_analyzed: break beat_seq_number +=1 if (beat_seq_number %(num_beats_analyzed/20)==0 or beat_seq_number == 1) and verbose: print(f"Reconstructing beat {beat} ({beat_seq_number}/{num_beats_analyzed}: {100*beat_seq_number /num_beats_analyzed}%, LEVEL:{lvl_number})") reconstructed[beat] = {} resamp[beat] = {} data_resampled = resample(data_lvl[beat], resample_type = resample_type, min_t = data_orig[beat]['t'][0], max_t = data_orig[beat]['t'][-1]) #WARP DRIVEEEEEEE dist, reconstructed[beat], dist_all_template, local_template_id = warp(data_resampled,templates,data_lvl[beat]) dist_vector = [dist_vector[i]+dist_all_template[i] for i in range(len(dist_vector))] resamp[beat] = data_resampled #Update templates usage info template_id = list(templates_info.keys())[-len(templates)+local_template_id] templates_info[template_id]["used"] += 1 if len(distances_ref) < LEN_DISTANCE_VECTOR_REF: distances_ref.append(dist) if len(distances_ref) >= LEN_DISTANCE_VECTOR_REF: distances.append(dist) if len(distances) >= LEN_DISTANCE_VECTOR: with warnings.catch_warnings(): warnings.simplefilter("ignore") p = stats.anderson_ksamp([distances_ref,distances])[2] distances = [] if (p>= 0.05): num_distances_out = 0 else: # p value les than 0.05: null hypothesis (same distribution) rejected (please Kolmogorov forgive me) num_distances_out += 1 if num_distances_out >= 2: # The acquired vector of distances was out for 2 times max_accum_dist = max(dist_vector) print(f"\n################################################################") print(f"\nBeat num:{beat_seq_number}, New template needed ... 
") print(f"\t p-value: {p}") for j in range(len(dist_vector)): print(f"\tTemplate {j}, dist: {dist_vector[j]}:\t","|"*int(20*dist_vector[j]/max_accum_dist)) templates = compute_new_templates(data_orig, t_beat, t_beat+120, templates, templates_info) dist_vector = [0]*len(templates) print(f"\n################################################################\n") #distances_ref = distances_ref[REF_NEW:] distances_ref = [] skip_until = t_beat + 120 num_distances_out = 0 return reconstructed,resamp,templates_info,time_info def percentile_idx(vector,perc): pcen=np.percentile(np.array(vector),perc,interpolation='nearest') i_near=abs(np.array(vector)-pcen).argmin() return i_near def correlate(v1,v2): v1_n = np.array(v1) v2_n = np.array(v2) cov = np.cov(v1_n,v2_n)[0,1] cor = cov/(np.std(v1_n)*np.std(v2_n)) return cor def avg_std_for_rmse_dict(dictionary): dict_out = {'warp':{'avg':None,'std':None},'resamp':{'avg':None,'std':None}} wr = [] rs = [] for beat in dictionary.keys(): wr.append(dictionary[beat]['warp']) rs.append(dictionary[beat]['resamp']) dict_out['warp']['avg'] = np.average(wr) dict_out['warp']['std'] = np.std(wr) dict_out['resamp']['avg'] = np.average(rs) dict_out['resamp']['std'] = np.std(rs) return dict_out def percentile_rmse_beat_rel(rmse,perc): vec = [] for beat in rmse.keys(): vec.append(rmse[beat]['warp'] - rmse[beat]['resamp']) idx = percentile_idx(vec,perc) return list(rmse.keys())[idx] def percentile_rmse_beat_abs(rmse,perc): vec = [] for beat in rmse.keys(): vec.append(rmse[beat]['warp']) idx = percentile_idx(vec,perc) return list(rmse.keys())[idx] def find_all_connceted_template(id_temp,templates_info): id_collected = [id_temp] for id_connected in id_collected: for id_to_add in templates_info[id_connected]["connected_template"]: if id_to_add not in id_collected: id_collected.append(id_to_add) return id_collected def plot_perc(orig,warp,resamp,title,out_file_name): plt.figure() plt.plot(orig[0],orig[1]) plt.plot(warp[0],warp[1]) plt.plot(resamp[0],resamp[1]) plt.legend(['original','warped template','resampled']) plt.title(title) plt.savefig(out_file_name) plt.close() def plot_beat_rmse_percentile(orig, recon, resamp, lvl, rmse, perc, log_dir_this_lvl, file): #Relative beat_to_plot_rel = percentile_rmse_beat_rel(rmse, perc) file_name_to_save_fig_rel = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_relative.svg") orig_rel = orig[beat_to_plot_rel]['t'],orig[beat_to_plot_rel]['v'] recon_rel = recon[beat_to_plot_rel]['t'],recon[beat_to_plot_rel]['v'] resamp_rel = resamp[beat_to_plot_rel]['t'],resamp[beat_to_plot_rel]['v'] title_rel = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_to_plot_rel}, {str(perc)} percentile' plot_perc(orig_rel,recon_rel,resamp_rel,title_rel,file_name_to_save_fig_rel) #Absolute beat_to_plot_abs = percentile_rmse_beat_abs(rmse, perc) file_name_to_save_fig_abs = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_absolute.svg") orig_abs = orig[beat_to_plot_abs]['t'],orig[beat_to_plot_abs]['v'] recon_abs = recon[beat_to_plot_abs]['t'],recon[beat_to_plot_abs]['v'] resamp_abs = resamp[beat_to_plot_abs]['t'],resamp[beat_to_plot_abs]['v'] title_abs = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_to_plot_abs}, {str(perc)} percentile' plot_perc(orig_abs,recon_abs,resamp_abs,title_abs,file_name_to_save_fig_abs) def recontruct_and_compare(file): verbose = True log_dir_this_file = os.path.join(log_dir,file.split(".")[0]) os.mkdir(log_dir_this_file) #recon,orig,resamp,templates_info, 
time_info = reconstruct_beats(file, level = LEVEL, resample_type = 'flat', num_beats_analyzed = NUM_BEAT_ANALYZED, verbose = True) #resample_type = flat vs linear #rmse = RMSE_warped_resampled(orig,recon,resamp) time_to_plot_rmse_perc = [] time_to_build_rmse_vec_for_histogram = [] time_to_plot_histogram = [] if verbose: print("Extracting original data") data_orig = open_file(file, level = "original", start_after = 0) if verbose: print("Min-Max-Avg normaliztion of original data") data_orig,original_params = min_max_normalization(data_orig, params = None) if verbose: print("Building initial templates") beats_for_template = get_beats_in_time_span(data_orig, t_start_seconds=0,t_stop_seconds=60*5) templates_orig, templates_orig_descriptor = multi_template(beats_for_template, normalize = normalize, percentage_each_cluster = CLUSTER_PERCENTAGE) if verbose: print(f"Initial number of templates: {len(templates_orig)}") print("reconstructing") for lvl in LEVEL: if verbose: print(f"Analyzing level:{lvl}") print("Extracting...") data_lvl = open_file(file, level = lvl,start_after = 0) if verbose: print(f"Min-Max-Avg normaliztion for lvl {lvl}") data_lvl,_ = min_max_normalization(data_lvl, params = original_params) recon,resamp,templates_info, time_info = reconstruct_beats(data_orig, data_lvl, templates_orig_descriptor, lvl, start_after = SEC_TO_AVG, resample_type = 'flat', num_beats_analyzed = NUM_BEAT_ANALYZED, verbose = True) #resample_type = flat vs linear rmse = RMSE_warped_resampled(data_orig,recon,resamp) log_dir_this_lvl = os.path.join(log_dir_this_file,str(lvl)) os.mkdir(log_dir_this_lvl) stats = avg_std_for_rmse_dict(rmse) avg_rmse_warp = stats['warp']['avg'] std_rmse_warp = stats['warp']['std'] avg_rmse_resamp = stats['resamp']['avg'] std_rmse_resamp = stats['resamp']['std'] file_name_to_save = "L_"+file.split(".")[0]+".log" # Particular log for each lvl with open(os.path.join(log_dir_this_lvl,file_name_to_save),"a") as f: f.write(f"Lvl: {lvl}\n") f.write(f"\tWarp: {avg_rmse_warp}, +-{std_rmse_warp}\n") f.write(f"\tInterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}\n") for id_temp in templates_info: usage = templates_info[id_temp]["used"] deceased = templates_info[id_temp]["deceased"] f.write(f"\t\tTemplate {id_temp} was used {usage} times out of {NUM_BEAT_ANALYZED}, \ deceased? {deceased}\n") f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%\n") f.write(f"\n\n") # General log (the same but all toghether: more confusing but with all infos) with open(os.path.join(log_dir_this_file,file_name_to_save),"a") as f: f.write(f"Lvl: {lvl}\n") f.write(f"\tWarp: {avg_rmse_warp}, +-{std_rmse_warp}\n") f.write(f"\tInterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}\n") ''' for id_temp in templates_info: usage = templates_info[id_temp]["used"] deceased = templates_info[id_temp]["deceased"] f.write(f"\t\tTemplate {id_temp} was used {usage} times out of {NUM_BEAT_ANALYZED}, \ deceased? 
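# A sketch (toy distance populations, assumed names) of the drift check inside
# reconstruct_beats() above: a reference window of beat-to-template DTW
# distances is compared against the most recent window with the k-sample
# Anderson-Darling test, and a significance level below 0.05 counts as one
# "distances out" strike (two strikes trigger a template refresh).
import warnings
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
reference = rng.normal(loc=1.0, scale=0.2, size=500)    # distances seen so far
recent_ok = rng.normal(loc=1.0, scale=0.2, size=80)     # same regime
recent_drift = rng.normal(loc=2.0, scale=0.3, size=80)  # beat morphology changed

with warnings.catch_warnings():
    warnings.simplefilter("ignore")   # anderson_ksamp caps its reported p-value range
    p_ok = stats.anderson_ksamp([reference, recent_ok]).significance_level
    p_drift = stats.anderson_ksamp([reference, recent_drift]).significance_level
print(p_ok >= 0.05, p_drift >= 0.05)  # expected: True False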
{deceased}\n") ''' f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%\n") f.write(f"\n\n") print(f"File:{file_name_to_save}") print(f"\tLvl: {lvl}") print(f"\t\twarp: {avg_rmse_warp}, +-{std_rmse_warp}") print(f"\t\tinterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}") print(f"\t\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%") print("\n") t_0 = time.time() # 01 percentile plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 1, log_dir_this_lvl, file) # 25 percentile plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 25, log_dir_this_lvl, file) # 50 percentile plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 50, log_dir_this_lvl, file) # 75 percentile plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 75, log_dir_this_lvl, file) # 99 percentile plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 99, log_dir_this_lvl, file) # Histograms t_1 = time.time() file_name_to_save_fig_hist = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist"+str(lvl)+".svg") file_name_to_save_fig_hist_left = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist_left"+str(lvl)+".svg") file_name_to_save_fig_hist_right = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist_right"+str(lvl)+".svg") rmse_warp = [] rmse_resamp = [] rmse_warp_left = [] rmse_resamp_left = [] rmse_warp_right = [] rmse_resamp_right = [] for beat in rmse.keys(): rmse_warp.append(rmse[beat]['warp']) rmse_resamp.append(rmse[beat]['resamp']) rmse_warp_left.append(rmse[beat]['warp_left']) rmse_resamp_left.append(rmse[beat]['resamp_left']) rmse_warp_right.append(rmse[beat]['warp_right']) rmse_resamp_right.append(rmse[beat]['resamp_right']) t_2 = time.time() n_bins = len(rmse_warp)*PERC_BINS//100 min_bin = min(min(rmse_warp),min(rmse_resamp)) max_bin = max(max(rmse_warp),max(rmse_resamp)) delta = (max_bin-min_bin)/n_bins bins = np.arange(min_bin,max_bin+delta,delta) plt.figure() plt.hist(rmse_warp, bins = bins, alpha=0.5) plt.hist(rmse_resamp, bins = bins, alpha=0.5) plt.title(f'File: {file}, Lvl: {lvl}, RMSE histogram') plt.legend(['RMSE warp','RMSE resampled']) plt.savefig(file_name_to_save_fig_hist) plt.close() n_bins = len(rmse_warp_left)*PERC_BINS//100 min_bin = min(min(rmse_warp_left),min(rmse_resamp_left)) max_bin = max(max(rmse_warp_left),max(rmse_resamp_left)) delta = (max_bin-min_bin)/n_bins bins = np.arange(min_bin,max_bin+delta,delta) plt.figure() plt.hist(rmse_warp_left, bins = bins, alpha=0.5) plt.hist(rmse_resamp_left, bins = bins, alpha=0.5) plt.title(f'File: {file}, Lvl: {lvl}, left RMSE histogram') plt.legend(['RMSE warp','RMSE resampled']) plt.savefig(file_name_to_save_fig_hist_left) plt.close() n_bins = len(rmse_warp_right)*PERC_BINS//100 min_bin = min(min(rmse_warp_right),min(rmse_resamp_right)) max_bin = max(max(rmse_warp_right),max(rmse_resamp_right)) delta = (max_bin-min_bin)/n_bins bins = np.arange(min_bin,max_bin+delta,delta) plt.figure() plt.hist(rmse_warp_right, bins = bins, alpha=0.5) plt.hist(rmse_resamp_right, bins = bins, alpha=0.5) plt.title(f'File: {file}, Lvl: {lvl}, right RMSE histogram') plt.legend(['RMSE warp','RMSE resampled']) plt.savefig(file_name_to_save_fig_hist_right) plt.close() t_3 = time.time() ''' template_heirarchy = {} counter = 0 blacklist = [] for id_temp in templates_info: if id_temp in blacklist: continue components = find_all_connceted_template(id_temp,templates_info) 
blacklist.extend(components) template_heirarchy[counter] = components counter += 1 t_4 = time.time() for component in template_heirarchy: dir_component = os.path.join(log_dir_this_lvl,f"cluster_{str(component)}") os.mkdir(dir_component) for id_template in template_heirarchy[component]: file_name_to_save_fig_template = os.path.join(dir_component,f"template_{str(id_template)}_cluster_{str(component)}.svg") plt.figure() plt.plot(templates_info[id_template]['template']) plt.legend(['template']) plt.title(f'File: {file}, template {id_template}') plt.savefig(file_name_to_save_fig_template) plt.close() t_5 = time.time() ''' time_to_plot_rmse_perc.append(t_1-t_0) time_to_build_rmse_vec_for_histogram.append(t_2-t_1) time_to_plot_histogram.append(t_3-t_2) print(f"\tLevel: {lvl}: Time to plot the beats relative to RMSE percentile: {t_1-t_0}") print(f"\tLevel: {lvl}: Time to build the rmse vectors for histogram plotting: {t_2-t_1}") print(f"\tLevel: {lvl}: Time to plot the histograms: {t_3-t_2}") print("\n----------------------------------------") print("\n--------------------------------------------\n") print(f"Time to plot the beats relative to RMSE percentile: {np.average(time_to_plot_rmse_perc)} +/- {np.std(time_to_plot_rmse_perc)} (total: {sum(time_to_plot_rmse_perc)})") print(f"Time to build the rmse vectors for histogram plotting: {np.average(time_to_build_rmse_vec_for_histogram)} +/- {np.std(time_to_build_rmse_vec_for_histogram)} (total: {sum(time_to_build_rmse_vec_for_histogram)})") print(f"Time to plot the histograms: {np.average(time_to_plot_histogram)} +/- {np.std(time_to_plot_histogram)} (total: {sum(time_to_plot_histogram)})") def process(files, multi=True, cores=1): # ------------ INIT ------------ global log_dir for i in range(1,1000): tmp_log_dir = log_dir+str(i) if not os.path.isdir(tmp_log_dir): log_dir = tmp_log_dir break os.mkdir(log_dir) + with open(os.path.join(log_dir,"specs.txt"), "w") as f: + f.write(f"Results generated by script: {sys.argv[0]}\n") + f.write(f"Time: {time.ctime(time.time())}\n\n") + f.write(f"Files: {files}\n") + f.write(f"Cores: {cores}\n") + f.write(f"Beats: {NUM_BEAT_ANALYZED}\n") + f.write(f"Cluster percentage: {CLUSTER_PERCENTAGE}\n") + f.write(f"Interpolation type: {INTERPOLATION_TYPE}\n") # ------------ Extract DATA & ANNOTATIONS ------------ if cores == 1: print("Single core") for f in files: recontruct_and_compare(f) else: with Pool(cores) as pool: pool.map(recontruct_and_compare, files) if __name__ == "__main__": import argparse import time seconds_start = time.time() local_time_start = time.ctime(seconds_start) print("\nStarted at:", local_time_start,"\n\n") #global NUM_BEAT_ANALYZED parser = argparse.ArgumentParser() parser.add_argument("--file", help="Force to analyze one specific file instead of default one (first found)") parser.add_argument("--not_norm", help="Force to NOT normalize each beats", action="store_true") parser.add_argument("--cores", help="Force used number of cores (default, half of the available ones") parser.add_argument("--beats", help="Number of used beats, default: 5000") parser.add_argument("--cluster_opt", help="Percentage of points for a cluster to be considered") parser.add_argument("--template_type", help="Type of template, default: distance") args = parser.parse_args() files = os.listdir(os.path.join(data_beats_dir,"original")) if args.file is not None: if args.file == 'all': analyzed = files else: analyzed = list(filter(lambda string: True if args.file in string else False, files)) else: analyzed = [files[0]] if 
args.not_norm: normalize = False else: normalize = True if args.cores is not None: used_cores = int(args.cores) else: used_cores = multiprocessing.cpu_count()//2 if args.beats is not None: NUM_BEAT_ANALYZED = int(args.beats) else: NUM_BEAT_ANALYZED = 5000 if args.template_type is not None: TEMPLATE_TYPE = 'average' else: TEMPLATE_TYPE = 'distance' if args.cluster_opt is not None: CLUSTER_PERCENTAGE = int(args.cluster_opt) else: CLUSTER_PERCENTAGE = 3 print(f"Analyzing files: {analyzed}") print(f"Extracting data with {used_cores} cores...") process(files = analyzed, multi=True, cores=used_cores) seconds_stop = time.time() local_time_stop = time.ctime(seconds_stop) elapsed = seconds_stop - seconds_start hours = elapsed//60//60 minutes = (elapsed - hours * 60 * 60) // 60 seconds = (elapsed - hours * 60 * 60 - minutes * 60) // 1 print("\n\n\n-----------------------------------------------------------------------------------------------------------------") print(f"Finished at: {local_time_stop}, elapsed: {elapsed} seconds ({hours} hours, {minutes} minutes, {seconds} seconds)") diff --git a/src/precise_multiple_templates_prog.py b/src/precise_multiple_templates_prog.py index a3438de..109a94c 100644 --- a/src/precise_multiple_templates_prog.py +++ b/src/precise_multiple_templates_prog.py @@ -1,856 +1,861 @@ import multiprocessing import os import pickle import shutil import sys from multiprocessing import Pool from time import time import matplotlib import matplotlib.pyplot as plt from numpy.core.numeric import base_repr # configure backend here matplotlib.use('SVG') import numpy as np from dtaidistance import dtw from numpy.core.fromnumeric import argmin scripts = '../helper_scripts' if scripts not in sys.path: sys.path.insert(0,scripts) import warnings from copy import copy from dtw.dtw import dtw_std from scipy import stats -from scipy.interpolate import interp1d +from scipy.interpolate import interp1d, CubicSpline from scipy.signal.signaltools import fftconvolve from build_template import build_template, multi_template data_beats_dir = "../data/beats/" log_dir = "../data/beat_recon_logs_multi_prog_" FREQ = 128*4 -MULTIPLIER_FREQ = 1 #Final freq: 512*5 256 KHz +MULTIPLIER_FREQ = 5 #Final freq: 512*5 256 KHz SEC_TO_AVG = 5*60 BEAT_TO_PLOT = 2 LEVEL = [3,4,5,6,7,8] -NUM_BEAT_ANALYZED = 50 LEN_DISTANCE_VECTOR = 80 LEN_DISTANCE_VECTOR_REF = 500 REF_NEW = 100 -TEMPLATE_TYPE = 'distance' PERC_BINS = 15 CLUSTER_PERCENTAGE = 3 -#TODO: One possible solution to local up-sampling could be to reset the time vector to 0 for every heat-beat (making every heartbeat independent) -# AND adding inside the structure that describe the beats, the local position of the QRS complex +NUM_BEAT_ANALYZED = 50 +INTERPOLATION_TYPE = 'flat' def RMSE(v1,v2): v1_n = np.array(v1) v2_n = np.array(v2) return np.sqrt(np.mean((v1_n-v2_n)**2)) def RMSE_one_beat(orig,resamp,warped): rmse = {'warp':None,'resamp':None,'warp_left':None,'resamp_left':None,'warp_right':None,'resamp_right':None} idx_qrs = orig['QRS_pos'] l_resamp = resamp['v'][:idx_qrs] r_resamp = resamp['v'][idx_qrs+1:] l_warp = warped['v'][:idx_qrs] r_warp = warped['v'][idx_qrs+1:] l_orig = orig['v'][:idx_qrs] r_orig = orig['v'][idx_qrs+1:] rmse['warp'] = RMSE(orig['v'],warped['v']) rmse['resamp'] = RMSE(orig['v'],resamp['v']) rmse['warp_left'] = RMSE(l_orig,l_warp) rmse['resamp_left'] = RMSE(l_orig,l_resamp) rmse['warp_right'] = RMSE(r_orig,r_warp) rmse['resamp_right'] = RMSE(r_orig,r_resamp) return rmse def RMSE_warped_resampled(data_orig,data_warped,data_resampled): 
rmse = {} for beat in data_warped.keys(): rmse[beat] = RMSE_one_beat(data_orig[beat],data_resampled[beat],data_warped[beat]) return rmse def open_file(file_name, level, start_after = SEC_TO_AVG): ''' Data structure: File: | DICTIONARY: | --> Beat_annot: | --> "t": [] | --> "v": [] ''' file_name_full = os.path.join(data_beats_dir, str(level), os.path.basename(file_name)) data = {} with open(file_name_full,"rb") as f: data = pickle.load(f) for k in data.keys(): if k <= FREQ*start_after: data.pop(k) return data def upsample_uniform_beat(beat,t_QRS,multiplier): out_beat = {} upsampled = [] v = beat['v'] new_QRS_pos = beat['t'].index(t_QRS)*multiplier last_new_t = (len(v)-1)*multiplier t = np.arange(0,last_new_t+1,multiplier,dtype = np.int64) new_base = np.arange(0,last_new_t+1,1,dtype = np.int64) f = interp1d(t, v) upsampled = f(new_base) - out_beat = {'t':t,'v':upsampled,'QRS_pos':new_QRS_pos} + out_beat = {'t':new_base,'v':upsampled,'QRS_pos':new_QRS_pos} return out_beat def upsample_uniform_beats_in_beats_dict(beats,multiplier): up_samp_beats = {} i = 0 for beat in beats: i += 1 up_samp_beats[beat] = upsample_uniform_beat(beats[beat], beat, multiplier) return up_samp_beats -def ADC(beat, nBits, hist = 5, original_bits = 12): +def ADC(beat, nBits, hist = 5, original_bits = 11): + #ADC stats delta = 2**original_bits dV = (delta)/(2**nBits) hist = hist/100*dV min_val = -delta//2 - lowTh = min_val - highTh = lowTh + dV events = {'t':[],'v':[], 'QRS_pos': beat['QRS_pos']} - index = [] - values_sampled = [] + #init value, first sample (we assume we always sample the nearest level at start) and ADC status + v_0 = beat['v'][0] + lowTh = min_val+((v_0-min_val)//dV)*dV + highTh = lowTh + dV + events['t'].append(beat['t'][0]) + events['v'].append(int(lowTh if v_0-lowTh < highTh - v_0 else highTh)) + for val,time in zip(beat['v'],beat['t']): + #print(f"Value: {val}, time: {time}, low_th = {lowTh - hist}, high_th = { highTh + hist}") if val > highTh + hist or val < lowTh - hist: - index.append(time) direction = 1 if val > highTh else -1 lowTh = min_val+((val-min_val)//dV)*dV #Delta from the bottom: (val-min_val)//dV*dV then compute the actual level summin min_val highTh = lowTh + dV events['t'].append(time) events['v'].append(int(lowTh if direction == 1 else highTh)) return events def get_beats_in_time_span(data, lvl = 0 ,t_start_seconds = 0, t_stop_seconds = SEC_TO_AVG): beats = {} for annot in data.keys(): if annot >= int(t_start_seconds*FREQ) and annot <= int(t_stop_seconds*FREQ): beats[annot] = data[annot] elif annot > int(t_stop_seconds*FREQ): break return beats def min_max_normalization_one_beat(beat,param = None): params_out = {} - normalized_beat = {'t':beat['t'],'v':[], 'QRS_pos': beat['QRS_pos']} + if 'QRS_pos' in list(beat.keys()): + normalized_beat = {'t':beat['t'],'v':[], 'QRS_pos': beat['QRS_pos']} + else: + normalized_beat = {'t':beat['t'],'v':[]} vector = beat['v'] if param is not None: mi_v = param['min'] ma_v = param['max'] avg = param['avg'] norm = (np.array(vector)-mi_v)/(ma_v-mi_v) norm -= avg else: mi_v = min(vector) ma_v = max(vector) norm = (np.array(vector)-mi_v)/(ma_v-mi_v) avg = np.average(norm) norm -= avg normalized_beat['v'] = norm.tolist() params_out['min'] = mi_v params_out['max'] = ma_v params_out['avg'] = avg return normalized_beat,params_out def min_max_normalization_beats_chunk(beats, params = None): params_out = {} normalized_beats = {} #print("\t",params) for beat in beats: normalized_beats[beat],params_out[beat] = min_max_normalization_one_beat(beats[beat],params) 
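# A sketch (assumed names, toy beats) of the parameter reuse done by
# min_max_normalization_one_beat() above: the full-resolution beat defines
# min/max/avg once, and those same parameters are applied to the
# level-crossing version of the beat so both live on the same scale and can be
# compared by RMSE.
import numpy as np

def normalize(v, params=None):
    v = np.asarray(v, dtype=float)
    if params is None:
        lo, hi = v.min(), v.max()
        scaled = (v - lo) / (hi - lo)
        params = {'min': lo, 'max': hi, 'avg': scaled.mean()}
    else:
        scaled = (v - params['min']) / (params['max'] - params['min'])
    return (scaled - params['avg']).tolist(), params

orig_beat = [10, 40, 90, 40, 10]
lvl_beat = [10, 40, 90]                      # sparse, level-crossing samples
norm_orig, p = normalize(orig_beat)          # parameters come from the original
norm_lvl, _ = normalize(lvl_beat, params=p)  # ...and are reused verbatim here
print(norm_orig, norm_lvl)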
return normalized_beats,params_out -def resamp_one_signal(t,v,resample_type = 'linear', min_t = None, max_t = None): - if resample_type == "linear": - f = interp1d(t,v, bounds_error = False, fill_value = "extrapolate") - elif resample_type == "flat": - f = interp1d(t,v, kind = 'previous', bounds_error = False, fill_value = "extrapolate") +def resamp_one_signal(t,v,resample_type = INTERPOLATION_TYPE, min_t = None, max_t = None): if min_t is None: min_t = t[0] if max_t is None: max_t = t[-1] + + t_extended = copy(t) + v_extended = copy(v) + if max_t not in t: + t_extended.insert(len(t_extended), max_t) + v_extended.insert(len(v_extended), 0) + if min_t not in t: # This is not needed in this implementation as the first sample is always an events for each beat + t_extended.insert(0, min_t) # Still, we write it for clearity and consistency + v_extended.insert(0, 0) + + if resample_type == "linear": + f = interp1d(t_extended,v_extended, bounds_error = False, fill_value = (v[0],v[-1])) + elif resample_type == "flat": + f = interp1d(t_extended,v_extended, kind = 'previous', bounds_error = False, fill_value = (v[0],v[-1])) + elif resample_type == "spline": + f = CubicSpline(t_extended,v_extended, bc_type="natural") t_new = list(range(min_t,max_t+1)) v_new = f(t_new) return t_new,v_new -def resample(data, resample_type = "linear", min_t = None, max_t = None): +def resample(data, resample_type = INTERPOLATION_TYPE, min_t = None, max_t = None): resampled_data = {"t":None,"v":None} t = data['t'] v = data['v'] t_r,v_r = resamp_one_signal(t,v,resample_type = resample_type, min_t = min_t, max_t = max_t) resampled_data['t'] = t_r resampled_data['v'] = v_r return resampled_data def change_sampling(vector, size_to_warp): resampled = [] if len(vector) < 3: #If the length of the vector is either 2 or 1 it will be mnorfed in a straight line and such line will be morfed equaly resampled = [vector[0]]*size_to_warp return resampled delta = abs(size_to_warp - len(vector)) # --> number of points to insert/delete #At least one more sample for each sample (except the last one)? 
--> insert k in each hole and recompute delta if delta >= len(vector)-1 and delta > 0: holes = len(vector)-1 k = delta // holes #Number of point for each hole time = range(0,len(vector)*(k+1),k+1) # [o1,i11,i12,...,i1k, o2,i21,i22,...,i2k, o3,..., o_fin] --> NOTE: last sampel is not taken # 0 k+1 2k+2 l*k +l --> Hence, we get to the exact end t_new = range(time[-1]+1) f = interp1d(time,vector, bounds_error = False, fill_value = "extrapolate") vector = list(f(t_new)) delta = abs(size_to_warp - len(vector)) if len(vector) == size_to_warp: return vector grad = np.gradient(vector) grad[-1] = max(grad) + 1 # --> we don't want to insert anything after the last sample grad_idxs = sorted(range(len(grad)), key = lambda idx: grad[idx]) idx_to_consider = grad_idxs[:delta] #print(delta, len(idx_to_consider), idx_to_consider) for i,sample in enumerate(vector): resampled.append(sample) if i in idx_to_consider: if size_to_warp < len(vector): resampled.pop() elif size_to_warp > len(vector): resampled.append((vector[i]+vector[i+1])/2) return resampled def segment_warp(segment): #print("--> ",len(segment['segment'])) #print("--> ",segment['length_to_warp']) resampled = np.array(change_sampling(segment['segment'], segment['length_to_warp'])) #print("--> ",len(resampled)) resampled += (segment['v_start'] - resampled[0]) m = (segment['v_stop'] - resampled[-1])/(segment['length_to_warp']-1) coef_to_sum = [m*x for x in range(segment['length_to_warp'])] resampled += coef_to_sum return list(resampled) def stitch_segments(segments): stitched = [] for i,segment in enumerate(segments): if i == len(segments)-1: stitched.extend(segment) else: stitched.extend(segment[0:-1]) return stitched def dtw_dist(v1,v2): with warnings.catch_warnings(): warnings.simplefilter("ignore") dist = dtw_std(v1, v2) return dist def warp(resamp_event, templates, events): #print(events) warped = {} segments = [] # --> {"segment":v, "length_to_warp":length, "v_start":v_eb_0, "v_stop":v_eb_1} segment = {"segment":None, "length_to_warp":None, "v_start":None, "v_stop":None} segment_start = None event_start = None #chosen_template = [] #ad the first and last points of the resampled and truncated beat if not already in event if resamp_event['t'][0] not in events['t']: events['t'].insert(0, resamp_event['t'][0]) events['v'].insert(0, 0) #events['v'].insert(0, data['v'][0]) else: events['v'][0] = 0 if resamp_event['t'][-1] not in events['t']: - events['t'].insert(-1,resamp_event['t'][-1]) - events['v'].insert(-1,0) + events['t'].insert(len(events['t']),resamp_event['t'][-1]) + events['v'].insert(len(events['v']),0) else: events['v'][-1] = 0 #events['v'].insert(-1,resamp_event['v'][-1]) #print(events) #Apply DTW for matching resampled event to template v_src = resamp_event['v'] #This is how its actualy done on the library when calling 'warping_path' dist = float('inf') path = [] selected_template = [] disatances_vector = [] template_id = None for id in templates: t = templates[id]['point'] #dist_this_template, paths_this_template = dtw.warping_paths(v_src, t) with warnings.catch_warnings(): warnings.simplefilter("ignore") dist_this_template, _, path_this_template = dtw_std(v_src, t, dist_only=False) disatances_vector.append(dist_this_template) if dist_this_template < dist: dist = dist_this_template path = path_this_template selected_template = t template_id = id #path = dtw.best_path(paths) path = [(v1,v2) for v1,v2 in zip(path[0],path[1])] #Remove the "left" steps from the path and segment the template based on the events point prev_idx_src = 
path[-1][0]+1 for idx_src, idx_temp in path: if prev_idx_src == idx_src: continue prev_idx_src = idx_src for idx, t in enumerate(events['t']): if resamp_event['t'][idx_src] == t: if segment_start == None: segment_start = idx_temp event_start = idx else: #print(f"SEGMENT {events['t'][event_start]} - {events['t'][idx]} ({abs(events['t'][event_start] - events['t'][idx])})") segment['segment'] = np.array(selected_template[segment_start:idx_temp+1], dtype=np.float64) segment['length_to_warp'] = events['t'][idx] - events['t'][event_start] + 1 #TODO: Change this from timing information to length information segment['v_start'] = events['v'][event_start] segment['v_stop'] = events['v'][idx] w = segment_warp(segment) #print(len(w)) segments.append(w) segment_start = idx_temp event_start = idx break segment_stitched = stitch_segments(segments) #print(len(segment_stitched), len(resamp_event['t'])) warped = {'t':resamp_event['t'],'v':segment_stitched} return dist, warped, disatances_vector, template_id def compute_new_templates(data, t_start, t_stop, old_templates, templates_info): beats_for_new_template = get_beats_in_time_span(data, t_start_seconds=t_start,t_stop_seconds=t_stop) - beats_for_new_template = upsample_uniform_beats_in_beats_dict(beats_for_new_template,MULTIPLIER_FREQ) + #beats_for_new_template = upsample_uniform_beats_in_beats_dict(beats_for_new_template,MULTIPLIER_FREQ) beats_for_new_template,_ = min_max_normalization_beats_chunk(beats_for_new_template) - _, new_templates_descriptor = multi_template(beats_for_new_template, percentage_each_cluster = CLUSTER_PERCENTAGE*2, freq= FREQ*MULTIPLIER_FREQ) + _, new_templates_descriptor = multi_template(beats_for_new_template, percentage_each_cluster = CLUSTER_PERCENTAGE*2, freq= FREQ)#*MULTIPLIER_FREQ) lbls_new_templates = list(new_templates_descriptor.keys()) old_ids = list(old_templates.keys()) if old_ids == []: next_id = 0 else: next_id = max(old_ids) + 1 cluster_representatives = {} new_template_set = {} print(f"\nNew template built, number of new templates:{len(lbls_new_templates)}\n") old_templates_kept = 0 old_templates_substituted = 0 new_templates_kept = 0 new_templates_substituted = 0 # We search which of the old templates can be considered clustered with the newly founded ones if len(lbls_new_templates) > 0: for id_old_template in old_templates: t_o = old_templates[id_old_template]['point'] dists = np.zeros((len(lbls_new_templates))) for k,t_n in enumerate(lbls_new_templates): dists[k] = dtw_dist(t_o,new_templates_descriptor[t_n]['center']) min_dist_pos = np.argmin(dists) min_val = dists[min_dist_pos] this_lbls_new_templates = lbls_new_templates[min_dist_pos] distances_intra_cluster = new_templates_descriptor[this_lbls_new_templates]['dist_all_pt_cluster'] dist_threshold = np.average(distances_intra_cluster)+np.std(distances_intra_cluster) if min_val < dist_threshold: if this_lbls_new_templates not in cluster_representatives.keys(): cluster_representatives[this_lbls_new_templates] = {"ids": [], "dists": []} cluster_representatives[this_lbls_new_templates]["ids"].append(id_old_template) cluster_representatives[this_lbls_new_templates]["dists"].append(min_val) else: #This templates minimum distances to the newly founded templates are too big to be considered the same cluster new_template_set[id_old_template] = old_templates[id_old_template] old_templates_kept += 1 # Now we check if the old templates, clustered with the newly founded one, are representative of the cluster of if we should use the new templates for local_new_id in 
lbls_new_templates:
            if local_new_id in cluster_representatives.keys():
                connected_ids = cluster_representatives[local_new_id]["ids"]
                arg_min_dist = np.argmin(cluster_representatives[local_new_id]["dists"])
                old_template_id = cluster_representatives[local_new_id]["ids"][arg_min_dist]
                old_template_dist = cluster_representatives[local_new_id]["dists"][arg_min_dist]
                # The old template is nearer to the found cluster center than the beat found by the clustering alg
                if old_template_dist < new_templates_descriptor[local_new_id]["dist"]:
                    new_template_set[old_template_id] = old_templates[old_template_id]
                    connected_ids.remove(old_template_id)
                    templates_info[old_template_id]["connected_template"].extend(connected_ids)
                    new_templates_substituted += 1
                # The beat found by the clustering alg is nearer to the found cluster center than the old template
                else:
                    new_template_set[next_id] = new_templates_descriptor[local_new_id]
                    templates_info[next_id] = {"used": 0, "deceased": False, "connected_template": connected_ids}
                    next_id += 1
                    old_templates_substituted += len(cluster_representatives[local_new_id]["ids"])
                    # The templates in this cluster that are not the center are now deceased
                    for id_removed in connected_ids:
                        templates_info[id_removed]["deceased"] = True
            else:
                new_template_set[next_id] = new_templates_descriptor[local_new_id]
                templates_info[next_id] = {"used": 0, "deceased": False, "connected_template": []}
                next_id += 1
                new_templates_kept += 1
        print(f"Old templates kept untouched: {old_templates_kept}/{len(old_templates)}")
        print(f"Old templates kept, representative of new clusters (but already present): {new_templates_substituted}/{len(old_templates)}")
        print(f"Old templates removed for old clusters (with newly defined params): {len(old_templates) - (old_templates_kept+old_templates_substituted+new_templates_substituted)}/{len(old_templates)}")
        print(f"Old templates removed for new clusters: {old_templates_substituted}/{len(old_templates)}")
        print(f"\nNew templates kept untouched: {new_templates_kept}/{len(lbls_new_templates)}")
        print(f"New templates kept, representative of old clusters (with new params): {len(lbls_new_templates) - (new_templates_kept+new_templates_substituted)}/{len(lbls_new_templates)}")
        print(f"New templates removed for old clusters: {new_templates_substituted}/{len(lbls_new_templates)}")
        print(f"\nFinal length of the new template set: {len(new_template_set)}")
    else:
        new_template_set = old_templates
        print("No new template found, keeping the previously computed ones")
    return new_template_set
- # Code to re-use for bounded number of template.
First, let's try the unbunded version - ''' - # Decide which template to keep and which to discard - if len(new_templates) < max_num_template: - to_keep = np.argsort(dist_vector)[:max_num_template-len(new_templates)] - to_discard = np.argsort(dist_vector)[max_num_template-len(new_templates):] - for idx in to_keep: - new_full_templates.append(templates[idx]) - new_ids.append(id_templates[idx]) - print(f"\tKept template: {idx}, dist: {dist_vector[idx]}:\t","|"*int(20*dist_vector[idx]/max_accum_dist)) - for idx in to_discard: - templates_info[id_templates[idx]]["deceased"]=True - else: - print(f"\tNo old templates kept") - # Add the new templates - for n,t in enumerate(new_templates): - new_full_templates.append(t) - new_ids.append(next_id+n) - templates_info[next_id+n] = {"template": t, "used": 0, "deceased": False} - ''' - -def reconstruct_beats(data_orig, lvl_number, start_after = SEC_TO_AVG, normalize = True, resample_type = 'linear', num_beats_analyzed = None, verbose = False): +def reconstruct_beats(data_orig, lvl_number, init_templates = None, start_after = SEC_TO_AVG, resample_type = INTERPOLATION_TYPE, num_beats_analyzed = None, verbose = False): beat_seq_number = 0 distances = [] distances_ref = [] num_distances_out = 0 skip_until = start_after time_info = {'beats_low_res_num': 0, 'tot_beats_num': 0} # COMPUTE INITIAL TEMPLATES #Init templet info templates_info = {} #{$templet_id: {used: 0, deceased: False, "connected_template": []}} all_templates_collection = {} - templates = compute_new_templates(data_orig, 0, start_after, {}, templates_info) + if init_templates is None: + templates = compute_new_templates(data_orig, 0, start_after, {}, templates_info) + else: + templates = init_templates + for id_new in templates: + templates_info[id_new] = {"used": 0, "deceased": False, "connected_template": []} all_templates_collection = copy (templates) + init_templates = copy(templates) rmse_id_coupling = {"rmse_resamp":[],"rmse_right_resamp":[],"rmse_left_resamp":[], "rmse_warp": [],"rmse_warp_right": [],"rmse_warp_left": [], "ids":[], "beats":[]} dist_vector = [0]*len(templates) for beat in data_orig.keys(): t_beat = beat/FREQ time_info['tot_beats_num'] += 1 if t_beat < skip_until: continue time_info['beats_low_res_num'] += 1 if beat_seq_number >= num_beats_analyzed: break beat_seq_number +=1 if (beat_seq_number %(num_beats_analyzed/20)==0 or beat_seq_number == 1) and verbose: print(f"Reconstructing beat {beat} ({beat_seq_number}/{num_beats_analyzed}: {100*beat_seq_number /num_beats_analyzed}%, LEVEL:{lvl_number})") up_samp_beat = upsample_uniform_beat(data_orig[beat], beat, MULTIPLIER_FREQ) #TODO: keep track of QRS events = ADC(up_samp_beat,lvl_number) up_samp_beat,params = min_max_normalization_one_beat(up_samp_beat) events,_ = min_max_normalization_one_beat(events,params) resampled = resample(events, resample_type = resample_type, min_t = up_samp_beat['t'][0], max_t = up_samp_beat['t'][-1]) #WARP DRIVEEEEEEE dist, reconstructed, dist_all_template, local_template_id = warp(resampled,templates,events) dist_vector = [dist_vector[i]+dist_all_template[i] for i in range(len(dist_vector))] #Update templates usage info template_id = list(templates_info.keys())[-len(templates)+local_template_id] templates_info[template_id]["used"] += 1 #Compute RMSE #{'warp':None,'resamp':None,'warp_left':None,'resamp_left':None,'warp_right':None,'resamp_right':None} rmse_this_beat = RMSE_one_beat(up_samp_beat,resampled,reconstructed) rmse_id_coupling['ids'].append(local_template_id) 
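# --- Illustrative sketch (standalone, synthetic data): the k-sample Anderson-Darling
# check used in the template-update logic around this loop. A reference window of DTW
# distances is compared against the most recent window; an approximate p-value below
# 0.05 counts as one "strike", and two consecutive strikes trigger template
# recomputation. Window sizes and values below are made up.
import warnings
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
distances_ref = rng.normal(1.0, 0.2, 80)     # distances right after the last templating
new_window    = rng.normal(1.6, 0.2, 40)     # distances for the most recent beats

with warnings.catch_warnings():
    warnings.simplefilter("ignore")          # scipy caps/floors the returned p-value
    p = stats.anderson_ksamp([distances_ref, new_window]).significance_level

distributions_differ = p < 0.05              # one "strike"; the loop requires two in a row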
rmse_id_coupling['beats'].append(beat) rmse_id_coupling['rmse_resamp'].append(rmse_this_beat['resamp']) rmse_id_coupling['rmse_right_resamp'].append(rmse_this_beat['resamp_right']) rmse_id_coupling['rmse_left_resamp'].append(rmse_this_beat['resamp_left']) rmse_id_coupling['rmse_warp'].append(rmse_this_beat['warp']) rmse_id_coupling['rmse_warp_right'].append(rmse_this_beat['warp_right']) rmse_id_coupling['rmse_warp_left'].append(rmse_this_beat['warp_left']) if len(distances_ref) < LEN_DISTANCE_VECTOR_REF: distances_ref.append(dist) if len(distances_ref) >= LEN_DISTANCE_VECTOR_REF: distances.append(dist) if len(distances) >= LEN_DISTANCE_VECTOR: with warnings.catch_warnings(): warnings.simplefilter("ignore") p = stats.anderson_ksamp([distances_ref,distances])[2] distances = [] if (p>= 0.05): num_distances_out = 0 else: # p value les than 0.05: null hypothesis (same distribution) rejected (please Kolmogorov forgive me) num_distances_out += 1 if num_distances_out >= 2: # The acquired vector of distances was out for 2 times max_accum_dist = max(dist_vector) print(f"\n################################################################") print(f"\nBeat num:{beat_seq_number}, New template needed ... ") print(f"\t p-value: {p}") for j in range(len(dist_vector)): print(f"\tTemplate {j}, dist: {dist_vector[j]}:\t","|"*int(20*dist_vector[j]/max_accum_dist)) templates = compute_new_templates(data_orig, t_beat, t_beat+120, templates, templates_info) for t in templates: all_templates_collection[t] = templates[t] dist_vector = [0]*len(templates) print(f"\n################################################################\n") #distances_ref = distances_ref[REF_NEW:] distances_ref = [] skip_until = t_beat + 120 num_distances_out = 0 - return all_templates_collection,rmse_id_coupling,time_info + return all_templates_collection,rmse_id_coupling,init_templates,time_info def percentile_idx(vector,perc): pcen=np.percentile(np.array(vector),perc,interpolation='nearest') i_near=abs(np.array(vector)-pcen).argmin() return i_near def correlate(v1,v2): v1_n = np.array(v1) v2_n = np.array(v2) cov = np.cov(v1_n,v2_n)[0,1] cor = cov/(np.std(v1_n)*np.std(v2_n)) return cor def avg_std_rmse(rmse_descriptor): dict_out = {'warp':{'avg':None,'std':None},'resamp':{'avg':None,'std':None}} wr = [] rs = [] dict_out['warp']['avg'] = np.average(rmse_descriptor['rmse_warp']) dict_out['warp']['std'] = np.std(rmse_descriptor['rmse_warp']) dict_out['resamp']['avg'] = np.average(rmse_descriptor['rmse_resamp']) dict_out['resamp']['std'] = np.std(rmse_descriptor['rmse_resamp']) return dict_out def percentile_rmse_beat_rel(rmse,perc): vec = [] for beat in rmse.keys(): vec.append(np.array(rmse['rmse_warp']) - np.array(rmse['rmse_resamp'])) idx = percentile_idx(vec,perc) return rmse['beats'][idx],rmse['ids'][idx] def percentile_rmse_beat_abs(rmse,perc): vec = [] for beat in rmse.keys(): vec.append(rmse['rmse_warp']) idx = percentile_idx(vec,perc) return rmse['beats'][idx],rmse['ids'][idx] def find_all_connceted_template(id_temp,templates_info): id_collected = [id_temp] for id_connected in id_collected: for id_to_add in templates_info[id_connected]["connected_template"]: if id_to_add not in id_collected: id_collected.append(id_to_add) return id_collected def plot_perc(orig,warp,resamp,title,out_file_name): plt.figure() plt.plot(orig[0],orig[1]) plt.plot(warp[0],warp[1]) plt.plot(resamp[0],resamp[1]) plt.legend(['original','warped template','resampled']) plt.title(title) plt.savefig(out_file_name) plt.close() def plot_beat_rmse_percentile(orig, 
rmse_id_coupling, templates_collection, lvl, perc, log_dir_this_lvl, file): # rmse_id_coupling = {"rmse_resamp":[],"rmse_right_resamp":[],"rmse_left_resamp":[], "rmse_warp": [],"rmse_warp_right": [],"rmse_warp_left": [], "ids":[], "beats": []} #Relative template_sel = {"template":None} beat_id, template_id = percentile_rmse_beat_rel(rmse_id_coupling, perc) file_name_to_save_fig_rel = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_relative.svg") template_sel['template'] = templates_collection[template_id] up_samp_beat = upsample_uniform_beat(orig[beat_id], beat_id, MULTIPLIER_FREQ) #TODO: keep track of QRS events = ADC(up_samp_beat,lvl) up_samp_beat,params = min_max_normalization_one_beat(up_samp_beat) events,_ = min_max_normalization_one_beat(events,params) - resampled = resample(events, resample_type = "flat", min_t = up_samp_beat['t'][0], max_t = up_samp_beat['t'][-1]) + resampled = resample(events, resample_type = INTERPOLATION_TYPE, min_t = up_samp_beat['t'][0], max_t = up_samp_beat['t'][-1]) _, reconstructed, _, _ = warp(resampled,template_sel,events) orig_rel = up_samp_beat['t'],up_samp_beat['v'] recon_rel = reconstructed['t'],reconstructed['v'] resamp_rel = resampled['t'],resampled['v'] - title_rel = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_id}, {str(perc)} percentile' + title_rel = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_id}, {str(perc)} percentile, Relative' plot_perc(orig_rel,recon_rel,resamp_rel,title_rel,file_name_to_save_fig_rel) #Absolute beat_id, template_id = percentile_rmse_beat_abs(rmse_id_coupling, perc) file_name_to_save_fig_abs = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_absolute.svg") template_sel['template'] = templates_collection[template_id] up_samp_beat = upsample_uniform_beat(orig[beat_id], beat_id, MULTIPLIER_FREQ) #TODO: keep track of QRS events = ADC(up_samp_beat,lvl) up_samp_beat,params = min_max_normalization_one_beat(up_samp_beat) events,_ = min_max_normalization_one_beat(events,params) - resampled = resample(events, resample_type = "flat", min_t = up_samp_beat['t'][0], max_t = up_samp_beat['t'][-1]) + resampled = resample(events, resample_type = INTERPOLATION_TYPE, min_t = up_samp_beat['t'][0], max_t = up_samp_beat['t'][-1]) _, reconstructed, _, _ = warp(resampled,template_sel,events) orig_abs = up_samp_beat['t'],up_samp_beat['v'] recon_abs = reconstructed['t'],reconstructed['v'] resamp_abs = resampled['t'],resampled['v'] - title_abs = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_id}, {str(perc)} percentile' + title_abs = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_id}, {str(perc)} percentile, Absolute' plot_perc(orig_abs,recon_abs,resamp_abs,title_abs,file_name_to_save_fig_abs) def recontruct_and_compare(file): verbose = True log_dir_this_file = os.path.join(log_dir,file.split(".")[0]) os.mkdir(log_dir_this_file) - #recon,orig,resamp,templates_info, time_info = reconstruct_beats(file, level = LEVEL, resample_type = 'flat', num_beats_analyzed = NUM_BEAT_ANALYZED, verbose = True) #resample_type = flat vs linear - #rmse = RMSE_warped_resampled(orig,recon,resamp) time_to_plot_rmse_perc = [] time_to_build_rmse_vec_for_histogram = [] time_to_plot_histogram = [] + init_templates = None if verbose: print("Extracting original data") data_orig = open_file(file, level = "original", start_after = 0) for lvl in LEVEL: if verbose: print(f"Analyzing level:{lvl}") - all_templates_collection, rmse_id_coupling, time_info = 
reconstruct_beats(data_orig, lvl, start_after = SEC_TO_AVG, resample_type = 'flat', num_beats_analyzed = NUM_BEAT_ANALYZED, verbose = True) #resample_type = flat vs linear + all_templates_collection, rmse_id_coupling, init_templates, time_info = reconstruct_beats(data_orig, lvl, init_templates = init_templates, start_after = SEC_TO_AVG, resample_type = INTERPOLATION_TYPE, num_beats_analyzed = NUM_BEAT_ANALYZED, verbose = True) #resample_type = flat vs linear log_dir_this_lvl = os.path.join(log_dir_this_file,str(lvl)) os.mkdir(log_dir_this_lvl) stats = avg_std_rmse(rmse_id_coupling) avg_rmse_warp = stats['warp']['avg'] std_rmse_warp = stats['warp']['std'] avg_rmse_resamp = stats['resamp']['avg'] std_rmse_resamp = stats['resamp']['std'] file_name_to_save = "L_"+file.split(".")[0]+".log" # Particular log for each lvl with open(os.path.join(log_dir_this_lvl,file_name_to_save),"a") as f: f.write(f"Lvl: {lvl}\n") f.write(f"\tWarp: {avg_rmse_warp}, +-{std_rmse_warp}\n") f.write(f"\tInterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}\n") f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%\n") f.write(f"\n\n") # General log (the same but all toghether: more confusing but with all infos) with open(os.path.join(log_dir_this_file,file_name_to_save),"a") as f: f.write(f"Lvl: {lvl}\n") f.write(f"\tWarp: {avg_rmse_warp}, +-{std_rmse_warp}\n") f.write(f"\tInterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}\n") ''' for id_temp in templates_info: usage = templates_info[id_temp]["used"] deceased = templates_info[id_temp]["deceased"] f.write(f"\t\tTemplate {id_temp} was used {usage} times out of {NUM_BEAT_ANALYZED}, \ deceased? {deceased}\n") ''' f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%\n") f.write(f"\n\n") print(f"File:{file_name_to_save}") print(f"\tLvl: {lvl}") print(f"\t\twarp: {avg_rmse_warp}, +-{std_rmse_warp}") print(f"\t\tinterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}") print(f"\t\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%") print("\n") t_0 = time.time() # 01 percentile plot_beat_rmse_percentile(data_orig, rmse_id_coupling, all_templates_collection, lvl, 1, log_dir_this_lvl, file) # 25 percentile plot_beat_rmse_percentile(data_orig, rmse_id_coupling, all_templates_collection, lvl, 25, log_dir_this_lvl, file) # 50 percentile plot_beat_rmse_percentile(data_orig, rmse_id_coupling, all_templates_collection, lvl, 50, log_dir_this_lvl, file) # 75 percentile plot_beat_rmse_percentile(data_orig, rmse_id_coupling, all_templates_collection, lvl, 75, log_dir_this_lvl, file) # 99 percentile plot_beat_rmse_percentile(data_orig, rmse_id_coupling, all_templates_collection, lvl, 99, log_dir_this_lvl, file) # Histograms t_1 = time.time() file_name_to_save_fig_hist = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist"+str(lvl)+".svg") file_name_to_save_fig_hist_left = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist_left"+str(lvl)+".svg") file_name_to_save_fig_hist_right = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist_right"+str(lvl)+".svg") t_2 = time.time() n_bins = len(rmse_id_coupling['rmse_warp'])*PERC_BINS//100 min_bin = min(min(rmse_id_coupling['rmse_warp']),min(rmse_id_coupling['rmse_resamp'])) max_bin = max(max(rmse_id_coupling['rmse_warp']),max(rmse_id_coupling['rmse_resamp'])) delta = (max_bin-min_bin)/n_bins bins = np.arange(min_bin,max_bin+delta,delta) 
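# --- Illustrative sketch (standalone, toy data): the shared-bin histogram construction
# used around this point. Both RMSE series are binned on one common grid whose
# resolution is a fixed percentage (PERC_BINS) of the number of analyzed beats, so the
# warped and resampled distributions are directly comparable. PERC_BINS and the data
# below are assumed values.
import numpy as np

PERC_BINS = 5
rng = np.random.default_rng(0)
rmse_warp   = rng.normal(0.15, 0.03, 200)
rmse_resamp = rng.normal(0.22, 0.04, 200)

n_bins = max(1, len(rmse_warp) * PERC_BINS // 100)
lo = min(rmse_warp.min(), rmse_resamp.min())
hi = max(rmse_warp.max(), rmse_resamp.max())
bins = np.linspace(lo, hi, n_bins + 1)                 # same edges for both series
hist_warp, _   = np.histogram(rmse_warp, bins=bins)
hist_resamp, _ = np.histogram(rmse_resamp, bins=bins)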
plt.figure() plt.hist(rmse_id_coupling['rmse_warp'], bins = bins, alpha=0.5) plt.hist(rmse_id_coupling['rmse_resamp'], bins = bins, alpha=0.5) plt.title(f'File: {file}, Lvl: {lvl}, RMSE histogram') plt.legend(['RMSE warp','RMSE resampled']) plt.savefig(file_name_to_save_fig_hist) plt.close() n_bins = len(rmse_id_coupling['rmse_warp_left'])*PERC_BINS//100 min_bin = min(min(rmse_id_coupling['rmse_warp_left']),min(rmse_id_coupling['rmse_left_resamp'])) max_bin = max(max(rmse_id_coupling['rmse_warp_left']),max(rmse_id_coupling['rmse_left_resamp'])) delta = (max_bin-min_bin)/n_bins bins = np.arange(min_bin,max_bin+delta,delta) plt.figure() plt.hist(rmse_id_coupling['rmse_warp_left'], bins = bins, alpha=0.5) plt.hist(rmse_id_coupling['rmse_left_resamp'], bins = bins, alpha=0.5) plt.title(f'File: {file}, Lvl: {lvl}, left RMSE histogram') plt.legend(['RMSE warp','RMSE resampled']) plt.savefig(file_name_to_save_fig_hist_left) plt.close() n_bins = len(rmse_id_coupling['rmse_warp_right'])*PERC_BINS//100 min_bin = min(min(rmse_id_coupling['rmse_warp_right']),min(rmse_id_coupling['rmse_right_resamp'])) max_bin = max(max(rmse_id_coupling['rmse_warp_right']),max(rmse_id_coupling['rmse_right_resamp'])) delta = (max_bin-min_bin)/n_bins bins = np.arange(min_bin,max_bin+delta,delta) plt.figure() plt.hist(rmse_id_coupling['rmse_warp_right'], bins = bins, alpha=0.5) plt.hist(rmse_id_coupling['rmse_right_resamp'], bins = bins, alpha=0.5) plt.title(f'File: {file}, Lvl: {lvl}, right RMSE histogram') plt.legend(['RMSE warp','RMSE resampled']) plt.savefig(file_name_to_save_fig_hist_right) plt.close() t_3 = time.time() time_to_plot_rmse_perc.append(t_1-t_0) time_to_build_rmse_vec_for_histogram.append(t_2-t_1) time_to_plot_histogram.append(t_3-t_2) print(f"\tLevel: {lvl}: Time to plot the beats relative to RMSE percentile: {t_1-t_0}") print(f"\tLevel: {lvl}: Time to build the rmse vectors for histogram plotting: {t_2-t_1}") print(f"\tLevel: {lvl}: Time to plot the histograms: {t_3-t_2}") print("\n----------------------------------------") print("\n--------------------------------------------\n") print(f"Time to plot the beats relative to RMSE percentile: {np.average(time_to_plot_rmse_perc)} +/- {np.std(time_to_plot_rmse_perc)} (total: {sum(time_to_plot_rmse_perc)})") print(f"Time to build the rmse vectors for histogram plotting: {np.average(time_to_build_rmse_vec_for_histogram)} +/- {np.std(time_to_build_rmse_vec_for_histogram)} (total: {sum(time_to_build_rmse_vec_for_histogram)})") print(f"Time to plot the histograms: {np.average(time_to_plot_histogram)} +/- {np.std(time_to_plot_histogram)} (total: {sum(time_to_plot_histogram)})") def process(files, multi=True, cores=1): # ------------ INIT ------------ global log_dir - for i in range(1,1000): + for i in range(1,10000): tmp_log_dir = log_dir+str(i) if not os.path.isdir(tmp_log_dir): log_dir = tmp_log_dir break os.mkdir(log_dir) + with open(os.path.join(log_dir,"specs.txt"), "w") as f: + f.write(f"Results generated by script: {sys.argv[0]}\n") + f.write(f"Time: {time.ctime(time.time())}\n\n") + f.write(f"Files: {files}\n") + f.write(f"Cores: {cores}\n") + f.write(f"Beats: {NUM_BEAT_ANALYZED}\n") + f.write(f"Cluster percentage: {CLUSTER_PERCENTAGE}\n") + f.write(f"Interpolation type: {INTERPOLATION_TYPE}\n") # ------------ Extract DATA & ANNOTATIONS ------------ if cores == 1: print("Single core") for f in files: recontruct_and_compare(f) else: with Pool(cores) as pool: pool.map(recontruct_and_compare, files) if __name__ == "__main__": import argparse import time 
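# --- Illustrative sketch (standalone, hypothetical paths): the run-directory handling
# in process() above. The first free numeric suffix is picked for the log directory and
# the run parameters are recorded in a small specs file next to the results.
import os, sys, time

base_log_dir = "../data/logs/beats_reconstruction"      # hypothetical base path
run_dir = None
for i in range(1, 10000):
    candidate = base_log_dir + str(i)
    if not os.path.isdir(candidate):                    # first suffix not yet used
        run_dir = candidate
        break
os.makedirs(run_dir)
with open(os.path.join(run_dir, "specs.txt"), "w") as f:
    f.write(f"Results generated by script: {sys.argv[0]}\n")
    f.write(f"Time: {time.ctime(time.time())}\n")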
    seconds_start = time.time()
    local_time_start = time.ctime(seconds_start)
    print("\nStarted at:", local_time_start,"\n\n")
    #global NUM_BEAT_ANALYZED
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help="Force to analyze one specific file instead of the default one (first found)")
-    parser.add_argument("--not_norm", help="Force to NOT normalize each beats", action="store_true")
    parser.add_argument("--cores", help="Force the number of cores used (default: half of the available ones)")
    parser.add_argument("--beats", help="Number of used beats, default: 5000")
    parser.add_argument("--cluster_opt", help="Percentage of points for a cluster to be considered")
-    parser.add_argument("--template_type", help="Type of template, default: distance")
+    parser.add_argument("--interpolation_type", help="Choose between: spline, flat, and linear; default: flat")
+
    args = parser.parse_args()
    files = os.listdir(os.path.join(data_beats_dir,"original"))
    if args.file is not None:
        if args.file == 'all':
            analyzed = files
        else:
            analyzed = list(filter(lambda string: True if args.file in string else False, files))
    else:
        analyzed = [files[0]]
-    if args.not_norm:
-        normalize = False
-    else:
-        normalize = True
    if args.cores is not None:
        used_cores = int(args.cores)
    else:
        used_cores = multiprocessing.cpu_count()//2
    if args.beats is not None:
        NUM_BEAT_ANALYZED = int(args.beats)
    else:
        NUM_BEAT_ANALYZED = 5000
-    if args.template_type is not None:
-        TEMPLATE_TYPE = 'average'
-    else:
-        TEMPLATE_TYPE = 'distance'
    if args.cluster_opt is not None:
        CLUSTER_PERCENTAGE = int(args.cluster_opt)
    else:
        CLUSTER_PERCENTAGE = 3
+    if args.interpolation_type is not None:
+        INTERPOLATION_TYPE = args.interpolation_type
+    else:
+        INTERPOLATION_TYPE = "flat"
    print(f"Analyzing files: {analyzed}")
    print(f"Extracting data with {used_cores} cores...")
    process(files = analyzed, multi=True, cores=used_cores)
    seconds_stop = time.time()
    local_time_stop = time.ctime(seconds_stop)
    elapsed = seconds_stop - seconds_start
    hours = elapsed//60//60
    minutes = (elapsed - hours * 60 * 60) // 60
    seconds = (elapsed - hours * 60 * 60 - minutes * 60) // 1
    print("\n\n\n-----------------------------------------------------------------------------------------------------------------")
    print(f"Finished at: {local_time_stop}, elapsed: {elapsed} seconds ({hours} hours, {minutes} minutes, {seconds} seconds)")
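# --- Illustrative sketch: an equivalent way to split the elapsed seconds reported above
# into hours/minutes/seconds, using divmod instead of repeated floor divisions. The
# value below is an example only.
elapsed = 7385.4                                   # seconds
hours, rest = divmod(elapsed, 3600)
minutes, seconds = divmod(rest, 60)
print(f"{int(hours)} hours, {int(minutes)} minutes, {int(seconds)} seconds")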