multiple_templates_prog.py

import multiprocessing
import os
import pickle
import shutil
import sys
from multiprocessing import Pool
import time
import matplotlib
import matplotlib.pyplot as plt
# configure backend here
matplotlib.use('SVG')
import numpy as np
from dtaidistance import dtw
scripts = '../helper_scripts'
if scripts not in sys.path:
sys.path.insert(0,scripts)
import warnings
from copy import copy
from dtw.dtw import dtw_std
from scipy import stats
from scipy.interpolate import interp1d
from scipy.signal import fftconvolve
from build_template import build_template, multi_template
data_beats_dir = "../data/beats/"
log_dir = "../data/beat_recon_logs_multi_prog_"
FREQ = 128*4
SEC_TO_AVG = 5*60
BEAT_TO_PLOT = 2
LEVEL = [3,4,5,6,7,8]
NUM_BEAT_ANALYZED = 50
LEN_DISTANCE_VECTOR = 80
LEN_DISTANCE_VECTOR_REF = 500
REF_NEW = 100
TEMPLATE_TYPE = 'distance'
INTERPOLATION_TYPE = 'flat' # resample type written to the specs log; assumed to mirror the hardcoded 'flat' used in reconstruct_and_compare
PERC_BINS = 15
CLUSTER_PERCENTAGE = 3
def RMSE(v1,v2):
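    '''Root-mean-square error between two equal-length sequences.'''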
v1_n = np.array(v1)
v2_n = np.array(v2)
return np.sqrt(np.mean((v1_n-v2_n)**2))
def RMSE_warped_resampled(data_orig,data_warped,data_resampled):
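    '''
    For every beat, compute the RMSE between the original signal and both the
    warped and the resampled reconstruction, over the whole beat and over the
    portions to the left and right of the annotated beat sample.
    '''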
rmse = {}
template = {'warp':None,'resamp':None,'warp_left':None,'resamp_left':None,'warp_right':None,'resamp_right':None}
for beat in data_warped.keys():
rmse[beat] = copy(template)
idx_beat = data_warped[beat]['t'].index(beat)
l_resamp = data_resampled[beat]['v'][:idx_beat]
r_resamp = data_resampled[beat]['v'][idx_beat+1:]
l_warp = data_warped[beat]['v'][:idx_beat]
r_warp = data_warped[beat]['v'][idx_beat+1:]
l_orig = data_orig[beat]['v'][:idx_beat]
r_orig = data_orig[beat]['v'][idx_beat+1:]
rmse[beat]['warp'] = RMSE(data_orig[beat]['v'],data_warped[beat]['v'])
rmse[beat]['resamp'] = RMSE(data_orig[beat]['v'],data_resampled[beat]['v'])
rmse[beat]['warp_left'] = RMSE(l_orig,l_warp)
rmse[beat]['resamp_left'] = RMSE(l_orig,l_resamp)
rmse[beat]['warp_right'] = RMSE(r_orig,r_warp)
rmse[beat]['resamp_right'] = RMSE(r_orig,r_resamp)
return rmse
def open_file(file_name, level, start_after = SEC_TO_AVG):
'''
Data structure:
File:
|
DICTIONARY:
|
--> Beat_annot:
|
--> "t": []
|
--> "v": []
'''
file_name_full = os.path.join(data_beats_dir, str(level), os.path.basename(file_name))
data = {}
with open(file_name_full,"rb") as f:
data = pickle.load(f)
    for k in list(data.keys()): # iterate over a copy of the keys, since the dict is modified while looping
        if k <= FREQ*start_after:
            data.pop(k)
return data
def get_beats_in_time_span(data, lvl = 0 ,t_start_seconds = 0, t_stop_seconds = SEC_TO_AVG):
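    '''Return the subset of beats whose annotation falls between t_start_seconds and t_stop_seconds.'''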
beats = {}
for annot in data.keys():
if annot >= int(t_start_seconds*FREQ) and annot <= int(t_stop_seconds*FREQ):
beats[annot] = data[annot]
elif annot > int(t_stop_seconds*FREQ):
break
return beats
def min_max_normalization(beats, params = None):
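    '''
    Min-max normalize each beat and subtract its average. If 'params' is given,
    the stored per-beat min/max/avg are re-used; otherwise they are computed and
    returned so that the other resolution levels can be normalized consistently.
    '''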
params_out = {}
normalized_beats = {}
#print("\t",params)
for beat in beats:
vector = beats[beat]['v']
params_out[beat] = {}
normalized_beats[beat] = {'t':beats[beat]['t'],'v':[]}
if params is not None:
mi_v = params[beat]['min']
ma_v = params[beat]['max']
avg = params[beat]['avg']
norm = (np.array(vector)-mi_v)/(ma_v-mi_v)
norm -= avg
normalized_beats[beat]['v'] = norm.tolist()
else:
mi_v = min(vector)
ma_v = max(vector)
norm = (np.array(vector)-mi_v)/(ma_v-mi_v)
avg = np.average(norm)
norm -= avg
normalized_beats[beat]['v'] = norm.tolist()
params_out[beat]['min'] = mi_v
params_out[beat]['max'] = ma_v
params_out[beat]['avg'] = avg
return normalized_beats,params_out
def resamp_one_signal(t,v,resample_type = 'linear', min_t = None, max_t = None):
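    '''Resample one (t, v) signal on an integer time grid from min_t to max_t, using linear or flat (previous-value) interpolation.'''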
if resample_type == "linear":
f = interp1d(t,v, bounds_error = False, fill_value = "extrapolate")
elif resample_type == "flat":
f = interp1d(t,v, kind = 'previous', bounds_error = False, fill_value = "extrapolate")
if min_t is None:
min_t = t[0]
if max_t is None:
max_t = t[-1]
t_new = list(range(min_t,max_t+1))
v_new = f(t_new)
return t_new,v_new
def resample(data, resample_type = "linear", min_t = None, max_t = None):
resampled_data = {"t":None,"v":None}
t = data['t']
v = data['v']
t_r,v_r = resamp_one_signal(t,v,resample_type = resample_type, min_t = min_t, max_t = max_t)
resampled_data['t'] = t_r
resampled_data['v'] = v_r
return resampled_data
def change_sampling(vector, size_to_warp):
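    '''
    Stretch or shrink 'vector' to exactly 'size_to_warp' samples: after an
    optional uniform up-sampling (when many points are missing), samples are
    inserted (as local averages) or removed at the positions of smallest gradient.
    '''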
resampled = []
    if len(vector) < 3: # A vector of only 1 or 2 samples degenerates to a straight line, so it is warped uniformly
resampled = [vector[0]]*size_to_warp
return resampled
delta = abs(size_to_warp - len(vector)) # --> number of points to insert/delete
#At least one more sample for each sample (except the last one)? --> insert k in each hole and recompute delta
if delta >= len(vector)-1 and delta > 0:
holes = len(vector)-1
k = delta // holes #Number of point for each hole
        time = range(0,len(vector)*(k+1),k+1) # [o1,i11,i12,...,i1k, o2,i21,i22,...,i2k, o3,..., o_fin] --> NOTE: the last sample is not taken
# 0 k+1 2k+2 l*k +l --> Hence, we get to the exact end
t_new = range(time[-1]+1)
f = interp1d(time,vector, bounds_error = False, fill_value = "extrapolate")
vector = list(f(t_new))
delta = abs(size_to_warp - len(vector))
if len(vector) == size_to_warp:
return vector
grad = np.gradient(vector)
grad[-1] = max(grad) + 1 # --> we don't want to insert anything after the last sample
grad_idxs = sorted(range(len(grad)), key = lambda idx: grad[idx])
idx_to_consider = grad_idxs[:delta]
#print(delta, len(idx_to_consider), idx_to_consider)
for i,sample in enumerate(vector):
resampled.append(sample)
if i in idx_to_consider:
if size_to_warp < len(vector):
resampled.pop()
elif size_to_warp > len(vector):
resampled.append((vector[i]+vector[i+1])/2)
return resampled
def segment_warp(segment):
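    '''Resample one template segment to 'length_to_warp' samples, then shift and linearly tilt it so that it starts at v_start and ends at v_stop.'''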
#print("--> ",len(segment['segment']))
#print("--> ",segment['length_to_warp'])
resampled = np.array(change_sampling(segment['segment'], segment['length_to_warp']))
#print("--> ",len(resampled))
resampled += (segment['v_start'] - resampled[0])
m = (segment['v_stop'] - resampled[-1])/(segment['length_to_warp']-1)
coef_to_sum = [m*x for x in range(segment['length_to_warp'])]
resampled += coef_to_sum
return list(resampled)
def stitch_segments(segments):
stitched = []
for i,segment in enumerate(segments):
if i == len(segments)-1:
stitched.extend(segment)
else:
stitched.extend(segment[0:-1])
return stitched
def dtw_dist(v1,v2):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
dist = dtw_std(v1, v2)
return dist
def warp(resamp_event, templates, events):
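    '''
    Reconstruct one beat: select, via DTW, the template closest to the resampled
    low-resolution beat, cut that template at the acquired event points, warp each
    segment to the corresponding time span and stitch the segments back together.
    Returns the minimum DTW distance, the warped beat, the distances to all
    templates and the id of the selected template.
    '''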
#print(events)
warped = {}
segments = [] # --> {"segment":v, "length_to_warp":length, "v_start":v_eb_0, "v_stop":v_eb_1}
segment = {"segment":None, "length_to_warp":None, "v_start":None, "v_stop":None}
segment_start = None
event_start = None
#chosen_template = []
    # Add the first and last points of the resampled and truncated beat if they are not already among the events
if resamp_event['t'][0] not in events['t']:
events['t'].insert(0, resamp_event['t'][0])
events['v'].insert(0, 0)
#events['v'].insert(0, data['v'][0])
if resamp_event['t'][-1] not in events['t']:
events['t'].insert(-1,resamp_event['t'][-1])
events['v'].insert(-1,0)
#events['v'].insert(-1,resamp_event['v'][-1])
#print(events)
#Apply DTW for matching resampled event to template
v_src = resamp_event['v']
    # This mirrors what the library actually does when calling 'warping_path'
dist = float('inf')
path = []
selected_template = []
    distances_vector = []
template_id = None
for id in templates:
t = templates[id]['point']
#dist_this_template, paths_this_template = dtw.warping_paths(v_src, t)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
dist_this_template, _, path_this_template = dtw_std(v_src, t, dist_only=False)
        distances_vector.append(dist_this_template)
if dist_this_template < dist:
dist = dist_this_template
path = path_this_template
selected_template = t
template_id = id
#path = dtw.best_path(paths)
path = [(v1,v2) for v1,v2 in zip(path[0],path[1])]
#Remove the "left" steps from the path and segment the template based on the events point
prev_idx_src = path[-1][0]+1
for idx_src, idx_temp in path:
if prev_idx_src == idx_src:
continue
prev_idx_src = idx_src
for idx, t in enumerate(events['t']):
if resamp_event['t'][idx_src] == t:
                if segment_start is None:
segment_start = idx_temp
event_start = idx
else:
#print(f"SEGMENT {events['t'][event_start]} - {events['t'][idx]} ({abs(events['t'][event_start] - events['t'][idx])})")
segment['segment'] = np.array(selected_template[segment_start:idx_temp+1], dtype=np.float64)
segment['length_to_warp'] = events['t'][idx] - events['t'][event_start] + 1
segment['v_start'] = events['v'][event_start]
segment['v_stop'] = events['v'][idx]
w = segment_warp(segment)
#print(len(w))
segments.append(w)
segment_start = idx_temp
event_start = idx
break
segment_stitched = stitch_segments(segments)
#print(len(segment_stitched), len(resamp_event['t']))
warped = {'t':resamp_event['t'],'v':segment_stitched}
    return dist, warped, distances_vector, template_id
def compute_new_templates(data, t_start, t_stop, old_templates, templates_info):
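    '''
    Build a refreshed template set from the beats in [t_start, t_stop]. New clusters
    come from multi_template; each old template is either kept as is, kept as the
    representative of a new cluster, or replaced by the new cluster's template, with
    replacements tracked in templates_info via "connected_template" and "deceased".
    '''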
beats_for_new_template = get_beats_in_time_span(data, t_start_seconds=t_start,t_stop_seconds=t_stop)
_, new_templates_descriptor = multi_template(beats_for_new_template, percentage_each_cluster = CLUSTER_PERCENTAGE*2)
lbls_new_templates = list(new_templates_descriptor.keys())
old_ids = list(old_templates.keys())
next_id = max(old_ids) + 1
cluster_representatives = {}
new_template_set = {}
print(f"\nNew template built, number of new templates:{len(lbls_new_templates)}\n")
old_templates_kept = 0
old_templates_substituted = 0
new_templates_kept = 0
new_templates_substituted = 0
    # Check which of the old templates can be considered clustered with the newly found ones
if len(lbls_new_templates) > 0:
for id_old_template in old_templates:
t_o = old_templates[id_old_template]['point']
dists = np.zeros((len(lbls_new_templates)))
for k,t_n in enumerate(lbls_new_templates):
dists[k] = dtw_dist(t_o,new_templates_descriptor[t_n]['center'])
min_dist_pos = np.argmin(dists)
min_val = dists[min_dist_pos]
this_lbls_new_templates = lbls_new_templates[min_dist_pos]
distances_intra_cluster = new_templates_descriptor[this_lbls_new_templates]['dist_all_pt_cluster']
dist_threshold = np.average(distances_intra_cluster)+np.std(distances_intra_cluster)
if min_val < dist_threshold:
if this_lbls_new_templates not in cluster_representatives.keys():
cluster_representatives[this_lbls_new_templates] = {"ids": [], "dists": []}
cluster_representatives[this_lbls_new_templates]["ids"].append(id_old_template)
cluster_representatives[this_lbls_new_templates]["dists"].append(min_val)
            else: # This template's minimum distance to the newly found templates is too large for it to belong to the same cluster
new_template_set[id_old_template] = old_templates[id_old_template]
old_templates_kept += 1
        # Now check whether the old templates clustered with the newly found ones are representative of their cluster, or if the new templates should be used instead
for local_new_id in lbls_new_templates:
if local_new_id in cluster_representatives.keys():
connected_ids = cluster_representatives[local_new_id]["ids"]
arg_min_dist = np.argmin(cluster_representatives[local_new_id]["dists"])
old_template_id = cluster_representatives[local_new_id]["ids"][arg_min_dist]
old_template_dist = cluster_representatives[local_new_id]["dists"][arg_min_dist]
# The old template is nearer to the found cluster center than the beat found by the clustering alg
if old_template_dist < new_templates_descriptor[local_new_id]["dist"]:
new_template_set[old_template_id] = old_templates[old_template_id]
connected_ids.remove(old_template_id)
templates_info[old_template_id]["connected_template"].extend(connected_ids)
new_templates_substituted += 1
# The beat found by the clustering alg is nearer to the found cluster center than the old template
else:
new_template_set[next_id] = new_templates_descriptor[local_new_id]
templates_info[next_id] = {"used": 0, "deceased": False, "connected_template": connected_ids}
next_id += 1
old_templates_substituted += len(cluster_representatives[local_new_id]["ids"])
# The templates in this cluster that are not the center are now deceased
for id_removed in connected_ids:
templates_info[id_removed]["deceased"] = True
else:
new_template_set[next_id] = new_templates_descriptor[local_new_id]
templates_info[next_id] = {"used": 0, "deceased": False, "connected_template": []}
next_id += 1
new_templates_kept += 1
print(f"Old templates kept untuched: {old_templates_kept}/{len(old_templates)}")
print(f"Old templates kept, representative of new clusters (but already present): {new_templates_substituted}/{len(old_templates)}")
print(f"Old templates removed for old clusters (with new defined params): {len(old_templates) - (old_templates_kept+old_templates_substituted+new_templates_substituted)}/{len(old_templates)}")
print(f"Old templates removed for new clusters: {old_templates_substituted}/{len(old_templates)}")
print(f"\nNew templates kept untuched: {new_templates_kept}/{len(lbls_new_templates)}")
print(f"New templates kept, representative of old clusters (with new params): {len(lbls_new_templates) - (new_templates_kept+new_templates_substituted)}/{len(lbls_new_templates)}")
print(f"New templates removed for old clusters: {new_templates_substituted}/{len(lbls_new_templates)}")
print(f"\nFinal lenght of the new tempalte set: {len(new_template_set)}")
else:
new_template_set = old_templates
print("No new template found, keeping the previously computed ones")
return new_template_set
# Code to re-use for a bounded number of templates. First, let's try the unbounded version
'''
# Decide which template to keep and which to discard
if len(new_templates) < max_num_template:
to_keep = np.argsort(dist_vector)[:max_num_template-len(new_templates)]
to_discard = np.argsort(dist_vector)[max_num_template-len(new_templates):]
for idx in to_keep:
new_full_templates.append(templates[idx])
new_ids.append(id_templates[idx])
print(f"\tKept template: {idx}, dist: {dist_vector[idx]}:\t","|"*int(20*dist_vector[idx]/max_accum_dist))
for idx in to_discard:
templates_info[id_templates[idx]]["deceased"]=True
else:
print(f"\tNo old templates kept")
# Add the new templates
for n,t in enumerate(new_templates):
new_full_templates.append(t)
new_ids.append(next_id+n)
templates_info[next_id+n] = {"template": t, "used": 0, "deceased": False}
'''
def reconstruct_beats(data_orig, data_lvl, templates_descriptor_original, lvl_number, start_after = SEC_TO_AVG, normalize = True, resample_type = 'linear', num_beats_analyzed = None, verbose = False):
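    '''
    Reconstruct up to 'num_beats_analyzed' beats of the compressed signal 'data_lvl'
    by template warping, tracking per-template usage. The distribution of DTW
    distances is compared with a reference window using the Anderson-Darling
    k-sample test; when it drifts (p < 0.05) in two successive test windows, new
    templates are computed from the original signal over the next 120 seconds and
    the beats in that window are skipped.
    '''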
reconstructed = {}
resamp = {}
beat_seq_number = 0
distances = []
distances_ref = []
num_distances_out = 0
skip_until = start_after
templates = copy(templates_descriptor_original)
dist_vector = [0]*len(templates)
time_info = {'beats_low_res_num': 0, 'tot_beats_num': 0}
    # Init template info
    templates_info = {} # {template_id: {"used": 0, "deceased": False, "connected_template": []}}
    for id_t in range(len(templates)):
        templates_info[id_t] = {"used": 0, "deceased": False, "connected_template": []}
#print(list(templates.keys()),templates_info)
for beat in data_orig.keys():
t_beat = beat/FREQ
time_info['tot_beats_num'] += 1
if t_beat < skip_until:
continue
time_info['beats_low_res_num'] += 1
if beat_seq_number >= num_beats_analyzed:
break
beat_seq_number +=1
if (beat_seq_number %(num_beats_analyzed/20)==0 or beat_seq_number == 1) and verbose:
print(f"Reconstructing beat {beat} ({beat_seq_number}/{num_beats_analyzed}: {100*beat_seq_number /num_beats_analyzed}%, LEVEL:{lvl_number})")
reconstructed[beat] = {}
resamp[beat] = {}
data_resampled = resample(data_lvl[beat], resample_type = resample_type, min_t = data_orig[beat]['t'][0], max_t = data_orig[beat]['t'][-1])
#WARP DRIVEEEEEEE
dist, reconstructed[beat], dist_all_template, local_template_id = warp(data_resampled,templates,data_lvl[beat])
dist_vector = [dist_vector[i]+dist_all_template[i] for i in range(len(dist_vector))]
resamp[beat] = data_resampled
#Update templates usage info
template_id = list(templates_info.keys())[-len(templates)+local_template_id]
templates_info[template_id]["used"] += 1
if len(distances_ref) < LEN_DISTANCE_VECTOR_REF:
distances_ref.append(dist)
if len(distances_ref) >= LEN_DISTANCE_VECTOR_REF:
distances.append(dist)
if len(distances) >= LEN_DISTANCE_VECTOR:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
p = stats.anderson_ksamp([distances_ref,distances])[2]
distances = []
if (p>= 0.05):
num_distances_out = 0
                else: # p-value less than 0.05: the null hypothesis (same distribution) is rejected (please Kolmogorov forgive me)
num_distances_out += 1
if num_distances_out >= 2: # The acquired vector of distances was out for 2 times
max_accum_dist = max(dist_vector)
print(f"\n################################################################")
print(f"\nBeat num:{beat_seq_number}, New template needed ... ")
print(f"\t p-value: {p}")
for j in range(len(dist_vector)):
print(f"\tTemplate {j}, dist: {dist_vector[j]}:\t","|"*int(20*dist_vector[j]/max_accum_dist))
templates = compute_new_templates(data_orig, t_beat, t_beat+120, templates, templates_info)
dist_vector = [0]*len(templates)
print(f"\n################################################################\n")
#distances_ref = distances_ref[REF_NEW:]
distances_ref = []
skip_until = t_beat + 120
num_distances_out = 0
return reconstructed,resamp,templates_info,time_info
def percentile_idx(vector,perc):
pcen=np.percentile(np.array(vector),perc,interpolation='nearest')
i_near=abs(np.array(vector)-pcen).argmin()
return i_near
def correlate(v1,v2):
v1_n = np.array(v1)
v2_n = np.array(v2)
cov = np.cov(v1_n,v2_n)[0,1]
cor = cov/(np.std(v1_n)*np.std(v2_n))
return cor
def avg_std_for_rmse_dict(dictionary):
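    '''Average and standard deviation of the per-beat 'warp' and 'resamp' RMSE values.'''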
dict_out = {'warp':{'avg':None,'std':None},'resamp':{'avg':None,'std':None}}
wr = []
rs = []
for beat in dictionary.keys():
wr.append(dictionary[beat]['warp'])
rs.append(dictionary[beat]['resamp'])
dict_out['warp']['avg'] = np.average(wr)
dict_out['warp']['std'] = np.std(wr)
dict_out['resamp']['avg'] = np.average(rs)
dict_out['resamp']['std'] = np.std(rs)
return dict_out
def percentile_rmse_beat_rel(rmse,perc):
vec = []
for beat in rmse.keys():
vec.append(rmse[beat]['warp'] - rmse[beat]['resamp'])
idx = percentile_idx(vec,perc)
return list(rmse.keys())[idx]
def percentile_rmse_beat_abs(rmse,perc):
vec = []
for beat in rmse.keys():
vec.append(rmse[beat]['warp'])
idx = percentile_idx(vec,perc)
return list(rmse.keys())[idx]
def find_all_connected_template(id_temp,templates_info):
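    '''Collect, starting from id_temp, all template ids transitively linked through "connected_template".'''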
id_collected = [id_temp]
for id_connected in id_collected:
for id_to_add in templates_info[id_connected]["connected_template"]:
if id_to_add not in id_collected:
id_collected.append(id_to_add)
return id_collected
def plot_perc(orig,warp,resamp,title,out_file_name):
plt.figure()
plt.plot(orig[0],orig[1])
plt.plot(warp[0],warp[1])
plt.plot(resamp[0],resamp[1])
plt.legend(['original','warped template','resampled'])
plt.title(title)
plt.savefig(out_file_name)
plt.close()
def plot_beat_rmse_percentile(orig, recon, resamp, lvl, rmse, perc, log_dir_this_lvl, file):
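    '''Plot the beat sitting at the given percentile of the RMSE, both relative (warp minus resample) and absolute (warp only).'''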
#Relative
beat_to_plot_rel = percentile_rmse_beat_rel(rmse, perc)
file_name_to_save_fig_rel = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_relative.svg")
orig_rel = orig[beat_to_plot_rel]['t'],orig[beat_to_plot_rel]['v']
recon_rel = recon[beat_to_plot_rel]['t'],recon[beat_to_plot_rel]['v']
resamp_rel = resamp[beat_to_plot_rel]['t'],resamp[beat_to_plot_rel]['v']
title_rel = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_to_plot_rel}, {str(perc)} percentile'
plot_perc(orig_rel,recon_rel,resamp_rel,title_rel,file_name_to_save_fig_rel)
#Absolute
beat_to_plot_abs = percentile_rmse_beat_abs(rmse, perc)
file_name_to_save_fig_abs = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_absolute.svg")
orig_abs = orig[beat_to_plot_abs]['t'],orig[beat_to_plot_abs]['v']
recon_abs = recon[beat_to_plot_abs]['t'],recon[beat_to_plot_abs]['v']
resamp_abs = resamp[beat_to_plot_abs]['t'],resamp[beat_to_plot_abs]['v']
title_abs = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_to_plot_abs}, {str(perc)} percentile'
plot_perc(orig_abs,recon_abs,resamp_abs,title_abs,file_name_to_save_fig_abs)
def reconstruct_and_compare(file):
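    '''
    Full pipeline for one file: normalize the original data, build the initial
    templates, then for every compression level reconstruct the beats, log RMSE
    statistics and template usage, and plot percentile beats and RMSE histograms.
    '''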
verbose = True
log_dir_this_file = os.path.join(log_dir,file.split(".")[0])
os.mkdir(log_dir_this_file)
#recon,orig,resamp,templates_info, time_info = reconstruct_beats(file, level = LEVEL, resample_type = 'flat', num_beats_analyzed = NUM_BEAT_ANALYZED, verbose = True) #resample_type = flat vs linear
#rmse = RMSE_warped_resampled(orig,recon,resamp)
time_to_plot_rmse_perc = []
time_to_build_rmse_vec_for_histogram = []
time_to_plot_histogram = []
if verbose:
print("Extracting original data")
data_orig = open_file(file, level = "original", start_after = 0)
if verbose:
print("Min-Max-Avg normaliztion of original data")
data_orig,original_params = min_max_normalization(data_orig, params = None)
if verbose:
print("Building initial templates")
beats_for_template = get_beats_in_time_span(data_orig, t_start_seconds=0,t_stop_seconds=60*5)
templates_orig, templates_orig_descriptor = multi_template(beats_for_template, normalize = normalize, percentage_each_cluster = CLUSTER_PERCENTAGE)
if verbose:
print(f"Initial number of templates: {len(templates_orig)}")
print("reconstructing")
for lvl in LEVEL:
if verbose:
print(f"Analyzing level:{lvl}")
print("Extracting...")
data_lvl = open_file(file, level = lvl,start_after = 0)
if verbose:
print(f"Min-Max-Avg normaliztion for lvl {lvl}")
data_lvl,_ = min_max_normalization(data_lvl, params = original_params)
recon,resamp,templates_info, time_info = reconstruct_beats(data_orig, data_lvl, templates_orig_descriptor, lvl, start_after = SEC_TO_AVG, resample_type = 'flat', num_beats_analyzed = NUM_BEAT_ANALYZED, verbose = True) #resample_type = flat vs linear
rmse = RMSE_warped_resampled(data_orig,recon,resamp)
log_dir_this_lvl = os.path.join(log_dir_this_file,str(lvl))
os.mkdir(log_dir_this_lvl)
        rmse_stats = avg_std_for_rmse_dict(rmse) # do not name this 'stats', to avoid shadowing scipy.stats
        avg_rmse_warp = rmse_stats['warp']['avg']
        std_rmse_warp = rmse_stats['warp']['std']
        avg_rmse_resamp = rmse_stats['resamp']['avg']
        std_rmse_resamp = rmse_stats['resamp']['std']
file_name_to_save = "L_"+file.split(".")[0]+".log"
# Particular log for each lvl
with open(os.path.join(log_dir_this_lvl,file_name_to_save),"a") as f:
f.write(f"Lvl: {lvl}\n")
f.write(f"\tWarp: {avg_rmse_warp}, +-{std_rmse_warp}\n")
f.write(f"\tInterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}\n")
for id_temp in templates_info:
usage = templates_info[id_temp]["used"]
deceased = templates_info[id_temp]["deceased"]
f.write(f"\t\tTemplate {id_temp} was used {usage} times out of {NUM_BEAT_ANALYZED}, \
deceased? {deceased}\n")
f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%\n")
f.write(f"\n\n")
        # General log (the same info, but all together: harder to read but complete)
with open(os.path.join(log_dir_this_file,file_name_to_save),"a") as f:
f.write(f"Lvl: {lvl}\n")
f.write(f"\tWarp: {avg_rmse_warp}, +-{std_rmse_warp}\n")
f.write(f"\tInterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}\n")
'''
for id_temp in templates_info:
usage = templates_info[id_temp]["used"]
deceased = templates_info[id_temp]["deceased"]
f.write(f"\t\tTemplate {id_temp} was used {usage} times out of {NUM_BEAT_ANALYZED}, \
deceased? {deceased}\n")
'''
f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%\n")
f.write(f"\n\n")
print(f"File:{file_name_to_save}")
print(f"\tLvl: {lvl}")
print(f"\t\twarp: {avg_rmse_warp}, +-{std_rmse_warp}")
print(f"\t\tinterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}")
print(f"\t\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%")
print("\n")
t_0 = time.time()
# 01 percentile
plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 1, log_dir_this_lvl, file)
# 25 percentile
plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 25, log_dir_this_lvl, file)
# 50 percentile
plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 50, log_dir_this_lvl, file)
# 75 percentile
plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 75, log_dir_this_lvl, file)
# 99 percentile
plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 99, log_dir_this_lvl, file)
# Histograms
t_1 = time.time()
file_name_to_save_fig_hist = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist"+str(lvl)+".svg")
file_name_to_save_fig_hist_left = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist_left"+str(lvl)+".svg")
file_name_to_save_fig_hist_right = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist_right"+str(lvl)+".svg")
rmse_warp = []
rmse_resamp = []
rmse_warp_left = []
rmse_resamp_left = []
rmse_warp_right = []
rmse_resamp_right = []
for beat in rmse.keys():
rmse_warp.append(rmse[beat]['warp'])
rmse_resamp.append(rmse[beat]['resamp'])
rmse_warp_left.append(rmse[beat]['warp_left'])
rmse_resamp_left.append(rmse[beat]['resamp_left'])
rmse_warp_right.append(rmse[beat]['warp_right'])
rmse_resamp_right.append(rmse[beat]['resamp_right'])
t_2 = time.time()
n_bins = len(rmse_warp)*PERC_BINS//100
min_bin = min(min(rmse_warp),min(rmse_resamp))
max_bin = max(max(rmse_warp),max(rmse_resamp))
delta = (max_bin-min_bin)/n_bins
bins = np.arange(min_bin,max_bin+delta,delta)
plt.figure()
plt.hist(rmse_warp, bins = bins, alpha=0.5)
plt.hist(rmse_resamp, bins = bins, alpha=0.5)
plt.title(f'File: {file}, Lvl: {lvl}, RMSE histogram')
plt.legend(['RMSE warp','RMSE resampled'])
plt.savefig(file_name_to_save_fig_hist)
plt.close()
n_bins = len(rmse_warp_left)*PERC_BINS//100
min_bin = min(min(rmse_warp_left),min(rmse_resamp_left))
max_bin = max(max(rmse_warp_left),max(rmse_resamp_left))
delta = (max_bin-min_bin)/n_bins
bins = np.arange(min_bin,max_bin+delta,delta)
plt.figure()
plt.hist(rmse_warp_left, bins = bins, alpha=0.5)
plt.hist(rmse_resamp_left, bins = bins, alpha=0.5)
plt.title(f'File: {file}, Lvl: {lvl}, left RMSE histogram')
plt.legend(['RMSE warp','RMSE resampled'])
plt.savefig(file_name_to_save_fig_hist_left)
plt.close()
n_bins = len(rmse_warp_right)*PERC_BINS//100
min_bin = min(min(rmse_warp_right),min(rmse_resamp_right))
max_bin = max(max(rmse_warp_right),max(rmse_resamp_right))
delta = (max_bin-min_bin)/n_bins
bins = np.arange(min_bin,max_bin+delta,delta)
plt.figure()
plt.hist(rmse_warp_right, bins = bins, alpha=0.5)
plt.hist(rmse_resamp_right, bins = bins, alpha=0.5)
plt.title(f'File: {file}, Lvl: {lvl}, right RMSE histogram')
plt.legend(['RMSE warp','RMSE resampled'])
plt.savefig(file_name_to_save_fig_hist_right)
plt.close()
t_3 = time.time()
'''
template_heirarchy = {}
counter = 0
blacklist = []
for id_temp in templates_info:
if id_temp in blacklist:
continue
            components = find_all_connected_template(id_temp,templates_info)
blacklist.extend(components)
template_heirarchy[counter] = components
counter += 1
t_4 = time.time()
for component in template_heirarchy:
dir_component = os.path.join(log_dir_this_lvl,f"cluster_{str(component)}")
os.mkdir(dir_component)
for id_template in template_heirarchy[component]:
file_name_to_save_fig_template = os.path.join(dir_component,f"template_{str(id_template)}_cluster_{str(component)}.svg")
plt.figure()
plt.plot(templates_info[id_template]['template'])
plt.legend(['template'])
plt.title(f'File: {file}, template {id_template}')
plt.savefig(file_name_to_save_fig_template)
plt.close()
t_5 = time.time()
'''
time_to_plot_rmse_perc.append(t_1-t_0)
time_to_build_rmse_vec_for_histogram.append(t_2-t_1)
time_to_plot_histogram.append(t_3-t_2)
print(f"\tLevel: {lvl}: Time to plot the beats relative to RMSE percentile: {t_1-t_0}")
print(f"\tLevel: {lvl}: Time to build the rmse vectors for histogram plotting: {t_2-t_1}")
print(f"\tLevel: {lvl}: Time to plot the histograms: {t_3-t_2}")
print("\n----------------------------------------")
print("\n--------------------------------------------\n")
print(f"Time to plot the beats relative to RMSE percentile: {np.average(time_to_plot_rmse_perc)} +/- {np.std(time_to_plot_rmse_perc)} (total: {sum(time_to_plot_rmse_perc)})")
print(f"Time to build the rmse vectors for histogram plotting: {np.average(time_to_build_rmse_vec_for_histogram)} +/- {np.std(time_to_build_rmse_vec_for_histogram)} (total: {sum(time_to_build_rmse_vec_for_histogram)})")
print(f"Time to plot the histograms: {np.average(time_to_plot_histogram)} +/- {np.std(time_to_plot_histogram)} (total: {sum(time_to_plot_histogram)})")
def process(files, multi=True, cores=1):
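    '''Create a fresh numbered log directory, write the run specs, and process every file either sequentially or on a pool of worker processes.'''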
# ------------ INIT ------------
global log_dir
for i in range(1,1000):
tmp_log_dir = log_dir+str(i)
if not os.path.isdir(tmp_log_dir):
log_dir = tmp_log_dir
break
os.mkdir(log_dir)
with open(os.path.join(log_dir,"specs.txt"), "w") as f:
f.write(f"Results generated by script: {sys.argv[0]}\n")
f.write(f"Time: {time.ctime(time.time())}\n\n")
f.write(f"Files: {files}\n")
f.write(f"Cores: {cores}\n")
f.write(f"Beats: {NUM_BEAT_ANALYZED}\n")
f.write(f"Cluster percentage: {CLUSTER_PERCENTAGE}\n")
f.write(f"Interpolation type: {INTERPOLATION_TYPE}\n")
# ------------ Extract DATA & ANNOTATIONS ------------
if cores == 1:
print("Single core")
for f in files:
            reconstruct_and_compare(f)
else:
with Pool(cores) as pool:
            pool.map(reconstruct_and_compare, files)
if __name__ == "__main__":
import argparse
import time
seconds_start = time.time()
local_time_start = time.ctime(seconds_start)
print("\nStarted at:", local_time_start,"\n\n")
#global NUM_BEAT_ANALYZED
parser = argparse.ArgumentParser()
parser.add_argument("--file", help="Force to analyze one specific file instead of default one (first found)")
parser.add_argument("--not_norm", help="Force to NOT normalize each beats", action="store_true")
parser.add_argument("--cores", help="Force used number of cores (default, half of the available ones")
parser.add_argument("--beats", help="Number of used beats, default: 5000")
parser.add_argument("--cluster_opt", help="Percentage of points for a cluster to be considered")
parser.add_argument("--template_type", help="Type of template, default: distance")
args = parser.parse_args()
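    # Example invocation (hypothetical record name; the corresponding pickle files must exist under ../data/beats/<level>/):
    #   python multiple_templates_prog.py --file 100 --beats 500 --cores 4 --cluster_opt 3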
files = os.listdir(os.path.join(data_beats_dir,"original"))
if args.file is not None:
if args.file == 'all':
analyzed = files
else:
analyzed = list(filter(lambda string: True if args.file in string else False, files))
else:
analyzed = [files[0]]
if args.not_norm:
normalize = False
else:
normalize = True
if args.cores is not None:
used_cores = int(args.cores)
else:
used_cores = multiprocessing.cpu_count()//2
if args.beats is not None:
NUM_BEAT_ANALYZED = int(args.beats)
else:
NUM_BEAT_ANALYZED = 5000
    if args.template_type is not None:
        TEMPLATE_TYPE = args.template_type
    else:
        TEMPLATE_TYPE = 'distance'
if args.cluster_opt is not None:
CLUSTER_PERCENTAGE = int(args.cluster_opt)
else:
CLUSTER_PERCENTAGE = 3
print(f"Analyzing files: {analyzed}")
print(f"Extracting data with {used_cores} cores...")
process(files = analyzed, multi=True, cores=used_cores)
seconds_stop = time.time()
local_time_stop = time.ctime(seconds_stop)
elapsed = seconds_stop - seconds_start
hours = elapsed//60//60
minutes = (elapsed - hours * 60 * 60) // 60
seconds = (elapsed - hours * 60 * 60 - minutes * 60) // 1
print("\n\n\n-----------------------------------------------------------------------------------------------------------------")
print(f"Finished at: {local_time_stop}, elapsed: {elapsed} seconds ({hours} hours, {minutes} minutes, {seconds} seconds)")
