multiple_templates_prog.py
import multiprocessing
import os
import pickle
import shutil
import sys
from multiprocessing import Pool
import time
import matplotlib
import matplotlib.pyplot as plt
# configure backend here
matplotlib.use('SVG')
import numpy as np
from dtaidistance import dtw
scripts = '../helper_scripts'
if scripts not in sys.path:
sys.path.insert(0,scripts)
import warnings
from copy import copy
from dtw.dtw import dtw_std
from scipy import stats
from scipy.interpolate import interp1d
from build_template import build_template, multi_template
data_beats_dir = "../data/beats/"
log_dir = "../data/beat_recon_logs_multi_prog_"
FREQ = 128*4
MIN_TO_AVG = 5
BEAT_TO_PLOT = 2
LEVEL = [3,4,5,6,7,8]
NUM_BEAT_ANALYZED = 50
LEN_DISTANCE_VECTOR = 80
LEN_DISTANCE_VECTOR_REF = 500
REF_NEW = 100
TEMPLATE_TYPE = 'distance'
PERC_BINS = 15
CLUSTER_PERCENTAGE = 3
def RMSE(v1,v2):
v1_n = np.array(v1)
v2_n = np.array(v2)
return np.sqrt(np.mean((v1_n-v2_n)**2))
def RMSE_warped_resampled(data_orig,data_warped,data_resampled):
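# For every beat and subsampling level, compares the original beat against the warped
# and the resampled reconstructions: full-beat RMSE plus separate RMSEs for the samples
# before ('_left') and after ('_right') the beat annotation index.
# Returns a nested dict indexed as rmse[lvl][beat][metric].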
rmse = {}
template = {'warp':None,'resamp':None,'warp_left':None,'resamp_left':None,'warp_right':None,'resamp_right':None}
for beat in data_warped.keys():
for lvl in data_warped[beat].keys():
if lvl not in rmse.keys():
rmse[lvl] = {}
rmse[lvl][beat] = copy(template)
idx_beat = data_warped[beat][lvl]['t'].index(beat)
l_resamp = data_resampled[beat][lvl]['v'][:idx_beat]
r_resamp = data_resampled[beat][lvl]['v'][idx_beat+1:]
l_warp = data_warped[beat][lvl]['v'][:idx_beat]
r_warp = data_warped[beat][lvl]['v'][idx_beat+1:]
l_orig = data_orig[beat]['v'][:idx_beat]
r_orig = data_orig[beat]['v'][idx_beat+1:]
rmse[lvl][beat]['warp'] = RMSE(data_orig[beat]['v'],data_warped[beat][lvl]['v'])
rmse[lvl][beat]['resamp'] = RMSE(data_orig[beat]['v'],data_resampled[beat][lvl]['v'])
rmse[lvl][beat]['warp_left'] = RMSE(l_orig,l_warp)
rmse[lvl][beat]['resamp_left'] = RMSE(l_orig,l_resamp)
rmse[lvl][beat]['warp_right'] = RMSE(r_orig,r_warp)
rmse[lvl][beat]['resamp_right'] = RMSE(r_orig,r_resamp)
return rmse
def open_file(file_name, start_after = MIN_TO_AVG, get_selected_level = None):
'''
Data structure:
File:
|
DICTIONARY:
|
--> Beat_annot:
|
--> lvl:
|
--> "t": []
|
--> "v": []
'''
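# Illustrative shape of the pickled dictionary described above (keys and values are made up):
# {51200: {0: {'t': [...], 'v': [...]}, 3: {'t': [...], 'v': [...]}, ...}, 52030: {...}, ...}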
file_name_full = os.path.join(data_beats_dir,os.path.basename(file_name))
data = {}
data_out = {}
with open(file_name_full,"rb") as f:
data = pickle.load(f)
for k in data.keys():
if k > FREQ*start_after:
data_out[k] = {}
if get_selected_level is not None:
data_out[k][0] = data[k][0]
for lvl in get_selected_level:
data_out[k][lvl] = data[k][lvl]
else:
data_out[k] = data[k]
return data_out, list(data_out[k].keys())
def get_beats_in_time_span(data, lvl = 0 ,t_start_seconds = 0, t_stop_seconds = MIN_TO_AVG*60):
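# Collects the beats whose annotation (in samples) falls inside [t_start_seconds, t_stop_seconds],
# returning the series at the requested subsampling level (0 = original signal).
# The very first annotation is skipped, matching the slice below.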
beats = {}
annotations = list(data.keys())
for annot in annotations[1:]:
if annot >= int(t_start_seconds*FREQ) and annot <= int(t_stop_seconds*FREQ):
beats[annot] = data[annot][lvl]
elif annot > int(t_stop_seconds*FREQ):
break
return beats
def min_max_normalization(vector, params = None):
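# Scales the vector into [0,1] with a min-max normalization and then subtracts the mean of the
# scaled signal so it is zero-centred. When 'params' is provided, the stored min/max/avg are
# reused, so a subsampled beat can be normalized consistently with its full-resolution original.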
params_out = {}
#print("\t",params)
if params is not None:
mi_v = params['min']
ma_v = params['max']
avg = params['avg']
norm = (np.array(vector)-mi_v)/(ma_v-mi_v)
norm -= avg
params_out = params
else:
mi_v = min(vector)
ma_v = max(vector)
norm = (np.array(vector)-mi_v)/(ma_v-mi_v)
avg = np.average(norm)
norm -= avg
params_out['min'] = mi_v
params_out['max'] = ma_v
params_out['avg'] = avg
return list(norm),params_out
def resamp_one_signal(t,v,resample_type = 'linear', min_t = None, max_t = None):
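# Interpolates the (t, v) samples onto a unit-spaced time grid between min_t and max_t (inclusive),
# either linearly or with a zero-order hold ('flat'), extrapolating outside the available samples.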
if resample_type == "linear":
f = interp1d(t,v, bounds_error = False, fill_value = "extrapolate")
elif resample_type == "flat":
f = interp1d(t,v, kind = 'previous', bounds_error = False, fill_value = "extrapolate")
if min_t is None:
min_t = t[0]
if max_t is None:
max_t = t[-1]
t_new = list(range(min_t,max_t+1))
v_new = f(t_new)
return t_new,v_new
def resample(data, resample_type = "linear", min_t = None, max_t = None):
resampled_data = {"t":None,"v":None}
t = data['t']
v = data['v']
t_r,v_r = resamp_one_signal(t,v,resample_type = resample_type, min_t = min_t, max_t = max_t)
resampled_data['t'] = t_r
resampled_data['v'] = v_r
return resampled_data
def change_sampling(vector, size_to_warp):
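# Stretches or compresses 'vector' to exactly 'size_to_warp' samples. When the required change is
# larger than the number of gaps, the vector is first up-sampled by linear interpolation; then
# single samples are inserted (as midpoints) or removed at the positions with the lowest gradient
# values, the last sample never being used as an insertion/removal point.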
resampled = []
if len(vector) < 3: #If the vector has only 1 or 2 samples it degenerates into a straight line, which is warped uniformly
resampled = [vector[0]]*size_to_warp
return resampled
delta = abs(size_to_warp - len(vector)) # --> number of points to insert/delete
#Need at least one extra sample per existing sample (except the last one)? --> insert k samples in each hole and recompute delta
if delta >= len(vector)-1 and delta > 0:
holes = len(vector)-1
k = delta // holes #Number of points for each hole
time = range(0,len(vector)*(k+1),k+1) # [o1,i11,i12,...,i1k, o2,i21,i22,...,i2k, o3,..., o_fin] --> NOTE: last sample is not taken
# 0 k+1 2k+2 l*k +l --> Hence, we get to the exact end
t_new = range(time[-1]+1)
f = interp1d(time,vector, bounds_error = False, fill_value = "extrapolate")
vector = list(f(t_new))
delta = abs(size_to_warp - len(vector))
if len(vector) == size_to_warp:
return vector
grad = np.gradient(vector)
grad[-1] = max(grad) + 1 # --> we don't want to insert anything after the last sample
grad_idxs = sorted(range(len(grad)), key = lambda idx: grad[idx])
idx_to_consider = grad_idxs[:delta]
#print(delta, len(idx_to_consider), idx_to_consider)
for i,sample in enumerate(vector):
resampled.append(sample)
if i in idx_to_consider:
if size_to_warp < len(vector):
resampled.pop()
elif size_to_warp > len(vector):
resampled.append((vector[i]+vector[i+1])/2)
return resampled
def segment_warp(segment):
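# Warps one template segment: resamples it to 'length_to_warp' samples, shifts it so the first
# sample matches 'v_start', and adds a linear ramp so the last sample matches 'v_stop'.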
#print("--> ",len(segment['segment']))
#print("--> ",segment['length_to_warp'])
resampled = np.array(change_sampling(segment['segment'], segment['length_to_warp']))
#print("--> ",len(resampled))
resampled += (segment['v_start'] - resampled[0])
m = (segment['v_stop'] - resampled[-1])/(segment['length_to_warp']-1)
coef_to_sum = [m*x for x in range(segment['length_to_warp'])]
resampled += coef_to_sum
return list(resampled)
def stitch_segments(segments):
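# Concatenates consecutive warped segments, dropping the last sample of every segment except the
# final one, since adjacent segments share their boundary point.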
stitched = []
for i,segment in enumerate(segments):
if i == len(segments)-1:
stitched.extend(segment)
else:
stitched.extend(segment[0:-1])
return stitched
def dtw_dist(v1,v2):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
dist = dtw_std(v1, v2)
return dist
def warp(data, templates, events):
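# Core reconstruction step. 'data' is the resampled beat, 'events' holds the received (subsampled)
# samples. The beat endpoints are added to the events if missing, the template with the smallest
# DTW distance to the resampled beat is selected, its warping path is pruned of repeated source
# indices, and the template is cut at the indices aligned with the event times. Each template
# segment is then warped to the corresponding event-to-event length, anchored to the event
# amplitudes, and the segments are stitched back together.
# Returns: best DTW distance, the warped beat, the distances to all templates, and the index of
# the selected template.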
#print(events)
warped = {}
segments = [] # --> {"segment":v, "length_to_warp":length, "v_start":v_eb_0, "v_stop":v_eb_1}
segment = {"segment":None, "length_to_warp":None, "v_start":None, "v_stop":None}
segment_start = None
event_start = None
#chosen_template = []
#Add the first and last points of the resampled and truncated beat if they are not already among the events
if data['t'][0] not in events['t']:
events['t'].insert(0, data['t'][0])
events['v'].insert(0, 0)
#events['v'].insert(0, data['v'][0])
if data['t'][-1] not in events['t']:
events['t'].insert(-1,data['t'][-1])
events['v'].insert(-1,0)
#events['v'].insert(-1,data['v'][-1])
#print(events)
#Apply DTW for matching resampled event to template
v_src = data['v']
#This mirrors what the library actually does when calling 'warping_path'
dist = float('inf')
paths = []
selected_template = []
distances_vector = []
template_pos = None
for i,t in enumerate(templates):
#dist_this_template, paths_this_template = dtw.warping_paths(v_src, t)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
dist_this_template, _, path_this_template = dtw_std(v_src, t, dist_only=False)
distances_vector.append(dist_this_template)
if dist_this_template < dist:
dist = dist_this_template
path = path_this_template
selected_template = t
template_pos = i
#path = dtw.best_path(paths)
path = [(v1,v2) for v1,v2 in zip(path[0],path[1])]
#Remove the "left" steps from the path and segment the template based on the event points
prev_idx_src = path[-1][0]+1
for idx_src, idx_temp in path:
if prev_idx_src == idx_src:
continue
prev_idx_src = idx_src
for idx, t in enumerate(events['t']):
if data['t'][idx_src] == t:
if segment_start is None:
segment_start = idx_temp
event_start = idx
else:
#print(f"SEGMENT {events['t'][event_start]} - {events['t'][idx]} ({abs(events['t'][event_start] - events['t'][idx])})")
segment['segment'] = np.array(selected_template[segment_start:idx_temp+1], dtype=np.float64)
segment['length_to_warp'] = events['t'][idx] - events['t'][event_start] + 1
segment['v_start'] = events['v'][event_start]
segment['v_stop'] = events['v'][idx]
w = segment_warp(segment)
#print(len(w))
segments.append(w)
segment_start = idx_temp
event_start = idx
break
segment_stitched = stitch_segments(segments)
#print(len(segment_stitched), len(data['t']))
warped = {'t':data['t'],'v':segment_stitched}
return dist, warped, distances_vector, template_pos
def compute_new_templates(data, t_start, t_stop, old_templates, id_templates, templates_info):
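# Template-set update. Re-clusters the beats in [t_start, t_stop] with multi_template (from the
# helper scripts) and merges the resulting cluster centres with the old template set: an old
# template close enough to a new cluster (below mean + std of the cluster's point distances) is
# assigned to it, the representative with the smaller distance wins, and the others are marked as
# 'deceased' in templates_info. Old templates far from every new cluster are kept unchanged.
# Note: this function relies on the module-level 'normalize' flag set in __main__.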
beats_for_new_template = get_beats_in_time_span(data, t_start_seconds=t_start,t_stop_seconds=t_stop)
_, new_templates_descriptor = multi_template(beats_for_new_template, normalize = normalize, percentage_each_cluster = CLUSTER_PERCENTAGE*2)
new_ids = []
new_full_templates = []
cluster_representatives = {}
next_id = max(id_templates) + 1
lbls_new_templates = list(new_templates_descriptor['templates'].keys())
print(f"\nNew template built, number of new templates:{len(lbls_new_templates)}\n")
old_templates_kept = 0
old_templates_substituted = 0
new_templates_kept = 0
new_templates_substituted = 0
# We check which of the old templates can be considered clustered with the newly found ones
if len(lbls_new_templates) > 0:
for j,t_o in enumerate(old_templates):
dists = np.zeros((len(lbls_new_templates)))
for k,t_n in enumerate(lbls_new_templates):
dists[k] = dtw_dist(t_o,new_templates_descriptor['templates'][t_n]['center'])
min_dist_pos = np.argmin(dists)
min_val = dists[min_dist_pos]
this_lbls_new_templates = lbls_new_templates[min_dist_pos]
dist_threshold = np.average(new_templates_descriptor['templates'][this_lbls_new_templates]['dist_all_pt_cluster'])+np.std(new_templates_descriptor['templates'][this_lbls_new_templates]['dist_all_pt_cluster'])
if min_val < dist_threshold:
if this_lbls_new_templates not in cluster_representatives.keys():
cluster_representatives[this_lbls_new_templates] = {"ids": [], "dists": [], "templates": []}
cluster_representatives[this_lbls_new_templates]["ids"].append(id_templates[j])
cluster_representatives[this_lbls_new_templates]["dists"].append(min_val)
else: #This template's minimum distance to the newly found templates is too large for it to be considered part of the same cluster
new_ids.append(id_templates[j])
new_full_templates.append(t_o)
old_templates_kept += 1
# Now we check whether the old templates clustered with the newly found ones are representative of their cluster, or if we should use the new templates instead
for local_new_id in lbls_new_templates:
new_id = None
new_template = None
if local_new_id in cluster_representatives.keys():
connected_ids = cluster_representatives[local_new_id]["ids"]
arg_min_dist = np.argmin(cluster_representatives[local_new_id]["dists"])
old_template_id = cluster_representatives[local_new_id]["ids"][arg_min_dist]
old_template_dist = cluster_representatives[local_new_id]["dists"][arg_min_dist]
if old_template_dist < new_templates_descriptor['templates'][local_new_id]["dist"]:
new_template = templates_info[old_template_id]['template']
new_id = old_template_id
connected_ids.remove(new_id)
templates_info[new_id]["connected_template"].extend(connected_ids)
new_templates_substituted += 1
else:
new_template = new_templates_descriptor['templates'][local_new_id]["point"]
new_id = next_id
templates_info[new_id] = {"template": new_template, "used": 0, "deceased": False, "connected_template": connected_ids}
next_id += 1
old_templates_substituted += len(cluster_representatives[local_new_id]["ids"])
# The templates in this cluster that are not the center are now deceased
for id_removed in connected_ids:
templates_info[id_removed]["deceased"] = True
else:
new_template = new_templates_descriptor['templates'][local_new_id]["point"]
new_id = next_id
templates_info[new_id] = {"template": new_template, "used": 0, "deceased": False, "connected_template": []}
next_id += 1
new_templates_kept += 1
new_ids.append(new_id)
new_full_templates.append(new_template)
print(f"Old templates kept untouched: {old_templates_kept}/{len(old_templates)}")
print(f"Old templates kept, representative of new clusters (but already present): {new_templates_substituted}/{len(old_templates)}")
print(f"Old templates removed for old clusters (with new defined params): {len(old_templates) - (old_templates_kept+old_templates_substituted+new_templates_substituted)}/{len(old_templates)}")
print(f"Old templates removed for new clusters: {old_templates_substituted}/{len(old_templates)}")
print(f"\nNew templates kept untouched: {new_templates_kept}/{len(lbls_new_templates)}")
print(f"New templates kept, representative of old clusters (with new params): {len(lbls_new_templates) - (new_templates_kept+new_templates_substituted)}/{len(lbls_new_templates)}")
print(f"New templates removed for old clusters: {new_templates_substituted}/{len(lbls_new_templates)}")
print(f"\nFinal length of the new template set: {len(new_full_templates)}")
else:
new_full_templates = old_templates
new_ids = id_templates
print("No new template found, keeping the previously computed ones")
return new_full_templates, new_ids
# Code to re-use for a bounded number of templates. First, let's try the unbounded version
'''
# Decide which template to keep and which to discard
if len(new_templates) < max_num_template:
to_keep = np.argsort(dist_vector)[:max_num_template-len(new_templates)]
to_discard = np.argsort(dist_vector)[max_num_template-len(new_templates):]
for idx in to_keep:
new_full_templates.append(templates[idx])
new_ids.append(id_templates[idx])
print(f"\tKept template: {idx}, dist: {dist_vector[idx]}:\t","|"*int(20*dist_vector[idx]/max_accum_dist))
for idx in to_discard:
templates_info[lvl][id_templates[idx]]["deceased"]=True
else:
print(f"\tNo old templates kept")
# Add the new templates
for n,t in enumerate(new_templates):
new_full_templates.append(t)
new_ids.append(next_id+n)
templates_info[lvl][next_id+n] = {"template": t, "used": 0, "deceased": False}
'''
def reconstruct_beats(file_name, level = None, normalize = True, resample_type = 'linear', num_beats_analyzed = None, verbose = False):
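# Main reconstruction loop for one file. An initial template set is built from the first 5 minutes
# of full-resolution beats; then, for every subsampling level, each beat is resampled and warped
# against the current templates. The DTW distances are monitored with an Anderson-Darling k-sample
# test against a reference window: if the distance distribution drifts twice in a row, a new
# template set is computed from the following 2 minutes of data and reconstruction is suspended
# (skip_until) for that interval. Returns the warped beats, the originals, the resampled beats,
# per-template usage info and timing info.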
reconstructed = {}
resamp = {}
data_orig = {}
num_distances_out = 0
skip_until = 0
templates = []
templates_orig = []
max_num_template = None
templates_info = {}
time_info = {}
if verbose:
print(f"Extracting {file_name}")
data,lvls = open_file(file_name, start_after = MIN_TO_AVG, get_selected_level = level)
lvls.remove(0)
if verbose:
print("Building template")
beats_for_template = get_beats_in_time_span(data, t_start_seconds=0,t_stop_seconds=60*5)
templates_orig, _ = multi_template(beats_for_template, normalize = normalize, percentage_each_cluster = CLUSTER_PERCENTAGE)
id_templates_orig = [n for n in range(len(templates_orig))]
if verbose:
print(f"Initial number of templates: {len(templates_orig)}")
max_num_template = len(templates_orig)*3
if num_beats_analyzed is None:
num_beats_analyzed = len(list(data.keys()))
if verbose:
print("reconstructing")
for lvl in lvls:
this_beat_number = 0
if verbose:
print(f"Analyzing level:{lvl}")
distances = []
distances_ref = []
num_distances_out = 0
skip_until = 0
templates = copy(templates_orig)
id_templates = copy(id_templates_orig)
dist_vector = [0]*len(templates_orig)
time_info[lvl] = {'beats_low_res_num': 0, 'tot_beats_num': 0}
#Init template info
templates_info[lvl] = {} #{$template_id: {template: None, used: 0, deceased: False}}
for t,id_t in zip(templates,id_templates):
templates_info[lvl][id_t] = {"template": t, "used": 0, "deceased": False, "connected_template": []}
for beat in data.keys():
t_beat = beat/FREQ
time_info[lvl]['tot_beats_num'] += 1
if t_beat < skip_until:
continue
time_info[lvl]['beats_low_res_num'] += 1
if this_beat_number >= num_beats_analyzed:
break
this_beat_number +=1
if (this_beat_number %(num_beats_analyzed/20)==0 or this_beat_number == 1) and verbose:
print(f"File: {file_name}, Reconstructing beat {beat} ({this_beat_number}/{num_beats_analyzed}: {100*this_beat_number /num_beats_analyzed}%, LEVEL:{lvl})")
if beat not in reconstructed.keys():
reconstructed[beat] = {}
resamp[beat] = {}
data_orig[beat] = copy(data[beat][0])
if normalize:
data_orig[beat]['v'],params_prev = min_max_normalization(data_orig[beat]['v'], params = None)
data[beat][lvl]['v'],_ = min_max_normalization(data[beat][lvl]['v'], params = params_prev)
data_resampled = resample(data[beat][lvl], resample_type = resample_type, min_t = data_orig[beat]['t'][0], max_t = data_orig[beat]['t'][-1])
#WARP DRIVEEEEEEE
dist, reconstructed[beat][lvl], dist_all_template, template_pos = warp(data_resampled,templates,data[beat][lvl])
dist_vector = [dist_vector[i]+dist_all_template[i] for i in range(len(dist_vector))]
resamp[beat][lvl] = data_resampled
#Update templates usage info
templates_info[lvl][id_templates[template_pos]]["used"] += 1
if len(distances_ref) < LEN_DISTANCE_VECTOR_REF:
distances_ref.append(dist)
if len(distances_ref) >= LEN_DISTANCE_VECTOR_REF:
distances.append(dist)
if len(distances) >= LEN_DISTANCE_VECTOR:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
p = stats.anderson_ksamp([distances_ref,distances])[2]
distances = []
if (p>= 0.05):
num_distances_out = 0
else: # p-value less than 0.05: null hypothesis (same distribution) rejected (please Kolmogorov forgive me)
num_distances_out += 1
if num_distances_out >= 2: # The acquired distance vector has been out of distribution twice in a row
max_accum_dist = max(dist_vector)
print(f"\n################################################################")
print(f"\nBeat num:{this_beat_number}, New template needed ... ")
print(f"\t p-value: {p}")
for j in range(len(dist_vector)):
print(f"\tTemplate {j}, dist: {dist_vector[j]}:\t","|"*int(20*dist_vector[j]/max_accum_dist))
templates, id_templates = compute_new_templates(data, t_beat, t_beat+120, templates, id_templates, templates_info[lvl])
dist_vector = [0]*len(templates)
print(f"\n################################################################\n")
#distances_ref = distances_ref[REF_NEW:]
distances_ref = []
skip_until = t_beat + 120
num_distances_out = 0
return reconstructed,data_orig,resamp,templates_info,time_info
def percentile_idx(vector,perc):
pcen=np.percentile(np.array(vector),perc,interpolation='nearest')
i_near=abs(np.array(vector)-pcen).argmin()
return i_near
def correlate(v1,v2):
v1_n = np.array(v1)
v2_n = np.array(v2)
cov = np.cov(v1_n,v2_n)[0,1]
cor = cov/(np.std(v1_n)*np.std(v2_n))
return cor
def avg_std_for_rmse_dict(dictionary):
dict_out = {'warp':{'avg':None,'std':None},'resamp':{'avg':None,'std':None}}
wr = []
rs = []
for beat in dictionary.keys():
wr.append(dictionary[beat]['warp'])
rs.append(dictionary[beat]['resamp'])
dict_out['warp']['avg'] = np.average(wr)
dict_out['warp']['std'] = np.std(wr)
dict_out['resamp']['avg'] = np.average(rs)
dict_out['resamp']['std'] = np.std(rs)
return dict_out
def percentile_rmse_beat_rel(rmse,perc):
vec = []
for beat in rmse.keys():
vec.append(rmse[beat]['warp'] - rmse[beat]['resamp'])
idx = percentile_idx(vec,perc)
return list(rmse.keys())[idx]
def percentile_rmse_beat_abs(rmse,perc):
vec = []
for beat in rmse.keys():
vec.append(rmse[beat]['warp'])
idx = percentile_idx(vec,perc)
return list(rmse.keys())[idx]
def find_all_connected_template(id_temp,templates_info):
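# Transitive closure over the 'connected_template' links: returns id_temp plus every template id
# reachable from it through the connection lists.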
id_collected = [id_temp]
for id_connected in id_collected:
for id_to_add in templates_info[id_connected]["connected_template"]:
if id_to_add not in id_collected:
id_collected.append(id_to_add)
return id_collected
def plot_perc(orig,warp,resamp,title,out_file_name):
plt.figure()
plt.plot(orig[0],orig[1])
plt.plot(warp[0],warp[1])
plt.plot(resamp[0],resamp[1])
plt.legend(['original','warped template','resampled'])
plt.title(title)
plt.savefig(out_file_name)
plt.close()
def plot_beat_rmse_percentile(orig, recon, resamp, lvl, rmse, perc, log_dir_this_lvl, file):
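# Plots original vs warped vs resampled for the beat sitting at the requested percentile of the
# RMSE distribution, both in relative terms (warp RMSE minus resamp RMSE) and in absolute terms
# (warp RMSE alone).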
#Relative
beat_to_plot_rel = percentile_rmse_beat_rel(rmse[lvl], perc)
file_name_to_save_fig_rel = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_relative.svg")
orig_rel = orig[beat_to_plot_rel]['t'],orig[beat_to_plot_rel]['v']
recon_rel = recon[beat_to_plot_rel][lvl]['t'],recon[beat_to_plot_rel][lvl]['v']
resamp_rel = resamp[beat_to_plot_rel][lvl]['t'],resamp[beat_to_plot_rel][lvl]['v']
title_rel = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_to_plot_rel}, {str(perc)} percentile'
plot_perc(orig_rel,recon_rel,resamp_rel,title_rel,file_name_to_save_fig_rel)
#Absolute
beat_to_plot_abs = percentile_rmse_beat_abs(rmse[lvl], perc)
file_name_to_save_fig_abs = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_absolute.svg")
orig_abs = orig[beat_to_plot_abs]['t'],orig[beat_to_plot_abs]['v']
recon_abs = recon[beat_to_plot_abs][lvl]['t'],recon[beat_to_plot_abs][lvl]['v']
resamp_abs = resamp[beat_to_plot_abs][lvl]['t'],resamp[beat_to_plot_abs][lvl]['v']
title_abs = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_to_plot_abs}, {str(perc)} percentile'
plot_perc(orig_abs,recon_abs,resamp_abs,title_abs,file_name_to_save_fig_abs)
def reconstruct_and_compare(file):
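# Per-file pipeline: reconstructs the beats, computes the warped/resampled RMSEs, writes per-level
# and aggregate logs, plots the percentile beats and the RMSE histograms, and prints timing stats.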
log_dir_this_file = os.path.join(log_dir,file.split(".")[0])
os.mkdir(log_dir_this_file)
recon,orig,resamp,templates_info, time_info = reconstruct_beats(file, level = LEVEL, resample_type = 'flat', num_beats_analyzed = NUM_BEAT_ANALYZED, verbose = True) #resample_type = flat vs linear
rmse = RMSE_warped_resampled(orig,recon,resamp)
time_to_plot_rmse_perc = []
time_to_build_rmse_vec_for_histogram = []
time_to_plot_histogram = []
time_to_compute_template_heirarchy = []
time_to_plot_templates = []
for lvl in LEVEL:
log_dir_this_lvl = os.path.join(log_dir_this_file,str(lvl))
os.mkdir(log_dir_this_lvl)
rmse_stats = avg_std_for_rmse_dict(rmse[lvl]) # local name chosen to avoid shadowing scipy's 'stats'
avg_rmse_warp = rmse_stats['warp']['avg']
std_rmse_warp = rmse_stats['warp']['std']
avg_rmse_resamp = rmse_stats['resamp']['avg']
std_rmse_resamp = rmse_stats['resamp']['std']
file_name_to_save = "L_"+file.split(".")[0]+".log"
# Particular log for each lvl
with open(os.path.join(log_dir_this_lvl,file_name_to_save),"a") as f:
f.write(f"Lvl: {lvl}\n")
f.write(f"\tWarp: {avg_rmse_warp}, +-{std_rmse_warp}\n")
f.write(f"\tInterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}\n")
for id_temp in templates_info[lvl]:
usage = templates_info[lvl][id_temp]["used"]
deceased = templates_info[lvl][id_temp]["deceased"]
f.write(f"\t\tTemplate {id_temp} was used {usage} times out of {NUM_BEAT_ANALYZED}, \
deceased? {deceased}\n")
f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info[lvl]['beats_low_res_num']/time_info[lvl]['tot_beats_num']*100}%\n")
f.write(f"\n\n")
# General log (the same but all together: more cluttered but with all the info)
with open(os.path.join(log_dir_this_file,file_name_to_save),"a") as f:
f.write(f"Lvl: {lvl}\n")
f.write(f"\tWarp: {avg_rmse_warp}, +-{std_rmse_warp}\n")
f.write(f"\tInterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}\n")
'''
for id_temp in templates_info[lvl]:
usage = templates_info[lvl][id_temp]["used"]
deceased = templates_info[lvl][id_temp]["deceased"]
f.write(f"\t\tTemplate {id_temp} was used {usage} times out of {NUM_BEAT_ANALYZED}, \
deceased? {deceased}\n")
'''
f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info[lvl]['beats_low_res_num']/time_info[lvl]['tot_beats_num']*100}%\n")
f.write(f"\n\n")
print(f"File:{file_name_to_save}")
print(f"\tLvl: {lvl}")
print(f"\t\twarp: {avg_rmse_warp}, +-{std_rmse_warp}")
print(f"\t\tinterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}")
print(f"\t\tTime (percentage) passed in low-sampling mode: {time_info[lvl]['beats_low_res_num']/time_info[lvl]['tot_beats_num']*100}%")
print("\n")
t_0 = time.time()
#TODO: plot both relative and absolute percentile
# 01 percentile
plot_beat_rmse_percentile(orig, recon, resamp, lvl, rmse, 1, log_dir_this_lvl, file)
# 25 percentile
plot_beat_rmse_percentile(orig, recon, resamp, lvl, rmse, 25, log_dir_this_lvl, file)
# 50 percentile
plot_beat_rmse_percentile(orig, recon, resamp, lvl, rmse, 50, log_dir_this_lvl, file)
# 75 percentile
plot_beat_rmse_percentile(orig, recon, resamp, lvl, rmse, 75, log_dir_this_lvl, file)
# 99 percentile
plot_beat_rmse_percentile(orig, recon, resamp, lvl, rmse, 99, log_dir_this_lvl, file)
# Histograms
t_1 = time.time()
file_name_to_save_fig_hist = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist"+str(lvl)+".svg")
file_name_to_save_fig_hist_left = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist_left"+str(lvl)+".svg")
file_name_to_save_fig_hist_right = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist_right"+str(lvl)+".svg")
rmse_warp = []
rmse_resamp = []
rmse_warp_left = []
rmse_resamp_left = []
rmse_warp_right = []
rmse_resamp_right = []
for beat in rmse[lvl].keys():
rmse_warp.append(rmse[lvl][beat]['warp'])
rmse_resamp.append(rmse[lvl][beat]['resamp'])
rmse_warp_left.append(rmse[lvl][beat]['warp_left'])
rmse_resamp_left.append(rmse[lvl][beat]['resamp_left'])
rmse_warp_right.append(rmse[lvl][beat]['warp_right'])
rmse_resamp_right.append(rmse[lvl][beat]['resamp_right'])
t_2 = time.time()
n_bins = len(rmse_warp)*PERC_BINS//100
min_bin = min(min(rmse_warp),min(rmse_resamp))
max_bin = max(max(rmse_warp),max(rmse_resamp))
delta = (max_bin-min_bin)/n_bins
bins = np.arange(min_bin,max_bin+delta,delta)
plt.figure()
plt.hist(rmse_warp, bins = bins, alpha=0.5)
plt.hist(rmse_resamp, bins = bins, alpha=0.5)
plt.title(f'File: {file}, Lvl: {lvl}, RMSE histogram')
plt.legend(['RMSE warp','RMSE resampled'])
plt.savefig(file_name_to_save_fig_hist)
plt.close()
n_bins = len(rmse_warp_left)*PERC_BINS//100
min_bin = min(min(rmse_warp_left),min(rmse_resamp_left))
max_bin = max(max(rmse_warp_left),max(rmse_resamp_left))
delta = (max_bin-min_bin)/n_bins
bins = np.arange(min_bin,max_bin+delta,delta)
plt.figure()
plt.hist(rmse_warp_left, bins = bins, alpha=0.5)
plt.hist(rmse_resamp_left, bins = bins, alpha=0.5)
plt.title(f'File: {file}, Lvl: {lvl}, left RMSE histogram')
plt.legend(['RMSE warp','RMSE resampled'])
plt.savefig(file_name_to_save_fig_hist_left)
plt.close()
n_bins = len(rmse_warp_right)*PERC_BINS//100
min_bin = min(min(rmse_warp_right),min(rmse_resamp_right))
max_bin = max(max(rmse_warp_right),max(rmse_resamp_right))
delta = (max_bin-min_bin)/n_bins
bins = np.arange(min_bin,max_bin+delta,delta)
plt.figure()
plt.hist(rmse_warp_right, bins = bins, alpha=0.5)
plt.hist(rmse_resamp_right, bins = bins, alpha=0.5)
plt.title(f'File: {file}, Lvl: {lvl}, right RMSE histogram')
plt.legend(['RMSE warp','RMSE resampled'])
plt.savefig(file_name_to_save_fig_hist_right)
plt.close()
t_3 = time.time()
'''
template_heirarchy = {}
counter = 0
blacklist = []
for id_temp in templates_info[lvl]:
if id_temp in blacklist:
continue
components = find_all_connected_template(id_temp,templates_info[lvl])
blacklist.extend(components)
template_heirarchy[counter] = components
counter += 1
t_4 = time.time()
for component in template_heirarchy:
dir_component = os.path.join(log_dir_this_lvl,f"cluster_{str(component)}")
os.mkdir(dir_component)
for id_template in template_heirarchy[component]:
file_name_to_save_fig_template = os.path.join(dir_component,f"template_{str(id_template)}_cluster_{str(component)}.svg")
plt.figure()
plt.plot(templates_info[lvl][id_template]['template'])
plt.legend(['template'])
plt.title(f'File: {file}, template {id_template}')
plt.savefig(file_name_to_save_fig_template)
plt.close()
t_5 = time.time()
'''
time_to_plot_rmse_perc.append(t_1-t_0)
time_to_build_rmse_vec_for_histogram.append(t_2-t_1)
time_to_plot_histogram.append(t_3-t_2)
print(f"\tLevel: {lvl}: Time to plot the beats relative to RMSE percentile: {t_1-t_0}")
print(f"\tLevel: {lvl}: Time to build the rmse vectors for histogram plotting: {t_2-t_1}")
print(f"\tLevel: {lvl}: Time to plot the histograms: {t_3-t_2}")
print("\n----------------------------------------")
print("\n--------------------------------------------\n")
print(f"Time to plot the beats relative to RMSE percentile: {np.average(time_to_plot_rmse_perc)} +/- {np.std(time_to_plot_rmse_perc)} (total: {sum(time_to_plot_rmse_perc)})")
print(f"Time to build the rmse vectors for histogram plotting: {np.average(time_to_build_rmse_vec_for_histogram)} +/- {np.std(time_to_build_rmse_vec_for_histogram)} (total: {sum(time_to_build_rmse_vec_for_histogram)})")
print(f"Time to plot the histograms: {np.average(time_to_plot_histogram)} +/- {np.std(time_to_plot_histogram)} (total: {sum(time_to_plot_histogram)})")
def process(files, multi=True, cores=1):
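# Creates the first available numbered log directory and dispatches the files either serially or
# over a multiprocessing Pool (the 'multi' flag is currently unused).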
# ------------ INIT ------------
global log_dir
for i in range(1,1000):
tmp_log_dir = log_dir+str(i)
if not os.path.isdir(tmp_log_dir):
log_dir = tmp_log_dir
break
os.mkdir(log_dir)
# ------------ Extract DATA & ANNOTATIONS ------------
if cores == 1:
print("Single core")
for f in files:
reconstruct_and_compare(f)
else:
with Pool(cores) as pool:
pool.map(reconstruct_and_compare, files)
if __name__ == "__main__":
import argparse
seconds_start = time.time()
local_time_start = time.ctime(seconds_start)
print("\nStarted at:", local_time_start,"\n\n")
#global NUM_BEAT_ANALYZED
parser = argparse.ArgumentParser()
parser.add_argument("--file", help="Force analysis of one specific file instead of the default one (first found); pass 'all' to analyze every file")
parser.add_argument("--not_norm", help="Force NOT normalizing each beat", action="store_true")
parser.add_argument("--cores", help="Force the number of cores used (default: half of the available ones)")
parser.add_argument("--beats", help="Number of beats to analyze (default: 5000)")
parser.add_argument("--cluster_opt", help="Percentage of points for a cluster to be considered")
parser.add_argument("--template_type", help="Type of template, default: distance")
args = parser.parse_args()
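# Example invocation (file name and values below are only illustrative):
#   python multiple_templates_prog.py --file 100 --beats 2000 --cores 4 --cluster_opt 3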
files = os.listdir(data_beats_dir)
if args.file is not None:
if args.file == 'all':
analyzed = files
else:
analyzed = [file_name for file_name in files if args.file in file_name]
else:
analyzed = [files[0]]
if args.not_norm:
normalize = False
else:
normalize = True
if args.cores is not None:
used_cores = int(args.cores)
else:
used_cores = multiprocessing.cpu_count()//2
if args.beats is not None:
NUM_BEAT_ANALYZED = int(args.beats)
else:
NUM_BEAT_ANALYZED = 5000
if args.template_type is not None:
TEMPLATE_TYPE = args.template_type
else:
TEMPLATE_TYPE = 'distance'
if args.cluster_opt is not None:
CLUSTER_PERCENTAGE = int(args.cluster_opt)
else:
CLUSTER_PERCENTAGE = 3
print(f"Analyzing files: {analyzed}")
print(f"Extracting data with {used_cores} cores...")
process(files = analyzed, multi=True, cores=used_cores)
seconds_stop = time.time()
local_time_stop = time.ctime(seconds_stop)
elapsed = seconds_stop - seconds_start
hours = int(elapsed // 3600)
minutes = int((elapsed - hours * 3600) // 60)
seconds = int(elapsed - hours * 3600 - minutes * 60)
print("\n\n\n-----------------------------------------------------------------------------------------------------------------")
print(f"Finished at: {local_time_stop}, elapsed: {elapsed} seconds ({hours} hours, {minutes} minutes, {seconds} seconds)")
