multiple_templates_prog.py

import multiprocessing
import os
import pickle
import shutil
import sys
from multiprocessing import Pool
import time
import matplotlib
import matplotlib.pyplot as plt
# configure backend here
matplotlib.use('SVG')
import numpy as np
from dtaidistance import dtw
scripts = '../helper_scripts'
if scripts not in sys.path:
sys.path.insert(0,scripts)
import warnings
from copy import copy
from dtw.dtw import dtw_std
from scipy import stats
from scipy.interpolate import interp1d
from scipy.signal import fftconvolve
from build_template import build_template, multi_template
data_beats_dir = "../data/beats/"
log_dir = "../data/beat_recon_logs_multi_prog_"
FREQ = 128*4
SEC_TO_AVG = 5*60
BEAT_TO_PLOT = 2
LEVEL = [3,4,5,6,7,8]
NUM_BEAT_ANALYZED = 50
LEN_DISTANCE_VECTOR = 80
LEN_DISTANCE_VECTOR_REF = 500
REF_NEW = 100
TEMPLATE_TYPE = 'distance'
INTERPOLATION_TYPE = 'flat' # resample type written to the specs log; assumed to mirror the hardcoded 'flat' used in reconstruct_and_compare
PERC_BINS = 15
CLUSTER_PERCENTAGE = 3
def RMSE(v1,v2):
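    '''Root-mean-square error between two equal-length sequences.'''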
v1_n = np.array(v1)
v2_n = np.array(v2)
return np.sqrt(np.mean((v1_n-v2_n)**2))
def RMSE_warped_resampled(data_orig,data_warped,data_resampled):
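    '''
    For every beat, compute the RMSE between the original signal and both the
    warped and the resampled reconstruction, over the whole beat and over the
    portions to the left and right of the annotated beat sample.
    '''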
rmse = {}
template = {'warp':None,'resamp':None,'warp_left':None,'resamp_left':None,'warp_right':None,'resamp_right':None}
for beat in data_warped.keys():
rmse[beat] = copy(template)
idx_beat = data_warped[beat]['t'].index(beat)
l_resamp = data_resampled[beat]['v'][:idx_beat]
r_resamp = data_resampled[beat]['v'][idx_beat+1:]
l_warp = data_warped[beat]['v'][:idx_beat]
r_warp = data_warped[beat]['v'][idx_beat+1:]
l_orig = data_orig[beat]['v'][:idx_beat]
r_orig = data_orig[beat]['v'][idx_beat+1:]
rmse[beat]['warp'] = RMSE(data_orig[beat]['v'],data_warped[beat]['v'])
rmse[beat]['resamp'] = RMSE(data_orig[beat]['v'],data_resampled[beat]['v'])
rmse[beat]['warp_left'] = RMSE(l_orig,l_warp)
rmse[beat]['resamp_left'] = RMSE(l_orig,l_resamp)
rmse[beat]['warp_right'] = RMSE(r_orig,r_warp)
rmse[beat]['resamp_right'] = RMSE(r_orig,r_resamp)
return rmse
def open_file(file_name, level, start_after = SEC_TO_AVG):
'''
Data structure:
File:
|
DICTIONARY:
|
--> Beat_annot:
|
--> "t": []
|
--> "v": []
'''
file_name_full = os.path.join(data_beats_dir, str(level), os.path.basename(file_name))
data = {}
with open(file_name_full,"rb") as f:
data = pickle.load(f)
    for k in list(data.keys()): # iterate over a copy of the keys, since the dict is modified while looping
        if k <= FREQ*start_after:
            data.pop(k)
return data
def get_beats_in_time_span(data, lvl = 0 ,t_start_seconds = 0, t_stop_seconds = SEC_TO_AVG):
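    '''Return the subset of beats whose annotation falls between t_start_seconds and t_stop_seconds.'''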
beats = {}
for annot in data.keys():
if annot >= int(t_start_seconds*FREQ) and annot <= int(t_stop_seconds*FREQ):
beats[annot] = data[annot]
elif annot > int(t_stop_seconds*FREQ):
break
return beats
def min_max_normalization(beats, params = None):
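    '''
    Min-max normalize each beat and subtract its average. If 'params' is given,
    the stored per-beat min/max/avg are re-used; otherwise they are computed and
    returned so that the other resolution levels can be normalized consistently.
    '''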
params_out = {}
normalized_beats = {}
#print("\t",params)
for beat in beats:
vector = beats[beat]['v']
params_out[beat] = {}
normalized_beats[beat] = {'t':beats[beat]['t'],'v':[]}
if params is not None:
mi_v = params[beat]['min']
ma_v = params[beat]['max']
avg = params[beat]['avg']
norm = (np.array(vector)-mi_v)/(ma_v-mi_v)
norm -= avg
normalized_beats[beat]['v'] = norm.tolist()
else:
mi_v = min(vector)
ma_v = max(vector)
norm = (np.array(vector)-mi_v)/(ma_v-mi_v)
avg = np.average(norm)
norm -= avg
normalized_beats[beat]['v'] = norm.tolist()
params_out[beat]['min'] = mi_v
params_out[beat]['max'] = ma_v
params_out[beat]['avg'] = avg
return normalized_beats,params_out
def resamp_one_signal(t,v,resample_type = 'linear', min_t = None, max_t = None):
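    '''Resample one (t, v) signal on an integer time grid from min_t to max_t, using linear or flat (previous-value) interpolation.'''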
if resample_type == "linear":
f = interp1d(t,v, bounds_error = False, fill_value = "extrapolate")
elif resample_type == "flat":
f = interp1d(t,v, kind = 'previous', bounds_error = False, fill_value = "extrapolate")
if min_t is None:
min_t = t[0]
if max_t is None:
max_t = t[-1]
t_new = list(range(min_t,max_t+1))
v_new = f(t_new)
return t_new,v_new
def resample(data, resample_type = "linear", min_t = None, max_t = None):
resampled_data = {"t":None,"v":None}
t = data['t']
v = data['v']
t_r,v_r = resamp_one_signal(t,v,resample_type = resample_type, min_t = min_t, max_t = max_t)
resampled_data['t'] = t_r
resampled_data['v'] = v_r
return resampled_data
def change_sampling(vector, size_to_warp):
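    '''
    Stretch or shrink 'vector' to exactly 'size_to_warp' samples: after an
    optional uniform up-sampling (when many points are missing), samples are
    inserted (as local averages) or removed at the positions of smallest gradient.
    '''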
resampled = []
    if len(vector) < 3: # A vector of only 1 or 2 samples degenerates to a straight line, so it is warped uniformly
resampled = [vector[0]]*size_to_warp
return resampled
delta = abs(size_to_warp - len(vector)) # --> number of points to insert/delete
#At least one more sample for each sample (except the last one)? --> insert k in each hole and recompute delta
if delta >= len(vector)-1 and delta > 0:
holes = len(vector)-1
k = delta // holes #Number of point for each hole
        time = range(0,len(vector)*(k+1),k+1) # [o1,i11,i12,...,i1k, o2,i21,i22,...,i2k, o3,..., o_fin] --> NOTE: the last sample is not taken
# 0 k+1 2k+2 l*k +l --> Hence, we get to the exact end
t_new = range(time[-1]+1)
f = interp1d(time,vector, bounds_error = False, fill_value = "extrapolate")
vector = list(f(t_new))
delta = abs(size_to_warp - len(vector))
if len(vector) == size_to_warp:
return vector
grad = np.gradient(vector)
grad[-1] = max(grad) + 1 # --> we don't want to insert anything after the last sample
grad_idxs = sorted(range(len(grad)), key = lambda idx: grad[idx])
idx_to_consider = grad_idxs[:delta]
#print(delta, len(idx_to_consider), idx_to_consider)
for i,sample in enumerate(vector):
resampled.append(sample)
if i in idx_to_consider:
if size_to_warp < len(vector):
resampled.pop()
elif size_to_warp > len(vector):
resampled.append((vector[i]+vector[i+1])/2)
return resampled
def segment_warp(segment):
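    '''Resample one template segment to 'length_to_warp' samples, then shift and linearly tilt it so that it starts at v_start and ends at v_stop.'''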
#print("--> ",len(segment['segment']))
#print("--> ",segment['length_to_warp'])
resampled = np.array(change_sampling(segment['segment'], segment['length_to_warp']))
#print("--> ",len(resampled))
resampled += (segment['v_start'] - resampled[0])
m = (segment['v_stop'] - resampled[-1])/(segment['length_to_warp']-1)
coef_to_sum = [m*x for x in range(segment['length_to_warp'])]
resampled += coef_to_sum
return list(resampled)
def stitch_segments(segments):
stitched = []
for i,segment in enumerate(segments):
if i == len(segments)-1:
stitched.extend(segment)
else:
stitched.extend(segment[0:-1])
return stitched
def dtw_dist(v1,v2):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
dist = dtw_std(v1, v2)
return dist
def warp(resamp_event, templates, events):
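    '''
    Reconstruct one beat: select, via DTW, the template closest to the resampled
    low-resolution beat, cut that template at the acquired event points, warp each
    segment to the corresponding time span and stitch the segments back together.
    Returns the minimum DTW distance, the warped beat, the distances to all
    templates and the id of the selected template.
    '''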
#print(events)
warped = {}
segments = [] # --> {"segment":v, "length_to_warp":length, "v_start":v_eb_0, "v_stop":v_eb_1}
segment = {"segment":None, "length_to_warp":None, "v_start":None, "v_stop":None}
segment_start = None
event_start = None
#chosen_template = []
    # Add the first and last points of the resampled and truncated beat if they are not already among the events
if resamp_event['t'][0] not in events['t']:
events['t'].insert(0, resamp_event['t'][0])
events['v'].insert(0, 0)
#events['v'].insert(0, data['v'][0])
if resamp_event['t'][-1] not in events['t']:
events['t'].insert(-1,resamp_event['t'][-1])
events['v'].insert(-1,0)
#events['v'].insert(-1,resamp_event['v'][-1])
#print(events)
#Apply DTW for matching resampled event to template
v_src = resamp_event['v']
    # This mirrors what the library actually does when calling 'warping_path'
dist = float('inf')
path = []
selected_template = []
    distances_vector = []
template_id = None
for id in templates:
t = templates[id]['point']
#dist_this_template, paths_this_template = dtw.warping_paths(v_src, t)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
dist_this_template, _, path_this_template = dtw_std(v_src, t, dist_only=False)
        distances_vector.append(dist_this_template)
if dist_this_template < dist:
dist = dist_this_template
path = path_this_template
selected_template = t
template_id = id
#path = dtw.best_path(paths)
path = [(v1,v2) for v1,v2 in zip(path[0],path[1])]
#Remove the "left" steps from the path and segment the template based on the events point
prev_idx_src = path[-1][0]+1
for idx_src, idx_temp in path:
if prev_idx_src == idx_src:
continue
prev_idx_src = idx_src
for idx, t in enumerate(events['t']):
if resamp_event['t'][idx_src] == t:
                if segment_start is None:
segment_start = idx_temp
event_start = idx
else:
#print(f"SEGMENT {events['t'][event_start]} - {events['t'][idx]} ({abs(events['t'][event_start] - events['t'][idx])})")
segment['segment'] = np.array(selected_template[segment_start:idx_temp+1], dtype=np.float64)
segment['length_to_warp'] = events['t'][idx] - events['t'][event_start] + 1
segment['v_start'] = events['v'][event_start]
segment['v_stop'] = events['v'][idx]
w = segment_warp(segment)
#print(len(w))
segments.append(w)
segment_start = idx_temp
event_start = idx
break
segment_stitched = stitch_segments(segments)
#print(len(segment_stitched), len(resamp_event['t']))
warped = {'t':resamp_event['t'],'v':segment_stitched}
    return dist, warped, distances_vector, template_id
def compute_new_templates(data, t_start, t_stop, old_templates, templates_info):
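    '''
    Build a refreshed template set from the beats in [t_start, t_stop]. New clusters
    come from multi_template; each old template is either kept as is, kept as the
    representative of a new cluster, or replaced by the new cluster's template, with
    replacements tracked in templates_info via "connected_template" and "deceased".
    '''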
beats_for_new_template = get_beats_in_time_span(data, t_start_seconds=t_start,t_stop_seconds=t_stop)
_, new_templates_descriptor = multi_template(beats_for_new_template, percentage_each_cluster = CLUSTER_PERCENTAGE*2)
lbls_new_templates = list(new_templates_descriptor.keys())
old_ids = list(old_templates.keys())
next_id = max(old_ids) + 1
cluster_representatives = {}
new_template_set = {}
print(f"\nNew template built, number of new templates:{len(lbls_new_templates)}\n")
old_templates_kept = 0
old_templates_substituted = 0
new_templates_kept = 0
new_templates_substituted = 0
    # Check which of the old templates can be considered clustered with the newly found ones
if len(lbls_new_templates) > 0:
for id_old_template in old_templates:
t_o = old_templates[id_old_template]['point']
dists = np.zeros((len(lbls_new_templates)))
for k,t_n in enumerate(lbls_new_templates):
dists[k] = dtw_dist(t_o,new_templates_descriptor[t_n]['center'])
min_dist_pos = np.argmin(dists)
min_val = dists[min_dist_pos]
this_lbls_new_templates = lbls_new_templates[min_dist_pos]
distances_intra_cluster = new_templates_descriptor[this_lbls_new_templates]['dist_all_pt_cluster']
dist_threshold = np.average(distances_intra_cluster)+np.std(distances_intra_cluster)
if min_val < dist_threshold:
if this_lbls_new_templates not in cluster_representatives.keys():
cluster_representatives[this_lbls_new_templates] = {"ids": [], "dists": []}
cluster_representatives[this_lbls_new_templates]["ids"].append(id_old_template)
cluster_representatives[this_lbls_new_templates]["dists"].append(min_val)
            else: # This template's minimum distance to the newly found templates is too large for it to belong to the same cluster
new_template_set[id_old_template] = old_templates[id_old_template]
old_templates_kept += 1
        # Now check whether the old templates clustered with the newly found ones are representative of their cluster, or if the new templates should be used instead
for local_new_id in lbls_new_templates:
if local_new_id in cluster_representatives.keys():
connected_ids = cluster_representatives[local_new_id]["ids"]
arg_min_dist = np.argmin(cluster_representatives[local_new_id]["dists"])
old_template_id = cluster_representatives[local_new_id]["ids"][arg_min_dist]
old_template_dist = cluster_representatives[local_new_id]["dists"][arg_min_dist]
# The old template is nearer to the found cluster center than the beat found by the clustering alg
if old_template_dist < new_templates_descriptor[local_new_id]["dist"]:
new_template_set[old_template_id] = old_templates[old_template_id]
connected_ids.remove(old_template_id)
templates_info[old_template_id]["connected_template"].extend(connected_ids)
new_templates_substituted += 1
# The beat found by the clustering alg is nearer to the found cluster center than the old template
else:
new_template_set[next_id] = new_templates_descriptor[local_new_id]
templates_info[next_id] = {"used": 0, "deceased": False, "connected_template": connected_ids}
next_id += 1
old_templates_substituted += len(cluster_representatives[local_new_id]["ids"])
# The templates in this cluster that are not the center are now deceased
for id_removed in connected_ids:
templates_info[id_removed]["deceased"] = True
else:
new_template_set[next_id] = new_templates_descriptor[local_new_id]
templates_info[next_id] = {"used": 0, "deceased": False, "connected_template": []}
next_id += 1
new_templates_kept += 1
print(f"Old templates kept untuched: {old_templates_kept}/{len(old_templates)}")
print(f"Old templates kept, representative of new clusters (but already present): {new_templates_substituted}/{len(old_templates)}")
print(f"Old templates removed for old clusters (with new defined params): {len(old_templates) - (old_templates_kept+old_templates_substituted+new_templates_substituted)}/{len(old_templates)}")
print(f"Old templates removed for new clusters: {old_templates_substituted}/{len(old_templates)}")
print(f"\nNew templates kept untuched: {new_templates_kept}/{len(lbls_new_templates)}")
print(f"New templates kept, representative of old clusters (with new params): {len(lbls_new_templates) - (new_templates_kept+new_templates_substituted)}/{len(lbls_new_templates)}")
print(f"New templates removed for old clusters: {new_templates_substituted}/{len(lbls_new_templates)}")
print(f"\nFinal lenght of the new tempalte set: {len(new_template_set)}")
else:
new_template_set = old_templates
print("No new template found, keeping the previously computed ones")
return new_template_set
# Code to re-use for a bounded number of templates. First, let's try the unbounded version
'''
# Decide which template to keep and which to discard
if len(new_templates) < max_num_template:
to_keep = np.argsort(dist_vector)[:max_num_template-len(new_templates)]
to_discard = np.argsort(dist_vector)[max_num_template-len(new_templates):]
for idx in to_keep:
new_full_templates.append(templates[idx])
new_ids.append(id_templates[idx])
print(f"\tKept template: {idx}, dist: {dist_vector[idx]}:\t","|"*int(20*dist_vector[idx]/max_accum_dist))
for idx in to_discard:
templates_info[id_templates[idx]]["deceased"]=True
else:
print(f"\tNo old templates kept")
# Add the new templates
for n,t in enumerate(new_templates):
new_full_templates.append(t)
new_ids.append(next_id+n)
templates_info[next_id+n] = {"template": t, "used": 0, "deceased": False}
'''
def reconstruct_beats(data_orig, data_lvl, templates_descriptor_original, lvl_number, start_after = SEC_TO_AVG, normalize = True, resample_type = 'linear', num_beats_analyzed = None, verbose = False):
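    '''
    Reconstruct up to 'num_beats_analyzed' beats of the compressed signal 'data_lvl'
    by template warping, tracking per-template usage. The distribution of DTW
    distances is compared with a reference window using the Anderson-Darling
    k-sample test; when it drifts (p < 0.05) in two successive test windows, new
    templates are computed from the original signal over the next 120 seconds and
    the beats in that window are skipped.
    '''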
reconstructed = {}
resamp = {}
beat_seq_number = 0
distances = []
distances_ref = []
num_distances_out = 0
skip_until = start_after
templates = copy(templates_descriptor_original)
dist_vector = [0]*len(templates)
time_info = {'beats_low_res_num': 0, 'tot_beats_num': 0}
    # Init template info
    templates_info = {} # {template_id: {"used": 0, "deceased": False, "connected_template": []}}
    for id_t in range(len(templates)):
        templates_info[id_t] = {"used": 0, "deceased": False, "connected_template": []}
#print(list(templates.keys()),templates_info)
for beat in data_orig.keys():
t_beat = beat/FREQ
time_info['tot_beats_num'] += 1
if t_beat < skip_until:
continue
time_info['beats_low_res_num'] += 1
if beat_seq_number >= num_beats_analyzed:
break
beat_seq_number +=1
if (beat_seq_number %(num_beats_analyzed/20)==0 or beat_seq_number == 1) and verbose:
print(f"Reconstructing beat {beat} ({beat_seq_number}/{num_beats_analyzed}: {100*beat_seq_number /num_beats_analyzed}%, LEVEL:{lvl_number})")
reconstructed[beat] = {}
resamp[beat] = {}
data_resampled = resample(data_lvl[beat], resample_type = resample_type, min_t = data_orig[beat]['t'][0], max_t = data_orig[beat]['t'][-1])
#WARP DRIVEEEEEEE
dist, reconstructed[beat], dist_all_template, local_template_id = warp(data_resampled,templates,data_lvl[beat])
dist_vector = [dist_vector[i]+dist_all_template[i] for i in range(len(dist_vector))]
resamp[beat] = data_resampled
#Update templates usage info
template_id = list(templates_info.keys())[-len(templates)+local_template_id]
templates_info[template_id]["used"] += 1
if len(distances_ref) < LEN_DISTANCE_VECTOR_REF:
distances_ref.append(dist)
if len(distances_ref) >= LEN_DISTANCE_VECTOR_REF:
distances.append(dist)
if len(distances) >= LEN_DISTANCE_VECTOR:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
p = stats.anderson_ksamp([distances_ref,distances])[2]
distances = []
if (p>= 0.05):
num_distances_out = 0
                else: # p-value less than 0.05: the null hypothesis (same distribution) is rejected (please Kolmogorov forgive me)
num_distances_out += 1
if num_distances_out >= 2: # The acquired vector of distances was out for 2 times
max_accum_dist = max(dist_vector)
print(f"\n################################################################")
print(f"\nBeat num:{beat_seq_number}, New template needed ... ")
print(f"\t p-value: {p}")
for j in range(len(dist_vector)):
print(f"\tTemplate {j}, dist: {dist_vector[j]}:\t","|"*int(20*dist_vector[j]/max_accum_dist))
templates = compute_new_templates(data_orig, t_beat, t_beat+120, templates, templates_info)
dist_vector = [0]*len(templates)
print(f"\n################################################################\n")
#distances_ref = distances_ref[REF_NEW:]
distances_ref = []
skip_until = t_beat + 120
num_distances_out = 0
return reconstructed,resamp,templates_info,time_info
def percentile_idx(vector,perc):
pcen=np.percentile(np.array(vector),perc,interpolation='nearest')
i_near=abs(np.array(vector)-pcen).argmin()
return i_near
def correlate(v1,v2):
v1_n = np.array(v1)
v2_n = np.array(v2)
cov = np.cov(v1_n,v2_n)[0,1]
cor = cov/(np.std(v1_n)*np.std(v2_n))
return cor
def avg_std_for_rmse_dict(dictionary):
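    '''Average and standard deviation of the per-beat 'warp' and 'resamp' RMSE values.'''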
dict_out = {'warp':{'avg':None,'std':None},'resamp':{'avg':None,'std':None}}
wr = []
rs = []
for beat in dictionary.keys():
wr.append(dictionary[beat]['warp'])
rs.append(dictionary[beat]['resamp'])
dict_out['warp']['avg'] = np.average(wr)
dict_out['warp']['std'] = np.std(wr)
dict_out['resamp']['avg'] = np.average(rs)
dict_out['resamp']['std'] = np.std(rs)
return dict_out
def percentile_rmse_beat_rel(rmse,perc):
vec = []
for beat in rmse.keys():
vec.append(rmse[beat]['warp'] - rmse[beat]['resamp'])
idx = percentile_idx(vec,perc)
return list(rmse.keys())[idx]
def percentile_rmse_beat_abs(rmse,perc):
vec = []
for beat in rmse.keys():
vec.append(rmse[beat]['warp'])
idx = percentile_idx(vec,perc)
return list(rmse.keys())[idx]
def find_all_connected_template(id_temp,templates_info):
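    '''Collect, starting from id_temp, all template ids transitively linked through "connected_template".'''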
id_collected = [id_temp]
for id_connected in id_collected:
for id_to_add in templates_info[id_connected]["connected_template"]:
if id_to_add not in id_collected:
id_collected.append(id_to_add)
return id_collected
def plot_perc(orig,warp,resamp,title,out_file_name):
plt.figure()
plt.plot(orig[0],orig[1])
plt.plot(warp[0],warp[1])
plt.plot(resamp[0],resamp[1])
plt.legend(['original','warped template','resampled'])
plt.title(title)
plt.savefig(out_file_name)
plt.close()
def plot_beat_rmse_percentile(orig, recon, resamp, lvl, rmse, perc, log_dir_this_lvl, file):
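    '''Plot the beat sitting at the given percentile of the RMSE, both relative (warp minus resample) and absolute (warp only).'''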
#Relative
beat_to_plot_rel = percentile_rmse_beat_rel(rmse, perc)
file_name_to_save_fig_rel = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_relative.svg")
orig_rel = orig[beat_to_plot_rel]['t'],orig[beat_to_plot_rel]['v']
recon_rel = recon[beat_to_plot_rel]['t'],recon[beat_to_plot_rel]['v']
resamp_rel = resamp[beat_to_plot_rel]['t'],resamp[beat_to_plot_rel]['v']
title_rel = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_to_plot_rel}, {str(perc)} percentile'
plot_perc(orig_rel,recon_rel,resamp_rel,title_rel,file_name_to_save_fig_rel)
#Absolute
beat_to_plot_abs = percentile_rmse_beat_abs(rmse, perc)
file_name_to_save_fig_abs = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_absolute.svg")
orig_abs = orig[beat_to_plot_abs]['t'],orig[beat_to_plot_abs]['v']
recon_abs = recon[beat_to_plot_abs]['t'],recon[beat_to_plot_abs]['v']
resamp_abs = resamp[beat_to_plot_abs]['t'],resamp[beat_to_plot_abs]['v']
title_abs = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_to_plot_abs}, {str(perc)} percentile'
plot_perc(orig_abs,recon_abs,resamp_abs,title_abs,file_name_to_save_fig_abs)
def reconstruct_and_compare(file):
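    '''
    Full pipeline for one file: normalize the original data, build the initial
    templates, then for every compression level reconstruct the beats, log RMSE
    statistics and template usage, and plot percentile beats and RMSE histograms.
    '''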
verbose = True
log_dir_this_file = os.path.join(log_dir,file.split(".")[0])
os.mkdir(log_dir_this_file)
#recon,orig,resamp,templates_info, time_info = reconstruct_beats(file, level = LEVEL, resample_type = 'flat', num_beats_analyzed = NUM_BEAT_ANALYZED, verbose = True) #resample_type = flat vs linear
#rmse = RMSE_warped_resampled(orig,recon,resamp)
time_to_plot_rmse_perc = []
time_to_build_rmse_vec_for_histogram = []
time_to_plot_histogram = []
if verbose:
print("Extracting original data")
data_orig = open_file(file, level = "original", start_after = 0)
if verbose:
print("Min-Max-Avg normaliztion of original data")
data_orig,original_params = min_max_normalization(data_orig, params = None)
if verbose:
print("Building initial templates")
beats_for_template = get_beats_in_time_span(data_orig, t_start_seconds=0,t_stop_seconds=60*5)
templates_orig, templates_orig_descriptor = multi_template(beats_for_template, normalize = normalize, percentage_each_cluster = CLUSTER_PERCENTAGE)
if verbose:
print(f"Initial number of templates: {len(templates_orig)}")
print("reconstructing")
for lvl in LEVEL:
if verbose:
print(f"Analyzing level:{lvl}")
print("Extracting...")
data_lvl = open_file(file, level = lvl,start_after = 0)
if verbose:
print(f"Min-Max-Avg normaliztion for lvl {lvl}")
data_lvl,_ = min_max_normalization(data_lvl, params = original_params)
recon,resamp,templates_info, time_info = reconstruct_beats(data_orig, data_lvl, templates_orig_descriptor, lvl, start_after = SEC_TO_AVG, resample_type = 'flat', num_beats_analyzed = NUM_BEAT_ANALYZED, verbose = True) #resample_type = flat vs linear
rmse = RMSE_warped_resampled(data_orig,recon,resamp)
log_dir_this_lvl = os.path.join(log_dir_this_file,str(lvl))
os.mkdir(log_dir_this_lvl)
        rmse_stats = avg_std_for_rmse_dict(rmse) # do not name this 'stats', to avoid shadowing scipy.stats
        avg_rmse_warp = rmse_stats['warp']['avg']
        std_rmse_warp = rmse_stats['warp']['std']
        avg_rmse_resamp = rmse_stats['resamp']['avg']
        std_rmse_resamp = rmse_stats['resamp']['std']
file_name_to_save = "L_"+file.split(".")[0]+".log"
# Particular log for each lvl
with open(os.path.join(log_dir_this_lvl,file_name_to_save),"a") as f:
f.write(f"Lvl: {lvl}\n")
f.write(f"\tWarp: {avg_rmse_warp}, +-{std_rmse_warp}\n")
f.write(f"\tInterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}\n")
for id_temp in templates_info:
usage = templates_info[id_temp]["used"]
deceased = templates_info[id_temp]["deceased"]
f.write(f"\t\tTemplate {id_temp} was used {usage} times out of {NUM_BEAT_ANALYZED}, \
deceased? {deceased}\n")
f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%\n")
f.write(f"\n\n")
        # General log (the same info, but all together: harder to read but complete)
with open(os.path.join(log_dir_this_file,file_name_to_save),"a") as f:
f.write(f"Lvl: {lvl}\n")
f.write(f"\tWarp: {avg_rmse_warp}, +-{std_rmse_warp}\n")
f.write(f"\tInterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}\n")
'''
for id_temp in templates_info:
usage = templates_info[id_temp]["used"]
deceased = templates_info[id_temp]["deceased"]
f.write(f"\t\tTemplate {id_temp} was used {usage} times out of {NUM_BEAT_ANALYZED}, \
deceased? {deceased}\n")
'''
f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%\n")
f.write(f"\n\n")
print(f"File:{file_name_to_save}")
print(f"\tLvl: {lvl}")
print(f"\t\twarp: {avg_rmse_warp}, +-{std_rmse_warp}")
print(f"\t\tinterpolation: {avg_rmse_resamp}, +-{std_rmse_resamp}")
print(f"\t\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%")
print("\n")
t_0 = time.time()
# 01 percentile
plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 1, log_dir_this_lvl, file)
# 25 percentile
plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 25, log_dir_this_lvl, file)
# 50 percentile
plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 50, log_dir_this_lvl, file)
# 75 percentile
plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 75, log_dir_this_lvl, file)
# 99 percentile
plot_beat_rmse_percentile(data_orig, recon, resamp, lvl, rmse, 99, log_dir_this_lvl, file)
# Histograms
t_1 = time.time()
file_name_to_save_fig_hist = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist"+str(lvl)+".svg")
file_name_to_save_fig_hist_left = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist_left"+str(lvl)+".svg")
file_name_to_save_fig_hist_right = os.path.join(log_dir_this_lvl,file.split(".")[0]+"_hist_right"+str(lvl)+".svg")
rmse_warp = []
rmse_resamp = []
rmse_warp_left = []
rmse_resamp_left = []
rmse_warp_right = []
rmse_resamp_right = []
for beat in rmse.keys():
rmse_warp.append(rmse[beat]['warp'])
rmse_resamp.append(rmse[beat]['resamp'])
rmse_warp_left.append(rmse[beat]['warp_left'])
rmse_resamp_left.append(rmse[beat]['resamp_left'])
rmse_warp_right.append(rmse[beat]['warp_right'])
rmse_resamp_right.append(rmse[beat]['resamp_right'])
t_2 = time.time()
n_bins = len(rmse_warp)*PERC_BINS//100
min_bin = min(min(rmse_warp),min(rmse_resamp))
max_bin = max(max(rmse_warp),max(rmse_resamp))
delta = (max_bin-min_bin)/n_bins
bins = np.arange(min_bin,max_bin+delta,delta)
plt.figure()
plt.hist(rmse_warp, bins = bins, alpha=0.5)
plt.hist(rmse_resamp, bins = bins, alpha=0.5)
plt.title(f'File: {file}, Lvl: {lvl}, RMSE histogram')
plt.legend(['RMSE warp','RMSE resampled'])
plt.savefig(file_name_to_save_fig_hist)
plt.close()
n_bins = len(rmse_warp_left)*PERC_BINS//100
min_bin = min(min(rmse_warp_left),min(rmse_resamp_left))
max_bin = max(max(rmse_warp_left),max(rmse_resamp_left))
delta = (max_bin-min_bin)/n_bins
bins = np.arange(min_bin,max_bin+delta,delta)
plt.figure()
plt.hist(rmse_warp_left, bins = bins, alpha=0.5)
plt.hist(rmse_resamp_left, bins = bins, alpha=0.5)
plt.title(f'File: {file}, Lvl: {lvl}, left RMSE histogram')
plt.legend(['RMSE warp','RMSE resampled'])
plt.savefig(file_name_to_save_fig_hist_left)
plt.close()
n_bins = len(rmse_warp_right)*PERC_BINS//100
min_bin = min(min(rmse_warp_right),min(rmse_resamp_right))
max_bin = max(max(rmse_warp_right),max(rmse_resamp_right))
delta = (max_bin-min_bin)/n_bins
bins = np.arange(min_bin,max_bin+delta,delta)
plt.figure()
plt.hist(rmse_warp_right, bins = bins, alpha=0.5)
plt.hist(rmse_resamp_right, bins = bins, alpha=0.5)
plt.title(f'File: {file}, Lvl: {lvl}, right RMSE histogram')
plt.legend(['RMSE warp','RMSE resampled'])
plt.savefig(file_name_to_save_fig_hist_right)
plt.close()
t_3 = time.time()
'''
template_heirarchy = {}
counter = 0
blacklist = []
for id_temp in templates_info:
if id_temp in blacklist:
continue
            components = find_all_connected_template(id_temp,templates_info)
blacklist.extend(components)
template_heirarchy[counter] = components
counter += 1
t_4 = time.time()
for component in template_heirarchy:
dir_component = os.path.join(log_dir_this_lvl,f"cluster_{str(component)}")
os.mkdir(dir_component)
for id_template in template_heirarchy[component]:
file_name_to_save_fig_template = os.path.join(dir_component,f"template_{str(id_template)}_cluster_{str(component)}.svg")
plt.figure()
plt.plot(templates_info[id_template]['template'])
plt.legend(['template'])
plt.title(f'File: {file}, template {id_template}')
plt.savefig(file_name_to_save_fig_template)
plt.close()
t_5 = time.time()
'''
time_to_plot_rmse_perc.append(t_1-t_0)
time_to_build_rmse_vec_for_histogram.append(t_2-t_1)
time_to_plot_histogram.append(t_3-t_2)
print(f"\tLevel: {lvl}: Time to plot the beats relative to RMSE percentile: {t_1-t_0}")
print(f"\tLevel: {lvl}: Time to build the rmse vectors for histogram plotting: {t_2-t_1}")
print(f"\tLevel: {lvl}: Time to plot the histograms: {t_3-t_2}")
print("\n----------------------------------------")
print("\n--------------------------------------------\n")
print(f"Time to plot the beats relative to RMSE percentile: {np.average(time_to_plot_rmse_perc)} +/- {np.std(time_to_plot_rmse_perc)} (total: {sum(time_to_plot_rmse_perc)})")
print(f"Time to build the rmse vectors for histogram plotting: {np.average(time_to_build_rmse_vec_for_histogram)} +/- {np.std(time_to_build_rmse_vec_for_histogram)} (total: {sum(time_to_build_rmse_vec_for_histogram)})")
print(f"Time to plot the histograms: {np.average(time_to_plot_histogram)} +/- {np.std(time_to_plot_histogram)} (total: {sum(time_to_plot_histogram)})")
def process(files, multi=True, cores=1):
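    '''Create a fresh numbered log directory, write the run specs, and process every file either sequentially or on a pool of worker processes.'''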
# ------------ INIT ------------
global log_dir
for i in range(1,1000):
tmp_log_dir = log_dir+str(i)
if not os.path.isdir(tmp_log_dir):
log_dir = tmp_log_dir
break
os.mkdir(log_dir)
with open(os.path.join(log_dir,"specs.txt"), "w") as f:
f.write(f"Results generated by script: {sys.argv[0]}\n")
f.write(f"Time: {time.ctime(time.time())}\n\n")
f.write(f"Files: {files}\n")
f.write(f"Cores: {cores}\n")
f.write(f"Beats: {NUM_BEAT_ANALYZED}\n")
f.write(f"Cluster percentage: {CLUSTER_PERCENTAGE}\n")
f.write(f"Interpolation type: {INTERPOLATION_TYPE}\n")
# ------------ Extract DATA & ANNOTATIONS ------------
if cores == 1:
print("Single core")
for f in files:
            reconstruct_and_compare(f)
else:
with Pool(cores) as pool:
            pool.map(reconstruct_and_compare, files)
if __name__ == "__main__":
import argparse
import time
seconds_start = time.time()
local_time_start = time.ctime(seconds_start)
print("\nStarted at:", local_time_start,"\n\n")
#global NUM_BEAT_ANALYZED
parser = argparse.ArgumentParser()
parser.add_argument("--file", help="Force to analyze one specific file instead of default one (first found)")
parser.add_argument("--not_norm", help="Force to NOT normalize each beats", action="store_true")
parser.add_argument("--cores", help="Force used number of cores (default, half of the available ones")
parser.add_argument("--beats", help="Number of used beats, default: 5000")
parser.add_argument("--cluster_opt", help="Percentage of points for a cluster to be considered")
parser.add_argument("--template_type", help="Type of template, default: distance")
args = parser.parse_args()
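    # Example invocation (hypothetical record name; the corresponding pickle files must exist under ../data/beats/<level>/):
    #   python multiple_templates_prog.py --file 100 --beats 500 --cores 4 --cluster_opt 3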
files = os.listdir(os.path.join(data_beats_dir,"original"))
if args.file is not None:
if args.file == 'all':
analyzed = files
else:
analyzed = list(filter(lambda string: True if args.file in string else False, files))
else:
analyzed = [files[0]]
if args.not_norm:
normalize = False
else:
normalize = True
if args.cores is not None:
used_cores = int(args.cores)
else:
used_cores = multiprocessing.cpu_count()//2
if args.beats is not None:
NUM_BEAT_ANALYZED = int(args.beats)
else:
NUM_BEAT_ANALYZED = 5000
    if args.template_type is not None:
        TEMPLATE_TYPE = args.template_type
    else:
        TEMPLATE_TYPE = 'distance'
if args.cluster_opt is not None:
CLUSTER_PERCENTAGE = int(args.cluster_opt)
else:
CLUSTER_PERCENTAGE = 3
print(f"Analyzing files: {analyzed}")
print(f"Extracting data with {used_cores} cores...")
process(files = analyzed, multi=True, cores=used_cores)
seconds_stop = time.time()
local_time_stop = time.ctime(seconds_stop)
elapsed = seconds_stop - seconds_start
hours = elapsed//60//60
minutes = (elapsed - hours * 60 * 60) // 60
seconds = (elapsed - hours * 60 * 60 - minutes * 60) // 1
print("\n\n\n-----------------------------------------------------------------------------------------------------------------")
print(f"Finished at: {local_time_stop}, elapsed: {elapsed} seconds ({hours} hours, {minutes} minutes, {seconds} seconds)")
