precise_multiple_templates_prog.py
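# Event-driven (level-crossing ADC) ECG beat reconstruction with multiple DTW-matched templates.
# For each quantization level the script rebuilds every analyzed beat both by plain resampling of the
# event stream and by warping the best-matching template onto it, then scores both reconstructions
# against the original signal (DTW distance, PRD, and P/T-wave delineation via ecgpuwave/bxb).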

import multiprocessing
import os
import pickle
import pandas as pd
import shutil
import sys
from multiprocessing import Pool, Manager
import json
import time
import math
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats
scripts = '../helper_scripts'
if scripts not in sys.path:
sys.path.insert(0,scripts)
import warnings
from copy import copy
# configure backend here
matplotlib.use('SVG')
import numpy as np
from dtw import dtw_std, ddtw_tv
from csv2mit_with_dir_out import csv2mit_with_dir_out
from scipy import stats
from scipy.interpolate import CubicSpline, interp1d
import wfdb
from build_template import build_template, multi_template
data_beats_dir = "../data/beats/"
log_dir = "../results/beat_recon_logs_multi_prog_"
# Signal frequency variables
FREQ = 128
MULTIPLIER_FREQ = 20 #Final freq: 128*20 = 2.56 KHz
ADC_GAIN = 200
# Parameters fixed for result evaluation
PERC_BINS = 10
PERCENTILE_TO_PLOT = [1,25,50,75,99]
# Global constants
TIME_WEIGHT = 1
# Variables set by command-line arguments (defaults below)
CLUSTER_PERCENTAGE = 3
NUM_BEAT_ANALYZED = 50
INTERPOLATION_TYPE = 'flat'
FILES_SELECTED = ["17052.pickle"]
PARALLELIZE_ALONG = 'files'
LEVELS = [3,4,5,6,7]#[5]#[3,4,5,6,7,8,9,10,11]
USE_DIFFERENTIAL_DTW = False
SEC_FOR_INITIAL_TEMPLATES = 3*60 #5*60
LEN_DISTANCE_VECTOR = 60 #80
LEN_DISTANCE_VECTOR_REF = 400 #500
SEC_FOR_NEW_TEMPLATES = 40 #2*60
TIME_MODE = 'short'
#Metrics to measure for paper
manager = Manager()
jfc = manager.dict()
use_ddtw_tv = False
#NOTE: currently using dtw_std (standard DTW implementation) in place of ddtw_tv
def is_odd(n):
    return n % 2 == 1
def is_even(n):
return not is_odd(n)
def delineate(log_dir_this_lvl_resampling, signal_type, QRS_pos):
    #TODO: problem here: /!\ level 3 - spline does not work with many samples, maybe a random beat triggers a condition that breaks ecgpuwave (not halting)
    # IT IS SOLVED BY TERMINATING (CTRL-C ing) ONCE WE USE LEVEL 3 --> stopping ecgpuwave stops only the delineator and everything else keeps working
    # TO ANALYZE BY NOT DELETING LEVELS AND TESTING FROM THE TERMINAL --> there are annotations where level 3 fails to have any recognizable peak around the given annotation!
dir_to_use = os.path.join(log_dir_this_lvl_resampling,signal_type)
annotations_tot = pd.DataFrame({"time": [], "idx": [], "type": [], "0": [], "1": [], "2": []})
end_record = False
start_from = 0
    signal = os.path.join(dir_to_use,"data.csv")
    #Transform the signal into the MIT format
    csv2mit_with_dir_out(signal,fs = FREQ, units = 'mV', header = False, fmt = '212', adc_gain=ADC_GAIN, baseline = 0)
    os.remove(signal)
#Run the delineator
wfdb.wrann("data","atrN",np.array(QRS_pos), symbol= ['N']*len(QRS_pos),write_dir=dir_to_use)
while not end_record:
cmd_delineator = f"(cd {dir_to_use} ; timeout -s SIGINT 5 ecgpuwave -r data -a atrTemp -i atrN -f {start_from} > /dev/null 2>&1)"
os.system(cmd_delineator)
#Write the new annotations back
        # OLD METHOD FAILED IF THE DELINEATOR DIDN'T TERMINATE (I.E. IT FAILED ANNOTATING AFTER A CERTAIN TIMESTAMP)
# SOLUTION: SUBSTITUTE WITH A SYS CALL AND READ THE STDOUT
cmd_read_ann = f"(cd {dir_to_use} ; rdann -r data -a atrTemp >annotations_text.txt 2>error.txt)"
os.system(cmd_read_ann)
annotations_this_chunk = pd.read_csv(os.path.join(dir_to_use,"annotations_text.txt"),header=None, delim_whitespace=True,names=["time", "idx", "type", "0", "1", "2"])
        annotations_tot = pd.concat([annotations_tot, annotations_this_chunk], ignore_index = True)
if len(annotations_this_chunk) > 0:
start_from = int(annotations_this_chunk.idx.values[-1]/FREQ+300) #skip 300 seconds (5 minutes)
else:
start_from += 300 #skip 300 seconds (5 minutes)
with open(f"{dir_to_use}/error.txt","r") as error_file:
if len(error_file.readlines()) == 0 or start_from >= QRS_pos[-1]/FREQ:
end_record = True
else:
print(f"/!\/!\/!\/!\/!\/!\ dir: {dir_to_use}: ecgpuwave needed a second pass at idx: {start_from-1} /!\/!\/!\/!\/!\ ")
print(f"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Removing {os.path.join(dir_to_use,'data.atrTemp')} ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
os.remove(os.path.join(dir_to_use,"data.atrTemp"))
os.remove(os.path.join(dir_to_use,"annotations_text.txt"))
os.remove(os.path.join(dir_to_use,"error.txt"))
#
annotations_tot_pt = annotations_tot[(annotations_tot['type'] == "p") | (annotations_tot['type'] == "t")]
if len(annotations_tot) == 0:
annotations_tot = pd.DataFrame({"time": [0], "idx": [0], "type": ['p'], "0": [0], "1": [0], "2": [0]})
if len(annotations_tot_pt) == 0:
annotations_tot_pt = pd.DataFrame({"time": [0], "idx": [0], "type": ['p'], "0": [0], "1": [0], "2": [0]})
annotations_tot = annotations_tot.sort_values(by = 'idx')
annotations_tot_pt = annotations_tot_pt.sort_values(by = 'idx')
wfdb.wrann("data","atrDelinAll",annotations_tot["idx"].values.astype(np.int32), symbol = annotations_tot['type'].values, write_dir=dir_to_use)#, num=annotations_tot['2'].values.astype(np.int32))
wfdb.wrann("data","atrDelinPT",annotations_tot_pt["idx"].values.astype(np.int32), symbol = annotations_tot_pt['type'].values, write_dir=dir_to_use)#, num=annotations_tot_pt['2'].values.astype(np.int32))
return annotations_tot
def compare(log_dir_this_lvl_resampling, resampling_type, annot_full_O, annot_full_r):
'''
bxb:
The -o option produces an output annotation file with annotator name bxb.
The output annotation file contains exact copies of all of the test annotator’s beat labels that match those of the reference annotator,
as well as NOTE annotations that describe all mismatches.
Mismatched annotation types are mapped into the AAMI ‘test label’ mnemonics.
The ‘aux’ field of each NOTE annotation indicates the element of the confusion matrix in which the mismatch is tallied:
    e.g., Nv represents an event called a normal beat by the reference annotator and a ventricular ectopic beat by the test annotator.
NOTE annotations that correspond to beats missed by the test annotator are placed at the sample indicated by the reference annotation;
all others are placed at that indicated by the test annotation.
bxb output:
...
4:59.672 38358 " 0 0 0 O/p
5:00.031 38404 t 0 0 0
5:00.328 38442 p 0 0 0
5:00.688 38488 t 0 0 0
5:00.938 38520 p 0 0 0
    5:01.297 38566 " 0 0 5 t[0,0,0]/t ---> NOT PRESENT ANYMORE: here the reference says a normal t (code 0) while the test says it's a t but with code 5
5:01.367 38575 " 0 0 5 O/t ---> Nothing in the reference, "t" in test
5:01.523 38595 p 0 0 0
5:01.875 38640 t 0 0 0
5:02.102 38669 " 0 0 0 p/O ---> "p" in the reference, nothing in test
...
'''
dir_dest = os.path.join(log_dir_this_lvl_resampling,resampling_type)
shutil.copy(os.path.join(log_dir_this_lvl_resampling,"original","data.atrDelinPT"),os.path.join(dir_dest,"data.atrOrigPT"))
shutil.copy(os.path.join(log_dir_this_lvl_resampling,"original","data.atrDelinAll"),os.path.join(dir_dest,"data.atrDelinOrig"))
#This can compare all the annotations in one go: bxb -r data -a atrDelinTotOrig atrDelinTot -f 0 -v -O
cmd_bxb = f"(cd {dir_dest}; bxb -r data -a atrOrigPT atrDelinPT -O -f 0 2>/dev/null)"
os.system(cmd_bxb)
annots_pt = wfdb.rdann(os.path.join(dir_dest,"data"),"bxb")
p_wave_true_pos = 0
p_wave_false_pos = 0
p_wave_false_negative = 0
p_wave_sens = 0
p_wave_pp = 0
p_wave_f1 = 0
t_wave_true_pos = 0
t_wave_false_pos = 0
t_wave_false_negative = 0
t_wave_sens = 0
t_wave_pp = 0
t_wave_f1 = 0
for pos,sym,aux in zip(annots_pt.sample,annots_pt.symbol,annots_pt.aux_note):
if sym == 'p':
p_wave_true_pos += 1
elif aux == "p/O":
p_wave_false_negative += 1
elif aux == "O/p":
p_wave_false_pos += 1
if sym == 't':
t_wave_true_pos += 1
elif aux == "t/O":
t_wave_false_negative += 1
elif aux == "O/t":
t_wave_false_pos += 1
groud_truth_p = p_wave_true_pos+p_wave_false_negative
found_p = p_wave_true_pos+p_wave_false_pos
if groud_truth_p == 0:
p_wave_sens = 0
else:
p_wave_sens = p_wave_true_pos/(p_wave_true_pos+p_wave_false_negative)
if found_p == 0:
p_wave_pp = 0
else:
p_wave_pp = p_wave_true_pos/(p_wave_true_pos+p_wave_false_pos)
if p_wave_sens+p_wave_pp == 0:
p_wave_f1 = 0
else:
p_wave_f1 = 2*(p_wave_sens*p_wave_pp)/(p_wave_sens+p_wave_pp)
groud_truth_t = t_wave_true_pos+t_wave_false_negative
found_t = t_wave_true_pos+t_wave_false_pos
if groud_truth_t == 0:
t_wave_sens = 0
else:
t_wave_sens = t_wave_true_pos/(t_wave_true_pos+t_wave_false_negative)
if found_t == 0:
t_wave_pp = 0
else:
t_wave_pp = t_wave_true_pos/(t_wave_true_pos+t_wave_false_pos)
if t_wave_sens+t_wave_pp == 0:
t_wave_f1 = 0
else:
t_wave_f1 = 2*(t_wave_sens*t_wave_pp)/(t_wave_sens+t_wave_pp)
print("----------------------------------------")
print(f"delineation_score: {dir_dest}: \n\tp_s:{p_wave_sens}\n\tp_pp:{p_wave_pp}\n\tp_f1:{p_wave_f1}\n\tt_s:{t_wave_sens}\n\tt_pp:{t_wave_pp}\n\tt_f1:{t_wave_f1}")
return {"p":{'sens':p_wave_sens,'ppv':p_wave_pp,'f1':p_wave_f1,'Tp':p_wave_true_pos,'Fp':p_wave_false_pos,'Fn':p_wave_false_negative},
"t":{'sens':t_wave_sens,'ppv':t_wave_pp,'f1':t_wave_f1,'Tp':t_wave_true_pos,'Fp':t_wave_false_pos,'Fn':t_wave_false_negative}}
def delineate_and_compare(log_dir_this_lvl, QRS_pos):
annots_orig = delineate(log_dir_this_lvl, "original", QRS_pos)
annots_warp = delineate(log_dir_this_lvl, "warped", QRS_pos)
annots_resamp = delineate(log_dir_this_lvl, "resampled", QRS_pos)
results_warp = compare(log_dir_this_lvl, "warped", annots_orig,annots_warp)
results_resamp = compare(log_dir_this_lvl, "resampled", annots_orig,annots_resamp)
return {"resamp":results_resamp,"warp":results_warp}
def add_beat_to_csv(base_path_name,beat):
'''
base_path_name is the path to use to save the data in the form:
'results/$log_dir/$sample_name/$lvl/$resampling_type/$signal_type/
'''
mode = ''
data_path = os.path.join(base_path_name,"data.csv")
first_beat = True
if os.path.exists(data_path):
mode = 'a'
else:
mode = 'w'
with open(data_path,mode) as f:
for pt in beat:
if mode == 'w' and first_beat:
f.write(str(pt/ADC_GAIN))
first_beat = False
else:
f.write('\n'+str(pt/ADC_GAIN))
def decimate(v,factor):
return v[::factor]
def revert_normaliztion(data,params):
un_norm_data = []
saturate = lambda x: x if abs(x)<2**11 else (2**11-1)*np.sign(x)
for pt in data:
un_norm_pt = saturate((pt + params['avg'])*(params['max']-params['min']) + params['min'])
un_norm_data.append(un_norm_pt)
return un_norm_data
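# Illustrative inverse check (assumed params, mirroring min_max_normalization_one_beat below): with
# params = {'min': 0, 'max': 10, 'avg': 0.5} the normalized points [-0.5, 0.0, 0.5] map back to
# [0, 5, 10]; any value whose magnitude would reach 2**11 is clamped to +-(2**11 - 1).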
def un_norma_and_save_back_beat(beat,log_dir,signal_type,norm_params):
dir_to_save_to = os.path.join(log_dir,signal_type)
os.makedirs(dir_to_save_to, exist_ok=True)
data_v_un_norm = revert_normaliztion(beat['v'],norm_params)
add_beat_to_csv(dir_to_save_to,data_v_un_norm)
return data_v_un_norm
def z_score_filter(vector,threshold = 3):
out = None
z = stats.zscore(vector)
out = [vector[idx] for idx in range(len(vector)) if z[idx]<threshold ]
return out
def PRD(v_o,v_r): #percentage root-mean square difference
v_o_n = np.array(v_o)
v_r_n = np.array(v_r)
return np.sqrt(sum((v_o_n-v_r_n)**2)/sum(v_o_n**2))*100
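# PRD = sqrt( sum((v_o - v_r)^2) / sum(v_o^2) ) * 100. Small illustrative example: v_o = [1, 2, 2]
# and v_r = [1, 2, 3] give sqrt(1/9)*100 ~= 33.3 (a single sample off by 1 against an original
# energy of 9).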
def PRD_one_beat(orig,resamp,warped,QRS_idx): #percentage root-mean square difference
prd = {'warp':None,'resamp':None,'warp_left':None,'resamp_left':None,'warp_right':None,'resamp_right':None}
l_resamp = resamp['v'][:QRS_idx]
r_resamp = resamp['v'][QRS_idx+1:]
l_warp = warped['v'][:QRS_idx]
r_warp = warped['v'][QRS_idx+1:]
l_orig = orig['v'][:QRS_idx]
r_orig = orig['v'][QRS_idx+1:]
prd['warp'] = PRD(orig['v'],warped['v'])
prd['resamp'] = PRD(orig['v'],resamp['v'])
prd['warp_left'] = PRD(l_orig,l_warp)
prd['resamp_left'] = PRD(l_orig,l_resamp)
prd['warp_right'] = PRD(r_orig,r_warp)
prd['resamp_right'] = PRD(r_orig,r_resamp)
return prd
def open_file(file_name, start_after = SEC_FOR_INITIAL_TEMPLATES):
'''
Data structure:
File:
|
DICTIONARY:
|
--> Beat_annot:
|
--> "t": []
|
--> "v": []
'''
file_name_full = os.path.join(data_beats_dir, os.path.basename(file_name))
data = {}
with open(file_name_full,"rb") as f:
data = pickle.load(f)
    for k in list(data.keys()):
if k <= FREQ*start_after:
data.pop(k)
return data
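# Illustrative layout of one pickled beats file (assumed from the docstring above and from how the
# keys are used later: each key is the QRS annotation position in samples at FREQ = 128 Hz, and that
# position also appears in the beat's own 't' vector):
#   {16000: {'t': [15950, ..., 16049], 'v': [ ...amplitude samples... ]},
#    16120: {'t': [...], 'v': [...]},
#    ... }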
def upsample_uniform_beat(beat,multiplier):
out_beat = {}
upsampled = []
v = beat['v']
last_new_t = (len(v)-1)*multiplier
t = np.arange(0,last_new_t+1,multiplier,dtype = np.int64)
new_base = np.arange(0,last_new_t,1,dtype = np.int64)
f = interp1d(t, v)
upsampled = f(new_base)
out_beat = {'t':new_base,'v':upsampled}
return out_beat
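# Illustrative run (assumed tiny beat): upsample_uniform_beat({'t': [0, 1, 2], 'v': [0, 10, 20]}, 2)
# places the original samples on the grid t = [0, 2, 4], interpolates linearly on the fine grid
# new_base = [0, 1, 2, 3] and returns {'t': [0, 1, 2, 3], 'v': [0., 5., 10., 15.]}
# (new_base stops at last_new_t - 1, so the final original sample is not re-emitted).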
def get_beats_in_time_span(data, t_start_seconds = 0, t_stop_seconds = SEC_FOR_INITIAL_TEMPLATES):
beats = {}
for annot in data.keys():
if annot >= int(t_start_seconds*FREQ) and annot <= int(t_stop_seconds*FREQ):
beats[annot] = data[annot]
elif annot > int(t_stop_seconds*FREQ):
break
return beats
def min_max_normalization_one_beat(beat,param = None):
params_out = {}
normalized_beat = {'t':beat['t'],'v':[]}
vector = beat['v']
if param is not None:
mi_v = param['min']
ma_v = param['max']
avg = param['avg']
norm = (np.array(vector)-mi_v)/(ma_v-mi_v)
norm -= avg
else:
mi_v = min(vector)
ma_v = max(vector)
norm = (np.array(vector)-mi_v)/(ma_v-mi_v)
avg = np.average(norm)
norm -= avg
normalized_beat['v'] = norm.tolist()
params_out['min'] = mi_v
params_out['max'] = ma_v
params_out['avg'] = avg
return normalized_beat,params_out
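# Illustrative run (assumed values): for beat['v'] = [0, 5, 10] and param = None we get min = 0,
# max = 10, norm = [0.0, 0.5, 1.0], avg = 0.5, hence v = [-0.5, 0.0, 0.5] and
# params_out = {'min': 0, 'max': 10, 'avg': 0.5}. Passing those params back in (as done for the
# event stream) reproduces exactly the same scaling.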
def min_max_normalization_beats_chunk(beats, params = None):
params_out = {}
normalized_beats = {}
for beat in beats:
normalized_beats[beat],params_out[beat] = min_max_normalization_one_beat(beats[beat],params)
return normalized_beats,params_out
def resamp_one_signal(t,v,resample_type = INTERPOLATION_TYPE, min_t = None, max_t = None):
if min_t is None:
min_t = t[0]
if max_t is None:
max_t = t[-1]
t_extended = copy(t)
v_extended = copy(v)
if max_t not in t:
t_extended.insert(len(t_extended), max_t)
v_extended.insert(len(v_extended), 0)
    if min_t not in t: # This is not needed in this implementation as the first sample is always an event for each beat
t_extended.insert(0, min_t) # Still, we write it for clarity and consistency
v_extended.insert(0, 0)
if resample_type == "linear":
f = interp1d(t_extended,v_extended, bounds_error = False, fill_value = (v[0],v[-1]))
elif resample_type == "flat":
f = interp1d(t_extended,v_extended, kind = 'previous', bounds_error = False, fill_value = (v[0],v[-1]))
elif resample_type == "spline":
f = CubicSpline(t_extended,v_extended, bc_type="natural")
t_new = list(range(min_t,max_t+1))
v_new = f(t_new)
return t_new,v_new
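# Illustrative run (assumed event stream): t = [0, 3, 6], v = [1, 2, 3] with resample_type = 'flat'
# yields t_new = [0, 1, 2, 3, 4, 5, 6] and v_new = [1, 1, 1, 2, 2, 2, 3] (previous-value hold);
# 'linear' would ramp between events and 'spline' fits a natural cubic through them.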
def percentile_idx(vector,perc):
pcen=np.percentile(np.array(vector),perc,method='nearest')
i_near=abs(np.array(vector)-pcen).argmin()
return i_near
def get_template_and_beat_at_idx(all_beats,all_templates,look_up_beat_template,idx_sel):
beat_id = look_up_beat_template['beats'][idx_sel]
template_id = look_up_beat_template['template_id'][idx_sel]
beat = all_beats[beat_id]
template = all_templates[template_id]
return beat,beat_id,template
def find_all_connceted_template(id_temp,templates_info):
id_collected = [id_temp]
for id_connected in id_collected:
for id_to_add in templates_info[id_connected]["connected_template"]:
if id_to_add not in id_collected:
id_collected.append(id_to_add)
return id_collected
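# Illustrative run (assumed minimal templates_info; only the "connected_template" field is read):
#   templates_info = {0: {"connected_template": [1]}, 1: {"connected_template": [2]}, 2: {"connected_template": []}}
#   find_all_connceted_template(0, templates_info) -> [0, 1, 2]   (transitive closure of the connections)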
def plot_perc(orig,warp,resamp,template,title,out_file_name):
fig, (ax1, ax2) = plt.subplots(2)
fig.suptitle(title)
ax1.plot(orig[0],orig[1])
ax1.plot(warp[0],warp[1])
ax1.plot(resamp[0],resamp[1])
ax1.legend(['original','warped template','resampled'])
ax2.plot(template)
ax2.legend(['template used'])
fig.savefig(out_file_name)
plt.close(fig)
def reconstruct_plot_and_save_beat(beat,interpolation_type,template,lvl,title,save_fig_to):
beat_norm_timing = copy(beat)
beat_norm_timing['t'] = list(range(len(beat['t'])))
events = ADC(beat,lvl,MULTIPLIER_FREQ)
beat_norm,params = min_max_normalization_one_beat(beat_norm_timing)
events_norm,_ = min_max_normalization_one_beat(events,params)
resampled = resample(events_norm, resample_type = interpolation_type, min_t = beat_norm['t'][0], max_t = beat_norm['t'][-1])
    template_with_envelope = {"template":template}
#TODO: rework this warp function with the new one
_, reconstructed, _, _ = warp_events(template_with_envelope,events_norm, start_t = beat_norm['t'][0], end_t = beat_norm['t'][-1])
orig = beat_norm['t'],beat_norm['v']
recon = reconstructed['t'],reconstructed['v']
resamp = resampled['t'],resampled['v']
plot_perc(orig,recon,resamp,template['point'],title,save_fig_to)
#-------------------------------------- WORK ON THIS --------------------------------------
def stitch_segments(segments):
stitched = {'t':[],'v':[]}
for segment in segments:
stitched['t'].extend(segment[0])
stitched['v'].extend(segment[1])
f = interp1d(stitched['t'],stitched['v'], bounds_error = False, fill_value = (stitched['v'][0],stitched['v'][-1]))
t_new = np.arange(math.floor(stitched['t'][0]),math.ceil(stitched['t'][-1])+1)
v_new = f(t_new)
stitched['t'] = t_new
stitched['v'] = v_new
return stitched
def segment_warp(segment):
v = segment['segment']
t = []
if len(v) == 1:
v = np.array([v[0],v[0]])
v = np.array(v)
t = np.linspace(segment['t_start'],segment['t_stop'],len(v))
v += (segment['v_start'] - v[0]) # Make the start of the segment to be the same as the starting event
m = (segment['v_stop'] - v[-1])/(len(v)-1)
v += [m*x for x in range(len(v))]
if segment['drop_end'] == True:
v = v[:-1]
t = t[:-1]
return t,v
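# Illustrative run (assumed segment): {'segment': [0., 1., 2.], 't_start': 10, 't_stop': 20,
# 'v_start': 5.0, 'v_stop': 9.0, 'drop_end': False} gives t = [10., 15., 20.] and v = [5., 7., 9.]:
# the template piece is shifted so it starts at v_start and linearly tilted so its last sample lands
# on v_stop, pinning the warped segment to both surrounding events.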
def warp_events(templates,events,start_t,end_t):
warped = {}
segments = [] # --> {"segment":v, "length_to_warp":length, "v_start":v_eb_0, "v_stop":v_eb_1}
segment = {"segment":None, "length_to_warp":None, "v_start":None, "v_stop":None}
segment_start = None
event_start = None
    #add the first and last points of the resampled and truncated beat if not already in the events
if start_t not in events['t']:
events['t'].insert(0, start_t)
events['v'].insert(0, 0)
if end_t not in events['t']:
events['t'].insert(len(events['t']),end_t)
events['v'].insert(len(events['v']),0)
dist = float('inf')
path = []
selected_template = []
disatances_vector = []
template_id = None
for id in templates:
template_v = templates[id]['point']
template_t = list(range(len(template_v)))
events_v = events['v']
events_t = events['t']
#dist_this_template, paths_this_template = dtw.warping_paths(v_src, t)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
if use_ddtw_tv:
dist_this_template, _, path_this_template = ddtw_tv(events_t, events_v, template_t, template_v, dist_only=False, use_diff = USE_DIFFERENTIAL_DTW, time_weight = TIME_WEIGHT)
else:
dist_this_template, _, path_this_template = dtw_std(events_v, template_v, dist_only=False, squared=False)
disatances_vector.append(dist_this_template)
if dist_this_template < dist:
dist = dist_this_template
path = path_this_template
selected_template = template_v
template_id = id
path_events = path[0].tolist()
path_template = path[1].tolist()
path = [(event_idx,templ_idx) for event_idx,templ_idx in zip(path[0],path[1])]
#Find matching events and warp the templates between those points
segment_start = 0
segment_stop = None
idx_traker_place = path_events.count(0)-1 #Initialize the index to the last position of the samples associated to the first event
for eb_idx in range(1,max(path_events)): #This is equivalent to all unique indexes in path_events except first and last
len_this_event = path_events.count(eb_idx)
is_len_segment_odd = is_odd(len_this_event)
segment_stop = idx_traker_place + math.ceil(len_this_event/2) # We want the offset where we find the middle of this event
idx_traker_place += len_this_event
template_start = path_template[segment_start]
template_stop = path_template[segment_stop]
event_start = path_events[segment_start]
event_stop = path_events[segment_stop]
segment['segment'] = np.array(selected_template[template_start:template_stop+1], dtype=np.float64)
segment['t_start'] = events['t'][event_start]
segment['v_start'] = events['v'][event_start]
segment['t_stop'] = events['t'][event_stop]
segment['v_stop'] = events['v'][event_stop]
segment['drop_end'] = is_len_segment_odd
tw,vw = segment_warp(segment)
segments.append((tw,vw))
if is_len_segment_odd:
segment_start = segment_stop
else:
segment_start = segment_stop +1
#ending
segment_stop = len(path_events)-1
template_start = path_template[segment_start]
template_stop = path_template[segment_stop]
event_start = path_events[segment_start]
event_stop = path_events[segment_stop]
segment['segment'] = np.array(selected_template[template_start:template_stop+1], dtype=np.float64)
segment['t_start'] = events['t'][event_start]
segment['v_start'] = events['v'][event_start]
segment['t_stop'] = events['t'][event_stop]
segment['v_stop'] = events['v'][event_stop]
segment['drop_end'] = False
tw,vw = segment_warp(segment)
segments.append((tw,vw))
segment_stitched = stitch_segments(segments)
warped = {'t':segment_stitched['t'],'v':segment_stitched['v']}
return dist, warped, disatances_vector, template_id
#------------------------------------------------------------------------------------------
def dtw_dist(v1,v2):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
dist = dtw_std(v1, v2)
return dist
def ddtw_tv_dist(t1,v1,t2,v2):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
if use_ddtw_tv:
dist = ddtw_tv(t1, v1, t2, v2, use_diff = USE_DIFFERENTIAL_DTW, time_weight = TIME_WEIGHT)
else:
dist = dtw_std(v1, v2, squared=False)
return dist
def compute_new_templates(data, t_start, t_stop, old_templates, templates_info):
beats_for_new_template = get_beats_in_time_span(data, t_start_seconds=t_start,t_stop_seconds=t_stop)
beats_for_new_template,_ = min_max_normalization_beats_chunk(beats_for_new_template)
_, new_templates_descriptor = multi_template(beats_for_new_template, percentage_each_cluster = CLUSTER_PERCENTAGE*2, freq= FREQ, use_diff_dtw = USE_DIFFERENTIAL_DTW, time_weight = TIME_WEIGHT)
lbls_new_templates = list(new_templates_descriptor.keys())
old_ids = list(old_templates.keys())
if old_ids == []:
next_id = 0
else:
next_id = max(old_ids) + 1
cluster_representatives = {}
new_template_set = {}
print(f"\n\tNew template built, number of new templates:{len(lbls_new_templates)}\n")
old_templates_kept = 0
old_templates_substituted = 0
new_templates_kept = 0
new_templates_substituted = 0
    # We search which of the old templates can be considered clustered with the newly found ones
# TODO: /!\ WRONG BACK PROJECTION!!!! need to send back the function to template prediction and measure distance from THAT template
if len(lbls_new_templates) > 0:
for id_old_template in old_templates:
t_o = old_templates[id_old_template]['point']
dists = np.zeros((len(lbls_new_templates)))
for k,t_n in enumerate(lbls_new_templates):
t1 = list(range(len(t_o)))
v1 = t_o
t2 = list(range(len(new_templates_descriptor[t_n]['center'])))
v2 = new_templates_descriptor[t_n]['center']
                #TODO: what distance do we want to use here? the templates are not acquired event-based
dists[k] = ddtw_tv_dist(t1,v1,t2,v2)
min_dist_pos = np.argmin(dists)
min_val = dists[min_dist_pos]
this_lbls_new_templates = lbls_new_templates[min_dist_pos]
distances_intra_cluster = new_templates_descriptor[this_lbls_new_templates]['dist_all_pt_cluster']
dist_threshold = np.average(distances_intra_cluster)+np.std(distances_intra_cluster)
if min_val < dist_threshold:
if this_lbls_new_templates not in cluster_representatives.keys():
cluster_representatives[this_lbls_new_templates] = {"ids": [], "dists": []}
cluster_representatives[this_lbls_new_templates]["ids"].append(id_old_template)
cluster_representatives[this_lbls_new_templates]["dists"].append(min_val)
            else: #This template's minimum distance to the newly found templates is too big to be considered part of the same cluster
new_template_set[id_old_template] = old_templates[id_old_template]
old_templates_kept += 1
        # Now we check if the old templates, clustered with the newly found ones, are representative of the cluster or if we should use the new templates
for local_new_id in lbls_new_templates:
if local_new_id in cluster_representatives.keys():
connected_ids = cluster_representatives[local_new_id]["ids"]
arg_min_dist = np.argmin(cluster_representatives[local_new_id]["dists"])
old_template_id = cluster_representatives[local_new_id]["ids"][arg_min_dist]
old_template_dist = cluster_representatives[local_new_id]["dists"][arg_min_dist]
# The old template is nearer to the found cluster center than the beat found by the clustering alg
if old_template_dist < new_templates_descriptor[local_new_id]["dist"]:
new_template_set[old_template_id] = old_templates[old_template_id]
connected_ids.remove(old_template_id)
templates_info[old_template_id]["connected_template"].extend(connected_ids)
new_templates_substituted += 1
# The beat found by the clustering alg is nearer to the found cluster center than the old template
else:
new_template_set[next_id] = new_templates_descriptor[local_new_id]
templates_info[next_id] = {"used": 0, "deceased": False, "connected_template": connected_ids}
next_id += 1
old_templates_substituted += len(cluster_representatives[local_new_id]["ids"])
# The templates in this cluster that are not the center are now deceased
for id_removed in connected_ids:
templates_info[id_removed]["deceased"] = True
else:
new_template_set[next_id] = new_templates_descriptor[local_new_id]
templates_info[next_id] = {"used": 0, "deceased": False, "connected_template": []}
next_id += 1
new_templates_kept += 1
print(f"\tOld templates kept untuched: {old_templates_kept}/{len(old_templates)}")
print(f"\tOld templates kept, representative of new clusters (but already present): {new_templates_substituted}/{len(old_templates)}")
print(f"\tOld templates removed for old clusters (with new defined params): {len(old_templates) - (old_templates_kept+old_templates_substituted+new_templates_substituted)}/{len(old_templates)}")
print(f"\tOld templates removed for new clusters: {old_templates_substituted}/{len(old_templates)}")
print(f"\n\tNew templates kept untuched: {new_templates_kept}/{len(lbls_new_templates)}")
print(f"\tNew templates kept, representative of old clusters (with new params): {len(lbls_new_templates) - (new_templates_kept+new_templates_substituted)}/{len(lbls_new_templates)}")
print(f"\tNew templates removed for old clusters: {new_templates_substituted}/{len(lbls_new_templates)}")
print(f"\n\tFinal lenght of the new tempalte set: {len(new_template_set)}")
else:
new_template_set = old_templates
print("\tNo new template found, keeping the previously computed ones")
return new_template_set
def resample(data, resample_type = INTERPOLATION_TYPE, min_t = None, max_t = None):
resampled_data = {"t":None,"v":None}
t = data['t']
v = data['v']
t_r,v_r = resamp_one_signal(t,v,resample_type = resample_type, min_t = min_t, max_t = max_t)
resampled_data['t'] = t_r
resampled_data['v'] = v_r
return resampled_data
def ADC(beat, nBits, frequency_multiplier ,hist = 5, original_bits = 11):
#ADC stats
up_samp_beat = upsample_uniform_beat(beat, frequency_multiplier)
delta = 2**original_bits
dV = (delta)/(2**nBits)
hist = (hist/100)*dV
min_val = -delta//2
events = {'t':[],'v':[]}
#init value, first sample (we assume we always sample the nearest level at start) and ADC status
v_0 = up_samp_beat['v'][0]
lowTh = min_val+((v_0-min_val)//dV)*dV
highTh = lowTh + dV
events['t'].append(up_samp_beat['t'][0]/frequency_multiplier)
events['v'].append(int(lowTh if v_0-lowTh < highTh - v_0 else highTh))
for val,time in zip(up_samp_beat['v'],up_samp_beat['t']):
#print(f"Value: {val}, time: {time}, low_th = {lowTh - hist}, high_th = { highTh + hist}")
if val > highTh + hist or val < lowTh - hist:
direction = 1 if val > highTh else -1
            lowTh = min_val+((val-min_val)//dV)*dV #Delta from the bottom: (val-min_val)//dV*dV, then compute the actual level by summing min_val
highTh = lowTh + dV
events['t'].append(time/frequency_multiplier)
events['v'].append(int(lowTh if direction == 1 else highTh))
if events['t'][-1] < up_samp_beat['t'][-1]/frequency_multiplier:
#final value, last sample (we assume we always sample the nearest level at end)
v_end = up_samp_beat['v'][-1]
lowTh = min_val+((v_end-min_val)//dV)*dV
highTh = lowTh + dV
events['t'].append(up_samp_beat['t'][-1]/frequency_multiplier)
events['v'].append(int(lowTh if v_end-lowTh < highTh - v_end else highTh))
return events
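# Worked example of the level-crossing ADC above (illustrative values): with nBits = 3 and
# original_bits = 11, delta = 2048, dV = 256, hysteresis = 12.8, min_val = -1024. For a first
# upsampled sample v_0 = 100 the initial band is [lowTh, highTh] = [0, 256] and the first event
# value is 0 (the nearest level). A later sample of 280 exceeds highTh + hysteresis = 268.8, so a
# new event is emitted with the band moved to [256, 512] and value lowTh = 256 (upward crossing).
# Event times are divided by frequency_multiplier, i.e. reported in the original FREQ sample base.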
def reconstruct_beats(data_orig, lvl_number, file_name,init_templates = None, start_after = SEC_FOR_INITIAL_TEMPLATES, resample_type = INTERPOLATION_TYPE, num_beats_analyzed = None, verbose = False, log_dir = None):
beat_seq_number = 0
prev_len = 0
all_new_QRS_pos = []
distances = []
distances_ref = []
num_distances_out = 0
skip_until = start_after
time_info = {'beats_low_res_num': 0, 'tot_beats_num': 0}
time_last_templates_computation = start_after
# COMPUTE INITIAL TEMPLATES
    #Init template info
    templates_info = {} #{$template_id: {used: 0, deceased: False, "connected_template": []}}
all_templates_collection = {}
if init_templates is None:
print(f"\n################################################################")
print(f"\n(File {file_name}) LEVELS:{lvl_number}, resample type: {resample_type}: Starting initial templates computation... ")
templates = compute_new_templates(data_orig, 0, start_after, {}, templates_info)
else:
templates = init_templates
for id_new in templates:
templates_info[id_new] = {"used": 0, "deceased": False, "connected_template": []}
    all_templates_collection = copy(templates)
init_templates = copy(templates)
measurements_id_coupling = {"ids":\
{"template_id":[], "beats":[]},\
"measures":\
{"dtw":{"warp":[],"resamp":[]},\
"prd":{"warp":[],"resamp":[]},\
"prd_left":{"warp":[],"resamp":[]},\
"prd_right":{"warp":[],"resamp":[]}}}
dist_vector = [0]*len(templates)
jfc[lvl_number]['number_of_templates'].append(len(init_templates))
for beat in data_orig.keys():
t_beat = beat/FREQ
time_info['tot_beats_num'] += 1
if t_beat < skip_until:
continue
if beat_seq_number >= num_beats_analyzed:
break
analyzed_beat = copy(data_orig[beat])
#print(f"Reconstructing beat {beat} ({beat_seq_number}/{num_beats_analyzed}: {100*beat_seq_number /num_beats_analyzed}%, LEVELS:{lvl_number}, resample type: {resample_type})")
#print(f"\tmax_v = {max(analyzed_beat['v'])},\n\tmax_v = {min(analyzed_beat['v'])}\n\tmax_t = {max(analyzed_beat['t'])}\n\tmin_t = {min(analyzed_beat['t'])}")
QRS_idx = analyzed_beat['t'].index(beat)
#Define a new time-base for each beat
analyzed_beat['t'] = list(range(len(analyzed_beat['t'])))
time_info['beats_low_res_num'] += 1
beat_seq_number +=1
if (beat_seq_number %(num_beats_analyzed/20)==0 or beat_seq_number == 1) and verbose:
print(f"(File {file_name}) Reconstructing beat {beat} ({beat_seq_number}/{num_beats_analyzed}: {100*beat_seq_number /num_beats_analyzed}%, LEVELS:{lvl_number}, resample type: {resample_type})")
events = ADC(analyzed_beat,lvl_number,MULTIPLIER_FREQ)
        # For the warping to work, both signals need to be in the same form (whether normalized or un-normalized); otherwise, the distance matrix
        # expresses non-coherent distances between points. Because the templates NEED to be normalized (to better express a topology, instead of raw magnitude),
        # this obliges us to also normalize the input signals
analyzed_beat_norm, min_max_params = min_max_normalization_one_beat(analyzed_beat)
events_norm,_ = min_max_normalization_one_beat(events, min_max_params)
# Re-sample and warp
resampled = resample(events_norm, resample_type = resample_type, min_t = analyzed_beat_norm['t'][0], max_t = analyzed_beat_norm['t'][-1])
dist, reconstructed, dist_all_template, local_template_id = warp_events(templates,events_norm, start_t = analyzed_beat_norm['t'][0], end_t = analyzed_beat_norm['t'][-1])
# Save-back beat
un_norm_resamp = un_norma_and_save_back_beat(resampled,log_dir,"resampled",min_max_params)
un_norm_warped = un_norma_and_save_back_beat(reconstructed,log_dir,"warped",min_max_params)
un_norm_original = un_norma_and_save_back_beat(analyzed_beat_norm,log_dir,"original",min_max_params)
#Update templates usage info
template_id = list(templates_info.keys())[-len(templates)+local_template_id]
templates_info[template_id]["used"] += 1
dist_vector = [dist_vector[i]+dist_all_template[i] for i in range(len(dist_vector))]
#Compute PRD
#{'warp':None,'resamp':None,'warp_left':None,'resamp_left':None,'warp_right':None,'resamp_right':None}
prd_this_beat = PRD_one_beat(analyzed_beat_norm,resampled,reconstructed,QRS_idx)
measurements_id_coupling["ids"]["template_id"].append(local_template_id)
measurements_id_coupling["ids"]["beats"].append(beat)
#Do we want to modify this dtw into ddtw_tv?
measurements_id_coupling[ "measures"]["dtw"]["warp"].append(dtw_dist(un_norm_warped,un_norm_original))
measurements_id_coupling[ "measures"]["dtw"]["resamp"].append(dtw_dist(un_norm_resamp,un_norm_original))
measurements_id_coupling[ "measures"]["prd"]["warp"].append(prd_this_beat['warp'])
measurements_id_coupling[ "measures"]["prd"]["resamp"].append(prd_this_beat['resamp'])
measurements_id_coupling[ "measures"]["prd_left"]["warp"].append(prd_this_beat['warp_left'])
measurements_id_coupling[ "measures"]["prd_left"]["resamp"].append(prd_this_beat['resamp_left'])
measurements_id_coupling[ "measures"]["prd_right"]["warp"].append(prd_this_beat['warp_right'])
measurements_id_coupling[ "measures"]["prd_right"]["resamp"].append(prd_this_beat['resamp_right'])
# check if we need to re-compute the templates
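        # Sketch of the trigger implemented below: the first LEN_DISTANCE_VECTOR_REF warping distances form a
        # reference window; afterwards every LEN_DISTANCE_VECTOR new distances are compared against it with the
        # Anderson-Darling k-sample test, and after 3 consecutive rejections (p < 0.05) the templates are
        # recomputed from the next SEC_FOR_NEW_TEMPLATES seconds (acquired at full rate), the reference window
        # is reset and reconstruction resumes after that span.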
if len(distances_ref) < LEN_DISTANCE_VECTOR_REF:
distances_ref.append(dist)
if len(distances_ref) >= LEN_DISTANCE_VECTOR_REF:
distances.append(dist)
if len(distances) >= LEN_DISTANCE_VECTOR:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
p = stats.anderson_ksamp([distances_ref,distances])[2]
distances = []
if (p>= 0.05):
num_distances_out = 0
            else: # p value less than 0.05: null hypothesis (same distribution) rejected (please Kolmogorov forgive me)
num_distances_out += 1
            if num_distances_out > 2: # The acquired vector of distances was out more than 2 consecutive times
max_accum_dist = max(dist_vector)
print(f"\n################################################################")
print(f"\n(File {file_name}) LEVELS:{lvl_number}, resample type: {resample_type}: New template needed ... ")
print(f"Beat number:{beat_seq_number} ({beat_seq_number}/{num_beats_analyzed}: {100*beat_seq_number /num_beats_analyzed}%)")
print(f"\t p-value: {p}")
for j in range(len(dist_vector)):
print(f"\tTemplate {j}, dist: {dist_vector[j]}:\t","|"*int(20*dist_vector[j]/max_accum_dist))
print("\n")
templates = compute_new_templates(data_orig, t_beat, t_beat+SEC_FOR_NEW_TEMPLATES, templates, templates_info)
for t in templates:
all_templates_collection[t] = templates[t]
dist_vector = [0]*len(templates)
print(f"\n################################################################\n")
distances_ref = []
skip_until = t_beat + SEC_FOR_NEW_TEMPLATES
num_distances_out = 0
delta_t_templates_computation = t_beat-time_last_templates_computation
time_last_templates_computation = skip_until
jfc[lvl_number]['time_between_templates'].append(delta_t_templates_computation)
jfc[lvl_number]['number_of_templates'].append(len(templates))
#Save the used beats:
this_new_QRS_pos = prev_len+QRS_idx
prev_len += len(analyzed_beat_norm['t'])
all_new_QRS_pos.append(this_new_QRS_pos)
return all_templates_collection,measurements_id_coupling,init_templates,time_info,all_new_QRS_pos
def measurments_reuslts(measurements_id_coupling,interpolation_type,data_orig,all_templates_collection,time_info,file,lvl,log_dir_this_lvl,log_dir_this_file):
"""
measurements_id_coupling = {"ids":\
{"template_id":[], "beats":[]},\
"measures":\
{"dtw":{"warp":[],"resamp":[]},\
"prd":{"warp":[],"resamp":[]},\
"prd_left":{"warp":[],"resamp":[]},\
"prd_right":{"warp":[],"resamp":[]}}}
need to modify stats so to accept vectors
"""
for measurement_type in measurements_id_coupling['measures']:
warp = np.array(measurements_id_coupling['measures'][measurement_type]['warp'])
resamp = np.array(measurements_id_coupling['measures'][measurement_type]['resamp'])
avg_warp = np.average(warp)
std_warp = np.std(warp)
avg_resamp = np.average(resamp)
std_resamp = np.std(resamp)
log_dir_this_measurement = os.path.join(log_dir_this_lvl,measurement_type)
os.makedirs(log_dir_this_measurement,exist_ok=True)
file_name_to_save = "L_"+measurement_type+"_"+interpolation_type+"_"+file.split(".")[0]+".log"
# Particular log for each lvl
with open(os.path.join(log_dir_this_measurement,file_name_to_save),"a") as f:
f.write(f"({measurement_type}) Lvl: {lvl}, using the {interpolation_type} interpolation:\n")
f.write(f"\tWarp: {avg_warp}, +-{std_warp}\n")
f.write(f"\tInterpolation: {avg_resamp}, +-{std_resamp}\n")
f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%\n")
f.write(f"\n\n")
        # General log (the same but all together: more confusing but with all the info)
with open(os.path.join(log_dir_this_file,file_name_to_save),"a") as f:
f.write(f"({measurement_type}) Lvl: {lvl}, using the {interpolation_type} interpolation:\n")
f.write(f"\tWarp: {avg_warp}, +-{std_warp}\n")
f.write(f"\tInterpolation: {avg_resamp}, +-{std_resamp}\n")
f.write(f"\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%\n")
f.write(f"\n\n")
print("\n-------------------------------------------------------------------------")
print(f"({measurement_type}) File:{file_name_to_save}, using the {interpolation_type} interpolation:")
print(f"\tLvl: {lvl}")
print(f"\t\twarp: {avg_warp}, +-{std_warp}")
print(f"\t\tinterpolation: {avg_resamp}, +-{std_resamp}")
print(f"\t\tTime (percentage) passed in low-sampling mode: {time_info['beats_low_res_num']/time_info['tot_beats_num']*100}%")
print("\n")
for perc in PERCENTILE_TO_PLOT:
idx_perc_abs = percentile_idx(warp,perc)
beat, beat_id, template = get_template_and_beat_at_idx(data_orig,all_templates_collection,measurements_id_coupling['ids'],idx_perc_abs)
title = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_id}, {str(perc)} percentile, Absolute'
file_name_to_save_fig = os.path.join(log_dir_this_measurement,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_absolute.svg")
reconstruct_plot_and_save_beat(beat,interpolation_type,template,lvl,title,file_name_to_save_fig)
idx_perc_rel = percentile_idx(warp-resamp,perc)
beat,beat_id,template = get_template_and_beat_at_idx(data_orig,all_templates_collection,measurements_id_coupling['ids'],idx_perc_rel)
title = f'File: {file}, Lvl: {lvl}, Beat time (samples): {beat_id}, {str(perc)} percentile, Relative'
file_name_to_save_fig = os.path.join(log_dir_this_measurement,file.split(".")[0]+"_"+str(perc)+"_perc"+str(lvl)+"_relative.svg")
reconstruct_plot_and_save_beat(beat,interpolation_type,template,lvl,title,file_name_to_save_fig)
        #Filter the values and save them back for better plots (the filtered-out data is still accounted for in the stats above)
warp = z_score_filter(warp)
resamp = z_score_filter(resamp)
file_name_to_save_fig_hist = os.path.join(log_dir_this_measurement,file.split(".")[0]+"_hist"+str(lvl)+".svg")
n_bins = min(len(warp),len(resamp))*PERC_BINS//100
min_bin = min(min(warp),min(resamp))
max_bin = max(max(warp),max(resamp))
delta = (max_bin-min_bin)/n_bins
bins = np.arange(min_bin,max_bin+delta,delta)
plt.figure()
plt.hist(warp, bins = bins, alpha=0.5)
plt.hist(resamp, bins = bins, alpha=0.5)
plt.title(f'File: {file}, Lvl: {lvl}, {measurement_type} histogram')
plt.legend([f'{measurement_type} warp',f'{measurement_type} resampled'])
plt.savefig(file_name_to_save_fig_hist)
plt.close()
def delinetaion_results_save(delineation_results,log_dir_this_lvl):
log_dir_this_measurement = os.path.join(log_dir_this_lvl,"delineation")
os.makedirs(log_dir_this_measurement,exist_ok=True)
file_name_to_save = "L_delineation.log"
with open(os.path.join(log_dir_this_measurement,file_name_to_save), "w") as f:
f.write(f"Delineation_score:")
f.write(f"\n\twarping p-wave sensitivity:{delineation_results['warp']['p']['sens']}")
f.write(f"\n\tresampling p-wave sensitivity:{delineation_results['resamp']['p']['sens']}\n")
f.write(f"\n\twarping p-wave positive predicitivity:{delineation_results['warp']['p']['ppv']}")
f.write(f"\n\tresampling p-wave positive predicitivity:{delineation_results['resamp']['p']['ppv']}\n")
f.write(f"\n\twarping p-wave f1 score:{delineation_results['warp']['p']['f1']}")
f.write(f"\n\tresampling p-wave f1 score:{delineation_results['resamp']['p']['f1']}\n\n")
f.write(f"\n\twarping t-wave sensitivity:{delineation_results['warp']['t']['sens']}")
f.write(f"\n\tresampling t-wave sensitivity:{delineation_results['resamp']['t']['sens']}\n")
f.write(f"\n\twarping t-wave positive predicitivity:{delineation_results['warp']['t']['ppv']}")
f.write(f"\n\tresampling t-wave positive predicitivity:{delineation_results['resamp']['t']['ppv']}\n")
f.write(f"\n\twarping t-wave f1 score:{delineation_results['warp']['t']['f1']}")
f.write(f"\n\tresampling t-wave f1 score:{delineation_results['resamp']['t']['f1']}")
def reconstruction_one_lvl_one_file_and_compare(data_orig,lvl,file,initial_templates,verbose = True):
log_dir_this_file = os.path.join(log_dir,file.split(".")[0])
interpolation_type_list = None
measurements_each_interpolation_type = {}
measurements_id_coupling = {}
delineation_results_each_interp = {}
if INTERPOLATION_TYPE == 'all':
interpolation_type_list = ['flat','spline','linear']
else:
interpolation_type_list = [INTERPOLATION_TYPE]
for interpolation_type in interpolation_type_list:
if verbose:
print(f"(File {file.split('.')[0]}) Level:{lvl}, Using interpolation: {interpolation_type}")
log_dir_this_lvl = os.path.join(log_dir_this_file,str(lvl),interpolation_type)
os.makedirs(log_dir_this_lvl, exist_ok=True)
all_templates_collection, measurements_id_coupling, initial_templates, time_info, QRS_pos_new = reconstruct_beats(data_orig, lvl, file_name=file.split(".")[0],init_templates = initial_templates, start_after = SEC_FOR_INITIAL_TEMPLATES, resample_type = interpolation_type, num_beats_analyzed = NUM_BEAT_ANALYZED, verbose = False, log_dir = log_dir_this_lvl) #resample_type = flat vs linear
"""
measurements_id_coupling = {"ids":\
{"template_id":[], "beats":[]},\
"measures":\
{"dtw":{"warp":[],"resamp":[]},\
"prd":{"warp":[],"resamp":[]},\
"prd_left":{"warp":[],"resamp":[]},\
"prd_right":{"warp":[],"resamp":[]}}}
need to modify stats so to accept vectors
"""
#############################################################################
# RESULTS EVALUATION #
#############################################################################
delineation_results = delineate_and_compare(log_dir_this_lvl,QRS_pos_new)
measurements_each_interpolation_type[interpolation_type] = measurements_id_coupling['measures']
measurments_reuslts(measurements_id_coupling,interpolation_type,data_orig,all_templates_collection,time_info,file,lvl,log_dir_this_lvl,log_dir_this_file)
delinetaion_results_save(delineation_results,log_dir_this_lvl)
delineation_results_each_interp[interpolation_type]=delineation_results
if INTERPOLATION_TYPE == 'all':
for measure in measurements_id_coupling['measures']:
log_dir_this_lvl_combined = os.path.join(log_dir_this_file,str(lvl),"combined_results")
os.makedirs(log_dir_this_lvl_combined, exist_ok=True)
file_name_to_save = "L_"+measure+"_"+file.split(".")[0]+".log"
global_file_name_to_save = "L_"+measure+"_all_interpolations_"+file.split(".")[0]+".log"
with open(os.path.join(log_dir_this_lvl_combined,file_name_to_save),"a") as f:
f.write(f"Lvl: {lvl}\n")
with open(os.path.join(log_dir_this_file,global_file_name_to_save),"a") as f:
f.write(f"Lvl: {lvl}\n")
# Histograms
plt.figure()
file_name_to_save_fig_hist = os.path.join(log_dir_this_lvl_combined,file.split(".")[0]+"_"+measure+"_hist"+str(lvl)+".svg")
n_bins = np.inf
min_bin = np.inf
max_bin = -np.inf
legend_str = []
#Compute the bins
for interp_type in measurements_each_interpolation_type:
r_w = measurements_each_interpolation_type[interp_type][measure]["warp"]
r_r = measurements_each_interpolation_type[interp_type][measure]["resamp"]
n_bins = min(min(len(r_w),len(r_r))*PERC_BINS//100,n_bins)
min_bin = min(min(r_w),min(r_r),min_bin)
max_bin = max(max(r_w),max(r_r),max_bin)
legend_str.extend([f'{measure} warp {interp_type}',f'{measure} resampled {interp_type}'])
delta = (max_bin-min_bin)/n_bins
bins = np.arange(min_bin,max_bin+delta,delta)
#write results and plot
for interp_type in measurements_each_interpolation_type:
r_w = measurements_each_interpolation_type[interp_type][measure]["warp"]
r_r = measurements_each_interpolation_type[interp_type][measure]["resamp"]
avg_warp = np.average(r_w)
std_warp = np.std(r_w)
avg_resamp = np.average(r_r)
std_resamp = np.std(r_r)
# Particular log for each lvl
with open(os.path.join(log_dir_this_lvl_combined,file_name_to_save),"a") as f:
f.write(f"\tWarp using the {interp_type} interpolation: {avg_warp}, +-{std_warp}\n")
f.write(f"\tInterpolation using the {interp_type} interpolation: {avg_resamp}, +-{std_resamp}\n")
                # General log (the same but all together: more confusing but with all the info)
with open(os.path.join(log_dir_this_file,global_file_name_to_save),"a") as f:
f.write(f"\tWarp using the {interp_type} interpolation: {avg_warp}, +-{std_warp}\n")
f.write(f"\tInterpolation using the {interp_type} interpolation: {avg_resamp}, +-{std_resamp}\n")
plt.hist(r_w, bins = bins, alpha=0.3)
plt.hist(r_r, bins = bins, alpha=0.3)
plt.legend(legend_str)
plt.title(f'File: {file}, Lvl: {lvl}, {measure} histogram')
plt.savefig(file_name_to_save_fig_hist)
plt.close()
with open(os.path.join(log_dir_this_lvl_combined,file_name_to_save),"a") as f:
f.write(f"\n\n")
with open(os.path.join(log_dir_this_file,global_file_name_to_save),"a") as f:
f.write(f"\n\n")
return initial_templates,measurements_each_interpolation_type,delineation_results_each_interp
def get_file_and_relative_percentile(vector,file_order,perc):
perc_val = np.percentile(vector,perc,method='nearest')
perc_pos = list(vector).index(perc_val)
file_perc = file_order[perc_pos]
vector_for_file_selected = [x for i,x in enumerate(vector) if file_order[i] == file_perc]
coresponding_percentile_in_file = scipy.stats.percentileofscore(vector_for_file_selected, perc_val, kind='weak')
return file_perc,coresponding_percentile_in_file
def reconstruct_and_compare_level_parallel(lvl):
"""
measurements_each_interpolation_type: {type:
measurements_id_coupling = {"ids":\
{"template_id":[], "beats":[]},\
"measures":\
{"dtw":{"warp":[],"resamp":[]},\
"prd":{"warp":[],"resamp":[]},\
"prd_left":{"warp":[],"resamp":[]},\
"prd_right":{"warp":[],"resamp":[]}}}
}
need to modify stats so to accept vectors
"""
dataset_results_dir = os.path.join(log_dir,"final_results")
os.makedirs(dataset_results_dir,exist_ok=True)
measurements_all_dataset = {'flat' :{'dtw':[],'prd':[],'dtw_w':[],'prd_w':[],'file_list':[]},
'spline':{'dtw':[],'prd':[],'dtw_w':[],'prd_w':[],'file_list':[]},
'linear':{'dtw':[],'prd':[],'dtw_w':[],'prd_w':[],'file_list':[]}}
delineation_all_dataset = { 'flat' :{'p':{'Tp':0,'Fp':0,'Fn':0},
'T':{'Tp':0,'Fp':0,'Fn':0},
'p_w':{'Tp':0,'Fp':0,'Fn':0},
'T_w':{'Tp':0,'Fp':0,'Fn':0}},
'spline':{'p':{'Tp':0,'Fp':0,'Fn':0},
'T':{'Tp':0,'Fp':0,'Fn':0},
'p_w':{'Tp':0,'Fp':0,'Fn':0},
'T_w':{'Tp':0,'Fp':0,'Fn':0}},
'linear':{'p':{'Tp':0,'Fp':0,'Fn':0},
'T':{'Tp':0,'Fp':0,'Fn':0},
'p_w':{'Tp':0,'Fp':0,'Fn':0},
'T_w':{'Tp':0,'Fp':0,'Fn':0}},
}
for file in FILES_SELECTED:
verbose = True
log_dir_this_file = os.path.join(log_dir,file.split(".")[0])
os.makedirs(log_dir_this_file,exist_ok=True)
init_templates = None
if verbose:
print(f"(File {file.split('.')[0]} Level: {lvl}): Extracting original data")
data_orig = open_file(file, start_after = 0)
init_templates,measurements_each_interpolation_type,delineation_results_each_interp = reconstruction_one_lvl_one_file_and_compare(data_orig,lvl,file,init_templates,verbose = True)
for interp_type in measurements_each_interpolation_type.keys():
measurements_id_coupling = measurements_each_interpolation_type[interp_type]
measurements_all_dataset[interp_type]['prd'].extend(measurements_id_coupling['prd']['resamp'])
measurements_all_dataset[interp_type]['prd_w'].extend(measurements_id_coupling['prd']['warp'])
measurements_all_dataset[interp_type]['dtw'].extend(measurements_id_coupling['dtw']['resamp'])
measurements_all_dataset[interp_type]['dtw_w'].extend(measurements_id_coupling['dtw']['warp'])
measurements_all_dataset[interp_type]['file_list'].extend([file]*len(measurements_id_coupling['dtw']['warp']))
delineation_all_dataset[interp_type]['p']['Tp'] += delineation_results_each_interp[interp_type]['resamp']['p']['Tp']
delineation_all_dataset[interp_type]['T']['Tp'] += delineation_results_each_interp[interp_type]['resamp']['t']['Tp']
delineation_all_dataset[interp_type]['p_w']['Tp'] += delineation_results_each_interp[interp_type]['warp']['p']['Tp']
delineation_all_dataset[interp_type]['T_w']['Tp'] += delineation_results_each_interp[interp_type]['warp']['t']['Tp']
delineation_all_dataset[interp_type]['p']['Fp'] += delineation_results_each_interp[interp_type]['resamp']['p']['Fp']
delineation_all_dataset[interp_type]['T']['Fp'] += delineation_results_each_interp[interp_type]['resamp']['t']['Fp']
delineation_all_dataset[interp_type]['p_w']['Fp'] += delineation_results_each_interp[interp_type]['warp']['p']['Fp']
delineation_all_dataset[interp_type]['T_w']['Fp'] += delineation_results_each_interp[interp_type]['warp']['t']['Fp']
delineation_all_dataset[interp_type]['p']['Fn'] += delineation_results_each_interp[interp_type]['resamp']['p']['Fn']
delineation_all_dataset[interp_type]['T']['Fn'] += delineation_results_each_interp[interp_type]['resamp']['t']['Fn']
delineation_all_dataset[interp_type]['p_w']['Fn'] += delineation_results_each_interp[interp_type]['warp']['p']['Fn']
delineation_all_dataset[interp_type]['T_w']['Fn'] += delineation_results_each_interp[interp_type]['warp']['t']['Fn']
for interp_type in measurements_each_interpolation_type.keys():
dataset_results_interp_dir = os.path.join(dataset_results_dir,interp_type)
os.makedirs(dataset_results_interp_dir,exist_ok=True)
#PERCENTILE-----------------------
dtw_w = measurements_all_dataset[interp_type]['dtw_w']
dtw_rel = np.array(measurements_all_dataset[interp_type]['dtw_w'])-np.array(measurements_all_dataset[interp_type]['dtw'])
files_list = measurements_all_dataset[interp_type]['file_list']
file_dtw_w_abs_25, dtw_w_abs_25 = get_file_and_relative_percentile(dtw_w,files_list,25)
file_dtw_w_abs_50, dtw_w_abs_50 = get_file_and_relative_percentile(dtw_w,files_list,50)
file_dtw_w_abs_75, dtw_w_abs_75 = get_file_and_relative_percentile(dtw_w,files_list,75)
file_dtw_w_rel_25, dtw_w_rel_25 = get_file_and_relative_percentile(dtw_rel,files_list,25)
file_dtw_w_rel_50, dtw_w_rel_50 = get_file_and_relative_percentile(dtw_rel,files_list,50)
file_dtw_w_rel_75, dtw_w_rel_75 = get_file_and_relative_percentile(dtw_rel,files_list,75)
with open(os.path.join(dataset_results_interp_dir,"dtw_percentiles.txt"),'a+') as f:
f.write(f'Number of bits: {lvl}\n')
f.write(f'\tAbsolute\n')
f.write(f'\tFile containing 25th percentile: {file_dtw_w_abs_25}. Corresponding percentile in the file: {dtw_w_abs_25}\n')
f.write(f'\tFile containing 50th percentile: {file_dtw_w_abs_50}. Corresponding percentile in the file: {dtw_w_abs_50}\n')
f.write(f'\tFile containing 75th percentile: {file_dtw_w_abs_75}. Corresponding percentile in the file: {dtw_w_abs_75}\n')
f.write(f'\n\tRelative\n')
f.write(f'\tFile containing 25th percentile: {file_dtw_w_rel_25}. Corresponding percentile in the file: {dtw_w_rel_25}\n')
f.write(f'\tFile containing 50th percentile: {file_dtw_w_rel_50}. Corresponding percentile in the file: {dtw_w_rel_50}\n')
f.write(f'\tFile containing 75th percentile: {file_dtw_w_rel_75}. Corresponding percentile in the file: {dtw_w_rel_75}\n')
f.write('\n--------------------------------------------------------------------------------------------------------------------\n')
#Measures ------------------------------
with open(os.path.join(dataset_results_interp_dir,"prd.csv"),'a+') as f:
f.write(f'{lvl}')
for measure in measurements_all_dataset[interp_type]['prd']:
f.write(f',{measure}')
f.write('\n')
with open(os.path.join(dataset_results_interp_dir,"prd_w.csv"),'a+') as f:
f.write(f'{lvl}')
for measure in measurements_all_dataset[interp_type]['prd_w']:
f.write(f',{measure}')
f.write('\n')
with open(os.path.join(dataset_results_interp_dir,"dtw.csv"),'a+') as f:
f.write(f'{lvl}')
for measure in measurements_all_dataset[interp_type]['dtw']:
f.write(f',{measure}')
f.write('\n')
with open(os.path.join(dataset_results_interp_dir,"dtw_w.csv"),'a+') as f:
f.write(f'{lvl}')
for measure in measurements_all_dataset[interp_type]['dtw_w']:
f.write(f',{measure}')
f.write('\n')
#Delineation ------------------------------
with open(os.path.join(dataset_results_interp_dir,"delineation.txt"),'a+') as f:
f.write(f'BITS NUMBER: {lvl}\n')
f.write(f'P-wave:\n')
f.write(f'\tResample:\n')
Tp = delineation_all_dataset[interp_type]["p"]["Tp"]
Fp = delineation_all_dataset[interp_type]["p"]["Fp"]
Fn = delineation_all_dataset[interp_type]["p"]["Fn"]
s = Tp/(Tp+Fn)
p = Tp/(Tp+Fp)
F1 = 2*(s*p)/(s+p)
f.write(f'\t\tTrue positive: {Tp}\n')
f.write(f'\t\tFalse positive: {Fp}\n')
f.write(f'\t\tfalse negative: {Fn}\n')
f.write(f'\n\t\tSensitivity: {s}\n')
f.write(f'\t\tPositive predictivity: {p}\n')
f.write(f'\t\tF1 Score: {F1}\n')
f.write(f'\n\tWarp:\n')
Tp = delineation_all_dataset[interp_type]["p_w"]["Tp"]
Fp = delineation_all_dataset[interp_type]["p_w"]["Fp"]
Fn = delineation_all_dataset[interp_type]["p_w"]["Fn"]
s = Tp/(Tp+Fn)
p = Tp/(Tp+Fp)
F1 = 2*(s*p)/(s+p)
f.write(f'\t\tTrue positive: {Tp}\n')
f.write(f'\t\tFalse positive: {Fp}\n')
f.write(f'\t\tfalse negative: {Fn}\n')
f.write(f'\n\t\tSensitivity: {s}\n')
f.write(f'\t\tPositive predictivity: {p}\n')
f.write(f'\t\tF1 Score: {F1}\n')
f.write(f'\n\n-----------------------------------------------------------------------------------------------------------------\n\n')
f.write(f'T-wave:\n')
f.write(f'\tResample:\n')
Tp = delineation_all_dataset[interp_type]["T"]["Tp"]
Fp = delineation_all_dataset[interp_type]["T"]["Fp"]
Fn = delineation_all_dataset[interp_type]["T"]["Fn"]
s = Tp/(Tp+Fn)
p = Tp/(Tp+Fp)
F1 = 2*(s*p)/(s+p)
f.write(f'\t\tTrue positive: {Tp}\n')
f.write(f'\t\tFalse positive: {Fp}\n')
f.write(f'\t\tfalse negative: {Fn}\n')
f.write(f'\n\t\tSensitivity: {s}\n')
f.write(f'\t\tPositive predictivity: {p}\n')
f.write(f'\t\tF1 Score: {F1}\n')
f.write(f'\n\tWarp:\n')
Tp = delineation_all_dataset[interp_type]["T_w"]["Tp"]
Fp = delineation_all_dataset[interp_type]["T_w"]["Fp"]
Fn = delineation_all_dataset[interp_type]["T_w"]["Fn"]
s = Tp/(Tp+Fn)
p = Tp/(Tp+Fp)
F1 = 2*(s*p)/(s+p)
f.write(f'\t\tTrue positive: {Tp}\n')
f.write(f'\t\tFalse positive: {Fp}\n')
f.write(f'\t\tfalse negative: {Fn}\n')
f.write(f'\n\t\tSensitivity: {s}\n')
f.write(f'\t\tPositive predictivity: {p}\n')
f.write(f'\t\tF1 Score: {F1}\n')
f.write(f'\n-----------------------------------------------------------------------------------------------------------------\n')
f.write(f'-----------------------------------------------------------------------------------------------------------------\n')
f.write(f'-----------------------------------------------------------------------------------------------------------------\n')
f.write(f'-----------------------------------------------------------------------------------------------------------------\n\n')
def recontruct_and_compare_file_parallel(file):
verbose = True
log_dir_this_file = os.path.join(log_dir,file.split(".")[0])
os.mkdir(log_dir_this_file)
init_templates = None
if verbose:
print(f"(File: {file}): Extracting original data")
data_orig = open_file(file, start_after = 0)
for lvl in LEVELS:
init_templates,measurements_id_coupling,delineation_results_each_interp = reconstruction_one_lvl_one_file_and_compare(data_orig,lvl,file,init_templates,verbose = True)
def process(files, levels, parallelize_along = PARALLELIZE_ALONG, cores=1):
# ------------ INIT ------------
global log_dir
for i in range(1,10000):
tmp_log_dir = log_dir+str(i)
if not os.path.isdir(tmp_log_dir):
log_dir = tmp_log_dir
break
os.makedirs(log_dir, exist_ok=True)
with open(os.path.join(log_dir,"specs.txt"), "w") as f:
f.write(f"Results generated by script: {sys.argv[0]}\n")
f.write(f"Time: {time.ctime(time.time())}\n\n")
f.write(f"Files: {files}\n")
f.write(f"Levels: {levels}\n")
f.write(f"Parallelize along: {parallelize_along}\n")
f.write(f"Cores: {cores}\n")
f.write(f"Beats: {NUM_BEAT_ANALYZED}\n")
f.write(f"Cluster percentage: {CLUSTER_PERCENTAGE}\n")
f.write(f"Interpolation type: {INTERPOLATION_TYPE}\n")
f.write(f"Timing mode: {TIME_MODE}\n")
f.write(f"Differential dtw: {USE_DIFFERENTIAL_DTW}\n")
f.write(f"Time weight for ddtw_tv: {TIME_WEIGHT}\n")
# ------------ Extract DATA & ANNOTATIONS ------------
if cores == 1:
print("Single core")
if parallelize_along == 'levels':
for lvl in levels:
reconstruct_and_compare_level_parallel(lvl)
elif parallelize_along == 'files':
for f in files:
recontruct_and_compare_file_parallel(f)
else:
with Pool(cores) as pool:
if parallelize_along == 'levels':
print(f"parallelizing along levels: {levels}")
pool.map(reconstruct_and_compare_level_parallel, levels)
elif parallelize_along == 'files':
print("parallelizing along files")
pool.map(recontruct_and_compare_file_parallel, files)
if __name__ == "__main__":
'''
    Note: the long acquisition time does not produce significantly different results
'''
import argparse
import time
seconds_start = time.time()
local_time_start = time.ctime(seconds_start)
print("\nStarted at:", local_time_start,"\n\n")
#global NUM_BEAT_ANALYZED
parser = argparse.ArgumentParser()
parser.add_argument("--file", help="Force to analyze one specific file instead of default one (first found)")
parser.add_argument("--levels", help="Decide how many bits to use in the ADC: options:\n\t->1: [3]\n\t->2: [3,4]\n\t->3: [3,4,5]\n\t->4: ...")
parser.add_argument("--cores", help="Force used number of cores (default, half of the available ones")
parser.add_argument("--parallelize_along", help="describe if to parallelize on the number of 'files'\n or on the number of 'levels'")
parser.add_argument("--beats", help="Number of used beats, default: 5000")
parser.add_argument("--cluster_opt", help="Percentage of points for a cluster to be considered")
parser.add_argument("--interpolation_type", help="Chose between: spline, flat, linear, and all. Default: flat")
parser.add_argument("--use_differential_dtw", action="store_true", help="define if to use the differentail form of dtw or the original one")
parser.add_argument("--acquisition_time_mode", help="Menage the time length the algorithm acquire the data at \
full speed ant the time horizon used for template recomputation. \nChose between: short, normal, and long. Default: short")
parser.add_argument("--time_weight", help="The time weight in the ddtw_ev algorithm, default: 1")
args = parser.parse_args()
files = os.listdir(data_beats_dir)
if args.file is not None:
if args.file == 'all':
FILES_SELECTED = files
else:
FILES_SELECTED = list(filter(lambda string: True if args.file in string else False, files))
else:
FILES_SELECTED = [files[0]]
if args.cores is not None:
used_cores = int(args.cores)
else:
used_cores = multiprocessing.cpu_count()//3
if args.beats is not None:
NUM_BEAT_ANALYZED = int(args.beats)
else:
NUM_BEAT_ANALYZED = 5000
if args.cluster_opt is not None:
CLUSTER_PERCENTAGE = int(args.cluster_opt)
else:
CLUSTER_PERCENTAGE = 3
if args.interpolation_type is not None:
INTERPOLATION_TYPE = args.interpolation_type
else:
INTERPOLATION_TYPE = "flat"
if args.parallelize_along is not None:
PARALLELIZE_ALONG = args.parallelize_along
else:
PARALLELIZE_ALONG = "levels"
if args.levels is not None:
LEVELS = LEVELS[:int(args.levels)]
else:
LEVELS = [3,4,5]
if args.use_differential_dtw:
USE_DIFFERENTIAL_DTW = True
else:
USE_DIFFERENTIAL_DTW = False
if args.time_weight is not None:
TIME_WEIGHT = int(args.time_weight)
else:
TIME_WEIGHT = 1
if args.acquisition_time_mode is not None:
if args.acquisition_time_mode == "short":
SEC_FOR_INITIAL_TEMPLATES = 3*60
LEN_DISTANCE_VECTOR = 60
LEN_DISTANCE_VECTOR_REF = 400
SEC_FOR_NEW_TEMPLATES = 40
TIME_MODE = 'short'
elif args.acquisition_time_mode == "long":
SEC_FOR_INITIAL_TEMPLATES = 8*60
LEN_DISTANCE_VECTOR = 110
LEN_DISTANCE_VECTOR_REF = 650
SEC_FOR_NEW_TEMPLATES = 3*60
TIME_MODE = 'long'
else:
SEC_FOR_INITIAL_TEMPLATES = 5*60
LEN_DISTANCE_VECTOR = 80
LEN_DISTANCE_VECTOR_REF = 500
SEC_FOR_NEW_TEMPLATES = 2*60
TIME_MODE = 'medium'
else:
SEC_FOR_INITIAL_TEMPLATES = 3*60
LEN_DISTANCE_VECTOR = 60
LEN_DISTANCE_VECTOR_REF = 400
SEC_FOR_NEW_TEMPLATES = 40
TIME_MODE = 'short'
for lvl in LEVELS:
jfc[lvl] = manager.dict({'time_between_templates':manager.list(),'number_of_templates':manager.list()})
print(f"Analyzing files: {FILES_SELECTED}")
print(f"Extracting data with {used_cores} cores...")
process(files = FILES_SELECTED, levels = LEVELS, parallelize_along = PARALLELIZE_ALONG, cores=used_cores)
seconds_stop = time.time()
local_time_stop = time.ctime(seconds_stop)
elapsed = seconds_stop - seconds_start
hours = elapsed//60//60
minutes = (elapsed - hours * 60 * 60) // 60
seconds = (elapsed - hours * 60 * 60 - minutes * 60) // 1
print("\n\n\n-----------------------------------------------------------------------------------------------------------------")
print(f"Finished at: {local_time_stop}, elapsed: {elapsed} seconds ({hours} hours, {minutes} minutes, {seconds} seconds)")
print("\n\n\n-----------------------------------------------------------------------------------------------------------------")
for lvl in jfc.keys():
print("\n\n\n-----------------------------------------------------------------------------------------------------------------")
print(f"Lvl: {lvl}")
print(f"jfc[{lvl}]['time_between_templates'] is: {list(jfc[lvl]['time_between_templates'])}")
print(f"jfc[{lvl}]['number_of_templates'] is: {list(jfc[lvl]['number_of_templates'])}")
print(f"Time between templates recomputation: {np.average(jfc[lvl]['time_between_templates'])} +- {np.std(jfc[lvl]['time_between_templates'])}")
print(f"Number of templates: {np.average(jfc[lvl]['number_of_templates'])} +- {np.std(jfc[lvl]['number_of_templates'])}")
