diff --git a/pre_proc/MITAnnotation.py b/helper_scripts/MITAnnotation.py
similarity index 100%
rename from pre_proc/MITAnnotation.py
rename to helper_scripts/MITAnnotation.py
diff --git a/pre_proc/csvManager.py b/helper_scripts/csvManager.py
similarity index 100%
rename from pre_proc/csvManager.py
rename to helper_scripts/csvManager.py
diff --git a/pre_proc/ECG_lvlCrossing.py b/pre_proc/ECG_lvlCrossing.py
index fe1ac27..8c2d5c4 100644
--- a/pre_proc/ECG_lvlCrossing.py
+++ b/pre_proc/ECG_lvlCrossing.py
@@ -1,169 +1,180 @@
 """
 Created on Fri Feb 22 09:07:30 2019

 Simple script to perform a non-uniform subsampling of an ECG record in the
 MIT-BIH format, using the Wall-Danielsson algorithm.
 As output, it produces a 2-column csv file with the sample times and values.

 Dependencies:
     - numpy
     - tqdm (https://pypi.org/project/tqdm/)
     - wfdb-python (https://pypi.org/project/wfdb/)

 @authors: T. Teijeiro
           S.Zanoli
 """
+import multiprocessing
+import os
+import shutil
+import sys
+from multiprocessing import Pool, Process
+
 import numpy as np
 import pandas as pd
 from tqdm import trange
-import os
-from multiprocessing import Pool, Process
-import multiprocessing
-import shutil
-from csvManager import csvManager
+
+scripts = '../helper_scripts'
+if scripts not in sys.path:
+    sys.path.insert(0,scripts)
 from time import time
+
 import matplotlib.pyplot as plt
+from csvManager import csvManager

 data_folder = "../data/extracted_data"
 out = "../data/level_crossing"
 log_dir = "../data/logs/"
 pos = False
 bits_data = 11
 bits = range(1,bits_data+1,1)
 hister = 5

 def ADC(values, original_bits = 12, all_pos = True, nBits = 5, hist = 0):
     """
     To write
     """
     delta = 2**original_bits
     dV = (delta)/(2**nBits)
     hist = hist/100*dV
     if all_pos:
         #max_val = delta
         min_val = 0
     else:
         #max_val = delta//2
         min_val = -delta//2
     lowTh = min_val
     highTh = lowTh + dV
     index = []
     for val,time in zip(values,range(len(values))):
         if val > highTh + hist or val < lowTh - hist:
             index.append(time)
             lowTh = min_val+((val-min_val)//dV)*dV #Delta from the bottom: (val-min_val)//dV*dV, then compute the actual level summing min_val
             highTh = lowTh + dV
     return index

 def sample(file):
     manager = csvManager()
     _,vals = manager.read(file)
     #print("------------------------- Sub-sampling file: {} -------------------------".format(file))
     for b in bits:
         #print("\nFile: {}, subsampling with {} bits".format(file,b))
         idxs = ADC(vals, original_bits = bits_data, all_pos = pos, nBits = b, hist = hister)
         vals_samp = (np.array(vals)[idxs]).tolist()
         compression = 1-len(vals_samp)/len(vals)
         #---------- SAVE BACK ----------
         out_dir = os.path.join(out,str(b)+"bits")
         file_name = os.path.join(out_dir,os.path.basename(file))
         manager.write(idxs,vals_samp,file_name)
         log(file_name,b,compression)

 def log (source_file, bits, compression):
     name = os.path.basename(source_file).split('.')[0]
     file_name = os.path.join(log_dir,str(bits)+"Bits")
     str_to_write = name + ": "+str(compression)+"\n"
     with open(file_name,"a") as f:
         f.write(str_to_write)

 def log_resume():
     resume = {}
     if not os.path.isdir(log_dir):
         return
     for l in os.listdir(log_dir):
         if "resume" in l:
             continue
         bits = int(l[0:l.find("Bits")])
         resume[bits] = {"avg":None,"std":None, "num_files":None}
         compressions = []
         text = ""
         num_file = 0
         with open(os.path.join(log_dir,l)) as f:
             text = f.readlines()
         for line in text:
             num_file += 1
             compr = float(line[line.find(": ")+len(": "):])
             compressions.append(compr)
         resume[bits]["avg"] = np.average(compressions)
         resume[bits]["std"] = np.std(compressions)
         resume[bits]["num_files"] = num_file
     with open(os.path.join(log_dir,"resume.txt"),"w") as f:
         keys = sorted(list(resume.keys()))
         for k in keys:
             line = "Bits: {}\t\tAvg: {}\tStD: {} (Total number of files:{})\n".format(str(k),resume[k]["avg"],resume[k]["std"],resume[k]["num_files"])
             f.write(line)

 def process(multi = False,cores = None):
     names = [os.path.join(data_folder,name) for name in os.listdir(data_folder)]
     if os.path.isdir(log_dir):
         shutil.rmtree(log_dir)
     os.mkdir(log_dir)
     if os.path.isdir(out):
         shutil.rmtree(out)
     os.mkdir(out)
     for b in bits:
         out_dir = os.path.join(out,str(b)+"bits")
         os.mkdir(out_dir)
     #sample(names[0])
     if multi:
         used_cores = cores
         with Pool(used_cores) as pool:
             pool.map(sample, names)
     else:
         for arg in names:
             sample(arg)
-    #log_resume()
+    log_resume()

 def test_performances():
     times = []
     print("~"*85)
     print("Analyzing multicore performances in sampling signals with level crossing algorithm and saving in binary.\n"
           "Useful? No, interesting, yes.")
     print("-"*85)
     print("-"*85)
     print("\nChecking single core (no library used)\n")
     start = time()
     process(multi=False)
     stop = time()
     print (f" Elapsed time for single core: {int((stop - start)//60)}:{int(((stop-start)-(stop - start)//60*60)//1)}\n\n")
     print("Using multicores...")
     for core_num in range(1,multiprocessing.cpu_count()):
         print("-"*85)
         print(f"Using {core_num} cores...")
         start = time()
         process(multi=True, cores=core_num)
         stop = time()
         times.append(start-stop)
         print (f" Elapsed time using {core_num} cores: {int((stop - start)//60)}:{int(((stop-start)-(stop - start)//60*60)//1)}\n\n")
     plt.figure()
     plt.plot(times)
     plt.savefig("../data/logs/perf.png")

 if __name__ == "__main__":
     import argparse
     parser = argparse.ArgumentParser()
     parser.add_argument("--test", help="test multicore capabilities", action="store_true")
+    parser.add_argument("--cores", help="Force the number of cores to use (default: half of the available cores)")
     args = parser.parse_args()
     if args.test:
         print("TEST MULTICORE...")
         test_performances()
     else:
-        used_cores = multiprocessing.cpu_count()-1
+        if args.cores is not None:
+            used_cores = int(args.cores)
+        else:
+            used_cores = multiprocessing.cpu_count()//2
         print(f"Execution level-crossing EB_sampling with {used_cores} cores...")
-        process(multi=True, cores=used_cores)
\ No newline at end of file
+        process(multi=True, cores=used_cores)
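Illustration (not part of the patch): a minimal, self-contained sketch of the level-crossing idea that ADC() in pre_proc/ECG_lvlCrossing.py implements. The function name, the synthetic signal, and the parameter values below are invented for the example; only the banding logic mirrors the script.

import numpy as np

def level_crossing_indices(values, original_bits=11, all_pos=True, n_bits=5, hist_pct=5):
    """Return the indices of samples that leave the current quantisation band."""
    delta = 2 ** original_bits            # full-scale range of the source ADC
    dV = delta / (2 ** n_bits)            # width of one band at n_bits resolution
    hist = hist_pct / 100 * dV            # hysteresis margin around the band
    min_val = 0 if all_pos else -delta // 2
    low, high = min_val, min_val + dV
    kept = []
    for t, v in enumerate(values):
        if v > high + hist or v < low - hist:
            kept.append(t)                                  # sample crosses a level: keep it
            low = min_val + ((v - min_val) // dV) * dV      # snap the band to the new sample
            high = low + dV
    return kept

# Synthetic 11-bit trace with periodic bumps: flat stretches are thinned out,
# only the transitions are retained.
t = np.arange(2000)
sig = (1024 + 900 * np.exp(-(((t % 500) - 250) ** 2) / 200.0)).astype(int)
idx = level_crossing_indices(sig)
print(f"kept {len(idx)} of {len(sig)} samples ({1 - len(idx) / len(sig):.1%} compression)")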
diff --git a/pre_proc/eval_mitdb.sh b/pre_proc/eval_mitdb.sh
deleted file mode 100755
index 9cbb28a..0000000
--- a/pre_proc/eval_mitdb.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/bash
-
-# Bash script to perform a full validation of a QRS detection method on the MIT-BIH Arrhythmia database.
-# Mind the paths when running! The simplest approach is to have the db records, the annotations and this script in the same folder.
-# If you want to test different annotators, just change the extension in the `bxb` command.
-
-PARAMS=""
-ANNOTORIG="atr"
-ANNOTOBT="out"
-OUTFILE="validation.txt"
-FOLDER="../data/dataRaw/"
-OUTFOLDER="../data/results/"
-
-while (( "$#" )); do
-  case "$1" in
-    -a|--in-annotations)
-      ANNOTORIG=$2
-      shift 2
-      ;;
-    -o|--out-annotations)
-      ANNOTOBT=$2
-      shift 2
-      ;;
-    -f|--file)
-      OUTFILE=$2
-      shift 2
-      ;;
-    -d|--dir)
-      OUTFOLDER=$2
-      shift 2
-      ;;
-    -s|--source)
-      FOLDER=$2
-      shift 2
-      ;;
-    --) # end argument parsing
-      shift
-      break
-      ;;
-    -*|--*=) # unsupported flags
-      echo "Error: Unsupported flag $1" >&2
-      exit 1
-      ;;
-    *) # preserve positional arguments
-      PARAMS="$PARAMS $1"
-      shift
-      ;;
-  esac
-done
-
-# set positional arguments in their proper place
-eval set -- "$PARAMS"
-
-
-RECORDS="100 101 102 103 104 105 106 107 108 109 111 112 113 114 115 116 117 118 119 121 122 123 124 200 201 202 203 205 207 208 209 210 212 213 214 215 217 219 220 221 222 223 228 230 231 232 233 234"
-
-cd "$FOLDER"
-rm -f file1
-rm -f file2
-for r in $RECORDS; do
-    echo $r
-    bxb -r $r -a "$ANNOTORIG" "$ANNOTOBT" -l file1 file2 -t 648000 2> /dev/null
-done
-
-mkdir -p "$OUTFOLDER"
-OUTPATH="$OUTFOLDER$OUTFILE"
-sumstats file1 > "$OUTPATH"
-rm -f file1
-rm -f file2
diff --git a/pre_proc/extract_data.py b/pre_proc/extract_data.py
index 916936c..79fcc48 100644
--- a/pre_proc/extract_data.py
+++ b/pre_proc/extract_data.py
@@ -1,66 +1,70 @@
-import numpy as np
+import os
+import sys
+
 import matplotlib.pyplot as plt
+import numpy as np
 import pandas as pd
-import sys
-import os
-scripts = './'
+
+scripts = '../helper_scripts'
 if scripts not in sys.path:
     sys.path.insert(0,scripts)
+import multiprocessing
+from multiprocessing import Pool
+
 import MITAnnotation as MITA
 import wfdb
-from multiprocessing import Pool
-import multiprocessing
 from csvManager import csvManager

 FREE_CORES = 0

 data_folder = "../data/dataRaw"
 dest_data = "../data/extracted_data"
 dest_annotation = "../data/extracted_annotation/"
 userChannel = "ECG1"

 def extract_data(args):
     file_name = args[0]
     last_annot = args[1]
     print("working on file: {}".format(file_name))
     single_name = os.path.basename(file_name)
     #Record reading
     rec = wfdb.rdrecord(file_name, channel_names=[userChannel], physical=False)
     #Input signal as a plain array
     v = (rec.d_signal.reshape((1, -1))[0]).tolist()[0:last_annot+10]
     t = list(range(len(v)))[0:last_annot+10]
     manager = csvManager()
     manager.write(t,v,os.path.join(dest_data,single_name+".bin"))

 def extract_annot(file_name):
     single_name = os.path.basename(file_name)
     file_source = file_name+".atr"
     file_dest = os.path.join(dest_annotation,single_name+".annot")
     times= [x.time for x in MITA.read_annotations(file_source) if MITA.is_qrs_annotation(x)]
     df = pd.DataFrame(times)
     df.to_csv(file_dest,index= False)
     print("Extracted annotation: {}".format(file_name))
     return times[-1]

+
 if __name__ == "__main__":
     # ------------ INIT ------------
     if not os.path.isdir(dest_data):
         os.mkdir(dest_data)
     if not os.path.isdir(dest_annotation):
         os.mkdir(dest_annotation)

     # ------------ Extract DATA & ANNOTATIONS ------------
     #find files:
     files = []
     for x in os.listdir(data_folder):
         thisFile = os.path.join(data_folder,x)
         thisFileNoExt = os.path.splitext(thisFile)[0]
         if os.path.isfile(thisFile) and os.path.exists(thisFileNoExt+".hea"):
             files.append(thisFileNoExt)
     listOfFiles = list(set(files))

     with Pool(multiprocessing.cpu_count()-FREE_CORES) as pool:
         last_annot = pool.map(extract_annot, listOfFiles)
         pool.map(extract_data, zip(listOfFiles,last_annot))
\ No newline at end of file
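Illustration (not part of the patch): a quick sanity check on the records that extract_data.py consumes, using the wfdb package the script already depends on. The record path and the hand-picked beat-symbol set below are assumptions for the example; the script itself filters beats with MITAnnotation.is_qrs_annotation() instead.

import wfdb

# Hypothetical path to one MIT-BIH record (.hea/.dat/.atr triplet).
record_path = "../data/dataRaw/100"

# Digital (integer) samples of the first channel, matching physical=False in extract_data.py.
rec = wfdb.rdrecord(record_path, physical=False)
sig = rec.d_signal[:, 0]

# Reference annotations from the .atr annotator; keep only beat-like symbols.
ann = wfdb.rdann(record_path, "atr")
beat_symbols = {"N", "L", "R", "A", "a", "J", "S", "V", "F", "e", "j", "E"}
qrs_samples = [s for s, sym in zip(ann.sample, ann.symbol) if sym in beat_symbols]

print(f"{len(sig)} samples, {len(qrs_samples)} annotated beats, last beat at sample {qrs_samples[-1]}")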