diff --git a/PULP/Makefile b/PULP/Makefile index 2c2ed91..603266c 100755 --- a/PULP/Makefile +++ b/PULP/Makefile @@ -1,16 +1,22 @@ PULP_APP = adaptive_rpeak_detection PULP_APP_FC_SRCS = main.c \ adaptive_Rpeak_detection/adaptiveRpeakDetection.c \ Morph_filt/morpho_filtering.c \ REWARD_R_peak_detection/relativeEnergy.c \ REWARD_R_peak_detection/peakDetection.c \ error_detection/error_detection.c \ - profiling/profile.c + profiling/profile.c \ + profiling/profile_cl.c + +PULP_APP_CL_OMP_SRCS = test_double_buffering.c + CORES ?= 1 -CTARGET ?= 0 +CTARGET ?= 1 + +stackSize ?= 2048 PULP_CFLAGS += -DTARGET=$(CTARGET) -DNUM_CORES=$(CORES) -O3 -g3 -w PULP_LDFLAGS = -lm #-lg #uncomment -lg for pulpissimo include $(PULP_SDK_HOME)/install/rules/pulp_rt.mk diff --git a/PULP/adaptive_Rpeak_detection/adaptiveRpeakDetection.c b/PULP/adaptive_Rpeak_detection/adaptiveRpeakDetection.c index 1532bf7..992cb48 100755 --- a/PULP/adaptive_Rpeak_detection/adaptiveRpeakDetection.c +++ b/PULP/adaptive_Rpeak_detection/adaptiveRpeakDetection.c @@ -1,259 +1,380 @@ +#include "rt/rt_api.h" #include "adaptiveRpeakDetection.h" #include "../defines.h" #include "../profiling/profile.h" +#include "../profiling/profile_cl.h" +#include "../profiling/defines.h" #include "../data/signal.h" #include "../Morph_filt/morpho_filtering.h" #include "../Morph_filt/defines_globals.h" #include "../error_detection/error_detection.h" +#include "../test_double_buffering.h" #define N_WINDOWS (int) (2*((ECG_VECTOR_SIZE-LONG_WINDOW)/DIM)+1)// Counting the worst case scenario when overlap is DIM -RT_L2_DATA int16_t ecg_buff[(LONG_WINDOW+DIM)*(NLEADS+1)]; +RT_L2_DATA int16_t ecg_L2buff[(LONG_WINDOW+DIM)*(NLEADS+1)]; RT_L2_DATA rt_perf_t perf[NUM_CORES]; #ifdef MODULE_MF RT_L2_DATA int32_t *argMF[4]; RT_L2_DATA int32_t buffSize_windowMF; #endif #ifdef MODULE_RELEN RT_L2_DATA int32_t *argRelEn[4]; RT_L2_DATA int32_t start_RelEn = 1; RT_L2_DATA int32_t buffSize_windowRelEn; #endif #ifdef MODULE_RPEAK_REWARD RT_L2_DATA int32_t *argRW_Rpeak[3]; RT_L2_DATA int32_t indicesRpeaks[H_B+1]; #endif #ifdef MODULE_ERROR_DETECTION RT_L2_DATA int32_t *argErrDet[5]; RT_L2_DATA int32_t lastRpeak = 0; RT_L2_DATA int32_t lastRR = 0; RT_L2_DATA int32_t error_RWindow = 0; +#endif +#ifdef MODULE_CLUSTERING +RT_L2_DATA int16_t ecg_L2buff_prev[DIM]; +RT_L2_DATA int32_t rL2BufferIndex; +RT_L1_DATA int32_t rL1BufferIndex = 0; +RT_L1_DATA int16_t ecg_L1buff[DIM*(NLEADS+1)]; +RT_L1_DATA int32_t end_main_loop; +RT_L2_DATA int32_t flag_error_RWindow = 0; +RT_L2_DATA int32_t* argCL[3]; +RT_L2_DATA rt_event_sched_t * psched = 0; +RT_L2_DATA int32_t done = 0; #endif RT_L2_DATA int32_t overlap; RT_L2_DATA int32_t rWindow; void clearRelEn() { clearAndResetRelEn(); resetPeakDetection(); } +// static void cluster_Rpeaks(int32_t *arg[]) +// { +// rt_team_fork(NUM_CORES, rpeaks, arg); +// } + +static void cluster_test_doublebuff(int32_t *arg[]) +{ + rt_team_fork(NUM_CORES, testDoubleBuff, arg); +} + +extern void end_of_call(void *arg) +{ + done = 1; +} + +static void fCore0_DmaTransfer_Windows(void *arg) +{ + rt_dma_copy_t dmaCp; + +#ifdef MODULE_ERROR_DETECTION + if(flag_error_RWindow == 0){ + // Copy data block previous window from L2 to shared L1 memory using the cluster DMA + rt_dma_memcpy((unsigned int)&ecg_L2buff_prev[0], (unsigned int)&ecg_L1buff[0], 2*DIM, RT_DMA_DIR_EXT2LOC, 0, &dmaCp); + + // Wait for dma to finish + rt_dma_wait(&dmaCp); + } +#endif + + // Copy data block current window from L2 to shared L1 memory using the cluster DMA + rt_dma_memcpy((unsigned int)&ecg_L2buff[rL2BufferIndex], (unsigned int)&ecg_L1buff[DIM], 2*DIM, RT_DMA_DIR_EXT2LOC, 0, &dmaCp); + + // Wait for dma to finish + rt_dma_wait(&dmaCp); +} + void adaptiveRpeakDetection(){ - + +#ifdef MODULE_CLUSTERING + rt_cluster_mount(MOUNT, 0, 0, NULL); +#endif + int32_t count_sample = 0; int32_t offset_window = 0; int32_t offset_ind = LONG_WINDOW/2+1; int32_t tot_overlap = 0; overlap = 0; #ifdef MODULE_MF int32_t flagMF = 0; int32_t i_lead = 0; buffSize_windowMF = LONG_WINDOW+DIM; - argMF[0] = (int32_t*) ecg_buff; + argMF[0] = (int32_t*) ecg_L2buff; argMF[1] = &flagMF; argMF[2] = &i_lead; argMF[3] = &buffSize_windowMF; init_filtering(); #endif #ifdef MODULE_RELEN buffSize_windowRelEn = LONG_WINDOW+DIM; - argRelEn[0] = (int32_t*) ecg_buff; - argRelEn[1] = (int32_t*) &ecg_buff[(LONG_WINDOW+DIM)*NLEADS]; + argRelEn[0] = (int32_t*) ecg_L2buff; + argRelEn[1] = (int32_t*) &ecg_L2buff[(LONG_WINDOW+DIM)*NLEADS]; argRelEn[2] = &start_RelEn; argRelEn[3] = &buffSize_windowRelEn; clearRelEn(); #endif #ifdef MODULE_RPEAK_REWARD int32_t rpeaks_counter = 0; - argRW_Rpeak[0] = (int32_t*) &ecg_buff[LONG_WINDOW+(LONG_WINDOW + DIM)*NLEADS]; + argRW_Rpeak[0] = (int32_t*) &ecg_L2buff[LONG_WINDOW+(LONG_WINDOW + DIM)*NLEADS]; argRW_Rpeak[1] = indicesRpeaks; argRW_Rpeak[2] = &offset_ind; #endif +#ifdef MODULE_CLUSTERING + rL2BufferIndex = 0; + + // Allocate event on the default scheduler + if (rt_event_alloc(NULL, 1)) return -1; + rt_event_t *event; + +#endif for(rWindow=0; rWindow= ECG_VECTOR_SIZE){ return; } if(rWindow > 0){ offset_window = LONG_WINDOW; } else{ offset_window = 0; } if (rWindow > 0) { for(int32_t i=0; i 0) start_RelEn = 0; - argRelEn[0] = (int32_t*) &ecg_buff[offset_window + overlap]; - argRelEn[1] = (int32_t*) &ecg_buff[offset_window + (LONG_WINDOW + DIM)*NLEADS+overlap]; + argRelEn[0] = (int32_t*) &ecg_L2buff[offset_window + overlap]; + argRelEn[1] = (int32_t*) &ecg_L2buff[offset_window + (LONG_WINDOW + DIM)*NLEADS+overlap]; buffSize_windowRelEn = LONG_WINDOW - offset_window + DIM-overlap; -#endif + #endif relEn_w(argRelEn); #ifdef HWPERF_MODULES profile_stop(perf); #endif #ifdef PRINT_RELEN for(int32_t sample = offset_window + (LONG_WINDOW + DIM)*NLEADS; sample< (LONG_WINDOW + DIM)*(NLEADS+1); sample++) { - printf("%d\n", ecg_buff[sample]); + printf("%d\n", ecg_L2buff[sample]); } - #endif + #endif #endif #ifdef MODULE_RPEAK_REWARD #ifdef HWPERF_MODULE_RPEAK_REWARD || HWPERF_MODULES profile_start(perf); #endif getPeaks_w(argRW_Rpeak); rpeaks_counter = 0; while(indicesRpeaks[rpeaks_counter]!=0) { rpeaks_counter++; } #ifdef HWPERF_MODULE_RPEAK_REWARD || HWPERF_MODULES profile_stop(perf); #endif #ifdef PRINT_RPEAKS for(int32_t indR=0; indR 0 && error_RWindow == 1){ + argCL[0] = (int32_t*) &ecg_L1buff[rL1BufferIndex]; + argCL[1] = &rL1BufferIndex; + argCL[2] = &end_main_loop; + rt_cluster_call(NULL, CID, cluster_test_doublebuff, argCL, NULL, 2048, 2048, NUM_CORES, rt_event_get(psched, end_of_call, (void *) CID)); + while(!done) + rt_event_execute(psched, 1); + done = 0; + } #endif - + #ifdef ONLY_FIRST_WINDOW //Only for debug return; #endif #ifdef OVERLAP_MF overlap = LONG_WINDOW + LONG_WINDOW/2 + 1; #endif + #ifdef OVERLAP_RELEN overlap = 0; #endif tot_overlap += overlap; offset_ind = offset_ind + DIM - tot_overlap; #ifdef MODULE_RPEAK_REWARD rpeaks_counter = 0; for(int32_t ix_rp = 0; ix_rp < H_B+1 ; ix_rp++) { indicesRpeaks[ix_rp] = 0; } #endif } +#ifdef MODULE_CLUSTERING + rt_cluster_mount(UNMOUNT, 0, 0, NULL); +#endif + } diff --git a/PULP/defines.h b/PULP/defines.h index cb44555..f99cf57 100755 --- a/PULP/defines.h +++ b/PULP/defines.h @@ -1,79 +1,86 @@ #ifndef DEFINES_H_ #define DEFINES_H_ //========= DEFINE PRINT OUTPUT ========// // #define PRINT_SIG_MF // #define PRINT_RELEN // #define PRINT_RPEAKS #define PRINT_ERROR_RPEAKS //========= DEFINE PRINT DEBUG =========// // #define PRINT_DEBUG // #define PRINT_SIG_INPUT_PEAKS // #define PRINT_THR // #define PRINT_RPEAKS_BEFORE_T_CHECK // #define PRINT_DEBUG_ERRDET //=========== Sampling frequency ========== // #define ECG_SAMPLING_FREQUENCY 250 //=========== Number of leads/channels ============// #define NLEADS 1 //=========== Buffer size for R peak detection ========== // //The minumum distance between two ECG peaks based on physiological limits: 200 miliseconds * sampling frequency #define BUFFER_LENGTH_SECONDS (float) 1.75 //2.048 // #define BUFFER_SIZE (int16_t) (BUFFER_LENGTH_SECONDS*ECG_SAMPLING_FREQUENCY) //=========== Overlap for Rel-En ========== // //long window delay length (in samples): sampling frequency times long window length in seconds //Long window length = 0.95 seconds #define LONG_WINDOW (uint16_t) (0.95*ECG_SAMPLING_FREQUENCY+1) //=========== R PEAK DETECTION PARAMETERS ===========// // #define NEGATIVE_PEAK //For ISSUL-EPFL dataset, comment this line. For QTDB used for publication in EMBC 2019 and ESWEEK 2020, uncomment. +//=========== CLUSTERING PARAMETERS ===========// +#define type_f float +#define type_i int +#define SIZE_PERCENTILE_ARR DIM*2//1000 + //======== DEFINE MODULES ==========// #define MODULE_MF #define MODULE_RELEN #define MODULE_RPEAK_REWARD #ifdef MODULE_RPEAK_REWARD #define MODULE_ERROR_DETECTION #endif +#define MODULE_CLUSTERING + //======== DEFINE WINDOW OVERLAP ==========// // Copy the MF or the RelEn signal in the overlapping window. The two defines are mutually exclusive (only one can be uncommented) // #define OVERLAP_MF // not yet validated for this implementation #define OVERLAP_RELEN //default and validated for this implementation //======== DEFINE WINDOWS (ONLY FOR DEBUG) ==========// // #define ONLY_FIRST_WINDOW #define N 1 #define H_B 30 #define DIM (int16_t) (BUFFER_SIZE * N) //((BUFFER_SIZE * N) + LONG_WINDOW) #if ECG_SAMPLING_FREQUENCY == 250 #define OFFSET_MF 150 #else #define OFFSET_MF 300 #endif //========= DEFINE PROFILING ================// // #define HWPERF_MODULE_RPEAK_REWARD //start profiling module RPEAK // #define HWPERF_MODULES //start profiling separate modules // #define HWPERF_FULL //start profiling full app (N.B. it will profile also some buffering) // #define ACTIVE //#define EXTACC //# of loads and stores in EXT memory (L2) //#define INTACC //# of loads and stores in INT memory (L1) //#define STALL //# number of core stalls //#define INSTRUCTION //# number of instructions //#define TCDM //# of conflicts in TCDM (L1 memory) between cores #define PULP_L1_DATA RT_L2_DATA #define PULP_L2_DATA RT_L2_DATA #define MUL #define SCALE 100//6//64 #define CGRA_OFF #endif // DEFINES_H_ diff --git a/PULP/profiling/defines.h b/PULP/profiling/defines.h new file mode 100755 index 0000000..f1de78c --- /dev/null +++ b/PULP/profiling/defines.h @@ -0,0 +1,28 @@ +#include +#include "rt/rt_api.h" +#include "rt/rt_omp.h" + +#define BI +void __attribute__ ((noinline)) shift16(); +//#define GPIO +/*GPIO definitions*/ +#define PAD_GPIO_NUMBER 9 +#define GPIO_NUMBER 9 + +/*Other definitions*/ +#define RESET_FLAG 0 +#define SET_FLAG 1 + + +//FOR PARALLEL EXECUTION +#define STACK_SIZE 1024 +#define MOUNT 1 +#define UNMOUNT 0 +#define CID 0 + + +#if TARGET == 1 +#define CL +#else +#define FC +#endif diff --git a/PULP/profiling/profile_cl.c b/PULP/profiling/profile_cl.c new file mode 100644 index 0000000..9d08aec --- /dev/null +++ b/PULP/profiling/profile_cl.c @@ -0,0 +1,74 @@ +#include "profile_cl.h" +#include "rt/rt_api.h" +#include "rt/rt_omp.h" +#include "../defines.h" + + +void profile_cl_start(rt_perf_t *perf){ + + int id = rt_core_id(); + if(rt_core_id()==0){ + printf("\nstart profile\n"); + } + rt_perf_init(&perf[id]); + +#ifdef ACTIVE + rt_perf_conf(&perf[id],(1<