Page MenuHomec4science

perf.c
No OneTemporary

File Metadata

Created
Tue, Jan 7, 03:46
#include "rt/rt_api.h"
#include "perf.h"
// TODO: pass ALL the arguments down to each core of the cluster
/*
 * Begin a cluster-side measurement.
 *
 * Bumps settings->numCalled and then hands `settings` to startPerfCLMaster
 * through rt_cluster_call.
 */
void startPerfCL(perfSettings* settings){
    void *clusterArg = settings;

    settings->numCalled += 1;

    /* NOTE(review): the NULL/0 arguments presumably select the default stacks,
     * all cores and a blocking call -- confirm against the rt_cluster_call docs. */
    rt_cluster_call(NULL, 0, startPerfCLMaster, clusterArg, NULL, 0, 0, 0, NULL);
}
/*
 * Entry point handed to rt_cluster_call by startPerfCL: forks
 * startPerfCLEachCore over the team, forwarding `arg` untouched.
 */
void startPerfCLMaster(void *arg)
{
    void *sharedSettings = arg;

    /* NOTE(review): a core count of 0 presumably means "all cores" -- confirm. */
    rt_team_fork(0, startPerfCLEachCore, sharedSettings);
}
/*
 * Per-core start routine, forked by startPerfCLMaster.
 *
 * arg: pointer to the shared perfSettings.
 *
 * Initialises, configures, resets and starts the performance counter slot
 * settings->perf[<this core id>] for the event selected by settings->event.
 *
 * The settings are accessed through the pointer rather than copied by value:
 * a copy costs a whole perfSettings on every core's stack and, if `perf` is an
 * array embedded in the structure (perf.h is not visible here -- confirm), the
 * init/conf calls would only touch the private copy and be lost on return.
 */
void startPerfCLEachCore(void *arg){
    perfSettings *cfg = (perfSettings*)arg;
    int core = rt_core_id();

    rt_perf_init(&(cfg->perf[core]));
    rt_perf_conf(&(cfg->perf[core]), 1<<cfg->event);

    /* Reset the HW counters and start counting as the very last step so the
     * set-up above is not part of the measured region. */
    rt_perf_reset(&(cfg->perf[core]));
    rt_perf_start(&(cfg->perf[core]));
}
/*
 * Stop the cluster measurement and accumulate the per-core results.
 *
 * settings: shared measurement state; returnedPerf[core][eventPos] is
 *           incremented by the value read for each core.
 * verbose:  when non-zero, one line per core is printed.
 * tag:      label used in the verbose output; NULL is printed as "".
 *
 * The label is printed straight from `tag`. The previous version first copied
 * it into a 64-byte stack buffer without any bound, which overflowed for
 * labels of 64 characters or more.
 */
void stopGetPerfCL(perfSettings* settings,uint8_t verbose, char*tag){
    /* NOTE(review): the core count is hard-coded to 8 -- confirm it matches
     * the target cluster and the dimensions of perf[] / returnedPerf[]. */
    const uint8_t coreCount = 8;

    rt_cluster_call(NULL, 0, stopPerfCLMaster, settings, NULL, 0, 0, 0, NULL);

    for (uint8_t core = 0; core < coreCount; core++) {
        settings->returnedPerf[core][settings->eventPos] +=
            rt_perf_get(&(settings->perf[core]), settings->event);
    }

    if (!verbose) {
        return;
    }
    if (!tag) {
        tag = "";
    }

    for (uint8_t core = 0; core < coreCount; core++) {
        /* Cast so the argument matches %lu whatever integer type rt_perf_get
         * returns on this target. */
        printf("(%s --> CORE:%d) --> EVENT %d : %lu\n", tag, core, settings->eventPos,
               (unsigned long)rt_perf_get(&(settings->perf[core]), settings->event));
    }
}
/*
 * Entry point handed to rt_cluster_call by stopGetPerfCL: forks
 * stopPerfCLEachCore over the team, forwarding `arg` untouched.
 */
void stopPerfCLMaster(void *arg)
{
    void *sharedSettings = arg;

    /* NOTE(review): a core count of 0 presumably means "all cores" -- confirm. */
    rt_team_fork(0, stopPerfCLEachCore, sharedSettings);
}
/*
 * Per-core stop routine, forked by stopPerfCLMaster.
 *
 * arg: pointer to the shared perfSettings.
 *
 * Stops counting and saves the counters into settings->perf[<this core id>],
 * which stopGetPerfCL reads afterwards with rt_perf_get.
 *
 * The settings are accessed through the pointer rather than copied by value:
 * if `perf` is an array embedded in perfSettings (perf.h is not visible here
 * -- confirm), saving into a stack copy would discard the result before
 * stopGetPerfCL can read it. It also avoids a struct-sized copy per core.
 */
void stopPerfCLEachCore(void *arg){
    perfSettings *cfg = (perfSettings*)arg;
    int core = rt_core_id();

    rt_perf_stop(&(cfg->perf[core]));
    rt_perf_save(&(cfg->perf[core]));
}
/*
 * Begin a measurement on the calling core using settings->perf.
 *
 * Bumps settings->numCalled, then initialises and configures the counter
 * structure for the event selected by settings->event, and finally resets and
 * starts the hardware counters.
 */
void startPerfFC(perfSettings* settings)
{
    settings->numCalled += 1;

    /* The structure has to be initialised at least once; per the original
     * note this sets all of its values to zero. */
    rt_perf_init(settings->perf);
    rt_perf_conf(settings->perf, 1 << settings->event);

    /* Reset and start last, so that only the code between this call and the
     * matching stop is benchmarked. */
    rt_perf_reset(settings->perf);
    rt_perf_start(settings->perf);
}
/*
 * Stop the measurement started by startPerfFC and accumulate its result.
 *
 * settings: measurement state; returnedPerf[0][eventPos] is incremented by
 *           the value read for settings->event.
 * verbose:  when non-zero, the value is also printed.
 * tag:      label used in the verbose output; NULL is printed as "".
 *
 * The label is printed straight from `tag`. The previous version first copied
 * it into a 64-byte stack buffer without any bound, which overflowed for
 * labels of 64 characters or more.
 */
void stopGetPerfFC(perfSettings *settings,uint8_t verbose, char*tag){
    rt_perf_stop(settings->perf);
    rt_perf_save(settings->perf);

    uint32_t counterValue = rt_perf_get(settings->perf, settings->event);

    if (verbose) {
        /* uint32_t is not unsigned long on every target; cast to match %lu. */
        printf("(%s) --> EVENT %d : %lu\n", tag ? tag : "", settings->eventPos,
               (unsigned long)counterValue);
    }

    settings->returnedPerf[0][settings->eventPos] += counterValue;
}
/* Removes calls * perCall from total, saturating at zero instead of wrapping. */
static uint32_t perfAvgRemoveOverhead(uint32_t total, uint32_t perCall, uint32_t calls)
{
    /* 64-bit product so a large call count cannot overflow before the compare. */
    uint64_t overhead = (uint64_t)perCall * calls;

    return (total > overhead) ? (uint32_t)(total - overhead) : 0u;
}

/*
 * Correct the accumulated counters for the measurement overhead and print
 * their per-run averages.
 *
 * tag:            label printed in front of every line (NULL is printed as "").
 * len:            divisor used for the average; 0 is treated as 1 so the raw
 *                 totals are printed instead of dividing by zero.
 * callFC/callCL:  number of start/stop pairs; each one contributes a fixed,
 *                 experimentally measured overhead that is subtracted.
 * returnedPerfFC: accumulated counters indexed by event; entries 0..16 are
 *                 read. Corrected IN PLACE.
 * printCL:        when non-zero the per-core cluster counters are printed too.
 * returnedPerfCL: accumulated cluster counters [core][event] for 8 cores.
 *                 Corrected IN PLACE whenever it is non-NULL, even if printCL
 *                 is 0; may be NULL.
 *
 * Fixes relative to the previous version:
 *  - the correction was evaluated in unsigned arithmetic, so "clamp at 0"
 *    never triggered and an overhead larger than the total wrapped around to
 *    a huge value; it now saturates at 0;
 *  - cluster event 12 was corrected starting from the value of event 13;
 *  - the label was copied into a 64-byte buffer without any bound.
 */
void printPerfAvg(char*tag,uint32_t len, uint32_t callFC, uint32_t callCL, uint32_t* returnedPerfFC, uint8_t printCL,uint32_t (*returnedPerfCL)[NUM_EVENT_PERF]){
    /* Per-call overheads (experimental values), indexed by event.
     * A 0 entry means the event is left uncorrected. */
    static const uint16_t fcOverhead[] = {
        30, 26, 30, 0, 0, 0, 2, 6, 2, 1, 1, 19
    };
    static const uint16_t clOverheadCore0[] = {
        743, 83, 173, 4, 0, 0, 20, 18, 4, 10, 2, 20, 6, 15, 51, 24
    };
    static const uint16_t clOverheadOther[] = {
        743, 47, 94, 1, 0, 0, 12, 1, 4, 5, 2, 14, 8, 1, 34, 2
    };
    /* Printed name of each event, in index order. */
    static const char *const eventName[] = {
        "Total cycles",
        "Instructions",
        "Active cycles",
        "Load data hazards",
        "Jump stalls",
        "Instruction cache misses",
        "Load accesses",
        "Store accesses",
        "Jumps",
        "Branches",
        "Branches taken",
        "Compressed instructions",
        "External load accesses",
        "External store accesses",
        "External load stall cycles",
        "External store stall cycles",
        "TCDM contention cycles"
    };
    const unsigned fcCorrected = sizeof fcOverhead / sizeof fcOverhead[0];
    const unsigned clCorrected = sizeof clOverheadCore0 / sizeof clOverheadCore0[0];
    const unsigned printedEvents = sizeof eventName / sizeof eventName[0];
    /* NOTE(review): core count hard-coded to 8, as in the rest of the file. */
    const int coreCount = 8;

    if (!tag) {
        tag = "";
    }
    if (len == 0) {
        len = 1;
    }

    /* CORRECTIONS FOR OVERHEADS (EXPERIMENTAL VALUES) */
    for (unsigned e = 0; e < fcCorrected; e++) {
        if (fcOverhead[e] != 0) {
            returnedPerfFC[e] = perfAvgRemoveOverhead(returnedPerfFC[e], fcOverhead[e], callFC);
        }
    }
    if (returnedPerfCL) {
        for (int core = 0; core < coreCount; core++) {
            /* Core 0 has its own overhead table; cores 1..7 share one. */
            const uint16_t *overhead = (core == 0) ? clOverheadCore0 : clOverheadOther;

            for (unsigned e = 0; e < clCorrected; e++) {
                if (overhead[e] != 0) {
                    returnedPerfCL[core][e] =
                        perfAvgRemoveOverhead(returnedPerfCL[core][e], overhead[e], callCL);
                }
            }
        }
    }

    /* PRINTS */
    for (unsigned e = 0; e < printedEvents; e++) {
        printf("(%s): %s: %lu\n", tag, eventName[e], (unsigned long)(returnedPerfFC[e] / len));
    }

    if (printCL && returnedPerfCL) {
        for (int core = 0; core < coreCount; core++) {
            printf("---------------------------------- CORE %d ----------------------------------\n", core);
            for (unsigned e = 0; e < printedEvents; e++) {
                printf("(%s, core: %d): %s: %lu\n", tag, core, eventName[e],
                       (unsigned long)(returnedPerfCL[core][e] / len));
            }
        }
    }
}
/*
 * Concatenate two NUL-terminated strings into outTag.
 *
 * str1, str2: source strings, copied in that order.
 * outTag:     destination; receives str1 followed by str2 and a terminating
 *             NUL. No bound is checked, so the caller must provide at least
 *             strlen(str1) + strlen(str2) + 1 bytes.
 */
void append(char *str1,char *str2,char *outTag){
    char *dst = outTag;

    for (const char *src = str1; *src != '\0'; ++src) {
        *dst++ = *src;
    }
    for (const char *src = str2; *src != '\0'; ++src) {
        *dst++ = *src;
    }
    *dst = '\0';
}
/* Returns 1 when some arr[k] is strictly closer than tol to sample, else 0. */
static int evalResHasNeighbour(int sample, const int32_t *arr, int16_t count, int tol)
{
    for (int k = 0; k < count; k++) {
        int delta = sample - arr[k];
        if (delta < 0) {
            delta = -delta;
        }
        if (delta < tol) {
            return 1;
        }
    }
    return 0;
}

/*
 * Score detected sample positions against a ground-truth list.
 *
 * found/sizef:  detected positions and their count.
 * ground/sizeg: reference positions and their count.
 *
 * Only positions inside [360*60*5, 650000-360] are counted. A detected
 * position is a true positive when any ground-truth entry (inside the window
 * or not) is less than (int)(0.15*360) samples away.
 * NOTE(review): 360 is presumably the sampling rate in Hz, which would make
 * the window start 5 minutes and the tolerance 150 ms -- confirm.
 *
 * Prints the counts plus the two ratios scaled by 10'000 and returns the
 * F-score 2*sens*spec/(sens+spec), where sens = TP / counted ground truth and
 * spec = TP / counted detections (the latter is a precision, although it is
 * labelled "Specificity" in the output).
 *
 * Every ratio whose denominator is zero is reported as 0; the previous
 * version produced NaN there and then converted NaN to int for printing.
 */
float evaluateRes(int32_t found[],int16_t sizef,int32_t ground[],int16_t sizeg){
    const uint8_t ver = 0;                  /* set to 1 for per-sample diagnostics */
    const int th = (int)(0.15*360);
    const int minStop = 1;
    const int windowStart = 360*60*5;
    const int windowEnd = 650000-360*minStop;

    int truePositives = 0;
    int countedFound = 0;
    int countedGround = 0;

    for (int g = 0; g < sizeg; g++) {
        if (ground[g] >= windowStart && ground[g] <= windowEnd) {
            countedGround++;
        }
    }

    for (int i = 0; i < sizef; i++) {
        int samp = found[i];
        if (samp < windowStart || samp > windowEnd) {
            continue;
        }
        countedFound++;
        if (evalResHasNeighbour(samp, ground, sizeg, th)) {
            truePositives++;
        } else if (ver) {
            printf("point: %d did not found any corresponding point\n",samp);
        }
    }

    /* The reverse pass only feeds the diagnostics, so skip the O(n*m) scan
     * entirely unless they are enabled. */
    if (ver) {
        for (int i = 0; i < sizeg; i++) {
            int samp = ground[i];
            if (samp < windowStart || samp > windowEnd) {
                continue;
            }
            if (!evalResHasNeighbour(samp, found, sizef, th)) {
                printf("original: %d did not found any corresponding point\n",samp);
            }
        }
    }

    float sens = (countedGround > 0) ? (float)truePositives / (float)countedGround : 0.0f;
    float spec = (countedFound > 0) ? (float)truePositives / (float)countedFound : 0.0f;
    float fScore = (sens + spec > 0.0f) ? (2*sens*spec)/(sens+spec) : 0.0f;

    printf("True Positive: %d \n", truePositives);
    printf("Peak found: %d vs Original: %d\n", countedFound, countedGround);
    printf("Sensitivity (x 10'000): %d\nSpecificity (x 10'000): %d\n",(int)(sens*10000),(int)(spec*10000));
    return fScore;
}
/*
 * Print f in fixed-point notation using only integer printf conversions.
 *
 * f:    value to print; its integer part must fit in an int (NaN, infinities
 *       and larger magnitudes are not handled).
 * prec: number of fractional digits, truncated rather than rounded. For
 *       prec <= 0 a single "0" is printed after the point, as before.
 *
 * Fixes relative to the previous version: the integer part was printed after
 * being scaled by 10^prec (3.25 came out as "300.25"), leading zeros of the
 * fraction were dropped (1.05 -> "1.5"), and negative values printed a sign
 * on both halves.
 */
void printFloat(float f, int prec){
    if (f < 0) {
        printf("-");
        f = -f;
    }

    int whole = (int)f;
    float frac = f - (float)whole;

    printf("%d.", whole);

    if (prec <= 0) {
        printf("0");
        return;
    }

    /* Emit one digit at a time: this keeps leading zeros and cannot overflow
     * an int no matter how large prec is. */
    for (int i = 0; i < prec; i++) {
        frac *= 10;
        int digit = (int)frac;
        if (digit > 9) {
            digit = 9;      /* guard against float rounding up to 10.0 */
        }
        printf("%d", digit);
        frac -= (float)digit;
    }
}

Event Timeline