Page MenuHomec4science

gqrsCL.c
No OneTemporary

File Metadata

Created
Tue, Jan 7, 03:25

gqrsCL.c

/*
* This code resides in a separate file to be able to compile FC and cluster codes
* separately in case FC and cluster cores have different ISAs
*/
#include "rt/rt_api.h"
#include "shared.h"
#include "gqrsCL.h"
/*
 * Per-core filter pass, run by every cluster core (called at the end of
 * parallelInteg).
 *
 * The core with id c walks the NUM_PT_CLUSTER consecutive sample indices that
 * start at TAIL_HEAD_TO_PASS + NUM_PT_CLUSTER * c and calls qf() on each of
 * them, passing the start of its own slice as the second argument.
 *
 * arg: unused; kept so the function has the entry signature taken by
 *      rt_team_fork.
 */
static void pe_entry(void *arg)
{
    (void)arg;

    const int coreID = rt_core_id();
    /* Indices are kept in uint16_t, the type qf() takes. */
    const uint16_t startIdx = TAIL_HEAD_TO_PASS+NUM_PT_CLUSTER*coreID;
    const uint16_t stopIdx = startIdx+NUM_PT_CLUSTER;

    for (uint16_t i = startIdx; i < stopIdx; i++) {
        qf(i, startIdx);
    }
}
/*
 * Running integral over one contiguous run of samples, starting from zero.
 *
 * For every i in [startIdx, stopIdx) the straight line through samples i-1
 * and i of `batch` is described by a slope and an intercept, both scaled by
 * 2^SLIDE. The contribution of that interval is the sum of the line over the
 * integer ticks t[i-1]+1 .. t[i]; it is added to a running sum and slot i-1
 * of integDataCL receives the running sum, slope, intercept and batch.t[i].
 *
 * The stored integral is still scaled by 2^SLIDE (qf() shifts by SLIDE at the
 * very end).
 *
 * NOTE(review): batch.t[i] == batch.t[i-1] would divide by zero; presumably
 * time stamps are strictly increasing - confirm against the producer.
 */
static void integrateSpan(uint16_t startIdx, uint16_t stopIdx)
{
    int32_t mScaled;
    int32_t qScaled;
    int32_t integ;
    int32_t s = 0;

    for (uint16_t i = startIdx; i < stopIdx; i++) {
        mScaled = ((batch.v[i]-batch.v[i-1])<<SLIDE)/(batch.t[i]-batch.t[i-1]);
        qScaled = (batch.v[i-1]<<SLIDE) - batch.t[i-1]*mScaled;
        integ = (mScaled/2*(batch.t[i]*(batch.t[i]+1)-batch.t[i-1]*(batch.t[i-1]+1))+qScaled*(batch.t[i]-batch.t[i-1]));
        s += integ;
        integDataCL.i[i-1] = s;
        integDataCL.mScaled[i-1] = mScaled;
        integDataCL.qScaled[i-1] = qScaled;
        integDataCL.t[i-1] = batch.t[i];
    }
}

/*
 * Entered by all cluster cores: builds integDataCL from `batch`, then runs
 * the filter pass (pe_entry).
 *
 * Work split per core:
 *   - BODY: every core integrates its own NUM_PT_CLUSTER samples that follow
 *     the first TAIL_HEAD_TO_PASS ones. Body runs keep their local (zero
 *     based) running sums.
 *   - EDGE: cores 0..3 each integrate one quarter of the head (the first
 *     TAIL_HEAD_TO_PASS samples); cores 4 and up each integrate one quarter
 *     of the tail (the samples after TAIL_HEAD_TO_PASS + BODYLEN). The
 *     quarters of one edge region are then chained into a single cumulative
 *     sum by adding, to each quarter, the totals of the quarters before it.
 *
 * Synchronisation: three team barriers, reached by every core.
 *   1. after the local integrals  -> all quarter totals are written;
 *   2. after reading the totals   -> nobody rewrites its quarter while a
 *      neighbour may still be reading that quarter's (local) total, which
 *      would otherwise let an offset be counted twice;
 *   3. after applying the offsets -> integDataCL is complete before pe_entry
 *      reads it.
 *
 * The offset of a tail quarter is computed from the core's rank inside the
 * tail (coreID - 4), not from the raw core id, so that the totals read are
 * the ones of the preceding tail quarters.
 *
 * NOTE(review): the 4 + 4 split covers the whole tail only for a team of
 * 8 cores - confirm the cluster configuration.
 *
 * arg: unused (rt_team_fork entry signature).
 */
static void parallelInteg(void *arg)
{
    (void)arg;

    enum { CORES_PER_EDGE = 4 };

    const int coreID = rt_core_id();
    /* Rank of this core among the cores sharing its edge region. */
    const int edgeRank = (coreID < CORES_PER_EDGE) ? coreID : coreID-CORES_PER_EDGE;
    /* First integDataCL slot of the edge region: head starts at 0, tail after head + body. */
    const uint16_t edgeBase = (coreID < CORES_PER_EDGE) ? 0 : (TAIL_HEAD_TO_PASS+BODYLEN);
    const uint16_t edgeStart = edgeBase+edgeRank*TAIL_HEAD_TO_PASS/CORES_PER_EDGE+1;
    const uint16_t edgeStop = edgeStart+TAIL_HEAD_TO_PASS/CORES_PER_EDGE;
    const uint16_t bodyStart = TAIL_HEAD_TO_PASS+NUM_PT_CLUSTER*coreID+1;
    const uint16_t bodyStop = bodyStart+NUM_PT_CLUSTER;
    int32_t s = 0;

    /* Phase 1: local running integrals (disjoint slots, no ordering needed). */
    integrateSpan(edgeStart, edgeStop);
    integrateSpan(bodyStart, bodyStop);

    rt_team_barrier();

    /* Phase 2: sum the totals (last slot) of the quarters preceding ours. */
    for (int k = 1; k <= edgeRank; k++) {
        s += integDataCL.i[edgeBase+k*TAIL_HEAD_TO_PASS/CORES_PER_EDGE-1];
    }

    rt_team_barrier();

    /* Phase 3: turn our quarter into a continuation of the previous ones. */
    for (uint16_t i = edgeStart; i < edgeStop; i++) {
        integDataCL.i[i-1] += s;
    }

    rt_team_barrier();
    pe_entry(NULL);
}
// This function is entered on cluster side when the fabric controller
// is calling it remotly using rt_cluster_call.
// Only core 0 is entering it and can then use rt_team_fork to fork
// the execution on multiple cores.
void cluster_entry(void *arg)
{
//printf("(%d, %d) Entered cluster\n", rt_cluster_id(), rt_core_id());
dmaAdrs *adrsPtr = arg;
dmaAdrs adrs = *adrsPtr;
//DMA
rt_dma_copy_t dma_cpy,dma_cpy2;
//let's use a simple one buffer transfer
//printf("size of the structure is: %d\n",sizeof(dataInteg));
rt_dma_memcpy((unsigned int )adrs.startAdr,(unsigned int )(&batch),sizeof(dataBatch),RT_DMA_DIR_EXT2LOC,0,&dma_cpy);
dtCL = 6;
v1normCL = dtCL * dtCL * 64;
//printf("Started DMA cpy(in)\n");
rt_dma_wait(&dma_cpy);
//printf("Finished DMA cpy(in)\n");
//integrateHead();
rt_team_fork(0, parallelInteg, NULL);
//integrateTail();
// for (int l = 0; l< MAXLEN;l++){printf("(J: %d) time: %ld, integ: %ld\n",l, integDataCL.t[l],integDataCL.i[l]);}
// printf("---------------------------------------------------------------------------\n");
// This will make all available cores entering pe_entry, including
// core 0 which is calling this function.
// Core 0 will return from rt_team_fork only when all cores have returned
// from pe_entry (there is an implicit barrier).
//printf("Forking\n");
//rt_team_fork(0, pe_entry, NULL);
//printf("Finish fork\n");
rt_dma_memcpy((unsigned int )adrs.retAdr,(unsigned int )vRes,(BODYLEN+1)*sizeof(uint32_t),RT_DMA_DIR_LOC2EXT,0,&dma_cpy2);
//printf("Started DMA cpy(out)\n");
rt_dma_wait(&dma_cpy2);
//printf("Finished DMA cpy(out)\n");
return;
}
void reSampleInteg(dataInteg *self, uint16_t index, uint16_t startIndex,uint16_t dt,int32_t *reSampled)
{
int32_t t = self->t[index];
int32_t newInteg = 0;
int32_t t0 = 0;
int32_t mScaled = 0;
int32_t qScaled = 0;
int32_t currentT;
int16_t idxFound = -1;
uint8_t present = 0;
//sprintf(num,"%ld",t);
//startPerf(genericPerformance, EVENT_CYCLES);
int16_t j=index ;
rt_team_critical_enter();
for(int16_t i = 1;i<=4;i++)
{
currentT = t+dt*i>=0?t+dt*i:0;
for (; j<MAXLEN; j++)
{
//l++;
// if(rt_core_id()==5){
// printf("fw, core: %d, central idx: %d,j: %d, Sub-Index: %d, associated T: %d, search Times: %d --- %d (found?: %d)\n",rt_core_id(),index,j,i,currentT,self->t[j], self->t[j+1],self->t[j]<currentT && self->t[j+1]>=currentT);
// }
if (self->t[j]==currentT)
{
idxFound = j;
present = 1;
break;
}
else if (self->t[j]<currentT && self->t[j+1]>=currentT)
{
idxFound = j;
break;
}
}
if (present)
{
newInteg = self->i[idxFound];
}
else if (idxFound != -1)
{
newInteg = self->i[idxFound];
t0 = self->t[idxFound];
mScaled = self->mScaled[idxFound+1];
qScaled = self->qScaled[idxFound+1];
//newInteg += (mScaled/2*(currentT*(currentT+1)-t0*(t0+1)) + qScaled*(currentT-t0))>>SLIDE;
newInteg += (mScaled/2*(currentT*(currentT+1)-t0*(t0+1)) + qScaled*(currentT-t0));
}
else{
//printf("\t\tPT. to interpolate not found (fw, core: %d, central idx: %d, index: %d, associated T: %d) !!\n",rt_core_id(),index,i,currentT);
newInteg = self->i[0];
//printf("POINT NOT FOUND ! 1\n");
}
present = 0;
if(idxFound>=startIndex)
newInteg+=self->i[startIndex-1];
if(idxFound>=startIndex+NUM_PT_CLUSTER)
newInteg+=self->i[startIndex+NUM_PT_CLUSTER-1];
reSampled[i+4] = newInteg;
}
//printf("core: %d, idx: %d\n",rt_core_id(),index);
j=index ;
for(int16_t i = -1;i>=-4;i--)
{
currentT = t+dt*i>=0?t+dt*i:0;
for (; j>0; j--)
{
// if(rt_core_id()==5){
// printf("bw, core: %d, central idx: %d,j: %d, Sub-Index: %d, associated T: %d, search Times: %d --- %d (found?: %d)\n",rt_core_id(),index,j,i,currentT,self->t[j], self->t[j+1],self->t[j]<currentT && self->t[j+1]>=currentT);
// }
if (self->t[j]==currentT)
{
idxFound = j;
present = 1;
break;
}
else if (self->t[j]<currentT && self->t[j+1]>=currentT)
{
idxFound = j;
break;
}
}
if (present)
{
newInteg = self->i[idxFound];
}
else if (idxFound != -1)
{
newInteg = self->i[idxFound];
t0 = self->t[idxFound];
mScaled = self->mScaled[idxFound+1];
qScaled = self->qScaled[idxFound+1];
//newInteg += (mScaled/2*(currentT*(currentT+1)-t0*(t0+1)) + qScaled*(currentT-t0))>>SLIDE;
newInteg += (mScaled/2*(currentT*(currentT+1)-t0*(t0+1)) + qScaled*(currentT-t0));
}
else{
//printf("\t\tPT. to interpolate not found (bw, core: %d, central idx: %d, index: %d, associated T: %d) !!\n",rt_core_id(),index,i,currentT);
newInteg = self->i[0];
}
present = 0;
if(idxFound>=startIndex)
newInteg+=self->i[startIndex-1];
if(idxFound>=startIndex+NUM_PT_CLUSTER)
newInteg+=self->i[startIndex+NUM_PT_CLUSTER-1];
reSampled[4+i] = newInteg;
}
reSampled[4] = self->i[index];
rt_team_critical_exit();
// stopGetPerf(genericPerformance,EVENTREQ_CYCLES,verboseNow,append(tagDebug,num));
}
/*
 * Evaluates the QRS detector filter for sample `index` and stores the squared
 * output in vRes[index - TAIL_HEAD_TO_PASS].
 *
 * index:    sample to evaluate.
 * startIdx: first sample of the calling core's slice, forwarded to
 *           reSampleInteg.
 *
 * The integral is resampled at index +/- 1..4 steps of dtCL (r[0]..r[8],
 * r[4] being the centre) and combined as
 *
 *     dv = 10*(r[5]-r[3]) - 4*(r[6]-r[2]) - 2*(r[7]-r[1]) + (r[8]-r[0])
 *
 * built up by repeated doubling. The doublings are written as
 * multiplications: the intermediate values are signed and may be negative,
 * and left-shifting a negative value is undefined in ISO C.
 *
 * The result is normalised by v1normCL and shifted right by SLIDE before
 * being squared.
 */
void qf(uint16_t index,uint16_t startIdx)
{
    int32_t dataResampled[9];
    int32_t dv, dv1, v0;

    reSampleInteg(&integDataCL,index,startIdx,dtCL,dataResampled);

    dv1 = dataResampled[5] - dataResampled[3];        /* +/- 1 step  */
    dv = dv1 * 2;
    dv -= (dataResampled[6] - dataResampled[2]);      /* +/- 2 steps */
    dv = dv * 2;
    dv += dv1;
    dv -= (dataResampled[7] - dataResampled[1]);      /* +/- 3 steps */
    dv = dv * 2;
    dv += (dataResampled[8] - dataResampled[0]);      /* +/- 4 steps */

    v0 = (int32_t)(dv / v1normCL)>>SLIDE;
    vRes[index-TAIL_HEAD_TO_PASS] = v0 * v0;
}

Event Timeline