Page MenuHomec4science

SequenceLayer.cpp
No OneTemporary

File Metadata

Created
Fri, May 10, 22:10

SequenceLayer.cpp

#include <SequenceLayer.hpp>
#include <stdexcept> // std::invalid_argument
#include <limits> // numeric_limits
#include <cmath> // log(), exp(), pow()
#include <vector>
#include <matrices.hpp>
SequenceLayer::SequenceLayer(const matrix2d_i& data,
size_t n_class,
size_t n_shift,
bool flip)
: DataLayer(data, n_class, n_shift, flip)
{ this->n_category = 4 ;
// initialise the empty model
this->model = matrix3d_d(this->n_class,
matrix2d_d(this->l_model,
vector_d(this->n_category, 0))) ;
}
SequenceLayer::SequenceLayer(const matrix2d_i& data,
const matrix3d_d& model,
bool flip)
:DataLayer(data, model,flip)
{}
SequenceLayer::SequenceLayer(const matrix2d_c& data,
const matrix3d_d& model,
bool flip)
{ // parameters
this->flip = flip ;
this->n_row = data.size() ;
this->n_col = data[0].size() ;
this->n_class = model.size() ;
this->l_model = model[0].size() ;
this->n_category = model[0][0].size() ;
this->n_flip = this->flip + 1 ;
this->n_shift = this->n_col - this->l_model + 1 ;
// check if model is not too long
if(this->n_col < this->l_model)
{ char msg[4096] ;
sprintf(msg,
"Error! model is longer than data : %zu / %zu",
this->l_model, this->n_col) ;
throw std::invalid_argument(msg) ;
}
// convert DNA char to DNA int code
this->data = matrix2d_i(this->n_row,
vector_i(this->n_col)) ;
for(size_t i=0; i<this->n_row; i++)
{ for(size_t j=0; j<this->n_col; j++)
{ switch (data[i][j])
{ case 'A':
this->data[i][j] = 0 ;
break ;
case 'C':
this->data[i][j] = 1 ;
break ;
case 'G':
this->data[i][j] = 2 ;
break ;
case 'T':
this->data[i][j] = 3 ;
break ;
default:
this->data[i][j] = 4 ;
break;
}
}
}
// model
this->model = model ;
}
SequenceLayer::~SequenceLayer()
{}
void SequenceLayer::seed_model_randomly()
{}
void SequenceLayer::seed_model_sampling()
{}
void SequenceLayer::seed_model_toy()
{}
void SequenceLayer::compute_loglikelihoods(matrix4d_d& loglikelihood,
vector_d& loglikelihood_max) const
{ // dimension checks
this->check_loglikelihood_dim(loglikelihood) ;
this->check_loglikelihood_max_dim(loglikelihood_max) ;
// compute log likelihood
for(size_t i=0; i<this->n_row; i++)
{
// set max to min possible value
loglikelihood_max[i] = std::numeric_limits<double>::lowest() ;
for(size_t j=0; j<this->n_class; j++)
{ for(size_t s_fw=0, s_rev=this->n_shift-1;
s_fw<this->n_shift; s_fw++, s_rev--)
{ // slice is [from_fw,to)
// from_dat_fw to_dat_fw [from_dat_fw, to_dat_fw]
// fw |---------->>>----------|
// ----------------------------------> data
// rev |----------<<<----------| [from_dat_rev, to_dat_rev]
// to_dat_rev can be -1 -> int
// to_dat_rev from_dat_rev
// log likelihood
double ll_fw = 0. ;
double ll_rev = 0. ;
// --------------- forward ---------------
size_t from_dat_fw = s_fw ;
size_t to_dat_fw = from_dat_fw + this->l_model - 1 ;
// --------------- reverse ---------------
size_t from_dat_rev = this->n_col - 1 - s_fw ;
// size_t to_dat_rev = from_dat_rev - (this->l_model - 1) ;
for(size_t j_dat_fw=from_dat_fw,j_ref_fw=0, j_dat_rev=from_dat_rev;
j_dat_fw<to_dat_fw;
j_dat_fw++, j_ref_fw++, j_dat_rev--)
{
double ll ;
// --------------- forward ---------------
ll = log(this->model[j][j_ref_fw][this->data[i][j_dat_fw]]) ;
ll_fw += std::max(ll, SequenceLayer::p_min_log) ;
// --------------- reverse ---------------
if(this->flip)
{ ll = log(this->model[j][j_ref_fw][this->data[i][j_dat_rev]]) ;
ll_rev += std::max(ll, SequenceLayer::p_min_log) ;
}
}
loglikelihood[i][j][from_dat_fw][flip_states::FORWARD] = ll_fw ;
// keep track of the max per row
if(ll_fw > loglikelihood_max[i])
{ loglikelihood_max[i] = ll_fw ; }
if(this->flip)
{ loglikelihood[i][j][from_dat_fw][flip_states::REVERSE] = ll_rev ;
// keep track of the max per row
if(ll_rev > loglikelihood_max[i])
{ loglikelihood_max[i] = ll_rev ; }
}
}
}
}
}
void SequenceLayer::update_model(const matrix4d_d& posterior_prob)
{ // dimension checks
this->check_posterior_prob_dim(posterior_prob) ;
}

Event Timeline