ReadLayer.cpp (R8820 scATAC-seq)
#include <ReadLayer.hpp>
#include <stdexcept>   // std::invalid_argument
#include <limits>      // std::numeric_limits
#include <cmath>       // log(), exp(), pow()
#include <cstdio>      // sprintf()
#include <algorithm>   // std::max()
#include <vector>
#include <Statistics.hpp>  // beta_pmf(), poisson_pmf()
#include <Random.hpp>      // rand_real_uniform(), rand_int_uniform()
#include <matrices.hpp>
ReadLayer::ReadLayer(const matrix2d_i& data,
size_t n_class,
size_t n_shift,
bool flip)
: DataLayer(data, n_class, n_shift, flip),
window_means(n_row,
vector_d(n_shift, 0.))
{ this->n_category = 1 ;
// initialise the empty model
this->model = matrix3d_d(this->n_class,
matrix2d_d(this->l_model,
vector_d(this->n_category, 0))) ;
// compute window means
this->compute_window_means() ;
}
ReadLayer::ReadLayer(const matrix2d_i& data,
const matrix3d_d& model,
bool flip)
: DataLayer(data, model, flip),
window_means(n_row,
vector_d(n_shift, 0.))
{ // check that the model only has one category
if(this->n_category > 1)
{ char msg[4096] ;
sprintf(msg,
"Error! model is expected to have length 1 on "
"3rd dimension, not %zu",
this->n_category) ;
throw std::invalid_argument(msg) ;
}
// compute window means
this->compute_window_means() ;
}
ReadLayer::~ReadLayer()
{}
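// Seeds the model from random per-row class probabilities : each class reference
// becomes a probability-weighted aggregation of the central data window of every row.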
void ReadLayer::seed_model_randomly()
{
// drawing random values from a beta distribution cannot be done with boost, so :
// i) generate a random number x in [0,1]
// ii) compute f(x) where f is the beta density
matrix2d_d prob(this->n_row, vector_d(this->n_class, 0.)) ;
vector_d prob_class(this->n_class, 0.) ;
double tot_sum = 0. ;
// sample the prob
// beta distribution parameters
double alpha = pow(this->n_row, -0.5) ;
double beta = 1. ;
for(size_t i=0; i<this->n_row; i++)
{ double row_sum = 0. ;
for(size_t j=0; j<this->n_class; j++)
{ double x = rand_real_uniform(0., 1.0) ;
double p = std::max(ReadLayer::p_min, beta_pmf(x, alpha, beta)) ;
prob[i][j] = p ;
prob_class[j] += p ;
tot_sum += p ;
row_sum += p ;
}
// normalize
for(size_t j=0; j<this->n_class; j++)
{ prob[i][j] /= row_sum ; }
}
// class prob
for(auto& p : prob_class)
{ p /= tot_sum ; }
// compute the references as probability-weighted sums of the data
vector_d col_sum(this->n_class, 0.) ;
for(size_t i=0; i<this->n_row; i++)
{ for(size_t j=0; j<this->n_class; j++)
{ for(size_t j_ref=0, j_dat=this->n_shift/2; j_ref<this->l_model; j_ref++, j_dat++)
{ this->model[j][j_ref][0] += (this->data[i][j_dat] * prob[i][j]) ; }
col_sum[j] += prob[i][j] ;
}
}
// normalize each reference by the total probability assigned to its class
for(size_t i=0; i<this->n_class; i++)
{ for(size_t j=0; j<this->l_model; j++)
{ this->model[i][j][0] /= col_sum[i] ; }
}
}
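// Seeds the model by sampling n_class distinct rows of the data at random and
// copying their central window into the class references.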
void ReadLayer::seed_model_sampling()
{ std::vector<bool> chosen(this->n_row, false) ;
for(size_t i=0; i<this->n_class; )
{ size_t index = rand_int_uniform(size_t(0), size_t(this->n_row-1)) ;
// already chosen
if(chosen[index])
{ ; }
// not yet chosen as a reference
else
{ for(size_t j_ref=0, j_dat=this->n_shift/2; j_ref<this->l_model; j_ref++, j_dat++)
{ this->model[i][j_ref][0] = this->data[index][j_dat] ; }
chosen[index] = true ;
i++ ;
}
}
}
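// Deterministic seeding for testing : the first n_class rows of the data are
// copied into the class references.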
void ReadLayer::seed_model_toy()
{ // use the first n_class rows of the data to initialise the references
std::vector<bool> chosen(this->n_row, false) ;
for(size_t i=0; i<this->n_class; )
{ size_t index = i ;
// already chosen
if(chosen[index])
{ ; }
// not yet chosen as a reference
else
{ for(size_t j_ref=0, j_dat=this->n_shift/2; j_ref<this->l_model; j_ref++, j_dat++)
{ this->model[i][j_ref][0] = this->data[index][j_dat] ; }
chosen[index] = true ;
i++ ;
}
}
}
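// Computes, for every row, class, shift and orientation, the Poisson log
// likelihood of the data window given the mean-normalised class model scaled
// by the corresponding window mean, and records the per-row maximum.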
void ReadLayer::compute_loglikelihoods(matrix4d_d& loglikelihood,
vector_d& loglikelihood_max) const
{ // dimension checks
this->check_loglikelihood_dim(loglikelihood) ;
this->check_loglikelihood_max_dim(loglikelihood_max) ;
// normalize the models
matrix3d_d model_norm = this->model ;
for(size_t i=0; i<this->n_class; i++)
{ double mean = 0. ;
for(size_t j=0; j<this->l_model; j++)
{ mean += model_norm[i][j][0] ; }
mean /= this->l_model ;
for(size_t j=0; j<this->l_model; j++)
{ model_norm[i][j][0] /= mean ; }
}
// compute log likelihood
for(size_t i=0; i<this->n_row; i++)
{
// set max to min possible value
loglikelihood_max[i] = std::numeric_limits<double>::lowest() ;
for(size_t j=0; j<this->n_class; j++)
{ for(size_t s_fw=0, s_rev=this->n_shift-1;
s_fw<this->n_shift; s_fw++, s_rev--)
{ // forward and reverse windows over the data, scanned in opposite directions :
//
//      from_dat_fw             to_dat_fw
// fw       |---------->>>----------|
// ------------------------------------------> data
// rev      |----------<<<----------|
//      to_dat_rev              from_dat_rev
//
// to_dat_rev could be -1, hence it would need a signed type (see below)
// log likelihood
double ll_fw = 0. ;
double ll_rev = 0. ;
// --------------- forward ---------------
size_t from_dat_fw = s_fw ;
size_t to_dat_fw = from_dat_fw + this->l_model - 1 ;
// --------------- reverse ---------------
size_t from_dat_rev = this->n_col - 1 - s_fw ;
// size_t to_dat_rev = from_dat_rev - (this->l_model - 1) ;
for(size_t j_dat_fw=from_dat_fw,j_ref_fw=0, j_dat_rev=from_dat_rev;
j_dat_fw<to_dat_fw;
j_dat_fw++, j_ref_fw++, j_dat_rev--)
{
double ll ;
// --------------- forward ---------------
ll = log(poisson_pmf(this->data[i][j_dat_fw],
model_norm[j][j_ref_fw][0]*
this->window_means[i][s_fw])) ;
ll_fw += std::max(ll, ReadLayer::p_min_log) ;
// --------------- reverse ---------------
if(this->flip)
{ ll = log(poisson_pmf(this->data[i][j_dat_rev],
model_norm[j][j_ref_fw][0]*
this->window_means[i][s_rev])) ;
ll_rev += std::max(ll, ReadLayer::p_min_log) ;
}
}
loglikelihood[i][j][from_dat_fw][flip_states::FORWARD] = ll_fw ;
// keep track of the max per row
if(ll_fw > loglikelihood_max[i])
{ loglikelihood_max[i] = ll_fw ; }
if(this->flip)
{ loglikelihood[i][j][from_dat_fw][flip_states::REVERSE] = ll_rev ;
// keep track of the max per row
if(ll_rev > loglikelihood_max[i])
{ loglikelihood_max[i] = ll_rev ; }
}
}
}
}
}
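// Convenience overload : computes the per-class sums of the posterior
// probabilities and delegates to the two-argument update.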
void ReadLayer::update_model(const matrix4d_d& posterior_prob)
{ // compute, for each class, the sum of the posterior probabilities over rows, shifts and flip states
size_t n_row = posterior_prob.size() ;
size_t n_class = posterior_prob[0].size() ;
size_t n_shift = posterior_prob[0][0].size() ;
size_t n_flip = posterior_prob[0][0][0].size() ;
vector_d colsum(n_class, 0.) ;
for(size_t i=0; i<n_row; i++)
{ for(size_t j=0; j<n_class; j++)
{ for(size_t k=0; k<n_shift; k++)
{ for(size_t l=0; l<n_flip; l++)
{ colsum[j] += posterior_prob[i][j][k][l] ; }
}
}
}
this->update_model(posterior_prob, colsum) ;
}
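// M-step update : rebuilds each class model as the posterior-weighted
// aggregation of the data over all rows and shifts, in both orientations
// if flipping is enabled.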
void ReadLayer::update_model(const matrix4d_d& posterior_prob,
const vector_d& posterior_prob_colsum)
{ // dimension checks
this->check_posterior_prob_dim(posterior_prob) ;
this->check_posterior_prob_colsum_dim(posterior_prob_colsum) ;
// update models
// reset the parameters
this->model = matrix3d_d(this->n_class,
matrix2d_d(this->l_model,
vector_d(this->n_category, 0.))) ;
for(size_t n_class=0; n_class < this->n_class; n_class++)
{ for(size_t i=0; i<this->n_row; i++)
{ for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
{ // --------------- forward ---------------
int from_dat_fw = n_shift ;
int to_dat_fw = from_dat_fw + this->l_model - 1 ;
for(int j_dat_fw=from_dat_fw, j_ref_fw=0;
j_dat_fw<=to_dat_fw; j_dat_fw++, j_ref_fw++)
{ this->model[n_class][j_ref_fw][0] +=
(posterior_prob[i][n_class][n_shift][flip_states::FORWARD] *
this->data[i][j_dat_fw]) /
posterior_prob_colsum[n_class] ;
}
// --------------- reverse ---------------
if(this->flip)
{ int from_dat_rev = this->n_col - 1 - n_shift ;
int to_dat_rev = from_dat_rev - (this->l_model - 1) ;
for(int j_dat_rev=from_dat_rev, j_ref_fw=0;
j_dat_rev >= to_dat_rev; j_dat_rev--, j_ref_fw++)
{ this->model[n_class][j_ref_fw][0] +=
(posterior_prob[i][n_class][n_shift][flip_states::REVERSE] *
this->data[i][j_dat_rev]) /
posterior_prob_colsum[n_class] ;
}
}
}
}
}
}
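// Precomputes, for each row and shift, the mean signal in the window of
// length l_model starting at that shift ; used to scale the model when
// computing the likelihoods.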
void ReadLayer::compute_window_means()
{ double l_window = double(this->l_model) ;
for(size_t i=0; i<this->n_row; i++)
{ for(size_t from=0; from<this->n_shift; from++)
{ double sum = 0. ;
// slice is [from,to)
size_t to = from + this->l_model ;
for(size_t j=from; j<to; j++)
{ sum += this->data[i][j] ;}
this->window_means[i][from] = sum / l_window ;
}
}
}
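// Throws std::invalid_argument if the column-sum vector does not contain
// exactly one value per class.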
void ReadLayer::check_posterior_prob_colsum_dim(const vector_d& posterior_prob_colsum) const
{ if(posterior_prob_colsum.size() != this->n_class)
{ char msg[4096] ;
sprintf(msg,
"Error! posterior_prob_colsum size is not "
"equal to the model class number : %zu / %zu",
posterior_prob_colsum.size(), this->n_class) ;
throw std::invalid_argument(msg) ;
}
}