Page MenuHomec4science

DataLayer.hpp.save
No OneTemporary

File Metadata

Created
Mon, Jun 10, 23:10

DataLayer.hpp.save

#ifndef DATALAYER_HPP
#define DATALAYER_HPP
#include <iostream>
#include <matrices.hpp>
/*!
 * \brief Abstract base class describing how a
 * probabilistic model and the data it explains are
 * handled together.
 *
 * A DataLayer holds two things :
 * 1) a data matrix
 * 2) a model
 * The model stores the parameters of a probabilistic
 * model, made of one or more classes, that fits the data.
 * The likelihood of the data given the model can be
 * computed, and the model can be updated from a set of
 * posterior probabilities describing how the data are
 * assigned to the different classes.
 */
class DataLayer
{
    public:
        /*!
         * \brief The smallest probability value accepted
         * in computations.
         */
        static const double p_min;

        /*!
         * \brief The log of the smallest probability.
         */
        static const double p_min_log;

        /*!
         * \brief The flip states that exist.
         */
        enum flip_states
        {
            FORWARD = 0,
            REVERSE
        };

        /*!
         * \brief Default constructor.
         */
        DataLayer();

        /*!
         * \brief Builds an object from the given data.
         * No model is initialised at this stage, because
         * the number of categories of the model depends
         * on the final class.
         * \param data the data.
         * \param n_class the number of classes of the
         * model.
         * \param n_shift the number of shift states of
         * the model.
         * \param flip whether flipping is allowed.
         */
        DataLayer(const matrix2d_i& data,
                  size_t n_class,
                  size_t n_shift,
                  bool flip);

        /*!
         * \brief Builds an object from the given data and
         * model.
         * The number of classes and the shifting freedom
         * are set by the model dimensions.
         * \param data the data.
         * \param model the model.
         * \param flip whether flipping is allowed.
         */
        DataLayer(const matrix2d_i& data,
                  const matrix3d_d& model,
                  bool flip);

        /*!
         * \brief Destructor.
         */
        virtual ~DataLayer();

        /*!
         * \brief Gives random values to the model.
         */
        virtual void seed_model_randomly() = 0;

        /*!
         * \brief Initialises the model by sampling rows
         * in the data and using them as the initial model
         * values.
         */
        virtual void seed_model_sampling() = 0;

        /*!
         * \brief Initialises the model with the first
         * n_class rows of the data.
         */
        virtual void seed_model_toy() = 0;

        /*!
         * \brief Computes the log likelihood of the data
         * given the current parameters of the model.
         * \param loglikelihood a matrix in which the
         * results are stored. Its expected dimensions are :
         * 1st : the number of rows of the data
         * 2nd : the number of classes of the model
         * 3rd : the number of shift states
         * 4th : the number of flip states
         * \param loglikelihood_max a vector for the max
         * value of each row of loglikelihood. Its length
         * should be equal to the number of rows of the
         * data.
         */
        virtual void compute_loglikelihoods(matrix4d_d& loglikelihood,
                                            vector_d& loglikelihood_max) const = 0;

        /*!
         * \brief Updates the model from the posterior
         * probabilities, that is the probabilities of
         * each row of the data to be assigned to each
         * class, for each shift and flip state.
         * \param posterior_prob the probabilities of
         * assignment of the data to the different classes.
         */
        virtual void update_model(const matrix4d_d& posterior_prob) = 0;

        /*!
         * \brief Gives a copy of the current model.
         * \return the current model, with dimensions :
         * 1st dim : the number of classes
         * 2nd dim : the model length
         * 3rd dim : the number of value categories.
         */
        virtual matrix3d_d get_model() const;

    protected:
        /*!
         * \brief Verifies that the dimensions of the
         * argument are compatible with the data and the
         * model. A std::invalid_argument with a relevant
         * message is thrown otherwise.
         * \param loglikelihood the log likelihood matrix
         * to check. Its expected dimensions are :
         * 1st : the number of rows of the data
         * 2nd : the number of classes of the model
         * 3rd : the number of shift states
         * 4th : the number of flip states
         * \throw std::invalid_argument if the dimensions
         * are incorrect.
         */
        void check_loglikelihood_dim(const matrix4d_d& loglikelihood) const;

        /*!
         * \brief Verifies that the dimensions of the
         * argument are compatible with the data and the
         * model. A std::invalid_argument with a relevant
         * message is thrown otherwise.
         * \param loglikelihood_max the vector of the max
         * values of each row of the log likelihood matrix.
         * Its length should be equal to the number of
         * rows of the data.
         * \throw std::invalid_argument if the dimensions
         * are incorrect.
         */
        void check_loglikelihood_max_dim(const vector_d& loglikelihood_max) const;

        /*!
         * \brief Verifies that the dimensions of the
         * argument are compatible with the data and the
         * model. A std::invalid_argument with a relevant
         * message is thrown otherwise.
         * \param posterior_prob the posterior probability
         * matrix to check. Its expected dimensions are :
         * 1st : the number of rows of the data
         * 2nd : the number of classes of the model
         * 3rd : the number of shift states
         * 4th : the number of flip states
         * \throw std::invalid_argument if the dimensions
         * are incorrect.
         */
        void check_posterior_prob_dim(const matrix4d_d& posterior_prob) const;

        /*!
         * \brief The data.
         */
        matrix2d_i data;

        /*!
         * \brief The model of the data.
         */
        matrix3d_d model;

        /*!
         * \brief Whether flip is enabled.
         */
        bool flip;

        /*!
         * \brief The number of rows of the data.
         */
        size_t n_row;

        /*!
         * \brief The number of columns of the data.
         */
        size_t n_col;

        /*!
         * \brief The number of classes of the model.
         */
        size_t n_class;

        /*!
         * \brief The length of the model, that is its
         * 2nd dimension.
         */
        size_t l_model;

        /*!
         * \brief The number of variable categories in the
         * data, which is also the 3rd dimension of the
         * model.
         * Read counts are quantitative values and have a
         * single category, whereas DNA sequences are made
         * of A,C,G,T (at least) and have 4 different
         * categories.
         */
        size_t n_category;

        /*!
         * \brief The number of shift states.
         */
        size_t n_shift;

        /*!
         * \brief The number of flip states.
         */
        size_t n_flip;
};
#endif // DATALAYER_HPP

Event Timeline