Page MenuHomec4science

DataLayer.hpp
No OneTemporary

File Metadata

Created
Sun, Aug 4, 06:22

DataLayer.hpp

#ifndef DATALAYER_HPP
#define DATALAYER_HPP
#include <cstddef>   // size_t
#include <future>    // std::promise, std::future
#include <iostream>
#include <vector>    // std::vector

#include <Matrix2D.hpp>
#include <Matrix3D.hpp>
#include <Matrix4D.hpp>
#include <ThreadPool.hpp>
/*!
* \brief Shorthand for a vector of double precision values.
*/
using vector_d = std::vector<double> ;
/*!
* \brief The DataLayer class defines the basic design
* to handle probabilistic models together with
* their data.
* A DataLayer is made of two parts :
* 1) a data matrix
* 2) a model
* The model contains the parameters of a probabilistic
* model, with one or more classes, that fits the data.
* The data likelihood given the model can be computed
* and the model can be updated given a set of
* posterior probabilities representing the data
* assignments to the different classes.
* This class is abstract : compute_loglikelihoods()
* and update_model() are pure virtual and must be
* implemented by the derived classes.
*/
class DataLayer
{
public:
/*!
* \brief The smallest acceptable probability
* for computations.
* Only declared here, its value is given by the
* out-of-class definition.
*/
static const double p_min ;
/*!
* \brief The log of the smallest acceptable
* probability.
* Only declared here, its value is given by the
* out-of-class definition.
*/
static const double p_min_log ;
/*!
* \brief The possible flip states.
* FORWARD has the value 0 and REVERSE the
* value 1.
*/
enum flip_states{FORWARD=0, REVERSE} ;
/*!
* \brief Default constructor.
*/
DataLayer() ;
/*!
* \brief Constructs an object with the
* given data.
* The model is not initialised yet, as
* the model number of categories
* depends on the final (derived) class.
* \param data the data, received by const
* reference.
* \param n_class the number of classes
* of the model.
* \param n_shift the number of shift
* states of the model.
* \param flip whether flipping is allowed.
*/
DataLayer(const Matrix2D<int>& data,
size_t n_class,
size_t n_shift,
bool flip) ;
/*!
* \brief Constructs an object with the
* given data.
* The model is not initialised yet, as
* the model number of categories
* depends on the final (derived) class.
* \param data the data, received as an rvalue
* reference (presumably moved into the object,
* TODO confirm in the implementation).
* \param n_class the number of classes
* of the model.
* \param n_shift the number of shift
* states of the model.
* \param flip whether flipping is allowed.
*/
DataLayer(Matrix2D<int>&& data,
size_t n_class,
size_t n_shift,
bool flip) ;
/*!
* \brief Constructs an object with the
* given data and model.
* The model dimensions set the number of
* classes and the shifting freedom.
* \param data the data, received by const
* reference.
* \param model the model, received by const
* reference.
* \param flip whether flipping is allowed.
*/
DataLayer(const Matrix2D<int>& data,
const Matrix3D<double>& model,
bool flip) ;
/*!
* \brief Constructs an object with the
* given data and model.
* The model dimensions set the number of
* classes and the shifting freedom.
* \param data the data, received as an rvalue
* reference (presumably moved into the object,
* TODO confirm in the implementation).
* \param model the model, received as an rvalue
* reference (presumably moved into the object,
* TODO confirm in the implementation).
* \param flip whether flipping is allowed.
*/
DataLayer(Matrix2D<int>&& data,
Matrix3D<double>&& model,
bool flip) ;
/*!
* \brief Destructor.
* It is virtual, as this class is meant to be
* used as a base class.
*/
virtual ~DataLayer() ;
/*!
* \brief Computes the log likelihood of the data
* given the current model parameters.
* Pure virtual, it must be implemented by the
* derived classes.
* \param loglikelihood a matrix to store the
* results. It should have the following dimensions :
* 1st : same as the data number of rows
* 2nd : same as the model number of classes
* 3rd : same as the number of shifts
* 4th : same as the number of flip states
* \param loglikelihood_max a vector containing the
* max value for each row of loglikelihood.
* Its length should be equal to the data row number.
* \param threads a pointer to a thread pool to
* parallelize the computations. If nullptr is given,
* the computations are performed by the main thread.
* \note The default value of threads is part of a
* virtual function declaration. Default arguments
* are selected from the static type used for the
* call, so overriding declarations should repeat
* the same default.
*/
virtual void compute_loglikelihoods(Matrix4D<double>& loglikelihood,
vector_d& loglikelihood_max,
ThreadPool* threads=nullptr) const = 0 ;
/*!
* \brief Updates the model given the posterior
* probabilities (the probabilities of each row
* in the data to be assigned to each class,
* for each shift and flip state).
* Pure virtual, it must be implemented by the
* derived classes.
* \param posterior_prob the data assignment probabilities to
* the different classes.
* \param threads a pointer to a thread pool to
* parallelize the computations. If nullptr is given,
* the computations are performed by the main thread.
* \note The default value of threads is part of a
* virtual function declaration. Default arguments
* are selected from the static type used for the
* call, so overriding declarations should repeat
* the same default.
*/
virtual void update_model(const Matrix4D<double>& posterior_prob,
ThreadPool* threads=nullptr) = 0 ;
/*!
* \brief Returns a copy of the current model.
* The model is returned by value. This method is
* virtual but not pure.
* \return the current model.
* 1st dim : the number of classes
* 2nd dim : the model length
* 3rd dim : the number of value categories.
*/
virtual Matrix3D<double> get_model() const ;
protected:
/*!
* \brief Checks that the argument has compatible
* dimensions with the data and models. If this is
* not the case, throws a std::invalid_argument with
* a relevant message.
* \param loglikelihood a matrix to store the
* results. It should have the following dimensions :
* 1st : same as the data row number
* 2nd : same as the model class number
* 3rd : same as the shift state number
* 4th : same as the flip state number
* \throw std::invalid_argument if the dimensions are
* incorrect.
*/
void check_loglikelihood_dim(const Matrix4D<double>& loglikelihood) const ;
/*!
* \brief Checks that the argument has compatible
* dimensions with the data and models. If this is
* not the case, throws a std::invalid_argument with
* a relevant message.
* \param loglikelihood_max a vector containing the
* max value for each row of loglikelihood.
* It should have a length equal to the data
* row number.
* \throw std::invalid_argument if the dimensions are
* incorrect.
*/
void check_loglikelihood_max_dim(const vector_d& loglikelihood_max) const ;
/*!
* \brief Checks that the argument has compatible
* dimensions with the data and models. If this is
* not the case, throws a std::invalid_argument with
* a relevant message.
* \param posterior_prob the matrix of posterior
* probabilities. It should have the following
* dimensions :
* 1st : same as the data row number
* 2nd : same as the model class number
* 3rd : same as the shift state number
* 4th : same as the flip state number
* \throw std::invalid_argument if the dimensions are
* incorrect.
*/
void check_posterior_prob_dim(const Matrix4D<double>& posterior_prob) const ;
/*!
* \brief the data.
*/
Matrix2D<int> data ;
/*!
* \brief the data model.
* 1st dim : the number of classes
* 2nd dim : the model length
* 3rd dim : the number of value categories.
*/
Matrix3D<double> model ;
/*!
* \brief whether flip is enabled.
*/
bool flip ;
/*!
* \brief the number of rows in the data.
*/
size_t n_row ;
/*!
* \brief the number of columns in the data.
*/
size_t n_col ;
/*!
* \brief the number of classes in the model.
*/
size_t n_class ;
/*!
* \brief the model length, its 2nd dimension.
*/
size_t l_model ;
/*!
* \brief the number of variable categories in
* the data. This is also the model 3rd
* dimension.
* Read counts are quantitative values and
* have a number of categories equal to one
* whereas DNA sequences are made of
* A,C,G,T (at least) and have 4 different
* categories.
*/
size_t n_category ;
/*!
* \brief the number of shift states.
*/
size_t n_shift ;
/*!
* \brief the number of flip states.
* Presumably 2 when flip is enabled and 1
* otherwise, TODO confirm against the
* constructors implementation.
*/
size_t n_flip ;
} ;
#endif // DATALAYER_HPP

Event Timeline