Page MenuHomec4science

EMRead.hpp
No OneTemporary

File Metadata

Created
Tue, Apr 30, 13:19

EMRead.hpp

#ifndef EMREAD_HPP
#define EMREAD_HPP
#include <EMBase.hpp>
#include <vector>
#include <string>
#include <future> // std::promise
#include <Matrix3D.hpp>
#include <ReadLayer.hpp>
/*! \brief Shorthand for a vector of doubles. */
using vector_d = std::vector<double> ;
class EMRead : public EMBase
{ public:
/*!
* \brief Generates a model with mean number of counts
* in the data,at each position.
* \param data the count data.
* \return the motif. Its dimensions are:
* 1st 1, a one row matrix
* 2nd the motif length.
*/
static Matrix2D<double> generate_bckg_motif(const Matrix2D<int>& read_matrix,
size_t motif_length) ;
public:
/*!
* \brief Constructs an object to partition the
* region (rows) according to the shape of the signal
* with the given shifting and flipping freedom.
* \param read_matrix a matrix containing the read
* densitiy (ChIP-seq or related signal) for the
* regions of interest.
* \param n_class the number of region classes
* to search.
* \param n_iter the number of optimization iterations.
* \param n_shift the number of shift states allowed.
* \param flip whether flipping is allowed.
* \param bckg_class the last class is used to model the
* background by setting all its parameters, at all
* positions, to the mean number of counts. Since
* the background is constant, this class will never
* be updated.
* \param seed a seed to initialise the random number
* generator.
* \param n_threads the number of parallel threads
* to run the computations. 0 means no parallel
* computing, everything is run on the main thread.
*/
EMRead(const Matrix2D<int>& read_matrix,
size_t n_class,
size_t n_iter,
size_t n_shift,
bool flip,
bool bckg_class,
const std::string& seed="",
size_t n_threads=0) ;
/*!
* \brief Constructs an object to partition the
* region (rows) according to the shape of the signal
* with the given shifting and flipping freedom.
* \param read_matrix a matrix containing the read
* densitiy (ChIP-seq or related signal) for the
* regions of interest.
* \param n_class the number of region classes
* to search.
* \param n_iter the number of optimization iterations.
* \param n_shift the number of shift states allowed.
* \param flip whether flipping is allowed.
* \param bckg_class the last class is used to model the
* background by setting all its parameters, at all
* positions, to the mean number of counts. Since
* the background is constant, this class will never
* be updated.
* \param seed a seed to initialise the random number
* generator.
* \param n_threads the number of parallel threads
* to run the computations. 0 means no parallel
* computing, everything is run on the main thread.
*/
EMRead(Matrix2D<int>&& read_matrix,
size_t n_class,
size_t n_iter,
size_t n_shift,
bool flip,
bool bckg_class,
const std::string& seed="",
size_t n_threads=0) ;
EMRead(const EMRead& other) = delete ;
/*!
* \brief Destructor.
*/
virtual ~EMRead() override ;
/*!
* \brief Returns the class read signal model.
* \return the class read signal model.
*/
Matrix3D<double> get_read_models() const ;
/*!
* \brief Runs the read signal model optimization and
* the data classification.
* \return a code indicating how the optimization
* ended.
*/
virtual EMRead::exit_codes classify() override ;
private:
/*!
* \brief Computes the data log likelihood given the
* current models, for all layers and the joint
* likelihood for each state (the sum of the layer
* likelihoods for all layers, for a given state).
*/
virtual void compute_loglikelihood() override ;
/*!
* \brief This is a routine of compute_loglikelihood().
* This method rescales the loglikelihood values by
* substacting to each value the maximum loglikelihood
* value found in the same data row.
* This method
* \param from the index of the first row
* in the data to consider.
* \param to the index of the past last row
* in the data to consider.
* \param done a promise to fill when the method
* is done.
*/
void compute_loglikelihood_routine(size_t from,
size_t to,
std::promise<bool>& done) ;
/*!
* \brief Computes the data posterior probabilties.
*/
virtual void compute_post_prob() override ;
/*!
* \brief The routine that effectively computes
* the posterior probabilties.
* \param from the index of the first row
* in the data to consider.
* \param to the index of the past last row
* in the data to consider.
* \param done the partial column (over the classes)
* sum of posterior probabilities. If several routines
* are running together, the colsums are retrieved by
* summing up the vectors together.
*/
void compute_post_prob_routine(size_t from,
size_t to,
std::promise<vector_d>& post_prob_colsum) ;
/*!
* \brief Update the data models for all layers, given
* the current posterior and class probabilities.
*/
virtual void update_models() override ;
/*!
* \brief the max loglikelihood value for
* each data row.
*/
std::vector<double> loglikelihood_max ;
/*!
* \brief A pointer to the object managing
* the data and their model.
*/
ReadLayer* read_layer ;
} ;
#endif // EMREAD_HPP

Event Timeline