EMJoint.hpp
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sun, Apr 28, 13:39

EMJoint.hpp
View Options

	#ifndef EMJOINT_HPP
	#define EMJOINT_HPP


	#include <EMBase.hpp>

	#include <vector>
	#include <string>

	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>
	#include <ReadLayer.hpp>
	#include <SequenceLayer.hpp>
	#include <ConsensusSequenceLayer.hpp>


	typedef std::vector<double> vector_d ;

	class EMJoint : public EMBase
	{
	public:
	/*!
	* \brief Constructs an object to partition the
	* region according to all the given read densities
	* with the given shifting and flipping freedom.
	* \param read_matrices a vector containing all
	* the different data densities (ChIP-seq or related
	* signal) for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param seq_matrix a matrix containing the DNA
	* sequences for the regions of interest.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model
	* the background by setting all its parameters, at all
	* positions, to the background base probabilties (for
	* sequence models) and the mean number of counts (for
	* read signal models). Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMJoint(const std::vector<Matrix2D<int>>& read_matrices,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	/*!
	* \brief Constructs an object to partition the
	* region according to all the given read densities
	* with the given shifting and flipping freedom.
	* \param read_matrices a vector containing all
	* the different data densities (ChIP-seq or related
	* signal) for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param seq_matrix a matrix containing the DNA
	* sequences for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model
	* the background by setting all its parameters, at all
	* positions, to the background base probabilties (for
	* sequence models) and the mean number of counts (for
	* read signal models). Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMJoint(std::vector<Matrix2D<int>>&& read_matrices,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	/*!
	* \brief Constructs an object to partition the
	* region according to all the given read densities
	* and sequences with the given shifting and
	* flipping freedom.
	* \param read_matrices a vector containing all
	* the different data densities (ChIP-seq or related
	* signal) for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param seq_matrix a matrix containing the DNA
	* sequences for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model
	* the background by setting all its parameters, at all
	* positions, to the background base probabilties (for
	* sequence models) and the mean number of counts (for
	* read signal models). Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMJoint(const std::vector<Matrix2D<int>>& read_matrices,
	const Matrix2D<int>& seq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	/*!
	* \brief Constructs an object to partition the
	* region according to all the given read densities
	* and sequences with the given shifting and
	* flipping freedom.
	* \param read_matrices a vector containing all
	* the different data densities (ChIP-seq or related
	* signal) for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param seq_matrix a matrix containing the DNA
	* sequences for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model
	* the background by setting all its parameters, at all
	* positions, to the background base probabilties (for
	* sequence models) and the mean number of counts (for
	* read signal models). Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMJoint(std::vector<Matrix2D<int>>&& read_matrices,
	Matrix2D<int>&& seq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;



	EMJoint(const EMJoint& other) = delete ;

	/*!
	* \brief Destructor.
	*/
	virtual ~EMJoint() override ;

	/*!
	* \brief Returns all layer read models.
	* The models are in the same order
	* as the data were given to the
	* constructor.
	* \return a vector containing the
	* models.
	*/
	std::vector<Matrix3D<double>> get_read_models() const ;

	/*!
	* \brief Returns the sequence models.
	* \return a vector containing the
	* models.
	*/
	Matrix3D<double> get_sequence_models() const ;

	/*!
	* \brief Runs the sequence model optimization and
	* the data classification.
	* \return a code indicating how the optimization
	* ended.
	*/
	virtual EMJoint::exit_codes classify() override ;

	private:

	/*!
	* \brief Computes the data log likelihood given the
	* current models, for all layers and the joint
	* likelihood for each state (the sum of the layer
	* likelihoods for all layers, for a given state).
	*/
	virtual void compute_loglikelihood() override ;

	/*!
	* \brief This is a routine of compute_loglikelihood() that
	* computes the joint loglikelihood by summing the
	* individual loglikelihood obtained from each data layer.
	* At the same time, this method rescales the loglikelihood
	* values by substacting to each value the maximum
	* loglikelihood value found in the same data row,
	* for each layer.
	* \param from the index of the first row
	* in the data to consider.
	* \param to the index of the past last row
	* in the data to consider.
	* \param done a promise to fill when the method
	* is done.
	*/
	void compute_loglikelihood_routine(size_t from,
	size_t to,
	std::promise<bool>& done) ;

	/*!
	* \brief Computes the data posterior probabilties.
	*/
	virtual void compute_post_prob() override ;

	/*!
	* \brief The routine that effectively computes
	* the posterior probabilties.
	* \param from the index of the first row
	* in the data to consider.
	* \param to the index of the past last row
	* in the data to consider.
	* \param done the partial column (over the classes)
	* sum of posterior probabilities. If several routines
	* are running together, the colsums are retrieved by
	* summing up the vectors together.
	*/
	void compute_post_prob_routine(size_t from,
	size_t to,
	std::promise<vector_d>& post_prob_colsum) ;

	/*!
	* \brief Update the data models for all layers, given
	* the current posterior and class probabilities.
	*/
	virtual void update_models() override ;

	/*!
	* \brief the number of data layers.
	*/
	size_t n_layer ;
	/*!
	* \brief the log likelihood buffers for each individual
	* layer (one element per layer).
	*/
	std::vector<Matrix4D<double>> loglikelihood_layer ;
	/*!
	* \brief the max loglikelihood value for
	* each each data layer (1st dimension)
	* and each data row of the given layer
	* (2nd dimension).
	*/
	std::vector<vector_d> loglikelihood_max ;
	/*!
	* \brief A vector containing the pointers
	* to the objects managing all the read
	* data and models.
	*/
	std::vector<ReadLayer*> read_layers ;
	/*!
	* \brief A pointer to the object managing
	* the sequence data and their models.
	*/
	SequenceLayer* seq_layer ;
	/*!
	* \brief A pointer to the object managing
	* the consensus sequences data and their
	* models.
	*/
	ConsensusSequenceLayer* conseq_layer ;
	} ;

	#endif // EMJOINT_HPP

EMJoint.hppNo OneTemporaryActions

File Metadata

EMJoint.hppView Options

Event Timeline

EMJoint.hpp
No OneTemporary
Actions

EMJoint.hpp
View Options