ConsensusSequenceLayer.hpp
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Fri, Sep 27, 01:18

ConsensusSequenceLayer.hpp
View Options

	#ifndef CONSENSUSSEQUENCELAYER_HPP
	#define CONSENSUSSEQUENCELAYER_HPP

	#include <Data3DLayer.hpp>

	#include <vector>
	#include <future> // std::promise

	#include <ThreadPool.hpp>
	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>


	typedef std::vector<double> vector_d ;


	class ConsensusSequenceLayer : public Data3DLayer
	{
	public:
	/*!
	* \brief Computes the log-likelihood of the sub-
	* sequence - stored in a given row (1st dimension) -
	* and starting at the offset <col> (2nd dimension) in
	* the given sequence matrix.
	* The subsequence length is determined by the model
	* lenght.
	* \param cons_seq a probability matrix containing
	* the consensus sequence. Its dimensions are :
	* 1st the different sequences
	* 2nd the sequences length
	* 3rd 4 for A,C,G,T.
	* \param row the row containing the sequence of
	* interest.
	* \param col the index at which the sub-sequence
	* is starting (1st index inside the subsequence
	* of interest).
	* \param model a model containing the probability
	* model.
	* \return the log-likelihood of the sub-sequence
	* given the model.
	* \throw std::invalid_argument if 1) the offset is
	* invalid, 2) the sequence and the model have
	* incompatible dimensions or 3) the model 2n dimension
	* is not 4 (A,C,G,T).
	*/
	static double score_consensus_subseq(const Matrix3D<double>& cons_seq,
	size_t row,
	size_t col,
	const Matrix2D<double>& model) ;
	public:
	/*!
	* \brief Constructs an object with the
	* given data and an empty (0 values)
	* model.
	* \param data the data.
	* \param n_class the number of classes
	* of the model.
	* \param n_shift the number of shift
	* states of the model.
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	*/
	ConsensusSequenceLayer(const Matrix3D<double>& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class) ;

	/*!
	* \brief Constructs an object with the
	* given data and an empty (0 values)
	* model.
	* \param data the data.
	* \param n_class the number of classes
	* of the model.
	* \param n_shift the number of shift
	* states of the model.
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	*/
	ConsensusSequenceLayer(Matrix3D<double>&& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class) ;

	/*!
	* \brief Construct an object with the
	* given data and model.
	* The shifting freedom is set to (data number
	* of columns) - (the model 2nd dimension)
	* + 1.
	* \param data the data.
	* \param model the model with the following
	* dimensions :
	* dim1 the number of classes
	* dim2 the model length
	* dim3 4 (A,C,G,T)
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	*/
	ConsensusSequenceLayer(const Matrix3D<double>& data,
	const Matrix3D<double>& model,
	bool flip,
	bool bckg_class) ;

	/*!
	* \brief Construct an object with the
	* given data and model.
	* The shifting freedom is set to (data number
	* of columns) - (the model 2nd dimension)
	* + 1.
	* \param data the data. The sequences
	* should be stored as integer values :
	* A:0, C:1, G:2, T:3, else:5.
	* \param model the model with the following
	* dimensions :
	* dim1 the number of classes
	* dim2 the model length
	* dim3 4 (A,C,G,T)
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	*/
	ConsensusSequenceLayer(Matrix3D<double>&& data,
	Matrix3D<double>&& model,
	bool flip,
	bool bckg_class) ;


	/*!
	* \brief Destructor.
	*/
	virtual ~ConsensusSequenceLayer() ;

	/*!
	* \brief Computes the log likelihood of the data
	* given the current model parameters.
	* \param logliklihood a matrix to store the
	* results. It should have the following dimensions :
	* 1st : same as the data number of row
	* 2nd : same as the model number of classes
	* 3rd : same as the number of shifts
	* 4th : same as the number of flip states
	* \param loglikelihood_max a vector containing the
	* max value for each row of loglikelihood.
	* Its length should be equal to the data row number.
	* \throw std::invalid_argument if the dimensions are
	* incorrect.
	*/
	virtual void compute_loglikelihoods(Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	ThreadPool* threads=nullptr) const override ;

	/*!
	* \brief Updates the model given the posterior
	* probabilities (the probabilities of each row
	* in the data to be assigned to each class,
	* for each shift and flip state).
	* 0 values inside the model are forbidden.
	* The lowest possible value inside the model
	* is SequenceLayer::p_min.
	* \param posterior_prob the data assignment probabilities to
	* the different classes.
	*/
	virtual void update_model(const Matrix4D<double>& posterior_prob,
	ThreadPool* threads=nullptr) override ;

	protected:
	/*!
	* \brief The routine that effectively performs the
	* loglikelihood computations.
	* \param from the index of the first row of the data
	* to considered.
	* \param to the index of the past last row of the data
	* to considered.
	* \param loglikelihood a matrix to store the
	* results. It should have the following dimensions :
	* 1st : same as the data number of row
	* 2nd : same as the model number of classes
	* 3rd : same as the number of shifts
	* 4th : same as the number of flip states
	* \param loglikelihood_max a vector containing the
	* max value for each row of log_likelihood.
	* Its length should be equal to the data row number.
	* \param model_log a vector containing the matrices with
	* the log values of the model for each class.
	* \param model_log_rev a vector containing the matrices with
	* the log values of the reverse strand model for each class
	* (the 1st position in the model becomes the last in the
	* reverse model and probabilities are swapped A<->T and C<->G).
	* \param done a promise to be filled when the routine
	* is done running.
	*/
	void compute_loglikelihoods_routine(size_t from,
	size_t to,
	Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	const std::vector<Matrix2D<double>>& model_log,
	const std::vector<Matrix2D<double>>& model_log_rev,
	std::promise<bool>& done) const ;

	/*!
	* \brief The routine that effectively update the model.
	* 0 values inside the model are forbidden.
	* The lowest possible value inside the model
	* is SequenceLayer::p_min.
	* \param from the index of the first row of the
	* posterior probabilities to considered.
	* \param to the index of the past last row of the
	* posterior probabilities to considered.
	* \param posterior_prob the data assignment probabilities
	* to the different classes.
	* \param
	* \param done a promise containing the partial model
	* computed from the given data slice. If several routines
	* work together at updating the model, they need to be
	* summed and normalized (by the column sum) to get the
	* final model.
	*/
	void update_model_routine(size_t from,
	size_t to,
	const Matrix4D<double>& posterior_prob,
	std::promise<Matrix3D<double>>& done) const ;

	/*!
	* \brief Sets the last class as background class with the
	* mean base probababilities of the data at each position.
	*/
	void create_bckg_class() ;


	protected:




	} ;


	#endif // CONSENSUSSEQUENCELAYER_HPP

ConsensusSequenceLayer.hppNo OneTemporaryActions

File Metadata

ConsensusSequenceLayer.hppView Options

Event Timeline

ConsensusSequenceLayer.hpp
No OneTemporary
Actions

ConsensusSequenceLayer.hpp
View Options