Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F60238250
EMJoint.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Apr 28, 13:39
Size
11 KB
Mime Type
text/x-c
Expires
Tue, Apr 30, 13:39 (2 d)
Engine
blob
Format
Raw Data
Handle
17323203
Attached To
R8820 scATAC-seq
EMJoint.hpp
View Options
#ifndef EMJOINT_HPP
#define EMJOINT_HPP
#include <EMBase.hpp>
#include <vector>
#include <string>
#include <Matrix2D.hpp>
#include <Matrix3D.hpp>
#include <Matrix4D.hpp>
#include <ReadLayer.hpp>
#include <SequenceLayer.hpp>
#include <ConsensusSequenceLayer.hpp>
typedef
std
::
vector
<
double
>
vector_d
;
class
EMJoint
:
public
EMBase
{
public
:
/*!
* \brief Constructs an object to partition the
* region according to all the given read densities
* with the given shifting and flipping freedom.
* \param read_matrices a vector containing all
* the different data densities (ChIP-seq or related
* signal) for the regions of interest. The matrix
* dimensions are as follow:
* 1st the number of regions,
* 2nd the region length.
* \param seq_matrix a matrix containing the DNA
* sequences for the regions of interest.
* \param n_class the number of region classes
* to search.
* \param n_iter the number of optimization iterations.
* \param n_shift the number of shift states allowed.
* \param flip whether flipping is allowed.
* \param bckg_class the last class is used to model
* the background by setting all its parameters, at all
* positions, to the background base probabilties (for
* sequence models) and the mean number of counts (for
* read signal models). Since the background is constant,
* this class will never be updated.
* \param seed a seed to initialise the random number
* generator.
* \param n_threads the number of parallel threads
* to run the computations. 0 means no parallel
* computing, everything is run on the main thread.
*/
EMJoint
(
const
std
::
vector
<
Matrix2D
<
int
>>&
read_matrices
,
size_t
n_class
,
size_t
n_iter
,
size_t
n_shift
,
bool
flip
,
bool
bckg_class
,
const
std
::
string
&
seed
=
""
,
size_t
n_threads
=
0
)
;
/*!
* \brief Constructs an object to partition the
* region according to all the given read densities
* with the given shifting and flipping freedom.
* \param read_matrices a vector containing all
* the different data densities (ChIP-seq or related
* signal) for the regions of interest. The matrix
* dimensions are as follow:
* 1st the number of regions,
* 2nd the region length.
* \param seq_matrix a matrix containing the DNA
* sequences for the regions of interest. The matrix
* dimensions are as follow:
* 1st the number of regions,
* 2nd the region length.
* \param n_class the number of region classes
* to search.
* \param n_iter the number of optimization iterations.
* \param n_shift the number of shift states allowed.
* \param flip whether flipping is allowed.
* \param bckg_class the last class is used to model
* the background by setting all its parameters, at all
* positions, to the background base probabilties (for
* sequence models) and the mean number of counts (for
* read signal models). Since the background is constant,
* this class will never be updated.
* \param seed a seed to initialise the random number
* generator.
* \param n_threads the number of parallel threads
* to run the computations. 0 means no parallel
* computing, everything is run on the main thread.
*/
EMJoint
(
std
::
vector
<
Matrix2D
<
int
>>&&
read_matrices
,
size_t
n_class
,
size_t
n_iter
,
size_t
n_shift
,
bool
flip
,
bool
bckg_class
,
const
std
::
string
&
seed
=
""
,
size_t
n_threads
=
0
)
;
/*!
* \brief Constructs an object to partition the
* region according to all the given read densities
* and sequences with the given shifting and
* flipping freedom.
* \param read_matrices a vector containing all
* the different data densities (ChIP-seq or related
* signal) for the regions of interest. The matrix
* dimensions are as follow:
* 1st the number of regions,
* 2nd the region length.
* \param seq_matrix a matrix containing the DNA
* sequences for the regions of interest. The matrix
* dimensions are as follow:
* 1st the number of regions,
* 2nd the region length.
* \param n_class the number of region classes
* to search.
* \param n_iter the number of optimization iterations.
* \param n_shift the number of shift states allowed.
* \param flip whether flipping is allowed.
* \param bckg_class the last class is used to model
* the background by setting all its parameters, at all
* positions, to the background base probabilties (for
* sequence models) and the mean number of counts (for
* read signal models). Since the background is constant,
* this class will never be updated.
* \param seed a seed to initialise the random number
* generator.
* \param n_threads the number of parallel threads
* to run the computations. 0 means no parallel
* computing, everything is run on the main thread.
*/
EMJoint
(
const
std
::
vector
<
Matrix2D
<
int
>>&
read_matrices
,
const
Matrix2D
<
int
>&
seq_matrix
,
size_t
n_class
,
size_t
n_iter
,
size_t
n_shift
,
bool
flip
,
bool
bckg_class
,
const
std
::
string
&
seed
=
""
,
size_t
n_threads
=
0
)
;
/*!
* \brief Constructs an object to partition the
* region according to all the given read densities
* and sequences with the given shifting and
* flipping freedom.
* \param read_matrices a vector containing all
* the different data densities (ChIP-seq or related
* signal) for the regions of interest. The matrix
* dimensions are as follow:
* 1st the number of regions,
* 2nd the region length.
* \param seq_matrix a matrix containing the DNA
* sequences for the regions of interest. The matrix
* dimensions are as follow:
* 1st the number of regions,
* 2nd the region length.
* \param n_class the number of region classes
* to search.
* \param n_iter the number of optimization iterations.
* \param n_shift the number of shift states allowed.
* \param flip whether flipping is allowed.
* \param bckg_class the last class is used to model
* the background by setting all its parameters, at all
* positions, to the background base probabilties (for
* sequence models) and the mean number of counts (for
* read signal models). Since the background is constant,
* this class will never be updated.
* \param seed a seed to initialise the random number
* generator.
* \param n_threads the number of parallel threads
* to run the computations. 0 means no parallel
* computing, everything is run on the main thread.
*/
EMJoint
(
std
::
vector
<
Matrix2D
<
int
>>&&
read_matrices
,
Matrix2D
<
int
>&&
seq_matrix
,
size_t
n_class
,
size_t
n_iter
,
size_t
n_shift
,
bool
flip
,
bool
bckg_class
,
const
std
::
string
&
seed
=
""
,
size_t
n_threads
=
0
)
;
EMJoint
(
const
EMJoint
&
other
)
=
delete
;
/*!
* \brief Destructor.
*/
virtual
~
EMJoint
()
override
;
/*!
* \brief Returns all layer read models.
* The models are in the same order
* as the data were given to the
* constructor.
* \return a vector containing the
* models.
*/
std
::
vector
<
Matrix3D
<
double
>>
get_read_models
()
const
;
/*!
* \brief Returns the sequence models.
* \return a vector containing the
* models.
*/
Matrix3D
<
double
>
get_sequence_models
()
const
;
/*!
* \brief Runs the sequence model optimization and
* the data classification.
* \return a code indicating how the optimization
* ended.
*/
virtual
EMJoint
::
exit_codes
classify
()
override
;
private
:
/*!
* \brief Computes the data log likelihood given the
* current models, for all layers and the joint
* likelihood for each state (the sum of the layer
* likelihoods for all layers, for a given state).
*/
virtual
void
compute_loglikelihood
()
override
;
/*!
* \brief This is a routine of compute_loglikelihood() that
* computes the joint loglikelihood by summing the
* individual loglikelihood obtained from each data layer.
* At the same time, this method rescales the loglikelihood
* values by substacting to each value the maximum
* loglikelihood value found in the same data row,
* for each layer.
* \param from the index of the first row
* in the data to consider.
* \param to the index of the past last row
* in the data to consider.
* \param done a promise to fill when the method
* is done.
*/
void
compute_loglikelihood_routine
(
size_t
from
,
size_t
to
,
std
::
promise
<
bool
>&
done
)
;
/*!
* \brief Computes the data posterior probabilties.
*/
virtual
void
compute_post_prob
()
override
;
/*!
* \brief The routine that effectively computes
* the posterior probabilties.
* \param from the index of the first row
* in the data to consider.
* \param to the index of the past last row
* in the data to consider.
* \param done the partial column (over the classes)
* sum of posterior probabilities. If several routines
* are running together, the colsums are retrieved by
* summing up the vectors together.
*/
void
compute_post_prob_routine
(
size_t
from
,
size_t
to
,
std
::
promise
<
vector_d
>&
post_prob_colsum
)
;
/*!
* \brief Update the data models for all layers, given
* the current posterior and class probabilities.
*/
virtual
void
update_models
()
override
;
/*!
* \brief the number of data layers.
*/
size_t
n_layer
;
/*!
* \brief the log likelihood buffers for each individual
* layer (one element per layer).
*/
std
::
vector
<
Matrix4D
<
double
>>
loglikelihood_layer
;
/*!
* \brief the max loglikelihood value for
* each each data layer (1st dimension)
* and each data row of the given layer
* (2nd dimension).
*/
std
::
vector
<
vector_d
>
loglikelihood_max
;
/*!
* \brief A vector containing the pointers
* to the objects managing all the read
* data and models.
*/
std
::
vector
<
ReadLayer
*>
read_layers
;
/*!
* \brief A pointer to the object managing
* the sequence data and their models.
*/
SequenceLayer
*
seq_layer
;
/*!
* \brief A pointer to the object managing
* the consensus sequences data and their
* models.
*/
ConsensusSequenceLayer
*
conseq_layer
;
}
;
#endif
// EMJOINT_HPP
Event Timeline
Log In to Comment