Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F75784851
DataLayer.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Aug 4, 06:22
Size
8 KB
Mime Type
text/x-c
Expires
Tue, Aug 6, 06:22 (2 d)
Engine
blob
Format
Raw Data
Handle
19608736
Attached To
R8820 scATAC-seq
DataLayer.hpp
View Options
#ifndef DATALAYER_HPP
#define DATALAYER_HPP
#include <iostream>
#include <future>
// std::promise, std::future
#include <Matrix2D.hpp>
#include <Matrix3D.hpp>
#include <Matrix4D.hpp>
#include <ThreadPool.hpp>
/*!
 * \brief Shorthand for a vector of doubles.
 * NOTE(review): <vector> is not included directly by this
 * header; presumably it is pulled in through one of the
 * headers included above - TODO confirm.
 */
using vector_d = std::vector<double> ;
/*!
 * \brief The DataLayer class defines the basic design
 * used to handle a probabilistic model together with
 * the data it describes.
 * A DataLayer is made of two parts :
 * 1) a data matrix
 * 2) a model
 * The model holds the parameters of a probabilistic
 * model, with one or more classes, that fits the data.
 * The likelihood of the data given the model can be
 * computed, and the model can be updated given a set
 * of posterior probabilities representing the
 * assignment of the data to the different classes.
 */
class DataLayer
{
    public:
        /*!
         * \brief The smallest probability value that is
         * acceptable in computations.
         * Only declared here, the value is defined outside
         * of the class declaration.
         */
        static const double p_min ;

        /*!
         * \brief The log of the smallest acceptable
         * probability.
         * Only declared here, the value is defined outside
         * of the class declaration.
         */
        static const double p_min_log ;

        /*!
         * \brief The possible flip states.
         */
        enum flip_states {FORWARD=0, REVERSE} ;

    public:
        /*!
         * \brief Default constructor.
         */
        DataLayer() ;

        /*!
         * \brief Constructs an object from the given
         * data (copied).
         * The model is not initialised here because its
         * number of categories depends on the final class.
         * \param data the data.
         * \param n_class the number of classes of the
         * model.
         * \param n_shift the number of shift states of
         * the model.
         * \param flip whether flipping is allowed.
         */
        DataLayer(const Matrix2D<int>& data,
                  size_t n_class,
                  size_t n_shift,
                  bool flip) ;

        /*!
         * \brief Constructs an object from the given
         * data (taken as an rvalue reference).
         * The model is not initialised here because its
         * number of categories depends on the final class.
         * \param data the data.
         * \param n_class the number of classes of the
         * model.
         * \param n_shift the number of shift states of
         * the model.
         * \param flip whether flipping is allowed.
         */
        DataLayer(Matrix2D<int>&& data,
                  size_t n_class,
                  size_t n_shift,
                  bool flip) ;

        /*!
         * \brief Constructs an object from the given
         * data and model (both copied).
         * The dimensions of the model set the number of
         * classes and the shifting freedom.
         * \param data the data.
         * \param model the model.
         * \param flip whether flipping is allowed.
         */
        DataLayer(const Matrix2D<int>& data,
                  const Matrix3D<double>& model,
                  bool flip) ;

        /*!
         * \brief Constructs an object from the given
         * data and model (both taken as rvalue references).
         * The dimensions of the model set the number of
         * classes and the shifting freedom.
         * \param data the data.
         * \param model the model.
         * \param flip whether flipping is allowed.
         */
        DataLayer(Matrix2D<int>&& data,
                  Matrix3D<double>&& model,
                  bool flip) ;

        /*!
         * \brief Destructor.
         */
        virtual ~DataLayer() ;

        /*!
         * \brief Computes the log likelihood of the data
         * given the current parameters of the model.
         * Pure virtual, it must be implemented by the
         * derived classes.
         * \param loglikelihood the matrix in which the
         * results are stored. Its dimensions should be :
         * 1st : the number of rows of the data
         * 2nd : the number of classes of the model
         * 3rd : the number of shift states
         * 4th : the number of flip states
         * \param loglikelihood_max a vector containing the
         * max value for each row of loglikelihood. Its
         * length should be equal to the number of rows of
         * the data.
         * \param threads a pointer to a thread pool used to
         * parallelize the computations. If nullptr is
         * given, the computations are performed by the
         * main thread.
         */
        virtual void compute_loglikelihoods(Matrix4D<double>& loglikelihood,
                                            vector_d& loglikelihood_max,
                                            ThreadPool* threads=nullptr) const = 0 ;

        /*!
         * \brief Updates the model given the posterior
         * probabilities, that is the probabilities of each
         * row of the data to be assigned to each class,
         * for each shift and flip state.
         * Pure virtual, it must be implemented by the
         * derived classes.
         * \param posterior_prob the probabilities of
         * assignment of the data to the different classes.
         * \param threads a pointer to a thread pool used to
         * parallelize the computations. If nullptr is
         * given, the computations are performed by the
         * main thread.
         */
        virtual void update_model(const Matrix4D<double>& posterior_prob,
                                  ThreadPool* threads=nullptr) = 0 ;

        /*!
         * \brief Gives a copy of the current model.
         * \return the current model, with dimensions :
         * 1st dim : the number of classes
         * 2nd dim : the model length
         * 3rd dim : the number of value categories.
         */
        virtual Matrix3D<double> get_model() const ;

    protected:
        /*!
         * \brief Verifies that the given matrix has
         * dimensions compatible with the data and the
         * model. If it does not, a std::invalid_argument
         * with a relevant message is thrown.
         * \param loglikelihood a matrix to store the
         * results. Its dimensions should be :
         * 1st : the number of rows of the data
         * 2nd : the number of classes of the model
         * 3rd : the number of shift states
         * 4th : the number of flip states
         * \throw std::invalid_argument if the dimensions
         * are incorrect.
         */
        void check_loglikelihood_dim(const Matrix4D<double>& loglikelihood) const ;

        /*!
         * \brief Verifies that the given vector has a
         * length compatible with the data and the model.
         * If it does not, a std::invalid_argument with a
         * relevant message is thrown.
         * \param loglikelihood_max a vector containing the
         * max value for each row of loglikelihood. Its
         * length should be equal to the number of rows of
         * the data.
         * \throw std::invalid_argument if the dimensions
         * are incorrect.
         */
        void check_loglikelihood_max_dim(const vector_d& loglikelihood_max) const ;

        /*!
         * \brief Verifies that the given matrix has
         * dimensions compatible with the data and the
         * model. If it does not, a std::invalid_argument
         * with a relevant message is thrown.
         * \param posterior_prob the matrix of posterior
         * probabilities to check. Its dimensions should
         * be :
         * 1st : the number of rows of the data
         * 2nd : the number of classes of the model
         * 3rd : the number of shift states
         * 4th : the number of flip states
         * \throw std::invalid_argument if the dimensions
         * are incorrect.
         */
        void check_posterior_prob_dim(const Matrix4D<double>& posterior_prob) const ;

        /*!
         * \brief The data.
         */
        Matrix2D<int> data ;

        /*!
         * \brief The model of the data.
         */
        Matrix3D<double> model ;

        /*!
         * \brief Whether flip is enabled.
         */
        bool flip ;

        /*!
         * \brief The number of rows in the data.
         */
        size_t n_row ;

        /*!
         * \brief The number of columns in the data.
         */
        size_t n_col ;

        /*!
         * \brief The number of classes in the model.
         */
        size_t n_class ;

        /*!
         * \brief The length of the model, that is its
         * 2nd dimension.
         */
        size_t l_model ;

        /*!
         * \brief The number of variable categories in the
         * data. This is also the 3rd dimension of the
         * model.
         * Read counts are quantitative values and have a
         * number of categories equal to one, whereas DNA
         * sequences are made of A,C,G,T (at least) and
         * have 4 different categories.
         */
        size_t n_category ;

        /*!
         * \brief The number of shift states.
         */
        size_t n_shift ;

        /*!
         * \brief The number of flip states.
         */
        size_t n_flip ;
} ;
#endif
// DATALAYER_HPP
Event Timeline
Log In to Comment