Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F75829294
Statistics.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Aug 4, 13:25
Size
9 KB
Mime Type
text/x-c
Expires
Tue, Aug 6, 13:25 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
19614865
Attached To
R8820 scATAC-seq
Statistics.hpp
View Options
#ifndef STATISTCS_HPP
#define STATISTICS_HPP
#include <vector>
#include <numeric>
#include <cmath>
// pow(), sqrt()
#include <assert.h>
#include <iostream>
/*!
* \brief Computes the density of x given a gaussian
* distribution with the given mean and standard
* deviation parameters.
* \param x the value of interest.
* \param mean the gaussian mean parameter value.
* \param sd the gaussian standard deviation value.
* \return the probability of x p(x|N(mean,sd))
*/
double
normal_pmf
(
double
x
,
double
mean
,
double
sd
)
;
/*!
* \brief Computes the probability of x given a Poisson
* distribution with the given lambda parameter.
* \param x the value of interest.
* \param lambda the lambda parameter.
* \return the probability of the value given the
* distribution.
*/
double
poisson_pmf
(
int
x
,
double
lambda
)
;
/*!
* \brief Computes the probability of x given a Beta
* distribution with the given alpha and beta parameters.
* \param x the value of interest
* \param alpha the alpha parameter.
* \param beta the beta parameter.
* \return the probability of the value given the
* distribution.
*/
double
beta_pmf
(
double
x
,
double
alpha
,
double
beta
)
;
/*!
* \brief Computes the weighted mean of a vector of measures <x> given their
* probability <p>. The sum of <p> is expected to be 1, if not it will
* be normalized.
* \param x a vector of measures.
* \param p a vector of probability associated to each element of <x>.
* \param from the starting index, if different from -1.
* \param to the ending index (not included), if different from -1.
* \return the mean.
*/
template
<
class
T
>
inline
double
mean
(
const
std
::
vector
<
T
>&
x
,
const
std
::
vector
<
double
>&
p
)
;
/*!
* \brief Computes the standard deviation of a vector of measures <x> given
* their probability <p>.
* \param x a vector of measures.
* \param p a vector of probability associated to each element of <x>.
* \param unbiased whether the unbiased standard deviation should be returned
* (the square root of the variance is divided by a normalizing factor).
* \return the standard deviation.
*/
template
<
class
T
>
inline
double
sd
(
const
std
::
vector
<
T
>&
x
,
const
std
::
vector
<
double
>&
p
,
bool
unbiased
=
true
)
;
/*! \brief Computes the unbiased standard deviation of a vector of
* measures <x> given their probability <p>.
* \param x a vector of measures.
* \param p a vector of probability associated to each element of <x>.
* \return the unbiased standard deviation.
*/
template
<
class
T
>
inline
double
sd_unbiased
(
const
std
::
vector
<
T
>
&
x
,
const
std
::
vector
<
double
>
&
p
)
;
/*! \brief Computes the biased (non-normalized) standard deviation of a vector of
* measures <x> given their probability <p>.
* \param x a vector of measures.
* \param p a vector of probability associated to each element of <x>.
* \return the biased standard deviation.
*/
template
<
class
T
>
inline
double
sd_biased
(
const
std
::
vector
<
T
>
&
x
,
const
std
::
vector
<
double
>
&
p
)
;
/*!
* \brief Computes the pearson correlation coefficient for two given vectors
* using v1[from1-->to1) and v2[from2-->to2). If from1, to1, from2, to2 are equal
* to -1, they are not accounted and the first and last elements considered are
* the beginning and the end of the vectors.
* \param v1 the first vector.
* \param v2 the second vector.
* \param from1 the index of the first value to use in the first vector.
* \param to1 the index after the last value to use in the fist vector.
* \param from2 the index of the first value to use in the second vector.
* \param to2 the index after the last value to use in the second vector.
* \return the correlation coefficient
*/
template
<
class
T
>
inline
double
cor_pearson
(
const
std
::
vector
<
T
>&
v1
,
const
std
::
vector
<
T
>&
v2
,
int
from1
=-
1
,
int
to1
=-
1
,
int
from2
=-
1
,
int
to2
=-
1
)
;
/*!
* \brief Computes the pearson correlation coefficient for two given vectors
* using v1[from1-->to1) and v2(to2<--from2] (the second vector is read backward,
* from the end).
* If from1, to1, from2, to2 are equal to -1, they are not accounted and the first
* and last elements considered are the beginning and the end of the vectors (for
* the last element considered in v2 to be the 1st of the vector (the 0th), to2
* should be set to -1, which is ignored. However this enables the default behaviour
* using the 0th element as the last one).
* \param v1 the first vector.
* \param v2 the second vector.
* \param from1 the index of the first value to use in the first vector.
* \param to1 the index after the last value to use in the fist vector.
* \param from2 the index of the first value to use in the second vector.
* \param to2 the index after the last value to use in the second vector.
* \return the correlation coefficient
*/
template
<
class
T
>
inline
double
cor_pearson_rev
(
const
std
::
vector
<
T
>&
v1
,
const
std
::
vector
<
T
>&
v2
,
int
from1
=-
1
,
int
to1
=-
1
,
int
from2
=-
1
,
int
to2
=-
1
)
;
template
<
class
T
>
inline
double
mean
(
const
std
::
vector
<
T
>&
x
,
const
std
::
vector
<
double
>&
p
)
{
assert
(
x
.
size
()
==
p
.
size
())
;
double
mean
=
0.
;
double
total
=
0.
;
for
(
auto
i
:
p
)
{
total
+=
i
;
}
for
(
size_t
i
=
0
;
i
<
x
.
size
();
i
++
)
{
mean
+=
x
[
i
]
*
(
p
[
i
]
/
total
)
;
}
return
mean
;
}
template
<
class
T
>
inline
double
sd
(
const
std
::
vector
<
T
>&
x
,
const
std
::
vector
<
double
>&
p
,
bool
unbiased
)
{
if
(
unbiased
)
{
return
sd_unbiased
(
x
,
p
)
;
}
else
{
return
sd_biased
(
x
,
p
)
;
}
}
template
<
class
T
>
inline
double
sd_unbiased
(
const
std
::
vector
<
T
>&
x
,
const
std
::
vector
<
double
>&
p
)
{
assert
(
x
.
size
()
==
p
.
size
())
;
double
v1
=
0.
;
double
v2
=
0.
;
double
m
=
mean
(
x
,
p
)
;
double
sd
=
0.
;
double
total
=
0.
;
for
(
auto
i
:
p
)
{
total
+=
i
;
}
for
(
size_t
i
=
0
;
i
<
x
.
size
();
i
++
)
{
double
p_norm
=
p
[
i
]
/
total
;
sd
+=
pow
(
x
[
i
]
-
m
,
2
)
*
(
p_norm
)
;
v1
+=
p_norm
;
v2
+=
pow
(
p_norm
,
2
)
;
}
return
sqrt
(
sd
/
(
v1
-
(
v2
/
v1
)))
;
}
template
<
class
T
>
inline
double
sd_biased
(
const
std
::
vector
<
T
>&
x
,
const
std
::
vector
<
double
>&
p
)
{
assert
(
x
.
size
()
==
p
.
size
())
;
double
m
=
mean
(
x
,
p
)
;
double
sd
=
0.
;
double
total
=
0.
;
for
(
auto
i
:
p
)
{
total
+=
i
;
}
for
(
size_t
i
=
0
;
i
<
x
.
size
();
i
++
)
{
double
p_norm
=
p
[
i
]
/
total
;
sd
+=
pow
(
x
[
i
]
-
m
,
2
)
*
(
p_norm
)
;
}
return
sqrt
(
sd
)
;
}
template
<
class
T
>
inline
double
cor_pearson
(
const
std
::
vector
<
T
>&
v1
,
const
std
::
vector
<
T
>&
v2
,
int
from1
,
int
to1
,
int
from2
,
int
to2
)
{
if
(
from1
==
-
1
)
{
from1
=
0
;
}
if
(
to1
==
-
1
)
{
to1
=
v1
.
size
()
;
}
if
(
from2
==
-
1
)
{
from2
=
0
;
}
if
(
to2
==
-
1
)
{
to2
=
v2
.
size
()
;
}
// 0 <= from < to < v.size()
// if from == to -> no sense, no value : return nan
// if from > to -> no sense, backward : does a segfault
assert
(
from1
>=
0
)
;
assert
(
from1
<
to1
)
;
assert
(
to1
<=
v1
.
size
())
;
assert
(
from2
>=
0
)
;
assert
(
from2
<
to2
)
;
assert
(
to2
<=
v2
.
size
())
;
// the terms of the formula
double
sum_v1v2
=
0.
;
double
sum_v1
=
0.
;
double
sum_v2
=
0.
;
double
sum_v1_2
=
0.
;
double
sum_v2_2
=
0.
;
// the effective number of values considered
double
n
=
to1
-
from1
;
for
(
size_t
i1
=
from1
,
i2
=
from2
;
i1
<
to1
;
i1
++
,
i2
++
)
{
sum_v1v2
+=
v1
[
i1
]
*
v2
[
i2
]
;
sum_v1
+=
v1
[
i1
]
;
sum_v2
+=
v2
[
i2
]
;
sum_v1_2
+=
pow
(
v1
[
i1
],
2
)
;
sum_v2_2
+=
pow
(
v2
[
i2
],
2
)
;
}
return
(
n
*
sum_v1v2
-
(
sum_v1
*
sum_v2
))
/
(
sqrt
(
n
*
sum_v1_2
-
pow
(
sum_v1
,
2
))
*
sqrt
(
n
*
sum_v2_2
-
pow
(
sum_v2
,
2
))
)
;
}
template
<
class
T
>
inline
double
cor_pearson_rev
(
const
std
::
vector
<
T
>&
v1
,
const
std
::
vector
<
T
>&
v2
,
int
from1
,
int
to1
,
int
from2
,
int
to2
)
{
if
(
from1
==
-
1
)
{
from1
=
0
;
}
if
(
to1
==
-
1
)
{
to1
=
v1
.
size
()
;
}
if
(
from2
==
-
1
)
{
from2
=
v2
.
size
()
-
1
;
}
if
(
to2
==
-
1
)
{
to2
=
0
;
}
// beware -> for v1 [from1, to1)
// for v2 [to2, from2]
// use from1 and to1 for the loop conditions
// 0 <= from < to < v1.size()
// 0 <= to2 < from2 < v2.size() because for v2 [to2, from2]
// if from == to -> no sense, no value : return nan
// if from > to -> no sense, backward : does a segfault
assert
(
from1
>=
0
)
;
assert
(
from1
<
to1
)
;
assert
(
to1
<=
v1
.
size
())
;
assert
(
to2
>=
0
)
;
assert
(
to2
<
from2
)
;
assert
(
from2
<
v2
.
size
())
;
// the terms of the formula
double
sum_v1v2
=
0.
;
double
sum_v1
=
0.
;
double
sum_v2
=
0.
;
double
sum_v1_2
=
0.
;
double
sum_v2_2
=
0.
;
// the effective number of values considered
double
n
=
to1
-
from1
;
for
(
int
i1
=
from1
,
i2
=
from2
;
i1
<
to1
;
i1
++
,
i2
--
)
{
sum_v1v2
+=
v1
[
i1
]
*
v2
[
i2
]
;
sum_v1
+=
v1
[
i1
]
;
sum_v2
+=
v2
[
i2
]
;
sum_v1_2
+=
pow
(
v1
[
i1
],
2
)
;
sum_v2_2
+=
pow
(
v2
[
i2
],
2
)
;
}
return
(
n
*
sum_v1v2
-
(
sum_v1
*
sum_v2
))
/
(
sqrt
(
n
*
sum_v1_2
-
pow
(
sum_v1
,
2
))
*
sqrt
(
n
*
sum_v2_2
-
pow
(
sum_v2
,
2
))
)
;
}
#endif
// STATISTICS_HPP
Event Timeline
Log In to Comment