Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F65087685
ReadLayer.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, May 31, 17:19
Size
10 KB
Mime Type
text/x-c
Expires
Sun, Jun 2, 17:19 (2 d)
Engine
blob
Format
Raw Data
Handle
18001950
Attached To
R8820 scATAC-seq
ReadLayer.cpp
View Options
#include <ReadLayer.hpp>
#include <stdexcept>
// std::invalid_argument
#include <limits>
// numeric_limits
#include <cmath>
// log(), exp(), pow()
#include <vector>
#include <Statistics.hpp>
// beta_pmf(), poisson_pmf()
#include <Random.hpp>
// rand_real_uniform(), rand_int_uniform()
#include <matrices.hpp>
ReadLayer
::
ReadLayer
(
const
matrix2d_i
&
data
,
size_t
n_class
,
size_t
n_shift
,
bool
flip
)
:
DataLayer
(
data
,
n_class
,
n_shift
,
flip
),
window_means
(
n_row
,
vector_d
(
n_shift
,
0.
))
{
this
->
n_category
=
1
;
// initialise the empty model
this
->
model
=
matrix3d_d
(
this
->
n_class
,
matrix2d_d
(
this
->
l_model
,
vector_d
(
this
->
n_category
,
0
)))
;
// compute window means
this
->
compute_window_means
()
;
}
ReadLayer
::
ReadLayer
(
const
matrix2d_i
&
data
,
const
matrix3d_d
&
model
,
bool
flip
)
:
DataLayer
(
data
,
model
,
flip
),
window_means
(
n_row
,
vector_d
(
n_shift
,
0.
))
{
// check that the model only has one category
if
(
this
->
n_category
>
1
)
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! model is expected to have length 1 on "
"3rd dimension, not %zu"
,
this
->
n_category
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
// compute window means
this
->
compute_window_means
()
;
}
ReadLayer
::~
ReadLayer
()
{}
void
ReadLayer
::
seed_model_randomly
()
{
// get random values from a beta distribution cannot be done using boost so
// i) generate random number [0,1] x
// ii) compute f(x) where f is beta distribution
matrix2d_d
prob
(
this
->
n_row
,
vector_d
(
this
->
n_class
,
0.
))
;
vector_d
prob_class
(
this
->
n_class
,
0.
)
;
double
tot_sum
=
0.
;
// sample the prob
// beta distribution parameters
double
alpha
=
pow
(
this
->
n_row
,
-
0.5
)
;
double
beta
=
1.
;
for
(
size_t
i
=
0
;
i
<
this
->
n_row
;
i
++
)
{
double
row_sum
=
0.
;
for
(
size_t
j
=
0
;
j
<
this
->
n_class
;
j
++
)
{
double
x
=
rand_real_uniform
(
0.
,
1.0
)
;
double
p
=
std
::
max
(
ReadLayer
::
p_min
,
beta_pmf
(
x
,
alpha
,
beta
))
;
prob
[
i
][
j
]
=
p
;
prob_class
[
j
]
+=
p
;
tot_sum
+=
p
;
row_sum
+=
p
;
}
// normalize
for
(
size_t
j
=
0
;
j
<
this
->
n_class
;
j
++
)
{
prob
[
i
][
j
]
/=
row_sum
;
}
}
// class prob
for
(
auto
&
p
:
prob_class
)
{
p
/=
tot_sum
;
}
// compute the refererences
for
(
size_t
i
=
0
;
i
<
this
->
n_row
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
this
->
n_class
;
j
++
)
{
for
(
size_t
j_ref
=
0
,
j_dat
=
this
->
n_shift
/
2
;
j_ref
<
this
->
l_model
;
j_ref
++
,
j_dat
++
)
{
this
->
model
[
j
][
j_ref
][
0
]
+=
(
this
->
data
[
i
][
j_dat
]
*
prob
[
i
][
j
])
;
}
}
}
// normalize
for
(
size_t
i
=
0
;
i
<
this
->
n_class
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
this
->
l_model
;
j
++
)
{
this
->
model
[
i
][
j
][
0
]
;
}
}
}
void
ReadLayer
::
seed_model_sampling
()
{
std
::
vector
<
bool
>
choosen
(
this
->
n_row
,
false
)
;
for
(
size_t
i
=
0
;
i
<
this
->
n_class
;
)
{
size_t
index
=
rand_int_uniform
(
size_t
(
0
),
size_t
(
this
->
n_row
-
1
))
;
// already choose
if
(
choosen
[
index
])
{
;
}
// not yet choosen as reference
else
{
for
(
size_t
j_ref
=
0
,
j_dat
=
this
->
n_shift
/
2
;
j_ref
<
this
->
l_model
;
j_ref
++
,
j_dat
++
)
{
this
->
model
[
i
][
j_ref
][
0
]
=
this
->
data
[
index
][
j_dat
]
;
}
choosen
[
index
]
=
true
;
i
++
;
}
}
}
void
ReadLayer
::
seed_model_toy
()
{
// sample data to initialise the references
std
::
vector
<
bool
>
choosen
(
this
->
n_row
,
false
)
;
for
(
size_t
i
=
0
;
i
<
this
->
n_class
;
)
{
size_t
index
=
i
;
// already choose
if
(
choosen
[
index
])
{
;
}
// not yet choosen as reference
else
{
for
(
size_t
j_ref
=
0
,
j_dat
=
this
->
n_shift
/
2
;
j_ref
<
this
->
l_model
;
j_ref
++
,
j_dat
++
)
{
this
->
model
[
i
][
j_ref
][
0
]
=
this
->
data
[
index
][
j_dat
]
;
}
choosen
[
index
]
=
true
;
i
++
;
}
}
}
void
ReadLayer
::
compute_loglikelihoods
(
matrix4d_d
&
loglikelihood
,
vector_d
&
loglikelihood_max
)
const
{
// dimension checks
this
->
check_loglikelihood_dim
(
loglikelihood
)
;
this
->
check_loglikelihood_max_dim
(
loglikelihood_max
)
;
// normalize the models
matrix3d_d
model_norm
=
this
->
model
;
for
(
size_t
i
=
0
;
i
<
this
->
n_class
;
i
++
)
{
double
mean
=
0.
;
for
(
size_t
j
=
0
;
j
<
this
->
l_model
;
j
++
)
{
mean
+=
model_norm
[
i
][
j
][
0
]
;
}
mean
/=
this
->
l_model
;
for
(
size_t
j
=
0
;
j
<
this
->
l_model
;
j
++
)
{
model_norm
[
i
][
j
][
0
]
/=
mean
;
}
}
// compute log likelihood
for
(
size_t
i
=
0
;
i
<
this
->
n_row
;
i
++
)
{
// set max to min possible value
loglikelihood_max
[
i
]
=
std
::
numeric_limits
<
double
>::
lowest
()
;
for
(
size_t
j
=
0
;
j
<
this
->
n_class
;
j
++
)
{
for
(
size_t
s_fw
=
0
,
s_rev
=
this
->
n_shift
-
1
;
s_fw
<
this
->
n_shift
;
s_fw
++
,
s_rev
--
)
{
// slice is [from_fw,to)
// from_dat_fw to_dat_fw [from_dat_fw, to_dat_fw]
// fw |---------->>>----------|
// ----------------------------------> data
// rev |----------<<<----------| [from_dat_rev, to_dat_rev]
// to_dat_rev can be -1 -> int
// to_dat_rev from_dat_rev
// log likelihood
double
ll_fw
=
0.
;
double
ll_rev
=
0.
;
// --------------- forward ---------------
size_t
from_dat_fw
=
s_fw
;
size_t
to_dat_fw
=
from_dat_fw
+
this
->
l_model
-
1
;
// --------------- reverse ---------------
size_t
from_dat_rev
=
this
->
n_col
-
1
-
s_fw
;
// size_t to_dat_rev = from_dat_rev - (this->l_model - 1) ;
for
(
size_t
j_dat_fw
=
from_dat_fw
,
j_ref_fw
=
0
,
j_dat_rev
=
from_dat_rev
;
j_dat_fw
<
to_dat_fw
;
j_dat_fw
++
,
j_ref_fw
++
,
j_dat_rev
--
)
{
double
ll
;
// --------------- forward ---------------
ll
=
log
(
poisson_pmf
(
this
->
data
[
i
][
j_dat_fw
],
model_norm
[
j
][
j_ref_fw
][
0
]
*
this
->
window_means
[
i
][
s_fw
]))
;
ll_fw
+=
std
::
max
(
ll
,
ReadLayer
::
p_min_log
)
;
// --------------- reverse ---------------
if
(
this
->
flip
)
{
ll
=
log
(
poisson_pmf
(
this
->
data
[
i
][
j_dat_rev
],
model_norm
[
j
][
j_ref_fw
][
0
]
*
this
->
window_means
[
i
][
s_rev
]))
;
ll_rev
+=
std
::
max
(
ll
,
ReadLayer
::
p_min_log
)
;
}
}
loglikelihood
[
i
][
j
][
from_dat_fw
][
flip_states
::
FORWARD
]
=
ll_fw
;
// keep track of the max per row
if
(
ll_fw
>
loglikelihood_max
[
i
])
{
loglikelihood_max
[
i
]
=
ll_fw
;
}
if
(
this
->
flip
)
{
loglikelihood
[
i
][
j
][
from_dat_fw
][
flip_states
::
REVERSE
]
=
ll_rev
;
// keep track of the max per row
if
(
ll_rev
>
loglikelihood_max
[
i
])
{
loglikelihood_max
[
i
]
=
ll_rev
;
}
}
}
}
}
}
void
ReadLayer
::
update_model
(
const
matrix4d_d
&
posterior_prob
)
{
// computing sum over the columns (classes)
size_t
n_row
=
posterior_prob
.
size
()
;
size_t
n_class
=
posterior_prob
[
0
].
size
()
;
size_t
n_shift
=
posterior_prob
[
0
][
0
].
size
()
;
size_t
n_flip
=
posterior_prob
[
0
][
0
][
0
].
size
()
;
vector_d
colsum
(
n_class
,
0.
)
;
for
(
size_t
i
=
0
;
i
<
n_row
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
n_class
;
j
++
)
{
for
(
size_t
k
=
0
;
k
<
n_shift
;
k
++
)
{
for
(
size_t
l
=
0
;
l
<
n_flip
;
l
++
)
{
colsum
[
j
]
+=
posterior_prob
[
i
][
j
][
k
][
l
]
;
}
}
}
}
this
->
update_model
(
posterior_prob
,
colsum
)
;
}
void
ReadLayer
::
update_model
(
const
matrix4d_d
&
posterior_prob
,
const
vector_d
&
posterior_prob_colsum
)
{
// dimension checks
this
->
check_posterior_prob_dim
(
posterior_prob
)
;
this
->
check_posterior_prob_colsum_dim
(
posterior_prob_colsum
)
;
// update models
// reset the parameters
this
->
model
=
matrix3d_d
(
this
->
n_class
,
matrix2d_d
(
this
->
l_model
,
vector_d
(
this
->
n_category
,
0.
)))
;
for
(
size_t
n_class
=
0
;
n_class
<
this
->
n_class
;
n_class
++
)
{
for
(
size_t
i
=
0
;
i
<
this
->
n_row
;
i
++
)
{
for
(
size_t
n_shift
=
0
;
n_shift
<
this
->
n_shift
;
n_shift
++
)
{
// --------------- forward ---------------
int
from_dat_fw
=
n_shift
;
int
to_dat_fw
=
from_dat_fw
+
this
->
l_model
-
1
;
for
(
int
j_dat_fw
=
from_dat_fw
,
j_ref_fw
=
0
;
j_dat_fw
<=
to_dat_fw
;
j_dat_fw
++
,
j_ref_fw
++
)
{
this
->
model
[
n_class
][
j_ref_fw
][
0
]
+=
(
posterior_prob
[
i
][
n_class
][
n_shift
][
flip_states
::
FORWARD
]
*
this
->
data
[
i
][
j_dat_fw
])
/
posterior_prob_colsum
[
n_class
]
;
}
// --------------- reverse ---------------
if
(
this
->
flip
)
{
int
from_dat_rev
=
this
->
n_col
-
1
-
n_shift
;
int
to_dat_rev
=
from_dat_rev
-
(
this
->
l_model
-
1
)
;
for
(
int
j_dat_rev
=
from_dat_rev
,
j_ref_fw
=
0
;
j_dat_rev
>=
to_dat_rev
;
j_dat_rev
--
,
j_ref_fw
++
)
{
this
->
model
[
n_class
][
j_ref_fw
][
0
]
+=
(
posterior_prob
[
i
][
n_class
][
n_shift
][
flip_states
::
REVERSE
]
*
this
->
data
[
i
][
j_dat_rev
])
/
posterior_prob_colsum
[
n_class
]
;
}
}
}
}
}
}
void
ReadLayer
::
compute_window_means
()
{
double
l_window
=
double
(
this
->
l_model
)
;
for
(
size_t
i
=
0
;
i
<
this
->
n_row
;
i
++
)
{
for
(
size_t
from
=
0
;
from
<
this
->
n_shift
;
from
++
)
{
double
sum
=
0.
;
// slice is [from,to)
size_t
to
=
from
+
this
->
l_model
;
for
(
size_t
j
=
from
;
j
<
to
;
j
++
)
{
sum
+=
this
->
data
[
i
][
j
]
;}
this
->
window_means
[
i
][
from
]
=
sum
/
l_window
;
}
}
}
void
ReadLayer
::
check_posterior_prob_colsum_dim
(
const
vector_d
&
posterior_prob_colsum
)
const
{
if
(
posterior_prob_colsum
.
size
()
!=
this
->
n_class
)
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! posterior_class_prob matrix size is not "
"equal to model class number : %zu / %zu"
,
posterior_prob_colsum
.
size
(),
this
->
n_class
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
}
Event Timeline
Log In to Comment