#include <ReadLayer.hpp>

#include <stdexcept>    // std::invalid_argument
#include <limits>       // std::numeric_limits
#include <cmath>        // log(), exp(), pow()
#include <vector>
#include <future>       // std::promise, std::future
#include <utility>      // std::pair, std::move()
#include <functional>   // std::bind(), std::ref()

#include <Statistics.hpp>  // beta_pmf(), poisson_pmf()
#include <Random.hpp>      // rand_real_uniform(), rand_int_uniform()
#include <matrices.hpp>
#include <ThreadPool.hpp>
ReadLayer::ReadLayer(const matrix2d_i& data,
                     size_t n_class,
                     size_t n_shift,
                     bool flip,
                     ThreadPool* threads)
    : DataLayer(data, n_class, n_shift, flip),
      window_means(n_row, vector_d(n_shift, 0.))
{
    this->n_category = 1;

    // initialise the empty model
    this->model = matrix3d_d(this->n_class,
                             matrix2d_d(this->l_model,
                                        vector_d(this->n_category, 0)));

    // compute window means
    this->compute_window_means(threads);
}
ReadLayer::ReadLayer(const matrix2d_i& data,
                     const matrix3d_d& model,
                     bool flip,
                     ThreadPool* threads)
    : DataLayer(data, model, flip),
      window_means(n_row, vector_d(n_shift, 0.))
{
    // check that the model only has one category
    if (this->n_category > 1) {
        char msg[4096];
        sprintf(msg,
                "Error! model is expected to have length 1 on "
                "3rd dimension, not %zu",
                this->n_category);
        throw std::invalid_argument(msg);
    }

    // compute window means
    this->compute_window_means(threads);
}
ReadLayer::~ReadLayer()
{}
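// Usage sketch (added comment, not part of the original file): a ReadLayer is
// built either from a read count matrix plus the desired number of classes and
// shift states, or from a count matrix plus an already trained model whose 3rd
// dimension (categories) must have length 1, e.g.
//
//     matrix2d_i counts = /* n_row x n_col read counts */ ;
//     ThreadPool* pool  = /* hypothetical pool, or nullptr to run serially */ ;
//     ReadLayer layer(counts, 2, 11, true, pool) ;   // illustrative values only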
void ReadLayer::seed_model_randomly()
{
    // drawing random values from a beta distribution cannot be done with boost, so:
    // i)  generate a random number x in [0,1]
    // ii) compute f(x) where f is the beta density
    matrix2d_d prob(this->n_row, vector_d(this->n_class, 0.));
    double tot_sum = 0.;

    // sample the prob
    // beta distribution parameters
    double alpha = pow(this->n_row, -0.5);
    double beta  = 1.;
    for (size_t i = 0; i < this->n_row; i++) {
        double row_sum = 0.;
        for (size_t j = 0; j < this->n_class; j++) {
            double x = rand_real_uniform(0., 1.0);
            double p = std::max(ReadLayer::p_min, beta_pmf(x, alpha, beta));
            prob[i][j] = p;
            tot_sum += p;
            row_sum += p;
        }
        // normalize
        for (size_t j = 0; j < this->n_class; j++) {
            prob[i][j] /= row_sum;
        }
    }

    // compute the references
    for (size_t i = 0; i < this->n_row; i++) {
        for (size_t j = 0; j < this->n_class; j++) {
            for (size_t j_ref = 0, j_dat = this->n_shift / 2;
                 j_ref < this->l_model;
                 j_ref++, j_dat++) {
                this->model[j][j_ref][0] += (this->data[i][j_dat] * prob[i][j]);
            }
        }
    }
}
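// NOTE (added comment, not in the original): the loop above does not draw from
// a Beta distribution directly. It evaluates the Beta(alpha, beta) density at a
// uniformly drawn x and uses that value, floored at p_min, as a class weight
// that is then normalised per row. The seeded model is the data matrix weighted
// by these probabilities, aggregated over the central window of length l_model.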
void ReadLayer::seed_model_sampling()
{
    std::vector<bool> choosen(this->n_row, false);
    for (size_t i = 0; i < this->n_class; ) {
        size_t index = rand_int_uniform(size_t(0), size_t(this->n_row - 1));
        // already chosen
        if (choosen[index]) {
            ;
        }
        // not yet chosen as reference
        else {
            for (size_t j_ref = 0, j_dat = this->n_shift / 2;
                 j_ref < this->l_model;
                 j_ref++, j_dat++) {
                this->model[i][j_ref][0] = this->data[index][j_dat];
            }
            choosen[index] = true;
            i++;
        }
    }
}
void ReadLayer::seed_model_toy()
{
    // sample data to initialise the references
    std::vector<bool> choosen(this->n_row, false);
    for (size_t i = 0; i < this->n_class; ) {
        size_t index = i;
        // already chosen
        if (choosen[index]) {
            ;
        }
        // not yet chosen as reference
        else {
            for (size_t j_ref = 0, j_dat = this->n_shift / 2;
                 j_ref < this->l_model;
                 j_ref++, j_dat++) {
                this->model[i][j_ref][0] = this->data[index][j_dat];
            }
            choosen[index] = true;
            i++;
        }
    }
}
void ReadLayer::compute_loglikelihoods(matrix4d_d& loglikelihood,
                                       vector_d& loglikelihood_max,
                                       ThreadPool* threads) const
{
    // dimension checks
    this->check_loglikelihood_dim(loglikelihood);
    this->check_loglikelihood_max_dim(loglikelihood_max);

    // don't parallelize
    if (threads == nullptr) {
        std::promise<bool> promise;
        std::future<bool> future = promise.get_future();
        this->compute_loglikelihoods_routine(0,
                                             this->n_row,
                                             std::ref(loglikelihood),
                                             std::ref(loglikelihood_max),
                                             promise);
        future.get();
    }
    // parallelize
    else {
        size_t n_threads = threads->getNThread();

        // compute the slices on which each thread will work
        std::vector<std::pair<size_t, size_t>> slices =
            ThreadPool::split_range(0, this->n_row, n_threads);

        // get promises and futures
        // the function run by the threads will simply fill the promise with
        // "true" to indicate that they are done
        std::vector<std::promise<bool>> promises(n_threads);
        std::vector<std::future<bool>> futures(n_threads);
        for (size_t i = 0; i < n_threads; i++) {
            futures[i] = promises[i].get_future();
        }

        // distribute work to threads
        // -------------------------- threads start --------------------------
        for (size_t i = 0; i < n_threads; i++) {
            auto slice = slices[i];
            threads->addJob(std::move(
                std::bind(&ReadLayer::compute_loglikelihoods_routine,
                          this,
                          slice.first,
                          slice.second,
                          std::ref(loglikelihood),
                          std::ref(loglikelihood_max),
                          std::ref(promises[i]))));
        }

        // wait until all threads are done working
        for (auto& future : futures) {
            future.get();
        }
        // -------------------------- threads stop ---------------------------
    }
}
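// NOTE (added comment, not in the original): the parallel branch above is a
// simple fork/join pattern. The row range is split into n_threads slices, each
// worker fills its std::promise<bool> when it finishes, and the caller blocks
// on the matching futures. A minimal sketch of the same idea, assuming a
// ThreadPool::addJob() that accepts any callable:
//
//     std::promise<bool> p ;
//     std::future<bool>  f = p.get_future() ;
//     threads->addJob([&p]() { /* work on one slice */ p.set_value(true) ; }) ;
//     f.get() ;   // join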
void ReadLayer::compute_loglikelihoods_routine(size_t from,
                                               size_t to,
                                               matrix4d_d& loglikelihood,
                                               vector_d& loglikelihood_max,
                                               std::promise<bool>& done) const
{
    // normalize the models
    matrix3d_d model_norm = this->model;
    for (size_t i = 0; i < this->n_class; i++) {
        double mean = 0.;
        for (size_t j = 0; j < this->l_model; j++) {
            mean += model_norm[i][j][0];
        }
        mean /= this->l_model;
        for (size_t j = 0; j < this->l_model; j++) {
            model_norm[i][j][0] /= mean;
        }
    }

    // compute log likelihood
    for (size_t i = from; i < to; i++) {
        // set max to min possible value
        loglikelihood_max[i] = std::numeric_limits<double>::lowest();

        for (size_t j = 0; j < this->n_class; j++) {
            for (size_t s_fw = 0, s_rev = this->n_shift - 1;
                 s_fw < this->n_shift;
                 s_fw++, s_rev--) {
                // slice is [from_fw, to)
                //       from_dat_fw             to_dat_fw    [from_dat_fw, to_dat_fw]
                //  fw   |---------->>>----------|
                //       ----------------------------------> data
                //  rev  |----------<<<----------|            [from_dat_rev, to_dat_rev]
                //       to_dat_rev              from_dat_rev
                // to_dat_rev can be -1 -> int

                // log likelihood
                double ll_fw  = 0.;
                double ll_rev = 0.;
                // --------------- forward ---------------
                size_t from_dat_fw = s_fw;
                size_t to_dat_fw   = from_dat_fw + this->l_model - 1;
                // --------------- reverse ---------------
                size_t from_dat_rev = this->n_col - 1 - s_fw;
                // size_t to_dat_rev = from_dat_rev - (this->l_model - 1) ;

                for (size_t j_dat_fw = from_dat_fw, j_ref_fw = 0,
                            j_dat_rev = from_dat_rev;
                     j_dat_fw < to_dat_fw;
                     j_dat_fw++, j_ref_fw++, j_dat_rev--) {
                    double ll;
                    // --------------- forward ---------------
                    ll = log(poisson_pmf(this->data[i][j_dat_fw],
                                         model_norm[j][j_ref_fw][0] *
                                         this->window_means[i][s_fw]));
                    ll_fw += std::max(ll, ReadLayer::p_min_log);
                    // --------------- reverse ---------------
                    if (this->flip) {
                        ll = log(poisson_pmf(this->data[i][j_dat_rev],
                                             model_norm[j][j_ref_fw][0] *
                                             this->window_means[i][s_rev]));
                        ll_rev += std::max(ll, ReadLayer::p_min_log);
                    }
                }
                loglikelihood[i][j][from_dat_fw][flip_states::FORWARD] = ll_fw;
                // keep track of the max per row
                if (ll_fw > loglikelihood_max[i]) {
                    loglikelihood_max[i] = ll_fw;
                }
                if (this->flip) {
                    loglikelihood[i][j][from_dat_fw][flip_states::REVERSE] = ll_rev;
                    // keep track of the max per row
                    if (ll_rev > loglikelihood_max[i]) {
                        loglikelihood_max[i] = ll_rev;
                    }
                }
            }
        }
    }
    done.set_value(true);
}
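// NOTE (added comment, not in the original): for row i, class j and shift s,
// the routine above scores each position as a Poisson log density,
//
//     ll = log( poisson_pmf( data[i][pos],
//                            model_norm[j][ref] * window_means[i][s] ) )
//
// i.e. the class signal (normalised to mean 1 over the window) is scaled by the
// mean read count of the window so that the expected count matches the row's
// local coverage; each term is floored at p_min_log before being summed.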
void ReadLayer::update_model(const matrix4d_d& posterior_prob,
                             ThreadPool* threads)
{
    // computing sum over the columns (classes)
    size_t n_row   = posterior_prob.size();
    size_t n_class = posterior_prob[0].size();
    size_t n_shift = posterior_prob[0][0].size();
    size_t n_flip  = posterior_prob[0][0][0].size();
    vector_d colsum(n_class, 0.);
    for (size_t i = 0; i < n_row; i++) {
        for (size_t j = 0; j < n_class; j++) {
            for (size_t k = 0; k < n_shift; k++) {
                for (size_t l = 0; l < n_flip; l++) {
                    colsum[j] += posterior_prob[i][j][k][l];
                }
            }
        }
    }

    // don't parallelize
    if (threads == nullptr) {
        std::promise<matrix3d_d> promise;
        std::future<matrix3d_d> future = promise.get_future();
        this->update_model_routine(0,
                                   this->n_row,
                                   posterior_prob,
                                   colsum,
                                   promise);
        this->model = future.get();
    }
    // parallelize
    else {
        size_t n_threads = threads->getNThread();

        // compute the slices on which each thread will work
        std::vector<std::pair<size_t, size_t>> slices =
            ThreadPool::split_range(0, this->n_row, n_threads);

        // get promises and futures
        // each thread will fill its promise with the partial model computed
        // on its slice of the data
        std::vector<std::promise<matrix3d_d>> promises(n_threads);
        std::vector<std::future<matrix3d_d>> futures(n_threads);
        for (size_t i = 0; i < n_threads; i++) {
            futures[i] = promises[i].get_future();
        }

        // distribute work to threads
        // -------------------------- threads start --------------------------
        for (size_t i = 0; i < n_threads; i++) {
            auto slice = slices[i];
            threads->addJob(std::move(
                std::bind(&ReadLayer::update_model_routine,
                          this,
                          slice.first,
                          slice.second,
                          posterior_prob,
                          colsum,
                          std::ref(promises[i]))));
        }

        // reinitialise the model
        this->model = matrix3d_d(this->n_class,
                                 matrix2d_d(this->l_model,
                                            vector_d(this->n_category, 0)));

        // wait until all threads are done working
        // and update the model
        for (auto& future : futures) {
            matrix3d_d model_part = future.get();
            for (size_t i = 0; i < this->n_class; i++) {
                for (size_t j = 0; j < this->l_model; j++) {
                    for (size_t k = 0; k < this->n_category; k++) {
                        this->model[i][j][k] += model_part[i][j][k];
                    }
                }
            }
        }
        // -------------------------- threads stop ---------------------------
    }
}
void ReadLayer::update_model(const matrix4d_d& posterior_prob,
                             const vector_d& posterior_prob_colsum,
                             ThreadPool* threads)
{
    // don't parallelize
    if (threads == nullptr) {
        std::promise<matrix3d_d> promise;
        std::future<matrix3d_d> future = promise.get_future();
        this->update_model_routine(0,
                                   this->n_row,
                                   posterior_prob,
                                   posterior_prob_colsum,
                                   promise);
        this->model = future.get();
    }
    // parallelize
    else {
        size_t n_threads = threads->getNThread();

        // compute the slices on which each thread will work
        std::vector<std::pair<size_t, size_t>> slices =
            ThreadPool::split_range(0, this->n_row, n_threads);

        // get promises and futures
        // each thread will fill its promise with the partial model computed
        // on its slice of the data
        std::vector<std::promise<matrix3d_d>> promises(n_threads);
        std::vector<std::future<matrix3d_d>> futures(n_threads);
        for (size_t i = 0; i < n_threads; i++) {
            futures[i] = promises[i].get_future();
        }

        // distribute work to threads
        // -------------------------- threads start --------------------------
        for (size_t i = 0; i < n_threads; i++) {
            auto slice = slices[i];
            threads->addJob(std::move(
                std::bind(&ReadLayer::update_model_routine,
                          this,
                          slice.first,
                          slice.second,
                          posterior_prob,
                          posterior_prob_colsum,
                          std::ref(promises[i]))));
        }

        // reinitialise the model
        this->model = matrix3d_d(this->n_class,
                                 matrix2d_d(this->l_model,
                                            vector_d(this->n_category, 0)));

        // wait until all threads are done working
        // and update the model
        for (auto& future : futures) {
            matrix3d_d model_part = future.get();
            for (size_t i = 0; i < this->n_class; i++) {
                for (size_t j = 0; j < this->l_model; j++) {
                    for (size_t k = 0; k < this->n_category; k++) {
                        this->model[i][j][k] += model_part[i][j][k];
                    }
                }
            }
        }
        // -------------------------- threads stop ---------------------------
    }
}
void ReadLayer::update_model_routine(size_t from,
                                     size_t to,
                                     const matrix4d_d& posterior_prob,
                                     const vector_d& posterior_prob_colsum,
                                     std::promise<matrix3d_d>& promise) const
{
    // dimension checks
    this->check_posterior_prob_dim(posterior_prob);
    this->check_posterior_prob_colsum_dim(posterior_prob_colsum);

    // partial model
    matrix3d_d model = matrix3d_d(this->n_class,
                                  matrix2d_d(this->l_model,
                                             vector_d(this->n_category, 0.)));

    for (size_t n_class = 0; n_class < this->n_class; n_class++) {
        for (size_t i = from; i < to; i++) {
            for (size_t n_shift = 0; n_shift < this->n_shift; n_shift++) {
                // --------------- forward ---------------
                int from_dat_fw = n_shift;
                int to_dat_fw   = from_dat_fw + this->l_model - 1;
                for (int j_dat_fw = from_dat_fw, j_ref_fw = 0;
                     j_dat_fw <= to_dat_fw;
                     j_dat_fw++, j_ref_fw++) {
                    model[n_class][j_ref_fw][0] +=
                        (posterior_prob[i][n_class][n_shift][flip_states::FORWARD] *
                         this->data[i][j_dat_fw]) /
                        posterior_prob_colsum[n_class];
                }
                // --------------- reverse ---------------
                if (this->flip) {
                    int from_dat_rev = this->n_col - 1 - n_shift;
                    int to_dat_rev   = from_dat_rev - (this->l_model - 1);
                    for (int j_dat_rev = from_dat_rev, j_ref_fw = 0;
                         j_dat_rev >= to_dat_rev;
                         j_dat_rev--, j_ref_fw++) {
                        model[n_class][j_ref_fw][0] +=
                            (posterior_prob[i][n_class][n_shift][flip_states::REVERSE] *
                             this->data[i][j_dat_rev]) /
                            posterior_prob_colsum[n_class];
                    }
                }
            }
        }
    }
    promise.set_value(model);
}
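// NOTE (added comment, not in the original): the routine above is the
// weighted-mean update of the class signal. For class k and model position r,
//
//     model[k][r][0] = sum_{i,s,f} P(i,k,s,f) * data[i][pos(s,r,f)]
//                      / sum_{i,s,f} P(i,k,s,f)
//
// where P is the posterior probability and pos(s,r,f) maps a shift s and a
// reference position r to a data column, read forward or reverse depending on
// the flip state f. The denominator is passed in as posterior_prob_colsum;
// each thread returns only the numerator accumulated over its slice of rows.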
void ReadLayer::compute_window_means(ThreadPool* threads)
{
    // don't parallelize
    if (threads == nullptr) {
        std::promise<bool> promise;
        std::future<bool> future = promise.get_future();
        this->compute_window_means_routine(0, this->n_row, promise);
        future.get();
    }
    // parallelize
    else {
        size_t n_threads = threads->getNThread();

        // compute the slices on which each thread will work
        std::vector<std::pair<size_t, size_t>> slices =
            ThreadPool::split_range(0, this->n_row, n_threads);

        // get promises and futures
        // the function run by the threads will simply fill the promise with
        // "true" to indicate that they are done
        std::vector<std::promise<bool>> promises(n_threads);
        std::vector<std::future<bool>> futures(n_threads);
        for (size_t i = 0; i < n_threads; i++) {
            futures[i] = promises[i].get_future();
        }

        // distribute work to threads
        // -------------------------- threads start --------------------------
        for (size_t i = 0; i < n_threads; i++) {
            auto slice = slices[i];
            threads->addJob(std::move(
                std::bind(&ReadLayer::compute_window_means_routine,
                          this,
                          slice.first,
                          slice.second,
                          std::ref(promises[i]))));
        }

        // wait until all threads are done working
        for (auto& future : futures) {
            future.get();
        }
        // -------------------------- threads stop ---------------------------
    }
}
void ReadLayer::compute_window_means_routine(size_t from,
                                             size_t to,
                                             std::promise<bool>& done)
{
    double l_window = double(this->l_model);
    for (size_t i = from; i < to; i++) {
        for (size_t from = 0; from < this->n_shift; from++) {
            double sum = 0.;
            // slice is [from,to)
            size_t to = from + this->l_model;
            for (size_t j = from; j < to; j++) {
                sum += this->data[i][j];
            }
            this->window_means[i][from] = sum / l_window;
        }
    }
    done.set_value(true);
}
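// NOTE (added comment, not in the original): window_means[i][s] is simply the
// mean read count of the window of length l_model starting at offset s in row
// i. For instance, with l_model = 3 and a row starting {0, 2, 4, 0, ...}, the
// window at shift 0 covers {0, 2, 4} and its mean is 2.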
void ReadLayer::check_posterior_prob_colsum_dim(const vector_d& posterior_prob_colsum) const
{
    if (posterior_prob_colsum.size() != this->n_class) {
        char msg[4096];
        sprintf(msg,
                "Error! posterior_class_prob matrix size is not "
                "equal to model class number : %zu / %zu",
                posterior_prob_colsum.size(),
                this->n_class);
        throw std::invalid_argument(msg);
    }
}