Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F61893415
ClassReadDataCreator.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, May 9, 15:41
Size
6 KB
Mime Type
text/x-c
Expires
Sat, May 11, 15:41 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
17573331
Attached To
R8820 scATAC-seq
ClassReadDataCreator.cpp
View Options
#include <ClassReadDataCreator.hpp>
#include <string>
#include <vector>
#include <stdexcept>
// std::invalid_argument
#include <Matrix2D.hpp>
#include <Matrix4D.hpp>
template
<
class
T
>
std
::
ostream
&
operator
<<
(
std
::
ostream
&
stream
,
const
std
::
vector
<
T
>&
v
)
{
for
(
const
auto
&
x:
v
)
{
stream
<<
x
<<
" "
;
}
return
stream
;
}
ClassReadDataCreator
::
ClassReadDataCreator
(
const
std
::
string
&
bed_file_path
,
const
std
::
string
&
bam_file_path
,
const
std
::
string
&
bai_file_path
,
const
std
::
string
&
prob_file_path
,
int
from
,
int
to
,
int
bin_size
,
size_t
class_k
,
CorrelationMatrixCreator
::
methods
method
)
:
bed_file_path
(
bed_file_path
),
bam_file_path
(
bam_file_path
),
bai_file_path
(
bai_file_path
),
prob_file_path
(
prob_file_path
),
from
(
from
),
to
(
to
),
bin_size
(
bin_size
),
class_k
(
class_k
-
1
),
// (1-based -> 0-based)
method
(
method
)
{
;
}
ClassReadDataCreator
::~
ClassReadDataCreator
()
{
;
}
Matrix2D
<
int
>
ClassReadDataCreator
::
create_matrix
()
{
// load posterior prob
Matrix4D
<
double
>
prob
;
prob
.
load
(
this
->
prob_file_path
)
;
size_t
n_row
=
prob
.
get_dim
()[
0
]
;
size_t
n_class
=
prob
.
get_dim
()[
1
]
;
size_t
n_shift
=
prob
.
get_dim
()[
2
]
;
size_t
n_flip
=
prob
.
get_dim
()[
3
]
;
bool
flip
=
n_flip
==
2
;
// compute class prob
std
::
vector
<
double
>
prob_colsum
(
n_class
,
0.
)
;
double
tot
=
0.
;
for
(
size_t
i
=
0
;
i
<
n_row
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
n_class
;
j
++
)
{
for
(
size_t
k
=
0
;
k
<
n_shift
;
k
++
)
{
for
(
size_t
l
=
0
;
l
<
n_flip
;
l
++
)
{
double
p
=
prob
(
i
,
j
,
k
,
l
)
;
prob_colsum
[
j
]
+=
p
;
tot
+=
p
;
}
}
}
}
for
(
auto
&
p
:
prob_colsum
)
{
p
/=
tot
;
}
// check class of interest
if
(
this
->
class_k
>
n_class
)
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! invalid class of interest (%zu and %zu found)"
,
this
->
class_k
,
n_class
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
// realigning the data using the same shift as the partition
// will "trim" the matrix
// -> create a wider matrix such that the realigned matrix will
// be of the desired wide (original from and to parameters).
int
ext
=
n_shift
-
1
;
int
ext_r
=
ext
/
2
;
int
ext_l
=
ext
-
ext_r
;
int
from2
=
this
->
from
-
ext_l
;
int
to2
=
this
->
to
+
ext_r
;
CorrelationMatrixCreator
mc
(
this
->
bed_file_path
,
this
->
bam_file_path
,
this
->
bai_file_path
,
from2
,
to2
,
this
->
bin_size
,
this
->
method
)
;
Matrix2D
<
int
>
data2
=
mc
.
create_matrix
()
;
size_t
n_col2
=
data2
.
get_ncol
()
;
// check the compatibility with the prob matrix
if
(
data2
.
get_nrow
()
!=
prob
.
get_dim
()[
0
])
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! the number of references in the BED file is not equal to the"
"1st dimension of the probability matrix (%zu, %zu)."
,
data2
.
get_nrow
(),
prob
.
get_dim
()[
0
])
;
throw
(
std
::
invalid_argument
(
msg
))
;
}
else
if
(
data2
.
get_nrow
()
<
prob
.
get_dim
()[
1
])
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! the number of references in the BED file is smaller than"
"the number of classes in the probability matrix (%zu, %zu)."
,
data2
.
get_nrow
(),
prob
.
get_dim
()[
1
])
;
throw
(
std
::
invalid_argument
(
msg
))
;
}
else
if
(
data2
.
get_ncol
()
<
prob
.
get_dim
()[
2
])
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! the data matrix has fewer columns (bins) than the"
"the shifting freedom in the probability matrix (%zu, %zu)."
,
data2
.
get_ncol
(),
prob
.
get_dim
()[
2
])
;
throw
(
std
::
invalid_argument
(
msg
))
;
}
// realign matrix
// size_t n_col1 = data2.get_ncol() ;
size_t
n_col3
=
this
->
to
-
this
->
from
+
1
;
Matrix2D
<
double
>
data3
(
n_row
,
n_col3
,
0.
)
;
for
(
size_t
i
=
0
;
i
<
n_row
;
i
++
)
{
for
(
size_t
s
=
0
;
s
<
n_shift
;
s
++
)
{
// --------------- forward ---------------
int
from_dat2_fw
=
s
;
int
to_dat2_fw
=
from_dat2_fw
+
n_col3
-
1
;
for
(
int
j_dat2_fw
=
from_dat2_fw
,
j_dat3_fw
=
0
;
j_dat2_fw
<=
to_dat2_fw
;
j_dat2_fw
++
,
j_dat3_fw
++
)
{
data3
(
i
,
j_dat3_fw
)
+=
(
prob
(
i
,
class_k
,
s
,
0
)
*
data2
(
i
,
j_dat2_fw
))
/
prob_colsum
[
class_k
]
;
}
// --------------- reverse ---------------
if
(
flip
)
{
int
from_dat2_rev
=
n_col2
-
1
-
s
;
int
to_dat2_rev
=
from_dat2_rev
-
(
n_col3
-
1
)
;
for
(
int
j_dat2_rev
=
from_dat2_rev
,
j_dat3_fw
=
0
;
j_dat2_rev
>=
to_dat2_rev
;
j_dat2_rev
--
,
j_dat3_fw
++
)
{
data3
(
i
,
j_dat3_fw
)
+=
(
prob
(
i
,
class_k
,
s
,
1
)
*
data2
(
i
,
j_dat2_rev
))
/
prob_colsum
[
class_k
]
;
}
}
}
}
// clean memory
data2
=
Matrix2D
<
int
>
()
;
prob
=
Matrix4D
<
double
>
()
;
// convert to integer
Matrix2D
<
int
>
data4
(
data3
.
get_nrow
(),
data3
.
get_ncol
())
;
for
(
size_t
i
=
0
;
i
<
data4
.
get_nrow
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
data4
.
get_ncol
();
j
++
)
{
data4
(
i
,
j
)
=
(
int
)
data3
(
i
,
j
)
;
}
}
// clean memory
data3
=
Matrix2D
<
double
>
()
;
return
data4
;
}
Event Timeline
Log In to Comment