Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90330173
ClassCorrelationMatrixCreatorApplication.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, Oct 31, 14:24
Size
12 KB
Mime Type
text/x-c
Expires
Sat, Nov 2, 14:24 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
22052489
Attached To
R8820 scATAC-seq
ClassCorrelationMatrixCreatorApplication.cpp
View Options
#include <ClassCorrelationMatrixCreatorApplication.hpp>
#include <CorrelationMatrixCreator.hpp>
#include <boost/program_options.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <stdexcept>
// std::invalid_argument
#include <Matrix4D.hpp>
namespace
po
=
boost
::
program_options
;
// the valid values for --method option
std
::
string
method_read
=
"read"
;
std
::
string
method_read_atac
=
"read_atac"
;
std
::
string
method_fragment
=
"fragment"
;
std
::
string
method_fragment_center
=
"fragment_center"
;
ClassCorrelationMatrixCreatorApplication
::
ClassCorrelationMatrixCreatorApplication
(
int
argn
,
char
**
argv
)
:
file_bed
(
""
),
file_bam
(
""
),
file_bai
(
""
),
file_prob
(
""
),
from
(
0
),
to
(
0
),
bin_size
(
0
),
class_k
(
0
),
method
(
CorrelationMatrixCreator
::
FRAGMENT
),
runnable
(
true
)
{
// parse command line options and set the fields
this
->
parseOptions
(
argn
,
argv
)
;
}
int
ClassCorrelationMatrixCreatorApplication
::
run
()
{
if
(
this
->
runnable
)
{
// load posterior prob
Matrix4D
<
double
>
prob
(
this
->
file_prob
)
;
// prob.load(this->file_prob) ;
size_t
n_row
=
prob
.
get_dim
()[
0
]
;
size_t
n_class
=
prob
.
get_dim
()[
1
]
;
size_t
n_shift
=
prob
.
get_dim
()[
2
]
;
size_t
n_flip
=
prob
.
get_dim
()[
3
]
;
bool
flip
=
n_flip
==
2
;
// compute class prob
std
::
vector
<
double
>
prob_colsum
(
n_class
,
0.
)
;
double
tot
=
0.
;
for
(
size_t
i
=
0
;
i
<
n_row
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
n_class
;
j
++
)
{
for
(
size_t
k
=
0
;
k
<
n_shift
;
k
++
)
{
for
(
size_t
l
=
0
;
l
<
n_flip
;
l
++
)
{
double
p
=
prob
(
i
,
j
,
k
,
l
)
;
prob_colsum
[
j
]
+=
p
;
tot
+=
p
;
}
}
}
}
for
(
auto
&
p
:
prob_colsum
)
{
p
/=
tot
;
}
// class of interest (1 -> 0-based)
this
->
class_k
-=
1
;
if
(
this
->
class_k
>
n_class
)
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! invalid class of interest (%zu and %zu found)"
,
this
->
class_k
,
n_class
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
// extend limits
int
ext
=
n_shift
-
1
;
int
ext_r
=
ext
/
2
;
int
ext_l
=
ext
-
ext_r
;
int
from2
=
this
->
from
-
ext_l
;
int
to2
=
this
->
to
+
ext_r
;
CorrelationMatrixCreator
mc
(
this
->
file_bed
,
this
->
file_bam
,
this
->
file_bai
,
from2
,
to2
,
this
->
bin_size
,
this
->
method
)
;
Matrix2D
<
int
>
data2
=
mc
.
create_matrix
()
;
size_t
n_col2
=
data2
.
get_ncol
()
;
// realign matrix
size_t
n_col3
=
this
->
to
-
this
->
from
+
1
;
Matrix2D
<
double
>
data3
(
n_row
,
n_col3
,
0.
)
;
for
(
size_t
i
=
0
;
i
<
n_row
;
i
++
)
{
for
(
size_t
s
=
0
;
s
<
n_shift
;
s
++
)
{
// --------------- forward ---------------
int
from_dat2_fw
=
s
;
int
to_dat2_fw
=
from_dat2_fw
+
n_col3
-
1
;
for
(
int
j_dat2_fw
=
from_dat2_fw
,
j_dat3_fw
=
0
;
j_dat2_fw
<=
to_dat2_fw
;
j_dat2_fw
++
,
j_dat3_fw
++
)
{
data3
(
i
,
j_dat3_fw
)
+=
(
prob
(
i
,
class_k
,
s
,
0
)
*
data2
(
i
,
j_dat2_fw
))
/
prob_colsum
[
class_k
]
;
}
// --------------- reverse ---------------
if
(
flip
)
{
int
from_dat2_rev
=
n_col2
-
1
-
s
;
int
to_dat2_rev
=
from_dat2_rev
-
(
n_col3
-
1
)
;
for
(
int
j_dat2_rev
=
from_dat2_rev
,
j_dat3_fw
=
0
;
j_dat2_rev
>=
to_dat2_rev
;
j_dat2_rev
--
,
j_dat3_fw
++
)
{
data3
(
i
,
j_dat3_fw
)
+=
(
prob
(
i
,
class_k
,
s
,
1
)
*
data2
(
i
,
j_dat2_rev
))
/
prob_colsum
[
class_k
]
;
}
}
}
}
// clean memory
prob
=
Matrix4D
<
double
>
()
;
// convert to integer
Matrix2D
<
int
>
data4
(
data3
.
get_nrow
(),
data3
.
get_ncol
())
;
for
(
size_t
i
=
0
;
i
<
data4
.
get_nrow
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
data4
.
get_ncol
();
j
++
)
{
data4
(
i
,
j
)
=
(
int
)
data3
(
i
,
j
)
;
}
}
// display integer matrix
std
::
cout
<<
data4
<<
std
::
endl
;
return
EXIT_SUCCESS
;
}
else
{
return
EXIT_FAILURE
;
}
}
void
ClassCorrelationMatrixCreatorApplication
::
parseOptions
(
int
argn
,
char
**
argv
)
{
// no option to parse
if
(
argv
==
nullptr
)
{
std
::
string
message
=
"no options to parse!"
;
throw
std
::
invalid_argument
(
message
)
;
}
// help messages
std
::
string
desc_msg
=
"
\n
"
"ClassCorrelationMatrixCreator is an application that retro-compute
\n
"
"a fictive count matrix of a given class K, from a partition, assuming
\n
"
"that the classification is true.
\n
"
"To construct the final matrix M3, the original matrix M1 that was
\n
"
"partitioned is computed and extended into a matrix M2. The original
\n
"
"matrix M1 has L1 column and was partitioned allowing S shifting freedom.
\n
"
"A number of extra columns E, overall, is added to the matrix M1 (~E/2 on
\n
"
"each side). The final matrix M3, of L1 columns, is then computed. A row
\n
"
"of the final matrix M3 is the weighted average of each of the S possibles
\n
"
"slices of the corresponding row in M2. The weight used are th
\n
"
"probabilities with which this row was assigned to class K, for each of
\n
"
"the S slices."
;
std
::
string
opt_help_msg
=
"Produces this help message."
;
std
::
string
opt_bed_msg
=
"The path to the BED file containing the references."
;
std
::
string
opt_bam_msg
=
"The path to the BAM file containing the targets."
;
std
::
string
opt_bai_msg
=
"The path to the BAI file containing the BAM file index."
;
std
::
string
opt_prob_msg
=
"The path to the file containing the assignment probabilities
\n
"
"(the partition)."
;
std
::
string
opt_from_msg
=
"The most upstream position - in relative coordinate - of the regions
\n
"
"in the original matrix."
;
std
::
string
opt_to_msg
=
"The most downstream position - in relative coordinate - of the regions
\n
"
"in the original matrix."
;
std
::
string
opt_classk_msg
=
"The index (1-based) of the class of interest."
;
std
::
string
opt_binsize_msg
=
"The size of the bins, in the original matrix."
;
char
tmp
[
4096
]
;
sprintf
(
tmp
,
"How the data in the BAM file should be handled when computing
\n
"
"the number of counts in each bin.
\n
"
"
\t\"
%s
\"
uses each position within the reads (by default)
\n
"
"
\t\"
%s
\"
uses only the insertion site for ATAC-seq data
\n
"
"
\t\"
%s
\"
uses each position within the fragments
\n
"
"
\t\"
%s
\"
uses only the fragment central positions
\n
"
,
method_read
.
c_str
(),
method_read_atac
.
c_str
(),
method_fragment
.
c_str
(),
method_fragment_center
.
c_str
())
;
std
::
string
opt_method_msg
=
tmp
;
// option parser
boost
::
program_options
::
variables_map
vm
;
boost
::
program_options
::
options_description
desc
(
desc_msg
)
;
std
::
string
method
(
method_read
)
;
desc
.
add_options
()
(
"help,h"
,
opt_help_msg
.
c_str
())
(
"bed"
,
po
::
value
<
std
::
string
>
(
&
(
this
->
file_bed
)),
opt_bed_msg
.
c_str
())
(
"bam"
,
po
::
value
<
std
::
string
>
(
&
(
this
->
file_bam
)),
opt_bam_msg
.
c_str
())
(
"bai"
,
po
::
value
<
std
::
string
>
(
&
(
this
->
file_bai
)),
opt_bai_msg
.
c_str
())
(
"prob"
,
po
::
value
<
std
::
string
>
(
&
(
this
->
file_prob
)),
opt_bai_msg
.
c_str
())
(
"from"
,
po
::
value
<
int
>
(
&
(
this
->
from
)),
opt_from_msg
.
c_str
())
(
"to"
,
po
::
value
<
int
>
(
&
(
this
->
to
)),
opt_to_msg
.
c_str
())
(
"binSize"
,
po
::
value
<
int
>
(
&
(
this
->
bin_size
)),
opt_binsize_msg
.
c_str
())
(
"k"
,
po
::
value
<
size_t
>
(
&
(
this
->
class_k
)),
opt_classk_msg
.
c_str
())
(
"method"
,
po
::
value
<
std
::
string
>
(
&
(
method
)),
opt_method_msg
.
c_str
())
;
// parse
try
{
po
::
store
(
po
::
parse_command_line
(
argn
,
argv
,
desc
),
vm
)
;
po
::
notify
(
vm
)
;
}
catch
(
std
::
invalid_argument
&
e
)
{
std
::
string
msg
=
std
::
string
(
"Error! Invalid option given!
\n
"
)
+
std
::
string
(
e
.
what
())
;
throw
std
::
invalid_argument
(
msg
)
;
}
catch
(...)
{
throw
std
::
invalid_argument
(
"An unknown error occured while parsing the options"
)
;
}
bool
help
=
vm
.
count
(
"help"
)
;
// checks unproper option settings
if
(
this
->
file_bed
==
""
and
(
not
help
))
{
std
::
string
msg
(
"Error! No BED file was given (--bed)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
this
->
file_bam
==
""
and
(
not
help
))
{
std
::
string
msg
(
"Error! No BAM file was given (--bam)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
this
->
file_bai
==
""
and
(
not
help
))
{
std
::
string
msg
(
"Error! No BAM index file was given (--bai)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
this
->
file_prob
==
""
and
(
not
help
))
{
std
::
string
msg
(
"Error! No probability (partition) file was given (--prob)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
this
->
from
==
0
and
this
->
to
==
0
and
(
not
help
))
{
std
::
string
msg
(
"Error! No range given (--from and --to)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
this
->
from
>=
this
->
to
and
(
not
help
))
{
std
::
string
msg
(
"Error! from shoud be smaller than to (--from and --to)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
this
->
class_k
==
0
and
(
not
help
))
{
std
::
string
msg
(
"Error! no class of interest was given (--k)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
this
->
bin_size
<=
0
and
(
not
help
))
{
std
::
string
msg
(
"Error! bin size should be bigger than 0 (--binSize)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
method
!=
method_read
and
method
!=
method_read_atac
and
method
!=
method_fragment
and
method
!=
method_fragment_center
)
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! method should be %s, %s, %s or %s (--method)"
,
method_read
.
c_str
(),
method_read_atac
.
c_str
(),
method_fragment
.
c_str
(),
method_fragment_center
.
c_str
())
;
throw
std
::
invalid_argument
(
msg
)
;
}
// set method
if
(
method
==
method_read
)
{
this
->
method
=
CorrelationMatrixCreator
::
READ
;
}
else
if
(
method
==
method_read_atac
)
{
this
->
method
=
CorrelationMatrixCreator
::
READ_ATAC
;
}
else
if
(
method
==
method_fragment
)
{
this
->
method
=
CorrelationMatrixCreator
::
FRAGMENT
;
}
else
if
(
method
==
method_fragment_center
)
{
this
->
method
=
CorrelationMatrixCreator
::
FRAGMENT_CENTER
;
}
// help invoked, run() cannot be invoked
if
(
help
)
{
std
::
cout
<<
desc
<<
std
::
endl
;
this
->
runnable
=
false
;
return
;
}
// everything fine, run() can be called
else
{
this
->
runnable
=
true
;
return
;
}
}
int
main
(
int
argn
,
char
**
argv
)
{
ClassCorrelationMatrixCreatorApplication
app
(
argn
,
argv
)
;
return
app
.
run
()
;
}
Event Timeline
Log In to Comment