Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F71214300
SequenceModelExtenderApplication.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Jul 10, 08:30
Size
8 KB
Mime Type
text/x-c
Expires
Fri, Jul 12, 08:30 (2 d)
Engine
blob
Format
Raw Data
Handle
18919807
Attached To
R8820 scATAC-seq
SequenceModelExtenderApplication.cpp
View Options
#include <SequenceModelExtenderApplication.hpp>
#include <boost/program_options.hpp>
#include <string>
#include <iostream>
#include <utility>
// std::move()
#include <stdexcept>
// std::invalid_argument, std::runtime_error
#include <CorrelationMatrixCreator.hpp>
#include <Matrix2D.hpp>
#include <Matrix4D.hpp>
#include <SequenceLayer.hpp>
#include <SequenceModelComputer.hpp>
namespace
po
=
boost
::
program_options
;
SequenceModelExtenderApplication
::
SequenceModelExtenderApplication
(
int
argn
,
char
**
argv
)
:
file_bed
(
""
),
file_fasta
(
""
),
file_prob
(
""
),
from
(
0
),
to
(
0
),
ext
(
0
),
n_threads
(
0
),
runnable
(
false
)
{
this
->
parseOptions
(
argn
,
argv
)
;
}
SequenceModelExtenderApplication
::~
SequenceModelExtenderApplication
()
{}
int
SequenceModelExtenderApplication
::
run
()
{
if
(
this
->
runnable
)
{
// extend limits
int
ext_right
=
this
->
ext
/
2
;
int
ext_left
=
this
->
ext
-
ext_right
;
this
->
from
-=
ext_left
;
this
->
to
+=
ext_right
;
// create extended matrix
SequenceMatrixCreator
mc
(
this
->
file_bed
,
this
->
file_fasta
,
this
->
from
,
this
->
to
)
;
Matrix2D
<
int
>
data
=
mc
.
create_matrix
()
;
// compute model
// Matrix4D<double> prob(this->file_prob) ;
Matrix4D
<
double
>
prob
;
prob
.
load
(
this
->
file_prob
)
;
if
(
prob
.
get_dim
()[
0
]
!=
data
.
get_nrow
())
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! data matrix and probability matrix have "
"unequal row numbers (%zu and %zu)"
,
prob
.
get_dim
()[
0
],
data
.
get_nrow
())
;
throw
std
::
invalid_argument
(
msg
)
;
}
size_t
n_row
=
prob
.
get_dim
()[
0
]
;
size_t
n_class
=
prob
.
get_dim
()[
1
]
;
size_t
n_shift
=
prob
.
get_dim
()[
2
]
;
size_t
n_flip
=
prob
.
get_dim
()[
3
]
;
SequenceModelComputer
model_cp
(
std
::
move
(
data
),
prob
,
this
->
n_threads
)
;
Matrix2D
<
double
>
model
=
model_cp
.
get_model
()
;
// compute class prob
vector_d
class_prob
(
n_class
,
0.
)
;
double
p_tot
=
0.
;
for
(
size_t
i
=
0
;
i
<
n_row
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
n_class
;
j
++
)
{
for
(
size_t
k
=
0
;
k
<
n_shift
;
k
++
)
{
for
(
size_t
l
=
0
;
l
<
n_flip
;
l
++
)
{
class_prob
[
j
]
+=
prob
(
i
,
j
,
k
,
l
)
;
p_tot
+=
prob
(
i
,
j
,
k
,
l
)
;
}
}
}
}
for
(
auto
&
prob
:
class_prob
)
{
prob
/=
p_tot
;
}
// create a matrix containing the class prob in the 1st
// column and the model in the remaining columns
Matrix2D
<
double
>
model_final
(
model
.
get_nrow
(),
model
.
get_ncol
()
+
1
)
;
// 1st column contain the class prob
size_t
i_class
=
0
;
for
(
size_t
i
=
0
;
i
<
model_final
.
get_nrow
();
i
++
)
{
if
((
i
!=
0
)
and
(
i
%
4
==
0
))
{
i_class
++
;
}
model_final
(
i
,
0
)
=
class_prob
[
i_class
]
;
}
// fill the remaining with the model parameters
for
(
size_t
i
=
0
;
i
<
model
.
get_nrow
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
model
.
get_ncol
();
j
++
)
{
model_final
(
i
,
j
+
1
)
=
model
(
i
,
j
)
;
}
}
std
::
cout
<<
model_final
<<
std
::
endl
;
return
0
;
}
else
{
return
1
;
}
}
void
SequenceModelExtenderApplication
::
parseOptions
(
int
argn
,
char
**
argv
)
{
// no option to parse
if
(
argv
==
nullptr
)
{
std
::
string
message
=
"no options to parse!"
;
throw
std
::
invalid_argument
(
message
)
;
}
// help messages
std
::
string
desc_msg
=
"
\n
"
"SequenceModelExtender is an application to extend a sequence model of'
\n
"
"length L (L' = L - S + 1 where L is the number of column of the data
\n
"
"matrix and S the shifting freedom allowed during the classification)
\n
"
"to a new model of length L'' = L' + E (E is the number of columns
\n
"
"to add to the model) given the sequence matrix and the results of
\n
"
"its classification (posterior probability matrix).
\n
"
"To do this, the sequence matrix from which the original model was
\n
"
"computed is extended (plus 0.5*E columns on each side) and a model is
\n
"
"computed by realigning the extended matrix as the original matrix was,
\n
"
"using the given posterior probabities as class assignment
\n
"
"probabilities.
\n
"
"The extended model is returned through the stdout.
\n\n
"
;
std
::
string
opt_help_msg
=
"Produces this help message."
;
std
::
string
opt_thread_msg
=
"The number of threads dedicated to parallelize the computations,
\n
"
"by default 0 (no parallelization)."
;
std
::
string
opt_bed_msg
=
"The path to the BED file containing the references of the original matrix."
;
std
::
string
opt_fasta_msg
=
"The path to the fasta file containing sequences used to create the original
\n
"
"matrix."
;
std
::
string
opt_prob_msg
=
"The path to the file containing the posterior probabilities
\n
"
"of the original matrix classification."
;
std
::
string
opt_from_msg
=
"The upstream limit - in relative coordinate - of the region to build "
"around each reference center, as it was for the original matrix."
;
std
::
string
opt_to_msg
=
"The downstream limit - in relative coordinate - of the region to build "
"around each reference center, as it was for the original matrix."
;
std
::
string
opt_ext_msg
=
"The number of columns (E) to add to the model. Each side will be
\n
"
"extended by 0.5*E."
;
std
::
string
opt_binsize_msg
=
"The size of the bins, as it was for the original matrix."
;
// option parser
boost
::
program_options
::
variables_map
vm
;
boost
::
program_options
::
options_description
desc
(
desc_msg
)
;
desc
.
add_options
()
(
"help,h"
,
opt_help_msg
.
c_str
())
(
"bed"
,
po
::
value
<
std
::
string
>
(
&
(
this
->
file_bed
)),
opt_bed_msg
.
c_str
())
(
"fasta"
,
po
::
value
<
std
::
string
>
(
&
(
this
->
file_fasta
)),
opt_fasta_msg
.
c_str
())
(
"prob,"
,
po
::
value
<
std
::
string
>
(
&
(
this
->
file_prob
)),
opt_prob_msg
.
c_str
())
(
"from"
,
po
::
value
<
int
>
(
&
(
this
->
from
)),
opt_from_msg
.
c_str
())
(
"to"
,
po
::
value
<
int
>
(
&
(
this
->
to
)),
opt_to_msg
.
c_str
())
(
"ext"
,
po
::
value
<
int
>
(
&
(
this
->
ext
)),
opt_ext_msg
.
c_str
())
(
"thread"
,
po
::
value
<
std
::
size_t
>
(
&
(
this
->
n_threads
)),
opt_thread_msg
.
c_str
())
;
// parse
try
{
po
::
store
(
po
::
parse_command_line
(
argn
,
argv
,
desc
),
vm
)
;
po
::
notify
(
vm
)
;
}
catch
(
std
::
invalid_argument
&
e
)
{
std
::
string
msg
=
std
::
string
(
"Error! Invalid option given!
\n
"
)
+
std
::
string
(
e
.
what
())
;
throw
std
::
invalid_argument
(
msg
)
;
}
catch
(...)
{
throw
std
::
invalid_argument
(
"An unknown error occured while parsing the options"
)
;
}
bool
help
=
vm
.
count
(
"help"
)
;
// checks unproper option settings
if
(
this
->
file_bed
==
""
and
(
not
help
))
{
std
::
string
msg
(
"Error! No BED file was given (--bed)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
this
->
file_fasta
==
""
and
(
not
help
))
{
std
::
string
msg
(
"Error! No fasta file was given (--fasta)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
this
->
file_prob
==
""
and
(
not
help
))
{
std
::
string
msg
(
"Error! No posterior probability file was given (--prob)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
this
->
from
==
0
and
this
->
to
==
0
and
(
not
help
))
{
std
::
string
msg
(
"Error! No range given (--from and --to)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
this
->
from
>=
this
->
to
and
(
not
help
))
{
std
::
string
msg
(
"Error! from shoud be smaller than to (--from and --to)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
ext
<=
0
and
(
not
help
))
{
std
::
string
msg
(
"Error! the number of columns to add should be > 0 (--ext)!"
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
// help invoked, run() cannot be invoked
if
(
help
)
{
std
::
cout
<<
desc
<<
std
::
endl
;
this
->
runnable
=
false
;
return
;
}
// everything fine, run() can be called
else
{
this
->
runnable
=
true
;
return
;
}
}
int
main
(
int
argn
,
char
**
argv
)
{
SequenceModelExtenderApplication
app
(
argn
,
argv
)
;
return
app
.
run
()
;
}
Event Timeline
Log In to Comment