Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F82241181
split_by_length.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Sep 10, 08:36
Size
2 KB
Mime Type
text/x-python
Expires
Thu, Sep 12, 08:36 (2 d)
Engine
blob
Format
Raw Data
Handle
20670105
Attached To
R8820 scATAC-seq
split_by_length.py
View Options
import
optparse
import
sys
import
os
import
typing
as
tp
import
pysam
def
parse_lengths
(
lengths_str
:
str
)
->
tp
.
Tuple
[
int
,
int
]:
tuple_lengths
=
()
try
:
if
'-'
not
in
lengths_str
:
raise
RuntimeError
(
"invalid fragment lengths :
%s
"
%
lengths_str
)
else
:
duo
=
lengths_str
.
split
(
'-'
)
# not two values
if
len
(
duo
)
!=
2
:
raise
RuntimeError
(
"invalid list of fragment lengths :
%s
"
%
lengths_str
)
duo
=
(
int
(
duo
[
0
]),
int
(
duo
[
1
]))
# to <= from
if
duo
[
1
]
<=
duo
[
0
]:
raise
RuntimeError
(
"invalid list of fragment lengths :
%s
"
%
lengths_str
)
except
Exception
as
e
:
print
(
e
,
sys
.
stderr
)
raise
RuntimeError
(
"invalid list of fragment lengths :
%s
"
%
lengths_str
)
return
(
duo
[
0
],
duo
[
1
])
def
split_bam
(
file_bam
,
file_out
,
lengths
):
bam_in
=
pysam
.
AlignmentFile
(
file_in
)
bam_out
=
pysam
.
AlignmentFile
(
file_out
,
template
=
bam_in
,
mode
=
"wb"
)
for
read
in
bam_in
:
# don't know how to get fragment length from bam so convert the fragment to
# sam and parse the sam representation
# frag. with 1st read on reverse have negative length
read_l
=
abs
(
int
(
read
.
to_string
()
.
split
(
'
\t
'
)[
8
]))
if
read_l
>=
lengths
[
0
]
and
read_l
<=
lengths
[
1
]:
bam_out
.
write
(
read
)
bam_in
.
close
()
bam_out
.
close
()
if
__name__
==
"__main__"
:
# parse options
usage
=
"usage:
%s
[options]"
%
os
.
path
.
basename
(
__file__
)
epilog
=
"This program reads a bam file and filters out any read that is not associated with one of the given "
\
"tag values."
\
"Written by Romain Groux, February 2019"
parser
=
optparse
.
OptionParser
(
usage
=
usage
,
epilog
=
epilog
)
parser
.
add_option
(
"-i"
,
"--input"
,
dest
=
"file_in"
,
default
=
None
,
type
=
"string"
,
action
=
"store"
,
help
=
"the addresse of the bam file to filter."
)
parser
.
add_option
(
"-o"
,
"--output"
,
dest
=
"file_out"
,
default
=
None
,
type
=
"string"
,
action
=
"store"
,
help
=
"The addresse of the output file."
)
parser
.
add_option
(
"--length"
,
dest
=
"lengths"
,
default
=
None
,
type
=
"string"
,
action
=
"store"
,
help
=
"A pair of non-overlapping [from,to] values that will be used to "
"filter (including the boundaries) the fragments, for instance --length 1-200."
)
(
options
,
args
)
=
parser
.
parse_args
()
file_in
=
options
.
file_in
file_out
=
options
.
file_out
from_to
=
parse_lengths
(
options
.
lengths
)
# check options
if
file_in
is
None
:
print
(
"Error! No input file given (-i)!"
,
sys
.
stderr
)
exit
(
1
)
elif
not
os
.
path
.
isfile
(
file_in
):
print
(
"Error!
%s
does not exist!"
%
file_in
)
exit
(
1
)
elif
file_out
is
None
:
print
(
"Error! no output file given (-o)!"
)
split_bam
(
file_in
,
file_out
,
from_to
)
Event Timeline
Log In to Comment