Page MenuHomec4science

split_by_length.py
No OneTemporary

File Metadata

Created
Tue, Sep 10, 08:36

split_by_length.py

import optparse
import sys
import os
import typing as tp
import pysam
def parse_lengths(lengths_str: str) -> tp.Tuple[int, int]:
tuple_lengths = ()
try:
if '-' not in lengths_str:
raise RuntimeError("invalid fragment lengths : %s" % lengths_str)
else:
duo = lengths_str.split('-')
# not two values
if len(duo) != 2:
raise RuntimeError("invalid list of fragment lengths : %s" % lengths_str)
duo = (int(duo[0]), int(duo[1]))
# to <= from
if duo[1] <= duo[0]:
raise RuntimeError("invalid list of fragment lengths : %s" % lengths_str)
except Exception as e:
print(e, sys.stderr)
raise RuntimeError("invalid list of fragment lengths : %s" % lengths_str)
return (duo[0], duo[1])
def split_bam(file_bam, file_out, lengths):
bam_in = pysam.AlignmentFile(file_in)
bam_out = pysam.AlignmentFile(file_out, template=bam_in, mode="wb")
for read in bam_in:
# don't know how to get fragment length from bam so convert the fragment to
# sam and parse the sam representation
# frag. with 1st read on reverse have negative length
read_l = abs(int(read.to_string().split('\t')[8]))
if read_l >= lengths[0] and read_l <= lengths[1]:
bam_out.write(read)
bam_in.close()
bam_out.close()
if __name__ == "__main__":
# parse options
usage = "usage: %s [options]" % os.path.basename(__file__)
epilog = "This program reads a bam file and filters out any read that is not associated with one of the given " \
"tag values." \
"Written by Romain Groux, February 2019"
parser = optparse.OptionParser(usage=usage, epilog=epilog)
parser.add_option("-i", "--input", dest="file_in", default=None, type="string", action="store",
help="the addresse of the bam file to filter.")
parser.add_option("-o", "--output", dest="file_out", default=None, type="string", action="store",
help="The addresse of the output file.")
parser.add_option("--length", dest="lengths", default=None, type="string", action="store",
help="A pair of non-overlapping [from,to] values that will be used to "
"filter (including the boundaries) the fragments, for instance --length 1-200.")
(options, args) = parser.parse_args()
file_in = options.file_in
file_out = options.file_out
from_to = parse_lengths(options.lengths)
# check options
if file_in is None:
print("Error! No input file given (-i)!", sys.stderr)
exit(1)
elif not os.path.isfile(file_in):
print("Error! %s does not exist!" % file_in)
exit(1)
elif file_out is None:
print("Error! no output file given (-o)!")
split_bam(file_in, file_out, from_to)

Event Timeline