Page MenuHomec4science

fasta_filter.py
No OneTemporary

File Metadata

Created
Sat, Apr 27, 10:23

fasta_filter.py

#!/usr/bin/env python3
from __future__ import print_function
import sys
import os
import re
HELP_MESSAGE = """Usage:
fasta_filter seq_list fasta_file [-invert]
Filters out the sequences in seq_list from fasta_file.
If -invert is specified, filters out the sequences NOT included.
"""


def _read_id_list(path):
    """Map the first whitespace-separated token of each line to the full line.

    Lines are stripped of surrounding whitespace; blank lines are skipped
    (they previously raised IndexError on ``split()[0]``).  When the same
    first token appears more than once, the last occurrence wins.
    """
    entries = {}
    with open(path) as handle:
        for raw in handle:
            text = raw.strip()
            if not text:
                continue
            entries[text.split()[0]] = text
    return entries


def _split_fasta(fasta_path, entries, included_path, missing_path):
    """Partition the records of ``fasta_path`` into two output files.

    A record whose stripped header (the text after '>') is a key of
    ``entries`` goes to ``included_path`` with its header line replaced by
    '>' + the mapped value; every other line, including anything that
    precedes the first header, is copied unchanged to ``missing_path``.
    """
    # Open the input first so a missing FASTA file does not leave empty
    # output files behind.
    with open(fasta_path) as source, \
            open(included_path, 'w') as included, \
            open(missing_path, 'w') as missing:
        keep = False
        for line in source:
            if line.startswith('>'):
                header = line[1:].strip()
                keep = header in entries
                if keep:
                    included.write('>{}\n'.format(entries[header]))
                else:
                    missing.write(line)
            elif keep:
                included.write(line)
            else:
                missing.write(line)


def main(argv=None):
    """Run the filter; ``argv`` is the argument list without the program name.

    Expects ``seq_list fasta_file``.  Writes ``filtered/inc_<fasta name>``
    and ``filtered/mis_<fasta name>`` relative to the current directory.
    Returns the process exit status (0, also when only the usage text is
    printed, matching the script's historical behaviour).
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        print(HELP_MESSAGE)
        return 0

    list_path = os.path.realpath(args[0])
    fasta_path = os.path.realpath(args[1])

    # NOTE(review): the usage text advertises -invert, but this script has
    # never acted on it.  Both partitions are always written, so warn
    # instead of silently ignoring the flag.
    if '-invert' in args[2:]:
        print('Warning: -invert is not implemented and is ignored; '
              'both inc_ and mis_ files are written.', file=sys.stderr)

    # Output folder (created without spawning a shell)
    out_folder = 'filtered'
    if not os.path.isdir(out_folder):
        os.makedirs(out_folder)

    # Get already done sequences
    entries = _read_id_list(list_path)

    # Default names for the output files
    fasta_name = os.path.basename(fasta_path)
    included_path = os.path.join(out_folder, 'inc_' + fasta_name)
    missing_path = os.path.join(out_folder, 'mis_' + fasta_name)

    _split_fasta(fasta_path, entries, included_path, missing_path)
    return 0


if __name__ == '__main__':
    sys.exit(main())

Event Timeline