Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F60090386
fasta_filter.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Apr 27, 10:23
Size
1 KB
Mime Type
text/x-python
Expires
Mon, Apr 29, 10:23 (2 d)
Engine
blob
Format
Raw Data
Handle
17298939
Attached To
rSIBORG Simon Botrylloides Regeneration Group
fasta_filter.py
View Options
#!/usr/bin/env python3
from
__future__
import
print_function
import
sys
import
os
import
re
# Usage text printed when too few command-line arguments are given.
# NOTE(review): the text advertises a "-invert" flag, but nothing in this
# script reads a third argument; the flag is currently a no-op.
USAGE = """Usage:
fasta_filter seq_list fasta_file [-invert]
Filters out the sequences in seq_list from fasta_file.
If -invert is specified, filters out the sequences NOT included.
"""

# Folder (relative to the current working directory) receiving the outputs.
OUT_FOLDER = 'filtered'


def read_seq_list(path):
    """Load the list of sequences that have already been done.

    Every non-blank line of the file yields one entry whose key is the
    first whitespace-separated token of the line and whose value is the
    whole line with surrounding whitespace stripped. When a key occurs
    several times, the last line wins.

    Blank (or whitespace-only) lines are skipped; indexing the first token
    of such a line would otherwise raise ``IndexError``.

    Args:
        path: Path of the sequence list file.

    Returns:
        A dict mapping first token -> stripped line.
    """
    dones = {}
    with open(path) as handle:
        for raw in handle:
            entry = raw.strip()
            if not entry:
                continue
            dones[entry.split()[0]] = entry
    return dones


def split_fasta(fasta_path, dones, inc_path, mis_path):
    """Split a FASTA file into listed and unlisted records.

    A record starts at a line beginning with '>' and runs until the next
    such line. The header text (everything after '>', stripped) is looked
    up in ``dones``:

    * found: the record goes to ``inc_path`` and its header is rewritten
      as '>' followed by the value stored in ``dones``;
    * not found: the record is copied unchanged to ``mis_path``.

    Lines appearing before the first header are written to ``mis_path``.

    NOTE(review): the lookup uses the whole header text while ``dones`` is
    keyed on the first token of each list line, so a header carrying a
    description after the identifier will not match -- confirm this is
    the intended behaviour.

    Args:
        fasta_path: Path of the FASTA file to read.
        dones: Mapping as returned by ``read_seq_list``.
        inc_path: Output path for the records found in ``dones``.
        mis_path: Output path for all other records.
    """
    with open(inc_path, 'w') as inc, open(mis_path, 'w') as mis, \
            open(fasta_path) as source:
        keep = False
        for line in source:
            if line.startswith('>'):
                name = line[1:].strip()
                keep = name in dones
                if keep:
                    line = '>{}\n'.format(dones[name])
            (inc if keep else mis).write(line)


def main(argv=None):
    """Command-line entry point.

    Expects ``argv[1]`` to be the sequence list and ``argv[2]`` the FASTA
    file. Writes ``inc_<fasta name>`` and ``mis_<fasta name>`` into the
    ``filtered`` folder of the current working directory, creating the
    folder if needed. With fewer than two arguments, prints the usage text
    and exits with status 0.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv

    # Inputs
    if len(argv) < 3:
        print(USAGE)
        sys.exit(0)
    infile = os.path.realpath(argv[1])
    fasta = os.path.realpath(argv[2])

    # Output folder: created directly rather than through a shell command,
    # so a failure raises instead of being silently ignored.
    if not os.path.isdir(OUT_FOLDER):
        os.makedirs(OUT_FOLDER)

    # Get already done sequences
    dones = read_seq_list(infile)

    # Default names for the output files
    fasta_name = os.path.basename(fasta)
    poutname = os.path.join(OUT_FOLDER, 'inc_' + fasta_name)
    noutname = os.path.join(OUT_FOLDER, 'mis_' + fasta_name)

    # Copy the content of the file
    split_fasta(fasta, dones, poutname, noutname)


if __name__ == '__main__':
    main()
Event Timeline
Log In to Comment