Page MenuHomec4science

fasta_reader.py
No OneTemporary

File Metadata

Created
Thu, Jul 10, 02:53

fasta_reader.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
" module containing class fastaReader"
##########################################################################
from reader import FileReader
from sequence import Sequence
from sequence_collection import SequenceCollection
##########################################################################
class FastaReader(FileReader):
    """
    class FastaReader: fasta files store sequences as records made of a
    header line starting with '>' (the sequence name) followed by the
    sequence itself; this reader extracts them into a SequenceCollection.
    """
    # ------------------------------------------------------------------ #
    # Constructors/Destructors                                           #
    # ------------------------------------------------------------------ #
    def __init__(self, filename):
        """__init__: fasta readers are initialized with the name of the
        file to read and an empty sequence collection that will store part
        or all of the sequences contained in the file.

        filename -- path of the fasta file; forwarded to FileReader.__init__
                    (presumably stored there as self.filename, which read()
                    relies on -- confirm against FileReader).
        """
        FileReader.__init__(self, filename)
        # Members ---------------------- #
        # SequenceCollection new_sequence_collection
        self.new_sequence_collection = SequenceCollection()

    def __del__(self):
        """__del__: not implemented """
        pass

    # ------------------------------------------------------------------ #
    # Methods                                                            #
    # ------------------------------------------------------------------ #
    # public:
    def read(self, filter_reader, key_string=None):
        """read: parse the fasta file and store its sequences in the
        sequence collection of the reader.

        filter_reader -- None to keep every sequence, otherwise a container
                         of sequence names (tested with `in`); only the
                         sequences whose seqID is in it are stored.
        key_string    -- separator passed to str.split on the header line.
                         The seqID is the text before it (with '>' and the
                         newline removed). When key_string is given and
                         present in the header, the text after it is the
                         sampleID; otherwise the sampleID is None.

        A sequence may span several lines: every line up to the next header
        (or the end of the file) is concatenated into the record.
        Lines found before the first header are reported and skipped.

        Returns the reader's sequence collection. The same collection is
        reused across calls, so repeated calls keep adding to it.
        """
        # `with` guarantees the file is closed even if parsing raises.
        with open(self.filename, 'r') as fasta_file:
            line = fasta_file.readline()
            while line:
                # the sequence names are preceded by the symbol '>'
                if line[:1] != '>':
                    print('No sequence name found, skiping a line')
                    line = fasta_file.readline()
                    continue

                # the seqID (and optional sampleID) come from the header
                # line itself, so they are extracted before reading on
                header_fields = line.split(key_string)
                seqID = header_fields[0].replace('>', '').replace('\n', '')
                sampleID = None
                if key_string is not None and len(header_fields) > 1:
                    sampleID = header_fields[1].replace('\n', '')

                # the sequence per se: all lines until the next header
                seq_parts = []
                line = fasta_file.readline()
                while line and line[:1] != '>':
                    seq_parts.append(line.replace('\n', ''))
                    line = fasta_file.readline()
                seq = ''.join(seq_parts)

                # if there is no filter or if the sequence name is in the
                # filter, the sequence is added to the collection
                if filter_reader is None or seqID in filter_reader:
                    sequence = Sequence(seqID, sampleID, seq)
                    self.new_sequence_collection.addSequence(sequence)
        return self.new_sequence_collection

    def build_fasta(self, new_fasta_filename, sequence_collection=None):
        """build_fasta: write a sequence collection to a new fasta file.

        new_fasta_filename  -- path of the file to create (overwritten if it
                               already exists).
        sequence_collection -- collection to write; defaults to the
                               collection of the present reader. Its `dict`
                               attribute is iterated for seqIDs and each
                               value's `seq` attribute is written.

        Returns 0.
        """
        # if no sequence collection is provided, the default is the
        # collection of the reader
        if sequence_collection is None:
            sequence_collection = self.new_sequence_collection

        # `with` closes (and therefore flushes) the output file on exit
        with open(new_fasta_filename, 'w') as new_fasta_file:
            # single-argument print: same output as the original
            # comma-separated print statement (items joined by one space)
            print(' '.join(['building the fasta file ',
                            str(new_fasta_filename),
                            ' with ',
                            str(len(sequence_collection.dict)),
                            ' sequences.']))
            # write the sequences in the file according to the format 'fasta'
            for seqID in sequence_collection.dict:
                new_fasta_file.write('>' + seqID + '\n')
                new_fasta_file.write(sequence_collection.dict[seqID].seq + '\n')
        return 0
##########################################################################
if __name__ == '__main__':
    # Manual smoke test: instantiate the reader on a developer-local file.
    # The class is named FastaReader (capital F); the previous lowercase
    # spelling raised NameError.
    test = FastaReader('/home/aline/spe_repository_aa/project/class_diagram/input_files/R2_trim.contigs.good_all.fasta')

Event Timeline