File Metadata

Created: Thu, Jul 10, 02:53

fasta_reader.py
View Options

	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	"""Module containing the FastaReader class."""

	##########################################################################
	from reader import FileReader
	from sequence import Sequence
	from sequence_collection import SequenceCollection
	##########################################################################


	class FastaReader(FileReader):
	    """Reader for FASTA files.

	    A FASTA file stores records made of a header line starting with '>'
	    (the sequence name) followed by the sequence itself.  The reader parses
	    these records into Sequence objects and keeps them in a
	    SequenceCollection.
	    """

	    # ------------------------------------------------------------------ #
	    # Constructors/Destructors                                           #
	    # ------------------------------------------------------------------ #

	    def __init__(self, filename):
	        """Initialise the reader with the name of the FASTA file to read.

	        filename -- path of the file; forwarded to FileReader.__init__
	            (read() later opens self.filename, presumably set there --
	            confirm against FileReader).
	        """
	        FileReader.__init__(self, filename)

	        # Members ---------------------- #

	        # SequenceCollection holding part or all of the sequences of the file
	        self.new_sequence_collection = SequenceCollection()

	    def __del__(self):
	        """Not implemented: the reader holds no resource to release."""
	        pass

	    # ------------------------------------------------------------------ #
	    # Methods                                                            #
	    # ------------------------------------------------------------------ #

	    # public:

	    def read(self, filter_reader=None, key_string=None):
	        """Parse the FASTA file and store its records in the collection.

	        Each record starts with a header line beginning with '>' and is
	        followed by zero or more sequence lines, which are concatenated.
	        Lines found before the first header are reported and skipped.

	        filter_reader -- container of sequence IDs to keep (anything
	            supporting ``in``); None keeps every record.
	        key_string -- separator given to str.split on the header line: the
	            text before its first occurrence is the sequence ID and the
	            text between its first and second occurrence is the sample ID.
	            When None, the header is split on whitespace, its first token
	            is the sequence ID and no sample ID is extracted.

	        Returns self.new_sequence_collection.  Records are added to it;
	        it is not cleared between successive calls.
	        """
	        seq_id = None
	        sample_id = None
	        seq_lines = []
	        in_record = False

	        # 'with' closes the file even if parsing raises
	        with open(self.filename, 'r') as fasta_file:
	            for raw_line in fasta_file:
	                line = raw_line.rstrip('\r\n')

	                if line.startswith('>'):
	                    # a new header ends the record currently being read
	                    if in_record:
	                        self._store_record(seq_id, sample_id, seq_lines,
	                                           filter_reader)
	                    seq_id, sample_id = self._parse_header(line, key_string)
	                    seq_lines = []
	                    in_record = True
	                elif in_record:
	                    # sequences may be wrapped over several lines
	                    seq_lines.append(line)
	                else:
	                    print('No sequence name found, skiping a line')

	        # the last record has no following header to trigger its storage
	        if in_record:
	            self._store_record(seq_id, sample_id, seq_lines, filter_reader)

	        return self.new_sequence_collection

	    def build_fasta(self, new_fasta_filename, sequence_collection=None):
	        """Write a sequence collection to a new FASTA file.

	        new_fasta_filename -- path of the file to create (overwritten if
	            it already exists).
	        sequence_collection -- collection to write; defaults to the
	            collection of the reader.  Its ``dict`` attribute is iterated
	            for sequence IDs and each value's ``seq`` attribute is written.

	        Returns 0.
	        """
	        # if no sequence collection is provided, use the one of the reader
	        if sequence_collection is None:
	            sequence_collection = self.new_sequence_collection

	        # single-argument form prints the same text on Python 2 and 3
	        print('building the fasta file  %s  with  %s  sequences.'
	              % (new_fasta_filename, len(sequence_collection.dict)))

	        # 'with' flushes and closes the file once every record is written
	        with open(new_fasta_filename, 'w') as new_fasta_file:
	            for seqID in sequence_collection.dict:
	                new_fasta_file.write('>' + seqID + '\n')
	                new_fasta_file.write(sequence_collection.dict[seqID].seq + '\n')

	        return 0

	    # private:

	    @staticmethod
	    def _parse_header(header, key_string):
	        """Return (seqID, sampleID) extracted from a header line without newline."""
	        parts = header.split(key_string)
	        seq_id = parts[0].replace('>', '')

	        # a sample ID only exists when a key string was given and found
	        sample_id = None
	        if key_string is not None and len(parts) > 1:
	            sample_id = parts[1]

	        return seq_id, sample_id

	    def _store_record(self, seq_id, sample_id, seq_lines, filter_reader):
	        """Add one parsed record to the collection if it passes the filter."""
	        if filter_reader is None or seq_id in filter_reader:
	            sequence = Sequence(seq_id, sample_id, ''.join(seq_lines))
	            self.new_sequence_collection.addSequence(sequence)


	##########################################################################


	# Manual smoke test: build a reader on a local sample file when the module
	# is executed as a script.
	if __name__ == '__main__':
	    # the class is named FastaReader; 'fastaReader' raised a NameError
	    test = FastaReader('/home/aline/spe_repository_aa/project/class_diagram/input_files/R2_trim.contigs.good_all.fasta')

fasta_reader.py
No OneTemporary
Actions

File Metadata

fasta_reader.py
View Options

Event Timeline

fasta_reader.pyNo OneTemporaryActions

File Metadata

fasta_reader.pyView Options

Event Timeline

fasta_reader.py
No OneTemporary
Actions

fasta_reader.py
View Options