Page MenuHomec4science

taxonomic_database_reader.py
No OneTemporary

File Metadata

Created
Tue, Jul 15, 01:06

taxonomic_database_reader.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Module containing the class TaxonomicDatabaseReader."""
##########################################################################
from reader import FileReader
from fasta_reader import FastaReader
from tax_file_reader import TaxReader
from sequence_collection import SequenceCollection
from taxonomy_collection import TaxonomyCollection
import fnmatch
import os
##########################################################################
class TaxonomicDatabaseReader(FileReader):
    """
    Read a reference sequence database and keep its content in memory.

    Such a database is usually made of a fasta file that holds the sequences
    together with either their taxonomy or an ID. In the latter case a second
    file ('.tax'), located in the same folder as the fasta file, gives the
    correspondence between each ID and a taxonomy.

    Attributes:
        sequence_collection: SequenceCollection, replaced by the one built by
            the fasta reader when read() is called.
        reference_tax_file: path of the '.tax' file found next to the fasta
            file, or None when the folder contains no such file.
        taxonomy_collection: TaxonomyCollection, replaced by the one built by
            the tax reader when read() is called and a '.tax' file exists.
    """
    # ------------------------------------------------------------------ #
    # Constructors/Destructors                                           #
    # ------------------------------------------------------------------ #

    def __init__(self, filename):
        """Locate the optional '.tax' file that sits next to *filename*.

        Args:
            filename: path of the fasta file of the reference database. The
                folder part of this path is searched for a '*.tax' file.
        """
        FileReader.__init__(self, filename)
        # Members ---------------------- #
        # Folder holding the database; a bare file name (no folder part)
        # means the current directory.
        folder = os.path.dirname(filename) or os.curdir
        # SequenceCollection sequence collection
        self.sequence_collection = SequenceCollection()
        # string tax file path (optional). It is always defined, so that
        # read() can test it even when no '.tax' file is available.
        self.reference_tax_file = None
        try:
            # sorted: os.listdir order is arbitrary, keep the choice stable
            tax_candidates = sorted(
                fnmatch.filter(os.listdir(folder), '*.tax'))
        except OSError:
            # unreadable or missing folder: behave as "no tax file"
            tax_candidates = []
        if tax_candidates:
            self.reference_tax_file = os.path.join(folder, tax_candidates[0])
        # TaxonomyCollection taxonomy_collection
        self.taxonomy_collection = TaxonomyCollection()

    def __del__(self):
        """__del__: not implemented"""
        pass

    # ------------------------------------------------------------------ #
    # Methods                                                            #
    # ------------------------------------------------------------------ #
    # public:
    def read(self, filter_reader, key_string=None):
        """Read the fasta file and, when one was found, the '.tax' file.

        Args:
            filter_reader: forwarded unchanged to the read() method of the
                fasta reader and of the tax reader.
            key_string: accepted for interface compatibility; not used here.

        Returns:
            0, always.
        """
        # NOTE(review): self.filename is presumably set by
        # FileReader.__init__ - confirm against reader.py.
        fasta_reader = FastaReader(self.filename)
        fasta_reader.read(filter_reader)
        # store the collected information in the sequence collection
        self.sequence_collection = fasta_reader.new_sequence_collection
        # and, if a tax file exists, do the same with a tax reader
        if self.reference_tax_file is not None:
            tax_reader = TaxReader(self.reference_tax_file)
            tax_reader.read(filter_reader)
            self.taxonomy_collection = tax_reader.taxonomy_collection
        # Single-argument print(...) is valid in Python 2 and 3; the double
        # space reproduces the output of the former "print 'text ', n".
        print('sequence_collection has  %d'
              % len(self.sequence_collection.dict))
        print('taxonomy_collection has  %d'
              % len(self.taxonomy_collection.dict))
        return 0

    def getReferenceTaxonomy(self, ref_id):
        """Return the taxonomy stored under the reference ID *ref_id*.

        The lookup is a plain index into taxonomy_collection.dict, so an
        unknown ID raises whatever that container raises (KeyError for a
        regular dict).
        """
        return self.taxonomy_collection.dict[ref_id]

    def getReferenceSequence(self, ref_id):
        """Return the reference sequence stored under the ID *ref_id*.

        The lookup is a plain index into sequence_collection.dict, so an
        unknown ID raises whatever that container raises (KeyError for a
        regular dict).
        """
        return self.sequence_collection.dict[ref_id]
##########################################################################
if __name__ == '__main__':
    # Manual smoke test against a local copy of the MiDAS reference database.
    # The constructor takes only the fasta path: the '.tax' file is looked up
    # in the same folder, so it must not be passed as a second argument
    # (doing so raises TypeError).
    test = TaxonomicDatabaseReader('/home/aline/spe_repository_aa/project/class_diagram/input_files/References/midas_s123_213/MiDAS_S123_2.1.3.fasta')
    test.read(None)
    # The results are discarded; these lookups only check that the reference
    # ID can be found in both collections.
    test.getReferenceTaxonomy('FJ911837')
    test.getReferenceSequence('FJ911837')

Event Timeline