File Metadata

Created: Sun, Jul 13, 17:56

blast_file_reader.py
View Options

	#!/usr/bin/env python
	# -- coding: utf-8 --

	" module blast_file_reader contains the definition of the class BlastFileReader"

	##########################################################################
	from reader import FileReader
	##########################################################################


	class BlastFileReader(FileReader):

	"""
	class BlastFileReader: can read and store information from blastn software output
	"""

	# ------------------------------------------------------------------ #
	# Constructors/Destructors #
	# ------------------------------------------------------------------ #

	def __init__(self,filename):
	"""__init__: the reader is initialized with 2 members : a filename and the dictionnary where the information will be stored """

	# Members ---------------------- #

	# the name of the blastfile to read (with full path)
	self.filename = filename

	# self.dict[seqID] = [refID, identity%]
	self.dict= {}


	def __del__(self):
	"""__del__: not implemented """
	pass

	# ------------------------------------------------------------------ #
	# Methods #
	# ------------------------------------------------------------------ #

	# public:

	def read(self,filter_reader, key_string = None):
	"""read: this function read the information in the blast_file; for each sequenceID from the query it will get the matching reference and the percentage of identity with it, if there is a match, the reference ID will be 'NONE' if there was no match and the identity will be 0. """

	blast_file = open(self.filename,'r')

	# read the file line by line from the start
	blast_line = blast_file.readline()

	while blast_line:
	# the seqID of the query is extracted and a new item is initialized in the dictionary 'self.dict'
	if blast_line[0:6]=='Query=':
	seqID = blast_line.split('ry= ')[1].replace('\n','')# extract sequence ID
	self.dict[seqID]=['','']

	#pass three lines
	for i in range(0,3):
	blast_file.readline()
	blast_line=blast_file.readline()

	# if 'Sequences is in the line, there is a match
	if 'Sequences ' in blast_line:
	# pass for line and extract the refID of the matching reference
	for i in range(0,4):
	blast_file.readline()
	blast_line=blast_file.readline()

	refID = blast_line.split('>')[1].replace('\n','').replace(' ','')

	# else, there is no match, the refID is set to 'NONE'
	else:
	blast_line=blast_file.readline()
	if '****' in blast_line :
	refID = 'NONE'

	# if neither 'Sequences' or '****' is found, there is a problem of reading
	else:
	print 'probleme de capture du blast'

	# the ID of the matching reference is stored in self.dict
	self.dict[seqID][0]=refID

	# pass 3 lines
	for i in range(0,3):
	blast_file.readline()
	blast_line=blast_file.readline()

	#extract identity score
	identities = blast_line.split(' ')[3].split('/')
	try :
	# if there is a match the percentage of identity is : number of matches/alignment length
	ID_percent = str(round(float(identities[0])*100/float(identities[1]),2))

	except:
	# if there is no match this identity percentage is set to zero
	ID_percent = str(0)

	# the percentage of identity is stored in self.dict
	self.dict[seqID][1]= ID_percent

	blast_line = blast_file.readline()

	# if the begining of the line is not 'Query=', a new line is read.
	else:
	blast_line = blast_file.readline()

	blast_file.close()

	return 0


	##########################################################################


	if __name__ == '__main__':

	test = BlastFileReader('/home/aline/spe_repository_aa/project/class_diagram/input_files/R2_trim.contigs.good_all_cluster_heads_blast_results')
	#print 'blast dictionnary ', test.read(None,None)

blast_file_reader.py
No OneTemporary
Actions

File Metadata

blast_file_reader.py
View Options

Event Timeline

blast_file_reader.pyNo OneTemporaryActions

File Metadata

blast_file_reader.pyView Options

Event Timeline

blast_file_reader.py
No OneTemporary
Actions

blast_file_reader.py
View Options