File Metadata

Created: Sat, Jul 12, 23:23

cluster.py
View Options

	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	" module containing class Cluster "

	##########################################################################

	##########################################################################


	class Cluster(object):
	    """Group of sequences sharing a high similarity.

	    A cluster gathers sequences whose similarity is >= similarity_threshold
	    and is represented by one of them, the cluster head. The cluster head
	    can be compared with a reference database to obtain a taxonomic
	    affiliation, which is then considered suitable for every sequence of
	    the cluster.

	    Attributes:
	        best_match_ref_id: reference id of the best match (None until set).
	        taxonomic_affiliation: taxonomy based on the best match (None until
	            set).
	        taxon_similarity: similarity with the best match (None until set).
	        similarity_threshold: similarity threshold inside the cluster (None
	            until set).
	        size: total number of sequences of the cluster (starts at 0).
	        head_id: id of the cluster head sequence.
	        seq: cluster head sequence itself (may be None).
	        d_abundance_per_sample: dict mapping a sample id to the number of
	            sequences of this cluster found in that sample.
	    """

	    # ------------------------------------------------------------------ #
	    # Constructors/Destructors                                           #
	    # ------------------------------------------------------------------ #

	    def __init__(self, cluster_head_id, sequence=None):
	        """Initialise an empty cluster.

	        Args:
	            cluster_head_id: id of the cluster, generally the id of the
	                cluster head sequence.
	            sequence: optional cluster head sequence per se.
	        """
	        # Reference id of the best match.
	        self.best_match_ref_id = None

	        # Taxonomy of the cluster, based on the best match.
	        self.taxonomic_affiliation = None

	        # Similarity (float) with the best match.
	        self.taxon_similarity = None

	        # Similarity threshold (float) inside the cluster.
	        self.similarity_threshold = None

	        # Total number of sequences of the cluster.
	        self.size = 0

	        # Id of the cluster head sequence.
	        self.head_id = cluster_head_id

	        # Cluster head sequence.
	        self.seq = sequence

	        # Number of sequences belonging to this cluster, per sample id.
	        self.d_abundance_per_sample = {}

	    def __del__(self):
	        """No-op finalizer: the cluster holds nothing that needs releasing."""
	        pass

	    def attributeClusterHead(self, sequence):
	        """Set the given sequence as the cluster head of the cluster.

	        Args:
	            sequence: object exposing ``seqID`` and ``seq`` attributes; they
	                are copied to ``head_id`` and ``seq`` respectively.
	        """
	        self.head_id = sequence.seqID
	        self.seq = sequence.seq

	    @staticmethod
	    def _emit(*items):
	        """Print the items separated by single spaces, then a newline."""
	        # A single pre-joined argument keeps the call valid, and the output
	        # identical, under both the Python 2 print statement and the
	        # Python 3 print function. '%s' formatting converts each item the
	        # same way a comma-separated print would.
	        print(' '.join('%s' % (item,) for item in items))

	    def printCluster(self, number_per_sample=False):
	        """Print information about the cluster on standard output.

	        The head id and the representative sequence are always printed; the
	        similarity threshold, the taxonomic affiliation (with its
	        similarity) and the size are printed only when they are not None.

	        Args:
	            number_per_sample: if True, also print the number of sequences
	                of this cluster for each sample.
	        """
	        self._emit('cluster ', self.head_id)
	        if self.similarity_threshold is not None:
	            # Fixed: the original read a bare, undefined name here.
	            self._emit('with ', self.similarity_threshold, ' %', '\n')

	        self._emit('representative sequence : ', self.seq, '\n')
	        if self.taxonomic_affiliation is not None:
	            # The original used a trailing-comma print followed by a second
	            # print, which yields one single line; emitted in one go here.
	            self._emit('similar to ', self.taxonomic_affiliation,
	                       'with ', self.taxon_similarity, ' %', '\n')
	        if self.size is not None:
	            self._emit('size : ', self.size, '\n')

	        # Optionally print the number of sequences per sample.
	        if number_per_sample:
	            self._emit('Abundance per sample')
	            # Fixed: iterate the dictionary actually created in __init__ and
	            # index it with the loop variable.
	            for sample_id in self.d_abundance_per_sample:
	                self._emit('sample ', sample_id, ' : ',
	                           self.d_abundance_per_sample[sample_id], '\n')




	##########################################################################


	if __name__ == '__main__':
	    # Smoke test when run as a script: build one cluster from a head id
	    # and a short head sequence.
	    test = Cluster(
	        cluster_head_id='test_cluster_head_id',
	        sequence='ATTCAGTAAT',
	    )

cluster.py
No OneTemporary
Actions

File Metadata

cluster.py
View Options

Event Timeline

cluster.pyNo OneTemporaryActions

File Metadata

cluster.pyView Options

Event Timeline

cluster.py
No OneTemporary
Actions

cluster.py
View Options