class Cluster: clusters are groups of sequences with a high similarity (>= similarity_threshold); they are represented by a sequence called the cluster head. This sequence can be compared with a reference database to make a taxonomic affiliation, which we will consider suitable for all the sequences of the cluster.
"""__init__: clusters are initialised with an id generally corresponding to the cluster head sequence id, and optionally the cluster head sequence per se """
# Members ---------------------- #
# best_match_ref_id: string, reference ID (refID) of the best match; None until it is set
self.best_match_ref_id=None
# taxonomic_affiliation: taxonomy of the cluster, based on the best match; None until it is set
self.taxonomic_affiliation=None
# taxon_similarity: float, similarity associated with the taxonomic affiliation
# (presumably a percentage, since printCluster prints it followed by ' %' -- TODO confirm)
self.taxon_similarity=None
# similarity_threshold: float, similarity threshold inside the cluster; None until it is set
self.similarity_threshold=None
# size: int, total number of sequences of the cluster; starts at 0
self.size=0
# head_id: id of the cluster head, taken from the cluster_head_id argument
self.head_id=cluster_head_id
# seq: the cluster head sequence, taken from the sequence argument
self.seq=sequence
# d_abundance_per_sample: dictionary mapping a sample ID to the number of sequences
# of this cluster found in that sample; starts empty
self.d_abundance_per_sample={}
def __del__(self):
    """Finalizer hook, intentionally left as a no-op.

    No cleanup is performed when a cluster object is destroyed.
    """
    # NOTE(review): on Python 2, instances of a class that defines __del__
    # and that take part in a reference cycle are not freed by the cyclic
    # garbage collector (they end up in gc.garbage). If this method stays
    # empty, consider removing it.
    return None
def attributeClusterHead(self, sequence):
    """Make the given sequence the cluster head of this cluster.

    Copies ``sequence.seqID`` into ``self.head_id`` and ``sequence.seq``
    into ``self.seq``. Nothing is returned.
    """
    # (attribute written on the cluster, attribute read from the sequence);
    # each value is read and stored before the next pair is handled.
    head_fields = (("head_id", "seqID"), ("seq", "seq"))
    for cluster_attr, sequence_attr in head_fields:
        setattr(self, cluster_attr, getattr(sequence, sequence_attr))
defprintCluster(self,number_per_sample=False):
"""__printCluster__: prints informations about the cluster, if number_per_sample = True the number of sequence from this cluster for the different samples is printed """
print'cluster ',self.head_id
ifself.similarity_threshold!=None:
print'with ',similarity_threshold,' %','\n'
print'representative sequence : ',self.seq,'\n'
ifself.taxonomic_affiliation!=None:
print'similar to ',self.taxonomic_affiliation,
print'with ',self.taxon_similarity,' %','\n'
ifself.size!=None:
print'size : ',self.size,'\n'
# optionally print the number of sequences per sample