Page MenuHomec4science

cluster.py
No OneTemporary

File Metadata

Created
Sat, Jul 12, 23:23

cluster.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
" module containing class Cluster "
##########################################################################
##########################################################################
class Cluster(object):
    """
    class Cluster: clusters are groups of sequences with a high similarity
    (>= similarity_threshold). They are represented by a sequence called the
    cluster head. This sequence can be compared with a reference database to
    make a taxonomic affiliation (that we will consider suitable for all the
    sequences of the cluster).
    """
    # ------------------------------------------------------------------ #
    # Constructors/Destructors                                           #
    # ------------------------------------------------------------------ #
    def __init__(self, cluster_head_id, sequence=None):
        """__init__: clusters are initialised with an id, generally
        corresponding to the cluster head sequence id, and optionally the
        cluster head sequence itself.

        cluster_head_id -- identifier stored as self.head_id
        sequence        -- cluster head sequence stored as self.seq
                           (default None)
        """
        # Members ---------------------- #
        # reference id of the best match (None until set)
        self.best_match_ref_id = None
        # taxonomic affiliation of the cluster, based on the best match
        self.taxonomic_affiliation = None
        # similarity with the affiliated taxon (printed as a percentage)
        self.taxon_similarity = None
        # similarity threshold inside the cluster (printed as a percentage)
        self.similarity_threshold = None
        # total number of sequences of the cluster
        self.size = 0
        # id of the cluster head
        self.head_id = cluster_head_id
        # sequence of the cluster head
        self.seq = sequence
        # maps a sample id to the number of sequences of this cluster found
        # in that sample
        self.d_abundance_per_sample = {}

    def __del__(self):
        """__del__: not implemented (explicit no-op)."""
        # NOTE(review): kept only for backward compatibility. On interpreters
        # older than Python 3.4, instances of a class defining __del__ that
        # take part in a reference cycle are not reclaimed by the cycle
        # collector; consider removing this method if nothing relies on it.
        pass

    def attributeClusterHead(self, sequence):
        """attributeClusterHead: sets the sequence given as argument as the
        cluster head of the cluster.

        sequence -- object exposing the attributes `seqID` and `seq`, which
                    are copied to self.head_id and self.seq respectively
        """
        self.head_id = sequence.seqID
        self.seq = sequence.seq

    @staticmethod
    def _print_items(*items):
        """_print_items: writes the items on one line, separated by a single
        space and followed by a newline.

        This reproduces the layout of a Python 2 `print a, b, c` statement
        while using a single-argument print call, which behaves the same
        under Python 2 and Python 3.
        """
        # '%s' % (item,) keeps unicode items as unicode under Python 2.
        print(' '.join(['%s' % (item,) for item in items]))

    def printCluster(self, number_per_sample=False):
        """printCluster: prints information about the cluster on standard
        output.

        number_per_sample -- if True, the number of sequences of this cluster
                             found in each sample is printed as well
        """
        self._print_items('cluster ', self.head_id)
        if self.similarity_threshold is not None:
            # was a bare `similarity_threshold` (NameError)
            self._print_items('with ', self.similarity_threshold, ' %', '\n')
        self._print_items('representative sequence : ', self.seq, '\n')
        if self.taxonomic_affiliation is not None:
            # the affiliation and its similarity share a single output line
            self._print_items('similar to ', self.taxonomic_affiliation,
                              'with ', self.taxon_similarity, ' %', '\n')
        if self.size is not None:
            self._print_items('size : ', self.size, '\n')
        # optionally print the number of sequences per sample
        if number_per_sample:
            self._print_items('Abundance per sample')
            # was reading the undefined `self.d_number_per_sample` and
            # indexing it with the undefined name `sampleID`
            for sample_id, count in self.d_abundance_per_sample.items():
                self._print_items('sample ', sample_id, ' : ', count, '\n')
##########################################################################
if __name__ == '__main__':
    # Smoke test when the module is run as a script: build one cluster from
    # a head id and a short nucleotide string.
    test = Cluster(cluster_head_id='test_cluster_head_id',
                   sequence='ATTCAGTAAT')

Event Timeline