main.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sat, Jul 12, 20:50

main.py
View Options

	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	" This program can perform 2 different processes depending on the chosen option (-o): clustering takes as input a file of clustered sequences and a reference database and compares a representative of each cluster to the reference database to assign a taxonomy to the sequences of the clusters; an evolution of the clusters through the different samples can also be output. metadata takes as input a metadata file and stores the information it contains if it corresponds to a database table."


	##########################################################################
	import argparse
	import os
	from clustering_process_fct import clustering_process
	from metadata_reader import metadataReader
	from config_file_reader import ConfigFileReader
	from test_fct import check_clustering
	##########################################################################

	""" The arguments are parsed in the main and are dispatched to the appropriate function/object."""

	# command line for clustering#
	##############################
	# /home/aline/spe_repository_aa/project/class_diagram/skeleton/main.py -f '/home/aline/spe_repository_aa/project/class_diagram/input_files/config_clustering_cd_hit.txt'

	# command test
	################
	#/home/aline/spe_repository_aa/project/class_diagram/skeleton/main.py -f /home/aline/spe_repository_aa/project/class_diagram/Test_files/config_clustering_dbc_test.txt

	#/home/aline/spe_repository_aa/project/class_diagram/skeleton/main.py -f /home/aline/spe_repository_aa/project/class_diagram/Test_files/config_clustering_cd_hit_test.txt

	#/home/aline/spe_repository_aa/project/class_diagram/skeleton/main.py -f /home/aline/spe_repository_aa/project/class_diagram/Test_files/config_metadata_test.txt

	# command line for metadata#
	############################

	#./main.py -o 'metadata' -m '/home/aline/spe_repository_aa/project/class_diagram/skeleton/Data/performances.csv' -a '/home/aline/spe_repository_aa/project/class_diagram/skeleton/My_database/my_database.db'

	############################## inputs ####################################

	# First pass: look only for -f/--config.
	# A dedicated pre-parser combined with parse_known_args() is used so that
	# the other options, which are only registered further down, are not
	# rejected as "unrecognized arguments" when the program is driven from the
	# command line. add_help is disabled so that -h is answered by the full
	# parser, which knows every option.
	config_parser = argparse.ArgumentParser(add_help = False)
	config_parser.add_argument('-f', '--config', required = False)
	config_args, _remaining = config_parser.parse_known_args()

	parser = argparse.ArgumentParser(description = 'Program that process and store molecular sequences and related metadata in a sqlite3 database')
	parser.add_argument('-f', '--config', help = 'configuration file', required = False)

	# change the parser if a configuration file is provided
	# NOTE(review): ConfigFileReader is used below through add_argument(),
	# read() and parse_args(), i.e. as a drop-in for ArgumentParser - confirm
	# against config_file_reader.py.
	use_config_file = config_args.config is not None
	if use_config_file:
	    parser = ConfigFileReader(config_args.config)

	# Main choice of option
	parser.add_argument('-o', '--option', help = 'choose between clustering (clustering), or store metadata (metadata)', required = True)

	# arguments for clustering
	parser.add_argument('-k', '--key_string', help = 'string marking the begining of sampleID in sequences names', required = False)
	parser.add_argument('-cs', '--cluster_soft', help = 'name of the clustering software, should be dbc or cd_hit', required = False)
	parser.add_argument('-r', '--ref_fasta', help = 'name of the reference fasta file', required = False)
	parser.add_argument('-d', '--database_name', help = 'output', required = False)
	parser.add_argument('-i', '--input_fasta', help = 'the initial fasta file containing the sequences ', required = False)
	parser.add_argument('-cf', '--cluster_file', help = 'the output of the clustering algorithm', required = False)
	parser.add_argument('-q', '--query_fasta', help = 'the file containing only the sequences that are cluster heads', required = False)
	parser.add_argument('-b', '--blast_file', help = 'the output of the blastn algorithm', required = False)
	parser.add_argument('-t', '--table_file', help = 'the name of the output file table to be created', required = False)
	parser.add_argument('-e', '--test', help = 'option set to True for the test mode', required = False)
	parser.add_argument('-s', '--seq_type', help = 'type of sequences, can be DNA, RNA or proteins', required = False)

	parser.add_argument('-st', '--store', help = 'true to store the clusters in the database', required = False)
	parser.add_argument('-p', '--min_clust_size', help = 'set the minimum cluster size', required = False)

	# arguments for metadata storage in the database
	# (the help advertises ',' because that is the separator the metadata
	# branch actually splits on)
	parser.add_argument('-m', '--metadata', help = 'a list of metadata files containing the information we want to store in the database, separated with a ,', required = False)
	parser.add_argument('-a', '--my_db', help = 'name of the database', required = False)

	############################## arguments ##############################

	# With a configuration file, the values are loaded from the file before
	# being exposed through parse_args().
	if use_config_file:
	    parser.read()

	args = parser.parse_args()

	# choice of the sequence type
	# The sequence type given with -s is mapped onto a database type string
	# that is later handed to clustering_process().
	# NOTE(review): 'nucl' / 'prot' look like makeblastdb -dbtype values -
	# confirm in clustering_process_fct.
	sequence_type = args.seq_type
	if sequence_type in ('DNA', 'RNA'):
	    dbtype = 'nucl'
	# The -s help text advertises "proteins", so both spellings are accepted.
	elif sequence_type in ('protein', 'proteins'):
	    dbtype = 'prot'
	else:
	    # unknown or missing sequence type
	    dbtype = ''

	# Single-argument, parenthesised form: prints the same text under
	# Python 2 and Python 3.
	print('dbtype : %s' % dbtype)

	# database name
	my_db_name = args.my_db

	# test mode
	test_mode = args.test

	# choice of option

	##################
	### CLUSTERING ###
	##################

	# Dispatch on the main option (-o), compared case-insensitively.
	chosen_option = args.option.lower()

	if chosen_option == 'clustering':

	    clustering_software = args.cluster_soft

	    # string used to recognize the begining of the sampleID in the sequence names
	    key_string = args.key_string

	    # folder containing the files of the 16s reference database
	    reference_filename = args.ref_fasta

	    # The fasta_file used to do the clustering
	    input_fasta_file = args.input_fasta

	    cluster_filename = args.cluster_file

	    # files with only the cluster head sequences
	    query_fasta_file = args.query_fasta

	    # result of the comparison with the reference database
	    blast_file = args.blast_file

	    # outputs file names
	    output_table_file_name = args.table_file

	    # minimal cluster size: a missing (None) or non-numeric value means
	    # "no minimum". Only the conversion errors are caught so that
	    # unrelated failures are not silently swallowed.
	    try:
	        min_clust_size = int(args.min_clust_size)
	    except (TypeError, ValueError):
	        min_clust_size = None

	    # launch the clustering
	    cluster_collection = clustering_process(
	        clustering_software,
	        key_string,
	        reference_filename,
	        input_fasta_file,
	        cluster_filename,
	        query_fasta_file,
	        blast_file,
	        output_table_file_name,
	        dbtype,
	        min_clust_size,
	    )

	    # if the option to store the clustering results in the local database
	    # is set to true (--store is optional, so it may be None)
	    if args.store is not None and args.store.lower() == 'true':
	        cluster_collection.storeInDatabase(my_db_name)

	    # NOTE(review): test_mode is the raw option value, so any non-empty
	    # string (including 'False') enables the check - confirm intent.
	    if test_mode:
	        check_clustering(clustering_software, cluster_collection)

	elif chosen_option == 'metadata':

	    # -m is optional for the parser but mandatory for this branch
	    if not args.metadata:
	        raise SystemExit('error, the metadata option requires at least one metadata file (-m).')

	    # metadata can be ',' separated list of metadata files
	    metadata_file_list = args.metadata.split(',')

	    # a metadata reader is created for each file
	    for metadata_file in metadata_file_list:
	        metadata_reader = metadataReader(metadata_file, my_db_name)
	        metadata_reader.read(None, None) # possible here to add a filter or keystring to select metadata

	else:
	    # Exit with a non-zero status so that callers can detect the failure.
	    raise SystemExit('error, invalid choice of option: -o must be either clustering or metadata.')

main.pyNo OneTemporaryActions

File Metadata

main.pyView Options

Event Timeline

main.py
No OneTemporary
Actions

main.py
View Options