diff --git a/TP_prediction/find_best_TPs.py b/TP_prediction/find_best_TPs.py index 4ed03ee..f10e45c 100644 --- a/TP_prediction/find_best_TPs.py +++ b/TP_prediction/find_best_TPs.py @@ -1,231 +1,287 @@ import sys sys.path.insert(0, '../src/envipath-python/enviPath_python/') sys.path.insert(0, '../src/envipath-python/') from enviPath_python.enviPath import * from enviPath_python.enviPath import * from util import * import getpass #---------------------------# # enviPath SETTINGS # #---------------------------# # Define the instance to use INSTANCE_HOST = 'https://envipath.org' USERNAME = '' # TODO USER: enter your username # MODEL # TODO USER: Select model for relative reasoning. Default: Standard model BBD - ML - ECC - 2022 # New enviPath - default relative reasoning model on envipath: BBD - ML - ECC - 2022 -EP_MODEL_ID = 'https://envipath.org/package/de0cdca1-c3ff-44ed-8ffd-f29c269bfa55/relative-reasoning/646afb6c-6cfc-4d4b-8d22-e196d849ec73' +# EP_MODEL_ID = 'https://envipath.org/package/de0cdca1-c3ff-44ed-8ffd-f29c269bfa55/relative-reasoning/646afb6c-6cfc-4d4b-8d22-e196d849ec73' # New enviPath - BBD+SOIL relative reasoning model on envipath: BBD+SOIL - ML - ECC - 2022 -# rr_id = 'https://envipath.org/package/de0cdca1-c3ff-44ed-8ffd-f29c269bfa55/relative-reasoning/0aec0115-941d-4e33-a844-4431d3ec598d' +# EP_MODEL_ID = 'https://envipath.org/package/de0cdca1-c3ff-44ed-8ffd-f29c269bfa55/relative-reasoning/0aec0115-941d-4e33-a844-4431d3ec598d' # New enviPath - BBD+SOIL relative reasoning model on envipath: BBD+SLUDGE - ML - ECC - 2022 -# rr_id = 'https://envipath.org/package/de0cdca1-c3ff-44ed-8ffd-f29c269bfa55/relative-reasoning/2a41e599-f962-4268-b7d1-5f1cb252b937' +# EP_MODEL_ID = 'https://envipath.org/package/de0cdca1-c3ff-44ed-8ffd-f29c269bfa55/relative-reasoning/2a41e599-f962-4268-b7d1-5f1cb252b937' # New enviPath - BBD+SOIL relative reasoning model on envipath: BBD+SOIL+SLUDGE - ML - ECC - 2022 -# rr_id = 'https://envipath.org/package/de0cdca1-c3ff-44ed-8ffd-f29c269bfa55/relative-reasoning/76bd8654-e02f-4fbd-98df-bf61411f9b92' +# EP_MODEL_ID = 'https://envipath.org/package/de0cdca1-c3ff-44ed-8ffd-f29c269bfa55/relative-reasoning/76bd8654-e02f-4fbd-98df-bf61411f9b92' +# Old model - old default relative reasoning model on envipath from 2021: BBD - ML - ECC +EP_MODEL_ID = 'https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/relative-reasoning/edaf8d8c-430a-4277-848b-3e163a86febf' # PACKAGE - # TODO USER: prepare a new package (manually) and add it's URI here - make sure it is empty when running script! EP_PACKAGE_ID = 'https://envipath.org/package/de0cdca1-c3ff-44ed-8ffd-f29c269bfa55' # Test package # List of output packages used for Sludge TP paper -# pkg_id_1 = 'https://envipath.org/package/0915fad3-b889-4aa8-ac98-0707b717be57' # Package for results using BBD - ML - ECC - 2022 model -# pkg_id_2 = 'https://envipath.org/package/80cf58b1-21e2-4c28-9cc6-dc69c6445bdf' # Package for results using BBD+SOIL - ML - ECC - 2022 model -# pkg_id_3 = 'https://envipath.org/package/7d64aa85-2e3c-413f-a538-4d5f2bfd4662' # Package for results using BBD+SLUDGE - ML - ECC - 2022 model -# pkg_id_4 = 'https://envipath.org/package/11f2acd5-5209-4d49-ad77-93f6f6965886' # Package for results using BBD+SOIL+SLUDGE - ML - ECC - 2022 model +# EP_PACKAGE_ID = 'https://envipath.org/package/0915fad3-b889-4aa8-ac98-0707b717be57' # Package for results using BBD - ML - ECC - 2022 model +# EP_PACKAGE_ID = 'https://envipath.org/package/80cf58b1-21e2-4c28-9cc6-dc69c6445bdf' # Package for results using BBD+SOIL - ML - ECC - 2022 model +# EP_PACKAGE_ID = 'https://envipath.org/package/7d64aa85-2e3c-413f-a538-4d5f2bfd4662' # Package for results using BBD+SLUDGE - ML - ECC - 2022 model +# EP_PACKAGE_ID = 'https://envipath.org/package/11f2acd5-5209-4d49-ad77-93f6f6965886' # Package for results using BBD+SOIL+SLUDGE - ML - ECC - 2022 model #---------------------------# # PATHWAY SEARCH SETTINGS # #---------------------------# # These are the default settings used for the Sludge TP paper. # They can be modified to direct the pathway search towards a specific objective. # Maximum number of TPs to predict -MAX_TP = 50 +MAX_TP = 20 # Lower probability threshold -PROBABILITY_THRESHOLD = -1 # any value equal to or lower than the threshold will be excluded +PROBABILITY_THRESHOLD = 0 # any value equal to or lower than the threshold will be excluded # Set probabilities of 0 to 0.01 to continue having a weighting scheme downstream of the pathway INCLUDE_0_PROBABILITIES = False # Follow moiety - only compounds containing moiety in SMILES will be expanded MOIETY = "" # e.g., "C(F)(F)F" # To prioritize small compounds in the queue SORT_TPS_BY_SIZE = False # Follow labeled atoms FOLLOW_LABELED_ATOM = False ATOM_LABEL = '14' +# Print as a reaction file additionally to the list of TPs +PRINT_REACTION_FILE = True + #---------------------------# # FILE PATH SETTINGS # #---------------------------# # Input/output files INPUT_FILE_PATH = 'input/input_structures.tsv' OUTPUT_DIRECTORY = 'output/' OUTPUT_FILE_TAG = 'TEST' #---------------------------# # CONNECT TO ENVIPATH # #---------------------------# eP = enviPath(INSTANCE_HOST) password = getpass.getpass() eP.login(USERNAME, password) #---------------------------# # FUNCTIONS # #---------------------------# def __main__(rr_id, pkg_id, tag): """ Main function, predicts pathways for a list of input smiles Output: pathways are saved to specified enviPath package, TP list as .tsv file to output folder :param rr_id: URI of enviPath relative reasoning mode to be used :param pkg_id: URI of enviPath package to store resulting pathways :param tag: string tag to attach to output files for identification """ rr = RelativeReasoning(eP.requester, id=rr_id) pkg = Package(eP.requester, id=pkg_id) input_list = load_input(INPUT_FILE_PATH) - output_file_path = '{}TP_prediction_{}_top_{}.tsv'.format(OUTPUT_DIRECTORY, tag, MAX_TP) - outfile = open(output_file_path, 'w') + output_file_path_TP = '{}TP_prediction_{}_top_{}.tsv'.format(OUTPUT_DIRECTORY, tag, MAX_TP) + output_file_path_reaction = '{}reaction_prediction_{}_top_{}.tsv'.format(OUTPUT_DIRECTORY, tag, MAX_TP) + outfile_TP = open(output_file_path_TP, 'w') + outfile_reaction = open(output_file_path_reaction, 'w') + rule_dict = {} for compound_input in input_list: result = predict_TPs(compound_input['smiles'], compound_input['name'], rr) result_list = clean_result(result) # sort and name TPs pathway_info = upload_envipath_pathway(eP, result_list, pkg) - print_result(outfile, result_list, pathway_info) # continuous writing of result file + print_TP_file(outfile_TP, result_list, pathway_info) # continuous writing of result file + if PRINT_REACTION_FILE: + rule_dict = print_reaction_file(outfile_reaction, result_list, rule_dict, pathway_info) -def print_result(open_file, result, pathway = None): +def print_TP_file(open_TP_file, result, pathway = None): """ Prints output to open file :param open_file: writable file object :param result: clean result dictionary from predict_TPs() function :param pathway: pathway URI from upload_envipath_pathway() function """ - open_file.write('///') # signifies new pathway entry + open_TP_file.write('///') # signifies new pathway entry if pathway: - open_file.write(' Pathway name: {}, Pathway id: {}'.format(pathway['name'], pathway['id'])) - open_file.write('\n') + open_TP_file.write(' Pathway name: {}, Pathway id: {}'.format(pathway['name'], pathway['id'])) + open_TP_file.write('\n') header= ('SMILES\tname\tcombined_probability\trules\tgeneration\tprobability\tparent\n') - open_file.write(header) + open_TP_file.write(header) for TP in result: - open_file.write('{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(TP['smiles'], + open_TP_file.write('{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(TP['smiles'], TP['name'], TP['combined_probability'], TP['rules'], + TP['rule_IDs'], TP['generation'], TP['probability'], - TP['parent']) + TP['parent_smiles']) ) +def print_reaction_file(open_file, result, rule_dict, pathway=None): + open_file.write('///') # signifies new pathway entry + if pathway: + open_file.write(' Pathway name: {}, Pathway id: {}'.format(pathway['name'], pathway['id'])) + open_file.write('\n') + header= ('substrate_name\tproduct_name\tsubstrate_SMILES\tproduct_SMILES\trules\tEC_numbers\tgeneration\tcombined_probability\tprobability\n') + open_file.write(header) + for TP in result[1:]: # ignore first line which is parent + EC_list, rule_dict = get_EC_list(TP, rule_dict) + open_file.write('{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format( + TP['parent_name'], + TP['name'], + TP['parent_smiles'], + TP['smiles'], + TP['rules'], + ','.join(EC_list), + TP['generation'], + TP['combined_probability'], + TP['probability'])) + return rule_dict + +def get_EC_list(TP, rule_dict): + rule_id_list = TP['rule_IDs'].split(',') + EC_list = [] + for rule_id in rule_id_list: + if not rule_dict.get(rule_id): + ec_list_for_rule = [] + rule = ParallelCompositeRule(eP.requester, id = rule_id) + try: + rule_ECs = rule._get('ecNumbers') + except: + print('Warning: JsonDecodeError for ', rule_id) + else: + for ec in rule_ECs: + if 'KEGG' in ec['linkingMethod']: + ec_list_for_rule.append(ec['ecNumber']) + rule_dict[rule_id] = ec_list_for_rule + else: + ec_list_for_rule = rule_dict[rule_id] + + EC_list.extend(ec_list_for_rule) + return EC_list, rule_dict + + def predict_TPs(input_smiles, input_name, rr): """ Pathway prediction for single compound :param input_smiles: input smiles of parent compound :param input_name: name of parent compound :param rr: relative reasoning object :return: dictionary of resulting TPs """ print('\n### PREDICT TPs FOR COMPOUND {} ###\n'.format(input_name)) num_TP = -1 # counter starts at -1, because source compound is also in the TP list validated_TPs = {} # container for resulting predictions - queued_items = [{'probability': 1, 'combined_probability': 1, 'smiles': input_smiles, 'generation': 0, 'parent': '', - 'rules': '', 'name': input_name, 'size': len(input_smiles)}] + queued_items = [{'probability': 1, 'combined_probability': 1, 'smiles': input_smiles, 'generation': 0, 'parent_smiles': '', + 'rules': '', 'rule_IDs': '', 'name': input_name, 'size': len(input_smiles)}] queue = [input_smiles] # queue is updated after each cycle to have top TP first, list of smiles while num_TP < MAX_TP: if len(queue) == 0: print('\nEmpty queue - The exploration of has converged at {} predicted TPs'.format(num_TP)) return validated_TPs # stop TP prediction smiles = queue.pop(0) # get top item in queue data = queued_items.pop(0) # remove data from queued items result_list = expand_smiles(smiles, rr) # create children TP_dict = result_to_compound_dict(result_list) queue, queued_items, validated_TPs = update_queue(queue, queued_items, validated_TPs, TP_dict, data) validated_TPs[smiles] = data num_TP += 1 return validated_TPs def update_queue(_queue,_queued_items, _validated_TPs, _TPs, _parent_data): """ Update queue with TPs predicted in current iteration :param _queue: ordered list of smiles to explore :param _queued_items: ordered list of compound dictionaries, same order as _queue :param _validated_TPs: list of already validated TPs for resulting pathway :param _TPs: predicted TPs from current iteration, to be evaluated and added to queue :param _parent_data: compound dictionary of the parent compound of _TPs :return: new_queue: new ordered list of smiles to explore :return _queued_items: new ordered list of compound dictionaries :return: _validated_TPs: updated list of already validated TPs """ parent_probability = _parent_data['combined_probability'] parent_generation = _parent_data['generation'] parent_smiles = _parent_data['smiles'] queue_before = len(_queue) for smiles in _TPs: data = _TPs[smiles] # If the probability is 0 , we don't consider the TP further this_probability = data['probability'] if this_probability <= PROBABILITY_THRESHOLD: continue # If a moiety is given and it is not in SMILES, we don't follow the TP further if MOIETY not in smiles: continue if FOLLOW_LABELED_ATOM and ATOM_LABEL not in smiles: continue if INCLUDE_0_PROBABILITIES and this_probability == 0: this_probability = 0.01 # add combined probability this_combined_probability = parent_probability * this_probability this_generation = parent_generation + 1 rules = data['rules'] + rule_IDs = data['rule_IDs'] # first, check if compound already in validated. if yes, update if smiles in _validated_TPs.keys(): _validated_TPs[smiles] = update_compound_entry(_validated_TPs[smiles], - this_combined_probability, rules, + this_combined_probability, rules, rule_IDs, this_generation, parent_smiles, size_metric='size', size_value=len(smiles)) # next, check if compound is already in queue. if yes, update elif smiles in _queue: index = _queue.index(smiles) assert smiles == _queued_items[index]['smiles'], \ 'smiles {} does not match smiles in {}'.format(smiles, _queued_items[index]) _queued_items[index] = update_compound_entry(_queued_items[index], - this_combined_probability, rules, + this_combined_probability, rules, rule_IDs, this_generation, parent_smiles, size_metric='size', size_value=len(smiles)) # else, add new item to queue else: data['combined_probability'] = this_combined_probability data['generation'] = this_generation - data['parent'] = parent_smiles + data['parent_smiles'] = parent_smiles data['carbon_count'] = smiles.upper().count('C') _queued_items.append(data) _queue.append(smiles) assert len(_queued_items) == len(_queue) # First sort by size if SORT_TPS_BY_SIZE: _queued_items.sort(reverse=False, key=lambda x: x['size']) # order dict by combined probability _queued_items.sort(reverse=True, key=lambda x: x['combined_probability']) queue_after = len(_queue) print('Added {} smiles to queue'.format(queue_after - queue_before)) new_queue = [] # resetting queue [new_queue.append(x['smiles']) for x in _queued_items] print ('New queue for compound', parent_smiles) for q in new_queue: print(q, _queued_items[new_queue.index(q)]['combined_probability']) return new_queue, _queued_items, _validated_TPs #---------------------------# # MAIN # #---------------------------# __main__(rr_id = EP_MODEL_ID, pkg_id = EP_PACKAGE_ID, tag = OUTPUT_FILE_TAG) diff --git a/TP_prediction/output/TP_prediction_TEST_Sema_20.tsv b/TP_prediction/output/TP_prediction_TEST_Sema_20.tsv new file mode 100644 index 0000000..8cf9a76 --- /dev/null +++ b/TP_prediction/output/TP_prediction_TEST_Sema_20.tsv @@ -0,0 +1,58 @@ +/// Pathway name: DIU, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/a5500e06-4e48-4e6f-9266-c308e1cbd219 +SMILES name combined_probability rules generation probability parent +CN(C)C(=O)NC1=CC=C(C(=C1)Cl)Cl DIU 1 0 1 +C1=C(C=C(C(=C1)Cl)Cl)NC(=O)[O-] TP_DIU_1 0.3 bt0068 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/17bd0bd9-4874-4cce-b829-d0cf7ca3a52e 1 0.3 +CN(C)C(=O)[O-] TP_DIU_2 0.3 bt0068 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/17bd0bd9-4874-4cce-b829-d0cf7ca3a52e 1 0.3 +CNC TP_DIU_3 0.3 bt0068 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/17bd0bd9-4874-4cce-b829-d0cf7ca3a52e 1 0.3 +C1=C(C=C(C(=C1)Cl)Cl)N TP_DIU_4 0.3 bt0068 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/17bd0bd9-4874-4cce-b829-d0cf7ca3a52e 1 0.3 +CNC(=O)NC1=CC=C(C(=C1)Cl)Cl TP_DIU_5 0.2 bt0243 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/03de91cf-fda9-4be6-a822-c2aa75be89a8 1 0.2 +CN(C)C(=O)NC1=CC=C(C=C1)Cl TP_DIU_6 0.1 bt0029 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4 1 0.1 +CN(C)C(=O)NC1=CC(=CC=C1)Cl TP_DIU_7 0.1 bt0029 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4 1 0.1 +C=O TP_DIU_8 0.27 bt0063 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/fbbba937-dd70-43a4-be8c-7de8fcc0f0df 2 0.2 +CN TP_DIU_9 0.27 bt0063 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/fbbba937-dd70-43a4-be8c-7de8fcc0f0df 2 0.9 +C1=C(C(=CC(=C1Cl)Cl)O)O TP_DIU_10 0.06 bt0065,bt0065 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/a3f8dcda-b54c-40a4-b0b4-b9fc1b15dd02,https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/a3f8dcda-b54c-40a4-b0b4-b9fc1b15dd02 2 0.2 +C1=C(C(=C(C(=C1)O)O)Cl)Cl TP_DIU_11 0.06 bt0065,bt0065 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/a3f8dcda-b54c-40a4-b0b4-b9fc1b15dd02,https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/a3f8dcda-b54c-40a4-b0b4-b9fc1b15dd02 2 0.2 +C(=O)(N)[O-] TP_DIU_12 0.06 bt0065 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/a3f8dcda-b54c-40a4-b0b4-b9fc1b15dd02 2 0.2 +C1=C(C=CC(=C1)N)Cl TP_DIU_13 0.06 bt0029 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4 2 0.2 +C1=CC(=CC(=C1)Cl)N TP_DIU_14 0.06 bt0029 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4 2 0.2 +CNC(=O)[O-] TP_DIU_15 0.06 bt0068 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/17bd0bd9-4874-4cce-b829-d0cf7ca3a52e 2 0.3 +C1=C(C=C(C(=C1)Cl)Cl)NC(=O)N TP_DIU_16 0.04000000000000001 bt0243 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/03de91cf-fda9-4be6-a822-c2aa75be89a8 2 0.2 +C1=C(C=CC(=C1)NC(=O)[O-])Cl TP_DIU_17 0.03 bt0068 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/17bd0bd9-4874-4cce-b829-d0cf7ca3a52e 2 0.3 +N TP_DIU_18 0.162 bt0063 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/fbbba937-dd70-43a4-be8c-7de8fcc0f0df 3 0.2 +C(=C(/C(=C\C(=O)[O-])/Cl)\Cl)/C(=O)[O-] TP_DIU_19 0.06 bt0254 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/5f07518c-501f-4e70-bb0a-1bca4f34ebb1 3 1.0 +C(=C/C(=O)[O-])/C(=C(\C(=O)[O-])/Cl)/Cl TP_DIU_20 0.06 bt0254 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/5f07518c-501f-4e70-bb0a-1bca4f34ebb1 3 1.0 +/// Pathway name: BRO, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/f451272d-580f-47e8-99e3-54b72944fc6d +SMILES name combined_probability rules generation probability parent +CCCCCCCC(=O)OC1=C(C=C(C=C1Br)C#N)Br BRO 1 0 1 +CCCCCCCC(=O)[O-] TP_BRO_1 0.6 bt0024 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b2ca8622-d481-4f9f-95d5-dd8597083fb5 1 0.6 +C1=C(C(=C(C=C1C#N)Br)O)Br TP_BRO_2 0.6 bt0024 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b2ca8622-d481-4f9f-95d5-dd8597083fb5 1 0.6 +CCCCCCCC(=O)OC1=C(C=C(C=C1Br)C(=O)[O-])Br TP_BRO_3 0.4 bt0030 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/5e466030-4312-48a5-bb56-a505dba93eaa 1 0.4 +CCCCCCCC(=O)OC1=C(C=C(C=C1Br)O)Br TP_BRO_4 0.1 bt0031 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/c1704b66-0fe7-4131-88f6-ccdb31f427f2 1 0.1 +CCCCCCCC(=O)OC1=CC=C(C=C1Br)C#N TP_BRO_5 0.1 bt0029 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4 1 0.1 +C1=C(C(=C(C=C1C(=O)[O-])Br)O)Br TP_BRO_6 0.24 bt0030,bt0024 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/5e466030-4312-48a5-bb56-a505dba93eaa,https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b2ca8622-d481-4f9f-95d5-dd8597083fb5 2 0.4 +C1=C(C(=C(C=C1O)Br)O)Br TP_BRO_7 0.06 bt0031,bt0024 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/c1704b66-0fe7-4131-88f6-ccdb31f427f2,https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b2ca8622-d481-4f9f-95d5-dd8597083fb5 2 0.1 +CCCCCCCC(=O)OC1=C(C=CC=C1Br)Br TP_BRO_8 0.04000000000000001 bt0051 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/2339e8b3-c9c6-40fd-b89a-aad5693baa18 2 0.1 +CCCCCCCC(=O)OC1=CC=C(C=C1Br)C(=O)[O-] TP_BRO_9 0.04000000000000001 bt0029,bt0030 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4,https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/5e466030-4312-48a5-bb56-a505dba93eaa 2 0.1 +C1=C(C=C(C(=C1)O)Br)C#N TP_BRO_10 0.03 bt0024 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b2ca8622-d481-4f9f-95d5-dd8597083fb5 2 0.3 +CCCCCCCC(=O)OC1=CC=C(C=C1Br)O TP_BRO_11 0.010000000000000002 bt0029,bt0031 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4,https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/c1704b66-0fe7-4131-88f6-ccdb31f427f2 2 0.1 +CCCCCCCC(=O)OC1=CC=C(C=C1)C#N TP_BRO_12 0.010000000000000002 bt0029 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4 2 0.1 +C(=C(/C(=O)[O-])\Br)/C(=O)CC(=O)[O-] TP_BRO_13 0.03899999999999999 bt0357 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/0c79d13c-dca5-42a7-bb4e-3e2201f27042 3 0.6499999999999999 +C1=CC(=C(C(=C1)Br)O)Br TP_BRO_14 0.024 bt0051 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/2339e8b3-c9c6-40fd-b89a-aad5693baa18 3 0.1 +C1=C(C=C(C(=C1)O)Br)C(=O)[O-] TP_BRO_15 0.012000000000000002 bt0024 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b2ca8622-d481-4f9f-95d5-dd8597083fb5 3 0.3 +C1=C(C=C(C(=C1)O)Br)O TP_BRO_16 0.012 bt0029 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4 3 0.2 +C1=C(C(=C(C(=C1Br)O)Br)O)O TP_BRO_17 0.006 bt0014 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/d06a4cce-fc80-4d05-92df-cbd0df7b1cb8 3 0.1 +CCCCCCCC(=O)OC1=C(C=CC=C1)Br TP_BRO_18 0.004000000000000001 bt0029,bt0051 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4,https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/2339e8b3-c9c6-40fd-b89a-aad5693baa18 3 0.1 +C(=C/C(=O)[O-])/C(=O)CC(=O)[O-] TP_BRO_19 0.011699999999999997 bt0029 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4 4 0.3 +C(=C(/C(=C(\C(=O)[O-])/Br)/O)\Br)/C(=O)[O-] TP_BRO_20 0.0042 bt0254 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/5f07518c-501f-4e70-bb0a-1bca4f34ebb1 4 0.7 +/// Pathway name: CAR, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/70ca701c-55e4-41a6-92c1-d40d4947ad1f +SMILES name combined_probability rules generation probability parent +CCNC(=O)C(C)OC(=O)NC1=CC=CC=C1 CAR 1 0 1 +C1=CC(=C(C=C1)O)O TP_CAR_1 0.4 bt0065 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/a3f8dcda-b54c-40a4-b0b4-b9fc1b15dd02 1 0.4 +CCNC(=O)C(C)OC(=O)N TP_CAR_2 0.4 bt0065 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/a3f8dcda-b54c-40a4-b0b4-b9fc1b15dd02 1 0.4 +C1=CC=C(C=C1)N TP_CAR_3 0.2 bt0318 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/66d5b8da-a61a-43f1-b49f-3da124c0095a 1 0.2 +CCNC(=O)C(C)OC(=O)[O-] TP_CAR_4 0.2 bt0318 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/66d5b8da-a61a-43f1-b49f-3da124c0095a 1 0.2 +C(=C/C(=O)[O-])/C=C\C(=O)[O-] TP_CAR_5 0.27999999999999997 bt0254 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/5f07518c-501f-4e70-bb0a-1bca4f34ebb1 2 0.7 +N TP_CAR_6 0.06 bt0065 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/a3f8dcda-b54c-40a4-b0b4-b9fc1b15dd02 2 0.3 +/// Pathway name: IBU, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/0bb2737a-1eb0-4917-9230-75ec4e0475d6 +SMILES name combined_probability rules generation probability parent +CC(C)CC1=CC=C(C=C1)C(C)C(=O)[O-] IBU 1 0 1 diff --git a/TP_prediction/output/TP_prediction_TEST_top_2.tsv b/TP_prediction/output/TP_prediction_TEST_top_2.tsv new file mode 100644 index 0000000..e69de29 diff --git a/TP_prediction/output/TP_prediction_TEST_top_20.tsv b/TP_prediction/output/TP_prediction_TEST_top_20.tsv new file mode 100644 index 0000000..62c89f4 --- /dev/null +++ b/TP_prediction/output/TP_prediction_TEST_top_20.tsv @@ -0,0 +1,39 @@ +/// Pathway name: DIC, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/d5164686-4c13-4107-8159-d1c0907aac3d +SMILES name combined_probability rules generation probability parent +C1=CC=C(C(=C1)CC(=O)O)NC2=C(C=CC=C2Cl)Cl DIC 1 0 1 +C1=CC(=C(C(=C1)Cl)N)Cl TP_DIC_1 0.6 bt0374 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/8ae1cca7-9c9d-4ce5-97a3-863196991f51 1 0.6 +C1=CC(=C(C(=C1)CC(=O)O)O)O TP_DIC_2 0.6 bt0374 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/8ae1cca7-9c9d-4ce5-97a3-863196991f51 1 0.6 +C1=CC(=C(C=C1)N)Cl TP_DIC_3 0.12 bt0029 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4 2 0.2 +C1=CC=C(C=C1)N TP_DIC_4 0.024 bt0029 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af13a678-1d06-402c-ae13-cb00f8830af4 3 0.2 +C1=CC(=C(C(=C1)Cl)O)O TP_DIC_5 0.024 bt0065 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/a3f8dcda-b54c-40a4-b0b4-b9fc1b15dd02 3 0.2 +N TP_DIC_6 0.024 bt0065 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/a3f8dcda-b54c-40a4-b0b4-b9fc1b15dd02 3 0.2 +C(=C/C(=O)[O-])/C=C(\C(=O)[O-])/Cl TP_DIC_7 0.0216 bt0254 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/5f07518c-501f-4e70-bb0a-1bca4f34ebb1 4 0.9 +C1=CC(=C(C=C1)O)O TP_DIC_8 0.0072 bt0065 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/a3f8dcda-b54c-40a4-b0b4-b9fc1b15dd02 4 0.3 +C(=C/C(=O)[O-])/C=C\C(=O)[O-] TP_DIC_9 0.005039999999999999 bt0254 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/5f07518c-501f-4e70-bb0a-1bca4f34ebb1 5 0.7 +C/1=CC(=O)O\C1=C/C(=O)[O-] TP_DIC_10 0.00432 bt0181 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/85e6246d-4a80-4c93-bf18-906970d5251c 5 0.2 +C(=C/C(=O)[O-])/C(=O)CC(=O)[O-] TP_DIC_11 0.00432 bt0313 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/28a15491-30bc-465e-bf34-a4d03bc75e40 6 1.0 +C(=CC(=O)[O-])/C(=C/C(=O)[O-])/O TP_DIC_12 0.003888 bt0024 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b2ca8622-d481-4f9f-95d5-dd8597083fb5 6 0.9 +C(CC(=O)[O-])C(=O)CC(=O)[O-] TP_DIC_13 0.00043200000000000004 bt0291 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/6e0e8a5f-09e2-42d9-943e-6d4a823bfcf4 7 0.1 +/// Pathway name: ATR, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/cfdfe459-6c5d-413b-a905-545ea3000404 +SMILES name combined_probability rules generation probability parent +CCNC1=NC(=NC(=N1)NC(C)C)Cl ATR 1 0 1 +CC(=O)C TP_ATR_1 0.4 bt0339 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b330946e-f8a9-4860-86c5-1953f230c837 1 0.4 +CC(C)NC1=NC(=NC(=N1)Cl)N TP_ATR_2 0.4 bt0339 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b330946e-f8a9-4860-86c5-1953f230c837 1 0.4 +CCNC1=NC(=NC(=N1)Cl)N TP_ATR_3 0.4 bt0339 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b330946e-f8a9-4860-86c5-1953f230c837 1 0.4 +CC=O TP_ATR_4 0.4 bt0339 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b330946e-f8a9-4860-86c5-1953f230c837 1 0.4 +CCNC1=NC(=NC(=N1)NC(C)C)O TP_ATR_5 0.4 bt0330 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af745278-4d2a-44bf-b74d-91e4716c8ce1 1 0.4 +CCNC1=NC(=NC(=N1)O)N TP_ATR_6 0.32000000000000006 bt0330 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af745278-4d2a-44bf-b74d-91e4716c8ce1 2 0.8 +CCNC1=NC(=NC(=N1)O)Cl TP_ATR_7 0.32000000000000006 bt0330 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af745278-4d2a-44bf-b74d-91e4716c8ce1 2 0.8 +CC(C)NC1=NC(=NC(=N1)O)O TP_ATR_8 0.30000000000000004 bt0061 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/021d3098-b5af-4083-ba0d-029d9c18a299 2 0.75 +CC(C)N TP_ATR_9 0.30000000000000004 bt0061 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/021d3098-b5af-4083-ba0d-029d9c18a299 2 0.75 +CCN TP_ATR_10 0.30000000000000004 bt0061 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/021d3098-b5af-4083-ba0d-029d9c18a299 2 0.75 +CC(=O)[O-] TP_ATR_11 0.2 bt0003 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/f64df014-1e44-484b-b0fc-af35a7425f7a 2 0.5 +C1(=NC(=NC(=N1)N)N)Cl TP_ATR_12 0.12 bt0339 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/b330946e-f8a9-4860-86c5-1953f230c837 2 0.3 +CC(C)NC1=NC(=NC(=N1)O)Cl TP_ATR_13 0.12 bt0330 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af745278-4d2a-44bf-b74d-91e4716c8ce1 2 0.3 +CC(C)NC1=NC(=NC(=N1)O)N TP_ATR_14 0.12 bt0330 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af745278-4d2a-44bf-b74d-91e4716c8ce1 2 0.3 +CCNC1=NC(=NC(=N1)O)O TP_ATR_15 0.32000000000000006 bt0330 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af745278-4d2a-44bf-b74d-91e4716c8ce1 3 0.75 +N TP_ATR_16 0.15000000000000002 bt0063 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/fbbba937-dd70-43a4-be8c-7de8fcc0f0df 3 0.1 +C1(=NC(=NC(=N1)N)O)Cl TP_ATR_17 0.08399999999999999 bt0330 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/af745278-4d2a-44bf-b74d-91e4716c8ce1 3 0.05 +C1(=NC(=NC(=N1)O)O)N TP_ATR_18 0.08000000000000002 bt0061 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/021d3098-b5af-4083-ba0d-029d9c18a299 3 0.25 +C1(=NC(=NC(=N1)O)O)Cl TP_ATR_19 0.08000000000000002 bt0061 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/021d3098-b5af-4083-ba0d-029d9c18a299 3 0.25 +C1(=NC(=NC(=N1)O)O)O TP_ATR_20 0.24000000000000005 bt0061 https://envipath.org/package/32de3cf4-e3e6-4168-956e-32fa5ddb0ce1/parallel-rule/021d3098-b5af-4083-ba0d-029d9c18a299 4 0.75 diff --git a/TP_prediction/output/TP_prediction_TEST_top_3.tsv b/TP_prediction/output/TP_prediction_TEST_top_3.tsv new file mode 100644 index 0000000..e69de29 diff --git a/TP_prediction/output/reaction_prediction_Sema_top_20.tsv b/TP_prediction/output/reaction_prediction_Sema_top_20.tsv new file mode 100644 index 0000000..a13a054 --- /dev/null +++ b/TP_prediction/output/reaction_prediction_Sema_top_20.tsv @@ -0,0 +1,91 @@ +/// Pathway name: DIC, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/d5164686-4c13-4107-8159-d1c0907aac3d +substrate_name product_name substrate_SMILES product_SMILES rules EC_numbers generation combined_probability probability +DIC TP_DIC_1 C1=CC=C(C(=C1)CC(=O)O)NC2=C(C=CC=C2Cl)Cl C1=CC(=C(C(=C1)Cl)N)Cl bt0374 1.14.12.22 1 0.6 0.6 +DIC TP_DIC_2 C1=CC=C(C(=C1)CC(=O)O)NC2=C(C=CC=C2Cl)Cl C1=CC(=C(C(=C1)CC(=O)O)O)O bt0374 1.14.12.22 1 0.6 0.6 +TP_DIC_1 TP_DIC_3 C1=CC(=C(C(=C1)Cl)N)Cl C1=CC(=C(C=C1)N)Cl bt0029 1.21.99.5,1.11.1.8,1.3.1.32 2 0.12 0.2 +TP_DIC_3 TP_DIC_4 C1=CC(=C(C=C1)N)Cl C1=CC=C(C=C1)N bt0029 1.21.99.5,1.11.1.8,1.3.1.32 3 0.024 0.2 +TP_DIC_3 TP_DIC_5 C1=CC(=C(C=C1)N)Cl C1=CC(=C(C(=C1)Cl)O)O bt0065 1.14.13.35,1.14.12.14 3 0.024 0.2 +TP_DIC_3 TP_DIC_6 C1=CC(=C(C=C1)N)Cl N bt0065 1.14.13.35,1.14.12.14 3 0.024 0.2 +TP_DIC_5 TP_DIC_7 C1=CC(=C(C(=C1)Cl)O)O C(=C/C(=O)[O-])/C=C(\C(=O)[O-])/Cl bt0254 1.13.11.37,1.13.11.8,1.13.11.57,1.13.11.3,1.13.11.1,1.13.11.35 4 0.0216 0.9 +TP_DIC_4 TP_DIC_8 C1=CC=C(C=C1)N C1=CC(=C(C=C1)O)O bt0065 1.14.13.35,1.14.12.14 4 0.0072 0.3 +TP_DIC_8 TP_DIC_9 C1=CC(=C(C=C1)O)O C(=C/C(=O)[O-])/C=C\C(=O)[O-] bt0254 1.13.11.37,1.13.11.8,1.13.11.57,1.13.11.3,1.13.11.1,1.13.11.35 5 0.005039999999999999 0.7 +TP_DIC_7 TP_DIC_10 C(=C/C(=O)[O-])/C=C(\C(=O)[O-])/Cl C/1=CC(=O)O\C1=C/C(=O)[O-] bt0181 5.5.1.7 5 0.00432 0.2 +TP_DIC_10 TP_DIC_11 C/1=CC(=O)O\C1=C/C(=O)[O-] C(=C/C(=O)[O-])/C(=O)CC(=O)[O-] bt0313 6 0.00432 1.0 +TP_DIC_10 TP_DIC_12 C/1=CC(=O)O\C1=C/C(=O)[O-] C(=CC(=O)[O-])/C(=C/C(=O)[O-])/O bt0024 3.1.1.7,3.1.1.59,3.1.1.49,3.2.1.175,3.1.1.66,3.1.1.15,3.1.1.91,3.1.1.10,3.1.1.70,3.1.1.93,3.1.1.30,3.1.1.27,3.1.1.33,3.1.1.46,3.1.1.55,3.1.1.83,3.1.1.88,3.1.1.107,3.1.1.14,3.1.1.82,3.1.1.113,3.1.1.35,3.1.1.95,3.1.1.31,3.1.1.41,3.1.1.8,3.1.1.114,3.1.1.51,3.1.1.1,3.1.1.45,3.1.1.56,3.1.1.60,2.3.1.133,3.1.1.53,3.1.1.102,3.1.1.36,3.1.1.94,3.1.1.99,2.3.1.175,3.1.1.65,3.1.1.54,3.1.1.106,3.1.1.68,3.1.1.37,3.1.1.17,3.1.1.25,3.1.1.105,3.1.1.44,3.1.1.112,3.1.1.63,3.1.1.84,3.1.1.80,3.1.1.2,3.1.1.110 6 0.003888 0.9 +TP_DIC_11 TP_DIC_13 C(=C/C(=O)[O-])/C(=O)CC(=O)[O-] C(CC(=O)[O-])C(=O)CC(=O)[O-] bt0291 1.3.1.106,1.3.1.82,1.3.1.32,1.3.1.22,1.3.1.122,1.3.1.21,1.3.1.93,1.3.1.81,1.3.1.38,1.3.1.75,1.3.1.8,1.3.1.70,1.3.1.72,1.1.1.384 7 0.00043200000000000004 0.1 +/// Pathway name: ATR, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/cfdfe459-6c5d-413b-a905-545ea3000404 +substrate_name product_name substrate_SMILES product_SMILES rules EC_numbers generation combined_probability probability +ATR TP_ATR_1 CCNC1=NC(=NC(=N1)NC(C)C)Cl CC(=O)C bt0339 1 0.4 0.4 +ATR TP_ATR_2 CCNC1=NC(=NC(=N1)NC(C)C)Cl CC(C)NC1=NC(=NC(=N1)Cl)N bt0339 1 0.4 0.4 +ATR TP_ATR_3 CCNC1=NC(=NC(=N1)NC(C)C)Cl CCNC1=NC(=NC(=N1)Cl)N bt0339 1 0.4 0.4 +ATR TP_ATR_4 CCNC1=NC(=NC(=N1)NC(C)C)Cl CC=O bt0339 1 0.4 0.4 +ATR TP_ATR_5 CCNC1=NC(=NC(=N1)NC(C)C)Cl CCNC1=NC(=NC(=N1)NC(C)C)O bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 1 0.4 0.4 +TP_ATR_3 TP_ATR_6 CCNC1=NC(=NC(=N1)Cl)N CCNC1=NC(=NC(=N1)O)N bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 2 0.32000000000000006 0.8 +TP_ATR_3 TP_ATR_7 CCNC1=NC(=NC(=N1)Cl)N CCNC1=NC(=NC(=N1)O)Cl bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 2 0.32000000000000006 0.8 +TP_ATR_5 TP_ATR_8 CCNC1=NC(=NC(=N1)NC(C)C)O CC(C)NC1=NC(=NC(=N1)O)O bt0061 3.5.4.43,3.5.4.42 2 0.30000000000000004 0.75 +TP_ATR_5 TP_ATR_9 CCNC1=NC(=NC(=N1)NC(C)C)O CC(C)N bt0061 3.5.4.43,3.5.4.42 2 0.30000000000000004 0.75 +TP_ATR_5 TP_ATR_10 CCNC1=NC(=NC(=N1)NC(C)C)O CCN bt0061 3.5.4.43,3.5.4.42 2 0.30000000000000004 0.75 +TP_ATR_4 TP_ATR_11 CC=O CC(=O)[O-] bt0003 1.2.1.57,1.2.1.22,1.2.1.32,1.2.1.79,1.2.1.28,1.2.1.33,1.2.1.10,1.2.1.62,1.2.1.90,1.2.1.87,1.2.1.73,1.2.1.99,1.2.1.26,1.2.1.4,1.2.1.75,1.2.1.78,1.2.3.8,1.2.1.77,1.2.1.63,1.2.1.8,1.2.1.102,1.2.3.7,1.2.1.54,1.2.1.48,1.2.1.5,1.2.1.71,1.2.1.23,1.2.1.3,1.2.1.24,1.2.1.68,1.2.3.15,1.2.1.39,1.2.1.82,1.2.3.1,1.2.1.16,1.2.1.96,1.1.1.312,1.1.3.39,1.2.1.20,1.2.3.9,1.1.3.23,1.2.1.36,1.2.3.5,1.2.1.85,1.2.1.9,1.2.1.31,1.2.1.47,1.1.3.17,1.2.1.15,1.2.1.44,1.2.1.81,1.2.1.65,1.2.3.14,1.2.1.7,1.2.1.98,1.14.13.246,1.2.1.17,1.2.1.91,1.2.1.29,1.2.1.19,1.2.1.97,1.2.1.69,1.2.1.64,1.2.1.53,1.2.1.83,1.1.1.23,1.2.1.89,1.2.1.88,1.2.1.67,1.2.1.74,1.2.1.86,1.2.1.42,1.2.1.21 2 0.2 0.5 +TP_ATR_2 TP_ATR_12 CC(C)NC1=NC(=NC(=N1)Cl)N C1(=NC(=NC(=N1)N)N)Cl bt0339 2 0.12 0.3 +TP_ATR_2 TP_ATR_13 CC(C)NC1=NC(=NC(=N1)Cl)N CC(C)NC1=NC(=NC(=N1)O)Cl bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 2 0.12 0.3 +TP_ATR_2 TP_ATR_14 CC(C)NC1=NC(=NC(=N1)Cl)N CC(C)NC1=NC(=NC(=N1)O)N bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 2 0.12 0.3 +TP_ATR_6 TP_ATR_15 CCNC1=NC(=NC(=N1)O)N CCNC1=NC(=NC(=N1)O)O bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 3 0.32000000000000006 0.75 +TP_ATR_10 TP_ATR_16 CCN N bt0063 1.4.1.9,1.4.1.3,1.4.3.1,1.5.3.19,1.4.1.1,1.4.3.7,1.4.1.12,1.5.1.10,1.4.1.17,1.5.1.28,1.4.3.12,1.4.1.18,1.5.3.16,1.4.3.5,1.4.3.14,1.4.1.26,1.5.3.5,1.5.3.13,1.4.1.10,1.4.1.21,1.5.1.19,1.4.1.8,1.5.3.6,1.4.3.4,1.4.3.24,1.5.3.1,1.5.1.22,1.4.1.13,1.5.3.18,1.4.3.8,1.5.3.21,1.4.1.4,1.4.3.11,1.4.1.23,1.4.3.20,1.5.1.23,1.4.3.16,1.5.1.24,1.5.1.11,1.14.13.178,1.5.1.16,1.5.3.4,1.4.3.25,1.4.3.3,1.4.1.11,1.5.3.17,1.4.1.14,1.4.3.19,1.4.1.7,1.5.1.8,1.4.1.16,1.14.13.128,1.5.3.10,1.4.1.2,1.5.3.15,1.4.3.22,1.5.1.51,1.4.3.10,1.4.1.19,1.5.1.26,1.14.13.239,1.5.1.7,1.14.14.1,1.4.3.15,1.4.1.20,1.4.3.21,1.14.13.179,1.4.3.2,1.14.13.238,1.5.1.52,1.5.1.9 3 0.15000000000000002 0.1 +TP_ATR_12 TP_ATR_17 C1(=NC(=NC(=N1)N)N)Cl C1(=NC(=NC(=N1)N)O)Cl bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 3 0.08399999999999999 0.05 +TP_ATR_6 TP_ATR_18 CCNC1=NC(=NC(=N1)O)N C1(=NC(=NC(=N1)O)O)N bt0061 3.5.4.43,3.5.4.42 3 0.08000000000000002 0.25 +TP_ATR_7 TP_ATR_19 CCNC1=NC(=NC(=N1)O)Cl C1(=NC(=NC(=N1)O)O)Cl bt0061 3.5.4.43,3.5.4.42 3 0.08000000000000002 0.25 +TP_ATR_15 TP_ATR_20 CCNC1=NC(=NC(=N1)O)O C1(=NC(=NC(=N1)O)O)O bt0061 3.5.4.43,3.5.4.42 4 0.24000000000000005 0.75 +/// Pathway name: DIU, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/a5500e06-4e48-4e6f-9266-c308e1cbd219 +substrate_name product_name substrate_SMILES product_SMILES rules EC_numbers generation combined_probability probability +DIU TP_DIU_1 CN(C)C(=O)NC1=CC=C(C(=C1)Cl)Cl C1=C(C=C(C(=C1)Cl)Cl)NC(=O)[O-] bt0068 3.5.1.131,3.5.1.110,3.5.3.9,3.5.1.20,3.5.1.84,3.5.1.59,3.5.3.21,3.5.1.7,3.5.1.53,3.5.1.6 1 0.3 0.3 +DIU TP_DIU_2 CN(C)C(=O)NC1=CC=C(C(=C1)Cl)Cl CN(C)C(=O)[O-] bt0068 3.5.1.131,3.5.1.110,3.5.3.9,3.5.1.20,3.5.1.84,3.5.1.59,3.5.3.21,3.5.1.7,3.5.1.53,3.5.1.6 1 0.3 0.3 +DIU TP_DIU_3 CN(C)C(=O)NC1=CC=C(C(=C1)Cl)Cl CNC bt0068 3.5.1.131,3.5.1.110,3.5.3.9,3.5.1.20,3.5.1.84,3.5.1.59,3.5.3.21,3.5.1.7,3.5.1.53,3.5.1.6 1 0.3 0.3 +DIU TP_DIU_4 CN(C)C(=O)NC1=CC=C(C(=C1)Cl)Cl C1=C(C=C(C(=C1)Cl)Cl)N bt0068 3.5.1.131,3.5.1.110,3.5.3.9,3.5.1.20,3.5.1.84,3.5.1.59,3.5.3.21,3.5.1.7,3.5.1.53,3.5.1.6 1 0.3 0.3 +DIU TP_DIU_5 CN(C)C(=O)NC1=CC=C(C(=C1)Cl)Cl CNC(=O)NC1=CC=C(C(=C1)Cl)Cl bt0243 1.14.13.179,1.14.13.178 1 0.2 0.2 +DIU TP_DIU_6 CN(C)C(=O)NC1=CC=C(C(=C1)Cl)Cl CN(C)C(=O)NC1=CC=C(C=C1)Cl bt0029 1.21.99.5,1.11.1.8,1.3.1.32 1 0.1 0.1 +DIU TP_DIU_7 CN(C)C(=O)NC1=CC=C(C(=C1)Cl)Cl CN(C)C(=O)NC1=CC(=CC=C1)Cl bt0029 1.21.99.5,1.11.1.8,1.3.1.32 1 0.1 0.1 +TP_DIU_3 TP_DIU_8 CNC C=O bt0063 1.4.1.9,1.4.1.3,1.4.3.1,1.5.3.19,1.4.1.1,1.4.3.7,1.4.1.12,1.5.1.10,1.4.1.17,1.5.1.28,1.4.3.12,1.4.1.18,1.5.3.16,1.4.3.5,1.4.3.14,1.4.1.26,1.5.3.5,1.5.3.13,1.4.1.10,1.4.1.21,1.5.1.19,1.4.1.8,1.5.3.6,1.4.3.4,1.4.3.24,1.5.3.1,1.5.1.22,1.4.1.13,1.5.3.18,1.4.3.8,1.5.3.21,1.4.1.4,1.4.3.11,1.4.1.23,1.4.3.20,1.5.1.23,1.4.3.16,1.5.1.24,1.5.1.11,1.14.13.178,1.5.1.16,1.5.3.4,1.4.3.25,1.4.3.3,1.4.1.11,1.5.3.17,1.4.1.14,1.4.3.19,1.4.1.7,1.5.1.8,1.4.1.16,1.14.13.128,1.5.3.10,1.4.1.2,1.5.3.15,1.4.3.22,1.5.1.51,1.4.3.10,1.4.1.19,1.5.1.26,1.14.13.239,1.5.1.7,1.14.14.1,1.4.3.15,1.4.1.20,1.4.3.21,1.14.13.179,1.4.3.2,1.14.13.238,1.5.1.52,1.5.1.9 2 0.27 0.2 +TP_DIU_3 TP_DIU_9 CNC CN bt0063 1.4.1.9,1.4.1.3,1.4.3.1,1.5.3.19,1.4.1.1,1.4.3.7,1.4.1.12,1.5.1.10,1.4.1.17,1.5.1.28,1.4.3.12,1.4.1.18,1.5.3.16,1.4.3.5,1.4.3.14,1.4.1.26,1.5.3.5,1.5.3.13,1.4.1.10,1.4.1.21,1.5.1.19,1.4.1.8,1.5.3.6,1.4.3.4,1.4.3.24,1.5.3.1,1.5.1.22,1.4.1.13,1.5.3.18,1.4.3.8,1.5.3.21,1.4.1.4,1.4.3.11,1.4.1.23,1.4.3.20,1.5.1.23,1.4.3.16,1.5.1.24,1.5.1.11,1.14.13.178,1.5.1.16,1.5.3.4,1.4.3.25,1.4.3.3,1.4.1.11,1.5.3.17,1.4.1.14,1.4.3.19,1.4.1.7,1.5.1.8,1.4.1.16,1.14.13.128,1.5.3.10,1.4.1.2,1.5.3.15,1.4.3.22,1.5.1.51,1.4.3.10,1.4.1.19,1.5.1.26,1.14.13.239,1.5.1.7,1.14.14.1,1.4.3.15,1.4.1.20,1.4.3.21,1.14.13.179,1.4.3.2,1.14.13.238,1.5.1.52,1.5.1.9 2 0.27 0.9 +TP_DIU_4 TP_DIU_10 C1=C(C=C(C(=C1)Cl)Cl)NC(=O)[O-],C1=C(C=C(C(=C1)Cl)Cl)N C1=C(C(=CC(=C1Cl)Cl)O)O bt0065,bt0065 1.14.13.35,1.14.12.14,1.14.13.35,1.14.12.14 2 0.06 0.2 +TP_DIU_4 TP_DIU_11 C1=C(C=C(C(=C1)Cl)Cl)NC(=O)[O-],C1=C(C=C(C(=C1)Cl)Cl)N C1=C(C(=C(C(=C1)O)O)Cl)Cl bt0065,bt0065 1.14.13.35,1.14.12.14,1.14.13.35,1.14.12.14 2 0.06 0.2 +TP_DIU_1 TP_DIU_12 C1=C(C=C(C(=C1)Cl)Cl)NC(=O)[O-] C(=O)(N)[O-] bt0065 1.14.13.35,1.14.12.14 2 0.06 0.2 +TP_DIU_4 TP_DIU_13 C1=C(C=C(C(=C1)Cl)Cl)N C1=C(C=CC(=C1)N)Cl bt0029 1.21.99.5,1.11.1.8,1.3.1.32 2 0.06 0.2 +TP_DIU_4 TP_DIU_14 C1=C(C=C(C(=C1)Cl)Cl)N C1=CC(=CC(=C1)Cl)N bt0029 1.21.99.5,1.11.1.8,1.3.1.32 2 0.06 0.2 +TP_DIU_5 TP_DIU_15 CNC(=O)NC1=CC=C(C(=C1)Cl)Cl CNC(=O)[O-] bt0068 3.5.1.131,3.5.1.110,3.5.3.9,3.5.1.20,3.5.1.84,3.5.1.59,3.5.3.21,3.5.1.7,3.5.1.53,3.5.1.6 2 0.06 0.3 +TP_DIU_5 TP_DIU_16 CNC(=O)NC1=CC=C(C(=C1)Cl)Cl C1=C(C=C(C(=C1)Cl)Cl)NC(=O)N bt0243 1.14.13.179,1.14.13.178 2 0.04000000000000001 0.2 +TP_DIU_6 TP_DIU_17 CN(C)C(=O)NC1=CC=C(C=C1)Cl C1=C(C=CC(=C1)NC(=O)[O-])Cl bt0068 3.5.1.131,3.5.1.110,3.5.3.9,3.5.1.20,3.5.1.84,3.5.1.59,3.5.3.21,3.5.1.7,3.5.1.53,3.5.1.6 2 0.03 0.3 +TP_DIU_9 TP_DIU_18 CN N bt0063 1.4.1.9,1.4.1.3,1.4.3.1,1.5.3.19,1.4.1.1,1.4.3.7,1.4.1.12,1.5.1.10,1.4.1.17,1.5.1.28,1.4.3.12,1.4.1.18,1.5.3.16,1.4.3.5,1.4.3.14,1.4.1.26,1.5.3.5,1.5.3.13,1.4.1.10,1.4.1.21,1.5.1.19,1.4.1.8,1.5.3.6,1.4.3.4,1.4.3.24,1.5.3.1,1.5.1.22,1.4.1.13,1.5.3.18,1.4.3.8,1.5.3.21,1.4.1.4,1.4.3.11,1.4.1.23,1.4.3.20,1.5.1.23,1.4.3.16,1.5.1.24,1.5.1.11,1.14.13.178,1.5.1.16,1.5.3.4,1.4.3.25,1.4.3.3,1.4.1.11,1.5.3.17,1.4.1.14,1.4.3.19,1.4.1.7,1.5.1.8,1.4.1.16,1.14.13.128,1.5.3.10,1.4.1.2,1.5.3.15,1.4.3.22,1.5.1.51,1.4.3.10,1.4.1.19,1.5.1.26,1.14.13.239,1.5.1.7,1.14.14.1,1.4.3.15,1.4.1.20,1.4.3.21,1.14.13.179,1.4.3.2,1.14.13.238,1.5.1.52,1.5.1.9 3 0.162 0.2 +TP_DIU_10 TP_DIU_19 C1=C(C(=CC(=C1Cl)Cl)O)O C(=C(/C(=C\C(=O)[O-])/Cl)\Cl)/C(=O)[O-] bt0254 1.13.11.37,1.13.11.8,1.13.11.57,1.13.11.3,1.13.11.1,1.13.11.35 3 0.06 1.0 +TP_DIU_11 TP_DIU_20 C1=C(C(=C(C(=C1)O)O)Cl)Cl C(=C/C(=O)[O-])/C(=C(\C(=O)[O-])/Cl)/Cl bt0254 1.13.11.37,1.13.11.8,1.13.11.57,1.13.11.3,1.13.11.1,1.13.11.35 3 0.06 1.0 +/// Pathway name: BRO, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/f451272d-580f-47e8-99e3-54b72944fc6d +substrate_name product_name substrate_SMILES product_SMILES rules EC_numbers generation combined_probability probability +BRO TP_BRO_1 CCCCCCCC(=O)OC1=C(C=C(C=C1Br)C#N)Br CCCCCCCC(=O)[O-] bt0024 3.1.1.7,3.1.1.59,3.1.1.49,3.2.1.175,3.1.1.66,3.1.1.15,3.1.1.91,3.1.1.10,3.1.1.70,3.1.1.93,3.1.1.30,3.1.1.27,3.1.1.33,3.1.1.46,3.1.1.55,3.1.1.83,3.1.1.88,3.1.1.107,3.1.1.14,3.1.1.82,3.1.1.113,3.1.1.35,3.1.1.95,3.1.1.31,3.1.1.41,3.1.1.8,3.1.1.114,3.1.1.51,3.1.1.1,3.1.1.45,3.1.1.56,3.1.1.60,2.3.1.133,3.1.1.53,3.1.1.102,3.1.1.36,3.1.1.94,3.1.1.99,2.3.1.175,3.1.1.65,3.1.1.54,3.1.1.106,3.1.1.68,3.1.1.37,3.1.1.17,3.1.1.25,3.1.1.105,3.1.1.44,3.1.1.112,3.1.1.63,3.1.1.84,3.1.1.80,3.1.1.2,3.1.1.110 1 0.6 0.6 +BRO TP_BRO_2 CCCCCCCC(=O)OC1=C(C=C(C=C1Br)C#N)Br C1=C(C(=C(C=C1C#N)Br)O)Br bt0024 3.1.1.7,3.1.1.59,3.1.1.49,3.2.1.175,3.1.1.66,3.1.1.15,3.1.1.91,3.1.1.10,3.1.1.70,3.1.1.93,3.1.1.30,3.1.1.27,3.1.1.33,3.1.1.46,3.1.1.55,3.1.1.83,3.1.1.88,3.1.1.107,3.1.1.14,3.1.1.82,3.1.1.113,3.1.1.35,3.1.1.95,3.1.1.31,3.1.1.41,3.1.1.8,3.1.1.114,3.1.1.51,3.1.1.1,3.1.1.45,3.1.1.56,3.1.1.60,2.3.1.133,3.1.1.53,3.1.1.102,3.1.1.36,3.1.1.94,3.1.1.99,2.3.1.175,3.1.1.65,3.1.1.54,3.1.1.106,3.1.1.68,3.1.1.37,3.1.1.17,3.1.1.25,3.1.1.105,3.1.1.44,3.1.1.112,3.1.1.63,3.1.1.84,3.1.1.80,3.1.1.2,3.1.1.110 1 0.6 0.6 +BRO TP_BRO_3 CCCCCCCC(=O)OC1=C(C=C(C=C1Br)C#N)Br CCCCCCCC(=O)OC1=C(C=C(C=C1Br)C(=O)[O-])Br bt0030 3.5.5.7,3.5.5.4,3.5.5.2,3.5.5.6,3.5.5.5,3.5.5.1 1 0.4 0.4 +BRO TP_BRO_4 CCCCCCCC(=O)OC1=C(C=C(C=C1Br)C#N)Br CCCCCCCC(=O)OC1=C(C=C(C=C1Br)O)Br bt0031 1 0.1 0.1 +BRO TP_BRO_5 CCCCCCCC(=O)OC1=C(C=C(C=C1Br)C#N)Br CCCCCCCC(=O)OC1=CC=C(C=C1Br)C#N bt0029 1.21.99.5,1.11.1.8,1.3.1.32 1 0.1 0.1 +TP_BRO_3 TP_BRO_6 C1=C(C(=C(C=C1C#N)Br)O)Br,CCCCCCCC(=O)OC1=C(C=C(C=C1Br)C(=O)[O-])Br C1=C(C(=C(C=C1C(=O)[O-])Br)O)Br bt0030,bt0024 3.5.5.7,3.5.5.4,3.5.5.2,3.5.5.6,3.5.5.5,3.5.5.1,3.1.1.7,3.1.1.59,3.1.1.49,3.2.1.175,3.1.1.66,3.1.1.15,3.1.1.91,3.1.1.10,3.1.1.70,3.1.1.93,3.1.1.30,3.1.1.27,3.1.1.33,3.1.1.46,3.1.1.55,3.1.1.83,3.1.1.88,3.1.1.107,3.1.1.14,3.1.1.82,3.1.1.113,3.1.1.35,3.1.1.95,3.1.1.31,3.1.1.41,3.1.1.8,3.1.1.114,3.1.1.51,3.1.1.1,3.1.1.45,3.1.1.56,3.1.1.60,2.3.1.133,3.1.1.53,3.1.1.102,3.1.1.36,3.1.1.94,3.1.1.99,2.3.1.175,3.1.1.65,3.1.1.54,3.1.1.106,3.1.1.68,3.1.1.37,3.1.1.17,3.1.1.25,3.1.1.105,3.1.1.44,3.1.1.112,3.1.1.63,3.1.1.84,3.1.1.80,3.1.1.2,3.1.1.110 2 0.24 0.4 +TP_BRO_4 TP_BRO_7 C1=C(C(=C(C=C1C#N)Br)O)Br,CCCCCCCC(=O)OC1=C(C=C(C=C1Br)O)Br C1=C(C(=C(C=C1O)Br)O)Br bt0031,bt0024 3.1.1.7,3.1.1.59,3.1.1.49,3.2.1.175,3.1.1.66,3.1.1.15,3.1.1.91,3.1.1.10,3.1.1.70,3.1.1.93,3.1.1.30,3.1.1.27,3.1.1.33,3.1.1.46,3.1.1.55,3.1.1.83,3.1.1.88,3.1.1.107,3.1.1.14,3.1.1.82,3.1.1.113,3.1.1.35,3.1.1.95,3.1.1.31,3.1.1.41,3.1.1.8,3.1.1.114,3.1.1.51,3.1.1.1,3.1.1.45,3.1.1.56,3.1.1.60,2.3.1.133,3.1.1.53,3.1.1.102,3.1.1.36,3.1.1.94,3.1.1.99,2.3.1.175,3.1.1.65,3.1.1.54,3.1.1.106,3.1.1.68,3.1.1.37,3.1.1.17,3.1.1.25,3.1.1.105,3.1.1.44,3.1.1.112,3.1.1.63,3.1.1.84,3.1.1.80,3.1.1.2,3.1.1.110 2 0.06 0.1 +TP_BRO_3 TP_BRO_8 CCCCCCCC(=O)OC1=C(C=C(C=C1Br)C(=O)[O-])Br CCCCCCCC(=O)OC1=C(C=CC=C1Br)Br bt0051 4.1.1.24,4.1.1.25,4.1.1.115,4.1.1.66,4.1.1.103,4.1.1.29,4.1.1.92,4.1.1.57,3.1.1.82,4.1.1.69,4.1.1.15,4.1.1.18,4.1.1.30,4.1.1.117,4.1.1.50,4.1.1.17,1.1.1.276,4.1.1.116,4.1.1.14,1.1.1.262,4.1.1.63,4.1.1.58,4.1.1.7,4.1.1.96,4.1.1.36,4.1.1.121,4.1.1.23,4.1.1.46,4.1.1.21,4.1.1.9,1.1.1.387,4.1.1.98,4.1.1.120,4.1.1.20,4.1.1.19,4.1.1.22,4.1.1.61,4.1.1.105,4.1.1.62,4.1.1.91,4.1.1.28,4.1.1.16,4.1.1.11,4.1.1.52,4.1.1.112,4.1.1.55,4.1.1.59,4.1.1.94,4.1.1.51,4.1.1.86,4.1.1.93,4.1.1.118,4.1.1.88,4.1.1.81,4.1.1.5,4.1.1.53,4.1.1.68 2 0.04000000000000001 0.1 +TP_BRO_5 TP_BRO_9 CCCCCCCC(=O)OC1=C(C=C(C=C1Br)C(=O)[O-])Br,CCCCCCCC(=O)OC1=CC=C(C=C1Br)C#N CCCCCCCC(=O)OC1=CC=C(C=C1Br)C(=O)[O-] bt0029,bt0030 1.21.99.5,1.11.1.8,1.3.1.32,3.5.5.7,3.5.5.4,3.5.5.2,3.5.5.6,3.5.5.5,3.5.5.1 2 0.04000000000000001 0.1 +TP_BRO_5 TP_BRO_10 CCCCCCCC(=O)OC1=CC=C(C=C1Br)C#N C1=C(C=C(C(=C1)O)Br)C#N bt0024 3.1.1.7,3.1.1.59,3.1.1.49,3.2.1.175,3.1.1.66,3.1.1.15,3.1.1.91,3.1.1.10,3.1.1.70,3.1.1.93,3.1.1.30,3.1.1.27,3.1.1.33,3.1.1.46,3.1.1.55,3.1.1.83,3.1.1.88,3.1.1.107,3.1.1.14,3.1.1.82,3.1.1.113,3.1.1.35,3.1.1.95,3.1.1.31,3.1.1.41,3.1.1.8,3.1.1.114,3.1.1.51,3.1.1.1,3.1.1.45,3.1.1.56,3.1.1.60,2.3.1.133,3.1.1.53,3.1.1.102,3.1.1.36,3.1.1.94,3.1.1.99,2.3.1.175,3.1.1.65,3.1.1.54,3.1.1.106,3.1.1.68,3.1.1.37,3.1.1.17,3.1.1.25,3.1.1.105,3.1.1.44,3.1.1.112,3.1.1.63,3.1.1.84,3.1.1.80,3.1.1.2,3.1.1.110 2 0.03 0.3 +TP_BRO_5 TP_BRO_11 CCCCCCCC(=O)OC1=C(C=C(C=C1Br)O)Br,CCCCCCCC(=O)OC1=CC=C(C=C1Br)C#N CCCCCCCC(=O)OC1=CC=C(C=C1Br)O bt0029,bt0031 1.21.99.5,1.11.1.8,1.3.1.32 2 0.010000000000000002 0.1 +TP_BRO_5 TP_BRO_12 CCCCCCCC(=O)OC1=CC=C(C=C1Br)C#N CCCCCCCC(=O)OC1=CC=C(C=C1)C#N bt0029 1.21.99.5,1.11.1.8,1.3.1.32 2 0.010000000000000002 0.1 +TP_BRO_7 TP_BRO_13 C1=C(C(=C(C=C1O)Br)O)Br C(=C(/C(=O)[O-])\Br)/C(=O)CC(=O)[O-] bt0357 3 0.03899999999999999 0.6499999999999999 +TP_BRO_6 TP_BRO_14 C1=C(C(=C(C=C1C(=O)[O-])Br)O)Br C1=CC(=C(C(=C1)Br)O)Br bt0051 4.1.1.24,4.1.1.25,4.1.1.115,4.1.1.66,4.1.1.103,4.1.1.29,4.1.1.92,4.1.1.57,3.1.1.82,4.1.1.69,4.1.1.15,4.1.1.18,4.1.1.30,4.1.1.117,4.1.1.50,4.1.1.17,1.1.1.276,4.1.1.116,4.1.1.14,1.1.1.262,4.1.1.63,4.1.1.58,4.1.1.7,4.1.1.96,4.1.1.36,4.1.1.121,4.1.1.23,4.1.1.46,4.1.1.21,4.1.1.9,1.1.1.387,4.1.1.98,4.1.1.120,4.1.1.20,4.1.1.19,4.1.1.22,4.1.1.61,4.1.1.105,4.1.1.62,4.1.1.91,4.1.1.28,4.1.1.16,4.1.1.11,4.1.1.52,4.1.1.112,4.1.1.55,4.1.1.59,4.1.1.94,4.1.1.51,4.1.1.86,4.1.1.93,4.1.1.118,4.1.1.88,4.1.1.81,4.1.1.5,4.1.1.53,4.1.1.68 3 0.024 0.1 +TP_BRO_9 TP_BRO_15 CCCCCCCC(=O)OC1=CC=C(C=C1Br)C(=O)[O-] C1=C(C=C(C(=C1)O)Br)C(=O)[O-] bt0024 3.1.1.7,3.1.1.59,3.1.1.49,3.2.1.175,3.1.1.66,3.1.1.15,3.1.1.91,3.1.1.10,3.1.1.70,3.1.1.93,3.1.1.30,3.1.1.27,3.1.1.33,3.1.1.46,3.1.1.55,3.1.1.83,3.1.1.88,3.1.1.107,3.1.1.14,3.1.1.82,3.1.1.113,3.1.1.35,3.1.1.95,3.1.1.31,3.1.1.41,3.1.1.8,3.1.1.114,3.1.1.51,3.1.1.1,3.1.1.45,3.1.1.56,3.1.1.60,2.3.1.133,3.1.1.53,3.1.1.102,3.1.1.36,3.1.1.94,3.1.1.99,2.3.1.175,3.1.1.65,3.1.1.54,3.1.1.106,3.1.1.68,3.1.1.37,3.1.1.17,3.1.1.25,3.1.1.105,3.1.1.44,3.1.1.112,3.1.1.63,3.1.1.84,3.1.1.80,3.1.1.2,3.1.1.110 3 0.012000000000000002 0.3 +TP_BRO_7 TP_BRO_16 C1=C(C(=C(C=C1O)Br)O)Br C1=C(C=C(C(=C1)O)Br)O bt0029 1.21.99.5,1.11.1.8,1.3.1.32 3 0.012 0.2 +TP_BRO_7 TP_BRO_17 C1=C(C(=C(C=C1O)Br)O)Br C1=C(C(=C(C(=C1Br)O)Br)O)O bt0014 1.14.14.1,1.14.13.240,1.14.13.244,1.14.13.7,1.14.13.29,1.14.13.243,1.14.14.20,1.14.18.1,1.11.2.6,1.14.13.44,1.14.13.62,1.14.13.33,1.14.13.127,1.14.13.2,1.14.13.23,1.14.14.9,1.14.13.20,1.14.13.4,1.11.2.5 3 0.006 0.1 +TP_BRO_9 TP_BRO_18 CCCCCCCC(=O)OC1=C(C=CC=C1Br)Br,CCCCCCCC(=O)OC1=CC=C(C=C1Br)C(=O)[O-] CCCCCCCC(=O)OC1=C(C=CC=C1)Br bt0029,bt0051 1.21.99.5,1.11.1.8,1.3.1.32,4.1.1.24,4.1.1.25,4.1.1.115,4.1.1.66,4.1.1.103,4.1.1.29,4.1.1.92,4.1.1.57,3.1.1.82,4.1.1.69,4.1.1.15,4.1.1.18,4.1.1.30,4.1.1.117,4.1.1.50,4.1.1.17,1.1.1.276,4.1.1.116,4.1.1.14,1.1.1.262,4.1.1.63,4.1.1.58,4.1.1.7,4.1.1.96,4.1.1.36,4.1.1.121,4.1.1.23,4.1.1.46,4.1.1.21,4.1.1.9,1.1.1.387,4.1.1.98,4.1.1.120,4.1.1.20,4.1.1.19,4.1.1.22,4.1.1.61,4.1.1.105,4.1.1.62,4.1.1.91,4.1.1.28,4.1.1.16,4.1.1.11,4.1.1.52,4.1.1.112,4.1.1.55,4.1.1.59,4.1.1.94,4.1.1.51,4.1.1.86,4.1.1.93,4.1.1.118,4.1.1.88,4.1.1.81,4.1.1.5,4.1.1.53,4.1.1.68 3 0.004000000000000001 0.1 +TP_BRO_13 TP_BRO_19 C(=C(/C(=O)[O-])\Br)/C(=O)CC(=O)[O-] C(=C/C(=O)[O-])/C(=O)CC(=O)[O-] bt0029 1.21.99.5,1.11.1.8,1.3.1.32 4 0.011699999999999997 0.3 +TP_BRO_17 TP_BRO_20 C1=C(C(=C(C(=C1Br)O)Br)O)O C(=C(/C(=C(\C(=O)[O-])/Br)/O)\Br)/C(=O)[O-] bt0254 1.13.11.37,1.13.11.8,1.13.11.57,1.13.11.3,1.13.11.1,1.13.11.35 4 0.0042 0.7 +/// Pathway name: CAR, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/70ca701c-55e4-41a6-92c1-d40d4947ad1f +substrate_name product_name substrate_SMILES product_SMILES rules EC_numbers generation combined_probability probability +CAR TP_CAR_1 CCNC(=O)C(C)OC(=O)NC1=CC=CC=C1 C1=CC(=C(C=C1)O)O bt0065 1.14.13.35,1.14.12.14 1 0.4 0.4 +CAR TP_CAR_2 CCNC(=O)C(C)OC(=O)NC1=CC=CC=C1 CCNC(=O)C(C)OC(=O)N bt0065 1.14.13.35,1.14.12.14 1 0.4 0.4 +CAR TP_CAR_3 CCNC(=O)C(C)OC(=O)NC1=CC=CC=C1 C1=CC=C(C=C1)N bt0318 3.5.3.21 1 0.2 0.2 +CAR TP_CAR_4 CCNC(=O)C(C)OC(=O)NC1=CC=CC=C1 CCNC(=O)C(C)OC(=O)[O-] bt0318 3.5.3.21 1 0.2 0.2 +TP_CAR_1 TP_CAR_5 C1=CC(=C(C=C1)O)O C(=C/C(=O)[O-])/C=C\C(=O)[O-] bt0254 1.13.11.37,1.13.11.8,1.13.11.57,1.13.11.3,1.13.11.1,1.13.11.35 2 0.27999999999999997 0.7 +TP_CAR_3 TP_CAR_6 C1=CC=C(C=C1)N N bt0065 1.14.13.35,1.14.12.14 2 0.06 0.3 +/// Pathway name: IBU, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/0bb2737a-1eb0-4917-9230-75ec4e0475d6 +substrate_name product_name substrate_SMILES product_SMILES rules EC_numbers generation combined_probability probability diff --git a/TP_prediction/output/reaction_prediction_TEST_top_2.tsv b/TP_prediction/output/reaction_prediction_TEST_top_2.tsv new file mode 100644 index 0000000..a693edd --- /dev/null +++ b/TP_prediction/output/reaction_prediction_TEST_top_2.tsv @@ -0,0 +1,8 @@ +/// Pathway name: Diclofenac, Pathway id: https://envipath.org/package/de0cdca1-c3ff-44ed-8ffd-f29c269bfa55/pathway/9c732f7c-020a-42fe-9bf5-539bd15d7142 +substrate_name product_name substrate_SMILES product_SMILES rules EC_numbers generation combined_probability probability +TP_Diclofenac_1 Diclofenac C1=CC(=C(C(=C1)Cl)N)Cl C1=CC=C(C(=C1)CC(=O)O)NC2=C(C=CC=C2Cl)Cl bt0374 1.14.12.22 1 0.6 0.6 +TP_Diclofenac_2 Diclofenac C1=CC(=C(C(=C1)CC(=O)O)O)O C1=CC=C(C(=C1)CC(=O)O)NC2=C(C=CC=C2Cl)Cl bt0374 1.14.12.22 1 0.6 0.6 +/// Pathway name: Atrazine, Pathway id: https://envipath.org/package/de0cdca1-c3ff-44ed-8ffd-f29c269bfa55/pathway/8d3f1fbb-98f3-4084-9ef3-59c16e514322 +substrate_name product_name substrate_SMILES product_SMILES rules EC_numbers generation combined_probability probability +TP_Atrazine_1 Atrazine CC(=O)C CCNC1=NC(=NC(=N1)NC(C)C)Cl bt0339 1 0.4 0.4 +TP_Atrazine_2 Atrazine CC(C)NC1=NC(=NC(=N1)Cl)N CCNC1=NC(=NC(=N1)NC(C)C)Cl bt0339 1 0.4 0.4 diff --git a/TP_prediction/output/reaction_prediction_TEST_top_20.tsv b/TP_prediction/output/reaction_prediction_TEST_top_20.tsv new file mode 100644 index 0000000..fca3e1d --- /dev/null +++ b/TP_prediction/output/reaction_prediction_TEST_top_20.tsv @@ -0,0 +1,37 @@ +/// Pathway name: DIC, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/d5164686-4c13-4107-8159-d1c0907aac3d +substrate_name product_name substrate_SMILES product_SMILES rules EC_numbers generation combined_probability probability +DIC TP_DIC_1 C1=CC=C(C(=C1)CC(=O)O)NC2=C(C=CC=C2Cl)Cl C1=CC(=C(C(=C1)Cl)N)Cl bt0374 1.14.12.22 1 0.6 0.6 +DIC TP_DIC_2 C1=CC=C(C(=C1)CC(=O)O)NC2=C(C=CC=C2Cl)Cl C1=CC(=C(C(=C1)CC(=O)O)O)O bt0374 1.14.12.22 1 0.6 0.6 +TP_DIC_1 TP_DIC_3 C1=CC(=C(C(=C1)Cl)N)Cl C1=CC(=C(C=C1)N)Cl bt0029 1.21.99.5,1.11.1.8,1.3.1.32 2 0.12 0.2 +TP_DIC_3 TP_DIC_4 C1=CC(=C(C=C1)N)Cl C1=CC=C(C=C1)N bt0029 1.21.99.5,1.11.1.8,1.3.1.32 3 0.024 0.2 +TP_DIC_3 TP_DIC_5 C1=CC(=C(C=C1)N)Cl C1=CC(=C(C(=C1)Cl)O)O bt0065 1.14.13.35,1.14.12.14 3 0.024 0.2 +TP_DIC_3 TP_DIC_6 C1=CC(=C(C=C1)N)Cl N bt0065 1.14.13.35,1.14.12.14 3 0.024 0.2 +TP_DIC_5 TP_DIC_7 C1=CC(=C(C(=C1)Cl)O)O C(=C/C(=O)[O-])/C=C(\C(=O)[O-])/Cl bt0254 1.13.11.37,1.13.11.8,1.13.11.57,1.13.11.3,1.13.11.1,1.13.11.35 4 0.0216 0.9 +TP_DIC_4 TP_DIC_8 C1=CC=C(C=C1)N C1=CC(=C(C=C1)O)O bt0065 1.14.13.35,1.14.12.14 4 0.0072 0.3 +TP_DIC_8 TP_DIC_9 C1=CC(=C(C=C1)O)O C(=C/C(=O)[O-])/C=C\C(=O)[O-] bt0254 1.13.11.37,1.13.11.8,1.13.11.57,1.13.11.3,1.13.11.1,1.13.11.35 5 0.005039999999999999 0.7 +TP_DIC_7 TP_DIC_10 C(=C/C(=O)[O-])/C=C(\C(=O)[O-])/Cl C/1=CC(=O)O\C1=C/C(=O)[O-] bt0181 5.5.1.7 5 0.00432 0.2 +TP_DIC_10 TP_DIC_11 C/1=CC(=O)O\C1=C/C(=O)[O-] C(=C/C(=O)[O-])/C(=O)CC(=O)[O-] bt0313 6 0.00432 1.0 +TP_DIC_10 TP_DIC_12 C/1=CC(=O)O\C1=C/C(=O)[O-] C(=CC(=O)[O-])/C(=C/C(=O)[O-])/O bt0024 3.1.1.7,3.1.1.59,3.1.1.49,3.2.1.175,3.1.1.66,3.1.1.15,3.1.1.91,3.1.1.10,3.1.1.70,3.1.1.93,3.1.1.30,3.1.1.27,3.1.1.33,3.1.1.46,3.1.1.55,3.1.1.83,3.1.1.88,3.1.1.107,3.1.1.14,3.1.1.82,3.1.1.113,3.1.1.35,3.1.1.95,3.1.1.31,3.1.1.41,3.1.1.8,3.1.1.114,3.1.1.51,3.1.1.1,3.1.1.45,3.1.1.56,3.1.1.60,2.3.1.133,3.1.1.53,3.1.1.102,3.1.1.36,3.1.1.94,3.1.1.99,2.3.1.175,3.1.1.65,3.1.1.54,3.1.1.106,3.1.1.68,3.1.1.37,3.1.1.17,3.1.1.25,3.1.1.105,3.1.1.44,3.1.1.112,3.1.1.63,3.1.1.84,3.1.1.80,3.1.1.2,3.1.1.110 6 0.003888 0.9 +TP_DIC_11 TP_DIC_13 C(=C/C(=O)[O-])/C(=O)CC(=O)[O-] C(CC(=O)[O-])C(=O)CC(=O)[O-] bt0291 1.3.1.106,1.3.1.82,1.3.1.32,1.3.1.22,1.3.1.122,1.3.1.21,1.3.1.93,1.3.1.81,1.3.1.38,1.3.1.75,1.3.1.8,1.3.1.70,1.3.1.72,1.1.1.384 7 0.00043200000000000004 0.1 +/// Pathway name: ATR, Pathway id: https://envipath.org/package/c72d3405-a454-4038-8d34-8f05e46447e6/pathway/cfdfe459-6c5d-413b-a905-545ea3000404 +substrate_name product_name substrate_SMILES product_SMILES rules EC_numbers generation combined_probability probability +ATR TP_ATR_1 CCNC1=NC(=NC(=N1)NC(C)C)Cl CC(=O)C bt0339 1 0.4 0.4 +ATR TP_ATR_2 CCNC1=NC(=NC(=N1)NC(C)C)Cl CC(C)NC1=NC(=NC(=N1)Cl)N bt0339 1 0.4 0.4 +ATR TP_ATR_3 CCNC1=NC(=NC(=N1)NC(C)C)Cl CCNC1=NC(=NC(=N1)Cl)N bt0339 1 0.4 0.4 +ATR TP_ATR_4 CCNC1=NC(=NC(=N1)NC(C)C)Cl CC=O bt0339 1 0.4 0.4 +ATR TP_ATR_5 CCNC1=NC(=NC(=N1)NC(C)C)Cl CCNC1=NC(=NC(=N1)NC(C)C)O bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 1 0.4 0.4 +TP_ATR_3 TP_ATR_6 CCNC1=NC(=NC(=N1)Cl)N CCNC1=NC(=NC(=N1)O)N bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 2 0.32000000000000006 0.8 +TP_ATR_3 TP_ATR_7 CCNC1=NC(=NC(=N1)Cl)N CCNC1=NC(=NC(=N1)O)Cl bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 2 0.32000000000000006 0.8 +TP_ATR_5 TP_ATR_8 CCNC1=NC(=NC(=N1)NC(C)C)O CC(C)NC1=NC(=NC(=N1)O)O bt0061 3.5.4.43,3.5.4.42 2 0.30000000000000004 0.75 +TP_ATR_5 TP_ATR_9 CCNC1=NC(=NC(=N1)NC(C)C)O CC(C)N bt0061 3.5.4.43,3.5.4.42 2 0.30000000000000004 0.75 +TP_ATR_5 TP_ATR_10 CCNC1=NC(=NC(=N1)NC(C)C)O CCN bt0061 3.5.4.43,3.5.4.42 2 0.30000000000000004 0.75 +TP_ATR_4 TP_ATR_11 CC=O CC(=O)[O-] bt0003 1.2.1.57,1.2.1.22,1.2.1.32,1.2.1.79,1.2.1.28,1.2.1.33,1.2.1.10,1.2.1.62,1.2.1.90,1.2.1.87,1.2.1.73,1.2.1.99,1.2.1.26,1.2.1.4,1.2.1.75,1.2.1.78,1.2.3.8,1.2.1.77,1.2.1.63,1.2.1.8,1.2.1.102,1.2.3.7,1.2.1.54,1.2.1.48,1.2.1.5,1.2.1.71,1.2.1.23,1.2.1.3,1.2.1.24,1.2.1.68,1.2.3.15,1.2.1.39,1.2.1.82,1.2.3.1,1.2.1.16,1.2.1.96,1.1.1.312,1.1.3.39,1.2.1.20,1.2.3.9,1.1.3.23,1.2.1.36,1.2.3.5,1.2.1.85,1.2.1.9,1.2.1.31,1.2.1.47,1.1.3.17,1.2.1.15,1.2.1.44,1.2.1.81,1.2.1.65,1.2.3.14,1.2.1.7,1.2.1.98,1.14.13.246,1.2.1.17,1.2.1.91,1.2.1.29,1.2.1.19,1.2.1.97,1.2.1.69,1.2.1.64,1.2.1.53,1.2.1.83,1.1.1.23,1.2.1.89,1.2.1.88,1.2.1.67,1.2.1.74,1.2.1.86,1.2.1.42,1.2.1.21 2 0.2 0.5 +TP_ATR_2 TP_ATR_12 CC(C)NC1=NC(=NC(=N1)Cl)N C1(=NC(=NC(=N1)N)N)Cl bt0339 2 0.12 0.3 +TP_ATR_2 TP_ATR_13 CC(C)NC1=NC(=NC(=N1)Cl)N CC(C)NC1=NC(=NC(=N1)O)Cl bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 2 0.12 0.3 +TP_ATR_2 TP_ATR_14 CC(C)NC1=NC(=NC(=N1)Cl)N CC(C)NC1=NC(=NC(=N1)O)N bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 2 0.12 0.3 +TP_ATR_6 TP_ATR_15 CCNC1=NC(=NC(=N1)O)N CCNC1=NC(=NC(=N1)O)O bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 3 0.32000000000000006 0.75 +TP_ATR_10 TP_ATR_16 CCN N bt0063 1.4.1.9,1.4.1.3,1.4.3.1,1.5.3.19,1.4.1.1,1.4.3.7,1.4.1.12,1.5.1.10,1.4.1.17,1.5.1.28,1.4.3.12,1.4.1.18,1.5.3.16,1.4.3.5,1.4.3.14,1.4.1.26,1.5.3.5,1.5.3.13,1.4.1.10,1.4.1.21,1.5.1.19,1.4.1.8,1.5.3.6,1.4.3.4,1.4.3.24,1.5.3.1,1.5.1.22,1.4.1.13,1.5.3.18,1.4.3.8,1.5.3.21,1.4.1.4,1.4.3.11,1.4.1.23,1.4.3.20,1.5.1.23,1.4.3.16,1.5.1.24,1.5.1.11,1.14.13.178,1.5.1.16,1.5.3.4,1.4.3.25,1.4.3.3,1.4.1.11,1.5.3.17,1.4.1.14,1.4.3.19,1.4.1.7,1.5.1.8,1.4.1.16,1.14.13.128,1.5.3.10,1.4.1.2,1.5.3.15,1.4.3.22,1.5.1.51,1.4.3.10,1.4.1.19,1.5.1.26,1.14.13.239,1.5.1.7,1.14.14.1,1.4.3.15,1.4.1.20,1.4.3.21,1.14.13.179,1.4.3.2,1.14.13.238,1.5.1.52,1.5.1.9 3 0.15000000000000002 0.1 +TP_ATR_12 TP_ATR_17 C1(=NC(=NC(=N1)N)N)Cl C1(=NC(=NC(=N1)N)O)Cl bt0330 3.5.4.45,3.5.4.42,3.5.4.43,3.8.1.8 3 0.08399999999999999 0.05 +TP_ATR_6 TP_ATR_18 CCNC1=NC(=NC(=N1)O)N C1(=NC(=NC(=N1)O)O)N bt0061 3.5.4.43,3.5.4.42 3 0.08000000000000002 0.25 +TP_ATR_7 TP_ATR_19 CCNC1=NC(=NC(=N1)O)Cl C1(=NC(=NC(=N1)O)O)Cl bt0061 3.5.4.43,3.5.4.42 3 0.08000000000000002 0.25 +TP_ATR_15 TP_ATR_20 CCNC1=NC(=NC(=N1)O)O C1(=NC(=NC(=N1)O)O)O bt0061 3.5.4.43,3.5.4.42 4 0.24000000000000005 0.75 diff --git a/TP_prediction/output/reaction_prediction_TEST_top_3.tsv b/TP_prediction/output/reaction_prediction_TEST_top_3.tsv new file mode 100644 index 0000000..e69de29 diff --git a/TP_prediction/util.py b/TP_prediction/util.py index c759157..e4a3dbf 100644 --- a/TP_prediction/util.py +++ b/TP_prediction/util.py @@ -1,118 +1,134 @@ import sys sys.path.insert(0, '../src/envipath-python/enviPath_python/') sys.path.insert(0, '../src/envipath-python/') from enviPath_python.enviPath import * from enviPath_python.enviPath import * def load_input(input_path): """ Load input smiles and names :param input_path: path to input file :return: list of dictionaries containing smiles and name of input compounds """ f = open(input_path) input_list = [] for line in f: if line not in ['', '\n']: line_split = line.rstrip().split('\t') input_list.append({'smiles': line_split[0], 'name': line_split[1]}) return input_list def upload_envipath_pathway(eP, result, pkg): """ Upload resulting pathway dictionary to enviPath :param eP: enviPath object :param result: result list of dictionaries from pathway prediction :param pkg: package object where results should be uploaded :return: dictionary {'name': pathway name, 'id': URI of pathway} """ assert 'anonymous' not in str(eP.who_am_i()), 'Upload not possible when not logged in' source = result[0] pkg.add_compound(smiles=source['smiles'],name=source['name']) pathway = Pathway.create(pkg, smiles=source['smiles'], name=source['name'], root_node_only=True) # Add the observed degradation product as a second node for TP in result[1:]: - # print('adding to pw:', TP['name'], TP['smiles'], TP['generation'], TP['parent']) - pathway.add_node(smiles=TP['smiles'], node_name=TP['name'], node_depth=TP['generation']) + # print('adding to pw:', TP['name'], TP['smiles'], TP['generation'], TP['parent_smiles']) + pathway.add_node(smiles=TP['smiles'], name=TP['name'], depth=TP['generation']) # check for the case of multiple parents: - for parent in TP['parent'].split(','): + for parent in TP['parent_smiles'].split(','): pathway.add_node(smiles=parent) pathway.add_edge(smirks='{}>>{}'.format(parent, TP['smiles'])) print('New pathway created for {}: {}'.format(source['name'], pathway.id)) return {'name': source['name'], 'id': pathway.id} def expand_smiles(smiles, rr): """ Get all potential TPs by applying enviPath biotransformation and relative reasoning rules :param smiles: input smiles :param rr: relative reasoning object :return: list of dictionaries for each predicted TP: {'smiles': smiles, 'name': rule name, 'probability': relative reasoning probability} """ res = rr.classify_smiles(smiles) # sort by probability res.sort(reverse=True, key=lambda x: x['probability']) return res def clean_result(result_dict): """ Sorts TP list for output :param result_dict: result dictionary :return: sorted and named list of TPs """ result_list = list(result_dict.values()) - result_list.sort(key=lambda x: x['generation']) # make sure that source compound is first result_list.sort(reverse=True, key=lambda x: x['combined_probability']) + result_list.sort(key=lambda x: x['generation']) # make sure that source compound is first # get name of source compound source_name = result_list[0]['name'] + source_smiles = result_list[0]['smiles'] TP_count = 0 + D = {source_smiles: source_name} + new_result_list = [] + new_result_list.append(result_list[0]) for res in result_list[1:]: + new_res = res TP_count += 1 - res['name'] = 'TP_{}_{}'.format(source_name, TP_count) - return result_list + new_name = 'TP_{}_{}'.format(source_name, TP_count) + new_res['name'] = new_name + multiple_parents = res['parent_smiles'].split(',') + for p in multiple_parents: + new_res['parent_name'] = D[p] + D[res['smiles']] = new_name + new_result_list.append(new_res) + return new_result_list def result_to_compound_dict(result): """ Translates result from enviPath node expansion into a compound dictionary :param result: list of dictionaries with predicted TP information :return: dictionary of TPs """ compound_dict = {} for r in result: probability = float(r['probability']) for product_smiles in r['products']: if product_smiles not in compound_dict.keys(): - compound_dict[product_smiles] = {'rules' : r['name'], 'probability': probability, 'smiles': product_smiles} + compound_dict[product_smiles] = {'rules' : r['name'], 'rule_IDs': r['id'], 'probability': probability, 'smiles': product_smiles} else: # check if there's a rule with better probability if probability > compound_dict[product_smiles]['probability']: # update probability and rules associated to this probability compound_dict[product_smiles]['probability'] = probability compound_dict[product_smiles]['rules'] = r['name'] + compound_dict[product_smiles]['rule_IDs'] = r['id'] return compound_dict -def update_compound_entry(compound_entry, this_combined_probability, rules, this_generation, parent_smiles, +def update_compound_entry(compound_entry, this_combined_probability, rules, rule_IDs, this_generation, parent_smiles, size_metric, size_value): """ Update the compound entry with new information :param compound_entry: dictionary of compound information :param this_combined_probability: new combined probability :param rules: new rules + :param rule_IDs: new rule IDs :param this_generation: new generation :param parent_smiles: new parent compound + :param parent_compound: new parent compound :param size_metric: size metric :param size_value: new size value :return: updated compound entry """ if compound_entry['combined_probability'] < this_combined_probability: compound_entry['combined_probability'] = this_combined_probability compound_entry['rules'] = rules + compound_entry['rule_IDs'] = rule_IDs compound_entry['generation'] = this_generation - compound_entry['parent'] = parent_smiles + compound_entry['parent_smiles'] = parent_smiles compound_entry[size_metric] = size_value elif compound_entry['combined_probability'] == this_combined_probability: compound_entry['rules'] += ',{}'.format(rules) - compound_entry['parent'] += ',{}'.format(parent_smiles) + compound_entry['rule_IDs'] += ',{}'.format(rule_IDs) + compound_entry['parent_smiles'] += ',{}'.format(parent_smiles) return compound_entry