diff --git a/Additional_analyses/Compare_methods/compare_method_pps.py b/Additional_analyses/Compare_methods/compare_method_pps.py
index 848fc49..2f6a303 100644
--- a/Additional_analyses/Compare_methods/compare_method_pps.py
+++ b/Additional_analyses/Compare_methods/compare_method_pps.py
@@ -1,399 +1,399 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 13 11:51:15 2022

@author: trostele
"""
# Python version 3.6.13
########################################################################################################################
# import all necessary packages
import pandas as pd
import rdkit # this RDKit version is only supported on Python < 3.7
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem import Descriptors
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
import pickle
########################################################################################################################
# script to compare predicted TPs with found TPs from different methods
########################################################################################################################
# INPUT
found_tp_smiles_input = "./input/found_TP_SMILES.txt" # contains SMILES of all TPs that were found in samples
pickle_file_data_dict = "./input/data_dict_com_with_CAS.pickle" # pickle file location of combined dictionary with all predictions made by different methods

# enter used package (= source name in the combined data dict); same input as in "get_mass_list_from_prediction.py"
package_method_1 = "EAWAG_BBD-PPS_round_2"
package_method_2 = "EAWAG_BBD-PPS_round_2b"
package_method_5 = "enviPath-BBD_1"
package_method_6 = "enviPath-BBD+SOIL_2"
package_method_7 = "enviPath-BBD+SLUDGE_3"
package_method_8 = "enviPath-BBD+SOIL+SLUDGE_4"
#!!!
# I used two rounds of predictions with the EAWAG/BBD-PPS, so the script combines them.
# The script only works with 7 methods and needs to be updated for a different number!
#!!!
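# Editor's note: a minimal sketch of the layout this script expects from the combined
# pickle (field names taken from the accesses below; all values here are hypothetical):
#
# data_dict = {
#     "<canonical parent SMILES>": {
#         "code_parent": ["Par"], "ID_parent": "<CAS or ''>", "name": ["Parent name"],
#         "mass_parent": [123.045], "Formula_parent": "C6H5NO2", "inchi_parent": "<InChIKey>",
#         "TP_dict": {
#             "<canonical TP SMILES>": {
#                 "code": "TP_Par_1", "CAS": "", "mass": 139.040, "Formula": "C6H5NO3",
#                 "InchiKey": "<InChIKey>", "source_list": ["EAWAG_BBD-PPS_round_2"],
#                 "rule_list": ["bt0063"], "combined_prob": ["0.25"],
#                 "alternative_parent": [],
#             },
#         },
#     },
# }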
########################################################################################################################
# FUNCTIONS

def canonicalize_smiles(smiles):
    mol = Chem.MolFromSmiles(smiles) # creates mol object from SMILES
    uncharger = rdMolStandardize.Uncharger() # easier to access
    uncharged = uncharger.uncharge(mol) # neutralizes charges by protonating or deprotonating the mol object
    new_smiles = rdkit.Chem.rdmolfiles.MolToSmiles(uncharged) # converts mol object to canonical SMILES
    can_smiles = Chem.CanonSmiles(new_smiles)
    return can_smiles

def do_pickle(d, pickle_file):
    with open(pickle_file, 'wb') as handle:
        pickle.dump(d, handle, protocol=pickle.HIGHEST_PROTOCOL)

def get_pickle(pickle_file):
    with open(pickle_file, 'rb') as handle:
        d = pickle.load(handle)
    return d

def export(data_dict, smi_list):
    # smi_list is the list of TPs that were both predicted and found (see below)
    df_name_list_com = []
    df_smi_list_com = []
    df_ID_list_com = []
    df_Formula_list_com = []
    df_MolWeight_list_com = []
    df_name_parent_list_com = []
    df_inchikey_list_com = []
    df_source_list_com = []
    df_alt_parent_list_com = []
    # add all the data to the lists
    for parent in data_dict:
        df_name_list_com.append(data_dict[parent]["code_parent"][0])
        df_smi_list_com.append(parent)
        df_ID_list_com.append(data_dict[parent]["ID_parent"])
        df_MolWeight_list_com.append(data_dict[parent]["mass_parent"][0])
        df_Formula_list_com.append(data_dict[parent]["Formula_parent"])
        df_name_parent_list_com.append(data_dict[parent]["name"][0])
        df_inchikey_list_com.append(data_dict[parent]["inchi_parent"])
        df_source_list_com.append("")
        df_alt_parent_list_com.append([])
        for tp in data_dict[parent]["TP_dict"]:
            if tp in smi_list: # use the parameter instead of the global predicted_and_found list
                df_name_list_com.append(data_dict[parent]["TP_dict"][tp]["code"])
                df_smi_list_com.append(tp)
                df_ID_list_com.append(data_dict[parent]["TP_dict"][tp]["CAS"])
                df_MolWeight_list_com.append(data_dict[parent]["TP_dict"][tp]["mass"])
                df_Formula_list_com.append(data_dict[parent]["TP_dict"][tp]["Formula"])
                df_name_parent_list_com.append(data_dict[parent]["name"][0])
                df_inchikey_list_com.append(data_dict[parent]["TP_dict"][tp]["InchiKey"])
                df_source_list_com.append(data_dict[parent]["TP_dict"][tp]["source_list"])
                df_alt_parent_list_com.append(data_dict[parent]["TP_dict"][tp]["alternative_parent"])
    df_complete_dict = {"Name of parent": df_name_parent_list_com, "SMILES": df_smi_list_com, "Name": df_name_list_com, "Source": df_source_list_com, "CAS": df_ID_list_com, "Formula": df_Formula_list_com, "MolWeight": df_MolWeight_list_com, "InchiKey": df_inchikey_list_com, "Alternative parent": df_alt_parent_list_com}
    df_complete = pd.DataFrame.from_dict(df_complete_dict)
    df_complete.to_csv("./output/predicted_and_found_TPs.csv", index = False, sep = ",")

########################################################################################################################
# export an overview of the predicted TPs that were also found (listed in the txt file) as a csv file

found_tp_smiles = []
with open(found_tp_smiles_input) as SMILES_comp_file:
    for line in SMILES_comp_file:
        found_tp_smiles.append(line.rstrip())

found_tp_smiles_canon = []
for tp in found_tp_smiles:
    found_tp_smiles_canon.append(canonicalize_smiles(tp))

data_dict = get_pickle(pickle_file_data_dict)

Bar = 'CCS(=O)(=O)N1CC(CC#N)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1'
Abe = 'CCN1CCN(Cc2ccc(Nc3ncc(F)c(-c4cc(F)c5nc(C)n(C(C)C)c5c4)n3)nc2)CC1'
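# Editor's sketch (illustrative, not part of the original script): canonicalize_smiles()
# maps different spellings of the same structure, including charged forms, onto one
# canonical neutral SMILES, so predicted and found TPs can be compared as plain strings.
example_charged = canonicalize_smiles("[O-]C(=O)c1ccccc1") # deprotonated benzoic acid
example_neutral = canonicalize_smiles("OC(=O)c1ccccc1")    # neutral benzoic acid
assert example_charged == example_neutral # both should give the same canonical SMILES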
# remove Abe and Bar from dict because they were not spiked
data_dict.pop(Abe)
data_dict.pop(Bar)

predicted_tp_smiles = []
for parent in data_dict:
    for tp in data_dict[parent]["TP_dict"]:
        predicted_tp_smiles.append(tp)

predicted_and_found = []
for smi in found_tp_smiles_canon:
    if smi in predicted_tp_smiles:
        predicted_and_found.append(smi)

export(data_dict, predicted_and_found)
########################################################################################################################
# export precision (= found/predicted) for each method as tsv file

source_list_nest = []
for parent in data_dict:
    for tp in data_dict[parent]["TP_dict"]:
        if tp in predicted_and_found:
            source_list_nest.append(data_dict[parent]["TP_dict"][tp]["source_list"])

source_list = []
for sublist in source_list_nest:
    for item in sublist:
        source_list.append(item)

pps = []
envi_updated = []
envi_soil_updated = []
envi_sludge = []
envi_soil_sludge = []
for source in source_list:
    if "EAWAG_BBD-PPS" in source:
        pps.append(source)
    if package_method_5 in source:
        envi_updated.append(source)
    if package_method_6 in source:
        envi_soil_updated.append(source)
    if package_method_7 in source:
        envi_sludge.append(source)
    if package_method_8 in source:
        envi_soil_sludge.append(source)

pps_predicted = []
envi_updated_predicted = []
envi_soil_updated_predicted = []
envi_sludge_predicted = []
envi_soil_sludge_predicted = []
for parent in data_dict:
    for tp in data_dict[parent]["TP_dict"]:
        if package_method_1 in data_dict[parent]["TP_dict"][tp]["source_list"] or package_method_2 in data_dict[parent]["TP_dict"][tp]["source_list"]:
            pps_predicted.append(tp)
        if package_method_5 in data_dict[parent]["TP_dict"][tp]["source_list"]:
            envi_updated_predicted.append(tp)
        if package_method_6 in data_dict[parent]["TP_dict"][tp]["source_list"]:
            envi_soil_updated_predicted.append(tp)
        if package_method_7 in data_dict[parent]["TP_dict"][tp]["source_list"]:
            envi_sludge_predicted.append(tp)
        if package_method_8 in data_dict[parent]["TP_dict"][tp]["source_list"]:
            envi_soil_sludge_predicted.append(tp)

# build the tsv row by row: found, predicted and precision (= found/predicted) per method
string_precision = "Precision of methods:" + "\n" + "Prediction Method" + "\t" + "Found" + "\t" + "Predicted" + "\t" + "Precision" + "\n"
string_precision += package_method_1 + "\t" + str(len(pps)) + "\t" + str(len(pps_predicted)) + "\t" + str(round(len(pps)/len(pps_predicted)*100, 2)) + "%\n"
string_precision += package_method_5 + "\t" + str(len(envi_updated)) + "\t" + str(len(envi_updated_predicted)) + "\t" + str(round(len(envi_updated)/len(envi_updated_predicted)*100, 2)) + "%\n"
string_precision += package_method_6 + "\t" + str(len(envi_soil_updated)) + "\t" + str(len(envi_soil_updated_predicted)) + "\t" + str(round(len(envi_soil_updated)/len(envi_soil_updated_predicted)*100, 2)) + "%\n"
string_precision += package_method_7 + "\t" + str(len(envi_sludge)) + "\t" + str(len(envi_sludge_predicted)) + "\t" + str(round(len(envi_sludge)/len(envi_sludge_predicted)*100, 2)) + "%\n"
string_precision += package_method_8 + "\t" + str(len(envi_soil_sludge)) + "\t" + str(len(envi_soil_sludge_predicted)) + "\t" + str(round(len(envi_soil_sludge)/len(envi_soil_sludge_predicted)*100, 2)) + "%\n"

with open("./output/precision_of_methods.tsv", 'w') as t:
    t.write(string_precision)
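# Editor's sketch (hypothetical helper, not part of the original script): the precision
# rows above divide found by predicted directly, which would raise ZeroDivisionError for
# a method without any predictions; a guarded version of the same calculation:
def precision_percent(n_found, n_predicted):
    if n_predicted == 0:
        return "n/a" # avoid division by zero for methods without predictions
    return str(round(n_found / n_predicted * 100, 2)) + "%"
# e.g. precision_percent(len(pps), len(pps_predicted)) reproduces the first row's value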
########################################################################################################################
# export how many times each subset of methods was used to predict found TPs as tsv

source_dict = {}
for source in source_list_nest:
    key = " ".join(source)
    source_dict[key] = 0
for source in source_list_nest:
    key = " ".join(source)
    source_dict[key] += 1

tp_num = 0
for key in source_dict:
    tp_num += source_dict[key]

string_sources = "Sources of TPs\t" + "Times used\n"
for key in source_dict:
    string_sources += key + "\t"
    string_sources += str(source_dict[key]) + "\n"

with open("./output/sources_of_TPs.tsv", 'w') as s:
    s.write(string_sources)
########################################################################################################################
# check how many TPs were predicted for a given parent by each method and export as tsv

string_tp_tsv = "Number of predicted TPs per parent (without considering overlap): \n" + "Parent\t" + package_method_1 + "\t" + package_method_5 + "\t" + package_method_6 + "\t" + package_method_7 + "\t" + package_method_8 + "\t" + "Total predicted TPs" + "\t" + "Found" + "\t" + "Overall Precision" + "\n"
for parent in data_dict:
    list_tp = []
    list_pps = []
    list_envi_updated = []
    list_envi_soil_updated = []
    list_envi_sludge = []
    list_envi_soil_sludge = []
    found_tp_per_parent = []
    for tp in data_dict[parent]["TP_dict"]:
        list_tp.append(tp)
        if package_method_1 in data_dict[parent]["TP_dict"][tp]["source_list"] or package_method_2 in data_dict[parent]["TP_dict"][tp]["source_list"]:
            list_pps.append(tp)
        if package_method_5 in data_dict[parent]["TP_dict"][tp]["source_list"]:
            list_envi_updated.append(tp)
        if package_method_6 in data_dict[parent]["TP_dict"][tp]["source_list"]:
            list_envi_soil_updated.append(tp)
        if package_method_7 in data_dict[parent]["TP_dict"][tp]["source_list"]:
            list_envi_sludge.append(tp)
        if package_method_8 in data_dict[parent]["TP_dict"][tp]["source_list"]:
            list_envi_soil_sludge.append(tp)
        if tp in found_tp_smiles_canon:
            found_tp_per_parent.append(tp)
    string_tp_tsv += data_dict[parent]["name"][0] + "\t" + str(len(list_pps)) + "\t" + str(len(list_envi_updated)) + "\t" + str(len(list_envi_soil_updated)) + "\t" + str(len(list_envi_sludge)) + "\t" + str(len(list_envi_soil_sludge)) + "\t" + str(len(list_tp)) + "\t" + str(len(found_tp_per_parent)) + "\t" + str(round(len(found_tp_per_parent)/len(list_tp)*100, 2)) + "%" + "\n"
    # the following clear() calls are redundant, since the lists are re-created at the top of each loop iteration
    list_tp.clear()
    list_pps.clear()
    list_envi_updated.clear()
    list_envi_soil_updated.clear()
    list_envi_sludge.clear()
    list_envi_soil_sludge.clear()

with open("./output/number_of_predicted_TPs_per_parent.tsv", 'w') as h:
    h.write(string_tp_tsv)
########################################################################################################################
# get combined probabilities of TPs that were predicted and found vs. only predicted but not found

predicted_and_found_combined_probability = []
only_predicted_combined_probability = []
for parent in data_dict:
    for tp in data_dict[parent]["TP_dict"]:
        if data_dict.get(parent, {}).get("TP_dict", {}).get(tp, {}).get("combined_prob") is not None:
            if tp in predicted_and_found:
                predicted_and_found_combined_probability += data_dict[parent]["TP_dict"][tp]["combined_prob"]
            else:
                only_predicted_combined_probability += data_dict[parent]["TP_dict"][tp]["combined_prob"]

predicted_and_found_combined_probability_float = []
only_predicted_combined_probability_float = [] for prob in predicted_and_found_combined_probability: predicted_and_found_combined_probability_float.append(float(prob)) for prob in only_predicted_combined_probability: only_predicted_combined_probability_float.append(float(prob)) do_pickle(predicted_and_found_combined_probability_float, "predicted_and_found_combined_probability.pickle") do_pickle(only_predicted_combined_probability_float, "only_predicted_combined_probability.pickle") ################################################################################################################################################################################################################################################################ list_all = [] for parent in data_dict: for tp in data_dict[parent]["TP_dict"]: if package_method_1 in data_dict[parent]["TP_dict"][tp]["source_list"] or package_method_2 in data_dict[parent]["TP_dict"][tp]["source_list"]: if package_method_5 in data_dict[parent]["TP_dict"][tp]["source_list"]: if package_method_6 in data_dict[parent]["TP_dict"][tp]["source_list"]: if package_method_7 in data_dict[parent]["TP_dict"][tp]["source_list"]: if package_method_8 in data_dict[parent]["TP_dict"][tp]["source_list"]: list_all.append(tp) # print(list_all) ################################################################################################################################################################################################################################################################ # check which rules were used to predict: rule_list = [] for parent in data_dict: for tp in data_dict[parent]["TP_dict"]: rule = data_dict[parent]["TP_dict"][tp]["rule_list"][0] rule_split = rule.split(",") rule_list += rule_split # the bt rule classification is not final, those are only the most important rules add_O_rules = ["bt0063", "bt0023", "bt0003", "bt0242", "bt0243", "bt0193", "bt0014", "bt0259", "bt0374", "bt0005", "bt0332"] add_H2O_rules = ["bt0067", "bt0350", "bt0430", "bt0024", "bt0021", "bt0020", "bt0389", "bt0373", "bt0391"] desat_rules = ["bt0002", "bt0001"] add_O_list = [] add_H2O_list = [] desat_list = [] for rule in rule_list: if rule in add_O_rules: add_O_list.append(rule) if rule in add_H2O_rules: add_H2O_list.append(rule) if rule in desat_rules: desat_list.append(rule) print("Oxygen addition: ", len(add_O_list), ", water addition: ", len(add_H2O_list), ", desat: ", len(desat_list)) -string_rule = "Rules that were used in the predictions: need to remove all spaces " " and '"' signs manually \n" +string_rule = "Rules that were used in the predictions: need to remove all spaces \" \" and '' signs manually \n" for rule in rule_list: string_rule += rule string_rule += "\n" with open("./output/used_rules.txt", 'w') as p: p.write(string_rule) ################################################################################################################################################################################################################################################################ t.close() h.close() s.close() print("Script finished successfully") ################################################################################################################################################################################################################################################################ # ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣀⣀⣠⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣴⣶⣶⣶⣶⣶⣶⣶⣶⣶⣶⣶⣶⣶⣦⣤⣤⣤⣤⣤⣤⣄⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ # ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣾⡿⠟⠛⠛⠛⠛⠋⠉⠉⠉⠉⠉⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠉⠉⠉⠉⠛⣿⣦⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ # 
⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⣿⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢠⣄⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⣿⡆⠀⠀⠀⠀⠀⠀⠀⠀⠀
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⠀⠀⣴⡄⠀⠀⠀⠀⣠⡄⠀⠀⠀⠀⠀⠀⠶⠀⠀⠀⠀⠀⠈⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⢰⣿⠀⠀⠀⠀⣿⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⠀⠀⠉⠁⠀⠀⠀⠘⠟⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠁⠀⠀⠀⢸⣿⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣤⠀⢀⣶⣿⣷⣦⣄⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⣿⡇⠀⠀⠀⣀⣤⣄⠀⠀⠀
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣤⠀⠀⠀⠀⠀⠀⣸⡇⠀⣸⡿⠀⠀⠉⠻⣿⣦⡀⠀⢰⡿⠀⠀⠀⠀⠀⣸⣿⣁⣴⣾⡿⠟⠛⣿⡄⠀⠀
# ⣴⣿⠿⠿⣿⣶⣦⣄⡀⠀⠀⠀⠀⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠘⠛⠀⠀⠀⠀⠀⠀⠉⠁⠀⣿⡇⠀⠀⠀⠀⠈⠻⣿⣆⠀⠀⠀⠀⠀⠀⠀⣿⣿⣿⠟⠁⠀⠀⠀⣿⡇⠀⠀
# ⢿⣧⠀⠀⠀⠀⠉⠛⢿⣶⣄⠀⠀⣿⣿⠀⠀⠀⠀⠀⠙⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣀⡀⠀⠀⠀⣿⠇⠀⠀⠀⠀⠀⠀⠈⢻⣷⣤⣤⣤⣤⣤⣼⣿⠟⠁⠀⠀⠀⠀⠀⣿⡇⠀⠀
# ⠈⢿⣧⡀⠀⠀⠀⠀⠀⠈⢻⣷⡄⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠛⠃⠀⠀⢰⣿⡆⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⡇⠀⠀
# ⠀⠈⠻⣷⣄⠀⠀⠀⠀⠀⠀⠙⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⡿⠇⠀⠀⠀⠀⠀⠀⠀⠀⠀⣴⣿⠟⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠹⣿⣦⡀
# ⠀⠀⠀⠘⢿⣷⣄⠀⠀⠀⠀⠀⠘⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠠⣼⡇⠀⠀⣾⡿⠁⠀⠀⠀⠀⢠⣾⠋⠉⢳⡄⠀⠀⠀⠀⠀⠀⠀⠀⢠⣾⠋⠙⢳⡄⠀⠀⠈⢿⣷
# ⠀⠀⠀⠀⠀⠙⢿⣷⣄⡀⠀⠀⠀⢹⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠿⠁⠀⠀⣿⡇⠀⠀⠀⠀⠀⠸⣿⣶⣤⣾⡇⠀⠀⠀⠀⠀⠀⠀⠀⠸⣿⣧⣤⣾⣿⠀⠀⠀⠘⣿
# ⠀⠀⠀⠀⠀⠀⠀⠈⠻⢿⣦⣄⠀⢸⣿⠀⠀⠀⠀⠀⠀⣾⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⡇⠀⠀⠀⠀⠀⠀⠈⠛⠛⠛⠁⠀⠀⠀⢿⣉⣩⠿⠀⠀⠉⠛⠿⠛⠃⠀⠀⠀⠀⣿
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⠿⣿⣾⣿⠀⠀⠀⠀⠀⠀⠉⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢿⣧⠀⠀⠀⠀⠀⠀⠀⠀⠀⡄⠀⠀⠀⠀⠀⢸⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢰⣿
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⢻⣿⠀⠀⠀⠀⠀⠀⠀⠀⣀⠀⠀⠀⠀⠀⣠⡄⠀⠀⠘⣿⣧⡀⠀⠀⠀⠀⠀⠀⠀⢷⣤⣀⣀⣀⣴⠟⢿⣤⣀⣀⣀⣴⠇⠀⠀⠀⠀⢠⣿⡟
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣟⠀⠀⠀⠀⠀⠀⠀⠀⠛⠀⠀⠀⠀⠀⠻⠃⠀⠀⠀⠈⢿⣷⣄⡀⠀⠀⠀⠀⠀⠀⠈⠉⠉⠉⠁⠀⠀⠈⠉⠉⠉⠁⠀⠀⠀⢀⣴⣿⠟⠀
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣽⣿⣶⣦⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣽⣿⣿⣷⣶⣦⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣤⣴⣶⣶⣾⠿⠛⠁⠀⠀
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣼⡟⠀⠈⠉⠉⣻⡟⠛⣻⡿⠛⠛⠛⠛⢿⣿⠿⠛⠛⠛⠛⠛⠛⠛⢻⣿⠏⠉⠉⠉⠉⢻⡟⠛⠛⣻⣿⠋⠉⠉⠙⣿⠉⠉⠉⠀⠀⠀⠀⠀⠀⠀
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⣅⠀⠀⣠⣾⠟⠁⠀⣿⠀⠀⠀⢀⣠⣿⠏⠀⠀⠀⠀⠀⠀⠀⠀⢸⣿⡀⠀⠀⠀⣠⣿⠃⠀⠀⣿⡇⠀⠀⠀⢸⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠘⠻⠿⠿⠛⠁⠀⠀⠀⠻⢿⣶⣾⠿⠛⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠛⢿⣷⣶⡿⠟⠁⠀⠀⠀⠻⣷⣄⣠⣴⣿⠃⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ⠀⠈⠛⠛⠉⠀
diff --git a/File_conversion/Prediction_output_to_mass_list/README.md b/File_conversion/Prediction_output_to_mass_list/README.md
index 94a80bb..89c7171 100644
--- a/File_conversion/Prediction_output_to_mass_list/README.md
+++ b/File_conversion/Prediction_output_to_mass_list/README.md
@@ -1,49 +1,50 @@
# Generate Mass and Inclusion Lists from Prediction Output

## Purpose
The purpose of the script is to generate mass lists from multiple `find_best_TP.py` outputs, which can be imported into `Compound Discoverer`. Further, the script combines the data of all methods and can select transformation products (TP) based on a scoring system. It also creates inclusion lists for positive and negative mode with predicted NCE for use on a QExactivePlus. The script queries the `PubChem` database with one request per compound, so the code can run for some time.

## Input
The script needs at least 1 input file and supports up to 8 files. The input file(s) consist of the tsv results from `find_best_TP.py`. The filepaths need to be changed at the start of the script. The prediction method (enter `EAWAG-PPS` or `envipath`) and package (do not use `.` or `/`) for each file need to be specified in the script. Additionally, each optional file can be included in the processing (enter "yes") or not. Moreover, 3 txt files for mapping are required:
-`SMILES_selected_comp.txt` contains the SMILES of the selected parent compounds
-`name_subst.txt` contains the full names of the parents
-`code_subst.txt` contains the short code of the parents (exp. first 3 letters of name)
-Each txt files contains one string per line and the order needs to be matched over the three txt files, so each line of the txt files corresponds to one parent.
+* `SMILES_selected_comp.txt` contains the SMILES of the selected parent compounds
+* `name_subst.txt` contains the full names of the parents
+* `code_subst.txt` contains the short code of the parents (e.g. the first 3 letters of the name)
+
+Each txt file contains one string per line, and the order needs to match across the three txt files, so that each line corresponds to one parent.
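For illustration only (this helper is not part of the script), the three line-aligned files can be thought of as columns of one table; zipping them together gives SMILES-keyed lookups like the `code_dict` and `name_dict` the script works with:

```python
# hypothetical sketch: combine the three line-aligned mapping files into lookups
with open("SMILES_selected_comp.txt") as f_smi, open("code_subst.txt") as f_code, open("name_subst.txt") as f_name:
    smiles = [line.rstrip() for line in f_smi]
    codes = [line.rstrip() for line in f_code]
    names = [line.rstrip() for line in f_name]

code_dict = dict(zip(smiles, codes))  # parent SMILES -> short code
name_dict = dict(zip(smiles, names))  # parent SMILES -> full name
```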
These txt files can be easily created by copying from an Excel worksheet and pasting into a new txt file.

## Scoring System
-Can be turned on (`True`) or off (`False`). A maximal number of allowed TPs per parent can be set as a variable.
+Can be turned on (`True`) or off (`False`, default). A maximal number of allowed TPs per parent can be set as a variable.
The scoring system removes all TPs with a mass below 100 u. The score is lowered if the TP has a CAS number, has a low probability or is not predicted by all methods. If a parent has more than the specified maximal allowed TPs per parent, TPs are removed starting with the lowest score.

## Output
The script generates a multitude of output files. Most of them are pickle files, which contain all the data from a given dictionary. These can be copied and moved to a different location to run just one part of the code or to save on running time. Each of the input files gets an individual csv file that can be imported into `Compound Discoverer`. If more than two input files are processed, 3 csv files are created from the combination of all methods. Two contain all the data about the parents and TPs before and after the scoring and are meant as an overview. The last one contains only the most important combined data with SMILES, Name, CAS, Formula, monoisotopic mass and InchiKey, which can be imported into `Compound Discoverer` as a mass list. Additionally, two inclusion lists are created, as well as a txt file containing the maximal element count of C, H, O, N, S, P, Cl, Br and I (useful for the `Compound Discoverer` workflow). The output paths default to an output folder located in the same folder as the script.
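The NCE values in the inclusion lists are predicted from the monoisotopic mass with the linear rule implemented in the script (masses above 350 u get a fixed NCE of 15). As a quick illustration:

```python
# NCE prediction rule as implemented in get_mass_list_from_prediction.py
def predict_nce(mass):
    if mass > 350:
        return 15  # heavy compounds get a fixed low NCE
    return 5 * round(((mass * -0.41) + 160) / 5)  # linear rule, rounded to steps of 5

predict_nce(200.0)  # 5 * round(78 / 5) = 80
predict_nce(300.0)  # 5 * round(37 / 5) = 35
```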
## Run script in Spyder
1. open the script: `get_mass_list_from_prediction.py`
2. change the paths of the input files according to their location on your hard drive and turn the Scoring System on or off.
3. click somewhere in the left window and press `F5` or click `Run File`; if a Run settings window appears, click `Run`
4. wait for the script to finish running (may take a while; `Script finished successfully` marks the end of the script)

Author: Leo Trostel, 2022
\ No newline at end of file
diff --git a/File_conversion/Prediction_output_to_mass_list/get_mass_list_from_prediction.py b/File_conversion/Prediction_output_to_mass_list/get_mass_list_from_prediction.py
index 9116a1f..1ee9d8a 100644
--- a/File_conversion/Prediction_output_to_mass_list/get_mass_list_from_prediction.py
+++ b/File_conversion/Prediction_output_to_mass_list/get_mass_list_from_prediction.py
@@ -1,1425 +1,1425 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 22 16:17:39 2021
Edited on Tue Aug 17 13:55:32 2022

@author: trostele
"""
# start of script
# Python version 3.6.13
########################################################################################################################
# import all necessary packages
import pandas as pd
import rdkit # this RDKit version is only supported on Python < 3.7
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem import Descriptors
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
import pickle
import pubchempy as pcp
import re
import copy
########################################################################################################################
######################################## see README file for instructions! ############################################
########################################################################################################################
# INPUT FILES:

# first file (mandatory)
file_location_1 = "./input/round 2b/Eawag_PPS_BBD_results.tsv"
prediction_method_1 = "EAWAG-PPS" # "EAWAG-PPS" or "envipath" allowed
package_method_1 = "EAWAG_BBD-PPS_round_2b" # enter used package (used for naming the source in the combined data dicts and output)

# second file (optional)
file_location_2 = "./input/updated predictions 2/TP_prediction_BBD+SOIL_top_50.tsv"
prediction_method_2 = "envipath"
package_method_2 = "enviPath-BBD+SOIL_2"
consider_file_2 = "yes"

# third file (optional)
file_location_3 = "./input/updated predictions 2/TP_prediction_BBD+SLUDGE_top_50.tsv"
prediction_method_3 = "envipath"
package_method_3 = "enviPath-BBD+SLUDGE_3"
consider_file_3 = "yes"

# fourth file (optional)
file_location_4 = "./input/updated predictions 2/TP_prediction_BBD+SOIL+SLUDGE_top_50.tsv"
prediction_method_4 = "envipath"
package_method_4 = "enviPath-BBD+SOIL+SLUDGE_4"
consider_file_4 = "yes"

# fifth file (optional)
file_location_5 = "./input/round 2/Eawag_PPS_BBD_results_edited.tsv"
prediction_method_5 = "EAWAG-PPS"
package_method_5 = "EAWAG_BBD-PPS_round_2"
consider_file_5 = "yes"

# sixth file (optional)
file_location_6 = "./input/updated predictions 2/TP_prediction_BBD_top_50.tsv"
prediction_method_6 = "envipath"
package_method_6 = "enviPath-BBD_1"
consider_file_6 = "yes"

# seventh file (optional)
file_location_7 = ""
prediction_method_7 = ""
package_method_7 = ""
consider_file_7 = "no"

# eighth file
(optional) file_location_8 = "" prediction_method_8 = "" package_method_8 = "" consider_file_8 = "no" # mapping files: # code_location (mandatory) code_location = "./input/code_subst.txt" # SMILES_location (mandatory) smi_location = "./input/SMILES_selected_comp.txt" # name_location (mandatory) name_location = "./input/name_subst.txt" # turn search for CAS numbers for all compounds on (True) or off (False) CAS_search = False # SCORING SYSTEM: -scoring_system = False # True = active, False = inactive +scoring_system = False # True = active, False = inactive (default) max_TP_per_parent = 50 # add number of maximal allowed TPs per parent (must be an integer!) # OUTPUT FILES: # output could be changed output_location_1 = "./output/CD_masslist_1_" + package_method_1 + ".csv" output_location_2 = "./output/CD_masslist_2_" + package_method_2 + ".csv" output_location_3 = "./output/CD_masslist_3_" + package_method_3 + ".csv" output_location_4 = "./output/CD_masslist_4_" + package_method_4 + ".csv" output_location_5 = "./output/CD_masslist_5_" + package_method_5 + ".csv" output_location_6 = "./output/CD_masslist_6_" + package_method_6 + ".csv" output_location_7 = "./output/CD_masslist_7_" + package_method_7 + ".csv" output_location_8 = "./output/CD_masslist_8_" + package_method_8 + ".csv" # output: combined mass list of all methods output_file_CD_masslist = "./output/CD_masslist_combined.csv" # output: combined data of all methods output_file_all_data = "./output/combined_overview.csv" # output: inclusion list for QExactivePlus for positive mode output_inclusion_pos = "./output/inclusion_list_pos.csv" # output: inclusion list for QExactivePlus for negative mode output_inclusion_neg = "./output/inclusion_list_neg.csv" # output: max element count file output_max_element = "./output/max_element_count.txt" # output: removed TPs above 100 u output_removed_tps = "./output/removed_TPs_above_100_u.csv" ################################################################################################################################################################################################################################################################ ################################################################################################################################################################################################################################################################ # FUNCTIONS def file_to_csv(input_file, pickle_file, csv_file, file_type): """ :param input_file: :param pickle_file: :param csv_file: :param file_type: 'envipath' or 'EAWAG-PPS' """ print("Converting {} file".format(input_file)) if file_type == 'envipath': D1 = read_enviPath_file_to_dict(input_file) elif file_type == 'EAWAG-PPS': D1 = read_PPS_file_to_dict(input_file) else: raise ValueError("Possible values for file_type: 'envipath' or 'EAWAG-PPS'") D2 = canonicalize_dict(D1) D3 = annotate_dict(D2, file_type) do_pickle(D3, pickle_file) dict_to_csv(D3, csv_file) return D3 def read_enviPath_file_to_dict(input_file): envipath_file = open(input_file) sep = '\t' slash = "///" smiles = "SMILES" data_dict_envi = {} for line in envipath_file: linelist_envi = line.rstrip().split(sep) if line.startswith(slash): #skip pathway line substance = "" continue else: if line.startswith(smiles): #skip header line continue else: if len(linelist_envi) == 6:#skip parent line because TP_1 is always first generation (except Atv) substance = linelist_envi[0] continue else: if data_dict_envi.get(substance): #if parent SMILES 
exists as key then append list data_dict_envi[substance]['TP_list'].append(linelist_envi[0]) data_dict_envi[substance]['bt_list'].append(linelist_envi[3]) data_dict_envi[substance]['code_TP'].append(linelist_envi[1]) data_dict_envi[substance]['combined_prob'].append(linelist_envi[2]) else: #otherwise create new entry into dict data_dict_envi[substance] = {'TP_list': [linelist_envi[0]], "TP_list_canon_2":[],"TP_list_canon":[],'combined_prob': [linelist_envi[2]], 'bt_list': [linelist_envi[3]], 'code_TP': [linelist_envi[1]], "code_parent":[], "name" : [], "ID_TP": [], "ID_parent":None, "mass_TP" : [], "mass_parent" : [], "Structure_TP": [], "Structure_parent": None, "Formula_TP":[], "Formula_parent":None, "inchi_TP":[], "inchi_parent": None} for parent in code_dict: if parent not in data_dict_envi: data_dict_envi[parent] = {'TP_list': [], "TP_list_canon": [], "TP_list_canon_2": [], 'bt_list': [], 'code_TP': [], "code_parent": [], "name": [], "ID_TP": [], "ID_parent": None, "mass_TP": [], "mass_parent": [], "Structure_TP": [], "Structure_parent": None, "Formula_TP": [], "Formula_parent": None, "inchi_TP": [], "inchi_parent": None} for key in data_dict_envi: if key in code_dict.keys(): data_dict_envi[key]['code_parent'].append(code_dict.get(key)) # add the code to the data dict if key in name_dict.keys(): data_dict_envi[key]["name"].append(name_dict.get(key)) # add name to data_dict return data_dict_envi def canonicalize_smiles(smiles): mol = Chem.MolFromSmiles(smiles) # creates mol object from SMILES uncharger = rdMolStandardize.Uncharger() # easier to access uncharged = uncharger.uncharge(mol) # protonates or deprotonates the mol object new_smiles = rdkit.Chem.rdmolfiles.MolToSmiles(uncharged) # converts mol object to canonical SMILES can_smiles = Chem.CanonSmiles(new_smiles) return can_smiles def max_element_count(smi_list): max_C = 0 max_N = 0 max_F = 0 max_O = 0 max_S = 0 max_P = 0 max_Cl = 0 max_Br = 0 max_I = 0 max_H = 0 for smi in smi_list: if smi.count("C") > max_C: max_C = smi.count("C") if smi.count("N") > max_N: max_N = smi.count("N") if smi.count("F") > max_F: max_F = smi.count("F") if smi.count("O") > max_O: max_O = smi.count("O") if smi.count("S") > max_S: max_S = smi.count("S") if smi.count("P") > max_P: max_P = smi.count("P") if smi.count("Cl") > max_Cl: max_Cl = smi.count("Cl") if smi.count("Br") > max_Br: max_Br = smi.count("Br") if smi.count("I") > max_I: max_I = smi.count("I") mol = Chem.MolFromSmiles(smi) mol = Chem.AddHs(mol) mol = Chem.MolToSmiles(mol, allHsExplicit=True) hcount = mol.count("H") if hcount > max_H: max_H = hcount with open(output_max_element, 'w') as f: f.write("max. element count: " + "max C = " + str(max_C) + ", max H = " + str(max_H) + ", max O = " + str(max_O) + ", max N = " + str(max_N) + ", max S = " + str(max_S) + ", max P = " + str(max_P) + ", max Cl = " + str(max_Cl) + ", max Br = " + str(max_Br) + ", max I = " + str(max_I)) print("max. 
element count: " + "max C = " + str(max_C) + ", max H = " + str(max_H) + ", max O = " + str(max_O) + ", max N = " + str(max_N) + ", max S = " + str(max_S) + ", max P = " + str(max_P) + ", max Cl = " + str(max_Cl) + ", max Br = " + str(max_Br) + ", max I = " + str(max_I)) return def suggest_stepped_nce(smi_list): MolWeight_list = [] for compound in smi_list: MolWeight_list.append(Chem.Descriptors.ExactMolWt(Chem.MolFromSmiles(compound))) nce_list = [] for mass in MolWeight_list: if mass > 350: nce_list.append(15) else: nce_calc = 5 * round(((mass * -0.41) + 160)/5) nce_list.append(nce_calc) max_nce = max(nce_list) if max_nce > 120: high_nce = 100 else: high_nce = max_nce - 20 avg_nce = sum(nce_list)/len(nce_list) middle_nce = (5 * round(avg_nce/5)) - 5 min_nce = min(nce_list) if min_nce == 15: low_nce = min_nce else: low_nce = min_nce - 5 if high_nce < 0 or middle_nce - low_nce < 10 or high_nce - middle_nce < 10: print("Stepped NCE approach not recommended") else: print("Suggested Stepped NCE: " + "Low NCE = " + str(low_nce) + ", Middle NCE = " + str(middle_nce) + ", High NCE = " + str(high_nce)) return def do_pickle(d, pickle_file): with open("./output/" + pickle_file, 'wb') as handle: pickle.dump(d, handle, protocol=pickle.HIGHEST_PROTOCOL) def get_pickle(pickle_file): with open("./output/" + pickle_file, 'rb') as handle: d = pickle.load(handle) return d def canonicalize_dict(D): new_D = {} for parent in D: tp_dict = D[parent] tp_dict["TP_list_canon"] = [] for tp in D[parent]["TP_list"]: tp_dict["TP_list_canon"].append(canonicalize_smiles(tp)) new_D[canonicalize_smiles(parent)] = tp_dict return new_D def annotate_dict(data_dict, data_type): for parent in data_dict: counter = 1 for tp in data_dict[parent]["TP_list_canon"]: data_dict[parent]['mass_TP'].append(Chem.Descriptors.ExactMolWt(Chem.MolFromSmiles(tp))) # add structure of TP to dict data_dict[parent]["Structure_TP"].append(Chem.MolFromSmiles(tp)) # add molecular formula of TP data_dict[parent]["Formula_TP"].append(CalcMolFormula(Chem.MolFromSmiles(tp))) # add inchikey of TP data_dict[parent]["inchi_TP"].append(Chem.inchi.MolToInchiKey(Chem.MolFromSmiles(tp))) if data_type == 'EAWAG-PPS': # used for PPS where TPs are not named automatically data_dict[parent]['code_TP'].append("TP_" + data_dict[parent]['code_parent'][0] + "_" + str(counter)) counter += 1 # add parent mass data_dict[parent]['mass_parent'].append(Chem.Descriptors.ExactMolWt(Chem.MolFromSmiles(parent))) # add structure of parent to dict data_dict[parent]["Structure_parent"] = Chem.MolFromSmiles(parent) # add molecular formula of parent data_dict[parent]["Formula_parent"] = CalcMolFormula(Chem.MolFromSmiles(parent)) # add inchikey of parent data_dict[parent]["inchi_parent"] = Chem.inchi.MolToInchiKey(Chem.MolFromSmiles(parent)) # !!! # add CAS of parent from inchi key, if there is no CAS number for TP then there still needs to be an entry so that later list has same length! 
        if CAS_search == True:
            parent_cas = get_cas_inchi(data_dict[parent]["inchi_parent"])
        if CAS_search == False:
            parent_cas = "CAS search disabled"
        if len(parent_cas) > 0: # only add CAS if it was found
            data_dict[parent]["ID_parent"] = parent_cas
        else: # otherwise add empty string
            data_dict[parent]["ID_parent"] = ""
        # add CAS of TPs from inchi key
        for tp in data_dict[parent]["inchi_TP"]:
            if CAS_search == True:
                tp_cas = get_cas_inchi(tp)
            if CAS_search == False:
                tp_cas = "CAS search disabled"
            if len(tp_cas) > 0:
                data_dict[parent]["ID_TP"].append(tp_cas)
            else:
                data_dict[parent]["ID_TP"].append("")
        del data_dict[parent]["TP_list"]
    return data_dict

def dict_to_csv(data_dict, output_file):
    # create lists for each column and then create a dict with the correct layout, which can be converted to a dataframe using pandas
    df_name_list = []
    df_ID_list = []
    df_Formula_list = []
    df_MolWeight_list = []
    df_Structure_list = []
    # add all the data to the lists
    for parent in data_dict:
        df_name_list.append(data_dict[parent]["code_parent"][0]) # append code of parent first
        for tp in data_dict[parent]["code_TP"]: # then add all the codes of the TPs
            df_name_list.append(tp)
        # add CAS number
        df_ID_list.append(data_dict[parent]["ID_parent"])
        for tp in data_dict[parent]["ID_TP"]:
            df_ID_list.append(tp)
        # add monoisotopic mass
        df_MolWeight_list.append(data_dict[parent]["mass_parent"][0])
        for tp in data_dict[parent]["mass_TP"]:
            df_MolWeight_list.append(tp)
        # add chemical formula
        df_Formula_list.append(data_dict[parent]["Formula_parent"])
        for tp in data_dict[parent]["Formula_TP"]:
            df_Formula_list.append(tp)
        # add mol file
        mol_rep_1 = (Chem.MolToMolBlock(data_dict[parent]["Structure_parent"])).replace("\n", ";") # replace newline character with semicolon
        mol_rep_2 = mol_rep_1[6:] # skip first few spaces
        mol_rep_3 = mol_rep_2[:-1] # remove the last semicolon
        df_Structure_list.append(mol_rep_3) # add string to the list
        for tp in data_dict[parent]["Structure_TP"]:
            tp_mol_rep_1 = (Chem.MolToMolBlock(tp)).replace("\n", ";")
            tp_mol_rep_2 = tp_mol_rep_1[6:]
            tp_mol_rep_3 = tp_mol_rep_2[:-1]
            df_Structure_list.append(tp_mol_rep_3)
    # all lists must be the same length to convert them to a dataframe
    assert len(df_name_list) == len(df_Formula_list) == len(df_MolWeight_list) == len(df_Structure_list) == len(df_ID_list), "Error: all lists must be the same length to convert it to dataframe"
    # create dict and convert to dataframe
    df_dict = {"Name": df_name_list, "ID": df_ID_list, "Formula": df_Formula_list, "MolWeight": df_MolWeight_list, "Structure": df_Structure_list}
    df = pd.DataFrame.from_dict(df_dict)
    # export dataframe as csv
    df.to_csv(output_file, index = False, sep = "\t")

def combined_dict_to_csv(data_dict, output_file_CD, output_file_complete):
    # create lists for each column and then create a dict with the correct layout, which can be converted to a dataframe using pandas
    df_name_list_com = []
    df_smi_list_com = []
    df_ID_list_com = []
    df_Formula_list_com = []
    df_MolWeight_list_com = []
    df_Structure_list_com = []
    df_name_parent_list_com = []
    df_inchikey_list_com = []
    df_score_list_com = []
    df_rules_list_com = []
    df_source_list_com = []
    df_alt_parent_list_com = []
    # change the codes of the TPs, so each TP has its own name
    for parent in data_dict:
        counter = 1
        for tp in data_dict[parent]["TP_dict"]:
            data_dict[parent]["TP_dict"][tp]["code"] = "TP_" + data_dict[parent]['code_parent'][0] + "_" + str(counter)
            counter = counter + 1
    # add all the data to the lists
    for parent in data_dict:
        df_name_list_com.append(data_dict[parent]["code_parent"][0])
        df_smi_list_com.append(parent)
df_ID_list_com.append(data_dict[parent]["ID_parent"]) df_MolWeight_list_com.append(data_dict[parent]["mass_parent"][0]) df_Formula_list_com.append(data_dict[parent]["Formula_parent"]) mol_rep_1 = (Chem.MolToMolBlock(data_dict[parent]["Structure_parent"])).replace("\n", ";") #replace newline character with semicolon mol_rep_2 = mol_rep_1[6:] #skip first few spaces mol_rep_3 = mol_rep_2[:-1] #remove the last semicolon df_Structure_list_com.append(mol_rep_3) #add string to the list df_name_parent_list_com.append(data_dict[parent]["name"][0]) df_inchikey_list_com.append(data_dict[parent]["inchi_parent"]) df_score_list_com.append("100") df_rules_list_com.append("") df_source_list_com.append("") df_alt_parent_list_com.append([]) for tp in data_dict[parent]["TP_dict"]: df_name_list_com.append(data_dict[parent]["TP_dict"][tp]["code"]) df_smi_list_com.append(tp) df_ID_list_com.append(data_dict[parent]["TP_dict"][tp]["CAS"]) df_MolWeight_list_com.append(data_dict[parent]["TP_dict"][tp]["mass"]) df_Formula_list_com.append(data_dict[parent]["TP_dict"][tp]["Formula"]) tp_mol_rep_1 = (Chem.MolToMolBlock(data_dict[parent]["TP_dict"][tp]["Structure"])).replace("\n",";") tp_mol_rep_2 = tp_mol_rep_1[6:] tp_mol_rep_3 = tp_mol_rep_2[:-1] df_Structure_list_com.append(tp_mol_rep_3) df_name_parent_list_com.append(data_dict[parent]["name"][0]) df_inchikey_list_com.append(data_dict[parent]["TP_dict"][tp]["InchiKey"]) df_score_list_com.append(data_dict[parent]["TP_dict"][tp]["score"]) df_rules_list_com.append(data_dict[parent]["TP_dict"][tp]["rule_list"]) df_source_list_com.append(data_dict[parent]["TP_dict"][tp]["source_list"]) df_alt_parent_list_com.append(data_dict[parent]["TP_dict"][tp]["alternative_parent"]) max_element_count(df_smi_list_com) # all lists must be the same length to convert it to dataframe assert len(df_name_list_com) == len(df_Formula_list_com) == len(df_MolWeight_list_com) == len(df_Structure_list_com) == len(df_ID_list_com), "Error: all lists must be the same length to convert it to dataframe" #create dict and convert to dataframe df_com_dict = {"Name": df_name_list_com, "ID": df_ID_list_com,"Formula": df_Formula_list_com,"MolWeight": df_MolWeight_list_com, "Structure": df_Structure_list_com} df_com = pd.DataFrame.from_dict(df_com_dict) # export dataframe as csv df_com.to_csv(output_file_CD, index = False, sep = "\t") df_complete_dict = {"Name of parent":df_name_parent_list_com,"SMILES": df_smi_list_com, "Name": df_name_list_com, "Score": df_score_list_com, "Source": df_source_list_com, "CAS": df_ID_list_com,"Formula": df_Formula_list_com, "MolWeight": df_MolWeight_list_com, "InchiKey":df_inchikey_list_com, "Alternative parent": df_alt_parent_list_com, "bt rules": df_rules_list_com} df_complete = pd.DataFrame.from_dict(df_complete_dict) # export dataframe as csv df_complete.to_csv(output_file_complete, index = False, sep = ",") # create inclusion list for QExactivePlus m_proton = 1.0072756 df_M_plus_H = [] df_M_minus_H = [] df_polarity_pos = [] df_polarity_neg = [] df_empty = [] df_nce_type = [] df_nce = [] for mass in df_MolWeight_list_com: df_M_plus_H.append(mass + m_proton) df_M_minus_H.append(mass - m_proton) df_polarity_pos.append("Positive") df_polarity_neg.append("Negative") df_empty.append(" ") df_nce_type.append("NCE") if mass > 350: df_nce.append(15) else: nce_calc = 5 * round(((mass * -0.41) + 160)/5) df_nce.append(nce_calc) d_inclusion_pos = {"Mass [m/z]": df_M_plus_H ,"Formula [M]": df_empty, "Formula type": df_empty, "Species": df_empty, "CS [z]": df_empty, "Polarity": 
df_polarity_pos, "Start [min]": df_empty, "End [min]": df_empty, "(N)CE": df_nce, "(N)CE type": df_nce_type, "MSX ID": df_empty, "Comment": df_name_list_com}
    df_inclusion_pos = pd.DataFrame.from_dict(d_inclusion_pos)
    # export dataframe as csv
    df_inclusion_pos.to_csv(output_inclusion_pos, index = False, sep = ",")
    d_inclusion_neg = {"Mass [m/z]": df_M_minus_H, "Formula [M]": df_empty, "Formula type": df_empty, "Species": df_empty, "CS [z]": df_empty, "Polarity": df_polarity_neg, "Start [min]": df_empty, "End [min]": df_empty, "(N)CE": df_nce, "(N)CE type": df_nce_type, "MSX ID": df_empty, "Comment": df_name_list_com}
    df_inclusion_neg = pd.DataFrame.from_dict(d_inclusion_neg)
    # export dataframe as csv
    df_inclusion_neg.to_csv(output_inclusion_neg, index = False, sep = ",")
    suggest_stepped_nce(df_smi_list_com)
    print("Export complete")

def get_cas_inchi(inchi): # look up CAS numbers on PubChem via the first block of the InChIKey
    cas_rns = []
    inchi_split = inchi.split("-")[0]
    results = pcp.get_synonyms(inchi_split, 'inchikey')
    for result in results:
        for syn in result.get('Synonym', []):
            match = re.match(r'(\d{2,7}-\d\d-\d)', syn)
            if match:
                cas_rns.append(match.group(1))
    return cas_rns

def read_PPS_file_to_dict(PPS_file_location):
    PPS_file = open(PPS_file_location)
    sep = '\t'
    line_1 = PPS_file.readline()
    line_list_1 = line_1.rstrip().split(sep) # rstrip() removes the newline character '\n' at the end of the line
    Settings = {}
    Settings[line_list_1[0]] = line_list_1[1]
    line_2 = PPS_file.readline()
    line_list_2 = line_2.rstrip().split(sep)
    Settings[line_list_2[0]] = line_list_2[1]
    line_3 = PPS_file.readline()
    line_4 = PPS_file.readline()
    PPS_file.readline()
    header_line = PPS_file.readline()
    compound_list = header_line.rstrip().split(sep)
    data = {}
    data_dict = {}
    for line in PPS_file:
        linelist = line.rstrip().split(sep) # we get the list
        # the first item of the list is the TP, the following items are biotransformation rules producing the TP from a given compound
        for index, substance in enumerate(compound_list):
            # the first item in the compound list is empty, so skip it
            if index == 0:
                continue
            # empty fields at the end of the line are not imported as empty strings, add them manually
            while len(linelist) < len(compound_list):
                linelist.append('')
            set_of_rules = linelist[index]
            if set_of_rules != '': # only process if the set of rules is not empty
                if data_dict.get(substance): # if key exists then append list
                    data_dict[substance]['TP_list'].append(linelist[0])
                    data_dict[substance]['bt_list'].append(set_of_rules)
                else: # otherwise create new entry in the dict
                    data_dict[substance] = {'TP_list': [linelist[0]], "TP_list_canon": [], "TP_list_canon_2": [], 'bt_list': [set_of_rules], 'code_TP': [], "code_parent": [], "name": [], "ID_TP": [], "ID_parent": None, "mass_TP": [], "mass_parent": [], "Structure_TP": [], "Structure_parent": None, "Formula_TP": [], "Formula_parent": None, "inchi_TP": [], "inchi_parent": None}
    for key in data_dict:
        if key in code_dict.keys():
            data_dict[key]['code_parent'].append(code_dict.get(key)) # add the code to the data dict
        if key in name_dict.keys():
            data_dict[key]["name"].append(name_dict.get(key)) # add name to data_dict like the code before
    for parent in code_dict: # code_dict and name_dict are built from line-aligned files, so they share the same keys
        if parent not in data_dict.keys():
            data_dict[parent] = {'TP_list': [], "TP_list_canon": [], "TP_list_canon_2": [], 'bt_list': [], 'code_TP': [], "code_parent": [code_dict.get(parent)], "name": [name_dict.get(parent)], "ID_TP": [], "ID_parent": None, "mass_TP": [], "mass_parent": [], "Structure_TP": [], "Structure_parent":
None, "Formula_TP": [], "Formula_parent": None, "inchi_TP": [], "inchi_parent": None} return data_dict def combine_dict (d_1, method_1_package, d_2, method_2_package, d_3, method_3_package, d_4, method_4_package, d_5, method_5_package, d_6, method_6_package, d_7, method_7_package, d_8, method_8_package): print("Combining "+ method_1_package + ", " + method_2_package + ", " + method_3_package + " , " + method_4_package + " , " + method_5_package+ " , " + method_6_package + " , " + method_7_package+ " and " + method_8_package) # create new dict from copy of envi dict and then delete TP info data_dict_com = copy.deepcopy(d_1) # need deepcopy otherwise it will still change original dict for parent in data_dict_com: if data_dict_com.get(parent, {}).get("combined_prob") is None: if data_dict_com.get(parent, {}).get("bt_list") is not None: del data_dict_com[parent]["bt_list"] del data_dict_com[parent]["code_TP"] del data_dict_com[parent]["inchi_TP"] del data_dict_com[parent]["mass_TP"] del data_dict_com[parent]["Formula_TP"] del data_dict_com[parent]["Structure_TP"] del data_dict_com[parent]["ID_TP"] del data_dict_com[parent]["TP_list_canon"] del data_dict_com[parent]["TP_list_canon_2"] else: if data_dict_com.get(parent, {}).get("bt_list") is not None: del data_dict_com[parent]["bt_list"] del data_dict_com[parent]["code_TP"] del data_dict_com[parent]["inchi_TP"] del data_dict_com[parent]["mass_TP"] del data_dict_com[parent]["Formula_TP"] del data_dict_com[parent]["Structure_TP"] del data_dict_com[parent]["ID_TP"] del data_dict_com[parent]["TP_list_canon"] del data_dict_com[parent]["combined_prob"] del data_dict_com[parent]["TP_list_canon_2"] # add TP data from first data dict for parent in d_1: if d_1.get(parent, {}).get("combined_prob") is not None: for index, tp in enumerate(d_1[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_1[parent]["ID_TP"][index], "rule_list":[d_1[parent]["bt_list"][index]], "mass": d_1[parent]["mass_TP"][index], "Formula": d_1[parent]["Formula_TP"][index], "source_list" :[method_1_package], "code": d_1[parent]["code_TP"][index], "Structure" : d_1[parent]["Structure_TP"][index], "combined_prob": [d_1[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_1[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list if method_1_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_1_package) data_dict_com[parent]["TP_dict"][tp]["combined_prob"].append(d_1[parent]["combined_prob"][index]) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_1[parent]["ID_TP"][index], "rule_list":[d_1[parent]["bt_list"][index]], "mass": d_1[parent]["mass_TP"][index], "Formula": d_1[parent]["Formula_TP"][index], "source_list" :[method_1_package], "code": d_1[parent]["code_TP"][index], "Structure" : d_1[parent]["Structure_TP"][index], "combined_prob": [d_1[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_1[parent]["inchi_TP"][index], "alternative_parent" : []} else: for parent in d_1: for index, tp in enumerate(d_1[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_1[parent]["ID_TP"][index], "rule_list":[d_1[parent]["bt_list"][index]], "mass": 
d_1[parent]["mass_TP"][index], "Formula": d_1[parent]["Formula_TP"][index], "source_list" :[method_1_package], "code": d_1[parent]["code_TP"][index], "Structure" : d_1[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_1[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list if method_1_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_1_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_1[parent]["ID_TP"][index], "rule_list":[d_1[parent]["bt_list"][index]], "mass": d_1[parent]["mass_TP"][index], "Formula": d_1[parent]["Formula_TP"][index], "source_list" :[method_1_package], "code": d_1[parent]["code_TP"][index], "Structure" : d_1[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_1[parent]["inchi_TP"][index], "alternative_parent" : []} # add TP data from second data dict for parent in d_2: if d_2.get(parent, {}).get("combined_prob") is not None: for index, tp in enumerate(d_2[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_2[parent]["ID_TP"][index], "rule_list":[d_2[parent]["bt_list"][index]], "mass": d_2[parent]["mass_TP"][index], "Formula": d_2[parent]["Formula_TP"][index], "source_list" :[method_2_package], "code": d_2[parent]["code_TP"][index], "Structure" : d_2[parent]["Structure_TP"][index], "combined_prob": [d_2[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_2[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list data_dict_com[parent]["TP_dict"][tp]["combined_prob"].append(d_2[parent]["combined_prob"][index]) if d_2[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_2[parent]["bt_list"][index]) if method_2_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_2_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_2[parent]["ID_TP"][index], "rule_list":[d_2[parent]["bt_list"][index]], "mass": d_2[parent]["mass_TP"][index], "Formula": d_2[parent]["Formula_TP"][index], "source_list" :[method_2_package], "code": d_2[parent]["code_TP"][index], "Structure" : d_2[parent]["Structure_TP"][index], "combined_prob": [d_2[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_2[parent]["inchi_TP"][index], "alternative_parent" : []} else: for parent in d_2: for index, tp in enumerate(d_2[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_2[parent]["ID_TP"][index], "rule_list":[d_2[parent]["bt_list"][index]], "mass": d_2[parent]["mass_TP"][index], "Formula": d_2[parent]["Formula_TP"][index], "source_list" :[method_2_package], "code": d_2[parent]["code_TP"][index], "Structure" : d_2[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_2[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list if d_2[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: 
data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_2[parent]["bt_list"][index]) if method_2_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_2_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_2[parent]["ID_TP"][index], "rule_list":[d_2[parent]["bt_list"][index]], "mass": d_2[parent]["mass_TP"][index], "Formula": d_2[parent]["Formula_TP"][index], "source_list" :[method_2_package], "code": d_2[parent]["code_TP"][index], "Structure" : d_2[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_2[parent]["inchi_TP"][index], "alternative_parent" : []} # add TP data from third data dict (if specified) if d_3 != "none": for parent in d_3: if d_3.get(parent, {}).get("combined_prob") is not None: for index, tp in enumerate(d_3[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_3[parent]["ID_TP"][index], "rule_list":[d_3[parent]["bt_list"][index]], "mass": d_3[parent]["mass_TP"][index], "Formula": d_3[parent]["Formula_TP"][index], "source_list" :[method_3_package], "code": d_3[parent]["code_TP"][index], "Structure" : d_3[parent]["Structure_TP"][index], "combined_prob": [d_3[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_3[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list data_dict_com[parent]["TP_dict"][tp]["combined_prob"].append(d_3[parent]["combined_prob"][index]) if d_3[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_3[parent]["bt_list"][index]) if method_3_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_3_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_3[parent]["ID_TP"][index], "rule_list":[d_3[parent]["bt_list"][index]], "mass": d_3[parent]["mass_TP"][index], "Formula": d_3[parent]["Formula_TP"][index], "source_list" :[method_3_package], "code": d_3[parent]["code_TP"][index], "Structure" : d_3[parent]["Structure_TP"][index], "combined_prob": [d_3[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_3[parent]["inchi_TP"][index], "alternative_parent" : []} else: for parent in d_3: for index, tp in enumerate(d_3[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_3[parent]["ID_TP"][index], "rule_list":[d_3[parent]["bt_list"][index]], "mass": d_3[parent]["mass_TP"][index], "Formula": d_3[parent]["Formula_TP"][index], "source_list" :[method_3_package], "code": d_3[parent]["code_TP"][index], "Structure" : d_3[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_3[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list if d_3[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_3[parent]["bt_list"][index]) if method_3_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_3_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": 
d_3[parent]["ID_TP"][index], "rule_list":[d_3[parent]["bt_list"][index]], "mass": d_3[parent]["mass_TP"][index], "Formula": d_3[parent]["Formula_TP"][index], "source_list" :[method_3_package], "code": d_3[parent]["code_TP"][index], "Structure" : d_3[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_3[parent]["inchi_TP"][index], "alternative_parent" : []} # add TP data from fourth data dict (if specified) if d_4 != "none": for parent in d_4: if d_4.get(parent, {}).get("combined_prob") is not None: for index, tp in enumerate(d_4[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_4[parent]["ID_TP"][index], "rule_list":[d_4[parent]["bt_list"][index]], "mass": d_4[parent]["mass_TP"][index], "Formula": d_4[parent]["Formula_TP"][index], "source_list" :[method_4_package], "code": d_4[parent]["code_TP"][index], "Structure" : d_4[parent]["Structure_TP"][index], "combined_prob": [d_4[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_4[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list data_dict_com[parent]["TP_dict"][tp]["combined_prob"].append(d_4[parent]["combined_prob"][index]) if d_4[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_4[parent]["bt_list"][index]) if method_4_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_4_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_4[parent]["ID_TP"][index], "rule_list":[d_4[parent]["bt_list"][index]], "mass": d_4[parent]["mass_TP"][index], "Formula": d_4[parent]["Formula_TP"][index], "source_list" :[method_4_package], "code": d_4[parent]["code_TP"][index], "Structure" : d_4[parent]["Structure_TP"][index], "combined_prob": [d_4[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_4[parent]["inchi_TP"][index], "alternative_parent" : []} else: for parent in d_4: for index, tp in enumerate(d_4[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_4[parent]["ID_TP"][index], "rule_list":[d_4[parent]["bt_list"][index]], "mass": d_4[parent]["mass_TP"][index], "Formula": d_4[parent]["Formula_TP"][index], "source_list" :[method_4_package], "code": d_4[parent]["code_TP"][index], "Structure" : d_4[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_4[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list if d_4[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_4[parent]["bt_list"][index]) if method_4_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_4_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_4[parent]["ID_TP"][index], "rule_list":[d_4[parent]["bt_list"][index]], "mass": d_4[parent]["mass_TP"][index], "Formula": d_4[parent]["Formula_TP"][index], "source_list" :[method_4_package], "code": d_4[parent]["code_TP"][index], "Structure" : d_4[parent]["Structure_TP"][index], "combined_prob": [], 
"score": 100, "InchiKey": d_4[parent]["inchi_TP"][index], "alternative_parent" : []} # add TP data from fifth data dict (if specified) if d_5 != "none": for parent in d_5: if d_5.get(parent, {}).get("combined_prob") is not None: for index, tp in enumerate(d_5[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_5[parent]["ID_TP"][index], "rule_list":[d_5[parent]["bt_list"][index]], "mass": d_5[parent]["mass_TP"][index], "Formula": d_5[parent]["Formula_TP"][index], "source_list" :[method_5_package], "code": d_5[parent]["code_TP"][index], "Structure" : d_5[parent]["Structure_TP"][index], "combined_prob": [d_5[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_5[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list data_dict_com[parent]["TP_dict"][tp]["combined_prob"].append(d_5[parent]["combined_prob"][index]) if d_5[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_5[parent]["bt_list"][index]) if method_5_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_5_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_5[parent]["ID_TP"][index], "rule_list":[d_5[parent]["bt_list"][index]], "mass": d_5[parent]["mass_TP"][index], "Formula": d_5[parent]["Formula_TP"][index], "source_list" :[method_5_package], "code": d_5[parent]["code_TP"][index], "Structure" : d_5[parent]["Structure_TP"][index], "combined_prob": [d_5[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_5[parent]["inchi_TP"][index], "alternative_parent" : []} else: for parent in d_5: for index, tp in enumerate(d_5[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_5[parent]["ID_TP"][index], "rule_list":[d_5[parent]["bt_list"][index]], "mass": d_5[parent]["mass_TP"][index], "Formula": d_5[parent]["Formula_TP"][index], "source_list" :[method_5_package], "code": d_5[parent]["code_TP"][index], "Structure" : d_5[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_5[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list if d_5[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_5[parent]["bt_list"][index]) if method_5_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_5_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_5[parent]["ID_TP"][index], "rule_list":[d_5[parent]["bt_list"][index]], "mass": d_5[parent]["mass_TP"][index], "Formula": d_5[parent]["Formula_TP"][index], "source_list" :[method_5_package], "code": d_5[parent]["code_TP"][index], "Structure" : d_5[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_5[parent]["inchi_TP"][index], "alternative_parent" : []} # add TP data from sixth data dict (if specified) if d_6 != "none": for parent in d_6: if d_6.get(parent, {}).get("combined_prob") is not None: for index, tp in enumerate(d_6[parent]["TP_list_canon"]): if 
data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_6[parent]["ID_TP"][index], "rule_list":[d_6[parent]["bt_list"][index]], "mass": d_6[parent]["mass_TP"][index], "Formula": d_6[parent]["Formula_TP"][index], "source_list" :[method_6_package], "code": d_6[parent]["code_TP"][index], "Structure" : d_6[parent]["Structure_TP"][index], "combined_prob": [d_6[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_6[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list data_dict_com[parent]["TP_dict"][tp]["combined_prob"].append(d_6[parent]["combined_prob"][index]) if d_6[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_6[parent]["bt_list"][index]) if method_6_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_6_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_6[parent]["ID_TP"][index], "rule_list":[d_6[parent]["bt_list"][index]], "mass": d_6[parent]["mass_TP"][index], "Formula": d_6[parent]["Formula_TP"][index], "source_list" :[method_6_package], "code": d_6[parent]["code_TP"][index], "Structure" : d_6[parent]["Structure_TP"][index], "combined_prob": [d_6[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_6[parent]["inchi_TP"][index], "alternative_parent" : []} else: for parent in d_6: for index, tp in enumerate(d_6[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_6[parent]["ID_TP"][index], "rule_list":[d_6[parent]["bt_list"][index]], "mass": d_6[parent]["mass_TP"][index], "Formula": d_6[parent]["Formula_TP"][index], "source_list" :[method_6_package], "code": d_6[parent]["code_TP"][index], "Structure" : d_6[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_6[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list if d_6[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_6[parent]["bt_list"][index]) if method_6_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_6_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_6[parent]["ID_TP"][index], "rule_list":[d_6[parent]["bt_list"][index]], "mass": d_6[parent]["mass_TP"][index], "Formula": d_6[parent]["Formula_TP"][index], "source_list" :[method_6_package], "code": d_6[parent]["code_TP"][index], "Structure" : d_6[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_6[parent]["inchi_TP"][index], "alternative_parent" : []} # add TP data from seventh data dict (if specified) if d_7 != "none": for parent in d_7: if d_7.get(parent, {}).get("combined_prob") is not None: for index, tp in enumerate(d_7[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_7[parent]["ID_TP"][index], "rule_list":[d_7[parent]["bt_list"][index]], "mass": d_7[parent]["mass_TP"][index], "Formula": d_7[parent]["Formula_TP"][index], 
"source_list" :[method_7_package], "code": d_7[parent]["code_TP"][index], "Structure" : d_7[parent]["Structure_TP"][index], "combined_prob": [d_7[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_7[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list data_dict_com[parent]["TP_dict"][tp]["combined_prob"].append(d_7[parent]["combined_prob"][index]) if d_7[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_7[parent]["bt_list"][index]) if method_7_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_7_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_7[parent]["ID_TP"][index], "rule_list":[d_7[parent]["bt_list"][index]], "mass": d_7[parent]["mass_TP"][index], "Formula": d_7[parent]["Formula_TP"][index], "source_list" :[method_7_package], "code": d_7[parent]["code_TP"][index], "Structure" : d_7[parent]["Structure_TP"][index], "combined_prob": [d_7[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_7[parent]["inchi_TP"][index], "alternative_parent" : []} else: for parent in d_7: for index, tp in enumerate(d_7[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_7[parent]["ID_TP"][index], "rule_list":[d_7[parent]["bt_list"][index]], "mass": d_7[parent]["mass_TP"][index], "Formula": d_7[parent]["Formula_TP"][index], "source_list" :[method_7_package], "code": d_7[parent]["code_TP"][index], "Structure" : d_7[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_7[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list if d_7[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_7[parent]["bt_list"][index]) if method_7_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_7_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_7[parent]["ID_TP"][index], "rule_list":[d_7[parent]["bt_list"][index]], "mass": d_7[parent]["mass_TP"][index], "Formula": d_7[parent]["Formula_TP"][index], "source_list" :[method_7_package], "code": d_7[parent]["code_TP"][index], "Structure" : d_7[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_7[parent]["inchi_TP"][index], "alternative_parent" : []} # add TP data from eighth data dict (if specified) if d_8 != "none": for parent in d_8: if d_8.get(parent, {}).get("combined_prob") is not None: for index, tp in enumerate(d_8[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_8[parent]["ID_TP"][index], "rule_list":[d_8[parent]["bt_list"][index]], "mass": d_8[parent]["mass_TP"][index], "Formula": d_8[parent]["Formula_TP"][index], "source_list" :[method_8_package], "code": d_8[parent]["code_TP"][index], "Structure" : d_8[parent]["Structure_TP"][index], "combined_prob": [d_8[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_8[parent]["inchi_TP"][index], "alternative_parent" : []} else: if 
data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list data_dict_com[parent]["TP_dict"][tp]["combined_prob"].append(d_8[parent]["combined_prob"][index]) if d_8[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_8[parent]["bt_list"][index]) if method_8_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_8_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_8[parent]["ID_TP"][index], "rule_list":[d_8[parent]["bt_list"][index]], "mass": d_8[parent]["mass_TP"][index], "Formula": d_8[parent]["Formula_TP"][index], "source_list" :[method_8_package], "code": d_8[parent]["code_TP"][index], "Structure" : d_8[parent]["Structure_TP"][index], "combined_prob": [d_8[parent]["combined_prob"][index]], "score": 100, "InchiKey": d_8[parent]["inchi_TP"][index], "alternative_parent" : []} else: for parent in d_8: for index, tp in enumerate(d_8[parent]["TP_list_canon"]): if data_dict_com.get(parent, {}).get("TP_dict") is None: data_dict_com[parent]["TP_dict"] = {} data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_8[parent]["ID_TP"][index], "rule_list":[d_8[parent]["bt_list"][index]], "mass": d_8[parent]["mass_TP"][index], "Formula": d_8[parent]["Formula_TP"][index], "source_list" :[method_8_package], "code": d_8[parent]["code_TP"][index], "Structure" : d_8[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_8[parent]["inchi_TP"][index], "alternative_parent" : []} else: if data_dict_com[parent]["TP_dict"].get(tp): # if TP already there then append list if d_8[parent]["bt_list"][index] not in data_dict_com[parent]["TP_dict"][tp]["rule_list"]: data_dict_com[parent]["TP_dict"][tp]["rule_list"].append(d_8[parent]["bt_list"][index]) if method_8_package not in data_dict_com[parent]["TP_dict"][tp]["source_list"]: data_dict_com[parent]["TP_dict"][tp]["source_list"].append(method_8_package) else: data_dict_com[parent]["TP_dict"][tp] = {"CAS": d_8[parent]["ID_TP"][index], "rule_list":[d_8[parent]["bt_list"][index]], "mass": d_8[parent]["mass_TP"][index], "Formula": d_8[parent]["Formula_TP"][index], "source_list" :[method_8_package], "code": d_8[parent]["code_TP"][index], "Structure" : d_8[parent]["Structure_TP"][index], "combined_prob": [], "score": 100, "InchiKey": d_8[parent]["inchi_TP"][index], "alternative_parent" : []} do_pickle(data_dict_com, "data_dict_com.pickle") return data_dict_com def score_dict(data_dict_com): # Scoring system, each TP starts with a score of 100 made up points print("Scoring dictionary") # check the mass for parent in data_dict_com: for tp in data_dict_com[parent]["TP_dict"]: if data_dict_com[parent]["TP_dict"][tp]["mass"] < 100: # if the mass of the TPs is below 100 u then set the score to 0 data_dict_com[parent]["TP_dict"][tp]["score"] = data_dict_com[parent]["TP_dict"][tp]["score"] - 100 # check if TP has CAS number # if TP has CAS number then it was studied before and we want to look for new TPs, however if it has no CAS number then the chance is higher that it is only a wanky prediction and not an actual TP that is observed in the environment for parent in data_dict_com: for tp in data_dict_com[parent]["TP_dict"]: if len(data_dict_com[parent]["TP_dict"][tp]["CAS"]) > 1: # if TP has (at least one) CAS number, then reduce score (empty string "" has len 0) data_dict_com[parent]["TP_dict"][tp]["score"] = data_dict_com[parent]["TP_dict"][tp]["score"] - 10 # 
def score_dict(data_dict_com):
    # scoring system: every TP starts with a score of 100 (arbitrary) points and collects penalties below
    print("Scoring dictionary")
    import copy  # needed for deepcopy below; copy is not imported in the header of this script

    # check the mass
    for parent in data_dict_com:
        for tp in data_dict_com[parent]["TP_dict"]:
            if data_dict_com[parent]["TP_dict"][tp]["mass"] < 100:  # if the mass of the TP is below 100 u, subtract the full 100 points (effectively discarding it)
                data_dict_com[parent]["TP_dict"][tp]["score"] = data_dict_com[parent]["TP_dict"][tp]["score"] - 100

    # check if the TP has a CAS number
    # if a TP has a CAS number it has been studied before, and we want to look for new TPs; if it has
    # no CAS number, however, the chance is higher that it is merely a spurious prediction and not a TP
    # that is actually observed in the environment
    for parent in data_dict_com:
        for tp in data_dict_com[parent]["TP_dict"]:
            if len(data_dict_com[parent]["TP_dict"][tp]["CAS"]) > 1:  # the TP has (at least one) CAS number, so reduce the score (an empty string "" has len 0)
                data_dict_com[parent]["TP_dict"][tp]["score"] = data_dict_com[parent]["TP_dict"][tp]["score"] - 10

    # check if the TP was predicted by other methods: the fewer methods agree, the larger the penalty
    # (a TP predicted by all 8 methods keeps its score unchanged)
    source_penalty = {1: 50, 2: 40, 3: 30, 4: 20, 5: 15, 6: 10, 7: 5}
    for parent in data_dict_com:
        for tp in data_dict_com[parent]["TP_dict"]:
            n_sources = len(data_dict_com[parent]["TP_dict"][tp]["source_list"])
            data_dict_com[parent]["TP_dict"][tp]["score"] -= source_penalty.get(n_sources, 0)

    # check the combined probability (saved as strings, so they must be converted to float; only available for enviPath predictions)
    # note: the original guard looked the TP up one dictionary level too high
    # (data_dict_com[parent].get(tp) instead of ...["TP_dict"].get(tp)), so these penalties never fired; fixed here
    for parent in data_dict_com:
        for tp in data_dict_com[parent]["TP_dict"]:
            probs = data_dict_com[parent]["TP_dict"][tp]["combined_prob"]
            if probs:  # an empty list means no probability is available for this TP
                # the list holds one value per predicting enviPath method; the most favourable (highest) one is used here
                prob = max(float(p) for p in probs)
                # each time a condition is fulfilled the score is reduced (careful, the penalties add up!)
                if prob < 0.3:
                    data_dict_com[parent]["TP_dict"][tp]["score"] -= 5
                if prob < 0.1:
                    data_dict_com[parent]["TP_dict"][tp]["score"] -= 5
                if prob < 0.01:
                    data_dict_com[parent]["TP_dict"][tp]["score"] -= 5
                if prob < 0.005:
                    data_dict_com[parent]["TP_dict"][tp]["score"] -= 10
                if prob < 0.001:
                    data_dict_com[parent]["TP_dict"][tp]["score"] -= 10

    # now delete the unwanted TPs (entries with a very low score)
    # iterate over a deep copy, otherwise: RuntimeError: dictionary changed size during iteration
    data_dict_com_copy = copy.deepcopy(data_dict_com)
    for parent in data_dict_com_copy:
        for tp in data_dict_com_copy[parent]["TP_dict"]:
            if data_dict_com_copy[parent]["TP_dict"][tp]["score"] <= 0:
                del data_dict_com[parent]["TP_dict"][tp]

    # check for duplicates: if a TP is also found for another parent, record it in that TP's alternative_parent field
    for parent in data_dict_com:
        for tp in data_dict_com[parent]["TP_dict"]:
            for parent2 in data_dict_com:
                if tp in data_dict_com[parent2]["TP_dict"]:
                    if data_dict_com[parent]["code_parent"][0] not in data_dict_com[parent2]["TP_dict"][tp]["code"]:
                        # data_dict_com[parent2]["TP_dict"][tp]["alternative_parent"].append(data_dict_com[parent]["code_parent"])
                        data_dict_com[parent2]["TP_dict"][tp]["alternative_parent"] = data_dict_com[parent]["code_parent"]  # overwrites; the commented line above would accumulate instead

    # create a new dict to which the removed TPs are added
    removed_tps_dict = {}
    # if a parent has more than the maximum allowed number of TPs, remove TPs starting with the lowest score
    # again iterate over a deep copy to avoid changing the dictionary size during iteration
    data_dict_com_copy_3 = copy.deepcopy(data_dict_com)
    for parent in data_dict_com_copy_3:
        temp_tp_list = []    # temporary lists with the TP SMILES and corresponding scores of one parent
        temp_score_list = []
        removed_tps_dict[parent] = {"TP_list": [], "parent_name": [], "tp_name": []}  # create entries for every parent
        for tp in data_dict_com_copy_3[parent]["TP_dict"]:
            temp_tp_list.append(tp)
            temp_score_list.append(data_dict_com_copy_3[parent]["TP_dict"][tp]["score"])
        while len(temp_tp_list) > max_TP_per_parent:  # proceed only if the parent has more than max_TP_per_parent TPs
            index = temp_score_list.index(min(temp_score_list))  # find the first index of the lowest score
            # record the TP that is deleted
            removed_tps_dict[parent]["TP_list"].append(temp_tp_list[index])
            removed_tps_dict[parent]["parent_name"].append(data_dict_com_copy_3[parent]["code_parent"])
            # bug fix: the original recorded the code of the last TP visited by the loop above (stale
            # loop variable tp); record the code of the TP that is actually removed instead
            removed_tps_dict[parent]["tp_name"].append(data_dict_com_copy_3[parent]["TP_dict"][temp_tp_list[index]]["code"])
            del data_dict_com[parent]["TP_dict"][temp_tp_list[index]]  # the index of the score matches the corresponding TP
            temp_score_list.pop(index)  # remove the score and the TP SMILES from the temporary lists
            temp_tp_list.pop(index)

    do_pickle(data_dict_com, "data_dict_com_scored.pickle")

    # export an overview of the removed TPs
    parent_removed_list = []
    tp_removed_list = []
    tp_code_removed_list = []
    parent_code_removed_list = []
    for parent in removed_tps_dict:
        for tp in removed_tps_dict[parent]["TP_list"]:
            parent_removed_list.append(parent)
            tp_removed_list.append(tp)
        for name in removed_tps_dict[parent]["parent_name"]:
            parent_code_removed_list.append(name)
        for code in removed_tps_dict[parent]["tp_name"]:
            tp_code_removed_list.append(code)
    df_removed_dict = {"Parent Code": parent_code_removed_list, "Parent SMILES": parent_removed_list,
                       "TP Code": tp_code_removed_list, "TP SMILES": tp_removed_list}
    df_removed = pd.DataFrame.from_dict(df_removed_dict)
    df_removed.to_csv(output_removed_tps, index=False, sep="\t")
    return data_dict_com
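# Worked example of the scoring above (a sketch with hypothetical numbers, for illustration only):
def _example_score():
    score = 100
    score -= 10   # the TP has a CAS number, i.e. it was described before
    score -= 40   # only two of the eight methods predicted it
    score -= 5    # best pathway probability 0.05 is below 0.3 ...
    score -= 5    # ... and also below 0.1, so both penalties apply
    return score  # 40 -> this TP is kept, since only TPs with a score <= 0 are deleted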
################################################################################################################################################################################################################################################################
# START SCRIPT

# all user inputs must have the expected type; the original listed one assert per variable,
# collapsed into a loop here (note that output_location_7/8 are used below but were never checked)
string_input_names = (["file_location_%d" % i for i in range(1, 9)]
                      + ["prediction_method_%d" % i for i in range(1, 9)]
                      + ["package_method_%d" % i for i in range(1, 9)]
                      + ["code_location", "smi_location", "name_location"]
                      + ["output_location_%d" % i for i in range(1, 7)]
                      + ["output_file_CD_masslist", "output_file_all_data",
                         "output_inclusion_pos", "output_inclusion_neg"])
for input_name in string_input_names:
    assert type(globals()[input_name]) == str, input_name + " must be a string!"
assert type(scoring_system) == bool, "scoring_system must be either 'True' or 'False'!"
assert type(max_TP_per_parent) == int, "max_TP_per_parent must be an integer (e.g. 15 or 50)!"

def load_mapping_files(code_location, smi_location, name_location):
    # read the files with code, name and SMILES of the selected parents; the line order must match across the three files!
    def read_lines(path):
        with open(path) as handle:
            return [line.rstrip() for line in handle]
    code_list = read_lines(code_location)
    SMILES_list = read_lines(smi_location)
    name_list = read_lines(name_location)
    code_dict = dict(zip(SMILES_list, code_list))  # code of each selected compound, keyed by its SMILES
    name_dict = dict(zip(SMILES_list, name_list))  # name of each selected compound, keyed by its SMILES
    return code_dict, name_dict

################################################################################################################################################################################################################################################################
# MAIN
################################################################################################################################################################################################################################################################

code_dict, name_dict = load_mapping_files(code_location, smi_location, name_location)

# read the first file
data_dict_1 = file_to_csv(file_location_1, "data_dict_" + package_method_1 + ".pickle", output_location_1, prediction_method_1)
# read the remaining files (if available)
if consider_file_2 == "yes":
    data_dict_2 = file_to_csv(file_location_2, "data_dict_" + package_method_2 + ".pickle", output_location_2, prediction_method_2)
if consider_file_3 == "yes":
    data_dict_3 = file_to_csv(file_location_3, "data_dict_" + package_method_3 + ".pickle", output_location_3, prediction_method_3)
if consider_file_4 == "yes":
    data_dict_4 = file_to_csv(file_location_4, "data_dict_" + package_method_4 + ".pickle", output_location_4, prediction_method_4)
if consider_file_5 == "yes":
    data_dict_5 = file_to_csv(file_location_5, "data_dict_" + package_method_5 + ".pickle", output_location_5, prediction_method_5)
if consider_file_6 == "yes":
    data_dict_6 = file_to_csv(file_location_6, "data_dict_" + package_method_6 + ".pickle", output_location_6, prediction_method_6)
if consider_file_7 == "yes":
    data_dict_7 = file_to_csv(file_location_7, "data_dict_" + package_method_7 + ".pickle", output_location_7, prediction_method_7)
if consider_file_8 == "yes":
    data_dict_8 = file_to_csv(file_location_8, "data_dict_" + package_method_8 + ".pickle", output_location_8, prediction_method_8)

# import previously saved dictionaries instead (uncomment if needed)
# data_dict_1 = get_pickle("data_dict_1.pickle")
# data_dict_2 = get_pickle("data_dict_2.pickle")
# data_dict_3 = get_pickle("data_dict_3.pickle")
# data_dict_4 = get_pickle("data_dict_4.pickle")
# data_dict_5 = get_pickle("data_dict_5.pickle")
# data_dict_6 = get_pickle("data_dict_6.pickle")
# data_dict_7 = get_pickle("data_dict_7.pickle")
# data_dict_8 = get_pickle("data_dict_8.pickle")
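# Illustration (a sketch, not part of the original workflow): after a run, the combined and
# scored dictionary can be reloaded from the pickle written by score_dict to inspect, for
# example, the best-scoring TPs of one parent; the parent SMILES passed in is hypothetical.
def _inspect_scored_pickle(parent_smiles):
    d = get_pickle("data_dict_com_scored.pickle")
    tps = d[parent_smiles]["TP_dict"]
    # return the SMILES of the ten highest-scoring TPs of this parent
    return sorted(tps, key=lambda t: tps[t]["score"], reverse=True)[:10]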
get_pickle("data_dict_7.pickle") # data_dict_8 = get_pickle("data_dict_8.pickle") # combine all data dicts into one and apply scoring system # also, generate mass list for CD, csv file with all the data and iclusion lists for QExactivePlus if scoring_system == True: if consider_file_2 == "yes": if consider_file_3 == "yes": if consider_file_4 == "yes": if consider_file_5 == "yes": if consider_file_6 == "yes": if consider_file_7 == "yes": if consider_file_8 == "yes": data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, data_dict_4, package_method_4, data_dict_5, package_method_5, data_dict_6, package_method_6, data_dict_7, package_method_7, data_dict_8, package_method_8) data_dict_com_scored = score_dict(data_dict_com) combined_dict_to_csv(data_dict_com_scored, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, data_dict_4, package_method_4, data_dict_5, package_method_5, data_dict_6, package_method_6, data_dict_7, package_method_7, "none", "none") data_dict_com_scored = score_dict(data_dict_com) combined_dict_to_csv(data_dict_com_scored, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, data_dict_4, package_method_4, data_dict_5, package_method_5, data_dict_6, package_method_6, "none", "none", "none", "none") data_dict_com_scored = score_dict(data_dict_com) combined_dict_to_csv(data_dict_com_scored, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, data_dict_4, package_method_4, data_dict_5, package_method_5, "none", "none", "none", "none", "none", "none") data_dict_com_scored = score_dict(data_dict_com) combined_dict_to_csv(data_dict_com_scored, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, data_dict_4, package_method_4, "none", "none", "none", "none", "none", "none", "none", "none") data_dict_com_scored = score_dict(data_dict_com) combined_dict_to_csv(data_dict_com_scored, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, "none", "none", "none", "none", "none", "none", "none", "none", "none", "none") data_dict_com_scored = score_dict(data_dict_com) combined_dict_to_csv(data_dict_com_scored, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none") data_dict_com_scored = score_dict(data_dict_com) combined_dict_to_csv(data_dict_com_scored, output_file_CD_masslist, output_file_all_data) if scoring_system == False: if consider_file_2 == "yes": if consider_file_3 == "yes": if consider_file_4 == "yes": if consider_file_5 == "yes": if consider_file_6 == "yes": if consider_file_7 == "yes": if consider_file_8 == "yes": data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, data_dict_4, package_method_4, data_dict_5, package_method_5, data_dict_6, package_method_6, 
data_dict_7, package_method_7, data_dict_8, package_method_8) combined_dict_to_csv(data_dict_com, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, data_dict_4, package_method_4, data_dict_5, package_method_5, data_dict_6, package_method_6, data_dict_7, package_method_7, "none", "none") combined_dict_to_csv(data_dict_com, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, data_dict_4, package_method_4, data_dict_5, package_method_5, data_dict_6, package_method_6, "none", "none", "none", "none") combined_dict_to_csv(data_dict_com, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, data_dict_4, package_method_4, data_dict_5, package_method_5, "none", "none", "none", "none", "none", "none") combined_dict_to_csv(data_dict_com, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, data_dict_4, package_method_4, "none", "none", "none", "none", "none", "none", "none", "none") combined_dict_to_csv(data_dict_com, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, data_dict_3, package_method_3, "none", "none", "none", "none", "none", "none", "none", "none", "none", "none") combined_dict_to_csv(data_dict_com, output_file_CD_masslist, output_file_all_data) else: data_dict_com = combine_dict(data_dict_1, package_method_1, data_dict_2, package_method_2, "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none") combined_dict_to_csv(data_dict_com, output_file_CD_masslist, output_file_all_data) ############################################################################################################################################################################################################################################################### print("Script finished successfully") # end of script ############################################################################################################################################################################################################################################################### # ░░░░░░░░███████████████░░░░░░░░ # ░░░░░█████████████████████░░░░░ # ░░░░████████████████████████░░░ # ░░░██████████████████████████░░ # ░░█████████████████████████████ # ░░███████████▀░░░░░░░░░████████ # ░░███████████░░░░░░░░░░░░░░░███ # ░████████████░░░░░░░░░░░░░░░░██ # ░█░░███████░░░░░░░░░░░▄▄░░░░░██ # █░░░░█████░░░░░░▄███████░░██░░█ # █░░█░░░███░░░░░██▀▀░░░░░░░░██░█ # █░░░█░░░░░░░░░░░░▄██▄░░░░░░░███ # █░░▄█░░░░░░░░░░░░░░░░░░█▀▀█▄░██ # █░░░░░░░░░░░░░░░░░░░░░░█░░░░██░ # ░███░░░░░░░░░░░░░░░░░░░█░░░░█░░ # ░░█░█░░░░░░░█░░░░░██▀▄░▄██░░░█░ # ░░█░█░░░░░░█░░░░░░░░░░░░░░░░░█░ # ░░░██░░░░░░█░░░░▄▄▄▄▄▄░░░░░░█░░ # ░░░██░░░░░░░█░░█▄▄▄▄░▀▀██░░█░░░ # ░░░██░░░░░░░█░░▀████████░░█░░░░ # ░░█░░█░░░░░░░█░░▀▄▄▄▄██░░█░░░░░ # ░░█░░░█░░░░░░░█░░░░░░░░░█░░░░░░ # ░█░░░░░█░░░░░░░░░░░░░░░░█░░░░░░ # ░░░░░░░░█░░░░░░█░░░░░░░░█░░░░░░ # ░░░░░░░░░░░░░░░░████████░░░░░░░ diff --git a/readme.md b/readme.md index c849028..07bf684 100644 --- a/readme.md +++ b/readme.md @@ -1,46 +1,46 
# TP_predict - Predict TPs and create suspect lists

This collection of scripts allows the user to reproduce the TP prediction and data analyses presented in the following publication:

Trostel, L. & Coll, C., Fenner, K., Hafner, J. Synergy of predictive and analytical methods advances elucidating biotransformation processes in activated sludge, 2023. [insert DOI]

The tools can further be used to perform the same predictions and analyses on a different set of compounds.

## Content

* **TP_prediction**: Script to predict TPs and corresponding biodegradation pathways
* **File_conversion**: Conversion of prediction output to input for suspect screening tools
  * Prediction_output_to_mass_list
  * SMILES_to_mass_and_inclusion_list
* **Additional_analyses**
  * Compare_methods
  * Analyse_cutoff_thresholds

Specific user guidance can be found in the README.md files of the content folders.

## How to

To fetch the code from the git repository, open a terminal and run:
```
$ git clone [insert link]
```
To install the dependencies, go to the TP_predict directory and run:
```
$ cd TP_predict
$ make
```

## Installation and requirements

The scripts require RDKit for Python, which is most easily installed in a conda environment. All scripts have been developed and tested in Python 3.6 and higher.

-### Anaconda: Step by step guide for non-python users:
+### Anaconda step by step guide for non-python users:
1. [Download Anaconda](https://docs.anaconda.com/anaconda/install/index.html) and install it, then run `Anaconda Navigator`
2. Create a new environment under the `Environments` tab and select Python version 3.6.13
3. Go to `Environments`, click the `play button` on the newly created environment and open a terminal
4. Run the following lines individually (confirm when prompted by typing `y` and pressing `enter`; this might take a while): `conda install -c rdkit rdkit` and `pip install pubchempy`
5. Check that pandas is installed and active according to [this tutorial](https://docs.anaconda.com/anaconda/navigator/tutorials/pandas/)
6. Open `Anaconda Navigator`, go to the `Home` tab and check that `Applications on` is set to the new environment
7. Click the `gear icon` on `Spyder` > install specific version > 5.0.5 and wait for the installation to finish
8. Click the `launch button` below `Spyder`
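
To quickly verify that the environment is set up correctly, you can run a short snippet like the following in Spyder (a minimal sketch; the exact version numbers on your system may differ):
```
import sys
import pandas
import pubchempy
from rdkit import rdBase

# print the versions of the interpreter and the key dependencies
print("Python:", sys.version.split()[0])   # e.g. 3.6.13
print("RDKit:", rdBase.rdkitVersion)
print("pandas:", pandas.__version__)
print("PubChemPy:", pubchempy.__version__)
```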