Page MenuHomec4science

test2.py
No OneTemporary

File Metadata

Created
Wed, Aug 14, 23:41

test2.py

# import sys
# # sys.path.insert(0, '/Users/jasmin/enviPath-python/enviPath_python/')
# sys.path.insert(0, '/home/jasmin/enviPath-python/enviPath_python/')
# from enviPath import *
# from objects import *
# import getpass
# # from subprocess import Popen, PIPE
# import os
import getpass
import os
import sys
sys.path.insert(0,'C:\\Users\\leetseng\\enviPath-python\\') #C:/envipath_code
from enviPath_python.enviPath import *
from enviPath_python.objects import *
import rdkit
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize
import time
from rdkit.Chem import Descriptors
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
# Instance to talk to and the shared client object used by the functions below.
INSTANCE_HOST = 'https://envipath.org'
eP = enviPath(INSTANCE_HOST)
# Credentials must not live in the source file: take them from the environment
# (ENVIPATH_USERNAME / ENVIPATH_PASSWORD) and fall back to an interactive,
# non-echoing prompt for the password when the variable is not set.
_username = os.environ.get('ENVIPATH_USERNAME', 'leetseng')
_password = os.environ.get('ENVIPATH_PASSWORD') or getpass.getpass(
    'enviPath password for {}: '.format(_username))
eP.login(_username, _password)
# Drop the secret from the module namespace once the session is established.
del _password
# Print the logged-in user's name as a quick check that the login worked.
print(eP.who_am_i().get_name())
# # Define the instance to use
# INSTANCE_HOST = 'https://envipath.org'
# INSTANCE_HOST = 'http://localhost:8080'
# username = 'jasmin'
# username = 'admin'
# eP = enviPath(INSTANCE_HOST)
#
# password = getpass.getpass()
# eP.login(username, password)
def main(package_id='http://envipath.org/package/4a3cd0f4-4d2b-4f00-b3e6-a29e721f7038',
         output_filename='test_append.txt', first=100, stop=200):
    """Collect the 'hl' half-life values of a window of compounds and append them to a file.

    The compounds of the package are numbered from 1. Only compounds whose
    number lies in ``first <= number < stop`` are processed; processing is
    done in windows so that a run can be continued later (the output file is
    opened in append mode). Earlier revisions of this script wrote to
    'halflives.txt' and used 648 as the upper bound.

    Args:
        package_id: URL of the enviPath package to read compounds from.
        output_filename: Text file that receives one line per compound with
            at least one 'hl' value: the compound name followed by its
            distinct values, separated by single spaces.
        first: 1-based number of the first compound to process.
        stop: 1-based number of the first compound NOT to process.

    Side effects:
        Prints progress information and, at the end, the collected
        name -> set-of-values dictionary.
    """
    package = Package(eP.requester, id=package_id)
    compounds = package.get_compounds()
    total = len(compounds)
    print(total)
    halflife_dict = dict()
    # The context manager guarantees the file is closed even if one of the
    # remote calls below raises half-way through the run.
    with open(output_filename, 'a') as f:
        for counter, c in enumerate(compounds, start=1):
            if counter < first:
                continue
            if counter >= stop:
                break
            print("Processing", counter, "/", total)
            # presumably this call is what populates c.halflifes - TODO confirm
            c.get_scenarios()
            name = c.get_name()
            for half_life in c.halflifes:
                if 'hl' not in half_life:
                    continue
                print(name, c.halflifes)
                # A set keeps each distinct value once per compound name.
                halflife_dict.setdefault(name, set()).add(half_life['hl'])
            # NOTE(review): the original indentation was lost; the write is
            # assumed to happen once per compound, after its values have been
            # collected.
            if name in halflife_dict:
                # str() keeps this working when a value is not already text.
                values = [str(value) for value in halflife_dict[name]]
                f.write(" ".join([name] + values) + "\n")
            # A throttling pause (sleep 20 s every 100 compounds) was sketched
            # here in the original and left disabled.
    print(halflife_dict)
#
# counter = 0
# for c in compounds:
# if counter >= 100:
# break
# counter += 1
# if len(c.halflifes) != 0:
# for half_life in c.halflifes:
# if 'hl' in half_life:
# print(c.get_name(), c.halflifes)
# print(c.get_name(), c.halflifes)
# analyze_compounds(compounds, '../output/compound_list_soil.txt')
def canonicalize_smiles(smiles):
    """Return a canonical SMILES string for the uncharged form of *smiles*.

    The input is parsed with RDKit, passed through the MolStandardize
    Uncharger (which protonates or deprotonates the molecule), written back
    as SMILES and finally run through ``Chem.CanonSmiles``.

    Args:
        smiles: SMILES string to canonicalize.

    Returns:
        The canonical SMILES string.

    Raises:
        ValueError: if RDKit cannot parse *smiles*.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a readable message instead of an opaque error further down.
        raise ValueError('Could not parse SMILES: {!r}'.format(smiles))
    uncharged = rdMolStandardize.Uncharger().uncharge(mol)
    new_smiles = Chem.MolToSmiles(uncharged)
    return Chem.CanonSmiles(new_smiles)
def analyze_compounds(all_compounds, output_filename):
    """Write a tab-separated overview of *all_compounds* to *output_filename*.

    One header line is written, followed by one line per compound with these
    columns: running index (starting at 1), name, ID, full SMILES, cropped
    SMILES (the part before the first '.'), composite flag (True when the
    full SMILES contains a '.'), canonical SMILES, canonical cropped SMILES
    and the number of half-lives reported for the compound.

    Args:
        all_compounds: Sized iterable of objects offering ``get_id()``.
        output_filename: Path of the file to create; an existing file is
            overwritten.

    Side effects:
        Prints the number of compounds and a progress line per compound.
    """
    columns = ('Index', 'Name', 'ID', 'Full_SMILES', 'Cropped_SMILES',
               'Is_composite', 'Canonical_SMILES',
               'Cropped_canonical_SMILES', 'Number_halflives')
    # Header and data rows share one layout: nine tab-separated fields.
    row_format = '\t'.join(['{}'] * len(columns)) + '\n'
    # The context manager closes the file even if a lookup fails mid-run.
    with open(output_filename, 'w') as outfile:
        outfile.write(row_format.format(*columns))
        print("No of compounds:", len(all_compounds))
        for index, cpd in enumerate(all_compounds, start=1):
            print('Checking compound #', index)
            source_id = cpd.get_id()
            compound_structure = CompoundStructure(eP.requester, id=source_id)
            compound = Compound(eP.requester, id=source_id)
            halflives = compound_structure.get_halflifes()
            name = compound.get_name()
            compound_id = compound.get_id()
            full_smiles = compound.get_smiles()
            canonical_smiles = canonicalize_smiles(full_smiles)
            is_composite = '.' in full_smiles
            if is_composite:
                # Keep only the first component of a multi-component SMILES.
                cropped_smiles = full_smiles.split('.')[0]
                cropped_canonical_smiles = canonicalize_smiles(cropped_smiles)
            else:
                # Nothing was cropped, so the canonical form is the same one.
                cropped_smiles = full_smiles
                cropped_canonical_smiles = canonical_smiles
            outfile.write(row_format.format(
                index, name, compound_id, full_smiles, cropped_smiles,
                is_composite, canonical_smiles, cropped_canonical_smiles,
                len(halflives)))
# Run the half-life export only when this file is executed as a script, so
# that importing it (e.g. to reuse canonicalize_smiles) does not start a run.
if __name__ == '__main__':
    main()

Event Timeline