Page MenuHomec4science

sludge_caculate_PaDEL_descriptor.py
No OneTemporary

File Metadata

Created
Tue, Apr 16, 06:56

sludge_caculate_PaDEL_descriptor.py

import os

import pandas as pd
from padelpy import from_smiles
# Compute PaDEL descriptors for every compound listed in the sludge SMILES
# table and write them to a tab-separated file, one row per compound id.

# Root folder of the project data (Windows-style path).
file_location = "C:\\Users\\leetseng\\TWtest"
# NOTE: the input table was concatenated from several iterations, so its
# 'index' column is not continuous.
input_file_path_all_compounds = file_location + '\\input\\sludgeWithSmiles.tsv'
# Backslashes are used throughout instead of mixing '/' and '\\'.
output_file_path_padel = file_location + '\\output\\descriptors\\sludge_PaDEL_test1.tsv'

# Create the output folder up front so the final write cannot fail on a
# missing directory after all descriptors have already been computed.
output_directory = os.path.dirname(output_file_path_padel)
if output_directory:
    os.makedirs(output_directory, exist_ok=True)

data = pd.read_csv(input_file_path_all_compounds, sep='\t')  # pandas DataFrame
print(data.head(2))

# Unique SMILES present in the input, printed for inspection.
list_of_canonicalize_smiles = data['canonicalize_smiles'].values.tolist()
set_of_canonicalize_smiles = set(list_of_canonicalize_smiles)
print(set_of_canonicalize_smiles)

# D maps compound id -> descriptor mapping returned by from_smiles.
D = {}
# Successful results are cached per SMILES so that compounds sharing a SMILES
# string do not trigger another PaDEL run. Failures are not cached, so a
# failing SMILES is retried (and reported) for every compound that uses it.
descriptors_by_smiles = {}
for compound_id, smiles in zip(data['index'], data['canonicalize_smiles']):
    print(compound_id)
    if smiles not in descriptors_by_smiles:
        try:
            descriptors_by_smiles[smiles] = from_smiles(smiles, maxruntime=20)
        except RuntimeError:
            print('Warning: No PaDEL descriptor could be calculated for compound {}, smiles = {}'.format(compound_id, smiles))
            continue
    D[compound_id] = descriptors_by_smiles[smiles]

# One row per compound id, one column per descriptor name.
df = pd.DataFrame.from_dict(D, orient='index')
df.to_csv(output_file_path_padel, sep='\t')
# for index, row in data.iterrows():
# id = row['index']
# print(id)
# smiles_list = []
# smiles_list.append(row['canonicalize_smiles'])
# smiles_set = set(smiles_list)
# print(smiles_set)

Event Timeline