Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F85148392
sludge_caculate_PaDEL_descriptor.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Sep 27, 02:50
Size
1 KB
Mime Type
text/x-python
Expires
Sun, Sep 29, 02:50 (2 d)
Engine
blob
Format
Raw Data
Handle
21135036
Attached To
rTWTEST master_thesis_Lee
sludge_caculate_PaDEL_descriptor.py
View Options
"""Compute PaDEL molecular descriptors for sludge compounds.

Reads a TSV of compounds with canonical SMILES, computes PaDEL
descriptors for each compound (skipping and reporting any whose
calculation fails or times out), and writes the resulting
descriptor table to a TSV file.
"""
from pathlib import Path

from padelpy import from_smiles

import pandas as pd

# Base data directory; input and output files live below it.
# NOTE(review): hard-coded local path — parameterize before running
# this script anywhere but the author's machine.
file_location = Path(r"C:\Users\leetseng\TWtest")
input_file_path_all_compounds = file_location / "input" / "sludgeWithSmiles.tsv"
# Fixed: the original concatenated '/output' with '\\descriptors', mixing
# path separators; pathlib builds the path with consistent separators.
output_file_path_padel = file_location / "output" / "descriptors" / "sludge_PaDEL_test1.tsv"

data = pd.read_csv(input_file_path_all_compounds, sep="\t")
print(data.head(2))

# Diagnostic only: show the distinct SMILES present in the input.
set_of_canonicalize_smiles = set(data["canonicalize_smiles"].values.tolist())
print(set_of_canonicalize_smiles)

# Maps compound id -> {descriptor name: value}. Compounds whose PaDEL
# run raises are reported and left out of the output table.
D = {}
for _, row in data.iterrows():
    # NOTE: the input was concatenated from several iterations, so the
    # 'index' column is not continuous — use it as the compound id
    # rather than the positional dataframe index.
    compound_id = row["index"]  # renamed: 'id' shadowed the builtin
    print(compound_id)
    smiles = row["canonicalize_smiles"]
    try:
        # maxruntime: give up on a single PaDEL calculation after 20 s.
        descriptors = from_smiles(smiles, maxruntime=20)
    except RuntimeError:
        print(
            "Warning: No PaDEL descriptor could be calculated for "
            "compound {}, smiles = {}".format(compound_id, smiles)
        )
    else:
        D[compound_id] = descriptors

df = pd.DataFrame.from_dict(D, orient="index")
df.to_csv(output_file_path_padel, sep="\t")
Event Timeline
Log In to Comment