Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F61368382
xyz2mol.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, May 6, 05:52
Size
22 KB
Mime Type
text/x-python
Expires
Wed, May 8, 05:52 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
17375491
Attached To
R11301 MOLEKUEHL
xyz2mol.py
View Options
"""
Module for generating rdkit molobj/smiles/molecular graph from free atoms
Implementation by Jan H. Jensen, based on the paper
Yeonjoon Kim and Woo Youn Kim
"Universal Structure Conversion Method for Organic Molecules: From Atomic Connectivity
to Three-Dimensional Geometry"
Bull. Korean Chem. Soc. 2015, Vol. 36, 1769-1777
DOI: 10.1002/bkcs.10334
"""
import
copy
import
itertools
from
rdkit.Chem
import
rdmolops
from
rdkit.Chem
import
rdchem
try
:
from
rdkit.Chem
import
rdEHTTools
#requires RDKit 2019.9.1 or later
except
ImportError
:
rdEHTTools
=
None
from
collections
import
defaultdict
import
numpy
as
np
import
networkx
as
nx
from
rdkit
import
Chem
from
rdkit.Chem
import
AllChem
,
rdmolops
import
sys
# Lower-case element symbols ordered by atomic number: index 0 is hydrogen
# (Z = 1) and the last entry is plutonium (Z = 94).  str_atom() and
# int_atom() use this table to translate between symbols and atomic numbers,
# so every entry must be the bare symbol (no surrounding whitespace).
__ATOM_LIST__ = [
    'h', 'he',
    'li', 'be', 'b', 'c', 'n', 'o', 'f', 'ne',
    'na', 'mg', 'al', 'si', 'p', 's', 'cl', 'ar',
    'k', 'ca', 'sc', 'ti', 'v', 'cr', 'mn', 'fe', 'co', 'ni', 'cu',
    'zn', 'ga', 'ge', 'as', 'se', 'br', 'kr',
    'rb', 'sr', 'y', 'zr', 'nb', 'mo', 'tc', 'ru', 'rh', 'pd', 'ag',
    'cd', 'in', 'sn', 'sb', 'te', 'i', 'xe',
    'cs', 'ba', 'la', 'ce', 'pr', 'nd', 'pm', 'sm', 'eu', 'gd', 'tb', 'dy',
    'ho', 'er', 'tm', 'yb', 'lu', 'hf', 'ta', 'w', 're', 'os', 'ir', 'pt',
    'au', 'hg', 'tl', 'pb', 'bi', 'po', 'at', 'rn',
    'fr', 'ra', 'ac', 'th', 'pa', 'u', 'np', 'pu',
]
# Allowed total valences for each supported atomic number, listed in the
# order in which AC2BO enumerates them.  Being a defaultdict(list), looking
# up an element that is not listed yields an empty list.
atomic_valence = defaultdict(list, {
    1: [1],
    5: [3, 4],
    6: [4],
    7: [3, 4],
    8: [2, 1, 3],
    9: [1],
    14: [4],
    15: [5, 3],  # [5,4,3]
    16: [6, 3, 2],  # [6,4,2]
    17: [1],
    32: [4],
    35: [1],
    53: [1],
})

# Number of valence electrons for each supported atomic number; used by
# get_atomic_charge() to derive formal charges.
atomic_valence_electrons = {
    1: 1,
    5: 3,
    6: 4,
    7: 5,
    8: 6,
    9: 7,
    14: 4,
    15: 5,
    16: 6,
    17: 7,
    32: 4,
    35: 7,
    53: 7,
}
def str_atom(atom):
    """
    Convert an integer atomic number to its lower-case element symbol.

    args:
        atom - atomic number (1 for hydrogen, 2 for helium, ...)

    returns:
        the entry of __ATOM_LIST__ stored at position atom - 1
    """
    return __ATOM_LIST__[atom - 1]
def int_atom(atom):
    """
    Convert an element symbol to its integer atomic number.

    The symbol is lower-cased before it is looked up in __ATOM_LIST__, so
    "C", "c", "Cl" and "CL" are all accepted.

    args:
        atom - element symbol as a string

    returns:
        atomic number (position in __ATOM_LIST__ plus one)
    """
    symbol = atom.lower()
    position = __ATOM_LIST__.index(symbol)
    return position + 1
def get_UA(maxValence_list, valence_list):
    """
    Find the unsaturated atoms and their degree of unsaturation.

    args:
        maxValence_list - target valence of every atom
        valence_list - current valence of every atom

    returns:
        UA - indices of the atoms whose target valence exceeds the current one
        DU - for each atom in UA, target valence minus current valence
    """
    unsaturated = [
        (index, target - current)
        for index, (target, current) in enumerate(zip(maxValence_list, valence_list))
        if target - current > 0
    ]
    UA = [index for index, _ in unsaturated]
    DU = [missing for _, missing in unsaturated]
    return UA, DU
def get_BO(AC, UA, DU, valences, UA_pairs, use_graph=True):
    """
    Build a bond order matrix by repeatedly raising the bond order between
    paired unsaturated atoms.

    Starting from a copy of AC, every pair in UA_pairs gets its bond order
    incremented by one; the unsaturated atoms and their pairing are then
    recomputed from the new matrix.  The loop stops once the list of
    unsaturation degrees no longer changes between two iterations.

    args:
        AC - atom connectivity matrix
        UA - unsaturated atom indices (recomputed inside the loop)
        DU - degree of unsaturation of the atoms in UA
        valences - target valence of every atom
        UA_pairs - initial pairs of unsaturated atoms to bond
    optional:
        use_graph - forwarded to get_UA_pairs

    returns:
        BO - bond order matrix
    """
    BO = AC.copy()
    previous_DU = []

    while previous_DU != DU:
        for first, second in UA_pairs:
            BO[first, second] += 1
            BO[second, first] += 1

        # remember the unsaturation this round started from
        previous_DU = copy.copy(DU)

        current_valences = list(BO.sum(axis=1))
        UA, DU = get_UA(valences, current_valences)
        # only the first pairing suggested for the remaining atoms is used
        UA_pairs = get_UA_pairs(UA, AC, use_graph=use_graph)[0]

    return BO
def valences_not_too_large(BO, valences):
    """
    Check that no atom has more bonds than its target valence allows.

    args:
        BO - bond order matrix; the row sums give the bonds per atom
        valences - target valence of every atom

    returns:
        boolean - False as soon as one row sum exceeds its valence, else True
    """
    bonds_per_atom = BO.sum(axis=1)
    return not any(
        bond_count > allowed
        for allowed, bond_count in zip(valences, bonds_per_atom)
    )
def charge_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valences,
                 allow_charged_fragments=True):
    """
    Check that the formal charges implied by a bond order matrix add up to
    the requested total charge.

    When allow_charged_fragments is False no atomic charges are assigned,
    so the implied total is 0 and the check reduces to charge == 0.

    args:
        BO - bond order matrix
        AC - atom connectivity matrix (not used here)
        charge - requested total charge
        DU - degree of unsaturation (not used here)
        atomic_valence_electrons - valence electron count per atomic number
        atoms - list of integer atomic numbers
        valences - target valences (not used here)
    optional:
        allow_charged_fragments - sum per-atom formal charges if True

    returns:
        boolean - True if the summed charge equals `charge`
    """
    # total charge
    Q = 0

    if allow_charged_fragments:
        BO_valences = list(BO.sum(axis=1))
        for i, atom in enumerate(atoms):
            Q += get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i])

            # special-case corrections applied to carbon atoms
            if atom == 6:
                number_of_single_bonds_to_C = list(BO[i, :]).count(1)
                if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2:
                    Q += 1
                # this test deliberately sees Q after the correction above
                if number_of_single_bonds_to_C == 3 and Q + 1 < charge:
                    Q += 2

    return (charge == Q)
def BO_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valences,
             allow_charged_fragments=True):
    """
    Sanity check of a bond order matrix.

    A matrix is accepted when all three conditions hold:
      * no atom has more bonds than its target valence,
      * the bond order added on top of AC sums to sum(DU),
      * the implied total charge equals `charge` (see charge_is_OK).

    args:
        BO - bond order matrix
        AC - atom connectivity matrix
        charge - requested total charge
        DU - degree of unsaturation of the unsaturated atoms
        atomic_valence_electrons - valence electron count per atomic number
        atoms - list of integer atomic numbers
        valences - target valence of every atom
    optional:
        allow_charged_fragments - forwarded to charge_is_OK

    returns:
        boolean - True if the molecule is OK, False if not
    """
    if not valences_not_too_large(BO, valences):
        return False

    added_bond_order = (BO - AC).sum()
    check_sum = added_bond_order == sum(DU)
    check_charge = charge_is_OK(
        BO, AC, charge, DU, atomic_valence_electrons, atoms, valences,
        allow_charged_fragments)

    return bool(check_charge and check_sum)
def get_atomic_charge(atom, atomic_valence_electrons, BO_valence):
    """
    Formal charge of a single atom derived from its bond order valence.

    args:
        atom - integer atomic number
        atomic_valence_electrons - number of valence electrons of this atom
        BO_valence - sum of the bond orders around the atom

    returns:
        charge - 1 - BO_valence for hydrogen, 3 - BO_valence for boron,
                 0 for five-valent phosphorus and six-valent sulfur,
                 otherwise atomic_valence_electrons - 8 + BO_valence
    """
    if atom == 1:
        return 1 - BO_valence
    if atom == 5:
        return 3 - BO_valence

    # expanded-octet P(V) and S(VI) are treated as neutral
    expanded_octet = (atom == 15 and BO_valence == 5) or (atom == 16 and BO_valence == 6)
    if expanded_octet:
        return 0

    return atomic_valence_electrons - 8 + BO_valence
def clean_charges(mol):
    """
    This hack should not be needed anymore, but is kept just in case

    The molecule is sanitized and split into fragments.  For every fragment
    each reaction SMARTS below is applied repeatedly, as long as its reactant
    pattern still matches, always keeping the first product and sanitizing
    it.  The processed fragments are then combined into one molecule again.

    args:
        mol - rdkit molecule

    returns:
        mol - rdkit molecule rebuilt from the processed fragments
    """
    Chem.SanitizeMol(mol)

    rxn_smarts = [
        '[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][CX3-,NX3-:5][#6,#7:6]1=[#6,#7:7]>>'
        '[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][-0,-0:5]=[#6,#7:6]1[#6-,#7-:7]',
        '[#6,#7:1]1=[#6,#7:2][#6,#7:3](=[#6,#7:4])[#6,#7:5]=[#6,#7:6][CX3-,NX3-:7]1>>'
        '[#6,#7:1]1=[#6,#7:2][#6,#7:3]([#6-,#7-:4])=[#6,#7:5][#6,#7:6]=[-0,-0:7]1',
    ]

    for index, fragment in enumerate(Chem.GetMolFrags(mol, asMols=True, sanitizeFrags=False)):
        for smarts in rxn_smarts:
            # the reactant side of the reaction doubles as the match pattern
            reactant_smarts = smarts.split(">>")[0]
            patt = Chem.MolFromSmarts(reactant_smarts)
            while fragment.HasSubstructMatch(patt):
                rxn = AllChem.ReactionFromSmarts(smarts)
                products = rxn.RunReactants((fragment,))
                fragment = products[0][0]
                Chem.SanitizeMol(fragment)

        # the first fragment replaces mol, later ones are appended to it
        mol = fragment if index == 0 else Chem.CombineMols(mol, fragment)

    return mol
def BO2mol(mol, BO_matrix, atoms, atomic_valence_electrons,
           mol_charge, allow_charged_fragments=True):
    """
    based on code written by Paolo Toscani

    From bond order, atoms, valence structure and total charge, generate an
    rdkit molecule.

    args:
        mol - rdkit molecule
        BO_matrix - bond order matrix of molecule
        atoms - list of integer atomic symbols
        atomic_valence_electrons - valence electron count per atomic number
        mol_charge - total charge of molecule

    optional:
        allow_charged_fragments - bool - assign formal charges if True,
            radical electrons otherwise

    returns
        mol - updated rdkit molecule with bond connectivity

    raises
        RuntimeError - if BO_matrix and atoms differ in length
    """
    matrix_size = len(BO_matrix)
    atom_count = len(atoms)
    BO_valences = list(BO_matrix.sum(axis=1))

    if matrix_size != atom_count:
        raise RuntimeError('sizes of adjMat ({0:d}) and Atoms {1:d} differ'.format(matrix_size, atom_count))

    rwMol = Chem.RWMol(mol)

    # bond orders not listed here fall back to a single bond
    bond_types = {
        1: Chem.BondType.SINGLE,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE,
    }

    # visit every atom pair once (upper triangle of the matrix)
    for i, j in itertools.combinations(range(matrix_size), 2):
        bo = int(round(BO_matrix[i, j]))
        if bo == 0:
            continue
        rwMol.AddBond(i, j, bond_types.get(bo, Chem.BondType.SINGLE))

    mol = rwMol.GetMol()

    if allow_charged_fragments:
        return set_atomic_charges(
            mol, atoms, atomic_valence_electrons,
            BO_valences, BO_matrix, mol_charge)

    return set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences)
def set_atomic_charges(mol, atoms, atomic_valence_electrons,
                       BO_valences, BO_matrix, mol_charge):
    """
    Assign formal charges to the atoms of mol from their bond order valence.

    The charge of every atom comes from get_atomic_charge; carbon atoms get
    two extra special-case corrections that depend on their number of single
    bonds and on the running total charge.  Only non-zero charges are
    written to the molecule.

    args:
        mol - rdkit molecule
        atoms - list of integer atomic numbers
        atomic_valence_electrons - valence electron count per atomic number
        BO_valences - sum of bond orders for every atom
        BO_matrix - bond order matrix
        mol_charge - total charge of the molecule

    returns:
        mol - the same rdkit molecule with formal charges set
    """
    running_total = 0
    for idx, atomic_number in enumerate(atoms):
        rd_atom = mol.GetAtomWithIdx(idx)
        formal_charge = get_atomic_charge(
            atomic_number,
            atomic_valence_electrons[atomic_number],
            BO_valences[idx])
        running_total += formal_charge

        if atomic_number == 6:
            single_bonds = list(BO_matrix[idx, :]).count(1)
            if single_bonds == 2 and BO_valences[idx] == 2:
                running_total += 1
                formal_charge = 0
            # evaluated after the correction above may have changed the total
            if single_bonds == 3 and running_total + 1 < mol_charge:
                running_total += 2
                formal_charge = 1

        if abs(formal_charge) > 0:
            rd_atom.SetFormalCharge(int(formal_charge))

    return mol
def set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences):
    """
    The number of radical electrons = absolute atomic charge

    args:
        mol - rdkit molecule
        atoms - list of integer atomic numbers
        atomic_valence_electrons - valence electron count per atomic number
        BO_valences - sum of bond orders for every atom

    returns:
        mol - the same rdkit molecule with radical electrons set on every
              atom whose computed charge is non-zero
    """
    for idx, atomic_number in enumerate(atoms):
        rd_atom = mol.GetAtomWithIdx(idx)
        formal_charge = get_atomic_charge(
            atomic_number,
            atomic_valence_electrons[atomic_number],
            BO_valences[idx])

        if abs(formal_charge) > 0:
            rd_atom.SetNumRadicalElectrons(abs(int(formal_charge)))

    return mol
def get_bonds(UA, AC):
    """
    List the bonds that connect two unsaturated atoms.

    args:
        UA - indices of the unsaturated atoms
        AC - atom connectivity matrix

    returns:
        bonds - list of (i, j) tuples with i < j, one for every pair of
                atoms from UA whose AC entry equals 1
    """
    return [
        tuple(sorted([first, second]))
        for first, second in itertools.combinations(UA, 2)
        if AC[first, second] == 1
    ]
def get_UA_pairs(UA, AC, use_graph=True):
    """
    Propose ways of pairing up bonded unsaturated atoms.

    args:
        UA - indices of the unsaturated atoms
        AC - atom connectivity matrix
    optional:
        use_graph - if True, return the single pairing found by networkx'
                    max_weight_matching; otherwise enumerate all
                    combinations of len(UA) // 2 bonds and keep those that
                    cover the largest number of distinct atoms

    returns:
        UA_pairs - list of pairings, each pairing being a sequence of
                   (i, j) bonds; [()] when no two unsaturated atoms are bonded
    """
    bonds = get_bonds(UA, AC)

    if not bonds:
        return [()]

    if use_graph:
        graph = nx.Graph()
        graph.add_edges_from(bonds)
        return [list(nx.max_weight_matching(graph))]

    most_atoms_covered = 0
    UA_pairs = [()]
    for combo in itertools.combinations(bonds, len(UA) // 2):
        atoms_covered = len({atom for bond in combo for atom in bond})

        if atoms_covered > most_atoms_covered:
            # strictly better coverage: start a new list of candidates
            most_atoms_covered = atoms_covered
            UA_pairs = [combo]
        elif atoms_covered == most_atoms_covered:
            UA_pairs.append(combo)

    return UA_pairs
def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
    """
    implementation of algorithm shown in Figure 2

    UA: unsaturated atoms
    DU: degree of unsaturation (u matrix in Figure)
    best_BO: Bcurr in Figure

    args:
        AC - atom connectivity matrix
        atoms - list of integer atomic numbers
        charge - total charge of the molecule
    optional:
        allow_charged_fragments - forwarded to the charge checks
        use_graph - forwarded to get_UA_pairs / get_BO

    returns:
        BO - the first bond order matrix that passes BO_is_OK, or else the
             best candidate found (initially a copy of AC)
        atomic_valence_electrons - the module level valence electron table

    raises:
        ValueError - if an atom's element has no entry in atomic_valence

    If an atom has more neighbours than any of its allowed valences, a
    message is printed and the interpreter is stopped through sys.exit().
    """
    # make a list of valences, e.g. for CO: [[4],[2,1]]
    valences_list_of_lists = []
    AC_valence = list(AC.sum(axis=1))

    for i, (atomicNum, valence) in enumerate(zip(atoms, AC_valence)):
        allowed_valences = atomic_valence[atomicNum]
        if not allowed_valences:
            # Without this guard the max() call below fails on an empty
            # list with an unhelpful "max() arg is an empty sequence".
            raise ValueError(
                'No valence data for atom {0} (atomic number {1}); '
                'this element is not supported'.format(i, atomicNum))

        # valence can't be smaller than number of neighbours
        possible_valence = [x for x in allowed_valences if x >= valence]
        if not possible_valence:
            print('Valence of atom', i, 'is', valence, 'which bigger than allowed max', max(atomic_valence[atomicNum]), '. Stopping')
            sys.exit()
        valences_list_of_lists.append(possible_valence)

    best_BO = AC.copy()

    # convert [[4],[2,1]] to [[4,2],[4,1]] and try every combination
    for valences in itertools.product(*valences_list_of_lists):

        UA, DU_from_AC = get_UA(valences, AC_valence)

        # no unsaturated atoms: the connectivity itself may already be valid
        if len(UA) == 0 and BO_is_OK(
                AC, AC, charge, DU_from_AC,
                atomic_valence_electrons, atoms, valences,
                allow_charged_fragments=allow_charged_fragments):
            return AC, atomic_valence_electrons

        for UA_pairs in get_UA_pairs(UA, AC, use_graph=use_graph):
            BO = get_BO(AC, UA, DU_from_AC, valences, UA_pairs, use_graph=use_graph)

            if BO_is_OK(
                    BO, AC, charge, DU_from_AC,
                    atomic_valence_electrons, atoms, valences,
                    allow_charged_fragments=allow_charged_fragments):
                return BO, atomic_valence_electrons

            # Otherwise keep BO as fallback if it has at least as much total
            # bond order as the current best, respects the valences and has
            # the right charge.  The charge check is only evaluated when the
            # cheaper conditions hold.
            if (BO.sum() >= best_BO.sum()
                    and valences_not_too_large(BO, valences)
                    and charge_is_OK(
                        BO, AC, charge, DU_from_AC,
                        atomic_valence_electrons, atoms, valences,
                        allow_charged_fragments=allow_charged_fragments)):
                best_BO = BO.copy()

    return best_BO, atomic_valence_electrons
def AC2mol(mol, AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
    """
    Turn an atom connectivity matrix into rdkit molecules with bond orders.

    args:
        mol - rdkit molecule holding the atoms (no bonds yet)
        AC - atom connectivity matrix
        atoms - list of integer atomic numbers
        charge - total charge of the molecule
    optional:
        allow_charged_fragments - forwarded to AC2BO and BO2mol
        use_graph - forwarded to AC2BO

    returns:
        mols - list with the resonance forms of the generated molecule, or
               an empty list if its formal charge differs from `charge`
    """
    # convert AC matrix to bond order (BO) matrix
    BO, atomic_valence_electrons = AC2BO(
        AC, atoms, charge,
        allow_charged_fragments=allow_charged_fragments,
        use_graph=use_graph)

    # add BO connectivity and charge info to mol object
    mol = BO2mol(
        mol, BO, atoms, atomic_valence_electrons, charge,
        allow_charged_fragments=allow_charged_fragments)

    # If charge is not correct don't return mol
    if Chem.GetFormalCharge(mol) != charge:
        return []

    # BO2mol returns an arbitrary resonance form. Let's make the rest
    supplier = rdchem.ResonanceMolSupplier(
        mol, Chem.UNCONSTRAINED_CATIONS, Chem.UNCONSTRAINED_ANIONS)
    resonance_forms = [resonance_form for resonance_form in supplier]

    return resonance_forms
def get_proto_mol(atoms):
    """
    Create an rdkit molecule that contains the given atoms and no bonds.

    The first atom is created from the SMARTS "[#Z]", the remaining ones
    are appended as plain Chem.Atom objects.

    args:
        atoms - list of integer atomic numbers (at least one entry)

    returns:
        mol - rdkit molecule with one atom per entry of atoms
    """
    seed = Chem.MolFromSmarts("[#{}]".format(atoms[0]))
    rwMol = Chem.RWMol(seed)

    for atomic_number in atoms[1:]:
        rwMol.AddAtom(Chem.Atom(atomic_number))

    return rwMol.GetMol()
def read_xyz_file(filename, look_for_charge=True):
    """
    Read atoms, coordinates and (optionally) the charge from an xyz file.

    Layout expected: first line the atom count, second line a free title,
    then one "symbol x y z" line per atom.  Blank lines in the atom section
    (for example a trailing newline at the end of the file) are skipped.

    args:
        filename - path of the xyz file
    optional:
        look_for_charge - if True and the title line contains "charge=",
            the text after the first "=" is parsed as the integer charge

    returns:
        atoms - list of integer atomic numbers
        charge - total charge (0 unless found in the title line)
        xyz_coordinates - list of [x, y, z] floats, one entry per atom

    raises:
        ValueError - if the atom count or charge is not an integer, or an
            atom line does not consist of exactly four fields
    """
    atomic_symbols = []
    xyz_coordinates = []
    charge = 0

    with open(filename, "r") as xyz_file:
        for line_number, line in enumerate(xyz_file):
            if line_number == 0:
                # the value itself is not needed, but the header must be
                # an integer atom count
                int(line)
            elif line_number == 1:
                if look_for_charge and "charge=" in line:
                    charge = int(line.split("=")[1])
            else:
                fields = line.split()
                if not fields:
                    # tolerate empty lines instead of failing on the unpack
                    continue
                atomic_symbol, x, y, z = fields
                atomic_symbols.append(atomic_symbol)
                xyz_coordinates.append([float(x), float(y), float(z)])

    atoms = [int_atom(atom) for atom in atomic_symbols]

    return atoms, charge, xyz_coordinates
def xyz2AC(atoms, xyz, charge, use_huckel=False):
    """
    atoms and coordinates to atom connectivity (AC)

    Dispatches to xyz2AC_huckel when use_huckel is set, otherwise to
    xyz2AC_vdW (which does not need the charge).

    args:
        atoms - int atom types
        xyz - coordinates
        charge - molecule charge

    optional:
        use_huckel - Use Huckel method for atom connectivity

    returns
        ac - atom connectivity matrix
        mol - rdkit molecule
    """
    if not use_huckel:
        return xyz2AC_vdW(atoms, xyz)

    return xyz2AC_huckel(atoms, xyz, charge)
def xyz2AC_vdW(atoms, xyz):
    """
    Atom connectivity from interatomic distances (see get_AC).

    args:
        atoms - int atom types
        xyz - coordinates, indexable as xyz[i][0..2] for atom i

    returns
        ac - atom connectivity matrix
        mol - rdkit molecule carrying the coordinates as a conformer
    """
    # Get mol template
    mol = get_proto_mol(atoms)

    # Set coordinates
    atom_count = mol.GetNumAtoms()
    conf = Chem.Conformer(atom_count)
    for idx in range(mol.GetNumAtoms()):
        position = xyz[idx]
        conf.SetAtomPosition(idx, (position[0], position[1], position[2]))
    mol.AddConformer(conf)

    return get_AC(mol), mol
def get_AC(mol, covalent_factor=1.3):
    """
    Generate the adjacency matrix from atoms and coordinates.

    AC is a (num_atoms, num_atoms) integer matrix with 1 marking a covalent
    bond and 0 no bond.  Two atoms count as bonded when their distance is at
    most the sum of their covalent radii, each scaled by covalent_factor.

    covalent_factor - 1.3 is an arbitrary factor

    args:
        mol - rdkit molobj with 3D conformer

    optional
        covalent_factor - increase covalent bond length threshold with factor

    returns:
        AC - adjacency matrix
    """
    # Calculate distance matrix
    dMat = Chem.Get3DDistanceMatrix(mol)

    pt = Chem.GetPeriodicTable()
    num_atoms = mol.GetNumAtoms()

    # scaled covalent radius of every atom, looked up once per atom
    scaled_radii = [
        pt.GetRcovalent(mol.GetAtomWithIdx(idx).GetAtomicNum()) * covalent_factor
        for idx in range(num_atoms)
    ]

    AC = np.zeros((num_atoms, num_atoms), dtype=int)
    for i, j in itertools.combinations(range(num_atoms), 2):
        if dMat[i, j] <= scaled_radii[i] + scaled_radii[j]:
            AC[i, j] = 1
            AC[j, i] = 1

    return AC
def xyz2AC_huckel(atomicNumList, xyz, charge):
    """
    Atom connectivity from the extended Huckel reduced overlap population.

    args
        atomicNumList - atom type list
        xyz - coordinates
        charge - molecule charge

    returns
        ac - atom connectivity
        mol - rdkit molecule
    """
    mol = get_proto_mol(atomicNumList)

    conf = Chem.Conformer(mol.GetNumAtoms())
    for idx in range(mol.GetNumAtoms()):
        position = xyz[idx]
        conf.SetAtomPosition(idx, (position[0], position[1], position[2]))
    mol.AddConformer(conf)

    num_atoms = len(atomicNumList)
    shape = (num_atoms, num_atoms)
    AC = np.zeros(shape).astype(int)

    # work on a copy so the returned mol keeps its atoms uncharged;
    # mol charge arbitrarily added to 1st atom
    mol_huckel = Chem.Mol(mol)
    first_atom = mol_huckel.GetAtomWithIdx(0)
    first_atom.SetFormalCharge(charge)

    _, result = rdEHTTools.RunMol(mol_huckel)
    opop = result.GetReducedOverlapPopulationMatrix()

    # lower triangular to square matrix
    lower_mask = np.tril(np.ones(shape, dtype=bool))
    tri = np.zeros(shape)
    tri[lower_mask] = opop

    for i, j in itertools.combinations(range(num_atoms), 2):
        pair_pop = abs(tri[j, i])
        # arbitrary cutoff for bond. May need adjustment
        if pair_pop >= 0.15:
            AC[i, j] = 1
            AC[j, i] = 1

    return AC, mol
def chiral_stereo_check(mol):
    """
    Find and embed chiral information into the model based on the coordinates

    The molecule is modified in place: it is sanitized, then bond
    stereochemistry, stereo centers and atom chiral tags are assigned from
    its conformer.

    args:
        mol - rdkit molecule, with embedded conformer

    returns:
        None
    """
    default_conformer = -1

    Chem.SanitizeMol(mol)
    Chem.DetectBondStereochemistry(mol, default_conformer)
    Chem.AssignStereochemistry(mol, flagPossibleStereoCenters=True, force=True)
    Chem.AssignAtomChiralTagsFromStructure(mol, default_conformer)
def xyz2mol(atoms, coordinates,
            charge=0,
            allow_charged_fragments=True,
            use_graph=True,
            use_huckel=False,
            embed_chiral=True):
    """
    Generate a rdkit molobj from atoms, coordinates and a total_charge.

    args:
        atoms - list of atom types (int)
        coordinates - Cartesian coordinates, one [x, y, z] entry per atom
        charge - total charge of the system (default: 0)

    optional:
        allow_charged_fragments - alternatively radicals are made
        use_graph - use graph (networkx)
        use_huckel - Use Huckel method for atom connectivity prediction
        embed_chiral - embed chiral information to the molecule

    returns:
        mols - list of rdkit molobjects
    """
    # Atom connectivity (AC) matrix plus a mol object that has the atoms and
    # coordinates but no connectivity information yet
    AC, mol = xyz2AC(atoms, coordinates, charge, use_huckel=use_huckel)

    # Convert AC to bond order matrix and add connectivity and charge info
    # to the mol object
    new_mols = AC2mol(
        mol, AC, atoms, charge,
        allow_charged_fragments=allow_charged_fragments,
        use_graph=use_graph)

    if not embed_chiral:
        return new_mols

    # Check for stereocenters and chiral centers
    for new_mol in new_mols:
        chiral_stereo_check(new_mol)

    return new_mols
def main():
    """
    Placeholder entry point; it does nothing and returns None.
    """
    return None
if __name__ == "__main__":

    # Command line driver: read an xyz file, build the molecule(s) and print
    # them either as SMILES (default) or as mol blocks.
    import argparse

    parser = argparse.ArgumentParser(usage='%(prog)s [options] molecule.xyz')
    parser.add_argument('structure', metavar='structure', type=str)
    parser.add_argument('-s', '--sdf',
        action="store_true",
        help="Dump sdf file")
    parser.add_argument('--ignore-chiral',
        action="store_true",
        help="Ignore chiral centers")
    parser.add_argument('--no-charged-fragments',
        action="store_true",
        help="Allow radicals to be made")
    parser.add_argument('--no-graph',
        action="store_true",
        help="Run xyz2mol without networkx dependencies")

    # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
    # otherwise scaled covalent radii are used (see get_AC)
    parser.add_argument('--use-huckel',
        action="store_true",
        help="Use Huckel method for atom connectivity")
    # No default is set, so anything but "sdf" results in SMILES output;
    # the help text states that actual default.
    parser.add_argument('-o', '--output-format',
        action="store",
        type=str,
        help="Output format [smiles,sdf] (default=smiles)")
    parser.add_argument('-c', '--charge',
        action="store",
        metavar="int",
        type=int,
        help="Total charge of the system")

    args = parser.parse_args()

    # read xyz file
    filename = args.structure

    # allow for charged fragments, alternatively radicals are made
    charged_fragments = not args.no_charged_fragments

    # quick uses the networkx matching, which is faster for large systems
    quick = not args.no_graph

    # embed stereo information unless asked not to
    embed_chiral = not args.ignore_chiral

    # read atoms and coordinates. Try to find the charge
    atoms, charge, xyz_coordinates = read_xyz_file(filename)

    use_huckel = args.use_huckel

    # if explicit charge from args, set it
    if args.charge is not None:
        charge = int(args.charge)

    # -s/--sdf is a shortcut for "--output-format sdf"
    dump_sdf = args.sdf or args.output_format == "sdf"

    # Get the molobjs
    mols = xyz2mol(atoms, xyz_coordinates,
        charge=charge,
        use_graph=quick,
        allow_charged_fragments=charged_fragments,
        embed_chiral=embed_chiral,
        use_huckel=use_huckel)

    # Print output
    for mol in mols:
        if dump_sdf:
            txt = Chem.MolToMolBlock(mol)
            print(txt)

        else:
            # Canonical hack: round-trip through SMILES before printing
            isomeric_smiles = not args.ignore_chiral
            smiles = Chem.MolToSmiles(mol, isomericSmiles=isomeric_smiles)
            m = Chem.MolFromSmiles(smiles)
            smiles = Chem.MolToSmiles(m, isomericSmiles=isomeric_smiles)
            print(smiles)
Event Timeline
Log In to Comment