In [1]:
from rdkit import Chem
from rdkit.Geometry import Point3D

In [2]:
from xyz2mol import xyz2mol as x2m

In [3]:
from glob import glob

In [6]:
# Only collect the XYZ inputs. This notebook later writes .sdf files into the
# same directory; a bare "targets/*" pattern would pick those up on a re-run
# and shift the positions of the entries in the sorted list.
targets = sorted(glob("targets/*.xyz"))

In [7]:
# Show the input files that were found.
targets

['targets/qm9_0.xyz', 'targets/vitc.xyz', 'targets/vitd.xyz']

In [9]:
# Element symbol -> nuclear charge (atomic number) for the elements
# this notebook can parse from an XYZ file.
NUCLEAR_CHARGE = {
    "H": 1,    # hydrogen
    "C": 6,    # carbon
    "O": 8,    # oxygen
    "N": 7,    # nitrogen
    "F": 9,    # fluorine
    "Cl": 17,  # chlorine
}

In [10]:
def read_xyz(filename):
    """Parse an XYZ-style text file into nuclear charges and coordinates.

    The first line is read as the atom count and the second line is
    skipped. Up to that many of the following lines are parsed as
    ``symbol x y z``; any columns after the fourth are ignored. Parsing
    stops at the first line with fewer than four whitespace-separated
    fields, so the result can be shorter than the declared atom count.

    Args:
        filename: Path of the file to read.

    Returns:
        A tuple ``(nuclear_charges, coordinates)``: a list of the values
        looked up in ``NUCLEAR_CHARGE`` for each symbol, and a list of
        ``[x, y, z]`` float triples, in file order.

    Raises:
        KeyError: If an element symbol is not a key of ``NUCLEAR_CHARGE``.
        ValueError: If the first line is not an integer or a coordinate
            field cannot be converted to float.
    """
    with open(filename, "r") as handle:
        rows = list(handle)

    atom_count = int(rows[0])
    charges, positions = [], []

    # rows[1] is skipped; atom records start at the third line.
    for row in rows[2:2 + atom_count]:
        fields = row.split()

        if len(fields) < 4:
            break

        symbol, xyz_fields = fields[0], fields[1:4]
        charges.append(NUCLEAR_CHARGE[symbol])
        positions.append([float(value) for value in xyz_fields])

    return charges, positions

In [13]:
def xyzfile_to_mol(filename):
    """Build a molecule from an XYZ file.

    Reads the file with ``read_xyz``, passes the nuclear charges and
    coordinates to ``x2m`` and returns the first element of its result
    (presumably an RDKit molecule; confirm against the xyz2mol version
    in use).

    Args:
        filename: Path of the XYZ file to read.

    Returns:
        The first item of the sequence returned by ``x2m``.
    """
    atomic_numbers, positions = read_xyz(filename)
    return x2m(atomic_numbers, positions)[0]

In [33]:
def xyz_to_sdf(xyz):
    """Convert an XYZ file into a MOL block string.

    Reads the file with ``read_xyz``, builds the molecule with ``x2m``
    and takes the first result. The conformer's atom positions are then
    overwritten with the coordinates read from the file, and the
    molecule is serialised with ``Chem.MolToMolBlock``.

    ``Point3D`` must be imported from ``rdkit.Geometry``; without that
    import this function raises ``NameError`` on a fresh kernel.

    Args:
        xyz: Path of the XYZ file to convert.

    Returns:
        The text returned by ``Chem.MolToMolBlock`` for the molecule.
        NOTE(review): this looks like a single MOL block without the
        ``$$$$`` SDF record terminator; confirm downstream readers
        accept it.
    """
    ncharges, coords = read_xyz(xyz)
    mol = x2m(ncharges, coords)[0]

    # Write the coordinates from the file onto the molecule's conformer
    # so the exported block carries exactly the input geometry.
    conformer = mol.GetConformer()
    for atom_idx in range(mol.GetNumAtoms()):
        x, y, z = coords[atom_idx]
        conformer.SetAtomPosition(atom_idx, Point3D(x, y, z))

    return Chem.MolToMolBlock(mol)

In [38]:
# Select the input by name rather than by position in `targets`: the
# position of a file in the sorted directory listing changes whenever
# files are added to targets/.
vitc = "targets/vitc.xyz"
sdfstr = xyz_to_sdf(vitc)

In [32]:
# Save the most recently generated `sdfstr` as the vitamin C output file.
with open('targets/vitc.sdf', mode='w') as f:
    f.write(sdfstr)

In [39]:
# Select the input by name rather than by position in `targets`: the
# position of a file in the sorted directory listing changes whenever
# files are added to targets/.
vitd = "targets/vitd.xyz"
sdfstr = xyz_to_sdf(vitd)

In [41]:
# Save the most recently generated `sdfstr` as the vitamin D output file.
with open('targets/vitd.sdf', mode='w') as f:
    f.write(sdfstr)

In [42]:
# Select the input by name rather than by position in `targets`: the
# position of a file in the sorted directory listing changes whenever
# files are added to targets/.
qm9 = "targets/qm9_0.xyz"
sdfstr = xyz_to_sdf(qm9)

In [43]:
# Save the most recently generated `sdfstr` as the QM9 output file.
with open('targets/qm9.sdf', mode='w') as f:
    f.write(sdfstr)