{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "notebook-overview",
   "metadata": {},
   "source": [
    "# Convert XYZ target structures to SDF\n",
    "\n",
    "Each `.xyz` file in `targets/` is parsed with `read_xyz`, turned into an RDKit molecule by `xyz2mol`, and written out as a mol block with an `.sdf` extension.\n",
    "\n",
    "The notebook is meant to be run top to bottom on a fresh kernel (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "demanding-buffer",
   "metadata": {},
   "outputs": [],
   "source": [
    "from glob import glob\n",
    "\n",
    "from rdkit import Chem\n",
    "from rdkit.Geometry import Point3D\n",
    "from xyz2mol import xyz2mol as x2m"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-inputs",
   "metadata": {},
   "source": [
    "## Inputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "stunning-uncle",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Match only the .xyz inputs: the .sdf files written at the end of this\n",
    "# notebook land in the same directory and would otherwise be picked up\n",
    "# on the next run.\n",
    "targets = sorted(glob(\"targets/*.xyz\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "exceptional-knowing",
   "metadata": {},
   "outputs": [],
   "source": [
    "targets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "rental-toronto",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Element symbol -> nuclear charge, for the elements read_xyz accepts.\n",
    "NUCLEAR_CHARGE = {\n",
    "    \"H\": 1,\n",
    "    \"C\": 6,\n",
    "    \"O\": 8,\n",
    "    \"N\": 7,\n",
    "    \"F\": 9,\n",
    "    \"Cl\": 17,\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-helpers",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "associate-course",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_xyz(filename):\n",
    "    \"\"\"Read nuclear charges and Cartesian coordinates from an XYZ file.\n",
    "\n",
    "    The first line must hold the atom count; the second line is skipped and\n",
    "    the following ``natoms`` lines are parsed as ``symbol x y z``. Parsing\n",
    "    stops at the first line with fewer than four whitespace-separated\n",
    "    tokens, so the result may contain fewer than ``natoms`` atoms.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    filename : str\n",
    "        Path of the XYZ file to read.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    nuclear_charges : list of int\n",
    "        ``NUCLEAR_CHARGE`` value for each atom symbol, in file order.\n",
    "    coordinates : list of list of float\n",
    "        ``[x, y, z]`` for each atom, taken from tokens 1-3 of its line.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError\n",
    "        If an atom symbol is not a key of ``NUCLEAR_CHARGE``.\n",
    "    \"\"\"\n",
    "    with open(filename, \"r\") as f:\n",
    "        lines = f.readlines()\n",
    "\n",
    "    natoms = int(lines[0])\n",
    "    nuclear_charges = []\n",
    "    coordinates = []\n",
    "\n",
    "    for line in lines[2:natoms + 2]:\n",
    "        tokens = line.split()\n",
    "\n",
    "        if len(tokens) < 4:\n",
    "            break\n",
    "\n",
    "        nuclear_charges.append(NUCLEAR_CHARGE[tokens[0]])\n",
    "        coordinates.append([float(token) for token in tokens[1:4]])\n",
    "\n",
    "    return nuclear_charges, coordinates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "beneficial-drilling",
   "metadata": {},
   "outputs": [],
   "source": [
    "def xyzfile_to_mol(filename):\n",
    "    \"\"\"Build a molecule from an XYZ file.\n",
    "\n",
    "    Returns the first element of what ``xyz2mol`` produces for the nuclear\n",
    "    charges and coordinates read by ``read_xyz``.\n",
    "    \"\"\"\n",
    "    ncharges, coords = read_xyz(filename)\n",
    "    mols = x2m(ncharges, coords)\n",
    "    return mols[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "violent-monaco",
   "metadata": {},
   "outputs": [],
   "source": [
    "def xyz_to_sdf(xyz):\n",
    "    \"\"\"Convert an XYZ file into a mol-block string.\n",
    "\n",
    "    The molecule is the first element of what ``xyz2mol`` returns. Every\n",
    "    atom position in its conformer is overwritten with the coordinates read\n",
    "    from the file before the mol block is generated.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    xyz : str\n",
    "        Path of the XYZ file to convert.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        Result of ``Chem.MolToMolBlock`` for the molecule.\n",
    "    \"\"\"\n",
    "    ncharges, coords = read_xyz(xyz)\n",
    "    mol = x2m(ncharges, coords)[0]\n",
    "    conf = mol.GetConformer()\n",
    "    for i in range(mol.GetNumAtoms()):\n",
    "        x, y, z = coords[i]\n",
    "        conf.SetAtomPosition(i, Point3D(x, y, z))\n",
    "    return Chem.MolToMolBlock(mol)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-convert",
   "metadata": {},
   "source": [
    "## Write the SDF files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "compatible-proposal",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input XYZ file -> output SDF file. Output names are spelled out because\n",
    "# they do not all follow from the input name (qm9_0.xyz -> qm9.sdf).\n",
    "SDF_OUTPUTS = {\n",
    "    \"targets/qm9_0.xyz\": \"targets/qm9.sdf\",\n",
    "    \"targets/vitc.xyz\": \"targets/vitc.sdf\",\n",
    "    \"targets/vitd.xyz\": \"targets/vitd.sdf\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ruled-forestry",
   "metadata": {},
   "outputs": [],
   "source": [
    "for xyz_path, sdf_path in SDF_OUTPUTS.items():\n",
    "    with open(sdf_path, \"w\") as f:\n",
    "        f.write(xyz_to_sdf(xyz_path))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}