{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Coulomb matrices for amon fragments\n",
    "\n",
    "Reads the SDF files in `amons-qm9/`, `amons-vitc/` and `amons-vitd/`, builds a Coulomb matrix (CM) for each molecule and stores labels, nuclear charges and CMs in `amons_data.npz`.\n",
    "\n",
    "Stored outputs were cleared because the code changed; use **Restart Kernel and Run All** to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from glob import glob\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import qml  # NOTE(review): not referenced anywhere in this notebook -- confirm it is still needed\n",
    "from rdkit import Chem"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_sdf(sdf):\n",
    "    \"\"\"Return the text of the file at path `sdf` with trailing whitespace stripped.\"\"\"\n",
    "    with open(sdf, \"r\") as f:\n",
    "        return f.read().rstrip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_CM(sdf, remove_hs=True):\n",
    "    \"\"\"Build the Coulomb matrix of the molecule described by a mol block.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    sdf : str\n",
    "        Mol-block text, e.g. as returned by `read_sdf`.\n",
    "    remove_hs : bool, optional\n",
    "        Forwarded to RDKit as `removeHs`. True is RDKit's own default and\n",
    "        reproduces the previous behaviour of this function.\n",
    "        NOTE(review): with True, RDKit drops explicit hydrogens while\n",
    "        parsing, so they are absent from the returned charges and matrix --\n",
    "        confirm that heavy-atom-only matrices are intended.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ncharges : list of int\n",
    "        Atomic numbers of the atoms of the parsed molecule.\n",
    "    CM : numpy.ndarray of shape (n_atoms, n_atoms)\n",
    "        CM[i, i] = 0.5 * Z_i**2.4 and, for i != j,\n",
    "        CM[i, j] = Z_i * Z_j / |R_i - R_j|.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If RDKit cannot parse `sdf`.\n",
    "    \"\"\"\n",
    "    mol = Chem.MolFromMolBlock(sdf, removeHs=remove_hs)\n",
    "    if mol is None:\n",
    "        # RDKit reports a parse failure by returning None instead of raising.\n",
    "        raise ValueError(\"RDKit could not parse the supplied mol block\")\n",
    "\n",
    "    ncharges = [atom.GetAtomicNum() for atom in mol.GetAtoms()]\n",
    "    coords = np.asarray(mol.GetConformer().GetPositions())\n",
    "\n",
    "    Z = np.asarray(ncharges, dtype=float)\n",
    "    # All pairwise distances at once: dist[i, j] = |R_i - R_j|.\n",
    "    dist = np.linalg.norm(coords[:, None, :] - coords[None, :, :], axis=-1)\n",
    "    # The diagonal of CM is overwritten below; a dummy 1.0 avoids a 0/0 there.\n",
    "    np.fill_diagonal(dist, 1.0)\n",
    "\n",
    "    CM = np.outer(Z, Z) / dist\n",
    "    np.fill_diagonal(CM, 0.5 * Z**2.4)\n",
    "    return ncharges, CM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_amons(directory):\n",
    "    \"\"\"Read every `*.sdf` file in `directory` and build its Coulomb matrix.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    files : list of str\n",
    "        Sorted paths of the files that were read.\n",
    "    labels : list of str\n",
    "        File names without directory and without the `.sdf` suffix.\n",
    "    ncharges : list of list of int\n",
    "        Atomic numbers, one list per file (see `get_CM`).\n",
    "    CMs : list of numpy.ndarray\n",
    "        Coulomb matrices, one per file (see `get_CM`).\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    FileNotFoundError\n",
    "        If `directory` contains no `.sdf` file.\n",
    "    \"\"\"\n",
    "    files = sorted(glob(f\"{directory}/*.sdf\"))\n",
    "    if not files:\n",
    "        # Fail loudly instead of silently saving empty arrays later on.\n",
    "        raise FileNotFoundError(f\"no .sdf files found in {directory!r}\")\n",
    "\n",
    "    # Path.stem also strips the directory when the separator is not '/'.\n",
    "    labels = [Path(path).stem for path in files]\n",
    "\n",
    "    ncharges, CMs = [], []\n",
    "    for path in files:\n",
    "        ncharge, CM = get_CM(read_sdf(path))\n",
    "        ncharges.append(ncharge)\n",
    "        CMs.append(CM)\n",
    "    return files, labels, ncharges, CMs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def as_object_array(items):\n",
    "    \"\"\"Pack `items` into a 1-D object array holding one element per item.\n",
    "\n",
    "    Filling the array element by element stops NumPy from trying to stack\n",
    "    the items into a regular N-d array. Letting NumPy convert a ragged list\n",
    "    implicitly emits VisibleDeprecationWarning on older releases and raises\n",
    "    ValueError on newer ones.\n",
    "    \"\"\"\n",
    "    packed = np.empty(len(items), dtype=object)\n",
    "    for index, item in enumerate(items):\n",
    "        packed[index] = item\n",
    "    return packed"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Target molecules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "target_sdfs = sorted(glob(\"targets/*.sdf\"))\n",
    "target_sdfs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Amon fragments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(qm9_amons_files, qm9_amons_labels,\n",
    " qm9_amons_ncharges, qm9_amons_CMs) = load_amons(\"amons-qm9\")\n",
    "\n",
    "(vitc_amons_files, vitc_amons_labels,\n",
    " vitc_amons_ncharges, vitc_amons_CMs) = load_amons(\"amons-vitc\")\n",
    "\n",
    "(vitd_amons_files, vitd_amons_labels,\n",
    " vitd_amons_ncharges, vitd_amons_CMs) = load_amons(\"amons-vitd\")\n",
    "\n",
    "# Number of fragments read per target.\n",
    "{\n",
    "    \"qm9\": len(qm9_amons_files),\n",
    "    \"vitc\": len(vitc_amons_files),\n",
    "    \"vitd\": len(vitd_amons_files),\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save\n",
    "\n",
    "Molecules differ in size, so the per-molecule charges and matrices are stored as 1-D object arrays."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.savez(\n",
    "    \"amons_data.npz\",\n",
    "    vitd_amons_labels=vitd_amons_labels,\n",
    "    vitc_amons_labels=vitc_amons_labels,\n",
    "    qm9_amons_labels=qm9_amons_labels,\n",
    "    vitd_amons_ncharges=as_object_array(vitd_amons_ncharges),\n",
    "    vitc_amons_ncharges=as_object_array(vitc_amons_ncharges),\n",
    "    qm9_amons_ncharges=as_object_array(qm9_amons_ncharges),\n",
    "    vitd_amons_CMs=as_object_array(vitd_amons_CMs),\n",
    "    vitc_amons_CMs=as_object_array(vitc_amons_CMs),\n",
    "    qm9_amons_CMs=as_object_array(qm9_amons_CMs),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check the saved file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# allow_pickle=True is needed to read back the object arrays written above.\n",
    "# Unpickling can execute arbitrary code: only use it on files you created yourself.\n",
    "with np.load(\"amons_data.npz\", allow_pickle=True) as saved:\n",
    "    saved_keys = list(saved.files)\n",
    "    assert len(saved[\"qm9_amons_CMs\"]) == len(qm9_amons_CMs)\n",
    "    assert len(saved[\"vitc_amons_CMs\"]) == len(vitc_amons_CMs)\n",
    "    assert len(saved[\"vitd_amons_CMs\"]) == len(vitd_amons_CMs)\n",
    "saved_keys"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}