diff --git a/GetSLATM.ipynb b/GetSLATM.ipynb index 2898b32..026e195 100644 --- a/GetSLATM.ipynb +++ b/GetSLATM.ipynb @@ -1,246 +1,226 @@ { "cells": [ { "cell_type": "code", - "execution_count": 68, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import qml " ] }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from glob import glob\n", "import numpy as np" ] }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from rdkit import Chem" ] }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "target_xyzs = sorted(glob(\"targets/*.xyz\"))" ] }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def read_sdf(sdf):\n", " with open(sdf, \"r\") as f:\n", " txt = f.read().rstrip()\n", " return txt" ] }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def get_ncharges_coords(sdf):\n", " mol = Chem.MolFromMolBlock(sdf)\n", " #mol = Chem.AddHs(mol)\n", " # rdkit molobj\n", " ncharges = [atom.GetAtomicNum() for atom in mol.GetAtoms()]\n", " conf = mol.GetConformer()\n", " coords = np.asarray(conf.GetPositions())\n", " return ncharges, coords" ] }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['targets/qm9.sdf', 'targets/vitc.sdf', 'targets/vitd.sdf']" ] }, - "execution_count": 74, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "target_files = sorted(glob(\"targets/*.sdf\"))\n", "target_files" ] }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "target_sdfs = [read_sdf(x) for x in target_files]" ] }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "conf_data = [get_ncharges_coords(x) for x in target_sdfs]" ] }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "ncharges_list, coords_list = zip(*conf_data)" ] }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# mbtypes separate to each target" ] }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/puck/anaconda3/envs/rdkit/lib/python3.7/site-packages/ipykernel_launcher.py:5: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n", " \"\"\"\n" ] } ], "source": [ "target_reps = np.array(\n", "[np.array(qml.representations.generate_slatm(coords_list[i], ncharges_list[i], \n", " mbtypes=qml.representations.get_slatm_mbtypes([ncharges_list[i]]),\n", " local=True))\n", "for i in range(len(ncharges_list))])" ] }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(9, 3121)" + "(28, 857)" ] }, - "execution_count": 80, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "target_reps[0].shape" + "target_reps[2].shape" ] }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "target_labels = [t.split(\"/\")[-1].split(\".xyz\")[0] for t in target_sdfs]" ] }, { "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [], - "source": [ - "np.savez(\"target_vector_data.npz\", \n", - " target_labels=target_labels, \n", - " target_reps=target_reps, \n", - " target_ncharges=ncharges_list,)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ - "x = np.load(\"target_vector_data.npz\", allow_pickle=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 67, + "execution_count": 15, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "['target_labels', 'target_reps', 'target_ncharges']" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/puck/anaconda3/envs/rdkit/lib/python3.7/site-packages/numpy/core/_asarray.py:136: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n", + " return array(a, dtype, copy=False, order=order, subok=True)\n" + ] } ], "source": [ - "x.files" + "np.savez(\"target_vector_data.npz\", \n", + " target_labels=target_labels, \n", + " target_reps=target_reps, \n", + " target_ncharges=ncharges_list,)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9" } }, "nbformat": 4, "nbformat_minor": 4 } diff --git a/amons_vector_data.npz b/amons_vector_data.npz index c5a2e73..3f2ccae 100644 Binary files a/amons_vector_data.npz and b/amons_vector_data.npz differ diff --git a/target_vector_data.npz b/target_vector_data.npz index 04b63f6..ea70a24 100644 Binary files a/target_vector_data.npz and b/target_vector_data.npz differ