{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Coulomb-matrix dataset preparation\n",
    "\n",
    "Reads the `.xyz` geometries matched by `targets/*.xyz` and `qm7/*.xyz`, builds a Coulomb matrix for every molecule, and stores the labels, matrices and nuclear charges of both sets in `data.npz`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from glob import glob\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import qml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives here.\n",
    "TARGET_GLOB = \"targets/*.xyz\"\n",
    "DATABASE_GLOB = \"qm7/*.xyz\"\n",
    "OUTPUT_PATH = \"data.npz\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load molecules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sorted so that labels, matrices and charges line up reproducibly across runs.\n",
    "target_xyzs = sorted(glob(TARGET_GLOB))\n",
    "database_xyzs = sorted(glob(DATABASE_GLOB))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "target_mols = [qml.Compound(path) for path in target_xyzs]\n",
    "database_mols = [qml.Compound(path) for path in database_xyzs]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Coulomb matrix\n",
    "\n",
    "For nuclear charges $Z_i$ and positions $\\mathbf{R}_i$:\n",
    "\n",
    "$$M_{ij} = \\begin{cases} 0.5\\,Z_i^{2.4} & i = j \\\\ \\dfrac{Z_i Z_j}{\\lVert \\mathbf{R}_i - \\mathbf{R}_j \\rVert} & i \\neq j \\end{cases}$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_CM(mol):\n",
    "    \"\"\"Build the Coulomb matrix of one molecule.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mol : object\n",
    "        Anything exposing ``nuclear_charges`` (one charge per atom) and\n",
    "        ``coordinates`` (one position per atom). Coordinates are assumed to\n",
    "        be array-like of shape (n_atoms, n_dims) -- TODO confirm for inputs\n",
    "        other than ``qml.Compound``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ncharges\n",
    "        ``mol.nuclear_charges``, returned unchanged.\n",
    "    CM : numpy.ndarray\n",
    "        Float matrix of shape (n_atoms, n_atoms) with ``0.5 * Z_i ** 2.4`` on\n",
    "        the diagonal and ``Z_i * Z_j / |R_i - R_j|`` elsewhere.\n",
    "    \"\"\"\n",
    "    ncharges = mol.nuclear_charges\n",
    "    n_atoms = len(mol.coordinates)\n",
    "    if n_atoms == 0:\n",
    "        # Nothing to pair up; keep the empty-matrix result of a plain loop.\n",
    "        return ncharges, np.zeros((0, 0))\n",
    "\n",
    "    charges = np.asarray(ncharges, dtype=float)\n",
    "    coords = np.asarray(mol.coordinates, dtype=float).reshape(n_atoms, -1)\n",
    "\n",
    "    # All pairwise distances at once instead of one norm() call per atom pair.\n",
    "    dists = np.linalg.norm(coords[:, None, :] - coords[None, :, :], axis=-1)\n",
    "    # Self-distances are zero; put a dummy 1.0 there so the division below\n",
    "    # does not produce 0/0. The diagonal is overwritten right afterwards.\n",
    "    np.fill_diagonal(dists, 1.0)\n",
    "\n",
    "    CM = np.outer(charges, charges) / dists\n",
    "    np.fill_diagonal(CM, 0.5 * charges ** 2.4)\n",
    "    return ncharges, CM\n",
    "\n",
    "\n",
    "def featurize_molecules(mols):\n",
    "    \"\"\"Run ``get_CM`` over ``mols`` and collect the results.\n",
    "\n",
    "    Molecules generally differ in atom count, so the per-molecule arrays are\n",
    "    ragged. They are stored in 1-D ``dtype=object`` arrays filled element by\n",
    "    element: calling ``np.array`` on a ragged list is deprecated (and an error\n",
    "    in newer NumPy), and it would silently produce a different layout whenever\n",
    "    all molecules happen to have the same size.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ncharges, CMs : numpy.ndarray\n",
    "        Object arrays of length ``len(mols)``; entry ``k`` holds the nuclear\n",
    "        charges / Coulomb matrix of ``mols[k]``.\n",
    "    \"\"\"\n",
    "    ncharges = np.empty(len(mols), dtype=object)\n",
    "    CMs = np.empty(len(mols), dtype=object)\n",
    "    for idx, mol in enumerate(mols):\n",
    "        ncharges[idx], CMs[idx] = get_CM(mol)\n",
    "    return ncharges, CMs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quick sanity check on the first target molecule.\n",
    "example_mol = target_mols[0]\n",
    "example_ncharges, example_CM = get_CM(example_mol)\n",
    "\n",
    "n_example_atoms = len(example_mol.coordinates)\n",
    "assert example_CM.shape == (n_example_atoms, n_example_atoms)\n",
    "assert np.allclose(example_CM, example_CM.T), \"Coulomb matrix must be symmetric\"\n",
    "example_CM.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build representations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "target_ncharges, target_CMs = featurize_molecules(target_mols)\n",
    "database_ncharges, database_CMs = featurize_molecules(database_mols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label = file name without directory and extension. Path.stem handles both\n",
    "# \"/\" and \"\\\\\" separators, unlike splitting on \"/\" by hand.\n",
    "target_labels = np.array([Path(path).stem for path in target_xyzs])\n",
    "database_labels = np.array([Path(path).stem for path in database_xyzs])\n",
    "\n",
    "assert len(target_labels) == len(target_CMs) == len(target_ncharges)\n",
    "assert len(database_labels) == len(database_CMs) == len(database_ncharges)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save and verify"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.savez(\n",
    "    OUTPUT_PATH,\n",
    "    target_labels=target_labels,\n",
    "    target_CMs=target_CMs,\n",
    "    target_ncharges=target_ncharges,\n",
    "    database_labels=database_labels,\n",
    "    database_CMs=database_CMs,\n",
    "    database_ncharges=database_ncharges,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The per-molecule entries are object arrays, which NumPy stores via pickle,\n",
    "# hence allow_pickle=True. Unpickling can execute arbitrary code: only load\n",
    "# .npz files that you produced yourself.\n",
    "# The context manager closes the archive again once the keys are read.\n",
    "with np.load(OUTPUT_PATH, allow_pickle=True) as saved:\n",
    "    saved_keys = list(saved.files)\n",
    "\n",
    "saved_keys"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}