diff --git a/GetCM.ipynb b/GetCM.ipynb new file mode 100644 index 0000000..6960a29 --- /dev/null +++ b/GetCM.ipynb @@ -0,0 +1,277 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "import qml " + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "from glob import glob\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "target_xyzs = sorted(glob(\"targets/*.xyz\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "database_xyzs = sorted(glob(\"qm7/*.xyz\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "target_mols = [qml.Compound(x) for x in target_xyzs]" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "database_mols = [qml.Compound(x) for x in database_xyzs]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "def get_CM(mol):\n", + " ncharges = mol.nuclear_charges\n", + " coords = mol.coordinates \n", + " CM = np.zeros((len(coords), len(coords)))\n", + " for i in range(len(coords)):\n", + " for j in range(len(coords)):\n", + " if i==j:\n", + " CM[i,j] = 0.5 * ncharges[i]**2.4\n", + " else:\n", + " CM[i,j] = ncharges[i] * ncharges[j] / np.linalg.norm(coords[j] - coords[i])\n", + " \n", + " return CM" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "mol = target_mols[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "CM = get_CM(mol)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "target_CMs = [get_CM(mol) 
for mol in target_mols]" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "database_CMs = [get_CM(mol) for mol in database_mols]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n", + " target_CMs = np.array(target_CMs)\n" + ] + } + ], + "source": [ + "target_CMs = np.array(target_CMs)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. 
If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n", + " database_CMs = np.array(database_CMs)\n" + ] + } + ], + "source": [ + "database_CMs = np.array(database_CMs)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "target_labels = [t.split(\"/\")[-1].split(\".xyz\")[0] for t in target_xyzs]" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "database_labels = [t.split(\"/\")[-1].split(\".xyz\")[0] for t in database_xyzs]" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "target_labels = np.array(target_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "database_labels = np.array(database_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "np.savez(\"data.npz\", \n", + " target_labels=target_labels, \n", + " target_CMs=target_CMs, \n", + " database_labels=database_labels, \n", + " database_CMs=database_CMs)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "x = np.load(\"data.npz\", allow_pickle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['target_labels', 'target_CMs', 'database_labels', 'database_CMs']" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.files" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(11, 11)" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x[\"target_CMs\"][0].shape" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Project.aux b/Project.aux new file mode 100644 index 0000000..c11e7b2 --- /dev/null +++ b/Project.aux @@ -0,0 +1,4 @@ +\relax +\@writefile{toc}{\contentsline {section}{\numberline {1}Basic problem}{1}\protected@file@percent } +\newlabel{sec:1}{{1}{1}} +\@writefile{toc}{\contentsline {section}{\numberline {2}Optimal placement of molecules}{1}\protected@file@percent } diff --git a/Project.log b/Project.log new file mode 100644 index 0000000..f656f14 --- /dev/null +++ b/Project.log @@ -0,0 +1,155 @@ +This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2021.5.19) 25 JUN 2021 11:38 +entering extended mode + restricted \write18 enabled. + %&-line parsing enabled. 
+**Project.tex +(./Project.tex +LaTeX2e <2020-02-02> patch level 2 +L3 programming layer <2020-02-14> +(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls +Document Class: article 2019/12/20 v1.4l Standard LaTeX document class +(/usr/share/texlive/texmf-dist/tex/latex/base/size11.clo +File: size11.clo 2019/12/20 v1.4l Standard LaTeX file (size option) +) +\c@part=\count167 +\c@section=\count168 +\c@subsection=\count169 +\c@subsubsection=\count170 +\c@paragraph=\count171 +\c@subparagraph=\count172 +\c@figure=\count173 +\c@table=\count174 +\abovecaptionskip=\skip47 +\belowcaptionskip=\skip48 +\bibindent=\dimen134 +) +(/usr/share/texlive/texmf-dist/tex/latex/anysize/anysize.sty +Package: anysize 1994/08/13 setting margin sizes + +document style option `anysize' loaded +Michael Salzenberg, Thomas Esser, Dirk Hillbrecht +Version 1.0, Aug 13, 1994 +\@Leftmargin=\dimen135 +\@Rightmargin=\dimen136 +\@Topmargin=\dimen137 +\@Bottommargin=\dimen138 +) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty +Package: amsmath 2020/01/20 v2.17e AMS math features +\@mathmargin=\skip49 + +For additional information on amsmath, use the `?' option. +(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty +Package: amstext 2000/06/29 v2.01 AMS text + +(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty +File: amsgen.sty 1999/11/30 v2.0 generic functions +\@emptytoks=\toks14 +\ex@=\dimen139 +)) +(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty +Package: amsbsy 1999/11/29 v1.2d Bold Symbols +\pmbraise@=\dimen140 +) +(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty +Package: amsopn 2016/03/08 v2.02 operator names +) +\inf@bad=\count175 +LaTeX Info: Redefining \frac on input line 227. +\uproot@=\count176 +\leftroot@=\count177 +LaTeX Info: Redefining \overline on input line 389. +\classnum@=\count178 +\DOTSCASE@=\count179 +LaTeX Info: Redefining \ldots on input line 486. +LaTeX Info: Redefining \dots on input line 489. 
+LaTeX Info: Redefining \cdots on input line 610. +\Mathstrutbox@=\box45 +\strutbox@=\box46 +\big@size=\dimen141 +LaTeX Font Info: Redeclaring font encoding OML on input line 733. +LaTeX Font Info: Redeclaring font encoding OMS on input line 734. +\macc@depth=\count180 +\c@MaxMatrixCols=\count181 +\dotsspace@=\muskip16 +\c@parentequation=\count182 +\dspbrk@lvl=\count183 +\tag@help=\toks15 +\row@=\count184 +\column@=\count185 +\maxfields@=\count186 +\andhelp@=\toks16 +\eqnshift@=\dimen142 +\alignsep@=\dimen143 +\tagshift@=\dimen144 +\tagwidth@=\dimen145 +\totwidth@=\dimen146 +\lineht@=\dimen147 +\@envbody=\toks17 +\multlinegap=\skip50 +\multlinetaggap=\skip51 +\mathdisplay@stack=\toks18 +LaTeX Info: Redefining \[ on input line 2859. +LaTeX Info: Redefining \] on input line 2860. +) +(/usr/share/texlive/texmf-dist/tex/latex/bbold/bbold.sty +Package: bbold 1994/04/06 Bbold symbol package +) +(/usr/share/texlive/texmf-dist/tex/latex/l3backend/l3backend-pdfmode.def +File: l3backend-pdfmode.def 2020-02-03 L3 backend support: PDF mode +\l__kernel_color_stack_int=\count187 +\l__pdf_internal_box=\box47 +) +(./Project.aux) +\openout1 = `Project.aux'. + +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 16. +LaTeX Font Info: ... okay on input line 16. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 16. +LaTeX Font Info: ... okay on input line 16. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 16. +LaTeX Font Info: ... okay on input line 16. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 16. +LaTeX Font Info: ... okay on input line 16. +LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 16. +LaTeX Font Info: ... okay on input line 16. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 16. +LaTeX Font Info: ... okay on input line 16. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 16. +LaTeX Font Info: ... okay on input line 16. 
+LaTeX Font Info: Trying to load font information for U+bbold on input line 2 +2. + (/usr/share/texlive/texmf-dist/tex/latex/bbold/Ubbold.fd) +[1 + +{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}] (./Project.aux) + +LaTeX Warning: Label(s) may have changed. Rerun to get cross-references right. + + ) +Here is how much of TeX's memory you used: + 1067 strings out of 481239 + 14465 string characters out of 5920377 + 251543 words of memory out of 5000000 + 16373 multiletter control sequences out of 15000+600000 + 537599 words of font info for 43 fonts, out of 8000000 for 9000 + 1141 hyphenation exceptions out of 8191 + 30i,9n,25p,504b,131s stack positions out of 5000i,500n,10000p,200000b,80000s +{/usr/share/texmf/fonts/enc/dvips/cm-super/cm-super-ts1.enc} +Output written on Project.pdf (1 page, 112593 bytes). +PDF statistics: + 57 PDF objects out of 1000 (max. 8388607) + 41 compressed objects within 1 object stream + 0 named destinations out of 1000 (max. 500000) + 1 words of extra memory for PDF output out of 10000 (max. 10000000) + diff --git a/Project.pdf b/Project.pdf new file mode 100644 index 0000000..66b381a Binary files /dev/null and b/Project.pdf differ diff --git a/Project.tex b/Project.tex index 6016861..9f64627 100644 --- a/Project.tex +++ b/Project.tex @@ -1,53 +1,70 @@ \documentclass[11pt,a4paper]{article} \usepackage{anysize} \usepackage{amsmath} \usepackage{bbold} -\usepackage{utf8math} +%\usepackage{utf8math} %\DeclareMathOperator{\lp}{\textbf{LP}} \DeclareMathOperator{\lp}{LP} \DeclareMathOperator{\supp}{supp} \newcommand{\A}{\mathcal{A}} \newcommand{\T}{\mathcal{T}} \newcommand{\LO}{\mathcal{O}} \begin{document} \section{Basic problem} \label{sec:1} -Given a matrix $A ∈ ℝ_{≥0}^{n × n}$, representing a \emph{target} molecule and a \emph{database} $B^{(i)} ∈ ℝ^{n_i ×n_i}$ $i ∈ \{1,\dots,k\}$. The $n_i$ are usually small, around $7$. 
+We are given a matrix $A \in \mathbb{R}_{\geq 0}^{n \times n}$ representing a \emph{target} molecule and a \emph{database} of matrices $B^{(i)} \in \mathbb{R}^{n_i \times n_i}$, $i \in \{1,\dots,k\}$. The $n_i$ are usually small, around $7$. -A \emph{solution} is a permutation $π ∈ S_n$, a multiset $S ⊆ \{1,\dots,k\}$ and a mapping $p: S ⟶ \{1,\dots,n\}$. The \emph{objective value} of the solution is $\| A_π - A_{S,p}\|_2^2$, where $A_π$ is the matrix stemming from $A$ where the rows and columns were permuted by $π$ and $A_{S_p}$ is the block-diagonal matrix that stems from the elements in $S$ that are put with upper leftmost component on the corresponding position described by $p$. +A \emph{solution} is a permutation $\pi \in S_n$, a multiset $S \subseteq \{1,\dots,k\}$ and a mapping $p: S \to \{1,\dots,n\}$. The \emph{objective value} of the solution is $\| A_\pi - A_{S,p}\|_2^2$, where $A_\pi$ is the matrix obtained from $A$ by permuting its rows and columns by $\pi$, and $A_{S,p}$ is the block-diagonal matrix obtained by placing each block $B^{(i)}$, $i \in S$, with its upper-left entry at the position described by $p$. A feasibility constraint is that $S$ and $p$ induce a partition of the set $\{1,\dots,n\}$. Overarching goal for Chemistry: Suggest new ways to synthesize a target molecule. \begin{itemize} \item Bigger database of pieces should make sense. Efficiency is a problem can be overcome. Also the pieces need a score (objective value) on the cost to synthesize them. This can be modeled in the objective function. \item Diversity of solutions: One way would be to list pareto optimal solutions using distance, sums of costs of pieces, .... Another way would be to model diversity \item Objective function should also model the number of smaller pieces. The more pieces in the solution, the more synthesis steps. 
\end{itemize} +\section{Optimal placement of molecules} +The first goal is to find a way to optimally place a small molecule represented by the matrix $M \in \mathbb{R}^{m \times m}$ inside the target represented by the matrix $T \in \mathbb{R}^{n \times n}$. +In other words, we want to find an injective mapping $\phi : \{1, \dots, m\} \to \{1, \dots, n\}$. This is an index assignment problem in which the binary variables $x_{ij} \in \{0, 1\}$ encode $\phi$: +\begin{equation*} + x_{ij} = + \begin{cases} + 1 & \text{if } \phi(i) = j \\ + 0 & \text{otherwise} + \end{cases} +\end{equation*} +under the constraints: +\begin{align*} + \forall i \in \{1, \dots, m\} &: \sum_{j=1}^n x_{ij} = 1 \\ + \forall j \in \{1, \dots, n\} &: \sum_{i=1}^m x_{ij} \leq 1 +\end{align*} + +The optimal placement is then obtained by minimising the objective function: +\begin{equation*} + \min_{x} \sum_{i,j=1}^{m} \sum_{k,l=1}^{n} (M_{ij} - T_{kl})^2 \, x_{ik} x_{jl} +\end{equation*} + \end{document} -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: