diff --git a/GetaCM.ipynb b/GetaCM.ipynb
index b5b822b..a084d66 100644
--- a/GetaCM.ipynb
+++ b/GetaCM.ipynb
@@ -1,229 +1,263 @@
 {
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import qml "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
     "from glob import glob\n",
     "import numpy as np"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
     "from rdkit import Chem"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
     "target_xyzs = sorted(glob(\"targets/*.xyz\"))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
     "def read_sdf(sdf):\n",
     "    with open(sdf, \"r\") as f:\n",
     "        txt = f.read().rstrip()\n",
     "    return txt"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
     "def get_ncharges_coords(sdf):\n",
     "    mol = Chem.MolFromMolBlock(sdf)\n",
     "   #mol = Chem.AddHs(mol)\n",
     "    # rdkit molobj\n",
     "    ncharges = [atom.GetAtomicNum() for atom in mol.GetAtoms()]\n",
     "    conf = mol.GetConformer()\n",
     "    coords = np.asarray(conf.GetPositions())\n",
     "    return ncharges, coords"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def cutoff_func(coord_a, coord_b, central_cutoff=1e6, central_decay=-1):\n",
+    "    R_ij = np.linalg.norm(coord_a - coord_b)\n",
+    "    if R_ij <= (central_cutoff - central_decay):\n",
+    "        func = 1.\n",
+    "    elif ((central_cutoff - central_decay) < R_ij) and (R_ij <= (central_cutoff + central_decay)):\n",
+    "        func = 0.5 * (1. + np.cos((np.pi * R_ij - central_cutoff + central_decay)))\n",
+    "    else:\n",
+    "        func = 0.\n",
+    "    return func"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_atomic_CM(ncharges, coords, max_natoms, central_cutoff=1e6, central_decay=-1):\n",
+    "    \"\"\"Build one flattened Coulomb-type matrix per central atom.\n",
+    "\n",
+    "    For central atom k the symmetric matrix M has\n",
+    "    M[i,i] = 0.5 * Z_i**2.4 * f_ik**2 and\n",
+    "    M[i,j] = Z_i * Z_j / |R_i - R_j| * f_ik * f_jk * f_ij,\n",
+    "    where every f is cutoff_func evaluated with central_cutoff and\n",
+    "    central_decay. Row k of the result is the upper triangle of M\n",
+    "    (diagonal included, row by row) followed by zero padding.\n",
+    "\n",
+    "    ncharges: nuclear charges, one per atom.\n",
+    "    coords: atomic positions, indexable per atom.\n",
+    "    max_natoms: atom count that fixes the row length\n",
+    "        (max_natoms + 1) * max_natoms / 2.\n",
+    "    Returns an array of shape (len(ncharges), row length).\n",
+    "    Raises ValueError if the molecule has more than max_natoms atoms.\n",
+    "    \"\"\"\n",
+    "    natoms = len(ncharges)\n",
+    "    if natoms > max_natoms:\n",
+    "        raise ValueError('molecule has more atoms than max_natoms')\n",
+    "    size = int((max_natoms + 1)*max_natoms / 2)\n",
+    "    rep = np.zeros((natoms, size))\n",
+    "\n",
+    "    # central atom loop\n",
+    "    for k in range(natoms):\n",
+    "        # cutoff weight of every atom relative to the central atom\n",
+    "        f_k = [cutoff_func(coords[a], coords[k], central_cutoff=central_cutoff,\n",
+    "                           central_decay=central_decay) for a in range(natoms)]\n",
+    "        M = np.zeros((natoms, natoms))\n",
+    "        for i in range(natoms):\n",
+    "            # the diagonal is set outside the j loop: range(i) never yields j == i\n",
+    "            M[i,i] = 0.5 * ncharges[i]**2.4 * f_k[i]**2\n",
+    "            for j in range(i):\n",
+    "                f_ij = cutoff_func(coords[i], coords[j], central_cutoff=central_cutoff,\n",
+    "                                   central_decay=central_decay)\n",
+    "                M[i,j] = (ncharges[i]*ncharges[j]/np.linalg.norm(coords[i]-coords[j]))*f_k[i]*f_k[j]*f_ij\n",
+    "                M[j,i] = M[i,j]\n",
+    "\n",
+    "        # upper triangle incl. diagonal in a fixed order; entries that are\n",
+    "        # exactly zero keep their slot instead of being dropped\n",
+    "        flat = [M[i,j] for i in range(natoms) for j in range(i, natoms)]\n",
+    "        rep[k, :len(flat)] = flat\n",
+    "\n",
+    "    return rep"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "['targets/qm9.sdf', 'targets/vitc.sdf', 'targets/vitd.sdf']"
       ]
      },
-     "execution_count": 32,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "target_files = sorted(glob(\"targets/*.sdf\"))\n",
     "target_files"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
     "target_sdfs = [read_sdf(x) for x in target_files]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
     "conf_data = [get_ncharges_coords(x) for x in target_sdfs]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
     "ncharges_list, coords_list = zip(*conf_data)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "[9, 12, 28]"
       ]
      },
-     "execution_count": 36,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "sizes = [len(x) for x in ncharges_list]\n",
     "sizes"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/puck/anaconda3/envs/aqml/lib/python3.7/site-packages/ipykernel_launcher.py:5: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
-      "  \"\"\"\n"
+      "/home/puck/anaconda3/envs/rdkit/lib/python3.7/site-packages/ipykernel_launcher.py:4: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+      "  after removing the cwd from sys.path.\n"
      ]
     }
    ],
    "source": [
     "target_reps = np.array(\n",
-    "[np.array(qml.representations.generate_atomic_coulomb_matrix(np.array(ncharges_list[i]), \n",
-    "                                                             np.array(coords_list[i]),\n",
-    "                                                            size=sizes[i]))\n",
+    "[np.array(get_atomic_CM(np.array(ncharges_list[i]), np.array(coords_list[i]),\n",
+    "                                max_natoms=sizes[i]))\n",
     "for i in range(len(ncharges_list))])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(9, 45)"
-      ]
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "target_reps[0].shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [],
    "source": [
     "target_labels = [t.split(\"/\")[-1].split(\".xyz\")[0] for t in target_sdfs]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 44,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/puck/anaconda3/envs/rdkit/lib/python3.7/site-packages/numpy/core/_asarray.py:136: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+      "  return array(a, dtype, copy=False, order=order, subok=True)\n"
+     ]
+    }
+   ],
    "source": [
     "np.savez(\"target_aCM_data.npz\", \n",
     "         target_labels=target_labels, \n",
     "         target_reps=target_reps, \n",
     "         target_ncharges=ncharges_list,)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
    "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
     "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.9"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 4
 }
diff --git a/GetaCMAmons.ipynb b/GetaCMAmons.ipynb
index 9cbd83b..90c98fa 100644
--- a/GetaCMAmons.ipynb
+++ b/GetaCMAmons.ipynb
@@ -1,448 +1,481 @@
 {
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 68,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import qml "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 69,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
     "from glob import glob\n",
     "import numpy as np"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 70,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
     "from rdkit import Chem"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 71,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
     "def read_sdf(sdf):\n",
     "    with open(sdf, \"r\") as f:\n",
     "        txt = f.read().rstrip()\n",
     "    return txt"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 72,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
     "def get_ncharges_coords(sdf):\n",
     "    mol = Chem.MolFromMolBlock(sdf)\n",
     "   #mol = Chem.AddHs(mol)\n",
     "    # rdkit molobj\n",
     "    ncharges = [atom.GetAtomicNum() for atom in mol.GetAtoms()]\n",
     "    conf = mol.GetConformer()\n",
     "    coords = np.asarray(conf.GetPositions())\n",
     "    return ncharges, coords"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 73,
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def cutoff_func(coord_a, coord_b, central_cutoff=1e6, central_decay=-1):\n",
+    "    R_ij = np.linalg.norm(coord_a - coord_b)\n",
+    "    if R_ij <= (central_cutoff - central_decay):\n",
+    "        func = 1.\n",
+    "    elif ((central_cutoff - central_decay) < R_ij) and (R_ij <= (central_cutoff + central_decay)):\n",
+    "        func = 0.5 * (1. + np.cos((np.pi * R_ij - central_cutoff + central_decay)))\n",
+    "    else:\n",
+    "        func = 0.\n",
+    "    return func"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_atomic_CM(ncharges, coords, max_natoms, central_cutoff=1e6, central_decay=-1):\n",
+    "    \"\"\"Build one flattened Coulomb-type matrix per central atom.\n",
+    "\n",
+    "    For central atom k the symmetric matrix M has\n",
+    "    M[i,i] = 0.5 * Z_i**2.4 * f_ik**2 and\n",
+    "    M[i,j] = Z_i * Z_j / |R_i - R_j| * f_ik * f_jk * f_ij,\n",
+    "    where every f is cutoff_func evaluated with central_cutoff and\n",
+    "    central_decay. Row k of the result is the upper triangle of M\n",
+    "    (diagonal included, row by row) followed by zero padding.\n",
+    "\n",
+    "    ncharges: nuclear charges, one per atom.\n",
+    "    coords: atomic positions, indexable per atom.\n",
+    "    max_natoms: atom count that fixes the row length\n",
+    "        (max_natoms + 1) * max_natoms / 2.\n",
+    "    Returns an array of shape (len(ncharges), row length).\n",
+    "    Raises ValueError if the molecule has more than max_natoms atoms.\n",
+    "    \"\"\"\n",
+    "    natoms = len(ncharges)\n",
+    "    if natoms > max_natoms:\n",
+    "        raise ValueError('molecule has more atoms than max_natoms')\n",
+    "    size = int((max_natoms + 1)*max_natoms / 2)\n",
+    "    rep = np.zeros((natoms, size))\n",
+    "\n",
+    "    # central atom loop\n",
+    "    for k in range(natoms):\n",
+    "        # cutoff weight of every atom relative to the central atom\n",
+    "        f_k = [cutoff_func(coords[a], coords[k], central_cutoff=central_cutoff,\n",
+    "                           central_decay=central_decay) for a in range(natoms)]\n",
+    "        M = np.zeros((natoms, natoms))\n",
+    "        for i in range(natoms):\n",
+    "            # the diagonal is set outside the j loop: range(i) never yields j == i\n",
+    "            M[i,i] = 0.5 * ncharges[i]**2.4 * f_k[i]**2\n",
+    "            for j in range(i):\n",
+    "                f_ij = cutoff_func(coords[i], coords[j], central_cutoff=central_cutoff,\n",
+    "                                   central_decay=central_decay)\n",
+    "                M[i,j] = (ncharges[i]*ncharges[j]/np.linalg.norm(coords[i]-coords[j]))*f_k[i]*f_k[j]*f_ij\n",
+    "                M[j,i] = M[i,j]\n",
+    "\n",
+    "        # upper triangle incl. diagonal in a fixed order; entries that are\n",
+    "        # exactly zero keep their slot instead of being dropped\n",
+    "        flat = [M[i,j] for i in range(natoms) for j in range(i, natoms)]\n",
+    "        rep[k, :len(flat)] = flat\n",
+    "\n",
+    "    return rep"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "['targets/qm9.sdf', 'targets/vitc.sdf', 'targets/vitd.sdf']"
       ]
      },
-     "execution_count": 73,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "target_sdfs = sorted(glob(\"targets/*.sdf\"))\n",
     "target_sdfs"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 74,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
     "qm9_amons_files = sorted(glob(\"amons-qm9/*.sdf\"))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 75,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
     "qm9_amons_sdfs = [read_sdf(x) for x in qm9_amons_files]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 76,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
     "conf_data = [get_ncharges_coords(x) for x in qm9_amons_sdfs]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 77,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
     "ncharges_list, coords_list = zip(*conf_data)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 78,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
     "qm9_ncharges = ncharges_list"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 79,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
-    "qm9_reps = [np.array(qml.representations.generate_atomic_coulomb_matrix(np.array(ncharges_list[i]),\n",
+    "qm9_reps = [np.array(get_atomic_CM(np.array(ncharges_list[i]),\n",
     "                                                                        np.array(coords_list[i]), \n",
-    "                                                                        size=9))\n",
+    "                                                                        max_natoms=9))\n",
     "            for i in range(len(ncharges_list))]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 80,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/puck/anaconda3/envs/aqml/lib/python3.7/site-packages/ipykernel_launcher.py:1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+      "/home/puck/anaconda3/envs/rdkit/lib/python3.7/site-packages/ipykernel_launcher.py:1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
       "  \"\"\"Entry point for launching an IPython kernel.\n"
      ]
     }
    ],
    "source": [
     "qm9_reps = np.array(qm9_reps)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "(1, 45)"
       ]
      },
-     "execution_count": 81,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "qm9_reps[0].shape"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 82,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
     "qm9_amons_labels = [t.split(\"/\")[-1].split(\".sdf\")[0] for t in qm9_amons_files]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 83,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
     "vitc_amons_files = sorted(glob(\"amons-vitc/*.sdf\"))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 84,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
     "vitc_amons_sdfs = [read_sdf(x) for x in vitc_amons_files]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 85,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
     "conf_data = [get_ncharges_coords(x) for x in vitc_amons_sdfs]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 86,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
     "ncharges_list, coords_list = zip(*conf_data)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 87,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
     "vitc_ncharges = ncharges_list"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 88,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "8"
-      ]
-     },
-     "execution_count": 88,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "max([len(x) for x in ncharges_list])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 89,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
-    "vitc_reps = [np.array(qml.representations.generate_atomic_coulomb_matrix(np.array(ncharges_list[i]), \n",
-    "                                                                         np.array(coords_list[i]), \n",
-    "                                                         size=12)) for i in \n",
+    "vitc_reps = [np.array(get_atomic_CM(np.array(ncharges_list[i]), np.array(coords_list[i]), \n",
+    "                                                         max_natoms=12)) for i in \n",
     "            range(len(ncharges_list))]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 90,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/puck/anaconda3/envs/aqml/lib/python3.7/site-packages/ipykernel_launcher.py:1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+      "/home/puck/anaconda3/envs/rdkit/lib/python3.7/site-packages/ipykernel_launcher.py:1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
       "  \"\"\"Entry point for launching an IPython kernel.\n"
      ]
     }
    ],
    "source": [
     "vitc_reps = np.array(vitc_reps)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 91,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
     "vitc_amons_labels = [t.split(\"/\")[-1].split(\".sdf\")[0] for t in vitc_amons_files]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 92,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
     "vitd_amons_files = sorted(glob(\"amons-vitd/*.sdf\"))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 93,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
     "vitd_amons_sdfs = [read_sdf(x) for x in vitd_amons_files]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 94,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [
     "conf_data = [get_ncharges_coords(x) for x in vitd_amons_sdfs]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 95,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
     "ncharges_list, coords_list = zip(*conf_data)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 96,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
     "vitd_ncharges = ncharges_list"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 97,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
-    "vitd_reps = [np.array(qml.representations.generate_atomic_coulomb_matrix(np.array(ncharges_list[i]),\n",
-    "                                                                         np.array(coords_list[i]),\n",
-    "                                                                         size=28))\n",
+    "vitd_reps = [np.array(get_atomic_CM(np.array(ncharges_list[i]), np.array(coords_list[i]),\n",
+    "                                                                         max_natoms=28))\n",
     "    for i in range(len(ncharges_list))]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 98,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/puck/anaconda3/envs/aqml/lib/python3.7/site-packages/ipykernel_launcher.py:1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+      "/home/puck/anaconda3/envs/rdkit/lib/python3.7/site-packages/ipykernel_launcher.py:1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
       "  \"\"\"Entry point for launching an IPython kernel.\n"
      ]
     }
    ],
    "source": [
     "vitd_reps = np.array(vitd_reps)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 99,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
     "vitd_amons_labels = [t.split(\"/\")[-1].split(\".sdf\")[0] for t in vitd_amons_files]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [],
    "source": [
     "# np save "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 100,
+   "execution_count": 37,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/puck/anaconda3/envs/rdkit/lib/python3.7/site-packages/numpy/core/_asarray.py:136: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
+      "  return array(a, dtype, copy=False, order=order, subok=True)\n"
+     ]
+    }
+   ],
    "source": [
     "np.savez(\"amons_aCM_data.npz\", \n",
     "         vitd_amons_labels=vitd_amons_labels,\n",
     "         vitc_amons_labels=vitc_amons_labels,\n",
     "         qm9_amons_labels=qm9_amons_labels,\n",
     "         vitd_amons_ncharges=vitd_ncharges,\n",
     "         vitc_amons_ncharges=vitc_ncharges,\n",
     "         qm9_amons_ncharges=qm9_ncharges,\n",
     "         vitd_amons_reps=vitd_reps,\n",
     "         vitc_amons_reps=vitc_reps,\n",
     "         qm9_amons_reps=qm9_reps)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 101,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "(1, 406)"
       ]
      },
-     "execution_count": 101,
+     "execution_count": 38,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "vitd_reps[0].shape"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
    "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
     "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.9"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 4
 }
diff --git a/amons_aCM_data.npz b/amons_aCM_data.npz
index 4077659..293d92f 100644
Binary files a/amons_aCM_data.npz and b/amons_aCM_data.npz differ
diff --git a/onepass.py b/onepass.py
index 7684b55..4c83d5f 100644
--- a/onepass.py
+++ b/onepass.py
@@ -1,163 +1,163 @@
 import numpy as np 
 import timeit
 import gurobipy as gp
 from gurobipy import GRB
 
 def addvariables(Z):
     """Create the binary decision variables of the model.
 
     For every database molecule M and every copy G, one variable x[i,j,M,G]
     is created for each pair (atom i of M, atom j of the target) whose
     nuclear charges are equal, plus one variable y[M,G] per (M, G).
 
     Reads the module-level globals database_indices, data, targetname,
     maxduplicates, n and CT.
 
     Args:
         Z: model object providing addVars (main() passes a gurobipy Model).
 
     Returns:
         (x, I, y): the x variables, the list I of their (i, j, M, G) index
         tuples, and the y variables.
     """
     # Look the array up once: data comes from np.load on an .npz archive,
     # which reads the array from the archive again on every key access.
     ncharges=data[targetname+"_amons_ncharges"]
     I=[]
     J=[]
     for M in database_indices:
         CM=ncharges[M]
         m=len(CM)
         # the charge test only restricts j; i always takes all m values
         I.extend((i,j,M,G) for G in range(maxduplicates) for i in range(m) for j in range(n) if CM[i] == CT[j])
         J.extend((M,G) for G in range(maxduplicates))
 
     x=Z.addVars(I, vtype=GRB.BINARY)
     y=Z.addVars(J, vtype=GRB.BINARY)
     print("Variables added.")
     return x,I,y
 
 def addconstraints(Z,x,I,y):
     """Add the assignment and linking constraints to the model.
 
     Three families of constraints are added:
       * every target position j is covered by exactly one x variable;
       * for each molecule M and copy G, every atom i is used at most once;
       * y[M,G] is tied to the x[.,.,M,G] variables from both sides
         (y >= each x, y <= their sum).
 
     Reads the module-level globals n, database_indices, data, targetname
     and maxduplicates.
 
     Args:
         Z: model object providing addConstrs.
         x: x variables as returned by addvariables.
         I: list of the (i, j, M, G) index tuples of x.
         y: y variables as returned by addvariables.
 
     Returns:
         0, always.
     """
     # one archive read instead of one per molecule (an .npz key access
     # loads the array again each time)
     ncharges=data[targetname+"_amons_ncharges"]
 
     # bijection into [n]
     Z.addConstrs(x.sum('*',j,'*', '*') == 1 for j in range(n))
 
     for M in database_indices:
         m=len(ncharges[M])
         # each i of each group is used at most once
         Z.addConstrs(x.sum(i,'*',M,G) <= 1 for i in range(m) for G in range(maxduplicates))
         # y[M,G] = OR gate of the x[i,j,M,G] for each (M,G)
         Z.addConstrs(y[M,G] >= x[v] for G in range(maxduplicates) for v in I if v[2:]==(M,G))
         Z.addConstrs(y[M,G] <= x.sum('*','*',M,G) for G in range(maxduplicates))
     print("Constraints added.")
     return 0
 
 # objective value should then be square rooted in the end (doesn't change optimality)
 def setobjective(Z,x,I,y):
     """Build the objective expression and set it on the model (minimize).
 
     representation == 0 (Coulomb matrices): a quadratic expression made of
     the sum of T[k,l]**2 over the target matrix, plus, for every (M, G) and
     every two index pairs (i,k), (j,l) of that group,
     (Mol[i,j]**2 - 2*T[k,l]*Mol[i,j]) * x[i,k,M,G] * x[j,l,M,G],
     plus y[M,G]*m, minus n.
 
     Any other representation value: a linear expression with the squared
     Euclidean distance between Mol[i] and T[j] as the coefficient of
     x[i,j,M,G], plus y[M,G]*m.
 
     Reads the module-level globals representation, targetdata, target_index,
     n, database_indices, data, targetname, maxduplicates and size_database.
     Prints one progress line per database molecule.
 
     Returns:
         0, always.
     """
     print("Constructing objective function... ")
     key=0
     if(representation==0): # Coulomb case
         expr=gp.QuadExpr()
         T=targetdata['target_CMs'][target_index]
         for k in range(n):
             for l in range(n):
                 expr += T[k,l]**2
         # read the array from the archive once, not once per molecule
         amons=data[targetname+"_amons_CMs"]
         for M in database_indices:
             key=key+1
             Mol=amons[M]
             m=len(Mol)
             for G in range(maxduplicates):
                 # index pairs of this group; built once instead of once per
                 # outer pair, the iteration order is unchanged
                 pairs=[v[:2] for v in I if v[2:]==(M,G)]
                 for (i,k) in pairs:
                     for (j,l) in pairs:
                         expr += (Mol[i,j]**2 - 2*T[k,l]*Mol[i,j])*x[i,k,M,G]*x[j,l,M,G]
                 expr += y[M,G]*m
             print(key, "  /  ", size_database)
         expr=expr-n
 
     else: # per-atom vector case: taken for SLATM and every other non-zero setting
         expr=gp.LinExpr()
         T=targetdata["target_reps"][target_index]
         # read the array from the archive once, not once per molecule
         amons=data[targetname+"_amons_reps"]
         for M in database_indices:
             key=key+1
             Mol=amons[M]
             m=len(Mol)
             for G in range(maxduplicates):
                 for (i,j) in [v[:2] for v in I if v[2:]==(M,G)]:
                     C=np.linalg.norm(Mol[i]-T[j])**2
                     expr += C*x[i,j,M,G]
                 expr += y[M,G]*m
             print(key, "  /  ", size_database)
 
     Z.setObjective(expr, GRB.MINIMIZE)
     print("Objective function set.")
     return 0
 
 # prints mappings of positions (indices+1) of each molecule to positions inside target
 def print_sols(Z, x, I, y):
     """Print every solution of the pool as atom-to-target position mappings.
 
     For each solution the objective value is printed, then, for each database
     molecule with at least one selected copy, a header line followed by one
     line per selected copy listing "i -> j" (1-based) for the x variables
     that round to 1.
 
     Reads the module-level globals n, repname, data, targetname,
     database_indices and maxduplicates.
 
     Args:
         Z: optimized model; SolCount, PoolObjVal and the SolutionNumber
            parameter are used.
         x: x variables as returned by addvariables.
         I: list of the (i, j, M, G) index tuples of x.
         y: y variables as returned by addvariables.
     """
     SolCount=Z.SolCount
     print("Target has size", n)
     print("Using representation", repname)
     # read both arrays from the archive once instead of once per picked copy
     ncharges=data[targetname+"_amons_ncharges"]
     labels=data[targetname+"_amons_labels"]
     for solnb in range(SolCount):
         print()
         print("--------------------------------")
         # selects which pool solution the .Xn / PoolObjVal attributes refer to
         Z.setParam("SolutionNumber",solnb)
         print("Solution number", solnb+1, ", objective value with size penalty", (Z.PoolObjVal))
 
         for M in database_indices:
             # copies of M whose y variable is (numerically) 1 in this solution
             groups=[G for G in range(maxduplicates) if np.rint(y[M,G].Xn) == 1]
             amount_picked=len(groups)
             if groups:
                 # header once per molecule, before its first copy
                 print("Molecule", labels[M], "has been picked", amount_picked, "time(s) ( size", len(ncharges[M]), ", used", sum([x[v].Xn for v in I if v[2]==M]), ")")
             for k, G in enumerate(groups):
                 print(k+1, end=": ")
                 for (i,j) in [v[:2] for v in I if v[2:]==(M,G) and np.rint(x[v].Xn)==1]:
                     print(i+1, "->", j+1, end=", ")
                 print()
 
 def main():
     """Build the model, configure the solver, optimize and print the results.
 
     Reads the module-level globals timelimit and numbersolutions and relies
     on addvariables, addconstraints, setobjective and print_sols.
 
     Returns:
         1 if the solver reports the model as infeasible, 0 otherwise.
     """
     # construction of the model
     start=timeit.default_timer()
     Z = gp.Model()
     Z.setParam('OutputFlag',1)
     x,I,y=addvariables(Z)
     addconstraints(Z,x,I,y)
     setobjective(Z,x,I,y)
     stop=timeit.default_timer()
     print("Model setup: ", stop-start, "s")
 
     # model parameters
     # PoolSearchMode 1/2 forces to fill the solution pool. 2 finds the best solutions.
     # Set to 1 because of duplicating solutions which differ by 1e-9 and are seen as different.
     Z.setParam("PoolSearchMode", 1)
     # these prevent non integral values although some solutions are still duplicating -- to fix?
     Z.setParam("IntFeasTol", 1e-9)
     Z.setParam("IntegralityFocus", 1)
 
     Z.setParam("TimeLimit", timelimit)
     Z.setParam("PoolSolutions", numbersolutions)
 
     # optimization
     print("------------")
     print("Optimization")
     print("------------")
     Z.optimize()
     print("------------")
     print()
     print("Optimization runtime: ", Z.RunTime, "s")
 
     # named status constant instead of the bare code 3
     if Z.status == GRB.INFEASIBLE:
         print("Model was proven to be infeasible.")
         return 1
 
     print_sols(Z,x,I,y)
     return 0
 
 # modifiable global settings
-target_index=2 # 0, 1, or 2 for qm9, vitc, or vitd.
+target_index=1 # 0, 1, or 2 for qm9, vitc, or vitd.
 maxduplicates=1 # number of possible copies of each molecule of the database
 timelimit=120 # in seconds (not counting setup)
-numbersolutions=50 # size of solution pool
+numbersolutions=10 # size of solution pool
 # NOTE(review): setobjective only tests representation==0; every other value
 # goes through the same per-atom-vector branch and differs only in which
 # .npz archives are loaded below.
 representation=1 # 0 for Coulomb Matrix (CM), 1 for SLATM, 2 for aCM, 3 for SOAP, 4 for FCHL
 
 # global constants
 # repname selects the archive pair amons_<rep>_data.npz / target_<rep>_data.npz
 repname=["CM", "SLATM", "aCM", "SOAP", "FCHL"][representation]
 dataname="amons_"+repname+"_data.npz"
 # allow_pickle=True: presumably the archives hold object arrays (ragged
 # per-molecule data) -- confirm against the code that writes them
 data=np.load(dataname, allow_pickle=True)
 targetdataname="target_"+repname+"_data.npz"
 targetdata=np.load(targetdataname, allow_pickle=True)
 
 # nuclear charges of the chosen target; n is its number of atoms
 CT=targetdata['target_ncharges'][target_index]
 n=len(CT)
 targetname=["qm9", "vitc", "vitd"][target_index]
 size_database=len(data[targetname+"_amons_labels"])
 database_indices=range(size_database) 
 
 # runs at module level: the model is built and solved as soon as the file is
 # executed or imported (there is no __main__ guard)
 main()
diff --git a/target_aCM_data.npz b/target_aCM_data.npz
index 90a1dc4..53335e1 100644
Binary files a/target_aCM_data.npz and b/target_aCM_data.npz differ