In [37]:
import qml 

In [38]:
import os
from glob import glob

import numpy as np

In [39]:
from rdkit import Chem

In [40]:
def read_sdf(sdf):
    """Return the text of the file at path ``sdf`` with trailing whitespace removed."""
    with open(sdf, "r") as handle:
        contents = handle.read()
    return contents.rstrip()

In [41]:
def get_ncharges_coords(sdf):
    """Extract nuclear charges and 3D coordinates from mol-block text.

    Parameters
    ----------
    sdf : str
        Mol-block / SDF *text* (not a file path); it is passed directly to
        ``Chem.MolFromMolBlock``.

    Returns
    -------
    ncharges : list of int
        Atomic number of each atom, in the order yielded by ``mol.GetAtoms()``.
    coords : numpy.ndarray
        Atom positions of the molecule's default conformer.

    Raises
    ------
    ValueError
        If RDKit cannot parse the text (``MolFromMolBlock`` returned ``None``).

    Notes
    -----
    NOTE(review): ``MolFromMolBlock`` is called with its defaults, which
    (per the RDKit docs, ``removeHs=True``) drop explicit hydrogens, and no
    ``Chem.AddHs`` call is made. Confirm that heavy-atom-only charges and
    coordinates are what the SLATM representations should be built from.
    """
    mol = Chem.MolFromMolBlock(sdf)
    if mol is None:
        # RDKit signals a parse failure by returning None instead of raising;
        # fail here with a clear message rather than an AttributeError below.
        raise ValueError("RDKit could not parse the supplied mol block")
    ncharges = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
    conf = mol.GetConformer()
    coords = np.asarray(conf.GetPositions())
    return ncharges, coords

In [42]:
# List the target-molecule SDF files in sorted order and display them.
target_sdfs = glob("targets/*.sdf")
target_sdfs.sort()
target_sdfs

['targets/qm9.sdf', 'targets/vitc.sdf', 'targets/vitd.sdf']

In [43]:
# QM9 amon SDF files, sorted so that labels and representations built from
# this list share one deterministic order.
qm9_amons_files = glob("amons-qm9/*.sdf")
qm9_amons_files.sort()

In [44]:
# Read every QM9 amon file into memory as text, keeping the file order.
qm9_amons_sdfs = list(map(read_sdf, qm9_amons_files))

In [45]:
# One (ncharges, coords) pair per QM9 amon.
conf_data = list(map(get_ncharges_coords, qm9_amons_sdfs))

In [46]:
# Split the (ncharges, coords) pairs into two parallel tuples, one entry per
# molecule. Unpacking into exactly two names fails if conf_data is empty.
ncharges_list, coords_list = zip(*conf_data)

In [47]:
# Keep a dataset-specific handle: ncharges_list is reassigned when the next
# dataset is processed, and qm9_ncharges is what gets written to the archive.
qm9_ncharges = ncharges_list

In [48]:
# SLATM many-body types derived from the QM9 amons' nuclear charges only.
# NOTE(review): mbtypes is recomputed for each dataset, so representation
# width is dataset-specific (the displayed shapes show 3121 columns for QM9
# vs. 857 for vitD) — confirm representations are never compared across sets.
mbtypes = qml.representations.get_slatm_mbtypes(ncharges_list)

In [49]:
# Local SLATM representation of every QM9 amon, pairing each molecule's
# coordinates with its nuclear charges.
qm9_reps = [
    np.array(qml.representations.generate_slatm(coords, ncharges, mbtypes, local=True))
    for coords, ncharges in zip(coords_list, ncharges_list)
]

In [50]:
# Pack the per-molecule representation blocks into one array.
# The blocks can differ in shape between molecules; letting np.array infer a
# ragged object array is deprecated and raises ValueError on NumPy >= 1.24,
# so the object array is built explicitly in that case.
# NOTE(review): the stderr fragment left under this cell looks like the tail
# of that deprecation warning — confirm.
qm9_rep_list = list(qm9_reps)
if len({np.shape(rep) for rep in qm9_rep_list}) <= 1:
    # All blocks share one shape: a regular stacked array is well defined.
    qm9_reps = np.array(qm9_rep_list)
else:
    qm9_reps = np.empty(len(qm9_rep_list), dtype=object)
    for mol_idx, rep in enumerate(qm9_rep_list):
        qm9_reps[mol_idx] = rep

  """Entry point for launching an IPython kernel.


In [51]:
# Sanity check: shape of the first QM9 amon's representation block.
qm9_reps[0].shape

(1, 3121)

In [52]:
# Label each QM9 amon by its file name without directory or extension.
# os.path handles the platform's path separator (glob returns backslashes on
# Windows) and strips only the final extension.
qm9_amons_labels = [os.path.splitext(os.path.basename(path))[0] for path in qm9_amons_files]

In [53]:
# Vitamin-C amon SDF files, sorted so that labels and representations built
# from this list share one deterministic order.
vitc_amons_files = glob("amons-vitc/*.sdf")
vitc_amons_files.sort()

In [54]:
# Read every vitamin-C amon file into memory as text, keeping the file order.
vitc_amons_sdfs = list(map(read_sdf, vitc_amons_files))

In [55]:
# One (ncharges, coords) pair per vitamin-C amon (replaces the QM9 conf_data).
conf_data = list(map(get_ncharges_coords, vitc_amons_sdfs))

In [56]:
# Split the vitamin-C (ncharges, coords) pairs into two parallel tuples.
# This overwrites the ncharges_list / coords_list of the previous dataset.
ncharges_list, coords_list = zip(*conf_data)

In [57]:
# Keep a dataset-specific handle: ncharges_list is reassigned when the next
# dataset is processed, and vitc_ncharges is what gets written to the archive.
vitc_ncharges = ncharges_list

In [58]:
# SLATM many-body types derived from the vitamin-C amons' nuclear charges
# only; this replaces the mbtypes computed for the previous dataset.
mbtypes = qml.representations.get_slatm_mbtypes(ncharges_list)

In [59]:
# Local SLATM representation of every vitamin-C amon, pairing each molecule's
# coordinates with its nuclear charges.
vitc_reps = [
    np.array(qml.representations.generate_slatm(coords, ncharges, mbtypes, local=True))
    for coords, ncharges in zip(coords_list, ncharges_list)
]

In [60]:
# Pack the per-molecule representation blocks into one array.
# The blocks can differ in shape between molecules; letting np.array infer a
# ragged object array is deprecated and raises ValueError on NumPy >= 1.24,
# so the object array is built explicitly in that case.
vitc_rep_list = list(vitc_reps)
if len({np.shape(rep) for rep in vitc_rep_list}) <= 1:
    # All blocks share one shape: a regular stacked array is well defined.
    vitc_reps = np.array(vitc_rep_list)
else:
    vitc_reps = np.empty(len(vitc_rep_list), dtype=object)
    for mol_idx, rep in enumerate(vitc_rep_list):
        vitc_reps[mol_idx] = rep

  """Entry point for launching an IPython kernel.


In [61]:
# Label each vitamin-C amon by its file name without directory or extension.
# os.path handles the platform's path separator (glob returns backslashes on
# Windows) and strips only the final extension.
vitc_amons_labels = [os.path.splitext(os.path.basename(path))[0] for path in vitc_amons_files]

In [62]:
# Vitamin-D amon SDF files, sorted so that labels and representations built
# from this list share one deterministic order.
vitd_amons_files = glob("amons-vitd/*.sdf")
vitd_amons_files.sort()

In [63]:
# Read every vitamin-D amon file into memory as text, keeping the file order.
vitd_amons_sdfs = list(map(read_sdf, vitd_amons_files))

In [64]:
# One (ncharges, coords) pair per vitamin-D amon (replaces the earlier conf_data).
conf_data = list(map(get_ncharges_coords, vitd_amons_sdfs))

In [65]:
# Split the vitamin-D (ncharges, coords) pairs into two parallel tuples.
# This overwrites the ncharges_list / coords_list of the previous dataset.
ncharges_list, coords_list = zip(*conf_data)

In [66]:
# Dataset-specific handle for the vitamin-D nuclear charges; this is the name
# written to the archive at the end of the notebook.
vitd_ncharges = ncharges_list

In [67]:
# SLATM many-body types derived from the vitamin-D amons' nuclear charges
# only; this replaces the mbtypes computed for the previous dataset.
mbtypes = qml.representations.get_slatm_mbtypes(ncharges_list)

In [68]:
# Local SLATM representation of every vitamin-D amon, pairing each molecule's
# coordinates with its nuclear charges.
vitd_reps = [
    np.array(qml.representations.generate_slatm(coords, ncharges, mbtypes, local=True))
    for coords, ncharges in zip(coords_list, ncharges_list)
]

In [69]:
# Pack the per-molecule representation blocks into one array.
# The blocks can differ in shape between molecules; letting np.array infer a
# ragged object array is deprecated and raises ValueError on NumPy >= 1.24,
# so the object array is built explicitly in that case.
vitd_rep_list = list(vitd_reps)
if len({np.shape(rep) for rep in vitd_rep_list}) <= 1:
    # All blocks share one shape: a regular stacked array is well defined.
    vitd_reps = np.array(vitd_rep_list)
else:
    vitd_reps = np.empty(len(vitd_rep_list), dtype=object)
    for mol_idx, rep in enumerate(vitd_rep_list):
        vitd_reps[mol_idx] = rep

  """Entry point for launching an IPython kernel.


In [70]:
# Label each vitamin-D amon by its file name without directory or extension.
# os.path handles the platform's path separator (glob returns backslashes on
# Windows) and strips only the final extension.
vitd_amons_labels = [os.path.splitext(os.path.basename(path))[0] for path in vitd_amons_files]

In [35]:
# Save the labels, nuclear charges and SLATM representations of all three amon sets to one .npz archive.

In [71]:
def _pack_ragged(items):
    """Return ``items`` as an array, using a 1-D object array when entries differ in shape.

    ``np.savez`` converts every value to an array itself; for sequences whose
    entries have different lengths that implicit conversion is deprecated and
    raises ``ValueError`` on NumPy >= 1.24, so the conversion is done here.
    """
    items = list(items)
    if len({np.shape(item) for item in items}) <= 1:
        # Uniform entries: same result the implicit conversion would give.
        return np.array(items)
    packed = np.empty(len(items), dtype=object)
    for idx, item in enumerate(items):
        packed[idx] = item
    return packed


# Write labels, nuclear charges and representations of all three amon sets
# into one archive. Object arrays are pickled inside the .npz, so reading
# them back requires np.load(..., allow_pickle=True).
np.savez("amons_SLATM_data.npz",
         vitd_amons_labels=vitd_amons_labels,
         vitc_amons_labels=vitc_amons_labels,
         qm9_amons_labels=qm9_amons_labels,
         vitd_amons_ncharges=_pack_ragged(vitd_ncharges),
         vitc_amons_ncharges=_pack_ragged(vitc_ncharges),
         qm9_amons_ncharges=_pack_ragged(qm9_ncharges),
         vitd_amons_reps=vitd_reps,
         vitc_amons_reps=vitc_reps,
         qm9_amons_reps=qm9_reps)

In [72]:
# Sanity check: shape of the first vitamin-D amon's representation block.
vitd_reps[0].shape

(1, 857)