diff --git a/src/CNEURONET/NN_formalism.tex b/src/CNEURONET/NN_formalism.tex
index 2f9d1c857..2115e045a 100644
--- a/src/CNEURONET/NN_formalism.tex
+++ b/src/CNEURONET/NN_formalism.tex
@@ -1,494 +1,494 @@
 \documentclass[a4paper,10pt,twoside,twocolumn]{revtex4}
 \usepackage[a4paper]{geometry}
 \usepackage[utf8]{inputenc}
 \usepackage[T1]{fontenc}
 \usepackage{amsmath}
 \usepackage{amssymb}
 \usepackage{xspace}
 %\usepackage[pdftex,bookmarks]{hyperref}%pdf support
 \usepackage{anysize}
 \marginsize{.9in}{.9in}{.8in}{.6in} %left right top bottom
 
 %\usepackage{natbib}
 %\bibliographystyle{plainnat}
 \usepackage{xstring}
 
 \usepackage{minted}
 \usepackage{algpseudocode}
 \usepackage{algorithm}
 \newcommand{\rc}{\ensuremath{r_\mathrm{c}}\xspace}
 \newcommand{\fs}[1][]{\ensuremath{f_{\mathrm{s},m_{#1}}}\xspace}
 \newcommand{\fa}{\ensuremath{f_{\mathrm{a}}}\xspace}
 \newcommand{\fc}{\ensuremath{f_{\mathrm{c}}}\xspace}
 \newcommand{\rij}{\ensuremath{r_{ij}}\xspace}
 \newcommand{\rik}{\ensuremath{r_{ik}}\xspace}
 \newcommand{\rkj}{\ensuremath{r_{kj}}\xspace}
 \newcommand{\rkl}{\ensuremath{r_{kl}}\xspace}
 \renewcommand{\r}[1]{\ensuremath{r_{#1}}\xspace}
 \newcommand{\ofrij}{\ensuremath{\left(\left\{\rij\right\}\right)\xspace}}
 \newcommand{\ofrkj}{\ensuremath{\left(\left\{\rkj\right\}\right)\xspace}}
 \newcommand{\ofr}[1]{\ensuremath{\left(\left\{\r{#1}\right\}\right)\xspace}}
 \newcommand{\ei}[1][i]{\ensuremath{E_{#1}^S}}
 \newcommand{\abs}[1]{\left|\left|#1\right|\right|}
 \renewcommand{\vec}[1]{\boldsymbol{#1}}
 \newcommand{\del}{\partial}
 \newcommand{\eg}{\textit{e.g.,}\xspace}
 
 \newcommand{\decrement}[1]{
 \IfInteger{#1}{\number\numexpr#1-1}{#1-1}
 }
 
 \newcommand{\xln}[2]{\ensuremath{#2_{m_{#1}m_{\decrement{#1}}}}}
 \newcommand{\yln}[2][i]{\xln{#2}{y^{#1}}}
 \newcommand{\wln}[1]{\xln{#1}{w}}
 \newcommand{\xlnp}[2]{\ensuremath{#2_{m_{#1+1}m_{#1}}}}
 \newcommand{\ylnp}[2][i]{\xlnp{#2}{y^{#1}}}
 \newcommand{\wlnp}[1]{\xlnp{#1}{w}}
 \newcommand{\nsum}[2]{\ensuremath{ \sum_{m_{#1}=1}^{N_#1}\left[#2\right]}}
 \newcommand{\klsum}[3][i]{\ensuremath{\sum_{#2\in S_{#1}}\left[#3\right]}}
 \newcommand{\fn}[2][i]{\xln{#2}{\varphi^{#1}}}%\ensuremath{\varphi_{#2}^{#1}}}
 \newcommand{\fnp}[2][i]{\xlnp{#2}{\varphi^{#1}}}%\ensuremath{\varphi_{#2}^{#1}}}
 \newcommand{\fnpr}[2][i]{\xln{#2}{{\varphi'}^{#1}}}%\ensuremath{{\varphi'}_{#2}^{#1}}}
 \newcommand{\ntwo}{\ensuremath{\frac{\left<N_\text{n}\right>}{2}}}
 
 \begin{document}
 \title{Energy, force and stress for NN}
 
 %\author{Till Junge}
 %\affiliation{LAMMM, EPFL}
 \maketitle
 
 \section{NN-Potential Description}
 \label{sec:energy}
 
 \subsection{With One Hidden Layer}
 \label{sec:1layer}
 
 
 Given a cut-off radius $\rc$, the energy $\ei$ for atom $i$ considering its neighbourhood structure $S$ is given by
 \begin{equation}
   \label{eq:energy1layer}
   \ei\ofrij = \nsum{1}{\wln{1}\,\yln{1}\ofrij},
 \end{equation}
 where the neighbourhood $S$ comprises all atoms $j$ within a distance $\rc$ of atom $i$
 \begin{equation}
   \label{eq:S}
   S = \left\{j|\rij=\abs{\vec r_i - \vec r_j}<\rc\right\},
 \end{equation}
 $w^1_{m_1m_0}$ denotes the weights active between the output and the innermost hidden layer and $N_1$ is the number of nodes in the innermost hidden layer. In the case of one hidden layer, the next layer is given by
 \begin{equation}
   \label{eq:hidden-layer}
   \yln{1}\ofrij = \fa\underbrace{\left(\nsum{2}{\wln{2}\,G_{im_2}\ofrij}\right)}_{\fn{2}\ofrij},
 \end{equation}
 where $\fa$ is a non-linear activation function and $G_{im_2}$ in the $m_2$-th symmetry function relating to atom $i$ given by
 \begin{equation}
   \label{eq:sym_fun}
   G_{im_2}\ofrij = \sum_{j \in S}\fs[2](\rij)\, \fc(\rij),
 \end{equation}
 where $\fs[2]$ and $\fc$ are a symmetry function and a smooth cut-off function, respectively.
 
 \subsection{With more Hidden Layers}
 \label{sec:more_layers}
 
 Structurally, more layers simply expand (\ref{eq:hidden-layer}), which is, for $N_l$ layers
 \begin{align}
   \label{eq:hidden_layers}
   &\yln{n}\ofrij =\notag \\ &\quad\fa\underbrace{\left(\sum_{m_{n+1}=1}^{N_{n+1}}\wlnp{n}\,\ylnp{n}\ofrij\right)}_{\fnp{n}\ofrij},\\
   &y^i_{m_{N_l-1}m_{N_l-2}}\ofrij =\notag\\
 &\quad\fa\left(\sum_{m_{N_l}=1}^{N_{N_l}}\wln{N_l}\,G_{im_{N_l}}\ofrij\right),  \label{eq:hidden_layersb}
 \end{align}
 with $G_{im_{N_l}}\ofrij$ defined analogously to \eqref{eq:sym_fun}.
 
 \subsection{Symmetry}
 \label{sec:symmetry}
 A look at the summands $G_{im,j}(\rij)$ in \eqref{eq:sym_fun},
 \begin{align}
   \label{eq:sym_fun_summands}
   G_{im}\ofrij &= \sum_{j \in S}\fs(\rij)\, \fc(\rij) \notag\\
                &= \sum_{j \in S}G_{im,j}(\rij)
 \end{align}
 reveals that -- since they depend only on the distance $\rij$ between atoms $i$ and $j$ -- they are symmetric, i.e.,
 \begin{equation}
   \label{eq:symmetry}
   G_{im,j} =   G_{jm,i}.
 \end{equation}
-This means than the potential energy can be computed using only half the neighbour list; while traversing the half neighbour list for each $m$ (which for on layer is $m_2$), the computed summands $G_{im,j}$ have to be accumulated for both atom $i$ and $j$.
+This means than the potential energy can be computed using only half the neighbour list; while traversing the half neighbour list for each $m$ (which for one layer is $m_2$), the computed summands $G_{im,j}$ have to be accumulated for both atom $i$ and $j$.
 
 
 
 \section{Interatomic Force Calculation}
 \label{sec:interatomic_force}
 With the system's total potential energy given by
 \begin{equation}
   \label{eq:e_tot}
   E_\mathrm{pot}\ofrij = \sum_{i=1}^{N_\mathrm{at}}\ei\ofrij,
 \end{equation}
 where $N_\mathrm{at}$ is the number of atoms, the total force $\vec f_k$ on atom $k$ can be decomposed into a sum of contributions $\vec f_{kl}$ from all its neighbours \citep[(5.107), p. 293 ]{Tadmor2012}:
 \begin{align}
   \label{eq:force_decomp}
-  \vec f_k &= \sum_{k\in S}\vec f_{kl}, \quad\mbox{with}\\
+  \vec f_k &= \sum_{l\in S}\vec f_{kl}, \quad\mbox{with}\\
   \vec f_{kl} &= \frac{\del E_\mathrm{pot}\ofrij}{\del\rkl}\, \vec n_{kl}\notag\\
 &= \vec n_{kl}\, \sum_{i=1}^{N_\mathrm{at}}\frac{\del\ei\ofrij}{\del\rkl}\notag \\
 &= \vec n_{kl}\,\left[ \frac{\del\ei[k]\ofrkj}{\del\rkl} + \klsum[k]{i}{\frac{\del\ei\ofrij}{\del\rkl}}\right],\label{eq:force_decomp_fin}
 \end{align}
 where $\vec n_{kl}$ is the unit vector pointing from atom $k$ to atom $l$ and $S_a$ in the neighbourhood of atom $a$.
 
 \paragraph*{Note: } The neighbour indices have been switched to $k$ and $l$ in order to distinguish them cleanly from the mute indices $i$ and $j$ inside the sums.
 
 \subsection{Implementation Implications}
 \label{sec:impl_ipml}
 It follows from \eqref{eq:force_decomp_fin}, that the computation of the atomic force for any atom requires not only knowledge of the positions of it's neighbouring atoms, but also (the gradient) of their energy. In the case of ghost atoms (periodic image atoms in the case of periodic boundary conditions or atoms from a neighbouring domain in the case of parallel computation) this will require communication.
 
 \subsection{For One Layer}
 \label{sec:force_1_layer}
 
 %\begin{widetext}
 %  \begin{align}
 %    \label{eq:f_ij}
 %    \vec f_{ik} &= \frac{\del\ei\ofrij}{\del\rik}\, \vec n_{ik},\notag\\
 %                & =\vec n_{ik} \frac{\del}{\del\rik}\left[\nsum{1}{\wln{1}\,\yln{1}\ofrij}\right],\notag\\
 %                & =\vec n_{ik} \nsum{1}{\wln{1}\frac{\del\yln{1}\ofrij}{\del\rik}}
 %                  =\vec n_{ik} \nsum{1}{\wln{1}\frac{\del}{\del\rik}\left[\fa\left(\varphi_{a2}\ofrij\right)\right]},\notag\\
 %                & =\vec n_{ik} \nsum{1}{\wln{1}\,\fa'\left(\varphi_{a2}\ofrij\right)\,\varphi'_{a2}\ofrij\frac{\del\rij}{\del\rik}},\notag\\
 %                & =\vec n_{ik} \nsum{1}{
 %                  \wln{1}\,\fa'\left(
 %                    \nsum{2}{\wln{2}\,G_{im_2}\ofrij}
 %                  \right)\, \nsum{2}{\wln{2}\,\frac{\del G_{im_2}\ofrij}{\del\rik}}
 %                  }, \notag\\
 %                & =\vec n_{ik} \nsum{1}{
 %                  \wln{1}\,\fa'\left(
 %                    \nsum{2}{\wln{2}\,\sum_{j \in S}\left[\fs[2]^{ij}\, \fc^{ij}\right]}
 %                  \right)\, \nsum{2}{\wln{2}
 %                  \left[
 %                    \fs[2]^{'ik}\, \fc^{ik} + \fs[2]^{ik}\, \fc^{'ik}
 %                  \right]
 %                  }
 %                  },
 %  \end{align}
 %\end{widetext}
 The second term in \eqref{eq:force_decomp_fin} can be simplified as follows
 \begin{align}
   \label{eq:f_kl_b}
   &f_l\ofrij= \klsum[k]{i}{\frac{\del\ei\ofrij}{\del\rkl}} \notag \\
   &= \klsum[k]{i}{\frac{\del}{\del\rkl}\left(\nsum{1}{\wln{1}\,\yln{1}\ofrij}\right)}\notag\\
   &= \klsum[k]{i}{\nsum{1}{\wln{1}\frac{\del\yln{1}\ofrij}{\del\rkl}}}\notag \\
   & = \klsum[k]{i}{
     \nsum{1}{\wln{1}\frac{\del}{\del\rkl}\left[\fa\left(\fn{2}\ofrij\right)\right]}},
 \end{align}
 with the derivative of $f_a$ given by
 \begin{equation}
   \label{eq:dfa}
   \begin{split}
 & \frac{\del}{\del\rkl}\left[\fa\left(\fn{2}\ofrij\right)\right]=\\ & \fa'\left(\fn{2}\ofrij\right)\,{\fnpr{2}\ofrij\frac{\del\rij}{\del\rkl}}.
   \end{split}
 \end{equation}
 Since $\r{ab} = \r{ba}\quad \forall a,b$ and \eqref{eq:force_decomp_fin} requires that $i\neq k$ the partial derivative in \eqref{eq:dfa} becomes
 \begin{equation}
   \label{eq:drijdrk}
   \frac{\del\rij}{\del\rkl} = \delta_{il}\delta_{kj}
 \end{equation}
 and simplifies $f_l\ofrij$ to
 \begin{equation}
   \label{eq:f_kl_c}
   \begin{split}
     &f_l\ofrij = f_l\ofr{lj}\\
     &\nsum{1}{\wln{1}\,\fa'\left(\fn[l]{2}\ofr{lj}\right)\,\fnpr[l]{2}(r_{kl})},
   \end{split}
 \end{equation}
 It is easy to bring the first term in \eqref{eq:force_decomp_fin} into the same form
 \begin{align}
   \label{eq:f_kl_a}
  & f_k\ofr{kj}= \frac{\del\ei\ofr{kj}}{\del\rkl}= \notag\\& \nsum{1}{\wln{1}\,\fa'\left(\fn[k]{2}\ofr{kj}\right)\,\fnpr[k]{2}(r_{kl})}.
 \end{align}
 
 The total force contribution from atom $l$ to atom $k$ then becomes
 \begin{widetext}
   \begin{align}
     \vec f_{kl} &= \vec n_{kl}\,\left[ f_k\ofr{kj} + f_l\ofr{lj}\right]\notag\\
                 &=\vec n_{kl}\,\left[
                   \nsum{1}{\wln{1}\,\fa'\left(\fn[k]{2}\ofr{kj}\right)\,\fnpr[k]{2}(r_{kl})} +
                   \nsum{1}{\wln{1}\,\fa'\left(\fn[l]{2}\ofr{lj}\right)\,\fnpr[l]{2}(r_{kl})}
                   \right]\notag\\
                 &=\vec n_{kl}\,
                     \nsum{1}{\wln{1}\left\{
                       \fa'\left(\fn[k]{2}\ofr{kj}\right)\,\fnpr[k]{2}(r_{kl})+
                       \fa'\left(\fn[l]{2}\ofr{lj}\right)\,\fnpr[l]{2}(r_{kl})
                   \right\}}\label{eq:phi-reuse}\\
                 &= \vec n_{kl}\,\sum_{m_1=1}^{N_1}\Bigg[\wln{1}
                   \begin{aligned}[t]
                     \Bigg\{&\fa'\left(
                     \nsum{2}{\wln{2}\,G_{km_2}\ofr{kj}}
                   \right)\, \nsum{2}{\wln{2}\,\frac{\del G_{km_2}\ofr{kj}}{\del\rkl}} + \notag\\
                 &\fa'\left(
                   \nsum{2}{\wln{2}\,G_{lm_2}\ofr{lj}}
                 \right)\, \nsum{2}{\wln{2}\,\frac{\del G_{lm_2}\ofr{lj}}{\del\rkl}}
                   \Bigg\}\Bigg]
                 \end{aligned}\notag
   \end{align}
 \end{widetext}
 
 \begin{widetext}
   \begin{align}
                 &= \vec n_{kl}\,\sum_{m_1=1}^{N_1}\Bigg[\wln{1}
                   \begin{aligned}[t]
                     \Bigg\{&\fa'\left(
                     \nsum{2}{\wln{2}\,\klsum{j}{\fs[2]^{kj}\, \fc^{kj}}}
                   \right)\, \nsum{2}{\wln{2}\,
                     \left[
                       \fs[2]^{'kl}\, \fc^{kl} + \fs[2]^{kl}\, \fc^{'kl}
                     \right]}
                   + \notag\\
                 &\fa'\left(
                     \nsum{2}{\wln{2}\,\klsum{j}{\fs[2]^{lj}\, \fc^{lj}}}
                 \right)\, \nsum{2}{\wln{2}\,
                     \left[
                       \fs[2]^{'lk}\, \fc^{lk} + \fs[2]^{lk}\, \fc^{'lk}
                     \right]}
                   \Bigg\}\Bigg]
                 \end{aligned}\notag\\
 %  \end{align}
 %\end{widetext}
 %
 %\begin{widetext}
 %  \begin{align}
     \label{eq:f_kl}
 &\mbox{note: } f_*^{ab} = f_*^{ba} \mbox{ and } {f'}_*^{ab} = {f'}_*^{ba} \mbox{ for both $\fs$ and $\fc$}\quad \Rightarrow\notag\\
     \vec f_{kl} &=\vec n_{kl}\,\sum_{m_1=1}^{N_1}\Bigg[
                   \wln{1}\left\{
                     \fa'\left(
                       \nsum{2}{\wln{2}\,\klsum[k]{j}{\fs[2]^{kj}\, \fc^{kj}}}
                     \right) +
                     \fa'\left(
                       \nsum{2}{\wln{2}\,\klsum[l]{j}{\fs[2]^{klj}\, \fc^{lj}}}
                     \right)
                   \right\}
                   \notag\\
     &\phantom{= \vec n_{k}\,\sum_{m_1=1}\Bigg[}\left.\, \nsum{2}{\wln{2}\,
                     \left[
                       \fs[2]^{'lk}\, \fc^{lk} + \fs[2]^{lk}\, \fc^{'lk}}
                     \right]
       \right]
   \end{align}
 \end{widetext}
 
 
 
 where $f_*^{ij}$ is short for $f_*\ofrij$ and $(*)'$ is the derivative of a scalar function with respect to its argument. Since $\vec f_{ij} = -\vec f_{ji}$, \eqref{eq:f_kl} need only be evaluated over half the neighbour list.
 
 \paragraph*{Note 1:} The cutoff function $\fc\ofrij$ does not depend on any summation parameters, so it can be precomputed once per time step for each neighbourhood relationship and -- for a system with $N_\text{a}$ atoms with $\left<N_\text{n}\right>$ neighbours on average each -- stored at a storage cost of $N_\text{a} \times \frac{\left<N_\text{n}\right>}{2}$ (factor $\frac{1}{2}$ because only half the neighbour list is required).
 
 \paragraph*{Note 2:} The symmetry function $\fs[2]\ofrij$ is parametrised by the input node index $m_2$, which means that precomputing it once per time step comes at a cost of $N_\text{a} \times \frac{\left<N_\text{n}\right>}{2}\times N_{m_2}$, where $N_{m_2}$ is the number of input nodes ($N_{m_2} = 120$ in the case of the AlSiMg potential).
 
 
 
 \subsection{With more Hidden Layers}
 \label{sec:with-more-hidden}
 Just as for the energy calculation, the force calculation can be expressed recursively for more than one hidden layer. For this we complement the recursive definition of energy in \eqref{eq:hidden_layers} and \eqref{eq:hidden_layersb} with their derivatives
 \begin{align}
   \label{eq:recurs_derivs}
   &\frac{\del\yln{n}\ofrij}{\del\rkl}=\notag\\
   &\quad\fa'\left(\fnp{n}
     \right)\,
     \sum_{m_{n+1}=1}^{N_{n+1}}\wlnp{n}\,\frac{\del\ylnp{n}\ofrij}{\del\r{kl}},
 \end{align}
 
 \begin{align}
 &\frac{\del y^i_{m_{N_l-1}m_{N_l-2}}\ofrij}{\del\rkl} =\notag\\
 &\quad\fa'\left(\varphi^i_{m_{N_l}m_{N_l-1}}\right)\,\sum_{m_{N_l}=1}^{N_{N_l}}\wln{N_l}\,\frac{\del G_{im_{N_l}}}{\del\r{kl}}\ofrij,  \label{eq:recurs_hidden_layersb}
 \end{align}
 which is trivial to do (and how it is done) in our Lammps implementation, see \ref{sec:impl}. The recipe becomes simply
 \begin{equation}
   \begin{split}
     \vec f_{kl} =\vec n_{kl}\,\Bigg[
                   &\nsum{1}{\wln{1}\,\frac{\del\yln[k]{1}}{\del\r{kl}}} +\\
                 &\nsum{1}{\wln{1}\,\frac{\del\yln[l]{1}}{\del\r{kl}}}
                   \Bigg],
   \end{split}
 \end{equation}
 with the recursive application of \eqref{eq:recurs_derivs} and \eqref{eq:recurs_hidden_layersb}.
 
 \section{Lammps Implementation}
 \label{sec:impl}
 
 The guiding principles for our implementation are
 \begin{enumerate}
 \item Don't repeat yourself: In order to avoid reevaluating the same expression, pre-compute and store every value that is used more than once.
 \item Use only half the neighbour list: Compute inter-atomic forces and accumulate them on both interaction partners, then rely on Lammps to transfer forces correctly from ghost atoms to the originals.
 \item Don't waste memory: The easiest way to satisfy the first principle above is to evaluate every expression once, store the results and access them when needed. Due to the insanely large number of terms (3630 per atom in the case of AlSiMg)
   \begin{equation*}
     N_\mathrm{terms} = \sum_{n = 1}^{N_l -1}N_nN_{n+1}
   \end{equation*}
 this strategy is prohibitively expensive. Instead, store only data that will allow the potential to be evaluated quicker.
 \end{enumerate}
 
 \subsection{Data Structures}
 \label{sec:data_structures}
 
 Almost all relevant quantities that appear in the evaluation of forces and energies are either relative to an atom $i$ (\eg $\fn{2}, \ei, G_{im}$) or an atom pair $ij$ (\eg $\fc^{ij},  \rij$) and are evaluated while working through the neighbour list structures. Our implementation provides a data structure called \texttt{NeighContainer} that provides an iterator by atom (\texttt{iterator}) giving consecutive access to each local non-ghost atom and its position, force, energy and virial data. The atom iterator also provides a neighbourhood iterator (\texttt{jterator}, to mimic the $ij$ of an atom pair) giving access to atom $i$'s neighbours \footnote{Considering only the half neighbourhood list.} and their position, force, energy and virial data.
 
 The \texttt{NeighContainer} allows also for the creation of associated atomic-data containers (\texttt{i\_container}) and pair-data containers (\texttt{ij\_container}). These containers can store scalar or array data relative to an atom or a pair of atoms and are accessed through the same iterators as provided by the \texttt{NeighContainer}. This abstraction allows for a straight-forward and hopefully fail-safe translation of the expressions of Sections \ref{sec:energy} and \ref{sec:interatomic_force} as shown in the almost real example in Listing \ref{lst:neighcont}.
 
 \begin{listing}
 \caption{Use of \texttt{NeighContainer} and its iterators}
 \label{lst:neighcont}
 \begin{minted}[bgcolor=olive!10]{cpp}
 /* loop through all atoms at_i in
    NeighContainer neighs            */
 for (auto & at_i:neighs) {
   // store some data relative to at_i
   G_im[at_i] = comp_G_im(at_i);
   /* loop through neighbours ij of
      atom at_i                      */
   for (auto & ij:at_i) {
     /* compute interatomic distances
        r_ij based on easy access to
        positions in neighs (get_x)
        of both atoms                */
     r_ij[ij] = comp_r_ij(at_i.get_x(),
                          ij.get_x());
     /* use atomic or pair data through
        the same iterators as when
        accessing data within neighs */
     f_ij[ij] = comp_f_ij(r_ij[ij],
                          G_im[at_i]);
   }
 }
   \end{minted}
 \end{listing}
 
 \begin{table}[htb]
   \centering
 \begin{tabular}{l|l|l|l|l}
   name              & type   & size            & content                                & update freq\\\hline\hline
   \texttt{r\_ij   } & pair   & \ntwo           &  $\rij$                                &  1          \\
   \texttt{fc\_ij  } & pair   & \ntwo           &  $\fc(\rij)$                           &  1          \\
   \texttt{dfc\_ij } & pair   & \ntwo           &  $\fc'(\rij)$                          &  1          \\
   %\texttt{fsn\_ij } & pair   & \ntwo           &  $\fs(\rij)$                           &  $N_{N_l}$          \\
   %\texttt{dfsn\_ij} & pair   & \ntwo           &  $\fs'(\rij)$                          &  $N_{N_l}$          \\
   \texttt{G\_im   } & atomic & $N_{N_l}$        &  $G_{im}\ofrij$                        &  $N_{N_l}$ \\
   \texttt{DG\_imj } & pair   & $\ntwo\,N_{N_l}$ &  $\frac{\del}{\del\rik}G_{im}\ofr{ij}$ &  $N_{N_l}$ \\
   \texttt{phi\_im } & atomic & $N_l -1$        &  $\fn{a}$                              &  depends
 \end{tabular}
 \caption{Container for reusable data. ``Size'' refers to the number of scalar values stored per atom and ``update frequency'' is the number of updates done to the container per neighbouring atom pair per time step.}
 \label{tab:data}
 \end{table}
 The containers used to store reusable data are listed in Table \ref{tab:data}.
 The first three containers contain the interatomic distances $\rij$ as well as the tabled evaluations of the cutoff function $\fc(\rij)$ and the corresponding derivatives $\fc'(\rij)$. These values depend only on the geometry and are evaluated once per time step and reused heavily for the next two containers, in the repeated evaluation of the symmetry function $G_{im}\ofrij$ and its directional gradient $\frac{\del}{\del\rik}G_{im}\ofr{ij}$ which both have to be evaluated as many times as there are input nodes to the neural net $N_{N_l}$, see \eqref{eq:hidden_layersb} and \eqref{eq:recurs_hidden_layersb}.
 
  The containers \texttt{G\_im} and \texttt{DG\_imj} are costly to evaluate (1 symmetry function evaluation per atom pair) and the evaluation of the atomic energies and forces is merely a succession of inner products of these values with the given weights $\wln{n}$ and a few evaluations of the activation function $\fa\ofrij$ and its derivative $\fa'\ofrij$.
 
 Note that the evaluated symmetry functions of both atoms $G_{im}\ofr{ik}$, $G_{jm}\ofr{jl}$ need to be known in order to compute the interatomic force between atom $i$ and $j$. This means that the container \texttt{G\_im} needs to be synchronised for ghost atoms across processor and periodic boundaries.
 
 Looking at \eqref{eq:hidden_layers} and \eqref{eq:phi-reuse} shows that argument $\fn{a}$ to the activation functions $\fa(\fn{a})$ and derivatives $\fa(\fn{a})$ reappears unchanged for multiple times (once in the evaluation of atom $i$'s energy plus every time a force between atom $i$ and a neighbour is evaluated, in the case of multiple hidden layers, values of lower level arguments $\fnp{a}$ are reused in every force evaluation as well), so they are stored and reused as well.
 
 \subsection{Evaluation Procedure}
 \label{sec:evaluation-procedure}
 
 The need for communication for the synchronisation of \texttt{G\_im} for ghost atoms naturally leads to a natural split of the procedure into three parts; i) precomputation of symmetry functions and associated derivatives ii) synchronisation of ghost symmetry functions iii) evaluation of forces and energies. The procedures are detailed in Algorithms \ref{alg:i}, \ref{alg:ii}, and \ref{alg:iii}.
 
 \begin{algorithm}[H]
  \caption{Precomputation of Geometric Data}
  \label{alg:i}
  \begin{algorithmic}
    \ForAll{atoms $i$}
    \State going through $i$'s half-neighbourhood:
      \ForAll{neighbours $j$}
        \State \texttt{r\_ij[ij] \ } $\gets \rij$
        \State \texttt{fc\_ij[ij] } $\gets \fc(\rij)$
        \State \texttt{Dfc\_ij[ij]} $\gets \fc'(\rij)$
        \For{$m \in \{1, 2, \cdots, N_{N_l}\}$}
          \State \texttt{G\_im \ [i] } $+\gets\fs^{ij}\,\fc^{ij}$
          \State \texttt{G\_im \ [j] } $+\gets\fs^{ij}\,\fc^{ij}$
          \State \texttt{DG\_imj[ij]} $+\gets\fs'^{ij}\,\fc^{ij} + \fs^{ij}\,\fc'^{ij}$
        \EndFor
      \EndFor
    \EndFor
    \State\Comment Discard \texttt{fc\_ij} and \texttt{Dfc\_ij}
  \end{algorithmic}
 \end{algorithm}
 
 \begin{algorithm}[H]
  \caption{Synchronise Ghost Data}
  \label{alg:ii}
  \begin{algorithmic}
    \ForAll{ghost atoms $i_\text{g}$}
      \For{$m \in \{1, 2, \cdots, N_{N_l}\}$}
        \State pack buffer \texttt{sendbuf}$\gets$ \texttt{G\_im[$i_\text{g}$][$m$]}
      \EndFor
    \EndFor
 
    \State communicate buffers \texttt{recvbuf}$\gets$\texttt{sendbuf}
    \ForAll{atoms $i_\text{h}$ who have ghosts}
      \For{$m \in \{1, 2, \cdots, N_{N_l}\}$}
      \State accumulate contributions from ghosts:
        \State\texttt{G\_im[$i_\text{h}$][$m$]} $+\gets$  \texttt{sendbuf}:
      \EndFor
    \EndFor
 \State\Comment{now all \emph{non-ghost} atoms have correct $G_{im}$}
    \ForAll{atoms $i_\text{h}$ who have ghosts}
      \For{$m \in \{1, 2, \cdots, N_{N_l}\}$}
        \State pack buffer \texttt{sendbuf}$\gets$ \texttt{G\_im[$i_\text{h}$][$m$]}
      \EndFor
    \EndFor
 
    \State communicate buffers \texttt{recvbuf}$\gets$\texttt{sendbuf}:
    \ForAll{ghost atoms $i_\text{g}$}
      \For{$m \in \{1, 2, \cdots, N_{N_l}\}$}
        \State overwrite local value:
        \State\texttt{G\_im[$i_\text{h}$][$m$]} $\gets$  \texttt{sendbuf}
      \EndFor
    \EndFor
 \State\Comment{now \emph{all atoms} have correct $G_{im}$}
  \end{algorithmic}
 \end{algorithm}
 
 \begin{algorithm}[H]
    \caption{Evaluate Potential and Forces}
  \label{alg:iii}
  \begin{algorithmic}
    \ForAll{atoms $i$}
      \State precompute activaton function arguments
      \For {$m \in \{I, 2, \cdots, N_1\}$}
        \State  \texttt{phi\_im[$i$][$m$]} $\gets\fn{2}$
      \EndFor
    \EndFor
    \State \emph{Note}: lower level $\varphi$ cannot be precomputed here
    \ForAll{atoms $i$}
      \State compute energy according to \eqref{eq:energy1layer} using \\\hskip\algorithmicindent\hskip\algorithmicindent precomputed $\varphi$
      \State \texttt{i.eatom} $\gets\ei$
      \State going through $i$'s half-neighbourhood:
      \ForAll{neighbours $j$}
        \State compute unit vector from $i$ to $j$
        \State$\vec n_{ij} \gets (j\mbox{\texttt{.get\_x()}}- i\mbox{\texttt{.get\_x()}})/\mbox{\texttt{r\_ij[$ij$]}}$
        \State compute $\fnpr{2}(\rij)$  using precomputed\\
          \hskip\algorithmicindent\hskip\algorithmicindent\hskip\algorithmicindent 
          \texttt{DG\_imj[$ij$]} and lower level \texttt{phi\_im[$i$]}\\
          \hskip\algorithmicindent\hskip\algorithmicindent\hskip\algorithmicindent 
          and \texttt{phi\_im[$j$]}
        \State sum up interatomic force
        \State $\vec f_{ij} \gets \textstyle\sum^{N_i}\left[\wln{1}\left\{
              \fa'(\fn{2})\fnpr{2} + \right.\right.$\\
          \hskip\algorithmicindent\hskip\algorithmicindent\hskip\algorithmicindent 
          \hskip\algorithmicindent\hskip\algorithmicindent\hskip\algorithmicindent 
          \hskip\algorithmicindent\hskip\algorithmicindent
          $\left.\left.\fa'(\fn[j]{2})\fnpr[j]{2}\right\}\right]$
 
        \State $i\mbox{\texttt{.get\_f()}} \gets i\mbox{\texttt{.get\_f()}} + \vec f_{ij}$
        \State $j\mbox{\texttt{.get\_f()}} \gets j\mbox{\texttt{.get\_f()}} - \vec f_{ij}$
      \EndFor
    \EndFor
  \end{algorithmic}
 \end{algorithm}
 
 \bibliography{./bib}
 
 \end{document}
 
 
 %%% Local Variables:
 %%% mode: latex
 %%% ispell-local-dictionary: "british"
 %%% reftex-default-bibliography: ("bib.bib")
 %%% TeX-master: t
 %%% TeX-command-extra-options: "-shell-escape"
 %%% End: