diff --git a/notebooks/proba_functions.ipynb b/notebooks/proba_functions.ipynb
index bcee49c..68945bb 100644
--- a/notebooks/proba_functions.ipynb
+++ b/notebooks/proba_functions.ipynb
@@ -1,355 +1,361 @@
 {
  "cells": [
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## Make distribution tables to calculate probabilities of transfer\n",
     "\n",
     "<div class='alert alert-info'><b>Any application without a proper name would be promptly killed.</b></div>"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 55,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "Current session configs: <tt>{'conf': {'spark.app.name': 'lgptguys_final'}, 'kind': 'pyspark'}</tt><br>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "data": {
       "text/html": [
        "<table>\n",
        "<tr><th>ID</th><th>YARN Application ID</th><th>Kind</th><th>State</th><th>Spark UI</th><th>Driver log</th><th>Current session?</th></tr><tr><td>6800</td><td>application_1589299642358_1295</td><td>pyspark</td><td>busy</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1295/\">Link</a></td><td><a target=\"_blank\" href=\"http://iccluster070.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1295_01_000001/ebouille\">Link</a></td><td></td></tr><tr><td>6803</td><td>application_1589299642358_1298</td><td>pyspark</td><td>idle</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1298/\">Link</a></td><td><a target=\"_blank\" href=\"http://iccluster067.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1298_01_000001/ebouille\">Link</a></td><td></td></tr><tr><td>6806</td><td>application_1589299642358_1301</td><td>pyspark</td><td>busy</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1301/\">Link</a></td><td><a target=\"_blank\" href=\"http://iccluster067.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1301_01_000001/ebouille\">Link</a></td><td></td></tr><tr><td>6807</td><td>application_1589299642358_1302</td><td>pyspark</td><td>idle</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1302/\">Link</a></td><td><a target=\"_blank\" href=\"http://iccluster066.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1302_01_000001/ebouille\">Link</a></td><td></td></tr><tr><td>6808</td><td>application_1589299642358_1303</td><td>pyspark</td><td>idle</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1303/\">Link</a></td><td><a target=\"_blank\" 
href=\"http://iccluster065.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1303_01_000001/ebouille\">Link</a></td><td></td></tr><tr><td>6809</td><td>application_1589299642358_1305</td><td>pyspark</td><td>idle</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1305/\">Link</a></td><td><a target=\"_blank\" href=\"http://iccluster068.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1305_01_000001/ebouille\">Link</a></td><td></td></tr><tr><td>6810</td><td>application_1589299642358_1306</td><td>pyspark</td><td>idle</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1306/\">Link</a></td><td><a target=\"_blank\" href=\"http://iccluster068.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1306_01_000001/ebouille\">Link</a></td><td></td></tr><tr><td>6811</td><td>application_1589299642358_1308</td><td>pyspark</td><td>idle</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1308/\">Link</a></td><td><a target=\"_blank\" href=\"http://iccluster066.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1308_01_000001/ebouille\">Link</a></td><td></td></tr><tr><td>6815</td><td>application_1589299642358_1312</td><td>pyspark</td><td>idle</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1312/\">Link</a></td><td><a target=\"_blank\" href=\"http://iccluster071.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1312_01_000001/ebouille\">Link</a></td><td></td></tr><tr><td>6816</td><td>application_1589299642358_1313</td><td>pyspark</td><td>idle</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1313/\">Link</a></td><td><a target=\"_blank\" 
href=\"http://iccluster072.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1313_01_000001/ebouille\">Link</a></td><td></td></tr><tr><td>6818</td><td>application_1589299642358_1315</td><td>pyspark</td><td>busy</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1315/\">Link</a></td><td><a target=\"_blank\" href=\"http://iccluster066.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1315_01_000001/ebouille\">Link</a></td><td></td></tr><tr><td>6819</td><td>application_1589299642358_1316</td><td>pyspark</td><td>idle</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1316/\">Link</a></td><td><a target=\"_blank\" href=\"http://iccluster067.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1316_01_000001/ebouille\">Link</a></td><td></td></tr></table>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
      "output_type": "display_data"
     }
    ],
    "source": [
     "%%configure\n",
     "{\"conf\": {\n",
     "    \"spark.app.name\": \"lgptguys_final\"\n",
     "}}"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "### Start Spark"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 56,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "Starting Spark application\n"
      ]
     },
     {
      "data": {
       "text/html": [
        "<table>\n",
        "<tr><th>ID</th><th>YARN Application ID</th><th>Kind</th><th>State</th><th>Spark UI</th><th>Driver log</th><th>Current session?</th></tr><tr><td>6820</td><td>application_1589299642358_1317</td><td>pyspark</td><td>idle</td><td><a target=\"_blank\" href=\"http://iccluster044.iccluster.epfl.ch:8088/proxy/application_1589299642358_1317/\">Link</a></td><td><a target=\"_blank\" href=\"http://iccluster065.iccluster.epfl.ch:8042/node/containerlogs/container_e06_1589299642358_1317_01_000001/ebouille\">Link</a></td><td>✔</td></tr></table>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
        "model_id": "",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
        "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
       ]
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "SparkSession available as 'spark'.\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
        "model_id": "",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
        "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
       ]
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
       "An error was encountered:\n",
       "unknown magic command '%spark'\n",
       "UnknownMagic: unknown magic command '%spark'\n",
       "\n"
      ]
     }
    ],
    "source": [
     "# Initialization\n",
     "%%spark"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## Compute probability of missing a transfer from delays distributions\n",
     "\n",
     "Let's first have a look at a slice of the dictionnary of distribution"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "[('1286.TA.26-32-j19-1.12.H__8591182',\n",
        "  array([   0, 1158,  306,  162,   94,   24,   28,   21,    3,    2,    0,\n",
        "            1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,\n",
        "            0,    0,    0,    0,    0,    0,    0,    0,    0,    0])),\n",
        " ('1286.TA.26-32-j19-1.12.H__8591184',\n",
        "  array([  1, 762, 552, 292, 118,  48,  13,   8,   0,   1,   1,   0,   0,\n",
        "           0,   0,   0,   0,   0,   0,   0,   0,   1,   0,   0,   0,   0,\n",
        "           0,   0,   0,   0,   0,   0])),\n",
        " ('1286.TA.26-32-j19-1.12.H__8591195',\n",
        "  array([   0, 1083,  444,  143,   64,   35,   16,    9,    3,    1,    0,\n",
        "            0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,\n",
        "            0,    0,    0,    0,    0,    0,    0,    0,    0,    0])),\n",
        " ('1286.TA.26-32-j19-1.12.H__8591200',\n",
        "  array([  2, 239, 227, 228, 212, 128,  74,  42,  29,  17,   3,   3,   2,\n",
        "           0,   0,   0,   0,   0,   0,   0,   1,   0,   0,   0,   0,   0,\n",
        "           0,   0,   0,   0,   0,   1])),\n",
        " ('1286.TA.26-32-j19-1.12.H__8591209',\n",
        "  array([   0, 1151,  308,  169,   94,   24,   29,   16,    4,    3,    1,\n",
        "            0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,\n",
        "            0,    0,    0,    0,    0,    0,    0,    0,    0,    0]))]"
       ]
      },
      "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "%local\n",
     "\n",
     "import pickle \n",
     "import gzip\n",
     "from itertools import islice\n",
     "\n",
     "with gzip.open(\"../data/distributions.pickle\", \"rb\") as input_file:\n",
     "    d = pickle.load(input_file)\n",
     "\n",
     "# Functon to take a slice from a dictionnary - head equivalent\n",
     "def take(n, iterable):\n",
     "    \"Return first n items of the iterable as a list\"\n",
     "    return list(islice(iterable, n))\n",
     "\n",
     "# display a slice of it\n",
     "take(5, d.items())"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "### Poisson cumulative distribution\n",
     "\n",
-    "The Poisson distribution is popular for modeling the number of times an event occurs in an interval of time or space. We modeled a poisson distribution for delays assuming k is the time (as it was done [here](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0126137)).\n",
+    "The Poisson distribution is popular for modeling the number of times an event occurs in an interval of time or space. We modeled delays with a Poisson distribution, assuming that $k$ is the delay in minutes (as was done [here](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0126137), formulas $(4),(5),(6)$).\n",
     "\n",
     "A discrete random variable X is said to have a Poisson distribution with parameter λ > 0, if, for k = 0, 1, 2, ..., the probability mass function of X is given by:\n",
     "\n",
     "$${\\displaystyle \\!f(k;\\lambda )=\\Pr(X=k)={\\frac {\\lambda ^{k}e^{-\\lambda }}{k!}},}$$\n",
     "where\n",
     "\n",
     "e is Euler's number (e = 2.71828...)\n",
     "k! is the factorial of k.\n",
-    "The positive real number λ is equal to the expected value of X __and__ also to its variance.\n",
+    "The positive real number λ is equal to the expected value of X __and__ to its variance.\n",
     "\n",
     "$${\\displaystyle \\lambda =\\operatorname {E} (X)=\\operatorname {Var} (X)}$$\n",
     "\n",
+    "We can approximate $E[X] \\approx \\mu_i$, where $\\mu_i$ is the mean of our observed sample $X_i$ of size $N$, provided we assume that the sample $X_i$ follows the same distribution as $X$.\n",
+    "\n",
     "The Poisson distribution can be applied to systems with a large number of possible events, each of which is rare. The number of such events that occur during a fixed time interval is, under the right circumstances, a random number with a Poisson distribution.\n",
     "\n",
     "Poisson-related __assumptions__ :\n",
-    "- k is the number of times an event occurs in an interval and k can take values 0, 1, 2, ... \n",
-    "    - __k is therefore our delay in minutes = number of times the event (=delay) occurs__\n",
-    "- The occurrence of one event does not affect the probability that a second event will occur. That is, events occur independently.\n",
+    "- $k$ is the __delay time in minutes__ and can take values 0, 1, 2, ... (non-negative and discrete)\n",
+    "- We assume our sampling $X_i$ of $X$ is good enough to approximate E[X] ~ $\\mu_i$\n",
+    "- The occurrence of one event does not affect the probability of others. That is, events occur independently.\n",
     "    - __We assume being late one day is not affecting the delay of the day after__ \n",
     "- The average rate at which events occur is independent of any occurrences. For simplicity, this is usually assumed to be constant, but may in practice vary with time.\n",
-    "    - __we assumes delays occurs with a constant rate__\n",
+    "    - __we assume delays occur with a constant rate over time__\n",
     "- Two events cannot occur at exactly the same instant\n",
     "\n",
-    "We made a function _poisson_proba_ that takes a trip_id, a stop_id, a arrival time and a departure time and a dictionnary {key : distribution} to compute a __probability to be at least 2 minutes before departure of next trip__. \n",
+    "We made a function `poisson_proba` that takes a `trip_id`, a `stop_id`, an `arrival time`, a `departure time` and a dictionary {key : distribution} to compute the __probability of arriving at least 2 minutes before the departure of the next trip__.\n",
     "\n",
-    "We make a few __sssumptions__ on our side :\n",
+    "We make a few __assumptions__ on our side :\n",
     "- We assume that if we have less than 2 minutes for the transfer, we miss it.\n",
     "- We assume the next train is on time.\n",
-    "- As for poisson distribution $k$ is strictly positive, we assume trains ahead of schedule were on time ($k=0$)"
+    "- Since $k$ must be non-negative for a Poisson distribution, we assume trains ahead of schedule were on time ($k=0$)\n",
+    "\n",
+    "\n",
+    "_Question we should address :_\n",
+    "- _Is the poisson a reasonable approximation of the binomial distribution in our case ?_"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Let's first test the poisson distribution and compare it with our distribution to see our well it fits the data. We will compute $Pr(X = k)$ for each values of k and look at the shape of the poisson distribution compared to the shape of our scaled data. Then, we will "
+    "Let's first test the Poisson distribution and compare it with our distribution to see how well it fits the data. We will compute $Pr(X = k)$ for each value of $k$ and look at the shape of the Poisson distribution compared to the shape of our scaled data. Then, we will compare $\\sum_{k=0}^T Pr(X = k)$ with the cumulative distribution function, which directly gives $Pr(X \\leq T)$."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "lambda (expectation given distribution):  1.0200445434298442 \n",
       "\n",
       "Probability of success for transfer time = 3.0 minutes :  0.9797581319055967\n"
      ]
     }
    ],
    "source": [
     "################################# POISSON FUNCTIONS ########################################\n",
     "%local\n",
     "\n",
     "import pickle \n",
     "import gzip\n",
     "import time\n",
     "import math \n",
     "import datetime\n",
     "import time\n",
     "from scipy.stats import poisson\n",
     "\n",
     "with gzip.open(\"../data/distributions.pickle\", \"rb\") as input_file:\n",
     "    d = pickle.load(input_file)\n",
     "    \n",
     "def get_distrib(key, dico):\n",
     "    if key in dico:\n",
     "        return dico[key]\n",
     "    else:\n",
     "        raise ValueError(\"KEY ERROR: {} not found un distribution dictionnary\".format(key))\n",
     "    \n",
     "def evaluate_lamda(distrib):\n",
     "    # First calculate total number of measures N\n",
     "    N = -1 # by starting at -1 we ignore trains ahead of schedule\n",
     "    for x in distrib:\n",
     "        N += x\n",
     "\n",
     "    lambda_p = 0 # expectation - we want to calculate it\n",
     "    t        = -1 # time = index - 1\n",
     "\n",
     "    for x in distrib:\n",
     "        if t>0:\n",
     "            lambda_p += t*x\n",
     "        t += 1\n",
     "\n",
     "    # calculate lambda - the expectation of x\n",
     "    if N > 0:\n",
     "        lambda_p /= N \n",
     "        print('lambda (expectation given distribution): ',lambda_p, '\\n')\n",
     "        return lambda_p\n",
     "    else : \n",
     "        raise ValueError(\"ERROR : {} distribution has 0 counts\".format(key))\n",
     "        #print('Returning 1 to avoid later problem... \\n')\n",
     "        return 1\n",
     "\n",
     "def process_time(str_time):\n",
     "    x = time.strptime(str_time,'%H:%M')\n",
     "    return datetime.timedelta(hours=x.tm_hour,minutes=x.tm_min,seconds=x.tm_sec).total_seconds()\n",
     "\n",
     "def get_transfer_time(arr_time, dep_time, delta=2.0):\n",
     "    diff_time_min = ( process_time(dep_time) - process_time(arr_time) ) / 60\n",
     "    return diff_time_min - delta\n",
     "\n",
     "def poisson_proba(trip_id, stop_id, arr_time, dep_time, dico):\n",
     "    # Generate key from trip_id / stop_id \n",
     "    key = str(trip_id) + '__' + str(stop_id[0:7]) # 7 first char to be sbb-compatible\n",
     "\n",
     "    # Get distribution from dictionnary\n",
     "    distrib = get_distrib(key, dico)\n",
     "    \n",
     "    # Calculate transfer time at disposal \n",
     "    T = get_transfer_time(arr_time, dep_time)\n",
     "    \n",
     "    # Get lambda value to calculate proba\n",
     "    lambda_p = evaluate_lamda(distrib)\n",
     "\n",
     "    # Get proba\n",
     "    poisson_p = poisson.cdf(T, lambda_p)\n",
     "    print('Probability of success for transfer time = {} minutes : '.format(T),poisson_p)\n",
     "\n",
     "    return poisson_p\n",
     "\n",
     "# input data :\n",
     "trip_id = '1286.TA.26-32-j19-1.12.H'\n",
     "stop_id = '8591184'\n",
     "arrival_time = '07:45'\n",
     "departure_time = '07:50'\n",
     "Pr = poisson_proba(trip_id, stop_id, arrival_time, departure_time, d)"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
    "display_name": "PySpark",
    "language": "",
    "name": "pysparkkernel"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "python",
     "version": 3
    },
    "mimetype": "text/x-python",
    "name": "pyspark",
    "pygments_lexer": "python3"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 4
 }

ID	YARN Application ID	Kind	State	Spark UI	Driver log
6800	application_1589299642358_1295	pyspark	busy	Link	Link
6803	application_1589299642358_1298	pyspark	idle	Link	Link
6806	application_1589299642358_1301	pyspark	busy	Link	Link
6807	application_1589299642358_1302	pyspark	idle	Link	Link
6808	application_1589299642358_1303	pyspark	idle	Link	Link
6809	application_1589299642358_1305	pyspark	idle	Link	Link
6810	application_1589299642358_1306	pyspark	idle	Link	Link
6811	application_1589299642358_1308	pyspark	idle	Link	Link
6815	application_1589299642358_1312	pyspark	idle	Link	Link
6816	application_1589299642358_1313	pyspark	idle	Link	Link
6818	application_1589299642358_1315	pyspark	busy	Link	Link
6819	application_1589299642358_1316	pyspark	idle	Link	Link