diff --git a/notebooks/old_notebooks/OLD_Arrays_to_pickle.ipynb b/notebooks/old_notebooks/OLD_Arrays_to_pickle.ipynb new file mode 100644 index 0000000..490e7a9 --- /dev/null +++ b/notebooks/old_notebooks/OLD_Arrays_to_pickle.ipynb @@ -0,0 +1,3924 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Preprocessing part 2: preparing the arrays\n", + "In this notebook we take 2 datasets prepared in spark: stop_times and transfers, and prepare them into the array format needed to run RAPTOR" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Outline\n", + "In this notebook the following actions are performed:\n", + "- Transform stop_ids with platform information into the parent station stop_id\n", + "- Keep only trips with a departure after 7 am and before 7 pm\n", + "- Delete trips which only have 1 stop\n", + "- Create integer IDs for routes, trips and stops, following the definition of the RAPTOR algorithm in Stop_times\n", + "- Add integer IDs to transfers and keep only stops that are inside the stop_times dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import pickle\n", + "import itertools" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read files\n", + "Before running make sure the .csv files are in /data . If not run notebook \"transfer_to_local\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0route_idstop_id_generaltrip_idstop_idarrival_timedeparture_timestop_sequencestop_namestop_latstop_lontrip_headsigntrip_short_namedirection_iddeparture_first_stoproute_intstop_countstop_introute_desc
0026-66-j19-1859120517.TA.26-66-j19-1.1.H859120517:00:0017:00:003Zürich, Hürlimannplatz47.3650668.526539Zürich, Neubühl3870016:55:001225121317Bus
1126-66-j19-1859141517.TA.26-66-j19-1.1.H859141517:02:0017:02:004Zürich, Waffenplatzstrasse47.3614828.525749Zürich, Neubühl3870016:55:001225121267Bus
2226-66-j19-1859120417.TA.26-66-j19-1.1.H859120417:03:0017:03:005Zürich, Hügelstrasse47.3585438.526997Zürich, Neubühl3870016:55:0012251267Bus
3326-66-j19-1859109817.TA.26-66-j19-1.1.H859109817:04:0017:04:006Zürich, Brunau/Mutschellenstr.47.3551478.527141Zürich, Neubühl3870016:55:00122512512Bus
4426-66-j19-1859139217.TA.26-66-j19-1.1.H859139217:05:0017:05:007Zürich, Thujastrasse47.3501878.527806Zürich, Neubühl3870016:55:00122512403Bus
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 route_id stop_id_general trip_id stop_id \\\n", + "0 0 26-66-j19-1 8591205 17.TA.26-66-j19-1.1.H 8591205 \n", + "1 1 26-66-j19-1 8591415 17.TA.26-66-j19-1.1.H 8591415 \n", + "2 2 26-66-j19-1 8591204 17.TA.26-66-j19-1.1.H 8591204 \n", + "3 3 26-66-j19-1 8591098 17.TA.26-66-j19-1.1.H 8591098 \n", + "4 4 26-66-j19-1 8591392 17.TA.26-66-j19-1.1.H 8591392 \n", + "\n", + " arrival_time departure_time stop_sequence stop_name \\\n", + "0 17:00:00 17:00:00 3 Zürich, Hürlimannplatz \n", + "1 17:02:00 17:02:00 4 Zürich, Waffenplatzstrasse \n", + "2 17:03:00 17:03:00 5 Zürich, Hügelstrasse \n", + "3 17:04:00 17:04:00 6 Zürich, Brunau/Mutschellenstr. \n", + "4 17:05:00 17:05:00 7 Zürich, Thujastrasse \n", + "\n", + " stop_lat stop_lon trip_headsign trip_short_name direction_id \\\n", + "0 47.365066 8.526539 Zürich, Neubühl 3870 0 \n", + "1 47.361482 8.525749 Zürich, Neubühl 3870 0 \n", + "2 47.358543 8.526997 Zürich, Neubühl 3870 0 \n", + "3 47.355147 8.527141 Zürich, Neubühl 3870 0 \n", + "4 47.350187 8.527806 Zürich, Neubühl 3870 0 \n", + "\n", + " departure_first_stop route_int stop_count stop_int route_desc \n", + "0 16:55:00 1225 12 1317 Bus \n", + "1 16:55:00 1225 12 1267 Bus \n", + "2 16:55:00 1225 12 67 Bus \n", + "3 16:55:00 1225 12 512 Bus \n", + "4 16:55:00 1225 12 403 Bus " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#stop_times\n", + "stop_times_curated = pd.read_csv(\"../data/stop_times_final_cyril.csv\")\n", + "stop_times_curated.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0route_idstop_idtrip_idarrival_timedeparture_timestop_sequencedirection_idstop_nameroute_desc
0026-759-j19-18573205:0:K1330.TA.26-759-j19-1.7.R05:28:0005:28:0011Zürich Flughafen, BahnhofBus
1126-67-j19-1859134146.TA.26-67-j19-1.1.R05:33:0005:33:0011Zürich, Schmiede WiedikonBus
2226-325-j19-18587020:0:D265.TA.26-325-j19-1.2.H05:34:0005:34:0010Dietikon, BahnhofBus
3326-11-A-j19-185913821266.TA.26-11-A-j19-1.21.H05:37:0005:37:0010Zürich, Sternen OerlikonTram
4426-302-j19-18590844162.TA.26-302-j19-1.4.R05:49:0005:49:0011Urdorf, OberurdorfBus
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 route_id stop_id trip_id \\\n", + "0 0 26-759-j19-1 8573205:0:K 1330.TA.26-759-j19-1.7.R \n", + "1 1 26-67-j19-1 8591341 46.TA.26-67-j19-1.1.R \n", + "2 2 26-325-j19-1 8587020:0:D 265.TA.26-325-j19-1.2.H \n", + "3 3 26-11-A-j19-1 8591382 1266.TA.26-11-A-j19-1.21.H \n", + "4 4 26-302-j19-1 8590844 162.TA.26-302-j19-1.4.R \n", + "\n", + " arrival_time departure_time stop_sequence direction_id \\\n", + "0 05:28:00 05:28:00 1 1 \n", + "1 05:33:00 05:33:00 1 1 \n", + "2 05:34:00 05:34:00 1 0 \n", + "3 05:37:00 05:37:00 1 0 \n", + "4 05:49:00 05:49:00 1 1 \n", + "\n", + " stop_name route_desc \n", + "0 Zürich Flughafen, Bahnhof Bus \n", + "1 Zürich, Schmiede Wiedikon Bus \n", + "2 Dietikon, Bahnhof Bus \n", + "3 Zürich, Sternen Oerlikon Tram \n", + "4 Urdorf, Oberurdorf Bus " + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#stop_times\n", + "stop_times_curated = pd.read_csv(\"../data/stop_times_curated.csv\")\n", + "stop_times_curated.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We drop columns not useful to us" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_curated = stop_times_curated.drop(columns=[\"Unnamed: 0\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0stop_idstop_id2distanceTransfer_time_secstop_namestop_name2
00850092685906160.122430146Oetwil a.d.L., SchweizäckerGeroldswil, Schweizäcker
11850092685907370.300175360Oetwil a.d.L., SchweizäckerOetwil an der Limmat, Halde
2285021868502186:0:10.0067628Dietikon StoffelbachDietikon Stoffelbach
3385021868502186:0:20.01352416Dietikon StoffelbachDietikon Stoffelbach
4485021868502186P0.0000000Dietikon StoffelbachDietikon Stoffelbach
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 stop_id stop_id2 distance Transfer_time_sec \\\n", + "0 0 8500926 8590616 0.122430 146 \n", + "1 1 8500926 8590737 0.300175 360 \n", + "2 2 8502186 8502186:0:1 0.006762 8 \n", + "3 3 8502186 8502186:0:2 0.013524 16 \n", + "4 4 8502186 8502186P 0.000000 0 \n", + "\n", + " stop_name stop_name2 \n", + "0 Oetwil a.d.L., Schweizäcker Geroldswil, Schweizäcker \n", + "1 Oetwil a.d.L., Schweizäcker Oetwil an der Limmat, Halde \n", + "2 Dietikon Stoffelbach Dietikon Stoffelbach \n", + "3 Dietikon Stoffelbach Dietikon Stoffelbach \n", + "4 Dietikon Stoffelbach Dietikon Stoffelbach " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#transfers\n", + "transfers = pd.read_csv(\"../data/transfers.csv\")\n", + "transfers.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create stop_id same for all platforms\n", + "In the algorithm we make the simplifying assumptions that each time there is a change is the same station there is a 2 min change time. Due to this assumptions we can keep only the parent station name\n", + "The parent id is contained in the first 7 characters, so we can take the substring to create the parent stop_id" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "#copy information stop_id with platform in stop_id_raw\n", + "stop_times_curated[\"stop_id_raw\"] = stop_times_curated[\"stop_id\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
route_idstop_idtrip_idarrival_timedeparture_timestop_sequencedirection_idstop_nameroute_descstop_id_raw
026-759-j19-185732051330.TA.26-759-j19-1.7.R05:28:0005:28:0011Zürich Flughafen, BahnhofBus8573205:0:K
126-67-j19-1859134146.TA.26-67-j19-1.1.R05:33:0005:33:0011Zürich, Schmiede WiedikonBus8591341
226-325-j19-18587020265.TA.26-325-j19-1.2.H05:34:0005:34:0010Dietikon, BahnhofBus8587020:0:D
326-11-A-j19-185913821266.TA.26-11-A-j19-1.21.H05:37:0005:37:0010Zürich, Sternen OerlikonTram8591382
426-302-j19-18590844162.TA.26-302-j19-1.4.R05:49:0005:49:0011Urdorf, OberurdorfBus8590844
\n", + "
" + ], + "text/plain": [ + " route_id stop_id trip_id arrival_time \\\n", + "0 26-759-j19-1 8573205 1330.TA.26-759-j19-1.7.R 05:28:00 \n", + "1 26-67-j19-1 8591341 46.TA.26-67-j19-1.1.R 05:33:00 \n", + "2 26-325-j19-1 8587020 265.TA.26-325-j19-1.2.H 05:34:00 \n", + "3 26-11-A-j19-1 8591382 1266.TA.26-11-A-j19-1.21.H 05:37:00 \n", + "4 26-302-j19-1 8590844 162.TA.26-302-j19-1.4.R 05:49:00 \n", + "\n", + " departure_time stop_sequence direction_id stop_name \\\n", + "0 05:28:00 1 1 Zürich Flughafen, Bahnhof \n", + "1 05:33:00 1 1 Zürich, Schmiede Wiedikon \n", + "2 05:34:00 1 0 Dietikon, Bahnhof \n", + "3 05:37:00 1 0 Zürich, Sternen Oerlikon \n", + "4 05:49:00 1 1 Urdorf, Oberurdorf \n", + "\n", + " route_desc stop_id_raw \n", + "0 Bus 8573205:0:K \n", + "1 Bus 8591341 \n", + "2 Bus 8587020:0:D \n", + "3 Tram 8591382 \n", + "4 Bus 8590844 " + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Use only first 7 characters for stop_id\n", + "stop_times_curated[\"stop_id\"] = stop_times_curated[\"stop_id_raw\"].str.slice(0, 7)\n", + "stop_times_curated[\"stop_id\"] = pd.to_numeric(stop_times_curated[\"stop_id\"])\n", + "stop_times_curated.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "#copy information stop_id with platform in stop_id_raw\n", + "transfers[\"stop_id_raw\"] = transfers[\"stop_id\"]\n", + "transfers[\"stop_id2_raw\"] = transfers[\"stop_id2\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We do the operation also on the transfers dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0stop_idstop_id2distanceTransfer_time_secstop_namestop_name2stop_id_rawstop_id2_raw
00850092685906160.122430146Oetwil a.d.L., SchweizäckerGeroldswil, Schweizäcker85009268590616
11850092685907370.300175360Oetwil a.d.L., SchweizäckerOetwil an der Limmat, Halde85009268590737
22850218685021860.0067628Dietikon StoffelbachDietikon Stoffelbach85021868502186:0:1
33850218685021860.01352416Dietikon StoffelbachDietikon Stoffelbach85021868502186:0:2
44850218685021860.0000000Dietikon StoffelbachDietikon Stoffelbach85021868502186P
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 stop_id stop_id2 distance Transfer_time_sec \\\n", + "0 0 8500926 8590616 0.122430 146 \n", + "1 1 8500926 8590737 0.300175 360 \n", + "2 2 8502186 8502186 0.006762 8 \n", + "3 3 8502186 8502186 0.013524 16 \n", + "4 4 8502186 8502186 0.000000 0 \n", + "\n", + " stop_name stop_name2 stop_id_raw \\\n", + "0 Oetwil a.d.L., Schweizäcker Geroldswil, Schweizäcker 8500926 \n", + "1 Oetwil a.d.L., Schweizäcker Oetwil an der Limmat, Halde 8500926 \n", + "2 Dietikon Stoffelbach Dietikon Stoffelbach 8502186 \n", + "3 Dietikon Stoffelbach Dietikon Stoffelbach 8502186 \n", + "4 Dietikon Stoffelbach Dietikon Stoffelbach 8502186 \n", + "\n", + " stop_id2_raw \n", + "0 8590616 \n", + "1 8590737 \n", + "2 8502186:0:1 \n", + "3 8502186:0:2 \n", + "4 8502186P " + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Use only first 7 characters for stop_id\n", + "transfers[\"stop_id\"] = transfers[\"stop_id_raw\"].str.slice(0, 7)\n", + "transfers[\"stop_id2\"] = transfers[\"stop_id2_raw\"].str.slice(0, 7)\n", + "transfers[\"stop_id\"] = pd.to_numeric(transfers[\"stop_id\"])\n", + "transfers[\"stop_id2\"] = pd.to_numeric(transfers[\"stop_id2\"])\n", + "transfers.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Keep only trips during the day\n", + "Our model will only consider trips during business days and normal hours, so we can delete all departures before 7 am and after 7 pm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can get the hour of departure using str.slice , and explore the hours we have in the dataset. Then we convert these hours in integers in order to filter." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15',\n", + " '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '00',\n", + " '04', '01'], dtype=object)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated.departure_time.str.slice(0,2).unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_curated[\"hour_departure\"] = pd.to_numeric(stop_times_curated.departure_time.str.slice(0,2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check if well converted to int" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,\n", + " 22, 23, 24, 25, 0, 4, 1])" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated[\"hour_departure\"].unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We drop the trips before 7 am and after 7 pm using np.where function" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "331751" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated.trip_id.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "trip_id_drop = np.where(((stop_times_curated.hour_departure > 19) |\\\n", + " (stop_times_curated.hour_departure < 7)),\\\n", + " stop_times_curated[\"trip_id\"] , None)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": 
{}, + "outputs": [], + "source": [ + "stop_times_curated = stop_times_curated[~stop_times_curated[\"trip_id\"].isin(trip_id_drop)]" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "246576" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated.trip_id.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With this operation we have decreased the size of stop_times by about 90k lines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Delete trips with 1 stop\n", + "Trips with only 1 stop are useless in our dataset and will only pollute the algorithm. For this reason we dete these" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by counting the stops of each trip" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
route_idstop_idtrip_idarrival_timedeparture_timestop_sequencedirection_idstop_nameroute_descstop_id_rawhour_departure
trip_id
1.TA.1-231-j19-1.1.H11511717181151152
1.TA.1-44-j19-1.1.R13133313131
1.TA.1-444-j19-1.1.H19199919191
1.TA.12-E03-j19-1.1.H12122212122
1.TA.18-46-j19-1.1.H11111111111
\n", + "
" + ], + "text/plain": [ + " route_id stop_id trip_id arrival_time \\\n", + "trip_id \n", + "1.TA.1-231-j19-1.1.H 1 15 1 17 \n", + "1.TA.1-44-j19-1.1.R 1 3 1 3 \n", + "1.TA.1-444-j19-1.1.H 1 9 1 9 \n", + "1.TA.12-E03-j19-1.1.H 1 2 1 2 \n", + "1.TA.18-46-j19-1.1.H 1 1 1 1 \n", + "\n", + " departure_time stop_sequence direction_id stop_name \\\n", + "trip_id \n", + "1.TA.1-231-j19-1.1.H 17 18 1 15 \n", + "1.TA.1-44-j19-1.1.R 3 3 1 3 \n", + "1.TA.1-444-j19-1.1.H 9 9 1 9 \n", + "1.TA.12-E03-j19-1.1.H 2 2 1 2 \n", + "1.TA.18-46-j19-1.1.H 1 1 1 1 \n", + "\n", + " route_desc stop_id_raw hour_departure \n", + "trip_id \n", + "1.TA.1-231-j19-1.1.H 1 15 2 \n", + "1.TA.1-44-j19-1.1.R 1 3 1 \n", + "1.TA.1-444-j19-1.1.H 1 9 1 \n", + "1.TA.12-E03-j19-1.1.H 1 2 2 \n", + "1.TA.18-46-j19-1.1.H 1 1 1 " + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "number_stop = stop_times_curated.groupby('trip_id').nunique()\n", + "number_stop.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "#get trips with 1 stop\n", + "trip_with_1_stop = np.where((number_stop.stop_id == 1), number_stop.index, None)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check number of trips before cleaning" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "20261" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated.trip_id.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We drop the rows with a unique stop per trip" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "#drop trips with only 1 stop\n", + "stop_times_curated = stop_times_curated[~stop_times_curated[\"trip_id\"].isin(trip_with_1_stop)]" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we check how many trips there still. About 900 trips with only 1 stop have been deleted" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "19390" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated.trip_id.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create route_int, trip_int and stop_int as consecutive integer IDs\n", + "This operation is needed for sorting the routes, trips and stops in the right order. Additionally integers are lighter than strings so the algorithm will need fewer memory to work with the arrays." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Route_int\n", + "The route_int Id is given in an abitrary order" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start creating a tuple with all the stops in a trip" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_curated = stop_times_curated.sort_values([\"trip_id\", \"stop_sequence\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stop_id
trip_id
1.TA.1-231-j19-1.1.H(8572747, 8582462, 8572600, 8572601, 8502553, ...
1.TA.1-44-j19-1.1.R(8590275, 8591891, 8590279)
1.TA.1-444-j19-1.1.H(8572747, 8580847, 8581346, 8502894, 8502979, ...
1.TA.12-E03-j19-1.1.H(8573205, 8596126)
1.TA.21-23-j19-1.1.R(8503000, 8503003)
\n", + "
" + ], + "text/plain": [ + " stop_id\n", + "trip_id \n", + "1.TA.1-231-j19-1.1.H (8572747, 8582462, 8572600, 8572601, 8502553, ...\n", + "1.TA.1-44-j19-1.1.R (8590275, 8591891, 8590279)\n", + "1.TA.1-444-j19-1.1.H (8572747, 8580847, 8581346, 8502894, 8502979, ...\n", + "1.TA.12-E03-j19-1.1.H (8573205, 8596126)\n", + "1.TA.21-23-j19-1.1.R (8503000, 8503003)" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#group stops into a sequence\n", + "tuple_stops = stop_times_curated.groupby('trip_id')['stop_id'].apply(tuple).to_frame()\n", + "tuple_stops.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "19390" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tuple_stops.index.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we can group all these sequences in unique groups" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stop_id
(8502208, 8502209, 8503201, 8503010, 8503011, 8503000, 8503006, 8503016)
(8502208, 8502209, 8503201, 8503200, 8503010, 8503011, 8503016)
(8502208, 8502209, 8503202)
(8502208, 8502209, 8503202, 8503009, 8503010, 8503011, 8503000, 8503006, 8503016, 8503307)
(8502208, 8502209, 8503202, 8503200, 8503009, 8503000, 8503015, 8503016, 8503307, 8503305)
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: [(8502208, 8502209, 8503201, 8503010, 8503011, 8503000, 8503006, 8503016), (8502208, 8502209, 8503201, 8503200, 8503010, 8503011, 8503016), (8502208, 8502209, 8503202), (8502208, 8502209, 8503202, 8503009, 8503010, 8503011, 8503000, 8503006, 8503016, 8503307), (8502208, 8502209, 8503202, 8503200, 8503009, 8503000, 8503015, 8503016, 8503307, 8503305)]" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#group to get unique stop sequences\n", + "unique_stop_sequence = tuple_stops.groupby(\"stop_id\").count()\n", + "unique_stop_sequence.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2555" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unique_stop_sequence.index.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These unique sequences of stops are our routes. We can create a unique ID, an integer, for each route" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stop_idroute_int
0(8502208, 8502209, 8503201, 8503010, 8503011, ...0
1(8502208, 8502209, 8503201, 8503200, 8503010, ...1
2(8502208, 8502209, 8503202)2
3(8502208, 8502209, 8503202, 8503009, 8503010, ...3
4(8502208, 8502209, 8503202, 8503200, 8503009, ...4
\n", + "
" + ], + "text/plain": [ + " stop_id route_int\n", + "0 (8502208, 8502209, 8503201, 8503010, 8503011, ... 0\n", + "1 (8502208, 8502209, 8503201, 8503200, 8503010, ... 1\n", + "2 (8502208, 8502209, 8503202) 2\n", + "3 (8502208, 8502209, 8503202, 8503009, 8503010, ... 3\n", + "4 (8502208, 8502209, 8503202, 8503200, 8503009, ... 4" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#create dataframe and route_int\n", + "df_unique_stop_sequence = unique_stop_sequence.reset_index()\n", + "df_unique_stop_sequence[\"route_int\"] = df_unique_stop_sequence.index\n", + "df_unique_stop_sequence.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We add the route information to the trip" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stop_idroute_int
trip_id
403.TA.26-24-j19-1.220.R(8502208, 8502209, 8503201, 8503010, 8503011, ...0
425.TA.26-24-j19-1.220.R(8502208, 8502209, 8503201, 8503200, 8503010, ...1
22.TA.30-57-Y-j19-1.1.H(8502208, 8502209, 8503202)2
11.TA.30-57-Y-j19-1.1.H(8502208, 8502209, 8503202)2
14.TA.30-57-Y-j19-1.1.H(8502208, 8502209, 8503202)2
\n", + "
" + ], + "text/plain": [ + " stop_id \\\n", + "trip_id \n", + "403.TA.26-24-j19-1.220.R (8502208, 8502209, 8503201, 8503010, 8503011, ... \n", + "425.TA.26-24-j19-1.220.R (8502208, 8502209, 8503201, 8503200, 8503010, ... \n", + "22.TA.30-57-Y-j19-1.1.H (8502208, 8502209, 8503202) \n", + "11.TA.30-57-Y-j19-1.1.H (8502208, 8502209, 8503202) \n", + "14.TA.30-57-Y-j19-1.1.H (8502208, 8502209, 8503202) \n", + "\n", + " route_int \n", + "trip_id \n", + "403.TA.26-24-j19-1.220.R 0 \n", + "425.TA.26-24-j19-1.220.R 1 \n", + "22.TA.30-57-Y-j19-1.1.H 2 \n", + "11.TA.30-57-Y-j19-1.1.H 2 \n", + "14.TA.30-57-Y-j19-1.1.H 2 " + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#join with trip information\n", + "trip_with_routes = tuple_stops.join(df_unique_stop_sequence.set_index(\"stop_id\"), on=\"stop_id\", how=\"left\").sort_values(\"route_int\")\n", + "trip_with_routes.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "trip_with_routes = trip_with_routes.rename(columns={\"stop_id\" : \"all_stops\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check if wrong manipulations cause to have the same, or higher, number of routes than trips. 
It is not the case" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "19390" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#check if routes and trips do not have the same number\n", + "trip_with_routes.index.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2555" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trip_with_routes.route_int.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We add the route_int column to stop_times dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "245705" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated.trip_id.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "#join to get route_int in stop_times\n", + "stop_times_routes = stop_times_curated.join(trip_with_routes, how=\"left\", on=\"trip_id\" , lsuffix='_left', rsuffix='_right').drop_duplicates()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "245705" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_routes.trip_id.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
route_idstop_idtrip_idarrival_timedeparture_timestop_sequencedirection_idstop_nameroute_descstop_id_rawhour_departureall_stopsroute_int
819141-231-j19-185727471.TA.1-231-j19-1.1.H09:37:0009:37:0010Bremgarten AG, BahnhofBus85727479(8572747, 8582462, 8572600, 8572601, 8502553, ...618
1812811-231-j19-185824621.TA.1-231-j19-1.1.H09:38:0009:38:0030Bremgarten AG, ZelgliBus85824629(8572747, 8582462, 8572600, 8572601, 8502553, ...618
424601-231-j19-185726001.TA.1-231-j19-1.1.H09:39:0009:39:0040Zufikon, EmausBus85726009(8572747, 8582462, 8572600, 8572601, 8502553, ...618
2244541-231-j19-185726011.TA.1-231-j19-1.1.H09:39:0009:39:0050Zufikon, AlgierBus85726019(8572747, 8582462, 8572600, 8572601, 8502553, ...618
118361-231-j19-185025531.TA.1-231-j19-1.1.H09:43:0009:43:0060Unterlunkhofen, BreitenäckerBus85025539(8572747, 8582462, 8572600, 8572601, 8502553, ...618
\n", + "
" + ], + "text/plain": [ + " route_id stop_id trip_id arrival_time \\\n", + "81914 1-231-j19-1 8572747 1.TA.1-231-j19-1.1.H 09:37:00 \n", + "181281 1-231-j19-1 8582462 1.TA.1-231-j19-1.1.H 09:38:00 \n", + "42460 1-231-j19-1 8572600 1.TA.1-231-j19-1.1.H 09:39:00 \n", + "224454 1-231-j19-1 8572601 1.TA.1-231-j19-1.1.H 09:39:00 \n", + "11836 1-231-j19-1 8502553 1.TA.1-231-j19-1.1.H 09:43:00 \n", + "\n", + " departure_time stop_sequence direction_id \\\n", + "81914 09:37:00 1 0 \n", + "181281 09:38:00 3 0 \n", + "42460 09:39:00 4 0 \n", + "224454 09:39:00 5 0 \n", + "11836 09:43:00 6 0 \n", + "\n", + " stop_name route_desc stop_id_raw hour_departure \\\n", + "81914 Bremgarten AG, Bahnhof Bus 8572747 9 \n", + "181281 Bremgarten AG, Zelgli Bus 8582462 9 \n", + "42460 Zufikon, Emaus Bus 8572600 9 \n", + "224454 Zufikon, Algier Bus 8572601 9 \n", + "11836 Unterlunkhofen, Breitenäcker Bus 8502553 9 \n", + "\n", + " all_stops route_int \n", + "81914 (8572747, 8582462, 8572600, 8572601, 8502553, ... 618 \n", + "181281 (8572747, 8582462, 8572600, 8572601, 8502553, ... 618 \n", + "42460 (8572747, 8582462, 8572600, 8572601, 8502553, ... 618 \n", + "224454 (8572747, 8582462, 8572600, 8572601, 8502553, ... 618 \n", + "11836 (8572747, 8582462, 8572600, 8572601, 8502553, ... 
618 " + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_routes.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2554" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#check if route_int is correct\n", + "stop_times_routes.route_int.max()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Trip_int\n", + "The trip_int number needs to be ordered by route_int and time" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "19390" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#check number trips in stop_times\n", + "stop_times_routes.trip_id.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
route_idstop_idtrip_idarrival_timedeparture_timestop_sequencedirection_idstop_nameroute_descstop_id_rawhour_departureall_stopsroute_int
18129026-24-j19-18502208403.TA.26-24-j19-1.220.R10:44:0010:45:0031Horgen OberdorfS-Bahn8502208:0:410(8502208, 8502209, 8503201, 8503010, 8503011, ...0
26197426-24-j19-18502209403.TA.26-24-j19-1.220.R10:47:0010:47:0041Oberrieden DorfS-Bahn8502209:0:110(8502208, 8502209, 8503201, 8503010, 8503011, ...0
13016226-24-j19-18503201403.TA.26-24-j19-1.220.R10:53:0010:53:0061RüschlikonS-Bahn8503201:0:210(8502208, 8502209, 8503201, 8503010, 8503011, ...0
17367026-24-j19-18503010403.TA.26-24-j19-1.220.R11:02:0011:03:0091Zürich EngeS-Bahn8503010:0:211(8502208, 8502209, 8503201, 8503010, 8503011, ...0
23812926-24-j19-18503011403.TA.26-24-j19-1.220.R11:04:0011:04:00101Zürich WiedikonS-Bahn8503011:0:211(8502208, 8502209, 8503201, 8503010, 8503011, ...0
\n", + "
" + ], + "text/plain": [ + " route_id stop_id trip_id arrival_time \\\n", + "181290 26-24-j19-1 8502208 403.TA.26-24-j19-1.220.R 10:44:00 \n", + "261974 26-24-j19-1 8502209 403.TA.26-24-j19-1.220.R 10:47:00 \n", + "130162 26-24-j19-1 8503201 403.TA.26-24-j19-1.220.R 10:53:00 \n", + "173670 26-24-j19-1 8503010 403.TA.26-24-j19-1.220.R 11:02:00 \n", + "238129 26-24-j19-1 8503011 403.TA.26-24-j19-1.220.R 11:04:00 \n", + "\n", + " departure_time stop_sequence direction_id stop_name \\\n", + "181290 10:45:00 3 1 Horgen Oberdorf \n", + "261974 10:47:00 4 1 Oberrieden Dorf \n", + "130162 10:53:00 6 1 Rüschlikon \n", + "173670 11:03:00 9 1 Zürich Enge \n", + "238129 11:04:00 10 1 Zürich Wiedikon \n", + "\n", + " route_desc stop_id_raw hour_departure \\\n", + "181290 S-Bahn 8502208:0:4 10 \n", + "261974 S-Bahn 8502209:0:1 10 \n", + "130162 S-Bahn 8503201:0:2 10 \n", + "173670 S-Bahn 8503010:0:2 11 \n", + "238129 S-Bahn 8503011:0:2 11 \n", + "\n", + " all_stops route_int \n", + "181290 (8502208, 8502209, 8503201, 8503010, 8503011, ... 0 \n", + "261974 (8502208, 8502209, 8503201, 8503010, 8503011, ... 0 \n", + "130162 (8502208, 8502209, 8503201, 8503010, 8503011, ... 0 \n", + "173670 (8502208, 8502209, 8503201, 8503010, 8503011, ... 0 \n", + "238129 (8502208, 8502209, 8503201, 8503010, 8503011, ... 0 " + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_routes.sort_values([\"route_int\", \"arrival_time\"]).head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Generate sequential trip_int, ordered by route and by time" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0trip_inttrip_id
0403.TA.26-24-j19-1.220.R0403.TA.26-24-j19-1.220.R
1425.TA.26-24-j19-1.220.R1425.TA.26-24-j19-1.220.R
24.TA.30-57-Y-j19-1.1.H24.TA.30-57-Y-j19-1.1.H
35.TA.30-57-Y-j19-1.1.H35.TA.30-57-Y-j19-1.1.H
46.TA.30-57-Y-j19-1.1.H46.TA.30-57-Y-j19-1.1.H
\n", + "
" + ], + "text/plain": [ + " 0 trip_int trip_id\n", + "0 403.TA.26-24-j19-1.220.R 0 403.TA.26-24-j19-1.220.R\n", + "1 425.TA.26-24-j19-1.220.R 1 425.TA.26-24-j19-1.220.R\n", + "2 4.TA.30-57-Y-j19-1.1.H 2 4.TA.30-57-Y-j19-1.1.H\n", + "3 5.TA.30-57-Y-j19-1.1.H 3 5.TA.30-57-Y-j19-1.1.H\n", + "4 6.TA.30-57-Y-j19-1.1.H 4 6.TA.30-57-Y-j19-1.1.H" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trip_df = pd.DataFrame(stop_times_routes.sort_values([\"route_int\", \"arrival_time\"]).trip_id.unique())\n", + "trip_df[\"trip_int\"] = trip_df.index\n", + "trip_df[\"trip_id\"] = trip_df.iloc[:,0]\n", + "trip_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "19390" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#check number trip_id\n", + "trip_df.trip_id.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We join trip_id to stop_times dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#join to get trip_int in stop_times\n", + "stop_times_routes_trip = stop_times_routes.join(trip_df.set_index(\"trip_id\"), how=\"inner\", on=\"trip_id\" , lsuffix='_left', rsuffix='_right').drop_duplicates()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#save ordered stop_times\n", + "stop_times_routes_trip = stop_times_routes_trip.sort_values([\"route_int\", \"trip_int\", \"stop_sequence\"])\n", + "stop_times_routes_trip.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#check if manipulations did not destroy trips\n", + "stop_times_routes_trip.trip_id.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "#### Stop_int\n", + "Stop_int id needs to be ordered by route, trip and stop sequence" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#check number stops at entry\n", + "stop_times_routes_trip.stop_id.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "stop_times_routes_trip is already in the right order. We create a dataframe to create stop_int" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stops_df = pd.DataFrame(stop_times_routes_trip.stop_id.unique())\n", + "stops_df[\"stop_int\"] = stops_df.index\n", + "stops_df[\"stop_id\"] = stops_df.iloc[:,0]\n", + "stops_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#check if number stop_int correct\n", + "stops_df.stop_int.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We add stop_int information to stop_times" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#join to get stop_int\n", + "stop_times_routes_trip_stop = stop_times_routes_trip.join(stops_df.set_index(\"stop_id\"), how=\"inner\", on=\"stop_id\", lsuffix='_left', rsuffix='_right').drop_duplicates()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_routes_trip_stop.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#check if no stops deleted during manipulation\n", + "stop_times_routes_trip_stop.stop_id.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_routes_trip_stop.stop_int.max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {},
"outputs": [], + "source": [ + "#keep only useful columns \n", + "stop_times_int = stop_times_routes_trip_stop[[\"route_int\", \"trip_int\", \"stop_int\", \"stop_sequence\", \"arrival_time\", \"departure_time\",\\\n", + " \"route_id\", \"trip_id\", \"stop_id\", \\\n", + " \"route_desc\", \"stop_id_raw\", \"stop_name\"]].sort_values([\"route_int\", \"trip_int\", \"stop_sequence\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_int = stop_times_int.reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_int.loc[100:150].head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "An overview of number of routes, trips and stops" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_int.route_int.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_int.trip_int.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_int.stop_int.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_int.stop_int.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Transfer: delete transfer to same stop & get stop_int & stop_int2\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12564" + ] + }, + "execution_count": 7, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "#check number stops transfers\n", + "transfers.stop_id.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0stop_idstop_id2distanceTransfer_time_secstop_namestop_name2
00850092685906160.122430146Oetwil a.d.L., SchweizäckerGeroldswil, Schweizäcker
11850092685907370.300175360Oetwil a.d.L., SchweizäckerOetwil an der Limmat, Halde
2285021868502186:0:10.0067628Dietikon StoffelbachDietikon Stoffelbach
3385021868502186:0:20.01352416Dietikon StoffelbachDietikon Stoffelbach
4485021868502186P0.0000000Dietikon StoffelbachDietikon Stoffelbach
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 stop_id stop_id2 distance Transfer_time_sec \\\n", + "0 0 8500926 8590616 0.122430 146 \n", + "1 1 8500926 8590737 0.300175 360 \n", + "2 2 8502186 8502186:0:1 0.006762 8 \n", + "3 3 8502186 8502186:0:2 0.013524 16 \n", + "4 4 8502186 8502186P 0.000000 0 \n", + "\n", + " stop_name stop_name2 \n", + "0 Oetwil a.d.L., Schweizäcker Geroldswil, Schweizäcker \n", + "1 Oetwil a.d.L., Schweizäcker Oetwil an der Limmat, Halde \n", + "2 Dietikon Stoffelbach Dietikon Stoffelbach \n", + "3 Dietikon Stoffelbach Dietikon Stoffelbach \n", + "4 Dietikon Stoffelbach Dietikon Stoffelbach " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transfers.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We delete transfers to the same stop" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "transfers_df = transfers[transfers['stop_id'] != transfers['stop_id2']]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12564" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transfers_df.stop_id.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We create the stop_int column in transfers. 
This action eliminates stops not in stop_times" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_int = stop_times_curated" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfers_df = transfers_df.merge(stop_times_int[[\"stop_id\", \"stop_int\"]].set_index(\"stop_id\"), how=\"inner\", on = \"stop_id\").drop_duplicates()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfers_df.stop_id.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfers_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#create dataframe with stops\n", + "df_stop_int2 = stop_times_int[[\"stop_id\", \"stop_int\"]].rename(columns={\"stop_id\": \"stop_id2\", \"stop_int\" : \"stop_int_2\"})\n", + "df_stop_int2.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We add the stop id for the arrival destination, stop_int2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfers_df_int = transfers_df.merge(df_stop_int2.set_index(\"stop_id2\"), how=\"inner\", on = \"stop_id2\").drop_duplicates()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfers_df_int.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfers_df_int.stop_id.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfers = transfers_df_int" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [
"#check number unique stops2 in transfers\n", + "transfers.stop_id2.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfers.stop_id.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0route_idstop_id_generaltrip_idstop_idarrival_timedeparture_timestop_sequencestop_namestop_latstop_lontrip_headsigntrip_short_namedirection_iddeparture_first_stoproute_intstop_countstop_introute_desc
0026-66-j19-1859120517.TA.26-66-j19-1.1.H859120517:00:0017:00:003Zürich, Hürlimannplatz47.3650668.526539Zürich, Neubühl3870016:55:001225121317Bus
1126-66-j19-1859141517.TA.26-66-j19-1.1.H859141517:02:0017:02:004Zürich, Waffenplatzstrasse47.3614828.525749Zürich, Neubühl3870016:55:001225121267Bus
2226-66-j19-1859120417.TA.26-66-j19-1.1.H859120417:03:0017:03:005Zürich, Hügelstrasse47.3585438.526997Zürich, Neubühl3870016:55:0012251267Bus
3326-66-j19-1859109817.TA.26-66-j19-1.1.H859109817:04:0017:04:006Zürich, Brunau/Mutschellenstr.47.3551478.527141Zürich, Neubühl3870016:55:00122512512Bus
4426-66-j19-1859139217.TA.26-66-j19-1.1.H859139217:05:0017:05:007Zürich, Thujastrasse47.3501878.527806Zürich, Neubühl3870016:55:00122512403Bus
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 route_id stop_id_general trip_id stop_id \\\n", + "0 0 26-66-j19-1 8591205 17.TA.26-66-j19-1.1.H 8591205 \n", + "1 1 26-66-j19-1 8591415 17.TA.26-66-j19-1.1.H 8591415 \n", + "2 2 26-66-j19-1 8591204 17.TA.26-66-j19-1.1.H 8591204 \n", + "3 3 26-66-j19-1 8591098 17.TA.26-66-j19-1.1.H 8591098 \n", + "4 4 26-66-j19-1 8591392 17.TA.26-66-j19-1.1.H 8591392 \n", + "\n", + " arrival_time departure_time stop_sequence stop_name \\\n", + "0 17:00:00 17:00:00 3 Zürich, Hürlimannplatz \n", + "1 17:02:00 17:02:00 4 Zürich, Waffenplatzstrasse \n", + "2 17:03:00 17:03:00 5 Zürich, Hügelstrasse \n", + "3 17:04:00 17:04:00 6 Zürich, Brunau/Mutschellenstr. \n", + "4 17:05:00 17:05:00 7 Zürich, Thujastrasse \n", + "\n", + " stop_lat stop_lon trip_headsign trip_short_name direction_id \\\n", + "0 47.365066 8.526539 Zürich, Neubühl 3870 0 \n", + "1 47.361482 8.525749 Zürich, Neubühl 3870 0 \n", + "2 47.358543 8.526997 Zürich, Neubühl 3870 0 \n", + "3 47.355147 8.527141 Zürich, Neubühl 3870 0 \n", + "4 47.350187 8.527806 Zürich, Neubühl 3870 0 \n", + "\n", + " departure_first_stop route_int stop_count stop_int route_desc \n", + "0 16:55:00 1225 12 1317 Bus \n", + "1 16:55:00 1225 12 1267 Bus \n", + "2 16:55:00 1225 12 67 Bus \n", + "3 16:55:00 1225 12 512 Bus \n", + "4 16:55:00 1225 12 403 Bus " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_ordered = stop_times_curated\n", + "stop_times_ordered.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by making sure the order is correct" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_ordered = stop_times_int.sort_values(by=[\"route_int\", \"trip_int\", \"stop_sequence\"])\n", + "stop_times_ordered.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "stop_times_ordered[[\"arrival_time\", \"departure_time\"]].head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We add None to first arrival time and last departure time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#adding a shift\n", + "stop_times_ordered[\"sequence_shift_1\"] = stop_times_ordered[\"stop_sequence\"].shift(-1, fill_value=0)\n", + "stop_times_ordered.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_ordered['departure_time'] = np.where((stop_times_ordered[\"stop_sequence\"] > stop_times_ordered[\"sequence_shift_1\"]), None, stop_times_ordered['departure_time'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_ordered[\"arrival_time\"] = np.where((stop_times_ordered[\"stop_sequence\"] == 1), None, stop_times_ordered['arrival_time'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_ordered[[\"arrival_time\",\"departure_time\", \"stop_sequence\", \"sequence_shift_1\"]].head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Array structure preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### StopTimes: \n", + "[[departure_route0_trip0_stop0, arrival_route0_trip0_stop_0], [departure_route0_trip0_stop1, arrival_route0_trip0_stop_1], …], [[departure_route0_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], [[[departure_route1_trip0_stop0, arrival_route1_trip0_stop_0], …], [[departure_route1_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], …]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We transform it in datetime as required by the raptor algorithm" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_ordered['arrival_time'] = pd.to_datetime(stop_times_ordered['arrival_time'])\n", + "stop_times_ordered['departure_time'] = pd.to_datetime(stop_times_ordered['departure_time'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_ordered[[\"arrival_time\", \"departure_time\"]].head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/stop_times_df.pkl','wb') as f: pickle.dump(stop_times_ordered, f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_ordered = stop_times_ordered.sort_values(by=[\"route_int\", \"trip_int\", \"stop_sequence\"])\n", + "stop_times_ordered.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we transform it to array, ready to be used by raptor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_array = stop_times_ordered[[\"arrival_time\", \"departure_time\"]].to_numpy()\n", + "stop_times_array" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.size(stop_times_array,0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/stop_times_array.pkl','wb') as f: pickle.dump(stop_times_array, f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Routes: \n", + "[[route0_nr.Trips, route0_nr. Stops, route0_pointerRoutes, route0_pointerStops_times],[route1_nr.Trips, route1_nr. 
Stops, route1_pointerRoutes, route1_pointerStops_times],…]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by getting the number of trips and stops for each route" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_trips_stops = stop_times_ordered.groupby([\"route_int\"]).nunique()[[\"trip_int\",\"stop_int\"]].sort_index().rename(columns={\"trip_int\": \"n_Trips\", \"stop_int\": \"n_stops\"})\n", + "distinct_trips_stops.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_trips_stops.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We create the pointer for the route stops, by adding the unique stops for each route" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_trips_stops['pointer_routes_stops'] = distinct_trips_stops.n_stops.cumsum().shift(1, fill_value=0)\n", + "distinct_trips_stops.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We create the pointer for stop_times by adding the number of stops in each route, counting duplicates (due to several trips)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_trips_stops[\"pointer_stop_times\"] = (stop_times_ordered.groupby([\"route_int\"]).count().stop_id).cumsum().shift(1, fill_value=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_trips_stops[\"pointer_routes_stops_shift\"] = distinct_trips_stops['pointer_routes_stops'].shift(-1, fill_value=0)\n", + "distinct_trips_stops[\"pointer_stop_times_shift\"] = distinct_trips_stops['pointer_stop_times'].shift(-1, fill_value=0)\n", + "distinct_trips_stops.head(5)" + ] + }, + {
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_trips_stops['pointer_routes_stops'] = np.where((distinct_trips_stops[\"pointer_routes_stops\"] == distinct_trips_stops[\"pointer_routes_stops_shift\"]), None, distinct_trips_stops['pointer_routes_stops'])\n", + "distinct_trips_stops['pointer_stop_times'] = np.where((distinct_trips_stops[\"pointer_stop_times\"] == distinct_trips_stops[\"pointer_stop_times_shift\"]), None, distinct_trips_stops['pointer_stop_times'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_trips_stops.isna().any()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/routes_array_df.pkl','wb') as f: pickle.dump(distinct_trips_stops[['n_Trips', 'n_stops', 'pointer_routes_stops', 'pointer_stop_times']], f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_trips_stops.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "routes_array = distinct_trips_stops[['n_Trips', 'n_stops', 'pointer_routes_stops', 'pointer_stop_times']].to_numpy()\n", + "routes_array" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.size(routes_array, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/routes_array.pkl','wb') as f: pickle.dump(routes_array, f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "RouteStops: [route0_stop0, route0_stop1,…, route1_stop0, route1_stop1,…, …]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "route_stops = 
stop_times_ordered.sort_values([\"route_int\", \"stop_sequence\"])\n", + "route_stops = route_stops[['route_int', 'stop_int']].drop_duplicates().reset_index()\n", + "route_stops.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "route_stops.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "route_stops.route_int.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/route_stops_df.pkl','wb') as f: pickle.dump(route_stops, f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "route_stops_array = route_stops.stop_int.to_numpy()\n", + "route_stops_array" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.size(np.unique(route_stops_array))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.size(route_stops_array, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "route_stops_array.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/route_stops_array.pkl','wb') as f: pickle.dump(route_stops_array, f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check if pointers are correct\n", + "It is fundamental that the indexes, that serve as pointers, in Routes are correct" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by looking at where the indexes for stop_times and route_stops diverge. This will allow us to change. 
We can see that Route stops should have a new route at 3 while stop_times should have it at 78, so we try with that" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_trips_stops.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can check if the pointer indicates the routes index number. The pointer_routes should indicate the first stop of a new route. We try with 3 to see if route_stops has a new route at this index. It does, so it works" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "route_stops.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We go and see if stop_times has a new route at 78. It does, so it works" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_ordered.loc[75:80].head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Stops: [[stop0_pointerRoutes, stop0_pointerTransfer], [stop1_pointerRoutes, stop1_pointerTransfer], …]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stops_join = route_stops.join(transfers.set_index(\"stop_int\"), how=\"left\", on=\"stop_int\").drop_duplicates()\n", + "stops_join.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stops_join.stop_int.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_route_transfers = stops_join.sort_values(\"stop_int\").groupby([\"stop_int\"]).nunique().rename(columns={\"route_int\": \"n_Routes\", \"stop_int_2\": \"n_Transfers\"})\n", + "distinct_route_transfers = distinct_route_transfers[[\"n_Routes\", \"n_Transfers\"]].sort_index()\n", + 
"distinct_route_transfers.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_route_transfers['pointer_stop_routes'] = distinct_route_transfers.n_Routes.cumsum().shift(1, fill_value=0)\n", + "distinct_route_transfers['pointer_transfers'] = distinct_route_transfers.n_Transfers.cumsum().shift(1, fill_value=0)\n", + "distinct_route_transfers.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_route_transfers[\"pointer_stop_routes_shift\"] = distinct_route_transfers['pointer_stop_routes'].shift(-1, fill_value=0)\n", + "distinct_route_transfers[\"pointer_transfers_shift\"] = distinct_route_transfers['pointer_transfers'].shift(-1, fill_value=0)\n", + "distinct_route_transfers.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_route_transfers['pointer_stop_routes'] = np.where((distinct_route_transfers[\"pointer_stop_routes\"] == distinct_route_transfers[\"pointer_stop_routes_shift\"]), None, distinct_route_transfers['pointer_stop_routes'])\n", + "distinct_route_transfers['pointer_transfers'] = np.where((distinct_route_transfers[\"pointer_transfers\"] == distinct_route_transfers[\"pointer_transfers_shift\"]), None, distinct_route_transfers['pointer_transfers'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "distinct_route_transfers.isna().any()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stops_df = distinct_route_transfers[['pointer_stop_routes', 'pointer_transfers']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/stops_df.pkl','wb') as f: pickle.dump(stops_df, f)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stops_array = stops_df.to_numpy()\n", + "stops_array" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.size(stops_array, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stops_array.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/stops_array.pkl','wb') as f: pickle.dump(stops_array, f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "StopRoutes: [stop0_route1, stop0_route3, stop1_route1, stop2_route1, stop1_route4, …]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_routes = stop_times_ordered[[\"route_int\", \"stop_int\", \"stop_id\"]].drop_duplicates().sort_values([\"stop_int\", \"route_int\"])\n", + "stop_routes = stop_routes.reset_index()\n", + "stop_routes.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_routes.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_curated.route_id.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_routes.route_int.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/stop_routes_df.pkl','wb') as f: pickle.dump(stop_routes, f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_routes_array = stop_routes[\"route_int\"].to_numpy()\n", + "stop_routes_array" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "np.size(stop_routes_array, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_routes_array.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/stop_routes_array.pkl','wb') as f: pickle.dump(stop_routes_array, f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Transfer: [[[stop0_nameTargetStop1, transferTime1], [stop0_nameTargetStop2, transferTime2],….], [stop1_nameTargetStop1, transferTime1], [stop1_nameTargetStop2, transferTime2],….],…]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfers.stop_id.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfer_pandas = transfers[[\"stop_int\",\"stop_int_2\", \"Transfer_time_sec\", \"stop_id_raw\"]].sort_values([\"stop_int\", \"stop_int_2\", \"stop_id_raw\"]).drop_duplicates([\"stop_int\", \"stop_int_2\"])\n", + "transfer_pandas = transfer_pandas.reset_index(drop=True)\n", + "transfer_pandas.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfer_pandas.stop_int_2.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/transfer_df.pkl','wb') as f: pickle.dump(transfers.sort_values(\"stop_id\"), f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfer_array = transfer_pandas[[\"stop_int_2\", \"Transfer_time_sec\"]].to_numpy()\n", + "transfer_array" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/transfer_array.pkl','wb') as f: 
pickle.dump(transfer_array, f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.size(transfer_array, 0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Check if indexes in stops is correct" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see first the pointers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stops_df.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see that at the index 8 there should be a new stop. we check and it is false" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transfer_pandas.loc[5:10].head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see that at index 4 we should have a new stop. we check and it true" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_routes.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stop_routes.loc[stop_routes['stop_int'] == 172]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "route_stops.loc[route_stops['stop_int'] == 172]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "read files as pickles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/stop_times_array.pkl','rb') as f: arrayname1 = pickle.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('../data/routes_array.pkl','rb') as f: arrayname2 = pickle.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "with open('../data/route_stops_array.pkl','rb') as f: arrayname3 = pickle.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "arrayname1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "arrayname2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "arrayname3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/OLD_hdfs_data_processing_spark.ipynb b/notebooks/old_notebooks/OLD_hdfs_data_processing_spark.ipynb similarity index 100% rename from notebooks/OLD_hdfs_data_processing_spark.ipynb rename to notebooks/old_notebooks/OLD_hdfs_data_processing_spark.ipynb diff --git a/notebooks/old_notebooks/debugging.ipynb b/notebooks/old_notebooks/debugging.ipynb new file mode 100644 index 0000000..9a66390 --- /dev/null +++ b/notebooks/old_notebooks/debugging.ipynb @@ -0,0 +1,1469 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def pkload(path):\n", + " with open(path, 'rb') 
as f:\n", + " obj = pickle.load(f)\n", + " return obj" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Searching for journeys from Uster, Gschwader (stop 8588052) to Buchs ZH, Furttalstrasse (stop 8595356) with arrival at 17:30 leads to a footpath of over 3.2km + in reality, while the algorithm prints this:\n", + "\n", + "\" Walk 2.1 minutes from Birmensdorf ZH (stop 8502221)\n", + " to Dällikon, Industrie (stop 8576276)\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexroute_idstop_id_generaltrip_idstop_idarrival_timedeparture_timestop_sequencestop_namestop_latstop_lontrip_headsigntrip_short_namedirection_iddeparture_first_stoproute_intstop_countstop_introute_descmonotonically_increasing_id
2770819701326-5-A-j19-18502221114.TA.26-5-A-j19-1.37.R8502221:0:22020-05-24 19:35:002020-05-24 19:35:009Birmensdorf ZH47.3574968.437543Pfäffikon SZ18575119:22:0014910276S-Bahn180388626589
\n", + "
" + ], + "text/plain": [ + " index route_id stop_id_general trip_id \\\n", + "27708 197013 26-5-A-j19-1 8502221 114.TA.26-5-A-j19-1.37.R \n", + "\n", + " stop_id arrival_time departure_time stop_sequence \\\n", + "27708 8502221:0:2 2020-05-24 19:35:00 2020-05-24 19:35:00 9 \n", + "\n", + " stop_name stop_lat stop_lon trip_headsign trip_short_name \\\n", + "27708 Birmensdorf ZH 47.357496 8.437543 Pfäffikon SZ 18575 \n", + "\n", + " direction_id departure_first_stop route_int stop_count stop_int \\\n", + "27708 1 19:22:00 149 10 276 \n", + "\n", + " route_desc monotonically_increasing_id \n", + "27708 S-Bahn 180388626589 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Birmensdorf ZH\n", + "stop_times = pkload(\"../data/stop_times_df_cyril.pkl\")\n", + "stop_times[stop_times['stop_id_general']==8502221].head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexroute_idstop_id_generaltrip_idstop_idarrival_timedeparture_timestop_sequencestop_namestop_latstop_lontrip_headsigntrip_short_namedirection_iddeparture_first_stoproute_intstop_countstop_introute_descmonotonically_increasing_id
193522235226-449-j19-1857627617.TA.26-449-j19-1.1.H8576276NaT2020-05-24 07:00:002Dällikon, Industrie47.4447378.438783Buchs-Dällikon, Bahnhof4747007:00:00193474Bus8589935205
\n", + "
" + ], + "text/plain": [ + " index route_id stop_id_general trip_id stop_id \\\n", + "1935 222352 26-449-j19-1 8576276 17.TA.26-449-j19-1.1.H 8576276 \n", + "\n", + " arrival_time departure_time stop_sequence stop_name \\\n", + "1935 NaT 2020-05-24 07:00:00 2 Dällikon, Industrie \n", + "\n", + " stop_lat stop_lon trip_headsign trip_short_name \\\n", + "1935 47.444737 8.438783 Buchs-Dällikon, Bahnhof 4747 \n", + "\n", + " direction_id departure_first_stop route_int stop_count stop_int \\\n", + "1935 0 07:00:00 19 3 474 \n", + "\n", + " route_desc monotonically_increasing_id \n", + "1935 Bus 8589935205 " + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Dallikon Industrie\n", + "stop_times[stop_times['stop_id_general']==8576276].head(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A google maps footpaths using GPS coordinates yields a walk of 14.1 km. Is there a footpath defined between those two stops in transfers ?" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "transfers = pkload(\"../data/transfer_df_cyril.pkl\")" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexstop_id_generalstop_intstop_lat_firststop_lon_firststop_name_firststop_id_general_2stop_int_2stop_lat_first_2stop_lon_first_2stop_name_first_2distancewalking_timemonotonically_increasing_id
13003887850222127647.3575578.437543Birmensdorf ZH857371847347.3571258.438801Birmensdorf ZH, Bahnhof0.106219127352187318287
13013888850222127647.3575578.437543Birmensdorf ZH850295087747.3539368.437175Birmensdorf ZH, Zentrum0.403584484352187318288
13023889850222127647.3575578.437543Birmensdorf ZH8583870115447.3572348.437013Birmensdorf ZH, Bahnhof Süd0.05366664352187318289
\n", + "
" + ], + "text/plain": [ + " index stop_id_general stop_int stop_lat_first stop_lon_first \\\n", + "1300 3887 8502221 276 47.357557 8.437543 \n", + "1301 3888 8502221 276 47.357557 8.437543 \n", + "1302 3889 8502221 276 47.357557 8.437543 \n", + "\n", + " stop_name_first stop_id_general_2 stop_int_2 stop_lat_first_2 \\\n", + "1300 Birmensdorf ZH 8573718 473 47.357125 \n", + "1301 Birmensdorf ZH 8502950 877 47.353936 \n", + "1302 Birmensdorf ZH 8583870 1154 47.357234 \n", + "\n", + " stop_lon_first_2 stop_name_first_2 distance walking_time \\\n", + "1300 8.438801 Birmensdorf ZH, Bahnhof 0.106219 127 \n", + "1301 8.437175 Birmensdorf ZH, Zentrum 0.403584 484 \n", + "1302 8.437013 Birmensdorf ZH, Bahnhof Süd 0.053666 64 \n", + "\n", + " monotonically_increasing_id \n", + "1300 352187318287 \n", + "1301 352187318288 \n", + "1302 352187318289 " + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# transfers from Birmensdorf ZH\n", + "transfers[transfers['stop_id_general']==8502221]" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "# Transfers from Dallikon Industrie" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexstop_id_generalstop_intstop_lat_firststop_lon_firststop_name_firststop_id_general_2stop_int_2stop_lat_first_2stop_lon_first_2stop_name_first_2distancewalking_timemonotonically_increasing_id
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [index, stop_id_general, stop_int, stop_lat_first, stop_lon_first, stop_name_first, stop_id_general_2, stop_int_2, stop_lat_first_2, stop_lon_first_2, stop_name_first_2, distance, walking_time, monotonically_increasing_id]\n", + "Index: []" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transfers[transfers['stop_id_general']==8576276]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are no footpaths between Birmensdorf ZH and Dallikon Industrie, and interestingly, Dallikon Industrie has no footpaths at all. That points to a bug with the gestion of 'None' pointers in the array stops for footpaths" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "transfer_array = pkload(\"../data/transfer_array_cyril.pkl\")\n", + "stops = pkload(\"../data/stops_array_cyril.pkl\")" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[3075, 1297.0],\n", + " [3119, 1300.0],\n", + " [3131, 1303.0]], dtype=object)" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# transfers from Birmensdorf ZH\n", + "stop_int = 276\n", + "stops[stop_int-1:stop_int+2]" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 473, 127],\n", + " [ 877, 484],\n", + " [1154, 64]])" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transfer_array[int(stops[stop_int][1]):int(stops[stop_int+1][1])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**The first entry of transfer_array[276] corresponding to Birmensdorf ZH is 473, which is one less than 474, the stop_id of Dallikon 
Industrie**. That may be a clue to what's going wrong with the algorithm." + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[5408, 2157.0],\n", + " [5417, nan],\n", + " [5420, 2160.0]], dtype=object)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# transfers from Dallikon Industrie\n", + "stop_int = 474\n", + "stops[stop_int-1:stop_int+2]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "cannot convert float NaN to integer", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Trying to access the transfer for the nan pointer (SHOULD FAIL)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mtransfer_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstops\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstop_int\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstops\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstop_int\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: cannot convert float NaN to integer" + ] + } + ], + "source": [ + "# Trying to access the transfer for the nan pointer (SHOULD FAIL)\n", + 
"transfer_array[int(stops[stop_int][1]):int(stops[stop_int+1][1])]" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "# checking out what's around the nan pointer:" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 276, 127],\n", + " [ 877, 450],\n", + " [1154, 162]])" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transfer_array[int(stops[stop_int-1][1]):int(stops[stop_int+1][1])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first entry of transfer_array[stop_int-1] is 276, the stop_int of Birmensdorf ZH." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## recapitulating the bug with another journey\n", + "\n", + "Searching for journeys from Dübendorf, Branzenäsch (stop 8590551) to Kloten, Weinbergstrasse (stop 8579967) with arrival at 17:30 gives an impossible first walk from Dübendorf, Branzenäsch to Uetliberg (stop 8503057)." + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexroute_idstop_id_generaltrip_idstop_idarrival_timedeparture_timestop_sequencestop_namestop_latstop_lontrip_headsigntrip_short_namedirection_iddeparture_first_stoproute_intstop_countstop_introute_descmonotonically_increasing_id
9587311035026-752-j19-18590551190.TA.26-752-j19-1.4.R85905512020-05-24 07:16:002020-05-24 07:16:0011Dübendorf, Branzenäsch47.3946658.631157Dübendorf, Kunsteisbahn1420107:06:0056315503Bus627065226053
\n", + "
" + ], + "text/plain": [ + " index route_id stop_id_general trip_id \\\n", + "95873 110350 26-752-j19-1 8590551 190.TA.26-752-j19-1.4.R \n", + "\n", + " stop_id arrival_time departure_time stop_sequence \\\n", + "95873 8590551 2020-05-24 07:16:00 2020-05-24 07:16:00 11 \n", + "\n", + " stop_name stop_lat stop_lon trip_headsign \\\n", + "95873 Dübendorf, Branzenäsch 47.394665 8.631157 Dübendorf, Kunsteisbahn \n", + "\n", + " trip_short_name direction_id departure_first_stop route_int \\\n", + "95873 1420 1 07:06:00 563 \n", + "\n", + " stop_count stop_int route_desc monotonically_increasing_id \n", + "95873 15 503 Bus 627065226053 " + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Dübendorf, Branzenäsch\n", + "stop_times[stop_times['stop_id_general']== 8590551].head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexroute_idstop_id_generaltrip_idstop_idarrival_timedeparture_timestop_sequencestop_namestop_latstop_lontrip_headsigntrip_short_namedirection_iddeparture_first_stoproute_intstop_countstop_introute_descmonotonically_increasing_id
53045174926-10-B-j19-18503057181.TA.26-10-B-j19-1.9.H85030572020-05-24 07:14:00NaT8Uetliberg47.3523668.487651Uetliberg23511007:03:00624415S-Bahn25769805095
\n", + "
" + ], + "text/plain": [ + " index route_id stop_id_general trip_id \\\n", + "5304 51749 26-10-B-j19-1 8503057 181.TA.26-10-B-j19-1.9.H \n", + "\n", + " stop_id arrival_time departure_time stop_sequence stop_name \\\n", + "5304 8503057 2020-05-24 07:14:00 NaT 8 Uetliberg \n", + "\n", + " stop_lat stop_lon trip_headsign trip_short_name direction_id \\\n", + "5304 47.352366 8.487651 Uetliberg 23511 0 \n", + "\n", + " departure_first_stop route_int stop_count stop_int route_desc \\\n", + "5304 07:03:00 62 4 415 S-Bahn \n", + "\n", + " monotonically_increasing_id \n", + "5304 25769805095 " + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Uetliberg (stop 8503057)\n", + "stop_times[stop_times['stop_id_general']== 8503057].head(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A google maps search for footpaths between the coordinates yields a 14.5 km walk." + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexstop_id_generalstop_intstop_lat_firststop_lon_firststop_name_firststop_id_general_2stop_int_2stop_lat_first_2stop_lon_first_2stop_name_first_2distancewalking_timemonotonically_increasing_id
22562938859055150347.3946658.631157Dübendorf, Branzenäsch859058741447.3930538.633367Dübendorf, Sonnenberg0.244501293618475290624
22572939859055150347.3946658.631157Dübendorf, Branzenäsch859055559947.3948778.628714Dübendorf, Claridenstrasse0.185439222618475290625
22582940859055150347.3946658.631157Dübendorf, Branzenäsch859058188647.3915578.634876Dübendorf, Raubbühl0.444715533618475290626
22592941859055150347.3946658.631157Dübendorf, Branzenäsch859055093147.3970488.625614Dübendorf, Bettlistrasse0.494279593618475290627
22602942859055150347.3946658.631157Dübendorf, Branzenäsch8590590122647.3905358.632469Dübendorf, Sunnhalde0.469644563618475290628
22612943859055150347.3946658.631157Dübendorf, Branzenäsch8590562124447.3962408.629935Dübendorf, Glärnischstrasse0.197806237618475290629
22622944859055150347.3946658.631157Dübendorf, Branzenäsch8590549139247.3948178.634580Dübendorf, Bauhof0.258181309618475290630
\n", + "
" + ], + "text/plain": [ + " index stop_id_general stop_int stop_lat_first stop_lon_first \\\n", + "2256 2938 8590551 503 47.394665 8.631157 \n", + "2257 2939 8590551 503 47.394665 8.631157 \n", + "2258 2940 8590551 503 47.394665 8.631157 \n", + "2259 2941 8590551 503 47.394665 8.631157 \n", + "2260 2942 8590551 503 47.394665 8.631157 \n", + "2261 2943 8590551 503 47.394665 8.631157 \n", + "2262 2944 8590551 503 47.394665 8.631157 \n", + "\n", + " stop_name_first stop_id_general_2 stop_int_2 stop_lat_first_2 \\\n", + "2256 Dübendorf, Branzenäsch 8590587 414 47.393053 \n", + "2257 Dübendorf, Branzenäsch 8590555 599 47.394877 \n", + "2258 Dübendorf, Branzenäsch 8590581 886 47.391557 \n", + "2259 Dübendorf, Branzenäsch 8590550 931 47.397048 \n", + "2260 Dübendorf, Branzenäsch 8590590 1226 47.390535 \n", + "2261 Dübendorf, Branzenäsch 8590562 1244 47.396240 \n", + "2262 Dübendorf, Branzenäsch 8590549 1392 47.394817 \n", + "\n", + " stop_lon_first_2 stop_name_first_2 distance walking_time \\\n", + "2256 8.633367 Dübendorf, Sonnenberg 0.244501 293 \n", + "2257 8.628714 Dübendorf, Claridenstrasse 0.185439 222 \n", + "2258 8.634876 Dübendorf, Raubbühl 0.444715 533 \n", + "2259 8.625614 Dübendorf, Bettlistrasse 0.494279 593 \n", + "2260 8.632469 Dübendorf, Sunnhalde 0.469644 563 \n", + "2261 8.629935 Dübendorf, Glärnischstrasse 0.197806 237 \n", + "2262 8.634580 Dübendorf, Bauhof 0.258181 309 \n", + "\n", + " monotonically_increasing_id \n", + "2256 618475290624 \n", + "2257 618475290625 \n", + "2258 618475290626 \n", + "2259 618475290627 \n", + "2260 618475290628 \n", + "2261 618475290629 \n", + "2262 618475290630 " + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# footpaths from Dübendorf, Branzenäsch\n", + "transfers[transfers['stop_id_general']== 8590551]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "No sign of Uetliberg here." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexstop_id_generalstop_intstop_lat_firststop_lon_firststop_name_firststop_id_general_2stop_int_2stop_lat_first_2stop_lon_first_2stop_name_first_2distancewalking_timemonotonically_increasing_id
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [index, stop_id_general, stop_int, stop_lat_first, stop_lon_first, stop_name_first, stop_id_general_2, stop_int_2, stop_lat_first_2, stop_lon_first_2, stop_name_first_2, distance, walking_time, monotonically_increasing_id]\n", + "Index: []" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# transfers from Uetliberg (stop 8503057)\n", + "transfers[transfers['stop_id_general']== 8503057]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Again, utliberg has no footpaths, like Dallikon Industrie in the previous bug." + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[5599, 2253.0],\n", + " [5603, 2256.0],\n", + " [5606, 2263.0]], dtype=object)" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# footpaths from Dübendorf, Branzenäsch\n", + "stop_int = 503\n", + "stops[stop_int-1:stop_int+2]" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 414, 293],\n", + " [ 599, 222],\n", + " [ 886, 533],\n", + " [ 931, 593],\n", + " [1226, 563],\n", + " [1244, 237],\n", + " [1392, 309]])" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transfer_array[int(stops[stop_int][1]):int(stops[stop_int+1][1])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice here again that the first transfer from Dübendorf, Branzenäsch goes to stop_int= 414, which is one less than the stop_int of Utliberg (415)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[4850, 1901.0],\n", + " [4853, nan],\n", + " [4860, 1907.0]], dtype=object)" + ] + 
}, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# footpahts from Utliberg:\n", + "stop_int = 415\n", + "stops[stop_int-1:stop_int+2]" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "cannot convert float NaN to integer", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# expected to fail: accessing a none pointer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mtransfer_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstops\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstop_int\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstops\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstop_int\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: cannot convert float NaN to integer" + ] + } + ], + "source": [ + "# expected to fail: accessing a none pointer\n", + "transfer_array[int(stops[stop_int][1]):int(stops[stop_int+1][1])]" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "cannot convert float NaN to integer", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtransfer_array\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstops\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstop_int\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstops\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstop_int\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: cannot convert float NaN to integer" + ] + } + ], + "source": [ + "transfer_array[int(stops[stop_int][1]):int(stops[stop_int+1][1])]" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 503, 293],\n", + " [ 599, 485],\n", + " [ 886, 241],\n", + " [1226, 345],\n", + " [1244, 526],\n", + " [1392, 259]])" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transfer_array[int(stops[stop_int-1][1]):int(stops[stop_int+1][1])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Again, the first entry of transfer_array[stop_int-1] is 503, the stop_int of Dübendorf, Branzenäsch !" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing Felix's stop conversion:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.isnan(np.arange(1))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0 75]\n", + "[0 0]\n", + "(1407, 2)\n", + "[ 0 0 0 75]\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[ 0, 11, 0, 2],\n", + " [ 11, 20, 2, 7],\n", + " [ 20, 38, 7, 22],\n", + " ...,\n", + " [15303, 15334, 6242, 6250],\n", + " [15334, 15339, 6250, 6257],\n", + " [15339, 15344, 6257, 6264]], dtype=uint32)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stops = pkload(\"../data/stops_array_cyril.pkl\")\n", + "stopRoutes = pkload(\"../data/stop_routes_array_cyril.pkl\")\n", + "print(np.isnan(stops.astype(np.float64)).sum(axis=0))\n", + "print(np.equal(stops, None).sum(axis=0))\n", + "print(stops.shape)\n", + "stops = stops[:,[0,0,1,1]]\n", + "# Make column 1 contain the start_index of the next stop in stopRoutes\n", + "stops[:-1,1] = stops[1:,0]\n", + "stops[-1, 1] = stopRoutes.shape[0]\n", + "# Make column 3 contain the start_index of the next stop in stopRoutes\n", + "if np.isnan(stops[-1,2]).item():\n", + " stops[-1,2] = transfers.shape[0]\n", + "for i in np.isnan(stops[:-1,2].astype(np.float64)).nonzero()[0][::-1]:\n", + " stops[i,2] = stops[i+1,2]\n", + "print(np.isnan(stops.astype(np.float64)).sum(axis=0))\n", + "stops[:-1,3] = stops[1:,2]\n", + "stops[-1, 3] = transfers.shape[0]\n", + "# Convert to int\n", + "stops = stops.astype(np.uint32)\n", + "stops" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/plain": [ + "array([[5599, 5603, 2253, 2256],\n", + " [5603, 5606, 2256, 2263],\n", + " [5606, 5615, 2263, 2266]], dtype=uint32)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# footpaths from Dübendorf, Branzenäsch\n", + "stop_int = 503\n", + "stops[stop_int-1:stop_int+2]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 414, 293],\n", + " [ 599, 222],\n", + " [ 886, 533],\n", + " [ 931, 593],\n", + " [1226, 563],\n", + " [1244, 237],\n", + " [1392, 309]])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transfer_array[stops[stop_int][2]:stops[stop_int][3]]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[4850, 4853, 1901, 1907],\n", + " [4853, 4860, 1907, 1907],\n", + " [4860, 4885, 1907, 1914]], dtype=uint32)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# footpaths from utliberg\n", + "# footpahts from Utliberg:\n", + "stop_int = 415\n", + "stops[stop_int-1:stop_int+2]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There's the bug ! The pointers for transfer_array in the first entry shows 1901, 1901 when it should in fact be the second entry that shows 1901, 1901. The first entry should show (1901, 1907) (see cell nr 81)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/old_notebooks/raptor_toy_example_2020_05_21.ipynb b/notebooks/old_notebooks/raptor_toy_example_2020_05_21.ipynb new file mode 100644 index 0000000..0be9aca --- /dev/null +++ b/notebooks/old_notebooks/raptor_toy_example_2020_05_21.ipynb @@ -0,0 +1,1916 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Coding a RAPTOR toy example\n", + "\n", + "## Goal\n", + "\n", + "Learn the RAPTOR algorithm by coding it for a toy example with the data structures advised in the paper. We code RAPTOR for a super simple toy example with only two routes and two trips each.\n", + "\n", + "## Toy example\n", + "- TODO updates:\n", + " - additional route r2 that goes from A to E slowly\n", + " - walking paths\n", + "![toy_example](img/RAPTOR_example.png) \n", + "\n", + "## Encoding the data structures\n", + "### General considerations\n", + "We adhere to the data structures proposed by Delling et al. These structures aim to minimize read times in memory by making use of consecutive in-memory adresses. Thus, structures with varying dimensions (e.g dataframes, python lists) are excluded. We illustrate the difficulty with an example. \n", + "\n", + "Each route has a potentially unique number of stops. Therefore, we cannot store stops in a 2D array of routes by stops, as the number of stops is not the same for each route. 
We adress this problem by storing stops consecutively by route, and keeping track of the index of the first stop for each route.\n", + "\n", + "This general strategy is applied to all the required data structures.\n", + "\n", + "### routes\n", + "The `routes` array will contain arrays `[n_trips, n_stops, pt_1st_stop, pt_1st_trip]` where all four values are `int`. To avoid overcomplicating things and try to mimic pointers in python, `pt_1st_stop` and `pt_1st_trip` contain integer indices." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "lines_to_next_cell": 0 + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "routes = np.array([[2, 3, 0, 0], #r0\n", + " [2, 3, 3, 6], #r1\n", + " [2, 2, 6, 12]]) # r2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### routeStops\n", + "`routeStops` is an array that contains the ordered lists of stops for each route. `pt_1st_stop` in `routes` is required to get to the first stop of the route. is itself an array that contains the sequence of stops for route $r_i$." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "lines_to_next_cell": 0 + }, + "outputs": [], + "source": [ + "routeStops = np.array([0, 1, 2, # A, B, C\n", + " 3, 2, 4, # D, C, E\n", + " 0, 4]) # A, E" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### stopTimes\n", + "\n", + "The i-th entry in the `stopTimes` array is itself an array which contains the arrival and departure time at a particular stop for a particular trip. `stopTimes` is sorted by routes, and then by trips. We retrieve the index of the first (earliest) trip of the route with the pointer `pt_1st_trip` stored in `routes`. We may use the built-in `numpy` [date and time data structures](https://blog.finxter.com/how-to-work-with-dates-and-times-in-python/). In short, declaring dates and times is done like this: `np.datetime64('YYYY-MM-DDThh:mm')`. 
Entries with a `NaT` arrival or departure times correspond to beginning and end of trips respectively.\n", + "\n", + "Note that trips are indexed implicitely in stopTimes, but we decided to change a little bit from the paper and index them according to their parent route instead of giving them an absolute index. It makes things a bit easier when coding the algorithm." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "stopTimes = np.array([\n", + " # r0, t0\n", + " [None, '2020-05-11T08:00'],\n", + " ['2020-05-11T08:25', '2020-05-11T08:30'],\n", + " ['2020-05-11T08:55', None],\n", + "\n", + " # ro, t1\n", + " [None, '2020-05-11T08:10'],\n", + " ['2020-05-11T08:35', '2020-05-11T08:40'],\n", + " ['2020-05-11T09:05', None],\n", + " \n", + " # r1, t0 \n", + " [None, '2020-05-11T08:00'],\n", + " ['2020-05-11T08:05', '2020-05-11T08:10'],\n", + " ['2020-05-11T08:15', None],\n", + "\n", + " # r1, t1\n", + " [None, '2020-05-11T09:00'],\n", + " ['2020-05-11T09:05', '2020-05-11T09:10'],\n", + " ['2020-05-11T09:15', None],\n", + " \n", + " #r2, t0\n", + " [None, '2020-05-11T08:20'],\n", + " ['2020-05-11T09:20', None],\n", + " \n", + " #r2, t1\n", + " [None, '2020-05-11T08:30'],\n", + " ['2020-05-11T09:30', None]],\n", + " dtype='datetime64')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`NaT` is the `None` equivalent for `numpy datetime64`." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ True, False])" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.isnat(stopTimes[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### stopRoutes\n", + "\n", + "`stopRoutes` contains the routes associated with each stop. We need the pointer in `stops` to index `stopRoutes` correctly." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "stopRoutes = np.array([0, 2, # A\n", + " 0, # B\n", + " 0,1, # C\n", + " 1, # D\n", + " 1, 2]) # E" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We should also build an array for transfer times (including walking times), but for now let's ignore this additional complexity. Finally, the i-th entry in the `stops` array points to the first entry in `stopRoutes` (and `transfers` when that will be tried) associated with stop $p_i$" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "stops = np.array([[0, None],# A\n", + " [2, None], # B\n", + " [3, None],# C\n", + " [5, None], # D\n", + " [6, None], # E\n", + " [len(stopRoutes), None]]) # fictive stop to account for length of E" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Coding the standard RAPTOR\n", + "\n", + "Below, we code RAPTOR as it is described in the paper, with all optimizations. That corresponds to the pseudocode block in the article. It solves the earliest arrival time problem: we enter an start stop, a target stop and a departure time and it finds the earliest arrival time in k rounds (i.e taking at most k transports). Note that walking between stops is not considered a transport." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "p_s = 0 # start stop = A\n", + "p_t = 4 # target stop = E\n", + "tau_0 = np.datetime64('2020-05-11T08:05') # departure time 08:05\n", + "k_max = 10 # we set a maximum number of transports to pre-allocate memory for the numpy array tau_i" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "def raptor_standard(p_s, p_t, tau_0, routes, routeStops, stopTimes, stopRoutes, stops,\n", + " k_max=10):\n", + " \n", + " #******************************************initialization******************************************\n", + " n_stops = len(stops)-1 # to remove the fictive stop to account for all the routes belonging to the last stop\n", + "\n", + " # earliest arrival time at each stop for each round.\n", + " tau = np.full(shape=(k_max, n_stops), fill_value = np.datetime64('2100-01-01T00:00')) # 2100 instead of infinity # number of stops * max number of transports\n", + "\n", + " # earliest arrival time at each stop, indep. 
of round\n", + " tau_star = np.full(shape=n_stops, fill_value = np.datetime64('2100-01-01T00:00'))\n", + "\n", + " # to backtrack the journey of TRANSPORTS once it is finished\n", + " #[route, trip, boarding stop, exit stop]\n", + " # we will keep [r, t, p_b, p_e, p_f1, pf2, t_w] i.e \n", + " # [route, trip (offset by route, not absolute), boarding stop, exit stop, beginning stop of the walk, target stop of the walk, time walked]\n", + " journey = np.full(shape=(k_max, n_stops, 7), fill_value = -1, dtype=int)\n", + " \n", + " marked = [p_s]\n", + " q = []\n", + " tau[0, p_s] = tau_0\n", + " \n", + " #Maybe TODO (but not in original raptor): footpaths from the departure stop\n", + "\n", + " #****************************************** main loop******************************************\n", + " for k in np.arange(1, k_max+1):\n", + " print('\\n******************************STARTING round k={}******************************'.format(k))\n", + " # accumulate routes serving marked stops from previous rounds\n", + " q = []\n", + " marked = list(set(marked)) # removing potential duplicate stops in marked due to walking paths\n", + " print('Marked stops at the start of the round: {}'.format(marked))\n", + " for p in marked:\n", + " for r in stopRoutes[stops[p][0]:stops[p+1][0]]: # foreach route r serving p\n", + " print('Route considered for the queue: ({0}, {1})'.format(r, p))\n", + " inQueue = False\n", + " for idx, (rPrime, pPrime) in enumerate(q): \n", + " # is there already another stop from the same route in q ?\n", + " if (rPrime == r): \n", + " # is there already a later stop from the same route in q ?\n", + " if(np.where(routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == pPrime)[0][0] >\\\n", + " np.where(routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == p)[0][0]):\n", + " #in that case, replace the later stop pPrime by stop p in q\n", + " q[idx] = (r, p)\n", + " inQueue = True\n", + " # is there already an earlier stop from the same route in q ?\n", + " 
else:\n", + " # in that case, do not add p to the q.\n", + " inQueue=True\n", + " if not inQueue:\n", + " q.append((r, p))\n", + "\n", + " marked = [] # unmarking all stops\n", + "\n", + " print('Queue before traversing each route: {}'.format(q))\n", + " # traverse each route\n", + " for (r, p) in q:\n", + " print('\\n****TRAVERSING ROUTE r={0} from stop p={1}****'.format(r, p))\n", + " # t is the t-th trip in route r, not the t-th trip in all trips. This makes things easier\n", + " t = None\n", + " # we will keep [r, t, p_b, p_e, p_f, t_w] i.e \n", + " # [route, trip (offset by route, not absolute), boarding stop, exit stop, target stop of the walk, time walked]\n", + " t_journey = np.empty(4, dtype=int)# contains tripID, board and exit stops to backtrack the journey\n", + "\n", + "\n", + " # we only traverse the route starting at p, not from the beginning of the route\n", + " for p_i in routeStops[routes[r][2]+np.where(routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == p)[0][0]:\\\n", + " routes[r][2]+routes[r][1]]:\n", + " print(\"p_i: {}\".format(p_i))\n", + "\n", + " if (t is not None):\n", + " # 1st trip of route + \n", + " # offset for the right trip + \n", + " # offset for the right stop\n", + " arr_t_p_i = stopTimes[routes[r][3] + \\\n", + " t * routes[r][1] + \\\n", + " np.where(routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == p_i)[0][0]][0]\n", + " print(\"arr_t_p_i: {}\".format(arr_t_p_i))\n", + "\n", + " if arr_t_p_i < min(tau_star[p_i], tau_star[p_t]):\n", + " tau[k][p_i] = arr_t_p_i\n", + " tau_star[p_i] = arr_t_p_i\n", + " marked.append(p_i)\n", + " # keep a trace that we went down the trip taken before at this stop\n", + " t_journey[3] = p_i\n", + " journey[k][p_i][0:4] = t_journey\n", + " # Can we catch an earlier trip at p_i ?\n", + " print('\\n----scanning departure times for route r={0} at stop p_i={1}----'.format(r, p_i))\n", + " t_r = 0\n", + " while True:\n", + " t_r_dep = stopTimes[routes[r][3]+\\\n", + " # offset corresponding to 
stop p_i in route r\n", + " np.where(routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == p_i)[0][0] + \\\n", + " routes[r][1]*t_r][1]\n", + "\n", + " print(\"Earliest arrival time at previous step: tau[k-1][p_i]: {}\".format(tau[k-1][p_i]))\n", + " print(\"Departure time considered: t_r_dep: {}\".format(t_r_dep))\n", + " # We hop on the first trip that departs later than our arrival time at p_i in k-1 transports\n", + " if t_r_dep > tau[k-1][p_i]:\n", + " t = t_r\n", + " print('\\n!!!!Hopped on route r={0}, trip t={1} at stop p_i={2}!!!!'.format(r, t, p_i))\n", + "\n", + " # here we probably need to save the trip and boarding stop (boarding time will not be useful)\n", + " t_journey[0] = r\n", + " t_journey[1] = t\n", + " t_journey[2] = p_i\n", + " break\n", + " t_r += 1\n", + "\n", + " # we could not hop on any trip at this stop\n", + " if t_r == routes[r][0]:\n", + " break\n", + " \n", + " print('\\n****FOOTPATHS****')\n", + " \n", + " marked_footpaths = [] # storing marked stops for footpaths in a separate list to avoid inifinite loops\n", + " for p in marked:\n", + " if stops[p][1] is not None:\n", + " print('checking walking paths from stop {}'.format(p))\n", + " # making sure there are footpaths for that stop\n", + " # finding the next stop where there are footpaths to find the next index\n", + " next_stop = p\n", + " next_stop_found = False\n", + " while next_stop < len(stops)-1: #carefully check that's the correct version\n", + " next_stop = next_stop+1\n", + " if stops[next_stop][1] is not None:\n", + " next_stop_found = True\n", + " break\n", + " \n", + " # reinitializing next_stop to p in case no next stop with not 'None' stops[p][1] is found\n", + " if not next_stop_found:\n", + " next_stop = p+1 # this works because transfers[p:None] is equivalent to transfers[p:]\n", + " \n", + " \n", + " for f in transfers[stops[p][1]:stops[next_stop][1]]:\n", + " print(\"Considering footpaths from {} to {}\".format(p, f[0]))\n", + " \n", + " # we only consider 
footpaths if they strictly ameliorate the arrival time at the arrival stop of the path.\n", + " if(tau[k][p]+np.timedelta64(f[1], 's') < min(tau_star[f[0]], tau_star[p_t])): \n", + " print(\"Walking to {} is faster !\".format(f[0]))\n", + " tau[k][f[0]] = tau[k][p]+np.timedelta64(f[1], 's')\n", + " tau_star[f[0]] = tau[k][p]+np.timedelta64(f[1], 's')\n", + " marked_footpaths.append(f[0])\n", + " \n", + " # keeping tracks of footpaths to backtrack the journey:\n", + " # [departure stop, arrival stop, walking time]\n", + " journey[k][f[0]][4:7] = [p, f[0], f[1]]\n", + " \n", + " marked.extend(marked_footpaths) # to avoid infinite loops if marked gets appended dynamically\n", + " # stopping criterion: no stops were marked\n", + " if not marked:\n", + " break\n", + " return(tau, tau_star, k, journey)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "******************************STARTING round k=1******************************\n", + "Marked stops at the start of the round: [0]\n", + "Route considered for the queue: (0, 0)\n", + "Route considered for the queue: (2, 0)\n", + "Queue before traversing each route: [(0, 0), (2, 0)]\n", + "\n", + "****TRAVERSING ROUTE r=0 from stop p=0****\n", + "p_i: 0\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=0----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:05\n", + "Departure time considered: t_r_dep: 2020-05-11T08:00\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:05\n", + "Departure time considered: t_r_dep: 2020-05-11T08:10\n", + "\n", + "!!!!Hopped on route r=0, trip t=1 at stop p_i=0!!!!\n", + "p_i: 1\n", + "arr_t_p_i: 2020-05-11T08:35\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=1----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 
2020-05-11T08:30\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 2020-05-11T08:40\n", + "p_i: 2\n", + "arr_t_p_i: 2020-05-11T09:05\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=2----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****TRAVERSING ROUTE r=2 from stop p=0****\n", + "p_i: 0\n", + "\n", + "----scanning departure times for route r=2 at stop p_i=0----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:05\n", + "Departure time considered: t_r_dep: 2020-05-11T08:20\n", + "\n", + "!!!!Hopped on route r=2, trip t=0 at stop p_i=0!!!!\n", + "p_i: 4\n", + "arr_t_p_i: 2020-05-11T09:20\n", + "\n", + "----scanning departure times for route r=2 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****FOOTPATHS****\n", + "\n", + "******************************STARTING round k=2******************************\n", + "Marked stops at the start of the round: [1, 2, 4]\n", + "Route considered for the queue: (0, 1)\n", + "Route considered for the queue: (0, 2)\n", + "Route considered for the queue: (1, 2)\n", + "Route considered for the queue: (1, 4)\n", + "Route considered for the queue: (2, 4)\n", + "Queue before traversing each route: [(0, 1), (1, 2), (2, 4)]\n", + "\n", + "****TRAVERSING ROUTE r=0 from stop p=1****\n", + "p_i: 1\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=1----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:35\n", + "Departure time 
considered: t_r_dep: 2020-05-11T08:30\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:35\n", + "Departure time considered: t_r_dep: 2020-05-11T08:40\n", + "\n", + "!!!!Hopped on route r=0, trip t=1 at stop p_i=1!!!!\n", + "p_i: 2\n", + "arr_t_p_i: 2020-05-11T09:05\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=2----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****TRAVERSING ROUTE r=1 from stop p=2****\n", + "p_i: 2\n", + "\n", + "----scanning departure times for route r=1 at stop p_i=2----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: 2020-05-11T08:10\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: 2020-05-11T09:10\n", + "\n", + "!!!!Hopped on route r=1, trip t=1 at stop p_i=2!!!!\n", + "p_i: 4\n", + "arr_t_p_i: 2020-05-11T09:15\n", + "\n", + "----scanning departure times for route r=1 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:20\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:20\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****TRAVERSING ROUTE r=2 from stop p=4****\n", + "p_i: 4\n", + "\n", + "----scanning departure times for route r=2 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:20\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:20\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****FOOTPATHS****\n", + "\n", + "******************************STARTING round 
k=3******************************\n", + "Marked stops at the start of the round: [4]\n", + "Route considered for the queue: (1, 4)\n", + "Route considered for the queue: (2, 4)\n", + "Queue before traversing each route: [(1, 4), (2, 4)]\n", + "\n", + "****TRAVERSING ROUTE r=1 from stop p=4****\n", + "p_i: 4\n", + "\n", + "----scanning departure times for route r=1 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:15\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:15\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****TRAVERSING ROUTE r=2 from stop p=4****\n", + "p_i: 4\n", + "\n", + "----scanning departure times for route r=2 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:15\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:15\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****FOOTPATHS****\n" + ] + } + ], + "source": [ + "tau, tau_star, k, journey = raptor_standard(p_s, p_t, tau_0, \n", + " routes = routes, routeStops = routeStops, stopTimes = stopTimes, stopRoutes = stopRoutes, stops = stops)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['2100-01-01T00:00', '2020-05-11T08:35', '2020-05-11T09:05',\n", + " '2100-01-01T00:00', '2020-05-11T09:15'], dtype='datetime64[m]')" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tau_star" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([['2020-05-11T08:05', '2100-01-01T00:00', '2100-01-01T00:00',\n", + " '2100-01-01T00:00', '2100-01-01T00:00'],\n", + " ['2100-01-01T00:00', '2020-05-11T08:35', 
'2020-05-11T09:05',\n", + " '2100-01-01T00:00', '2020-05-11T09:20'],\n", + " ['2100-01-01T00:00', '2100-01-01T00:00', '2100-01-01T00:00',\n", + " '2100-01-01T00:00', '2020-05-11T09:15'],\n", + " ['2100-01-01T00:00', '2100-01-01T00:00', '2100-01-01T00:00',\n", + " '2100-01-01T00:00', '2100-01-01T00:00']], dtype='datetime64[m]')" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k_last = k\n", + "\n", + "tau[0:k_last+1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`journey` contains all the necessary information to backtrack from the solution to the actual journey in terms of sequence of transports.\n", + "\n", + "`journey` has dimensions `k` by `n_stops` by 4+3.\n", + "- The 4 first values store the route and trip taken, the departure and arrival stops.\n", + "- The 3 last values are used by footpaths. They contain the departure stop for the walk, the arrival stop for the walk and the walking time in seconds.\n", + "\n", + "When we hop on a trip, we store the trip (with the route) and the boarding and exit stops as the array `t_journey`: `(r, t, p_boarding, p_exit)`. At each stop `p_i` where we ameliorate the arrival time in round `k`, we store `t_journey` in the first 4 cells of `journey[k][p_i]`. `p_i` corresponds to the exit stop when backtracking.\n", + "\n", + "When walking to stop `p_i` is shorter, we store the departure, arrival stops and walking time in the 3 last cells of `journey[k][p_i]`.\n", + "\n", + "The end result is a `journey` array which contains -1 values in all seven cells in `journey[k][p_i]` if the arrival time at `p_i` was not ameliorated at step `k`. `journey[k][p_i]` where there are values other than -1 indicate that the arrival time was ameliorated either by walking or by taking a transport. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[[-1, -1, -1, -1, -1, -1, -1],\n", + " [-1, -1, -1, -1, -1, -1, -1],\n", + " [-1, -1, -1, -1, -1, -1, -1],\n", + " [-1, -1, -1, -1, -1, -1, -1],\n", + " [-1, -1, -1, -1, -1, -1, -1]],\n", + "\n", + " [[-1, -1, -1, -1, -1, -1, -1],\n", + " [ 0, 1, 0, 1, -1, -1, -1],\n", + " [ 0, 1, 0, 2, -1, -1, -1],\n", + " [-1, -1, -1, -1, -1, -1, -1],\n", + " [ 2, 0, 0, 4, -1, -1, -1]],\n", + "\n", + " [[-1, -1, -1, -1, -1, -1, -1],\n", + " [-1, -1, -1, -1, -1, -1, -1],\n", + " [-1, -1, -1, -1, -1, -1, -1],\n", + " [-1, -1, -1, -1, -1, -1, -1],\n", + " [ 1, 1, 2, 4, -1, -1, -1]]])" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "journey[0:k_last]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Backtracking\n", + "\n", + "We reconstruct the actual journey from the `journey` array by backtracking from the arrival stop to the departure stop. At each round k where we notice that the arrival time for the target stop was ameliorated, we start a new leg corresponding to a journey reaching the target stop in k transports.\n", + "\n", + "When backtracking without footpaths, it is sufficient at each round k to check at which stop the trip at round k-1 began. 
\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "def backtrack_journey(k_last, p_t, journey):\n", + " # journey_act = actual journey, will contain the sequence of transports in the correct order\n", + " journey_act = [[] for k in range(0, k_last)] # there's maximum k routes to get to the final stop\n", + " p_board = p_t\n", + " n_legs = 1 # each leg is a journey to from the departure stop to the target stop in exactly k transports\n", + " journey_found = False\n", + "\n", + " # iterating backwards in rounds from k_last -1 to 1\n", + " for k in range(k_last-1, 0, -1): # second argument in range is not included in the boundaries\n", + " # Was the tarrival time at the target stop ameliorated at round k ? \n", + " if np.any(journey[k][p_t]!=np.array([-1, -1, -1, -1, -1, -1, -1])):\n", + "\n", + " # starting a new leg in the list of actual journeys\n", + " journey_found = True\n", + " # iterating from k to 0 to reconstruct the actual journey in k transports\n", + " p_board = p_t\n", + " for k_prime in range(k, 0, -1):\n", + "\n", + " # did we get to that stop by walking ?\n", + " if journey[k_prime][p_board][5] !=-1:\n", + "\n", + " # we keep track of the stop to which we walked to as well as the departure stop of the walk\n", + " stop_walk_dep = journey[k_prime][p_board][4]\n", + " journey_act[k].append([journey[k_prime][stop_walk_dep], journey[k_prime][p_board]])\n", + " p_board = journey[k_prime][stop_walk_dep][2]\n", + "\n", + " # we did not get to that stop by walking\n", + " else:\n", + "\n", + " journey_act[k].append(journey[k_prime][p_board])\n", + " p_board = journey[k_prime][p_board][2]\n", + "\n", + " # reversing the order of journey_act to get journeys from the start stop to the target stop\n", + " journey_act = [j[::-1] for j in journey_act]\n", + "\n", + " # building a human readable output for the trip:\n", + " for k, j in enumerate(journey_act):\n", + "\n", + " if j: # going only 
through non-empty journeys\n", + " print('******************JOURNEY IN {} TRIPS******************'.format(k))\n", + " print('raw representation of the journey in {} trips: {}'.format(k, j))\n", + "\n", + " for k_prime, t in enumerate(j):\n", + " # We did not walk at step k\n", + " if len(t) !=2:\n", + " p_boarding = t[2]\n", + " p_exit = t[3]\n", + " r_k = t[0]\n", + " time_boarding = stopTimes[routes[r_k][3] + \\\n", + " np.where(routeStops[routes[r_k][2]:routes[r_k][2]+routes[r_k][1]] == p_boarding)[0][0] + \\\n", + " t[1]*routes[r_k][1]][1]\n", + " time_exit = stopTimes[routes[r_k][3] + \\\n", + " np.where(routeStops[routes[r_k][2]:routes[r_k][2]+routes[r_k][1]] == p_exit)[0][0] + \\\n", + " t[1]*routes[r_k][1]][0]\n", + " print(\"At stop {}, take route {} leaving at time {} \\n...\".format(p_boarding, r_k, time_boarding))\n", + "\n", + " print(\" and exit at stop {} at time {}\".format(p_exit, time_exit))\n", + "\n", + " # We walked at step k\n", + " elif len(t)==2:\n", + " print(t)\n", + " p_boarding = t[0][2]\n", + " p_exit = t[0][3]\n", + " r_k = t[0][0]\n", + " time_boarding = stopTimes[routes[r_k][3] + \\\n", + " np.where(routeStops[routes[r_k][2]:routes[r_k][2]+routes[r_k][1]] == p_boarding)[0][0] + \\\n", + " t[0][1]*routes[r_k][1]][1]\n", + " time_exit = stopTimes[routes[r_k][3] + \\\n", + " np.where(routeStops[routes[r_k][2]:routes[r_k][2]+routes[r_k][1]] == p_exit)[0][0] + \\\n", + " t[0][1]*routes[r_k][1]][0]\n", + " p_start_walk = t[1][4]\n", + " p_end_walk = t[1][5]\n", + " walk_duration = t[1][6]/60\n", + "\n", + " print(\"At stop {}, take route {} leaving at time {} \\n...\".format(p_boarding, r_k, time_boarding))\n", + "\n", + " print(\"... exit at stop {} at time {}... 
\".format(p_exit, time_exit))\n", + "\n", + " print(\"and walk for {} minutes from stop {} to stop {}.\".format(walk_duration, p_start_walk, p_end_walk))\n", + " \n", + " if not journey_found:\n", + " print('No journey was found for this query')\n", + " return journey_found " + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "******************JOURNEY IN 1 TRIPS******************\n", + "raw representation of the journey in 1 trips: [array([ 2, 0, 0, 4, -1, -1, -1])]\n", + "At stop 0, take route 2 leaving at time 2020-05-11T08:20 \n", + "...\n", + " and exit at stop 4 at time 2020-05-11T09:20\n", + "******************JOURNEY IN 2 TRIPS******************\n", + "raw representation of the journey in 2 trips: [array([ 0, 1, 0, 2, -1, -1, -1]), array([ 1, 1, 2, 4, -1, -1, -1])]\n", + "At stop 0, take route 0 leaving at time 2020-05-11T08:10 \n", + "...\n", + " and exit at stop 2 at time 2020-05-11T09:05\n", + "At stop 2, take route 1 leaving at time 2020-05-11T09:10 \n", + "...\n", + " and exit at stop 4 at time 2020-05-11T09:15\n" + ] + } + ], + "source": [ + "backtrack_journey(k_last, p_t, journey);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Let's add footpaths\n", + "\n", + "For now, we have not tried including footpaths in our dataset. Below, we modify the timetable by adding a new route r3, which links a new stop F to E in a single travel. 
F may be reached in a very long time from A, but in a short time from B, meaning that it should become shorter to:\n", + "\n", + "- Take a trip from A to B\n", + "- Walk from B to F\n", + "- Take a trip from F to E\n", + "\n", + "rather than the current best trip:\n", + "- Take a trip from A to C\n", + "- Take a trip from C to E\n", + "\n", + "\n", + "Note that the single transport solution:\n", + "- Take a trip from A to E\n", + "\n", + "should still appear as the optimal solution for k = 1, i.e one transport is taken." + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "routes = np.array([[2, 3, 0, 0], #r0\n", + " [2, 3, 3, 6], #r1\n", + " [2, 2, 6, 12], #r2\n", + " [2, 2, 8, 16]]) # r3" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "routeStops = np.array([0, 1, 2, # A, B, C\n", + " 3, 2, 4, # D, C, E\n", + " 0, 4, # A, E\n", + " 5, 4]) #F, E" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "stopTimes = np.array([\n", + " # r0, t0\n", + " [None, '2020-05-11T08:00'],\n", + " ['2020-05-11T08:25', '2020-05-11T08:30'],\n", + " ['2020-05-11T08:55', None],\n", + "\n", + " # ro, t1\n", + " [None, '2020-05-11T08:10'],\n", + " ['2020-05-11T08:35', '2020-05-11T08:40'],\n", + " ['2020-05-11T09:05', None],\n", + " \n", + " # r1, t0 \n", + " [None, '2020-05-11T08:00'],\n", + " ['2020-05-11T08:05', '2020-05-11T08:10'],\n", + " ['2020-05-11T08:15', None],\n", + "\n", + " # r1, t1\n", + " [None, '2020-05-11T09:00'],\n", + " ['2020-05-11T09:05', '2020-05-11T09:10'],\n", + " ['2020-05-11T09:15', None],\n", + " \n", + " #r2, t0\n", + " [None, '2020-05-11T08:20'],\n", + " ['2020-05-11T09:20', None],\n", + " \n", + " #r2, t1\n", + " [None, '2020-05-11T08:30'],\n", + " ['2020-05-11T09:30', None],\n", + " \n", + " #r3, t0\n", + " [None, '2020-05-11T08:05'],\n", + " ['2020-05-11T08:25', None],\n", + 
"\n", + " #r3, t1\n", + " [None, '2020-05-11T08:45'],\n", + " ['2020-05-11T09:05', None]],\n", + " dtype='datetime64')" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "stopRoutes = np.array([0, 2, # A\n", + " 0, # B\n", + " 0,1, # C\n", + " 1, # D\n", + " 1, 2, 3, # E\n", + " 3]) # F" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## transfers\n", + "The `transfers` is a 2D `np.ndarray` where each entry `[p_j, time]` represents the time it takes to reach p_j from stop p_i. The correspondence between the indexing of `transfers` and p_i is done via `stops[p_i][1]`, i.e. the first entry in `transfers` containing a connection from stop p_i.\n", + "\n", + "As we cannot store different data types in numpy arrays, `time` will have to be converted to `np.timedelta64`, the format used to make differences between `np.datetime64` variables. We will consider all `time` values as **positive values in seconds**."
+ ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.timedelta64(-30,'m')" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stopTimes[0][1] - stopTimes[1][1]" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.timedelta64(30,'s')" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.timedelta64(30, 's')" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "transfers = np.array([[5, 3600], # A -> F\n", + " [5, 300], # B -> F\n", + " [0, 3600], # F -> A\n", + " [1, 300] # F -> A\n", + " ])" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "stops = np.array([[0, 0],# A\n", + " [2, 1], # B\n", + " [3, None],# C\n", + " [5, None], # D\n", + " [6, None], # E\n", + " [9, 2], # F\n", + " [len(stopRoutes), None]]) # fictive stop to account for length of E" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "******************************STARTING round k=1******************************\n", + "Marked stops at the start of the round: [0]\n", + "Route considered for the queue: (0, 0)\n", + "Route considered for the queue: (2, 0)\n", + "Queue before traversing each route: [(0, 0), (2, 0)]\n", + "\n", + "****TRAVERSING ROUTE r=0 from stop p=0****\n", + "p_i: 0\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=0----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:05\n", + "Departure time considered: t_r_dep: 2020-05-11T08:00\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:05\n", + 
"Departure time considered: t_r_dep: 2020-05-11T08:10\n", + "\n", + "!!!!Hopped on route r=0, trip t=1 at stop p_i=0!!!!\n", + "p_i: 1\n", + "arr_t_p_i: 2020-05-11T08:35\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=1----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 2020-05-11T08:30\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 2020-05-11T08:40\n", + "p_i: 2\n", + "arr_t_p_i: 2020-05-11T09:05\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=2----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****TRAVERSING ROUTE r=2 from stop p=0****\n", + "p_i: 0\n", + "\n", + "----scanning departure times for route r=2 at stop p_i=0----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:05\n", + "Departure time considered: t_r_dep: 2020-05-11T08:20\n", + "\n", + "!!!!Hopped on route r=2, trip t=0 at stop p_i=0!!!!\n", + "p_i: 4\n", + "arr_t_p_i: 2020-05-11T09:20\n", + "\n", + "----scanning departure times for route r=2 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****FOOTPATHS****\n", + "checking walking paths from stop 1\n", + "Considering footpaths from 1 to 5\n", + "Walking to 5 is faster !\n", + "\n", + "******************************STARTING round k=2******************************\n", + "Marked stops at the start of the round: [1, 2, 4, 5]\n", + "Route considered for the queue: (0, 1)\n", + "Route 
considered for the queue: (0, 2)\n", + "Route considered for the queue: (1, 2)\n", + "Route considered for the queue: (1, 4)\n", + "Route considered for the queue: (2, 4)\n", + "Route considered for the queue: (3, 4)\n", + "Route considered for the queue: (3, 5)\n", + "Queue before traversing each route: [(0, 1), (1, 2), (2, 4), (3, 5)]\n", + "\n", + "****TRAVERSING ROUTE r=0 from stop p=1****\n", + "p_i: 1\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=1----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:35\n", + "Departure time considered: t_r_dep: 2020-05-11T08:30\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:35\n", + "Departure time considered: t_r_dep: 2020-05-11T08:40\n", + "\n", + "!!!!Hopped on route r=0, trip t=1 at stop p_i=1!!!!\n", + "p_i: 2\n", + "arr_t_p_i: 2020-05-11T09:05\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=2----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****TRAVERSING ROUTE r=1 from stop p=2****\n", + "p_i: 2\n", + "\n", + "----scanning departure times for route r=1 at stop p_i=2----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: 2020-05-11T08:10\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: 2020-05-11T09:10\n", + "\n", + "!!!!Hopped on route r=1, trip t=1 at stop p_i=2!!!!\n", + "p_i: 4\n", + "arr_t_p_i: 2020-05-11T09:15\n", + "\n", + "----scanning departure times for route r=1 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:20\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: 
tau[k-1][p_i]: 2020-05-11T09:20\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****TRAVERSING ROUTE r=2 from stop p=4****\n", + "p_i: 4\n", + "\n", + "----scanning departure times for route r=2 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:20\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:20\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****TRAVERSING ROUTE r=3 from stop p=5****\n", + "p_i: 5\n", + "\n", + "----scanning departure times for route r=3 at stop p_i=5----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:40\n", + "Departure time considered: t_r_dep: 2020-05-11T08:05\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T08:40\n", + "Departure time considered: t_r_dep: 2020-05-11T08:45\n", + "\n", + "!!!!Hopped on route r=3, trip t=1 at stop p_i=5!!!!\n", + "p_i: 4\n", + "arr_t_p_i: 2020-05-11T09:05\n", + "\n", + "----scanning departure times for route r=3 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:20\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:20\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****FOOTPATHS****\n", + "\n", + "******************************STARTING round k=3******************************\n", + "Marked stops at the start of the round: [4]\n", + "Route considered for the queue: (1, 4)\n", + "Route considered for the queue: (2, 4)\n", + "Route considered for the queue: (3, 4)\n", + "Queue before traversing each route: [(1, 4), (2, 4), (3, 4)]\n", + "\n", + "****TRAVERSING ROUTE r=1 from stop p=4****\n", + "p_i: 4\n", + "\n", + "----scanning departure times for route r=1 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: 
t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****TRAVERSING ROUTE r=2 from stop p=4****\n", + "p_i: 4\n", + "\n", + "----scanning departure times for route r=2 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****TRAVERSING ROUTE r=3 from stop p=4****\n", + "p_i: 4\n", + "\n", + "----scanning departure times for route r=3 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: NaT\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-11T09:05\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****FOOTPATHS****\n" + ] + } + ], + "source": [ + "tau, tau_star, k, journey = raptor_standard(p_s, p_t, tau_0, \n", + " routes = routes, routeStops = routeStops, stopTimes = stopTimes, stopRoutes = stopRoutes, stops = stops)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([['2020-05-11T08:05', '2100-01-01T00:00', '2100-01-01T00:00',\n", + " '2100-01-01T00:00', '2100-01-01T00:00', '2100-01-01T00:00'],\n", + " ['2100-01-01T00:00', '2020-05-11T08:35', '2020-05-11T09:05',\n", + " '2100-01-01T00:00', '2020-05-11T09:20', '2020-05-11T08:40'],\n", + " ['2100-01-01T00:00', '2100-01-01T00:00', '2100-01-01T00:00',\n", + " '2100-01-01T00:00', '2020-05-11T09:05', '2100-01-01T00:00'],\n", + " ['2100-01-01T00:00', '2100-01-01T00:00', '2100-01-01T00:00',\n", + " '2100-01-01T00:00', '2100-01-01T00:00', '2100-01-01T00:00']],\n", + " dtype='datetime64[m]')" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "k_last = k\n", + "tau[0:k_last+1]" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['2100-01-01T00:00', '2020-05-11T08:35', '2020-05-11T09:05',\n", + " '2100-01-01T00:00', '2020-05-11T09:05', '2020-05-11T08:40'],\n", + " dtype='datetime64[m]')" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tau_star" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[[ -1, -1, -1, -1, -1, -1, -1],\n", + " [ -1, -1, -1, -1, -1, -1, -1],\n", + " [ -1, -1, -1, -1, -1, -1, -1],\n", + " [ -1, -1, -1, -1, -1, -1, -1],\n", + " [ -1, -1, -1, -1, -1, -1, -1],\n", + " [ -1, -1, -1, -1, -1, -1, -1]],\n", + "\n", + " [[ -1, -1, -1, -1, -1, -1, -1],\n", + " [ 0, 1, 0, 1, -1, -1, -1],\n", + " [ 0, 1, 0, 2, -1, -1, -1],\n", + " [ -1, -1, -1, -1, -1, -1, -1],\n", + " [ 2, 0, 0, 4, -1, -1, -1],\n", + " [ -1, -1, -1, -1, 1, 5, 300]],\n", + "\n", + " [[ -1, -1, -1, -1, -1, -1, -1],\n", + " [ -1, -1, -1, -1, -1, -1, -1],\n", + " [ -1, -1, -1, -1, -1, -1, -1],\n", + " [ -1, -1, -1, -1, -1, -1, -1],\n", + " [ 3, 1, 5, 4, -1, -1, -1],\n", + " [ -1, -1, -1, -1, -1, -1, -1]]])" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "journey[0:k_last]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Backtracking with footpaths\n", + "\n", + "When backtracking, with footpaths, we first look through the footpaths to backtrack to the departure stop for the walk, and then use the departure stop of the walk as an arrival stop for a transport.\n", + "\n", + "But with footpaths added, it is possible to reach a stop C from stop A by:\n", + "- first taking a transport to a stop B\n", + "- walking from stop B to stop C.\n", + "\n", + "Therefore, we need to 
keep track of all the footpaths taken at step i that ameliorated arrival times at the target stop.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "******************JOURNEY IN 1 TRIPS******************\n", + "raw representation of the journey in 1 trips: [array([ 2, 0, 0, 4, -1, -1, -1])]\n", + "At stop 0, take route 2 leaving at time 2020-05-11T08:20 \n", + "...\n", + " and exit at stop 4 at time 2020-05-11T09:20\n", + "******************JOURNEY IN 2 TRIPS******************\n", + "raw representation of the journey in 2 trips: [[array([ 0, 1, 0, 1, -1, -1, -1]), array([ -1, -1, -1, -1, 1, 5, 300])], array([ 3, 1, 5, 4, -1, -1, -1])]\n", + "[array([ 0, 1, 0, 1, -1, -1, -1]), array([ -1, -1, -1, -1, 1, 5, 300])]\n", + "At stop 0, take route 0 leaving at time 2020-05-11T08:10 \n", + "...\n", + "... exit at stop 1 at time 2020-05-11T08:35... \n", + "and walk for 5.0 minutes from stop 1 to stop 5.\n", + "At stop 5, take route 3 leaving at time 2020-05-11T08:45 \n", + "...\n", + " and exit at stop 4 at time 2020-05-11T09:05\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "backtrack_journey(k_last, p_t, journey)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Trying to run the standard RAPTOR on real size data\n", + "### Loading real sized data" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1 11 0 0]\n", + " [1 11 11 11]\n", + " [1 11 22 22]\n", + " ...\n", + " [1 6 237432 245713]\n", + " [1 13 237438 245719]\n", + " [3 2 237451 245732]]\n", + "We find 16210 routes in the data\n" + ] + } + ], + "source": [ + "import pickle\n", + "# step 1 convert 
the data from string to numpy series\n", + "routes_real = pickle.load( open( \"../data/routes_array2.pkl\", \"rb\" ) )\n", + "print(routes_real)\n", + "print('We find {} routes in the data'.format(len(routes_real)))" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0 None]\n", + " [4 None]\n", + " [7 None]\n", + " ...\n", + " [7841 None]\n", + " [7844 None]\n", + " [7847 None]]\n", + "We find 1407 stops in the data\n" + ] + } + ], + "source": [ + "stops_real = pickle.load(open( \"../data/stops_array.pkl\", \"rb\" ) )\n", + "print(stops_real)\n", + "print('We find {} stops in the data'.format(len(stops_real)))" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 'NaT' '2020-05-21T16:53:00.000000000']\n", + " ['2020-05-21T16:55:00.000000000' '2020-05-21T16:55:00.000000000']\n", + " ['2020-05-21T16:57:00.000000000' '2020-05-21T16:57:00.000000000']\n", + " ...\n", + " ['2020-05-21T15:10:00.000000000' 'NaT']\n", + " [ 'NaT' '2020-05-21T16:45:00.000000000']\n", + " ['2020-05-21T17:05:00.000000000' 'NaT']]\n", + "We find 245738 arrival/departure times for stops in the data\n" + ] + } + ], + "source": [ + "stopTimes_real = pickle.load(open( \"../data/stop_times_array1.pkl\", \"rb\" ) )\n", + "print(stopTimes_real)\n", + "print('We find {} arrival/departure times for stops in the data'.format(len(stopTimes_real)))" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1166 146]\n", + " [1270 360]\n", + " [ 2 8]\n", + " ...\n", + " [ 108 371]\n", + " [ 102 439]\n", + " [1739 519]]\n", + "We find 12564 footpaths (bidirectional) in the data\n" + ] + } + ], + "source": [ + "transfer_real = pickle.load(open( \"../data/transfer_array.pkl\", \"rb\" ) 
)\n", + "print(transfer_real)\n", + "print('We find {} footpaths (bidirectional) in the data'.format(len(transfer_real)))" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0 1 88 ... 736 735 736]\n", + "We find 8000 (r, p) route stops combinations in the data\n" + ] + } + ], + "source": [ + "stopRoutes_real = pickle.load(open( \"../data/stop_routes_array.pkl\", \"rb\" ) )\n", + "# The route index alone was not selected:\n", + "#stopRoutes_real = stopRoutes_real[:, 1]\n", + "print(stopRoutes_real)\n", + "print('We find {} (r, p) route stops combinations in the data'.format(len(stopRoutes_real)))\n", + "#print('We find {} unique routes desserving stops'.format(len(np.unique(stopRoutes_real))))" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0 1 2 ... 
759 554 493]\n", + "We find 7849 route, stops combinations\n", + "We find 1407 unique stops desserving routes\n" + ] + } + ], + "source": [ + "routeStops_real = pickle.load(open( \"../data/route_stops_array.pkl\", \"rb\" ) )\n", + "print(routeStops_real)\n", + "print('We find {} route, stops combinations'.format(len(routeStops_real)))\n", + "print('We find {} unique stops desserving routes'.format(len(np.unique(routeStops_real))))" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "******************************STARTING round k=1******************************\n", + "Marked stops at the start of the round: [0]\n", + "Route considered for the queue: (0, 0)\n", + "Route considered for the queue: (1, 0)\n", + "Route considered for the queue: (88, 0)\n", + "Route considered for the queue: (89, 0)\n", + "Queue before traversing each route: [(0, 0), (1, 0), (88, 0), (89, 0)]\n", + "\n", + "****TRAVERSING ROUTE r=0 from stop p=0****\n", + "p_i: 0\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=0----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-21T12:00\n", + "Departure time considered: t_r_dep: 2020-05-21T16:53:00.000000000\n", + "\n", + "!!!!Hopped on route r=0, trip t=0 at stop p_i=0!!!!\n", + "p_i: 1\n", + "arr_t_p_i: 2020-05-21T16:55:00.000000000\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=1----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 2020-05-21T16:55:00.000000000\n", + "p_i: 2\n", + "arr_t_p_i: 2020-05-21T16:57:00.000000000\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=2----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 2020-05-21T16:57:00.000000000\n", + "p_i: 0\n", + "arr_t_p_i: NaT\n", + "\n", + 
"----scanning departure times for route r=0 at stop p_i=0----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2020-05-21T12:00\n", + "Departure time considered: t_r_dep: 2020-05-21T16:53:00.000000000\n", + "\n", + "!!!!Hopped on route r=0, trip t=0 at stop p_i=0!!!!\n", + "p_i: 1\n", + "arr_t_p_i: 2020-05-21T16:55:00.000000000\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=1----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 2020-05-21T16:55:00.000000000\n", + "p_i: 2\n", + "arr_t_p_i: 2020-05-21T16:57:00.000000000\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=2----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 2020-05-21T16:57:00.000000000\n", + "p_i: 3\n", + "arr_t_p_i: 2020-05-21T17:01:00.000000000\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=3----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 2020-05-21T17:01:00.000000000\n", + "p_i: 4\n", + "arr_t_p_i: 2020-05-21T17:01:00.000000000\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=4----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 2020-05-21T17:01:00.000000000\n", + "p_i: 5\n", + "arr_t_p_i: 2020-05-21T17:03:00.000000000\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=5----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 2020-05-21T17:03:00.000000000\n", + "p_i: 6\n", + "arr_t_p_i: 2020-05-21T17:03:00.000000000\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=6----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: 
2020-05-21T17:03:00.000000000\n", + "p_i: 7\n", + "arr_t_p_i: 2020-05-21T17:04:00.000000000\n", + "\n", + "----scanning departure times for route r=0 at stop p_i=7----\n", + "Earliest arrival time at previous step: tau[k-1][p_i]: 2100-01-01T00:00\n", + "Departure time considered: t_r_dep: NaT\n", + "\n", + "****TRAVERSING ROUTE r=1 from stop p=0****\n" + ] + }, + { + "ename": "IndexError", + "evalue": "index 0 is out of bounds for axis 0 with size 0", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m tau, tau_star, k, journey = raptor_standard(p_s, p_t, tau_0, \n\u001b[0;32m----> 7\u001b[0;31m routes = routes_real, routeStops = routeStops_real, stopTimes = stopTimes_real, stopRoutes = stopRoutes_real, stops = stops_real)\n\u001b[0m", + "\u001b[0;32m\u001b[0m in \u001b[0;36mraptor_standard\u001b[0;34m(p_s, p_t, tau_0, routes, routeStops, stopTimes, stopRoutes, stops, k_max)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# we only traverse the route starting at p, not from the beginning of the route\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 66\u001b[0;31m for p_i in routeStops[routes[r][2]+np.where(routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == p)[0][0]:\\\n\u001b[0m\u001b[1;32m 67\u001b[0m routes[r][2]+routes[r][1]]:\n\u001b[1;32m 68\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"p_i: {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp_i\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mIndexError\u001b[0m: index 
0 is out of bounds for axis 0 with size 0" + ] + } + ], + "source": [ + "p_s_real = 10 # start stop = A\n", + "p_t = 4 # target stop = E\n", + "tau_0 = np.datetime64('2020-05-21T12:00:00') # departure time 08:05\n", + "k_max = 10 # we set a maximum number of transports to pre-allocate memory for the numpy array tau_i\n", + "\n", + "tau, tau_star, k, journey = raptor_standard(p_s, p_t, tau_0, \n", + " routes = routes_real, routeStops = routeStops_real, stopTimes = stopTimes_real, stopRoutes = stopRoutes_real, stops = stops_real)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code for prototyping and debugging:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_s = 0 # start stop = A\n", + "p_t = 4 # target stop = E\n", + "tau_0 = np.datetime64('2020-05-11T08:05') # departure time 08:05\n", + "k_max = 10 # we set a maximum number of transports to pre-allocate memory for the numpy array tau_i" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# initialization\n", + "n_stops = len(stops)\n", + "\n", + "# earliest arrival time at each stop for each round.\n", + "tau = np.full(shape=(k_max, n_stops), fill_value = np.datetime64('2100-01-01T00:00')) # 2100 instead of infinity # number of stops * max number of transports\n", + "\n", + "# earliest arrival time at each stop, indep. 
of round\n", + "tau_star = np.full(shape=n_stops, fill_value = np.datetime64('2100-01-01T00:00'))\n", + "\n", + "marked = [p_s]\n", + "q = []\n", + "tau[0, p_s] = tau_0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.where(routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == p_i)[0][0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == p_i" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_i" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t_r_dep = stopTimes[routes[r][3]+\\\n", + " # offset corresponding to stop p_i in route r\n", + " np.where(routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == p_i)[0][0] + \\\n", + " routes[r][1]*t_r][1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if np.where(routeStops[routes[1][2]:routes[1][2]+routes[1][1]] == 2) <\\\n", + "np.where(routeStops[routes[1][2]:routes[1][2]+routes[1][1]] == 3):\n", + " print(\"hello\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "routeStops[routes[1][2] + np.where(routeStops[routes[1][2]:routes[1][2]+routes[1][1]] == 2)[0][0]:routes[1][2]+routes[1][1]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "routeStops[routes[1][2] + np.where(routeStops[routes[1][2]:routes[1][2]+routes[1][1]] == 2)[0][0]:6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "routeStops[routes[1][2]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"routeStops[np.where(routeStops[routes[1][2]:routes[1][2]+routes[1][1]] == 2)[0][0]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if True and \\\n", + " True:\n", + " print(\"hello\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tau[0][0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "stopTimes[3][1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a = np.arange(1, 10)\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a[1:10:2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stopTimes[routes[0][3]+\\\n", + " # offset corresponding to stop p_i in route r\n", + " np.where(routeStops[routes[0][2]:routes[0][2]+routes[0][1]] == 0)[0][0]:\\\n", + " # end of the trips of r\n", + " routes[0][3]+routes[0][0]*routes[0][1]:\\\n", + " # we can jump from the number of stops in r to find the next departure of route r at p_i\n", + " routes[0][1]\n", + " ]\n", + "# we may more simply loop through all trips, and stop as soon as the departure time is after the arrival time\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stopTimes[routes[0][3]+\\\n", + " # offset corresponding to stop p_i in route r\n", + " np.where(routeStops[routes[0][2]:routes[0][2]+routes[0][1]] == 0)[0][0]][1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stopTimes[routes[1][3]+\\\n", + " # offset corresponding to stop p_i in route r\n", + " np.where(routeStops[routes[1][2]:routes[1][2]+routes[1][1]] == 3)[0][0] + \\\n", + " 
routes[1][1]*1][1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# t_r is a trip that belongs to route r. t_r can take value 0 to routes[r][0]-1\n", + "t = None\n", + "r = 1\n", + "tau_k_1 = tau[0][0]\n", + "p_i = 3\n", + "\n", + "t_r = 0\n", + "while True:\n", + " \n", + " t_r_dep = stopTimes[routes[r][3]+\\\n", + " # offset corresponding to stop p_i in route r\n", + " np.where(routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == p_i)[0][0] + \\\n", + " routes[r][1]*t_r][1]\n", + " \n", + " if t_r_dep > tau_k_1:\n", + " # retrieving the index of the departure time of the trip in stopTimes\n", + " #t = routes[r][3] + t_r * routes[r][1]\n", + " t = t_r\n", + " break\n", + " t_r += 1\n", + " # we could not hop on any trip at this stop\n", + " if t_r == routes[r][0]:\n", + " break\n", + " \n", + "print(\"done\")\n", + "print(t)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "r = 1\n", + "t = 1\n", + "p_i = 2\n", + "# 1st trip of route + offset for the right trip + offset for the right stop\n", + "stopTimes[routes[r][3] + t * routes[r][1] + np.where(routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == p_i)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "d = []\n", + "not d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "r = 1\n", + "t = 0\n", + "p_i = 4\n", + "arr_t_p_i = stopTimes[routes[r][3] + \\\n", + " t * routes[r][1] + \\\n", + " np.where(routeStops[routes[r][2]:routes[r][2]+routes[r][1]] == p_i)[0][0]][0]\n", + "arr_t_p_i" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.datetime64('NaT') > np.datetime64('2100-01-01')" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "np.datetime64('NaT') < np.datetime64('2100-01-01')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,md,py:percent" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}