diff --git a/.gitattributes b/.gitattributes
index 101dcef..699bf1a 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,37 +1,38 @@
data/distributions.pickle filter=lfs diff=lfs merge=lfs -text
data/stop_times_array_version2.csv filter=lfs diff=lfs merge=lfs -text
data/transfer_array_version2.csv filter=lfs diff=lfs merge=lfs -text
data/routes_array_version2.csv filter=lfs diff=lfs merge=lfs -text
data/route_stops_array_version2.csv filter=lfs diff=lfs merge=lfs -text
data/stop_routes_array_version3.csv filter=lfs diff=lfs merge=lfs -text
data/stops_array_version2.csv filter=lfs diff=lfs merge=lfs -text
object.data filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
data/*.pkl filter=lfs diff=lfs merge=lfs -text
data/dere.pkl filter=lfs diff=lfs merge=lfs -text
data/transfer_array.pkl filter=lfs diff=lfs merge=lfs -text
data/stops_array.pkl filter=lfs diff=lfs merge=lfs -text
data/stop_times_array.pkl filter=lfs diff=lfs merge=lfs -text
data/stop_routes_array.pkl filter=lfs diff=lfs merge=lfs -text
data/routes_array.pkl filter=lfs diff=lfs merge=lfs -text
data/route_stops_array.pkl filter=lfs diff=lfs merge=lfs -text
data/route_stops_df.pkl filter=lfs diff=lfs merge=lfs -text
data/routes_array_df.pkl filter=lfs diff=lfs merge=lfs -text
data/stop_routes_df.pkl filter=lfs diff=lfs merge=lfs -text
data/stop_times_df.pkl filter=lfs diff=lfs merge=lfs -text
data/stops_df.pkl filter=lfs diff=lfs merge=lfs -text
data/transfer_df.pkl filter=lfs diff=lfs merge=lfs -text
data/distrib_recov_tab_stopID_hour.pkl.gz filter=lfs diff=lfs merge=lfs -text
data/join_distribution_all.pkl.gz filter=lfs diff=lfs merge=lfs -text
data/join_distribution_cumulative_p.pkl.gz filter=lfs diff=lfs merge=lfs -text
data/join_distribution_cumulative_p_2.pkl.gz filter=lfs diff=lfs merge=lfs -text
data/route_stops_array_cyril.pkl filter=lfs diff=lfs merge=lfs -text
data/route_stops_df_cyril.pkl filter=lfs diff=lfs merge=lfs -text
data/routes_array_cyril.pkl filter=lfs diff=lfs merge=lfs -text
data/routes_array_df_cyril.pkl filter=lfs diff=lfs merge=lfs -text
data/stop_routes_array_cyril.pkl filter=lfs diff=lfs merge=lfs -text
data/stop_routes_df_cyril.pkl filter=lfs diff=lfs merge=lfs -text
data/stop_times_array_cyril.pkl filter=lfs diff=lfs merge=lfs -text
data/transfer_array_cyril.pkl filter=lfs diff=lfs merge=lfs -text
data/transfer_df_cyril.pkl filter=lfs diff=lfs merge=lfs -text
data/stops_array_cyril.pkl filter=lfs diff=lfs merge=lfs -text
+data/stop_times_df_cyril.pkl filter=lfs diff=lfs merge=lfs -text
diff --git a/data/route_stops_array.pkl b/data/route_stops_array.pkl
deleted file mode 100644
index 3a462f4..0000000
--- a/data/route_stops_array.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:33781be33c690045bf3ac6f60923baf31e26b2f06d4af4bfc06f0073b009b105
-size 62951
diff --git a/data/route_stops_array2.pkl b/data/route_stops_array2.pkl
deleted file mode 100644
index 6103926..0000000
--- a/data/route_stops_array2.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ec9af358ad5e0802d624b2795ac20f3282fb689c21752d71f9fef698cbc3785b
-size 1899785
diff --git a/data/route_stops_df.pkl b/data/route_stops_df.pkl
deleted file mode 100644
index 540c4ae..0000000
--- a/data/route_stops_df.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a55d740bc2affad2b88f0e0a6725b3a8edf8e41feda7e65858bc7b2466c7c997
-size 189233
diff --git a/data/route_stops_df_cyril.pkl b/data/route_stops_df_cyril.pkl
index d22734d..70461ea 100644
--- a/data/route_stops_df_cyril.pkl
+++ b/data/route_stops_df_cyril.pkl
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:c62d0046e831eee39e5ee0605aa0eac60d6bad85c0c5d386d91ee81380216265
+oid sha256:3f86289015b10f212f8e2415b29cf5bd584a575fdd860d701cef87a46edb0bb8
size 369113
diff --git a/data/routes_array.pkl b/data/routes_array.pkl
deleted file mode 100644
index 8c462d7..0000000
--- a/data/routes_array.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4edf41375449fd8f7c047b87e7eefabf62ce9bcd207501afffcb41289b3f0a38
-size 8349
diff --git a/data/routes_array2.pkl b/data/routes_array2.pkl
deleted file mode 100644
index dd063c1..0000000
--- a/data/routes_array2.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:76c403daeb38714084b99e1ecc0dd151ac2977d3445120df958715ebaf3478d8
-size 206451
diff --git a/data/routes_array_df.pkl b/data/routes_array_df.pkl
deleted file mode 100644
index af1e073..0000000
--- a/data/routes_array_df.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7383cccd985e005f6d42317719be584862bacb677b45da30fd478fa92ab6a4a4
-size 24047
diff --git a/data/stop_routes_array.pkl b/data/stop_routes_array.pkl
deleted file mode 100644
index 3b3b087..0000000
--- a/data/stop_routes_array.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:dacc5dff452323522fc787b66a55e8704e72726298d6de190e8a6a2f493dd48a
-size 62951
diff --git a/data/stop_routes_df.pkl b/data/stop_routes_df.pkl
deleted file mode 100644
index 0db7ae1..0000000
--- a/data/stop_routes_df.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e47a3f2e849d4d5a2e20f232ff02c4f08e78c996b934a80af06d152425541ab2
-size 252167
diff --git a/data/stop_routes_df_cyril.pkl b/data/stop_routes_df_cyril.pkl
index 960efe5..484e287 100644
--- a/data/stop_routes_df_cyril.pkl
+++ b/data/stop_routes_df_cyril.pkl
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:b563d73903bf423b3c49fa4987691f28ee05261baad5595f19c46b56cf2a382f
+oid sha256:53fe1e7c5b985f8d14528085832799ded6cf202c28872d6ff1e63953c2c20716
size 538099
diff --git a/data/stop_times_array.pkl b/data/stop_times_array.pkl
deleted file mode 100644
index 8e5c056..0000000
--- a/data/stop_times_array.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3c38ce272796cd8dcc9f560ba57840e43ce167893d43f4840087be1652bd25a4
-size 4167529
diff --git a/data/stop_times_array1.pkl b/data/stop_times_array1.pkl
deleted file mode 100644
index b54a600..0000000
--- a/data/stop_times_array1.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9a5b61142e94d00f3e247423d80655e35601baffb7d63810446145e37bcdf775
-size 3931993
diff --git a/data/stop_times_array2.pkl b/data/stop_times_array2.pkl
deleted file mode 100644
index b54a600..0000000
--- a/data/stop_times_array2.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9a5b61142e94d00f3e247423d80655e35601baffb7d63810446145e37bcdf775
-size 3931993
diff --git a/data/stop_times_array_cyril.pkl b/data/stop_times_array_cyril.pkl
index 8e5c056..524a974 100644
--- a/data/stop_times_array_cyril.pkl
+++ b/data/stop_times_array_cyril.pkl
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:3c38ce272796cd8dcc9f560ba57840e43ce167893d43f4840087be1652bd25a4
+oid sha256:279ef5c4d19dc2f4d4194553ac95b57364efa7fdf19636831fcdc39d2b2a127d
size 4167529
diff --git a/data/stop_times_df.pkl b/data/stop_times_df.pkl
deleted file mode 100644
index 87e4d93..0000000
--- a/data/stop_times_df.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cd7f45ef2b0660ddc15bc6d752daa248ab85298e925c40bf4f393e6ae1edb084
-size 36954121
diff --git a/data/stop_times_df_cyril.pkl b/data/stop_times_df_cyril.pkl
new file mode 100644
index 0000000..daea263
--- /dev/null
+++ b/data/stop_times_df_cyril.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b44372accdcd34651a05e4ce97ff137a448fc5f8d87fb5ea97ffe1fd13a1ad0e
+size 36173268
diff --git a/data/stops_array.pkl b/data/stops_array.pkl
deleted file mode 100644
index 94cf569..0000000
--- a/data/stops_array.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c26208697b3262e9ee15bd779c2c700b2f2fbf9cfd7e4af84679b0a5d1e4e538
-size 8397
diff --git a/data/stops_df.pkl b/data/stops_df.pkl
deleted file mode 100644
index 4ac6057..0000000
--- a/data/stops_df.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fec751e9c11205049c3f1947efea0daeaf480b6abec757de949bfb799b11b79e
-size 20373
diff --git a/data/transfer_array.pkl b/data/transfer_array.pkl
deleted file mode 100644
index 0a980a5..0000000
--- a/data/transfer_array.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:aefdfcb53b474746b34a7585bb0439091929fbbaa2ee91223a61bedea4386684
-size 98045
diff --git a/data/transfer_df.pkl b/data/transfer_df.pkl
deleted file mode 100644
index 6127bbb..0000000
--- a/data/transfer_df.pkl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ff059f33c5f06de1afb7222683b4d58d01f279abb6025642d899319bc3fea3fe
-size 763207
diff --git a/notebooks/Arrays_Cyrill_data.ipynb b/notebooks/Arrays_Cyrill_data.ipynb
index c7cfa64..cd664e2 100644
--- a/notebooks/Arrays_Cyrill_data.ipynb
+++ b/notebooks/Arrays_Cyrill_data.ipynb
@@ -1,5236 +1,5236 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Preprocessing part 2: preparing the arrays\n",
"In this notebook we take two datasets prepared in Spark, stop_times and transfers, and convert them into the array format needed to run RAPTOR."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Outline\n",
"In this notebook the following actions are performed:\n",
"- create array"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import packages"
]
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pickle\n",
"import itertools"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read files\n",
"Before running, make sure the .csv files are in /data. If not, run the notebook \"transfer_to_local\" first."
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" route_id \n",
" stop_id_general \n",
" trip_id \n",
" stop_id \n",
" arrival_time \n",
" departure_time \n",
" stop_sequence \n",
" stop_name \n",
" stop_lat \n",
" stop_lon \n",
" trip_headsign \n",
" trip_short_name \n",
" direction_id \n",
" departure_first_stop \n",
" route_int \n",
" stop_count \n",
" stop_int \n",
" route_desc \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 26-66-j19-1 \n",
" 8591205 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591205 \n",
" 17:00:00 \n",
" 17:00:00 \n",
" 3 \n",
" Zürich, Hürlimannplatz \n",
" 47.365066 \n",
" 8.526539 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 1317 \n",
" Bus \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 26-66-j19-1 \n",
" 8591415 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591415 \n",
" 17:02:00 \n",
" 17:02:00 \n",
" 4 \n",
" Zürich, Waffenplatzstrasse \n",
" 47.361482 \n",
" 8.525749 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 1267 \n",
" Bus \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 26-66-j19-1 \n",
" 8591204 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591204 \n",
" 17:03:00 \n",
" 17:03:00 \n",
" 5 \n",
" Zürich, Hügelstrasse \n",
" 47.358543 \n",
" 8.526997 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 67 \n",
" Bus \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" 26-66-j19-1 \n",
" 8591098 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591098 \n",
" 17:04:00 \n",
" 17:04:00 \n",
" 6 \n",
" Zürich, Brunau/Mutschellenstr. \n",
" 47.355147 \n",
" 8.527141 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 512 \n",
" Bus \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" 26-66-j19-1 \n",
" 8591392 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591392 \n",
" 17:05:00 \n",
" 17:05:00 \n",
" 7 \n",
" Zürich, Thujastrasse \n",
" 47.350187 \n",
" 8.527806 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 403 \n",
" Bus \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 route_id stop_id_general trip_id stop_id \\\n",
"0 0 26-66-j19-1 8591205 17.TA.26-66-j19-1.1.H 8591205 \n",
"1 1 26-66-j19-1 8591415 17.TA.26-66-j19-1.1.H 8591415 \n",
"2 2 26-66-j19-1 8591204 17.TA.26-66-j19-1.1.H 8591204 \n",
"3 3 26-66-j19-1 8591098 17.TA.26-66-j19-1.1.H 8591098 \n",
"4 4 26-66-j19-1 8591392 17.TA.26-66-j19-1.1.H 8591392 \n",
"\n",
" arrival_time departure_time stop_sequence stop_name \\\n",
"0 17:00:00 17:00:00 3 Zürich, Hürlimannplatz \n",
"1 17:02:00 17:02:00 4 Zürich, Waffenplatzstrasse \n",
"2 17:03:00 17:03:00 5 Zürich, Hügelstrasse \n",
"3 17:04:00 17:04:00 6 Zürich, Brunau/Mutschellenstr. \n",
"4 17:05:00 17:05:00 7 Zürich, Thujastrasse \n",
"\n",
" stop_lat stop_lon trip_headsign trip_short_name direction_id \\\n",
"0 47.365066 8.526539 Zürich, Neubühl 3870 0 \n",
"1 47.361482 8.525749 Zürich, Neubühl 3870 0 \n",
"2 47.358543 8.526997 Zürich, Neubühl 3870 0 \n",
"3 47.355147 8.527141 Zürich, Neubühl 3870 0 \n",
"4 47.350187 8.527806 Zürich, Neubühl 3870 0 \n",
"\n",
" departure_first_stop route_int stop_count stop_int route_desc \n",
"0 16:55:00 1225 12 1317 Bus \n",
"1 16:55:00 1225 12 1267 Bus \n",
"2 16:55:00 1225 12 67 Bus \n",
"3 16:55:00 1225 12 512 Bus \n",
"4 16:55:00 1225 12 403 Bus "
]
},
- "execution_count": 2,
+ "execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#stop_times\n",
"stop_times_curated = pd.read_csv(\"../data/stop_times_final_cyril.csv\")\n",
"stop_times_curated.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We drop the `Unnamed: 0` column, which is not useful to us"
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"stop_times_curated = stop_times_curated.drop(columns=[\"Unnamed: 0\"])"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" stop_id \n",
" stop_id2 \n",
" distance \n",
" Transfer_time_sec \n",
" stop_name \n",
" stop_name2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 8500926 \n",
" 8590616 \n",
" 0.122430 \n",
" 146 \n",
" Oetwil a.d.L., Schweizäcker \n",
" Geroldswil, Schweizäcker \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 8500926 \n",
" 8590737 \n",
" 0.300175 \n",
" 360 \n",
" Oetwil a.d.L., Schweizäcker \n",
" Oetwil an der Limmat, Halde \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 8502186 \n",
" 8502186:0:1 \n",
" 0.006762 \n",
" 8 \n",
" Dietikon Stoffelbach \n",
" Dietikon Stoffelbach \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" 8502186 \n",
" 8502186:0:2 \n",
" 0.013524 \n",
" 16 \n",
" Dietikon Stoffelbach \n",
" Dietikon Stoffelbach \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" 8502186 \n",
" 8502186P \n",
" 0.000000 \n",
" 0 \n",
" Dietikon Stoffelbach \n",
" Dietikon Stoffelbach \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 stop_id stop_id2 distance Transfer_time_sec \\\n",
"0 0 8500926 8590616 0.122430 146 \n",
"1 1 8500926 8590737 0.300175 360 \n",
"2 2 8502186 8502186:0:1 0.006762 8 \n",
"3 3 8502186 8502186:0:2 0.013524 16 \n",
"4 4 8502186 8502186P 0.000000 0 \n",
"\n",
" stop_name stop_name2 \n",
"0 Oetwil a.d.L., Schweizäcker Geroldswil, Schweizäcker \n",
"1 Oetwil a.d.L., Schweizäcker Oetwil an der Limmat, Halde \n",
"2 Dietikon Stoffelbach Dietikon Stoffelbach \n",
"3 Dietikon Stoffelbach Dietikon Stoffelbach \n",
"4 Dietikon Stoffelbach Dietikon Stoffelbach "
]
},
- "execution_count": 4,
+ "execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#transfers\n",
"transfers = pd.read_csv(\"../data/transfers.csv\")\n",
"transfers.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transfers: delete transfers to the same stop and get stop_int and stop_int_2\n"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"12564"
]
},
- "execution_count": 5,
+ "execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check number stops transfers\n",
"transfers.stop_id.count()"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" stop_id \n",
" stop_id2 \n",
" distance \n",
" Transfer_time_sec \n",
" stop_name \n",
" stop_name2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 8500926 \n",
" 8590616 \n",
" 0.122430 \n",
" 146 \n",
" Oetwil a.d.L., Schweizäcker \n",
" Geroldswil, Schweizäcker \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 8500926 \n",
" 8590737 \n",
" 0.300175 \n",
" 360 \n",
" Oetwil a.d.L., Schweizäcker \n",
" Oetwil an der Limmat, Halde \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 8502186 \n",
" 8502186:0:1 \n",
" 0.006762 \n",
" 8 \n",
" Dietikon Stoffelbach \n",
" Dietikon Stoffelbach \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" 8502186 \n",
" 8502186:0:2 \n",
" 0.013524 \n",
" 16 \n",
" Dietikon Stoffelbach \n",
" Dietikon Stoffelbach \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" 8502186 \n",
" 8502186P \n",
" 0.000000 \n",
" 0 \n",
" Dietikon Stoffelbach \n",
" Dietikon Stoffelbach \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 stop_id stop_id2 distance Transfer_time_sec \\\n",
"0 0 8500926 8590616 0.122430 146 \n",
"1 1 8500926 8590737 0.300175 360 \n",
"2 2 8502186 8502186:0:1 0.006762 8 \n",
"3 3 8502186 8502186:0:2 0.013524 16 \n",
"4 4 8502186 8502186P 0.000000 0 \n",
"\n",
" stop_name stop_name2 \n",
"0 Oetwil a.d.L., Schweizäcker Geroldswil, Schweizäcker \n",
"1 Oetwil a.d.L., Schweizäcker Oetwil an der Limmat, Halde \n",
"2 Dietikon Stoffelbach Dietikon Stoffelbach \n",
"3 Dietikon Stoffelbach Dietikon Stoffelbach \n",
"4 Dietikon Stoffelbach Dietikon Stoffelbach "
]
},
- "execution_count": 6,
+ "execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfers.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We delete transfers to the same stop"
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"transfers_df = transfers[transfers['stop_id'] != transfers['stop_id2']]"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"12564"
]
},
- "execution_count": 8,
+ "execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfers_df.stop_id.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We create the stop_int column in transfers with an inner join on stop_id. This eliminates transfers whose departure stop is not in stop_times"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" route_id \n",
" stop_id_general \n",
" trip_id \n",
" stop_id \n",
" arrival_time \n",
" departure_time \n",
" stop_sequence \n",
" stop_name \n",
" stop_lat \n",
" stop_lon \n",
" trip_headsign \n",
" trip_short_name \n",
" direction_id \n",
" departure_first_stop \n",
" route_int \n",
" stop_count \n",
" stop_int \n",
" route_desc \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 26-66-j19-1 \n",
" 8591205 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591205 \n",
" 17:00:00 \n",
" 17:00:00 \n",
" 3 \n",
" Zürich, Hürlimannplatz \n",
" 47.365066 \n",
" 8.526539 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 1317 \n",
" Bus \n",
" \n",
" \n",
" 1 \n",
" 26-66-j19-1 \n",
" 8591415 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591415 \n",
" 17:02:00 \n",
" 17:02:00 \n",
" 4 \n",
" Zürich, Waffenplatzstrasse \n",
" 47.361482 \n",
" 8.525749 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 1267 \n",
" Bus \n",
" \n",
" \n",
" 2 \n",
" 26-66-j19-1 \n",
" 8591204 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591204 \n",
" 17:03:00 \n",
" 17:03:00 \n",
" 5 \n",
" Zürich, Hügelstrasse \n",
" 47.358543 \n",
" 8.526997 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 67 \n",
" Bus \n",
" \n",
" \n",
" 3 \n",
" 26-66-j19-1 \n",
" 8591098 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591098 \n",
" 17:04:00 \n",
" 17:04:00 \n",
" 6 \n",
" Zürich, Brunau/Mutschellenstr. \n",
" 47.355147 \n",
" 8.527141 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 512 \n",
" Bus \n",
" \n",
" \n",
" 4 \n",
" 26-66-j19-1 \n",
" 8591392 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591392 \n",
" 17:05:00 \n",
" 17:05:00 \n",
" 7 \n",
" Zürich, Thujastrasse \n",
" 47.350187 \n",
" 8.527806 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 403 \n",
" Bus \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_id stop_id_general trip_id stop_id arrival_time \\\n",
"0 26-66-j19-1 8591205 17.TA.26-66-j19-1.1.H 8591205 17:00:00 \n",
"1 26-66-j19-1 8591415 17.TA.26-66-j19-1.1.H 8591415 17:02:00 \n",
"2 26-66-j19-1 8591204 17.TA.26-66-j19-1.1.H 8591204 17:03:00 \n",
"3 26-66-j19-1 8591098 17.TA.26-66-j19-1.1.H 8591098 17:04:00 \n",
"4 26-66-j19-1 8591392 17.TA.26-66-j19-1.1.H 8591392 17:05:00 \n",
"\n",
" departure_time stop_sequence stop_name stop_lat \\\n",
"0 17:00:00 3 Zürich, Hürlimannplatz 47.365066 \n",
"1 17:02:00 4 Zürich, Waffenplatzstrasse 47.361482 \n",
"2 17:03:00 5 Zürich, Hügelstrasse 47.358543 \n",
"3 17:04:00 6 Zürich, Brunau/Mutschellenstr. 47.355147 \n",
"4 17:05:00 7 Zürich, Thujastrasse 47.350187 \n",
"\n",
" stop_lon trip_headsign trip_short_name direction_id \\\n",
"0 8.526539 Zürich, Neubühl 3870 0 \n",
"1 8.525749 Zürich, Neubühl 3870 0 \n",
"2 8.526997 Zürich, Neubühl 3870 0 \n",
"3 8.527141 Zürich, Neubühl 3870 0 \n",
"4 8.527806 Zürich, Neubühl 3870 0 \n",
"\n",
" departure_first_stop route_int stop_count stop_int route_desc \n",
"0 16:55:00 1225 12 1317 Bus \n",
"1 16:55:00 1225 12 1267 Bus \n",
"2 16:55:00 1225 12 67 Bus \n",
"3 16:55:00 1225 12 512 Bus \n",
"4 16:55:00 1225 12 403 Bus "
]
},
- "execution_count": 9,
+ "execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_int = stop_times_curated\n",
"stop_times_int.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"transfers_df = transfers_df.merge(stop_times_int[[\"stop_id\", \"stop_int\"]].set_index(\"stop_id\"), how=\"inner\", on = \"stop_id\").drop_duplicates()"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10707"
]
},
- "execution_count": 11,
+ "execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfers_df.stop_id.count()"
]
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" stop_id \n",
" stop_id2 \n",
" distance \n",
" Transfer_time_sec \n",
" stop_name \n",
" stop_name2 \n",
" stop_int \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 8500926 \n",
" 8590616 \n",
" 0.122430 \n",
" 146 \n",
" Oetwil a.d.L., Schweizäcker \n",
" Geroldswil, Schweizäcker \n",
" 1392 \n",
" \n",
" \n",
" 38 \n",
" 1 \n",
" 8500926 \n",
" 8590737 \n",
" 0.300175 \n",
" 360 \n",
" Oetwil a.d.L., Schweizäcker \n",
" Oetwil an der Limmat, Halde \n",
" 1392 \n",
" \n",
" \n",
" 76 \n",
" 9 \n",
" 8502186:0:1 \n",
" 8502186 \n",
" 0.006762 \n",
" 8 \n",
" Dietikon Stoffelbach \n",
" Dietikon Stoffelbach \n",
" 1394 \n",
" \n",
" \n",
" 128 \n",
" 10 \n",
" 8502186:0:1 \n",
" 8502186:0:2 \n",
" 0.006762 \n",
" 8 \n",
" Dietikon Stoffelbach \n",
" Dietikon Stoffelbach \n",
" 1394 \n",
" \n",
" \n",
" 180 \n",
" 11 \n",
" 8502186:0:1 \n",
" 8502186P \n",
" 0.006762 \n",
" 8 \n",
" Dietikon Stoffelbach \n",
" Dietikon Stoffelbach \n",
" 1394 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 stop_id stop_id2 distance Transfer_time_sec \\\n",
"0 0 8500926 8590616 0.122430 146 \n",
"38 1 8500926 8590737 0.300175 360 \n",
"76 9 8502186:0:1 8502186 0.006762 8 \n",
"128 10 8502186:0:1 8502186:0:2 0.006762 8 \n",
"180 11 8502186:0:1 8502186P 0.006762 8 \n",
"\n",
" stop_name stop_name2 stop_int \n",
"0 Oetwil a.d.L., Schweizäcker Geroldswil, Schweizäcker 1392 \n",
"38 Oetwil a.d.L., Schweizäcker Oetwil an der Limmat, Halde 1392 \n",
"76 Dietikon Stoffelbach Dietikon Stoffelbach 1394 \n",
"128 Dietikon Stoffelbach Dietikon Stoffelbach 1394 \n",
"180 Dietikon Stoffelbach Dietikon Stoffelbach 1394 "
]
},
- "execution_count": 12,
+ "execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfers_df.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" stop_id2 \n",
" stop_int_2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 8591205 \n",
" 1317 \n",
" \n",
" \n",
" 1 \n",
" 8591415 \n",
" 1267 \n",
" \n",
" \n",
" 2 \n",
" 8591204 \n",
" 67 \n",
" \n",
" \n",
" 3 \n",
" 8591098 \n",
" 512 \n",
" \n",
" \n",
" 4 \n",
" 8591392 \n",
" 403 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" stop_id2 stop_int_2\n",
"0 8591205 1317\n",
"1 8591415 1267\n",
"2 8591204 67\n",
"3 8591098 512\n",
"4 8591392 403"
]
},
- "execution_count": 13,
+ "execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#create dataframe with stops\n",
"df_stop_int2 = stop_times_int[[\"stop_id\", \"stop_int\"]].rename(columns={\"stop_id\": \"stop_id2\", \"stop_int\" : \"stop_int_2\"})\n",
"df_stop_int2.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We add the integer stop id of the arrival stop, stop_int_2, again with an inner join (this time on stop_id2)"
]
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"transfers_df_int = transfers_df.merge(df_stop_int2.set_index(\"stop_id2\"), how=\"inner\", on = \"stop_id2\").drop_duplicates()"
]
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" stop_id \n",
" stop_id2 \n",
" distance \n",
" Transfer_time_sec \n",
" stop_name \n",
" stop_name2 \n",
" stop_int \n",
" stop_int_2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 8500926 \n",
" 8590616 \n",
" 0.122430 \n",
" 146 \n",
" Oetwil a.d.L., Schweizäcker \n",
" Geroldswil, Schweizäcker \n",
" 1392 \n",
" 1310 \n",
" \n",
" \n",
" 37 \n",
" 8193 \n",
" 8590618 \n",
" 8590616 \n",
" 0.412676 \n",
" 495 \n",
" Geroldswil, Zentrum \n",
" Geroldswil, Schweizäcker \n",
" 590 \n",
" 1310 \n",
" \n",
" \n",
" 74 \n",
" 8821 \n",
" 8590737 \n",
" 8590616 \n",
" 0.422521 \n",
" 507 \n",
" Oetwil an der Limmat, Halde \n",
" Geroldswil, Schweizäcker \n",
" 901 \n",
" 1310 \n",
" \n",
" \n",
" 111 \n",
" 1 \n",
" 8500926 \n",
" 8590737 \n",
" 0.300175 \n",
" 360 \n",
" Oetwil a.d.L., Schweizäcker \n",
" Oetwil an der Limmat, Halde \n",
" 1392 \n",
" 901 \n",
" \n",
" \n",
" 186 \n",
" 8189 \n",
" 8590616 \n",
" 8590737 \n",
" 0.422521 \n",
" 507 \n",
" Geroldswil, Schweizäcker \n",
" Oetwil an der Limmat, Halde \n",
" 1310 \n",
" 901 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 stop_id stop_id2 distance Transfer_time_sec \\\n",
"0 0 8500926 8590616 0.122430 146 \n",
"37 8193 8590618 8590616 0.412676 495 \n",
"74 8821 8590737 8590616 0.422521 507 \n",
"111 1 8500926 8590737 0.300175 360 \n",
"186 8189 8590616 8590737 0.422521 507 \n",
"\n",
" stop_name stop_name2 stop_int \\\n",
"0 Oetwil a.d.L., Schweizäcker Geroldswil, Schweizäcker 1392 \n",
"37 Geroldswil, Zentrum Geroldswil, Schweizäcker 590 \n",
"74 Oetwil an der Limmat, Halde Geroldswil, Schweizäcker 901 \n",
"111 Oetwil a.d.L., Schweizäcker Oetwil an der Limmat, Halde 1392 \n",
"186 Geroldswil, Schweizäcker Oetwil an der Limmat, Halde 1310 \n",
"\n",
" stop_int_2 \n",
"0 1310 \n",
"37 1310 \n",
"74 1310 \n",
"111 901 \n",
"186 901 "
]
},
- "execution_count": 15,
+ "execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfers_df_int.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9434"
]
},
- "execution_count": 16,
+ "execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfers_df_int.stop_id.count()"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"transfers = transfers_df_int"
]
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1503"
]
},
- "execution_count": 18,
+ "execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check number unique stops2 in transfers\n",
"transfers.stop_id2.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1503"
]
},
- "execution_count": 19,
+ "execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfers.stop_id.nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" route_id \n",
" stop_id_general \n",
" trip_id \n",
" stop_id \n",
" arrival_time \n",
" departure_time \n",
" stop_sequence \n",
" stop_name \n",
" stop_lat \n",
" stop_lon \n",
" trip_headsign \n",
" trip_short_name \n",
" direction_id \n",
" departure_first_stop \n",
" route_int \n",
" stop_count \n",
" stop_int \n",
" route_desc \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 26-66-j19-1 \n",
" 8591205 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591205 \n",
" 17:00:00 \n",
" 17:00:00 \n",
" 3 \n",
" Zürich, Hürlimannplatz \n",
" 47.365066 \n",
" 8.526539 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 1317 \n",
" Bus \n",
" \n",
" \n",
" 1 \n",
" 26-66-j19-1 \n",
" 8591415 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591415 \n",
" 17:02:00 \n",
" 17:02:00 \n",
" 4 \n",
" Zürich, Waffenplatzstrasse \n",
" 47.361482 \n",
" 8.525749 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 1267 \n",
" Bus \n",
" \n",
" \n",
" 2 \n",
" 26-66-j19-1 \n",
" 8591204 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591204 \n",
" 17:03:00 \n",
" 17:03:00 \n",
" 5 \n",
" Zürich, Hügelstrasse \n",
" 47.358543 \n",
" 8.526997 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 67 \n",
" Bus \n",
" \n",
" \n",
" 3 \n",
" 26-66-j19-1 \n",
" 8591098 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591098 \n",
" 17:04:00 \n",
" 17:04:00 \n",
" 6 \n",
" Zürich, Brunau/Mutschellenstr. \n",
" 47.355147 \n",
" 8.527141 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 512 \n",
" Bus \n",
" \n",
" \n",
" 4 \n",
" 26-66-j19-1 \n",
" 8591392 \n",
" 17.TA.26-66-j19-1.1.H \n",
" 8591392 \n",
" 17:05:00 \n",
" 17:05:00 \n",
" 7 \n",
" Zürich, Thujastrasse \n",
" 47.350187 \n",
" 8.527806 \n",
" Zürich, Neubühl \n",
" 3870 \n",
" 0 \n",
" 16:55:00 \n",
" 1225 \n",
" 12 \n",
" 403 \n",
" Bus \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_id stop_id_general trip_id stop_id arrival_time \\\n",
"0 26-66-j19-1 8591205 17.TA.26-66-j19-1.1.H 8591205 17:00:00 \n",
"1 26-66-j19-1 8591415 17.TA.26-66-j19-1.1.H 8591415 17:02:00 \n",
"2 26-66-j19-1 8591204 17.TA.26-66-j19-1.1.H 8591204 17:03:00 \n",
"3 26-66-j19-1 8591098 17.TA.26-66-j19-1.1.H 8591098 17:04:00 \n",
"4 26-66-j19-1 8591392 17.TA.26-66-j19-1.1.H 8591392 17:05:00 \n",
"\n",
" departure_time stop_sequence stop_name stop_lat \\\n",
"0 17:00:00 3 Zürich, Hürlimannplatz 47.365066 \n",
"1 17:02:00 4 Zürich, Waffenplatzstrasse 47.361482 \n",
"2 17:03:00 5 Zürich, Hügelstrasse 47.358543 \n",
"3 17:04:00 6 Zürich, Brunau/Mutschellenstr. 47.355147 \n",
"4 17:05:00 7 Zürich, Thujastrasse 47.350187 \n",
"\n",
" stop_lon trip_headsign trip_short_name direction_id \\\n",
"0 8.526539 Zürich, Neubühl 3870 0 \n",
"1 8.525749 Zürich, Neubühl 3870 0 \n",
"2 8.526997 Zürich, Neubühl 3870 0 \n",
"3 8.527141 Zürich, Neubühl 3870 0 \n",
"4 8.527806 Zürich, Neubühl 3870 0 \n",
"\n",
" departure_first_stop route_int stop_count stop_int route_desc \n",
"0 16:55:00 1225 12 1317 Bus \n",
"1 16:55:00 1225 12 1267 Bus \n",
"2 16:55:00 1225 12 67 Bus \n",
"3 16:55:00 1225 12 512 Bus \n",
"4 16:55:00 1225 12 403 Bus "
]
},
- "execution_count": 20,
+ "execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_ordered = stop_times_curated\n",
"stop_times_ordered.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We start by making sure the order is correct"
]
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" route_id \n",
" stop_id_general \n",
" trip_id \n",
" stop_id \n",
" arrival_time \n",
" departure_time \n",
" stop_sequence \n",
" stop_name \n",
" stop_lat \n",
" stop_lon \n",
" trip_headsign \n",
" trip_short_name \n",
" direction_id \n",
" departure_first_stop \n",
" route_int \n",
" stop_count \n",
" stop_int \n",
" route_desc \n",
" \n",
" \n",
" \n",
" \n",
" 93111 \n",
" 26-10-j19-1 \n",
" 8573205 \n",
" 1672.TA.26-10-j19-1.11.R \n",
" 8573205 \n",
" 07:00:00 \n",
" 07:01:00 \n",
" 27 \n",
" Zürich Flughafen, Bahnhof \n",
" 47.450441 \n",
" 8.563729 \n",
" Zürich Flughafen, Fracht \n",
" 4096 \n",
" 1 \n",
" 07:01:00 \n",
" 0 \n",
" 2 \n",
" 298 \n",
" Tram \n",
" \n",
" \n",
" 93112 \n",
" 26-10-j19-1 \n",
" 8588553 \n",
" 1672.TA.26-10-j19-1.11.R \n",
" 8588553 \n",
" 07:02:00 \n",
" 07:02:00 \n",
" 28 \n",
" Zürich Flughafen, Fracht \n",
" 47.452494 \n",
" 8.572057 \n",
" Zürich Flughafen, Fracht \n",
" 4096 \n",
" 1 \n",
" 07:01:00 \n",
" 0 \n",
" 2 \n",
" 1295 \n",
" Tram \n",
" \n",
" \n",
" 93113 \n",
" 26-13-j19-1 \n",
" 8576240 \n",
" 2064.TA.26-13-j19-1.24.H \n",
" 8576240 \n",
" 07:00:00 \n",
" 07:00:00 \n",
" 5 \n",
" Zürich, Meierhofplatz \n",
" 47.402010 \n",
" 8.499374 \n",
" Zürich, Albisgütli \n",
" 1831 \n",
" 0 \n",
" 07:00:00 \n",
" 1 \n",
" 26 \n",
" 1222 \n",
" Tram \n",
" \n",
" \n",
" 93114 \n",
" 26-13-j19-1 \n",
" 8591353 \n",
" 2064.TA.26-13-j19-1.24.H \n",
" 8591353 \n",
" 07:01:00 \n",
" 07:01:00 \n",
" 6 \n",
" Zürich, Schwert \n",
" 47.399730 \n",
" 8.504611 \n",
" Zürich, Albisgütli \n",
" 1831 \n",
" 0 \n",
" 07:00:00 \n",
" 1 \n",
" 26 \n",
" 816 \n",
" Tram \n",
" \n",
" \n",
" 93115 \n",
" 26-13-j19-1 \n",
" 8591039 \n",
" 2064.TA.26-13-j19-1.24.H \n",
" 8591039 \n",
" 07:02:00 \n",
" 07:02:00 \n",
" 7 \n",
" Zürich, Alte Trotte \n",
" 47.397766 \n",
" 8.507252 \n",
" Zürich, Albisgütli \n",
" 1831 \n",
" 0 \n",
" 07:00:00 \n",
" 1 \n",
" 26 \n",
" 778 \n",
" Tram \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_id stop_id_general trip_id stop_id \\\n",
"93111 26-10-j19-1 8573205 1672.TA.26-10-j19-1.11.R 8573205 \n",
"93112 26-10-j19-1 8588553 1672.TA.26-10-j19-1.11.R 8588553 \n",
"93113 26-13-j19-1 8576240 2064.TA.26-13-j19-1.24.H 8576240 \n",
"93114 26-13-j19-1 8591353 2064.TA.26-13-j19-1.24.H 8591353 \n",
"93115 26-13-j19-1 8591039 2064.TA.26-13-j19-1.24.H 8591039 \n",
"\n",
" arrival_time departure_time stop_sequence stop_name \\\n",
"93111 07:00:00 07:01:00 27 Zürich Flughafen, Bahnhof \n",
"93112 07:02:00 07:02:00 28 Zürich Flughafen, Fracht \n",
"93113 07:00:00 07:00:00 5 Zürich, Meierhofplatz \n",
"93114 07:01:00 07:01:00 6 Zürich, Schwert \n",
"93115 07:02:00 07:02:00 7 Zürich, Alte Trotte \n",
"\n",
" stop_lat stop_lon trip_headsign trip_short_name \\\n",
"93111 47.450441 8.563729 Zürich Flughafen, Fracht 4096 \n",
"93112 47.452494 8.572057 Zürich Flughafen, Fracht 4096 \n",
"93113 47.402010 8.499374 Zürich, Albisgütli 1831 \n",
"93114 47.399730 8.504611 Zürich, Albisgütli 1831 \n",
"93115 47.397766 8.507252 Zürich, Albisgütli 1831 \n",
"\n",
" direction_id departure_first_stop route_int stop_count stop_int \\\n",
"93111 1 07:01:00 0 2 298 \n",
"93112 1 07:01:00 0 2 1295 \n",
"93113 0 07:00:00 1 26 1222 \n",
"93114 0 07:00:00 1 26 816 \n",
"93115 0 07:00:00 1 26 778 \n",
"\n",
" route_desc \n",
"93111 Tram \n",
"93112 Tram \n",
"93113 Tram \n",
"93114 Tram \n",
"93115 Tram "
]
},
- "execution_count": 23,
+ "execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "stop_times_ordered = stop_times_int.sort_values(by=[\"route_int\", \"departure_first_stop\", \"stop_sequence\"])\n",
+ "stop_times_ordered = stop_times_int.sort_values(by=[\"route_int\", \"departure_first_stop\", \"departure_time\"])\n",
"stop_times_ordered.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" arrival_time \n",
" departure_time \n",
" \n",
" \n",
" \n",
" \n",
" 93111 \n",
" 07:00:00 \n",
" 07:01:00 \n",
" \n",
" \n",
" 93112 \n",
" 07:02:00 \n",
" 07:02:00 \n",
" \n",
" \n",
" 93113 \n",
" 07:00:00 \n",
" 07:00:00 \n",
" \n",
" \n",
" 93114 \n",
" 07:01:00 \n",
" 07:01:00 \n",
" \n",
" \n",
" 93115 \n",
" 07:02:00 \n",
" 07:02:00 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" arrival_time departure_time\n",
"93111 07:00:00 07:01:00\n",
"93112 07:02:00 07:02:00\n",
"93113 07:00:00 07:00:00\n",
"93114 07:01:00 07:01:00\n",
"93115 07:02:00 07:02:00"
]
},
- "execution_count": 24,
+ "execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_ordered[[\"arrival_time\", \"departure_time\"]].head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We add None to first arrival time and last departure time."
]
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" route_id \n",
" stop_id_general \n",
" trip_id \n",
" stop_id \n",
" arrival_time \n",
" departure_time \n",
" stop_sequence \n",
" stop_name \n",
" stop_lat \n",
" stop_lon \n",
" trip_headsign \n",
" trip_short_name \n",
" direction_id \n",
" departure_first_stop \n",
" route_int \n",
" stop_count \n",
" stop_int \n",
" route_desc \n",
- " sequence_shift_1 \n",
+ " departure_first_shift_1 \n",
" \n",
" \n",
" \n",
" \n",
" 93111 \n",
" 26-10-j19-1 \n",
" 8573205 \n",
" 1672.TA.26-10-j19-1.11.R \n",
" 8573205 \n",
" 07:00:00 \n",
" 07:01:00 \n",
" 27 \n",
" Zürich Flughafen, Bahnhof \n",
" 47.450441 \n",
" 8.563729 \n",
" Zürich Flughafen, Fracht \n",
" 4096 \n",
" 1 \n",
" 07:01:00 \n",
" 0 \n",
" 2 \n",
" 298 \n",
" Tram \n",
- " 28 \n",
+ " 07:01:00 \n",
" \n",
" \n",
" 93112 \n",
" 26-10-j19-1 \n",
" 8588553 \n",
" 1672.TA.26-10-j19-1.11.R \n",
" 8588553 \n",
" 07:02:00 \n",
" 07:02:00 \n",
" 28 \n",
" Zürich Flughafen, Fracht \n",
" 47.452494 \n",
" 8.572057 \n",
" Zürich Flughafen, Fracht \n",
" 4096 \n",
" 1 \n",
" 07:01:00 \n",
" 0 \n",
" 2 \n",
" 1295 \n",
" Tram \n",
- " 5 \n",
+ " 07:00:00 \n",
" \n",
" \n",
" 93113 \n",
" 26-13-j19-1 \n",
" 8576240 \n",
" 2064.TA.26-13-j19-1.24.H \n",
" 8576240 \n",
" 07:00:00 \n",
" 07:00:00 \n",
" 5 \n",
" Zürich, Meierhofplatz \n",
" 47.402010 \n",
" 8.499374 \n",
" Zürich, Albisgütli \n",
" 1831 \n",
" 0 \n",
" 07:00:00 \n",
" 1 \n",
" 26 \n",
" 1222 \n",
" Tram \n",
- " 6 \n",
+ " 07:00:00 \n",
" \n",
" \n",
" 93114 \n",
" 26-13-j19-1 \n",
" 8591353 \n",
" 2064.TA.26-13-j19-1.24.H \n",
" 8591353 \n",
" 07:01:00 \n",
" 07:01:00 \n",
" 6 \n",
" Zürich, Schwert \n",
" 47.399730 \n",
" 8.504611 \n",
" Zürich, Albisgütli \n",
" 1831 \n",
" 0 \n",
" 07:00:00 \n",
" 1 \n",
" 26 \n",
" 816 \n",
" Tram \n",
- " 7 \n",
+ " 07:00:00 \n",
" \n",
" \n",
" 93115 \n",
" 26-13-j19-1 \n",
" 8591039 \n",
" 2064.TA.26-13-j19-1.24.H \n",
" 8591039 \n",
" 07:02:00 \n",
" 07:02:00 \n",
" 7 \n",
" Zürich, Alte Trotte \n",
" 47.397766 \n",
" 8.507252 \n",
" Zürich, Albisgütli \n",
" 1831 \n",
" 0 \n",
" 07:00:00 \n",
" 1 \n",
" 26 \n",
" 778 \n",
" Tram \n",
- " 8 \n",
+ " 07:00:00 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_id stop_id_general trip_id stop_id \\\n",
"93111 26-10-j19-1 8573205 1672.TA.26-10-j19-1.11.R 8573205 \n",
"93112 26-10-j19-1 8588553 1672.TA.26-10-j19-1.11.R 8588553 \n",
"93113 26-13-j19-1 8576240 2064.TA.26-13-j19-1.24.H 8576240 \n",
"93114 26-13-j19-1 8591353 2064.TA.26-13-j19-1.24.H 8591353 \n",
"93115 26-13-j19-1 8591039 2064.TA.26-13-j19-1.24.H 8591039 \n",
"\n",
" arrival_time departure_time stop_sequence stop_name \\\n",
"93111 07:00:00 07:01:00 27 Zürich Flughafen, Bahnhof \n",
"93112 07:02:00 07:02:00 28 Zürich Flughafen, Fracht \n",
"93113 07:00:00 07:00:00 5 Zürich, Meierhofplatz \n",
"93114 07:01:00 07:01:00 6 Zürich, Schwert \n",
"93115 07:02:00 07:02:00 7 Zürich, Alte Trotte \n",
"\n",
" stop_lat stop_lon trip_headsign trip_short_name \\\n",
"93111 47.450441 8.563729 Zürich Flughafen, Fracht 4096 \n",
"93112 47.452494 8.572057 Zürich Flughafen, Fracht 4096 \n",
"93113 47.402010 8.499374 Zürich, Albisgütli 1831 \n",
"93114 47.399730 8.504611 Zürich, Albisgütli 1831 \n",
"93115 47.397766 8.507252 Zürich, Albisgütli 1831 \n",
"\n",
" direction_id departure_first_stop route_int stop_count stop_int \\\n",
"93111 1 07:01:00 0 2 298 \n",
"93112 1 07:01:00 0 2 1295 \n",
"93113 0 07:00:00 1 26 1222 \n",
"93114 0 07:00:00 1 26 816 \n",
"93115 0 07:00:00 1 26 778 \n",
"\n",
- " route_desc sequence_shift_1 \n",
- "93111 Tram 28 \n",
- "93112 Tram 5 \n",
- "93113 Tram 6 \n",
- "93114 Tram 7 \n",
- "93115 Tram 8 "
+ " route_desc departure_first_shift_1 \n",
+ "93111 Tram 07:01:00 \n",
+ "93112 Tram 07:00:00 \n",
+ "93113 Tram 07:00:00 \n",
+ "93114 Tram 07:00:00 \n",
+ "93115 Tram 07:00:00 "
]
},
- "execution_count": 25,
+ "execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#adding a shift\n",
- "stop_times_ordered[\"sequence_shift_1\"] = stop_times_ordered[\"stop_sequence\"].shift(-1, fill_value=0)\n",
+ "stop_times_ordered[\"departure_first_shift_1\"] = stop_times_ordered[\"departure_first_stop\"].shift(-1, fill_value=0)\n",
"stop_times_ordered.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
- "stop_times_ordered['departure_time'] = np.where((stop_times_ordered[\"stop_sequence\"] > stop_times_ordered[\"sequence_shift_1\"]), None, stop_times_ordered['departure_time'])"
+ "stop_times_ordered['departure_time'] = np.where((stop_times_ordered[\"departure_first_stop\"] != stop_times_ordered[\"departure_first_shift_1\"]), None, stop_times_ordered['departure_time'])"
]
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
- "stop_times_ordered[\"arrival_time\"] = np.where((stop_times_ordered[\"stop_sequence\"] == 1), None, stop_times_ordered['arrival_time'])"
+ "stop_times_ordered[\"arrival_time\"] = np.where((stop_times_ordered[\"departure_first_stop\"] == stop_times_ordered[\"departure_time\"]), None, stop_times_ordered['arrival_time'])"
]
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" arrival_time \n",
" departure_time \n",
- " stop_sequence \n",
- " sequence_shift_1 \n",
" \n",
" \n",
" \n",
" \n",
" 93111 \n",
- " 07:00:00 \n",
+ " None \n",
" 07:01:00 \n",
- " 27 \n",
- " 28 \n",
" \n",
" \n",
" 93112 \n",
" 07:02:00 \n",
" None \n",
- " 28 \n",
- " 5 \n",
" \n",
" \n",
" 93113 \n",
+ " None \n",
" 07:00:00 \n",
- " 07:00:00 \n",
- " 5 \n",
- " 6 \n",
" \n",
" \n",
" 93114 \n",
" 07:01:00 \n",
" 07:01:00 \n",
- " 6 \n",
- " 7 \n",
" \n",
" \n",
" 93115 \n",
" 07:02:00 \n",
" 07:02:00 \n",
- " 7 \n",
- " 8 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
- " arrival_time departure_time stop_sequence sequence_shift_1\n",
- "93111 07:00:00 07:01:00 27 28\n",
- "93112 07:02:00 None 28 5\n",
- "93113 07:00:00 07:00:00 5 6\n",
- "93114 07:01:00 07:01:00 6 7\n",
- "93115 07:02:00 07:02:00 7 8"
+ " arrival_time departure_time\n",
+ "93111 None 07:01:00\n",
+ "93112 07:02:00 None\n",
+ "93113 None 07:00:00\n",
+ "93114 07:01:00 07:01:00\n",
+ "93115 07:02:00 07:02:00"
]
},
- "execution_count": 28,
+ "execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "stop_times_ordered[[\"arrival_time\",\"departure_time\", \"stop_sequence\", \"sequence_shift_1\"]].head(5)"
+ "stop_times_ordered[[\"arrival_time\",\"departure_time\"]].head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Array structure preparation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### StopTimes: \n",
"[[departure_route0_trip0_stop0, arrival_route0_trip0_stop_0], [departure_route0_trip0_stop1, arrival_route0_trip0_stop_1], …], [[departure_route0_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], [[[departure_route1_trip0_stop0, arrival_route1_trip0_stop_0], …], [[departure_route1_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], …]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We transform it in datetime as required by the raptor algorithm"
]
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"stop_times_ordered['arrival_time'] = pd.to_datetime(stop_times_ordered['arrival_time'])\n",
"stop_times_ordered['departure_time'] = pd.to_datetime(stop_times_ordered['departure_time'])"
]
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" arrival_time \n",
" departure_time \n",
" \n",
" \n",
" \n",
" \n",
" 93111 \n",
- " 2020-05-22 07:00:00 \n",
- " 2020-05-22 07:01:00 \n",
+ " NaT \n",
+ " 2020-05-23 07:01:00 \n",
" \n",
" \n",
" 93112 \n",
- " 2020-05-22 07:02:00 \n",
+ " 2020-05-23 07:02:00 \n",
" NaT \n",
" \n",
" \n",
" 93113 \n",
- " 2020-05-22 07:00:00 \n",
- " 2020-05-22 07:00:00 \n",
+ " NaT \n",
+ " 2020-05-23 07:00:00 \n",
" \n",
" \n",
" 93114 \n",
- " 2020-05-22 07:01:00 \n",
- " 2020-05-22 07:01:00 \n",
+ " 2020-05-23 07:01:00 \n",
+ " 2020-05-23 07:01:00 \n",
" \n",
" \n",
" 93115 \n",
- " 2020-05-22 07:02:00 \n",
- " 2020-05-22 07:02:00 \n",
+ " 2020-05-23 07:02:00 \n",
+ " 2020-05-23 07:02:00 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" arrival_time departure_time\n",
- "93111 2020-05-22 07:00:00 2020-05-22 07:01:00\n",
- "93112 2020-05-22 07:02:00 NaT\n",
- "93113 2020-05-22 07:00:00 2020-05-22 07:00:00\n",
- "93114 2020-05-22 07:01:00 2020-05-22 07:01:00\n",
- "93115 2020-05-22 07:02:00 2020-05-22 07:02:00"
+ "93111 NaT 2020-05-23 07:01:00\n",
+ "93112 2020-05-23 07:02:00 NaT\n",
+ "93113 NaT 2020-05-23 07:00:00\n",
+ "93114 2020-05-23 07:01:00 2020-05-23 07:01:00\n",
+ "93115 2020-05-23 07:02:00 2020-05-23 07:02:00"
]
},
- "execution_count": 30,
+ "execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_ordered[[\"arrival_time\", \"departure_time\"]].head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
- "with open('../data/stop_times_df.pkl','wb') as f: pickle.dump(stop_times_ordered, f)"
+ "with open('../data/stop_times_df_cyril.pkl','wb') as f: pickle.dump(stop_times_ordered, f)"
]
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" route_id \n",
" stop_id_general \n",
" trip_id \n",
" stop_id \n",
" arrival_time \n",
" departure_time \n",
" stop_sequence \n",
" stop_name \n",
" stop_lat \n",
" stop_lon \n",
" trip_headsign \n",
" trip_short_name \n",
" direction_id \n",
" departure_first_stop \n",
" route_int \n",
" stop_count \n",
" stop_int \n",
" route_desc \n",
- " sequence_shift_1 \n",
+ " departure_first_shift_1 \n",
" \n",
" \n",
" \n",
" \n",
" 93111 \n",
" 26-10-j19-1 \n",
" 8573205 \n",
" 1672.TA.26-10-j19-1.11.R \n",
" 8573205 \n",
- " 2020-05-22 07:00:00 \n",
- " 2020-05-22 07:01:00 \n",
+ " NaT \n",
+ " 2020-05-23 07:01:00 \n",
" 27 \n",
" Zürich Flughafen, Bahnhof \n",
" 47.450441 \n",
" 8.563729 \n",
" Zürich Flughafen, Fracht \n",
" 4096 \n",
" 1 \n",
" 07:01:00 \n",
" 0 \n",
" 2 \n",
" 298 \n",
" Tram \n",
- " 28 \n",
+ " 07:01:00 \n",
" \n",
" \n",
" 93112 \n",
" 26-10-j19-1 \n",
" 8588553 \n",
" 1672.TA.26-10-j19-1.11.R \n",
" 8588553 \n",
- " 2020-05-22 07:02:00 \n",
+ " 2020-05-23 07:02:00 \n",
" NaT \n",
" 28 \n",
" Zürich Flughafen, Fracht \n",
" 47.452494 \n",
" 8.572057 \n",
" Zürich Flughafen, Fracht \n",
" 4096 \n",
" 1 \n",
" 07:01:00 \n",
" 0 \n",
" 2 \n",
" 1295 \n",
" Tram \n",
- " 5 \n",
+ " 07:00:00 \n",
" \n",
" \n",
" 93113 \n",
" 26-13-j19-1 \n",
" 8576240 \n",
" 2064.TA.26-13-j19-1.24.H \n",
" 8576240 \n",
- " 2020-05-22 07:00:00 \n",
- " 2020-05-22 07:00:00 \n",
+ " NaT \n",
+ " 2020-05-23 07:00:00 \n",
" 5 \n",
" Zürich, Meierhofplatz \n",
" 47.402010 \n",
" 8.499374 \n",
" Zürich, Albisgütli \n",
" 1831 \n",
" 0 \n",
" 07:00:00 \n",
" 1 \n",
" 26 \n",
" 1222 \n",
" Tram \n",
- " 6 \n",
+ " 07:00:00 \n",
" \n",
" \n",
" 93114 \n",
" 26-13-j19-1 \n",
" 8591353 \n",
" 2064.TA.26-13-j19-1.24.H \n",
" 8591353 \n",
- " 2020-05-22 07:01:00 \n",
- " 2020-05-22 07:01:00 \n",
+ " 2020-05-23 07:01:00 \n",
+ " 2020-05-23 07:01:00 \n",
" 6 \n",
" Zürich, Schwert \n",
" 47.399730 \n",
" 8.504611 \n",
" Zürich, Albisgütli \n",
" 1831 \n",
" 0 \n",
" 07:00:00 \n",
" 1 \n",
" 26 \n",
" 816 \n",
" Tram \n",
- " 7 \n",
+ " 07:00:00 \n",
" \n",
" \n",
" 93115 \n",
" 26-13-j19-1 \n",
" 8591039 \n",
" 2064.TA.26-13-j19-1.24.H \n",
" 8591039 \n",
- " 2020-05-22 07:02:00 \n",
- " 2020-05-22 07:02:00 \n",
+ " 2020-05-23 07:02:00 \n",
+ " 2020-05-23 07:02:00 \n",
" 7 \n",
" Zürich, Alte Trotte \n",
" 47.397766 \n",
" 8.507252 \n",
" Zürich, Albisgütli \n",
" 1831 \n",
" 0 \n",
" 07:00:00 \n",
" 1 \n",
" 26 \n",
" 778 \n",
" Tram \n",
- " 8 \n",
+ " 07:00:00 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_id stop_id_general trip_id stop_id \\\n",
"93111 26-10-j19-1 8573205 1672.TA.26-10-j19-1.11.R 8573205 \n",
"93112 26-10-j19-1 8588553 1672.TA.26-10-j19-1.11.R 8588553 \n",
"93113 26-13-j19-1 8576240 2064.TA.26-13-j19-1.24.H 8576240 \n",
"93114 26-13-j19-1 8591353 2064.TA.26-13-j19-1.24.H 8591353 \n",
"93115 26-13-j19-1 8591039 2064.TA.26-13-j19-1.24.H 8591039 \n",
"\n",
" arrival_time departure_time stop_sequence \\\n",
- "93111 2020-05-22 07:00:00 2020-05-22 07:01:00 27 \n",
- "93112 2020-05-22 07:02:00 NaT 28 \n",
- "93113 2020-05-22 07:00:00 2020-05-22 07:00:00 5 \n",
- "93114 2020-05-22 07:01:00 2020-05-22 07:01:00 6 \n",
- "93115 2020-05-22 07:02:00 2020-05-22 07:02:00 7 \n",
+ "93111 NaT 2020-05-23 07:01:00 27 \n",
+ "93112 2020-05-23 07:02:00 NaT 28 \n",
+ "93113 NaT 2020-05-23 07:00:00 5 \n",
+ "93114 2020-05-23 07:01:00 2020-05-23 07:01:00 6 \n",
+ "93115 2020-05-23 07:02:00 2020-05-23 07:02:00 7 \n",
"\n",
" stop_name stop_lat stop_lon \\\n",
"93111 Zürich Flughafen, Bahnhof 47.450441 8.563729 \n",
"93112 Zürich Flughafen, Fracht 47.452494 8.572057 \n",
"93113 Zürich, Meierhofplatz 47.402010 8.499374 \n",
"93114 Zürich, Schwert 47.399730 8.504611 \n",
"93115 Zürich, Alte Trotte 47.397766 8.507252 \n",
"\n",
" trip_headsign trip_short_name direction_id \\\n",
"93111 Zürich Flughafen, Fracht 4096 1 \n",
"93112 Zürich Flughafen, Fracht 4096 1 \n",
"93113 Zürich, Albisgütli 1831 0 \n",
"93114 Zürich, Albisgütli 1831 0 \n",
"93115 Zürich, Albisgütli 1831 0 \n",
"\n",
" departure_first_stop route_int stop_count stop_int route_desc \\\n",
"93111 07:01:00 0 2 298 Tram \n",
"93112 07:01:00 0 2 1295 Tram \n",
"93113 07:00:00 1 26 1222 Tram \n",
"93114 07:00:00 1 26 816 Tram \n",
"93115 07:00:00 1 26 778 Tram \n",
"\n",
- " sequence_shift_1 \n",
- "93111 28 \n",
- "93112 5 \n",
- "93113 6 \n",
- "93114 7 \n",
- "93115 8 "
+ " departure_first_shift_1 \n",
+ "93111 07:01:00 \n",
+ "93112 07:00:00 \n",
+ "93113 07:00:00 \n",
+ "93114 07:00:00 \n",
+ "93115 07:00:00 "
]
},
- "execution_count": 33,
+ "execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_ordered = stop_times_ordered.sort_values(by=[\"route_int\", \"departure_first_stop\", \"stop_sequence\"])\n",
"stop_times_ordered.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And we transform it to array, ready ti be used by raptor"
]
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array([['2020-05-22T07:00:00.000000000', '2020-05-22T07:01:00.000000000'],\n",
- " ['2020-05-22T07:02:00.000000000', 'NaT'],\n",
- " ['2020-05-22T07:00:00.000000000', '2020-05-22T07:00:00.000000000'],\n",
+ "array([[ 'NaT', '2020-05-23T07:01:00.000000000'],\n",
+ " ['2020-05-23T07:02:00.000000000', 'NaT'],\n",
+ " [ 'NaT', '2020-05-23T07:00:00.000000000'],\n",
" ...,\n",
- " ['2020-05-22T07:35:00.000000000', '2020-05-22T07:35:00.000000000'],\n",
- " ['2020-05-22T07:36:00.000000000', '2020-05-22T07:36:00.000000000'],\n",
- " ['2020-05-22T07:37:00.000000000', 'NaT']],\n",
+ " ['2020-05-23T07:35:00.000000000', '2020-05-23T07:35:00.000000000'],\n",
+ " ['2020-05-23T07:36:00.000000000', '2020-05-23T07:36:00.000000000'],\n",
+ " ['2020-05-23T07:37:00.000000000', 'NaT']],\n",
" dtype='datetime64[ns]')"
]
},
- "execution_count": 34,
+ "execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_array = stop_times_ordered[[\"arrival_time\", \"departure_time\"]].to_numpy()\n",
"stop_times_array"
]
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"260459"
]
},
- "execution_count": 35,
+ "execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(stop_times_array,0)"
]
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stop_times_array_cyril.pkl','wb') as f: pickle.dump(stop_times_array, f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Routes: \n",
"[[route0_nr.Trips, route0_nr. Stops, route0_pointerRoutes, route0_pointerStops_times],[route1_nr.Trips, route1_nr. Stops,, route1_pointerRoutes, route1_pointerStops_times],…]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We start by getting the number of trips and stops there is for each route"
]
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" n_Trips \n",
" n_stops \n",
" \n",
" \n",
" route_int \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 2 \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 26 \n",
" \n",
" \n",
" 2 \n",
" 1 \n",
" 8 \n",
" \n",
" \n",
" 3 \n",
" 1 \n",
" 17 \n",
" \n",
" \n",
" 4 \n",
" 1 \n",
" 5 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" n_Trips n_stops\n",
"route_int \n",
"0 1 2\n",
"1 1 26\n",
"2 1 8\n",
"3 1 17\n",
"4 1 5"
]
},
- "execution_count": 40,
+ "execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_trips_stops = stop_times_ordered.groupby([\"route_int\"]).nunique()[[\"trip_id\",\"stop_int\"]].sort_index().rename(columns={\"trip_id\": \"n_Trips\", \"stop_int\": \"n_stops\"})\n",
"distinct_trips_stops.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1461, 2)"
]
},
- "execution_count": 41,
+ "execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_trips_stops.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We create the pointer for the route stops, by adding the unique stops for each route"
]
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" n_Trips \n",
" n_stops \n",
" pointer_routes_stops \n",
" \n",
" \n",
" route_int \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 2 \n",
" 0 \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 26 \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" 1 \n",
" 8 \n",
" 28 \n",
" \n",
" \n",
" 3 \n",
" 1 \n",
" 17 \n",
" 36 \n",
" \n",
" \n",
" 4 \n",
" 1 \n",
" 5 \n",
" 53 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" n_Trips n_stops pointer_routes_stops\n",
"route_int \n",
"0 1 2 0\n",
"1 1 26 2\n",
"2 1 8 28\n",
"3 1 17 36\n",
"4 1 5 53"
]
},
- "execution_count": 42,
+ "execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_trips_stops['pointer_routes_stops'] = distinct_trips_stops.n_stops.cumsum().shift(1, fill_value=0)\n",
"distinct_trips_stops.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We create the pointer for stop_times by adding the number of stops in each route, counting duplicates (due to several trips)"
]
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"distinct_trips_stops[\"pointer_stop_times\"] = (stop_times_ordered.groupby([\"route_int\"]).count().stop_id).cumsum().shift(1, fill_value=0)"
]
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" n_Trips \n",
" n_stops \n",
" pointer_routes_stops \n",
" pointer_stop_times \n",
" pointer_routes_stops_shift \n",
" pointer_stop_times_shift \n",
" \n",
" \n",
" route_int \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 2 \n",
" 0 \n",
" 0 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 26 \n",
" 2 \n",
" 2 \n",
" 28 \n",
" 28 \n",
" \n",
" \n",
" 2 \n",
" 1 \n",
" 8 \n",
" 28 \n",
" 28 \n",
" 36 \n",
" 36 \n",
" \n",
" \n",
" 3 \n",
" 1 \n",
" 17 \n",
" 36 \n",
" 36 \n",
" 53 \n",
" 53 \n",
" \n",
" \n",
" 4 \n",
" 1 \n",
" 5 \n",
" 53 \n",
" 53 \n",
" 58 \n",
" 58 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" n_Trips n_stops pointer_routes_stops pointer_stop_times \\\n",
"route_int \n",
"0 1 2 0 0 \n",
"1 1 26 2 2 \n",
"2 1 8 28 28 \n",
"3 1 17 36 36 \n",
"4 1 5 53 53 \n",
"\n",
" pointer_routes_stops_shift pointer_stop_times_shift \n",
"route_int \n",
"0 2 2 \n",
"1 28 28 \n",
"2 36 36 \n",
"3 53 53 \n",
"4 58 58 "
]
},
- "execution_count": 44,
+ "execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_trips_stops[\"pointer_routes_stops_shift\"] = distinct_trips_stops['pointer_routes_stops'].shift(-1, fill_value=0)\n",
"distinct_trips_stops[\"pointer_stop_times_shift\"] = distinct_trips_stops['pointer_stop_times'].shift(-1, fill_value=0)\n",
"distinct_trips_stops.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"distinct_trips_stops['pointer_routes_stops'] = np.where((distinct_trips_stops[\"pointer_routes_stops\"] == distinct_trips_stops[\"pointer_routes_stops_shift\"]), None, distinct_trips_stops['pointer_routes_stops'])\n",
"distinct_trips_stops['pointer_stop_times'] = np.where((distinct_trips_stops[\"pointer_stop_times\"] == distinct_trips_stops[\"pointer_stop_times_shift\"]), None, distinct_trips_stops['pointer_stop_times'])\n"
]
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"n_Trips False\n",
"n_stops False\n",
"pointer_routes_stops False\n",
"pointer_stop_times False\n",
"pointer_routes_stops_shift False\n",
"pointer_stop_times_shift False\n",
"dtype: bool"
]
},
- "execution_count": 46,
+ "execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_trips_stops.isna().any()"
]
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/routes_array_df_cyril.pkl','wb') as f: pickle.dump(distinct_trips_stops[['n_Trips', 'n_stops', 'pointer_routes_stops', 'pointer_stop_times']], f)"
]
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 78,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Int64Index: 1461 entries, 0 to 1460\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 n_Trips 1461 non-null int64 \n",
" 1 n_stops 1461 non-null int64 \n",
" 2 pointer_routes_stops 1461 non-null object\n",
" 3 pointer_stop_times 1461 non-null object\n",
" 4 pointer_routes_stops_shift 1461 non-null int64 \n",
" 5 pointer_stop_times_shift 1461 non-null int64 \n",
"dtypes: int64(4), object(2)\n",
"memory usage: 79.9+ KB\n"
]
}
],
"source": [
"distinct_trips_stops.info()"
]
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1, 2, 0, 0],\n",
" [1, 26, 2, 2],\n",
" [1, 8, 28, 28],\n",
" ...,\n",
" [1, 3, 15297, 260396],\n",
" [2, 16, 15300, 260399],\n",
" [1, 28, 15316, 260431]], dtype=object)"
]
},
- "execution_count": 49,
+ "execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"routes_array = distinct_trips_stops[['n_Trips', 'n_stops', 'pointer_routes_stops', 'pointer_stop_times']].to_numpy()\n",
"routes_array"
]
},
{
"cell_type": "code",
- "execution_count": 50,
+ "execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1461"
]
},
- "execution_count": 50,
+ "execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(routes_array, 0)"
]
},
{
"cell_type": "code",
- "execution_count": 51,
+ "execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/routes_array_cyril.pkl','wb') as f: pickle.dump(routes_array, f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"RouteStops: [route0_stop0, route0_stop1,…, route1_stop0, route1_stop1,…, …]\n"
]
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" index \n",
" route_int \n",
" stop_int \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 93111 \n",
" 0 \n",
" 298 \n",
" \n",
" \n",
" 1 \n",
" 93112 \n",
" 0 \n",
" 1295 \n",
" \n",
" \n",
" 2 \n",
" 93113 \n",
" 1 \n",
" 1222 \n",
" \n",
" \n",
" 3 \n",
" 93114 \n",
" 1 \n",
" 816 \n",
" \n",
" \n",
" 4 \n",
" 93115 \n",
" 1 \n",
" 778 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" index route_int stop_int\n",
"0 93111 0 298\n",
"1 93112 0 1295\n",
"2 93113 1 1222\n",
"3 93114 1 816\n",
"4 93115 1 778"
]
},
- "execution_count": 52,
+ "execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"route_stops = stop_times_ordered.sort_values([\"route_int\", \"stop_sequence\"])\n",
"route_stops = route_stops[['route_int', 'stop_int']].drop_duplicates().reset_index()\n",
"route_stops.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 83,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 15344 entries, 0 to 15343\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype\n",
"--- ------ -------------- -----\n",
" 0 index 15344 non-null int64\n",
" 1 route_int 15344 non-null int64\n",
" 2 stop_int 15344 non-null int64\n",
"dtypes: int64(3)\n",
"memory usage: 359.8 KB\n"
]
}
],
"source": [
"route_stops.info()"
]
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1461"
]
},
- "execution_count": 54,
+ "execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"route_stops.route_int.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 55,
+ "execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/route_stops_df_cyril.pkl','wb') as f: pickle.dump(route_stops, f)"
]
},
{
"cell_type": "code",
- "execution_count": 56,
+ "execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 298, 1295, 1222, ..., 1349, 1042, 549])"
]
},
- "execution_count": 56,
+ "execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"route_stops_array = route_stops.stop_int.to_numpy()\n",
"route_stops_array"
]
},
{
"cell_type": "code",
- "execution_count": 57,
+ "execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1407"
]
},
- "execution_count": 57,
+ "execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(np.unique(route_stops_array))"
]
},
{
"cell_type": "code",
- "execution_count": 58,
+ "execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"15344"
]
},
- "execution_count": 58,
+ "execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(route_stops_array, 0)"
]
},
{
"cell_type": "code",
- "execution_count": 59,
+ "execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(15344,)"
]
},
- "execution_count": 59,
+ "execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"route_stops_array.shape"
]
},
{
"cell_type": "code",
- "execution_count": 60,
+ "execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/route_stops_array_cyril.pkl','wb') as f: pickle.dump(route_stops_array, f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Check that the pointers are correct.\n",
"It is essential that the indexes stored in Routes, which serve as pointers, are correct."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We start by looking at the pointers into stop_times and route_stops and at where the two diverge; this tells us which indexes to spot-check. We expected route_stops to start a new route at index 3 and stop_times at index 78; note, however, that in the table below both pointers place the start of route_int 1 at index 2."
]
},
{
"cell_type": "code",
- "execution_count": 61,
+ "execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" n_Trips \n",
" n_stops \n",
" pointer_routes_stops \n",
" pointer_stop_times \n",
" pointer_routes_stops_shift \n",
" pointer_stop_times_shift \n",
" \n",
" \n",
" route_int \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 2 \n",
" 0 \n",
" 0 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 26 \n",
" 2 \n",
" 2 \n",
" 28 \n",
" 28 \n",
" \n",
" \n",
" 2 \n",
" 1 \n",
" 8 \n",
" 28 \n",
" 28 \n",
" 36 \n",
" 36 \n",
" \n",
" \n",
" 3 \n",
" 1 \n",
" 17 \n",
" 36 \n",
" 36 \n",
" 53 \n",
" 53 \n",
" \n",
" \n",
" 4 \n",
" 1 \n",
" 5 \n",
" 53 \n",
" 53 \n",
" 58 \n",
" 58 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" n_Trips n_stops pointer_routes_stops pointer_stop_times \\\n",
"route_int \n",
"0 1 2 0 0 \n",
"1 1 26 2 2 \n",
"2 1 8 28 28 \n",
"3 1 17 36 36 \n",
"4 1 5 53 53 \n",
"\n",
" pointer_routes_stops_shift pointer_stop_times_shift \n",
"route_int \n",
"0 2 2 \n",
"1 28 28 \n",
"2 36 36 \n",
"3 53 53 \n",
"4 58 58 "
]
},
- "execution_count": 61,
+ "execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_trips_stops.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can check that each pointer gives the right index: pointer_routes_stops should point at the first stop of a new route. In the table above route_int 1 has pointer 2, and in route_stops below index 2 is indeed the first row with route_int 1, so it works."
]
},
{
"cell_type": "code",
- "execution_count": 62,
+ "execution_count": 92,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" index \n",
" route_int \n",
" stop_int \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 93111 \n",
" 0 \n",
" 298 \n",
" \n",
" \n",
" 1 \n",
" 93112 \n",
" 0 \n",
" 1295 \n",
" \n",
" \n",
" 2 \n",
" 93113 \n",
" 1 \n",
" 1222 \n",
" \n",
" \n",
" 3 \n",
" 93114 \n",
" 1 \n",
" 816 \n",
" \n",
" \n",
" 4 \n",
" 93115 \n",
" 1 \n",
" 778 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" index route_int stop_int\n",
"0 93111 0 298\n",
"1 93112 0 1295\n",
"2 93113 1 1222\n",
"3 93114 1 816\n",
"4 93115 1 778"
]
},
- "execution_count": 62,
+ "execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"route_stops.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
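"We go and see whether stop_times has a new route at 78. Note that `.loc` selects by index label rather than by position, and the rows shown below (labels 75 to 79) all belong to the same trip of route_int 1225, so this output does not show a route boundary; a positional lookup with `.iloc` on the sorted table would be needed to confirm the pointer."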
]
},
{
"cell_type": "code",
- "execution_count": 63,
+ "execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" route_id \n",
" stop_id_general \n",
" trip_id \n",
" stop_id \n",
" arrival_time \n",
" departure_time \n",
" stop_sequence \n",
" stop_name \n",
" stop_lat \n",
" stop_lon \n",
" trip_headsign \n",
" trip_short_name \n",
" direction_id \n",
" departure_first_stop \n",
" route_int \n",
" stop_count \n",
" stop_int \n",
" route_desc \n",
- " sequence_shift_1 \n",
+ " departure_first_shift_1 \n",
" \n",
" \n",
" \n",
" \n",
" 75 \n",
" 26-66-j19-1 \n",
" 8591098 \n",
" 8.TA.26-66-j19-1.1.H \n",
" 8591098 \n",
- " 2020-05-22 18:04:00 \n",
- " 2020-05-22 18:04:00 \n",
+ " 2020-05-23 18:04:00 \n",
+ " 2020-05-23 18:04:00 \n",
" 6 \n",
" Zürich, Brunau/Mutschellenstr. \n",
" 47.355147 \n",
" 8.527141 \n",
" Zürich, Neubühl \n",
" 3762 \n",
" 0 \n",
" 17:55:00 \n",
" 1225 \n",
" 12 \n",
" 512 \n",
" Bus \n",
- " 7 \n",
+ " 17:55:00 \n",
" \n",
" \n",
" 76 \n",
" 26-66-j19-1 \n",
" 8591392 \n",
" 8.TA.26-66-j19-1.1.H \n",
" 8591392 \n",
- " 2020-05-22 18:05:00 \n",
- " 2020-05-22 18:05:00 \n",
+ " 2020-05-23 18:05:00 \n",
+ " 2020-05-23 18:05:00 \n",
" 7 \n",
" Zürich, Thujastrasse \n",
" 47.350187 \n",
" 8.527806 \n",
" Zürich, Neubühl \n",
" 3762 \n",
" 0 \n",
" 17:55:00 \n",
" 1225 \n",
" 12 \n",
" 403 \n",
" Bus \n",
- " 8 \n",
+ " 17:55:00 \n",
" \n",
" \n",
" 77 \n",
" 26-66-j19-1 \n",
" 8591216 \n",
" 8.TA.26-66-j19-1.1.H \n",
" 8591216 \n",
- " 2020-05-22 18:06:00 \n",
- " 2020-05-22 18:06:00 \n",
+ " 2020-05-23 18:06:00 \n",
+ " 2020-05-23 18:06:00 \n",
" 8 \n",
" Zürich, Jugendherberge \n",
" 47.348002 \n",
" 8.528210 \n",
" Zürich, Neubühl \n",
" 3762 \n",
" 0 \n",
" 17:55:00 \n",
" 1225 \n",
" 12 \n",
" 1375 \n",
" Bus \n",
- " 9 \n",
+ " 17:55:00 \n",
" \n",
" \n",
" 78 \n",
" 26-66-j19-1 \n",
" 8591279 \n",
" 8.TA.26-66-j19-1.1.H \n",
" 8591279 \n",
- " 2020-05-22 18:08:00 \n",
- " 2020-05-22 18:08:00 \n",
+ " 2020-05-23 18:08:00 \n",
+ " 2020-05-23 18:08:00 \n",
" 9 \n",
" Zürich, Morgental \n",
" 47.343948 \n",
" 8.530141 \n",
" Zürich, Neubühl \n",
" 3762 \n",
" 0 \n",
" 17:55:00 \n",
" 1225 \n",
" 12 \n",
" 1349 \n",
" Bus \n",
- " 10 \n",
+ " 17:55:00 \n",
" \n",
" \n",
" 79 \n",
" 26-66-j19-1 \n",
" 8591217 \n",
" 8.TA.26-66-j19-1.1.H \n",
" 8591217 \n",
- " 2020-05-22 18:09:00 \n",
- " 2020-05-22 18:09:00 \n",
+ " 2020-05-23 18:09:00 \n",
+ " 2020-05-23 18:09:00 \n",
" 10 \n",
" Zürich, Kalchbühlweg \n",
" 47.341818 \n",
" 8.531049 \n",
" Zürich, Neubühl \n",
" 3762 \n",
" 0 \n",
" 17:55:00 \n",
" 1225 \n",
" 12 \n",
" 1303 \n",
" Bus \n",
- " 11 \n",
+ " 17:55:00 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_id stop_id_general trip_id stop_id \\\n",
"75 26-66-j19-1 8591098 8.TA.26-66-j19-1.1.H 8591098 \n",
"76 26-66-j19-1 8591392 8.TA.26-66-j19-1.1.H 8591392 \n",
"77 26-66-j19-1 8591216 8.TA.26-66-j19-1.1.H 8591216 \n",
"78 26-66-j19-1 8591279 8.TA.26-66-j19-1.1.H 8591279 \n",
"79 26-66-j19-1 8591217 8.TA.26-66-j19-1.1.H 8591217 \n",
"\n",
" arrival_time departure_time stop_sequence \\\n",
- "75 2020-05-22 18:04:00 2020-05-22 18:04:00 6 \n",
- "76 2020-05-22 18:05:00 2020-05-22 18:05:00 7 \n",
- "77 2020-05-22 18:06:00 2020-05-22 18:06:00 8 \n",
- "78 2020-05-22 18:08:00 2020-05-22 18:08:00 9 \n",
- "79 2020-05-22 18:09:00 2020-05-22 18:09:00 10 \n",
+ "75 2020-05-23 18:04:00 2020-05-23 18:04:00 6 \n",
+ "76 2020-05-23 18:05:00 2020-05-23 18:05:00 7 \n",
+ "77 2020-05-23 18:06:00 2020-05-23 18:06:00 8 \n",
+ "78 2020-05-23 18:08:00 2020-05-23 18:08:00 9 \n",
+ "79 2020-05-23 18:09:00 2020-05-23 18:09:00 10 \n",
"\n",
" stop_name stop_lat stop_lon trip_headsign \\\n",
"75 Zürich, Brunau/Mutschellenstr. 47.355147 8.527141 Zürich, Neubühl \n",
"76 Zürich, Thujastrasse 47.350187 8.527806 Zürich, Neubühl \n",
"77 Zürich, Jugendherberge 47.348002 8.528210 Zürich, Neubühl \n",
"78 Zürich, Morgental 47.343948 8.530141 Zürich, Neubühl \n",
"79 Zürich, Kalchbühlweg 47.341818 8.531049 Zürich, Neubühl \n",
"\n",
" trip_short_name direction_id departure_first_stop route_int stop_count \\\n",
"75 3762 0 17:55:00 1225 12 \n",
"76 3762 0 17:55:00 1225 12 \n",
"77 3762 0 17:55:00 1225 12 \n",
"78 3762 0 17:55:00 1225 12 \n",
"79 3762 0 17:55:00 1225 12 \n",
"\n",
- " stop_int route_desc sequence_shift_1 \n",
- "75 512 Bus 7 \n",
- "76 403 Bus 8 \n",
- "77 1375 Bus 9 \n",
- "78 1349 Bus 10 \n",
- "79 1303 Bus 11 "
+ " stop_int route_desc departure_first_shift_1 \n",
+ "75 512 Bus 17:55:00 \n",
+ "76 403 Bus 17:55:00 \n",
+ "77 1375 Bus 17:55:00 \n",
+ "78 1349 Bus 17:55:00 \n",
+ "79 1303 Bus 17:55:00 "
]
},
- "execution_count": 63,
+ "execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_ordered.loc[75:80].head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Stops: [[stop0_pointerRoutes, stop0_pointerTransfer], [stop1_pointerRoutes, stop1_pointerTransfer], …]"
]
},
{
"cell_type": "code",
- "execution_count": 64,
+ "execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" index \n",
" route_int \n",
" stop_int \n",
" Unnamed: 0 \n",
" stop_id \n",
" stop_id2 \n",
" distance \n",
" Transfer_time_sec \n",
" stop_name \n",
" stop_name2 \n",
" stop_int_2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 93111 \n",
" 0 \n",
" 298 \n",
" 4536.0 \n",
" 8573205 \n",
" 8503016:0:2 \n",
" 0.101546 \n",
" 121.0 \n",
" Zürich Flughafen, Bahnhof \n",
" Zürich Flughafen \n",
" 1218.0 \n",
" \n",
" \n",
" 0 \n",
" 93111 \n",
" 0 \n",
" 298 \n",
" 4558.0 \n",
" 8573205:0:A \n",
" 8503016:0:2 \n",
" 0.118159 \n",
" 141.0 \n",
" Zürich Flughafen, Bahnhof \n",
" Zürich Flughafen \n",
" 1218.0 \n",
" \n",
" \n",
" 0 \n",
" 93111 \n",
" 0 \n",
" 298 \n",
" 4580.0 \n",
" 8573205:0:B \n",
" 8503016:0:2 \n",
" 0.104861 \n",
" 125.0 \n",
" Zürich Flughafen, Bahnhof \n",
" Zürich Flughafen \n",
" 1218.0 \n",
" \n",
" \n",
" 0 \n",
" 93111 \n",
" 0 \n",
" 298 \n",
" 4624.0 \n",
" 8573205:0:D \n",
" 8503016:0:2 \n",
" 0.103327 \n",
" 123.0 \n",
" Zürich Flughafen, Bahnhof \n",
" Zürich Flughafen \n",
" 1218.0 \n",
" \n",
" \n",
" 0 \n",
" 93111 \n",
" 0 \n",
" 298 \n",
" 4646.0 \n",
" 8573205:0:E \n",
" 8503016:0:2 \n",
" 0.101546 \n",
" 121.0 \n",
" Zürich Flughafen, Bahnhof \n",
" Zürich Flughafen \n",
" 1218.0 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" index route_int stop_int Unnamed: 0 stop_id stop_id2 distance \\\n",
"0 93111 0 298 4536.0 8573205 8503016:0:2 0.101546 \n",
"0 93111 0 298 4558.0 8573205:0:A 8503016:0:2 0.118159 \n",
"0 93111 0 298 4580.0 8573205:0:B 8503016:0:2 0.104861 \n",
"0 93111 0 298 4624.0 8573205:0:D 8503016:0:2 0.103327 \n",
"0 93111 0 298 4646.0 8573205:0:E 8503016:0:2 0.101546 \n",
"\n",
" Transfer_time_sec stop_name stop_name2 stop_int_2 \n",
"0 121.0 Zürich Flughafen, Bahnhof Zürich Flughafen 1218.0 \n",
"0 141.0 Zürich Flughafen, Bahnhof Zürich Flughafen 1218.0 \n",
"0 125.0 Zürich Flughafen, Bahnhof Zürich Flughafen 1218.0 \n",
"0 123.0 Zürich Flughafen, Bahnhof Zürich Flughafen 1218.0 \n",
"0 121.0 Zürich Flughafen, Bahnhof Zürich Flughafen 1218.0 "
]
},
- "execution_count": 64,
+ "execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stops_join = route_stops.join(transfers.set_index(\"stop_int\"), how=\"left\", on=\"stop_int\").drop_duplicates()\n",
"stops_join.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 65,
+ "execution_count": 95,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1407"
]
},
- "execution_count": 65,
+ "execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stops_join.stop_int.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 66,
+ "execution_count": 96,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" n_Routes \n",
" n_Transfers \n",
" \n",
" \n",
" stop_int \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 18 \n",
" 16 \n",
" \n",
" \n",
" 1 \n",
" 11 \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" 23 \n",
" 4 \n",
" \n",
" \n",
" 3 \n",
" 6 \n",
" 6 \n",
" \n",
" \n",
" 4 \n",
" 6 \n",
" 0 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" n_Routes n_Transfers\n",
"stop_int \n",
"0 18 16\n",
"1 11 2\n",
"2 23 4\n",
"3 6 6\n",
"4 6 0"
]
},
- "execution_count": 66,
+ "execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_route_transfers = stops_join.sort_values(\"stop_int\").groupby([\"stop_int\"]).nunique().rename(columns={\"route_int\": \"n_Routes\", \"stop_int_2\": \"n_Transfers\"})\n",
"distinct_route_transfers = distinct_route_transfers[[\"n_Routes\", \"n_Transfers\"]].sort_index()\n",
"distinct_route_transfers.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 67,
+ "execution_count": 97,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" n_Routes \n",
" n_Transfers \n",
" pointer_stop_routes \n",
" pointer_transfers \n",
" \n",
" \n",
" stop_int \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 18 \n",
" 16 \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 1 \n",
" 11 \n",
" 2 \n",
" 18 \n",
" 16 \n",
" \n",
" \n",
" 2 \n",
" 23 \n",
" 4 \n",
" 29 \n",
" 18 \n",
" \n",
" \n",
" 3 \n",
" 6 \n",
" 6 \n",
" 52 \n",
" 22 \n",
" \n",
" \n",
" 4 \n",
" 6 \n",
" 0 \n",
" 58 \n",
" 28 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" n_Routes n_Transfers pointer_stop_routes pointer_transfers\n",
"stop_int \n",
"0 18 16 0 0\n",
"1 11 2 18 16\n",
"2 23 4 29 18\n",
"3 6 6 52 22\n",
"4 6 0 58 28"
]
},
- "execution_count": 67,
+ "execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_route_transfers['pointer_stop_routes'] = distinct_route_transfers.n_Routes.cumsum().shift(1, fill_value=0)\n",
"distinct_route_transfers['pointer_transfers'] = distinct_route_transfers.n_Transfers.cumsum().shift(1, fill_value=0)\n",
"distinct_route_transfers.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 68,
+ "execution_count": 98,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" n_Routes \n",
" n_Transfers \n",
" pointer_stop_routes \n",
" pointer_transfers \n",
" pointer_stop_routes_shift \n",
" pointer_transfers_shift \n",
" \n",
" \n",
" stop_int \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 18 \n",
" 16 \n",
" 0 \n",
" 0 \n",
" 18 \n",
" 16 \n",
" \n",
" \n",
" 1 \n",
" 11 \n",
" 2 \n",
" 18 \n",
" 16 \n",
" 29 \n",
" 18 \n",
" \n",
" \n",
" 2 \n",
" 23 \n",
" 4 \n",
" 29 \n",
" 18 \n",
" 52 \n",
" 22 \n",
" \n",
" \n",
" 3 \n",
" 6 \n",
" 6 \n",
" 52 \n",
" 22 \n",
" 58 \n",
" 28 \n",
" \n",
" \n",
" 4 \n",
" 6 \n",
" 0 \n",
" 58 \n",
" 28 \n",
" 64 \n",
" 28 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" n_Routes n_Transfers pointer_stop_routes pointer_transfers \\\n",
"stop_int \n",
"0 18 16 0 0 \n",
"1 11 2 18 16 \n",
"2 23 4 29 18 \n",
"3 6 6 52 22 \n",
"4 6 0 58 28 \n",
"\n",
" pointer_stop_routes_shift pointer_transfers_shift \n",
"stop_int \n",
"0 18 16 \n",
"1 29 18 \n",
"2 52 22 \n",
"3 58 28 \n",
"4 64 28 "
]
},
- "execution_count": 68,
+ "execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_route_transfers[\"pointer_stop_routes_shift\"] = distinct_route_transfers['pointer_stop_routes'].shift(-1, fill_value=0)\n",
"distinct_route_transfers[\"pointer_transfers_shift\"] = distinct_route_transfers['pointer_transfers'].shift(-1, fill_value=0)\n",
"distinct_route_transfers.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 69,
+ "execution_count": 99,
"metadata": {},
"outputs": [],
"source": [
"distinct_route_transfers['pointer_stop_routes'] = np.where((distinct_route_transfers[\"pointer_stop_routes\"] == distinct_route_transfers[\"pointer_stop_routes_shift\"]), None, distinct_route_transfers['pointer_stop_routes'])\n",
"distinct_route_transfers['pointer_transfers'] = np.where((distinct_route_transfers[\"pointer_transfers\"] == distinct_route_transfers[\"pointer_transfers_shift\"]), None, distinct_route_transfers['pointer_transfers'])\n"
]
},
{
"cell_type": "code",
- "execution_count": 70,
+ "execution_count": 100,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"n_Routes False\n",
"n_Transfers False\n",
"pointer_stop_routes False\n",
"pointer_transfers True\n",
"pointer_stop_routes_shift False\n",
"pointer_transfers_shift False\n",
"dtype: bool"
]
},
- "execution_count": 70,
+ "execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_route_transfers.isna().any()"
]
},
{
"cell_type": "code",
- "execution_count": 71,
+ "execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
"stops_df = distinct_route_transfers[['pointer_stop_routes', 'pointer_transfers']]"
]
},
{
"cell_type": "code",
- "execution_count": 72,
+ "execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stops_df.pkl','wb') as f: pickle.dump(stops_df, f)"
]
},
{
"cell_type": "code",
- "execution_count": 73,
+ "execution_count": 103,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0, 0],\n",
" [18, 16],\n",
" [29, 18],\n",
" ...,\n",
" [15329, 6322],\n",
" [15334, 6329],\n",
" [15339, 6334]], dtype=object)"
]
},
- "execution_count": 73,
+ "execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stops_array = stops_df.to_numpy()\n",
"stops_array"
]
},
{
"cell_type": "code",
- "execution_count": 74,
+ "execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1407"
]
},
- "execution_count": 74,
+ "execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(stops_array, 0)"
]
},
{
"cell_type": "code",
- "execution_count": 75,
+ "execution_count": 105,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1407, 2)"
]
},
- "execution_count": 75,
+ "execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stops_array.shape"
]
},
{
"cell_type": "code",
- "execution_count": 76,
+ "execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stops_array_cyril.pkl','wb') as f: pickle.dump(stops_array, f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"StopRoutes: [stop0_route1, stop0_route3, stop1_route1, stop1_route4, stop2_route1, …]"
]
},
{
"cell_type": "code",
- "execution_count": 77,
+ "execution_count": 107,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" index \n",
" route_int \n",
" stop_int \n",
" stop_id \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 87163 \n",
" 82 \n",
" 0 \n",
" 8503088:0:22 \n",
" \n",
" \n",
" 1 \n",
" 95543 \n",
" 129 \n",
" 0 \n",
" 8503088:0:21 \n",
" \n",
" \n",
" 2 \n",
" 129332 \n",
" 187 \n",
" 0 \n",
" 8503088:0:21 \n",
" \n",
" \n",
" 3 \n",
" 73848 \n",
" 211 \n",
" 0 \n",
" 8503088:0:22 \n",
" \n",
" \n",
" 4 \n",
" 147285 \n",
" 251 \n",
" 0 \n",
" 8503088:0:21 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" index route_int stop_int stop_id\n",
"0 87163 82 0 8503088:0:22\n",
"1 95543 129 0 8503088:0:21\n",
"2 129332 187 0 8503088:0:21\n",
"3 73848 211 0 8503088:0:22\n",
"4 147285 251 0 8503088:0:21"
]
},
- "execution_count": 77,
+ "execution_count": 107,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes = stop_times_ordered[[\"route_int\", \"stop_int\", \"stop_id\"]].drop_duplicates().sort_values([\"stop_int\", \"route_int\"])\n",
"stop_routes = stop_routes.reset_index()\n",
"stop_routes.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 78,
+ "execution_count": 108,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(15486, 4)"
]
},
- "execution_count": 78,
+ "execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes.shape"
]
},
{
"cell_type": "code",
- "execution_count": 79,
+ "execution_count": 109,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"249"
]
},
- "execution_count": 79,
+ "execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_curated.route_id.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 80,
+ "execution_count": 110,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1461"
]
},
- "execution_count": 80,
+ "execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes.route_int.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 81,
+ "execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stop_routes_df_cyril.pkl','wb') as f: pickle.dump(stop_routes, f)"
]
},
{
"cell_type": "code",
- "execution_count": 82,
+ "execution_count": 112,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 82, 129, 187, ..., 855, 977, 1087])"
]
},
- "execution_count": 82,
+ "execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes_array = stop_routes[\"route_int\"].to_numpy()\n",
"stop_routes_array"
]
},
{
"cell_type": "code",
- "execution_count": 83,
+ "execution_count": 113,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"15486"
]
},
- "execution_count": 83,
+ "execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(stop_routes_array, 0)"
]
},
{
"cell_type": "code",
- "execution_count": 84,
+ "execution_count": 114,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(15486,)"
]
},
- "execution_count": 84,
+ "execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes_array.shape"
]
},
{
"cell_type": "code",
- "execution_count": 85,
+ "execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stop_routes_array_cyril.pkl','wb') as f: pickle.dump(stop_routes_array, f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Transfer: [[[stop0_nameTargetStop1, transferTime1], [stop0_nameTargetStop2, transferTime2], …], [[stop1_nameTargetStop1, transferTime1], [stop1_nameTargetStop2, transferTime2], …], …]"
]
},
{
"cell_type": "code",
- "execution_count": 86,
+ "execution_count": 116,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9434"
]
},
- "execution_count": 86,
+ "execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfers.stop_id.count()"
]
},
{
"cell_type": "code",
- "execution_count": 89,
+ "execution_count": 134,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" stop_int \n",
" stop_int_2 \n",
" Transfer_time_sec \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 8 \n",
" \n",
" \n",
" 1 \n",
" 0 \n",
" 51 \n",
" 564 \n",
" \n",
" \n",
" 2 \n",
" 0 \n",
" 274 \n",
" 441 \n",
" \n",
" \n",
" 3 \n",
" 0 \n",
" 375 \n",
" 594 \n",
" \n",
" \n",
" 4 \n",
" 0 \n",
" 462 \n",
" 489 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" stop_int stop_int_2 Transfer_time_sec\n",
"0 0 0 8\n",
"1 0 51 564\n",
"2 0 274 441\n",
"3 0 375 594\n",
"4 0 462 489"
]
},
- "execution_count": 89,
+ "execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfer_pandas = transfers[[\"stop_int\",\"stop_int_2\", \"Transfer_time_sec\"]].sort_values([\"stop_int\", \"stop_int_2\"]).drop_duplicates([\"stop_int\", \"stop_int_2\"])\n",
"transfer_pandas = transfer_pandas.reset_index(drop=True)\n",
"transfer_pandas.head()"
]
},
{
"cell_type": "code",
- "execution_count": 90,
+ "execution_count": 135,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1342"
]
},
- "execution_count": 90,
+ "execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfer_pandas.stop_int_2.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 91,
+ "execution_count": 136,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/transfer_df_cyril.pkl','wb') as f: pickle.dump(transfers.sort_values(\"stop_id\"), f)"
]
},
{
"cell_type": "code",
- "execution_count": 92,
+ "execution_count": 137,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0, 8],\n",
" [ 51, 564],\n",
" [ 274, 441],\n",
" ...,\n",
" [1120, 345],\n",
" [1266, 561],\n",
" [1406, 8]])"
]
},
- "execution_count": 92,
+ "execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfer_array = transfer_pandas[[\"stop_int_2\", \"Transfer_time_sec\"]].to_numpy()\n",
"transfer_array"
]
},
{
"cell_type": "code",
- "execution_count": 93,
+ "execution_count": 138,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/transfer_array_cyril.pkl','wb') as f: pickle.dump(transfer_array, f)"
]
},
{
"cell_type": "code",
- "execution_count": 94,
+ "execution_count": 139,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "6342"
+ "(6342, 2)"
]
},
- "execution_count": 94,
+ "execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "np.size(transfer_array, 0)"
+ "transfer_array.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Check if the indexes in stops are correct"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First we look at the pointers."
]
},
{
"cell_type": "code",
- "execution_count": 95,
+ "execution_count": 123,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" pointer_stop_routes \n",
" pointer_transfers \n",
" \n",
" \n",
" stop_int \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 0 \n",
" \n",
" \n",
" 1 \n",
" 18 \n",
" 16 \n",
" \n",
" \n",
" 2 \n",
" 29 \n",
" 18 \n",
" \n",
" \n",
" 3 \n",
" 52 \n",
" 22 \n",
" \n",
" \n",
" 4 \n",
" 58 \n",
" None \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pointer_stop_routes pointer_transfers\n",
"stop_int \n",
"0 0 0\n",
"1 18 16\n",
"2 29 18\n",
"3 52 22\n",
"4 58 None"
]
},
- "execution_count": 95,
+ "execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stops_df.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We see that at index 16 of the transfers there should be a new stop (stop_int 1). We check: row 16 is the first row with stop_int 1, so the pointer is correct."
]
},
{
"cell_type": "code",
- "execution_count": 97,
+ "execution_count": 124,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" stop_int \n",
" stop_int_2 \n",
" Transfer_time_sec \n",
" \n",
" \n",
" \n",
" \n",
" 15 \n",
" 0 \n",
" 1289 \n",
" 460 \n",
" \n",
" \n",
" 16 \n",
" 1 \n",
" 814 \n",
" 267 \n",
" \n",
" \n",
" 17 \n",
" 1 \n",
" 1350 \n",
" 569 \n",
" \n",
" \n",
" 18 \n",
" 2 \n",
" 38 \n",
" 346 \n",
" \n",
" \n",
" 19 \n",
" 2 \n",
" 1062 \n",
" 413 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" stop_int stop_int_2 Transfer_time_sec\n",
"15 0 1289 460\n",
"16 1 814 267\n",
"17 1 1350 569\n",
"18 2 38 346\n",
"19 2 1062 413"
]
},
- "execution_count": 97,
+ "execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfer_pandas.loc[15:20].head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We see that at index 18 of stop_routes we should have a new stop. We check, and it is true."
]
},
{
"cell_type": "code",
- "execution_count": 99,
+ "execution_count": 125,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" index \n",
" route_int \n",
" stop_int \n",
" stop_id \n",
" \n",
" \n",
" \n",
" \n",
" 15 \n",
" 240179 \n",
" 1039 \n",
" 0 \n",
" 8503088:0:21 \n",
" \n",
" \n",
" 16 \n",
" 150919 \n",
" 1078 \n",
" 0 \n",
" 8503088:0:21 \n",
" \n",
" \n",
" 17 \n",
" 26670 \n",
" 1316 \n",
" 0 \n",
" 8503088:0:21 \n",
" \n",
" \n",
" 18 \n",
" 93857 \n",
" 18 \n",
" 1 \n",
" 8502508 \n",
" \n",
" \n",
" 19 \n",
" 236508 \n",
" 114 \n",
" 1 \n",
" 8502508 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" index route_int stop_int stop_id\n",
"15 240179 1039 0 8503088:0:21\n",
"16 150919 1078 0 8503088:0:21\n",
"17 26670 1316 0 8503088:0:21\n",
"18 93857 18 1 8502508\n",
"19 236508 114 1 8502508"
]
},
- "execution_count": 99,
+ "execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes.loc[15:20].head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 106,
+ "execution_count": 126,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" index \n",
" route_int \n",
" stop_int \n",
" stop_id \n",
" \n",
" \n",
" \n",
" \n",
" 7024 \n",
" 108903 \n",
" 382 \n",
" 617 \n",
" 8503006:0:5 \n",
" \n",
" \n",
" 8204 \n",
" 108901 \n",
" 382 \n",
" 724 \n",
" 8503011:0:2 \n",
" \n",
" \n",
" 12599 \n",
" 108900 \n",
" 382 \n",
" 1138 \n",
" 8503010:0:2 \n",
" \n",
" \n",
" 12940 \n",
" 108902 \n",
" 382 \n",
" 1176 \n",
" 8503000:0:33 \n",
" \n",
" \n",
" 13590 \n",
" 108904 \n",
" 382 \n",
" 1218 \n",
" 8503016:0:3 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" index route_int stop_int stop_id\n",
"7024 108903 382 617 8503006:0:5\n",
"8204 108901 382 724 8503011:0:2\n",
"12599 108900 382 1138 8503010:0:2\n",
"12940 108902 382 1176 8503000:0:33\n",
"13590 108904 382 1218 8503016:0:3"
]
},
- "execution_count": 106,
+ "execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes.loc[stop_routes['route_int'] == 382]"
]
},
{
"cell_type": "code",
- "execution_count": 108,
+ "execution_count": 127,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" index \n",
" route_int \n",
" stop_int \n",
" \n",
" \n",
" \n",
" \n",
" 4024 \n",
" 108900 \n",
" 382 \n",
" 1138 \n",
" \n",
" \n",
" 4025 \n",
" 108901 \n",
" 382 \n",
" 724 \n",
" \n",
" \n",
" 4026 \n",
" 108902 \n",
" 382 \n",
" 1176 \n",
" \n",
" \n",
" 4027 \n",
" 108903 \n",
" 382 \n",
" 617 \n",
" \n",
" \n",
" 4028 \n",
" 108904 \n",
" 382 \n",
" 1218 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" index route_int stop_int\n",
"4024 108900 382 1138\n",
"4025 108901 382 724\n",
"4026 108902 382 1176\n",
"4027 108903 382 617\n",
"4028 108904 382 1218"
]
},
- "execution_count": 108,
+ "execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"route_stops.loc[route_stops['route_int'] == 382]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read the pickled files back in."
]
},
{
"cell_type": "code",
- "execution_count": 102,
+ "execution_count": 128,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "ename": "FileNotFoundError",
+ "evalue": "[Errno 2] No such file or directory: '../data/stop_times_array.pkl'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../data/stop_times_array.pkl'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0marrayname1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../data/stop_times_array.pkl'"
+ ]
+ }
+ ],
"source": [
- "with open('../data/stop_times_array.pkl','rb') as f: arrayname1 = pickle.load(f)"
+ "with open('../data/stop_times_array_cyril.pkl','rb') as f: arrayname1 = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"ename": "UnpicklingError",
"evalue": "invalid load key, 'v'.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mUnpicklingError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../data/routes_array.pkl'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0marrayname2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mUnpicklingError\u001b[0m: invalid load key, 'v'."
]
}
],
"source": [
- "with open('../data/routes_array.pkl','rb') as f: arrayname2 = pickle.load(f)"
+ "with open('../data/routes_array_cyril.pkl','rb') as f: arrayname2 = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"ename": "UnpicklingError",
"evalue": "invalid load key, 'v'.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mUnpicklingError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../data/route_stops_array.pkl'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0marrayname3\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mUnpicklingError\u001b[0m: invalid load key, 'v'."
]
}
],
"source": [
- "with open('../data/route_stops_array.pkl','rb') as f: arrayname3 = pickle.load(f)"
+ "with open('../data/route_stops_array_cyril.pkl','rb') as f: arrayname3 = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([['2020-05-22T07:00:00.000000000', '2020-05-22T07:01:00.000000000'],\n",
" ['2020-05-22T07:02:00.000000000', 'NaT'],\n",
" ['2020-05-22T07:00:00.000000000', '2020-05-22T07:00:00.000000000'],\n",
" ...,\n",
" ['2020-05-22T07:35:00.000000000', '2020-05-22T07:35:00.000000000'],\n",
" ['2020-05-22T07:36:00.000000000', '2020-05-22T07:36:00.000000000'],\n",
" ['2020-05-22T07:37:00.000000000', 'NaT']],\n",
" dtype='datetime64[ns]')"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"arrayname1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"arrayname2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"arrayname3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
diff --git a/notebooks/From_Stop_id_to_stop_int.ipynb b/notebooks/From_Stop_id_to_stop_int.ipynb
new file mode 100644
index 0000000..1591163
--- /dev/null
+++ b/notebooks/From_Stop_id_to_stop_int.ipynb
@@ -0,0 +1,386 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Functions to convert between stop_id and stop_int\n",
+ "Notebook to be deleted after incorporating the functions into MC RAPTOR."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import pickle"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Import the stop_times dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open('../data/stop_times_df_cyril.pkl','rb') as f: stop_times_df = pickle.load(f)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " route_id \n",
+ " stop_id_general \n",
+ " trip_id \n",
+ " stop_id \n",
+ " arrival_time \n",
+ " departure_time \n",
+ " stop_sequence \n",
+ " stop_name \n",
+ " stop_lat \n",
+ " stop_lon \n",
+ " trip_headsign \n",
+ " trip_short_name \n",
+ " direction_id \n",
+ " departure_first_stop \n",
+ " route_int \n",
+ " stop_count \n",
+ " stop_int \n",
+ " route_desc \n",
+ " departure_first_shift_1 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 93111 \n",
+ " 26-10-j19-1 \n",
+ " 8573205 \n",
+ " 1672.TA.26-10-j19-1.11.R \n",
+ " 8573205 \n",
+ " NaT \n",
+ " 2020-05-23 07:01:00 \n",
+ " 27 \n",
+ " Zürich Flughafen, Bahnhof \n",
+ " 47.450441 \n",
+ " 8.563729 \n",
+ " Zürich Flughafen, Fracht \n",
+ " 4096 \n",
+ " 1 \n",
+ " 07:01:00 \n",
+ " 0 \n",
+ " 2 \n",
+ " 298 \n",
+ " Tram \n",
+ " 07:01:00 \n",
+ " \n",
+ " \n",
+ " 93112 \n",
+ " 26-10-j19-1 \n",
+ " 8588553 \n",
+ " 1672.TA.26-10-j19-1.11.R \n",
+ " 8588553 \n",
+ " 2020-05-23 07:02:00 \n",
+ " NaT \n",
+ " 28 \n",
+ " Zürich Flughafen, Fracht \n",
+ " 47.452494 \n",
+ " 8.572057 \n",
+ " Zürich Flughafen, Fracht \n",
+ " 4096 \n",
+ " 1 \n",
+ " 07:01:00 \n",
+ " 0 \n",
+ " 2 \n",
+ " 1295 \n",
+ " Tram \n",
+ " 07:00:00 \n",
+ " \n",
+ " \n",
+ " 93113 \n",
+ " 26-13-j19-1 \n",
+ " 8576240 \n",
+ " 2064.TA.26-13-j19-1.24.H \n",
+ " 8576240 \n",
+ " NaT \n",
+ " 2020-05-23 07:00:00 \n",
+ " 5 \n",
+ " Zürich, Meierhofplatz \n",
+ " 47.402010 \n",
+ " 8.499374 \n",
+ " Zürich, Albisgütli \n",
+ " 1831 \n",
+ " 0 \n",
+ " 07:00:00 \n",
+ " 1 \n",
+ " 26 \n",
+ " 1222 \n",
+ " Tram \n",
+ " 07:00:00 \n",
+ " \n",
+ " \n",
+ " 93114 \n",
+ " 26-13-j19-1 \n",
+ " 8591353 \n",
+ " 2064.TA.26-13-j19-1.24.H \n",
+ " 8591353 \n",
+ " 2020-05-23 07:01:00 \n",
+ " 2020-05-23 07:01:00 \n",
+ " 6 \n",
+ " Zürich, Schwert \n",
+ " 47.399730 \n",
+ " 8.504611 \n",
+ " Zürich, Albisgütli \n",
+ " 1831 \n",
+ " 0 \n",
+ " 07:00:00 \n",
+ " 1 \n",
+ " 26 \n",
+ " 816 \n",
+ " Tram \n",
+ " 07:00:00 \n",
+ " \n",
+ " \n",
+ " 93115 \n",
+ " 26-13-j19-1 \n",
+ " 8591039 \n",
+ " 2064.TA.26-13-j19-1.24.H \n",
+ " 8591039 \n",
+ " 2020-05-23 07:02:00 \n",
+ " 2020-05-23 07:02:00 \n",
+ " 7 \n",
+ " Zürich, Alte Trotte \n",
+ " 47.397766 \n",
+ " 8.507252 \n",
+ " Zürich, Albisgütli \n",
+ " 1831 \n",
+ " 0 \n",
+ " 07:00:00 \n",
+ " 1 \n",
+ " 26 \n",
+ " 778 \n",
+ " Tram \n",
+ " 07:00:00 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " route_id stop_id_general trip_id stop_id \\\n",
+ "93111 26-10-j19-1 8573205 1672.TA.26-10-j19-1.11.R 8573205 \n",
+ "93112 26-10-j19-1 8588553 1672.TA.26-10-j19-1.11.R 8588553 \n",
+ "93113 26-13-j19-1 8576240 2064.TA.26-13-j19-1.24.H 8576240 \n",
+ "93114 26-13-j19-1 8591353 2064.TA.26-13-j19-1.24.H 8591353 \n",
+ "93115 26-13-j19-1 8591039 2064.TA.26-13-j19-1.24.H 8591039 \n",
+ "\n",
+ " arrival_time departure_time stop_sequence \\\n",
+ "93111 NaT 2020-05-23 07:01:00 27 \n",
+ "93112 2020-05-23 07:02:00 NaT 28 \n",
+ "93113 NaT 2020-05-23 07:00:00 5 \n",
+ "93114 2020-05-23 07:01:00 2020-05-23 07:01:00 6 \n",
+ "93115 2020-05-23 07:02:00 2020-05-23 07:02:00 7 \n",
+ "\n",
+ " stop_name stop_lat stop_lon \\\n",
+ "93111 Zürich Flughafen, Bahnhof 47.450441 8.563729 \n",
+ "93112 Zürich Flughafen, Fracht 47.452494 8.572057 \n",
+ "93113 Zürich, Meierhofplatz 47.402010 8.499374 \n",
+ "93114 Zürich, Schwert 47.399730 8.504611 \n",
+ "93115 Zürich, Alte Trotte 47.397766 8.507252 \n",
+ "\n",
+ " trip_headsign trip_short_name direction_id \\\n",
+ "93111 Zürich Flughafen, Fracht 4096 1 \n",
+ "93112 Zürich Flughafen, Fracht 4096 1 \n",
+ "93113 Zürich, Albisgütli 1831 0 \n",
+ "93114 Zürich, Albisgütli 1831 0 \n",
+ "93115 Zürich, Albisgütli 1831 0 \n",
+ "\n",
+ " departure_first_stop route_int stop_count stop_int route_desc \\\n",
+ "93111 07:01:00 0 2 298 Tram \n",
+ "93112 07:01:00 0 2 1295 Tram \n",
+ "93113 07:00:00 1 26 1222 Tram \n",
+ "93114 07:00:00 1 26 816 Tram \n",
+ "93115 07:00:00 1 26 778 Tram \n",
+ "\n",
+ " departure_first_shift_1 \n",
+ "93111 07:01:00 \n",
+ "93112 07:00:00 \n",
+ "93113 07:00:00 \n",
+ "93114 07:00:00 \n",
+ "93115 07:00:00 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "stop_times_df.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1222"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "p = 8576240\n",
+ "stop_times_df[stop_times_df.stop_id == str(p)].iloc[0].stop_int"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1222"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "stop_int = stop_times_df[stop_times_df.stop_id == str(8576240)].iloc[0].stop_int\n",
+ "stop_int"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_stop_int_from_id(p):\n",
+ " stop_int = stop_times_df[stop_times_df.stop_id == str(p)].iloc[0].stop_int\n",
+ " return stop_int"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_stop_id_from_int(p):\n",
+ " stop_id = stop_times_df[stop_times_df.stop_int == p].iloc[0].stop_id\n",
+ " return stop_id"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1222"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "get_stop_int_from_id(p)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'8576240'"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "get_stop_id_from_int(1222)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/data_cyril.ipynb b/notebooks/data_cyril.ipynb
index a734be7..a6b574c 100644
--- a/notebooks/data_cyril.ipynb
+++ b/notebooks/data_cyril.ipynb
@@ -1,4236 +1,5004 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Current session configs: {'conf': {'spark.app.name': 'lgptguys_final'}, 'kind': 'pyspark'} "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
- "ID YARN Application ID Kind State Spark UI Driver log Current session? 7676 application_1589299642358_2172 pyspark idle Link Link 7684 application_1589299642358_2180 pyspark idle Link Link 7686 application_1589299642358_2182 pyspark idle Link Link 7691 application_1589299642358_2187 pyspark idle Link Link 7694 application_1589299642358_2190 pyspark idle Link Link 7699 application_1589299642358_2195 pyspark busy Link Link 7700 application_1589299642358_2196 pyspark idle Link Link 7701 application_1589299642358_2197 pyspark idle Link Link 7704 application_1589299642358_2200 pyspark idle Link Link 7705 application_1589299642358_2201 pyspark idle Link Link 7711 application_1589299642358_2207 pyspark idle Link Link 7718 application_1589299642358_2214 pyspark idle Link Link 7719 application_1589299642358_2215 pyspark idle Link Link 7720 application_1589299642358_2216 pyspark idle Link Link 7721 application_1589299642358_2217 pyspark idle Link Link 7722 application_1589299642358_2218 pyspark busy Link Link 7724 application_1589299642358_2220 pyspark idle Link Link 7725 application_1589299642358_2221 pyspark busy Link Link 7727 application_1589299642358_2223 pyspark idle Link Link 7728 application_1589299642358_2224 pyspark busy Link Link
"
+ "ID YARN Application ID Kind State Spark UI Driver log Current session? 7933 application_1589299642358_2451 pyspark idle Link Link 7946 application_1589299642358_2464 pyspark idle Link Link 7951 application_1589299642358_2469 pyspark idle Link Link 7958 application_1589299642358_2476 pyspark idle Link Link 7959 application_1589299642358_2477 pyspark idle Link Link 7962 application_1589299642358_2480 pyspark idle Link Link 7965 application_1589299642358_2485 pyspark idle Link Link 7968 application_1589299642358_2488 pyspark idle Link Link 7971 application_1589299642358_2491 pyspark idle Link Link 7972 application_1589299642358_2492 pyspark idle Link Link 7973 application_1589299642358_2493 pyspark idle Link Link 7975 application_1589299642358_2495 pyspark idle Link Link 7977 application_1589299642358_2497 pyspark idle Link Link 7978 application_1589299642358_2498 pyspark busy Link Link 7980 application_1589299642358_2501 pyspark idle Link Link 7981 application_1589299642358_2502 pyspark idle Link Link 7982 application_1589299642358_2503 pyspark busy Link Link 7983 application_1589299642358_2504 pyspark idle Link Link 7984 application_1589299642358_2505 pyspark busy Link Link "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%configure\n",
"{\"conf\": {\n",
" \"spark.app.name\": \"lgptguys_final\"\n",
"}}"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting Spark application\n"
]
},
{
"data": {
"text/html": [
"\n",
- "ID YARN Application ID Kind State Spark UI Driver log Current session? 7729 application_1589299642358_2225 pyspark idle Link Link ✔
"
+ "ID YARN Application ID Kind State Spark UI Driver log Current session? 7985 application_1589299642358_2506 pyspark idle Link Link ✔ "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SparkSession available as 'spark'.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"An error was encountered:\n",
"unknown magic command '%spark'\n",
"UnknownMagic: unknown magic command '%spark'\n",
"\n"
]
}
],
"source": [
"# Initialization\n",
"%%spark"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below, we pre-process the data to generate the files required to run RAPTOR.\n",
"\n",
"The main reasoning behind this way of cleaning the data is the following:\n",
"**Given a cleaned stop_times.txt file, it is possible to reconstruct everything required to run RAPTOR**, with the exception of footpaths. In particular, routes are reconstructed from the cleaned stop_times.txt and not from routes.txt.\n",
"\n",
"We use the following strategy:\n",
"\n",
"- 1) Filter out stops farther than 15 km from ZH HB\n",
" - Done on stops\n",
"- 2) Merge stops that share a parent stop to the first 7 characters of the stop name\n",
" - Done on stops (add a general_stop column)\n",
" - Used as an input for stopTimes (add a general_stop column). At this point, stop_times contains only stops within 15km of ZH HB\n",
" \n",
"- 3) Keep only services that run on every day of the business week:\n",
" - Obtain the list of services from calendar\n",
" - Serves as an input to filter trips\n",
" - Serves as an input to filter stop_times\n",
"- 4) Keep only stop times between 7am and 7pm\n",
" - Do that on stop_times\n",
" \n",
"- 5) Find unique trips, based on the stops sequence and the departure times sequence\n",
" - sort by trip, arrival_time (which is the same as stop_sequence)\n",
" - build a (sorted) all_stops column for each trip\n",
" - build a (sorted) all_departure_times column for each trip\n",
" - keep only one trip that has the same all_stops and all_departure_times column\n",
"- 6) Build routes based on unique trips\n",
" - order unique_trips by stop_sequence, earliest departure time\n",
" - each window with the same stop_sequence gets a unique routeID\n",
" \n",
"- 7) Give unique integer indices to stops\n",
" - get unique general (parent) stop names from stop times\n",
" - assign an index with zipWithIndex\n",
" - stop_times -> inner join on result\n",
"- 8) Indicate the transport type from the route\n",
" - inner join stop_times with routes.txt on route_id"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from geopy.distance import great_circle\n",
"from pyspark.sql.functions import *\n",
"import numpy as np\n",
"import pandas as pd\n",
"from geopy.distance import great_circle\n",
"from pyspark.sql.types import DoubleType\n",
"from pyspark.sql.types import DateType"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 1) Filtering out stops not within 15km of ZH HB"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+--------------------+----------------+-----------------+-------------+--------------+\n",
"|stop_id| stop_name| stop_lat| stop_lon|location_type|parent_station|\n",
"+-------+--------------------+----------------+-----------------+-------------+--------------+\n",
"|1322000| Altoggio|46.1672513851495| 8.345807131427| null| null|\n",
"|1322001| Antronapiana| 46.060121674738| 8.11361957990831| null| null|\n",
"|1322002| Anzola|45.9898698225697| 8.34571729989858| null| null|\n",
"|1322003| Baceno|46.2614983591677| 8.31925293162473| null| null|\n",
"|1322004|Beura Cardezza, c...|46.0790618438814| 8.29927439970313| null| null|\n",
"|1322005|Bognanco, T. Vill...|46.1222963432243| 8.21077237789936| null| null|\n",
"|1322006| Boschetto|46.0656504576122| 8.26113193273411| null| null|\n",
"|1322007| Cadarese|46.2978807772998| 8.3626325767009| null| null|\n",
"|1322010| Campioli|45.9695691829797| 8.04585965801774| null| null|\n",
"|1322011| Cascate del Toce|46.4091810825782| 8.4117524564434| null| null|\n",
"|1322012| Castiglione|46.0205875326422| 8.2148866619012| null| null|\n",
"|1322013| Ceppo Morelli|45.9710364221151| 8.06992552448265| null| null|\n",
"|1322014|Chiesa (Val Forma...|46.3530849443472| 8.42787721579558| null| null|\n",
"|1322015| Cosasca di Trontano|46.0967496675661| 8.31182386422403| null| null|\n",
"|1322016| Cresti|46.0664046229574| 8.2328978833503| null| null|\n",
"|1322017| Crevoladossola|46.1562758593614| 8.30343359946918| null| null|\n",
"|1322018| Crodo, Bagni|46.2141837457637| 8.32131905677849| null| null|\n",
"|1322019| Crodo, paese| 46.224016613202| 8.3235648449891| null| null|\n",
"|1322021| Croppo di Trontano|46.1103590121829| 8.31194064521098| null| null|\n",
"|1322022| Crusinallo|45.6945937446539|0.595870494345107| null| null|\n",
"+-------+--------------------+----------------+-----------------+-------------+--------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"stops = spark.read.csv(\"/data/sbb/timetables/csv/stops/2019/05/14/stops.txt\", header=True, sep = \",\")\n",
"stops.show()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"30631"
]
}
],
"source": [
"stops.count()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#defining udf function\n",
"@udf(\"float\")\n",
"def great_circle_udf(x, y):\n",
" return great_circle(x, y).kilometers"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------+--------------------+----------------+----------------+-------------+--------------+\n",
"| stop_id| stop_name| stop_lat| stop_lon|location_type|parent_station|\n",
"+-----------+--------------------+----------------+----------------+-------------+--------------+\n",
"| 8500926|Oetwil a.d.L., Sc...|47.4236270123012| 8.4031825286317| null| null|\n",
"| 8502186|Dietikon Stoffelbach|47.3934058321612|8.39894248049007| null| 8502186P|\n",
"|8502186:0:1|Dietikon Stoffelbach|47.3934666445388|8.39894248049007| null| 8502186P|\n",
"|8502186:0:2|Dietikon Stoffelbach|47.3935274568464|8.39894248049007| null| 8502186P|\n",
"| 8502186P|Dietikon Stoffelbach|47.3934058321612|8.39894248049007| 1| null|\n",
"| 8502187|Rudolfstetten Hof...|47.3646945560768|8.37709545277724| null| 8502187P|\n",
"|8502187:0:1|Rudolfstetten Hof...|47.3647554015789|8.37709545277724| null| 8502187P|\n",
"|8502187:0:2|Rudolfstetten Hof...|47.3648162470108|8.37709545277724| null| 8502187P|\n",
"| 8502187P|Rudolfstetten Hof...|47.3646945560768|8.37709545277724| 1| null|\n",
"| 8502188| Zufikon Hammergut|47.3558347019549|8.35472740219955| null| 8502188P|\n",
"|8502188:0:1| Zufikon Hammergut|47.3558955576756|8.35472740219955| null| 8502188P|\n",
"|8502188:0:2| Zufikon Hammergut|47.3559564133261|8.35472740219955| null| 8502188P|\n",
"| 8502188P| Zufikon Hammergut|47.3558347019549|8.35472740219955| 1| null|\n",
"| 8502208| Horgen Oberdorf|47.2587475534877|8.58979854578067| null| 8502208P|\n",
"|8502208:0:2| Horgen Oberdorf|47.2589304560815|8.58979854578067| null| 8502208P|\n",
"|8502208:0:3| Horgen Oberdorf|47.2588085210892|8.58979854578067| null| 8502208P|\n",
"|8502208:0:4| Horgen Oberdorf|47.2588694886204|8.58979854578067| null| 8502208P|\n",
"| 8502208P| Horgen Oberdorf|47.2587475534877|8.58979854578067| 1| null|\n",
"| 8502209| Oberrieden Dorf|47.2767238569466| 8.577635356832| null| 8502209P|\n",
"|8502209:0:1| Oberrieden Dorf|47.2768457506749| 8.577635356832| null| 8502209P|\n",
"+-----------+--------------------+----------------+----------------+-------------+--------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# Zurich HB coordinates\n",
"zurich_geo = (47.378177, 8.540192)\n",
"\n",
"#transforming Zurich HB coordinates in a spark dataframe column object\n",
"zurich_geo_col = struct(lit(zurich_geo[0]), lit(zurich_geo[1]))\n",
"\n",
"#applying filter function based on distance\n",
"stops_15km = stops.filter(great_circle_udf(zurich_geo_col, struct(stops.stop_lat, stops.stop_lon)) < 15)\n",
"stops_15km.show()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1883"
]
}
],
"source": [
"stops_15km.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2) Merging stops that share a parent stop"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+------------+--------------------+----------------+----------------+-------------+--------------+\n",
"| stop_id| stop_name| stop_lat| stop_lon|location_type|parent_station|\n",
"+------------+--------------------+----------------+----------------+-------------+--------------+\n",
"| 8502186|Dietikon Stoffelbach|47.3934058321612|8.39894248049007| null| 8502186P|\n",
"| 8502186:0:1|Dietikon Stoffelbach|47.3934666445388|8.39894248049007| null| 8502186P|\n",
"| 8502186:0:2|Dietikon Stoffelbach|47.3935274568464|8.39894248049007| null| 8502186P|\n",
"| 8502187|Rudolfstetten Hof...|47.3646945560768|8.37709545277724| null| 8502187P|\n",
"| 8502187:0:1|Rudolfstetten Hof...|47.3647554015789|8.37709545277724| null| 8502187P|\n",
"| 8502187:0:2|Rudolfstetten Hof...|47.3648162470108|8.37709545277724| null| 8502187P|\n",
"| 8502188| Zufikon Hammergut|47.3558347019549|8.35472740219955| null| 8502188P|\n",
"| 8502188:0:1| Zufikon Hammergut|47.3558955576756|8.35472740219955| null| 8502188P|\n",
"| 8502188:0:2| Zufikon Hammergut|47.3559564133261|8.35472740219955| null| 8502188P|\n",
"| 8502208| Horgen Oberdorf|47.2587475534877|8.58979854578067| null| 8502208P|\n",
"| 8502208:0:2| Horgen Oberdorf|47.2589304560815|8.58979854578067| null| 8502208P|\n",
"| 8502208:0:3| Horgen Oberdorf|47.2588085210892|8.58979854578067| null| 8502208P|\n",
"| 8502208:0:4| Horgen Oberdorf|47.2588694886204|8.58979854578067| null| 8502208P|\n",
"| 8502209| Oberrieden Dorf|47.2767238569466| 8.577635356832| null| 8502209P|\n",
"| 8502209:0:1| Oberrieden Dorf|47.2768457506749| 8.577635356832| null| 8502209P|\n",
"| 8502209:0:2| Oberrieden Dorf|47.2767848038458| 8.577635356832| null| 8502209P|\n",
"| 8502220| Urdorf|47.3908820565997|8.43471339510869| null| 8502220P|\n",
"| 8502220:0:1| Urdorf|47.3909428718897|8.43471339510869| null| 8502220P|\n",
"| 8502220:0:2| Urdorf|47.3910036871096|8.43471339510869| null| 8502220P|\n",
"| 8502221| Birmensdorf ZH|47.3574351840587|8.43754308825406| null| 8502221P|\n",
"| 8502221:0:1| Birmensdorf ZH|47.3575568917382|8.43754308825406| null| 8502221P|\n",
"| 8502221:0:2| Birmensdorf ZH|47.3574960379336|8.43754308825406| null| 8502221P|\n",
"| 8502222| Bonstetten-Wettswil|47.3258973534906|8.46817563944679| null| 8502222P|\n",
"| 8502222:0:2| Bonstetten-Wettswil| 47.325958243729|8.46817563944679| null| 8502222P|\n",
"| 8502222:0:3| Bonstetten-Wettswil|47.3260191338971|8.46817563944679| null| 8502222P|\n",
"| 8502223| Hedingen|47.2987820476816|8.44595131931459| null| 8502223P|\n",
"| 8502223:0:1| Hedingen|47.2988429691695|8.44595131931459| null| 8502223P|\n",
"| 8502223:0:2| Hedingen|47.2989038905872|8.44595131931459| null| 8502223P|\n",
"| 8502224| Affoltern am Albis|47.2760656259617|8.44658014001356| null| 8502224P|\n",
"| 8502224:0:1| Affoltern am Albis|47.2761875212063|8.44658014001356| null| 8502224P|\n",
"| 8502224:0:2| Affoltern am Albis|47.2762484687233|8.44658014001356| null| 8502224P|\n",
"| 8502224:0:3| Affoltern am Albis|47.2761265736191|8.44658014001356| null| 8502224P|\n",
"| 8502229:0:1| Urdorf Weihermatt|47.3810351357388|8.43032961652157| null| 8502229P|\n",
"| 8502229:0:2| Urdorf Weihermatt|47.3810959623905|8.43032961652157| null| 8502229P|\n",
"| 8502273:0:1| Bremgarten|47.3519945640447| 8.3474779978557| null| 8502273P|\n",
"| 8502273:0:2| Bremgarten|47.3519337038252| 8.3474779978557| null| 8502273P|\n",
"| 8502276:0:1| Berikon-Widen|47.3622485087742|8.36679177646695| null| 8502276P|\n",
"| 8502276:0:2| Berikon-Widen|47.3623093570976|8.36679177646695| null| 8502276P|\n",
"| 8502758:0:A|Hausen am Albis, ...|47.2448085174147| 8.5329801040522| null| 8502758P|\n",
"| 8502758:0:B|Hausen am Albis, ...|47.2448695010648| 8.5329801040522| null| 8502758P|\n",
"| 8502758:0:C|Hausen am Albis, ...|47.2449304846447| 8.5329801040522| null| 8502758P|\n",
"| 8503000| Zürich HB|47.3781762039461|8.54019357578468| null| 8503000P|\n",
"|8503000:0:10| Zürich HB|47.3794536181612|8.54019357578468| null| 8503000P|\n",
"|8503000:0:11| Zürich HB|47.3795144466376|8.54019357578468| null| 8503000P|\n",
"|8503000:0:12| Zürich HB|47.3786020121232|8.54019357578468| null| 8503000P|\n",
"|8503000:0:13| Zürich HB|47.3785411825942|8.54019357578468| null| 8503000P|\n",
"|8503000:0:14| Zürich HB|47.3783586935859|8.54019357578468| null| 8503000P|\n",
"|8503000:0:15| Zürich HB|47.3784803529949|8.54019357578468| null| 8503000P|\n",
"|8503000:0:16| Zürich HB|47.3784195233255|8.54019357578468| null| 8503000P|\n",
"|8503000:0:17| Zürich HB| 47.379271132311|8.54019357578468| null| 8503000P|\n",
"+------------+--------------------+----------------+----------------+-------------+--------------+\n",
"only showing top 50 rows"
]
}
],
"source": [
"stops_15km.filter(stops_15km.parent_station.isNotNull()).show(50)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------+--------------------+----------------+----------------+-------------+--------------+\n",
"| stop_id| stop_name| stop_lat| stop_lon|location_type|parent_station|\n",
"+-----------+--------------------+----------------+----------------+-------------+--------------+\n",
"| 8500926|Oetwil a.d.L., Sc...|47.4236270123012| 8.4031825286317| null| null|\n",
"| 8502186P|Dietikon Stoffelbach|47.3934058321612|8.39894248049007| 1| null|\n",
"| 8502187P|Rudolfstetten Hof...|47.3646945560768|8.37709545277724| 1| null|\n",
"| 8502188P| Zufikon Hammergut|47.3558347019549|8.35472740219955| 1| null|\n",
"| 8502208P| Horgen Oberdorf|47.2587475534877|8.58979854578067| 1| null|\n",
"| 8502209P| Oberrieden Dorf|47.2767238569466| 8.577635356832| 1| null|\n",
"| 8502220P| Urdorf|47.3908820565997|8.43471339510869| 1| null|\n",
"| 8502221P| Birmensdorf ZH|47.3574351840587|8.43754308825406| 1| null|\n",
"| 8502222P| Bonstetten-Wettswil|47.3258973534906|8.46817563944679| 1| null|\n",
"| 8502223P| Hedingen|47.2987820476816|8.44595131931459| 1| null|\n",
"| 8502224P| Affoltern am Albis|47.2760656259617|8.44658014001356| 1| null|\n",
"| 8502229P| Urdorf Weihermatt|47.3809743090169|8.43032961652157| 1| null|\n",
"| 8502268| Zufikon Belvédère|47.3575812332404|8.35923694492646| null| null|\n",
"|8502268:0:1| Zufikon Belvédère|47.3576420869468|8.35923694492646| null| null|\n",
"| 8502270| Bergfrieden|47.3977111751049|8.39908621093555| null| null|\n",
"|8502270:0:1| Bergfrieden|47.3977719825142|8.39908621093555| null| null|\n",
"| 8502273P| Bremgarten|47.3518728435356| 8.3474779978557| 1| null|\n",
"| 8502274| Zufikon|47.3525240449924|8.35470943589386| null| null|\n",
"|8502274:0:1| Zufikon|47.3525849045311|8.35470943589386| null| null|\n",
"| 8502275| Widen Heinrüti|47.3620598785256|8.35486214949218| null| null|\n",
"|8502275:0:1| Widen Heinrüti|47.3621207270666|8.35486214949218| null| null|\n",
"| 8502276P| Berikon-Widen|47.3621876603806|8.36679177646695| 1| null|\n",
"| 8502277| Rudolfstetten|47.3700243558558|8.38180262486668| null| null|\n",
"|8502277:0:1| Rudolfstetten| 47.37008519521|8.38180262486668| null| null|\n",
"| 8502278| Reppischhof|47.3847211041004| 8.3963463493186| null| null|\n",
"|8502278:0:1| Reppischhof|47.3847819264993| 8.3963463493186| null| null|\n",
"| 8502495|Zürich Wollishofe...|47.3476976601166|8.53331248070737| null| null|\n",
"| 8502508|Spreitenbach, Rai...|47.4154457211288|8.37718528430566| null| null|\n",
"| 8502553|Unterlunkhofen, B...|47.3221585583935| 8.380473118246| null| null|\n",
"| 8502559|Waldegg, Birmensd...|47.3683025730349|8.46346846735736| null| null|\n",
"| 8502560| Berikon, Kirche|47.3510512227562|8.37141810018081| null| null|\n",
"| 8502570| Rottenschwil, Hecht|47.3190589331876| 8.372091836644| null| null|\n",
"| 8502572|Zürich, Goldbrunn...|47.3702920484894|8.51391785372053| null| null|\n",
"| 8502574|Affoltern a. A., ...|47.2784669105587|8.45910265507593| null| null|\n",
"| 8502575| Widen, Dorf|47.3675724714769|8.36359377405504| null| null|\n",
"| 8502750| Bellikon, Post|47.3895076123306| 8.3433726970067| null| null|\n",
"| 8502758P|Hausen am Albis, ...|47.2447475336943| 8.5329801040522| 1| null|\n",
"| 8502762|Langnau a.A., Alb...|47.2761509526624|8.52069115096373| null| null|\n",
"| 8502763|Hausen am Albis, ...|47.2629785384073|8.51724162027223| null| null|\n",
"| 8502764|Hausen am Albis, ...|47.2590158104091| 8.5220386238901| null| null|\n",
"| 8502771|Aeugst am Albis, ...|47.2678309520619|8.48534244452871| null| null|\n",
"| 8502776|Gattikon, Obstgarten|47.2847255843239| 8.5511170896411| null| null|\n",
"| 8502779| Ottenbach, Post|47.2816481400898|8.40452101840523| null| null|\n",
"| 8502876|Aesch ZH, Gemeind...|47.3382079319594|8.43870191497073| null| null|\n",
"| 8502879| Jonen, Post|47.2961806346557|8.39551091610425| null| null|\n",
"| 8502883|Zwillikon, Gemein...|47.2873945890065|8.43218912915996| null| null|\n",
"| 8502885|Bonstetten, Dorfp...|47.3150882242354|8.46778038072173| null| null|\n",
"| 8502894|Oberwil-Lieli, Ob...|47.3371669407057|8.38639301596917| null| null|\n",
"| 8502950|Birmensdorf ZH, Z...|47.3539359682156|8.43717477898752| null| null|\n",
"| 8502953|Affoltern a. A., ...|47.2812215495339|8.45441344929217| null| null|\n",
"+-----------+--------------------+----------------+----------------+-------------+--------------+\n",
"only showing top 50 rows"
]
}
],
"source": [
"stops_15km.filter(stops_15km.parent_station.isNull()).show(50)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It is clear that parent stops were not properly assigned for all stops (e.g. Zufikon Belvédère, which has a platform stop but no parent stop). Thus, we create a new column `stop_id_general` that contains only the first 7 characters of `stop_id`."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------+--------------------+----------------+----------------+-------------+--------------+---------------+\n",
"| stop_id| stop_name| stop_lat| stop_lon|location_type|parent_station|stop_id_general|\n",
"+-----------+--------------------+----------------+----------------+-------------+--------------+---------------+\n",
"| 8500926|Oetwil a.d.L., Sc...|47.4236270123012| 8.4031825286317| null| null| 8500926|\n",
"| 8502186|Dietikon Stoffelbach|47.3934058321612|8.39894248049007| null| 8502186P| 8502186|\n",
"|8502186:0:1|Dietikon Stoffelbach|47.3934666445388|8.39894248049007| null| 8502186P| 8502186|\n",
"|8502186:0:2|Dietikon Stoffelbach|47.3935274568464|8.39894248049007| null| 8502186P| 8502186|\n",
"| 8502186P|Dietikon Stoffelbach|47.3934058321612|8.39894248049007| 1| null| 8502186|\n",
"| 8502187|Rudolfstetten Hof...|47.3646945560768|8.37709545277724| null| 8502187P| 8502187|\n",
"|8502187:0:1|Rudolfstetten Hof...|47.3647554015789|8.37709545277724| null| 8502187P| 8502187|\n",
"|8502187:0:2|Rudolfstetten Hof...|47.3648162470108|8.37709545277724| null| 8502187P| 8502187|\n",
"| 8502187P|Rudolfstetten Hof...|47.3646945560768|8.37709545277724| 1| null| 8502187|\n",
"| 8502188| Zufikon Hammergut|47.3558347019549|8.35472740219955| null| 8502188P| 8502188|\n",
"|8502188:0:1| Zufikon Hammergut|47.3558955576756|8.35472740219955| null| 8502188P| 8502188|\n",
"|8502188:0:2| Zufikon Hammergut|47.3559564133261|8.35472740219955| null| 8502188P| 8502188|\n",
"| 8502188P| Zufikon Hammergut|47.3558347019549|8.35472740219955| 1| null| 8502188|\n",
"| 8502208| Horgen Oberdorf|47.2587475534877|8.58979854578067| null| 8502208P| 8502208|\n",
"|8502208:0:2| Horgen Oberdorf|47.2589304560815|8.58979854578067| null| 8502208P| 8502208|\n",
"|8502208:0:3| Horgen Oberdorf|47.2588085210892|8.58979854578067| null| 8502208P| 8502208|\n",
"|8502208:0:4| Horgen Oberdorf|47.2588694886204|8.58979854578067| null| 8502208P| 8502208|\n",
"| 8502208P| Horgen Oberdorf|47.2587475534877|8.58979854578067| 1| null| 8502208|\n",
"| 8502209| Oberrieden Dorf|47.2767238569466| 8.577635356832| null| 8502209P| 8502209|\n",
"|8502209:0:1| Oberrieden Dorf|47.2768457506749| 8.577635356832| null| 8502209P| 8502209|\n",
"+-----------+--------------------+----------------+----------------+-------------+--------------+---------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# stop_id_general keeps only the first 7 characters of stop_id, so platform ids\n",
"# (e.g. 8502186:0:1) and parent ids (e.g. 8502186P) map to the same value.\n",
"stops_15km = stops_15km.withColumn(\n",
"    'stop_id_general',\n",
"    col('stop_id').substr(1, 7)\n",
")\n",
"stops_15km.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we restrict `stop_times` to the stops within the 15 km radius and add the `stop_id_general` column."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------+---------------+--------------------+----------------+----------------+\n",
"| stop_id|stop_id_general| stop_name| stop_lat| stop_lon|\n",
"+-----------+---------------+--------------------+----------------+----------------+\n",
"| 8500926| 8500926|Oetwil a.d.L., Sc...|47.4236270123012| 8.4031825286317|\n",
"| 8502186| 8502186|Dietikon Stoffelbach|47.3934058321612|8.39894248049007|\n",
"|8502186:0:1| 8502186|Dietikon Stoffelbach|47.3934666445388|8.39894248049007|\n",
"|8502186:0:2| 8502186|Dietikon Stoffelbach|47.3935274568464|8.39894248049007|\n",
"| 8502186P| 8502186|Dietikon Stoffelbach|47.3934058321612|8.39894248049007|\n",
"| 8502187| 8502187|Rudolfstetten Hof...|47.3646945560768|8.37709545277724|\n",
"|8502187:0:1| 8502187|Rudolfstetten Hof...|47.3647554015789|8.37709545277724|\n",
"|8502187:0:2| 8502187|Rudolfstetten Hof...|47.3648162470108|8.37709545277724|\n",
"| 8502187P| 8502187|Rudolfstetten Hof...|47.3646945560768|8.37709545277724|\n",
"| 8502188| 8502188| Zufikon Hammergut|47.3558347019549|8.35472740219955|\n",
"|8502188:0:1| 8502188| Zufikon Hammergut|47.3558955576756|8.35472740219955|\n",
"|8502188:0:2| 8502188| Zufikon Hammergut|47.3559564133261|8.35472740219955|\n",
"| 8502188P| 8502188| Zufikon Hammergut|47.3558347019549|8.35472740219955|\n",
"| 8502208| 8502208| Horgen Oberdorf|47.2587475534877|8.58979854578067|\n",
"|8502208:0:2| 8502208| Horgen Oberdorf|47.2589304560815|8.58979854578067|\n",
"|8502208:0:3| 8502208| Horgen Oberdorf|47.2588085210892|8.58979854578067|\n",
"|8502208:0:4| 8502208| Horgen Oberdorf|47.2588694886204|8.58979854578067|\n",
"| 8502208P| 8502208| Horgen Oberdorf|47.2587475534877|8.58979854578067|\n",
"| 8502209| 8502209| Oberrieden Dorf|47.2767238569466| 8.577635356832|\n",
"|8502209:0:1| 8502209| Oberrieden Dorf|47.2768457506749| 8.577635356832|\n",
"+-----------+---------------+--------------------+----------------+----------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# Select every column by name from stops_15km itself instead of mixing in\n",
"# column references that belong to the separate `stops` DataFrame.\n",
"stops_15km_for_join = stops_15km.select('stop_id',\n",
"                                        'stop_id_general',\n",
"                                        'stop_name',\n",
"                                        'stop_lat',\n",
"                                        'stop_lon')\n",
"stops_15km_for_join.show()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+------------+--------------+-----------+-------------+-----------+-------------+\n",
"| trip_id|arrival_time|departure_time| stop_id|stop_sequence|pickup_type|drop_off_type|\n",
"+--------------------+------------+--------------+-----------+-------------+-----------+-------------+\n",
"|1.TA.1-1-B-j19-1.1.R| 04:20:00| 04:20:00|8500010:0:3| 1| 0| 0|\n",
"|1.TA.1-1-B-j19-1.1.R| 04:24:00| 04:24:00|8500020:0:3| 2| 0| 0|\n",
"|1.TA.1-1-B-j19-1.1.R| 04:28:00| 04:28:00|8500021:0:5| 3| 0| 0|\n",
"|1.TA.1-1-B-j19-1.1.R| 04:30:00| 04:30:00|8517131:0:2| 4| 0| 0|\n",
"|1.TA.1-1-B-j19-1.1.R| 04:32:00| 04:32:00|8500300:0:5| 5| 0| 0|\n",
"|1.TA.1-1-B-j19-1.1.R| 04:35:00| 04:35:00|8500313:0:2| 6| 0| 0|\n",
"|1.TA.1-1-B-j19-1.1.R| 04:37:00| 04:38:00|8500301:0:3| 7| 0| 0|\n",
"|1.TA.1-1-B-j19-1.1.R| 04:40:00| 04:41:00|8500302:0:3| 8| 0| 0|\n",
"|1.TA.1-1-B-j19-1.1.R| 04:45:00| 04:45:00|8500303:0:2| 9| 0| 0|\n",
"|1.TA.1-1-B-j19-1.1.R| 04:48:00| 04:49:00|8500320:0:3| 10| 0| 0|\n",
"|1.TA.1-1-B-j19-1.1.R| 04:52:00| 04:52:00|8500304:0:2| 11| 0| 0|\n",
"|1.TA.1-1-B-j19-1.1.R| 04:56:00| 04:56:00|8500305:0:1| 12| 0| 0|\n",
"|25.TA.1-1-B-j19-1...| 05:50:00| 05:50:00|8500010:0:3| 1| 0| 0|\n",
"|25.TA.1-1-B-j19-1...| 05:54:00| 05:54:00|8500020:0:3| 2| 0| 0|\n",
"|25.TA.1-1-B-j19-1...| 05:58:00| 05:58:00|8500021:0:5| 3| 0| 0|\n",
"|25.TA.1-1-B-j19-1...| 06:00:00| 06:00:00|8517131:0:2| 4| 0| 0|\n",
"|25.TA.1-1-B-j19-1...| 06:02:00| 06:02:00|8500300:0:5| 5| 0| 0|\n",
"|25.TA.1-1-B-j19-1...| 06:05:00| 06:05:00|8500313:0:2| 6| 0| 0|\n",
"|25.TA.1-1-B-j19-1...| 06:07:00| 06:08:00|8500301:0:3| 7| 0| 0|\n",
"|25.TA.1-1-B-j19-1...| 06:10:00| 06:11:00|8500302:0:3| 8| 0| 0|\n",
"+--------------------+------------+--------------+-----------+-------------+-----------+-------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"stop_times = spark.read.csv(\"/data/sbb/timetables/csv/stop_times/2019/05/14/stop_times.txt\", header=True, sep = \",\")\n",
"stop_times.show()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11128930"
]
}
],
"source": [
"stop_times.count()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------+--------------------+------------+--------------+-------------+-----------+-------------+---------------+-------------------+----------------+----------------+\n",
"| stop_id| trip_id|arrival_time|departure_time|stop_sequence|pickup_type|drop_off_type|stop_id_general| stop_name| stop_lat| stop_lon|\n",
"+-----------+--------------------+------------+--------------+-------------+-----------+-------------+---------------+-------------------+----------------+----------------+\n",
"|8503202:0:5|61.TA.25-75-j19-1...| 15:12:00| 15:14:00| 5| 0| 0| 8503202| Thalwil|47.2962171893553|8.56475351565593|\n",
"|8503202:0:5|99.TA.25-75-j19-1...| 19:14:00| 19:15:00| 5| 0| 0| 8503202| Thalwil|47.2962171893553|8.56475351565593|\n",
"|8503202:0:5|137.TA.25-75-j19-...| 23:12:00| 23:14:00| 5| 0| 0| 8503202| Thalwil|47.2962171893553|8.56475351565593|\n",
"|8503000:0:6|287.TA.25-75-j19-...| 19:35:00| 19:35:00| 1| 0| 0| 8503000| Zürich HB|47.3786628415821|8.54019357578468|\n",
"|8503000:0:7|337.TA.25-75-j19-...| 22:35:00| 22:35:00| 1| 0| 0| 8503000| Zürich HB|47.3787236709708|8.54019357578468|\n",
"| 8587860|8.TA.26-811-j19-1...| 23:38:00| 23:38:00| 9| 0| 0| 8587860| Uster, Strick|47.3564493415083|8.71275994188806|\n",
"| 8588052|35.TA.26-811-j19-...| 06:48:00| 06:48:00| 4| 0| 0| 8588052| Uster, Gschwader|47.3582688760385|8.71277790819375|\n",
"| 8588052|38.TA.26-811-j19-...| 18:33:00| 18:33:00| 4| 0| 0| 8588052| Uster, Gschwader|47.3582688760385|8.71277790819375|\n",
"| 8573504|42.TA.26-811-j19-...| 06:30:00| 06:30:00| 1| 0| 0| 8573504| Uster, Bahnhof|47.3511851173852|8.71683829327853|\n",
"| 8573504|98.TA.26-811-j19-...| 15:15:00| 15:15:00| 1| 0| 0| 8573504| Uster, Bahnhof|47.3511851173852|8.71683829327853|\n",
"| 8503152|133.TA.26-811-j19...| 17:51:00| 17:51:00| 7| 0| 0| 8503152|Uster, Brandschenke|47.3611410570261|8.71073873249851|\n",
"| 8588050|27.TA.26-812-j19-...| 20:05:00| 20:05:00| 7| 0| 0| 8588050| Uster, Bordacker|47.3519945640447|8.72933385888238|\n",
"| 8588051|35.TA.26-812-j19-...| 09:30:00| 09:30:00| 2| 0| 0| 8588051| Uster, Dammstrasse|47.3534491024058|8.71567946656186|\n",
"| 8588059|61.TA.26-812-j19-...| 06:04:00| 06:04:00| 6| 0| 0| 8588059| Uster, Weidli| 47.353996826126|8.73063641604453|\n",
"| 8503567|66.TA.26-812-j19-...| 06:17:00| 06:17:00| 3| 0| 0| 8503567| Uster, Spital|47.3538507670231|8.72349480953479|\n",
"| 8503567|105.TA.26-812-j19...| 08:32:00| 08:32:00| 3| 0| 0| 8503567| Uster, Spital|47.3538507670231|8.72349480953479|\n",
"| 8588056|116.TA.26-812-j19...| 20:07:00| 20:07:00| 9| 0| 0| 8588056| Uster, Talweg|47.3485132466833|8.73078014649001|\n",
"| 8503872|166.TA.26-812-j19...| 12:18:00| 12:18:00| 4| 0| 0| 8503872| Uster, Reithalle|47.3562972036718|8.72678264347512|\n",
"| 8588056|182.TA.26-812-j19...| 13:37:00| 13:37:00| 9| 0| 0| 8588056| Uster, Talweg|47.3485132466833|8.73078014649001|\n",
"| 8573504|12.TA.26-813-j19-...| 17:57:00| 17:57:00| 8| 0| 0| 8573504| Uster, Bahnhof|47.3511851173852|8.71683829327853|\n",
"+-----------+--------------------+------------+--------------+-------------+-----------+-------------+---------------+-------------------+----------------+----------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"stop_times_15km = stop_times.join(stops_15km_for_join, how=\"inner\", on = \"stop_id\").dropDuplicates()\n",
"stop_times_15km.show()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"stop_times_15km.write.csv('data/lgpt_guys/stop_times_15km.csv', header=True, mode='overwrite')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3) Keep only services that run on every business day (Monday to Friday)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2322109"
]
}
],
"source": [
"stop_times_15km = spark.read.csv('data/lgpt_guys/stop_times_15km.csv', header=True)\n",
"stop_times_15km.count()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"calendar = spark.read.csv(\"/data/sbb/timetables/csv/calendar/2019/05/14/calendar.txt\", header=True, sep = \",\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+----------+------+-------+---------+--------+------+--------+------+----------+--------+\n",
"|service_id|monday|tuesday|wednesday|thursday|friday|saturday|sunday|start_date|end_date|\n",
"+----------+------+-------+---------+--------+------+--------+------+----------+--------+\n",
"| TA+b0nx9| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b03bf| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0008| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxg| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b08k4| 1| 0| 0| 0| 0| 0| 0| 20181209|20191214|\n",
"| TA+b06hs| 0| 0| 0| 0| 1| 0| 0| 20181209|20191214|\n",
"| TA+b09de| 0| 0| 0| 0| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxn| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b05qx| 1| 1| 1| 0| 0| 0| 0| 20181209|20191214|\n",
"| TA+b0nxa| 0| 0| 0| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b05k1| 1| 0| 0| 0| 0| 0| 0| 20181209|20191214|\n",
"| TA+b01pq| 0| 0| 0| 0| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxb| 0| 0| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b04l2| 0| 1| 0| 0| 0| 0| 0| 20181209|20191214|\n",
"| TA+b063g| 1| 0| 0| 0| 0| 0| 0| 20181209|20191214|\n",
"| TA+b08xi| 1| 0| 0| 0| 0| 0| 0| 20181209|20191214|\n",
"| TA+b0nxd| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxe| 0| 0| 0| 0| 0| 1| 1| 20181209|20191214|\n",
"| TA+b0nxf| 0| 0| 0| 0| 0| 1| 1| 20181209|20191214|\n",
"| TA+b08zi| 0| 0| 0| 0| 0| 0| 1| 20181209|20191214|\n",
"+----------+------+-------+---------+--------+------+--------+------+----------+--------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"calendar.show()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+----------+------+-------+---------+--------+------+--------+------+----------+--------+\n",
"|service_id|monday|tuesday|wednesday|thursday|friday|saturday|sunday|start_date|end_date|\n",
"+----------+------+-------+---------+--------+------+--------+------+----------+--------+\n",
"| TA+b0nx9| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b03bf| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0008| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxg| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxn| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxd| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxh| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxi| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxl| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0f63| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0f6a| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0ap6| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b03c1| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nke| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b09su| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b00bo| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxc| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxq| 1| 1| 1| 1| 1| 1| 1| 20181209|20191214|\n",
"| TA+b0nuo| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"| TA+b0nxv| 1| 1| 1| 1| 1| 0| 0| 20181209|20191214|\n",
"+----------+------+-------+---------+--------+------+--------+------+----------+--------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# Keep a service only if it is flagged as running on each of Monday to Friday;\n",
"# the saturday and sunday flags are not constrained.\n",
"runs_mon_to_fri = (\n",
"    (calendar['monday'] == 1)\n",
"    & (calendar['tuesday'] == 1)\n",
"    & (calendar['wednesday'] == 1)\n",
"    & (calendar['thursday'] == 1)\n",
"    & (calendar['friday'] == 1)\n",
")\n",
"calendar_business_days = calendar.filter(runs_mon_to_fri)\n",
"calendar_business_days.show()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------+----------+--------------------+------------------+---------------+------------+\n",
"| route_id|service_id| trip_id| trip_headsign|trip_short_name|direction_id|\n",
"+-----------+----------+--------------------+------------------+---------------+------------+\n",
"|1-1-C-j19-1| TA+b0001|5.TA.1-1-C-j19-1.3.R|Zofingen, Altachen| 108| 1|\n",
"|1-1-C-j19-1| TA+b0001|7.TA.1-1-C-j19-1.3.R|Zofingen, Altachen| 112| 1|\n",
"|1-1-C-j19-1| TA+b0001|9.TA.1-1-C-j19-1.3.R|Zofingen, Altachen| 116| 1|\n",
"|1-1-C-j19-1| TA+b0001|11.TA.1-1-C-j19-1...|Zofingen, Altachen| 120| 1|\n",
"|1-1-C-j19-1| TA+b0001|13.TA.1-1-C-j19-1...|Zofingen, Altachen| 124| 1|\n",
"|1-1-C-j19-1| TA+b0001|15.TA.1-1-C-j19-1...|Zofingen, Altachen| 128| 1|\n",
"|1-1-C-j19-1| TA+b0001|17.TA.1-1-C-j19-1...|Zofingen, Altachen| 132| 1|\n",
"|1-1-C-j19-1| TA+b0001|18.TA.1-1-C-j19-1...|Zofingen, Altachen| 134| 1|\n",
"|1-1-C-j19-1| TA+b0001|19.TA.1-1-C-j19-1...|Zofingen, Altachen| 136| 1|\n",
"|1-1-C-j19-1| TA+b0001|20.TA.1-1-C-j19-1...|Zofingen, Altachen| 138| 1|\n",
"|1-1-C-j19-1| TA+b0001|21.TA.1-1-C-j19-1...|Zofingen, Altachen| 140| 1|\n",
"|1-1-C-j19-1| TA+b0001|22.TA.1-1-C-j19-1...|Zofingen, Altachen| 142| 1|\n",
"|1-1-C-j19-1| TA+b0001|23.TA.1-1-C-j19-1...|Zofingen, Altachen| 144| 1|\n",
"|1-1-C-j19-1| TA+b0001|24.TA.1-1-C-j19-1...|Zofingen, Altachen| 146| 1|\n",
"|1-1-C-j19-1| TA+b0001|25.TA.1-1-C-j19-1...|Zofingen, Altachen| 148| 1|\n",
"|1-1-C-j19-1| TA+b0001|26.TA.1-1-C-j19-1...|Zofingen, Altachen| 150| 1|\n",
"|1-1-C-j19-1| TA+b0001|27.TA.1-1-C-j19-1...|Zofingen, Altachen| 152| 1|\n",
"|1-1-C-j19-1| TA+b0001|30.TA.1-1-C-j19-1...|Zofingen, Altachen| 156| 1|\n",
"|1-1-C-j19-1| TA+b0001|37.TA.1-1-C-j19-1...|Zofingen, Altachen| 168| 1|\n",
"|1-1-C-j19-1| TA+b0001|38.TA.1-1-C-j19-1...|Zofingen, Altachen| 172| 1|\n",
"+-----------+----------+--------------------+------------------+---------------+------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"trips = spark.read.csv(\"/data/sbb/timetables/csv/trips/2019/05/14/trips.txt\", header=True, sep = \",\")\n",
"trips.show()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1017413"
]
}
],
"source": [
"trips.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Is there any useful information contained in `start_date` and `end_date`?"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+----------+--------+\n",
"|start_date|end_date|\n",
"+----------+--------+\n",
"| 20181209|20191214|\n",
"+----------+--------+"
]
}
],
"source": [
"# List the distinct (start_date, end_date) pairs among the business-day services.\n",
"date_ranges = calendar_business_days.select('start_date', 'end_date').dropDuplicates()\n",
"date_ranges.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`start_date` and `end_date` will not provide us with useful information, as their values are the same for all of the selected business-day services."
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+----------+\n",
"|service_id|\n",
"+----------+\n",
"| TA+b0nx9|\n",
"| TA+b03bf|\n",
"| TA+b0008|\n",
"| TA+b0nxg|\n",
"| TA+b0nxn|\n",
"| TA+b0nxd|\n",
"| TA+b0nxh|\n",
"| TA+b0nxi|\n",
"| TA+b0nxl|\n",
"| TA+b0f63|\n",
"| TA+b0f6a|\n",
"| TA+b0ap6|\n",
"| TA+b03c1|\n",
"| TA+b0nke|\n",
"| TA+b09su|\n",
"| TA+b00bo|\n",
"| TA+b0nxc|\n",
"| TA+b0nxq|\n",
"| TA+b0nuo|\n",
"| TA+b0nxv|\n",
"+----------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# Only the service_id column is kept, to serve as the join key.\n",
"calendar_business_days_for_join = calendar_business_days.select('service_id')\n",
"calendar_business_days_for_join.show()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+----------+------------+--------------------+--------------------+---------------+------------+\n",
"|service_id| route_id| trip_id| trip_headsign|trip_short_name|direction_id|\n",
"+----------+------------+--------------------+--------------------+---------------+------------+\n",
"| TA+b0001| 1-1-C-j19-1|46.TA.1-1-C-j19-1...|Aarburg-Oftringen...| 113| 0|\n",
"| TA+b0001| 1-1-C-j19-1|59.TA.1-1-C-j19-1...|Aarburg-Oftringen...| 139| 0|\n",
"| TA+b0001| 1-340-j19-1|2.TA.1-340-j19-1.1.H| Wohlen AG, Bahnhof| 105| 0|\n",
"| TA+b0001| 1-354-j19-1|36.TA.1-354-j19-1...|Kaiserstuhl AG, B...| 35435| 0|\n",
"| TA+b0001| 1-354-j19-1|47.TA.1-354-j19-1...|Kaiserstuhl AG, B...| 35467| 0|\n",
"| TA+b0001| 1-393-j19-1|70.TA.1-393-j19-1...|Othmarsingen, Bah...| 14060| 0|\n",
"| TA+b0001| 1-508-j19-1|87.TA.1-508-j19-1...|Aarburg-Oftringen...| 8178| 1|\n",
"| TA+b0001| 2-230-j19-1|28.TA.2-230-j19-1...| Trogen, Bahnhof| 23023| 0|\n",
"| TA+b0001| 3-193-j19-1|221.TA.3-193-j19-...| Appenzell, Bahnhof| 1040| 1|\n",
"| TA+b0001| 4-76-j19-1|54.TA.4-76-j19-1.2.H| Lausen, Furlen| 76049| 0|\n",
"| TA+b0001| 6-101-j19-1|645.TA.6-101-j19-...| Beatenberg, Station| 10103| 0|\n",
"| TA+b0001|6-11-A-j19-1|188.TA.6-11-A-j19...| Bern, Holligen| 11596| 1|\n",
"| TA+b0001|6-11-B-j19-1|123.TA.6-11-B-j19...|Biel/Bienne, Bahn...| 11045| 1|\n",
"| TA+b0001| 6-151-j19-1|54.TA.6-151-j19-1...| Brienz BE, Bahnhof| 236| 1|\n",
"| TA+b0001| 6-21-j19-1|106.TA.6-21-j19-1...| Thun, Bahnhof| 21199| 1|\n",
"| TA+b0001| 6-210-j19-1|23.TA.6-210-j19-1...| Frutigen, Bahnhof| 21025| 0|\n",
"| TA+b0001| 6-27-j19-1|195.TA.6-27-j19-1...|Bern, Weyermannsh...| 17667| 1|\n",
"| TA+b0001| 6-871-j19-1|43.TA.6-871-j19-1...| Waltwil| 71031| 0|\n",
"| TA+b0001| 6-9-B-j19-1|28.TA.6-9-B-j19-1...| Bern| 9110| 1|\n",
"| TA+b0001| 6-9-B-j19-1|43.TA.6-9-B-j19-1...| Bern| 9146| 1|\n",
"+----------+------------+--------------------+--------------------+---------------+------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"trips_business_week = trips.join(calendar_business_days_for_join, how=\"inner\", on = \"service_id\").dropDuplicates()\n",
"trips_business_week.show()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"528368"
]
}
],
"source": [
"trips_business_week.count()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+------------+--------------------+--------------------+---------------+------------+\n",
"| route_id| trip_id| trip_headsign|trip_short_name|direction_id|\n",
"+------------+--------------------+--------------------+---------------+------------+\n",
"| 1-1-C-j19-1|46.TA.1-1-C-j19-1...|Aarburg-Oftringen...| 113| 0|\n",
"| 1-1-C-j19-1|59.TA.1-1-C-j19-1...|Aarburg-Oftringen...| 139| 0|\n",
"| 1-340-j19-1|2.TA.1-340-j19-1.1.H| Wohlen AG, Bahnhof| 105| 0|\n",
"| 1-354-j19-1|36.TA.1-354-j19-1...|Kaiserstuhl AG, B...| 35435| 0|\n",
"| 1-354-j19-1|47.TA.1-354-j19-1...|Kaiserstuhl AG, B...| 35467| 0|\n",
"| 1-393-j19-1|70.TA.1-393-j19-1...|Othmarsingen, Bah...| 14060| 0|\n",
"| 1-508-j19-1|87.TA.1-508-j19-1...|Aarburg-Oftringen...| 8178| 1|\n",
"| 2-230-j19-1|28.TA.2-230-j19-1...| Trogen, Bahnhof| 23023| 0|\n",
"| 3-193-j19-1|221.TA.3-193-j19-...| Appenzell, Bahnhof| 1040| 1|\n",
"| 4-76-j19-1|54.TA.4-76-j19-1.2.H| Lausen, Furlen| 76049| 0|\n",
"| 6-101-j19-1|645.TA.6-101-j19-...| Beatenberg, Station| 10103| 0|\n",
"|6-11-A-j19-1|188.TA.6-11-A-j19...| Bern, Holligen| 11596| 1|\n",
"|6-11-B-j19-1|123.TA.6-11-B-j19...|Biel/Bienne, Bahn...| 11045| 1|\n",
"| 6-151-j19-1|54.TA.6-151-j19-1...| Brienz BE, Bahnhof| 236| 1|\n",
"| 6-21-j19-1|106.TA.6-21-j19-1...| Thun, Bahnhof| 21199| 1|\n",
"| 6-210-j19-1|23.TA.6-210-j19-1...| Frutigen, Bahnhof| 21025| 0|\n",
"| 6-27-j19-1|195.TA.6-27-j19-1...|Bern, Weyermannsh...| 17667| 1|\n",
"| 6-871-j19-1|43.TA.6-871-j19-1...| Waltwil| 71031| 0|\n",
"| 6-9-B-j19-1|28.TA.6-9-B-j19-1...| Bern| 9110| 1|\n",
"| 6-9-B-j19-1|43.TA.6-9-B-j19-1...| Bern| 9146| 1|\n",
"+------------+--------------------+--------------------+---------------+------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"trips_business_week_for_join = trips_business_week.drop('service_id')\n",
"trips_business_week_for_join.show()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-----------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+------------+--------------------+---------------+------------+\n",
"| trip_id| stop_id|arrival_time|departure_time|stop_sequence|pickup_type|drop_off_type|stop_id_general| stop_name| stop_lat| stop_lon| route_id| trip_headsign|trip_short_name|direction_id|\n",
"+--------------------+-----------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+------------+--------------------+---------------+------------+\n",
"|1005.TA.26-131-j1...| 8589111| 16:15:00| 16:15:00| 2| 0| 0| 8589111|Horgen, Gumelenst...| 47.260856991692|8.59230484542371|26-131-j19-1| Horgen, Aamüli| 636| 0|\n",
"|1005.TA.26-131-j1...| 8588984| 16:21:00| 16:21:00| 7| 0| 0| 8588984| Horgen, Gehren| 47.252107761586|8.60141376240595|26-131-j19-1| Horgen, Aamüli| 636| 0|\n",
+ "|1005.TA.26-131-j1...|8503855:0:F| 16:14:00| 16:14:00| 1| 0| 0| 8503855| Horgen, Bahnhof|47.2618568116556|8.59697608490178|26-131-j19-1| Horgen, Aamüli| 636| 0|\n",
"|1005.TA.26-131-j1...| 8588985| 16:20:00| 16:20:00| 6| 0| 0| 8588985| Horgen, Heubach| 47.254772314364|8.59791931595024|26-131-j19-1| Horgen, Aamüli| 636| 0|\n",
"|1005.TA.26-131-j1...| 8573554| 16:18:00| 16:18:00| 4| 0| 0| 8573554|Horgen Oberdorf, ...|47.2586804890449|8.59024770342279|26-131-j19-1| Horgen, Aamüli| 636| 0|\n",
"|1005.TA.26-131-j1...| 8573553| 16:16:00| 16:16:00| 3| 0| 0| 8573553| Horgen, Stocker|47.2615154118397|8.58892717995495|26-131-j19-1| Horgen, Aamüli| 636| 0|\n",
"|1005.TA.26-131-j1...| 8573555| 16:19:00| 16:19:00| 5| 0| 0| 8573555| Horgen, Bergli|47.2576623184348| 8.5932121438608|26-131-j19-1| Horgen, Aamüli| 636| 0|\n",
- "|1005.TA.26-131-j1...|8503855:0:F| 16:14:00| 16:14:00| 1| 0| 0| 8503855| Horgen, Bahnhof|47.2618568116556|8.59697608490178|26-131-j19-1| Horgen, Aamüli| 636| 0|\n",
- "|103.TA.26-925-j19...| 8576080| 07:42:00| 07:42:00| 21| 0| 0| 8576080| Meilen, Bahnhof|47.2694401970586|8.64488323901054|26-925-j19-1| Meilen, Bahnhof| 571| 0|\n",
"|103.TA.26-925-j19...| 8576082| 07:38:00| 07:38:00| 20| 0| 0| 8576082| Meilen, Beugen|47.2672701430669|8.65071330520529|26-925-j19-1| Meilen, Bahnhof| 571| 0|\n",
+ "|103.TA.26-925-j19...| 8576080| 07:42:00| 07:42:00| 21| 0| 0| 8576080| Meilen, Bahnhof|47.2694401970586|8.64488323901054|26-925-j19-1| Meilen, Bahnhof| 571| 0|\n",
"|104.TA.26-733-j19...| 8587420| 07:40:00| 07:40:00| 6| 0| 0| 8587420| Kloten, Bahnhof| 47.448965141581|8.58388763121034|26-733-j19-1| Kloten, Graswinkel| 4723| 1|\n",
"|104.TA.26-733-j19...| 8573211| 07:36:00| 07:36:00| 4| 0| 0| 8573211|Kloten, Zum Wilde...| 47.453545361717|8.58019555539209|26-733-j19-1| Kloten, Graswinkel| 4723| 1|\n",
"|104.TA.26-733-j19...| 8588553| 07:34:00| 07:34:00| 3| 0| 0| 8588553|Zürich Flughafen,...|47.4524944976638|8.57205681891684|26-733-j19-1| Kloten, Graswinkel| 4723| 1|\n",
"|104.TA.26-733-j19...|8573205:0:D| 07:33:00| 07:33:00| 1| 0| 0| 8573205|Zürich Flughafen,...|47.4506842895344|8.56372943623189|26-733-j19-1| Kloten, Graswinkel| 4723| 1|\n",
"|104.TA.26-733-j19...| 8580433| 07:44:00| 07:44:00| 9| 0| 0| 8580433| Kloten, Graswinkel|47.4509394233112|8.59648201149545|26-733-j19-1| Kloten, Graswinkel| 4723| 1|\n",
"|104.TA.26-733-j19...| 8580434| 07:41:00| 07:41:00| 7| 0| 0| 8580434|Kloten, Lindenstr...|47.4522454458795|8.58714851569215|26-733-j19-1| Kloten, Graswinkel| 4723| 1|\n",
"|104.TA.26-733-j19...| 8590699| 07:37:00| 07:37:00| 5| 0| 0| 8590699| Kloten, Stadthaus| 47.450745035784|8.58126455058034|26-733-j19-1| Kloten, Graswinkel| 4723| 1|\n",
"|104.TA.26-733-j19...| 8580301| 07:33:00| 07:33:00| 2| 0| 0| 8580301|Zürich Flughafen,...|47.4526524323306|8.56566081409302|26-733-j19-1| Kloten, Graswinkel| 4723| 1|\n",
"|104.TA.26-733-j19...| 8576153| 07:42:00| 07:42:00| 8| 0| 0| 8576153| Kloten, Rankstrasse|47.4511884812805|8.59205331714412|26-733-j19-1| Kloten, Graswinkel| 4723| 1|\n",
"|1087.TA.26-5-B-j1...| 8591058| 15:49:00| 15:49:00| 6| 0| 0| 8591058|Zürich Enge, Bahnhof|47.3641286895461|8.53156974905593|26-5-B-j19-1|Zürich, Kirche Fl...| 3194| 1|\n",
"|1087.TA.26-5-B-j1...| 8591317| 15:51:00| 15:51:00| 7| 0| 0| 8591317|Zürich, Rentenans...|47.3633863608069|8.53503724605312|26-5-B-j19-1|Zürich, Kirche Fl...| 3194| 1|\n",
"+--------------------+-----------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+------------+--------------------+---------------+------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# Inner-join stop_times_15km with trips_business_week_for_join on trip_id, then drop exact duplicate rows.\n",
"stop_times_15km_business_week = (\n",
"    stop_times_15km\n",
"    .join(trips_business_week_for_join, on=\"trip_id\", how=\"inner\")\n",
"    .dropDuplicates()\n",
")\n",
"stop_times_15km_business_week.show()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Persist the joined frame as CSV with a header row, replacing any previous output at this path.\n",
"(\n",
"    stop_times_15km_business_week.write\n",
"    .mode('overwrite')\n",
"    .option('header', True)\n",
"    .csv('data/lgpt_guys/stop_times_15km_business_week.csv')\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 4) Keeping only departure times within a given time-of-day window (7am to 8pm)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+\n",
"| trip_id|stop_id|arrival_time|departure_time|stop_sequence|pickup_type|drop_off_type|stop_id_general| stop_name| stop_lat| stop_lon| route_id| trip_headsign|trip_short_name|direction_id|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+\n",
"|1.TA.26-925-j19-1...|8576080| 15:27:00| 15:27:00| 23| 0| 0| 8576080| Meilen, Bahnhof|47.2694401970586|8.64488323901054| 26-925-j19-1| Meilen, Bahnhof| 280| 0|\n",
"|1.TA.26-925-j19-1...|8576082| 15:22:00| 15:22:00| 22| 0| 0| 8576082| Meilen, Beugen|47.2672701430669|8.65071330520529| 26-925-j19-1| Meilen, Bahnhof| 280| 0|\n",
"|1014.TA.26-70-A-j...|8591304| 11:54:00| 11:54:00| 3| 0| 0| 8591304|Zürich, Post Woll...|47.3444717091534|8.53296213774651|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
"|1014.TA.26-70-A-j...|8591278| 12:05:00| 12:05:00| 13| 0| 0| 8591278|Zürich, Mittellei...|47.3231389520848|8.51428616298707|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
"|1014.TA.26-70-A-j...|8591106| 11:56:00| 11:56:00| 5| 0| 0| 8591106|Zürich, Butzenstr...|47.3414099167461|8.53031210765799|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
"|1014.TA.26-70-A-j...|8591270| 12:02:00| 12:02:00| 10| 0| 0| 8591270| Zürich, Marbachweg|47.3303482449491|8.51537312448101|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
"|1014.TA.26-70-A-j...|8591410| 11:57:00| 11:57:00| 6| 0| 0| 8591410|Zürich, Verenastr...|47.3408255385719|8.52538035674749|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
+ "|1014.TA.26-70-A-j...|8591279| 11:55:00| 11:55:00| 4| 0| 0| 8591279| Zürich, Morgental|47.3439482343686|8.53014142775399|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
+ "|1014.TA.26-70-A-j...|8591268| 11:58:00| 11:58:00| 7| 0| 0| 8591268| Zürich, Manegg|47.3369660452942|8.52034979115572|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
"|1014.TA.26-70-A-j...|8591370| 12:04:00| 12:04:00| 12| 0| 0| 8591370|Zürich, Sihlweids...|47.3264149182794|8.51466345540645|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
"|1014.TA.26-70-A-j...|8591154| 11:59:00| 11:59:00| 8| 0| 0| 8591154|Zürich, Frymannst...|47.3351336003511|8.51914604867483|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
"|1014.TA.26-70-A-j...|8591210| 12:03:00| 12:03:00| 11| 0| 0| 8591210| Zürich, Im Hüsli|47.3282354882425|8.51269614493396|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
"|1014.TA.26-70-A-j...|8591081| 11:53:00| 11:53:00| 2| 0| 0| 8591081|Zürich Wollishofe...|47.3470342259279| 8.5329172219823|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
"|1014.TA.26-70-A-j...|8591061| 12:00:00| 12:01:00| 9| 0| 0| 8591061|Zürich Leimbach, ...|47.3332523864039|8.51859807635144|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
"|1014.TA.26-70-A-j...|8502495| 11:51:00| 11:51:00| 1| 0| 0| 8502495|Zürich Wollishofe...|47.3476976601166|8.53331248070737|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
- "|1014.TA.26-70-A-j...|8591279| 11:55:00| 11:55:00| 4| 0| 0| 8591279| Zürich, Morgental|47.3439482343686|8.53014142775399|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
- "|1014.TA.26-70-A-j...|8591268| 11:58:00| 11:58:00| 7| 0| 0| 8591268| Zürich, Manegg|47.3369660452942|8.52034979115572|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0|\n",
"|102.TA.26-765-j19...|8573211| 19:26:00| 19:26:00| 17| 0| 0| 8573211|Kloten, Zum Wilde...| 47.453545361717|8.58019555539209| 26-765-j19-1|Zürich Flughafen,...| 1648| 1|\n",
"|102.TA.26-765-j19...|8503700| 19:12:00| 19:13:00| 7| 0| 0| 8503700|Bassersdorf, Bahnhof|47.4387159099396|8.62613539902836| 26-765-j19-1|Zürich Flughafen,...| 1648| 1|\n",
"|102.TA.26-765-j19...|8590539| 19:03:00| 19:03:00| 1| 0| 0| 8590539|Dietlikon, Bahnho...|47.4219616816404|8.62080838939279| 26-765-j19-1|Zürich Flughafen,...| 1648| 1|\n",
"|102.TA.26-765-j19...|8576153| 19:23:00| 19:23:00| 15| 0| 0| 8576153| Kloten, Rankstrasse|47.4511884812805|8.59205331714412| 26-765-j19-1|Zürich Flughafen,...| 1648| 1|\n",
"|102.TA.26-765-j19...|8590503| 19:16:00| 19:16:00| 9| 0| 0| 8590503|Bassersdorf, Chlu...|47.4446275362315|8.62462622935083| 26-765-j19-1|Zürich Flughafen,...| 1648| 1|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# Reload the CSV checkpoint; the first row of each file is treated as the header.\n",
"stop_times_15km_business_week = (\n",
"    spark.read\n",
"    .option('header', True)\n",
"    .csv('data/lgpt_guys/stop_times_15km_business_week.csv')\n",
")\n",
"stop_times_15km_business_week.show()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"398630"
]
}
],
"source": [
"stop_times_15km_business_week.count()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+\n",
"| trip_id|stop_id|arrival_time|departure_time|stop_sequence|pickup_type|drop_off_type|stop_id_general| stop_name| stop_lat| stop_lon| route_id| trip_headsign|trip_short_name|direction_id|departure_hour|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+\n",
"|1.TA.26-925-j19-1...|8576080| 15:27:00| 15:27:00| 23| 0| 0| 8576080| Meilen, Bahnhof|47.2694401970586|8.64488323901054| 26-925-j19-1| Meilen, Bahnhof| 280| 0| 15|\n",
"|1.TA.26-925-j19-1...|8576082| 15:22:00| 15:22:00| 22| 0| 0| 8576082| Meilen, Beugen|47.2672701430669|8.65071330520529| 26-925-j19-1| Meilen, Bahnhof| 280| 0| 15|\n",
"|1014.TA.26-70-A-j...|8591304| 11:54:00| 11:54:00| 3| 0| 0| 8591304|Zürich, Post Woll...|47.3444717091534|8.53296213774651|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591278| 12:05:00| 12:05:00| 13| 0| 0| 8591278|Zürich, Mittellei...|47.3231389520848|8.51428616298707|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591106| 11:56:00| 11:56:00| 5| 0| 0| 8591106|Zürich, Butzenstr...|47.3414099167461|8.53031210765799|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591270| 12:02:00| 12:02:00| 10| 0| 0| 8591270| Zürich, Marbachweg|47.3303482449491|8.51537312448101|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591410| 11:57:00| 11:57:00| 6| 0| 0| 8591410|Zürich, Verenastr...|47.3408255385719|8.52538035674749|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
+ "|1014.TA.26-70-A-j...|8591279| 11:55:00| 11:55:00| 4| 0| 0| 8591279| Zürich, Morgental|47.3439482343686|8.53014142775399|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
+ "|1014.TA.26-70-A-j...|8591268| 11:58:00| 11:58:00| 7| 0| 0| 8591268| Zürich, Manegg|47.3369660452942|8.52034979115572|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591370| 12:04:00| 12:04:00| 12| 0| 0| 8591370|Zürich, Sihlweids...|47.3264149182794|8.51466345540645|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591154| 11:59:00| 11:59:00| 8| 0| 0| 8591154|Zürich, Frymannst...|47.3351336003511|8.51914604867483|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591210| 12:03:00| 12:03:00| 11| 0| 0| 8591210| Zürich, Im Hüsli|47.3282354882425|8.51269614493396|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591081| 11:53:00| 11:53:00| 2| 0| 0| 8591081|Zürich Wollishofe...|47.3470342259279| 8.5329172219823|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591061| 12:00:00| 12:01:00| 9| 0| 0| 8591061|Zürich Leimbach, ...|47.3332523864039|8.51859807635144|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8502495| 11:51:00| 11:51:00| 1| 0| 0| 8502495|Zürich Wollishofe...|47.3476976601166|8.53331248070737|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
- "|1014.TA.26-70-A-j...|8591279| 11:55:00| 11:55:00| 4| 0| 0| 8591279| Zürich, Morgental|47.3439482343686|8.53014142775399|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
- "|1014.TA.26-70-A-j...|8591268| 11:58:00| 11:58:00| 7| 0| 0| 8591268| Zürich, Manegg|47.3369660452942|8.52034979115572|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|102.TA.26-765-j19...|8573211| 19:26:00| 19:26:00| 17| 0| 0| 8573211|Kloten, Zum Wilde...| 47.453545361717|8.58019555539209| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8503700| 19:12:00| 19:13:00| 7| 0| 0| 8503700|Bassersdorf, Bahnhof|47.4387159099396|8.62613539902836| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8590539| 19:03:00| 19:03:00| 1| 0| 0| 8590539|Dietlikon, Bahnho...|47.4219616816404|8.62080838939279| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8576153| 19:23:00| 19:23:00| 15| 0| 0| 8576153| Kloten, Rankstrasse|47.4511884812805|8.59205331714412| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8590503| 19:16:00| 19:16:00| 9| 0| 0| 8590503|Bassersdorf, Chlu...|47.4446275362315|8.62462622935083| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# departure_time is an HH:MM:SS string; its first two characters are the hour.\n",
"# Column.substr takes a 1-based start position, so start at 1 rather than relying on 0 being accepted.\n",
"stop_times_15km_business_week = stop_times_15km_business_week.withColumn(\n",
"    \"departure_hour\",\n",
"    stop_times_15km_business_week.departure_time.substr(1, 2).cast('int'),\n",
")\n",
"stop_times_15km_business_week.show()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+\n",
"| trip_id|stop_id|arrival_time|departure_time|stop_sequence|pickup_type|drop_off_type|stop_id_general| stop_name| stop_lat| stop_lon| route_id| trip_headsign|trip_short_name|direction_id|departure_hour|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+\n",
"|1.TA.26-925-j19-1...|8576080| 15:27:00| 15:27:00| 23| 0| 0| 8576080| Meilen, Bahnhof|47.2694401970586|8.64488323901054| 26-925-j19-1| Meilen, Bahnhof| 280| 0| 15|\n",
"|1.TA.26-925-j19-1...|8576082| 15:22:00| 15:22:00| 22| 0| 0| 8576082| Meilen, Beugen|47.2672701430669|8.65071330520529| 26-925-j19-1| Meilen, Bahnhof| 280| 0| 15|\n",
"|1014.TA.26-70-A-j...|8591304| 11:54:00| 11:54:00| 3| 0| 0| 8591304|Zürich, Post Woll...|47.3444717091534|8.53296213774651|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591278| 12:05:00| 12:05:00| 13| 0| 0| 8591278|Zürich, Mittellei...|47.3231389520848|8.51428616298707|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591106| 11:56:00| 11:56:00| 5| 0| 0| 8591106|Zürich, Butzenstr...|47.3414099167461|8.53031210765799|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591270| 12:02:00| 12:02:00| 10| 0| 0| 8591270| Zürich, Marbachweg|47.3303482449491|8.51537312448101|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591410| 11:57:00| 11:57:00| 6| 0| 0| 8591410|Zürich, Verenastr...|47.3408255385719|8.52538035674749|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
+ "|1014.TA.26-70-A-j...|8591279| 11:55:00| 11:55:00| 4| 0| 0| 8591279| Zürich, Morgental|47.3439482343686|8.53014142775399|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
+ "|1014.TA.26-70-A-j...|8591268| 11:58:00| 11:58:00| 7| 0| 0| 8591268| Zürich, Manegg|47.3369660452942|8.52034979115572|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591370| 12:04:00| 12:04:00| 12| 0| 0| 8591370|Zürich, Sihlweids...|47.3264149182794|8.51466345540645|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591154| 11:59:00| 11:59:00| 8| 0| 0| 8591154|Zürich, Frymannst...|47.3351336003511|8.51914604867483|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591210| 12:03:00| 12:03:00| 11| 0| 0| 8591210| Zürich, Im Hüsli|47.3282354882425|8.51269614493396|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591081| 11:53:00| 11:53:00| 2| 0| 0| 8591081|Zürich Wollishofe...|47.3470342259279| 8.5329172219823|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591061| 12:00:00| 12:01:00| 9| 0| 0| 8591061|Zürich Leimbach, ...|47.3332523864039|8.51859807635144|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8502495| 11:51:00| 11:51:00| 1| 0| 0| 8502495|Zürich Wollishofe...|47.3476976601166|8.53331248070737|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
- "|1014.TA.26-70-A-j...|8591279| 11:55:00| 11:55:00| 4| 0| 0| 8591279| Zürich, Morgental|47.3439482343686|8.53014142775399|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
- "|1014.TA.26-70-A-j...|8591268| 11:58:00| 11:58:00| 7| 0| 0| 8591268| Zürich, Manegg|47.3369660452942|8.52034979115572|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|102.TA.26-765-j19...|8573211| 19:26:00| 19:26:00| 17| 0| 0| 8573211|Kloten, Zum Wilde...| 47.453545361717|8.58019555539209| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8503700| 19:12:00| 19:13:00| 7| 0| 0| 8503700|Bassersdorf, Bahnhof|47.4387159099396|8.62613539902836| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8590539| 19:03:00| 19:03:00| 1| 0| 0| 8590539|Dietlikon, Bahnho...|47.4219616816404|8.62080838939279| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8576153| 19:23:00| 19:23:00| 15| 0| 0| 8576153| Kloten, Rankstrasse|47.4511884812805|8.59205331714412| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8590503| 19:16:00| 19:16:00| 9| 0| 0| 8590503|Bassersdorf, Chlu...|47.4446275362315|8.62462622935083| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# Departure-hour window; both bounds are inclusive, so rows with departure_hour == 19 are kept.\n",
"departure_earliest = 7\n",
"departure_latest = 19\n",
"within_standard_hours = stop_times_15km_business_week.departure_hour.between(departure_earliest, departure_latest)\n",
"stop_times_15km_business_week_standard_hours = stop_times_15km_business_week.filter(within_standard_hours)\n",
"stop_times_15km_business_week_standard_hours.show()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Intermediate checkpoint: write the hour-filtered frame as CSV with a header row, overwriting earlier output.\n",
"(\n",
"    stop_times_15km_business_week_standard_hours.write\n",
"    .mode(\"overwrite\")\n",
"    .option(\"header\", True)\n",
"    .csv('data/lgpt_guys/stop_times_15km_business_week_standard_hours.csv')\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 5) Order stop_times so as to reconstruct routes for RAPTOR"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Building a list of unique trips according to 1) the stop sequence and 2) the departure time sequence"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"304085"
]
}
],
"source": [
"# Fresh starting point: stop_times is the stop_times_15km_business_week_standard_hours checkpoint reloaded from CSV.\n",
"stop_times = (\n",
"    spark.read\n",
"    .option('header', True)\n",
"    .csv('data/lgpt_guys/stop_times_15km_business_week_standard_hours.csv')\n",
")\n",
"stop_times.count()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+\n",
"| trip_id|stop_id|arrival_time|departure_time|stop_sequence|pickup_type|drop_off_type|stop_id_general| stop_name| stop_lat| stop_lon| route_id| trip_headsign|trip_short_name|direction_id|departure_hour|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+\n",
"|1.TA.26-925-j19-1...|8576080| 15:27:00| 15:27:00| 23| 0| 0| 8576080| Meilen, Bahnhof|47.2694401970586|8.64488323901054| 26-925-j19-1| Meilen, Bahnhof| 280| 0| 15|\n",
"|1.TA.26-925-j19-1...|8576082| 15:22:00| 15:22:00| 22| 0| 0| 8576082| Meilen, Beugen|47.2672701430669|8.65071330520529| 26-925-j19-1| Meilen, Bahnhof| 280| 0| 15|\n",
"|1014.TA.26-70-A-j...|8591304| 11:54:00| 11:54:00| 3| 0| 0| 8591304|Zürich, Post Woll...|47.3444717091534|8.53296213774651|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591278| 12:05:00| 12:05:00| 13| 0| 0| 8591278|Zürich, Mittellei...|47.3231389520848|8.51428616298707|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591106| 11:56:00| 11:56:00| 5| 0| 0| 8591106|Zürich, Butzenstr...|47.3414099167461|8.53031210765799|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591270| 12:02:00| 12:02:00| 10| 0| 0| 8591270| Zürich, Marbachweg|47.3303482449491|8.51537312448101|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591410| 11:57:00| 11:57:00| 6| 0| 0| 8591410|Zürich, Verenastr...|47.3408255385719|8.52538035674749|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
+ "|1014.TA.26-70-A-j...|8591279| 11:55:00| 11:55:00| 4| 0| 0| 8591279| Zürich, Morgental|47.3439482343686|8.53014142775399|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
+ "|1014.TA.26-70-A-j...|8591268| 11:58:00| 11:58:00| 7| 0| 0| 8591268| Zürich, Manegg|47.3369660452942|8.52034979115572|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591370| 12:04:00| 12:04:00| 12| 0| 0| 8591370|Zürich, Sihlweids...|47.3264149182794|8.51466345540645|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591154| 11:59:00| 11:59:00| 8| 0| 0| 8591154|Zürich, Frymannst...|47.3351336003511|8.51914604867483|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591210| 12:03:00| 12:03:00| 11| 0| 0| 8591210| Zürich, Im Hüsli|47.3282354882425|8.51269614493396|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8591081| 11:53:00| 11:53:00| 2| 0| 0| 8591081|Zürich Wollishofe...|47.3470342259279| 8.5329172219823|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|1014.TA.26-70-A-j...|8591061| 12:00:00| 12:01:00| 9| 0| 0| 8591061|Zürich Leimbach, ...|47.3332523864039|8.51859807635144|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 12|\n",
"|1014.TA.26-70-A-j...|8502495| 11:51:00| 11:51:00| 1| 0| 0| 8502495|Zürich Wollishofe...|47.3476976601166|8.53331248070737|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
- "|1014.TA.26-70-A-j...|8591279| 11:55:00| 11:55:00| 4| 0| 0| 8591279| Zürich, Morgental|47.3439482343686|8.53014142775399|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
- "|1014.TA.26-70-A-j...|8591268| 11:58:00| 11:58:00| 7| 0| 0| 8591268| Zürich, Manegg|47.3369660452942|8.52034979115572|26-70-A-j19-1|Zürich, Mittellei...| 3400| 0| 11|\n",
"|102.TA.26-765-j19...|8573211| 19:26:00| 19:26:00| 17| 0| 0| 8573211|Kloten, Zum Wilde...| 47.453545361717|8.58019555539209| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8503700| 19:12:00| 19:13:00| 7| 0| 0| 8503700|Bassersdorf, Bahnhof|47.4387159099396|8.62613539902836| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8590539| 19:03:00| 19:03:00| 1| 0| 0| 8590539|Dietlikon, Bahnho...|47.4219616816404|8.62080838939279| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8576153| 19:23:00| 19:23:00| 15| 0| 0| 8576153| Kloten, Rankstrasse|47.4511884812805|8.59205331714412| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"|102.TA.26-765-j19...|8590503| 19:16:00| 19:16:00| 9| 0| 0| 8590503|Bassersdorf, Chlu...|47.4446275362315|8.62462622935083| 26-765-j19-1|Zürich Flughafen,...| 1648| 1| 19|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"stop_times.show()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-----------+--------------------+---------------+------------+--------------+\n",
"| trip_id|stop_id|arrival_time|departure_time|stop_sequence|pickup_type|drop_off_type|stop_id_general| stop_name| stop_lat| stop_lon| route_id| trip_headsign|trip_short_name|direction_id|departure_hour|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-----------+--------------------+---------------+------------+--------------+\n",
"|1.TA.1-231-j19-1.1.H|8572747| 09:37:00| 09:37:00| 1| 0| 0| 8572747|Bremgarten AG, Ba...|47.3516902622456|8.34617544069354|1-231-j19-1| Jonen, Post| 23127| 0| 9|\n",
"|1.TA.1-231-j19-1.1.H|8582462| 09:38:00| 09:38:00| 3| 0| 0| 8582462|Bremgarten AG, Ze...|47.3475576701104|8.34819665008309|1-231-j19-1| Jonen, Post| 23127| 0| 9|\n",
"|1.TA.1-231-j19-1.1.H|8572600| 09:39:00| 09:39:00| 4| 0| 0| 8572600| Zufikon, Emaus| 47.34464822855| 8.3519875405826|1-231-j19-1| Jonen, Post| 23127| 0| 9|\n",
"|1.TA.1-231-j19-1.1.H|8572601| 09:39:00| 09:39:00| 5| 0| 0| 8572601| Zufikon, Algier|47.3417386266265|8.35463757067112|1-231-j19-1| Jonen, Post| 23127| 0| 9|\n",
"|1.TA.1-231-j19-1.1.H|8502553| 09:43:00| 09:43:00| 6| 0| 0| 8502553|Unterlunkhofen, B...|47.3221585583935| 8.380473118246|1-231-j19-1| Jonen, Post| 23127| 0| 9|\n",
"|1.TA.1-231-j19-1.1.H|8572602| 09:45:00| 09:45:00| 7| 0| 0| 8572602|Oberlunkhofen, Ke...|47.3133646488037| 8.3889172819179|1-231-j19-1| Jonen, Post| 23127| 0| 9|\n",
"|1.TA.1-231-j19-1.1.H|8502955| 09:46:00| 09:47:00| 8| 0| 0| 8502955| Oberlunkhofen, Post|47.3133829202162|8.38868371994399|1-231-j19-1| Jonen, Post| 23127| 0| 9|\n",
"|1.TA.1-231-j19-1.1.H|8573722| 09:48:00| 09:48:00| 9| 0| 0| 8573722|Oberlunkhofen, Ob...|47.3123840737352|8.39276207133446|1-231-j19-1| Jonen, Post| 23127| 0| 9|\n",
"|1.TA.1-231-j19-1.1.H|8573721| 09:50:00| 09:50:00| 10| 0| 0| 8573721|Oberlunkhofen, Wa...|47.3134255534873|8.39881671635027|1-231-j19-1| Jonen, Post| 23127| 0| 9|\n",
"|1.TA.1-231-j19-1.1.H|8503598| 09:53:00| 09:53:00| 11| 0| 0| 8503598| Arni AG, Dorf|47.3183951391194| 8.4197115298618|1-231-j19-1| Jonen, Post| 23127| 0| 9|\n",
"|1.TA.1-231-j19-1.1.H|8573720| 09:55:00| 09:59:00| 12| 0| 0| 8573720| Arni AG, Stockacker|47.3200332946963|8.42388869593354|1-231-j19-1| Jonen, Post| 23127| 0| 9|\n",
"|1.TA.1-231-j19-1.1.H|8503598| 10:00:00| 10:00:00| 13| 0| 0| 8503598| Arni AG, Dorf|47.3183951391194| 8.4197115298618|1-231-j19-1| Jonen, Post| 23127| 0| 10|\n",
"|1.TA.1-231-j19-1.1.H|8573721| 10:02:00| 10:02:00| 14| 0| 0| 8573721|Oberlunkhofen, Wa...|47.3134255534873|8.39881671635027|1-231-j19-1| Jonen, Post| 23127| 0| 10|\n",
"|1.TA.1-231-j19-1.1.H|8573722| 10:03:00| 10:03:00| 15| 0| 0| 8573722|Oberlunkhofen, Ob...|47.3123840737352|8.39276207133446|1-231-j19-1| Jonen, Post| 23127| 0| 10|\n",
"|1.TA.1-231-j19-1.1.H|8573723| 10:04:00| 10:04:00| 16| 0| 0| 8573723|Oberlunkhofen, Do...|47.3113973897738|8.39072289563923|1-231-j19-1| Jonen, Post| 23127| 0| 10|\n",
"|1.TA.1-231-j19-1.1.H|8583071| 10:05:00| 10:05:00| 17| 0| 0| 8583071| Jonen, Radmühle|47.3019681473342|8.39299563330837|1-231-j19-1| Jonen, Post| 23127| 0| 10|\n",
"|1.TA.1-231-j19-1.1.H|8572603| 10:06:00| 10:06:00| 18| 0| 0| 8572603| Jonen, Käppeli|47.2994704564101|8.39385801598124|1-231-j19-1| Jonen, Post| 23127| 0| 10|\n",
"|1.TA.1-231-j19-1.1.H|8502879| 10:07:00| 10:07:00| 19| 0| 0| 8502879| Jonen, Post|47.2961806346557|8.39551091610425|1-231-j19-1| Jonen, Post| 23127| 0| 10|\n",
"| 1.TA.1-44-j19-1.1.R|8590275| 08:31:00| 08:31:00| 1| 0| 0| 8590275| Spreitenbach, IKEA|47.4200714067302| 8.3754784852656| 1-44-j19-1|Spreitenbach, Sho...| 2001| 1| 8|\n",
"| 1.TA.1-44-j19-1.1.R|8591891| 08:34:00| 08:34:00| 2| 0| 0| 8591891|Spreitenbach, Alt...|47.4188375250837|8.36858840703544| 1-44-j19-1|Spreitenbach, Sho...| 2001| 1| 8|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-----------+--------------------+---------------+------------+--------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"stop_times = stop_times.sort(stop_times.trip_id, stop_times.stop_sequence.cast('int'))\n",
"stop_times.show()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from pyspark.sql.window import Window\n",
"w= (\n",
" Window.partitionBy(\"trip_id\")\n",
" .orderBy(stop_times.stop_sequence.cast('int'))\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This step is a bit technical. We aim at identifying trips that are identical, although they may bear a different `trip_id`. Indeed, we used data from services running every day of a standard business week. But we do not take days of the week into account, only departure and arrival **hours**. Therefore, we must find a way to identify and merge identical trips in terms of stops served and arrival and departure times. \n",
"\n",
"To do so, we use window functions on each trip to build a stop sequence and a list of departure times. When departure times are identical, arrival times are considered identical."
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------------------------+---------------+--------------+-------------+--------------+------------------------------------------------------------------------------------------------------------------------------+\n",
"|trip_id |stop_id_general|departure_time|stop_sequence|departure_hour|all_stops |\n",
"+-------------------------+---------------+--------------+-------------+--------------+------------------------------------------------------------------------------------------------------------------------------+\n",
"|1005.TA.26-131-j19-1.9.H |8503855 |16:14:00 |1 |16 |[8503855] |\n",
"|1005.TA.26-131-j19-1.9.H |8589111 |16:15:00 |2 |16 |[8503855, 8589111] |\n",
"|1005.TA.26-131-j19-1.9.H |8573553 |16:16:00 |3 |16 |[8503855, 8589111, 8573553] |\n",
"|1005.TA.26-131-j19-1.9.H |8573554 |16:18:00 |4 |16 |[8503855, 8589111, 8573553, 8573554] |\n",
"|1005.TA.26-131-j19-1.9.H |8573555 |16:19:00 |5 |16 |[8503855, 8589111, 8573553, 8573554, 8573555] |\n",
"|1005.TA.26-131-j19-1.9.H |8588985 |16:20:00 |6 |16 |[8503855, 8589111, 8573553, 8573554, 8573555, 8588985] |\n",
"|1005.TA.26-131-j19-1.9.H |8588984 |16:21:00 |7 |16 |[8503855, 8589111, 8573553, 8573554, 8573555, 8588985, 8588984] |\n",
"|103.TA.26-925-j19-1.4.H |8576082 |07:38:00 |20 |7 |[8576082] |\n",
"|103.TA.26-925-j19-1.4.H |8576080 |07:42:00 |21 |7 |[8576082, 8576080] |\n",
"|104.TA.26-733-j19-1.2.R |8573205 |07:33:00 |1 |7 |[8573205] |\n",
"|104.TA.26-733-j19-1.2.R |8580301 |07:33:00 |2 |7 |[8573205, 8580301] |\n",
"|104.TA.26-733-j19-1.2.R |8588553 |07:34:00 |3 |7 |[8573205, 8580301, 8588553] |\n",
"|104.TA.26-733-j19-1.2.R |8573211 |07:36:00 |4 |7 |[8573205, 8580301, 8588553, 8573211] |\n",
"|104.TA.26-733-j19-1.2.R |8590699 |07:37:00 |5 |7 |[8573205, 8580301, 8588553, 8573211, 8590699] |\n",
"|104.TA.26-733-j19-1.2.R |8587420 |07:40:00 |6 |7 |[8573205, 8580301, 8588553, 8573211, 8590699, 8587420] |\n",
"|104.TA.26-733-j19-1.2.R |8580434 |07:41:00 |7 |7 |[8573205, 8580301, 8588553, 8573211, 8590699, 8587420, 8580434] |\n",
"|104.TA.26-733-j19-1.2.R |8576153 |07:42:00 |8 |7 |[8573205, 8580301, 8588553, 8573211, 8590699, 8587420, 8580434, 8576153] |\n",
"|104.TA.26-733-j19-1.2.R |8580433 |07:44:00 |9 |7 |[8573205, 8580301, 8588553, 8573211, 8590699, 8587420, 8580434, 8576153, 8580433] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591245 |15:42:00 |1 |15 |[8591245] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591329 |15:43:00 |2 |15 |[8591245, 8591329] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591366 |15:45:00 |3 |15 |[8591245, 8591329, 8591366] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591415 |15:46:00 |4 |15 |[8591245, 8591329, 8591366, 8591415] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591059 |15:47:00 |5 |15 |[8591245, 8591329, 8591366, 8591415, 8591059] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591058 |15:49:00 |6 |15 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591317 |15:51:00 |7 |15 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591105 |15:53:00 |8 |15 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8576193 |15:55:00 |9 |15 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591239 |15:57:00 |10 |15 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193, 8591239] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591220 |15:58:00 |11 |15 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193, 8591239, 8591220] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591303 |16:00:00 |12 |16 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193, 8591239, 8591220, 8591303] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591412 |16:02:00 |13 |16 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193, 8591239, 8591220, 8591303, 8591412] |\n",
"|1087.TA.26-5-B-j19-1.23.R|8591230 |16:04:00 |14 |16 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193, 8591239, 8591220, 8591303, 8591412, 8591230]|\n",
"|109.TA.1-1-E-j19-1.12.R |8578679 |11:04:00 |1 |11 |[8578679] |\n",
"|109.TA.1-1-E-j19-1.12.R |8590314 |11:05:00 |2 |11 |[8578679, 8590314] |\n",
"|109.TA.1-1-E-j19-1.12.R |8590317 |11:06:00 |3 |11 |[8578679, 8590314, 8590317] |\n",
"|109.TA.79-24-j19-1.1.R |8503500 |16:50:00 |1 |16 |[8503500] |\n",
"|109.TA.79-24-j19-1.1.R |8503499 |16:53:00 |2 |16 |[8503500, 8503499] |\n",
"|1099.TA.26-142-j19-1.2.R |8590815 |17:31:00 |1 |17 |[8590815] |\n",
"|1099.TA.26-142-j19-1.2.R |8590817 |17:32:00 |2 |17 |[8590815, 8590817] |\n",
"|1099.TA.26-142-j19-1.2.R |8590812 |17:33:00 |3 |17 |[8590815, 8590817, 8590812] |\n",
"|1099.TA.26-142-j19-1.2.R |8590825 |17:35:00 |4 |17 |[8590815, 8590817, 8590812, 8590825] |\n",
"|1099.TA.26-142-j19-1.2.R |8590830 |17:36:00 |5 |17 |[8590815, 8590817, 8590812, 8590825, 8590830] |\n",
"|1099.TA.26-142-j19-1.2.R |8590818 |17:37:00 |6 |17 |[8590815, 8590817, 8590812, 8590825, 8590830, 8590818] |\n",
"|1099.TA.26-142-j19-1.2.R |8573167 |17:40:00 |7 |17 |[8590815, 8590817, 8590812, 8590825, 8590830, 8590818, 8573167] |\n",
"|11.TA.1-444-j19-1.1.H |8572747 |18:35:00 |2 |18 |[8572747] |\n",
"|11.TA.1-444-j19-1.1.H |8580847 |18:36:00 |3 |18 |[8572747, 8580847] |\n",
"|11.TA.1-444-j19-1.1.H |8581346 |18:41:00 |4 |18 |[8572747, 8580847, 8581346] |\n",
"|11.TA.1-444-j19-1.1.H |8502894 |18:42:00 |5 |18 |[8572747, 8580847, 8581346, 8502894] |\n",
"|11.TA.1-444-j19-1.1.H |8502979 |18:43:00 |6 |18 |[8572747, 8580847, 8581346, 8502894, 8502979] |\n",
"|11.TA.1-444-j19-1.1.H |8572596 |18:44:00 |7 |18 |[8572747, 8580847, 8581346, 8502894, 8502979, 8572596] |\n",
"|11.TA.1-444-j19-1.1.H |8591365 |18:59:00 |8 |18 |[8572747, 8580847, 8581346, 8502894, 8502979, 8572596, 8591365] |\n",
"|11.TA.1-444-j19-1.1.H |8591366 |19:01:00 |9 |19 |[8572747, 8580847, 8581346, 8502894, 8502979, 8572596, 8591365, 8591366] |\n",
"|11.TA.1-444-j19-1.1.H |8591059 |19:03:00 |10 |19 |[8572747, 8580847, 8581346, 8502894, 8502979, 8572596, 8591365, 8591366, 8591059] |\n",
"|111.TA.79-736-j19-1.5.H |8591031 |18:39:00 |1 |18 |[8591031] |\n",
"|111.TA.79-736-j19-1.5.H |8588553 |18:41:00 |2 |18 |[8591031, 8588553] |\n",
"|111.TA.79-736-j19-1.5.H |8580301 |18:42:00 |3 |18 |[8591031, 8588553, 8580301] |\n",
"|111.TA.79-736-j19-1.5.H |8573205 |18:44:00 |4 |18 |[8591031, 8588553, 8580301, 8573205] |\n",
"|111.TA.79-736-j19-1.5.H |8573213 |18:45:00 |5 |18 |[8591031, 8588553, 8580301, 8573205, 8573213] |\n",
"|111.TA.79-736-j19-1.5.H |8587799 |18:46:00 |6 |18 |[8591031, 8588553, 8580301, 8573205, 8573213, 8587799] |\n",
"|111.TA.79-736-j19-1.5.H |8591032 |18:49:00 |7 |18 |[8591031, 8588553, 8580301, 8573205, 8573213, 8587799, 8591032] |\n",
"|111.TA.79-736-j19-1.5.H |8593523 |18:51:00 |8 |18 |[8591031, 8588553, 8580301, 8573205, 8573213, 8587799, 8591032, 8593523] |\n",
"|1139.TA.26-156-j19-1.4.R |8573167 |11:57:00 |1 |11 |[8573167] |\n",
"|1139.TA.26-156-j19-1.4.R |8590824 |11:58:00 |2 |11 |[8573167, 8590824] |\n",
"|1139.TA.26-156-j19-1.4.R |8590822 |11:59:00 |3 |11 |[8573167, 8590824, 8590822] |\n",
"|1139.TA.26-156-j19-1.4.R |8590811 |12:00:00 |4 |12 |[8573167, 8590824, 8590822, 8590811] |\n",
"|1139.TA.26-156-j19-1.4.R |8590826 |12:01:00 |5 |12 |[8573167, 8590824, 8590822, 8590811, 8590826] |\n",
"|1139.TA.26-156-j19-1.4.R |8595406 |12:02:00 |6 |12 |[8573167, 8590824, 8590822, 8590811, 8590826, 8595406] |\n",
"|1139.TA.26-156-j19-1.4.R |8590828 |12:03:00 |7 |12 |[8573167, 8590824, 8590822, 8590811, 8590826, 8595406, 8590828] |\n",
"|1139.TA.26-156-j19-1.4.R |8590780 |12:04:00 |8 |12 |[8573167, 8590824, 8590822, 8590811, 8590826, 8595406, 8590828, 8590780] |\n",
"|1139.TA.26-156-j19-1.4.R |8590779 |12:06:00 |9 |12 |[8573167, 8590824, 8590822, 8590811, 8590826, 8595406, 8590828, 8590780, 8590779] |\n",
"|1139.TA.26-156-j19-1.4.R |8590775 |12:06:00 |10 |12 |[8573167, 8590824, 8590822, 8590811, 8590826, 8595406, 8590828, 8590780, 8590779, 8590775] |\n",
"|1139.TA.26-156-j19-1.4.R |8590777 |12:07:00 |11 |12 |[8573167, 8590824, 8590822, 8590811, 8590826, 8595406, 8590828, 8590780, 8590779, 8590775, 8590777] |\n",
"|1139.TA.26-156-j19-1.4.R |8590482 |12:09:00 |12 |12 |[8573167, 8590824, 8590822, 8590811, 8590826, 8595406, 8590828, 8590780, 8590779, 8590775, 8590777, 8590482] |\n",
"|1139.TA.26-156-j19-1.4.R |8590464 |12:11:00 |13 |12 |[8573167, 8590824, 8590822, 8590811, 8590826, 8595406, 8590828, 8590780, 8590779, 8590775, 8590777, 8590482, 8590464] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591245 |10:35:00 |1 |10 |[8591245] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591329 |10:36:00 |2 |10 |[8591245, 8591329] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591366 |10:37:00 |3 |10 |[8591245, 8591329, 8591366] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591415 |10:39:00 |4 |10 |[8591245, 8591329, 8591366, 8591415] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591059 |10:40:00 |5 |10 |[8591245, 8591329, 8591366, 8591415, 8591059] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591058 |10:41:00 |6 |10 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591317 |10:43:00 |7 |10 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591105 |10:45:00 |8 |10 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8576193 |10:48:00 |9 |10 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591239 |10:50:00 |10 |10 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193, 8591239] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591220 |10:51:00 |11 |10 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193, 8591239, 8591220] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591303 |10:53:00 |12 |10 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193, 8591239, 8591220, 8591303] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591412 |10:55:00 |13 |10 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193, 8591239, 8591220, 8591303, 8591412] |\n",
"|1141.TA.26-5-B-j19-1.23.R|8591230 |10:56:00 |14 |10 |[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193, 8591239, 8591220, 8591303, 8591412, 8591230]|\n",
"|1158.TA.26-69-j19-1.4.H |8591122 |14:30:00 |1 |14 |[8591122] |\n",
"|1158.TA.26-69-j19-1.4.H |8591201 |14:31:00 |2 |14 |[8591122, 8591201] |\n",
"|1158.TA.26-69-j19-1.4.H |8591213 |14:32:00 |3 |14 |[8591122, 8591201, 8591213] |\n",
"|1158.TA.26-69-j19-1.4.H |8591416 |14:33:00 |4 |14 |[8591122, 8591201, 8591213, 8591416] |\n",
"|1158.TA.26-69-j19-1.4.H |8591302 |14:34:00 |5 |14 |[8591122, 8591201, 8591213, 8591416, 8591302] |\n",
"|1158.TA.26-69-j19-1.4.H |8591419 |14:35:00 |6 |14 |[8591122, 8591201, 8591213, 8591416, 8591302, 8591419] |\n",
"|1158.TA.26-69-j19-1.4.H |8591425 |14:36:00 |7 |14 |[8591122, 8591201, 8591213, 8591416, 8591302, 8591419, 8591425] |\n",
"|1158.TA.26-69-j19-1.4.H |8591101 |14:38:00 |8 |14 |[8591122, 8591201, 8591213, 8591416, 8591302, 8591419, 8591425, 8591101] |\n",
"|1158.TA.26-69-j19-1.4.H |8591276 |14:41:00 |9 |14 |[8591122, 8591201, 8591213, 8591416, 8591302, 8591419, 8591425, 8591101, 8591276] |\n",
"|1164.TA.26-5-B-j19-1.23.R|8591245 |19:58:00 |1 |19 |[8591245] |\n",
"|1164.TA.26-5-B-j19-1.23.R|8591329 |19:59:00 |2 |19 |[8591245, 8591329] |\n",
"|1186.TA.26-69-j19-1.4.H |8591122 |11:00:00 |1 |11 |[8591122] |\n",
"+-------------------------+---------------+--------------+-------------+--------------+------------------------------------------------------------------------------------------------------------------------------+\n",
"only showing top 100 rows"
]
}
],
"source": [
"# code from https://stackoverflow.com/questions/56763946/concat-multiple-string-rows-for-each-unique-id-by-a-particular-order\n",
"from pyspark.sql import functions as F\n",
"stop_times.withColumn(\"all_stops\",F.collect_list(\"stop_id_general\").over(w))\\\n",
".withColumn(\"all_departures\",F.collect_list(\"departure_time\").over(w))\\\n",
".select(F.col('trip_id'), F.col('stop_id_general'), F.col('departure_time'), \n",
" F.col('stop_sequence'), F.col('departure_hour'), F.col('all_stops'))\\\n",
".show(100, 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We successfully built incremental lists of departure times and stop_id sequences. We now need to select for the longest list for each `trip_id`. The `groupBy` line below does exactly that."
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------------------------+------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+\n",
"|trip_id |all_stops |all_departures |all_arrivals |\n",
"+-------------------------+------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+\n",
"|1005.TA.26-131-j19-1.9.H |[8503855, 8589111, 8573553, 8573554, 8573555, 8588985, 8588984] |[16:14:00, 16:15:00, 16:16:00, 16:18:00, 16:19:00, 16:20:00, 16:21:00] |[16:14:00, 16:15:00, 16:16:00, 16:18:00, 16:19:00, 16:20:00, 16:21:00] |\n",
"|103.TA.26-925-j19-1.4.H |[8576082, 8576080] |[07:38:00, 07:42:00] |[07:38:00, 07:42:00] |\n",
"|104.TA.26-733-j19-1.2.R |[8573205, 8580301, 8588553, 8573211, 8590699, 8587420, 8580434, 8576153, 8580433] |[07:33:00, 07:33:00, 07:34:00, 07:36:00, 07:37:00, 07:40:00, 07:41:00, 07:42:00, 07:44:00] |[07:33:00, 07:33:00, 07:34:00, 07:36:00, 07:37:00, 07:40:00, 07:41:00, 07:42:00, 07:44:00] |\n",
"|1087.TA.26-5-B-j19-1.23.R|[8591245, 8591329, 8591366, 8591415, 8591059, 8591058, 8591317, 8591105, 8576193, 8591239, 8591220, 8591303, 8591412, 8591230]|[15:42:00, 15:43:00, 15:45:00, 15:46:00, 15:47:00, 15:49:00, 15:51:00, 15:53:00, 15:55:00, 15:57:00, 15:58:00, 16:00:00, 16:02:00, 16:04:00]|[15:42:00, 15:43:00, 15:45:00, 15:46:00, 15:47:00, 15:49:00, 15:51:00, 15:53:00, 15:55:00, 15:57:00, 15:58:00, 16:00:00, 16:02:00, 16:04:00]|\n",
"|109.TA.1-1-E-j19-1.12.R |[8578679, 8590314, 8590317] |[11:04:00, 11:05:00, 11:06:00] |[11:04:00, 11:05:00, 11:06:00] |\n",
"|109.TA.79-24-j19-1.1.R |[8503500, 8503499] |[16:50:00, 16:53:00] |[16:50:00, 16:53:00] |\n",
"|1099.TA.26-142-j19-1.2.R |[8590815, 8590817, 8590812, 8590825, 8590830, 8590818, 8573167] |[17:31:00, 17:32:00, 17:33:00, 17:35:00, 17:36:00, 17:37:00, 17:40:00] |[17:31:00, 17:32:00, 17:33:00, 17:35:00, 17:36:00, 17:37:00, 17:40:00] |\n",
"|11.TA.1-444-j19-1.1.H |[8572747, 8580847, 8581346, 8502894, 8502979, 8572596, 8591365, 8591366, 8591059] |[18:35:00, 18:36:00, 18:41:00, 18:42:00, 18:43:00, 18:44:00, 18:59:00, 19:01:00, 19:03:00] |[18:35:00, 18:36:00, 18:41:00, 18:42:00, 18:43:00, 18:44:00, 18:59:00, 19:01:00, 19:03:00] |\n",
"|111.TA.79-736-j19-1.5.H |[8591031, 8588553, 8580301, 8573205, 8573213, 8587799, 8591032, 8593523] |[18:39:00, 18:41:00, 18:42:00, 18:44:00, 18:45:00, 18:46:00, 18:49:00, 18:51:00] |[18:39:00, 18:41:00, 18:42:00, 18:43:00, 18:45:00, 18:46:00, 18:49:00, 18:51:00] |\n",
"|1139.TA.26-156-j19-1.4.R |[8573167, 8590824, 8590822, 8590811, 8590826, 8595406, 8590828, 8590780, 8590779, 8590775, 8590777, 8590482, 8590464] |[11:57:00, 11:58:00, 11:59:00, 12:00:00, 12:01:00, 12:02:00, 12:03:00, 12:04:00, 12:06:00, 12:06:00, 12:07:00, 12:09:00, 12:11:00] |[11:57:00, 11:58:00, 11:59:00, 12:00:00, 12:01:00, 12:02:00, 12:03:00, 12:04:00, 12:06:00, 12:06:00, 12:07:00, 12:09:00, 12:11:00] |\n",
"+-------------------------+------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+\n",
"only showing top 10 rows"
]
}
],
"source": [
"trips_with_duplicates= stop_times.withColumn(\"all_stops\",F.collect_list(\"stop_id_general\").over(w))\\\n",
".withColumn(\"all_departures\",F.collect_list(\"departure_time\").over(w))\\\n",
".withColumn(\"all_arrivals\",F.collect_list(\"arrival_time\").over(w))\\\n",
".groupBy(\"trip_id\")\\\n",
".agg(F.max(\"all_stops\").alias(\"all_stops\"), F.max(\"all_departures\").alias(\"all_departures\"), F.max(\"all_arrivals\").alias(\"all_arrivals\"))\\\n",
"\n",
"trips_with_duplicates.show(10, 0)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"25127"
]
}
],
"source": [
"trips_with_duplicates.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Are there many trips with a single stop ?"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+---------+--------------+------------+----------+\n",
"| trip_id|all_stops|all_departures|all_arrivals|stop_count|\n",
"+--------------------+---------+--------------+------------+----------+\n",
"|366.TA.11-3-j19-1...|[8503000]| [15:23:00]| [15:23:00]| 1|\n",
"|457.TA.26-24-j19-...|[8502208]| [16:45:00]| [16:45:00]| 1|\n",
"|99.TA.1-321-j19-1...|[8502750]| [16:07:00]| [16:07:00]| 1|\n",
"|31.TA.80-158-Y-j1...|[8503000]| [14:38:00]| [14:38:00]| 1|\n",
"|423.TA.1-36-j19-1...|[8503000]| [09:36:00]| [09:36:00]| 1|\n",
"|808.TA.26-24-j19-...|[8502208]| [17:15:00]| [17:15:00]| 1|\n",
"|1.TA.20-E03-j19-1...|[8596126]| [19:15:00]| [19:10:00]| 1|\n",
"|103.TA.1-321-j19-...|[8502750]| [18:07:00]| [18:07:00]| 1|\n",
"|123.TA.1-321-j19-...|[8502750]| [12:07:00]| [12:07:00]| 1|\n",
"|141.TA.20-2-j19-1...|[8503000]| [08:10:00]| [08:10:00]| 1|\n",
"|17.TA.17-4-j19-1.5.H|[8503000]| [12:37:00]| [12:37:00]| 1|\n",
"|17.TA.80-158-Y-j1...|[8503000]| [10:38:00]| [10:38:00]| 1|\n",
"|346.TA.1-37-j19-1...|[8503000]| [11:08:00]| [11:08:00]| 1|\n",
"|399.TA.11-3-j19-1...|[8503000]| [11:37:00]| [11:37:00]| 1|\n",
"| 4.TA.17-4-j19-1.4.H|[8503000]| [18:37:00]| [18:37:00]| 1|\n",
"|450.TA.1-16-j19-1...|[8503000]| [15:54:00]| [15:54:00]| 1|\n",
"|611.TA.26-8-A-j19...|[8503204]| [07:00:00]| [06:59:00]| 1|\n",
"|178.TA.20-2-j19-1...|[8503000]| [19:18:00]| [19:18:00]| 1|\n",
"|410.TA.26-24-j19-...|[8502208]| [10:45:00]| [10:45:00]| 1|\n",
"|1.TA.57-2-Y-j19-1...|[8503000]| [09:34:00]| [09:34:00]| 1|\n",
"+--------------------+---------+--------------+------------+----------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"from pyspark.sql.types import IntegerType\n",
"slen = udf(lambda s: len(s), IntegerType())\n",
"\n",
"trips_with_duplicates.withColumn(\"stop_count\", slen(trips_with_duplicates.all_stops)).filter(F.col('stop_count')==1).show()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"714"
]
}
],
"source": [
"trips_with_duplicates.withColumn(\"stop_count\", slen(trips_with_duplicates.all_stops)).filter(F.col('stop_count')==1).count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many trips share exactly the same departure **and** arrival times at all stops ?"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+--------------------+--------------------+--------------------+\n",
"| trip_id| all_stops| all_departures| all_arrivals|\n",
"+--------------------+--------------------+--------------------+--------------------+\n",
"|382.TA.11-3-j19-1...| [8503000]| [15:53:00]| [15:53:00]|\n",
"|87.TA.6-8-j19-1.64.R| [8503016, 8503000]|[08:46:00, 09:02:00]|[08:44:00, 08:55:00]|\n",
"|32.TA.79-10-B-j19...|[8503054, 8503053...|[17:57:00, 17:59:...|[17:57:00, 17:59:...|\n",
"|448.TA.26-LAF-j19...| [8503082, 8503081]|[19:20:00, 19:25:00]|[19:20:00, 19:25:00]|\n",
"|294.TA.26-5-A-j19...|[8503125, 8503003...|[17:21:00, 17:32:...|[17:20:00, 17:32:...|\n",
"|996.TA.26-12-j19-...|[8503147, 8503003...|[14:34:00, 14:39:...|[14:33:00, 14:38:...|\n",
"|10.TA.30-170-Y-j1...|[8503202, 8502209...|[10:30:00, 10:35:...|[10:30:00, 10:35:...|\n",
"|43.TA.26-2-j19-1....|[8503204, 8503202...|[09:24:00, 09:29:...|[09:23:00, 09:28:...|\n",
"|580.TA.26-8-A-j19...|[8503204, 8503203...|[18:00:00, 18:02:...|[17:59:00, 18:02:...|\n",
"|585.TA.26-24-j19-...|[8503305, 8503307...|[17:53:00, 17:57:...|[17:52:00, 17:57:...|\n",
"|246.TA.26-9-A-j19...|[8503313, 8503312...|[10:02:00, 10:05:...|[10:02:00, 10:05:...|\n",
"|158.TA.26-9-A-j19...|[8503313, 8503312...|[15:32:00, 15:35:...|[15:32:00, 15:35:...|\n",
"|137.TA.26-15-j19-...|[8503316, 8503315...|[10:11:00, 10:14:...|[10:11:00, 10:14:...|\n",
"|293.TA.26-640-j19...|[8503382, 8594339...|[08:26:00, 08:27:...|[08:26:00, 08:27:...|\n",
"|278.TA.79-24-j19-...| [8503499, 8503500]|[12:25:00, 12:28:00]|[12:25:00, 12:28:00]|\n",
"|127.TA.1-17-A-j19...|[8503508, 8517376...|[14:33:00, 14:34:...|[14:33:00, 14:34:...|\n",
"|1859.TA.26-9-B-j1...|[8503610, 8580912...|[08:29:00, 08:30:...|[08:29:00, 08:30:...|\n",
"|476.TA.26-768-j19...|[8573205, 8573213...|[14:37:00, 14:38:...|[14:37:00, 14:38:...|\n",
"|40.TA.26-733-j19-...|[8573205, 8580301...|[14:33:00, 14:33:...|[14:33:00, 14:33:...|\n",
"|120.TA.26-731-j19...|[8573205, 8580301...|[12:03:00, 12:03:...|[12:03:00, 12:03:...|\n",
"+--------------------+--------------------+--------------------+--------------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"trips_with_duplicates.dropDuplicates(['all_stops', 'all_departures', 'all_arrivals']).show()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20086"
]
}
],
"source": [
"trips_with_duplicates.dropDuplicates(['all_stops', 'all_departures', 'all_arrivals']).count()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20041"
]
}
],
"source": [
"trips_with_duplicates.dropDuplicates(['all_stops', 'all_departures']).count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"There seem to be a fraction of trips (less 0.2%) that share the exact same departure times at all stops, but not the same arrival times at all stops. To be on the safe side, we define identical trips based on the sequence of stops and the sequence of departure times.\n",
"\n",
"All in all, we remove ~5000 duplicated trips from all trips."
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"trips_unique = trips_with_duplicates.dropDuplicates(['all_stops', 'all_departures'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Lastly, we remove trips that only serve a single stop (most likely due to the pruning of stops outside the 15 km radius of Zürich HB)."
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# removing trips with a single stop only:\n",
"trips_unique = trips_unique.withColumn(\"stop_count\", slen(trips_with_duplicates.all_stops)).filter(F.col('stop_count')>1)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+--------------------+--------------------+--------------------+----------+\n",
"| trip_id| all_stops| all_departures| all_arrivals|stop_count|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+\n",
"|168.TA.1-17-A-j19...|[8502273, 8517377...|[17:51:00, 17:52:...|[17:51:00, 17:52:...| 7|\n",
"|15.TA.80-53-Y-j19...| [8503000, 8503202]|[09:12:00, 09:21:00]|[09:12:00, 09:21:00]| 2|\n",
"|80.TA.16-5-j19-1....|[8503016, 8503006...|[19:09:00, 19:15:...|[19:07:00, 19:13:...| 3|\n",
"|136.TA.26-10-B-j1...|[8503057, 8503056...|[12:54:00, 12:59:...|[12:54:00, 12:59:...| 8|\n",
"|73.TA.26-4-B-j19-...|[8503088, 8503090...|[11:38:00, 11:39:...|[11:38:00, 11:39:...| 12|\n",
"|55.TA.26-7-A-j19-...|[8503104, 8503003...|[14:03:00, 14:15:...|[14:02:00, 14:14:...| 11|\n",
"|551.TA.26-11-j19-...|[8503147, 8503003...|[18:20:00, 18:25:...|[18:20:00, 18:25:...| 8|\n",
"|216.TA.26-24-j19-...|[8503204, 8503203...|[19:15:00, 19:17:...|[19:15:00, 19:17:...| 12|\n",
"|154.TA.26-9-A-j19...|[8503313, 8503312...|[13:32:00, 13:35:...|[13:32:00, 13:35:...| 13|\n",
"|460.TA.79-24-j19-...| [8503499, 8503500]|[16:25:00, 16:28:00]|[16:25:00, 16:28:00]| 2|\n",
"|159.TA.79-24-j19-...| [8503500, 8503499]|[12:40:00, 12:43:00]|[12:40:00, 12:43:00]| 2|\n",
"|99.TA.79-24-j19-1...| [8503500, 8503499]|[17:40:00, 17:43:00]|[17:40:00, 17:43:00]| 2|\n",
"|80.TA.26-36-j19-1...|[8503508, 8503001...|[11:38:00, 11:44:...|[11:36:00, 11:43:...| 5|\n",
"|123.TA.1-350-j19-...|[8503610, 8573711...|[07:41:00, 07:43:...|[07:41:00, 07:43:...| 19|\n",
"|4.TA.26-769-j19-1...|[8503700, 8576161...|[17:35:00, 17:37:...|[17:35:00, 17:37:...| 10|\n",
"|281.TA.26-660-j19...|[8503700, 8588316...|[09:15:00, 09:16:...|[09:15:00, 09:16:...| 14|\n",
- "|269.TA.26-660-j19...|[8503700, 8588316...|[09:45:00, 09:46:...|[09:45:00, 09:46:...| 14|\n",
+ "|279.TA.26-660-j19...|[8503700, 8588316...|[09:45:00, 09:46:...|[09:45:00, 09:46:...| 14|\n",
"|983.TA.26-136-j19...|[8503855, 8594182...|[07:50:00, 07:51:...|[07:50:00, 07:51:...| 6|\n",
"|7.TA.90-71-Y-j19-...|[8530645, 8530646...|[14:07:00, 14:15:...|[14:07:00, 14:11:...| 3|\n",
"|138.TA.1-350-j19-...|[8572560, 8572599...|[17:49:00, 17:50:...|[17:49:00, 17:50:...| 20|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"trips_unique.show()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19614"
]
}
],
"source": [
"trips_unique.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 6) building routes\n",
"\n",
"- 6) building routes based on unique trips\n",
" - order unique_trips by stop_sequence, earliest departure time\n",
" - each window with the same stop_sequence gets a unique routeID\n",
" \n",
"- 7) generate a RAPTOR compatible stop_times\n",
" - filter with unique_trips\n",
" - sort by routeID, earliest departure time\n",
" \n",
"We start by getting the first departure time for each unique trip, to be able to order them by route and first departure time for RAPTOR's `stopTimes` data structure."
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+\n",
"| trip_id| all_stops| all_departures| all_arrivals|stop_count|departure_first_stop|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+\n",
"|168.TA.1-17-A-j19...|[8502273, 8517377...|[17:51:00, 17:52:...|[17:51:00, 17:52:...| 7| 17:51:00|\n",
"|15.TA.80-53-Y-j19...| [8503000, 8503202]|[09:12:00, 09:21:00]|[09:12:00, 09:21:00]| 2| 09:12:00|\n",
"|80.TA.16-5-j19-1....|[8503016, 8503006...|[19:09:00, 19:15:...|[19:07:00, 19:13:...| 3| 19:09:00|\n",
"|136.TA.26-10-B-j1...|[8503057, 8503056...|[12:54:00, 12:59:...|[12:54:00, 12:59:...| 8| 12:54:00|\n",
"|73.TA.26-4-B-j19-...|[8503088, 8503090...|[11:38:00, 11:39:...|[11:38:00, 11:39:...| 12| 11:38:00|\n",
"|55.TA.26-7-A-j19-...|[8503104, 8503003...|[14:03:00, 14:15:...|[14:02:00, 14:14:...| 11| 14:03:00|\n",
"|551.TA.26-11-j19-...|[8503147, 8503003...|[18:20:00, 18:25:...|[18:20:00, 18:25:...| 8| 18:20:00|\n",
"|216.TA.26-24-j19-...|[8503204, 8503203...|[19:15:00, 19:17:...|[19:15:00, 19:17:...| 12| 19:15:00|\n",
"|154.TA.26-9-A-j19...|[8503313, 8503312...|[13:32:00, 13:35:...|[13:32:00, 13:35:...| 13| 13:32:00|\n",
"|460.TA.79-24-j19-...| [8503499, 8503500]|[16:25:00, 16:28:00]|[16:25:00, 16:28:00]| 2| 16:25:00|\n",
"|159.TA.79-24-j19-...| [8503500, 8503499]|[12:40:00, 12:43:00]|[12:40:00, 12:43:00]| 2| 12:40:00|\n",
"|99.TA.79-24-j19-1...| [8503500, 8503499]|[17:40:00, 17:43:00]|[17:40:00, 17:43:00]| 2| 17:40:00|\n",
"|80.TA.26-36-j19-1...|[8503508, 8503001...|[11:38:00, 11:44:...|[11:36:00, 11:43:...| 5| 11:38:00|\n",
"|123.TA.1-350-j19-...|[8503610, 8573711...|[07:41:00, 07:43:...|[07:41:00, 07:43:...| 19| 07:41:00|\n",
"|4.TA.26-769-j19-1...|[8503700, 8576161...|[17:35:00, 17:37:...|[17:35:00, 17:37:...| 10| 17:35:00|\n",
"|281.TA.26-660-j19...|[8503700, 8588316...|[09:15:00, 09:16:...|[09:15:00, 09:16:...| 14| 09:15:00|\n",
"|269.TA.26-660-j19...|[8503700, 8588316...|[09:45:00, 09:46:...|[09:45:00, 09:46:...| 14| 09:45:00|\n",
- "|983.TA.26-136-j19...|[8503855, 8594182...|[07:50:00, 07:51:...|[07:50:00, 07:51:...| 6| 07:50:00|\n",
+ "|982.TA.26-136-j19...|[8503855, 8594182...|[07:50:00, 07:51:...|[07:50:00, 07:51:...| 6| 07:50:00|\n",
"|7.TA.90-71-Y-j19-...|[8530645, 8530646...|[14:07:00, 14:15:...|[14:07:00, 14:11:...| 3| 14:07:00|\n",
"|138.TA.1-350-j19-...|[8572560, 8572599...|[17:49:00, 17:50:...|[17:49:00, 17:50:...| 20| 17:49:00|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# code from https://stackoverflow.com/questions/52975567/get-first-n-elements-from-dataframe-arraytype-column-in-pyspark\n",
"\n",
"trips_unique = trips_unique.withColumn('departure_first_stop', F.col(\"all_departures\")[0])\n",
"trips_unique.show()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+\n",
"| trip_id| all_stops| all_departures| all_arrivals|stop_count|departure_first_stop|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+\n",
"|203.TA.1-17-A-j19...|[8502187, 8502277...|[07:01:00, 07:02:...|[07:01:00, 07:02:...| 7| 07:01:00|\n",
"|4.TA.30-57-Y-j19-...|[8502208, 8502209...|[07:18:00, 07:23:...|[07:18:00, 07:23:...| 3| 07:18:00|\n",
"|5.TA.30-57-Y-j19-...|[8502208, 8502209...|[07:48:00, 07:53:...|[07:48:00, 07:53:...| 3| 07:48:00|\n",
"|6.TA.30-57-Y-j19-...|[8502208, 8502209...|[08:18:00, 08:23:...|[08:18:00, 08:23:...| 3| 08:18:00|\n",
"|7.TA.30-57-Y-j19-...|[8502208, 8502209...|[08:48:00, 08:53:...|[08:48:00, 08:53:...| 3| 08:48:00|\n",
"|8.TA.30-57-Y-j19-...|[8502208, 8502209...|[09:18:00, 09:23:...|[09:18:00, 09:23:...| 3| 09:18:00|\n",
"|9.TA.30-57-Y-j19-...|[8502208, 8502209...|[09:48:00, 09:53:...|[09:48:00, 09:53:...| 3| 09:48:00|\n",
"|10.TA.30-57-Y-j19...|[8502208, 8502209...|[10:18:00, 10:23:...|[10:18:00, 10:23:...| 3| 10:18:00|\n",
"|11.TA.30-57-Y-j19...|[8502208, 8502209...|[10:48:00, 10:53:...|[10:48:00, 10:53:...| 3| 10:48:00|\n",
"|12.TA.30-57-Y-j19...|[8502208, 8502209...|[11:18:00, 11:23:...|[11:18:00, 11:23:...| 3| 11:18:00|\n",
"|13.TA.30-57-Y-j19...|[8502208, 8502209...|[11:48:00, 11:53:...|[11:48:00, 11:53:...| 3| 11:48:00|\n",
"|14.TA.30-57-Y-j19...|[8502208, 8502209...|[12:18:00, 12:23:...|[12:18:00, 12:23:...| 3| 12:18:00|\n",
"|15.TA.30-57-Y-j19...|[8502208, 8502209...|[12:48:00, 12:53:...|[12:48:00, 12:53:...| 3| 12:48:00|\n",
"|16.TA.30-57-Y-j19...|[8502208, 8502209...|[13:18:00, 13:23:...|[13:18:00, 13:23:...| 3| 13:18:00|\n",
"|17.TA.30-57-Y-j19...|[8502208, 8502209...|[13:48:00, 13:53:...|[13:48:00, 13:53:...| 3| 13:48:00|\n",
"|18.TA.30-57-Y-j19...|[8502208, 8502209...|[14:18:00, 14:23:...|[14:18:00, 14:23:...| 3| 14:18:00|\n",
"|19.TA.30-57-Y-j19...|[8502208, 8502209...|[14:48:00, 14:53:...|[14:48:00, 14:53:...| 3| 14:48:00|\n",
"|20.TA.30-57-Y-j19...|[8502208, 8502209...|[15:18:00, 15:23:...|[15:18:00, 15:23:...| 3| 15:18:00|\n",
"|21.TA.30-57-Y-j19...|[8502208, 8502209...|[15:48:00, 15:53:...|[15:48:00, 15:53:...| 3| 15:48:00|\n",
"|22.TA.30-57-Y-j19...|[8502208, 8502209...|[16:18:00, 16:23:...|[16:18:00, 16:23:...| 3| 16:18:00|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"#ordering by stop_sequence (arbitrary order) and departure at the first stop (ascending)\n",
"trips_unique = trips_unique.sort(trips_unique.all_stops, trips_unique.departure_first_stop)\n",
"trips_unique.show()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In RAPTOR, routes are defined as collections of unique trips serving the same stop sequence at different times. Therefore, there is one route per sequence of stops, i.e. one per unique entry in column `all_stops`. However, there is no specific rule to order routes depending on the stops they serve. We simply take the distinct sequences of stops and index them from 0 to n-1, where n is the number of routes."
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+\n",
"| all_stops|\n",
"+--------------------+\n",
- "| [8573205, 8588553]|\n",
- "|[8576240, 8591353...|\n",
"|[8591049, 8591128...|\n",
"|[8591057, 8591402...|\n",
- "|[8591061, 8591270...|\n",
"| [8591281, 8591046]|\n",
"|[8591825, 8590504...|\n",
+ "| [8573205, 8588553]|\n",
+ "|[8576240, 8591353...|\n",
+ "|[8591061, 8591270...|\n",
"|[8575921, 8575920...|\n",
"|[8591035, 8591134...|\n",
"|[8595129, 8590543...|\n",
+ "|[8576127, 8576139...|\n",
"|[8503010, 8503011...|\n",
"| [8575927, 8594339]|\n",
- "|[8576127, 8576139...|\n",
"|[8591031, 8588553...|\n",
- "|[8502208, 8502209...|\n",
"|[8503674, 8503659...|\n",
"|[8576171, 8576172...|\n",
"|[8576276, 8576277...|\n",
"|[8590805, 8590794...|\n",
"|[8591110, 8591306...|\n",
+ "|[8502208, 8502209...|\n",
"+--------------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"routes = trips_unique.select(trips_unique.all_stops).distinct()\n",
"routes.show()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#building an index from 0 to n_routes\n",
"# code from https://stackoverflow.com/questions/39057766/spark-equivelant-of-zipwithindex-in-dataframe\n",
"from pyspark.sql.types import StructType, StructField, LongType\n",
"def dfZipWithIndex (df, offset=0, colName=\"rowId\"):\n",
" '''\n",
" Enumerates dataframe rows in native order, like rdd.zipWithIndex(), but on a dataframe \n",
" and preserves a schema\n",
"\n",
" :param df: source dataframe\n",
" :param offset: adjustment to zipWithIndex()'s index\n",
" :param colName: name of the index column\n",
" '''\n",
"\n",
" new_schema = StructType(\n",
" [StructField(colName,LongType(),True)] # new added field in front\n",
" + df.schema.fields # previous schema\n",
" )\n",
"\n",
" zipped_rdd = df.rdd.zipWithIndex()\n",
"\n",
" new_rdd = zipped_rdd.map(lambda args: ([args[1] + offset] + list(args[0])))\n",
"\n",
" return spark.createDataFrame(new_rdd, new_schema)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+--------------------+\n",
"|route_int| all_stops|\n",
"+---------+--------------------+\n",
- "| 0| [8573205, 8588553]|\n",
- "| 1|[8576240, 8591353...|\n",
- "| 2|[8591049, 8591128...|\n",
- "| 3|[8591057, 8591402...|\n",
- "| 4|[8591061, 8591270...|\n",
- "| 5| [8591281, 8591046]|\n",
- "| 6|[8591825, 8590504...|\n",
+ "| 0|[8576240, 8591353...|\n",
+ "| 1|[8591049, 8591128...|\n",
+ "| 2|[8591057, 8591402...|\n",
+ "| 3| [8591281, 8591046]|\n",
+ "| 4|[8591825, 8590504...|\n",
+ "| 5| [8573205, 8588553]|\n",
+ "| 6|[8591061, 8591270...|\n",
"| 7|[8575921, 8575920...|\n",
- "| 8|[8591035, 8591134...|\n",
- "| 9|[8595129, 8590543...|\n",
+ "| 8|[8595129, 8590543...|\n",
+ "| 9|[8591035, 8591134...|\n",
"| 10|[8503010, 8503011...|\n",
- "| 11| [8575927, 8594339]|\n",
- "| 12|[8576127, 8576139...|\n",
- "| 13|[8591031, 8588553...|\n",
+ "| 11|[8591031, 8588553...|\n",
+ "| 12| [8575927, 8594339]|\n",
+ "| 13|[8576127, 8576139...|\n",
"| 14|[8502208, 8502209...|\n",
"| 15|[8503674, 8503659...|\n",
"| 16|[8576171, 8576172...|\n",
- "| 17|[8576276, 8576277...|\n",
- "| 18|[8590805, 8590794...|\n",
- "| 19|[8591110, 8591306...|\n",
+ "| 17|[8590805, 8590794...|\n",
+ "| 18|[8591110, 8591306...|\n",
+ "| 19|[8576276, 8576277...|\n",
"+---------+--------------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"routes_indexed = dfZipWithIndex(routes, 0, 'route_int')\n",
"routes_indexed.show()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+---------+\n",
"| all_stops| trip_id| all_departures| all_arrivals|stop_count|departure_first_stop|route_int|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+---------+\n",
- "|[8503064, 8503065...|483.TA.26-18-j19-...|[10:41:00, 10:45:...|[10:41:00, 10:45:...| 14| 10:41:00| 41|\n",
- "|[8591355, 8591354...|1107.TA.26-75-A-j...|[15:59:00, 16:00:...|[15:59:00, 16:00:...| 21| 15:59:00| 80|\n",
- "|[8595899, 8591206...|110.TA.26-40-j19-...|[08:23:00, 08:23:...|[08:23:00, 08:23:...| 8| 08:23:00| 87|\n",
- "|[8591276, 8591101...|613.TA.26-69-j19-...|[18:20:00, 18:23:...|[18:20:00, 18:23:...| 9| 18:20:00| 105|\n",
- "|[8591401, 8503610...|1230.TA.26-80-j19...|[16:03:00, 16:04:...|[16:03:00, 16:04:...| 29| 16:03:00| 110|\n",
- "|[8591401, 8503610...|1316.TA.26-80-j19...|[13:22:00, 13:23:...|[13:22:00, 13:23:...| 29| 13:22:00| 110|\n",
- "|[8591401, 8503610...|1358.TA.26-80-j19...|[08:30:00, 08:31:...|[08:30:00, 08:31:...| 29| 08:30:00| 110|\n",
- "|[8502208, 8502209...|677.TA.26-24-j19-...|[15:15:00, 15:17:...|[15:15:00, 15:17:...| 14| 15:15:00| 111|\n",
- "|[8503150, 8576140...|107.TA.26-726-j19...|[19:02:00, 19:02:...|[19:02:00, 19:02:...| 11| 19:02:00| 122|\n",
+ "|[8591825, 8590504...|117.TA.26-703-j19...|[18:04:00, 18:05:...|[18:04:00, 18:05:...| 9| 18:04:00| 4|\n",
+ "|[8591355, 8591354...|1232.TA.26-75-A-j...|[17:06:00, 17:07:...|[17:06:00, 17:07:...| 21| 17:06:00| 78|\n",
+ "|[8591355, 8591354...|1135.TA.26-75-A-j...|[12:59:00, 13:00:...|[12:59:00, 13:00:...| 21| 12:59:00| 78|\n",
+ "|[8591401, 8503610...|1269.TA.26-80-j19...|[19:22:00, 19:23:...|[19:22:00, 19:23:...| 29| 19:22:00| 108|\n",
"|[8580449, 8591063...|1892.TA.26-781-j1...|[15:17:00, 15:18:...|[15:17:00, 15:18:...| 12| 15:17:00| 136|\n",
- "|[8590269, 8590276...|270.TA.26-303-j19...|[09:06:00, 09:07:...|[09:06:00, 09:07:...| 23| 09:06:00| 146|\n",
- "|[8590269, 8590276...|276.TA.26-303-j19...|[08:06:00, 08:07:...|[08:06:00, 08:07:...| 23| 08:06:00| 146|\n",
"| [8503081, 8503082]|1373.TA.26-LAF-j1...|[17:20:00, 17:25:00]|[17:20:00, 17:25:00]| 2| 17:20:00| 212|\n",
- "|[8591067, 8587349...|1647.TA.26-17-j19...|[16:03:00, 16:06:...|[16:03:00, 16:06:...| 17| 16:03:00| 221|\n",
- "|[8591276, 8591101...|270.TA.26-83-j19-...|[19:35:00, 19:38:...|[19:35:00, 19:38:...| 16| 19:35:00| 231|\n",
- "|[8591276, 8591101...|225.TA.26-83-j19-...|[18:20:00, 18:22:...|[18:20:00, 18:22:...| 16| 18:20:00| 231|\n",
- "|[8591122, 8591249...|31.TA.26-37-A-j19...|[11:02:00, 11:03:...|[11:02:00, 11:03:...| 6| 11:02:00| 265|\n",
- "|[8591233, 8591107...|212.TA.26-704-j19...|[18:38:00, 18:42:...|[18:38:00, 18:42:...| 15| 18:38:00| 266|\n",
+ "|[8590878, 8576280...|15.TA.26-453-j19-...|[18:53:00, 18:53:...|[18:53:00, 18:53:...| 6| 18:53:00| 268|\n",
+ "|[8590716, 8590714...|364.TA.26-743-j19...|[15:30:00, 15:30:...|[15:30:00, 15:30:...| 17| 15:30:00| 278|\n",
+ "|[8590804, 8590805...|730.TA.26-31-j19-...|[12:32:00, 12:33:...|[12:32:00, 12:33:...| 9| 12:32:00| 281|\n",
"| [8596126, 8573205]|4.TA.6-E02-j19-1.4.R|[14:50:00, 15:10:00]|[14:45:00, 15:10:00]| 2| 14:50:00| 283|\n",
- "|[8580449, 8591063...|645.TA.26-768-j19...|[07:41:00, 07:42:...|[07:41:00, 07:42:...| 13| 07:41:00| 297|\n",
+ "|[8591439, 8591106...|1316.TA.26-7-B-j1...|[18:20:00, 18:21:...|[18:20:00, 18:21:...| 31| 18:20:00| 321|\n",
+ "|[8503104, 8503101...|6.TA.26-20-j19-1.2.H|[07:46:00, 07:53:...|[07:45:00, 07:52:...| 5| 07:46:00| 375|\n",
+ "|[8591136, 8591435...|2056.TA.26-13-j19...|[19:47:00, 19:48:...|[19:47:00, 19:48:...| 10| 19:47:00| 396|\n",
+ "|[8591122, 8591201...|1133.TA.26-69-j19...|[17:37:00, 17:38:...|[17:37:00, 17:38:...| 9| 17:37:00| 400|\n",
+ "|[8503059, 8530813...|427.TA.26-18-j19-...|[14:33:00, 14:34:...|[14:33:00, 14:34:...| 13| 14:33:00| 432|\n",
+ "|[8590637, 8590636...|92.TA.26-727-j19-...|[14:17:00, 14:18:...|[14:17:00, 14:18:...| 9| 14:17:00| 434|\n",
+ "|[8590901, 8590903...|637.TA.26-759-j19...|[09:08:00, 09:09:...|[09:08:00, 09:09:...| 28| 09:08:00| 460|\n",
+ "|[8591190, 8591390...|285.TA.26-9-B-j19...|[09:54:00, 09:55:...|[09:54:00, 09:55:...| 32| 09:54:00| 551|\n",
+ "|[8591354, 8591124...|673.TA.26-768-j19...|[07:00:00, 07:02:...|[07:00:00, 07:02:...| 9| 07:00:00| 586|\n",
+ "|[8591057, 8591896...|1201.TA.26-4-j19-...|[12:31:00, 12:32:...|[12:31:00, 12:32:...| 26| 12:31:00| 608|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+---------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"trips_unique = trips_unique.join(routes_indexed, how='inner', on='all_stops').dropDuplicates()\n",
"trips_unique.show()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+---------+\n",
"| all_stops| trip_id| all_departures| all_arrivals|stop_count|departure_first_stop|route_int|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+---------+\n",
- "|8503064 8503065 8...|483.TA.26-18-j19-...|10:41:00 10:45:00...|10:41:00 10:45:00...| 14| 10:41:00| 41|\n",
- "|8591355 8591354 8...|1107.TA.26-75-A-j...|15:59:00 16:00:00...|15:59:00 16:00:00...| 21| 15:59:00| 80|\n",
- "|8595899 8591206 8...|110.TA.26-40-j19-...|08:23:00 08:23:00...|08:23:00 08:23:00...| 8| 08:23:00| 87|\n",
- "|8591276 8591101 8...|613.TA.26-69-j19-...|18:20:00 18:23:00...|18:20:00 18:23:00...| 9| 18:20:00| 105|\n",
- "|8591401 8503610 8...|1230.TA.26-80-j19...|16:03:00 16:04:00...|16:03:00 16:04:00...| 29| 16:03:00| 110|\n",
- "|8591401 8503610 8...|1316.TA.26-80-j19...|13:22:00 13:23:00...|13:22:00 13:23:00...| 29| 13:22:00| 110|\n",
- "|8591401 8503610 8...|1358.TA.26-80-j19...|08:30:00 08:31:00...|08:30:00 08:31:00...| 29| 08:30:00| 110|\n",
- "|8502208 8502209 8...|677.TA.26-24-j19-...|15:15:00 15:17:00...|15:15:00 15:17:00...| 14| 15:15:00| 111|\n",
- "|8503150 8576140 8...|107.TA.26-726-j19...|19:02:00 19:02:00...|19:02:00 19:02:00...| 11| 19:02:00| 122|\n",
+ "|8591825 8590504 8...|117.TA.26-703-j19...|18:04:00 18:05:00...|18:04:00 18:05:00...| 9| 18:04:00| 4|\n",
+ "|8591355 8591354 8...|1232.TA.26-75-A-j...|17:06:00 17:07:00...|17:06:00 17:07:00...| 21| 17:06:00| 78|\n",
+ "|8591355 8591354 8...|1135.TA.26-75-A-j...|12:59:00 13:00:00...|12:59:00 13:00:00...| 21| 12:59:00| 78|\n",
+ "|8591276 8591101 8...|570.TA.26-69-j19-...|15:21:00 15:23:00...|15:21:00 15:23:00...| 9| 15:21:00| 104|\n",
+ "|8573504 8581548 8...|247.TA.26-813-j19...|19:15:00 19:15:00...|19:15:00 19:15:00...| 8| 19:15:00| 112|\n",
+ "|8590785 8590722 8...|14.TA.79-18-A-j19...|11:41:00 11:43:00...|11:41:00 11:43:00...| 5| 11:41:00| 117|\n",
"|8580449 8591063 8...|1892.TA.26-781-j1...|15:17:00 15:18:00...|15:17:00 15:18:00...| 12| 15:17:00| 136|\n",
- "|8590269 8590276 8...|270.TA.26-303-j19...|09:06:00 09:07:00...|09:06:00 09:07:00...| 23| 09:06:00| 146|\n",
- "|8590269 8590276 8...|276.TA.26-303-j19...|08:06:00 08:07:00...|08:06:00 08:07:00...| 23| 08:06:00| 146|\n",
- "| 8503081 8503082|1373.TA.26-LAF-j1...| 17:20:00 17:25:00| 17:20:00 17:25:00| 2| 17:20:00| 212|\n",
- "|8591067 8587349 8...|1647.TA.26-17-j19...|16:03:00 16:06:00...|16:03:00 16:06:00...| 17| 16:03:00| 221|\n",
- "|8591276 8591101 8...|270.TA.26-83-j19-...|19:35:00 19:38:00...|19:35:00 19:38:00...| 16| 19:35:00| 231|\n",
- "|8591276 8591101 8...|225.TA.26-83-j19-...|18:20:00 18:22:00...|18:20:00 18:22:00...| 16| 18:20:00| 231|\n",
- "|8591122 8591249 8...|31.TA.26-37-A-j19...|11:02:00 11:03:00...|11:02:00 11:03:00...| 6| 11:02:00| 265|\n",
- "|8591233 8591107 8...|212.TA.26-704-j19...|18:38:00 18:42:00...|18:38:00 18:42:00...| 15| 18:38:00| 266|\n",
- "| 8596126 8573205|4.TA.6-E02-j19-1.4.R| 14:50:00 15:10:00| 14:45:00 15:10:00| 2| 14:50:00| 283|\n",
- "|8580449 8591063 8...|645.TA.26-768-j19...|07:41:00 07:42:00...|07:41:00 07:42:00...| 13| 07:41:00| 297|\n",
+ "|8503016 8503006 8...|496.TA.26-24-j19-...|07:04:00 07:09:00...|07:02:00 07:08:00...| 12| 07:04:00| 198|\n",
+ "|8591067 8587349 8...|1673.TA.26-17-j19...|15:11:00 15:13:00...|15:11:00 15:13:00...| 17| 15:11:00| 218|\n",
+ "|8580433 8580438 8...|170.TA.26-733-j19...|18:30:00 18:31:00...|18:30:00 18:31:00...| 10| 18:30:00| 239|\n",
+ "|8591116 8591260 8...|190.TA.26-35-B-j1...|19:27:00 19:28:00...|19:27:00 19:28:00...| 8| 19:27:00| 290|\n",
+ "|8591439 8591106 8...|1316.TA.26-7-B-j1...|18:20:00 18:21:00...|18:20:00 18:21:00...| 31| 18:20:00| 321|\n",
+ "|8591136 8591435 8...|2056.TA.26-13-j19...|19:47:00 19:48:00...|19:47:00 19:48:00...| 10| 19:47:00| 396|\n",
+ "|8503059 8530813 8...|427.TA.26-18-j19-...|14:33:00 14:34:00...|14:33:00 14:34:00...| 13| 14:33:00| 432|\n",
+ "|8590637 8590636 8...|92.TA.26-727-j19-...|14:17:00 14:18:00...|14:17:00 14:18:00...| 9| 14:17:00| 434|\n",
+ "|8590318 8591051 8...|436.TA.26-2-A-j19...|09:58:00 09:59:00...|09:58:00 09:59:00...| 25| 09:58:00| 578|\n",
+ "|8591341 8502572 8...|605.TA.26-67-j19-...|17:21:00 17:22:00...|17:21:00 17:22:00...| 14| 17:21:00| 579|\n",
+ "|8591341 8502572 8...|43.TA.26-67-j19-1...|09:50:00 09:52:00...|09:50:00 09:52:00...| 14| 09:50:00| 579|\n",
+ "|8591341 8502572 8...|75.TA.26-67-j19-1...|09:28:00 09:29:00...|09:28:00 09:29:00...| 14| 09:28:00| 579|\n",
+ "|8591057 8591896 8...|1201.TA.26-4-j19-...|12:31:00 12:32:00...|12:31:00 12:32:00...| 26| 12:31:00| 608|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+---------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# converting arrays to strings to be able to store the data as csv\n",
"trips_unique_string_lists = trips_unique.withColumn(\"all_stops\", F.concat_ws(\" \", \"all_stops\"))\\\n",
".withColumn(\"all_departures\", F.concat_ws(\" \", \"all_departures\"))\\\n",
".withColumn(\"all_arrivals\", F.concat_ws(\" \", \"all_arrivals\"))\n",
"trips_unique_string_lists.show()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"trips_unique_string_lists.write.csv('data/lgpt_guys/trips_unique_string_lists.csv', header = True, mode=\"overwrite\")"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+---------+\n",
"| all_stops| trip_id| all_departures| all_arrivals|stop_count|departure_first_stop|route_int|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+---------+\n",
- "|8590898 8590900 8...|130.TA.26-772-j19...|08:45:00 08:46:00...|08:45:00 08:46:00...| 12| 08:45:00| 47|\n",
- "|8575977 8575976 8...|275.TA.26-845-j19...|14:08:00 14:09:00...|14:08:00 14:09:00...| 4| 14:08:00| 69|\n",
- "|8581546 8581543 8...|157.TA.26-813-j19...|12:37:00 12:37:00...|12:37:00 12:37:00...| 8| 12:37:00| 71|\n",
- "|8573504 8588051 8...|37.TA.26-811-j19-...|11:30:00 11:31:00...|11:30:00 11:31:00...| 11| 11:30:00| 75|\n",
- "|8576127 8503154 8...|18.TA.26-726-j19-...|12:50:00 12:52:00...|12:50:00 12:52:00...| 10| 12:50:00| 77|\n",
- "|8576127 8503154 8...|30.TA.26-726-j19-...|10:20:00 10:22:00...|10:20:00 10:22:00...| 10| 10:20:00| 77|\n",
- "|8591355 8591354 8...|1111.TA.26-75-A-j...|15:29:00 15:30:00...|15:29:00 15:30:00...| 21| 15:29:00| 80|\n",
- "|8591276 8591101 8...|607.TA.26-69-j19-...|14:06:00 14:08:00...|14:06:00 14:08:00...| 9| 14:06:00| 105|\n",
- "|8591401 8503610 8...|1325.TA.26-80-j19...|17:15:00 17:17:00...|17:15:00 17:16:00...| 29| 17:15:00| 110|\n",
- "|8591401 8503610 8...|1351.TA.26-80-j19...|16:35:00 16:37:00...|16:35:00 16:36:00...| 29| 16:35:00| 110|\n",
- "|8591028 8591023 8...|37.TA.26-91-j19-1...|17:24:00 17:25:00...|17:24:00 17:25:00...| 13| 17:24:00| 116|\n",
- "|8591123 8591174 8...|17.TA.26-E-j19-1.4.R|11:04:00 11:07:00...|11:04:00 11:07:00...| 3| 11:04:00| 118|\n",
- "|8591123 8591174 8...|63.TA.26-E-j19-1.4.R|09:34:00 09:37:00...|09:34:00 09:37:00...| 3| 09:34:00| 118|\n",
- "|8503061 8590601 8...|51.TA.79-18-A-j19...|12:20:00 12:21:00...|12:20:00 12:21:00...| 5| 12:20:00| 121|\n",
- "|8530812 8591094 8...|199.TA.26-77-j19-...|16:24:00 16:26:00...|16:24:00 16:26:00...| 7| 16:24:00| 124|\n",
- "|8591365 8591329 8...|742.TA.26-89-j19-...|17:03:00 17:04:00...|17:03:00 17:04:00...| 27| 17:03:00| 135|\n",
- "|8591365 8591329 8...|603.TA.26-89-j19-...|10:35:00 10:37:00...|10:35:00 10:37:00...| 27| 10:35:00| 135|\n",
- "|8591065 8590566 8...|62.TA.26-743-j19-...|11:09:00 11:10:00...|11:09:00 11:10:00...| 18| 11:09:00| 147|\n",
- "|8591349 8591403 8...|408.TA.26-61-j19-...|12:09:00 12:10:00...|12:09:00 12:10:00...| 19| 12:09:00| 150|\n",
- "|8591349 8591403 8...|415.TA.26-61-j19-...|07:18:00 07:19:00...|07:18:00 07:19:00...| 19| 07:18:00| 150|\n",
+ "|8590464 8590463 8...|603.TA.26-185-j19...|17:51:00 17:53:00...|17:51:00 17:53:00...| 14| 17:51:00| 21|\n",
+ "|8503305 8503306 8...|641.TA.26-8-A-j19...|09:19:00 09:23:00...|09:18:00 09:23:00...| 13| 09:19:00| 43|\n",
+ "|8503057 8503056 8...|139.TA.26-10-B-j1...|13:54:00 13:59:00...|13:54:00 13:59:00...| 8| 13:54:00| 83|\n",
+ "|8591401 8503610 8...|1310.TA.26-80-j19...|17:51:00 17:52:00...|17:51:00 17:52:00...| 29| 17:51:00| 108|\n",
+ "|8591401 8503610 8...|1341.TA.26-80-j19...|16:51:00 16:52:00...|16:51:00 16:52:00...| 29| 16:51:00| 108|\n",
+ "|8503097 8503089 8...|19.TA.26-4-B-j19-...|11:10:00 11:11:00...|11:10:00 11:11:00...| 12| 11:10:00| 129|\n",
+ "|8590269 8590276 8...|280.TA.26-303-j19...|07:51:00 07:52:00...|07:51:00 07:52:00...| 23| 07:51:00| 144|\n",
+ "|8591065 8590566 8...|126.TA.26-751-j19...|15:59:00 15:59:00...|15:59:00 15:59:00...| 10| 15:59:00| 146|\n",
+ "|8591349 8591403 8...|421.TA.26-61-j19-...|08:42:00 08:43:00...|08:42:00 08:43:00...| 19| 08:42:00| 148|\n",
+ "|8591067 8587349 8...|1514.TA.26-17-j19...|14:11:00 14:13:00...|14:11:00 14:13:00...| 17| 14:11:00| 221|\n",
+ "|8591067 8587349 8...|1546.TA.26-17-j19...|12:33:00 12:36:00...|12:33:00 12:36:00...| 17| 12:33:00| 221|\n",
+ "|8580433 8580438 8...|195.TA.26-733-j19...|14:45:00 14:46:00...|14:45:00 14:46:00...| 10| 14:45:00| 238|\n",
+ "|8580433 8580438 8...|247.TA.26-733-j19...|07:15:00 07:16:00...|07:15:00 07:16:00...| 10| 07:15:00| 238|\n",
+ "|8590647 8587998 8...|42.TA.26-973-j19-...|15:08:00 15:09:00...|15:08:00 15:09:00...| 14| 15:08:00| 274|\n",
+ "|8591054 8576262 8...|113.TA.26-37-A-j1...|08:48:00 08:50:00...|08:48:00 08:50:00...| 6| 08:48:00| 275|\n",
+ "|8580449 8591063 8...|1101.TA.26-768-j1...|16:08:00 16:09:00...|16:08:00 16:09:00...| 13| 16:08:00| 297|\n",
+ "|8591439 8591106 8...|1320.TA.26-7-B-j1...|17:50:00 17:51:00...|17:50:00 17:51:00...| 31| 17:50:00| 320|\n",
+ "|8591439 8591106 8...|1338.TA.26-7-B-j1...|08:30:00 08:31:00...|08:30:00 08:31:00...| 31| 08:30:00| 320|\n",
+ "|8573504 8588051 8...|10.TA.26-812-j19-...|16:45:00 16:45:00...|16:45:00 16:45:00...| 12| 16:45:00| 331|\n",
+ "|8591230 8591087 8...|197.TA.26-751-j19...|16:35:00 16:36:00...|16:35:00 16:36:00...| 9| 16:35:00| 405|\n",
"+--------------------+--------------------+--------------------+--------------------+----------+--------------------+---------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# we prepare an inner join on trips from trips_unique with stopTimes.\n",
"\n",
"trips_unique_string_lists = spark.read.csv('data/lgpt_guys/trips_unique_string_lists.csv', header = True)\n",
"trips_unique_string_lists.show()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+--------------------+---------+----------+\n",
"| trip_id|departure_first_stop|route_int|stop_count|\n",
"+--------------------+--------------------+---------+----------+\n",
- "|130.TA.26-772-j19...| 08:45:00| 47| 12|\n",
- "|275.TA.26-845-j19...| 14:08:00| 69| 4|\n",
- "|157.TA.26-813-j19...| 12:37:00| 71| 8|\n",
- "|37.TA.26-811-j19-...| 11:30:00| 75| 11|\n",
- "|18.TA.26-726-j19-...| 12:50:00| 77| 10|\n",
- "|30.TA.26-726-j19-...| 10:20:00| 77| 10|\n",
- "|1111.TA.26-75-A-j...| 15:29:00| 80| 21|\n",
- "|607.TA.26-69-j19-...| 14:06:00| 105| 9|\n",
- "|1325.TA.26-80-j19...| 17:15:00| 110| 29|\n",
- "|1351.TA.26-80-j19...| 16:35:00| 110| 29|\n",
- "|37.TA.26-91-j19-1...| 17:24:00| 116| 13|\n",
- "|17.TA.26-E-j19-1.4.R| 11:04:00| 118| 3|\n",
- "|63.TA.26-E-j19-1.4.R| 09:34:00| 118| 3|\n",
- "|51.TA.79-18-A-j19...| 12:20:00| 121| 5|\n",
- "|199.TA.26-77-j19-...| 16:24:00| 124| 7|\n",
- "|742.TA.26-89-j19-...| 17:03:00| 135| 27|\n",
- "|603.TA.26-89-j19-...| 10:35:00| 135| 27|\n",
- "|62.TA.26-743-j19-...| 11:09:00| 147| 18|\n",
- "|408.TA.26-61-j19-...| 12:09:00| 150| 19|\n",
- "|415.TA.26-61-j19-...| 07:18:00| 150| 19|\n",
+ "|603.TA.26-185-j19...| 17:51:00| 21| 14|\n",
+ "|641.TA.26-8-A-j19...| 09:19:00| 43| 13|\n",
+ "|139.TA.26-10-B-j1...| 13:54:00| 83| 8|\n",
+ "|1310.TA.26-80-j19...| 17:51:00| 108| 29|\n",
+ "|1341.TA.26-80-j19...| 16:51:00| 108| 29|\n",
+ "|19.TA.26-4-B-j19-...| 11:10:00| 129| 12|\n",
+ "|280.TA.26-303-j19...| 07:51:00| 144| 23|\n",
+ "|126.TA.26-751-j19...| 15:59:00| 146| 10|\n",
+ "|421.TA.26-61-j19-...| 08:42:00| 148| 19|\n",
+ "|1514.TA.26-17-j19...| 14:11:00| 221| 17|\n",
+ "|1546.TA.26-17-j19...| 12:33:00| 221| 17|\n",
+ "|195.TA.26-733-j19...| 14:45:00| 238| 10|\n",
+ "|247.TA.26-733-j19...| 07:15:00| 238| 10|\n",
+ "|42.TA.26-973-j19-...| 15:08:00| 274| 14|\n",
+ "|113.TA.26-37-A-j1...| 08:48:00| 275| 6|\n",
+ "|1101.TA.26-768-j1...| 16:08:00| 297| 13|\n",
+ "|1320.TA.26-7-B-j1...| 17:50:00| 320| 31|\n",
+ "|1338.TA.26-7-B-j1...| 08:30:00| 320| 31|\n",
+ "|10.TA.26-812-j19-...| 16:45:00| 331| 12|\n",
+ "|197.TA.26-751-j19...| 16:35:00| 405| 9|\n",
"+--------------------+--------------------+---------+----------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"trips_unique_for_join = trips_unique_string_lists.select(trips_unique_string_lists.trip_id, \\\n",
" trips_unique_string_lists.departure_first_stop, \\\n",
" trips_unique_string_lists.route_int, \\\n",
" trips_unique_string_lists.stop_count)\n",
"trips_unique_for_join.show()"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"260459"
]
}
],
"source": [
"stop_times = stop_times.join(trips_unique_for_join, how='inner', on='trip_id')\n",
"stop_times.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "At this step, we sort `stop_times` in the same order as RAPTOR's `stopTimes` data structure, that is:\n",
- "- By route\n",
- "- By trip (starting with the one that leaves the first stop of the route at the earliest departure time)\n",
- "- By stop in the sequence of stops defining the route\n"
+ "Note that this CSV does not carry an index that would allow it to be sorted quickly after loading."
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+--------------------+---------+----------+\n",
- "| trip_id|stop_id|arrival_time|departure_time|stop_sequence|pickup_type|drop_off_type|stop_id_general| stop_name| stop_lat| stop_lon| route_id| trip_headsign|trip_short_name|direction_id|departure_hour|departure_first_stop|route_int|stop_count|\n",
- "+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+--------------------+---------+----------+\n",
- "|1672.TA.26-10-j19...|8573205| 07:00:00| 07:01:00| 27| 0| 0| 8573205|Zürich Flughafen,...|47.4504413038344|8.56372943623189| 26-10-j19-1|Zürich Flughafen,...| 4096| 1| 7| 07:01:00| 0| 2|\n",
- "|1672.TA.26-10-j19...|8588553| 07:02:00| 07:02:00| 28| 0| 0| 8588553|Zürich Flughafen,...|47.4524944976638|8.57205681891684| 26-10-j19-1|Zürich Flughafen,...| 4096| 1| 7| 07:01:00| 0| 2|\n",
- "|2064.TA.26-13-j19...|8576240| 07:00:00| 07:00:00| 5| 0| 0| 8576240|Zürich, Meierhofp...|47.4020100860391|8.49937412926861| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591353| 07:01:00| 07:01:00| 6| 0| 0| 8591353| Zürich, Schwert|47.3997299435837|8.50461130737576| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591039| 07:02:00| 07:02:00| 7| 0| 0| 8591039| Zürich, Alte Trotte|47.3977659017765|8.50725235431143| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591121| 07:03:00| 07:03:00| 8| 0| 0| 8591121|Zürich, Eschergutweg|47.3962700189648|8.51204037477646| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591417| 07:05:00| 07:05:00| 9| 0| 0| 8591417| Zürich, Waidfussweg|47.3954977376399|8.51840044698891| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591437| 07:06:00| 07:06:00| 10| 0| 0| 8591437|Zürich, Wipkinger...|47.3925909395293|8.52357474302616| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8580522| 07:08:00| 07:08:00| 11| 0| 0| 8580522|Zürich, Escher-Wy...|47.3907969150758| 8.5223979500038| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591110| 07:09:00| 07:09:00| 12| 0| 0| 8591110| Zürich, Dammweg|47.3884919601296|8.52639545301869| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591306| 07:10:00| 07:10:00| 13| 0| 0| 8591306|Zürich, Quellenst...|47.3867403702341|8.52874903906341| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591257| 07:11:00| 07:11:00| 14| 0| 0| 8591257| Zürich, Limmatplatz|47.3845994590919|8.53162364797299| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591282| 07:12:00| 07:12:00| 15| 0| 0| 8591282|Zürich, Museum fü...|47.3821239221899|8.53493843137185| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591368| 07:14:00| 07:14:00| 16| 0| 0| 8591368| Zürich, Sihlquai/HB|47.3798733332196|8.53760642776606| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8587349| 07:16:00| 07:16:00| 17| 0| 0| 8587349|Zürich, Bahnhofqu...|47.3775618175159|8.54173867807358| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591067| 07:18:00| 07:18:00| 18| 0| 0| 8591067|Zürich, Bahnhofst...|47.3765581015114|8.53994204750509| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591316| 07:20:00| 07:20:00| 19| 0| 0| 8591316| Zürich, Rennweg|47.3730662375955|8.53845982728609| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591299| 07:22:00| 07:22:00| 20| 0| 0| 8591299| Zürich, Paradeplatz|47.3693672863583|8.53876525448273| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591384| 07:23:00| 07:23:00| 21| 0| 0| 8591384|Zürich, Stockerst...|47.3677002399791|8.53501029659459| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591404| 07:24:00| 07:24:00| 22| 0| 0| 8591404|Zürich, Tunnelstr...|47.3661426599847|8.53253094641008| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591059| 07:25:00| 07:25:00| 23| 0| 0| 8591059|Zürich Enge, Bahn...|47.3645546111557|8.53045583810347| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591415| 07:27:00| 07:27:00| 24| 0| 0| 8591415|Zürich, Waffenpla...|47.3614818138862|8.52574866601403| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591366| 07:28:00| 07:28:00| 25| 0| 0| 8591366|Zürich, Sihlcity ...|47.3600640074787|8.52303575385561| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591329| 07:29:00| 07:29:00| 26| 0| 0| 8591329|Zürich, Saalsport...|47.3578611597087|8.52040369007277| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591245| 07:30:00| 07:30:00| 27| 0| 0| 8591245| Zürich, Laubegg|47.3587313564196|8.51708890667391| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591405| 07:32:00| 07:32:00| 28| 0| 0| 8591405| Zürich, Uetlihof|47.3567353594536|8.51396276948474| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591385| 07:33:00| 07:33:00| 29| 0| 0| 8591385|Zürich, Strassenv...|47.3530717783138|8.51171698127413| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|2064.TA.26-13-j19...|8591034| 07:34:00| 07:34:00| 30| 0| 0| 8591034| Zürich, Albisgütli|47.3519945640447| 8.5077104951064| 26-13-j19-1| Zürich, Albisgütli| 1831| 0| 7| 07:00:00| 1| 26|\n",
- "|791.TA.26-11-A-j1...|8591049| 19:49:00| 19:49:00| 1| 0| 0| 8591049| Zürich, Auzelg|47.4166918393693| 8.568113214819|26-11-A-j19-1| Zürich, Rehalp| 363| 0| 19| 19:49:00| 2| 8|\n",
- "|791.TA.26-11-A-j1...|8591128| 19:51:00| 19:51:00| 2| 0| 0| 8591128|Zürich, Fernsehst...|47.4181749855684|8.56174415945371|26-11-A-j19-1| Zürich, Rehalp| 363| 0| 19| 19:49:00| 2| 8|\n",
- "|791.TA.26-11-A-j1...|8591830| 19:52:00| 19:52:00| 3| 0| 0| 8591830|Glattpark, Glattpark|47.4199559214972|8.55716275150406|26-11-A-j19-1| Zürich, Rehalp| 363| 0| 19| 19:49:00| 2| 8|\n",
- "|791.TA.26-11-A-j1...|8591294| 19:53:00| 19:53:00| 4| 0| 0| 8591294| Zürich, Oerlikerhus|47.4175853791724| 8.5542072942189|26-11-A-j19-1| Zürich, Rehalp| 363| 0| 19| 19:49:00| 2| 8|\n",
- "|791.TA.26-11-A-j1...|8591256| 19:54:00| 19:54:00| 5| 0| 0| 8591256|Zürich, Leutschen...|47.4146433269471|8.55130573585079|26-11-A-j19-1| Zürich, Rehalp| 363| 0| 19| 19:49:00| 2| 8|\n",
- "|791.TA.26-11-A-j1...|8591273| 19:55:00| 19:55:00| 6| 0| 0| 8591273|Zürich, Messe/Hal...|47.4106919651348|8.55068589830466|26-11-A-j19-1| Zürich, Rehalp| 363| 0| 19| 19:49:00| 2| 8|\n",
- "|791.TA.26-11-A-j1...|8591382| 19:57:00| 19:57:00| 7| 0| 0| 8591382|Zürich, Sternen O...|47.4100718783688|8.54623025449481|26-11-A-j19-1| Zürich, Rehalp| 363| 0| 19| 19:49:00| 2| 8|\n",
- "|791.TA.26-11-A-j1...|8580449| 19:59:00| 19:59:00| 8| 0| 0| 8580449|Zürich Oerlikon, ...| 47.411494419524|8.54479295004002|26-11-A-j19-1| Zürich, Rehalp| 363| 0| 19| 19:49:00| 2| 8|\n",
- "|159.TA.26-304-j19...|8591057| 19:39:00| 19:39:00| 1| 0| 0| 8591057|Zürich Altstetten...| 47.392067942097|8.48990588617267| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8591402| 19:41:00| 19:41:00| 2| 0| 0| 8591402| Zürich, Tüffenwies|47.3979787271809|8.49434356367684| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8591434| 19:41:00| 19:41:00| 3| 0| 0| 8591434| Zürich, Winzerhalde|47.4000582901792| 8.4945681424979| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8591197| 19:42:00| 19:42:00| 4| 0| 0| 8591197|Zürich, Hohenklin...|47.4013473348052|8.49021131336931| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8591436| 19:43:00| 19:43:00| 5| 0| 0| 8591436|Zürich, Winzerstr...| 47.403372044054| 8.486123978826| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8591136| 19:46:00| 19:46:00| 6| 0| 0| 8591136| Zürich, Frankental|47.4057006674825|8.48137189097235| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8590725| 19:47:00| 19:47:00| 7| 0| 0| 8590725|Oberengstringen, ...|47.4055243523393|8.47408655401713| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8590726| 19:48:00| 19:48:00| 8| 0| 0| 8590726|Oberengstringen, ...| 47.407342193939|8.46795106062573| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8590728| 19:49:00| 19:49:00| 9| 0| 0| 8590728|Oberengstringen, ...|47.4091295756792|8.46260608468448| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8590727| 19:50:00| 19:50:00| 10| 0| 0| 8590727|Oberengstringen, ...|47.4104852703573|8.45874332896223| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8590833| 19:51:00| 19:51:00| 11| 0| 0| 8590833|Unterengstringen,...|47.4122360710415|8.45316479104707| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8594732| 19:53:00| 19:53:00| 12| 0| 0| 8594732|Unterengstringen,...|47.4134944230824|8.44931101847766| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8590831| 19:53:00| 19:53:00| 13| 0| 0| 8590831|Unterengstringen,...| 47.414977659342|8.44603216769017| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8590911| 19:55:00| 19:55:00| 14| 0| 0| 8590911|Weiningen ZH, Aus...|47.4176826342903|8.43953734818508| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8590913| 19:56:00| 19:56:00| 15| 0| 0| 8590913|Weiningen ZH, Lin...|47.4195547602987|8.43394084396424| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8590914| 19:57:00| 19:57:00| 16| 0| 0| 8590914|Weiningen ZH, Sch...|47.4183512583635|8.42866773324572| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|159.TA.26-304-j19...|8590617| 19:59:00| 19:59:00| 17| 0| 0| 8590617| Geroldswil, Welbrig|47.4180716529658|8.41906474285715| 26-304-j19-1| Dietikon, Bahnhof| 5481| 1| 19| 19:39:00| 3| 17|\n",
- "|966.TA.26-70-A-j1...|8591061| 07:00:00| 07:00:00| 9| 0| 0| 8591061|Zürich Leimbach, ...|47.3332523864039|8.51859807635144|26-70-A-j19-1|Zürich, Mittellei...| 3928| 0| 7| 07:00:00| 4| 5|\n",
- "|966.TA.26-70-A-j1...|8591270| 07:02:00| 07:02:00| 10| 0| 0| 8591270| Zürich, Marbachweg|47.3303482449491|8.51537312448101|26-70-A-j19-1|Zürich, Mittellei...| 3928| 0| 7| 07:00:00| 4| 5|\n",
- "|966.TA.26-70-A-j1...|8591210| 07:03:00| 07:03:00| 11| 0| 0| 8591210| Zürich, Im Hüsli|47.3282354882425|8.51269614493396|26-70-A-j19-1|Zürich, Mittellei...| 3928| 0| 7| 07:00:00| 4| 5|\n",
- "|966.TA.26-70-A-j1...|8591370| 07:03:00| 07:03:00| 12| 0| 0| 8591370|Zürich, Sihlweids...|47.3264149182794|8.51466345540645|26-70-A-j19-1|Zürich, Mittellei...| 3928| 0| 7| 07:00:00| 4| 5|\n",
- "|966.TA.26-70-A-j1...|8591278| 07:04:00| 07:04:00| 13| 0| 0| 8591278|Zürich, Mittellei...|47.3231389520848|8.51428616298707|26-70-A-j19-1|Zürich, Mittellei...| 3928| 0| 7| 07:00:00| 4| 5|\n",
- "|269.TA.26-61-j19-...|8591281| 19:57:00| 19:57:00| 1| 0| 0| 8591281| Zürich, Mühlacker|47.4256326325821|8.49799970688372| 26-61-j19-1|Zürich, Schwamend...| 2076| 0| 19| 19:57:00| 5| 2|\n",
- "|269.TA.26-61-j19-...|8591046| 19:58:00| 19:58:00| 2| 0| 0| 8591046| Zürich, Aspholz| 47.425085652811|8.50058685490234| 26-61-j19-1|Zürich, Schwamend...| 2076| 0| 19| 19:57:00| 5| 2|\n",
- "|179.TA.26-703-j19...|8591825| 07:10:00| 07:10:00| 1| 0| 0| 8591825| Benglen, Bodenacher|47.3611288870976|8.63861299832652| 26-703-j19-1| Zürich, Klusplatz| 9385| 1| 7| 07:10:00| 6| 9|\n",
- "|179.TA.26-703-j19...|8590504| 07:11:00| 07:11:00| 2| 0| 0| 8590504|Benglen, Gerlisbr...|47.3610862923255|8.63360938219328| 26-703-j19-1| Zürich, Klusplatz| 9385| 1| 7| 07:10:00| 6| 9|\n",
- "|179.TA.26-703-j19...|8596005| 07:14:00| 07:14:00| 3| 0| 0| 8596005|Binz bei Maur, Tw...|47.3608915729295| 8.623476385787| 26-703-j19-1| Zürich, Klusplatz| 9385| 1| 7| 07:10:00| 6| 9|\n",
- "|179.TA.26-703-j19...|8591832| 07:14:00| 07:14:00| 4| 0| 0| 8591832|Pfaffhausen, Müseren|47.3626987847054|8.61754750491098| 26-703-j19-1| Zürich, Klusplatz| 9385| 1| 7| 07:10:00| 6| 9|\n",
- "|179.TA.26-703-j19...|8591147| 07:16:00| 07:16:00| 5| 0| 0| 8591147|Zürich, Friedhof ...|47.3613418604422|8.60282411740221| 26-703-j19-1| Zürich, Klusplatz| 9385| 1| 7| 07:10:00| 6| 9|\n",
- "|179.TA.26-703-j19...|8591162| 07:17:00| 07:17:00| 6| 0| 0| 8591162|Zürich, Glockenacker|47.3609767627537|8.59930272148798| 26-703-j19-1| Zürich, Klusplatz| 9385| 1| 7| 07:10:00| 6| 9|\n",
- "|179.TA.26-703-j19...|8591261| 07:18:00| 07:18:00| 7| 0| 0| 8591261|Zürich, Loorenstr...|47.3598631991991|8.59452368417579| 26-703-j19-1| Zürich, Klusplatz| 9385| 1| 7| 07:10:00| 6| 9|\n",
- "|179.TA.26-703-j19...|8591107| 07:19:00| 07:19:00| 8| 0| 0| 8591107|Zürich, Carl-Spit...|47.3583236436636|8.58659156021591| 26-703-j19-1| Zürich, Klusplatz| 9385| 1| 7| 07:10:00| 6| 9|\n",
- "|179.TA.26-703-j19...|8591233| 07:25:00| 07:25:00| 9| 0| 0| 8591233| Zürich, Klusplatz|47.3640374201824|8.56649624730736| 26-703-j19-1| Zürich, Klusplatz| 9385| 1| 7| 07:10:00| 6| 9|\n",
- "|171.TA.26-703-j19...|8591825| 07:12:00| 07:12:00| 1| 0| 0| 8591825| Benglen, Bodenacher|47.3611288870976|8.63861299832652| 26-703-j19-1| Zürich, Klusplatz| 9346| 1| 7| 07:12:00| 6| 9|\n",
- "|171.TA.26-703-j19...|8590504| 07:13:00| 07:13:00| 2| 0| 0| 8590504|Benglen, Gerlisbr...|47.3610862923255|8.63360938219328| 26-703-j19-1| Zürich, Klusplatz| 9346| 1| 7| 07:12:00| 6| 9|\n",
- "|171.TA.26-703-j19...|8596005| 07:16:00| 07:16:00| 3| 0| 0| 8596005|Binz bei Maur, Tw...|47.3608915729295| 8.623476385787| 26-703-j19-1| Zürich, Klusplatz| 9346| 1| 7| 07:12:00| 6| 9|\n",
- "|171.TA.26-703-j19...|8591832| 07:16:00| 07:16:00| 4| 0| 0| 8591832|Pfaffhausen, Müseren|47.3626987847054|8.61754750491098| 26-703-j19-1| Zürich, Klusplatz| 9346| 1| 7| 07:12:00| 6| 9|\n",
- "|171.TA.26-703-j19...|8591147| 07:18:00| 07:18:00| 5| 0| 0| 8591147|Zürich, Friedhof ...|47.3613418604422|8.60282411740221| 26-703-j19-1| Zürich, Klusplatz| 9346| 1| 7| 07:12:00| 6| 9|\n",
- "|171.TA.26-703-j19...|8591162| 07:19:00| 07:19:00| 6| 0| 0| 8591162|Zürich, Glockenacker|47.3609767627537|8.59930272148798| 26-703-j19-1| Zürich, Klusplatz| 9346| 1| 7| 07:12:00| 6| 9|\n",
- "|171.TA.26-703-j19...|8591261| 07:20:00| 07:20:00| 7| 0| 0| 8591261|Zürich, Loorenstr...|47.3598631991991|8.59452368417579| 26-703-j19-1| Zürich, Klusplatz| 9346| 1| 7| 07:12:00| 6| 9|\n",
- "|171.TA.26-703-j19...|8591107| 07:21:00| 07:21:00| 8| 0| 0| 8591107|Zürich, Carl-Spit...|47.3583236436636|8.58659156021591| 26-703-j19-1| Zürich, Klusplatz| 9346| 1| 7| 07:12:00| 6| 9|\n",
- "|171.TA.26-703-j19...|8591233| 07:27:00| 07:27:00| 9| 0| 0| 8591233| Zürich, Klusplatz|47.3640374201824|8.56649624730736| 26-703-j19-1| Zürich, Klusplatz| 9346| 1| 7| 07:12:00| 6| 9|\n",
- "|155.TA.26-703-j19...|8591825| 07:25:00| 07:25:00| 1| 0| 0| 8591825| Benglen, Bodenacher|47.3611288870976|8.63861299832652| 26-703-j19-1| Zürich, Klusplatz| 9267| 1| 7| 07:25:00| 6| 9|\n",
- "|155.TA.26-703-j19...|8590504| 07:26:00| 07:26:00| 2| 0| 0| 8590504|Benglen, Gerlisbr...|47.3610862923255|8.63360938219328| 26-703-j19-1| Zürich, Klusplatz| 9267| 1| 7| 07:25:00| 6| 9|\n",
- "|155.TA.26-703-j19...|8596005| 07:29:00| 07:29:00| 3| 0| 0| 8596005|Binz bei Maur, Tw...|47.3608915729295| 8.623476385787| 26-703-j19-1| Zürich, Klusplatz| 9267| 1| 7| 07:25:00| 6| 9|\n",
- "|155.TA.26-703-j19...|8591832| 07:29:00| 07:29:00| 4| 0| 0| 8591832|Pfaffhausen, Müseren|47.3626987847054|8.61754750491098| 26-703-j19-1| Zürich, Klusplatz| 9267| 1| 7| 07:25:00| 6| 9|\n",
- "|155.TA.26-703-j19...|8591147| 07:31:00| 07:31:00| 5| 0| 0| 8591147|Zürich, Friedhof ...|47.3613418604422|8.60282411740221| 26-703-j19-1| Zürich, Klusplatz| 9267| 1| 7| 07:25:00| 6| 9|\n",
- "|155.TA.26-703-j19...|8591162| 07:32:00| 07:32:00| 6| 0| 0| 8591162|Zürich, Glockenacker|47.3609767627537|8.59930272148798| 26-703-j19-1| Zürich, Klusplatz| 9267| 1| 7| 07:25:00| 6| 9|\n",
- "|155.TA.26-703-j19...|8591261| 07:33:00| 07:33:00| 7| 0| 0| 8591261|Zürich, Loorenstr...|47.3598631991991|8.59452368417579| 26-703-j19-1| Zürich, Klusplatz| 9267| 1| 7| 07:25:00| 6| 9|\n",
- "|155.TA.26-703-j19...|8591107| 07:34:00| 07:34:00| 8| 0| 0| 8591107|Zürich, Carl-Spit...|47.3583236436636|8.58659156021591| 26-703-j19-1| Zürich, Klusplatz| 9267| 1| 7| 07:25:00| 6| 9|\n",
- "|155.TA.26-703-j19...|8591233| 07:40:00| 07:40:00| 9| 0| 0| 8591233| Zürich, Klusplatz|47.3640374201824|8.56649624730736| 26-703-j19-1| Zürich, Klusplatz| 9267| 1| 7| 07:25:00| 6| 9|\n",
- "|144.TA.26-703-j19...|8591825| 07:27:00| 07:27:00| 1| 0| 0| 8591825| Benglen, Bodenacher|47.3611288870976|8.63861299832652| 26-703-j19-1| Zürich, Klusplatz| 9231| 1| 7| 07:27:00| 6| 9|\n",
- "|144.TA.26-703-j19...|8590504| 07:28:00| 07:28:00| 2| 0| 0| 8590504|Benglen, Gerlisbr...|47.3610862923255|8.63360938219328| 26-703-j19-1| Zürich, Klusplatz| 9231| 1| 7| 07:27:00| 6| 9|\n",
- "|144.TA.26-703-j19...|8596005| 07:31:00| 07:31:00| 3| 0| 0| 8596005|Binz bei Maur, Tw...|47.3608915729295| 8.623476385787| 26-703-j19-1| Zürich, Klusplatz| 9231| 1| 7| 07:27:00| 6| 9|\n",
- "|144.TA.26-703-j19...|8591832| 07:31:00| 07:31:00| 4| 0| 0| 8591832|Pfaffhausen, Müseren|47.3626987847054|8.61754750491098| 26-703-j19-1| Zürich, Klusplatz| 9231| 1| 7| 07:27:00| 6| 9|\n",
- "|144.TA.26-703-j19...|8591147| 07:33:00| 07:33:00| 5| 0| 0| 8591147|Zürich, Friedhof ...|47.3613418604422|8.60282411740221| 26-703-j19-1| Zürich, Klusplatz| 9231| 1| 7| 07:27:00| 6| 9|\n",
- "|144.TA.26-703-j19...|8591162| 07:34:00| 07:34:00| 6| 0| 0| 8591162|Zürich, Glockenacker|47.3609767627537|8.59930272148798| 26-703-j19-1| Zürich, Klusplatz| 9231| 1| 7| 07:27:00| 6| 9|\n",
- "|144.TA.26-703-j19...|8591261| 07:35:00| 07:35:00| 7| 0| 0| 8591261|Zürich, Loorenstr...|47.3598631991991|8.59452368417579| 26-703-j19-1| Zürich, Klusplatz| 9231| 1| 7| 07:27:00| 6| 9|\n",
- "|144.TA.26-703-j19...|8591107| 07:36:00| 07:36:00| 8| 0| 0| 8591107|Zürich, Carl-Spit...|47.3583236436636|8.58659156021591| 26-703-j19-1| Zürich, Klusplatz| 9231| 1| 7| 07:27:00| 6| 9|\n",
- "|144.TA.26-703-j19...|8591233| 07:42:00| 07:42:00| 9| 0| 0| 8591233| Zürich, Klusplatz|47.3640374201824|8.56649624730736| 26-703-j19-1| Zürich, Klusplatz| 9231| 1| 7| 07:27:00| 6| 9|\n",
- "|120.TA.26-703-j19...|8591825| 07:40:00| 07:40:00| 1| 0| 0| 8591825| Benglen, Bodenacher|47.3611288870976|8.63861299832652| 26-703-j19-1| Zürich, Klusplatz| 9159| 1| 7| 07:40:00| 6| 9|\n",
- "|120.TA.26-703-j19...|8590504| 07:41:00| 07:41:00| 2| 0| 0| 8590504|Benglen, Gerlisbr...|47.3610862923255|8.63360938219328| 26-703-j19-1| Zürich, Klusplatz| 9159| 1| 7| 07:40:00| 6| 9|\n",
- "|120.TA.26-703-j19...|8596005| 07:44:00| 07:44:00| 3| 0| 0| 8596005|Binz bei Maur, Tw...|47.3608915729295| 8.623476385787| 26-703-j19-1| Zürich, Klusplatz| 9159| 1| 7| 07:40:00| 6| 9|\n",
- "|120.TA.26-703-j19...|8591832| 07:44:00| 07:44:00| 4| 0| 0| 8591832|Pfaffhausen, Müseren|47.3626987847054|8.61754750491098| 26-703-j19-1| Zürich, Klusplatz| 9159| 1| 7| 07:40:00| 6| 9|\n",
- "+--------------------+-------+------------+--------------+-------------+-----------+-------------+---------------+--------------------+----------------+----------------+-------------+--------------------+---------------+------------+--------------+--------------------+---------+----------+\n",
- "only showing top 100 rows"
- ]
- }
- ],
- "source": [
- "stop_times=stop_times.sort(stop_times.route_int.cast('int'), \n",
- " stop_times.departure_first_stop, \n",
- " stop_times.trip_id, \n",
- " stop_times.stop_sequence.cast('int'))\n",
- "stop_times.show(100)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 63,
- "metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"stop_times.write.csv('data/lgpt_guys/stop_times_with_route_int.csv', header=True, mode='overwrite')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Generating an index from 0 to n_stops-1 for stops:\n",
"\n",
"In RAPTOR, stops are indexed (in an arbitrary order) from 0 to the number of stops minus one. We generate this index below."
]
},
{
"cell_type": "code",
- "execution_count": 64,
+ "execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"stop_times = spark.read.csv('data/lgpt_guys/stop_times_with_route_int.csv', header=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many unique routes do we find?"
]
},
{
"cell_type": "code",
- "execution_count": 65,
+ "execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1461"
]
}
],
"source": [
"stop_times.select(stop_times.route_int).dropDuplicates().count()"
]
},
{
"cell_type": "code",
- "execution_count": 66,
+ "execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------+---------------+\n",
"|stop_int|stop_id_general|\n",
"+--------+---------------+\n",
- "| 0| 8503088|\n",
+ "| 0| 8503376|\n",
"| 1| 8502508|\n",
- "| 2| 8591190|\n",
- "| 3| 8591284|\n",
- "| 4| 8503376|\n",
- "| 5| 8503078|\n",
- "| 6| 8590819|\n",
+ "| 2| 8503088|\n",
+ "| 3| 8589111|\n",
+ "| 4| 8591284|\n",
+ "| 5| 8591190|\n",
+ "| 6| 8503078|\n",
"| 7| 8587967|\n",
- "| 8| 8589111|\n",
+ "| 8| 8590819|\n",
"| 9| 8591362|\n",
"| 10| 8591149|\n",
"| 11| 8591315|\n",
"| 12| 8588312|\n",
"| 13| 8590541|\n",
"| 14| 8590804|\n",
- "| 15| 8591165|\n",
+ "| 15| 8591085|\n",
"| 16| 8590273|\n",
- "| 17| 8591080|\n",
- "| 18| 8591271|\n",
- "| 19| 8591053|\n",
+ "| 17| 8591271|\n",
+ "| 18| 8591165|\n",
+ "| 19| 8591080|\n",
"+--------+---------------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"stops_general_indexed = dfZipWithIndex(stop_times.select(stop_times.stop_id_general).dropDuplicates(),\n",
" 0,\n",
" 'stop_int')\n",
"stops_general_indexed.show()"
]
},
{
"cell_type": "code",
- "execution_count": 67,
+ "execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"stops_general_indexed.write.csv('data/lgpt_guys/stops_general_indexed.csv', header=True, mode='overwrite')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we add this index to `stop_times` and drop columns we won't be using anymore:\n",
"- `pickup_type`\n",
"- `drop_off_type`\n",
"- `departure_hour`\n",
"\n",
"Note that Spark does not preserve row order across joins; we will therefore need to re-sort `stop_times` once all the processing is done."
]
},
{
"cell_type": "code",
- "execution_count": 68,
+ "execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"stops_general_indexed = spark.read.csv('data/lgpt_guys/stops_general_indexed.csv', header=True)"
]
},
{
"cell_type": "code",
- "execution_count": 69,
+ "execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "+---------------+--------------------+------------+------------+--------------+-------------+--------------------+----------------+----------------+------------+----------------+---------------+------------+--------------------+---------+----------+--------+\n",
- "|stop_id_general| trip_id| stop_id|arrival_time|departure_time|stop_sequence| stop_name| stop_lat| stop_lon| route_id| trip_headsign|trip_short_name|direction_id|departure_first_stop|route_int|stop_count|stop_int|\n",
- "+---------------+--------------------+------------+------------+--------------+-------------+--------------------+----------------+----------------+------------+----------------+---------------+------------+--------------------+---------+----------+--------+\n",
- "| 8503086|127.TA.26-4-B-j19...| 8503086| 08:14:00| 08:14:00| 5| Zürich Brunau| 47.352122370277|8.52623375626752|26-4-B-j19-1|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 1373|\n",
- "| 8503093|127.TA.26-4-B-j19...| 8503093| 08:17:00| 08:17:00| 6| Zürich Manegg|47.3383783847121|8.51967605469254|26-4-B-j19-1|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 510|\n",
- "| 8503094|127.TA.26-4-B-j19...| 8503094| 08:18:00| 08:18:00| 7| Zürich Leimbach|47.3346039449498| 8.5196041894698|26-4-B-j19-1|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 533|\n",
- "| 8503095|127.TA.26-4-B-j19...| 8503095| 08:21:00| 08:21:00| 8| Sood-Oberleimbach|47.3195704752201|8.52143675264966|26-4-B-j19-1|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 1343|\n",
- "| 8503096|127.TA.26-4-B-j19...| 8503096| 08:23:00| 08:24:00| 9| Adliswil|47.3123048960724|8.52416763111376|26-4-B-j19-1|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 239|\n",
- "| 8503097|127.TA.26-4-B-j19...| 8503097| 08:30:00| 08:30:00| 10| Langnau-Gattikon|47.2868766420081|8.54402039889557|26-4-B-j19-1|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 669|\n",
- "| 8503088|128.TA.26-4-B-j19...|8503088:0:21| 08:28:00| 08:28:00| 1| Zürich HB SZU|47.3775557344462|8.53916949636064|26-4-B-j19-1|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 0|\n",
- "| 8503090|128.TA.26-4-B-j19...| 8503090| 08:29:00| 08:29:00| 2| Zürich Selnau|47.3729384820921|8.53203687300374|26-4-B-j19-1|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 48|\n",
- "| 8503091|128.TA.26-4-B-j19...| 8503091| 08:31:00| 08:31:00| 3| Zürich Giesshübel|47.3624553927874|8.52184997768041|26-4-B-j19-1|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 114|\n",
- "| 8503087|128.TA.26-4-B-j19...| 8503087| 08:33:00| 08:33:00| 4|Zürich Saalsporth...| 47.357404757095|8.52214642172421|26-4-B-j19-1|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 262|\n",
- "| 8503086|128.TA.26-4-B-j19...| 8503086| 08:34:00| 08:34:00| 5| Zürich Brunau| 47.352122370277|8.52623375626752|26-4-B-j19-1|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 1373|\n",
- "| 8503093|128.TA.26-4-B-j19...| 8503093| 08:37:00| 08:37:00| 6| Zürich Manegg|47.3383783847121|8.51967605469254|26-4-B-j19-1|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 510|\n",
- "| 8503094|128.TA.26-4-B-j19...| 8503094| 08:38:00| 08:38:00| 7| Zürich Leimbach|47.3346039449498| 8.5196041894698|26-4-B-j19-1|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 533|\n",
- "| 8503095|128.TA.26-4-B-j19...| 8503095| 08:41:00| 08:41:00| 8| Sood-Oberleimbach|47.3195704752201|8.52143675264966|26-4-B-j19-1|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 1343|\n",
- "| 8503096|128.TA.26-4-B-j19...| 8503096| 08:43:00| 08:44:00| 9| Adliswil|47.3123048960724|8.52416763111376|26-4-B-j19-1|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 239|\n",
- "| 8503097|128.TA.26-4-B-j19...| 8503097| 08:50:00| 08:50:00| 10| Langnau-Gattikon|47.2868766420081|8.54402039889557|26-4-B-j19-1|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 669|\n",
- "| 8503855|294.TA.26-134-j19...| 8503855:0:B| 07:04:00| 07:04:00| 1| Horgen, Bahnhof|47.2619787396278|8.59697608490178|26-134-j19-1|Horgen, Risi/Dow| 2960| 0| 07:04:00| 1317| 6| 49|\n",
- "| 8577912|294.TA.26-134-j19...| 8577912| 07:06:00| 07:06:00| 2|Horgen, untere Mühle|47.2591865186515|8.59809897900709|26-134-j19-1|Horgen, Risi/Dow| 2960| 0| 07:04:00| 1317| 6| 247|\n",
- "| 8590663|294.TA.26-134-j19...| 8590663| 07:07:00| 07:07:00| 3| Horgen, Wannenthal|47.2565465760621|8.60232106084304|26-134-j19-1|Horgen, Risi/Dow| 2960| 0| 07:04:00| 1317| 6| 1334|\n",
- "| 8590661|294.TA.26-134-j19...| 8590661| 07:08:00| 07:08:00| 4| Horgen, Teufenbach| 47.256253918433|8.60507888876567|26-134-j19-1|Horgen, Risi/Dow| 2960| 0| 07:04:00| 1317| 6| 356|\n",
- "+---------------+--------------------+------------+------------+--------------+-------------+--------------------+----------------+----------------+------------+----------------+---------------+------------+--------------------+---------+----------+--------+\n",
+ "+---------------+--------------------+-------+------------+--------------+-------------+--------------------+----------------+----------------+------------+--------------------+---------------+------------+--------------------+---------+----------+--------+\n",
+ "|stop_id_general| trip_id|stop_id|arrival_time|departure_time|stop_sequence| stop_name| stop_lat| stop_lon| route_id| trip_headsign|trip_short_name|direction_id|departure_first_stop|route_int|stop_count|stop_int|\n",
+ "+---------------+--------------------+-------+------------+--------------+-------------+--------------------+----------------+----------------+------------+--------------------+---------------+------------+--------------------+---------+----------+--------+\n",
+ "| 8590679|610.TA.26-185-j19...|8590679| 10:56:00| 10:56:00| 5|Kilchberg ZH, Spital|47.3217079365566|8.53537860586113|26-185-j19-1|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 65|\n",
+ "| 8590468|610.TA.26-185-j19...|8590468| 10:58:00| 10:58:00| 6| Adliswil, Eichenweg|47.3200028461711|8.53368977312675|26-185-j19-1|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 1171|\n",
+ "| 8590477|610.TA.26-185-j19...|8590477| 10:59:00| 10:59:00| 7| Adliswil, Moos|47.3254528526241|8.53104872619107|26-185-j19-1|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 15|\n",
+ "| 8591388|610.TA.26-185-j19...|8591388| 11:00:00| 11:00:00| 8| Zürich, Sunnau|47.3270847004784|8.52961142173628|26-185-j19-1|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 1234|\n",
+ "| 8591111|610.TA.26-185-j19...|8591111| 11:02:00| 11:02:00| 9|Zürich, Dangelstr...|47.3345491527085|8.52997973100282|26-185-j19-1|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 433|\n",
+ "| 8591439|610.TA.26-185-j19...|8591439| 11:03:00| 11:03:00| 10| Zürich, Wollishofen|47.3384392605619|8.53015939405967|26-185-j19-1|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 552|\n",
+ "| 8591106|610.TA.26-185-j19...|8591106| 11:03:00| 11:03:00| 11|Zürich, Butzenstr...|47.3414099167461|8.53031210765799|26-185-j19-1|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 1037|\n",
+ "| 8591279|610.TA.26-185-j19...|8591279| 11:05:00| 11:05:00| 12| Zürich, Morgental|47.3439482343686|8.53014142775399|26-185-j19-1|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 1349|\n",
+ "| 8591304|610.TA.26-185-j19...|8591304| 11:06:00| 11:06:00| 13|Zürich, Post Woll...|47.3444717091534|8.53296213774651|26-185-j19-1|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 982|\n",
+ "| 8502495|610.TA.26-185-j19...|8502495| 11:07:00| 11:07:00| 14|Zürich Wollishofe...|47.3476976601166|8.53331248070737|26-185-j19-1|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 116|\n",
+ "| 8591036|610.TA.26-3-A-j19...|8591036| 08:18:00| 08:18:00| 1| Zürich, Albisrieden|47.3743863596743|8.48478548905248|26-3-A-j19-1| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 1356|\n",
+ "| 8591126|610.TA.26-3-A-j19...|8591126| 08:20:00| 08:20:00| 2|Zürich, Fellenber...|47.3757186152716|8.48841468280083|26-3-A-j19-1| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 744|\n",
+ "| 8591363|610.TA.26-3-A-j19...|8591363| 08:21:00| 08:21:00| 3| Zürich, Siemens|47.3785837632719|8.49449627727516|26-3-A-j19-1| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 264|\n",
+ "| 8591203|610.TA.26-3-A-j19...|8591203| 08:23:00| 08:23:00| 4| Zürich, Hubertus|47.3768744261929|8.49947294394988|26-3-A-j19-1| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 569|\n",
+ "| 8591236|610.TA.26-3-A-j19...|8591236| 08:24:00| 08:24:00| 5|Zürich, Krematori...|47.3778051397306|8.50787219185756|26-3-A-j19-1| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 368|\n",
+ "| 8591038|610.TA.26-3-A-j19...|8591038| 08:25:00| 08:25:00| 6|Zürich, Albisried...|47.3782127019246|8.51039645780629|26-3-A-j19-1| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 1190|\n",
+ "| 8591448|610.TA.26-3-A-j19...|8591448| 08:26:00| 08:26:00| 7|Zürich, Zypressen...|47.3766919314159|8.51376514012221|26-3-A-j19-1| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 773|\n",
+ "| 8591259|610.TA.26-3-A-j19...|8591259| 08:28:00| 08:28:00| 8| Zürich, Lochergut|47.3753475337612|8.51791535673542|26-3-A-j19-1| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 653|\n",
+ "| 8591218|610.TA.26-3-A-j19...|8591218| 08:29:00| 08:29:00| 9|Zürich,Kalkbreite...|47.3745992794953|8.52100556131322|26-3-A-j19-1| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 247|\n",
+ "| 8591079|610.TA.26-3-A-j19...|8591079| 08:30:00| 08:30:00| 10|Zürich, Bezirksge...|47.3741673555329|8.52556001980434|26-3-A-j19-1| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 1062|\n",
+ "+---------------+--------------------+-------+------------+--------------+-------------+--------------------+----------------+----------------+------------+--------------------+---------------+------------+--------------------+---------+----------+--------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"stop_times = stop_times.join(stops_general_indexed, how='inner', on='stop_id_general')\\\n",
".drop('pickup_type', 'drop_off_type', 'departure_hour')\n",
"stop_times.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adding transport types to stop_times from routes.txt"
]
},
{
"cell_type": "code",
- "execution_count": 70,
+ "execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------+---------+----------------+---------------+----------+----------+\n",
"| route_id|agency_id|route_short_name|route_long_name|route_desc|route_type|\n",
"+-----------+---------+----------------+---------------+----------+----------+\n",
"|11-40-j19-1| 801| 040| null| Bus| 700|\n",
"|11-61-j19-1| 7031| 061| null| Bus| 700|\n",
"|11-62-j19-1| 7031| 062| null| Bus| 700|\n",
"|24-64-j19-1| 801| 064| null| Bus| 700|\n",
"|11-83-j19-1| 801| 083| null| Bus| 700|\n",
"|1-1-B-j19-1| 11| 1| null| S-Bahn| 400|\n",
"|1-1-A-j19-1| 11| 1| null| S-Bahn| 400|\n",
"|1-1-C-j19-1| 723| 1| null| Bus| 700|\n",
"|1-1-D-j19-1| 840| 1| null| Bus| 700|\n",
"|1-1-E-j19-1| 886| 1| null| Bus| 700|\n",
"| 1-1-j19-1| 11| 1| null| Intercity| 102|\n",
"| 4-1-j19-1| 11| 1| null| S-Bahn| 400|\n",
"| 5-1-j19-1| 823| 1| null| Tram| 900|\n",
"|6-1-A-j19-1| 146| 1| null| Bus| 700|\n",
"|6-1-B-j19-1| 33| 1| null| S-Bahn| 400|\n",
"|6-1-C-j19-1| 801| 1| null| Bus| 700|\n",
"|6-1-D-j19-1| 889| 1| null| Bus| 700|\n",
"|6-1-E-j19-1| 889| 1| null| Bus| 700|\n",
"| 6-1-j19-1| 11| 1| null| Intercity| 102|\n",
"|8-1-A-j19-1| 834| 1| null| Bus| 700|\n",
"+-----------+---------+----------------+---------------+----------+----------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"routes = spark.read.csv(\"/data/sbb/timetables/csv/routes/2019/05/14/routes.txt\", header=True, sep = \",\")\n",
"routes.show()"
]
},
{
"cell_type": "code",
- "execution_count": 71,
+ "execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------+----------+\n",
"| route_id|route_desc|\n",
"+-----------+----------+\n",
"|11-40-j19-1| Bus|\n",
"|11-61-j19-1| Bus|\n",
"|11-62-j19-1| Bus|\n",
"|24-64-j19-1| Bus|\n",
"|11-83-j19-1| Bus|\n",
"|1-1-B-j19-1| S-Bahn|\n",
"|1-1-A-j19-1| S-Bahn|\n",
"|1-1-C-j19-1| Bus|\n",
"|1-1-D-j19-1| Bus|\n",
"|1-1-E-j19-1| Bus|\n",
"| 1-1-j19-1| Intercity|\n",
"| 4-1-j19-1| S-Bahn|\n",
"| 5-1-j19-1| Tram|\n",
"|6-1-A-j19-1| Bus|\n",
"|6-1-B-j19-1| S-Bahn|\n",
"|6-1-C-j19-1| Bus|\n",
"|6-1-D-j19-1| Bus|\n",
"|6-1-E-j19-1| Bus|\n",
"| 6-1-j19-1| Intercity|\n",
"|8-1-A-j19-1| Bus|\n",
"+-----------+----------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"routes_for_join = routes.select(routes.route_id, routes.route_desc)\n",
"routes_for_join.show()"
]
},
{
"cell_type": "code",
- "execution_count": 72,
+ "execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "+------------+---------------+--------------------+------------+------------+--------------+-------------+--------------------+----------------+----------------+----------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
- "| route_id|stop_id_general| trip_id| stop_id|arrival_time|departure_time|stop_sequence| stop_name| stop_lat| stop_lon| trip_headsign|trip_short_name|direction_id|departure_first_stop|route_int|stop_count|stop_int|route_desc|\n",
- "+------------+---------------+--------------------+------------+------------+--------------+-------------+--------------------+----------------+----------------+----------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
- "|26-4-B-j19-1| 8503086|127.TA.26-4-B-j19...| 8503086| 08:14:00| 08:14:00| 5| Zürich Brunau| 47.352122370277|8.52623375626752|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 1373| S-Bahn|\n",
- "|26-4-B-j19-1| 8503093|127.TA.26-4-B-j19...| 8503093| 08:17:00| 08:17:00| 6| Zürich Manegg|47.3383783847121|8.51967605469254|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 510| S-Bahn|\n",
- "|26-4-B-j19-1| 8503094|127.TA.26-4-B-j19...| 8503094| 08:18:00| 08:18:00| 7| Zürich Leimbach|47.3346039449498| 8.5196041894698|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 533| S-Bahn|\n",
- "|26-4-B-j19-1| 8503095|127.TA.26-4-B-j19...| 8503095| 08:21:00| 08:21:00| 8| Sood-Oberleimbach|47.3195704752201|8.52143675264966|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 1343| S-Bahn|\n",
- "|26-4-B-j19-1| 8503096|127.TA.26-4-B-j19...| 8503096| 08:23:00| 08:24:00| 9| Adliswil|47.3123048960724|8.52416763111376|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 239| S-Bahn|\n",
- "|26-4-B-j19-1| 8503097|127.TA.26-4-B-j19...| 8503097| 08:30:00| 08:30:00| 10| Langnau-Gattikon|47.2868766420081|8.54402039889557|Langnau-Gattikon| 12473| 0| 08:08:00| 1316| 10| 669| S-Bahn|\n",
- "|26-4-B-j19-1| 8503088|128.TA.26-4-B-j19...|8503088:0:21| 08:28:00| 08:28:00| 1| Zürich HB SZU|47.3775557344462|8.53916949636064|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 0| S-Bahn|\n",
- "|26-4-B-j19-1| 8503090|128.TA.26-4-B-j19...| 8503090| 08:29:00| 08:29:00| 2| Zürich Selnau|47.3729384820921|8.53203687300374|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 48| S-Bahn|\n",
- "|26-4-B-j19-1| 8503091|128.TA.26-4-B-j19...| 8503091| 08:31:00| 08:31:00| 3| Zürich Giesshübel|47.3624553927874|8.52184997768041|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 114| S-Bahn|\n",
- "|26-4-B-j19-1| 8503087|128.TA.26-4-B-j19...| 8503087| 08:33:00| 08:33:00| 4|Zürich Saalsporth...| 47.357404757095|8.52214642172421|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 262| S-Bahn|\n",
- "|26-4-B-j19-1| 8503086|128.TA.26-4-B-j19...| 8503086| 08:34:00| 08:34:00| 5| Zürich Brunau| 47.352122370277|8.52623375626752|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 1373| S-Bahn|\n",
- "|26-4-B-j19-1| 8503093|128.TA.26-4-B-j19...| 8503093| 08:37:00| 08:37:00| 6| Zürich Manegg|47.3383783847121|8.51967605469254|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 510| S-Bahn|\n",
- "|26-4-B-j19-1| 8503094|128.TA.26-4-B-j19...| 8503094| 08:38:00| 08:38:00| 7| Zürich Leimbach|47.3346039449498| 8.5196041894698|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 533| S-Bahn|\n",
- "|26-4-B-j19-1| 8503095|128.TA.26-4-B-j19...| 8503095| 08:41:00| 08:41:00| 8| Sood-Oberleimbach|47.3195704752201|8.52143675264966|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 1343| S-Bahn|\n",
- "|26-4-B-j19-1| 8503096|128.TA.26-4-B-j19...| 8503096| 08:43:00| 08:44:00| 9| Adliswil|47.3123048960724|8.52416763111376|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 239| S-Bahn|\n",
- "|26-4-B-j19-1| 8503097|128.TA.26-4-B-j19...| 8503097| 08:50:00| 08:50:00| 10| Langnau-Gattikon|47.2868766420081|8.54402039889557|Langnau-Gattikon| 12477| 0| 08:28:00| 1316| 10| 669| S-Bahn|\n",
- "|26-134-j19-1| 8503855|294.TA.26-134-j19...| 8503855:0:B| 07:04:00| 07:04:00| 1| Horgen, Bahnhof|47.2619787396278|8.59697608490178|Horgen, Risi/Dow| 2960| 0| 07:04:00| 1317| 6| 49| Bus|\n",
- "|26-134-j19-1| 8577912|294.TA.26-134-j19...| 8577912| 07:06:00| 07:06:00| 2|Horgen, untere Mühle|47.2591865186515|8.59809897900709|Horgen, Risi/Dow| 2960| 0| 07:04:00| 1317| 6| 247| Bus|\n",
- "|26-134-j19-1| 8590663|294.TA.26-134-j19...| 8590663| 07:07:00| 07:07:00| 3| Horgen, Wannenthal|47.2565465760621|8.60232106084304|Horgen, Risi/Dow| 2960| 0| 07:04:00| 1317| 6| 1334| Bus|\n",
- "|26-134-j19-1| 8590661|294.TA.26-134-j19...| 8590661| 07:08:00| 07:08:00| 4| Horgen, Teufenbach| 47.256253918433|8.60507888876567|Horgen, Risi/Dow| 2960| 0| 07:04:00| 1317| 6| 356| Bus|\n",
- "+------------+---------------+--------------------+------------+------------+--------------+-------------+--------------------+----------------+----------------+----------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
+ "+------------+---------------+--------------------+-------+------------+--------------+-------------+--------------------+----------------+----------------+--------------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
+ "| route_id|stop_id_general| trip_id|stop_id|arrival_time|departure_time|stop_sequence| stop_name| stop_lat| stop_lon| trip_headsign|trip_short_name|direction_id|departure_first_stop|route_int|stop_count|stop_int|route_desc|\n",
+ "+------------+---------------+--------------------+-------+------------+--------------+-------------+--------------------+----------------+----------------+--------------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
+ "|26-185-j19-1| 8590679|610.TA.26-185-j19...|8590679| 10:56:00| 10:56:00| 5|Kilchberg ZH, Spital|47.3217079365566|8.53537860586113|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 65| Bus|\n",
+ "|26-185-j19-1| 8590468|610.TA.26-185-j19...|8590468| 10:58:00| 10:58:00| 6| Adliswil, Eichenweg|47.3200028461711|8.53368977312675|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 1171| Bus|\n",
+ "|26-185-j19-1| 8590477|610.TA.26-185-j19...|8590477| 10:59:00| 10:59:00| 7| Adliswil, Moos|47.3254528526241|8.53104872619107|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 15| Bus|\n",
+ "|26-185-j19-1| 8591388|610.TA.26-185-j19...|8591388| 11:00:00| 11:00:00| 8| Zürich, Sunnau|47.3270847004784|8.52961142173628|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 1234| Bus|\n",
+ "|26-185-j19-1| 8591111|610.TA.26-185-j19...|8591111| 11:02:00| 11:02:00| 9|Zürich, Dangelstr...|47.3345491527085|8.52997973100282|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 433| Bus|\n",
+ "|26-185-j19-1| 8591439|610.TA.26-185-j19...|8591439| 11:03:00| 11:03:00| 10| Zürich, Wollishofen|47.3384392605619|8.53015939405967|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 552| Bus|\n",
+ "|26-185-j19-1| 8591106|610.TA.26-185-j19...|8591106| 11:03:00| 11:03:00| 11|Zürich, Butzenstr...|47.3414099167461|8.53031210765799|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 1037| Bus|\n",
+ "|26-185-j19-1| 8591279|610.TA.26-185-j19...|8591279| 11:05:00| 11:05:00| 12| Zürich, Morgental|47.3439482343686|8.53014142775399|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 1349| Bus|\n",
+ "|26-185-j19-1| 8591304|610.TA.26-185-j19...|8591304| 11:06:00| 11:06:00| 13|Zürich, Post Woll...|47.3444717091534|8.53296213774651|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 982| Bus|\n",
+ "|26-185-j19-1| 8502495|610.TA.26-185-j19...|8502495| 11:07:00| 11:07:00| 14|Zürich Wollishofe...|47.3476976601166|8.53331248070737|Zürich Wollishofe...| 9270| 1| 10:51:00| 21| 14| 116| Bus|\n",
+ "|26-3-A-j19-1| 8591036|610.TA.26-3-A-j19...|8591036| 08:18:00| 08:18:00| 1| Zürich, Albisrieden|47.3743863596743|8.48478548905248| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 1356| Tram|\n",
+ "|26-3-A-j19-1| 8591126|610.TA.26-3-A-j19...|8591126| 08:20:00| 08:20:00| 2|Zürich, Fellenber...|47.3757186152716|8.48841468280083| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 744| Tram|\n",
+ "|26-3-A-j19-1| 8591363|610.TA.26-3-A-j19...|8591363| 08:21:00| 08:21:00| 3| Zürich, Siemens|47.3785837632719|8.49449627727516| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 264| Tram|\n",
+ "|26-3-A-j19-1| 8591203|610.TA.26-3-A-j19...|8591203| 08:23:00| 08:23:00| 4| Zürich, Hubertus|47.3768744261929|8.49947294394988| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 569| Tram|\n",
+ "|26-3-A-j19-1| 8591236|610.TA.26-3-A-j19...|8591236| 08:24:00| 08:24:00| 5|Zürich, Krematori...|47.3778051397306|8.50787219185756| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 368| Tram|\n",
+ "|26-3-A-j19-1| 8591038|610.TA.26-3-A-j19...|8591038| 08:25:00| 08:25:00| 6|Zürich, Albisried...|47.3782127019246|8.51039645780629| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 1190| Tram|\n",
+ "|26-3-A-j19-1| 8591448|610.TA.26-3-A-j19...|8591448| 08:26:00| 08:26:00| 7|Zürich, Zypressen...|47.3766919314159|8.51376514012221| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 773| Tram|\n",
+ "|26-3-A-j19-1| 8591259|610.TA.26-3-A-j19...|8591259| 08:28:00| 08:28:00| 8| Zürich, Lochergut|47.3753475337612|8.51791535673542| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 653| Tram|\n",
+ "|26-3-A-j19-1| 8591218|610.TA.26-3-A-j19...|8591218| 08:29:00| 08:29:00| 9|Zürich,Kalkbreite...|47.3745992794953|8.52100556131322| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 247| Tram|\n",
+ "|26-3-A-j19-1| 8591079|610.TA.26-3-A-j19...|8591079| 08:30:00| 08:30:00| 10|Zürich, Bezirksge...|47.3741673555329|8.52556001980434| Zürich, Klusplatz| 1059| 0| 08:18:00| 1282| 21| 1062| Tram|\n",
+ "+------------+---------------+--------------------+-------+------------+--------------+-------------+--------------------+----------------+----------------+--------------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"stop_times = stop_times.join(routes_for_join, how='inner', on='route_id')\n",
"stop_times.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## VERY IMPORTANT: final sort before saving to csv"
+ "## VERY IMPORTANT: final sort before writing to csv"
]
},
{
"cell_type": "code",
- "execution_count": 73,
+ "execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "+-------------+---------------+------------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+---------------------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
- "|route_id |stop_id_general|trip_id |stop_id|arrival_time|departure_time|stop_sequence|stop_name |stop_lat |stop_lon |trip_headsign |trip_short_name|direction_id|departure_first_stop|route_int|stop_count|stop_int|route_desc|\n",
- "+-------------+---------------+------------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+---------------------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
- "|26-10-j19-1 |8573205 |1672.TA.26-10-j19-1.11.R|8573205|07:00:00 |07:01:00 |27 |Zürich Flughafen, Bahnhof |47.4504413038344|8.56372943623189|Zürich Flughafen, Fracht |4096 |1 |07:01:00 |0 |2 |298 |Tram |\n",
- "|26-10-j19-1 |8588553 |1672.TA.26-10-j19-1.11.R|8588553|07:02:00 |07:02:00 |28 |Zürich Flughafen, Fracht |47.4524944976638|8.57205681891684|Zürich Flughafen, Fracht |4096 |1 |07:01:00 |0 |2 |1295 |Tram |\n",
- "|26-13-j19-1 |8576240 |2064.TA.26-13-j19-1.24.H|8576240|07:00:00 |07:00:00 |5 |Zürich, Meierhofplatz |47.4020100860391|8.49937412926861|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |1222 |Tram |\n",
- "|26-13-j19-1 |8591353 |2064.TA.26-13-j19-1.24.H|8591353|07:01:00 |07:01:00 |6 |Zürich, Schwert |47.3997299435837|8.50461130737576|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |816 |Tram |\n",
- "|26-13-j19-1 |8591039 |2064.TA.26-13-j19-1.24.H|8591039|07:02:00 |07:02:00 |7 |Zürich, Alte Trotte |47.3977659017765|8.50725235431143|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |778 |Tram |\n",
- "|26-13-j19-1 |8591121 |2064.TA.26-13-j19-1.24.H|8591121|07:03:00 |07:03:00 |8 |Zürich, Eschergutweg |47.3962700189648|8.51204037477646|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |313 |Tram |\n",
- "|26-13-j19-1 |8591417 |2064.TA.26-13-j19-1.24.H|8591417|07:05:00 |07:05:00 |9 |Zürich, Waidfussweg |47.3954977376399|8.51840044698891|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |350 |Tram |\n",
- "|26-13-j19-1 |8591437 |2064.TA.26-13-j19-1.24.H|8591437|07:06:00 |07:06:00 |10 |Zürich, Wipkingerplatz |47.3925909395293|8.52357474302616|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |1019 |Tram |\n",
- "|26-13-j19-1 |8580522 |2064.TA.26-13-j19-1.24.H|8580522|07:08:00 |07:08:00 |11 |Zürich, Escher-Wyss-Platz |47.3907969150758|8.5223979500038 |Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |454 |Tram |\n",
- "|26-13-j19-1 |8591110 |2064.TA.26-13-j19-1.24.H|8591110|07:09:00 |07:09:00 |12 |Zürich, Dammweg |47.3884919601296|8.52639545301869|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |1101 |Tram |\n",
- "|26-13-j19-1 |8591306 |2064.TA.26-13-j19-1.24.H|8591306|07:10:00 |07:10:00 |13 |Zürich, Quellenstrasse |47.3867403702341|8.52874903906341|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |786 |Tram |\n",
- "|26-13-j19-1 |8591257 |2064.TA.26-13-j19-1.24.H|8591257|07:11:00 |07:11:00 |14 |Zürich, Limmatplatz |47.3845994590919|8.53162364797299|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |388 |Tram |\n",
- "|26-13-j19-1 |8591282 |2064.TA.26-13-j19-1.24.H|8591282|07:12:00 |07:12:00 |15 |Zürich, Museum für Gestaltung |47.3821239221899|8.53493843137185|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |138 |Tram |\n",
- "|26-13-j19-1 |8591368 |2064.TA.26-13-j19-1.24.H|8591368|07:14:00 |07:14:00 |16 |Zürich, Sihlquai/HB |47.3798733332196|8.53760642776606|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |880 |Tram |\n",
- "|26-13-j19-1 |8587349 |2064.TA.26-13-j19-1.24.H|8587349|07:16:00 |07:16:00 |17 |Zürich, Bahnhofquai/HB |47.3775618175159|8.54173867807358|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |602 |Tram |\n",
- "|26-13-j19-1 |8591067 |2064.TA.26-13-j19-1.24.H|8591067|07:18:00 |07:18:00 |18 |Zürich, Bahnhofstrasse/HB |47.3765581015114|8.53994204750509|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |616 |Tram |\n",
- "|26-13-j19-1 |8591316 |2064.TA.26-13-j19-1.24.H|8591316|07:20:00 |07:20:00 |19 |Zürich, Rennweg |47.3730662375955|8.53845982728609|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |375 |Tram |\n",
- "|26-13-j19-1 |8591299 |2064.TA.26-13-j19-1.24.H|8591299|07:22:00 |07:22:00 |20 |Zürich, Paradeplatz |47.3693672863583|8.53876525448273|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |1221 |Tram |\n",
- "|26-13-j19-1 |8591384 |2064.TA.26-13-j19-1.24.H|8591384|07:23:00 |07:23:00 |21 |Zürich, Stockerstrasse |47.3677002399791|8.53501029659459|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |871 |Tram |\n",
- "|26-13-j19-1 |8591404 |2064.TA.26-13-j19-1.24.H|8591404|07:24:00 |07:24:00 |22 |Zürich, Tunnelstrasse |47.3661426599847|8.53253094641008|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |888 |Tram |\n",
- "|26-13-j19-1 |8591059 |2064.TA.26-13-j19-1.24.H|8591059|07:25:00 |07:25:00 |23 |Zürich Enge, Bahnhof/Bederstr.|47.3645546111557|8.53045583810347|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |175 |Tram |\n",
- "|26-13-j19-1 |8591415 |2064.TA.26-13-j19-1.24.H|8591415|07:27:00 |07:27:00 |24 |Zürich, Waffenplatzstrasse |47.3614818138862|8.52574866601403|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |1267 |Tram |\n",
- "|26-13-j19-1 |8591366 |2064.TA.26-13-j19-1.24.H|8591366|07:28:00 |07:28:00 |25 |Zürich, Sihlcity Nord |47.3600640074787|8.52303575385561|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |968 |Tram |\n",
- "|26-13-j19-1 |8591329 |2064.TA.26-13-j19-1.24.H|8591329|07:29:00 |07:29:00 |26 |Zürich, Saalsporthalle |47.3578611597087|8.52040369007277|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |1236 |Tram |\n",
- "|26-13-j19-1 |8591245 |2064.TA.26-13-j19-1.24.H|8591245|07:30:00 |07:30:00 |27 |Zürich, Laubegg |47.3587313564196|8.51708890667391|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |1215 |Tram |\n",
- "|26-13-j19-1 |8591405 |2064.TA.26-13-j19-1.24.H|8591405|07:32:00 |07:32:00 |28 |Zürich, Uetlihof |47.3567353594536|8.51396276948474|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |847 |Tram |\n",
- "|26-13-j19-1 |8591385 |2064.TA.26-13-j19-1.24.H|8591385|07:33:00 |07:33:00 |29 |Zürich, Strassenverkehrsamt |47.3530717783138|8.51171698127413|Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |272 |Tram |\n",
- "|26-13-j19-1 |8591034 |2064.TA.26-13-j19-1.24.H|8591034|07:34:00 |07:34:00 |30 |Zürich, Albisgütli |47.3519945640447|8.5077104951064 |Zürich, Albisgütli |1831 |0 |07:00:00 |1 |26 |1352 |Tram |\n",
- "|26-11-A-j19-1|8591049 |791.TA.26-11-A-j19-1.3.H|8591049|19:49:00 |19:49:00 |1 |Zürich, Auzelg |47.4166918393693|8.568113214819 |Zürich, Rehalp |363 |0 |19:49:00 |2 |8 |1117 |Tram |\n",
- "|26-11-A-j19-1|8591128 |791.TA.26-11-A-j19-1.3.H|8591128|19:51:00 |19:51:00 |2 |Zürich, Fernsehstudio |47.4181749855684|8.56174415945371|Zürich, Rehalp |363 |0 |19:49:00 |2 |8 |152 |Tram |\n",
- "|26-11-A-j19-1|8591830 |791.TA.26-11-A-j19-1.3.H|8591830|19:52:00 |19:52:00 |3 |Glattpark, Glattpark |47.4199559214972|8.55716275150406|Zürich, Rehalp |363 |0 |19:49:00 |2 |8 |672 |Tram |\n",
- "|26-11-A-j19-1|8591294 |791.TA.26-11-A-j19-1.3.H|8591294|19:53:00 |19:53:00 |4 |Zürich, Oerlikerhus |47.4175853791724|8.5542072942189 |Zürich, Rehalp |363 |0 |19:49:00 |2 |8 |571 |Tram |\n",
- "|26-11-A-j19-1|8591256 |791.TA.26-11-A-j19-1.3.H|8591256|19:54:00 |19:54:00 |5 |Zürich, Leutschenbach |47.4146433269471|8.55130573585079|Zürich, Rehalp |363 |0 |19:49:00 |2 |8 |444 |Tram |\n",
- "|26-11-A-j19-1|8591273 |791.TA.26-11-A-j19-1.3.H|8591273|19:55:00 |19:55:00 |6 |Zürich, Messe/Hallenstadion |47.4106919651348|8.55068589830466|Zürich, Rehalp |363 |0 |19:49:00 |2 |8 |389 |Tram |\n",
- "|26-11-A-j19-1|8591382 |791.TA.26-11-A-j19-1.3.H|8591382|19:57:00 |19:57:00 |7 |Zürich, Sternen Oerlikon |47.4100718783688|8.54623025449481|Zürich, Rehalp |363 |0 |19:49:00 |2 |8 |688 |Tram |\n",
- "|26-11-A-j19-1|8580449 |791.TA.26-11-A-j19-1.3.H|8580449|19:59:00 |19:59:00 |8 |Zürich Oerlikon, Bahnhof |47.411494419524 |8.54479295004002|Zürich, Rehalp |363 |0 |19:49:00 |2 |8 |766 |Tram |\n",
- "|26-304-j19-1 |8591057 |159.TA.26-304-j19-1.4.R |8591057|19:39:00 |19:39:00 |1 |Zürich Altstetten, Bahnhof N |47.392067942097 |8.48990588617267|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |520 |Bus |\n",
- "|26-304-j19-1 |8591402 |159.TA.26-304-j19-1.4.R |8591402|19:41:00 |19:41:00 |2 |Zürich, Tüffenwies |47.3979787271809|8.49434356367684|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |999 |Bus |\n",
- "|26-304-j19-1 |8591434 |159.TA.26-304-j19-1.4.R |8591434|19:41:00 |19:41:00 |3 |Zürich, Winzerhalde |47.4000582901792|8.4945681424979 |Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |710 |Bus |\n",
- "|26-304-j19-1 |8591197 |159.TA.26-304-j19-1.4.R |8591197|19:42:00 |19:42:00 |4 |Zürich, Hohenklingensteig |47.4013473348052|8.49021131336931|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |1123 |Bus |\n",
- "|26-304-j19-1 |8591436 |159.TA.26-304-j19-1.4.R |8591436|19:43:00 |19:43:00 |5 |Zürich, Winzerstrasse Süd |47.403372044054 |8.486123978826 |Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |147 |Bus |\n",
- "|26-304-j19-1 |8591136 |159.TA.26-304-j19-1.4.R |8591136|19:46:00 |19:46:00 |6 |Zürich, Frankental |47.4057006674825|8.48137189097235|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |689 |Bus |\n",
- "|26-304-j19-1 |8590725 |159.TA.26-304-j19-1.4.R |8590725|19:47:00 |19:47:00 |7 |Oberengstringen, Eggbühl |47.4055243523393|8.47408655401713|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |1325 |Bus |\n",
- "|26-304-j19-1 |8590726 |159.TA.26-304-j19-1.4.R |8590726|19:48:00 |19:48:00 |8 |Oberengstringen, Lanzrain |47.407342193939 |8.46795106062573|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |759 |Bus |\n",
- "|26-304-j19-1 |8590728 |159.TA.26-304-j19-1.4.R |8590728|19:49:00 |19:49:00 |9 |Oberengstringen, Zentrum |47.4091295756792|8.46260608468448|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |373 |Bus |\n",
- "|26-304-j19-1 |8590727 |159.TA.26-304-j19-1.4.R |8590727|19:50:00 |19:50:00 |10 |Oberengstringen, Paradies |47.4104852703573|8.45874332896223|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |735 |Bus |\n",
- "|26-304-j19-1 |8590833 |159.TA.26-304-j19-1.4.R |8590833|19:51:00 |19:51:00 |11 |Unterengstringen, Langacher |47.4122360710415|8.45316479104707|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |760 |Bus |\n",
- "|26-304-j19-1 |8594732 |159.TA.26-304-j19-1.4.R |8594732|19:53:00 |19:53:00 |12 |Unterengstringen, Sennenbüel N|47.4134944230824|8.44931101847766|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |987 |Bus |\n",
- "|26-304-j19-1 |8590831 |159.TA.26-304-j19-1.4.R |8590831|19:53:00 |19:53:00 |13 |Unterengstringen, Aegelsee |47.414977659342 |8.44603216769017|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |503 |Bus |\n",
- "|26-304-j19-1 |8590911 |159.TA.26-304-j19-1.4.R |8590911|19:55:00 |19:55:00 |14 |Weiningen ZH, Ausserdorf |47.4176826342903|8.43953734818508|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |751 |Bus |\n",
- "|26-304-j19-1 |8590913 |159.TA.26-304-j19-1.4.R |8590913|19:56:00 |19:56:00 |15 |Weiningen ZH, Lindenplatz |47.4195547602987|8.43394084396424|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |933 |Bus |\n",
- "|26-304-j19-1 |8590914 |159.TA.26-304-j19-1.4.R |8590914|19:57:00 |19:57:00 |16 |Weiningen ZH, Schulhaus |47.4183512583635|8.42866773324572|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |623 |Bus |\n",
- "|26-304-j19-1 |8590617 |159.TA.26-304-j19-1.4.R |8590617|19:59:00 |19:59:00 |17 |Geroldswil, Welbrig |47.4180716529658|8.41906474285715|Dietikon, Bahnhof |5481 |1 |19:39:00 |3 |17 |856 |Bus |\n",
- "|26-70-A-j19-1|8591061 |966.TA.26-70-A-j19-1.5.H|8591061|07:00:00 |07:00:00 |9 |Zürich Leimbach, Bahnhof |47.3332523864039|8.51859807635144|Zürich, Mittelleimbach |3928 |0 |07:00:00 |4 |5 |1203 |Bus |\n",
- "|26-70-A-j19-1|8591270 |966.TA.26-70-A-j19-1.5.H|8591270|07:02:00 |07:02:00 |10 |Zürich, Marbachweg |47.3303482449491|8.51537312448101|Zürich, Mittelleimbach |3928 |0 |07:00:00 |4 |5 |1197 |Bus |\n",
- "|26-70-A-j19-1|8591210 |966.TA.26-70-A-j19-1.5.H|8591210|07:03:00 |07:03:00 |11 |Zürich, Im Hüsli |47.3282354882425|8.51269614493396|Zürich, Mittelleimbach |3928 |0 |07:00:00 |4 |5 |723 |Bus |\n",
- "|26-70-A-j19-1|8591370 |966.TA.26-70-A-j19-1.5.H|8591370|07:03:00 |07:03:00 |12 |Zürich, Sihlweidstrasse |47.3264149182794|8.51466345540645|Zürich, Mittelleimbach |3928 |0 |07:00:00 |4 |5 |989 |Bus |\n",
- "|26-70-A-j19-1|8591278 |966.TA.26-70-A-j19-1.5.H|8591278|07:04:00 |07:04:00 |13 |Zürich, Mittelleimbach |47.3231389520848|8.51428616298707|Zürich, Mittelleimbach |3928 |0 |07:00:00 |4 |5 |139 |Bus |\n",
- "|26-61-j19-1 |8591281 |269.TA.26-61-j19-1.1.H |8591281|19:57:00 |19:57:00 |1 |Zürich, Mühlacker |47.4256326325821|8.49799970688372|Zürich, Schwamendingerplatz|2076 |0 |19:57:00 |5 |2 |208 |Bus |\n",
- "|26-61-j19-1 |8591046 |269.TA.26-61-j19-1.1.H |8591046|19:58:00 |19:58:00 |2 |Zürich, Aspholz |47.425085652811 |8.50058685490234|Zürich, Schwamendingerplatz|2076 |0 |19:57:00 |5 |2 |1002 |Bus |\n",
- "|26-703-j19-1 |8591825 |179.TA.26-703-j19-1.2.R |8591825|07:10:00 |07:10:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9385 |1 |07:10:00 |6 |9 |587 |Bus |\n",
- "|26-703-j19-1 |8590504 |179.TA.26-703-j19-1.2.R |8590504|07:11:00 |07:11:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9385 |1 |07:10:00 |6 |9 |865 |Bus |\n",
- "|26-703-j19-1 |8596005 |179.TA.26-703-j19-1.2.R |8596005|07:14:00 |07:14:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9385 |1 |07:10:00 |6 |9 |1370 |Bus |\n",
- "|26-703-j19-1 |8591832 |179.TA.26-703-j19-1.2.R |8591832|07:14:00 |07:14:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9385 |1 |07:10:00 |6 |9 |1024 |Bus |\n",
- "|26-703-j19-1 |8591147 |179.TA.26-703-j19-1.2.R |8591147|07:16:00 |07:16:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9385 |1 |07:10:00 |6 |9 |1263 |Bus |\n",
- "|26-703-j19-1 |8591162 |179.TA.26-703-j19-1.2.R |8591162|07:17:00 |07:17:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9385 |1 |07:10:00 |6 |9 |153 |Bus |\n",
- "|26-703-j19-1 |8591261 |179.TA.26-703-j19-1.2.R |8591261|07:18:00 |07:18:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9385 |1 |07:10:00 |6 |9 |1199 |Bus |\n",
- "|26-703-j19-1 |8591107 |179.TA.26-703-j19-1.2.R |8591107|07:19:00 |07:19:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9385 |1 |07:10:00 |6 |9 |1313 |Bus |\n",
- "|26-703-j19-1 |8591233 |179.TA.26-703-j19-1.2.R |8591233|07:25:00 |07:25:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9385 |1 |07:10:00 |6 |9 |1134 |Bus |\n",
- "|26-703-j19-1 |8591825 |171.TA.26-703-j19-1.2.R |8591825|07:12:00 |07:12:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9346 |1 |07:12:00 |6 |9 |587 |Bus |\n",
- "|26-703-j19-1 |8590504 |171.TA.26-703-j19-1.2.R |8590504|07:13:00 |07:13:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9346 |1 |07:12:00 |6 |9 |865 |Bus |\n",
- "|26-703-j19-1 |8596005 |171.TA.26-703-j19-1.2.R |8596005|07:16:00 |07:16:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9346 |1 |07:12:00 |6 |9 |1370 |Bus |\n",
- "|26-703-j19-1 |8591832 |171.TA.26-703-j19-1.2.R |8591832|07:16:00 |07:16:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9346 |1 |07:12:00 |6 |9 |1024 |Bus |\n",
- "|26-703-j19-1 |8591147 |171.TA.26-703-j19-1.2.R |8591147|07:18:00 |07:18:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9346 |1 |07:12:00 |6 |9 |1263 |Bus |\n",
- "|26-703-j19-1 |8591162 |171.TA.26-703-j19-1.2.R |8591162|07:19:00 |07:19:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9346 |1 |07:12:00 |6 |9 |153 |Bus |\n",
- "|26-703-j19-1 |8591261 |171.TA.26-703-j19-1.2.R |8591261|07:20:00 |07:20:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9346 |1 |07:12:00 |6 |9 |1199 |Bus |\n",
- "|26-703-j19-1 |8591107 |171.TA.26-703-j19-1.2.R |8591107|07:21:00 |07:21:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9346 |1 |07:12:00 |6 |9 |1313 |Bus |\n",
- "|26-703-j19-1 |8591233 |171.TA.26-703-j19-1.2.R |8591233|07:27:00 |07:27:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9346 |1 |07:12:00 |6 |9 |1134 |Bus |\n",
- "|26-703-j19-1 |8591825 |155.TA.26-703-j19-1.2.R |8591825|07:25:00 |07:25:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9267 |1 |07:25:00 |6 |9 |587 |Bus |\n",
- "|26-703-j19-1 |8590504 |155.TA.26-703-j19-1.2.R |8590504|07:26:00 |07:26:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9267 |1 |07:25:00 |6 |9 |865 |Bus |\n",
- "|26-703-j19-1 |8596005 |155.TA.26-703-j19-1.2.R |8596005|07:29:00 |07:29:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9267 |1 |07:25:00 |6 |9 |1370 |Bus |\n",
- "|26-703-j19-1 |8591832 |155.TA.26-703-j19-1.2.R |8591832|07:29:00 |07:29:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9267 |1 |07:25:00 |6 |9 |1024 |Bus |\n",
- "|26-703-j19-1 |8591147 |155.TA.26-703-j19-1.2.R |8591147|07:31:00 |07:31:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9267 |1 |07:25:00 |6 |9 |1263 |Bus |\n",
- "|26-703-j19-1 |8591162 |155.TA.26-703-j19-1.2.R |8591162|07:32:00 |07:32:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9267 |1 |07:25:00 |6 |9 |153 |Bus |\n",
- "|26-703-j19-1 |8591261 |155.TA.26-703-j19-1.2.R |8591261|07:33:00 |07:33:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9267 |1 |07:25:00 |6 |9 |1199 |Bus |\n",
- "|26-703-j19-1 |8591107 |155.TA.26-703-j19-1.2.R |8591107|07:34:00 |07:34:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9267 |1 |07:25:00 |6 |9 |1313 |Bus |\n",
- "|26-703-j19-1 |8591233 |155.TA.26-703-j19-1.2.R |8591233|07:40:00 |07:40:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9267 |1 |07:25:00 |6 |9 |1134 |Bus |\n",
- "|26-703-j19-1 |8591825 |144.TA.26-703-j19-1.2.R |8591825|07:27:00 |07:27:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9231 |1 |07:27:00 |6 |9 |587 |Bus |\n",
- "|26-703-j19-1 |8590504 |144.TA.26-703-j19-1.2.R |8590504|07:28:00 |07:28:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9231 |1 |07:27:00 |6 |9 |865 |Bus |\n",
- "|26-703-j19-1 |8596005 |144.TA.26-703-j19-1.2.R |8596005|07:31:00 |07:31:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9231 |1 |07:27:00 |6 |9 |1370 |Bus |\n",
- "|26-703-j19-1 |8591832 |144.TA.26-703-j19-1.2.R |8591832|07:31:00 |07:31:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9231 |1 |07:27:00 |6 |9 |1024 |Bus |\n",
- "|26-703-j19-1 |8591147 |144.TA.26-703-j19-1.2.R |8591147|07:33:00 |07:33:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9231 |1 |07:27:00 |6 |9 |1263 |Bus |\n",
- "|26-703-j19-1 |8591162 |144.TA.26-703-j19-1.2.R |8591162|07:34:00 |07:34:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9231 |1 |07:27:00 |6 |9 |153 |Bus |\n",
- "|26-703-j19-1 |8591261 |144.TA.26-703-j19-1.2.R |8591261|07:35:00 |07:35:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9231 |1 |07:27:00 |6 |9 |1199 |Bus |\n",
- "|26-703-j19-1 |8591107 |144.TA.26-703-j19-1.2.R |8591107|07:36:00 |07:36:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9231 |1 |07:27:00 |6 |9 |1313 |Bus |\n",
- "|26-703-j19-1 |8591233 |144.TA.26-703-j19-1.2.R |8591233|07:42:00 |07:42:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9231 |1 |07:27:00 |6 |9 |1134 |Bus |\n",
- "|26-703-j19-1 |8591825 |120.TA.26-703-j19-1.2.R |8591825|07:40:00 |07:40:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9159 |1 |07:40:00 |6 |9 |587 |Bus |\n",
- "|26-703-j19-1 |8590504 |120.TA.26-703-j19-1.2.R |8590504|07:41:00 |07:41:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9159 |1 |07:40:00 |6 |9 |865 |Bus |\n",
- "|26-703-j19-1 |8596005 |120.TA.26-703-j19-1.2.R |8596005|07:44:00 |07:44:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9159 |1 |07:40:00 |6 |9 |1370 |Bus |\n",
- "|26-703-j19-1 |8591832 |120.TA.26-703-j19-1.2.R |8591832|07:44:00 |07:44:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9159 |1 |07:40:00 |6 |9 |1024 |Bus |\n",
- "+-------------+---------------+------------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+---------------------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
+ "+-------------+---------------+------------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+---------------------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+\n",
+ "|route_id |stop_id_general|trip_id |stop_id|arrival_time|departure_time|stop_sequence|stop_name |stop_lat |stop_lon |trip_headsign |trip_short_name|direction_id|departure_first_stop|route_int|stop_count|stop_int|route_desc|monotonically_increasing_id|\n",
+ "+-------------+---------------+------------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+---------------------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+\n",
+ "|26-13-j19-1 |8576240 |2064.TA.26-13-j19-1.24.H|8576240|07:00:00 |07:00:00 |5 |Zürich, Meierhofplatz |47.4020100860391|8.49937412926861|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1221 |Tram |0 |\n",
+ "|26-13-j19-1 |8591353 |2064.TA.26-13-j19-1.24.H|8591353|07:01:00 |07:01:00 |6 |Zürich, Schwert |47.3997299435837|8.50461130737576|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |816 |Tram |1 |\n",
+ "|26-13-j19-1 |8591039 |2064.TA.26-13-j19-1.24.H|8591039|07:02:00 |07:02:00 |7 |Zürich, Alte Trotte |47.3977659017765|8.50725235431143|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |776 |Tram |2 |\n",
+ "|26-13-j19-1 |8591121 |2064.TA.26-13-j19-1.24.H|8591121|07:03:00 |07:03:00 |8 |Zürich, Eschergutweg |47.3962700189648|8.51204037477646|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |307 |Tram |3 |\n",
+ "|26-13-j19-1 |8591417 |2064.TA.26-13-j19-1.24.H|8591417|07:05:00 |07:05:00 |9 |Zürich, Waidfussweg |47.3954977376399|8.51840044698891|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |347 |Tram |4 |\n",
+ "|26-13-j19-1 |8591437 |2064.TA.26-13-j19-1.24.H|8591437|07:06:00 |07:06:00 |10 |Zürich, Wipkingerplatz |47.3925909395293|8.52357474302616|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1015 |Tram |5 |\n",
+ "|26-13-j19-1 |8580522 |2064.TA.26-13-j19-1.24.H|8580522|07:08:00 |07:08:00 |11 |Zürich, Escher-Wyss-Platz |47.3907969150758|8.5223979500038 |Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |455 |Tram |6 |\n",
+ "|26-13-j19-1 |8591110 |2064.TA.26-13-j19-1.24.H|8591110|07:09:00 |07:09:00 |12 |Zürich, Dammweg |47.3884919601296|8.52639545301869|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1102 |Tram |7 |\n",
+ "|26-13-j19-1 |8591306 |2064.TA.26-13-j19-1.24.H|8591306|07:10:00 |07:10:00 |13 |Zürich, Quellenstrasse |47.3867403702341|8.52874903906341|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |786 |Tram |8 |\n",
+ "|26-13-j19-1 |8591257 |2064.TA.26-13-j19-1.24.H|8591257|07:11:00 |07:11:00 |14 |Zürich, Limmatplatz |47.3845994590919|8.53162364797299|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |388 |Tram |9 |\n",
+ "|26-13-j19-1 |8591282 |2064.TA.26-13-j19-1.24.H|8591282|07:12:00 |07:12:00 |15 |Zürich, Museum für Gestaltung |47.3821239221899|8.53493843137185|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |138 |Tram |10 |\n",
+ "|26-13-j19-1 |8591368 |2064.TA.26-13-j19-1.24.H|8591368|07:14:00 |07:14:00 |16 |Zürich, Sihlquai/HB |47.3798733332196|8.53760642776606|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |880 |Tram |11 |\n",
+ "|26-13-j19-1 |8587349 |2064.TA.26-13-j19-1.24.H|8587349|07:16:00 |07:16:00 |17 |Zürich, Bahnhofquai/HB |47.3775618175159|8.54173867807358|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |602 |Tram |12 |\n",
+ "|26-13-j19-1 |8591067 |2064.TA.26-13-j19-1.24.H|8591067|07:18:00 |07:18:00 |18 |Zürich, Bahnhofstrasse/HB |47.3765581015114|8.53994204750509|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |616 |Tram |13 |\n",
+ "|26-13-j19-1 |8591316 |2064.TA.26-13-j19-1.24.H|8591316|07:20:00 |07:20:00 |19 |Zürich, Rennweg |47.3730662375955|8.53845982728609|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |373 |Tram |14 |\n",
+ "|26-13-j19-1 |8591299 |2064.TA.26-13-j19-1.24.H|8591299|07:22:00 |07:22:00 |20 |Zürich, Paradeplatz |47.3693672863583|8.53876525448273|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1223 |Tram |15 |\n",
+ "|26-13-j19-1 |8591384 |2064.TA.26-13-j19-1.24.H|8591384|07:23:00 |07:23:00 |21 |Zürich, Stockerstrasse |47.3677002399791|8.53501029659459|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |871 |Tram |16 |\n",
+ "|26-13-j19-1 |8591404 |2064.TA.26-13-j19-1.24.H|8591404|07:24:00 |07:24:00 |22 |Zürich, Tunnelstrasse |47.3661426599847|8.53253094641008|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |890 |Tram |17 |\n",
+ "|26-13-j19-1 |8591059 |2064.TA.26-13-j19-1.24.H|8591059|07:25:00 |07:25:00 |23 |Zürich Enge, Bahnhof/Bederstr.|47.3645546111557|8.53045583810347|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |177 |Tram |18 |\n",
+ "|26-13-j19-1 |8591415 |2064.TA.26-13-j19-1.24.H|8591415|07:27:00 |07:27:00 |24 |Zürich, Waffenplatzstrasse |47.3614818138862|8.52574866601403|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1265 |Tram |19 |\n",
+ "|26-13-j19-1 |8591366 |2064.TA.26-13-j19-1.24.H|8591366|07:28:00 |07:28:00 |25 |Zürich, Sihlcity Nord |47.3600640074787|8.52303575385561|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |968 |Tram |20 |\n",
+ "|26-13-j19-1 |8591329 |2064.TA.26-13-j19-1.24.H|8591329|07:29:00 |07:29:00 |26 |Zürich, Saalsporthalle |47.3578611597087|8.52040369007277|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1238 |Tram |21 |\n",
+ "|26-13-j19-1 |8591245 |2064.TA.26-13-j19-1.24.H|8591245|07:30:00 |07:30:00 |27 |Zürich, Laubegg |47.3587313564196|8.51708890667391|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1216 |Tram |22 |\n",
+ "|26-13-j19-1 |8591405 |2064.TA.26-13-j19-1.24.H|8591405|07:32:00 |07:32:00 |28 |Zürich, Uetlihof |47.3567353594536|8.51396276948474|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |847 |Tram |23 |\n",
+ "|26-13-j19-1 |8591385 |2064.TA.26-13-j19-1.24.H|8591385|07:33:00 |07:33:00 |29 |Zürich, Strassenverkehrsamt |47.3530717783138|8.51171698127413|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |275 |Tram |24 |\n",
+ "|26-13-j19-1 |8591034 |2064.TA.26-13-j19-1.24.H|8591034|07:34:00 |07:34:00 |30 |Zürich, Albisgütli |47.3519945640447|8.5077104951064 |Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1352 |Tram |25 |\n",
+ "|26-11-A-j19-1|8591049 |791.TA.26-11-A-j19-1.3.H|8591049|19:49:00 |19:49:00 |1 |Zürich, Auzelg |47.4166918393693|8.568113214819 |Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |1117 |Tram |26 |\n",
+ "|26-11-A-j19-1|8591128 |791.TA.26-11-A-j19-1.3.H|8591128|19:51:00 |19:51:00 |2 |Zürich, Fernsehstudio |47.4181749855684|8.56174415945371|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |156 |Tram |27 |\n",
+ "|26-11-A-j19-1|8591830 |791.TA.26-11-A-j19-1.3.H|8591830|19:52:00 |19:52:00 |3 |Glattpark, Glattpark |47.4199559214972|8.55716275150406|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |671 |Tram |28 |\n",
+ "|26-11-A-j19-1|8591294 |791.TA.26-11-A-j19-1.3.H|8591294|19:53:00 |19:53:00 |4 |Zürich, Oerlikerhus |47.4175853791724|8.5542072942189 |Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |573 |Tram |29 |\n",
+ "|26-11-A-j19-1|8591256 |791.TA.26-11-A-j19-1.3.H|8591256|19:54:00 |19:54:00 |5 |Zürich, Leutschenbach |47.4146433269471|8.55130573585079|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |444 |Tram |30 |\n",
+ "|26-11-A-j19-1|8591273 |791.TA.26-11-A-j19-1.3.H|8591273|19:55:00 |19:55:00 |6 |Zürich, Messe/Hallenstadion |47.4106919651348|8.55068589830466|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |389 |Tram |31 |\n",
+ "|26-11-A-j19-1|8591382 |791.TA.26-11-A-j19-1.3.H|8591382|19:57:00 |19:57:00 |7 |Zürich, Sternen Oerlikon |47.4100718783688|8.54623025449481|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |687 |Tram |32 |\n",
+ "|26-11-A-j19-1|8580449 |791.TA.26-11-A-j19-1.3.H|8580449|19:59:00 |19:59:00 |8 |Zürich Oerlikon, Bahnhof |47.411494419524 |8.54479295004002|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |766 |Tram |33 |\n",
+ "|26-304-j19-1 |8591057 |159.TA.26-304-j19-1.4.R |8591057|19:39:00 |19:39:00 |1 |Zürich Altstetten, Bahnhof N |47.392067942097 |8.48990588617267|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |521 |Bus |34 |\n",
+ "|26-304-j19-1 |8591402 |159.TA.26-304-j19-1.4.R |8591402|19:41:00 |19:41:00 |2 |Zürich, Tüffenwies |47.3979787271809|8.49434356367684|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |1000 |Bus |35 |\n",
+ "|26-304-j19-1 |8591434 |159.TA.26-304-j19-1.4.R |8591434|19:41:00 |19:41:00 |3 |Zürich, Winzerhalde |47.4000582901792|8.4945681424979 |Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |711 |Bus |36 |\n",
+ "|26-304-j19-1 |8591197 |159.TA.26-304-j19-1.4.R |8591197|19:42:00 |19:42:00 |4 |Zürich, Hohenklingensteig |47.4013473348052|8.49021131336931|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |1125 |Bus |37 |\n",
+ "|26-304-j19-1 |8591436 |159.TA.26-304-j19-1.4.R |8591436|19:43:00 |19:43:00 |5 |Zürich, Winzerstrasse Süd |47.403372044054 |8.486123978826 |Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |149 |Bus |38 |\n",
+ "|26-304-j19-1 |8591136 |159.TA.26-304-j19-1.4.R |8591136|19:46:00 |19:46:00 |6 |Zürich, Frankental |47.4057006674825|8.48137189097235|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |690 |Bus |39 |\n",
+ "|26-304-j19-1 |8590725 |159.TA.26-304-j19-1.4.R |8590725|19:47:00 |19:47:00 |7 |Oberengstringen, Eggbühl |47.4055243523393|8.47408655401713|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |1329 |Bus |40 |\n",
+ "|26-304-j19-1 |8590726 |159.TA.26-304-j19-1.4.R |8590726|19:48:00 |19:48:00 |8 |Oberengstringen, Lanzrain |47.407342193939 |8.46795106062573|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |763 |Bus |41 |\n",
+ "|26-304-j19-1 |8590728 |159.TA.26-304-j19-1.4.R |8590728|19:49:00 |19:49:00 |9 |Oberengstringen, Zentrum |47.4091295756792|8.46260608468448|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |376 |Bus |42 |\n",
+ "|26-304-j19-1 |8590727 |159.TA.26-304-j19-1.4.R |8590727|19:50:00 |19:50:00 |10 |Oberengstringen, Paradies |47.4104852703573|8.45874332896223|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |740 |Bus |43 |\n",
+ "|26-304-j19-1 |8590833 |159.TA.26-304-j19-1.4.R |8590833|19:51:00 |19:51:00 |11 |Unterengstringen, Langacher |47.4122360710415|8.45316479104707|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |764 |Bus |44 |\n",
+ "|26-304-j19-1 |8594732 |159.TA.26-304-j19-1.4.R |8594732|19:53:00 |19:53:00 |12 |Unterengstringen, Sennenbüel N|47.4134944230824|8.44931101847766|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |987 |Bus |45 |\n",
+ "|26-304-j19-1 |8590831 |159.TA.26-304-j19-1.4.R |8590831|19:53:00 |19:53:00 |13 |Unterengstringen, Aegelsee |47.414977659342 |8.44603216769017|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |502 |Bus |46 |\n",
+ "|26-304-j19-1 |8590911 |159.TA.26-304-j19-1.4.R |8590911|19:55:00 |19:55:00 |14 |Weiningen ZH, Ausserdorf |47.4176826342903|8.43953734818508|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |754 |Bus |47 |\n",
+ "|26-304-j19-1 |8590913 |159.TA.26-304-j19-1.4.R |8590913|19:56:00 |19:56:00 |15 |Weiningen ZH, Lindenplatz |47.4195547602987|8.43394084396424|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |934 |Bus |48 |\n",
+ "|26-304-j19-1 |8590914 |159.TA.26-304-j19-1.4.R |8590914|19:57:00 |19:57:00 |16 |Weiningen ZH, Schulhaus |47.4183512583635|8.42866773324572|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |622 |Bus |49 |\n",
+ "|26-304-j19-1 |8590617 |159.TA.26-304-j19-1.4.R |8590617|19:59:00 |19:59:00 |17 |Geroldswil, Welbrig |47.4180716529658|8.41906474285715|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |859 |Bus |50 |\n",
+ "|26-61-j19-1 |8591281 |269.TA.26-61-j19-1.1.H |8591281|19:57:00 |19:57:00 |1 |Zürich, Mühlacker |47.4256326325821|8.49799970688372|Zürich, Schwamendingerplatz|2076 |0 |19:57:00 |3 |2 |212 |Bus |51 |\n",
+ "|26-61-j19-1 |8591046 |269.TA.26-61-j19-1.1.H |8591046|19:58:00 |19:58:00 |2 |Zürich, Aspholz |47.425085652811 |8.50058685490234|Zürich, Schwamendingerplatz|2076 |0 |19:57:00 |3 |2 |1003 |Bus |52 |\n",
+ "|26-703-j19-1 |8591825 |179.TA.26-703-j19-1.2.R |8591825|07:10:00 |07:10:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |580 |Bus |53 |\n",
+ "|26-703-j19-1 |8590504 |179.TA.26-703-j19-1.2.R |8590504|07:11:00 |07:11:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |861 |Bus |54 |\n",
+ "|26-703-j19-1 |8596005 |179.TA.26-703-j19-1.2.R |8596005|07:14:00 |07:14:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1366 |Bus |55 |\n",
+ "|26-703-j19-1 |8591832 |179.TA.26-703-j19-1.2.R |8591832|07:14:00 |07:14:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1023 |Bus |56 |\n",
+ "|26-703-j19-1 |8591147 |179.TA.26-703-j19-1.2.R |8591147|07:16:00 |07:16:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1260 |Bus |57 |\n",
+ "|26-703-j19-1 |8591162 |179.TA.26-703-j19-1.2.R |8591162|07:17:00 |07:17:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |146 |Bus |58 |\n",
+ "|26-703-j19-1 |8591261 |179.TA.26-703-j19-1.2.R |8591261|07:18:00 |07:18:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1197 |Bus |59 |\n",
+ "|26-703-j19-1 |8591107 |179.TA.26-703-j19-1.2.R |8591107|07:19:00 |07:19:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1311 |Bus |60 |\n",
+ "|26-703-j19-1 |8591233 |179.TA.26-703-j19-1.2.R |8591233|07:25:00 |07:25:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1133 |Bus |61 |\n",
+ "|26-703-j19-1 |8591825 |171.TA.26-703-j19-1.2.R |8591825|07:12:00 |07:12:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |580 |Bus |62 |\n",
+ "|26-703-j19-1 |8590504 |171.TA.26-703-j19-1.2.R |8590504|07:13:00 |07:13:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |861 |Bus |63 |\n",
+ "|26-703-j19-1 |8596005 |171.TA.26-703-j19-1.2.R |8596005|07:16:00 |07:16:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1366 |Bus |64 |\n",
+ "|26-703-j19-1 |8591832 |171.TA.26-703-j19-1.2.R |8591832|07:16:00 |07:16:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1023 |Bus |65 |\n",
+ "|26-703-j19-1 |8591147 |171.TA.26-703-j19-1.2.R |8591147|07:18:00 |07:18:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1260 |Bus |66 |\n",
+ "|26-703-j19-1 |8591162 |171.TA.26-703-j19-1.2.R |8591162|07:19:00 |07:19:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |146 |Bus |67 |\n",
+ "|26-703-j19-1 |8591261 |171.TA.26-703-j19-1.2.R |8591261|07:20:00 |07:20:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1197 |Bus |68 |\n",
+ "|26-703-j19-1 |8591107 |171.TA.26-703-j19-1.2.R |8591107|07:21:00 |07:21:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1311 |Bus |69 |\n",
+ "|26-703-j19-1 |8591233 |171.TA.26-703-j19-1.2.R |8591233|07:27:00 |07:27:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1133 |Bus |70 |\n",
+ "|26-703-j19-1 |8591825 |156.TA.26-703-j19-1.2.R |8591825|07:25:00 |07:25:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |580 |Bus |71 |\n",
+ "|26-703-j19-1 |8590504 |156.TA.26-703-j19-1.2.R |8590504|07:26:00 |07:26:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |861 |Bus |72 |\n",
+ "|26-703-j19-1 |8596005 |156.TA.26-703-j19-1.2.R |8596005|07:29:00 |07:29:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1366 |Bus |73 |\n",
+ "|26-703-j19-1 |8591832 |156.TA.26-703-j19-1.2.R |8591832|07:29:00 |07:29:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1023 |Bus |74 |\n",
+ "|26-703-j19-1 |8591147 |156.TA.26-703-j19-1.2.R |8591147|07:31:00 |07:31:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1260 |Bus |75 |\n",
+ "|26-703-j19-1 |8591162 |156.TA.26-703-j19-1.2.R |8591162|07:32:00 |07:32:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |146 |Bus |76 |\n",
+ "|26-703-j19-1 |8591261 |156.TA.26-703-j19-1.2.R |8591261|07:33:00 |07:33:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1197 |Bus |77 |\n",
+ "|26-703-j19-1 |8591107 |156.TA.26-703-j19-1.2.R |8591107|07:34:00 |07:34:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1311 |Bus |78 |\n",
+ "|26-703-j19-1 |8591233 |156.TA.26-703-j19-1.2.R |8591233|07:40:00 |07:40:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1133 |Bus |79 |\n",
+ "|26-703-j19-1 |8591825 |144.TA.26-703-j19-1.2.R |8591825|07:27:00 |07:27:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |580 |Bus |80 |\n",
+ "|26-703-j19-1 |8590504 |144.TA.26-703-j19-1.2.R |8590504|07:28:00 |07:28:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |861 |Bus |81 |\n",
+ "|26-703-j19-1 |8596005 |144.TA.26-703-j19-1.2.R |8596005|07:31:00 |07:31:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1366 |Bus |82 |\n",
+ "|26-703-j19-1 |8591832 |144.TA.26-703-j19-1.2.R |8591832|07:31:00 |07:31:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1023 |Bus |83 |\n",
+ "|26-703-j19-1 |8591147 |144.TA.26-703-j19-1.2.R |8591147|07:33:00 |07:33:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1260 |Bus |84 |\n",
+ "|26-703-j19-1 |8591162 |144.TA.26-703-j19-1.2.R |8591162|07:34:00 |07:34:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |146 |Bus |85 |\n",
+ "|26-703-j19-1 |8591261 |144.TA.26-703-j19-1.2.R |8591261|07:35:00 |07:35:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1197 |Bus |86 |\n",
+ "|26-703-j19-1 |8591107 |144.TA.26-703-j19-1.2.R |8591107|07:36:00 |07:36:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1311 |Bus |87 |\n",
+ "|26-703-j19-1 |8591233 |144.TA.26-703-j19-1.2.R |8591233|07:42:00 |07:42:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1133 |Bus |88 |\n",
+ "|26-703-j19-1 |8591825 |120.TA.26-703-j19-1.2.R |8591825|07:40:00 |07:40:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |580 |Bus |89 |\n",
+ "|26-703-j19-1 |8590504 |120.TA.26-703-j19-1.2.R |8590504|07:41:00 |07:41:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |861 |Bus |90 |\n",
+ "|26-703-j19-1 |8596005 |120.TA.26-703-j19-1.2.R |8596005|07:44:00 |07:44:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1366 |Bus |91 |\n",
+ "|26-703-j19-1 |8591832 |120.TA.26-703-j19-1.2.R |8591832|07:44:00 |07:44:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1023 |Bus |92 |\n",
+ "|26-703-j19-1 |8591147 |120.TA.26-703-j19-1.2.R |8591147|07:46:00 |07:46:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1260 |Bus |93 |\n",
+ "|26-703-j19-1 |8591162 |120.TA.26-703-j19-1.2.R |8591162|07:47:00 |07:47:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |146 |Bus |94 |\n",
+ "|26-703-j19-1 |8591261 |120.TA.26-703-j19-1.2.R |8591261|07:48:00 |07:48:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1197 |Bus |95 |\n",
+ "|26-703-j19-1 |8591107 |120.TA.26-703-j19-1.2.R |8591107|07:49:00 |07:49:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1311 |Bus |96 |\n",
+ "|26-703-j19-1 |8591233 |120.TA.26-703-j19-1.2.R |8591233|07:55:00 |07:55:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1133 |Bus |97 |\n",
+ "|26-703-j19-1 |8591825 |95.TA.26-703-j19-1.2.R |8591825|07:42:00 |07:42:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9119 |1 |07:42:00 |4 |9 |580 |Bus |98 |\n",
+ "|26-703-j19-1 |8590504 |95.TA.26-703-j19-1.2.R |8590504|07:43:00 |07:43:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9119 |1 |07:42:00 |4 |9 |861 |Bus |99 |\n",
+ "+-------------+---------------+------------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+---------------------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+\n",
"only showing top 100 rows"
]
}
],
"source": [
"stop_times = stop_times.sort(stop_times.route_int.cast('int'), \n",
" stop_times.departure_first_stop, \n",
" stop_times.trip_id, \n",
- " stop_times.stop_sequence.cast('int'))\n",
+ " stop_times.stop_sequence.cast('int'))\\\n",
+ ".withColumn('monotonically_increasing_id', F.monotonically_increasing_id())\n",
+ "\n",
"stop_times.show(100, 0)"
]
},
{
"cell_type": "code",
- "execution_count": 75,
+ "execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"stop_times.write.csv('data/lgpt_guys/stop_times_final_cyril.csv', header=True, mode = 'overwrite')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Footpaths\n",
"\n",
"We make the simplifying assumption that within stops sharing the same 7 first characters (`stop_id_general`) (almost the same as grouping them by parent stations, but more robust), transfer times take 2 minutes, no matter the stop. This may break for very large stations (such as Zürich HB).\n",
"\n",
"Between different `stop_id_general`, walking time is computed as the distance (which must be max. 500m) divided by a constant walking speed of 50 meters per minute.\n",
"\n",
"We only consider stops present in the final and filtered `stop_times` table.\n",
"\n",
"- Get all unique stop_int with a single pair of coordinates (first of the groupby, doesnt need to be dead precise)\n",
"\n",
"- join two copies of this dataframe\n",
"- drop lines where general_stop_id is the same\n",
"- compute the distance with geopy for all other lines between the pairs of coordinates\n",
"- filter based on distance <=500m\n",
"- transform distance to walking time in seconds\n",
"- order by stop_int1, then stop_int2\n",
"- save"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 74,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "stop_times = spark.read.csv('data/lgpt_guys/stop_times_final_cyril.csv', header=True)"
+ ]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Building lookup tables to pretty-print results after RAPTOR:"
+ "Getting one pair of coordinates per parent stop:"
]
},
{
"cell_type": "code",
- "execution_count": 76,
+ "execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+---------------+----------------+----------------+--------------------+\n",
+ "|stop_id_general| stop_lat| stop_lon| stop_name|\n",
+ "+---------------+----------------+----------------+--------------------+\n",
+ "| 8576163|47.4446882894765|8.63618754705906| Bassersdorf, Rietli|\n",
+ "| 8588279|47.4314059613092|8.66796994181563|Tagelswangen, Ger...|\n",
+ "| 8588740|47.4448766240902|8.57874926778446|Kloten, Neubrunne...|\n",
+ "| 8595714|47.4485885009565|8.45665025434994| Regensdorf, Allmend|\n",
+ "| 8588054|47.3600457522121|8.71459250506792| Uster, Meieracher|\n",
+ "| 8590851|47.3894589587793|8.67489595265716|Volketswil, Chappeli|\n",
+ "| 8591345|47.3845933768342|8.47776964668253|Zürich, Schulhaus...|\n",
+ "| 8591264|47.4060107375098|8.58139031472014| Zürich, Luegisland|\n",
+ "| 8503202|47.2960953402584|8.56475351565593| Thalwil|\n",
+ "| 8573163|47.2857066724744|8.55511459265598|Gattikon, Gattike...|\n",
+ "| 8590795|47.3955220616337|8.46779834702741| Schlieren, Mülligen|\n",
+ "| 8590652|47.2544735496734|8.60926503799025|Horgen, Glärnischhof|\n",
+ "| 8594307|47.4384910970626|8.36438429150518|Würenlos, Lärchenweg|\n",
+ "| 8591110|47.3884919601296|8.52639545301869| Zürich, Dammweg|\n",
+ "| 8591052|47.3783343616704|8.52287405710445|Zürich, Bäckeranlage|\n",
+ "| 8582529|47.3522623481765|8.35006514587432|Bremgarten AG, Zu...|\n",
+ "| 8591319|47.4106433311425| 8.5592648092692| Zürich, Riedgraben|\n",
+ "| 8502221|47.3574960379336|8.43754308825406| Birmensdorf ZH|\n",
+ "| 8591438|47.3591816623693|8.58524408728955|Zürich, Witikon Z...|\n",
+ "| 8590473|47.3151369463988|8.53576488143336|Adliswil, Hofacke...|\n",
+ "+---------------+----------------+----------------+--------------------+\n",
+ "only showing top 20 rows"
+ ]
}
],
"source": [
- "stop_times = spark.read.csv('data/lgpt_guys/stop_times_final_cyril.csv', header=True)"
+ "stop_times.select(stop_times.stop_id_general, stop_times.stop_lat, stop_times.stop_lon, stop_times.stop_name).dropDuplicates()\\\n",
+ ".show()"
]
},
{
"cell_type": "code",
- "execution_count": 77,
+ "execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
- "# lookup table for routes"
+ "from pyspark.sql.window import Window\n",
+ "w= (\n",
+ " Window.partitionBy(\"stop_id_general\")\n",
+ " .orderBy('stop_name')\n",
+ ")"
]
},
{
"cell_type": "code",
- "execution_count": 78,
+ "execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "+---------+--------------+-------------+--------------------+\n",
- "|route_int| route_id| route_desc| trip_headsign|\n",
- "+---------+--------------+-------------+--------------------+\n",
- "| 0| 26-10-j19-1| Tram|Zürich Flughafen,...|\n",
- "| 1| 26-13-j19-1| Tram| Zürich, Albisgütli|\n",
- "| 2| 26-11-A-j19-1| Tram| Zürich, Rehalp|\n",
- "| 3| 26-304-j19-1| Bus| Dietikon, Bahnhof|\n",
- "| 4| 26-70-A-j19-1| Bus|Zürich, Mittellei...|\n",
- "| 5| 26-61-j19-1| Bus|Zürich, Schwamend...|\n",
- "| 6| 26-703-j19-1| Bus| Zürich, Klusplatz|\n",
- "| 7| 26-652-j19-1| Bus| Effretikon, Bahnhof|\n",
- "| 8| 26-83-j19-1| Bus|Zürich Altstetten...|\n",
- "| 9| 26-749-j19-1| Bus| Dietlikon, Bahnhof|\n",
- "| 10| 26-24-j19-1| S-Bahn| Weinfelden|\n",
- "| 11| 26-640-j19-1| Bus| Neschwil, Post|\n",
- "| 12| 26-725-j19-1| Bus| Volketswil, Dorf|\n",
- "| 13| 79-736-j19-1| Bus|Zürich Flughafen,...|\n",
- "| 14| 26-24-j19-1| S-Bahn| Weinfelden|\n",
- "| 15|79-373-2-j19-1| Schiff| Küsnacht ZSG|\n",
- "| 16| 26-658-j19-1| Bus| Effretikon, Bahnhof|\n",
- "| 17| 26-449-j19-1| Bus|Buchs-Dällikon, B...|\n",
- "| 18| 26-303-j19-1| Bus| Killwangen, Bahnhof|\n",
- "| 19| 26-17-j19-1| Tram|Zürich, Bahnhofpl...|\n",
- "| 20| 26-7-A-j19-1| S-Bahn| Winterthur|\n",
- "| 21| 26-24-j19-1| S-Bahn| Zug|\n",
- "| 22| 26-720-j19-1| Bus| Effretikon, Bahnhof|\n",
- "| 23| 26-145-j19-1| Bus| Thalwil, Zentrum|\n",
- "| 24| 26-185-j19-1| Bus|Zürich Wollishofe...|\n",
- "| 25| 26-14-j19-1| S-Bahn| Hinwil|\n",
- "| 26| 26-8-A-j19-1| S-Bahn| Pfäffikon SZ|\n",
- "| 27| 1-322-j19-1| Bus|Baden, Postautost...|\n",
- "| 28|26-962-A-j19-1| Bus|Erlenbach ZH, Bah...|\n",
- "| 29| 26-301-j19-1| Bus| Dietikon, Bahnhof|\n",
- "| 30| 26-533-j19-1| Taxi|Niederhasli, Nass...|\n",
- "| 31| 26-78-j19-1| Bus| Zürich, Bändliweg|\n",
- "| 32| 26-4-j19-1| Tram|Zürich Altstetten...|\n",
- "| 33| 26-89-j19-1| Bus| Zürich, Heizenholz|\n",
- "| 34| 26-15-A-j19-1| Tram|Zürich, Bucheggplatz|\n",
- "| 35| 26-25-A-j19-1|Standseilbahn| Zürich, Dolder|\n",
- "| 36| 26-919-j19-1| Bus|Zumikon, Dorfzentrum|\n",
- "| 37| 26-302-j19-1| Bus| Urdorf, Oberurdorf|\n",
- "| 38| 1-444-j19-1| Bus|Bremgarten AG, Ob...|\n",
- "| 39| 26-8-C-j19-1| Tram| Zürich, Klusplatz|\n",
- "| 40| 79-18-A-j19-1| Bus| Forch, Bahnhof|\n",
- "| 41| 26-18-j19-1| S-Bahn|Zürich Stadelhofe...|\n",
- "| 42| 26-2-j19-1| S-Bahn| Ziegelbrücke|\n",
- "| 43| 26-8-A-j19-1| S-Bahn| Pfäffikon SZ|\n",
- "| 44| 26-726-j19-1| Bus|Schwerzenbach ZH,...|\n",
- "| 44| 26-721-j19-1| Bus|Schwerzenbach ZH,...|\n",
- "| 45| 26-456-j19-1| Bus|Regensdorf-Watt, ...|\n",
- "| 46| 26-311-j19-1| Bus| Dietikon, Bahnhof|\n",
- "| 47| 26-772-j19-1| Bus|Wallisellen, Schu...|\n",
- "| 48| 26-17-j19-1| Tram|Zürich Wiedikon, ...|\n",
- "| 49| 26-21-j19-1| S-Bahn| Regensdorf-Watt|\n",
- "| 50| 26-7-A-j19-1| S-Bahn| Winterthur|\n",
- "| 51| 26-845-j19-1| Bus|Gossau ZH, Mittel...|\n",
- "| 51| 26-845-j19-1| Bus| Grüningen, Adler|\n",
- "| 52| 26-768-j19-1| Bus|Zürich Flughafen,...|\n",
- "| 53| 26-13-j19-1| Tram| Zürich, Frankental|\n",
- "| 54| 26-2-A-j19-1| Tram|Zürich Tiefenbrun...|\n",
- "| 55| 26-36-j19-1| InterRegio| Basel SBB|\n",
- "| 55| 26-19-j19-1| S-Bahn| Koblenz|\n",
- "| 55| 26-19-j19-1| S-Bahn| Dietikon|\n",
- "| 56| 1-445-j19-1| Bus|Zürich Enge, Bahn...|\n",
- "| 57| 26-451-j19-1| Bus|Adlikon b. R., Le...|\n",
- "| 58| 26-2-A-j19-1| Tram|Zürich Tiefenbrun...|\n",
- "| 59| 26-304-j19-1| Bus| Dietikon, Bahnhof|\n",
- "| 60| 26-14-A-j19-1| Tram| Zürich, Seebach|\n",
- "| 61| 26-703-j19-1| Bus| Zürich, Klusplatz|\n",
- "| 62| 26-10-B-j19-1| S-Bahn| Uetliberg|\n",
- "| 63| 26-787-j19-1| Bus|Brüttisellen, Ob....|\n",
- "| 64| 26-743-j19-1| Bus| Stettbach, Bahnhof|\n",
- "| 65| 26-771-j19-1| Bus|Wallisellen, Bahnhof|\n",
- "| 66| 26-38-j19-1| Bus| Zürich, Waidspital|\n",
- "| 67| 26-14-A-j19-1| Tram| Zürich, Triemli|\n",
- "| 68|79-373-2-j19-1| Schiff| Thalwil ZSG|\n",
- "| 69| 26-845-j19-1| Bus| Uster, Bahnhof|\n",
- "| 70| 26-452-j19-1| Bus|Regensdorf, Moosä...|\n",
- "| 71| 26-813-j19-1| Bus| Uster, Bahnhof|\n",
- "| 72| 26-31-j19-1| Bus| Zürich, Farbhof|\n",
- "| 73| 26-6-A-j19-1| S-Bahn| Baden|\n",
- "| 74| 26-14-A-j19-1| Tram| Zürich, Seebach|\n",
- "| 75| 26-811-j19-1| Bus| Uster, Bahnhof|\n",
- "| 76| 26-652-j19-1| Bus| Effretikon, Bahnhof|\n",
- "| 77| 26-726-j19-1| Bus| Volketswil, Dorf|\n",
- "| 78| 26-165-j19-1| Bus|Rüschlikon, Park ...|\n",
- "| 79| 26-31-j19-1| Bus|Zürich, Kienasten...|\n",
- "| 80| 26-75-A-j19-1| Bus|Zürich, Schwamend...|\n",
- "| 81| 26-10-B-j19-1| S-Bahn| Zürich Selnau|\n",
- "| 82| 79-10-B-j19-1| S-Bahn| Zürich Triemli|\n",
- "| 83| 26-787-j19-1| Bus|Zürich Oerlikon, ...|\n",
- "| 84| 26-35-B-j19-1| Bus| Zürich, Solidapark|\n",
- "| 85| 26-89-j19-1| Bus|Zürich Altstetten...|\n",
- "| 86| 26-6-B-j19-1| Tram| Zürich, Zoo|\n",
- "| 87| 26-40-j19-1| Bus| Zürich, Seebach|\n",
- "| 88| 26-19-j19-1| S-Bahn| Zürich HB|\n",
- "| 88|80-160-Y-j19-1| RegioExpress| Zürich HB|\n",
- "| 89| 26-24-j19-1| S-Bahn| Schaffhausen|\n",
- "| 90| 26-24-j19-1| S-Bahn| Schaffhausen|\n",
- "| 91| 26-31-j19-1| Bus|Zürich Altstetten...|\n",
- "| 92| 26-3-A-j19-1| Tram| Zürich, Klusplatz|\n",
- "| 93| 26-40-j19-1| Bus|Zürich, Glaubtens...|\n",
- "| 94| 26-24-j19-1| S-Bahn| Zug|\n",
- "+---------+--------------+-------------+--------------------+\n",
- "only showing top 100 rows"
+ "+---------------+--------+----------------+----------------+--------------------+\n",
+ "|stop_id_general|stop_int| stop_lat_first| stop_lon_first| stop_name_first|\n",
+ "+---------------+--------+----------------+----------------+--------------------+\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...|\n",
+ "| 8503078| 1|47.3454760357765| 8.5930234976511| Waldburg|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU|\n",
+ "| 8503376| 8|47.4353132339136| 8.7169371079598|Ottikon b. Kemptthal|\n",
+ "| 8587967| 7|47.2955835709855|8.60393802835468|Erlenbach ZH, Im ...|\n",
+ "| 8589111| 5| 47.260856991692|8.59230484542371|Horgen, Gumelenst...|\n",
+ "| 8590819| 6|47.2821844204798|8.57300004996529| Thalwil, Mettli|\n",
+ "| 8591190| 4|47.3694098744442|8.50635403902719| Zürich, Heuried|\n",
+ "| 8591284| 3|47.3909246273101|8.47396977303017| Zürich, Neeserweg|\n",
+ "| 8588312| 9|47.4329251530902|8.68904441838401|Effretikon, Kapel...|\n",
+ "| 8590541| 10|47.4165824252901|8.61689173475348|Dietlikon, Dornen...|\n",
+ "| 8590804| 14|47.3972672788911|8.44748743845064| Schlieren, Zentrum|\n",
+ "| 8591149| 12|47.3642868894025|8.50799795599736| Zürich, Friesenberg|\n",
+ "| 8591315| 13|47.3510268782417|8.58298033277325| Zürich, Rehalp|\n",
+ "| 8591362| 11|47.3859132103277|8.54836824487131|Zürich, Seilbahn ...|\n",
+ "| 8580432| 21|47.4439106426415|8.57842587428213| Kloten, Bramen|\n",
+ "| 8590273| 16|47.4263923172796|8.36551616876333|Spreitenbach, Fur...|\n",
+ "| 8590477| 15|47.3254528526241|8.53104872619107| Adliswil, Moos|\n",
+ "| 8591053| 22|47.4058952214384|8.53778609082291|Zürich, Bad Allen...|\n",
+ "| 8591080| 18| 47.347588102752|8.53434554328425|Zürich Wollishofe...|\n",
+ "+---------------+--------+----------------+----------------+--------------------+\n",
+ "only showing top 20 rows"
]
}
],
"source": [
- "stop_times.select(stop_times.route_int, stop_times.route_id, stop_times.route_desc, stop_times.trip_headsign)\\\n",
+ "from pyspark.sql import functions as F\n",
+ "\n",
+ "stop_coordinates = stop_times.select(stop_times.stop_id_general, stop_times.stop_int, stop_times.stop_lat, stop_times.stop_lon, stop_times.stop_name,\n",
+ " F.first(\"stop_lat\").over(w).alias(\"stop_lat_first\"),\n",
+ " F.first(\"stop_lon\").over(w).alias(\"stop_lon_first\"),\n",
+ " F.first(\"stop_name\").over(w).alias(\"stop_name_first\"))\\\n",
+ ".select(F.col('stop_id_general'), F.col('stop_int'), F.col('stop_lat_first'), F.col(\"stop_lon_first\"), F.col(\"stop_name_first\"))\\\n",
".dropDuplicates()\\\n",
- ".sort(F.col('route_int').cast('int'))\\\n",
- ".show(100)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Verifying a few routes and trips on real data"
+ "\n",
+ "stop_coordinates.show()"
]
},
{
"cell_type": "code",
- "execution_count": 108,
+ "execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1407"
+ ]
}
],
"source": [
- "stop_times = spark.read.csv('data/lgpt_guys/stop_times_final_cyril.csv', header=True)"
+ "stop_coordinates.count()"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "+------------+---------------+-----------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+--------------------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
- "|route_id |stop_id_general|trip_id |stop_id|arrival_time|departure_time|stop_sequence|stop_name |stop_lat |stop_lon |trip_headsign |trip_short_name|direction_id|departure_first_stop|route_int|stop_count|stop_int|route_desc|\n",
- "+------------+---------------+-----------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+--------------------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
- "|26-660-j19-1|8576167 |486.TA.26-660-j19-1.9.R|8576167|07:16:00 |07:16:00 |1 |Nürensdorf, Chrüzstrass |47.4542439013472|8.63462447846447|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |1198 |Bus |\n",
- "|26-660-j19-1|8576168 |486.TA.26-660-j19-1.9.R|8576168|07:17:00 |07:17:00 |2 |Birchwil (Nürensdorf) |47.4577182388675|8.63876571192484|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |641 |Bus |\n",
- "|26-660-j19-1|8576169 |486.TA.26-660-j19-1.9.R|8576169|07:19:00 |07:19:00 |3 |Nürensdorf, Oberwil |47.4641014319557|8.63995148810004|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |861 |Bus |\n",
- "|26-660-j19-1|8576172 |486.TA.26-660-j19-1.9.R|8576172|07:22:00 |07:22:00 |4 |Breite b. N'dorf,Grünenwaldstr|47.461927224864 |8.66128647610085|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |43 |Bus |\n",
- "|26-660-j19-1|8576174 |486.TA.26-660-j19-1.9.R|8576174|07:24:00 |07:24:00 |5 |Brütten, Hofacher |47.470969635094 |8.67059302244562|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |92 |Bus |\n",
- "|26-660-j19-1|8506960 |486.TA.26-660-j19-1.9.R|8506960|07:26:00 |07:26:00 |6 |Brütten, Zentrum |47.472038346399 |8.67648597071027|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |1059 |Bus |\n",
- "|26-660-j19-1|8576176 |486.TA.26-660-j19-1.9.R|8576176|07:27:00 |07:27:00 |7 |Brütten, Harossen |47.4714675601105|8.68249569996186|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |777 |Bus |\n",
- "|26-660-j19-1|8591835 |486.TA.26-660-j19-1.9.R|8591835|07:27:00 |07:27:00 |8 |Brütten, Steighof |47.469621570521 |8.68646625351823|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |1126 |Bus |\n",
- "+------------+---------------+-----------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+--------------------------+---------------+------------+--------------------+---------+----------+--------+----------+"
+ "+-----------------+----------+----------------+----------------+--------------------+\n",
+ "|stop_id_general_2|stop_int_2|stop_lat_first_2|stop_lon_first_2| stop_name_first_2|\n",
+ "+-----------------+----------+----------------+----------------+--------------------+\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...|\n",
+ "| 8503078| 1|47.3454760357765| 8.5930234976511| Waldburg|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU|\n",
+ "| 8503376| 8|47.4353132339136| 8.7169371079598|Ottikon b. Kemptthal|\n",
+ "| 8587967| 7|47.2955835709855|8.60393802835468|Erlenbach ZH, Im ...|\n",
+ "| 8589111| 5| 47.260856991692|8.59230484542371|Horgen, Gumelenst...|\n",
+ "| 8590819| 6|47.2821844204798|8.57300004996529| Thalwil, Mettli|\n",
+ "| 8591190| 4|47.3694098744442|8.50635403902719| Zürich, Heuried|\n",
+ "| 8591284| 3|47.3909246273101|8.47396977303017| Zürich, Neeserweg|\n",
+ "| 8588312| 9|47.4329251530902|8.68904441838401|Effretikon, Kapel...|\n",
+ "| 8590541| 10|47.4165824252901|8.61689173475348|Dietlikon, Dornen...|\n",
+ "| 8590804| 14|47.3972672788911|8.44748743845064| Schlieren, Zentrum|\n",
+ "| 8591149| 12|47.3642868894025|8.50799795599736| Zürich, Friesenberg|\n",
+ "| 8591315| 13|47.3510268782417|8.58298033277325| Zürich, Rehalp|\n",
+ "| 8591362| 11|47.3859132103277|8.54836824487131|Zürich, Seilbahn ...|\n",
+ "| 8580432| 21|47.4439106426415|8.57842587428213| Kloten, Bramen|\n",
+ "| 8590273| 16|47.4263923172796|8.36551616876333|Spreitenbach, Fur...|\n",
+ "| 8590477| 15|47.3254528526241|8.53104872619107| Adliswil, Moos|\n",
+ "| 8591053| 22|47.4058952214384|8.53778609082291|Zürich, Bad Allen...|\n",
+ "| 8591080| 18| 47.347588102752|8.53434554328425|Zürich Wollishofe...|\n",
+ "+-----------------+----------+----------------+----------------+--------------------+\n",
+ "only showing top 20 rows"
]
}
],
"source": [
- "stop_times.where(stop_times.route_int==500).show(100, 0)"
+ "stop_coordinates_for_join = stop_coordinates.select(stop_coordinates.stop_id_general.alias('stop_id_general_2'),\n",
+ " stop_coordinates.stop_int.alias('stop_int_2'),\n",
+ " stop_coordinates.stop_lat_first.alias('stop_lat_first_2'),\n",
+ " stop_coordinates.stop_lon_first.alias('stop_lon_first_2'),\n",
+ " stop_coordinates.stop_name_first.alias('stop_name_first_2'))\n",
+ "stop_coordinates_for_join.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Validated on sbb.ch"
+ "We perform a cross-join (every possible combination of rows is created), then drop every row where `stop_id_general` is the same on both sides."
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "+-----------+---------------+------------------------+-----------+------------+--------------+-------------+------------------+----------------+----------------+-------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
- "|route_id |stop_id_general|trip_id |stop_id |arrival_time|departure_time|stop_sequence|stop_name |stop_lat |stop_lon |trip_headsign|trip_short_name|direction_id|departure_first_stop|route_int|stop_count|stop_int|route_desc|\n",
- "+-----------+---------------+------------------------+-----------+------------+--------------+-------------+------------------+----------------+----------------+-------------+---------------+------------+--------------------+---------+----------+--------+----------+\n",
- "|26-24-j19-1|8503016 |494.TA.26-24-j19-1.239.H|8503016:0:4|07:02:00 |07:04:00 |7 |Zürich Flughafen |47.4505627968247|8.56238196330552|Zug |20425 |0 |07:04:00 |200 |12 |1218 |S-Bahn |\n",
- "|26-24-j19-1|8503006 |494.TA.26-24-j19-1.239.H|8503006:0:3|07:08:00 |07:09:00 |8 |Zürich Oerlikon |47.412017224139 |8.54411023042399|Zug |20425 |0 |07:04:00 |200 |12 |617 |S-Bahn |\n",
- "|26-24-j19-1|8503015 |494.TA.26-24-j19-1.239.H|8503015:0:2|07:11:00 |07:11:00 |9 |Zürich Wipkingen |47.3931504194084|8.52935989345669|Zug |20425 |0 |07:04:00 |200 |12 |661 |S-Bahn |\n",
- "|26-24-j19-1|8503000 |494.TA.26-24-j19-1.239.H|8503000:0:4|07:16:00 |07:21:00 |10 |Zürich HB |47.3793319609979|8.54019357578468|Zug |20425 |0 |07:04:00 |200 |12 |1176 |S-Bahn |\n",
- "|26-24-j19-1|8503011 |494.TA.26-24-j19-1.239.H|8503011:0:1|07:24:00 |07:24:00 |11 |Zürich Wiedikon |47.3715939887424|8.52345796203921|Zug |20425 |0 |07:04:00 |200 |12 |724 |S-Bahn |\n",
- "|26-24-j19-1|8503010 |494.TA.26-24-j19-1.239.H|8503010:0:1|07:26:00 |07:27:00 |12 |Zürich Enge |47.3642199587519|8.53080618106433|Zug |20425 |0 |07:04:00 |200 |12 |1138 |S-Bahn |\n",
- "|26-24-j19-1|8503009 |494.TA.26-24-j19-1.239.H|8503009:0:4|07:29:00 |07:29:00 |13 |Zürich Wollishofen|47.3475028913112|8.53359095844549|Zug |20425 |0 |07:04:00 |200 |12 |921 |S-Bahn |\n",
- "|26-24-j19-1|8503200 |494.TA.26-24-j19-1.239.H|8503200:0:1|07:32:00 |07:32:00 |14 |Kilchberg |47.3244907694466|8.54799993560477|Zug |20425 |0 |07:04:00 |200 |12 |1009 |S-Bahn |\n",
- "|26-24-j19-1|8503201 |494.TA.26-24-j19-1.239.H|8503201:0:1|07:35:00 |07:35:00 |15 |Rüschlikon |47.3071946406515|8.55515052526735|Zug |20425 |0 |07:04:00 |200 |12 |754 |S-Bahn |\n",
- "|26-24-j19-1|8503202 |494.TA.26-24-j19-1.239.H|8503202:0:4|07:37:00 |07:38:00 |16 |Thalwil |47.2961562648419|8.56475351565593|Zug |20425 |0 |07:04:00 |200 |12 |415 |S-Bahn |\n",
- "|26-24-j19-1|8502209 |494.TA.26-24-j19-1.239.H|8502209:0:2|07:40:00 |07:40:00 |17 |Oberrieden Dorf |47.2767848038458|8.577635356832 |Zug |20425 |0 |07:04:00 |200 |12 |854 |S-Bahn |\n",
- "|26-24-j19-1|8502208 |494.TA.26-24-j19-1.239.H|8502208:0:3|07:43:00 |07:43:00 |18 |Horgen Oberdorf |47.2588085210892|8.58979854578067|Zug |20425 |0 |07:04:00 |200 |12 |163 |S-Bahn |\n",
- "|26-24-j19-1|8503016 |496.TA.26-24-j19-1.240.H|8503016:0:4|07:02:00 |07:04:00 |7 |Zürich Flughafen |47.4505627968247|8.56238196330552|Zug |20425 |0 |07:04:00 |200 |12 |1218 |S-Bahn |\n",
- "|26-24-j19-1|8503006 |496.TA.26-24-j19-1.240.H|8503006:0:3|07:08:00 |07:09:00 |8 |Zürich Oerlikon |47.412017224139 |8.54411023042399|Zug |20425 |0 |07:04:00 |200 |12 |617 |S-Bahn |\n",
- "|26-24-j19-1|8503015 |496.TA.26-24-j19-1.240.H|8503015:0:2|07:11:00 |07:11:00 |9 |Zürich Wipkingen |47.3931504194084|8.52935989345669|Zug |20425 |0 |07:04:00 |200 |12 |661 |S-Bahn |\n",
- "|26-24-j19-1|8503000 |496.TA.26-24-j19-1.240.H|8503000:0:4|07:16:00 |07:21:00 |10 |Zürich HB |47.3793319609979|8.54019357578468|Zug |20425 |0 |07:04:00 |200 |12 |1176 |S-Bahn |\n",
- "|26-24-j19-1|8503011 |496.TA.26-24-j19-1.240.H|8503011:0:1|07:24:00 |07:24:00 |11 |Zürich Wiedikon |47.3715939887424|8.52345796203921|Zug |20425 |0 |07:04:00 |200 |12 |724 |S-Bahn |\n",
- "|26-24-j19-1|8503010 |496.TA.26-24-j19-1.240.H|8503010:0:1|07:26:00 |07:27:00 |12 |Zürich Enge |47.3642199587519|8.53080618106433|Zug |20425 |0 |07:04:00 |200 |12 |1138 |S-Bahn |\n",
- "|26-24-j19-1|8503009 |496.TA.26-24-j19-1.240.H|8503009:0:4|07:29:00 |07:29:00 |13 |Zürich Wollishofen|47.3475028913112|8.53359095844549|Zug |20425 |0 |07:04:00 |200 |12 |921 |S-Bahn |\n",
- "|26-24-j19-1|8503200 |496.TA.26-24-j19-1.240.H|8503200:0:1|07:32:00 |07:32:00 |14 |Kilchberg |47.3244907694466|8.54799993560477|Zug |20425 |0 |07:04:00 |200 |12 |1009 |S-Bahn |\n",
- "|26-24-j19-1|8503201 |496.TA.26-24-j19-1.240.H|8503201:0:1|07:35:00 |07:35:00 |15 |Rüschlikon |47.3071946406515|8.55515052526735|Zug |20425 |0 |07:04:00 |200 |12 |754 |S-Bahn |\n",
- "|26-24-j19-1|8503202 |496.TA.26-24-j19-1.240.H|8503202:0:4|07:38:00 |07:38:00 |16 |Thalwil |47.2961562648419|8.56475351565593|Zug |20425 |0 |07:04:00 |200 |12 |415 |S-Bahn |\n",
- "|26-24-j19-1|8502209 |496.TA.26-24-j19-1.240.H|8502209:0:2|07:40:00 |07:40:00 |17 |Oberrieden Dorf |47.2767848038458|8.577635356832 |Zug |20425 |0 |07:04:00 |200 |12 |854 |S-Bahn |\n",
- "|26-24-j19-1|8502208 |496.TA.26-24-j19-1.240.H|8502208:0:3|07:44:00 |07:45:00 |18 |Horgen Oberdorf |47.2588085210892|8.58979854578067|Zug |20425 |0 |07:04:00 |200 |12 |163 |S-Bahn |\n",
- "+-----------+---------------+------------------------+-----------+------------+--------------+-------------+------------------+----------------+----------------+-------------+---------------+------------+--------------------+---------+----------+--------+----------+"
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+\n",
+ "|stop_id_general|stop_int| stop_lat_first| stop_lon_first| stop_name_first|stop_id_general_2|stop_int_2|stop_lat_first_2|stop_lon_first_2| stop_name_first_2|\n",
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8503078| 1|47.3454760357765| 8.5930234976511| Waldburg|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8503376| 8|47.4353132339136| 8.7169371079598|Ottikon b. Kemptthal|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8587967| 7|47.2955835709855|8.60393802835468|Erlenbach ZH, Im ...|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8589111| 5| 47.260856991692|8.59230484542371|Horgen, Gumelenst...|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590819| 6|47.2821844204798|8.57300004996529| Thalwil, Mettli|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591190| 4|47.3694098744442|8.50635403902719| Zürich, Heuried|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591284| 3|47.3909246273101|8.47396977303017| Zürich, Neeserweg|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8588312| 9|47.4329251530902|8.68904441838401|Effretikon, Kapel...|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590541| 10|47.4165824252901|8.61689173475348|Dietlikon, Dornen...|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590804| 14|47.3972672788911|8.44748743845064| Schlieren, Zentrum|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591149| 12|47.3642868894025|8.50799795599736| Zürich, Friesenberg|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591315| 13|47.3510268782417|8.58298033277325| Zürich, Rehalp|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591362| 11|47.3859132103277|8.54836824487131|Zürich, Seilbahn ...|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8580432| 21|47.4439106426415|8.57842587428213| Kloten, Bramen|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590273| 16|47.4263923172796|8.36551616876333|Spreitenbach, Fur...|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590477| 15|47.3254528526241|8.53104872619107| Adliswil, Moos|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591053| 22|47.4058952214384|8.53778609082291|Zürich, Bad Allen...|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591080| 18| 47.347588102752|8.53434554328425|Zürich Wollishofe...|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591085| 17|47.4274011616099|8.54614042296638|Zürich, Birch-/Gl...|\n",
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+\n",
+ "only showing top 20 rows"
+ ]
+ }
+ ],
+ "source": [
+ "# Pair every stop with every other stop; a stop is never paired with itself.\n",
+ "stop_coordinates_cross = (\n",
+ "    stop_coordinates\n",
+ "    .crossJoin(stop_coordinates_for_join)\n",
+ "    .where(F.col('stop_id_general') != F.col('stop_id_general_2'))\n",
+ ")\n",
+ "stop_coordinates_cross.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we compute the distance between the two stops of each pair in kilometers, as well as the time in seconds it takes to walk this distance."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+---------+\n",
+ "|stop_id_general|stop_int| stop_lat_first| stop_lon_first| stop_name_first|stop_id_general_2|stop_int_2|stop_lat_first_2|stop_lon_first_2| stop_name_first_2| distance|\n",
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+---------+\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8503078| 1|47.3454760357765| 8.5930234976511| Waldburg|18.017555|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU|12.902226|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8503376| 8|47.4353132339136| 8.7169371079598|Ottikon b. Kemptthal| 25.65447|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8587967| 7|47.2955835709855|8.60393802835468|Erlenbach ZH, Im ...|21.665598|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8589111| 5| 47.260856991692|8.59230484542371|Horgen, Gumelenst...|23.627174|\n",
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+---------+\n",
+ "only showing top 5 rows"
+ ]
+ }
+ ],
+ "source": [
+ "# Add a `distance` column: great_circle_udf applied to the (lat, lon) structs of both stops of each pair.\n",
+ "# NOTE(review): the unit looks like kilometres (a 0.5 threshold is applied further down) -- confirm in great_circle_udf.\n",
+ "first_stop_coords = struct(stop_coordinates_cross.stop_lat_first, stop_coordinates_cross.stop_lon_first)\n",
+ "second_stop_coords = struct(stop_coordinates_cross.stop_lat_first_2, stop_coordinates_cross.stop_lon_first_2)\n",
+ "stop_coordinates_cross_distance = stop_coordinates_cross.withColumn(\n",
+ "    \"distance\", great_circle_udf(first_stop_coords, second_stop_coords)\n",
+ ")\n",
+ "stop_coordinates_cross_distance.show(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+----------+------------+\n",
+ "|stop_id_general|stop_int| stop_lat_first| stop_lon_first| stop_name_first|stop_id_general_2|stop_int_2|stop_lat_first_2|stop_lon_first_2| stop_name_first_2| distance|walking_time|\n",
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+----------+------------+\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8503078| 1|47.3454760357765| 8.5930234976511| Waldburg| 18.017555| 21621|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 12.902226| 15482|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8503376| 8|47.4353132339136| 8.7169371079598|Ottikon b. Kemptthal| 25.65447| 30785|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8587967| 7|47.2955835709855|8.60393802835468|Erlenbach ZH, Im ...| 21.665598| 25998|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8589111| 5| 47.260856991692|8.59230484542371|Horgen, Gumelenst...| 23.627174| 28352|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590819| 6|47.2821844204798|8.57300004996529| Thalwil, Mettli| 20.909445| 25091|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591190| 4|47.3694098744442|8.50635403902719| Zürich, Heuried| 10.988478| 13186|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591284| 3|47.3909246273101|8.47396977303017| Zürich, Neeserweg| 7.7776713| 9333|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8588312| 9|47.4329251530902|8.68904441838401|Effretikon, Kapel...| 23.541742| 28250|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590541| 10|47.4165824252901|8.61689173475348|Dietlikon, Dornen...| 18.036524| 21643|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590804| 14|47.3972672788911|8.44748743845064| Schlieren, Zentrum| 5.663664| 6796|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591149| 12|47.3642868894025|8.50799795599736| Zürich, Friesenberg| 11.372528| 13647|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591315| 13|47.3510268782417|8.58298033277325| Zürich, Rehalp| 17.069794| 20483|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591362| 11|47.3859132103277|8.54836824487131|Zürich, Seilbahn ...|13.2958765| 15955|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8580432| 21|47.4439106426415|8.57842587428213| Kloten, Bramen| 15.46525| 18558|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590273| 16|47.4263923172796|8.36551616876333|Spreitenbach, Fur...| 1.5007852| 1800|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590477| 15|47.3254528526241|8.53104872619107| Adliswil, Moos| 15.309975| 18371|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591053| 22|47.4058952214384|8.53778609082291|Zürich, Bad Allen...|12.1317835| 14558|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591080| 18| 47.347588102752|8.53434554328425|Zürich Wollishofe...| 14.033888| 16840|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8591085| 17|47.4274011616099|8.54614042296638|Zürich, Birch-/Gl...| 12.780608| 15336|\n",
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+----------+------------+\n",
+ "only showing top 20 rows"
+ ]
+ }
+ ],
+ "source": [
+ "# walking_time = distance / 0.05 * 60, truncated to an integer by the cast.\n",
+ "# NOTE(review): with distance in km this is 50 m/min (3 km/h) expressed in seconds -- confirm the unit of `distance`.\n",
+ "walking_time_col = (F.col('distance') / 0.05 * 60).cast('int')\n",
+ "stop_coordinates_cross_distance_time = stop_coordinates_cross_distance.withColumn('walking_time', walking_time_col)\n",
+ "stop_coordinates_cross_distance_time.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Removing pairs of stops that are more than 500 meters (0.5 km) apart."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+-----------+------------+\n",
+ "|stop_id_general|stop_int| stop_lat_first| stop_lon_first| stop_name_first|stop_id_general_2|stop_int_2|stop_lat_first_2|stop_lon_first_2| stop_name_first_2| distance|walking_time|\n",
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+-----------+------------+\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590268| 815|47.4142117310803| 8.3795209040447| Spreitenbach, ASP| 0.22296342| 267|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590270| 1350|47.4179500849385|8.37208285349115| Spreitenbach, Brüel| 0.4742755| 569|\n",
+ "| 8503078| 1|47.3454760357765| 8.5930234976511| Waldburg| 8591903| 63|47.3483489130904|8.59604183700617|Zollikerberg, Spital| 0.39212397| 470|\n",
+ "| 8503078| 1|47.3454760357765| 8.5930234976511| Waldburg| 8591023| 242|47.3468211948325|8.59815287792414|Zollikerb., Langä...| 0.41439477| 497|\n",
+ "| 8503078| 1|47.3454760357765| 8.5930234976511| Waldburg| 8590879| 551|47.3453908209261|8.59330197538922| Waldburg, Station|0.023021813| 27|\n",
+ "| 8503078| 1|47.3454760357765| 8.5930234976511| Waldburg| 8503077| 705|47.3473202949034|8.59679642184493| Spital Zollikerberg| 0.35051093| 420|\n",
+ "| 8503078| 1|47.3454760357765| 8.5930234976511| Waldburg| 8576189| 1001|47.3457012457904|8.58848700546567|Zollikon, Rebwies...| 0.34270898| 411|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8591327| 48|47.3743498590472|8.54323886459827|Zürich, Rudolf-Br...| 0.46496668| 557|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8588078| 272|47.3768440104406|8.54393955051998| Zürich, Central| 0.36639458| 439|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8591316| 373|47.3730662375955|8.53845982728609| Zürich, Rennweg| 0.4953369| 594|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8591367| 461|47.3760957774096|8.53420181283877| Zürich, Sihlpost/HB| 0.40512198| 486|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8587349| 602|47.3775618175159|8.54173867807358|Zürich, Bahnhofqu...| 0.19359568| 232|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8591067| 616|47.3765581015114|8.53994204750509|Zürich, Bahnhofst...| 0.11930997| 143|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8503446| 785|47.3788453295376|8.54171172861506| Zürich Landesmuseum| 0.24329051| 291|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8596126| 820|47.3810655490734|8.53730100056942| Zürich, Bus Station| 0.42122737| 505|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8591368| 880|47.3798733332196|8.53760642776606| Zürich, Sihlquai/HB| 0.28947487| 347|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8591174| 947|47.3796482690385| 8.5445144723019| Zürich, Haldenegg| 0.46829802| 561|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8591379| 1033| 47.379958492389|8.54280767326183|Zürich, Stampfenb...| 0.38740817| 464|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8503000| 1173|47.3782978637762|8.54019357578468| Zürich HB| 0.1179737| 141|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8587348| 1196|47.3772394138519|8.53934017626465|Zürich, Bahnhofpl...|0.031180955| 37|\n",
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+-----------+------------+\n",
+ "only showing top 20 rows"
+ ]
+ }
+ ],
+ "source": [
+ "# Keep only the stop pairs whose `distance` is at most 0.5.\n",
+ "within_walking_range = F.col('distance') <= 0.5\n",
+ "stop_coordinates_cross_distance_time_filtered = stop_coordinates_cross_distance_time.where(within_walking_range)\n",
+ "stop_coordinates_cross_distance_time_filtered.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Sorting by `stop_int`, then by `stop_int_2`, so that the rows are ordered like `transfers` in RAPTOR."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 84,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Order the pairs numerically by (stop_int, stop_int_2), then tag every row with an id.\n",
+ "# The generated ids are not consecutive (see the values displayed further down).\n",
+ "stop_coordinates_cross_distance_time_filtered_sorted = (\n",
+ "    stop_coordinates_cross_distance_time_filtered\n",
+ "    .orderBy(F.col('stop_int').cast('int'), F.col('stop_int_2').cast('int'))\n",
+ "    .withColumn('monotonically_increasing_id', F.monotonically_increasing_id())\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Persist the sorted transfers table as CSV (with a header row), replacing any previous export.\n",
+ "(\n",
+ "    stop_coordinates_cross_distance_time_filtered_sorted.write\n",
+ "    .mode('overwrite')\n",
+ "    .option('header', True)\n",
+ "    .csv('data/lgpt_guys/transfers_cyril.csv')\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Reload the exported transfers. No schema is supplied, so every column comes back as a string.\n",
+ "stop_coordinates_cross_distance_time_filtered_sorted = (\n",
+ "    spark.read\n",
+ "    .option('header', True)\n",
+ "    .csv('data/lgpt_guys/transfers_cyril.csv')\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "6264"
+ ]
+ }
+ ],
+ "source": [
+ "stop_coordinates_cross_distance_time_filtered_sorted.count()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 88,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+----------+------------+---------------------------+\n",
+ "|stop_id_general|stop_int| stop_lat_first| stop_lon_first| stop_name_first|stop_id_general_2|stop_int_2|stop_lat_first_2|stop_lon_first_2| stop_name_first_2| distance|walking_time|monotonically_increasing_id|\n",
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+----------+------------+---------------------------+\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590268| 815|47.4142117310803| 8.3795209040447| Spreitenbach, ASP|0.22296342| 267| 0|\n",
+ "| 8502508| 0|47.4154457211288|8.37718528430566|Spreitenbach, Rai...| 8590270| 1350|47.4179500849385|8.37208285349115| Spreitenbach, Brüel| 0.4742755| 569| 1|\n",
+ "| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU| 8591367| 461|47.3760957774096|8.53420181283877| Zürich, Sihlpost/HB|0.40512198| 486| 10|\n",
+ "| 8590474| 819|47.3129444045737|8.52782377432064| Adliswil, Krone| 8595717| 1241|47.3129504950936|8.53005159622556|Adliswil, Grundst...|0.16795586| 201| 1005022347264|\n",
+ "| 8596126| 820|47.3810655490734|8.53730100056942| Zürich, Bus Station| 8503088| 2|47.3774949037101|8.53916949636064| Zürich HB SZU|0.42122737| 505| 1005022347265|\n",
+ "| 8596126| 820|47.3810655490734|8.53730100056942| Zürich, Bus Station| 8591282| 138|47.3821239221899|8.53493843137185|Zürich, Museum fü...| 0.2132881| 255| 1005022347266|\n",
+ "| 8596126| 820|47.3810655490734|8.53730100056942| Zürich, Bus Station| 8591071| 543|47.3840824646745|8.54044510406427| Zürich, Beckenhof| 0.4105776| 492| 1005022347267|\n",
+ "| 8596126| 820|47.3810655490734|8.53730100056942| Zürich, Bus Station| 8503446| 785|47.3788453295376|8.54171172861506| Zürich Landesmuseum| 0.413811| 496| 1005022347268|\n",
+ "| 8596126| 820|47.3810655490734|8.53730100056942| Zürich, Bus Station| 8591368| 880|47.3798733332196|8.53760642776606| Zürich, Sihlquai/HB|0.13454837| 161| 1005022347269|\n",
+ "| 8596126| 820|47.3810655490734|8.53730100056942| Zürich, Bus Station| 8591379| 1033| 47.379958492389|8.54280767326183|Zürich, Stampfenb...|0.43250278| 519| 1005022347270|\n",
+ "| 8596126| 820|47.3810655490734|8.53730100056942| Zürich, Bus Station| 8503000| 1173|47.3782978637762|8.54019357578468| Zürich HB|0.37702298| 452| 1005022347271|\n",
+ "| 8596126| 820|47.3810655490734|8.53730100056942| Zürich, Bus Station| 8587348| 1196|47.3772394138519|8.53934017626465|Zürich, Bahnhofpl...| 0.4523052| 542| 1005022347272|\n",
+ "| 8591354| 821|47.4218644344265|8.54977859986757| Zürich, Seebach| 8591048| 160|47.4245143567597| 8.548556891081|Zürich, Ausserdor...| 0.3086605| 370| 1005022347273|\n",
+ "| 8591354| 821|47.4218644344265|8.54977859986757| Zürich, Seebach| 8591355| 273| 47.419864748766|8.54836824487131|Zürich, Seebacher...|0.24637553| 295| 1005022347274|\n",
+ "| 8591354| 821|47.4218644344265|8.54977859986757| Zürich, Seebach| 8591187| 940|47.4228794433749|8.54349039287786|Zürich, Hertenste...|0.48635942| 583| 1005022347275|\n",
+ "| 8591354| 821|47.4218644344265|8.54977859986757| Zürich, Seebach| 8591124| 1250|47.4254624617068|8.55363237243698| Zürich, Ettenfeld| 0.4940879| 592| 1005022347276|\n",
+ "| 8591407| 822|47.4245872885157|8.51184274541392|Zürich, Unteraffo...| 8503008| 211|47.4210317478027|8.50856389462643| Zürich Affoltern|0.46600202| 559| 1005022347277|\n",
+ "| 8591407| 822|47.4245872885157|8.51184274541392|Zürich, Unteraffo...| 8591153| 465|47.4219616816404|8.51004611484544| Zürich, Fronwald|0.32172477| 386| 1005022347278|\n",
+ "| 8591062| 824|47.4125400235641|8.54190935797759|Zürich Oerlikon, ...| 8591272| 82|47.4140476021462|8.54133443619567|Zürich, Max-Bill-...| 0.1731274| 207| 1005022347279|\n",
+ "| 8591062| 824|47.4125400235641|8.54190935797759|Zürich Oerlikon, ...| 8591063| 357| 47.413336370854|8.54584397892258|Zürich Oerlikon, ...| 0.3090261| 370| 1005022347280|\n",
+ "+---------------+--------+----------------+----------------+--------------------+-----------------+----------+----------------+----------------+--------------------+----------+------------+---------------------------+\n",
+ "only showing top 20 rows"
+ ]
+ }
+ ],
+ "source": [
+ "# The table was re-read from CSV without a schema, so `monotonically_increasing_id` is a string column.\n",
+ "# Cast it to long before sorting: a plain string sort is lexicographic (0, 1, 10, 1005022347264, ...).\n",
+ "stop_coordinates_cross_distance_time_filtered_sorted.sort(F.col('monotonically_increasing_id').cast('long')).show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Building lookup tables to pretty-print results after RAPTOR:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Load the final stop_times export (header row present; no schema given, so columns are read as strings).\n",
+ "stop_times = spark.read.option('header', True).csv('data/lgpt_guys/stop_times_final_cyril.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Lookup table for routes: maps each route_int to its route_id, route_desc and trip_headsign"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+---------+--------------+-------------+--------------------+\n",
+ "|route_int| route_id| route_desc| trip_headsign|\n",
+ "+---------+--------------+-------------+--------------------+\n",
+ "| 0| 26-10-j19-1| Tram|Zürich Flughafen,...|\n",
+ "| 1| 26-13-j19-1| Tram| Zürich, Albisgütli|\n",
+ "| 2| 26-11-A-j19-1| Tram| Zürich, Rehalp|\n",
+ "| 3| 26-304-j19-1| Bus| Dietikon, Bahnhof|\n",
+ "| 4| 26-70-A-j19-1| Bus|Zürich, Mittellei...|\n",
+ "| 5| 26-61-j19-1| Bus|Zürich, Schwamend...|\n",
+ "| 6| 26-703-j19-1| Bus| Zürich, Klusplatz|\n",
+ "| 7| 26-652-j19-1| Bus| Effretikon, Bahnhof|\n",
+ "| 8| 26-83-j19-1| Bus|Zürich Altstetten...|\n",
+ "| 9| 26-749-j19-1| Bus| Dietlikon, Bahnhof|\n",
+ "| 10| 26-24-j19-1| S-Bahn| Weinfelden|\n",
+ "| 11| 26-640-j19-1| Bus| Neschwil, Post|\n",
+ "| 12| 26-725-j19-1| Bus| Volketswil, Dorf|\n",
+ "| 13| 79-736-j19-1| Bus|Zürich Flughafen,...|\n",
+ "| 14| 26-24-j19-1| S-Bahn| Weinfelden|\n",
+ "| 15|79-373-2-j19-1| Schiff| Küsnacht ZSG|\n",
+ "| 16| 26-658-j19-1| Bus| Effretikon, Bahnhof|\n",
+ "| 17| 26-449-j19-1| Bus|Buchs-Dällikon, B...|\n",
+ "| 18| 26-303-j19-1| Bus| Killwangen, Bahnhof|\n",
+ "| 19| 26-17-j19-1| Tram|Zürich, Bahnhofpl...|\n",
+ "| 20| 26-7-A-j19-1| S-Bahn| Winterthur|\n",
+ "| 21| 26-24-j19-1| S-Bahn| Zug|\n",
+ "| 22| 26-720-j19-1| Bus| Effretikon, Bahnhof|\n",
+ "| 23| 26-145-j19-1| Bus| Thalwil, Zentrum|\n",
+ "| 24| 26-185-j19-1| Bus|Zürich Wollishofe...|\n",
+ "| 25| 26-14-j19-1| S-Bahn| Hinwil|\n",
+ "| 26| 26-8-A-j19-1| S-Bahn| Pfäffikon SZ|\n",
+ "| 27| 1-322-j19-1| Bus|Baden, Postautost...|\n",
+ "| 28|26-962-A-j19-1| Bus|Erlenbach ZH, Bah...|\n",
+ "| 29| 26-301-j19-1| Bus| Dietikon, Bahnhof|\n",
+ "| 30| 26-533-j19-1| Taxi|Niederhasli, Nass...|\n",
+ "| 31| 26-78-j19-1| Bus| Zürich, Bändliweg|\n",
+ "| 32| 26-4-j19-1| Tram|Zürich Altstetten...|\n",
+ "| 33| 26-89-j19-1| Bus| Zürich, Heizenholz|\n",
+ "| 34| 26-15-A-j19-1| Tram|Zürich, Bucheggplatz|\n",
+ "| 35| 26-25-A-j19-1|Standseilbahn| Zürich, Dolder|\n",
+ "| 36| 26-919-j19-1| Bus|Zumikon, Dorfzentrum|\n",
+ "| 37| 26-302-j19-1| Bus| Urdorf, Oberurdorf|\n",
+ "| 38| 1-444-j19-1| Bus|Bremgarten AG, Ob...|\n",
+ "| 39| 26-8-C-j19-1| Tram| Zürich, Klusplatz|\n",
+ "| 40| 79-18-A-j19-1| Bus| Forch, Bahnhof|\n",
+ "| 41| 26-18-j19-1| S-Bahn|Zürich Stadelhofe...|\n",
+ "| 42| 26-2-j19-1| S-Bahn| Ziegelbrücke|\n",
+ "| 43| 26-8-A-j19-1| S-Bahn| Pfäffikon SZ|\n",
+ "| 44| 26-726-j19-1| Bus|Schwerzenbach ZH,...|\n",
+ "| 44| 26-721-j19-1| Bus|Schwerzenbach ZH,...|\n",
+ "| 45| 26-456-j19-1| Bus|Regensdorf-Watt, ...|\n",
+ "| 46| 26-311-j19-1| Bus| Dietikon, Bahnhof|\n",
+ "| 47| 26-772-j19-1| Bus|Wallisellen, Schu...|\n",
+ "| 48| 26-17-j19-1| Tram|Zürich Wiedikon, ...|\n",
+ "| 49| 26-21-j19-1| S-Bahn| Regensdorf-Watt|\n",
+ "| 50| 26-7-A-j19-1| S-Bahn| Winterthur|\n",
+ "| 51| 26-845-j19-1| Bus|Gossau ZH, Mittel...|\n",
+ "| 51| 26-845-j19-1| Bus| Grüningen, Adler|\n",
+ "| 52| 26-768-j19-1| Bus|Zürich Flughafen,...|\n",
+ "| 53| 26-13-j19-1| Tram| Zürich, Frankental|\n",
+ "| 54| 26-2-A-j19-1| Tram|Zürich Tiefenbrun...|\n",
+ "| 55| 26-36-j19-1| InterRegio| Basel SBB|\n",
+ "| 55| 26-19-j19-1| S-Bahn| Koblenz|\n",
+ "| 55| 26-19-j19-1| S-Bahn| Dietikon|\n",
+ "| 56| 1-445-j19-1| Bus|Zürich Enge, Bahn...|\n",
+ "| 57| 26-451-j19-1| Bus|Adlikon b. R., Le...|\n",
+ "| 58| 26-2-A-j19-1| Tram|Zürich Tiefenbrun...|\n",
+ "| 59| 26-304-j19-1| Bus| Dietikon, Bahnhof|\n",
+ "| 60| 26-14-A-j19-1| Tram| Zürich, Seebach|\n",
+ "| 61| 26-703-j19-1| Bus| Zürich, Klusplatz|\n",
+ "| 62| 26-10-B-j19-1| S-Bahn| Uetliberg|\n",
+ "| 63| 26-787-j19-1| Bus|Brüttisellen, Ob....|\n",
+ "| 64| 26-743-j19-1| Bus| Stettbach, Bahnhof|\n",
+ "| 65| 26-771-j19-1| Bus|Wallisellen, Bahnhof|\n",
+ "| 66| 26-38-j19-1| Bus| Zürich, Waidspital|\n",
+ "| 67| 26-14-A-j19-1| Tram| Zürich, Triemli|\n",
+ "| 68|79-373-2-j19-1| Schiff| Thalwil ZSG|\n",
+ "| 69| 26-845-j19-1| Bus| Uster, Bahnhof|\n",
+ "| 70| 26-452-j19-1| Bus|Regensdorf, Moosä...|\n",
+ "| 71| 26-813-j19-1| Bus| Uster, Bahnhof|\n",
+ "| 72| 26-31-j19-1| Bus| Zürich, Farbhof|\n",
+ "| 73| 26-6-A-j19-1| S-Bahn| Baden|\n",
+ "| 74| 26-14-A-j19-1| Tram| Zürich, Seebach|\n",
+ "| 75| 26-811-j19-1| Bus| Uster, Bahnhof|\n",
+ "| 76| 26-652-j19-1| Bus| Effretikon, Bahnhof|\n",
+ "| 77| 26-726-j19-1| Bus| Volketswil, Dorf|\n",
+ "| 78| 26-165-j19-1| Bus|Rüschlikon, Park ...|\n",
+ "| 79| 26-31-j19-1| Bus|Zürich, Kienasten...|\n",
+ "| 80| 26-75-A-j19-1| Bus|Zürich, Schwamend...|\n",
+ "| 81| 26-10-B-j19-1| S-Bahn| Zürich Selnau|\n",
+ "| 82| 79-10-B-j19-1| S-Bahn| Zürich Triemli|\n",
+ "| 83| 26-787-j19-1| Bus|Zürich Oerlikon, ...|\n",
+ "| 84| 26-35-B-j19-1| Bus| Zürich, Solidapark|\n",
+ "| 85| 26-89-j19-1| Bus|Zürich Altstetten...|\n",
+ "| 86| 26-6-B-j19-1| Tram| Zürich, Zoo|\n",
+ "| 87| 26-40-j19-1| Bus| Zürich, Seebach|\n",
+ "| 88| 26-19-j19-1| S-Bahn| Zürich HB|\n",
+ "| 88|80-160-Y-j19-1| RegioExpress| Zürich HB|\n",
+ "| 89| 26-24-j19-1| S-Bahn| Schaffhausen|\n",
+ "| 90| 26-24-j19-1| S-Bahn| Schaffhausen|\n",
+ "| 91| 26-31-j19-1| Bus|Zürich Altstetten...|\n",
+ "| 92| 26-3-A-j19-1| Tram| Zürich, Klusplatz|\n",
+ "| 93| 26-40-j19-1| Bus|Zürich, Glaubtens...|\n",
+ "| 94| 26-24-j19-1| S-Bahn| Zug|\n",
+ "+---------+--------------+-------------+--------------------+\n",
+ "only showing top 100 rows"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct (route_int, route_id, route_desc, trip_headsign) combinations, ordered numerically by route_int.\n",
+ "# A route_int can appear on several rows (e.g. 44 and 55 in the displayed output).\n",
+ "route_lookup_columns = ['route_int', 'route_id', 'route_desc', 'trip_headsign']\n",
+ "(\n",
+ "    stop_times\n",
+ "    .select(*route_lookup_columns)\n",
+ "    .distinct()\n",
+ "    .orderBy(F.col('route_int').cast('int'))\n",
+ "    .show(100)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Verifying a few routes and trips on real data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+---------+------------+--------------+---------------------------+\n",
+ "|route_int|arrival_time|departure_time|monotonically_increasing_id|\n",
+ "+---------+------------+--------------+---------------------------+\n",
+ "| 0| 07:00:00| 07:00:00| 0|\n",
+ "| 0| 07:01:00| 07:01:00| 1|\n",
+ "| 0| 07:02:00| 07:02:00| 2|\n",
+ "| 0| 07:03:00| 07:03:00| 3|\n",
+ "| 0| 07:05:00| 07:05:00| 4|\n",
+ "| 0| 07:06:00| 07:06:00| 5|\n",
+ "| 0| 07:08:00| 07:08:00| 6|\n",
+ "| 0| 07:09:00| 07:09:00| 7|\n",
+ "| 0| 07:10:00| 07:10:00| 8|\n",
+ "| 0| 07:11:00| 07:11:00| 9|\n",
+ "| 0| 07:12:00| 07:12:00| 10|\n",
+ "| 0| 07:14:00| 07:14:00| 11|\n",
+ "| 0| 07:16:00| 07:16:00| 12|\n",
+ "| 0| 07:18:00| 07:18:00| 13|\n",
+ "| 0| 07:20:00| 07:20:00| 14|\n",
+ "| 0| 07:22:00| 07:22:00| 15|\n",
+ "| 0| 07:23:00| 07:23:00| 16|\n",
+ "| 0| 07:24:00| 07:24:00| 17|\n",
+ "| 0| 07:25:00| 07:25:00| 18|\n",
+ "| 0| 07:27:00| 07:27:00| 19|\n",
+ "+---------+------------+--------------+---------------------------+\n",
+ "only showing top 20 rows"
+ ]
+ }
+ ],
+ "source": [
+ "# Re-load stop_times and display it ordered by route, first departure, trip and stop sequence.\n",
+ "stop_times = spark.read.option('header', True).csv('data/lgpt_guys/stop_times_final_cyril.csv')\n",
+ "trip_order = [\n",
+ "    F.col('route_int').cast('int'),\n",
+ "    F.col('departure_first_stop'),\n",
+ "    F.col('trip_id'),\n",
+ "    F.col('stop_sequence').cast('int'),\n",
+ "]\n",
+ "(\n",
+ "    stop_times\n",
+ "    .orderBy(*trip_order)\n",
+ "    .select('route_int', 'arrival_time', 'departure_time', 'monotonically_increasing_id')\n",
+ "    .show()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Casting Spark's `monotonically_increasing_id` to `LongType` (and not `IntegerType`) is paramount to make sure the sort happens as expected: the ids are 64-bit values, so a 32-bit integer does not have enough bits to represent them."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 122,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+-------------+---------------+------------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+---------------------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+\n",
+ "|route_id |stop_id_general|trip_id |stop_id|arrival_time|departure_time|stop_sequence|stop_name |stop_lat |stop_lon |trip_headsign |trip_short_name|direction_id|departure_first_stop|route_int|stop_count|stop_int|route_desc|monotonically_increasing_id|\n",
+ "+-------------+---------------+------------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+---------------------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+\n",
+ "|26-13-j19-1 |8576240 |2064.TA.26-13-j19-1.24.H|8576240|07:00:00 |07:00:00 |5 |Zürich, Meierhofplatz |47.4020100860391|8.49937412926861|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1221 |Tram |0 |\n",
+ "|26-13-j19-1 |8591353 |2064.TA.26-13-j19-1.24.H|8591353|07:01:00 |07:01:00 |6 |Zürich, Schwert |47.3997299435837|8.50461130737576|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |816 |Tram |1 |\n",
+ "|26-13-j19-1 |8591039 |2064.TA.26-13-j19-1.24.H|8591039|07:02:00 |07:02:00 |7 |Zürich, Alte Trotte |47.3977659017765|8.50725235431143|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |776 |Tram |2 |\n",
+ "|26-13-j19-1 |8591121 |2064.TA.26-13-j19-1.24.H|8591121|07:03:00 |07:03:00 |8 |Zürich, Eschergutweg |47.3962700189648|8.51204037477646|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |307 |Tram |3 |\n",
+ "|26-13-j19-1 |8591417 |2064.TA.26-13-j19-1.24.H|8591417|07:05:00 |07:05:00 |9 |Zürich, Waidfussweg |47.3954977376399|8.51840044698891|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |347 |Tram |4 |\n",
+ "|26-13-j19-1 |8591437 |2064.TA.26-13-j19-1.24.H|8591437|07:06:00 |07:06:00 |10 |Zürich, Wipkingerplatz |47.3925909395293|8.52357474302616|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1015 |Tram |5 |\n",
+ "|26-13-j19-1 |8580522 |2064.TA.26-13-j19-1.24.H|8580522|07:08:00 |07:08:00 |11 |Zürich, Escher-Wyss-Platz |47.3907969150758|8.5223979500038 |Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |455 |Tram |6 |\n",
+ "|26-13-j19-1 |8591110 |2064.TA.26-13-j19-1.24.H|8591110|07:09:00 |07:09:00 |12 |Zürich, Dammweg |47.3884919601296|8.52639545301869|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1102 |Tram |7 |\n",
+ "|26-13-j19-1 |8591306 |2064.TA.26-13-j19-1.24.H|8591306|07:10:00 |07:10:00 |13 |Zürich, Quellenstrasse |47.3867403702341|8.52874903906341|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |786 |Tram |8 |\n",
+ "|26-13-j19-1 |8591257 |2064.TA.26-13-j19-1.24.H|8591257|07:11:00 |07:11:00 |14 |Zürich, Limmatplatz |47.3845994590919|8.53162364797299|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |388 |Tram |9 |\n",
+ "|26-13-j19-1 |8591282 |2064.TA.26-13-j19-1.24.H|8591282|07:12:00 |07:12:00 |15 |Zürich, Museum für Gestaltung |47.3821239221899|8.53493843137185|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |138 |Tram |10 |\n",
+ "|26-13-j19-1 |8591368 |2064.TA.26-13-j19-1.24.H|8591368|07:14:00 |07:14:00 |16 |Zürich, Sihlquai/HB |47.3798733332196|8.53760642776606|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |880 |Tram |11 |\n",
+ "|26-13-j19-1 |8587349 |2064.TA.26-13-j19-1.24.H|8587349|07:16:00 |07:16:00 |17 |Zürich, Bahnhofquai/HB |47.3775618175159|8.54173867807358|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |602 |Tram |12 |\n",
+ "|26-13-j19-1 |8591067 |2064.TA.26-13-j19-1.24.H|8591067|07:18:00 |07:18:00 |18 |Zürich, Bahnhofstrasse/HB |47.3765581015114|8.53994204750509|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |616 |Tram |13 |\n",
+ "|26-13-j19-1 |8591316 |2064.TA.26-13-j19-1.24.H|8591316|07:20:00 |07:20:00 |19 |Zürich, Rennweg |47.3730662375955|8.53845982728609|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |373 |Tram |14 |\n",
+ "|26-13-j19-1 |8591299 |2064.TA.26-13-j19-1.24.H|8591299|07:22:00 |07:22:00 |20 |Zürich, Paradeplatz |47.3693672863583|8.53876525448273|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1223 |Tram |15 |\n",
+ "|26-13-j19-1 |8591384 |2064.TA.26-13-j19-1.24.H|8591384|07:23:00 |07:23:00 |21 |Zürich, Stockerstrasse |47.3677002399791|8.53501029659459|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |871 |Tram |16 |\n",
+ "|26-13-j19-1 |8591404 |2064.TA.26-13-j19-1.24.H|8591404|07:24:00 |07:24:00 |22 |Zürich, Tunnelstrasse |47.3661426599847|8.53253094641008|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |890 |Tram |17 |\n",
+ "|26-13-j19-1 |8591059 |2064.TA.26-13-j19-1.24.H|8591059|07:25:00 |07:25:00 |23 |Zürich Enge, Bahnhof/Bederstr.|47.3645546111557|8.53045583810347|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |177 |Tram |18 |\n",
+ "|26-13-j19-1 |8591415 |2064.TA.26-13-j19-1.24.H|8591415|07:27:00 |07:27:00 |24 |Zürich, Waffenplatzstrasse |47.3614818138862|8.52574866601403|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1265 |Tram |19 |\n",
+ "|26-13-j19-1 |8591366 |2064.TA.26-13-j19-1.24.H|8591366|07:28:00 |07:28:00 |25 |Zürich, Sihlcity Nord |47.3600640074787|8.52303575385561|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |968 |Tram |20 |\n",
+ "|26-13-j19-1 |8591329 |2064.TA.26-13-j19-1.24.H|8591329|07:29:00 |07:29:00 |26 |Zürich, Saalsporthalle |47.3578611597087|8.52040369007277|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1238 |Tram |21 |\n",
+ "|26-13-j19-1 |8591245 |2064.TA.26-13-j19-1.24.H|8591245|07:30:00 |07:30:00 |27 |Zürich, Laubegg |47.3587313564196|8.51708890667391|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1216 |Tram |22 |\n",
+ "|26-13-j19-1 |8591405 |2064.TA.26-13-j19-1.24.H|8591405|07:32:00 |07:32:00 |28 |Zürich, Uetlihof |47.3567353594536|8.51396276948474|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |847 |Tram |23 |\n",
+ "|26-13-j19-1 |8591385 |2064.TA.26-13-j19-1.24.H|8591385|07:33:00 |07:33:00 |29 |Zürich, Strassenverkehrsamt |47.3530717783138|8.51171698127413|Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |275 |Tram |24 |\n",
+ "|26-13-j19-1 |8591034 |2064.TA.26-13-j19-1.24.H|8591034|07:34:00 |07:34:00 |30 |Zürich, Albisgütli |47.3519945640447|8.5077104951064 |Zürich, Albisgütli |1831 |0 |07:00:00 |0 |26 |1352 |Tram |25 |\n",
+ "|26-11-A-j19-1|8591049 |791.TA.26-11-A-j19-1.3.H|8591049|19:49:00 |19:49:00 |1 |Zürich, Auzelg |47.4166918393693|8.568113214819 |Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |1117 |Tram |26 |\n",
+ "|26-11-A-j19-1|8591128 |791.TA.26-11-A-j19-1.3.H|8591128|19:51:00 |19:51:00 |2 |Zürich, Fernsehstudio |47.4181749855684|8.56174415945371|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |156 |Tram |27 |\n",
+ "|26-11-A-j19-1|8591830 |791.TA.26-11-A-j19-1.3.H|8591830|19:52:00 |19:52:00 |3 |Glattpark, Glattpark |47.4199559214972|8.55716275150406|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |671 |Tram |28 |\n",
+ "|26-11-A-j19-1|8591294 |791.TA.26-11-A-j19-1.3.H|8591294|19:53:00 |19:53:00 |4 |Zürich, Oerlikerhus |47.4175853791724|8.5542072942189 |Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |573 |Tram |29 |\n",
+ "|26-11-A-j19-1|8591256 |791.TA.26-11-A-j19-1.3.H|8591256|19:54:00 |19:54:00 |5 |Zürich, Leutschenbach |47.4146433269471|8.55130573585079|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |444 |Tram |30 |\n",
+ "|26-11-A-j19-1|8591273 |791.TA.26-11-A-j19-1.3.H|8591273|19:55:00 |19:55:00 |6 |Zürich, Messe/Hallenstadion |47.4106919651348|8.55068589830466|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |389 |Tram |31 |\n",
+ "|26-11-A-j19-1|8591382 |791.TA.26-11-A-j19-1.3.H|8591382|19:57:00 |19:57:00 |7 |Zürich, Sternen Oerlikon |47.4100718783688|8.54623025449481|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |687 |Tram |32 |\n",
+ "|26-11-A-j19-1|8580449 |791.TA.26-11-A-j19-1.3.H|8580449|19:59:00 |19:59:00 |8 |Zürich Oerlikon, Bahnhof |47.411494419524 |8.54479295004002|Zürich, Rehalp |363 |0 |19:49:00 |1 |8 |766 |Tram |33 |\n",
+ "|26-304-j19-1 |8591057 |159.TA.26-304-j19-1.4.R |8591057|19:39:00 |19:39:00 |1 |Zürich Altstetten, Bahnhof N |47.392067942097 |8.48990588617267|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |521 |Bus |34 |\n",
+ "|26-304-j19-1 |8591402 |159.TA.26-304-j19-1.4.R |8591402|19:41:00 |19:41:00 |2 |Zürich, Tüffenwies |47.3979787271809|8.49434356367684|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |1000 |Bus |35 |\n",
+ "|26-304-j19-1 |8591434 |159.TA.26-304-j19-1.4.R |8591434|19:41:00 |19:41:00 |3 |Zürich, Winzerhalde |47.4000582901792|8.4945681424979 |Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |711 |Bus |36 |\n",
+ "|26-304-j19-1 |8591197 |159.TA.26-304-j19-1.4.R |8591197|19:42:00 |19:42:00 |4 |Zürich, Hohenklingensteig |47.4013473348052|8.49021131336931|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |1125 |Bus |37 |\n",
+ "|26-304-j19-1 |8591436 |159.TA.26-304-j19-1.4.R |8591436|19:43:00 |19:43:00 |5 |Zürich, Winzerstrasse Süd |47.403372044054 |8.486123978826 |Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |149 |Bus |38 |\n",
+ "|26-304-j19-1 |8591136 |159.TA.26-304-j19-1.4.R |8591136|19:46:00 |19:46:00 |6 |Zürich, Frankental |47.4057006674825|8.48137189097235|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |690 |Bus |39 |\n",
+ "|26-304-j19-1 |8590725 |159.TA.26-304-j19-1.4.R |8590725|19:47:00 |19:47:00 |7 |Oberengstringen, Eggbühl |47.4055243523393|8.47408655401713|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |1329 |Bus |40 |\n",
+ "|26-304-j19-1 |8590726 |159.TA.26-304-j19-1.4.R |8590726|19:48:00 |19:48:00 |8 |Oberengstringen, Lanzrain |47.407342193939 |8.46795106062573|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |763 |Bus |41 |\n",
+ "|26-304-j19-1 |8590728 |159.TA.26-304-j19-1.4.R |8590728|19:49:00 |19:49:00 |9 |Oberengstringen, Zentrum |47.4091295756792|8.46260608468448|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |376 |Bus |42 |\n",
+ "|26-304-j19-1 |8590727 |159.TA.26-304-j19-1.4.R |8590727|19:50:00 |19:50:00 |10 |Oberengstringen, Paradies |47.4104852703573|8.45874332896223|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |740 |Bus |43 |\n",
+ "|26-304-j19-1 |8590833 |159.TA.26-304-j19-1.4.R |8590833|19:51:00 |19:51:00 |11 |Unterengstringen, Langacher |47.4122360710415|8.45316479104707|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |764 |Bus |44 |\n",
+ "|26-304-j19-1 |8594732 |159.TA.26-304-j19-1.4.R |8594732|19:53:00 |19:53:00 |12 |Unterengstringen, Sennenbüel N|47.4134944230824|8.44931101847766|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |987 |Bus |45 |\n",
+ "|26-304-j19-1 |8590831 |159.TA.26-304-j19-1.4.R |8590831|19:53:00 |19:53:00 |13 |Unterengstringen, Aegelsee |47.414977659342 |8.44603216769017|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |502 |Bus |46 |\n",
+ "|26-304-j19-1 |8590911 |159.TA.26-304-j19-1.4.R |8590911|19:55:00 |19:55:00 |14 |Weiningen ZH, Ausserdorf |47.4176826342903|8.43953734818508|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |754 |Bus |47 |\n",
+ "|26-304-j19-1 |8590913 |159.TA.26-304-j19-1.4.R |8590913|19:56:00 |19:56:00 |15 |Weiningen ZH, Lindenplatz |47.4195547602987|8.43394084396424|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |934 |Bus |48 |\n",
+ "|26-304-j19-1 |8590914 |159.TA.26-304-j19-1.4.R |8590914|19:57:00 |19:57:00 |16 |Weiningen ZH, Schulhaus |47.4183512583635|8.42866773324572|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |622 |Bus |49 |\n",
+ "|26-304-j19-1 |8590617 |159.TA.26-304-j19-1.4.R |8590617|19:59:00 |19:59:00 |17 |Geroldswil, Welbrig |47.4180716529658|8.41906474285715|Dietikon, Bahnhof |5481 |1 |19:39:00 |2 |17 |859 |Bus |50 |\n",
+ "|26-61-j19-1 |8591281 |269.TA.26-61-j19-1.1.H |8591281|19:57:00 |19:57:00 |1 |Zürich, Mühlacker |47.4256326325821|8.49799970688372|Zürich, Schwamendingerplatz|2076 |0 |19:57:00 |3 |2 |212 |Bus |51 |\n",
+ "|26-61-j19-1 |8591046 |269.TA.26-61-j19-1.1.H |8591046|19:58:00 |19:58:00 |2 |Zürich, Aspholz |47.425085652811 |8.50058685490234|Zürich, Schwamendingerplatz|2076 |0 |19:57:00 |3 |2 |1003 |Bus |52 |\n",
+ "|26-703-j19-1 |8591825 |179.TA.26-703-j19-1.2.R |8591825|07:10:00 |07:10:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |580 |Bus |53 |\n",
+ "|26-703-j19-1 |8590504 |179.TA.26-703-j19-1.2.R |8590504|07:11:00 |07:11:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |861 |Bus |54 |\n",
+ "|26-703-j19-1 |8596005 |179.TA.26-703-j19-1.2.R |8596005|07:14:00 |07:14:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1366 |Bus |55 |\n",
+ "|26-703-j19-1 |8591832 |179.TA.26-703-j19-1.2.R |8591832|07:14:00 |07:14:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1023 |Bus |56 |\n",
+ "|26-703-j19-1 |8591147 |179.TA.26-703-j19-1.2.R |8591147|07:16:00 |07:16:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1260 |Bus |57 |\n",
+ "|26-703-j19-1 |8591162 |179.TA.26-703-j19-1.2.R |8591162|07:17:00 |07:17:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |146 |Bus |58 |\n",
+ "|26-703-j19-1 |8591261 |179.TA.26-703-j19-1.2.R |8591261|07:18:00 |07:18:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1197 |Bus |59 |\n",
+ "|26-703-j19-1 |8591107 |179.TA.26-703-j19-1.2.R |8591107|07:19:00 |07:19:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1311 |Bus |60 |\n",
+ "|26-703-j19-1 |8591233 |179.TA.26-703-j19-1.2.R |8591233|07:25:00 |07:25:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9385 |1 |07:10:00 |4 |9 |1133 |Bus |61 |\n",
+ "|26-703-j19-1 |8591825 |171.TA.26-703-j19-1.2.R |8591825|07:12:00 |07:12:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |580 |Bus |62 |\n",
+ "|26-703-j19-1 |8590504 |171.TA.26-703-j19-1.2.R |8590504|07:13:00 |07:13:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |861 |Bus |63 |\n",
+ "|26-703-j19-1 |8596005 |171.TA.26-703-j19-1.2.R |8596005|07:16:00 |07:16:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1366 |Bus |64 |\n",
+ "|26-703-j19-1 |8591832 |171.TA.26-703-j19-1.2.R |8591832|07:16:00 |07:16:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1023 |Bus |65 |\n",
+ "|26-703-j19-1 |8591147 |171.TA.26-703-j19-1.2.R |8591147|07:18:00 |07:18:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1260 |Bus |66 |\n",
+ "|26-703-j19-1 |8591162 |171.TA.26-703-j19-1.2.R |8591162|07:19:00 |07:19:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |146 |Bus |67 |\n",
+ "|26-703-j19-1 |8591261 |171.TA.26-703-j19-1.2.R |8591261|07:20:00 |07:20:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1197 |Bus |68 |\n",
+ "|26-703-j19-1 |8591107 |171.TA.26-703-j19-1.2.R |8591107|07:21:00 |07:21:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1311 |Bus |69 |\n",
+ "|26-703-j19-1 |8591233 |171.TA.26-703-j19-1.2.R |8591233|07:27:00 |07:27:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9346 |1 |07:12:00 |4 |9 |1133 |Bus |70 |\n",
+ "|26-703-j19-1 |8591825 |156.TA.26-703-j19-1.2.R |8591825|07:25:00 |07:25:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |580 |Bus |71 |\n",
+ "|26-703-j19-1 |8590504 |156.TA.26-703-j19-1.2.R |8590504|07:26:00 |07:26:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |861 |Bus |72 |\n",
+ "|26-703-j19-1 |8596005 |156.TA.26-703-j19-1.2.R |8596005|07:29:00 |07:29:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1366 |Bus |73 |\n",
+ "|26-703-j19-1 |8591832 |156.TA.26-703-j19-1.2.R |8591832|07:29:00 |07:29:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1023 |Bus |74 |\n",
+ "|26-703-j19-1 |8591147 |156.TA.26-703-j19-1.2.R |8591147|07:31:00 |07:31:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1260 |Bus |75 |\n",
+ "|26-703-j19-1 |8591162 |156.TA.26-703-j19-1.2.R |8591162|07:32:00 |07:32:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |146 |Bus |76 |\n",
+ "|26-703-j19-1 |8591261 |156.TA.26-703-j19-1.2.R |8591261|07:33:00 |07:33:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1197 |Bus |77 |\n",
+ "|26-703-j19-1 |8591107 |156.TA.26-703-j19-1.2.R |8591107|07:34:00 |07:34:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1311 |Bus |78 |\n",
+ "|26-703-j19-1 |8591233 |156.TA.26-703-j19-1.2.R |8591233|07:40:00 |07:40:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9288 |1 |07:25:00 |4 |9 |1133 |Bus |79 |\n",
+ "|26-703-j19-1 |8591825 |144.TA.26-703-j19-1.2.R |8591825|07:27:00 |07:27:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |580 |Bus |80 |\n",
+ "|26-703-j19-1 |8590504 |144.TA.26-703-j19-1.2.R |8590504|07:28:00 |07:28:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |861 |Bus |81 |\n",
+ "|26-703-j19-1 |8596005 |144.TA.26-703-j19-1.2.R |8596005|07:31:00 |07:31:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1366 |Bus |82 |\n",
+ "|26-703-j19-1 |8591832 |144.TA.26-703-j19-1.2.R |8591832|07:31:00 |07:31:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1023 |Bus |83 |\n",
+ "|26-703-j19-1 |8591147 |144.TA.26-703-j19-1.2.R |8591147|07:33:00 |07:33:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1260 |Bus |84 |\n",
+ "|26-703-j19-1 |8591162 |144.TA.26-703-j19-1.2.R |8591162|07:34:00 |07:34:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |146 |Bus |85 |\n",
+ "|26-703-j19-1 |8591261 |144.TA.26-703-j19-1.2.R |8591261|07:35:00 |07:35:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1197 |Bus |86 |\n",
+ "|26-703-j19-1 |8591107 |144.TA.26-703-j19-1.2.R |8591107|07:36:00 |07:36:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1311 |Bus |87 |\n",
+ "|26-703-j19-1 |8591233 |144.TA.26-703-j19-1.2.R |8591233|07:42:00 |07:42:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9231 |1 |07:27:00 |4 |9 |1133 |Bus |88 |\n",
+ "|26-703-j19-1 |8591825 |120.TA.26-703-j19-1.2.R |8591825|07:40:00 |07:40:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |580 |Bus |89 |\n",
+ "|26-703-j19-1 |8590504 |120.TA.26-703-j19-1.2.R |8590504|07:41:00 |07:41:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |861 |Bus |90 |\n",
+ "|26-703-j19-1 |8596005 |120.TA.26-703-j19-1.2.R |8596005|07:44:00 |07:44:00 |3 |Binz bei Maur, Twäracher |47.3608915729295|8.623476385787 |Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1366 |Bus |91 |\n",
+ "|26-703-j19-1 |8591832 |120.TA.26-703-j19-1.2.R |8591832|07:44:00 |07:44:00 |4 |Pfaffhausen, Müseren |47.3626987847054|8.61754750491098|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1023 |Bus |92 |\n",
+ "|26-703-j19-1 |8591147 |120.TA.26-703-j19-1.2.R |8591147|07:46:00 |07:46:00 |5 |Zürich, Friedhof Witikon |47.3613418604422|8.60282411740221|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1260 |Bus |93 |\n",
+ "|26-703-j19-1 |8591162 |120.TA.26-703-j19-1.2.R |8591162|07:47:00 |07:47:00 |6 |Zürich, Glockenacker |47.3609767627537|8.59930272148798|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |146 |Bus |94 |\n",
+ "|26-703-j19-1 |8591261 |120.TA.26-703-j19-1.2.R |8591261|07:48:00 |07:48:00 |7 |Zürich, Loorenstrasse |47.3598631991991|8.59452368417579|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1197 |Bus |95 |\n",
+ "|26-703-j19-1 |8591107 |120.TA.26-703-j19-1.2.R |8591107|07:49:00 |07:49:00 |8 |Zürich, Carl-Spitteler-Strasse|47.3583236436636|8.58659156021591|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1311 |Bus |96 |\n",
+ "|26-703-j19-1 |8591233 |120.TA.26-703-j19-1.2.R |8591233|07:55:00 |07:55:00 |9 |Zürich, Klusplatz |47.3640374201824|8.56649624730736|Zürich, Klusplatz |9159 |1 |07:40:00 |4 |9 |1133 |Bus |97 |\n",
+ "|26-703-j19-1 |8591825 |95.TA.26-703-j19-1.2.R |8591825|07:42:00 |07:42:00 |1 |Benglen, Bodenacher |47.3611288870976|8.63861299832652|Zürich, Klusplatz |9119 |1 |07:42:00 |4 |9 |580 |Bus |98 |\n",
+ "|26-703-j19-1 |8590504 |95.TA.26-703-j19-1.2.R |8590504|07:43:00 |07:43:00 |2 |Benglen, Gerlisbrunnen |47.3610862923255|8.63360938219328|Zürich, Klusplatz |9119 |1 |07:42:00 |4 |9 |861 |Bus |99 |\n",
+ "+-------------+---------------+------------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+---------------------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+\n",
+ "only showing top 100 rows"
+ ]
+ }
+ ],
+ "source": [
+ "from pyspark.sql.types import LongType\n",
+ "stop_times = spark.read.csv('data/lgpt_guys/stop_times_final_cyril.csv', header=True)\n",
+ "stop_times = stop_times.sort(stop_times.monotonically_increasing_id.cast(LongType()))\n",
+ "\n",
+ "stop_times.show(100, 0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 123,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+------------+---------------+-----------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+--------------------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+\n",
+ "|route_id |stop_id_general|trip_id |stop_id|arrival_time|departure_time|stop_sequence|stop_name |stop_lat |stop_lon |trip_headsign |trip_short_name|direction_id|departure_first_stop|route_int|stop_count|stop_int|route_desc|monotonically_increasing_id|\n",
+ "+------------+---------------+-----------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+--------------------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+\n",
+ "|26-660-j19-1|8576167 |486.TA.26-660-j19-1.9.R|8576167|07:16:00 |07:16:00 |1 |Nürensdorf, Chrüzstrass |47.4542439013472|8.63462447846447|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |1200 |Bus |532575945867 |\n",
+ "|26-660-j19-1|8576168 |486.TA.26-660-j19-1.9.R|8576168|07:17:00 |07:17:00 |2 |Birchwil (Nürensdorf) |47.4577182388675|8.63876571192484|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |646 |Bus |532575945868 |\n",
+ "|26-660-j19-1|8576169 |486.TA.26-660-j19-1.9.R|8576169|07:19:00 |07:19:00 |3 |Nürensdorf, Oberwil |47.4641014319557|8.63995148810004|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |865 |Bus |532575945869 |\n",
+ "|26-660-j19-1|8576172 |486.TA.26-660-j19-1.9.R|8576172|07:22:00 |07:22:00 |4 |Breite b. N'dorf,Grünenwaldstr|47.461927224864 |8.66128647610085|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |45 |Bus |532575945870 |\n",
+ "|26-660-j19-1|8576174 |486.TA.26-660-j19-1.9.R|8576174|07:24:00 |07:24:00 |5 |Brütten, Hofacher |47.470969635094 |8.67059302244562|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |94 |Bus |532575945871 |\n",
+ "|26-660-j19-1|8506960 |486.TA.26-660-j19-1.9.R|8506960|07:26:00 |07:26:00 |6 |Brütten, Zentrum |47.472038346399 |8.67648597071027|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |1060 |Bus |532575945872 |\n",
+ "|26-660-j19-1|8576176 |486.TA.26-660-j19-1.9.R|8576176|07:27:00 |07:27:00 |7 |Brütten, Harossen |47.4714675601105|8.68249569996186|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |778 |Bus |532575945873 |\n",
+ "|26-660-j19-1|8591835 |486.TA.26-660-j19-1.9.R|8591835|07:27:00 |07:27:00 |8 |Brütten, Steighof |47.469621570521 |8.68646625351823|Winterthur, Archstrasse/HB|2268 |1 |07:16:00 |500 |8 |1128 |Bus |532575945874 |\n",
+ "+------------+---------------+-----------------------+-------+------------+--------------+-------------+------------------------------+----------------+----------------+--------------------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+"
+ ]
+ }
+ ],
+ "source": [
+ "stop_times.where(stop_times.route_int==500).show(100, 0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Validated against the timetable on sbb.ch."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 126,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+-----------+---------------+---------------------+-------+------------+--------------+-------------+--------------------------+----------------+----------------+-----------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+\n",
+ "|route_id |stop_id_general|trip_id |stop_id|arrival_time|departure_time|stop_sequence|stop_name |stop_lat |stop_lon |trip_headsign |trip_short_name|direction_id|departure_first_stop|route_int|stop_count|stop_int|route_desc|monotonically_increasing_id|\n",
+ "+-----------+---------------+---------------------+-------+------------+--------------+-------------+--------------------------+----------------+----------------+-----------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+\n",
+ "|26-46-j19-1|8591328 |55.TA.26-46-j19-1.3.H|8591328|08:35:00 |08:35:00 |1 |Zürich, Rütihof |47.413451870606 |8.47731150588756|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |881 |Bus |970662609829 |\n",
+ "|26-46-j19-1|8591155 |55.TA.26-46-j19-1.3.H|8591155|08:36:00 |08:36:00 |2 |Zürich, Geeringstrasse |47.4144427264912|8.48043764307674|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |187 |Bus |970662609830 |\n",
+ "|26-46-j19-1|8576241 |55.TA.26-46-j19-1.3.H|8576241|08:38:00 |08:38:00 |3 |Zürich, Heizenholz |47.4122968616864|8.48390514007392|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |57 |Bus |970662609831 |\n",
+ "|26-46-j19-1|8591158 |55.TA.26-46-j19-1.3.H|8591158|08:38:00 |08:38:00 |4 |Zürich, Giblenstrasse |47.4107284405996|8.485953298922 |Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |704 |Bus |970662609832 |\n",
+ "|26-46-j19-1|8591358 |55.TA.26-46-j19-1.3.H|8591358|08:40:00 |08:40:00 |5 |Zürich, Segantinistrasse |47.4074455475966|8.48996876824257|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |1025 |Bus |970662609833 |\n",
+ "|26-46-j19-1|8591371 |55.TA.26-46-j19-1.3.H|8591371|08:40:00 |08:40:00 |6 |Zürich, Singlistrasse |47.4051109214132|8.49349016415681|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |879 |Bus |970662609834 |\n",
+ "|26-46-j19-1|8591431 |55.TA.26-46-j19-1.3.H|8591431|08:41:00 |08:41:00 |7 |Zürich, Wieslergasse |47.4040651698812|8.49596053118848|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |583 |Bus |970662609835 |\n",
+ "|26-46-j19-1|8576240 |55.TA.26-46-j19-1.3.H|8576240|08:43:00 |08:43:00 |8 |Zürich, Meierhofplatz |47.4020100860391|8.49937412926861|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |1221 |Bus |970662609836 |\n",
+ "|26-46-j19-1|8591353 |55.TA.26-46-j19-1.3.H|8591353|08:44:00 |08:44:00 |9 |Zürich, Schwert |47.3997299435837|8.50461130737576|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |816 |Bus |970662609837 |\n",
+ "|26-46-j19-1|8591226 |55.TA.26-46-j19-1.3.H|8591226|08:45:00 |08:45:00 |10 |Zürich, Kempfhofsteig |47.3973037636525|8.51002814853975|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |736 |Bus |970662609838 |\n",
+ "|26-46-j19-1|8591312 |55.TA.26-46-j19-1.3.H|8591312|08:46:00 |08:46:00 |11 |Zürich, Rebbergsteig |47.396902429888 |8.5149688826031 |Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |762 |Bus |970662609839 |\n",
+ "|26-46-j19-1|8591247 |55.TA.26-46-j19-1.3.H|8591247|08:47:00 |08:47:00 |12 |Zürich, Lehenstrasse |47.3962335334876|8.52015216179319|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |1206 |Bus |970662609840 |\n",
+ "|26-46-j19-1|8591323 |55.TA.26-46-j19-1.3.H|8591323|08:48:00 |08:48:00 |13 |Zürich, Rosengartenstrasse|47.394330172729 |8.52546120512307|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |1039 |Bus |970662609841 |\n",
+ "|26-46-j19-1|8580522 |55.TA.26-46-j19-1.3.H|8580522|08:49:00 |08:49:00 |14 |Zürich, Escher-Wyss-Platz |47.3907969150758|8.5223979500038 |Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |455 |Bus |970662609842 |\n",
+ "|26-46-j19-1|8594239 |55.TA.26-46-j19-1.3.H|8594239|08:50:00 |08:50:00 |15 |Zürich, Schiffbau |47.3875735990751|8.51944249271863|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |832 |Bus |970662609843 |\n",
+ "|26-46-j19-1|8591060 |55.TA.26-46-j19-1.3.H|8591060|08:52:00 |08:52:00 |16 |Zürich Hardbrücke, Bahnhof|47.3849339821896|8.51703500775686|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |850 |Bus |970662609844 |\n",
+ "|26-46-j19-1|8591177 |55.TA.26-46-j19-1.3.H|8591177|08:52:00 |08:52:00 |17 |Zürich, Hardplatz |47.3823428932854|8.51452870811382|Zürich, Hardplatz|1106 |0 |08:35:00 |800 |17 |266 |Bus |970662609845 |\n",
+ "|26-46-j19-1|8591328 |54.TA.26-46-j19-1.3.H|8591328|18:35:00 |18:35:00 |1 |Zürich, Rütihof |47.413451870606 |8.47731150588756|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |881 |Bus |970662609846 |\n",
+ "|26-46-j19-1|8591155 |54.TA.26-46-j19-1.3.H|8591155|18:36:00 |18:36:00 |2 |Zürich, Geeringstrasse |47.4144427264912|8.48043764307674|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |187 |Bus |970662609847 |\n",
+ "|26-46-j19-1|8576241 |54.TA.26-46-j19-1.3.H|8576241|18:37:00 |18:37:00 |3 |Zürich, Heizenholz |47.4122968616864|8.48390514007392|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |57 |Bus |970662609848 |\n",
+ "|26-46-j19-1|8591158 |54.TA.26-46-j19-1.3.H|8591158|18:38:00 |18:38:00 |4 |Zürich, Giblenstrasse |47.4107284405996|8.485953298922 |Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |704 |Bus |970662609849 |\n",
+ "|26-46-j19-1|8591358 |54.TA.26-46-j19-1.3.H|8591358|18:39:00 |18:39:00 |5 |Zürich, Segantinistrasse |47.4074455475966|8.48996876824257|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |1025 |Bus |970662609850 |\n",
+ "|26-46-j19-1|8591371 |54.TA.26-46-j19-1.3.H|8591371|18:40:00 |18:40:00 |6 |Zürich, Singlistrasse |47.4051109214132|8.49349016415681|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |879 |Bus |970662609851 |\n",
+ "|26-46-j19-1|8591431 |54.TA.26-46-j19-1.3.H|8591431|18:41:00 |18:41:00 |7 |Zürich, Wieslergasse |47.4040651698812|8.49596053118848|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |583 |Bus |970662609852 |\n",
+ "|26-46-j19-1|8576240 |54.TA.26-46-j19-1.3.H|8576240|18:42:00 |18:42:00 |8 |Zürich, Meierhofplatz |47.4020100860391|8.49937412926861|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |1221 |Bus |970662609853 |\n",
+ "|26-46-j19-1|8591353 |54.TA.26-46-j19-1.3.H|8591353|18:43:00 |18:43:00 |9 |Zürich, Schwert |47.3997299435837|8.50461130737576|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |816 |Bus |970662609854 |\n",
+ "|26-46-j19-1|8591226 |54.TA.26-46-j19-1.3.H|8591226|18:45:00 |18:45:00 |10 |Zürich, Kempfhofsteig |47.3973037636525|8.51002814853975|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |736 |Bus |970662609855 |\n",
+ "|26-46-j19-1|8591312 |54.TA.26-46-j19-1.3.H|8591312|18:45:00 |18:45:00 |11 |Zürich, Rebbergsteig |47.396902429888 |8.5149688826031 |Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |762 |Bus |970662609856 |\n",
+ "|26-46-j19-1|8591247 |54.TA.26-46-j19-1.3.H|8591247|18:47:00 |18:47:00 |12 |Zürich, Lehenstrasse |47.3962335334876|8.52015216179319|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |1206 |Bus |970662609857 |\n",
+ "|26-46-j19-1|8591323 |54.TA.26-46-j19-1.3.H|8591323|18:48:00 |18:48:00 |13 |Zürich, Rosengartenstrasse|47.394330172729 |8.52546120512307|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |1039 |Bus |970662609858 |\n",
+ "|26-46-j19-1|8580522 |54.TA.26-46-j19-1.3.H|8580522|18:49:00 |18:49:00 |14 |Zürich, Escher-Wyss-Platz |47.3907969150758|8.5223979500038 |Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |455 |Bus |970662609859 |\n",
+ "|26-46-j19-1|8594239 |54.TA.26-46-j19-1.3.H|8594239|18:50:00 |18:50:00 |15 |Zürich, Schiffbau |47.3875735990751|8.51944249271863|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |832 |Bus |970662609860 |\n",
+ "|26-46-j19-1|8591060 |54.TA.26-46-j19-1.3.H|8591060|18:51:00 |18:51:00 |16 |Zürich Hardbrücke, Bahnhof|47.3849339821896|8.51703500775686|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |850 |Bus |970662609861 |\n",
+ "|26-46-j19-1|8591177 |54.TA.26-46-j19-1.3.H|8591177|18:52:00 |18:52:00 |17 |Zürich, Hardplatz |47.3823428932854|8.51452870811382|Zürich, Hardplatz|531 |0 |18:35:00 |800 |17 |266 |Bus |970662609862 |\n",
+ "|26-46-j19-1|8591328 |53.TA.26-46-j19-1.3.H|8591328|18:42:00 |18:42:00 |1 |Zürich, Rütihof |47.413451870606 |8.47731150588756|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |881 |Bus |970662609863 |\n",
+ "|26-46-j19-1|8591155 |53.TA.26-46-j19-1.3.H|8591155|18:43:00 |18:43:00 |2 |Zürich, Geeringstrasse |47.4144427264912|8.48043764307674|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |187 |Bus |970662609864 |\n",
+ "|26-46-j19-1|8576241 |53.TA.26-46-j19-1.3.H|8576241|18:45:00 |18:45:00 |3 |Zürich, Heizenholz |47.4122968616864|8.48390514007392|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |57 |Bus |970662609865 |\n",
+ "|26-46-j19-1|8591158 |53.TA.26-46-j19-1.3.H|8591158|18:45:00 |18:45:00 |4 |Zürich, Giblenstrasse |47.4107284405996|8.485953298922 |Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |704 |Bus |970662609866 |\n",
+ "|26-46-j19-1|8591358 |53.TA.26-46-j19-1.3.H|8591358|18:47:00 |18:47:00 |5 |Zürich, Segantinistrasse |47.4074455475966|8.48996876824257|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |1025 |Bus |970662609867 |\n",
+ "|26-46-j19-1|8591371 |53.TA.26-46-j19-1.3.H|8591371|18:47:00 |18:47:00 |6 |Zürich, Singlistrasse |47.4051109214132|8.49349016415681|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |879 |Bus |970662609868 |\n",
+ "|26-46-j19-1|8591431 |53.TA.26-46-j19-1.3.H|8591431|18:48:00 |18:48:00 |7 |Zürich, Wieslergasse |47.4040651698812|8.49596053118848|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |583 |Bus |970662609869 |\n",
+ "|26-46-j19-1|8576240 |53.TA.26-46-j19-1.3.H|8576240|18:50:00 |18:50:00 |8 |Zürich, Meierhofplatz |47.4020100860391|8.49937412926861|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |1221 |Bus |970662609870 |\n",
+ "|26-46-j19-1|8591353 |53.TA.26-46-j19-1.3.H|8591353|18:51:00 |18:51:00 |9 |Zürich, Schwert |47.3997299435837|8.50461130737576|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |816 |Bus |970662609871 |\n",
+ "|26-46-j19-1|8591226 |53.TA.26-46-j19-1.3.H|8591226|18:52:00 |18:52:00 |10 |Zürich, Kempfhofsteig |47.3973037636525|8.51002814853975|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |736 |Bus |970662609872 |\n",
+ "|26-46-j19-1|8591312 |53.TA.26-46-j19-1.3.H|8591312|18:53:00 |18:53:00 |11 |Zürich, Rebbergsteig |47.396902429888 |8.5149688826031 |Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |762 |Bus |970662609873 |\n",
+ "|26-46-j19-1|8591247 |53.TA.26-46-j19-1.3.H|8591247|18:54:00 |18:54:00 |12 |Zürich, Lehenstrasse |47.3962335334876|8.52015216179319|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |1206 |Bus |970662609874 |\n",
+ "|26-46-j19-1|8591323 |53.TA.26-46-j19-1.3.H|8591323|18:55:00 |18:55:00 |13 |Zürich, Rosengartenstrasse|47.394330172729 |8.52546120512307|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |1039 |Bus |970662609875 |\n",
+ "|26-46-j19-1|8580522 |53.TA.26-46-j19-1.3.H|8580522|18:56:00 |18:56:00 |14 |Zürich, Escher-Wyss-Platz |47.3907969150758|8.5223979500038 |Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |455 |Bus |970662609876 |\n",
+ "|26-46-j19-1|8594239 |53.TA.26-46-j19-1.3.H|8594239|18:57:00 |18:57:00 |15 |Zürich, Schiffbau |47.3875735990751|8.51944249271863|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |832 |Bus |970662609877 |\n",
+ "|26-46-j19-1|8591060 |53.TA.26-46-j19-1.3.H|8591060|18:59:00 |18:59:00 |16 |Zürich Hardbrücke, Bahnhof|47.3849339821896|8.51703500775686|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |850 |Bus |970662609878 |\n",
+ "|26-46-j19-1|8591177 |53.TA.26-46-j19-1.3.H|8591177|18:59:00 |18:59:00 |17 |Zürich, Hardplatz |47.3823428932854|8.51452870811382|Zürich, Hardplatz|525 |0 |18:42:00 |800 |17 |266 |Bus |970662609879 |\n",
+ "|26-46-j19-1|8591328 |52.TA.26-46-j19-1.3.H|8591328|18:57:00 |18:57:00 |1 |Zürich, Rütihof |47.413451870606 |8.47731150588756|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |881 |Bus |970662609880 |\n",
+ "|26-46-j19-1|8591155 |52.TA.26-46-j19-1.3.H|8591155|18:58:00 |18:58:00 |2 |Zürich, Geeringstrasse |47.4144427264912|8.48043764307674|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |187 |Bus |970662609881 |\n",
+ "|26-46-j19-1|8576241 |52.TA.26-46-j19-1.3.H|8576241|19:00:00 |19:00:00 |3 |Zürich, Heizenholz |47.4122968616864|8.48390514007392|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |57 |Bus |970662609882 |\n",
+ "|26-46-j19-1|8591158 |52.TA.26-46-j19-1.3.H|8591158|19:00:00 |19:00:00 |4 |Zürich, Giblenstrasse |47.4107284405996|8.485953298922 |Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |704 |Bus |970662609883 |\n",
+ "|26-46-j19-1|8591358 |52.TA.26-46-j19-1.3.H|8591358|19:02:00 |19:02:00 |5 |Zürich, Segantinistrasse |47.4074455475966|8.48996876824257|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |1025 |Bus |970662609884 |\n",
+ "|26-46-j19-1|8591371 |52.TA.26-46-j19-1.3.H|8591371|19:02:00 |19:02:00 |6 |Zürich, Singlistrasse |47.4051109214132|8.49349016415681|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |879 |Bus |970662609885 |\n",
+ "|26-46-j19-1|8591431 |52.TA.26-46-j19-1.3.H|8591431|19:03:00 |19:03:00 |7 |Zürich, Wieslergasse |47.4040651698812|8.49596053118848|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |583 |Bus |970662609886 |\n",
+ "|26-46-j19-1|8576240 |52.TA.26-46-j19-1.3.H|8576240|19:05:00 |19:05:00 |8 |Zürich, Meierhofplatz |47.4020100860391|8.49937412926861|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |1221 |Bus |970662609887 |\n",
+ "|26-46-j19-1|8591353 |52.TA.26-46-j19-1.3.H|8591353|19:06:00 |19:06:00 |9 |Zürich, Schwert |47.3997299435837|8.50461130737576|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |816 |Bus |970662609888 |\n",
+ "|26-46-j19-1|8591226 |52.TA.26-46-j19-1.3.H|8591226|19:07:00 |19:07:00 |10 |Zürich, Kempfhofsteig |47.3973037636525|8.51002814853975|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |736 |Bus |970662609889 |\n",
+ "|26-46-j19-1|8591312 |52.TA.26-46-j19-1.3.H|8591312|19:08:00 |19:08:00 |11 |Zürich, Rebbergsteig |47.396902429888 |8.5149688826031 |Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |762 |Bus |970662609890 |\n",
+ "|26-46-j19-1|8591247 |52.TA.26-46-j19-1.3.H|8591247|19:09:00 |19:09:00 |12 |Zürich, Lehenstrasse |47.3962335334876|8.52015216179319|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |1206 |Bus |970662609891 |\n",
+ "|26-46-j19-1|8591323 |52.TA.26-46-j19-1.3.H|8591323|19:10:00 |19:10:00 |13 |Zürich, Rosengartenstrasse|47.394330172729 |8.52546120512307|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |1039 |Bus |970662609892 |\n",
+ "|26-46-j19-1|8580522 |52.TA.26-46-j19-1.3.H|8580522|19:11:00 |19:11:00 |14 |Zürich, Escher-Wyss-Platz |47.3907969150758|8.5223979500038 |Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |455 |Bus |970662609893 |\n",
+ "|26-46-j19-1|8594239 |52.TA.26-46-j19-1.3.H|8594239|19:12:00 |19:12:00 |15 |Zürich, Schiffbau |47.3875735990751|8.51944249271863|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |832 |Bus |970662609894 |\n",
+ "|26-46-j19-1|8591060 |52.TA.26-46-j19-1.3.H|8591060|19:14:00 |19:14:00 |16 |Zürich Hardbrücke, Bahnhof|47.3849339821896|8.51703500775686|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |850 |Bus |970662609895 |\n",
+ "|26-46-j19-1|8591177 |52.TA.26-46-j19-1.3.H|8591177|19:14:00 |19:14:00 |17 |Zürich, Hardplatz |47.3823428932854|8.51452870811382|Zürich, Hardplatz|514 |0 |18:57:00 |800 |17 |266 |Bus |970662609896 |\n",
+ "|26-46-j19-1|8591328 |51.TA.26-46-j19-1.3.H|8591328|19:05:00 |19:05:00 |1 |Zürich, Rütihof |47.413451870606 |8.47731150588756|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |881 |Bus |970662609897 |\n",
+ "|26-46-j19-1|8591155 |51.TA.26-46-j19-1.3.H|8591155|19:06:00 |19:06:00 |2 |Zürich, Geeringstrasse |47.4144427264912|8.48043764307674|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |187 |Bus |970662609898 |\n",
+ "|26-46-j19-1|8576241 |51.TA.26-46-j19-1.3.H|8576241|19:07:00 |19:07:00 |3 |Zürich, Heizenholz |47.4122968616864|8.48390514007392|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |57 |Bus |970662609899 |\n",
+ "|26-46-j19-1|8591158 |51.TA.26-46-j19-1.3.H|8591158|19:08:00 |19:08:00 |4 |Zürich, Giblenstrasse |47.4107284405996|8.485953298922 |Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |704 |Bus |970662609900 |\n",
+ "|26-46-j19-1|8591358 |51.TA.26-46-j19-1.3.H|8591358|19:09:00 |19:09:00 |5 |Zürich, Segantinistrasse |47.4074455475966|8.48996876824257|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |1025 |Bus |970662609901 |\n",
+ "|26-46-j19-1|8591371 |51.TA.26-46-j19-1.3.H|8591371|19:10:00 |19:10:00 |6 |Zürich, Singlistrasse |47.4051109214132|8.49349016415681|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |879 |Bus |970662609902 |\n",
+ "|26-46-j19-1|8591431 |51.TA.26-46-j19-1.3.H|8591431|19:11:00 |19:11:00 |7 |Zürich, Wieslergasse |47.4040651698812|8.49596053118848|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |583 |Bus |970662609903 |\n",
+ "|26-46-j19-1|8576240 |51.TA.26-46-j19-1.3.H|8576240|19:12:00 |19:12:00 |8 |Zürich, Meierhofplatz |47.4020100860391|8.49937412926861|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |1221 |Bus |970662609904 |\n",
+ "|26-46-j19-1|8591353 |51.TA.26-46-j19-1.3.H|8591353|19:13:00 |19:13:00 |9 |Zürich, Schwert |47.3997299435837|8.50461130737576|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |816 |Bus |970662609905 |\n",
+ "|26-46-j19-1|8591226 |51.TA.26-46-j19-1.3.H|8591226|19:15:00 |19:15:00 |10 |Zürich, Kempfhofsteig |47.3973037636525|8.51002814853975|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |736 |Bus |970662609906 |\n",
+ "|26-46-j19-1|8591312 |51.TA.26-46-j19-1.3.H|8591312|19:15:00 |19:15:00 |11 |Zürich, Rebbergsteig |47.396902429888 |8.5149688826031 |Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |762 |Bus |970662609907 |\n",
+ "|26-46-j19-1|8591247 |51.TA.26-46-j19-1.3.H|8591247|19:17:00 |19:17:00 |12 |Zürich, Lehenstrasse |47.3962335334876|8.52015216179319|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |1206 |Bus |970662609908 |\n",
+ "|26-46-j19-1|8591323 |51.TA.26-46-j19-1.3.H|8591323|19:18:00 |19:18:00 |13 |Zürich, Rosengartenstrasse|47.394330172729 |8.52546120512307|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |1039 |Bus |970662609909 |\n",
+ "|26-46-j19-1|8580522 |51.TA.26-46-j19-1.3.H|8580522|19:19:00 |19:19:00 |14 |Zürich, Escher-Wyss-Platz |47.3907969150758|8.5223979500038 |Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |455 |Bus |970662609910 |\n",
+ "|26-46-j19-1|8594239 |51.TA.26-46-j19-1.3.H|8594239|19:20:00 |19:20:00 |15 |Zürich, Schiffbau |47.3875735990751|8.51944249271863|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |832 |Bus |970662609911 |\n",
+ "|26-46-j19-1|8591060 |51.TA.26-46-j19-1.3.H|8591060|19:21:00 |19:21:00 |16 |Zürich Hardbrücke, Bahnhof|47.3849339821896|8.51703500775686|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |850 |Bus |970662609912 |\n",
+ "|26-46-j19-1|8591177 |51.TA.26-46-j19-1.3.H|8591177|19:22:00 |19:22:00 |17 |Zürich, Hardplatz |47.3823428932854|8.51452870811382|Zürich, Hardplatz|506 |0 |19:05:00 |800 |17 |266 |Bus |970662609913 |\n",
+ "+-----------+---------------+---------------------+-------+------------+--------------+-------------+--------------------------+----------------+----------------+-----------------+---------------+------------+--------------------+---------+----------+--------+----------+---------------------------+"
]
}
],
"source": [
- "stop_times.where(stop_times.route_int==200).show(100, 0)"
+ "stop_times.where(stop_times.route_int==800).show(100, 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Verified on sbb.ch. This one is interesting: the service stops in the middle of the day. That is also what is observed on sbb.ch."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "PySpark",
"language": "",
"name": "pysparkkernel"
},
"language_info": {
"codemirror_mode": {
"name": "python",
"version": 3
},
"mimetype": "text/x-python",
"name": "pyspark",
"pygments_lexer": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
diff --git a/notebooks/probabilities.ipynb b/notebooks/hdfs_get_distributions.ipynb
similarity index 95%
rename from notebooks/probabilities.ipynb
rename to notebooks/hdfs_get_distributions.ipynb
index faf0c2e..892cfea 100644
--- a/notebooks/probabilities.ipynb
+++ b/notebooks/hdfs_get_distributions.ipynb
@@ -1,2476 +1,2321 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Make distribution tables to calculate probabilities of transfer\n",
"\n",
"Any application without a proper name would be promptly killed.
"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Current session configs: {'conf': {'spark.app.name': 'lgptguys_final'}, 'kind': 'pyspark'} "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"ID YARN Application ID Kind State Spark UI Driver log Current session? 7272 application_1589299642358_1768 pyspark idle Link Link 7292 application_1589299642358_1788 pyspark busy Link Link 7326 application_1589299642358_1822 pyspark idle Link Link 7369 application_1589299642358_1865 pyspark idle Link Link 7388 application_1589299642358_1884 pyspark idle Link Link 7393 application_1589299642358_1889 pyspark idle Link Link 7398 application_1589299642358_1894 pyspark idle Link Link 7407 application_1589299642358_1903 pyspark idle Link Link 7412 application_1589299642358_1908 pyspark busy Link Link 7415 application_1589299642358_1911 pyspark idle Link Link 7418 application_1589299642358_1914 pyspark idle Link Link 7420 application_1589299642358_1916 pyspark busy Link Link 7421 application_1589299642358_1917 pyspark idle Link Link 7422 application_1589299642358_1918 pyspark busy Link Link 7423 application_1589299642358_1919 pyspark idle Link Link 7424 application_1589299642358_1920 pyspark idle Link Link 7426 application_1589299642358_1922 pyspark idle Link Link 7427 application_1589299642358_1923 pyspark idle Link Link 7428 application_1589299642358_1924 pyspark busy Link Link 7429 application_1589299642358_1925 pyspark idle Link Link 7431 application_1589299642358_1927 pyspark idle Link Link 7433 application_1589299642358_1929 pyspark idle Link Link 7434 application_1589299642358_1930 pyspark idle Link Link 7435 application_1589299642358_1931 pyspark busy Link Link 7437 application_1589299642358_1933 pyspark idle Link Link 7438 application_1589299642358_1934 pyspark idle Link Link 7440 application_1589299642358_1936 pyspark idle Link Link 7441 application_1589299642358_1937 pyspark idle Link Link 7443 application_1589299642358_1939 pyspark idle Link Link 7444 application_1589299642358_1940 pyspark idle Link Link 7445 application_1589299642358_1941 pyspark idle Link Link
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%configure\n",
"{\"conf\": {\n",
" \"spark.app.name\": \"lgptguys_final\"\n",
"}}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Start Spark"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting Spark application\n"
]
},
{
"data": {
"text/html": [
"\n",
"ID YARN Application ID Kind State Spark UI Driver log Current session? 7446 application_1589299642358_1942 pyspark idle Link Link ✔
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SparkSession available as 'spark'.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"An error was encountered:\n",
"unknown magic command '%spark'\n",
"UnknownMagic: unknown magic command '%spark'\n",
"\n"
]
}
],
"source": [
"# Initialization\n",
"%%spark"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"An error was encountered:\n",
"Variable named username not found.\n"
]
}
],
"source": [
"%%send_to_spark -i username -t str -n username"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import useful libraries "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from geopy.distance import great_circle\n",
"from pyspark.sql.functions import *\n",
"from pyspark.sql.types import StructType, StructField, StringType, IntegerType, LongType\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read TimeTable data for routes / trips "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------+---------------------------+-------+\n",
"|stop_id_raw|stop_name |stop_id|\n",
"+-----------+---------------------------+-------+\n",
"|8500926 |Oetwil a.d.L., Schweizäcker|8500926|\n",
"|8502186 |Dietikon Stoffelbach |8502186|\n",
"|8502186:0:1|Dietikon Stoffelbach |8502186|\n",
"|8502186:0:2|Dietikon Stoffelbach |8502186|\n",
"|8502186P |Dietikon Stoffelbach |8502186|\n",
"+-----------+---------------------------+-------+\n",
"only showing top 5 rows"
]
}
],
"source": [
"# Stops of the timetable data kept for the analysis (15 km subset file)\n",
"stops_15km = spark.read.csv('data/lgpt_guys/stops_15km.csv', header = True)\n",
"\n",
"# Keep the raw id next to the name and derive `stop_id` from its first\n",
"# 7 characters, which drops suffixes such as ':0:1' or 'P'\n",
"stops_15km = (\n",
"    stops_15km\n",
"    .select(col('stop_id').alias('stop_id_raw'), 'stop_name')\n",
"    .withColumn('stop_id', col('stop_id_raw').substr(1, 7))\n",
")\n",
"stops_15km.show(5, False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read the [SBB actual data](https://opentransportdata.swiss/en/dataset/istdaten) in ORC format"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sbb = spark.read.orc('/data/sbb/orc/istdaten')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"root\n",
" |-- betriebstag: string (nullable = true)\n",
" |-- fahrt_bezeichner: string (nullable = true)\n",
" |-- betreiber_id: string (nullable = true)\n",
" |-- betreiber_abk: string (nullable = true)\n",
" |-- betreiber_name: string (nullable = true)\n",
" |-- produkt_id: string (nullable = true)\n",
" |-- linien_id: string (nullable = true)\n",
" |-- linien_text: string (nullable = true)\n",
" |-- umlauf_id: string (nullable = true)\n",
" |-- verkehrsmittel_text: string (nullable = true)\n",
" |-- zusatzfahrt_tf: string (nullable = true)\n",
" |-- faellt_aus_tf: string (nullable = true)\n",
" |-- bpuic: string (nullable = true)\n",
" |-- haltestellen_name: string (nullable = true)\n",
" |-- ankunftszeit: string (nullable = true)\n",
" |-- an_prognose: string (nullable = true)\n",
" |-- an_prognose_status: string (nullable = true)\n",
" |-- abfahrtszeit: string (nullable = true)\n",
" |-- ab_prognose: string (nullable = true)\n",
" |-- ab_prognose_status: string (nullable = true)\n",
" |-- durchfahrt_tf: string (nullable = true)"
]
}
],
"source": [
"sbb.printSchema()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Subset SBB data\n",
"\n",
"We keep only the stops within 15 km of Zurich HB, matched through the `stop_id` field of _stops_15km_. We did not restrict the data to `geschaetz` prognosis times, because the overlap between the _timetable_ and _sbb_ datasets was too small with `geschaetz` arrival times alone. _To do next: use only geschaetz when available._"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10848628\n",
"+----------------+-----------------+----------------+----------------+-------------------+-------------------+-------+\n",
"|fahrt_bezeichner|haltestellen_name|ankunftszeit |abfahrtszeit |an_prognose |ab_prognose |stop_id|\n",
"+----------------+-----------------+----------------+----------------+-------------------+-------------------+-------+\n",
"|85:11:10:002 |Zürich HB |03.09.2018 21:51| |03.09.2018 21:53:40| |8503000|\n",
"|85:11:11:001 |Zürich HB | |03.09.2018 06:09| |03.09.2018 06:10:22|8503000|\n",
"|85:11:12:001 |Zürich HB |03.09.2018 10:51| |03.09.2018 10:51:28| |8503000|\n",
"|85:11:1251:003 |Zürich HB |03.09.2018 07:00| |03.09.2018 07:00:01| |8503000|\n",
"|85:11:1252:001 |Zürich HB |03.09.2018 21:23|03.09.2018 21:36|03.09.2018 21:24:55|03.09.2018 21:36:57|8503000|\n",
"+----------------+-----------------+----------------+----------------+-------------------+-------------------+-------+\n",
"only showing top 5 rows"
]
}
],
"source": [
"# Trimmed stop ids used to subset the sbb table\n",
"l1_id = stops_15km.select('stop_id').collect()\n",
"l2_id = [item.stop_id for item in l1_id]\n",
"\n",
"# Stop names used to subset the sbb table\n",
"l1_name = stops_15km.select('stop_name').collect()\n",
"l2_name = [item.stop_name for item in l1_name]\n",
"\n",
"# Make the subset dataframe: keep the rows of the selected stops whose arrival\n",
"# or departure time is flagged as measured ('REAL') or estimated ('GESCHAETZT').\n",
"# The status used to be compared with 'GESCHAETZ'; that spelling is not one of\n",
"# the documented istdaten status values, so it presumably never matched.\n",
"# NOTE(review): stop names are matched against `bpuic`, as before - confirm\n",
"# whether `haltestellen_name` was the intended column.\n",
"sbb_filt = sbb.filter( ( sbb['bpuic'].isin(l2_id) | sbb['bpuic'].isin(l2_name) ) &\\\n",
"                       ((sbb.an_prognose_status == 'REAL') | \\\n",
"                        (sbb.an_prognose_status == 'GESCHAETZT') | \\\n",
"                        (sbb.ab_prognose_status == 'REAL') | \\\n",
"                        (sbb.ab_prognose_status == 'GESCHAETZT') ) ) \\\n",
"              .select('fahrt_bezeichner','haltestellen_name', \\\n",
"                      'ankunftszeit', 'abfahrtszeit', \\\n",
"                      'an_prognose', 'ab_prognose', \\\n",
"                      col('bpuic').alias('stop_id'))\n",
"\n",
"# Row count of the subset, then a preview without column truncation.\n",
"# print() with a single argument behaves the same on Python 2 and Python 3.\n",
"print(sbb_filt.count())\n",
"sbb_filt.show(5,False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Write subset table in HDFS for better performance during later usage"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Persist the subset as ORC under the user's HDFS home directory\n",
"username = 'acoudray'\n",
"sbb_filt.write.orc(\"/user/{}/sbb_filt_forDelays_GeschaetzAndReal.orc\".format(username))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Summary of tables written in /user/{}/:\n",
"- sbb_filt_forDelays_noGaeschetz.orc : table with all dates, < 15km, no GESCHAETZ, used 7-char trimmed stop_id in timetable data\n",
"- sbb_filt_forDelays2.orc : table with all dates, < 15km, only GESCHAETZ, used 7-char trimmed stop_id in timetable data\n",
"- sbb_filt_forDelays.orc : table with all dates, < 15km, only GESCHAETZ\n",
"- sbb_sub_forDelays.ord : Old to remove"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Measure Distributions of Delay Times per trip and station\n",
"\n",
"The goal of this chapter is to pre-compute probabilities for McRaptor implementation, which will be ultimately used to choose the best trip according to its time __and probability of success__. The goal is to create a distribution of arrival delays for each station / trip_id pair. \n",
"\n",
"We begin with a simple query for one trip_id / station_id pair and build up to the generation of the full table, which relies on correspondence tables between sbb and timetable trip_ids (they need to be translated first, which is done in `match_datasets.ipynb`).\n",
"\n",
"#### Simple task : returning the distribution for a given station / trip id\n",
"\n",
"Let's begin by exploring _sbb_ data and compute a distribution step by step for a given station / trip_id "
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2133164"
]
}
],
"source": [
"# Load sbb data \n",
"username='acoudray'\n",
"sbb = spark.read.orc(\"/user/{}/sbb_filt_forDelays.orc\".format(username))\n",
"sbb.count()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------------------+\n",
"| haltestellen_name|\n",
"+-------------------+\n",
"|Winkel am Zürichsee|\n",
"| Zürich Flughafen|\n",
"| Kemptthal|\n",
"| Urdorf|\n",
"| Zürich Wiedikon|\n",
"+-------------------+\n",
"only showing top 5 rows"
]
}
],
"source": [
"sbb.select(\"haltestellen_name\").distinct().show(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here we show the first few lines of all unique stations. We pick one of them and show its first associated trip id."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+----------------+\n",
"|fahrt_bezeichner|\n",
"+----------------+\n",
"| 85:11:1507:002|\n",
"| 85:11:1509:003|\n",
"| 85:11:1510:003|\n",
"| 85:11:1511:003|\n",
"| 85:11:1512:003|\n",
"+----------------+\n",
"only showing top 5 rows"
]
}
],
"source": [
"stop=\"Zürich Flughafen\"\n",
"sbb.filter(sbb.haltestellen_name == stop).select(\"fahrt_bezeichner\").show(5)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+----------------+--------------+-------------------+-------------+-------------+-------+\n",
"| station| trip_id| arrival_true|DiffInSeconds|DiffInMinutes|weekday|\n",
"+----------------+--------------+-------------------+-------------+-------------+-------+\n",
"|Zürich Flughafen|85:11:1507:002|2018-05-06 06:49:24| 24| 0.0| Sun|\n",
"|Zürich Flughafen|85:11:1507:002|2018-05-05 06:49:15| 15| 0.0| Sat|\n",
"|Zürich Flughafen|85:11:1507:002|2018-05-04 06:50:38| 98| 2.0| Fri|\n",
"|Zürich Flughafen|85:11:1507:002|2018-05-03 06:50:11| 71| 1.0| Thu|\n",
"|Zürich Flughafen|85:11:1507:002|2018-05-02 06:49:30| 30| 1.0| Wed|\n",
"|Zürich Flughafen|85:11:1507:002|2018-05-01 06:49:38| 38| 1.0| Tue|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-30 06:49:59| 59| 1.0| Mon|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-29 06:49:16| 16| 0.0| Sun|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-28 06:49:37| 37| 1.0| Sat|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-27 06:50:00| 60| 1.0| Fri|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-26 06:49:58| 58| 1.0| Thu|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-25 06:49:44| 44| 1.0| Wed|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-24 06:50:10| 70| 1.0| Tue|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-23 06:49:53| 53| 1.0| Mon|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-22 06:49:33| 33| 1.0| Sun|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-21 06:49:00| 0| 0.0| Sat|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-20 06:49:43| 43| 1.0| Fri|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-19 06:49:00| 0| 0.0| Thu|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-18 06:49:39| 39| 1.0| Wed|\n",
"|Zürich Flughafen|85:11:1507:002|2018-04-17 06:49:36| 36| 1.0| Tue|\n",
"+----------------+--------------+-------------------+-------------+-------------+-------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"trip_id=\"85:11:1507:002\"\n",
"\n",
"# Delay table for one station / trip_id pair:\n",
"#  1. keep the rows of the selected station and trip_id\n",
"#  2. select the 4 fields of interest and rename them\n",
"#  3. convert the date-like strings to timestamps\n",
"#  4. delay = actual arrival - scheduled arrival (seconds, then rounded minutes)\n",
"#  5. re-select to add the weekday of the scheduled arrival, most recent first\n",
"sbb_filt = (\n",
"    sbb\n",
"    .filter((sbb.fahrt_bezeichner == trip_id) & (sbb.haltestellen_name == stop))\n",
"    .select(col(\"haltestellen_name\").alias(\"station\"),\n",
"            col(\"fahrt_bezeichner\").alias(\"trip_id\"),\n",
"            col(\"an_prognose\").alias(\"arrival_true\"),\n",
"            col(\"ankunftszeit\").alias(\"arrival_expected\"))\n",
"    .withColumn('arrival_true',\n",
"                to_timestamp(col('arrival_true'), format='dd.MM.yyyy HH:mm:ss'))\n",
"    .withColumn('arrival_expected',\n",
"                to_timestamp(col('arrival_expected'), format='dd.MM.yyyy HH:mm'))\n",
"    .withColumn('DiffInSeconds',\n",
"                col('arrival_true').cast(LongType()) - col('arrival_expected').cast(LongType()))\n",
"    .withColumn('DiffInMinutes', round(col('DiffInSeconds')/60))\n",
"    .select(\"station\", \"trip_id\", \"arrival_true\", \"DiffInSeconds\", \"DiffInMinutes\",\n",
"            date_format('arrival_expected', 'E').alias('weekday'))\n",
"    .orderBy(\"arrival_true\", ascending=False)\n",
")\n",
"sbb_filt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Given a station name and a trip id, we can get all arrival times (scheduled and actual) and compute the delays in seconds and minutes. As we can see, the expected arrival time `ankunftszeit` is always the same, whereas the actual arrival time `an_prognose` (with `an_prognose_status` equal to `GESCHAETZT`) varies.\n",
"\n",
"We remove Saturdays and Sundays to compute the arrival distribution only based on week days "
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------------+-----+\n",
"|DiffInMinutes|count|\n",
"+-------------+-----+\n",
"| 0.0| 19|\n",
"| 1.0| 50|\n",
"| 2.0| 14|\n",
"| 3.0| 4|\n",
"| 4.0| 1|\n",
"| 11.0| 1|\n",
"| 21.0| 1|\n",
"+-------------+-----+"
]
}
],
"source": [
"sbb_filt.filter( (sbb_filt.weekday != \"Sun\") & (sbb_filt.weekday != \"Sat\") )\\\n",
" .groupBy('DiffInMinutes').count()\\\n",
" .orderBy(\"DiffInMinutes\").show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For next steps, we will be able to pivot this kind of table for multiple trip ids at once. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Load Table and make distribution from list of stations/trip_id\n",
"\n",
"Here we compute distribution of delays for a group of stations with all associated trips. The goal is to develop a script able to make a distribution for all stations/trips of interests.\n",
"\n",
"To train a bit the concept, let's first use all station with _Zurich_ pattern in their name and compute their delay distribution."
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+\n",
"| haltestellen_name|\n",
"+--------------------+\n",
"| Winkel am Zürichsee|\n",
"| Zürich Flughafen|\n",
"| Zürich Wiedikon|\n",
"| Zürich Stadelhofen|\n",
"|Zürich Tiefenbrunnen|\n",
"+--------------------+\n",
"only showing top 5 rows"
]
}
],
"source": [
"expr = \"Z.rich*\" # regular expression to be used to get all Zurich* stations\n",
"sbb.filter(sbb[\"haltestellen_name\"].rlike(expr))\\\n",
" .select(\"haltestellen_name\")\\\n",
" .distinct().show(5)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+---------------+-------------------+-------------+-------------+-------+\n",
"| station| trip_id| arrival_true|DiffInSeconds|DiffInMinutes|weekday|\n",
"+--------------------+---------------+-------------------+-------------+-------------+-------+\n",
"| Zürich HB| 85:11:543:001|2018-05-07 01:12:14| 14| 0.0| Mon|\n",
"| Zürich HB|85:11:30797:011|2018-05-07 00:58:45| 225| 4.0| Mon|\n",
"| Zürich HB|85:11:19694:001|2018-05-07 00:56:12| -48| -1.0| Mon|\n",
"| Zürich HB| 85:11:2294:003|2018-05-07 00:54:06| -54| -1.0| Mon|\n",
"| Zürich Hardbrücke|85:11:30797:011|2018-05-07 00:54:03| 123| 2.0| Mon|\n",
"| Zürich Stadelhofen|85:11:19694:001|2018-05-07 00:53:40| 40| 1.0| Mon|\n",
"| Zürich Hardbrücke|85:11:30794:002|2018-05-07 00:50:26| 86| 1.0| Mon|\n",
"| Zürich Oerlikon|85:11:30797:011|2018-05-07 00:49:46| 106| 2.0| Mon|\n",
"|Zürich Tiefenbrunnen|85:11:19694:001|2018-05-07 00:49:38| -22| 0.0| Mon|\n",
"| Zürich HB|85:11:30794:002|2018-05-07 00:47:16| 136| 2.0| Mon|\n",
"| Zürich HB|85:11:30692:007|2018-05-07 00:45:02| 242| 4.0| Mon|\n",
"| Zürich HB|85:11:20495:001|2018-05-07 00:45:01| -59| -1.0| Mon|\n",
"| Zürich Altstetten|85:11:18594:001|2018-05-07 00:44:53| 53| 1.0| Mon|\n",
"| Zürich Stadelhofen|85:11:30794:002|2018-05-07 00:44:27| 147| 2.0| Mon|\n",
"| Zürich Stadelhofen|85:11:30692:007|2018-05-07 00:42:37| 277| 5.0| Mon|\n",
"| Zürich HB| 85:11:4793:001|2018-05-07 00:42:10| 10| 0.0| Mon|\n",
"| Zürich Flughafen| 85:11:2294:003|2018-05-07 00:42:07| -113| -2.0| Mon|\n",
"| Zürich Hardbrücke|85:11:18594:001|2018-05-07 00:41:27| 27| 0.0| Mon|\n",
"| Zürich HB|85:11:18795:001|2018-05-07 00:41:05| 65| 1.0| Mon|\n",
"| Zürich Wipkingen|85:11:20495:001|2018-05-07 00:40:52| 52| 1.0| Mon|\n",
"+--------------------+---------------+-------------------+-------------+-------------+-------+\n",
"only showing top 20 rows"
]
}
],
"source": [
"expr = \"Z.rich*\"\n",
"\n",
"# First filter - Take Zurich-like stations , with define arrival time and GAESCHETZ status\n",
"# Select 4 fields of interest, rename \n",
"# Convert date-like string to timestamp\n",
"# Compute difference between scheduled and actual arrivals times\n",
"# reselect to generate weekday\n",
"sbb_filt = sbb.filter((sbb[\"haltestellen_name\"].rlike(expr)) )\\\n",
" .select(col(\"haltestellen_name\").alias(\"station\"), \\\n",
" col(\"fahrt_bezeichner\").alias(\"trip_id\"), \\\n",
" col(\"an_prognose\").alias(\"arrival_true\"),\\\n",
" col(\"ankunftszeit\").alias(\"arrival_expected\"))\\\n",
" .withColumn('arrival_true',to_timestamp(col('arrival_true'),\\\n",
" format='dd.MM.yyyy HH:mm:ss'))\\\n",
" .withColumn('arrival_expected',to_timestamp(col('arrival_expected'),\\\n",
" format='dd.MM.yyyy HH:mm'))\\\n",
" .withColumn('DiffInSeconds',col('arrival_true').cast(LongType()) - col('arrival_expected').cast(LongType()))\\\n",
" .withColumn('DiffInMinutes',round(col('DiffInSeconds')/60))\\\n",
" .select(\"station\", \"trip_id\", \"arrival_true\", \"DiffInSeconds\", \"DiffInMinutes\",\\\n",
" date_format('arrival_expected', 'E').alias('weekday'))\\\n",
" .orderBy(\"arrival_true\", ascending=False)\n",
"\n",
"# Remove Saturday and Sunday weekdays from table - show\n",
"sbb_filt = sbb_filt.filter( (sbb_filt.weekday != \"Sun\") & (sbb_filt.weekday != \"Sat\") )\n",
"sbb_filt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To make distribution, we use groupBy followed by a pivot using delay time in minutes. We fill null values with 0. No lower/upper bounds for now. Negative column keys means arrival ahead of schedule."
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+---------------+----+-----+-----+-----+-----+-----+-----+----+----+----+----+----+----+----+----+----+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+\n",
"| station| trip_id|null|-28.0|-15.0|-13.0|-12.0|-11.0|-10.0|-9.0|-8.0|-7.0|-6.0|-5.0|-4.0|-3.0|-2.0|-1.0|0.0|1.0|2.0|3.0|4.0|5.0|6.0|7.0|8.0|9.0|10.0|11.0|12.0|13.0|14.0|15.0|16.0|17.0|18.0|19.0|20.0|21.0|22.0|23.0|24.0|25.0|26.0|27.0|28.0|29.0|30.0|31.0|32.0|33.0|34.0|35.0|36.0|37.0|38.0|39.0|40.0|41.0|42.0|43.0|44.0|45.0|46.0|47.0|48.0|49.0|50.0|51.0|52.0|53.0|54.0|55.0|56.0|57.0|59.0|60.0|61.0|62.0|63.0|64.0|65.0|66.0|67.0|68.0|69.0|70.0|71.0|72.0|73.0|76.0|77.0|78.0|79.0|80.0|82.0|85.0|86.0|90.0|96.0|99.0|102.0|111.0|120.0|122.0|127.0|132.0|149.0|150.0|152.0|180.0|210.0|\n",
"+--------------------+---------------+----+-----+-----+-----+-----+-----+-----+----+----+----+----+----+----+----+----+----+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+\n",
"|Zürich Tiefenbrunnen|85:11:19639:001| 2| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 65| 10| 5| 2| 2| 2| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"| Zürich Enge|85:11:18267:001| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 5| 1| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"| Zürich HB|85:11:30992:009| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 9| 3| 1| 0| 0| 0| 0| 0| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"| Zürich Altstetten|85:11:19978:001| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 2| 70| 8| 2| 0| 1| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"| Zürich HB|85:11:18873:001| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 20| 22| 24| 7| 7| 1| 0| 0| 1| 0| 0| 0| 1| 0| 0| 0| 1| 0| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"+--------------------+---------------+----+-----+-----+-----+-----+-----+-----+----+----+----+----+----+----+----+----+----+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+\n",
"only showing top 5 rows"
]
}
],
"source": [
"sbb_filt.groupBy('station', 'trip_id').pivot(\"DiffInMinutes\").count()\\\n",
" .na.fill(0)\\\n",
" .show(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As an addition to this distribution, we can set up lower / upper bound to constrain the distribution to a specific window of interest. We do not really care about train being ahead, so we put them all in -1 column index, And we look at delays until 30 minutes only."
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+------------------+---------------+----+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+\n",
"| station| trip_id|-1.0|0.0|1.0|2.0|3.0|4.0|5.0|6.0|7.0|8.0|9.0|10.0|11.0|12.0|13.0|14.0|15.0|16.0|17.0|18.0|19.0|20.0|21.0|22.0|23.0|24.0|25.0|26.0|27.0|28.0|29.0|30.0|\n",
"+------------------+---------------+----+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+\n",
"| Zürich HB|85:11:30992:009| 9| 3| 1| 0| 0| 0| 0| 0| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"| Zürich HB|85:11:18873:001| 0| 20| 22| 24| 7| 7| 1| 0| 0| 1| 0| 0| 0| 1| 0| 0| 0| 1| 0| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"| Zürich Oerlikon|85:11:20438:002| 0| 0| 27| 50| 8| 4| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|Zürich Wollishofen|85:11:18822:001| 0| 2| 2| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"| Zürich Flughafen| 85:11:2270:001| 9| 46| 30| 2| 1| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"+------------------+---------------+----+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+\n",
"only showing top 5 rows"
]
}
],
"source": [
"lower_bound = -1.0\n",
"upped_bound = +30.0\n",
"\n",
"sbb_bounded = sbb_filt.withColumn('DiffInMinutes_bounded1',\\\n",
" greatest(col('DiffInMinutes'), lit(lower_bound) ))\\\n",
" .withColumn('DiffInMinutes_bounded2',\\\n",
" least(col('DiffInMinutes_bounded1'), lit(upped_bound) ))\n",
"\n",
"sbb_bounded.groupBy('station', 'trip_id').pivot(\"DiffInMinutes_bounded2\").count()\\\n",
" .na.fill(0)\\\n",
" .show(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Work from translation tables \n",
"\n",
"We will use data generated in `match_datasets.ipynb`. We begin by looking at all trip_id that are found in both dataset with at least 5 stations in common.\n",
"\n",
"Our goal is to find a match in sbb dataset for all _timetable_ trips (and not the other way around). So we will focus on getting this assymetrical correspondance table. \n",
"\n",
"When we find a clear one-one match, we will mark them as _resolved_, when there is a one-to-many relation, we will call it _partly_resolved_ and if we cannot find a sbb trip that correspond to a timetable trip_id, we will call it _fail_to_resolve_. \n",
"\n",
"These labels will be used to differentiate 3 different ways to compute probabilities :\n",
"- __One-to-one__ we find a clear match : we use distribution of delays on weekdays for a given trip/station_id based on all past sbb data. \n",
"- __One-to-many__ we find multiple match :\n",
" - First we double check the matches, if we have the same type of transportation for example.\n",
" - If they seem to be correct, we can merge the trips from sbb and get the merged distribution of their delays.\n",
"- __One-to-none__ we find no match : then we get the distribution of delays for similar transportation types, at similar hour (in a window), during weekdays of sbb dataset.\n",
" - Alternative : Try to find the best match and use only the closest location/time to infer a given distribution.\n",
" - Alternative 2 : use k-nearest neighbors in terms of location/time."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"243152\n",
"+------------------------+----------------------+-----+\n",
"|trip_id |fahrt_bezeichner |count|\n",
"+------------------------+----------------------+-----+\n",
"|241.TA.26-14-j19-1.43.H |85:11:19435:001 |13 |\n",
"|1419.TA.26-8-C-j19-1.8.R|85:3849:169172-07008-1|23 |\n",
"|1015.TA.26-4-j19-1.25.H |85:3849:49891-03002-1 |7 |\n",
"|1955.TA.26-13-j19-1.24.H|85:3849:89261-02013-1 |5 |\n",
"|1217.TA.26-72-j19-1.6.R |85:849:55624-25033-1 |7 |\n",
"+------------------------+----------------------+-----+\n",
"only showing top 5 rows"
]
}
],
"source": [
"joined_trip_atL5 = spark.read.csv('data/lgpt_guys/joined_trip_atL5.csv', header = True)\n",
"print joined_trip_atL5.count()\n",
"joined_trip_atL5.show(5, False)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"31103"
]
}
],
"source": [
"joined_trip_atL5.select('fahrt_bezeichner').distinct().count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We also use the subset of sbb data (we use the filtered data `sbb_filt` made at the top of the notebook)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can begin by assembling sbb data set with translation table `joined_trip_atL5` "
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10848628\n",
"+----------------+-----------------+----------------+----------------+-------------------+-------------------+-------+\n",
"|fahrt_bezeichner|haltestellen_name| ankunftszeit| abfahrtszeit| an_prognose| ab_prognose|stop_id|\n",
"+----------------+-----------------+----------------+----------------+-------------------+-------------------+-------+\n",
"| 85:11:10:002| Zürich HB|12.10.2018 21:51| |12.10.2018 21:51:50| |8503000|\n",
"| 85:11:10293:004| Zürich HB| |13.10.2018 00:25| |13.10.2018 00:26:08|8503000|\n",
"| 85:11:10293:004| Zürich Flughafen|13.10.2018 00:34|13.10.2018 00:35|13.10.2018 00:35:27|13.10.2018 00:36:44|8503016|\n",
"+----------------+-----------------+----------------+----------------+-------------------+-------------------+-------+\n",
"only showing top 3 rows"
]
}
],
"source": [
"username = 'acoudray'\n",
"sbb_filt = spark.read.orc(\"/user/{}/sbb_filt_forDelays_GeschaetzAndReal.orc\".format(username))\n",
"print(sbb_filt.count())\n",
"sbb_filt.show(3)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"16474877\n",
"+----------------+-----------------+----------------+----------------+-------------------+-------------------+-------+-------+-----+\n",
"|fahrt_bezeichner|haltestellen_name|ankunftszeit |abfahrtszeit |an_prognose |ab_prognose |stop_id|trip_id|count|\n",
"+----------------+-----------------+----------------+----------------+-------------------+-------------------+-------+-------+-----+\n",
"|85:11:10:002 |Zürich HB |12.10.2018 21:51| |12.10.2018 21:51:50| |8503000|null |null |\n",
"|85:11:10293:004 |Zürich HB | |13.10.2018 00:25| |13.10.2018 00:26:08|8503000|null |null |\n",
"|85:11:10293:004 |Zürich Flughafen |13.10.2018 00:34|13.10.2018 00:35|13.10.2018 00:35:27|13.10.2018 00:36:44|8503016|null |null |\n",
"|85:11:10536:004 |Zürich HB | |12.10.2018 20:03| |12.10.2018 20:04:20|8503000|null |null |\n",
"|85:11:10537:006 |Zürich HB |12.10.2018 21:59| |12.10.2018 22:01:43| |8503000|null |null |\n",
"|85:11:10538:004 |Zürich HB | |12.10.2018 21:03| |12.10.2018 21:04:42|8503000|null |null |\n",
"|85:11:10539:005 |Zürich HB |12.10.2018 22:59| |12.10.2018 23:00:10| |8503000|null |null |\n",
"|85:11:10540:004 |Zürich HB | |12.10.2018 22:03| |12.10.2018 22:06:29|8503000|null |null |\n",
"|85:11:10734:007 |Zürich Flughafen |12.10.2018 20:16|12.10.2018 20:18|12.10.2018 20:15:27|12.10.2018 20:18:39|8503016|null |null |\n",
"|85:11:10734:007 |Zürich HB |12.10.2018 20:27|12.10.2018 20:32|12.10.2018 20:26:44|12.10.2018 20:33:02|8503000|null |null |\n",
"+----------------+-----------------+----------------+----------------+-------------------+-------------------+-------+-------+-----+\n",
"only showing top 10 rows"
]
}
],
"source": [
"joined_sbb = sbb_filt.join(joined_trip_atL5, on = ['fahrt_bezeichner'], how = 'left_outer')\n",
"\n",
"print joined_sbb.count()\n",
"joined_sbb.show(10,False)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"46399"
]
}
],
"source": [
"joined_sbb.select(\"fahrt_bezeichner\", \"trip_id\").distinct().count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The reference table we will use is the `stop_times` tables containing trip_id and stop_id. As a next step, we will put them in the same order raptor will read them."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+--------------+------------+------------+\n",
"| trip_id|stop_id|arrival_time|departure_time|stop_sequence|pickup_type|drop_off_type|hour_departure| route_id|direction_id|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+--------------+------------+------------+\n",
"|666.TA.26-4-j19-1...|8576182| 07:02:00| 07:02:00| 1| 0| 0| 7.0| 26-4-j19-1| 1|\n",
"|243.TA.26-311-j19...|8590834| 07:16:00| 07:16:00| 1| 0| 0| 7.0|26-311-j19-1| 1|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+--------------+------------+------------+\n",
"only showing top 2 rows"
]
}
],
"source": [
"stop_times_curated = spark.read.csv('data/lgpt_guys/stop_times_curated.csv', header = True)\n",
"stop_times_curated.show(2)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"250777\n",
"+-----------+-----------------------+-------+\n",
"|stop_id_raw|trip_id |stop_id|\n",
"+-----------+-----------------------+-------+\n",
"|8576182 |666.TA.26-4-j19-1.20.R |8576182|\n",
"|8590834 |243.TA.26-311-j19-1.3.R|8590834|\n",
"|8591349 |406.TA.26-62-j19-1.3.R |8591349|\n",
"+-----------+-----------------------+-------+\n",
"only showing top 3 rows"
]
}
],
"source": [
"# We use only first 7 characters of stop_id to remove special cases\n",
"stop_times_curated = stop_times_curated.select(col('stop_id').alias('stop_id_raw'), \n",
" 'trip_id')\\\n",
" .withColumn('stop_id',col('stop_id_raw').substr(1, 7))\n",
"\n",
"print stop_times_curated.count()\n",
"stop_times_curated.show(3, False)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19800"
]
}
],
"source": [
"stop_times_curated.select('trip_id').distinct().count()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9478785\n",
"+-------------------------+-------+-----+----------------+------------+------------+-----------+-----------+\n",
"|trip_id |stop_id|count|fahrt_bezeichner|ankunftszeit|abfahrtszeit|an_prognose|ab_prognose|\n",
"+-------------------------+-------+-----+----------------+------------+------------+-----------+-----------+\n",
"|1.TA.26-89-j19-1.1.R |8591209|null |null |null |null |null |null |\n",
"|10.TA.1-305-j19-1.1.R |8587018|null |null |null |null |null |null |\n",
"|10.TA.26-69-j19-1.2.H |8591122|null |null |null |null |null |null |\n",
"|10.TA.26-845-j19-1.2.H |8580879|null |null |null |null |null |null |\n",
"|10.TA.26-918-j19-1.1.R |8590701|null |null |null |null |null |null |\n",
"|10.TA.79-485-j19-1.1.R |8590461|null |null |null |null |null |null |\n",
"|100.TA.26-748-j19-1.1.R |8590543|null |null |null |null |null |null |\n",
"|1001.TA.26-70-A-j19-1.5.H|8591106|null |null |null |null |null |null |\n",
"|1005.TA.26-70-A-j19-1.5.H|8591410|null |null |null |null |null |null |\n",
"|1008.TA.26-142-j19-1.2.R |8590830|null |null |null |null |null |null |\n",
"+-------------------------+-------+-----+----------------+------------+------------+-----------+-----------+\n",
"only showing top 10 rows"
]
}
],
"source": [
"stop_times_join = stop_times_curated.join(joined_sbb, on=['trip_id', 'stop_id'], \n",
" how='left_outer')\\\n",
" .select('trip_id', 'stop_id', 'count',\n",
" 'fahrt_bezeichner', 'ankunftszeit', 'abfahrtszeit',\n",
" 'an_prognose', 'ab_prognose')\n",
"\n",
"print stop_times_join.count()\n",
"stop_times_join.show(10, False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We then compute arrival delays using the following approach : \n",
"- arrival_true ( = `an_prognose`) - arrival_expected ( = `ankunftszeit`). Train being late have a positive delay and trains being ahead of schedule a negative one."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+------------------------+-------------------+-------------+-------------+-------+\n",
"|stop_id|trip_id |arrival_true |DiffInSeconds|DiffInMinutes|weekday|\n",
"+-------+------------------------+-------------------+-------------+-------------+-------+\n",
"|8503006|419.TA.26-2-j19-1.164.H |2018-10-12 06:10:43|43 |0 |Fri |\n",
"|8503000|419.TA.26-2-j19-1.164.H |2018-10-12 06:15:56|56 |0 |Fri |\n",
"|8503011|419.TA.26-2-j19-1.164.H |2018-10-12 06:19:45|45 |0 |Fri |\n",
"|8503010|419.TA.26-2-j19-1.164.H |2018-10-12 06:21:35|-25 |0 |Fri |\n",
"|8503202|419.TA.26-2-j19-1.164.H |2018-10-12 06:29:28|-32 |0 |Fri |\n",
"|8503204|419.TA.26-2-j19-1.164.H |2018-10-12 06:34:42|-18 |0 |Fri |\n",
"|8503204|214.TA.26-24-j19-1.121.R|2018-10-12 06:23:17|17 |0 |Fri |\n",
"|8503204|74.TA.26-2-j19-1.9.R |2018-10-12 06:23:17|17 |0 |Fri |\n",
"|8503202|214.TA.26-24-j19-1.121.R|2018-10-12 06:27:58|-2 |0 |Fri |\n",
"|8503202|74.TA.26-2-j19-1.9.R |2018-10-12 06:27:58|-2 |0 |Fri |\n",
"+-------+------------------------+-------------------+-------------+-------------+-------+\n",
"only showing top 10 rows"
]
}
],
"source": [
"stop_times_diff = stop_times_join.select( col(\"an_prognose\").alias(\"arrival_true\"),\\\n",
" col(\"ankunftszeit\").alias(\"arrival_expected\"),\\\n",
" 'trip_id', 'stop_id')\\\n",
" .withColumn('arrival_true',to_timestamp(col('arrival_true'),\\\n",
" format='dd.MM.yyyy HH:mm:ss'))\\\n",
" .withColumn('arrival_expected',to_timestamp(col('arrival_expected'),\\\n",
" format='dd.MM.yyyy HH:mm'))\\\n",
" .withColumn('DiffInSeconds',col('arrival_true').cast(LongType()) - col('arrival_expected').cast(LongType()))\\\n",
" .withColumn('DiffInMinutes',(col('DiffInSeconds')/60).cast('integer'))\\\n",
" .select(\"stop_id\", \"trip_id\", \"arrival_true\", \"DiffInSeconds\", \"DiffInMinutes\",\\\n",
" date_format('arrival_expected', 'E').alias('weekday'))\n",
"\n",
"# Remove Saturday and Sunday weekdays from table - show\n",
"stop_times_diff = stop_times_diff.filter( (stop_times_diff.weekday != \"Sun\") & (stop_times_diff.weekday != \"Sat\") )\n",
"stop_times_diff.show(10, False)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+-------------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"|stop_id|trip_id |-1 |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |\n",
"+-------+-------------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"|8503020|45.TA.26-7-A-j19-1.12.H |0 |537|107|38 |13 |2 |3 |4 |1 |1 |1 |0 |1 |1 |0 |1 |1 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8594307|44.TA.1-11-B-j19-1.2.H |0 |1 |4 |1 |2 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503125|59.TA.26-5-A-j19-1.28.R |0 |578|179|30 |12 |5 |2 |0 |1 |1 |0 |1 |1 |0 |0 |2 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8590275|501.TA.1-2-A-j19-1.15.R |0 |23 |28 |9 |4 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503000|147.TA.26-15-j19-1.41.H |0 |271|114|20 |7 |2 |1 |0 |0 |0 |0 |0 |0 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503203|590.TA.26-8-A-j19-1.353.H|0 |463|648|340|106|34 |20 |8 |5 |3 |6 |3 |3 |7 |3 |1 |1 |1 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8502208|432.TA.26-24-j19-1.220.R |0 |86 |40 |10 |3 |2 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503305|201.TA.26-24-j19-1.121.R |0 |184|60 |22 |7 |5 |2 |0 |1 |0 |0 |0 |0 |1 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8590279|136.TA.1-4-B-j19-1.10.H |0 |4 |4 |3 |2 |2 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503003|258.TA.26-16-A-j19-1.93.H|85 |913|359|167|53 |34 |20 |10 |4 |5 |3 |2 |0 |0 |1 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503011|571.TA.26-8-A-j19-1.347.H|0 |590|457|116|40 |16 |4 |7 |1 |1 |2 |1 |1 |1 |0 |0 |0 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503020|389.TA.26-7-A-j19-1.108.R|0 |243|150|19 |1 |1 |0 |1 |0 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8517376|197.TA.1-17-A-j19-1.16.R |0 |274|499|212|101|34 |4 |2 |1 |0 |0 |1 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503006|377.TA.26-7-A-j19-1.108.R|0 |541|223|58 |9 |2 |2 |1 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503104|135.TA.26-6-A-j19-1.32.R |0 |394|165|63 |30 |10 |5 |5 |2 |2 |1 |1 |0 |1 |1 |0 |0 |0 |0 |0 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8502187|187.TA.1-17-A-j19-1.16.R |1 |479|71 |6 |4 |1 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503020|105.TA.26-5-A-j19-1.37.R |0 |443|226|98 |35 |14 |5 |2 |0 |2 |1 |0 |2 |1 |2 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8502221|54.TA.26-5-A-j19-1.28.R |152|492|116|41 |14 |6 |8 |2 |1 |0 |0 |1 |0 |1 |2 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8502273|101.TA.1-17-A-j19-1.9.R |2 |507|330|205|95 |25 |4 |2 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503052|238.TA.26-10-B-j19-1.10.R|0 |260|82 |47 |21 |12 |6 |3 |3 |1 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"+-------+-------------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"only showing top 20 rows"
]
}
],
"source": [
"# we bound distribution to this \n",
"lower_bound = -1\n",
"upped_bound = +30\n",
"\n",
"stop_times_bounded = stop_times_diff.withColumn('DiffInMinutes_bounded1',\\\n",
" greatest(col('DiffInMinutes'), lit(lower_bound) ))\\\n",
" .withColumn('DiffInMinutes_bounded2',\\\n",
" least(col('DiffInMinutes_bounded1'), lit(upped_bound) ))\n",
"\n",
"stop_times_distribution = stop_times_bounded.groupBy('stop_id', 'trip_id')\\\n",
" .pivot(\"DiffInMinutes_bounded2\").count()\\\n",
" .na.fill(0)\n",
"\n",
"stop_times_distribution.show(20, False)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12309"
]
}
],
"source": [
"stop_times_distribution.count()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"stop_times_distribution.write.csv('data/lgpt_guys/distribution_geschaetzAndReal.csv', \\\n",
" header = True, mode=\"overwrite\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Analysing matches found \n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+------------+------------+--------------+-------------+-----------+-------------+--------------+------------+------------+\n",
"| trip_id| stop_id|arrival_time|departure_time|stop_sequence|pickup_type|drop_off_type|hour_departure| route_id|direction_id|\n",
"+--------------------+------------+------------+--------------+-------------+-----------+-------------+--------------+------------+------------+\n",
"|666.TA.26-4-j19-1...| 8576182| 07:02:00| 07:02:00| 1| 0| 0| 7.0| 26-4-j19-1| 1|\n",
"|243.TA.26-311-j19...| 8590834| 07:16:00| 07:16:00| 1| 0| 0| 7.0|26-311-j19-1| 1|\n",
"|406.TA.26-62-j19-...| 8591349| 07:24:00| 07:24:00| 1| 0| 0| 7.0| 26-62-j19-1| 1|\n",
"|62.TA.57-2-Y-j19-...|8503000:0:13| 07:34:00| 07:34:00| 1| 0| 0| 7.0|57-2-Y-j19-1| 0|\n",
"|1179.TA.26-5-B-j1...| 8591245| 07:36:00| 07:36:00| 1| 0| 0| 7.0|26-5-B-j19-1| 1|\n",
"+--------------------+------------+------------+--------------+-------------+-----------+-------------+--------------+------------+------------+\n",
"only showing top 5 rows"
]
}
],
"source": [
"stop_times_curated = spark.read.csv('data/lgpt_guys/stop_times_curated.csv', header = True)\n",
"stop_times_curated.show(5)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"250777\n",
"+-----------+-----------------------+-------+\n",
"|stop_id_raw|trip_id |stop_id|\n",
"+-----------+-----------------------+-------+\n",
"|8576182 |666.TA.26-4-j19-1.20.R |8576182|\n",
"|8590834 |243.TA.26-311-j19-1.3.R|8590834|\n",
"|8591349 |406.TA.26-62-j19-1.3.R |8591349|\n",
"+-----------+-----------------------+-------+\n",
"only showing top 3 rows"
]
}
],
"source": [
"# We use only first 7 characters of stop_id to remove special cases\n",
"stop_times_curated = stop_times_curated.select(col('stop_id').alias('stop_id_raw'), \n",
" 'trip_id')\\\n",
" .withColumn('stop_id',col('stop_id_raw').substr(1, 7))\n",
"\n",
"print stop_times_curated.count()\n",
"stop_times_curated.show(3, False)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12309\n",
"+-------+-------------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"|stop_id|trip_id |-1 |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |\n",
"+-------+-------------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"|8503003|286.TA.26-11-j19-1.80.H |0 |395|75 |23 |5 |1 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503089|40.TA.26-4-B-j19-1.1.R |0 |378|238|194|70 |46 |30 |11 |9 |8 |6 |4 |2 |2 |1 |0 |2 |2 |1 |0 |1 |1 |0 |0 |0 |0 |0 |1 |0 |0 |0 |0 |\n",
"|8503094|166.TA.26-4-B-j19-1.7.H |0 |28 |158|102|50 |23 |21 |15 |7 |7 |1 |0 |1 |1 |2 |0 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503203|580.TA.26-8-A-j19-1.347.H|0 |659|504|246|110|58 |32 |13 |3 |1 |0 |5 |6 |3 |2 |2 |1 |1 |0 |2 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8502223|116.TA.26-14-j19-1.18.R |0 |31 |144|56 |12 |3 |0 |0 |0 |0 |3 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503053|55.TA.79-10-B-j19-1.3.H |0 |165|43 |7 |2 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503000|37.TA.26-15-j19-1.17.R |8 |220|93 |47 |26 |9 |5 |5 |1 |1 |1 |0 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503006|297.TA.26-14-j19-1.41.H |0 |123|197|59 |14 |11 |6 |1 |1 |0 |0 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503091|24.TA.26-4-B-j19-1.1.R |0 |184|258|64 |7 |2 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|8503147|150.TA.26-3-j19-1.12.H |5 |470|119|43 |14 |7 |1 |3 |1 |0 |0 |1 |1 |0 |1 |0 |0 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"+-------+-------------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"only showing top 10 rows"
]
}
],
"source": [
"stop_times_distrib = spark.read.csv('data/lgpt_guys/distribution_geschaetzAndReal.csv', \\\n",
" header = True)\n",
"print stop_times_distrib.count()\n",
"stop_times_distrib.show(10, False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many unique combination of stop_id / trip_id do we have ?"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12309"
]
}
],
"source": [
"stop_times_distrib.select(\"stop_id\",\"trip_id\").distinct().count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many seems to have an empty line ? "
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4"
]
}
],
"source": [
"stop_times_distrib.filter( (stop_times_distrib['-1'] == 0) &\\\n",
" (stop_times_distrib['0'] == 0) &\\\n",
" (stop_times_distrib['1'] == 0) &\\\n",
" (stop_times_distrib['2'] == 0) ).count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Actually there is no line with all values equal to zero : it would not have been assembled at the pivot stage. Now we want to see how many of the `stop_times_curated` lines we can get from this table :"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"| key| -1| 0| 1| 2| 3| 4| 5| 6| 7| 8| 9| 10| 11| 12| 13| 14| 15| 16| 17| 18| 19| 20| 21| 22| 23| 24| 25| 26| 27| 28| 29| 30|\n",
"+--------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"|10.TA.1-11-B-j19-...| 0| 2| 2| 1| 3| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|10.TA.1-11-B-j19-...| 0| 3| 2| 2| 3| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|10.TA.1-11-B-j19-...| 0| 0| 4| 4| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|10.TA.1-11-B-j19-...| 0| 1| 5| 3| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|10.TA.1-11-B-j19-...| 0| 1| 3| 4| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|10.TA.26-912-j19-...| 0| 0| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|10.TA.79-10-B-j19...| 1|129|143| 71| 33| 17| 11| 4| 3| 3| 1| 1| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|10.TA.79-10-B-j19...| 0|333| 40| 22| 9| 5| 3| 4| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|10.TA.79-10-B-j19...| 1|340| 37| 21| 6| 2| 6| 2| 2| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|10.TA.79-10-B-j19...| 0| 0|177| 23| 7| 0| 1| 0| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|10.TA.79-10-B-j19...| 0|266| 81| 33| 15| 10| 3| 4| 3| 1| 0| 2| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|10.TA.79-10-B-j19...| 0|142|139| 69| 31| 16| 8| 5| 4| 1| 1| 1| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|100.TA.26-6-A-j19...| 0|325| 62| 27| 1| 2| 2| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|100.TA.26-6-A-j19...| 0|310| 84| 15| 5| 4| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|100.TA.26-6-A-j19...| 0|330| 73| 9| 4| 2| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|100.TA.26-6-A-j19...| 0|257|103| 42| 14| 1| 1| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|100.TA.26-6-A-j19...| 0| 69|228| 88| 23| 8| 1| 2| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|100.TA.26-6-A-j19...| 0|300| 86| 22| 8| 1| 0| 2| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|100.TA.26-6-A-j19...| 0|200|158| 38| 19| 2| 0| 1| 1| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"|100.TA.26-6-A-j19...| 0|349| 47| 14| 7| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|\n",
"+--------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"only showing top 20 rows"
]
}
],
"source": [
"stop_times_final = stop_times_curated.join(stop_times_distrib,\\\n",
" on = ['stop_id', 'trip_id'],\\\n",
" how = 'inner').drop('stop_id_raw')\\\n",
".orderBy('trip_id', 'stop_id')\\\n",
".withColumn('key2', concat(col('trip_id'), lit('__'), col('stop_id')))\\\n",
".drop('trip_id').drop('stop_id')\\\n",
".select(col('key2').alias('key'), \"*\")\\\n",
".drop('key2')\n",
"\n",
"stop_times_final.show(20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We still have null values. Let's count how many null we have on the full table"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"reference table stop_times number of lines : 250777\n",
"distribution table number of lines : 12309\n",
"Number of missing keys in distribution : 0"
]
}
],
"source": [
"print \"reference table stop_times number of lines : {}\".format(stop_times_curated.count())\n",
"print \"distribution table number of lines : {}\".format(stop_times_final.count())\n",
"print \"Number of missing keys in distribution : {}\".format(stop_times_final.filter(stop_times_final['0'].isNull()).count())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We write two version of the table : one with missing values, and one with missing values filled with '1',allowing development of next steps in the meantime (filling these values with a better approach is discussed in next section _Recovering missing distributions_)"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"stop_times_final_fill1 = stop_times_final.na.fill(1) # not working, not IntegerType ..."
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"|key |-1 |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |\n",
"+-----------------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"|1.TA.1-231-j19-1.1.H__8502553|1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|1.TA.1-231-j19-1.1.H__8502879|0 |78 |21 |6 |3 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|1.TA.1-231-j19-1.1.H__8502955|1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|1.TA.1-231-j19-1.1.H__8503598|0 |170|42 |4 |2 |0 |2 |2 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|1.TA.1-231-j19-1.1.H__8503598|0 |170|42 |4 |2 |0 |2 |2 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|1.TA.1-231-j19-1.1.H__8572600|1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|1.TA.1-231-j19-1.1.H__8572601|1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|1.TA.1-231-j19-1.1.H__8572602|1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|1.TA.1-231-j19-1.1.H__8572603|2 |94 |9 |2 |2 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"|1.TA.1-231-j19-1.1.H__8572747|1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |1 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |0 |\n",
"+-----------------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n",
"only showing top 10 rows"
]
}
],
"source": [
"# This contains the list of columns where we apply replace() function\n",
"all_column_names = stop_times_final.columns\n",
"columns_to_remove = ['key']\n",
"columns_for_replacement = [i for i in all_column_names if i not in columns_to_remove]\n",
"\n",
"# Doing the replacement on all the requisite columns\n",
"for i in columns_for_replacement:\n",
" stop_times_final_fill1 = stop_times_final_fill1.withColumn(i,when((col(i).isNull()),int(int(i)<=10))\\\n",
" .otherwise(col(i).cast(IntegerType())))\n",
"stop_times_final_fill1.show(10, False)"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"stop_times_final.write.csv('data/lgpt_guys/distribution_1to1match_wNull.csv', \\\n",
" header = True, mode=\"overwrite\")\n",
"stop_times_final_fill1.write.csv('data/lgpt_guys/distribution_1to1match_fill1.csv', \\\n",
" header = True, mode=\"overwrite\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Use local python to make definitive table with right ordering \n",
"\n",
"We first use the tables where null values were filled with 1 and 0 "
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"username = 'acoudray'\n",
"stop_times_final_fill1.write.csv(\"/user/{}/distribution_1to1match_fill1.csv\".format(username), \\\n",
" header = True, mode = 'overwrite')"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# for geschaetz only\n",
"stop_times_final.write.csv(\"/user/{}/distribution_1to1match_geschaetz.csv\".format(username), \\\n",
" header = True, mode = 'overwrite')"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# for geschaetz and real only\n",
"stop_times_final.write.csv(\"/user/{}/distribution_1to1match_geschaetzAndReal.csv\".format(username), \\\n",
" header = True, mode = 'overwrite')"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+--------+-----------+----------+--------------------+------------+\n",
"| trip_id|stop_id|arrival_time|departure_time|stop_sequence|pickup_type|drop_off_type|stop_int| route_id|sequence_1| trip_1| route_int|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+--------+-----------+----------+--------------------+------------+\n",
"|1.TA.1-231-j19-1.1.H|8572747| 09:37:00| 09:37:00| 1| 0| 0| 500|1-231-j19-1| 10.0|1.TA.1-231-j19-1.1.H|592705486850|\n",
"|1.TA.1-231-j19-1.1.H|8573721| 09:50:00| 09:50:00| 10| 0| 0| 599|1-231-j19-1| 11.0|1.TA.1-231-j19-1.1.H|592705486850|\n",
"|1.TA.1-231-j19-1.1.H|8503598| 09:53:00| 09:53:00| 11| 0| 0| 401|1-231-j19-1| 12.0|1.TA.1-231-j19-1.1.H|592705486850|\n",
"+--------------------+-------+------------+--------------+-------------+-----------+-------------+--------+-----------+----------+--------------------+------------+\n",
"only showing top 3 rows"
]
}
],
"source": [
"username = 'acoudray'\n",
"stop_times_curated.write.csv(\"/user/{}/stop_times_curated_sbbCompatible\".format(username), \\\n",
" header = True, mode = 'overwrite')\n",
"stop_times_curated.show(3)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('10.TA.1-11-B-j19-1.1.R__8590314',\n",
" array([0, 2, 2, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),\n",
" ('10.TA.1-11-B-j19-1.1.R__8590317',\n",
" array([0, 3, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),\n",
" ('10.TA.1-11-B-j19-1.1.R__8594304',\n",
" array([0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),\n",
" ('10.TA.1-11-B-j19-1.1.R__8594307',\n",
" array([0, 1, 5, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),\n",
" ('10.TA.1-11-B-j19-1.1.R__8594310',\n",
" array([0, 1, 3, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),\n",
" ('10.TA.26-912-j19-1.2.R__8576195',\n",
" array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),\n",
" ('10.TA.79-10-B-j19-1.2.R__8503051',\n",
" array([ 1, 129, 143, 71, 33, 17, 11, 4, 3, 3, 1, 1, 1,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0])),\n",
" ('10.TA.79-10-B-j19-1.2.R__8503052',\n",
" array([ 0, 333, 40, 22, 9, 5, 3, 4, 1, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0])),\n",
" ('10.TA.79-10-B-j19-1.2.R__8503053',\n",
" array([ 1, 340, 37, 21, 6, 2, 6, 2, 2, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0])),\n",
" ('10.TA.79-10-B-j19-1.2.R__8503054',\n",
" array([ 0, 0, 177, 23, 7, 0, 1, 0, 1, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0]))]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%local\n",
"\n",
"from hdfs3 import HDFileSystem\n",
"import pandas as pd\n",
"import numpy as np \n",
"import pickle \n",
"import gzip\n",
"from itertools import islice\n",
"\n",
"hdfs = HDFileSystem(host='hdfs://iccluster044.iccluster.epfl.ch', port=8020, user='ebouille')\n",
"\n",
"username = 'acoudray'\n",
"\n",
"# Load distribution file from HDFS and concatenate individual csv\n",
"distrib_files = hdfs.glob('/user/{}/distribution_1to1match_geschaetzAndReal.csv/*.csv'.format(username))\n",
"distrib = pd.DataFrame()\n",
"for file in distrib_files:\n",
" with hdfs.open(file) as f:\n",
" distrib = distrib.append(pd.read_csv(f))\n",
"distrib = distrib.set_index('key')\n",
"\n",
"# zip index and values to get {key : np.array()} shape \n",
"d = dict(zip(distrib.index, np.array(distrib.values)))\n",
"\n",
"# Write it to local \n",
"with gzip.open(\"../data/distributions_geschaetzAndReal.pkl.gz\", \"wb\") as output_file:\n",
" pickle.dump(d, output_file)\n",
"\n",
"# Functon to take a slice from a dictionnary - head equivalent\n",
"def take(n, iterable):\n",
" \"Return first n items of the iterable as a list\"\n",
" return list(islice(iterable, n))\n",
"\n",
"# display a slice of it\n",
"take(10, d.items())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many RAM does the dictionnary occupy when it is open ? Open pickle and calculate amount of memory occupied using _resource_ lib"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"length of dict : 246968\n",
"Data size is: 106968218\n"
]
}
],
"source": [
"%local \n",
"\n",
"import pickle \n",
"import gzip\n",
"import sys\n",
"import os\n",
"import resource\n",
"\n",
"with gzip.open(\"../data/distributions.pickle\", \"rb\") as input_file:\n",
" d = pickle.load(input_file)\n",
" \n",
"\n",
"d['1290.TA.26-32-j19-1.12.H__8591151']\n",
"print('length of dict : ',len(d))\n",
"\n",
"def getsizeof_r(obj):\n",
" total = 0\n",
" if isinstance(obj, list):\n",
" for i in obj:\n",
" total += getsizeof_r(i)\n",
" elif isinstance(obj, dict):\n",
" for k, v in obj.items():\n",
" total += getsizeof_r(k) + getsizeof_r(v)\n",
" else:\n",
" total += sys.getsizeof(obj)\n",
" return total\n",
"\n",
"print('Data size is: {}'.format(getsizeof_r(d)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many time does it take to access elements in the dictionnary ?"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0 1008 405 207 95 39 25 11 5 3 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0]\n",
"running time to get value from key when exists : 0.0004305839538574219\n",
"\n",
"KEY ERROR: .26-32-j19-1.12.H__8591151 not found un distribution dictionnary\n",
"running time to get error when key does NOT exists : 0.00010466575622558594\n",
"\n"
]
}
],
"source": [
"%local\n",
"\n",
"import pickle \n",
"import gzip\n",
"import time\n",
"\n",
"def get_distribution(key, dico):\n",
" if key in dico:\n",
" print(dico[key])\n",
" else:\n",
" print(\"KEY ERROR: {} not found un distribution dictionnary\".format(key))\n",
" \n",
"with gzip.open(\"../data/distributions.pickle\", \"rb\") as input_file:\n",
" d = pickle.load(input_file)\n",
" \n",
"this_key = '1290.TA.26-32-j19-1.12.H__8591151'\n",
"\n",
"start = time.time()\n",
"get_distribution(this_key, d)\n",
"end = time.time()\n",
"print(\"running time to get value from key when exists : {}\\n\".format(end - start))\n",
"\n",
"start = time.time()\n",
"get_distribution(this_key.replace('1290.TA',''), d)\n",
"end = time.time()\n",
"print(\"running time to get error when key does NOT exists : {}\\n\".format(end - start))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"when key exists we access it in $5\\cdot10^{-4}$ seconds and when it does not exists error message is displayed in $1\\cdot10^{-4}$ seconds. Should be more than enough to be called multiple time when using raptor."
]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Make function to compute probabilities from distributions\n",
- "\n",
- "We make a script that takes a key, a arrival time and a departure time to compute a probability to be at least 2 minutes ahead for transfert. We assume that with less than 2 minutes, we miss the transfert.\n",
- "\n",
- "We will use a Poisson distribution to compute this probability using its cumulative distribution.\n",
- "\n",
- "A Poisson Process meets the following criteria (in reality many phenomena modeled as Poisson processes don’t meet these exactly):\n",
- "- Events are independent of each other. The occurrence of one event does not affect the probability another event will occur.\n",
- "- The average rate (events per time period) is constant.\n",
- "- Two events cannot occur at the same time.\n",
- "\n",
- "Bounds for the tail probabilities of a Poisson random variable ${\\displaystyle X\\sim \\operatorname {Pois} (\\lambda )}$ can be derived using a Chernoff bound argument: \n",
- "\n",
- "$${\\displaystyle P(X\\leq x)\\leq {\\frac {(e\\lambda )^{x}e^{-\\lambda }}{x^{x}}},{\\text{ for }}x<\\lambda .}$$\n",
- "\n",
- "So in our case all we need to find is $\\lambda$. The positive real number λ is equal to the expected value of X and also to its variance :\n",
- "\n",
- "$${\\lambda =\\operatorname {E} (X)=\\operatorname {Var} (X)}$$\n",
- "\n",
- "We can easily find $\\lambda$ by finding the _average number of success per unit time_. We have a distribution going from -1 to +30, therefore we will iterate over it and sum up all successes $x_t$ at each time point $t$, for all our time points. \n",
- "\n",
- "$$ {\\lambda = \\frac{1}{N} \\displaystyle\\sum_{t=-1}^{N=30} x_t \\cdot t}$$"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 40,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "lambda (expectation given distribution): 4.0 \n",
- "\n",
- "Probability of success for transfer time = 5.0 minutes : 0.7851303870304052\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "0.7851303870304052"
- ]
- },
- "execution_count": 40,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%local\n",
- "\n",
- "import pickle \n",
- "import gzip\n",
- "import time\n",
- "import math \n",
- "import datetime\n",
- "import time\n",
- "from scipy.stats import poisson\n",
- "\n",
- "with gzip.open(\"../data/distributions.pickle\", \"rb\") as input_file:\n",
- " d = pickle.load(input_file)\n",
- " \n",
- "def get_distrib(key, dico):\n",
- " if key in dico:\n",
- " return dico[key]\n",
- " else:\n",
- " raise ValueError(\"KEY ERROR: {} not found un distribution dictionnary\".format(key))\n",
- " \n",
- "def evaluate_lamda(distrib):\n",
- " # First calculate total number of measures N\n",
- " N = 0\n",
- " for x in distrib:\n",
- " N += x\n",
- "\n",
- " lambda_p = 0 # expectation - we want to calculate it\n",
- " t = -1 # time = index - 1\n",
- "\n",
- " for x in distrib:\n",
- " lambda_p += t*x\n",
- " t += 1\n",
- "\n",
- " # calculate lambda which is the expectation of x\n",
- " if N > 0:\n",
- " lambda_p /= N \n",
- " print('lambda (expectation given distribution): ',lambda_p, '\\n')\n",
- " return lambda_p\n",
- " else : \n",
- " raise ValueError(\"ERROR : {} distribution has 0 counts\".format(key))\n",
- " #print('Returning 1 to avoid later problem... \\n')\n",
- " return 1\n",
- "\n",
- "def process_time(str_time):\n",
- " x = time.strptime(str_time,'%H:%M')\n",
- " return datetime.timedelta(hours=x.tm_hour,minutes=x.tm_min,seconds=x.tm_sec).total_seconds()\n",
- "\n",
- "def get_transfer_time(arr_time, dep_time, delta=2.0):\n",
- " diff_time_min = ( process_time(dep_time) - process_time(arr_time) ) / 60\n",
- " return diff_time_min - delta\n",
- "\n",
- "def poisson_proba(trip_id, stop_id, arr_time, dep_time, dico):\n",
- " # Generate key from trip_id / stop_id \n",
- " key = str(trip_id) + '__' + str(stop_id[0:7]) # 7 first char to be sbb-compatible\n",
- "\n",
- " # Get distribution from dictionnary\n",
- " distrib = get_distrib(key, dico)\n",
- " \n",
- " # Calculate transfer time at disposal \n",
- " T = get_transfer_time(arr_time, dep_time)\n",
- " \n",
- " # Get lambda value to calculate proba\n",
- " lambda_p = evaluate_lamda(distrib)\n",
- "\n",
- " # Get proba\n",
- " poisson_p = poisson.cdf(T, lambda_p)\n",
- " print('Probability of success for transfer time = {} minutes : '.format(T),poisson_p)\n",
- "\n",
- " return poisson_p\n",
- "\n",
- "# 129.TA.90-173-Y-j19-1.1.H__8530643\n",
- "# input data :\n",
- "trip_id = '129.TA.90-173-Y-j19-1.1.H'\n",
- "stop_id = '8530643'\n",
- "arrival_time = '07:45'\n",
- "departure_time = '07:52'\n",
- "Pr = poisson_proba(trip_id, stop_id, arrival_time, departure_time, d)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# trip_id missing \n",
- "\n",
- "stop_id , time , transport_type -> estimate proba \n",
- "\n",
- "# Make recovery tables\n",
- "1500 x 24 x 5 = 180'000\n",
- "\n",
- "# Validate recovery table \n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "PySpark",
"language": "",
"name": "pysparkkernel"
},
"language_info": {
"codemirror_mode": {
"name": "python",
"version": 3
},
"mimetype": "text/x-python",
"name": "pyspark",
"pygments_lexer": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
diff --git a/notebooks/match_datasets.ipynb b/notebooks/hdfs_match_datasets.ipynb
similarity index 100%
rename from notebooks/match_datasets.ipynb
rename to notebooks/hdfs_match_datasets.ipynb
diff --git a/notebooks/proba_functions.ipynb b/notebooks/proba_functions.ipynb
index 70e7bc6..0b1962b 100644
--- a/notebooks/proba_functions.ipynb
+++ b/notebooks/proba_functions.ipynb
@@ -1,2682 +1,2668 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Compute probability of missing a transfer from delays distributions\n",
+ "## Compute probability of transfer success from delay distributions\n",
"\n",
- "Let's first have a look at a slice of the dictionnary of distribution"
+ "To compute the probability of success of a given transfer, we compare the arrival delay distribution of the incoming trip with the departure of the next trip. To do that, we need a delay distribution for each trip arrival at a given station. Whenever we have a clear match, we can use a __cumulative distribution function__ to compute $P(T \\leq t)$, the probability that the arrival delay $T$ does not exceed $t$ :\n",
+ "\n",
+ "$${\\displaystyle F_{T}(t)=\\operatorname {P} (T\\leq t)=\\sum _{t_{i}\\leq t}\\operatorname {P} (T=t_{i})=\\sum _{t_{i}\\leq t}p(t_{i}).}$$\n",
+ "\n",
+ "The strategy is to rely entirely on the past data we have to compute $p(t_i)$, without building a model, which would imply making additional assumptions. If we have enough data for a given transfer with a known trip_id x stop_id, we use the above-mentioned formula and compute each $p(t_i)$ simply as :\n",
+ "\n",
+ "$$p(t_i) = \\frac{x_i}{\\sum x_i}$$\n",
+ "\n",
+ "with $x_i$ being the number of delays of duration $t_i$ recorded in the SBB dataset.\n",
+ "\n",
+ "### Recover missing data \n",
+ "\n",
+ "As we are using SBB data to compute delays from timetable trip_id, we may encounter problems with the translation between the two datasets (some trip_id/stop_id pairs have no correspondence between the datasets!). We may also encounter trip_id/stop_id pairs with too few data points. To recover missing or faulty data, the strategy is the following :\n",
+ "\n",
+ "1. If we have more than 100 data points in `real` group, we rely exclusively on its delay distribution to compute probabilities for a given transfer on a `trip_id x stop_id`.\n",
+ "\n",
+ "_Note : the `real` group corresponds to arrival times with status `geschaetz` or `real`, meaning they come from actual measurements._\n",
+ "\n",
+ "2. If we do not find enough data within the `real` group, we use delay distributions in the `all` group (contains all delays including `prognose` status), if there are more than 100 data points for a given `trip_id x stop_id`.\n",
+ "\n",
+ "3. If `all` group still does not have more than 100 data points, we rely on `recovery tables` to estimate delay distributions. The strategy is the following :\n",
+ "    - As we will always know the `stop_id`, the `time` and the `transport_type`, we rely on arrival delays from aggregated values of similar transfers. \n",
+ "    - First, we compute a table of distributions with all possible combinations of `stop_id`, `time` (rounded to the hour) and `transport_type`, and aggregate all the counts we have to compute cumulative distribution probabilities. \n",
+ "    - If there are fewer than 100 data points in one of these intersections, we use the last possibility : a table with `transport_type` x `time` aggregate counts.\n",
+ " - The last values with no match are given the overall average of cumulative distribution probabilities for each `transport_type` with no limit for the minimum number of data points.\n",
+ "\n",
+ "Following this approach, we can find cumulative distribution probabilities for every combination of `trip_id x stop_id` as defined in `stop_times_df`. We will make a table with the same row order so that McRaptor can easily find their indexes. "
]
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"import pickle \n",
"import gzip\n",
"from itertools import islice\n",
"import matplotlib as mlt \n",
"import matplotlib.pyplot as plt\n",
"import numpy as np \n",
"import pandas as pd \n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Functon to take a slice from a dictionnary - head equivalent\n",
"def take(n, iterable):\n",
" \"Return first n items of the iterable as a list\"\n",
" return list(islice(iterable, n))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load dictionnaries of distributions"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"len dict_real : 12309\n",
"[('10.TA.1-11-B-j19-1.1.R__8590314', array([0, 2, 2, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('10.TA.1-11-B-j19-1.1.R__8590317', array([0, 3, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('10.TA.1-11-B-j19-1.1.R__8594304', array([0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('10.TA.1-11-B-j19-1.1.R__8594307', array([0, 1, 5, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('10.TA.1-11-B-j19-1.1.R__8594310', array([0, 1, 3, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))]\n",
"len dict_all : 246968\n",
"[('1286.TA.26-32-j19-1.12.H__8591182', array([ 0, 1158, 306, 162, 94, 24, 28, 21, 3, 2, 0,\n",
" 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('1286.TA.26-32-j19-1.12.H__8591184', array([ 1, 762, 552, 292, 118, 48, 13, 8, 0, 1, 1, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0])), ('1286.TA.26-32-j19-1.12.H__8591195', array([ 0, 1083, 444, 143, 64, 35, 16, 9, 3, 1, 0,\n",
" 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('1286.TA.26-32-j19-1.12.H__8591200', array([ 2, 239, 227, 228, 212, 128, 74, 42, 29, 17, 3, 3, 2,\n",
" 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 1])), ('1286.TA.26-32-j19-1.12.H__8591209', array([ 0, 1151, 308, 169, 94, 24, 29, 16, 4, 3, 1,\n",
" 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))]\n"
]
}
],
"source": [
"with gzip.open(\"../data/distributions_geschaetzAndReal.pkl.gz\", \"rb\") as input_file:\n",
" d_real = pickle.load(input_file)\n",
"\n",
"with gzip.open(\"../data/distributions.pickle\", \"rb\") as input_file:\n",
" d_all = pickle.load(input_file)\n",
"\n",
"# display a slice of it\n",
"print('len dict_real : ', len(d_real))\n",
"print(take(5, d_real.items()))\n",
"\n",
"# display a slice of it\n",
"print('len dict_all : ', len(d_all))\n",
"print(take(5, d_all.items()))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Probability using cumulative distribution based on frequency of delays \n",
"\n",
"When we have __enough data__ and no ambiguity about `trip_id` and `stop_id` for a given distribution, then we can compute the probability $P(x \\leq X)$ for every x (delay in minute). \n",
"\n",
"Let's take a __threshold of 100__ sample points (=number of time we could measure a delay) as a minimum number of points to use this approach. \n",
"\n",
"_How many keys in our distionnary of distribution have at least this number of samples ?_"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def plot_data_points_hist(dico):\n",
" list_tot_points = []\n",
" for key in dico:\n",
" distrib = dico[key]\n",
" list_tot_points.append(np.sum(distrib))\n",
"\n",
" tot_per_key = np.array(list_tot_points)\n",
" binwidth = 100\n",
" n_keys_less_than_binwidth = np.sum(np.array(tot_per_key < binwidth))\n",
" perc_key_to_recover = round(100 * ( n_keys_less_than_binwidth / len(tot_per_key) ), 2)\n",
" plt.figure(figsize = (10,5))\n",
" plt.hist(tot_per_key, bins = range(min(tot_per_key), max(tot_per_key) + binwidth, binwidth))\n",
" plt.title(\"Total number of data points per trip_id / stop_id key. N keys with less than {0} points: {1} ({2}%)\"\\\n",
" .format(binwidth, n_keys_less_than_binwidth, perc_key_to_recover))\n",
" plt.xlabel('n data points')\n",
" plt.ylabel('n keys')\n",
" return plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAo4AAAFNCAYAAACOmu5nAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deZglVXn48e/LjmwDMvKTGWRQiQZ3IYJxI2JgABFUUBRlCRENLpgYFYwRQVDQKGIAEQXZVEBiFBWDuIAryODCKjqyyM7ADPsO7++Pcy7cufTtPj3Tt5eZ7+d5+um6p7ZTVaeq3jp1qioyE0mSJGkky0x0BiRJkjQ1GDhKkiSpiYGjJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKnJEh84RkRGxDMnQT7OiYh/nqB5rxwR342IOyLimw3Dbx4R141H3hZHROwSET+c6HyMl4j4QUTsNqhpRMSsur8stzjzmCgRcXdEPL1Pv90j4hd9+k3q5R4u75NFRDytrv9lhxmm+VgcER+PiJPHLoejMxXW+Xha2o61iyIitoqIb090PhZHRLw3Ig4dabgJCxzrQabz92hE3Nf1e5c+40yJgGYS2hFYB3hyZu40lhOeyANsZn4tM7dsGXYynwhaT5KZuXVmnrA481rcaUTEuqPdByPi+Ig4aFHn2SozV83MKwc9n8XRFaSe2ZN+ckR8fIKytdgy8691/T8CE3uhPFoTfeEQEW+KiF9FxL0Rcc4Q/V8YERfW/hdGxAu7+kVEHBoRt9W/QyMixjqPk+FYGxHH9V581P3mxoi4MyL+1K/MRcTH6riv6Ur7dERcW8e9JiI+0jPOdhFxSY1JfhURG42QxYOBQ7rG/0REXBwRD/fu2xHxkZ4Y6L4aB63dJ/9X98RIP+zqt2JEHBYRN0TEgog4KiKW7+r/+Zr+64iY2ZX+1oj4Qs+svgzsEhFPGW5BJyxwrAeZVTNzVeCvwHZdaV+bqHxNdvVAMdrttj7wp8x8eBB5UrtFOTkt4jYflG2A/5voTCwBNo2Iv5/oTGhSmA98nq6goyMiVgC+A5wMrAmcAHynpgPsBewAvAB4PrAd8M5xyPO4ioiXA88YotengFmZuTrwOuCgiNi4Z9xnADsBN/aMeyzw7Dru31MCpjfUcTYEvga8C5gGfBc4o9/xOyL+DlgjM8/rSp4LfAj4fu/wmfnJnhjoUOCczLx1mNXQHSN1B/H7ApsAzwX+Bngx8NGar5cAGwP/D/hFHZaIWAP4YGe4rnzdD/wA2HWYfEBmTvgfcDXwmtq9ImUnuqH+fb6mrQLcBzwK3F3/1gVeAvwauJ1SMI4AVuiadgLP7DPfc4BPAL8E7gJ+CKxd+20OXDdMPj8OfJOyQ98FXFw32n7ALcC1wJY98/oU8BvgTsrBYK2u/psBv6rL8Qdg855xD675vG+o5QH+tg53O3Ap8LqafgDwIPBQXWd7DjHuysDxwALgMkqBuq6r/77AX+pyXga8vmue9wOP1GnfXtO3BX5Xl/Na4OPDbPvNgeuAjwC31nW8S1f/NYATgXnANZSCvkzttzvwi55t/S7gz3U9HAnEMPncpi7PXcD1wL/3yePudd0fAdwB/BHYoiePx1LK3/XAQcCyPeMeBtwGHNQz7dk92+cP/bZ5TfvnljwNs767p7Es8F91vV8JvLuuw+WGGf9bwBuGSI+6jLfU7X4x5UC2V122B+vyfXe48lr7HQ8cDZxdt825wPoNy/bYvg48GTij5uU3lP38F33Gm9W93MAbKeXwuZSL6075vw04jbrfUk4I7+2Z1kXU/WOEeX0Y+GlX+sn02U94Yjn/DOUksEa/sgesQAlIntc13lOAe4HpwNrA9+r6nw/8nLpf9cz7AOC/a/fywD3AZ7qOG/cDa3WvQ0q5faT2uxs4Yrj9s88yfxw4ufH4uDul/N4FXEU9flD2mXMp+8etwKl95vXXmrfOeeWlnX
VO2T8W1Olu3TXOHsDldZ5XAu8c4pj2Acr+cCOwR0P5/WdK8NCdtmXdrtGT39m1+1fAXl399gTOm6rH2j75Xo5yPnk+w5/Pn1XX9Zt60v+vzv9q6vl7iHFnUI5ZH6q/3wN8v6v/MpTj8JDHWOBjwFf69Ou7b9f+UcvQbsMMM1ze5wA7df1+K3Bt7X4z8KnaPRs4s3YfAby1z/R2oevYNOQwrRtvkH8sHJAdCJxHOchNrzvGJ7oLfs+4G1MOKstRDl6XA+/vKeDDBY5/oQR8K9ffhwwzr+58frzuIFvVeZ9IObj8B+UA+w7gqp55XU85Ga0C/A/1wFgL7W21cC8D/GP9Pb1r3L8Cz6nzWr4nX8tTrm4+QjlhvJqygz6rK68nD7P+D6GcONYC1gMuYeHAcSdKkL5MLYj3AE+t/Xan54Rc193z6vDPB24Gdugz782Bh4HPUS4QXlWn38n7iZQge7W6ff9EDX5751239fcoV4hPoxwAZw+TzxuBV9TuNYEX98nj7jWP/1rX9ZspJ6NOAPG/wJfqdn0KJVB5Z8+4763bbuUhpv+E7TPUNueJgWPfPA2zrbun8S5KwLle3fY/ZZjAsc7nVmC1IfptBVxY133nBNIpI8fTFTAzcnk9vv5+ZS0Th/duuz756w4cT6EEeatQ9rnr+02DhYOePWreOtPZh3I8mlnz8iXgG7Xfm4Dzu6bzAsp+u8IweezMa7Wap87xZMTAkbI/fRk4C3hSQ9k7Cji0azr78Hjg/ilKcL58/XsFQwRxddtcXLv/nnK8PL+r3x9612FvOWvZP4fbJxjm+FiX+86usvNU4Dm1+xuU4/EywErAy0fa/j3r/CHKcXxZ4F8oFRlR+29LqQELyjHrXurxg8ePaQfWdbtN7b/mCOV3qMDxX4Ef9KR9D/hA7b4D2LSr3ybAXX2m38nXpDrWUgLPIbdN7f9B4PDefbyr/1F1/SbwW2DVrn47Ad+p3VfTE3xRLgrvruNeCcys6e+hBln197KU8/0+ffL4TeCDffqNFDi+suZh1WGGuZpyHp1HqeB6QVe/OXQFy5TALykXAs+lHDtWplxwfqaWkbOHmdeLgfnDldXJcvur2y7AgZl5S2bOo1zxvr3fwJl5YWael5kPZ+bVlIPoq0Yxv69m5p8y8z7KieaFI43Q5eeZeVaWW8DfpBzIDsnMhygnrlkRMa1r+JMy85LMvAf4T+BNURqTv41SSM/MzEcz82xKYdima9zjM/PSupwP9eRjM2DVOu8HM/MnlJ36LY3L8Sbg4Mycn5nXAgu1e8jMb2bmDTVvp1KuMl/Sb2KZeU5mXlyHv4hyAB9pm/xnZj6QmedSanI662ZnYL/MvKtu388yTHmgrIPbM/OvlEBouO35ELBRRKyemQsy87fDDHsL8PnMfKiugyuAbSNiHcp2en9m3pOZt1Bq3nbuGveGzPzvuu3uG3YtLGy4bd43T6OY/pvq+Ndm5nxKMDGcV1IChbuG6PcQ5YTzbMrJ9fLM7L011NFSXr+fmT/LzAcoJ/+XRsR6LQtVy80bgY/VbXIJ5RbfSN5POUltnplza9q7gP/IzOtqXj4O7FhvWZ0B/E29rQWlXJ6amQ82zOs+Ss1ca9vP5Sn70VqUW1b3NpS9E4C3dLV5eztwUu1+iBJkrV/Lz8+znjV6/BrYMCKeTNn+xwIzImJVyj59bmP+O0azf3aMdHx8FHhuRKycmTdm5qVdy7g+sG5m3p+Zo213d01mfjlLu80TKOtrHYDM/H5m/iWLcykn81d0jfsQ5Tz2UGaeSQkMnjXK+UPZT+7oSbuDsq8N1f8OYNUR2jlOqmNtZk7rt23qPv9OSo3ekDJzb8r6eAXljsgDddzVgE9SLpj6jXtIHffFlH2jsy5/BLwqynMVK/D4Re6T+kxqGuVid1HsBpyemXcPM8wulGB+fcq6Pqsrtvg/YJ+ImB4R/w94X01/Uj32/Q/l4vdpwKcp5/f3RcT7IuJnEfG1njjlLkrQ2d
dkDBzXpVSTd1xT04YUEX8TEd+LiJsi4k5KQRmygWkfN3V130vZEVvd3NV9H3BrPch0ftMzvWu7uq+hnAzWphSGnSLi9s4f8HLKgWqocXutS6mafrRn+jMal2PdIfL2mIjYNSJ+35W35zLMOo6ITSPipxExLyLuoJx8h9smC2ow3T3/des4y/PE8jDcco1me76RcvK5JiLOjYiXDjPs9T0n1k4e1695vLFr/XyJUvvTMdy2G85I4/XLU6tht/sQtgHOHKpHDf6OoNyyuiUijomI1Yeb7wjl9bF81QPqfNqXbTql9nA0ywYlaDwyM7sf/lkf+N+ubXs55TbcOlnaA50KvK22QX0LjwdmLb4CrBMR2zUM+0xge+CArsB02LKXmedT9oHNI+LZdRpn1HE/Q6lZ/WFEXBkR+w4103qhM4cSJL6SEij+CngZixY4Lsrxtu/xsR433kw5xtwYEd+vywqlfVkAv4mISyPinxY1r5l5b+1cFSAito6I8yJifs3PNix8jLstF25TPtpzS8fdQO9+tDqPBym9/VcH7u5zEQBT41jb7fOUALw3eF5IZj5Sg8+ZlNphKBd5J9UgeLhxMzN/RzlnH1DT/kgJ6I6g1JauTbnV3u/BwAU8Hsw3i4gnUWpFh72wzcxfZuZ9mXlvZn6KUkvbuVA5mHIr//eUffPblED95jruYZn5gsx8M6Wy4GeU2G8vYAvKMa17/1+NJ16sLGQyBo43UA4UHU+raVCqX3t9kXK7bcMsjVw/QjlYLK576Lq6qFdk0xdzmt01Jk+jbNxbKSe4k+qVV+dvlXo11NHvQABl/azX8wDF0yi3wlrcOETeAIiI9Sm3x95DeSp7GuVWdmcdD5Wvr1NOUOtl5hqUW2LDbZM1I2KVnvnfQFk3nVqD7n6ty9XtCfnMzAsyc3vKifbblBrnfmb0XMV38ngt5Qp37a5tt3pmPme4eY+Ut8bx+uWpVd/t3kffwBEgM7+QmRsDG1Gaf3yw06tn0Jby+li+au3WWrQv2zzKLbnRLBuU9mQfjYg3dqVdS2nb1r1vrpSZnbyeQKkN2AK4NzN/3ZhHagB4AKX95UjHrMspt9F/EBGdmquWsncCpcbu7ZRajfvrvO/KzA9k5tMpDxX8W0Rs0Wfe51JuS78IuKD+3opy1+Fn/RZvhOUZjWGPj1nu+vwj5UL7j5TjFZl5U2a+IzPXpdRaHRVDvw5oVHmNiBUptTj/RbmAmEbZL8b8aWZK+9/n9+znz6/pnf4v6Or3gq5+Q5kKx9puWwCfqRVDnUD11xHx1j7DL8fjD9FsQalZ64y7HnBaRHy4YVwy8/TMfG5mPhnYn1Ljd0GfcS+iHPNG6/WUi+JzRjleUstbDSjfk5kz6v58G3Bhz4U59Q7FXpQmFM8FLspyJ+sCSpnq+FtKO+K+JmPg+A3KwXt6lEfTP0ZpIwAlgn5yfSKoYzVKG5e765XmvzA2/gSsFBHbRnm0/aOUdiGL420RsVG9yjiQciB/hLJ820V5D9SyEbFSrSKfOfzkHtOpWfhQRCwfEZtTnq47pXH804D9ImLNOs/3dvVbhVJI5wFExB6UQtdxMzAzHn/KD8o2mZ+Z99enuvrt5N0OiIgVIuIVwGuBb9Z1cxpwcESsVoPYf+Px8jAaC+WzzmuXiFij7jx3Um559fMUykFo+YjYibJznZnlduwPgc9GxOoRsUxEPCMiRtNc4mZKs4bR7o9D5mkU459Wx58ZEWuy8FXnQiJiA2DFzLy8T/+/qzXNnQco7ufx9Xkz0P1+xZbyuk1EvLxur09QGvw31dzWcvMt4OMR8aQor9HYrWHUSykNyI+MiNfVtKMp5W/9upzTI2L7rnn9ui7nZxldbWPHSZT2d7NHGjAzv0G5MP5RRDyjseydTDk5vY3Sho26HK+NiGfWgOQOSi1qv/J/LuUpy8tqsHsOpT3eVVmaEw2ld5svjr7Hx4hYJyK2r8HQA5QauEfrMu7UdQxdQDmODbWM82p6a3
5XoJwL5gEPR8TWlIuORdJZJkrgskxdvs7rVM6hbJv3RXntyntq+k/q/xMpQf+MiFiX8kDO8SPMcrIfa7v9DSUYfiGP3wrfjnIX4CkRsXNErFrX4VaUWv8f1+G2oJyrOuPeQLmAOLLuK++s57yo56l3d41LRGxcpzsdOAY4o9ZEDuVMeppj1WPbSpQ4a7m6XXvfc7obcOIwNcSdd6S+rK7HlSLig5Qa0F/W/jOivCYtImIzSjO4/YeY1OcobS3vpTyP8XdRLso3p7Tv7HgV5cnqviZj4HgQ5dbIRZSnnH5b0zrVx98Aroxyy2Jd4N8pgcldlCvNU8ciE7VqfG/K7aTrKSfDxX2H5EmUnfomysnifXVe11JuQ32EcjC6llJb07R96sF8O2BrypXjUcCuwxTyXgdQbktcRTkRPXYCzMzLKCfFX1MOCM+jFtjqJ5QT7k0R0XmVwN7AgRFxFyXwH+nq8ibKgf0G6isQuvL+Xsq6v5LSyPfrwHGNy9VtqHy+Hbg6ShOHd1Fqjvo5H9iQsn4PBnbMzNtqv10pJ5PL6nKczsLNDEbSeSn7bRExXDvL0eSpRedBiz9Q9rNvDTPstgwflK5ep7eAUpZuo9wOhdIubqO6z367sbx+nXLwm095AO5to1guKDXkq1LK1vHAV1tGysw/UE6mX64BweGU2vMf1vJ8HrBpz2gnUvaLkwEi4uiIOLpxfo9Q9pG1Goc/gXLR+ZOImMUIZa8eW35LCZp+3jWpDSntuO6m7NtHZeZP+8z2V5TG9Z3axcsoFwb9ahuhrLcdo7w/rvddcaMywvFxGUqAcwOlrLyKxysP/g44PyLupmzDfXKI93zWE+nBwC9rGd1shPzcRTl2n0ZZ52/l8SYAi+LtlNukX6TcfryPx2tNH6S8bmdXyu3Jf6I8aNhprvAlyqtiLqbcCfp+Tetn0h1ro7yX8BVPnAxkedbhps5fTb61NqFIyra+ri7Tf1Ha+55Rx72tZ9xHKLfqO20JX8/jbws5Gfjv+tdxOGWdX1Gn/45+C5ylzeYdEdF9bPgyZVu+hdJO+z662oxGxAxKTf6J9Og5hqxGKRsLKLHIbMpdkM6x/hmUffQeyh2GfTPzhz3TezUwLTP/t+b3N5Syci3wD9RXQdVAdxtGuHXeeUJMmhC1tunkzGytXR13EbE75QnRl090XjrGO09RXlh9RJaG/oOe1/GUp/o/OtKwk0FE7Ep5JcqkKR/dIuI4ygNaU2J9ajCmwrF2KouILYG9M3OHic7LooqI91KamH1ouOEm5Se2JE0651Ce5lOXKM1O9qbUmk46tVbyDZT2iZIGpNbyTenPMmbmf4881OS8VS1pMcTCn7Lq/hvydlCLzPx0ju5VQmMuIl7Rb9kmKD9bUW6d3ky5rTepRMQnKLcvP5OZV010fiQtGbxVLUmSpCbWOEqSJKmJgaMkSZKaLHUPx6y99to5a9asic6GJEnSiC688MJbM3NxP0AyZpa6wHHWrFnMmTNnorMhSZI0ooho+WTquPFWtSRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpiYGjJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpiYGjJEmSmix136oeD7P2/f4T0q4+ZNsJyIkkSdLYscZRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktRk4IFjRCwbEb+LiO/V3xtExPkRMTciTo2IFWr6ivX33Np/Vtc09qvpV0TEVl3ps2va3IjYd9DLIkmStDQbjxrHfYDLu34fChyWmc8EFgB71vQ9gQU1/bA6HBGxEbAz8BxgNnBUDUaXBY4EtgY2At5Sh5UkSdIADDRwjIiZwLbAV+rvAF4NnF4HOQHYoXZvX39T+29Rh98eOCUzH8jMq4C5wE
vq39zMvDIzHwROqcNKkiRpAAZd4/h54EPAo/X3k4HbM/Ph+vs6YEbtngFcC1D731GHfyy9Z5x+6ZIkSRqAgQWOEfFa4JbMvHBQ8xhFXvaKiDkRMWfevHkTnR1JkqQpaZA1ji8DXhcRV1NuI78aOByYFhHL1WFmAtfX7uuB9QBq/zWA27rTe8bpl/4EmXlMZm6SmZtMnz598ZdMkiRpKTSwwDEz98vMmZk5i/Jwy08ycxfgp8COdbDdgO/U7jPqb2r/n2Rm1vSd61PXGwAbAr8BLgA2rE9pr1DnccaglkeSJGlpt9zIg4y5DwOnRMRBwO+AY2v6scBJETEXmE8JBMnMSyPiNOAy4GHg3Zn5CEBEvAc4C1gWOC4zLx3XJZEkSVqKjEvgmJnnAOfU7ispT0T3DnM/sFOf8Q8GDh4i/UzgzDHMqiRJkvrwyzGSJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJajKwwDEiVoqI30TEHyLi0og4oKZvEBHnR8TciDg1Ilao6SvW33Nr/1ld09qvpl8REVt1pc+uaXMjYt9BLYskSZIGW+P4APDqzHwB8EJgdkRsBhwKHJaZzwQWAHvW4fcEFtT0w+pwRMRGwM7Ac4DZwFERsWxELAscCWwNbAS8pQ4rSZKkARhY4JjF3fXn8vUvgVcDp9f0E4Adavf29Te1/xYRETX9lMx8IDOvAuYCL6l/czPzysx8EDilDitJkqQBGGgbx1oz+HvgFuBs4C/A7Zn5cB3kOmBG7Z4BXAtQ+98BPLk7vWecfulD5WOviJgTEXPmzZs3FosmSZK01Blo4JiZj2TmC4GZlBrCZw9yfsPk45jM3CQzN5k+ffpEZEGSJGnKG5enqjPzduCnwEuBaRGxXO01E7i+dl8PrAdQ+68B3Nad3jNOv3RJkiQNwCCfqp4eEdNq98rAPwKXUwLIHetguwHfqd1n1N/U/j/JzKzpO9enrjcANgR+A1wAbFif0l6B8gDNGYNaHkmSpKXdciMPssieCpxQn35eBjgtM78XEZcBp0TEQcDvgGPr8McCJ0XEXGA+JRAkMy+NiNOAy4CHgXdn5iMAEfEe4CxgWeC4zLx0gMsjSZK0VBtY4JiZFwEvGiL9Skp7x970+4Gd+kzrYODgIdLPBM5c7MxKkiRpRH45RpIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUZMTAMSJeFhGr1O63RcTnImL9wWdNkiRJk0lLjeMXgXsj4gXAB4C/ACcONFeSJEmadFoCx4czM4HtgSMy80hgtcFmS5IkSZPNcg3D3BUR+wFvA14ZEcsAyw82W5IkSZpsWmoc3ww8AOyZmTcBM4HPDDRXkiRJmnRaahx3Ar6amQsAMvOv2MZRkiRpqdNS47gOcEFEnBYRsyMiBp0pSZIkTT4jBo6Z+VFgQ+BYYHfgzxHxyYh4xoDzJk
mSpEmk6QXg9anqm+rfw8CawOkR8ekB5k2SJEmTyIhtHCNiH2BX4FbgK8AHM/Oh+nT1n4EPDTaLkiRJmgxaHo5ZC3hDZl7TnZiZj0bEaweTLUmSJE02LW0c9wfWi4g9ACJiekRsUPtdPuD8SZIkaZJo+Vb1/sCHgf1q0vLAyYPMlCRJkiaflodjXg+8DrgHIDNvwE8OSpIkLXVaAscH61PVCRARqww2S5IkSZqMWgLH0yLiS8C0iHgH8CPK09WSJElairQ8Vf1Z4DXAncCzgI8BPxtkpiRJkjT5tASOx2bmPwFnA0TEqsCZwBaDzJgkSZIml5Zb1ddHxFEAEbEm8EN8qlqSJGmp0/Iex/8E7o6IoylB42cz86sDz5kkSZImlb63qiPiDV0/zwf+E/gNkBHxhsz81qAzJ0mSpMljuDaO2/X8/h3l5d/bUV7NY+AoSZK0FOkbOGbmHuOZEUmSJE1uLQ/HSJIkSQaOkiRJamPgKEmSpCYjvgA8IlYE3gjM6h4+Mw8cXLYkSZI02bR8OeY7wB3AhcADg82OJEmSJquWwHFmZs4e7YQjYj3gRGAdyut7jsnMwyNiLeBUSg3m1cCbMnNBRARwOLANcC+we2b+tk5rN+CjddIHZeYJNX1j4HhgZcpnEPfJzBxtXiVJkjSyljaOv4qI5y3CtB8GPpCZGwGbAe+OiI2AfYEfZ+aGwI/rb4CtgQ3r317AFwFqoLk/sCnwEmD/+ulD6jDv6Bpv1AGuJEmS2rQEji8HLoyIKyLiooi4OCIuGmmkzLyxU2OYmXcBlwMzgO2BE+pgJwA71O7tgROzOA+YFhFPBbYCzs7M+Zm5ADgbmF37rZ6Z59VaxhO7piVJkqQx1nKreuvFnUlEzAJeRPl04TqZeWPtdRPlVjaUoPLartGuq2nDpV83RLokSZIGYMTAMTOvWZwZRMSqwP8A78/MO0tTxsemnREx8DaJEbEX5fY3T3va0wY9O0mSpCXSQN/jGBHLU4LGr2Vm59vWN9fbzNT/t9T064H1ukafWdOGS585RPoTZOYxmblJZm4yffr0xVsoSZKkpdTAAsf6lPSxwOWZ+bmuXmcAu9Xu3Siv++mk7xrFZsAd9Zb2WcCWEbFmfShmS+Cs2ocyyKcAAA4cSURBVO/OiNiszmvXrmlJkiRpjLW0cVxULwPeDlwcEb+vaR8BDgFOi4g9gWuAN9V+Z1JexTOX8jqePQAyc35EfAK4oA53YGbOr9178/jreH5Q/yRJkjQAAwscM/MXQPTpvcUQwyfw7j7TOg44boj0OcBzFyObkiRJauS3qiVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTZab6AwsrWbt+/0npF19yLYTkBNJkqQ21jhKkiSpiYGjJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpiYGjJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpyXITnQENb9a+31/o99WHbDtBOZEkSUs7axwlSZLUxMBRkiRJTQwcJUmS1MQ2juOkt62iJEnSVGONoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpiYGjJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpycACx4g4LiJuiYhLutLWioizI+LP9f+aNT0i4gsRMTciLoqIF3eNs1sd/s8RsVtX+sYRcXEd5wsREYNaFkmSJA32W9XHA0cAJ3al7Qv8ODMPiYh96+8PA1sDG9a/TYEvAptGxFrA/sAmQAIXRsQZmbmgDvMO4HzgTGA28IMBLs+UMtS3sa8+ZNsJyIkkSVpSDKzGMTN/Bs
zvSd4eOKF2nwDs0JV+YhbnAdMi4qnAVsDZmTm/BotnA7Nrv9Uz87zMTEpwugOSJEkamPFu47hOZt5Yu28C1qndM4Bru4a7rqYNl37dEOlDioi9ImJORMyZN2/e4i2BJEnSUmrCHo6pNYU5TvM6JjM3ycxNpk+fPh6zlCRJWuKMd+B4c73NTP1/S02/Hliva7iZNW249JlDpEuSJGlAxjtwPAPoPBm9G/CdrvRd69PVmwF31FvaZwFbRsSa9QnsLYGzar87I2Kz+jT1rl3TkiRJ0gAM7KnqiPgGsDmwdkRcR3k6+hDgtIjYE7gGeFMd/ExgG2AucC+wB0Bmzo+ITwAX1OEOzMzOAzd7U57cXpnyNLVPVEuSJA3QwALHzHxLn15bDDFsAu/uM53jgOOGSJ8DPHdx8ihJkqR2fjlGkiRJTQwcJUmS1MTAUZIkSU0G+clBjdJQnwkc7/n5WUJJktSPgaMWYjApSZL68Va1JEmSmhg4SpIkqYmBoyRJkprYxnGKsQ2iJEmaKNY4SpIkqYmBoyRJkpoYOEqSJKmJbRyXAOP94nBJkrR0ssZRkiRJTQwcJUmS1MTAUZIkSU1s47gUsS2kJElaHNY4SpIkqYmBoyRJkpoYOEqSJKmJbRw1aq3fy+4dzm9qS5I0tVnjKEmSpCbWOGpEg3wau7X2UpIkTTxrHCVJktTEwFGSJElNDBwlSZLUxDaOmvRsBylJ0uRg4Kgx4ecMJUla8hk4atIxCJUkaXIycNS4GcuA0JeQS5I0/gwctdSzDaUkSW0MHKUGBpeSJPk6HkmSJDUycJQkSVITA0dJkiQ1MXCUJElSEx+O0RLD9z9KkjRYBo7SGFrU90b61LYkaSowcNRSpbVWcqxqLw0IJUlLEgNHaRGN961xg1BJ0kTz4RhJkiQ1MXCUJElSE29VS0uYRX1AR5KkkRg4SuPMtpGSpKnKwFGawsYyCLWmUpI0Ets4SpIkqUlk5kTnYVxtsskmOWfOnIHOwy+YaEm1qLWQ3i6XpEUTERdm5iYTnY8Ob1VLWixjGRR6u1ySJjcDR0nNxvvLO63TNsCUpPFhG0dJkiQ1mfJtHCNiNnA4sCzwlcw8ZLjhbeMoaTSszZQ0kWzjOIYiYlngSOAfgeuACyLijMy8bGJzJmlJ0XohaIApaWkwpQNH4CXA3My8EiAiTgG2BwwcJY2rsbrTYAAqaTKb6oHjDODart/XAZtOUF4kabFNhqYuBq+S+pnqgWOTiNgL2Kv+vDsirhjwLNcGbh3wPPRErveJ4XqfGANb73HoIKa6xLC8T4yleb2vP9EZ6DbVA8frgfW6fs+saQvJzGOAY8YrUxExZzI1ZF1auN4nhut9YrjeJ4brfWK43iePqf46nguADSNig4hYAdgZOGOC8yRJkrREmtI1jpn5cES8BziL8jqe4zLz0gnOliRJ0hJpSgeOAJl5JnDmROejx7jdFtdCXO8Tw/U+MVzvE8P1PjFc75PElH8BuCRJksbHVG/jKEmSpHFi4DiGImJ2RFwREXMjYt+Jzs9UFxHrRcRPI+KyiLg0Ivap6WtFxNkR8ef6f82aHhHxhbr+L4qIF3dNa7c6/J8jYreJWqapJCKWjYjfRcT36u8NIuL8un5PrQ+kEREr1t9za/9ZXdPYr6ZfERFbTcySTB0RMS0iTo+IP0bE5RHxUsv74EXEv9ZjzCUR8Y2IWMnyPvYi4riIuCUiLulKG7PyHREbR8TFdZwvRESM7xIuJTLTvzH4ozyc8xfg6cAKwB+AjSY6X1P5D3gq8OLavRrwJ2Aj4NPAvjV9X+DQ2r0N8AMggM2A82v6WsCV9f+atXvNiV6+yf4H/BvwdeB79fdpwM61+2jgX2r33sDRtXtn4NTavVHdD1YENqj7x7ITvVyT+Q84Afjn2r0CMM3yPvB1PgO4Cli5/j4N2N3yPpB1/UrgxcAlXWljVr6B39Rho4679UQv85L4Z43j2Hns84eZ+SDQ+fyhFlFm3piZv6
3ddwGXUw7y21NOsNT/O9Tu7YETszgPmBYRTwW2As7OzPmZuQA4G5g9josy5UTETGBb4Cv1dwCvBk6vg/Su9872OB3Yog6/PXBKZj6QmVcBcyn7iYYQEWtQTqzHAmTmg5l5O5b38bAcsHJELAc8CbgRy/uYy8yfAfN7ksekfNd+q2fmeVmiyBO7pqUxZOA4dob6/OGMCcrLEqfeDnoRcD6wTmbeWHvdBKxTu/ttA7fN6H0e+BDwaP39ZOD2zHy4/u5eh4+t39r/jjq86310NgDmAV+tTQS+EhGrYHkfqMy8Hvgv4K+UgPEO4EIs7+NlrMr3jNrdm64xZuCoSS8iVgX+B3h/Zt7Z3a9eWfpqgDEUEa8FbsnMCyc6L0uZ5Si38b6YmS8C7qHcunuM5X3s1TZ121MC93WBVbCGdkJYvqcGA8ex0/T5Q41ORCxPCRq/lpnfqsk319sS1P+31PR+28BtMzovA14XEVdTmly8Gjiccquo8+7X7nX42Pqt/dcAbsP1PlrXAddl5vn19+mUQNLyPlivAa7KzHmZ+RDwLco+YHkfH2NVvq+v3b3pGmMGjmPHzx+Osdpu6Fjg8sz8XFevM4DOk3S7Ad/pSt+1Po23GXBHvQVyFrBlRKxZaxe2rGkaQmbul5kzM3MWpRz/JDN3AX4K7FgH613vne2xYx0+a/rO9SnUDYANKY3XNYTMvAm4NiKeVZO2AC7D8j5ofwU2i4gn1WNOZ71b3sfHmJTv2u/OiNisbsddu6alsTTRT+csSX+Up8D+RHma7j8mOj9T/Q94OeW2xUXA7+vfNpT2RD8G/gz8CFirDh/AkXX9Xwxs0jWtf6I0Vp8L7DHRyzZV/oDNefyp6qdTToRzgW8CK9b0lervubX/07vG/4+6Pa7AJxxb1vcLgTm1zH+b8tSo5X3w6/0A4I/AJcBJlCejLe9jv56/QWlH+hClhn3PsSzfwCZ1G/4FOIL6kRP/xvbPL8dIkiSpibeqJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBR0lIrIu4eof+0iNh7HPJxYES8ZoRhNo+Ivx90XiRpOAaOktTfNGDggWNmfiwzfzTCYJsDBo6SJpSBo6QpLyJmRcTlEfHliLg0In4YESsPMdwGEfHriLg4Ig7qSl81In4cEb+t/bavvQ4BnhERv4+IzwwzXO987o6Iw2pefhwR02v6CyPivIi4KCL+t375gog4PiJ2rN1XR8QBXfN4dkTMAt4F/GvNyysiYqeIuCQi/hARPxvL9SlJ/Rg4SlpSbAgcmZnPAW4H3jjEMIcDX8zM51G+YNFxP/D6zHwx8A/AZ+tny/YF/pKZL8zMDw4zXK9VgDk1L+cC+9f0E4EPZ+bzKV/D2H+IcQFurfP4IvDvmXk1cDRwWM3Lz4GPAVtl5guA1424diRpDBg4SlpSXJWZv6/dFwKzhhjmZZTPnkH5tFxHAJ+MiIsonz2bAawzxPitwz0KnFq7TwZeHhFrANMy89yafgLwyj7L8q0RlgPgl8DxEfEOYNk+w0jSmFpuojMgSWPkga7uR4An3KquhvrO6i7AdGDjzHwoIq6mfJN4UYdrmedwOsvyCH2O05n5rojYFNgWuDAiNs7M20Y5H0kaFWscJS1NfgnsXLt36UpfA7ilBoP/AKxf0+8CVmsYrtcywI61+63ALzLzDmBBRLyipr+dchu71UJ5iYhnZOb5mfkxYB6w3iimJUmLxBpHSUuTfYCvR8SHge90pX8N+G5EXAzMAf4IkJm3RcQvI+IS4AfAoUMNN4R7gJdExEeBW4A31/TdgKMj4knAlcAeo8j7d4HT6wM576U8KLMh5fb5j4E/jGJakrRIInO0d1AkScOJiLszc9WJzockjTVvVUuSJKmJNY6SJElqYo2jJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpyf8Hc80y5QKg82EAAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plot_data_points_hist(d_all)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoQAAAFNCAYAAACZuH6uAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3debglVXn3/e+PySiDgHR4GG00ZMCoiDxI4kReDWMQR9SoIDFBE0k0MQPGRFBjgjFqYlQMPhJAnDDRiIJRNIojyBBkVGmhEbCZkUFQQe/3j7UObA5n2Ifuc0531/dzXec6tVdNq1atWnXvWlW1U1VIkiRpuNZZ7AxIkiRpcRkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQN3FoZECapJL+0GuTjS0l+f5HW/eAkn0pyS5KPjTH9HkmuWoi8rYwkL0ryucXOx0JJ8pkkB8/XMpIs7cfLeiuzjsWS5PYkj5hm3EuTfHWacav1ds+U99VFku17+a87wzRjt8VJjkxy4qrL4dysCWW+kIbW1k4nyYeTPHOx87EyknwzyaNmm25BA8LeeEz8/TzJnSOfXzTNPGtEoLIaei6wJfCwqnreqlzwYjacVfXBqtpznGlX5wZ+3JNfVe1TVcevzLpWdhlJtp7rMZjkuCR/90DXOa6q2qiqLpvv9ayMkeDz1EnpJyY5cpGytdKq6vu9/H8Gi/sFeK4W+wtBkgOTfD3JHUm+NMX4nZOc08efk2TnkXFJ8pYkN/a/tyTJqs7jYrW1SbZKcnKSH/R9tHTS+IsmxRJ3J/nUyPj9k1zYx309yU4j4x6U5B192TcneU+S9WfIy2OAxwKfHCdvI/NtnuT62cokySOSfDrJbUluSPKPI/l8f5Ir+rjzkuwzMt92Sc5IclOSt01a5meS7DppVf8EvHGmvMACB4S98dioqjYCvg/sP5L2wYXMy5qkNwBz3VcPB75bVXfPR540vgdy0nmA+3y+7Av892JnYi3whCS/udiZ0GrhJuCfgaMmj0iyAS0AORHYDDge+GRPBzgUeCYtUHkMsD/w8gXI80L5Oa29ec5UI6vqUSNxxMbAlcDHAJLsCHwQeAWwKfAp4OSRNvhwYFfg14FfBnYB/maGvLwc+GDd+wseM+ZtxFuAS2aaoO/P04D/Af4PsC1tnwOs17frqcBDex5PGglAX0urFzsAz5wIAJM8H7i8qs6etLqTgd9K8n9mzHVVLcofsBx4eh9+EO3g+EH/++eetiFwJ20n3N7/tgZ2A74B/BBYAbwL2GBk2QX80jTr/RLwJuBrwG3A54At+rg9gKtmyOeRtIp3Yp/3Alqlei1wXd+Be05a1z8A3wRupR3km4+M3x34et+ObwF7TJr3zT2fd061PcCv9el+CFwEPKOnvwH4KXBXL7OXTTHvg4HjgJuBi4G/GN122oHzvb6dFwPPGlnnj4Gf9WX/sKfvB/xv384rgSNn2Pd7AFcBfw3c0Mv4RSPjHwqcAFwPXEE7GNbp414KfHXSvn4FcGkvh3cDmSGf+/btuQ24GvjzafL40l727wJuAb4NPG1SHt9Pq39XA38HrDtp3ncANwJ/N2nZe0/aP9+abp/3tN8fJ08zlPfoMtalfVu8AbgMeGUvw/VmmP/jwLOnSE/fxuv6fr+A1tAe2rftp337PjVTfe3jjgPeS2sgbwNOBx4+xrbdc6wDD6M1fLfSjrk3jdaVSfMtHd1uWgO/vOd/He6t/zcCJ9GPW+AU4I8nLet8+vExy7r+CvjiSPqJTHOccP96/lbgq73eTVn3gA1ogcajR+b7ReAOYAmwBfDpXv43AV+hH1eT1v0G4F/78PrAj4C3jrQbPwY2Hy1DWr39WR93O/CumY7Pabb5SODEMdvHl9Lq723A5fT2g3bMnE47Pm4APjrNur7f8zZxXvmNiTKnHR839+XuMzLPIbST/G193S+fok17De14WA
EcMkb9/X3gS5PS9uz7NZPyu3cf/jpw6Mi4lwFnrKlt7Qxls15f59IZpnlqX/6G/fNhwCkj49ehtaVP65/PBp43Mv53gStnWP5lwJPmkjfgN2nxySFM0/706Q4FvjKH8jgfeE4f/gzwK334I8CBwCa0c/Cm08x/GnDwjOuYyw5alX/cN9B6I3AGrfFa0iv8m0Yr9KR5H09rLNajNUqXAK+eVHFnCgi/RwvkHtw/HzXDukbzeWSv+Hv1dZ9AazReR2s4/4AWnY+u62raSWZD4D/pDR6wDe1ks2+vtL/dPy8Zmff7wKP6utaflK/1gWW0A30D4P+jHRi/MpLXE2co/6NoJ4TNge2AC7lvQPg8WvC9DvB82klhqz7upZMrei+7R/fpHwNcCzxzmnXvAdwNvJ0W+D+1L38i7yfQgueN+/79Lj2onbzuvq8/Tfs2uD2tYdt7hnyuAJ7chzcDdpkmjy/tefzTXtbPp51kJgKDTwD/1vfrL9ICkJdPmveP+7578BTLv9/+mWqfc/+AcNo8zbCvR5fxCloguV3f919khoCwr+cGYOMpxu0FnNPLfuLEMFFHjmMkEGb2+npc//yUXif+ZfK+myZ/owHhR2jB24a0Y+7q6ZbBfYOZQ3reJpbzKlp7tG3Py78BH+7jDgTOHFnOY2nH7QYz5HFiXRv3PE20J7MGhLTj6X3AZ4GHjFH33gO8ZWQ5r+LegPwfaEH3+v3vyUwRnPV9c0Ef/k1ae3nmyLhvTS7DyfVsnONzpmOCGdrHvt23jtSdrYBH9eEP09rjdYBfYIqT+VR5Hynzu2jt+LrAH9IuUKSP3w94JK2uP5UWaO8yqU17Yy/bffv4zWapv1MFhH8KfGZS2qeB1/ThW4AnjIzbFbhtmuVP5Gu1amtpAeWU+2ZkmnECwmOB40Y+HwacOvJ5Xdo5+1X989nAgSPjX9TX8dAplr1hH7dk3Lz19Z1Li1HuVyZT5P0DtODuBtrx8+hppt2yb8ev9s9v7du6KS1AfxStzTx4hvW9E3j7TGW+unRJvQh4Y1VdV1XX076hvmS6iavqnKo6o6rurqrltMbxqXNY379X1Xer6k7aCWTn2WYY8ZWq+my1rtiP0Rqoo6rqLtoJaWmSTUem/0BVXVhVPwL+Fjgw7SbsF9Mq7qlV9fOqOo1WWfcdmfe4qrqob+ddk/KxO7BRX/dPq+p/aAfrC8fcjgOBN1fVTVV1Ja2y3KOqPlZVP+h5+yit0u023cKq6ktVdUGf/nxawzzbPvnbqvpJVZ1Ou/IyUTYvAF5bVbf1/fs2ZqgPtDL4YVV9nxbgzLQ/7wJ2SrJJVd1cVefOMO11wD9X1V29DL4D7JdkS9p+enVV/aiqrqNdKXvByLw/qKp/7fvuzhlL4b5m2ufT5mkOyz+wz39lVd1ECxJm8hRaAHDbFOPuop1IfpV20rykqlZMs5xx6uspVfXlqvoJ7aT+G0m2G2ejer15DvD6vk8upHWpzObVtKvje1TVsp72CuB1VXVVz8uRwHN7t9PJwC/3rilo9fKjVfXTMdZ1J+1K2rj3Vq5PO442p91ec8cYde944IUj95S9hHbSgba/tqJdeb2rqr5S/UwxyTeAHZM8jLb/3w9sk2Qj2jF9+pj5nzCX43PCbO3jz4FfT/LgqlpRVReNbOPDga2r6sdVNdf72q6oqvdVuy/yeFp5bQlQVadU1feqOZ3Wu/TkkXnvop3H7qqqU2lXy35ljuuHdpzcMintFtqxNtX4W4CNZrmPcLVqa6tq0wewb+4jyUNo98ofN5L8eeCpac8ebMC9X0Af0sf/N/CqJEt69+mf9PSHcH8T5/Gp2r7p/Anty9M5Y0y7La3830m7+HIK9701AIB+j+MHgeOr6ts9+R9ode902pfADWgXYj6V5ENJvpzksEnru21km6a0ugSEW9MuV0+4oqdNKckv9xsxr0lyK/D3tO6QcV0zMnwH7QAb17Ujw3cCN/TGY+Izk5Z35cjwFbRGfgtao/
W8JD+c+AOeRGuAppp3sq1pl7p/Pmn524y5HVtPkbd7JDmo38g6kbdfZ4YyTvKEJF/sN9LeQjupzrRPbu5B8uj6t+7zrM/968NM2zWX/fkc2knliiSnJ/mNGaa9etIJcyKPD+95XDFSPv9Gu1ozYaZ9N5PZ5psuT+Oacb9PYV/g1KlG9KDuXbSuo+uSHJNkk5nWO0t9vSdfVXU7rVtz3G1bwr333YwufzZ/Aby7qkYfmnk48ImRfXsJrTtsy6r6MfBR4MX9Hs8Xcm/ANY7/B2yZZP8xpv0l4ADgDSMB54x1r6rOpB0DeyT51b6Mk/u8b6VdCf1cksuSHD7VSvsXmLNpwd9TaCedrwNP5IEFhA+kvZ22feztxvNpbcyKJKf0bQX4S9oVvG+mPXzwew80r1V1Rx/cCCDJPiM38v+QdmyMtnE31n3v2Z7ruWXC7bTuv1GbcG9gMnn8JsDt0wT3sGa0tQ/Es2ltxD31sQdMB9PapRW0bbyY1m0O7QvZ/wLn0er0f9EC19Hz+oQf9v8bTzHufpJsTQsIXzdm/u+kXUH8TD++/4l228uvjSxzHVr78lPaFUEA+oWc51fVY2lXBv+V1iN1OK237+nAK5Lcs6y+HT9kBqtLQPgDWgMwYfueBu2y7GRH07q9dqyqTWjfAlbFU1Y/YuSbQv8GtWQllzl6hWN7WuW7gXbi+kD/pjTxt2FVjd5kPN0BDq18tpv04MH2tC6pcayYIm8AJHk4rZvqMNpTypvSKtlEGU+Vrw/RTjzbVdVDaV1TM+2TzZJsOGn9P6CVzcS3/NFx427XqPvls6rOqqoDaCfQ/6JdIZ7ONpO+dU/k8UrgJ7R7Tyf23SZVNfpY/0z7bqbxs803XZ7GNe1+n8a0ASFAVb2zqh4P7ES7DeMvJkZNmnSc+npPvvrVqM0Zf9uup3WNzWXboN2v9TdJRm8Sv5J279josfkLVTWR1+NpvRpPA+6oqm+MmUd6w/8G2v2Ns7VZl9C6sz+TZOJK0zh173jaFbaXAP/Rg1j6VaDXVNUjgGcAf5bkadOs+3Ra9/DjgLP6571ovQRfnm7zZtmeuZixfazWS/PbtC/Q36a1V1TVNVX1B1W1Ne2BgPdk6tfezCmvSR5Eu+Xnn2hfDDalHRer/Ole2v21j5l0nD+mp0+Mf+zIuMeOjJvKmtDWPhAHAydMDoSr6j+q6ter6mHAEbSu8LP6uDur6rCq2qYfBzcC50z6ojqxnB9x7+1l49iNVh8vTnINLVDbrV+4murVTOczQz3s+//9tCvUz6mpe4yg3Yt4Ru8VeTRwdm9nLuifJ/wa7V7caa0uAeGHaY3ykiRbAK/n3qdtrgUeluShI9NvTLuH5Pb+zfAPV1E+vgv8QpL9+mXav6Hdd7EyXpxkp355+420BvpntO3bP8leSdZN8gv9Mve2Yy534krAXyZZP8ketKfNPjLm/CcBr02yWV/nH4+Mm7h34nqAJIfQrhBOuBbYdtKl7Y2Bm6rqx0l2o92sO5s3JNkgyZOB3wE+1svmJODNSTbuwemfcW99mIv75LOv60VJHtoPrltpXU/T+UXgT3r5Po92QJ1arVv0c8DbkmySZJ0kj0wyl9sWrqXdXjDXY3DKPM1h/pP6/Nsm2Yz2jXJKSXYAHlRVUz4tl+T/9ivDEw8e/Jh7y/NaYPT9gOPU132TPKnvrzfRGrmxrrT2evNx4MgkD0l71cTBY8x6Ee0hn3cneUZPey+t/j28b+eSJAeMrOsbfTvfxtyuDk74AO3+tr1nm7CqPkz7wvv5JI8cs+6dCDyLFhSeMJGY5HeS/FI/0dxCu+o5Xf0/HTgIuLifXL5Eu9/t8mq39Uxl8j5fGdO2j0m2THJAD3J+Qrti9vO+jc8baUNvprVjU23j9T193PxuQDsXXA/cnfYKkLFeyTKViW2iXdVep2/fxOtPvkTbN3+S9vqRiStD/9P/n0AL5rfpV6Vew327Taeyure199HLZuLc+6D+eXT8tsBvMc
VtIUke38t3CXAMcPJEV+tEmaXZnXYb1xEzZOVUJt36NEPePkMLPnfuf6+nXY3ceaQXcdSJwO5Jnt4DxlfTgvSJ9vZoWvu+f01z21GSX6Q9GHhkT7qc9jTxRrR7Sy8byfPjaQ+WTGt1CQj/jtZFcT4tqj23p01cAv4wcFla18HWwJ/TAo7baN8MP7oqMlFVtwB/ROvWuZp2klvZdyB+gHawXkM7CfxJX9eVtO6gv6Y1MlfSrq6MtU96I70/sA+tEr0HOGjkHoPZvIHWPXA57QRzz4mtqi6mney+QTvQH017unXC/9BOpNckuaGn/RHwxiS30Q6E2b4NXkNrsH9Af03ASN7/mFb2l9FurP8Q7QbcuZoqny8BlqfdavAK2pWe6ZwJ7Egr3zcDz62qG/u4g2gniYv7dvwH9+3un83Ey8JvTDLTfYxzydM4Jh5Q+BbtOPv4DNPux8zB5iZ9eTfT6tKNtG5JaN9sd+rH7H+NWV8/RGucb6I1Xi+ew3ZBu6K9Ea1uHQf8+zgzVdW3aCfJ9/UT/b/QrnZ/rtfnM4AnTJrtBNpxcSJAkvcmee+Y6/sZ7RjZfMzpj6d9mfyftNdOzFj3ettyLi0Y+srIonak3WN1O+3Yfk9VfXGa1X6d9tDdxNXAi2kB/3RXB6GV23PT3u/2zhmmm9Us7eM6tMDlB7S68lTuvSjwf4Ezk9xO24evqineU9m7g98MfK3X0d1nyc9ttLb7JFqZ/y73dsU/EC+hdRkeTbsX7E7uvcr5U9prZQ6idfH9Hu0BvYnbBv6N9jqVC2g9N6f0tOmsdm1t2jsCn3z/xdzjTlo9hXYFeHJA9BLgG1X1vSnm/RdauX2Htt1/MDLukbS6/SNaMHl4Vc308u1jgBf1L1Ez5q3aPZrXTPzRvnTd1YdHX+S+fZ/+O7Q27r09nwfQ3rzw0x6cv5wWWF6T6d/X/E+0+1Yn8vMPtCv7V9IeJpt4/cz+tIeXZuxxmXh6Slow/erQiVU17tXQBZfkpbQnJp+02HmZsNB5SnuR8ruq3SA/3+s6jvaU+0zvBFttJDmI9uqP1aZ+jEpyLO3BpjWiPDU/1oS2dnWX5EPASVX1X4udlwcqyZm0p8cvnGm61fJnmyStFr5Ee5JQI9Ju//gj2lXO1U6/ivhs2v1/klZCVY1z+9Nqraom93BMaXXpMpa0EnLfn3Ia/ZupW2ZGVfWP0927slCSPHm6bVuk/OxF68K8lta9tlpJ8iZaN+Jbq+ryxc6PpDWHXcaSJEkD5xVCSZKkgTMglCRJGri18qGSLbbYopYuXbrY2ZAkSZrVOeecc0NVrewPYayUtTIgXLp0KWefffbsE0qSJC2yJOP81Oa8sstYkiRp4AwIJUmSBs6AUJIkaeAMCCVJkgbOgFCSJGngDAglSZIGzoBQkiRp4AwIJUmSBs6AUJIkaeAMCCVJkgbOgFCSJGng1srfMpZW1tLDT1mU9S4/ar9FWa8kadi8QihJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHDzFhAm2S7JF5NcnOSiJK/q6UcmuTrJef1v35F5XptkWZLvJNlrJH3vnrYsyeHzlWdJkqQhWm8el3038JqqOjfJxsA5SU7r495RVf80OnGSnYAXAI8CtgY+n+SX++h3A78NXAWcleTkqrp4HvMuSZI0GPMWEFbVCmBFH74tySXANjPMcgDwkar6CXB5kmXAbn3csqq6DCDJR/q0BoSSJEmrwILcQ5hkKfA44MyedFiS85Mcm2SznrYNcOXIbFf1tOnSJ6/j0CRnJzn7+uuvX8VbIEmStPaa94AwyUbAfwKvrqpbgaOBRwI7064gvm1VrKeqjqmqXatq1yVLlqyKRUqSJA3CfN5DSJL1acHgB6vq4wBVde3I+P
cBn+4frwa2G5l9257GDOmSJElaSfP5lHGA9wOXVNXbR9K3GpnsWcCFffhk4AVJHpRkB2BH4JvAWcCOSXZIsgHtwZOT5yvfkiRJQzOfVwifCLwEuCDJeT3tr4EXJtkZKGA58HKAqrooyUm0h0XuBl5ZVT8DSHIY8FlgXeDYqrpoHvMtSZI0KPP5lPFXgUwx6tQZ5nkz8OYp0k+daT5JkiQ9cP5SiSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA3cvAWESbZL8sUkFye5KMmrevrmSU5Lcmn/v1lPT5J3JlmW5Pwku4ws6+A+/aVJDp6vPEuSJA3RfF4hvBt4TVXtBOwOvDLJTsDhwBeqakfgC/0zwD7Ajv3vUOBoaAEkcATwBGA34IiJIFKSJEkrb94CwqpaUVXn9uHbgEuAbYADgOP7ZMcDz+zDBwAnVHMGsGmSrYC9gNOq6qaquhk4Ddh7vvItSZI0NAtyD2GSpcDjgDOBLatqRR91DbBlH94GuHJktqt62nTpkiRJWgXWm+8VJNkI+E/g1VV1a5J7xlVVJalVtJ5DaV3NbL/99qtikZLWUksPP2XB17n8qP0WfJ2SNK55vUKYZH1aMPjBqvp4T762dwXT/1/X068GthuZfdueNl36fVTVMVW1a1XtumTJklW7IZIkSWux+XzKOMD7gUuq6u0jo04GJp4UPhj45Ej6Qf1p492BW3rX8meBPZNs1h8m2bOnSZIkaRWYzy7jJwIvAS5Icl5P+2vgKOCkJC8DrgAO7ONOBfYFlgF3AIcAVNVNSd4EnNWne2NV3TSP+ZYkSRqUeQsIq+qrQKYZ/bQppi/gldMs61jg2FWXO0mSJE3wl0okSZIGzoBQkiRp4AwIJUmSBs6AUJIkaeAMCCVJkgbOgFCSJGngDAglSZIGzoBQkiRp4AwIJUmSBs6AUJIkaeAMCCVJkgbOgFCSJGngDAglSZIGzoBQkiRp4NZb7AysyZYefsqirHf5UfstynolSdLaySuEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJAzdrQJjkiUk27MMvTvL2JA+f/6xJkiRpIYxzhfBo4I4kjwVeA3wPOGFecyVJkqQFM05AeHdVFXAA8K6qejew8fxmS5IkSQtlvTGmuS3Ja4EXA09Jsg6w/vxmS5IkSQtlnCuEzwd+Arysqq4BtgXeOq+5kiRJ0oIZJyB8HvDvVfUVgKr6flXNeg9hkmOTXJfkwpG0I5NcneS8/rfvyLjXJlmW5DtJ9hpJ37unLUty+Nw2T5IkSbMZJyDcEjgryUk9OMuYyz4O2HuK9HdU1c7971SAJDsBLwAe1ed5T5J1k6wLvBvYB9gJeGGfVpIkSavIrAFhVf0NsCPwfuClwKVJ/j7JI2eZ78vATWPm4wDgI1X1k6q6HFgG7Nb/llXVZVX1U+AjfVpJkiStIuM8VEJVVZJrgGuAu4HNgP9IclpV/eUc13lYkoOAs4HXVNXNwDbAGSPTXNXTAK6clP6EqRaa5FDgUIDtt99+jlmSVg9LDz9lUda7/Kj9FmW9kqTVwzgvpn5VknOAfwS+Bjy6qv4QeDzwnD
mu72jgkcDOwArgbXOcf1pVdUxV7VpVuy5ZsmRVLVaSJGmtN84Vws2BZ1fVFaOJVfXzJL8zl5VV1bUTw0neB3y6f7wa2G5k0m17GjOkS5IkaRUY5x7CI4DtkhwCkGRJkh36uEvmsrIkW418fBYw8QTyycALkjyoL3tH4JvAWcCOSXZIsgHtwZOT57JOSZIkzWzWK4RJjgB2BX4F+HfaS6lPBJ44y3wfBvYAtkhyFXAEsEeSnYEClgMvB6iqi5KcBFxMu0fxlVX1s76cw4DPAusCx1bVRXPeSkmSJE1rnC7jZwGPA84FqKofJJn1p+uq6oVTJL9/hunfDLx5ivRTgVPHyKckSZIegHHeQ/jT/lvGBZBkw/nNkiRJkhbSOAHhSUn+Ddg0yR8Anwf+3/xmS5IkSQtlnC7jtwFPB26l3Uf4euDL85kpSZIkLZxxAsL3V9XvAacBJNmIdk/f0+YzY5IkSVoY43QZX53kPQBJNgM+R3vKWJIkSWuBcd5D+LfA7UneSwsG31ZV/z7vOZMkSdKCmLbLOMmzRz6eCfwt7WXRleTZVfXx+c6cJEmS5t9M9xDuP+nz/9JeSr0/7RU0BoQDsvTwUxZlvcuP2m9R1itJ0pBMGxBW1SELmRFJkiQtjnEeKpEkSdJazIBQkiRp4AwIJUmSBm7WF1MneRDwHGDp6PRV9cb5y5YkSZIWyji/VPJJ4BbgHOAn85sdSZIkLbRxAsJtq2rvec+JJEmSFsU49xB+Pcmj5z0nkiRJWhTjXCF8EvDSJJfTuowDVFU9Zl5zJkmSpAUxTkC4z7znQpIkSYtm1oCwqq5YiIxIkiRpcfgeQkmSpIEzIJQkSRo4A0JJkqSBMyCUJEkaOANCSZKkgTMglCRJGjgDQkmSpIEzIJQkSRo4A0JJkqSBMyCUJEkaOANCSZKkgTMglCRJGjgDQkmSpIEzIJQkSRo4A0JJkqSBMyCUJEkaOANCSZKkgTMglCRJGjgDQkmSpIGbt4AwybFJrkty4Uja5klOS3Jp/79ZT0+SdyZZluT8JLuMzHNwn/7SJAfPV34lSZKGaj6vEB4H7D0p7XDgC1W1I/CF/hlgH2DH/ncocDS0ABI4AngCsBtwxEQQKUmSpFVj3gLCqsAkl04AAA1hSURBVPoycNOk5AOA4/vw8cAzR9JPqOYMYNMkWwF7AadV1U1VdTNwGvcPMiVJkrQSFvoewi2rakUfvgbYsg9vA1w5Mt1VPW26dEmSJK0ii/ZQSVUVUKtqeUkOTXJ2krOvv/76VbVYSZKktd5CB4TX9q5g+v/revrVwHYj023b06ZLv5+qOqaqdq2qXZcsWbLKMy5JkrS2WuiA8GRg4knhg4FPjqQf1J823h24pXctfxbYM8lm/WGSPXuaJEmSVpH15mvBST4M7AFskeQq2tPCRwEnJXkZcAVwYJ/8VGBfYBlwB3AIQFXdlORNwFl9ujdW1eQHVSRJkrQS5i0grKoXTjPqaVNMW8Arp1nOscCxqzBrkiRJGuEvlUiSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sCtt9gZkLT4lh5+yqKsd/lR+y3KeiVJ9+UVQkmSpIEzIJQkSRo4A0JJkqSBMyCUJEkaOANCSZKkgTMglCRJGjgDQkmSpIEzIJQkSRo4A0JJkqSBMyCUJEkaOANCSZKkgTMglCRJGjgDQkmSpIEzIJQkSRq4RQkIkyxPckGS85Kc3dM2T3Jakkv7/816epK8M8myJOcn2WUx8ixJkrS2WswrhL9VVTtX1a798+HAF6pqR+AL/TPAPsCO/e9Q4OgFz6kkSdJabHXqMj4AOL4PHw88cy
T9hGrOADZNstViZFCSJGlttFgBYQGfS3JOkkN72pZVtaIPXwNs2Ye3Aa4cmfeqnnYfSQ5NcnaSs6+//vr5yrckSdJaZ71FWu+TqurqJL8InJbk26Mjq6qS1FwWWFXHAMcA7LrrrnOaV5IkacgW5QphVV3d/18HfALYDbh2oiu4/7+uT341sN3I7Nv2NEmSJK0CCx4QJtkwycYTw8CewIXAycDBfbKDgU/24ZOBg/rTxrsDt4x0LUuSJGklLUaX8ZbAJ5JMrP9DVfXfSc4CTkryMuAK4MA+/anAvsAy4A7gkIXPsiRpLpYefsqirHf5UfstynqlNd2CB4RVdRnw2CnSbwSeNkV6Aa9cgKxJkiQN0ur02hlJkiQtAgNCSZKkgTMglCRJGjgDQkmSpIEzIJQkSRo4A0JJkqSBW6yfrpOkRXtXnSTpvrxCKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHC+h1CS1mK+61HSOLxCKEmSNHAGhJIkSQNnl7FWa3Z3SZI0/7xCKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQPni6klSVpJi/US/eVH7bco69Xax4BQkiSt9gy655cBoSRpreHPXUoPjAHhGsgGT5IkrUo+VCJJkjRwBoSSJEkDZ5exJC0Ab/XQfFiMejWUhyyGxiuEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDt8YEhEn2TvKdJMuSHL7Y+ZEkSVpbrBEBYZJ1gXcD+wA7AS9MstPi5kqSJGntsKa8h3A3YFlVXQaQ5CPAAcDFi5orSZIGxndqrp3WiCuEwDbAlSOfr+ppkiRJWklryhXCWSU5FDi0f7w9yXcWYLVbADcswHrWdJbT+Cyr8VhO47OsxmdZjWdQ5ZS3rNTs45bVw1dqLavAmhIQXg1sN/J52552j6o6BjhmITOV5Oyq2nUh17kmspzGZ1mNx3Ian2U1PstqPJbT+NakslpTuozPAnZMskOSDYAXACcvcp4kSZLWCmvEFcKqujvJYcBngXWBY6vqokXOliRJ0lphjQgIAarqVODUxc7HJAvaRb0Gs5zGZ1mNx3Ian2U1PstqPJbT+NaYskpVLXYeJEmStIjWlHsIJUmSNE8MCB8Af0bvXkm2S/LFJBcnuSjJq3r6kUmuTnJe/9t3ZJ7X9rL7TpK9Fi/3Cy/J8iQX9DI5u6dtnuS0JJf2/5v19CR5Zy+r85Pssri5XzhJfmWk7pyX5NYkr7ZeQZJjk1yX5MKRtDnXoSQH9+kvTXLwYmzLfJumrN6a5Nu9PD6RZNOevjTJnSN1670j8zy+H7fLenlmMbZnPk1TVnM+3tb28+M05fTRkTJanuS8nr5m1amq8m8Of7SHWr4HPALYAPgWsNNi52sRy2MrYJc+vDHwXdrPCx4J/PkU0+/Uy+xBwA69LNdd7O1YwPJaDmwxKe0fgcP78OHAW/rwvsBngAC7A2cudv4XqczWBa6hvadr8PUKeAqwC3DhA61DwObAZf3/Zn14s8XetgUqqz2B9frwW0bKaunodJOW881efunluc9ib9sCldWcjrchnB+nKqdJ498GvH5NrFNeIZy7e35Gr6p+Ckz8jN4gVdWKqjq3D98GXMLMvyJzAPCRqvpJVV0OLKOV6ZAdABzfh48HnjmSfkI1ZwCbJtlqMTK4yJ4GfK+qrphhmsHUq6r6MnDTpOS51qG9gNOq6qaquhk4Ddh7/nO/sKYqq6r6XFXd3T+eQXuv7bR6eW1SVWdUO5OfwL3lu9aYpl5NZ7rjba0/P85UTv0q34HAh2daxupapwwI586f0ZtGkqXA44Aze9JhvVvm2IkuLCy/Aj6X5Jy0X9cB2LKqVvTha4At+/DQy2rCC7hvA2u9ur+51qGhl9eE36NdnZmwQ5L/TXJ6kif3tG1o5TNhaGU1l+Nt6PXqycC1VXXpSNoaU6
cMCLVKJNkI+E/g1VV1K3A08EhgZ2AF7TK64ElVtQuwD/DKJE8ZHdm/Lfrof5f2IvpnAB/rSdarWViHxpPkdcDdwAd70gpg+6p6HPBnwIeSbLJY+VtNeLzNzQu575fXNapOGRDO3aw/ozc0SdanBYMfrKqPA1TVtVX1s6r6OfA+7u2+G3T5VdXV/f91wCdo5XLtRFdw/39dn3zQZdXtA5xbVdeC9WoGc61Dgy6vJC8Ffgd4UQ+g6d2fN/bhc2j3wv0yrVxGu5UHU1YP4HgbbL1Ksh7wbOCjE2lrWp0yIJw7f0ZvRL9n4v3AJVX19pH00XvdngVMPJF1MvCCJA9KsgOwI+3m2rVekg2TbDwxTLu5/UJamUw85Xkw8Mk+fDJwUH9SdHfglpFuwaG4zzdu69W05lqHPgvsmWSz3g24Z09b6yXZG/hL4BlVdcdI+pIk6/bhR9Dq0GW9vG5Nsntv7w7i3vJdqz2A423I58enA9+uqnu6gte4OrXYT7WsiX+0J/e+S4v2X7fY+VnksngSrXvqfOC8/rcv8AHggp5+MrDVyDyv62X3HVaDJ6sWsKweQXvq7lvARRN1B3gY8AXgUuDzwOY9PcC7e1ldAOy62NuwwOW1IXAj8NCRtMHXK1qAvAK4i3bv0cseSB2i3T+3rP8dstjbtYBltYx2n9tEe/XePu1z+nF5HnAusP/IcnalBUPfA95F/1GHtelvmrKa8/G2tp8fpyqnnn4c8IpJ065RdcpfKpEkSRo4u4wlSZIGzoBQkiRp4AwIJUmSBs6AUJIkaeAMCCVJkgbOgFDSWi/J7bOM3zTJHy1APt6Y5OmzTLNHkt+c77xI0igDQkmCTYF5Dwir6vVV9flZJtsDMCCUtKAMCCWtMZIsTXJJkvcluSjJ55I8eIrpdkjyjSQXJPm7kfSNknwhybl93AF91FHAI5Ocl+StM0w3eT23J3lHz8sXkizp6TsnOSPJ+Uk+0X8NhCTHJXluH16e5A0j6/jVJEuBVwB/2vPy5CTPS3Jhkm8l+fKqLE9JmmBAKGlNsyPw7qp6FPBD2q8BTPYvwNFV9WjarwpM+DHwrKraBfgt4G39p6MOB75XVTtX1V/MMN1kGwJn97ycDhzR008A/qqqHkP7pYcjppgX4Ia+jqOBP6+q5cB7gXf0vHwFeD2wV1U9FnjGrKUjSQ+AAaGkNc3lVXVeHz4HWDrFNE/k3t9A/sBIeoC/T3I+7SfetgG2nGL+caf7Off+mP2JwJOSPBTYtKpO7+nHA0+ZZls+Pst2AHwNOC7JHwDrTjONJK2U9RY7A5I0Rz8ZGf4ZcL8u426q3+V8EbAEeHxV3ZVkOfALKzHdOOucycS2/Ixp2uOqekWSJwD7AeckeXxV3TjH9UjSjLxCKGlt9DXgBX34RSPpDwWu60HebwEP7+m3ARuPMd1k6wDP7cO/C3y1qm4Bbk7y5J7+Elp38rjuk5ckj6yqM6vq9cD1wHZzWJYkjcUrhJLWRq8CPpTkr4BPjqR/EPhUkguAs4FvA1TVjUm+luRC4DPAW6aabgo/AnZL8jfAdcDze/rBwHuTPAS4DDhkDnn/FPAf/UGWP6Y9YLIjrRv7C8C35rAsSRpLqubawyFJgvaUcVVttNj5kKSVZZexJEnSwHmFUJIkaeC8QihJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQP3/wOHXATG1obzCgAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plot_data_points_hist(d_real)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First we generate a dictionnary with cumulative probability based on frequency of delays, for each keys in our reference dictionnary."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def cumul_distri_probas_dict(dico):\n",
" list_tot_points = []\n",
" for key in dico:\n",
" distrib = dico[key]\n",
"\n",
" # get total number of elements \n",
" N = np.sum(distrib)\n",
"\n",
" # make cumulative distribution probabilities\n",
" cdf_distrib = np.empty((len(distrib)), dtype=float)\n",
" save_x = 0\n",
" for x in range(len(distrib)):\n",
" cdf_distrib[x] = float(distrib[x])/float(N) + float(save_x)/float(N)\n",
" save_x += distrib[x]\n",
"\n",
" dico[key] = cdf_distrib\n",
" return dico"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('1286.TA.26-32-j19-1.12.H__8591182',\n",
" array([0. , 0.64333333, 0.81333333, 0.90333333, 0.95555556,\n",
" 0.96888889, 0.98444444, 0.99611111, 0.99777778, 0.99888889,\n",
" 0.99888889, 0.99944444, 0.99944444, 0.99944444, 0.99944444,\n",
" 0.99944444, 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. ])),\n",
" ('1286.TA.26-32-j19-1.12.H__8591184',\n",
" array([5.56483027e-04, 4.24596550e-01, 7.31775181e-01, 8.94268225e-01,\n",
" 9.59933222e-01, 9.86644407e-01, 9.93878687e-01, 9.98330551e-01,\n",
" 9.98330551e-01, 9.98887034e-01, 9.99443517e-01, 9.99443517e-01,\n",
" 9.99443517e-01, 9.99443517e-01, 9.99443517e-01, 9.99443517e-01,\n",
" 9.99443517e-01, 9.99443517e-01, 9.99443517e-01, 9.99443517e-01,\n",
" 9.99443517e-01, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,\n",
" 1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,\n",
" 1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00])),\n",
" ('1286.TA.26-32-j19-1.12.H__8591195',\n",
" array([0. , 0.60166667, 0.84833333, 0.92777778, 0.96333333,\n",
" 0.98277778, 0.99166667, 0.99666667, 0.99833333, 0.99888889,\n",
" 0.99888889, 0.99888889, 0.99888889, 0.99888889, 0.99888889,\n",
" 0.99944444, 0.99944444, 0.99944444, 0.99944444, 0.99944444,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. ]))]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_all_cdp = cumul_distri_probas_dict(d_all)\n",
"take(3, d_all_cdp.items())"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('10.TA.1-11-B-j19-1.1.R__8590314',\n",
" array([0. , 0.25 , 0.5 , 0.625, 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ])),\n",
" ('10.TA.1-11-B-j19-1.1.R__8590317',\n",
" array([0. , 0.3, 0.5, 0.7, 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. , 1. ])),\n",
" ('10.TA.1-11-B-j19-1.1.R__8594304',\n",
" array([0. , 0. , 0.5, 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. , 1. ]))]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_real_cdp = cumul_distri_probas_dict(d_real)\n",
"take(3, d_real_cdp.items())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# write dictionnary \n",
"with gzip.open(\"../data/distributions_cumulative_real.pkl.gz\", \"wb\") as output_file:\n",
" pickle.dump(d_real_cdp, output_file)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'd' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# write dictionnary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mgzip\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"../data/distributions_cumulative.pickle\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"wb\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0moutput_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'd' is not defined"
]
}
],
"source": [
"# write dictionnary \n",
"with gzip.open(\"../data/distributions_cumulative.pickle\", \"wb\") as output_file:\n",
" pickle.dump(d, output_file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Construct recovery tables \n",
"\n",
"First approach is to simple sum up similar distribution to get a new distribution we can use. For that, we need to have transport type (`route_desc`), `time` (rounded to hour) and `stop_id` which are valid. We then make all combination of these tree parameters and get the associate distributions"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" route_int \n",
" trip_int \n",
" stop_int \n",
" stop_sequence \n",
" arrival_time \n",
" departure_time \n",
" route_id \n",
" trip_id \n",
" stop_id \n",
" route_desc \n",
" stop_id_raw \n",
" sequence_shift_1 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" NaT \n",
" 2020-05-21 07:18:00 \n",
" 30-57-Y-j19-1 \n",
" 4.TA.30-57-Y-j19-1.1.H \n",
" 8502208 \n",
" Bus \n",
" 8502208 \n",
" 2 \n",
" \n",
" \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 2 \n",
" 2020-05-21 07:23:00 \n",
" 2020-05-21 07:23:00 \n",
" 30-57-Y-j19-1 \n",
" 4.TA.30-57-Y-j19-1.1.H \n",
" 8502209 \n",
" Bus \n",
" 8502209 \n",
" 3 \n",
" \n",
" \n",
" 2 \n",
" 0 \n",
" 0 \n",
" 2 \n",
" 3 \n",
" 2020-05-21 07:29:00 \n",
" NaT \n",
" 30-57-Y-j19-1 \n",
" 4.TA.30-57-Y-j19-1.1.H \n",
" 8503202 \n",
" Bus \n",
" 8503202 \n",
" 1 \n",
" \n",
" \n",
" 3 \n",
" 0 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" NaT \n",
" 2020-05-21 07:48:00 \n",
" 30-57-Y-j19-1 \n",
" 5.TA.30-57-Y-j19-1.1.H \n",
" 8502208 \n",
" Bus \n",
" 8502208 \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 2020-05-21 07:53:00 \n",
" 2020-05-21 07:53:00 \n",
" 30-57-Y-j19-1 \n",
" 5.TA.30-57-Y-j19-1.1.H \n",
" 8502209 \n",
" Bus \n",
" 8502209 \n",
" 3 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_int trip_int stop_int stop_sequence arrival_time \\\n",
"0 0 0 0 1 NaT \n",
"1 0 0 1 2 2020-05-21 07:23:00 \n",
"2 0 0 2 3 2020-05-21 07:29:00 \n",
"3 0 1 0 1 NaT \n",
"4 0 1 1 2 2020-05-21 07:53:00 \n",
"\n",
" departure_time route_id trip_id stop_id \\\n",
"0 2020-05-21 07:18:00 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8502208 \n",
"1 2020-05-21 07:23:00 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8502209 \n",
"2 NaT 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8503202 \n",
"3 2020-05-21 07:48:00 30-57-Y-j19-1 5.TA.30-57-Y-j19-1.1.H 8502208 \n",
"4 2020-05-21 07:53:00 30-57-Y-j19-1 5.TA.30-57-Y-j19-1.1.H 8502209 \n",
"\n",
" route_desc stop_id_raw sequence_shift_1 \n",
"0 Bus 8502208 2 \n",
"1 Bus 8502209 3 \n",
"2 Bus 8503202 1 \n",
"3 Bus 8502208 2 \n",
"4 Bus 8502209 3 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"../data/stop_times_df.pkl\", \"rb\") as input_file:\n",
" stoptimes = pickle.load(input_file)\n",
" \n",
"stoptimes.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.0%, 4.07%, 8.14%, 12.21%, 16.28%, 20.35%, 24.42%, 28.49%, 32.55%, 36.62%, 40.69%, 44.76%, 48.83%, 52.9%, 56.97%, 61.04%, 65.11%, 69.18%, 73.25%, 77.32%, 81.39%, 85.46%, 89.53%, 93.6%, 97.66%, "
]
}
],
"source": [
"# Set same stoptimes index as distribution dict \n",
"stoptimes['stop_id'] = stoptimes['stop_id'].astype(str).str[0:7]\n",
"stoptimes['key'] = stoptimes['trip_id'] + '__' + stoptimes['stop_id']\n",
"stoptimes = stoptimes.set_index('key')\n",
"\n",
"stoptimes = stoptimes[['trip_id','stop_id', 'route_desc', 'arrival_time', 'departure_time']]\n",
"\n",
"list_hours = []\n",
"size_stop_times = stoptimes.shape[0]\n",
"for x in range(size_stop_times):\n",
" if (x % 10000) == 0 :\n",
" print('{}%'.format(round(100*x/size_stop_times,2)), end = ', ')\n",
" \n",
" arr_time_hour = pd.to_datetime(stoptimes.iloc[x,:]['arrival_time']).hour\n",
" if math.isnan(arr_time_hour): # if arrival is NaT, use departure time\n",
" arr_time_hour = pd.to_datetime(stoptimes.iloc[x,:]['departure_time']).hour\n",
" list_hours.append(int(arr_time_hour))\n",
" \n",
"stoptimes['hour'] = list_hours\n",
"stoptimes = stoptimes.drop(columns=['trip_id', 'arrival_time', 'departure_time'])\n",
"\n",
"# Write this pickle to avoid re-running this above code all the time\n",
"with gzip.open(\"../data/stop_times_wHour.pkl\", \"wb\") as output_file:\n",
" pickle.dump(stoptimes, output_file) \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(17321, 32)\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 2 \n",
" 3 \n",
" 4 \n",
" 5 \n",
" 6 \n",
" 7 \n",
" 8 \n",
" 9 \n",
" ... \n",
" 22 \n",
" 23 \n",
" 24 \n",
" 25 \n",
" 26 \n",
" 27 \n",
" 28 \n",
" 29 \n",
" 30 \n",
" 31 \n",
" \n",
" \n",
" stop_id \n",
" hour \n",
" route_desc \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 8500926 \n",
" 8.0 \n",
" Bus \n",
" 0 \n",
" 2 \n",
" 3 \n",
" 3 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" ... \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" \n",
" \n",
" 9.0 \n",
" Bus \n",
" 0 \n",
" 1 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 3 \n",
" \n",
" \n",
" 10.0 \n",
" Bus \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 11.0 \n",
" Bus \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
4 rows × 32 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... 22 23 \\\n",
"stop_id hour route_desc ... \n",
"8500926 8.0 Bus 0 2 3 3 4 4 4 4 4 4 ... 4 4 \n",
" 9.0 Bus 0 1 2 2 2 2 2 2 2 2 ... 2 2 \n",
" 10.0 Bus 0 0 1 1 1 1 1 2 2 2 ... 2 2 \n",
" 11.0 Bus 0 0 1 1 1 2 2 2 2 2 ... 2 2 \n",
"\n",
" 24 25 26 27 28 29 30 31 \n",
"stop_id hour route_desc \n",
"8500926 8.0 Bus 4 4 4 4 4 4 4 4 \n",
" 9.0 Bus 2 2 2 2 2 2 2 3 \n",
" 10.0 Bus 2 2 2 2 2 2 2 2 \n",
" 11.0 Bus 2 2 2 2 2 2 2 2 \n",
"\n",
"[4 rows x 32 columns]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with gzip.open(\"../data/stop_times_wHour.pkl\", \"rb\") as input_file:\n",
" stoptimes = pickle.load(input_file)\n",
" \n",
"distrib_df = pd.DataFrame(d_all).transpose()\n",
"distrib_to_rm = np.array(distrib_df.iloc[:,range(11)].sum(axis=1) == 11) # missing trips\n",
"distrib_df = distrib_df.iloc[~distrib_to_rm,:]\n",
"\n",
"stoptimes_df = pd.DataFrame(stoptimes)\n",
"\n",
"recovery_df = distrib_df.join(stoptimes_df)\n",
"list_bins = [x for x in range(32)]\n",
"\n",
"recovery_df = recovery_df.groupby(['stop_id','hour', 'route_desc'])[list_bins].apply(lambda x : x.astype(float).sum())\n",
"recovery_df = recovery_df.astype('int')\n",
"print(recovery_df.shape)\n",
"recovery_df.head(4)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def plot_df_missing(df, max_bin = 10000):\n",
" tot_per_key = np.array(df.sum(axis=1)).astype('int')\n",
" binwidth = 100\n",
" n_keys_less_than_binwidth = np.sum(np.array(tot_per_key < binwidth))\n",
" perc_key_to_recover = round(100 * ( n_keys_less_than_binwidth / len(tot_per_key) ), 2)\n",
" plt.figure(figsize = (10,5))\n",
" plt.hist(tot_per_key, bins = range(min(tot_per_key), max_bin + binwidth, binwidth))\n",
" plt.title(\"Total number of data points per stop_id / hour key. N keys with less than {0} points: {1} ({2}%)\"\\\n",
" .format(binwidth, n_keys_less_than_binwidth, perc_key_to_recover))\n",
" plt.xlabel('n data points')\n",
" plt.ylabel('n keys')\n",
" return plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAn8AAAFNCAYAAABxFAnAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3dd5xtZX3v8c9XmgWlCDFSwkFFE4xXokQxlqAkgqJirEQENOZyLUmMN4mCURFLgjHW2KKigg2RWFDxKkEFKwoWqsihhSqHXkSl/O4fzzOwz5wpe2DmzJyzPu/Xa16z97PWXutZ/buetdbeqSokSZI0DHdZ7ApIkiRp9TH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKADCb8JakkD1gC9fhWkr9epHHfLcmXklyT5LNj9L9zkgtXR93ujCR7Jfn6YtdjTZfk1Uk+PEP385L82R0c9mKu9y9I8p3FGPc4knwsyZsWux4zmW0bm+u+YjHXhz7+JT/PV6ckH0jy2sWux2JKskGS05Pcd7Hrcmck+WGSB8/W36KHvyTXj/zdmuTGkfd7TfOZNSKULEHPAu4D3Luqnj2fA17MA2xVfbKqnjhOv0s9CEwlybJ+8rLuQo6nqv6lqu7UATnJmUkeOF91WtP09auSvHJS+YVJdl6kat1pk7expXIyPY7F3uaTvDHJKUluTvL6Kbo/L8n5SW5I8oUkm4502zTJ53u385M8byHqWFUvrqo3jtPvfAbnJK9Ick6Sa5NcnOQdo/u52ebdSH8fmbxO9v3m0UmuSnJpkvfMsg/dDzi+qi7pn398km/2xpLzphjnDkm+3btfOFN4TvKHSb6W5PIkq3y58qQcdH2SW5L8R++2dZIfJLkyydsmfe6rSXacNLh/B94ww3QCSyD8VdWGE3/A/wBPHSn75GLXb6lKM9fltw3wi6q6eSHqpPEtdJBbDEnuD6xTVb9YxDoshfl6JfDKJPdc7IpoSVgOvBL4yuQOvYXmP4G9aSfmvwLeN9LLe4Hf9m57Ae8fp1VnDXIU8LCquhfwh8BDgb8b6T7tvJuQ5DHA/afo9D7gMuC+wA7AnwIvnaEuLwY+PvL+BuAjwD9N0/+ngOOBTSeGneRp0/R7E3AE8KKpOk7KQb8L3AhMXJ07ADgU2BZ4+kTYS/Jc4NyqOnHS4I4CHp/kd6ebUFgC4W86vQn2nf1s4OL+eoMk9wC+CmwxkpK3SPKIJN9PcnWSS3rKX3/McX2rn2F8N8l1Sb6eZLPebZVWxoxc/kry+iSfTfKJ/tlTkjwwyQFJLktyQZLJrVL3T2uavTbJFyed6e2U5Ht9On6WkdaCXs83J/kubSdxvymm5Q96f1cnOW1iZUxyEPA64Ll9nq2yEqZdFv5YP1M6HfjjSd33T3J2n87Tk/zFxDiBDwCP6sO+upfvnuQnfTovmOXMbed+9vTqfnZ0XkZafpNslOSwJCvSzoBfMxF+M+nMPu0M8MVJzurz4b09LE9Xzyf36bkuyUVJ/nGaOr6gryPvSTvb+3mSXSbV8ZC+/l2U5E1J1pn02XckuQJYZV70dfjEPr9+meTtvdPx/f/Vvd6PSnKXPg/O7+vZYUk26sOZaCncL23buWS6aZo0/tcn+cTI+7378K9I8s+zfR7YHTh6hu7bZIptrI/raX19vbqvv38w0m3yGf1tLQ8j682rklwKfHSM6Xxrku/05TXlMkuyftqZ9kNGPvc7SX6VZPNZRnEG8H3g/85Wlynqds+01oZ393X295Mc0+tyZpLn9P7+uK8j64x89hlJftZfT7cuTR7fcUme2V8/us/r3fv7XZL8tL++bRtLMrE+/qyvj88dGd4/9PXxkiQvnMN0/1WSM9L2PV9Lsk0vT99mLuvTckqSP+zdZt1uM802322S5Cv98yeknbxMfO5dafusa5OclOSxI91en+SIvs1d19fbya0vt6mqQ6vqq8
B1U3TeC/hSVR1fVdcDrwWe0deDewDPBF5bVddX1XdoB/a9p5mHr09yZJLP9Hr9OMlDR+dFpjg29G5TbVOrLMsk+/U6v7LPzy/18lf1ZXBdX093YQxVdXZVTSyTALcCDxjpPtO8mzjZ+w/gb6fovC1wRFX9uqouBf4fMGVwTvJ7tOPpCSPj/mFVfRw4Z5rqLwM+WVW3VNXZwHemG35VnVlVhwCnTTOsUc+khdZvj0zHN6rqGuBHwP2S3AvYH3j1FOP6NXASsOtMI1my4Q/4Z2AnWmJ/KPAI4DVVdQPwJODikbR8MXAL8ApgM+BRwC7MnPInex7wQuB3gPWBWQ+WI55KO2PYBPgJ8DXavN2S1vz6n5P63wf4K9oZyc3AuwGSbEk7w3kT7WziH4H/mnSw2ZvWPH1P4PzRgSZZD/gS8PU+HX8LfDLJg6rqQOBfgM/0eXbIFNNxIO0M6v60FWffSd3PBh4LbAQcBHwiyX2r6gzaWdP3+7A37v3f0Kd1Y1oweEmSp081A7vfpS2/Lfu4P5jkQb3bf/Tx3o92lrUPbXlN5ym08Pq/gOcAu85Qz0OA/1NV96SdfX5jhuE+ss+HzWjz63O5Pbx/jLY8HwD8EfBE4K8nffYc2ln8m6cY9ruAd/Wz4PvTzhQBHtf/b9zr/X3gBf3v8bR5siHwnknDezywXa/HqzKH+/WSbA+8n7a+bQHcG9hqlo89mRnO0JlmG0u7TPxp4O+BzWkB8ksZ8+SNtt5sSmvZ3m+6ntIC84do68QT+870Y0yxzKrqt8DhwPNHBvGXwLFVtWKMOr0W+PuRdWNWSe4NHAt8t6r+Drg7cAytheF3gD2B9yXZvqp+BFzR6zthb+Cw/nq6dWmy44Cd++s/pa2fjxt5f9zkD1TVRPeH9vXxM/3979K20S1pLRzvTbLJGNO9B+0g9gza8v82bX2gT9/jgAf2YT+nTzeMsd3OsM1Dm58H0fbby1l5m/wR7dizKW3+fzbJXUe6P422fmxMC2STt71xPRj42Uh9z6a19D2w/908qSX9Z0wTMLo9aC1GE/X+QpL1Zjo2TDOcKZdlVX0Q+CTwb31+PrUP42+AP+7LYlfgPGitcpMC9yrSLntfC1xOO9ZPPl7O5BW0S7UnT9HtncCeSe7ej61PogXAqTwEOGeOV8XeCezT5++DaLnjv+fw+ensCxxWt//27qnAnyfZGHg4LUC+EXjnSHCe7AzavJzWUg5/ewFvqKrL+s72IKY54wGoqpOq6gdVdXNVnUdbgf50DuP7aFX9oqpupO0od5jDZ79dVV/rK85naTuwg6vqJtoOYllfcBM+XlWn9iD7WuA5aWfwzweOrqqjq+rWqjoGOJF2UJ3wsao6rU/nTZPqsRMtBBxcVb+tqm8AX6YdtMbxHODNVXVlVV1AD6UTquqzVXVxr9tngLNooXxKVfWtqjql938ybYc+2zJ5bVX9pqqOowWJiXmzJ3BAVV3Xl+/bmGF9oM2Dq6vqf4BvMvPyvAnYPsm9quqqqvrxDP1eRtvoburz4Exg9yT3oS2nv6+qG6rqMuAdvd4TLq6q/+jL7sZp6vGAJJv1M/0fzFCPvYC3V9U5vcXgANqObvSy50G9LqfQWsTGXQ+g3R/65d4i8RvaenrrdD0nuTstbH9rhmFOt409F/hKVR3T1+l/B+4G/MmYdb0VOLCvN1PNV4D1aOvfprRbS341xjI7FPjLJOnv92bly0LTqqqf0oLbq8achi1oQeuzVfWaXvYU4Lyq+mhfZ34C/Bcwcb/uofRw2kPmrrQDPoy/Lh3H7dvk44B/HXk/ZfibwU20ffZNVXU0cD0wXbgY9WLgX6vqjL4P/Rdgh976dxPtRPf3gfR+LhkZ37jb7VQ+X61152ZaoLltH1FVn6iqK/p8fxuwwaRp+U7fT99CWydmPNDOYEPgmkll19CmeUPg2mm6Teekqjqyb0dvB+5KOy7M9dgwl2V5C23+bJ9kvao6r4dYquo7kwL3KqrqU/
0k5YG0VtpfztT/hCRbA/+HdkVrKsfTgvK1wIW0Y+kXpul3Y6ZpXZzBl2n7yRuBnwOH9JOyO6yv839K27Yn/Cut0eU42qXs9WknsF9K8qkkxyf5m0mDuo42TdNayuFvC1Zu2Tq/l00p7VLrl9Nu7LyWtgPZbLr+p3DpyOtf0TaUcY2urDcCl/edwsR7Jg3vgpHX59MOTJvRWi6e3Zvlr+5nTI+htRBO9dnJtgAuqKrRg/T5tLO3cWwxRd1uk2SfJD8dqdsfMsM8TvLItEtYK5JcQ9vJz7RMruqBeHT8W/TPrMeq68NM0zWX5flMWgg4P+0y2KNm6PeikTOy0Tpu0+t4ycj8+U/aWfaEmZYdtDPsBwI/T/KjJE+Zod+pto91aa2KU41vxu1nmuHf9vm+XK6Yvnd2Ab7Xg+J0plsmK01LX38vYPz1dkW1Sx0zeQCtVeSgaq16MMsyq6oTej13TvL7fRhHjVknaAell/SQOZvdaYH3AyNl2wCPnLQ/2IvWKgPwCeCp/fLgc2gnoRPBaNx16fvAA3sdd6C1HG6ddkn+Edx+y8E4rpjUcjLufnQb4F0j03gl7RLglj2kvId279tlST7YL3nB3LbbqUy7j0jyj2mXoa/pddqIlfddkz9719yx+02vB+41qexetIP3TN2mM7rN3koLPVsw92PD2MuyqpbTWu1fT1tGhyeZy75mYjhn0Vq13jdbv907aQF1cngm7Zag/wd8DrgHbdltArxlmmFdxcyhevLwN+3DfwMtYG8N7JpkLlcbp7I37cTi3ImC3hjz3Kp6KK1Ff+Iy9/60VsE/A16ckVtl+rTM2OK6lMPfxbSdwoTf62UAqzwtQ7tE9XNgu34W8WraDuTOuoF2+QWA3go12z0/s9l65PXv0c6yLqdtuB+vqo1H/u5RVQeP9D/VtE+4mLbjHl2uvwdcNGa9LpmibsBtZyQfojXv37ufzZ3K7fN4qnp9inaw3LqqNqId2GZaJpv0A9no+C+mzZubWHV9GHe6Rq1Sz6r6UVXtQTvof4HpL5EBbDnSEjRaxwuA3wCbjSy7e1XV6CWamZYdVXVWVf1lr8dbgCP7/Jjqc1NtHzez8onI5GV5MeNbaV3oLXv3nqH/JzPz/X4zWWla+vzdmtuX768Y2Qa5PfxMmHG+dmfQLjl/deRS1zjLbKJ1bW/gyDFC5u2Vqvo57eAzzv2SH6IdTI4e2QYuAI6btD/YsKpe0od/ES28PYNJrZIzrEuT6/gr2v1BLwdO7cH4e7T7Fc+uqsvHnd474QLa5dvR6bxbVX2v1/HdVfVwYHtaoP2nXj7udjvO+nGbtPv7XkkL1Jv0fd01zM/xZLLTGGk1THI/WivaL/rfukm2G+n/ocx839joNnsX2q0aF3Pnjw2jptqHfqqqHkPbjovpQ9Zs1mXqhzemsgvw1t7gMxHGv5/2RPSmtOl7T78icAXt6seTpxnWycC2cwjw9wNuqarDeuvwhbSrfNMNf1z7sHKr32T7AT+oqlNpl6pP7NvsKf39hD9g5HaCqSzl8Pdp4DVJNu9noa+jnelCO8DdO/0G9+6etObd6/tZ+kvmqR6/oJ3V7d7vm3gNbeO8M56fZPt+QH0D7aByC7efye+adtP5XdNuvp3tXqsJEy0Vr+z3IexMux/x8DE/fwRwQJJN+jhHb6KdCCErANJuAP7Dke6/BLbKyvdp3RO4sqp+neQRtHu+ZnNQ2s32j6Vd9vpsnzdHAG9OuxF6G9rB6RMzDWgaK9Wzj2uvJBv1SyXXMsPlTdqB5u/6/H02bSM7ure4fB14W5J7pd1fdv8kY996kOT5STbvZ+cTZ2230ub5raz8gM+ngVck2TbJhtx+P+fo2fpr0+53eTAt+HyG8R0JPCXtnp31aevpTPuLJzHz/X4zOYJ26XyXvo39Ay2Ufa93/ynwvL5N7Mbcbue4TVV9mnZS+N9J7j/mMvsE8Be0AHjYqkOd1UG0eT/jJZjub2i3EXwpyd1ol5UemP
bgzXr9748nneEfRgsqD6EFTWDGdWkqx/VxT1zi/dak91P5JVM8cHYHfYC233kw3Pbg1LP76z9Ou4KwHu1E/NfArXPcbqfaN83knrQTqRW08PU6Vm2BG1tfbnelbT/r9v36xIM6n6Tt8x/bw/kbgM9Vu73lBtoyfUOSeyR5NK31eqZbDx6e9uDPurTWuN8AP+DOHxtGrbTskzwoyROSbEBbPjcy8z70Nkn+Osnv9Nfb025fOXak+0zz7oG0MLwDt1+yfyrtcv7lwLm0lvd102672pcW8lbRw9tyRm5j6vuDu9KuDqSPe2Id+kUve17v73dpt69MOfw0d6VdsqUPa4NJ/fwJrSV2yu/g7fPpZdz+sOC5tKd6NwR2pD+Y0sfzcNptJ9NayuHvTbRr9CfTUu2Pe9nEGfWngXPSLhVsQbt5/Hm0JvEPMbcD3bR6k/JLgQ/TzpJuoDWl3xkfp91ofimtyfjv+rguoG3cr6bteC6gneWOtZz6GcBTaQfiy2nN5/v0+TWOg2iXAs6lHRRHWxJOp91n933axv8Q4Lsjn/0G7Yz00iQTrQUvpe24rqOF95la1KDNj6toZ6mfBF48Uve/pc37c2hPVX2K9hj+XE1Vz72B89JuF3gx7dLadE6gPURxOe0G8Wf1s0poZ23rA6f36TiSlS/Zz2Y34LQk19Oa9/esqht768ybge/29X0n2rR/nHZZ7lzaTnfyE2/H0XZoxwL/XlVjfxF2VZ1G29F8itYKeBXTrPdpT19eX+3+yjmrqjNp4eo/aPP1qbT78iYuz768l01c9pzuvp1xxnUo7QD7jSTLmGWZ9W3yx7QTn28DpD0pOdM6Mjq+c2nLaZVWtyn6LdqZ/YXAF2mt3U+k3YN4MW37eAsrn3x+ntba8vm+nkyYcl2aZtTH0QLP8dO8n8rrgUP7+vic2aZtJlX1edp0Hd63wVNp+zBooetDtGVzPu3Wg7f2buNut1Nt8zP5Gq0V9hd9nL9m9ls2ZvIhWiD6S1or8I297hPb2Ytp+7vLaPN99NLhS2m3A1xGO+a9pH9mOl+khZCr+jie0e/bu7PHhlGH0O7vuzrJF2jr48F9uJfSTpAPgNaK2tfB6TwaOCXJDbQrB0ez8hOsM827y6rq0om/3v/lI+v5M2jbwQrafvAm2gMi05n4yp0Jj+vjO5rWingj7bhIVV3bh/8K2rz+KW29nXhi+vfSnoaeuHq2Tf/8xLK7kXaiN2pfevCfpn7/TrvMPTE//xV4Am3d/FLd/pUvTwW+Ve1B2Gll5duXpMXRz0Q/UVXjtnKudkleQHsS9DGLXZeZ9FBzLrBerYbvdEz7QuPNquqVs/a8BkryEdrDOq+ZtedFkORs2mXT+XjSUGuotK/SekBVPX+2frWq3hL3E2CXkXtn1zhJTgBe1C8NT2spfCGqpDXbebSvkVjr9CD9DNrXwCw5ad/RV8z89USSZlHtYbXtF7sed1ZVPXKc/pbyZV9J8yzt54Am/5TQ9UlW+bLQcVXVEdW+T22tkuSNtEs5bx19+m6pSPIt2oNuL6uVn+KUpBl52VeSJGlAbPmTJEkaEMOfJEnSgKyVD3xsttlmtWzZssWuhiRJ0qxOOumky6vqzv6AxNjWyvC3bNkyTjzxxNl7lCRJWmRJzp+9r/njZV9JkqQBMfxJkiQNiOFPkiRpQAx/kiRJA2L4kyRJGhDDnyRJ0oAY/iRJkgbE8CdJkjQghj9JkqQBMfxJkiQNiOFPkiRpQNbK3/ZdTMv2/8oqZecdvPsi1ESSJGlVtvxJkiQNiOFPkiRpQAx/kiRJA2L4kyRJGhDDnyRJ0oAY/iRJkgbE8CdJkjQghj9JkqQBMfxJkiQNiOFPkiRpQAx/kiRJA2L4kyRJGhDDnyRJ0oAY/iRJkgbE8CdJkjQghj9JkqQBMfxJkiQNiOFPkiRpQBY8/CVZJ8lPkny5v982yQlJlif5TJL1e/kG/f3y3n3ZyDAO6OVnJtl1oessSZK0tlodLX
8vB84Yef8W4B1V9QDgKuBFvfxFwFW9/B29P5JsD+wJPBjYDXhfknVWQ70lSZLWOgsa/pJsBewOfLi/D/AE4Mjey6HA0/vrPfp7evddev97AIdX1W+q6lxgOfCIhay3JEnS2mqhW/7eCbwSuLW/vzdwdVXd3N9fCGzZX28JXADQu1/T+7+tfIrPSJIkaQ4WLPwleQpwWVWdtFDjmDS+/ZKcmOTEFStWrI5RSpIkrXEWsuXv0cDTkpwHHE673PsuYOMk6/Z+tgIu6q8vArYG6N03Aq4YLZ/iM7epqg9W1Y5VtePmm28+/1MjSZK0Fliw8FdVB1TVVlW1jPbAxjeqai/gm8Czem/7Al/sr4/q7+ndv1FV1cv37E8DbwtsB/xwoeotSZK0Nlt39l7m3auAw5O8CfgJcEgvPwT4eJLlwJW0wEhVnZbkCOB04GbgZVV1y+qvtiRJ0ppvtYS/qvoW8K3++hymeFq3qn4NPHuaz78ZePPC1VCSJGkY/IUPSZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IIY/SZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IIY/SZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IIY/SZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IIY/SZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IIY/SZKkATH8SZIkDYjhT5IkaUDWXewKrMmW7f+Vxa6CJEnSnNjyJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkakAULf0numuSHSX6W5LQkB/XybZOckGR5ks8kWb+Xb9DfL+/dl40M64BefmaSXReqzpIkSWu7hWz5+w3whKp6KLADsFuSnYC3AO+oqgcAVwEv6v2/CLiql7+j90eS7YE9gQcDuwHvS7LOAtZbkiRprbVg4a+a6/vb9fpfAU8AjuzlhwJP76/36O/p3XdJkl5+eFX9pqrOBZYDj1ioekuSJK3NFvSevyTrJPkpcBlwDHA2cHVV3dx7uRDYsr/eErgAoHe/Brj3aPkUn5EkSdIcLGj4q6pbqmoHYCtaa93vL9S4kuyX5MQkJ65YsWKhRiNJkrRGWy1P+1bV1cA3gUcBGyeZ+E3hrYCL+uuLgK0BeveNgCtGy6f4zOg4PlhVO1bVjptvvvmCTIckSdKabt3Ze7ljkmwO3FRVVye5G/DntIc4vgk8Czgc2Bf4Yv/IUf3993v3b1RVJTkK+FSStwNbANsBP1yoei+EZft/ZZWy8w7efRFqIkmShm7Bwh9wX+DQ/mTuXYAjqurLSU4HDk/yJuAnwCG9/0OAjydZDlxJe8KXqjotyRHA6cDNwMuq6pYFrLckSdJaa8HCX1WdDPzRFOXnMMXTulX1a+DZ0wzrzcCb57uOkiRJQ+MvfEiSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgMwa/pI8Osk9+uvnJ3l7km0WvmqSJEmab+O0/L0f+FWShwL/AJwNHLagtZIkSdKCGCf83VxVBewBvKeq3gvcc2GrJUmSpIWw7hj9XJfkAOD5wOOS3AVYb2GrJUmSpIUwTsvfc4HfAC+qqkuBrYC3LmitJEmStCDGafl7NvDRqroKoKr+B+/5kyRJWiON0/J3H+BHSY5IsluSLHSlJEmStDBmDX9V9RpgO+AQ4AXAWUn+Jcn9F7hukiRJmm
djfclzf9r30v53M7AJcGSSf1vAukmSJGmezXrPX5KXA/sAlwMfBv6pqm7qT/2eBbxyYasoSZKk+TLOAx+bAs+oqvNHC6vq1iRPWZhqSZIkaSGMc8/fgcDWSV4IkGTzJNv2bmcscP0kSZI0j8b5bd8DgVcBB/Si9YBPLGSlJEmStDDGeeDjL4CnATcAVNXF+PNukiRJa6Rxwt9v+9O+BZDkHgtbJUmSJC2UccLfEUn+E9g4yf8G/pv21K8kSZLWMOM87fs24M+Aa4EHAa8Djl/ISkmSJGlhjBP+DqmqvwKOAUiyIXA0sMtCVkySJEnzb5zLvhcleR9Akk2Ar+PTvpIkSWukcb7n77XA9Uk+QAt+b6uqjy54zSRJkjTvpr3sm+QZI29PAF4L/BCoJM+oqs8tdOUkSZI0v2a65++pk97/hPYFz0+lfe2L4U+SJGkNM234q6oXrs6KCJbt/5VVys47ePdFqIkkSVpbjfPAhyRJktYShj9JkqQBMfxJkiQNyKxf8pxkA+CZwLLR/qvqDQtXLUmSJC2EcX7h44vANcBJwG8WtjqSJElaSOOEv62qarcFr4kkSZIW3Dj3/H0vyUPmOuAkWyf5ZpLTk5yW5OW9fNMkxyQ5q//fpJcnybuTLE9ycpKHjQxr397/WUn2nWtdJEmS1IwT/h4DnJTkzB7KTkly8hifuxn4h6raHtgJeFmS7YH9gWOrajvg2P4e4EnAdv1vP+D90MIicCDwSOARwIETgVGSJElzM85l3yfdkQFX1SXAJf31dUnOALYE9gB27r0dCnwLeFUvP6yqCvhBko2T3Lf3e0xVXQmQ5BhgN+DTd6RekiRJQzZr+Kuq8+/sSJIsA/6I9hvB9+nBEOBS4D799ZbABSMfu7CXTVcuSZKkOVrw7/lLsiHwX8DfV9W1o916K1/N03j2S3JikhNXrFgxH4OUJEla6yxo+EuyHi34fbKqPteLf9kv59L/X9bLLwK2Hvn4Vr1suvKVVNUHq2rHqtpx8803n98JkSRJWkssWPhLEuAQ4IyqevtIp6OAiSd296V9j+BE+T79qd+dgGv65eGvAU9Mskl/0OOJvUySJElzNM4DH3fUo4G9gVOS/LSXvRo4GDgiyYuA84Hn9AhEDfIAAAznSURBVG5HA08GlgO/Al4IUFVXJnkj8KPe3xsmHv6QJEnS3CxY+Kuq7wCZpvMuU/RfwMumGdZHgI/MX+0kSZKGacEf+JAkSdLSYfiTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA3IuotdgaFatv9XFrsKkiRpgGz5kyRJGhDDnyRJ0oAY/iRJkgbE8CdJkjQghj9JkqQBMfxJkiQNiOFPkiRpQAx/kiRJA2L4kyRJGhDDnyRJ0oAsWPhL8pEklyU5daRs0yTHJDmr/9+klyfJu5MsT3JykoeNfGbf3v9ZSfZdqPpKkiQNwUK2/H0M2G1S2f7AsVW1HXBsfw/wJGC7/rcf8H5oYRE4EHgk8AjgwInAKEmSpLlbsPBXVccDV04q3gM4tL8+FHj6SPlh1fwA2DjJfYFdgWOq6sqqugo4hlUDpSRJksa0uu/5u09VXdJfXwrcp7/eErhgpL8Le9l05ZIkSboDFu2Bj6oqoOZreEn2S3JikhNXrFgxX4OVJElaq6zu8PfLfjmX/v+yXn4RsPVIf1v1sunKV1FVH6yqHatqx80333zeKy5JkrQ2WN
3h7yhg4ondfYEvjpTv05/63Qm4pl8e/hrwxCSb9Ac9ntjLJEmSdAesu1ADTvJpYGdgsyQX0p7aPRg4IsmLgPOB5/TejwaeDCwHfgW8EKCqrkzyRuBHvb83VNXkh0gkSZI0pgULf1X1l9N02mWKfgt42TTD+QjwkXmsmiRJ0mD5Cx+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgCzYb/tqaVq2/1dWKTvv4N0XoSaSJGkx2PInSZI0IIY/SZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IP7CxxLnL3JIkqT5ZMufJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAHxgY81kA+BSJKkO8qWP0mSpAGx5W8tYWugJEkahy1/kiRJA2L4kyRJGhDDnyRJ0oAY/iRJkgbE8CdJkjQghj9JkqQBMfxJkiQNiOFPkiRpQAx/kiRJA2L4kyRJGhDDnyRJ0oAY/iRJkgZk3cWugBbOsv2/sthVkCRJS4wtf5IkSQNiy5+mbCE87+DdF6EmkiRpodnyJ0mSNCC2/GlK090vaIugJElrNsOfFoSXkiVJWpq87CtJkjQgtvxpTmzRkyRpzWb4053m9wlKkrTmWGMu+ybZLcmZSZYn2X+x6yNJkrQmWiNa/pKsA7wX+HPgQuBHSY6qqtMXt2aaizvTQjjVpWUvQUuSNHdrRPgDHgEsr6pzAJIcDuwBGP4GYtzgOG5/hkRJ0lCtKeFvS+CCkfcXAo9cpLpoLbCYIXG+W0AlSZqLNSX8zSrJfsB+/e31Sc5cDaPdDLh8NYxH45vXZZK3zNeQ5sdSq88cuK0sPS6TpcdlsjStjuWyzQIPfyVrSvi7CNh65P1Wvew2VfVB4IOrs1JJTqyqHVfnODUzl8nS5HJZelwmS4/LZGlaG5fLmvK074+A7ZJsm2R9YE/gqEWukyRJ0hpnjWj5q6qbk/wN8DVgHeAjVXXaIldLkiRpjbNGhD+AqjoaOHqx6zHJar3MrLG4TJYml8vS4zJZelwmS9Nat1xSVYtdB0mSJK0ma8o9f5IkSZoHhr87wJ+aW32SbJ3km0lOT3Jakpf38k2THJPkrP5/k16eJO/uy+bkJA8bGda+vf+zkuy7WNO0tkiyTpKfJPlyf79tkhP6vP9MfziLJBv098t792Ujwzigl5+ZZNfFmZK1R5KNkxyZ5OdJzkjyKLeVxZfkFX3/dWqSTye5q9vL6pXkI0kuS3LqSNm8bRtJHp7klP6ZdyfJ6p3COaoq/+bwR3vg5GzgfsD6wM+A7Re7XmvrH3Bf4GH99T2BXwDbA/8G7N/L9wfe0l8/GfgqEGAn4IRevilwTv+/SX+9yWJP35r8B/xf4FPAl/v7I4A9++sPAC/pr18KfKC/3hP4TH+9fd9+NgC27dvVOos9XWvyH3Ao8Nf99frAxm4ri75MtgTOBe7W3x8BvMDtZbUvh8cBDwNOHSmbt20D+GHvN/2zT1rsaZ7pz5a/ubvtp+aq6rfAxE/NaQFU1SVV9eP++jrgDNrOdA/agY7+/+n99R7AYdX8ANg4yX2BXYFjqurKqroKOAbYbTVOylolyVbA7sCH+/sATwCO7L1MXiYTy+pIYJfe/x7A4VX1m6o6F1hO2750ByTZiHaAOwSgqn5bVVfjtrIUrAvcLcm6wN2BS3B7Wa2q6njgyknF87Jt9G73qqofVEuCh40Ma0ky/M3dVD81t+Ui1WVQ+uWPPwJOAO5TVZf0TpcC9+mvp1s+Lrf59U7glcCt/f29gaur6ub+fnT+3jbve/drev8uk/m1LbAC+Gi/HP/hJPfAbWVRVdVFwL8D/0MLfdcAJ+H2shTM17axZX89uXzJMvxpjZBkQ+C/gL+vqmtHu/UzLR9bX02SPAW4rKpOWuy6aCXr0i5rvb+q/gi4gXYp6zZuK6
tfv49sD1o43wK4B7akLjlD2zYMf3M360/NaX4lWY8W/D5ZVZ/rxb/sTe30/5f18umWj8tt/jwaeFqS82i3PTwBeBft0sjEd4eOzt/b5n3vvhFwBS6T+XYhcGFVndDfH0kLg24ri+vPgHOrakVV3QR8jrYNub0svvnaNi7qryeXL1mGv7nzp+ZWo36vyyHAGVX19pFORwETT1rtC3xxpHyf/rTWTsA1vVn/a8ATk2zSz8Sf2Ms0R1V1QFVtVVXLaOv/N6pqL+CbwLN6b5OXycSyelbvv3r5nv3pxm2B7Wg3TesOqKpLgQuSPKgX7QKcjtvKYvsfYKckd+/7s4nl4vay+OZl2+jdrk2yU1/G+4wMa2la7CdO1sQ/2pNAv6A9bfXPi12ftfkPeAytKf5k4Kf978m0e2COBc4C/hvYtPcf4L192ZwC7DgyrL+i3SS9HHjhYk/b2vAH7MztT/vej3YwWg58Ftigl9+1v1/eu99v5PP/3JfVmSzxp+PWhD9gB+DEvr18gfZEotvK4i+Xg4CfA6cCH6c9sev2snqXwadp91zeRGslf9F8bhvAjn35ng28h/4jGkv1z1/4kCRJGhAv+0qSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JK3Vklw/S/eNk7x0NdTjDUn+bJZ+dk7yJwtdF0nDZviTNHQbAwse/qrqdVX137P0tjNg+JO0oAx/ktYISZYlOSPJh5KcluTrSe42RX/bJvl+klOSvGmkfMMkxyb5ce+2R+90MHD/JD9N8tYZ+ps8nuuTvKPX5dgkm/fyHZL8IMnJST7ffwmAJB9L8qz++rwkB42M4/eTLANeDLyi1+WxSZ6d5NQkP0ty/HzOT0nDZfiTtCbZDnhvVT0YuBp45hT9vAt4f1U9hPaN/hN+DfxFVT0MeDzwtv5TTPsDZ1fVDlX1TzP0N9k9gBN7XY4DDuzlhwGvqqr/Rft1gAOn+CzA5X0c7wf+sarOAz4AvKPX5dvA64Bdq+qhwNNmnTuSNAbDn6Q1yblV9dP++iRg2RT9PJr2U07QfkprQoB/SXIy7aectgTuM8Xnx+3vVuAz/fUngMck2QjYuKqO6+WHAo+bZlo+N8t0AHwX+FiS/w2sM00/kjQn6y52BSRpDn4z8voWYJXLvt1Uv1u5F7A58PCquinJebTfUb2j/Y0zzplMTMstTLMvrqoXJ3kksDtwUpKHV9UVcxyPJK3Elj9Ja5vvAnv213uNlG8EXNYD3eOBbXr5dcA9x+hvsrsAz+qvnwd8p6quAa5K8thevjftkvC4VqpLkvtX1QlV9TpgBbD1HIYlSVOy5U/S2ublwKeSvAr44kj5J4EvJTkFOBH4OUBVXZHku0lOBb4KvGWq/qZwA/CIJK8BLgOe28v3BT6Q5O7AOcAL51D3LwFH9odM/pb28Md2tEvRxwI/m8OwJGlKqZrrlQpJUpLrq2rDxa6HJM2Vl30lSZIGxJY/SZKkAbHlT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IP8fb/CXoziWI40AAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plot_df_missing(recovery_df)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 2 \n",
" 3 \n",
" 4 \n",
" 5 \n",
" 6 \n",
" 7 \n",
" 8 \n",
" 9 \n",
" ... \n",
" 22 \n",
" 23 \n",
" 24 \n",
" 25 \n",
" 26 \n",
" 27 \n",
" 28 \n",
" 29 \n",
" 30 \n",
" 31 \n",
" \n",
" \n",
" stop_id \n",
" hour \n",
" route_desc \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 8500926 \n",
" 8.0 \n",
" Bus \n",
" 0 \n",
" 2 \n",
" 3 \n",
" 3 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" ... \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" \n",
" \n",
" 9.0 \n",
" Bus \n",
" 0 \n",
" 1 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 3 \n",
" \n",
" \n",
" 10.0 \n",
" Bus \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 11.0 \n",
" Bus \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 12.0 \n",
" Bus \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 13.0 \n",
" Bus \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 14.0 \n",
" Bus \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 15.0 \n",
" Bus \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 16.0 \n",
" Bus \n",
" 0 \n",
" 1 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" ... \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" \n",
" \n",
" 17.0 \n",
" Bus \n",
" 0 \n",
" 1 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" ... \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" \n",
" \n",
" 18.0 \n",
" Bus \n",
" 0 \n",
" 2 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" ... \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" 4 \n",
" \n",
" \n",
" 19.0 \n",
" Bus \n",
" 0 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" ... \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" \n",
" \n",
" 8502186 \n",
" 8.0 \n",
" S-Bahn \n",
" 0 \n",
" 3 \n",
" 6 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" ... \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" \n",
" \n",
" 9.0 \n",
" S-Bahn \n",
" 0 \n",
" 4 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" ... \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" \n",
" \n",
" 10.0 \n",
" S-Bahn \n",
" 0 \n",
" 4 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" ... \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" \n",
" \n",
" 11.0 \n",
" S-Bahn \n",
" 0 \n",
" 4 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" ... \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 8 \n",
" \n",
" \n",
" 12.0 \n",
" S-Bahn \n",
" 0 \n",
" 3 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" ... \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 8 \n",
" \n",
" \n",
" 13.0 \n",
" S-Bahn \n",
" 0 \n",
" 3 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" ... \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" \n",
" \n",
" 14.0 \n",
" S-Bahn \n",
" 0 \n",
" 3 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" ... \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" \n",
" \n",
" 15.0 \n",
" S-Bahn \n",
" 0 \n",
" 3 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" ... \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 7 \n",
" 8 \n",
" 8 \n",
" \n",
" \n",
"
\n",
"
20 rows × 32 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... 22 23 \\\n",
"stop_id hour route_desc ... \n",
"8500926 8.0 Bus 0 2 3 3 4 4 4 4 4 4 ... 4 4 \n",
" 9.0 Bus 0 1 2 2 2 2 2 2 2 2 ... 2 2 \n",
" 10.0 Bus 0 0 1 1 1 1 1 2 2 2 ... 2 2 \n",
" 11.0 Bus 0 0 1 1 1 2 2 2 2 2 ... 2 2 \n",
" 12.0 Bus 0 0 1 1 1 2 2 2 2 2 ... 2 2 \n",
" 13.0 Bus 0 0 1 1 1 2 2 2 2 2 ... 2 2 \n",
" 14.0 Bus 0 0 1 1 2 2 2 2 2 2 ... 2 2 \n",
" 15.0 Bus 0 0 1 1 2 2 2 2 2 2 ... 2 2 \n",
" 16.0 Bus 0 1 3 3 3 3 3 4 4 4 ... 4 4 \n",
" 17.0 Bus 0 1 3 3 3 3 3 3 3 3 ... 4 4 \n",
" 18.0 Bus 0 2 3 3 3 3 3 3 3 3 ... 4 4 \n",
" 19.0 Bus 0 2 2 2 2 3 3 3 3 3 ... 3 3 \n",
"8502186 8.0 S-Bahn 0 3 6 7 7 7 7 7 7 7 ... 8 8 \n",
" 9.0 S-Bahn 0 4 7 7 7 7 7 7 7 7 ... 8 8 \n",
" 10.0 S-Bahn 0 4 7 7 7 7 7 7 7 7 ... 7 7 \n",
" 11.0 S-Bahn 0 4 7 7 7 7 7 7 7 7 ... 7 7 \n",
" 12.0 S-Bahn 0 3 7 7 7 7 7 7 7 7 ... 7 7 \n",
" 13.0 S-Bahn 0 3 7 7 7 7 7 7 7 7 ... 8 8 \n",
" 14.0 S-Bahn 0 3 7 7 7 7 7 7 7 7 ... 8 8 \n",
" 15.0 S-Bahn 0 3 7 7 7 7 7 7 7 7 ... 7 7 \n",
"\n",
" 24 25 26 27 28 29 30 31 \n",
"stop_id hour route_desc \n",
"8500926 8.0 Bus 4 4 4 4 4 4 4 4 \n",
" 9.0 Bus 2 2 2 2 2 2 2 3 \n",
" 10.0 Bus 2 2 2 2 2 2 2 2 \n",
" 11.0 Bus 2 2 2 2 2 2 2 2 \n",
" 12.0 Bus 2 2 2 2 2 2 2 2 \n",
" 13.0 Bus 2 2 2 2 2 2 2 2 \n",
" 14.0 Bus 2 2 2 2 2 2 2 2 \n",
" 15.0 Bus 2 2 2 2 2 2 2 2 \n",
" 16.0 Bus 4 4 4 4 4 4 4 4 \n",
" 17.0 Bus 4 4 4 4 4 4 4 4 \n",
" 18.0 Bus 4 4 4 4 4 4 4 4 \n",
" 19.0 Bus 3 3 3 3 3 3 3 3 \n",
"8502186 8.0 S-Bahn 8 8 8 8 8 8 8 8 \n",
" 9.0 S-Bahn 8 8 8 8 8 8 8 8 \n",
" 10.0 S-Bahn 7 7 7 8 8 8 8 8 \n",
" 11.0 S-Bahn 7 7 7 7 7 7 7 8 \n",
" 12.0 S-Bahn 7 7 7 7 7 7 7 8 \n",
" 13.0 S-Bahn 8 8 8 8 8 8 8 8 \n",
" 14.0 S-Bahn 8 8 8 8 8 8 8 8 \n",
" 15.0 S-Bahn 7 7 7 7 7 7 8 8 \n",
"\n",
"[20 rows x 32 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"recovery_df.head(20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Make second recovery table\n",
"\n",
"Here only taking combination of `transport_type x hour`"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(127, 32)\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 2 \n",
" 3 \n",
" 4 \n",
" 5 \n",
" 6 \n",
" 7 \n",
" 8 \n",
" 9 \n",
" ... \n",
" 22 \n",
" 23 \n",
" 24 \n",
" 25 \n",
" 26 \n",
" 27 \n",
" 28 \n",
" 29 \n",
" 30 \n",
" 31 \n",
" \n",
" \n",
" hour \n",
" route_desc \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 7.0 \n",
" Bus \n",
" 0 \n",
" 5 \n",
" 7 \n",
" 9 \n",
" 9 \n",
" 10 \n",
" 10 \n",
" 10 \n",
" 11 \n",
" 11 \n",
" ... \n",
" 11 \n",
" 12 \n",
" 12 \n",
" 12 \n",
" 12 \n",
" 12 \n",
" 12 \n",
" 12 \n",
" 12 \n",
" 12 \n",
" \n",
" \n",
" InterRegio \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" Intercity \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" ... \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" S-Bahn \n",
" 0 \n",
" 3 \n",
" 5 \n",
" 6 \n",
" 7 \n",
" 8 \n",
" 8 \n",
" 8 \n",
" 9 \n",
" 9 \n",
" ... \n",
" 9 \n",
" 9 \n",
" 9 \n",
" 9 \n",
" 9 \n",
" 9 \n",
" 10 \n",
" 10 \n",
" 10 \n",
" 10 \n",
" \n",
" \n",
"
\n",
"
4 rows × 32 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... 22 23 24 25 \\\n",
"hour route_desc ... \n",
"7.0 Bus 0 5 7 9 9 10 10 10 11 11 ... 11 12 12 12 \n",
" InterRegio 0 0 0 0 0 1 1 1 1 1 ... 2 2 2 2 \n",
" Intercity 0 0 0 0 0 1 1 1 1 1 ... 2 2 2 2 \n",
" S-Bahn 0 3 5 6 7 8 8 8 9 9 ... 9 9 9 9 \n",
"\n",
" 26 27 28 29 30 31 \n",
"hour route_desc \n",
"7.0 Bus 12 12 12 12 12 12 \n",
" InterRegio 2 2 2 2 2 2 \n",
" Intercity 2 2 2 2 2 2 \n",
" S-Bahn 9 9 10 10 10 10 \n",
"\n",
"[4 rows x 32 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with gzip.open(\"../data/stop_times_wHour.pkl\", \"rb\") as input_file:\n",
" stoptimes = pickle.load(input_file)\n",
" \n",
"distrib_df = pd.DataFrame(d_all).transpose()\n",
"distrib_to_rm = np.array(distrib_df.iloc[:,range(11)].sum(axis=1) == 11) # missing trips\n",
"distrib_df = distrib_df.iloc[~distrib_to_rm,:]\n",
"\n",
"stoptimes_df = pd.DataFrame(stoptimes)\n",
"\n",
"recovery_df2 = distrib_df.join(stoptimes_df)\n",
"list_bins = [x for x in range(32)]\n",
"\n",
"recovery_df2 = recovery_df2.groupby(['hour', 'route_desc'])[list_bins].apply(lambda x : x.astype(float).sum())\n",
"recovery_df2 = recovery_df2.astype('int')\n",
"print(recovery_df2.shape)\n",
"recovery_df2.head(4)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Last recovery table \n",
"\n",
"Takes only transport type distribution"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(11, 32)\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" 2 \n",
" 3 \n",
" 4 \n",
" 5 \n",
" 6 \n",
" 7 \n",
" 8 \n",
" 9 \n",
" ... \n",
" 22 \n",
" 23 \n",
" 24 \n",
" 25 \n",
" 26 \n",
" 27 \n",
" 28 \n",
" 29 \n",
" 30 \n",
" 31 \n",
" \n",
" \n",
" route_desc \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" Bus \n",
" 1572 \n",
" 59211 \n",
" 97405 \n",
" 115766 \n",
" 124269 \n",
" 128687 \n",
" 131397 \n",
" 133346 \n",
" 134908 \n",
" 136278 \n",
" ... \n",
" 137998 \n",
" 138003 \n",
" 138007 \n",
" 138012 \n",
" 138014 \n",
" 138016 \n",
" 138018 \n",
" 138021 \n",
" 138023 \n",
" 138087 \n",
" \n",
" \n",
" Eurocity \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" ... \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" 3 \n",
" \n",
" \n",
" InterRegio \n",
" 33 \n",
" 74 \n",
" 107 \n",
" 141 \n",
" 174 \n",
" 207 \n",
" 240 \n",
" 273 \n",
" 306 \n",
" 339 \n",
" ... \n",
" 371 \n",
" 371 \n",
" 371 \n",
" 372 \n",
" 372 \n",
" 372 \n",
" 372 \n",
" 372 \n",
" 372 \n",
" 372 \n",
" \n",
" \n",
" Intercity \n",
" 9 \n",
" 19 \n",
" 29 \n",
" 39 \n",
" 49 \n",
" 59 \n",
" 69 \n",
" 79 \n",
" 89 \n",
" 99 \n",
" ... \n",
" 109 \n",
" 109 \n",
" 109 \n",
" 109 \n",
" 109 \n",
" 109 \n",
" 109 \n",
" 109 \n",
" 109 \n",
" 109 \n",
" \n",
" \n",
"
\n",
"
4 rows × 32 columns
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 \\\n",
"route_desc \n",
"Bus 1572 59211 97405 115766 124269 128687 131397 133346 \n",
"Eurocity 0 0 0 1 1 1 1 2 \n",
"InterRegio 33 74 107 141 174 207 240 273 \n",
"Intercity 9 19 29 39 49 59 69 79 \n",
"\n",
" 8 9 ... 22 23 24 25 26 \\\n",
"route_desc ... \n",
"Bus 134908 136278 ... 137998 138003 138007 138012 138014 \n",
"Eurocity 2 2 ... 3 3 3 3 3 \n",
"InterRegio 306 339 ... 371 371 371 372 372 \n",
"Intercity 89 99 ... 109 109 109 109 109 \n",
"\n",
" 27 28 29 30 31 \n",
"route_desc \n",
"Bus 138016 138018 138021 138023 138087 \n",
"Eurocity 3 3 3 3 3 \n",
"InterRegio 372 372 372 372 372 \n",
"Intercity 109 109 109 109 109 \n",
"\n",
"[4 rows x 32 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with gzip.open(\"../data/stop_times_wHour.pkl\", \"rb\") as input_file:\n",
" stoptimes = pickle.load(input_file)\n",
" \n",
"distrib_df = pd.DataFrame(d_all).transpose()\n",
"distrib_to_rm = np.array(distrib_df.iloc[:,range(11)].sum(axis=1) == 11) # missing trips\n",
"distrib_df = distrib_df.iloc[~distrib_to_rm,:]\n",
"\n",
"stoptimes_df = pd.DataFrame(stoptimes)\n",
"\n",
"recovery_df3 = distrib_df.join(stoptimes_df)\n",
"list_bins = [x for x in range(32)]\n",
"\n",
"recovery_df3 = recovery_df3.groupby(['route_desc'])[list_bins].apply(lambda x : x.astype(float).sum())\n",
"recovery_df3 = recovery_df3.astype('int')\n",
"print(recovery_df3.shape)\n",
"recovery_df3.head(4)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Reconstruct cumulative distribution probabilities from multiple distributions to recover data with few/missing points \n",
"\n",
- "To recover missing or faulty data, the strategy is the following :\n",
- "1. If we have more than 100 data points in `real` group, we rely exclusively on it to compute probabilities for a given transfer on a `trip_id x stop_id` \n",
- " - `real` group : the delay was calculated with actual arrival time with status `geschaetz` or `real`, meaning it comes from actual measurments.\n",
- "2. If we do not find enough data within `real` group, we look at distributions in `all` group (contains all delays including `prognose` status) to compute probabilities, if there is more than 100 data points for a given `trip_id x stop_id`.\n",
- "3. If `all` group still does not have more than 100 data points, we rely on `recovery tables` to estimate delay distributions. The strategy is the following :\n",
- " - As we will always know the `stop_id`, the `time` and the `transport_type`, we rely on arrival delays from aggregated values of similar transfer. \n",
- " - First, we compute a table of distribution with all possible combination of `stop_id`, `time` (round to hours) and `transport_type`, and aggregate all the counts we have to compute cumulative distribution probabilities. \n",
- " - Is there is less than 100 data points in one of these intersections, we use the last possibilities : a table with `transport_type` x `time` aggregate counts.\n",
- " - The last values with no match are given the overall average of cumulative distribution probabilities for each `transport_type` with no limit for the minimum number of data points.\n",
- "\n",
- "Following this approach, we can find cumulative distribution probabilities for every combination of `trip_id x stop_id` as defined in `stop_times_df`. We will make a table with the same row order so that McRaptor can easily find their indexes. \n",
+ "At this point, we have 2 dictionaries of distributions and 3 recovery dataframes:\n",
"\n",
- "In order to do that, we have two dictionnaries of distributions and two recovery dataframes :\n",
- " - `df_real` : contains delay distribution for each keys in form `trip_id + __ + stop_id` calculated from delays with status `geschaetz` or `real` in sbb datasets.\n",
- " - `df_all` : contains delay distributions for each keys in form `trip_id + __ + stop_id`. No filter was applied on status (contains `geschaetz`, `real` __and__ `prognose` = evaluated delay).\n",
+ " - `d_real` : contains the delay distribution for each key of the form `trip_id + __ + stop_id`, calculated from delays with status `geschaetz` or `real` in the SBB datasets.\n",
+ " - `d_all` : contains the delay distribution for each key of the form `trip_id + __ + stop_id`. No filter was applied on status (contains `geschaetz`, `real` __and__ `prognose` = evaluated delay).\n",
" - `recovery_df` : contains aggregated delay distributions for each combination of `stop_id`, `route_desc` (transport type) and `hour` (time rounded to hour). \n",
" - `recovery_df2` : contains aggregated delay distributions for each combination of `route_desc` (transport type) and `hour` (time rounded to hour). \n",
- " - `recovery_df3` : contains aggregated delay distributions for `route_desc` (transport type) "
+ " - `recovery_df3` : contains aggregated delay distributions for `route_desc` (transport type) \n",
+ " \n",
+ "We will now use these in order to reconstruct the final table with $P(T\\leq t_i)$ for each time point between -1 and +30, using a cumulative probability function as mentioned above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" route_int \n",
" trip_int \n",
" stop_int \n",
" stop_sequence \n",
" arrival_time \n",
" departure_time \n",
" route_id \n",
" trip_id \n",
" stop_id \n",
" route_desc \n",
" stop_id_raw \n",
" sequence_shift_1 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" NaT \n",
" 2020-05-21 07:18:00 \n",
" 30-57-Y-j19-1 \n",
" 4.TA.30-57-Y-j19-1.1.H \n",
" 8502208 \n",
" Bus \n",
" 8502208 \n",
" 2 \n",
" \n",
" \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 2 \n",
" 2020-05-21 07:23:00 \n",
" 2020-05-21 07:23:00 \n",
" 30-57-Y-j19-1 \n",
" 4.TA.30-57-Y-j19-1.1.H \n",
" 8502209 \n",
" Bus \n",
" 8502209 \n",
" 3 \n",
" \n",
" \n",
" 2 \n",
" 0 \n",
" 0 \n",
" 2 \n",
" 3 \n",
" 2020-05-21 07:29:00 \n",
" NaT \n",
" 30-57-Y-j19-1 \n",
" 4.TA.30-57-Y-j19-1.1.H \n",
" 8503202 \n",
" Bus \n",
" 8503202 \n",
" 1 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_int trip_int stop_int stop_sequence arrival_time \\\n",
"0 0 0 0 1 NaT \n",
"1 0 0 1 2 2020-05-21 07:23:00 \n",
"2 0 0 2 3 2020-05-21 07:29:00 \n",
"\n",
" departure_time route_id trip_id stop_id \\\n",
"0 2020-05-21 07:18:00 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8502208 \n",
"1 2020-05-21 07:23:00 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8502209 \n",
"2 NaT 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8503202 \n",
"\n",
" route_desc stop_id_raw sequence_shift_1 \n",
"0 Bus 8502208 2 \n",
"1 Bus 8502209 3 \n",
"2 Bus 8503202 1 "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"###################### MAKE CUMULATIVE PROBABILITY TABLE #######################\n",
"\n",
"# Load stop_time table, to use its order as a template for our final table \n",
"with open(\"../data/stop_times_df.pkl\", \"rb\") as input_file:\n",
" stoptimes = pickle.load(input_file)\n",
" \n",
"stoptimes.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.0%, 4.07%, 8.14%, 12.21%, 16.28%, 20.35%, 24.42%, 28.49%, 32.55%, 36.62%, 40.69%, 44.76%, 48.83%, 52.9%, 56.97%, 61.04%, 65.11%, 69.18%, 73.25%, 77.32%, 81.39%, 85.46%, 89.53%, 93.6%, 97.66%, "
]
}
],
"source": [
"summary_df = pd.DataFrame(columns = ['key', 'key_int', 'trip_id', 'stop_id', 'transport_type', 'hour', 'distribution'])\n",
"n_fail = 0\n",
"size_stop_times = stoptimes.shape[0]\n",
"n_real = 0\n",
"n_all = 0\n",
"n_recov1 = 0\n",
"n_recov2 = 0\n",
"n_recov3 = 0\n",
"all_distrib = []\n",
"all_transport_type = []\n",
"all_hours = []\n",
"all_keys = []\n",
"\n",
"i = 0\n",
"for index, row in stoptimes.iterrows():\n",
" \n",
" trip_id = row[7]\n",
" stop_id = str(row[8])[:7]\n",
" transport_type = row[9]\n",
" key = trip_id + '__' + stop_id\n",
"\n",
" # Compute rounded hour using arrival if possible - recover with departure\n",
" hour = pd.to_datetime(stoptimes.loc[index]['arrival_time']).hour\n",
" if math.isnan(hour): # if arrival is NaT, use departure time\n",
" hour = pd.to_datetime(stoptimes.loc[index]['departure_time']).hour\n",
" \n",
" distrib = np.zeros(31)\n",
" keep_trying = True\n",
" \n",
" # 1) try d_real to get distribution from measured delays\n",
" if key in d_real:\n",
" distrib = d_real[key]\n",
" sum_distrib = np.sum(distrib)\n",
" if sum_distrib > 100 :\n",
" #summary_df.loc[index, 'distribution'] = distrib\n",
" all_distrib.append(distrib)\n",
" keep_trying = False \n",
" n_real += 1\n",
" \n",
" # 2) try d_all to get distribution from measured + estimated delays\n",
" if keep_trying and key in d_all:\n",
" distrib = d_all[key]\n",
" sum_distrib = np.sum(distrib)\n",
" if sum_distrib > 100 :\n",
" #summary_df.loc[index, 'distribution'] = distrib\n",
" all_distrib.append(distrib)\n",
" keep_trying = False\n",
" n_all += 1\n",
" \n",
" # 3) try first recovery table using stop_id, transport_type and hour\n",
" if keep_trying and (stop_id, hour, transport_type) in recovery_df.index:\n",
" distrib = np.array(recovery_df.loc[(stop_id, hour, transport_type)])\n",
" sum_distrib = np.sum(distrib)\n",
" if sum_distrib > 100 :\n",
" #summary_df.loc[index, 'distribution'] = distrib\n",
" all_distrib.append(distrib)\n",
" keep_trying = False \n",
" n_recov1 += 1\n",
" \n",
" # 4) use second recovery table using transport_type and hour \n",
" if keep_trying and (hour, transport_type) in recovery_df2.index:\n",
" distrib = np.array(recovery_df2.loc[(hour, transport_type)])\n",
" sum_distrib = np.sum(distrib)\n",
" if sum_distrib > 100 :\n",
" #summary_df.loc[index, 'distribution'] = distrib\n",
" all_distrib.append(distrib)\n",
" keep_trying = False \n",
" n_recov2 += 1\n",
" \n",
" # 5) use third recovery table using transport_type only \n",
" if keep_trying and (transport_type) in recovery_df3.index:\n",
" distrib = np.array(recovery_df3.loc[(transport_type)])\n",
" sum_distrib = np.sum(distrib)\n",
" #summary_df.loc[index, 'distribution'] = distrib\n",
" all_distrib.append(distrib)\n",
" keep_trying = False \n",
" n_recov3 += 1\n",
" \n",
" # Record results in summary\n",
- " #summary_df.loc[index, 'key'] = key\n",
- " #summary_df.loc[index, 'key_int'] = index\n",
- " #summary_df.loc[index, 'trip_id'] = trip_id\n",
- " #summary_df.loc[index, 'stop_id'] = stop_id\n",
- " #summary_df.loc[index, 'transport_type'] = transport_type\n",
- " #summary_df.loc[index, 'hour'] = hour\n",
" all_keys.append(key)\n",
" all_transport_type.append(transport_type)\n",
" all_hours.append(hour)\n",
"\n",
" # save number of failure for recovery\n",
" if keep_trying:\n",
" print('fail{}'.format(index), end = ', ')\n",
" n_fail += 1 \n",
" \n",
" # print progression \n",
" if (index % 10000) == 0 :\n",
" print('{}%'.format(round(100*index/size_stop_times,2)), end = ', ')"
]
},
{
"cell_type": "code",
- "execution_count": 68,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "10422\n",
- "173225\n",
- "37031\n",
- "20853\n",
- "4207\n"
- ]
- }
- ],
- "source": [
- "print(n_real)\n",
- "print(n_all)\n",
- "print(n_recov1)\n",
- "print(n_recov2)\n",
- "print(n_recov3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 82,
+ "execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaEAAAEICAYAAAD2u0vkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAeEklEQVR4nO3de7xVdZ3/8dc7JEBEFNEGUTyalHdJ0XQiNfOH18aacNBIRZrx0eR0m8rolznaTf01NWpqDpTX0AgnG9Kfg6YJlqAcEAUvKCr8iLwhiqLWGH1+f6zvdhbbvc91n/09wPv5eOzHWfu7vmt9P2udffb7rMs+RxGBmZlZDu/IXYCZmW2+HEJmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiE7C2SQtLuuesAkHSEpN/30Lpb0rZukZ7fJun0Bq37g5KWlp4vl3RUI9ad1vewpCMatb42xjlP0k97epzeStK7JM2R9Kqk7+euZ1O2Re4CzKAIQGBkRCxr9tgRcWxH+nWkxoi4B3hvI+qSdA3w+4g4p7T+vRux7kaqVecm4ExgNbB1+MOUPcpHQtZwlSOMzc3mut2bqF2AR+oFkL/XjeMQ2sRJOkPSr0rPn5A0o/R8paRRpUWOSn1elnS5JJX6TpL0qKSXJM2StEtpXkg6S9ITwBOp7QRJi9K67pW0X50a56TJByWtkzS+NO9Lkp6X9IykM0rt/ST9q6T/J+k5SVdKGlBn/X1S39WSngKOr5p/t6S/T9O7S5otaW3qP71ejZVThpK+KulZ4Oo6pxEPkvRI2m9XS+qf1jlR0m+raolUw5nABODsNN6v0vy3Tu+lfXCxpD+kx8WS+qV5ldpq7r8a+2jXtN2vSroDGFo1f4akZ9N+mSNp79Rer87Jkp5M63tE0sfaGLuPpP9d6r9A0s5p3l9Lmp/GnS/pr6u+b99Or611kn4laTtJ0yS9kvq3lPrvIekOSWskLZX0d3XquQY4vbRNR6k4PXmTpJ9KegWYKGlHSTPT+pZJ+ofSOs5L++ynaZsWS3qPpK+l78dKSWPr7ZPNSkT4sQk/gN2Alyl+4dgRWEFx6qQy7yXgHel5ALcA2wAjgBeAY9K8E4FlwJ4Up3HPAe4tjRPAHcAQYADwPuB54P1AH4of6uVAvzp1BrB76fkRwJ+BbwJ9geOA14Ft0/x/A2am8QYBvwIuqLPuTwOPATun/r9J422R5t8N/H2avhH4etpf/YExHajxIqBf2u4jKvs39VkOLCmN/Tvg22neROC39fYDcE2lb9X6jkrT3wTmATsA2wP3At/qyP6rsY/mAj9I23EY8Crw09L8SWk/9wMuBhaV5tWq8ySK19s7gPHAa8CwOmN/BVhMcRpTwP7Adml/vQScSvGaOyU93670fVsGvBsYDDwCPA4clfpfB1yd+g4EVgJnpHnvozjdtledmjbYJuA84E3go2mbBgBzgCvS62QUxc/LkaX+fwSOLtXyNMVrqy/wD8DTud8fesMjewF+NOGbXPzwHQCcDEwB7gf2SD+QM0v9gg3fdH8OTE7TtwGfKs17R3pT26W07JGl+T+qvCGW2pYCh9epsdYb/BukoEhtzwOHpDeq14B3l+YdWu+HGrgL+HTp+Vjqh9B1aR/t1MEa/xvoX9VWHULlsY8DnkzTE+leCD0JHFeadzSwvL39V2O7RlAE1sBS2w2UQqiq/zapzsH16qyxzCLgxDrzltaaRxE+91e1zQUmlr5vXy/N+z5wW+n5R0hhSRGE91St69+Bf6lT0wbbRBEqc0rPdwbWA4NKbRcA15T631FVyzqgT3o+KO3DbTr6c7ypPnw6bvMwm+JN6bA0fTdweHrMrur7bGn6dWCrNL0LcImKU2svA2sowmB4qf/K0vQuwJcq/dMyO1P8dtxRL0bEn2vUsz2wJbCgtO7/Su217FhV24o2xjybYrvuV3En2qR2anwhIv7YTp/q
sTuzD9pSObKtt+56+6/Wel6KiNeq1gW8dbrswnS67BWKIISqU3Zlkk7T/5yKfRnYp43+O1MEaq26qr9XK9jwNfdcafqNGs/Lr9/3V70eJwB/VW8baih/H3cE1kTEq52obXVErC89h9rfj82KL65tHmZT/Ca2K/BditNzEyiOHi7r4DpWAt+JiGlt9ClfxK30/07ny23Xaoof4r0jYlUH+j9D8UZXMaJex4h4luJUCZLGAL+WNCfq3xHXkTunqsf+Q5p+jSJMSeNVvyG2t+4/ULy5Plxj3Z3xDLCtpIGlIBpRGv8TFKdjj6IIoMEUp8Uq1ws3qFPFtcKpwIeBuRGxXtKiUv9qKylOqS2paq9sX9kIil84OmslMDsi/lcXlq0ob+cfgCGSBpWCaATQkdejlfhIaPMwG/gQMCAifg/cAxxDcd79gQ6u40rga6UL0oMlndRG/6nApyW9X4WBko6XNKhO/+corlG1KyL+ktb/b5J2SPUMl3R0nUV+DnxO0k6StgUm11u3pJMk7ZSevkTxxvOXztZY5aw09hCKawLTU/uDwN6SRqm4WeG8quXaG+9G4BxJ20saCpwLdPqzPRGxAmgFzpf0zhS+Hyl1GQT8CXiRIjS/206dAyn22wtQ3BxDcSRUz4+Bb0kamV4r+0naDvi/wHskfULSFipuWNmL4rplZ92S1nWqpL7pcZCkPbuwLiJiJcU1uAsk9Vdx082n6ML+39w5hDYDEfE4xfnoe9LzV4CngN+VTg+0t46bKS7A/yydklkC1P18TUS0UhxRXEbxZr6M4hpIPecB16ZTJTXvWqry1bTOeameX1P/8zlTgVkUb/oLgV+0sd6DgPskraO48eHzEfFUF2usuAG4nWKfPwl8G976vnwz1f4E8Nuq5X4C7JXG+2WN9X6bIjweoriwv7Cy7i74BMVNJGuAf6G4NlZxHcWpplUUF//ntVVnRDxCcX1mLkVA7UtxQ0Y9P6D4ReF24JW0vgER8SJwAvAligA8GzghIlZ3duPS0cpYiuuif6A47Vy5oaSrTgFa0vpupri+9OturG+zpHSRzMzMrOl8JGRmZtk4hMzMLBuHkJmZZeMQMjOzbPw5oRqGDh0aLS0tucswM9uoLFiwYHVE1PvQeE0OoRpaWlpobW3NXYaZ2UZFUlt/jaQmn44zM7NsHEJmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsG4eQmZll4w+r1rB41VpaJt/akHUtv/D4hqzHzGxT5CMhMzPLxiFkZmbZOITMzCwbh5CZmWXjEDIzs2wcQmZmlo1DyMzMsnEImZlZNg4hMzPLxiFkZmbZOITMzCwbh5CZmWXjEDIzs2w2uxCStFzS0Nx1mJnZRh5CKmzU22Bmtjnb6N7AJbVIWirpOmAJ8A1J8yU9JOn8Ur9fSlog6WFJZ+ar2MzM6tlY/6ndSOB0YGtgHHAwIGCmpMMiYg4wKSLWSBoAzJf0HxHxYr0VpqA6E6DP1tv3+AaYmdlGeCSUrIiIecDY9HgAWAjsQRFQAJ+T9CAwD9i51F5TREyJiNERMbrPloN7rnIzM3vLxnok9Fr6KuCCiPj38kxJRwBHAYdGxOuS7gb6N7VCMzNr18Z6JFQxC5gkaSsAScMl7QAMBl5KAbQHcEjOIs3MrLaN9UgIgIi4XdKewFxJAOuATwL/BXxa0qPAUopTcmZm1stsdCEUEcuBfUrPLwEuqdH12DrLt/RIYWZm1mkb++k4MzPbiDmEzMwsG4eQmZll4xAyM7NsHEJmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsm43uz/Y0w77DB9N64fG5yzAz2+T5SMjMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsG4eQmZll4xAyM7Ns/DmhGhavWkvL5Ftzl2Fm1lTLM3w+0kdCZmaWjUPIzMyycQiZmVk2DiEzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWjUPI
zMyycQiZmVk2DiEzM8vGIWRmZtk4hMzMLJumhZCkPs0ay8zMNg4NCSFJLZIekzRN0qOSbpK0paTlki6StBA4SdIpkhZLWiLpotLyn5L0uKT7JU2VdFlqv0bSpZLulfSUpHGpXZK+l9azWNL41D5M0hxJi9K8D6b2sZLmSlooaYakrRqx3WZm1j2NPBJ6L3BFROwJvAJ8JrW/GBEHAHOAi4AjgVHAQZI+KmlH4BvAIcAHgD2q1jsMGAOcAFyY2v42rWN/4Cjge5KGAZ8AZkVEZd4iSUOBc4CjUh2twD9XFy/pTEmtklrXv762+3vDzMza1cj/rLoyIn6Xpn8KfC5NT09fDwLujogXACRNAw5L82ZHxJrUPgN4T2m9v4yIvwCPSHpXahsD3BgR64HnJM1O658PXCWpb1pukaTDgb2A30kCeCcwt7r4iJgCTAHoN2xkdGM/mJlZBzUyhKrfuCvPX+vmev9UmlabBUTMkXQYcDxwjaQfAC8Bd0TEKd2sw8zMGqyRp+NGSDo0TX8C+G3V/PuBwyUNTTcpnALMpjh6OVzStpK2AD7egbHuAcZL6iNpe4ojqvsl7QI8FxFTgR8DBwDzgA9I2h1A0kBJ76m3YjMza55GhtBS4CxJjwLbAj8qz4yIZ4DJwG+AB4EFEfGfEbEK+C5FSP0OWA60d1HmZuChtJ67gLMj4lngCOBBSQ8A44FL0um/icCNkh6iOBVXfd3JzMwyUET3L39IagFuiYh9urj8VhGxLh0J3QxcFRE3d7uwLuo3bGQMO/3iXMObmWWx/MLju7W8pAURMbozy/SWD6ueJ2kRsAR4Gvhl5nrMzKwJGnJjQkQsB7p0FJSW/3Ij6jAzs41LbzkSMjOzzZBDyMzMsnEImZlZNg4hMzPLxiFkZmbZOITMzCwbh5CZmWXjEDIzs2wa+Ve0Nxn7Dh9Mazf/fIWZmbXPR0JmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsG4eQmZll4xAyM7Ns/GHVGhavWkvL5Ftzl9FrdPf/zpuZ1eMjITMzy8YhZGZm2TiEzMwsG4eQmZll4xAyM7NsHEJmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsG4eQmZll4xAyM7Nsen0ISZooaccO9Ltb0uga7XtImivpT5K+3DNVmplZV3QqhFRodnBNBNoNoTasAT4H/GtDqjEzs4ZpN1AktUhaKuk6YAnwDUnzJT0k6fxSv9NS24OSri8te1dqv1PSCEmDJa2ohJmkgZJWSupbY+xxwGhgmqRFkgZIOjeNv0TSFEkqLXJq6rdE0sEAEfF8RMwH3mxnO8+U1Cqpdf3ra9vfc2Zm1m0dPaoZCVwBfBEYDhwMjAIOlHSYpL2Bc4AjI2J/4PNpuR8C10bEfsA04NKIWAssAg5PfU4AZkXE20IiIm4CWoEJETEqIt4ALouIgyJiH2BAWr5iy4gYBXwGuKqD21YZa0pEjI6I0X22HNyZRc3MrIs6GkIrImIeMDY9HgAWAntQBNSRwIyIWA0QEWvScocCN6Tp64ExaXo6MD5Nn5yed9SHJN0naXEad+/SvBvT+HOArSVt04n1mplZk3U0hF5LXwVckI5KRkXE7hHxky6MOxM4RtIQ4EDgro4sJKk/xRHZuIjYF5gK9C91iapFqp+bmVkv0tmbDGYBkyRtBSBpuKQdKELkJEnbpfYhqf+9FEc6ABOAewAiYh0wH7gEuCUi1rcx5qvAoDRdCZzVqYZxVX3Hp/HHAGvTqT8zM+ultuhM54i4XdKewNx0P8A64JMR8bCk7wCzJa2nOF03EfgscLWkrwAvAGeUVjcdmAEc0c6w1wBXSnqD4vTeVIobJJ6lCLKyP0p6AOgLTAKQ9FcU15W2Bv4i6QvAXhHxSme23czMGk8RPmNVrd+wkTHs9Itzl9FrLL/w+NwlmNlGQNKCiHjb5zXb0us/rGpmZpuuTp2O60mSLgc+UNV8SURcnaMeMzPr
eb0mhCLirNw1mJlZc/l0nJmZZeMQMjOzbBxCZmaWjUPIzMyycQiZmVk2DiEzM8vGIWRmZtn0ms8J9Sb7Dh9Mq/9UjZlZj/ORkJmZZeMQMjOzbBxCZmaWjUPIzMyycQiZmVk2DiEzM8vGIWRmZtn4c0I1LF61lpbJt+Yuoyb/q20z25T4SMjMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsG4eQmZll4xAyM7NsHEJmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsm14fQpImStqxA/3uljS6RvsESQ9JWizpXkn790ylZmbWWZ0KIRWaHVwTgXZDqA1PA4dHxL7At4ApjSjKzMy6r91AkdQiaamk64AlwDckzU9HF+eX+p2W2h6UdH1p2btS+52SRkgaLGlFJcwkDZS0UlLfGmOPA0YD0yQtkjRA0rlp/CWSpkhSaZFTU78lkg4GiIh7I+KlNH8esFOd7TxTUquk1vWvr+3QzjMzs+7p6FHNSOAK4IvAcOBgYBRwoKTDJO0NnAMcGRH7A59Py/0QuDYi9gOmAZdGxFpgEXB46nMCMCsi3qweNCJuAlqBCRExKiLeAC6LiIMiYh9gQFq+YsuIGAV8BriqxnZ8Crit1gZGxJSIGB0Ro/tsObiDu8XMzLqjoyG0IiLmAWPT4wFgIbAHRUAdCcyIiNUAEbEmLXcocEOavh4Yk6anA+PT9MnpeUd9SNJ9khancfcuzbsxjT8H2FrSNpUZkj5EEUJf7cRYZmbWgzoaQq+lrwIuSEcloyJi94j4SRfGnQkcI2kIcCBwV0cWktSf4ohsXLrGMxXoX+oSVYtEWm4/4MfAiRHxYhfqNTOzHtDZmwxmAZMkbQUgabikHShC5CRJ26X2Ian/vRRHOgATgHsAImIdMB+4BLglIta3MearwKA0XQmc1amGcVV9x6fxxwBrI2KtpBHAL4BTI+LxTm6vmZn1oC060zkibpe0JzA33Q+wDvhkRDws6TvAbEnrKU7XTQQ+C1wt6SvAC8AZpdVNB2YAR7Qz7DXAlZLeoDi9N5XiBolnKYKs7I+SHgD6ApNS27nAdsAVqeY/R8TbbuU2M7PmU0T1GSzrN2xkDDv94txl1LT8wuNzl2BmVpOkBZ39Jb/Xf1jVzMw2XZ06HdeTJF0OfKCq+ZKIuDpHPWZm1vN6TQhFxFm5azAzs+by6TgzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWjUPIzMyy6TWfE+pN9h0+mFb/eRwzsx7nIyEzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWjUPIzMyy8eeEali8ai0tk2+tO9//YtvMrDF8JGRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWjUPIzMyycQiZmVk2DiEzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWTa8PIUkTJe3YgX53Sxpdo/1ESQ9JWiSpVdKYnqnUzMw6q1MhpEKzg2si0G4IteFOYP+IGAVMAn7ciKLMzKz72g0USS2Slkq6DlgCfEPS/HR0cX6p32mp7UFJ15eWvSu13ylphKTBklZUwkzSQEkrJfWtMfY4YDQwLR3JDJB0bhp/iaQpklRa5NTUb4mkgwEiYl1ERJo/EAhqkHRmOlJqXf/62g7tPDMz656OHtWMBK4AvggMBw4GRgEHSjpM0t7AOcCREbE/8Pm03A+BayNiP2AacGlErAUWAYenPicAsyLizepBI+ImoBWYEBGjIuIN4LKIOCgi9gEGpOUrtkxHPJ8Brqo0SvqYpMeAWymOht4mIqZExOiIGN1ny8Ed3C1mZtYdHQ2hFRExDxibHg8AC4E9KALqSGBGRKwGiIg1ablDgRvS9PVA5XrMdGB8mj45Pe+oD0m6T9LiNO7epXk3pvHnAFtL2iY9vzki9gA+CnyrE2OZmVkP6mgIvZa+CrggHZWMiojdI+InXRh3JnCMpCHAgcBdHVlIUn+K
I7JxEbEvMBXoX+pSfaptg+cpnHaTNLQLNZuZWYN19iaDWcAkSVsBSBouaQeKEDlJ0napfUjqfy/FkQ7ABOAeKK7TAPOBS4BbImJ9G2O+CgxK05XAWZ1qGFfVd3wafwywNiLWStq9ct1I0gFAP+DFTm63mZn1gC060zkibpe0JzA3va+vAz4ZEQ9L+g4wW9J6itN1E4HPAldL+grwAnBGaXXTgRnAEe0Mew1wpaQ3KE7vTaW4QeJZiiAr+6OkB4C+/M+1n48Dp0l6E3gDGF+6UcHMzDKS34/frt+wkTHs9Ivrzl9+4fFNrMbMbOMgaUFEvO3zmm3p9R9WNTOzTVenTsf1JEmXAx+oar4kIq7OUY+ZmfW8XhNCEXFW7hrMzKy5fDrOzMyycQiZmVk2DiEzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbHrN54R6k32HD6bVf5rHzKzH+UjIzMyycQiZmVk2DiEzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWjUPIzMyyUUTkrqHXkfQqsDR3HV0wFFidu4gucN3N5bqba3Oqe5eI2L4zC/jP9tS2NCJG5y6isyS1uu7mcd3N5bqbq1l1+3ScmZll4xAyM7NsHEK1TcldQBe57uZy3c3lupurKXX7xgQzM8vGR0JmZpaNQ8jMzLJxCFWRdIykpZKWSZqcYfydJf1G0iOSHpb0+dR+nqRVkhalx3GlZb6W6l0q6ej2tkXSrpLuS+3TJb2zQbUvl7Q41dea2oZIukPSE+nrtqldki5NNTwk6YDSek5P/Z+QdHqp/cC0/mVpWTWg5veW9ukiSa9I+kJv3d+SrpL0vKQlpbYe38f1xuhGzd+T9Fiq62ZJ26T2FklvlPb7lV2tra3t72btPf7akNQvPV+W5rc0oO7ppZqXS1qU2vPu84jwIz2APsCTwG7AO4EHgb2aXMMw4IA0PQh4HNgLOA/4co3+e6U6+wG7pvr7tLUtwM+Bk9P0lcA/Nqj25cDQqrb/A0xO05OBi9L0ccBtgIBDgPtS+xDgqfR12zS9bZp3f+qrtOyxPfD9fxbYpbfub+Aw4ABgSTP3cb0xulHzWGCLNH1RqeaWcr+q9XSqtnrb34D93eOvDeAzwJVp+mRgenfrrpr/feDc3rDPfSS0oYOBZRHxVET8N/Az4MRmFhARz0TEwjT9KvAoMLyNRU4EfhYRf4qIp4FlFNtRc1vSbzJHAjel5a8FPtozW/NWfdfWGOtE4LoozAO2kTQMOBq4IyLWRMRLwB3AMWne1hExL4pX+3U9UPeHgScjYkU725Ntf0fEHGBNjZp6eh/XG6NLNUfE7RHx5/R0HrBTW+voYm31tr/D6uzvehr52ihv003AhytHId2tO63n74Ab21pHs/a5Q2hDw4GVpee/p+0A6FHpEPx9wH2p6Z/SIe5VpdMh9Wqu174d8HLpDaCR2xjA7ZIWSDoztb0rIp5J088C7+pi3cPTdHV7I53Mhj+YvX1/VzRjH9cboxEmUfz2XLGrpAckzZb0wdTWldp68ue5p18bby2T5q9N/Rvhg8BzEfFEqS3bPncI9VKStgL+A/hCRLwC/Ah4NzAKeIbicLq3GRMRBwDHAmdJOqw8M/021Ss/E5DOxf8NMCM1bQz7+22asY8bOYakrwN/BqalpmeAERHxPuCfgRskbZ2jtjZslK+NklPY8JetrPvcIbShVcDOpec7pbamktSXIoCmRcQvACLiuYhYHxF/AaZSHOJD/Zrrtb9IcYi8RVV7t0XEqvT1eeDmVONzlcPx9PX5Lta9ig1P2TT6e3MssDAinkvb0Ov3d0kz9nG9MbpM0kTgBGBCeiMjncp6MU0voLiW8p4u1tYjP89Nem28tUyaPzj175a0rr8Fppe2J+s+dwhtaD4wMt2x8k6K0zMzm1lAOl/7E+DRiPhBqb18XvVjQOWul5nAyeluml2BkRQXE2tuS/ph/w0wLi1/OvCfDah7oKRBlWmKC89L
Un2Vu6/KY80ETkt30xwCrE2H97OAsZK2Tac5xgKz0rxXJB2S9tFpjai7ZIPfDnv7/q7SjH1cb4wukXQMcDbwNxHxeql9e0l90vRuFPv3qS7WVm/7u6VJr43yNo0D7qoEdTcdBTwWEW+dZsu+z6vvVNjcHxR3dzxO8dvA1zOMP4bi0PYhYFF6HAdcDyxO7TOBYaVlvp7qXUrpjrF620Jxl879FBdOZwD9GlD3bhR3/TwIPFwZj+I89p3AE8CvgSGpXcDlqbbFwOjSuial2pYBZ5TaR1P8wD8JXEb6ix8NqH0gxW+Zg0ttvXJ/UwTlM8CbFOfbP9WMfVxvjG7UvIzi2kHlNV65E+zj6fWzCFgIfKSrtbW1/d2svcdfG0D/9HxZmr9bd+tO7dcAn67qm3Wf+8/2mJlZNj4dZ2Zm2TiEzMwsG4eQmZll4xAyM7NsHEJmZpaNQ8jMzLJxCJmZWTb/H6u0PI6DXdGYAAAAAElFTkSuQmCC\n",
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhUAAAEICAYAAADoRAamAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3de7xd07338c9XkMQtRLSNFCmNuktJnKq7aqpoaUtDFWmcOm0dvZxTbfrUo9EbqqdFURIlQqoRLVKeupMoQhISiUsIkpMSl7iEuLTE7/ljjMXMstbat7kv2fm+X6/1ypxjjjnmb4697PlbY4y9KCIwMzMza6vVOjsAMzMz6x6cVJiZmVkpnFSYmZlZKZxUmJmZWSmcVJiZmVkpnFSYmZlZKZxUmHVTkkLSRzs7DgBJe0v6Rzu1PTDf6+p5/2+Sjimp7T0kzSvsL5C0Xxlt5/YelLR3We01uM5oSZe193W6KkkflDRV0quS/qez4+nOVu/sAMys+5EUwKCImN/R146IzzanXnNijIg7gI+VEZekccA/IuKkQvvbltF2mWrF2Q0cBywB1gt/OVO78kiFmTVUGQFY1ayq991NbQY8VC+h8M+6PE4qzFYikr4m6a+F/cckTSrsL5I0uHDKfrnOy5LOlaRC3ZGSHpb0kqQbJG1WOBaSjpf0GPBYLjtI0qzc1l2SdqgT49S8OVvSMknDC8f+W9JzkhZL+lqhvKekX0v6X0nPSjpfUu867ffIdZdIegI4sOr47ZL+PW9/VNIUSUtz/Yn1YqxM0Uj6oaRngIvrTNsMlfRQ7reLJfXKbY6Q9PeqWCLHcBxwJPCDfL2/5uPvTqfkPjhT0tP5daaknvlYJbaa/Vejjz6S7/tVSTcB/aqOT5L0TO6XqZK2zeX14hwl6fHc3kOSvtDg2j0k/Z9C/ZmSNsnHPilper7udEmfrPq5/Ty/t5ZJ+qukDSVNkPRKrj+wUH8rSTdJelHSPElfrhPPOOCYwj3tpzQddKWkyyS9AoyQtLGkybm9+ZK+XmhjdO6zy/I9zZG0paQf5Z/HIknD6vXJKiUi/PLLr5XkBWwOvEz6QLAxsJA0VF059hKwWt4P4FpgfWBT4Hlg/3zsYGA+sDVpGvQk4K7CdQK4CegL9AY+DjwH/BvQg/RLegHQs06cAXy0sL838DbwU2AN4ADgdWCDfPy3wOR8vXWBvwKn1mn7G8AjwCa5/m35eqvn47cD/563Lwd+nPurF7B7M2I8HeiZ73vvSv/mOguAuYVr3wn8PB8bAfy9Xj8A4yp1q9rbL2//FJgGfADYCLgL+Flz+q9GH90N/Cbfx57Aq8BlheMjcz/3BM4EZhWO1YrzMNL7bTVgOPAa0L/OtU8E5pCmjQTsCGyY++sl4CjSe+6IvL9h4ec2H9gC6AM8BDwK7JfrjwcuznXXBhYBX8vHPk6a3timTkwr3BMwGngLOCTfU29gKnBefp8MJv33sm+h/pvAZwqxPEl6b60BfB14srN/P3SFV6cH4JdffrXslX+Z7gQcDowB7gW2yr9gJxfqBSs+RK8ARuXtvwHHFo6tlh9SmxXO3bdw/PeVB1yhbB6wV50Yaz2w3yA/+HPZc8An8oPnNWCLwrFd6/2SBm4FvlHYH0b9pGJ87qMPNzPGfwG9qsqqk4ritQ8AHs/bI2hbUvE4cEDh2GeABU31X4372pSUgKxdKPsjhaSiqv76Oc4+9eKscc4s4OA6x+bVOkZKJu6tKrsbGFH4uf24cOx/gL8V9j9HTn5Iic0dVW1dAPykTkwr3BMpSZha2N8EWA6sWyg7FRhXqH9TVSzLgB55f93ch+s397/j7vry9IfZymcK6SGzZ96+Hdgrv6ZU1X2msP06sE7e3gw4S2kq42XgRdLDfUCh/qLC9mbAf1fq53M2IX16ba4XIuLtGvFsBKwFzCy0fX0ur2XjqtgWNrjmD0j3da/SX1qMbCLG
5yPizSbqVF+7JX3QSGXkqV7b9fqvVjsvRcRrVW0B705PnJanJ14hJTZQNUVSJOlovTf19TKwXYP6m5ASpFpxVf+sFrLie+7ZwvYbNfaL799/q3o/Hgl8qN491FD8OW4MvBgRr7YgtiURsbywD7V/HqsUL04xW/lMIX1S+gjwS9J0yJGkT/fnNLONRcAvImJCgzrFRW2V+r9oebhNWkL6pbxtRDzVjPqLSQ+uik3rVYyIZ0hD00jaHbhZ0tSo/xcfzfnLgOprP523XyMlR+TrVT/gmmr7adLD8sEabbfEYmADSWsXEotNC9f/Cmn6az9SQtGHNA1RWW+zQpxKa23GAp8C7o6I5ZJmFepXW0SawphbVV65v6JNSQlkSy0CpkTEp1txbkXxPp8G+kpat5BYbAo05/1oBR6pMFv5TAH2AXpHxD+AO4D9SfPW9zezjfOBHxUW6PWRdFiD+mOBb0j6NyVrSzpQ0rp16j9LWuPRpIh4J7f/W0kfyPEMkPSZOqdcAXxb0oclbQCMqte2pMMkfTjvvkR6kLzT0hirHJ+v3Zc0pz4xl88GtpU0WGnx5uiq85q63uXASZI2ktQPOBlo8XdLRMRCYAZwiqQ1czL1uUKVdYF/Ai+QkqBfNhHn2qR+ex7SYmHSSEU9FwI/kzQov1d2kLQh8P+ALSV9RdLqSgt4tyGt+2mpa3NbR0laI7+GStq6FW0REYtIa1hOldRLaRHysbSi/1d1TirMVjIR8ShpPveOvP8K8ARwZ2E4tqk2riItSPxTHgKfC9T9foeImEH6xH8O6eE8n7SGoJ7RwCV5aLrmqvwqP8xtTsvx3Ez974cYC9xAeojfB/ylQbtDgXskLSMtBP1ORDzRyhgr/gjcSOrzx4Gfw7s/l5/m2B8D/l513h+AbfL1rq7R7s9JycADpIWO91XaboWvkBbVvgj8hLS2pGI8aWj/KdJiyGmN4oyIh0jrG+4mJRzbkxao1vMbUuJ3I/BKbq93RLwAHAT8Nymh+QFwUEQsaenN5dGEYaR1RU+TpvkqC2xb6whgYG7vKtL6jJvb0N4qSXmRiZmZmVmbeKTCzMzMSuGkwszMzErhpMLMzMxK4aTCzMzMSuHvqbBur1+/fjFw4MDODsPMbKUyc+bMJRFR70voanJSYd3ewIEDmTFjRmeHYWa2UpHU6Ntqa/L0h5mZmZXCSYWZmZmVwkmFmZmZlcJJhZmZmZXCSYWZmZmVwkmFmZmZlcJJhZmZmZXCSYWZmZmVwl9+Zd3enKeWMnDUdaW0teC0A0tpx8ysO/JIhZmZmZXCSYWZmZmVwkmFmZmZlcJJhZmZmZXCSYWZmZmVwkmFmZmZlcJJhZmZmZXCSYWZmZmVwkmFmZmZlcJJhZmZmZXCSYWZmZmVwkmFmZmZlcJJhZmZmZXCSUU3ImmBpH51jo2SdGQHxrK3pGtLausQSduU0ZaZmbUfJxVdkJKyfzafAW4suc2OcgjgpMLMrItzUtFFSBooaZ6k8cBcYBNJJ0qaLukBSacU6l4taaakByUd14y21wPWjIjnq8o3knRTbudCSQsrIx2SvirpXkmzJF0gqUd+jZM0V9IcSd/LdT8q6WZJsyXdJ2mLfIl1JF0p6RFJEyQp1z8539dcSWMK5VtIuj7f2x2StpL0SeDzwBk5lt3yv5XXckmbtf0nYGZmbeWkomsZBJwXEdsCH8v7uwCDgZ0l7ZnrjYyInYEhwLclbdhEu/sBt9Qo/wlwa77elcCmAJK2BoYDu0XEYGA5cGSOY0BEbBcR2wMX53YmAOdGxI7AJ4HFufzjwHdJowybA7vl8nMiYmhEbAf0Bg7K5WOAE/K9fT/3xV3AZODEiBgcEXfmfwcDY4E/R8TC6huTdJykGZJmLH99aRPdY2ZmZVi9swOwFSyMiGl5e1h+3Z/31yElGVNJicQXcvkmufyFBu3uz3sJQNHuwBcAIuJ6SS/l8k8BOwPT8yBCb+A54K/A5pJ+B1wH3ChpXVKicVVu
502AfN69EfGPvD8LGAj8HdhH0g+AtYC+wIOSbiMlJJPyuQA9692QpN2Ar+d7eJ+IGENKUujZf1DU7xozMyuLk4qu5bXCtoBTI+KCYgVJe5NGHnaNiNcl3Q70aqLdXYBvtiAOAZdExI/ed0DakbQ+4xvAl4HvNGjnn4Xt5cDqknoB5wFDImKRpNGk+FcDXs4jEI2Dk/oDfwA+HxHLmndLZmbW3jz90XXdAIyUtA6ApAGSPgD0AV7KCcVWwCcaNSJpW+CRiFhe4/CdpMQAScOADXL5LcCh+XpI6itps7zeYrWI+DNwErBTRLwK/EPSIbluT0lrNQipkgAtyfd2KEBEvAI8Kemw3I5yAgPwKrBuLl8DmAT8MCIebXTvZmbWsZxUdFERcSPwR+BuSXNIax7WBa4nfeJ/GDgNmFa/FQA+m8+p5RRgmKS5wGHAM8CrEfEQKWm4UdIDwE1Af2AAcHueyrgMqIxkHEWaknkAuAv4UIP7epm0FmIuKXGaXjh8JHCspNnAg8DBufxPwImS7idNkQwBTiks1ty4iT4wM7MOoAhPN3dnkm4Cjo6IxTWO9QSWR8TbknYFft+c6YeVTc/+g6L/MWeW0taC0w4spR0zs65O0syIGNKSc7ymopuLiE83OLwpcEX+Tox/kRY+mpmZtYqTilVYRDxG+rNPMzOzNvOaCjMzMyuFkwozMzMrhZMKMzMzK4WTCjMzMyuFkwozMzMrhZMKMzMzK4WTCjMzMyuFkwozMzMrhb/8yrq97Qf0YYa/XtvMrN15pMLMzMxK4aTCzMzMSuGkwszMzErhpMLMzMxK4aTCzMzMSuGkwszMzErhpMLMzMxK4e+psG5vzlNLGTjqus4Ow8ysQy3ohO/n8UiFmZmZlcJJhZmZmZXCSYWZmZmVwkmFmZmZlcJJhZmZmZXCSYWZmZmVwkmFmZmZlcJJhZmZmZXCSYWZmZmVwkmFmZmZlcJJhZmZmZXCSYWZmZmVwkmFmZmZlcJJhZmZmZXCSYW1iqQenR2DmZl1LU4q7H0kDZT0iKQJkh6WdKWktSQtkHS6pPuAwyQdIWmOpLmSTi+cf6ykRyXdK2mspHNy+ThJZ0u6S9ITkg7N5ZJ0Rm5njqThuby/pKmSZuVje+TyYZLulnSfpEmS1umEbjIzsypOKqyejwHnRcTWwCvAt3L5CxGxEzAVOB3YFxgMDJV0iKSNgf8LfALYDdiqqt3+wO7AQcBpueyLuY0dgf2AMyT1B74C3BARlWOzJPUDTgL2y3HMAP6rOnhJx0maIWnG8teXtr03zMysSat3dgDWZS2KiDvz9mXAt/P2xPzvUOD2iHgeQNIEYM98bEpEvJjLJwFbFtq9OiLeAR6S9MFctjtweUQsB56VNCW3Px24SNIa+bxZkvYCtgHulASwJnB3dfARMQYYA9Cz/6BoQz+YmVkzOamweqofxJX919rY7j8L22oYQMRUSXsCBwLjJP0GeAm4KSKOaGMcZmZWMk9/WD2bSto1b38F+HvV8XuBvST1y4s2jwCmkEYX9pK0gaTVgS8141p3AMMl9ZC0EWnE415JmwHPRsRY4EJgJ2AasJukjwJIWlvSlvUaNjOzjuOkwuqZBxwv6WFgA+D3xYMRsRgYBdwGzAZmRsQ1EfEU8EtS0nEnsABoalHDVcADuZ1bgR9ExDPA3sBsSfcDw4Gz8nTLCOBySQ+Qpj6q122YmVknUISnm21FkgYC10bEdq08f52IWJZHKq4CLoqIq0oMsUV69h8U/Y85s7Mub2bWKRacdmCbzpc0MyKGtOQcj1RYexgtaRYwF3gSuLqT4zEzsw7ghZr2PhGxAGjVKEU+//vlRWNmZisLj1SYmZlZKZxUmJmZWSmcVJiZmVkpnFSYmZlZKZxUmJmZWSmcVJiZmVkpnFSYmZlZKZxUmJmZWSn85VfW7W0/oA8z2vh1tWZm1jSPVJiZmVkpnFSYmZlZKZxUmJmZWSmcVJiZmVkpnFSYmZlZKZxUmJmZ
WSmcVJiZmVkpnFSYmZlZKfzlV9btzXlqKQNHXdfZYXQZC/xFYGbWTjxSYWZmZqVwUmFmZmalcFJhZmZmpXBSYWZmZqVwUmFmZmalcFJhZmZmpXBSYWZmZqVwUmFmZmalcFJhZmZmpXBSYWZmZqVwUmFmZmalcFJhZmZmpXBSYWZmZqXo8kmFpBGSNm5GvdslDalRvpWkuyX9U9L3G5wvSbdKWq9Bne9KWqv50TdN0k8l7VejfG9J17ayzRGSzml7dG0nabCkA9qh3Y0kXV92u2Zm1nrNTiryQ7czkpARQJNJRQMvAt8Gft1EvQOA2RHxSoM63wVKTSoi4uSIuLnMNtuLpNVbcdpgUt+WKiKeBxZL2q3sts3MrHUaJgmSBkqaJ2k8MBfYRNKJkqZLekDSKYW6R+ey2ZIuLZx/ay6/RdKmkvpIWlhJUCStLWmRpDVqXP9QYAgwQdIsSb0lnZyvP1fSGEkqnHJUrjdX0i4AEfFcREwH3mqiL44ErinEdF2+l7mShkv6Nim5uU3SbbneEZLm5DqnF+JeJum3kh7M971Rgz4el+8TSftLekTSfcAX69T/nqSL8vb2+dq1Ep2NJV0v6TFJvyqcXzfmwvahksYV4jtf0j3Ar4oXaCoWSWsCPwWG55/L8BzPRvn4apLm51GHynVmSHpU0kG5Tg9JZxTec/9RCOFq0s+tVj8dl9uasfz1pbWqmJlZyZoz8jAIOC8itgU+lvd3IX0C3VnSnpK2BU4C9o2IHYHv5HN/B1wSETsAE4CzI2IpMAvYK9c5CLghIt730I+IK4EZwJERMTgi3gDOiYihEbEd0DufX7FWRAwGvgVc1PxuAGA3YGbe3h94OiJ2zNe5PiLOBp4G9omIfZSmZE4H9s19MVTSIfn8tYEZuc+mAD9p6uKSegFjgc8BOwMfqlP1LOCjkr4AXAz8R0S8XqPeYGA4sD3pob5JEzE38mHgkxHxXy2JJSL+BZwMTMw/v4nAZbyXCOxHGh16Pu8PJL23DgTOz31yLLA0IoYCQ4GvS/pIrj8D2KNWwBExJiKGRMSQHmv1acYtmplZWzUnqVgYEdPy9rD8uh+4D9iKlGTsC0yKiCUAEfFirr8r8Me8fSmwe96eSHrgARye95trH0n3SJqTr7tt4djl+fpTgfUkrd+CdvtGxKt5ew7waUmnS9ojJ0LVhgK3R8TzEfE2KWnaMx97p3BPl/HefTeyFfBkRDwWEZHPe5+IeIc0JXQpMCUi7qzT3i0RsTQi3gQeAjZrIuZGJkXE8jbEUnQRcHTeHklKRiquiIh3IuIx4AlSnwwDjpY0C7gH2JD0ngN4jrZNjZmZWYmaM0f+WmFbwKkRcUGxgqQTWnjdycAvJfUlfSq/tTkn5U+u5wFDImKRpNFAr0KVqDqler+RtyWtlh9qj0raibQW4OeSbomIn7agrWotiaM5BgHLaPxA/WdhezlN/6yLMfaqOvYa9TUnlvcukn5uz0ralzQqUZy+qPXzE3BCRNxQo7lewBvNua6ZmbW/li68vAEYKWkdAEkDJH2AlBQcJmnDXN4317+LNBIB6eFxB0BELAOmk4bPr631KbjgVWDdvF152C3JMRxaVXd4vv7upCHzlkymzwM2z+dvDLweEZcBZwA71YjlXmAvSf0k9QCOIE11QOrXSmxfAf7ejOs/AgyUtEXeP6JWJUl9gLNJIwwbVtZjNFOjmJ+VtHVe6/KF5jTWzFiKfVZxIWkkpnoE5LC8zmIL0s9iHuk9983KmhtJW0paO9ffkrTWx8zMuoAWreaPiBslbQ3cnddHLgO+GhEPSvoFMEXSctL0yAjgBOBiSScCzwNfKzQ3EZgE7N3EZceR5tffIE2njCU9SJ4hJSZFb0q6H1iDNLSOpA+R5t7XA96R9F1gmxp/5XFdjmU+aR3CGZLeIS3w/GauMwa4XtLTeV3FKOA20qfp6yLimlzvNWAXSSeRhuiH04SIeFPSccB1kl4nJWDVD2OA3wLn5tGU
Y0kLR6dGxHPNuMbiBjGPAq4l/ZxmAOs01V4zY7kNGJWnL07N6yomk6Y9Lq5q739Jic96wDdyn1xIWmtxn9Kb7nmgsg5kH9LPzczMugCl6XuT1B8YHxGfLqGtZRHRnIfyKknp+0R+GxF7FMrGkUatrmxBO1OBgyPipUb1evYfFP2PObO14XY7C047sLNDMLOVgKSZEfG+739qpMt/+VVHiYjFwFg1+PIra7s8UvJn4EdtbGcj4DdNJRRmZtZxWvNlRu1C0rmkP+ssOisiqofI201EXFFSO+8bpegK99cVRMRpwGk1yke0sJ3nSd9TYWZmXUSXSSoi4vjOjqE9dff7MzMz8/SHmZmZlcJJhZmZmZXCSYWZmZmVwkmFmZmZlcJJhZmZmZXCSYWZmZmVwkmFmZmZlaLLfE+FWXvZfkAfZvirqc3M2p1HKszMzKwUTirMzMysFE4qzMzMrBROKszMzKwUTirMzMysFE4qzMzMrBROKszMzKwU/p4K6/bmPLWUgaOu6+ww3meBvzvDzLoZj1SYmZlZKZxUmJmZWSmcVJiZmVkpnFSYmZlZKZxUmJmZWSmcVJiZmVkpnFSYmZlZKZxUmJmZWSmcVJiZmVkpnFSYmZlZKZxUmJmZWSmcVJiZmVkpnFSYmZlZKZxUtDNJIyRt3Ix6t0saUqP8SEkPSJoj6S5JO9Y5X5JulbSepIGS5pYRf1tJWl/St9qp7ZslbdAebZuZWcutMklFfuh2xv2OAJpMKhp4EtgrIrYHfgaMqVPvAGB2RLzShms1JGn1Vpy2PtAuSQVwaTu2bWZmLdStk4r8iX2epPHAXGATSSdKmp4//Z9SqHt0Lpst6dLC+bfm8lskbSqpj6SFlQRF0tqSFklao8b1DwWGABMkzZLUW9LJ+fpzJY2RpMIpR+V6cyXtAhARd0XES/n4NODDdW73SOCawn4PSWMlPSjpRkm9c0yDJU3L93RV5ZN+caREUj9JC/L2CEmTJd0K3FJ1f0NzO71yPzwoabuquE4Dtsj3dYak8ZIOKbQxQdLB+TrX5Dgek/STQp2vSro3t3GBpB750GTgiFqdIek4STMkzVj++tI6XWZmZmXq1klFNgg4LyK2BT6W93cBBgM7S9pT0rbAScC+EbEj8J187u+ASyJiB2ACcHZELAVmAXvlOgcBN0TEW9UXjogrgRnAkRExOCLeAM6JiKERsR3QO59fsVZEDCZ9+r6oxr0cC/ytzn3uBsysuu9z832/DHwpl48HfpjvaQ7wE5q2E3BoROxVLIyI6aQH+8+BXwGXRUT1tMso4PF8/ycCfyCN3iCpD/BJ4Lpcd5cc5w7AYZKGSNoaGA7slvtmOSmBIidbPSVtWB1wRIyJiCERMaTHWn2acYtmZtZWrRnOXtksjIhpeXtYft2f99chPXx3BCZFxBKAiHgxH98V+GLevpT04ASYSHrQ3QYcDpzXgnj2kfQDYC2gL/Ag8Nd87PJ8/al5bcT6EfEygKR9SEnF7nXa7RsRrxb2n4yIWXl7JjAwP8TXj4gpufwSYFIzYr6p0CfVfgpMB94Evt1UQxExRdJ5kjYiJRB/joi384DNTRHxAoCkv5Du9W1gZ2B6rtMbeK7Q5HOk6aUXmnEfZmbWjlaFpOK1wraAUyPigmIFSSe0sM3JwC8l9SU98G5tzkmSepESkCERsUjSaKBXoUpUnRL5vB2AC4HPVh66NbwtabWIeCfv/7NwbDnpYdzI27w3ctWr6thr1LchKTlbI5/XqG7FeOCrpITsa4XyWvcv0mjRj+q01Qt4oxnXNDOzdrYqTH8U3QCMlLQOgKQBkj5ASgoOqwyj52QB4C7Sgw/SkPsdABGxjPTp/Czg2ohY3uCarwLr5u3Kw3pJjuHQqrrD8/V3B5ZGxFJJmwJ/AY6KiEcbXGcesHmD4+Spm5ck7ZGLjgIqoxYLSAkSNeJq5ALg/5Kmh06vcbx4/xXjgO/mmB4qlH9aUt+8/uMQ4E7SOo5D88+JfHyz
vC3gQzl2MzPrZKvCSMW7IuLGPEd/dx5KXwZ8NSIelPQLYIqk5aTpkRHACcDFkk4EnmfFT9UTSVMHezdx2XHA+ZLeIE2njCUtGn2GlJgUvSnpftKn/pG57GTSaMB5Oea3I+J9f3pKWpewNzC/iXiOyfGsBTxRuKdfA1dIOo731jg0JOlo4K2I+GNePHmXpH0j4t2Rm4h4QdKdSn/i+reIODEinpX0MHB1VZP3An8mLUa9LCJm5OucBNyYF8e+BRwPLCQlQdMi4u3mxGtmZu1LEdUjzrYyktQfGB8Rn+7sWJqSE5o5wE559ARJI0jTQv/ZgnbOAiZHxC2N6vXsPyj6H3NmGyJuHwtOO7CzQzAzq0vSzDofYuta1aY/uq2IWAyMlbReZ8fSiKT9gIeB31USijaY21RCYWZmHWeVmv5oT5LOJf1ZZ9FZEXFxR8UQEVd01LVaKyJuBjarUT6ONFXUkrbGlhOVmZmVwUlFSSLi+M6OwczMrDN5+sPMzMxK4aTCzMzMSuGkwszMzErhpMLMzMxK4aTCzMzMSuGkwszMzErhpMLMzMxK4e+psG5v+wF9mOGvxDYza3ceqTAzM7NSOKkwMzOzUjipMDMzs1I4qTAzM7NSOKkwMzOzUjipMDMzs1I4qTAzM7NS+HsqrNub89RSBo66ru7xBf4OCzOzUnikwszMzErhpMLMzMxK4aTCzMzMSuGkwszMzErhpMLMzMxK4aTCzMzMSuGkwszMzErhpMLMzMxK4aTCzMzMSuGkwszMzErhpMLMzMxK4aTCzMzMSuGkwszMzErhpGIVImmEpI2bUe92SUNqlB8s6QFJsyTNkLR7nfN7S5oiqYekgZK+Ukb8VddYU9JUSf4/7ZqZdRFOKjqBks7o+xFAk0lFA7cAO0bEYGAkcGGdeiOBv0TEcmAgUHpSERH/yvEML7ttMzNrHScVHSR/Yp8naTwwF9hE0omSpudP/6cU6h6dy2ZLurRw/q25/BZJm0rqI2lhJUGRtLakRZLWqHH9Q4EhwIQ80tBb0sn5+nMljZGkwilH5XpzJe0CEBHLIiLy8bWBoLYjgWvy9mnAHrmt7+XRhcGFuP4uaUdJoyVdKuluSY9J+nqhTs1+Aq7O16rV38fl0WSlW6cAAAmOSURBVJQZy19fWidMMzMrk5OKjjUIOC8itgU+lvd3AQYDO0vaU9K2wEnAvhGxI/CdfO7vgEsiYgdgAnB2RCwFZgF75ToHATdExFvVF46IK4EZwJERMTgi3gDOiYihEbEd0DufX7FWHpH4FnBRpVDSFyQ9AlxHGpFYgaQ1gc0jYkEuGgXcka/5W+APpBETJG0J9IqI2bnuDsC+wK7AyZI2ljSsVj/l+nOBobU6OiLGRMSQiBjSY60+taqYmVnJnFR0rIURMS1vD8uv+4H7gK1ID899gUkRsQQgIl7M9XcF/pi3LwUq6xkm8t4UwOF5v7n2kXSPpDn5utsWjl2erz8VWE/S+nn/qojYCjgE+FmNNvsBLze45iTgoDyaMhIYVzh2TUS8ke/9NlIiUa+fyNMr/5K0bgvu2czM2okXuXWs1wrbAk6NiAuKFSSd0MI2JwO/lNQX2Bm4tTknSeoFnAcMiYhFkkYDvQpVqqc2VtiPiKmSNpfUr5IAZW9UtUPVea9Lugk4GPhyjrnRNWv2U0FP4M161zMzs47jkYrOcwMwUtI6AJIGSPoAKSk4TNKGubxvrn8XaSQC0jqCOyCtcwCmA2cB1+ZP7/W8ClQ+1Vce/EtyDIdW1R2er787sDQilkr6aGXdhaSdSA/0F4onRcRLQI+ctFRfs+JC4Gxgeq5fcbCkXvne9873Va+fyPWW1JruMTOzjueRik4SETdK2hq4Oz+nlwFfjYgHJf0CmCJpOWnYfwRwAnCxpBOB54GvFZqbSJpW2LuJy44Dzpf0Bmk6ZSxpXcIzpAd40ZuS7gcq0xQAXwKOlvQWaURieGHhZtGNpOmZm4EHgOWSZgPjIuK3ETFT
0ivAxVXnPUCa9ugH/CwingaertVPwHPAPqS1HWZm1gWo9jPBrPXyKMb3IuKoOsc3Bm4HtoqId3LZaGBZRPy6Bdf5CzAqIh5tVK9n/0HR/5gz6x5fcNqBzb2kmdkqQ9LMiHjfdxY14ukPK11E3AfcJqlH9TFJRwP3AD+uJBStkf/K5OqmEgozM+s4nv7ohiSdC+xWVXxWRFRPN7SbiLioTvl4YHyN8tEtbP9ftdoxM7PO46SiG4qI4zs7BjMzW/V4+sPMzMxK4aTCzMzMSuGkwszMzErhpMLMzMxK4aTCzMzMSuGkwszMzErhpMLMzMxK4e+psG5v+wF9mOGv4jYza3ceqTAzM7NSOKkwMzOzUjipMDMzs1I4qTAzM7NSOKkwMzOzUjipMDMzs1I4qTAzM7NSOKkwMzOzUjipMDMzs1IoIjo7BrN2JelVYF5nx9EK/YAlnR1EKzjujuW4O9aqFPdmEbFRS07w13TbqmBeRAzp7CBaStIMx91xHHfHctwdq6Pi9vSHmZmZlcJJhZmZmZXCSYWtCsZ0dgCt5Lg7luPuWI67Y3VI3F6oaWZmZqXwSIWZmZmVwkmFmZmZlcJJhXVbkvaXNE/SfEmjOimGTSTdJukhSQ9K+k4uHy3pKUmz8uuAwjk/yjHPk/SZpu5H0kck3ZPLJ0pas6TYF0iak+Obkcv6SrpJ0mP53w1yuSSdnWN4QNJOhXaOyfUfk3RMoXzn3P78fK5KiPljhT6dJekVSd/tiv0t6SJJz0maWyhr9/6td402xn2GpEdybFdJWj+XD5T0RqHfz29tfI36oA1xt/v7QlLPvD8/Hx9YQtwTCzEvkDQrl3d+f0eEX351uxfQA3gc2BxYE5gNbNMJcfQHdsrb6wKPAtsAo4Hv16i/TY61J/CRfA89Gt0PcAVweN4+H/hmSbEvAPpVlf0KGJW3RwGn5+0DgL8BAj4B3JPL+wJP5H83yNsb5GP35rrK5362Hd4DzwCbdcX+BvYEdgLmdmT/1rtGG+MeBqyet08vxD2wWK+qnRbFV68P2hh3u78vgG8B5+ftw4GJbY276vj/ACd3lf72SIV1V7sA8yPiiYj4F/An4OCODiIiFkfEfXn7VeBhYECDUw4G/hQR/4yIJ4H5pHupeT/508a+wJX5/EuAQ9rnbt6N75Ia1zoYGB/JNGB9Sf2BzwA3RcSLEfEScBOwfz62XkRMi/QbbHw7xP0p4PGIWNjE/XRKf0fEVODFGvG0d//Wu0ar446IGyPi7bw7DfhwozZaGV+9Pmh13A2U+b4o3s+VwKcqowRtjTu382Xg8kZtdGR/O6mw7moAsKiw/w8aP8zbXR72/DhwTy76zzyseFFhCLpe3PXKNwReLvxCL/M+A7hR0kxJx+WyD0bE4rz9DPDBVsY9IG9Xl5fpcFb8ZdvV+xs6pn/rXaMsI0mfcCs+Iul+SVMk7ZHLWhNfe/033d7vi3fPyceX5vpl2AN4NiIeK5R1an87qTDrAJLWAf4MfDciXgF+D2wBDAYWk4Ywu5rdI2In4LPA8ZL2LB7Mn3i65N+k5/nszwOTctHK0N8r6Ij+Lfsakn4MvA1MyEWLgU0j4uPAfwF/lLReZ8VXw0r3vqhyBCsmzp3e304qrLt6CtiksP/hXNbhJK1BSigmRMRfACLi2YhYHhHvAGNJw6pQP+565S+QhiVXrypvs4h4Kv/7HHBVjvHZyhBo/ve5Vsb9FCsOkZf98/kscF9EPJvvocv3d9YR/VvvGm0iaQRwEHBkfjiRpw9eyNszSesRtmxlfKX/N91B74t3z8nH++T6bZLb+iIwsXA/nd7fTiqsu5oODMorstckDYVP7ugg8pznH4CHI+I3hfLi3OQXgMrK7snA4XnF+EeAQaQFVjXvJ//yvg04NJ9/DHBNCXGvLWndyjZpId7cHF/lLwyK15oMHJ1XjH8CWJqHVG8AhknaIA8tDwNuyMdekfSJ3EdHlxF3wQqf4Lp6fxd0RP/Wu0arSdof
+AHw+Yh4vVC+kaQeeXtzUv8+0cr46vVBW+LuiPdF8X4OBW6tJF1ttB/wSES8O63RJfq7euWmX351lxdp9fKjpGz9x50Uw+6k4cQHgFn5dQBwKTAnl08G+hfO+XGOeR6Fv4iodz+klej3khaTTQJ6lhD35qSV7bOBByvXI80F3wI8BtwM9M3lAs7Nsc0BhhTaGpljmw98rVA+hPRL/HHgHPI3/JYQ+9qkT4J9CmVdrr9JSc9i4C3SfPWxHdG/9a7Rxrjnk+bfK+/xyl87fCm/f2YB9wGfa218jfqgDXG3+/sC6JX35+fjm7c17lw+DvhGVd1O729/TbeZmZmVwtMfZmZmVgonFWZmZlYKJxVmZmZWCicVZmZmVgonFWZmZlYKJxVmZmZWCicVZmZmVor/D4KE0GKTr3HGAAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
- "plt.barh(np.array(['real','prognose','recov_tab1','recov_tab2','recov_tab3'])[::-1],\\\n",
+ "plt.barh(np.array(['real / geschaetz','prognose','recov_tab1 (stop_id x hour x type)','recov_tab2 (hour x type)','recov_tab3 (type)'])[::-1],\\\n",
" np.array([n_real,n_all,n_recov1,n_recov2, n_recov3])[::-1])\n",
"plt.title('where the distribution data come from')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" key \n",
" hour \n",
" transport_type \n",
" distribution \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 4.TA.30-57-Y-j19-1.1.H__8502208 \n",
" 7 \n",
" Bus \n",
" [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n",
" \n",
" \n",
" 1 \n",
" 4.TA.30-57-Y-j19-1.1.H__8502209 \n",
" 7 \n",
" Bus \n",
" [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n",
" \n",
" \n",
" 2 \n",
" 4.TA.30-57-Y-j19-1.1.H__8503202 \n",
" 7 \n",
" Bus \n",
" [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n",
" \n",
" \n",
" 3 \n",
" 5.TA.30-57-Y-j19-1.1.H__8502208 \n",
" 7 \n",
" Bus \n",
" [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n",
" \n",
" \n",
" 4 \n",
" 5.TA.30-57-Y-j19-1.1.H__8502209 \n",
" 7 \n",
" Bus \n",
" [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" key hour transport_type \\\n",
"0 4.TA.30-57-Y-j19-1.1.H__8502208 7 Bus \n",
"1 4.TA.30-57-Y-j19-1.1.H__8502209 7 Bus \n",
"2 4.TA.30-57-Y-j19-1.1.H__8503202 7 Bus \n",
"3 5.TA.30-57-Y-j19-1.1.H__8502208 7 Bus \n",
"4 5.TA.30-57-Y-j19-1.1.H__8502209 7 Bus \n",
"\n",
" distribution \n",
"0 [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n",
"1 [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n",
"2 [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n",
"3 [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n",
"4 [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... "
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"summary_df = pd.DataFrame([all_keys, all_hours, all_transport_type, all_distrib],\\\n",
" index = ['key','hour','transport_type','distribution']).transpose()\n",
"summary_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"# Save the summary table (key, hour, transport_type, distribution) as a gzipped pickle\n",
"with gzip.open(\"../data/join_distribution_all.pkl.gz\", \"wb\") as out_file:\n",
" pickle.dump(summary_df, out_file)"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
"# Turn every row's histogram of counts into cumulative probabilities\n",
"list_all_rows = []\n",
"for index, row in summary_df.iterrows():\n",
"    # counts stored in the 'distribution' column for this row\n",
"    distrib = np.asarray(row['distribution'], dtype=float)\n",
"\n",
"    # get total number of elements, used to normalise the counts\n",
"    N = distrib.sum()\n",
"    if distrib.size > 0 and N == 0:\n",
"        # an all-zero histogram cannot be normalised; fail with a message that names the row\n",
"        raise ValueError('distribution at index {} has 0 counts'.format(index))\n",
"\n",
"    # cumulative distribution probabilities: running sum of the counts divided by the total\n",
"    list_all_rows.append(np.cumsum(distrib) / N)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0. , 0.01501502, 0.03603604, 0.06306306, 0.09009009,\n",
" 0.12012012, 0.15015015, 0.18018018, 0.21321321, 0.24624625,\n",
" 0.27927928, 0.31231231, 0.34534535, 0.37837838, 0.41141141,\n",
" 0.44444444, 0.47747748, 0.51051051, 0.54354354, 0.57657658,\n",
" 0.60960961, 0.64264264, 0.67567568, 0.71171171, 0.74774775,\n",
" 0.78378378, 0.81981982, 0.85585586, 0.89189189, 0.92792793,\n",
" 0.96396396, 1. ],\n",
" [0. , 0.01501502, 0.03603604, 0.06306306, 0.09009009,\n",
" 0.12012012, 0.15015015, 0.18018018, 0.21321321, 0.24624625,\n",
" 0.27927928, 0.31231231, 0.34534535, 0.37837838, 0.41141141,\n",
" 0.44444444, 0.47747748, 0.51051051, 0.54354354, 0.57657658,\n",
" 0.60960961, 0.64264264, 0.67567568, 0.71171171, 0.74774775,\n",
" 0.78378378, 0.81981982, 0.85585586, 0.89189189, 0.92792793,\n",
" 0.96396396, 1. ],\n",
" [0. , 0.01501502, 0.03603604, 0.06306306, 0.09009009,\n",
" 0.12012012, 0.15015015, 0.18018018, 0.21321321, 0.24624625,\n",
" 0.27927928, 0.31231231, 0.34534535, 0.37837838, 0.41141141,\n",
" 0.44444444, 0.47747748, 0.51051051, 0.54354354, 0.57657658,\n",
" 0.60960961, 0.64264264, 0.67567568, 0.71171171, 0.74774775,\n",
" 0.78378378, 0.81981982, 0.85585586, 0.89189189, 0.92792793,\n",
" 0.96396396, 1. ],\n",
" [0. , 0.01501502, 0.03603604, 0.06306306, 0.09009009,\n",
" 0.12012012, 0.15015015, 0.18018018, 0.21321321, 0.24624625,\n",
" 0.27927928, 0.31231231, 0.34534535, 0.37837838, 0.41141141,\n",
" 0.44444444, 0.47747748, 0.51051051, 0.54354354, 0.57657658,\n",
" 0.60960961, 0.64264264, 0.67567568, 0.71171171, 0.74774775,\n",
" 0.78378378, 0.81981982, 0.85585586, 0.89189189, 0.92792793,\n",
" 0.96396396, 1. ],\n",
" [0. , 0.01501502, 0.03603604, 0.06306306, 0.09009009,\n",
" 0.12012012, 0.15015015, 0.18018018, 0.21321321, 0.24624625,\n",
" 0.27927928, 0.31231231, 0.34534535, 0.37837838, 0.41141141,\n",
" 0.44444444, 0.47747748, 0.51051051, 0.54354354, 0.57657658,\n",
" 0.60960961, 0.64264264, 0.67567568, 0.71171171, 0.74774775,\n",
" 0.78378378, 0.81981982, 0.85585586, 0.89189189, 0.92792793,\n",
" 0.96396396, 1. ]])"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final_df = pd.DataFrame(list_all_rows)\n",
"final_df.index = summary_df.index\n",
"final_np = final_df.to_numpy()\n",
"final_np[0:5,:]"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(np.array(final_df.index == stoptimes.index)) == stoptimes.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [],
"source": [
"# Write the cumulative probability array (final_np) as a gzipped pickle\n",
"with gzip.open(\"../data/join_distribution_cumulative_p_2.pkl.gz\", \"wb\") as output_file:\n",
" pickle.dump(final_np, output_file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Poisson cumulative distribution\n",
"\n",
"The Poisson distribution is popular for modeling the number of times an event occurs in an interval of time or space. We modeled delays with a Poisson distribution, taking $k$ to be the delay in minutes (as was done [here](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0126137), formulas $(4),(5),(6)$).\n",
"\n",
"A discrete random variable X is said to have a Poisson distribution with parameter λ > 0, if, for k = 0, 1, 2, ..., the probability mass function of X is given by:\n",
"\n",
"$${\\displaystyle \\!f(k;\\lambda )=\\Pr(X=k)={\\frac {\\lambda ^{k}e^{-\\lambda }}{k!}},}$$\n",
"where\n",
"\n",
"- $e$ is Euler's number ($e = 2.71828...$)\n",
"- $k!$ is the factorial of $k$.\n",
"\n",
"The positive real number λ is equal to the expected value of X __and__ to its variance.\n",
"\n",
"$${\\displaystyle \\lambda =\\operatorname {E} (X)=\\operatorname {Var} (X)}$$\n",
"\n",
"We can approximate E[𝑋] by the sample mean $\\mu_i$ of our data $X_i$, if we assume that the sample $X_i$ of size N follows the distribution of $X$, i.e. $X_i$∼$X$.\n",
"\n",
"Poisson-related __assumptions__ :\n",
"- $k$ is the __delay time in minutes__ and can take values 0, 1, 2, ... (non-negative and discrete)\n",
"- We assume our sample $X_i$ of $X$ is good enough to approximate E[X] ~ $\\mu_i$\n",
"- The occurrence of one event does not affect the probability of others. That is, events occur independently.\n",
"    - __We assume that being late one day does not affect the delay of the day after__ \n",
"- The average rate at which events occur is independent of any occurrences. For simplicity, this is usually assumed to be constant, but may in practice vary with time.\n",
"    - __We assume delays occur at a constant rate over time__\n",
"- Two events cannot occur at exactly the same instant\n",
"\n",
"We wrote a function `poisson_proba` that takes a `trip_id`, a `stop_id`, an `arrival time`, a `departure time` and a dictionary {key : distribution}, and computes the __probability of arriving at least 2 minutes before the departure of the next trip__. \n",
"\n",
"We make a few __assumptions__ on our side :\n",
"- We assume that if we have less than 2 minutes for the transfer, we miss it.\n",
"- We assume the next train is on time.\n",
"- Since $k$ cannot be negative in a Poisson distribution, we treat trains ahead of schedule as being on time ($k=0$)\n",
"\n",
"\n",
"_Question we should address :_\n",
"- _Is the poisson a reasonable approximation of the binomial distribution in our case ?_"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's first test the Poisson distribution and compare it with our distribution to see how well it fits the data. We will compute $Pr(X = k)$ for each value of $k$ and compare the shape of the Poisson distribution with the shape of our scaled data. Then, we will compare $\\sum_{k=0}^T Pr(X = k)$ with the cumulative distribution function, which directly gives $Pr(X \\leq T)$"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"An error was encountered:\n",
"Invalid status code '404' from http://iccluster044.iccluster.epfl.ch:8998/sessions/6821 with error payload: \"Session '6821' not found.\"\n"
]
}
],
"source": [
"################################# POISSON FIT TEST #########################################\n",
"\n",
"# to do .. \n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here are all the functions needed to calculate the probability of success for a given transfer. We need the `trip_id`, `stop_id`, `departure_time`, `arrival_time` and the dictionary `d` (loaded from a pickle at the beginning of the cell) to compute a probability of success with the following function: \n",
"\n",
"`poisson_proba(trip_id, stop_id, arrival_time, departure_time, d)`"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"lambda (expectation given distribution): 1.0194769059543685 \n",
"\n",
"Probability of success for transfer time = 13.0 minutes : 0.999999999994185\n"
]
}
],
"source": [
"%local\n",
"################################# POISSON FUNCTIONS ########################################\n",
"\n",
"import pickle \n",
"import gzip\n",
"import time\n",
"import math \n",
"import datetime\n",
"import time\n",
"from scipy.stats import poisson\n",
"\n",
"# Load dictionnary\n",
"with gzip.open(\"../data/distributions.pickle\", \"rb\") as input_file:\n",
" d = pickle.load(input_file)\n",
"\n",
"# Load the stop_times array\n",
"with open(\"../data/stop_times_array.pkl\", \"rb\") as input_file:\n",
" times = pickle.load(input_file)\n",
"\n",
"# we take two exemple time in format numpy.datetime64\n",
"arr_time = times[4][1]\n",
"dep_time = times[0][1]\n",
"\n",
"# Return the distribution stored under `key`; an unknown key is reported as a ValueError\n",
"def get_distrib(key, dico):\n",
"    # guard clause: fail early when the key is missing\n",
"    if key not in dico:\n",
"        raise ValueError(\"KEY ERROR: {} not found un distribution dictionnary\".format(key))\n",
"    return dico[key]\n",
" \n",
"# Evaluate lambda parameter assuming it is equal to the average: sum(t * count) / sum(count)\n",
"def evaluate_lamda(distrib):\n",
"    # distrib : sequence of counts; the entry at index i is weighted with time t = i - 1\n",
"    # returns : the weighted mean (lambda)\n",
"    # raises  : ValueError when the counts do not sum to a positive number\n",
"\n",
"    # total number of measures N - every entry counts, including index 0 (t = -1)\n",
"    N = sum(distrib)\n",
"    if not N > 0:\n",
"        # no key is available in this scope, so the message cannot name the distribution\n",
"        raise ValueError(\"ERROR : distribution has 0 counts\")\n",
"\n",
"    # entries with t <= 0 (index 0 and 1) add nothing to the numerator, i.e. trains\n",
"    # ahead of schedule or exactly on time are counted as a delay of 0\n",
"    lambda_p = sum((i - 1) * x for i, x in enumerate(distrib) if i > 1) / N\n",
"    print('lambda (expectation given distribution): ',lambda_p, '\\n')\n",
"    return lambda_p\n",
"\n",
"# Convert a time string in format 'hh:mm' into a number of seconds (float) - not needed\n",
"def process_time_str(str_time):\n",
"    parsed = time.strptime(str_time,'%H:%M')\n",
"    as_delta = datetime.timedelta(hours=parsed.tm_hour, minutes=parsed.tm_min, seconds=parsed.tm_sec)\n",
"    return as_delta.total_seconds()\n",
"\n",
"# Whole minutes between two numpy.datetime64 timestamps (arr_time - dep_time), minus a margin of `delta` minutes\n",
"# NOTE(review): the difference is taken as arr_time - dep_time - confirm the argument order against callers\n",
"def get_transfer_time(arr_time, dep_time, delta=2.0):\n",
"    whole_minutes = (arr_time - dep_time).astype('timedelta64[m]')\n",
"    return whole_minutes / np.timedelta64(1, 'm') - delta\n",
"\n",
"# Poisson probability of success for a given transfer, computed from a trip_id, a stop_id,\n",
"# arrival/departure times and a {key: distribution} dict\n",
"def poisson_proba(trip_id, stop_id, arr_time, dep_time, dico):\n",
"    # key = '<trip_id>__<first 7 characters of stop_id>' (7 first char to be sbb-compatible)\n",
"    key = '__'.join([str(trip_id), str(stop_id[0:7])])\n",
"\n",
"    # gather the three inputs: distribution, transfer time at disposal, lambda\n",
"    distrib = get_distrib(key, dico)\n",
"    T = get_transfer_time(arr_time, dep_time)\n",
"    lambda_p = evaluate_lamda(distrib)\n",
"\n",
"    # Poisson CDF evaluated at T with mean lambda_p; a transfer with T <= 2 counts as missed\n",
"    # NOTE(review): get_transfer_time already subtracts its default delta of 2 minutes,\n",
"    # so T > 2 corresponds to a raw gap above 4 minutes - confirm this is intended\n",
"    poisson_p = poisson.cdf(T, lambda_p) if T > 2 else 0.0\n",
"\n",
"    print('Probability of success for transfer time = {} minutes : '.format(T),poisson_p)\n",
"    return poisson_p\n",
"\n",
"# Mock exemple of probability calculations with given inputs\n",
"trip_id = '1286.TA.26-32-j19-1.12.H'\n",
"stop_id = '8591184'\n",
"\n",
"# we take two exemple time from stop_times_array in format numpy.datetime64\n",
"arr_time = times[3][1]\n",
"dep_time = times[0][1]\n",
"\n",
"Pr = poisson_proba(trip_id, stop_id, arr_time, dep_time, d)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
diff --git a/notebooks/transfer_to_local.ipynb b/notebooks/transfer_to_local.ipynb
index c9649d5..8196c42 100644
--- a/notebooks/transfer_to_local.ipynb
+++ b/notebooks/transfer_to_local.ipynb
@@ -1,261 +1,261 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## transfer files from HDFS to local\n",
"\n",
"Any application without a proper name would be promptly killed.
"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Current session configs: {'conf': {'spark.app.name': 'lgptguys_final'}, 'kind': 'pyspark'} "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
- "ID YARN Application ID Kind State Spark UI Driver log Current session? 7704 application_1589299642358_2200 pyspark idle Link Link 7735 application_1589299642358_2231 pyspark idle Link Link 7737 application_1589299642358_2233 pyspark idle Link Link 7739 application_1589299642358_2235 pyspark dead Link Link 7743 application_1589299642358_2239 pyspark idle Link Link 7745 application_1589299642358_2241 pyspark idle Link Link 7750 application_1589299642358_2246 pyspark busy Link Link 7753 application_1589299642358_2249 pyspark idle Link Link 7756 application_1589299642358_2252 pyspark idle Link Link 7759 application_1589299642358_2255 pyspark busy Link Link 7760 application_1589299642358_2256 pyspark idle Link Link 7761 application_1589299642358_2257 pyspark idle Link Link 7762 application_1589299642358_2258 pyspark idle Link Link 7764 application_1589299642358_2260 pyspark idle Link Link 7767 application_1589299642358_2263 pyspark idle Link Link 7768 application_1589299642358_2264 pyspark idle Link Link 7770 application_1589299642358_2266 pyspark idle Link Link 7772 application_1589299642358_2268 pyspark idle Link Link 7773 application_1589299642358_2269 pyspark idle Link Link 7774 application_1589299642358_2270 pyspark idle Link Link 7775 application_1589299642358_2272 pyspark idle Link Link 7776 application_1589299642358_2273 pyspark idle Link Link 7777 application_1589299642358_2274 pyspark idle Link Link 7778 application_1589299642358_2275 pyspark idle Link Link 7779 application_1589299642358_2276 pyspark busy Link Link 7780 application_1589299642358_2277 pyspark idle Link Link 7781 application_1589299642358_2278 pyspark busy Link Link
"
+ "ID YARN Application ID Kind State Spark UI Driver log Current session? 7932 application_1589299642358_2450 pyspark idle Link Link 7933 application_1589299642358_2451 pyspark idle Link Link 7935 application_1589299642358_2453 pyspark idle Link Link 7939 application_1589299642358_2457 pyspark idle Link Link 7940 application_1589299642358_2458 pyspark idle Link Link 7941 application_1589299642358_2459 pyspark idle Link Link 7942 application_1589299642358_2460 pyspark idle Link Link 7944 application_1589299642358_2462 pyspark idle Link Link 7945 application_1589299642358_2463 pyspark dead Link Link 7946 application_1589299642358_2464 pyspark idle Link Link 7947 application_1589299642358_2465 pyspark idle Link Link 7948 application_1589299642358_2466 pyspark idle Link Link 7949 application_1589299642358_2467 pyspark idle Link Link "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%configure\n",
"{\"conf\": {\n",
" \"spark.app.name\": \"lgptguys_final\"\n",
"}}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Start Spark"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting Spark application\n"
]
},
{
"data": {
"text/html": [
"\n",
- "ID YARN Application ID Kind State Spark UI Driver log Current session? 7782 application_1589299642358_2279 pyspark idle Link Link ✔
"
+ "ID YARN Application ID Kind State Spark UI Driver log Current session? 7950 application_1589299642358_2468 pyspark idle Link Link ✔ "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SparkSession available as 'spark'.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"An error was encountered:\n",
"unknown magic command '%spark'\n",
"UnknownMagic: unknown magic command '%spark'\n",
"\n"
]
}
],
"source": [
"# Initialization\n",
"%%spark"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transfert and save tables from hdfs to local \n",
"\n",
"Here we describe the process of loading a table on hdfs, saving it to a proper place so that we can load it in local and then save it.\n",
"\n",
"First, we load the data that is in an otherwise not accessible place in hdfs :"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"username = 'tturner'\n",
"\n",
"my_files = ['stop_times_curated.csv',\n",
" 'stops_15km.csv', 'transfers.csv', 'stop_times_final_cyril.csv']\n",
"\n",
"for file in my_files:\n",
" this_file = spark.read.csv('data/lgpt_guys/{}'.format(file), \\\n",
" header = True) \n",
" this_file.write.csv(\"/user/{0}/{1}\".format(username, file.replace('.csv','')), \\\n",
" header = True, mode = 'overwrite')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/work/final_project/notebooks\n",
"stop_times_curated\n",
"stops_15km\n",
"transfers\n",
"stop_times_final_cyril\n"
]
}
],
"source": [
"%local\n",
"\n",
"from hdfs3 import HDFileSystem\n",
"import pandas as pd\n",
"import numpy as np \n",
"import os\n",
"print(os.getcwd())\n",
"\n",
"hdfs = HDFileSystem(host='hdfs://iccluster044.iccluster.epfl.ch', port=8020, user='ebouille')\n",
"\n",
"username = 'tturner'\n",
"\n",
"my_folders = ['stop_times_curated', \n",
" 'stops_15km', 'transfers', 'stop_times_final_cyril']\n",
"\n",
"for folder in my_folders:\n",
" print(folder)\n",
" array_files = hdfs.glob('/user/{0}/{1}/*.csv'.format(username, folder))\n",
" array = pd.DataFrame()\n",
" for file in array_files:\n",
" with hdfs.open(file) as f:\n",
" array = array.append(pd.read_csv(f))\n",
"\n",
" array.to_csv('../data/{}.csv'.format(folder), header=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "PySpark",
"language": "",
"name": "pysparkkernel"
},
"language_info": {
"codemirror_mode": {
"name": "python",
"version": 3
},
"mimetype": "text/x-python",
"name": "pyspark",
"pygments_lexer": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}