diff --git a/data/route_stops_array.pkl b/data/route_stops_array.pkl
index 68960bd..6103926 100644
--- a/data/route_stops_array.pkl
+++ b/data/route_stops_array.pkl
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:be9cd21f1b21a5e3202f8cb41841ff83fb0fb142692bd61d3196796a78507397
-size 44519
+oid sha256:ec9af358ad5e0802d624b2795ac20f3282fb689c21752d71f9fef698cbc3785b
+size 1899785
diff --git a/data/route_stops_df.pkl b/data/route_stops_df.pkl
index 7a010da..9634efe 100644
--- a/data/route_stops_df.pkl
+++ b/data/route_stops_df.pkl
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:3c999884bd261c116631fa0ff45b911784fc919f8d27aa8dbc500530a17815bd
-size 133926
+oid sha256:517e26beb718385fc4a5651fb5460ca7c61aa7a578f0a771427ef0415eff521c
+size 5699733
diff --git a/data/routes_array.pkl b/data/routes_array.pkl
index 29c8320..dd063c1 100644
--- a/data/routes_array.pkl
+++ b/data/routes_array.pkl
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:fb0794e9424d5343abc3729d91f99226ca19e8e580dfbe1953c66d4a7bef0820
-size 2920
+oid sha256:76c403daeb38714084b99e1ecc0dd151ac2977d3445120df958715ebaf3478d8
+size 206451
diff --git a/data/routes_array_df.pkl b/data/routes_array_df.pkl
index 0ece1e5..47b1efb 100644
--- a/data/routes_array_df.pkl
+++ b/data/routes_array_df.pkl
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:04b5fd2e0173563e9cf6658de02c1aa90e05940fd740a44c038671580c9c8fc0
-size 12713
+oid sha256:e642f9776ecd768916550628e7262f98eca451d84dab969419ed8f1c28c3ad7b
+size 531467
diff --git a/data/stop_times_array.pkl b/data/stop_times_array.pkl
index 866b9f9..b54a600 100644
--- a/data/stop_times_array.pkl
+++ b/data/stop_times_array.pkl
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:7e847bbbc6a9e7bf078cd3ba4595752a54969aed3b7ca2dae32176cb2e61d8b1
-size 3966905
+oid sha256:9a5b61142e94d00f3e247423d80655e35601baffb7d63810446145e37bcdf775
+size 3931993
diff --git a/data/stop_times_df.pkl b/data/stop_times_df.pkl
index d5fd6af..a230267 100644
--- a/data/stop_times_df.pkl
+++ b/data/stop_times_df.pkl
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:58a87e63f73f61e935780c4150c62ff27d1f0a2d55ed9f8457a2c8353caf3c65
-size 26154587
+oid sha256:83ef53159dcc6ed2394199f68d68424290b9ee10981ada263dee4c6b0282e1ab
+size 21899011
diff --git a/notebooks/Arrays_to_pickle.ipynb b/notebooks/Arrays_to_pickle.ipynb
index b5f9be9..2fdeb9c 100644
--- a/notebooks/Arrays_to_pickle.ipynb
+++ b/notebooks/Arrays_to_pickle.ipynb
@@ -1,7506 +1,5868 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Transform csv preproccesed files to pickle"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Import packages"
]
},
{
"cell_type": "code",
- "execution_count": 61,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pickle\n",
"import itertools"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read files"
]
},
{
"cell_type": "code",
- "execution_count": 62,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" trip_id | \n",
" stop_id | \n",
" arrival_time | \n",
" departure_time | \n",
" stop_sequence | \n",
" pickup_type | \n",
" drop_off_type | \n",
" hour_departure | \n",
" route_id | \n",
" direction_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 666.TA.26-4-j19-1.20.R | \n",
" 8576182 | \n",
" 07:02:00 | \n",
" 07:02:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 7.0 | \n",
" 26-4-j19-1 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 243.TA.26-311-j19-1.3.R | \n",
" 8590834 | \n",
" 07:16:00 | \n",
" 07:16:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 7.0 | \n",
" 26-311-j19-1 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 406.TA.26-62-j19-1.3.R | \n",
" 8591349 | \n",
" 07:24:00 | \n",
" 07:24:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 7.0 | \n",
" 26-62-j19-1 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 62.TA.57-2-Y-j19-1.33.H | \n",
" 8503000:0:13 | \n",
" 07:34:00 | \n",
" 07:34:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 7.0 | \n",
" 57-2-Y-j19-1 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 1179.TA.26-5-B-j19-1.23.R | \n",
" 8591245 | \n",
" 07:36:00 | \n",
" 07:36:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 7.0 | \n",
" 26-5-B-j19-1 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 trip_id stop_id arrival_time \\\n",
"0 0 666.TA.26-4-j19-1.20.R 8576182 07:02:00 \n",
"1 1 243.TA.26-311-j19-1.3.R 8590834 07:16:00 \n",
"2 2 406.TA.26-62-j19-1.3.R 8591349 07:24:00 \n",
"3 3 62.TA.57-2-Y-j19-1.33.H 8503000:0:13 07:34:00 \n",
"4 4 1179.TA.26-5-B-j19-1.23.R 8591245 07:36:00 \n",
"\n",
" departure_time stop_sequence pickup_type drop_off_type hour_departure \\\n",
"0 07:02:00 1 0 0 7.0 \n",
"1 07:16:00 1 0 0 7.0 \n",
"2 07:24:00 1 0 0 7.0 \n",
"3 07:34:00 1 0 0 7.0 \n",
"4 07:36:00 1 0 0 7.0 \n",
"\n",
" route_id direction_id \n",
"0 26-4-j19-1 1 \n",
"1 26-311-j19-1 1 \n",
"2 26-62-j19-1 1 \n",
"3 57-2-Y-j19-1 0 \n",
"4 26-5-B-j19-1 1 "
]
},
- "execution_count": 62,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#stop_times\n",
"stop_times_curated = pd.read_csv(\"../data/stop_times_curated.csv\")\n",
"stop_times_curated.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 65,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([ 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,\n",
- " nan])"
- ]
- },
- "execution_count": 65,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "stop_times_curated.hour_departure.unique()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 66,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array(['07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17',\n",
- " '18', '19', '24', '25'], dtype=object)"
- ]
- },
- "execution_count": 66,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "stop_times_curated.departure_time.str.slice(0,2).unique()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 63,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" stop_id | \n",
" stop_id2 | \n",
" distance | \n",
" Transfer_time_sec | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 8500926 | \n",
" 8590616 | \n",
" 0.122430 | \n",
" 146 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 8500926 | \n",
" 8590737 | \n",
" 0.300175 | \n",
" 360 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 8502186 | \n",
" 8502186:0:1 | \n",
" 0.006762 | \n",
" 8 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 8502186 | \n",
" 8502186:0:2 | \n",
" 0.013524 | \n",
" 16 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 8502186 | \n",
" 8502186P | \n",
" 0.000000 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 stop_id stop_id2 distance Transfer_time_sec\n",
"0 0 8500926 8590616 0.122430 146\n",
"1 1 8500926 8590737 0.300175 360\n",
"2 2 8502186 8502186:0:1 0.006762 8\n",
"3 3 8502186 8502186:0:2 0.013524 16\n",
"4 4 8502186 8502186P 0.000000 0"
]
},
- "execution_count": 63,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#transfers\n",
"transfers = pd.read_csv(\"../data/transfers.csv\")\n",
"transfers.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 64,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#routes\n",
"routes_curated = pd.read_csv(\"../data/transfers.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create stop_id same for all platforms"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#copy information stop_id with platform in stop_id_raw\n",
"stop_times_curated[\"stop_id_raw\"] = stop_times_curated[\"stop_id\"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" trip_id | \n",
" stop_id | \n",
" arrival_time | \n",
" departure_time | \n",
" stop_sequence | \n",
" pickup_type | \n",
" drop_off_type | \n",
" hour_departure | \n",
" route_id | \n",
" direction_id | \n",
" stop_id_raw | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 666.TA.26-4-j19-1.20.R | \n",
" 8576182 | \n",
" 07:02:00 | \n",
" 07:02:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 7.0 | \n",
" 26-4-j19-1 | \n",
" 1 | \n",
" 8576182 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 243.TA.26-311-j19-1.3.R | \n",
" 8590834 | \n",
" 07:16:00 | \n",
" 07:16:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 7.0 | \n",
" 26-311-j19-1 | \n",
" 1 | \n",
" 8590834 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 406.TA.26-62-j19-1.3.R | \n",
" 8591349 | \n",
" 07:24:00 | \n",
" 07:24:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 7.0 | \n",
" 26-62-j19-1 | \n",
" 1 | \n",
" 8591349 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 62.TA.57-2-Y-j19-1.33.H | \n",
" 8503000 | \n",
" 07:34:00 | \n",
" 07:34:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 7.0 | \n",
" 57-2-Y-j19-1 | \n",
" 0 | \n",
" 8503000:0:13 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 1179.TA.26-5-B-j19-1.23.R | \n",
" 8591245 | \n",
" 07:36:00 | \n",
" 07:36:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 7.0 | \n",
" 26-5-B-j19-1 | \n",
" 1 | \n",
" 8591245 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 trip_id stop_id arrival_time departure_time \\\n",
"0 0 666.TA.26-4-j19-1.20.R 8576182 07:02:00 07:02:00 \n",
"1 1 243.TA.26-311-j19-1.3.R 8590834 07:16:00 07:16:00 \n",
"2 2 406.TA.26-62-j19-1.3.R 8591349 07:24:00 07:24:00 \n",
"3 3 62.TA.57-2-Y-j19-1.33.H 8503000 07:34:00 07:34:00 \n",
"4 4 1179.TA.26-5-B-j19-1.23.R 8591245 07:36:00 07:36:00 \n",
"\n",
" stop_sequence pickup_type drop_off_type hour_departure route_id \\\n",
"0 1 0 0 7.0 26-4-j19-1 \n",
"1 1 0 0 7.0 26-311-j19-1 \n",
"2 1 0 0 7.0 26-62-j19-1 \n",
"3 1 0 0 7.0 57-2-Y-j19-1 \n",
"4 1 0 0 7.0 26-5-B-j19-1 \n",
"\n",
" direction_id stop_id_raw \n",
"0 1 8576182 \n",
"1 1 8590834 \n",
"2 1 8591349 \n",
"3 0 8503000:0:13 \n",
"4 1 8591245 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Use only first 7 characters for stop_id\n",
"stop_times_curated[\"stop_id\"] = stop_times_curated[\"stop_id_raw\"].str.slice(0, 7)\n",
"stop_times_curated[\"stop_id\"] = pd.to_numeric(stop_times_curated[\"stop_id\"])\n",
"stop_times_curated.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"#copy information stop_id with platform in stop_id_raw\n",
"transfers[\"stop_id_raw\"] = transfers[\"stop_id\"]\n",
"transfers[\"stop_id2_raw\"] = transfers[\"stop_id2\"]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" stop_id | \n",
" stop_id2 | \n",
" distance | \n",
" Transfer_time_sec | \n",
" stop_id_raw | \n",
" stop_id2_raw | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 8500926 | \n",
" 8590616 | \n",
" 0.122430 | \n",
" 146 | \n",
" 8500926 | \n",
" 8590616 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 8500926 | \n",
" 8590737 | \n",
" 0.300175 | \n",
" 360 | \n",
" 8500926 | \n",
" 8590737 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 8502186 | \n",
" 8502186 | \n",
" 0.006762 | \n",
" 8 | \n",
" 8502186 | \n",
" 8502186:0:1 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 8502186 | \n",
" 8502186 | \n",
" 0.013524 | \n",
" 16 | \n",
" 8502186 | \n",
" 8502186:0:2 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 8502186 | \n",
" 8502186 | \n",
" 0.000000 | \n",
" 0 | \n",
" 8502186 | \n",
" 8502186P | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 stop_id stop_id2 distance Transfer_time_sec stop_id_raw \\\n",
"0 0 8500926 8590616 0.122430 146 8500926 \n",
"1 1 8500926 8590737 0.300175 360 8500926 \n",
"2 2 8502186 8502186 0.006762 8 8502186 \n",
"3 3 8502186 8502186 0.013524 16 8502186 \n",
"4 4 8502186 8502186 0.000000 0 8502186 \n",
"\n",
" stop_id2_raw \n",
"0 8590616 \n",
"1 8590737 \n",
"2 8502186:0:1 \n",
"3 8502186:0:2 \n",
"4 8502186P "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Use only first 7 characters for stop_id\n",
"transfers[\"stop_id\"] = transfers[\"stop_id_raw\"].str.slice(0, 7)\n",
"transfers[\"stop_id2\"] = transfers[\"stop_id2_raw\"].str.slice(0, 7)\n",
"transfers[\"stop_id\"] = pd.to_numeric(transfers[\"stop_id\"])\n",
"transfers[\"stop_id2\"] = pd.to_numeric(transfers[\"stop_id2\"])\n",
"transfers.head(5)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Solve prblem in hour_departure"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can see that hour departure does not contain times above 23, so we need to correct this"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,\n",
+ " nan])"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "stop_times_curated.hour_departure.unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17',\n",
+ " '18', '19', '24', '25'], dtype=object)"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "stop_times_curated.departure_time.str.slice(0,2).unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "stop_times_curated[\"hour_departure\"] = pd.to_numeric(stop_times_curated.departure_time.str.slice(0,2))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Check if well converted to int"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 24, 25])"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "stop_times_curated[\"hour_departure\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We drop the trips before 7 am and after 7 pm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "250777"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "stop_times_curated.trip_id.count()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "trip_id_drop = np.where(((stop_times_curated.hour_departure > 19) |\\\n",
+ " (stop_times_curated.hour_departure < 7)),\\\n",
+ " stop_times_curated[\"trip_id\"] , None)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "stop_times_curated = stop_times_curated[~stop_times_curated[\"trip_id\"].isin(trip_id_drop)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "246514"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "stop_times_curated.trip_id.count()"
+ ]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete trips with 1 option\n",
"To finish"
]
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"#drop columns not useful or source of wrong manipulations in \"to_begin_with\"\n",
"stop_times_curated = stop_times_curated.drop(columns=[\"Unnamed: 0\", \"hour_departure\", \"drop_off_type\", \"pickup_type\"])"
]
},
{
"cell_type": "code",
- "execution_count": 51,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" trip_id | \n",
" stop_id | \n",
" arrival_time | \n",
" departure_time | \n",
" stop_sequence | \n",
" route_id | \n",
" direction_id | \n",
" stop_id_raw | \n",
"
\n",
" \n",
" trip_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1.TA.1-231-j19-1.1.H | \n",
" 1 | \n",
" 15 | \n",
" 17 | \n",
" 17 | \n",
" 18 | \n",
" 1 | \n",
" 1 | \n",
" 15 | \n",
"
\n",
" \n",
" 1.TA.1-44-j19-1.1.R | \n",
" 1 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 1 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 1.TA.1-444-j19-1.1.H | \n",
" 1 | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
" 1 | \n",
" 1 | \n",
" 9 | \n",
"
\n",
" \n",
" 1.TA.12-E03-j19-1.1.H | \n",
" 1 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" 1.TA.18-46-j19-1.1.H | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" trip_id stop_id arrival_time departure_time \\\n",
"trip_id \n",
"1.TA.1-231-j19-1.1.H 1 15 17 17 \n",
"1.TA.1-44-j19-1.1.R 1 3 3 3 \n",
"1.TA.1-444-j19-1.1.H 1 9 9 9 \n",
"1.TA.12-E03-j19-1.1.H 1 2 2 2 \n",
"1.TA.18-46-j19-1.1.H 1 1 1 1 \n",
"\n",
" stop_sequence route_id direction_id stop_id_raw \n",
"trip_id \n",
"1.TA.1-231-j19-1.1.H 18 1 1 15 \n",
"1.TA.1-44-j19-1.1.R 3 1 1 3 \n",
"1.TA.1-444-j19-1.1.H 9 1 1 9 \n",
"1.TA.12-E03-j19-1.1.H 2 1 1 2 \n",
"1.TA.18-46-j19-1.1.H 1 1 1 1 "
]
},
- "execution_count": 51,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#drop trips with only 1 stop\n",
"number_stop = stop_times_curated.groupby('trip_id').nunique()\n",
"number_stop.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 60,
+ "execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"#get trips with 1 stop\n",
"trip_with_1_stop = np.where((number_stop.stop_id == 1), number_stop.index, None)"
]
},
{
"cell_type": "code",
- "execution_count": 66,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[None None None ... None None None]\n"
]
}
],
"source": [
"np.set_printoptions(threshold=50)\n",
"print(trip_with_1_stop)"
]
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "19800"
+ "19444"
]
},
- "execution_count": 45,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_curated.trip_id.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"#drop trips with only 1 stop\n",
"stop_times_curated = stop_times_curated[~stop_times_curated[\"trip_id\"].isin(trip_with_1_stop)]"
]
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "19800"
+ "18668"
]
},
- "execution_count": 47,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_curated.trip_id.nunique()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add information on transport type\n",
"To do"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create route_int, trip_int and stop_int as consecutive integer IDs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Route_int\n",
"The route_int Id is given arbitrarily"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" stop_id | \n",
"
\n",
" \n",
" trip_id | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1.TA.1-231-j19-1.1.H | \n",
" (8583071, 8502553, 8572600, 8573722, 8572747, ... | \n",
"
\n",
" \n",
" 1.TA.1-44-j19-1.1.R | \n",
" (8590279, 8590275, 8591891) | \n",
"
\n",
" \n",
" 1.TA.1-444-j19-1.1.H | \n",
" (8591365, 8502979, 8591059, 8581346, 8572596, ... | \n",
"
\n",
" \n",
" 1.TA.12-E03-j19-1.1.H | \n",
" (8573205, 8596126) | \n",
"
\n",
" \n",
- " 1.TA.18-46-j19-1.1.H | \n",
- " (8503000,) | \n",
+ " 1.TA.21-23-j19-1.1.R | \n",
+ " (8503003, 8503000) | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" stop_id\n",
"trip_id \n",
"1.TA.1-231-j19-1.1.H (8583071, 8502553, 8572600, 8573722, 8572747, ...\n",
"1.TA.1-44-j19-1.1.R (8590279, 8590275, 8591891)\n",
"1.TA.1-444-j19-1.1.H (8591365, 8502979, 8591059, 8581346, 8572596, ...\n",
"1.TA.12-E03-j19-1.1.H (8573205, 8596126)\n",
- "1.TA.18-46-j19-1.1.H (8503000,)"
+ "1.TA.21-23-j19-1.1.R (8503003, 8503000)"
]
},
- "execution_count": 9,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#group stops into a sequence\n",
"tuple_stops = stop_times_curated.groupby('trip_id')['stop_id'].apply(tuple).to_frame()\n",
"tuple_stops.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
"
\n",
" \n",
" stop_id | \n",
"
\n",
" \n",
" \n",
" \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, 8590618, 8587020, 8590739, 8590614, 8590603, 8590615) | \n",
"
\n",
" \n",
" (8500926, 8590603, 8587020, 8590615, 8590739, 8591829, 8590602, 8590618, 8590737, 8595511, 8590614) | \n",
"
\n",
" \n",
- " (8500926, 8590603, 8587020, 8590739, 8590618, 8590737, 8590614, 8590615, 8590602, 8591829, 8595511) | \n",
- "
\n",
- " \n",
" (8500926, 8590603, 8590739, 8595511, 8590614, 8590737, 8590618, 8587020, 8591829, 8590615, 8590602) | \n",
"
\n",
" \n",
" (8500926, 8590618, 8590737, 8590739, 8587020, 8590602, 8590615, 8595511, 8591829, 8590614, 8590603) | \n",
"
\n",
+ " \n",
+ " (8500926, 8590737, 8590603, 8590614, 8590615, 8591829, 8595511, 8590739, 8587020, 8590618, 8590602) | \n",
+ "
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
- "Index: [(8500926, 8590602, 8595511, 8590737, 8591829, 8590618, 8587020, 8590739, 8590614, 8590603, 8590615), (8500926, 8590603, 8587020, 8590615, 8590739, 8591829, 8590602, 8590618, 8590737, 8595511, 8590614), (8500926, 8590603, 8587020, 8590739, 8590618, 8590737, 8590614, 8590615, 8590602, 8591829, 8595511), (8500926, 8590603, 8590739, 8595511, 8590614, 8590737, 8590618, 8587020, 8591829, 8590615, 8590602), (8500926, 8590618, 8590737, 8590739, 8587020, 8590602, 8590615, 8595511, 8591829, 8590614, 8590603)]"
+ "Index: [(8500926, 8590602, 8595511, 8590737, 8591829, 8590618, 8587020, 8590739, 8590614, 8590603, 8590615), (8500926, 8590603, 8587020, 8590615, 8590739, 8591829, 8590602, 8590618, 8590737, 8595511, 8590614), (8500926, 8590603, 8590739, 8595511, 8590614, 8590737, 8590618, 8587020, 8591829, 8590615, 8590602), (8500926, 8590618, 8590737, 8590739, 8587020, 8590602, 8590615, 8595511, 8591829, 8590614, 8590603), (8500926, 8590737, 8590603, 8590614, 8590615, 8591829, 8595511, 8590739, 8587020, 8590618, 8590602)]"
]
},
- "execution_count": 10,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#group to get unique stop sequences\n",
"unique_stop_sequence = tuple_stops.groupby(\"stop_id\").count()\n",
"unique_stop_sequence.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" stop_id | \n",
" route_int | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" (8500926, 8590603, 8587020, 8590615, 8590739, ... | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
- " (8500926, 8590603, 8587020, 8590739, 8590618, ... | \n",
+ " (8500926, 8590603, 8590739, 8595511, 8590614, ... | \n",
" 2 | \n",
"
\n",
" \n",
" 3 | \n",
- " (8500926, 8590603, 8590739, 8595511, 8590614, ... | \n",
+ " (8500926, 8590618, 8590737, 8590739, 8587020, ... | \n",
" 3 | \n",
"
\n",
" \n",
" 4 | \n",
- " (8500926, 8590618, 8590737, 8590739, 8587020, ... | \n",
+ " (8500926, 8590737, 8590603, 8590614, 8590615, ... | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" stop_id route_int\n",
"0 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0\n",
"1 (8500926, 8590603, 8587020, 8590615, 8590739, ... 1\n",
- "2 (8500926, 8590603, 8587020, 8590739, 8590618, ... 2\n",
- "3 (8500926, 8590603, 8590739, 8595511, 8590614, ... 3\n",
- "4 (8500926, 8590618, 8590737, 8590739, 8587020, ... 4"
+ "2 (8500926, 8590603, 8590739, 8595511, 8590614, ... 2\n",
+ "3 (8500926, 8590618, 8590737, 8590739, 8587020, ... 3\n",
+ "4 (8500926, 8590737, 8590603, 8590614, 8590615, ... 4"
]
},
- "execution_count": 11,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#create dataframe and route_int\n",
"df_unique_stop_sequence = unique_stop_sequence.reset_index()\n",
"df_unique_stop_sequence[\"route_int\"] = df_unique_stop_sequence.index\n",
"df_unique_stop_sequence.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" stop_id | \n",
" route_int | \n",
"
\n",
" \n",
" trip_id | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 131.TA.26-301-j19-1.1.R | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
"
\n",
" \n",
" 32.TA.26-301-j19-1.1.R | \n",
" (8500926, 8590603, 8587020, 8590615, 8590739, ... | \n",
" 1 | \n",
"
\n",
" \n",
- " 143.TA.26-301-j19-1.1.R | \n",
- " (8500926, 8590603, 8587020, 8590739, 8590618, ... | \n",
- " 2 | \n",
- "
\n",
- " \n",
" 133.TA.26-301-j19-1.1.R | \n",
" (8500926, 8590603, 8590739, 8595511, 8590614, ... | \n",
- " 3 | \n",
+ " 2 | \n",
"
\n",
" \n",
" 31.TA.26-301-j19-1.1.R | \n",
" (8500926, 8590618, 8590737, 8590739, 8587020, ... | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 140.TA.26-301-j19-1.1.R | \n",
+ " (8500926, 8590737, 8590603, 8590614, 8590615, ... | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" stop_id \\\n",
"trip_id \n",
"131.TA.26-301-j19-1.1.R (8500926, 8590602, 8595511, 8590737, 8591829, ... \n",
"32.TA.26-301-j19-1.1.R (8500926, 8590603, 8587020, 8590615, 8590739, ... \n",
- "143.TA.26-301-j19-1.1.R (8500926, 8590603, 8587020, 8590739, 8590618, ... \n",
"133.TA.26-301-j19-1.1.R (8500926, 8590603, 8590739, 8595511, 8590614, ... \n",
"31.TA.26-301-j19-1.1.R (8500926, 8590618, 8590737, 8590739, 8587020, ... \n",
+ "140.TA.26-301-j19-1.1.R (8500926, 8590737, 8590603, 8590614, 8590615, ... \n",
"\n",
" route_int \n",
"trip_id \n",
"131.TA.26-301-j19-1.1.R 0 \n",
"32.TA.26-301-j19-1.1.R 1 \n",
- "143.TA.26-301-j19-1.1.R 2 \n",
- "133.TA.26-301-j19-1.1.R 3 \n",
- "31.TA.26-301-j19-1.1.R 4 "
+ "133.TA.26-301-j19-1.1.R 2 \n",
+ "31.TA.26-301-j19-1.1.R 3 \n",
+ "140.TA.26-301-j19-1.1.R 4 "
]
},
- "execution_count": 12,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#join with trip information\n",
"trip_with_routes = tuple_stops.join(df_unique_stop_sequence.set_index(\"stop_id\"), on=\"stop_id\", how=\"left\").sort_values(\"route_int\")\n",
"trip_with_routes.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"trip_with_routes = trip_with_routes.rename(columns={\"stop_id\" : \"all_stops\"})"
]
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "19800"
+ "18668"
]
},
- "execution_count": 14,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check if routes and trips do not have the same number\n",
"trip_with_routes.index.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "16538"
+ "16210"
]
},
- "execution_count": 15,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trip_with_routes.route_int.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"#join to get route_int in stop_times\n",
"stop_times_routes = stop_times_curated.join(trip_with_routes, how=\"left\", on=\"trip_id\" , lsuffix='_left', rsuffix='_right')\n"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
- " Unnamed: 0 | \n",
" trip_id | \n",
" stop_id | \n",
" arrival_time | \n",
" departure_time | \n",
" stop_sequence | \n",
- " pickup_type | \n",
- " drop_off_type | \n",
- " hour_departure | \n",
" route_id | \n",
" direction_id | \n",
" stop_id_raw | \n",
" all_stops | \n",
" route_int | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
- " 0 | \n",
" 666.TA.26-4-j19-1.20.R | \n",
" 8576182 | \n",
" 07:02:00 | \n",
" 07:02:00 | \n",
" 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7.0 | \n",
" 26-4-j19-1 | \n",
" 1 | \n",
" 8576182 | \n",
" (8576182, 8591391, 8576197, 8576195, 8576193, ... | \n",
- " 3322 | \n",
+ " 3239 | \n",
"
\n",
" \n",
" 1 | \n",
- " 1 | \n",
" 243.TA.26-311-j19-1.3.R | \n",
" 8590834 | \n",
" 07:16:00 | \n",
" 07:16:00 | \n",
" 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7.0 | \n",
" 26-311-j19-1 | \n",
" 1 | \n",
" 8590834 | \n",
" (8590834, 8590849, 8590619, 8590848, 8590525, ... | \n",
- " 8607 | \n",
+ " 8443 | \n",
"
\n",
" \n",
" 2 | \n",
- " 2 | \n",
" 406.TA.26-62-j19-1.3.R | \n",
" 8591349 | \n",
" 07:24:00 | \n",
" 07:24:00 | \n",
" 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7.0 | \n",
" 26-62-j19-1 | \n",
" 1 | \n",
" 8591349 | \n",
" (8591349, 8591403, 8591293, 8576262, 8580449, ... | \n",
- " 14529 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 3 | \n",
- " 62.TA.57-2-Y-j19-1.33.H | \n",
- " 8503000 | \n",
- " 07:34:00 | \n",
- " 07:34:00 | \n",
- " 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7.0 | \n",
- " 57-2-Y-j19-1 | \n",
- " 0 | \n",
- " 8503000:0:13 | \n",
- " (8503000,) | \n",
- " 291 | \n",
+ " 14245 | \n",
"
\n",
" \n",
" 4 | \n",
- " 4 | \n",
" 1179.TA.26-5-B-j19-1.23.R | \n",
" 8591245 | \n",
" 07:36:00 | \n",
" 07:36:00 | \n",
" 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7.0 | \n",
" 26-5-B-j19-1 | \n",
" 1 | \n",
" 8591245 | \n",
" (8591245, 8591366, 8591415, 8576193, 8591059, ... | \n",
- " 12572 | \n",
+ " 12326 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 25.TA.26-845-j19-1.2.H | \n",
+ " 8573504 | \n",
+ " 08:00:00 | \n",
+ " 08:00:00 | \n",
+ " 1 | \n",
+ " 26-845-j19-1 | \n",
+ " 0 | \n",
+ " 8573504 | \n",
+ " (8573504, 8580879, 8575977, 8575976) | \n",
+ " 2526 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
- " Unnamed: 0 trip_id stop_id arrival_time departure_time \\\n",
- "0 0 666.TA.26-4-j19-1.20.R 8576182 07:02:00 07:02:00 \n",
- "1 1 243.TA.26-311-j19-1.3.R 8590834 07:16:00 07:16:00 \n",
- "2 2 406.TA.26-62-j19-1.3.R 8591349 07:24:00 07:24:00 \n",
- "3 3 62.TA.57-2-Y-j19-1.33.H 8503000 07:34:00 07:34:00 \n",
- "4 4 1179.TA.26-5-B-j19-1.23.R 8591245 07:36:00 07:36:00 \n",
+ " trip_id stop_id arrival_time departure_time \\\n",
+ "0 666.TA.26-4-j19-1.20.R 8576182 07:02:00 07:02:00 \n",
+ "1 243.TA.26-311-j19-1.3.R 8590834 07:16:00 07:16:00 \n",
+ "2 406.TA.26-62-j19-1.3.R 8591349 07:24:00 07:24:00 \n",
+ "4 1179.TA.26-5-B-j19-1.23.R 8591245 07:36:00 07:36:00 \n",
+ "5 25.TA.26-845-j19-1.2.H 8573504 08:00:00 08:00:00 \n",
"\n",
- " stop_sequence pickup_type drop_off_type hour_departure route_id \\\n",
- "0 1 0 0 7.0 26-4-j19-1 \n",
- "1 1 0 0 7.0 26-311-j19-1 \n",
- "2 1 0 0 7.0 26-62-j19-1 \n",
- "3 1 0 0 7.0 57-2-Y-j19-1 \n",
- "4 1 0 0 7.0 26-5-B-j19-1 \n",
- "\n",
- " direction_id stop_id_raw \\\n",
- "0 1 8576182 \n",
- "1 1 8590834 \n",
- "2 1 8591349 \n",
- "3 0 8503000:0:13 \n",
- "4 1 8591245 \n",
+ " stop_sequence route_id direction_id stop_id_raw \\\n",
+ "0 1 26-4-j19-1 1 8576182 \n",
+ "1 1 26-311-j19-1 1 8590834 \n",
+ "2 1 26-62-j19-1 1 8591349 \n",
+ "4 1 26-5-B-j19-1 1 8591245 \n",
+ "5 1 26-845-j19-1 0 8573504 \n",
"\n",
" all_stops route_int \n",
- "0 (8576182, 8591391, 8576197, 8576195, 8576193, ... 3322 \n",
- "1 (8590834, 8590849, 8590619, 8590848, 8590525, ... 8607 \n",
- "2 (8591349, 8591403, 8591293, 8576262, 8580449, ... 14529 \n",
- "3 (8503000,) 291 \n",
- "4 (8591245, 8591366, 8591415, 8576193, 8591059, ... 12572 "
+ "0 (8576182, 8591391, 8576197, 8576195, 8576193, ... 3239 \n",
+ "1 (8590834, 8590849, 8590619, 8590848, 8590525, ... 8443 \n",
+ "2 (8591349, 8591403, 8591293, 8576262, 8580449, ... 14245 \n",
+ "4 (8591245, 8591366, 8591415, 8576193, 8591059, ... 12326 \n",
+ "5 (8573504, 8580879, 8575977, 8575976) 2526 "
]
},
- "execution_count": 17,
+ "execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_routes.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "16537"
+ "16209"
]
},
- "execution_count": 18,
+ "execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check if route_int is correct\n",
"stop_times_routes.route_int.max()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Trip_int\n",
"The trip_int number needs to be ordered by route_int and time"
]
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "19800"
+ "18668"
]
},
- "execution_count": 19,
+ "execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check number trips in stop_times\n",
"stop_times_routes.trip_id.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
- " Unnamed: 0 | \n",
" trip_id | \n",
" stop_id | \n",
" arrival_time | \n",
" departure_time | \n",
" stop_sequence | \n",
- " pickup_type | \n",
- " drop_off_type | \n",
- " hour_departure | \n",
" route_id | \n",
" direction_id | \n",
" stop_id_raw | \n",
" all_stops | \n",
" route_int | \n",
"
\n",
" \n",
" \n",
" \n",
" 77516 | \n",
- " 35816 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8587020 | \n",
" 16:53:00 | \n",
" 16:53:00 | \n",
" 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8587020:0:G | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
"
\n",
" \n",
" 49961 | \n",
- " 8261 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8595511 | \n",
" 16:55:00 | \n",
" 16:55:00 | \n",
" 2 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8595511 | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
"
\n",
" \n",
" 163549 | \n",
- " 759 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590603 | \n",
" 16:57:00 | \n",
" 16:57:00 | \n",
" 3 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8590603 | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
"
\n",
" \n",
" 70704 | \n",
- " 29004 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8591829 | \n",
" 16:58:00 | \n",
" 16:58:00 | \n",
" 4 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8591829 | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
"
\n",
" \n",
" 45228 | \n",
- " 3528 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590602 | \n",
" 16:59:00 | \n",
" 16:59:00 | \n",
" 5 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8590602 | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
- " Unnamed: 0 trip_id stop_id arrival_time \\\n",
- "77516 35816 131.TA.26-301-j19-1.1.R 8587020 16:53:00 \n",
- "49961 8261 131.TA.26-301-j19-1.1.R 8595511 16:55:00 \n",
- "163549 759 131.TA.26-301-j19-1.1.R 8590603 16:57:00 \n",
- "70704 29004 131.TA.26-301-j19-1.1.R 8591829 16:58:00 \n",
- "45228 3528 131.TA.26-301-j19-1.1.R 8590602 16:59:00 \n",
- "\n",
- " departure_time stop_sequence pickup_type drop_off_type \\\n",
- "77516 16:53:00 1 0 0 \n",
- "49961 16:55:00 2 0 0 \n",
- "163549 16:57:00 3 0 0 \n",
- "70704 16:58:00 4 0 0 \n",
- "45228 16:59:00 5 0 0 \n",
+ " trip_id stop_id arrival_time departure_time \\\n",
+ "77516 131.TA.26-301-j19-1.1.R 8587020 16:53:00 16:53:00 \n",
+ "49961 131.TA.26-301-j19-1.1.R 8595511 16:55:00 16:55:00 \n",
+ "163549 131.TA.26-301-j19-1.1.R 8590603 16:57:00 16:57:00 \n",
+ "70704 131.TA.26-301-j19-1.1.R 8591829 16:58:00 16:58:00 \n",
+ "45228 131.TA.26-301-j19-1.1.R 8590602 16:59:00 16:59:00 \n",
"\n",
- " hour_departure route_id direction_id stop_id_raw \\\n",
- "77516 16.0 26-301-j19-1 1 8587020:0:G \n",
- "49961 16.0 26-301-j19-1 1 8595511 \n",
- "163549 16.0 26-301-j19-1 1 8590603 \n",
- "70704 16.0 26-301-j19-1 1 8591829 \n",
- "45228 16.0 26-301-j19-1 1 8590602 \n",
+ " stop_sequence route_id direction_id stop_id_raw \\\n",
+ "77516 1 26-301-j19-1 1 8587020:0:G \n",
+ "49961 2 26-301-j19-1 1 8595511 \n",
+ "163549 3 26-301-j19-1 1 8590603 \n",
+ "70704 4 26-301-j19-1 1 8591829 \n",
+ "45228 5 26-301-j19-1 1 8590602 \n",
"\n",
" all_stops route_int \n",
"77516 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n",
"49961 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n",
"163549 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n",
"70704 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n",
"45228 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 "
]
},
- "execution_count": 20,
+ "execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_routes.sort_values([\"route_int\", \"arrival_time\"]).head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" trip_int | \n",
" trip_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 0 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
"
\n",
" \n",
" 1 | \n",
" 32.TA.26-301-j19-1.1.R | \n",
" 1 | \n",
" 32.TA.26-301-j19-1.1.R | \n",
"
\n",
" \n",
" 2 | \n",
- " 143.TA.26-301-j19-1.1.R | \n",
+ " 133.TA.26-301-j19-1.1.R | \n",
" 2 | \n",
- " 143.TA.26-301-j19-1.1.R | \n",
+ " 133.TA.26-301-j19-1.1.R | \n",
"
\n",
" \n",
" 3 | \n",
- " 133.TA.26-301-j19-1.1.R | \n",
+ " 31.TA.26-301-j19-1.1.R | \n",
" 3 | \n",
- " 133.TA.26-301-j19-1.1.R | \n",
+ " 31.TA.26-301-j19-1.1.R | \n",
"
\n",
" \n",
" 4 | \n",
- " 31.TA.26-301-j19-1.1.R | \n",
+ " 140.TA.26-301-j19-1.1.R | \n",
" 4 | \n",
- " 31.TA.26-301-j19-1.1.R | \n",
+ " 140.TA.26-301-j19-1.1.R | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 trip_int trip_id\n",
"0 131.TA.26-301-j19-1.1.R 0 131.TA.26-301-j19-1.1.R\n",
"1 32.TA.26-301-j19-1.1.R 1 32.TA.26-301-j19-1.1.R\n",
- "2 143.TA.26-301-j19-1.1.R 2 143.TA.26-301-j19-1.1.R\n",
- "3 133.TA.26-301-j19-1.1.R 3 133.TA.26-301-j19-1.1.R\n",
- "4 31.TA.26-301-j19-1.1.R 4 31.TA.26-301-j19-1.1.R"
+ "2 133.TA.26-301-j19-1.1.R 2 133.TA.26-301-j19-1.1.R\n",
+ "3 31.TA.26-301-j19-1.1.R 3 31.TA.26-301-j19-1.1.R\n",
+ "4 140.TA.26-301-j19-1.1.R 4 140.TA.26-301-j19-1.1.R"
]
},
- "execution_count": 21,
+ "execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#generate sequential trip_int, ordered by route and by time\n",
"trip_df = pd.DataFrame(stop_times_routes.sort_values([\"route_int\", \"arrival_time\"]).trip_id.unique())\n",
"trip_df[\"trip_int\"] = trip_df.index\n",
"trip_df[\"trip_id\"] = trip_df.iloc[:,0]\n",
"trip_df.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "19800"
+ "18668"
]
},
- "execution_count": 22,
+ "execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check number trip_id\n",
"trip_df.trip_id.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"#join to get trip_int in stop_times\n",
"stop_times_routes_trip = stop_times_routes.join(trip_df.set_index(\"trip_id\"), how=\"inner\", on=\"trip_id\" , lsuffix='_left', rsuffix='_right')\n"
]
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
- " Unnamed: 0 | \n",
" trip_id | \n",
" stop_id | \n",
" arrival_time | \n",
" departure_time | \n",
" stop_sequence | \n",
- " pickup_type | \n",
- " drop_off_type | \n",
- " hour_departure | \n",
" route_id | \n",
" direction_id | \n",
" stop_id_raw | \n",
" all_stops | \n",
" route_int | \n",
" 0 | \n",
" trip_int | \n",
"
\n",
" \n",
" \n",
" \n",
" 77516 | \n",
- " 35816 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8587020 | \n",
" 16:53:00 | \n",
" 16:53:00 | \n",
" 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8587020:0:G | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 0 | \n",
"
\n",
" \n",
" 49961 | \n",
- " 8261 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8595511 | \n",
" 16:55:00 | \n",
" 16:55:00 | \n",
" 2 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8595511 | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 0 | \n",
"
\n",
" \n",
" 163549 | \n",
- " 759 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590603 | \n",
" 16:57:00 | \n",
" 16:57:00 | \n",
" 3 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8590603 | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 0 | \n",
"
\n",
" \n",
" 70704 | \n",
- " 29004 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8591829 | \n",
" 16:58:00 | \n",
" 16:58:00 | \n",
" 4 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8591829 | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 0 | \n",
"
\n",
" \n",
" 45228 | \n",
- " 3528 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590602 | \n",
" 16:59:00 | \n",
" 16:59:00 | \n",
" 5 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8590602 | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
- " Unnamed: 0 trip_id stop_id arrival_time \\\n",
- "77516 35816 131.TA.26-301-j19-1.1.R 8587020 16:53:00 \n",
- "49961 8261 131.TA.26-301-j19-1.1.R 8595511 16:55:00 \n",
- "163549 759 131.TA.26-301-j19-1.1.R 8590603 16:57:00 \n",
- "70704 29004 131.TA.26-301-j19-1.1.R 8591829 16:58:00 \n",
- "45228 3528 131.TA.26-301-j19-1.1.R 8590602 16:59:00 \n",
+ " trip_id stop_id arrival_time departure_time \\\n",
+ "77516 131.TA.26-301-j19-1.1.R 8587020 16:53:00 16:53:00 \n",
+ "49961 131.TA.26-301-j19-1.1.R 8595511 16:55:00 16:55:00 \n",
+ "163549 131.TA.26-301-j19-1.1.R 8590603 16:57:00 16:57:00 \n",
+ "70704 131.TA.26-301-j19-1.1.R 8591829 16:58:00 16:58:00 \n",
+ "45228 131.TA.26-301-j19-1.1.R 8590602 16:59:00 16:59:00 \n",
"\n",
- " departure_time stop_sequence pickup_type drop_off_type \\\n",
- "77516 16:53:00 1 0 0 \n",
- "49961 16:55:00 2 0 0 \n",
- "163549 16:57:00 3 0 0 \n",
- "70704 16:58:00 4 0 0 \n",
- "45228 16:59:00 5 0 0 \n",
- "\n",
- " hour_departure route_id direction_id stop_id_raw \\\n",
- "77516 16.0 26-301-j19-1 1 8587020:0:G \n",
- "49961 16.0 26-301-j19-1 1 8595511 \n",
- "163549 16.0 26-301-j19-1 1 8590603 \n",
- "70704 16.0 26-301-j19-1 1 8591829 \n",
- "45228 16.0 26-301-j19-1 1 8590602 \n",
+ " stop_sequence route_id direction_id stop_id_raw \\\n",
+ "77516 1 26-301-j19-1 1 8587020:0:G \n",
+ "49961 2 26-301-j19-1 1 8595511 \n",
+ "163549 3 26-301-j19-1 1 8590603 \n",
+ "70704 4 26-301-j19-1 1 8591829 \n",
+ "45228 5 26-301-j19-1 1 8590602 \n",
"\n",
" all_stops route_int \\\n",
"77516 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n",
"49961 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n",
"163549 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n",
"70704 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n",
"45228 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n",
"\n",
" 0 trip_int \n",
"77516 131.TA.26-301-j19-1.1.R 0 \n",
"49961 131.TA.26-301-j19-1.1.R 0 \n",
"163549 131.TA.26-301-j19-1.1.R 0 \n",
"70704 131.TA.26-301-j19-1.1.R 0 \n",
"45228 131.TA.26-301-j19-1.1.R 0 "
]
},
- "execution_count": 24,
+ "execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#save ordered stop_times\n",
"stop_times_routes_trip = stop_times_routes_trip.sort_values([\"route_int\", \"trip_int\", \"stop_sequence\"])\n",
"stop_times_routes_trip.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "19800"
+ "18668"
]
},
- "execution_count": 25,
+ "execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check if manipulations did not destroy trips\n",
"stop_times_routes_trip.trip_id.nunique()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Stop_int\n",
"Stop_int id needs to ordered by route, trip and stop sequence"
]
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "1408"
+ "1407"
]
},
- "execution_count": 26,
+ "execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check number stops at entry\n",
"stop_times_routes_trip.stop_id.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" stop_int | \n",
" stop_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 8587020 | \n",
" 0 | \n",
" 8587020 | \n",
"
\n",
" \n",
" 1 | \n",
" 8595511 | \n",
" 1 | \n",
" 8595511 | \n",
"
\n",
" \n",
" 2 | \n",
" 8590603 | \n",
" 2 | \n",
" 8590603 | \n",
"
\n",
" \n",
" 3 | \n",
" 8591829 | \n",
" 3 | \n",
" 8591829 | \n",
"
\n",
" \n",
" 4 | \n",
" 8590602 | \n",
" 4 | \n",
" 8590602 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 stop_int stop_id\n",
"0 8587020 0 8587020\n",
"1 8595511 1 8595511\n",
"2 8590603 2 8590603\n",
"3 8591829 3 8591829\n",
"4 8590602 4 8590602"
]
},
- "execution_count": 27,
+ "execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#stop_times_curated already in the right order\n",
"#we create dataframe to create stop_int\n",
"stops_df = pd.DataFrame(stop_times_routes_trip.stop_id.unique())\n",
"stops_df[\"stop_int\"] = stops_df.index\n",
"stops_df[\"stop_id\"] = stops_df.iloc[:,0]\n",
"stops_df.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "1408"
+ "1407"
]
},
- "execution_count": 28,
+ "execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check if number stop_int correct\n",
"stops_df.stop_int.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"#join to get stop_int\n",
"stop_times_routes_trip_stop = stop_times_routes_trip.join(stops_df.set_index(\"stop_id\"), how=\"inner\", on=\"stop_id\", lsuffix='_left', rsuffix='_right')\n"
]
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
- " Unnamed: 0 | \n",
" trip_id | \n",
" stop_id | \n",
" arrival_time | \n",
" departure_time | \n",
" stop_sequence | \n",
- " pickup_type | \n",
- " drop_off_type | \n",
- " hour_departure | \n",
" route_id | \n",
" direction_id | \n",
" stop_id_raw | \n",
" all_stops | \n",
" route_int | \n",
" 0_left | \n",
" trip_int | \n",
" 0_right | \n",
" stop_int | \n",
"
\n",
" \n",
" \n",
" \n",
" 77516 | \n",
- " 35816 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8587020 | \n",
" 16:53:00 | \n",
" 16:53:00 | \n",
" 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8587020:0:G | \n",
" (8500926, 8590602, 8595511, 8590737, 8591829, ... | \n",
" 0 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 0 | \n",
" 8587020 | \n",
" 0 | \n",
"
\n",
" \n",
" 46837 | \n",
- " 5137 | \n",
" 32.TA.26-301-j19-1.1.R | \n",
" 8587020 | \n",
" 07:38:00 | \n",
" 07:38:00 | \n",
" 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8587020:0:G | \n",
" (8500926, 8590603, 8587020, 8590615, 8590739, ... | \n",
" 1 | \n",
" 32.TA.26-301-j19-1.1.R | \n",
" 1 | \n",
" 8587020 | \n",
" 0 | \n",
"
\n",
" \n",
- " 23742 | \n",
- " 23742 | \n",
- " 143.TA.26-301-j19-1.1.R | \n",
+ " 208591 | \n",
+ " 133.TA.26-301-j19-1.1.R | \n",
" 8587020 | \n",
- " 24:03:00 | \n",
- " 24:03:00 | \n",
+ " 16:38:00 | \n",
+ " 16:38:00 | \n",
" 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " NaN | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8587020:0:G | \n",
- " (8500926, 8590603, 8587020, 8590739, 8590618, ... | \n",
+ " (8500926, 8590603, 8590739, 8595511, 8590614, ... | \n",
" 2 | \n",
- " 143.TA.26-301-j19-1.1.R | \n",
+ " 133.TA.26-301-j19-1.1.R | \n",
" 2 | \n",
" 8587020 | \n",
" 0 | \n",
"
\n",
" \n",
- " 208591 | \n",
- " 6202 | \n",
- " 133.TA.26-301-j19-1.1.R | \n",
+ " 81269 | \n",
+ " 31.TA.26-301-j19-1.1.R | \n",
" 8587020 | \n",
- " 16:38:00 | \n",
- " 16:38:00 | \n",
+ " 10:38:00 | \n",
+ " 10:38:00 | \n",
" 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8587020:0:G | \n",
- " (8500926, 8590603, 8590739, 8595511, 8590614, ... | \n",
+ " (8500926, 8590618, 8590737, 8590739, 8587020, ... | \n",
" 3 | \n",
- " 133.TA.26-301-j19-1.1.R | \n",
+ " 31.TA.26-301-j19-1.1.R | \n",
" 3 | \n",
" 8587020 | \n",
" 0 | \n",
"
\n",
" \n",
- " 81269 | \n",
- " 39569 | \n",
- " 31.TA.26-301-j19-1.1.R | \n",
+ " 212257 | \n",
+ " 140.TA.26-301-j19-1.1.R | \n",
" 8587020 | \n",
- " 10:38:00 | \n",
- " 10:38:00 | \n",
+ " 18:23:00 | \n",
+ " 18:23:00 | \n",
" 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 10.0 | \n",
" 26-301-j19-1 | \n",
" 1 | \n",
" 8587020:0:G | \n",
- " (8500926, 8590618, 8590737, 8590739, 8587020, ... | \n",
+ " (8500926, 8590737, 8590603, 8590614, 8590615, ... | \n",
" 4 | \n",
- " 31.TA.26-301-j19-1.1.R | \n",
+ " 140.TA.26-301-j19-1.1.R | \n",
" 4 | \n",
" 8587020 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
- " Unnamed: 0 trip_id stop_id arrival_time \\\n",
- "77516 35816 131.TA.26-301-j19-1.1.R 8587020 16:53:00 \n",
- "46837 5137 32.TA.26-301-j19-1.1.R 8587020 07:38:00 \n",
- "23742 23742 143.TA.26-301-j19-1.1.R 8587020 24:03:00 \n",
- "208591 6202 133.TA.26-301-j19-1.1.R 8587020 16:38:00 \n",
- "81269 39569 31.TA.26-301-j19-1.1.R 8587020 10:38:00 \n",
- "\n",
- " departure_time stop_sequence pickup_type drop_off_type \\\n",
- "77516 16:53:00 1 0 0 \n",
- "46837 07:38:00 1 0 0 \n",
- "23742 24:03:00 1 0 0 \n",
- "208591 16:38:00 1 0 0 \n",
- "81269 10:38:00 1 0 0 \n",
+ " trip_id stop_id arrival_time departure_time \\\n",
+ "77516 131.TA.26-301-j19-1.1.R 8587020 16:53:00 16:53:00 \n",
+ "46837 32.TA.26-301-j19-1.1.R 8587020 07:38:00 07:38:00 \n",
+ "208591 133.TA.26-301-j19-1.1.R 8587020 16:38:00 16:38:00 \n",
+ "81269 31.TA.26-301-j19-1.1.R 8587020 10:38:00 10:38:00 \n",
+ "212257 140.TA.26-301-j19-1.1.R 8587020 18:23:00 18:23:00 \n",
"\n",
- " hour_departure route_id direction_id stop_id_raw \\\n",
- "77516 16.0 26-301-j19-1 1 8587020:0:G \n",
- "46837 7.0 26-301-j19-1 1 8587020:0:G \n",
- "23742 NaN 26-301-j19-1 1 8587020:0:G \n",
- "208591 16.0 26-301-j19-1 1 8587020:0:G \n",
- "81269 10.0 26-301-j19-1 1 8587020:0:G \n",
+ " stop_sequence route_id direction_id stop_id_raw \\\n",
+ "77516 1 26-301-j19-1 1 8587020:0:G \n",
+ "46837 1 26-301-j19-1 1 8587020:0:G \n",
+ "208591 1 26-301-j19-1 1 8587020:0:G \n",
+ "81269 1 26-301-j19-1 1 8587020:0:G \n",
+ "212257 1 26-301-j19-1 1 8587020:0:G \n",
"\n",
" all_stops route_int \\\n",
"77516 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n",
"46837 (8500926, 8590603, 8587020, 8590615, 8590739, ... 1 \n",
- "23742 (8500926, 8590603, 8587020, 8590739, 8590618, ... 2 \n",
- "208591 (8500926, 8590603, 8590739, 8595511, 8590614, ... 3 \n",
- "81269 (8500926, 8590618, 8590737, 8590739, 8587020, ... 4 \n",
+ "208591 (8500926, 8590603, 8590739, 8595511, 8590614, ... 2 \n",
+ "81269 (8500926, 8590618, 8590737, 8590739, 8587020, ... 3 \n",
+ "212257 (8500926, 8590737, 8590603, 8590614, 8590615, ... 4 \n",
"\n",
" 0_left trip_int 0_right stop_int \n",
"77516 131.TA.26-301-j19-1.1.R 0 8587020 0 \n",
"46837 32.TA.26-301-j19-1.1.R 1 8587020 0 \n",
- "23742 143.TA.26-301-j19-1.1.R 2 8587020 0 \n",
- "208591 133.TA.26-301-j19-1.1.R 3 8587020 0 \n",
- "81269 31.TA.26-301-j19-1.1.R 4 8587020 0 "
+ "208591 133.TA.26-301-j19-1.1.R 2 8587020 0 \n",
+ "81269 31.TA.26-301-j19-1.1.R 3 8587020 0 \n",
+ "212257 140.TA.26-301-j19-1.1.R 4 8587020 0 "
]
},
- "execution_count": 30,
+ "execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_routes_trip_stop.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "1408"
+ "1407"
]
},
- "execution_count": 31,
+ "execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check if no stops deleted during manipulation\n",
"stop_times_routes_trip_stop.stop_id.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "1407"
+ "1406"
]
},
- "execution_count": 32,
+ "execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_routes_trip_stop.stop_int.max()"
]
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"#keep only useful columns \n",
"stop_times_int = stop_times_routes_trip_stop[[\"route_int\", \"trip_int\", \"stop_int\", \"stop_sequence\", \"arrival_time\", \"departure_time\",\\\n",
" \"route_id\", \"trip_id\", \"stop_id\"]].sort_values([\"route_int\", \"trip_int\", \"stop_sequence\"])"
]
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"stop_times_int = stop_times_int.reset_index(drop=True)"
]
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" route_int | \n",
" trip_int | \n",
" stop_int | \n",
" stop_sequence | \n",
" arrival_time | \n",
" departure_time | \n",
" route_id | \n",
" trip_id | \n",
" stop_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 16:53:00 | \n",
" 16:53:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8587020 | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 16:55:00 | \n",
" 16:55:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8595511 | \n",
"
\n",
" \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
" 2 | \n",
" 3 | \n",
" 16:57:00 | \n",
" 16:57:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590603 | \n",
"
\n",
" \n",
" 3 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 4 | \n",
" 16:58:00 | \n",
" 16:58:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8591829 | \n",
"
\n",
" \n",
" 4 | \n",
" 0 | \n",
" 0 | \n",
" 4 | \n",
" 5 | \n",
" 16:59:00 | \n",
" 16:59:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590602 | \n",
"
\n",
" \n",
" 5 | \n",
" 0 | \n",
" 0 | \n",
" 5 | \n",
" 6 | \n",
" 17:00:00 | \n",
" 17:00:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590615 | \n",
"
\n",
" \n",
" 6 | \n",
" 0 | \n",
" 0 | \n",
" 6 | \n",
" 7 | \n",
" 17:01:00 | \n",
" 17:01:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590614 | \n",
"
\n",
" \n",
" 7 | \n",
" 0 | \n",
" 0 | \n",
" 7 | \n",
" 8 | \n",
" 17:01:00 | \n",
" 17:01:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590618 | \n",
"
\n",
" \n",
" 8 | \n",
" 0 | \n",
" 0 | \n",
" 8 | \n",
" 9 | \n",
" 17:03:00 | \n",
" 17:03:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8500926 | \n",
"
\n",
" \n",
" 9 | \n",
" 0 | \n",
" 0 | \n",
" 9 | \n",
" 10 | \n",
" 17:03:00 | \n",
" 17:03:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590737 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_int trip_int stop_int stop_sequence arrival_time departure_time \\\n",
"0 0 0 0 1 16:53:00 16:53:00 \n",
"1 0 0 1 2 16:55:00 16:55:00 \n",
"2 0 0 2 3 16:57:00 16:57:00 \n",
"3 0 0 3 4 16:58:00 16:58:00 \n",
"4 0 0 4 5 16:59:00 16:59:00 \n",
"5 0 0 5 6 17:00:00 17:00:00 \n",
"6 0 0 6 7 17:01:00 17:01:00 \n",
"7 0 0 7 8 17:01:00 17:01:00 \n",
"8 0 0 8 9 17:03:00 17:03:00 \n",
"9 0 0 9 10 17:03:00 17:03:00 \n",
"\n",
" route_id trip_id stop_id \n",
"0 26-301-j19-1 131.TA.26-301-j19-1.1.R 8587020 \n",
"1 26-301-j19-1 131.TA.26-301-j19-1.1.R 8595511 \n",
"2 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590603 \n",
"3 26-301-j19-1 131.TA.26-301-j19-1.1.R 8591829 \n",
"4 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590602 \n",
"5 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590615 \n",
"6 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590614 \n",
"7 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590618 \n",
"8 26-301-j19-1 131.TA.26-301-j19-1.1.R 8500926 \n",
"9 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590737 "
]
},
- "execution_count": 35,
+ "execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_int.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transfer: delete transfer to same stop & get stop_int & stop_int2\n",
"to finish"
]
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1497"
]
},
- "execution_count": 36,
+ "execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check number stops transfers\n",
"transfers.stop_id.nunique()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1497"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfers.stop_id.nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#create stop_int column\n",
"#this action eliminates stops not in stop_times\n",
"transfers = transfers.join(stop_times_int[[\"stop_id\", \"stop_int\"]].set_index(\"stop_id\"), how=\"inner\", on = \"stop_id\")"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'transfers' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtransfers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'transfers' is not defined"
]
}
],
"source": [
"transfers.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" stop_id2 | \n",
" stop_int_two | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 8573721 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 8573722 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 8573723 | \n",
" 2 | \n",
"
\n",
" \n",
" 3 | \n",
" 8583071 | \n",
" 3 | \n",
"
\n",
" \n",
" 4 | \n",
" 8572603 | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" stop_id2 stop_int_two\n",
"0 8573721 0\n",
"1 8573722 1\n",
"2 8573723 2\n",
"3 8583071 3\n",
"4 8572603 4"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#create dataframe with stops\n",
"df_stop_int2 = stop_times_curated[[\"stop_id\", \"stop_int\"]].rename(columns={\"stop_id\": \"stop_id2\", \"stop_int\" : \"stop_int_two\"})\n",
"df_stop_int2.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"transfers = transfers.join(df_stop_int2.set_index(\"stop_id2\"), how=\"inner\", on = \"stop_id2\")"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" stop_id | \n",
" stop_id2 | \n",
" Transfer_time_sec | \n",
" stop_int2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 8500926 | \n",
" 8590616 | \n",
" 146 | \n",
" 1439 | \n",
"
\n",
" \n",
" 0 | \n",
" 0 | \n",
" 8500926 | \n",
" 8590616 | \n",
" 146 | \n",
" 1439 | \n",
"
\n",
" \n",
" 0 | \n",
" 0 | \n",
" 8500926 | \n",
" 8590616 | \n",
" 146 | \n",
" 1439 | \n",
"
\n",
" \n",
" 0 | \n",
" 0 | \n",
" 8500926 | \n",
" 8590616 | \n",
" 146 | \n",
" 1439 | \n",
"
\n",
" \n",
" 0 | \n",
" 0 | \n",
" 8500926 | \n",
" 8590616 | \n",
" 146 | \n",
" 1439 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 stop_id stop_id2 Transfer_time_sec stop_int2\n",
"0 0 8500926 8590616 146 1439\n",
"0 0 8500926 8590616 146 1439\n",
"0 0 8500926 8590616 146 1439\n",
"0 0 8500926 8590616 146 1439\n",
"0 0 8500926 8590616 146 1439"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfers.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"transfers_stop_int2 = transfers"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1512"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#check number unique stops2 in transfers\n",
"transfers.stop_id2.nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"transfers.join(stop_times_curated[[\"stop_id\", \"stop_int\"]].set_index(\"stop_id\"), how=\"inner\", on = \"stop_id\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Raptor specific preprocessing"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "StopTimes: [[departure_route0_trip0_stop0, arrival_route0_trip0_stop_0], [departure_route0_trip0_stop1, arrival_route0_trip0_stop_1], …], [[departure_route0_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], [[[departure_route1_trip0_stop0, arrival_route1_trip0_stop_0], …], [[departure_route1_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], …]"
+ "#### StopTimes: \n",
+ "[[departure_route0_trip0_stop0, arrival_route0_trip0_stop_0], [departure_route0_trip0_stop1, arrival_route0_trip0_stop_1], …], [[departure_route0_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], [[[departure_route1_trip0_stop0, arrival_route1_trip0_stop_0], …], [[departure_route1_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], …]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We start by making sure the order is correct"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" route_int | \n",
" trip_int | \n",
" stop_int | \n",
" stop_sequence | \n",
" arrival_time | \n",
" departure_time | \n",
" route_id | \n",
" trip_id | \n",
" stop_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 16:53:00 | \n",
" 16:53:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8587020 | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 16:55:00 | \n",
" 16:55:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8595511 | \n",
"
\n",
" \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
" 2 | \n",
" 3 | \n",
" 16:57:00 | \n",
" 16:57:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590603 | \n",
"
\n",
" \n",
" 3 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 4 | \n",
" 16:58:00 | \n",
" 16:58:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8591829 | \n",
"
\n",
" \n",
" 4 | \n",
" 0 | \n",
" 0 | \n",
" 4 | \n",
" 5 | \n",
" 16:59:00 | \n",
" 16:59:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590602 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_int trip_int stop_int stop_sequence arrival_time departure_time \\\n",
"0 0 0 0 1 16:53:00 16:53:00 \n",
"1 0 0 1 2 16:55:00 16:55:00 \n",
"2 0 0 2 3 16:57:00 16:57:00 \n",
"3 0 0 3 4 16:58:00 16:58:00 \n",
"4 0 0 4 5 16:59:00 16:59:00 \n",
"\n",
" route_id trip_id stop_id \n",
"0 26-301-j19-1 131.TA.26-301-j19-1.1.R 8587020 \n",
"1 26-301-j19-1 131.TA.26-301-j19-1.1.R 8595511 \n",
"2 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590603 \n",
"3 26-301-j19-1 131.TA.26-301-j19-1.1.R 8591829 \n",
"4 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590602 "
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_ordered = stop_times_int.sort_values(by=[\"route_int\", \"trip_int\", \"stop_sequence\"])\n",
"stop_times_ordered.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" arrival_time | \n",
" departure_time | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 16:53:00 | \n",
" 16:53:00 | \n",
"
\n",
" \n",
" 1 | \n",
" 16:55:00 | \n",
" 16:55:00 | \n",
"
\n",
" \n",
" 2 | \n",
" 16:57:00 | \n",
" 16:57:00 | \n",
"
\n",
" \n",
" 3 | \n",
" 16:58:00 | \n",
" 16:58:00 | \n",
"
\n",
" \n",
" 4 | \n",
" 16:59:00 | \n",
" 16:59:00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" arrival_time departure_time\n",
"0 16:53:00 16:53:00\n",
"1 16:55:00 16:55:00\n",
"2 16:57:00 16:57:00\n",
"3 16:58:00 16:58:00\n",
"4 16:59:00 16:59:00"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_ordered[[\"arrival_time\", \"departure_time\"]].head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Adding None to first arrival time and last departure time"
+ "We add None to first arrival time and last departure time."
]
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" route_int | \n",
" trip_int | \n",
" stop_int | \n",
" stop_sequence | \n",
" arrival_time | \n",
" departure_time | \n",
" route_id | \n",
" trip_id | \n",
" stop_id | \n",
" sequence_shift_1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 16:53:00 | \n",
" 16:53:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8587020 | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 16:55:00 | \n",
" 16:55:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8595511 | \n",
" 3 | \n",
"
\n",
" \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
" 2 | \n",
" 3 | \n",
" 16:57:00 | \n",
" 16:57:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590603 | \n",
" 4 | \n",
"
\n",
" \n",
" 3 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 4 | \n",
" 16:58:00 | \n",
" 16:58:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8591829 | \n",
" 5 | \n",
"
\n",
" \n",
" 4 | \n",
" 0 | \n",
" 0 | \n",
" 4 | \n",
" 5 | \n",
" 16:59:00 | \n",
" 16:59:00 | \n",
" 26-301-j19-1 | \n",
" 131.TA.26-301-j19-1.1.R | \n",
" 8590602 | \n",
" 6 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_int trip_int stop_int stop_sequence arrival_time departure_time \\\n",
"0 0 0 0 1 16:53:00 16:53:00 \n",
"1 0 0 1 2 16:55:00 16:55:00 \n",
"2 0 0 2 3 16:57:00 16:57:00 \n",
"3 0 0 3 4 16:58:00 16:58:00 \n",
"4 0 0 4 5 16:59:00 16:59:00 \n",
"\n",
" route_id trip_id stop_id sequence_shift_1 \n",
"0 26-301-j19-1 131.TA.26-301-j19-1.1.R 8587020 2 \n",
"1 26-301-j19-1 131.TA.26-301-j19-1.1.R 8595511 3 \n",
"2 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590603 4 \n",
"3 26-301-j19-1 131.TA.26-301-j19-1.1.R 8591829 5 \n",
"4 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590602 6 "
]
},
- "execution_count": 38,
+ "execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#adding a shift\n",
"stop_times_ordered[\"sequence_shift_1\"] = stop_times_ordered[\"stop_sequence\"].shift(-1, fill_value=0)\n",
"stop_times_ordered.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"stop_times_ordered['departure_time'] = np.where((stop_times_ordered[\"stop_sequence\"] > stop_times_ordered[\"sequence_shift_1\"]), None, stop_times_ordered['departure_time'])"
]
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"stop_times_ordered[\"arrival_time\"] = np.where((stop_times_ordered[\"stop_sequence\"] == 1), None, stop_times_ordered['arrival_time'])"
]
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" arrival_time | \n",
" departure_time | \n",
" stop_sequence | \n",
" sequence_shift_1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" None | \n",
" 16:53:00 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 16:55:00 | \n",
" 16:55:00 | \n",
" 2 | \n",
" 3 | \n",
"
\n",
" \n",
" 2 | \n",
" 16:57:00 | \n",
" 16:57:00 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" 3 | \n",
" 16:58:00 | \n",
" 16:58:00 | \n",
" 4 | \n",
" 5 | \n",
"
\n",
" \n",
" 4 | \n",
" 16:59:00 | \n",
" 16:59:00 | \n",
" 5 | \n",
" 6 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" arrival_time departure_time stop_sequence sequence_shift_1\n",
"0 None 16:53:00 1 2\n",
"1 16:55:00 16:55:00 2 3\n",
"2 16:57:00 16:57:00 3 4\n",
"3 16:58:00 16:58:00 4 5\n",
"4 16:59:00 16:59:00 5 6"
]
},
- "execution_count": 41,
+ "execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_ordered[[\"arrival_time\",\"departure_time\", \"stop_sequence\", \"sequence_shift_1\"]].head(5)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We transform it in datetime as required by the raptor algorithm"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 58,
"metadata": {},
- "outputs": [
- {
- "ename": "ParserError",
- "evalue": "hour must be in 0..23: 24:04:00",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, timestr, default, ignoretz, tzinfos, **kwargs)\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 655\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_build_naive\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 656\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36m_build_naive\u001b[0;34m(self, res, default)\u001b[0m\n\u001b[1;32m 1240\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1241\u001b[0;31m \u001b[0mnaive\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mrepl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1242\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mValueError\u001b[0m: hour must be in 0..23",
- "\nThe above exception was the direct cause of the following exception:\n",
- "\u001b[0;31mParserError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslibs/parsing.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslibs.parsing.parse_datetime_string\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(timestr, parserinfo, **kwargs)\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1374\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mDEFAULTPARSER\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimestr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1375\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, timestr, default, ignoretz, tzinfos, **kwargs)\u001b[0m\n\u001b[1;32m 656\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 657\u001b[0;31m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mParserError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\": %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimestr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 658\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/six.py\u001b[0m in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n",
- "\u001b[0;31mParserError\u001b[0m: hour must be in 0..23: 24:04:00",
- "\nDuring handling of the above exception, another exception occurred:\n",
- "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;31mTypeError\u001b[0m: invalid string coercion to datetime",
- "\nDuring handling of the above exception, another exception occurred:\n",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, timestr, default, ignoretz, tzinfos, **kwargs)\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 655\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_build_naive\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 656\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36m_build_naive\u001b[0;34m(self, res, default)\u001b[0m\n\u001b[1;32m 1240\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1241\u001b[0;31m \u001b[0mnaive\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mrepl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1242\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mValueError\u001b[0m: hour must be in 0..23",
- "\nThe above exception was the direct cause of the following exception:\n",
- "\u001b[0;31mParserError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mstop_times_ordered\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'arrival_time'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_datetime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstop_times_ordered\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'arrival_time'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mstop_times_ordered\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'departure_time'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_datetime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstop_times_ordered\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'departure_time'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/tools/datetimes.py\u001b[0m in \u001b[0;36mto_datetime\u001b[0;34m(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[1;32m 722\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtz_localize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtz\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mABCSeries\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 724\u001b[0;31m \u001b[0mcache_array\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_maybe_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert_listlike\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 725\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcache_array\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcache_array\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/tools/datetimes.py\u001b[0m in \u001b[0;36m_maybe_cache\u001b[0;34m(arg, format, cache, convert_listlike)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0munique_dates\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0munique_dates\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 152\u001b[0;31m \u001b[0mcache_dates\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconvert_listlike\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0munique_dates\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 153\u001b[0m \u001b[0mcache_array\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcache_dates\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0munique_dates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcache_array\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/tools/datetimes.py\u001b[0m in \u001b[0;36m_convert_listlike_datetimes\u001b[0;34m(arg, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)\u001b[0m\n\u001b[1;32m 445\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 446\u001b[0m \u001b[0mrequire_iso8601\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequire_iso8601\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 447\u001b[0;31m \u001b[0mallow_object\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 448\u001b[0m )\n\u001b[1;32m 449\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/arrays/datetimes.py\u001b[0m in \u001b[0;36mobjects_to_datetime64ns\u001b[0;34m(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)\u001b[0m\n\u001b[1;32m 1861\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"i8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtz_parsed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1862\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mValueError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1863\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1864\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1865\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtz_parsed\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/arrays/datetimes.py\u001b[0m in \u001b[0;36mobjects_to_datetime64ns\u001b[0;34m(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)\u001b[0m\n\u001b[1;32m 1852\u001b[0m \u001b[0mdayfirst\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdayfirst\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1853\u001b[0m \u001b[0myearfirst\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0myearfirst\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1854\u001b[0;31m \u001b[0mrequire_iso8601\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequire_iso8601\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1855\u001b[0m )\n\u001b[1;32m 1856\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime_object\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime_object\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslibs/parsing.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslibs.parsing.parse_datetime_string\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(timestr, parserinfo, **kwargs)\u001b[0m\n\u001b[1;32m 1372\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparserinfo\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimestr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1374\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mDEFAULTPARSER\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimestr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1375\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1376\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, timestr, default, ignoretz, tzinfos, **kwargs)\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_build_naive\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 656\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 657\u001b[0;31m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mParserError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\": %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimestr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 658\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 659\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mignoretz\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/six.py\u001b[0m in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n",
- "\u001b[0;31mParserError\u001b[0m: hour must be in 0..23: 24:04:00"
- ]
- }
- ],
+ "outputs": [],
"source": [
"stop_times_ordered['arrival_time'] = pd.to_datetime(stop_times_ordered['arrival_time'])\n",
"stop_times_ordered['departure_time'] = pd.to_datetime(stop_times_ordered['departure_time'])"
]
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" arrival_time | \n",
" departure_time | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
- " 2020-05-20 08:00:00 | \n",
- " 2020-05-20 08:00:00 | \n",
+ " NaT | \n",
+ " 2020-05-21 16:53:00 | \n",
"
\n",
" \n",
" 1 | \n",
- " 2020-05-20 08:01:00 | \n",
- " 2020-05-20 08:01:00 | \n",
+ " 2020-05-21 16:55:00 | \n",
+ " 2020-05-21 16:55:00 | \n",
"
\n",
" \n",
" 2 | \n",
- " 2020-05-20 08:02:00 | \n",
- " 2020-05-20 08:02:00 | \n",
+ " 2020-05-21 16:57:00 | \n",
+ " 2020-05-21 16:57:00 | \n",
"
\n",
" \n",
" 3 | \n",
- " 2020-05-20 08:03:00 | \n",
- " 2020-05-20 08:03:00 | \n",
+ " 2020-05-21 16:58:00 | \n",
+ " 2020-05-21 16:58:00 | \n",
"
\n",
" \n",
" 4 | \n",
- " 2020-05-20 08:04:00 | \n",
- " 2020-05-20 08:04:00 | \n",
+ " 2020-05-21 16:59:00 | \n",
+ " 2020-05-21 16:59:00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" arrival_time departure_time\n",
- "0 2020-05-20 08:00:00 2020-05-20 08:00:00\n",
- "1 2020-05-20 08:01:00 2020-05-20 08:01:00\n",
- "2 2020-05-20 08:02:00 2020-05-20 08:02:00\n",
- "3 2020-05-20 08:03:00 2020-05-20 08:03:00\n",
- "4 2020-05-20 08:04:00 2020-05-20 08:04:00"
+ "0 NaT 2020-05-21 16:53:00\n",
+ "1 2020-05-21 16:55:00 2020-05-21 16:55:00\n",
+ "2 2020-05-21 16:57:00 2020-05-21 16:57:00\n",
+ "3 2020-05-21 16:58:00 2020-05-21 16:58:00\n",
+ "4 2020-05-21 16:59:00 2020-05-21 16:59:00"
]
},
- "execution_count": 40,
+ "execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_ordered[[\"arrival_time\", \"departure_time\"]].head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stop_times_df.pkl','wb') as f: pickle.dump(stop_times_ordered, f)"
]
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" route_int | \n",
" trip_int | \n",
" stop_int | \n",
" stop_sequence | \n",
" arrival_time | \n",
" departure_time | \n",
" route_id | \n",
" trip_id | \n",
" stop_id | \n",
- " pickup_type | \n",
- " drop_off_type | \n",
" sequence_shift_1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
- " 14 | \n",
- " 2020-05-20 08:00:00 | \n",
- " 2020-05-20 08:00:00 | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8573721 | \n",
- " 0 | \n",
- " 0 | \n",
- " 15 | \n",
+ " 1 | \n",
+ " NaT | \n",
+ " 2020-05-21 16:53:00 | \n",
+ " 26-301-j19-1 | \n",
+ " 131.TA.26-301-j19-1.1.R | \n",
+ " 8587020 | \n",
+ " 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
- " 15 | \n",
- " 2020-05-20 08:01:00 | \n",
- " 2020-05-20 08:01:00 | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8573722 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0 | \n",
- " 0 | \n",
- " 2 | \n",
- " 16 | \n",
- " 2020-05-20 08:02:00 | \n",
- " 2020-05-20 08:02:00 | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8573723 | \n",
- " 0 | \n",
- " 0 | \n",
- " 17 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 0 | \n",
- " 0 | \n",
- " 3 | \n",
- " 17 | \n",
- " 2020-05-20 08:03:00 | \n",
- " 2020-05-20 08:03:00 | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8583071 | \n",
- " 0 | \n",
- " 0 | \n",
- " 18 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 0 | \n",
- " 0 | \n",
- " 4 | \n",
- " 18 | \n",
- " 2020-05-20 08:04:00 | \n",
- " 2020-05-20 08:04:00 | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8572603 | \n",
- " 0 | \n",
- " 0 | \n",
- " 19 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 0 | \n",
- " 0 | \n",
- " 5 | \n",
- " 19 | \n",
- " 2020-05-20 08:05:00 | \n",
- " NaT | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8502879 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 0 | \n",
- " 1 | \n",
- " 5 | \n",
- " 1 | \n",
- " NaT | \n",
- " 2020-05-20 08:06:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8502879 | \n",
- " 0 | \n",
- " 0 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 0 | \n",
- " 1 | \n",
- " 4 | \n",
- " 2 | \n",
- " 2020-05-20 08:07:00 | \n",
- " 2020-05-20 08:07:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8572603 | \n",
- " 0 | \n",
- " 0 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 0 | \n",
- " 1 | \n",
- " 3 | \n",
- " 3 | \n",
- " 2020-05-20 08:07:00 | \n",
- " 2020-05-20 08:07:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8583071 | \n",
- " 0 | \n",
- " 0 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 0 | \n",
- " 1 | \n",
- " 6 | \n",
- " 4 | \n",
- " 2020-05-20 08:10:00 | \n",
- " 2020-05-20 08:12:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8502955 | \n",
- " 0 | \n",
- " 0 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 0 | \n",
- " 1 | \n",
- " 7 | \n",
- " 5 | \n",
- " 2020-05-20 08:15:00 | \n",
- " 2020-05-20 08:15:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8502553 | \n",
- " 0 | \n",
- " 0 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 0 | \n",
- " 1 | \n",
- " 8 | \n",
- " 6 | \n",
- " 2020-05-20 08:17:00 | \n",
- " 2020-05-20 08:17:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8572601 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 0 | \n",
- " 1 | \n",
- " 9 | \n",
- " 7 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8572600 | \n",
- " 0 | \n",
- " 0 | \n",
- " 8 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 0 | \n",
- " 1 | \n",
- " 10 | \n",
- " 8 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8582462 | \n",
- " 0 | \n",
- " 0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 0 | \n",
- " 1 | \n",
- " 11 | \n",
- " 10 | \n",
- " 2020-05-20 08:22:00 | \n",
- " NaT | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8572747 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 0 | \n",
- " 2 | \n",
- " 11 | \n",
- " 1 | \n",
- " NaT | \n",
- " 2020-05-20 08:07:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8572747 | \n",
- " 0 | \n",
- " 0 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 0 | \n",
- " 2 | \n",
- " 10 | \n",
- " 3 | \n",
- " 2020-05-20 08:08:00 | \n",
- " 2020-05-20 08:08:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8582462 | \n",
- " 0 | \n",
- " 0 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 0 | \n",
- " 2 | \n",
- " 9 | \n",
- " 4 | \n",
- " 2020-05-20 08:09:00 | \n",
- " 2020-05-20 08:09:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8572600 | \n",
- " 0 | \n",
- " 0 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 0 | \n",
- " 2 | \n",
- " 8 | \n",
- " 5 | \n",
- " 2020-05-20 08:09:00 | \n",
- " 2020-05-20 08:09:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8572601 | \n",
- " 0 | \n",
- " 0 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 0 | \n",
- " 2 | \n",
- " 7 | \n",
- " 6 | \n",
- " 2020-05-20 08:13:00 | \n",
- " 2020-05-20 08:13:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8502553 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 0 | \n",
- " 2 | \n",
- " 12 | \n",
- " 7 | \n",
- " 2020-05-20 08:15:00 | \n",
- " 2020-05-20 08:15:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8572602 | \n",
- " 0 | \n",
- " 0 | \n",
- " 8 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " 0 | \n",
- " 2 | \n",
- " 6 | \n",
- " 8 | \n",
- " 2020-05-20 08:16:00 | \n",
- " 2020-05-20 08:17:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8502955 | \n",
- " 0 | \n",
- " 0 | \n",
- " 9 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " 0 | \n",
- " 2 | \n",
- " 1 | \n",
- " 9 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573722 | \n",
- " 0 | \n",
- " 0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " 0 | \n",
- " 2 | \n",
- " 0 | \n",
- " 10 | \n",
- " 2020-05-20 08:20:00 | \n",
- " 2020-05-20 08:20:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573721 | \n",
- " 0 | \n",
- " 0 | \n",
- " 11 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " 0 | \n",
- " 2 | \n",
- " 13 | \n",
- " 11 | \n",
- " 2020-05-20 08:23:00 | \n",
- " 2020-05-20 08:23:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8503598 | \n",
- " 0 | \n",
- " 0 | \n",
- " 12 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " 0 | \n",
- " 2 | \n",
- " 14 | \n",
- " 12 | \n",
- " 2020-05-20 08:25:00 | \n",
- " 2020-05-20 08:27:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573720 | \n",
- " 0 | \n",
- " 0 | \n",
- " 13 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " 0 | \n",
- " 2 | \n",
- " 13 | \n",
- " 13 | \n",
- " 2020-05-20 08:28:00 | \n",
- " 2020-05-20 08:28:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8503598 | \n",
- " 0 | \n",
- " 0 | \n",
- " 14 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 0 | \n",
- " 2 | \n",
- " 0 | \n",
- " 14 | \n",
- " 2020-05-20 08:30:00 | \n",
- " 2020-05-20 08:30:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573721 | \n",
- " 0 | \n",
- " 0 | \n",
- " 15 | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " 0 | \n",
- " 2 | \n",
- " 1 | \n",
- " 15 | \n",
- " 2020-05-20 08:31:00 | \n",
- " 2020-05-20 08:31:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573722 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " 0 | \n",
- " 2 | \n",
- " 2 | \n",
- " 16 | \n",
- " 2020-05-20 08:32:00 | \n",
- " 2020-05-20 08:32:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573723 | \n",
- " 0 | \n",
- " 0 | \n",
- " 17 | \n",
- "
\n",
- " \n",
- " 30 | \n",
- " 0 | \n",
- " 2 | \n",
- " 3 | \n",
- " 17 | \n",
- " 2020-05-20 08:33:00 | \n",
- " 2020-05-20 08:33:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8583071 | \n",
- " 0 | \n",
- " 0 | \n",
- " 18 | \n",
- "
\n",
- " \n",
- " 31 | \n",
- " 0 | \n",
- " 2 | \n",
- " 4 | \n",
- " 18 | \n",
- " 2020-05-20 08:34:00 | \n",
- " 2020-05-20 08:34:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8572603 | \n",
- " 0 | \n",
- " 0 | \n",
- " 19 | \n",
- "
\n",
- " \n",
- " 32 | \n",
- " 0 | \n",
- " 2 | \n",
- " 5 | \n",
- " 19 | \n",
- " 2020-05-20 08:35:00 | \n",
- " NaT | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8502879 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 33 | \n",
- " 0 | \n",
- " 3 | \n",
- " 5 | \n",
- " 1 | \n",
- " NaT | \n",
- " 2020-05-20 08:36:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8502879 | \n",
- " 0 | \n",
- " 0 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 34 | \n",
- " 0 | \n",
- " 3 | \n",
- " 4 | \n",
- " 2 | \n",
- " 2020-05-20 08:37:00 | \n",
- " 2020-05-20 08:37:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8572603 | \n",
- " 0 | \n",
- " 0 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 35 | \n",
- " 0 | \n",
- " 3 | \n",
- " 3 | \n",
- " 3 | \n",
- " 2020-05-20 08:37:00 | \n",
- " 2020-05-20 08:37:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8583071 | \n",
- " 0 | \n",
- " 0 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 36 | \n",
- " 0 | \n",
- " 3 | \n",
- " 6 | \n",
- " 4 | \n",
- " 2020-05-20 08:40:00 | \n",
- " 2020-05-20 08:42:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8502955 | \n",
- " 0 | \n",
- " 0 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 37 | \n",
- " 0 | \n",
- " 3 | \n",
- " 7 | \n",
- " 5 | \n",
- " 2020-05-20 08:45:00 | \n",
- " 2020-05-20 08:45:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8502553 | \n",
- " 0 | \n",
- " 0 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 38 | \n",
- " 0 | \n",
- " 3 | \n",
- " 8 | \n",
- " 6 | \n",
- " 2020-05-20 08:47:00 | \n",
- " 2020-05-20 08:47:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8572601 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " 39 | \n",
- " 0 | \n",
- " 3 | \n",
- " 9 | \n",
- " 7 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8572600 | \n",
- " 0 | \n",
- " 0 | \n",
- " 8 | \n",
- "
\n",
- " \n",
- " 40 | \n",
- " 0 | \n",
- " 3 | \n",
- " 10 | \n",
- " 8 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8582462 | \n",
- " 0 | \n",
- " 0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " 41 | \n",
- " 0 | \n",
- " 3 | \n",
- " 11 | \n",
- " 10 | \n",
- " 2020-05-20 08:52:00 | \n",
- " NaT | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8572747 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 42 | \n",
- " 0 | \n",
- " 4 | \n",
- " 11 | \n",
- " 1 | \n",
- " NaT | \n",
- " 2020-05-20 08:37:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8572747 | \n",
- " 0 | \n",
- " 0 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 43 | \n",
- " 0 | \n",
- " 4 | \n",
- " 10 | \n",
- " 3 | \n",
- " 2020-05-20 08:38:00 | \n",
- " 2020-05-20 08:38:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8582462 | \n",
- " 0 | \n",
- " 0 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 44 | \n",
- " 0 | \n",
- " 4 | \n",
- " 9 | \n",
- " 4 | \n",
- " 2020-05-20 08:39:00 | \n",
- " 2020-05-20 08:39:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8572600 | \n",
- " 0 | \n",
- " 0 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 45 | \n",
- " 0 | \n",
- " 4 | \n",
- " 8 | \n",
- " 5 | \n",
- " 2020-05-20 08:39:00 | \n",
- " 2020-05-20 08:39:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8572601 | \n",
- " 0 | \n",
- " 0 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 46 | \n",
- " 0 | \n",
- " 4 | \n",
- " 7 | \n",
- " 6 | \n",
- " 2020-05-20 08:43:00 | \n",
- " 2020-05-20 08:43:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8502553 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " 47 | \n",
- " 0 | \n",
- " 4 | \n",
- " 12 | \n",
- " 7 | \n",
- " 2020-05-20 08:45:00 | \n",
- " 2020-05-20 08:45:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8572602 | \n",
- " 0 | \n",
- " 0 | \n",
- " 8 | \n",
- "
\n",
- " \n",
- " 48 | \n",
- " 0 | \n",
- " 4 | \n",
- " 6 | \n",
- " 8 | \n",
- " 2020-05-20 08:46:00 | \n",
- " 2020-05-20 08:47:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8502955 | \n",
- " 0 | \n",
- " 0 | \n",
- " 9 | \n",
- "
\n",
- " \n",
- " 49 | \n",
- " 0 | \n",
- " 4 | \n",
- " 1 | \n",
- " 9 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8573722 | \n",
- " 0 | \n",
- " 0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " 50 | \n",
- " 0 | \n",
- " 4 | \n",
- " 0 | \n",
- " 10 | \n",
- " 2020-05-20 08:50:00 | \n",
- " 2020-05-20 08:50:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8573721 | \n",
- " 0 | \n",
- " 0 | \n",
- " 11 | \n",
- "
\n",
- " \n",
- " 51 | \n",
- " 0 | \n",
- " 4 | \n",
- " 13 | \n",
- " 11 | \n",
- " 2020-05-20 08:53:00 | \n",
- " 2020-05-20 08:53:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8503598 | \n",
- " 0 | \n",
- " 0 | \n",
- " 12 | \n",
- "
\n",
- " \n",
- " 52 | \n",
- " 0 | \n",
- " 4 | \n",
- " 14 | \n",
- " 12 | \n",
- " 2020-05-20 08:55:00 | \n",
- " 2020-05-20 08:59:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8573720 | \n",
- " 0 | \n",
- " 0 | \n",
- " 13 | \n",
- "
\n",
- " \n",
- " 53 | \n",
- " 0 | \n",
- " 4 | \n",
- " 13 | \n",
- " 13 | \n",
- " 2020-05-20 09:00:00 | \n",
- " 2020-05-20 09:00:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8503598 | \n",
- " 0 | \n",
- " 0 | \n",
- " 14 | \n",
- "
\n",
- " \n",
- " 54 | \n",
- " 0 | \n",
- " 4 | \n",
- " 0 | \n",
- " 14 | \n",
- " 2020-05-20 09:02:00 | \n",
- " 2020-05-20 09:02:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8573721 | \n",
- " 0 | \n",
- " 0 | \n",
- " 15 | \n",
- "
\n",
- " \n",
- " 55 | \n",
- " 0 | \n",
- " 4 | \n",
- " 1 | \n",
- " 15 | \n",
- " 2020-05-20 09:03:00 | \n",
- " 2020-05-20 09:03:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8573722 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16 | \n",
- "
\n",
- " \n",
- " 56 | \n",
- " 0 | \n",
- " 4 | \n",
- " 2 | \n",
- " 16 | \n",
- " 2020-05-20 09:04:00 | \n",
- " 2020-05-20 09:04:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8573723 | \n",
- " 0 | \n",
- " 0 | \n",
- " 17 | \n",
- "
\n",
- " \n",
- " 57 | \n",
- " 0 | \n",
- " 4 | \n",
- " 3 | \n",
- " 17 | \n",
- " 2020-05-20 09:05:00 | \n",
- " 2020-05-20 09:05:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8583071 | \n",
- " 0 | \n",
- " 0 | \n",
- " 18 | \n",
- "
\n",
- " \n",
- " 58 | \n",
- " 0 | \n",
- " 4 | \n",
- " 4 | \n",
- " 18 | \n",
- " 2020-05-20 09:06:00 | \n",
- " 2020-05-20 09:06:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8572603 | \n",
- " 0 | \n",
- " 0 | \n",
- " 19 | \n",
- "
\n",
- " \n",
- " 59 | \n",
- " 0 | \n",
- " 4 | \n",
- " 5 | \n",
- " 19 | \n",
- " 2020-05-20 09:07:00 | \n",
- " NaT | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8502879 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " route_int trip_int stop_int stop_sequence arrival_time \\\n",
- "0 0 0 0 14 2020-05-20 08:00:00 \n",
- "1 0 0 1 15 2020-05-20 08:01:00 \n",
- "2 0 0 2 16 2020-05-20 08:02:00 \n",
- "3 0 0 3 17 2020-05-20 08:03:00 \n",
- "4 0 0 4 18 2020-05-20 08:04:00 \n",
- "5 0 0 5 19 2020-05-20 08:05:00 \n",
- "6 0 1 5 1 NaT \n",
- "7 0 1 4 2 2020-05-20 08:07:00 \n",
- "8 0 1 3 3 2020-05-20 08:07:00 \n",
- "9 0 1 6 4 2020-05-20 08:10:00 \n",
- "10 0 1 7 5 2020-05-20 08:15:00 \n",
- "11 0 1 8 6 2020-05-20 08:17:00 \n",
- "12 0 1 9 7 2020-05-20 08:18:00 \n",
- "13 0 1 10 8 2020-05-20 08:18:00 \n",
- "14 0 1 11 10 2020-05-20 08:22:00 \n",
- "15 0 2 11 1 NaT \n",
- "16 0 2 10 3 2020-05-20 08:08:00 \n",
- "17 0 2 9 4 2020-05-20 08:09:00 \n",
- "18 0 2 8 5 2020-05-20 08:09:00 \n",
- "19 0 2 7 6 2020-05-20 08:13:00 \n",
- "20 0 2 12 7 2020-05-20 08:15:00 \n",
- "21 0 2 6 8 2020-05-20 08:16:00 \n",
- "22 0 2 1 9 2020-05-20 08:18:00 \n",
- "23 0 2 0 10 2020-05-20 08:20:00 \n",
- "24 0 2 13 11 2020-05-20 08:23:00 \n",
- "25 0 2 14 12 2020-05-20 08:25:00 \n",
- "26 0 2 13 13 2020-05-20 08:28:00 \n",
- "27 0 2 0 14 2020-05-20 08:30:00 \n",
- "28 0 2 1 15 2020-05-20 08:31:00 \n",
- "29 0 2 2 16 2020-05-20 08:32:00 \n",
- "30 0 2 3 17 2020-05-20 08:33:00 \n",
- "31 0 2 4 18 2020-05-20 08:34:00 \n",
- "32 0 2 5 19 2020-05-20 08:35:00 \n",
- "33 0 3 5 1 NaT \n",
- "34 0 3 4 2 2020-05-20 08:37:00 \n",
- "35 0 3 3 3 2020-05-20 08:37:00 \n",
- "36 0 3 6 4 2020-05-20 08:40:00 \n",
- "37 0 3 7 5 2020-05-20 08:45:00 \n",
- "38 0 3 8 6 2020-05-20 08:47:00 \n",
- "39 0 3 9 7 2020-05-20 08:48:00 \n",
- "40 0 3 10 8 2020-05-20 08:48:00 \n",
- "41 0 3 11 10 2020-05-20 08:52:00 \n",
- "42 0 4 11 1 NaT \n",
- "43 0 4 10 3 2020-05-20 08:38:00 \n",
- "44 0 4 9 4 2020-05-20 08:39:00 \n",
- "45 0 4 8 5 2020-05-20 08:39:00 \n",
- "46 0 4 7 6 2020-05-20 08:43:00 \n",
- "47 0 4 12 7 2020-05-20 08:45:00 \n",
- "48 0 4 6 8 2020-05-20 08:46:00 \n",
- "49 0 4 1 9 2020-05-20 08:48:00 \n",
- "50 0 4 0 10 2020-05-20 08:50:00 \n",
- "51 0 4 13 11 2020-05-20 08:53:00 \n",
- "52 0 4 14 12 2020-05-20 08:55:00 \n",
- "53 0 4 13 13 2020-05-20 09:00:00 \n",
- "54 0 4 0 14 2020-05-20 09:02:00 \n",
- "55 0 4 1 15 2020-05-20 09:03:00 \n",
- "56 0 4 2 16 2020-05-20 09:04:00 \n",
- "57 0 4 3 17 2020-05-20 09:05:00 \n",
- "58 0 4 4 18 2020-05-20 09:06:00 \n",
- "59 0 4 5 19 2020-05-20 09:07:00 \n",
- "\n",
- " departure_time route_id trip_id stop_id \\\n",
- "0 2020-05-20 08:00:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573721 \n",
- "1 2020-05-20 08:01:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573722 \n",
- "2 2020-05-20 08:02:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573723 \n",
- "3 2020-05-20 08:03:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8583071 \n",
- "4 2020-05-20 08:04:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8572603 \n",
- "5 NaT 1-231-j19-1 14.TA.1-231-j19-1.1.H 8502879 \n",
- "6 2020-05-20 08:06:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502879 \n",
- "7 2020-05-20 08:07:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572603 \n",
- "8 2020-05-20 08:07:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8583071 \n",
- "9 2020-05-20 08:12:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502955 \n",
- "10 2020-05-20 08:15:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502553 \n",
- "11 2020-05-20 08:17:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572601 \n",
- "12 2020-05-20 08:18:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572600 \n",
- "13 2020-05-20 08:18:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8582462 \n",
- "14 NaT 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572747 \n",
- "15 2020-05-20 08:07:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572747 \n",
- "16 2020-05-20 08:08:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8582462 \n",
- "17 2020-05-20 08:09:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572600 \n",
- "18 2020-05-20 08:09:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572601 \n",
- "19 2020-05-20 08:13:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502553 \n",
- "20 2020-05-20 08:15:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572602 \n",
- "21 2020-05-20 08:17:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502955 \n",
- "22 2020-05-20 08:18:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573722 \n",
- "23 2020-05-20 08:20:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573721 \n",
- "24 2020-05-20 08:23:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8503598 \n",
- "25 2020-05-20 08:27:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573720 \n",
- "26 2020-05-20 08:28:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8503598 \n",
- "27 2020-05-20 08:30:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573721 \n",
- "28 2020-05-20 08:31:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573722 \n",
- "29 2020-05-20 08:32:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573723 \n",
- "30 2020-05-20 08:33:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8583071 \n",
- "31 2020-05-20 08:34:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572603 \n",
- "32 NaT 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502879 \n",
- "33 2020-05-20 08:36:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502879 \n",
- "34 2020-05-20 08:37:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572603 \n",
- "35 2020-05-20 08:37:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8583071 \n",
- "36 2020-05-20 08:42:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502955 \n",
- "37 2020-05-20 08:45:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502553 \n",
- "38 2020-05-20 08:47:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572601 \n",
- "39 2020-05-20 08:48:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572600 \n",
- "40 2020-05-20 08:48:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8582462 \n",
- "41 NaT 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572747 \n",
- "42 2020-05-20 08:37:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572747 \n",
- "43 2020-05-20 08:38:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8582462 \n",
- "44 2020-05-20 08:39:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572600 \n",
- "45 2020-05-20 08:39:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572601 \n",
- "46 2020-05-20 08:43:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8502553 \n",
- "47 2020-05-20 08:45:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572602 \n",
- "48 2020-05-20 08:47:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8502955 \n",
- "49 2020-05-20 08:48:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573722 \n",
- "50 2020-05-20 08:50:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573721 \n",
- "51 2020-05-20 08:53:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8503598 \n",
- "52 2020-05-20 08:59:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573720 \n",
- "53 2020-05-20 09:00:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8503598 \n",
- "54 2020-05-20 09:02:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573721 \n",
- "55 2020-05-20 09:03:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573722 \n",
- "56 2020-05-20 09:04:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573723 \n",
- "57 2020-05-20 09:05:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8583071 \n",
- "58 2020-05-20 09:06:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572603 \n",
- "59 NaT 1-231-j19-1 18.TA.1-231-j19-1.1.H 8502879 \n",
- "\n",
- " pickup_type drop_off_type sequence_shift_1 \n",
- "0 0 0 15 \n",
- "1 0 0 16 \n",
- "2 0 0 17 \n",
- "3 0 0 18 \n",
- "4 0 0 19 \n",
- "5 0 0 1 \n",
- "6 0 0 2 \n",
- "7 0 0 3 \n",
- "8 0 0 4 \n",
- "9 0 0 5 \n",
- "10 0 0 6 \n",
- "11 0 0 7 \n",
- "12 0 0 8 \n",
- "13 0 0 10 \n",
- "14 0 0 1 \n",
- "15 0 0 3 \n",
- "16 0 0 4 \n",
- "17 0 0 5 \n",
- "18 0 0 6 \n",
- "19 0 0 7 \n",
- "20 0 0 8 \n",
- "21 0 0 9 \n",
- "22 0 0 10 \n",
- "23 0 0 11 \n",
- "24 0 0 12 \n",
- "25 0 0 13 \n",
- "26 0 0 14 \n",
- "27 0 0 15 \n",
- "28 0 0 16 \n",
- "29 0 0 17 \n",
- "30 0 0 18 \n",
- "31 0 0 19 \n",
- "32 0 0 1 \n",
- "33 0 0 2 \n",
- "34 0 0 3 \n",
- "35 0 0 4 \n",
- "36 0 0 5 \n",
- "37 0 0 6 \n",
- "38 0 0 7 \n",
- "39 0 0 8 \n",
- "40 0 0 10 \n",
- "41 0 0 1 \n",
- "42 0 0 3 \n",
- "43 0 0 4 \n",
- "44 0 0 5 \n",
- "45 0 0 6 \n",
- "46 0 0 7 \n",
- "47 0 0 8 \n",
- "48 0 0 9 \n",
- "49 0 0 10 \n",
- "50 0 0 11 \n",
- "51 0 0 12 \n",
- "52 0 0 13 \n",
- "53 0 0 14 \n",
- "54 0 0 15 \n",
- "55 0 0 16 \n",
- "56 0 0 17 \n",
- "57 0 0 18 \n",
- "58 0 0 19 \n",
- "59 0 0 1 "
- ]
- },
- "execution_count": 42,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "stop_times_ordered = stop_times_ordered.sort_values(by=[\"route_int\", \"trip_int\", \"stop_sequence\"])\n",
- "stop_times_ordered.head(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 51,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " route_int | \n",
- " trip_int | \n",
- " stop_int | \n",
- " stop_sequence | \n",
- " arrival_time | \n",
- " departure_time | \n",
- " route_id | \n",
- " trip_id | \n",
- " stop_id | \n",
- " pickup_type | \n",
- " drop_off_type | \n",
- " sequence_shift_1 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 14 | \n",
- " 2020-05-20 08:00:00 | \n",
- " 2020-05-20 08:00:00 | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8573721 | \n",
- " 0 | \n",
- " 0 | \n",
- " 15 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- " 15 | \n",
- " 2020-05-20 08:01:00 | \n",
- " 2020-05-20 08:01:00 | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8573722 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0 | \n",
- " 0 | \n",
- " 2 | \n",
- " 16 | \n",
- " 2020-05-20 08:02:00 | \n",
- " 2020-05-20 08:02:00 | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8573723 | \n",
- " 0 | \n",
- " 0 | \n",
- " 17 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 0 | \n",
- " 0 | \n",
- " 3 | \n",
- " 17 | \n",
- " 2020-05-20 08:03:00 | \n",
- " 2020-05-20 08:03:00 | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8583071 | \n",
- " 0 | \n",
- " 0 | \n",
- " 18 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 0 | \n",
- " 0 | \n",
- " 4 | \n",
- " 18 | \n",
- " 2020-05-20 08:04:00 | \n",
- " 2020-05-20 08:04:00 | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8572603 | \n",
- " 0 | \n",
- " 0 | \n",
- " 19 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 0 | \n",
- " 0 | \n",
- " 5 | \n",
- " 19 | \n",
- " 2020-05-20 08:05:00 | \n",
- " NaT | \n",
- " 1-231-j19-1 | \n",
- " 14.TA.1-231-j19-1.1.H | \n",
- " 8502879 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 0 | \n",
- " 1 | \n",
- " 5 | \n",
- " 1 | \n",
- " NaT | \n",
- " 2020-05-20 08:06:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8502879 | \n",
- " 0 | \n",
- " 0 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 0 | \n",
- " 1 | \n",
- " 4 | \n",
- " 2 | \n",
- " 2020-05-20 08:07:00 | \n",
- " 2020-05-20 08:07:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8572603 | \n",
- " 0 | \n",
- " 0 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 0 | \n",
- " 1 | \n",
- " 3 | \n",
- " 3 | \n",
- " 2020-05-20 08:07:00 | \n",
- " 2020-05-20 08:07:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8583071 | \n",
- " 0 | \n",
- " 0 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 0 | \n",
- " 1 | \n",
- " 6 | \n",
- " 4 | \n",
- " 2020-05-20 08:10:00 | \n",
- " 2020-05-20 08:12:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8502955 | \n",
- " 0 | \n",
- " 0 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 0 | \n",
- " 1 | \n",
- " 7 | \n",
- " 5 | \n",
- " 2020-05-20 08:15:00 | \n",
- " 2020-05-20 08:15:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8502553 | \n",
- " 0 | \n",
- " 0 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 0 | \n",
- " 1 | \n",
- " 8 | \n",
- " 6 | \n",
- " 2020-05-20 08:17:00 | \n",
- " 2020-05-20 08:17:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8572601 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 0 | \n",
- " 1 | \n",
- " 9 | \n",
- " 7 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8572600 | \n",
- " 0 | \n",
- " 0 | \n",
- " 8 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 0 | \n",
- " 1 | \n",
- " 10 | \n",
- " 8 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8582462 | \n",
- " 0 | \n",
- " 0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 0 | \n",
- " 1 | \n",
- " 11 | \n",
- " 10 | \n",
- " 2020-05-20 08:22:00 | \n",
- " NaT | \n",
- " 1-231-j19-1 | \n",
- " 35.TA.1-231-j19-1.2.R | \n",
- " 8572747 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 0 | \n",
- " 2 | \n",
- " 11 | \n",
- " 1 | \n",
- " NaT | \n",
- " 2020-05-20 08:07:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8572747 | \n",
- " 0 | \n",
- " 0 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 0 | \n",
- " 2 | \n",
- " 10 | \n",
- " 3 | \n",
- " 2020-05-20 08:08:00 | \n",
- " 2020-05-20 08:08:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8582462 | \n",
- " 0 | \n",
- " 0 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 0 | \n",
- " 2 | \n",
- " 9 | \n",
- " 4 | \n",
- " 2020-05-20 08:09:00 | \n",
- " 2020-05-20 08:09:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8572600 | \n",
- " 0 | \n",
- " 0 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 0 | \n",
- " 2 | \n",
- " 8 | \n",
- " 5 | \n",
- " 2020-05-20 08:09:00 | \n",
- " 2020-05-20 08:09:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8572601 | \n",
- " 0 | \n",
- " 0 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 0 | \n",
- " 2 | \n",
- " 7 | \n",
- " 6 | \n",
- " 2020-05-20 08:13:00 | \n",
- " 2020-05-20 08:13:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8502553 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 0 | \n",
- " 2 | \n",
- " 12 | \n",
- " 7 | \n",
- " 2020-05-20 08:15:00 | \n",
- " 2020-05-20 08:15:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8572602 | \n",
- " 0 | \n",
- " 0 | \n",
- " 8 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " 0 | \n",
- " 2 | \n",
- " 6 | \n",
- " 8 | \n",
- " 2020-05-20 08:16:00 | \n",
- " 2020-05-20 08:17:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8502955 | \n",
- " 0 | \n",
- " 0 | \n",
- " 9 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " 0 | \n",
- " 2 | \n",
- " 1 | \n",
- " 9 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 2020-05-20 08:18:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573722 | \n",
- " 0 | \n",
- " 0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " 0 | \n",
- " 2 | \n",
- " 0 | \n",
- " 10 | \n",
- " 2020-05-20 08:20:00 | \n",
- " 2020-05-20 08:20:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573721 | \n",
- " 0 | \n",
- " 0 | \n",
- " 11 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " 0 | \n",
- " 2 | \n",
- " 13 | \n",
- " 11 | \n",
- " 2020-05-20 08:23:00 | \n",
- " 2020-05-20 08:23:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8503598 | \n",
- " 0 | \n",
- " 0 | \n",
- " 12 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " 0 | \n",
- " 2 | \n",
- " 14 | \n",
- " 12 | \n",
- " 2020-05-20 08:25:00 | \n",
- " 2020-05-20 08:27:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573720 | \n",
- " 0 | \n",
- " 0 | \n",
- " 13 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " 0 | \n",
- " 2 | \n",
- " 13 | \n",
- " 13 | \n",
- " 2020-05-20 08:28:00 | \n",
- " 2020-05-20 08:28:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8503598 | \n",
- " 0 | \n",
- " 0 | \n",
- " 14 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 0 | \n",
- " 2 | \n",
- " 0 | \n",
- " 14 | \n",
- " 2020-05-20 08:30:00 | \n",
- " 2020-05-20 08:30:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573721 | \n",
- " 0 | \n",
- " 0 | \n",
- " 15 | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " 0 | \n",
- " 2 | \n",
- " 1 | \n",
- " 15 | \n",
- " 2020-05-20 08:31:00 | \n",
- " 2020-05-20 08:31:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573722 | \n",
- " 0 | \n",
- " 0 | \n",
- " 16 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " 0 | \n",
- " 2 | \n",
- " 2 | \n",
- " 16 | \n",
- " 2020-05-20 08:32:00 | \n",
- " 2020-05-20 08:32:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8573723 | \n",
- " 0 | \n",
- " 0 | \n",
- " 17 | \n",
- "
\n",
- " \n",
- " 30 | \n",
- " 0 | \n",
- " 2 | \n",
- " 3 | \n",
- " 17 | \n",
- " 2020-05-20 08:33:00 | \n",
- " 2020-05-20 08:33:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8583071 | \n",
- " 0 | \n",
- " 0 | \n",
- " 18 | \n",
- "
\n",
- " \n",
- " 31 | \n",
- " 0 | \n",
- " 2 | \n",
- " 4 | \n",
- " 18 | \n",
- " 2020-05-20 08:34:00 | \n",
- " 2020-05-20 08:34:00 | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8572603 | \n",
- " 0 | \n",
- " 0 | \n",
- " 19 | \n",
- "
\n",
- " \n",
- " 32 | \n",
- " 0 | \n",
- " 2 | \n",
- " 5 | \n",
- " 19 | \n",
- " 2020-05-20 08:35:00 | \n",
- " NaT | \n",
- " 1-231-j19-1 | \n",
- " 16.TA.1-231-j19-1.1.H | \n",
- " 8502879 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 33 | \n",
- " 0 | \n",
- " 3 | \n",
- " 5 | \n",
- " 1 | \n",
- " NaT | \n",
- " 2020-05-20 08:36:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8502879 | \n",
- " 0 | \n",
- " 0 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 34 | \n",
- " 0 | \n",
- " 3 | \n",
- " 4 | \n",
- " 2 | \n",
- " 2020-05-20 08:37:00 | \n",
- " 2020-05-20 08:37:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8572603 | \n",
- " 0 | \n",
- " 0 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 35 | \n",
- " 0 | \n",
- " 3 | \n",
- " 3 | \n",
- " 3 | \n",
- " 2020-05-20 08:37:00 | \n",
- " 2020-05-20 08:37:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8583071 | \n",
- " 0 | \n",
- " 0 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 36 | \n",
- " 0 | \n",
- " 3 | \n",
- " 6 | \n",
- " 4 | \n",
- " 2020-05-20 08:40:00 | \n",
- " 2020-05-20 08:42:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8502955 | \n",
- " 0 | \n",
- " 0 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 37 | \n",
- " 0 | \n",
- " 3 | \n",
- " 7 | \n",
- " 5 | \n",
- " 2020-05-20 08:45:00 | \n",
- " 2020-05-20 08:45:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8502553 | \n",
- " 0 | \n",
- " 0 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 38 | \n",
- " 0 | \n",
- " 3 | \n",
- " 8 | \n",
- " 6 | \n",
- " 2020-05-20 08:47:00 | \n",
- " 2020-05-20 08:47:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8572601 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " 39 | \n",
- " 0 | \n",
- " 3 | \n",
- " 9 | \n",
- " 7 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8572600 | \n",
- " 0 | \n",
- " 0 | \n",
- " 8 | \n",
- "
\n",
- " \n",
- " 40 | \n",
- " 0 | \n",
- " 3 | \n",
- " 10 | \n",
- " 8 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8582462 | \n",
- " 0 | \n",
- " 0 | \n",
- " 10 | \n",
- "
\n",
- " \n",
- " 41 | \n",
- " 0 | \n",
- " 3 | \n",
- " 11 | \n",
- " 10 | \n",
- " 2020-05-20 08:52:00 | \n",
- " NaT | \n",
- " 1-231-j19-1 | \n",
- " 37.TA.1-231-j19-1.2.R | \n",
- " 8572747 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 42 | \n",
- " 0 | \n",
- " 4 | \n",
- " 11 | \n",
- " 1 | \n",
- " NaT | \n",
- " 2020-05-20 08:37:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8572747 | \n",
- " 0 | \n",
- " 0 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 43 | \n",
- " 0 | \n",
- " 4 | \n",
- " 10 | \n",
- " 3 | \n",
- " 2020-05-20 08:38:00 | \n",
- " 2020-05-20 08:38:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8582462 | \n",
- " 0 | \n",
- " 0 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 44 | \n",
- " 0 | \n",
- " 4 | \n",
- " 9 | \n",
- " 4 | \n",
- " 2020-05-20 08:39:00 | \n",
- " 2020-05-20 08:39:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8572600 | \n",
- " 0 | \n",
- " 0 | \n",
- " 5 | \n",
+ " 2 | \n",
+ " 2020-05-21 16:55:00 | \n",
+ " 2020-05-21 16:55:00 | \n",
+ " 26-301-j19-1 | \n",
+ " 131.TA.26-301-j19-1.1.R | \n",
+ " 8595511 | \n",
+ " 3 | \n",
"
\n",
" \n",
- " 45 | \n",
- " 0 | \n",
- " 4 | \n",
- " 8 | \n",
- " 5 | \n",
- " 2020-05-20 08:39:00 | \n",
- " 2020-05-20 08:39:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8572601 | \n",
- " 0 | \n",
+ " 2 | \n",
" 0 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 46 | \n",
" 0 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 2020-05-21 16:57:00 | \n",
+ " 2020-05-21 16:57:00 | \n",
+ " 26-301-j19-1 | \n",
+ " 131.TA.26-301-j19-1.1.R | \n",
+ " 8590603 | \n",
" 4 | \n",
- " 7 | \n",
- " 6 | \n",
- " 2020-05-20 08:43:00 | \n",
- " 2020-05-20 08:43:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8502553 | \n",
- " 0 | \n",
- " 0 | \n",
- " 7 | \n",
"
\n",
" \n",
- " 47 | \n",
- " 0 | \n",
- " 4 | \n",
- " 12 | \n",
- " 7 | \n",
- " 2020-05-20 08:45:00 | \n",
- " 2020-05-20 08:45:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8572602 | \n",
- " 0 | \n",
+ " 3 | \n",
" 0 | \n",
- " 8 | \n",
- "
\n",
- " \n",
- " 48 | \n",
" 0 | \n",
+ " 3 | \n",
" 4 | \n",
- " 6 | \n",
- " 8 | \n",
- " 2020-05-20 08:46:00 | \n",
- " 2020-05-20 08:47:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8502955 | \n",
- " 0 | \n",
- " 0 | \n",
- " 9 | \n",
+ " 2020-05-21 16:58:00 | \n",
+ " 2020-05-21 16:58:00 | \n",
+ " 26-301-j19-1 | \n",
+ " 131.TA.26-301-j19-1.1.R | \n",
+ " 8591829 | \n",
+ " 5 | \n",
"
\n",
" \n",
- " 49 | \n",
- " 0 | \n",
- " 4 | \n",
- " 1 | \n",
- " 9 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 2020-05-20 08:48:00 | \n",
- " 1-231-j19-1 | \n",
- " 18.TA.1-231-j19-1.1.H | \n",
- " 8573722 | \n",
+ " 4 | \n",
" 0 | \n",
" 0 | \n",
- " 10 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 2020-05-21 16:59:00 | \n",
+ " 2020-05-21 16:59:00 | \n",
+ " 26-301-j19-1 | \n",
+ " 131.TA.26-301-j19-1.1.R | \n",
+ " 8590602 | \n",
+ " 6 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
- " route_int trip_int stop_int stop_sequence arrival_time \\\n",
- "0 0 0 0 14 2020-05-20 08:00:00 \n",
- "1 0 0 1 15 2020-05-20 08:01:00 \n",
- "2 0 0 2 16 2020-05-20 08:02:00 \n",
- "3 0 0 3 17 2020-05-20 08:03:00 \n",
- "4 0 0 4 18 2020-05-20 08:04:00 \n",
- "5 0 0 5 19 2020-05-20 08:05:00 \n",
- "6 0 1 5 1 NaT \n",
- "7 0 1 4 2 2020-05-20 08:07:00 \n",
- "8 0 1 3 3 2020-05-20 08:07:00 \n",
- "9 0 1 6 4 2020-05-20 08:10:00 \n",
- "10 0 1 7 5 2020-05-20 08:15:00 \n",
- "11 0 1 8 6 2020-05-20 08:17:00 \n",
- "12 0 1 9 7 2020-05-20 08:18:00 \n",
- "13 0 1 10 8 2020-05-20 08:18:00 \n",
- "14 0 1 11 10 2020-05-20 08:22:00 \n",
- "15 0 2 11 1 NaT \n",
- "16 0 2 10 3 2020-05-20 08:08:00 \n",
- "17 0 2 9 4 2020-05-20 08:09:00 \n",
- "18 0 2 8 5 2020-05-20 08:09:00 \n",
- "19 0 2 7 6 2020-05-20 08:13:00 \n",
- "20 0 2 12 7 2020-05-20 08:15:00 \n",
- "21 0 2 6 8 2020-05-20 08:16:00 \n",
- "22 0 2 1 9 2020-05-20 08:18:00 \n",
- "23 0 2 0 10 2020-05-20 08:20:00 \n",
- "24 0 2 13 11 2020-05-20 08:23:00 \n",
- "25 0 2 14 12 2020-05-20 08:25:00 \n",
- "26 0 2 13 13 2020-05-20 08:28:00 \n",
- "27 0 2 0 14 2020-05-20 08:30:00 \n",
- "28 0 2 1 15 2020-05-20 08:31:00 \n",
- "29 0 2 2 16 2020-05-20 08:32:00 \n",
- "30 0 2 3 17 2020-05-20 08:33:00 \n",
- "31 0 2 4 18 2020-05-20 08:34:00 \n",
- "32 0 2 5 19 2020-05-20 08:35:00 \n",
- "33 0 3 5 1 NaT \n",
- "34 0 3 4 2 2020-05-20 08:37:00 \n",
- "35 0 3 3 3 2020-05-20 08:37:00 \n",
- "36 0 3 6 4 2020-05-20 08:40:00 \n",
- "37 0 3 7 5 2020-05-20 08:45:00 \n",
- "38 0 3 8 6 2020-05-20 08:47:00 \n",
- "39 0 3 9 7 2020-05-20 08:48:00 \n",
- "40 0 3 10 8 2020-05-20 08:48:00 \n",
- "41 0 3 11 10 2020-05-20 08:52:00 \n",
- "42 0 4 11 1 NaT \n",
- "43 0 4 10 3 2020-05-20 08:38:00 \n",
- "44 0 4 9 4 2020-05-20 08:39:00 \n",
- "45 0 4 8 5 2020-05-20 08:39:00 \n",
- "46 0 4 7 6 2020-05-20 08:43:00 \n",
- "47 0 4 12 7 2020-05-20 08:45:00 \n",
- "48 0 4 6 8 2020-05-20 08:46:00 \n",
- "49 0 4 1 9 2020-05-20 08:48:00 \n",
+ " route_int trip_int stop_int stop_sequence arrival_time \\\n",
+ "0 0 0 0 1 NaT \n",
+ "1 0 0 1 2 2020-05-21 16:55:00 \n",
+ "2 0 0 2 3 2020-05-21 16:57:00 \n",
+ "3 0 0 3 4 2020-05-21 16:58:00 \n",
+ "4 0 0 4 5 2020-05-21 16:59:00 \n",
"\n",
- " departure_time route_id trip_id stop_id \\\n",
- "0 2020-05-20 08:00:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573721 \n",
- "1 2020-05-20 08:01:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573722 \n",
- "2 2020-05-20 08:02:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573723 \n",
- "3 2020-05-20 08:03:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8583071 \n",
- "4 2020-05-20 08:04:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8572603 \n",
- "5 NaT 1-231-j19-1 14.TA.1-231-j19-1.1.H 8502879 \n",
- "6 2020-05-20 08:06:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502879 \n",
- "7 2020-05-20 08:07:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572603 \n",
- "8 2020-05-20 08:07:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8583071 \n",
- "9 2020-05-20 08:12:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502955 \n",
- "10 2020-05-20 08:15:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502553 \n",
- "11 2020-05-20 08:17:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572601 \n",
- "12 2020-05-20 08:18:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572600 \n",
- "13 2020-05-20 08:18:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8582462 \n",
- "14 NaT 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572747 \n",
- "15 2020-05-20 08:07:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572747 \n",
- "16 2020-05-20 08:08:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8582462 \n",
- "17 2020-05-20 08:09:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572600 \n",
- "18 2020-05-20 08:09:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572601 \n",
- "19 2020-05-20 08:13:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502553 \n",
- "20 2020-05-20 08:15:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572602 \n",
- "21 2020-05-20 08:17:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502955 \n",
- "22 2020-05-20 08:18:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573722 \n",
- "23 2020-05-20 08:20:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573721 \n",
- "24 2020-05-20 08:23:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8503598 \n",
- "25 2020-05-20 08:27:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573720 \n",
- "26 2020-05-20 08:28:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8503598 \n",
- "27 2020-05-20 08:30:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573721 \n",
- "28 2020-05-20 08:31:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573722 \n",
- "29 2020-05-20 08:32:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573723 \n",
- "30 2020-05-20 08:33:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8583071 \n",
- "31 2020-05-20 08:34:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572603 \n",
- "32 NaT 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502879 \n",
- "33 2020-05-20 08:36:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502879 \n",
- "34 2020-05-20 08:37:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572603 \n",
- "35 2020-05-20 08:37:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8583071 \n",
- "36 2020-05-20 08:42:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502955 \n",
- "37 2020-05-20 08:45:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502553 \n",
- "38 2020-05-20 08:47:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572601 \n",
- "39 2020-05-20 08:48:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572600 \n",
- "40 2020-05-20 08:48:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8582462 \n",
- "41 NaT 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572747 \n",
- "42 2020-05-20 08:37:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572747 \n",
- "43 2020-05-20 08:38:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8582462 \n",
- "44 2020-05-20 08:39:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572600 \n",
- "45 2020-05-20 08:39:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572601 \n",
- "46 2020-05-20 08:43:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8502553 \n",
- "47 2020-05-20 08:45:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572602 \n",
- "48 2020-05-20 08:47:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8502955 \n",
- "49 2020-05-20 08:48:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573722 \n",
+ " departure_time route_id trip_id stop_id \\\n",
+ "0 2020-05-21 16:53:00 26-301-j19-1 131.TA.26-301-j19-1.1.R 8587020 \n",
+ "1 2020-05-21 16:55:00 26-301-j19-1 131.TA.26-301-j19-1.1.R 8595511 \n",
+ "2 2020-05-21 16:57:00 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590603 \n",
+ "3 2020-05-21 16:58:00 26-301-j19-1 131.TA.26-301-j19-1.1.R 8591829 \n",
+ "4 2020-05-21 16:59:00 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590602 \n",
"\n",
- " pickup_type drop_off_type sequence_shift_1 \n",
- "0 0 0 15 \n",
- "1 0 0 16 \n",
- "2 0 0 17 \n",
- "3 0 0 18 \n",
- "4 0 0 19 \n",
- "5 0 0 1 \n",
- "6 0 0 2 \n",
- "7 0 0 3 \n",
- "8 0 0 4 \n",
- "9 0 0 5 \n",
- "10 0 0 6 \n",
- "11 0 0 7 \n",
- "12 0 0 8 \n",
- "13 0 0 10 \n",
- "14 0 0 1 \n",
- "15 0 0 3 \n",
- "16 0 0 4 \n",
- "17 0 0 5 \n",
- "18 0 0 6 \n",
- "19 0 0 7 \n",
- "20 0 0 8 \n",
- "21 0 0 9 \n",
- "22 0 0 10 \n",
- "23 0 0 11 \n",
- "24 0 0 12 \n",
- "25 0 0 13 \n",
- "26 0 0 14 \n",
- "27 0 0 15 \n",
- "28 0 0 16 \n",
- "29 0 0 17 \n",
- "30 0 0 18 \n",
- "31 0 0 19 \n",
- "32 0 0 1 \n",
- "33 0 0 2 \n",
- "34 0 0 3 \n",
- "35 0 0 4 \n",
- "36 0 0 5 \n",
- "37 0 0 6 \n",
- "38 0 0 7 \n",
- "39 0 0 8 \n",
- "40 0 0 10 \n",
- "41 0 0 1 \n",
- "42 0 0 3 \n",
- "43 0 0 4 \n",
- "44 0 0 5 \n",
- "45 0 0 6 \n",
- "46 0 0 7 \n",
- "47 0 0 8 \n",
- "48 0 0 9 \n",
- "49 0 0 10 "
+ " sequence_shift_1 \n",
+ "0 2 \n",
+ "1 3 \n",
+ "2 4 \n",
+ "3 5 \n",
+ "4 6 "
]
},
- "execution_count": 51,
+ "execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "stop_times_ordered.head(50)"
+ "stop_times_ordered = stop_times_ordered.sort_values(by=[\"route_int\", \"trip_int\", \"stop_sequence\"])\n",
+ "stop_times_ordered.head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "And we transform it to array, ready ti be used by raptor"
]
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array([['2020-05-20T08:00:00.000000000', '2020-05-20T08:00:00.000000000'],\n",
- " ['2020-05-20T08:01:00.000000000', '2020-05-20T08:01:00.000000000'],\n",
- " ['2020-05-20T08:02:00.000000000', '2020-05-20T08:02:00.000000000'],\n",
+ "array([[ 'NaT', '2020-05-21T16:53:00.000000000'],\n",
+ " ['2020-05-21T16:55:00.000000000', '2020-05-21T16:55:00.000000000'],\n",
+ " ['2020-05-21T16:57:00.000000000', '2020-05-21T16:57:00.000000000'],\n",
" ...,\n",
- " ['2020-05-20T20:44:00.000000000', '2020-05-20T20:46:00.000000000'],\n",
- " ['2020-05-20T18:53:00.000000000', 'NaT'],\n",
- " [ 'NaT', 'NaT']],\n",
+ " ['2020-05-21T15:10:00.000000000', 'NaT'],\n",
+ " [ 'NaT', '2020-05-21T16:45:00.000000000'],\n",
+ " ['2020-05-21T17:05:00.000000000', 'NaT']],\n",
" dtype='datetime64[ns]')"
]
},
- "execution_count": 44,
+ "execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_array = stop_times_ordered[[\"arrival_time\", \"departure_time\"]].to_numpy()\n",
"stop_times_array"
]
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "247920"
+ "245738"
]
},
- "execution_count": 45,
+ "execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(stop_times_array,0)"
]
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stop_times_array.pkl','wb') as f: pickle.dump(stop_times_array, f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Routes: [[route0_nr. Stops, route0_nr.Trips, route0_pointerStops, route0_pointerTrip],[ route1_nr. Stops, route1_nr. Trips, route1_pointerStops, route1_pointerTrip],…]"
+ "#### Routes: \n",
+ "[[route0_nr.Trips, route0_nr. Stops, route0_pointerRoutes, route0_pointerStops_times],[route1_nr.Trips, route1_nr. Stops,, route1_pointerRoutes, route1_pointerStops_times],…]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We start by getting the number of trips and stops there is for each route"
]
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
- " n_stops | \n",
" n_Trips | \n",
+ " n_stops | \n",
"
\n",
" \n",
" route_int | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
- " 18 | \n",
- " 58 | \n",
+ " 1 | \n",
+ " 11 | \n",
"
\n",
" \n",
" 1 | \n",
- " 3 | \n",
- " 101 | \n",
+ " 1 | \n",
+ " 11 | \n",
"
\n",
" \n",
" 2 | \n",
- " 10 | \n",
- " 58 | \n",
+ " 1 | \n",
+ " 11 | \n",
"
\n",
" \n",
" 3 | \n",
- " 2 | \n",
- " 3 | \n",
+ " 1 | \n",
+ " 11 | \n",
"
\n",
" \n",
" 4 | \n",
- " 6 | \n",
- " 23 | \n",
+ " 1 | \n",
+ " 11 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
- " n_stops n_Trips\n",
+ " n_Trips n_stops\n",
"route_int \n",
- "0 18 58\n",
- "1 3 101\n",
- "2 10 58\n",
- "3 2 3\n",
- "4 6 23"
+ "0 1 11\n",
+ "1 1 11\n",
+ "2 1 11\n",
+ "3 1 11\n",
+ "4 1 11"
]
},
- "execution_count": 47,
+ "execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "distinct_trips_stops = stop_times_curated.groupby([\"route_int\"]).nunique()[[\"stop_int\", \"trip_int\"]].sort_index().rename(columns={\"trip_int\": \"n_Trips\", \"stop_int\": \"n_stops\"})\n",
+ "distinct_trips_stops = stop_times_ordered.groupby([\"route_int\"]).nunique()[[\"trip_int\",\"stop_int\"]].sort_index().rename(columns={\"trip_int\": \"n_Trips\", \"stop_int\": \"n_stops\"})\n",
"distinct_trips_stops.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "(279, 2)"
+ "(16210, 2)"
]
},
- "execution_count": 46,
+ "execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_trips_stops.shape"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We create the pointer for the route stops, by adding the unique stops for each route"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
- " n_stops | \n",
" n_Trips | \n",
- " Cumsum_stops | \n",
- " Cumsum_trips | \n",
+ " n_stops | \n",
+ " pointer_routes_stops | \n",
"
\n",
" \n",
" route_int | \n",
" | \n",
" | \n",
" | \n",
- " | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
- " 18 | \n",
- " 58 | \n",
- " 0 | \n",
+ " 1 | \n",
+ " 11 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
- " 3 | \n",
- " 101 | \n",
- " 18 | \n",
- " 58 | \n",
+ " 1 | \n",
+ " 11 | \n",
+ " 11 | \n",
"
\n",
" \n",
" 2 | \n",
- " 10 | \n",
- " 58 | \n",
- " 21 | \n",
- " 159 | \n",
+ " 1 | \n",
+ " 11 | \n",
+ " 22 | \n",
"
\n",
" \n",
" 3 | \n",
- " 2 | \n",
- " 3 | \n",
- " 31 | \n",
- " 217 | \n",
+ " 1 | \n",
+ " 11 | \n",
+ " 33 | \n",
"
\n",
" \n",
" 4 | \n",
- " 6 | \n",
- " 23 | \n",
- " 33 | \n",
- " 220 | \n",
+ " 1 | \n",
+ " 11 | \n",
+ " 44 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
- " n_stops n_Trips Cumsum_stops Cumsum_trips\n",
- "route_int \n",
- "0 18 58 0 0\n",
- "1 3 101 18 58\n",
- "2 10 58 21 159\n",
- "3 2 3 31 217\n",
- "4 6 23 33 220"
+ " n_Trips n_stops pointer_routes_stops\n",
+ "route_int \n",
+ "0 1 11 0\n",
+ "1 1 11 11\n",
+ "2 1 11 22\n",
+ "3 1 11 33\n",
+ "4 1 11 44"
]
},
- "execution_count": 47,
+ "execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "distinct_trips_stops['Cumsum_stops'] = distinct_trips_stops.n_stops.cumsum().shift(1, fill_value=0)\n",
- "distinct_trips_stops['Cumsum_trips'] = distinct_trips_stops.n_Trips.cumsum().shift(1, fill_value=0)\n",
+ "distinct_trips_stops['pointer_routes_stops'] = distinct_trips_stops.n_stops.cumsum().shift(1, fill_value=0)\n",
"distinct_trips_stops.head(5)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We create the pointer for stop_times by adding the number of stops in each route, counting duplicates (due to several trips)"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 68,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Int64Index: 279 entries, 0 to 278\n",
- "Data columns (total 4 columns):\n",
- " # Column Non-Null Count Dtype\n",
- "--- ------ -------------- -----\n",
- " 0 n_stops 279 non-null int64\n",
- " 1 n_Trips 279 non-null int64\n",
- " 2 Cumsum_stops 279 non-null int64\n",
- " 3 Cumsum_trips 279 non-null int64\n",
- "dtypes: int64(4)\n",
- "memory usage: 10.9 KB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "distinct_trips_stops.info()"
+ "distinct_trips_stops[\"pointer_stop_times\"] = (stop_times_ordered.groupby([\"route_int\"]).count().stop_id).cumsum().shift(1, fill_value=0)"
]
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
- " n_stops | \n",
" n_Trips | \n",
- " Cumsum_stops | \n",
- " Cumsum_trips | \n",
- " Cumsum_stops_shift | \n",
- " Cumsum_trips_shift | \n",
+ " n_stops | \n",
+ " pointer_routes_stops | \n",
+ " pointer_stop_times | \n",
+ " pointer_routes_stops_shift | \n",
+ " pointer_stop_times_shift | \n",
"
\n",
" \n",
" route_int | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
- " 18 | \n",
- " 58 | \n",
+ " 1 | \n",
+ " 11 | \n",
" 0 | \n",
" 0 | \n",
- " 18 | \n",
- " 58 | \n",
+ " 11 | \n",
+ " 11 | \n",
"
\n",
" \n",
" 1 | \n",
- " 3 | \n",
- " 101 | \n",
- " 18 | \n",
- " 58 | \n",
- " 21 | \n",
- " 159 | \n",
+ " 1 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 22 | \n",
+ " 22 | \n",
"
\n",
" \n",
" 2 | \n",
- " 10 | \n",
- " 58 | \n",
- " 21 | \n",
- " 159 | \n",
- " 31 | \n",
- " 217 | \n",
+ " 1 | \n",
+ " 11 | \n",
+ " 22 | \n",
+ " 22 | \n",
+ " 33 | \n",
+ " 33 | \n",
"
\n",
" \n",
" 3 | \n",
- " 2 | \n",
- " 3 | \n",
- " 31 | \n",
- " 217 | \n",
+ " 1 | \n",
+ " 11 | \n",
" 33 | \n",
- " 220 | \n",
+ " 33 | \n",
+ " 44 | \n",
+ " 44 | \n",
"
\n",
" \n",
" 4 | \n",
- " 6 | \n",
- " 23 | \n",
- " 33 | \n",
- " 220 | \n",
- " 39 | \n",
- " 243 | \n",
+ " 1 | \n",
+ " 11 | \n",
+ " 44 | \n",
+ " 44 | \n",
+ " 55 | \n",
+ " 55 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
- " n_stops n_Trips Cumsum_stops Cumsum_trips Cumsum_stops_shift \\\n",
- "route_int \n",
- "0 18 58 0 0 18 \n",
- "1 3 101 18 58 21 \n",
- "2 10 58 21 159 31 \n",
- "3 2 3 31 217 33 \n",
- "4 6 23 33 220 39 \n",
+ " n_Trips n_stops pointer_routes_stops pointer_stop_times \\\n",
+ "route_int \n",
+ "0 1 11 0 0 \n",
+ "1 1 11 11 11 \n",
+ "2 1 11 22 22 \n",
+ "3 1 11 33 33 \n",
+ "4 1 11 44 44 \n",
"\n",
- " Cumsum_trips_shift \n",
- "route_int \n",
- "0 58 \n",
- "1 159 \n",
- "2 217 \n",
- "3 220 \n",
- "4 243 "
+ " pointer_routes_stops_shift pointer_stop_times_shift \n",
+ "route_int \n",
+ "0 11 11 \n",
+ "1 22 22 \n",
+ "2 33 33 \n",
+ "3 44 44 \n",
+ "4 55 55 "
]
},
- "execution_count": 49,
+ "execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "distinct_trips_stops[\"Cumsum_stops_shift\"] = distinct_trips_stops['Cumsum_stops'].shift(-1, fill_value=0)\n",
- "distinct_trips_stops[\"Cumsum_trips_shift\"] = distinct_trips_stops['Cumsum_trips'].shift(-1, fill_value=0)\n",
+ "distinct_trips_stops[\"pointer_routes_stops_shift\"] = distinct_trips_stops['pointer_routes_stops'].shift(-1, fill_value=0)\n",
+ "distinct_trips_stops[\"pointer_stop_times_shift\"] = distinct_trips_stops['pointer_stop_times'].shift(-1, fill_value=0)\n",
"distinct_trips_stops.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 50,
+ "execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
- "distinct_trips_stops['Cumsum_trips'] = np.where((distinct_trips_stops[\"Cumsum_trips\"] == distinct_trips_stops[\"Cumsum_trips_shift\"]), None, distinct_trips_stops['Cumsum_trips'])\n",
- "distinct_trips_stops['Cumsum_stops'] = np.where((distinct_trips_stops[\"Cumsum_stops\"] == distinct_trips_stops[\"Cumsum_stops_shift\"]), None, distinct_trips_stops['Cumsum_stops'])\n"
+ "distinct_trips_stops['pointer_routes_stops'] = np.where((distinct_trips_stops[\"pointer_routes_stops\"] == distinct_trips_stops[\"pointer_routes_stops_shift\"]), None, distinct_trips_stops['pointer_routes_stops'])\n",
+ "distinct_trips_stops['pointer_stop_times'] = np.where((distinct_trips_stops[\"pointer_stop_times\"] == distinct_trips_stops[\"pointer_stop_times_shift\"]), None, distinct_trips_stops['pointer_stop_times'])\n"
]
},
{
"cell_type": "code",
- "execution_count": 51,
+ "execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "n_stops False\n",
- "n_Trips False\n",
- "Cumsum_stops False\n",
- "Cumsum_trips False\n",
- "Cumsum_stops_shift False\n",
- "Cumsum_trips_shift False\n",
+ "n_Trips False\n",
+ "n_stops False\n",
+ "pointer_routes_stops False\n",
+ "pointer_stop_times False\n",
+ "pointer_routes_stops_shift False\n",
+ "pointer_stop_times_shift False\n",
"dtype: bool"
]
},
- "execution_count": 51,
+ "execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_trips_stops.isna().any()"
]
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
- "with open('../data/routes_array_df.pkl','wb') as f: pickle.dump(distinct_trips_stops[['n_Trips', 'n_stops', 'Cumsum_trips', 'Cumsum_stops']], f)"
+ "with open('../data/routes_array_df.pkl','wb') as f: pickle.dump(distinct_trips_stops[['n_Trips', 'n_stops', 'pointer_routes_stops', 'pointer_stop_times']], f)"
]
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array([[58, 18, 0, 0],\n",
- " [101, 3, 58, 18],\n",
- " [58, 10, 159, 21],\n",
+ "array([[1, 11, 0, 0],\n",
+ " [1, 11, 11, 11],\n",
+ " [1, 11, 22, 22],\n",
" ...,\n",
- " [2, 2, 22803, 3278],\n",
- " [1, 1, 22805, 3280],\n",
- " [1, 1, 22806, 3281]], dtype=object)"
+ " [1, 6, 237432, 245713],\n",
+ " [1, 13, 237438, 245719],\n",
+ " [3, 2, 237451, 245732]], dtype=object)"
]
},
- "execution_count": 53,
+ "execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "routes_array = distinct_trips_stops[['n_Trips', 'n_stops', 'Cumsum_trips', 'Cumsum_stops']].to_numpy()\n",
+ "routes_array = distinct_trips_stops[['n_Trips', 'n_stops', 'pointer_routes_stops', 'pointer_stop_times']].to_numpy()\n",
"routes_array"
]
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "279"
+ "16210"
]
},
- "execution_count": 54,
+ "execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(routes_array, 0)"
]
},
{
"cell_type": "code",
- "execution_count": 55,
+ "execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/routes_array.pkl','wb') as f: pickle.dump(routes_array, f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"RouteStops: [route0_stop0, route0_stop1,…, route1_stop0, route1_stop1,…, …]\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 57,
+ "execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
+ " index | \n",
" route_int | \n",
" stop_int | \n",
"
\n",
" \n",
" \n",
" \n",
- " 137807 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
" 0 | \n",
- " 5 | \n",
"
\n",
" \n",
- " 63951 | \n",
+ " 1 | \n",
+ " 1 | \n",
" 0 | \n",
- " 11 | \n",
+ " 1 | \n",
"
\n",
" \n",
- " 212369 | \n",
+ " 2 | \n",
+ " 2 | \n",
" 0 | \n",
- " 14 | \n",
+ " 2 | \n",
"
\n",
" \n",
- " 137809 | \n",
+ " 3 | \n",
+ " 3 | \n",
" 0 | \n",
- " 4 | \n",
+ " 3 | \n",
"
\n",
" \n",
- " 212370 | \n",
+ " 4 | \n",
+ " 4 | \n",
" 0 | \n",
- " 13 | \n",
+ " 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
- "text/plain": [
- " route_int stop_int\n",
- "137807 0 5\n",
- "63951 0 11\n",
- "212369 0 14\n",
- "137809 0 4\n",
- "212370 0 13"
+ "text/plain": [
+ " index route_int stop_int\n",
+ "0 0 0 0\n",
+ "1 1 0 1\n",
+ "2 2 0 2\n",
+ "3 3 0 3\n",
+ "4 4 0 4"
]
},
- "execution_count": 57,
+ "execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "route_stops = stop_times_curated.sort_values([\"route_int\", \"stop_sequence\"])\n",
- "route_stops = route_stops[['route_int', 'stop_int']].drop_duplicates()\n",
+ "route_stops = stop_times_ordered.sort_values([\"route_int\", \"stop_sequence\"])\n",
+ "route_stops = route_stops[['route_int', 'stop_int']].drop_duplicates().reset_index()\n",
"route_stops.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 58,
+ "execution_count": 77,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
- "Int64Index: 3282 entries, 137807 to 239684\n",
- "Data columns (total 2 columns):\n",
- " # Column Non-Null Count Dtype\n",
- "--- ------ -------------- -----\n",
- " 0 route_int 3282 non-null int64\n",
- " 1 stop_int 3282 non-null int64\n",
- "dtypes: int64(2)\n",
- "memory usage: 76.9 KB\n"
+ "RangeIndex: 237453 entries, 0 to 237452\n",
+ "Data columns (total 3 columns):\n",
+ " # Column Non-Null Count Dtype\n",
+ "--- ------ -------------- -----\n",
+ " 0 index 237453 non-null int64\n",
+ " 1 route_int 237453 non-null int64\n",
+ " 2 stop_int 237453 non-null int64\n",
+ "dtypes: int64(3)\n",
+ "memory usage: 5.4 MB\n"
]
}
],
"source": [
"route_stops.info()"
]
},
{
"cell_type": "code",
- "execution_count": 59,
+ "execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "279"
+ "16210"
]
},
- "execution_count": 59,
+ "execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"route_stops.route_int.nunique()"
]
},
{
"cell_type": "code",
- "execution_count": 60,
+ "execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/route_stops_df.pkl','wb') as f: pickle.dump(route_stops, f)"
]
},
{
"cell_type": "code",
- "execution_count": 61,
+ "execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array([ 5, 11, 14, ..., 1192, 1069, 1069])"
+ "array([ 0, 1, 2, ..., 1187, 573, 778])"
]
},
- "execution_count": 61,
+ "execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"route_stops_array = route_stops.stop_int.to_numpy()\n",
"route_stops_array"
]
},
{
"cell_type": "code",
- "execution_count": 62,
+ "execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "1566"
+ "1407"
]
},
- "execution_count": 62,
+ "execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(np.unique(route_stops_array))"
]
},
{
"cell_type": "code",
- "execution_count": 63,
+ "execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "3282"
+ "237453"
]
},
- "execution_count": 63,
+ "execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(route_stops_array, 0)"
]
},
{
"cell_type": "code",
- "execution_count": 64,
+ "execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "(3282,)"
+ "(237453,)"
]
},
- "execution_count": 64,
+ "execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"route_stops_array.shape"
]
},
{
"cell_type": "code",
- "execution_count": 65,
+ "execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/route_stops_array.pkl','wb') as f: pickle.dump(route_stops_array, f)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Check if pointers are correct\n",
+ "It is fundamental that the indexes, that serve as pointers, in Routes are correct"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We start by looking at where the indexes for stop_times and route_stops diverge. This will allow us to change. We can see that Route stops should have a new route at 318 while stop_times should have it at 348, so we try with that"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " n_Trips | \n",
+ " n_stops | \n",
+ " pointer_routes_stops | \n",
+ " pointer_stop_times | \n",
+ " pointer_routes_stops_shift | \n",
+ " pointer_stop_times_shift | \n",
+ "
\n",
+ " \n",
+ " route_int | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 24 | \n",
+ " 11 | \n",
+ " 3 | \n",
+ " 315 | \n",
+ " 315 | \n",
+ " 318 | \n",
+ " 348 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 318 | \n",
+ " 348 | \n",
+ " 332 | \n",
+ " 362 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 12 | \n",
+ " 2 | \n",
+ " 332 | \n",
+ " 362 | \n",
+ " 334 | \n",
+ " 386 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 15 | \n",
+ " 3 | \n",
+ " 334 | \n",
+ " 386 | \n",
+ " 337 | \n",
+ " 431 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 10 | \n",
+ " 3 | \n",
+ " 337 | \n",
+ " 431 | \n",
+ " 340 | \n",
+ " 461 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " n_Trips n_stops pointer_routes_stops pointer_stop_times \\\n",
+ "route_int \n",
+ "24 11 3 315 315 \n",
+ "25 1 14 318 348 \n",
+ "26 12 2 332 362 \n",
+ "27 15 3 334 386 \n",
+ "28 10 3 337 431 \n",
+ "\n",
+ " pointer_routes_stops_shift pointer_stop_times_shift \n",
+ "route_int \n",
+ "24 318 348 \n",
+ "25 332 362 \n",
+ "26 334 386 \n",
+ "27 337 431 \n",
+ "28 340 461 "
+ ]
+ },
+ "execution_count": 85,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "distinct_trips_stops.loc[24:29].head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can check if the pointer indicates the routes index number. At the pointer_routes should indicate the first stop of a new route. We try with 318 to see if route_stops has a new route at this index. It does so it works"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " index | \n",
+ " route_int | \n",
+ " stop_int | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 315 | \n",
+ " 315 | \n",
+ " 24 | \n",
+ " 25 | \n",
+ "
\n",
+ " \n",
+ " 316 | \n",
+ " 321 | \n",
+ " 24 | \n",
+ " 27 | \n",
+ "
\n",
+ " \n",
+ " 317 | \n",
+ " 316 | \n",
+ " 24 | \n",
+ " 26 | \n",
+ "
\n",
+ " \n",
+ " 318 | \n",
+ " 348 | \n",
+ " 25 | \n",
+ " 25 | \n",
+ "
\n",
+ " \n",
+ " 319 | \n",
+ " 349 | \n",
+ " 25 | \n",
+ " 26 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " index route_int stop_int\n",
+ "315 315 24 25\n",
+ "316 321 24 27\n",
+ "317 316 24 26\n",
+ "318 348 25 25\n",
+ "319 349 25 26"
+ ]
+ },
+ "execution_count": 86,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "route_stops.loc[315:320].head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We go and see if stop_times has a new route at 348. It does, so it works"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " route_int | \n",
+ " trip_int | \n",
+ " stop_int | \n",
+ " stop_sequence | \n",
+ " arrival_time | \n",
+ " departure_time | \n",
+ " route_id | \n",
+ " trip_id | \n",
+ " stop_id | \n",
+ " sequence_shift_1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 346 | \n",
+ " 24 | \n",
+ " 34 | \n",
+ " 26 | \n",
+ " 2 | \n",
+ " 2020-05-21 18:53:00 | \n",
+ " 2020-05-21 18:53:00 | \n",
+ " 30-57-Y-j19-1 | \n",
+ " 27.TA.30-57-Y-j19-1.1.H | \n",
+ " 8502209 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 347 | \n",
+ " 24 | \n",
+ " 34 | \n",
+ " 27 | \n",
+ " 3 | \n",
+ " 2020-05-21 18:59:00 | \n",
+ " 2020-05-21 18:59:00 | \n",
+ " 30-57-Y-j19-1 | \n",
+ " 27.TA.30-57-Y-j19-1.1.H | \n",
+ " 8503202 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 348 | \n",
+ " 25 | \n",
+ " 35 | \n",
+ " 25 | \n",
+ " 3 | \n",
+ " 2020-05-21 13:14:00 | \n",
+ " 2020-05-21 13:15:00 | \n",
+ " 26-24-j19-1 | \n",
+ " 662.TA.26-24-j19-1.254.R | \n",
+ " 8502208 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 349 | \n",
+ " 25 | \n",
+ " 35 | \n",
+ " 26 | \n",
+ " 4 | \n",
+ " 2020-05-21 13:17:00 | \n",
+ " 2020-05-21 13:17:00 | \n",
+ " 26-24-j19-1 | \n",
+ " 662.TA.26-24-j19-1.254.R | \n",
+ " 8502209 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 350 | \n",
+ " 25 | \n",
+ " 35 | \n",
+ " 27 | \n",
+ " 5 | \n",
+ " 2020-05-21 13:21:00 | \n",
+ " 2020-05-21 13:21:00 | \n",
+ " 26-24-j19-1 | \n",
+ " 662.TA.26-24-j19-1.254.R | \n",
+ " 8503202 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " route_int trip_int stop_int stop_sequence arrival_time \\\n",
+ "346 24 34 26 2 2020-05-21 18:53:00 \n",
+ "347 24 34 27 3 2020-05-21 18:59:00 \n",
+ "348 25 35 25 3 2020-05-21 13:14:00 \n",
+ "349 25 35 26 4 2020-05-21 13:17:00 \n",
+ "350 25 35 27 5 2020-05-21 13:21:00 \n",
+ "\n",
+ " departure_time route_id trip_id stop_id \\\n",
+ "346 2020-05-21 18:53:00 30-57-Y-j19-1 27.TA.30-57-Y-j19-1.1.H 8502209 \n",
+ "347 2020-05-21 18:59:00 30-57-Y-j19-1 27.TA.30-57-Y-j19-1.1.H 8503202 \n",
+ "348 2020-05-21 13:15:00 26-24-j19-1 662.TA.26-24-j19-1.254.R 8502208 \n",
+ "349 2020-05-21 13:17:00 26-24-j19-1 662.TA.26-24-j19-1.254.R 8502209 \n",
+ "350 2020-05-21 13:21:00 26-24-j19-1 662.TA.26-24-j19-1.254.R 8503202 \n",
+ "\n",
+ " sequence_shift_1 \n",
+ "346 3 \n",
+ "347 3 \n",
+ "348 4 \n",
+ "349 5 \n",
+ "350 6 "
+ ]
+ },
+ "execution_count": 87,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "stop_times_ordered.loc[346:400].head(5)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Stops: [[stop0_pointerRoutes, stop0_pointerTransfer], [stop1_pointerRoutes, stop1_pointerTransfer], …]"
]
},
{
"cell_type": "code",
- "execution_count": 67,
+ "execution_count": 88,
"metadata": {},
"outputs": [
{
- "ename": "ValueError",
- "evalue": "You are trying to merge on int64 and object columns. If you wish to proceed you should use pd.concat",
+ "ename": "KeyError",
+ "evalue": "\"None of ['stop_int'] are in the columns\"",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mstops_join\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mroute_stops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtransfers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"stop_id\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"left\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"stop_int\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop_duplicates\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mstops_join\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mjoin\u001b[0;34m(self, other, on, how, lsuffix, rsuffix, sort)\u001b[0m\n\u001b[1;32m 7207\u001b[0m \"\"\"\n\u001b[1;32m 7208\u001b[0m return self._join_compat(\n\u001b[0;32m-> 7209\u001b[0;31m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mon\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mhow\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlsuffix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlsuffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrsuffix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrsuffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7210\u001b[0m )\n\u001b[1;32m 7211\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_join_compat\u001b[0;34m(self, other, on, how, lsuffix, rsuffix, sort)\u001b[0m\n\u001b[1;32m 7230\u001b[0m \u001b[0mright_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7231\u001b[0m \u001b[0msuffixes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlsuffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrsuffix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 7232\u001b[0;31m \u001b[0msort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7233\u001b[0m )\n\u001b[1;32m 7234\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0mindicator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindicator\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m \u001b[0mvalidate\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalidate\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 87\u001b[0m )\n\u001b[1;32m 88\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m 629\u001b[0m \u001b[0;31m# validate the merge keys dtypes. We may need to coerce\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 630\u001b[0m \u001b[0;31m# to avoid incompat dtypes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 631\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_coerce_merge_keys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 632\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 633\u001b[0m \u001b[0;31m# If argument passed to validate,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36m_maybe_coerce_merge_keys\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1144\u001b[0m \u001b[0minferred_right\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mstring_types\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0minferred_left\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mstring_types\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1145\u001b[0m ):\n\u001b[0;32m-> 1146\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1147\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1148\u001b[0m \u001b[0;31m# datetimelikes must match exactly\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mValueError\u001b[0m: You are trying to merge on int64 and object columns. If you wish to proceed you should use pd.concat"
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mstops_join\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mroute_stops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtransfers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"stop_int\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"left\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"stop_int\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop_duplicates\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mstops_join\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mset_index\u001b[0;34m(self, keys, drop, append, inplace, verify_integrity)\u001b[0m\n\u001b[1;32m 4301\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4302\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmissing\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4303\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"None of {missing} are in the columns\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4304\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4305\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: \"None of ['stop_int'] are in the columns\""
]
}
],
"source": [
"stops_join = route_stops.join(transfers.set_index(\"stop_int\"), how=\"left\", on=\"stop_int\").drop_duplicates()\n",
"stops_join.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'stops_join' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mstops_join\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstop_int\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnunique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'stops_join' is not defined"
]
}
],
"source": [
"stops_join.stop_int.nunique()"
]
},
{
"cell_type": "code",
"execution_count": 198,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" n_Routes | \n",
" n_Transfers | \n",
"
\n",
" \n",
" stop_int | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 6 | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 7 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 7 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 7 | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 7 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" n_Routes n_Transfers\n",
"stop_int \n",
"0 6 2\n",
"1 1 7\n",
"2 1 7\n",
"3 1 7\n",
"4 1 7"
]
},
"execution_count": 198,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_route_transfers = stops_join.groupby([\"stop_int\"]).nunique().rename(columns={\"route_int\": \"n_Routes\", \"stop_int2\": \"n_Transfers\"})\n",
"distinct_route_transfers = distinct_route_transfers[[\"n_Routes\", \"n_Transfers\"]].sort_index().rename(columns={\"route_int\": \"n_Trips\", \"stop_int2\": \"n_stops\"})\n",
"distinct_route_transfers.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 199,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1566, 2)"
]
},
"execution_count": 199,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_route_transfers.shape"
]
},
{
"cell_type": "code",
"execution_count": 200,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" n_Routes | \n",
" n_Transfers | \n",
" Cumsum_routes | \n",
" Cumsum_transfers | \n",
"
\n",
" \n",
" stop_int | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 6 | \n",
" 2 | \n",
" 6 | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 7 | \n",
" 7 | \n",
" 9 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 7 | \n",
" 8 | \n",
" 16 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 7 | \n",
" 9 | \n",
" 23 | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 7 | \n",
" 10 | \n",
" 30 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" n_Routes n_Transfers Cumsum_routes Cumsum_transfers\n",
"stop_int \n",
"0 6 2 6 2\n",
"1 1 7 7 9\n",
"2 1 7 8 16\n",
"3 1 7 9 23\n",
"4 1 7 10 30"
]
},
"execution_count": 200,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_route_transfers['Cumsum_routes'] = distinct_route_transfers.n_Routes.cumsum().shift(1, fill_value=0)\n",
"distinct_route_transfers['Cumsum_transfers'] = distinct_route_transfers.n_Transfers.cumsum().shift(1, fill_value=0)\n",
"distinct_route_transfers.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 201,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" n_Routes | \n",
" n_Transfers | \n",
" Cumsum_routes | \n",
" Cumsum_transfers | \n",
" Cumsum_routes_shift | \n",
" Cumsum_transfers_shift | \n",
"
\n",
" \n",
" stop_int | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 6 | \n",
" 2 | \n",
" 6 | \n",
" 2 | \n",
" 7 | \n",
" 9 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 7 | \n",
" 7 | \n",
" 9 | \n",
" 8 | \n",
" 16 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 7 | \n",
" 8 | \n",
" 16 | \n",
" 9 | \n",
" 23 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 7 | \n",
" 9 | \n",
" 23 | \n",
" 10 | \n",
" 30 | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 7 | \n",
" 10 | \n",
" 30 | \n",
" 11 | \n",
" 33 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" n_Routes n_Transfers Cumsum_routes Cumsum_transfers \\\n",
"stop_int \n",
"0 6 2 6 2 \n",
"1 1 7 7 9 \n",
"2 1 7 8 16 \n",
"3 1 7 9 23 \n",
"4 1 7 10 30 \n",
"\n",
" Cumsum_routes_shift Cumsum_transfers_shift \n",
"stop_int \n",
"0 7 9 \n",
"1 8 16 \n",
"2 9 23 \n",
"3 10 30 \n",
"4 11 33 "
]
},
"execution_count": 201,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_route_transfers[\"Cumsum_routes_shift\"] = distinct_route_transfers['Cumsum_routes'].shift(-1, fill_value=0)\n",
"distinct_route_transfers[\"Cumsum_transfers_shift\"] = distinct_route_transfers['Cumsum_transfers'].shift(-1, fill_value=0)\n",
"distinct_route_transfers.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 202,
"metadata": {},
"outputs": [],
"source": [
"distinct_route_transfers['Cumsum_routes'] = np.where((distinct_route_transfers[\"Cumsum_routes\"] == distinct_route_transfers[\"Cumsum_routes_shift\"]), None, distinct_route_transfers['Cumsum_routes'])\n",
"distinct_route_transfers['Cumsum_transfers'] = np.where((distinct_route_transfers[\"Cumsum_transfers\"] == distinct_route_transfers[\"Cumsum_transfers_shift\"]), None, distinct_route_transfers['Cumsum_transfers'])\n"
]
},
{
"cell_type": "code",
"execution_count": 203,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"n_Routes False\n",
"n_Transfers False\n",
"Cumsum_routes False\n",
"Cumsum_transfers True\n",
"Cumsum_routes_shift False\n",
"Cumsum_transfers_shift False\n",
"dtype: bool"
]
},
"execution_count": 203,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distinct_route_transfers.isna().any()"
]
},
{
"cell_type": "code",
"execution_count": 204,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stops_df.pkl','wb') as f: pickle.dump(distinct_route_transfers[['Cumsum_routes', 'Cumsum_transfers']], f)"
]
},
{
"cell_type": "code",
"execution_count": 205,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[6, 2],\n",
" [7, 9],\n",
" [8, 16],\n",
" ...,\n",
" [3280, 10566],\n",
" [3281, 10571],\n",
" [3282, 10575]], dtype=object)"
]
},
"execution_count": 205,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stops_array = distinct_route_transfers[['Cumsum_routes', 'Cumsum_transfers']].to_numpy()\n",
"stops_array"
]
},
{
"cell_type": "code",
"execution_count": 206,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1566"
]
},
"execution_count": 206,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(stops_array, 0)"
]
},
{
"cell_type": "code",
"execution_count": 228,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1566, 2)"
]
},
"execution_count": 228,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stops_array.shape"
]
},
{
"cell_type": "code",
"execution_count": 207,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stops_array.pkl','wb') as f: pickle.dump(stops_array, f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"StopRoutes: [stop0_route1, stop0_route3, stop1_route1, stop2_route1, stop1_route4, …]"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" route_int | \n",
" stop_int | \n",
"
\n",
" \n",
" \n",
" \n",
" 137807 | \n",
" 0 | \n",
" 5 | \n",
"
\n",
" \n",
" 63951 | \n",
" 0 | \n",
" 11 | \n",
"
\n",
" \n",
" 212369 | \n",
" 0 | \n",
" 14 | \n",
"
\n",
" \n",
" 137809 | \n",
" 0 | \n",
" 4 | \n",
"
\n",
" \n",
" 212370 | \n",
" 0 | \n",
" 13 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_int stop_int\n",
"137807 0 5\n",
"63951 0 11\n",
"212369 0 14\n",
"137809 0 4\n",
"212370 0 13"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"route_stops.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" route_id | \n",
" route_int | \n",
" stop_id | \n",
" stop_int | \n",
"
\n",
" \n",
" \n",
" \n",
" 49208 | \n",
" 1-231-j19-1 | \n",
" 0 | \n",
" 8573721 | \n",
" 0 | \n",
"
\n",
" \n",
" 49209 | \n",
" 1-231-j19-1 | \n",
" 0 | \n",
" 8573722 | \n",
" 1 | \n",
"
\n",
" \n",
" 49210 | \n",
" 1-231-j19-1 | \n",
" 0 | \n",
" 8573723 | \n",
" 2 | \n",
"
\n",
" \n",
" 49211 | \n",
" 1-231-j19-1 | \n",
" 0 | \n",
" 8583071 | \n",
" 3 | \n",
"
\n",
" \n",
" 49212 | \n",
" 1-231-j19-1 | \n",
" 0 | \n",
" 8572603 | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" route_id route_int stop_id stop_int\n",
"49208 1-231-j19-1 0 8573721 0\n",
"49209 1-231-j19-1 0 8573722 1\n",
"49210 1-231-j19-1 0 8573723 2\n",
"49211 1-231-j19-1 0 8583071 3\n",
"49212 1-231-j19-1 0 8572603 4"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes = stop_times_curated[[\"route_id\", \"route_int\", \"stop_id\", \"stop_int\"]].drop_duplicates().sort_values([\"stop_int\", \"route_int\"])\n",
"stop_routes.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(3282, 4)"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes.shape"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"279"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_times_curated.route_id.nunique()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"279"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes.route_int.nunique()"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stop_routes_df.pkl','wb') as f: pickle.dump(stop_routes, f)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0, 0, 0, ..., 274, 275, 275])"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes_array = stop_routes[\"route_int\"].to_numpy()\n",
"stop_routes_array"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3282"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(stop_routes_array, 0)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(3282,)"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stop_routes_array.shape"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stop_routes_array.pkl','wb') as f: pickle.dump(stop_routes_array, f)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Transfer: [[[stop0_nameTargetStop1, transferTime1], [stop0_nameTargetStop2, transferTime2],….], [stop1_nameTargetStop1, transferTime1], [stop1_nameTargetStop2, transferTime2],….],…]"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" stop_int2 | \n",
" Transfer_time_sec | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1166 | \n",
" 146 | \n",
"
\n",
" \n",
" 1 | \n",
" 1270 | \n",
" 360 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 8 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 16 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" stop_int2 Transfer_time_sec\n",
"0 1166 146\n",
"1 1270 360\n",
"2 2 8\n",
"3 3 16\n",
"4 4 0"
]
},
"execution_count": 224,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfer_pandas = transfers.sort_values(\"stop_int\")[[\"stop_int2\", \"Transfer_time_sec\"]]\n",
"transfer_pandas.head()"
]
},
{
"cell_type": "code",
"execution_count": 226,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1794"
]
},
"execution_count": 226,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfer_pandas.stop_int2.nunique()"
]
},
{
"cell_type": "code",
"execution_count": 225,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/transfer_df.pkl','wb') as f: pickle.dump(transfers.sort_values(\"stop_id\"), f)"
]
},
{
"cell_type": "code",
"execution_count": 219,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1166, 146],\n",
" [1270, 360],\n",
" [ 2, 8],\n",
" ...,\n",
" [ 108, 371],\n",
" [ 102, 439],\n",
" [1739, 519]])"
]
},
"execution_count": 219,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transfer_array = transfer_pandas.to_numpy()\n",
"transfer_array"
]
},
{
"cell_type": "code",
"execution_count": 220,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/transfer_array.pkl','wb') as f: pickle.dump(transfer_array, f)"
]
},
{
"cell_type": "code",
"execution_count": 221,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"12564"
]
},
"execution_count": 221,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.size(transfer_array, 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"read files as pickles"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/stop_times_array.pkl','rb') as f: arrayname1 = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"with open('../data/transfer_array.pkl','rb') as f: arrayname2 = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 'NaT', '2020-05-19T08:49:00.000000000'],\n",
" ['2020-05-19T08:50:00.000000000', '2020-05-19T08:50:00.000000000'],\n",
" ['2020-05-19T08:51:00.000000000', '2020-05-19T08:51:00.000000000'],\n",
" ...,\n",
" [ 'NaT', '2020-05-19T16:07:00.000000000'],\n",
" ['2020-05-19T16:11:00.000000000', '2020-05-19T16:15:00.000000000'],\n",
" ['2020-05-19T16:19:00.000000000', 'NaT']],\n",
" dtype='datetime64[ns]')"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"arrayname1"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1166, 146],\n",
" [1270, 360],\n",
" [ 2, 8],\n",
" ...,\n",
" [ 108, 371],\n",
" [ 102, 439],\n",
" [1739, 519]])"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"arrayname2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}