diff --git a/data/route_stops_array.pkl b/data/route_stops_array.pkl index 68960bd..6103926 100644 --- a/data/route_stops_array.pkl +++ b/data/route_stops_array.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be9cd21f1b21a5e3202f8cb41841ff83fb0fb142692bd61d3196796a78507397 -size 44519 +oid sha256:ec9af358ad5e0802d624b2795ac20f3282fb689c21752d71f9fef698cbc3785b +size 1899785 diff --git a/data/route_stops_df.pkl b/data/route_stops_df.pkl index 7a010da..9634efe 100644 --- a/data/route_stops_df.pkl +++ b/data/route_stops_df.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3c999884bd261c116631fa0ff45b911784fc919f8d27aa8dbc500530a17815bd -size 133926 +oid sha256:517e26beb718385fc4a5651fb5460ca7c61aa7a578f0a771427ef0415eff521c +size 5699733 diff --git a/data/routes_array.pkl b/data/routes_array.pkl index 29c8320..dd063c1 100644 --- a/data/routes_array.pkl +++ b/data/routes_array.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb0794e9424d5343abc3729d91f99226ca19e8e580dfbe1953c66d4a7bef0820 -size 2920 +oid sha256:76c403daeb38714084b99e1ecc0dd151ac2977d3445120df958715ebaf3478d8 +size 206451 diff --git a/data/routes_array_df.pkl b/data/routes_array_df.pkl index 0ece1e5..47b1efb 100644 --- a/data/routes_array_df.pkl +++ b/data/routes_array_df.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04b5fd2e0173563e9cf6658de02c1aa90e05940fd740a44c038671580c9c8fc0 -size 12713 +oid sha256:e642f9776ecd768916550628e7262f98eca451d84dab969419ed8f1c28c3ad7b +size 531467 diff --git a/data/stop_times_array.pkl b/data/stop_times_array.pkl index 866b9f9..b54a600 100644 --- a/data/stop_times_array.pkl +++ b/data/stop_times_array.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e847bbbc6a9e7bf078cd3ba4595752a54969aed3b7ca2dae32176cb2e61d8b1 -size 3966905 +oid sha256:9a5b61142e94d00f3e247423d80655e35601baffb7d63810446145e37bcdf775 +size 3931993 diff --git a/data/stop_times_df.pkl b/data/stop_times_df.pkl index d5fd6af..a230267 100644 --- a/data/stop_times_df.pkl +++ b/data/stop_times_df.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58a87e63f73f61e935780c4150c62ff27d1f0a2d55ed9f8457a2c8353caf3c65 -size 26154587 +oid sha256:83ef53159dcc6ed2394199f68d68424290b9ee10981ada263dee4c6b0282e1ab +size 21899011 diff --git a/notebooks/Arrays_to_pickle.ipynb b/notebooks/Arrays_to_pickle.ipynb index b5f9be9..2fdeb9c 100644 --- a/notebooks/Arrays_to_pickle.ipynb +++ b/notebooks/Arrays_to_pickle.ipynb @@ -1,7506 +1,5868 @@ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Transform csv preproccesed files to pickle" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Import packages" ] }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import pickle\n", "import itertools" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read files" ] }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0trip_idstop_idarrival_timedeparture_timestop_sequencepickup_typedrop_off_typehour_departureroute_iddirection_id
00666.TA.26-4-j19-1.20.R857618207:02:0007:02:001007.026-4-j19-11
11243.TA.26-311-j19-1.3.R859083407:16:0007:16:001007.026-311-j19-11
22406.TA.26-62-j19-1.3.R859134907:24:0007:24:001007.026-62-j19-11
3362.TA.57-2-Y-j19-1.33.H8503000:0:1307:34:0007:34:001007.057-2-Y-j19-10
441179.TA.26-5-B-j19-1.23.R859124507:36:0007:36:001007.026-5-B-j19-11
\n", "
" ], "text/plain": [ " Unnamed: 0 trip_id stop_id arrival_time \\\n", "0 0 666.TA.26-4-j19-1.20.R 8576182 07:02:00 \n", "1 1 243.TA.26-311-j19-1.3.R 8590834 07:16:00 \n", "2 2 406.TA.26-62-j19-1.3.R 8591349 07:24:00 \n", "3 3 62.TA.57-2-Y-j19-1.33.H 8503000:0:13 07:34:00 \n", "4 4 1179.TA.26-5-B-j19-1.23.R 8591245 07:36:00 \n", "\n", " departure_time stop_sequence pickup_type drop_off_type hour_departure \\\n", "0 07:02:00 1 0 0 7.0 \n", "1 07:16:00 1 0 0 7.0 \n", "2 07:24:00 1 0 0 7.0 \n", "3 07:34:00 1 0 0 7.0 \n", "4 07:36:00 1 0 0 7.0 \n", "\n", " route_id direction_id \n", "0 26-4-j19-1 1 \n", "1 26-311-j19-1 1 \n", "2 26-62-j19-1 1 \n", "3 57-2-Y-j19-1 0 \n", "4 26-5-B-j19-1 1 " ] }, - "execution_count": 62, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#stop_times\n", "stop_times_curated = pd.read_csv(\"../data/stop_times_curated.csv\")\n", "stop_times_curated.head(5)" ] }, { "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,\n", - " nan])" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "stop_times_curated.hour_departure.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17',\n", - " '18', '19', '24', '25'], dtype=object)" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "stop_times_curated.departure_time.str.slice(0,2).unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 63, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0stop_idstop_id2distanceTransfer_time_sec
00850092685906160.122430146
11850092685907370.300175360
2285021868502186:0:10.0067628
3385021868502186:0:20.01352416
4485021868502186P0.0000000
\n", "
" ], "text/plain": [ " Unnamed: 0 stop_id stop_id2 distance Transfer_time_sec\n", "0 0 8500926 8590616 0.122430 146\n", "1 1 8500926 8590737 0.300175 360\n", "2 2 8502186 8502186:0:1 0.006762 8\n", "3 3 8502186 8502186:0:2 0.013524 16\n", "4 4 8502186 8502186P 0.000000 0" ] }, - "execution_count": 63, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#transfers\n", "transfers = pd.read_csv(\"../data/transfers.csv\")\n", "transfers.head(5)" ] }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#routes\n", "routes_curated = pd.read_csv(\"../data/transfers.csv\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create stop_id same for all platforms" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "#copy information stop_id with platform in stop_id_raw\n", "stop_times_curated[\"stop_id_raw\"] = stop_times_curated[\"stop_id\"]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0trip_idstop_idarrival_timedeparture_timestop_sequencepickup_typedrop_off_typehour_departureroute_iddirection_idstop_id_raw
00666.TA.26-4-j19-1.20.R857618207:02:0007:02:001007.026-4-j19-118576182
11243.TA.26-311-j19-1.3.R859083407:16:0007:16:001007.026-311-j19-118590834
22406.TA.26-62-j19-1.3.R859134907:24:0007:24:001007.026-62-j19-118591349
3362.TA.57-2-Y-j19-1.33.H850300007:34:0007:34:001007.057-2-Y-j19-108503000:0:13
441179.TA.26-5-B-j19-1.23.R859124507:36:0007:36:001007.026-5-B-j19-118591245
\n", "
" ], "text/plain": [ " Unnamed: 0 trip_id stop_id arrival_time departure_time \\\n", "0 0 666.TA.26-4-j19-1.20.R 8576182 07:02:00 07:02:00 \n", "1 1 243.TA.26-311-j19-1.3.R 8590834 07:16:00 07:16:00 \n", "2 2 406.TA.26-62-j19-1.3.R 8591349 07:24:00 07:24:00 \n", "3 3 62.TA.57-2-Y-j19-1.33.H 8503000 07:34:00 07:34:00 \n", "4 4 1179.TA.26-5-B-j19-1.23.R 8591245 07:36:00 07:36:00 \n", "\n", " stop_sequence pickup_type drop_off_type hour_departure route_id \\\n", "0 1 0 0 7.0 26-4-j19-1 \n", "1 1 0 0 7.0 26-311-j19-1 \n", "2 1 0 0 7.0 26-62-j19-1 \n", "3 1 0 0 7.0 57-2-Y-j19-1 \n", "4 1 0 0 7.0 26-5-B-j19-1 \n", "\n", " direction_id stop_id_raw \n", "0 1 8576182 \n", "1 1 8590834 \n", "2 1 8591349 \n", "3 0 8503000:0:13 \n", "4 1 8591245 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Use only first 7 characters for stop_id\n", "stop_times_curated[\"stop_id\"] = stop_times_curated[\"stop_id_raw\"].str.slice(0, 7)\n", "stop_times_curated[\"stop_id\"] = pd.to_numeric(stop_times_curated[\"stop_id\"])\n", "stop_times_curated.head(5)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "#copy information stop_id with platform in stop_id_raw\n", "transfers[\"stop_id_raw\"] = transfers[\"stop_id\"]\n", "transfers[\"stop_id2_raw\"] = transfers[\"stop_id2\"]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0stop_idstop_id2distanceTransfer_time_secstop_id_rawstop_id2_raw
00850092685906160.12243014685009268590616
11850092685907370.30017536085009268590737
22850218685021860.006762885021868502186:0:1
33850218685021860.0135241685021868502186:0:2
44850218685021860.000000085021868502186P
\n", "
" ], "text/plain": [ " Unnamed: 0 stop_id stop_id2 distance Transfer_time_sec stop_id_raw \\\n", "0 0 8500926 8590616 0.122430 146 8500926 \n", "1 1 8500926 8590737 0.300175 360 8500926 \n", "2 2 8502186 8502186 0.006762 8 8502186 \n", "3 3 8502186 8502186 0.013524 16 8502186 \n", "4 4 8502186 8502186 0.000000 0 8502186 \n", "\n", " stop_id2_raw \n", "0 8590616 \n", "1 8590737 \n", "2 8502186:0:1 \n", "3 8502186:0:2 \n", "4 8502186P " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Use only first 7 characters for stop_id\n", "transfers[\"stop_id\"] = transfers[\"stop_id_raw\"].str.slice(0, 7)\n", "transfers[\"stop_id2\"] = transfers[\"stop_id2_raw\"].str.slice(0, 7)\n", "transfers[\"stop_id\"] = pd.to_numeric(transfers[\"stop_id\"])\n", "transfers[\"stop_id2\"] = pd.to_numeric(transfers[\"stop_id2\"])\n", "transfers.head(5)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Solve prblem in hour_departure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that hour departure does not contain times above 23, so we need to correct this" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,\n", + " nan])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated.hour_departure.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17',\n", + " '18', '19', '24', '25'], dtype=object)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated.departure_time.str.slice(0,2).unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "stop_times_curated[\"hour_departure\"] = pd.to_numeric(stop_times_curated.departure_time.str.slice(0,2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check if well converted to int" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 24, 25])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated[\"hour_departure\"].unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We drop the trips before 7 am and after 7 pm" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "250777" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated.trip_id.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "trip_id_drop = np.where(((stop_times_curated.hour_departure > 19) |\\\n", + " (stop_times_curated.hour_departure < 7)),\\\n", + " stop_times_curated[\"trip_id\"] , None)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "stop_times_curated = stop_times_curated[~stop_times_curated[\"trip_id\"].isin(trip_id_drop)]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "246514" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_curated.trip_id.count()" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Delete trips with 1 option\n", "To finish" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "#drop columns not useful or source of wrong manipulations in \"to_begin_with\"\n", "stop_times_curated = stop_times_curated.drop(columns=[\"Unnamed: 0\", \"hour_departure\", \"drop_off_type\", \"pickup_type\"])" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
trip_idstop_idarrival_timedeparture_timestop_sequenceroute_iddirection_idstop_id_raw
trip_id
1.TA.1-231-j19-1.1.H1151717181115
1.TA.1-44-j19-1.1.R13333113
1.TA.1-444-j19-1.1.H19999119
1.TA.12-E03-j19-1.1.H12222112
1.TA.18-46-j19-1.1.H11111111
\n", "
" ], "text/plain": [ " trip_id stop_id arrival_time departure_time \\\n", "trip_id \n", "1.TA.1-231-j19-1.1.H 1 15 17 17 \n", "1.TA.1-44-j19-1.1.R 1 3 3 3 \n", "1.TA.1-444-j19-1.1.H 1 9 9 9 \n", "1.TA.12-E03-j19-1.1.H 1 2 2 2 \n", "1.TA.18-46-j19-1.1.H 1 1 1 1 \n", "\n", " stop_sequence route_id direction_id stop_id_raw \n", "trip_id \n", "1.TA.1-231-j19-1.1.H 18 1 1 15 \n", "1.TA.1-44-j19-1.1.R 3 1 1 3 \n", "1.TA.1-444-j19-1.1.H 9 1 1 9 \n", "1.TA.12-E03-j19-1.1.H 2 1 1 2 \n", "1.TA.18-46-j19-1.1.H 1 1 1 1 " ] }, - "execution_count": 51, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#drop trips with only 1 stop\n", "number_stop = stop_times_curated.groupby('trip_id').nunique()\n", "number_stop.head(5)" ] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "#get trips with 1 stop\n", "trip_with_1_stop = np.where((number_stop.stop_id == 1), number_stop.index, None)" ] }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[None None None ... None None None]\n" ] } ], "source": [ "np.set_printoptions(threshold=50)\n", "print(trip_with_1_stop)" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "19800" + "19444" ] }, - "execution_count": 45, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_curated.trip_id.nunique()" ] }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "#drop trips with only 1 stop\n", "stop_times_curated = stop_times_curated[~stop_times_curated[\"trip_id\"].isin(trip_with_1_stop)]" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "19800" + "18668" ] }, - "execution_count": 47, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_curated.trip_id.nunique()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add information on transport type\n", "To do" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create route_int, trip_int and stop_int as consecutive integer IDs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Route_int\n", "The route_int Id is given arbitrarily" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "
stop_id
trip_id
1.TA.1-231-j19-1.1.H(8583071, 8502553, 8572600, 8573722, 8572747, ...
1.TA.1-44-j19-1.1.R(8590279, 8590275, 8591891)
1.TA.1-444-j19-1.1.H(8591365, 8502979, 8591059, 8581346, 8572596, ...
1.TA.12-E03-j19-1.1.H(8573205, 8596126)
1.TA.18-46-j19-1.1.H(8503000,)1.TA.21-23-j19-1.1.R(8503003, 8503000)
\n", "
" ], "text/plain": [ " stop_id\n", "trip_id \n", "1.TA.1-231-j19-1.1.H (8583071, 8502553, 8572600, 8573722, 8572747, ...\n", "1.TA.1-44-j19-1.1.R (8590279, 8590275, 8591891)\n", "1.TA.1-444-j19-1.1.H (8591365, 8502979, 8591059, 8581346, 8572596, ...\n", "1.TA.12-E03-j19-1.1.H (8573205, 8596126)\n", - "1.TA.18-46-j19-1.1.H (8503000,)" + "1.TA.21-23-j19-1.1.R (8503003, 8503000)" ] }, - "execution_count": 9, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#group stops into a sequence\n", "tuple_stops = stop_times_curated.groupby('trip_id')['stop_id'].apply(tuple).to_frame()\n", "tuple_stops.head(5)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", + " \n", " \n", "
stop_id
(8500926, 8590602, 8595511, 8590737, 8591829, 8590618, 8587020, 8590739, 8590614, 8590603, 8590615)
(8500926, 8590603, 8587020, 8590615, 8590739, 8591829, 8590602, 8590618, 8590737, 8595511, 8590614)
(8500926, 8590603, 8587020, 8590739, 8590618, 8590737, 8590614, 8590615, 8590602, 8591829, 8595511)
(8500926, 8590603, 8590739, 8595511, 8590614, 8590737, 8590618, 8587020, 8591829, 8590615, 8590602)
(8500926, 8590618, 8590737, 8590739, 8587020, 8590602, 8590615, 8595511, 8591829, 8590614, 8590603)
(8500926, 8590737, 8590603, 8590614, 8590615, 8591829, 8595511, 8590739, 8587020, 8590618, 8590602)
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: []\n", - "Index: [(8500926, 8590602, 8595511, 8590737, 8591829, 8590618, 8587020, 8590739, 8590614, 8590603, 8590615), (8500926, 8590603, 8587020, 8590615, 8590739, 8591829, 8590602, 8590618, 8590737, 8595511, 8590614), (8500926, 8590603, 8587020, 8590739, 8590618, 8590737, 8590614, 8590615, 8590602, 8591829, 8595511), (8500926, 8590603, 8590739, 8595511, 8590614, 8590737, 8590618, 8587020, 8591829, 8590615, 8590602), (8500926, 8590618, 8590737, 8590739, 8587020, 8590602, 8590615, 8595511, 8591829, 8590614, 8590603)]" + "Index: [(8500926, 8590602, 8595511, 8590737, 8591829, 8590618, 8587020, 8590739, 8590614, 8590603, 8590615), (8500926, 8590603, 8587020, 8590615, 8590739, 8591829, 8590602, 8590618, 8590737, 8595511, 8590614), (8500926, 8590603, 8590739, 8595511, 8590614, 8590737, 8590618, 8587020, 8591829, 8590615, 8590602), (8500926, 8590618, 8590737, 8590739, 8587020, 8590602, 8590615, 8595511, 8591829, 8590614, 8590603), (8500926, 8590737, 8590603, 8590614, 8590615, 8591829, 8595511, 8590739, 8587020, 8590618, 8590602)]" ] }, - "execution_count": 10, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#group to get unique stop sequences\n", "unique_stop_sequence = tuple_stops.groupby(\"stop_id\").count()\n", "unique_stop_sequence.head(5)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", "
stop_idroute_int
0(8500926, 8590602, 8595511, 8590737, 8591829, ...0
1(8500926, 8590603, 8587020, 8590615, 8590739, ...1
2(8500926, 8590603, 8587020, 8590739, 8590618, ...(8500926, 8590603, 8590739, 8595511, 8590614, ...2
3(8500926, 8590603, 8590739, 8595511, 8590614, ...(8500926, 8590618, 8590737, 8590739, 8587020, ...3
4(8500926, 8590618, 8590737, 8590739, 8587020, ...(8500926, 8590737, 8590603, 8590614, 8590615, ...4
\n", "
" ], "text/plain": [ " stop_id route_int\n", "0 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0\n", "1 (8500926, 8590603, 8587020, 8590615, 8590739, ... 1\n", - "2 (8500926, 8590603, 8587020, 8590739, 8590618, ... 2\n", - "3 (8500926, 8590603, 8590739, 8595511, 8590614, ... 3\n", - "4 (8500926, 8590618, 8590737, 8590739, 8587020, ... 4" + "2 (8500926, 8590603, 8590739, 8595511, 8590614, ... 2\n", + "3 (8500926, 8590618, 8590737, 8590739, 8587020, ... 3\n", + "4 (8500926, 8590737, 8590603, 8590614, 8590615, ... 4" ] }, - "execution_count": 11, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#create dataframe and route_int\n", "df_unique_stop_sequence = unique_stop_sequence.reset_index()\n", "df_unique_stop_sequence[\"route_int\"] = df_unique_stop_sequence.index\n", "df_unique_stop_sequence.head(5)" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", "
stop_idroute_int
trip_id
131.TA.26-301-j19-1.1.R(8500926, 8590602, 8595511, 8590737, 8591829, ...0
32.TA.26-301-j19-1.1.R(8500926, 8590603, 8587020, 8590615, 8590739, ...1
143.TA.26-301-j19-1.1.R(8500926, 8590603, 8587020, 8590739, 8590618, ...2
133.TA.26-301-j19-1.1.R(8500926, 8590603, 8590739, 8595511, 8590614, ...32
31.TA.26-301-j19-1.1.R(8500926, 8590618, 8590737, 8590739, 8587020, ...3
140.TA.26-301-j19-1.1.R(8500926, 8590737, 8590603, 8590614, 8590615, ...4
\n", "
" ], "text/plain": [ " stop_id \\\n", "trip_id \n", "131.TA.26-301-j19-1.1.R (8500926, 8590602, 8595511, 8590737, 8591829, ... \n", "32.TA.26-301-j19-1.1.R (8500926, 8590603, 8587020, 8590615, 8590739, ... \n", - "143.TA.26-301-j19-1.1.R (8500926, 8590603, 8587020, 8590739, 8590618, ... \n", "133.TA.26-301-j19-1.1.R (8500926, 8590603, 8590739, 8595511, 8590614, ... \n", "31.TA.26-301-j19-1.1.R (8500926, 8590618, 8590737, 8590739, 8587020, ... \n", + "140.TA.26-301-j19-1.1.R (8500926, 8590737, 8590603, 8590614, 8590615, ... \n", "\n", " route_int \n", "trip_id \n", "131.TA.26-301-j19-1.1.R 0 \n", "32.TA.26-301-j19-1.1.R 1 \n", - "143.TA.26-301-j19-1.1.R 2 \n", - "133.TA.26-301-j19-1.1.R 3 \n", - "31.TA.26-301-j19-1.1.R 4 " + "133.TA.26-301-j19-1.1.R 2 \n", + "31.TA.26-301-j19-1.1.R 3 \n", + "140.TA.26-301-j19-1.1.R 4 " ] }, - "execution_count": 12, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#join with trip information\n", "trip_with_routes = tuple_stops.join(df_unique_stop_sequence.set_index(\"stop_id\"), on=\"stop_id\", how=\"left\").sort_values(\"route_int\")\n", "trip_with_routes.head(5)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "trip_with_routes = trip_with_routes.rename(columns={\"stop_id\" : \"all_stops\"})" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "19800" + "18668" ] }, - "execution_count": 14, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#check if routes and trips do not have the same number\n", "trip_with_routes.index.nunique()" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "16538" + "16210" ] }, - "execution_count": 15, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trip_with_routes.route_int.nunique()" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "#join to get route_int in stop_times\n", "stop_times_routes = stop_times_curated.join(trip_with_routes, how=\"left\", on=\"trip_id\" , lsuffix='_left', rsuffix='_right')\n" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
Unnamed: 0trip_idstop_idarrival_timedeparture_timestop_sequencepickup_typedrop_off_typehour_departureroute_iddirection_idstop_id_rawall_stopsroute_int
00666.TA.26-4-j19-1.20.R857618207:02:0007:02:001007.026-4-j19-118576182(8576182, 8591391, 8576197, 8576195, 8576193, ...33223239
11243.TA.26-311-j19-1.3.R859083407:16:0007:16:001007.026-311-j19-118590834(8590834, 8590849, 8590619, 8590848, 8590525, ...86078443
22406.TA.26-62-j19-1.3.R859134907:24:0007:24:001007.026-62-j19-118591349(8591349, 8591403, 8591293, 8576262, 8580449, ...14529
3362.TA.57-2-Y-j19-1.33.H850300007:34:0007:34:001007.057-2-Y-j19-108503000:0:13(8503000,)29114245
441179.TA.26-5-B-j19-1.23.R859124507:36:0007:36:001007.026-5-B-j19-118591245(8591245, 8591366, 8591415, 8576193, 8591059, ...1257212326
525.TA.26-845-j19-1.2.H857350408:00:0008:00:00126-845-j19-108573504(8573504, 8580879, 8575977, 8575976)2526
\n", "
" ], "text/plain": [ - " Unnamed: 0 trip_id stop_id arrival_time departure_time \\\n", - "0 0 666.TA.26-4-j19-1.20.R 8576182 07:02:00 07:02:00 \n", - "1 1 243.TA.26-311-j19-1.3.R 8590834 07:16:00 07:16:00 \n", - "2 2 406.TA.26-62-j19-1.3.R 8591349 07:24:00 07:24:00 \n", - "3 3 62.TA.57-2-Y-j19-1.33.H 8503000 07:34:00 07:34:00 \n", - "4 4 1179.TA.26-5-B-j19-1.23.R 8591245 07:36:00 07:36:00 \n", + " trip_id stop_id arrival_time departure_time \\\n", + "0 666.TA.26-4-j19-1.20.R 8576182 07:02:00 07:02:00 \n", + "1 243.TA.26-311-j19-1.3.R 8590834 07:16:00 07:16:00 \n", + "2 406.TA.26-62-j19-1.3.R 8591349 07:24:00 07:24:00 \n", + "4 1179.TA.26-5-B-j19-1.23.R 8591245 07:36:00 07:36:00 \n", + "5 25.TA.26-845-j19-1.2.H 8573504 08:00:00 08:00:00 \n", "\n", - " stop_sequence pickup_type drop_off_type hour_departure route_id \\\n", - "0 1 0 0 7.0 26-4-j19-1 \n", - "1 1 0 0 7.0 26-311-j19-1 \n", - "2 1 0 0 7.0 26-62-j19-1 \n", - "3 1 0 0 7.0 57-2-Y-j19-1 \n", - "4 1 0 0 7.0 26-5-B-j19-1 \n", - "\n", - " direction_id stop_id_raw \\\n", - "0 1 8576182 \n", - "1 1 8590834 \n", - "2 1 8591349 \n", - "3 0 8503000:0:13 \n", - "4 1 8591245 \n", + " stop_sequence route_id direction_id stop_id_raw \\\n", + "0 1 26-4-j19-1 1 8576182 \n", + "1 1 26-311-j19-1 1 8590834 \n", + "2 1 26-62-j19-1 1 8591349 \n", + "4 1 26-5-B-j19-1 1 8591245 \n", + "5 1 26-845-j19-1 0 8573504 \n", "\n", " all_stops route_int \n", - "0 (8576182, 8591391, 8576197, 8576195, 8576193, ... 3322 \n", - "1 (8590834, 8590849, 8590619, 8590848, 8590525, ... 8607 \n", - "2 (8591349, 8591403, 8591293, 8576262, 8580449, ... 14529 \n", - "3 (8503000,) 291 \n", - "4 (8591245, 8591366, 8591415, 8576193, 8591059, ... 12572 " + "0 (8576182, 8591391, 8576197, 8576195, 8576193, ... 3239 \n", + "1 (8590834, 8590849, 8590619, 8590848, 8590525, ... 8443 \n", + "2 (8591349, 8591403, 8591293, 8576262, 8580449, ... 14245 \n", + "4 (8591245, 8591366, 8591415, 8576193, 8591059, ... 12326 \n", + "5 (8573504, 8580879, 8575977, 8575976) 2526 " ] }, - "execution_count": 17, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_routes.head(5)" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "16537" + "16209" ] }, - "execution_count": 18, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#check if route_int is correct\n", "stop_times_routes.route_int.max()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Trip_int\n", "The trip_int number needs to be ordered by route_int and time" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "19800" + "18668" ] }, - "execution_count": 19, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#check number trips in stop_times\n", "stop_times_routes.trip_id.nunique()" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0trip_idstop_idarrival_timedeparture_timestop_sequencepickup_typedrop_off_typehour_departureroute_iddirection_idstop_id_rawall_stopsroute_int
7751635816131.TA.26-301-j19-1.1.R858702016:53:0016:53:0010016.026-301-j19-118587020:0:G(8500926, 8590602, 8595511, 8590737, 8591829, ...0
499618261131.TA.26-301-j19-1.1.R859551116:55:0016:55:0020016.026-301-j19-118595511(8500926, 8590602, 8595511, 8590737, 8591829, ...0
163549759131.TA.26-301-j19-1.1.R859060316:57:0016:57:0030016.026-301-j19-118590603(8500926, 8590602, 8595511, 8590737, 8591829, ...0
7070429004131.TA.26-301-j19-1.1.R859182916:58:0016:58:0040016.026-301-j19-118591829(8500926, 8590602, 8595511, 8590737, 8591829, ...0
452283528131.TA.26-301-j19-1.1.R859060216:59:0016:59:0050016.026-301-j19-118590602(8500926, 8590602, 8595511, 8590737, 8591829, ...0
\n", "
" ], "text/plain": [ - " Unnamed: 0 trip_id stop_id arrival_time \\\n", - "77516 35816 131.TA.26-301-j19-1.1.R 8587020 16:53:00 \n", - "49961 8261 131.TA.26-301-j19-1.1.R 8595511 16:55:00 \n", - "163549 759 131.TA.26-301-j19-1.1.R 8590603 16:57:00 \n", - "70704 29004 131.TA.26-301-j19-1.1.R 8591829 16:58:00 \n", - "45228 3528 131.TA.26-301-j19-1.1.R 8590602 16:59:00 \n", - "\n", - " departure_time stop_sequence pickup_type drop_off_type \\\n", - "77516 16:53:00 1 0 0 \n", - "49961 16:55:00 2 0 0 \n", - "163549 16:57:00 3 0 0 \n", - "70704 16:58:00 4 0 0 \n", - "45228 16:59:00 5 0 0 \n", + " trip_id stop_id arrival_time departure_time \\\n", + "77516 131.TA.26-301-j19-1.1.R 8587020 16:53:00 16:53:00 \n", + "49961 131.TA.26-301-j19-1.1.R 8595511 16:55:00 16:55:00 \n", + "163549 131.TA.26-301-j19-1.1.R 8590603 16:57:00 16:57:00 \n", + "70704 131.TA.26-301-j19-1.1.R 8591829 16:58:00 16:58:00 \n", + "45228 131.TA.26-301-j19-1.1.R 8590602 16:59:00 16:59:00 \n", "\n", - " hour_departure route_id direction_id stop_id_raw \\\n", - "77516 16.0 26-301-j19-1 1 8587020:0:G \n", - "49961 16.0 26-301-j19-1 1 8595511 \n", - "163549 16.0 26-301-j19-1 1 8590603 \n", - "70704 16.0 26-301-j19-1 1 8591829 \n", - "45228 16.0 26-301-j19-1 1 8590602 \n", + " stop_sequence route_id direction_id stop_id_raw \\\n", + "77516 1 26-301-j19-1 1 8587020:0:G \n", + "49961 2 26-301-j19-1 1 8595511 \n", + "163549 3 26-301-j19-1 1 8590603 \n", + "70704 4 26-301-j19-1 1 8591829 \n", + "45228 5 26-301-j19-1 1 8590602 \n", "\n", " all_stops route_int \n", "77516 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n", "49961 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n", "163549 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n", "70704 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n", "45228 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 " ] }, - "execution_count": 20, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_routes.sort_values([\"route_int\", \"arrival_time\"]).head(5)" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", "
0trip_inttrip_id
0131.TA.26-301-j19-1.1.R0131.TA.26-301-j19-1.1.R
132.TA.26-301-j19-1.1.R132.TA.26-301-j19-1.1.R
2143.TA.26-301-j19-1.1.R133.TA.26-301-j19-1.1.R2143.TA.26-301-j19-1.1.R133.TA.26-301-j19-1.1.R
3133.TA.26-301-j19-1.1.R31.TA.26-301-j19-1.1.R3133.TA.26-301-j19-1.1.R31.TA.26-301-j19-1.1.R
431.TA.26-301-j19-1.1.R140.TA.26-301-j19-1.1.R431.TA.26-301-j19-1.1.R140.TA.26-301-j19-1.1.R
\n", "
" ], "text/plain": [ " 0 trip_int trip_id\n", "0 131.TA.26-301-j19-1.1.R 0 131.TA.26-301-j19-1.1.R\n", "1 32.TA.26-301-j19-1.1.R 1 32.TA.26-301-j19-1.1.R\n", - "2 143.TA.26-301-j19-1.1.R 2 143.TA.26-301-j19-1.1.R\n", - "3 133.TA.26-301-j19-1.1.R 3 133.TA.26-301-j19-1.1.R\n", - "4 31.TA.26-301-j19-1.1.R 4 31.TA.26-301-j19-1.1.R" + "2 133.TA.26-301-j19-1.1.R 2 133.TA.26-301-j19-1.1.R\n", + "3 31.TA.26-301-j19-1.1.R 3 31.TA.26-301-j19-1.1.R\n", + "4 140.TA.26-301-j19-1.1.R 4 140.TA.26-301-j19-1.1.R" ] }, - "execution_count": 21, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#generate sequential trip_int, ordered by route and by time\n", "trip_df = pd.DataFrame(stop_times_routes.sort_values([\"route_int\", \"arrival_time\"]).trip_id.unique())\n", "trip_df[\"trip_int\"] = trip_df.index\n", "trip_df[\"trip_id\"] = trip_df.iloc[:,0]\n", "trip_df.head(5)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "19800" + "18668" ] }, - "execution_count": 22, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#check number trip_id\n", "trip_df.trip_id.nunique()" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "#join to get trip_int in stop_times\n", "stop_times_routes_trip = stop_times_routes.join(trip_df.set_index(\"trip_id\"), how=\"inner\", on=\"trip_id\" , lsuffix='_left', rsuffix='_right')\n" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0trip_idstop_idarrival_timedeparture_timestop_sequencepickup_typedrop_off_typehour_departureroute_iddirection_idstop_id_rawall_stopsroute_int0trip_int
7751635816131.TA.26-301-j19-1.1.R858702016:53:0016:53:0010016.026-301-j19-118587020:0:G(8500926, 8590602, 8595511, 8590737, 8591829, ...0131.TA.26-301-j19-1.1.R0
499618261131.TA.26-301-j19-1.1.R859551116:55:0016:55:0020016.026-301-j19-118595511(8500926, 8590602, 8595511, 8590737, 8591829, ...0131.TA.26-301-j19-1.1.R0
163549759131.TA.26-301-j19-1.1.R859060316:57:0016:57:0030016.026-301-j19-118590603(8500926, 8590602, 8595511, 8590737, 8591829, ...0131.TA.26-301-j19-1.1.R0
7070429004131.TA.26-301-j19-1.1.R859182916:58:0016:58:0040016.026-301-j19-118591829(8500926, 8590602, 8595511, 8590737, 8591829, ...0131.TA.26-301-j19-1.1.R0
452283528131.TA.26-301-j19-1.1.R859060216:59:0016:59:0050016.026-301-j19-118590602(8500926, 8590602, 8595511, 8590737, 8591829, ...0131.TA.26-301-j19-1.1.R0
\n", "
" ], "text/plain": [ - " Unnamed: 0 trip_id stop_id arrival_time \\\n", - "77516 35816 131.TA.26-301-j19-1.1.R 8587020 16:53:00 \n", - "49961 8261 131.TA.26-301-j19-1.1.R 8595511 16:55:00 \n", - "163549 759 131.TA.26-301-j19-1.1.R 8590603 16:57:00 \n", - "70704 29004 131.TA.26-301-j19-1.1.R 8591829 16:58:00 \n", - "45228 3528 131.TA.26-301-j19-1.1.R 8590602 16:59:00 \n", + " trip_id stop_id arrival_time departure_time \\\n", + "77516 131.TA.26-301-j19-1.1.R 8587020 16:53:00 16:53:00 \n", + "49961 131.TA.26-301-j19-1.1.R 8595511 16:55:00 16:55:00 \n", + "163549 131.TA.26-301-j19-1.1.R 8590603 16:57:00 16:57:00 \n", + "70704 131.TA.26-301-j19-1.1.R 8591829 16:58:00 16:58:00 \n", + "45228 131.TA.26-301-j19-1.1.R 8590602 16:59:00 16:59:00 \n", "\n", - " departure_time stop_sequence pickup_type drop_off_type \\\n", - "77516 16:53:00 1 0 0 \n", - "49961 16:55:00 2 0 0 \n", - "163549 16:57:00 3 0 0 \n", - "70704 16:58:00 4 0 0 \n", - "45228 16:59:00 5 0 0 \n", - "\n", - " hour_departure route_id direction_id stop_id_raw \\\n", - "77516 16.0 26-301-j19-1 1 8587020:0:G \n", - "49961 16.0 26-301-j19-1 1 8595511 \n", - "163549 16.0 26-301-j19-1 1 8590603 \n", - "70704 16.0 26-301-j19-1 1 8591829 \n", - "45228 16.0 26-301-j19-1 1 8590602 \n", + " stop_sequence route_id direction_id stop_id_raw \\\n", + "77516 1 26-301-j19-1 1 8587020:0:G \n", + "49961 2 26-301-j19-1 1 8595511 \n", + "163549 3 26-301-j19-1 1 8590603 \n", + "70704 4 26-301-j19-1 1 8591829 \n", + "45228 5 26-301-j19-1 1 8590602 \n", "\n", " all_stops route_int \\\n", "77516 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n", "49961 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n", "163549 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n", "70704 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n", "45228 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n", "\n", " 0 trip_int \n", "77516 131.TA.26-301-j19-1.1.R 0 \n", "49961 131.TA.26-301-j19-1.1.R 0 \n", "163549 131.TA.26-301-j19-1.1.R 0 \n", "70704 131.TA.26-301-j19-1.1.R 0 \n", "45228 131.TA.26-301-j19-1.1.R 0 " ] }, - "execution_count": 24, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#save ordered stop_times\n", "stop_times_routes_trip = stop_times_routes_trip.sort_values([\"route_int\", \"trip_int\", \"stop_sequence\"])\n", "stop_times_routes_trip.head(5)" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "19800" + "18668" ] }, - "execution_count": 25, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#check if manipulations did not destroy trips\n", "stop_times_routes_trip.trip_id.nunique()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Stop_int\n", "Stop_int id needs to ordered by route, trip and stop sequence" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1408" + "1407" ] }, - "execution_count": 26, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#check number stops at entry\n", "stop_times_routes_trip.stop_id.nunique()" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0stop_intstop_id
0858702008587020
1859551118595511
2859060328590603
3859182938591829
4859060248590602
\n", "
" ], "text/plain": [ " 0 stop_int stop_id\n", "0 8587020 0 8587020\n", "1 8595511 1 8595511\n", "2 8590603 2 8590603\n", "3 8591829 3 8591829\n", "4 8590602 4 8590602" ] }, - "execution_count": 27, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#stop_times_curated already in the right order\n", "#we create dataframe to create stop_int\n", "stops_df = pd.DataFrame(stop_times_routes_trip.stop_id.unique())\n", "stops_df[\"stop_int\"] = stops_df.index\n", "stops_df[\"stop_id\"] = stops_df.iloc[:,0]\n", "stops_df.head(5)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1408" + "1407" ] }, - "execution_count": 28, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#check if number stop_int correct\n", "stops_df.stop_int.nunique()" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "#join to get stop_int\n", "stop_times_routes_trip_stop = stop_times_routes_trip.join(stops_df.set_index(\"stop_id\"), how=\"inner\", on=\"stop_id\", lsuffix='_left', rsuffix='_right')\n" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0trip_idstop_idarrival_timedeparture_timestop_sequencepickup_typedrop_off_typehour_departureroute_iddirection_idstop_id_rawall_stopsroute_int0_lefttrip_int0_rightstop_int
7751635816131.TA.26-301-j19-1.1.R858702016:53:0016:53:0010016.026-301-j19-118587020:0:G(8500926, 8590602, 8595511, 8590737, 8591829, ...0131.TA.26-301-j19-1.1.R085870200
46837513732.TA.26-301-j19-1.1.R858702007:38:0007:38:001007.026-301-j19-118587020:0:G(8500926, 8590603, 8587020, 8590615, 8590739, ...132.TA.26-301-j19-1.1.R185870200
2374223742143.TA.26-301-j19-1.1.R208591133.TA.26-301-j19-1.1.R858702024:03:0024:03:0016:38:0016:38:00100NaN26-301-j19-118587020:0:G(8500926, 8590603, 8587020, 8590739, 8590618, ...(8500926, 8590603, 8590739, 8595511, 8590614, ...2143.TA.26-301-j19-1.1.R133.TA.26-301-j19-1.1.R285870200
2085916202133.TA.26-301-j19-1.1.R8126931.TA.26-301-j19-1.1.R858702016:38:0016:38:0010:38:0010:38:0010016.026-301-j19-118587020:0:G(8500926, 8590603, 8590739, 8595511, 8590614, ...(8500926, 8590618, 8590737, 8590739, 8587020, ...3133.TA.26-301-j19-1.1.R31.TA.26-301-j19-1.1.R385870200
812693956931.TA.26-301-j19-1.1.R212257140.TA.26-301-j19-1.1.R858702010:38:0010:38:0018:23:0018:23:0010010.026-301-j19-118587020:0:G(8500926, 8590618, 8590737, 8590739, 8587020, ...(8500926, 8590737, 8590603, 8590614, 8590615, ...431.TA.26-301-j19-1.1.R140.TA.26-301-j19-1.1.R485870200
\n", "
" ], "text/plain": [ - " Unnamed: 0 trip_id stop_id arrival_time \\\n", - "77516 35816 131.TA.26-301-j19-1.1.R 8587020 16:53:00 \n", - "46837 5137 32.TA.26-301-j19-1.1.R 8587020 07:38:00 \n", - "23742 23742 143.TA.26-301-j19-1.1.R 8587020 24:03:00 \n", - "208591 6202 133.TA.26-301-j19-1.1.R 8587020 16:38:00 \n", - "81269 39569 31.TA.26-301-j19-1.1.R 8587020 10:38:00 \n", - "\n", - " departure_time stop_sequence pickup_type drop_off_type \\\n", - "77516 16:53:00 1 0 0 \n", - "46837 07:38:00 1 0 0 \n", - "23742 24:03:00 1 0 0 \n", - "208591 16:38:00 1 0 0 \n", - "81269 10:38:00 1 0 0 \n", + " trip_id stop_id arrival_time departure_time \\\n", + "77516 131.TA.26-301-j19-1.1.R 8587020 16:53:00 16:53:00 \n", + "46837 32.TA.26-301-j19-1.1.R 8587020 07:38:00 07:38:00 \n", + "208591 133.TA.26-301-j19-1.1.R 8587020 16:38:00 16:38:00 \n", + "81269 31.TA.26-301-j19-1.1.R 8587020 10:38:00 10:38:00 \n", + "212257 140.TA.26-301-j19-1.1.R 8587020 18:23:00 18:23:00 \n", "\n", - " hour_departure route_id direction_id stop_id_raw \\\n", - "77516 16.0 26-301-j19-1 1 8587020:0:G \n", - "46837 7.0 26-301-j19-1 1 8587020:0:G \n", - "23742 NaN 26-301-j19-1 1 8587020:0:G \n", - "208591 16.0 26-301-j19-1 1 8587020:0:G \n", - "81269 10.0 26-301-j19-1 1 8587020:0:G \n", + " stop_sequence route_id direction_id stop_id_raw \\\n", + "77516 1 26-301-j19-1 1 8587020:0:G \n", + "46837 1 26-301-j19-1 1 8587020:0:G \n", + "208591 1 26-301-j19-1 1 8587020:0:G \n", + "81269 1 26-301-j19-1 1 8587020:0:G \n", + "212257 1 26-301-j19-1 1 8587020:0:G \n", "\n", " all_stops route_int \\\n", "77516 (8500926, 8590602, 8595511, 8590737, 8591829, ... 0 \n", "46837 (8500926, 8590603, 8587020, 8590615, 8590739, ... 1 \n", - "23742 (8500926, 8590603, 8587020, 8590739, 8590618, ... 2 \n", - "208591 (8500926, 8590603, 8590739, 8595511, 8590614, ... 3 \n", - "81269 (8500926, 8590618, 8590737, 8590739, 8587020, ... 4 \n", + "208591 (8500926, 8590603, 8590739, 8595511, 8590614, ... 2 \n", + "81269 (8500926, 8590618, 8590737, 8590739, 8587020, ... 3 \n", + "212257 (8500926, 8590737, 8590603, 8590614, 8590615, ... 4 \n", "\n", " 0_left trip_int 0_right stop_int \n", "77516 131.TA.26-301-j19-1.1.R 0 8587020 0 \n", "46837 32.TA.26-301-j19-1.1.R 1 8587020 0 \n", - "23742 143.TA.26-301-j19-1.1.R 2 8587020 0 \n", - "208591 133.TA.26-301-j19-1.1.R 3 8587020 0 \n", - "81269 31.TA.26-301-j19-1.1.R 4 8587020 0 " + "208591 133.TA.26-301-j19-1.1.R 2 8587020 0 \n", + "81269 31.TA.26-301-j19-1.1.R 3 8587020 0 \n", + "212257 140.TA.26-301-j19-1.1.R 4 8587020 0 " ] }, - "execution_count": 30, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_routes_trip_stop.head(5)" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1408" + "1407" ] }, - "execution_count": 31, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#check if no stops deleted during manipulation\n", "stop_times_routes_trip_stop.stop_id.nunique()" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1407" + "1406" ] }, - "execution_count": 32, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_routes_trip_stop.stop_int.max()" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "#keep only useful columns \n", "stop_times_int = stop_times_routes_trip_stop[[\"route_int\", \"trip_int\", \"stop_int\", \"stop_sequence\", \"arrival_time\", \"departure_time\",\\\n", " \"route_id\", \"trip_id\", \"stop_id\"]].sort_values([\"route_int\", \"trip_int\", \"stop_sequence\"])" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "stop_times_int = stop_times_int.reset_index(drop=True)" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
route_inttrip_intstop_intstop_sequencearrival_timedeparture_timeroute_idtrip_idstop_id
0000116:53:0016:53:0026-301-j19-1131.TA.26-301-j19-1.1.R8587020
1001216:55:0016:55:0026-301-j19-1131.TA.26-301-j19-1.1.R8595511
2002316:57:0016:57:0026-301-j19-1131.TA.26-301-j19-1.1.R8590603
3003416:58:0016:58:0026-301-j19-1131.TA.26-301-j19-1.1.R8591829
4004516:59:0016:59:0026-301-j19-1131.TA.26-301-j19-1.1.R8590602
5005617:00:0017:00:0026-301-j19-1131.TA.26-301-j19-1.1.R8590615
6006717:01:0017:01:0026-301-j19-1131.TA.26-301-j19-1.1.R8590614
7007817:01:0017:01:0026-301-j19-1131.TA.26-301-j19-1.1.R8590618
8008917:03:0017:03:0026-301-j19-1131.TA.26-301-j19-1.1.R8500926
90091017:03:0017:03:0026-301-j19-1131.TA.26-301-j19-1.1.R8590737
\n", "
" ], "text/plain": [ " route_int trip_int stop_int stop_sequence arrival_time departure_time \\\n", "0 0 0 0 1 16:53:00 16:53:00 \n", "1 0 0 1 2 16:55:00 16:55:00 \n", "2 0 0 2 3 16:57:00 16:57:00 \n", "3 0 0 3 4 16:58:00 16:58:00 \n", "4 0 0 4 5 16:59:00 16:59:00 \n", "5 0 0 5 6 17:00:00 17:00:00 \n", "6 0 0 6 7 17:01:00 17:01:00 \n", "7 0 0 7 8 17:01:00 17:01:00 \n", "8 0 0 8 9 17:03:00 17:03:00 \n", "9 0 0 9 10 17:03:00 17:03:00 \n", "\n", " route_id trip_id stop_id \n", "0 26-301-j19-1 131.TA.26-301-j19-1.1.R 8587020 \n", "1 26-301-j19-1 131.TA.26-301-j19-1.1.R 8595511 \n", "2 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590603 \n", "3 26-301-j19-1 131.TA.26-301-j19-1.1.R 8591829 \n", "4 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590602 \n", "5 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590615 \n", "6 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590614 \n", "7 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590618 \n", "8 26-301-j19-1 131.TA.26-301-j19-1.1.R 8500926 \n", "9 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590737 " ] }, - "execution_count": 35, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_int.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Transfer: delete transfer to same stop & get stop_int & stop_int2\n", "to finish" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1497" ] }, - "execution_count": 36, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#check number stops transfers\n", "transfers.stop_id.nunique()" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1497" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "transfers.stop_id.nunique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#create stop_int column\n", "#this action eliminates stops not in stop_times\n", "transfers = transfers.join(stop_times_int[[\"stop_id\", \"stop_int\"]].set_index(\"stop_id\"), how=\"inner\", on = \"stop_id\")" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'transfers' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtransfers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'transfers' is not defined" ] } ], "source": [ "transfers.head(5)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stop_id2stop_int_two
085737210
185737221
285737232
385830713
485726034
\n", "
" ], "text/plain": [ " stop_id2 stop_int_two\n", "0 8573721 0\n", "1 8573722 1\n", "2 8573723 2\n", "3 8583071 3\n", "4 8572603 4" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#create dataframe with stops\n", "df_stop_int2 = stop_times_curated[[\"stop_id\", \"stop_int\"]].rename(columns={\"stop_id\": \"stop_id2\", \"stop_int\" : \"stop_int_two\"})\n", "df_stop_int2.head(5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "transfers = transfers.join(df_stop_int2.set_index(\"stop_id2\"), how=\"inner\", on = \"stop_id2\")" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0stop_idstop_id2Transfer_time_secstop_int2
00850092685906161461439
00850092685906161461439
00850092685906161461439
00850092685906161461439
00850092685906161461439
\n", "
" ], "text/plain": [ " Unnamed: 0 stop_id stop_id2 Transfer_time_sec stop_int2\n", "0 0 8500926 8590616 146 1439\n", "0 0 8500926 8590616 146 1439\n", "0 0 8500926 8590616 146 1439\n", "0 0 8500926 8590616 146 1439\n", "0 0 8500926 8590616 146 1439" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "transfers.head(5)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "transfers_stop_int2 = transfers" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1512" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#check number unique stops2 in transfers\n", "transfers.stop_id2.nunique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "transfers.join(stop_times_curated[[\"stop_id\", \"stop_int\"]].set_index(\"stop_id\"), how=\"inner\", on = \"stop_id\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Raptor specific preprocessing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "StopTimes: [[departure_route0_trip0_stop0, arrival_route0_trip0_stop_0], [departure_route0_trip0_stop1, arrival_route0_trip0_stop_1], …], [[departure_route0_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], [[[departure_route1_trip0_stop0, arrival_route1_trip0_stop_0], …], [[departure_route1_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], …]" + "#### StopTimes: \n", + "[[departure_route0_trip0_stop0, arrival_route0_trip0_stop_0], [departure_route0_trip0_stop1, arrival_route0_trip0_stop_1], …], [[departure_route0_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], [[[departure_route1_trip0_stop0, arrival_route1_trip0_stop_0], …], [[departure_route1_trip1_stop0, arrival_route0_trip1_stop_0], …], ….], …]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by making sure the order is correct" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
route_inttrip_intstop_intstop_sequencearrival_timedeparture_timeroute_idtrip_idstop_id
0000116:53:0016:53:0026-301-j19-1131.TA.26-301-j19-1.1.R8587020
1001216:55:0016:55:0026-301-j19-1131.TA.26-301-j19-1.1.R8595511
2002316:57:0016:57:0026-301-j19-1131.TA.26-301-j19-1.1.R8590603
3003416:58:0016:58:0026-301-j19-1131.TA.26-301-j19-1.1.R8591829
4004516:59:0016:59:0026-301-j19-1131.TA.26-301-j19-1.1.R8590602
\n", "
" ], "text/plain": [ " route_int trip_int stop_int stop_sequence arrival_time departure_time \\\n", "0 0 0 0 1 16:53:00 16:53:00 \n", "1 0 0 1 2 16:55:00 16:55:00 \n", "2 0 0 2 3 16:57:00 16:57:00 \n", "3 0 0 3 4 16:58:00 16:58:00 \n", "4 0 0 4 5 16:59:00 16:59:00 \n", "\n", " route_id trip_id stop_id \n", "0 26-301-j19-1 131.TA.26-301-j19-1.1.R 8587020 \n", "1 26-301-j19-1 131.TA.26-301-j19-1.1.R 8595511 \n", "2 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590603 \n", "3 26-301-j19-1 131.TA.26-301-j19-1.1.R 8591829 \n", "4 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590602 " ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_ordered = stop_times_int.sort_values(by=[\"route_int\", \"trip_int\", \"stop_sequence\"])\n", "stop_times_ordered.head(5)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
arrival_timedeparture_time
016:53:0016:53:00
116:55:0016:55:00
216:57:0016:57:00
316:58:0016:58:00
416:59:0016:59:00
\n", "
" ], "text/plain": [ " arrival_time departure_time\n", "0 16:53:00 16:53:00\n", "1 16:55:00 16:55:00\n", "2 16:57:00 16:57:00\n", "3 16:58:00 16:58:00\n", "4 16:59:00 16:59:00" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_ordered[[\"arrival_time\", \"departure_time\"]].head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Adding None to first arrival time and last departure time" + "We add None to first arrival time and last departure time." ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
route_inttrip_intstop_intstop_sequencearrival_timedeparture_timeroute_idtrip_idstop_idsequence_shift_1
0000116:53:0016:53:0026-301-j19-1131.TA.26-301-j19-1.1.R85870202
1001216:55:0016:55:0026-301-j19-1131.TA.26-301-j19-1.1.R85955113
2002316:57:0016:57:0026-301-j19-1131.TA.26-301-j19-1.1.R85906034
3003416:58:0016:58:0026-301-j19-1131.TA.26-301-j19-1.1.R85918295
4004516:59:0016:59:0026-301-j19-1131.TA.26-301-j19-1.1.R85906026
\n", "
" ], "text/plain": [ " route_int trip_int stop_int stop_sequence arrival_time departure_time \\\n", "0 0 0 0 1 16:53:00 16:53:00 \n", "1 0 0 1 2 16:55:00 16:55:00 \n", "2 0 0 2 3 16:57:00 16:57:00 \n", "3 0 0 3 4 16:58:00 16:58:00 \n", "4 0 0 4 5 16:59:00 16:59:00 \n", "\n", " route_id trip_id stop_id sequence_shift_1 \n", "0 26-301-j19-1 131.TA.26-301-j19-1.1.R 8587020 2 \n", "1 26-301-j19-1 131.TA.26-301-j19-1.1.R 8595511 3 \n", "2 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590603 4 \n", "3 26-301-j19-1 131.TA.26-301-j19-1.1.R 8591829 5 \n", "4 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590602 6 " ] }, - "execution_count": 38, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#adding a shift\n", "stop_times_ordered[\"sequence_shift_1\"] = stop_times_ordered[\"stop_sequence\"].shift(-1, fill_value=0)\n", "stop_times_ordered.head(5)" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "stop_times_ordered['departure_time'] = np.where((stop_times_ordered[\"stop_sequence\"] > stop_times_ordered[\"sequence_shift_1\"]), None, stop_times_ordered['departure_time'])" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "stop_times_ordered[\"arrival_time\"] = np.where((stop_times_ordered[\"stop_sequence\"] == 1), None, stop_times_ordered['arrival_time'])" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
arrival_timedeparture_timestop_sequencesequence_shift_1
0None16:53:0012
116:55:0016:55:0023
216:57:0016:57:0034
316:58:0016:58:0045
416:59:0016:59:0056
\n", "
" ], "text/plain": [ " arrival_time departure_time stop_sequence sequence_shift_1\n", "0 None 16:53:00 1 2\n", "1 16:55:00 16:55:00 2 3\n", "2 16:57:00 16:57:00 3 4\n", "3 16:58:00 16:58:00 4 5\n", "4 16:59:00 16:59:00 5 6" ] }, - "execution_count": 41, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_ordered[[\"arrival_time\",\"departure_time\", \"stop_sequence\", \"sequence_shift_1\"]].head(5)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We transform it in datetime as required by the raptor algorithm" + ] + }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 58, "metadata": {}, - "outputs": [ - { - "ename": "ParserError", - "evalue": "hour must be in 0..23: 24:04:00", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, timestr, default, ignoretz, tzinfos, **kwargs)\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 655\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_build_naive\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 656\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36m_build_naive\u001b[0;34m(self, res, default)\u001b[0m\n\u001b[1;32m 1240\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1241\u001b[0;31m \u001b[0mnaive\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mrepl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1242\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: hour must be in 0..23", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mParserError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32mpandas/_libs/tslibs/parsing.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslibs.parsing.parse_datetime_string\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(timestr, parserinfo, **kwargs)\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1374\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mDEFAULTPARSER\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimestr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1375\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, timestr, default, ignoretz, tzinfos, **kwargs)\u001b[0m\n\u001b[1;32m 656\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 657\u001b[0;31m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mParserError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\": %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimestr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 658\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/six.py\u001b[0m in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n", - "\u001b[0;31mParserError\u001b[0m: hour must be in 0..23: 24:04:00", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mTypeError\u001b[0m: invalid string coercion to datetime", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, timestr, default, ignoretz, tzinfos, **kwargs)\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 655\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_build_naive\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 656\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36m_build_naive\u001b[0;34m(self, res, default)\u001b[0m\n\u001b[1;32m 1240\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1241\u001b[0;31m \u001b[0mnaive\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mrepl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1242\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: hour must be in 0..23", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mParserError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mstop_times_ordered\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'arrival_time'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_datetime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstop_times_ordered\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'arrival_time'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mstop_times_ordered\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'departure_time'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_datetime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstop_times_ordered\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'departure_time'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/tools/datetimes.py\u001b[0m in \u001b[0;36mto_datetime\u001b[0;34m(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[1;32m 722\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtz_localize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtz\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mABCSeries\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 724\u001b[0;31m \u001b[0mcache_array\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_maybe_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert_listlike\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 725\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcache_array\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcache_array\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/tools/datetimes.py\u001b[0m in \u001b[0;36m_maybe_cache\u001b[0;34m(arg, format, cache, convert_listlike)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0munique_dates\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0munique_dates\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 152\u001b[0;31m \u001b[0mcache_dates\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconvert_listlike\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0munique_dates\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 153\u001b[0m \u001b[0mcache_array\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcache_dates\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0munique_dates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcache_array\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/tools/datetimes.py\u001b[0m in \u001b[0;36m_convert_listlike_datetimes\u001b[0;34m(arg, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)\u001b[0m\n\u001b[1;32m 445\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 446\u001b[0m \u001b[0mrequire_iso8601\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequire_iso8601\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 447\u001b[0;31m \u001b[0mallow_object\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 448\u001b[0m )\n\u001b[1;32m 449\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/arrays/datetimes.py\u001b[0m in \u001b[0;36mobjects_to_datetime64ns\u001b[0;34m(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)\u001b[0m\n\u001b[1;32m 1861\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"i8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtz_parsed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1862\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mValueError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1863\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1864\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1865\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtz_parsed\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/arrays/datetimes.py\u001b[0m in \u001b[0;36mobjects_to_datetime64ns\u001b[0;34m(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)\u001b[0m\n\u001b[1;32m 1852\u001b[0m \u001b[0mdayfirst\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdayfirst\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1853\u001b[0m \u001b[0myearfirst\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0myearfirst\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1854\u001b[0;31m \u001b[0mrequire_iso8601\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequire_iso8601\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1855\u001b[0m )\n\u001b[1;32m 1856\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime_object\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_datetime_object\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32mpandas/_libs/tslibs/parsing.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslibs.parsing.parse_datetime_string\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(timestr, parserinfo, **kwargs)\u001b[0m\n\u001b[1;32m 1372\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparserinfo\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimestr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1374\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mDEFAULTPARSER\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimestr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1375\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1376\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/dateutil/parser/_parser.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, timestr, default, ignoretz, tzinfos, **kwargs)\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_build_naive\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 656\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 657\u001b[0;31m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mParserError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\": %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimestr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 658\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 659\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mignoretz\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/six.py\u001b[0m in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n", - "\u001b[0;31mParserError\u001b[0m: hour must be in 0..23: 24:04:00" - ] - } - ], + "outputs": [], "source": [ "stop_times_ordered['arrival_time'] = pd.to_datetime(stop_times_ordered['arrival_time'])\n", "stop_times_ordered['departure_time'] = pd.to_datetime(stop_times_ordered['departure_time'])" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "
arrival_timedeparture_time
02020-05-20 08:00:002020-05-20 08:00:00NaT2020-05-21 16:53:00
12020-05-20 08:01:002020-05-20 08:01:002020-05-21 16:55:002020-05-21 16:55:00
22020-05-20 08:02:002020-05-20 08:02:002020-05-21 16:57:002020-05-21 16:57:00
32020-05-20 08:03:002020-05-20 08:03:002020-05-21 16:58:002020-05-21 16:58:00
42020-05-20 08:04:002020-05-20 08:04:002020-05-21 16:59:002020-05-21 16:59:00
\n", "
" ], "text/plain": [ " arrival_time departure_time\n", - "0 2020-05-20 08:00:00 2020-05-20 08:00:00\n", - "1 2020-05-20 08:01:00 2020-05-20 08:01:00\n", - "2 2020-05-20 08:02:00 2020-05-20 08:02:00\n", - "3 2020-05-20 08:03:00 2020-05-20 08:03:00\n", - "4 2020-05-20 08:04:00 2020-05-20 08:04:00" + "0 NaT 2020-05-21 16:53:00\n", + "1 2020-05-21 16:55:00 2020-05-21 16:55:00\n", + "2 2020-05-21 16:57:00 2020-05-21 16:57:00\n", + "3 2020-05-21 16:58:00 2020-05-21 16:58:00\n", + "4 2020-05-21 16:59:00 2020-05-21 16:59:00" ] }, - "execution_count": 40, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_ordered[[\"arrival_time\", \"departure_time\"]].head(5)" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "with open('../data/stop_times_df.pkl','wb') as f: pickle.dump(stop_times_ordered, f)" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
route_inttrip_intstop_intstop_sequencearrival_timedeparture_timeroute_idtrip_idstop_idpickup_typedrop_off_typesequence_shift_1
0000142020-05-20 08:00:002020-05-20 08:00:001-231-j19-114.TA.1-231-j19-1.1.H857372100151NaT2020-05-21 16:53:0026-301-j19-1131.TA.26-301-j19-1.1.R85870202
1001152020-05-20 08:01:002020-05-20 08:01:001-231-j19-114.TA.1-231-j19-1.1.H85737220016
2002162020-05-20 08:02:002020-05-20 08:02:001-231-j19-114.TA.1-231-j19-1.1.H85737230017
3003172020-05-20 08:03:002020-05-20 08:03:001-231-j19-114.TA.1-231-j19-1.1.H85830710018
4004182020-05-20 08:04:002020-05-20 08:04:001-231-j19-114.TA.1-231-j19-1.1.H85726030019
5005192020-05-20 08:05:00NaT1-231-j19-114.TA.1-231-j19-1.1.H8502879001
60151NaT2020-05-20 08:06:001-231-j19-135.TA.1-231-j19-1.2.R8502879002
701422020-05-20 08:07:002020-05-20 08:07:001-231-j19-135.TA.1-231-j19-1.2.R8572603003
801332020-05-20 08:07:002020-05-20 08:07:001-231-j19-135.TA.1-231-j19-1.2.R8583071004
901642020-05-20 08:10:002020-05-20 08:12:001-231-j19-135.TA.1-231-j19-1.2.R8502955005
1001752020-05-20 08:15:002020-05-20 08:15:001-231-j19-135.TA.1-231-j19-1.2.R8502553006
1101862020-05-20 08:17:002020-05-20 08:17:001-231-j19-135.TA.1-231-j19-1.2.R8572601007
1201972020-05-20 08:18:002020-05-20 08:18:001-231-j19-135.TA.1-231-j19-1.2.R8572600008
13011082020-05-20 08:18:002020-05-20 08:18:001-231-j19-135.TA.1-231-j19-1.2.R85824620010
140111102020-05-20 08:22:00NaT1-231-j19-135.TA.1-231-j19-1.2.R8572747001
1502111NaT2020-05-20 08:07:001-231-j19-116.TA.1-231-j19-1.1.H8572747003
16021032020-05-20 08:08:002020-05-20 08:08:001-231-j19-116.TA.1-231-j19-1.1.H8582462004
1702942020-05-20 08:09:002020-05-20 08:09:001-231-j19-116.TA.1-231-j19-1.1.H8572600005
1802852020-05-20 08:09:002020-05-20 08:09:001-231-j19-116.TA.1-231-j19-1.1.H8572601006
1902762020-05-20 08:13:002020-05-20 08:13:001-231-j19-116.TA.1-231-j19-1.1.H8502553007
20021272020-05-20 08:15:002020-05-20 08:15:001-231-j19-116.TA.1-231-j19-1.1.H8572602008
2102682020-05-20 08:16:002020-05-20 08:17:001-231-j19-116.TA.1-231-j19-1.1.H8502955009
2202192020-05-20 08:18:002020-05-20 08:18:001-231-j19-116.TA.1-231-j19-1.1.H85737220010
23020102020-05-20 08:20:002020-05-20 08:20:001-231-j19-116.TA.1-231-j19-1.1.H85737210011
240213112020-05-20 08:23:002020-05-20 08:23:001-231-j19-116.TA.1-231-j19-1.1.H85035980012
250214122020-05-20 08:25:002020-05-20 08:27:001-231-j19-116.TA.1-231-j19-1.1.H85737200013
260213132020-05-20 08:28:002020-05-20 08:28:001-231-j19-116.TA.1-231-j19-1.1.H85035980014
27020142020-05-20 08:30:002020-05-20 08:30:001-231-j19-116.TA.1-231-j19-1.1.H85737210015
28021152020-05-20 08:31:002020-05-20 08:31:001-231-j19-116.TA.1-231-j19-1.1.H85737220016
29022162020-05-20 08:32:002020-05-20 08:32:001-231-j19-116.TA.1-231-j19-1.1.H85737230017
30023172020-05-20 08:33:002020-05-20 08:33:001-231-j19-116.TA.1-231-j19-1.1.H85830710018
31024182020-05-20 08:34:002020-05-20 08:34:001-231-j19-116.TA.1-231-j19-1.1.H85726030019
32025192020-05-20 08:35:00NaT1-231-j19-116.TA.1-231-j19-1.1.H8502879001
330351NaT2020-05-20 08:36:001-231-j19-137.TA.1-231-j19-1.2.R8502879002
3403422020-05-20 08:37:002020-05-20 08:37:001-231-j19-137.TA.1-231-j19-1.2.R8572603003
3503332020-05-20 08:37:002020-05-20 08:37:001-231-j19-137.TA.1-231-j19-1.2.R8583071004
3603642020-05-20 08:40:002020-05-20 08:42:001-231-j19-137.TA.1-231-j19-1.2.R8502955005
3703752020-05-20 08:45:002020-05-20 08:45:001-231-j19-137.TA.1-231-j19-1.2.R8502553006
3803862020-05-20 08:47:002020-05-20 08:47:001-231-j19-137.TA.1-231-j19-1.2.R8572601007
3903972020-05-20 08:48:002020-05-20 08:48:001-231-j19-137.TA.1-231-j19-1.2.R8572600008
40031082020-05-20 08:48:002020-05-20 08:48:001-231-j19-137.TA.1-231-j19-1.2.R85824620010
410311102020-05-20 08:52:00NaT1-231-j19-137.TA.1-231-j19-1.2.R8572747001
4204111NaT2020-05-20 08:37:001-231-j19-118.TA.1-231-j19-1.1.H8572747003
43041032020-05-20 08:38:002020-05-20 08:38:001-231-j19-118.TA.1-231-j19-1.1.H8582462004
4404942020-05-20 08:39:002020-05-20 08:39:001-231-j19-118.TA.1-231-j19-1.1.H8572600005
4504852020-05-20 08:39:002020-05-20 08:39:001-231-j19-118.TA.1-231-j19-1.1.H8572601006
4604762020-05-20 08:43:002020-05-20 08:43:001-231-j19-118.TA.1-231-j19-1.1.H8502553007
47041272020-05-20 08:45:002020-05-20 08:45:001-231-j19-118.TA.1-231-j19-1.1.H8572602008
4804682020-05-20 08:46:002020-05-20 08:47:001-231-j19-118.TA.1-231-j19-1.1.H8502955009
4904192020-05-20 08:48:002020-05-20 08:48:001-231-j19-118.TA.1-231-j19-1.1.H85737220010
50040102020-05-20 08:50:002020-05-20 08:50:001-231-j19-118.TA.1-231-j19-1.1.H85737210011
510413112020-05-20 08:53:002020-05-20 08:53:001-231-j19-118.TA.1-231-j19-1.1.H85035980012
520414122020-05-20 08:55:002020-05-20 08:59:001-231-j19-118.TA.1-231-j19-1.1.H85737200013
530413132020-05-20 09:00:002020-05-20 09:00:001-231-j19-118.TA.1-231-j19-1.1.H85035980014
54040142020-05-20 09:02:002020-05-20 09:02:001-231-j19-118.TA.1-231-j19-1.1.H85737210015
55041152020-05-20 09:03:002020-05-20 09:03:001-231-j19-118.TA.1-231-j19-1.1.H85737220016
56042162020-05-20 09:04:002020-05-20 09:04:001-231-j19-118.TA.1-231-j19-1.1.H85737230017
57043172020-05-20 09:05:002020-05-20 09:05:001-231-j19-118.TA.1-231-j19-1.1.H85830710018
58044182020-05-20 09:06:002020-05-20 09:06:001-231-j19-118.TA.1-231-j19-1.1.H85726030019
59045192020-05-20 09:07:00NaT1-231-j19-118.TA.1-231-j19-1.1.H8502879001
\n", - "
" - ], - "text/plain": [ - " route_int trip_int stop_int stop_sequence arrival_time \\\n", - "0 0 0 0 14 2020-05-20 08:00:00 \n", - "1 0 0 1 15 2020-05-20 08:01:00 \n", - "2 0 0 2 16 2020-05-20 08:02:00 \n", - "3 0 0 3 17 2020-05-20 08:03:00 \n", - "4 0 0 4 18 2020-05-20 08:04:00 \n", - "5 0 0 5 19 2020-05-20 08:05:00 \n", - "6 0 1 5 1 NaT \n", - "7 0 1 4 2 2020-05-20 08:07:00 \n", - "8 0 1 3 3 2020-05-20 08:07:00 \n", - "9 0 1 6 4 2020-05-20 08:10:00 \n", - "10 0 1 7 5 2020-05-20 08:15:00 \n", - "11 0 1 8 6 2020-05-20 08:17:00 \n", - "12 0 1 9 7 2020-05-20 08:18:00 \n", - "13 0 1 10 8 2020-05-20 08:18:00 \n", - "14 0 1 11 10 2020-05-20 08:22:00 \n", - "15 0 2 11 1 NaT \n", - "16 0 2 10 3 2020-05-20 08:08:00 \n", - "17 0 2 9 4 2020-05-20 08:09:00 \n", - "18 0 2 8 5 2020-05-20 08:09:00 \n", - "19 0 2 7 6 2020-05-20 08:13:00 \n", - "20 0 2 12 7 2020-05-20 08:15:00 \n", - "21 0 2 6 8 2020-05-20 08:16:00 \n", - "22 0 2 1 9 2020-05-20 08:18:00 \n", - "23 0 2 0 10 2020-05-20 08:20:00 \n", - "24 0 2 13 11 2020-05-20 08:23:00 \n", - "25 0 2 14 12 2020-05-20 08:25:00 \n", - "26 0 2 13 13 2020-05-20 08:28:00 \n", - "27 0 2 0 14 2020-05-20 08:30:00 \n", - "28 0 2 1 15 2020-05-20 08:31:00 \n", - "29 0 2 2 16 2020-05-20 08:32:00 \n", - "30 0 2 3 17 2020-05-20 08:33:00 \n", - "31 0 2 4 18 2020-05-20 08:34:00 \n", - "32 0 2 5 19 2020-05-20 08:35:00 \n", - "33 0 3 5 1 NaT \n", - "34 0 3 4 2 2020-05-20 08:37:00 \n", - "35 0 3 3 3 2020-05-20 08:37:00 \n", - "36 0 3 6 4 2020-05-20 08:40:00 \n", - "37 0 3 7 5 2020-05-20 08:45:00 \n", - "38 0 3 8 6 2020-05-20 08:47:00 \n", - "39 0 3 9 7 2020-05-20 08:48:00 \n", - "40 0 3 10 8 2020-05-20 08:48:00 \n", - "41 0 3 11 10 2020-05-20 08:52:00 \n", - "42 0 4 11 1 NaT \n", - "43 0 4 10 3 2020-05-20 08:38:00 \n", - "44 0 4 9 4 2020-05-20 08:39:00 \n", - "45 0 4 8 5 2020-05-20 08:39:00 \n", - "46 0 4 7 6 2020-05-20 08:43:00 \n", - "47 0 4 12 7 2020-05-20 08:45:00 \n", - "48 0 4 6 8 2020-05-20 08:46:00 \n", - "49 0 4 1 9 2020-05-20 08:48:00 \n", - "50 0 4 0 10 2020-05-20 08:50:00 \n", - "51 0 4 13 11 2020-05-20 08:53:00 \n", - "52 0 4 14 12 2020-05-20 08:55:00 \n", - "53 0 4 13 13 2020-05-20 09:00:00 \n", - "54 0 4 0 14 2020-05-20 09:02:00 \n", - "55 0 4 1 15 2020-05-20 09:03:00 \n", - "56 0 4 2 16 2020-05-20 09:04:00 \n", - "57 0 4 3 17 2020-05-20 09:05:00 \n", - "58 0 4 4 18 2020-05-20 09:06:00 \n", - "59 0 4 5 19 2020-05-20 09:07:00 \n", - "\n", - " departure_time route_id trip_id stop_id \\\n", - "0 2020-05-20 08:00:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573721 \n", - "1 2020-05-20 08:01:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573722 \n", - "2 2020-05-20 08:02:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573723 \n", - "3 2020-05-20 08:03:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8583071 \n", - "4 2020-05-20 08:04:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8572603 \n", - "5 NaT 1-231-j19-1 14.TA.1-231-j19-1.1.H 8502879 \n", - "6 2020-05-20 08:06:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502879 \n", - "7 2020-05-20 08:07:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572603 \n", - "8 2020-05-20 08:07:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8583071 \n", - "9 2020-05-20 08:12:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502955 \n", - "10 2020-05-20 08:15:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502553 \n", - "11 2020-05-20 08:17:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572601 \n", - "12 2020-05-20 08:18:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572600 \n", - "13 2020-05-20 08:18:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8582462 \n", - "14 NaT 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572747 \n", - "15 2020-05-20 08:07:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572747 \n", - "16 2020-05-20 08:08:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8582462 \n", - "17 2020-05-20 08:09:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572600 \n", - "18 2020-05-20 08:09:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572601 \n", - "19 2020-05-20 08:13:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502553 \n", - "20 2020-05-20 08:15:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572602 \n", - "21 2020-05-20 08:17:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502955 \n", - "22 2020-05-20 08:18:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573722 \n", - "23 2020-05-20 08:20:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573721 \n", - "24 2020-05-20 08:23:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8503598 \n", - "25 2020-05-20 08:27:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573720 \n", - "26 2020-05-20 08:28:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8503598 \n", - "27 2020-05-20 08:30:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573721 \n", - "28 2020-05-20 08:31:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573722 \n", - "29 2020-05-20 08:32:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573723 \n", - "30 2020-05-20 08:33:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8583071 \n", - "31 2020-05-20 08:34:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572603 \n", - "32 NaT 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502879 \n", - "33 2020-05-20 08:36:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502879 \n", - "34 2020-05-20 08:37:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572603 \n", - "35 2020-05-20 08:37:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8583071 \n", - "36 2020-05-20 08:42:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502955 \n", - "37 2020-05-20 08:45:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502553 \n", - "38 2020-05-20 08:47:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572601 \n", - "39 2020-05-20 08:48:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572600 \n", - "40 2020-05-20 08:48:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8582462 \n", - "41 NaT 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572747 \n", - "42 2020-05-20 08:37:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572747 \n", - "43 2020-05-20 08:38:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8582462 \n", - "44 2020-05-20 08:39:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572600 \n", - "45 2020-05-20 08:39:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572601 \n", - "46 2020-05-20 08:43:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8502553 \n", - "47 2020-05-20 08:45:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572602 \n", - "48 2020-05-20 08:47:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8502955 \n", - "49 2020-05-20 08:48:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573722 \n", - "50 2020-05-20 08:50:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573721 \n", - "51 2020-05-20 08:53:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8503598 \n", - "52 2020-05-20 08:59:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573720 \n", - "53 2020-05-20 09:00:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8503598 \n", - "54 2020-05-20 09:02:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573721 \n", - "55 2020-05-20 09:03:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573722 \n", - "56 2020-05-20 09:04:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573723 \n", - "57 2020-05-20 09:05:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8583071 \n", - "58 2020-05-20 09:06:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572603 \n", - "59 NaT 1-231-j19-1 18.TA.1-231-j19-1.1.H 8502879 \n", - "\n", - " pickup_type drop_off_type sequence_shift_1 \n", - "0 0 0 15 \n", - "1 0 0 16 \n", - "2 0 0 17 \n", - "3 0 0 18 \n", - "4 0 0 19 \n", - "5 0 0 1 \n", - "6 0 0 2 \n", - "7 0 0 3 \n", - "8 0 0 4 \n", - "9 0 0 5 \n", - "10 0 0 6 \n", - "11 0 0 7 \n", - "12 0 0 8 \n", - "13 0 0 10 \n", - "14 0 0 1 \n", - "15 0 0 3 \n", - "16 0 0 4 \n", - "17 0 0 5 \n", - "18 0 0 6 \n", - "19 0 0 7 \n", - "20 0 0 8 \n", - "21 0 0 9 \n", - "22 0 0 10 \n", - "23 0 0 11 \n", - "24 0 0 12 \n", - "25 0 0 13 \n", - "26 0 0 14 \n", - "27 0 0 15 \n", - "28 0 0 16 \n", - "29 0 0 17 \n", - "30 0 0 18 \n", - "31 0 0 19 \n", - "32 0 0 1 \n", - "33 0 0 2 \n", - "34 0 0 3 \n", - "35 0 0 4 \n", - "36 0 0 5 \n", - "37 0 0 6 \n", - "38 0 0 7 \n", - "39 0 0 8 \n", - "40 0 0 10 \n", - "41 0 0 1 \n", - "42 0 0 3 \n", - "43 0 0 4 \n", - "44 0 0 5 \n", - "45 0 0 6 \n", - "46 0 0 7 \n", - "47 0 0 8 \n", - "48 0 0 9 \n", - "49 0 0 10 \n", - "50 0 0 11 \n", - "51 0 0 12 \n", - "52 0 0 13 \n", - "53 0 0 14 \n", - "54 0 0 15 \n", - "55 0 0 16 \n", - "56 0 0 17 \n", - "57 0 0 18 \n", - "58 0 0 19 \n", - "59 0 0 1 " - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "stop_times_ordered = stop_times_ordered.sort_values(by=[\"route_int\", \"trip_int\", \"stop_sequence\"])\n", - "stop_times_ordered.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
route_inttrip_intstop_intstop_sequencearrival_timedeparture_timeroute_idtrip_idstop_idpickup_typedrop_off_typesequence_shift_1
0000142020-05-20 08:00:002020-05-20 08:00:001-231-j19-114.TA.1-231-j19-1.1.H85737210015
1001152020-05-20 08:01:002020-05-20 08:01:001-231-j19-114.TA.1-231-j19-1.1.H85737220016
2002162020-05-20 08:02:002020-05-20 08:02:001-231-j19-114.TA.1-231-j19-1.1.H85737230017
3003172020-05-20 08:03:002020-05-20 08:03:001-231-j19-114.TA.1-231-j19-1.1.H85830710018
4004182020-05-20 08:04:002020-05-20 08:04:001-231-j19-114.TA.1-231-j19-1.1.H85726030019
5005192020-05-20 08:05:00NaT1-231-j19-114.TA.1-231-j19-1.1.H8502879001
60151NaT2020-05-20 08:06:001-231-j19-135.TA.1-231-j19-1.2.R8502879002
701422020-05-20 08:07:002020-05-20 08:07:001-231-j19-135.TA.1-231-j19-1.2.R8572603003
801332020-05-20 08:07:002020-05-20 08:07:001-231-j19-135.TA.1-231-j19-1.2.R8583071004
901642020-05-20 08:10:002020-05-20 08:12:001-231-j19-135.TA.1-231-j19-1.2.R8502955005
1001752020-05-20 08:15:002020-05-20 08:15:001-231-j19-135.TA.1-231-j19-1.2.R8502553006
1101862020-05-20 08:17:002020-05-20 08:17:001-231-j19-135.TA.1-231-j19-1.2.R8572601007
1201972020-05-20 08:18:002020-05-20 08:18:001-231-j19-135.TA.1-231-j19-1.2.R8572600008
13011082020-05-20 08:18:002020-05-20 08:18:001-231-j19-135.TA.1-231-j19-1.2.R85824620010
140111102020-05-20 08:22:00NaT1-231-j19-135.TA.1-231-j19-1.2.R8572747001
1502111NaT2020-05-20 08:07:001-231-j19-116.TA.1-231-j19-1.1.H8572747003
16021032020-05-20 08:08:002020-05-20 08:08:001-231-j19-116.TA.1-231-j19-1.1.H8582462004
1702942020-05-20 08:09:002020-05-20 08:09:001-231-j19-116.TA.1-231-j19-1.1.H8572600005
1802852020-05-20 08:09:002020-05-20 08:09:001-231-j19-116.TA.1-231-j19-1.1.H8572601006
1902762020-05-20 08:13:002020-05-20 08:13:001-231-j19-116.TA.1-231-j19-1.1.H8502553007
20021272020-05-20 08:15:002020-05-20 08:15:001-231-j19-116.TA.1-231-j19-1.1.H8572602008
2102682020-05-20 08:16:002020-05-20 08:17:001-231-j19-116.TA.1-231-j19-1.1.H8502955009
2202192020-05-20 08:18:002020-05-20 08:18:001-231-j19-116.TA.1-231-j19-1.1.H85737220010
23020102020-05-20 08:20:002020-05-20 08:20:001-231-j19-116.TA.1-231-j19-1.1.H85737210011
240213112020-05-20 08:23:002020-05-20 08:23:001-231-j19-116.TA.1-231-j19-1.1.H85035980012
250214122020-05-20 08:25:002020-05-20 08:27:001-231-j19-116.TA.1-231-j19-1.1.H85737200013
260213132020-05-20 08:28:002020-05-20 08:28:001-231-j19-116.TA.1-231-j19-1.1.H85035980014
27020142020-05-20 08:30:002020-05-20 08:30:001-231-j19-116.TA.1-231-j19-1.1.H85737210015
28021152020-05-20 08:31:002020-05-20 08:31:001-231-j19-116.TA.1-231-j19-1.1.H85737220016
29022162020-05-20 08:32:002020-05-20 08:32:001-231-j19-116.TA.1-231-j19-1.1.H85737230017
30023172020-05-20 08:33:002020-05-20 08:33:001-231-j19-116.TA.1-231-j19-1.1.H85830710018
31024182020-05-20 08:34:002020-05-20 08:34:001-231-j19-116.TA.1-231-j19-1.1.H85726030019
32025192020-05-20 08:35:00NaT1-231-j19-116.TA.1-231-j19-1.1.H8502879001
330351NaT2020-05-20 08:36:001-231-j19-137.TA.1-231-j19-1.2.R8502879002
3403422020-05-20 08:37:002020-05-20 08:37:001-231-j19-137.TA.1-231-j19-1.2.R8572603003
3503332020-05-20 08:37:002020-05-20 08:37:001-231-j19-137.TA.1-231-j19-1.2.R8583071004
3603642020-05-20 08:40:002020-05-20 08:42:001-231-j19-137.TA.1-231-j19-1.2.R8502955005
3703752020-05-20 08:45:002020-05-20 08:45:001-231-j19-137.TA.1-231-j19-1.2.R8502553006
3803862020-05-20 08:47:002020-05-20 08:47:001-231-j19-137.TA.1-231-j19-1.2.R8572601007
3903972020-05-20 08:48:002020-05-20 08:48:001-231-j19-137.TA.1-231-j19-1.2.R8572600008
40031082020-05-20 08:48:002020-05-20 08:48:001-231-j19-137.TA.1-231-j19-1.2.R85824620010
410311102020-05-20 08:52:00NaT1-231-j19-137.TA.1-231-j19-1.2.R8572747001
4204111NaT2020-05-20 08:37:001-231-j19-118.TA.1-231-j19-1.1.H8572747003
43041032020-05-20 08:38:002020-05-20 08:38:001-231-j19-118.TA.1-231-j19-1.1.H8582462004
4404942020-05-20 08:39:002020-05-20 08:39:001-231-j19-118.TA.1-231-j19-1.1.H857260000522020-05-21 16:55:002020-05-21 16:55:0026-301-j19-1131.TA.26-301-j19-1.1.R85955113
4504852020-05-20 08:39:002020-05-20 08:39:001-231-j19-118.TA.1-231-j19-1.1.H85726010206
460232020-05-21 16:57:002020-05-21 16:57:0026-301-j19-1131.TA.26-301-j19-1.1.R85906034762020-05-20 08:43:002020-05-20 08:43:001-231-j19-118.TA.1-231-j19-1.1.H8502553007
47041272020-05-20 08:45:002020-05-20 08:45:001-231-j19-118.TA.1-231-j19-1.1.H85726020308
48034682020-05-20 08:46:002020-05-20 08:47:001-231-j19-118.TA.1-231-j19-1.1.H85029550092020-05-21 16:58:002020-05-21 16:58:0026-301-j19-1131.TA.26-301-j19-1.1.R85918295
4904192020-05-20 08:48:002020-05-20 08:48:001-231-j19-118.TA.1-231-j19-1.1.H857372240010452020-05-21 16:59:002020-05-21 16:59:0026-301-j19-1131.TA.26-301-j19-1.1.R85906026
\n", "
" ], "text/plain": [ - " route_int trip_int stop_int stop_sequence arrival_time \\\n", - "0 0 0 0 14 2020-05-20 08:00:00 \n", - "1 0 0 1 15 2020-05-20 08:01:00 \n", - "2 0 0 2 16 2020-05-20 08:02:00 \n", - "3 0 0 3 17 2020-05-20 08:03:00 \n", - "4 0 0 4 18 2020-05-20 08:04:00 \n", - "5 0 0 5 19 2020-05-20 08:05:00 \n", - "6 0 1 5 1 NaT \n", - "7 0 1 4 2 2020-05-20 08:07:00 \n", - "8 0 1 3 3 2020-05-20 08:07:00 \n", - "9 0 1 6 4 2020-05-20 08:10:00 \n", - "10 0 1 7 5 2020-05-20 08:15:00 \n", - "11 0 1 8 6 2020-05-20 08:17:00 \n", - "12 0 1 9 7 2020-05-20 08:18:00 \n", - "13 0 1 10 8 2020-05-20 08:18:00 \n", - "14 0 1 11 10 2020-05-20 08:22:00 \n", - "15 0 2 11 1 NaT \n", - "16 0 2 10 3 2020-05-20 08:08:00 \n", - "17 0 2 9 4 2020-05-20 08:09:00 \n", - "18 0 2 8 5 2020-05-20 08:09:00 \n", - "19 0 2 7 6 2020-05-20 08:13:00 \n", - "20 0 2 12 7 2020-05-20 08:15:00 \n", - "21 0 2 6 8 2020-05-20 08:16:00 \n", - "22 0 2 1 9 2020-05-20 08:18:00 \n", - "23 0 2 0 10 2020-05-20 08:20:00 \n", - "24 0 2 13 11 2020-05-20 08:23:00 \n", - "25 0 2 14 12 2020-05-20 08:25:00 \n", - "26 0 2 13 13 2020-05-20 08:28:00 \n", - "27 0 2 0 14 2020-05-20 08:30:00 \n", - "28 0 2 1 15 2020-05-20 08:31:00 \n", - "29 0 2 2 16 2020-05-20 08:32:00 \n", - "30 0 2 3 17 2020-05-20 08:33:00 \n", - "31 0 2 4 18 2020-05-20 08:34:00 \n", - "32 0 2 5 19 2020-05-20 08:35:00 \n", - "33 0 3 5 1 NaT \n", - "34 0 3 4 2 2020-05-20 08:37:00 \n", - "35 0 3 3 3 2020-05-20 08:37:00 \n", - "36 0 3 6 4 2020-05-20 08:40:00 \n", - "37 0 3 7 5 2020-05-20 08:45:00 \n", - "38 0 3 8 6 2020-05-20 08:47:00 \n", - "39 0 3 9 7 2020-05-20 08:48:00 \n", - "40 0 3 10 8 2020-05-20 08:48:00 \n", - "41 0 3 11 10 2020-05-20 08:52:00 \n", - "42 0 4 11 1 NaT \n", - "43 0 4 10 3 2020-05-20 08:38:00 \n", - "44 0 4 9 4 2020-05-20 08:39:00 \n", - "45 0 4 8 5 2020-05-20 08:39:00 \n", - "46 0 4 7 6 2020-05-20 08:43:00 \n", - "47 0 4 12 7 2020-05-20 08:45:00 \n", - "48 0 4 6 8 2020-05-20 08:46:00 \n", - "49 0 4 1 9 2020-05-20 08:48:00 \n", + " route_int trip_int stop_int stop_sequence arrival_time \\\n", + "0 0 0 0 1 NaT \n", + "1 0 0 1 2 2020-05-21 16:55:00 \n", + "2 0 0 2 3 2020-05-21 16:57:00 \n", + "3 0 0 3 4 2020-05-21 16:58:00 \n", + "4 0 0 4 5 2020-05-21 16:59:00 \n", "\n", - " departure_time route_id trip_id stop_id \\\n", - "0 2020-05-20 08:00:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573721 \n", - "1 2020-05-20 08:01:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573722 \n", - "2 2020-05-20 08:02:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8573723 \n", - "3 2020-05-20 08:03:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8583071 \n", - "4 2020-05-20 08:04:00 1-231-j19-1 14.TA.1-231-j19-1.1.H 8572603 \n", - "5 NaT 1-231-j19-1 14.TA.1-231-j19-1.1.H 8502879 \n", - "6 2020-05-20 08:06:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502879 \n", - "7 2020-05-20 08:07:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572603 \n", - "8 2020-05-20 08:07:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8583071 \n", - "9 2020-05-20 08:12:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502955 \n", - "10 2020-05-20 08:15:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8502553 \n", - "11 2020-05-20 08:17:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572601 \n", - "12 2020-05-20 08:18:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572600 \n", - "13 2020-05-20 08:18:00 1-231-j19-1 35.TA.1-231-j19-1.2.R 8582462 \n", - "14 NaT 1-231-j19-1 35.TA.1-231-j19-1.2.R 8572747 \n", - "15 2020-05-20 08:07:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572747 \n", - "16 2020-05-20 08:08:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8582462 \n", - "17 2020-05-20 08:09:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572600 \n", - "18 2020-05-20 08:09:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572601 \n", - "19 2020-05-20 08:13:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502553 \n", - "20 2020-05-20 08:15:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572602 \n", - "21 2020-05-20 08:17:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502955 \n", - "22 2020-05-20 08:18:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573722 \n", - "23 2020-05-20 08:20:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573721 \n", - "24 2020-05-20 08:23:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8503598 \n", - "25 2020-05-20 08:27:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573720 \n", - "26 2020-05-20 08:28:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8503598 \n", - "27 2020-05-20 08:30:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573721 \n", - "28 2020-05-20 08:31:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573722 \n", - "29 2020-05-20 08:32:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8573723 \n", - "30 2020-05-20 08:33:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8583071 \n", - "31 2020-05-20 08:34:00 1-231-j19-1 16.TA.1-231-j19-1.1.H 8572603 \n", - "32 NaT 1-231-j19-1 16.TA.1-231-j19-1.1.H 8502879 \n", - "33 2020-05-20 08:36:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502879 \n", - "34 2020-05-20 08:37:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572603 \n", - "35 2020-05-20 08:37:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8583071 \n", - "36 2020-05-20 08:42:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502955 \n", - "37 2020-05-20 08:45:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8502553 \n", - "38 2020-05-20 08:47:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572601 \n", - "39 2020-05-20 08:48:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572600 \n", - "40 2020-05-20 08:48:00 1-231-j19-1 37.TA.1-231-j19-1.2.R 8582462 \n", - "41 NaT 1-231-j19-1 37.TA.1-231-j19-1.2.R 8572747 \n", - "42 2020-05-20 08:37:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572747 \n", - "43 2020-05-20 08:38:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8582462 \n", - "44 2020-05-20 08:39:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572600 \n", - "45 2020-05-20 08:39:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572601 \n", - "46 2020-05-20 08:43:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8502553 \n", - "47 2020-05-20 08:45:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8572602 \n", - "48 2020-05-20 08:47:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8502955 \n", - "49 2020-05-20 08:48:00 1-231-j19-1 18.TA.1-231-j19-1.1.H 8573722 \n", + " departure_time route_id trip_id stop_id \\\n", + "0 2020-05-21 16:53:00 26-301-j19-1 131.TA.26-301-j19-1.1.R 8587020 \n", + "1 2020-05-21 16:55:00 26-301-j19-1 131.TA.26-301-j19-1.1.R 8595511 \n", + "2 2020-05-21 16:57:00 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590603 \n", + "3 2020-05-21 16:58:00 26-301-j19-1 131.TA.26-301-j19-1.1.R 8591829 \n", + "4 2020-05-21 16:59:00 26-301-j19-1 131.TA.26-301-j19-1.1.R 8590602 \n", "\n", - " pickup_type drop_off_type sequence_shift_1 \n", - "0 0 0 15 \n", - "1 0 0 16 \n", - "2 0 0 17 \n", - "3 0 0 18 \n", - "4 0 0 19 \n", - "5 0 0 1 \n", - "6 0 0 2 \n", - "7 0 0 3 \n", - "8 0 0 4 \n", - "9 0 0 5 \n", - "10 0 0 6 \n", - "11 0 0 7 \n", - "12 0 0 8 \n", - "13 0 0 10 \n", - "14 0 0 1 \n", - "15 0 0 3 \n", - "16 0 0 4 \n", - "17 0 0 5 \n", - "18 0 0 6 \n", - "19 0 0 7 \n", - "20 0 0 8 \n", - "21 0 0 9 \n", - "22 0 0 10 \n", - "23 0 0 11 \n", - "24 0 0 12 \n", - "25 0 0 13 \n", - "26 0 0 14 \n", - "27 0 0 15 \n", - "28 0 0 16 \n", - "29 0 0 17 \n", - "30 0 0 18 \n", - "31 0 0 19 \n", - "32 0 0 1 \n", - "33 0 0 2 \n", - "34 0 0 3 \n", - "35 0 0 4 \n", - "36 0 0 5 \n", - "37 0 0 6 \n", - "38 0 0 7 \n", - "39 0 0 8 \n", - "40 0 0 10 \n", - "41 0 0 1 \n", - "42 0 0 3 \n", - "43 0 0 4 \n", - "44 0 0 5 \n", - "45 0 0 6 \n", - "46 0 0 7 \n", - "47 0 0 8 \n", - "48 0 0 9 \n", - "49 0 0 10 " + " sequence_shift_1 \n", + "0 2 \n", + "1 3 \n", + "2 4 \n", + "3 5 \n", + "4 6 " ] }, - "execution_count": 51, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "stop_times_ordered.head(50)" + "stop_times_ordered = stop_times_ordered.sort_values(by=[\"route_int\", \"trip_int\", \"stop_sequence\"])\n", + "stop_times_ordered.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we transform it to array, ready ti be used by raptor" ] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([['2020-05-20T08:00:00.000000000', '2020-05-20T08:00:00.000000000'],\n", - " ['2020-05-20T08:01:00.000000000', '2020-05-20T08:01:00.000000000'],\n", - " ['2020-05-20T08:02:00.000000000', '2020-05-20T08:02:00.000000000'],\n", + "array([[ 'NaT', '2020-05-21T16:53:00.000000000'],\n", + " ['2020-05-21T16:55:00.000000000', '2020-05-21T16:55:00.000000000'],\n", + " ['2020-05-21T16:57:00.000000000', '2020-05-21T16:57:00.000000000'],\n", " ...,\n", - " ['2020-05-20T20:44:00.000000000', '2020-05-20T20:46:00.000000000'],\n", - " ['2020-05-20T18:53:00.000000000', 'NaT'],\n", - " [ 'NaT', 'NaT']],\n", + " ['2020-05-21T15:10:00.000000000', 'NaT'],\n", + " [ 'NaT', '2020-05-21T16:45:00.000000000'],\n", + " ['2020-05-21T17:05:00.000000000', 'NaT']],\n", " dtype='datetime64[ns]')" ] }, - "execution_count": 44, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_array = stop_times_ordered[[\"arrival_time\", \"departure_time\"]].to_numpy()\n", "stop_times_array" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "247920" + "245738" ] }, - "execution_count": 45, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.size(stop_times_array,0)" ] }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "with open('../data/stop_times_array.pkl','wb') as f: pickle.dump(stop_times_array, f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Routes: [[route0_nr. Stops, route0_nr.Trips, route0_pointerStops, route0_pointerTrip],[ route1_nr. Stops, route1_nr. Trips, route1_pointerStops, route1_pointerTrip],…]" + "#### Routes: \n", + "[[route0_nr.Trips, route0_nr. Stops, route0_pointerRoutes, route0_pointerStops_times],[route1_nr.Trips, route1_nr. Stops,, route1_pointerRoutes, route1_pointerStops_times],…]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by getting the number of trips and stops there is for each route" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", - " \n", " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "
n_stopsn_Tripsn_stops
route_int
01858111
13101111
21058111
323111
4623111
\n", "
" ], "text/plain": [ - " n_stops n_Trips\n", + " n_Trips n_stops\n", "route_int \n", - "0 18 58\n", - "1 3 101\n", - "2 10 58\n", - "3 2 3\n", - "4 6 23" + "0 1 11\n", + "1 1 11\n", + "2 1 11\n", + "3 1 11\n", + "4 1 11" ] }, - "execution_count": 47, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "distinct_trips_stops = stop_times_curated.groupby([\"route_int\"]).nunique()[[\"stop_int\", \"trip_int\"]].sort_index().rename(columns={\"trip_int\": \"n_Trips\", \"stop_int\": \"n_stops\"})\n", + "distinct_trips_stops = stop_times_ordered.groupby([\"route_int\"]).nunique()[[\"trip_int\",\"stop_int\"]].sort_index().rename(columns={\"trip_int\": \"n_Trips\", \"stop_int\": \"n_stops\"})\n", "distinct_trips_stops.head(5)" ] }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(279, 2)" + "(16210, 2)" ] }, - "execution_count": 46, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distinct_trips_stops.shape" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We create the pointer for the route stops, by adding the unique stops for each route" + ] + }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 67, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", - " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", "
n_stopsn_TripsCumsum_stopsCumsum_tripsn_stopspointer_routes_stops
route_int
0185801110
13101185811111
210582115911122
3233121711133
46233322011144
\n", "
" ], "text/plain": [ - " n_stops n_Trips Cumsum_stops Cumsum_trips\n", - "route_int \n", - "0 18 58 0 0\n", - "1 3 101 18 58\n", - "2 10 58 21 159\n", - "3 2 3 31 217\n", - "4 6 23 33 220" + " n_Trips n_stops pointer_routes_stops\n", + "route_int \n", + "0 1 11 0\n", + "1 1 11 11\n", + "2 1 11 22\n", + "3 1 11 33\n", + "4 1 11 44" ] }, - "execution_count": 47, + "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "distinct_trips_stops['Cumsum_stops'] = distinct_trips_stops.n_stops.cumsum().shift(1, fill_value=0)\n", - "distinct_trips_stops['Cumsum_trips'] = distinct_trips_stops.n_Trips.cumsum().shift(1, fill_value=0)\n", + "distinct_trips_stops['pointer_routes_stops'] = distinct_trips_stops.n_stops.cumsum().shift(1, fill_value=0)\n", "distinct_trips_stops.head(5)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We create the pointer for stop_times by adding the number of stops in each route, counting duplicates (due to several trips)" + ] + }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 68, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Int64Index: 279 entries, 0 to 278\n", - "Data columns (total 4 columns):\n", - " # Column Non-Null Count Dtype\n", - "--- ------ -------------- -----\n", - " 0 n_stops 279 non-null int64\n", - " 1 n_Trips 279 non-null int64\n", - " 2 Cumsum_stops 279 non-null int64\n", - " 3 Cumsum_trips 279 non-null int64\n", - "dtypes: int64(4)\n", - "memory usage: 10.9 KB\n" - ] - } - ], + "outputs": [], "source": [ - "distinct_trips_stops.info()" + "distinct_trips_stops[\"pointer_stop_times\"] = (stop_times_ordered.groupby([\"route_int\"]).count().stop_id).cumsum().shift(1, fill_value=0)" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", - " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
n_stopsn_TripsCumsum_stopsCumsum_tripsCumsum_stops_shiftCumsum_trips_shiftn_stopspointer_routes_stopspointer_stop_timespointer_routes_stops_shiftpointer_stop_times_shift
route_int
018581110018581111
1310118582115911111112222
21058211593121711122223333
3233121711133220334444
4623332203924311144445555
\n", "
" ], "text/plain": [ - " n_stops n_Trips Cumsum_stops Cumsum_trips Cumsum_stops_shift \\\n", - "route_int \n", - "0 18 58 0 0 18 \n", - "1 3 101 18 58 21 \n", - "2 10 58 21 159 31 \n", - "3 2 3 31 217 33 \n", - "4 6 23 33 220 39 \n", + " n_Trips n_stops pointer_routes_stops pointer_stop_times \\\n", + "route_int \n", + "0 1 11 0 0 \n", + "1 1 11 11 11 \n", + "2 1 11 22 22 \n", + "3 1 11 33 33 \n", + "4 1 11 44 44 \n", "\n", - " Cumsum_trips_shift \n", - "route_int \n", - "0 58 \n", - "1 159 \n", - "2 217 \n", - "3 220 \n", - "4 243 " + " pointer_routes_stops_shift pointer_stop_times_shift \n", + "route_int \n", + "0 11 11 \n", + "1 22 22 \n", + "2 33 33 \n", + "3 44 44 \n", + "4 55 55 " ] }, - "execution_count": 49, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "distinct_trips_stops[\"Cumsum_stops_shift\"] = distinct_trips_stops['Cumsum_stops'].shift(-1, fill_value=0)\n", - "distinct_trips_stops[\"Cumsum_trips_shift\"] = distinct_trips_stops['Cumsum_trips'].shift(-1, fill_value=0)\n", + "distinct_trips_stops[\"pointer_routes_stops_shift\"] = distinct_trips_stops['pointer_routes_stops'].shift(-1, fill_value=0)\n", + "distinct_trips_stops[\"pointer_stop_times_shift\"] = distinct_trips_stops['pointer_stop_times'].shift(-1, fill_value=0)\n", "distinct_trips_stops.head(5)" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 70, "metadata": {}, "outputs": [], "source": [ - "distinct_trips_stops['Cumsum_trips'] = np.where((distinct_trips_stops[\"Cumsum_trips\"] == distinct_trips_stops[\"Cumsum_trips_shift\"]), None, distinct_trips_stops['Cumsum_trips'])\n", - "distinct_trips_stops['Cumsum_stops'] = np.where((distinct_trips_stops[\"Cumsum_stops\"] == distinct_trips_stops[\"Cumsum_stops_shift\"]), None, distinct_trips_stops['Cumsum_stops'])\n" + "distinct_trips_stops['pointer_routes_stops'] = np.where((distinct_trips_stops[\"pointer_routes_stops\"] == distinct_trips_stops[\"pointer_routes_stops_shift\"]), None, distinct_trips_stops['pointer_routes_stops'])\n", + "distinct_trips_stops['pointer_stop_times'] = np.where((distinct_trips_stops[\"pointer_stop_times\"] == distinct_trips_stops[\"pointer_stop_times_shift\"]), None, distinct_trips_stops['pointer_stop_times'])\n" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "n_stops False\n", - "n_Trips False\n", - "Cumsum_stops False\n", - "Cumsum_trips False\n", - "Cumsum_stops_shift False\n", - "Cumsum_trips_shift False\n", + "n_Trips False\n", + "n_stops False\n", + "pointer_routes_stops False\n", + "pointer_stop_times False\n", + "pointer_routes_stops_shift False\n", + "pointer_stop_times_shift False\n", "dtype: bool" ] }, - "execution_count": 51, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distinct_trips_stops.isna().any()" ] }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 72, "metadata": {}, "outputs": [], "source": [ - "with open('../data/routes_array_df.pkl','wb') as f: pickle.dump(distinct_trips_stops[['n_Trips', 'n_stops', 'Cumsum_trips', 'Cumsum_stops']], f)" + "with open('../data/routes_array_df.pkl','wb') as f: pickle.dump(distinct_trips_stops[['n_Trips', 'n_stops', 'pointer_routes_stops', 'pointer_stop_times']], f)" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[58, 18, 0, 0],\n", - " [101, 3, 58, 18],\n", - " [58, 10, 159, 21],\n", + "array([[1, 11, 0, 0],\n", + " [1, 11, 11, 11],\n", + " [1, 11, 22, 22],\n", " ...,\n", - " [2, 2, 22803, 3278],\n", - " [1, 1, 22805, 3280],\n", - " [1, 1, 22806, 3281]], dtype=object)" + " [1, 6, 237432, 245713],\n", + " [1, 13, 237438, 245719],\n", + " [3, 2, 237451, 245732]], dtype=object)" ] }, - "execution_count": 53, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "routes_array = distinct_trips_stops[['n_Trips', 'n_stops', 'Cumsum_trips', 'Cumsum_stops']].to_numpy()\n", + "routes_array = distinct_trips_stops[['n_Trips', 'n_stops', 'pointer_routes_stops', 'pointer_stop_times']].to_numpy()\n", "routes_array" ] }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "279" + "16210" ] }, - "execution_count": 54, + "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.size(routes_array, 0)" ] }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 75, "metadata": {}, "outputs": [], "source": [ "with open('../data/routes_array.pkl','wb') as f: pickle.dump(routes_array, f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "RouteStops: [route0_stop0, route0_stop1,…, route1_stop0, route1_stop1,…, …]\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 57, + "execution_count": 76, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", + " \n", + " \n", " \n", - " \n", " \n", " \n", - " \n", + " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", "
indexroute_intstop_int
13780700005
63951110111
212369220142
13780933043
212370440134
\n", "
" ], - "text/plain": [ - " route_int stop_int\n", - "137807 0 5\n", - "63951 0 11\n", - "212369 0 14\n", - "137809 0 4\n", - "212370 0 13" + "text/plain": [ + " index route_int stop_int\n", + "0 0 0 0\n", + "1 1 0 1\n", + "2 2 0 2\n", + "3 3 0 3\n", + "4 4 0 4" ] }, - "execution_count": 57, + "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "route_stops = stop_times_curated.sort_values([\"route_int\", \"stop_sequence\"])\n", - "route_stops = route_stops[['route_int', 'stop_int']].drop_duplicates()\n", + "route_stops = stop_times_ordered.sort_values([\"route_int\", \"stop_sequence\"])\n", + "route_stops = route_stops[['route_int', 'stop_int']].drop_duplicates().reset_index()\n", "route_stops.head(5)" ] }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 77, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", - "Int64Index: 3282 entries, 137807 to 239684\n", - "Data columns (total 2 columns):\n", - " # Column Non-Null Count Dtype\n", - "--- ------ -------------- -----\n", - " 0 route_int 3282 non-null int64\n", - " 1 stop_int 3282 non-null int64\n", - "dtypes: int64(2)\n", - "memory usage: 76.9 KB\n" + "RangeIndex: 237453 entries, 0 to 237452\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 index 237453 non-null int64\n", + " 1 route_int 237453 non-null int64\n", + " 2 stop_int 237453 non-null int64\n", + "dtypes: int64(3)\n", + "memory usage: 5.4 MB\n" ] } ], "source": [ "route_stops.info()" ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 78, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "279" + "16210" ] }, - "execution_count": 59, + "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "route_stops.route_int.nunique()" ] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 79, "metadata": {}, "outputs": [], "source": [ "with open('../data/route_stops_df.pkl','wb') as f: pickle.dump(route_stops, f)" ] }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 80, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([ 5, 11, 14, ..., 1192, 1069, 1069])" + "array([ 0, 1, 2, ..., 1187, 573, 778])" ] }, - "execution_count": 61, + "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "route_stops_array = route_stops.stop_int.to_numpy()\n", "route_stops_array" ] }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 81, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1566" + "1407" ] }, - "execution_count": 62, + "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.size(np.unique(route_stops_array))" ] }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 82, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "3282" + "237453" ] }, - "execution_count": 63, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.size(route_stops_array, 0)" ] }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(3282,)" + "(237453,)" ] }, - "execution_count": 64, + "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "route_stops_array.shape" ] }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 84, "metadata": {}, "outputs": [], "source": [ "with open('../data/route_stops_array.pkl','wb') as f: pickle.dump(route_stops_array, f)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check if pointers are correct\n", + "It is fundamental that the indexes, that serve as pointers, in Routes are correct" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by looking at where the indexes for stop_times and route_stops diverge. This will allow us to change. We can see that Route stops should have a new route at 318 while stop_times should have it at 348, so we try with that" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_Tripsn_stopspointer_routes_stopspointer_stop_timespointer_routes_stops_shiftpointer_stop_times_shift
route_int
24113315315318348
25114318348332362
26122332362334386
27153334386337431
28103337431340461
\n", + "
" + ], + "text/plain": [ + " n_Trips n_stops pointer_routes_stops pointer_stop_times \\\n", + "route_int \n", + "24 11 3 315 315 \n", + "25 1 14 318 348 \n", + "26 12 2 332 362 \n", + "27 15 3 334 386 \n", + "28 10 3 337 431 \n", + "\n", + " pointer_routes_stops_shift pointer_stop_times_shift \n", + "route_int \n", + "24 318 348 \n", + "25 332 362 \n", + "26 334 386 \n", + "27 337 431 \n", + "28 340 461 " + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "distinct_trips_stops.loc[24:29].head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can check if the pointer indicates the routes index number. At the pointer_routes should indicate the first stop of a new route. We try with 318 to see if route_stops has a new route at this index. It does so it works" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexroute_intstop_int
3153152425
3163212427
3173162426
3183482525
3193492526
\n", + "
" + ], + "text/plain": [ + " index route_int stop_int\n", + "315 315 24 25\n", + "316 321 24 27\n", + "317 316 24 26\n", + "318 348 25 25\n", + "319 349 25 26" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "route_stops.loc[315:320].head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We go and see if stop_times has a new route at 348. It does, so it works" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
route_inttrip_intstop_intstop_sequencearrival_timedeparture_timeroute_idtrip_idstop_idsequence_shift_1
34624342622020-05-21 18:53:002020-05-21 18:53:0030-57-Y-j19-127.TA.30-57-Y-j19-1.1.H85022093
34724342732020-05-21 18:59:002020-05-21 18:59:0030-57-Y-j19-127.TA.30-57-Y-j19-1.1.H85032023
34825352532020-05-21 13:14:002020-05-21 13:15:0026-24-j19-1662.TA.26-24-j19-1.254.R85022084
34925352642020-05-21 13:17:002020-05-21 13:17:0026-24-j19-1662.TA.26-24-j19-1.254.R85022095
35025352752020-05-21 13:21:002020-05-21 13:21:0026-24-j19-1662.TA.26-24-j19-1.254.R85032026
\n", + "
" + ], + "text/plain": [ + " route_int trip_int stop_int stop_sequence arrival_time \\\n", + "346 24 34 26 2 2020-05-21 18:53:00 \n", + "347 24 34 27 3 2020-05-21 18:59:00 \n", + "348 25 35 25 3 2020-05-21 13:14:00 \n", + "349 25 35 26 4 2020-05-21 13:17:00 \n", + "350 25 35 27 5 2020-05-21 13:21:00 \n", + "\n", + " departure_time route_id trip_id stop_id \\\n", + "346 2020-05-21 18:53:00 30-57-Y-j19-1 27.TA.30-57-Y-j19-1.1.H 8502209 \n", + "347 2020-05-21 18:59:00 30-57-Y-j19-1 27.TA.30-57-Y-j19-1.1.H 8503202 \n", + "348 2020-05-21 13:15:00 26-24-j19-1 662.TA.26-24-j19-1.254.R 8502208 \n", + "349 2020-05-21 13:17:00 26-24-j19-1 662.TA.26-24-j19-1.254.R 8502209 \n", + "350 2020-05-21 13:21:00 26-24-j19-1 662.TA.26-24-j19-1.254.R 8503202 \n", + "\n", + " sequence_shift_1 \n", + "346 3 \n", + "347 3 \n", + "348 4 \n", + "349 5 \n", + "350 6 " + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_times_ordered.loc[346:400].head(5)" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ "Stops: [[stop0_pointerRoutes, stop0_pointerTransfer], [stop1_pointerRoutes, stop1_pointerTransfer], …]" ] }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 88, "metadata": {}, "outputs": [ { - "ename": "ValueError", - "evalue": "You are trying to merge on int64 and object columns. If you wish to proceed you should use pd.concat", + "ename": "KeyError", + "evalue": "\"None of ['stop_int'] are in the columns\"", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mstops_join\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mroute_stops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtransfers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"stop_id\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"left\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"stop_int\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop_duplicates\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mstops_join\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mjoin\u001b[0;34m(self, other, on, how, lsuffix, rsuffix, sort)\u001b[0m\n\u001b[1;32m 7207\u001b[0m \"\"\"\n\u001b[1;32m 7208\u001b[0m return self._join_compat(\n\u001b[0;32m-> 7209\u001b[0;31m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mon\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mhow\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlsuffix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlsuffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrsuffix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrsuffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7210\u001b[0m )\n\u001b[1;32m 7211\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_join_compat\u001b[0;34m(self, other, on, how, lsuffix, rsuffix, sort)\u001b[0m\n\u001b[1;32m 7230\u001b[0m \u001b[0mright_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7231\u001b[0m \u001b[0msuffixes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlsuffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrsuffix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 7232\u001b[0;31m \u001b[0msort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7233\u001b[0m )\n\u001b[1;32m 7234\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0mindicator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindicator\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m \u001b[0mvalidate\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalidate\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 87\u001b[0m )\n\u001b[1;32m 88\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m 629\u001b[0m \u001b[0;31m# validate the merge keys dtypes. We may need to coerce\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 630\u001b[0m \u001b[0;31m# to avoid incompat dtypes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 631\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_coerce_merge_keys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 632\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 633\u001b[0m \u001b[0;31m# If argument passed to validate,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36m_maybe_coerce_merge_keys\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1144\u001b[0m \u001b[0minferred_right\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mstring_types\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0minferred_left\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mstring_types\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1145\u001b[0m ):\n\u001b[0;32m-> 1146\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1147\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1148\u001b[0m \u001b[0;31m# datetimelikes must match exactly\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: You are trying to merge on int64 and object columns. If you wish to proceed you should use pd.concat" + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mstops_join\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mroute_stops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtransfers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"stop_int\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"left\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"stop_int\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop_duplicates\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mstops_join\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mset_index\u001b[0;34m(self, keys, drop, append, inplace, verify_integrity)\u001b[0m\n\u001b[1;32m 4301\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4302\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmissing\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4303\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"None of {missing} are in the columns\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4304\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4305\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: \"None of ['stop_int'] are in the columns\"" ] } ], "source": [ "stops_join = route_stops.join(transfers.set_index(\"stop_int\"), how=\"left\", on=\"stop_int\").drop_duplicates()\n", "stops_join.head(5)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'stops_join' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mstops_join\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstop_int\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnunique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'stops_join' is not defined" ] } ], "source": [ "stops_join.stop_int.nunique()" ] }, { "cell_type": "code", "execution_count": 198, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
n_Routesn_Transfers
stop_int
062
117
217
317
417
\n", "
" ], "text/plain": [ " n_Routes n_Transfers\n", "stop_int \n", "0 6 2\n", "1 1 7\n", "2 1 7\n", "3 1 7\n", "4 1 7" ] }, "execution_count": 198, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distinct_route_transfers = stops_join.groupby([\"stop_int\"]).nunique().rename(columns={\"route_int\": \"n_Routes\", \"stop_int2\": \"n_Transfers\"})\n", "distinct_route_transfers = distinct_route_transfers[[\"n_Routes\", \"n_Transfers\"]].sort_index().rename(columns={\"route_int\": \"n_Trips\", \"stop_int2\": \"n_stops\"})\n", "distinct_route_transfers.head(5)" ] }, { "cell_type": "code", "execution_count": 199, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1566, 2)" ] }, "execution_count": 199, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distinct_route_transfers.shape" ] }, { "cell_type": "code", "execution_count": 200, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
n_Routesn_TransfersCumsum_routesCumsum_transfers
stop_int
06262
11779
217816
317923
4171030
\n", "
" ], "text/plain": [ " n_Routes n_Transfers Cumsum_routes Cumsum_transfers\n", "stop_int \n", "0 6 2 6 2\n", "1 1 7 7 9\n", "2 1 7 8 16\n", "3 1 7 9 23\n", "4 1 7 10 30" ] }, "execution_count": 200, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distinct_route_transfers['Cumsum_routes'] = distinct_route_transfers.n_Routes.cumsum().shift(1, fill_value=0)\n", "distinct_route_transfers['Cumsum_transfers'] = distinct_route_transfers.n_Transfers.cumsum().shift(1, fill_value=0)\n", "distinct_route_transfers.head(5)" ] }, { "cell_type": "code", "execution_count": 201, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
n_Routesn_TransfersCumsum_routesCumsum_transfersCumsum_routes_shiftCumsum_transfers_shift
stop_int
0626279
11779816
217816923
3179231030
41710301133
\n", "
" ], "text/plain": [ " n_Routes n_Transfers Cumsum_routes Cumsum_transfers \\\n", "stop_int \n", "0 6 2 6 2 \n", "1 1 7 7 9 \n", "2 1 7 8 16 \n", "3 1 7 9 23 \n", "4 1 7 10 30 \n", "\n", " Cumsum_routes_shift Cumsum_transfers_shift \n", "stop_int \n", "0 7 9 \n", "1 8 16 \n", "2 9 23 \n", "3 10 30 \n", "4 11 33 " ] }, "execution_count": 201, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distinct_route_transfers[\"Cumsum_routes_shift\"] = distinct_route_transfers['Cumsum_routes'].shift(-1, fill_value=0)\n", "distinct_route_transfers[\"Cumsum_transfers_shift\"] = distinct_route_transfers['Cumsum_transfers'].shift(-1, fill_value=0)\n", "distinct_route_transfers.head(5)" ] }, { "cell_type": "code", "execution_count": 202, "metadata": {}, "outputs": [], "source": [ "distinct_route_transfers['Cumsum_routes'] = np.where((distinct_route_transfers[\"Cumsum_routes\"] == distinct_route_transfers[\"Cumsum_routes_shift\"]), None, distinct_route_transfers['Cumsum_routes'])\n", "distinct_route_transfers['Cumsum_transfers'] = np.where((distinct_route_transfers[\"Cumsum_transfers\"] == distinct_route_transfers[\"Cumsum_transfers_shift\"]), None, distinct_route_transfers['Cumsum_transfers'])\n" ] }, { "cell_type": "code", "execution_count": 203, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "n_Routes False\n", "n_Transfers False\n", "Cumsum_routes False\n", "Cumsum_transfers True\n", "Cumsum_routes_shift False\n", "Cumsum_transfers_shift False\n", "dtype: bool" ] }, "execution_count": 203, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distinct_route_transfers.isna().any()" ] }, { "cell_type": "code", "execution_count": 204, "metadata": {}, "outputs": [], "source": [ "with open('../data/stops_df.pkl','wb') as f: pickle.dump(distinct_route_transfers[['Cumsum_routes', 'Cumsum_transfers']], f)" ] }, { "cell_type": "code", "execution_count": 205, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[6, 2],\n", " [7, 9],\n", " [8, 16],\n", " ...,\n", " [3280, 10566],\n", " [3281, 10571],\n", " [3282, 10575]], dtype=object)" ] }, "execution_count": 205, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stops_array = distinct_route_transfers[['Cumsum_routes', 'Cumsum_transfers']].to_numpy()\n", "stops_array" ] }, { "cell_type": "code", "execution_count": 206, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1566" ] }, "execution_count": 206, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.size(stops_array, 0)" ] }, { "cell_type": "code", "execution_count": 228, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1566, 2)" ] }, "execution_count": 228, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stops_array.shape" ] }, { "cell_type": "code", "execution_count": 207, "metadata": {}, "outputs": [], "source": [ "with open('../data/stops_array.pkl','wb') as f: pickle.dump(stops_array, f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "StopRoutes: [stop0_route1, stop0_route3, stop1_route1, stop2_route1, stop1_route4, …]" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
route_intstop_int
13780705
63951011
212369014
13780904
212370013
\n", "
" ], "text/plain": [ " route_int stop_int\n", "137807 0 5\n", "63951 0 11\n", "212369 0 14\n", "137809 0 4\n", "212370 0 13" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "route_stops.head(5)" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
route_idroute_intstop_idstop_int
492081-231-j19-1085737210
492091-231-j19-1085737221
492101-231-j19-1085737232
492111-231-j19-1085830713
492121-231-j19-1085726034
\n", "
" ], "text/plain": [ " route_id route_int stop_id stop_int\n", "49208 1-231-j19-1 0 8573721 0\n", "49209 1-231-j19-1 0 8573722 1\n", "49210 1-231-j19-1 0 8573723 2\n", "49211 1-231-j19-1 0 8583071 3\n", "49212 1-231-j19-1 0 8572603 4" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_routes = stop_times_curated[[\"route_id\", \"route_int\", \"stop_id\", \"stop_int\"]].drop_duplicates().sort_values([\"stop_int\", \"route_int\"])\n", "stop_routes.head(5)" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3282, 4)" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_routes.shape" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "279" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_times_curated.route_id.nunique()" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "279" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_routes.route_int.nunique()" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "with open('../data/stop_routes_df.pkl','wb') as f: pickle.dump(stop_routes, f)" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 0, 0, 0, ..., 274, 275, 275])" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_routes_array = stop_routes[\"route_int\"].to_numpy()\n", "stop_routes_array" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3282" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.size(stop_routes_array, 0)" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3282,)" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stop_routes_array.shape" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [], "source": [ "with open('../data/stop_routes_array.pkl','wb') as f: pickle.dump(stop_routes_array, f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Transfer: [[[stop0_nameTargetStop1, transferTime1], [stop0_nameTargetStop2, transferTime2],….], [stop1_nameTargetStop1, transferTime1], [stop1_nameTargetStop2, transferTime2],….],…]" ] }, { "cell_type": "code", "execution_count": 224, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stop_int2Transfer_time_sec
01166146
11270360
228
3316
440
\n", "
" ], "text/plain": [ " stop_int2 Transfer_time_sec\n", "0 1166 146\n", "1 1270 360\n", "2 2 8\n", "3 3 16\n", "4 4 0" ] }, "execution_count": 224, "metadata": {}, "output_type": "execute_result" } ], "source": [ "transfer_pandas = transfers.sort_values(\"stop_int\")[[\"stop_int2\", \"Transfer_time_sec\"]]\n", "transfer_pandas.head()" ] }, { "cell_type": "code", "execution_count": 226, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1794" ] }, "execution_count": 226, "metadata": {}, "output_type": "execute_result" } ], "source": [ "transfer_pandas.stop_int2.nunique()" ] }, { "cell_type": "code", "execution_count": 225, "metadata": {}, "outputs": [], "source": [ "with open('../data/transfer_df.pkl','wb') as f: pickle.dump(transfers.sort_values(\"stop_id\"), f)" ] }, { "cell_type": "code", "execution_count": 219, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1166, 146],\n", " [1270, 360],\n", " [ 2, 8],\n", " ...,\n", " [ 108, 371],\n", " [ 102, 439],\n", " [1739, 519]])" ] }, "execution_count": 219, "metadata": {}, "output_type": "execute_result" } ], "source": [ "transfer_array = transfer_pandas.to_numpy()\n", "transfer_array" ] }, { "cell_type": "code", "execution_count": 220, "metadata": {}, "outputs": [], "source": [ "with open('../data/transfer_array.pkl','wb') as f: pickle.dump(transfer_array, f)" ] }, { "cell_type": "code", "execution_count": 221, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12564" ] }, "execution_count": 221, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.size(transfer_array, 0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "read files as pickles" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [], "source": [ "with open('../data/stop_times_array.pkl','rb') as f: arrayname1 = pickle.load(f)" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "with open('../data/transfer_array.pkl','rb') as f: arrayname2 = pickle.load(f)" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 'NaT', '2020-05-19T08:49:00.000000000'],\n", " ['2020-05-19T08:50:00.000000000', '2020-05-19T08:50:00.000000000'],\n", " ['2020-05-19T08:51:00.000000000', '2020-05-19T08:51:00.000000000'],\n", " ...,\n", " [ 'NaT', '2020-05-19T16:07:00.000000000'],\n", " ['2020-05-19T16:11:00.000000000', '2020-05-19T16:15:00.000000000'],\n", " ['2020-05-19T16:19:00.000000000', 'NaT']],\n", " dtype='datetime64[ns]')" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arrayname1" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1166, 146],\n", " [1270, 360],\n", " [ 2, 8],\n", " ...,\n", " [ 108, 371],\n", " [ 102, 439],\n", " [1739, 519]])" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arrayname2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }