{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Compute probability of missing a transfer from delays distributions\n", "\n", "Let's first have a look at a slice of the dictionnary of distribution" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pickle \n", "import gzip\n", "from itertools import islice\n", "import matplotlib as mlt \n", "import matplotlib.pyplot as plt\n", "import numpy as np \n", "import pandas as pd \n", "import math" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Functon to take a slice from a dictionnary - head equivalent\n", "def take(n, iterable):\n", " \"Return first n items of the iterable as a list\"\n", " return list(islice(iterable, n))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load dictionnaries of distributions" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "len dict_real : 12309\n", "[('10.TA.1-11-B-j19-1.1.R__8590314', array([0, 2, 2, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('10.TA.1-11-B-j19-1.1.R__8590317', array([0, 3, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('10.TA.1-11-B-j19-1.1.R__8594304', array([0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('10.TA.1-11-B-j19-1.1.R__8594307', array([0, 1, 5, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('10.TA.1-11-B-j19-1.1.R__8594310', array([0, 1, 3, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))]\n", "len dict_all : 246968\n", "[('1286.TA.26-32-j19-1.12.H__8591182', array([ 0, 1158, 306, 162, 94, 24, 28, 21, 3, 2, 0,\n", " 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('1286.TA.26-32-j19-1.12.H__8591184', array([ 1, 762, 552, 292, 118, 48, 13, 8, 0, 1, 1, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0])), ('1286.TA.26-32-j19-1.12.H__8591195', array([ 0, 1083, 444, 143, 64, 35, 16, 9, 3, 1, 0,\n", " 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), ('1286.TA.26-32-j19-1.12.H__8591200', array([ 2, 239, 227, 228, 212, 128, 74, 42, 29, 17, 3, 3, 2,\n", " 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 1])), ('1286.TA.26-32-j19-1.12.H__8591209', array([ 0, 1151, 308, 169, 94, 24, 29, 16, 4, 3, 1,\n", " 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))]\n" ] } ], "source": [ "with gzip.open(\"../data/distributions_geschaetzAndReal.pkl.gz\", \"rb\") as input_file:\n", " d_real = pickle.load(input_file)\n", "\n", "with gzip.open(\"../data/distributions.pickle\", \"rb\") as input_file:\n", " d_all = pickle.load(input_file)\n", "\n", "# display a slice of it\n", "print('len dict_real : ', len(d_real))\n", "print(take(5, d_real.items()))\n", "\n", "# display a slice of it\n", "print('len dict_all : ', len(d_all))\n", "print(take(5, d_all.items()))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Probability using cumulative distribution based on frequency of delays \n", "\n", "When we have __enough data__ and no ambiguity about `trip_id` and `stop_id` for a given distribution, then we can compute the probability $P(x \\leq X)$ for every x (delay in minute). \n", "\n", "Let's take a __threshold of 100__ sample points (=number of time we could measure a delay) as a minimum number of points to use this approach. \n", "\n", "_How many keys in our distionnary of distribution have at least this number of samples ?_" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def plot_data_points_hist(dico):\n", " list_tot_points = []\n", " for key in dico:\n", " distrib = dico[key]\n", " list_tot_points.append(np.sum(distrib))\n", "\n", " tot_per_key = np.array(list_tot_points)\n", " binwidth = 100\n", " n_keys_less_than_binwidth = np.sum(np.array(tot_per_key < binwidth))\n", " perc_key_to_recover = round(100 * ( n_keys_less_than_binwidth / len(tot_per_key) ), 2)\n", " plt.figure(figsize = (10,5))\n", " plt.hist(tot_per_key, bins = range(min(tot_per_key), max(tot_per_key) + binwidth, binwidth))\n", " plt.title(\"Total number of data points per trip_id / stop_id key. N keys with less than {0} points: {1} ({2}%)\"\\\n", " .format(binwidth, n_keys_less_than_binwidth, perc_key_to_recover))\n", " plt.xlabel('n data points')\n", " plt.ylabel('n keys')\n", " return plt.show()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAo4AAAFNCAYAAACOmu5nAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deZglVXn48e/LjmwDMvKTGWRQiQZ3IYJxI2JgABFUUBRlCRENLpgYFYwRQVDQKGIAEQXZVEBiFBWDuIAryODCKjqyyM7ADPsO7++Pcy7cufTtPj3Tt5eZ7+d5+um6p7ZTVaeq3jp1qioyE0mSJGkky0x0BiRJkjQ1GDhKkiSpiYGjJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKnJEh84RkRGxDMnQT7OiYh/nqB5rxwR342IOyLimw3Dbx4R141H3hZHROwSET+c6HyMl4j4QUTsNqhpRMSsur8stzjzmCgRcXdEPL1Pv90j4hd9+k3q5R4u75NFRDytrv9lhxmm+VgcER+PiJPHLoejMxXW+Xha2o61iyIitoqIb090PhZHRLw3Ig4dabgJCxzrQabz92hE3Nf1e5c+40yJgGYS2hFYB3hyZu40lhOeyANsZn4tM7dsGXYynwhaT5KZuXVmnrA481rcaUTEuqPdByPi+Ig4aFHn2SozV83MKwc9n8XRFaSe2ZN+ckR8fIKytdgy8691/T8CE3uhPFoTfeEQEW+KiF9FxL0Rcc4Q/V8YERfW/hdGxAu7+kVEHBoRt9W/QyMixjqPk+FYGxHH9V581P3mxoi4MyL+1K/MRcTH6riv6Ur7dERcW8e9JiI+0jPOdhFxSY1JfhURG42QxYOBQ7rG/0REXBwRD/fu2xHxkZ4Y6L4aB63dJ/9X98RIP+zqt2JEHBYRN0TEgog4KiKW7+r/+Zr+64iY2ZX+1oj4Qs+svgzsEhFPGW5BJyxwrAeZVTNzVeCvwHZdaV+bqHxNdvVAMdrttj7wp8x8eBB5UrtFOTkt4jYflG2A/5voTCwBNo2Iv5/oTGhSmA98nq6goyMiVgC+A5wMrAmcAHynpgPsBewAvAB4PrAd8M5xyPO4ioiXA88YotengFmZuTrwOuCgiNi4Z9xnADsBN/aMeyzw7Dru31MCpjfUcTYEvga8C5gGfBc4o9/xOyL+DlgjM8/rSp4LfAj4fu/wmfnJnhjoUOCczLx1mNXQHSN1B/H7ApsAzwX+Bngx8NGar5cAGwP/D/hFHZaIWAP4YGe4rnzdD/wA2HWYfEBmTvgfcDXwmtq9ImUnuqH+fb6mrQLcBzwK3F3/1gVeAvwauJ1SMI4AVuiadgLP7DPfc4BPAL8E7gJ+CKxd+20OXDdMPj8OfJOyQ98FXFw32n7ALcC1wJY98/oU8BvgTsrBYK2u/psBv6rL8Qdg855xD675vG+o5QH+tg53O3Ap8LqafgDwIPBQXWd7DjHuysDxwALgMkqBuq6r/77AX+pyXga8vmue9wOP1GnfXtO3BX5Xl/Na4OPDbPvNgeuAjwC31nW8S1f/NYATgXnANZSCvkzttzvwi55t/S7gz3U9HAnEMPncpi7PXcD1wL/3yePudd0fAdwB/BHYoiePx1LK3/XAQcCyPeMeBtwGHNQz7dk92+cP/bZ5TfvnljwNs767p7Es8F91vV8JvLuuw+WGGf9bwBuGSI+6jLfU7X4x5UC2V122B+vyfXe48lr7HQ8cDZxdt825wPoNy/bYvg48GTij5uU3lP38F33Gm9W93MAbKeXwuZSL6075vw04jbrfUk4I7+2Z1kXU/WOEeX0Y+GlX+sn02U94Yjn/DOUksEa/sgesQAlIntc13lOAe4HpwNrA9+r6nw/8nLpf9cz7AOC/a/fywD3AZ7qOG/cDa3WvQ0q5faT2uxs4Yrj9s88yfxw4ufH4uDul/N4FXEU9flD2mXMp+8etwKl95vXXmrfOeeWlnXVO2T8W1Olu3TXOHsDldZ5XAu8c4pj2Acr+cCOwR0P5/WdK8NCdtmXdrtGT39m1+1fAXl399gTOm6rH2j75Xo5yPnk+w5/Pn1XX9Zt60v+vzv9q6vl7iHFnUI5ZH6q/3wN8v6v/MpTj8JDHWOBjwFf69Ou7b9f+UcvQbsMMM1ze5wA7df1+K3Bt7X4z8KnaPRs4s3YfAby1z/R2oevYNOQwrRtvkH8sHJAdCJxHOchNrzvGJ7oLfs+4G1MOKstRDl6XA+/vKeDDBY5/oQR8K9ffhwwzr+58frzuIFvVeZ9IObj8B+UA+w7gqp55XU85Ga0C/A/1wFgL7W21cC8D/GP9Pb1r3L8Cz6nzWr4nX8tTrm4+QjlhvJqygz6rK68nD7P+D6GcONYC1gMuYeHAcSdKkL5MLYj3AE+t/Xan54Rc193z6vDPB24Gdugz782Bh4HPUS4QXlWn38n7iZQge7W6ff9EDX5751239fcoV4hPoxwAZw+TzxuBV9TuNYEX98nj7jWP/1rX9ZspJ6NOAPG/wJfqdn0KJVB5Z8+4763bbuUhpv+E7TPUNueJgWPfPA2zrbun8S5KwLle3fY/ZZjAsc7nVmC1IfptBVxY133nBNIpI8fTFTAzcnk9vv5+ZS0Th/duuz756w4cT6EEeatQ9rnr+02DhYOePWreOtPZh3I8mlnz8iXgG7Xfm4Dzu6bzAsp+u8IweezMa7Wap87xZMTAkbI/fRk4C3hSQ9k7Cji0azr78Hjg/ilKcL58/XsFQwRxddtcXLv/nnK8PL+r3x9612FvOWvZP4fbJxjm+FiX+86usvNU4Dm1+xuU4/EywErAy0fa/j3r/CHKcXxZ4F8oFRlR+29LqQELyjHrXurxg8ePaQfWdbtN7b/mCOV3qMDxX4Ef9KR9D/hA7b4D2LSr3ybAXX2m38nXpDrWUgLPIbdN7f9B4PDefbyr/1F1/SbwW2DVrn47Ad+p3VfTE3xRLgrvruNeCcys6e+hBln197KU8/0+ffL4TeCDffqNFDi+suZh1WGGuZpyHp1HqeB6QVe/OXQFy5TALykXAs+lHDtWplxwfqaWkbOHmdeLgfnDldXJcvur2y7AgZl5S2bOo1zxvr3fwJl5YWael5kPZ+bVlIPoq0Yxv69m5p8y8z7KieaFI43Q5eeZeVaWW8DfpBzIDsnMhygnrlkRMa1r+JMy85LMvAf4T+BNURqTv41SSM/MzEcz82xKYdima9zjM/PSupwP9eRjM2DVOu8HM/MnlJ36LY3L8Sbg4Mycn5nXAgu1e8jMb2bmDTVvp1KuMl/Sb2KZeU5mXlyHv4hyAB9pm/xnZj6QmedSanI662ZnYL/MvKtu388yTHmgrIPbM/OvlEBouO35ELBRRKyemQsy87fDDHsL8PnMfKiugyuAbSNiHcp2en9m3pOZt1Bq3nbuGveGzPzvuu3uG3YtLGy4bd43T6OY/pvq+Ndm5nxKMDGcV1IChbuG6PcQ5YTzbMrJ9fLM7L011NFSXr+fmT/LzAcoJ/+XRsR6LQtVy80bgY/VbXIJ5RbfSN5POUltnplza9q7gP/IzOtqXj4O7FhvWZ0B/E29rQWlXJ6amQ82zOs+Ss1ca9vP5Sn70VqUW1b3NpS9E4C3dLV5eztwUu1+iBJkrV/Lz8+znjV6/BrYMCKeTNn+xwIzImJVyj59bmP+O0azf3aMdHx8FHhuRKycmTdm5qVdy7g+sG5m3p+Zo213d01mfjlLu80TKOtrHYDM/H5m/iWLcykn81d0jfsQ5Tz2UGaeSQkMnjXK+UPZT+7oSbuDsq8N1f8OYNUR2jlOqmNtZk7rt23qPv9OSo3ekDJzb8r6eAXljsgDddzVgE9SLpj6jXtIHffFlH2jsy5/BLwqynMVK/D4Re6T+kxqGuVid1HsBpyemXcPM8wulGB+fcq6Pqsrtvg/YJ+ImB4R/w94X01/Uj32/Q/l4vdpwKcp5/f3RcT7IuJnEfG1njjlLkrQ2ddkDBzXpVSTd1xT04YUEX8TEd+LiJsi4k5KQRmygWkfN3V130vZEVvd3NV9H3BrPch0ftMzvWu7uq+hnAzWphSGnSLi9s4f8HLKgWqocXutS6mafrRn+jMal2PdIfL2mIjYNSJ+35W35zLMOo6ITSPipxExLyLuoJx8h9smC2ow3T3/des4y/PE8jDcco1me76RcvK5JiLOjYiXDjPs9T0n1k4e1695vLFr/XyJUvvTMdy2G85I4/XLU6tht/sQtgHOHKpHDf6OoNyyuiUijomI1Yeb7wjl9bF81QPqfNqXbTql9nA0ywYlaDwyM7sf/lkf+N+ubXs55TbcOlnaA50KvK22QX0LjwdmLb4CrBMR2zUM+0xge+CArsB02LKXmedT9oHNI+LZdRpn1HE/Q6lZ/WFEXBkR+w4103qhM4cSJL6SEij+CngZixY4Lsrxtu/xsR433kw5xtwYEd+vywqlfVkAv4mISyPinxY1r5l5b+1cFSAito6I8yJifs3PNix8jLstF25TPtpzS8fdQO9+tDqPBym9/VcH7u5zEQBT41jb7fOUALw3eF5IZj5Sg8+ZlNphKBd5J9UgeLhxMzN/RzlnH1DT/kgJ6I6g1JauTbnV3u/BwAU8Hsw3i4gnUWpFh72wzcxfZuZ9mXlvZn6KUkvbuVA5mHIr//eUffPblED95jruYZn5gsx8M6Wy4GeU2G8vYAvKMa17/1+NJ16sLGQyBo43UA4UHU+raVCqX3t9kXK7bcMsjVw/QjlYLK576Lq6qFdk0xdzmt01Jk+jbNxbKSe4k+qVV+dvlXo11NHvQABl/azX8wDF0yi3wlrcOETeAIiI9Sm3x95DeSp7GuVWdmcdD5Wvr1NOUOtl5hqUW2LDbZM1I2KVnvnfQFk3nVqD7n6ty9XtCfnMzAsyc3vKifbblBrnfmb0XMV38ngt5Qp37a5tt3pmPme4eY+Ut8bx+uWpVd/t3kffwBEgM7+QmRsDG1Gaf3yw06tn0Jby+li+au3WWrQv2zzKLbnRLBuU9mQfjYg3dqVdS2nb1r1vrpSZnbyeQKkN2AK4NzN/3ZhHagB4AKX95UjHrMspt9F/EBGdmquWsncCpcbu7ZRajfvrvO/KzA9k5tMpDxX8W0Rs0Wfe51JuS78IuKD+3opy1+Fn/RZvhOUZjWGPj1nu+vwj5UL7j5TjFZl5U2a+IzPXpdRaHRVDvw5oVHmNiBUptTj/RbmAmEbZL8b8aWZK+9/n9+znz6/pnf4v6Or3gq5+Q5kKx9puWwCfqRVDnUD11xHx1j7DL8fjD9FsQalZ64y7HnBaRHy4YVwy8/TMfG5mPhnYn1Ljd0GfcS+iHPNG6/WUi+JzRjleUstbDSjfk5kz6v58G3Bhz4U59Q7FXpQmFM8FLspyJ+sCSpnq+FtKO+K+JmPg+A3KwXt6lEfTP0ZpIwAlgn5yfSKoYzVKG5e765XmvzA2/gSsFBHbRnm0/aOUdiGL420RsVG9yjiQciB/hLJ820V5D9SyEbFSrSKfOfzkHtOpWfhQRCwfEZtTnq47pXH804D9ImLNOs/3dvVbhVJI5wFExB6UQtdxMzAzHn/KD8o2mZ+Z99enuvrt5N0OiIgVIuIVwGuBb9Z1cxpwcESsVoPYf+Px8jAaC+WzzmuXiFij7jx3Um559fMUykFo+YjYibJznZnlduwPgc9GxOoRsUxEPCMiRtNc4mZKs4bR7o9D5mkU459Wx58ZEWuy8FXnQiJiA2DFzLy8T/+/qzXNnQco7ufx9Xkz0P1+xZbyuk1EvLxur09QGvw31dzWcvMt4OMR8aQor9HYrWHUSykNyI+MiNfVtKMp5W/9upzTI2L7rnn9ui7nZxldbWPHSZT2d7NHGjAzv0G5MP5RRDyjseydTDk5vY3Sho26HK+NiGfWgOQOSi1qv/J/LuUpy8tqsHsOpT3eVVmaEw2ld5svjr7Hx4hYJyK2r8HQA5QauEfrMu7UdQxdQDmODbWM82p6a35XoJwL5gEPR8TWlIuORdJZJkrgskxdvs7rVM6hbJv3RXntyntq+k/q/xMpQf+MiFiX8kDO8SPMcrIfa7v9DSUYfiGP3wrfjnIX4CkRsXNErFrX4VaUWv8f1+G2oJyrOuPeQLmAOLLuK++s57yo56l3d41LRGxcpzsdOAY4o9ZEDuVMeppj1WPbSpQ4a7m6XXvfc7obcOIwNcSdd6S+rK7HlSLig5Qa0F/W/jOivCYtImIzSjO4/YeY1OcobS3vpTyP8XdRLso3p7Tv7HgV5cnqviZj4HgQ5dbIRZSnnH5b0zrVx98Aroxyy2Jd4N8pgcldlCvNU8ciE7VqfG/K7aTrKSfDxX2H5EmUnfomysnifXVe11JuQ32EcjC6llJb07R96sF8O2BrypXjUcCuwxTyXgdQbktcRTkRPXYCzMzLKCfFX1MOCM+jFtjqJ5QT7k0R0XmVwN7AgRFxFyXwH+nq8ibKgf0G6isQuvL+Xsq6v5LSyPfrwHGNy9VtqHy+Hbg6ShOHd1Fqjvo5H9iQsn4PBnbMzNtqv10pJ5PL6nKczsLNDEbSeSn7bRExXDvL0eSpRedBiz9Q9rNvDTPstgwflK5ep7eAUpZuo9wOhdIubqO6z367sbx+nXLwm095AO5to1guKDXkq1LK1vHAV1tGysw/UE6mX64BweGU2vMf1vJ8HrBpz2gnUvaLkwEi4uiIOLpxfo9Q9pG1Goc/gXLR+ZOImMUIZa8eW35LCZp+3jWpDSntuO6m7NtHZeZP+8z2V5TG9Z3axcsoFwb9ahuhrLcdo7w/rvddcaMywvFxGUqAcwOlrLyKxysP/g44PyLupmzDfXKI93zWE+nBwC9rGd1shPzcRTl2n0ZZ52/l8SYAi+LtlNukX6TcfryPx2tNH6S8bmdXyu3Jf6I8aNhprvAlyqtiLqbcCfp+Tetn0h1ro7yX8BVPnAxkedbhps5fTb61NqFIyra+ri7Tf1Ha+55Rx72tZ9xHKLfqO20JX8/jbws5Gfjv+tdxOGWdX1Gn/45+C5ylzeYdEdF9bPgyZVu+hdJO+z662oxGxAxKTf6J9Og5hqxGKRsLKLHIbMpdkM6x/hmUffQeyh2GfTPzhz3TezUwLTP/t+b3N5Syci3wD9RXQdVAdxtGuHXeeUJMmhC1tunkzGytXR13EbE75QnRl090XjrGO09RXlh9RJaG/oOe1/GUp/o/OtKwk0FE7Ep5JcqkKR/dIuI4ygNaU2J9ajCmwrF2KouILYG9M3OHic7LooqI91KamH1ouOEm5Se2JE0651Ce5lOXKM1O9qbUmk46tVbyDZT2iZIGpNbyTenPMmbmf4881OS8VS1pMcTCn7Lq/hvydlCLzPx0ju5VQmMuIl7Rb9kmKD9bUW6d3ky5rTepRMQnKLcvP5OZV010fiQtGbxVLUmSpCbWOEqSJKmJgaMkSZKaLHUPx6y99to5a9asic6GJEnSiC688MJbM3NxP0AyZpa6wHHWrFnMmTNnorMhSZI0ooho+WTquPFWtSRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpiYGjJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpiYGjJEmSmix136oeD7P2/f4T0q4+ZNsJyIkkSdLYscZRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktRk4IFjRCwbEb+LiO/V3xtExPkRMTciTo2IFWr6ivX33Np/Vtc09qvpV0TEVl3ps2va3IjYd9DLIkmStDQbjxrHfYDLu34fChyWmc8EFgB71vQ9gQU1/bA6HBGxEbAz8BxgNnBUDUaXBY4EtgY2At5Sh5UkSdIADDRwjIiZwLbAV+rvAF4NnF4HOQHYoXZvX39T+29Rh98eOCUzH8jMq4C5wEvq39zMvDIzHwROqcNKkiRpAAZd4/h54EPAo/X3k4HbM/Ph+vs6YEbtngFcC1D731GHfyy9Z5x+6ZIkSRqAgQWOEfFa4JbMvHBQ8xhFXvaKiDkRMWfevHkTnR1JkqQpaZA1ji8DXhcRV1NuI78aOByYFhHL1WFmAtfX7uuB9QBq/zWA27rTe8bpl/4EmXlMZm6SmZtMnz598ZdMkiRpKTSwwDEz98vMmZk5i/Jwy08ycxfgp8COdbDdgO/U7jPqb2r/n2Rm1vSd61PXGwAbAr8BLgA2rE9pr1DnccaglkeSJGlpt9zIg4y5DwOnRMRBwO+AY2v6scBJETEXmE8JBMnMSyPiNOAy4GHg3Zn5CEBEvAc4C1gWOC4zLx3XJZEkSVqKjEvgmJnnAOfU7ispT0T3DnM/sFOf8Q8GDh4i/UzgzDHMqiRJkvrwyzGSJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJamLgKEmSpCYGjpIkSWpi4ChJkqQmBo6SJElqYuAoSZKkJgaOkiRJajKwwDEiVoqI30TEHyLi0og4oKZvEBHnR8TciDg1Ilao6SvW33Nr/1ld09qvpl8REVt1pc+uaXMjYt9BLYskSZIGW+P4APDqzHwB8EJgdkRsBhwKHJaZzwQWAHvW4fcEFtT0w+pwRMRGwM7Ac4DZwFERsWxELAscCWwNbAS8pQ4rSZKkARhY4JjF3fXn8vUvgVcDp9f0E4Adavf29Te1/xYRETX9lMx8IDOvAuYCL6l/czPzysx8EDilDitJkqQBGGgbx1oz+HvgFuBs4C/A7Zn5cB3kOmBG7Z4BXAtQ+98BPLk7vWecfulD5WOviJgTEXPmzZs3FosmSZK01Blo4JiZj2TmC4GZlBrCZw9yfsPk45jM3CQzN5k+ffpEZEGSJGnKG5enqjPzduCnwEuBaRGxXO01E7i+dl8PrAdQ+68B3Nad3jNOv3RJkiQNwCCfqp4eEdNq98rAPwKXUwLIHetguwHfqd1n1N/U/j/JzKzpO9enrjcANgR+A1wAbFif0l6B8gDNGYNaHkmSpKXdciMPssieCpxQn35eBjgtM78XEZcBp0TEQcDvgGPr8McCJ0XEXGA+JRAkMy+NiNOAy4CHgXdn5iMAEfEe4CxgWeC4zLx0gMsjSZK0VBtY4JiZFwEvGiL9Skp7x970+4Gd+kzrYODgIdLPBM5c7MxKkiRpRH45RpIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUZMTAMSJeFhGr1O63RcTnImL9wWdNkiRJk0lLjeMXgXsj4gXAB4C/ACcONFeSJEmadFoCx4czM4HtgSMy80hgtcFmS5IkSZPNcg3D3BUR+wFvA14ZEcsAyw82W5IkSZpsWmoc3ww8AOyZmTcBM4HPDDRXkiRJmnRaahx3Ar6amQsAMvOv2MZRkiRpqdNS47gOcEFEnBYRsyMiBp0pSZIkTT4jBo6Z+VFgQ+BYYHfgzxHxyYh4xoDzJkmSpEmk6QXg9anqm+rfw8CawOkR8ekB5k2SJEmTyIhtHCNiH2BX4FbgK8AHM/Oh+nT1n4EPDTaLkiRJmgxaHo5ZC3hDZl7TnZiZj0bEaweTLUmSJE02LW0c9wfWi4g9ACJiekRsUPtdPuD8SZIkaZJo+Vb1/sCHgf1q0vLAyYPMlCRJkiaflodjXg+8DrgHIDNvwE8OSpIkLXVaAscH61PVCRARqww2S5IkSZqMWgLH0yLiS8C0iHgH8CPK09WSJElairQ8Vf1Z4DXAncCzgI8BPxtkpiRJkjT5tASOx2bmPwFnA0TEqsCZwBaDzJgkSZIml5Zb1ddHxFEAEbEm8EN8qlqSJGmp0/Iex/8E7o6IoylB42cz86sDz5kkSZImlb63qiPiDV0/zwf+E/gNkBHxhsz81qAzJ0mSpMljuDaO2/X8/h3l5d/bUV7NY+AoSZK0FOkbOGbmHuOZEUmSJE1uLQ/HSJIkSQaOkiRJamPgKEmSpCYjvgA8IlYE3gjM6h4+Mw8cXLYkSZI02bR8OeY7wB3AhcADg82OJEmSJquWwHFmZs4e7YQjYj3gRGAdyut7jsnMwyNiLeBUSg3m1cCbMnNBRARwOLANcC+we2b+tk5rN+CjddIHZeYJNX1j4HhgZcpnEPfJzBxtXiVJkjSyljaOv4qI5y3CtB8GPpCZGwGbAe+OiI2AfYEfZ+aGwI/rb4CtgQ3r317AFwFqoLk/sCnwEmD/+ulD6jDv6Bpv1AGuJEmS2rQEji8HLoyIKyLiooi4OCIuGmmkzLyxU2OYmXcBlwMzgO2BE+pgJwA71O7tgROzOA+YFhFPBbYCzs7M+Zm5ADgbmF37rZ6Z59VaxhO7piVJkqQx1nKreuvFnUlEzAJeRPl04TqZeWPtdRPlVjaUoPLartGuq2nDpV83RLokSZIGYMTAMTOvWZwZRMSqwP8A78/MO0tTxsemnREx8DaJEbEX5fY3T3va0wY9O0mSpCXSQN/jGBHLU4LGr2Vm59vWN9fbzNT/t9T064H1ukafWdOGS585RPoTZOYxmblJZm4yffr0xVsoSZKkpdTAAsf6lPSxwOWZ+bmuXmcAu9Xu3Siv++mk7xrFZsAd9Zb2WcCWEbFmfShmS+Cs2ocyyKcAAA4cSURBVO/OiNiszmvXrmlJkiRpjLW0cVxULwPeDlwcEb+vaR8BDgFOi4g9gWuAN9V+Z1JexTOX8jqePQAyc35EfAK4oA53YGbOr9178/jreH5Q/yRJkjQAAwscM/MXQPTpvcUQwyfw7j7TOg44boj0OcBzFyObkiRJauS3qiVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTQwcJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBRkiRJTZab6AwsrWbt+/0npF19yLYTkBNJkqQ21jhKkiSpiYGjJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpiYGjJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpyXITnQENb9a+31/o99WHbDtBOZEkSUs7axwlSZLUxMBRkiRJTQwcJUmS1MQ2juOkt62iJEnSVGONoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpiYGjJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpycACx4g4LiJuiYhLutLWioizI+LP9f+aNT0i4gsRMTciLoqIF3eNs1sd/s8RsVtX+sYRcXEd5wsREYNaFkmSJA32W9XHA0cAJ3al7Qv8ODMPiYh96+8PA1sDG9a/TYEvAptGxFrA/sAmQAIXRsQZmbmgDvMO4HzgTGA28IMBLs+UMtS3sa8+ZNsJyIkkSVpSDKzGMTN/BszvSd4eOKF2nwDs0JV+YhbnAdMi4qnAVsDZmTm/BotnA7Nrv9Uz87zMTEpwugOSJEkamPFu47hOZt5Yu28C1qndM4Bru4a7rqYNl37dEOlDioi9ImJORMyZN2/e4i2BJEnSUmrCHo6pNYU5TvM6JjM3ycxNpk+fPh6zlCRJWuKMd+B4c73NTP1/S02/Hliva7iZNW249JlDpEuSJGlAxjtwPAPoPBm9G/CdrvRd69PVmwF31FvaZwFbRsSa9QnsLYGzar87I2Kz+jT1rl3TkiRJ0gAM7KnqiPgGsDmwdkRcR3k6+hDgtIjYE7gGeFMd/ExgG2AucC+wB0Bmzo+ITwAX1OEOzMzOAzd7U57cXpnyNLVPVEuSJA3QwALHzHxLn15bDDFsAu/uM53jgOOGSJ8DPHdx8ihJkqR2fjlGkiRJTQwcJUmS1MTAUZIkSU0G+clBjdJQnwkc7/n5WUJJktSPgaMWYjApSZL68Va1JEmSmhg4SpIkqYmBoyRJkprYxnGKsQ2iJEmaKNY4SpIkqYmBoyRJkpoYOEqSJKmJbRyXAOP94nBJkrR0ssZRkiRJTQwcJUmS1MTAUZIkSU1s47gUsS2kJElaHNY4SpIkqYmBoyRJkpoYOEqSJKmJbRw1aq3fy+4dzm9qS5I0tVnjKEmSpCbWOGpEg3wau7X2UpIkTTxrHCVJktTEwFGSJElNDBwlSZLUxDaOmvRsBylJ0uRg4Kgx4ecMJUla8hk4atIxCJUkaXIycNS4GcuA0JeQS5I0/gwctdSzDaUkSW0MHKUGBpeSJPk6HkmSJDUycJQkSVITA0dJkiQ1MXCUJElSEx+O0RLD9z9KkjRYBo7SGFrU90b61LYkaSowcNRSpbVWcqxqLw0IJUlLEgNHaRGN961xg1BJ0kTz4RhJkiQ1MXCUJElSE29VS0uYRX1AR5KkkRg4SuPMtpGSpKnKwFGawsYyCLWmUpI0Ets4SpIkqUlk5kTnYVxtsskmOWfOnIHOwy+YaEm1qLWQ3i6XpEUTERdm5iYTnY8Ob1VLWixjGRR6u1ySJjcDR0nNxvvLO63TNsCUpPFhG0dJkiQ1mfJtHCNiNnA4sCzwlcw8ZLjhbeMoaTSszZQ0kWzjOIYiYlngSOAfgeuACyLijMy8bGJzJmlJ0XohaIApaWkwpQNH4CXA3My8EiAiTgG2BwwcJY2rsbrTYAAqaTKb6oHjDODart/XAZtOUF4kabFNhqYuBq+S+pnqgWOTiNgL2Kv+vDsirhjwLNcGbh3wPPRErveJ4XqfGANb73HoIKa6xLC8T4yleb2vP9EZ6DbVA8frgfW6fs+saQvJzGOAY8YrUxExZzI1ZF1auN4nhut9YrjeJ4brfWK43iePqf46nguADSNig4hYAdgZOGOC8yRJkrREmtI1jpn5cES8BziL8jqe4zLz0gnOliRJ0hJpSgeOAJl5JnDmROejx7jdFtdCXO8Tw/U+MVzvE8P1PjFc75PElH8BuCRJksbHVG/jKEmSpHFi4DiGImJ2RFwREXMjYt+Jzs9UFxHrRcRPI+KyiLg0Ivap6WtFxNkR8ef6f82aHhHxhbr+L4qIF3dNa7c6/J8jYreJWqapJCKWjYjfRcT36u8NIuL8un5PrQ+kEREr1t9za/9ZXdPYr6ZfERFbTcySTB0RMS0iTo+IP0bE5RHxUsv74EXEv9ZjzCUR8Y2IWMnyPvYi4riIuCUiLulKG7PyHREbR8TFdZwvRESM7xIuJTLTvzH4ozyc8xfg6cAKwB+AjSY6X1P5D3gq8OLavRrwJ2Aj4NPAvjV9X+DQ2r0N8AMggM2A82v6WsCV9f+atXvNiV6+yf4H/BvwdeB79fdpwM61+2jgX2r33sDRtXtn4NTavVHdD1YENqj7x7ITvVyT+Q84Afjn2r0CMM3yPvB1PgO4Cli5/j4N2N3yPpB1/UrgxcAlXWljVr6B39Rho4679UQv85L4Z43j2Hns84eZ+SDQ+fyhFlFm3piZv63ddwGXUw7y21NOsNT/O9Tu7YETszgPmBYRTwW2As7OzPmZuQA4G5g9josy5UTETGBb4Cv1dwCvBk6vg/Su9872OB3Yog6/PXBKZj6QmVcBcyn7iYYQEWtQTqzHAmTmg5l5O5b38bAcsHJELAc8CbgRy/uYy8yfAfN7ksekfNd+q2fmeVmiyBO7pqUxZOA4dob6/OGMCcrLEqfeDnoRcD6wTmbeWHvdBKxTu/ttA7fN6H0e+BDwaP39ZOD2zHy4/u5eh4+t39r/jjq86310NgDmAV+tTQS+EhGrYHkfqMy8Hvgv4K+UgPEO4EIs7+NlrMr3jNrdm64xZuCoSS8iVgX+B3h/Zt7Z3a9eWfpqgDEUEa8FbsnMCyc6L0uZ5Si38b6YmS8C7qHcunuM5X3s1TZ121MC93WBVbCGdkJYvqcGA8ex0/T5Q41ORCxPCRq/lpnfqsk319sS1P+31PR+28BtMzovA14XEVdTmly8Gjiccquo8+7X7nX42Pqt/dcAbsP1PlrXAddl5vn19+mUQNLyPlivAa7KzHmZ+RDwLco+YHkfH2NVvq+v3b3pGmMGjmPHzx+Osdpu6Fjg8sz8XFevM4DOk3S7Ad/pSt+1Po23GXBHvQVyFrBlRKxZaxe2rGkaQmbul5kzM3MWpRz/JDN3AX4K7FgH613vne2xYx0+a/rO9SnUDYANKY3XNYTMvAm4NiKeVZO2AC7D8j5ofwU2i4gn1WNOZ71b3sfHmJTv2u/OiNisbsddu6alsTTRT+csSX+Up8D+RHma7j8mOj9T/Q94OeW2xUXA7+vfNpT2RD8G/gz8CFirDh/AkXX9Xwxs0jWtf6I0Vp8L7DHRyzZV/oDNefyp6qdTToRzgW8CK9b0lervubX/07vG/4+6Pa7AJxxb1vcLgTm1zH+b8tSo5X3w6/0A4I/AJcBJlCejLe9jv56/QWlH+hClhn3PsSzfwCZ1G/4FOIL6kRP/xvbPL8dIkiSpibeqJUmS1MTAUZIkSU0MHCVJktTEwFGSJElNDBwlSZLUxMBR0lIrIu4eof+0iNh7HPJxYES8ZoRhNo+Ivx90XiRpOAaOktTfNGDggWNmfiwzfzTCYJsDBo6SJpSBo6QpLyJmRcTlEfHliLg0In4YESsPMdwGEfHriLg4Ig7qSl81In4cEb+t/bavvQ4BnhERv4+IzwwzXO987o6Iw2pefhwR02v6CyPivIi4KCL+t375gog4PiJ2rN1XR8QBXfN4dkTMAt4F/GvNyysiYqeIuCQi/hARPxvL9SlJ/Rg4SlpSbAgcmZnPAW4H3jjEMIcDX8zM51G+YNFxP/D6zHwx8A/AZ+tny/YF/pKZL8zMDw4zXK9VgDk1L+cC+9f0E4EPZ+bzKV/D2H+IcQFurfP4IvDvmXk1cDRwWM3Lz4GPAVtl5guA1424diRpDBg4SlpSXJWZv6/dFwKzhhjmZZTPnkH5tFxHAJ+MiIsonz2bAawzxPitwz0KnFq7TwZeHhFrANMy89yafgLwyj7L8q0RlgPgl8DxEfEOYNk+w0jSmFpuojMgSWPkga7uR4An3KquhvrO6i7AdGDjzHwoIq6mfJN4UYdrmedwOsvyCH2O05n5rojYFNgWuDAiNs7M20Y5H0kaFWscJS1NfgnsXLt36UpfA7ilBoP/AKxf0+8CVmsYrtcywI61+63ALzLzDmBBRLyipr+dchu71UJ5iYhnZOb5mfkxYB6w3iimJUmLxBpHSUuTfYCvR8SHge90pX8N+G5EXAzMAf4IkJm3RcQvI+IS4AfAoUMNN4R7gJdExEeBW4A31/TdgKMj4knAlcAeo8j7d4HT6wM576U8KLMh5fb5j4E/jGJakrRIInO0d1AkScOJiLszc9WJzockjTVvVUuSJKmJNY6SJElqYo2jJEmSmhg4SpIkqYmBoyRJkpoYOEqSJKmJgaMkSZKaGDhKkiSpyf8Hc80y5QKg82EAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plot_data_points_hist(d_all)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAoQAAAFNCAYAAACZuH6uAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3debglVXn3/e+PySiDgHR4GG00ZMCoiDxI4kReDWMQR9SoIDFBE0k0MQPGRFBjgjFqYlQMPhJAnDDRiIJRNIojyBBkVGmhEbCZkUFQQe/3j7UObA5n2Ifuc0531/dzXec6tVdNq1atWnXvWlW1U1VIkiRpuNZZ7AxIkiRpcRkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQN3FoZECapJL+0GuTjS0l+f5HW/eAkn0pyS5KPjTH9HkmuWoi8rYwkL0ryucXOx0JJ8pkkB8/XMpIs7cfLeiuzjsWS5PYkj5hm3EuTfHWacav1ds+U99VFku17+a87wzRjt8VJjkxy4qrL4dysCWW+kIbW1k4nyYeTPHOx87EyknwzyaNmm25BA8LeeEz8/TzJnSOfXzTNPGtEoLIaei6wJfCwqnreqlzwYjacVfXBqtpznGlX5wZ+3JNfVe1TVcevzLpWdhlJtp7rMZjkuCR/90DXOa6q2qiqLpvv9ayMkeDz1EnpJyY5cpGytdKq6vu9/H8Gi/sFeK4W+wtBkgOTfD3JHUm+NMX4nZOc08efk2TnkXFJ8pYkN/a/tyTJqs7jYrW1SbZKcnKSH/R9tHTS+IsmxRJ3J/nUyPj9k1zYx309yU4j4x6U5B192TcneU+S9WfIy2OAxwKfHCdvI/NtnuT62cokySOSfDrJbUluSPKPI/l8f5Ir+rjzkuwzMt92Sc5IclOSt01a5meS7DppVf8EvHGmvMACB4S98dioqjYCvg/sP5L2wYXMy5qkNwBz3VcPB75bVXfPR540vgdy0nmA+3y+7Av892JnYi3whCS/udiZ0GrhJuCfgaMmj0iyAS0AORHYDDge+GRPBzgUeCYtUHkMsD/w8gXI80L5Oa29ec5UI6vqUSNxxMbAlcDHAJLsCHwQeAWwKfAp4OSRNvhwYFfg14FfBnYB/maGvLwc+GDd+wseM+ZtxFuAS2aaoO/P04D/Af4PsC1tnwOs17frqcBDex5PGglAX0urFzsAz5wIAJM8H7i8qs6etLqTgd9K8n9mzHVVLcofsBx4eh9+EO3g+EH/++eetiFwJ20n3N7/tgZ2A74B/BBYAbwL2GBk2QX80jTr/RLwJuBrwG3A54At+rg9gKtmyOeRtIp3Yp/3Alqlei1wXd+Be05a1z8A3wRupR3km4+M3x34et+ObwF7TJr3zT2fd061PcCv9el+CFwEPKOnvwH4KXBXL7OXTTHvg4HjgJuBi4G/GN122oHzvb6dFwPPGlnnj4Gf9WX/sKfvB/xv384rgSNn2Pd7AFcBfw3c0Mv4RSPjHwqcAFwPXEE7GNbp414KfHXSvn4FcGkvh3cDmSGf+/btuQ24GvjzafL40l727wJuAb4NPG1SHt9Pq39XA38HrDtp3ncANwJ/N2nZe0/aP9+abp/3tN8fJ08zlPfoMtalfVu8AbgMeGUvw/VmmP/jwLOnSE/fxuv6fr+A1tAe2rftp337PjVTfe3jjgPeS2sgbwNOBx4+xrbdc6wDD6M1fLfSjrk3jdaVSfMtHd1uWgO/vOd/He6t/zcCJ9GPW+AU4I8nLet8+vExy7r+CvjiSPqJTHOccP96/lbgq73eTVn3gA1ogcajR+b7ReAOYAmwBfDpXv43AV+hH1eT1v0G4F/78PrAj4C3jrQbPwY2Hy1DWr39WR93O/CumY7Pabb5SODEMdvHl9Lq723A5fT2g3bMnE47Pm4APjrNur7f8zZxXvmNiTKnHR839+XuMzLPIbST/G193S+fok17De14WAEcMkb9/X3gS5PS9uz7NZPyu3cf/jpw6Mi4lwFnrKlt7Qxls15f59IZpnlqX/6G/fNhwCkj49ehtaVP65/PBp43Mv53gStnWP5lwJPmkjfgN2nxySFM0/706Q4FvjKH8jgfeE4f/gzwK334I8CBwCa0c/Cm08x/GnDwjOuYyw5alX/cN9B6I3AGrfFa0iv8m0Yr9KR5H09rLNajNUqXAK+eVHFnCgi/RwvkHtw/HzXDukbzeWSv+Hv1dZ9AazReR2s4/4AWnY+u62raSWZD4D/pDR6wDe1ks2+vtL/dPy8Zmff7wKP6utaflK/1gWW0A30D4P+jHRi/MpLXE2co/6NoJ4TNge2AC7lvQPg8WvC9DvB82klhqz7upZMrei+7R/fpHwNcCzxzmnXvAdwNvJ0W+D+1L38i7yfQgueN+/79Lj2onbzuvq8/Tfs2uD2tYdt7hnyuAJ7chzcDdpkmjy/tefzTXtbPp51kJgKDTwD/1vfrL9ICkJdPmveP+7578BTLv9/+mWqfc/+AcNo8zbCvR5fxCloguV3f919khoCwr+cGYOMpxu0FnNPLfuLEMFFHjmMkEGb2+npc//yUXif+ZfK+myZ/owHhR2jB24a0Y+7q6ZbBfYOZQ3reJpbzKlp7tG3Py78BH+7jDgTOHFnOY2nH7QYz5HFiXRv3PE20J7MGhLTj6X3AZ4GHjFH33gO8ZWQ5r+LegPwfaEH3+v3vyUwRnPV9c0Ef/k1ae3nmyLhvTS7DyfVsnONzpmOCGdrHvt23jtSdrYBH9eEP09rjdYBfYIqT+VR5Hynzu2jt+LrAH9IuUKSP3w94JK2uP5UWaO8yqU17Yy/bffv4zWapv1MFhH8KfGZS2qeB1/ThW4AnjIzbFbhtmuVP5Gu1amtpAeWU+2ZkmnECwmOB40Y+HwacOvJ5Xdo5+1X989nAgSPjX9TX8dAplr1hH7dk3Lz19Z1Li1HuVyZT5P0DtODuBtrx8+hppt2yb8ev9s9v7du6KS1AfxStzTx4hvW9E3j7TGW+unRJvQh4Y1VdV1XX076hvmS6iavqnKo6o6rurqrltMbxqXNY379X1Xer6k7aCWTn2WYY8ZWq+my1rtiP0Rqoo6rqLtoJaWmSTUem/0BVXVhVPwL+Fjgw7SbsF9Mq7qlV9fOqOo1WWfcdmfe4qrqob+ddk/KxO7BRX/dPq+p/aAfrC8fcjgOBN1fVTVV1Ja2y3KOqPlZVP+h5+yit0u023cKq6ktVdUGf/nxawzzbPvnbqvpJVZ1Ou/IyUTYvAF5bVbf1/fs2ZqgPtDL4YVV9nxbgzLQ/7wJ2SrJJVd1cVefOMO11wD9X1V29DL4D7JdkS9p+enVV/aiqrqNdKXvByLw/qKp/7fvuzhlL4b5m2ufT5mkOyz+wz39lVd1ECxJm8hRaAHDbFOPuop1IfpV20rykqlZMs5xx6uspVfXlqvoJ7aT+G0m2G2ejer15DvD6vk8upHWpzObVtKvje1TVsp72CuB1VXVVz8uRwHN7t9PJwC/3rilo9fKjVfXTMdZ1J+1K2rj3Vq5PO442p91ec8cYde944IUj95S9hHbSgba/tqJdeb2rqr5S/UwxyTeAHZM8jLb/3w9sk2Qj2jF9+pj5nzCX43PCbO3jz4FfT/LgqlpRVReNbOPDga2r6sdVNdf72q6oqvdVuy/yeFp5bQlQVadU1feqOZ3Wu/TkkXnvop3H7qqqU2lXy35ljuuHdpzcMintFtqxNtX4W4CNZrmPcLVqa6tq0wewb+4jyUNo98ofN5L8eeCpac8ebMC9X0Af0sf/N/CqJEt69+mf9PSHcH8T5/Gp2r7p/Anty9M5Y0y7La3830m7+HIK9701AIB+j+MHgeOr6ts9+R9ode902pfADWgXYj6V5ENJvpzksEnru21km6a0ugSEW9MuV0+4oqdNKckv9xsxr0lyK/D3tO6QcV0zMnwH7QAb17Ujw3cCN/TGY+Izk5Z35cjwFbRGfgtao/W8JD+c+AOeRGuAppp3sq1pl7p/Pmn524y5HVtPkbd7JDmo38g6kbdfZ4YyTvKEJF/sN9LeQjupzrRPbu5B8uj6t+7zrM/968NM2zWX/fkc2knliiSnJ/mNGaa9etIJcyKPD+95XDFSPv9Gu1ozYaZ9N5PZ5psuT+Oacb9PYV/g1KlG9KDuXbSuo+uSHJNkk5nWO0t9vSdfVXU7rVtz3G1bwr333YwufzZ/Aby7qkYfmnk48ImRfXsJrTtsy6r6MfBR4MX9Hs8Xcm/ANY7/B2yZZP8xpv0l4ADgDSMB54x1r6rOpB0DeyT51b6Mk/u8b6VdCf1cksuSHD7VSvsXmLNpwd9TaCedrwNP5IEFhA+kvZ22feztxvNpbcyKJKf0bQX4S9oVvG+mPXzwew80r1V1Rx/cCCDJPiM38v+QdmyMtnE31n3v2Z7ruWXC7bTuv1GbcG9gMnn8JsDt0wT3sGa0tQ/Es2ltxD31sQdMB9PapRW0bbyY1m0O7QvZ/wLn0er0f9EC19Hz+oQf9v8bTzHufpJsTQsIXzdm/u+kXUH8TD++/4l228uvjSxzHVr78lPaFUEA+oWc51fVY2lXBv+V1iN1OK237+nAK5Lcs6y+HT9kBqtLQPgDWgMwYfueBu2y7GRH07q9dqyqTWjfAlbFU1Y/YuSbQv8GtWQllzl6hWN7WuW7gXbi+kD/pjTxt2FVjd5kPN0BDq18tpv04MH2tC6pcayYIm8AJHk4rZvqMNpTypvSKtlEGU+Vrw/RTjzbVdVDaV1TM+2TzZJsOGn9P6CVzcS3/NFx427XqPvls6rOqqoDaCfQ/6JdIZ7ONpO+dU/k8UrgJ7R7Tyf23SZVNfpY/0z7bqbxs803XZ7GNe1+n8a0ASFAVb2zqh4P7ES7DeMvJkZNmnSc+npPvvrVqM0Zf9uup3WNzWXboN2v9TdJRm8Sv5J279josfkLVTWR1+NpvRpPA+6oqm+MmUd6w/8G2v2Ns7VZl9C6sz+TZOJK0zh173jaFbaXAP/Rg1j6VaDXVNUjgGcAf5bkadOs+3Ra9/DjgLP6571ovQRfnm7zZtmeuZixfazWS/PbtC/Q36a1V1TVNVX1B1W1Ne2BgPdk6tfezCmvSR5Eu+Xnn2hfDDalHRer/Ole2v21j5l0nD+mp0+Mf+zIuMeOjJvKmtDWPhAHAydMDoSr6j+q6ter6mHAEbSu8LP6uDur6rCq2qYfBzcC50z6ojqxnB9x7+1l49iNVh8vTnINLVDbrV+4murVTOczQz3s+//9tCvUz6mpe4yg3Yt4Ru8VeTRwdm9nLuifJ/wa7V7caa0uAeGHaY3ykiRbAK/n3qdtrgUeluShI9NvTLuH5Pb+zfAPV1E+vgv8QpL9+mXav6Hdd7EyXpxkp355+420BvpntO3bP8leSdZN8gv9Mve2Yy534krAXyZZP8ketKfNPjLm/CcBr02yWV/nH4+Mm7h34nqAJIfQrhBOuBbYdtKl7Y2Bm6rqx0l2o92sO5s3JNkgyZOB3wE+1svmJODNSTbuwemfcW99mIv75LOv60VJHtoPrltpXU/T+UXgT3r5Po92QJ1arVv0c8DbkmySZJ0kj0wyl9sWrqXdXjDXY3DKPM1h/pP6/Nsm2Yz2jXJKSXYAHlRVUz4tl+T/9ivDEw8e/Jh7y/NaYPT9gOPU132TPKnvrzfRGrmxrrT2evNx4MgkD0l71cTBY8x6Ee0hn3cneUZPey+t/j28b+eSJAeMrOsbfTvfxtyuDk74AO3+tr1nm7CqPkz7wvv5JI8cs+6dCDyLFhSeMJGY5HeS/FI/0dxCu+o5Xf0/HTgIuLifXL5Eu9/t8mq39Uxl8j5fGdO2j0m2THJAD3J+Qrti9vO+jc8baUNvprVjU23j9T193PxuQDsXXA/cnfYKkLFeyTKViW2iXdVep2/fxOtPvkTbN3+S9vqRiStD/9P/n0AL5rfpV6Vew327Taeyure199HLZuLc+6D+eXT8tsBvMcVtIUke38t3CXAMcPJEV+tEmaXZnXYb1xEzZOVUJt36NEPePkMLPnfuf6+nXY3ceaQXcdSJwO5Jnt4DxlfTgvSJ9vZoWvu+f01z21GSX6Q9GHhkT7qc9jTxRrR7Sy8byfPjaQ+WTGt1CQj/jtZFcT4tqj23p01cAv4wcFla18HWwJ/TAo7baN8MP7oqMlFVtwB/ROvWuZp2klvZdyB+gHawXkM7CfxJX9eVtO6gv6Y1MlfSrq6MtU96I70/sA+tEr0HOGjkHoPZvIHWPXA57QRzz4mtqi6mney+QTvQH017unXC/9BOpNckuaGn/RHwxiS30Q6E2b4NXkNrsH9Af03ASN7/mFb2l9FurP8Q7QbcuZoqny8BlqfdavAK2pWe6ZwJ7Egr3zcDz62qG/u4g2gniYv7dvwH9+3un83Ey8JvTDLTfYxzydM4Jh5Q+BbtOPv4DNPux8zB5iZ9eTfT6tKNtG5JaN9sd+rH7H+NWV8/RGucb6I1Xi+ew3ZBu6K9Ea1uHQf8+zgzVdW3aCfJ9/UT/b/QrnZ/rtfnM4AnTJrtBNpxcSJAkvcmee+Y6/sZ7RjZfMzpj6d9mfyftNdOzFj3ettyLi0Y+srIonak3WN1O+3Yfk9VfXGa1X6d9tDdxNXAi2kB/3RXB6GV23PT3u/2zhmmm9Us7eM6tMDlB7S68lTuvSjwf4Ezk9xO24evqineU9m7g98MfK3X0d1nyc9ttLb7JFqZ/y73dsU/EC+hdRkeTbsX7E7uvcr5U9prZQ6idfH9Hu0BvYnbBv6N9jqVC2g9N6f0tOmsdm1t2jsCn3z/xdzjTlo9hXYFeHJA9BLgG1X1vSnm/RdauX2Htt1/MDLukbS6/SNaMHl4Vc308u1jgBf1L1Ez5q3aPZrXTPzRvnTd1YdHX+S+fZ/+O7Q27r09nwfQ3rzw0x6cv5wWWF6T6d/X/E+0+1Yn8vMPtCv7V9IeJpt4/cz+tIeXZuxxmXh6Slow/erQiVU17tXQBZfkpbQnJp+02HmZsNB5SnuR8ruq3SA/3+s6jvaU+0zvBFttJDmI9uqP1aZ+jEpyLO3BpjWiPDU/1oS2dnWX5EPASVX1X4udlwcqyZm0p8cvnGm61fJnmyStFr5Ee5JQI9Ju//gj2lXO1U6/ivhs2v1/klZCVY1z+9Nqraom93BMaXXpMpa0EnLfn3Ia/ZupW2ZGVfWP0927slCSPHm6bVuk/OxF68K8lta9tlpJ8iZaN+Jbq+ryxc6PpDWHXcaSJEkD5xVCSZKkgTMglCRJGri18qGSLbbYopYuXbrY2ZAkSZrVOeecc0NVrewPYayUtTIgXLp0KWefffbsE0qSJC2yJOP81Oa8sstYkiRp4AwIJUmSBs6AUJIkaeAMCCVJkgbOgFCSJGngDAglSZIGzoBQkiRp4AwIJUmSBs6AUJIkaeAMCCVJkgbOgFCSJGng1srfMpZW1tLDT1mU9S4/ar9FWa8kadi8QihJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHDzFhAm2S7JF5NcnOSiJK/q6UcmuTrJef1v35F5XptkWZLvJNlrJH3vnrYsyeHzlWdJkqQhWm8el3038JqqOjfJxsA5SU7r495RVf80OnGSnYAXAI8CtgY+n+SX++h3A78NXAWcleTkqrp4HvMuSZI0GPMWEFbVCmBFH74tySXANjPMcgDwkar6CXB5kmXAbn3csqq6DCDJR/q0BoSSJEmrwILcQ5hkKfA44MyedFiS85Mcm2SznrYNcOXIbFf1tOnSJ6/j0CRnJzn7+uuvX8VbIEmStPaa94AwyUbAfwKvrqpbgaOBRwI7064gvm1VrKeqjqmqXatq1yVLlqyKRUqSJA3CfN5DSJL1acHgB6vq4wBVde3I+PcBn+4frwa2G5l9257GDOmSJElaSfP5lHGA9wOXVNXbR9K3GpnsWcCFffhk4AVJHpRkB2BH4JvAWcCOSXZIsgHtwZOT5yvfkiRJQzOfVwifCLwEuCDJeT3tr4EXJtkZKGA58HKAqrooyUm0h0XuBl5ZVT8DSHIY8FlgXeDYqrpoHvMtSZI0KPP5lPFXgUwx6tQZ5nkz8OYp0k+daT5JkiQ9cP5SiSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA3cvAWESbZL8sUkFye5KMmrevrmSU5Lcmn/v1lPT5J3JlmW5Pwku4ws6+A+/aVJDp6vPEuSJA3RfF4hvBt4TVXtBOwOvDLJTsDhwBeqakfgC/0zwD7Ajv3vUOBoaAEkcATwBGA34IiJIFKSJEkrb94CwqpaUVXn9uHbgEuAbYADgOP7ZMcDz+zDBwAnVHMGsGmSrYC9gNOq6qaquhk4Ddh7vvItSZI0NAtyD2GSpcDjgDOBLatqRR91DbBlH94GuHJktqt62nTpkiRJWgXWm+8VJNkI+E/g1VV1a5J7xlVVJalVtJ5DaV3NbL/99qtikZLWUksPP2XB17n8qP0WfJ2SNK55vUKYZH1aMPjBqvp4T762dwXT/1/X068GthuZfdueNl36fVTVMVW1a1XtumTJklW7IZIkSWux+XzKOMD7gUuq6u0jo04GJp4UPhj45Ej6Qf1p492BW3rX8meBPZNs1h8m2bOnSZIkaRWYzy7jJwIvAS5Icl5P+2vgKOCkJC8DrgAO7ONOBfYFlgF3AIcAVNVNSd4EnNWne2NV3TSP+ZYkSRqUeQsIq+qrQKYZ/bQppi/gldMs61jg2FWXO0mSJE3wl0okSZIGzoBQkiRp4AwIJUmSBs6AUJIkaeAMCCVJkgbOgFCSJGngDAglSZIGzoBQkiRp4AwIJUmSBs6AUJIkaeAMCCVJkgbOgFCSJGngDAglSZIGzoBQkiRp4NZb7AysyZYefsqirHf5UfstynolSdLaySuEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJAzdrQJjkiUk27MMvTvL2JA+f/6xJkiRpIYxzhfBo4I4kjwVeA3wPOGFecyVJkqQFM05AeHdVFXAA8K6qejew8fxmS5IkSQtlvTGmuS3Ja4EXA09Jsg6w/vxmS5IkSQtlnCuEzwd+Arysqq4BtgXeOq+5kiRJ0oIZJyB8HvDvVfUVgKr6flXNeg9hkmOTXJfkwpG0I5NcneS8/rfvyLjXJlmW5DtJ9hpJ37unLUty+Nw2T5IkSbMZJyDcEjgryUk9OMuYyz4O2HuK9HdU1c7971SAJDsBLwAe1ed5T5J1k6wLvBvYB9gJeGGfVpIkSavIrAFhVf0NsCPwfuClwKVJ/j7JI2eZ78vATWPm4wDgI1X1k6q6HFgG7Nb/llXVZVX1U+AjfVpJkiStIuM8VEJVVZJrgGuAu4HNgP9IclpV/eUc13lYkoOAs4HXVNXNwDbAGSPTXNXTAK6clP6EqRaa5FDgUIDtt99+jlmSVg9LDz9lUda7/Kj9FmW9kqTVwzgvpn5VknOAfwS+Bjy6qv4QeDzwnDmu72jgkcDOwArgbXOcf1pVdUxV7VpVuy5ZsmRVLVaSJGmtN84Vws2BZ1fVFaOJVfXzJL8zl5VV1bUTw0neB3y6f7wa2G5k0m17GjOkS5IkaRUY5x7CI4DtkhwCkGRJkh36uEvmsrIkW418fBYw8QTyycALkjyoL3tH4JvAWcCOSXZIsgHtwZOT57JOSZIkzWzWK4RJjgB2BX4F+HfaS6lPBJ44y3wfBvYAtkhyFXAEsEeSnYEClgMvB6iqi5KcBFxMu0fxlVX1s76cw4DPAusCx1bVRXPeSkmSJE1rnC7jZwGPA84FqKofJJn1p+uq6oVTJL9/hunfDLx5ivRTgVPHyKckSZIegHHeQ/jT/lvGBZBkw/nNkiRJkhbSOAHhSUn+Ddg0yR8Anwf+3/xmS5IkSQtlnC7jtwFPB26l3Uf4euDL85kpSZIkLZxxAsL3V9XvAacBJNmIdk/f0+YzY5IkSVoY43QZX53kPQBJNgM+R3vKWJIkSWuBcd5D+LfA7UneSwsG31ZV/z7vOZMkSdKCmLbLOMmzRz6eCfwt7WXRleTZVfXx+c6cJEmS5t9M9xDuP+nz/9JeSr0/7RU0BoQDsvTwUxZlvcuP2m9R1itJ0pBMGxBW1SELmRFJkiQtjnEeKpEkSdJazIBQkiRp4AwIJUmSBm7WF1MneRDwHGDp6PRV9cb5y5YkSZIWyji/VPJJ4BbgHOAn85sdSZIkLbRxAsJtq2rvec+JJEmSFsU49xB+Pcmj5z0nkiRJWhTjXCF8EvDSJJfTuowDVFU9Zl5zJkmSpAUxTkC4z7znQpIkSYtm1oCwqq5YiIxIkiRpcfgeQkmSpIEzIJQkSRo4A0JJkqSBMyCUJEkaOANCSZKkgTMglCRJGjgDQkmSpIEzIJQkSRo4A0JJkqSBMyCUJEkaOANCSZKkgTMglCRJGjgDQkmSpIEzIJQkSRo4A0JJkqSBMyCUJEkaOANCSZKkgTMglCRJGjgDQkmSpIGbt4AwybFJrkty4Uja5klOS3Jp/79ZT0+SdyZZluT8JLuMzHNwn/7SJAfPV34lSZKGaj6vEB4H7D0p7XDgC1W1I/CF/hlgH2DH/ncocDS0ABI4AngCsBtwxEQQKUmSpFVj3gLCqsAkl04AAA1hSURBVPoycNOk5AOA4/vw8cAzR9JPqOYMYNMkWwF7AadV1U1VdTNwGvcPMiVJkrQSFvoewi2rakUfvgbYsg9vA1w5Mt1VPW26dEmSJK0ii/ZQSVUVUKtqeUkOTXJ2krOvv/76VbVYSZKktd5CB4TX9q5g+v/revrVwHYj023b06ZLv5+qOqaqdq2qXZcsWbLKMy5JkrS2WuiA8GRg4knhg4FPjqQf1J823h24pXctfxbYM8lm/WGSPXuaJEmSVpH15mvBST4M7AFskeQq2tPCRwEnJXkZcAVwYJ/8VGBfYBlwB3AIQFXdlORNwFl9ujdW1eQHVSRJkrQS5i0grKoXTjPqaVNMW8Arp1nOscCxqzBrkiRJGuEvlUiSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sAZEEqSJA2cAaEkSdLAGRBKkiQNnAGhJEnSwBkQSpIkDZwBoSRJ0sCtt9gZkLT4lh5+yqKsd/lR+y3KeiVJ9+UVQkmSpIEzIJQkSRo4A0JJkqSBMyCUJEkaOANCSZKkgTMglCRJGjgDQkmSpIEzIJQkSRo4A0JJkqSBMyCUJEkaOANCSZKkgTMglCRJGjgDQkmSpIEzIJQkSRq4RQkIkyxPckGS85Kc3dM2T3Jakkv7/816epK8M8myJOcn2WUx8ixJkrS2WswrhL9VVTtX1a798+HAF6pqR+AL/TPAPsCO/e9Q4OgFz6kkSdJabHXqMj4AOL4PHw88cyT9hGrOADZNstViZFCSJGlttFgBYQGfS3JOkkN72pZVtaIPXwNs2Ye3Aa4cmfeqnnYfSQ5NcnaSs6+//vr5yrckSdJaZ71FWu+TqurqJL8InJbk26Mjq6qS1FwWWFXHAMcA7LrrrnOaV5IkacgW5QphVV3d/18HfALYDbh2oiu4/7+uT341sN3I7Nv2NEmSJK0CCx4QJtkwycYTw8CewIXAycDBfbKDgU/24ZOBg/rTxrsDt4x0LUuSJGklLUaX8ZbAJ5JMrP9DVfXfSc4CTkryMuAK4MA+/anAvsAy4A7gkIXPsiRpLpYefsqirHf5UfstynqlNd2CB4RVdRnw2CnSbwSeNkV6Aa9cgKxJkiQN0ur02hlJkiQtAgNCSZKkgTMglCRJGjgDQkmSpIEzIJQkSRo4A0JJkqSBW6yfrpOkRXtXnSTpvrxCKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHC+h1CS1mK+61HSOLxCKEmSNHAGhJIkSQNnl7FWa3Z3SZI0/7xCKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQPni6klSVpJi/US/eVH7bco69Xax4BQkiSt9gy655cBoSRpreHPXUoPjAHhGsgGT5IkrUo+VCJJkjRwBoSSJEkDZ5exJC0Ab/XQfFiMejWUhyyGxiuEkiRJA2dAKEmSNHAGhJIkSQNnQChJkjRwBoSSJEkDt8YEhEn2TvKdJMuSHL7Y+ZEkSVpbrBEBYZJ1gXcD+wA7AS9MstPi5kqSJGntsKa8h3A3YFlVXQaQ5CPAAcDFi5orSZIGxndqrp3WiCuEwDbAlSOfr+ppkiRJWklryhXCWSU5FDi0f7w9yXcWYLVbADcswHrWdJbT+Cyr8VhO47OsxmdZjWdQ5ZS3rNTs45bVw1dqLavAmhIQXg1sN/J52552j6o6BjhmITOV5Oyq2nUh17kmspzGZ1mNx3Ian2U1PstqPJbT+NakslpTuozPAnZMskOSDYAXACcvcp4kSZLWCmvEFcKqujvJYcBngXWBY6vqokXOliRJ0lphjQgIAarqVODUxc7HJAvaRb0Gs5zGZ1mNx3Ian2U1PstqPJbT+NaYskpVLXYeJEmStIjWlHsIJUmSNE8MCB8Af0bvXkm2S/LFJBcnuSjJq3r6kUmuTnJe/9t3ZJ7X9rL7TpK9Fi/3Cy/J8iQX9DI5u6dtnuS0JJf2/5v19CR5Zy+r85Pssri5XzhJfmWk7pyX5NYkr7ZeQZJjk1yX5MKRtDnXoSQH9+kvTXLwYmzLfJumrN6a5Nu9PD6RZNOevjTJnSN1670j8zy+H7fLenlmMbZnPk1TVnM+3tb28+M05fTRkTJanuS8nr5m1amq8m8Of7SHWr4HPALYAPgWsNNi52sRy2MrYJc+vDHwXdrPCx4J/PkU0+/Uy+xBwA69LNdd7O1YwPJaDmwxKe0fgcP78OHAW/rwvsBngAC7A2cudv4XqczWBa6hvadr8PUKeAqwC3DhA61DwObAZf3/Zn14s8XetgUqqz2B9frwW0bKaunodJOW881efunluc9ib9sCldWcjrchnB+nKqdJ498GvH5NrFNeIZy7e35Gr6p+Ckz8jN4gVdWKqjq3D98GXMLMvyJzAPCRqvpJVV0OLKOV6ZAdABzfh48HnjmSfkI1ZwCbJtlqMTK4yJ4GfK+qrphhmsHUq6r6MnDTpOS51qG9gNOq6qaquhk4Ddh7/nO/sKYqq6r6XFXd3T+eQXuv7bR6eW1SVWdUO5OfwL3lu9aYpl5NZ7rjba0/P85UTv0q34HAh2daxupapwwI586f0ZtGkqXA44Aze9JhvVvm2IkuLCy/Aj6X5Jy0X9cB2LKqVvTha4At+/DQy2rCC7hvA2u9ur+51qGhl9eE36NdnZmwQ5L/TXJ6kif3tG1o5TNhaGU1l+Nt6PXqycC1VXXpSNoaU6cMCLVKJNkI+E/g1VV1K3A08EhgZ2AF7TK64ElVtQuwD/DKJE8ZHdm/Lfrof5f2IvpnAB/rSdarWViHxpPkdcDdwAd70gpg+6p6HPBnwIeSbLJY+VtNeLzNzQu575fXNapOGRDO3aw/ozc0SdanBYMfrKqPA1TVtVX1s6r6OfA+7u2+G3T5VdXV/f91wCdo5XLtRFdw/39dn3zQZdXtA5xbVdeC9WoGc61Dgy6vJC8Ffgd4UQ+g6d2fN/bhc2j3wv0yrVxGu5UHU1YP4HgbbL1Ksh7wbOCjE2lrWp0yIJw7f0ZvRL9n4v3AJVX19pH00XvdngVMPJF1MvCCJA9KsgOwI+3m2rVekg2TbDwxTLu5/UJamUw85Xkw8Mk+fDJwUH9SdHfglpFuwaG4zzdu69W05lqHPgvsmWSz3g24Z09b6yXZG/hL4BlVdcdI+pIk6/bhR9Dq0GW9vG5Nsntv7w7i3vJdqz2A423I58enA9+uqnu6gte4OrXYT7WsiX+0J/e+S4v2X7fY+VnksngSrXvqfOC8/rcv8AHggp5+MrDVyDyv62X3HVaDJ6sWsKweQXvq7lvARRN1B3gY8AXgUuDzwOY9PcC7e1ldAOy62NuwwOW1IXAj8NCRtMHXK1qAvAK4i3bv0cseSB2i3T+3rP8dstjbtYBltYx2n9tEe/XePu1z+nF5HnAusP/IcnalBUPfA95F/1GHtelvmrKa8/G2tp8fpyqnnn4c8IpJ065RdcpfKpEkSRo4u4wlSZIGzoBQkiRp4AwIJUmSBs6AUJIkaeAMCCVJkgbOgFDSWi/J7bOM3zTJHy1APt6Y5OmzTLNHkt+c77xI0igDQkmCTYF5Dwir6vVV9flZJtsDMCCUtKAMCCWtMZIsTXJJkvcluSjJ55I8eIrpdkjyjSQXJPm7kfSNknwhybl93AF91FHAI5Ocl+StM0w3eT23J3lHz8sXkizp6TsnOSPJ+Uk+0X8NhCTHJXluH16e5A0j6/jVJEuBVwB/2vPy5CTPS3Jhkm8l+fKqLE9JmmBAKGlNsyPw7qp6FPBD2q8BTPYvwNFV9WjarwpM+DHwrKraBfgt4G39p6MOB75XVTtX1V/MMN1kGwJn97ycDhzR008A/qqqHkP7pYcjppgX4Ia+jqOBP6+q5cB7gXf0vHwFeD2wV1U9FnjGrKUjSQ+AAaGkNc3lVXVeHz4HWDrFNE/k3t9A/sBIeoC/T3I+7SfetgG2nGL+caf7Off+mP2JwJOSPBTYtKpO7+nHA0+ZZls+Pst2AHwNOC7JHwDrTjONJK2U9RY7A5I0Rz8ZGf4ZcL8u426q3+V8EbAEeHxV3ZVkOfALKzHdOOucycS2/Ixp2uOqekWSJwD7AeckeXxV3TjH9UjSjLxCKGlt9DXgBX34RSPpDwWu60HebwEP7+m3ARuPMd1k6wDP7cO/C3y1qm4Bbk7y5J7+Elp38rjuk5ckj6yqM6vq9cD1wHZzWJYkjcUrhJLWRq8CPpTkr4BPjqR/EPhUkguAs4FvA1TVjUm+luRC4DPAW6aabgo/AnZL8jfAdcDze/rBwHuTPAS4DDhkDnn/FPAf/UGWP6Y9YLIjrRv7C8C35rAsSRpLqubawyFJgvaUcVVttNj5kKSVZZexJEnSwHmFUJIkaeC8QihJkjRwBoSSJEkDZ0AoSZI0cAaEkiRJA2dAKEmSNHAGhJIkSQP3/wOHXATG1obzCgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plot_data_points_hist(d_real)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First we generate a dictionnary with cumulative probability based on frequency of delays, for each keys in our reference dictionnary." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def cumul_distri_probas_dict(dico):\n", " list_tot_points = []\n", " for key in dico:\n", " distrib = dico[key]\n", "\n", " # get total number of elements \n", " N = np.sum(distrib)\n", "\n", " # make cumulative distribution probabilities\n", " cdf_distrib = np.empty((len(distrib)), dtype=float)\n", " save_x = 0\n", " for x in range(len(distrib)):\n", " cdf_distrib[x] = float(distrib[x])/float(N) + float(save_x)/float(N)\n", " save_x += distrib[x]\n", "\n", " dico[key] = cdf_distrib\n", " return dico" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('1286.TA.26-32-j19-1.12.H__8591182',\n", " array([0. , 0.64333333, 0.81333333, 0.90333333, 0.95555556,\n", " 0.96888889, 0.98444444, 0.99611111, 0.99777778, 0.99888889,\n", " 0.99888889, 0.99944444, 0.99944444, 0.99944444, 0.99944444,\n", " 0.99944444, 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. ])),\n", " ('1286.TA.26-32-j19-1.12.H__8591184',\n", " array([5.56483027e-04, 4.24596550e-01, 7.31775181e-01, 8.94268225e-01,\n", " 9.59933222e-01, 9.86644407e-01, 9.93878687e-01, 9.98330551e-01,\n", " 9.98330551e-01, 9.98887034e-01, 9.99443517e-01, 9.99443517e-01,\n", " 9.99443517e-01, 9.99443517e-01, 9.99443517e-01, 9.99443517e-01,\n", " 9.99443517e-01, 9.99443517e-01, 9.99443517e-01, 9.99443517e-01,\n", " 9.99443517e-01, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,\n", " 1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,\n", " 1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00])),\n", " ('1286.TA.26-32-j19-1.12.H__8591195',\n", " array([0. , 0.60166667, 0.84833333, 0.92777778, 0.96333333,\n", " 0.98277778, 0.99166667, 0.99666667, 0.99833333, 0.99888889,\n", " 0.99888889, 0.99888889, 0.99888889, 0.99888889, 0.99888889,\n", " 0.99944444, 0.99944444, 0.99944444, 0.99944444, 0.99944444,\n", " 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. ]))]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_all_cdp = cumul_distri_probas_dict(d_all)\n", "take(3, d_all_cdp.items())" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('10.TA.1-11-B-j19-1.1.R__8590314',\n", " array([0. , 0.25 , 0.5 , 0.625, 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. ])),\n", " ('10.TA.1-11-B-j19-1.1.R__8590317',\n", " array([0. , 0.3, 0.5, 0.7, 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. , 1. ])),\n", " ('10.TA.1-11-B-j19-1.1.R__8594304',\n", " array([0. , 0. , 0.5, 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,\n", " 1. , 1. , 1. , 1. , 1. , 1. ]))]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_real_cdp = cumul_distri_probas_dict(d_real)\n", "take(3, d_real_cdp.items())" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# write dictionnary \n", "with gzip.open(\"../data/distributions_cumulative_real.pkl.gz\", \"wb\") as output_file:\n", " pickle.dump(d_real_cdp, output_file)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'd' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# write dictionnary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mgzip\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"../data/distributions_cumulative.pickle\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"wb\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0moutput_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'd' is not defined" ] } ], "source": [ "# write dictionnary \n", "with gzip.open(\"../data/distributions_cumulative.pickle\", \"wb\") as output_file:\n", " pickle.dump(d, output_file)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Construct recovery tables \n", "\n", "First approach is to simple sum up similar distribution to get a new distribution we can use. For that, we need to have transport type (`route_desc`), `time` (rounded to hour) and `stop_id` which are valid. We then make all combination of these tree parameters and get the associate distributions" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
route_inttrip_intstop_intstop_sequencearrival_timedeparture_timeroute_idtrip_idstop_idroute_descstop_id_rawsequence_shift_1
00001NaT2020-05-21 07:18:0030-57-Y-j19-14.TA.30-57-Y-j19-1.1.H8502208Bus85022082
100122020-05-21 07:23:002020-05-21 07:23:0030-57-Y-j19-14.TA.30-57-Y-j19-1.1.H8502209Bus85022093
200232020-05-21 07:29:00NaT30-57-Y-j19-14.TA.30-57-Y-j19-1.1.H8503202Bus85032021
30101NaT2020-05-21 07:48:0030-57-Y-j19-15.TA.30-57-Y-j19-1.1.H8502208Bus85022082
401122020-05-21 07:53:002020-05-21 07:53:0030-57-Y-j19-15.TA.30-57-Y-j19-1.1.H8502209Bus85022093
\n", "
" ], "text/plain": [ " route_int trip_int stop_int stop_sequence arrival_time \\\n", "0 0 0 0 1 NaT \n", "1 0 0 1 2 2020-05-21 07:23:00 \n", "2 0 0 2 3 2020-05-21 07:29:00 \n", "3 0 1 0 1 NaT \n", "4 0 1 1 2 2020-05-21 07:53:00 \n", "\n", " departure_time route_id trip_id stop_id \\\n", "0 2020-05-21 07:18:00 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8502208 \n", "1 2020-05-21 07:23:00 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8502209 \n", "2 NaT 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8503202 \n", "3 2020-05-21 07:48:00 30-57-Y-j19-1 5.TA.30-57-Y-j19-1.1.H 8502208 \n", "4 2020-05-21 07:53:00 30-57-Y-j19-1 5.TA.30-57-Y-j19-1.1.H 8502209 \n", "\n", " route_desc stop_id_raw sequence_shift_1 \n", "0 Bus 8502208 2 \n", "1 Bus 8502209 3 \n", "2 Bus 8503202 1 \n", "3 Bus 8502208 2 \n", "4 Bus 8502209 3 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open(\"../data/stop_times_df.pkl\", \"rb\") as input_file:\n", " stoptimes = pickle.load(input_file)\n", " \n", "stoptimes.head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.0%, 4.07%, 8.14%, 12.21%, 16.28%, 20.35%, 24.42%, 28.49%, 32.55%, 36.62%, 40.69%, 44.76%, 48.83%, 52.9%, 56.97%, 61.04%, 65.11%, 69.18%, 73.25%, 77.32%, 81.39%, 85.46%, 89.53%, 93.6%, 97.66%, " ] } ], "source": [ "# Set same stoptimes index as distribution dict \n", "stoptimes['stop_id'] = stoptimes['stop_id'].astype(str).str[0:7]\n", "stoptimes['key'] = stoptimes['trip_id'] + '__' + stoptimes['stop_id']\n", "stoptimes = stoptimes.set_index('key')\n", "\n", "stoptimes = stoptimes[['trip_id','stop_id', 'route_desc', 'arrival_time', 'departure_time']]\n", "\n", "list_hours = []\n", "size_stop_times = stoptimes.shape[0]\n", "for x in range(size_stop_times):\n", " if (x % 10000) == 0 :\n", " print('{}%'.format(round(100*x/size_stop_times,2)), end = ', ')\n", " \n", " arr_time_hour = pd.to_datetime(stoptimes.iloc[x,:]['arrival_time']).hour\n", " if math.isnan(arr_time_hour): # if arrival is NaT, use departure time\n", " arr_time_hour = pd.to_datetime(stoptimes.iloc[x,:]['departure_time']).hour\n", " list_hours.append(int(arr_time_hour))\n", " \n", "stoptimes['hour'] = list_hours\n", "stoptimes = stoptimes.drop(columns=['trip_id', 'arrival_time', 'departure_time'])\n", "\n", "# Write this pickle to avoid re-running this above code all the time\n", "with gzip.open(\"../data/stop_times_wHour.pkl\", \"wb\") as output_file:\n", " pickle.dump(stoptimes, output_file) \n", " " ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(17321, 32)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...22232425262728293031
stop_idhourroute_desc
85009268.0Bus0233444444...4444444444
9.0Bus0122222222...2222222223
10.0Bus0011111222...2222222222
11.0Bus0011122222...2222222222
\n", "

4 rows × 32 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 ... 22 23 \\\n", "stop_id hour route_desc ... \n", "8500926 8.0 Bus 0 2 3 3 4 4 4 4 4 4 ... 4 4 \n", " 9.0 Bus 0 1 2 2 2 2 2 2 2 2 ... 2 2 \n", " 10.0 Bus 0 0 1 1 1 1 1 2 2 2 ... 2 2 \n", " 11.0 Bus 0 0 1 1 1 2 2 2 2 2 ... 2 2 \n", "\n", " 24 25 26 27 28 29 30 31 \n", "stop_id hour route_desc \n", "8500926 8.0 Bus 4 4 4 4 4 4 4 4 \n", " 9.0 Bus 2 2 2 2 2 2 2 3 \n", " 10.0 Bus 2 2 2 2 2 2 2 2 \n", " 11.0 Bus 2 2 2 2 2 2 2 2 \n", "\n", "[4 rows x 32 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with gzip.open(\"../data/stop_times_wHour.pkl\", \"rb\") as input_file:\n", " stoptimes = pickle.load(input_file)\n", " \n", "distrib_df = pd.DataFrame(d_all).transpose()\n", "distrib_to_rm = np.array(distrib_df.iloc[:,range(11)].sum(axis=1) == 11) # missing trips\n", "distrib_df = distrib_df.iloc[~distrib_to_rm,:]\n", "\n", "stoptimes_df = pd.DataFrame(stoptimes)\n", "\n", "recovery_df = distrib_df.join(stoptimes_df)\n", "list_bins = [x for x in range(32)]\n", "\n", "recovery_df = recovery_df.groupby(['stop_id','hour', 'route_desc'])[list_bins].apply(lambda x : x.astype(float).sum())\n", "recovery_df = recovery_df.astype('int')\n", "print(recovery_df.shape)\n", "recovery_df.head(4)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "def plot_df_missing(df, max_bin = 10000):\n", " tot_per_key = np.array(df.sum(axis=1)).astype('int')\n", " binwidth = 100\n", " n_keys_less_than_binwidth = np.sum(np.array(tot_per_key < binwidth))\n", " perc_key_to_recover = round(100 * ( n_keys_less_than_binwidth / len(tot_per_key) ), 2)\n", " plt.figure(figsize = (10,5))\n", " plt.hist(tot_per_key, bins = range(min(tot_per_key), max_bin + binwidth, binwidth))\n", " plt.title(\"Total number of data points per stop_id / hour key. N keys with less than {0} points: {1} ({2}%)\"\\\n", " .format(binwidth, n_keys_less_than_binwidth, perc_key_to_recover))\n", " plt.xlabel('n data points')\n", " plt.ylabel('n keys')\n", " return plt.show()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAn8AAAFNCAYAAABxFAnAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3dd5xtZX3v8c9XmgWlCDFSwkFFE4xXokQxlqAkgqJirEQENOZyLUmMN4mCURFLgjHW2KKigg2RWFDxKkEFKwoWqsihhSqHXkSl/O4fzzOwz5wpe2DmzJyzPu/Xa16z97PWXutZ/buetdbeqSokSZI0DHdZ7ApIkiRp9TH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKADCb8JakkD1gC9fhWkr9epHHfLcmXklyT5LNj9L9zkgtXR93ujCR7Jfn6YtdjTZfk1Uk+PEP385L82R0c9mKu9y9I8p3FGPc4knwsyZsWux4zmW0bm+u+YjHXhz7+JT/PV6ckH0jy2sWux2JKskGS05Pcd7Hrcmck+WGSB8/W36KHvyTXj/zdmuTGkfd7TfOZNSKULEHPAu4D3Luqnj2fA17MA2xVfbKqnjhOv0s9CEwlybJ+8rLuQo6nqv6lqu7UATnJmUkeOF91WtP09auSvHJS+YVJdl6kat1pk7expXIyPY7F3uaTvDHJKUluTvL6Kbo/L8n5SW5I8oUkm4502zTJ53u385M8byHqWFUvrqo3jtPvfAbnJK9Ick6Sa5NcnOQdo/u52ebdSH8fmbxO9v3m0UmuSnJpkvfMsg/dDzi+qi7pn398km/2xpLzphjnDkm+3btfOFN4TvKHSb6W5PIkq3y58qQcdH2SW5L8R++2dZIfJLkyydsmfe6rSXacNLh/B94ww3QCSyD8VdWGE3/A/wBPHSn75GLXb6lKM9fltw3wi6q6eSHqpPEtdJBbDEnuD6xTVb9YxDoshfl6JfDKJPdc7IpoSVgOvBL4yuQOvYXmP4G9aSfmvwLeN9LLe4Hf9m57Ae8fp1VnDXIU8LCquhfwh8BDgb8b6T7tvJuQ5DHA/afo9D7gMuC+wA7AnwIvnaEuLwY+PvL+BuAjwD9N0/+ngOOBTSeGneRp0/R7E3AE8KKpOk7KQb8L3AhMXJ07ADgU2BZ4+kTYS/Jc4NyqOnHS4I4CHp/kd6ebUFgC4W86vQn2nf1s4OL+eoMk9wC+CmwxkpK3SPKIJN9PcnWSS3rKX3/McX2rn2F8N8l1Sb6eZLPebZVWxoxc/kry+iSfTfKJ/tlTkjwwyQFJLktyQZLJrVL3T2uavTbJFyed6e2U5Ht9On6WkdaCXs83J/kubSdxvymm5Q96f1cnOW1iZUxyEPA64Ll9nq2yEqZdFv5YP1M6HfjjSd33T3J2n87Tk/zFxDiBDwCP6sO+upfvnuQnfTovmOXMbed+9vTqfnZ0XkZafpNslOSwJCvSzoBfMxF+M+nMPu0M8MVJzurz4b09LE9Xzyf36bkuyUVJ/nGaOr6gryPvSTvb+3mSXSbV8ZC+/l2U5E1J1pn02XckuQJYZV70dfjEPr9+meTtvdPx/f/Vvd6PSnKXPg/O7+vZYUk26sOZaCncL23buWS6aZo0/tcn+cTI+7378K9I8s+zfR7YHTh6hu7bZIptrI/raX19vbqvv38w0m3yGf1tLQ8j682rklwKfHSM6Xxrku/05TXlMkuyftqZ9kNGPvc7SX6VZPNZRnEG8H3g/85Wlynqds+01oZ393X295Mc0+tyZpLn9P7+uK8j64x89hlJftZfT7cuTR7fcUme2V8/us/r3fv7XZL8tL++bRtLMrE+/qyvj88dGd4/9PXxkiQvnMN0/1WSM9L2PV9Lsk0vT99mLuvTckqSP+zdZt1uM802322S5Cv98yeknbxMfO5dafusa5OclOSxI91en+SIvs1d19fbya0vt6mqQ6vqq8B1U3TeC/hSVR1fVdcDrwWe0deDewDPBF5bVddX1XdoB/a9p5mHr09yZJLP9Hr9OMlDR+dFpjg29G5TbVOrLMsk+/U6v7LPzy/18lf1ZXBdX093YQxVdXZVTSyTALcCDxjpPtO8mzjZ+w/gb6fovC1wRFX9uqouBf4fMGVwTvJ7tOPpCSPj/mFVfRw4Z5rqLwM+WVW3VNXZwHemG35VnVlVhwCnTTOsUc+khdZvj0zHN6rqGuBHwP2S3AvYH3j1FOP6NXASsOtMI1my4Q/4Z2AnWmJ/KPAI4DVVdQPwJODikbR8MXAL8ApgM+BRwC7MnPInex7wQuB3gPWBWQ+WI55KO2PYBPgJ8DXavN2S1vz6n5P63wf4K9oZyc3AuwGSbEk7w3kT7WziH4H/mnSw2ZvWPH1P4PzRgSZZD/gS8PU+HX8LfDLJg6rqQOBfgM/0eXbIFNNxIO0M6v60FWffSd3PBh4LbAQcBHwiyX2r6gzaWdP3+7A37v3f0Kd1Y1oweEmSp081A7vfpS2/Lfu4P5jkQb3bf/Tx3o92lrUPbXlN5ym08Pq/gOcAu85Qz0OA/1NV96SdfX5jhuE+ss+HzWjz63O5Pbx/jLY8HwD8EfBE4K8nffYc2ln8m6cY9ruAd/Wz4PvTzhQBHtf/b9zr/X3gBf3v8bR5siHwnknDezywXa/HqzKH+/WSbA+8n7a+bQHcG9hqlo89mRnO0JlmG0u7TPxp4O+BzWkB8ksZ8+SNtt5sSmvZ3m+6ntIC84do68QT+870Y0yxzKrqt8DhwPNHBvGXwLFVtWKMOr0W+PuRdWNWSe4NHAt8t6r+Drg7cAytheF3gD2B9yXZvqp+BFzR6zthb+Cw/nq6dWmy44Cd++s/pa2fjxt5f9zkD1TVRPeH9vXxM/3979K20S1pLRzvTbLJGNO9B+0g9gza8v82bX2gT9/jgAf2YT+nTzeMsd3OsM1Dm58H0fbby1l5m/wR7dizKW3+fzbJXUe6P422fmxMC2STt71xPRj42Uh9z6a19D2w/908qSX9Z0wTMLo9aC1GE/X+QpL1Zjo2TDOcKZdlVX0Q+CTwb31+PrUP42+AP+7LYlfgPGitcpMC9yrSLntfC1xOO9ZPPl7O5BW0S7UnT9HtncCeSe7ej61PogXAqTwEOGeOV8XeCezT5++DaLnjv+fw+ensCxxWt//27qnAnyfZGHg4LUC+EXjnSHCe7AzavJzWUg5/ewFvqKrL+s72IKY54wGoqpOq6gdVdXNVnUdbgf50DuP7aFX9oqpupO0od5jDZ79dVV/rK85naTuwg6vqJtoOYllfcBM+XlWn9iD7WuA5aWfwzweOrqqjq+rWqjoGOJF2UJ3wsao6rU/nTZPqsRMtBBxcVb+tqm8AX6YdtMbxHODNVXVlVV1AD6UTquqzVXVxr9tngLNooXxKVfWtqjql938ybYc+2zJ5bVX9pqqOowWJiXmzJ3BAVV3Xl+/bmGF9oM2Dq6vqf4BvMvPyvAnYPsm9quqqqvrxDP1eRtvoburz4Exg9yT3oS2nv6+qG6rqMuAdvd4TLq6q/+jL7sZp6vGAJJv1M/0fzFCPvYC3V9U5vcXgANqObvSy50G9LqfQWsTGXQ+g3R/65d4i8RvaenrrdD0nuTstbH9rhmFOt409F/hKVR3T1+l/B+4G/MmYdb0VOLCvN1PNV4D1aOvfprRbS341xjI7FPjLJOnv92bly0LTqqqf0oLbq8achi1oQeuzVfWaXvYU4Lyq+mhfZ34C/Bcwcb/uofRw2kPmrrQDPoy/Lh3H7dvk44B/HXk/ZfibwU20ffZNVXU0cD0wXbgY9WLgX6vqjL4P/Rdgh976dxPtRPf3gfR+LhkZ37jb7VQ+X61152ZaoLltH1FVn6iqK/p8fxuwwaRp+U7fT99CWydmPNDOYEPgmkll19CmeUPg2mm6Teekqjqyb0dvB+5KOy7M9dgwl2V5C23+bJ9kvao6r4dYquo7kwL3KqrqU/0k5YG0VtpfztT/hCRbA/+HdkVrKsfTgvK1wIW0Y+kXpul3Y6ZpXZzBl2n7yRuBnwOH9JOyO6yv839K27Yn/Cut0eU42qXs9WknsF9K8qkkxyf5m0mDuo42TdNayuFvC1Zu2Tq/l00p7VLrl9Nu7LyWtgPZbLr+p3DpyOtf0TaUcY2urDcCl/edwsR7Jg3vgpHX59MOTJvRWi6e3Zvlr+5nTI+htRBO9dnJtgAuqKrRg/T5tLO3cWwxRd1uk2SfJD8dqdsfMsM8TvLItEtYK5JcQ9vJz7RMruqBeHT8W/TPrMeq68NM0zWX5flMWgg4P+0y2KNm6PeikTOy0Tpu0+t4ycj8+U/aWfaEmZYdtDPsBwI/T/KjJE+Zod+pto91aa2KU41vxu1nmuHf9vm+XK6Yvnd2Ab7Xg+J0plsmK01LX38vYPz1dkW1Sx0zeQCtVeSgaq16MMsyq6oTej13TvL7fRhHjVknaAell/SQOZvdaYH3AyNl2wCPnLQ/2IvWKgPwCeCp/fLgc2gnoRPBaNx16fvAA3sdd6C1HG6ddkn+Edx+y8E4rpjUcjLufnQb4F0j03gl7RLglj2kvId279tlST7YL3nB3LbbqUy7j0jyj2mXoa/pddqIlfddkz9719yx+02vB+41qexetIP3TN2mM7rN3koLPVsw92PD2MuyqpbTWu1fT1tGhyeZy75mYjhn0Vq13jdbv907aQF1cngm7Zag/wd8DrgHbdltArxlmmFdxcyhevLwN+3DfwMtYG8N7JpkLlcbp7I37cTi3ImC3hjz3Kp6KK1Ff+Iy9/60VsE/A16ckVtl+rTM2OK6lMPfxbSdwoTf62UAqzwtQ7tE9XNgu34W8WraDuTOuoF2+QWA3go12z0/s9l65PXv0c6yLqdtuB+vqo1H/u5RVQeP9D/VtE+4mLbjHl2uvwdcNGa9LpmibsBtZyQfojXv37ufzZ3K7fN4qnp9inaw3LqqNqId2GZaJpv0A9no+C+mzZubWHV9GHe6Rq1Sz6r6UVXtQTvof4HpL5EBbDnSEjRaxwuA3wCbjSy7e1XV6CWamZYdVXVWVf1lr8dbgCP7/Jjqc1NtHzez8onI5GV5MeNbaV3oLXv3nqH/JzPz/X4zWWla+vzdmtuX768Y2Qa5PfxMmHG+dmfQLjl/deRS1zjLbKJ1bW/gyDFC5u2Vqvo57eAzzv2SH6IdTI4e2QYuAI6btD/YsKpe0od/ES28PYNJrZIzrEuT6/gr2v1BLwdO7cH4e7T7Fc+uqsvHnd474QLa5dvR6bxbVX2v1/HdVfVwYHtaoP2nXj7udjvO+nGbtPv7XkkL1Jv0fd01zM/xZLLTGGk1THI/WivaL/rfukm2G+n/ocx839joNnsX2q0aF3Pnjw2jptqHfqqqHkPbjovpQ9Zs1mXqhzemsgvw1t7gMxHGv5/2RPSmtOl7T78icAXt6seTpxnWycC2cwjw9wNuqarDeuvwhbSrfNMNf1z7sHKr32T7AT+oqlNpl6pP7NvsKf39hD9g5HaCqSzl8Pdp4DVJNu9noa+jnelCO8DdO/0G9+6etObd6/tZ+kvmqR6/oJ3V7d7vm3gNbeO8M56fZPt+QH0D7aByC7efye+adtP5XdNuvp3tXqsJEy0Vr+z3IexMux/x8DE/fwRwQJJN+jhHb6KdCCErANJuAP7Dke6/BLbKyvdp3RO4sqp+neQRtHu+ZnNQ2s32j6Vd9vpsnzdHAG9OuxF6G9rB6RMzDWgaK9Wzj2uvJBv1SyXXMsPlTdqB5u/6/H02bSM7ure4fB14W5J7pd1fdv8kY996kOT5STbvZ+cTZ2230ub5raz8gM+ngVck2TbJhtx+P+fo2fpr0+53eTAt+HyG8R0JPCXtnp31aevpTPuLJzHz/X4zOYJ26XyXvo39Ay2Ufa93/ynwvL5N7Mbcbue4TVV9mnZS+N9J7j/mMvsE8Be0AHjYqkOd1UG0eT/jJZjub2i3EXwpyd1ol5UemPbgzXr9748nneEfRgsqD6EFTWDGdWkqx/VxT1zi/dak91P5JVM8cHYHfYC233kw3Pbg1LP76z9Ou4KwHu1E/NfArXPcbqfaN83knrQTqRW08PU6Vm2BG1tfbnelbT/r9v36xIM6n6Tt8x/bw/kbgM9Vu73lBtoyfUOSeyR5NK31eqZbDx6e9uDPurTWuN8AP+DOHxtGrbTskzwoyROSbEBbPjcy8z70Nkn+Osnv9Nfb025fOXak+0zz7oG0MLwDt1+yfyrtcv7lwLm0lvd102672pcW8lbRw9tyRm5j6vuDu9KuDqSPe2Id+kUve17v73dpt69MOfw0d6VdsqUPa4NJ/fwJrSV2yu/g7fPpZdz+sOC5tKd6NwR2pD+Y0sfzcNptJ9NayuHvTbRr9CfTUu2Pe9nEGfWngXPSLhVsQbt5/Hm0JvEPMbcD3bR6k/JLgQ/TzpJuoDWl3xkfp91ofimtyfjv+rguoG3cr6bteC6gneWOtZz6GcBTaQfiy2nN5/v0+TWOg2iXAs6lHRRHWxJOp91n933axv8Q4Lsjn/0G7Yz00iQTrQUvpe24rqOF95la1KDNj6toZ6mfBF48Uve/pc37c2hPVX2K9hj+XE1Vz72B89JuF3gx7dLadE6gPURxOe0G8Wf1s0poZ23rA6f36TiSlS/Zz2Y34LQk19Oa9/esqht768ybge/29X0n2rR/nHZZ7lzaTnfyE2/H0XZoxwL/XlVjfxF2VZ1G29F8itYKeBXTrPdpT19eX+3+yjmrqjNp4eo/aPP1qbT78iYuz768l01c9pzuvp1xxnUo7QD7jSTLmGWZ9W3yx7QTn28DpD0pOdM6Mjq+c2nLaZVWtyn6LdqZ/YXAF2mt3U+k3YN4MW37eAsrn3x+ntba8vm+nkyYcl2aZtTH0QLP8dO8n8rrgUP7+vic2aZtJlX1edp0Hd63wVNp+zBooetDtGVzPu3Wg7f2buNut1Nt8zP5Gq0V9hd9nL9m9ls2ZvIhWiD6S1or8I297hPb2Ytp+7vLaPN99NLhS2m3A1xGO+a9pH9mOl+khZCr+jie0e/bu7PHhlGH0O7vuzrJF2jr48F9uJfSTpAPgNaK2tfB6TwaOCXJDbQrB0ez8hOsM827y6rq0om/3v/lI+v5M2jbwQrafvAm2gMi05n4yp0Jj+vjO5rWingj7bhIVV3bh/8K2rz+KW29nXhi+vfSnoaeuHq2Tf/8xLK7kXaiN2pfevCfpn7/TrvMPTE//xV4Am3d/FLd/pUvTwW+Ve1B2Gll5duXpMXRz0Q/UVXjtnKudkleQHsS9DGLXZeZ9FBzLrBerYbvdEz7QuPNquqVs/a8BkryEdrDOq+ZtedFkORs2mXT+XjSUGuotK/SekBVPX+2frWq3hL3E2CXkXtn1zhJTgBe1C8NT2spfCGqpDXbebSvkVjr9CD9DNrXwCw5ad/RV8z89USSZlHtYbXtF7sed1ZVPXKc/pbyZV9J8yzt54Am/5TQ9UlW+bLQcVXVEdW+T22tkuSNtEs5bx19+m6pSPIt2oNuL6uVn+KUpBl52VeSJGlAbPmTJEkaEMOfJEnSgKyVD3xsttlmtWzZssWuhiRJ0qxOOumky6vqzv6AxNjWyvC3bNkyTjzxxNl7lCRJWmRJzp+9r/njZV9JkqQBMfxJkiQNiOFPkiRpQAx/kiRJA2L4kyRJGhDDnyRJ0oAY/iRJkgbE8CdJkjQghj9JkqQBMfxJkiQNiOFPkiRpQNbK3/ZdTMv2/8oqZecdvPsi1ESSJGlVtvxJkiQNiOFPkiRpQAx/kiRJA2L4kyRJGhDDnyRJ0oAY/iRJkgbE8CdJkjQghj9JkqQBMfxJkiQNiOFPkiRpQAx/kiRJA2L4kyRJGhDDnyRJ0oAY/iRJkgbE8CdJkjQghj9JkqQBMfxJkiQNiOFPkiRpQBY8/CVZJ8lPkny5v982yQlJlif5TJL1e/kG/f3y3n3ZyDAO6OVnJtl1oessSZK0tlodLX8vB84Yef8W4B1V9QDgKuBFvfxFwFW9/B29P5JsD+wJPBjYDXhfknVWQ70lSZLWOgsa/pJsBewOfLi/D/AE4Mjey6HA0/vrPfp7evddev97AIdX1W+q6lxgOfCIhay3JEnS2mqhW/7eCbwSuLW/vzdwdVXd3N9fCGzZX28JXADQu1/T+7+tfIrPSJIkaQ4WLPwleQpwWVWdtFDjmDS+/ZKcmOTEFStWrI5RSpIkrXEWsuXv0cDTkpwHHE673PsuYOMk6/Z+tgIu6q8vArYG6N03Aq4YLZ/iM7epqg9W1Y5VtePmm28+/1MjSZK0Fliw8FdVB1TVVlW1jPbAxjeqai/gm8Czem/7Al/sr4/q7+ndv1FV1cv37E8DbwtsB/xwoeotSZK0Nlt39l7m3auAw5O8CfgJcEgvPwT4eJLlwJW0wEhVnZbkCOB04GbgZVV1y+qvtiRJ0ppvtYS/qvoW8K3++hymeFq3qn4NPHuaz78ZePPC1VCSJGkY/IUPSZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IIY/SZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IIY/SZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IIY/SZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IIY/SZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IIY/SZKkATH8SZIkDYjhT5IkaUDWXewKrMmW7f+Vxa6CJEnSnNjyJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkakAULf0numuSHSX6W5LQkB/XybZOckGR5ks8kWb+Xb9DfL+/dl40M64BefmaSXReqzpIkSWu7hWz5+w3whKp6KLADsFuSnYC3AO+oqgcAVwEv6v2/CLiql7+j90eS7YE9gQcDuwHvS7LOAtZbkiRprbVg4a+a6/vb9fpfAU8AjuzlhwJP76/36O/p3XdJkl5+eFX9pqrOBZYDj1ioekuSJK3NFvSevyTrJPkpcBlwDHA2cHVV3dx7uRDYsr/eErgAoHe/Brj3aPkUn5EkSdIcLGj4q6pbqmoHYCtaa93vL9S4kuyX5MQkJ65YsWKhRiNJkrRGWy1P+1bV1cA3gUcBGyeZ+E3hrYCL+uuLgK0BeveNgCtGy6f4zOg4PlhVO1bVjptvvvmCTIckSdKabt3Ze7ljkmwO3FRVVye5G/DntIc4vgk8Czgc2Bf4Yv/IUf3993v3b1RVJTkK+FSStwNbANsBP1yoei+EZft/ZZWy8w7efRFqIkmShm7Bwh9wX+DQ/mTuXYAjqurLSU4HDk/yJuAnwCG9/0OAjydZDlxJe8KXqjotyRHA6cDNwMuq6pYFrLckSdJaa8HCX1WdDPzRFOXnMMXTulX1a+DZ0wzrzcCb57uOkiRJQ+MvfEiSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgMwa/pI8Osk9+uvnJ3l7km0WvmqSJEmab+O0/L0f+FWShwL/AJwNHLagtZIkSdKCGCf83VxVBewBvKeq3gvcc2GrJUmSpIWw7hj9XJfkAOD5wOOS3AVYb2GrJUmSpIUwTsvfc4HfAC+qqkuBrYC3LmitJEmStCDGafl7NvDRqroKoKr+B+/5kyRJWiON0/J3H+BHSY5IsluSLHSlJEmStDBmDX9V9RpgO+AQ4AXAWUn+Jcn9F7hukiRJmmdjfclzf9r30v53M7AJcGSSf1vAukmSJGmezXrPX5KXA/sAlwMfBv6pqm7qT/2eBbxyYasoSZKk+TLOAx+bAs+oqvNHC6vq1iRPWZhqSZIkaSGMc8/fgcDWSV4IkGTzJNv2bmcscP0kSZI0j8b5bd8DgVcBB/Si9YBPLGSlJEmStDDGeeDjL4CnATcAVNXF+PNukiRJa6Rxwt9v+9O+BZDkHgtbJUmSJC2UccLfEUn+E9g4yf8G/pv21K8kSZLWMOM87fs24M+Aa4EHAa8Djl/ISkmSJGlhjBP+DqmqvwKOAUiyIXA0sMtCVkySJEnzb5zLvhcleR9Akk2Ar+PTvpIkSWukcb7n77XA9Uk+QAt+b6uqjy54zSRJkjTvpr3sm+QZI29PAF4L/BCoJM+oqs8tdOUkSZI0v2a65++pk97/hPYFz0+lfe2L4U+SJGkNM234q6oXrs6KCJbt/5VVys47ePdFqIkkSVpbjfPAhyRJktYShj9JkqQBMfxJkiQNyKxf8pxkA+CZwLLR/qvqDQtXLUmSJC2EcX7h44vANcBJwG8WtjqSJElaSOOEv62qarcFr4kkSZIW3Dj3/H0vyUPmOuAkWyf5ZpLTk5yW5OW9fNMkxyQ5q//fpJcnybuTLE9ycpKHjQxr397/WUn2nWtdJEmS1IwT/h4DnJTkzB7KTkly8hifuxn4h6raHtgJeFmS7YH9gWOrajvg2P4e4EnAdv1vP+D90MIicCDwSOARwIETgVGSJElzM85l3yfdkQFX1SXAJf31dUnOALYE9gB27r0dCnwLeFUvP6yqCvhBko2T3Lf3e0xVXQmQ5BhgN+DTd6RekiRJQzZr+Kuq8+/sSJIsA/6I9hvB9+nBEOBS4D799ZbABSMfu7CXTVcuSZKkOVrw7/lLsiHwX8DfV9W1o916K1/N03j2S3JikhNXrFgxH4OUJEla6yxo+EuyHi34fbKqPteLf9kv59L/X9bLLwK2Hvn4Vr1suvKVVNUHq2rHqtpx8803n98JkSRJWkssWPhLEuAQ4IyqevtIp6OAiSd296V9j+BE+T79qd+dgGv65eGvAU9Mskl/0OOJvUySJElzNM4DH3fUo4G9gVOS/LSXvRo4GDgiyYuA84Hn9AhEDfIAAAznSURBVG5HA08GlgO/Al4IUFVXJnkj8KPe3xsmHv6QJEnS3CxY+Kuq7wCZpvMuU/RfwMumGdZHgI/MX+0kSZKGacEf+JAkSdLSYfiTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA3IuotdgaFatv9XFrsKkiRpgGz5kyRJGhDDnyRJ0oAY/iRJkgbE8CdJkjQghj9JkqQBMfxJkiQNiOFPkiRpQAx/kiRJA2L4kyRJGhDDnyRJ0oAsWPhL8pEklyU5daRs0yTHJDmr/9+klyfJu5MsT3JykoeNfGbf3v9ZSfZdqPpKkiQNwUK2/H0M2G1S2f7AsVW1HXBsfw/wJGC7/rcf8H5oYRE4EHgk8AjgwInAKEmSpLlbsPBXVccDV04q3gM4tL8+FHj6SPlh1fwA2DjJfYFdgWOq6sqqugo4hlUDpSRJksa0uu/5u09VXdJfXwrcp7/eErhgpL8Le9l05ZIkSboDFu2Bj6oqoOZreEn2S3JikhNXrFgxX4OVJElaq6zu8PfLfjmX/v+yXn4RsPVIf1v1sunKV1FVH6yqHatqx80333zeKy5JkrQ2WN3h7yhg4ondfYEvjpTv05/63Qm4pl8e/hrwxCSb9Ac9ntjLJEmSdAesu1ADTvJpYGdgsyQX0p7aPRg4IsmLgPOB5/TejwaeDCwHfgW8EKCqrkzyRuBHvb83VNXkh0gkSZI0pgULf1X1l9N02mWKfgt42TTD+QjwkXmsmiRJ0mD5Cx+SJEkDYviTJEkaEMOfJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAEx/EmSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgCzYb/tqaVq2/1dWKTvv4N0XoSaSJGkx2PInSZI0IIY/SZKkATH8SZIkDYjhT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IP7CxxLnL3JIkqT5ZMufJEnSgBj+JEmSBsTwJ0mSNCCGP0mSpAHxgY81kA+BSJKkO8qWP0mSpAGx5W8tYWugJEkahy1/kiRJA2L4kyRJGhDDnyRJ0oAY/iRJkgbE8CdJkjQghj9JkqQBMfxJkiQNiOFPkiRpQAx/kiRJA2L4kyRJGhDDnyRJ0oAY/iRJkgZk3cWugBbOsv2/sthVkCRJS4wtf5IkSQNiy5+mbCE87+DdF6EmkiRpodnyJ0mSNCC2/GlK090vaIugJElrNsOfFoSXkiVJWpq87CtJkjQgtvxpTmzRkyRpzWb4053m9wlKkrTmWGMu+ybZLcmZSZYn2X+x6yNJkrQmWiNa/pKsA7wX+HPgQuBHSY6qqtMXt2aaizvTQjjVpWUvQUuSNHdrRPgDHgEsr6pzAJIcDuwBGP4GYtzgOG5/hkRJ0lCtKeFvS+CCkfcXAo9cpLpoLbCYIXG+W0AlSZqLNSX8zSrJfsB+/e31Sc5cDaPdDLh8NYxH45vXZZK3zNeQ5sdSq88cuK0sPS6TpcdlsjStjuWyzQIPfyVrSvi7CNh65P1Wvew2VfVB4IOrs1JJTqyqHVfnODUzl8nS5HJZelwmS4/LZGlaG5fLmvK074+A7ZJsm2R9YE/gqEWukyRJ0hpnjWj5q6qbk/wN8DVgHeAjVXXaIldLkiRpjbNGhD+AqjoaOHqx6zHJar3MrLG4TJYml8vS4zJZelwmS9Nat1xSVYtdB0mSJK0ma8o9f5IkSZoHhr87wJ+aW32SbJ3km0lOT3Jakpf38k2THJPkrP5/k16eJO/uy+bkJA8bGda+vf+zkuy7WNO0tkiyTpKfJPlyf79tkhP6vP9MfziLJBv098t792Ujwzigl5+ZZNfFmZK1R5KNkxyZ5OdJzkjyKLeVxZfkFX3/dWqSTye5q9vL6pXkI0kuS3LqSNm8bRtJHp7klP6ZdyfJ6p3COaoq/+bwR3vg5GzgfsD6wM+A7Re7XmvrH3Bf4GH99T2BXwDbA/8G7N/L9wfe0l8/GfgqEGAn4IRevilwTv+/SX+9yWJP35r8B/xf4FPAl/v7I4A9++sPAC/pr18KfKC/3hP4TH+9fd9+NgC27dvVOos9XWvyH3Ao8Nf99frAxm4ri75MtgTOBe7W3x8BvMDtZbUvh8cBDwNOHSmbt20D+GHvN/2zT1rsaZ7pz5a/ubvtp+aq6rfAxE/NaQFU1SVV9eP++jrgDNrOdA/agY7+/+n99R7AYdX8ANg4yX2BXYFjqurKqroKOAbYbTVOylolyVbA7sCH+/sATwCO7L1MXiYTy+pIYJfe/x7A4VX1m6o6F1hO2750ByTZiHaAOwSgqn5bVVfjtrIUrAvcLcm6wN2BS3B7Wa2q6njgyknF87Jt9G73qqofVEuCh40Ma0ky/M3dVD81t+Ui1WVQ+uWPPwJOAO5TVZf0TpcC9+mvp1s+Lrf59U7glcCt/f29gaur6ub+fnT+3jbve/drev8uk/m1LbAC+Gi/HP/hJPfAbWVRVdVFwL8D/0MLfdcAJ+H2shTM17axZX89uXzJMvxpjZBkQ+C/gL+vqmtHu/UzLR9bX02SPAW4rKpOWuy6aCXr0i5rvb+q/gi4gXYp6zZuK6tfv49sD1o43wK4B7akLjlD2zYMf3M360/NaX4lWY8W/D5ZVZ/rxb/sTe30/5f18umWj8tt/jwaeFqS82i3PTwBeBft0sjEd4eOzt/b5n3vvhFwBS6T+XYhcGFVndDfH0kLg24ri+vPgHOrakVV3QR8jrYNub0svvnaNi7qryeXL1mGv7nzp+ZWo36vyyHAGVX19pFORwETT1rtC3xxpHyf/rTWTsA1vVn/a8ATk2zSz8Sf2Ms0R1V1QFVtVVXLaOv/N6pqL+CbwLN6b5OXycSyelbvv3r5nv3pxm2B7Wg3TesOqKpLgQuSPKgX7QKcjtvKYvsfYKckd+/7s4nl4vay+OZl2+jdrk2yU1/G+4wMa2la7CdO1sQ/2pNAv6A9bfXPi12ftfkPeAytKf5k4Kf978m0e2COBc4C/hvYtPcf4L192ZwC7DgyrL+i3SS9HHjhYk/b2vAH7MztT/vej3YwWg58Ftigl9+1v1/eu99v5PP/3JfVmSzxp+PWhD9gB+DEvr18gfZEotvK4i+Xg4CfA6cCH6c9sev2snqXwadp91zeRGslf9F8bhvAjn35ng28h/4jGkv1z1/4kCRJGhAv+0qSJA2I4U+SJGlADH+SJEkDYviTJEkaEMOfJEnSgBj+JK3Vklw/S/eNk7x0NdTjDUn+bJZ+dk7yJwtdF0nDZviTNHQbAwse/qrqdVX137P0tjNg+JO0oAx/ktYISZYlOSPJh5KcluTrSe42RX/bJvl+klOSvGmkfMMkxyb5ce+2R+90MHD/JD9N8tYZ+ps8nuuTvKPX5dgkm/fyHZL8IMnJST7ffwmAJB9L8qz++rwkB42M4/eTLANeDLyi1+WxSZ6d5NQkP0ty/HzOT0nDZfiTtCbZDnhvVT0YuBp45hT9vAt4f1U9hPaN/hN+DfxFVT0MeDzwtv5TTPsDZ1fVDlX1TzP0N9k9gBN7XY4DDuzlhwGvqqr/Rft1gAOn+CzA5X0c7wf+sarOAz4AvKPX5dvA64Bdq+qhwNNmnTuSNAbDn6Q1yblV9dP++iRg2RT9PJr2U07QfkprQoB/SXIy7aectgTuM8Xnx+3vVuAz/fUngMck2QjYuKqO6+WHAo+bZlo+N8t0AHwX+FiS/w2sM00/kjQn6y52BSRpDn4z8voWYJXLvt1Uv1u5F7A58PCquinJebTfUb2j/Y0zzplMTMstTLMvrqoXJ3kksDtwUpKHV9UVcxyPJK3Elj9Ja5vvAnv213uNlG8EXNYD3eOBbXr5dcA9x+hvsrsAz+qvnwd8p6quAa5K8thevjftkvC4VqpLkvtX1QlV9TpgBbD1HIYlSVOy5U/S2ublwKeSvAr44kj5J4EvJTkFOBH4OUBVXZHku0lOBb4KvGWq/qZwA/CIJK8BLgOe28v3BT6Q5O7AOcAL51D3LwFH9odM/pb28Md2tEvRxwI/m8OwJGlKqZrrlQpJUpLrq2rDxa6HJM2Vl30lSZIGxJY/SZKkAbHlT5IkaUAMf5IkSQNi+JMkSRoQw58kSdKAGP4kSZIGxPAnSZI0IP8fb/CXoziWI40AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plot_df_missing(recovery_df)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...22232425262728293031
stop_idhourroute_desc
85009268.0Bus0233444444...4444444444
9.0Bus0122222222...2222222223
10.0Bus0011111222...2222222222
11.0Bus0011122222...2222222222
12.0Bus0011122222...2222222222
13.0Bus0011122222...2222222222
14.0Bus0011222222...2222222222
15.0Bus0011222222...2222222222
16.0Bus0133333444...4444444444
17.0Bus0133333333...4444444444
18.0Bus0233333333...4444444444
19.0Bus0222233333...3333333333
85021868.0S-Bahn0367777777...8888888888
9.0S-Bahn0477777777...8888888888
10.0S-Bahn0477777777...7777788888
11.0S-Bahn0477777777...7777777778
12.0S-Bahn0377777777...7777777778
13.0S-Bahn0377777777...8888888888
14.0S-Bahn0377777777...8888888888
15.0S-Bahn0377777777...7777777788
\n", "

20 rows × 32 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 ... 22 23 \\\n", "stop_id hour route_desc ... \n", "8500926 8.0 Bus 0 2 3 3 4 4 4 4 4 4 ... 4 4 \n", " 9.0 Bus 0 1 2 2 2 2 2 2 2 2 ... 2 2 \n", " 10.0 Bus 0 0 1 1 1 1 1 2 2 2 ... 2 2 \n", " 11.0 Bus 0 0 1 1 1 2 2 2 2 2 ... 2 2 \n", " 12.0 Bus 0 0 1 1 1 2 2 2 2 2 ... 2 2 \n", " 13.0 Bus 0 0 1 1 1 2 2 2 2 2 ... 2 2 \n", " 14.0 Bus 0 0 1 1 2 2 2 2 2 2 ... 2 2 \n", " 15.0 Bus 0 0 1 1 2 2 2 2 2 2 ... 2 2 \n", " 16.0 Bus 0 1 3 3 3 3 3 4 4 4 ... 4 4 \n", " 17.0 Bus 0 1 3 3 3 3 3 3 3 3 ... 4 4 \n", " 18.0 Bus 0 2 3 3 3 3 3 3 3 3 ... 4 4 \n", " 19.0 Bus 0 2 2 2 2 3 3 3 3 3 ... 3 3 \n", "8502186 8.0 S-Bahn 0 3 6 7 7 7 7 7 7 7 ... 8 8 \n", " 9.0 S-Bahn 0 4 7 7 7 7 7 7 7 7 ... 8 8 \n", " 10.0 S-Bahn 0 4 7 7 7 7 7 7 7 7 ... 7 7 \n", " 11.0 S-Bahn 0 4 7 7 7 7 7 7 7 7 ... 7 7 \n", " 12.0 S-Bahn 0 3 7 7 7 7 7 7 7 7 ... 7 7 \n", " 13.0 S-Bahn 0 3 7 7 7 7 7 7 7 7 ... 8 8 \n", " 14.0 S-Bahn 0 3 7 7 7 7 7 7 7 7 ... 8 8 \n", " 15.0 S-Bahn 0 3 7 7 7 7 7 7 7 7 ... 7 7 \n", "\n", " 24 25 26 27 28 29 30 31 \n", "stop_id hour route_desc \n", "8500926 8.0 Bus 4 4 4 4 4 4 4 4 \n", " 9.0 Bus 2 2 2 2 2 2 2 3 \n", " 10.0 Bus 2 2 2 2 2 2 2 2 \n", " 11.0 Bus 2 2 2 2 2 2 2 2 \n", " 12.0 Bus 2 2 2 2 2 2 2 2 \n", " 13.0 Bus 2 2 2 2 2 2 2 2 \n", " 14.0 Bus 2 2 2 2 2 2 2 2 \n", " 15.0 Bus 2 2 2 2 2 2 2 2 \n", " 16.0 Bus 4 4 4 4 4 4 4 4 \n", " 17.0 Bus 4 4 4 4 4 4 4 4 \n", " 18.0 Bus 4 4 4 4 4 4 4 4 \n", " 19.0 Bus 3 3 3 3 3 3 3 3 \n", "8502186 8.0 S-Bahn 8 8 8 8 8 8 8 8 \n", " 9.0 S-Bahn 8 8 8 8 8 8 8 8 \n", " 10.0 S-Bahn 7 7 7 8 8 8 8 8 \n", " 11.0 S-Bahn 7 7 7 7 7 7 7 8 \n", " 12.0 S-Bahn 7 7 7 7 7 7 7 8 \n", " 13.0 S-Bahn 8 8 8 8 8 8 8 8 \n", " 14.0 S-Bahn 8 8 8 8 8 8 8 8 \n", " 15.0 S-Bahn 7 7 7 7 7 7 8 8 \n", "\n", "[20 rows x 32 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "recovery_df.head(20)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Make second recovery table\n", "\n", "Here only taking combination of `transport_type x hour`" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(127, 32)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...22232425262728293031
hourroute_desc
7.0Bus057991010101111...11121212121212121212
InterRegio0000011111...2222222222
Intercity0000011111...2222222222
S-Bahn0356788899...99999910101010
\n", "

4 rows × 32 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 ... 22 23 24 25 \\\n", "hour route_desc ... \n", "7.0 Bus 0 5 7 9 9 10 10 10 11 11 ... 11 12 12 12 \n", " InterRegio 0 0 0 0 0 1 1 1 1 1 ... 2 2 2 2 \n", " Intercity 0 0 0 0 0 1 1 1 1 1 ... 2 2 2 2 \n", " S-Bahn 0 3 5 6 7 8 8 8 9 9 ... 9 9 9 9 \n", "\n", " 26 27 28 29 30 31 \n", "hour route_desc \n", "7.0 Bus 12 12 12 12 12 12 \n", " InterRegio 2 2 2 2 2 2 \n", " Intercity 2 2 2 2 2 2 \n", " S-Bahn 9 9 10 10 10 10 \n", "\n", "[4 rows x 32 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with gzip.open(\"../data/stop_times_wHour.pkl\", \"rb\") as input_file:\n", " stoptimes = pickle.load(input_file)\n", " \n", "distrib_df = pd.DataFrame(d_all).transpose()\n", "distrib_to_rm = np.array(distrib_df.iloc[:,range(11)].sum(axis=1) == 11) # missing trips\n", "distrib_df = distrib_df.iloc[~distrib_to_rm,:]\n", "\n", "stoptimes_df = pd.DataFrame(stoptimes)\n", "\n", "recovery_df2 = distrib_df.join(stoptimes_df)\n", "list_bins = [x for x in range(32)]\n", "\n", "recovery_df2 = recovery_df2.groupby(['hour', 'route_desc'])[list_bins].apply(lambda x : x.astype(float).sum())\n", "recovery_df2 = recovery_df2.astype('int')\n", "print(recovery_df2.shape)\n", "recovery_df2.head(4)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Last recovery table \n", "\n", "Takes only transport type distribution" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(11, 32)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...22232425262728293031
route_desc
Bus15725921197405115766124269128687131397133346134908136278...137998138003138007138012138014138016138018138021138023138087
Eurocity0001111222...3333333333
InterRegio3374107141174207240273306339...371371371372372372372372372372
Intercity9192939495969798999...109109109109109109109109109109
\n", "

4 rows × 32 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 \\\n", "route_desc \n", "Bus 1572 59211 97405 115766 124269 128687 131397 133346 \n", "Eurocity 0 0 0 1 1 1 1 2 \n", "InterRegio 33 74 107 141 174 207 240 273 \n", "Intercity 9 19 29 39 49 59 69 79 \n", "\n", " 8 9 ... 22 23 24 25 26 \\\n", "route_desc ... \n", "Bus 134908 136278 ... 137998 138003 138007 138012 138014 \n", "Eurocity 2 2 ... 3 3 3 3 3 \n", "InterRegio 306 339 ... 371 371 371 372 372 \n", "Intercity 89 99 ... 109 109 109 109 109 \n", "\n", " 27 28 29 30 31 \n", "route_desc \n", "Bus 138016 138018 138021 138023 138087 \n", "Eurocity 3 3 3 3 3 \n", "InterRegio 372 372 372 372 372 \n", "Intercity 109 109 109 109 109 \n", "\n", "[4 rows x 32 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with gzip.open(\"../data/stop_times_wHour.pkl\", \"rb\") as input_file:\n", " stoptimes = pickle.load(input_file)\n", " \n", "distrib_df = pd.DataFrame(d_all).transpose()\n", "distrib_to_rm = np.array(distrib_df.iloc[:,range(11)].sum(axis=1) == 11) # missing trips\n", "distrib_df = distrib_df.iloc[~distrib_to_rm,:]\n", "\n", "stoptimes_df = pd.DataFrame(stoptimes)\n", "\n", "recovery_df3 = distrib_df.join(stoptimes_df)\n", "list_bins = [x for x in range(32)]\n", "\n", "recovery_df3 = recovery_df3.groupby(['route_desc'])[list_bins].apply(lambda x : x.astype(float).sum())\n", "recovery_df3 = recovery_df3.astype('int')\n", "print(recovery_df3.shape)\n", "recovery_df3.head(4)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Reconstruct cumulative distribution probabilities from multiple distributions to recover data with few/missing points \n", "\n", "To recover missing or faulty data, the strategy is the following :\n", "1. If we have more than 100 data points in `real` group, we rely exclusively on it to compute probabilities for a given transfer on a `trip_id x stop_id` \n", " - `real` group : the delay was calculated with actual arrival time with status `geschaetz` or `real`, meaning it comes from actual measurments.\n", "2. If we do not find enough data within `real` group, we look at distributions in `all` group (contains all delays including `prognose` status) to compute probabilities, if there is more than 100 data points for a given `trip_id x stop_id`.\n", "3. If `all` group still does not have more than 100 data points, we rely on `recovery tables` to estimate delay distributions. The strategy is the following :\n", " - As we will always know the `stop_id`, the `time` and the `transport_type`, we rely on arrival delays from aggregated values of similar transfer. \n", " - First, we compute a table of distribution with all possible combination of `stop_id`, `time` (round to hours) and `transport_type`, and aggregate all the counts we have to compute cumulative distribution probabilities. \n", " - Is there is less than 100 data points in one of these intersections, we use the last possibilities : a table with `transport_type` x `time` aggregate counts.\n", " - The last values with no match are given the overall average of cumulative distribution probabilities for each `transport_type` with no limit for the minimum number of data points.\n", "\n", "Following this approach, we can find cumulative distribution probabilities for every combination of `trip_id x stop_id` as defined in `stop_times_df`. We will make a table with the same row order so that McRaptor can easily find their indexes. \n", "\n", "In order to do that, we have two dictionnaries of distributions and two recovery dataframes :\n", " - `df_real` : contains delay distribution for each keys in form `trip_id + __ + stop_id` calculated from delays with status `geschaetz` or `real` in sbb datasets.\n", " - `df_all` : contains delay distributions for each keys in form `trip_id + __ + stop_id`. No filter was applied on status (contains `geschaetz`, `real` __and__ `prognose` = evaluated delay).\n", " - `recovery_df` : contains aggregated delay distributions for each combination of `stop_id`, `route_desc` (transport type) and `hour` (time rounded to hour). \n", " - `recovery_df2` : contains aggregated delay distributions for each combination of `route_desc` (transport type) and `hour` (time rounded to hour). \n", " - `recovery_df3` : contains aggregated delay distributions for `route_desc` (transport type) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
route_inttrip_intstop_intstop_sequencearrival_timedeparture_timeroute_idtrip_idstop_idroute_descstop_id_rawsequence_shift_1
00001NaT2020-05-21 07:18:0030-57-Y-j19-14.TA.30-57-Y-j19-1.1.H8502208Bus85022082
100122020-05-21 07:23:002020-05-21 07:23:0030-57-Y-j19-14.TA.30-57-Y-j19-1.1.H8502209Bus85022093
200232020-05-21 07:29:00NaT30-57-Y-j19-14.TA.30-57-Y-j19-1.1.H8503202Bus85032021
\n", "
" ], "text/plain": [ " route_int trip_int stop_int stop_sequence arrival_time \\\n", "0 0 0 0 1 NaT \n", "1 0 0 1 2 2020-05-21 07:23:00 \n", "2 0 0 2 3 2020-05-21 07:29:00 \n", "\n", " departure_time route_id trip_id stop_id \\\n", "0 2020-05-21 07:18:00 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8502208 \n", "1 2020-05-21 07:23:00 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8502209 \n", "2 NaT 30-57-Y-j19-1 4.TA.30-57-Y-j19-1.1.H 8503202 \n", "\n", " route_desc stop_id_raw sequence_shift_1 \n", "0 Bus 8502208 2 \n", "1 Bus 8502209 3 \n", "2 Bus 8503202 1 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "###################### MAKE CUMULATIVE PROBABILITY TABLE #######################\n", "\n", "# Load stop_time table, to use its order as a template for our final table \n", "with open(\"../data/stop_times_df.pkl\", \"rb\") as input_file:\n", " stoptimes = pickle.load(input_file)\n", " \n", "stoptimes.head(3)" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.0%, 4.07%, 8.14%, 12.21%, 16.28%, 20.35%, 24.42%, 28.49%, 32.55%, 36.62%, 40.69%, 44.76%, 48.83%, 52.9%, 56.97%, 61.04%, 65.11%, 69.18%, 73.25%, 77.32%, 81.39%, 85.46%, 89.53%, 93.6%, 97.66%, " ] } ], "source": [ "summary_df = pd.DataFrame(columns = ['key', 'key_int', 'trip_id', 'stop_id', 'transport_type', 'hour', 'distribution'])\n", "n_fail = 0\n", "size_stop_times = stoptimes.shape[0]\n", "n_real = 0\n", "n_all = 0\n", "n_recov1 = 0\n", "n_recov2 = 0\n", "n_recov3 = 0\n", "all_distrib = []\n", "all_transport_type = []\n", "all_hours = []\n", "all_keys = []\n", "\n", "i = 0\n", "for index, row in stoptimes.iterrows():\n", " \n", " trip_id = row[7]\n", " stop_id = str(row[8])[:7]\n", " transport_type = row[9]\n", " key = trip_id + '__' + stop_id\n", "\n", " # Compute rounded hour using arrival if possible - recover with departure\n", " hour = pd.to_datetime(stoptimes.loc[index]['arrival_time']).hour\n", " if math.isnan(hour): # if arrival is NaT, use departure time\n", " hour = pd.to_datetime(stoptimes.loc[index]['departure_time']).hour\n", " \n", " distrib = np.zeros(31)\n", " keep_trying = True\n", " \n", " # 1) try d_real to get distribution from measured delays\n", " if key in d_real:\n", " distrib = d_real[key]\n", " sum_distrib = np.sum(distrib)\n", " if sum_distrib > 100 :\n", " #summary_df.loc[index, 'distribution'] = distrib\n", " all_distrib.append(distrib)\n", " keep_trying = False \n", " n_real += 1\n", " \n", " # 2) try d_all to get distribution from measured + estimated delays\n", " if keep_trying and key in d_all:\n", " distrib = d_all[key]\n", " sum_distrib = np.sum(distrib)\n", " if sum_distrib > 100 :\n", " #summary_df.loc[index, 'distribution'] = distrib\n", " all_distrib.append(distrib)\n", " keep_trying = False\n", " n_all += 1\n", " \n", " # 3) try first recovery table using stop_id, transport_type and hour\n", " if keep_trying and (stop_id, hour, transport_type) in recovery_df.index:\n", " distrib = np.array(recovery_df.loc[(stop_id, hour, transport_type)])\n", " sum_distrib = np.sum(distrib)\n", " if sum_distrib > 100 :\n", " #summary_df.loc[index, 'distribution'] = distrib\n", " all_distrib.append(distrib)\n", " keep_trying = False \n", " n_recov1 += 1\n", " \n", " # 4) use second recovery table using transport_type and hour \n", " if keep_trying and (hour, transport_type) in recovery_df2.index:\n", " distrib = np.array(recovery_df2.loc[(hour, transport_type)])\n", " sum_distrib = np.sum(distrib)\n", " if sum_distrib > 100 :\n", " #summary_df.loc[index, 'distribution'] = distrib\n", " all_distrib.append(distrib)\n", " keep_trying = False \n", " n_recov2 += 1\n", " \n", " # 5) use third recovery table using transport_type only \n", " if keep_trying and (transport_type) in recovery_df3.index:\n", " distrib = np.array(recovery_df3.loc[(transport_type)])\n", " sum_distrib = np.sum(distrib)\n", " #summary_df.loc[index, 'distribution'] = distrib\n", " all_distrib.append(distrib)\n", " keep_trying = False \n", " n_recov3 += 1\n", " \n", " # Record results in summary\n", " #summary_df.loc[index, 'key'] = key\n", " #summary_df.loc[index, 'key_int'] = index\n", " #summary_df.loc[index, 'trip_id'] = trip_id\n", " #summary_df.loc[index, 'stop_id'] = stop_id\n", " #summary_df.loc[index, 'transport_type'] = transport_type\n", " #summary_df.loc[index, 'hour'] = hour\n", " all_keys.append(key)\n", " all_transport_type.append(transport_type)\n", " all_hours.append(hour)\n", "\n", " # save number of failure for recovery\n", " if keep_trying:\n", " print('fail{}'.format(index), end = ', ')\n", " n_fail += 1 \n", " \n", " # print progression \n", " if (index % 10000) == 0 :\n", " print('{}%'.format(round(100*index/size_stop_times,2)), end = ', ')" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "10422\n", "173225\n", "37031\n", "20853\n", "4207\n" ] } ], "source": [ "print(n_real)\n", "print(n_all)\n", "print(n_recov1)\n", "print(n_recov2)\n", "print(n_recov3)" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaEAAAEICAYAAAD2u0vkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAeEklEQVR4nO3de7xVdZ3/8dc7JEBEFNEGUTyalHdJ0XQiNfOH18aacNBIRZrx0eR0m8rolznaTf01NWpqDpTX0AgnG9Kfg6YJlqAcEAUvKCr8iLwhiqLWGH1+f6zvdhbbvc91n/09wPv5eOzHWfu7vmt9P2udffb7rMs+RxGBmZlZDu/IXYCZmW2+HEJmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiE7C2SQtLuuesAkHSEpN/30Lpb0rZukZ7fJun0Bq37g5KWlp4vl3RUI9ad1vewpCMatb42xjlP0k97epzeStK7JM2R9Kqk7+euZ1O2Re4CzKAIQGBkRCxr9tgRcWxH+nWkxoi4B3hvI+qSdA3w+4g4p7T+vRux7kaqVecm4ExgNbB1+MOUPcpHQtZwlSOMzc3mut2bqF2AR+oFkL/XjeMQ2sRJOkPSr0rPn5A0o/R8paRRpUWOSn1elnS5JJX6TpL0qKSXJM2StEtpXkg6S9ITwBOp7QRJi9K67pW0X50a56TJByWtkzS+NO9Lkp6X9IykM0rt/ST9q6T/J+k5SVdKGlBn/X1S39WSngKOr5p/t6S/T9O7S5otaW3qP71ejZVThpK+KulZ4Oo6pxEPkvRI2m9XS+qf1jlR0m+raolUw5nABODsNN6v0vy3Tu+lfXCxpD+kx8WS+qV5ldpq7r8a+2jXtN2vSroDGFo1f4akZ9N+mSNp79Rer87Jkp5M63tE0sfaGLuPpP9d6r9A0s5p3l9Lmp/GnS/pr6u+b99Or611kn4laTtJ0yS9kvq3lPrvIekOSWskLZX0d3XquQY4vbRNR6k4PXmTpJ9KegWYKGlHSTPT+pZJ+ofSOs5L++ynaZsWS3qPpK+l78dKSWPr7ZPNSkT4sQk/gN2Alyl+4dgRWEFx6qQy7yXgHel5ALcA2wAjgBeAY9K8E4FlwJ4Up3HPAe4tjRPAHcAQYADwPuB54P1AH4of6uVAvzp1BrB76fkRwJ+BbwJ9geOA14Ft0/x/A2am8QYBvwIuqLPuTwOPATun/r9J422R5t8N/H2avhH4etpf/YExHajxIqBf2u4jKvs39VkOLCmN/Tvg22neROC39fYDcE2lb9X6jkrT3wTmATsA2wP3At/qyP6rsY/mAj9I23EY8Crw09L8SWk/9wMuBhaV5tWq8ySK19s7gPHAa8CwOmN/BVhMcRpTwP7Adml/vQScSvGaOyU93670fVsGvBsYDDwCPA4clfpfB1yd+g4EVgJnpHnvozjdtledmjbYJuA84E3go2mbBgBzgCvS62QUxc/LkaX+fwSOLtXyNMVrqy/wD8DTud8fesMjewF+NOGbXPzwHQCcDEwB7gf2SD+QM0v9gg3fdH8OTE7TtwGfKs17R3pT26W07JGl+T+qvCGW2pYCh9epsdYb/BukoEhtzwOHpDeq14B3l+YdWu+HGrgL+HTp+Vjqh9B1aR/t1MEa/xvoX9VWHULlsY8DnkzTE+leCD0JHFeadzSwvL39V2O7RlAE1sBS2w2UQqiq/zapzsH16qyxzCLgxDrzltaaRxE+91e1zQUmlr5vXy/N+z5wW+n5R0hhSRGE91St69+Bf6lT0wbbRBEqc0rPdwbWA4NKbRcA15T631FVyzqgT3o+KO3DbTr6c7ypPnw6bvMwm+JN6bA0fTdweHrMrur7bGn6dWCrNL0LcImKU2svA2sowmB4qf/K0vQuwJcq/dMyO1P8dtxRL0bEn2vUsz2wJbCgtO7/Su217FhV24o2xjybYrvuV3En2qR2anwhIv7YTp/qsTuzD9pSObKtt+56+6/Wel6KiNeq1gW8dbrswnS67BWKIISqU3Zlkk7T/5yKfRnYp43+O1MEaq26qr9XK9jwNfdcafqNGs/Lr9/3V70eJwB/VW8baih/H3cE1kTEq52obXVErC89h9rfj82KL65tHmZT/Ca2K/BditNzEyiOHi7r4DpWAt+JiGlt9ClfxK30/07ny23Xaoof4r0jYlUH+j9D8UZXMaJex4h4luJUCZLGAL+WNCfq3xHXkTunqsf+Q5p+jSJMSeNVvyG2t+4/ULy5Plxj3Z3xDLCtpIGlIBpRGv8TFKdjj6IIoMEUp8Uq1ws3qFPFtcKpwIeBuRGxXtKiUv9qKylOqS2paq9sX9kIil84OmslMDsi/lcXlq0ob+cfgCGSBpWCaATQkdejlfhIaPMwG/gQMCAifg/cAxxDcd79gQ6u40rga6UL0oMlndRG/6nApyW9X4WBko6XNKhO/+corlG1KyL+ktb/b5J2SPUMl3R0nUV+DnxO0k6StgUm11u3pJMk7ZSevkTxxvOXztZY5aw09hCKawLTU/uDwN6SRqm4WeG8quXaG+9G4BxJ20saCpwLdPqzPRGxAmgFzpf0zhS+Hyl1GQT8CXiRIjS/206dAyn22wtQ3BxDcSRUz4+Bb0kamV4r+0naDvi/wHskfULSFipuWNmL4rplZ92S1nWqpL7pcZCkPbuwLiJiJcU1uAsk9Vdx082n6ML+39w5hDYDEfE4xfnoe9LzV4CngN+VTg+0t46bKS7A/yydklkC1P18TUS0UhxRXEbxZr6M4hpIPecB16ZTJTXvWqry1bTOeameX1P/8zlTgVkUb/oLgV+0sd6DgPskraO48eHzEfFUF2usuAG4nWKfPwl8G976vnwz1f4E8Nuq5X4C7JXG+2WN9X6bIjweoriwv7Cy7i74BMVNJGuAf6G4NlZxHcWpplUUF//ntVVnRDxCcX1mLkVA7UtxQ0Y9P6D4ReF24JW0vgER8SJwAvAligA8GzghIlZ3duPS0cpYiuuif6A47Vy5oaSrTgFa0vpupri+9OturG+zpHSRzMzMrOl8JGRmZtk4hMzMLBuHkJmZZeMQMjOzbPw5oRqGDh0aLS0tucswM9uoLFiwYHVE1PvQeE0OoRpaWlpobW3NXYaZ2UZFUlt/jaQmn44zM7NsHEJmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsG4eQmZll4w+r1rB41VpaJt/akHUtv/D4hqzHzGxT5CMhMzPLxiFkZmbZOITMzCwbh5CZmWXjEDIzs2wcQmZmlo1DyMzMsnEImZlZNg4hMzPLxiFkZmbZOITMzCwbh5CZmWXjEDIzs2w2uxCStFzS0Nx1mJnZRh5CKmzU22Bmtjnb6N7AJbVIWirpOmAJ8A1J8yU9JOn8Ur9fSlog6WFJZ+ar2MzM6tlY/6ndSOB0YGtgHHAwIGCmpMMiYg4wKSLWSBoAzJf0HxHxYr0VpqA6E6DP1tv3+AaYmdlGeCSUrIiIecDY9HgAWAjsQRFQAJ+T9CAwD9i51F5TREyJiNERMbrPloN7rnIzM3vLxnok9Fr6KuCCiPj38kxJRwBHAYdGxOuS7gb6N7VCMzNr18Z6JFQxC5gkaSsAScMl7QAMBl5KAbQHcEjOIs3MrLaN9UgIgIi4XdKewFxJAOuATwL/BXxa0qPAUopTcmZm1stsdCEUEcuBfUrPLwEuqdH12DrLt/RIYWZm1mkb++k4MzPbiDmEzMwsG4eQmZll4xAyM7NsHEJmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsm43uz/Y0w77DB9N64fG5yzAz2+T5SMjMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsG4eQmZll4xAyM7Ns/DmhGhavWkvL5Ftzl2Fm1lTLM3w+0kdCZmaWjUPIzMyycQiZmVk2DiEzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWjUPIzMyycQiZmVk2DiEzM8vGIWRmZtk4hMzMLJumhZCkPs0ay8zMNg4NCSFJLZIekzRN0qOSbpK0paTlki6StBA4SdIpkhZLWiLpotLyn5L0uKT7JU2VdFlqv0bSpZLulfSUpHGpXZK+l9azWNL41D5M0hxJi9K8D6b2sZLmSlooaYakrRqx3WZm1j2NPBJ6L3BFROwJvAJ8JrW/GBEHAHOAi4AjgVHAQZI+KmlH4BvAIcAHgD2q1jsMGAOcAFyY2v42rWN/4Cjge5KGAZ8AZkVEZd4iSUOBc4CjUh2twD9XFy/pTEmtklrXv762+3vDzMza1cj/rLoyIn6Xpn8KfC5NT09fDwLujogXACRNAw5L82ZHxJrUPgN4T2m9v4yIvwCPSHpXahsD3BgR64HnJM1O658PXCWpb1pukaTDgb2A30kCeCcwt7r4iJgCTAHoN2xkdGM/mJlZBzUyhKrfuCvPX+vmev9UmlabBUTMkXQYcDxwjaQfAC8Bd0TEKd2sw8zMGqyRp+NGSDo0TX8C+G3V/PuBwyUNTTcpnALMpjh6OVzStpK2AD7egbHuAcZL6iNpe4ojqvsl7QI8FxFTgR8DBwDzgA9I2h1A0kBJ76m3YjMza55GhtBS4CxJjwLbAj8qz4yIZ4DJwG+AB4EFEfGfEbEK+C5FSP0OWA60d1HmZuChtJ67gLMj4lngCOBBSQ8A44FL0um/icCNkh6iOBVXfd3JzMwyUET3L39IagFuiYh9urj8VhGxLh0J3QxcFRE3d7uwLuo3bGQMO/3iXMObmWWx/MLju7W8pAURMbozy/SWD6ueJ2kRsAR4Gvhl5nrMzKwJGnJjQkQsB7p0FJSW/3Ij6jAzs41LbzkSMjOzzZBDyMzMsnEImZlZNg4hMzPLxiFkZmbZOITMzCwbh5CZmWXjEDIzs2wa+Ve0Nxn7Dh9Mazf/fIWZmbXPR0JmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsG4eQmZll4xAyM7Ns/GHVGhavWkvL5Ftzl9FrdPf/zpuZ1eMjITMzy8YhZGZm2TiEzMwsG4eQmZll4xAyM7NsHEJmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsG4eQmZll4xAyM7Nsen0ISZooaccO9Ltb0uga7XtImivpT5K+3DNVmplZV3QqhFRodnBNBNoNoTasAT4H/GtDqjEzs4ZpN1AktUhaKuk6YAnwDUnzJT0k6fxSv9NS24OSri8te1dqv1PSCEmDJa2ohJmkgZJWSupbY+xxwGhgmqRFkgZIOjeNv0TSFEkqLXJq6rdE0sEAEfF8RMwH3mxnO8+U1Cqpdf3ra9vfc2Zm1m0dPaoZCVwBfBEYDhwMjAIOlHSYpL2Bc4AjI2J/4PNpuR8C10bEfsA04NKIWAssAg5PfU4AZkXE20IiIm4CWoEJETEqIt4ALouIgyJiH2BAWr5iy4gYBXwGuKqD21YZa0pEjI6I0X22HNyZRc3MrIs6GkIrImIeMDY9HgAWAntQBNSRwIyIWA0QEWvScocCN6Tp64ExaXo6MD5Nn5yed9SHJN0naXEad+/SvBvT+HOArSVt04n1mplZk3U0hF5LXwVckI5KRkXE7hHxky6MOxM4RtIQ4EDgro4sJKk/xRHZuIjYF5gK9C91iapFqp+bmVkv0tmbDGYBkyRtBSBpuKQdKELkJEnbpfYhqf+9FEc6ABOAewAiYh0wH7gEuCUi1rcx5qvAoDRdCZzVqYZxVX3Hp/HHAGvTqT8zM+ultuhM54i4XdKewNx0P8A64JMR8bCk7wCzJa2nOF03EfgscLWkrwAvAGeUVjcdmAEc0c6w1wBXSnqD4vTeVIobJJ6lCLKyP0p6AOgLTAKQ9FcU15W2Bv4i6QvAXhHxSme23czMGk8RPmNVrd+wkTHs9Itzl9FrLL/w+NwlmNlGQNKCiHjb5zXb0us/rGpmZpuuTp2O60mSLgc+UNV8SURcnaMeMzPreb0mhCLirNw1mJlZc/l0nJmZZeMQMjOzbBxCZmaWjUPIzMyycQiZmVk2DiEzM8vGIWRmZtn0ms8J9Sb7Dh9Mq/9UjZlZj/ORkJmZZeMQMjOzbBxCZmaWjUPIzMyycQiZmVk2DiEzM8vGIWRmZtn4c0I1LF61lpbJt+Yuoyb/q20z25T4SMjMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsG4eQmZll4xAyM7NsHEJmZpaNQ8jMzLJxCJmZWTYOITMzy8YhZGZm2TiEzMwsm14fQpImStqxA/3uljS6RvsESQ9JWizpXkn790ylZmbWWZ0KIRWaHVwTgXZDqA1PA4dHxL7At4ApjSjKzMy6r91AkdQiaamk64AlwDckzU9HF+eX+p2W2h6UdH1p2btS+52SRkgaLGlFJcwkDZS0UlLfGmOPA0YD0yQtkjRA0rlp/CWSpkhSaZFTU78lkg4GiIh7I+KlNH8esFOd7TxTUquk1vWvr+3QzjMzs+7p6FHNSOAK4IvAcOBgYBRwoKTDJO0NnAMcGRH7A59Py/0QuDYi9gOmAZdGxFpgEXB46nMCMCsi3qweNCJuAlqBCRExKiLeAC6LiIMiYh9gQFq+YsuIGAV8BriqxnZ8Crit1gZGxJSIGB0Ro/tsObiDu8XMzLqjoyG0IiLmAWPT4wFgIbAHRUAdCcyIiNUAEbEmLXcocEOavh4Yk6anA+PT9MnpeUd9SNJ9khancfcuzbsxjT8H2FrSNpUZkj5EEUJf7cRYZmbWgzoaQq+lrwIuSEcloyJi94j4SRfGnQkcI2kIcCBwV0cWktSf4ohsXLrGMxXoX+oSVYtEWm4/4MfAiRHxYhfqNTOzHtDZmwxmAZMkbQUgabikHShC5CRJ26X2Ian/vRRHOgATgHsAImIdMB+4BLglIta3MearwKA0XQmc1amGcVV9x6fxxwBrI2KtpBHAL4BTI+LxTm6vmZn1oC060zkibpe0JzA33Q+wDvhkRDws6TvAbEnrKU7XTQQ+C1wt6SvAC8AZpdVNB2YAR7Qz7DXAlZLeoDi9N5XiBolnKYKs7I+SHgD6ApNS27nAdsAVqeY/R8TbbuU2M7PmU0T1GSzrN2xkDDv94txl1LT8wuNzl2BmVpOkBZ39Jb/Xf1jVzMw2XZ06HdeTJF0OfKCq+ZKIuDpHPWZm1vN6TQhFxFm5azAzs+by6TgzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWjUPIzMyy6TWfE+pN9h0+mFb/eRwzsx7nIyEzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWjUPIzMyy8eeEali8ai0tk2+tO9//YtvMrDF8JGRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWjUPIzMyycQiZmVk2DiEzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWTa8PIUkTJe3YgX53Sxpdo/1ESQ9JWiSpVdKYnqnUzMw6q1MhpEKzg2si0G4IteFOYP+IGAVMAn7ciKLMzKz72g0USS2Slkq6DlgCfEPS/HR0cX6p32mp7UFJ15eWvSu13ylphKTBklZUwkzSQEkrJfWtMfY4YDQwLR3JDJB0bhp/iaQpklRa5NTUb4mkgwEiYl1ERJo/EAhqkHRmOlJqXf/62g7tPDMz656OHtWMBK4AvggMBw4GRgEHSjpM0t7AOcCREbE/8Pm03A+BayNiP2AacGlErAUWAYenPicAsyLizepBI+ImoBWYEBGjIuIN4LKIOCgi9gEGpOUrtkxHPJ8Brqo0SvqYpMeAWymOht4mIqZExOiIGN1ny8Ed3C1mZtYdHQ2hFRExDxibHg8AC4E9KALqSGBGRKwGiIg1ablDgRvS9PVA5XrMdGB8mj45Pe+oD0m6T9LiNO7epXk3pvHnAFtL2iY9vzki9gA+CnyrE2OZmVkP6mgIvZa+CrggHZWMiojdI+InXRh3JnCMpCHAgcBdHVlIUn+KI7JxEbEvMBXoX+pSfaptg+cpnHaTNLQLNZuZWYN19iaDWcAkSVsBSBouaQeKEDlJ0napfUjqfy/FkQ7ABOAeKK7TAPOBS4BbImJ9G2O+CgxK05XAWZ1qGFfVd3wafwywNiLWStq9ct1I0gFAP+DFTm63mZn1gC060zkibpe0JzA3va+vAz4ZEQ9L+g4wW9J6itN1E4HPAldL+grwAnBGaXXTgRnAEe0Mew1wpaQ3KE7vTaW4QeJZiiAr+6OkB4C+/M+1n48Dp0l6E3gDGF+6UcHMzDKS34/frt+wkTHs9Ivrzl9+4fFNrMbMbOMgaUFEvO3zmm3p9R9WNTOzTVenTsf1JEmXAx+oar4kIq7OUY+ZmfW8XhNCEXFW7hrMzKy5fDrOzMyycQiZmVk2DiEzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbHrN54R6k32HD6bVf5rHzKzH+UjIzMyycQiZmVk2DiEzM8vGIWRmZtk4hMzMLBuHkJmZZeMQMjOzbBxCZmaWjUPIzMyyUUTkrqHXkfQqsDR3HV0wFFidu4gucN3N5bqba3Oqe5eI2L4zC/jP9tS2NCJG5y6isyS1uu7mcd3N5bqbq1l1+3ScmZll4xAyM7NsHEK1TcldQBe57uZy3c3lupurKXX7xgQzM8vGR0JmZpaNQ8jMzLJxCFWRdIykpZKWSZqcYfydJf1G0iOSHpb0+dR+nqRVkhalx3GlZb6W6l0q6ej2tkXSrpLuS+3TJb2zQbUvl7Q41dea2oZIukPSE+nrtqldki5NNTwk6YDSek5P/Z+QdHqp/cC0/mVpWTWg5veW9ukiSa9I+kJv3d+SrpL0vKQlpbYe38f1xuhGzd+T9Fiq62ZJ26T2FklvlPb7lV2tra3t72btPf7akNQvPV+W5rc0oO7ppZqXS1qU2vPu84jwIz2APsCTwG7AO4EHgb2aXMMw4IA0PQh4HNgLOA/4co3+e6U6+wG7pvr7tLUtwM+Bk9P0lcA/Nqj25cDQqrb/A0xO05OBi9L0ccBtgIBDgPtS+xDgqfR12zS9bZp3f+qrtOyxPfD9fxbYpbfub+Aw4ABgSTP3cb0xulHzWGCLNH1RqeaWcr+q9XSqtnrb34D93eOvDeAzwJVp+mRgenfrrpr/feDc3rDPfSS0oYOBZRHxVET8N/Az4MRmFhARz0TEwjT9KvAoMLyNRU4EfhYRf4qIp4FlFNtRc1vSbzJHAjel5a8FPtozW/NWfdfWGOtE4LoozAO2kTQMOBq4IyLWRMRLwB3AMWne1hExL4pX+3U9UPeHgScjYkU725Ntf0fEHGBNjZp6eh/XG6NLNUfE7RHx5/R0HrBTW+voYm31tr/D6uzvehr52ihv003AhytHId2tO63n74Ab21pHs/a5Q2hDw4GVpee/p+0A6FHpEPx9wH2p6Z/SIe5VpdMh9Wqu174d8HLpDaCR2xjA7ZIWSDoztb0rIp5J088C7+pi3cPTdHV7I53Mhj+YvX1/VzRjH9cboxEmUfz2XLGrpAckzZb0wdTWldp68ue5p18bby2T5q9N/Rvhg8BzEfFEqS3bPncI9VKStgL+A/hCRLwC/Ah4NzAKeIbicLq3GRMRBwDHAmdJOqw8M/021Ss/E5DOxf8NMCM1bQz7+22asY8bOYakrwN/BqalpmeAERHxPuCfgRskbZ2jtjZslK+NklPY8JetrPvcIbShVcDOpec7pbamktSXIoCmRcQvACLiuYhYHxF/AaZSHOJD/Zrrtb9IcYi8RVV7t0XEqvT1eeDmVONzlcPx9PX5Lta9ig1P2TT6e3MssDAinkvb0Ov3d0kz9nG9MbpM0kTgBGBCeiMjncp6MU0voLiW8p4u1tYjP89Nem28tUyaPzj175a0rr8Fppe2J+s+dwhtaD4wMt2x8k6K0zMzm1lAOl/7E+DRiPhBqb18XvVjQOWul5nAyeluml2BkRQXE2tuS/ph/w0wLi1/OvCfDah7oKRBlWmKC89LUn2Vu6/KY80ETkt30xwCrE2H97OAsZK2Tac5xgKz0rxXJB2S9tFpjai7ZIPfDnv7/q7SjH1cb4wukXQMcDbwNxHxeql9e0l90vRuFPv3qS7WVm/7u6VJr43yNo0D7qoEdTcdBTwWEW+dZsu+z6vvVNjcHxR3dzxO8dvA1zOMP4bi0PYhYFF6HAdcDyxO7TOBYaVlvp7qXUrpjrF620Jxl879FBdOZwD9GlD3bhR3/TwIPFwZj+I89p3AE8CvgSGpXcDlqbbFwOjSuial2pYBZ5TaR1P8wD8JXEb6ix8NqH0gxW+Zg0ttvXJ/UwTlM8CbFOfbP9WMfVxvjG7UvIzi2kHlNV65E+zj6fWzCFgIfKSrtbW1/d2svcdfG0D/9HxZmr9bd+tO7dcAn67qm3Wf+8/2mJlZNj4dZ2Zm2TiEzMwsG4eQmZll4xAyM7NsHEJmZpaNQ8jMzLJxCJmZWTb/H6u0PI6DXdGYAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.barh(np.array(['real','prognose','recov_tab1','recov_tab2','recov_tab3'])[::-1],\\\n", " np.array([n_real,n_all,n_recov1,n_recov2, n_recov3])[::-1])\n", "plt.title('where the distribution data come from')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyhourtransport_typedistribution
04.TA.30-57-Y-j19-1.1.H__85022087Bus[0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11...
14.TA.30-57-Y-j19-1.1.H__85022097Bus[0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11...
24.TA.30-57-Y-j19-1.1.H__85032027Bus[0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11...
35.TA.30-57-Y-j19-1.1.H__85022087Bus[0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11...
45.TA.30-57-Y-j19-1.1.H__85022097Bus[0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11...
\n", "
" ], "text/plain": [ " key hour transport_type \\\n", "0 4.TA.30-57-Y-j19-1.1.H__8502208 7 Bus \n", "1 4.TA.30-57-Y-j19-1.1.H__8502209 7 Bus \n", "2 4.TA.30-57-Y-j19-1.1.H__8503202 7 Bus \n", "3 5.TA.30-57-Y-j19-1.1.H__8502208 7 Bus \n", "4 5.TA.30-57-Y-j19-1.1.H__8502209 7 Bus \n", "\n", " distribution \n", "0 [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n", "1 [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n", "2 [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n", "3 [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... \n", "4 [0, 5, 7, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11... " ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "summary_df = pd.DataFrame([all_keys, all_hours, all_transport_type, all_distrib],\\\n", " index = ['key','hour','transport_type','distribution']).transpose()\n", "summary_df.head()" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [], "source": [ "# Load stop_time table, to use its order as a template for our final table \n", "with gzip.open(\"../data/join_distribution_all.pkl.gz\", \"wb\") as out_file:\n", " pickle.dump(summary_df, out_file)" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [], "source": [ "list_all_rows = []\n", "for index, row in summary_df.iterrows():\n", " distrib = np.array(row['distribution'])\n", " \n", " # get total number of elements \n", " N = np.sum(distrib)\n", " \n", " # make cumulative distribution probabilities\n", " cdf_distrib = np.empty((len(distrib)), dtype=float)\n", " save_x = 0\n", " for x in range(len(distrib)):\n", " cdf_distrib[x] = float(distrib[x])/float(N) + float(save_x)/float(N)\n", " save_x += distrib[x]\n", " \n", " list_all_rows.append(cdf_distrib)" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0. , 0.01501502, 0.03603604, 0.06306306, 0.09009009,\n", " 0.12012012, 0.15015015, 0.18018018, 0.21321321, 0.24624625,\n", " 0.27927928, 0.31231231, 0.34534535, 0.37837838, 0.41141141,\n", " 0.44444444, 0.47747748, 0.51051051, 0.54354354, 0.57657658,\n", " 0.60960961, 0.64264264, 0.67567568, 0.71171171, 0.74774775,\n", " 0.78378378, 0.81981982, 0.85585586, 0.89189189, 0.92792793,\n", " 0.96396396, 1. ],\n", " [0. , 0.01501502, 0.03603604, 0.06306306, 0.09009009,\n", " 0.12012012, 0.15015015, 0.18018018, 0.21321321, 0.24624625,\n", " 0.27927928, 0.31231231, 0.34534535, 0.37837838, 0.41141141,\n", " 0.44444444, 0.47747748, 0.51051051, 0.54354354, 0.57657658,\n", " 0.60960961, 0.64264264, 0.67567568, 0.71171171, 0.74774775,\n", " 0.78378378, 0.81981982, 0.85585586, 0.89189189, 0.92792793,\n", " 0.96396396, 1. ],\n", " [0. , 0.01501502, 0.03603604, 0.06306306, 0.09009009,\n", " 0.12012012, 0.15015015, 0.18018018, 0.21321321, 0.24624625,\n", " 0.27927928, 0.31231231, 0.34534535, 0.37837838, 0.41141141,\n", " 0.44444444, 0.47747748, 0.51051051, 0.54354354, 0.57657658,\n", " 0.60960961, 0.64264264, 0.67567568, 0.71171171, 0.74774775,\n", " 0.78378378, 0.81981982, 0.85585586, 0.89189189, 0.92792793,\n", " 0.96396396, 1. ],\n", " [0. , 0.01501502, 0.03603604, 0.06306306, 0.09009009,\n", " 0.12012012, 0.15015015, 0.18018018, 0.21321321, 0.24624625,\n", " 0.27927928, 0.31231231, 0.34534535, 0.37837838, 0.41141141,\n", " 0.44444444, 0.47747748, 0.51051051, 0.54354354, 0.57657658,\n", " 0.60960961, 0.64264264, 0.67567568, 0.71171171, 0.74774775,\n", " 0.78378378, 0.81981982, 0.85585586, 0.89189189, 0.92792793,\n", " 0.96396396, 1. ],\n", " [0. , 0.01501502, 0.03603604, 0.06306306, 0.09009009,\n", " 0.12012012, 0.15015015, 0.18018018, 0.21321321, 0.24624625,\n", " 0.27927928, 0.31231231, 0.34534535, 0.37837838, 0.41141141,\n", " 0.44444444, 0.47747748, 0.51051051, 0.54354354, 0.57657658,\n", " 0.60960961, 0.64264264, 0.67567568, 0.71171171, 0.74774775,\n", " 0.78378378, 0.81981982, 0.85585586, 0.89189189, 0.92792793,\n", " 0.96396396, 1. ]])" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_df = pd.DataFrame(list_all_rows)\n", "final_df.index = summary_df.index\n", "final_np = final_df.to_numpy()\n", "final_np[0:5,:]" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sum(np.array(final_df.index == stoptimes.index)) == stoptimes.shape[0]" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [], "source": [ "# write recovery table \n", "with gzip.open(\"../data/join_distribution_cumulative_p_2.pkl.gz\", \"wb\") as output_file:\n", " pickle.dump(final_np, output_file)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Poisson cumulative distribution\n", "\n", "The Poisson distribution is popular for modeling the number of times an event occurs in an interval of time or space. We modeled a poisson distribution for delays assuming parameter $k$ is the time in minutes (as it was done [here](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0126137), formulas $(4),(5),(6)$).\n", "\n", "A discrete random variable X is said to have a Poisson distribution with parameter λ > 0, if, for k = 0, 1, 2, ..., the probability mass function of X is given by:\n", "\n", "$${\\displaystyle \\!f(k;\\lambda )=\\Pr(X=k)={\\frac {\\lambda ^{k}e^{-\\lambda }}{k!}},}$$\n", "where\n", "\n", "e is Euler's number (e = 2.71828...)\n", "k! is the factorial of k.\n", "The positive real number λ is equal to the expected value of X __and__ to its variance.\n", "\n", "$${\\displaystyle \\lambda =\\operatorname {E} (X)=\\operatorname {Var} (X)}$$\n", "\n", "We can approximate E[𝑋]∼$\\mu_i$ for our data $X_i$, if we assume the sample $X_i$ of size N follow the distribution of $X$ meaning $X_i$∼$X$.\n", "\n", "Poisson-related __assumptions__ :\n", "- $k$ is the __delay time in minutes__ and can take values 0, 1, 2, ... (strictly positive and discrete)\n", "- We assume our sampling $X_i$ of $X$ is good enough to approximate E[X] ~ $\\mu_i$\n", "- The occurrence of one event does not affect probability of others. That is, events occur independently.\n", " - __We assume being late one day is not affecting the delay of the day after__ \n", "- The average rate at which events occur is independent of any occurrences. For simplicity, this is usually assumed to be constant, but may in practice vary with time.\n", " - __we assumes delays occurs with a constant rate over time__\n", "- Two events cannot occur at exactly the same instant\n", "\n", "We made a function `poisson_proba` that takes a `trip_id`, a `stop_id`, an `arrival time` and a `departure time` and a dictionnary {key : distribution} to compute a __probability to be at least 2 minutes before departure of next trip__. \n", "\n", "We make a few __assumptions__ on our side :\n", "- We assume that if we have less than 2 minutes for the transfer, we miss it.\n", "- We assume the next train is on time.\n", "- As for poisson distribution $k$ is strictly positive, we assume trains ahead of schedule were on time ($k=0$)\n", "\n", "\n", "_Question we should address :_\n", "- _Is the poisson a reasonable approximation of the binomial distribution in our case ?_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's first test the poisson distribution and compare it with our distribution to see how well it fits the data. We will compute $Pr(X = k)$ for each values of k and look at the shape of the poisson distribution compared to the shape of our scaled data. Then, we will compare $\\sum_{k=0}^T Pr(X = k)$ with the cumulative distribution function which directly gives $Pr(k \\leq X)$" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "An error was encountered:\n", "Invalid status code '404' from http://iccluster044.iccluster.epfl.ch:8998/sessions/6821 with error payload: \"Session '6821' not found.\"\n" ] } ], "source": [ "################################# POISSON FIT TEST #########################################\n", "\n", "# to do .. \n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here are all the functions needed to calculate probability of success for a given transfer. We need the `trip_id`, `stop_id`, `departure_time`, `arrival_time` and dictionnary `d` (pickled load at the beginning of the cell) to be able to compute a probability of success with following function : \n", "\n", "`poisson_proba(trip_id, stop_id, arrival_time, departure_time, d)`" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "lambda (expectation given distribution): 1.0194769059543685 \n", "\n", "Probability of success for transfer time = 13.0 minutes : 0.999999999994185\n" ] } ], "source": [ "%local\n", "################################# POISSON FUNCTIONS ########################################\n", "\n", "import pickle \n", "import gzip\n", "import time\n", "import math \n", "import datetime\n", "import time\n", "from scipy.stats import poisson\n", "\n", "# Load dictionnary\n", "with gzip.open(\"../data/distributions.pickle\", \"rb\") as input_file:\n", " d = pickle.load(input_file)\n", "\n", "# Load dictionnary\n", "with open(\"../data/stop_times_array.pkl\", \"rb\") as input_file:\n", " times = pickle.load(input_file)\n", "\n", "# we take two exemple time in format numpy.datetime64\n", "arr_time = times[4][1]\n", "dep_time = times[0][1]\n", "\n", "# Load distribution in dictinonary given a key\n", "def get_distrib(key, dico):\n", " if key in dico:\n", " return dico[key]\n", " else:\n", " raise ValueError(\"KEY ERROR: {} not found un distribution dictionnary\".format(key))\n", " \n", "# Evaluate lambda parameter assuming it is equal to average \n", "def evaluate_lamda(distrib):\n", " # First calculate total number of measures N\n", " N = 0 # by starting at -1 we ignore trains ahead of schedule\n", " for x in distrib:\n", " N += x\n", "\n", " lambda_p = 0 # expectation - we want to calculate it\n", " t = -1 # time = index - 1\n", "\n", " for x in distrib:\n", " if t>0:\n", " lambda_p += t*x\n", " t += 1\n", "\n", " # calculate lambda - the expectation of x\n", " if N > 0:\n", " lambda_p /= N \n", " print('lambda (expectation given distribution): ',lambda_p, '\\n')\n", " return lambda_p\n", " else : \n", " raise ValueError(\"ERROR : {} distribution has 0 counts\".format(key))\n", " #print('Returning 1 to avoid later problem... \\n')\n", " return 1\n", "\n", "# process time given as string in format 'hh:mm' - not needed\n", "def process_time_str(str_time):\n", " x = time.strptime(str_time,'%H:%M')\n", " return datetime.timedelta(hours=x.tm_hour,minutes=x.tm_min,seconds=x.tm_sec).total_seconds()\n", "\n", "# Calculate transfer time given two times in string format 'hh:mm'\n", "def get_transfer_time(arr_time, dep_time, delta=2.0):\n", " diff_time_min = (arr_time - dep_time).astype('timedelta64[m]') / np.timedelta64(1, 'm')\n", " return diff_time_min - delta\n", "\n", "# Calculate poisson probability of success for a given transfert \n", "# for a given trip_id, stop_id, arrival/departure times and dict\n", "def poisson_proba(trip_id, stop_id, arr_time, dep_time, dico):\n", " # Generate key from trip_id / stop_id \n", " key = str(trip_id) + '__' + str(stop_id[0:7]) # 7 first char to be sbb-compatible\n", "\n", " # Get distribution from dictionnary\n", " distrib = get_distrib(key, dico)\n", " \n", " # Calculate transfer time at disposal \n", " T = get_transfer_time(arr_time, dep_time)\n", " \n", " # Get lambda value to calculate proba\n", " lambda_p = evaluate_lamda(distrib)\n", "\n", " # Get proba\n", " if T > 2:\n", " poisson_p = poisson.cdf(T, lambda_p)\n", " else : \n", " poisson_p = 0.0 # if we have less than 2 minutes, we miss it\n", " \n", " print('Probability of success for transfer time = {} minutes : '.format(T),poisson_p)\n", " return poisson_p\n", "\n", "# Mock exemple of probability calculations with given inputs\n", "trip_id = '1286.TA.26-32-j19-1.12.H'\n", "stop_id = '8591184'\n", "\n", "# we take two exemple time from stop_times_array in format numpy.datetime64\n", "arr_time = times[3][1]\n", "dep_time = times[0][1]\n", "\n", "Pr = poisson_proba(trip_id, stop_id, arr_time, dep_time, d)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }