Page MenuHomec4science
No OneTemporary

File Metadata

Tue, Jul 16, 06:35

import pandas as pd
import glob
import numpy as np
import os
from functions import *
import pantab
class comptage:
    """Load, accumulate and enrich traffic-counter CSV exports.

    The methods are meant to be called in sequence:
    ``read_data`` -> ``read_concat`` -> ``localize`` -> ``to_L2`` ->
    ``convert_to_hyper_tb``.

    Attributes set by the methods:
        df_concat_new: rows read from files during the last ``read_data`` call.
        df_concat: previously stored rows plus ``df_concat_new``.
        df_L2: copy of ``df_concat`` after the ``to_L2`` enrichment.
    """

    def __init__(self):
        """Create a pipeline with empty frames; data is attached by the methods."""
        # NOTE(review): the constructor body was missing; empty frames are a
        # backward-compatible placeholder so attribute access never fails.
        self.df_concat_new = pd.DataFrame()
        self.df_concat = pd.DataFrame()
        self.df_L2 = pd.DataFrame()

    @staticmethod
    def _station_from_path(file):
        """Return the integer that follows ``Site_`` in the file path."""
        # Normalise separators so the split also works on Windows-style paths.
        return int(file.replace("\\", "/").split("Site_")[1].split("/")[0])

    @staticmethod
    def _read_vehicle_file(file):
        """Read one ``Vehicules`` CSV (``;`` separated) into a cleaned frame.

        Returns an empty DataFrame when the file has zero bytes.
        """
        if os.stat(file).st_size == 0:
            return pd.DataFrame()
        df = pd.read_csv(file, sep=";", parse_dates=True)
        # Copy the filtered slice so the column assignments below act on an
        # independent frame instead of a view of the raw data.
        df = df[df["typ"] == 1].copy()
        df["date and time [UTC]"] = pd.to_datetime(df["date and time [UTC]"])
        df["station"] = comptage._station_from_path(file)
        df = df.drop(columns=["typ", "range [m]", "notes"])
        # Values use a decimal comma and an explicit "+" sign.
        df["speed [km/h]"] = (
            df["speed [km/h]"]
            .str.replace("+", "", regex=False)
            .str.replace(",", ".", regex=False)
            .astype(float)
        )
        df["length [m]"] = (
            df["length [m]"].str.replace(",", ".", regex=False).astype(float)
        )
        return df

    @staticmethod
    def _read_pedestrian_bike_file(file):
        """Read one ``Pietons-velos`` CSV (header on row 5) into a cleaned frame.

        Returns an empty DataFrame when the file has zero bytes.
        """
        if os.stat(file).st_size == 0:
            return pd.DataFrame()
        df = pd.read_csv(file, sep=",", header=4, parse_dates=True)
        stamp = df["Date"] + " " + df["Time"]
        # Two date layouts are tried: dotted first, slashed as fallback.
        # NOTE(review): the try/except around these two conversions is
        # reconstructed; confirm the intended exception handling.
        try:
            df["Date"] = pd.to_datetime(stamp, format="%d.%m.%Y %H:%M:%S")
        except ValueError:
            df["Date"] = pd.to_datetime(stamp, format="%d/%m/%Y %H:%M:%S")
        df.drop("Time", axis=1, inplace=True)
        df["station"] = comptage._station_from_path(file)
        df["Classification_name"] = np.where(
            df["Classification"] == 1, "pietons", "velos"
        )
        return df

    def read_data(self, folders, processed_files_path="data/processed_files.txt",
                  vehicle_type="Vehicules", processing="Last"):
        """Read counter CSV files and store them in ``self.df_concat_new``.

        Args:
            folders: iterable of folder prefixes; each is concatenated with
                ``vehicle_type + "/*.csv"`` to build the glob pattern.
            processed_files_path: text file listing already-processed files;
                it is both read from and appended to.
            vehicle_type: ``"Vehicules"`` or ``"Pietons-velos"``.
            processing: ``"all"`` re-reads every file; any other value reads
                only files not yet listed in ``processed_files_path``.

        Returns:
            None. The result is stored in ``self.df_concat_new`` (an empty
            DataFrame when there is nothing to read).

        Raises:
            ValueError: if files are found for an unsupported ``vehicle_type``.
        """
        files_station = []
        old_format = []
        for folder in folders:
            for file in glob.glob(folder + vehicle_type + "/*.csv"):
                # NOTE(review): both branch bodies are reconstructed; the
                # assignment of "Empl" files to files_station is assumed
                # from the variable names - confirm.
                if ("empl" in file) or ("Empl" in file):
                    files_station.append(file)
                elif ("TMSSA" not in file) and ("Week_Volume" not in file):
                    # TMSSA files have another structure and are left out;
                    # old-format files are collected but not processed here.
                    old_format.append(file)

        # Use the same registry file for reading and for recording, so files
        # recorded by this method are actually recognised on the next run.
        processed_files = set(get_processed_files(processed_files_path))

        if processing == "all":
            files = files_station
            print("You're processing all the files, this might take some time")
        else:
            files = [file for file in files_station if file not in processed_files]

        df_list = []
        for file in files:
            if vehicle_type == "Vehicules":
                df = self._read_vehicle_file(file)
            elif vehicle_type == "Pietons-velos":
                df = self._read_pedestrian_bike_file(file)
            else:
                raise ValueError(f"Unsupported vehicle_type: {vehicle_type!r}")
            df_list.append(df)
            store_processed_file(file, processed_files_path)

        # pd.concat raises on an empty list, so guard the no-file case.
        if df_list:
            self.df_concat_new = pd.concat(df_list, axis=0, ignore_index=True)
        else:
            self.df_concat_new = pd.DataFrame()
        return None

    def read_concat(self, path):
        """Merge the stored CSV at ``path`` with the newly read rows.

        The combined frame is stored in ``self.df_concat``. The file at
        ``path`` is rewritten only when ``self.df_concat_new`` is not empty.

        Args:
            path: CSV file holding the previously accumulated rows; it may
                not exist yet.
        """
        if os.path.exists(path):
            # The timestamp column name differs between the two data kinds;
            # read_csv raises ValueError when a parse_dates column is absent.
            # NOTE(review): the try/except is reconstructed; confirm.
            try:
                df = pd.read_csv(path, sep=",", parse_dates=["date and time [UTC]"])
            except ValueError:
                df = pd.read_csv(path, sep=",", parse_dates=["Date"])
        else:
            df = pd.DataFrame()

        self.df_concat = pd.concat([df, self.df_concat_new], axis=0, ignore_index=True)
        if self.df_concat_new.empty:
            print("No new files to process")
        else:
            self.df_concat.to_csv(path, index=False)

    def localize(self, path):
        """Attach station metadata (including ``lat``/``lon``) to ``self.df_concat``.

        Args:
            path: Excel file with one row per station. It must contain a
                ``station`` column and a ``coordinates`` column made of two
                space-separated numbers.
        """
        station_info = pd.read_excel(path)
        # First token becomes "lat", second token becomes "lon".
        parts = station_info["coordinates"].str.split(" ")
        station_info["lat"] = parts.str[0].astype(float)
        station_info["lon"] = parts.str[1].astype(float)
        station_info = station_info.drop(columns=["coordinates"])
        # Default (inner) merge: rows whose station is not listed are dropped.
        self.df_concat = self.df_concat.merge(station_info, on="station")

    def to_L2(self, vehicle_type="Vehicules", categories=None,
              output_path="data/Resultats/L2_cars.csv"):
        """Derive the L2 columns and store the result in ``self.df_L2``.

        For ``"Vehicules"`` this adds ``direction``, ``exceeded_speed`` and
        ``length_category`` to ``self.df_concat`` and replaces the speed by
        its absolute value. Other vehicle types are copied unchanged.

        Args:
            vehicle_type: ``"Vehicules"`` or ``"Pietons-velos"``.
            categories: mapping with the upper length bound (exclusive) for
                the keys ``"Velo"``, ``"Moto"``, ``"Auto"`` and
                ``"Camionette"``; required for ``"Vehicules"``.
            output_path: CSV destination for the L2 frame.

        Raises:
            TypeError: if ``categories`` is None for ``"Vehicules"``.
        """
        if vehicle_type == "Vehicules":
            if categories is None:
                raise TypeError("categories must be provided for vehicle_type='Vehicules'")
            df = self.df_concat
            # The sign of the speed encodes the direction of travel.
            df["direction"] = np.where(df["speed [km/h]"] > 0, "ingoing", "outgoing")
            df["speed [km/h]"] = df["speed [km/h]"].abs()
            df["exceeded_speed"] = df["speed [km/h]"] > df["Speed limit [km/h]"]

            # Build the labels in an object column: filling a float NaN
            # column with strings is rejected by recent pandas versions.
            length = df["length [m]"]
            category = pd.Series(np.nan, index=df.index, dtype=object)
            lower = None
            for label in ("Velo", "Moto", "Auto", "Camionette"):
                upper = categories[label]
                mask = length < upper
                if lower is not None:
                    mask = mask & (length >= lower)
                category.loc[mask] = label
                lower = upper
            category.loc[length >= lower] = "Camion"
            df["length_category"] = category
        elif vehicle_type == "Pietons-velos":
            # NOTE(review): this branch body was missing; assumed to need no
            # extra columns - confirm.
            pass

        self.df_L2 = self.df_concat.copy()
        if not self.df_concat_new.empty:
            # NOTE(review): the statement guarded by this condition was
            # missing; writing to output_path is assumed from the otherwise
            # unused parameter - confirm.
            self.df_L2.to_csv(output_path, index=False)

    def convert_to_hyper_tb(self, output_name, tablename, mode="w"):
        """Write ``self.df_L2`` to a Hyper file through ``pantab``.

        Args:
            output_name: destination file passed to ``pantab.frame_to_hyper``.
            tablename: table name inside the Hyper file.
            mode: ``table_mode`` forwarded to pantab (``"w"`` by default).
        """
        pantab.frame_to_hyper(self.df_L2, output_name, table=tablename, table_mode=mode)

Event Timeline