Page MenuHomec4science

make_shaded_area_features.py
No OneTemporary

File Metadata

Created
Sun, May 4, 04:59

make_shaded_area_features.py

import numpy as np
import pandas as pd
import xarray as xr
import os
import time
from features import Training, Testing
from sklearn.model_selection import train_test_split
def attach_col_to_table(table, new_table, merge_column, value_column, value_col_target = None, how = 'left'):
    """Merge one value column from *new_table* into *table*.

    Only ``merge_column`` and ``value_column`` are taken from *new_table*;
    the join is performed on ``merge_column`` (left join by default, so
    every row of *table* is kept).

    Parameters
    ----------
    table : pd.DataFrame
        Base table that receives the new column.
    new_table : pd.DataFrame
        Table providing ``value_column``.
    merge_column : str
        Column name present in both tables, used as the join key.
    value_column : str
        Column of *new_table* to attach.
    value_col_target : str, optional
        If given, the attached column is renamed to this in the result.
    how : str
        Join strategy passed through to ``DataFrame.merge``.

    Returns
    -------
    pd.DataFrame
        *table* with the (possibly renamed) value column appended.
    """
    # Restrict the right-hand side to the key and the value of interest
    # so no other columns of new_table leak into the result.
    value_slice = new_table.loc[:, [merge_column, value_column]]
    merged = table.merge(value_slice, on=merge_column, how=how)
    if value_col_target is None:
        return merged
    # Optionally give the attached column a caller-chosen name.
    return merged.rename(columns={value_column: value_col_target})
## ========== USER INPUT ===================
# Folder in which "datasets" exists.
target_path = "/Users/alinawalch/Documents/EPFL/data/rooftops"
ROOFTOP_FILE = '/Users/alinawalch/Documents/EPFL/data/rooftops/GVA_select_area8_buf30_merged.csv'
SHADE_2m_FILE = '/Users/alinawalch/Documents/EPFL/data/rooftops/shading_images/visibility/shading_vis_40_2m.csv'
SHADE_50cm_FILE = '/Users/alinawalch/Documents/EPFL/data/rooftops/shading_images/visibility/shading_vis_40_50cm.csv'

# Feature columns used as model input, and target column(s) to predict.
FEATURES = ['shaded_area_ratio_2m', 'NEIGUNG', 'AUSRICHTUN', 'FLAECHE', 'SHAPE_Leng', 'SHAPE_Ratio', 'GASTW', 'GBAUP', 'GAREA', 'n_neighbors_100']
TARGETS = ['shaded_area_ratio_50cm']
SAMPLE_SIZE = 1.0  # fraction of the data used for training & testing (THE SAMPLE)
TEST_RATIO = 0.2   # fraction of the sample held out for testing

## === MERGE INFORMATION OF ROOFS & SHADING ==
rooftops = pd.read_csv(ROOFTOP_FILE, index_col=0)
shade_2m = pd.read_csv(SHADE_2m_FILE, index_col=0)
shade_50cm = pd.read_csv(SHADE_50cm_FILE, index_col=0)

# Attach the 2 m and 50 cm shading ratios to the rooftop table, joined on DF_UID.
merged = attach_col_to_table(rooftops, shade_2m, 'DF_UID', 'fully_shaded_ratio', 'shaded_area_ratio_2m')
merged = attach_col_to_table(merged, shade_50cm, 'DF_UID', 'fully_shaded_ratio', 'shaded_area_ratio_50cm')

# Drop rows containing NaNs as well as exact duplicate rows.
clean_data = merged.dropna().drop_duplicates()
print("Created learning table with columns:")
print(clean_data.columns)

## ======== CREATE NEW DATASET ==============
# Declare a new training dataset named after the feature dimensionality.
modelname = f"fully_shaded_ratio_{len(FEATURES)}D"
new_set = Training(target_path, modelname, FEATURES, TARGETS, data_type = 'table')
print(new_set.train_path) # this will contain both training and validation feature tables
print(new_set.test_path)

## ====== CREATE FEATURE AND TARGET TABLES ===
# Draw the sample, then split it into training and testing partitions.
learning_table = clean_data.sample(frac=SAMPLE_SIZE)
train_table, test_table = train_test_split(learning_table, test_size=TEST_RATIO)

# Build the dataset on disk and normalize the data.
new_set.make_dataset(table=train_table, test_table=test_table)
new_set.normalize_all(val_ratio=1.0)  # DO NOT SPLIT into val and tr (for cross-validation purposes)

Event Timeline