Page MenuHomec4science

make_shaded_area_features.py
No OneTemporary

File Metadata

Created
Sun, May 4, 04:59

make_shaded_area_features.py

import numpy as np
import pandas as pd
import xarray as xr
import os
import time
from features import Training, Testing
from sklearn.model_selection import train_test_split
def attach_col_to_table(table, new_table, merge_column, value_column, value_col_target = None, how = 'left'):
    """Merge one value column from *new_table* into *table*.

    Only ``merge_column`` and ``value_column`` are taken from *new_table*;
    the join is performed on ``merge_column`` (left join by default, so
    every row of *table* is kept).

    Parameters
    ----------
    table : pd.DataFrame
        Base table that receives the new column.
    new_table : pd.DataFrame
        Table providing ``value_column``.
    merge_column : str
        Column name present in both tables, used as the join key.
    value_column : str
        Column of *new_table* to attach.
    value_col_target : str, optional
        If given, the attached column is renamed to this in the result.
    how : str
        Join strategy passed through to ``DataFrame.merge``.

    Returns
    -------
    pd.DataFrame
        *table* with the (possibly renamed) value column appended.
    """
    # Restrict the right-hand side to the key and the value of interest
    # so no other columns of new_table leak into the result.
    value_slice = new_table.loc[:, [merge_column, value_column]]
    merged = table.merge(value_slice, on=merge_column, how=how)
    if value_col_target is None:
        return merged
    # Optionally give the attached column a caller-chosen name.
    return merged.rename(columns={value_column: value_col_target})
## ========== USER INPUT ===================
# Folder in which "datasets" exists.
target_path = "/Users/alinawalch/Documents/EPFL/data/rooftops"
ROOFTOP_FILE = '/Users/alinawalch/Documents/EPFL/data/rooftops/GVA_select_area8_buf30_merged.csv'
SHADE_2m_FILE = '/Users/alinawalch/Documents/EPFL/data/rooftops/shading_images/visibility/shading_vis_40_2m.csv'
SHADE_50cm_FILE = '/Users/alinawalch/Documents/EPFL/data/rooftops/shading_images/visibility/shading_vis_40_50cm.csv'

# Feature columns used as model input, and target column(s) to predict.
FEATURES = ['shaded_area_ratio_2m', 'NEIGUNG', 'AUSRICHTUN', 'FLAECHE', 'SHAPE_Leng', 'SHAPE_Ratio', 'GASTW', 'GBAUP', 'GAREA', 'n_neighbors_100']
TARGETS = ['shaded_area_ratio_50cm']
SAMPLE_SIZE = 1.0  # fraction of the data used for training & testing (THE SAMPLE)
TEST_RATIO = 0.2   # fraction of the sample held out for testing

## === MERGE INFORMATION OF ROOFS & SHADING ==
rooftops = pd.read_csv(ROOFTOP_FILE, index_col=0)
shade_2m = pd.read_csv(SHADE_2m_FILE, index_col=0)
shade_50cm = pd.read_csv(SHADE_50cm_FILE, index_col=0)

# Attach the 2 m and 50 cm shading ratios to the rooftop table, joined on DF_UID.
merged = attach_col_to_table(rooftops, shade_2m, 'DF_UID', 'fully_shaded_ratio', 'shaded_area_ratio_2m')
merged = attach_col_to_table(merged, shade_50cm, 'DF_UID', 'fully_shaded_ratio', 'shaded_area_ratio_50cm')

# Drop rows containing NaNs as well as exact duplicate rows.
clean_data = merged.dropna().drop_duplicates()
print("Created learning table with columns:")
print(clean_data.columns)

## ======== CREATE NEW DATASET ==============
# Declare a new training dataset named after the feature dimensionality.
modelname = f"fully_shaded_ratio_{len(FEATURES)}D"
new_set = Training(target_path, modelname, FEATURES, TARGETS, data_type = 'table')
print(new_set.train_path) # this will contain both training and validation feature tables
print(new_set.test_path)

## ====== CREATE FEATURE AND TARGET TABLES ===
# Draw the sample, then split it into training and testing partitions.
learning_table = clean_data.sample(frac=SAMPLE_SIZE)
train_table, test_table = train_test_split(learning_table, test_size=TEST_RATIO)

# Build the dataset on disk and normalize the data.
new_set.make_dataset(table=train_table, test_table=test_table)
new_set.normalize_all(val_ratio=1.0)  # DO NOT SPLIT into val and tr (for cross-validation purposes)

Event Timeline