Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F111655688
make_shaded_area_features.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, May 4, 04:59
Size
2 KB
Mime Type
text/x-python
Expires
Tue, May 6, 04:59 (1 d, 13 h)
Engine
blob
Format
Raw Data
Handle
25955845
Attached To
R8800 solar_potential
make_shaded_area_features.py
View Options
import
numpy
as
np
import
pandas
as
pd
import
xarray
as
xr
import
os
import
time
from
features
import
Training
,
Testing
from
sklearn.model_selection
import
train_test_split
def attach_col_to_table(table, new_table, merge_column, value_column,
                        value_col_target=None, how='left'):
    """Attach a single value column from ``new_table`` onto ``table``.

    The two tables are joined on ``merge_column``; only that key and
    ``value_column`` are taken from ``new_table``, so no other columns
    leak into the result.

    Parameters
    ----------
    table : pandas.DataFrame
        Base table that receives the new column.
    new_table : pandas.DataFrame
        Table providing ``merge_column`` and ``value_column``.
    merge_column : str
        Join key present in both tables.
    value_column : str
        Column of ``new_table`` to carry over.
    value_col_target : str, optional
        If given, the attached column is renamed to this in the result.
    how : str, default 'left'
        Join type forwarded to :meth:`pandas.DataFrame.merge`.

    Returns
    -------
    pandas.DataFrame
        ``table`` with the extra column attached.
    """
    # Restrict the right-hand side to the join key plus the one value column.
    subset = new_table.loc[:, [merge_column, value_column]]
    merged = table.merge(subset, on=merge_column, how=how)

    # Optionally give the attached column its target name.
    if value_col_target is not None:
        merged = merged.rename(columns={value_column: value_col_target})
    return merged
## ========== USER INPUT ===================
# Root folder for outputs; a "datasets" directory is expected to exist here.
target_path = "/Users/alinawalch/Documents/EPFL/data/rooftops"  # folder in which "datasets" exists

# Per-roof attribute table (merged Geneva rooftop selection).
ROOFTOP_FILE = '/Users/alinawalch/Documents/EPFL/data/rooftops/GVA_select_area8_buf30_merged.csv'
# Shading visibility results at 2 m resolution.
SHADE_2m_FILE = '/Users/alinawalch/Documents/EPFL/data/rooftops/shading_images/visibility/shading_vis_40_2m.csv'
# Shading visibility results at 50 cm resolution.
SHADE_50cm_FILE = '/Users/alinawalch/Documents/EPFL/data/rooftops/shading_images/visibility/shading_vis_40_50cm.csv'

# List of features and tables
# Model inputs: the coarse (2 m) shading ratio plus rooftop/building attributes
# (tilt, azimuth, area, shape metrics, building stats, neighbour count).
FEATURES = ['shaded_area_ratio_2m', 'NEIGUNG', 'AUSRICHTUN', 'FLAECHE',
            'SHAPE_Leng', 'SHAPE_Ratio', 'GASTW', 'GBAUP', 'GAREA',
            'n_neighbors_100']
# Model target: the fine-resolution (50 cm) shading ratio.
TARGETS = ['shaded_area_ratio_50cm']

SAMPLE_SIZE = 1.0  # percentage of the data that is used for training & testing (THE SAMPLE)
TEST_RATIO = 0.2   # percentage of the sample that is used for testing
## === MERGE INFORMATION OF ROOFS & SHADING ==
# Load the rooftop attribute table and both shading tables; the first CSV
# column is used as the index in each case.
rooftops = pd.read_csv(ROOFTOP_FILE, index_col=0)
shade_2m = pd.read_csv(SHADE_2m_FILE, index_col=0)
shade_50cm = pd.read_csv(SHADE_50cm_FILE, index_col=0)

# Attach the 2 m shading ratio to each rooftop (joined on the roof ID
# 'DF_UID'); the source column 'fully_shaded_ratio' is renamed to
# 'shaded_area_ratio_2m' to match the FEATURES list.
all_data = attach_col_to_table(rooftops, shade_2m, 'DF_UID',
                               'fully_shaded_ratio', 'shaded_area_ratio_2m')
# Same for the 50 cm shading ratio, renamed to the TARGETS column name.
all_data = attach_col_to_table(all_data, shade_50cm, 'DF_UID',
                               'fully_shaded_ratio', 'shaded_area_ratio_50cm')

# Drop all rows containing NaNs and all fully duplicated rows.
# (NOTE(review): despite the original comment mentioning "columns", dropna()
# and drop_duplicates() with default arguments operate on rows.)
all_data_reduced = all_data.dropna().drop_duplicates()
print("Created learning table with columns:")
print(all_data_reduced.columns)
## ======== CREATE NEW DATASET ==============
# declare new training dataset
# Model name encodes the feature dimensionality, e.g. "fully_shaded_ratio_10D".
modelname = ("fully_shaded_ratio_%dD" % len(FEATURES))
# Training is a project class from the local `features` module; presumably it
# sets up the on-disk dataset layout under target_path — confirm in features.py.
new_set = Training(target_path, modelname, FEATURES, TARGETS, data_type='table')
print(new_set.train_path)  # this will contain both training and validation feature tables
print(new_set.test_path)

## ====== CREATE FEATURE AND TARGET TABLES ===
# select the sample and split into training and testing data
# With SAMPLE_SIZE == 1.0 this shuffles the full table; smaller values
# subsample the data before the train/test split.
learning_table = all_data_reduced.sample(frac=SAMPLE_SIZE)
train_table, test_table = train_test_split(learning_table, test_size=TEST_RATIO)

# create the new dataset and normalize data
new_set.make_dataset(table=train_table, test_table=test_table)
# val_ratio=1.0: keep the entire training set together.
new_set.normalize_all(val_ratio=1.0)  # DO NOT SPLIT into val and tr (for cross-validation purposes)
Event Timeline
Log In to Comment