Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F111347670
hourly_model.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Apr 30, 23:40
Size
2 KB
Mime Type
text/x-python
Expires
Fri, May 2, 23:40 (2 d)
Engine
blob
Format
Raw Data
Handle
25887106
Attached To
R8800 solar_potential
hourly_model.py
View Options
import
os
import
sys
import
xarray
as
xr
import
pandas
as
pd
import
numpy
as
np
from
sklearn.neighbors
import
KNeighborsRegressor
from
sklearn.ensemble
import
RandomForestRegressor
from
meteo_data
import
Meteo_Reader
import
util
# --- Command-line interface --------------------------------------------------
# Usage: hourly_model.py <path> <locname> <feature1,feature2,...> <target> <model>
# Fail fast with a usage message instead of an IndexError on missing args.
if len(sys.argv) < 6:
    sys.exit('usage: hourly_model.py <path> <locname> '
             '<features,comma,separated> <target> <model (knn|rf)>')

path = sys.argv[1]                  # project root containing raw_data/, locations/, results/
locname = sys.argv[2]               # basename of the query-locations CSV (without .csv)
features = sys.argv[3].split(",")   # feature column names used for training
target = sys.argv[4]                # name of the target variable to model
model = sys.argv[5]                 # put 'knn' or 'rf'

# Validate up front; an unknown model would otherwise only fail with an
# opaque KeyError inside the per-hour loop.
if model not in ('knn', 'rf'):
    sys.exit("model must be 'knn' or 'rf', got %r" % model)

print(features)
print(target)

# KNN hyper-parameters
n_neighbors = 5
weight_type = 'distance'

# Random-forest hyper-parameters
forest_size = 500
# ### Output collector
# Every iteration's predictions are appended to a single HDF5 file whose
# name encodes the chosen model, the target variable and the feature count.
run_label = '%s_%s_2001_%dD' % (model, target, len(features))
output_columns = ['x', 'y', 'month', 'hour', target]
results_dir = os.path.join(path, 'results')
output_table = util.Table_Writer(results_dir, output_columns, run_label)
output_table.open_hdf5(add_norm=False)
# Load the 2001 monthly-mean-hourly training data; keep only the target
# variable plus the hour mask. The reader wraps access to the raw inputs.
raw_dir = os.path.join(path, 'raw_data')
mmh = xr.open_dataset(os.path.join(raw_dir, '2001_mmh.nc'))
solar = xr.merge([mmh[target], mmh.hourmask])
reader = Meteo_Reader(raw_dir)

# The hour mask marks the valid (month, hour) combinations; flatten it into
# a two-column table of pairs to iterate over.
mask = mmh.hourmask
hour_month_combinations = (
    mask.to_dataframe()
    .dropna()
    .reset_index()
    .loc[:, ['month', 'hour']]
)
# ## Per-hour execution
# The query locations are identical for every (month, hour) pair, so read the
# CSV once up front instead of re-reading it on every loop iteration.
pts = pd.read_csv(os.path.join(path, 'locations', locname + '.csv'))

# Factories rather than eagerly-built instances: only the selected regressor
# is constructed, and a fresh (unfitted) one is created per (month, hour).
model_factories = {
    'rf': lambda: RandomForestRegressor(n_estimators=forest_size,
                                        max_depth=100, n_jobs=-1),
    'knn': lambda: KNeighborsRegressor(n_neighbors=n_neighbors,
                                       weights=weight_type, n_jobs=-1),
}

for month, hour in zip(hour_month_combinations.month,
                       hour_month_combinations.hour):
    t_all = util.Timer()
    print('\n\nMonth: %d, hour: %d' % (month, hour))

    # Select the subset of the data for modelling this month/hour.
    date = ('2001%02d01' % month)
    solar_hour = solar.sel(month=month, date=date, hour=hour)

    # Convert into a pandas dataframe and split into feature / target tables.
    solar_table = reader.make_table(indata=solar_hour, ftrs=features)
    training_features = solar_table.loc[:, features]
    training_targets = solar_table.loc[:, target]

    # Feature table for the query points; .copy() so the prediction columns
    # added below never write through to the cached `pts` frame.
    query_features = pts.reset_index().loc[:, features].copy()

    # Build a fresh instance of the selected model for this iteration.
    regressor = model_factories[model]()

    # Fit the selected model.
    tt = util.Timer()
    regressor.fit(training_features, training_targets)
    print('\nFitted model')
    tt.stop()

    # Predict on the query data.
    tt = util.Timer()
    query_output = regressor.predict(query_features)
    print('\nPredicted on query')
    tt.stop()

    # Attach predictions plus the (month, hour) labels and append to HDF5.
    query_features[target] = query_output
    query_features['month'] = month
    query_features['hour'] = hour
    output_table.write_hdf5(query_features)
    print('\nFinished iteration')
    t_all.restart()

# close the hdf5 file
output_table.close_hdf5()
Event Timeline
Log In to Comment