File Metadata

Created: Thu, Jul 3, 10:32

hourly_model.py
View Options


	import os
	import sys
	import xarray as xr
	import pandas as pd
	import numpy as np
	from sklearn.neighbors import KNeighborsRegressor
	from sklearn.ensemble import RandomForestRegressor
	from meteo_data import Meteo_Reader
	import util


	# ### Command-line arguments
	# Expected: <path> <locname> <comma-separated features> <target> <model>
	# Check the count up front so a missing argument produces a usage message
	# rather than a bare IndexError.
	if len(sys.argv) < 6:
	    sys.exit('usage: %s PATH LOCNAME FEATURE[,FEATURE...] TARGET {knn|rf}' % sys.argv[0])

	path = sys.argv[1]                  # base directory holding 'raw_data' and 'results'
	locname = sys.argv[2]               # name of the query-locations file (without extension)
	features = sys.argv[3].split(",")   # feature column names used for training and querying
	target = sys.argv[4]                # variable in the dataset to be predicted
	model = sys.argv[5]                 # put 'knn' or 'rf'

	# Reject an unknown model before the output file is opened; otherwise the
	# mistake only surfaces as a KeyError inside the per-hour loop below.
	# Keep this tuple in sync with the model table used in that loop.
	if model not in ('knn', 'rf'):
	    sys.exit("unknown model %r: expected 'knn' or 'rf'" % model)

	print(features)
	print(target)

	# knn hyper-parameters
	n_neighbors = 5
	weight_type = 'distance'

	# RF hyper-parameters
	forest_size = 500

	# ### Output collector
	# Will write all outputs to an hdf5 file.
	name = '%s_%s_2001_%dD' % (model, target, len(features))
	variables = ['x', 'y', 'month', 'hour', target]
	output_table = util.Table_Writer(os.path.join(path, 'results'), variables, name)
	output_table.open_hdf5(add_norm=False)

	# Load training data (monthly-mean-hourly) as well as the location mask and
	# select the variables of interest for training.
	mmh = xr.open_dataset(os.path.join(path, 'raw_data', '2001_mmh.nc'))
	solar = xr.merge([mmh[target], mmh.hourmask])
	reader = Meteo_Reader(os.path.join(path, 'raw_data'))

	# Load the hourmask and create a table of (month, hour) pairs: the mask is
	# flattened to a dataframe, rows with NaN are dropped, and only the
	# 'month' and 'hour' columns are kept.
	mask = mmh.hourmask
	hour_month_combinations = mask.to_dataframe().dropna().reset_index().loc[:, ['month', 'hour']]

	# ## Per - hour execution
	# For every (month, hour) pair: fit the selected regressor on the training
	# data for that hour, predict at the query locations, and append the
	# predictions to the hdf5 output.
	# Everything runs inside try/finally so the hdf5 file is closed even when
	# an iteration raises.
	try:
	    # The query locations do not depend on month or hour, so the csv file
	    # is read once here instead of once per iteration.
	    query_points = pd.read_csv(os.path.join(path, 'locations', locname + '.csv')).reset_index()

	    # Factories for the supported models. Using callables means only the
	    # selected regressor is constructed, and a fresh one per iteration.
	    model_factories = {
	        'rf': lambda: RandomForestRegressor(n_estimators=forest_size, max_depth=100, n_jobs=-1),
	        'knn': lambda: KNeighborsRegressor(n_neighbors=n_neighbors, weights=weight_type, n_jobs=-1),
	    }
	    make_regressor = model_factories[model]

	    for month, hour in zip(hour_month_combinations.month, hour_month_combinations.hour):
	        # NOTE(review): util.Timer presumably measures/reports elapsed time
	        # via stop()/restart() -- confirm in util.
	        t_all = util.Timer()

	        print('\n\nMonth: %d, hour: %d' % (month, hour))
	        # select subset of the data for modelling; the date label is the
	        # first day of the month in 2001
	        date = '2001%02d01' % month
	        solar_hour = solar.sel(month=month, date=date, hour=hour)

	        # convert data into a pandas dataframe and split into feature and target tables
	        solar_table = reader.make_table(indata=solar_hour, ftrs=features)
	        training_features = solar_table.loc[:, features]
	        training_targets = solar_table.loc[:, target]

	        # select the query features; explicit copy because columns are
	        # added below and the shared query_points table must stay untouched
	        query_features = query_points.loc[:, features].copy()

	        # initialise the selected model
	        regressor = make_regressor()

	        # Fit selected model
	        tt = util.Timer()
	        regressor.fit(training_features, training_targets)
	        print('\nFitted model')
	        tt.stop()

	        # Predict on the query data
	        tt = util.Timer()
	        query_output = regressor.predict(query_features)
	        print('\nPredicted on query')
	        tt.stop()

	        # Prepare query table for writing
	        query_features[target] = query_output
	        query_features['month'] = month
	        query_features['hour'] = hour

	        # write to the hdf5 file
	        output_table.write_hdf5(query_features)

	        print('\nFinished iteration')
	        t_all.restart()
	finally:
	    # close the hdf5 file
	    output_table.close_hdf5()

hourly_model.py
No OneTemporary
Actions

File Metadata

hourly_model.py
View Options

Event Timeline

hourly_model.pyNo OneTemporaryActions

File Metadata

hourly_model.pyView Options

Event Timeline

hourly_model.py
No OneTemporary
Actions

hourly_model.py
View Options