Page MenuHomec4science

train_KI.py
No OneTemporary

File Metadata

Created
Sat, Apr 26, 20:12

train_KI.py

import numpy as np
import pandas as pd
import xarray as xr
import os
import time
from features import Training, Testing
from tables import open_file, Atom, Filters
################################ INPUTS ################################
# Folder in which the raw data is stored.
# Alternative (local machine):
#   os.path.abspath("/Users/alinawalch/Documents/EPFL/data/meteo")
data_path = os.path.abspath("/mnt/sda1/hyenergy/data/meteo")

# Feature and label (target) names handed to the Training set below.
ftr_list = ['x', 'y', 'z', 'month', 'hour']
lbl_list = ['KI']

# First and last year of each period to process (paired element-wise).
# The script turns them into 'yyyymmdd' date strings by appending
# '0101' and '1231' respectively.
start_yr = [2012]
end_yr = [2012]

# Every combination of sampling type and resolution selects one pair of
# location masks: locations/<type><resolution>_{train,test}.txt
sampling_types = ['rand', 'grid']
sampling_resolutions = [100, 500]

# File with the query locations, plus the hours (3..19) and months (1..12)
# passed to the query set.
query_locs = 'query_points_1600.csv'
hours = list(range(3, 20))
months = list(range(1, 13))

# The location masks (train_locs / test_locs) and the model name are not
# configured here; they are derived inside the loops further down.
########################### Create feature table. ########################
def _build_model(loc_tag, yr0, yr1, force_query_normalization):
    """Build and normalise the training set and the query set of one model.

    The model is named ``<yr0>-<yr1>_<loc_tag>_<first label>``. Besides the
    arguments, the module-level settings ``data_path``, ``ftr_list``,
    ``lbl_list``, ``query_locs``, ``hours`` and ``months`` are used.

    Args:
        loc_tag: Name stem of the location masks; the masks passed on are
            ``locations/<loc_tag>_train.txt`` and
            ``locations/<loc_tag>_test.txt``.
        yr0: First year; the start date string is ``<yr0>0101``.
        yr1: Last year; the end date string is ``<yr1>1231``.
        force_query_normalization: If true, ``force_normalization=True`` is
            passed to ``Testing.normalize_input``; otherwise that method is
            called without arguments.

    Returns:
        Tuple ``(training_set, query)`` holding the ``Training`` and the
        ``Testing`` object that were created.
    """
    start_date = str(yr0) + '0101'
    end_date = str(yr1) + '1231'

    # Location masks for the training and the test set.
    train_locs = "locations/" + loc_tag + "_train.txt"
    test_locs = "locations/" + loc_tag + "_test.txt"
    modelname = str(yr0) + '-' + str(yr1) + '_' + loc_tag + '_' + lbl_list[0]

    training_set = Training(data_path, modelname, ftr_list, lbl_list)
    training_set.make_dataset(start_date, end_date,
                              sample_name=train_locs, test_name=test_locs)
    training_set.normalize_all(feature_norm='mean', target_norm='mean',
                               val_ratio=0.8, force_normalization=True)

    query = Testing(data_path, modelname, query_name='grid1600')
    query.make_query(loc=query_locs, hour=hours, month=months)
    if force_query_normalization:
        query.normalize_input(force_normalization=True)
    else:
        query.normalize_input()
    return training_set, query


for yr0, yr1 in zip(start_yr, end_yr):
    # One model per (resolution, sampling type) combination.
    for res in sampling_resolutions:
        print(res)
        for typ in sampling_types:
            print(typ)
            new_set, myquery = _build_model(typ + str(res), yr0, yr1, True)

    # One additional model per period that uses the "all" location masks.
    # NOTE(review): unlike the sampled models above, the query of this model
    # is normalised without force_normalization=True. This mirrors the
    # original script; confirm whether the difference is intentional.
    new_set, myquery = _build_model('all', yr0, yr1, False)

Event Timeline