Page MenuHomec4science

ensemble-elm2.py
No OneTemporary

File Metadata

Created
Thu, May 1, 08:59

ensemble-elm2.py

# coding: utf-8
# In[1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import pandas as pd
import xarray as xr
import os
import sys
import time
import hpelm
import util
from ds import Dataset
from tables import open_file, Atom, Filters
import csv
from sklearn.metrics import mean_squared_error as mse
# In[2]:
# Root directory of the meteo data sets.
# (Alternative location used previously:
#  "/Users/alinawalch/Documents/EPFL/data/meteo")
data_path = os.path.abspath("/mnt/sda1/hyenergy/data/meteo")

# The data set name is the single required command-line argument.  Exit with
# a usage message rather than a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit('usage: %s <dsname>' % os.path.basename(sys.argv[0]))
dsname = sys.argv[1]
queryname = 'query_locs_13d_500'
t_nodes = 'sigm'  # neuron type handed to HPELM.add_neurons in the training loop

# In[3]:
# Load the train / test / query matrices and time the load.
tt = util.Timer()
my_ds = Dataset(data_path, dsname, queryname)
my_ds.get_matrices(['train','test','query'])
tt.stop(print_wallclock = False)

# In[4]:
n = my_ds.train_x.shape[0]   # rows of train_x (size of each bootstrap sample)
nf = my_ds.train_x.shape[1]  # columns of train_x (HPELM input dimension)
nt = my_ds.train_t.shape[1]  # columns of train_t (HPELM output dimension)

# In[12]:
# The two lists are consumed pairwise: n_nodes_lists[i] hidden neurons are
# used for an ensemble of ensemble_size_lists[i] members.
n_nodes_lists = [200,400,1000,2000,5000,10000]
ensemble_size_lists = [50,50,50,50,50,20]
k = 1  # in the visible code this only appears as a tag in the model name
# In[13]:
# For every (hidden-layer size, ensemble size) pair, train a bagged ensemble of
# HPELM models.  After each member is trained, one row with the running
# train / validation error and one row with the timings is appended to CSV
# files in the model directory.
for n_nodes, ensemble_size in zip(n_nodes_lists,ensemble_size_lists):
    modelname = ('ELM_ens%d_node%d_k%d' %(ensemble_size,n_nodes,k))
    my_ds.add_model(modelname, queryname)
    print('model added')

    print('made training and validation data')
    # The test split serves as the validation set for the whole ensemble.
    val = my_ds.test_x
    val_t = my_ds.test_t
    val_F = os.path.join(my_ds.train_path_out,('val_x.hdf5'))
    util.make_hdf5(val, val_F)

    # None of these locations depend on the ensemble member, so they are
    # built once per configuration instead of once per member.
    train_F = os.path.join(my_ds.train_path_out,('train_x.hdf5'))
    train_X = os.path.join(my_ds.train_path_out,('train_x_2.hdf5'))
    train_T = os.path.join(my_ds.train_path_out,('train_t.hdf5'))
    err_csv = os.path.join(my_ds.model_path,('ensemble_err.csv'))
    time_csv = os.path.join(my_ds.model_path,('ensemble_time.csv'))

    train_y_mse = 0                 # running sum of the members' training MSEs
    val_y = np.zeros(val_t.shape)   # running sum of the members' validation predictions

    for m in range(ensemble_size):
        # Bootstrap sample: n row indices drawn uniformly with replacement.
        # randint keeps every index in [0, n); floor(rand * n) can round up
        # to n for draws adjacent to 1.0 and then index out of range.
        ind = np.random.randint(0, n, size=n)
        train = my_ds.train_x[ind,:]
        train_t = my_ds.train_t[ind]

        # NOTE(review): the same feature matrix is written to two files; the
        # model trains on train_X and predicts on train_F.  Presumably the
        # separate copy is deliberate -- confirm before merging them.
        util.make_hdf5(train, train_F)
        util.make_hdf5(train, train_X)
        util.make_hdf5(train_t, train_T)

        # --- training -----------------------------------------------------
        t_train = util.Timer()
        print('training model %d' %m)
        model = hpelm.hp_elm.HPELM(nf, nt)
        model.add_neurons(n_nodes, t_nodes)
        model.train(train_X, train_T)
        model.save(os.path.join(my_ds.model_path, ('model_%d' %(m))))
        t_train.stop()

        # --- prediction and running errors --------------------------------
        t_pred = util.Timer()
        y_train = model.predict(train_F)
        train_y_mse = train_y_mse + mse(y_train,train_t)
        # Mean of the individual members' MSEs on their own bootstrap samples.
        err_train = train_y_mse/(m+1)
        print('Train error: %f' %err_train)

        y_val_tmp = model.predict(val_F)
        val_y = val_y + y_val_tmp
        # MSE of the averaged prediction of the first m+1 members.
        err_val = mse(val_y/(m+1),val_t)
        print('Validation error: %f' %err_val)
        t_pred.stop()

        # --- logging --------------------------------------------------------
        # newline='' is what the csv module requires of files it writes to;
        # without it, platforms that translate '\n' emit blank rows.
        with open(err_csv, 'a', newline='') as csvfile:
            w = csv.writer(csvfile, delimiter=',')
            w.writerow([m, err_train, err_val])

        with open(time_csv, 'a', newline='') as csvfile:
            w = csv.writer(csvfile, delimiter=',')
            w.writerow([m, t_train.cputime, t_pred.cputime])

Event Timeline