Page MenuHomec4science

ensemble-cv3D.py
No OneTemporary

File Metadata

Created
Sat, Apr 26, 16:29

ensemble-cv3D.py

# coding: utf-8
# In[1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import pandas as pd
import xarray as xr
import os
import sys
import time
import hpelm
import util
from ds import Dataset
from tables import open_file, Atom, Filters
import csv
from sklearn.metrics import mean_squared_error as mse
# In[2]:
# Run configuration: where the data lives and which dataset / query set to use.
# Alternative local path kept from the original notebook:
# data_path = os.path.abspath("/Users/alinawalch/Documents/EPFL/data/meteo")
_meteo_root = "/mnt/sda1/hyenergy/data/meteo"
data_path = os.path.abspath(_meteo_root)

# Both names are handed to Dataset(...) when the data is loaded.
dsname = '2001_sample1M_SIS_3D'
queryname = 'query_locs_13d_500'

# Neuron type string passed to model.add_neurons(...) for every ensemble member.
t_nodes = 'sigm'
# In[3]:
# Build the dataset wrapper and pull in its train / test / query matrices.
# The util.Timer brackets the load (presumably to measure how long it takes --
# confirm against util).
tt = util.Timer()
my_ds = Dataset(data_path, dsname, queryname)
my_ds.get_matrices(['train', 'test', 'query'])
tt.stop(print_wallclock=False)

# In[4]:
# Pool the train and test splits row-wise (train rows first, test rows after);
# the training loop further down carves its own train/validation split out of
# this pooled set.
data = np.vstack((my_ds.train_x, my_ds.test_x))
targets = np.vstack((my_ds.train_t, my_ds.test_t))

# n: pooled row count, nf: input columns, nt: target columns.
n, nf = data.shape[:2]
nt = targets.shape[1]
# In[12]:
# Configurations to run, one (hidden-node count, ensemble size) pair per row.
# The training loop walks the two derived lists in lockstep.
_grid = [
    (200, 200),
    (400, 100),
    (1000, 100),
    (2000, 50),
    (5000, 50),
    (10000, 20),
]
n_nodes_lists, ensemble_size_lists = (list(column) for column in zip(*_grid))

# k is only used as a tag in the generated model name.
k = 1
# In[13]:
# For every (hidden nodes, ensemble size) configuration: register a model,
# draw a random 80/20 train/validation split of the pooled data, train the
# ensemble members one by one, and after each member log the running
# ensemble-mean error and the timings to CSV files in the model directory.
for n_nodes, ensemble_size in zip(n_nodes_lists, ensemble_size_lists):
    modelname = ('ELM_ens%d_node%d_k%d' % (ensemble_size, n_nodes, k))
    my_ds.add_model(modelname, queryname)
    print('model added')

    # Shuffle the row indices and split THROUGH the permutation. The pooled
    # array is the train split stacked on top of the test split, so taking
    # the leading rows unshuffled (as the previous code did, leaving `ind`
    # unused) would never give a random validation sample.
    ind = np.random.permutation(n)
    val_end = int(0.2 * n)
    val_idx = ind[:val_end]
    train_idx = ind[val_end:]
    val = data[val_idx]
    val_t = targets[val_idx]
    train = data[train_idx]
    train_t = targets[train_idx]
    print('made training and validation data')

    # HDF5 files consumed by HPELM. The training inputs are written twice:
    # train_X is used for fitting and train_F for prediction.
    # NOTE(review): presumably two copies avoid sharing one file between
    # train() and predict() -- confirm before merging them.
    train_F = os.path.join(my_ds.train_path_out, ('train_x.hdf5'))
    train_X = os.path.join(my_ds.train_path_out, ('train_x_2.hdf5'))
    val_F = os.path.join(my_ds.train_path_out, ('val_x.hdf5'))
    train_T = os.path.join(my_ds.train_path_out, ('train_t.hdf5'))
    util.make_hdf5(train, train_F)
    util.make_hdf5(train, train_X)
    util.make_hdf5(val, val_F)
    util.make_hdf5(train_t, train_T)

    # Running sums of member predictions; dividing by the number of members
    # trained so far (m + 1) yields the current ensemble-mean prediction.
    train_y = np.zeros(train_t.shape)
    val_y = np.zeros(val_t.shape)

    for m in range(ensemble_size):
        # --- fit and persist one ensemble member ---
        t_train = util.Timer()
        print('training model %d' % m)
        model = hpelm.hp_elm.HPELM(nf, nt)
        model.add_neurons(n_nodes, t_nodes)
        model.train(train_X, train_T)
        model.save(os.path.join(my_ds.model_path, ('model_%d' % (m))))
        t_train.stop()

        # --- predict and score the ensemble built so far ---
        t_pred = util.Timer()
        y_train_tmp = model.predict(train_F)
        train_y = train_y + y_train_tmp
        err_train = mse(train_y / (m + 1), train_t)
        print('Train error: %f' % err_train)

        y_val_tmp = model.predict(val_F)
        val_y = val_y + y_val_tmp
        err_val = mse(val_y / (m + 1), val_t)
        print('Validation error: %f' % err_val)
        t_pred.stop()

        # Append one row per member so partial results survive an
        # interrupted run.
        with open(os.path.join(my_ds.model_path, ('ensemble_err.csv')), 'a') as csvfile:
            w = csv.writer(csvfile, delimiter=',')
            w.writerow([m, err_train, err_val])

        with open(os.path.join(my_ds.model_path, ('ensemble_time.csv')), 'a') as csvfile:
            w = csv.writer(csvfile, delimiter=',')
            w.writerow([m, t_train.cputime, t_pred.cputime])

Event Timeline