Page MenuHomec4science

ensemble-CV.py
No OneTemporary

File Metadata

Created
Mon, Apr 28, 17:08

ensemble-CV.py

# coding: utf-8
# In[1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import pandas as pd
import xarray as xr
import os
import sys
import time
import hpelm
import util
from ds import Dataset
from tables import open_file, Atom, Filters
import csv
# In[4]:
# Command line: <dsname> <modelname> <k> <n_nodes> <n_valsteps>
print(sys.argv)

# Validate and convert every argument before touching the dataset, so that a
# missing or malformed argument is reported immediately with a usage hint
# instead of as a bare IndexError/ValueError after the data has been loaded.
_USAGE = 'usage: %s <dsname> <modelname> <k> <n_nodes> <n_valsteps>' % sys.argv[0]
if len(sys.argv) < 6:
    sys.exit(_USAGE)

dsname = sys.argv[1]
modelname = sys.argv[2]
try:
    k = int(sys.argv[3])           # number of cross-validation folds
    n_nodes = int(sys.argv[4])     # passed to model.add_neurons() below
    n_valsteps = int(sys.argv[5])  # passed as steps= to validation_corr() below
except ValueError:
    sys.exit('k, n_nodes and n_valsteps must be integers\n' + _USAGE)

# With k == 1 no fold is left for training (np.hstack of an empty list fails),
# and with k < 1 the cross-validation loop never runs.
if k < 2:
    sys.exit('k must be at least 2 (got %d)\n' % k + _USAGE)

data_path = os.path.abspath("/Users/alinawalch/Documents/EPFL/data/meteo")
# data_path = os.path.abspath("/mnt/sda1/hyenergy/data/meteo")
queryname = 'query_locs_13d_500'

# In[5]:
# Load the train/test/query matrices; the timer brackets the load.
tt = util.Timer()
my_ds = Dataset(data_path, dsname, queryname)
my_ds.get_matrices(['train', 'test', 'query'])
tt.stop(print_wallclock=False)

# In[6]:
my_ds.add_model(modelname, queryname)
# NOTE(review): modelfile is not referenced again in this script.
modelfile = os.path.join(my_ds.model_path, 'model.hdf5')
t_nodes = 'sigm'  # neuron type passed to model.add_neurons() below
# In[7]:
# Stack the train and test matrices row-wise into one pool of samples.
data = np.vstack((my_ds.train_x, my_ds.test_x))
targets = np.vstack((my_ds.train_t, my_ds.test_t))
n, nf = data.shape[:2]   # number of samples, number of features
nt = targets.shape[1]    # number of target columns

# In[8]:
# Random ordering of all sample indices.
ind = np.random.permutation(n)

# In[9]:
# Cut the shuffled indices into k + 1 contiguous slices of (nearly) equal
# size. Slice boundaries use the same float arithmetic as before.
n_parts = k + 1
bounds = [int(float(j) / n_parts * n) for j in range(n_parts + 1)]
inds = [ind[lo:min(hi, n)] for lo, hi in zip(bounds[:-1], bounds[1:])]
# In[10]:
# The last index slice is set aside as the test split.
held_out = inds[-1]
test = data[held_out]
test_t = targets[held_out]

# In[11]:
# Temporary HDF5 files: *_F hold feature matrices, *_T hold targets.
# Train/validation files go under train_path_out, test files under
# test_path_out.
train_F, val_F, train_T, val_T = (
    os.path.join(my_ds.train_path_out, fname)
    for fname in ('train_tmp.hdf5', 'val_tmp.hdf5',
                  'train_t_tmp.hdf5', 'val_t_tmp.hdf5')
)
test_F, test_T = (
    os.path.join(my_ds.test_path_out, fname)
    for fname in ('test_tmp.hdf5', 'test_t_tmp.hdf5')
)
# In[13]:
# k-fold cross-validation over the first k index slices in `inds`.
# For each fold i: slice i is the validation set, the other k - 1 slices form
# the training set, a fresh HPELM is built on the training data and
# validation_corr() is evaluated against the validation data. Element 1 of its
# result is appended as a row to the cv_err CSV, element 0 to the cv_idx CSV.

# Output file names do not depend on the fold, so build them once.
_cv_suffix = '%d_%d_%d.csv' % (k, n_nodes, n_valsteps)
err_csv = os.path.join(my_ds.model_path, 'cv_err_' + _cv_suffix)
idx_csv = os.path.join(my_ds.model_path, 'cv_idx_' + _cv_suffix)

# Per-fold results. The original script printed `idx_err` at the end without
# ever assigning it (NameError once all folds had finished); it is
# reconstructed here as the list of (err[0], err[1]) pairs, one per fold.
idx_err = []

for i in range(k):
    # All cross-validation slices except the i-th one make up the training set.
    train_idx = np.hstack([fold for j, fold in enumerate(inds[:k]) if j != i])
    val = data[inds[i]]
    val_t = targets[inds[i]]
    train = data[train_idx]
    train_t = targets[train_idx]

    util.make_hdf5(train, train_F)
    util.make_hdf5(val, val_F)
    # NOTE(review): the test files are identical for every fold and are not
    # read inside this loop; kept here to preserve the original write order.
    util.make_hdf5(test, test_F)
    util.make_hdf5(train_t, train_T)
    util.make_hdf5(val_t, val_T)

    # Per-fold intermediate files, created relative to the working directory.
    fHH = 'fHH%d.hdf5' % i
    fHT = 'fHT%d.hdf5' % i

    model = hpelm.hp_elm.HPELM(nf, nt)
    model.add_neurons(n_nodes, t_nodes)
    model.add_data(train_F, train_T, fHH=fHH, fHT=fHT)
    err = model.validation_corr(fHH, fHT, val_F, val_T, steps=n_valsteps)
    idx_err.append((err[0], err[1]))

    # Append mode: rows accumulate across folds (and across repeated runs
    # with the same k / n_nodes / n_valsteps).
    with open(err_csv, 'a') as csvfile:
        csv.writer(csvfile, delimiter=',').writerow(err[1])
    with open(idx_csv, 'a') as csvfile:
        csv.writer(csvfile, delimiter=',').writerow(err[0])

# In[ ]:
print(idx_err)

Event Timeline