Page MenuHomec4science

make_1M_dataset.py
No OneTemporary

File Metadata

Created
Wed, Apr 30, 00:58

make_1M_dataset.py

# coding: utf-8
# In[5]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import pandas as pd
import xarray as xr
import os
import time
import util
from features import Training, Testing
from tables import open_file, Atom, Filters
# In[6]:
# Root directory of the meteo data.
# Alternative location used previously:
#   "/Users/alinawalch/Documents/EPFL/data/meteo"
data_path = os.path.abspath("/mnt/sda1/hyenergy/data/meteo")
# In[7]:
# Read the hourly sample table; the first CSV column becomes the index.
sample_csv = os.path.join(data_path, 'datasets', 'hourly_1M_sample.csv')
bigtable = pd.read_csv(sample_csv, index_col=0)
# In[13]:
# Suffix identifying the feature set in the dataset name.
# Known options: '_3D', '_6D'.
ftr = '_3D'
# Feature columns for the selected set. The alternative listed alongside
# (presumably for '_6D' -- confirm) is:
#   ['x','y','z','medDoG','big_NS','big_EW','month','hour']
ftr_list = [
    'x',
    'y',
    'z',
    'month',
    'hour',
]
# Label (target) columns.
lbl_list = ['SIS']
# In[9]:
# Components of the dataset name and the query-location file name.
locmask = 'sample1M'
queryname = 'query_locs_13d_500'
ds = 2001  # used as the dataset-name prefix (presumably a year -- confirm)
# Hours 3..19 and months 1..12 passed to the query dataset.
hours = [*range(3, 20)]
months = [*range(1, 13)]
# In[14]:
# Build one training/test dataset plus its query dataset for the configured
# feature set. (An earlier variant iterated over several feature sets:
#   for ftr, ftr_list in zip(ftrs, ftr_lists): ...)
dsname = ''.join([str(ds), '_', locmask, '_SIS', ftr])

print('Making datasets ... ')
t_set = util.Timer()  # started before any dataset work begins

new_set = Training(data_path, dsname, ftr_list, lbl_list, data_type='table')
print(new_set.features.cols)

# The first 800000 rows go in as the main table, the remainder as test table.
n_train = 800000
new_set.make_dataset(table=bigtable[:n_train], test_table=bigtable[n_train:])
new_set.normalize_all(feature_norm='mean', target_norm='mean', val_ratio=1.0)

# Query dataset: locations come from '<queryname>.csv', evaluated for the
# configured hours and months.
print('Making query dataset for %s' %queryname)
myquery = Testing(data_path, dsname, query_name=queryname)
myquery.make_query(loc=queryname + '.csv', hour=hours, month=months)
myquery.normalize_input()

t_set.stop(print_wallclock=False)

Event Timeline