make_1M_dataset.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Mon, Jul 14, 01:09

make_1M_dataset.py
View Options


	# coding: utf-8

	# In[5]:


	import numpy as np
	import matplotlib.pyplot as plt
	from matplotlib.colors import LinearSegmentedColormap
	import pandas as pd
	import xarray as xr
	import os
	import time
	import util
	from features import Training, Testing
	from tables import open_file, Atom, Filters


	# In[6]:


	# Absolute root directory of the meteo data.
	# Alternative location previously used by the author:
	#   /Users/alinawalch/Documents/EPFL/data/meteo
	data_path = os.path.abspath("/mnt/sda1/hyenergy/data/meteo")


	# In[7]:


	# Read <data_path>/datasets/hourly_1M_sample.csv into a DataFrame,
	# using the first CSV column as the row index.
	sample_csv = os.path.join(data_path, 'datasets', 'hourly_1M_sample.csv')
	bigtable = pd.read_csv(sample_csv, index_col=0)


	# In[13]:


	# Suffix identifying the feature set; it is appended to the dataset name
	# further down. The author's note lists '_3D' and '_6D' as the options.
	ftr = '_3D'

	# Feature columns handed to Training. The alternative list noted by the
	# author for the other feature set is
	# ['x','y','z','medDoG','big_NS','big_EW','month','hour'].
	ftr_list = ['x', 'y', 'z', 'month', 'hour']

	# Label list handed to Training.
	lbl_list = ['SIS']


	# In[9]:


	# locmask and ds are combined into the dataset name; queryname is used both
	# as the query name and as the stem of the query-location CSV file.
	locmask, queryname, ds = 'sample1M', 'query_locs_13d_500', 2001

	# Hour values 3..19 and month values 1..12 requested for the query set.
	hours = [*range(3, 20)]
	months = [*range(1, 13)]


	# In[14]:


	# Build the training/test dataset and the matching query dataset for the
	# single feature set configured above. (An earlier version looped over
	# several feature sets: `for ftr, ftr_list in zip(ftrs, ftr_lists)`.)

	# Dataset name: <ds>_<locmask>_SIS<ftr>
	dsname = ''.join([str(ds), '_', locmask, '_SIS', ftr])

	print('Making datasets ... ')
	t_set = util.Timer()

	new_set = Training(data_path, dsname, ftr_list, lbl_list, data_type='table')
	print(new_set.features.cols)

	# The first 800000 rows of the table are passed as `table`, the remaining
	# rows as `test_table`.
	split_at = 800000
	train_rows = bigtable[:split_at]
	test_rows = bigtable[split_at:]
	new_set.make_dataset(table=train_rows, test_table=test_rows)

	# NOTE(review): the meaning of the 'mean' norms and of val_ratio=1.0 is
	# defined in features.Training -- confirm there before changing them.
	new_set.normalize_all(feature_norm='mean', target_norm='mean', val_ratio=1.0)

	# Query dataset: locations come from '<queryname>.csv', combined with the
	# configured hours and months.
	print('Making query dataset for %s' % queryname)
	myquery = Testing(data_path, dsname, query_name=queryname)
	query_file = queryname + '.csv'
	myquery.make_query(loc=query_file, hour=hours, month=months)
	myquery.normalize_input()

	t_set.stop(print_wallclock=False)

make_1M_dataset.pyNo OneTemporaryActions

File Metadata

make_1M_dataset.pyView Options

Event Timeline

make_1M_dataset.py
No OneTemporary
Actions

make_1M_dataset.py
View Options