Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F111174911
make_1M_dataset.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Apr 30, 00:58
Size
1 KB
Mime Type
text/x-python
Expires
Fri, May 2, 00:58 (2 d)
Engine
blob
Format
Raw Data
Handle
25877616
Attached To
R8800 solar_potential
make_1M_dataset.py
View Options
# coding: utf-8
# In[5]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from matplotlib.colors import LinearSegmentedColormap
from tables import Atom, Filters, open_file

import util
from features import Testing, Training
# In[6]:

# Root directory of the meteo data; the CSV loaded below is resolved relative
# to it, and it is also handed to the dataset classes further down.
# Alternative local path kept for reference:
#   "/Users/alinawalch/Documents/EPFL/data/meteo"
data_path = os.path.abspath("/mnt/sda1/hyenergy/data/meteo")

# In[7]:

# Load the sample table from <data_path>/datasets/hourly_1M_sample.csv.
# index_col=0 makes the first CSV column the DataFrame index.
bigtable = pd.read_csv(
    os.path.join(data_path, 'datasets', 'hourly_1M_sample.csv'),
    index_col=0,
)
# In[13]:

# Suffix appended to the dataset name to identify the feature set.
# Alternatives noted by the author: ['_3D', '_6D'].
ftr = '_3D'

# Feature columns used for the selected feature set.
# Alternatives noted by the author (presumably one list per suffix above):
#   ['x', 'y', 'z', 'month', 'hour']
#   ['x', 'y', 'z', 'medDoG', 'big_NS', 'big_EW', 'month', 'hour']
ftr_list = ['x', 'y', 'z', 'month', 'hour']

# Label (target) columns.
lbl_list = ['SIS']

# In[9]:

# Name components and query settings used when building the datasets below.
locmask = 'sample1M'                # part of the dataset name
queryname = 'query_locs_13d_500'    # query name; also the stem of the query CSV
ds = 2001                           # numeric prefix of the dataset name
hours = list(range(3, 20))          # 3, 4, ..., 19
months = list(range(1, 13))         # 1, 2, ..., 12
# In[14]:

# Build the training/test dataset and the matching query dataset for the
# configured feature set, timing the whole run.
# (Left over from a looped version: for ftr,ftr_list in zip(ftrs, ftr_lists):)
dsname = str(ds) + '_' + locmask + '_SIS' + ftr

# Row position at which bigtable is split: the rows before it are passed as
# the training table, the remaining rows as the test table. Kept in a single
# name so both slices always agree.
n_train_rows = 800000

print('Making datasets ... ')
t_set = util.Timer()

new_set = Training(data_path, dsname, ftr_list, lbl_list, data_type='table')
print(new_set.features.cols)
new_set.make_dataset(
    table=bigtable[:n_train_rows],
    test_table=bigtable[n_train_rows:],
)
new_set.normalize_all(feature_norm='mean', target_norm='mean', val_ratio=1.0)

# queryname = querynames[0]
print('Making query dataset for %s' % queryname)
myquery = Testing(data_path, dsname, query_name=queryname)
myquery.make_query(loc=queryname + '.csv', hour=hours, month=months)
myquery.normalize_input()

t_set.stop(print_wallclock=False)
Event Timeline
Log In to Comment