Page MenuHomec4science

make_ds_rand.py
No OneTemporary

File Metadata

Created
Sun, Apr 27, 11:18

make_ds_rand.py

#!/usr/bin/env python
import numpy as np
import pandas as pd
import xarray as xr
import os
import sys
import time
import util
from features import Training, Testing
from meteo_data import Meteo_Reader
# Build a randomly sub-sampled training/test dataset from one year of data.
#
# Positional command-line arguments:
#   1. data_path    root data directory; input is read from <data_path>/raw_data
#   2. dataset      dataset name handed to Training
#   3. features     comma-separated feature names
#   4. targets      comma-separated target variable names
#   5. sample_size  total number of rows to draw, expressed in millions
#   6. val_ratio    validation ratio forwarded to normalize_all
if len(sys.argv) < 7:
    # Exit with a usage line instead of an opaque IndexError on sys.argv.
    sys.exit(
        "usage: %s DATA_PATH DATASET FEATURES TARGETS "
        "SAMPLE_SIZE_MILLIONS VAL_RATIO" % sys.argv[0]
    )

data_path = sys.argv[1]
dataset = sys.argv[2]
features = sys.argv[3].split(",")
targets = sys.argv[4].split(",")
sample_size = float(sys.argv[5]) * 1000000  # argument is given in millions
val_ratio = float(sys.argv[6])

year = 2001  # the single year passed to read_yearly
train_locs = "locations/all_train.txt"
test_locs = "locations/all_test.txt"

# Shares of sample_size requested from the training and test tables.
TRAIN_FRACTION = 0.8
TEST_FRACTION = 0.2

# NOTE(review): util.Timer presumably measures the loading phase that ends at
# tt.stop() -- confirm against util.
tt = util.Timer()
ds = Meteo_Reader(os.path.join(data_path, 'raw_data'), variables=targets)
ds.read_yearly(year)
train_data = ds.get_subset(filename=train_locs, sample_name='train')
test_data = ds.get_subset(filename=test_locs, sample_name='test')
tbl_train = ds.make_table(indata=train_data, ftrs=features)
tbl_test = ds.make_table(indata=test_data, ftrs=features)
tt.stop()

# Cap each request at the table length so we never ask for more rows than exist.
N_train = min(int(TRAIN_FRACTION * sample_size), len(tbl_train))
N_test = min(int(TEST_FRACTION * sample_size), len(tbl_test))

new_set = Training(data_path, dataset, features, targets, data_type='table')
new_set.make_dataset(
    table=tbl_train.sample(N_train),
    test_table=tbl_test.sample(N_test),
)
new_set.normalize_all(
    feature_norm='mean',
    target_norm='mean',
    val_ratio=val_ratio,
    force_normalization=True,
)

Event Timeline