Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F110737066
ELM_ensemble.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Apr 27, 21:31
Size
6 KB
Mime Type
text/x-python
Expires
Tue, Apr 29, 21:31 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
25806295
Attached To
R8800 solar_potential
ELM_ensemble.py
View Options
import
numpy
as
np
import
pandas
as
pd
import
xarray
as
xr
import
os
import
sys
import
time
import
hpelm
import
util
from
ds
import
Dataset
from
tables
import
open_file
,
Atom
,
Filters
import
csv
from
sklearn.metrics
import
mean_squared_error
as
mse
import
h5py
def find_name(filename):
    """Return a path that does not collide with an existing file.

    If *filename* is free it is returned unchanged; otherwise a counter
    prefix is tried ('1_name.ext', '2_name.ext', ...) until an unused
    path is found in the same directory.
    """
    directory, base = os.path.split(filename)
    stem, suffix = os.path.splitext(base)
    counter = 0
    candidate = filename
    while os.path.exists(candidate):
        counter += 1
        candidate = os.path.join(directory, '%d_%s%s' % (counter, stem, suffix))
    return candidate
class HPELM_Ensemble():
    """Bagging ensemble of High-Performance ELM models (``hpelm.HPELM``).

    Each estimator is an independent HPELM network. With ``bootstrap=True``
    every estimator is trained on a bootstrap resample of the training data;
    with ``oob=True`` the resampling masks are recorded in ``OOB.hdf5`` so an
    out-of-bag prediction / uncertainty estimate can be computed afterwards
    by :meth:`oob_prediction`.

    Parameters
    ----------
    path : str
        Directory for models, logs and scratch files (created, together
        with a ``tmp`` subdirectory, if missing).
    n_estimators : int
        Number of ELM models in the ensemble.
    n_nodes : int
        Number of hidden neurons added to each model.
    n_features, n_targets : int
        Input / output dimensionality.
    t_nodes : str
        Hidden-neuron type forwarded to ``HPELM.add_neurons`` (default 'sigm').
    bootstrap : bool
        Train each model on a bootstrap resample of the data.
    oob : bool
        Record out-of-bag masks (only meaningful together with bootstrap).
    precision : str or None
        hpelm numeric precision; defaults to 'single' on GPU, 'double' otherwise.
    accelerator : str or None
        hpelm accelerator ('GPU' or None).
    save_model : bool
        Save each trained model to ``path/model_XX.hdf5``.
    """

    def __init__(self, path, n_estimators, n_nodes, n_features, n_targets,
                 t_nodes='sigm', bootstrap=True, oob=False, precision=None,
                 accelerator=None, save_model=True):
        self.n_est = n_estimators
        self.n_nodes = n_nodes
        self.t_nodes = t_nodes
        self.nf = n_features
        self.nt = n_targets
        self.bootstrap = bootstrap
        self.oob = oob
        self.save_model = save_model
        self.model_path = path
        # Ensure the model directory and its scratch space exist.
        if not os.path.exists(path):
            os.mkdir(path)
        if not os.path.exists(os.path.join(path, 'tmp')):
            os.mkdir(os.path.join(path, 'tmp'))
        # GPU runs default to single precision, everything else to double.
        if accelerator == 'GPU':
            accel = 'GPU'
            if precision is None:
                precision = 'single'
        else:
            accel = accelerator
            if precision is None:
                precision = 'double'
        self.estimators_ = [
            hpelm.HPELM(n_features, n_targets, accelerator=accel,
                        precision=precision)
            for _ in range(n_estimators)
        ]

    def fit(self, X, T, val=None, val_X=None, val_T=None):
        """Train every estimator and log per-model wall/CPU times.

        X, T, val_X, val_T are file paths holding HDF5 matrices (consumed
        via ``util.get_matrix`` / passed to ``HPELM.train``); ``val`` is the
        validation mode forwarded to ``HPELM.train`` — assumed to be one of
        hpelm's training flags, TODO confirm against the hpelm API.
        Training times are appended to ``self.train_times_`` and written to
        an auto-numbered ``log_train.csv`` in the model directory.
        """
        self.train_times_ = []
        print('\n\nTraining model')
        logfile = find_name(os.path.join(self.model_path, ('log_train.csv')))
        with open(logfile, 'w') as csvfile:
            w = csv.writer(csvfile, delimiter=',')
            w.writerow(['model_ID', 'walltime', 'cputime'])
        if self.bootstrap:
            # Bootstrap resampling needs the full data in memory
            # (assumes the training set fits in RAM — see oob_prediction).
            x = util.get_matrix(X)
            t = util.get_matrix(T)
            n = x.shape[0]
            if self.oob:
                oob_file = os.path.join(self.model_path, 'OOB.hdf5')
                f = h5py.File(oob_file, 'w')
                oob_ds = f.create_dataset('data', (n, self.n_est), dtype='i')
        for m, model in enumerate(self.estimators_):
            print('Fitting model %d' % m)
            tt = util.Timer()
            # apply bootstrap:
            if self.bootstrap:
                ind = np.random.randint(n, size=n)
                if self.oob:
                    # Mark samples NOT drawn in this resample (out-of-bag).
                    oob_vec = np.zeros(n)
                    oob_vec[np.delete(range(n), np.unique(ind))] = 1
                    oob_ds[:, m] = oob_vec
                train = x[ind, :]
                train_t = t[ind]
                X_tr = os.path.join(self.model_path, 'tmp', 'train_x.hdf5')
                T_tr = os.path.join(self.model_path, 'tmp', 'train_t.hdf5')
                # Remove stale scratch files before rewriting them
                # (presumably util.make_hdf5 does not overwrite — verify).
                if os.path.exists(X_tr):
                    os.remove(X_tr)
                if os.path.exists(T_tr):
                    os.remove(T_tr)
                util.make_hdf5(train, X_tr)
                util.make_hdf5(train_t, T_tr)
            else:
                X_tr = X
                T_tr = T
            model.add_neurons(self.n_nodes, self.t_nodes)
            if val is None:
                model.train(X_tr, T_tr)
            else:
                model.train(X_tr, T_tr, val, Xv=val_X, Tv=val_T)
            if self.save_model:
                model.save(os.path.join(self.model_path,
                                        ('model_%02d.hdf5' % (m))))
            tt.stop(print_any=False)
            with open(logfile, 'a') as csvfile:
                w = csv.writer(csvfile, delimiter=',')
                w.writerow([m, tt.walltime, tt.cputime])
            self.train_times_.append([tt.cputime, tt.walltime])
        # BUGFIX: `f` exists only when bootstrapping; the original guarded
        # close() on self.oob alone and raised NameError for
        # oob=True, bootstrap=False.
        if self.bootstrap and self.oob:
            f.close()

    def load(self):
        """Load previously saved models (``model_XX.hdf5``) into the
        estimators; failures are reported but do not abort the loop."""
        for m, model in enumerate(self.estimators_):
            model_file = os.path.join(self.model_path,
                                      ('model_%02d.hdf5' % (m)))
            try:
                model.load(model_file)
            # Narrowed from a bare except so Ctrl-C / SystemExit still work.
            except Exception:
                print('ERROR: could not load model in ' + model_file)

    def predict(self, X, Y=None, t=None, eval=False, norm=None, label=''):
        """Predict with every estimator.

        If ``Y`` is None, predictions are kept in memory: returns
        ``(ensemble_mean, per_model_list)`` when a target ``t`` is given
        (and fills ``self.mse_`` with the running-ensemble MSE), else just
        the per-model list. If ``Y`` is a file path, each model writes
        ``<body>_XX.hdf5`` next to it and, when ``eval`` is true, the files
        are merged via ``util.merge_files``.

        NOTE(review): the ``eval`` parameter shadows the builtin but is
        kept for API compatibility. ``t`` is the target, used to compute
        errors "on the fly".
        """
        print('\n\nPredicting for %s' % label)
        self.prediction_times_ = []
        if Y is None:
            y_out = []
        else:
            body = os.path.splitext(Y)[0]
        logfile = find_name(os.path.join(self.model_path,
                                         ('log_pred_%s.csv' % label)))
        if t is not None:
            y_pred = np.zeros(t.shape)
            self.mse_ = np.zeros(self.n_est)
            get_mse = True
        else:
            get_mse = False
        with open(logfile, 'w') as csvfile:
            w = csv.writer(csvfile, delimiter=',')
            if get_mse:
                w.writerow(['model_ID', 'mse', 'walltime', 'cputime'])
            else:
                w.writerow(['model_ID', 'walltime', 'cputime'])
        for m, model in enumerate(self.estimators_):
            print('Predicting on model %d' % m)
            tt = util.Timer()
            if Y is None:
                y_pred_tmp = model.predict(X)
                y_out.append(y_pred_tmp)
                if get_mse:
                    y_pred += y_pred_tmp
                    # MSE of the running ensemble mean over the first m+1 models.
                    self.mse_[m] = mse(t, y_pred / (m + 1))
            else:
                Y_pred = ('%s_%02d.hdf5' % (body, m))
                model.predict(X, Y_pred)
            tt.stop(print_any=False)
            with open(logfile, 'a') as csvfile:
                w = csv.writer(csvfile, delimiter=',')
                if get_mse:
                    w.writerow([m, self.mse_[m], tt.walltime, tt.cputime])
                else:
                    w.writerow([m, tt.walltime, tt.cputime])
            self.prediction_times_.append([tt.cputime, tt.walltime])
        if Y is None:
            if t is not None:
                return y_pred / self.n_est, y_out
            else:
                return y_out
        elif eval:
            util.merge_files(Y, self.n_est, batches=True, norm=norm)

    def oob_prediction(self, Y, norm=None):
        """Combine per-model prediction files into ensemble mean, variance
        and out-of-bag prediction.

        Reads ``OOB.hdf5`` (written by :meth:`fit`) and the per-model files
        ``<body>_XX.hdf5`` produced by :meth:`predict` (each file is deleted
        after being consumed). Writes the mean to ``Y``, the variance to
        ``<body>_var.hdf5`` and the OOB prediction to
        ``oob_prediction.hdf5``; returns ``(oob, var, mean)``.

        FOR NOW NOT IN BATCHES (assumes ds to be small enough - necessary
        condition for bootstrap!).
        """
        body = os.path.splitext(Y)[0]
        filepath = os.path.split(Y)[0]
        oob_file = os.path.join(self.model_path, 'OOB.hdf5')
        oob_inds = util.get_matrix(oob_file)
        n = oob_inds.shape[0]
        oob_count = np.zeros((n, 1))
        oob_sigma = np.zeros((n, self.nt))
        y_sigma = np.zeros((n, self.nt))
        y_sigma_sq = np.zeros((n, self.nt))
        for m in range(self.n_est):
            inds = oob_inds[:, m].reshape((-1, 1))
            oob_mask = np.repeat(inds, self.nt, axis=1)
            Y_pred = ('%s_%02d.hdf5' % (body, m))
            y_tmp = util.get_matrix(Y_pred)
            # Per-model prediction files are consumed (deleted) here.
            os.remove(Y_pred)
            if norm is not None:
                y_tmp = norm.rescale(y_tmp)
            y_sigma += y_tmp
            y_sigma_sq += y_tmp ** 2
            oob_count += inds
            oob_sigma += oob_mask * y_tmp
        # BUGFIX: np.nonzero on the (n, 1) array returns a *tuple* of index
        # arrays, so len() reported the number of dimensions (always 2) and
        # `oob_sigma[zero_inds, :]` built an over-long index. Use 1-D row
        # indices instead.
        zero_inds = np.nonzero(oob_count[:, 0] == 0)[0]
        print('%d out of %d samples never out-of-bag' % (len(zero_inds), n))
        print('-> substituted these samples with overall prediction')
        oob_sigma[zero_inds, :] = y_sigma[zero_inds, :]
        oob_count[zero_inds] = self.n_est
        mean = y_sigma / self.n_est
        # Biased (population) variance of the per-model predictions.
        var = 1.0 / self.n_est * (y_sigma_sq - y_sigma ** 2 / self.n_est)
        oob = oob_sigma / oob_count
        util.make_hdf5(mean, Y)
        util.make_hdf5(var, body + '_var.hdf5')
        util.make_hdf5(oob, os.path.join(filepath, 'oob_prediction.hdf5'))
        return oob, var, mean
Event Timeline
Log In to Comment