Page MenuHomec4science

uncertainty.py
No OneTemporary

File Metadata

Created
Wed, Apr 30, 09:28

uncertainty.py

import numpy as np
import pandas as pd
import xarray as xr
from sklearn.model_selection import cross_val_predict
from sklearn.base import clone
def _check_dimensions( array ):
# reshape 1-dimensional arrays to (-1,1)
if len(array.shape) == 1:
return array.reshape((-1,1))
else: return array
class Uncertainty():
def __init__(self, estimator, residual_estimator = None):
self.model = estimator
self.residuals = clone(estimator) if residual_estimator is None else residual_estimator
# set oob_score and bootstrap to True, to allow access to oob_prediction_ for residuals
self.model.set_params( bootstrap = True, oob_score = True)
def get_params(self, deep = False):
return { 'estimator' : self.model, 'residual_estimator' : self.residuals}
def fit(self, x, t):
self.model.fit(x, t)
def predict(self, x):
return self.model.predict( x )
def predict_model_uncertainty(self, x):
# predict and calculate model uncertainty
# initialise sum and sum of squares (for calculation of mean and variance)
y_sigma = np.zeros(( len(x), self.model.n_outputs_ ))
y_sigma_sq = np.zeros(( len(x), self.model.n_outputs_ ))
for m, model in enumerate( self.model.estimators_ ):
y_tmp = _check_dimensions( model.predict(x))
y_sigma += y_tmp
y_sigma_sq += y_tmp ** 2
prediction = y_sigma / self.model.n_estimators
model_var = ( 1.0 / self.model.n_estimators ) * ( y_sigma_sq - y_sigma**2 / self.model.n_estimators )
return prediction, np.sqrt( model_var )
def fit_residuals( self, x, t, model_std = None ):
# calculate residuals and fit model
residual_data = self.calculate_residuals( x, t, model_std = model_std )
self.residuals.fit( x, residual_data )
return residual_data
def predict_residuals( self, x ):
return self.residuals.predict( x )
def predict_data_uncertainty(self, x):
return self.residuals.predict( x )
def crossval_residuals( self, x, t, cv = 3, model_std = None ):
# calculate residuals and perform cross_val_predict
residual_data = self.calculate_residuals( x, t, model_std = model_std )
predicted_residuals = cross_val_predict( self.residuals, x, residual_data, cv = cv)
return residual_data, predicted_residuals
def set_residual_model(self, estimator):
self.residuals = estimator
def calculate_residuals(self, x, t, model_std = None ):
# calculate residuals from out-of-bag prediction and by subtracting the model variance
# NOTE: residuals are defined as the SQUARE-ROOT of the squared difference!!!!!!!!
# (i.e. directly the data uncertainty)
# check if model standard deviation (i.e. model uncertainty) has been previously calculated
if model_std is None:
print('Model variance not given\nEstimating model variance ... ')
_, model_std = self.predict_model_uncertainty(x)
print('Fitting model ...')
oob_prediction = _check_dimensions( self.model.oob_prediction_ )
target = _check_dimensions( t )
model_std = _check_dimensions( model_std )
residuals_oob = ( oob_prediction - target )**2 - model_std**2
residuals = np.sqrt( np.maximum(residuals_oob, 0) )
if self.model.n_outputs_ == 1:
residuals = residuals.reshape((-1,))
return residuals

Event Timeline