Model uncertainty propagation

The total uncertainty is composed of two contributions:

- the intrinsic uncertainty of the modelled process
- the uncertainty about the values of the model parameters

Let’s demonstrate this with an artificial model.

Create a simple graph that just relates two variables x and y

[2]:
import numpy as np
import halerium.core as hal
from IPython.display import Image

# Build a minimal model graph named "g" in which y has mean x * factor
# and variance exp(log_var).
g = hal.Graph("g")
with g:
    # input variable with a fixed prior: mean 0, variance 0.1
    x = hal.Variable("x", mean=0, variance=.1)

    # the two model parameters, each with its own prior mean and variance;
    # their values are recovered from the training data further below
    factor = hal.StaticVariable("factor", mean=0., variance=1.)
    log_var = hal.StaticVariable("log_var", mean=-2., variance=1.)
    # the variance is parametrized by its logarithm, so exp() keeps it positive
    var = hal.exp(log_var)
# alternative formulation (disabled): make `var` an explicit static variable
# whose mean is exp(log_var) and whose own variance is zero
#    var = hal.StaticVariable("var")
#    var.mean = hal.exp(log_var)
#    var.variance = 0

    # output variable: its mean and variance are set from the expressions above
    y = hal.Variable("y")
    y.mean = x * factor
    y.variance = var

# call this to show the graph in the online platform
# hal.show(g)

Create artificial training data

[3]:
# Ground-truth parameter values used to simulate the training data.
real_factor = 3.  # the actual value of the parameter
real_log_var = np.log(0.3**2)  # log of the variance for a standard deviation of 0.3

# Pin both parameters to their true values and build a generative model
# for a data set of 10 points.
dl_gen = hal.DataLinker(n_data=10)
for parameter, true_value in ((g.factor, real_factor), (g.log_var, real_log_var)):
    dl_gen.link(parameter, true_value)
model_gen = hal.get_generative_model(g, dl_gen)

# Draw a single joint sample of x and y and keep only that one draw
# (index 0 along the leading axis).
x_draws, y_draws = model_gen.get_samples([g.x, g.y], n_samples=1)
x_train = np.array(x_draws)[0, :]
y_train = np.array(y_draws)[0, :]

Now train a model with the training data

[4]:
# Link the simulated observations of x and y as training data.
n_train = len(x_train)
dl_train = hal.DataLinker(n_data=n_train)
dl_train.link(g.x, x_train)
dl_train.link(g.y, y_train)

# Train the posterior model and read back the reconstructed parameters
# (mean) together with their uncertainties (standard deviation).
model_train = hal.get_posterior_model(g, dl_train)
rec_factor = model_train.get_means(g.factor)
std_factor = model_train.get_standard_deviations(g.factor)
rec_log_var = model_train.get_means(g.log_var)
std_log_var = model_train.get_standard_deviations(g.log_var)

# Compare the true values with the reconstruction, rounded to two decimals.
rec_factor_rounded = np.round(rec_factor, 2)
std_factor_rounded = np.round(std_factor, 2)
rec_log_var_rounded = np.round(rec_log_var, 2)
std_log_var_rounded = np.round(std_log_var, 2)

print("real_factor:", np.round(real_factor, 2))
print("rec_factor: {} +-{}".format(rec_factor_rounded, std_factor_rounded))
print("real_log_var:", np.round(real_log_var, 2))
print("rec_log_var: {} +-{}".format(rec_log_var_rounded, std_log_var_rounded))
real_factor: 3.0
rec_factor: 2.99 +-0.2
real_log_var: -2.41
rec_log_var: -2.43 +-0.42
[5]:
# Visualize the distribution of the `factor` parameter in the trained model.
# matplotlib.pyplot is used instead of the discouraged pylab interface.
import matplotlib.pyplot as plt

factor_samples = model_train.get_samples(g.factor, n_samples=1000)
plt.hist(factor_samples, bins=20)
# label the figure so it can be understood on its own
plt.xlabel("factor")
plt.ylabel("number of samples")
plt.title("Samples of factor from the trained model")

# mean and standard deviation of `factor`, requested with n_samples=1000
display(model_train.get_means(g.factor, n_samples=1000))
display(model_train.get_standard_deviations(g.factor, n_samples=1000))
2.992204822110734
0.18626433575347545
../../_images/examples_03_why_care_02_model_uncertainty_propagation_8_2.png

Predict without uncertainty propagation

[6]:
# Predict without uncertainty propagation: the parameters are fixed to their
# reconstructed values, so their uncertainty is not carried into the prediction.

x_test = [1.]

dl_predict1 = hal.DataLinker()
dl_predict1.link(g.factor, rec_factor)
dl_predict1.link(g.log_var, rec_log_var)
dl_predict1.link(g.x, x_test)

model_predict1 = hal.get_generative_model(g, dl_predict1)

# draw 5000 samples of y and keep index 0 along the second axis
y_draws_fixed_parameters = model_predict1.get_samples(g.y, n_samples=5000)
prediction_samples_without_uncertainty = np.array(y_draws_fixed_parameters)[:, 0]

Predict with uncertainty propagation

[7]:
# Predict with uncertainty propagation: instead of linking fixed parameter
# values, build the generative model from the posterior graph obtained via
# get_posterior_graph, so that only x is linked.

posterior_graph = model_train.get_posterior_graph()

dl_predict2 = hal.DataLinker()
dl_predict2.link(g.x, x_test)

model_predict2 = hal.get_generative_model(posterior_graph, dl_predict2)

# draw 5000 samples of y and keep index 0 along the second axis
y_draws_posterior_parameters = model_predict2.get_samples(g.y, n_samples=5000)
prediction_samples_with_uncertainty = np.array(y_draws_posterior_parameters)[:, 0]
[8]:
from plots import plot_two_densities

# Compare both sets of prediction samples in a single density plot and
# display the result as the cell output.
density_comparison = plot_two_densities(
    prediction_samples_with_uncertainty,
    prediction_samples_without_uncertainty,
    smoothness=8.,
)
Image(density_comparison)
[8]:
../../_images/examples_03_why_care_02_model_uncertainty_propagation_13_0.png