Encoding causal structure#
Imagine having a process with some causal structure.
The causal structure we assume simply encodes that:
- y1 depends on settings x1 and x2
- y2 depends on settings x2 and x3
[2]:
# Imports: halerium core for probabilistic graph building, plus the helper
# that connects inputs to outputs via a learnable (linear) regression.
import numpy as np
import halerium.core as hal
from halerium.core.regression import connect_via_regression
# NOTE(review): the names used inside the `with` blocks below (`settings`,
# `substructure1`, `inputs`, `outputs`, `results`, ...) are injected by
# halerium's scoping when hal.Entity/hal.Graph creates them — they are not
# ordinary Python variables defined elsewhere in this notebook.
g = hal.Graph("g")
with g:
# Three shared setting variables with standard-normal priors.
hal.Entity("settings")
with settings:
hal.Variable("x1", mean=0., variance=1.)
hal.Variable("x2", mean=0., variance=1.)
hal.Variable("x3", mean=0., variance=1.)
# Sub-graph for result y1: depends on x1 and x2 only.
hal.Graph("substructure1")
with substructure1:
with inputs:
hal.Entity("settings")
with settings:
# Input copies carry no priors of their own; they are linked
# to the global g.settings at the bottom of this cell.
hal.Variable("x1")
hal.Variable("x2")
with outputs:
hal.Entity("results")
with results:
# Small observation noise on the result.
hal.Variable("y1", variance=0.1)
# Wire the regression y1 = f(x1, x2); parameters get the prefix "reg".
connect_via_regression(name_prefix="reg",
inputs=[inputs.settings.x1, inputs.settings.x2],
outputs=[outputs.results.y1])
# Sub-graph for result y2: depends on x2 and x3 only.
hal.Graph("substructure2")
with substructure2:
with inputs:
hal.Entity("settings")
with settings:
hal.Variable("x2")
hal.Variable("x3")
with outputs:
hal.Entity("results")
with results:
hal.Variable("y2", variance=0.1)
connect_via_regression(name_prefix="reg",
inputs=[inputs.settings.x2, inputs.settings.x3],
outputs=[outputs.results.y2])
# Identify the global settings with each sub-graph's input settings,
# so both regressions read the same x1, x2, x3.
hal.link(settings, substructure1.inputs.settings)
hal.link(settings, substructure2.inputs.settings)
# use the hal.show function to display the graph in the online platform
#hal.show(g)
Let us generate artificial data to test this.#
The important thing is that the settings in the past were not chosen randomly, but they were managed in some manner that did not have a machine learning use case in mind.
Generate training data#
[3]:
# Ground-truth regression parameters used to generate the artificial data.
real_matrix1 = np.array([1., -1])
real_intercept1 = 0.
real_matrix2 = np.array([-1, 1.])
real_intercept2 = 0.

# Past settings were managed, not randomized: three fixed operating points,
# each visited 10 times, shape (30, 3).
past_settings = np.repeat(
    np.array([[-1., -1., -1.],
              [0., -1., 0.],
              [1., 1., 1.]]),
    10, axis=0)

# Clamp the settings and the true slopes/intercepts, then sample y1 and y2
# from the generative model.
past_data = {g.settings.x1: past_settings[:, 0],
             g.settings.x2: past_settings[:, 1],
             g.settings.x3: past_settings[:, 2],
             g.substructure1.reg_y1.location.slope: real_matrix1,
             g.substructure1.reg_y1.location.intercept: real_intercept1,
             g.substructure2.reg_y2.location.slope: real_matrix2,
             g.substructure2.reg_y2.location.intercept: real_intercept2}
past_results = hal.get_generative_model(g, data=past_data).get_samples(
    [g.substructure1.outputs.results.y1, g.substructure2.outputs.results.y2])
# NOTE(review): indexing assumes get_samples returns one sample per output
# variable; after the transpose rows are examples, columns are (y1, y2).
past_results = np.array(past_results)[:, 0, :].T
Generate test data#
[4]:
# Future settings: two operating points that never occurred in the past
# data, each visited 10 times, shape (20, 3).
future_settings = np.repeat(
    np.array([[-1., 0., 0.],
              [0.5, 0.5, -1.]]),
    10, axis=0)

# Sample the corresponding results with the same true parameters clamped.
future_data = {g.settings.x1: future_settings[:, 0],
               g.settings.x2: future_settings[:, 1],
               g.settings.x3: future_settings[:, 2],
               g.substructure1.reg_y1.location.slope: real_matrix1,
               g.substructure1.reg_y1.location.intercept: real_intercept1,
               g.substructure2.reg_y2.location.slope: real_matrix2,
               g.substructure2.reg_y2.location.intercept: real_intercept2}
future_results = hal.get_generative_model(g, data=future_data).get_samples(
    [g.substructure1.outputs.results.y1, g.substructure2.outputs.results.y2])
# Same reshaping as for the past data: rows are examples, columns (y1, y2).
future_results = np.array(future_results)[:, 0, :].T
Train a black box model and apply it on test data#
[5]:
from sklearn.linear_model import LinearRegression

# Black-box baseline: ordinary least squares mapping all three settings
# jointly to both results, with no knowledge of the causal structure.
black_box_model = LinearRegression()
black_box_model.fit(past_settings, past_results)

black_box_prediction_past = black_box_model.predict(past_settings)
black_box_prediction_future = black_box_model.predict(future_settings)
Train the causal model and apply it on test data#
[6]:
# Condition the graph on the past observations to obtain the posterior over
# the regression parameters, then bake that posterior into a new graph.
training_data = {g.settings.x1: past_settings[:, 0],
                 g.settings.x2: past_settings[:, 1],
                 g.settings.x3: past_settings[:, 2],
                 g.substructure1.outputs.results.y1: past_results[:, 0],
                 g.substructure2.outputs.results.y2: past_results[:, 1]}
causal_model = hal.get_posterior_model(g, data=training_data)
trained_causal_graph = causal_model.get_posterior_graph()

result_variables = [g.substructure1.outputs.results.y1,
                    g.substructure2.outputs.results.y2]


def _predict_means(settings_array):
    """Mean predictions of (y1, y2) for the given settings, rows = examples."""
    model = hal.get_generative_model(
        trained_causal_graph,
        data={g.settings.x1: settings_array[:, 0],
              g.settings.x2: settings_array[:, 1],
              g.settings.x3: settings_array[:, 2]})
    return np.array(model.get_means(result_variables)).T


causal_prediction_past = _predict_means(past_settings)
causal_prediction_future = _predict_means(future_settings)
Compare the performance#
[7]:
# Normalize all mean-squared errors by the total variance of the observed
# results, so each score reads like an R^2: 1 is perfect, 0 is no better
# than predicting the overall mean.
norm = np.var(np.append(past_results, future_results))

black_box_performance_past = 1 - np.mean((black_box_prediction_past - past_results)**2) / norm
causal_performance_past = 1 - np.mean((causal_prediction_past - past_results)**2) / norm
black_box_performance_future = 1 - np.mean((black_box_prediction_future - future_results)**2) / norm
causal_performance_future = 1 - np.mean((causal_prediction_future - future_results)**2) / norm

print("black box on training data:", black_box_performance_past)
print("causal model on training data:", causal_performance_past)
print("black box on test data:", black_box_performance_future)
# Fixed label: previously read "causal box", which matched neither the
# "black box" nor the "causal model" naming used above.
print("causal model on test data:", causal_performance_future)
black box on training data: 0.8700155151258816
causal model on training data: 0.8646083422454567
black box on test data: 0.0830037653624115
causal model on test data: 0.8269131649522296
Visualization#
[8]:
# Render the comparison figure (the helper lives in the local plots module)
# and display it inline via IPython's rich image display.
from plots import plot_compare_causal_and_black_box
from IPython.display import Image

comparison_plot = plot_compare_causal_and_black_box(black_box_performance_past,
                                                    black_box_performance_future,
                                                    causal_performance_past,
                                                    causal_performance_future)
Image(comparison_plot)
[8]:
[ ]: