Encoding causal structure

Imagine having a process with some causal structure

The causal structure we assume simply encodes that:

- y1 depends on settings x1 and x2
- y2 depends on settings x2 and x3

[2]:
import numpy as np
import halerium.core as hal
from halerium.core.regression import connect_via_regression

# Build the causal graph: three global setting variables feed two
# subgraphs, each of which regresses one result variable onto the two
# settings it causally depends on (y1 <- x1, x2 and y2 <- x2, x3).
g = hal.Graph("g")
with g:
    # NOTE(review): objects created by name inside a halerium `with` block
    # (e.g. hal.Entity("settings")) become available as bare names
    # (`settings`, `substructure1`, `inputs`, ...) in that scope — this
    # relies on halerium's scope injection; confirm against halerium docs.
    hal.Entity("settings")
    with settings:
        # The three controllable settings, with standard-normal priors.
        hal.Variable("x1", mean=0., variance=1.)
        hal.Variable("x2", mean=0., variance=1.)
        hal.Variable("x3", mean=0., variance=1.)

    # Subgraph 1: y1 is a regression on x1 and x2 only.
    hal.Graph("substructure1")
    with substructure1:
        with inputs:
            # Mirror of the relevant global settings; linked to the
            # top-level `settings` entity below via hal.link.
            hal.Entity("settings")
            with settings:
                hal.Variable("x1")
                hal.Variable("x2")

        with outputs:
            hal.Entity("results")
            with results:
                # Observation noise of the result variable.
                hal.Variable("y1", variance=0.1)

        # Creates the regression parameters (slope/intercept) under the
        # prefix "reg" and wires inputs -> output.
        connect_via_regression(name_prefix="reg",
                               inputs=[inputs.settings.x1, inputs.settings.x2],
                               outputs=[outputs.results.y1])

    # Subgraph 2: y2 is a regression on x2 and x3 only.
    hal.Graph("substructure2")
    with substructure2:
        with inputs:
            hal.Entity("settings")
            with settings:
                hal.Variable("x2")
                hal.Variable("x3")

        with outputs:
            hal.Entity("results")
            with results:
                hal.Variable("y2", variance=0.1)

        connect_via_regression(name_prefix="reg",
                               inputs=[inputs.settings.x2, inputs.settings.x3],
                               outputs=[outputs.results.y2])

    # Identify each subgraph's input settings with the global settings.
    hal.link(settings, substructure1.inputs.settings)
    hal.link(settings, substructure2.inputs.settings)

# use the hal.show function to display the graph in the online platform
#hal.show(g)

Let us generate artificial data to test this.

The important thing is that the settings in the past were not chosen randomly, but they were managed in some manner that did not have a machine learning use case in mind.

Generate training data

[3]:
# Ground-truth regression parameters used to generate the artificial data:
# y1 = 1*x1 - 1*x2 and y2 = -1*x2 + 1*x3, both with zero intercept.
real_matrix1 = np.array([1., -1])
real_intercept1 = 0.
real_matrix2 = np.array([-1, 1.])
real_intercept2 = 0.

# Past settings are deliberately NOT random: just three fixed
# configurations, each repeated 10 times (30 data points total) — the
# settings were "managed" historically, not designed with ML in mind.
past_settings = [[-1., -1., -1.]]*10
past_settings += [[0., -1., 0.]]*10
past_settings += [[1., 1., 1.]]*10
past_settings = np.array(past_settings)

# Clamp the settings and the true regression parameters in the generative
# model, then draw samples of the two result variables.
past_results = hal.get_generative_model(g, data={g.settings.x1: past_settings[:, 0],
                                                 g.settings.x2: past_settings[:, 1],
                                                 g.settings.x3: past_settings[:, 2],
                                                 g.substructure1.reg_y1.location.slope: real_matrix1,
                                                 g.substructure1.reg_y1.location.intercept: real_intercept1,
                                                 g.substructure2.reg_y2.location.slope: real_matrix2,
                                                 g.substructure2.reg_y2.location.intercept: real_intercept2}).get_samples(
    [g.substructure1.outputs.results.y1, g.substructure2.outputs.results.y2])
# NOTE(review): get_samples seems to return (variables, samples, data
# points); [:, 0, :] picks the single sample and .T gives a
# (n_data, 2) array — confirm against the halerium API docs.
past_results = np.array(past_results)[:, 0, :].T

Generate test data

[4]:
# Future (test) settings use combinations never seen in the past data,
# so a model must capture the true causal structure to extrapolate well.
future_settings = ([[-1., 0., 0.]]*10 +
                   [[0.5, 0.5, -1.]]*10)
future_settings = np.array(future_settings)

# Sample test results from the same generative model with the same
# ground-truth regression parameters as the training data.
future_results = hal.get_generative_model(g, data={g.settings.x1: future_settings[:, 0],
                                                   g.settings.x2: future_settings[:, 1],
                                                   g.settings.x3: future_settings[:, 2],
                                                   g.substructure1.reg_y1.location.slope: real_matrix1,
                                                   g.substructure1.reg_y1.location.intercept: real_intercept1,
                                                   g.substructure2.reg_y2.location.slope: real_matrix2,
                                                   g.substructure2.reg_y2.location.intercept: real_intercept2}).get_samples(
    [g.substructure1.outputs.results.y1, g.substructure2.outputs.results.y2])
# Same reshaping as for the training data: first sample, then (n_data, 2).
future_results = np.array(future_results)[:, 0, :].T

Train a black-box model and apply it to the test data

[5]:
# Baseline: an ordinary linear regression that maps all three settings
# jointly to both results, ignoring the known causal structure entirely.
from sklearn.linear_model import LinearRegression

black_box_model = LinearRegression()
black_box_model.fit(past_settings, past_results)

black_box_prediction_past = black_box_model.predict(past_settings)
black_box_prediction_future = black_box_model.predict(future_settings)

Train the causal model and apply it to the test data

[6]:
# Condition the graph on the past settings AND observed results to infer
# the posterior over the regression parameters of both subgraphs.
causal_model = hal.get_posterior_model(g, data={g.settings.x1: past_settings[:, 0],
                                                g.settings.x2: past_settings[:, 1],
                                                g.settings.x3: past_settings[:, 2],
                                                g.substructure1.outputs.results.y1: past_results[:, 0],
                                                g.substructure2.outputs.results.y2: past_results[:, 1]})
# Graph with priors replaced by the learned posterior distributions.
trained_causal_graph = causal_model.get_posterior_graph()

# Predict y1/y2 for the training settings by taking the posterior
# predictive means of the trained graph (settings clamped, results free).
causal_prediction_past = hal.get_generative_model(
    trained_causal_graph,
    data={g.settings.x1: past_settings[:, 0],
          g.settings.x2: past_settings[:, 1],
          g.settings.x3: past_settings[:, 2]}).get_means(
    [g.substructure1.outputs.results.y1, g.substructure2.outputs.results.y2])
# Transpose to (n_data, 2) to match the results arrays.
causal_prediction_past = np.array(causal_prediction_past).T

# Same prediction procedure on the unseen future settings.
causal_prediction_future = hal.get_generative_model(
    trained_causal_graph,
    data={g.settings.x1: future_settings[:, 0],
          g.settings.x2: future_settings[:, 1],
          g.settings.x3: future_settings[:, 2]}).get_means(
    [g.substructure1.outputs.results.y1, g.substructure2.outputs.results.y2])
causal_prediction_future = np.array(causal_prediction_future).T

Compare the performance

[7]:
def explained_variance_score(prediction, truth, norm):
    """Return 1 - MSE(prediction, truth) / norm.

    A value of 1.0 means a perfect fit; 0 or below means the model is no
    better than predicting the pooled mean (given `norm` is the pooled
    variance of all results).
    """
    return 1. - np.mean((prediction - truth) ** 2) / norm


# Normalize by the variance over ALL results (train and test pooled);
# np.append flattens both arrays before taking the variance.
norm = np.var(np.append(past_results, future_results))

black_box_performance_past = explained_variance_score(black_box_prediction_past, past_results, norm)
causal_performance_past = explained_variance_score(causal_prediction_past, past_results, norm)

black_box_performance_future = explained_variance_score(black_box_prediction_future, future_results, norm)
causal_performance_future = explained_variance_score(causal_prediction_future, future_results, norm)

print("black box on training data:", black_box_performance_past)
print("causal model on training data:", causal_performance_past)
print("black box on test data:", black_box_performance_future)
# Fixed label: previously read "causal box on test data".
print("causal model on test data:", causal_performance_future)
black box on training data: 0.8700155151258816
causal model on training data: 0.8646083422454567
black box on test data: 0.0830037653624115
causal model on test data: 0.8269131649522296

Visualization

[8]:
from plots import plot_compare_causal_and_black_box
from IPython.display import Image

# Render the bar-chart comparison of the four performance scores
# produced by the local `plots` helper module.
comparison_figure = plot_compare_causal_and_black_box(
    black_box_performance_past,
    black_box_performance_future,
    causal_performance_past,
    causal_performance_future,
)
Image(comparison_figure)
[8]:
../../_images/examples_03_why_care_04_encoding_causal_structure_15_0.png
[ ]: