Encoding causal structure#
Imagine having a process with some causal structure.
The causal structure we assume simply encodes that:
- y1 depends on settings x1 and x2
- y2 depends on settings x2 and x3
[2]:
# Imports: halerium core for probabilistic graph building, plus the helper
# that connects inputs to outputs via a learnable (linear) regression.
import numpy as np
import halerium.core as hal
from halerium.core.regression import connect_via_regression
# NOTE(review): the names used inside the `with` blocks below (`settings`,
# `substructure1`, `inputs`, `outputs`, `results`, ...) are injected by
# halerium's scoping when hal.Entity/hal.Graph creates them — they are not
# ordinary Python variables defined elsewhere in this notebook.
g = hal.Graph("g")
with g:
# Three shared setting variables with standard-normal priors.
hal.Entity("settings")
with settings:
hal.Variable("x1", mean=0., variance=1.)
hal.Variable("x2", mean=0., variance=1.)
hal.Variable("x3", mean=0., variance=1.)
# Sub-graph for result y1: depends on x1 and x2 only.
hal.Graph("substructure1")
with substructure1:
with inputs:
hal.Entity("settings")
with settings:
# Input copies carry no priors of their own; they are linked
# to the global g.settings at the bottom of this cell.
hal.Variable("x1")
hal.Variable("x2")
with outputs:
hal.Entity("results")
with results:
# Small observation noise on the result.
hal.Variable("y1", variance=0.1)
# Wire the regression y1 = f(x1, x2); parameters get the prefix "reg".
connect_via_regression(name_prefix="reg",
inputs=[inputs.settings.x1, inputs.settings.x2],
outputs=[outputs.results.y1])
# Sub-graph for result y2: depends on x2 and x3 only.
hal.Graph("substructure2")
with substructure2:
with inputs:
hal.Entity("settings")
with settings:
hal.Variable("x2")
hal.Variable("x3")
with outputs:
hal.Entity("results")
with results:
hal.Variable("y2", variance=0.1)
connect_via_regression(name_prefix="reg",
inputs=[inputs.settings.x2, inputs.settings.x3],
outputs=[outputs.results.y2])
# Identify the global settings with each sub-graph's input settings,
# so both regressions read the same x1, x2, x3.
hal.link(settings, substructure1.inputs.settings)
hal.link(settings, substructure2.inputs.settings)
# use the hal.show function to display the graph in the online platform
#hal.show(g)
Let us generate artificial data to test this.#
The important thing is that the settings in the past were not chosen randomly, but they were managed in some manner that did not have a machine learning use case in mind.
Generate training data#
[3]:
# Ground-truth regression parameters used to generate the artificial data.
real_matrix1 = np.array([1., -1])
real_intercept1 = 0.
real_matrix2 = np.array([-1, 1.])
real_intercept2 = 0.

# Past settings were managed, not randomized: three fixed operating points,
# each visited 10 times, shape (30, 3).
past_settings = np.repeat(
    np.array([[-1., -1., -1.],
              [0., -1., 0.],
              [1., 1., 1.]]),
    10, axis=0)

# Clamp the settings and the true slopes/intercepts, then sample y1 and y2
# from the generative model.
past_data = {g.settings.x1: past_settings[:, 0],
             g.settings.x2: past_settings[:, 1],
             g.settings.x3: past_settings[:, 2],
             g.substructure1.reg_y1.location.slope: real_matrix1,
             g.substructure1.reg_y1.location.intercept: real_intercept1,
             g.substructure2.reg_y2.location.slope: real_matrix2,
             g.substructure2.reg_y2.location.intercept: real_intercept2}
past_results = hal.get_generative_model(g, data=past_data).get_samples(
    [g.substructure1.outputs.results.y1, g.substructure2.outputs.results.y2])
# NOTE(review): indexing assumes get_samples returns one sample per output
# variable; after the transpose rows are examples, columns are (y1, y2).
past_results = np.array(past_results)[:, 0, :].T
Generate test data#
[4]:
# Future settings: two operating points that never occurred in the past
# data, each visited 10 times, shape (20, 3).
future_settings = np.repeat(
    np.array([[-1., 0., 0.],
              [0.5, 0.5, -1.]]),
    10, axis=0)

# Sample the corresponding results with the same true parameters clamped.
future_data = {g.settings.x1: future_settings[:, 0],
               g.settings.x2: future_settings[:, 1],
               g.settings.x3: future_settings[:, 2],
               g.substructure1.reg_y1.location.slope: real_matrix1,
               g.substructure1.reg_y1.location.intercept: real_intercept1,
               g.substructure2.reg_y2.location.slope: real_matrix2,
               g.substructure2.reg_y2.location.intercept: real_intercept2}
future_results = hal.get_generative_model(g, data=future_data).get_samples(
    [g.substructure1.outputs.results.y1, g.substructure2.outputs.results.y2])
# Same reshaping as for the past data: rows are examples, columns (y1, y2).
future_results = np.array(future_results)[:, 0, :].T
Train a black box model and apply it on test data#
[5]:
from sklearn.linear_model import LinearRegression

# Black-box baseline: ordinary least squares mapping all three settings
# jointly to both results, with no knowledge of the causal structure.
black_box_model = LinearRegression()
black_box_model.fit(past_settings, past_results)

black_box_prediction_past = black_box_model.predict(past_settings)
black_box_prediction_future = black_box_model.predict(future_settings)
Train the causal model and apply it on test data#
[6]:
# Condition the graph on the past observations to obtain the posterior over
# the regression parameters, then bake that posterior into a new graph.
training_data = {g.settings.x1: past_settings[:, 0],
                 g.settings.x2: past_settings[:, 1],
                 g.settings.x3: past_settings[:, 2],
                 g.substructure1.outputs.results.y1: past_results[:, 0],
                 g.substructure2.outputs.results.y2: past_results[:, 1]}
causal_model = hal.get_posterior_model(g, data=training_data)
trained_causal_graph = causal_model.get_posterior_graph()

result_variables = [g.substructure1.outputs.results.y1,
                    g.substructure2.outputs.results.y2]


def _predict_means(settings_array):
    """Mean predictions of (y1, y2) for the given settings, rows = examples."""
    model = hal.get_generative_model(
        trained_causal_graph,
        data={g.settings.x1: settings_array[:, 0],
              g.settings.x2: settings_array[:, 1],
              g.settings.x3: settings_array[:, 2]})
    return np.array(model.get_means(result_variables)).T


causal_prediction_past = _predict_means(past_settings)
causal_prediction_future = _predict_means(future_settings)
Compare the performance#
[7]:
# Normalize all mean-squared errors by the total variance of the observed
# results, so each score reads like an R^2: 1 is perfect, 0 is no better
# than predicting the overall mean.
norm = np.var(np.append(past_results, future_results))

black_box_performance_past = 1 - np.mean((black_box_prediction_past - past_results)**2) / norm
causal_performance_past = 1 - np.mean((causal_prediction_past - past_results)**2) / norm
black_box_performance_future = 1 - np.mean((black_box_prediction_future - future_results)**2) / norm
causal_performance_future = 1 - np.mean((causal_prediction_future - future_results)**2) / norm

print("black box on training data:", black_box_performance_past)
print("causal model on training data:", causal_performance_past)
print("black box on test data:", black_box_performance_future)
# Fixed label: previously read "causal box", which matched neither the
# "black box" nor the "causal model" naming used above.
print("causal model on test data:", causal_performance_future)
black box on training data: 0.8700155151258816
causal model on training data: 0.8646083422454567
black box on test data: 0.0830037653624115
causal model on test data: 0.8269131649522296
Visualization#
[8]:
# Render the comparison figure (the helper lives in the local plots module)
# and display it inline via IPython's rich image display.
from plots import plot_compare_causal_and_black_box
from IPython.display import Image

comparison_plot = plot_compare_causal_and_black_box(black_box_performance_past,
                                                    black_box_performance_future,
                                                    causal_performance_past,
                                                    causal_performance_future)
Image(comparison_plot)
[8]:
[ ]: