{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Reusing models" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In the past, a colleague defined a graph that connected some data x with data y." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import halerium.core as hal\n", "\n", "ga = hal.Graph(\"ga\")\n", "with ga:\n", "    with inputs:\n", "        hal.Entity(\"e1\")\n", "        with e1:\n", "            hal.Variable(\"x\", shape=(10,), mean=0, variance=1)\n", "    with outputs:\n", "        hal.Entity(\"e2\")\n", "        with e2:\n", "            hal.Variable(\"y\", shape=(7,))\n", "    hal.regression.connect_via_regression(\"reg\", inputs=inputs.e1.x, outputs=outputs.e2.y)\n", "    outputs.e2.y.variance = hal.exp(hal.StaticVariable(\"lnv\", mean=-3, variance=1.))\n", "\n", "# use this in the online platform to show the graph\n", "# hal.show(ga)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The colleague had training data for x and y available and trained the graph accordingly." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# generating artificial data\n", "np.random.seed(42)\n", "real_xy_slope = np.random.randn(7, 10)\n", "real_xy_intercept = np.random.randn(7)\n", "\n", "data_x = np.random.randn(74, 10)\n", "data_y = np.einsum(\"ij, nj -> ni\", real_xy_slope, data_x) + real_xy_intercept\n", "# normally these would of course be loaded from somewhere\n", "\n", "posterior_model_a = hal.get_posterior_model(ga, data={ga.inputs.e1.x: data_x, ga.outputs.e2.y: data_y})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The trained model was converted back to a graph and saved as a JSON file." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "posterior_graph_a = posterior_model_a.get_posterior_graph()\n", "posterior_graph_a.dump_file(\"posterior_graph.json\")" ] }, { "cell_type": "markdown", "metadata": {},
"source": [ "This was done in the past. Now let's clear the session and start over." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "%reset -f" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Starting from there" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "import halerium.core as hal\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Our graph connects y and z." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "gb = hal.Graph(\"gb\")\n", "with gb:\n", "    with inputs:\n", "        hal.Entity(\"e2\")\n", "        with e2:\n", "            hal.Variable(\"y\", shape=(7,), mean=0, variance=1)\n", "    with outputs:\n", "        hal.Entity(\"e3\")\n", "        with e3:\n", "            hal.Variable(\"z\", shape=(5,))\n", "    hal.regression.connect_via_regression(\"reg\", inputs=inputs.e2.y, outputs=outputs.e3.z)\n", "    outputs.e3.z.variance = hal.exp(hal.StaticVariable(\"lnv\", mean=-3, variance=1.))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We have training data for y and z available, but not for x. We train our graph..."
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# generating artificial data\n", "np.random.seed(137)\n", "real_yz_slope = np.random.randn(5, 7)\n", "real_yz_intercept = np.random.randn(5)\n", "\n", "data_y = np.random.randn(63, 7)\n", "data_z = np.einsum(\"ij, nj -> ni\", real_yz_slope, data_y) + real_yz_intercept\n", "# normally these would of course be loaded from somewhere\n", "\n", "posterior_model_b = hal.get_posterior_model(gb, data={gb.inputs.e2.y: data_y, gb.outputs.e3.z: data_z})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can again extract the posterior graph." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "posterior_graph_b = posterior_model_b.get_posterior_graph()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then we load our colleague's work from disk." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "posterior_graph_a = hal.Graph.from_specification(file=\"posterior_graph.json\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can plug the two graphs together into one big graph." ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "big_graph = hal.Graph(\"big_graph\")\n", "with big_graph:\n", "    posterior_graph_a.copy(\"ga\")\n", "    posterior_graph_b.copy(\"gb\")\n", "    hal.link(ga.outputs.e2, gb.inputs.e2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "With this graph, we can now predict z from x." ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# test data\n", "test_data_x = np.random.randn(100, 10)\n", "\n", "model_predict = hal.get_generative_model(big_graph, data={big_graph.ga.inputs.e1.x: test_data_x})\n", "predicted_y, predicted_z = model_predict.get_means([big_graph.ga.outputs.e2.y, big_graph.gb.outputs.e3.z])" ] } ], "metadata": { "kernelspec": {
"display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9" } }, "nbformat": 4, "nbformat_minor": 4 }