Add Simulation.subsample (#279)

* Update documentation * Fix bug in reform handling * Test with API * Versioning * Add seedability * Fix syntax bug
PolicyEngine · Sep 24, 2024 · 60d589a · 60d589a
1 parent cf53c6d
commit 60d589a
Show file tree

Hide file tree

Showing 10 changed files with 1,127 additions and 243 deletions.
diff --git a/Makefile b/Makefile
@@ -3,6 +3,7 @@ all: install format test build changelog
 documentation:
 	jb clean docs
 	jb build docs
+	python docs/add_plotly_to_book.py docs/_build
 
 format:
 	black . -l 79

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    added:
+    - Simulation subsampling.
diff --git a/docs/_static/style.css b/docs/_static/style.css
@@ -1,9 +1,2 @@
 @import url('https://fonts.googleapis.com/css2?family=Roboto+Serif:opsz@8..144&family=Roboto:wght@300&display=swap');
 
-h1, h2, h3, h4, h5, h6 {
-    font-family: "Roboto";
-}
-
-body {
-    font-family: "Roboto Serif";
-}
diff --git a/docs/add_plotly_to_book.py b/docs/add_plotly_to_book.py
@@ -0,0 +1,27 @@
+import argparse
+from pathlib import Path
+
+# This command-line tool enables Plotly charts to show in the HTML files for the Jupyter Book documentation.
+
+parser = argparse.ArgumentParser()
+parser.add_argument("book_path", help="Path to the Jupyter Book.")
+
+args = parser.parse_args()
+
+# Find every HTML file in the Jupyter Book. Then, insert a script tag immediately after the opening <head> tag in each file, with the contents:
+# <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+
+book_folder = Path(args.book_path)
+
+for html_file in book_folder.glob("**/*.html"):
+    with open(html_file, "r") as f:
+        html = f.read()
+
+    # Add the script tag to the start of the <head> tag.
+    html = html.replace(
+        "<head>",
+        '<head><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>',
+    )
+
+    with open(html_file, "w") as f:
+        f.write(html)
diff --git a/docs/usage/charts.ipynb b/docs/usage/charts.ipynb
diff --git a/docs/usage/cli.md b/docs/usage/cli.md
diff --git a/docs/usage/datasets.ipynb b/docs/usage/datasets.ipynb
@@ -15,9 +15,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 9,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([100.,   0., 200.], dtype=float32)"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "from policyengine_core.country_template.constants import COUNTRY_DIR\n",
     "from policyengine_core.data import Dataset\n",
@@ -28,12 +39,14 @@
     "    # Specify metadata used to describe and store the dataset.\n",
     "    name = \"country_template_dataset\"\n",
     "    label = \"Country template dataset\"\n",
-    "    folder_path = COUNTRY_DIR / \"data\" / \"storage\"\n",
+    "    file_path = (\n",
+    "        COUNTRY_DIR / \"data\" / \"storage\" / \"country_template_dataset.h5\"\n",
+    "    )\n",
     "    data_format = Dataset.TIME_PERIOD_ARRAYS\n",
     "\n",
     "    # The generation function is the most important part: it defines\n",
-    "    # how the dataset is generated from the raw data for a given year.\n",
-    "    def generate(self, year: int) -> None:\n",
+    "    # how the dataset is generated from the raw data.\n",
+    "    def generate(self) -> None:\n",
     "        person_id = [0, 1, 2]\n",
     "        household_id = [0, 1]\n",
     "        person_household_id = [0, 0, 1]\n",
@@ -50,25 +63,16 @@
     "            \"salary\": {salary_time_period: salary},\n",
     "            \"household_weight\": {weight_time_period: weight},\n",
     "        }\n",
-    "        self.save_variable_values(year, data)\n",
+    "        self.save_dataset(data)\n",
     "\n",
     "\n",
-    "# Important: we must instantiate datasets. This tests their validity and adds dynamic logic.\n",
-    "CountryTemplateDataset = CountryTemplateDataset()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Dataset API\n",
+    "from policyengine_core.country_template import Simulation\n",
     "\n",
-    "PolicyEngine Core also includes two subclasses of `Dataset`:\n",
+    "CountryTemplateDataset().generate()\n",
     "\n",
-    "* `PublicDataset` - a dataset that is publicly available, and can be downloaded from a URL. Includes a `download` method to download the dataset.\n",
-    "* `PrivateDataset` - a dataset that is not publicly available, and must be downloaded from a private URL (specifically, Google Cloud buckets). Includes a `download` method to download the dataset, and a `upload` method to upload the dataset.\n",
+    "simulation = Simulation(dataset=CountryTemplateDataset)\n",
     "\n",
-    "See {doc}`/python_api/data` for the API reference."
+    "simulation.calculate(\"salary\", \"2022-01\")"
    ]
   }
  ],
@@ -88,7 +92,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12"
+   "version": "3.10.14"
   },
   "orig_nbformat": 4,
   "vscode": {

diff --git a/docs/usage/reforms.ipynb b/docs/usage/reforms.ipynb
@@ -74,21 +74,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(    value     weight\n",
-       " 0  4000.0  1000000.0\n",
-       " 1  6000.0  1200000.0,\n",
-       "     value     weight\n",
-       " 0  2000.0  1000000.0\n",
-       " 1  3000.0  1200000.0)"
+       "(   value     weight\n",
+       " 0  200.0  1000000.0\n",
+       " 1  200.0  1200000.0,\n",
+       "    value     weight\n",
+       " 0  200.0  1000000.0\n",
+       " 1  200.0  1200000.0)"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -116,7 +116,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.19"
+   "version": "3.10.14"
   }
  },
  "nbformat": 4,