Skip to content

Commit

Permalink
Add Simulation.subsample (#279)
Browse files Browse the repository at this point in the history
* Update documentation

* Fix bug in reform handling

* Test with API

* Versioning

* Add seedability

* Fix syntax bug
  • Loading branch information
nikhilwoodruff authored Sep 24, 2024
1 parent cf53c6d commit 60d589a
Show file tree
Hide file tree
Showing 10 changed files with 1,127 additions and 243 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ all: install format test build changelog
documentation:
jb clean docs
jb build docs
python docs/add_plotly_to_book.py docs/_build

format:
black . -l 79
Expand Down
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
added:
- Simulation subsampling.
7 changes: 0 additions & 7 deletions docs/_static/style.css
Original file line number Diff line number Diff line change
@@ -1,9 +1,2 @@
@import url('https://fonts.googleapis.com/css2?family=Roboto+Serif:opsz@8..144&family=Roboto:wght@300&display=swap');

h1, h2, h3, h4, h5, h6 {
font-family: "Roboto";
}

body {
font-family: "Roboto Serif";
}
27 changes: 27 additions & 0 deletions docs/add_plotly_to_book.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import argparse
from pathlib import Path

# This command-line tool enables Plotly charts to show in the HTML files for
# the Jupyter Book documentation. It does so by adding a require.js <script>
# tag to the start of the <head> tag of every HTML file in the built book.

REQUIRE_JS_TAG = (
    '<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/'
    'require.min.js"></script>'
)


def add_require_js(html: str) -> str:
    """Return ``html`` with the require.js script tag added after ``<head>``.

    Only the first literal ``<head>`` is touched, so a later occurrence of
    the same text elsewhere in the page is left alone. If the tag is already
    present the input is returned unchanged, which makes it safe to run the
    tool repeatedly over the same build folder. Pages without a literal
    ``<head>`` are returned unchanged.
    """
    if REQUIRE_JS_TAG in html:
        return html
    return html.replace("<head>", "<head>" + REQUIRE_JS_TAG, 1)


def add_plotly_to_book(book_path) -> None:
    """Add the require.js tag to every ``*.html`` file under ``book_path``.

    Args:
        book_path: Path (``str`` or ``Path``) to the Jupyter Book folder;
            it is searched recursively.
    """
    book_folder = Path(book_path)

    for html_file in book_folder.glob("**/*.html"):
        # NOTE(review): assumes the generated pages are UTF-8 encoded; an
        # explicit encoding avoids depending on the platform default.
        with open(html_file, "r", encoding="utf-8") as f:
            html = f.read()

        updated = add_require_js(html)

        # Skip the write when nothing changed (no <head>, or already done).
        if updated != html:
            with open(html_file, "w", encoding="utf-8") as f:
                f.write(updated)


def main(argv=None) -> None:
    """Parse the command line and process the book.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("book_path", help="Path to the Jupyter Book.")
    args = parser.parse_args(argv)

    add_plotly_to_book(args.book_path)


if __name__ == "__main__":
    main()
181 changes: 0 additions & 181 deletions docs/usage/charts.ipynb

This file was deleted.

14 changes: 0 additions & 14 deletions docs/usage/cli.md

This file was deleted.

44 changes: 24 additions & 20 deletions docs/usage/datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,20 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"array([100., 0., 200.], dtype=float32)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from policyengine_core.country_template.constants import COUNTRY_DIR\n",
"from policyengine_core.data import Dataset\n",
Expand All @@ -28,12 +39,14 @@
" # Specify metadata used to describe and store the dataset.\n",
" name = \"country_template_dataset\"\n",
" label = \"Country template dataset\"\n",
" folder_path = COUNTRY_DIR / \"data\" / \"storage\"\n",
" file_path = (\n",
" COUNTRY_DIR / \"data\" / \"storage\" / \"country_template_dataset.h5\"\n",
" )\n",
" data_format = Dataset.TIME_PERIOD_ARRAYS\n",
"\n",
" # The generation function is the most important part: it defines\n",
" # how the dataset is generated from the raw data for a given year.\n",
" def generate(self, year: int) -> None:\n",
" # how the dataset is generated from the raw data.\n",
" def generate(self) -> None:\n",
" person_id = [0, 1, 2]\n",
" household_id = [0, 1]\n",
" person_household_id = [0, 0, 1]\n",
Expand All @@ -50,25 +63,16 @@
" \"salary\": {salary_time_period: salary},\n",
" \"household_weight\": {weight_time_period: weight},\n",
" }\n",
" self.save_variable_values(year, data)\n",
" self.save_dataset(data)\n",
"\n",
"\n",
"# Important: we must instantiate datasets. This tests their validity and adds dynamic logic.\n",
"CountryTemplateDataset = CountryTemplateDataset()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dataset API\n",
"from policyengine_core.country_template import Simulation\n",
"\n",
"PolicyEngine Core also includes two subclasses of `Dataset`:\n",
"CountryTemplateDataset().generate()\n",
"\n",
"* `PublicDataset` - a dataset that is publicly available, and can be downloaded from a URL. Includes a `download` method to download the dataset.\n",
"* `PrivateDataset` - a dataset that is not publicly available, and must be downloaded from a private URL (specifically, Google Cloud buckets). Includes a `download` method to download the dataset, and an `upload` method to upload the dataset.\n",
"simulation = Simulation(dataset=CountryTemplateDataset)\n",
"\n",
"See {doc}`/python_api/data` for the API reference."
"simulation.calculate(\"salary\", \"2022-01\")"
]
}
],
Expand All @@ -88,7 +92,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.10.14"
},
"orig_nbformat": 4,
"vscode": {
Expand Down
18 changes: 9 additions & 9 deletions docs/usage/reforms.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -74,21 +74,21 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"( value weight\n",
" 0 4000.0 1000000.0\n",
" 1 6000.0 1200000.0,\n",
" value weight\n",
" 0 2000.0 1000000.0\n",
" 1 3000.0 1200000.0)"
"( value weight\n",
" 0 200.0 1000000.0\n",
" 1 200.0 1200000.0,\n",
" value weight\n",
" 0 200.0 1000000.0\n",
" 1 200.0 1200000.0)"
]
},
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -116,7 +116,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
"version": "3.10.14"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 60d589a

Please sign in to comment.