Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Childcare Cost Simulation (CAP data) #129

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion jb/child_allowance.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@

# Read in census data and specify columns for use
raw = pd.read_csv(
"https://github.com/UBICenter/child-allowance/blob/master/jb/data/cps_00003.csv.gz",
"https://github.com/UBICenter/child-allowance/blob/master/jb/data/cps_00003.csv.gz?raw=true",
compression="gzip",
usecols=[
"YEAR",
"MONTH",
Expand Down
111 changes: 111 additions & 0 deletions jb/child_allowance_costs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Roadmap:
# Pull in the edited dataset
# Merge on State index using person-level child age indicator
# Create two rows per index (low and high quality)
# Calculate state-based outcomes.


# Preamble and read data
import microdf as mdf
import pandas as pd
import numpy as np
import us

# Read in census data and specify columns for use.
# The "?raw=true" suffix asks GitHub for the gzipped CSV itself instead of
# the HTML page that wraps it.
person_raw = pd.read_csv(
    "https://github.com/UBICenter/child-allowance/blob/master/jb/data/cps_00003.csv.gz?raw=true",
    compression="gzip",
    usecols=[
        "YEAR",
        "STATEFIP",
        "AGE",
        "SEX",
        "SPMWT",
        "SPMFTOTVAL",
        "SPMTOTRES",
        "SPMCHXPNS",
        "SPMTHRESH",
        "SPMFAMUNIT",
        "ASECWT",
    ],
)

# Work on a deep copy so person_raw stays exactly as loaded.
person = person_raw.copy(deep=True)

# The extract is read with upper-case headers (see usecols), but the rest of
# this script refers to lower-case names (person.age, person.statefip,
# "spmwt", ...). Normalize once here so those lookups resolve.
person.columns = person.columns.str.lower()

# Age-band masks, computed once and reused for both the indicator columns
# and the categorical label. between() is inclusive on both ends.
is_infant = person.age < 1
is_toddler = person.age.between(1, 2)
is_preschool = person.age.between(3, 5)

# Per-person indicator columns; "person" is a constant 1 so that summing it
# later yields a head count.
person["person"] = 1
person["child_6"] = person.age < 6
person["infant"] = is_infant
person["toddler"] = is_toddler
person["preschool"] = is_preschool

# Age category used as a merge key: default everyone to "over_5", then
# overwrite the label for each younger band.
person["age_cat"] = "over_5"
for band_label, band_mask in (
    ("infant", is_infant),
    ("toddler", is_toddler),
    ("preschool", is_preschool),
):
    person.loc[band_mask, "age_cat"] = band_label

# Translate each FIPS code into a state name.
# Resolve every distinct code once (zero-padded to two digits for the `us`
# lookup), then expand back to one name per person row.
state_name_by_fips = {
    fips: us.states.lookup(str(fips).zfill(2)).name
    for fips in person.statefip.unique().tolist()
}
person["state"] = [state_name_by_fips[fips] for fips in person.statefip]

# Read in cost data.
# "?raw=true" is required: without it GitHub returns the HTML page for the
# file rather than the CSV contents, which pandas cannot parse as the table.
costs = pd.read_csv(
    "https://github.com/UBICenter/child-allowance/blob/master/jb/data/CCare_cost.csv?raw=true"
)

# Attach per-child childcare costs to each person.
# Every person row is paired with each cost row sharing its state and
# age_cat; the intent is two rows per person (one base_quality and one
# high_quality, with different costs). how="left" keeps people that have no
# matching cost row.
cost_cols = ["state", "high_quality", "age_cat", "cost"]
person_costs = pd.merge(
    person,
    costs[cost_cols],
    how="left",
    on=["state", "age_cat"],
)

# People older than five carry no childcare cost.
over_5_rows = person_costs.age_cat == "over_5"
person_costs.loc[over_5_rows, "cost"] = 0

# Columns holding values recorded at the SPM unit level; together with
# high_quality they form the grouping key below.
SPMU_COLS = [
    "spmfamunit",
    "spmwt",
    "spmftotval",
    "spmtotres",
    "spmchxpns",
    "spmthresh",
    "year",
]

# Person-level columns to total within each group.
SPMU_AGG_COLS = ["child_6", "infant", "toddler", "preschool", "person", "cost"]

# Sum the person-level columns for each distinct combination of the SPM-unit
# columns and high_quality, label the totals with an "spmu_" prefix, and
# return the grouping keys to ordinary columns.
# NOTE(review): groupby drops rows whose key is NaN by default, so any person
# whose high_quality was left empty by the upstream left merge would be
# excluded from these totals - confirm that is intended.
spmu_quality = (
    person_costs.groupby([*SPMU_COLS, "high_quality"])[SPMU_AGG_COLS]
    .sum()
    .add_prefix("spmu_")
    .reset_index()
)

# Calculate the weighted total childcare cost under each quality tier.
# spmu_quality's summed columns carry the "spmu_" prefix, so the cost total
# lives in "spmu_cost" (a plain "cost" column does not exist there).
# NOTE(review): assumes high_quality holds True/False (or 0/1) flags -
# confirm against CCare_cost.csv. Casting to bool keeps `~` a logical
# negation even if the column arrives with object dtype.
is_high_quality = spmu_quality.high_quality.astype(bool)

program_cost_high = mdf.weighted_sum(
    spmu_quality[is_high_quality], "spmu_cost", "spmwt"
)
# Base-quality total gets its own name instead of overwriting the
# high-quality result.
program_cost_base = mdf.weighted_sum(
    spmu_quality[~is_high_quality], "spmu_cost", "spmwt"
)

# TODO: program costs should also be grouped by age category
# (sum infant + toddler + preschool).
# TODO: use the new microdf groupby argument to build a single program-cost
# dataframe, so filtering is replaced by a groupby.
# TODO: group by age_cat and high_quality to get the weighted sums we are
# interested in.
# TODO: get total cost for Max.
Loading