Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: patch
changes:
fixed:
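- Add Simulation.ensure, load_datasets and ensure_datasets; import the UK model only when policyengine_uk is installed.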
7 changes: 7 additions & 0 deletions src/policyengine/core/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ class Simulation(BaseModel):
def run(self):
    """Run this simulation through its tax-benefit model version."""
    model_version = self.tax_benefit_model_version
    model_version.run(self)

def ensure(self):
    """Load the simulation's saved output, computing it if loading fails.

    The tax-benefit model version is asked to load the output first. If
    that raises any ``Exception``, the simulation is run and then saved.
    """
    try:
        load_output = self.tax_benefit_model_version.load
        load_output(self)
    except Exception:
        # Loading failed for any reason: recompute, then persist the result.
        self.run()
        self.save()

def save(self):
"""Save the simulation's output dataset."""
self.tax_benefit_model_version.save(self)
Expand Down
61 changes: 33 additions & 28 deletions src/policyengine/tax_benefit_models/uk.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,38 @@
"""PolicyEngine UK tax-benefit model - imports from uk/ module."""

from .uk import (
PolicyEngineUK,
PolicyEngineUKDataset,
PolicyEngineUKLatest,
ProgrammeStatistics,
UKYearData,
create_datasets,
general_policy_reform_analysis,
uk_latest,
uk_model,
)
from importlib.util import find_spec

__all__ = [
"UKYearData",
"PolicyEngineUKDataset",
"create_datasets",
"PolicyEngineUK",
"PolicyEngineUKLatest",
"uk_model",
"uk_latest",
"general_policy_reform_analysis",
"ProgrammeStatistics",
]
if find_spec("policyengine_uk") is not None:
from .uk import (
PolicyEngineUK,
PolicyEngineUKDataset,
PolicyEngineUKLatest,
ProgrammeStatistics,
UKYearData,
create_datasets,
ensure_datasets,
general_policy_reform_analysis,
load_datasets,
uk_latest,
uk_model,
)

# Rebuild models to resolve forward references
from policyengine.core import Dataset
__all__ = [
"UKYearData",
"PolicyEngineUKDataset",
"create_datasets",
"load_datasets",
"ensure_datasets",
"PolicyEngineUK",
"PolicyEngineUKLatest",
"uk_model",
"uk_latest",
"general_policy_reform_analysis",
"ProgrammeStatistics",
]

Dataset.model_rebuild()
UKYearData.model_rebuild()
PolicyEngineUKDataset.model_rebuild()
PolicyEngineUKLatest.model_rebuild()
# Rebuild models to resolve forward references
PolicyEngineUKDataset.model_rebuild()
PolicyEngineUKLatest.model_rebuild()
else:
__all__ = []
60 changes: 39 additions & 21 deletions src/policyengine/tax_benefit_models/uk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,44 @@
"""PolicyEngine UK tax-benefit model."""

from .analysis import general_policy_reform_analysis
from .datasets import PolicyEngineUKDataset, UKYearData, create_datasets
from .model import PolicyEngineUK, PolicyEngineUKLatest, uk_latest, uk_model
from .outputs import ProgrammeStatistics
from importlib.util import find_spec

__all__ = [
"UKYearData",
"PolicyEngineUKDataset",
"create_datasets",
"PolicyEngineUK",
"PolicyEngineUKLatest",
"uk_model",
"uk_latest",
"general_policy_reform_analysis",
"ProgrammeStatistics",
]
if find_spec("policyengine_uk") is not None:
from policyengine.core import Dataset

# Rebuild models to resolve forward references
from policyengine.core import Dataset
from .analysis import general_policy_reform_analysis
from .datasets import (
PolicyEngineUKDataset,
UKYearData,
create_datasets,
ensure_datasets,
load_datasets,
)
from .model import (
PolicyEngineUK,
PolicyEngineUKLatest,
uk_latest,
uk_model,
)
from .outputs import ProgrammeStatistics

Dataset.model_rebuild()
UKYearData.model_rebuild()
PolicyEngineUKDataset.model_rebuild()
PolicyEngineUKLatest.model_rebuild()
# Rebuild Pydantic models to resolve forward references
Dataset.model_rebuild()
UKYearData.model_rebuild()
PolicyEngineUKDataset.model_rebuild()
PolicyEngineUKLatest.model_rebuild()

__all__ = [
"UKYearData",
"PolicyEngineUKDataset",
"create_datasets",
"load_datasets",
"ensure_datasets",
"PolicyEngineUK",
"PolicyEngineUKLatest",
"uk_model",
"uk_latest",
"general_policy_reform_analysis",
"ProgrammeStatistics",
]
else:
__all__ = []
85 changes: 78 additions & 7 deletions src/policyengine/tax_benefit_models/uk/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,7 @@ def model_post_init(self, __context):
if self.data is not None:
self.save()
elif self.filepath and not self.data:
try:
self.load()
except FileNotFoundError:
# File doesn't exist yet, that's OK
pass
self.load()

def save(self) -> None:
"""Save dataset to HDF5 file."""
Expand Down Expand Up @@ -85,7 +81,9 @@ def create_datasets(
"hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
],
years: list[int] = [2026, 2027, 2028, 2029, 2030],
) -> None:
data_folder: str = "./data",
) -> dict[str, PolicyEngineUKDataset]:
result = {}
for dataset in datasets:
from policyengine_uk import Microsimulation

Expand Down Expand Up @@ -139,9 +137,10 @@ def create_datasets(
)

uk_dataset = PolicyEngineUKDataset(
id=f"{Path(dataset).stem}_year_{year}",
name=f"{dataset}-year-{year}",
description=f"UK Dataset for year {year} based on {dataset}",
filepath=f"./data/{Path(dataset).stem}_year_{year}.h5",
filepath=f"{data_folder}/{Path(dataset).stem}_year_{year}.h5",
year=year,
data=UKYearData(
person=MicroDataFrame(person_df, weights="person_weight"),
Expand All @@ -154,3 +153,75 @@ def create_datasets(
),
)
uk_dataset.save()

dataset_key = f"{Path(dataset).stem}_{year}"
result[dataset_key] = uk_dataset

return result


def load_datasets(
    datasets: list[str] = [
        "hf://policyengine/policyengine-uk-data/frs_2023_24.h5",
        "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
    ],
    years: list[int] = [2026, 2027, 2028, 2029, 2030],
    data_folder: str = "./data",
) -> dict[str, PolicyEngineUKDataset]:
    """Load previously saved per-year UK datasets from ``data_folder``.

    For every ``(dataset, year)`` pair the file
    ``{data_folder}/{stem}_year_{year}.h5`` is loaded, where ``stem`` is the
    file stem of the dataset path. File names, ids and result keys follow
    the same scheme as ``create_datasets`` so both functions return
    interchangeable objects.

    Args:
        datasets: Source dataset paths; only their file stems are used here.
        years: Years to load a dataset for.
        data_folder: Directory containing the saved ``.h5`` files.

    Returns:
        Dictionary mapping ``"{stem}_{year}"`` to the loaded
        PolicyEngineUKDataset.
    """
    # The list defaults above are only iterated, never mutated.
    result = {}
    for dataset in datasets:
        stem = Path(dataset).stem
        for year in years:
            file_stem = f"{stem}_year_{year}"
            uk_dataset = PolicyEngineUKDataset(
                # Same id as create_datasets assigns, so a loaded dataset is
                # identified identically to a freshly created one.
                id=file_stem,
                name=f"{dataset}-year-{year}",
                description=f"UK Dataset for year {year} based on {dataset}",
                filepath=f"{data_folder}/{file_stem}.h5",
                year=year,
            )
            # NOTE(review): model_post_init appears to load the file already
            # when no data is passed; confirm whether this explicit load is
            # still required.
            uk_dataset.load()

            result[f"{stem}_{year}"] = uk_dataset

    return result


def ensure_datasets(
    datasets: list[str] = [
        "hf://policyengine/policyengine-uk-data/frs_2023_24.h5",
        "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
    ],
    years: list[int] = [2026, 2027, 2028, 2029, 2030],
    data_folder: str = "./data",
) -> dict[str, PolicyEngineUKDataset]:
    """Return the requested datasets, loading them from disk when possible.

    Each ``(dataset, year)`` pair corresponds to the file
    ``{data_folder}/{stem}_year_{year}.h5``. When every such file exists the
    datasets are loaded; if at least one is missing, all of them are
    created.

    Args:
        datasets: List of HuggingFace dataset paths
        years: List of years to load/create data for
        data_folder: Directory containing or to save the dataset files

    Returns:
        Dictionary mapping dataset keys to PolicyEngineUKDataset objects
    """
    expected_files = (
        Path(f"{data_folder}/{Path(dataset).stem}_year_{year}.h5")
        for dataset in datasets
        for year in years
    )

    # all() stops checking at the first missing file.
    if all(path.exists() for path in expected_files):
        provider = load_datasets
    else:
        provider = create_datasets

    return provider(datasets=datasets, years=years, data_folder=data_folder)
20 changes: 16 additions & 4 deletions src/policyengine/tax_benefit_models/uk/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,17 +265,29 @@ def save(self, simulation: "Simulation"):

def load(self, simulation: "Simulation"):
    """Load the simulation's output dataset.

    The output file is expected next to the input dataset, named
    ``{simulation.id}.h5``. The resulting dataset is stored on
    ``simulation.output_dataset``; when the file can be stat'ed, the
    simulation's ``created_at`` and ``updated_at`` are set from the file's
    timestamps.

    Args:
        simulation: Simulation whose output dataset should be loaded.
    """
    import os

    filepath = str(
        Path(simulation.dataset.filepath).parent / (simulation.id + ".h5")
    )

    simulation.output_dataset = PolicyEngineUKDataset(
        id=simulation.id,
        name=simulation.dataset.name,
        description=simulation.dataset.description,
        filepath=filepath,
        year=simulation.dataset.year,
        is_output_dataset=True,
    )

    # Load timestamps from file system metadata. A single stat call avoids
    # the race between an existence check and the timestamp reads.
    try:
        file_stat = os.stat(filepath)
    except (OSError, ValueError):
        # Same outcome as a failed os.path.exists check: leave timestamps.
        return

    # st_ctime is the inode-change time on Unix, not the creation time, so
    # prefer st_birthtime where the platform provides it.
    created = getattr(file_stat, "st_birthtime", None)
    if created is None:
        created = file_stat.st_ctime

    simulation.created_at = datetime.datetime.fromtimestamp(created)
    simulation.updated_at = datetime.datetime.fromtimestamp(
        file_stat.st_mtime
    )


uk_latest = PolicyEngineUKLatest()
6 changes: 6 additions & 0 deletions src/policyengine/tax_benefit_models/us.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,20 @@
PolicyEngineUSLatest,
ProgramStatistics,
USYearData,
create_datasets,
ensure_datasets,
general_policy_reform_analysis,
load_datasets,
us_latest,
us_model,
)

__all__ = [
"USYearData",
"PolicyEngineUSDataset",
"create_datasets",
"load_datasets",
"ensure_datasets",
"PolicyEngineUS",
"PolicyEngineUSLatest",
"us_model",
Expand Down
10 changes: 9 additions & 1 deletion src/policyengine/tax_benefit_models/us/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@
from policyengine.core import Dataset

from .analysis import general_policy_reform_analysis
from .datasets import PolicyEngineUSDataset, USYearData, create_datasets
from .datasets import (
PolicyEngineUSDataset,
USYearData,
create_datasets,
ensure_datasets,
load_datasets,
)
from .model import (
PolicyEngineUS,
PolicyEngineUSLatest,
Expand All @@ -25,6 +31,8 @@
"USYearData",
"PolicyEngineUSDataset",
"create_datasets",
"load_datasets",
"ensure_datasets",
"PolicyEngineUS",
"PolicyEngineUSLatest",
"us_model",
Expand Down
Loading