From 26a968d91a53004e08cdf2831505ecafc86c6115 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 18 Nov 2025 13:16:00 +0000 Subject: [PATCH 1/8] Move entity variables out to constant --- .../tax_benefit_models/uk/model.py | 136 +++++++++--------- .../tax_benefit_models/us/model.py | 108 +++++++------- 2 files changed, 122 insertions(+), 122 deletions(-) diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index 6b8c5c7..5d16ff0 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -44,6 +44,73 @@ class PolicyEngineUKLatest(TaxBenefitModelVersion): upload_time ) + entity_variables = { + "person": [ + # IDs and weights + "person_id", + "benunit_id", + "household_id", + "person_weight", + # Demographics + "age", + "gender", + "is_adult", + "is_SP_age", + "is_child", + # Income + "employment_income", + "self_employment_income", + "pension_income", + "private_pension_income", + "savings_interest_income", + "dividend_income", + "property_income", + "total_income", + "earned_income", + # Benefits + "universal_credit", + "child_benefit", + "pension_credit", + "income_support", + "working_tax_credit", + "child_tax_credit", + # Tax + "income_tax", + "national_insurance", + ], + "benunit": [ + # IDs and weights + "benunit_id", + "benunit_weight", + # Structure + "family_type", + # Income and benefits + "universal_credit", + "child_benefit", + "working_tax_credit", + "child_tax_credit", + ], + "household": [ + # IDs and weights + "household_id", + "household_weight", + # Income measures + "household_net_income", + "hbai_household_net_income", + "equiv_hbai_household_net_income", + "household_market_income", + "household_gross_income", + # Benefits and tax + "household_benefits", + "household_tax", + "vat", + # Housing + "rent", + "council_tax", + "tenure_type", + ], + } + def __init__(self, **kwargs: dict): super().__init__(**kwargs) from policyengine_core.enums import Enum @@ -153,80 +220,13 @@ def run(self, simulation: "Simulation") -> "Simulation": ) modifier(microsim) - entity_variables = { - "person": [ - # IDs and weights - "person_id", - "benunit_id", - "household_id", - "person_weight", - # Demographics - "age", - "gender", - "is_adult", - "is_SP_age", - "is_child", - # Income - "employment_income", - "self_employment_income", - "pension_income", - "private_pension_income", - "savings_interest_income", - "dividend_income", - "property_income", - "total_income", - "earned_income", - # Benefits - "universal_credit", - "child_benefit", - "pension_credit", - "income_support", - "working_tax_credit", - "child_tax_credit", - # Tax - "income_tax", - "national_insurance", - ], - "benunit": [ - # IDs and weights - "benunit_id", - "benunit_weight", - # Structure - "family_type", - # Income and benefits - "universal_credit", - "child_benefit", - "working_tax_credit", - "child_tax_credit", - ], - "household": [ - # IDs and weights - "household_id", - "household_weight", - # Income measures - "household_net_income", - "hbai_household_net_income", - "equiv_hbai_household_net_income", - "household_market_income", - "household_gross_income", - # Benefits and tax - "household_benefits", - "household_tax", - "vat", - # Housing - "rent", - "council_tax", - "tenure_type", - ], - } - data = { "person": pd.DataFrame(), "benunit": pd.DataFrame(), "household": pd.DataFrame(), } - for entity, variables in entity_variables.items(): + for entity, variables in self.entity_variables.items(): for var in variables: data[entity][var] = microsim.calculate( var, period=simulation.dataset.year, map_to=entity diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index a5a267a..67ea18e 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -45,6 +45,59 @@ class PolicyEngineUSLatest(TaxBenefitModelVersion): version: str = None created_at: datetime.datetime = None + entity_variables = { + "person": [ + # IDs and weights + "person_id", + "marital_unit_id", + "family_id", + "spm_unit_id", + "tax_unit_id", + "household_id", + "person_weight", + # Demographics + "age", + # Income + "employment_income", + # Benefits + "ssi", + "social_security", + "medicaid", + "unemployment_compensation", + ], + "marital_unit": [ + "marital_unit_id", + "marital_unit_weight", + ], + "family": [ + "family_id", + "family_weight", + ], + "spm_unit": [ + "spm_unit_id", + "spm_unit_weight", + "snap", + "tanf", + "spm_unit_net_income", + ], + "tax_unit": [ + "tax_unit_id", + "tax_unit_weight", + "income_tax", + "employee_payroll_tax", + "eitc", + "ctc", + ], + "household": [ + "household_id", + "household_weight", + "household_net_income", + "household_benefits", + "household_tax", + "household_market_income", + ], + } + def __init__(self, **kwargs: dict): # Lazy-load package metadata if not provided if "version" not in kwargs or kwargs.get("version") is None: @@ -156,59 +209,6 @@ def run(self, simulation: "Simulation") -> "Simulation": ) modifier(microsim) - entity_variables = { - "person": [ - # IDs and weights - "person_id", - "marital_unit_id", - "family_id", - "spm_unit_id", - "tax_unit_id", - "household_id", - "person_weight", - # Demographics - "age", - # Income - "employment_income", - # Benefits - "ssi", - "social_security", - "medicaid", - "unemployment_compensation", - ], - "marital_unit": [ - "marital_unit_id", - "marital_unit_weight", - ], - "family": [ - "family_id", - "family_weight", - ], - "spm_unit": [ - "spm_unit_id", - "spm_unit_weight", - "snap", - "tanf", - "spm_unit_net_income", - ], - "tax_unit": [ - "tax_unit_id", - "tax_unit_weight", - "income_tax", - "employee_payroll_tax", - "eitc", - "ctc", - ], - "household": [ - "household_id", - "household_weight", - "household_net_income", - "household_benefits", - "household_tax", - "household_market_income", - ], - } - data = { "person": pd.DataFrame(), "marital_unit": pd.DataFrame(), @@ -259,7 +259,7 @@ def run(self, simulation: "Simulation") -> "Simulation": data["person"][target_col] = person_input_df[col].values # Then calculate non-ID, non-weight variables from simulation - for entity, variables in entity_variables.items(): + for entity, variables in self.entity_variables.items(): for var in variables: if var not in id_columns and var not in weight_columns: data[entity][var] = microsim.calculate( From 152891d2da9e96bb841584419ee74403774762f3 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 18 Nov 2025 13:17:15 +0000 Subject: [PATCH 2/8] Versioning --- changelog_entry.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29..4fe4868 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Entity variables moved out to an editable constant. From e41b243eb93fe6d43a204062445099c3a563bb33 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 18 Nov 2025 13:41:36 +0000 Subject: [PATCH 3/8] Annotate --- src/policyengine/tax_benefit_models/uk/model.py | 2 +- src/policyengine/tax_benefit_models/us/model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index 5d16ff0..571ceb1 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -44,7 +44,7 @@ class PolicyEngineUKLatest(TaxBenefitModelVersion): upload_time ) - entity_variables = { + entity_variables: dict[str, list[str]] = { "person": [ # IDs and weights "person_id", diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index 67ea18e..9fd5f05 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -45,7 +45,7 @@ class PolicyEngineUSLatest(TaxBenefitModelVersion): version: str = None created_at: datetime.datetime = None - entity_variables = { + entity_variables: dict[str, list[str]] = { "person": [ # IDs and weights "person_id", From fa25964563caedb95b9e2feb34526048d0d7e45b Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 18 Nov 2025 17:00:20 +0000 Subject: [PATCH 4/8] Add safety catch for country packages not being installed, and dataset helpers --- src/policyengine/tax_benefit_models/uk.py | 61 +++++++------- .../tax_benefit_models/uk/__init__.py | 55 +++++++----- .../tax_benefit_models/uk/datasets.py | 72 +++++++++++++++- src/policyengine/tax_benefit_models/us.py | 6 ++ .../tax_benefit_models/us/__init__.py | 10 ++- .../tax_benefit_models/us/datasets.py | 84 ++++++++++++++++++- 6 files changed, 234 insertions(+), 54 deletions(-) diff --git a/src/policyengine/tax_benefit_models/uk.py b/src/policyengine/tax_benefit_models/uk.py index a9fb102..31946f3 100644 --- a/src/policyengine/tax_benefit_models/uk.py +++ b/src/policyengine/tax_benefit_models/uk.py @@ -1,33 +1,38 @@ """PolicyEngine UK tax-benefit model - imports from uk/ module.""" -from .uk import ( - PolicyEngineUK, - PolicyEngineUKDataset, - PolicyEngineUKLatest, - ProgrammeStatistics, - UKYearData, - create_datasets, - general_policy_reform_analysis, - uk_latest, - uk_model, -) +from importlib.util import find_spec -__all__ = [ - "UKYearData", - "PolicyEngineUKDataset", - "create_datasets", - "PolicyEngineUK", - "PolicyEngineUKLatest", - "uk_model", - "uk_latest", - "general_policy_reform_analysis", - "ProgrammeStatistics", -] +if find_spec("policyengine_uk") is not None: + from .uk import ( + PolicyEngineUK, + PolicyEngineUKDataset, + PolicyEngineUKLatest, + ProgrammeStatistics, + UKYearData, + create_datasets, + general_policy_reform_analysis, + load_datasets, + ensure_datasets, + uk_latest, + uk_model, + ) -# Rebuild models to resolve forward references -from policyengine.core import Dataset + __all__ = [ + "UKYearData", + "PolicyEngineUKDataset", + "create_datasets", + "load_datasets", + "ensure_datasets", + "PolicyEngineUK", + "PolicyEngineUKLatest", + "uk_model", + "uk_latest", + "general_policy_reform_analysis", + "ProgrammeStatistics", + ] -Dataset.model_rebuild() -UKYearData.model_rebuild() -PolicyEngineUKDataset.model_rebuild() -PolicyEngineUKLatest.model_rebuild() + # Rebuild models to resolve forward references + PolicyEngineUKDataset.model_rebuild() + PolicyEngineUKLatest.model_rebuild() +else: + __all__ = [] diff --git a/src/policyengine/tax_benefit_models/uk/__init__.py b/src/policyengine/tax_benefit_models/uk/__init__.py index ade6e53..247a561 100644 --- a/src/policyengine/tax_benefit_models/uk/__init__.py +++ b/src/policyengine/tax_benefit_models/uk/__init__.py @@ -1,26 +1,39 @@ """PolicyEngine UK tax-benefit model.""" -from .analysis import general_policy_reform_analysis -from .datasets import PolicyEngineUKDataset, UKYearData, create_datasets -from .model import PolicyEngineUK, PolicyEngineUKLatest, uk_latest, uk_model -from .outputs import ProgrammeStatistics +from importlib.util import find_spec -__all__ = [ - "UKYearData", - "PolicyEngineUKDataset", - "create_datasets", - "PolicyEngineUK", - "PolicyEngineUKLatest", - "uk_model", - "uk_latest", - "general_policy_reform_analysis", - "ProgrammeStatistics", -] +if find_spec("policyengine_uk") is not None: + from policyengine.core import Dataset -# Rebuild models to resolve forward references -from policyengine.core import Dataset + from .analysis import general_policy_reform_analysis + from .datasets import ( + PolicyEngineUKDataset, + UKYearData, + create_datasets, + ensure_datasets, + load_datasets, + ) + from .model import PolicyEngineUK, PolicyEngineUKLatest, uk_latest, uk_model + from .outputs import ProgrammeStatistics -Dataset.model_rebuild() -UKYearData.model_rebuild() -PolicyEngineUKDataset.model_rebuild() -PolicyEngineUKLatest.model_rebuild() + # Rebuild Pydantic models to resolve forward references + Dataset.model_rebuild() + UKYearData.model_rebuild() + PolicyEngineUKDataset.model_rebuild() + PolicyEngineUKLatest.model_rebuild() + + __all__ = [ + "UKYearData", + "PolicyEngineUKDataset", + "create_datasets", + "load_datasets", + "ensure_datasets", + "PolicyEngineUK", + "PolicyEngineUKLatest", + "uk_model", + "uk_latest", + "general_policy_reform_analysis", + "ProgrammeStatistics", + ] +else: + __all__ = [] diff --git a/src/policyengine/tax_benefit_models/uk/datasets.py b/src/policyengine/tax_benefit_models/uk/datasets.py index bdf89d9..66adfc4 100644 --- a/src/policyengine/tax_benefit_models/uk/datasets.py +++ b/src/policyengine/tax_benefit_models/uk/datasets.py @@ -85,7 +85,9 @@ def create_datasets( "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5", ], years: list[int] = [2026, 2027, 2028, 2029, 2030], -) -> None: + data_folder: str = "./data", +) -> dict[str, PolicyEngineUKDataset]: + result = {} for dataset in datasets: from policyengine_uk import Microsimulation @@ -141,7 +143,7 @@ def create_datasets( uk_dataset = PolicyEngineUKDataset( name=f"{dataset}-year-{year}", description=f"UK Dataset for year {year} based on {dataset}", - filepath=f"./data/{Path(dataset).stem}_year_{year}.h5", + filepath=f"{data_folder}/{Path(dataset).stem}_year_{year}.h5", year=year, data=UKYearData( person=MicroDataFrame(person_df, weights="person_weight"), @@ -154,3 +156,69 @@ def create_datasets( ), ) uk_dataset.save() + + dataset_key = f"{Path(dataset).stem}_{year}" + result[dataset_key] = uk_dataset + + return result + + +def load_datasets( + datasets: list[str] = [ + "hf://policyengine/policyengine-uk-data/frs_2023_24.h5", + "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5", + ], + years: list[int] = [2026, 2027, 2028, 2029, 2030], + data_folder: str = "./data", +) -> dict[str, PolicyEngineUKDataset]: + result = {} + for dataset in datasets: + for year in years: + filepath = f"{data_folder}/{Path(dataset).stem}_year_{year}.h5" + uk_dataset = PolicyEngineUKDataset( + name=f"{dataset}-year-{year}", + description=f"UK Dataset for year {year} based on {dataset}", + filepath=filepath, + year=year, + ) + uk_dataset.load() + + dataset_key = f"{Path(dataset).stem}_{year}" + result[dataset_key] = uk_dataset + + return result + + +def ensure_datasets( + datasets: list[str] = [ + "hf://policyengine/policyengine-uk-data/frs_2023_24.h5", + "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5", + ], + years: list[int] = [2026, 2027, 2028, 2029, 2030], + data_folder: str = "./data", +) -> dict[str, PolicyEngineUKDataset]: + """Ensure datasets exist, loading if available or creating if not. + + Args: + datasets: List of HuggingFace dataset paths + years: List of years to load/create data for + data_folder: Directory containing or to save the dataset files + + Returns: + Dictionary mapping dataset keys to PolicyEngineUKDataset objects + """ + # Check if all dataset files exist + all_exist = True + for dataset in datasets: + for year in years: + filepath = Path(f"{data_folder}/{Path(dataset).stem}_year_{year}.h5") + if not filepath.exists(): + all_exist = False + break + if not all_exist: + break + + if all_exist: + return load_datasets(datasets=datasets, years=years, data_folder=data_folder) + else: + return create_datasets(datasets=datasets, years=years, data_folder=data_folder) diff --git a/src/policyengine/tax_benefit_models/us.py b/src/policyengine/tax_benefit_models/us.py index c915a3b..3cf6264 100644 --- a/src/policyengine/tax_benefit_models/us.py +++ b/src/policyengine/tax_benefit_models/us.py @@ -9,7 +9,10 @@ PolicyEngineUSLatest, ProgramStatistics, USYearData, + create_datasets, + ensure_datasets, general_policy_reform_analysis, + load_datasets, us_latest, us_model, ) @@ -17,6 +20,9 @@ __all__ = [ "USYearData", "PolicyEngineUSDataset", + "create_datasets", + "load_datasets", + "ensure_datasets", "PolicyEngineUS", "PolicyEngineUSLatest", "us_model", diff --git a/src/policyengine/tax_benefit_models/us/__init__.py b/src/policyengine/tax_benefit_models/us/__init__.py index 6336178..26d9da9 100644 --- a/src/policyengine/tax_benefit_models/us/__init__.py +++ b/src/policyengine/tax_benefit_models/us/__init__.py @@ -6,7 +6,13 @@ from policyengine.core import Dataset from .analysis import general_policy_reform_analysis - from .datasets import PolicyEngineUSDataset, USYearData, create_datasets + from .datasets import ( + PolicyEngineUSDataset, + USYearData, + create_datasets, + ensure_datasets, + load_datasets, + ) from .model import ( PolicyEngineUS, PolicyEngineUSLatest, @@ -25,6 +31,8 @@ "USYearData", "PolicyEngineUSDataset", "create_datasets", + "load_datasets", + "ensure_datasets", "PolicyEngineUS", "PolicyEngineUSLatest", "us_model", diff --git a/src/policyengine/tax_benefit_models/us/datasets.py b/src/policyengine/tax_benefit_models/us/datasets.py index 53643cc..b44d5ba 100644 --- a/src/policyengine/tax_benefit_models/us/datasets.py +++ b/src/policyengine/tax_benefit_models/us/datasets.py @@ -112,15 +112,21 @@ def create_datasets( "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5", ], years: list[int] = [2024, 2025, 2026, 2027, 2028], -) -> None: + data_folder: str = "./data", +) -> dict[str, PolicyEngineUSDataset]: """Create PolicyEngineUSDataset instances from HuggingFace dataset paths. Args: datasets: List of HuggingFace dataset paths (e.g., "hf://policyengine/policyengine-us-data/cps_2024.h5") years: List of years to extract data for + data_folder: Directory to save the dataset files + + Returns: + Dictionary mapping dataset keys (e.g., "enhanced_cps_2024") to PolicyEngineUSDataset objects """ from policyengine_us import Microsimulation + result = {} for dataset in datasets: sim = Microsimulation(dataset=dataset) @@ -265,7 +271,7 @@ def create_datasets( us_dataset = PolicyEngineUSDataset( name=f"{dataset}-year-{year}", description=f"US Dataset for year {year} based on {dataset}", - filepath=f"./data/{Path(dataset).stem}_year_{year}.h5", + filepath=f"{data_folder}/{Path(dataset).stem}_year_{year}.h5", year=year, data=USYearData( person=MicroDataFrame(person_df, weights="person_weight"), @@ -285,3 +291,77 @@ def create_datasets( ), ) us_dataset.save() + + dataset_key = f"{Path(dataset).stem}_{year}" + result[dataset_key] = us_dataset + + return result + + +def load_datasets( + datasets: list[str] = [ + "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5", + ], + years: list[int] = [2024, 2025, 2026, 2027, 2028], + data_folder: str = "./data", +) -> dict[str, PolicyEngineUSDataset]: + """Load PolicyEngineUSDataset instances from saved HDF5 files. + + Args: + datasets: List of HuggingFace dataset paths (used to derive file names) + years: List of years to load data for + data_folder: Directory containing the dataset files + + Returns: + Dictionary mapping dataset keys (e.g., "enhanced_cps_2024") to PolicyEngineUSDataset objects + """ + result = {} + for dataset in datasets: + for year in years: + filepath = f"{data_folder}/{Path(dataset).stem}_year_{year}.h5" + us_dataset = PolicyEngineUSDataset( + name=f"{dataset}-year-{year}", + description=f"US Dataset for year {year} based on {dataset}", + filepath=filepath, + year=year, + ) + us_dataset.load() + + dataset_key = f"{Path(dataset).stem}_{year}" + result[dataset_key] = us_dataset + + return result + + +def ensure_datasets( + datasets: list[str] = [ + "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5", + ], + years: list[int] = [2024, 2025, 2026, 2027, 2028], + data_folder: str = "./data", +) -> dict[str, PolicyEngineUSDataset]: + """Ensure datasets exist, loading if available or creating if not. + + Args: + datasets: List of HuggingFace dataset paths + years: List of years to load/create data for + data_folder: Directory containing or to save the dataset files + + Returns: + Dictionary mapping dataset keys to PolicyEngineUSDataset objects + """ + # Check if all dataset files exist + all_exist = True + for dataset in datasets: + for year in years: + filepath = Path(f"{data_folder}/{Path(dataset).stem}_year_{year}.h5") + if not filepath.exists(): + all_exist = False + break + if not all_exist: + break + + if all_exist: + return load_datasets(datasets=datasets, years=years, data_folder=data_folder) + else: + return create_datasets(datasets=datasets, years=years, data_folder=data_folder) From c387bcb6bf53f8556940563f5445e16bad8b0d0f Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 20 Nov 2025 13:40:04 +0000 Subject: [PATCH 5/8] Small fixes --- src/policyengine/core/simulation.py | 7 +++++++ .../tax_benefit_models/uk/datasets.py | 7 ++----- .../tax_benefit_models/uk/model.py | 20 +++++++++++++++---- .../tax_benefit_models/us/datasets.py | 7 ++----- .../tax_benefit_models/us/model.py | 20 +++++++++++++++---- 5 files changed, 43 insertions(+), 18 deletions(-) diff --git a/src/policyengine/core/simulation.py b/src/policyengine/core/simulation.py index f7c214e..4ebb295 100644 --- a/src/policyengine/core/simulation.py +++ b/src/policyengine/core/simulation.py @@ -24,6 +24,13 @@ class Simulation(BaseModel): def run(self): self.tax_benefit_model_version.run(self) + def ensure(self): + try: + self.tax_benefit_model_version.load(self) + except: + self.run() + self.save() + def save(self): """Save the simulation's output dataset.""" self.tax_benefit_model_version.save(self) diff --git a/src/policyengine/tax_benefit_models/uk/datasets.py b/src/policyengine/tax_benefit_models/uk/datasets.py index 66adfc4..ff47a58 100644 --- a/src/policyengine/tax_benefit_models/uk/datasets.py +++ b/src/policyengine/tax_benefit_models/uk/datasets.py @@ -37,11 +37,7 @@ def model_post_init(self, __context): if self.data is not None: self.save() elif self.filepath and not self.data: - try: - self.load() - except FileNotFoundError: - # File doesn't exist yet, that's OK - pass + self.load() def save(self) -> None: """Save dataset to HDF5 file.""" @@ -141,6 +137,7 @@ def create_datasets( ) uk_dataset = PolicyEngineUKDataset( + id=f"{Path(dataset).stem}_year_{year}", name=f"{dataset}-year-{year}", description=f"UK Dataset for year {year} based on {dataset}", filepath=f"{data_folder}/{Path(dataset).stem}_year_{year}.h5", diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index 571ceb1..abeee8f 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -265,17 +265,29 @@ def save(self, simulation: "Simulation"): def load(self, simulation: "Simulation"): """Load the simulation's output dataset.""" + import os + + filepath = str( + Path(simulation.dataset.filepath).parent / (simulation.id + ".h5") + ) + simulation.output_dataset = PolicyEngineUKDataset( id=simulation.id, name=simulation.dataset.name, description=simulation.dataset.description, - filepath=str( - Path(simulation.dataset.filepath).parent - / (simulation.id + ".h5") - ), + filepath=filepath, year=simulation.dataset.year, is_output_dataset=True, ) + # Load timestamps from file system metadata + if os.path.exists(filepath): + simulation.created_at = datetime.datetime.fromtimestamp( + os.path.getctime(filepath) + ) + simulation.updated_at = datetime.datetime.fromtimestamp( + os.path.getmtime(filepath) + ) + uk_latest = PolicyEngineUKLatest() diff --git a/src/policyengine/tax_benefit_models/us/datasets.py b/src/policyengine/tax_benefit_models/us/datasets.py index b44d5ba..dbbce08 100644 --- a/src/policyengine/tax_benefit_models/us/datasets.py +++ b/src/policyengine/tax_benefit_models/us/datasets.py @@ -44,11 +44,7 @@ def model_post_init(self, __context) -> None: if self.data is not None: self.save() elif self.filepath and not self.data: - try: - self.load() - except FileNotFoundError: - # File doesn't exist yet, that's OK - pass + self.load() def save(self) -> None: """Save dataset to HDF5 file.""" @@ -269,6 +265,7 @@ def create_datasets( tax_unit_df = entity_df us_dataset = PolicyEngineUSDataset( + id=f"{Path(dataset).stem}_year_{year}", name=f"{dataset}-year-{year}", description=f"US Dataset for year {year} based on {dataset}", filepath=f"{data_folder}/{Path(dataset).stem}_year_{year}.h5", diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index 9fd5f05..d65feac 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -311,18 +311,30 @@ def save(self, simulation: "Simulation"): def load(self, simulation: "Simulation"): """Load the simulation's output dataset.""" + import os + + filepath = str( + Path(simulation.dataset.filepath).parent / (simulation.id + ".h5") + ) + simulation.output_dataset = PolicyEngineUSDataset( id=simulation.id, name=simulation.dataset.name, description=simulation.dataset.description, - filepath=str( - Path(simulation.dataset.filepath).parent - / (simulation.id + ".h5") - ), + filepath=filepath, year=simulation.dataset.year, is_output_dataset=True, ) + # Load timestamps from file system metadata + if os.path.exists(filepath): + simulation.created_at = datetime.datetime.fromtimestamp( + os.path.getctime(filepath) + ) + simulation.updated_at = datetime.datetime.fromtimestamp( + os.path.getmtime(filepath) + ) + def _build_simulation_from_dataset(self, microsim, dataset, system): """Build a PolicyEngine Core simulation from dataset entity IDs. From e7d7ae836b1e1c7e88557dd560b67238cc5064b4 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 20 Nov 2025 13:40:27 +0000 Subject: [PATCH 6/8] Versioning --- changelog_entry.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index 4fe4868..76d27ba 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -1,4 +1,4 @@ - bump: patch changes: fixed: - - Entity variables moved out to an editable constant. + - Minor fixes From 5398c076b42d45e74db81710d033cc80cb872071 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 20 Nov 2025 13:40:54 +0000 Subject: [PATCH 7/8] Format --- src/policyengine/tax_benefit_models/uk/__init__.py | 7 ++++++- src/policyengine/tax_benefit_models/uk/datasets.py | 12 +++++++++--- src/policyengine/tax_benefit_models/us/datasets.py | 12 +++++++++--- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/policyengine/tax_benefit_models/uk/__init__.py b/src/policyengine/tax_benefit_models/uk/__init__.py index 247a561..d933589 100644 --- a/src/policyengine/tax_benefit_models/uk/__init__.py +++ b/src/policyengine/tax_benefit_models/uk/__init__.py @@ -13,7 +13,12 @@ ensure_datasets, load_datasets, ) - from .model import PolicyEngineUK, PolicyEngineUKLatest, uk_latest, uk_model + from .model import ( + PolicyEngineUK, + PolicyEngineUKLatest, + uk_latest, + uk_model, + ) from .outputs import ProgrammeStatistics # Rebuild Pydantic models to resolve forward references diff --git a/src/policyengine/tax_benefit_models/uk/datasets.py b/src/policyengine/tax_benefit_models/uk/datasets.py index ff47a58..138ee78 100644 --- a/src/policyengine/tax_benefit_models/uk/datasets.py +++ b/src/policyengine/tax_benefit_models/uk/datasets.py @@ -208,7 +208,9 @@ def ensure_datasets( all_exist = True for dataset in datasets: for year in years: - filepath = Path(f"{data_folder}/{Path(dataset).stem}_year_{year}.h5") + filepath = Path( + f"{data_folder}/{Path(dataset).stem}_year_{year}.h5" + ) if not filepath.exists(): all_exist = False break @@ -216,6 +218,10 @@ def ensure_datasets( break if all_exist: - return load_datasets(datasets=datasets, years=years, data_folder=data_folder) + return load_datasets( + datasets=datasets, years=years, data_folder=data_folder + ) else: - return create_datasets(datasets=datasets, years=years, data_folder=data_folder) + return create_datasets( + datasets=datasets, years=years, data_folder=data_folder + ) diff --git a/src/policyengine/tax_benefit_models/us/datasets.py b/src/policyengine/tax_benefit_models/us/datasets.py index dbbce08..f6f64db 100644 --- a/src/policyengine/tax_benefit_models/us/datasets.py +++ b/src/policyengine/tax_benefit_models/us/datasets.py @@ -351,7 +351,9 @@ def ensure_datasets( all_exist = True for dataset in datasets: for year in years: - filepath = Path(f"{data_folder}/{Path(dataset).stem}_year_{year}.h5") + filepath = Path( + f"{data_folder}/{Path(dataset).stem}_year_{year}.h5" + ) if not filepath.exists(): all_exist = False break @@ -359,6 +361,10 @@ def ensure_datasets( break if all_exist: - return load_datasets(datasets=datasets, years=years, data_folder=data_folder) + return load_datasets( + datasets=datasets, years=years, data_folder=data_folder + ) else: - return create_datasets(datasets=datasets, years=years, data_folder=data_folder) + return create_datasets( + datasets=datasets, years=years, data_folder=data_folder + ) From f43e1af7e7c86b21a9d73870ec619bc2426ecb3d Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 20 Nov 2025 14:06:21 +0000 Subject: [PATCH 8/8] Fix format --- src/policyengine/core/simulation.py | 2 +- src/policyengine/tax_benefit_models/uk.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/policyengine/core/simulation.py b/src/policyengine/core/simulation.py index 4ebb295..0a5c106 100644 --- a/src/policyengine/core/simulation.py +++ b/src/policyengine/core/simulation.py @@ -27,7 +27,7 @@ def run(self): def ensure(self): try: self.tax_benefit_model_version.load(self) - except: + except Exception: self.run() self.save() diff --git a/src/policyengine/tax_benefit_models/uk.py b/src/policyengine/tax_benefit_models/uk.py index 31946f3..d6c1ad3 100644 --- a/src/policyengine/tax_benefit_models/uk.py +++ b/src/policyengine/tax_benefit_models/uk.py @@ -10,9 +10,9 @@ ProgrammeStatistics, UKYearData, create_datasets, + ensure_datasets, general_policy_reform_analysis, load_datasets, - ensure_datasets, uk_latest, uk_model, )