Skip to content

Commit bd4292f

Browse files
authored
Safety catches and helpers (#188)
* Move entity variables out to constant
* Versioning
* Annotate
* Add safety catch for country packages not being installed, and dataset helpers
* Small fixes
* Versioning
* Format
* Fix format
1 parent 7634ce9 commit bd4292f

File tree

10 files changed

+298
-72
lines changed

10 files changed

+298
-72
lines changed

changelog_entry.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
- bump: patch
2+
changes:
3+
fixed:
4+
- Minor fixes

src/policyengine/core/simulation.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ class Simulation(BaseModel):
2424
def run(self):
    """Run this simulation by delegating to its tax-benefit model version."""
    model_version = self.tax_benefit_model_version
    model_version.run(self)
2626

27+
def ensure(self):
    """Make sure the simulation's output dataset is available.

    First asks the tax-benefit model version to load this simulation's
    output. If that load raises any exception, the simulation is run and
    its output saved instead.
    """
    import logging

    try:
        self.tax_benefit_model_version.load(self)
    except Exception:
        # Deliberately broad: any load failure falls back to recomputing.
        # Log the cause so a genuine error is not discarded without trace.
        logging.getLogger(__name__).debug(
            "Loading simulation output failed; running and saving instead.",
            exc_info=True,
        )
        self.run()
        self.save()
33+
2734
def save(self):
    """Persist the simulation's output dataset via its model version."""
    model_version = self.tax_benefit_model_version
    model_version.save(self)
Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,38 @@
11
"""PolicyEngine UK tax-benefit model - imports from uk/ module."""
22

3-
from .uk import (
4-
PolicyEngineUK,
5-
PolicyEngineUKDataset,
6-
PolicyEngineUKLatest,
7-
ProgrammeStatistics,
8-
UKYearData,
9-
create_datasets,
10-
general_policy_reform_analysis,
11-
uk_latest,
12-
uk_model,
13-
)
3+
from importlib.util import find_spec
144

15-
__all__ = [
16-
"UKYearData",
17-
"PolicyEngineUKDataset",
18-
"create_datasets",
19-
"PolicyEngineUK",
20-
"PolicyEngineUKLatest",
21-
"uk_model",
22-
"uk_latest",
23-
"general_policy_reform_analysis",
24-
"ProgrammeStatistics",
25-
]
5+
if find_spec("policyengine_uk") is not None:
6+
from .uk import (
7+
PolicyEngineUK,
8+
PolicyEngineUKDataset,
9+
PolicyEngineUKLatest,
10+
ProgrammeStatistics,
11+
UKYearData,
12+
create_datasets,
13+
ensure_datasets,
14+
general_policy_reform_analysis,
15+
load_datasets,
16+
uk_latest,
17+
uk_model,
18+
)
2619

27-
# Rebuild models to resolve forward references
28-
from policyengine.core import Dataset
20+
__all__ = [
21+
"UKYearData",
22+
"PolicyEngineUKDataset",
23+
"create_datasets",
24+
"load_datasets",
25+
"ensure_datasets",
26+
"PolicyEngineUK",
27+
"PolicyEngineUKLatest",
28+
"uk_model",
29+
"uk_latest",
30+
"general_policy_reform_analysis",
31+
"ProgrammeStatistics",
32+
]
2933

30-
Dataset.model_rebuild()
31-
UKYearData.model_rebuild()
32-
PolicyEngineUKDataset.model_rebuild()
33-
PolicyEngineUKLatest.model_rebuild()
34+
# Rebuild models to resolve forward references
35+
PolicyEngineUKDataset.model_rebuild()
36+
PolicyEngineUKLatest.model_rebuild()
37+
else:
38+
__all__ = []
Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,44 @@
11
"""PolicyEngine UK tax-benefit model."""
22

3-
from .analysis import general_policy_reform_analysis
4-
from .datasets import PolicyEngineUKDataset, UKYearData, create_datasets
5-
from .model import PolicyEngineUK, PolicyEngineUKLatest, uk_latest, uk_model
6-
from .outputs import ProgrammeStatistics
3+
from importlib.util import find_spec
74

8-
__all__ = [
9-
"UKYearData",
10-
"PolicyEngineUKDataset",
11-
"create_datasets",
12-
"PolicyEngineUK",
13-
"PolicyEngineUKLatest",
14-
"uk_model",
15-
"uk_latest",
16-
"general_policy_reform_analysis",
17-
"ProgrammeStatistics",
18-
]
5+
if find_spec("policyengine_uk") is not None:
6+
from policyengine.core import Dataset
197

20-
# Rebuild models to resolve forward references
21-
from policyengine.core import Dataset
8+
from .analysis import general_policy_reform_analysis
9+
from .datasets import (
10+
PolicyEngineUKDataset,
11+
UKYearData,
12+
create_datasets,
13+
ensure_datasets,
14+
load_datasets,
15+
)
16+
from .model import (
17+
PolicyEngineUK,
18+
PolicyEngineUKLatest,
19+
uk_latest,
20+
uk_model,
21+
)
22+
from .outputs import ProgrammeStatistics
2223

23-
Dataset.model_rebuild()
24-
UKYearData.model_rebuild()
25-
PolicyEngineUKDataset.model_rebuild()
26-
PolicyEngineUKLatest.model_rebuild()
24+
# Rebuild Pydantic models to resolve forward references
25+
Dataset.model_rebuild()
26+
UKYearData.model_rebuild()
27+
PolicyEngineUKDataset.model_rebuild()
28+
PolicyEngineUKLatest.model_rebuild()
29+
30+
__all__ = [
31+
"UKYearData",
32+
"PolicyEngineUKDataset",
33+
"create_datasets",
34+
"load_datasets",
35+
"ensure_datasets",
36+
"PolicyEngineUK",
37+
"PolicyEngineUKLatest",
38+
"uk_model",
39+
"uk_latest",
40+
"general_policy_reform_analysis",
41+
"ProgrammeStatistics",
42+
]
43+
else:
44+
__all__ = []

src/policyengine/tax_benefit_models/uk/datasets.py

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,7 @@ def model_post_init(self, __context):
3737
if self.data is not None:
3838
self.save()
3939
elif self.filepath and not self.data:
40-
try:
41-
self.load()
42-
except FileNotFoundError:
43-
# File doesn't exist yet, that's OK
44-
pass
40+
self.load()
4541

4642
def save(self) -> None:
4743
"""Save dataset to HDF5 file."""
@@ -85,7 +81,9 @@ def create_datasets(
8581
"hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
8682
],
8783
years: list[int] = [2026, 2027, 2028, 2029, 2030],
88-
) -> None:
84+
data_folder: str = "./data",
85+
) -> dict[str, PolicyEngineUKDataset]:
86+
result = {}
8987
for dataset in datasets:
9088
from policyengine_uk import Microsimulation
9189

@@ -139,9 +137,10 @@ def create_datasets(
139137
)
140138

141139
uk_dataset = PolicyEngineUKDataset(
140+
id=f"{Path(dataset).stem}_year_{year}",
142141
name=f"{dataset}-year-{year}",
143142
description=f"UK Dataset for year {year} based on {dataset}",
144-
filepath=f"./data/{Path(dataset).stem}_year_{year}.h5",
143+
filepath=f"{data_folder}/{Path(dataset).stem}_year_{year}.h5",
145144
year=year,
146145
data=UKYearData(
147146
person=MicroDataFrame(person_df, weights="person_weight"),
@@ -154,3 +153,75 @@ def create_datasets(
154153
),
155154
)
156155
uk_dataset.save()
156+
157+
dataset_key = f"{Path(dataset).stem}_{year}"
158+
result[dataset_key] = uk_dataset
159+
160+
return result
161+
162+
163+
def load_datasets(
    datasets: list[str] = [
        "hf://policyengine/policyengine-uk-data/frs_2023_24.h5",
        "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
    ],
    years: list[int] = [2026, 2027, 2028, 2029, 2030],
    data_folder: str = "./data",
) -> dict[str, PolicyEngineUKDataset]:
    """Load previously saved per-year UK datasets from ``data_folder``.

    For every dataset/year pair, a ``PolicyEngineUKDataset`` is built that
    points at ``{data_folder}/{stem}_year_{year}.h5`` (``stem`` being the
    file stem of the dataset path) and is then loaded.

    Args:
        datasets: Source dataset paths; only their file stems are used here.
        years: Years to load data for.
        data_folder: Directory containing the saved dataset files.

    Returns:
        Dictionary mapping ``"{stem}_{year}"`` to the loaded dataset.
    """
    # The list defaults are only read, never mutated, so sharing them
    # across calls is safe.
    result = {}
    for dataset in datasets:
        stem = Path(dataset).stem
        for year in years:
            uk_dataset = PolicyEngineUKDataset(
                # Same id scheme as create_datasets, so a dataset has the
                # same identity whether it was just created or loaded.
                id=f"{stem}_year_{year}",
                name=f"{dataset}-year-{year}",
                description=f"UK Dataset for year {year} based on {dataset}",
                filepath=f"{data_folder}/{stem}_year_{year}.h5",
                year=year,
            )
            # NOTE(review): model_post_init appears to call load() already
            # when a filepath is given without data, so this explicit call
            # may be redundant; confirm before removing.
            uk_dataset.load()

            result[f"{stem}_{year}"] = uk_dataset

    return result
187+
188+
189+
def ensure_datasets(
    datasets: list[str] = [
        "hf://policyengine/policyengine-uk-data/frs_2023_24.h5",
        "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5",
    ],
    years: list[int] = [2026, 2027, 2028, 2029, 2030],
    data_folder: str = "./data",
) -> dict[str, PolicyEngineUKDataset]:
    """Return the requested datasets, loading them if saved or else creating them.

    Every dataset/year pair is expected at
    ``{data_folder}/{stem}_year_{year}.h5``. When all of those files exist
    they are loaded; if any one is missing, the whole set is created.

    Args:
        datasets: List of HuggingFace dataset paths
        years: List of years to load/create data for
        data_folder: Directory containing or to save the dataset files

    Returns:
        Dictionary mapping dataset keys to PolicyEngineUKDataset objects
    """
    expected_files = (
        Path(f"{data_folder}/{Path(dataset).stem}_year_{year}.h5")
        for dataset in datasets
        for year in years
    )

    # all() stops at the first missing file, like the early-exit scan.
    if all(candidate.exists() for candidate in expected_files):
        provider = load_datasets
    else:
        provider = create_datasets

    return provider(datasets=datasets, years=years, data_folder=data_folder)

src/policyengine/tax_benefit_models/uk/model.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,17 +265,29 @@ def save(self, simulation: "Simulation"):
265265

266266
def load(self, simulation: "Simulation"):
    """Load the simulation's output dataset.

    The output file is expected next to the input dataset file, named
    ``<simulation.id>.h5``. A ``PolicyEngineUKDataset`` pointing at it is
    assigned to ``simulation.output_dataset``. If the file can be stat'ed,
    ``simulation.created_at`` and ``simulation.updated_at`` are set from
    its file-system timestamps.
    """
    filepath = str(
        Path(simulation.dataset.filepath).parent / (simulation.id + ".h5")
    )

    simulation.output_dataset = PolicyEngineUKDataset(
        id=simulation.id,
        name=simulation.dataset.name,
        description=simulation.dataset.description,
        filepath=filepath,
        year=simulation.dataset.year,
        is_output_dataset=True,
    )

    # Load timestamps from file system metadata. One stat() call replaces
    # the exists()/getctime()/getmtime() sequence, so the file cannot
    # vanish between the check and the reads.
    try:
        file_stat = Path(filepath).stat()
    except (OSError, ValueError):
        # Missing or inaccessible file: leave the timestamps untouched.
        return

    # NOTE(review): on Unix st_ctime is the last metadata change time, not
    # the creation time; confirm this is acceptable for created_at.
    simulation.created_at = datetime.datetime.fromtimestamp(
        file_stat.st_ctime
    )
    simulation.updated_at = datetime.datetime.fromtimestamp(
        file_stat.st_mtime
    )
291+
280292

281293
uk_latest = PolicyEngineUKLatest()

src/policyengine/tax_benefit_models/us.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,20 @@
99
PolicyEngineUSLatest,
1010
ProgramStatistics,
1111
USYearData,
12+
create_datasets,
13+
ensure_datasets,
1214
general_policy_reform_analysis,
15+
load_datasets,
1316
us_latest,
1417
us_model,
1518
)
1619

1720
__all__ = [
1821
"USYearData",
1922
"PolicyEngineUSDataset",
23+
"create_datasets",
24+
"load_datasets",
25+
"ensure_datasets",
2026
"PolicyEngineUS",
2127
"PolicyEngineUSLatest",
2228
"us_model",

src/policyengine/tax_benefit_models/us/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@
66
from policyengine.core import Dataset
77

88
from .analysis import general_policy_reform_analysis
9-
from .datasets import PolicyEngineUSDataset, USYearData, create_datasets
9+
from .datasets import (
10+
PolicyEngineUSDataset,
11+
USYearData,
12+
create_datasets,
13+
ensure_datasets,
14+
load_datasets,
15+
)
1016
from .model import (
1117
PolicyEngineUS,
1218
PolicyEngineUSLatest,
@@ -25,6 +31,8 @@
2531
"USYearData",
2632
"PolicyEngineUSDataset",
2733
"create_datasets",
34+
"load_datasets",
35+
"ensure_datasets",
2836
"PolicyEngineUS",
2937
"PolicyEngineUSLatest",
3038
"us_model",

0 commit comments

Comments
 (0)