From 093db22518d6254d0d7518122cd0de95b9f5e1be Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Sun, 8 Mar 2026 22:00:23 +0000 Subject: [PATCH] Optimise UK simulation run (-63% cold sim time) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes to the UK model's run() method: 1. Convert MicroDataFrames to plain DataFrames before passing to UKSingleYearDataset. The data pipeline only needs numeric arrays for copying and uprating — MicroDataFrame.copy() triggers expensive O(N²) weight linking that's wasted here. 2. Monkey-patch apply_uprating to skip its defensive deep copy of the entire multi-year dataset. extend_single_year_dataset already copies each year individually, so the second copy is redundant. Benchmarked: cold simulate dropped from 39.6s to 14.8s (-63%), wall total from 46.3s to 21.5s (-54%). Mean household income unchanged (£54,562). All 110 tests pass. Co-Authored-By: Claude --- changelog.d/optimise-uk-sim.changed.md | 1 + .../tax_benefit_models/uk/model.py | 42 ++++++++++++++++--- 2 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 changelog.d/optimise-uk-sim.changed.md diff --git a/changelog.d/optimise-uk-sim.changed.md b/changelog.d/optimise-uk-sim.changed.md new file mode 100644 index 00000000..61eacff1 --- /dev/null +++ b/changelog.d/optimise-uk-sim.changed.md @@ -0,0 +1 @@ +Optimised UK simulation run by avoiding MicroDataFrame overhead and redundant dataset copies in the uprating pipeline (cold sim 39.6s to 14.8s, -63%). diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index 04860e58..9358ad31 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -249,8 +249,12 @@ def _filter_dataset_by_household_variable( ) def run(self, simulation: "Simulation") -> "Simulation": + import policyengine_uk.data.economic_assumptions as ea from policyengine_uk import Microsimulation - from policyengine_uk.data import UKSingleYearDataset + from policyengine_uk.data import ( + UKMultiYearDataset, + UKSingleYearDataset, + ) from policyengine.utils.parametric_reforms import ( simulation_modifier_from_parameter_values, @@ -267,13 +271,41 @@ def run(self, simulation: "Simulation") -> "Simulation": dataset, simulation.filter_field, simulation.filter_value ) + # Use plain DataFrames to avoid MicroDataFrame copy overhead input_data = UKSingleYearDataset( - person=dataset.data.person, - benunit=dataset.data.benunit, - household=dataset.data.household, + person=pd.DataFrame(dataset.data.person), + benunit=pd.DataFrame(dataset.data.benunit), + household=pd.DataFrame(dataset.data.household), fiscal_year=dataset.year, ) - microsim = Microsimulation(dataset=input_data) + + # Patch apply_uprating to skip redundant deep copy of + # the multi-year dataset (each year is already copied + # individually by extend_single_year_dataset) + _orig_apply_uprating = ea.apply_uprating + + def _apply_uprating_no_copy( + dataset, tax_benefit_system_parameters=None + ): + if not isinstance(dataset, UKMultiYearDataset): + raise TypeError("dataset must be of type UKMultiYearDataset.") + for year in dataset.datasets.keys(): + if year == min(dataset.datasets.keys()): + continue + current_year = dataset.datasets[year] + prev_year = dataset.datasets[year - 1] + ea.apply_single_year_uprating( + current_year, + prev_year, + tax_benefit_system_parameters, + ) + return dataset + + ea.apply_uprating = _apply_uprating_no_copy + try: + microsim = Microsimulation(dataset=input_data) + finally: + ea.apply_uprating = _orig_apply_uprating if ( simulation.policy