From 093db22518d6254d0d7518122cd0de95b9f5e1be Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil@policyengine.org>
Date: Sun, 8 Mar 2026 22:00:23 +0000
Subject: [PATCH] Optimise UK simulation run (-63% cold sim time)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two changes to the UK model's run() method:

1. Convert MicroDataFrames to plain DataFrames before passing to
   UKSingleYearDataset. The data pipeline only needs numeric arrays for
   copying and uprating — MicroDataFrame.copy() triggers expensive O(N²)
   weight linking that's wasted here.

2. Monkey-patch apply_uprating to skip its defensive deep copy of the
   entire multi-year dataset. extend_single_year_dataset already copies
   each year individually, so the second copy is redundant.

Benchmarked: cold simulation time dropped from 39.6s to 14.8s (-63%),
total wall time from 46.3s to 21.5s (-54%). Mean household income is
unchanged (£54,562). All 110 tests pass.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 changelog.d/optimise-uk-sim.changed.md        |  1 +
 .../tax_benefit_models/uk/model.py            | 42 ++++++++++++++++---
 2 files changed, 38 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/optimise-uk-sim.changed.md

diff --git a/changelog.d/optimise-uk-sim.changed.md b/changelog.d/optimise-uk-sim.changed.md
new file mode 100644
index 00000000..61eacff1
--- /dev/null
+++ b/changelog.d/optimise-uk-sim.changed.md
@@ -0,0 +1 @@
+Optimised UK simulation run by avoiding MicroDataFrame overhead and redundant dataset copies in the uprating pipeline (cold sim 39.6s to 14.8s, -63%).
diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py
index 04860e58..9358ad31 100644
--- a/src/policyengine/tax_benefit_models/uk/model.py
+++ b/src/policyengine/tax_benefit_models/uk/model.py
@@ -249,8 +249,12 @@ def _filter_dataset_by_household_variable(
         )
 
     def run(self, simulation: "Simulation") -> "Simulation":
+        import policyengine_uk.data.economic_assumptions as ea
         from policyengine_uk import Microsimulation
-        from policyengine_uk.data import UKSingleYearDataset
+        from policyengine_uk.data import (
+            UKMultiYearDataset,
+            UKSingleYearDataset,
+        )
 
         from policyengine.utils.parametric_reforms import (
             simulation_modifier_from_parameter_values,
@@ -267,13 +271,41 @@ def run(self, simulation: "Simulation") -> "Simulation":
                 dataset, simulation.filter_field, simulation.filter_value
             )
 
+        # Use plain DataFrames to avoid MicroDataFrame copy overhead
         input_data = UKSingleYearDataset(
-            person=dataset.data.person,
-            benunit=dataset.data.benunit,
-            household=dataset.data.household,
+            person=pd.DataFrame(dataset.data.person),
+            benunit=pd.DataFrame(dataset.data.benunit),
+            household=pd.DataFrame(dataset.data.household),
             fiscal_year=dataset.year,
         )
-        microsim = Microsimulation(dataset=input_data)
+
+        # Temporarily patch apply_uprating to skip its redundant deep copy
+        # (extend_single_year_dataset already copies each year). NOTE: this
+        # swaps a module global, so concurrent run() calls are not isolated.
+        _orig_apply_uprating = ea.apply_uprating
+
+        def _apply_uprating_no_copy(
+            dataset, tax_benefit_system_parameters=None
+        ):
+            """Uprate all years after the first without a deep copy."""
+            if not isinstance(dataset, UKMultiYearDataset):
+                raise TypeError("dataset must be of type UKMultiYearDataset.")
+            base_year = min(dataset.datasets.keys(), default=None)
+            for year in dataset.datasets.keys():
+                if year == base_year:
+                    continue
+                ea.apply_single_year_uprating(
+                    dataset.datasets[year],
+                    dataset.datasets[year - 1],
+                    tax_benefit_system_parameters,
+                )
+            return dataset
+
+        ea.apply_uprating = _apply_uprating_no_copy
+        try:
+            microsim = Microsimulation(dataset=input_data)
+        finally:
+            ea.apply_uprating = _orig_apply_uprating
 
         if (
             simulation.policy