From 96bed021e47376100dcd698de8efd3c10097df91 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Thu, 5 Mar 2026 21:26:51 -0500 Subject: [PATCH 1/2] Switch code formatter from black to ruff format Co-Authored-By: Claude Opus 4.6 --- .github/workflows/pull_request.yaml | 4 +- .github/workflows/push.yaml | 4 +- Makefile | 2 +- changelog.d/switch-to-ruff.changed.md | 1 + docs/LA_methodology.ipynb | 15 +- docs/constituency_methodology.ipynb | 15 +- docs/methodology.ipynb | 145 +++++++++--------- docs/validation/constituencies.ipynb | 80 ++++++++-- docs/validation/local_authorities.ipynb | 77 ++++++++-- docs/validation/national.ipynb | 7 +- .../datasets/childcare/takeup_rate.py | 8 +- .../datasets/imputations/consumption.py | 4 +- .../targets/sources/_common.py | 2 +- .../microsimulation/test_reform_impacts.py | 18 +-- policyengine_uk_data/tests/test_aggregates.py | 6 +- .../tests/test_child_limit.py | 12 +- .../tests/test_low_income_deciles.py | 4 +- .../tests/test_non_negative_incomes.py | 6 +- ...sion_contributions_via_salary_sacrifice.py | 18 +-- policyengine_uk_data/tests/test_population.py | 6 +- .../tests/test_property_purchased.py | 20 +-- .../tests/test_salary_sacrifice_headcount.py | 12 +- .../tests/test_scotland_babies.py | 4 +- .../tests/test_scotland_uc_babies.py | 4 +- .../tests/test_target_registry.py | 12 +- .../tests/test_uc_by_children.py | 4 +- pyproject.toml | 21 +-- uv.lock | 108 ++++++------- 28 files changed, 362 insertions(+), 257 deletions(-) create mode 100644 changelog.d/switch-to-ruff.changed.md diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index e7be9e6f..0b8e9d64 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -24,9 +24,9 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install black + pip install "ruff>=0.9.0" - name: Check formatting - run: black . -l 79 --check + run: ruff format --check . test: name: Test runs-on: ubuntu-latest diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index d4575eb6..5ca27117 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -23,9 +23,9 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install black + pip install "ruff>=0.9.0" - name: Check formatting - run: black . -l 79 --check + run: ruff format --check . test: name: Build and test runs-on: ubuntu-latest diff --git a/Makefile b/Makefile index 327895d3..64699a0d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: data test format: - black . -l 79 + ruff format . test: pytest . diff --git a/changelog.d/switch-to-ruff.changed.md b/changelog.d/switch-to-ruff.changed.md new file mode 100644 index 00000000..3e176424 --- /dev/null +++ b/changelog.d/switch-to-ruff.changed.md @@ -0,0 +1 @@ +Switched code formatter from black to ruff format. diff --git a/docs/LA_methodology.ipynb b/docs/LA_methodology.ipynb index 4b81035a..46774f4b 100644 --- a/docs/LA_methodology.ipynb +++ b/docs/LA_methodology.ipynb @@ -374,10 +374,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/total_income.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/total_income.csv\"\n", + ")" ] }, { @@ -818,10 +821,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/age.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/age.csv\"\n", + ")" ] }, { @@ -1188,10 +1194,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/employment_income.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/employment_income.csv\"\n", + ")" ] }, { diff --git a/docs/constituency_methodology.ipynb b/docs/constituency_methodology.ipynb index 8166105a..9cacc36d 100644 --- a/docs/constituency_methodology.ipynb +++ b/docs/constituency_methodology.ipynb @@ -374,10 +374,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/total_income.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/total_income.csv\"\n", + ")" ] }, { @@ -818,10 +821,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/age.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/age.csv\"\n", + ")" ] }, { @@ -1302,10 +1308,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/employment_income.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/employment_income.csv\"\n", + ")" ] }, { diff --git a/docs/methodology.ipynb b/docs/methodology.ipynb index 019359af..d580a6d4 100644 --- a/docs/methodology.ipynb +++ b/docs/methodology.ipynb @@ -343,10 +343,12 @@ "import pandas as pd\n", "from itables import init_notebook_mode\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "\n", "init_notebook_mode(all_interactive=True)\n", "\n", + "\n", "def get_loss(dataset, reform, time_period):\n", " loss_results = get_loss_results(dataset, time_period, reform)\n", "\n", @@ -362,6 +364,7 @@ " loss_results[\"type\"] = loss_results.name.apply(get_type)\n", " return loss_results\n", "\n", + "\n", "reported_benefits = Reform.from_dict(\n", " {\n", " \"gov.contrib.policyengine.disable_simulated_benefits\": True,\n", @@ -414,10 +417,10 @@ 0.5348574279909283, 0.33769630678188683, 0.4724366479253982, - 0.056382436859752076, + 0.05638243685975208, 0.6323673982942971, 0.9366950828549806, - 0.11071012537714943, + 0.11071012537714944, 0.43224912766430945, 0.4932054498037529, 0.4262926636695862, @@ -428,7 +431,7 @@ 0.2753207784816197, 0.2723132074858013, 0.8419023088645935, - 0.38591469862268324, + 0.3859146986226832, 1, 1, 0.238984410722884, @@ -438,7 +441,7 @@ 0.07523280073668749, 0.6813040233247071, 1, - 0.09093881776916021, + 0.0909388177691602, 0.7953899467026746, 0.46345942912091304, 1, @@ -482,7 +485,7 @@ 0.056645307043117855, 0.04613438039833396, 0.17900131626571955, - 0.10437807305849049, + 0.10437807305849048, 0.04133579034001572, 0.01689458524887367, 0.07304306223253616, @@ -492,17 +495,17 @@ 0.02570792373305158, 0.2117702613060218, 0.06261644764053753, - 0.11048854967581091, + 0.11048854967581093, 0.05986189620431352, 0.0734813370478334, 0.14993575315772975, - 0.09216128695626061, + 0.0921612869562606, 0.06654101281860526, - 0.030242342070154266, + 0.030242342070154263, 0.20055057386109623, - 0.09490778457907527, + 0.09490778457907528, 0.15890381361993214, - 0.09147390139813617, + 0.09147390139813616, 0.07256124815551596, 0.14006199032920222, 0.06918929372173468, @@ -518,7 +521,7 @@ 0.05198270343522084, 0.02894483338415261, 0.19421519077802243, - 0.10227911889430563, + 0.10227911889430565, 0.022966625631099497, 0.04223796065100079, 0.07170636880139063, @@ -529,7 +532,7 @@ 0.29116400964006, 0.12510157647671732, 0.04460071124716505, - 0.13148586282369953, + 0.1314858628236995, 0.12155024410317292, 0.1461505005795304, 0.09404538078799742, @@ -537,14 +540,14 @@ 0.07684637616907224, 0.1739255359932635, 0.06968768763060447, - 0.048645930535396516, + 0.04864593053539651, 0.04715293440571787, - 0.061179568577858875, + 0.06117956857785887, 0.14599437481067512, 0.07423160697853332, 0.08014078307669949, 0.04069199366965248, - 0.18721575205594373, + 0.1872157520559437, 0.14720678189147138, 0.021828844225395293, 0.0625035269825679, @@ -556,8 +559,8 @@ 0.2078691963062574, 0.15441863025620098, 0.10737865871453003, - 0.09454279295816925, - 0.09633845930037417, + 0.09454279295816924, + 0.09633845930037416, 0.15611969585317312, 0.06873017587493258, 0.0791181851399526, @@ -567,11 +570,11 @@ 0.06731946150876865, 0.14050146457241625, 0.08162092155759072, - 0.11667759541165895, + 0.11667759541165897, 0.030289924549114444, 0.06339486495990165, 0.05731838222169315, - 0.18678826188260914, + 0.1867882618826091, 0.13891870679655027, 0.11545889612594176, 0.09001345414783932, @@ -617,14 +620,14 @@ 1, 1, 1, - 0.19149475966528307, - 0.24673403495070823, + 0.1914947596652831, + 0.24673403495070825, 0.3072794892096273, 0.379747974710697, 0.31039305362432584, 0.10848112754405202, 0.010619302099698998, - 0.13888253105203519, + 0.1388825310520352, 0.4073900806470958, 0.2531205286437236, 0.792085840046357, @@ -636,7 +639,7 @@ 0.03973531361317136, 0.2350261871873867, 0.4310117757696614, - 0.24612705047281858, + 0.24612705047281855, 0.12926913086657413, 0.21946877869613568, 0.5242635218702594, @@ -646,7 +649,7 @@ 0.7638985610220846, 0.41669987859417645, 0.06992062037206717, - 0.038954754430174716, + 0.03895475443017472, 0.1702228614236586, 0.3644700400430666, 0.40331728828616015, @@ -662,7 +665,7 @@ 0.054489850288550065, 0.12854107496184566, 0.11753069172373803, - 0.11321576973743849, + 0.11321576973743848, 0.34453613181731424, 0.1244811487104074, 0.06072357901250304, @@ -673,23 +676,23 @@ 0.30226424119880596, 0.8856712808994389, 0.4473604762734464, - 0.11850271000359837, + 0.11850271000359835, 0.003773473791717633, 0.5013148425482626, 0.032331919418836653, 0.3379801252340519, 0.1101123244337878, 0.002039042612175903, - 0.10686992109637393, + 0.10686992109637392, 0.529566917828135, 0.364533754611501, 0.8714741500070979, 0.3758462425849252, - 0.9160445175597087, + 0.9160445175597088, 0.4321594002439528, 0.06604500940315498, 0.03876573963794215, - 0.9211680206617687, + 0.9211680206617688, 0.22674879162547168, 0.3174237803524217, 0.10323686418719458, @@ -700,28 +703,28 @@ 1, 0.5029385157743501, 0.8471689821257619, - 0.21622933403097414, + 0.21622933403097416, 0.1315637600363954, - 0.014207504312205523, + 0.014207504312205525, 1, 0.43231937365869, 0.17936817815314637, 0.09069845844777394, 0.21653261782151073, - 0.10904328492252735, + 0.10904328492252736, 0.45696603359696897, 0.10027631605492449, - 0.14383375266562679, + 0.1438337526656268, 0.7256394043973807, 0.8423948571490725, 0.2589492388346111, 0.2353889384491187, - 0.14548208834407161, - 0.9851234327091203, + 0.1454820883440716, + 0.9851234327091204, 0.3397268942949406, - 0.39379184896953845, + 0.3937918489695385, 0.0786704963046369, - 0.09609641144440331, + 0.09609641144440333, 0.0074050587750516175, 0.6175878341381192, 0.2383574886847478, @@ -741,7 +744,7 @@ 0.273695744109254, 0.10090010598312445, 0.5747793559128609, - 0.9107437140452885, + 0.9107437140452884, 0.34401959095740303, 1, 1, @@ -750,7 +753,7 @@ 0.8684502342014089, 0.7344750160682946, 0.8686393732736275, - 0.43612221537997764, + 0.4361222153799776, 0.6769238453303257, 0.20200886652112943, 0.0266550847359111, @@ -760,10 +763,10 @@ 0.987640762587488, 0.9835036985633404, 0.14278799841519346, - 0.22361630855449383, + 0.22361630855449385, 0.9296698695284026, 0.827944905065973, - 0.9949206266575741, + 0.994920626657574, 0.8286463118940774, 0.7054238995836947, 0.6591252642520408, @@ -772,7 +775,7 @@ 0.9789237514645788, 0.7658415543956806, 0.9979477775731906, - 0.9599693030243193, + 0.9599693030243192, 0.8309056529775222, 0.7063448234599843, 0.025177568881478127, @@ -781,14 +784,14 @@ 1, 0.9926826451871978, 0.9128835701758315, - 0.9947484624313153, + 0.9947484624313152, 0.8693660641248387, 1, 1, 0.035836087718205054, 0.03891346894228955, 0.13385158796068683, - 0.12169435256029607, + 0.12169435256029608, 0.2836104416464377, 0.05094982968902384, 0.034294312830338104, @@ -797,7 +800,7 @@ 0.257589360854839, 0.6548370775766441, 0.2932462480888995, - 0.9040908751204909, + 0.9040908751204908, 0.33043668625756617 ], "xaxis": "x", @@ -1687,8 +1690,9 @@ "def format_fig(fig):\n", " return fig\n", "\n", + "\n", "loss_results.abs_rel_error = loss_results.abs_rel_error.clip(0, 1)\n", - " \n", + "\n", "fig = px.histogram(\n", " loss_results,\n", " x=\"abs_rel_error\",\n", @@ -1704,7 +1708,8 @@ " legend_title=\"Category\",\n", " xaxis_title=\"Absolute relative error\",\n", " yaxis_title=\"Number of variables\",\n", - " xaxis_tickformat=\".0%\",)\n", + " xaxis_tickformat=\".0%\",\n", + ")\n", "format_fig(fig)" ] }, @@ -1818,7 +1823,7 @@ "xaxis": "x", "y": [ 1033786.8507080078, - 1849527.0676879883, + 1849527.0676879885, 6388252.871826172, 5427908.585449219, 3428882.373565674, @@ -1826,7 +1831,7 @@ 1826260.506072998, 993297.5881958008, 313195.4216308594, - 204920.73791503906, + 204920.73791503903, 218245.16729736328, 845.8313598632812, 1075.3846435546875, @@ -1875,7 +1880,7 @@ -505763.6264343262, -13854.41845703125, -73651.49392700195, - 13914.588195800781, + 13914.58819580078, 39777.421630859375, 5029.7379150390625, 111758.16729736328, @@ -1920,18 +1925,18 @@ "xaxis": "x", "y": [ 1.5997536783990056, - -0.24673403495070823, + -0.24673403495070825, -0.05017349529511451, - -0.038954754430174716, + -0.03895475443017472, -0.12854107496184566, -0.003773473791717633, -0.03876573963794215, - 0.014207504312205523, - 0.14548208834407161, + 0.014207504312205525, + 0.1454820883440716, 0.02516240308487657, 1.0495005709369527, -0.9835036985633404, - -0.9599693030243193, + -0.9599693030243192, -0.03891346894228955 ], "yaxis": "y" @@ -1977,7 +1982,7 @@ 505763.6264343262, 13854.41845703125, 73651.49392700195, - 13914.588195800781, + 13914.58819580078, 39777.421630859375, 5029.7379150390625, 111758.16729736328, @@ -2022,18 +2027,18 @@ "xaxis": "x", "y": [ 1.5997536783990056, - 0.24673403495070823, + 0.24673403495070825, 0.05017349529511451, - 0.038954754430174716, + 0.03895475443017472, 0.12854107496184566, 0.003773473791717633, 0.03876573963794215, - 0.014207504312205523, - 0.14548208834407161, + 0.014207504312205525, + 0.1454820883440716, 0.02516240308487657, 1.0495005709369527, 0.9835036985633404, - 0.9599693030243193, + 0.9599693030243192, 0.03891346894228955 ], "yaxis": "y" @@ -3141,7 +3146,7 @@ -0.3647842083382897, -0.4473604762734464, -0.4321594002439528, - -0.21622933403097414, + -0.21622933403097416, -0.2589492388346111, -0.2627782706474406, -0.34401959095740303, @@ -3243,7 +3248,7 @@ 0.3647842083382897, 0.4473604762734464, 0.4321594002439528, - 0.21622933403097414, + 0.21622933403097416, 0.2589492388346111, 0.2627782706474406, 0.34401959095740303, @@ -5086,13 +5091,13 @@ ], "xaxis": "x", "y": [ - 1023954.3214297295, + 1023954.3214297296, 1839525.9950885773, 6387366.710470676, 5448517.7086930275, 3420158.3231887817, 3675436.0024147034, - 1825305.0654945374, + 1825305.0654945376, 995032.6799602509, 313195.4208712578, 204920.7386112213, @@ -5201,7 +5206,7 @@ 1.0495005534866122, -0.9835036991585266, -0.9599693047283234, - -0.038545385074422164 + -0.03854538507442216 ], "yaxis": "y" }, @@ -5303,7 +5308,7 @@ 1.0495005534866122, 0.9835036991585266, 0.9599693047283234, - 0.038545385074422164 + 0.03854538507442216 ], "yaxis": "y" } @@ -6544,7 +6549,7 @@ 1606618.6615459488, 304110.76789712394, 217559.91823041387, - 115103.32235789967, + 115103.32235789968, 49768.71192988932, 21825.629729905864, 27469371.2800173 @@ -6647,7 +6652,7 @@ 0.11225584232612314, 0.0883927652091083, 0.08091431214983678, - -0.029357726530223446, + -0.02935772653022345, -0.18755100767175908, 0.04168935771146878 ], @@ -6749,7 +6754,7 @@ 0.11225584232612314, 0.0883927652091083, 0.08091431214983678, - 0.029357726530223446, + 0.02935772653022345, 0.18755100767175908, 0.04168935771146878 ], diff --git a/docs/validation/constituencies.ipynb b/docs/validation/constituencies.ipynb index 782613f0..0b76f81f 100644 --- a/docs/validation/constituencies.ipynb +++ b/docs/validation/constituencies.ipynb @@ -177,35 +177,72 @@ "\n", "weights_file_path = STORAGE_FOLDER / \"parliamentary_constituency_weights.h5\"\n", "with h5py.File(weights_file_path, \"r\") as f:\n", - " weights = f[str(2025)][...]\n", + " weights = f[str(2025)][...]\n", "\n", "constituencies = pd.read_csv(STORAGE_FOLDER / \"constituencies_2024.csv\")\n", "\n", "baseline = Microsimulation()\n", "household_weights = baseline.calculate(\"household_weight\", 2025).values\n", "\n", - "from policyengine_uk_data.datasets.frs.local_areas.constituencies.loss import create_constituency_target_matrix, create_national_target_matrix\n", + "from policyengine_uk_data.datasets.frs.local_areas.constituencies.loss import (\n", + " create_constituency_target_matrix,\n", + " create_national_target_matrix,\n", + ")\n", "from policyengine_uk_data.datasets import EnhancedFRS_2022_23\n", - "constituency_target_matrix, constituency_actuals, _ = create_constituency_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", - "national_target_matrix, national_actuals = create_national_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", + "\n", + "constituency_target_matrix, constituency_actuals, _ = (\n", + " create_constituency_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", + ")\n", + "national_target_matrix, national_actuals = create_national_target_matrix(\n", + " EnhancedFRS_2022_23, 2025, None\n", + ")\n", "\n", "constituency_wide = weights @ constituency_target_matrix\n", "constituency_wide.index = constituencies.code.values\n", "constituency_wide[\"name\"] = constituencies.name.values\n", "\n", - "constituency_results = pd.melt(constituency_wide.reset_index(), id_vars=[\"index\", \"name\"], var_name=\"variable\", value_name=\"value\")\n", + "constituency_results = pd.melt(\n", + " constituency_wide.reset_index(),\n", + " id_vars=[\"index\", \"name\"],\n", + " var_name=\"variable\",\n", + " value_name=\"value\",\n", + ")\n", "\n", "constituency_actuals.index = constituencies.code.values\n", "constituency_actuals[\"name\"] = constituencies.name.values\n", - "constituency_actuals_long = pd.melt(constituency_actuals.reset_index(), id_vars=[\"index\", \"name\"], var_name=\"variable\", value_name=\"value\")\n", + "constituency_actuals_long = pd.melt(\n", + " constituency_actuals.reset_index(),\n", + " id_vars=[\"index\", \"name\"],\n", + " var_name=\"variable\",\n", + " value_name=\"value\",\n", + ")\n", "\n", - "constituency_target_validation = pd.merge(constituency_results, constituency_actuals_long, on=[\"index\", \"variable\"], suffixes=(\"_target\", \"_actual\"))\n", + "constituency_target_validation = pd.merge(\n", + " constituency_results,\n", + " constituency_actuals_long,\n", + " on=[\"index\", \"variable\"],\n", + " suffixes=(\"_target\", \"_actual\"),\n", + ")\n", "constituency_target_validation.drop(\"name_actual\", axis=1, inplace=True)\n", - "constituency_target_validation.columns = [\"index\", \"name\", \"metric\", \"estimate\", \"target\"]\n", + "constituency_target_validation.columns = [\n", + " \"index\",\n", + " \"name\",\n", + " \"metric\",\n", + " \"estimate\",\n", + " \"target\",\n", + "]\n", "\n", - "constituency_target_validation[\"error\"] = constituency_target_validation[\"estimate\"] - constituency_target_validation[\"target\"]\n", - "constituency_target_validation[\"abs_error\"] = constituency_target_validation[\"error\"].abs()\n", - "constituency_target_validation[\"rel_abs_error\"] = constituency_target_validation[\"abs_error\"] / constituency_target_validation[\"target\"]" + "constituency_target_validation[\"error\"] = (\n", + " constituency_target_validation[\"estimate\"]\n", + " - constituency_target_validation[\"target\"]\n", + ")\n", + "constituency_target_validation[\"abs_error\"] = constituency_target_validation[\n", + " \"error\"\n", + "].abs()\n", + "constituency_target_validation[\"rel_abs_error\"] = (\n", + " constituency_target_validation[\"abs_error\"]\n", + " / constituency_target_validation[\"target\"]\n", + ")" ] }, { @@ -398,12 +435,25 @@ "outputs": [], "source": [ "national_performance = household_weights @ national_target_matrix\n", - "national_target_validation = pd.DataFrame({\"metric\": national_performance.index, \"estimate\": national_performance.values})\n", + "national_target_validation = pd.DataFrame(\n", + " {\n", + " \"metric\": national_performance.index,\n", + " \"estimate\": national_performance.values,\n", + " }\n", + ")\n", "national_target_validation[\"target\"] = national_actuals.values\n", "\n", - "national_target_validation[\"error\"] = national_target_validation[\"estimate\"] - national_target_validation[\"target\"]\n", - "national_target_validation[\"abs_error\"] = national_target_validation[\"error\"].abs()\n", - "national_target_validation[\"rel_abs_error\"] = national_target_validation[\"abs_error\"] / national_target_validation[\"target\"]" + "national_target_validation[\"error\"] = (\n", + " national_target_validation[\"estimate\"]\n", + " - national_target_validation[\"target\"]\n", + ")\n", + "national_target_validation[\"abs_error\"] = national_target_validation[\n", + " \"error\"\n", + "].abs()\n", + "national_target_validation[\"rel_abs_error\"] = (\n", + " national_target_validation[\"abs_error\"]\n", + " / national_target_validation[\"target\"]\n", + ")" ] }, { diff --git a/docs/validation/local_authorities.ipynb b/docs/validation/local_authorities.ipynb index 10998700..37f17d72 100644 --- a/docs/validation/local_authorities.ipynb +++ b/docs/validation/local_authorities.ipynb @@ -196,29 +196,65 @@ "baseline = Microsimulation()\n", "household_weights = baseline.calculate(\"household_weight\", 2025).values\n", "\n", - "from policyengine_uk_data.datasets.frs.local_areas.local_authorities.loss import create_local_authority_target_matrix, create_national_target_matrix\n", + "from policyengine_uk_data.datasets.frs.local_areas.local_authorities.loss import (\n", + " create_local_authority_target_matrix,\n", + " create_national_target_matrix,\n", + ")\n", "from policyengine_uk_data.datasets import EnhancedFRS_2022_23\n", "\n", - "local_authority_target_matrix, local_authority_actuals, _ = create_local_authority_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", - "national_target_matrix, national_actuals = create_national_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", + "local_authority_target_matrix, local_authority_actuals, _ = (\n", + " create_local_authority_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", + ")\n", + "national_target_matrix, national_actuals = create_national_target_matrix(\n", + " EnhancedFRS_2022_23, 2025, None\n", + ")\n", "\n", "local_authority_wide = weights @ local_authority_target_matrix\n", "local_authority_wide.index = constituencies_2024.code.values\n", "local_authority_wide[\"name\"] = constituencies_2024.name.values\n", "\n", - "local_authority_results = pd.melt(local_authority_wide.reset_index(), id_vars=[\"index\", \"name\"], var_name=\"variable\", value_name=\"value\")\n", + "local_authority_results = pd.melt(\n", + " local_authority_wide.reset_index(),\n", + " id_vars=[\"index\", \"name\"],\n", + " var_name=\"variable\",\n", + " value_name=\"value\",\n", + ")\n", "\n", "local_authority_actuals.index = constituencies_2024.code.values\n", "local_authority_actuals[\"name\"] = constituencies_2024.name.values\n", - "local_authority_actuals_long = pd.melt(local_authority_actuals.reset_index(), id_vars=[\"index\", \"name\"], var_name=\"variable\", value_name=\"value\")\n", + "local_authority_actuals_long = pd.melt(\n", + " local_authority_actuals.reset_index(),\n", + " id_vars=[\"index\", \"name\"],\n", + " var_name=\"variable\",\n", + " value_name=\"value\",\n", + ")\n", "\n", - "local_authority_target_validation = pd.merge(local_authority_results, local_authority_actuals_long, on=[\"index\", \"variable\"], suffixes=(\"_target\", \"_actual\"))\n", + "local_authority_target_validation = pd.merge(\n", + " local_authority_results,\n", + " local_authority_actuals_long,\n", + " on=[\"index\", \"variable\"],\n", + " suffixes=(\"_target\", \"_actual\"),\n", + ")\n", "local_authority_target_validation.drop(\"name_actual\", axis=1, inplace=True)\n", - "local_authority_target_validation.columns = [\"index\", \"name\", \"metric\", \"estimate\", \"target\"]\n", + "local_authority_target_validation.columns = [\n", + " \"index\",\n", + " \"name\",\n", + " \"metric\",\n", + " \"estimate\",\n", + " \"target\",\n", + "]\n", "\n", - "local_authority_target_validation[\"error\"] = local_authority_target_validation[\"estimate\"] - local_authority_target_validation[\"target\"]\n", - "local_authority_target_validation[\"abs_error\"] = local_authority_target_validation[\"error\"].abs()\n", - "local_authority_target_validation[\"rel_abs_error\"] = local_authority_target_validation[\"abs_error\"] / local_authority_target_validation[\"target\"]" + "local_authority_target_validation[\"error\"] = (\n", + " local_authority_target_validation[\"estimate\"]\n", + " - local_authority_target_validation[\"target\"]\n", + ")\n", + "local_authority_target_validation[\"abs_error\"] = (\n", + " local_authority_target_validation[\"error\"].abs()\n", + ")\n", + "local_authority_target_validation[\"rel_abs_error\"] = (\n", + " local_authority_target_validation[\"abs_error\"]\n", + " / local_authority_target_validation[\"target\"]\n", + ")" ] }, { @@ -400,12 +436,25 @@ "outputs": [], "source": [ "national_performance = household_weights @ national_target_matrix\n", - "national_target_validation = pd.DataFrame({\"metric\": national_performance.index, \"estimate\": national_performance.values})\n", + "national_target_validation = pd.DataFrame(\n", + " {\n", + " \"metric\": national_performance.index,\n", + " \"estimate\": national_performance.values,\n", + " }\n", + ")\n", "national_target_validation[\"target\"] = national_actuals.values\n", "\n", - "national_target_validation[\"error\"] = national_target_validation[\"estimate\"] - national_target_validation[\"target\"]\n", - "national_target_validation[\"abs_error\"] = national_target_validation[\"error\"].abs()\n", - "national_target_validation[\"rel_abs_error\"] = national_target_validation[\"abs_error\"] / national_target_validation[\"target\"]" + "national_target_validation[\"error\"] = (\n", + " national_target_validation[\"estimate\"]\n", + " - national_target_validation[\"target\"]\n", + ")\n", + "national_target_validation[\"abs_error\"] = national_target_validation[\n", + " \"error\"\n", + "].abs()\n", + "national_target_validation[\"rel_abs_error\"] = (\n", + " national_target_validation[\"abs_error\"]\n", + " / national_target_validation[\"target\"]\n", + ")" ] }, { diff --git a/docs/validation/national.ipynb b/docs/validation/national.ipynb index 3ade912a..ad9b8e3b 100644 --- a/docs/validation/national.ipynb +++ b/docs/validation/national.ipynb @@ -170,10 +170,12 @@ "import pandas as pd\n", "from itables import init_notebook_mode\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "\n", "init_notebook_mode(all_interactive=True)\n", "\n", + "\n", "def get_validation():\n", " df = pd.DataFrame()\n", " for dataset in [FRS_2022_23, EnhancedFRS_2022_23]:\n", @@ -185,6 +187,7 @@ " df = df.reset_index(drop=True)\n", " return df\n", "\n", + "\n", "df = get_validation()\n", "truth_df = df[df.dataset == df.dataset.unique()[0]].reset_index()\n", "truth_df[\"estimate\"] = truth_df[\"target\"]\n", @@ -617,7 +620,9 @@ " on=[\"time_period\", \"name\"],\n", " suffixes=(\"_frs\", \"_efrs\"),\n", ")\n", - "merged[\"rel_error_change_under_efrs\"] = merged[\"abs_rel_error_efrs\"] - merged[\"abs_rel_error_frs\"]\n", + "merged[\"rel_error_change_under_efrs\"] = (\n", + " merged[\"abs_rel_error_efrs\"] - merged[\"abs_rel_error_frs\"]\n", + ")\n", "# Sort columns\n", "merged = merged[\n", " [\n", diff --git a/policyengine_uk_data/datasets/childcare/takeup_rate.py b/policyengine_uk_data/datasets/childcare/takeup_rate.py index dcb3ae18..9e5031c3 100644 --- a/policyengine_uk_data/datasets/childcare/takeup_rate.py +++ b/policyengine_uk_data/datasets/childcare/takeup_rate.py @@ -161,13 +161,13 @@ def objective(params: list[float]) -> float: print("\nSpending (£ billion):") for key in targets["spending"]: print( - f" {key.upper()}: {spending[key]:.3f} (Target: {targets['spending'][key]:.3f}, Ratio: {spending[key]/targets['spending'][key]:.3f})" + f" {key.upper()}: {spending[key]:.3f} (Target: {targets['spending'][key]:.3f}, Ratio: {spending[key] / targets['spending'][key]:.3f})" ) print("\nCaseload (thousands):") for key in targets["caseload"]: print( - f" {key.upper()}: {caseload[key]:.1f} (Target: {targets['caseload'][key]:.1f}, Ratio: {caseload[key]/targets['caseload'][key]:.3f})" + f" {key.upper()}: {caseload[key]:.1f} (Target: {targets['caseload'][key]:.1f}, Ratio: {caseload[key] / targets['caseload'][key]:.3f})" ) return loss @@ -211,11 +211,11 @@ def objective(params: list[float]) -> float: print("\nSpending (£ billion):") for key in targets["spending"]: print( - f" {key.upper()}: {final_spending[key]:.3f} (Target: {targets['spending'][key]:.3f}, Ratio: {final_spending[key]/targets['spending'][key]:.3f})" + f" {key.upper()}: {final_spending[key]:.3f} (Target: {targets['spending'][key]:.3f}, Ratio: {final_spending[key] / targets['spending'][key]:.3f})" ) print("\nCaseload (thousands):") for key in targets["caseload"]: print( - f" {key.upper()}: {final_caseload[key]:.1f} (Target: {targets['caseload'][key]:.1f}, Ratio: {final_caseload[key]/targets['caseload'][key]:.3f})" + f" {key.upper()}: {final_caseload[key]:.1f} (Target: {targets['caseload'][key]:.1f}, Ratio: {final_caseload[key] / targets['caseload'][key]:.3f})" ) diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py index 34cbdc8b..c726441c 100644 --- a/policyengine_uk_data/datasets/imputations/consumption.py +++ b/policyengine_uk_data/datasets/imputations/consumption.py @@ -262,9 +262,7 @@ def uprate_lcfs_table( household["petrol_spending"] *= fuel_uprating household["diesel_spending"] *= fuel_uprating - cpi = ( - system.parameters.gov.economic_assumptions.indices.obr.consumer_price_index - ) + cpi = system.parameters.gov.economic_assumptions.indices.obr.consumer_price_index cpi_uprating = cpi(time_period) / cpi(start_period) for variable in IMPUTATIONS: diff --git a/policyengine_uk_data/targets/sources/_common.py b/policyengine_uk_data/targets/sources/_common.py index 69fb6988..110bc819 100644 --- a/policyengine_uk_data/targets/sources/_common.py +++ b/policyengine_uk_data/targets/sources/_common.py @@ -9,7 +9,7 @@ HEADERS = { "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" " AppleWebKit/537.36" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" ), } diff --git a/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py b/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py index 19baa19f..3b3f0446 100644 --- a/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py +++ b/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py @@ -59,9 +59,9 @@ def test_reform_fiscal_impacts( """Test that each reform produces the expected fiscal impact.""" impact = get_fiscal_impact(baseline, enhanced_frs, reform) - assert ( - abs(impact - expected_impact) < tolerance - ), f"Impact for {reform_name} is {impact:.1f} billion, expected {expected_impact:.1f} billion" + assert abs(impact - expected_impact) < tolerance, ( + f"Impact for {reform_name} is {impact:.1f} billion, expected {expected_impact:.1f} billion" + ) def test_config_file_exists(): @@ -77,11 +77,11 @@ def test_all_reforms_have_required_fields(): for i, reform in enumerate(reforms_data): for field in required_fields: - assert ( - field in reform - ), f"Reform {i} missing required field: {field}" + assert field in reform, ( + f"Reform {i} missing required field: {field}" + ) - assert isinstance( - reform["parameters"], dict - ), f"Reform {i} parameters must be a dictionary" + assert isinstance(reform["parameters"], dict), ( + f"Reform {i} parameters must be a dictionary" + ) assert len(reform["parameters"]) > 0, f"Reform {i} has no parameters" diff --git a/policyengine_uk_data/tests/test_aggregates.py b/policyengine_uk_data/tests/test_aggregates.py index 8a76d37f..10a5fb2c 100644 --- a/policyengine_uk_data/tests/test_aggregates.py +++ b/policyengine_uk_data/tests/test_aggregates.py @@ -14,6 +14,6 @@ def test_aggregates(baseline, variable: str): variable, map_to="household", period=2025 ).sum() - assert ( - abs(estimate / AGGREGATES[variable] - 1) < 0.7 - ), f"Expected {AGGREGATES[variable]/1e9:.1f} billion for {variable}, got {estimate/1e9:.1f} billion (relative error = {abs(estimate / AGGREGATES[variable] - 1):.1%})." + assert abs(estimate / AGGREGATES[variable] - 1) < 0.7, ( + f"Expected {AGGREGATES[variable] / 1e9:.1f} billion for {variable}, got {estimate / 1e9:.1f} billion (relative error = {abs(estimate / AGGREGATES[variable] - 1):.1%})." + ) diff --git a/policyengine_uk_data/tests/test_child_limit.py b/policyengine_uk_data/tests/test_child_limit.py index e0103982..446ed4c4 100644 --- a/policyengine_uk_data/tests/test_child_limit.py +++ b/policyengine_uk_data/tests/test_child_limit.py @@ -35,9 +35,9 @@ def test_child_limit(baseline): 440e3 * UPRATING_24_25 ) # Expected number of affected households - assert ( - abs(children_affected / child_target - 1) < 0.3 - ), f"Expected {child_target/1e6:.1f} million affected children, got {children_affected/1e6:.1f} million." - assert ( - abs(households_affected / household_target - 1) < 0.3 - ), f"Expected {household_target/1e3:.0f} thousand affected households, got {households_affected/1e3:.0f} thousand." + assert abs(children_affected / child_target - 1) < 0.3, ( + f"Expected {child_target / 1e6:.1f} million affected children, got {children_affected / 1e6:.1f} million." + ) + assert abs(households_affected / household_target - 1) < 0.3, ( + f"Expected {household_target / 1e3:.0f} thousand affected households, got {households_affected / 1e3:.0f} thousand." + ) diff --git a/policyengine_uk_data/tests/test_low_income_deciles.py b/policyengine_uk_data/tests/test_low_income_deciles.py index 189ac071..c093ddf6 100644 --- a/policyengine_uk_data/tests/test_low_income_deciles.py +++ b/policyengine_uk_data/tests/test_low_income_deciles.py @@ -42,8 +42,8 @@ def test_first_decile_tax_rate_reasonable(baseline): d1_tax_rate = d1_tax / d1_market assert d1_tax_rate < 1.75, ( f"First decile tax rate is {d1_tax_rate:.0%}, which exceeds 175%. " - f"Total D1 tax: £{d1_tax/1e9:.1f}bn, " - f"Total D1 market income: £{d1_market/1e9:.1f}bn. " + f"Total D1 tax: £{d1_tax / 1e9:.1f}bn, " + f"Total D1 market income: £{d1_market / 1e9:.1f}bn. " "This likely indicates a bug in property_purchased or similar variable." ) diff --git a/policyengine_uk_data/tests/test_non_negative_incomes.py b/policyengine_uk_data/tests/test_non_negative_incomes.py index 4d9f671a..633305e8 100644 --- a/policyengine_uk_data/tests/test_non_negative_incomes.py +++ b/policyengine_uk_data/tests/test_non_negative_incomes.py @@ -18,6 +18,6 @@ def test_income_non_negative(frs, variable: str): """Test that income variables have no negative values.""" values = frs.person[variable] min_value = values.min() - assert ( - min_value >= 0 - ), f"{variable} has negative values (min = {min_value:.2f})" + assert min_value >= 0, ( + f"{variable} has negative values (min = {min_value:.2f})" + ) diff --git a/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py b/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py index 57c5edf8..3f5c2ba3 100644 --- a/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py +++ b/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py @@ -5,20 +5,20 @@ def test_pension_contributions_via_salary_sacrifice(baseline): ) # Basic validation: all values should be non-negative - assert ( - values >= 0 - ).all(), "Salary sacrifice pension contributions must be non-negative" + assert (values >= 0).all(), ( + "Salary sacrifice pension contributions must be non-negative" + ) # Should have some non-zero values (not everyone uses salary sacrifice, but some do) total = values.sum() - assert ( - total > 0 - ), f"Expected some salary sacrifice contributions, got {total}" + assert total > 0, ( + f"Expected some salary sacrifice contributions, got {total}" + ) # Reasonableness check: total should be less than total employment income # This is a very loose check just to catch major issues employment_income = baseline.calculate("employment_income", period=2025) total_employment = employment_income.sum() - assert ( - total < total_employment - ), f"Salary sacrifice contributions ({total/1e9:.1f}B) cannot exceed total employment income ({total_employment/1e9:.1f}B)" + assert total < total_employment, ( + f"Salary sacrifice contributions ({total / 1e9:.1f}B) cannot exceed total employment income ({total_employment / 1e9:.1f}B)" + ) diff --git a/policyengine_uk_data/tests/test_population.py b/policyengine_uk_data/tests/test_population.py index 321c377c..43645791 100644 --- a/policyengine_uk_data/tests/test_population.py +++ b/policyengine_uk_data/tests/test_population.py @@ -2,6 +2,6 @@ def test_population(baseline): population = baseline.calculate("people", 2025).sum() / 1e6 POPULATION_TARGET = 69.5 # Expected UK population in millions, per ONS 2022-based estimate here: https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationprojections/bulletins/nationalpopulationprojections/2022based # Tolerance temporarily relaxed to 7% due to calibration inflation issue #217 - assert ( - abs(population / POPULATION_TARGET - 1) < 0.07 - ), f"Expected UK population of {POPULATION_TARGET:.1f} million, got {population:.1f} million." + assert abs(population / POPULATION_TARGET - 1) < 0.07, ( + f"Expected UK population of {POPULATION_TARGET:.1f} million, got {population:.1f} million." + ) diff --git a/policyengine_uk_data/tests/test_property_purchased.py b/policyengine_uk_data/tests/test_property_purchased.py index 709e86bf..0c742180 100644 --- a/policyengine_uk_data/tests/test_property_purchased.py +++ b/policyengine_uk_data/tests/test_property_purchased.py @@ -34,9 +34,9 @@ def test_property_purchased_rate(baseline): target_rate = PROPERTY_PURCHASE_RATE tolerance = 0.02 - assert ( - abs(actual_rate - target_rate) < tolerance - ), f"property_purchased rate {actual_rate:.2%} is not close to target {target_rate:.2%}" + assert abs(actual_rate - target_rate) < tolerance, ( + f"property_purchased rate {actual_rate:.2%} is not close to target {target_rate:.2%}" + ) def test_property_purchased_not_all_true(baseline): @@ -47,9 +47,9 @@ def test_property_purchased_not_all_true(baseline): n_households = len(property_purchased) # Should NOT be 100% True (the bug we fixed) - assert ( - true_count < n_households * 0.1 - ), f"Too many households have property_purchased=True ({true_count}/{n_households})" + assert true_count < n_households * 0.1, ( + f"Too many households have property_purchased=True ({true_count}/{n_households})" + ) def test_property_purchased_has_some_true(baseline): @@ -80,13 +80,13 @@ def test_sdlt_total_reasonable(baseline): max_sdlt = 50e9 # £50bn maximum (official is ~£14bn) assert total_sdlt > min_sdlt, ( - f"Total SDLT £{total_sdlt/1e9:.1f}bn is too low " - f"(minimum expected: £{min_sdlt/1e9:.1f}bn)" + f"Total SDLT £{total_sdlt / 1e9:.1f}bn is too low " + f"(minimum expected: £{min_sdlt / 1e9:.1f}bn)" ) assert total_sdlt < max_sdlt, ( - f"Total SDLT £{total_sdlt/1e9:.1f}bn is unrealistically high " - f"(maximum expected: £{max_sdlt/1e9:.1f}bn). " + f"Total SDLT £{total_sdlt / 1e9:.1f}bn is unrealistically high " + f"(maximum expected: £{max_sdlt / 1e9:.1f}bn). " f"Official SDLT is ~£14bn. " "This suggests property_purchased may be incorrectly set to True for all households." ) diff --git a/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py b/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py index af0fdfd4..09f3ab25 100644 --- a/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py +++ b/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py @@ -23,8 +23,8 @@ def test_salary_sacrifice_total_users(baseline): TARGET = 7_700_000 assert abs(total_users / TARGET - 1) < TOLERANCE, ( - f"Expected ~{TARGET/1e6:.1f}mn SS users, " - f"got {total_users/1e6:.1f}mn ({total_users/TARGET*100:.0f}% of target)" + f"Expected ~{TARGET / 1e6:.1f}mn SS users, " + f"got {total_users / 1e6:.1f}mn ({total_users / TARGET * 100:.0f}% of target)" ) @@ -44,8 +44,8 @@ def test_salary_sacrifice_below_cap_users(baseline): TARGET = 4_300_000 assert abs(total_below_cap / TARGET - 1) < TOLERANCE, ( - f"Expected ~{TARGET/1e6:.1f}mn below-cap SS users, " - f"got {total_below_cap/1e6:.1f}mn ({total_below_cap/TARGET*100:.0f}% of target)" + f"Expected ~{TARGET / 1e6:.1f}mn below-cap SS users, " + f"got {total_below_cap / 1e6:.1f}mn ({total_below_cap / TARGET * 100:.0f}% of target)" ) @@ -65,6 +65,6 @@ def test_salary_sacrifice_above_cap_users(baseline): TARGET = 3_300_000 assert abs(total_above_cap / TARGET - 1) < TOLERANCE, ( - f"Expected ~{TARGET/1e6:.1f}mn above-cap SS users, " - f"got {total_above_cap/1e6:.1f}mn ({total_above_cap/TARGET*100:.0f}% of target)" + f"Expected ~{TARGET / 1e6:.1f}mn above-cap SS users, " + f"got {total_above_cap / 1e6:.1f}mn ({total_above_cap / TARGET * 100:.0f}% of target)" ) diff --git a/policyengine_uk_data/tests/test_scotland_babies.py b/policyengine_uk_data/tests/test_scotland_babies.py index a6674018..b5eba325 100644 --- a/policyengine_uk_data/tests/test_scotland_babies.py +++ b/policyengine_uk_data/tests/test_scotland_babies.py @@ -30,6 +30,6 @@ def test_scotland_babies_under_1(baseline): TOLERANCE = 0.15 # 15% tolerance assert abs(total_babies / TARGET - 1) < TOLERANCE, ( - f"Expected ~{TARGET/1000:.0f}k babies under 1 in Scotland, " - f"got {total_babies/1000:.0f}k ({total_babies/TARGET*100:.0f}% of target)" + f"Expected ~{TARGET / 1000:.0f}k babies under 1 in Scotland, " + f"got {total_babies / 1000:.0f}k ({total_babies / TARGET * 100:.0f}% of target)" ) diff --git a/policyengine_uk_data/tests/test_scotland_uc_babies.py b/policyengine_uk_data/tests/test_scotland_uc_babies.py index 36246b48..85751cca 100644 --- a/policyengine_uk_data/tests/test_scotland_uc_babies.py +++ b/policyengine_uk_data/tests/test_scotland_uc_babies.py @@ -44,6 +44,6 @@ def test_scotland_uc_households_child_under_1(baseline): TOLERANCE = 0.15 # 15% tolerance assert abs(total / TARGET - 1) < TOLERANCE, ( - f"Expected ~{TARGET/1000:.0f}k UC households with child under 1 in Scotland, " - f"got {total/1000:.0f}k ({total/TARGET*100:.0f}% of target)" + f"Expected ~{TARGET / 1000:.0f}k UC households with child under 1 in Scotland, " + f"got {total / 1000:.0f}k ({total / TARGET * 100:.0f}% of target)" ) diff --git a/policyengine_uk_data/tests/test_target_registry.py b/policyengine_uk_data/tests/test_target_registry.py index ccc49e00..c6f78bdd 100644 --- a/policyengine_uk_data/tests/test_target_registry.py +++ b/policyengine_uk_data/tests/test_target_registry.py @@ -52,9 +52,9 @@ def test_hmrc_spi_targets_exist(): targets = get_all_targets(year=2025) spi_targets = [t for t in targets if t.source == "hmrc_spi"] # 13 bands × 6 income types × 2 (count + amount) = 156 per year - assert ( - len(spi_targets) >= 100 - ), f"Expected 100+ SPI targets, got {len(spi_targets)}" + assert len(spi_targets) >= 100, ( + f"Expected 100+ SPI targets, got {len(spi_targets)}" + ) def test_dwp_pip_targets(): @@ -76,9 +76,9 @@ def test_voa_council_tax_targets(): def test_core_target_count(): """Total target count should be substantial.""" targets = get_all_targets(year=2025) - assert ( - len(targets) >= 200 - ), f"Expected 200+ targets for 2025, got {len(targets)}" + assert len(targets) >= 200, ( + f"Expected 200+ targets for 2025, got {len(targets)}" + ) def test_two_child_limit_targets(): diff --git a/policyengine_uk_data/tests/test_uc_by_children.py b/policyengine_uk_data/tests/test_uc_by_children.py index 3878d99b..649e0fb3 100644 --- a/policyengine_uk_data/tests/test_uc_by_children.py +++ b/policyengine_uk_data/tests/test_uc_by_children.py @@ -55,6 +55,6 @@ def test_uc_households_by_children(baseline, bucket, target): actual = (household_weight * match).sum() assert abs(actual / target - 1) < TOLERANCE, ( - f"UC households with {bucket}: expected {target/1e3:.0f}k, " - f"got {actual/1e3:.0f}k ({actual/target*100:.0f}% of target)" + f"UC households with {bucket}: expected {target / 1e3:.0f}k, " + f"got {actual / 1e3:.0f}k ({actual / target * 100:.0f}% of target)" ) diff --git a/pyproject.toml b/pyproject.toml index 72df5b5a..73200a77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "policyengine-uk>=2.43.5", "microcalibrate>=0.18.0", "microimpute>=1.0.1", - "black>=25.1.0", + "ruff>=0.9.0", "rich>=13.0.0", "odfpy", "pandas", @@ -35,7 +35,7 @@ dependencies = [ [project.optional-dependencies] dev = [ - "black", + "ruff>=0.9.0", "pytest", "torch", "tables", @@ -70,23 +70,8 @@ filterwarnings = [ "ignore::PendingDeprecationWarning", ] -[tool.black] +[tool.ruff] line-length = 79 -target-version = ['py311'] -include = '\.pyi?$' -extend-exclude = ''' -/( - # directories - \.eggs - | \.git - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | build - | dist -)/ -''' [tool.towncrier] package = "policyengine_uk_data" diff --git a/uv.lock b/uv.lock index 8c63a74e..9e03803a 100644 --- a/uv.lock +++ b/uv.lock @@ -99,31 +99,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392, upload-time = "2025-09-29T10:05:43.771Z" }, ] -[[package]] -name = "black" -version = "25.11.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "mypy-extensions" }, - { name = "packaging" }, - { name = "pathspec" }, - { name = "platformdirs" }, - { name = "pytokens" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8c/ad/33adf4708633d047950ff2dfdea2e215d84ac50ef95aff14a614e4b6e9b2/black-25.11.0.tar.gz", hash = "sha256:9a323ac32f5dc75ce7470501b887250be5005a01602e931a15e45593f70f6e08", size = 655669, upload-time = "2025-11-10T01:53:50.558Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/47/3378d6a2ddefe18553d1115e36aea98f4a90de53b6a3017ed861ba1bd3bc/black-25.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0a1d40348b6621cc20d3d7530a5b8d67e9714906dfd7346338249ad9c6cedf2b", size = 1772446, upload-time = "2025-11-10T02:02:16.181Z" }, - { url = "https://files.pythonhosted.org/packages/ba/4b/0f00bfb3d1f7e05e25bfc7c363f54dc523bb6ba502f98f4ad3acf01ab2e4/black-25.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:51c65d7d60bb25429ea2bf0731c32b2a2442eb4bd3b2afcb47830f0b13e58bfd", size = 1607983, upload-time = "2025-11-10T02:02:52.502Z" }, - { url = "https://files.pythonhosted.org/packages/99/fe/49b0768f8c9ae57eb74cc10a1f87b4c70453551d8ad498959721cc345cb7/black-25.11.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:936c4dd07669269f40b497440159a221ee435e3fddcf668e0c05244a9be71993", size = 1682481, upload-time = "2025-11-10T01:57:12.35Z" }, - { url = "https://files.pythonhosted.org/packages/55/17/7e10ff1267bfa950cc16f0a411d457cdff79678fbb77a6c73b73a5317904/black-25.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:f42c0ea7f59994490f4dccd64e6b2dd49ac57c7c84f38b8faab50f8759db245c", size = 1363869, upload-time = "2025-11-10T01:58:24.608Z" }, - { url = "https://files.pythonhosted.org/packages/67/c0/cc865ce594d09e4cd4dfca5e11994ebb51604328489f3ca3ae7bb38a7db5/black-25.11.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:35690a383f22dd3e468c85dc4b915217f87667ad9cce781d7b42678ce63c4170", size = 1771358, upload-time = "2025-11-10T02:03:33.331Z" }, - { url = "https://files.pythonhosted.org/packages/37/77/4297114d9e2fd2fc8ab0ab87192643cd49409eb059e2940391e7d2340e57/black-25.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:dae49ef7369c6caa1a1833fd5efb7c3024bb7e4499bf64833f65ad27791b1545", size = 1612902, upload-time = "2025-11-10T01:59:33.382Z" }, - { url = "https://files.pythonhosted.org/packages/de/63/d45ef97ada84111e330b2b2d45e1dd163e90bd116f00ac55927fb6bf8adb/black-25.11.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bd4a22a0b37401c8e492e994bce79e614f91b14d9ea911f44f36e262195fdda", size = 1680571, upload-time = "2025-11-10T01:57:04.239Z" }, - { url = "https://files.pythonhosted.org/packages/ff/4b/5604710d61cdff613584028b4cb4607e56e148801ed9b38ee7970799dab6/black-25.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:aa211411e94fdf86519996b7f5f05e71ba34835d8f0c0f03c00a26271da02664", size = 1382599, upload-time = "2025-11-10T01:57:57.427Z" }, - { url = "https://files.pythonhosted.org/packages/00/5d/aed32636ed30a6e7f9efd6ad14e2a0b0d687ae7c8c7ec4e4a557174b895c/black-25.11.0-py3-none-any.whl", hash = "sha256:e3f562da087791e96cefcd9dda058380a442ab322a02e222add53736451f604b", size = 204918, upload-time = "2025-11-10T01:53:48.917Z" }, -] - [[package]] name = "blosc2" version = "3.11.1" @@ -471,6 +446,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, { url = "https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, + { url = "https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, @@ -481,6 +457,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, { url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, { url = "https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, { url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" }, { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" }, @@ -914,15 +891,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/f2/08ace4142eb281c12701fc3b93a10795e4d4dc7f753911d836675050f886/msgpack-1.1.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d99ef64f349d5ec3293688e91486c5fdb925ed03807f64d98d205d2713c60b46", size = 70868, upload-time = "2025-10-08T09:15:44.959Z" }, ] -[[package]] -name = "mypy-extensions" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, -] - [[package]] name = "ndindex" version = "1.10.1" @@ -1283,15 +1251,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/f9/690a8600b93c332de3ab4a344a4ac34f00c8f104917061f779db6a918ed6/pathlib-1.0.1-py3-none-any.whl", hash = "sha256:f35f95ab8b0f59e6d354090350b44a80a80635d22efdedfa84c7ad1cf0a74147", size = 14363, upload-time = "2022-05-04T13:37:20.585Z" }, ] -[[package]] -name = "pathspec" -version = "0.12.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, -] - [[package]] name = "patsy" version = "1.0.2" @@ -1407,10 +1366,9 @@ wheels = [ [[package]] name = "policyengine-uk-data" -version = "1.35.0" +version = "1.40.3" source = { editable = "." } dependencies = [ - { name = "black" }, { name = "google-auth" }, { name = "google-cloud-storage" }, { name = "huggingface-hub" }, @@ -1422,29 +1380,31 @@ dependencies = [ { name = "policyengine" }, { name = "policyengine-core" }, { name = "policyengine-uk" }, + { name = "pydantic" }, + { name = "pyyaml" }, { name = "requests" }, { name = "rich" }, + { name = "ruff" }, { name = "tabulate" }, { name = "tqdm" }, ] [package.optional-dependencies] dev = [ - { name = "black" }, { name = "build" }, { name = "furo" }, { name = "itables" }, { name = "pytest" }, { name = "quantile-forest" }, + { name = "ruff" }, { name = "tables" }, { name = "torch" }, + { name = "towncrier" }, { name = "yaml-changelog" }, ] [package.metadata] requires-dist = [ - { name = "black", specifier = ">=25.1.0" }, - { name = "black", marker = "extra == 'dev'" }, { name = "build", marker = "extra == 'dev'" }, { name = "furo", marker = "extra == 'dev'" }, { name = "google-auth" }, @@ -1459,13 +1419,18 @@ requires-dist = [ { name = "policyengine" }, { name = "policyengine-core", specifier = ">=3.19.4" }, { name = "policyengine-uk", specifier = ">=2.43.5" }, + { name = "pydantic", specifier = ">=2.0" }, { name = "pytest", marker = "extra == 'dev'" }, + { name = "pyyaml" }, { name = "quantile-forest", marker = "extra == 'dev'" }, { name = "requests" }, { name = "rich", specifier = ">=13.0.0" }, + { name = "ruff", specifier = ">=0.9.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.0" }, { name = "tables", marker = "extra == 'dev'" }, { name = "tabulate" }, { name = "torch", marker = "extra == 'dev'" }, + { name = "towncrier", marker = "extra == 'dev'", specifier = ">=24.8.0" }, { name = "tqdm" }, { name = "yaml-changelog", marker = "extra == 'dev'", specifier = ">=0.1.7" }, ] @@ -1687,15 +1652,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] -[[package]] -name = "pytokens" -version = "0.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4e/8d/a762be14dae1c3bf280202ba3172020b2b0b4c537f94427435f19c413b72/pytokens-0.3.0.tar.gz", hash = "sha256:2f932b14ed08de5fcf0b391ace2642f858f1394c0857202959000b68ed7a458a", size = 17644, upload-time = "2025-11-05T13:36:35.34Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/84/25/d9db8be44e205a124f6c98bc0324b2bb149b7431c53877fc6d1038dddaf5/pytokens-0.3.0-py3-none-any.whl", hash = "sha256:95b2b5eaf832e469d141a378872480ede3f251a5a5041b8ec6e581d3ac71bbf3", size = 12195, upload-time = "2025-11-05T13:36:33.183Z" }, -] - [[package]] name = "pytz" version = "2025.2" @@ -1822,6 +1778,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, ] +[[package]] +name = "ruff" +version = "0.15.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/77/9b/840e0039e65fcf12758adf684d2289024d6140cde9268cc59887dc55189c/ruff-0.15.5.tar.gz", hash = "sha256:7c3601d3b6d76dce18c5c824fc8d06f4eef33d6df0c21ec7799510cde0f159a2", size = 4574214, upload-time = "2026-03-05T20:06:34.946Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/20/5369c3ce21588c708bcbe517a8fbe1a8dfdb5dfd5137e14790b1da71612c/ruff-0.15.5-py3-none-linux_armv6l.whl", hash = "sha256:4ae44c42281f42e3b06b988e442d344a5b9b72450ff3c892e30d11b29a96a57c", size = 10478185, upload-time = "2026-03-05T20:06:29.093Z" }, + { url = "https://files.pythonhosted.org/packages/44/ed/e81dd668547da281e5dce710cf0bc60193f8d3d43833e8241d006720e42b/ruff-0.15.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6edd3792d408ebcf61adabc01822da687579a1a023f297618ac27a5b51ef0080", size = 10859201, upload-time = "2026-03-05T20:06:32.632Z" }, + { url = "https://files.pythonhosted.org/packages/c4/8f/533075f00aaf19b07c5cd6aa6e5d89424b06b3b3f4583bfa9c640a079059/ruff-0.15.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:89f463f7c8205a9f8dea9d658d59eff49db05f88f89cc3047fb1a02d9f344010", size = 10184752, upload-time = "2026-03-05T20:06:40.312Z" }, + { url = "https://files.pythonhosted.org/packages/66/0e/ba49e2c3fa0395b3152bad634c7432f7edfc509c133b8f4529053ff024fb/ruff-0.15.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba786a8295c6574c1116704cf0b9e6563de3432ac888d8f83685654fe528fd65", size = 10534857, upload-time = "2026-03-05T20:06:19.581Z" }, + { url = "https://files.pythonhosted.org/packages/59/71/39234440f27a226475a0659561adb0d784b4d247dfe7f43ffc12dd02e288/ruff-0.15.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd4b801e57955fe9f02b31d20375ab3a5c4415f2e5105b79fb94cf2642c91440", size = 10309120, upload-time = "2026-03-05T20:06:00.435Z" }, + { url = "https://files.pythonhosted.org/packages/f5/87/4140aa86a93df032156982b726f4952aaec4a883bb98cb6ef73c347da253/ruff-0.15.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391f7c73388f3d8c11b794dbbc2959a5b5afe66642c142a6effa90b45f6f5204", size = 11047428, upload-time = "2026-03-05T20:05:51.867Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f7/4953e7e3287676f78fbe85e3a0ca414c5ca81237b7575bdadc00229ac240/ruff-0.15.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dc18f30302e379fe1e998548b0f5e9f4dff907f52f73ad6da419ea9c19d66c8", size = 11914251, upload-time = "2026-03-05T20:06:22.887Z" }, + { url = "https://files.pythonhosted.org/packages/77/46/0f7c865c10cf896ccf5a939c3e84e1cfaeed608ff5249584799a74d33835/ruff-0.15.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc6e7f90087e2d27f98dc34ed1b3ab7c8f0d273cc5431415454e22c0bd2a681", size = 11333801, upload-time = "2026-03-05T20:05:57.168Z" }, + { url = "https://files.pythonhosted.org/packages/d3/01/a10fe54b653061585e655f5286c2662ebddb68831ed3eaebfb0eb08c0a16/ruff-0.15.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1cb7169f53c1ddb06e71a9aebd7e98fc0fea936b39afb36d8e86d36ecc2636a", size = 11206821, upload-time = "2026-03-05T20:06:03.441Z" }, + { url = "https://files.pythonhosted.org/packages/7a/0d/2132ceaf20c5e8699aa83da2706ecb5c5dcdf78b453f77edca7fb70f8a93/ruff-0.15.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9b037924500a31ee17389b5c8c4d88874cc6ea8e42f12e9c61a3d754ff72f1ca", size = 11133326, upload-time = "2026-03-05T20:06:25.655Z" }, + { url = "https://files.pythonhosted.org/packages/72/cb/2e5259a7eb2a0f87c08c0fe5bf5825a1e4b90883a52685524596bfc93072/ruff-0.15.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:65bb414e5b4eadd95a8c1e4804f6772bbe8995889f203a01f77ddf2d790929dd", size = 10510820, upload-time = "2026-03-05T20:06:37.79Z" }, + { url = "https://files.pythonhosted.org/packages/ff/20/b67ce78f9e6c59ffbdb5b4503d0090e749b5f2d31b599b554698a80d861c/ruff-0.15.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d20aa469ae3b57033519c559e9bc9cd9e782842e39be05b50e852c7c981fa01d", size = 10302395, upload-time = "2026-03-05T20:05:54.504Z" }, + { url = "https://files.pythonhosted.org/packages/5f/e5/719f1acccd31b720d477751558ed74e9c88134adcc377e5e886af89d3072/ruff-0.15.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:15388dd28c9161cdb8eda68993533acc870aa4e646a0a277aa166de9ad5a8752", size = 10754069, upload-time = "2026-03-05T20:06:06.422Z" }, + { url = "https://files.pythonhosted.org/packages/c3/9c/d1db14469e32d98f3ca27079dbd30b7b44dbb5317d06ab36718dee3baf03/ruff-0.15.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b30da330cbd03bed0c21420b6b953158f60c74c54c5f4c1dabbdf3a57bf355d2", size = 11304315, upload-time = "2026-03-05T20:06:10.867Z" }, + { url = "https://files.pythonhosted.org/packages/28/3a/950367aee7c69027f4f422059227b290ed780366b6aecee5de5039d50fa8/ruff-0.15.5-py3-none-win32.whl", hash = "sha256:732e5ee1f98ba5b3679029989a06ca39a950cced52143a0ea82a2102cb592b74", size = 10551676, upload-time = "2026-03-05T20:06:13.705Z" }, + { url = "https://files.pythonhosted.org/packages/b8/00/bf077a505b4e649bdd3c47ff8ec967735ce2544c8e4a43aba42ee9bf935d/ruff-0.15.5-py3-none-win_amd64.whl", hash = "sha256:821d41c5fa9e19117616c35eaa3f4b75046ec76c65e7ae20a333e9a8696bc7fe", size = 11678972, upload-time = "2026-03-05T20:06:45.379Z" }, + { url = "https://files.pythonhosted.org/packages/fe/4e/cd76eca6db6115604b7626668e891c9dd03330384082e33662fb0f113614/ruff-0.15.5-py3-none-win_arm64.whl", hash = "sha256:b498d1c60d2fe5c10c45ec3f698901065772730b411f164ae270bb6bfcc4740b", size = 10965572, upload-time = "2026-03-05T20:06:16.984Z" }, +] + [[package]] name = "scikit-learn" version = "1.7.2" @@ -2238,6 +2219,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/2b/f7818f6ec88758dfd21da46b6cd46af9d1b3433e53ddbb19ad1e0da17f9b/torch-2.9.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c88d3299ddeb2b35dcc31753305612db485ab6f1823e37fb29451c8b2732b87e", size = 111163659, upload-time = "2025-11-12T15:23:20.009Z" }, ] +[[package]] +name = "towncrier" +version = "25.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "jinja2" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/eb/5bf25a34123698d3bbab39c5bc5375f8f8bcbcc5a136964ade66935b8b9d/towncrier-25.8.0.tar.gz", hash = "sha256:eef16d29f831ad57abb3ae32a0565739866219f1ebfbdd297d32894eb9940eb1", size = 76322, upload-time = "2025-08-30T11:41:55.393Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/06/8ba22ec32c74ac1be3baa26116e3c28bc0e76a5387476921d20b6fdade11/towncrier-25.8.0-py3-none-any.whl", hash = "sha256:b953d133d98f9aeae9084b56a3563fd2519dfc6ec33f61c9cd2c61ff243fb513", size = 65101, upload-time = "2025-08-30T11:41:53.644Z" }, +] + [[package]] name = "tqdm" version = "4.67.1" From cd75d30ea73d7421a1de02788aef7a04f0143101 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Thu, 5 Mar 2026 22:06:18 -0500 Subject: [PATCH 2/2] Use ruff default line-length (88) instead of 79 Remove the explicit line-length = 79 setting from [tool.ruff] in pyproject.toml and reformat all files with ruff's default of 88. Co-Authored-By: Claude Opus 4.6 --- .github/bump_version.py | 4 +- docs/methodology.ipynb | 16 +- docs/validation/constituencies.ipynb | 14 +- docs/validation/local_authorities.ipynb | 16 +- .../datasets/childcare/takeup_rate.py | 19 +-- .../datasets/create_datasets.py | 4 +- policyengine_uk_data/datasets/frs.py | 148 +++++------------- .../datasets/imputations/capital_gains.py | 16 +- .../datasets/imputations/consumption.py | 22 +-- .../datasets/imputations/salary_sacrifice.py | 28 +--- .../datasets/imputations/services/etb.py | 8 +- .../datasets/imputations/services/nhs.py | 14 +- .../datasets/imputations/services/services.py | 4 +- .../boundary_changes/mapping_matrix.py | 4 +- .../local_areas/constituencies/calibrate.py | 16 +- .../local_areas/constituencies/loss.py | 16 +- .../targets/create_employment_incomes.py | 43 ++--- .../targets/fill_missing_age_demographics.py | 4 +- .../local_authorities/calibrate.py | 22 +-- .../local_areas/local_authorities/loss.py | 22 +-- .../targets/create_employment_incomes.py | 54 ++----- policyengine_uk_data/datasets/spi.py | 13 +- policyengine_uk_data/parameters/__init__.py | 8 +- .../targets/build_loss_matrix.py | 16 +- .../targets/compute/benefits.py | 32 +--- .../targets/compute/demographics.py | 10 +- .../targets/compute/households.py | 26 ++- .../targets/compute/income.py | 20 +-- policyengine_uk_data/targets/registry.py | 4 +- .../targets/sources/hmrc_salary_sacrifice.py | 11 +- .../targets/sources/hmrc_spi.py | 8 +- .../targets/sources/local_age.py | 12 +- .../targets/sources/local_income.py | 12 +- .../targets/sources/local_la_extras.py | 4 +- policyengine_uk_data/targets/sources/obr.py | 25 +-- .../targets/sources/ons_demographics.py | 8 +- .../targets/sources/ons_savings.py | 8 +- .../microsimulation/test_reform_impacts.py | 4 +- .../microsimulation/update_reform_impacts.py | 12 +- policyengine_uk_data/tests/test_aggregates.py | 4 +- .../tests/test_child_limit.py | 21 +-- policyengine_uk_data/tests/test_childcare.py | 24 +-- .../tests/test_non_negative_incomes.py | 4 +- ...sion_contributions_via_salary_sacrifice.py | 4 +- .../tests/test_scotland_uc_babies.py | 8 +- .../tests/test_target_registry.py | 8 +- .../tests/test_uc_by_children.py | 8 +- .../tests/test_vehicle_ownership.py | 8 +- policyengine_uk_data/utils/calibrate.py | 27 +--- policyengine_uk_data/utils/data_upload.py | 8 +- policyengine_uk_data/utils/datasets.py | 20 +-- policyengine_uk_data/utils/huggingface.py | 4 +- .../utils/incomes_projection.py | 30 ++-- policyengine_uk_data/utils/loss.py | 4 +- policyengine_uk_data/utils/progress.py | 12 +- policyengine_uk_data/utils/qrf.py | 4 +- policyengine_uk_data/utils/spi.py | 8 +- policyengine_uk_data/utils/stack.py | 4 +- policyengine_uk_data/utils/uc_data.py | 12 +- pyproject.toml | 3 - 60 files changed, 261 insertions(+), 691 deletions(-) diff --git a/.github/bump_version.py b/.github/bump_version.py index bb0fd6dd..779a82e3 100644 --- a/.github/bump_version.py +++ b/.github/bump_version.py @@ -19,9 +19,7 @@ def get_current_version(pyproject_path: Path) -> str: def infer_bump(changelog_dir: Path) -> str: fragments = [ - f - for f in changelog_dir.iterdir() - if f.is_file() and f.name != ".gitkeep" + f for f in changelog_dir.iterdir() if f.is_file() and f.name != ".gitkeep" ] if not fragments: print("No changelog fragments found", file=sys.stderr) diff --git a/docs/methodology.ipynb b/docs/methodology.ipynb index d580a6d4..892371e3 100644 --- a/docs/methodology.ipynb +++ b/docs/methodology.ipynb @@ -2913,9 +2913,7 @@ "\n", "variable = \"employment_income\"\n", "count = True\n", - "variable_df = incomes[\n", - " (incomes.variable == variable) & (incomes[\"count\"] == count)\n", - "]\n", + "variable_df = incomes[(incomes.variable == variable) & (incomes[\"count\"] == count)]\n", "\n", "fig = px.bar(\n", " variable_df,\n", @@ -4120,9 +4118,7 @@ "source": [ "variable = \"dividend_income\"\n", "count = True\n", - "variable_df = incomes[\n", - " (incomes.variable == variable) & (incomes[\"count\"] == count)\n", - "]\n", + "variable_df = incomes[(incomes.variable == variable) & (incomes[\"count\"] == count)]\n", "\n", "fig = px.bar(\n", " variable_df,\n", @@ -6179,9 +6175,7 @@ "\n", "variable = \"employment_income\"\n", "count = True\n", - "variable_df = incomes[\n", - " (incomes.variable == variable) & (incomes[\"count\"] == count)\n", - "]\n", + "variable_df = incomes[(incomes.variable == variable) & (incomes[\"count\"] == count)]\n", "\n", "fig = px.bar(\n", " variable_df,\n", @@ -7627,9 +7621,7 @@ "\n", "variable = \"employment_income\"\n", "count = True\n", - "variable_df = incomes[\n", - " (incomes.variable == variable) & (incomes[\"count\"] == count)\n", - "]\n", + "variable_df = incomes[(incomes.variable == variable) & (incomes[\"count\"] == count)]\n", "\n", "fig = px.bar(\n", " variable_df,\n", diff --git a/docs/validation/constituencies.ipynb b/docs/validation/constituencies.ipynb index 0b76f81f..8160e553 100644 --- a/docs/validation/constituencies.ipynb +++ b/docs/validation/constituencies.ipynb @@ -190,8 +190,8 @@ ")\n", "from policyengine_uk_data.datasets import EnhancedFRS_2022_23\n", "\n", - "constituency_target_matrix, constituency_actuals, _ = (\n", - " create_constituency_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", + "constituency_target_matrix, constituency_actuals, _ = create_constituency_target_matrix(\n", + " EnhancedFRS_2022_23, 2025, None\n", ")\n", "national_target_matrix, national_actuals = create_national_target_matrix(\n", " EnhancedFRS_2022_23, 2025, None\n", @@ -444,15 +444,11 @@ "national_target_validation[\"target\"] = national_actuals.values\n", "\n", "national_target_validation[\"error\"] = (\n", - " national_target_validation[\"estimate\"]\n", - " - national_target_validation[\"target\"]\n", + " national_target_validation[\"estimate\"] - national_target_validation[\"target\"]\n", ")\n", - "national_target_validation[\"abs_error\"] = national_target_validation[\n", - " \"error\"\n", - "].abs()\n", + "national_target_validation[\"abs_error\"] = national_target_validation[\"error\"].abs()\n", "national_target_validation[\"rel_abs_error\"] = (\n", - " national_target_validation[\"abs_error\"]\n", - " / national_target_validation[\"target\"]\n", + " national_target_validation[\"abs_error\"] / national_target_validation[\"target\"]\n", ")" ] }, diff --git a/docs/validation/local_authorities.ipynb b/docs/validation/local_authorities.ipynb index 37f17d72..7d972bc1 100644 --- a/docs/validation/local_authorities.ipynb +++ b/docs/validation/local_authorities.ipynb @@ -248,9 +248,9 @@ " local_authority_target_validation[\"estimate\"]\n", " - local_authority_target_validation[\"target\"]\n", ")\n", - "local_authority_target_validation[\"abs_error\"] = (\n", - " local_authority_target_validation[\"error\"].abs()\n", - ")\n", + "local_authority_target_validation[\"abs_error\"] = local_authority_target_validation[\n", + " \"error\"\n", + "].abs()\n", "local_authority_target_validation[\"rel_abs_error\"] = (\n", " local_authority_target_validation[\"abs_error\"]\n", " / local_authority_target_validation[\"target\"]\n", @@ -445,15 +445,11 @@ "national_target_validation[\"target\"] = national_actuals.values\n", "\n", "national_target_validation[\"error\"] = (\n", - " national_target_validation[\"estimate\"]\n", - " - national_target_validation[\"target\"]\n", + " national_target_validation[\"estimate\"] - national_target_validation[\"target\"]\n", ")\n", - "national_target_validation[\"abs_error\"] = national_target_validation[\n", - " \"error\"\n", - "].abs()\n", + "national_target_validation[\"abs_error\"] = national_target_validation[\"error\"].abs()\n", "national_target_validation[\"rel_abs_error\"] = (\n", - " national_target_validation[\"abs_error\"]\n", - " / national_target_validation[\"target\"]\n", + " national_target_validation[\"abs_error\"] / national_target_validation[\"target\"]\n", ")" ] }, diff --git a/policyengine_uk_data/datasets/childcare/takeup_rate.py b/policyengine_uk_data/datasets/childcare/takeup_rate.py index 9e5031c3..db643e93 100644 --- a/policyengine_uk_data/datasets/childcare/takeup_rate.py +++ b/policyengine_uk_data/datasets/childcare/takeup_rate.py @@ -59,9 +59,7 @@ def simulate_childcare_programs( np.random.seed(seed) # Take-up flags - sim.set_input( - "would_claim_tfc", 2024, np.random.random(benunit_count) < tfc - ) + sim.set_input("would_claim_tfc", 2024, np.random.random(benunit_count) < tfc) sim.set_input( "would_claim_extended_childcare", 2024, @@ -87,9 +85,7 @@ def simulate_childcare_programs( extended_hours_values = np.clip(extended_hours_values, 0, 30) # Set the maximum extended childcare hours usage variable - sim.set_input( - "maximum_extended_childcare_hours_usage", 2024, extended_hours_values - ) + sim.set_input("maximum_extended_childcare_hours_usage", 2024, extended_hours_values) # Calculate outputs df = sim.calculate_dataframe( @@ -114,14 +110,9 @@ def simulate_childcare_programs( spending = { "tfc": sim.calculate("tax_free_childcare", 2024).sum() / 1e9, - "extended": sim.calculate("extended_childcare_entitlement", 2024).sum() - / 1e9, - "targeted": sim.calculate("targeted_childcare_entitlement", 2024).sum() - / 1e9, - "universal": sim.calculate( - "universal_childcare_entitlement", 2024 - ).sum() - / 1e9, + "extended": sim.calculate("extended_childcare_entitlement", 2024).sum() / 1e9, + "targeted": sim.calculate("targeted_childcare_entitlement", 2024).sum() / 1e9, + "universal": sim.calculate("universal_childcare_entitlement", 2024).sum() / 1e9, } caseload = { diff --git a/policyengine_uk_data/datasets/create_datasets.py b/policyengine_uk_data/datasets/create_datasets.py index 07f6b362..2396946e 100644 --- a/policyengine_uk_data/datasets/create_datasets.py +++ b/policyengine_uk_data/datasets/create_datasets.py @@ -165,9 +165,7 @@ def main(): # Downrate and save update_dataset("Downrate to 2023", "processing") - frs_calibrated = uprate_dataset( - frs_calibrated_constituencies, 2023 - ) + frs_calibrated = uprate_dataset(frs_calibrated_constituencies, 2023) update_dataset("Downrate to 2023", "completed") update_dataset("Save final dataset", "processing") diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py index 37ce70dc..7cc05cd3 100644 --- a/policyengine_uk_data/datasets/frs.py +++ b/policyengine_uk_data/datasets/frs.py @@ -74,23 +74,17 @@ def create_frs( if "benunit" in df.columns: # In the tables, benunit is the index of the benefit unit *within* the household. df.rename(columns={"benunit": "benunit_id"}, inplace=True) - df["benunit_id"] = ( - df["household_id"] * 1e2 + df["benunit_id"] - ).astype(int) + df["benunit_id"] = (df["household_id"] * 1e2 + df["benunit_id"]).astype(int) if "person" in df.columns: df.rename(columns={"person": "person_id"}, inplace=True) - df["person_id"] = ( - df["household_id"] * 1e3 + df["person_id"] - ).astype(int) + df["person_id"] = (df["household_id"] * 1e3 + df["person_id"]).astype(int) frs[table_name] = df # Combine adult and child tables for convenience - frs["person"] = ( - pd.concat([frs["adult"], frs["child"]]).sort_index().fillna(0) - ) + frs["person"] = pd.concat([frs["adult"], frs["child"]]).sort_index().fillna(0) person = frs["person"] benunit = frs["benunit"] @@ -165,12 +159,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): elif ( typeed2_val in (2, 4) or (typeed2_val in (3, 8) and age_val < 11) - or ( - typeed2_val == 0 - and fted_val == 1 - and age_val > 5 - and age_val < 11 - ) + or (typeed2_val == 0 and fted_val == 1 and age_val > 5 and age_val < 11) ): return "PRIMARY" # In lower secondary @@ -191,19 +180,14 @@ def determine_education_level(fted_val, typeed2_val, age_val): elif typeed2_val in (7, 8) and age_val >= 19: return "POST_SECONDARY" # In tertiary - elif typeed2_val == 9 or ( - typeed2_val == 0 and fted_val == 1 and age_val >= 19 - ): + elif typeed2_val == 9 or (typeed2_val == 0 and fted_val == 1 and age_val >= 19): return "TERTIARY" else: return "NOT_IN_EDUCATION" # Apply the function to determine education level pe_person["current_education"] = pd.Series( - [ - determine_education_level(f, t, a) - for f, t, a in zip(fted, typeed2, age) - ], + [determine_education_level(f, t, a) for f, t, a in zip(fted, typeed2, age)], index=pe_person.index, ) @@ -326,9 +310,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ctannual = household.ctannual[CT_valid] # Build the table - ct_mean = ctannual.groupby( - [region, band, single_person], dropna=False - ).mean() + ct_mean = ctannual.groupby([region, band, single_person], dropna=False).mean() ct_mean = ct_mean.replace(-1, ct_mean.mean()) # For every household consult the table to find the imputed @@ -357,9 +339,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): BANDS = ["A", "B", "C", "D", "E", "F", "G", "H", "I"] # Band 1 is the most common pe_household["council_tax_band"] = ( - categorical(household.ctband, 1, range(1, 10), BANDS) - .fillna("D") - .values + categorical(household.ctband, 1, range(1, 10), BANDS).fillna("D").values ) # Domestic rates variables are all weeklyised, unlike Council Tax variables (despite the variable name suggesting otherwise) if year < 2021: @@ -384,9 +364,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): WEEKS_IN_YEAR = 365.25 / 7 - pe_person["employment_income"] = ( - np.maximum(0, person.inearns) * WEEKS_IN_YEAR - ) + pe_person["employment_income"] = np.maximum(0, person.inearns) * WEEKS_IN_YEAR pension_payment = sum_to_entity( pension.penpay * (pension.penpay > 0), @@ -400,10 +378,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) pension_deductions_removed = sum_to_entity( pension.poamt - * ( - ((pension.poinc == 2) | (pension.penoth == 1)) - & (pension.poamt > 0) - ), + * (((pension.poinc == 2) | (pension.penoth == 1)) & (pension.poamt > 0)), pension.person_id, person.person_id, ) @@ -412,9 +387,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pension_payment + pension_tax_paid + pension_deductions_removed ) * WEEKS_IN_YEAR - pe_person["self_employment_income"] = ( - np.maximum(0, person.seincam2) * WEEKS_IN_YEAR - ) + pe_person["self_employment_income"] = np.maximum(0, person.seincam2) * WEEKS_IN_YEAR INVERTED_BASIC_RATE = 1.25 @@ -429,10 +402,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) taxable_savings_interest = ( sum_to_entity( - ( - account.accint - * np.where(account.acctax == 1, INVERTED_BASIC_RATE, 1) - ) + (account.accint * np.where(account.acctax == 1, INVERTED_BASIC_RATE, 1)) * (account.account.isin((1, 3, 5, 27, 28))), account.person_id, person.person_id, @@ -446,10 +416,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["dividend_income"] = np.maximum( 0, sum_to_entity( - ( - account.accint - * np.where(account.invtax == 1, INVERTED_BASIC_RATE, 1) - ) + (account.accint * np.where(account.invtax == 1, INVERTED_BASIC_RATE, 1)) * ( ((account.account == 6) & (account.invtax == 1)) # GGES | account.account.isin((7, 8)) # Stocks/shares/UITs @@ -474,16 +441,14 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["property_income"] = ( np.maximum( 0, - is_head * persons_household_property_income - + person.cvpay - + person.royyr1, + is_head * persons_household_property_income + person.cvpay + person.royyr1, ) * WEEKS_IN_YEAR ) maintenance_to_self = np.maximum( - pd.Series( - np.where(person.mntus1 == 2, person.mntusam1, person.mntamt1) - ).fillna(0), + pd.Series(np.where(person.mntus1 == 2, person.mntusam1, person.mntamt1)).fillna( + 0 + ), 0, ) maintenance_from_dwp = person.mntamt2 @@ -519,8 +484,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ] pe_person["private_transfer_income"] = ( - sum_from_positive_fields(person, PRIVATE_TRANSFER_INCOME_FIELDS) - * WEEKS_IN_YEAR + sum_from_positive_fields(person, PRIVATE_TRANSFER_INCOME_FIELDS) * WEEKS_IN_YEAR ) pe_person["lump_sum_income"] = person.redamt @@ -561,9 +525,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["jsa_contrib_reported"] = ( sum_to_entity( - benefits.benamt - * (benefits.var2.isin((1, 3))) - * (benefits.benefit == 14), + benefits.benamt * (benefits.var2.isin((1, 3))) * (benefits.benefit == 14), benefits.person_id, person.person_id, ) @@ -571,9 +533,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) pe_person["jsa_income_reported"] = ( sum_to_entity( - benefits.benamt - * (benefits.var2.isin((2, 4))) - * (benefits.benefit == 14), + benefits.benamt * (benefits.var2.isin((2, 4))) * (benefits.benefit == 14), benefits.person_id, person.person_id, ) @@ -581,9 +541,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) pe_person["esa_contrib_reported"] = ( sum_to_entity( - benefits.benamt - * (benefits.var2.isin((1, 3))) - * (benefits.benefit == 16), + benefits.benamt * (benefits.var2.isin((1, 3))) * (benefits.benefit == 16), benefits.person_id, person.person_id, ) @@ -591,9 +549,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) pe_person["esa_income_reported"] = ( sum_to_entity( - benefits.benamt - * (benefits.var2.isin((2, 4))) - * (benefits.benefit == 16), + benefits.benamt * (benefits.var2.isin((2, 4))) * (benefits.benefit == 16), benefits.person_id, person.person_id, ) @@ -647,9 +603,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["maintenance_expenses"] = ( pd.Series( - np.where( - maintenance.mrus == 2, maintenance.mruamt, maintenance.mramt - ) + np.where(maintenance.mrus == 2, maintenance.mruamt, maintenance.mramt) ) .groupby(maintenance.person_id) .sum() @@ -662,9 +616,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_household["mortgage_interest_repayment"] = ( household.mortint.fillna(0).values * WEEKS_IN_YEAR ) - mortgage_capital = np.where( - mortgage.rmort == 1, mortgage.rmamt, mortgage.borramt - ) + mortgage_capital = np.where(mortgage.rmort == 1, mortgage.rmamt, mortgage.borramt) mortgage_capital_repayment = sum_to_entity( mortgage_capital / mortgage.mortend, mortgage.household_id, @@ -674,9 +626,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["childcare_expenses"] = ( sum_to_entity( - childcare.chamt - * (childcare.cost == 1) - * (childcare.registrd == 1), + childcare.chamt * (childcare.cost == 1) * (childcare.registrd == 1), childcare.person_id, person.person_id, ) @@ -721,10 +671,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): # respondents who were not asked the question (imputation candidates) if "salsac_raw" in job.columns: salsac_numeric = ( - job["salsac_raw"] - .map({"1": 1, "2": 0, " ": -1}) - .fillna(-1) - .astype(int) + job["salsac_raw"].map({"1": 1, "2": 0, " ": -1}).fillna(-1).astype(int) ) # Aggregate to person level: take max (any job with SS = person has SS) pe_person["salary_sacrifice_reported"] = np.clip( @@ -795,17 +742,13 @@ def determine_education_level(fted_val, typeed2_val, age_val): from policyengine_uk import Microsimulation sim = Microsimulation(dataset=dataset) - region = sim.populations["benunit"].household( - "region", dataset.time_period - ) + region = sim.populations["benunit"].household("region", dataset.time_period) lha_category = sim.calculate("LHA_category", year) brma = np.empty(len(region), dtype=object) # Sample from a random BRMA in the region, weighted by the number of observations in each BRMA - lha_list_of_rents = pd.read_csv( - STORAGE_FOLDER / "lha_list_of_rents.csv.gz" - ) + lha_list_of_rents = pd.read_csv(STORAGE_FOLDER / "lha_list_of_rents.csv.gz") lha_list_of_rents = lha_list_of_rents.copy() for possible_region in lha_list_of_rents.region.unique(): @@ -813,9 +756,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): lor_mask = (lha_list_of_rents.region == possible_region) & ( lha_list_of_rents.lha_category == possible_lha_category ) - mask = (region == possible_region) & ( - lha_category == possible_lha_category - ) + mask = (region == possible_region) & (lha_category == possible_lha_category) brma[mask] = lha_list_of_rents[lor_mask].brma.sample( n=len(region[mask]), replace=True ) @@ -831,9 +772,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): } ) - df = df.groupby("household_id").brma.aggregate( - lambda x: x.sample(n=1).iloc[0] - ) + df = df.groupby("household_id").brma.aggregate(lambda x: x.sample(n=1).iloc[0]) brmas = df[sim.calculate("household_id")].values pe_household["brma"] = brmas @@ -862,8 +801,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) paragraph_4 = ( pe_person.pip_dl_reported - >= benefit.pip.daily_living.enhanced * WEEKS_IN_YEAR - - THRESHOLD_SAFETY_GAP + >= benefit.pip.daily_living.enhanced * WEEKS_IN_YEAR - THRESHOLD_SAFETY_GAP ) paragraph_5 = pe_person.afcs_reported > 0 pe_person["is_severely_disabled_for_benefits"] = ( @@ -882,9 +820,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pension_credit_rate = load_take_up_rate("pension_credit", year) universal_credit_rate = load_take_up_rate("universal_credit", year) marriage_allowance_rate = load_take_up_rate("marriage_allowance", year) - child_benefit_opts_out_rate = load_take_up_rate( - "child_benefit_opts_out_rate", year - ) + child_benefit_opts_out_rate = load_take_up_rate("child_benefit_opts_out_rate", year) tfc_rate = load_take_up_rate("tax_free_childcare", year) extended_childcare_rate = load_take_up_rate("extended_childcare", year) universal_childcare_rate = load_take_up_rate("universal_childcare", year) @@ -911,9 +847,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_benunit["would_claim_uc"] = ( generator.random(len(pe_benunit)) < universal_credit_rate ) - pe_benunit["would_claim_tfc"] = ( - generator.random(len(pe_benunit)) < tfc_rate - ) + pe_benunit["would_claim_tfc"] = generator.random(len(pe_benunit)) < tfc_rate pe_benunit["would_claim_extended_childcare"] = ( generator.random(len(pe_benunit)) < extended_childcare_rate ) @@ -937,12 +871,8 @@ def determine_education_level(fted_val, typeed2_val, age_val): # Generate other stochastic variables using rates from parameter files tv_ownership_rate = load_parameter("stochastic", "tv_ownership_rate", year) - tv_evasion_rate = load_parameter( - "stochastic", "tv_licence_evasion_rate", year - ) - first_time_buyer_rate = load_parameter( - "stochastic", "first_time_buyer_rate", year - ) + tv_evasion_rate = load_parameter("stochastic", "tv_licence_evasion_rate", year) + first_time_buyer_rate = load_parameter("stochastic", "first_time_buyer_rate", year) # Household-level: TV ownership pe_household["household_owns_tv"] = ( @@ -963,9 +893,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["higher_earner_tie_break"] = generator.random(len(pe_person)) # Person-level: Private school attendance random draw - pe_person["attends_private_school_random_draw"] = generator.random( - len(pe_person) - ) + pe_person["attends_private_school_random_draw"] = generator.random(len(pe_person)) # Generate extended childcare hours usage values with mean 15.019 and sd # 4.972 @@ -974,9 +902,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): extended_hours_values = np.clip(extended_hours_values, 0, 30) # Add the maximum extended childcare hours usage - pe_benunit["maximum_extended_childcare_hours_usage"] = ( - extended_hours_values - ) + pe_benunit["maximum_extended_childcare_hours_usage"] = extended_hours_values # Add marital status at the benefit unit level diff --git a/policyengine_uk_data/datasets/imputations/capital_gains.py b/policyengine_uk_data/datasets/imputations/capital_gains.py index 7408af5a..65dfb9b8 100644 --- a/policyengine_uk_data/datasets/imputations/capital_gains.py +++ b/policyengine_uk_data/datasets/imputations/capital_gains.py @@ -20,9 +20,9 @@ capital_gains = pd.read_csv( STORAGE_FOLDER / "capital_gains_distribution_advani_summers.csv.gz" ) -capital_gains["maximum_total_income"] = ( - capital_gains.minimum_total_income.shift(-1).fillna(np.inf) -) +capital_gains["maximum_total_income"] = capital_gains.minimum_total_income.shift( + -1 +).fillna(np.inf) # Silence verbose logging logging.getLogger("root").setLevel(logging.WARNING) @@ -82,9 +82,7 @@ def loss(blend_factor): sim.map_result(ti_in_range, "person", "household", how="sum") ) household_cg_in_income_range_count = torch.tensor( - sim.map_result( - cg_in_income_range, "person", "household", how="sum" - ) + sim.map_result(cg_in_income_range, "person", "household", how="sum") ) pred_ti_in_range = ( blended_household_weight * household_ti_in_range_count @@ -116,9 +114,9 @@ def loss(blend_factor): new_household_weight[first_half] = ( blend_factor * original_household_weight[first_half] ) - new_household_weight[~first_half] = ( - 1 - blend_factor - ) * original_household_weight[first_half] + new_household_weight[~first_half] = (1 - blend_factor) * original_household_weight[ + first_half + ] # Impute actual capital gains amounts given gains new_cg = np.zeros(len(ti)) diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py index c726441c..639e95d4 100644 --- a/policyengine_uk_data/datasets/imputations/consumption.py +++ b/policyengine_uk_data/datasets/imputations/consumption.py @@ -213,16 +213,14 @@ def impute_has_fuel_to_lcfs(household: pd.DataFrame) -> pd.DataFrame: output_df = model.predict(input_df) # Clip to [0, 1] as it's a probability - household["has_fuel_consumption"] = output_df[ - "has_fuel_consumption" - ].values.clip(0, 1) + household["has_fuel_consumption"] = output_df["has_fuel_consumption"].values.clip( + 0, 1 + ) return household -def generate_lcfs_table( - lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame -): +def generate_lcfs_table(lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame): """ Generate LCFS training table for consumption imputation. @@ -247,14 +245,10 @@ def generate_lcfs_table( # This bridges WAS (has vehicles) to LCFS (has fuel spending) household = impute_has_fuel_to_lcfs(household) - return household[ - PREDICTOR_VARIABLES + IMPUTATIONS + ["household_weight"] - ].dropna() + return household[PREDICTOR_VARIABLES + IMPUTATIONS + ["household_weight"]].dropna() -def uprate_lcfs_table( - household: pd.DataFrame, time_period: str -) -> pd.DataFrame: +def uprate_lcfs_table(household: pd.DataFrame, time_period: str) -> pd.DataFrame: from policyengine_uk.system import system start_period = 2021 @@ -298,9 +292,7 @@ def save_imputation_models(): def create_consumption_model(overwrite_existing: bool = False): from policyengine_uk_data.utils.qrf import QRF - if ( - STORAGE_FOLDER / "consumption.pkl" - ).exists() and not overwrite_existing: + if (STORAGE_FOLDER / "consumption.pkl").exists() and not overwrite_existing: return QRF(file_path=STORAGE_FOLDER / "consumption.pkl") return save_imputation_models() diff --git a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py index 7769f7a5..ff7cf4e2 100644 --- a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py +++ b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py @@ -62,9 +62,7 @@ def save_salary_sacrifice_model(): employment_income = sim.calculate("employment_income").values # Get SS amounts and indicator for who was asked - ss_amount = ( - dataset.person.pension_contributions_via_salary_sacrifice.values - ) + ss_amount = dataset.person.pension_contributions_via_salary_sacrifice.values if "salary_sacrifice_asked" not in dataset.person.columns: raise ValueError( "Dataset missing salary_sacrifice_asked field. " @@ -77,17 +75,13 @@ def save_salary_sacrifice_model(): training_mask = ss_asked == 1 if training_mask.sum() == 0: - raise ValueError( - "No training data found - no respondents were asked SALSAC." - ) + raise ValueError("No training data found - no respondents were asked SALSAC.") train_df = pd.DataFrame( { "age": age[training_mask], "employment_income": employment_income[training_mask], - "pension_contributions_via_salary_sacrifice": ss_amount[ - training_mask - ], + "pension_contributions_via_salary_sacrifice": ss_amount[training_mask], } ) @@ -141,9 +135,7 @@ def impute_salary_sacrifice( # Get variables needed for imputation age = sim.calculate("age").values employment_income = sim.calculate("employment_income").values - current_ss = ( - dataset.person.pension_contributions_via_salary_sacrifice.values - ) + current_ss = dataset.person.pension_contributions_via_salary_sacrifice.values # Get indicator for who was asked if "salary_sacrifice_asked" not in dataset.person.columns: @@ -164,9 +156,7 @@ def impute_salary_sacrifice( predictions = model.predict(pred_df) # Get imputed amounts (QRF predicts continuous values) - imputed_ss = predictions[ - "pension_contributions_via_salary_sacrifice" - ].values + imputed_ss = predictions["pension_contributions_via_salary_sacrifice"].values # Ensure non-negative imputed_ss = np.maximum(0, imputed_ss) @@ -187,9 +177,7 @@ def impute_salary_sacrifice( # 4.3mn below 2k). Donors keep their full employee pension amount # so those above 2k become above-cap records and the rest below-cap. person_weight = sim.calculate("person_weight").values - employee_pension = dataset.person[ - "employee_pension_contributions" - ].values.copy() + employee_pension = dataset.person["employee_pension_contributions"].values.copy() has_ss = final_ss > 0 # Donor pool: employed pension contributors not already SS users @@ -207,9 +195,7 @@ def impute_salary_sacrifice( if donor_weighted > 0: imputation_rate = min(0.5, shortfall / donor_weighted) rng = np.random.default_rng(seed=2024) - newly_imputed = is_donor & ( - rng.random(len(final_ss)) < imputation_rate - ) + newly_imputed = is_donor & (rng.random(len(final_ss)) < imputation_rate) # Move full employee pension to SS so the above/below # 2k split reflects the natural pension distribution diff --git a/policyengine_uk_data/datasets/imputations/services/etb.py b/policyengine_uk_data/datasets/imputations/services/etb.py index cc9e28fd..31678e42 100644 --- a/policyengine_uk_data/datasets/imputations/services/etb.py +++ b/policyengine_uk_data/datasets/imputations/services/etb.py @@ -48,9 +48,7 @@ def create_public_services_model(overwrite_existing: bool = False) -> None: overwrite_existing: Whether to overwrite an existing model file. """ # Check if model already exists and we're not overwriting - if ( - STORAGE_FOLDER / "public_services.pkl" - ).exists() and not overwrite_existing: + if (STORAGE_FOLDER / "public_services.pkl").exists() and not overwrite_existing: return etb_path = STORAGE_FOLDER / "etb_1977_21" / "householdv2_1977-2021.tab" @@ -161,9 +159,7 @@ def create_efrs_input_dataset(dataset: UKSingleYearDataset) -> pd.DataFrame: df["count_primary_education"] = education == "PRIMARY" df["count_secondary_education"] = education == "LOWER_SECONDARY" - df["count_further_education"] = education.isin( - ["UPPER_SECONDARY", "TERTIARY"] - ) + df["count_further_education"] = education.isin(["UPPER_SECONDARY", "TERTIARY"]) df["hbai_household_net_income"] = ( df["hbai_household_net_income"] / df["household_count_people"] ) diff --git a/policyengine_uk_data/datasets/imputations/services/nhs.py b/policyengine_uk_data/datasets/imputations/services/nhs.py index fa25d7ca..09e73b53 100644 --- a/policyengine_uk_data/datasets/imputations/services/nhs.py +++ b/policyengine_uk_data/datasets/imputations/services/nhs.py @@ -66,9 +66,9 @@ def get_age_bounds(age_group: str): + nhs[nhs["Lower age"] > 80].groupby(["Gender", "Service"]).sum() ).reset_index() - nhs[nhs["Lower age"] == 80][["Activity Count", "Total Cost"]] = ( - over_80_values[["Activity Count", "Total Cost"]] - ) + nhs[nhs["Lower age"] == 80][["Activity Count", "Total Cost"]] = over_80_values[ + ["Activity Count", "Total Cost"] + ] nhs = nhs[nhs["Lower age"] <= 80] nhs[nhs["Lower age"] == 80]["Upper age"] = 120 @@ -85,12 +85,8 @@ def get_age_bounds(age_group: str): ].household_weight.values.sum() nhs.loc[i, "Total people"] = count - nhs["Per-person average units"] = ( - nhs["Activity Count"] / nhs["Total people"] - ) - nhs["Per-person average spending"] = ( - nhs["Total Cost"] / nhs["Total people"] - ) + nhs["Per-person average units"] = nhs["Activity Count"] / nhs["Total people"] + nhs["Per-person average spending"] = nhs["Total Cost"] / nhs["Total people"] indirect_cost_adjustment_factor = ( 202e9 / nhs["Total Cost"].sum() ) # £202 billion 2025/26 budget diff --git a/policyengine_uk_data/datasets/imputations/services/services.py b/policyengine_uk_data/datasets/imputations/services/services.py index 22717e07..2b7892f2 100644 --- a/policyengine_uk_data/datasets/imputations/services/services.py +++ b/policyengine_uk_data/datasets/imputations/services/services.py @@ -86,8 +86,6 @@ def impute_services( ] for person_imputations in visit_variables + spending_variables: - dataset.person[person_imputations] = input_data[ - person_imputations - ].values + dataset.person[person_imputations] = input_data[person_imputations].values return dataset diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/boundary_changes/mapping_matrix.py b/policyengine_uk_data/datasets/local_areas/constituencies/boundary_changes/mapping_matrix.py index c493f92f..45713388 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/boundary_changes/mapping_matrix.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/boundary_changes/mapping_matrix.py @@ -15,9 +15,7 @@ # 4. Calculate proportion of old constituency's population in each new constituency # First, compute total population of each old constituency using groupby and transform -total_old_pop = df.groupby("code_2010")["old_population_present"].transform( - "sum" -) +total_old_pop = df.groupby("code_2010")["old_population_present"].transform("sum") # Then compute the proportion for each row df["proportion"] = df["old_population_present"] / total_old_pop diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py b/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py index 6ea99677..24aa3c30 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py @@ -73,9 +73,9 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): constituency_target_validation["estimate"] - constituency_target_validation["target"] ) - constituency_target_validation["abs_error"] = ( - constituency_target_validation["error"].abs() - ) + constituency_target_validation["abs_error"] = constituency_target_validation[ + "error" + ].abs() constituency_target_validation["rel_abs_error"] = ( constituency_target_validation["abs_error"] / constituency_target_validation["target"] @@ -91,15 +91,11 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): national_target_validation["target"] = national_actuals.values national_target_validation["error"] = ( - national_target_validation["estimate"] - - national_target_validation["target"] + national_target_validation["estimate"] - national_target_validation["target"] ) - national_target_validation["abs_error"] = national_target_validation[ - "error" - ].abs() + national_target_validation["abs_error"] = national_target_validation["error"].abs() national_target_validation["rel_abs_error"] = ( - national_target_validation["abs_error"] - / national_target_validation["target"] + national_target_validation["abs_error"] / national_target_validation["target"] ) df = pd.concat( diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/loss.py b/policyengine_uk_data/datasets/local_areas/constituencies/loss.py index bad839f4..3ea6e12a 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/loss.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/loss.py @@ -50,9 +50,7 @@ def create_constituency_target_matrix( # ── Income targets ───────────────────────────────────────────── incomes = get_constituency_income_targets() - national_incomes = get_national_income_projections( - int(dataset.time_period) - ) + national_incomes = get_national_income_projections(int(dataset.time_period)) for income_variable in INCOME_VARIABLES: income_values = sim.calculate(income_variable).values @@ -107,16 +105,10 @@ def create_constituency_target_matrix( children_per_hh = sim.map_result(is_child, "person", "household") on_uc_hh = sim.map_result(on_uc, "benunit", "household") > 0 - matrix["uc_hh_0_children"] = (on_uc_hh & (children_per_hh == 0)).astype( - float - ) + matrix["uc_hh_0_children"] = (on_uc_hh & (children_per_hh == 0)).astype(float) matrix["uc_hh_1_child"] = (on_uc_hh & (children_per_hh == 1)).astype(float) - matrix["uc_hh_2_children"] = (on_uc_hh & (children_per_hh == 2)).astype( - float - ) - matrix["uc_hh_3plus_children"] = ( - on_uc_hh & (children_per_hh >= 3) - ).astype(float) + matrix["uc_hh_2_children"] = (on_uc_hh & (children_per_hh == 2)).astype(float) + matrix["uc_hh_3plus_children"] = (on_uc_hh & (children_per_hh >= 3)).astype(float) uc_by_children = get_constituency_uc_by_children_targets() for col in uc_by_children.columns: diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/targets/create_employment_incomes.py b/policyengine_uk_data/datasets/local_areas/constituencies/targets/create_employment_incomes.py index a502fa78..a99c8dc9 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/targets/create_employment_incomes.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/targets/create_employment_incomes.py @@ -131,33 +131,23 @@ def fill_missing_percentiles(row): # If this percentile is missing in the row if pd.isna(row[col]): # Find the closest lower and upper known percentiles - lower = max( - [p for p in known_percentiles if p < percentile], default=None - ) - upper = min( - [p for p in known_percentiles if p > percentile], default=None - ) + lower = max([p for p in known_percentiles if p < percentile], default=None) + upper = min([p for p in known_percentiles if p > percentile], default=None) # If both lower and upper bounds exist, interpolate if lower is not None and upper is not None: # Ratio between the target percentile and the lower bound - lower_ratio = ( - reference_values[percentile] / reference_values[lower] - ) + lower_ratio = reference_values[percentile] / reference_values[lower] row[col] = row[f"{lower} percentile"] * lower_ratio # If only the lower bound exists, extrapolate upwards elif lower is not None: - lower_ratio = ( - reference_values[percentile] / reference_values[lower] - ) + lower_ratio = reference_values[percentile] / reference_values[lower] row[col] = row[f"{lower} percentile"] * lower_ratio # If only the upper bound exists, extrapolate downwards elif upper is not None: - upper_ratio = ( - reference_values[percentile] / reference_values[upper] - ) + upper_ratio = reference_values[percentile] / reference_values[upper] row[col] = row[f"{upper} percentile"] * upper_ratio return row @@ -311,9 +301,7 @@ def calculate_band_population(row): # Ensure lower_percentile is less than upper_percentile if lower_percentile < upper_percentile: # Integrate to get proportion in this income band - proportion_in_band, _ = quad( - spline, lower_percentile, upper_percentile - ) + proportion_in_band, _ = quad(spline, lower_percentile, upper_percentile) proportion_in_band = proportion_in_band / spline( filtered_percentiles[-1] ) # Normalize by max spline value @@ -340,9 +328,7 @@ def calculate_band_population(row): income_bands, columns=["income_lower_bound", "income_upper_bound"] ) band_df["population_count"] = band_population_counts - band_df["parliamentary constituency 2010"] = row[ - "parliamentary constituency 2010" - ] + band_df["parliamentary constituency 2010"] = row["parliamentary constituency 2010"] band_df["constituency_code"] = row["constituency_code"] return band_df @@ -435,9 +421,7 @@ def calculate_band_population(row): import numpy as np -def find_and_replace_zero_populations( - result_df_copy, total_income -) -> pd.DataFrame: +def find_and_replace_zero_populations(result_df_copy, total_income) -> pd.DataFrame: # Step 1: Find constituencies with all zero populations constituencies_with_zero_population = ( result_df_copy.groupby("constituency_code") @@ -480,15 +464,12 @@ def find_and_replace_zero_populations( # Calculate absolute differences differences = np.abs( - other_constituencies["total_income_count"] - - current_total_income + other_constituencies["total_income_count"] - current_total_income ) # Get the index of the minimum difference min_diff_idx = differences.values.argmin() - nearest_constituency = other_constituencies.iloc[min_diff_idx][ - "code" - ] + nearest_constituency = other_constituencies.iloc[min_diff_idx]["code"] # Step 3: Copy population and earnings data from nearest constituency # For each income band of the zero constituency @@ -543,9 +524,7 @@ def find_and_replace_zero_populations( ].values[0] except Exception as e: - print( - f"Error processing constituency {zero_constituency}: {str(e)}" - ) + print(f"Error processing constituency {zero_constituency}: {str(e)}") continue return result_df diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/targets/fill_missing_age_demographics.py b/policyengine_uk_data/datasets/local_areas/constituencies/targets/fill_missing_age_demographics.py index b95d7938..9bacfb71 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/targets/fill_missing_age_demographics.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/targets/fill_missing_age_demographics.py @@ -29,9 +29,7 @@ missing_constituencies = pd.DataFrame( { "code": missing_constituencies.values, - "name": incomes.set_index("code") - .loc[missing_constituencies] - .name.values, + "name": incomes.set_index("code").loc[missing_constituencies].name.values, } ) for col in ages.columns[2:]: diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py b/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py index 588f2955..746d94e7 100644 --- a/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py @@ -18,12 +18,8 @@ def calibrate( ): return calibrate_local_areas( dataset=dataset, - matrix_fn=lambda ds: create_local_authority_target_matrix( - ds, ds.time_period - ), - national_matrix_fn=lambda ds: create_national_target_matrix( - ds, ds.time_period - ), + matrix_fn=lambda ds: create_local_authority_target_matrix(ds, ds.time_period), + national_matrix_fn=lambda ds: create_national_target_matrix(ds, ds.time_period), area_count=360, weight_file="local_authority_weights.h5", excluded_training_targets=excluded_training_targets, @@ -37,9 +33,7 @@ def calibrate( def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): la_target_matrix, la_actuals = m_c, y_c national_target_matrix, national_actuals = m_n, y_n - local_authorities = pd.read_csv( - STORAGE_FOLDER / "local_authorities_2021.csv" - ) + local_authorities = pd.read_csv(STORAGE_FOLDER / "local_authorities_2021.csv") la_wide = weights @ la_target_matrix la_wide.index = local_authorities.code.values la_wide["name"] = local_authorities.name.values @@ -93,15 +87,11 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): national_target_validation["target"] = national_actuals.values national_target_validation["error"] = ( - national_target_validation["estimate"] - - national_target_validation["target"] + national_target_validation["estimate"] - national_target_validation["target"] ) - national_target_validation["abs_error"] = national_target_validation[ - "error" - ].abs() + national_target_validation["abs_error"] = national_target_validation["error"].abs() national_target_validation["rel_abs_error"] = ( - national_target_validation["abs_error"] - / national_target_validation["target"] + national_target_validation["abs_error"] / national_target_validation["target"] ) df = pd.concat( diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py b/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py index 1b4e113e..446329ba 100644 --- a/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py @@ -60,9 +60,7 @@ def create_local_authority_target_matrix( # ── Income targets ───────────────────────────────────────────── incomes = get_la_income_targets() - national_incomes = get_national_income_projections( - int(dataset.time_period) - ) + national_incomes = get_national_income_projections(int(dataset.time_period)) for income_variable in INCOME_VARIABLES: income_values = sim.calculate(income_variable).values @@ -127,9 +125,7 @@ def create_local_authority_target_matrix( ) hbai_net_income = sim.calculate("equiv_hbai_household_net_income").values - hbai_net_income_ahc = sim.calculate( - "equiv_hbai_household_net_income_ahc" - ).values + hbai_net_income_ahc = sim.calculate("equiv_hbai_household_net_income_ahc").values housing_costs = hbai_net_income - hbai_net_income_ahc matrix["ons/equiv_net_income_bhc"] = hbai_net_income @@ -194,15 +190,11 @@ def create_local_authority_target_matrix( ) tenure_type = sim.calculate("tenure_type").values - matrix["tenure/owned_outright"] = (tenure_type == "OWNED_OUTRIGHT").astype( - float - ) - matrix["tenure/owned_mortgage"] = ( - tenure_type == "OWNED_WITH_MORTGAGE" - ).astype(float) - matrix["tenure/private_rent"] = (tenure_type == "RENT_PRIVATELY").astype( + matrix["tenure/owned_outright"] = (tenure_type == "OWNED_OUTRIGHT").astype(float) + matrix["tenure/owned_mortgage"] = (tenure_type == "OWNED_WITH_MORTGAGE").astype( float ) + matrix["tenure/private_rent"] = (tenure_type == "RENT_PRIVATELY").astype(float) matrix["tenure/social_rent"] = ( (tenure_type == "RENT_FROM_COUNCIL") | (tenure_type == "RENT_FROM_HA") ).astype(float) @@ -219,9 +211,7 @@ def create_local_authority_target_matrix( ("social_rent", "social_rent_pct"), ]: targets = tenure_merged[pct_col] / 100 * tenure_merged["households"] - national = ( - original_weights * matrix[f"tenure/{tenure_key}"].values - ).sum() + national = (original_weights * matrix[f"tenure/{tenure_key}"].values).sum() y[f"tenure/{tenure_key}"] = np.where( has_tenure, targets.values, national * la_household_share ) diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/targets/create_employment_incomes.py b/policyengine_uk_data/datasets/local_areas/local_authorities/targets/create_employment_incomes.py index 4e34dd3b..3f789442 100644 --- a/policyengine_uk_data/datasets/local_areas/local_authorities/targets/create_employment_incomes.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/targets/create_employment_incomes.py @@ -131,33 +131,23 @@ def fill_missing_percentiles(row): # If this percentile is missing in the row if pd.isna(row[col]): # Find the closest lower and upper known percentiles - lower = max( - [p for p in known_percentiles if p < percentile], default=None - ) - upper = min( - [p for p in known_percentiles if p > percentile], default=None - ) + lower = max([p for p in known_percentiles if p < percentile], default=None) + upper = min([p for p in known_percentiles if p > percentile], default=None) # If both lower and upper bounds exist, interpolate if lower is not None and upper is not None: # Ratio between the target percentile and the lower bound - lower_ratio = ( - reference_values[percentile] / reference_values[lower] - ) + lower_ratio = reference_values[percentile] / reference_values[lower] row[col] = row[f"{lower} percentile"] * lower_ratio # If only the lower bound exists, extrapolate upwards elif lower is not None: - lower_ratio = ( - reference_values[percentile] / reference_values[lower] - ) + lower_ratio = reference_values[percentile] / reference_values[lower] row[col] = row[f"{lower} percentile"] * lower_ratio # If only the upper bound exists, extrapolate downwards elif upper is not None: - upper_ratio = ( - reference_values[percentile] / reference_values[upper] - ) + upper_ratio = reference_values[percentile] / reference_values[upper] row[col] = row[f"{upper} percentile"] * upper_ratio return row @@ -280,9 +270,9 @@ def calculate_band_population(row): income_bands, columns=["income_lower_bound", "income_upper_bound"] ) band_df["population_count"] = [0] * len(income_bands) - band_df["local authority: district / unitary (as of April 2023)"] = ( - row["local authority: district / unitary (as of April 2023)"] - ) + band_df["local authority: district / unitary (as of April 2023)"] = row[ + "local authority: district / unitary (as of April 2023)" + ] band_df["LA_code"] = row["LA_code"] return band_df @@ -311,9 +301,7 @@ def calculate_band_population(row): # Ensure lower_percentile is less than upper_percentile if lower_percentile < upper_percentile: # Integrate to get proportion in this income band - proportion_in_band, _ = quad( - spline, lower_percentile, upper_percentile - ) + proportion_in_band, _ = quad(spline, lower_percentile, upper_percentile) proportion_in_band = proportion_in_band / spline( filtered_percentiles[-1] ) # Normalize by max spline value @@ -435,15 +423,11 @@ def calculate_band_population(row): import numpy as np -def find_and_replace_zero_populations( - result_df_copy, total_income -) -> pd.DataFrame: +def find_and_replace_zero_populations(result_df_copy, total_income) -> pd.DataFrame: # Step 1: Find local authorities with all zero populations LA_with_zero_population = ( result_df_copy.groupby("LA_code") - .filter(lambda group: (group["population_count"] == 0).all())[ - "LA_code" - ] + .filter(lambda group: (group["population_count"] == 0).all())["LA_code"] .unique() ) @@ -462,26 +446,18 @@ def find_and_replace_zero_populations( ) continue - current_total_income = current_LA_data[ - "total_income_count" - ].values[0] + current_total_income = current_LA_data["total_income_count"].values[0] # Find the nearest local authority by total_income_count # Exclude both the current local authority and other zero population local authorities - other_LA = total_income[ - ~total_income["code"].isin(LA_with_zero_population) - ] + other_LA = total_income[~total_income["code"].isin(LA_with_zero_population)] if other_LA.empty: - print( - f"Warning: No valid local authorities found to copy from" - ) + print(f"Warning: No valid local authorities found to copy from") continue # Calculate absolute differences - differences = np.abs( - other_LA["total_income_count"] - current_total_income - ) + differences = np.abs(other_LA["total_income_count"] - current_total_income) # Get the index of the minimum difference min_diff_idx = differences.values.argmin() diff --git a/policyengine_uk_data/datasets/spi.py b/policyengine_uk_data/datasets/spi.py index 253b1455..f0c08626 100644 --- a/policyengine_uk_data/datasets/spi.py +++ b/policyengine_uk_data/datasets/spi.py @@ -76,19 +76,14 @@ def create_spi( percent_along_age_range = np.random.rand(len(df)) min_age = np.array([AGE_RANGES[age][0] for age in age_range]) max_age = np.array([AGE_RANGES[age][1] for age in age_range]) - person["age"] = ( - min_age + (max_age - min_age) * percent_along_age_range - ).astype(int) + person["age"] = (min_age + (max_age - min_age) * percent_along_age_range).astype( + int + ) person["state_pension_reported"] = df.SRP person["other_tax_credits"] = df.TAX_CRED person["miscellaneous_income"] = ( - df.MOTHINC - + df.INCPBEN - + df.OSSBEN - + df.TAXTERM - + df.UBISJA - + df.OTHERINC + df.MOTHINC + df.INCPBEN + df.OSSBEN + df.TAXTERM + df.UBISJA + df.OTHERINC ) person["gift_aid"] = df.GIFTAID + df.GIFTINV person["other_investment_income"] = df.OTHERINV diff --git a/policyengine_uk_data/parameters/__init__.py b/policyengine_uk_data/parameters/__init__.py index dcf981ba..06f3b558 100644 --- a/policyengine_uk_data/parameters/__init__.py +++ b/policyengine_uk_data/parameters/__init__.py @@ -11,9 +11,7 @@ PARAMETERS_DIR = Path(__file__).parent -def load_parameter( - category: str, variable_name: str, year: int = 2015 -) -> float: +def load_parameter(category: str, variable_name: str, year: int = 2015) -> float: """Load parameter from YAML files in a specific category. Args: @@ -48,9 +46,7 @@ def load_parameter( break if applicable_value is None: - raise ValueError( - f"No value found for {category}/{variable_name} in {year}" - ) + raise ValueError(f"No value found for {category}/{variable_name} in {year}") return applicable_value diff --git a/policyengine_uk_data/targets/build_loss_matrix.py b/policyengine_uk_data/targets/build_loss_matrix.py index 8af391a3..3358a646 100644 --- a/policyengine_uk_data/targets/build_loss_matrix.py +++ b/policyengine_uk_data/targets/build_loss_matrix.py @@ -161,9 +161,7 @@ def pe(self, variable: str): """Calculate variable mapped to household level.""" key = ("pe", variable) if key not in self._cache: - self._cache[key] = self.sim.calculate( - variable, map_to="household" - ).values + self._cache[key] = self.sim.calculate(variable, map_to="household").values return self._cache[key] def pe_person(self, variable: str): @@ -194,9 +192,7 @@ def household_from_family(self, values): @property def region(self): if "region" not in self._cache: - self._cache["region"] = self.sim.calculate( - "region", map_to="person" - ) + self._cache["region"] = self.sim.calculate("region", map_to="person") return self._cache["region"] @property @@ -225,9 +221,7 @@ def counterfactual_sim(self): if "counterfactual_sim" not in self._cache: from policyengine_uk import Microsimulation - ss = self.sim.calculate( - "pension_contributions_via_salary_sacrifice" - ) + ss = self.sim.calculate("pension_contributions_via_salary_sacrifice") emp = self.sim.calculate("employment_income") cf_sim = Microsimulation(dataset=self.dataset, reform=self.reform) cf_sim.set_input( @@ -247,9 +241,7 @@ def counterfactual_sim(self): # ── Column computation dispatch ────────────────────────────────────── -def _compute_column( - target: Target, ctx: _SimContext, year: int -) -> np.ndarray | None: +def _compute_column(target: Target, ctx: _SimContext, year: int) -> np.ndarray | None: """Compute the household-level column for a target. Dispatches to domain-specific compute modules. diff --git a/policyengine_uk_data/targets/compute/benefits.py b/policyengine_uk_data/targets/compute/benefits.py index 140eab68..0a6d291b 100644 --- a/policyengine_uk_data/targets/compute/benefits.py +++ b/policyengine_uk_data/targets/compute/benefits.py @@ -17,9 +17,7 @@ def compute_benefit_cap(target, ctx) -> np.ndarray: return ctx.sim.calculate( "benefit_cap_reduction", map_to="household" ).values.astype(float) - reduction = ctx.sim.calculate( - "benefit_cap_reduction", map_to="household" - ).values + reduction = ctx.sim.calculate("benefit_cap_reduction", map_to="household").values return (reduction > 0).astype(float) @@ -29,9 +27,7 @@ def compute_scotland_uc_child(target, ctx) -> np.ndarray: on_uc = ctx.household_from_family(uc > 0) > 0 child_u1 = ctx.pe_person("is_child") & (ctx.age < 1) has_child_u1 = ctx.household_from_person(child_u1) > 0 - return ( - (ctx.household_region == "SCOTLAND") & on_uc & has_child_u1 - ).astype(float) + return ((ctx.household_region == "SCOTLAND") & on_uc & has_child_u1).astype(float) def compute_uc_by_children(target, ctx) -> np.ndarray: @@ -74,9 +70,7 @@ def ft_hh(value): if ft_str == "single_no_children": match = ft_hh("SINGLE") & (children_per_hh == 0) elif ft_str == "single_with_children": - match = (ft_hh("SINGLE") | ft_hh("LONE_PARENT")) & ( - children_per_hh > 0 - ) + match = (ft_hh("SINGLE") | ft_hh("LONE_PARENT")) & (children_per_hh > 0) elif ft_str == "couple_no_children": match = ft_hh("COUPLE_NO_CHILDREN") elif ft_str == "couple_with_children": @@ -95,15 +89,11 @@ def compute_uc_payment_dist(target, ctx) -> np.ndarray: lower = target.lower_bound upper = target.upper_bound - uc_payments = ctx.sim.calculate( - "universal_credit", map_to="benunit" - ).values + uc_payments = ctx.sim.calculate("universal_credit", map_to="benunit").values uc_family_type = ctx.sim.calculate("family_type", map_to="benunit").values in_band = ( - (uc_payments >= lower) - & (uc_payments < upper) - & (uc_family_type == family_type) + (uc_payments >= lower) & (uc_payments < upper) & (uc_family_type == family_type) ) return ctx.household_from_family(in_band) @@ -113,9 +103,7 @@ def compute_uc_jobseeker(target, ctx) -> np.ndarray: family = ctx.sim.populations["benunit"] uc = ctx.sim.calculate("universal_credit") on_uc = uc > 0 - unemployed = family.any( - ctx.sim.calculate("employment_status") == "UNEMPLOYED" - ) + unemployed = family.any(ctx.sim.calculate("employment_status") == "UNEMPLOYED") if "non_jobseekers" in target.name: mask = on_uc * ~unemployed @@ -164,17 +152,13 @@ def compute_two_child_limit(target, ctx) -> np.ndarray | None: if name == "dwp/uc/two_child_limit/children_affected": return children_in_capped if name == "dwp/uc/two_child_limit/children_in_affected_households": - total_children = sim.map_result( - is_child * child_in_uc, "person", "household" - ) + total_children = sim.map_result(is_child * child_in_uc, "person", "household") return total_children * capped_hh if "_children_households_total_children" in name: n = int(name.split("/")[-1].split("_")[0]) children_count = sim.map_result(is_child, "person", "household") - return (capped_hh * (children_count == n) * children_count).astype( - float - ) + return (capped_hh * (children_count == n) * children_count).astype(float) if "_children_households" in name and "total" not in name: n = int(name.split("/")[-1].split("_")[0]) children_count = sim.map_result(is_child, "person", "household") diff --git a/policyengine_uk_data/targets/compute/demographics.py b/policyengine_uk_data/targets/compute/demographics.py index 670b8072..4d0b8092 100644 --- a/policyengine_uk_data/targets/compute/demographics.py +++ b/policyengine_uk_data/targets/compute/demographics.py @@ -33,9 +33,7 @@ def compute_regional_age(target, ctx) -> np.ndarray | None: return None person_match = ( - (ctx.region.values == pe_region) - & (ctx.age >= lower) - & (ctx.age <= upper) + (ctx.region.values == pe_region) & (ctx.age >= lower) & (ctx.age <= upper) ) return ctx.household_from_person(person_match) @@ -73,7 +71,7 @@ def compute_scotland_demographics(target, ctx) -> np.ndarray | None: if name == "ons/scotland_households_3plus_children": is_child = ctx.pe_person("is_child") children_per_hh = ctx.household_from_person(is_child) - return ( - (ctx.household_region == "SCOTLAND") & (children_per_hh >= 3) - ).astype(float) + return ((ctx.household_region == "SCOTLAND") & (children_per_hh >= 3)).astype( + float + ) return None diff --git a/policyengine_uk_data/targets/compute/households.py b/policyengine_uk_data/targets/compute/households.py index be7686ac..be51f2d0 100644 --- a/policyengine_uk_data/targets/compute/households.py +++ b/policyengine_uk_data/targets/compute/households.py @@ -15,18 +15,18 @@ def ft_hh(value): return ctx.household_from_family(ft == value) > 0 if name == "lone_households_under_65": - return ( - ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head < 65) - ).astype(float) + return (ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head < 65)).astype( + float + ) if name == "lone_households_over_65": - return ( - ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head >= 65) - ).astype(float) + return (ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head >= 65)).astype( + float + ) if name == "unrelated_adult_households": people_per_hh = ctx.household_from_person(np.ones_like(is_child)) - return ( - ft_hh("SINGLE") & (children_per_hh == 0) & (people_per_hh > 1) - ).astype(float) + return (ft_hh("SINGLE") & (children_per_hh == 0) & (people_per_hh > 1)).astype( + float + ) if name == "couple_no_children_households": return ft_hh("COUPLE_NO_CHILDREN").astype(float) if name == "couple_under_3_children_households": @@ -36,14 +36,10 @@ def ft_hh(value): & (children_per_hh <= 2) ).astype(float) if name == "couple_3_plus_children_households": - return (ft_hh("COUPLE_WITH_CHILDREN") & (children_per_hh >= 3)).astype( - float - ) + return (ft_hh("COUPLE_WITH_CHILDREN") & (children_per_hh >= 3)).astype(float) if name == "couple_non_dependent_children_only_households": people_per_hh = ctx.household_from_person(np.ones_like(is_child)) - return (ft_hh("COUPLE_NO_CHILDREN") & (people_per_hh > 2)).astype( - float - ) + return (ft_hh("COUPLE_NO_CHILDREN") & (people_per_hh > 2)).astype(float) if name == "lone_parent_dependent_children_households": return (ft_hh("LONE_PARENT") & (children_per_hh > 0)).astype(float) if name == "lone_parent_non_dependent_children_households": diff --git a/policyengine_uk_data/targets/compute/income.py b/policyengine_uk_data/targets/compute/income.py index c2f286c4..1953be0e 100644 --- a/policyengine_uk_data/targets/compute/income.py +++ b/policyengine_uk_data/targets/compute/income.py @@ -12,9 +12,7 @@ def compute_income_band(target, ctx) -> np.ndarray: upper = target.upper_bound income_df = ctx.sim.calculate_dataframe(["total_income", variable]) - in_band = (income_df.total_income >= lower) & ( - income_df.total_income < upper - ) + in_band = (income_df.total_income >= lower) & (income_df.total_income < upper) if target.is_count: return ctx.household_from_person((income_df[variable] > 0) * in_band) @@ -39,9 +37,7 @@ def compute_ss_it_relief(target, ctx) -> np.ndarray: name = target.name if "basic" in name: - mask = (adj_net_income_cf > basic_thresh) & ( - adj_net_income_cf <= higher_thresh - ) + mask = (adj_net_income_cf > basic_thresh) & (adj_net_income_cf <= higher_thresh) elif "higher" in name: mask = (adj_net_income_cf > higher_thresh) & ( adj_net_income_cf <= additional_thresh @@ -65,14 +61,10 @@ def compute_ss_ni_relief(target, ctx) -> np.ndarray: name = target.name if "employee" in name: ni_base = ctx.sim.calculate("ni_employee") - ni_cf = ctx.counterfactual_sim.calculate( - "ni_employee", ctx.time_period - ) + ni_cf = ctx.counterfactual_sim.calculate("ni_employee", ctx.time_period) else: ni_base = ctx.sim.calculate("ni_employer") - ni_cf = ctx.counterfactual_sim.calculate( - "ni_employer", ctx.time_period - ) + ni_cf = ctx.counterfactual_sim.calculate("ni_employer", ctx.time_period) return ctx.household_from_person(ni_cf - ni_base) @@ -90,9 +82,7 @@ def compute_ss_headcount(target, ctx) -> np.ndarray: "Variable" ) row = "pension_contributions_via_salary_sacrifice" - price_adj = ( - uprating.loc[row, "2023"] / uprating.loc[row, str(ctx.time_period)] - ) + price_adj = uprating.loc[row, "2023"] / uprating.loc[row, str(ctx.time_period)] ss_base = ss * price_adj name = target.name diff --git a/policyengine_uk_data/targets/registry.py b/policyengine_uk_data/targets/registry.py index ebd0af4f..b4c97108 100644 --- a/policyengine_uk_data/targets/registry.py +++ b/policyengine_uk_data/targets/registry.py @@ -25,9 +25,7 @@ def discover_source_modules() -> list: modules = [] package_path = Path(sources_pkg.__file__).parent for importer, modname, ispkg in pkgutil.iter_modules([str(package_path)]): - mod = importlib.import_module( - f"policyengine_uk_data.targets.sources.{modname}" - ) + mod = importlib.import_module(f"policyengine_uk_data.targets.sources.{modname}") if hasattr(mod, "get_targets"): modules.append(mod) return modules diff --git a/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py b/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py index 97ec499a..f9865e6f 100644 --- a/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py +++ b/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py @@ -32,9 +32,7 @@ def get_targets() -> list[Target]: targets = [] try: - r = requests.get( - ref, headers=HEADERS, allow_redirects=True, timeout=30 - ) + r = requests.get(ref, headers=HEADERS, allow_redirects=True, timeout=30) r.raise_for_status() df = pd.read_csv(io.StringIO(r.content.decode("utf-8-sig"))) @@ -108,14 +106,11 @@ def get_targets() -> list[Target]: ) except Exception as e: - logger.error( - "Failed to download/parse HMRC salary sacrifice CSV: %s", e - ) + logger.error("Failed to download/parse HMRC salary sacrifice CSV: %s", e) # Total salary sacrifice contributions (SPP Review 2025: £24bn base) _SS_CONTRIBUTIONS = { - y: 24e9 * _GROWTH ** max(0, y - _BASE_YEAR) - for y in range(_BASE_YEAR, 2030) + y: 24e9 * _GROWTH ** max(0, y - _BASE_YEAR) for y in range(_BASE_YEAR, 2030) } targets.append( Target( diff --git a/policyengine_uk_data/targets/sources/hmrc_spi.py b/policyengine_uk_data/targets/sources/hmrc_spi.py index de993d49..72d792e9 100644 --- a/policyengine_uk_data/targets/sources/hmrc_spi.py +++ b/policyengine_uk_data/targets/sources/hmrc_spi.py @@ -154,9 +154,7 @@ def get_targets() -> list[Target]: for idx, row in merged.iterrows(): lower = int(row["lower_bound"]) - upper = ( - _BAND_UPPER[idx] if idx < len(_BAND_UPPER) else float("inf") - ) + upper = _BAND_UPPER[idx] if idx < len(_BAND_UPPER) else float("inf") band_label = f"{lower:_}_to_{upper:_}" for variable in INCOME_VARIABLES: @@ -182,9 +180,7 @@ def get_targets() -> list[Target]: if count_col in row.index and row[count_col] > 0: targets.append( Target( - name=( - f"hmrc/{variable}_count_income_band_{band_label}" - ), + name=(f"hmrc/{variable}_count_income_band_{band_label}"), variable=variable, source="hmrc_spi", unit=Unit.COUNT, diff --git a/policyengine_uk_data/targets/sources/local_age.py b/policyengine_uk_data/targets/sources/local_age.py index 0f74bc44..f4879977 100644 --- a/policyengine_uk_data/targets/sources/local_age.py +++ b/policyengine_uk_data/targets/sources/local_age.py @@ -17,16 +17,8 @@ logger = logging.getLogger(__name__) -_CONST_DIR = ( - STORAGE.parent / "datasets" / "local_areas" / "constituencies" / "targets" -) -_LA_DIR = ( - STORAGE.parent - / "datasets" - / "local_areas" - / "local_authorities" - / "targets" -) +_CONST_DIR = STORAGE.parent / "datasets" / "local_areas" / "constituencies" / "targets" +_LA_DIR = STORAGE.parent / "datasets" / "local_areas" / "local_authorities" / "targets" _REF = ( "https://www.ons.gov.uk/peoplepopulationandcommunity/" diff --git a/policyengine_uk_data/targets/sources/local_income.py b/policyengine_uk_data/targets/sources/local_income.py index 695f790c..8fd7adb3 100644 --- a/policyengine_uk_data/targets/sources/local_income.py +++ b/policyengine_uk_data/targets/sources/local_income.py @@ -19,16 +19,8 @@ logger = logging.getLogger(__name__) -_CONST_DIR = ( - STORAGE.parent / "datasets" / "local_areas" / "constituencies" / "targets" -) -_LA_DIR = ( - STORAGE.parent - / "datasets" - / "local_areas" - / "local_authorities" - / "targets" -) +_CONST_DIR = STORAGE.parent / "datasets" / "local_areas" / "constituencies" / "targets" +_LA_DIR = STORAGE.parent / "datasets" / "local_areas" / "local_authorities" / "targets" _REF = ( "https://www.gov.uk/government/statistics/" diff --git a/policyengine_uk_data/targets/sources/local_la_extras.py b/policyengine_uk_data/targets/sources/local_la_extras.py index 76bcf06d..6bc20b6f 100644 --- a/policyengine_uk_data/targets/sources/local_la_extras.py +++ b/policyengine_uk_data/targets/sources/local_la_extras.py @@ -28,9 +28,7 @@ "earningsandworkinghours/datasets/" "smallareaincomeestimatesformiddlelayersuperoutputareasenglandandwales" ) -_REF_TENURE = ( - "https://www.gov.uk/government/statistics/english-housing-survey-2023" -) +_REF_TENURE = "https://www.gov.uk/government/statistics/english-housing-survey-2023" _REF_RENT = ( "https://www.ons.gov.uk/peoplepopulationandcommunity/housing/datasets/" "privaterentalmarketsummarystatisticsinengland" diff --git a/policyengine_uk_data/targets/sources/obr.py b/policyengine_uk_data/targets/sources/obr.py index c4f92db3..3268f428 100644 --- a/policyengine_uk_data/targets/sources/obr.py +++ b/policyengine_uk_data/targets/sources/obr.py @@ -45,9 +45,7 @@ def _download_workbook(url: str) -> openpyxl.Workbook: return openpyxl.load_workbook(io.BytesIO(r.content), data_only=False) -def _read_row_values( - ws, row_num: int, col_letters: list[str] -) -> dict[int, float]: +def _read_row_values(ws, row_num: int, col_letters: list[str]) -> dict[int, float]: """Read numeric values from a row, mapped to calendar years.""" result = {} for col in col_letters: @@ -353,9 +351,7 @@ def read_49(row_num: int) -> dict[int, float]: # Find the second UC row (outside cap section) for row in range(uc_outside_row + 1, 55): cell_val = ws[f"B{row}"].value - if cell_val and str(cell_val).strip().startswith( - "Universal credit" - ): + if cell_val and str(cell_val).strip().startswith("Universal credit"): values = read_49(row) if values: targets.append( @@ -427,27 +423,20 @@ def _parse_tv_licence(wb: openpyxl.Workbook) -> list[Target]: _PRIVATE_SCHOOL = {y: 557_000 for y in range(2018, 2032)} # SPP Review: salary sacrifice NI relief (uprated 3% pa from 2024 base) -_SS_EMPLOYEE_NI = { - y: 1.2e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032) -} -_SS_EMPLOYER_NI = { - y: 2.9e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032) -} +_SS_EMPLOYEE_NI = {y: 1.2e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032)} +_SS_EMPLOYER_NI = {y: 2.9e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032)} # Salary sacrifice headcount: 7.7m total (3.3m above £2k, 4.3m below) # OBR para 1.7: SS population grows 0.9% faster than employees (~2.4%/yr) _SS_HEADCOUNT_GROWTH = 1.024 _SS_TOTAL_USERS = { - y: 7_700_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) - for y in range(2024, 2032) + y: 7_700_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) for y in range(2024, 2032) } _SS_BELOW_CAP_USERS = { - y: 4_300_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) - for y in range(2024, 2032) + y: 4_300_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) for y in range(2024, 2032) } _SS_ABOVE_CAP_USERS = { - y: 3_300_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) - for y in range(2024, 2032) + y: 3_300_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) for y in range(2024, 2032) } diff --git a/policyengine_uk_data/targets/sources/ons_demographics.py b/policyengine_uk_data/targets/sources/ons_demographics.py index a51adf39..dba77671 100644 --- a/policyengine_uk_data/targets/sources/ons_demographics.py +++ b/policyengine_uk_data/targets/sources/ons_demographics.py @@ -78,9 +78,7 @@ @lru_cache(maxsize=1) def _download_uk_projection() -> pd.DataFrame: """Download and parse the UK principal population projection.""" - r = requests.get( - _UK_ZIP_URL, headers=HEADERS, allow_redirects=True, timeout=120 - ) + r = requests.get(_UK_ZIP_URL, headers=HEADERS, allow_redirects=True, timeout=120) r.raise_for_status() z = zipfile.ZipFile(io.BytesIO(r.content)) with z.open("uk/uk_ppp_machine_readable.xlsx") as f: @@ -182,9 +180,7 @@ def _parse_regional_from_csv() -> list[Target]: for _, row in demographics.iterrows(): name = row["name"] - if name in _SKIP_NAMES or any( - name.startswith(p) for p in _SKIP_PREFIXES - ): + if name in _SKIP_NAMES or any(name.startswith(p) for p in _SKIP_PREFIXES): continue values = {} for y in _YEARS: diff --git a/policyengine_uk_data/targets/sources/ons_savings.py b/policyengine_uk_data/targets/sources/ons_savings.py index 3764f22e..2046a4d3 100644 --- a/policyengine_uk_data/targets/sources/ons_savings.py +++ b/policyengine_uk_data/targets/sources/ons_savings.py @@ -18,15 +18,15 @@ logger = logging.getLogger(__name__) -_API_URL = "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea/data" +_API_URL = ( + "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea/data" +) _REF = "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea" def get_targets() -> list[Target]: try: - r = requests.get( - _API_URL, headers=HEADERS, allow_redirects=True, timeout=30 - ) + r = requests.get(_API_URL, headers=HEADERS, allow_redirects=True, timeout=30) r.raise_for_status() data = r.json() diff --git a/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py b/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py index 3b3f0446..1e266ca9 100644 --- a/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py +++ b/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py @@ -77,9 +77,7 @@ def test_all_reforms_have_required_fields(): for i, reform in enumerate(reforms_data): for field in required_fields: - assert field in reform, ( - f"Reform {i} missing required field: {field}" - ) + assert field in reform, f"Reform {i} missing required field: {field}" assert isinstance(reform["parameters"], dict), ( f"Reform {i} parameters must be a dictionary" diff --git a/policyengine_uk_data/tests/microsimulation/update_reform_impacts.py b/policyengine_uk_data/tests/microsimulation/update_reform_impacts.py index 56a23c55..e98151b6 100644 --- a/policyengine_uk_data/tests/microsimulation/update_reform_impacts.py +++ b/policyengine_uk_data/tests/microsimulation/update_reform_impacts.py @@ -33,9 +33,7 @@ def get_fiscal_impact(reform: dict) -> float: return float((reform_revenue - baseline_revenue) / 1e9) -def update_impacts( - config_path: Path, dry_run: bool = False, verbose: bool = True -): +def update_impacts(config_path: Path, dry_run: bool = False, verbose: bool = True): """ Update the expected impacts in the configuration file with current model values. @@ -61,9 +59,7 @@ def update_impacts( old_impact = reform["expected_impact"] new_impact = round(get_fiscal_impact(reform["parameters"]), 1) - if ( - abs(old_impact - new_impact) > 0.01 - ): # Only record meaningful changes + if abs(old_impact - new_impact) > 0.01: # Only record meaningful changes changes.append( { "name": reform["name"], @@ -126,9 +122,7 @@ def main(): parser.add_argument( "--config", type=Path, - default=Path( - "policyengine_uk_data/tests/microsimulation/reforms_config.yaml" - ), + default=Path("policyengine_uk_data/tests/microsimulation/reforms_config.yaml"), help="Path to the reforms configuration file (default: reforms_config.yaml)", ) parser.add_argument( diff --git a/policyengine_uk_data/tests/test_aggregates.py b/policyengine_uk_data/tests/test_aggregates.py index 10a5fb2c..c8326a20 100644 --- a/policyengine_uk_data/tests/test_aggregates.py +++ b/policyengine_uk_data/tests/test_aggregates.py @@ -10,9 +10,7 @@ @pytest.mark.parametrize("variable", AGGREGATES.keys()) def test_aggregates(baseline, variable: str): - estimate = baseline.calculate( - variable, map_to="household", period=2025 - ).sum() + estimate = baseline.calculate(variable, map_to="household", period=2025).sum() assert abs(estimate / AGGREGATES[variable] - 1) < 0.7, ( f"Expected {AGGREGATES[variable] / 1e9:.1f} billion for {variable}, got {estimate / 1e9:.1f} billion (relative error = {abs(estimate / AGGREGATES[variable] - 1):.1%})." diff --git a/policyengine_uk_data/tests/test_child_limit.py b/policyengine_uk_data/tests/test_child_limit.py index 446ed4c4..76c065db 100644 --- a/policyengine_uk_data/tests/test_child_limit.py +++ b/policyengine_uk_data/tests/test_child_limit.py @@ -10,30 +10,19 @@ def test_child_limit(baseline): > 0 ) * baseline.calculate("is_child", map_to="person").values child_in_uc_household = ( - baseline.calculate( - "universal_credit", map_to="person", period=2025 - ).values - > 0 + baseline.calculate("universal_credit", map_to="person", period=2025).values > 0 ) children_in_capped_households = baseline.map_result( child_is_affected * child_in_uc_household, "person", "household" ) capped_households = (children_in_capped_households > 0) * 1.0 - household_weight = baseline.calculate( - "household_weight", period=2025 - ).values - children_affected = ( - children_in_capped_households * household_weight - ).sum() + household_weight = baseline.calculate("household_weight", period=2025).values + children_affected = (children_in_capped_households * household_weight).sum() households_affected = (capped_households * household_weight).sum() UPRATING_24_25 = 1.12 # https://ifs.org.uk/articles/two-child-limit-poverty-incentives-and-cost, table at the end - child_target = ( - 1.6e6 * UPRATING_24_25 - ) # Expected number of affected children - household_target = ( - 440e3 * UPRATING_24_25 - ) # Expected number of affected households + child_target = 1.6e6 * UPRATING_24_25 # Expected number of affected children + household_target = 440e3 * UPRATING_24_25 # Expected number of affected households assert abs(children_affected / child_target - 1) < 0.3, ( f"Expected {child_target / 1e6:.1f} million affected children, got {children_affected / 1e6:.1f} million." diff --git a/policyengine_uk_data/tests/test_childcare.py b/policyengine_uk_data/tests/test_childcare.py index 7e5e16e6..72842946 100644 --- a/policyengine_uk_data/tests/test_childcare.py +++ b/policyengine_uk_data/tests/test_childcare.py @@ -41,17 +41,11 @@ def test_childcare(baseline, enhanced_frs): # Calculate actual spending values spending = { "tfc": baseline.calculate("tax_free_childcare", 2024).sum() / 1e9, - "extended": baseline.calculate( - "extended_childcare_entitlement", 2024 - ).sum() + "extended": baseline.calculate("extended_childcare_entitlement", 2024).sum() / 1e9, - "targeted": baseline.calculate( - "targeted_childcare_entitlement", 2024 - ).sum() + "targeted": baseline.calculate("targeted_childcare_entitlement", 2024).sum() / 1e9, - "universal": baseline.calculate( - "universal_childcare_entitlement", 2024 - ).sum() + "universal": baseline.calculate("universal_childcare_entitlement", 2024).sum() / 1e9, } @@ -82,14 +76,10 @@ def test_childcare(baseline, enhanced_frs): for key, rate in take_up_rates.items(): print(f"{key.upper():<12} {rate:.3f}") - print( - f"\nEXTENDED HOURS: Mean = {hours_mean:.2f}, Std Dev = {hours_std:.2f}" - ) + print(f"\nEXTENDED HOURS: Mean = {hours_mean:.2f}, Std Dev = {hours_std:.2f}") print("\nSPENDING (£ billion):") - print( - f"{'PROGRAM':<12} {'ACTUAL':<10} {'TARGET':<10} {'RATIO':<10} {'PASS?':<10}" - ) + print(f"{'PROGRAM':<12} {'ACTUAL':<10} {'TARGET':<10} {'RATIO':<10} {'PASS?':<10}") print("-" * 55) failed_any = False @@ -106,9 +96,7 @@ def test_childcare(baseline, enhanced_frs): failed_any = True print("\nCASELOAD (thousands):") - print( - f"{'PROGRAM':<12} {'ACTUAL':<10} {'TARGET':<10} {'RATIO':<10} {'PASS?':<10}" - ) + print(f"{'PROGRAM':<12} {'ACTUAL':<10} {'TARGET':<10} {'RATIO':<10} {'PASS?':<10}") print("-" * 55) # Test caseload for each program diff --git a/policyengine_uk_data/tests/test_non_negative_incomes.py b/policyengine_uk_data/tests/test_non_negative_incomes.py index 633305e8..762d8fb0 100644 --- a/policyengine_uk_data/tests/test_non_negative_incomes.py +++ b/policyengine_uk_data/tests/test_non_negative_incomes.py @@ -18,6 +18,4 @@ def test_income_non_negative(frs, variable: str): """Test that income variables have no negative values.""" values = frs.person[variable] min_value = values.min() - assert min_value >= 0, ( - f"{variable} has negative values (min = {min_value:.2f})" - ) + assert min_value >= 0, f"{variable} has negative values (min = {min_value:.2f})" diff --git a/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py b/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py index 3f5c2ba3..9ed48abf 100644 --- a/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py +++ b/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py @@ -11,9 +11,7 @@ def test_pension_contributions_via_salary_sacrifice(baseline): # Should have some non-zero values (not everyone uses salary sacrifice, but some do) total = values.sum() - assert total > 0, ( - f"Expected some salary sacrifice contributions, got {total}" - ) + assert total > 0, f"Expected some salary sacrifice contributions, got {total}" # Reasonableness check: total should be less than total employment income # This is a very loose check just to catch major issues diff --git a/policyengine_uk_data/tests/test_scotland_uc_babies.py b/policyengine_uk_data/tests/test_scotland_uc_babies.py index 85751cca..14394838 100644 --- a/policyengine_uk_data/tests/test_scotland_uc_babies.py +++ b/policyengine_uk_data/tests/test_scotland_uc_babies.py @@ -25,15 +25,11 @@ def test_scotland_uc_households_child_under_1(baseline): ).values # Check if household has child under 1 - is_child = baseline.calculate( - "is_child", map_to="person", period=2025 - ).values + is_child = baseline.calculate("is_child", map_to="person", period=2025).values age = baseline.calculate("age", map_to="person", period=2025).values child_under_1 = is_child & (age < 1) - has_child_under_1 = ( - baseline.map_result(child_under_1, "person", "household") > 0 - ) + has_child_under_1 = baseline.map_result(child_under_1, "person", "household") > 0 scotland_uc_child_under_1 = ( (region.values == "SCOTLAND") & (uc > 0) & has_child_under_1 diff --git a/policyengine_uk_data/tests/test_target_registry.py b/policyengine_uk_data/tests/test_target_registry.py index c6f78bdd..bc0854c5 100644 --- a/policyengine_uk_data/tests/test_target_registry.py +++ b/policyengine_uk_data/tests/test_target_registry.py @@ -52,9 +52,7 @@ def test_hmrc_spi_targets_exist(): targets = get_all_targets(year=2025) spi_targets = [t for t in targets if t.source == "hmrc_spi"] # 13 bands × 6 income types × 2 (count + amount) = 156 per year - assert len(spi_targets) >= 100, ( - f"Expected 100+ SPI targets, got {len(spi_targets)}" - ) + assert len(spi_targets) >= 100, f"Expected 100+ SPI targets, got {len(spi_targets)}" def test_dwp_pip_targets(): @@ -76,9 +74,7 @@ def test_voa_council_tax_targets(): def test_core_target_count(): """Total target count should be substantial.""" targets = get_all_targets(year=2025) - assert len(targets) >= 200, ( - f"Expected 200+ targets for 2025, got {len(targets)}" - ) + assert len(targets) >= 200, f"Expected 200+ targets for 2025, got {len(targets)}" def test_two_child_limit_targets(): diff --git a/policyengine_uk_data/tests/test_uc_by_children.py b/policyengine_uk_data/tests/test_uc_by_children.py index 649e0fb3..8de5d558 100644 --- a/policyengine_uk_data/tests/test_uc_by_children.py +++ b/policyengine_uk_data/tests/test_uc_by_children.py @@ -35,9 +35,7 @@ def test_uc_households_by_children(baseline, bucket, target): uc = baseline.calculate("universal_credit", period=2025).values on_uc = baseline.map_result(uc > 0, "benunit", "household") > 0 - is_child = baseline.calculate( - "is_child", map_to="person", period=2025 - ).values + is_child = baseline.calculate("is_child", map_to="person", period=2025).values children_per_hh = baseline.map_result(is_child, "person", "household") if bucket == "0_children": @@ -49,9 +47,7 @@ def test_uc_households_by_children(baseline, bucket, target): else: # 3plus_children match = on_uc & (children_per_hh >= 3) - household_weight = baseline.calculate( - "household_weight", period=2025 - ).values + household_weight = baseline.calculate("household_weight", period=2025).values actual = (household_weight * match).sum() assert abs(actual / target - 1) < TOLERANCE, ( diff --git a/policyengine_uk_data/tests/test_vehicle_ownership.py b/policyengine_uk_data/tests/test_vehicle_ownership.py index d2fbc73b..6e9f6923 100644 --- a/policyengine_uk_data/tests/test_vehicle_ownership.py +++ b/policyengine_uk_data/tests/test_vehicle_ownership.py @@ -9,9 +9,7 @@ def test_vehicle_ownership(baseline): """Test that vehicle ownership distribution matches NTS 2024 targets.""" - num_vehicles = baseline.calculate( - "num_vehicles", map_to="household", period=2025 - ) + num_vehicles = baseline.calculate("num_vehicles", map_to="household", period=2025) weights = baseline.calculate("household_weight", period=2025) total_hh = weights.sum() @@ -28,9 +26,7 @@ def test_vehicle_ownership(baseline): f"Expected {NTS_ONE_VEHICLE_RATE:.0%} households with one vehicle, " f"got {one_vehicle_rate:.0%}" ) - assert ( - abs(two_plus_rate - NTS_TWO_PLUS_VEHICLE_RATE) < ABSOLUTE_TOLERANCE - ), ( + assert abs(two_plus_rate - NTS_TWO_PLUS_VEHICLE_RATE) < ABSOLUTE_TOLERANCE, ( f"Expected {NTS_TWO_PLUS_VEHICLE_RATE:.0%} households with two+ vehicles, " f"got {two_plus_rate:.0%}" ) diff --git a/policyengine_uk_data/utils/calibrate.py b/policyengine_uk_data/utils/calibrate.py index 6e31402c..c9fc5a92 100644 --- a/policyengine_uk_data/utils/calibrate.py +++ b/policyengine_uk_data/utils/calibrate.py @@ -53,13 +53,10 @@ def calibrate_local_areas( areas_per_household = r.sum( axis=0 ) # number of areas each household can contribute to - areas_per_household = np.maximum( - areas_per_household, 1 - ) # avoid division by zero + areas_per_household = np.maximum(areas_per_household, 1) # avoid division by zero original_weights = np.log( dataset.household.household_weight.values / areas_per_household - + np.random.random(len(dataset.household.household_weight.values)) - * 0.01 + + np.random.random(len(dataset.household.household_weight.values)) * 0.01 ) weights = torch.tensor( np.ones((area_count, len(original_weights))) * original_weights, @@ -85,9 +82,7 @@ def calibrate_local_areas( matrix.values if hasattr(matrix, "values") else matrix, dtype=torch.float32, ) - y = torch.tensor( - y.values if hasattr(y, "values") else y, dtype=torch.float32 - ) + y = torch.tensor(y.values if hasattr(y, "values") else y, dtype=torch.float32) matrix_national = torch.tensor( m_national.values if hasattr(m_national, "values") else m_national, dtype=torch.float32, @@ -135,9 +130,7 @@ def pct_close(w, t=0.1, local=True, national=True): if local: pred_local = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1) - e_local = torch.sum( - torch.abs((pred_local / (1 + y) - 1)) < t - ).item() + e_local = torch.sum(torch.abs((pred_local / (1 + y) - 1)) < t).item() c_local = pred_local.shape[0] * pred_local.shape[1] numerator += e_local denominator += c_local @@ -183,9 +176,7 @@ def dropout_weights(weights, p): optimizer.step() local_close = pct_close(weights_, local=True, national=False) - national_close = pct_close( - weights_, local=False, national=True - ) + national_close = pct_close(weights_, local=False, national=True) if dropout_targets: validation_loss = loss(weights_, validation=True) @@ -213,9 +204,7 @@ def dropout_weights(weights, p): excluded_training_targets, ) performance_step["epoch"] = epoch - performance_step["loss"] = ( - performance_step.rel_abs_error**2 - ) + performance_step["loss"] = performance_step.rel_abs_error**2 performance_step["target_name"] = [ f"{area}/{metric}" for area, metric in zip( @@ -231,9 +220,7 @@ def dropout_weights(weights, p): with h5py.File(STORAGE_FOLDER / weight_file, "w") as f: f.create_dataset(dataset_key, data=final_weights) - dataset.household.household_weight = final_weights.sum( - axis=0 - ) + dataset.household.household_weight = final_weights.sum(axis=0) else: for epoch in range(epochs): optimizer.zero_grad() diff --git a/policyengine_uk_data/utils/data_upload.py b/policyengine_uk_data/utils/data_upload.py index 89445ad9..342a02f7 100644 --- a/policyengine_uk_data/utils/data_upload.py +++ b/policyengine_uk_data/utils/data_upload.py @@ -89,18 +89,14 @@ def upload_files_to_gcs( Upload files to Google Cloud Storage and set metadata with the version. """ credentials, project_id = google.auth.default() - storage_client = storage.Client( - credentials=credentials, project=project_id - ) + storage_client = storage.Client(credentials=credentials, project=project_id) bucket = storage_client.bucket(gcs_bucket_name) for file_path in files: file_path = Path(file_path) blob = bucket.blob(file_path.name) blob.upload_from_filename(file_path) - logging.info( - f"Uploaded {file_path.name} to GCS bucket {gcs_bucket_name}." - ) + logging.info(f"Uploaded {file_path.name} to GCS bucket {gcs_bucket_name}.") # Set metadata blob.metadata = {"version": version} diff --git a/policyengine_uk_data/utils/datasets.py b/policyengine_uk_data/utils/datasets.py index 82c9e273..2f4cd1b4 100644 --- a/policyengine_uk_data/utils/datasets.py +++ b/policyengine_uk_data/utils/datasets.py @@ -10,9 +10,7 @@ warnings.filterwarnings("ignore") -def sum_to_entity( - values: pd.Series, foreign_key: pd.Series, primary_key -) -> np.ndarray: +def sum_to_entity(values: pd.Series, foreign_key: pd.Series, primary_key) -> np.ndarray: """Sums values by joining foreign and primary keys. Args: @@ -23,14 +21,10 @@ def sum_to_entity( Returns: pd.Series: A value for each person. """ - return ( - values.groupby(foreign_key).sum().reindex(primary_key).fillna(0).values - ) + return values.groupby(foreign_key).sum().reindex(primary_key).fillna(0).values -def categorical( - values: pd.Series, default: int, left: list, right: list -) -> pd.Series: +def categorical(values: pd.Series, default: int, left: list, right: list) -> pd.Series: """Maps a categorical input to an output using given left and right arrays. Args: @@ -45,9 +39,7 @@ def categorical( return values.fillna(default).map({i: j for i, j in zip(left, right)}) -def sum_from_positive_fields( - table: pd.DataFrame, fields: List[str] -) -> np.array: +def sum_from_positive_fields(table: pd.DataFrame, fields: List[str]) -> np.array: """Sum from fields in table, ignoring negative values. Args: @@ -57,9 +49,7 @@ def sum_from_positive_fields( Returns: np.array """ - return np.where( - table[fields].sum(axis=1) > 0, table[fields].sum(axis=1), 0 - ) + return np.where(table[fields].sum(axis=1) > 0, table[fields].sum(axis=1), 0) def sum_positive_variables(variables: List[str]) -> np.array: diff --git a/policyengine_uk_data/utils/huggingface.py b/policyengine_uk_data/utils/huggingface.py index 1ed8de25..7fc5d9e9 100644 --- a/policyengine_uk_data/utils/huggingface.py +++ b/policyengine_uk_data/utils/huggingface.py @@ -2,9 +2,7 @@ import os -def download( - repo: str, repo_filename: str, local_folder: str, version: str = None -): +def download(repo: str, repo_filename: str, local_folder: str, version: str = None): token = os.environ.get( "HUGGING_FACE_TOKEN", ) diff --git a/policyengine_uk_data/utils/incomes_projection.py b/policyengine_uk_data/utils/incomes_projection.py index 1f62b1cb..302dea9d 100644 --- a/policyengine_uk_data/utils/incomes_projection.py +++ b/policyengine_uk_data/utils/incomes_projection.py @@ -20,9 +20,9 @@ MAX_YEAR = 2029 for time_period in range(MIN_YEAR, MAX_YEAR + 1): - time_period_df = statistics[ - ["name", "unit", "reference", str(time_period)] - ].rename(columns={str(time_period): "value"}) + time_period_df = statistics[["name", "unit", "reference", str(time_period)]].rename( + columns={str(time_period): "value"} + ) time_period_df["time_period"] = time_period dfs.append(time_period_df) @@ -49,9 +49,7 @@ def create_target_matrix( sim = Microsimulation(dataset=dataset, reform=reform) sim.default_calculation_period = time_period - household_from_person = lambda values: sim.map_result( - values, "person", "household" - ) + household_from_person = lambda values: sim.map_result(values, "person", "household") df = pd.DataFrame() @@ -99,9 +97,7 @@ def create_target_matrix( target_values.append(row[variable + "_amount"]) target_names.append(name_amount) name_count = ( - "hmrc/" - + variable - + f"_count_income_band_{i}_{lower:_}_to_{upper:_}" + "hmrc/" + variable + f"_count_income_band_{i}_{lower:_}_to_{upper:_}" ) df[name_count] = household_from_person( (income_df[variable] > 0) * in_income_band @@ -184,16 +180,10 @@ def create_income_projections(): for variable in INCOME_VARIABLES: count_values = [] amount_values = [] - for i, (lower, upper) in enumerate( - zip(lower_bounds, upper_bounds) - ): - in_band = sim.calculate("total_income", year).between( - lower, upper - ) + for i, (lower, upper) in enumerate(zip(lower_bounds, upper_bounds)): + in_band = sim.calculate("total_income", year).between(lower, upper) value = sim.calculate(variable, year) - count_in_band_with_nonzero_value = round( - ((value > 0) * in_band).sum() - ) + count_in_band_with_nonzero_value = round(((value > 0) * in_band).sum()) amount_in_band = round(value[in_band].sum()) count_values.append(count_in_band_with_nonzero_value) amount_values.append(amount_in_band) @@ -202,9 +192,7 @@ def create_income_projections(): year_df["year"] = year projection_df = pd.concat([projection_df, year_df]) - projection_df.to_csv( - STORAGE_FOLDER / "incomes_projection.csv", index=False - ) + projection_df.to_csv(STORAGE_FOLDER / "incomes_projection.csv", index=False) if __name__ == "__main__": diff --git a/policyengine_uk_data/utils/loss.py b/policyengine_uk_data/utils/loss.py index 18d30bed..27eb919f 100644 --- a/policyengine_uk_data/utils/loss.py +++ b/policyengine_uk_data/utils/loss.py @@ -11,9 +11,7 @@ ) -def get_loss_results( - dataset, time_period, reform=None, household_weights=None -): +def get_loss_results(dataset, time_period, reform=None, household_weights=None): """Calculate loss metrics comparing model outputs to targets. Args: diff --git a/policyengine_uk_data/utils/progress.py b/policyengine_uk_data/utils/progress.py index ccb5efcf..e6a70f89 100644 --- a/policyengine_uk_data/utils/progress.py +++ b/policyengine_uk_data/utils/progress.py @@ -285,9 +285,7 @@ def update_calibration( description=f"[yellow]●[/yellow] Calibration epoch {iteration}/{iterations} • calculating loss", ) else: - loss_text = ( - f" • loss: {loss_value:.6f}" if loss_value else "" - ) + loss_text = f" • loss: {loss_value:.6f}" if loss_value else "" nested_progress.update_task( calibration_task, description=f"[blue]●[/blue] Calibration epoch {iteration}/{iterations}{loss_text}", @@ -317,9 +315,7 @@ def update_calibration( description=f"Calibration iteration {iteration}/{iterations} • [yellow]calculating loss[/yellow]", ) else: - loss_text = ( - f" • loss: {loss_value:.6f}" if loss_value else "" - ) + loss_text = f" • loss: {loss_value:.6f}" if loss_value else "" progress.update_task( main_task, description=f"Calibration iteration {iteration}/{iterations}{loss_text}", @@ -329,9 +325,7 @@ def update_calibration( yield update_calibration @contextmanager - def track_file_processing( - self, files: List[str], operation: str = "processing" - ): + def track_file_processing(self, files: List[str], operation: str = "processing"): """Track file processing operations. Args: diff --git a/policyengine_uk_data/utils/qrf.py b/policyengine_uk_data/utils/qrf.py index d99e5a25..05c0ba66 100644 --- a/policyengine_uk_data/utils/qrf.py +++ b/policyengine_uk_data/utils/qrf.py @@ -74,6 +74,4 @@ def save(self, file_path: str): file_path: Path where model should be saved. """ with open(file_path, "wb") as f: - pickle.dump( - {"model": self.model, "input_columns": self.input_columns}, f - ) + pickle.dump({"model": self.model, "input_columns": self.input_columns}, f) diff --git a/policyengine_uk_data/utils/spi.py b/policyengine_uk_data/utils/spi.py index fae6b451..5e94bcff 100644 --- a/policyengine_uk_data/utils/spi.py +++ b/policyengine_uk_data/utils/spi.py @@ -70,12 +70,8 @@ def parse_value(value): import numpy as np -income["total_income_lower_bound"] = list(income["income_range"][:-1]) + [ - 12_570 -] -income["total_income_upper_bound"] = ( - list(income["income_range"][1:-1]) + [np.inf] * 2 -) +income["total_income_lower_bound"] = list(income["income_range"][:-1]) + [12_570] +income["total_income_upper_bound"] = list(income["income_range"][1:-1]) + [np.inf] * 2 # Order the income bound columns first income = income[ [ diff --git a/policyengine_uk_data/utils/stack.py b/policyengine_uk_data/utils/stack.py index 2ded165c..2fe9df82 100644 --- a/policyengine_uk_data/utils/stack.py +++ b/policyengine_uk_data/utils/stack.py @@ -17,8 +17,6 @@ def stack_datasets( return UKSingleYearDataset( person=pd.concat([data_1.person, data_2.person], ignore_index=True), benunit=pd.concat([data_1.benunit, data_2.benunit], ignore_index=True), - household=pd.concat( - [data_1.household, data_2.household], ignore_index=True - ), + household=pd.concat([data_1.household, data_2.household], ignore_index=True), fiscal_year=data_1.time_period, ) diff --git a/policyengine_uk_data/utils/uc_data.py b/policyengine_uk_data/utils/uc_data.py index 1e2282cb..7bacebc5 100644 --- a/policyengine_uk_data/utils/uc_data.py +++ b/policyengine_uk_data/utils/uc_data.py @@ -63,9 +63,7 @@ def parse_band(band): "Couple, no children": "COUPLE_NO_CHILDREN", "Couple, with children": "COUPLE_WITH_CHILDREN", } - result_df["family_type"] = result_df["family_type"].map( - family_type_mapping - ) + result_df["family_type"] = result_df["family_type"].map(family_type_mapping) # Reorder columns and drop monthly band result_df = result_df[ @@ -111,9 +109,7 @@ def _parse_uc_pc_households(): # Parse NI data ni_file_path = storage_path / "dfc-ni-uc-stats-supp-tables-may-2025.ods" - df_ni = pd.read_excel( - ni_file_path, sheet_name="5b", engine="odf", header=None - ) + df_ni = pd.read_excel(ni_file_path, sheet_name="5b", engine="odf", header=None) # Get constituency names from row 2, columns 1-18 ni_constituencies = df_ni.iloc[2, 1:19].tolist() @@ -179,9 +175,7 @@ def _parse_uc_la_households(): # Parse NI data ni_file_path = storage_path / "dfc-ni-uc-stats-supp-tables-may-2025.ods" - df_ni = pd.read_excel( - ni_file_path, sheet_name="5c", engine="odf", header=None - ) + df_ni = pd.read_excel(ni_file_path, sheet_name="5c", engine="odf", header=None) # Get LGD names from row 2, columns 1-11 ni_lgd_names = df_ni.iloc[2, 1:12].tolist() diff --git a/pyproject.toml b/pyproject.toml index 73200a77..9e4f1030 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,9 +70,6 @@ filterwarnings = [ "ignore::PendingDeprecationWarning", ] -[tool.ruff] -line-length = 79 - [tool.towncrier] package = "policyengine_uk_data" directory = "changelog.d"