diff --git a/.github/workflows/deploy_containerapps.yml b/.github/workflows/deploy_containerapps.yml new file mode 100644 index 0000000..7c17514 --- /dev/null +++ b/.github/workflows/deploy_containerapps.yml @@ -0,0 +1,101 @@ +name: Deploy to Azure Container Apps (shapes) + +on: + push: + branches: + - shapes + workflow_dispatch: + +permissions: + contents: read + id-token: write + +concurrency: + group: deploy-aca-shapes + cancel-in-progress: true + +env: + AZURE_RESOURCE_GROUP: ${{ secrets.AZURE_RESOURCE_GROUP }} + AZURE_CONTAINERAPP_NAME: rcpch-census-platform-v2 + ACR_NAME: ${{ secrets.ACR_NAME }} + ACR_LOGIN_SERVER: ${{ secrets.ACR_LOGIN_SERVER }} + WEB_IMAGE_NAME: rcpch-census-web + DB_DUMP_VERSION: v1.1.0 + +jobs: + deploy: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Azure login (OIDC) + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + + - name: Login to ACR (OIDC) + shell: bash + run: | + if [ -z "${ACR_NAME}" ]; then + echo "Missing secrets.ACR_NAME (ACR resource name)." >&2 + exit 1 + fi + az acr login --name "${ACR_NAME}" + + - name: Build and push web image + shell: bash + run: | + docker build -f Dockerfile.postgis -t "${ACR_LOGIN_SERVER}/${WEB_IMAGE_NAME}:${GITHUB_SHA}" . + docker push "${ACR_LOGIN_SERVER}/${WEB_IMAGE_NAME}:${GITHUB_SHA}" + + - name: Render Container App YAML + shell: bash + env: + ACR_LOGIN_SERVER: ${{ secrets.ACR_LOGIN_SERVER }} + DJANGO_ALLOWED_HOSTS: ${{ secrets.DJANGO_ALLOWED_HOSTS }} + DJANGO_CSRF_TRUSTED_ORIGINS: ${{ secrets.DJANGO_CSRF_TRUSTED_ORIGINS }} + DB_DUMP_VERSION: ${{ env.DB_DUMP_VERSION }} + KEY_VAULT_NAME: ${{ secrets.KEY_VAULT_NAME }} + ENV_STORAGE_NAME: ${{ secrets.ENV_STORAGE_NAME }} + run: | + python - <<'PY' + from pathlib import Path + import os + + template_path = Path('infra/azure/containerapp.template.yml') + out_path = Path('infra/azure/containerapp.rendered.yml') + + content = template_path.read_text(encoding='utf-8') + replacements = { + '__ACR_LOGIN_SERVER__': os.environ['ACR_LOGIN_SERVER'], + '__IMAGE_TAG__': os.environ['GITHUB_SHA'], + '__DJANGO_ALLOWED_HOSTS__': os.environ['DJANGO_ALLOWED_HOSTS'], + '__DJANGO_CSRF_TRUSTED_ORIGINS__': os.environ['DJANGO_CSRF_TRUSTED_ORIGINS'], + '__DB_DUMP_VERSION__': os.environ['DB_DUMP_VERSION'], + '__KEY_VAULT_NAME__': os.environ['KEY_VAULT_NAME'], + '__ENV_STORAGE_NAME__': os.environ['ENV_STORAGE_NAME'], + } + + for k, v in replacements.items(): + content = content.replace(k, v) + + out_path.write_text(content, encoding='utf-8') + print(f"Wrote {out_path}") + PY + + - name: Deploy Container App revision + shell: bash + run: | + az containerapp update \ + -n "${AZURE_CONTAINERAPP_NAME}" \ + -g "${AZURE_RESOURCE_GROUP}" \ + --yaml infra/azure/containerapp.rendered.yml + + - name: Show FQDN + shell: bash + run: | + FQDN=$(az containerapp show -n "${AZURE_CONTAINERAPP_NAME}" -g "${AZURE_RESOURCE_GROUP}" --query properties.configuration.ingress.fqdn -o tsv) + echo "Application URL: https://${FQDN}" diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml new file mode 100644 index 0000000..e190278 --- /dev/null +++ b/.github/workflows/pages.yml @@ -0,0 +1,41 @@ +name: Deploy site/ to GitHub Pages + +on: + push: + branches: + - live + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: pages + cancel-in-progress: true + +jobs: + deploy: + runs-on: ubuntu-latest + # 
Safety: only deploy Pages from the default branch. + if: github.ref == 'refs/heads/live' + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Configure Pages + uses: actions/configure-pages@v5 + + - name: Upload Pages artifact (site/) + uses: actions/upload-pages-artifact@v3 + with: + path: site + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2077cf9..f871818 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,22 +1,20 @@ -# Runs tests on Pull Requests -# Requires seeding the database with IMD data before running tests - name: Run Tests on: pull_request: - branches: - - live - - development + branches: [live, development] workflow_dispatch: jobs: test: runs-on: ubuntu-latest + # This tells GitHub to run all 'steps' inside this container + container: + image: python:3.12-bookworm services: - postgres: - image: postgres:latest + postgis: + image: postgis/postgis:15-3.4 env: POSTGRES_USER: rcpchCensususer POSTGRES_PASSWORD: password @@ -29,56 +27,48 @@ jobs: --health-timeout 5s --health-retries 5 + # Define global env for all steps + env: + RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER: rcpchCensususer + RCPCH_CENSUS_ENGINE_POSTGRES_DB_PASSWORD: password + RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME: rcpchCensusdb + RCPCH_CENSUS_ENGINE_POSTGRES_DB_HOST: postgis # Connect to service name + RCPCH_CENSUS_ENGINE_POSTGRES_DB_PORT: 5432 + RCPCH_CENSUS_ENGINE_SECRET_KEY: test-secret-key + DEBUG: "True" + DJANGO_ALLOWED_HOSTS: localhost + POSTCODES_IO_API_URL: https://api.postcodes.io + steps: - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.12" - cache: "pip" + - name: Install system packages + run: | + apt-get update && apt-get install -y --no-install-recommends \ + gdal-bin libgdal-dev libgeos-dev binutils libproj-dev postgresql-client - name: Install dependencies run: | python -m pip install --upgrade pip + # Force Python GDAL bindings to match the system version + export GDAL_VERSION=$(gdal-config --version) + pip install "GDAL==$GDAL_VERSION.*" + pip install -r requirements.txt pip install -r requirements/development-requirements.txt + - name: Enable PostGIS extension + run: | + export PGPASSWORD=password + for i in {1..15}; do + psql -h postgis -U rcpchCensususer -d rcpchCensusdb -c "CREATE EXTENSION IF NOT EXISTS postgis;" && break || sleep 2 + done + - name: Run migrations - env: - RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER: rcpchCensususer - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PASSWORD: password - RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME: rcpchCensusdb - RCPCH_CENSUS_ENGINE_POSTGRES_DB_HOST: localhost - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PORT: 5432 - RCPCH_CENSUS_ENGINE_SECRET_KEY: test-secret-key - DEBUG: "True" - DJANGO_ALLOWED_HOSTS: localhost - POSTCODES_IO_API_URL: https://api.postcodes.io run: python manage.py migrate - name: Seed database - env: - RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER: rcpchCensususer - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PASSWORD: password - RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME: rcpchCensusdb - RCPCH_CENSUS_ENGINE_POSTGRES_DB_HOST: localhost - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PORT: 5432 - RCPCH_CENSUS_ENGINE_SECRET_KEY: test-secret-key - DEBUG: "True" - DJANGO_ALLOWED_HOSTS: localhost - POSTCODES_IO_API_URL: https://api.postcodes.io run: python manage.py seed --mode='ci_test' - name: Run tests - env: - 
RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER: rcpchCensususer - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PASSWORD: password - RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME: rcpchCensusdb - RCPCH_CENSUS_ENGINE_POSTGRES_DB_HOST: localhost - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PORT: 5432 - RCPCH_CENSUS_ENGINE_SECRET_KEY: test-secret-key - DEBUG: "True" - DJANGO_ALLOWED_HOSTS: localhost - POSTCODES_IO_API_URL: https://api.postcodes.io run: python -m pytest tests/ -v --tb=short diff --git a/.gitignore b/.gitignore index 122f8b0..78cbc9f 100644 --- a/.gitignore +++ b/.gitignore @@ -120,8 +120,15 @@ staticfiles/ .vscode/ .vscode/* +# Docker and docker compose +.pip_cache +.data # Docker and docker compose .pip_cache .data -build_info.json \ No newline at end of file +build_info.json +# Ignore large generated/staging geojson and shapefiles +deprivation_scores/geojson/ +deprivation_scores/shape_files/ +postgis_dump_files/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 813ee41..0000000 --- a/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -# Base Docker image Official Python 3.11 -FROM python:3.12 - -# Set 'build-time' environment variables -ENV PYTHONDONTWRITEBYTECODE 1 -ENV PYTHONUNBUFFERED 1 - -# Add Development requirements -COPY requirements/development-requirements.txt /app/requirements/development-requirements.txt -COPY requirements/common-requirements.txt /app/requirements/common-requirements.txt - -# Set working directory for requirements installation -WORKDIR /app/requirements/ - -# Run installation of requirements -RUN pip install --upgrade pip -RUN pip install -r /app/requirements/development-requirements.txt - -# Set working directory back to main app -WORKDIR /app/ - -# Copy application code into image -# (Excludes any files/dirs matched by patterns in .dockerignore) -COPY . /app/ - -# Use port 8001 in development (may be overridden by docker-compose file) -EXPOSE 8001 \ No newline at end of file diff --git a/Dockerfile.postgis b/Dockerfile.postgis new file mode 100644 index 0000000..dff4eb5 --- /dev/null +++ b/Dockerfile.postgis @@ -0,0 +1,36 @@ +FROM python:3.12-bookworm + +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + build-essential \ + gdal-bin \ + libgdal-dev \ + libgeos-dev \ + libproj-dev \ + libpq-dev \ + postgresql-client \ + && rm -rf /var/lib/apt/lists/* + +# Ensure headers are found for C extensions +ENV CPLUS_INCLUDE_PATH=/usr/include/gdal +ENV C_INCLUDE_PATH=/usr/include/gdal + +WORKDIR /app + +# Copy requirements first for better layer caching +COPY requirements/ /app/requirements/ + +RUN pip install --no-cache-dir --upgrade pip && \ + export GDAL_VERSION=$(gdal-config --version) && \ + pip install --no-cache-dir "GDAL==$GDAL_VERSION.*" && \ + pip install --no-cache-dir -r requirements/common-requirements.txt && \ + pip install --no-cache-dir -r requirements/development-requirements.txt + +# Copy the rest of the project +COPY . /app/ + +EXPOSE 8001 \ No newline at end of file diff --git a/README.md b/README.md index de4d526..4432257 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ This project is a python 3.11 / Django Rest Framework project providing UK censu ## Why is it needed? -The [Office of National Statistics](https://www.ons.gov.uk) publishes all the Census data exhaustively - this project is not intended to replace it. 
There is a need though for RCPCH to be able to describe the lived environment and experience of children and young people in a meaningful way, to inform research, audit and clinical practice. The project will curate social and environmental data where they have impact on children's health or on paediatrics, available to clinicians and researchers. It is a work in progress. The first application within this project is an API to address deprivation, by reporting indices of multiple deprivation from across the UK against a postcode. It is consumed by software that RCPCH provide. +The [Office of National Statistics](https://www.ons.gov.uk) publishes all the Census data exhaustively - this project is not intended to replace it. There is a need though for RCPCH to be able to describe the lived environment and experience of children and young people in a meaningful way, to inform research, audit and clinical practice. The project will curate social and environmental data where they have impact on children's health or on paediatrics, available to clinicians and researchers. It is a work in progress. The first application within this project is an API to address deprivation, by reporting indices of multiple deprivation from across the UK against a postcode. It is consumed by software that RCPCH provide. It supports the latest deprivation score publications including 2025. ### UK Areas @@ -114,8 +114,9 @@ Written in python 3.11 and django-rest-framework. We recommend using `pyenv` or 5. ```python manage.py makemigrations``` 6. ```python manage.py migrate``` 7. ```python manage.py seed --mode='__all__'``` +8. ```python manage.py seed --mode='import_bfc_boundaries'``` -This latter step will take several minutes as it populates the database with all the census and deprivation data. If successful, it should yield the following message: +This latter step will take more than 30 minutes as it populates the database with all the census and deprivation data, and also the geographical boundary shapes and map tiles. If successful, it should yield the following message: > ![alt rcpch-census-db](static/images/census_db_screenshot.png?raw=true) The final step is to run the server: @@ -141,14 +142,47 @@ To run the tests without: ### Docker Compose development install - -1. clone the repo -2. ```cd rcpch_census_platform``` -3. ```s/up``` -4. grab the token from the console within the docker > ![alt drf_token](static/images/census_db_token.png) -5. Add the token to your header when making an api call (```-H 'Authorization: *******'``` in curl statement for example). If you are using Postman, use the OAUTH2 Authorization header, and the key 'Token'. +This repository includes a Docker Compose development stack with: + +- `web`: Django/DRF API (runs on port `8001`) +- `db`: PostGIS (runs on port `5432`) +- `pg_tileserv`: vector tile server backed by PostGIS (runs on port `7800`) + +Prerequisites: + +- Docker Desktop (or Docker Engine + Compose) + +Steps: + +1. Clone the repo +2. From the repository root, start the dev stack: + - `./s/dev` + - (equivalent: `docker compose -f docker-compose-postgis.yml up --build`) +3. The `web` container will wait for the database, run `collectstatic`, run `migrate`, and then start Django. +4. 
Seed the database (this can take a long time): + - `docker compose -f docker-compose-postgis.yml exec web python manage.py seed --mode='__all__'` + - Optional: `docker compose -f docker-compose-postgis.yml exec web python manage.py seed --mode='import_bfc_boundaries'` + +Useful URLs: + +- API: `http://localhost:8001/rcpch-census-platform/api/v1/` +- Tileserver (pg_tileserv): `http://localhost:7800/` + +Note: the nginx reverse-proxy container is used in the Azure Container Apps deployment to route `/tiles/*` and the API under a single public ingress. For local development you can usually hit Django (`:8001`) and pg_tileserv (`:7800`) directly. + +### Demo map site (GitHub Pages + local dev) + +The MapLibre demo lives in the `site/` folder and is deployed to GitHub Pages via a GitHub Actions workflow. + +For local development, you can preview it in VS Code: + +1. Open `site/index.html` +2. Right click the file in the Explorer and choose **Open with Live Server** (requires the Live Server extension) + +When the demo is served from `localhost` / `127.0.0.1`, it will default to using local tiles at `http://localhost:7800`. +To point it at a deployed tiles endpoint, pass a query parameter: -If you navigate to the base url```http://localhost:8001/rcpch-census-platform/api/v1/``` and login, it should be possible then to view the data. Alternatively, add the token to Postman. +- `?tilesBase=https:///tiles` ### Other Command Line functions @@ -158,18 +192,25 @@ These are: | Model | Number of Rows | Notes | |----|----|----| -| LSOA | 34753 | LSOA should have 34753 (32844 in England, 1909 in wales) rows. | -| DataZone | 6976 | DataZone should have 6976 rows. | -| LocalAuthority | 371 | LocalAuthority should have 371 (317 in England, 22 in Wales, 32 is Scotland) rows (the 11 Northern Irish Local Authorities are not included here). | -| PopulationDensity | 32844 | PopulationDensity should have 32058 rows. | -| GreenSpace | 371 | GreenSpace should have 371 rows. | -| SOA | 890 | SOA should have 890 rows. | -| WelshIndexMultipleDeprivation | 1909 | WelshIndexMultipleDeprivation should have 1909 rows. | -| NorthernIrelandIndexMultipleDeprivation | 890 | NorthernIrelandIndexMultipleDeprivation should have 890 rows. | -| ScottishIndexMultipleDeprivation | 6976 | ScottishIndexMultipleDeprivation should have 6976 rows. | +| LSOA (2011) | 34,753 | 2011 LSOA rows (32,844 in England, 1,909 in Wales). | +| LSOA (2021) | 35,672 | 2021 LSOA rows. | +| DataZone | 6,976 | Scotland Data Zones. | +| LocalAuthority (2011) | 32 | Scotland local authorities (2011). | +| LocalAuthority (2019) | 339 | England + Wales local authorities (2019): 317 + 22. | +| LocalAuthority (2024, with geom) | 318 | 2024 local authorities with geometries present. | +| PopulationDensity | 32,844 | England population densities. | +| GreenSpace | 371 | England/Wales/Scotland green space rows. | +| SOA | 890 | Northern Ireland SOAs. | +| WelshIndexMultipleDeprivation | 1,909 | Wales WIMD (2019) rows. | +| NorthernIrelandIndexMultipleDeprivation | 890 | Northern Ireland NIMDM (2017) rows. | +| ScottishIndexMultipleDeprivation | 6,976 | Scotland SIMD (2020) rows. 
| `python manage.py seed --mode test_table_totals` +To validate the generated UK master views have geometries for all nations (and sanity-check the coordinate system), run: + +`python manage.py seed --mode test_geometries` + ## Creating openapi.yml and openapi.json files @@ -187,21 +228,10 @@ YAML docker compose -f docker-compose.dev-init.yml exec web python manage.py spectacular --file openapi.json ``` - +The full list of endpoints can be viewed in the openAPI spec above, but the key endpoints that are important are: -There are 10 routes that accept GET requests, all of which return lists that can be filtered, with the exception of ```/indices_of_multiple_deprivation/``` which accepts only a postcode. - -1. ```/local_authority_districts/```: params include ```local_authority_district_code```, ```local_authority_district_name```, ```year``` or if none is passed, a list of all local authorities in the UK is returned -2. ```/england_wales_lower_layer_super_output_areas/```: params include ```lsoa_code```, ```lsoa_name```, ```year```. If none is passed, a list of all LSOAs is returned. -3. ```/northern_ireland_small_output_areas/```: params include ```soa_code```, ```soa_name```, ```year```. If none is passed, a list of all SOAs is returned. -4. ```/scotland_datazones/```: params include ```data_zone_code```,```data_zone_name```,```year```,```local_authority_code```. If none is passed, a list of all Data Zones is returned. -5. ```/greenspace/```: returns data on green space access by local authority in England, Scotland and Wales -6. ```/english_indices_of_multiple_deprivation/```: params include ```lsoa_code_name``` or ```lsoa_code```, ```local_authority_code``` as well any of the return object fields. It returns a list of all English indices of deprivation -7. ```/welsh_indices_of_multiple_deprivation/```: params include ```lsoa_code```, ```local_authority_code``` as well any of the return object fields. It returns a list of all Welsh indices of deprivation -8. ```/scottish_indices_of_multiple_deprivation/```: params include ```data_zone_code``` and ```data_zone_name```, ```local_authority_code``` as well as any of the return object fields. It returns a list of all Scottish indices of deprivation. -9. ```/northern_ireland_indices_of_multiple_deprivation/```: params include ```soa_code``` and ```soa_code_name``` as well as any of the return object fields. It returns a list of all Scottish indices of deprivation -10. ```/indices_of_multiple_deprivation/```: takes a UK postcode (mandatory) and returns deprivation score and quantiles for that LSOA -11. ```/index_of_multiple_deprivation_quantile/```: takes a UK postcode (mandatory) and a requested quantile (mandatory) and returns a deprivation quantile. +1. ```/indices_of_multiple_deprivation/```: takes a UK postcode (mandatory) and returns deprivation score and quantiles for that LSOA. It optionally accepts a year for the request IMD dataset, defaulting to 2019 for England and Wales, 2020 for Scotland and 2017 for Northern Ireland +2. ```/index_of_multiple_deprivation_quantile/```: takes a UK postcode (mandatory) and a requested quantile (mandatory) and returns a deprivation quantile. Also accepts a year as above. 
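+For a quick sketch of calling these two endpoints from Python before the full worked example below — note that the `postcode`, `quantile` and `year` query parameter names, the base URL and whether a DRF token is still required are assumptions here; the openAPI spec above is the authoritative reference:
+
+```python
+import requests
+
+BASE = "http://localhost:8001/rcpch-census-platform/api/v1"
+headers = {"Authorization": "Token <your-drf-token>"}  # omit if your instance is unauthenticated
+
+# Deprivation score and quantiles for the LSOA containing a postcode,
+# optionally pinning the IMD dataset year (defaults: 2019 England/Wales, 2020 Scotland, 2017 NI)
+response = requests.get(
+    f"{BASE}/indices_of_multiple_deprivation",
+    params={"postcode": "SW1A 1AA", "year": 2019},
+    headers=headers,
+)
+print(response.json())
+
+# A single requested quantile (e.g. decile) for the same postcode
+response = requests.get(
+    f"{BASE}/index_of_multiple_deprivation_quantile",
+    params={"postcode": "SW1A 1AA", "quantile": "decile", "year": 2019},
+    headers=headers,
+)
+print(response.json())
+```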
example: SW1A 1AA (Buckingham Palace): @@ -406,58 +436,7 @@ returns: } ``` -There is an additional endpoint: -```http://localhost:8000/rcpch-census-platform/api/v1/boundaries?postcode=sw1a1aa``` - -This will return information about a given postcode: - -```json -HTTP 200 OK -Allow: GET, HEAD, OPTIONS -Content-Type: application/json -Vary: Accept - -{ - "postcode": "SW1A 1AA", - "quality": 1, - "eastings": 529090, - "northings": 179645, - "country": "England", - "nhs_ha": "London", - "longitude": -0.141588, - "latitude": 51.501009, - "european_electoral_region": "London", - "primary_care_trust": "Westminster", - "region": "London", - "lsoa": "Westminster 018C", - "msoa": "Westminster 018", - "incode": "1AA", - "outcode": "SW1A", - "parliamentary_constituency": "Cities of London and Westminster", - "admin_district": "Westminster", - "parish": "Westminster, unparished area", - "admin_county": null, - "admin_ward": "St. James's", - "ced": null, - "ccg": "NHS North West London", - "nuts": "Westminster", - "codes": { - "admin_district": "E09000033", - "admin_county": "E99999999", - "admin_ward": "E05013806", - "parish": "E43000236", - "parliamentary_constituency": "E14000639", - "ccg": "E38000256", - "ccg_id": "W2U3Z", - "ced": "E99999999", - "nuts": "TLI32", - "lsoa": "E01004736", - "msoa": "E02000977", - "lau2": "E09000033" - } -} -``` - -This information comes directly from the remarkable [postcodes.io](https://postcodes.io) which offers this as a free service. This is a dependency of the RCPCH Census Platform API, since it is used to get LSOAs from a postcode. This process is complicated as boundaries frequently change. +**ACKNOWLEDGEMENT** +The postcode look up is powered by an RCPCH hosted instance of [postcodes.io](api.postcodes.io) [![DOI](https://zenodo.org/badge/568991339.svg)](https://zenodo.org/badge/latestdoi/568991339) diff --git a/deprivation_scores/management/commands/seed.py b/deprivation_scores/management/commands/seed.py index ca0cc6f..798accb 100644 --- a/deprivation_scores/management/commands/seed.py +++ b/deprivation_scores/management/commands/seed.py @@ -1,9 +1,19 @@ +import gc +import csv from enum import Enum +import io +import json +import os from math import floor +import requests +from shapely.geometry import MultiPolygon import sys -import csv from decimal import Decimal +import geopandas as gpd +import pandas as pd +from sqlalchemy import create_engine from django.core.management.base import BaseCommand +from django.db import connection from django.conf import settings from ...models import ( LSOA, @@ -18,6 +28,7 @@ NorthernIrelandIndexMultipleDeprivation, PopulationDensity, ) +from django.db import connection class QuantileType(Enum): @@ -80,10 +91,804 @@ class QuantileType(Enum): class Command(BaseCommand): help = "seed database with census and IMD data for England, Wales, Scotland and Northern Ireland." + def _run_post_processing_sql(self): + self.stdout.write( + self.style.WARNING( + "\nπŸš€ Running high-performance PostGIS optimizations and building UK-wide views..." + ) + ) + + # --- Nested SQL Helpers --- + + def get_lsoa_view_sql(view_name, geom_column, boundary_year): + """Creates individual England/Wales Table filtered by boundary year""" + return f""" + -- 1. Clean up both types to prevent the conflict error + DROP TABLE IF EXISTS public.{view_name} CASCADE; + DROP VIEW IF EXISTS public.{view_name} CASCADE; + + -- 2. 
Create as a TABLE, not a VIEW + CREATE TABLE public.{view_name} AS + SELECT + l.year::int as year, -- Renamed to 'year' for tileserv compatibility + l.{geom_column}::geometry(MultiPolygon, 3857) AS geom, + l.lsoa_code::text, + COALESCE(e.imd_decile, w.imd_decile, 0)::int as imd_decile, + COALESCE(e.imd_rank, w.imd_rank, 0)::int as imd_rank + FROM deprivation_scores_lsoa l + LEFT JOIN deprivation_scores_englishindexmultipledeprivation e + ON e.lsoa_id = l.id + LEFT JOIN deprivation_scores_welshindexmultipledeprivation w + ON w.lsoa_id = l.id + WHERE l.{geom_column} IS NOT NULL + AND l.year = {boundary_year}; + + -- 3. Index it! (This is what prevents the 500 errors) + CREATE INDEX idx_{view_name}_geom ON public.{view_name} USING GIST (geom); + ANALYZE public.{view_name}; + """ + + # In your get_uk_master_view_sql helper, add a boundary_year parameter + def get_uk_master_view_sql(view_name, geom_suffix, boundary_year, imd_year): + """Creates UK Master View combining all 4 nations with IMD data""" + actual_geom_col = ( + "geom_3857" if geom_suffix == "3857" else f"geom_3857_{geom_suffix}" + ) + + return f""" + -- 1. Clean up existing objects (both table and view types) + DROP TABLE IF EXISTS public.{view_name} CASCADE; + DROP VIEW IF EXISTS public.{view_name} CASCADE; + + -- 2. Materialize the data into a physical TABLE for performance + CREATE TABLE public.{view_name} AS + -- ENGLAND + SELECT + l.year::int AS year, + {imd_year}::int AS imd_year, + l.lsoa_code::text AS code, + ST_MakeValid(ST_Multi(l.{actual_geom_col}))::geometry(MultiPolygon, 3857) AS geom, + 'england'::text AS nation, + COALESCE(e.imd_decile, 0)::int AS imd_decile + FROM deprivation_scores_lsoa l + LEFT JOIN deprivation_scores_englishindexmultipledeprivation e + ON e.lsoa_id = l.id AND e.year = {imd_year} + WHERE l.lsoa_code LIKE 'E%' AND l.{actual_geom_col} IS NOT NULL AND l.year = {boundary_year} + + UNION ALL + + -- WALES (Fall back to 2019 IMD as it is the most recent available) + SELECT + l.year::int AS year, + {imd_year}::int AS imd_year, + l.lsoa_code::text AS code, + ST_MakeValid(ST_Multi(l.{actual_geom_col}))::geometry(MultiPolygon, 3857) AS geom, + 'wales'::text AS nation, + COALESCE(w.imd_decile, 0)::int AS imd_decile + FROM deprivation_scores_lsoa l + LEFT JOIN deprivation_scores_welshindexmultipledeprivation w + ON w.lsoa_id = l.id AND w.year = 2019 + WHERE l.lsoa_code LIKE 'W%' AND l.{actual_geom_col} IS NOT NULL AND l.year = {boundary_year} + + UNION ALL + + -- SCOTLAND + SELECT + d.year::int AS year, + {imd_year}::int AS imd_year, + d.data_zone_code::text AS code, + ST_MakeValid(ST_Multi(d.{actual_geom_col}))::geometry(MultiPolygon, 3857) AS geom, + 'scotland'::text AS nation, + COALESCE(WIDTH_BUCKET(s.imd_rank, 1, 6977, 10), 0)::int AS imd_decile + FROM deprivation_scores_datazone d + LEFT JOIN deprivation_scores_scottishindexmultipledeprivation s ON s.data_zone_id = d.id + WHERE d.{actual_geom_col} IS NOT NULL + + UNION ALL + + -- NORTHERN IRELAND + SELECT + so.year::int AS year, + {imd_year}::int AS imd_year, + so.soa_code::text AS code, + ST_MakeValid(ST_Multi(so.{actual_geom_col}))::geometry(MultiPolygon, 3857) AS geom, + 'northern_ireland'::text AS nation, + COALESCE(WIDTH_BUCKET(ni.imd_rank, 1, 891, 10), 0)::int AS imd_decile + FROM deprivation_scores_soa so + LEFT JOIN deprivation_scores_northernirelandindexmultipledeprivation ni ON ni.soa_id = so.id + WHERE so.{actual_geom_col} IS NOT NULL; + + -- 3. 
Create Spatial Index (Removes 500 errors by speeding up BBOX queries) + CREATE INDEX idx_{view_name}_geom ON public.{view_name} USING GIST (geom); + + -- 4. Gather statistics for the query planner + ANALYZE public.{view_name}; + """ + + # --- SQL Statement List --- + + sql_statements = [ + # Section 1: Cleanup + # 2. SCHEMA: Ensure columns exist FIRST + "ALTER TABLE deprivation_scores_lsoa ADD COLUMN IF NOT EXISTS geom_3857 geometry(MultiPolygon,3857);", + "ALTER TABLE deprivation_scores_lsoa ADD COLUMN IF NOT EXISTS geom_3857_simp_z0_4 geometry(MultiPolygon,3857);", + "ALTER TABLE deprivation_scores_lsoa ADD COLUMN IF NOT EXISTS geom_3857_simp_z5_7 geometry(MultiPolygon,3857);", + "ALTER TABLE deprivation_scores_lsoa ADD COLUMN IF NOT EXISTS geom_3857_simp_z8_10 geometry(MultiPolygon,3857);", + "ALTER TABLE deprivation_scores_datazone ADD COLUMN IF NOT EXISTS geom_3857 geometry(MultiPolygon,3857);", + "ALTER TABLE deprivation_scores_datazone ADD COLUMN IF NOT EXISTS geom_3857_simp_z0_4 geometry(MultiPolygon,3857);", + "ALTER TABLE deprivation_scores_datazone ADD COLUMN IF NOT EXISTS geom_3857_simp_z5_7 geometry(MultiPolygon,3857);", + "ALTER TABLE deprivation_scores_soa ADD COLUMN IF NOT EXISTS geom_3857 geometry(MultiPolygon,3857);", + "ALTER TABLE deprivation_scores_soa ADD COLUMN IF NOT EXISTS geom_3857_simp_z0_4 geometry(MultiPolygon,3857);", + "ALTER TABLE deprivation_scores_soa ADD COLUMN IF NOT EXISTS geom_3857_simp_z5_7 geometry(MultiPolygon,3857);", + # Now do updates and drops + "UPDATE deprivation_scores_datazone SET geom_3857_simp_z0_4 = ST_Multi(ST_CollectionExtract(ST_MakeValid(ST_SimplifyPreserveTopology(geom_3857, 0.5)), 3));", + "DROP TABLE IF EXISTS public.uk_master_2011_z0_4 CASCADE;", + "DROP TABLE IF EXISTS public.uk_master_2011_z5_7 CASCADE;", + "DROP TABLE IF EXISTS public.uk_master_2011_z8_10 CASCADE;", + "DROP TABLE IF EXISTS public.uk_master_2021_z0_4 CASCADE;", + "DROP TABLE IF EXISTS public.uk_master_2021_z5_7 CASCADE;", + "DROP TABLE IF EXISTS public.uk_master_2021_z8_10 CASCADE;", + # Also drop the LSOA-specific ones as tables + "DROP TABLE IF EXISTS public.lsoa_tiles_2011_z0_4 CASCADE;", + "DROP TABLE IF EXISTS public.lsoa_tiles_2021_z0_4 CASCADE;", + "ALTER TABLE deprivation_scores_localauthority ADD COLUMN IF NOT EXISTS geom_3857 geometry(MultiPolygon,3857);", + # 3. GEOPROCESSING (WGS84 -> Web Mercator 3857) + "UPDATE deprivation_scores_lsoa SET geom_3857 = ST_MakeValid(geom_3857) WHERE NOT ST_IsValid(geom_3857);", + "UPDATE deprivation_scores_lsoa SET geom_3857 = ST_Transform(geom, 3857) WHERE geom_3857 IS NULL AND geom IS NOT NULL;", + "UPDATE deprivation_scores_datazone SET geom_3857 = ST_Transform(geom, 3857) WHERE geom_3857 IS NULL AND geom IS NOT NULL;", + "UPDATE deprivation_scores_soa SET geom_3857 = ST_Transform(geom, 3857) WHERE geom_3857 IS NULL AND geom IS NOT NULL;", + "UPDATE deprivation_scores_localauthority SET geom_3857 = ST_Transform(geom, 3857) WHERE geom_3857 IS NULL AND geom IS NOT NULL;", + # 4. 
SIMPLIFICATION (Gentle simplification to preserve detail) + # Applied to all regions for z0_4 + "UPDATE deprivation_scores_lsoa SET geom_3857_simp_z0_4 = ST_Multi(ST_CollectionExtract(ST_MakeValid(ST_SimplifyPreserveTopology(geom_3857, 5)), 3)) WHERE geom_3857_simp_z0_4 IS NULL AND geom_3857 IS NOT NULL;", + "UPDATE deprivation_scores_datazone SET geom_3857_simp_z0_4 = ST_Multi(ST_CollectionExtract(ST_MakeValid(ST_SimplifyPreserveTopology(geom_3857, 0.5)), 3));", + "UPDATE deprivation_scores_soa SET geom_3857_simp_z0_4 = ST_Multi(ST_CollectionExtract(ST_MakeValid(ST_SimplifyPreserveTopology(geom_3857, 20)), 3)) WHERE geom_3857_simp_z0_4 IS NULL AND geom_3857 IS NOT NULL;", + # Mid-level simplification (z5_7) + "UPDATE deprivation_scores_lsoa SET geom_3857_simp_z5_7 = ST_Multi(ST_CollectionExtract(ST_MakeValid(ST_SimplifyPreserveTopology(geom_3857, 10)), 3)) WHERE geom_3857_simp_z5_7 IS NULL AND geom_3857 IS NOT NULL;", + "UPDATE deprivation_scores_datazone SET geom_3857_simp_z5_7 = ST_Multi(ST_CollectionExtract(ST_MakeValid(ST_SimplifyPreserveTopology(geom_3857, 2)), 3));", + "UPDATE deprivation_scores_soa SET geom_3857_simp_z5_7 = ST_Multi(ST_CollectionExtract(ST_MakeValid(ST_SimplifyPreserveTopology(geom_3857, 10)), 3)) WHERE geom_3857_simp_z5_7 IS NULL AND geom_3857 IS NOT NULL;", + # 5. SPATIAL INDEXING & CLUSTERING + # 5. SPATIAL INDEXING & CLUSTERING (The Core Optimizations) + "CREATE INDEX IF NOT EXISTS idx_lsoa_3857 ON deprivation_scores_lsoa USING GIST (geom_3857);", + "CREATE INDEX IF NOT EXISTS idx_datazone_3857 ON deprivation_scores_datazone USING GIST (geom_3857);", + "CREATE INDEX IF NOT EXISTS idx_soa_3857 ON deprivation_scores_soa USING GIST (geom_3857);", + # Performance indexes for the Year/IMD joins + "CREATE INDEX IF NOT EXISTS idx_lsoa_year_id ON deprivation_scores_lsoa (year, id);", + "CREATE INDEX IF NOT EXISTS idx_english_imd_year_lsoa ON deprivation_scores_englishindexmultipledeprivation (year, lsoa_id);", + "CREATE INDEX IF NOT EXISTS idx_welsh_imd_year_lsoa ON deprivation_scores_welshindexmultipledeprivation (year, lsoa_id);", + # Cluster tables (Physically re-order rows by geography for tile speed) + "CLUSTER deprivation_scores_lsoa USING idx_lsoa_3857;", + "CLUSTER deprivation_scores_datazone USING idx_datazone_3857;", + "CLUSTER deprivation_scores_soa USING idx_soa_3857;", + # 6. VIEWS (Split by Boundary Year) + # 6. 
VIEWS + # --- 2011 Individual LSOA Views --- + get_lsoa_view_sql("lsoa_tiles_2011_z0_4", "geom_3857_simp_z0_4", 2011), + get_lsoa_view_sql("lsoa_tiles_2011_z5_7", "geom_3857_simp_z5_7", 2011), + get_lsoa_view_sql("lsoa_tiles_2011_z8_10", "geom_3857", 2011), + # --- 2021 Individual LSOA Views --- + get_lsoa_view_sql("lsoa_tiles_2021_z0_4", "geom_3857_simp_z0_4", 2021), + get_lsoa_view_sql("lsoa_tiles_2021_z5_7", "geom_3857_simp_z5_7", 2021), + get_lsoa_view_sql("lsoa_tiles_2021_z8_10", "geom_3857", 2021), + # --- UK Master Views (The ones your map actually calls) --- + # Boundary Year 2011 + IMD 2019 + get_uk_master_view_sql("uk_master_2011_z0_4", "simp_z0_4", 2011, 2019), + get_uk_master_view_sql("uk_master_2011_z5_7", "simp_z5_7", 2011, 2019), + get_uk_master_view_sql("uk_master_2011_z8_10", "3857", 2011, 2019), + # Boundary Year 2021 + IMD 2025 + get_uk_master_view_sql("uk_master_2021_z0_4", "simp_z0_4", 2021, 2025), + get_uk_master_view_sql("uk_master_2021_z5_7", "simp_z5_7", 2021, 2025), + get_uk_master_view_sql("uk_master_2021_z8_10", "3857", 2021, 2025), + "CREATE OR REPLACE VIEW public.la_tiles AS SELECT year, geom_3857 AS geom, local_authority_district_code AS lad_code FROM deprivation_scores_localauthority;", + # 7. FINAL HOUSEKEEPING + "GRANT SELECT ON ALL TABLES IN SCHEMA public TO PUBLIC;", + "ANALYZE deprivation_scores_lsoa;", + "ANALYZE deprivation_scores_datazone;", + "ANALYZE deprivation_scores_soa;", + # Final optimization for the Master Tables + "VACUUM ANALYZE public.uk_master_2011_z0_4;", + "VACUUM ANALYZE public.uk_master_2011_z5_7;", + "VACUUM ANALYZE public.uk_master_2011_z8_10;", + "VACUUM ANALYZE public.uk_master_2021_z0_4;", + "VACUUM ANALYZE public.uk_master_2021_z5_7;", + "VACUUM ANALYZE public.uk_master_2021_z8_10;", + ] + + def table_or_view_exists(cursor, name): + # Accepts schema-qualified names like public.foo + if '.' in name: + schema, rel = name.split('.', 1) + else: + schema, rel = 'public', name + cursor.execute(""" + SELECT EXISTS ( + SELECT 1 FROM information_schema.tables WHERE table_schema=%s AND table_name=%s + UNION + SELECT 1 FROM information_schema.views WHERE table_schema=%s AND table_name=%s + ) + """, [schema, rel, schema, rel]) + return cursor.fetchone()[0] + + print("[POSTPROCESS] Starting SQL post-processing...") + with connection.cursor() as cursor: + for statement in sql_statements: + stmt = statement.strip() + print(f"[POSTPROCESS] Executing: {stmt}") + # Check for ANALYZE, VACUUM, CLUSTER, etc. that require table/view existence + skip = False + for op in ["ANALYZE", "VACUUM", "CLUSTER", "GRANT", "CREATE INDEX", "DROP INDEX"]: + if stmt.startswith(op): + # Extract table/view name (naive split, works for your patterns) + tokens = stmt.split() + # e.g. ANALYZE public.uk_master_2011_z8_10; + for t in tokens[1:]: + t = t.strip(';') + if '.' 
in t or t.isidentifier(): + if not table_or_view_exists(cursor, t): + msg = f" Skipping '{op}' for missing table/view: {t}" + self.stdout.write(self.style.WARNING(msg)) + print(f"[POSTPROCESS] WARNING: {msg}") + skip = True + break + break + if skip: + continue + try: + if stmt: + cursor.execute(stmt) + print(f"[POSTPROCESS] Success: {stmt}") + except Exception as e: + err_msg = f"SQL Error: {e}" + self.stderr.write(self.style.ERROR(err_msg)) + print(f"[POSTPROCESS] ERROR: {err_msg}") + print("[POSTPROCESS] SQL post-processing complete.") + + self.stdout.write( + self.style.SUCCESS("βœ… Post-processing and spatial optimizations complete.") + ) + + def _stream_bfc_import(self, dataset, force=False): + source = dataset["url"] + table_name = dataset["table"] + year = dataset["year"] + chunk_size = dataset.get("chunk_size", 1000) + specific_code_col = dataset.get("code_column", "").lower() + django_col = dataset.get("django_code_col") + + # 1. Guard + with connection.cursor() as cursor: + cursor.execute( + f"SELECT COUNT(*) FROM {table_name} WHERE year = %s AND geom IS NOT NULL", + [year], + ) + if cursor.fetchone()[0] > 0 and not force: + self.stdout.write( + self.style.SUCCESS( + f" {dataset['name']} already spatialized. Skipping." + ) + ) + return + + try: + # 2. Remote Download with simple progress log + if source.startswith("http"): + # Check if this is an ArcGIS REST API endpoint that needs pagination + is_arcgis_api = "/FeatureServer/" in source or "/MapServer/" in source + + if is_arcgis_api: + # Pagination for ArcGIS REST API using ObjectID strategy + self.stdout.write(f" Downloading from ArcGIS REST API: {source}") + + # Step 1: Get all ObjectIDs (fast, no geometry) + base_url = source.split("?")[0] # Remove existing query params + separator = "&" if "?" in source else "?" + + # Extract query params from original URL if they exist + query_params = {} + if "?" in source: + param_string = source.split("?")[1] + for param in param_string.split("&"): + if "=" in param: + key, value = param.split("=", 1) + query_params[key] = value + + # Get ObjectIDs only + oid_url = f"{base_url}?where={query_params.get('where', '1=1')}&returnIdsOnly=true&f=json" + self.stdout.write(f" Fetching ObjectIDs...") + oid_response = requests.get(oid_url, timeout=300) + oid_response.raise_for_status() + oid_data = oid_response.json() + + if "error" in oid_data: + raise ValueError( + f"API Error: {oid_data['error'].get('message', 'Unknown error')}" + ) + + object_ids = oid_data.get("objectIds", []) + if not object_ids: + raise ValueError("No ObjectIDs returned from API") + + self.stdout.write( + f" Found {len(object_ids)} features. Fetching in batches..." + ) + + # Step 2: Fetch features in batches by ObjectID (smaller batches to avoid URL length limits) + all_gdfs = [] + batch_size = 100 # Reduced from 1000 to avoid 403 Forbidden due to URL length + log_interval = 1000 # Log progress every 1000 features + + import time + from requests.exceptions import ( + ChunkedEncodingError, + ConnectionError, + ReadTimeout, + HTTPError, + ) + + max_retries = 4 + retry_delay = 5 + + def fetch_batch(batch_ids, depth=0): + id_list = ",".join(map(str, batch_ids)) + batch_url = ( + f"{base_url}?objectIds={id_list}&outFields=*&f=geojson" + ) + for attempt in range(max_retries): + try: + indent = " " * (depth + 1) + self.stdout.write( + f"{indent}Downloading batch {batch_ids[0]}-{batch_ids[-1]} (attempt {attempt+1})..." 
+ ) + batch_response = requests.get( + batch_url, stream=True, timeout=300 + ) + batch_response.raise_for_status() + total_bytes = 0 + bytes_data = io.BytesIO() + for chunk in batch_response.iter_content( + chunk_size=1024 * 1024 + ): + if chunk: + bytes_data.write(chunk) + total_bytes += len(chunk) + self.stdout.write( + f"{indent}Downloaded {total_bytes/1e6:.2f} MB for batch {batch_ids[0]}-{batch_ids[-1]}" + ) + bytes_data.seek(0) + batch_gdf = gpd.read_file(bytes_data) + all_gdfs.append(batch_gdf) + return True + except HTTPError as e: + if e.response.status_code == 504 and len(batch_ids) > 1: + self.stdout.write( + f"{indent}504 Gateway Timeout for batch {batch_ids[0]}-{batch_ids[-1]}. Splitting batch..." + ) + mid = len(batch_ids) // 2 + fetch_batch(batch_ids[:mid], depth + 1) + fetch_batch(batch_ids[mid:], depth + 1) + return True + else: + self.stdout.write( + f"{indent}HTTP Error for batch {batch_ids[0]}-{batch_ids[-1]}: {str(e)}" + ) + break + except ( + ChunkedEncodingError, + ConnectionError, + ReadTimeout, + ) as e: + self.stdout.write( + f"{indent}Connection error: {e}. Retrying ({attempt+1}/{max_retries})..." + ) + time.sleep(retry_delay) + except Exception as e: + self.stdout.write( + f"{indent}Failed to parse batch {batch_ids[0]}-{batch_ids[-1]}: {str(e)}" + ) + break + else: + self.stdout.write( + f"{indent}Failed to download batch {batch_ids[0]}-{batch_ids[-1]} after {max_retries} attempts. Skipping." + ) + return False + + for i in range(0, len(object_ids), batch_size): + batch_ids = object_ids[i : i + batch_size] + fetch_batch(batch_ids) + + # Only log every 1000 features + total_fetched = sum(len(gdf) for gdf in all_gdfs) + if total_fetched % log_interval == 0 or total_fetched >= len( + object_ids + ): + self.stdout.write( + f" Progress: {total_fetched}/{len(object_ids)} features fetched" + ) + + if len(all_gdfs) == 0: + raise ValueError("No features retrieved from API") + + # Concatenate all batches + final_gdf = gpd.GeoDataFrame(pd.concat(all_gdfs, ignore_index=True)) + self.stdout.write( + f" Download complete. Total features: {len(final_gdf)}" + ) + else: + # Non-paginated download (e.g., direct JSON files) + self.stdout.write(f" Starting download: {source}") + response = requests.get(source, stream=True, timeout=300) + response.raise_for_status() + + total_size = int(response.headers.get("content-length", 0)) + bytes_data = io.BytesIO() + downloaded = 0 + last_percent = -1 + + for chunk in response.iter_content(chunk_size=chunk_size * 1024): + bytes_data.write(chunk) + if total_size > 0: + downloaded += len(chunk) + percent = int(100 * downloaded / total_size) + if percent % 10 == 0 and percent != last_percent: + self.stdout.write(f" Download Progress: {percent}%") + last_percent = percent + + self.stdout.write( + " Download complete. Parsing JSON into GeoPandas..." + ) + bytes_data.seek(0) + final_gdf = gpd.read_file(bytes_data) + else: + self.stdout.write(f" Loading local file: {source}") + final_gdf = gpd.read_file(source) + + final_gdf.columns = [c.lower() for c in final_gdf.columns] + + # 3. Geometric Processing (ONLY for Northern Ireland Small Areas that need dissolving) + # Northern Ireland downloads ~4500 small areas that need to be dissolved into ~890 SOAs + is_ni_small_areas = ( + table_name == "deprivation_scores_soa" + and len(final_gdf) > 1000 + and specific_code_col in final_gdf.columns + ) + + if is_ni_small_areas: + self.stdout.write( + f" Condensing {len(final_gdf)} Small Areas into ~890 SOAs (Memory Intensive)..." 
+ ) + + if final_gdf.crs is None: + final_gdf.set_crs("EPSG:29903", inplace=True) + + # Dissolve + final_gdf = final_gdf.dissolve(by=specific_code_col).reset_index() + + # Simplify (Tolerance 1.0m) + self.stdout.write( + " Simplifying geometries for database optimization..." + ) + final_gdf["geometry"] = final_gdf.simplify( + tolerance=1.0, preserve_topology=True + ) + + # 4. Standardize CRS & Geometry Type + # Only apply Irish projection logic to Northern Ireland data + if table_name == "deprivation_scores_soa": + if ( + final_gdf.crs is None + or final_gdf.geometry.iloc[0].centroid.x > 1000 + ): + final_gdf.set_crs("EPSG:29903", allow_override=True, inplace=True) + + # Ensure we have WGS84 (EPSG:4326) for database storage + if final_gdf.crs is None: + self.stdout.write(" Warning: No CRS detected, assuming WGS84...") + final_gdf.set_crs("EPSG:4326", inplace=True) + elif final_gdf.crs != "EPSG:4326": + self.stdout.write(f" Reprojecting from {final_gdf.crs} to WGS84...") + final_gdf = final_gdf.to_crs("EPSG:4326") + + # Convert to MultiPolygon only if needed (preserve valid geometries) + def ensure_multipolygon(geom): + if geom.geom_type == "MultiPolygon": + return geom + elif geom.geom_type == "Polygon": + return MultiPolygon([geom]) + else: + # For other types, try to extract polygons + return MultiPolygon( + [g for g in geom.geoms if g.geom_type == "Polygon"] + ) + + final_gdf["geometry"] = final_gdf["geometry"].map(ensure_multipolygon) + + # 5. Database Merge + db = settings.DATABASES["default"] + engine = create_engine( + f"postgresql+psycopg2://{db['USER']}:{db['PASSWORD']}@{db['HOST']}:{db.get('PORT', 5432)}/{db['NAME']}" + ) + + temp_table = f"temp_shapes_{year}" + self.stdout.write(f" Uploading shapes to PostgreSQL...") + + final_gdf[[specific_code_col, "geometry"]].to_postgis( + temp_table, engine, if_exists="replace", index=False + ) + + # DEBUG: Check temp table geometry after to_postgis + with connection.cursor() as cursor: + cursor.execute( + f"SELECT ST_NPoints(geometry) FROM {temp_table} LIMIT 1;" + ) + + cursor.execute( + f"SELECT ST_GeometryType(geom), ST_SRID(geom) FROM {table_name} WHERE year = %s LIMIT 1;", + [year], + ) + + with connection.cursor() as cursor: + # Use explicit geometry cast to prevent any implicit simplification + cursor.execute( + f""" + UPDATE {table_name} SET geom = t.geometry::geometry(MultiPolygon, 4326) + FROM {temp_table} t + WHERE {table_name}.{django_col} = t.{specific_code_col} AND {table_name}.year = %s; + """, + [year], + ) + count = cursor.rowcount + + # DEBUG: Check main table immediately after UPDATE + cursor.execute( + f"SELECT ST_NPoints(geom) FROM {table_name} WHERE year = %s LIMIT 1;", + [year], + ) + + cursor.execute(f"DROP TABLE IF EXISTS {temp_table};") + connection.commit() + + self.stdout.write( + self.style.SUCCESS(f" Successfully spatialized {count} rows.") + ) + + except Exception as e: + self.stderr.write(self.style.ERROR(f" Import failed: {str(e)}")) + with connection.cursor() as cursor: + cursor.execute(f"DROP TABLE IF EXISTS temp_shapes_{year};") + + def purge_cdn_cache(self): + """ + Tells the CDN to clear the cached map data. 
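+        Assumes Cloudflare is the CDN: CLOUDFLARE_ZONE_ID, CLOUDFLARE_API_TOKEN and SITE_URL must be
+        configured in Django settings, and the purge is scoped to the /api/map-data/ URL prefix only.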
+ """ + self.stdout.write(self.style.MIGRATE_LABEL(" Requesting CDN Cache Purge...")) + + # Example for Cloudflare + zone_id = settings.CLOUDFLARE_ZONE_ID + api_token = settings.CLOUDFLARE_API_TOKEN + + url = f"https://api.cloudflare.com/client/v4/zones/{zone_id}/purge_cache" + headers = { + "Authorization": f"Bearer {api_token}", + "Content-Type": "application/json", + } + + # We target only the map-data URLs to avoid clearing the whole site + data = {"prefixes": [f"{settings.SITE_URL}/api/map-data/"]} + + try: + response = requests.post(url, headers=headers, json=data, timeout=10) + if response.status_code == 200: + self.stdout.write( + self.style.SUCCESS(" βœ… CDN Cache Purged successfully.") + ) + else: + self.stdout.write( + self.style.ERROR(f" ❌ CDN Purge failed: {response.text}") + ) + except Exception as e: + self.stdout.write(self.style.ERROR(f" ❌ CDN Purge error: {e}")) + + def warm_cache(self): + self.stdout.write( + self.style.HTTP_INFO(" Pre-warming cache for zoom levels...") + ) + # We target the specific levels our views are optimized for + for z in [3, 6, 9]: + url = f"{settings.SITE_URL}/api/map-data/?z={z}" + try: + # We use a long timeout because generating the first UK-wide + # GeoJSON from the DB can take a few seconds + requests.get(url, timeout=120) + self.stdout.write(f" βœ… Cache primed for zoom {z}") + except Exception as e: + self.stdout.write(f" ⚠️ Could not warm zoom {z}: {e}") + + def test_geometries(self): + self.stdout.write( + self.style.MIGRATE_LABEL("\nπŸ” Validating Spatial Data & Views...") + ) + + + def view_exists(cursor, name): + if '.' in name: + schema, rel = name.split('.', 1) + else: + schema, rel = 'public', name + cursor.execute(""" + SELECT EXISTS ( + SELECT 1 FROM information_schema.views WHERE table_schema=%s AND table_name=%s + ) + """, [schema, rel]) + return cursor.fetchone()[0] + + with connection.cursor() as cursor: + # 1. Check the 2011 Master View + self.stdout.write("Checking 2011 Era (2019 IMD)...") + if view_exists(cursor, "uk_master_2011_z8_10"): + cursor.execute( + """ + SELECT nation, COUNT(*) + FROM public.uk_master_2011_z8_10 + GROUP BY nation; + """ + ) + results_2011 = cursor.fetchall() + nations_2011 = {row[0]: row[1] for row in results_2011} + else: + self.stdout.write(self.style.WARNING(" Skipping 2011 view check: public.uk_master_2011_z8_10 does not exist.")) + nations_2011 = {} + + # 2. Check the 2021 Master View + self.stdout.write("Checking 2021 Era (2025 IMD)...") + if view_exists(cursor, "uk_master_2021_z8_10"): + cursor.execute( + """ + SELECT nation, COUNT(*) + FROM public.uk_master_2021_z8_10 + GROUP BY nation; + """ + ) + results_2021 = cursor.fetchall() + nations_2021 = {row[0]: row[1] for row in results_2021} + else: + self.stdout.write(self.style.WARNING(" Skipping 2021 view check: public.uk_master_2021_z8_10 does not exist.")) + nations_2021 = {} + + expected_nations = ["england", "wales", "scotland", "northern_ireland"] + + for nation in expected_nations: + count_11 = nations_2011.get(nation, 0) + count_21 = nations_2021.get(nation, 0) + + status = "βœ…" if (count_11 > 0 and count_21 > 0) else "❌" + label = nation.replace("_", " ").title() + + self.stdout.write( + f" {status} {label}: 2011({count_11}) | 2021({count_21}) polygons." + ) + + # 3. 
Coordinate System Verification + # Using the 2021 view for the sample + if view_exists(cursor, "uk_master_2021_z8_10"): + cursor.execute( + "SELECT ST_X(ST_Centroid(geom)) FROM public.uk_master_2021_z8_10 LIMIT 1;" + ) + coord_sample = cursor.fetchone() + + if coord_sample and abs(coord_sample[0]) > 180: + self.stdout.write( + self.style.SUCCESS( + " βœ… Coordinate System: Web Mercator (EPSG:3857) confirmed." + ) + ) + else: + self.stdout.write( + self.style.WARNING( + " ⚠️ Coordinate System: Geometry may be in degrees (WGS84). Check ST_Transform logic." + ) + ) + else: + self.stdout.write(self.style.WARNING(" Skipping coordinate system check: public.uk_master_2021_z8_10 does not exist.")) + + self.stdout.write(self.style.SUCCESS("✨ Validation Complete.\n")) + def add_arguments(self, parser): parser.add_argument("--mode", type=str, help="Mode") + parser.add_argument( + "--force", + action="store_true", + help="Force re-import even if expected records with geom already exist", + ) def handle(self, *args, **options): + force = options.get("force", False) + + # Define the datasets to be used by the engine + BFC_DATASETS = [ + { + "name": "LSOA 2011 BFC", + "url": "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Lower_layer_Super_Output_Areas_Dec_2011_Boundaries_Full_Clipped_BFC_EW_V3_2022/FeatureServer/0/query?where=1=1&outFields=*&f=geojson", + "table": "deprivation_scores_lsoa", + "django_code_col": "lsoa_code", + "year": 2011, + "code_column": "LSOA11CD", + }, + { + "name": "LSOA 2021 BFC", + "url": "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/LSOA_2021_EW_BFE_V10_RUC/FeatureServer/3/query?where=1=1&outFields=*&f=geojson", + "table": "deprivation_scores_lsoa", + "django_code_col": "lsoa_code", + "year": 2021, + "code_column": "LSOA21CD", + }, + { + "name": "LAD 2024 BFC", + "url": "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Local_Authority_Districts_May_2024_Boundaries_UK_BFC/FeatureServer/0/query?where=1=1&outFields=*&f=geojson", + "table": "deprivation_scores_localauthority", + "django_code_col": "local_authority_district_code", + "year": 2024, + "code_column": "LAD24CD", + "chunk_size": 1, # Fewer LAs + }, + { + "name": "LAD 2019 BFC", + # Note the _2022 suffix and the /0/query at the end + "url": "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/LAD_Dec_2019_Boundaries_UK_BFC_2022/FeatureServer/0/query?where=1=1&outFields=*&f=geojson", + "table": "deprivation_scores_localauthority", + "django_code_col": "local_authority_district_code", + "year": 2019, + "code_column": "lad19cd", # MUST be lowercase for this specific service + "chunk_size": 25, # Very complex polygons; keep chunk size small + }, + { + "name": "Scotland DataZones 2011 BFC", + "url": "https://maps.gov.scot/server/rest/services/ScotGov/StatisticalUnits/MapServer/2/query?where=1=1&outFields=*&f=geojson", + "table": "deprivation_scores_datazone", + "django_code_col": "data_zone_code", + "year": 2011, + "code_column": "DataZone", + "chunk_size": 100, + }, + { + "name": "Northern Ireland SOA 2011 (Auto-Processed)", + "url": "https://admin.opendatani.gov.uk/dataset/519e5019-6726-445d-8821-12d88f164c1e/resource/b64d8909-883e-42b1-bc79-50dc43f6769e/download/sa2011.json", + "table": "deprivation_scores_soa", + "django_code_col": "soa_code", + "year": 2001, + "code_column": "soa2011", + }, + ] + + # Update your logic here + if options.get("mode") == "import_bfc_boundaries": + self.stdout.write( + "\n" + + self.style.SUCCESS( + "Starting 
high-performance BFC boundary import via ArcGIS API..." + ) + + "\n" + ) + + for ds in BFC_DATASETS: + # We now pass the entire dictionary 'ds' instead of individual arguments + self._stream_bfc_import( + dataset=ds, + force=force, + ) + + # Run optimizations after all datasets are imported + # self._run_post_processing_sql() + + # Warm the local cache and then purge the remote CDN + # if not settings.DEBUG: # Only purge in production + # self.purge_cdn_cache() + # # Trigger the CDN to fetch the new data immediately + # self.warm_cache() + + # test that the tables have the correct number of geometries + self.test_geometries() + return + if options["mode"] == "add_organisational_areas": self.stdout.write(B + "Adding organisational areas..." + W) add_lsoas_2011_wards_2019_to_LADS_2019() @@ -458,7 +1263,11 @@ def update_english_imd_data_with_subdomains(): path = f"{settings.IMD_DATA_FILES_FOLDER}/{IMD_2019_SUBDOMAINS_OF_DEPRIVATION}" sys.stdout.write( - "\n" + G + "πŸ“Ž - Adding sub-domains of deprivation to LSOAs" + W + "\n" + "\n" + + G + + "πŸ“Ž - Adding 2019 sub-domains of deprivation to 2011 LSOAs" + + W + + "\n" ) with open(path, "r") as f: data = list(csv.reader(f, delimiter=",")) @@ -520,7 +1329,7 @@ def update_english_imd_data_with_supplementary_indices(): sys.stdout.write( "\n" + G - + "πŸ“Ž - Adding supplementary indices (IDACI and IDAOPI) of deprivation to LSOAs" + + "πŸ“Ž - Adding 2019 supplementary indices (IDACI and IDAOPI) of deprivation to 2011 LSOAs" + W + "\n" ) @@ -542,7 +1351,7 @@ def update_english_imd_data_with_supplementary_indices(): idaopi_decile=int(float(row[9])), ) count += 1 - final = f" Added {count} supplementary indices (IDACI and IDAOPI) of deprivation 2019 to LSOAs\n" + final = f" Added {count} supplementary indices (IDACI and IDAOPI) of deprivation 2019 to 2011 LSOAs\n" sys.stdout.write(BOLD + "\nπŸ”₯ Complete." + END + final) try: assert count == 32844 @@ -570,7 +1379,11 @@ def update_english_imd_data_with_scores(): path = f"{settings.IMD_DATA_FILES_FOLDER}/{IMD_2019_SCORES_OF_DEPRIVATION}" with open(path, "r") as f: sys.stdout.write( - "\n" + G + "πŸ“Ž - Adding English scores of deprivation to LSOAs" + W + "\n" + "\n" + + G + + "πŸ“Ž - Adding 2019 English scores of deprivation to 2011 LSOAs" + + W + + "\n" ) data = list(csv.reader(f, delimiter=",")) count = 0 @@ -637,7 +1450,7 @@ def update_english_imd_data_with_transformed_scores(): sys.stdout.write( "\n" + G - + "πŸ“Ž - Adding English transformed scores of deprivation to LSOAs" + + "πŸ“Ž - Adding 2019 English transformed scores of deprivation to 2011 LSOAs" + W + "\n" ) @@ -662,7 +1475,7 @@ def update_english_imd_data_with_transformed_scores(): living_environment_score_exponentially_transformed=Decimal(row[10]), ) count += 1 - final = f" Added {count} English transformed scores of deprivation 2019\n" + final = f" Added {count} English 2019 transformed scores of deprivation 2019\n" sys.stdout.write(BOLD + "\nπŸ”₯ Complete." 
+ END + final) try: assert count == 32844 @@ -1762,6 +2575,11 @@ def quantile_for_rank(rank: int, quantile: QuantileType) -> int: raise ValueError(f"Incorrect rank {rank} passed for {quantile.value}") +""" +Tests +""" + + def test_table_totals(): """ Test the total number of records in each table @@ -1801,9 +2619,11 @@ def test_table_totals(): }, { "model": LocalAuthority, - "count": LocalAuthority.objects.filter(year=2024).count(), + "count": LocalAuthority.objects.filter( + year=2024, geom__isnull=False + ).count(), "expected": 318, - "message": "2024 LocalAuthority should have 318 rows. ", + "message": "2024 LocalAuthority should have 318 rows with geometries.", }, { "model": PopulationDensity, diff --git a/deprivation_scores/migrations/0006_localauthority_geom_lsoa_geom.py b/deprivation_scores/migrations/0006_localauthority_geom_lsoa_geom.py new file mode 100644 index 0000000..a2a9a85 --- /dev/null +++ b/deprivation_scores/migrations/0006_localauthority_geom_lsoa_geom.py @@ -0,0 +1,28 @@ +# Generated by Django 6.0 on 2025-12-22 20:12 + +import django.contrib.gis.db.models.fields +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("deprivation_scores", "0005_set_year"), + ] + + operations = [ + migrations.AddField( + model_name="localauthority", + name="geom", + field=django.contrib.gis.db.models.fields.MultiPolygonField( + blank=True, null=True, srid=4326 + ), + ), + migrations.AddField( + model_name="lsoa", + name="geom", + field=django.contrib.gis.db.models.fields.MultiPolygonField( + blank=True, null=True, srid=4326 + ), + ), + ] diff --git a/deprivation_scores/migrations/0007_datazone_geom_soa_geom.py b/deprivation_scores/migrations/0007_datazone_geom_soa_geom.py new file mode 100644 index 0000000..8a7360a --- /dev/null +++ b/deprivation_scores/migrations/0007_datazone_geom_soa_geom.py @@ -0,0 +1,28 @@ +# Generated by Django 6.0 on 2025-12-24 23:25 + +import django.contrib.gis.db.models.fields +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("deprivation_scores", "0006_localauthority_geom_lsoa_geom"), + ] + + operations = [ + migrations.AddField( + model_name="datazone", + name="geom", + field=django.contrib.gis.db.models.fields.MultiPolygonField( + blank=True, null=True, srid=4326 + ), + ), + migrations.AddField( + model_name="soa", + name="geom", + field=django.contrib.gis.db.models.fields.MultiPolygonField( + blank=True, null=True, srid=4326 + ), + ), + ] diff --git a/deprivation_scores/models.py b/deprivation_scores/models.py index af95f49..c539bf4 100644 --- a/deprivation_scores/models.py +++ b/deprivation_scores/models.py @@ -1,4 +1,5 @@ from django.db import models +from django.contrib.gis.db import models as gis_models class LocalAuthority(models.Model): @@ -12,6 +13,9 @@ class LocalAuthority(models.Model): "Local Authority District Year", ) + # Geometry column for LSOA boundaries (EPSG:4326) + geom = gis_models.MultiPolygonField(srid=4326, null=True, blank=True) + class Meta: verbose_name = ("Local Authority",) verbose_name_plural = "Local Authorities" @@ -75,6 +79,9 @@ class LSOA(models.Model): to=LocalAuthority, on_delete=models.CASCADE ) + # Geometry column for LSOA boundaries (EPSG:4326) + geom = gis_models.MultiPolygonField(srid=4326, null=True, blank=True) + class Meta: verbose_name = ("LSOA",) verbose_name_plural = "LSOAs" @@ -418,6 +425,7 @@ class DataZone(models.Model): ) year = models.IntegerField("Data Zone Year") local_authority = 
models.ForeignKey(LocalAuthority, on_delete=models.CASCADE) + geom = gis_models.MultiPolygonField(srid=4326, null=True, blank=True) class Meta: verbose_name = ("Data Zone",) @@ -580,6 +588,7 @@ class SOA(models.Model): year = models.IntegerField() soa_code = models.CharField(max_length=50, unique=True) soa_name = models.CharField(max_length=50) + geom = gis_models.MultiPolygonField(srid=4326, null=True, blank=True) class Meta: verbose_name = ("SOA",) diff --git a/deprivation_scores/urls.py b/deprivation_scores/urls.py index 6551862..2fc1787 100644 --- a/deprivation_scores/urls.py +++ b/deprivation_scores/urls.py @@ -27,9 +27,11 @@ def get_api_root_view(self, *args, **kwargs): def view_with_build_info(request, *args, **kwargs): response = view(request, *args, **kwargs) - + build_info = get_build_info() - response.headers["X-Git-Revision"] = build_info.get("latest_git_commit", "[latest commit hash not found]") + response.headers["X-Git-Revision"] = build_info.get( + "latest_git_commit", "[latest commit hash not found]" + ) return response @@ -60,7 +62,7 @@ def view_with_build_info(request, *args, **kwargs): ) router.register( - f"uk_population_density", + "uk_population_density", viewset=PopulationDensityViewSet, basename="uk_population_density", ) @@ -73,15 +75,15 @@ def view_with_build_info(request, *args, **kwargs): "indices_of_multiple_deprivation", view=UKIndexMultipleDeprivationView.as_view(), ), - path("index_of_multiple_deprivation_quantile", + path( + "index_of_multiple_deprivation_quantile", view=UKIndexMultipleDeprivationQuantileView.as_view(), ), - # JSON Schema path("schema/", SpectacularJSONAPIView.as_view(), name="schema"), - # Swagger UI - path("swagger-ui/", + path( + "swagger-ui/", SpectacularSwaggerView.as_view(), name="swagger-ui", ), diff --git a/deprivation_scores/views.py b/deprivation_scores/views.py index 65188d1..37c2cdc 100644 --- a/deprivation_scores/views.py +++ b/deprivation_scores/views.py @@ -5,8 +5,9 @@ mixins, ) from rest_framework.decorators import api_view -from rest_framework.views import APIView, Response from rest_framework.exceptions import ParseError, NotFound +from rest_framework.response import Response +from rest_framework.views import APIView from django_filters.rest_framework import DjangoFilterBackend from drf_spectacular.utils import ( diff --git a/docker-compose-postgis.yml b/docker-compose-postgis.yml new file mode 100644 index 0000000..3247f71 --- /dev/null +++ b/docker-compose-postgis.yml @@ -0,0 +1,68 @@ +services: + web: + build: + context: . 
+ dockerfile: Dockerfile.postgis + ports: + - 8001:8001 + volumes: + - .:/app + environment: + - RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER=rcpchCensususer + - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PASSWORD=password + - RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME=rcpchCensusdb + - RCPCH_CENSUS_ENGINE_POSTGRES_DB_HOST=db + - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PORT=5432 + - RCPCH_CENSUS_ENGINE_SECRET_KEY=mysecretkey + - DEBUG=True + - DJANGO_ALLOWED_HOSTS=0.0.0.0 + - DJANGO_CSRF_TRUSTED_ORIGINS=https://localhost,https://0.0.0.0 + - POSTCODES_IO_API_URL=https://api.postcodes.io + - POSTCODES_IO_API_KEY=1234567890 + command: > + sh -c "/app/s/wait-for-db.sh && python -u manage.py collectstatic --noinput && python -u manage.py migrate && + python manage.py runserver 0.0.0.0:8001" + depends_on: + db: + condition: service_healthy + tty: true + stdin_open: true + + db: + image: postgis/postgis:15-3.4 + platform: linux/amd64 + volumes: + - "datadb_postgis:/var/lib/postgresql/data" + environment: + - POSTGRES_USER=rcpchCensususer + - POSTGRES_PASSWORD=password + - POSTGRES_DB=rcpchCensusdb + ports: + - 5432:5432 + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"] + interval: 5s + timeout: 3s + retries: 20 + tty: true + stdin_open: true + + pg_tileserv: + image: pramsey/pg_tileserv:latest + environment: + # Add these environment variables to pg_tileserv + - TS_TILE_EXTENT=8192 # Doubles the resolution within the tile + - TS_TILE_BUFFER=256 # Captures features that sit right on the edge of a tile + - HTTP_CORS_ORIGIN=* + - DATABASE_URL=postgresql://rcpchCensususer:password@db:5432/rcpchCensusdb + ports: + - 7800:7800 + depends_on: + db: + condition: service_healthy + restart: unless-stopped + tty: true + stdin_open: true + +volumes: + datadb_postgis: diff --git a/docker-compose.dev-init.yml b/docker-compose.dev-init.yml deleted file mode 100644 index 6e5dfd5..0000000 --- a/docker-compose.dev-init.yml +++ /dev/null @@ -1,52 +0,0 @@ -# runs a local version of RCPCHCensusEngine for development on port 8001 -# syncs changes in local code folder to the RCPCHCensusEngine container -# migrates the database and seeds the database -# YOU ONLY NEED TO USE THIS COMPOSE FILE THE FIRST TIME - -version: "3.12" - -services: - # web container - runs the django app - web: - build: . 
- ports: - - 8001:8001 - volumes: - - .:/app - environment: - # these env vars are ONLY for development - - RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER=rcpchCensususer - - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PASSWORD=password - - RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME=rcpchCensusdb - - RCPCH_CENSUS_ENGINE_POSTGRES_DB_HOST=db - - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PORT=5432 - - RCPCH_CENSUS_ENGINE_SECRET_KEY=mysecretkey - - DEBUG=True - - DJANGO_ALLOWED_HOSTS=0.0.0.0 - - DJANGO_CSRF_TRUSTED_ORIGINS=https://localhost,https://0.0.0.0 - - POSTCODES_IO_API_URL=https://api.postcodes.io - - POSTCODES_IO_API_KEY=1234567890 - command: > - sh -c "python -u manage.py collectstatic --noinput && - python -u manage.py migrate && - python manage.py seed --mode='__all__' && - python manage.py runserver 0.0.0.0:8001" - # ensures that docker compose always displays log output - tty: true - stdin_open: true - - # db container - runs postgres - db: - image: postgres:latest - volumes: - - "datadb:/var/lib/postgresql/data" - environment: - - POSTGRES_USER=rcpchCensususer - - POSTGRES_PASSWORD=password - - POSTGRES_DB=rcpchCensusdb - # ensures that docker compose always displays log output - tty: true - stdin_open: true - -volumes: - datadb: diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml deleted file mode 100644 index 4a57240..0000000 --- a/docker-compose.dev.yml +++ /dev/null @@ -1,50 +0,0 @@ -# runs a local version of RCPCHCensusEngine for development on port 8001 -# syncs changes in local code folder to the RCPCHCensusEngine container -# migrates the database and seeds the database -# YOU ONLY NEED TO USE THIS COMPOSE FILE THE FIRST TIME - -version: "3.12" - -services: - # web container - runs the django app - web: - build: . - ports: - - 8001:8001 - volumes: - - .:/app - environment: - # these env vars are ONLY for development - - RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER=rcpchCensususer - - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PASSWORD=password - - RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME=rcpchCensusdb - - RCPCH_CENSUS_ENGINE_POSTGRES_DB_HOST=db - - RCPCH_CENSUS_ENGINE_POSTGRES_DB_PORT=5432 - - RCPCH_CENSUS_ENGINE_SECRET_KEY=mysecretkey - - DEBUG=True - - DJANGO_ALLOWED_HOSTS=0.0.0.0 - - DJANGO_CSRF_TRUSTED_ORIGINS=https://localhost,https://0.0.0.0 - - POSTCODES_IO_API_URL=https://api.postcodes.io - - POSTCODES_IO_API_KEY=1234567890 - command: > - sh -c "python -u manage.py collectstatic --noinput && - python manage.py runserver 0.0.0.0:8001" - # ensures that docker compose always displays log output - tty: true - stdin_open: true - - # db container - runs postgres - db: - image: postgres:latest - volumes: - - "datadb:/var/lib/postgresql/data" - environment: - - POSTGRES_USER=rcpchCensususer - - POSTGRES_PASSWORD=password - - POSTGRES_DB=rcpchCensusdb - # ensures that docker compose always displays log output - tty: true - stdin_open: true - -volumes: - datadb: diff --git a/docs/boundaries.md b/docs/boundaries.md new file mode 100644 index 0000000..74d67ed --- /dev/null +++ b/docs/boundaries.md @@ -0,0 +1,65 @@ +# Boundary Dataset & Spatial Optimization Pipeline + +This document describes the automated pipeline for fetching, merging, and optimizing UK boundary geometries (LSOA, DataZone, SOA, and Local Authority) for the project. + +## Overview + +Unlike standard data seeding, geometries are handled via a post-processing enrichment phase. This ensures that the base statistical data (IMD scores) is established first, with spatial data added and optimized afterwards to support high-performance map rendering. 
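+
+As a rough illustration of this enrichment phase, the SQL sketch below shows
+the kind of merge, simplification, and indexing work the management command
+performs. Table names, join columns, and the simplification tolerance here
+are illustrative rather than the exact values used in the codebase.
+
+```sql
+-- Illustrative sketch only: real objects follow the temp_shapes_{year} and
+-- uk_master_* naming conventions described later in this document.
+
+-- 1. Merge streamed geometries from the temporary table into the base table.
+UPDATE deprivation_scores_lsoa AS l
+SET geom = t.geom
+FROM temp_shapes_2011 AS t
+WHERE l.lsoa_code = t.code
+  AND l.year = 2011;
+
+-- 2. Build a simplified Web Mercator copy for national-scale tile views.
+CREATE TABLE uk_lsoa_2011_simplified AS
+SELECT lsoa_code,
+       ST_SimplifyPreserveTopology(ST_Transform(geom, 3857), 500) AS geom
+FROM deprivation_scores_lsoa
+WHERE year = 2011 AND geom IS NOT NULL;
+
+-- 3. Add a GIST spatial index so pg_tileserv queries stay fast.
+CREATE INDEX idx_uk_lsoa_2011_simplified_geom
+    ON uk_lsoa_2011_simplified USING GIST (geom);
+```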
+ +### The Pipeline Workflow + +1. **Base Seeding**: The project first seeds the base tables (`LSOA`, `DataZone`, `SOA`, `LocalAuthority`) and their associated IMD tables using the standard CSV import modes. +2. **Enrichment (`import_bfc_boundaries`)**: + * The management command streams GeoJSON data directly from ArcGIS REST APIs in chunks. + * Data is loaded into a **temporary table** (`temp_shapes_{year}`). + * A SQL **Merge** is performed: Geometries are joined to the base tables using the specific code mappings and the `year`. +3. **Spatial Optimization**: + * **Transformation**: Geometries are transformed to **EPSG:3857** (Web Mercator). + * **Simplification**: Multiple resolutions are created (`ST_SimplifyPreserveTopology`) to ensure the map remains performant at national scales. + * **Indexing**: GIST spatial indexes are applied to all geometry columns. + +## Dataset Mapping & Endpoints + +We use **BFC (Boundaries Full Clipped)** datasets to ensure high-fidelity boundaries before simplification. + +| Nation / Type | Year | ArcGIS Endpoint (Layer 0 Query) | ArcGIS ID Field | Django Model Column | +| :--- | :--- | :--- | :--- | :--- | +| **England/Wales LSOA** | 2011 | [ONS FeatureServer](https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Lower_layer_Super_Output_Areas_Dec_2011_Boundaries_Full_Clipped_BFC_EW_V3_2022/FeatureServer/0/query) | `LSOA11CD` | `lsoa_code` | +| **England/Wales LSOA** | 2021 | [ONS FeatureServer](https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/LSOA_2021_EW_BFE_V10_RUC/FeatureServer/3/query) | `LSOA21CD` | `lsoa_code` | +| **Scotland DataZone** | 2011 | [ScotGov MapServer](https://maps.gov.scot/server/rest/services/ScotGov/StatisticalUnits/MapServer/2/query) | `DataZone` | `data_zone_code` | +| **N. Ireland SOA** | 2011 | [NISRA FeatureServer](https://services3.arcgis.com/APHjSHuFMGWVZFgQ/arcgis/rest/services/SOA2011/FeatureServer/0/query) | `SOA_CODE` | `soa_code` | +| **UK Local Authority** | 2019 | [ONS FeatureServer](https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Local_Authority_Districts_December_2019_Boundaries_UK_BFC/FeatureServer/0/query) | `LAD19CD` | `local_authority_district_code` | +| **UK Local Authority** | 2024 | [ONS FeatureServer](https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Local_Authority_Districts_May_2024_Boundaries_UK_BFC/FeatureServer/0/query) | `LAD24CD` | `local_authority_district_code` | + +## Technical Note: Why we use Streaming over ogr2ogr + +While `ogr2ogr` is a standard tool for spatial data migration, this project utilizes a custom Python streaming implementation via `requests` and `GeoPandas`. This approach was chosen to handle the **ArcGIS resultRecordCount** limits more gracefully and to avoid external binary dependencies in the web container. + +The **2024 Local Authority** endpoint, in particular, is sensitive to large requests; therefore, we implement a strict `chunk_size` (e.g., 100 records) to prevent timeout errors and 500-responses from the ArcGIS server. This streaming method ensures that we only move the necessary identifier and geometry fields into the database, keeping memory usage predictable during the enrichment process. + +## Architecture: pg_tileserv & CDN + +To achieve fast rendering on the frontend: + +1. **pg_tileserv**: A dedicated Go-based container connects to the database and serves the SQL views as **MVT (Mapbox Vector Tiles)**. +2. 
**Resolution Switching**: The frontend automatically requests different views based on zoom level: + * `z0-z4`: Uses `uk_master_tiles_z0_4` (High simplification). + * `z5-z7`: Uses `uk_master_tiles_z5_7` (Medium simplification). + * `z8+`: Uses `uk_master_tiles_z8_10` (Full detail BFC). +3. **CDN**: Tiles are cached at the edge via a CDN to ensure sub-second map interactivity. + +## How to Run + +After the base IMD data has been seeded, run the geometry enrichment: + +```bash +python manage.py seed --mode import_bfc_boundaries +``` + +To update or overwrite existing geometries, use the `--force` flag. + +## References + +PostGIS Reference: https://postgis.net/docs/ +ONS Geoportal: https://geoportal.statistics.gov.uk/ +pg_tileserv: https://github.com/CrunchyData/pg_tileserv \ No newline at end of file diff --git a/docs/deploy.md b/docs/deploy.md new file mode 100644 index 0000000..efd05a4 --- /dev/null +++ b/docs/deploy.md @@ -0,0 +1,98 @@ +# Azure & CDN Setup for UK Deprivation Map + +This document outlines the production architecture for deploying the Django REST Framework (DRF) API and the `pg_tileserv` vector tile server to Azure, using **Azure Front Door** and **API Management** for high-performance spatial data delivery. + +## 1. Database: Azure Database for PostgreSQL (Flexible Server) + +Both services must connect to the same database instance. + +* **Create Server**: Select the **Flexible Server** option in the Azure Portal. +* **Enable PostGIS**: Navigate to **Server Parameters**, search for `azure.extensions`, and add `POSTGIS`. +* **Spatial Optimization**: After running your Python build scripts, you must physically reorder the data rows to match the spatial index. This reduces disk I/O for tile requests: + + ```sql + CLUSTER public.uk_master_2021_z5_7 USING idx_uk_master_2021_z5_7_geom; + ANALYZE public.uk_master_2021_z5_7; + ``` + +## 2. Service A: Django API (Azure App Service) + +The Django application handles authentication, metadata, and the **Table Materialization Logic**. + +* **Service**: Azure App Service (Linux). +* **Build Strategy**: Your Python scripts should create **Physical Tables** with **GIST Indexes** rather than Views to prevent 500 Internal Server Errors in production. +* **Seed Command**: + + ```bash + python manage.py run_spatial_script --mode production + ``` + +## 3. Service B: Tile Server (Azure Container Apps) + +`pg_tileserv` is deployed as a lightweight container. + +* **Service**: Azure Container Apps (ACA). +* **Performance**: Because we use indexed tables, `pg_tileserv` can remain on a low-consumption tier (0.5 vCPU). +* **Environment Variables**: + * `DATABASE_URL`: Your PostgreSQL connection string. + * `HTTP_PORT`: `7800` + +## 4. Gateway: Azure API Management (APIM) + +In production, APIM sits between your CDN and your services to handle security and protocol translation. Apply the following **Inbound Policy** to handle CORS and internal caching: + +```xml + + + + + + [https://your-username.github.io](https://your-username.github.io) + + + GET + OPTIONS + + +
+      <allowed-headers>
+        <header>*</header>
+      </allowed-headers>
+    </cors>
+    <cache-lookup vary-by-developer="false" vary-by-developer-groups="false">
+      <vary-by-query-parameter>x</vary-by-query-parameter>
+      <vary-by-query-parameter>y</vary-by-query-parameter>
+      <vary-by-query-parameter>z</vary-by-query-parameter>
+    </cache-lookup>
+  </inbound>
+</policies>
+``` + +## 5. Global Entry: Azure Front Door (CDN) + +Azure Front Door provides global caching and SSL termination. + +Routing & Caching Configuration +Origin Group: Point to your APIM Gateway Endpoint. + +Path Patterns: + +/api/* (Django) + +/tiles/* (pg_tileserv) + +Query String Behavior: Set to "Include all query strings". This is mandatory so that the CDN treats every unique tile coordinate (x, y, z) as a unique cache entry. + +Compression: Enable Brotli and Gzip. Vector tiles (.pbf) are highly compressible. + +## 6. Deployment Workflow & Cache Purging + +Whenever the underlying spatial tables are updated/rebuilt by your Python script, the CDN and APIM caches must be invalidated. + +```bash +# Purge logic for Azure Front Door +az network front-door endpoint purge --content-paths "/tiles/*" \ + --profile-name MyFrontDoorProfile --resource-group MyResourceGroup + ``` diff --git a/infra/azure/containerapp.template.yml b/infra/azure/containerapp.template.yml new file mode 100644 index 0000000..350a0e6 --- /dev/null +++ b/infra/azure/containerapp.template.yml @@ -0,0 +1,139 @@ +# infra/azure/containerapp.template.yml +properties: + environmentId: resourceId('Microsoft.App/managedEnvironments', '__ENV_STORAGE_NAME__') + configuration: + activeRevisionsMode: Single + ingress: + external: true + targetPort: 8000 + transport: auto + allowInsecure: false + secrets: + - name: db-password + keyVaultUrl: https://__KEY_VAULT_NAME__.vault.azure.net/secrets/postgres-password + identity: system + - name: django-secret-key + keyVaultUrl: https://__KEY_VAULT_NAME__.vault.azure.net/secrets/django-secret-key + identity: system + - name: postgres-db-host + keyVaultUrl: https://__KEY_VAULT_NAME__.vault.azure.net/secrets/postgres-db-host + identity: system + - name: postgres-db-name + keyVaultUrl: https://__KEY_VAULT_NAME__.vault.azure.net/secrets/postgres-db-name + identity: system + - name: postgres-db-password + keyVaultUrl: https://__KEY_VAULT_NAME__.vault.azure.net/secrets/postgres-db-password + identity: system + - name: postgres-db-port + keyVaultUrl: https://__KEY_VAULT_NAME__.vault.azure.net/secrets/postgres-db-port + identity: system + - name: postgres-db-user + keyVaultUrl: https://__KEY_VAULT_NAME__.vault.azure.net/secrets/postgres-db-user + identity: system + - name: sentry-dsn + keyVaultUrl: https://__KEY_VAULT_NAME__.vault.azure.net/secrets/sentry-dsn + identity: system + - name: tiles-db-url + keyVaultUrl: https://__KEY_VAULT_NAME__.vault.azure.net/secrets/tiles-db-url + identity: system + - name: postcodes-io-api-key + keyVaultUrl: https://__KEY_VAULT_NAME__.vault.azure.net/secrets/postcodes-io-api-key + identity: system + + template: + containers: + # PostgreSQL with PostGIS - runs as sidecar + - name: db + image: postgis/postgis:15-3.4 + env: + - name: POSTGRES_USER + secretRef: postgres-db-user + - name: POSTGRES_PASSWORD + secretRef: postgres-db-password + - name: POSTGRES_DB + secretRef: postgres-db-name + - name: DB_DUMP_VERSION + value: "__DB_DUMP_VERSION__" + - name: PGDATA + value: "/var/lib/postgresql/data/pgdata" + command: + - /bin/bash + - -c + - | + # Download init script from GitHub + mkdir -p /docker-entrypoint-initdb.d + wget -O /docker-entrypoint-initdb.d/01-init.sh \ + https://raw.githubusercontent.com/rcpch/rcpch-census-platform/shapes/init-db/01-download-and-restore.sh + chmod +x /docker-entrypoint-initdb.d/01-init.sh + + # Start PostgreSQL + exec docker-entrypoint.sh postgres + resources: + cpu: 2.0 + memory: 4Gi + + # Django Web App + - name: web + image: 
__ACR_LOGIN_SERVER__/rcpch-census-web:__IMAGE_TAG__ + env: + - name: DJANGO_ALLOWED_HOSTS + value: "__DJANGO_ALLOWED_HOSTS__" + - name: DJANGO_CSRF_TRUSTED_ORIGINS + value: "__DJANGO_CSRF_TRUSTED_ORIGINS__" + - name: DJANGO_SECRET_KEY + secretRef: django-secret-key + - name: RCPCH_CENSUS_ENGINE_POSTGRES_DB_HOST + value: "localhost" + - name: RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME + secretRef: postgres-db-name + - name: RCPCH_CENSUS_ENGINE_POSTGRES_DB_PASSWORD + secretRef: postgres-db-password + - name: RCPCH_CENSUS_ENGINE_POSTGRES_DB_PORT + value: "5432" + - name: RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER + secretRef: postgres-db-user + - name: POSTCODES_IO_API_KEY + secretRef: postcodes-io-api-key + - name: SENTRY_DSN + secretRef: sentry-dsn + - name: SENTRY_ENVIRONMENT + value: "shapes" + - name: SENTRY_TRACES_SAMPLE_RATE + value: "0.1" + probes: + - type: Startup + httpGet: + path: /health/ + port: 8000 + initialDelaySeconds: 30 + periodSeconds: 10 + failureThreshold: 60 + - type: Liveness + httpGet: + path: /health/ + port: 8000 + periodSeconds: 30 + failureThreshold: 3 + - type: Readiness + httpGet: + path: /health/ + port: 8000 + periodSeconds: 10 + failureThreshold: 3 + resources: + cpu: 1.0 + memory: 2Gi + + # pg_tileserv for vector tiles + - name: tiles + image: pramsey/pg_tileserv:latest + env: + - name: DATABASE_URL + secretRef: tiles-db-url + resources: + cpu: 0.5 + memory: 1Gi + + scale: + minReplicas: 1 + maxReplicas: 1 \ No newline at end of file diff --git a/infra/nginx/Dockerfile b/infra/nginx/Dockerfile new file mode 100644 index 0000000..25fc8c6 --- /dev/null +++ b/infra/nginx/Dockerfile @@ -0,0 +1,3 @@ +FROM nginx:1.27-alpine + +COPY default.conf /etc/nginx/conf.d/default.conf diff --git a/infra/nginx/default.conf b/infra/nginx/default.conf new file mode 100644 index 0000000..c445649 --- /dev/null +++ b/infra/nginx/default.conf @@ -0,0 +1,32 @@ +server { + listen 80; + + # Tile endpoint (pg_tileserv) via /tiles/* + location /tiles/ { + # CORS for GitHub Pages demos and other clients + add_header Access-Control-Allow-Origin "*" always; + add_header Access-Control-Allow-Methods "GET, OPTIONS" always; + add_header Access-Control-Allow-Headers "*" always; + + if ($request_method = OPTIONS) { + return 204; + } + + # Cache hints for CDN (purge when you rebuild tiles) + add_header Cache-Control "public, max-age=31536000, immutable" always; + + rewrite ^/tiles/(.*)$ /$1 break; + proxy_pass http://127.0.0.1:7800; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + # Everything else -> Django + location / { + proxy_pass http://127.0.0.1:8001; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } +} diff --git a/init-db/01-download-and-restore.sh b/init-db/01-download-and-restore.sh new file mode 100755 index 0000000..520a176 --- /dev/null +++ b/init-db/01-download-and-restore.sh @@ -0,0 +1,50 @@ +# init-db/01-download-and-restore.sh +#!/bin/bash +set -e + +RELEASE_VERSION="${DB_DUMP_VERSION}" +GITHUB_REPO="rcpch/rcpch-census-platform" +BASE_URL="https://github.com/${GITHUB_REPO}/releases/download/${RELEASE_VERSION}" +DUMP_FILE="rcpch-census-${RELEASE_VERSION}.dump" + +echo "=== Starting database initialization ===" +echo "Release version: ${RELEASE_VERSION}" + +# Check if database is already populated +if psql -U "$POSTGRES_USER" -d "$POSTGRES_DB" -tAc "SELECT 1 FROM pg_tables WHERE 
tablename='deprivation_scores_lsoa'" | grep -q 1; then + echo "Database already initialized, skipping restore" + exit 0 +fi + +echo "Downloading database dump ${RELEASE_VERSION}..." + +# Check if dump is split +PART_AA="${DUMP_FILE}.part-aa" +if wget --spider "${BASE_URL}/${PART_AA}" 2>/dev/null; then + echo "Detected split dump, downloading and reassembling..." + + # Download all parts + for part in {a..z}; do + PART_FILE="${DUMP_FILE}.part-a${part}" + if wget -q "${BASE_URL}/${PART_FILE}" 2>/dev/null; then + echo "Downloaded ${PART_FILE}" + else + break + fi + done + + # Reassemble + cat ${DUMP_FILE}.part-* > ${DUMP_FILE} + rm ${DUMP_FILE}.part-* + echo "Reassembled dump file" +else + # Single file download + echo "Downloading single dump file..." + wget -q "${BASE_URL}/${DUMP_FILE}" +fi + +echo "Restoring database (this may take 5-10 minutes)..." +pg_restore -U "$POSTGRES_USER" -d "$POSTGRES_DB" --no-owner --no-acl ${DUMP_FILE} || true + +echo "Database restored successfully" +rm ${DUMP_FILE} \ No newline at end of file diff --git a/rcpch_census_platform/settings.py b/rcpch_census_platform/settings.py index 87d16b2..25ceda9 100644 --- a/rcpch_census_platform/settings.py +++ b/rcpch_census_platform/settings.py @@ -53,6 +53,7 @@ "django.contrib.admin", "django.contrib.auth", "django.contrib.contenttypes", + "django.contrib.gis", "django.contrib.sessions", "django.contrib.messages", "django.contrib.staticfiles", @@ -100,7 +101,7 @@ DATABASES = { "default": { - "ENGINE": "django.db.backends.postgresql", + "ENGINE": "django.contrib.gis.db.backends.postgis", "NAME": os.environ.get("RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME"), "USER": os.environ.get("RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER"), "PASSWORD": os.environ.get("RCPCH_CENSUS_ENGINE_POSTGRES_DB_PASSWORD"), @@ -150,7 +151,10 @@ STATIC_ROOT = str(BASE_DIR.joinpath("staticfiles")) STATICFILES_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage" WHITENOISE_ROOT = os.path.join(BASE_DIR, "static/root") - +GITHUB_PAGES_URL = os.getenv("GITHUB_PAGES_URL", "http://localhost:3000/") +if DEBUG: + GITHUB_PAGES_URL = "http://localhost:3000/" + CORS_ALLOW_ALL_ORIGINS = True # Default primary key field type # https://docs.djangoproject.com/en/4.1/ref/settings/#default-auto-field diff --git a/requirements/common-requirements.txt b/requirements/common-requirements.txt index e17da2f..eb97649 100644 --- a/requirements/common-requirements.txt +++ b/requirements/common-requirements.txt @@ -10,4 +10,15 @@ drf-spectacular django-filter psycopg2-binary whitenoise -# RCPCH imports \ No newline at end of file +# RCPCH imports + +# Geo/PostGIS-related Python packages (install after system GDAL/GEOS/PROJ libs) +# Pin broad compatible versions to reduce binary build issues β€” adjust if you +# know the exact system GDAL/GEOS/PROJ versions you will have in the image. 
+pyproj>=3.5,<5 +shapely>=2.0,<3 +fiona>=1.9,<2 +geopandas==1.1.2 +pandas==2.3.3 +SQLAlchemy==2.0.45 +GeoAlchemy2==0.18.1 \ No newline at end of file diff --git a/s/build-dump b/s/build-dump new file mode 100755 index 0000000..1ee2009 --- /dev/null +++ b/s/build-dump @@ -0,0 +1,157 @@ +#!/bin/bash +# s/build-dump.sh +# Creates a database dump file locally + +set -e + +DUMP_DIR="postgis_dump_files" +DUMP_FILE="${DUMP_DIR}/rcpch-census.dump" +CONTAINER_NAME="census-build-db" + +echo "==========================================" +echo "RCPCH Census Database Dump Builder" +echo "==========================================" +echo "" + +# Create dump directory if it doesn't exist +mkdir -p "$DUMP_DIR" + +# Check if dump already exists (either single file or split files) +EXISTING_DUMP=false +if [ -f "$DUMP_FILE" ]; then + EXISTING_DUMP=true + DUMP_TYPE="single file" +elif ls ${DUMP_DIR}/rcpch-census.dump.part-* 1> /dev/null 2>&1; then + EXISTING_DUMP=true + DUMP_TYPE="split files ($(ls ${DUMP_DIR}/rcpch-census.dump.part-* | wc -l) parts)" +fi + +if [ "$EXISTING_DUMP" = true ]; then + echo "⚠ Dump already exists: $DUMP_TYPE" + read -p "Do you want to rebuild it? (y/n) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Using existing dump." + exit 0 + fi + echo "Rebuilding dump..." + rm -f "$DUMP_FILE" + rm -f ${DUMP_DIR}/rcpch-census.dump.part-* +fi + +# Check if build container is already running +if docker ps -a --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then + echo "Cleaning up existing build container..." + docker rm -f "$CONTAINER_NAME" || true +fi + +# Start PostGIS container +echo "" +echo "Starting PostGIS container..." +docker run -d --name "$CONTAINER_NAME" \ + -e POSTGRES_USER=rcpchCensususer \ + -e POSTGRES_PASSWORD=buildpass \ + -e POSTGRES_DB=rcpchCensusdb \ + -p 5432:5432 \ + postgis/postgis:15-3.4 + +# Wait for database to be ready +echo "Waiting for database to be ready..." +for i in {1..60}; do + if docker exec "$CONTAINER_NAME" pg_isready -U rcpchCensususer > /dev/null 2>&1; then + echo "βœ“ Database is ready" + break + fi + if [ $i -eq 60 ]; then + echo "βœ— Database failed to start in time" + docker rm -f "$CONTAINER_NAME" + exit 1 + fi + sleep 2 +done + +# Build Django image if needed +echo "" +echo "Building Django seeder image..." +DJANGO_IMAGE=$(docker build -q -f Dockerfile.postgis .) + +# Run seeding commands +echo "" +echo "Running database migrations and seeding..." +echo "This may take a while (15-30 minutes for large datasets)..." +echo "" + +docker run --rm --network host \ + -e RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER=rcpchCensususer \ + -e RCPCH_CENSUS_ENGINE_POSTGRES_DB_PASSWORD=buildpass \ + -e RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME=rcpchCensusdb \ + -e RCPCH_CENSUS_ENGINE_POSTGRES_DB_HOST=127.0.0.1 \ + -e RCPCH_CENSUS_ENGINE_POSTGRES_DB_PORT=5432 \ + -e DJANGO_SECRET_KEY=dummy-build-key \ + -e PYTHONUNBUFFERED=1 \ + "$DJANGO_IMAGE" \ + sh -c "/app/s/wait-for-db.sh && \ + python manage.py migrate --noinput && \ + python manage.py seed --mode=__all__ && \ + python manage.py seed --mode=import_bfc_boundaries" + +if [ $? -ne 0 ]; then + echo "βœ— Seeding failed" + docker rm -f "$CONTAINER_NAME" + exit 1 +fi + +echo "" +echo "βœ“ Seeding completed successfully" + +# Create dump file +echo "" +echo "Creating database dump (this may take several minutes)..." 
+docker exec "$CONTAINER_NAME" pg_dump \ + -U rcpchCensususer \ + -Fc -Z 9 \ + -f /tmp/rcpch-census.dump \ + rcpchCensusdb + +# Copy dump to local filesystem +docker cp "${CONTAINER_NAME}:/tmp/rcpch-census.dump" "./${DUMP_FILE}" + +# Get file size +SIZE=$(du -h "$DUMP_FILE" | cut -f1) +echo "βœ“ Dump created: $DUMP_FILE ($SIZE)" + +# Test restore in a separate container +echo "" +echo "Testing dump restore..." +TEST_CONTAINER="census-test-db" + +docker run -d --name "$TEST_CONTAINER" \ + -e POSTGRES_USER=testuser \ + -e POSTGRES_PASSWORD=testpass \ + -e POSTGRES_DB=testdb \ + -p 5433:5432 \ + postgis/postgis:15-3.4 + +sleep 10 + +if docker exec -i "$TEST_CONTAINER" pg_restore \ + -U testuser \ + -d testdb \ + --no-owner < "$DUMP_FILE" > /dev/null 2>&1; then + echo "βœ“ Restore test successful" +else + echo "⚠ Restore test had warnings (this may be normal)" +fi + +# Cleanup +echo "" +echo "Cleaning up containers..." +docker rm -f "$TEST_CONTAINER" > /dev/null 2>&1 +docker rm -f "$CONTAINER_NAME" > /dev/null 2>&1 + +echo "" +echo "==========================================" +echo "βœ“ Dump file ready: $DUMP_FILE ($SIZE)" +echo "==========================================" +echo "" +echo "Run './s/release-dump.sh' to create a GitHub release" \ No newline at end of file diff --git a/s/dev b/s/dev new file mode 100755 index 0000000..27c3c25 --- /dev/null +++ b/s/dev @@ -0,0 +1,9 @@ +#!/bin/bash -e + +# Convenience script: bring up the PostGIS development stack +echo "✨ Running PostGIS development compose πŸ—ΊοΈ" + +# ensure data dir exists (compose will create volume but keep parity with other scripts) +mkdir -p datadb + +docker compose -f docker-compose-postgis.yml up --build diff --git a/s/docker-init b/s/docker-init deleted file mode 100755 index eb7203f..0000000 --- a/s/docker-init +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -# scripts may need to be made executable on some platforms before they can be run -# chmod +x is the command to do this on unixy systems - -# starts the postgres and application containers -# migrates db, seeds db, creates superuser, runs server - -mkdir datadb -docker compose -f docker-compose.dev-init.yml up \ No newline at end of file diff --git a/s/docker-start b/s/docker-start deleted file mode 100755 index 7977b2e..0000000 --- a/s/docker-start +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -# scripts may need to be made executable on some platforms before they can be run -# chmod +x is the command to do this on unixy systems - -# starts the postgres and application containers -# DOES NOT MIGRATE OR SEED DATA (this is done in the s/docker-init script) - -docker compose -f docker-compose.dev.yml up \ No newline at end of file diff --git a/s/pg-tiles b/s/pg-tiles new file mode 100755 index 0000000..05ba63c --- /dev/null +++ b/s/pg-tiles @@ -0,0 +1,11 @@ +#!/bin/bash -e + +# Convenience script: start pg_tileserv service for serving vector tiles +# Usage: ./s/pg-tiles + +echo "✨ Starting pg_tileserv (PostGIS β†’ MVT) on port 7800" + +# ensure data dir exists +mkdir -p datadb + +docker compose -f docker-compose-postgis.yml up --build pg_tileserv diff --git a/s/release-dump b/s/release-dump new file mode 100755 index 0000000..55515c5 --- /dev/null +++ b/s/release-dump @@ -0,0 +1,433 @@ +#!/bin/bash +# s/release-dump.sh +# Creates a GitHub release and uploads the dump file + +set -e + +DUMP_DIR="postgis_dump_files" +DUMP_FILE="${DUMP_DIR}/rcpch-census.dump" +MAX_SIZE_MB=1800 # Stay under 2GB limit with safety margin + +echo "==========================================" 
+echo "RCPCH Census Database Release Publisher" +echo "==========================================" +echo "" + +# Create dump directory if it doesn't exist +mkdir -p "$DUMP_DIR" + +# Check if dump exists (either single file or split files) +DUMP_EXISTS=false +IS_SPLIT=false + +if [ -f "$DUMP_FILE" ]; then + DUMP_EXISTS=true + IS_SPLIT=false + echo "Found existing dump: rcpch-census.dump" +elif ls ${DUMP_DIR}/rcpch-census.dump.part-* 1> /dev/null 2>&1; then + DUMP_EXISTS=true + IS_SPLIT=true + NUM_EXISTING_PARTS=$(ls ${DUMP_DIR}/rcpch-census.dump.part-* | wc -l) + echo "Found existing split dump: $NUM_EXISTING_PARTS parts" +fi + +if [ "$DUMP_EXISTS" = false ]; then + echo "βœ— No dump file found in $DUMP_DIR/" + echo "" + read -p "Do you want to build it now? (y/n) " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + ./s/build-dump.sh + else + echo "Please run './s/build-dump.sh' first" + exit 1 + fi +fi + +# Check if gh CLI is installed +if ! command -v gh &> /dev/null; then + echo "βœ— GitHub CLI (gh) is not installed" + echo "" + echo "Install it with:" + echo " macOS: brew install gh" + echo " Linux: See https://github.com/cli/cli#installation" + echo "" + exit 1 +fi + +# Check if authenticated +if ! gh auth status &> /dev/null; then + echo "βœ— Not authenticated with GitHub" + echo "" + echo "Run: gh auth login" + exit 1 +fi + +# Get file size +if [ "$IS_SPLIT" = true ]; then + # If already split from previous build, calculate total size + SIZE_MB=0 + for part in ${DUMP_DIR}/rcpch-census.dump.part-*; do + PART_SIZE=$(du -m "$part" | cut -f1) + SIZE_MB=$((SIZE_MB + PART_SIZE)) + done + SIZE="${SIZE_MB}MB (split)" +else + SIZE=$(du -h "$DUMP_FILE" | cut -f1) + SIZE_MB=$(du -m "$DUMP_FILE" | cut -f1) +fi + +echo "Total dump size: $SIZE ($SIZE_MB MB)" + +# Get latest release version +LATEST_VERSION=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "") + +if [ -z "$LATEST_VERSION" ]; then + echo "No previous releases found." + SUGGESTED_MAJOR="v1.0.0" + SUGGESTED_MINOR="v0.1.0" + SUGGESTED_PATCH="v0.0.1" +else + echo "Latest release: $LATEST_VERSION" + echo "" + + # Strip 'v' prefix for processing + VERSION_NUM="${LATEST_VERSION#v}" + + # Split into major.minor.patch + IFS='.' read -r MAJOR MINOR PATCH <<< "$VERSION_NUM" + + # Calculate next versions + SUGGESTED_MAJOR="v$((MAJOR + 1)).0.0" + SUGGESTED_MINOR="v${MAJOR}.$((MINOR + 1)).0" + SUGGESTED_PATCH="v${MAJOR}.${MINOR}.$((PATCH + 1))" +fi + +# Display suggestions +echo "Suggested versions:" +echo " 1) $SUGGESTED_PATCH (patch - bug fixes, data corrections)" +echo " 2) $SUGGESTED_MINOR (minor - new features, new data fields)" +echo " 3) $SUGGESTED_MAJOR (major - breaking changes, new schema)" +echo " 4) Custom version" +echo "" + +# Prompt for version choice +read -p "Choose version (1-4) or press Enter for patch [$SUGGESTED_PATCH]: " CHOICE + +case "$CHOICE" in + 1|"") + VERSION="$SUGGESTED_PATCH" + ;; + 2) + VERSION="$SUGGESTED_MINOR" + ;; + 3) + VERSION="$SUGGESTED_MAJOR" + ;; + 4) + echo "Enter custom version (e.g., v2024.10 or v1.5.2):" + read -r VERSION + if [ -z "$VERSION" ]; then + echo "βœ— Version cannot be empty" + exit 1 + fi + # Ensure version starts with 'v' + if [[ ! 
"$VERSION" =~ ^v ]]; then + VERSION="v${VERSION}" + fi + ;; + *) + echo "βœ— Invalid choice" + exit 1 + ;; +esac + +echo "" +echo "Selected version: $VERSION" + +# Check if release already exists +if gh release view "$VERSION" &> /dev/null; then + echo "⚠ Release $VERSION already exists" + read -p "Do you want to delete and recreate it? (y/n) " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + echo "Deleting existing release..." + gh release delete "$VERSION" -y + else + echo "Cancelled" + exit 1 + fi +fi + +# Prepare file(s) for upload +UPLOAD_FILES=() +NEEDS_NEW_SPLIT=false + +if [ "$IS_SPLIT" = true ]; then + # Use existing split files, just rename them for the version + echo "Using existing split files..." + for old_part in ${DUMP_DIR}/rcpch-census.dump.part-*; do + SUFFIX=$(echo "$old_part" | sed "s|${DUMP_DIR}/rcpch-census.dump.part-||") + NEW_PART="${DUMP_DIR}/rcpch-census-${VERSION}.dump.part-${SUFFIX}" + cp "$old_part" "$NEW_PART" + UPLOAD_FILES+=("$NEW_PART") + done + NUM_PARTS=${#UPLOAD_FILES[@]} + echo "βœ“ Prepared $NUM_PARTS parts for upload" +elif [ "$SIZE_MB" -gt "$MAX_SIZE_MB" ]; then + # Need to split for the first time + NEEDS_NEW_SPLIT=true + echo "" + echo "⚠ File is larger than ${MAX_SIZE_MB}MB and will be split for GitHub upload" + echo "Splitting dump file..." + + VERSIONED_BASE="${DUMP_DIR}/rcpch-census-${VERSION}.dump" + + # Split into 1.8GB chunks + split -b ${MAX_SIZE_MB}M "$DUMP_FILE" "${VERSIONED_BASE}.part-" + + # Get list of parts + for part in ${VERSIONED_BASE}.part-*; do + UPLOAD_FILES+=("$part") + done + + NUM_PARTS=${#UPLOAD_FILES[@]} + echo "βœ“ Split into $NUM_PARTS parts" +else + # Single file, small enough + VERSIONED_DUMP="${DUMP_DIR}/rcpch-census-${VERSION}.dump" + cp "$DUMP_FILE" "$VERSIONED_DUMP" + UPLOAD_FILES=("$VERSIONED_DUMP") +fi + +# Prompt for release type/description +echo "" +echo "What type of release is this?" +echo " 1) Data update (Updated IMD data for [date/region])" +echo " 2) Bug fix (Fixed [issue])" +echo " 3) New feature (Added [feature])" +echo " 4) Custom description" +echo "" +read -p "Choose (1-4): " RELEASE_TYPE + +case "$RELEASE_TYPE" in + 1) + echo "Enter date/region for data update (e.g., 'October 2024' or 'England Q4 2024'):" + read -r UPDATE_INFO + TITLE="Data Update ${VERSION}" + NOTES="Updated IMD data for ${UPDATE_INFO}" + ;; + 2) + echo "Describe the fix:" + read -r FIX_DESC + TITLE="Bug Fix ${VERSION}" + NOTES="Fixed: ${FIX_DESC}" + ;; + 3) + echo "Describe the new feature:" + read -r FEATURE_DESC + TITLE="New Feature ${VERSION}" + NOTES="Added: ${FEATURE_DESC}" + ;; + 4) + echo "Enter release title:" + read -r TITLE + echo "Enter release description:" + read -r NOTES + ;; + *) + TITLE="Database Dump ${VERSION}" + NOTES="" + ;; +esac + +# Create release notes with split file instructions if needed +if [ "$IS_SPLIT" = true ] || [ "$NEEDS_NEW_SPLIT" = true ]; then + DOWNLOAD_INSTRUCTIONS=" +## ⚠️ Large File - Assembly Required + +This dump has been split into ${NUM_PARTS} parts due to GitHub's file size limits. + +**Download and reassemble:** + +\`\`\`bash +# Download all parts +wget https://github.com/rcpch/rcpch-census-platform/releases/download/${VERSION}/rcpch-census-${VERSION}.dump.part-aa +wget https://github.com/rcpch/rcpch-census-platform/releases/download/${VERSION}/rcpch-census-${VERSION}.dump.part-ab +# ... 
(download remaining parts) + +# Reassemble +cat rcpch-census-${VERSION}.dump.part-* > rcpch-census-${VERSION}.dump + +# Verify integrity (optional) +ls -lh rcpch-census-${VERSION}.dump + +# Clean up parts +rm rcpch-census-${VERSION}.dump.part-* +\`\`\` + +**Or use this one-liner:** + +\`\`\`bash +# Download and reassemble automatically +for part in {a..z}; do + wget -q https://github.com/rcpch/rcpch-census-platform/releases/download/${VERSION}/rcpch-census-${VERSION}.dump.part-a\${part} 2>/dev/null || break +done +cat rcpch-census-${VERSION}.dump.part-* > rcpch-census-${VERSION}.dump +rm rcpch-census-${VERSION}.dump.part-* +\`\`\` +" +else + DOWNLOAD_INSTRUCTIONS=" +\`\`\`bash +# Download the dump +wget https://github.com/rcpch/rcpch-census-platform/releases/download/${VERSION}/rcpch-census-${VERSION}.dump +\`\`\` +" +fi + +RELEASE_NOTES="# RCPCH Census Database Dump ${VERSION} + +**Total Size:** ${SIZE} + +${NOTES} + +${DOWNLOAD_INSTRUCTIONS} + +## How to use this dump: + +### Option 1: Using Docker Compose + +\`\`\`yaml +# docker-compose.yml +services: + db: + image: postgis/postgis:15-3.4 + environment: + POSTGRES_USER: rcpchCensususer + POSTGRES_PASSWORD: \${DB_PASSWORD} + POSTGRES_DB: rcpchCensusdb + volumes: + - ./init-db:/docker-entrypoint-initdb.d + - pgdata:/var/lib/postgresql/data + ports: + - \"5432:5432\" + +volumes: + pgdata: +\`\`\` + +\`\`\`bash +# Download and prepare +mkdir -p init-db +cd init-db + +${DOWNLOAD_INSTRUCTIONS} + +# Create restore script +cat > restore.sh << 'EOF' +#!/bin/bash +set -e +echo \"Restoring database from dump...\" +pg_restore -U \"\$POSTGRES_USER\" -d \"\$POSTGRES_DB\" --no-owner /docker-entrypoint-initdb.d/rcpch-census-${VERSION}.dump +echo \"Database restored successfully\" +EOF + +chmod +x restore.sh +cd .. + +# Start services +docker-compose up -d +\`\`\` + +### Option 2: Manual Docker Setup + +\`\`\`bash +${DOWNLOAD_INSTRUCTIONS} + +# Start PostGIS container +docker run -d --name census-db \\ + -e POSTGRES_USER=rcpchCensususer \\ + -e POSTGRES_PASSWORD=yourpassword \\ + -e POSTGRES_DB=rcpchCensusdb \\ + -p 5432:5432 \\ + -v census-data:/var/lib/postgresql/data \\ + postgis/postgis:15-3.4 + +# Wait for database to be ready +sleep 10 + +# Restore the dump +docker exec -i census-db pg_restore \\ + -U rcpchCensususer \\ + -d rcpchCensusdb \\ + --no-owner < rcpch-census-${VERSION}.dump + +echo \"Database ready on localhost:5432\" +\`\`\` + +### Option 3: Local PostgreSQL + +\`\`\`bash +# Create database +createdb -U postgres rcpchCensusdb + +# Enable PostGIS +psql -U postgres -d rcpchCensusdb -c \"CREATE EXTENSION postgis;\" + +# Restore dump +pg_restore -U postgres -d rcpchCensusdb --no-owner rcpch-census-${VERSION}.dump +\`\`\` + +## Data Sources + +This dump contains Index of Multiple Deprivation (IMD) data for all four UK nations, including: +- LSOA boundaries and geometries +- IMD scores and rankings +- Postcode to LSOA mappings + +## License + +See repository LICENSE file for data usage terms. +" + +# Create the release +echo "" +echo "Creating GitHub release: $VERSION" +echo "Title: $TITLE" +echo "" + +gh release create "$VERSION" "${UPLOAD_FILES[@]}" \ + --title "$TITLE" \ + --notes "$RELEASE_NOTES" + +if [ $? -eq 0 ]; then + echo "" + echo "==========================================" + echo "βœ“ Release published successfully!" 
+ echo "==========================================" + echo "" + echo "View at: https://github.com/rcpch/rcpch-census-platform/releases/tag/${VERSION}" + + if [ "$IS_SPLIT" = true ] || [ "$NEEDS_NEW_SPLIT" = true ]; then + echo "" + echo "Files uploaded (${NUM_PARTS} parts):" + for file in "${UPLOAD_FILES[@]}"; do + echo " - $(basename $file)" + done + fi + + echo "" + + # Clean up versioned files (keep original rcpch-census.dump or parts in dump dir) + for file in "${UPLOAD_FILES[@]}"; do + rm -f "$file" + done +else + echo "βœ— Failed to create release" + # Clean up on failure + for file in "${UPLOAD_FILES[@]}"; do + rm -f "$file" + done + exit 1 +fi \ No newline at end of file diff --git a/s/up b/s/up deleted file mode 100755 index f765211..0000000 --- a/s/up +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -# scripts may need to be made executable on some platforms before they can be run -# chmod +x is the command to do this on unixy systems - -# starts all docker compose services -mkdir datadb -echo "✨ Running RCPCH Census Platform docker πŸ› οΈ" -docker compose -f docker-compose.dev-init.yml up \ No newline at end of file diff --git a/s/up-debug b/s/up-debug deleted file mode 100755 index e1f649e..0000000 --- a/s/up-debug +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -e - -# copy of s/start-dev with -Xfrozen_modules=off flag for debugpy to work -echo "✨ Running up-debug πŸ› οΈ" -mkdir datadb -echo "✨ Running RCPCH Census Platform docker πŸ› οΈ" -docker compose -f docker-compose.dev-init.yml up \ No newline at end of file diff --git a/s/wait-for-db.sh b/s/wait-for-db.sh new file mode 100755 index 0000000..d68de5f --- /dev/null +++ b/s/wait-for-db.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -euo pipefail + +# Wait for Postgres to be available using pg_isready +# Uses environment variables if set, otherwise defaults +: ${RCPCH_CENSUS_ENGINE_POSTGRES_DB_HOST:=db} +: ${RCPCH_CENSUS_ENGINE_POSTGRES_DB_PORT:=5432} +: ${RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER:=rcpchCensususer} +: ${RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME:=rcpchCensusdb} + +DB_HOST="$RCPCH_CENSUS_ENGINE_POSTGRES_DB_HOST" +DB_PORT="$RCPCH_CENSUS_ENGINE_POSTGRES_DB_PORT" +DB_USER="$RCPCH_CENSUS_ENGINE_POSTGRES_DB_USER" +DB_NAME="${RCPCH_CENSUS_ENGINE_POSTGRES_DB_NAME:-rcpchCensusdb}" + +echo "Waiting for database $DB_HOST:$DB_PORT as $DB_USER..." +until pg_isready -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" >/dev/null 2>&1; do + printf '.' + sleep 1 +done +echo "\nDatabase is available" diff --git a/site/index.html b/site/index.html new file mode 100644 index 0000000..c639a44 --- /dev/null +++ b/site/index.html @@ -0,0 +1,88 @@ + + + + + + + UK Deprivation Map | Era Comparison + + + + + + +
+  <body>
+    <header>
+      <h1>UK Deprivation Viewer</h1>
+
+      <!-- Era toggle: option values assumed from the 2011 and 2021 dataset eras -->
+      <label for="era-toggle">Era</label>
+      <select id="era-toggle">
+        <option value="2011">2011</option>
+        <option value="2021" selected>2021</option>
+      </select>
+
+      <!-- Nation filter: only the "all" value is referenced in map-logic.js;
+           per-nation option values must match the 'nation' tile property -->
+      <label for="nation-filter">Nation</label>
+      <select id="nation-filter">
+        <option value="all">All nations</option>
+      </select>
+    </header>
+
+    <div id="map"></div>
+
+    <div class="legend">
+      <strong>Deprivation Decile</strong>
+      <div>1 (Most Deprived)</div>
+      <div>3</div>
+      <div>5</div>
+      <div>8</div>
+      <div>10 (Least Deprived)</div>
+      <div>No Data</div>
+    </div>
+
+    <script src="map-logic.js"></script>
+  </body>
+</html>
+ + + + + \ No newline at end of file diff --git a/site/map-logic.js b/site/map-logic.js new file mode 100644 index 0000000..5812833 --- /dev/null +++ b/site/map-logic.js @@ -0,0 +1,185 @@ +const map = new maplibregl.Map({ + container: "map", + style: "https://tiles.stadiamaps.com/styles/alidade_smooth.json", + center: [-3.43, 55.37], + zoom: 5, + refreshExpired: true, // Helps with local dev updates +}); + +function getTilesBaseUrl() { + const params = new URLSearchParams(window.location.search); + const fromQuery = params.get("tilesBase"); + if (fromQuery) return fromQuery.replace(/\/+$/, ""); + + const meta = document.querySelector('meta[name="rcpch-tiles-base-url"]'); + const fromMeta = meta?.getAttribute("content")?.trim(); + if (fromMeta) return fromMeta.replace(/\/+$/, ""); + + if ( + window.location.hostname === "localhost" || + window.location.hostname === "127.0.0.1" + ) { + return "http://localhost:7800"; + } + + return ""; +} + +const TILES_BASE_URL = getTilesBaseUrl(); +if (!TILES_BASE_URL) { + console.warn( + "No tiles base URL configured. Set ?tilesBase=https:///tiles or the rcpch-tiles-base-url meta tag." + ); +} + +// State management +let currentEra = "2021"; + +function getViewName(era, zoom) { + // Matches your SQL view suffixes + const zoomSuffix = zoom <= 4 ? "z0_4" : zoom <= 7 ? "z5_7" : "z8_10"; + return `public.uk_master_${era}_${zoomSuffix}`; +} + +function updateMapSource() { + const newLayer = getViewName(currentEra, map.getZoom()); + const newTiles = [`${TILES_BASE_URL}/${newLayer}/{z}/{x}/{y}.pbf`]; + + const source = map.getSource("deprivation-source"); + if (source) { + // 1. Update the tiles on the source + source.setTiles(newTiles); + + // 2. We must recreate the layer to change the 'source-layer' + if (map.getLayer("deprivation-layer")) { + // Record the current nation filter so we can re-apply it + const currentFilter = map.getFilter("deprivation-layer"); + + map.removeLayer("deprivation-layer"); + + map.addLayer({ + id: "deprivation-layer", + type: "fill", + source: "deprivation-source", + "source-layer": newLayer, // This is the vital part + paint: { + "fill-color": [ + "case", + ["==", ["get", "imd_decile"], 0], + "#cccccc", + [ + "interpolate", + ["linear"], + ["get", "imd_decile"], + 1, + "#08306b", + 10, + "#f7fbff", + ], + ], + "fill-opacity": 0.7, + "fill-outline-color": "rgba(255, 255, 255, 0.2)", + }, + }); + + // 3. Re-apply the filter (England/Scotland/etc) if one was active + if (currentFilter) { + map.setFilter("deprivation-layer", currentFilter); + } + + console.log(`Switched to table: ${newLayer}`); + } + } +} + +map.on("load", () => { + const initialLayer = getViewName(currentEra, map.getZoom()); + + map.addSource("deprivation-source", { + type: "vector", + tiles: [`${TILES_BASE_URL}/${initialLayer}/{z}/{x}/{y}.pbf`], + minzoom: 0, + maxzoom: 14, + }); + + map.addLayer({ + id: "deprivation-layer", + type: "fill", + source: "deprivation-source", + "source-layer": initialLayer, + paint: { + "fill-color": [ + "case", + ["==", ["get", "imd_decile"], 0], + "#cccccc", // Gray fallback + [ + "interpolate", + ["linear"], + ["get", "imd_decile"], + 1, + "#08306b", + 10, + "#f7fbff", + ], + ], + "fill-opacity": 0.7, + "fill-outline-color": "rgba(255, 255, 255, 0.2)", // Subtle outlines + }, + }); + + // --- AUTOMATIC ZOOM SWITCHING --- + // This ensures that when a user zooms from 4 to 5, + // the source switches from the z0_4 table to the z5_7 table. 
+ map.on("zoomend", updateMapSource); + + // Create a single popup instance (reused on hover) + const popup = new maplibregl.Popup({ + closeButton: false, + closeOnClick: false, + }); + + map.on("mousemove", "deprivation-layer", (e) => { + // Change the cursor style as a UI cue + map.getCanvas().style.cursor = "pointer"; + + const feature = e.features[0]; + const props = feature.properties; + const decile = props.imd_decile; + + // Create the HTML content for the popup + const content = ` +
+      <div class="popup-content">
+        <strong>${props.nation.toUpperCase()} LSOA</strong><br />
+        Code: ${props.code}<br />
+        Decile: <strong>${decile === 0 ? "No Data" : decile}</strong><br />
+        <small>Era: ${currentEra} | Data Year: ${props.imd_year}</small>
+      </div>
+ `; + + // Position and display the popup + popup.setLngLat(e.lngLat).setHTML(content).addTo(map); + }); + + map.on("mouseleave", "deprivation-layer", () => { + map.getCanvas().style.cursor = ""; + popup.remove(); + }); +}); + +// --- THE ERA TOGGLE --- +document.getElementById("era-toggle").addEventListener("change", (e) => { + currentEra = e.target.value; // Update state + updateMapSource(); // Trigger update +}); + +document.getElementById("nation-filter").addEventListener("change", (e) => { + const selectedNation = e.target.value; + map.setFilter( + "deprivation-layer", + selectedNation === "all" ? null : ["==", ["get", "nation"], selectedNation] + ); +});