From 9f0d7180cc09455ef7169bf83b54dd8b1e07c854 Mon Sep 17 00:00:00 2001 From: Iahn Cajigas Date: Wed, 11 Mar 2026 06:20:46 -0400 Subject: [PATCH] Fix CI Figshare flakiness with retry logic and data caching Two complementary fixes for transient HTTP 403 errors from Figshare that were causing notebook-changed-pr CI failures: 1. Add exponential-backoff retry (4 attempts) to _http_get() in data_manager.py for 403/429/5xx errors and network failures. 2. Cache the example data directory across CI runs using actions/cache in all notebook jobs (smoke, parity-core, changed-pr, helpfile-full). Uses NSTAT_DATA_DIR env var so the data persists between runs and Figshare is only contacted when the cache is cold. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 21 +++++++++ .github/workflows/notebook-full-fidelity.yml | 7 +++ nstat/data_manager.py | 48 +++++++++++++++----- 3 files changed, 65 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd9c33f3..b31f290b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -119,12 +119,19 @@ jobs: notebook-smoke: runs-on: ubuntu-latest + env: + NSTAT_DATA_DIR: ${{ github.workspace }}/.nstat_data_cache steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: "3.11" + - name: Cache example data + uses: actions/cache@v4 + with: + path: ${{ env.NSTAT_DATA_DIR }} + key: nstat-example-data-v1 - name: Install dependencies run: | python -m pip install --upgrade pip @@ -136,12 +143,19 @@ jobs: notebook-parity-core: runs-on: ubuntu-latest + env: + NSTAT_DATA_DIR: ${{ github.workspace }}/.nstat_data_cache steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: "3.11" + - name: Cache example data + uses: actions/cache@v4 + with: + path: ${{ env.NSTAT_DATA_DIR }} + key: nstat-example-data-v1 - name: Install dependencies run: | python -m pip install --upgrade pip @@ -154,6 +168,8 @@ jobs: notebook-changed-pr: if: github.event_name == 'pull_request' runs-on: ubuntu-latest + env: + NSTAT_DATA_DIR: ${{ github.workspace }}/.nstat_data_cache steps: - uses: actions/checkout@v4 @@ -162,6 +178,11 @@ jobs: - uses: actions/setup-python@v5 with: python-version: "3.11" + - name: Cache example data + uses: actions/cache@v4 + with: + path: ${{ env.NSTAT_DATA_DIR }} + key: nstat-example-data-v1 - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/notebook-full-fidelity.yml b/.github/workflows/notebook-full-fidelity.yml index ac4cc6a2..ddfc6faf 100644 --- a/.github/workflows/notebook-full-fidelity.yml +++ b/.github/workflows/notebook-full-fidelity.yml @@ -15,12 +15,19 @@ on: jobs: helpfile-full: runs-on: ubuntu-latest + env: + NSTAT_DATA_DIR: ${{ github.workspace }}/.nstat_data_cache steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: "3.11" + - name: Cache example data + uses: actions/cache@v4 + with: + path: ${{ env.NSTAT_DATA_DIR }} + key: nstat-example-data-v1 - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/nstat/data_manager.py b/nstat/data_manager.py index ffbf0db4..4f74dbaf 100644 --- a/nstat/data_manager.py +++ b/nstat/data_manager.py @@ -13,6 +13,7 @@ import shutil import tempfile import time +import urllib.error import urllib.request import zipfile from dataclasses import dataclass @@ -105,17 +106,42 @@ def _write_sentinel(data_dir: Path, *, source_url: str) -> None: (data_dir / SENTINEL_NAME).write_text(json.dumps(payload, indent=2), encoding="utf-8") -def _http_get(url: str, *, timeout: float = 60.0) -> tuple[str, bytes]: - req = urllib.request.Request( - url, - headers={ - "User-Agent": "nSTAT-python-data-manager/1.0 (+https://github.com/cajigaslab/nSTAT-python)" - }, - ) - with urllib.request.urlopen(req, timeout=timeout) as resp: - final_url = str(resp.geturl()) - body = resp.read() - return final_url, body +def _http_get( + url: str, *, timeout: float = 60.0, retries: int = 4, backoff: float = 2.0 +) -> tuple[str, bytes]: + """HTTP GET with exponential-backoff retry for transient errors (429/5xx/403).""" + last_error: Exception | None = None + for attempt in range(1, retries + 1): + try: + req = urllib.request.Request( + url, + headers={ + "User-Agent": "nSTAT-python-data-manager/1.0 " + "(+https://github.com/cajigaslab/nSTAT-python)" + }, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + final_url = str(resp.geturl()) + body = resp.read() + return final_url, body + except urllib.error.HTTPError as exc: + last_error = exc + # Retry on rate-limit (429), server errors (5xx), and + # transient Figshare 403s from GitHub Actions IPs. + if exc.code in (403, 429) or exc.code >= 500: + if attempt < retries: + delay = backoff**attempt + time.sleep(delay) + continue + raise + except (urllib.error.URLError, OSError) as exc: + last_error = exc + if attempt < retries: + delay = backoff**attempt + time.sleep(delay) + continue + raise + raise RuntimeError(f"HTTP GET failed after {retries} attempts: {url}") from last_error def _resolve_figshare_download_url() -> str: