From 133f91c9f93cc9d123a8c4062411612a62c322bf Mon Sep 17 00:00:00 2001 From: Paul Calnon Date: Wed, 6 May 2026 08:36:08 -0500 Subject: [PATCH] fix(api): wrap readiness-probe filesystem ops in asyncio.to_thread (Phase 3 cleanup) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 3 cleanup of the 3 ASYNC240 violations Phase 0 enumerated in juniper-data (juniper-ml notes/ASYNC_ROUTE_VIOLATIONS_2026-05-06.md §2.2): - juniper_data/api/app.py:46 (lifespan probe ``storage_path.absolute()``) - juniper_data/api/routes/health.py:131 (``storage_path.is_dir()``) - juniper_data/api/routes/health.py:132 (``storage_path.glob(\"*.npz\")``) Fix --- ``health.py``: extracted a ``_probe_storage(storage_path)`` helper that runs both the ``is_dir()`` stat and the ``*.npz`` glob in a single ``asyncio.to_thread`` call from the readiness route. One thread-hop instead of two; the previous synchronous-in-async pattern blocked the event loop while the disk was probed. ``app.py``: per-line ``# noqa: ASYNC240`` for the ``storage_path.absolute()`` call. Unlike ``is_dir()``/``glob()``, ``Path.absolute()`` is pure path manipulation with no filesystem I/O — the ASYNC240 rule is over-conservative. Comment explains the distinction so a future reviewer doesn't accidentally wrap it in ``to_thread`` thinking it's I/O. Lifespan startup is also a one-shot event, not a request handler. After this change ``ruff check --select ASYNC juniper_data/`` reports zero violations — the async-route audit lane stays green without needing per-file-ignores. Tests ----- - 950/950 in juniper_data/tests/ pass. - 12/12 in test_health_enhanced.py pass — the readiness route behaviour is unchanged; the helper extraction is purely internal restructuring. Plan: juniper-ml notes/ASYNC_ROUTE_AUDIT_HOOK_MIGRATION_PLAN.md §4 Phase 3 cleanup. First of two cleanup PRs (data, then canopy); cascor and worker have nothing to fix at Phase 3. Co-Authored-By: Claude Opus 4.7 (1M context) --- juniper_data/api/app.py | 8 +++++++- juniper_data/api/routes/health.py | 23 +++++++++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/juniper_data/api/app.py b/juniper_data/api/app.py index bc0582e..655b31d 100644 --- a/juniper_data/api/app.py +++ b/juniper_data/api/app.py @@ -43,7 +43,13 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: logger = logging.getLogger("juniper_data") logger.info(f"JuniperData API v{__version__} starting") - logger.info(f"Storage path: {storage_path.absolute()}") + # ``Path.absolute()`` is pure path manipulation (no I/O); the + # ASYNC240 rule is over-conservative here and flags every + # ``pathlib.Path`` method without distinguishing stat-bound ones + # from text-only ones. Lifespan startup is also a one-shot + # event, not a request handler — even if there were I/O it + # wouldn't block per-request latency. + logger.info(f"Storage path: {storage_path.absolute()}") # noqa: ASYNC240 yield diff --git a/juniper_data/api/routes/health.py b/juniper_data/api/routes/health.py index f15a867..0a90649 100644 --- a/juniper_data/api/routes/health.py +++ b/juniper_data/api/routes/health.py @@ -15,6 +15,7 @@ and seed-03). """ +import asyncio import time from pathlib import Path @@ -59,6 +60,19 @@ def _liveness_tick(settings: Settings) -> None: raise RuntimeError(f"storage path not a directory: {settings.storage_path}") +def _probe_storage(storage_path: Path) -> tuple[bool, int]: + """Filesystem probe for the readiness route. + + Bundles the ``is_dir()`` stat and the ``*.npz`` glob into a single + helper so the readiness route takes one ``asyncio.to_thread`` + hop instead of two. Returns ``(is_dir, dataset_count)``; + ``dataset_count`` is 0 when the path isn't a directory. + """ + if not storage_path.is_dir(): + return False, 0 + return True, len(list(storage_path.glob("*.npz"))) + + @router.get("/health") async def health_check() -> dict: """Combined health check endpoint (backward compatible). @@ -128,8 +142,13 @@ async def readiness_probe(request: Request, response: Response) -> ReadinessResp settings = _settings_from_request(request) storage_path = Path(settings.storage_path) - if storage_path.is_dir(): - dataset_count = len(list(storage_path.glob("*.npz"))) + # Probe filesystem off the event loop. ``is_dir()`` is a stat + # syscall and ``glob()`` walks the directory; both block on slow + # disks. Bundled into a single ``to_thread`` call so the readiness + # probe takes one thread-hop, not two. + is_dir, dataset_count = await asyncio.to_thread(_probe_storage, storage_path) + + if is_dir: storage_dep = DependencyStatus( name="Dataset Storage", status="healthy",