diff --git a/juniper_data/api/app.py b/juniper_data/api/app.py index bc0582e..655b31d 100644 --- a/juniper_data/api/app.py +++ b/juniper_data/api/app.py @@ -43,7 +43,13 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: logger = logging.getLogger("juniper_data") logger.info(f"JuniperData API v{__version__} starting") - logger.info(f"Storage path: {storage_path.absolute()}") + # ``Path.absolute()`` does not stat the path (at most it looks up + # the working directory to anchor a relative path); the ASYNC240 + # rule is over-conservative here and flags every ``pathlib.Path`` + # method without distinguishing stat-bound ones from text-only + # ones. Lifespan startup is also a one-shot event, not a request + # handler — even if there were I/O it wouldn't block per-request latency. + logger.info(f"Storage path: {storage_path.absolute()}") # noqa: ASYNC240 yield diff --git a/juniper_data/api/routes/health.py b/juniper_data/api/routes/health.py index f15a867..0a90649 100644 --- a/juniper_data/api/routes/health.py +++ b/juniper_data/api/routes/health.py @@ -15,6 +15,7 @@ and seed-03). """ +import asyncio import time from pathlib import Path @@ -59,6 +60,19 @@ def _liveness_tick(settings: Settings) -> None: raise RuntimeError(f"storage path not a directory: {settings.storage_path}") +def _probe_storage(storage_path: Path) -> tuple[bool, int]: + """Filesystem probe for the readiness route. + + Bundles the ``is_dir()`` stat and the ``*.npz`` glob into a single + helper so the readiness route takes one ``asyncio.to_thread`` + hop instead of two. Returns ``(is_dir, dataset_count)``; + ``dataset_count`` is 0 when the path isn't a directory. + """ + if not storage_path.is_dir(): + return False, 0 + return True, len(list(storage_path.glob("*.npz"))) + + @router.get("/health") async def health_check() -> dict: """Combined health check endpoint (backward compatible).
@@ -128,8 +142,13 @@ async def readiness_probe(request: Request, response: Response) -> ReadinessResp settings = _settings_from_request(request) storage_path = Path(settings.storage_path) - if storage_path.is_dir(): - dataset_count = len(list(storage_path.glob("*.npz"))) + # Probe filesystem off the event loop. ``is_dir()`` is a stat + # syscall and ``glob()`` walks the directory; both block on slow + # disks. Bundled into a single ``to_thread`` call so the readiness + # probe takes one thread-hop, not two. + is_dir, dataset_count = await asyncio.to_thread(_probe_storage, storage_path) + + if is_dir: storage_dep = DependencyStatus( name="Dataset Storage", status="healthy",