From ac8ce827e9e1a920176bdfe8f70c586637fdea50 Mon Sep 17 00:00:00 2001 From: rhyswdev Date: Wed, 1 Apr 2026 14:31:31 +0100 Subject: [PATCH] Add /statusz endpoint to expose re-indexing state The existing /readyz endpoint always returns "ok" during re-indexing because the old index continues to serve traffic. This makes it impossible for downstream systems to know when a re-index completes. The new /statusz endpoint checks the distributed indexing lock and returns {"status": "indexing"} while a re-index is in progress, or {"status": "ok"} otherwise. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/deploy/monitoring.md | 3 ++- tests/test_base.py | 6 ++++++ yente/routers/admin.py | 19 +++++++++++++++++++ yente/search/lock.py | 16 ++++++++++++++++ 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/docs/deploy/monitoring.md b/docs/deploy/monitoring.md index 5378e948..1ded32b6 100644 --- a/docs/deploy/monitoring.md +++ b/docs/deploy/monitoring.md @@ -8,8 +8,9 @@ yente provides standard health check endpoints: * `/healthz`: Returns `200 OK` if the Python application is responsive. Use this for basic liveness probes. * `/readyz`: Returns `200 OK` if the search index is available and searchable. Use this for readiness probes to ensure the service doesn't receive traffic before the initial indexing is complete. +* `/statusz`: Returns `{"status": "ok"}` when the service is idle, or `{"status": "indexing"}` when a re-index is in progress. Use this to trigger downstream workflows after an index rebuild completes. -Note that `/readyz` will return `200 OK` even if the index is stale, as long as it is searchable. Read on for how to monitor data freshness. +Note that `/readyz` will return `200 OK` even if the index is stale, as long as it is searchable. Similarly, `/statusz` does not indicate whether the data is fresh — only whether a re-index operation is currently running. Read on for how to monitor data freshness. ## Monitoring catalog and index freshness diff --git a/tests/test_base.py b/tests/test_base.py index 3c3531ee..424feeff 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -15,6 +15,12 @@ def test_readyz(): assert res.json().get("status") == "ok", res +def test_statusz(): + res = client.get("/statusz") + assert res.status_code == 200, res + assert res.json().get("status") in ("ok", "indexing"), res + + def test_manifest(): res = client.get("/manifest") assert res.status_code == 200, res diff --git a/yente/routers/admin.py b/yente/routers/admin.py index 9dceb710..73a68389 100644 --- a/yente/routers/admin.py +++ b/yente/routers/admin.py @@ -13,6 +13,7 @@ from yente.provider import SearchProvider, get_provider from yente.routers.util import ENABLED_ALGORITHMS from yente.search.indexer import update_index, update_index_threaded +from yente.search.lock import check_is_locked from yente.search.status import sync_dataset_versions log = get_logger(__name__) @@ -66,6 +67,24 @@ async def readyz( return StatusResponse(status="ok") +@router.get( + "/statusz", + summary="Index status check", + tags=["System information"], + response_model=StatusResponse, +) +async def statusz( + provider: SearchProvider = Depends(get_provider), +) -> StatusResponse: + """Check if the service is currently re-indexing its search index. Returns + ``indexing`` while an index build is in progress, ``ok`` otherwise. This can + be used to trigger downstream workflows after an index rebuild completes.""" + locked = await check_is_locked(provider) + if locked: + return StatusResponse(status="indexing") + return StatusResponse(status="ok") + + @router.get( "/catalog", summary="Data catalog", diff --git a/yente/search/lock.py b/yente/search/lock.py index fe9ad78b..8f98338f 100644 --- a/yente/search/lock.py +++ b/yente/search/lock.py @@ -212,6 +212,22 @@ async def release_lock(provider: SearchProvider, lock_session: LockSession) -> N log.error(f"Failed to release lock {lock_session.id}. Response: {e}") +async def check_is_locked(provider: SearchProvider) -> bool: + """Check if the global lock is currently held (i.e. indexing is in progress). + + This is a read-only operation that does not attempt to acquire or modify the lock. + Returns True if an active (non-expired) lock exists, False otherwise. + """ + try: + hit = await provider.get_document(get_lock_index_name(), LOCK_DOC_ID) + if not hit: + return False + return lock_is_active(hit) + except YenteIndexError: + # Lock index may not exist yet (e.g. first run before any indexing) + return False + + async def refresh_lock(provider: SearchProvider, lock_session: LockSession) -> bool: """Refresh a lock by updating the acquired_at time to now.