Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/deploy/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ yente provides standard health check endpoints:

* `/healthz`: Returns `200 OK` if the Python application is responsive. Use this for basic liveness probes.
* `/readyz`: Returns `200 OK` if the search index is available and searchable. Use this for readiness probes to ensure the service doesn't receive traffic before the initial indexing is complete.
* `/statusz`: Returns `{"status": "ok"}` when the service is idle, or `{"status": "indexing"}` when a re-index is in progress. Use this to trigger downstream workflows after an index rebuild completes.

Note that `/readyz` will return `200 OK` even if the index is stale, as long as it is searchable. Read on for how to monitor data freshness.
Note that `/readyz` will return `200 OK` even if the index is stale, as long as it is searchable. Similarly, `/statusz` does not indicate whether the data is fresh — only whether a re-index operation is currently running. Read on for how to monitor data freshness.

## Monitoring catalog and index freshness

Expand Down
6 changes: 6 additions & 0 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ def test_readyz():
assert res.json().get("status") == "ok", res


def test_statusz():
    """The /statusz endpoint answers 200 with a status of "ok" or "indexing"."""
    response = client.get("/statusz")
    assert response.status_code == 200, response
    # Either value is acceptable: the test cannot know whether an index
    # build happens to be running at the moment of the request.
    reported = response.json().get("status")
    assert reported in ("ok", "indexing"), response


def test_manifest():
res = client.get("/manifest")
assert res.status_code == 200, res
Expand Down
19 changes: 19 additions & 0 deletions yente/routers/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from yente.provider import SearchProvider, get_provider
from yente.routers.util import ENABLED_ALGORITHMS
from yente.search.indexer import update_index, update_index_threaded
from yente.search.lock import check_is_locked
from yente.search.status import sync_dataset_versions

log = get_logger(__name__)
Expand Down Expand Up @@ -66,6 +67,24 @@ async def readyz(
return StatusResponse(status="ok")


@router.get(
    "/statusz",
    summary="Index status check",
    tags=["System information"],
    response_model=StatusResponse,
)
async def statusz(
    provider: SearchProvider = Depends(get_provider),
) -> StatusResponse:
    """Check if the service is currently re-indexing its search index. Returns
    ``indexing`` while an index build is in progress, ``ok`` otherwise. This can
    be used to trigger downstream workflows after an index rebuild completes."""
    # NOTE(review): the docstring wording is kept as-is on purpose; the web
    # framework presumably surfaces it as the endpoint description - confirm
    # before rewording.
    # The lock check is the single source of truth for "indexing in progress".
    index_build_running = await check_is_locked(provider)
    return StatusResponse(status="indexing" if index_build_running else "ok")


@router.get(
"/catalog",
summary="Data catalog",
Expand Down
16 changes: 16 additions & 0 deletions yente/search/lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,22 @@ async def release_lock(provider: SearchProvider, lock_session: LockSession) -> N
log.error(f"Failed to release lock {lock_session.id}. Response: {e}")


async def check_is_locked(provider: SearchProvider) -> bool:
    """Report whether the global lock is held right now (indexing in progress).

    Only reads the lock document; it never tries to acquire or change the lock.

    Returns:
        True when a lock document exists and ``lock_is_active`` accepts it
        (i.e. the lock is active and not expired), False when there is no
        lock document or the lookup raises ``YenteIndexError``.
    """
    try:
        lock_doc = await provider.get_document(get_lock_index_name(), LOCK_DOC_ID)
        # No lock document means nobody holds the lock.
        return lock_is_active(lock_doc) if lock_doc else False
    except YenteIndexError:
        # The lock index may not exist yet (e.g. on a first run before any
        # indexing has happened); treat that as "not locked".
        return False


async def refresh_lock(provider: SearchProvider, lock_session: LockSession) -> bool:
"""Refresh a lock by updating the acquired_at time to now.

Expand Down