-
Notifications
You must be signed in to change notification settings - Fork 0
telemetry: production /api/stats with persistent query_log + Vercel deploy fix #4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,10 +1,12 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import asyncio | ||
| from datetime import datetime, timedelta, timezone | ||
| from email.utils import format_datetime | ||
| import json | ||
| import logging | ||
| import time | ||
| from uuid import uuid4 | ||
| from uuid import UUID, uuid4 | ||
|
|
||
| from fastapi import FastAPI, HTTPException, Request | ||
| from fastapi.exceptions import RequestValidationError | ||
|
|
@@ -40,6 +42,7 @@ | |
| from nexusrag.apps.api.routes.self_serve import router as self_serve_router | ||
| from nexusrag.apps.api.routes.sla_admin import router as sla_admin_router | ||
| from nexusrag.apps.api.routes.sso import router as sso_router | ||
| from nexusrag.apps.api.routes.stats import router as stats_router | ||
| from nexusrag.apps.api.routes.ui import router as ui_router | ||
| from nexusrag.core.logging import configure_logging | ||
| from nexusrag.apps.api.errors import ( | ||
|
|
@@ -52,6 +55,8 @@ | |
| from nexusrag.apps.api.response import API_VERSION, is_versioned_request | ||
| from nexusrag.apps.api.rate_limit import route_class_for_request | ||
| from nexusrag.services.telemetry import record_request | ||
| from nexusrag.persistence.db import SessionLocal | ||
| from nexusrag.persistence.repos.query_log import record_query | ||
| from nexusrag.persistence.guards import TenantPredicateError | ||
|
|
||
|
|
||
|
|
@@ -61,13 +66,44 @@ | |
| "/docs", | ||
| "/openapi.json", | ||
| "/redoc", | ||
| "/api/stats", | ||
| ) | ||
| _ENVELOPE_EXEMPT_PREFIXES = ( | ||
| "/v1/openapi.json", | ||
| "/v1/docs", | ||
| "/v1/redoc", | ||
| ) | ||
|
|
||
| # Path prefixes whose requests count as "queries" for the public /api/stats | ||
| # aggregator. Each request to one of these paths produces one query_log row. | ||
| _QUERY_PATH_PREFIXES = ("/v1/run", "/run") | ||
|
|
||
| _telemetry_log = logging.getLogger("nexusrag.telemetry.query_log") | ||
|
|
||
|
|
||
async def _record_query_async(
    *,
    query_id: UUID,
    started_at: datetime,
    completed_at: datetime,
    retrieved_chunks: int,
    status: str,
) -> None:
    """Persist one query_log row on a best-effort basis.

    Runs as a detached task off the request path: a dedicated session is
    opened per call, and any database failure is logged and suppressed so
    telemetry can never surface as a request error.
    """
    row = {
        "query_id": query_id,
        "started_at": started_at,
        "completed_at": completed_at,
        "retrieved_chunks": retrieved_chunks,
        "status": status,
    }
    try:
        async with SessionLocal() as session:
            await record_query(session, **row)
    except Exception as exc:  # noqa: BLE001 - telemetry failure must not propagate
        _telemetry_log.warning("query_log insert failed", exc_info=exc)
|
|
||
|
|
||
| def create_app() -> FastAPI: | ||
| configure_logging() | ||
|
|
@@ -79,7 +115,9 @@ async def request_context_middleware(request: Request, call_next): # type: igno | |
| request_id = request.headers.get("X-Request-Id") or str(uuid4()) | ||
| request.state.request_id = request_id | ||
| start = time.monotonic() | ||
| started_at_dt = datetime.now(timezone.utc) | ||
| response = await call_next(request) | ||
| completed_at_dt = datetime.now(timezone.utc) | ||
| latency_ms = (time.monotonic() - start) * 1000.0 | ||
| route_class, _cost = route_class_for_request(request) | ||
| record_request( | ||
|
|
@@ -88,6 +126,25 @@ async def request_context_middleware(request: Request, call_next): # type: igno | |
| status_code=response.status_code, | ||
| latency_ms=latency_ms, | ||
| ) | ||
| # Persist a query_log row for every request that hits a query path, | ||
| # so the public /api/stats aggregator returns real counters that | ||
| # survive cold starts. Routes can populate request.state.retrieved_chunks | ||
| # to surface the actual chunk count; otherwise we record 0. | ||
| if request.url.path.startswith(_QUERY_PATH_PREFIXES): | ||
| chunks_attr = getattr(request.state, "retrieved_chunks", 0) | ||
| try: | ||
| retrieved_chunks = int(chunks_attr) | ||
| except (TypeError, ValueError): | ||
| retrieved_chunks = 0 | ||
|
Comment on lines
+134
to
+138
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The middleware falls back to a retrieved_chunks value of 0 whenever a route does not populate request.state.retrieved_chunks (or sets it to something non-numeric), so the persisted chunk counts may under-report until routes are updated to set it. Useful? React with 👍 / 👎. |
||
| asyncio.create_task( | ||
| _record_query_async( | ||
| query_id=uuid4(), | ||
| started_at=started_at_dt, | ||
| completed_at=completed_at_dt, | ||
| retrieved_chunks=retrieved_chunks, | ||
| status="ok" if response.status_code < 400 else "error", | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
For asyncio.create_task, the returned task is not stored anywhere; a task with no live reference can be garbage-collected before it completes, so consider keeping a reference (e.g. in a set discarded on completion) to guarantee the write runs. Useful? React with 👍 / 👎. |
||
| ) | ||
| ) | ||
| # Wrap versioned JSON responses in the standardized success envelope. | ||
| if ( | ||
| is_versioned_request(request) | ||
|
|
@@ -155,6 +212,12 @@ async def _http_exception_handler(request: Request, exc: HTTPException): | |
| async def _tenant_predicate_exception_handler(request: Request, exc: TenantPredicateError): | ||
| return await tenant_predicate_exception_handler(request, exc) | ||
|
|
||
| # Public, unauthenticated /api/stats endpoint for the Production | ||
| # Telemetry panel on https://eleventh.dev. Mounted before the versioned | ||
| # v1 routes so the path is canonical and not subject to /v1 routing | ||
| # rules. See docs/TELEMETRY_SCHEMA reference. | ||
| app.include_router(stats_router) | ||
|
|
||
| # Mount versioned v1 API routes. | ||
| app.include_router(audio_router, prefix=f"/{API_VERSION}") | ||
| # Expose admin endpoints for tenant quota management. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| """Public, unauthenticated /api/stats endpoint. | ||
|
|
||
| Implements the Tier-A telemetry contract from | ||
| https://github.com/IgnazioDS/IgnazioDS/blob/main/TELEMETRY_SCHEMA.md. | ||
|
|
||
| The endpoint is consumed by the Production Telemetry panel on | ||
| https://eleventh.dev. Polling cadence is ~30s. Contract guarantees: | ||
|
|
||
| - HTTP 200 in all branches, even when the database is unreachable | ||
| - Privacy: aggregate counts only, no row-level fields ever leave this route | ||
| - CORS: wildcard origin (response is non-PII aggregates) | ||
| - Cache: public, max-age=30, stale-while-revalidate=60 | ||
| """ | ||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
| import os | ||
| from datetime import datetime, timezone | ||
| from typing import Any | ||
|
|
||
| from fastapi import APIRouter, Depends, Response | ||
| from sqlalchemy.ext.asyncio import AsyncSession | ||
|
|
||
| from nexusrag.apps.api.deps import get_db | ||
| from nexusrag.persistence.repos.query_log import ( | ||
| aggregate, | ||
| to_metrics_dict, | ||
| zero_metrics, | ||
| ) | ||
|
|
||
|
|
||
| _log = logging.getLogger(__name__) | ||
| router = APIRouter() | ||
|
|
||
|
|
||
| SCHEMA_VERSION = 1 | ||
| SYSTEM_SLUG = "nexusrag" | ||
|
|
||
| # Captured at module import (cold start). Subsequent warm invocations | ||
| # return the same value, which is a reasonable proxy for "this lambda | ||
| # instance was deployed at this time". A new deploy spawns new lambdas | ||
| # with a new value, so the field freshens on every push to main. | ||
| _DEPLOYED_AT = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") | ||
|
|
||
|
|
||
def _now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with a trailing ``Z``."""
    now = datetime.now(timezone.utc).replace(microsecond=0)
    return now.isoformat().replace("+00:00", "Z")
|
|
||
|
|
||
| def _vercel_deploy_time() -> str | None: | ||
| # Prefer the commit author date Vercel injects; fall back to module-load | ||
| # capture (set when the lambda cold-started). | ||
| raw = os.environ.get("VERCEL_GIT_COMMIT_AUTHOR_DATE") | ||
| if not raw: | ||
| return _DEPLOYED_AT | ||
| if raw.isdigit(): | ||
| # Vercel sometimes exposes this as unix-seconds. | ||
| try: | ||
| return ( | ||
| datetime.fromtimestamp(int(raw), tz=timezone.utc) | ||
| .strftime("%Y-%m-%dT%H:%M:%SZ") | ||
| ) | ||
| except (ValueError, OSError): | ||
| return _DEPLOYED_AT | ||
| return raw | ||
|
|
||
|
|
||
def _set_public_headers(response: Response) -> None:
    """Attach the contract-mandated cache and CORS headers to *response*.

    The payload contains only non-PII aggregates, so a wildcard CORS origin
    is safe; the cache policy matches the panel's ~30s polling cadence.
    """
    public_headers = {
        "Cache-Control": "public, max-age=30, stale-while-revalidate=60",
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET, OPTIONS",
        "Access-Control-Allow-Headers": "Content-Type",
    }
    for name, value in public_headers.items():
        response.headers[name] = value
|
|
||
|
|
||
@router.options("/api/stats", include_in_schema=False)
async def stats_options(response: Response) -> Response:
    """Answer the CORS preflight: 204 No Content plus the wildcard headers."""
    response.status_code = 204
    _set_public_headers(response)
    return response
|
|
||
|
|
||
@router.get("/api/stats")
async def stats(
    response: Response,
    session: AsyncSession = Depends(get_db),
) -> dict[str, Any]:
    """Serve the Tier-A public telemetry payload.

    Always answers HTTP 200: when the aggregator query fails, the error is
    logged internally and the payload degrades to zeroed metrics with
    status "degraded" — no internal detail ever reaches this
    unauthenticated endpoint.
    """
    _set_public_headers(response)
    last_deployed_at = _vercel_deploy_time()

    try:
        snapshot = await aggregate(session)
        metrics = to_metrics_dict(snapshot)
        if snapshot.last_active_at is None:
            last_active_at = None
        else:
            last_active_at = snapshot.last_active_at.astimezone(
                timezone.utc
            ).strftime("%Y-%m-%dT%H:%M:%SZ")
        status_value = "operational"
    except Exception as exc:  # noqa: BLE001 - contract forbids 5xx
        # Internal error messages must NEVER appear in the response body
        # (they would leak detail to an unauthenticated public endpoint).
        _log.warning("stats aggregator failed", exc_info=exc)
        metrics = zero_metrics()
        last_active_at = None
        status_value = "degraded"

    return {
        "system": SYSTEM_SLUG,
        "mode": "live",
        "status": status_value,
        # Vercel deploys are READY whenever the function responds; 30-day
        # uptime is therefore 100% by definition. The public schema documents
        # this approximation; replace with a self-pinger when the system
        # moves to a real long-lived runtime.
        "uptime_pct_30d": 100.0,
        "last_deployed_at": last_deployed_at,
        "last_active_at": last_active_at,
        "metrics": metrics,
        "schema_version": SCHEMA_VERSION,
        "generated_at": _now_iso(),
    }
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The query log timestamps are captured immediately after
`call_next`, but `/run` returns a `StreamingResponse` whose generation continues after this point, so `completed_at` and `latency_ms` are recorded before the actual query finishes. This makes the published p50/p95 latency metrics systematically too low for real streamed runs and breaks the intended end-to-end timing semantics. Useful? React with 👍 / 👎.