Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 13 additions & 31 deletions juniper_data/api/observability.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,45 +105,27 @@ async def __call__(self, scope, receive, send):
def _ensure_dataset_metrics() -> dict:
"""Create dataset-related Prometheus metrics on first access.

Idempotent against the global ``prometheus_client.REGISTRY``: if the
module-level cache has been cleared (e.g. by a test fixture
resetting ``_dataset_metrics = None``) but the underlying
counters / histogram / gauge are still registered, this re-fetches
the existing collectors instead of raising
``ValueError: Duplicated timeseries``. Same shape as
``juniper_observability.middleware.prometheus.PrometheusMiddleware``
(juniper-ml PR #211) and ``juniper-canopy/src/observability.py
:_ensure_canopy_metrics`` (canopy V34a). Production behaviour
unchanged on the happy path.
Idempotent against the global ``prometheus_client.REGISTRY`` via
:func:`juniper_observability.register_or_reuse`: if the module-level
cache has been cleared (e.g. by a test fixture resetting
``_dataset_metrics = None``) but the underlying counters / histogram
/ gauge are still registered, the helper re-fetches the existing
collectors instead of raising ``ValueError: Duplicated timeseries``.
Production behaviour unchanged on the happy path.
"""
global _dataset_metrics
if _dataset_metrics is None:
from prometheus_client import REGISTRY, Counter, Gauge, Histogram

def _get_or_create(factory, name, *args, **kwargs):
try:
return factory(name, *args, **kwargs)
except ValueError:
# Already registered — typically test pollution or an
# in-process re-init. Re-fetch the existing collector so
# callers always get a working metric. ``prometheus_client``
# registers each collector under both the bare name and
# the suffixed sample names (``_total`` / ``_created`` /
# ``_bucket`` / ``_sum`` / ``_count``), all pointing at
# the same collector object.
existing = REGISTRY._names_to_collectors.get(name)
if existing is None:
raise
return existing
from juniper_observability import register_or_reuse
from prometheus_client import Counter, Gauge, Histogram

_dataset_metrics = {
"generations_total": _get_or_create(
"generations_total": register_or_reuse(
Counter,
"juniper_data_dataset_generations_total",
"Total dataset generation requests",
["generator", "status"],
),
"generation_duration_seconds": _get_or_create(
"generation_duration_seconds": register_or_reuse(
Histogram,
"juniper_data_dataset_generation_duration_seconds",
# METRICS-MON R4.1: bucket layout is **tentative pending
Expand All @@ -156,7 +138,7 @@ def _get_or_create(factory, name, *args, **kwargs):
["generator"],
buckets=DATASET_GENERATION_DURATION_BUCKETS,
),
"datasets_cached": _get_or_create(
"datasets_cached": register_or_reuse(
Gauge,
"juniper_data_datasets_cached",
"Number of datasets currently cached in storage",
Expand All @@ -166,7 +148,7 @@ def _get_or_create(factory, name, *args, **kwargs):
# actual generation work (cache misses); this counts every
# incoming POST so capacity-planning queries don't undercount
# deterministic re-POSTs (see roadmap §7 R4.5).
"post_total": _get_or_create(
"post_total": register_or_reuse(
Counter,
"juniper_data_dataset_post_total",
"Total POST /v1/datasets requests, split by cache outcome",
Expand Down
10 changes: 9 additions & 1 deletion juniper_data/storage/cached.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@

import numpy as np

from juniper_data.api.observability import set_datasets_cached
# ``set_datasets_cached`` is imported lazily inside ``_emit_cached_count``
# below — top-level import here triggers a circular import via
# ``juniper_data.api.__init__`` → ``api.app`` → ``juniper_data.storage``
# (introduced by PR #92). Lazy import breaks the cycle without changing
# any production behaviour.
from juniper_data.core.models import DatasetMeta
from juniper_data.storage.constants import DEFAULT_LIST_LIMIT, DEFAULT_LIST_OFFSET

Expand Down Expand Up @@ -62,6 +66,10 @@ def _emit_cached_count(self) -> None:
discipline used everywhere else in this class.
"""
try:
# Lazy import — see top-of-file comment for the cycle
# avoidance rationale.
from juniper_data.api.observability import set_datasets_cached

count = len(self._cache.list_datasets(limit=_CACHE_COUNT_PROBE_LIMIT))
set_datasets_cached(count)
except Exception:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ api = [
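# Floor is now >=0.2.0. NOTE(review): presumably raised because
# ``juniper_data/api/observability.py`` imports
# ``juniper_observability.register_or_reuse`` — confirm which release
# first ships that helper. History: the floor was previously >=0.1.1 to
# match the cascor / canopy floor (audit-doc C.2 fix); juniper-ml#155
# published 0.1.0a0, and juniper-ml has since shipped 0.1.1.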
"juniper-observability>=0.1.1",
"juniper-observability>=0.2.0",
]
test = [
"pytest>=7.0.0",
Expand Down
Loading