From ff9d1a5cea0787cbeb4be98a32c329108895f4e1 Mon Sep 17 00:00:00 2001 From: Paul Calnon Date: Wed, 6 May 2026 01:19:21 -0500 Subject: [PATCH 1/2] refactor(observability): migrate to juniper-observability register_or_reuse (Phase 2a) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2a of the migration plan in ``juniper-ml/notes/observability/REGISTER_OR_REUSE_HELPER_DESIGN_2026-05-05.md``. Drops the inline ``_get_or_create`` helper added in PR #87 and calls the canonical ``juniper_observability.register_or_reuse`` shipped in juniper-observability ``0.2.0``. ### What changes - ``juniper_data/api/observability.py``: ``_ensure_dataset_metrics`` now imports ``register_or_reuse`` from ``juniper_observability`` (lazy, inside the ``if _dataset_metrics is None`` branch) and uses it for all four collectors (Counter ×2 + Histogram + Gauge). The inline ``_get_or_create`` closure and the manual ``REGISTRY._names_to_collectors`` lookup are gone. - ``pyproject.toml``: ``juniper-observability>=0.1.1`` → ``juniper-observability>=0.2.0``. The new helper is the reason for the bump; existing 0.1.1 callers' behaviour is unchanged. ### Drive-by fix: storage circular import PR #92 (``audit-fixup: wire juniper_data_datasets_cached gauge to cache layer``) added ``from juniper_data.api.observability import set_datasets_cached`` at the top of ``juniper_data/storage/cached.py``. That triggers the import chain: juniper_data.storage.__init__ → cached.py → juniper_data.api.observability → juniper_data.api.__init__ → app.py → juniper_data.storage (← still initialising!) → ImportError: cannot import name 'LocalFSDatasetStore' …which broke ``pytest --collect-only`` on origin/main as of 2026-05-06: 31 collection errors covering every test that imports ``juniper_data.storage`` directly or transitively. ``--collect-only`` amplifies the breakage, but a normal full pytest run also hits the same path during fixture collection. 
Fix: defer the ``set_datasets_cached`` import to inside ``CachedDatasetStore._emit_cached_count`` so the cycle never fires at module-import time. The function is best-effort with an ``except Exception`` swallowing failures, so a deferred import that raises ``ImportError`` (e.g. during a ``CachedDatasetStore`` instantiation in a non-API context) gets logged at DEBUG and skipped the same way any other observability failure does. ### Verification Full juniper-data suite under JuniperData env: **950 passed** (was ``31 errors during collection`` on origin/main pre-fix). Co-Authored-By: Claude Opus 4.7 (1M context) --- juniper_data/api/observability.py | 45 ++++++++++--------------------- juniper_data/storage/cached.py | 10 ++++++- pyproject.toml | 2 +- 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/juniper_data/api/observability.py b/juniper_data/api/observability.py index 52520e4..a5dc742 100644 --- a/juniper_data/api/observability.py +++ b/juniper_data/api/observability.py @@ -105,45 +105,28 @@ async def __call__(self, scope, receive, send): def _ensure_dataset_metrics() -> dict: """Create dataset-related Prometheus metrics on first access. - Idempotent against the global ``prometheus_client.REGISTRY``: if the - module-level cache has been cleared (e.g. by a test fixture - resetting ``_dataset_metrics = None``) but the underlying - counters / histogram / gauge are still registered, this re-fetches - the existing collectors instead of raising - ``ValueError: Duplicated timeseries``. Same shape as - ``juniper_observability.middleware.prometheus.PrometheusMiddleware`` - (juniper-ml PR #211) and ``juniper-canopy/src/observability.py - :_ensure_canopy_metrics`` (canopy V34a). Production behaviour - unchanged on the happy path. + Idempotent against the global ``prometheus_client.REGISTRY`` via + :func:`juniper_observability.register_or_reuse`: if the module-level + cache has been cleared (e.g. 
by a test fixture resetting + ``_dataset_metrics = None``) but the underlying counters / histogram + / gauge are still registered, the helper re-fetches the existing + collectors instead of raising ``ValueError: Duplicated timeseries``. + Production behaviour unchanged on the happy path. """ global _dataset_metrics if _dataset_metrics is None: - from prometheus_client import REGISTRY, Counter, Gauge, Histogram - - def _get_or_create(factory, name, *args, **kwargs): - try: - return factory(name, *args, **kwargs) - except ValueError: - # Already registered — typically test pollution or an - # in-process re-init. Re-fetch the existing collector so - # callers always get a working metric. ``prometheus_client`` - # registers each collector under both the bare name and - # the suffixed sample names (``_total`` / ``_created`` / - # ``_bucket`` / ``_sum`` / ``_count``), all pointing at - # the same collector object. - existing = REGISTRY._names_to_collectors.get(name) - if existing is None: - raise - return existing + from prometheus_client import Counter, Gauge, Histogram + + from juniper_observability import register_or_reuse _dataset_metrics = { - "generations_total": _get_or_create( + "generations_total": register_or_reuse( Counter, "juniper_data_dataset_generations_total", "Total dataset generation requests", ["generator", "status"], ), - "generation_duration_seconds": _get_or_create( + "generation_duration_seconds": register_or_reuse( Histogram, "juniper_data_dataset_generation_duration_seconds", # METRICS-MON R4.1: bucket layout is **tentative pending @@ -156,7 +139,7 @@ def _get_or_create(factory, name, *args, **kwargs): ["generator"], buckets=DATASET_GENERATION_DURATION_BUCKETS, ), - "datasets_cached": _get_or_create( + "datasets_cached": register_or_reuse( Gauge, "juniper_data_datasets_cached", "Number of datasets currently cached in storage", @@ -166,7 +149,7 @@ def _get_or_create(factory, name, *args, **kwargs): # actual generation work (cache misses); this 
counts every # incoming POST so capacity-planning queries don't undercount # deterministic re-POSTs (see roadmap §7 R4.5). - "post_total": _get_or_create( + "post_total": register_or_reuse( Counter, "juniper_data_dataset_post_total", "Total POST /v1/datasets requests, split by cache outcome", diff --git a/juniper_data/storage/cached.py b/juniper_data/storage/cached.py index d23e569..0e62f73 100644 --- a/juniper_data/storage/cached.py +++ b/juniper_data/storage/cached.py @@ -5,7 +5,11 @@ import numpy as np -from juniper_data.api.observability import set_datasets_cached +# ``set_datasets_cached`` is imported lazily inside ``_emit_cached_count`` +# below — top-level import here triggers a circular import via +# ``juniper_data.api.__init__`` → ``api.app`` → ``juniper_data.storage`` +# (introduced by PR #92). Lazy import breaks the cycle without changing +# any production behaviour. from juniper_data.core.models import DatasetMeta from juniper_data.storage.constants import DEFAULT_LIST_LIMIT, DEFAULT_LIST_OFFSET @@ -62,6 +66,10 @@ def _emit_cached_count(self) -> None: discipline used everywhere else in this class. """ try: + # Lazy import — see top-of-file comment for the cycle + # avoidance rationale. + from juniper_data.api.observability import set_datasets_cached + count = len(self._cache.list_datasets(limit=_CACHE_COUNT_PROBE_LIMIT)) set_datasets_cached(count) except Exception: diff --git a/pyproject.toml b/pyproject.toml index 688c26a..a86be56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ api = [ # >=0.1.1 to match the cascor / canopy floor (audit-doc C.2 fix); # juniper-ml#155 published 0.1.0a0, juniper-ml has since shipped # 0.1.1. 
- "juniper-observability>=0.1.1", + "juniper-observability>=0.2.0", ] test = [ "pytest>=7.0.0", From 0b5246f825c89a8a8a654463d62ceec3d24d8e96 Mon Sep 17 00:00:00 2001 From: Paul Calnon Date: Wed, 6 May 2026 08:24:07 -0500 Subject: [PATCH 2/2] ruff linter fix --- juniper_data/api/observability.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/juniper_data/api/observability.py b/juniper_data/api/observability.py index a5dc742..4469e13 100644 --- a/juniper_data/api/observability.py +++ b/juniper_data/api/observability.py @@ -115,9 +115,8 @@ def _ensure_dataset_metrics() -> dict: """ global _dataset_metrics if _dataset_metrics is None: - from prometheus_client import Counter, Gauge, Histogram - from juniper_observability import register_or_reuse + from prometheus_client import Counter, Gauge, Histogram _dataset_metrics = { "generations_total": register_or_reuse(