From c6f6925d3dfab541579817589e24e434f8121c9f Mon Sep 17 00:00:00 2001 From: Kailas Mahavarkar <66670953+KailasMahavarkar@users.noreply.github.com> Date: Mon, 20 Apr 2026 15:37:01 +0530 Subject: [PATCH 1/2] fix: add debug logging at silent exception swallows Audit of `except Exception: pass` sites in src/graphstore/ turned up spots in 7 files that were swallowing real failures without any trace. Each site stays behaviourally identical - the same except branch keeps running - but now emits logger.debug(err) so dev-time tracing of "why is this empty/missing/zero" works. Sites touched: core/optimizer.py:635 document_store.delete_document eviction could silently leak disk space when delete failed. Now logs slot + err. dsl/handlers/ingest.py:263 put_node/put_edge for NER entity link silently dropped, hiding incomplete graphs. dsl/handlers/mutations.py:314, 319 same as above for sentence-level entity links on UPDATE paths. dsl/sys/pipeline.py:251 evidence edge (obs -> ev) silently dropped, breaking evidence chains. core/evolve/_impl.py:268 memory.measure() failure silently zeroed total_bytes metric. core/evolve/_impl.py:284 optimizer.health_check() failure silently zeroed tombstone/string_bloat metrics. core/memory.py:84 document_store.stats() failure silently dropped disk-usage line in memory report. ingest/connector.py:43 user progress_callback exception silently ignored with no breadcrumb. Added logger initialization to optimizer.py, mutations.py, memory.py (other files already had one). The remaining ~17 except-pass sites in src/ are legitimate (cleanup-during-cleanup, hardware probing, decode-with-replacement, optional-feature detection) and intentionally opaque. 574 tests pass (pre-existing flake in test_subgraph_extraction is on main too, unrelated to this change). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/graphstore/core/evolve/_impl.py | 6 ++++-- src/graphstore/core/memory.py | 7 +++++-- src/graphstore/core/optimizer.py | 8 ++++++-- src/graphstore/dsl/handlers/ingest.py | 8 ++++---- src/graphstore/dsl/handlers/mutations.py | 11 +++++++---- src/graphstore/dsl/sys/pipeline.py | 4 ++-- src/graphstore/ingest/connector.py | 4 ++-- 7 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/graphstore/core/evolve/_impl.py b/src/graphstore/core/evolve/_impl.py index fab4f336..9a4d6c3b 100644 --- a/src/graphstore/core/evolve/_impl.py +++ b/src/graphstore/core/evolve/_impl.py @@ -265,7 +265,8 @@ def compute_signals(self) -> dict: from graphstore.core.memory import measure m = measure(store, gs._vector_store, gs._document_store) total_bytes = m.get("total", 0) - except Exception: + except Exception as err: + logger.debug("memory.measure() failed during evolve telemetry: %s", err) total_bytes = 0 ceiling = getattr(store, "_ceiling_bytes", 1) or 1 @@ -281,7 +282,8 @@ def compute_signals(self) -> dict: h = health_check(store, gs._vector_store, gs._document_store) tombstone_ratio = h.get("tombstone_ratio", 0.0) string_bloat = h.get("string_bloat", 0.0) - except Exception: + except Exception as err: + logger.debug("optimizer.health_check() failed during evolve telemetry: %s", err) tombstone_ratio = 0.0 string_bloat = 0.0 diff --git a/src/graphstore/core/memory.py b/src/graphstore/core/memory.py index d6dd72bc..f5e7ef39 100755 --- a/src/graphstore/core/memory.py +++ b/src/graphstore/core/memory.py @@ -4,7 +4,10 @@ graphstore store/vector/document types. 
""" +import logging import sys + +logger = logging.getLogger(__name__) from graphstore.core.errors import CeilingExceeded from graphstore.algos.measure import ( estimate_bytes as _algo_estimate_bytes, @@ -81,8 +84,8 @@ def measure(store, vector_store=None, document_store=None, skip_csr: bool = Fals try: stats = document_store.stats() report["document_store_disk"] = stats.get("total_bytes", 0) - except Exception: - pass + except Exception as err: + logger.debug("document_store.stats() failed during memory accounting: %s", err) return report diff --git a/src/graphstore/core/optimizer.py b/src/graphstore/core/optimizer.py index 7b4844de..039e863d 100644 --- a/src/graphstore/core/optimizer.py +++ b/src/graphstore/core/optimizer.py @@ -6,8 +6,12 @@ from __future__ import annotations +import logging + import numpy as np +logger = logging.getLogger(__name__) + from graphstore.algos.compact import ( apply_slot_remap_to_edges as _algo_apply_slot_remap, build_live_mask as _algo_build_live_mask, @@ -632,8 +636,8 @@ def _evict_nodes(store: CoreStore, slots_to_evict: list[int], vector_store=None, if document_store is not None: try: document_store.delete_document(slot) - except Exception: - pass + except Exception as err: + logger.debug("document_store.delete_document(%s) failed during eviction: %s", slot, err) # Tombstone store.columns.clear(slot) diff --git a/src/graphstore/dsl/handlers/ingest.py b/src/graphstore/dsl/handlers/ingest.py index b95f9f70..75ff2bee 100644 --- a/src/graphstore/dsl/handlers/ingest.py +++ b/src/graphstore/dsl/handlers/ingest.py @@ -260,12 +260,12 @@ def _ingest_body(self, q, result, chunks, parent_id, parent_kind, ent_id = f"ent:{s}" try: self.store.put_node(ent_id, "entity", {"name": ent.text}) - except Exception: - pass + except Exception as err: + logger.debug("put_node(%s) skipped during ingest entity link: %s", ent_id, err) try: self.store.put_edge(chunk_id, ent_id, "mentions") - except Exception: - pass + except Exception as err: + 
logger.debug("put_edge(%s -> %s) skipped during ingest entity link: %s", chunk_id, ent_id, err) # Embed chunk text for vector retrieval embed_batch.append((chunk_slot, embed_text)) diff --git a/src/graphstore/dsl/handlers/mutations.py b/src/graphstore/dsl/handlers/mutations.py index 5b22dbfa..01b6b474 100644 --- a/src/graphstore/dsl/handlers/mutations.py +++ b/src/graphstore/dsl/handlers/mutations.py @@ -1,8 +1,11 @@ """Mutation handlers for the DSL executor (create, update, delete, merge, batch).""" +import logging import time from collections import deque +logger = logging.getLogger(__name__) + import numpy as np from scipy.sparse import csr_matrix @@ -311,12 +314,12 @@ def _create_node(self, q: CreateNode) -> Result: ent_id = f"ent:{ent_slug_val}" try: self.store.put_node(ent_id, "entity", {"name": ent_display}) - except Exception: - pass + except Exception as err: + logger.debug("put_node(%s) skipped during mutation entity link: %s", ent_id, err) try: self.store.put_edge(sent_id, ent_id, "mentions") - except Exception: - pass + except Exception as err: + logger.debug("put_edge(%s -> %s) skipped during mutation entity link: %s", sent_id, ent_id, err) if batch_embed_sentences: self._batch_embed_and_store(list(zip(sent_slots, sentences))) diff --git a/src/graphstore/dsl/sys/pipeline.py b/src/graphstore/dsl/sys/pipeline.py index 56a40b34..2a7524ae 100644 --- a/src/graphstore/dsl/sys/pipeline.py +++ b/src/graphstore/dsl/sys/pipeline.py @@ -248,8 +248,8 @@ def _consolidate(self, q: SysConsolidate) -> Result: if ev_id: try: store.put_edge(obs_id, ev_id, "evidence") - except Exception: - pass + except Exception as err: + logger.debug("evidence edge %s -> %s skipped: %s", obs_id, ev_id, err) created += 1 except Exception: diff --git a/src/graphstore/ingest/connector.py b/src/graphstore/ingest/connector.py index 1c6d0c22..7a6cfdfb 100755 --- a/src/graphstore/ingest/connector.py +++ b/src/graphstore/ingest/connector.py @@ -40,8 +40,8 @@ def connect_all(store, 
vector_store, threshold=0.85, where_expr=None, executor=N if progress_callback is not None and slot % 100 == 0: try: progress_callback(slot, n) - except Exception: - pass + except Exception as err: + logger.debug("user progress_callback raised (ignored): %s", err) if not live[slot] or not vector_store.has_vector(slot): continue From f04cf270ebe29c0ece123ebf9a63232894c7e92d Mon Sep 17 00:00:00 2001 From: Kailas Mahavarkar <66670953+KailasMahavarkar@users.noreply.github.com> Date: Mon, 20 Apr 2026 15:42:10 +0530 Subject: [PATCH 2/2] fix(test): add --dist loadgroup so xdist_group marker is respected tests/test_e2e_real_embedder.py uses a module-scoped `brain` fixture (persistent GraphStore + real embedder) that Phase 1 tests populate via INGEST and Phase 3+4 tests read via CREATE EDGE / RECALL / SUBGRAPH. The module is tagged `pytest.mark.xdist_group("e2e_real_embedder")` so all tests land on the same xdist worker - otherwise each worker forks its own empty GraphStore and downstream tests hit NodeNotFound on the IDs ingest created in a different worker. But pytest-xdist's default scheduler (LoadScheduling) ignores xdist_group markers. The grouping is only honored when the scheduler is set to `loadgroup` (`loadfile` would also keep this single module on one worker, but by file rather than by marker, and `worksteal` does not honor the marker; none of these were configured here). Result before this fix: running the test module via the default `uv run pytest` addopts (-n 4) split the six Ingest tests across four workers, so the `brain` fixture Phase 3/4 saw was populated by at most one Ingest test - the other five paper: IDs did not exist. Failures: TestKnowledgeGraph::test_create_topic_hierarchy TestKnowledgeGraph::test_recall_from_paper TestKnowledgeGraph::test_subgraph_extraction TestKnowledgeGraph::test_traverse_topic_graph TestAgentLifecycle::test_checkpoint_and_stats TestRealCognitiveRetrieval::test_what_do_transformers_and_bert... 
TestRealCognitiveRetrieval::test_cross_document_search TestSemanticRetrieval::test_similar_to_finds_attention_content Serial or --dist loadgroup: all pass. Fix: set `--dist loadgroup` in addopts. Full suite 1802 passed, 101 skipped under default invocation. Co-Authored-By: Claude Opus 4.7 (1M context) --- pyproject.toml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1263cabd..7a3d123b 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,7 +128,14 @@ testpaths = ["tests"] # -n 4: 4 xdist workers. With the 2-thread BLAS cap set in conftest.py # that's 4 * 2 = 8 threads max. Measured: 118s single -> 66s (-44%). # Override per run via `pytest -n 0` (serial) or `pytest -n auto`. -addopts = "-n 4 --timeout=60" +# +# --dist loadgroup: respect @pytest.mark.xdist_group so modules that share +# a module-scoped fixture (e.g. tests/test_e2e_real_embedder.py with a +# persistent GraphStore across classes) stay on one worker. Default +# LoadScheduling ignored the marker and fragmented module state across +# workers, breaking tests that depended on ingest state established in +# an earlier test class in the same module. +addopts = "-n 4 --timeout=60 --dist loadgroup" markers = [ "needs_embedder: embedder test (model2vec is now core, kept for compat)", "needs_fastembed: requires graphstore[embedders-extra] (fastembed)",