From e7c730be29e79271fc649fcb224c9447669eba7f Mon Sep 17 00:00:00 2001 From: TeddieHsiung Date: Thu, 16 Apr 2026 16:12:48 +0800 Subject: [PATCH 1/4] refactor(extract): preserve all candidates in cross-file call lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cross-file call resolution was using dict[str, str], causing dict-overwrite: when N nodes shared the same normalised label, only the last-iterated nid survived in the lookup table, silently dropping N-1 valid candidates. This commit changes the lookup table to dict[str, list[str]] with per-bucket uniqueness guaranteed via a seen-set, and adds list(dict.fromkeys(...)) dedup at consumption site as a belt-and-braces invariant. Resolution still emits a single edge per call for now — we pick candidates[0] as the target — preparing the ground for subsequent commits to emit proper AMBIGUOUS edges. Note that candidates[0] is the first-iterated nid, whereas the old dict kept the last-iterated one, so the chosen target can differ when labels collide; non-colliding labels resolve exactly as before. All existing tests pass. --- graphify/extract.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/graphify/extract.py b/graphify/extract.py index 333fa39a..db1b0e1f 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -3141,12 +3141,28 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict: # Cross-file call resolution for all languages # Each extractor saved unresolved calls in raw_calls. Now that we have all # nodes from all files, resolve any callee that exists in another file. - global_label_to_nid: dict[str, str] = {} + # + # The lookup table is keyed by normalised lowercase label and stores a list + # of candidate nids (not just one). When multiple nodes share the same + # normalised label — which is common for CRUD verbs like `.get()`, `.all()`, + # `.delete()` and for cross-language collisions — a plain ``dict[str, str]`` + # silently drops N-1 candidates via dict-overwrite. 
Preserving every + # candidate is a prerequisite for correct resolution; the consumption site + # below still picks ``candidates[0]`` so this commit is behaviour-equivalent + # to the previous implementation. + from collections import defaultdict + + global_label_to_nids: dict[str, list[str]] = defaultdict(list) + _seen_per_bucket: dict[str, set[str]] = defaultdict(set) for n in all_nodes: raw = n.get("label", "") normalised = raw.strip("()").lstrip(".") if normalised: - global_label_to_nid[normalised.lower()] = n["id"] + key = normalised.lower() + nid = n["id"] + if nid not in _seen_per_bucket[key]: + _seen_per_bucket[key].add(nid) + global_label_to_nids[key].append(nid) existing_pairs = {(e["source"], e["target"]) for e in all_edges} for result in per_file: @@ -3154,9 +3170,16 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict: callee = rc.get("callee", "") if not callee: continue - tgt = global_label_to_nid.get(callee.lower()) + # ``dict.fromkeys`` preserves order while deduping — belt-and-braces + # on top of the per-bucket dedup above. + candidates = list(dict.fromkeys( + global_label_to_nids.get(callee.lower(), []) + )) + if not candidates: + continue + tgt = candidates[0] caller = rc["caller_nid"] - if tgt and tgt != caller and (caller, tgt) not in existing_pairs: + if tgt != caller and (caller, tgt) not in existing_pairs: existing_pairs.add((caller, tgt)) all_edges.append({ "source": caller, From 58f3cc98bc69dd418d6a7822d0cae361933dd4a6 Mon Sep 17 00:00:00 2001 From: TeddieHsiung Date: Thu, 16 Apr 2026 16:13:22 +0800 Subject: [PATCH 2/4] fix(extract): emit AMBIGUOUS edges for name collisions with ambiguity_degree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When cross-file call resolution finds multiple candidates for a callee (e.g. `.get()` defined in 32 files), emitting a single edge to an arbitrary winner is indistinguishable from dict-overwrite. 
This commit fans out the edge to all candidates, marking each as AMBIGUOUS (confidence_score=0.2) and recording ambiguity_degree = number of candidates on each edge. Single-candidate resolution remains INFERRED at 0.8 (unchanged). Self-reference is filtered (caller is excluded from its own candidate list). Real-world impact on a 5-subsystem monorepo (TBA-backend, 997 files): - INFERRED calls: 2580 → 1230 (collisions correctly reclassified) - AMBIGUOUS calls: 0 → N (exposes previously-hidden ambiguity) Note: heavy collisions (CRUD verbs like `.get()` with 30+ candidates) cause edge explosion. The next commit addresses this with a fan-out cap. --- graphify/extract.py | 62 +++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/graphify/extract.py b/graphify/extract.py index db1b0e1f..feb45c8b 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -3170,27 +3170,57 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict: callee = rc.get("callee", "") if not callee: continue + caller = rc["caller_nid"] # ``dict.fromkeys`` preserves order while deduping — belt-and-braces - # on top of the per-bucket dedup above. + # on top of the per-bucket dedup above. Self-reference is filtered + # out: a node never calls itself through its own normalised label. candidates = list(dict.fromkeys( - global_label_to_nids.get(callee.lower(), []) + c for c in global_label_to_nids.get(callee.lower(), []) + if c != caller )) if not candidates: continue - tgt = candidates[0] - caller = rc["caller_nid"] - if tgt != caller and (caller, tgt) not in existing_pairs: - existing_pairs.add((caller, tgt)) - all_edges.append({ - "source": caller, - "target": tgt, - "relation": "calls", - "confidence": "INFERRED", - "confidence_score": 0.8, - "source_file": rc.get("source_file", ""), - "source_location": rc.get("source_location"), - "weight": 1.0, - }) + + # Invariant: ambiguity_degree MUST equal len(candidates). 
If this + # ever fires, the upstream dedup guarantees have been broken. + assert len(set(candidates)) == len(candidates), ( + f"duplicate nids in candidates for callee={callee!r}: {candidates}" + ) + + if len(candidates) == 1: + tgt = candidates[0] + if (caller, tgt) not in existing_pairs: + existing_pairs.add((caller, tgt)) + all_edges.append({ + "source": caller, + "target": tgt, + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.8, + "source_file": rc.get("source_file", ""), + "source_location": rc.get("source_location"), + "weight": 1.0, + }) + else: + # Multi-candidate → fan out one AMBIGUOUS edge per candidate. + # Picking an arbitrary winner here is indistinguishable from the + # old dict-overwrite bug; the downstream consumers need the + # ``ambiguity_degree`` signal to triage the result. + degree = len(candidates) + for tgt in candidates: + if (caller, tgt) not in existing_pairs: + existing_pairs.add((caller, tgt)) + all_edges.append({ + "source": caller, + "target": tgt, + "relation": "calls", + "confidence": "AMBIGUOUS", + "confidence_score": 0.2, + "source_file": rc.get("source_file", ""), + "source_location": rc.get("source_location"), + "ambiguity_degree": degree, + "weight": 1.0, + }) return { "nodes": all_nodes, From 353c97073993c772527785f21dd6c213b6c60128 Mon Sep 17 00:00:00 2001 From: TeddieHsiung Date: Thu, 16 Apr 2026 16:15:41 +0800 Subject: [PATCH 3/4] feat(extract): add max_ambiguity_fanout cap with truncation stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Labels with degree > 20 (typically generic verbs like `.get()`, `.all()`, `.delete()` in multi-subsystem monorepos) produce N-way AMBIGUOUS fanouts with no semantic value — the AST cannot disambiguate them regardless. On TBA-backend this caused edge count to jump 264% (10K → 37K). 
This commit adds a configurable cap: - extract() gains max_ambiguity_fanout kw-arg (default 20) - env var GRAPHIFY_MAX_AMBIGUITY_FANOUT overrides - When len(candidates) > cap → drop fan-out, record to stats - Stats surface in cross_file_call_stats with: - resolved_single / resolved_ambiguous / truncated_high_degree - truncated_examples (first 5 dropped labels) - max_ambiguity_fanout (effective value) build.py / export.py / watch.py propagate the stats into graph.json so downstream tools can see what was truncated. Real-world impact on TBA-backend: - Edges: 37166 (+264%) → 10529 (+3.1%) - truncated_high_degree: 1079 (examples: update, all, get, create, delete) - All other consumers (cluster, god_nodes, report) behave normally. --- graphify/build.py | 6 ++++ graphify/export.py | 3 ++ graphify/extract.py | 67 ++++++++++++++++++++++++++++++++++++++++++--- graphify/watch.py | 3 ++ 4 files changed, 75 insertions(+), 4 deletions(-) diff --git a/graphify/build.py b/graphify/build.py index 4d3a0b98..ab50be7d 100644 --- a/graphify/build.py +++ b/graphify/build.py @@ -63,6 +63,12 @@ def build_from_json(extraction: dict, *, directed: bool = False) -> nx.Graph: hyperedges = extraction.get("hyperedges", []) if hyperedges: G.graph["hyperedges"] = hyperedges + # Preserve cross-file call resolution stats (degree cap, truncation counts) + # so downstream exporters can surface them in graph.json without breaking + # existing consumers that expect the plain `nodes`/`edges` shape. 
+ stats = extraction.get("cross_file_call_stats") + if stats is not None: + G.graph["cross_file_call_stats"] = stats return G diff --git a/graphify/export.py b/graphify/export.py index 033ec66d..b9a3e76f 100644 --- a/graphify/export.py +++ b/graphify/export.py @@ -293,6 +293,9 @@ def to_json(G: nx.Graph, communities: dict[int, list[str]], output_path: str) -> conf = link.get("confidence", "EXTRACTED") link["confidence_score"] = _CONFIDENCE_SCORE_DEFAULTS.get(conf, 1.0) data["hyperedges"] = getattr(G, "graph", {}).get("hyperedges", []) + stats = getattr(G, "graph", {}).get("cross_file_call_stats") + if stats is not None: + data["cross_file_call_stats"] = stats with open(output_path, "w", encoding="utf-8") as f: json.dump(data, f, indent=2) diff --git a/graphify/extract.py b/graphify/extract.py index feb45c8b..02d675f4 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -3027,7 +3027,12 @@ def _check_tree_sitter_version() -> None: ) -def extract(paths: list[Path], cache_root: Path | None = None) -> dict: +def extract( + paths: list[Path], + cache_root: Path | None = None, + *, + max_ambiguity_fanout: int | None = None, +) -> dict: """Extract AST nodes and edges from a list of code files. Two-pass process: @@ -3040,6 +3045,15 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict: cache_root: explicit root for graphify-out/cache/ (overrides the inferred common path prefix). Pass Path('.') when running on a subdirectory so the cache stays at ./graphify-out/cache/. + max_ambiguity_fanout: cap on the number of AMBIGUOUS edges the + cross-file call resolver will emit for a single callee label. + When a normalised label matches more than this many candidate + nodes, the resolver drops the whole fan-out and records the + label under ``cross_file_call_stats.truncated_examples`` — this + prevents generic verbs (``.get()``, ``.all()``, ``.delete()``) + from inflating the graph with thousands of meaningless edges. 
+ ``None`` (default) falls back to the ``GRAPHIFY_MAX_AMBIGUITY_FANOUT`` + env var, then to ``20``. """ _check_tree_sitter_version() per_file: list[dict] = [] @@ -3147,10 +3161,30 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict: # normalised label — which is common for CRUD verbs like `.get()`, `.all()`, # `.delete()` and for cross-language collisions — a plain ``dict[str, str]`` # silently drops N-1 candidates via dict-overwrite. Preserving every - # candidate is a prerequisite for correct resolution; the consumption site - # below still picks ``candidates[0]`` so this commit is behaviour-equivalent - # to the previous implementation. + # candidate is a prerequisite for correct resolution. + # + # Guardrails: + # * Each bucket stores unique nids (defends against the same node id + # being appended twice when two of its label variants normalise to the + # same key, or when `all_nodes` transiently contains duplicates from + # layered extractors). + # * When the candidate pool exceeds `max_ambiguity_fanout`, the whole + # fan-out is dropped instead of emitting a flood of AMBIGUOUS edges. + # Picking a "random" winner at that degree would just resurrect the + # old dict-overwrite bug, and keeping all N edges explodes the graph + # for generic verbs that AST alone cannot disambiguate. 
from collections import defaultdict + import os as _os + + if max_ambiguity_fanout is None: + _env = _os.environ.get("GRAPHIFY_MAX_AMBIGUITY_FANOUT") + if _env is not None: + try: + max_ambiguity_fanout = int(_env) + except ValueError: + max_ambiguity_fanout = 20 + else: + max_ambiguity_fanout = 20 global_label_to_nids: dict[str, list[str]] = defaultdict(list) _seen_per_bucket: dict[str, set[str]] = defaultdict(set) @@ -3165,6 +3199,13 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict: global_label_to_nids[key].append(nid) existing_pairs = {(e["source"], e["target"]) for e in all_edges} + + resolved_single = 0 + resolved_ambiguous = 0 + truncated_high_degree = 0 + truncated_labels_seen: list[str] = [] + _truncated_seen_set: set[str] = set() + for result in per_file: for rc in result.get("raw_calls", []): callee = rc.get("callee", "") @@ -3201,6 +3242,16 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict: "source_location": rc.get("source_location"), "weight": 1.0, }) + resolved_single += 1 + elif len(candidates) > max_ambiguity_fanout: + # Generic verbs (``.get()``, ``.all()``, ``.delete()``) with + # 30+ candidates are AST-undecidable. Drop the fan-out and + # surface the label in stats so downstream tools can audit. + truncated_high_degree += 1 + if callee not in _truncated_seen_set: + _truncated_seen_set.add(callee) + if len(truncated_labels_seen) < 5: + truncated_labels_seen.append(callee) else: # Multi-candidate → fan out one AMBIGUOUS edge per candidate. 
# Picking an arbitrary winner here is indistinguishable from the @@ -3221,12 +3272,20 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict: "ambiguity_degree": degree, "weight": 1.0, }) + resolved_ambiguous += 1 return { "nodes": all_nodes, "edges": all_edges, "input_tokens": 0, "output_tokens": 0, + "cross_file_call_stats": { + "resolved_single": resolved_single, + "resolved_ambiguous": resolved_ambiguous, + "truncated_high_degree": truncated_high_degree, + "truncated_examples": truncated_labels_seen, + "max_ambiguity_fanout": max_ambiguity_fanout, + }, } diff --git a/graphify/watch.py b/graphify/watch.py index 79d55c6b..62ed2c50 100644 --- a/graphify/watch.py +++ b/graphify/watch.py @@ -53,6 +53,9 @@ def _rebuild_code(watch_path: Path, *, follow_symlinks: bool = False) -> bool: "hyperedges": existing.get("hyperedges", []), "input_tokens": 0, "output_tokens": 0, + # Preserve fresh AST-pass cross_file_call_stats; merging + # with a stale semantic pass would misreport degree caps. + "cross_file_call_stats": result.get("cross_file_call_stats"), } except Exception: pass # corrupt graph.json - proceed with AST-only From c58d2d3caad95595e813cdbf117f95d1eee996aa Mon Sep 17 00:00:00 2001 From: TeddieHsiung Date: Thu, 16 Apr 2026 16:15:58 +0800 Subject: [PATCH 4/4] test(extract): add cross-file collision resolution tests + fixtures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds 8 tests covering: - Single candidate → INFERRED 0.8 (unchanged behaviour) - N candidates → N AMBIGUOUS edges with ambiguity_degree=N - Self-reference filter correctness - Default cap (20) drops high-degree fanouts and records to stats - Cap override via kw-arg - Cap override via GRAPHIFY_MAX_AMBIGUITY_FANOUT env var - ambiguity_degree always matches actual fan-out count (invariant) - Unique targets within each call-site's fan-out Fixtures (tests/fixtures/collision/*.py) provide minimal Python programs that exercise each case. 
--- tests/fixtures/collision/a.py | 9 ++ tests/fixtures/collision/b.py | 5 + tests/fixtures/collision/c.py | 9 ++ tests/fixtures/collision/d.py | 10 ++ tests/test_cross_file_collision.py | 194 +++++++++++++++++++++++++++++ 5 files changed, 227 insertions(+) create mode 100644 tests/fixtures/collision/a.py create mode 100644 tests/fixtures/collision/b.py create mode 100644 tests/fixtures/collision/c.py create mode 100644 tests/fixtures/collision/d.py create mode 100644 tests/test_cross_file_collision.py diff --git a/tests/fixtures/collision/a.py b/tests/fixtures/collision/a.py new file mode 100644 index 00000000..7430773a --- /dev/null +++ b/tests/fixtures/collision/a.py @@ -0,0 +1,9 @@ +"""Fixture for cross-file call-collision tests — module A.""" + + +def shared(): + return "a" + + +def only_in_a(): + return "a-only" diff --git a/tests/fixtures/collision/b.py b/tests/fixtures/collision/b.py new file mode 100644 index 00000000..2f073286 --- /dev/null +++ b/tests/fixtures/collision/b.py @@ -0,0 +1,5 @@ +"""Fixture for cross-file call-collision tests — module B (duplicate name).""" + + +def shared(): + return "b" diff --git a/tests/fixtures/collision/c.py b/tests/fixtures/collision/c.py new file mode 100644 index 00000000..72a59ba2 --- /dev/null +++ b/tests/fixtures/collision/c.py @@ -0,0 +1,9 @@ +"""Fixture for cross-file call-collision tests — unique-candidate caller. + +c.py calls `only_in_a()` which exists in only one other file (a.py), +so the call must resolve to a single INFERRED edge at 0.8. +""" + + +def caller_unique(): + return only_in_a() # noqa: F821 — resolved via cross-file pass diff --git a/tests/fixtures/collision/d.py b/tests/fixtures/collision/d.py new file mode 100644 index 00000000..51a44663 --- /dev/null +++ b/tests/fixtures/collision/d.py @@ -0,0 +1,10 @@ +"""Fixture for cross-file call-collision tests — ambiguous caller. 
+ +d.py calls `shared()` which exists in both a.py and b.py, so the cross-file +resolution pass must emit two AMBIGUOUS edges (score 0.2, ambiguity_degree=2) +instead of silently picking one winner. +""" + + +def caller_ambiguous(): + return shared() # noqa: F821 — resolved via cross-file pass diff --git a/tests/test_cross_file_collision.py b/tests/test_cross_file_collision.py new file mode 100644 index 00000000..5f88e4f3 --- /dev/null +++ b/tests/test_cross_file_collision.py @@ -0,0 +1,194 @@ +"""Tests for cross-file call resolution with label collisions. + +Bug fix: when multiple nodes share the same normalised label across files +(e.g. two Python functions named `shared`, or the ubiquitous PHP `.get()`), +the legacy resolver silently picked one winner via dict overwrite, losing +(N-1)/N of the signal. The new resolver must: + + * Emit a single INFERRED edge (0.8) when exactly one candidate exists. + * Emit N AMBIGUOUS edges (0.2, with `ambiguity_degree`) when 1 < N <= cap (default 20); none above it. + * Never emit a self-referential edge. 
+""" +from pathlib import Path + +from graphify.extract import extract + +FIXTURES = Path(__file__).parent / "fixtures" / "collision" + + +def _calls(result): + return [e for e in result["edges"] if e["relation"] == "calls"] + + +def test_unique_candidate_keeps_inferred_high_confidence(): + """c.py calls only_in_a(), which exists in exactly one other file → INFERRED 0.8.""" + files = sorted(FIXTURES.glob("*.py")) + result = extract(files) + node_by_label = {n["label"]: n["id"] for n in result["nodes"]} + + src = node_by_label["caller_unique()"] + tgt = node_by_label["only_in_a()"] + + matches = [e for e in _calls(result) if e["source"] == src and e["target"] == tgt] + assert len(matches) == 1, f"Expected exactly one caller_unique → only_in_a edge, got {matches}" + edge = matches[0] + assert edge["confidence"] == "INFERRED", f"Expected INFERRED, got {edge['confidence']}" + assert edge["confidence_score"] == 0.8, f"Expected 0.8, got {edge['confidence_score']}" + # Backward compat: single-candidate edges must not carry ambiguity_degree. + assert "ambiguity_degree" not in edge, f"Single-candidate edge should not have ambiguity_degree: {edge}" + + +def test_multi_candidate_emits_ambiguous_edges_to_all_candidates(): + """d.py calls shared(), which exists in a.py and b.py → 2 AMBIGUOUS edges at 0.2.""" + files = sorted(FIXTURES.glob("*.py")) + result = extract(files) + node_by_label = {n["label"]: n["id"] for n in result["nodes"]} + + src = node_by_label["caller_ambiguous()"] + # There are two `shared()` nodes (one per file); look them up by id prefix. + shared_ids = sorted(n["id"] for n in result["nodes"] if n["label"] == "shared()") + assert len(shared_ids) == 2, f"Expected two shared() nodes, got {shared_ids}" + + edges_from_caller = [e for e in _calls(result) if e["source"] == src] + # Only edges into the two shared() targets should be AMBIGUOUS from this caller. 
+ edges_to_shared = [e for e in edges_from_caller if e["target"] in shared_ids] + assert len(edges_to_shared) == 2, ( + f"Expected 2 AMBIGUOUS edges from caller_ambiguous to both shared() targets, " + f"got {edges_to_shared}" + ) + + for edge in edges_to_shared: + assert edge["confidence"] == "AMBIGUOUS", f"Expected AMBIGUOUS, got {edge}" + assert edge["confidence_score"] == 0.2, f"Expected 0.2, got {edge}" + assert edge.get("ambiguity_degree") == 2, ( + f"Expected ambiguity_degree=2, got {edge.get('ambiguity_degree')} in {edge}" + ) + + # Both candidates must be covered exactly once (no duplicates, no winner-takes-all). + targets = sorted(e["target"] for e in edges_to_shared) + assert targets == shared_ids, f"Candidate coverage mismatch: {targets} vs {shared_ids}" + + +def test_no_self_referential_cross_file_calls(): + """A caller must never receive a cross-file edge pointing back at itself.""" + files = sorted(FIXTURES.glob("*.py")) + result = extract(files) + for edge in _calls(result): + assert edge["source"] != edge["target"], f"Self-loop in cross-file call edge: {edge}" + + +def test_ambiguous_edge_carries_source_location(): + """AMBIGUOUS cross-file edges preserve source_file and source_location metadata.""" + files = sorted(FIXTURES.glob("*.py")) + result = extract(files) + node_by_label = {n["label"]: n["id"] for n in result["nodes"]} + src = node_by_label["caller_ambiguous()"] + + ambiguous_edges = [ + e for e in _calls(result) + if e["source"] == src and e.get("confidence") == "AMBIGUOUS" + ] + assert ambiguous_edges, "No AMBIGUOUS edges emitted" + for edge in ambiguous_edges: + assert edge.get("source_file"), f"Missing source_file: {edge}" + assert edge.get("source_location"), f"Missing source_location: {edge}" + assert edge.get("weight") == 1.0, f"Weight should be 1.0: {edge}" + + +# --------------------------------------------------------------------------- +# W3.5 — degree cap + ambiguity_degree consistency guardrails +# 
--------------------------------------------------------------------------- + +def _make_high_degree_fixture(tmp_path, n_definitions: int): + """Create N files each defining `def foo()` plus one file that calls foo().""" + files = [] + for i in range(n_definitions): + p = tmp_path / f"def_{i:02d}.py" + p.write_text(f'def foo():\n return {i}\n', encoding="utf-8") + files.append(p) + caller = tmp_path / "caller.py" + caller.write_text( + '"""Caller module that triggers the ambiguous fan-out."""\n' + 'def entry_point():\n' + ' return foo() # noqa: F821 — resolved cross-file\n', + encoding="utf-8", + ) + files.append(caller) + return sorted(files) + + +def test_degree_cap_respected_default_20(tmp_path): + """25 candidate definitions > default cap (20) → no AMBIGUOUS edges, stats log truncation.""" + files = _make_high_degree_fixture(tmp_path, n_definitions=25) + result = extract(files) + stats = result.get("cross_file_call_stats") + assert stats is not None, "extract() must expose cross_file_call_stats" + assert stats["max_ambiguity_fanout"] == 20 + + ambiguous = [e for e in _calls(result) if e.get("confidence") == "AMBIGUOUS"] + assert ambiguous == [], ( + f"Expected zero AMBIGUOUS edges under default cap=20 with 25 candidates, " + f"got {len(ambiguous)}" + ) + assert stats["truncated_high_degree"] >= 1, ( + f"truncated_high_degree must register the drop: {stats}" + ) + assert "foo" in stats["truncated_examples"], ( + f"Expected 'foo' in truncated_examples, got {stats['truncated_examples']}" + ) + + +def test_degree_cap_override_allows_fanout(tmp_path): + """Explicit cap=30 lets the same 25-candidate scenario fan out normally.""" + files = _make_high_degree_fixture(tmp_path, n_definitions=25) + result = extract(files, max_ambiguity_fanout=30) + stats = result["cross_file_call_stats"] + assert stats["max_ambiguity_fanout"] == 30 + assert stats["truncated_high_degree"] == 0 + + ambiguous = [e for e in _calls(result) if e.get("confidence") == "AMBIGUOUS"] + assert 
len(ambiguous) == 25, ( + f"Expected 25 AMBIGUOUS edges with cap=30 and 25 candidates, got {len(ambiguous)}" + ) + for edge in ambiguous: + assert edge["ambiguity_degree"] == 25, ( + f"Every AMBIGUOUS edge must report degree=25, got {edge}" + ) + + +def test_ambiguity_degree_matches_fanout(tmp_path): + """With 5 same-name definitions + 1 caller, each AMBIGUOUS edge reports degree=5 exactly. + + Belt-and-braces guard against the P1 bug where `ambiguity_degree` could + drift above the actual fan-out if the candidate pool contained duplicate + nids (e.g. from layered extractors or label-variant collisions). + """ + files = _make_high_degree_fixture(tmp_path, n_definitions=5) + result = extract(files) + ambiguous = [e for e in _calls(result) if e.get("confidence") == "AMBIGUOUS"] + assert len(ambiguous) == 5, f"Expected 5 AMBIGUOUS edges, got {len(ambiguous)}" + + # Every edge's ambiguity_degree must equal the real number of emitted edges. + observed_fanout = len(ambiguous) + for edge in ambiguous: + assert edge["ambiguity_degree"] == observed_fanout == 5, ( + f"ambiguity_degree={edge['ambiguity_degree']} must match fanout={observed_fanout}: {edge}" + ) + + # Targets must be unique — no duplicate edges to the same node. + targets = [e["target"] for e in ambiguous] + assert len(set(targets)) == len(targets), ( + f"Duplicate AMBIGUOUS edge targets indicate candidate pool dedup failure: {targets}" + ) + + +def test_env_var_overrides_default_cap(tmp_path, monkeypatch): + """GRAPHIFY_MAX_AMBIGUITY_FANOUT env var is honoured when no explicit kwarg.""" + files = _make_high_degree_fixture(tmp_path, n_definitions=25) + monkeypatch.setenv("GRAPHIFY_MAX_AMBIGUITY_FANOUT", "30") + result = extract(files) + stats = result["cross_file_call_stats"] + assert stats["max_ambiguity_fanout"] == 30 + assert stats["truncated_high_degree"] == 0 + ambiguous = [e for e in _calls(result) if e.get("confidence") == "AMBIGUOUS"] + assert len(ambiguous) == 25