From e7c730be29e79271fc649fcb224c9447669eba7f Mon Sep 17 00:00:00 2001
From: TeddieHsiung <jackshiung@gmail.com>
Date: Thu, 16 Apr 2026 16:12:48 +0800
Subject: [PATCH 1/4] refactor(extract): preserve all candidates in cross-file
 call lookup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The cross-file call resolution was using dict[str, str], causing
dict-overwrite: when N nodes shared the same normalised label, only
the last-iterated nid survived in the lookup table, silently dropping
N-1 valid candidates.

This commit changes the lookup table to dict[str, list[str]] with
per-bucket uniqueness guaranteed via a seen-set, and adds
list(dict.fromkeys(...)) dedup at consumption site as a belt-and-braces
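invariant. Resolution still emits a single edge per call site — we pick
candidates[0] as the target — preparing the ground for subsequent
commits to emit proper AMBIGUOUS edges.

Behaviour is unchanged for labels with a single candidate. For colliding
labels the chosen target moves from the last-iterated nid (the survivor
of the old dict-overwrite) to the first-iterated one. All existing tests
pass.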
---
 graphify/extract.py | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/graphify/extract.py b/graphify/extract.py
index 333fa39a..db1b0e1f 100644
--- a/graphify/extract.py
+++ b/graphify/extract.py
@@ -3141,12 +3141,28 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
     # Cross-file call resolution for all languages
     # Each extractor saved unresolved calls in raw_calls. Now that we have all
     # nodes from all files, resolve any callee that exists in another file.
-    global_label_to_nid: dict[str, str] = {}
+    #
+    # The lookup table is keyed by normalised lowercase label and stores a list
+    # of candidate nids (not just one). When multiple nodes share the same
+    # normalised label — which is common for CRUD verbs like `.get()`, `.all()`,
+    # `.delete()` and for cross-language collisions — a plain ``dict[str, str]``
+    # silently drops N-1 candidates via dict-overwrite. Preserving every
+    # candidate is a prerequisite for correct resolution; the consumption site
+    # below still picks ``candidates[0]`` so this commit is behaviour-equivalent
+    # to the previous implementation.
+    from collections import defaultdict
+
+    global_label_to_nids: dict[str, list[str]] = defaultdict(list)
+    _seen_per_bucket: dict[str, set[str]] = defaultdict(set)
     for n in all_nodes:
         raw = n.get("label", "")
         normalised = raw.strip("()").lstrip(".")
         if normalised:
-            global_label_to_nid[normalised.lower()] = n["id"]
+            key = normalised.lower()
+            nid = n["id"]
+            if nid not in _seen_per_bucket[key]:
+                _seen_per_bucket[key].add(nid)
+                global_label_to_nids[key].append(nid)
 
     existing_pairs = {(e["source"], e["target"]) for e in all_edges}
     for result in per_file:
@@ -3154,9 +3170,16 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
             callee = rc.get("callee", "")
             if not callee:
                 continue
-            tgt = global_label_to_nid.get(callee.lower())
+            # ``dict.fromkeys`` preserves order while deduping — belt-and-braces
+            # on top of the per-bucket dedup above.
+            candidates = list(dict.fromkeys(
+                global_label_to_nids.get(callee.lower(), [])
+            ))
+            if not candidates:
+                continue
+            tgt = candidates[0]
             caller = rc["caller_nid"]
-            if tgt and tgt != caller and (caller, tgt) not in existing_pairs:
+            if tgt != caller and (caller, tgt) not in existing_pairs:
                 existing_pairs.add((caller, tgt))
                 all_edges.append({
                     "source": caller,

From 58f3cc98bc69dd418d6a7822d0cae361933dd4a6 Mon Sep 17 00:00:00 2001
From: TeddieHsiung <jackshiung@gmail.com>
Date: Thu, 16 Apr 2026 16:13:22 +0800
Subject: [PATCH 2/4] fix(extract): emit AMBIGUOUS edges for name collisions
 with ambiguity_degree
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When cross-file call resolution finds multiple candidates for a callee
(e.g. `.get()` defined in 32 files), emitting a single edge to an
arbitrary winner is indistinguishable from dict-overwrite. This commit
fans out the edge to all candidates, marking each as AMBIGUOUS
(confidence_score=0.2) and recording ambiguity_degree = number of
candidates on each edge.

Single-candidate resolution remains INFERRED at 0.8 (unchanged).
Self-reference is filtered (caller is excluded from its own candidate list).

Real-world impact on a 5-subsystem monorepo (TBA-backend, 997 files):
- INFERRED calls: 2580 → 1230 (collisions correctly reclassified)
- AMBIGUOUS calls: 0 → N (exposes previously-hidden ambiguity)

Note: heavy collisions (CRUD verbs like `.get()` with 30+ candidates)
cause edge explosion. The next commit addresses this with a fan-out cap.
---
 graphify/extract.py | 62 +++++++++++++++++++++++++++++++++------------
 1 file changed, 46 insertions(+), 16 deletions(-)

diff --git a/graphify/extract.py b/graphify/extract.py
index db1b0e1f..feb45c8b 100644
--- a/graphify/extract.py
+++ b/graphify/extract.py
@@ -3170,27 +3170,57 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
             callee = rc.get("callee", "")
             if not callee:
                 continue
+            caller = rc["caller_nid"]
             # ``dict.fromkeys`` preserves order while deduping — belt-and-braces
-            # on top of the per-bucket dedup above.
+            # on top of the per-bucket dedup above. Self-reference is filtered
+            # out: a node never calls itself through its own normalised label.
             candidates = list(dict.fromkeys(
-                global_label_to_nids.get(callee.lower(), [])
+                c for c in global_label_to_nids.get(callee.lower(), [])
+                if c != caller
             ))
             if not candidates:
                 continue
-            tgt = candidates[0]
-            caller = rc["caller_nid"]
-            if tgt != caller and (caller, tgt) not in existing_pairs:
-                existing_pairs.add((caller, tgt))
-                all_edges.append({
-                    "source": caller,
-                    "target": tgt,
-                    "relation": "calls",
-                    "confidence": "INFERRED",
-                    "confidence_score": 0.8,
-                    "source_file": rc.get("source_file", ""),
-                    "source_location": rc.get("source_location"),
-                    "weight": 1.0,
-                })
+
+            # Invariant: ambiguity_degree MUST equal len(candidates). If this
+            # ever fires, the upstream dedup guarantees have been broken.
+            assert len(set(candidates)) == len(candidates), (
+                f"duplicate nids in candidates for callee={callee!r}: {candidates}"
+            )
+
+            if len(candidates) == 1:
+                tgt = candidates[0]
+                if (caller, tgt) not in existing_pairs:
+                    existing_pairs.add((caller, tgt))
+                    all_edges.append({
+                        "source": caller,
+                        "target": tgt,
+                        "relation": "calls",
+                        "confidence": "INFERRED",
+                        "confidence_score": 0.8,
+                        "source_file": rc.get("source_file", ""),
+                        "source_location": rc.get("source_location"),
+                        "weight": 1.0,
+                    })
+            else:
+                # Multi-candidate → fan out one AMBIGUOUS edge per candidate.
+                # Picking an arbitrary winner here is indistinguishable from the
+                # old dict-overwrite bug; the downstream consumers need the
+                # ``ambiguity_degree`` signal to triage the result.
+                degree = len(candidates)
+                for tgt in candidates:
+                    if (caller, tgt) not in existing_pairs:
+                        existing_pairs.add((caller, tgt))
+                        all_edges.append({
+                            "source": caller,
+                            "target": tgt,
+                            "relation": "calls",
+                            "confidence": "AMBIGUOUS",
+                            "confidence_score": 0.2,
+                            "source_file": rc.get("source_file", ""),
+                            "source_location": rc.get("source_location"),
+                            "ambiguity_degree": degree,
+                            "weight": 1.0,
+                        })
 
     return {
         "nodes": all_nodes,

From 353c97073993c772527785f21dd6c213b6c60128 Mon Sep 17 00:00:00 2001
From: TeddieHsiung <jackshiung@gmail.com>
Date: Thu, 16 Apr 2026 16:15:41 +0800
Subject: [PATCH 3/4] feat(extract): add max_ambiguity_fanout cap with
 truncation stats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Labels with degree > 20 (typically generic verbs like `.get()`,
`.all()`, `.delete()` in multi-subsystem monorepos) produce N-way
AMBIGUOUS fanouts with no semantic value — the AST cannot
disambiguate them regardless. On TBA-backend this caused edge count
to jump 264% (10K → 37K).

This commit adds a configurable cap:
- extract() gains max_ambiguity_fanout kw-arg (default 20)
- env var GRAPHIFY_MAX_AMBIGUITY_FANOUT overrides
- When len(candidates) > cap → drop fan-out, record to stats
- Stats surface in cross_file_call_stats with:
    - resolved_single / resolved_ambiguous / truncated_high_degree
    - truncated_examples (first 5 dropped labels)
    - max_ambiguity_fanout (effective value)

build.py / export.py / watch.py propagate the stats into graph.json
so downstream tools can see what was truncated.

Real-world impact on TBA-backend:
- Edges: 37166 (+264%) → 10529 (+3.1%)
- truncated_high_degree: 1079 (examples: update, all, get, create, delete)
- All other consumers (cluster, god_nodes, report) behave normally.
---
 graphify/build.py   |  6 ++++
 graphify/export.py  |  3 ++
 graphify/extract.py | 67 ++++++++++++++++++++++++++++++++++++++++++---
 graphify/watch.py   |  3 ++
 4 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/graphify/build.py b/graphify/build.py
index 4d3a0b98..ab50be7d 100644
--- a/graphify/build.py
+++ b/graphify/build.py
@@ -63,6 +63,12 @@ def build_from_json(extraction: dict, *, directed: bool = False) -> nx.Graph:
     hyperedges = extraction.get("hyperedges", [])
     if hyperedges:
         G.graph["hyperedges"] = hyperedges
+    # Preserve cross-file call resolution stats (degree cap, truncation counts)
+    # so downstream exporters can surface them in graph.json without breaking
+    # existing consumers that expect the plain `nodes`/`edges` shape.
+    stats = extraction.get("cross_file_call_stats")
+    if stats is not None:
+        G.graph["cross_file_call_stats"] = stats
     return G
 
 
diff --git a/graphify/export.py b/graphify/export.py
index 033ec66d..b9a3e76f 100644
--- a/graphify/export.py
+++ b/graphify/export.py
@@ -293,6 +293,9 @@ def to_json(G: nx.Graph, communities: dict[int, list[str]], output_path: str) ->
             conf = link.get("confidence", "EXTRACTED")
             link["confidence_score"] = _CONFIDENCE_SCORE_DEFAULTS.get(conf, 1.0)
     data["hyperedges"] = getattr(G, "graph", {}).get("hyperedges", [])
+    stats = getattr(G, "graph", {}).get("cross_file_call_stats")
+    if stats is not None:
+        data["cross_file_call_stats"] = stats
     with open(output_path, "w", encoding="utf-8") as f:
         json.dump(data, f, indent=2)
 
diff --git a/graphify/extract.py b/graphify/extract.py
index feb45c8b..02d675f4 100644
--- a/graphify/extract.py
+++ b/graphify/extract.py
@@ -3027,7 +3027,12 @@ def _check_tree_sitter_version() -> None:
         )
 
 
-def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
+def extract(
+    paths: list[Path],
+    cache_root: Path | None = None,
+    *,
+    max_ambiguity_fanout: int | None = None,
+) -> dict:
     """Extract AST nodes and edges from a list of code files.
 
     Two-pass process:
@@ -3040,6 +3045,15 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
         cache_root: explicit root for graphify-out/cache/ (overrides the
             inferred common path prefix). Pass Path('.') when running on a
             subdirectory so the cache stays at ./graphify-out/cache/.
+        max_ambiguity_fanout: cap on the number of AMBIGUOUS edges the
+            cross-file call resolver will emit for a single callee label.
+            When a normalised label matches more than this many candidate
+            nodes, the resolver drops the whole fan-out and records the
+            label under ``cross_file_call_stats.truncated_examples`` — this
+            prevents generic verbs (``.get()``, ``.all()``, ``.delete()``)
+            from inflating the graph with thousands of meaningless edges.
+            ``None`` (default) falls back to the ``GRAPHIFY_MAX_AMBIGUITY_FANOUT``
+            env var, then to ``20``.
     """
     _check_tree_sitter_version()
     per_file: list[dict] = []
@@ -3147,10 +3161,30 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
     # normalised label — which is common for CRUD verbs like `.get()`, `.all()`,
     # `.delete()` and for cross-language collisions — a plain ``dict[str, str]``
     # silently drops N-1 candidates via dict-overwrite. Preserving every
-    # candidate is a prerequisite for correct resolution; the consumption site
-    # below still picks ``candidates[0]`` so this commit is behaviour-equivalent
-    # to the previous implementation.
+    # candidate is a prerequisite for correct resolution.
+    #
+    # Guardrails:
+    #  * Each bucket stores unique nids (defends against the same node id
+    #    being appended twice when two of its label variants normalise to the
+    #    same key, or when `all_nodes` transiently contains duplicates from
+    #    layered extractors).
+    #  * When the candidate pool exceeds `max_ambiguity_fanout`, the whole
+    #    fan-out is dropped instead of emitting a flood of AMBIGUOUS edges.
+    #    Picking a "random" winner at that degree would just resurrect the
+    #    old dict-overwrite bug, and keeping all N edges explodes the graph
+    #    for generic verbs that AST alone cannot disambiguate.
     from collections import defaultdict
+    import os as _os
+
+    if max_ambiguity_fanout is None:
+        _env = _os.environ.get("GRAPHIFY_MAX_AMBIGUITY_FANOUT")
+        if _env is not None:
+            try:
+                max_ambiguity_fanout = int(_env)
+            except ValueError:
+                max_ambiguity_fanout = 20
+        else:
+            max_ambiguity_fanout = 20
 
     global_label_to_nids: dict[str, list[str]] = defaultdict(list)
     _seen_per_bucket: dict[str, set[str]] = defaultdict(set)
@@ -3165,6 +3199,13 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
                 global_label_to_nids[key].append(nid)
 
     existing_pairs = {(e["source"], e["target"]) for e in all_edges}
+
+    resolved_single = 0
+    resolved_ambiguous = 0
+    truncated_high_degree = 0
+    truncated_labels_seen: list[str] = []
+    _truncated_seen_set: set[str] = set()
+
     for result in per_file:
         for rc in result.get("raw_calls", []):
             callee = rc.get("callee", "")
@@ -3201,6 +3242,16 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
                         "source_location": rc.get("source_location"),
                         "weight": 1.0,
                     })
+                    resolved_single += 1
+            elif len(candidates) > max_ambiguity_fanout:
+                # Generic verbs (``.get()``, ``.all()``, ``.delete()``) with
+                # 30+ candidates are AST-undecidable. Drop the fan-out and
+                # surface the label in stats so downstream tools can audit.
+                truncated_high_degree += 1
+                if callee not in _truncated_seen_set:
+                    _truncated_seen_set.add(callee)
+                    if len(truncated_labels_seen) < 5:
+                        truncated_labels_seen.append(callee)
             else:
                 # Multi-candidate → fan out one AMBIGUOUS edge per candidate.
                 # Picking an arbitrary winner here is indistinguishable from the
@@ -3221,12 +3272,20 @@ def extract(paths: list[Path], cache_root: Path | None = None) -> dict:
                             "ambiguity_degree": degree,
                             "weight": 1.0,
                         })
+                        resolved_ambiguous += 1
 
     return {
         "nodes": all_nodes,
         "edges": all_edges,
         "input_tokens": 0,
         "output_tokens": 0,
+        "cross_file_call_stats": {
+            "resolved_single": resolved_single,
+            "resolved_ambiguous": resolved_ambiguous,
+            "truncated_high_degree": truncated_high_degree,
+            "truncated_examples": truncated_labels_seen,
+            "max_ambiguity_fanout": max_ambiguity_fanout,
+        },
     }
 
 
diff --git a/graphify/watch.py b/graphify/watch.py
index 79d55c6b..62ed2c50 100644
--- a/graphify/watch.py
+++ b/graphify/watch.py
@@ -53,6 +53,9 @@ def _rebuild_code(watch_path: Path, *, follow_symlinks: bool = False) -> bool:
                     "hyperedges": existing.get("hyperedges", []),
                     "input_tokens": 0,
                     "output_tokens": 0,
+                    # Preserve fresh AST-pass cross_file_call_stats; merging
+                    # with a stale semantic pass would misreport degree caps.
+                    "cross_file_call_stats": result.get("cross_file_call_stats"),
                 }
             except Exception:
                 pass  # corrupt graph.json - proceed with AST-only

From c58d2d3caad95595e813cdbf117f95d1eee996aa Mon Sep 17 00:00:00 2001
From: TeddieHsiung <jackshiung@gmail.com>
Date: Thu, 16 Apr 2026 16:15:58 +0800
Subject: [PATCH 4/4] test(extract): add cross-file collision resolution tests
 + fixtures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds 8 tests covering:
- Single candidate → INFERRED 0.8 (unchanged behaviour)
- N candidates → N AMBIGUOUS edges with ambiguity_degree=N
- Self-reference filter correctness
- Default cap (20) drops high-degree fanouts and records to stats
- Cap override via kw-arg
- Cap override via GRAPHIFY_MAX_AMBIGUITY_FANOUT env var
- AMBIGUOUS edges preserve source_file / source_location / weight
- ambiguity_degree always matches actual fan-out count, with unique
  targets within each call-site's fan-out (invariant)

Fixtures (tests/fixtures/collision/*.py) provide minimal Python
programs that exercise each case.
---
 tests/fixtures/collision/a.py      |   9 ++
 tests/fixtures/collision/b.py      |   5 +
 tests/fixtures/collision/c.py      |   9 ++
 tests/fixtures/collision/d.py      |  10 ++
 tests/test_cross_file_collision.py | 194 +++++++++++++++++++++++++++++
 5 files changed, 227 insertions(+)
 create mode 100644 tests/fixtures/collision/a.py
 create mode 100644 tests/fixtures/collision/b.py
 create mode 100644 tests/fixtures/collision/c.py
 create mode 100644 tests/fixtures/collision/d.py
 create mode 100644 tests/test_cross_file_collision.py

diff --git a/tests/fixtures/collision/a.py b/tests/fixtures/collision/a.py
new file mode 100644
index 00000000..7430773a
--- /dev/null
+++ b/tests/fixtures/collision/a.py
@@ -0,0 +1,9 @@
+"""Fixture for cross-file call-collision tests — module A."""
+
+
+def shared():
+    return "a"
+
+
+def only_in_a():
+    return "a-only"
diff --git a/tests/fixtures/collision/b.py b/tests/fixtures/collision/b.py
new file mode 100644
index 00000000..2f073286
--- /dev/null
+++ b/tests/fixtures/collision/b.py
@@ -0,0 +1,5 @@
+"""Fixture for cross-file call-collision tests — module B (duplicate name)."""
+
+
+def shared():
+    return "b"
diff --git a/tests/fixtures/collision/c.py b/tests/fixtures/collision/c.py
new file mode 100644
index 00000000..72a59ba2
--- /dev/null
+++ b/tests/fixtures/collision/c.py
@@ -0,0 +1,9 @@
+"""Fixture for cross-file call-collision tests — unique-candidate caller.
+
+c.py calls `only_in_a()` which exists in only one other file (a.py),
+so the call must resolve to a single INFERRED edge at 0.8.
+"""
+
+
+def caller_unique():
+    return only_in_a()  # noqa: F821 — resolved via cross-file pass
diff --git a/tests/fixtures/collision/d.py b/tests/fixtures/collision/d.py
new file mode 100644
index 00000000..51a44663
--- /dev/null
+++ b/tests/fixtures/collision/d.py
@@ -0,0 +1,10 @@
+"""Fixture for cross-file call-collision tests — ambiguous caller.
+
+d.py calls `shared()` which exists in both a.py and b.py, so the cross-file
+resolution pass must emit two AMBIGUOUS edges (score 0.2, ambiguity_degree=2)
+instead of silently picking one winner.
+"""
+
+
+def caller_ambiguous():
+    return shared()  # noqa: F821 — resolved via cross-file pass
diff --git a/tests/test_cross_file_collision.py b/tests/test_cross_file_collision.py
new file mode 100644
index 00000000..5f88e4f3
--- /dev/null
+++ b/tests/test_cross_file_collision.py
@@ -0,0 +1,194 @@
+"""Tests for cross-file call resolution with label collisions.
+
+Bug fix: when multiple nodes share the same normalised label across files
+(e.g. two Python functions named `shared`, or the ubiquitous PHP `.get()`),
+the legacy resolver silently picked one winner via dict overwrite, losing
+(N-1)/N of the signal. The new resolver must:
+
+  * Emit a single INFERRED edge (0.8) when exactly one candidate exists.
+  * Emit N AMBIGUOUS edges (0.2, with `ambiguity_degree`) when N > 1.
+  * Never emit a self-referential edge.
+"""
+from pathlib import Path
+
+from graphify.extract import extract
+
+FIXTURES = Path(__file__).parent / "fixtures" / "collision"
+
+
+def _calls(result):
+    return [e for e in result["edges"] if e["relation"] == "calls"]
+
+
+def test_unique_candidate_keeps_inferred_high_confidence():
+    """c.py calls only_in_a(), which exists in exactly one other file → INFERRED 0.8."""
+    files = sorted(FIXTURES.glob("*.py"))
+    result = extract(files)
+    node_by_label = {n["label"]: n["id"] for n in result["nodes"]}
+
+    src = node_by_label["caller_unique()"]
+    tgt = node_by_label["only_in_a()"]
+
+    matches = [e for e in _calls(result) if e["source"] == src and e["target"] == tgt]
+    assert len(matches) == 1, f"Expected exactly one caller_unique → only_in_a edge, got {matches}"
+    edge = matches[0]
+    assert edge["confidence"] == "INFERRED", f"Expected INFERRED, got {edge['confidence']}"
+    assert edge["confidence_score"] == 0.8, f"Expected 0.8, got {edge['confidence_score']}"
+    # Backward compat: single-candidate edges must not carry ambiguity_degree.
+    assert "ambiguity_degree" not in edge, f"Single-candidate edge should not have ambiguity_degree: {edge}"
+
+
+def test_multi_candidate_emits_ambiguous_edges_to_all_candidates():
+    """d.py calls shared(), which exists in a.py and b.py → 2 AMBIGUOUS edges at 0.2."""
+    files = sorted(FIXTURES.glob("*.py"))
+    result = extract(files)
+    node_by_label = {n["label"]: n["id"] for n in result["nodes"]}
+
+    src = node_by_label["caller_ambiguous()"]
+    # `shared()` is defined in two fixture files, so collect every node id with that label.
+    shared_ids = sorted(n["id"] for n in result["nodes"] if n["label"] == "shared()")
+    assert len(shared_ids) == 2, f"Expected two shared() nodes, got {shared_ids}"
+
+    edges_from_caller = [e for e in _calls(result) if e["source"] == src]
+    # Only edges into the two shared() targets should be AMBIGUOUS from this caller.
+    edges_to_shared = [e for e in edges_from_caller if e["target"] in shared_ids]
+    assert len(edges_to_shared) == 2, (
+        f"Expected 2 AMBIGUOUS edges from caller_ambiguous to both shared() targets, "
+        f"got {edges_to_shared}"
+    )
+
+    for edge in edges_to_shared:
+        assert edge["confidence"] == "AMBIGUOUS", f"Expected AMBIGUOUS, got {edge}"
+        assert edge["confidence_score"] == 0.2, f"Expected 0.2, got {edge}"
+        assert edge.get("ambiguity_degree") == 2, (
+            f"Expected ambiguity_degree=2, got {edge.get('ambiguity_degree')} in {edge}"
+        )
+
+    # Both candidates must be covered exactly once (no duplicates, no winner-takes-all).
+    targets = sorted(e["target"] for e in edges_to_shared)
+    assert targets == shared_ids, f"Candidate coverage mismatch: {targets} vs {shared_ids}"
+
+
+def test_no_self_referential_cross_file_calls():
+    """A caller must never receive a cross-file edge pointing back at itself."""
+    files = sorted(FIXTURES.glob("*.py"))
+    result = extract(files)
+    for edge in _calls(result):
+        assert edge["source"] != edge["target"], f"Self-loop in cross-file call edge: {edge}"
+
+
+def test_ambiguous_edge_carries_source_location():
+    """AMBIGUOUS cross-file edges preserve source_file and source_location metadata."""
+    files = sorted(FIXTURES.glob("*.py"))
+    result = extract(files)
+    node_by_label = {n["label"]: n["id"] for n in result["nodes"]}
+    src = node_by_label["caller_ambiguous()"]
+
+    ambiguous_edges = [
+        e for e in _calls(result)
+        if e["source"] == src and e.get("confidence") == "AMBIGUOUS"
+    ]
+    assert ambiguous_edges, "No AMBIGUOUS edges emitted"
+    for edge in ambiguous_edges:
+        assert edge.get("source_file"), f"Missing source_file: {edge}"
+        assert edge.get("source_location"), f"Missing source_location: {edge}"
+        assert edge.get("weight") == 1.0, f"Weight should be 1.0: {edge}"
+
+
+# ---------------------------------------------------------------------------
+# W3.5 — degree cap + ambiguity_degree consistency guardrails
+# ---------------------------------------------------------------------------
+
+def _make_high_degree_fixture(tmp_path, n_definitions: int):
+    """Create N files each defining `def foo()` plus one file that calls foo()."""
+    files = []
+    for i in range(n_definitions):
+        p = tmp_path / f"def_{i:02d}.py"
+        p.write_text(f'def foo():\n    return {i}\n', encoding="utf-8")
+        files.append(p)
+    caller = tmp_path / "caller.py"
+    caller.write_text(
+        '"""Caller module that triggers the ambiguous fan-out."""\n'
+        'def entry_point():\n'
+        '    return foo()  # noqa: F821 — resolved cross-file\n',
+        encoding="utf-8",
+    )
+    files.append(caller)
+    return sorted(files)
+
+
+def test_degree_cap_respected_default_20(tmp_path, monkeypatch):
+    """25 candidate definitions > default cap (20) → no AMBIGUOUS edges, stats log truncation."""
+    # Isolate from the ambient shell/CI: extract() reads this env var when no kwarg is given.
+    monkeypatch.delenv("GRAPHIFY_MAX_AMBIGUITY_FANOUT", raising=False)
+    files = _make_high_degree_fixture(tmp_path, n_definitions=25)
+    result = extract(files)
+    stats = result.get("cross_file_call_stats")
+    assert stats is not None, "extract() must expose cross_file_call_stats"
+    assert stats["max_ambiguity_fanout"] == 20
+
+    ambiguous = [e for e in _calls(result) if e.get("confidence") == "AMBIGUOUS"]
+    assert ambiguous == [], (
+        f"Expected zero AMBIGUOUS edges under default cap=20 with 25 candidates, "
+        f"got {len(ambiguous)}"
+    )
+    assert stats["truncated_high_degree"] >= 1, f"truncated_high_degree must register the drop: {stats}"
+    assert "foo" in stats["truncated_examples"], (
+        f"Expected 'foo' in truncated_examples, got {stats['truncated_examples']}"
+    )
+
+
+def test_degree_cap_override_allows_fanout(tmp_path):
+    """Explicit cap=30 lets the same 25-candidate scenario fan out normally."""
+    files = _make_high_degree_fixture(tmp_path, n_definitions=25)
+    result = extract(files, max_ambiguity_fanout=30)
+    stats = result["cross_file_call_stats"]
+    assert stats["max_ambiguity_fanout"] == 30
+    assert stats["truncated_high_degree"] == 0
+
+    ambiguous = [e for e in _calls(result) if e.get("confidence") == "AMBIGUOUS"]
+    assert len(ambiguous) == 25, (
+        f"Expected 25 AMBIGUOUS edges with cap=30 and 25 candidates, got {len(ambiguous)}"
+    )
+    for edge in ambiguous:
+        assert edge["ambiguity_degree"] == 25, (
+            f"Every AMBIGUOUS edge must report degree=25, got {edge}"
+        )
+
+
+def test_ambiguity_degree_matches_fanout(tmp_path):
+    """With 5 same-name definitions + 1 caller, each AMBIGUOUS edge reports degree=5 exactly.
+
+    Belt-and-braces guard against the P1 bug where `ambiguity_degree` could
+    drift above the actual fan-out if the candidate pool contained duplicate
+    nids (e.g. from layered extractors or label-variant collisions).
+    """
+    files = _make_high_degree_fixture(tmp_path, n_definitions=5)
+    result = extract(files)
+    ambiguous = [e for e in _calls(result) if e.get("confidence") == "AMBIGUOUS"]
+    assert len(ambiguous) == 5, f"Expected 5 AMBIGUOUS edges, got {len(ambiguous)}"
+
+    # Every edge's ambiguity_degree must equal the real number of emitted edges.
+    observed_fanout = len(ambiguous)
+    for edge in ambiguous:
+        assert edge["ambiguity_degree"] == observed_fanout == 5, (
+            f"ambiguity_degree={edge['ambiguity_degree']} must match fanout={observed_fanout}: {edge}"
+        )
+
+    # Targets must be unique — no duplicate edges to the same node.
+    targets = [e["target"] for e in ambiguous]
+    assert len(set(targets)) == len(targets), (
+        f"Duplicate AMBIGUOUS edge targets indicate candidate pool dedup failure: {targets}"
+    )
+
+
+def test_env_var_overrides_default_cap(tmp_path, monkeypatch):
+    """GRAPHIFY_MAX_AMBIGUITY_FANOUT env var is honoured when no explicit kwarg."""
+    files = _make_high_degree_fixture(tmp_path, n_definitions=25)
+    monkeypatch.setenv("GRAPHIFY_MAX_AMBIGUITY_FANOUT", "30")
+    result = extract(files)
+    stats = result["cross_file_call_stats"]
+    assert stats["max_ambiguity_fanout"] == 30
+    assert stats["truncated_high_degree"] == 0
+    ambiguous = [e for e in _calls(result) if e.get("confidence") == "AMBIGUOUS"]
+    assert len(ambiguous) == 25