From 9a95fddb078072e424c9222b1e78906d33b8da8c Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 05:25:19 +0000 Subject: [PATCH 01/14] feat(context): enforce sensitivity policy in context pipeline Add sensitivity filtering to the context compilation pipeline. Items whose sensitivity level meets or exceeds ContextPolicy.sensitivity_floor are now dropped (default) or redacted before reaching the prompt. - Add ContextItem.sensitivity field (default: Sensitivity.public) - Add ContextPolicy.sensitivity_action field ('drop' or 'redact') - Add context/sensitivity.py with apply_sensitivity_filter() and MaskRedactionHook (replaces text with '[REDACTED: {sensitivity}]') - Wire sensitivity filter into manager._build() between dependency closure and firewall (step 2b) - Record sensitivity drops in BuildStats.dropped_reasons['sensitivity'] - Export MaskRedactionHook from context/ and top-level __init__.py - Add 19 tests in tests/test_sensitivity.py covering drop mode, redact mode, hook behavior, edge cases, serde roundtrip, and integration Closes #16 --- CHANGELOG.md | 8 + src/contextweaver/__init__.py | 2 + src/contextweaver/config.py | 5 +- src/contextweaver/context/__init__.py | 3 + src/contextweaver/context/manager.py | 7 + src/contextweaver/context/sensitivity.py | 131 ++++++++++ src/contextweaver/types.py | 3 + tests/test_sensitivity.py | 291 +++++++++++++++++++++++ 8 files changed, 449 insertions(+), 1 deletion(-) create mode 100644 src/contextweaver/context/sensitivity.py create mode 100644 tests/test_sensitivity.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ce9836..aaaf171 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Sensitivity enforcement in context pipeline: items at or above `ContextPolicy.sensitivity_floor` are dropped or redacted +- `ContextItem.sensitivity` field (default: `Sensitivity.public`) +- 
`ContextPolicy.sensitivity_action` field (`"drop"` or `"redact"`) +- `MaskRedactionHook` — built-in redaction hook replacing text with `[REDACTED: {sensitivity}]` +- `apply_sensitivity_filter()` function in `context/sensitivity.py` +- `BuildStats.dropped_reasons["sensitivity"]` tracks sensitivity-dropped item count + ## [0.1.1] - 2026-03-03 ### Added diff --git a/src/contextweaver/__init__.py b/src/contextweaver/__init__.py index 37f0b6b..fd352f6 100644 --- a/src/contextweaver/__init__.py +++ b/src/contextweaver/__init__.py @@ -24,6 +24,7 @@ from contextweaver._utils import TfIdfScorer, jaccard from contextweaver.config import ContextBudget, ContextPolicy, ScoringConfig from contextweaver.context.manager import ContextManager +from contextweaver.context.sensitivity import MaskRedactionHook from contextweaver.envelope import ( BuildStats, ChoiceCard, @@ -134,6 +135,7 @@ "StoreBundle", # context engine "ContextManager", + "MaskRedactionHook", # routing engine "Catalog", "ChoiceGraph", diff --git a/src/contextweaver/config.py b/src/contextweaver/config.py index 372dd41..2bef7b1 100644 --- a/src/contextweaver/config.py +++ b/src/contextweaver/config.py @@ -101,7 +101,9 @@ class ContextPolicy: ttl_behavior: How to handle items that have exceeded their TTL. ``"drop"`` removes them; ``"warn"`` keeps them but fires a hook. sensitivity_floor: Items at or above this sensitivity level are - subject to redaction hooks before being included. + dropped or redacted (depending on ``sensitivity_action``). + sensitivity_action: ``"drop"`` (default) removes items at or above + the floor; ``"redact"`` replaces their text via redaction hooks. redaction_hooks: Names of redaction hook implementations to apply, in order. Resolved at runtime by the context manager. 
""" @@ -116,5 +118,6 @@ class ContextPolicy: ) ttl_behavior: str = "drop" sensitivity_floor: Sensitivity = Sensitivity.confidential + sensitivity_action: str = "drop" redaction_hooks: list[str] = field(default_factory=list) extra: dict[str, Any] = field(default_factory=dict) diff --git a/src/contextweaver/context/__init__.py b/src/contextweaver/context/__init__.py index 112a2e8..b95cd5d 100644 --- a/src/contextweaver/context/__init__.py +++ b/src/contextweaver/context/__init__.py @@ -15,11 +15,14 @@ from contextweaver.context.prompt import render_context, render_item from contextweaver.context.scoring import score_candidates, score_item from contextweaver.context.selection import select_and_pack +from contextweaver.context.sensitivity import MaskRedactionHook, apply_sensitivity_filter __all__ = [ "ContextManager", + "MaskRedactionHook", "apply_firewall", "apply_firewall_to_batch", + "apply_sensitivity_filter", "build_schema_header", "deduplicate_candidates", "generate_candidates", diff --git a/src/contextweaver/context/manager.py b/src/contextweaver/context/manager.py index 2b3c934..6512b24 100644 --- a/src/contextweaver/context/manager.py +++ b/src/contextweaver/context/manager.py @@ -22,6 +22,7 @@ from contextweaver.context.prompt import render_context from contextweaver.context.scoring import score_candidates from contextweaver.context.selection import select_and_pack +from contextweaver.context.sensitivity import apply_sensitivity_filter from contextweaver.envelope import ContextPack, ResultEnvelope from contextweaver.protocols import ( ArtifactStore, @@ -387,6 +388,9 @@ def _build( # 2. Dependency closure candidates, closures = resolve_dependency_closure(candidates, self._event_log) + # 2b. Sensitivity filter + candidates, sensitivity_drops = apply_sensitivity_filter(candidates, self._policy) + # 3. 
Firewall candidates, envelopes = apply_firewall_to_batch( candidates, self._artifact_store, self._hook @@ -467,6 +471,9 @@ def _build( stats.dedup_removed = dedup_removed stats.dependency_closures = closures stats.header_footer_tokens = hf_tokens + if sensitivity_drops > 0: + stats.dropped_count += sensitivity_drops + stats.dropped_reasons["sensitivity"] = sensitivity_drops # 7. Render prompt = render_context(selected, header=full_header, footer=footer) diff --git a/src/contextweaver/context/sensitivity.py b/src/contextweaver/context/sensitivity.py new file mode 100644 index 0000000..eb7d4a7 --- /dev/null +++ b/src/contextweaver/context/sensitivity.py @@ -0,0 +1,131 @@ +"""Sensitivity enforcement for the contextweaver Context Engine. + +Filters or redacts :class:`~contextweaver.types.ContextItem` objects whose +sensitivity level meets or exceeds the configured +:attr:`~contextweaver.config.ContextPolicy.sensitivity_floor`. + +Two actions are supported: + +* **drop** (default) — the item is silently removed from the candidate list. +* **redact** — the item's text is replaced with a placeholder via a + :class:`~contextweaver.protocols.RedactionHook`. + +The built-in :class:`MaskRedactionHook` replaces the text with +``[REDACTED: {sensitivity}]`` while preserving all other item metadata. +""" + +from __future__ import annotations + +from dataclasses import replace + +from contextweaver.config import ContextPolicy +from contextweaver.protocols import RedactionHook +from contextweaver.types import ContextItem, Sensitivity + +# Ordered severity levels for comparison. +_SENSITIVITY_ORDER: dict[Sensitivity, int] = { + Sensitivity.public: 0, + Sensitivity.internal: 1, + Sensitivity.confidential: 2, + Sensitivity.restricted: 3, +} + +# Built-in hook registry (name → instance). +_BUILTIN_HOOKS: dict[str, RedactionHook] = {} + + +class MaskRedactionHook: + """Replace item text with ``[REDACTED: {sensitivity}]``. 
+ + All other item fields (id, kind, metadata, parent_id, artifact_ref) are + preserved so the item still participates in dependency closure, stats + tracking, and rendering structure. + """ + + def redact(self, item: ContextItem) -> ContextItem: + """Return a copy of *item* with its text replaced by a redaction mask. + + Args: + item: The context item to redact. + + Returns: + A new :class:`ContextItem` with masked text and a minimal + token estimate. + """ + placeholder = f"[REDACTED: {item.sensitivity.value}]" + return replace(item, text=placeholder, token_estimate=len(placeholder) // 4) + + +# Register the built-in hook so it can be referenced by name in +# ContextPolicy.redaction_hooks. +_BUILTIN_HOOKS["mask"] = MaskRedactionHook() + + +def _resolve_hooks(names: list[str]) -> list[RedactionHook]: + """Resolve hook names to instances. + + Args: + names: Hook names from :attr:`ContextPolicy.redaction_hooks`. + + Returns: + Resolved :class:`RedactionHook` instances. + + Raises: + ValueError: If a name cannot be resolved. + """ + hooks: list[RedactionHook] = [] + for name in names: + hook = _BUILTIN_HOOKS.get(name) + if hook is None: + msg = f"Unknown redaction hook {name!r}. Available: {sorted(_BUILTIN_HOOKS)}" + raise ValueError(msg) + hooks.append(hook) + return hooks + + +def apply_sensitivity_filter( + items: list[ContextItem], + policy: ContextPolicy, +) -> tuple[list[ContextItem], int]: + """Filter or redact items whose sensitivity meets or exceeds the policy floor. + + Args: + items: Candidate items to inspect. + policy: The active context policy (provides ``sensitivity_floor``, + ``sensitivity_action``, and ``redaction_hooks``). + + Returns: + A 2-tuple ``(filtered_items, dropped_count)``. In ``"redact"`` mode + *dropped_count* is always ``0`` because items are kept (with masked + text). 
+ """ + floor_level = _SENSITIVITY_ORDER[policy.sensitivity_floor] + action = policy.sensitivity_action + + # Fast path: if the floor is above restricted nothing can be filtered. + if floor_level > _SENSITIVITY_ORDER[Sensitivity.restricted]: + return items, 0 + + # Resolve redaction hooks once (only needed in redact mode). + hooks: list[RedactionHook] = [] + if action == "redact": + hook_names = policy.redaction_hooks or ["mask"] + hooks = _resolve_hooks(hook_names) + + result: list[ContextItem] = [] + dropped = 0 + for item in items: + item_level = _SENSITIVITY_ORDER[item.sensitivity] + if item_level >= floor_level: + if action == "redact": + redacted = item + for hook in hooks: + redacted = hook.redact(redacted) + result.append(redacted) + else: + # Default: drop + dropped += 1 + else: + result.append(item) + + return result, dropped diff --git a/src/contextweaver/types.py b/src/contextweaver/types.py index 8d4b1ae..76ce2f4 100644 --- a/src/contextweaver/types.py +++ b/src/contextweaver/types.py @@ -158,6 +158,7 @@ class ContextItem: kind: ItemKind text: str token_estimate: int = 0 + sensitivity: Sensitivity = Sensitivity.public metadata: dict[str, Any] = field(default_factory=dict) parent_id: str | None = None artifact_ref: ArtifactRef | None = None @@ -169,6 +170,7 @@ def to_dict(self) -> dict[str, Any]: "kind": self.kind.value, "text": self.text, "token_estimate": self.token_estimate, + "sensitivity": self.sensitivity.value, "metadata": dict(self.metadata), "parent_id": self.parent_id, "artifact_ref": self.artifact_ref.to_dict() if self.artifact_ref else None, @@ -183,6 +185,7 @@ def from_dict(cls, data: dict[str, Any]) -> ContextItem: kind=ItemKind(data["kind"]), text=data["text"], token_estimate=int(data.get("token_estimate", 0)), + sensitivity=Sensitivity(data.get("sensitivity", Sensitivity.public.value)), metadata=dict(data.get("metadata", {})), parent_id=data.get("parent_id"), artifact_ref=ArtifactRef.from_dict(artifact_raw) if artifact_raw else None, 
diff --git a/tests/test_sensitivity.py b/tests/test_sensitivity.py new file mode 100644 index 0000000..58c2c99 --- /dev/null +++ b/tests/test_sensitivity.py @@ -0,0 +1,291 @@ +"""Tests for contextweaver.context.sensitivity.""" + +from __future__ import annotations + +import pytest + +from contextweaver.config import ContextPolicy +from contextweaver.context.manager import ContextManager +from contextweaver.context.sensitivity import ( + _SENSITIVITY_ORDER, + MaskRedactionHook, + apply_sensitivity_filter, +) +from contextweaver.store.event_log import InMemoryEventLog +from contextweaver.types import ContextItem, ItemKind, Phase, Sensitivity + + +def _item( + iid: str, + sensitivity: Sensitivity = Sensitivity.public, + text: str = "hello", + kind: ItemKind = ItemKind.user_turn, +) -> ContextItem: + return ContextItem(id=iid, kind=kind, text=text, sensitivity=sensitivity) + + +# ------------------------------------------------------------------ +# Sensitivity ordering +# ------------------------------------------------------------------ + + +def test_sensitivity_order_monotonic() -> None: + levels = [ + Sensitivity.public, + Sensitivity.internal, + Sensitivity.confidential, + Sensitivity.restricted, + ] + for i in range(len(levels) - 1): + assert _SENSITIVITY_ORDER[levels[i]] < _SENSITIVITY_ORDER[levels[i + 1]] + + +# ------------------------------------------------------------------ +# Drop mode (default) +# ------------------------------------------------------------------ + + +def test_drop_restricted_when_floor_confidential() -> None: + policy = ContextPolicy(sensitivity_floor=Sensitivity.confidential) + items = [ + _item("pub", Sensitivity.public, "public data"), + _item("int", Sensitivity.internal, "internal data"), + _item("conf", Sensitivity.confidential, "confidential data"), + _item("restr", Sensitivity.restricted, "SSN: 123-45-6789"), + ] + filtered, dropped = apply_sensitivity_filter(items, policy) + assert dropped == 2 + kept_ids = {i.id for i in 
filtered} + assert "pub" in kept_ids + assert "int" in kept_ids + assert "conf" not in kept_ids + assert "restr" not in kept_ids + + +def test_public_internal_pass_through_unmodified() -> None: + policy = ContextPolicy(sensitivity_floor=Sensitivity.confidential) + pub = _item("pub", Sensitivity.public, "public text") + intern = _item("int", Sensitivity.internal, "internal text") + filtered, dropped = apply_sensitivity_filter([pub, intern], policy) + assert dropped == 0 + assert len(filtered) == 2 + assert filtered[0].text == "public text" + assert filtered[1].text == "internal text" + + +def test_floor_restricted_drops_only_restricted() -> None: + policy = ContextPolicy(sensitivity_floor=Sensitivity.restricted) + items = [ + _item("pub", Sensitivity.public), + _item("int", Sensitivity.internal), + _item("conf", Sensitivity.confidential), + _item("restr", Sensitivity.restricted), + ] + filtered, dropped = apply_sensitivity_filter(items, policy) + assert dropped == 1 + kept_ids = {i.id for i in filtered} + assert "pub" in kept_ids + assert "int" in kept_ids + assert "conf" in kept_ids + assert "restr" not in kept_ids + + +def test_drop_mode_records_in_build_stats() -> None: + log = InMemoryEventLog() + log.append(_item("pub", Sensitivity.public, "safe")) + log.append(_item("secret", Sensitivity.restricted, "SSN: 123-45-6789")) + policy = ContextPolicy(sensitivity_floor=Sensitivity.confidential) + mgr = ContextManager(event_log=log, policy=policy) + pack = mgr.build_sync(phase=Phase.answer, query="hello") + assert "SSN" not in pack.prompt + assert pack.stats.dropped_reasons.get("sensitivity", 0) >= 1 + + +# ------------------------------------------------------------------ +# Redact mode +# ------------------------------------------------------------------ + + +def test_redact_mode_replaces_text() -> None: + policy = ContextPolicy( + sensitivity_floor=Sensitivity.confidential, + sensitivity_action="redact", + ) + items = [ + _item("pub", Sensitivity.public, "safe 
text"), + _item("conf", Sensitivity.confidential, "secret text"), + ] + filtered, dropped = apply_sensitivity_filter(items, policy) + assert dropped == 0 + assert len(filtered) == 2 + assert filtered[0].text == "safe text" + assert filtered[1].text == "[REDACTED: confidential]" + + +def test_redact_mode_preserves_metadata() -> None: + policy = ContextPolicy( + sensitivity_floor=Sensitivity.confidential, + sensitivity_action="redact", + ) + original = ContextItem( + id="s1", + kind=ItemKind.doc_snippet, + text="secret doc content", + sensitivity=Sensitivity.restricted, + metadata={"source": "vault"}, + parent_id="parent1", + ) + filtered, _ = apply_sensitivity_filter([original], policy) + assert len(filtered) == 1 + redacted = filtered[0] + assert redacted.id == "s1" + assert redacted.kind == ItemKind.doc_snippet + assert redacted.text == "[REDACTED: restricted]" + assert redacted.metadata == {"source": "vault"} + assert redacted.parent_id == "parent1" + assert redacted.sensitivity == Sensitivity.restricted + + +def test_redact_mode_with_explicit_mask_hook() -> None: + policy = ContextPolicy( + sensitivity_floor=Sensitivity.restricted, + sensitivity_action="redact", + redaction_hooks=["mask"], + ) + item = _item("r1", Sensitivity.restricted, "top secret") + filtered, dropped = apply_sensitivity_filter([item], policy) + assert dropped == 0 + assert filtered[0].text == "[REDACTED: restricted]" + + +# ------------------------------------------------------------------ +# MaskRedactionHook directly +# ------------------------------------------------------------------ + + +def test_mask_hook_replaces_text() -> None: + hook = MaskRedactionHook() + item = _item("x", Sensitivity.confidential, "original text") + redacted = hook.redact(item) + assert redacted.text == "[REDACTED: confidential]" + assert redacted.id == "x" + assert redacted.sensitivity == Sensitivity.confidential + + +def test_mask_hook_updates_token_estimate() -> None: + hook = MaskRedactionHook() + item = 
_item("x", Sensitivity.restricted, "very long text " * 100) + redacted = hook.redact(item) + assert redacted.token_estimate == len("[REDACTED: restricted]") // 4 + + +# ------------------------------------------------------------------ +# Edge cases +# ------------------------------------------------------------------ + + +def test_empty_items_list() -> None: + policy = ContextPolicy(sensitivity_floor=Sensitivity.confidential) + filtered, dropped = apply_sensitivity_filter([], policy) + assert filtered == [] + assert dropped == 0 + + +def test_all_items_dropped() -> None: + policy = ContextPolicy(sensitivity_floor=Sensitivity.public) + items = [_item("a", Sensitivity.public), _item("b", Sensitivity.restricted)] + filtered, dropped = apply_sensitivity_filter(items, policy) + assert dropped == 2 + assert filtered == [] + + +def test_unknown_hook_name_raises() -> None: + policy = ContextPolicy( + sensitivity_floor=Sensitivity.confidential, + sensitivity_action="redact", + redaction_hooks=["nonexistent"], + ) + item = _item("x", Sensitivity.confidential) + with pytest.raises(ValueError, match="Unknown redaction hook"): + apply_sensitivity_filter([item], policy) + + +# ------------------------------------------------------------------ +# ContextItem serde roundtrip with sensitivity +# ------------------------------------------------------------------ + + +def test_context_item_to_dict_includes_sensitivity() -> None: + item = _item("s1", Sensitivity.restricted, "secret") + d = item.to_dict() + assert d["sensitivity"] == "restricted" + + +def test_context_item_from_dict_reads_sensitivity() -> None: + data = { + "id": "s1", + "kind": "user_turn", + "text": "hello", + "sensitivity": "confidential", + } + item = ContextItem.from_dict(data) + assert item.sensitivity == Sensitivity.confidential + + +def test_context_item_from_dict_defaults_to_public() -> None: + data = {"id": "s2", "kind": "user_turn", "text": "hello"} + item = ContextItem.from_dict(data) + assert 
item.sensitivity == Sensitivity.public + + +def test_context_item_roundtrip() -> None: + original = _item("rt", Sensitivity.internal, "internal data") + rebuilt = ContextItem.from_dict(original.to_dict()) + assert rebuilt.sensitivity == Sensitivity.internal + assert rebuilt.id == original.id + assert rebuilt.text == original.text + + +# ------------------------------------------------------------------ +# Integration: manager build excludes sensitive content from prompt +# ------------------------------------------------------------------ + + +def test_build_excludes_restricted_from_prompt() -> None: + log = InMemoryEventLog() + log.append(_item("u1", Sensitivity.public, "safe question")) + log.append( + ContextItem( + id="secret1", + kind=ItemKind.doc_snippet, + text="SSN: 123-45-6789", + sensitivity=Sensitivity.restricted, + ) + ) + policy = ContextPolicy(sensitivity_floor=Sensitivity.confidential) + mgr = ContextManager(event_log=log, policy=policy) + pack = mgr.build_sync(phase=Phase.answer, query="hello") + assert "SSN" not in pack.prompt + assert "safe question" in pack.prompt + + +def test_build_redact_mode_masks_in_prompt() -> None: + log = InMemoryEventLog() + log.append(_item("u1", Sensitivity.public, "safe question")) + log.append( + ContextItem( + id="secret1", + kind=ItemKind.doc_snippet, + text="SSN: 123-45-6789", + sensitivity=Sensitivity.restricted, + ) + ) + policy = ContextPolicy( + sensitivity_floor=Sensitivity.confidential, + sensitivity_action="redact", + ) + mgr = ContextManager(event_log=log, policy=policy) + pack = mgr.build_sync(phase=Phase.answer, query="hello") + assert "SSN" not in pack.prompt + assert "[REDACTED: restricted]" in pack.prompt + assert "safe question" in pack.prompt From bc8893f9b4b1ba122b114d794fe883736c19083c Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 05:25:32 +0000 Subject: [PATCH 02/14] docs: update pipeline descriptions and agent instructions for sensitivity enforcement - AGENTS.md: update context/ 
pipeline description to include sensitivity filter - CLAUDE.md: note sensitivity.py in module responsibility map - .github/copilot-instructions.md: add sensitivity_filter step to pipeline, add MaskRedactionHook to key types - docs/architecture.md: add sensitivity_filter as pipeline step 3 (now 8 steps) - docs/concepts.md: add Sensitivity Enforcement section --- .github/copilot-instructions.md | 3 ++- AGENTS.md | 2 +- CLAUDE.md | 2 +- docs/architecture.md | 12 +++++++----- docs/concepts.md | 17 +++++++++++++++++ 5 files changed, 28 insertions(+), 8 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index d7badd5..a8c233c 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -6,7 +6,7 @@ This is contextweaver, a Python library for dynamic context management for AI ag - src/contextweaver/ contains the library - Two engines: Context Engine (phase-specific context building) and Routing Engine (bounded-choice tool routing) - All stores (EventLog, ArtifactStore, EpisodicStore, FactStore) are protocol-based with InMemory defaults -- Context pipeline: generate_candidates → score_candidates → deduplicate_candidates → select_and_pack → render_context +- Context pipeline: generate_candidates → sensitivity_filter → apply_firewall → score_candidates → deduplicate_candidates → select_and_pack → render_context - Routing pipeline: TreeBuilder → ChoiceGraph → Router (beam search) → ChoiceCards ## Conventions @@ -30,3 +30,4 @@ This is contextweaver, a Python library for dynamic context management for AI ag - ContextPack (rendered prompt + stats + BuildStats) - ChoiceCard (LLM-friendly compact card, never includes full schemas) - ChoiceGraph (bounded DAG, serializable, validated on load) +- MaskRedactionHook (built-in redaction hook for sensitivity enforcement) diff --git a/AGENTS.md b/AGENTS.md index d93d16d..0b173a5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -18,7 +18,7 @@ It provides two integrated engines: | 
`src/contextweaver/serde.py` | Serialisation helpers for to_dict/from_dict patterns | | `src/contextweaver/store/` | InMemoryArtifactStore, InMemoryEventLog, InMemoryEpisodicStore, InMemoryFactStore | | `src/contextweaver/summarize/` | SummarizationRule, RuleEngine, extract_facts() | -| `src/contextweaver/context/` | Full context pipeline: candidates → scoring → dedup → selection → firewall → prompt | +| `src/contextweaver/context/` | Full context pipeline: candidates → sensitivity filter → firewall → scoring → dedup → selection → prompt | | `src/contextweaver/routing/` | Catalog, ChoiceGraph, TreeBuilder, Router (beam search), cards renderer | | `src/contextweaver/adapters/` | MCP and A2A protocol adapters | | `src/contextweaver/__main__.py` | CLI: 7 subcommands (demo, build, route, print-tree, init, ingest, replay) | diff --git a/CLAUDE.md b/CLAUDE.md index d1c0e5a..92a7e28 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -36,7 +36,7 @@ _utils.py → text similarity (tokenize, jaccard, TfIdfScorer) serde.py → to_dict/from_dict helpers store/ → in-memory data stores (append-only event log, artifact store, …) summarize/ → rule engine + fact extraction -context/ → full context compilation pipeline +context/ → full context compilation pipeline (incl. sensitivity.py for sensitivity enforcement) routing/ → catalog, DAG, beam-search router, card renderer adapters/ → MCP and A2A protocol conversion __main__.py → CLI entry point diff --git a/docs/architecture.md b/docs/architecture.md index 2d731eb..53d6ae7 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -47,16 +47,18 @@ the event log. The pipeline has seven stages: episodic memory and facts into the candidate pool. 2. **dependency_closure** — if a selected item has a `parent_id`, bring the parent along even if it scored lower. -3. **apply_firewall** — large tool results (above threshold) are +3. 
**sensitivity_filter** — drop or redact items whose `sensitivity` + level meets or exceeds `ContextPolicy.sensitivity_floor`. +4. **apply_firewall** — large tool results (above threshold) are summarised; the raw output is stored in the ArtifactStore and replaced with a compact reference + summary. -4. **score_candidates** — rank candidates by recency, tag match, kind +5. **score_candidates** — rank candidates by recency, tag match, kind priority, and token cost. -5. **deduplicate_candidates** — remove near-duplicate items using Jaccard +6. **deduplicate_candidates** — remove near-duplicate items using Jaccard similarity over tokenised text. -6. **select_and_pack** — greedily pack the highest-scoring candidates +7. **select_and_pack** — greedily pack the highest-scoring candidates into the token budget for the current phase. -7. **render_context** — assemble the final prompt string, grouped by +8. **render_context** — assemble the final prompt string, grouped by section (facts, history, tool results), with `BuildStats` metadata. ## Routing Engine pipeline diff --git a/docs/concepts.md b/docs/concepts.md index 20e5630..7e73e08 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -65,6 +65,23 @@ A `ResultEnvelope` captures the processed output of a tool call: - `views` — optional alternative representations. - `status` — success / error / partial. +## Sensitivity Enforcement + +Each `ContextItem` has a `sensitivity` field (default: `public`) that +classifies its data sensitivity level. The `ContextPolicy.sensitivity_floor` +setting (default: `confidential`) determines which items are subject to +filtering during context compilation. + +Items whose sensitivity level meets or exceeds the floor are either: + +- **Dropped** (`sensitivity_action="drop"`, the default) — removed from + the candidate list before scoring or rendering. 
+- **Redacted** (`sensitivity_action="redact"`) — text replaced with + `[REDACTED: {sensitivity}]` via the `MaskRedactionHook`, while + preserving all item metadata. + +Dropped or redacted items are recorded in `BuildStats.dropped_reasons["sensitivity"]`. + ## Build Stats Every context build produces a `BuildStats` object that explains exactly From cf51e36b4bdd2ba6fce81fc55461c75ffccfcdb1 Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:03:28 +0000 Subject: [PATCH 03/14] fix(context): adjust total_candidates for sensitivity drops in BuildStats select_and_pack() computes total_candidates from the post-filter list, so adding sensitivity_drops to dropped_count without also adjusting total_candidates broke the invariant: dropped_count + included_count <= total_candidates Now both total_candidates and dropped_count are incremented by sensitivity_drops, keeping BuildStats internally consistent. --- src/contextweaver/context/manager.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/contextweaver/context/manager.py b/src/contextweaver/context/manager.py index 6512b24..713db0f 100644 --- a/src/contextweaver/context/manager.py +++ b/src/contextweaver/context/manager.py @@ -472,6 +472,10 @@ def _build( stats.dependency_closures = closures stats.header_footer_tokens = hf_tokens if sensitivity_drops > 0: + # Account for items dropped by sensitivity filtering in both the + # total candidate count and the drop breakdown so that + # dropped_count + included_count <= total_candidates remains true. + stats.total_candidates += sensitivity_drops stats.dropped_count += sensitivity_drops stats.dropped_reasons["sensitivity"] = sensitivity_drops From d669016a85f4b82661000590cf8bdf6fc731fff8 Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:08:25 +0000 Subject: [PATCH 04/14] fix(context): use accumulation pattern for dropped_reasons[sensitivity] Use get(..., 0) + N instead of direct assignment, matching the pattern in selection.py. 
Defensive against future multi-pass scenarios. --- src/contextweaver/context/manager.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/contextweaver/context/manager.py b/src/contextweaver/context/manager.py index 713db0f..706055f 100644 --- a/src/contextweaver/context/manager.py +++ b/src/contextweaver/context/manager.py @@ -477,7 +477,9 @@ def _build( # dropped_count + included_count <= total_candidates remains true. stats.total_candidates += sensitivity_drops stats.dropped_count += sensitivity_drops - stats.dropped_reasons["sensitivity"] = sensitivity_drops + stats.dropped_reasons["sensitivity"] = ( + stats.dropped_reasons.get("sensitivity", 0) + sensitivity_drops + ) # 7. Render prompt = render_context(selected, header=full_header, footer=footer) From 9d282df057ee6d560676bb5c2baa0d54a3fbd02d Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:14:49 +0000 Subject: [PATCH 05/14] fix(context): validate sensitivity_action to reject unknown values Raise ValueError for unrecognised sensitivity_action values (e.g. typos) instead of silently falling through to drop mode. Consistent with the existing _resolve_hooks() validation pattern in the same module. --- src/contextweaver/context/sensitivity.py | 5 +++++ tests/test_sensitivity.py | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/src/contextweaver/context/sensitivity.py b/src/contextweaver/context/sensitivity.py index eb7d4a7..03175b8 100644 --- a/src/contextweaver/context/sensitivity.py +++ b/src/contextweaver/context/sensitivity.py @@ -102,6 +102,11 @@ def apply_sensitivity_filter( floor_level = _SENSITIVITY_ORDER[policy.sensitivity_floor] action = policy.sensitivity_action + _VALID_ACTIONS = {"drop", "redact"} + if action not in _VALID_ACTIONS: + msg = f"Unknown sensitivity_action {action!r}. Valid: {sorted(_VALID_ACTIONS)}" + raise ValueError(msg) + # Fast path: if the floor is above restricted nothing can be filtered. 
if floor_level > _SENSITIVITY_ORDER[Sensitivity.restricted]: return items, 0 diff --git a/tests/test_sensitivity.py b/tests/test_sensitivity.py index 58c2c99..a399760 100644 --- a/tests/test_sensitivity.py +++ b/tests/test_sensitivity.py @@ -210,6 +210,16 @@ def test_unknown_hook_name_raises() -> None: apply_sensitivity_filter([item], policy) +def test_unknown_sensitivity_action_raises() -> None: + policy = ContextPolicy( + sensitivity_floor=Sensitivity.confidential, + sensitivity_action="dorp", + ) + item = _item("x", Sensitivity.confidential) + with pytest.raises(ValueError, match="Unknown sensitivity_action"): + apply_sensitivity_filter([item], policy) + + # ------------------------------------------------------------------ # ContextItem serde roundtrip with sensitivity # ------------------------------------------------------------------ From 1c31509801c1bc2549222462858533a575e4fe52 Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:18:12 +0000 Subject: [PATCH 06/14] refactor(context): remove unreachable fast-path guard in sensitivity filter The guard checked floor_level > restricted, but restricted is the maximum Sensitivity level, making the branch unreachable dead code. --- src/contextweaver/context/sensitivity.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/contextweaver/context/sensitivity.py b/src/contextweaver/context/sensitivity.py index 03175b8..b7ac587 100644 --- a/src/contextweaver/context/sensitivity.py +++ b/src/contextweaver/context/sensitivity.py @@ -107,10 +107,6 @@ def apply_sensitivity_filter( msg = f"Unknown sensitivity_action {action!r}. Valid: {sorted(_VALID_ACTIONS)}" raise ValueError(msg) - # Fast path: if the floor is above restricted nothing can be filtered. - if floor_level > _SENSITIVITY_ORDER[Sensitivity.restricted]: - return items, 0 - # Resolve redaction hooks once (only needed in redact mode). 
hooks: list[RedactionHook] = [] if action == "redact": From 369dc5f857afd1021b407cf8cddb6b238a95d9b7 Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:19:41 +0000 Subject: [PATCH 07/14] docs: fix BuildStats description - only drops are recorded, not redactions --- docs/concepts.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/concepts.md b/docs/concepts.md index 7e73e08..d96608f 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -80,7 +80,7 @@ Items whose sensitivity level meets or exceeds the floor are either: `[REDACTED: {sensitivity}]` via the `MaskRedactionHook`, while preserving all item metadata. -Dropped or redacted items are recorded in `BuildStats.dropped_reasons["sensitivity"]`. +Dropped items are recorded in `BuildStats.dropped_reasons["sensitivity"]`. ## Build Stats From d60be54e765f9d50b5267e79da998a659cd49355 Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:20:46 +0000 Subject: [PATCH 08/14] docs: fix pipeline stage count seven -> eight --- docs/architecture.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture.md b/docs/architecture.md index 53d6ae7..371e2eb 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -41,7 +41,7 @@ the "context window problem" for tool-using AI agents. ## Context Engine pipeline The Context Engine compiles a phase-aware, budget-constrained prompt from -the event log. The pipeline has seven stages: +the event log. The pipeline has eight stages: 1. **generate_candidates** — pull events from the event log and inject episodic memory and facts into the candidate pool.
From f0dea92ba8282d26342300f760fc98a2dd1497e8 Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:42:18 +0000 Subject: [PATCH 09/14] chore: remove test_output.txt and add to .gitignore --- .gitignore | Bin 4959 -> 4992 bytes test_output.txt | 98 ------------------------------------------------ 2 files changed, 98 deletions(-) delete mode 100644 test_output.txt diff --git a/.gitignore b/.gitignore index b7301933407d25467ab8cd0d9a3b79c4aad82eef..78483f1c997dc50fc7bf82813bcbfbc7b5a6b1aa 100644 GIT binary patch delta 41 scmcbw)}X#2URaS!S-~YWsWe?7zqF*Fw1lf9wYVf6!qzLPDB", line 198, in _run_module_as_main - File "", line 88, in _run_code - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Scripts\pytest.exe\__main__.py", line 7, in - sys.exit(console_main()) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\config\__init__.py", line 201, in console_main - code = main() - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\config\__init__.py", line 175, in main - ret: ExitCode | int = config.hook.pytest_cmdline_main(config=config) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_hooks.py", line 512, in __call__ - return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_manager.py", line 120, in _hookexec - return self._inner_hookexec(hook_name, methods, kwargs, firstresult) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_callers.py", line 121, in _multicall - res = hook_impl.function(*args) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\main.py", line 336, in pytest_cmdline_main - return wrap_session(config, _main) - File 
"C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\main.py", line 289, in wrap_session - session.exitstatus = doit(config, session) or 0 - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\main.py", line 343, in _main - config.hook.pytest_runtestloop(session=session) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_hooks.py", line 512, in __call__ - return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_manager.py", line 120, in _hookexec - return self._inner_hookexec(hook_name, methods, kwargs, firstresult) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_callers.py", line 121, in _multicall - res = hook_impl.function(*args) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\main.py", line 367, in pytest_runtestloop - item.config.hook.pytest_runtest_protocol(item=item, nextitem=nextitem) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_hooks.py", line 512, in __call__ - return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_manager.py", line 120, in _hookexec - return self._inner_hookexec(hook_name, methods, kwargs, firstresult) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_callers.py", line 121, in _multicall - res = hook_impl.function(*args) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\runner.py", line 117, in pytest_runtest_protocol - runtestprotocol(item, nextitem=nextitem) - File 
"C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\runner.py", line 136, in runtestprotocol - reports.append(call_and_report(item, "call", log)) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\runner.py", line 245, in call_and_report - call = CallInfo.from_call( - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\runner.py", line 344, in from_call - result: TResult | None = func() - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\runner.py", line 246, in - lambda: runtest_hook(item=item, **kwds), when=when, reraise=reraise - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_hooks.py", line 512, in __call__ - return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_manager.py", line 120, in _hookexec - return self._inner_hookexec(hook_name, methods, kwargs, firstresult) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_callers.py", line 121, in _multicall - res = hook_impl.function(*args) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\runner.py", line 178, in pytest_runtest_call - item.runtest() - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\python.py", line 1671, in runtest - self.ihook.pytest_pyfunc_call(pyfuncitem=self) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_hooks.py", line 512, in __call__ - return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_manager.py", line 120, in _hookexec - return self._inner_hookexec(hook_name, 
methods, kwargs, firstresult) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\pluggy\_callers.py", line 121, in _multicall - res = hook_impl.function(*args) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\site-packages\_pytest\python.py", line 157, in pytest_pyfunc_call - result = testfunction(**testargs) - File "C:\Users\dandrsantos\Documents\Pessoal\contextweaver\tests\test_cli.py", line 226, in test_replay_preview - result = _run("replay", "--session", str(ingested_path), "--phase", "answer") - File "C:\Users\dandrsantos\Documents\Pessoal\contextweaver\tests\test_cli.py", line 16, in _run - return subprocess.run( - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 550, in run - stdout, stderr = process.communicate(input, timeout=timeout) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1209, in communicate - stdout, stderr = self._communicate(input, endtime, timeout) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1628, in _communicate - self.stdout_thread.join(self._remaining_time(endtime)) - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1149, in join - self._wait_for_tstate_lock() - File "C:\Users\dandrsantos\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1169, in _wait_for_tstate_lock - if lock.acquire(block, timeout): -+++++++++++++++++++++++++++++++++++ Timeout +++++++++++++++++++++++++++++++++++ From 8766508007c9a2abe35912c4d04f32fd77d97522 Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:44:32 +0000 Subject: [PATCH 10/14] docs(manager): renumber pipeline steps to include sensitivity filter --- src/contextweaver/context/manager.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/contextweaver/context/manager.py b/src/contextweaver/context/manager.py 
index 706055f..5797ab1 100644 --- a/src/contextweaver/context/manager.py +++ b/src/contextweaver/context/manager.py @@ -4,11 +4,12 @@ 1. :func:`~contextweaver.context.candidates.generate_candidates` — phase filter 2. :func:`~contextweaver.context.candidates.resolve_dependency_closure` — parent chain expansion -3. :func:`~contextweaver.context.firewall.apply_firewall_to_batch` — raw output interception -4. :func:`~contextweaver.context.scoring.score_candidates` — relevance scoring -5. :func:`~contextweaver.context.dedup.deduplicate_candidates` — near-duplicate removal -6. :func:`~contextweaver.context.selection.select_and_pack` — budget-aware selection -7. :func:`~contextweaver.context.prompt.render_context` — prompt assembly +3. :func:`~contextweaver.context.sensitivity.apply_sensitivity_filter` — sensitivity enforcement (drop/redact) +4. :func:`~contextweaver.context.firewall.apply_firewall_to_batch` — raw output interception +5. :func:`~contextweaver.context.scoring.score_candidates` — relevance scoring +6. :func:`~contextweaver.context.dedup.deduplicate_candidates` — near-duplicate removal +7. :func:`~contextweaver.context.selection.select_and_pack` — budget-aware selection +8. :func:`~contextweaver.context.prompt.render_context` — prompt assembly """ from __future__ import annotations @@ -352,7 +353,7 @@ def _build( ) -> ContextPack: """Run the full context compilation pipeline (synchronous core). - All seven pipeline steps are pure computation, so no ``await`` is + All eight pipeline steps are pure computation, so no ``await`` is needed. Both :meth:`build` (async) and :meth:`build_sync` delegate here. 
From e582989744084449f91395e0603d30e7501c7fe0 Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:48:25 +0000 Subject: [PATCH 11/14] docs(manager): renumber inline step comments 1-8 for consistency --- src/contextweaver/context/manager.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/contextweaver/context/manager.py b/src/contextweaver/context/manager.py index 5797ab1..59e5334 100644 --- a/src/contextweaver/context/manager.py +++ b/src/contextweaver/context/manager.py @@ -389,18 +389,18 @@ def _build( # 2. Dependency closure candidates, closures = resolve_dependency_closure(candidates, self._event_log) - # 2b. Sensitivity filter + # 3. Sensitivity filter candidates, sensitivity_drops = apply_sensitivity_filter(candidates, self._policy) - # 3. Firewall + # 4. Firewall candidates, envelopes = apply_firewall_to_batch( candidates, self._artifact_store, self._hook ) - # 4. Score + # 5. Score scored = score_candidates(candidates, query, _tags, self._scoring) - # 5. Dedup + # 6. Dedup scored, dedup_removed = deduplicate_candidates(scored) # Pre-build episodic + fact injection text so we can estimate its @@ -467,7 +467,7 @@ def _build( else: adjusted = effective_budget - # 6. Select (budget already accounts for header/footer overhead) + # 7. Select (budget already accounts for header/footer overhead) selected, stats = select_and_pack(scored, phase, adjusted, self._policy, self._estimator) stats.dedup_removed = dedup_removed stats.dependency_closures = closures @@ -482,7 +482,7 @@ def _build( stats.dropped_reasons.get("sensitivity", 0) + sensitivity_drops ) - # 7. Render + # 8. 
Render prompt = render_context(selected, header=full_header, footer=footer) pack = ContextPack(prompt=prompt, stats=stats, phase=phase, envelopes=envelopes) From 97e261858b6e6d40e419729d777af8ac3f1ae823 Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:53:26 +0000 Subject: [PATCH 12/14] feat(sensitivity): add register_redaction_hook() for user-extensible hooks --- src/contextweaver/__init__.py | 3 +- src/contextweaver/context/__init__.py | 7 ++++- src/contextweaver/context/sensitivity.py | 31 +++++++++++++++--- tests/test_sensitivity.py | 40 ++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 7 deletions(-) diff --git a/src/contextweaver/__init__.py b/src/contextweaver/__init__.py index fd352f6..89beb57 100644 --- a/src/contextweaver/__init__.py +++ b/src/contextweaver/__init__.py @@ -24,7 +24,7 @@ from contextweaver._utils import TfIdfScorer, jaccard from contextweaver.config import ContextBudget, ContextPolicy, ScoringConfig from contextweaver.context.manager import ContextManager -from contextweaver.context.sensitivity import MaskRedactionHook +from contextweaver.context.sensitivity import MaskRedactionHook, register_redaction_hook from contextweaver.envelope import ( BuildStats, ChoiceCard, @@ -136,6 +136,7 @@ # context engine "ContextManager", "MaskRedactionHook", + "register_redaction_hook", # routing engine "Catalog", "ChoiceGraph", diff --git a/src/contextweaver/context/__init__.py b/src/contextweaver/context/__init__.py index b95cd5d..b4f6a38 100644 --- a/src/contextweaver/context/__init__.py +++ b/src/contextweaver/context/__init__.py @@ -15,7 +15,11 @@ from contextweaver.context.prompt import render_context, render_item from contextweaver.context.scoring import score_candidates, score_item from contextweaver.context.selection import select_and_pack -from contextweaver.context.sensitivity import MaskRedactionHook, apply_sensitivity_filter +from contextweaver.context.sensitivity import ( + MaskRedactionHook, + apply_sensitivity_filter, 
+ register_redaction_hook, +) __all__ = [ "ContextManager", @@ -23,6 +27,7 @@ "apply_firewall", "apply_firewall_to_batch", "apply_sensitivity_filter", + "register_redaction_hook", "build_schema_header", "deduplicate_candidates", "generate_candidates", diff --git a/src/contextweaver/context/sensitivity.py b/src/contextweaver/context/sensitivity.py index b7ac587..c6454e0 100644 --- a/src/contextweaver/context/sensitivity.py +++ b/src/contextweaver/context/sensitivity.py @@ -30,8 +30,29 @@ Sensitivity.restricted: 3, } -# Built-in hook registry (name → instance). -_BUILTIN_HOOKS: dict[str, RedactionHook] = {} +# Hook registry (name → instance). Built-in hooks are registered at +# module load time; users can add their own via :func:`register_redaction_hook`. +_HOOK_REGISTRY: dict[str, RedactionHook] = {} + + +def register_redaction_hook(name: str, hook: RedactionHook) -> None: + """Register a custom :class:`~contextweaver.protocols.RedactionHook`. + + Once registered, the *name* can be used in + :attr:`~contextweaver.config.ContextPolicy.redaction_hooks` just like the + built-in ``"mask"`` hook. + + Args: + name: Short identifier for the hook (e.g. ``"my_custom_hook"``). + hook: An object implementing the :class:`RedactionHook` protocol. + + Raises: + ValueError: If *name* is already registered. + """ + if name in _HOOK_REGISTRY: + msg = f"Redaction hook {name!r} is already registered" + raise ValueError(msg) + _HOOK_REGISTRY[name] = hook class MaskRedactionHook: @@ -58,7 +79,7 @@ def redact(self, item: ContextItem) -> ContextItem: # Register the built-in hook so it can be referenced by name in # ContextPolicy.redaction_hooks. 
-_BUILTIN_HOOKS["mask"] = MaskRedactionHook() +_HOOK_REGISTRY["mask"] = MaskRedactionHook() def _resolve_hooks(names: list[str]) -> list[RedactionHook]: @@ -75,9 +96,9 @@ def _resolve_hooks(names: list[str]) -> list[RedactionHook]: """ hooks: list[RedactionHook] = [] for name in names: - hook = _BUILTIN_HOOKS.get(name) + hook = _HOOK_REGISTRY.get(name) if hook is None: - msg = f"Unknown redaction hook {name!r}. Available: {sorted(_BUILTIN_HOOKS)}" + msg = f"Unknown redaction hook {name!r}. Available: {sorted(_HOOK_REGISTRY)}" raise ValueError(msg) hooks.append(hook) return hooks diff --git a/tests/test_sensitivity.py b/tests/test_sensitivity.py index a399760..d176b0d 100644 --- a/tests/test_sensitivity.py +++ b/tests/test_sensitivity.py @@ -7,9 +7,11 @@ from contextweaver.config import ContextPolicy from contextweaver.context.manager import ContextManager from contextweaver.context.sensitivity import ( + _HOOK_REGISTRY, _SENSITIVITY_ORDER, MaskRedactionHook, apply_sensitivity_filter, + register_redaction_hook, ) from contextweaver.store.event_log import InMemoryEventLog from contextweaver.types import ContextItem, ItemKind, Phase, Sensitivity @@ -299,3 +301,41 @@ def test_build_redact_mode_masks_in_prompt() -> None: assert "SSN" not in pack.prompt assert "[REDACTED: restricted]" in pack.prompt assert "safe question" in pack.prompt + + +# ------------------------------------------------------------------ +# register_redaction_hook +# ------------------------------------------------------------------ + + +def test_register_custom_hook_and_use_in_redact_mode() -> None: + """A user-registered hook can be referenced by name in ContextPolicy.""" + + class UppercaseHook: + def redact(self, item: ContextItem) -> ContextItem: + from dataclasses import replace as _replace + + return _replace(item, text=item.text.upper()) + + # Register & use + register_redaction_hook("upper_test", UppercaseHook()) + try: + policy = ContextPolicy( + sensitivity_floor=Sensitivity.confidential, 
+ sensitivity_action="redact", + redaction_hooks=["upper_test"], + ) + items = [_item("a", Sensitivity.restricted, text="secret data")] + result, dropped = apply_sensitivity_filter(items, policy) + assert dropped == 0 + assert len(result) == 1 + assert result[0].text == "SECRET DATA" + finally: + # Clean up so other tests are not affected. + del _HOOK_REGISTRY["upper_test"] + + +def test_register_duplicate_hook_raises() -> None: + """Registering a hook with an existing name raises ValueError.""" + with pytest.raises(ValueError, match="already registered"): + register_redaction_hook("mask", MaskRedactionHook()) From 44c9347da2e6dfe7aaf65e471081010c8ea84634 Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:55:51 +0000 Subject: [PATCH 13/14] style(sensitivity): move _VALID_ACTIONS to module scope --- src/contextweaver/context/sensitivity.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/contextweaver/context/sensitivity.py b/src/contextweaver/context/sensitivity.py index c6454e0..251a9a3 100644 --- a/src/contextweaver/context/sensitivity.py +++ b/src/contextweaver/context/sensitivity.py @@ -30,6 +30,9 @@ Sensitivity.restricted: 3, } +# Valid values for ContextPolicy.sensitivity_action. +_VALID_ACTIONS: set[str] = {"drop", "redact"} + # Hook registry (name → instance). Built-in hooks are registered at # module load time; users can add their own via :func:`register_redaction_hook`. _HOOK_REGISTRY: dict[str, RedactionHook] = {} @@ -123,7 +126,6 @@ def apply_sensitivity_filter( floor_level = _SENSITIVITY_ORDER[policy.sensitivity_floor] action = policy.sensitivity_action - _VALID_ACTIONS = {"drop", "redact"} if action not in _VALID_ACTIONS: msg = f"Unknown sensitivity_action {action!r}. 
Valid: {sorted(_VALID_ACTIONS)}" raise ValueError(msg) From 81fcf830fb6b328c12d000b2ca662bbb551e5905 Mon Sep 17 00:00:00 2001 From: dgenio Date: Wed, 4 Mar 2026 06:57:54 +0000 Subject: [PATCH 14/14] fix(manager): shorten docstring line to satisfy E501 --- src/contextweaver/context/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/contextweaver/context/manager.py b/src/contextweaver/context/manager.py index 59e5334..de86540 100644 --- a/src/contextweaver/context/manager.py +++ b/src/contextweaver/context/manager.py @@ -4,7 +4,7 @@ 1. :func:`~contextweaver.context.candidates.generate_candidates` — phase filter 2. :func:`~contextweaver.context.candidates.resolve_dependency_closure` — parent chain expansion -3. :func:`~contextweaver.context.sensitivity.apply_sensitivity_filter` — sensitivity enforcement (drop/redact) +3. :func:`~contextweaver.context.sensitivity.apply_sensitivity_filter` — sensitivity enforcement 4. :func:`~contextweaver.context.firewall.apply_firewall_to_batch` — raw output interception 5. :func:`~contextweaver.context.scoring.score_candidates` — relevance scoring 6. :func:`~contextweaver.context.dedup.deduplicate_candidates` — near-duplicate removal