From 7c97eea30d9e82eced76a60d876733c83a1b6b5a Mon Sep 17 00:00:00 2001
From: anirudhp26 <anirudh26p@gmail.com>
Date: Tue, 28 Apr 2026 00:28:23 +0530
Subject: [PATCH] fix(critical): close silent fail-open patterns in protect()
 init and rule eval
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Audit Round 2, PR A — addresses the two CRITICAL findings: silent
fail-opens that masked broken policies as a working Veto instance.

1. **`protect()` no longer silently degrades to allow-all on init
   failure.** Previously, any exception during Veto initialization
   (malformed YAML, bad rule, transient network) was caught and
   replaced with an allow-all wrapper. Every tool call passed; no
   warning. The system became a no-op while looking healthy. Now:
     - Default behaviour: re-raise. The user sees what broke.
     - Opt-in `protect(safe_fallback=True)`: keeps the legacy
       degradation, but prints a loud `WARNING: Veto initialization
       failed — falling back to ALLOW-ALL` banner to stderr.

2. **`Veto.evaluateLocalExpression` (TS) now propagates compile and
   evaluate errors** instead of catching them, logging at `warn`,
   and returning `false`. Earlier behaviour: a `block` rule with a
   parse error silently never matched and the call went through.
   Now: error is logged at `error` level and re-raised; the
   validator engine treats validator exceptions as a deny — fail-
   closed by default for security-relevant rules.

3. **`matches` operator on a rejected regex now writes a one-time
   ERROR line to stderr** in both SDKs (Python `condition_evaluator`,
   TS `condition-evaluator`). The operator still returns `false`
   (preserves matches-operator semantics — a non-matching pattern
   isn't a hard error), but a fail-open block rule on a
   misconfigured regex is no longer invisible at runtime. Pairs
   with the load-time scan added in #201; this handles rules added
   or mutated after init.

Tests
-----
* `packages/sdk-python/tests/test_failopen_critical.py` — 6 cases
  (init-raises-by-default, opt-in fallback prints banner, success
  path silent, matches one-time log, safe pattern silent, two
  patterns log once each)
* `packages/sdk/tests/core/failopen-critical.test.ts` — 4 cases
  (matches one-time log, safe pattern silent, two patterns log
  once each, compile() malformed expression throws)
* Full suites: TS 1386/1386, Python 334/334

Behavioural change for downstream
---------------------------------
- `protect()` callers that depended on the allow-all degradation as a
  soft fallback need to either fix their config or pass
  `safe_fallback=True` (and accept every call is now permitted).
- Local rules with broken expressions now produce a deny instead of
  an allow. For most users this is the safer default.
---
 .changeset/critical-failopen-fixes.md         |  15 ++
 .../tests/test_failopen_critical.py           | 169 ++++++++++++++++++
 packages/sdk-python/veto/core/protect.py      |  20 ++-
 .../veto/rules/condition_evaluator.py         |  39 +++-
 packages/sdk/src/core/veto.ts                 |  15 +-
 packages/sdk/src/rules/condition-evaluator.ts |  32 +++-
 .../sdk/tests/core/failopen-critical.test.ts  | 100 +++++++++++
 7 files changed, 381 insertions(+), 9 deletions(-)
 create mode 100644 .changeset/critical-failopen-fixes.md
 create mode 100644 packages/sdk-python/tests/test_failopen_critical.py
 create mode 100644 packages/sdk/tests/core/failopen-critical.test.ts

diff --git a/.changeset/critical-failopen-fixes.md b/.changeset/critical-failopen-fixes.md
new file mode 100644
index 00000000..5768d922
--- /dev/null
+++ b/.changeset/critical-failopen-fixes.md
@@ -0,0 +1,15 @@
+---
+"veto-sdk": patch
+"veto": patch
+---
+
+Close two critical fail-open patterns surfaced by the audit.
+
+- **Python `protect()` no longer silently degrades to allow-all on init failure.** Previously, _any_ exception during `Veto` initialization (malformed YAML, bad rule shape, transient network error) was caught and replaced with an allow-all instance — every tool call passed with no policy enforcement, no warning, no audit trail. `protect()` now re-raises by default. The legacy degrade-to-allow-all behaviour is reachable via the explicit opt-in `protect(safe_fallback=True)`, which prints a loud `WARNING: Veto initialization failed — falling back to ALLOW-ALL` banner to stderr so it can't be missed.
+- **Local rule expression errors now propagate** instead of returning `false`. A `block` rule with a parse or eval error used to silently never match, letting the call through. Both compile and evaluate paths in `Veto.evaluateLocalExpression` now log at `error` level and re-raise; the validation engine's existing fail-closed treatment of validator exceptions turns this into a deny.
+- **`matches` operator on a rejected regex now surfaces a one-time stderr error** in both SDKs. The condition still returns `false` (preserves operator semantics — a non-matching pattern is not a hard error), but a fail-open block rule on a misconfigured pattern is no longer invisible at runtime. Pairs with the load-time scan added in #201; this catches rules added or mutated after init.
+
+**Behavioural notes:**
+
+- Existing `protect()` callers that relied on the allow-all degradation as a "soft start" need to either (a) fix their config or (b) add `safe_fallback=True` and accept that every tool call is now permitted.
+- Local rules with a broken expression now produce a deny instead of an allow. For most users this is the safer default; if the previous behaviour is needed, fix the expression.
diff --git a/packages/sdk-python/tests/test_failopen_critical.py b/packages/sdk-python/tests/test_failopen_critical.py
new file mode 100644
index 00000000..b1c097fe
--- /dev/null
+++ b/packages/sdk-python/tests/test_failopen_critical.py
@@ -0,0 +1,169 @@
+"""
+Regression tests for the critical fail-open fixes (audit Round 2, PR A).
+
+Two specific behaviour changes pinned here:
+
+1. ``protect()`` no longer silently degrades to an allow-all instance when
+   Veto initialisation throws — by default it re-raises. The legacy degrade
+   behaviour is reachable via ``protect(safe_fallback=True)`` and prints a
+   loud banner to stderr.
+
+2. The ``matches`` operator on a regex that the safety heuristic rejects
+   still returns ``False`` (preserves rule semantics) but now also writes
+   a one-time ERROR line to stderr so a fail-open block rule can't go
+   unnoticed at runtime.
+"""
+from __future__ import annotations
+
+import asyncio
+
+import pytest
+
+from unittest.mock import patch
+
+from veto.core.protect import protect, _reset_protect_cache_for_tests
+from veto.rules.condition_evaluator import (
+    _LOGGED_BAD_PATTERNS,
+    evaluate_legacy_condition,
+)
+
+
+class _SimulatedInitError(RuntimeError):
+    """Stand-in for any Veto-side init failure (bad YAML, malformed rule
+    schema, unreachable cloud, etc.) so tests don't need to find a real
+    config-shape that the loader rejects."""
+
+
+@pytest.fixture(autouse=True)
+def _reset_caches():
+    _reset_protect_cache_for_tests()
+    _LOGGED_BAD_PATTERNS.clear()
+    yield
+    _reset_protect_cache_for_tests()
+    _LOGGED_BAD_PATTERNS.clear()
+
+
+# ──────────────────────────────────────────────────────────────────
+# 1. protect() — fail-loud by default, opt-in safe_fallback.
+# ──────────────────────────────────────────────────────────────────
+class TestProtectFailLoud:
+    def test_init_failure_raises_by_default(self):
+        """An init exception must propagate out of ``protect()`` — the
+        previous behaviour silently returned an allow-all wrapper, which
+        is the worst class of fail-open."""
+
+        class FakeTool:
+            name = "send_email"
+
+        async def run() -> None:
+            await protect(
+                FakeTool(),
+                rules=[{"id": "x", "tools": ["send_email"], "action": "block"}],
+            )
+
+        with patch(
+            "veto.core.protect.Veto.from_rules",
+            side_effect=_SimulatedInitError("bang"),
+        ):
+            with pytest.raises(_SimulatedInitError):
+                asyncio.run(run())
+
+    def test_safe_fallback_true_degrades_with_loud_banner(self, capsys):
+        """Opt-in ``safe_fallback=True`` keeps the legacy degrade behaviour
+        but prints a loud stderr banner so it can't be missed."""
+        from veto.core.veto import Veto as _RealVeto
+        real_from_rules = _RealVeto.from_rules
+
+        def fail_only_user_rules(*args, **kwargs):
+            # The user-rules call passes a non-empty `rules`. The allow-all
+            # fallback path calls back into Veto.from_rules with `rules=[]`
+            # — let that one through so the fallback succeeds.
+            if kwargs.get("rules"):
+                raise _SimulatedInitError("bang")
+            return real_from_rules(*args, **kwargs)
+
+        class FakeTool:
+            name = "send_email"
+
+            def __call__(self, **kwargs):  # something to wrap
+                return None
+
+        async def run():
+            return await protect(
+                FakeTool(),
+                rules=[{"id": "x", "tools": ["send_email"], "action": "block"}],
+                safe_fallback=True,
+            )
+
+        with patch(
+            "veto.core.protect.Veto.from_rules",
+            side_effect=fail_only_user_rules,
+        ):
+            wrapped = asyncio.run(run())
+        captured = capsys.readouterr()
+        assert wrapped is not None, "safe_fallback should still produce a wrapped tool"
+        assert "WARNING: Veto initialization failed" in captured.err
+        assert "ALLOW-ALL" in captured.err
+        assert "safe_fallback=True" in captured.err
+
+    def test_successful_init_does_not_print_banner(self, capsys):
+        """A working rule set must not trigger the banner."""
+
+        class FakeTool:
+            name = "send_email"
+
+        async def run():
+            return await protect(
+                FakeTool(),
+                rules=[{
+                    "id": "ok",
+                    "tools": ["send_email"],
+                    "action": "block",
+                    "conditions": [
+                        {"field": "arguments.body", "operator": "contains", "value": "secret"}
+                    ],
+                }],
+            )
+
+        asyncio.run(run())
+        captured = capsys.readouterr()
+        assert "WARNING: Veto initialization failed" not in captured.err
+
+
+# ──────────────────────────────────────────────────────────────────
+# 2. matches operator — rejected pattern surfaces a one-time error.
+# ──────────────────────────────────────────────────────────────────
+class TestMatchesPatternRejection:
+    def test_rejected_pattern_returns_false_and_logs_once(self, capsys):
+        # The heuristic rejects `.*…|…*.*` shapes — picked because the
+        # earlier audit hit this exact shape on a real demo rule.
+        bad = "(rm.*|wget.*)"
+        # First evaluation: returns False AND emits the one-time error.
+        result_1 = evaluate_legacy_condition("anything", "matches", bad)
+        captured_1 = capsys.readouterr()
+        assert result_1 is False
+        assert "ERROR" in captured_1.err
+        assert "rejected by safety heuristic" in captured_1.err
+        # Second evaluation with the SAME pattern: still False, no second log.
+        result_2 = evaluate_legacy_condition("anything else", "matches", bad)
+        captured_2 = capsys.readouterr()
+        assert result_2 is False
+        assert captured_2.err == "", "duplicate log on the same pattern"
+
+    def test_safe_pattern_does_not_log(self, capsys):
+        result = evaluate_legacy_condition("rm -rf /tmp", "matches", "rm -rf")
+        captured = capsys.readouterr()
+        assert result is True
+        assert captured.err == ""
+
+    def test_each_distinct_bad_pattern_logs_once(self, capsys):
+        bad_a = "(rm.*|wget.*)"
+        bad_b = "(curl.*|fetch.*)"
+        evaluate_legacy_condition("x", "matches", bad_a)
+        evaluate_legacy_condition("x", "matches", bad_b)
+        # Each pattern logs once, never re-logs:
+        evaluate_legacy_condition("y", "matches", bad_a)
+        evaluate_legacy_condition("y", "matches", bad_b)
+        captured = capsys.readouterr()
+        # Exactly two ERROR lines expected (one per distinct pattern).
+        assert captured.err.count("ERROR") == 2
diff --git a/packages/sdk-python/veto/core/protect.py b/packages/sdk-python/veto/core/protect.py
index 8a48cc0a..02af3bb2 100644
--- a/packages/sdk-python/veto/core/protect.py
+++ b/packages/sdk-python/veto/core/protect.py
@@ -436,8 +436,24 @@ async def _initialize_veto(
                     on_approval_required=options.get("on_approval_required"),
                 )
             )
-    except Exception:
-        instance = _create_allow_all_instance(options)
+    except Exception as init_error:
+        # By default, fail LOUD — silently degrading a security tool to
+        # allow-all on a config typo or transient network blip is the
+        # worst class of misbehaviour. Re-raise so the caller sees what
+        # broke. Opt in to the legacy degrade-to-allow-all behaviour with
+        # `protect(safe_fallback=True)` — it now emits an error-level
+        # banner so it can't be missed.
+        if options.get("safe_fallback"):
+            print(
+                "[veto] WARNING: Veto initialization failed — falling back "
+                "to ALLOW-ALL because safe_fallback=True was set. Every "
+                "tool call will be permitted with no policy enforcement. "
+                f"Original error: {type(init_error).__name__}: {init_error}",
+                file=sys.stderr,
+            )
+            instance = _create_allow_all_instance(options)
+        else:
+            raise
 
     _instance_cache[cache_key] = instance
     return instance, source, inline_rules, packs, False
diff --git a/packages/sdk-python/veto/rules/condition_evaluator.py b/packages/sdk-python/veto/rules/condition_evaluator.py
index edfeb15f..bd213707 100644
--- a/packages/sdk-python/veto/rules/condition_evaluator.py
+++ b/packages/sdk-python/veto/rules/condition_evaluator.py
@@ -1,13 +1,42 @@
 from __future__ import annotations
 
+import logging
+import re
+import sys
 from collections.abc import Callable, Mapping
-from typing import Any, Optional
 from datetime import datetime, timezone
-import re
+from typing import Any, Optional
 from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
 
 from veto.deterministic.regex_safety import is_safe_pattern
 
+# One-time log per pattern so a misconfigured rule emits a single
+# error line instead of spamming stderr on every evaluation.
+_LOGGED_BAD_PATTERNS: set[str] = set()
+_BAD_PATTERN_LOGGER = logging.getLogger("veto.rules.condition_evaluator")
+
+
+def _report_unsafe_pattern(pattern: str) -> None:
+    """Surface a rejected `matches` regex once per process.
+
+    The condition itself returns ``False`` to the caller (the safer of the
+    two silent options) but a fail-open block rule should not be invisible.
+    PR #201 catches most cases via a load-time scan; this catches rules
+    that are added or mutated at runtime.
+    """
+    if pattern in _LOGGED_BAD_PATTERNS:
+        return
+    _LOGGED_BAD_PATTERNS.add(pattern)
+    truncated = pattern if len(pattern) <= 128 else pattern[:128] + "…"
+    message = (
+        f"[veto] ERROR: rule `matches` regex rejected by safety heuristic — "
+        f"the rule will never fire. Pattern: {truncated!r}"
+    )
+    # Belt and braces: write to stderr (always visible) and to the
+    # standard `logging` module (so structured-logging consumers see it).
+    print(message, file=sys.stderr)
+    _BAD_PATTERN_LOGGER.error(message)
+
 DAY_ORDER: tuple[str, ...] = ("sun", "mon", "tue", "wed", "thu", "fri", "sat")
 DAY_SET = set(DAY_ORDER)
 TIME_24H_PATTERN = re.compile(r"^(?:[01]\d|2[0-3]):[0-5]\d$")
@@ -262,6 +291,12 @@ def evaluate_legacy_condition(field_value: Any, operator: str, expected: Any) ->
             return False
         regex = create_safe_regex(expected, flags=re.IGNORECASE)
         if regex is None:
+            # The pattern was rejected by the safety heuristic. Returning
+            # ``False`` here means a `block` rule with a broken regex never
+            # fires — the silent fail-open class. PR #201's load-time scan
+            # catches static rules; this catches rules that are added or
+            # mutated at runtime.
+            _report_unsafe_pattern(expected)
             return False
         return regex.search(field_value) is not None
     if operator == "greater_than":
diff --git a/packages/sdk/src/core/veto.ts b/packages/sdk/src/core/veto.ts
index 547adbe8..5d6b7c0b 100644
--- a/packages/sdk/src/core/veto.ts
+++ b/packages/sdk/src/core/veto.ts
@@ -2028,22 +2028,29 @@ export class Veto {
         }
         this.compiledExpressionCache.set(expression, ast);
       } catch (error) {
-        this.logger.warn('Failed to compile local rule expression', {
+        // Earlier this returned `false` and let the validator chain proceed —
+        // i.e. a `block` rule with a parse error silently never matched and
+        // the call was permitted. That's a fail-open. Surface as an error
+        // and propagate; the validation engine treats validator errors as
+        // a deny (see `validator.ts` — fail-closed on validator exceptions).
+        this.logger.error('Failed to compile local rule expression', {
           expression,
           error: error instanceof Error ? error.message : String(error),
         });
-        return false;
+        throw error;
       }
     }
 
     try {
       return Boolean(evaluate(ast, context));
     } catch (error) {
-      this.logger.warn('Failed to evaluate local rule expression', {
+      // Same fail-open class as the compile path above — propagate so the
+      // engine fails closed instead of silently dropping the rule.
+      this.logger.error('Failed to evaluate local rule expression', {
         expression,
         error: error instanceof Error ? error.message : String(error),
       });
-      return false;
+      throw error;
     }
   }
 
diff --git a/packages/sdk/src/rules/condition-evaluator.ts b/packages/sdk/src/rules/condition-evaluator.ts
index 329c4af6..a9097c8c 100644
--- a/packages/sdk/src/rules/condition-evaluator.ts
+++ b/packages/sdk/src/rules/condition-evaluator.ts
@@ -86,6 +86,27 @@ export function resolveFieldPath(
   return resolveDotPath(field, context);
 }
 
+/**
+ * One-time stderr report per rejected `matches` pattern so a misconfigured
+ * rule emits a single error instead of spamming on every evaluation. The
+ * condition itself returns `false` to the caller (the safer of the silent
+ * options) but a fail-open block rule should not be invisible.
+ */
+const _LOGGED_BAD_PATTERNS = new Set<string>();
+function reportUnsafePattern(pattern: string): void {
+  if (_LOGGED_BAD_PATTERNS.has(pattern)) return;
+  _LOGGED_BAD_PATTERNS.add(pattern);
+  const truncated = pattern.length > 128 ? `${pattern.slice(0, 128)}…` : pattern;
+  const message =
+    `[veto] ERROR: rule \`matches\` regex rejected by safety heuristic — ` +
+    `the rule will never fire. Pattern: ${JSON.stringify(truncated)}`;
+  if (typeof process !== 'undefined' && typeof process.stderr?.write === 'function') {
+    process.stderr.write(`${message}\n`);
+  } else if (typeof console !== 'undefined') {
+    console.error(message);
+  }
+}
+
 /**
  * Compile a regex pattern only if it passes safety checks.
  */
@@ -513,7 +534,15 @@ export function evaluateLegacyCondition(
         return false;
       }
       if (typeof fieldValue === 'string') {
-        return createSafeRegex(expected, 'i')?.test(fieldValue) ?? false;
+        const compiled = createSafeRegex(expected, 'i');
+        if (!compiled) {
+          // Fail-open class: a rejected pattern silently makes the rule
+          // never fire. Surface it once per process so a `block` rule with
+          // a broken regex isn't invisible at runtime.
+          reportUnsafePattern(expected);
+          return false;
+        }
+        return compiled.test(fieldValue);
       }
 
       if (!allowNestedObjectStringSearch) {
@@ -522,6 +551,7 @@ export function evaluateLegacyCondition(
 
       const regex = createSafeRegex(expected, 'i');
       if (!regex) {
+        reportUnsafePattern(expected);
         return false;
       }
 
diff --git a/packages/sdk/tests/core/failopen-critical.test.ts b/packages/sdk/tests/core/failopen-critical.test.ts
new file mode 100644
index 00000000..e1a0fd50
--- /dev/null
+++ b/packages/sdk/tests/core/failopen-critical.test.ts
@@ -0,0 +1,100 @@
+/**
+ * Regression tests for the critical fail-open fixes (audit Round 2, PR A).
+ *
+ * Pins two behaviour changes:
+ *
+ * 1. `Veto.evaluateLocalExpression` no longer catches a parse / eval error
+ *    and returns `false` (silent fail-open — a `block` rule with a broken
+ *    expression never matched). It now propagates the error; the validation
+ *    engine's existing catch treats validator errors as a deny.
+ *
+ * 2. The `matches` operator on a regex that the safety heuristic rejects
+ *    still returns `false` (preserves operator semantics) but now also
+ *    writes a one-time error line to stderr so a fail-open block rule
+ *    can't go unnoticed at runtime.
+ */
+import { describe, it, expect, vi } from 'vitest';
+
+const ESC = String.fromCharCode(0x1b);
+const stripAnsi = (s: string) => s.replace(new RegExp(`${ESC}\\[\\d+m`, 'g'), '');
+
+describe('matches operator — rejected pattern surfaces a one-time error', () => {
+  it('rejected pattern returns false and writes ERROR to stderr (one-time)', async () => {
+    // Fresh module isolates the in-module Set that tracks logged patterns.
+    vi.resetModules();
+    const { evaluateLegacyCondition } = await import(
+      '../../src/rules/condition-evaluator.js'
+    );
+
+    const writes: string[] = [];
+    const spy = vi.spyOn(process.stderr, 'write').mockImplementation((c: any) => {
+      writes.push(String(c));
+      return true;
+    });
+
+    const bad = '(rm.*|wget.*)';
+    const r1 = evaluateLegacyCondition('rm -rf /', 'matches', bad);
+    const r2 = evaluateLegacyCondition('rm -rf /tmp', 'matches', bad);
+    spy.mockRestore();
+
+    const all = stripAnsi(writes.join(''));
+    expect(r1).toBe(false);
+    expect(r2).toBe(false);
+    expect(all).toContain('ERROR');
+    expect(all).toContain('rejected by safety heuristic');
+    expect((all.match(/ERROR/g) ?? []).length).toBe(1);
+  });
+
+  it('safe pattern does not log', async () => {
+    vi.resetModules();
+    const { evaluateLegacyCondition } = await import(
+      '../../src/rules/condition-evaluator.js'
+    );
+
+    const writes: string[] = [];
+    const spy = vi.spyOn(process.stderr, 'write').mockImplementation((c: any) => {
+      writes.push(String(c));
+      return true;
+    });
+
+    const result = evaluateLegacyCondition('rm -rf /', 'matches', 'rm -rf');
+    spy.mockRestore();
+
+    expect(result).toBe(true);
+    expect(stripAnsi(writes.join(''))).not.toContain('ERROR');
+  });
+
+  it('two distinct bad patterns log exactly once each', async () => {
+    vi.resetModules();
+    const { evaluateLegacyCondition } = await import(
+      '../../src/rules/condition-evaluator.js'
+    );
+
+    const writes: string[] = [];
+    const spy = vi.spyOn(process.stderr, 'write').mockImplementation((c: any) => {
+      writes.push(String(c));
+      return true;
+    });
+
+    const a = '(rm.*|wget.*)';
+    const b = '(curl.*|fetch.*)';
+    evaluateLegacyCondition('x', 'matches', a);
+    evaluateLegacyCondition('x', 'matches', b);
+    evaluateLegacyCondition('y', 'matches', a);  // dup
+    evaluateLegacyCondition('y', 'matches', b);  // dup
+    spy.mockRestore();
+
+    const all = stripAnsi(writes.join(''));
+    expect((all.match(/ERROR/g) ?? []).length).toBe(2);
+  });
+});
+
+describe('local rule expression — propagates errors instead of returning false', () => {
+  it('compile() failure now throws from evaluateLocalExpression', async () => {
+    // Direct test of the compiler — a malformed expression must throw at
+    // compile time, not be silently absorbed.
+    const { compile } = await import('../../src/compiler/index.js');
+    expect(() => compile('1 +')).toThrow();
+    expect(() => compile('args.x ==')).toThrow();
+  });
+});