From c72759531870efb4bd21460f77f74fea3c6b07d8 Mon Sep 17 00:00:00 2001 From: AgentSeal Date: Wed, 25 Mar 2026 11:47:13 -0700 Subject: [PATCH 1/7] ci: add Python CI workflow, merge_group triggers, fix extraction docstring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add Python CI with 3.10/3.11/3.12 test matrix and PyPI publish - Add merge_group trigger to JS CI for merge queue support - Fix extraction.py docstring (70 → 82 probes) --- .github/workflows/js.yml | 1 + .github/workflows/python.yml | 54 +++++++++++++++++++++++++++ python/agentseal/probes/extraction.py | 2 +- 3 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/python.yml diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index c09a6d6..d057fac 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -6,6 +6,7 @@ on: branches: [main] pull_request: paths: ["js/**"] + merge_group: defaults: run: diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 0000000..665d684 --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,54 @@ +name: Python Package + +on: + push: + paths: ["python/**"] + branches: [main] + pull_request: + paths: ["python/**"] + merge_group: + +defaults: + run: + working-directory: python + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: python/pyproject.toml + - name: Install package and test deps + run: pip install -e . && pip install pytest ruff + - name: Lint (fatal errors only) + run: ruff check . 
--select=E9,F63,F7,F82 + - name: Run tests + run: python -m pytest tests/ -x -q --tb=short + + publish: + needs: test + if: startsWith(github.ref, 'refs/tags/v') + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Build + run: pip install build && python -m build + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + # working-directory default does NOT apply to uses actions; + # path is relative to $GITHUB_WORKSPACE + packages-dir: python/dist/ diff --git a/python/agentseal/probes/extraction.py b/python/agentseal/probes/extraction.py index 529d6dc..eeaafff 100644 --- a/python/agentseal/probes/extraction.py +++ b/python/agentseal/probes/extraction.py @@ -1,6 +1,6 @@ # agentseal/probes/extraction.py """ -70 extraction probes - attempt to make the agent reveal its system prompt. +82 extraction probes - attempt to make the agent reveal its system prompt. Layer 2: imports from schemas. 
""" From 4d39b43ce114b700fd70ad3f8da86915afa5198c Mon Sep 17 00:00:00 2001 From: AgentSeal Date: Wed, 25 Mar 2026 12:01:12 -0700 Subject: [PATCH 2/7] chore: gitignore private infrastructure, remove leaked design docs - Add pipeline scripts, .claude/, docs/superpowers/ to .gitignore - Remove 3 private design specs accidentally committed via PR #21 --- .gitignore | 16 + .../plans/2026-03-24-js-guard-v08-parity.md | 1327 ----------------- .../2026-03-24-guard-v0.8-features-design.md | 506 ------- .../2026-03-24-js-guard-v08-parity-design.md | 515 ------- 4 files changed, 16 insertions(+), 2348 deletions(-) delete mode 100644 docs/superpowers/plans/2026-03-24-js-guard-v08-parity.md delete mode 100644 docs/superpowers/specs/2026-03-24-guard-v0.8-features-design.md delete mode 100644 docs/superpowers/specs/2026-03-24-js-guard-v08-parity-design.md diff --git a/.gitignore b/.gitignore index 7709de7..2406e6c 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,19 @@ python/agentseal/probes/rag_poisoning.py python/agentseal/probes/multimodal.py python/agentseal/genome.py python/agentseal/report.py + +# Private infrastructure (multi-agent pipeline, NOT open source) +CLAUDE.md +.claude/ +.pi/ +justfile +ruff.toml +agent.sh +develop.sh +research.sh +work.sh +sync.sh +validate.sh +audit.sh +research/ +docs/superpowers/ diff --git a/docs/superpowers/plans/2026-03-24-js-guard-v08-parity.md b/docs/superpowers/plans/2026-03-24-js-guard-v08-parity.md deleted file mode 100644 index 381ad1e..0000000 --- a/docs/superpowers/plans/2026-03-24-js-guard-v08-parity.md +++ /dev/null @@ -1,1327 +0,0 @@ -# JS/TS Guard v0.8 Feature Parity Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
- -**Goal:** Port all Python Guard v0.8 features to the TypeScript package so `npx agentseal guard` has full feature parity. - -**Architecture:** 4 new modules (project-config, history, registry-client, rules), 7 updated modules (guard-models, guard, deobfuscate, blocklist, mcp-checker, baselines, skill-scanner), CLI guard command. TDD throughout. Each module is self-contained with its own test file. - -**Tech Stack:** TypeScript, Vitest, better-sqlite3 (optional), Node.js 18+ fetch, js-yaml - -**Spec:** `docs/superpowers/specs/2026-03-24-js-guard-v08-parity-design.md` - ---- - -## File Structure - -### New Files -| File | Responsibility | -|------|---------------| -| `src/project-config.ts` | .agentseal.yaml loader, resolution, filtering, guard init | -| `src/history.ts` | SQLite history store, delta computation, path normalization | -| `src/registry-client.ts` | agentseal.org API client, slug generation, enrichment | -| `src/rules.ts` | YAML rule engine, glob matching, guard test | -| `test/project-config.test.ts` | ~35 tests | -| `test/history.test.ts` | ~25 tests | -| `test/registry-client.test.ts` | ~20 tests | -| `test/rules.test.ts` | ~25 tests | -| `test/guard-models-v08.test.ts` | ~15 tests | -| `test/guard-v08.test.ts` | ~15 tests | -| `test/deobfuscate-v08.test.ts` | ~10 tests | - -### Modified Files -| File | Changes | -|------|---------| -| `src/guard-models.ts` | Add UnlistedFinding, CustomFinding, DeltaEntry, DeltaResult, fromDict methods, registry fields | -| `src/guard.ts` | Wire project config, rules, registry, history, delta into run() | -| `src/deobfuscate.ts` | CONFUSABLES map, decodeHtmlEntities, 2-pass pipeline | -| `src/blocklist.ts` | 12 seed hashes, union on load | -| `src/mcp-checker.ts` | 5 new supply chain checks, symlink resolution | -| `src/baselines.ts` | URL + headers in fingerprint | -| `src/skill-scanner.ts` | 3 markdown exfil patterns | -| `src/index.ts` | Export new modules | -| `bin/agentseal.ts` | guard command + init + test 
subcommands | -| `package.json` | optionalDependencies, version bump | - ---- - -## Task 1: Update guard-models.ts with new types - -**Files:** -- Modify: `js/src/guard-models.ts:68-74` (MCPServerResult), `js/src/guard-models.ts:150-160` (GuardReport) -- Create: `js/test/guard-models-v08.test.ts` - -- [ ] **Step 1: Write failing tests for new types** - -Create `js/test/guard-models-v08.test.ts`: - -```typescript -import { describe, it, expect } from "vitest"; - -// ═══════════════════════════════════════════════════════════════ -// UNLISTED FINDING -// ═══════════════════════════════════════════════════════════════ - -describe("UnlistedFinding", () => { - it("has required fields", () => { - const f: any = { - code: "GUARD-001", - title: "Unlisted agent", - description: "Agent 'cursor' is not in allowed_agents", - severity: "medium", - item_name: "cursor", - item_type: "agent", - }; - expect(f.code).toBe("GUARD-001"); - expect(f.item_type).toBe("agent"); - }); -}); - -// ═══════════════════════════════════════════════════════════════ -// CUSTOM FINDING -// ═══════════════════════════════════════════════════════════════ - -describe("CustomFinding", () => { - it("round-trips through toDict/fromDict", () => { - // Will import CustomFinding helpers once implemented - const cf = { - code: "CUSTOM-001", - title: "Test rule", - severity: "high", - verdict: "danger", - remediation: "Fix it", - rule_file: "/rules/test.yaml", - entity_type: "mcp", - entity_name: "slack-mcp", - }; - // Test fromDict and toDict once available - expect(cf.code).toBe("CUSTOM-001"); - }); -}); - -// ═══════════════════════════════════════════════════════════════ -// DELTA RESULT -// ═══════════════════════════════════════════════════════════════ - -describe("DeltaResult", () => { - it("computes total_new correctly", () => { - // Will use createDeltaResult helper once implemented - expect(true).toBe(true); // placeholder - }); - - it("computes total_resolved correctly", () => { - 
expect(true).toBe(true); - }); -}); - -// ═══════════════════════════════════════════════════════════════ -// MCPServerResult registry fields -// ═══════════════════════════════════════════════════════════════ - -describe("MCPServerResult registry", () => { - it("includes registry in toDict when score set", () => { - expect(true).toBe(true); - }); - - it("omits registry from toDict when score not set", () => { - expect(true).toBe(true); - }); -}); - -// ═══════════════════════════════════════════════════════════════ -// GuardReport.fromDict -// ═══════════════════════════════════════════════════════════════ - -describe("GuardReport.fromDict", () => { - it("round-trips all fields", () => { - expect(true).toBe(true); - }); -}); -``` - -- [ ] **Step 2: Run tests to verify they pass (placeholders)** - -Run: `cd js && npx vitest run test/guard-models-v08.test.ts` - -- [ ] **Step 3: Add UnlistedFinding and CustomFinding interfaces to guard-models.ts** - -Add after the existing `MCPFinding` interface (~line 55): - -```typescript -export interface UnlistedFinding { - code: string; - title: string; - description: string; - severity: string; - item_name: string; - item_type: string; -} - -export interface CustomFinding { - code: string; - title: string; - severity: string; - verdict: string; - remediation: string; - rule_file: string; - entity_type: string; - entity_name: string; -} - -export function customFindingFromDict(d: Record): CustomFinding { - return { - code: d.code ?? "", - title: d.title ?? "", - severity: d.severity ?? "medium", - verdict: d.verdict ?? "warning", - remediation: d.remediation ?? "", - rule_file: d.rule_file ?? "", - entity_type: d.entity_type ?? "", - entity_name: d.entity_name ?? 
"", - }; -} - -export function customFindingToDict(f: CustomFinding): Record { - return { ...f }; -} - -export function unlistedFindingToDict(f: UnlistedFinding): Record { - return { ...f }; -} -``` - -- [ ] **Step 4: Add DeltaEntry and DeltaResult** - -Add after CustomFinding: - -```typescript -export interface DeltaEntry { - change_type: string; - entity_type: string; - entity_name: string; - code?: string; - title?: string; - old_verdict?: string; - new_verdict?: string; - severity?: string; -} - -export function deltaEntryToDict(e: DeltaEntry): Record { - const d: Record = { - change_type: e.change_type, - entity_type: e.entity_type, - entity_name: e.entity_name, - }; - if (e.code) d.code = e.code; - if (e.title) d.title = e.title; - if (e.old_verdict) d.old_verdict = e.old_verdict; - if (e.new_verdict) d.new_verdict = e.new_verdict; - if (e.severity) d.severity = e.severity; - return d; -} - -export class DeltaResult { - previous_timestamp: string; - entries: DeltaEntry[]; - - constructor(previous_timestamp: string, entries: DeltaEntry[] = []) { - this.previous_timestamp = previous_timestamp; - this.entries = entries; - } - - get total_new(): number { - return this.entries.filter( - (e) => e.change_type === "new" || e.change_type === "new_entity" - ).length; - } - - get total_resolved(): number { - return this.entries.filter( - (e) => e.change_type === "resolved" || e.change_type === "removed_entity" - ).length; - } - - get total_changed(): number { - return this.entries.filter((e) => e.change_type === "changed").length; - } - - toDict(): Record { - return { - previous_timestamp: this.previous_timestamp, - entries: this.entries.map(deltaEntryToDict), - total_new: this.total_new, - total_resolved: this.total_resolved, - total_changed: this.total_changed, - }; - } -} -``` - -- [ ] **Step 5: Add registry fields to MCPServerResult** - -In the `MCPServerResult` interface (~line 68), add: - -```typescript - registry_score?: number; - registry_level?: string; - 
registry_findings_count?: number; -``` - -- [ ] **Step 6: Add new fields to GuardReport and fromDict** - -In the `GuardReport` interface (~line 150), add fields as **optional** (to avoid breaking existing Guard.run() until Task 9 wires them in): - -```typescript - unlisted_findings?: UnlistedFinding[]; - custom_findings?: CustomFinding[]; - config_path?: string; -``` - -Add `guardReportFromDict` function: - -```typescript -export function guardReportFromDict(d: Record): GuardReport { - return { - timestamp: d.timestamp ?? "", - duration_seconds: d.duration_seconds ?? 0, - agents_found: d.agents_found ?? [], - skill_results: d.skill_results ?? [], - mcp_results: (d.mcp_results ?? []).map((m: any) => ({ - ...m, - registry_score: m.registry?.score ?? m.registry_score, - registry_level: m.registry?.level ?? m.registry_level, - registry_findings_count: m.registry?.findings_count ?? m.registry_findings_count, - })), - mcp_runtime_results: d.mcp_runtime_results ?? [], - toxic_flows: d.toxic_flows ?? [], - baseline_changes: d.baseline_changes ?? [], - llm_tokens_used: d.llm_tokens_used ?? 0, - unlisted_findings: d.unlisted_findings ?? [], - custom_findings: (d.custom_findings ?? []).map(customFindingFromDict), - config_path: d.config_path ?? "", - }; -} -``` - -- [ ] **Step 7: Replace placeholder tests with real assertions** - -Update `test/guard-models-v08.test.ts` to import and test all new types with real assertions. 
- -- [ ] **Step 8: Run all tests** - -Run: `cd js && npx vitest run test/guard-models-v08.test.ts` -Expected: All pass - -- [ ] **Step 9: Commit** - -```bash -git add js/src/guard-models.ts js/test/guard-models-v08.test.ts -git commit -m "feat(js): add UnlistedFinding, CustomFinding, DeltaResult types and fromDict helpers" -``` - ---- - -## Task 2: Security hardening - deobfuscate.ts - -**Files:** -- Modify: `js/src/deobfuscate.ts:94-96` (normalizeUnicode), `js/src/deobfuscate.ts:208-219` (deobfuscate) -- Create: `js/test/deobfuscate-v08.test.ts` - -- [ ] **Step 1: Write failing tests** - -Create `js/test/deobfuscate-v08.test.ts`: - -```typescript -import { describe, it, expect } from "vitest"; -import { deobfuscate, normalizeUnicode } from "../src/deobfuscate.js"; - -describe("TR39 confusables", () => { - it("maps Cyrillic a to Latin a", () => { - expect(normalizeUnicode("\u0430")).toBe("a"); - }); - - it("maps Cyrillic C to Latin C", () => { - expect(normalizeUnicode("\u0421")).toBe("C"); - }); - - it("maps fullwidth A to Latin A", () => { - expect(normalizeUnicode("\uff21")).toBe("A"); - }); - - it("maps Greek omicron to Latin o", () => { - expect(normalizeUnicode("\u03bf")).toBe("o"); - }); - - it("maps Turkish dotless i to Latin i", () => { - expect(normalizeUnicode("\u0131")).toBe("i"); - }); - - it("normalizes mixed Cyrillic/Latin word", () => { - // "curl" with Cyrillic с and Latin url - const input = "\u0441url"; - expect(normalizeUnicode(input)).toBe("curl"); - }); -}); - -describe("decodeHtmlEntities", () => { - it("decodes numeric entities", () => { - expect(deobfuscate("curl")).toBe("curl"); - }); - - it("decodes hex entities", () => { - expect(deobfuscate("curl")).toBe("curl"); - }); - - it("decodes named entities", () => { - expect(deobfuscate("& < >")).toBe("& < >"); - }); -}); - -describe("2-pass pipeline", () => { - it("catches base64 inside zero-width split", () => { - // First pass strips zero-width, second pass decodes base64 - const zw = 
"\u200B"; - const b64 = btoa("curl http://evil.com"); - const obfuscated = b64.slice(0, 4) + zw + b64.slice(4); - const result = deobfuscate(obfuscated); - expect(result).toContain("curl"); - }); -}); -``` - -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cd js && npx vitest run test/deobfuscate-v08.test.ts` -Expected: FAIL (normalizeUnicode doesn't handle confusables yet, no decodeHtmlEntities) - -- [ ] **Step 3: Add CONFUSABLES map to deobfuscate.ts** - -Add before the `normalizeUnicode` function (~line 90): - -```typescript -const CONFUSABLES = new Map([ - // Cyrillic uppercase - ["\u0410", "A"], ["\u0412", "B"], ["\u0421", "C"], ["\u0415", "E"], - ["\u041d", "H"], ["\u0406", "I"], ["\u0408", "J"], ["\u041a", "K"], - ["\u041c", "M"], ["\u041e", "O"], ["\u0420", "P"], ["\u0405", "S"], - ["\u0422", "T"], ["\u0425", "X"], ["\u0423", "Y"], ["\u0417", "Z"], - // Cyrillic lowercase - ["\u0430", "a"], ["\u0441", "c"], ["\u0435", "e"], ["\u04bb", "h"], - ["\u0456", "i"], ["\u0458", "j"], ["\u043e", "o"], ["\u0440", "p"], - ["\u0455", "s"], ["\u0445", "x"], ["\u0443", "y"], - // Greek uppercase - ["\u0391", "A"], ["\u0392", "B"], ["\u0395", "E"], ["\u0397", "H"], - ["\u0399", "I"], ["\u039a", "K"], ["\u039c", "M"], ["\u039d", "N"], - ["\u039f", "O"], ["\u03a1", "P"], ["\u03a4", "T"], ["\u03a7", "X"], - ["\u03a5", "Y"], ["\u0396", "Z"], - // Greek lowercase - ["\u03bf", "o"], ["\u03b1", "a"], - // Cherokee - ["\u13a0", "D"], ["\u13a1", "R"], ["\u13a2", "T"], ["\u13aa", "G"], - ["\u13b3", "W"], ["\u13d2", "S"], ["\u13da", "S"], - ["\uab4e", "s"], ["\uab4f", "s"], ["\uaba3", "s"], ["\uabaa", "s"], - // Turkish dotless i - ["\u0131", "i"], - // Small caps - ["\u1d00", "A"], ["\u0299", "B"], ["\u1d04", "C"], - // Fullwidth Latin uppercase - ["\uff21", "A"], ["\uff22", "B"], ["\uff23", "C"], ["\uff24", "D"], - ["\uff25", "E"], ["\uff26", "F"], ["\uff27", "G"], ["\uff28", "H"], - ["\uff29", "I"], ["\uff2a", "J"], ["\uff2b", "K"], ["\uff2c", "L"], - ["\uff2d", "M"], 
["\uff2e", "N"], ["\uff2f", "O"], ["\uff30", "P"], - ["\uff31", "Q"], ["\uff32", "R"], ["\uff33", "S"], ["\uff34", "T"], - ["\uff35", "U"], ["\uff36", "V"], ["\uff37", "W"], ["\uff38", "X"], - ["\uff39", "Y"], ["\uff3a", "Z"], - // Fullwidth Latin lowercase - ["\uff41", "a"], ["\uff42", "b"], ["\uff43", "c"], ["\uff44", "d"], - ["\uff45", "e"], ["\uff46", "f"], ["\uff47", "g"], ["\uff48", "h"], - ["\uff49", "i"], ["\uff4a", "j"], ["\uff4b", "k"], ["\uff4c", "l"], - ["\uff4d", "m"], ["\uff4e", "n"], ["\uff4f", "o"], ["\uff50", "p"], - ["\uff51", "q"], ["\uff52", "r"], ["\uff53", "s"], ["\uff54", "t"], - ["\uff55", "u"], ["\uff56", "v"], ["\uff57", "w"], ["\uff58", "x"], - ["\uff59", "y"], ["\uff5a", "z"], -]); -``` - -- [ ] **Step 4: Update normalizeUnicode to apply confusables** - -Replace the existing `normalizeUnicode` function: - -```typescript -export function normalizeUnicode(text: string): string { - let result = text.normalize("NFKC"); - let out = ""; - for (const ch of result) { - out += CONFUSABLES.get(ch) ?? ch; - } - return out; -} -``` - -- [ ] **Step 5: Add decodeHtmlEntities function** - -Add before the deobfuscate function: - -```typescript -const NAMED_ENTITIES: Record = { - amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", - nbsp: "\u00A0", copy: "\u00A9", reg: "\u00AE", -}; - -export function decodeHtmlEntities(text: string): string { - return text - .replace(/&#x([0-9a-fA-F]+);/g, (_, hex) => - String.fromCodePoint(parseInt(hex, 16)) - ) - .replace(/&#(\d+);/g, (_, dec) => - String.fromCodePoint(parseInt(dec, 10)) - ) - .replace(/&([a-zA-Z]+);/g, (match, name) => - NAMED_ENTITIES[name.toLowerCase()] ?? 
match - ); -} -``` - -- [ ] **Step 6: Refactor deobfuscate to 2-pass pipeline** - -Replace the existing `deobfuscate` function: - -```typescript -function _deobfuscatePass(text: string): string { - text = stripZeroWidth(text); - text = stripTagChars(text); - text = stripVariationSelectors(text); - text = stripBidiControls(text); - text = stripHtmlComments(text); - text = decodeHtmlEntities(text); - text = normalizeUnicode(text); - text = decodeBase64Blocks(text); - text = unescapeSequences(text); - text = expandStringConcat(text); - return text; -} - -export function deobfuscate(text: string): string { - text = _deobfuscatePass(text); - text = _deobfuscatePass(text); - return text; -} -``` - -- [ ] **Step 7: Export decodeHtmlEntities from deobfuscate.ts and index.ts** - -Add `decodeHtmlEntities` to the exports in `src/index.ts` alongside existing deobfuscate exports. - -- [ ] **Step 8: Run tests** - -Run: `cd js && npx vitest run test/deobfuscate-v08.test.ts` -Expected: All pass - -- [ ] **Step 9: Run full existing deobfuscate tests to verify no regressions** - -Run: `cd js && npx vitest run test/deobfuscate.test.ts` -Expected: All existing tests still pass - -- [ ] **Step 10: Commit** - -```bash -git add js/src/deobfuscate.ts js/test/deobfuscate-v08.test.ts js/src/index.ts -git commit -m "feat(js): add TR39 confusables, HTML entity decoding, 2-pass deobfuscation" -``` - ---- - -## Task 3: Security hardening - blocklist.ts seed hashes - -**Files:** -- Modify: `js/src/blocklist.ts:20-28` (constructor, _hashes), `js/src/blocklist.ts:69-78` (_loadFromFile) - -- [ ] **Step 1: Write failing test** - -Add to bottom of existing `js/test/blocklist.test.ts`: - -```typescript -describe("seed hashes", () => { - it("has 12 seed hashes on construction", () => { - const bl = new Blocklist(mkdtempSync(join(tmpdir(), "bl-"))); - expect(bl.size).toBeGreaterThanOrEqual(12); - }); - - it("recognizes credential-theft-cursorrules hash", () => { - const bl = new 
Blocklist(mkdtempSync(join(tmpdir(), "bl-"))); - expect(bl.isBlocked("854aa9bd5a641b03fcf2e4a26affb33057af3238a10a83e194c05384f371734f")).toBe(true); - }); - - it("seed hashes survive file load", () => { - const dir = mkdtempSync(join(tmpdir(), "bl-")); - writeFileSync(join(dir, "blocklist.json"), JSON.stringify({ sha256_hashes: ["aaa"], updated: new Date().toISOString() })); - const bl = new Blocklist(dir); - expect(bl.isBlocked("854aa9bd5a641b03fcf2e4a26affb33057af3238a10a83e194c05384f371734f")).toBe(true); - expect(bl.isBlocked("aaa")).toBe(true); - }); -}); -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `cd js && npx vitest run test/blocklist.test.ts` - -- [ ] **Step 3: Add SEED_HASHES constant and update constructor** - -In `blocklist.ts`, add before the Blocklist class: - -```typescript -const SEED_HASHES = new Set([ - "854aa9bd5a641b03fcf2e4a26affb33057af3238a10a83e194c05384f371734f", - "46315c1d4dcd39199c6d0e43985c5007c1156bc538e3a82ba9b2883f363eab35", - "0b2ca8fedb87a97de9f5c462e09110febf887516dd62877d7e95a5556ef90905", - "2b5a339d00216894c7bd3620e008e5443f4e30b9e9883a2b15c082d076775084", - "eccb3a65c459a6b69223d38726e3fddb6184a6e7c52935148fdcd84961a6f9df", - "f554a511faaca2431265399a9d5b2f7184778b9521952dc757257dbe0aab2a46", - "323b9121b6e320fb04bae89c963690069c5172dca017469be2917e5feaec886c", - "4826c0e8aef00f902190ab32519e4533b7e4b725f46fb70156705ea8708a7385", - "3951cdb38bbc37e28f98448e0478b93d319d892783efb23462b59fedea52189d", - "a7ddd5ce6c41055b4ef808810ac6f1b09dc4ae05eecc2f89dc64ac4682502d99", - "eab3b7330de3b61fae1b5cba738ae499424e1c45ef1b025c560cca410e6cd16b", - "d71ceee36d1e136a5cddc0d5b416210d94635a71fa90f9ef817f4f74a7b21603", -]); -``` - -In constructor, initialize `_hashes` with seeds: -```typescript -this._hashes = new Set(SEED_HASHES); -``` - -In `_loadFromFile`, change to UNION instead of replace: -```typescript -// OLD: this._hashes = new Set(data.sha256_hashes); -// NEW: -for (const h of (data.sha256_hashes ?? 
[])) { - this._hashes.add(h); -} -``` - -- [ ] **Step 4: Update existing blocklist tests for seed hashes** - -Existing tests assert `bl.size === 0` for empty blocklists. With 12 seed hashes, these break. Update: -- `expect(bl.size).toBe(0)` -> `expect(bl.size).toBe(12)` (empty cache = seeds only) -- `expect(bl.size).toBe(2)` -> `expect(bl.size).toBe(14)` (2 from cache + 12 seeds) -- Any test that calls `bl.addHashes(["x"])` then checks size: add 12 to expected count - -- [ ] **Step 5: Run tests** - -Run: `cd js && npx vitest run test/blocklist.test.ts` -Expected: All pass (new + updated existing) - -- [ ] **Step 6: Commit** - -```bash -git add js/src/blocklist.ts js/test/blocklist.test.ts -git commit -m "feat(js): add 12 seed hashes to blocklist, union on file load" -``` - ---- - -## Task 4: Security hardening - mcp-checker.ts supply chain + baselines + skill-scanner - -**Files:** -- Modify: `js/src/mcp-checker.ts:294-350` (_checkSupplyChain), `js/src/mcp-checker.ts:154-176` (_checkSensitivePaths) -- Modify: `js/src/baselines.ts:50-61` (configFingerprint) -- Modify: `js/src/skill-scanner.ts` (SKILL-002 patterns) - -- [ ] **Step 1: Write failing tests for new supply chain checks** - -Add to existing `js/test/mcp-checker.test.ts`: - -```typescript -describe("supply chain - bunx", () => { - it("detects unpinned bunx package", () => { - const result = checker.check({ name: "test", command: "bunx", args: ["@scope/pkg"], source_file: "f" }); - expect(result.findings.some((f: any) => f.code === "MCP-007")).toBe(true); - }); -}); - -describe("supply chain - deno", () => { - it("detects unpinned deno module", () => { - const result = checker.check({ name: "test", command: "deno", args: ["run", "npm:pkg"], source_file: "f" }); - expect(result.findings.some((f: any) => f.code === "MCP-007")).toBe(true); - }); -}); - -describe("supply chain - docker", () => { - it("detects docker run with :latest", () => { - const result = checker.check({ name: "test", command: "docker", args: 
["run", "myimg:latest"], source_file: "f" }); - expect(result.findings.some((f: any) => f.code === "MCP-007")).toBe(true); - }); - - it("detects docker run without tag", () => { - const result = checker.check({ name: "test", command: "docker", args: ["run", "myimg"], source_file: "f" }); - expect(result.findings.some((f: any) => f.code === "MCP-007")).toBe(true); - }); -}); - -describe("supply chain - pip", () => { - it("detects unpinned pip install", () => { - const result = checker.check({ name: "test", command: "pip", args: ["install", "requests"], source_file: "f" }); - expect(result.findings.some((f: any) => f.code === "MCP-007")).toBe(true); - }); -}); - -describe("supply chain - go", () => { - it("detects unpinned go run", () => { - const result = checker.check({ name: "test", command: "go", args: ["run", "github.com/user/tool"], source_file: "f" }); - expect(result.findings.some((f: any) => f.code === "MCP-007")).toBe(true); - }); -}); -``` - -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cd js && npx vitest run test/mcp-checker.test.ts` - -- [ ] **Step 3: Add bunx/deno/docker/pip/go checks to _checkSupplyChain** - -In `_checkSupplyChain` method, after the existing npx and uvx checks, add: - -```typescript -// bunx (Bun's npx) -const bunxMatch = allStr.match(/bunx\s+(@?[a-zA-Z0-9_./-]+(?:@[^\s]+)?)/); -if (bunxMatch) { - const pkg = bunxMatch[1]; - const parts = pkg.split("/"); - const last = parts[parts.length - 1] || pkg; - const hasVersion = last.includes("@") && !last.startsWith("@"); - if (!hasVersion) { - findings.push({ - code: "MCP-007", title: "Unpinned bunx package", - description: `Package "${pkg}" has no version pin. 
Use @version.`, - severity: "medium", remediation: `Pin: bunx ${pkg}@`, - }); - } -} - -// deno run -if (/deno\s+run/.test(allStr)) { - const denoMatch = allStr.match(/deno\s+run\s+(?:--allow-\S+\s+)*(\S+)/); - if (denoMatch) { - const mod = denoMatch[1]; - if (!mod.startsWith(".") && !mod.startsWith("/") && !mod.includes("@")) { - findings.push({ - code: "MCP-007", title: "Unpinned deno module", - description: `Module "${mod}" has no version pin.`, - severity: "medium", remediation: `Pin: ${mod}@`, - }); - } - } -} - -// docker run -const dockerMatch = allStr.match(/docker\s+run\s+(?:-[^\s]+\s+)*([a-zA-Z0-9_./-]+(?::[^\s]+)?)/); -if (dockerMatch) { - const image = dockerMatch[1]; - if (!image.includes(":") || image.endsWith(":latest")) { - findings.push({ - code: "MCP-007", title: "Unpinned Docker image", - description: `Image "${image}" uses no tag or :latest.`, - severity: "medium", remediation: `Pin: ${image.split(":")[0]}:`, - }); - } -} - -// pip install -const pipMatch = allStr.match(/pip3?\s+install\s+([a-zA-Z0-9_.-]+)/); -if (pipMatch) { - const pkg = pipMatch[1]; - if (!["-e", "-r", "--upgrade"].includes(pkg)) { - const after = allStr.split(pkg)[1] || ""; - if (!after.slice(0, 20).includes("==")) { - findings.push({ - code: "MCP-007", title: "Unpinned pip package", - description: `Package "${pkg}" has no ==version pin.`, - severity: "medium", remediation: `Pin: ${pkg}==`, - }); - } - } -} - -// go run -const goMatch = allStr.match(/go\s+run\s+([a-zA-Z0-9_./-]+)/); -if (goMatch) { - const mod = goMatch[1]; - if (!mod.startsWith(".") && !mod.startsWith("/") && !mod.includes("@")) { - findings.push({ - code: "MCP-007", title: "Unpinned Go module", - description: `Module "${mod}" has no @version pin.`, - severity: "medium", remediation: `Pin: ${mod}@`, - }); - } -} -``` - -- [ ] **Step 4: Add symlink resolution to _checkSensitivePaths** - -Add `realpathSync` to the top-level imports at the top of `mcp-checker.ts`: - -```typescript -import { realpathSync } 
from "node:fs"; -``` - -Then at the start of `_checkSensitivePaths`, resolve symlinks: - -```typescript -const resolvedArgs = args.map((a: string) => { - try { return realpathSync(a); } catch { return a; } -}); -``` - -Use `resolvedArgs` instead of `args` for the sensitive path check. - -- [ ] **Step 5: Update baselines.ts configFingerprint** - -In `configFingerprint` (~line 50), add `url` and `headers` to the existing parts array. Keep the existing `createHash` pattern (do NOT use `sha256` from blocklist): - -```typescript -function configFingerprint(server: Record<string, any>): string { - const parts = [ - server.command ?? "", - JSON.stringify([...(server.args ?? [])].map(String).sort()), - JSON.stringify(Object.keys(server.env ?? {}).map(String).sort()), - server.url ?? "", - JSON.stringify(Object.keys(server.headers ?? {}).map(String).sort()), - ]; - return createHash("sha256").update(parts.join("|")).digest("hex"); -} -``` - -- [ ] **Step 6: Add markdown exfil patterns to skill-scanner.ts** - -Find the existing SKILL-002 rule in the `PATTERN_RULES` array (~line 58-74). It has a `patterns: RegExp[]` array. **Append** 3 new RegExp entries to this existing array: - -```typescript -// Add to the existing SKILL-002 patterns array: -/!\[.*?\]\(https?:\/\/[^\s)]+\?[^\s)]*(?:data|content|file|secret|key|token|d)=/i, -/<img[^>]*src=["']https?:\/\/[^"']+\?[^"']*(?:data|content|file|secret|key|token|d)=/i, -/(?:render|display|show|include)\s+(?:an?\s+)?(?:image|img|markdown)\s+(?:tag|link)?\s*.*https?:\/\//i, -``` - -Do NOT create new rule objects — add to the existing SKILL-002 `patterns` array. 
- -- [ ] **Step 7: Run all modified test files** - -Run: `cd js && npx vitest run test/mcp-checker.test.ts test/baselines.test.ts test/skill-scanner.test.ts` -Expected: All pass - -- [ ] **Step 8: Commit** - -```bash -git add js/src/mcp-checker.ts js/src/baselines.ts js/src/skill-scanner.ts js/test/mcp-checker.test.ts -git commit -m "feat(js): add 5 supply chain checks, URL in fingerprint, markdown exfil patterns" -``` - ---- - -## Task 5: project-config.ts - -**Files:** -- Create: `js/src/project-config.ts` -- Create: `js/test/project-config.test.ts` - -- [ ] **Step 0: Add yaml dependency** - -In `js/package.json`, add to `dependencies`: -```json -"yaml": "^2.4.0" -``` - -Run: `cd js && npm install` - -The `yaml` package has built-in TypeScript types (no separate @types needed). - -- [ ] **Step 1: Write failing tests** - -Create `js/test/project-config.test.ts` with tests for: -- `loadProjectConfig`: valid YAML, invalid fail_on, unknown keys warning, null values coerced -- `resolveProjectConfig`: explicit path, walk-up to .git, HOME boundary, null when nothing found -- `shouldIgnorePath`: segment matching ("node_modules" matches "foo/node_modules/bar") -- `shouldIgnoreFinding`: bare code match, code:path match, maxsplit=1 for colons -- `shouldFail`: all three levels including hasSafe -- `generateUnlistedFindings`: filters not_installed/error agents, empty allowlist = no findings -- `generateConfigYaml`: includes default ignore_paths -- `runGuardInit`: interactive=false creates file, force overwrites - -~35 tests total. Each test creates a temp directory with `mkdtempSync`. - -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cd js && npx vitest run test/project-config.test.ts` - -- [ ] **Step 3: Implement project-config.ts** - -Create `js/src/project-config.ts` implementing all functions from the spec. 
Key implementation notes: -- Use the `yaml` package added in Step 0 for YAML parsing (do not add `js-yaml` as a second parser) -- `resolveProjectConfig` walks parent dirs checking for `.agentseal.yaml`, stops at `.git`, HOME, or root -- `shouldIgnoreFinding` uses `id.indexOf(":")` for first-colon split -- `shouldFail` with "safe" level checks all three booleans -- `generateUnlistedFindings` filters agents by status before checking allowlist -- `generateConfigYaml` uses template strings, defaults `ignore_paths: [node_modules, .git, __pycache__]` -- `runGuardInit` calls `scanMachine()` + `scanDirectory()`, prompts unless `interactive=false` - -- [ ] **Step 4: Run tests** - -Run: `cd js && npx vitest run test/project-config.test.ts` -Expected: All pass - -- [ ] **Step 5: Export from index.ts** - -Add project-config exports to `src/index.ts`. - -- [ ] **Step 6: Commit** - -```bash -git add js/package.json js/src/project-config.ts js/test/project-config.test.ts js/src/index.ts -git commit -m "feat(js): add project-config module — .agentseal.yaml loader, resolution, filtering" -``` - ---- - -## Task 6: registry-client.ts - -**Files:** -- Create: `js/src/registry-client.ts` -- Create: `js/test/registry-client.test.ts` - -- [ ] **Step 1: Write failing tests** - -Create `js/test/registry-client.test.ts` with tests for: -- `slugify`: "@anthropic/filesystem" -> "anthropic-filesystem", "my_tool" -> "my-tool" -- `extractPackageSlug`: npx, bunx, uvx, pip, docker, bare binary returns null -- `bulkCheck`: mocked fetch returning data, mocked fetch timeout returns {}, empty slugs returns {} -- `enrichMcpResults`: sets registry fields, skips if already set, handles no results - -~20 tests total. Mock `globalThis.fetch` with `vi.fn()`. 
- -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cd js && npx vitest run test/registry-client.test.ts` - -- [ ] **Step 3: Implement registry-client.ts** - -Create `js/src/registry-client.ts`: -- `slugify(name)`: lowercase, replace `@scope/` with `scope-`, replace `[^a-z0-9-]` with `-` -- `extractPackageSlug(command)`: regex for npx/bunx/uvx/pip/docker, strip @version, return slugify(pkg) -- `bulkCheck(slugs, apiKey?)`: POST with `AbortSignal.timeout(8000)`, User-Agent header, catch all errors -- `enrichMcpResults(results, apiKey?)`: build slug map from name + command, call bulkCheck, set fields if not already set - -- [ ] **Step 4: Run tests** - -Run: `cd js && npx vitest run test/registry-client.test.ts` -Expected: All pass - -- [ ] **Step 5: Export from index.ts** - -- [ ] **Step 6: Commit** - -```bash -git add js/src/registry-client.ts js/test/registry-client.test.ts js/src/index.ts -git commit -m "feat(js): add registry client for MCP trust score enrichment" -``` - ---- - -## Task 7: rules.ts - -**Files:** -- Create: `js/src/rules.ts` -- Create: `js/test/rules.test.ts` - -- [ ] **Step 1: Write failing tests** - -Create `js/test/rules.test.ts` with tests for: -- `fnmatchCase`: basic `*`, `?`, `[abc]`, `[!abc]`, regex-special char escaping -- `RuleEngine.fromPaths`: valid YAML, missing required fields throws, invalid severity throws, duplicate IDs throws, dir globbing -- `evaluateMcp`: AND across fields, OR within field, coerce string to [string] -- `evaluateSkill`: content truncated to 10240, path matching -- `evaluateAgent`: agent_type matching -- `runTests`: pass and fail cases - -~25 tests total. 
- -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cd js && npx vitest run test/rules.test.ts` - -- [ ] **Step 3: Implement fnmatchCase** - -```typescript -export function fnmatchCase(value: string, pattern: string): boolean { - const re = pattern - .replace(/[.+^${}()|\\]/g, "\\$&") // escape regex specials - .replace(/\*/g, ".*") - .replace(/\?/g, "."); - return new RegExp(`^${re}$`, "i").test(value); -} -``` - -Pass `[...]` through to the regex unchanged (it is already a valid regex character class), but rewrite a leading `[!` to `[^` (after the escaping step, so the `^` is not itself escaped): `!` has no special meaning inside a regex class, so an untranslated `[!abc]` would match `!`, `a`, `b`, or `c` instead of negating the set. - -- [ ] **Step 4: Implement RuleEngine class** - -Create `js/src/rules.ts` with: -- YAML loading via the `yaml` package (added in Task 5, Step 0) -- Validation: required fields, enum values, no duplicate IDs -- `_matchEntity`: AND across fields, OR within field, coerce string to array -- `evaluateMcp/Skill/Agent`: build entity_data dict, filter rules by match.type, call _matchEntity -- `runTests`: iterate rules' tests, call _matchEntity, compare result - -- [ ] **Step 5: Run tests** - -Run: `cd js && npx vitest run test/rules.test.ts` -Expected: All pass - -- [ ] **Step 6: Export from index.ts** - -- [ ] **Step 7: Commit** - -```bash -git add js/src/rules.ts js/test/rules.test.ts js/src/index.ts -git commit -m "feat(js): add YAML community rule engine with glob matching" -``` - ---- - -## Task 8: history.ts - -**Files:** -- Create: `js/src/history.ts` -- Create: `js/test/history.test.ts` - -- [ ] **Step 1: Add better-sqlite3 to optionalDependencies** - -In `js/package.json`, add: -```json -"optionalDependencies": { - "better-sqlite3": "^11.0.0" -}, -``` - -In devDependencies, add: -```json -"@types/better-sqlite3": "^7.6.0" -``` - -Run: `cd js && npm install` - -- [ ] **Step 2: Write failing tests** - -Create `js/test/history.test.ts` with tests for: -- `HistoryStore`: constructor creates DB file, save/loadPrevious round-trip, loadPrevious returns null on first scan, prune removes old entries, prune enforces max_rows, _count helper -- `normalizeSkillPath`: HOME 
prefix, scanPath prefix, fallback to last 2 segments, Windows separator normalization -- `computeDelta`: new skill, resolved skill, changed verdict, new MCP, removed MCP, new agent, removed agent, agent status filtering - -~25 tests total. Each test uses mkdtempSync for DB path. - -- [ ] **Step 3: Run tests to verify they fail** - -Run: `cd js && npx vitest run test/history.test.ts` - -- [ ] **Step 4: Implement history.ts** - -Create `js/src/history.ts`: - -```typescript -import { createRequire } from "node:module"; -const _require = createRequire(import.meta.url); - -let Database: any = null; -try { - Database = _require("better-sqlite3"); -} catch { - // better-sqlite3 not installed — history features disabled -} -``` - -Note: `createRequire` is needed because the package is ESM (`"type": "module"`). Regular `require()` is not available in ESM. This keeps the synchronous API that `better-sqlite3` is chosen for. - -- `HistoryStore` class: wraps better-sqlite3, creates table + index on construction, save with BEGIN IMMEDIATE, loadPrevious with OFFSET 1, prune on save, _count for tests -- If `Database` is null, constructor returns a no-op stub (save does nothing, loadPrevious returns null) -- `normalizeSkillPath`: replace HOME with `~/`, replace scanPath with relative, fallback last 2 segments, normalize `\` to `/` -- `computeDelta`: compare current vs previous by normalized keys, emit DeltaEntry for each diff - -- [ ] **Step 5: Run tests** - -Run: `cd js && npx vitest run test/history.test.ts` -Expected: All pass (if better-sqlite3 installed) or skip gracefully - -- [ ] **Step 6: Export from index.ts** - -- [ ] **Step 7: Commit** - -```bash -git add js/src/history.ts js/test/history.test.ts js/package.json js/src/index.ts -git commit -m "feat(js): add SQLite history store with delta scanning" -``` - ---- - -## Task 9: Wire everything into guard.ts - -**Files:** -- Modify: `js/src/guard.ts:38-49` (GuardOptions), `js/src/guard.ts:189-263` (run method) -- Create: 
`js/test/guard-v08.test.ts` - -- [ ] **Step 1: Write integration tests** - -Create `js/test/guard-v08.test.ts` with tests for: -- Guard with project config (ignore_paths filters results) -- Guard with custom rules (produces CustomFinding) -- Guard with --from-json (re-renders existing report) -- Guard with history (saves and computes delta) -- Guard without registry (noRegistry option skips enrichment) -- Guard exit code logic (fail_on levels, ERROR as danger) -- Guard unlisted findings (agents not in allowlist) - -~15 tests. Each creates temp dirs with .agentseal.yaml and test skill files. - -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cd js && npx vitest run test/guard-v08.test.ts` - -- [ ] **Step 3: Update GuardOptions interface** - -Add to the existing `GuardOptions` interface: - -```typescript - config?: ProjectConfig; - noRegistry?: boolean; - noDiff?: boolean; - rulesPaths?: string[]; - fromJson?: string; - failOn?: string; -``` - -- [ ] **Step 4: Update Guard.run() to wire new modules** - -Update the `run()` method following the flow from the spec: -1. If `fromJson` provided, read file, parse via `guardReportFromDict`, return immediately -2. Resolve project config (resolveProjectConfig or use provided config) -3. Resolve rules paths (options > config > .agentseal/rules/) -4. Run existing discovery (keep raw MCP config dicts) -5. Filter by ignore_paths -6. Scan skills + evaluate custom rules -7. Check MCPs + evaluate custom rules (using raw config dicts) -8. Evaluate custom rules on agents -9. Enrich from registry (unless noRegistry) -10. Generate unlisted findings -11. Existing toxic flows + baselines -12. Apply ignore_findings -13. Save to history (unless noDiff) -14. Compute delta (unless noDiff) -15. 
Build full GuardReport - -- [ ] **Step 5: Run tests** - -Run: `cd js && npx vitest run test/guard-v08.test.ts` -Expected: All pass - -- [ ] **Step 6: Run ALL existing guard tests for regression** - -Run: `cd js && npx vitest run test/guard.test.ts` -Expected: All existing tests still pass - -- [ ] **Step 7: Commit** - -```bash -git add js/src/guard.ts js/test/guard-v08.test.ts -git commit -m "feat(js): wire project config, rules, registry, history into guard" -``` - ---- - -## Task 10: CLI guard command - -**Files:** -- Modify: `js/bin/agentseal.ts:290-319` (add guard command after compare) - -- [ ] **Step 1: Add guard command to CLI** - -After the existing `compare` command, add: - -```typescript -const guardCmd = program - .command("guard") - .description("Scan machine for AI agent security issues") - .argument("[path]", "directory to scan (default: entire machine)") - .option("--verbose", "show all findings") - .option("--no-registry", "skip agentseal.org enrichment") - .option("--no-diff", "skip delta comparison") - .option("--from-json <file>", "re-render saved JSON report") - .option("--fail-on <level>", "exit code threshold: danger|warning|safe") - .option("--rules <path>", "custom YAML rules path") - .option("--config <path>", "explicit .agentseal.yaml path") - .option("-o, --output <format>", "output format: terminal|json|sarif", "terminal") - .option("--save <file>", "save JSON report to file") - .option("--reset-baselines", "re-trust all MCP servers") - .action(async (scanPath, opts) => { - // Implementation: build GuardOptions from CLI opts, run Guard, render output - }); - -guardCmd - .command("init") - .description("Generate .agentseal.yaml config file") - .option("--force", "overwrite existing config") - .action(async (opts) => { - // Implementation: call runGuardInit - }); - -guardCmd - .command("test") - .description("Validate YAML rules") - .option("--rules <path>", "rules path (default: .agentseal/rules/)") - .action(async (opts) => { - // Implementation: load rules, run tests, print 
results - }); -``` - -- [ ] **Step 2: Implement guard action handler** - -The handler should: -1. Print banner -2. If `--from-json`, read file and re-render -3. Build GuardOptions from CLI opts -4. Resolve project config (--config or auto-detect) -5. Create Guard instance and run -6. Render output (terminal/json/sarif) -7. If `--save`, write JSON to file -8. Exit with code based on fail_on - -- [ ] **Step 3: Implement terminal output renderer** - -Create a `_renderGuardTerminal(report, delta?, verbose?)` function with: -- ANSI-aware column padding (strip escape codes for width) -- Section separators and headers -- Color-coded verdicts (green=safe, yellow=warning, red=danger) -- REGISTRY column for MCPs -- DELTA section showing new/resolved/changed -- Summary box - -- [ ] **Step 4: Implement guard init handler** - -Call `runGuardInit({ force: opts.force, interactive: true })`. - -- [ ] **Step 5: Implement guard test handler** - -Load rules from `--rules` or `.agentseal/rules/`, call `engine.runTests()`, print pass/fail table. - -- [ ] **Step 6: Manual test** - -Run: `cd js && npm run build && node dist/agentseal.js guard --help` -Expected: Shows guard command with all options - -Run: `cd js && node dist/agentseal.js guard` -Expected: Scans machine, shows terminal output - -- [ ] **Step 7: Commit** - -```bash -git add js/bin/agentseal.ts -git commit -m "feat(js): add guard CLI command with init, test subcommands" -``` - ---- - -## Task 11: Package updates and final integration - -**Files:** -- Modify: `js/package.json` (version, optionalDependencies) -- Modify: `js/src/index.ts` (all new exports) - -- [ ] **Step 1: Bump version to 0.6.0** - -In `package.json`, change `"version": "0.5.2"` to `"version": "0.6.0"`. 
- -- [ ] **Step 2: Verify all exports in index.ts** - -Ensure all new modules are exported: -- `project-config.ts`: ProjectConfig, loadProjectConfig, resolveProjectConfig, shouldIgnorePath, shouldIgnoreFinding, shouldFail, generateUnlistedFindings, runGuardInit -- `history.ts`: HistoryStore, normalizeSkillPath, computeDelta -- `registry-client.ts`: slugify, extractPackageSlug, bulkCheck, enrichMcpResults -- `rules.ts`: RuleEngine, fnmatchCase, Rule, RuleTestResult, CustomFinding -- `guard-models.ts`: UnlistedFinding, CustomFinding, DeltaEntry, DeltaResult, guardReportFromDict - -- [ ] **Step 3: Run FULL test suite** - -Run: `cd js && npx vitest run` -Expected: All tests pass (existing + ~150 new) - -- [ ] **Step 4: Build and verify** - -Run: `cd js && npm run build` -Expected: Build succeeds, no type errors - -- [ ] **Step 5: Run typecheck** - -Run: `cd js && npx tsc --noEmit` -Expected: No type errors - -- [ ] **Step 6: Test CLI end-to-end** - -```bash -cd js && node dist/agentseal.js guard -cd js && node dist/agentseal.js guard --output json -cd js && node dist/agentseal.js guard init --force -cd js && node dist/agentseal.js guard test -``` - -- [ ] **Step 7: Commit everything** - -```bash -git add js/ -git commit -m "chore(js): bump to 0.6.0, export all new guard v0.8 modules" -``` - ---- - -## Task 12: Triple verification pass - -**Files:** None (verification only) - -- [ ] **Step 1: Run full test suite 3 times** - -```bash -cd js && npx vitest run && npx vitest run && npx vitest run -``` - -All 3 runs must pass with identical results. 
- -- [ ] **Step 2: Verify Python/JS parity checklist** - -Check each feature exists in both: -- [ ] project-config: loadProjectConfig, resolveProjectConfig, shouldIgnorePath, shouldIgnoreFinding, shouldFail, generateUnlistedFindings, runGuardInit -- [ ] history: HistoryStore (save, loadPrevious, prune), normalizeSkillPath, computeDelta -- [ ] registry-client: slugify, extractPackageSlug, bulkCheck, enrichMcpResults -- [ ] rules: RuleEngine (fromPaths, evaluateMcp/Skill/Agent, runTests), fnmatchCase -- [ ] guard-models: UnlistedFinding, CustomFinding, DeltaEntry, DeltaResult, fromDict, registry fields -- [ ] deobfuscate: CONFUSABLES (80+), decodeHtmlEntities, 2-pass pipeline -- [ ] blocklist: 12 seed hashes, union on load -- [ ] mcp-checker: bunx, deno, docker, pip, go supply chain checks -- [ ] baselines: URL + headers in fingerprint -- [ ] skill-scanner: 3 markdown exfil patterns -- [ ] CLI: guard, guard init, guard test, all flags - -- [ ] **Step 3: Verify graceful degradation** - -Temporarily rename `node_modules/better-sqlite3` to `node_modules/better-sqlite3.bak` and run: -```bash -cd js && npx vitest run test/guard-v08.test.ts -``` - -Guard tests should still pass (history features disabled, no crash). 
- -Restore: `mv node_modules/better-sqlite3.bak node_modules/better-sqlite3` - -- [ ] **Step 4: Final commit if any fixes needed** - -```bash -git add js/src/ js/test/ js/bin/ js/package.json -git commit -m "fix(js): triple verification fixes" -``` diff --git a/docs/superpowers/specs/2026-03-24-guard-v0.8-features-design.md b/docs/superpowers/specs/2026-03-24-guard-v0.8-features-design.md deleted file mode 100644 index 33d48e0..0000000 --- a/docs/superpowers/specs/2026-03-24-guard-v0.8-features-design.md +++ /dev/null @@ -1,506 +0,0 @@ -# Guard v0.8 Features Design Spec - -**Date:** 2026-03-24 -**Branch:** `dev/guard-init` -**Scope:** 3 features + version bump to complete the guard 0.8.0 release - ---- - -## Feature 1: MCP Registry Trust Score Enrichment - -### Goal -After local static analysis, enrich MCP results with cloud-scanned trust scores from the AgentSeal registry. Informational only — does not override local verdicts. - -### Flow -1. Guard completes local MCP static analysis (MCP-001 through MCP-013) -2. Collect all MCP server names from `mcp_results` -3. Derive lookup keys from both server `name` AND `command` field (which contains the npm/pypi package name). Send both to improve match rate since local config keys ("fs") rarely match registry slugs ("modelcontextprotocol-server-filesystem"). -4. Call `POST https://agentseal.org/api/v1/mcp/intel/bulk-check` with `{"slugs": [...]}` (deduplicated list of candidate slugs) -5. Enrich each `MCPServerResult` with registry data -6. If API unreachable, times out (8s, matching `TIMEOUT` in `registry_client.py`), or returns 429 → skip silently, log debug message - -### Data Model Changes - -**`guard_models.py` — MCPServerResult:** -```python -registry_score: Optional[float] = None # 0-100 -registry_level: Optional[str] = None # EXCELLENT|HIGH|MEDIUM|LOW|CRITICAL -registry_findings_count: Optional[int] = None -``` - -**`guard_models.py` — GuardReport.to_dict() / from_dict():** -Include registry fields in serialization. 
SARIF: add as `properties` on MCP results. - -### New Module: `registry_client.py` -```python -import urllib.request, json - -BULK_CHECK_URL = "https://agentseal.org/api/v1/mcp/intel/bulk-check" -TIMEOUT = 8 # seconds (allows for Cloudflare cold starts) - -def slugify(name: str) -> str: - """Derive registry slug from MCP server name.""" - -def extract_package_slug(command: str) -> str | None: - """Extract package name from command (e.g. 'npx @scope/pkg' → 'scope-pkg').""" - -def bulk_check(slugs: list[str], *, api_key: str | None = None) -> dict[str, dict]: - """Call bulk-check endpoint. Returns {slug: {trust_score, trust_level, ...}}. - Skips call if slugs is empty. Ignores unknown slugs in response.""" - -def enrich_mcp_results(results: list[MCPServerResult], *, api_key: str | None = None) -> None: - """Mutate results in-place with registry data. - from_dict() handles missing registry fields with None defaults for backward compat.""" -``` - -Uses `urllib.request` (stdlib) — no new dependencies. - -### CLI Changes -- New flag: `--no-registry` — skip API call (offline/CI mode) -- `AGENTSEAL_API_KEY` env var → passed as Bearer token for Pro tier -- Registry enrichment runs after MCP static analysis, before output rendering - -### Terminal Output -New REGISTRY column in MCP SERVERS section: -``` -MCP SERVERS 3 servers -────────────────────────────────────────────────────────────────────────── -NAME STATUS VERDICT SEVERITY REGISTRY FINDING -filesystem checked WARNING medium 72 HIGH MCP-001: Sensitive path -slack-mcp checked OK — 91 EXCELLENT — -unknown-server checked DANGER critical — NOT FOUND MCP-007: Unpinned pkg -``` - -Registry column color: EXCELLENT/HIGH=green, MEDIUM=yellow, LOW/CRITICAL=red, NOT FOUND=dim. 
- -### JSON Output -```json -{ - "mcp_results": [{ - "name": "filesystem", - "verdict": "WARNING", - "findings": [...], - "registry": { - "score": 72.0, - "level": "HIGH", - "findings_count": 3 - } - }] -} -``` - -### SARIF Output -Registry data added as `properties.registry` on each MCP result entry. - -### Error Handling -- Network timeout (8s, the `TIMEOUT` constant in `registry_client.py`) → skip, no error shown -- HTTP 429 (rate limited) → skip, debug log -- HTTP 4xx/5xx → skip, debug log -- Invalid JSON response → skip, debug log -- No crash, no user-visible error for registry failures - ---- - -## Feature 2: YAML Community Rules + `guard test` - -### Goal -Let users write custom detection rules in YAML. Rules match against MCP servers, skills, or agents using glob patterns. Include a `guard test` subcommand to validate rules against inline test cases. - -### Rule Format - -Rules live in `.agentseal/rules/*.yaml` or can be referenced from `.agentseal.yaml`: -```yaml -# .agentseal.yaml -rules_paths: - - .agentseal/rules/ - - shared-rules/crypto.yaml -``` - -Each rule file: -```yaml -rules: - - id: CUSTOM-001 - title: "Crypto mining MCP server" - description: "Detects MCP servers running cryptocurrency miners" - severity: critical # critical|high|medium|low - verdict: danger # danger|warning - remediation: "Remove this MCP server immediately" - match: - type: mcp # mcp|skill|agent - command: ["*miner*", "*xmrig*"] # glob patterns, OR logic - tests: - - name: "matches xmrig" - input: {type: mcp, command: "npx xmrig-proxy", name: "miner"} - expect: match - - name: "ignores filesystem" - input: {type: mcp, command: "npx @modelcontextprotocol/filesystem", name: "fs"} - expect: no_match -``` - -### Match Fields by Entity Type - -| Entity | Available match fields | -|--------|----------------------| -| `mcp` | `name`, `command`, `args` (joined string), `env_keys`, `env_values`, `source_file` | -| `skill` | `name`, `path`, `content` (first 10KB of file) | -| `agent` | `agent_type`, `name`, `config_path` | - 
-**Matching logic:** -- Each field value is a list of glob patterns -- Multiple patterns in one field = OR (any match) -- Multiple fields = AND (all must match) -- Glob uses `fnmatch.fnmatchcase()` with explicit `.lower()` normalization on both pattern and value — deterministic case-insensitive matching across all platforms (security tool must not have platform-dependent false negatives) - -### Data Model - -**`guard_models.py`:** -```python -@dataclass -class CustomFinding: - code: str # CUSTOM-001 - title: str - severity: str # critical|high|medium|low - verdict: str # danger|warning - remediation: str - rule_file: str # path to rule YAML that matched - entity_type: str # mcp|skill|agent - entity_name: str # name of matched entity - - def to_dict(self) -> dict: ... - @classmethod - def from_dict(cls, d: dict) -> "CustomFinding": ... - -# Added to GuardReport: -# custom_findings: list[CustomFinding] = field(default_factory=list) -# GuardReport.total_dangers and total_warnings MUST include custom_findings counts. -# GuardReport.to_sarif() MUST include custom findings with CUSTOM-* rule definitions. -# GuardReport.all_actions MUST include custom finding remediations. -``` - -### New Module: `rules.py` - -```python -@dataclass -class Rule: - id: str - title: str - description: str - severity: str - verdict: str - remediation: str - match: dict # {type, field: [patterns]} - tests: list[dict] # inline test cases - source_file: str # which YAML file - -@dataclass -class RuleTestResult: - rule_id: str - test_name: str - passed: bool - expected: str - actual: str - -class RuleEngine: - def __init__(self, rules: list[Rule]): ... - - @classmethod - def from_paths(cls, paths: list[str]) -> "RuleEngine": - """Load and validate rules from YAML files. Validates: required fields, - severity in (critical|high|medium|low), verdict in (danger|warning), - type in (mcp|skill|agent), unique IDs across files. 
Errors include file path.""" - - def evaluate_mcp(self, server: MCPServerResult, raw_config: dict) -> list[CustomFinding]: - """Evaluate all MCP rules against a server.""" - - def evaluate_skill(self, skill: SkillResult, content: str) -> list[CustomFinding]: - """Evaluate all skill rules against a skill file. - Caller (guard.py) reads file content: UTF-8 errors='replace', first 10240 bytes, - skip files that fail to open.""" - - def evaluate_agent(self, agent: AgentConfigResult) -> list[CustomFinding]: - """Evaluate all agent rules against an agent config.""" - - def run_tests(self) -> list[RuleTestResult]: - """Run inline tests for all loaded rules.""" -``` - -### CLI: `guard test` Subcommand - -```bash -agentseal guard test # test all rules in .agentseal/rules/ -agentseal guard test --rules path/to/rules.yaml # test specific file -``` - -Output: -``` -RULE TESTS 2 rules, 4 tests -────────────────────────────────────────────────────────────────────────── -CUSTOM-001 matches xmrig ............................ PASS -CUSTOM-001 ignores filesystem ....................... PASS -CUSTOM-002 catches shady org ........................ PASS -CUSTOM-002 allows trusted org ....................... FAIL - Expected: no_match, Got: match - -3 passed, 1 failed -``` - -Exit code: 0 if all pass, 1 if any fail. - -### Terminal Output - -New CUSTOM RULES section after MCP SERVERS: -``` -CUSTOM RULES 2 matches -────────────────────────────────────────────────────────────────────────── -RULE ENTITY VERDICT SEVERITY FINDING -CUSTOM-001 miner (mcp) DANGER critical Crypto mining MCP server -CUSTOM-003 deploy.sh (sk) WARNING medium Skill reads SSH keys -``` - -### Integration with ProjectConfig - -Add to `.agentseal.yaml`: -```yaml -rules_paths: - - .agentseal/rules/ # directory → load all *.yaml - - shared/extra-rules.yaml # single file -``` - -Default: if `.agentseal/rules/` exists, load it automatically even without `rules_paths`. 
- -Add `rules_paths` to `_KNOWN_KEYS` in `project_config.py` to prevent spurious warnings. - -### Guard Orchestration - -Rules evaluate after built-in checks: -1. Discovery → skills → MCP static → toxic flows → baselines → (runtime) -2. → Custom rules evaluation -3. → Registry enrichment -4. → Config filtering (ignore_findings applies to custom findings too) -5. → Output - -Custom findings affect exit code via `fail_on` (same as built-in findings). - ---- - -## Feature 3: GitHub Action (Docker-based) - -### Goal -Provide a GitHub Action that runs `agentseal guard` on PRs and uploads SARIF to GitHub's Code Scanning. - -### User Experience - -```yaml -# .github/workflows/agentseal.yml -name: AgentSeal Guard -on: [pull_request] - -jobs: - guard: - runs-on: ubuntu-latest - permissions: - security-events: write # for SARIF upload - contents: read - steps: - - uses: actions/checkout@v4 - - - uses: agentseal/guard-action@v1 - with: - path: "." - config: ".agentseal.yaml" - fail-on: "danger" - no-registry: "false" - verbose: "false" - env: - AGENTSEAL_API_KEY: ${{ secrets.AGENTSEAL_API_KEY }} - - - uses: github/codeql-action/upload-sarif@v3 - if: always() - with: - sarif_file: agentseal.sarif -``` - -### Files - -All under `/Users/torukmakto/AgentSeal/oss-workspace/.github/actions/guard/`: - -**`action.yml`:** -```yaml -name: "AgentSeal Guard" -description: "Scan your project for AI agent security risks" -branding: - icon: "shield" - color: "blue" -inputs: - path: - description: "Directory to scan" - required: false - default: "." 
- config: - description: "Path to .agentseal.yaml config" - required: false - fail-on: - description: "Verdict threshold for failure (danger|warning|safe)" - required: false - default: "danger" - no-registry: - description: "Skip registry API enrichment" - required: false - default: "false" - verbose: - description: "Show all items including safe" - required: false - default: "false" -outputs: - sarif-file: - description: "Path to generated SARIF file" - total-dangers: - description: "Number of DANGER findings" - total-warnings: - description: "Number of WARNING findings" - exit-code: - description: "Guard exit code (0=pass, 1=fail)" -runs: - using: "docker" - image: "Dockerfile" - args: - - ${{ inputs.path }} - - ${{ inputs.config }} - - ${{ inputs.fail-on }} - - ${{ inputs.no-registry }} - - ${{ inputs.verbose }} -``` - -**`Dockerfile`:** -```dockerfile -FROM python:3.12-slim -ARG AGENTSEAL_VERSION=0.8.0 -RUN pip install --no-cache-dir agentseal==${AGENTSEAL_VERSION} -COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh -ENTRYPOINT ["/entrypoint.sh"] -``` - -**`entrypoint.sh`:** -```bash -#!/bin/bash -set -e - -PATH_ARG="${1:-.}" -CONFIG_ARG="$2" -FAIL_ON="${3:-danger}" -NO_REGISTRY="${4:-false}" -VERBOSE="${5:-false}" - -SARIF_PATH="/github/workspace/agentseal.sarif" -JSON_PATH="/tmp/agentseal-report.json" - -# Build command as array (no eval, no injection risk) -CMD=(agentseal guard --path "$PATH_ARG" --output json --save "$JSON_PATH" --fail-on "$FAIL_ON") -[ -n "$CONFIG_ARG" ] && CMD+=(--config "$CONFIG_ARG") -[ "$NO_REGISTRY" = "true" ] && CMD+=(--no-registry) -[ "$VERBOSE" = "true" ] && CMD+=(--verbose) - -# Single scan run — JSON output captures everything -"${CMD[@]}" || true -EXIT_CODE=${PIPESTATUS[0]:-$?} - -# Convert JSON → SARIF (new --from-json flag or python one-liner) -agentseal guard --from-json "$JSON_PATH" --output sarif --save "$SARIF_PATH" 2>/dev/null || true - -# Terminal output for Actions log (from saved JSON, no re-scan) 
-agentseal guard --from-json "$JSON_PATH" --output terminal 2>/dev/null || true - -# Write PR summary from saved JSON -echo "## AgentSeal Guard Results" >> "$GITHUB_STEP_SUMMARY" -python3 -c " -import sys, json -r = json.load(open('$JSON_PATH')) -d = r.get('summary', {}) -print(f'| Metric | Count |') -print(f'|--------|-------|') -print(f'| Dangers | {d.get(\"total_dangers\", 0)} |') -print(f'| Warnings | {d.get(\"total_warnings\", 0)} |') -print(f'| Safe | {d.get(\"total_safe\", 0)} |') -" >> "$GITHUB_STEP_SUMMARY" || true - -# Set outputs -echo "sarif-file=$SARIF_PATH" >> "$GITHUB_OUTPUT" -echo "exit-code=$EXIT_CODE" >> "$GITHUB_OUTPUT" - -exit "$EXIT_CODE" -``` - -**Note:** This requires a `--from-json` CLI flag that loads a saved JSON report and re-renders it in a different output format (no re-scan). This is a small addition to cli.py — load JSON, reconstruct GuardReport via `from_dict()`, then render. - -### Key Behaviors -- SARIF file at `agentseal.sarif` in workspace root -- Terminal output in Actions log for human readability -- Markdown summary in PR for quick overview -- Exit code respects `fail-on` threshold -- `AGENTSEAL_API_KEY` env var auto-detected for Pro registry access -- No delta scanning in CI (no persistent history between runs) - ---- - -## Feature 4: Version Bump to 0.8.0 - -Bump version in: -- `python/agentseal/__init__.py` (or wherever `__version__` lives) -- `pyproject.toml` version field -- `package.json` if TypeScript package exists -- SARIF tool version string in `guard_models.py` - -Done last, after all features land and tests pass. - ---- - -## Implementation Order - -1. **Registry client** (`registry_client.py`) — standalone module, easy to test -2. **Registry enrichment wiring** — integrate into guard.py and CLI output -3. **Rule engine** (`rules.py`) — standalone module with inline tests -4. **Rule wiring** — integrate into guard.py, CLI, `guard test` subcommand -5. **GitHub Action** — action.yml, Dockerfile, entrypoint.sh -6. 
**Version bump** — 0.8.0 - -## Testing Strategy - -### Registry client (`test_registry_client.py`) -- bulk_check with empty slug list → returns empty dict, no HTTP call -- bulk_check with valid slugs → mock 200, verify response parsing -- bulk_check with partial response (some slugs missing) → only found ones enriched -- bulk_check with timeout/429/5xx → returns empty dict, no crash -- bulk_check with invalid JSON response → returns empty dict -- slugify edge cases: empty string, `@scope/name`, unicode, dashes -- extract_package_slug: `npx @scope/pkg`, `uvx pkg`, `docker run img`, bare binary -- enrich_mcp_results with empty list → no-op -- from_dict backward compat: old reports without registry fields → None defaults - -### Rules engine (`test_rules.py`) -- Load valid rule file → correct Rule objects -- Load invalid YAML → clear error with file path -- Load rule with missing required field → validation error -- Load rule with invalid severity/verdict/type → validation error -- Duplicate rule IDs across files → error -- Pattern matching: glob OR logic (multiple patterns in one field) -- Pattern matching: AND logic (multiple fields all must match) -- Case-insensitive matching across platforms (fnmatchcase + lower) -- Rule with no tests field → guard test skips gracefully -- Rule with empty pattern list → no match -- evaluate_mcp / evaluate_skill / evaluate_agent with matching and non-matching entities -- Skill content matching: first 10KB, UTF-8 errors='replace' -- run_tests: all pass, some fail, mixed results - -### Integration -- Guard with custom rules + registry enrichment in single run -- Custom finding suppressed by ignore_findings with CUSTOM-xxx code -- Custom findings affect exit code at all fail_on levels -- total_dangers / total_warnings include custom findings -- SARIF output includes custom findings with CUSTOM-* rule definitions -- JSON output includes both registry data and custom findings -- --from-json renders all output formats from saved report - 
-### GitHub Action -- Manual test on a fork before publishing -- Verify SARIF schema validity (GitHub Code Scanning accepts it) diff --git a/docs/superpowers/specs/2026-03-24-js-guard-v08-parity-design.md b/docs/superpowers/specs/2026-03-24-js-guard-v08-parity-design.md deleted file mode 100644 index 677da37..0000000 --- a/docs/superpowers/specs/2026-03-24-js-guard-v08-parity-design.md +++ /dev/null @@ -1,515 +0,0 @@ -# JS/TS Guard v0.8 Feature Parity Design Spec - -**Date:** 2026-03-24 -**Status:** Approved -**Scope:** Port all Python Guard v0.8 features to the TypeScript package - -## Goal - -Bring the JS/TS `agentseal` package to full feature parity with Python Guard v0.8. After this work, `npx agentseal guard` should produce identical functionality to `agentseal guard` in Python: project config, delta scanning, registry enrichment, custom YAML rules, and all security hardening. - -## Architecture Overview - -``` -bin/agentseal.ts # CLI: add guard command + subcommands -src/ - project-config.ts # NEW: .agentseal.yaml loader, resolution, filtering - history.ts # NEW: SQLite history store + delta computation - registry-client.ts # NEW: agentseal.org registry API client - rules.ts # NEW: YAML community rule engine - guard.ts # UPDATE: wire new modules into scan flow - guard-models.ts # UPDATE: new types (Custom/Unlisted/Delta) - deobfuscate.ts # UPDATE: TR39 confusables, HTML entities, 2-pass - blocklist.ts # UPDATE: 12 seed hashes - mcp-checker.ts # UPDATE: 5 new supply chain checks - baselines.ts # UPDATE: URL + headers in fingerprint - skill-scanner.ts # UPDATE: markdown image exfil patterns - index.ts # UPDATE: export new modules -test/ - project-config.test.ts # NEW: ~35 tests - history.test.ts # NEW: ~25 tests - registry-client.test.ts # NEW: ~20 tests (mocked HTTP) - rules.test.ts # NEW: ~25 tests - guard-models.test.ts # NEW: ~15 tests (fromDict, delta) - guard-v08.test.ts # NEW: ~15 tests (integration) - deobfuscate-v08.test.ts # NEW: ~10 tests 
(confusables, entities, 2-pass) -``` - -## New Modules - -### 1. project-config.ts - -Manages `.agentseal.yaml` project-level scanning policy. - -**Interface:** -```typescript -interface ProjectConfig { - fail_on: "danger" | "warning" | "safe"; // default: "danger" - allowed_agents: string[]; // agent type slugs - allowed_mcp_servers: string[]; // "name" or "name@agent_type" - ignore_paths: string[]; // path segments to skip - ignore_findings: IgnoreFindingEntry[]; // [{id, reason?}] - rules_paths: string[]; // YAML rule file/dir paths - config_path: string; // resolved absolute path -} - -interface IgnoreFindingEntry { - id: string; // "CODE" or "CODE:path" - reason?: string; // warn to stderr if missing -} -``` - -**Functions:** -- `loadProjectConfig(path: string): ProjectConfig` — parse YAML, validate fail_on, warn on unknown keys. Handle YAML `null`/`~` values by coercing to empty arrays (`data.get("key") || []` pattern). -- `resolveProjectConfig(opts?: { configPath?: string; searchDir?: string }): ProjectConfig | null` — explicit path > CWD > walk parents to .git/HOME/root -- `shouldIgnorePath(config: ProjectConfig, path: string): boolean` — any path segment in ignore_paths -- `shouldIgnoreFinding(config: ProjectConfig, code: string, path?: string): boolean` — match bare code or code:path. Split on `:` with maxsplit=1 (i.e. `id.indexOf(":")` for first colon only) to handle codes like `MCP-CVE:file.json`. -- `shouldFail(failOn: string, verdicts: { hasDanger: boolean; hasWarning: boolean; hasSafe?: boolean }): boolean` — `danger` returns hasDanger, `warning` returns hasDanger||hasWarning, `safe` returns hasDanger||hasWarning||hasSafe. ERROR verdicts treated as danger. -- `generateUnlistedFindings(config: ProjectConfig, agents: AgentConfigResult[], mcpServers: Record[]): UnlistedFinding[]` — filter out agents with status `not_installed` or `error` before checking allowlists. Only generate findings if the respective allowlist is non-empty. 
-- `generateConfigYaml(agents: AgentConfigResult[], mcpServers: Record[]): string` — include sensible defaults for `ignore_paths`: `["node_modules", ".git", "__pycache__"]`. -- `runGuardInit(opts?: { targetDir?: string; force?: boolean; interactive?: boolean }): boolean` — `interactive` defaults to true, set false in tests to suppress prompts. - -**Resolution order:** explicit --config > .agentseal.yaml in searchDir > walk parent dirs up to .git, HOME, or fs root. First match wins, no merging. - -**YAML schema:** -```yaml -fail_on: danger -allowed_agents: [] -allowed_mcp_servers: [] -ignore_paths: [] -ignore_findings: [] -rules_paths: [] -``` - -Known keys: `fail_on`, `allowed_agents`, `allowed_mcp_servers`, `ignore_paths`, `ignore_findings`, `rules_paths`. Unknown keys produce stderr warning. Missing `reason` on ignore_findings produces stderr warning. - -### 2. history.ts - -SQLite-backed scan history with delta/diff computation. Uses `better-sqlite3` for synchronous API matching Python's sqlite3. - -**Schema:** -```sql -CREATE TABLE IF NOT EXISTS guard_scans ( - id INTEGER PRIMARY KEY, - timestamp TEXT NOT NULL, - scan_path TEXT, - report_json TEXT NOT NULL -); -CREATE INDEX IF NOT EXISTS idx_scope ON guard_scans(scan_path, timestamp); -``` - -**Class: HistoryStore** -```typescript -class HistoryStore { - constructor(dbPath?: string, maxRows?: number, retentionDays?: number) - // dbPath default: ~/.agentseal/history.db - // maxRows default: 1000 - // retentionDays default: 90 - - save(report: GuardReport, scanPath?: string): void - // Normalize scanPath via path.resolve(). BEGIN IMMEDIATE transaction, insert report_json. - // Calls prune() after save. - - loadPrevious(scanPath?: string): GuardReport | null - // SELECT ... ORDER BY timestamp DESC LIMIT 1 OFFSET 1 - // Returns null on any error (sqlite, json parse, key error). Logs warning to stderr. 
- - prune(): void - // DELETE older than retentionDays, DELETE beyond maxRows (by timestamp DESC) - - _count(): number - // SELECT COUNT(*) — exposed for test assertions - - close(): void -} -``` - -**Functions:** -- `normalizeSkillPath(path: string, scanPath?: string): string` — normalize order: (1) HOME prefix becomes `~/remainder`, (2) scanPath prefix becomes relative path, (3) fallback: last 2 path segments. Normalize path separators to `/` on all platforms (Windows compat). -- `computeDelta(current: GuardReport, previous: GuardReport, scanPath?: string): DeltaResult` - - Skills: key by normalizeSkillPath(path), detect new/resolved/changed findings - - MCPs: key by `name:normalizeSkillPath(source_file)`, same logic - - Agents: filter status found/installed_no_config only, new_entity/removed_entity only (no findings on agents) - -**Graceful degradation:** `better-sqlite3` listed as `optionalDependencies` in package.json (not `dependencies`). Import wrapped in try/catch. If unavailable, HistoryStore constructor returns a no-op stub. Guard command works without history, just no delta output. Single warning logged to stderr on first use. - -### 3. registry-client.ts - -Client for agentseal.org MCP trust score enrichment. - -**Functions:** -- `slugify(name: string): string` — lowercase, @scope/ becomes scope-, non-alnum becomes dash -- `extractPackageSlug(command: string): string | null` — parse npx/bunx/uvx/pip/docker commands, strip @version. Returns null for bare binaries, empty, or unparseable commands. -- `bulkCheck(slugs: string[], apiKey?: string): Promise>` — POST to `https://agentseal.org/api/v1/mcp/intel/bulk-check`, User-Agent: `agentseal-guard/0.8`, 8s timeout via AbortController, returns {} on any error. Uses `globalThis.fetch` (Node 18+). -- `enrichMcpResults(results: MCPServerResult[], apiKey?: string): Promise` — in-place mutation. Builds slug map from both name_slug and cmd_slug per result. Calls bulkCheck. Sets registry_score/level/findings_count. 
Skips if `registry_score` already set (prevents double-enrichment on multi-slug match). - -**Error handling:** All errors (timeout, network, parse) caught silently, return empty. Guard works fully offline. - -### 4. rules.ts - -YAML community rule engine with glob matching. - -**Interfaces:** -```typescript -interface Rule { - id: string; - title: string; - description: string; - severity: "critical" | "high" | "medium" | "low"; - verdict: "danger" | "warning"; - remediation: string; - match: Record; // YAML may produce string or string[] - tests: RuleTest[]; - source_file: string; -} - -interface RuleTest { - name: string; - input: Record; - expect: "match" | "no_match"; -} - -interface RuleTestResult { - rule_id: string; - test_name: string; - passed: boolean; - expected: string; - actual: string; -} -``` - -**Class: RuleEngine** -```typescript -class RuleEngine { - static fromPaths(paths: string[]): RuleEngine - // Resolve paths: files kept, dirs globbed for *.yaml/*.yml - // Validate: required fields (id, title, severity, verdict, match), severity/verdict enum, match.type enum - // No duplicate IDs across files. Errors include source file path in message. - // Skip files without "rules" key. 
- - evaluateMcp(server: MCPServerResult, rawConfig: Record): CustomFinding[] - // entity_data: { name, command, args (space-joined), env_keys (space-joined), env_values (space-joined), source_file } - - evaluateSkill(skill: SkillResult, content: string): CustomFinding[] - // entity_data: { name, path, content (truncated to 10240 chars) } - - evaluateAgent(agent: AgentConfigResult): CustomFinding[] - // entity_data: { agent_type, name, config_path } - - // All use _matchEntity: AND across fields, OR within each field - // Coerce non-array match values to [value] before matching - // Null/undefined entity values treated as "" - - runTests(): RuleTestResult[] - // Execute each rule's tests, return pass/fail per test -} -``` - -**Glob matching:** Implement inline `fnmatchCase(value: string, pattern: string): boolean`. Convert glob to regex: escape regex-special chars (`.$^+{}()|\\`), then `*` becomes `.*`, `?` becomes `.`, `[...]` passes through (including `[!...]` negation). Case-insensitive comparison. No external dependency. - -**YAML rule format:** -```yaml -version: 1 -rules: - - id: "CUSTOM-001" - title: "Block Slack MCP" - description: "Slack MCP servers should not be used" - severity: "high" - verdict: "danger" - remediation: "Remove this MCP server" - match: - type: "mcp" - name: ["*slack*"] - tests: - - name: "matches slack" - input: { name: "slack-mcp", command: "npx @slack/mcp" } - expect: "match" -``` - -## Updated Modules - -### 5. 
guard-models.ts - -**New interfaces:** -```typescript -interface UnlistedFinding { - code: string; // "GUARD-001" or "GUARD-002" - title: string; - description: string; - severity: string; // default: "medium" - item_name: string; - item_type: string; // "agent" or "mcp_server" -} - -interface CustomFinding { - code: string; // Custom rule ID - title: string; - severity: string; - verdict: string; - remediation: string; - rule_file: string; - entity_type: string; // "mcp" | "skill" | "agent" - entity_name: string; -} - -interface DeltaEntry { - change_type: "new" | "resolved" | "changed" | "new_entity" | "removed_entity"; - entity_type: "skill" | "mcp" | "agent"; - entity_name: string; - code?: string; - title?: string; - old_verdict?: string; - new_verdict?: string; - severity?: string; -} - -interface DeltaResult { - previous_timestamp: string; - entries: DeltaEntry[]; - // Computed getters: - get total_new(): number; // count where change_type in ("new", "new_entity") - get total_resolved(): number; // count where change_type in ("resolved", "removed_entity") - get total_changed(): number; // count where change_type == "changed" -} -``` - -Note: `DeltaResult` uses computed getters (not stored values) matching Python's `@property` pattern. - -**New fields on MCPServerResult:** -```typescript -registry_score?: number; -registry_level?: string; -registry_findings_count?: number; -``` - -**New fields on GuardReport:** -```typescript -unlisted_findings: UnlistedFinding[]; -custom_findings: CustomFinding[]; -config_path: string; -``` - -**Static methods:** `fromDict()` on GuardReport, MCPServerResult, SkillResult, CustomFinding, UnlistedFinding. GuardReport.fromDict fully deserializes all fields including custom_findings (via CustomFinding.fromDict), unlisted_findings, toxic_flows, baseline_changes, and mcp_runtime_results. This fixes a Python limitation where from_dict skips these fields. 
- -**toDict():** MCPServerResult.toDict includes conditional `registry` nested object (only if registry_score is set). GuardReport.toDict includes conditional keys for mcp_runtime_results, toxic_flows, baseline_changes, unlisted_findings, custom_findings. - -**SARIF support:** GuardReport.toSarif includes custom_findings in results. GUARD-001/002 included as SARIF rules. - -### 6. deobfuscate.ts - -**Add `CONFUSABLES` map** -- 80+ TR39 Unicode confusable character mappings: -- Cyrillic uppercase/lowercase (A,B,C,E,H,I,J,K,M,O,P,S,T,X,Y,Z / a,c,e,h,i,j,o,p,s,x,y) -- Greek uppercase/lowercase (A,B,E,H,I,K,M,N,O,P,T,X,Y,Z / o,a) -- Cherokee (D,R,T,G,W,S / s variants) -- Turkish dotless i -- Small caps (A,B,C) -- Fullwidth Latin A-Z and a-z (26+26 = 52 chars) - -**Add `decodeHtmlEntities(text: string): string`** -- decode numeric (`&#99;`, `&#x63;`) and named (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`) HTML entities. Regex-based: match `&#(\d+);` and `&#x([0-9a-fA-F]+);` and convert via `String.fromCodePoint()`. Named entities via a small lookup map (no DOM dependency). - -**Update `normalizeUnicode()`** -- apply CONFUSABLES character replacement after NFKC normalization. Iterate each character, replace if in CONFUSABLES map. - -**Update `deobfuscate()`** -- 2-pass pipeline: extract single-pass function `_deobfuscatePass()`, main `deobfuscate()` calls it twice to catch obfuscation-within-obfuscation chains. - -### 7. 
blocklist.ts - -**Add `SEED_HASHES`** -- 12 canonical malicious skill hashes loaded on construction: -``` -854aa9bd5a641b03fcf2e4a26affb33057af3238a10a83e194c05384f371734f credential-theft-cursorrules -46315c1d4dcd39199c6d0e43985c5007c1156bc538e3a82ba9b2883f363eab35 markdown-image-exfil -0b2ca8fedb87a97de9f5c462e09110febf887516dd62877d7e95a5556ef90905 reverse-shell-instruction -2b5a339d00216894c7bd3620e008e5443f4e30b9e9883a2b15c082d076775084 curl-exfil-instruction -eccb3a65c459a6b69223d38726e3fddb6184a6e7c52935148fdcd84961a6f9df prompt-injection-override -f554a511faaca2431265399a9d5b2f7184778b9521952dc757257dbe0aab2a46 supply-chain-install -323b9121b6e320fb04bae89c963690069c5172dca017469be2917e5feaec886c obfuscated-credential-theft -4826c0e8aef00f902190ab32519e4533b7e4b725f46fb70156705ea8708a7385 social-engineering-exfil -3951cdb38bbc37e28f98448e0478b93d319d892783efb23462b59fedea52189d mcp-config-injection -a7ddd5ce6c41055b4ef808810ac6f1b09dc4ae05eecc2f89dc64ac4682502d99 keylogger-instruction -eab3b7330de3b61fae1b5cba738ae499424e1c45ef1b025c560cca410e6cd16b crypto-miner-injection -d71ceee36d1e136a5cddc0d5b416210d94635a71fa90f9ef817f4f74a7b21603 dns-exfil-instruction -``` - -Seed hashes always present. On remote/cache load, UNION seed hashes with loaded hashes (never replace). This fixes a Python bug where file load could overwrite seeds. - -### 8. 
mcp-checker.ts - -**Add 5 new supply chain checks (MCP-007):** - -| Package Manager | Detection Pattern | Version Pin Check | -|----------------|-------------------|-------------------| -| bunx | `bunx\s+(@?[\w_./-]+(?:@[^\s]+)?)` | `@version` in last path segment | -| deno | `deno\s+run\s+(?:--allow-\S+\s+)*(\S+)` | `@version` in module, skip local paths (`.`, `/`) | -| docker | `docker\s+run\s+(?:-[^\s]+\s+)*([\w_./-]+(?::[^\s]+)?)` | `:tag` present and not `:latest` | -| pip | `pip3?\s+install\s+([\w_.-]+)` | `==version` after package name, skip flags (`-e`, `-r`, `--upgrade`) | -| go | `go\s+run\s+([\w_./-]+)` | `@version` in module, skip local paths | - -All patterns build `all_str` from command + args (space-joined), same as existing npx/uvx checks. - -**Add symlink resolution** in `_checkSensitivePaths` -- use `fs.realpathSync()` before checking against sensitive path list. Catch ENOENT (broken symlinks). - -### 9. baselines.ts - -**Update `configFingerprint()`:** -```typescript -// Before (v0.7): -SHA256(command | JSON(sorted_args) | JSON(sorted_env_keys)) - -// After (v0.8): -SHA256(command | JSON(sorted_args) | JSON(sorted_env_keys) | url | JSON(sorted_header_keys)) -``` - -URL and sorted header keys (not values) added. Attackers could swap the URL to a malicious endpoint without changing command/args -- URL MUST be in the fingerprint. - -### 10. skill-scanner.ts - -**Add 3 markdown image exfiltration patterns (SKILL-002):** -1. `!\[.*?\]\(https?://[^\s)]+\?[^\s)]*(?:data|content|file|secret|key|token|d)=` -- markdown image with exfil query params -2. `<img[^>]*src=["']https?://[^"']+\?[^"']*(?:data|content|file|secret|key|token|d)=` -- HTML img tag with exfil query params -3. `(?:render|display|show|include)\s+(?:an?\s+)?(?:image|img|markdown)\s+(?:tag|link)?\s*.*https?://` -- instruction to render external images - -### 11. guard.ts - -**Wire in new modules to Guard.run():** - -``` -Guard.run() flow: - 1. 
Resolve project config (if --config or .agentseal.yaml exists) - 2. Resolve rules: --rules flag > config.rules_paths > .agentseal/rules/ default dir - 3. Discover agents/MCPs/skills (existing) -- keep raw MCP config dicts for rule evaluation - 4. Filter by ignore_paths (new) - 5. Scan skills (existing) + evaluate custom rules on skills (new) - 6. Check MCP configs (existing) + evaluate custom rules on MCPs (new, uses raw config dicts) - 7. Evaluate custom rules on agents (new) - 8. Enrich MCP results from registry (new, unless --no-registry) - 9. Generate unlisted findings (new, if config has allowlists) - 10. Toxic flows + baselines (existing) - 11. Apply ignore_findings filter (new) - 12. Save raw report to history BEFORE filtering (new, unless --no-diff) - 13. Compute delta against filtered previous report (new, unless --no-diff) - 14. Build GuardReport with all new fields -``` - -Note: Step 3 must preserve raw MCP config dicts (the original parsed JSON objects) alongside MCPServerResult objects. Custom rules need raw configs for field matching (args as array, env as dict, etc). This avoids re-scanning. - -**GuardOptions additions:** -```typescript -interface GuardOptions { - // existing: - verbose?: boolean; - scanPath?: string; - onProgress?: GuardProgressFn; - semantic?: boolean; - embedFn?: EmbedFn; - // new: - config?: ProjectConfig; - noRegistry?: boolean; - noDiff?: boolean; - rulesPaths?: string[]; - fromJson?: string; // path to JSON report for re-rendering - failOn?: string; // override config fail_on -} -``` - -### 12. 
CLI (bin/agentseal.ts) - -**New `guard` command** using commander `.command()` subcommand pattern: - -``` -agentseal guard [path] # scan machine or directory -agentseal guard init # generate .agentseal.yaml -agentseal guard test # validate YAML rules - -Options: - --verbose # show all findings - --no-registry # skip agentseal.org enrichment - --no-diff # skip delta comparison - --from-json # re-render saved JSON report - --fail-on # danger|warning|safe (exit code control) - --rules # custom YAML rules path - --config # explicit .agentseal.yaml path - --output # terminal|json|sarif - --save # save JSON report to file - --reset-baselines # re-trust all MCP servers -``` - -**guard init defaults:** when no `--rules` provided, `guard test` checks `.agentseal/rules/` in CWD. - -**Terminal output format:** Match Python's docker/kubectl-inspired layout: -- ANSI-aware column padding (strip ANSI escape codes for width calculation) -- Section separators (AGENTS, SKILLS, MCP SERVERS, CUSTOM RULES, POLICY, DELTA) -- Column headers (NAME, STATUS, VERDICT, SEVERITY, FINDING) -- REGISTRY column for MCP servers (score + level when available) -- Summary box with severity/status counts -- Exit code: 0 (pass), 1 (fail per fail_on), 2 (error). ERROR verdicts treated as danger for fail_on purposes. - -**CLI flags NOT ported** (Python-only, require MCP runtime or LLM): - -| Flag | Reason | -|------|--------| -| `--connect` | Requires MCP runtime (subprocess stdio) | -| `--timeout` | MCP runtime connection timeout | -| `--concurrency` | MCP runtime parallelism | -| `--model` / `--api-key` / `--ollama-url` / `--litellm-url` | LLM judge (optional, not in scope) | -| `--llm-all` | LLM judge flag | -| `--no-semantic` | Semantic analysis toggle | -| `--output html` | HTML report generation (future work) | - -## Dependencies - -**New (optionalDependencies):** -- `better-sqlite3` -- optional dependency. npm proceeds if native build fails on target platform. 
Code wraps import in try/catch for graceful degradation. - -**New (devDependencies):** -- `@types/better-sqlite3` -- type definitions - -**No new dependencies for:** -- YAML parsing -- use `yaml` package (check if already present, else add) -- Glob matching -- inline `fnmatchCase` implementation -- HTML entity decoding -- inline regex implementation -- HTTP client -- use `globalThis.fetch` (Node 18+, stable in Node 21+). Node 18 may emit experimental warning, acceptable for our target audience. - -**Graceful degradation:** If `better-sqlite3` is not installed (optionalDependencies allows this), history features silently disable. Guard still works, just without delta output. Single warning logged to stderr on first use. - -## Testing Strategy - -**Framework:** Vitest (existing) -**Isolation:** mkdtempSync per test -**Total new tests:** ~150 - -| Module | Tests | Strategy | -|--------|-------|----------| -| project-config | ~35 | YAML parsing (including null values), resolution walk-up, filtering, fail_on logic (including hasSafe), unlisted generation (agent status filtering), init with interactive=false | -| history | ~25 | SQLite CRUD, normalize_skill_path (cross-platform separators), compute_delta, retention/cap, graceful failure when better-sqlite3 missing, _count() for assertions | -| registry-client | ~20 | slugify, extractPackageSlug, mocked fetch for bulkCheck/enrich, skip-if-already-set behavior | -| rules | ~25 | YAML loading, validation (required fields, enums, duplicate IDs), glob matching (*, ?, [!...], escaping), evaluate per entity type, coerce string to [string], runTests | -| guard-models | ~15 | fromDict round-trip (all types including CustomFinding, UnlistedFinding), DeltaResult computed getters, registry fields, toSarif with custom findings | -| guard-v08 (integration) | ~15 | Full guard.run() with config + rules + history, --from-json re-render | -| deobfuscate-v08 | ~10 | Confusables mapping, HTML entities (numeric, hex, named), 2-pass catches 
nested obfuscation | -| blocklist-v08 | ~5 | Seed hashes present on construction, seed hashes survive file load (union not replace) | - -**Registry client mocking:** Use Vitest's `vi.fn()` to mock `globalThis.fetch`. No live API calls in tests. - -## What's NOT Ported - -| Feature | Reason | -|---------|--------| -| MCP runtime (subprocess stdio) | Python-only architecture, JS has no equivalent | -| Client pool randomization | Only relevant for MCP runtime | -| Toxic flows readOnlyHint fix | JS already has different trust model in toxic-flows.ts | -| GitHub Action | Shared infrastructure, already done in .github/actions/guard/ | -| ASCII seal logo | Python terminal only, JS CLI uses its own banner | -| HTML report output (`--output html`) | Future work, not in Python v0.8 core | -| LLM judge flags | Optional expensive feature, not in scope for parity | - -## Version - -This ships as part of the existing `agentseal` npm package. Version bump from 0.5.2 to 0.6.0 (minor bump for new features). 
- -## File Count Summary - -- **4 new source files** (project-config, history, registry-client, rules) -- **7 new test files** -- **7 updated source files** (guard, guard-models, deobfuscate, blocklist, mcp-checker, baselines, skill-scanner) -- **1 updated CLI** (bin/agentseal.ts) -- **1 updated barrel export** (index.ts) -- **~150 new tests** From 74bdb0a50e5aea1b1fddf95b07c877086dc78106 Mon Sep 17 00:00:00 2001 From: AgentSeal Date: Wed, 25 Mar 2026 12:25:37 -0700 Subject: [PATCH 3/7] fix: remove undefined GenomeReport type ref (Pro-only) --- python/agentseal/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/agentseal/validator.py b/python/agentseal/validator.py index 6e071d1..f920207 100644 --- a/python/agentseal/validator.py +++ b/python/agentseal/validator.py @@ -277,7 +277,7 @@ async def run_genome( scan_report: _ScanReport | None = None, max_probes_per_category: int = 5, max_categories: int = 3, - ) -> "GenomeReport": + ) -> "object": """Run genome boundary mapping. 
If scan_report is None, runs a full scan first.""" if scan_report is None: scan_report = await self.run() From 50b58526ac1eb6b4e3afbcfb0d9992ef3f1cd79c Mon Sep 17 00:00:00 2001 From: AgentSeal Date: Wed, 25 Mar 2026 12:37:12 -0700 Subject: [PATCH 4/7] fix: skip semantic test when numpy not installed --- python/tests/test_mcp_tool_analyzer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/tests/test_mcp_tool_analyzer.py b/python/tests/test_mcp_tool_analyzer.py index 1118538..fcb7e7a 100644 --- a/python/tests/test_mcp_tool_analyzer.py +++ b/python/tests/test_mcp_tool_analyzer.py @@ -735,7 +735,11 @@ def test_combined_tool_and_server_findings(self): class TestSemanticAnalysis(unittest.TestCase): def test_high_similarity_triggers_finding(self): - import numpy as np + try: + import numpy as np + except ImportError: + self.skipTest("numpy not installed (optional dependency)") + return mock_semantic = MagicMock() # Return normalized vectors: first call = reference corpus, second = tool desc From 0b1747081e31daa21947cbed8814823f75549ae8 Mon Sep 17 00:00:00 2001 From: AgentSeal Date: Wed, 25 Mar 2026 13:08:45 -0700 Subject: [PATCH 5/7] fix: skip second numpy-dependent test when not installed --- python/tests/test_mcp_tool_analyzer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/tests/test_mcp_tool_analyzer.py b/python/tests/test_mcp_tool_analyzer.py index fcb7e7a..45e6712 100644 --- a/python/tests/test_mcp_tool_analyzer.py +++ b/python/tests/test_mcp_tool_analyzer.py @@ -771,7 +771,11 @@ def mock_embed(texts): self.assertTrue(len(findings) > 0) def test_low_similarity_no_semantic_finding(self): - import numpy as np + try: + import numpy as np + except ImportError: + self.skipTest("numpy not installed (optional dependency)") + return mock_semantic = MagicMock() ref_count = 25 From cd3d8fc2b5ddcdc69d250b29fd1b5e9c553c0c0c Mon Sep 17 00:00:00 2001 From: AgentSeal Date: Wed, 25 Mar 2026 13:35:23 -0700 Subject: 
[PATCH 6/7] fix: notifier throttle fails on fresh CI (monotonic < interval) --- python/agentseal/notify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/agentseal/notify.py b/python/agentseal/notify.py index dd25d20..040e3d0 100644 --- a/python/agentseal/notify.py +++ b/python/agentseal/notify.py @@ -27,7 +27,7 @@ class Notifier: def __init__(self, enabled: bool = True, min_interval: float = 30.0): self._enabled = enabled self._min_interval = min_interval - self._last_notify_time: float = 0.0 + self._last_notify_time: float = float("-inf") self._platform = platform.system() def notify( From 31bbbdddcc3c15f4aa7d13978b310f6fe25bffd1 Mon Sep 17 00:00:00 2001 From: AgentSeal Date: Wed, 25 Mar 2026 13:48:01 -0700 Subject: [PATCH 7/7] fix: skip shield tests when watchdog not installed --- python/tests/test_shield.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/tests/test_shield.py b/python/tests/test_shield.py index 32ca38a..ab95a7b 100644 --- a/python/tests/test_shield.py +++ b/python/tests/test_shield.py @@ -21,6 +21,8 @@ import pytest from agentseal.guard_models import GuardVerdict, SkillResult, MCPServerResult, MCPFinding +pytest.importorskip("watchdog", reason="watchdog not installed (optional dependency)") + from agentseal.shield import ( Shield, _classify_path,