diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index a06070d..dd32a2a 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "sigint", - "version": "0.5.1", + "version": "0.6.0", "description": "Signal Intelligence - Comprehensive market research toolkit with report generation, GitHub issue creation, and trend-based analysis using three-valued logic", "license": "MIT", "author": { @@ -21,7 +21,7 @@ "web_fetch": true, "file_read": true, "file_write": true, - "mcp_servers": ["atlatl", "github"] + "mcp_servers": ["github"] }, "prerequisites": { "jq": { @@ -30,10 +30,6 @@ } }, "optional_dependencies": { - "atlatl": { - "description": "Atlatl MCP server for blackboard coordination and persistent memory. Falls back to file-based state when unavailable.", - "required": false - }, "github": { "description": "GitHub MCP server for issue creation. Required only for /sigint:issues command.", "required": false diff --git a/CHANGELOG.md b/CHANGELOG.md index 64ba11c..b1f5e11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,15 +5,28 @@ All notable changes to the Sigint Market Intelligence Plugin will be documented The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.6.0] - 2026-04-11 + +### Removed +- **Atlatl MCP dependency**: Removed all Atlatl blackboard, memory capture, memory recall, and memory enrichment references across 55 files (491 occurrences). JSON file persistence is now the sole data model. +- **Blackboard coordination**: Replaced ephemeral blackboard key-value store with file-based state in `reports/{topic_slug}/`. Inter-agent coordination uses TaskUpdate and SendMessage exclusively. +- **`auto_atlatl` configuration field**: Removed from schema validation, config examples, migration logic, and documentation. 
+- **`atlatl_memory_id` topic field**: Removed from schema validation and lifecycle tracking. +- **62 Atlatl-specific eval test cases**: Removed from 7 eval files; all remaining evals validated. + +### Improved +- **skills/augment**: Added `--methodology` as documented alias for `--dimension` argument +- **skills/report**: Input validation, format-specific output, timeout handling, graceful cleanup, state.json validation +- **skills/issues**: Input validation (`--repo` format), cowork fallback, categorized error responses, Atlatl regression guard eval +- **skills/migrate**: Backup-before-write ordering, dual-format parsing (YAML + markdown sections), malformed JSON handling, schema validation fallback ### Fixed - **Topic lifecycle tracking**: Research sessions now register in `sigint.config.json` topics throughout the lifecycle — `/sigint:start` registers with `in_progress`, orchestrator sets `complete` on finish, `/sigint:augment` and `/sigint:update` update dimensions and timestamps - **Session index**: `/sigint:status` and `/sigint:resume --list` now use `sigint.config.json` topics as primary session index instead of only globbing report directories -- **Schema validation**: `sigint-config.jq` updated to validate both minimal (context-only) and lifecycle-managed topic entries with status, dimensions, created/updated timestamps, findings count, and optional Atlatl memory ID +- **Schema validation**: `sigint-config.jq` updated to validate both minimal (context-only) and lifecycle-managed topic entries with status, dimensions, created/updated timestamps, and findings count - **Dimension-analyst reports directory**: Orchestrator now passes explicit `REPORTS_DIR` and `TOPIC_SLUG` to each analyst spawn prompt; analysts use the path verbatim instead of deriving it from the topic title (fixes slug truncation causing findings to land in wrong directory) - **Pre-review file validation** (Phase 2.6): Orchestrator validates all expected findings files exist in the canonical 
reports directory *before* the codex review gate (not after), ensuring relocated files go through the blocking review. Recovery is fail-closed: single-match relocations only, refuses on ambiguous multiple candidates, never imports from sibling topic directories -- **Config write atomicity**: Orchestrator Phase 4.1 now writes all topic completion fields (status, findings_count, dimensions, atlatl_memory_id) in a single jq call to prevent race conditions +- **Config write atomicity**: Orchestrator Phase 4.1 now writes all topic completion fields (status, findings_count, dimensions) in a single jq call to prevent race conditions ## [0.5.0] - 2026-04-02 @@ -33,7 +46,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **All JSON file operations now use `jq` via Bash** — `Edit` tool removed from all agents and skills per `/refactor:xq` structured data reliability patterns - **Configuration format**: Migrated from `sigint.local.md` YAML to `sigint.config.json` v2.0 JSON with per-topic overrides - **Schema validation is mandatory**: Write-then-validate pattern required after every JSON mutation with retry-and-correct (max 2 retries) -- **Dual-write is default**: Blackboard + file persistence for all findings (not just a Cowork fallback) +- **File persistence is default**: All findings written to validated JSON files - **Research-orchestrator** upgraded to v0.5.0 with codex gates, provenance, delta detection, and harness pattern - **Dimension-analyst** now includes `Bash` in tools list for Structured Data Protocol compliance - **Report-synthesizer** now includes `Bash` in tools list for Structured Data Protocol compliance @@ -49,21 +62,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Swarm-orchestrated parallel research**: New research-orchestrator agent coordinates multiple dimension-analysts running concurrently - **Dimension-analyst agent**: Generic research analyst parameterized by dimension 
(competitive, sizing, trends, customer, tech, financial, regulatory) - **Source-chunker agent**: RLM processor for large documents — partitions, spawns chunk analysts, synthesizes findings -- **Atlatl blackboard coordination**: Ephemeral session blackboard for inter-agent communication during research +- **Task-based coordination**: Task system for inter-agent communication during research - **Orchestration hints** in all 9 skill SKILL.md files for team-based research participation -- **Live team status** in `/sigint:status` showing dimension-analyst progress via blackboard +- **Live team status** in `/sigint:status` showing dimension-analyst progress ### Changed - **Replaced monolithic market-researcher** with research-orchestrator + dimension-analyst swarm (3→5 agents) -- **Migrated from Subcog to Atlatl** memory system across all commands and agents - **Replaced TodoWrite with TaskCreate/TaskUpdate** in all commands - **Replaced Task/subagent_type delegation with Agent tool** in all commands -- **Report-synthesizer** now reads blackboard findings in addition to state.json -- **Issue-architect** now uses Atlatl instead of Subcog for memory persistence +- **Report-synthesizer** now reads dimension findings files in addition to state.json ### Removed - **market-researcher agent** (decomposed into research-orchestrator + dimension-analyst) -- **Subcog integration** (replaced by Atlatl MCP tools) +- **Subcog integration** (replaced by file-based state) - **TodoWrite usage** (replaced by TaskCreate/TaskUpdate) ## [0.3.7] - 2026-01-23 diff --git a/README.md b/README.md index 0bdc247..e56df53 100644 --- a/README.md +++ b/README.md @@ -32,12 +32,11 @@ See the [Getting Started Tutorial](docs/tutorials/getting-started.md) for a deta ## Features -- **Swarm-Orchestrated Research**: Parallel dimension-analysts coordinate via Atlatl blackboard +- **Swarm-Orchestrated Research**: Parallel dimension-analysts coordinate via task system and file-based state - **Iterative Research 
Workflow**: Start, refine, and finalize market research through commands - **Multi-Audience Reports**: Generate reports for executives, PMs, investors, and developers - **Trend-Based Modeling**: Three-valued logic analysis (INC/DEC/CONST) for uncertain data - **GitHub Integration**: Automatically create sprint-sized issues from findings -- **Atlatl Memory**: Persist research state across sessions via Atlatl MCP - **RLM Large Document Processing**: Source-chunker handles documents exceeding context limits - **Multi-Format Output**: Markdown, HTML, Mermaid diagrams @@ -79,7 +78,7 @@ For organization-wide deployment and detailed setup, see the [Cowork deployment ## Agents -- **research-orchestrator**: Coordinates parallel dimension-analysts via blackboard, merges findings +- **research-orchestrator**: Coordinates parallel dimension-analysts, merges findings - **dimension-analyst**: Focused research on a single dimension (competitive, sizing, trends, etc.) - **source-chunker**: RLM processor for large documents — partitions, analyzes chunks, synthesizes - **issue-architect**: Converts findings to sprint-sized GitHub issues @@ -148,8 +147,7 @@ Create `sigint.config.json` at the project root (generated automatically by `/si "defaults": { "default_repo": "owner/repo", "report_format": "markdown", - "audiences": ["executives", "product-managers"], - "auto_atlatl": true + "audiences": ["executives", "product-managers"] }, "research": { "maxDimensions": 5, @@ -168,7 +166,6 @@ Create `sigint.config.json` at the project root (generated automatically by `/si - WebSearch/WebFetch tools for research **Optional:** -- Atlatl MCP server for blackboard coordination and persistent memory (falls back to file-based state) - GitHub MCP server or GitHub CLI (`gh`) for issue creation ## References diff --git a/SECURITY.md b/SECURITY.md index c7a839c..66416e1 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,7 +13,6 @@ sigint is a Claude Code plugin that performs web searches and fetches 
external c - **Web Content**: Research commands fetch data from external sources - **Report Storage**: Research findings are stored locally in `./reports/` -- **Atlatl Memory**: Optional memory persistence via Atlatl MCP server - **GitHub Integration**: Issue creation requires `gh` CLI authentication ## Threat Model diff --git a/agents/dimension-analyst.md b/agents/dimension-analyst.md index 8571bfd..4b36fbd 100644 --- a/agents/dimension-analyst.md +++ b/agents/dimension-analyst.md @@ -2,7 +2,7 @@ name: dimension-analyst version: 0.4.1 description: | - Use this agent for focused research on a single market dimension (competitive, sizing, trends, customer, tech, financial, regulatory). Parameterized by dimension — loads the relevant skill as methodology guide and writes findings to a shared blackboard. Examples: + Use this agent for focused research on a single market dimension (competitive, sizing, trends, customer, tech, financial, regulatory). Parameterized by dimension — loads the relevant skill as methodology guide and writes findings to reports directory. Examples: Context: Orchestrator spawning parallel analysts @@ -38,19 +38,13 @@ tools: - WebFetch - WebSearch - Write - - mcp__atlatl__blackboard_alert - - mcp__atlatl__blackboard_read - - mcp__atlatl__blackboard_write - - mcp__atlatl__capture_memory - - mcp__atlatl__enrich_memory - - mcp__atlatl__recall_memories --- -You are a specialized market research analyst focused on a single research dimension. You load a skill methodology, conduct web research using WebSearch and WebFetch, and write structured findings to a shared blackboard for team coordination. +You are a specialized market research analyst focused on a single research dimension. You load a skill methodology, conduct web research using WebSearch and WebFetch, and write structured findings to the reports directory for team coordination. **REPORTS_DIR (standing instruction)**: Your spawn prompt provides `REPORTS_DIR` and `TOPIC_SLUG`. 
Use `REPORTS_DIR` **exactly as provided** for ALL file writes. Do NOT derive, re-slugify, or truncate the output directory from the topic title. All file paths in this agent definition use `$REPORTS_DIR` — substitute with the value from your spawn prompt. -**Structured Data Protocol**: All JSON file operations (creation, mutation, extraction) MUST follow `protocols/STRUCTURED-DATA.md`. Use `jq` via Bash for all JSON file I/O. **Every write or mutation MUST be followed by schema validation** using the corresponding `schemas/*.jq` file — if validation fails, diagnose, correct with jq, and re-validate (max 2 retries) before proceeding. See the Retry-and-Correct protocol in `protocols/STRUCTURED-DATA.md`. Blackboard MCP calls are exempt. `Read` is acceptable for comprehension-only reads. +**Structured Data Protocol**: All JSON file operations (creation, mutation, extraction) MUST follow `protocols/STRUCTURED-DATA.md`. Use `jq` via Bash for all JSON file I/O. **Every write or mutation MUST be followed by schema validation** using the corresponding `schemas/*.jq` file — if validation fails, diagnose, correct with jq, and re-validate (max 2 retries) before proceeding. See the Retry-and-Correct protocol in `protocols/STRUCTURED-DATA.md`. `Read` is acceptable for comprehension-only reads. ## MANDATORY: Conduct Real Web Research @@ -65,11 +59,6 @@ jq '.elicitation' "$REPORTS_DIR/state.json" ``` Or read `$REPORTS_DIR/elicitation.json` if it exists. -**Blackboard fallback:** If neither file exists, read from blackboard: -``` -blackboard_read(scope="{scope}", key="elicitation") -``` - ### Step 2: Load Skill Methodology — REQUIRED Read `skills/{skill-directory}/SKILL.md` for your dimension's research methodology. This is **not optional** — you must load your skill before proceeding. 
@@ -97,32 +86,22 @@ jq -e -f schemas/methodology-plan.jq "$REPORTS_DIR/methodology_plan_${DIMENSION} ``` **STOP CHECK:** Verify `$REPORTS_DIR/methodology_plan_${DIMENSION}.json` exists and passes schema validation before proceeding. -**Blackboard write (optional, for live coordination):** -``` -blackboard_write(scope="{scope}", key="methodology_plan_{dimension}", value={methodology plan object}) -``` - After writing, report to user what frameworks will be applied: "{dimension} analyst: Loading methodology — {N} frameworks planned: {framework names}" ### Step 5: Proceed to Research **ONLY AFTER Step 4 succeeds**, proceed to web research. If Step 4 fails, retry once. If still fails, alert team-lead and proceed with best-effort research noting "methodology plan not written". -### Step 6: Recall Prior Memories -``` -recall_memories(query="sigint {topic} {dimension}", tags=["sigint-research"]) -``` - ## Research Flow -### Step 7: Plan Research +### Step 6: Plan Research Based on elicitation scope and skill methodology, plan your research queries. Prioritize based on: - `elicitation.priorities` ranking - `elicitation.scope` boundaries (geography, segments, time horizon) - `elicitation.hypotheses` to test -### Step 8: Conduct Web Research +### Step 7: Conduct Web Research Use WebSearch and WebFetch following skill methodology: - Search for current data (last 12 months preferred) - Cross-reference multiple sources @@ -138,7 +117,7 @@ If a WebSearch call fails or returns no results: 3. If all retries fail: log the failure in `findings.gaps[]` with the original query and continue 4. **Never fabricate findings to compensate for search failures** -### Step 9: Handle Large Documents +### Step 8: Handle Large Documents If a fetched source exceeds ~15K tokens, request delegation through the team lead: 1. 
SendMessage(to: 'team-lead', message: {type: 'source_chunking_request', url: '{url}', dimension: '{dimension}', token_estimate: N, extraction_focus: '{what to extract}'}, summary: '{dimension}: requesting source chunking for large document') 2. Wait for team-lead to respond with chunked findings via SendMessage @@ -146,7 +125,7 @@ If a fetched source exceeds ~15K tokens, request delegation through the team lea **Note:** You cannot spawn sub-agents. Large document processing is coordinated through the team lead, who manages the source-chunker agent. -### Step 10: Structure Findings +### Step 9: Structure Findings Format findings as structured JSON: ```json { @@ -189,29 +168,24 @@ Format findings as structured JSON: } ``` -### Step 11: Write Findings +### Step 10: Write Findings -**File write is mandatory.** Always write findings to file first, then write to blackboard for live coordination. +**File write is mandatory.** **File write (mandatory):** ```bash echo "$FINDINGS_JSON" | jq '.' > "$REPORTS_DIR/findings_${DIMENSION}.json" jq -e -f schemas/findings.jq "$REPORTS_DIR/findings_${DIMENSION}.json" > /dev/null ``` -**STOP CHECK:** Verify `$REPORTS_DIR/findings_${DIMENSION}.json` exists and passes schema validation before proceeding to Step 11.5. - -**Blackboard write (optional, for live coordination):** -``` -blackboard_write(scope="{scope}", key="findings_{dimension}", value={findings object}) -``` +**STOP CHECK:** Verify `$REPORTS_DIR/findings_${DIMENSION}.json` exists and passes schema validation before proceeding to Step 10.5. -### Step 11.5: Self-Reflection Protocol +### Step 10.5: Self-Reflection Protocol After writing initial findings, verify research quality before signaling completion. #### Step R.1: Methodology Coverage Check -Read your `methodology_plan_{dimension}` from `$REPORTS_DIR/methodology_plan_${DIMENSION}.json` (fall back to blackboard if file missing). 
+Read your `methodology_plan_{dimension}` from `$REPORTS_DIR/methodology_plan_${DIMENSION}.json`. For each required framework in the plan: - Check: did your findings reference this framework's outputs? - If missing: log as a methodology gap, prepare a targeted search query @@ -232,7 +206,7 @@ If gaps were detected in R.1 or R.2: 1. Run targeted WebSearch for each gap (up to 3 additional searches per iteration) 2. Integrate new evidence into existing findings (update provenance records) 3. Update confidence levels based on new evidence -4. Write reflection log to file (mandatory), then to blackboard (optional): +4. Write reflection log to file: **File write (mandatory):** ```bash jq -n \ @@ -268,45 +242,23 @@ jq -e -f schemas/findings.jq "$REPORTS_DIR/findings_${DIMENSION}.json" > /dev/nu ``` **STOP CHECK:** Verify `$REPORTS_DIR/findings_${DIMENSION}.json` exists and passes schema validation before proceeding. -**Blackboard write (optional, for live coordination):** -``` -blackboard_write(scope="{scope}", key="findings_{dimension}", value={updated findings}) -``` - -### Step 12: Check for Cross-Dimension Conflicts +### Step 11: Check for Cross-Dimension Conflicts Read other dimensions' findings from file (primary): ```bash jq '.' "$REPORTS_DIR/findings_${OTHER_DIMENSION}.json" ``` (Read tool is also acceptable here — comprehension-only, per Structured Data Protocol.) -**Blackboard fallback:** If file does not exist, read from blackboard: -``` -blackboard_read(scope="{scope}", key="findings_{other_dimension}") -``` +If contradictions found, write them to `$REPORTS_DIR/conflicts.json` using jq and notify the team lead via SendMessage. -If contradictions found: -``` -blackboard_alert(scope="{scope}",channel="conflict_detected", message={ - "dimension_a": "{this dimension}", - "dimension_b": "{other dimension}", - "description": "Contradiction description" -}) -``` - -### Step 13: Signal Completion - -1. 
**Alert via blackboard** (cross-agent awareness): - ``` - blackboard_alert(scope="{scope}", channel="phase_complete", message="{dimension} analysis complete") - ``` +### Step 12: Signal Completion -2. **Mark task complete** (required when spawned as a swarm teammate): +1. **Mark task complete** (required when spawned as a swarm teammate): ``` TaskUpdate(taskId, status: "completed") ``` -3. **Notify team lead via SendMessage** (required when spawned with team_name): +2. **Notify team lead via SendMessage** (required when spawned with team_name): ``` SendMessage( to: "team-lead", @@ -325,39 +277,20 @@ blackboard_alert(scope="{scope}",channel="conflict_detected", message={ Include in your completion message a `methodology_applied` field listing which frameworks were actually used vs planned. - **Note**: Only send if you were spawned with a `team_name` (i.e., you are a persistent swarm teammate). If spawned as a standalone `Agent(run_in_background=true)` without a team, skip steps 2–3 and rely on `blackboard_alert` only. - -For significant findings during research: -``` -blackboard_alert(scope="{scope}",channel="finding_discovered", message="Brief description of significant finding") -``` - -### Step 14: Capture to Atlatl -Persist key findings to long-term memory: -``` -capture_memory( - title="{dimension} analysis: {topic}", - namespace="_semantic/knowledge", - memory_type="semantic", - tags=["sigint-research", "{topic_slug}", "{dimension}"], - confidence=0.8, - content="Key findings summary..." -) -``` -Then `enrich_memory(id)`. + **Note**: Only send if you were spawned with a `team_name` (i.e., you are a persistent swarm teammate). 
## Dimension-to-Skill Mapping -| Dimension | Skill Directory | Blackboard Key | +| Dimension | Skill Directory | Findings File | |-----------|----------------|---------------| -| competitive | competitive-analysis | `findings_competitive` | -| sizing | market-sizing | `findings_sizing` | -| trends | trend-analysis | `findings_trends` | -| customer | customer-research | `findings_customer` | -| tech | tech-assessment | `findings_tech` | -| financial | financial-analysis | `findings_financial` | -| regulatory | regulatory-review | `findings_regulatory` | -| trend_modeling | trend-modeling | `findings_trend_modeling` | +| competitive | competitive-analysis | `findings_competitive.json` | +| sizing | market-sizing | `findings_sizing.json` | +| trends | trend-analysis | `findings_trends.json` | +| customer | customer-research | `findings_customer.json` | +| tech | tech-assessment | `findings_tech.json` | +| financial | financial-analysis | `findings_financial.json` | +| regulatory | regulatory-review | `findings_regulatory.json` | +| trend_modeling | trend-modeling | `findings_trend_modeling.json` | ## Quality Standards diff --git a/agents/issue-architect.md b/agents/issue-architect.md index 6724498..428edb2 100644 --- a/agents/issue-architect.md +++ b/agents/issue-architect.md @@ -53,16 +53,13 @@ tools: - TaskUpdate - ToolSearch - Write - - mcp__atlatl__capture_memory - - mcp__atlatl__enrich_memory - - mcp__atlatl__recall_memories - mcp__github__issue_read - mcp__github__issue_write --- You are an expert issue architect specializing in converting business intelligence, research findings, and strategic recommendations into well-structured, actionable GitHub issues. Your role is to atomize large initiatives into sprint-sized deliverables. -**Structured Data Protocol**: All JSON file operations (creation, mutation, extraction) MUST follow `protocols/STRUCTURED-DATA.md`. Use `jq` via Bash for all JSON file I/O. 
**Every write or mutation MUST be followed by schema validation** using the corresponding `schemas/*.jq` file — if validation fails, diagnose, correct with jq, and re-validate (max 2 retries) before proceeding. See the Retry-and-Correct protocol in `protocols/STRUCTURED-DATA.md`. Blackboard MCP calls are exempt. `Read` is acceptable for comprehension-only reads (e.g., loading state.json to understand research context). +**Structured Data Protocol**: All JSON file operations (creation, mutation, extraction) MUST follow `protocols/STRUCTURED-DATA.md`. Use `jq` via Bash for all JSON file I/O. **Every write or mutation MUST be followed by schema validation** using the corresponding `schemas/*.jq` file — if validation fails, diagnose, correct with jq, and re-validate (max 2 retries) before proceeding. See the Retry-and-Correct protocol in `protocols/STRUCTURED-DATA.md`. `Read` is acceptable for comprehension-only reads (e.g., loading state.json to understand research context). ## CRITICAL: Load Elicitation Context First @@ -203,12 +200,9 @@ Before creating ANY issues, you MUST: ## Workflow -> **Note:** Atlatl is the persistent memory system. Research context is stored with namespace `_semantic/knowledge` and tag `sigint-research` for cross-session continuity. - ### Step 1: Load Research Context - Read state.json and generated reports - Identify all actionable items -- Recall Atlatl memories: `recall_memories(query="sigint {topic} issues", tags=["sigint-research"])` ### Step 2: Categorize and Prioritize - Classify each item by type @@ -274,7 +268,6 @@ For each planned issue: echo "$MANIFEST_JSON" | jq '.' 
> "$ISSUES_FILE" jq -e -f schemas/issues.jq "$ISSUES_FILE" > /dev/null ``` -- Capture to Atlatl: `capture_memory(namespace="_semantic/knowledge", tags=["sigint-research", "issues"], ...)` then `enrich_memory(id)` - Summarize by category ### Step 7: Signal Completion (required when spawned as a swarm teammate with `team_name`) diff --git a/agents/report-synthesizer.md b/agents/report-synthesizer.md index 3950fd3..0f2dfc7 100644 --- a/agents/report-synthesizer.md +++ b/agents/report-synthesizer.md @@ -54,15 +54,11 @@ tools: - TaskUpdate - WebFetch - Write - - mcp__atlatl__blackboard_read - - mcp__atlatl__capture_memory - - mcp__atlatl__enrich_memory - - mcp__atlatl__recall_memories --- You are an expert report synthesizer specializing in transforming raw research findings into polished, executive-ready documents. Your role is to create comprehensive reports with clear narratives, supporting visualizations, and actionable insights. -**Structured Data Protocol**: All JSON file operations (creation, mutation, extraction) MUST follow `protocols/STRUCTURED-DATA.md`. Use `jq` via Bash for all JSON file I/O. **Every write or mutation MUST be followed by schema validation** using the corresponding `schemas/*.jq` file — if validation fails, diagnose, correct with jq, and re-validate (max 2 retries) before proceeding. See the Retry-and-Correct protocol in `protocols/STRUCTURED-DATA.md`. Blackboard MCP calls are exempt. `Read` is acceptable for comprehension-only reads (e.g., loading state.json to understand research context). +**Structured Data Protocol**: All JSON file operations (creation, mutation, extraction) MUST follow `protocols/STRUCTURED-DATA.md`. Use `jq` via Bash for all JSON file I/O. **Every write or mutation MUST be followed by schema validation** using the corresponding `schemas/*.jq` file — if validation fails, diagnose, correct with jq, and re-validate (max 2 retries) before proceeding. See the Retry-and-Correct protocol in `protocols/STRUCTURED-DATA.md`. 
`Read` is acceptable for comprehension-only reads (e.g., loading state.json to understand research context). ## CRITICAL: Load Elicitation Context First @@ -507,57 +503,53 @@ After documentation review, run the human-voice plugin to ensure report language ## Workflow -> **Note:** Atlatl is the persistent memory system. Research findings are stored with namespace `_semantic/knowledge` and tag `sigint-research` for cross-session continuity. - 1. **Load Research State**: Read all findings from state.json -2. **Read Dimension Findings**: For each dimension, read from file (primary): +2. **Read Dimension Findings**: For each dimension, read from file: ``` For each dimension in [competitive, sizing, trends, customer, tech, financial, regulatory, trend_modeling]: Read ./reports/{topic_slug}/findings_{dimension}.json ``` - If file is missing for a dimension, fall back to `blackboard_read(scope="{topic_slug}", key="findings_{dimension}")`. + If file is missing for a dimension, log a warning and skip that dimension. Merge all available findings with state.json for complete coverage. -3. **Recall Atlatl Memories**: `recall_memories(query="sigint {topic}", tags=["sigint-research"])` -4. **Organize Content**: Map findings to report sections using Section → Data Mapping -5. **Generate Sections**: Execute Section Iterator for each section (generate content or placeholder) -6. **Create Visualizations**: Generate Mermaid diagrams where conditions are met (see Section Generation Protocol) -7. **Apply Audience Transform**: Reorder sections and apply content transforms per Audience Transform Protocol -8. **Write Report**: Produce complete markdown document -9. **Format Outputs**: Generate requested formats (HTML if `--format html` or `--format both`) -10. **Save Files**: Write to reports directory -11. **Run Documentation Review** (if plugin available): Execute `/documentation-review:doc-review` on reports directory -12. 
**Fix Issues** (if plugin available): All markdown must pass review before completing -13. **Run Human Voice Review** (if plugin available): Execute `/human-voice:voice-review` on each report file with emoji preservation instruction -14. **Fix Voice Issues** (if plugin available): Rewrite flagged sections for natural, human-sounding language while preserving emojis -15. **Post-Report Codex Review Gate (BLOCKING):** +3. **Organize Content**: Map findings to report sections using Section → Data Mapping +4. **Generate Sections**: Execute Section Iterator for each section (generate content or placeholder) +5. **Create Visualizations**: Generate Mermaid diagrams where conditions are met (see Section Generation Protocol) +6. **Apply Audience Transform**: Reorder sections and apply content transforms per Audience Transform Protocol +7. **Write Report**: Produce complete markdown document +8. **Format Outputs**: Generate requested formats (HTML if `--format html` or `--format both`) +9. **Save Files**: Write to reports directory +10. **Run Documentation Review** (if plugin available): Execute `/documentation-review:doc-review` on reports directory +11. **Fix Issues** (if plugin available): All markdown must pass review before completing +12. **Run Human Voice Review** (if plugin available): Execute `/human-voice:voice-review` on each report file with emoji preservation instruction +13. **Fix Voice Issues** (if plugin available): Rewrite flagged sections for natural, human-sounding language while preserving emojis +14. **Post-Report Codex Review Gate (BLOCKING):** Self-review the report against the findings data before delivering: - **Step 15a: Load findings for cross-reference** + **Step 14a: Load findings for cross-reference** Read `./reports/{topic_slug}/state.json` to get the authoritative findings array. 
- **Step 15b: Verify claim traceability** + **Step 14b: Verify claim traceability** For each factual assertion in the report: - Check: does it trace to a specific finding ID in state.json? - Check: does the finding have provenance (sources with URLs)? - Flag untraced claims - **Step 15c: Verify no hallucinated statistics** + **Step 14c: Verify no hallucinated statistics** For each number/statistic in the report: - Check: does it appear in a finding's summary, evidence, or provenance snippet? - Flag numbers not traceable to findings data - **Step 15d: Check balanced representation** + **Step 14d: Check balanced representation** - Compare section coverage against `elicitation.priorities` ranking - Flag if any priority dimension is missing or under-represented - **Step 15e: Remediate or warn** + **Step 14e: Remediate or warn** - If flagged issues found: revise the report to fix traceable issues (max 1 revision pass) - If issues remain after revision: append a "Provenance Warnings" section listing unresolved claims - If no issues: proceed **Fallback:** If spawned with a `team_name` and a team lead is available, send flagged issues via SendMessage for awareness. Do not wait for a response — the self-review is authoritative. -16. **Capture Summary**: `capture_memory(namespace="_semantic/knowledge", tags=["sigint-research", "report"], title="Report generated: {topic}", ...)` then `enrich_memory(id)` -17. **Signal Completion** (required when spawned as a swarm teammate with `team_name`): +15. 
**Signal Completion** (required when spawned as a swarm teammate with `team_name`): ``` TaskUpdate(taskId, status: "completed") SendMessage( diff --git a/agents/research-orchestrator.md b/agents/research-orchestrator.md index 4b2a5de..6ded17c 100644 --- a/agents/research-orchestrator.md +++ b/agents/research-orchestrator.md @@ -23,22 +23,13 @@ tools: - TeamCreate - TeamDelete - Write - - mcp__atlatl__blackboard_ack_alert - - mcp__atlatl__blackboard_alert - - mcp__atlatl__blackboard_create - - mcp__atlatl__blackboard_pending_alerts - - mcp__atlatl__blackboard_read - - mcp__atlatl__blackboard_write - - mcp__atlatl__capture_memory - - mcp__atlatl__enrich_memory - - mcp__atlatl__recall_memories --- # Research Orchestrator Agent You are the orchestrator for sigint research sessions. You manage the full lifecycle of a research session — from team creation through finding merge to cleanup — following the Anthropic long-running agent harness pattern. -**Structured Data Protocol**: All JSON file operations (creation, mutation, extraction) MUST follow `protocols/STRUCTURED-DATA.md`. Use `jq` via Bash for all JSON file I/O. **Every write or mutation MUST be followed by schema validation** using the corresponding `schemas/*.jq` file — if validation fails, diagnose, correct with jq, and re-validate (max 2 retries) before proceeding. See the Retry-and-Correct protocol in `protocols/STRUCTURED-DATA.md`. Blackboard MCP calls are exempt. `Read` is acceptable for comprehension-only reads. +**Structured Data Protocol**: All JSON file operations (creation, mutation, extraction) MUST follow `protocols/STRUCTURED-DATA.md`. Use `jq` via Bash for all JSON file I/O. **Every write or mutation MUST be followed by schema validation** using the corresponding `schemas/*.jq` file — if validation fails, diagnose, correct with jq, and re-validate (max 2 retries) before proceeding. See the Retry-and-Correct protocol in `protocols/STRUCTURED-DATA.md`. 
`Read` is acceptable for comprehension-only reads. You are spawned by skills (start, update, augment) with a mode-specific prompt. Your responsibilities: @@ -61,7 +52,7 @@ You receive one of these modes in your spawn prompt: --- -## Phase 0: Initialize Team and Blackboard +## Phase 0: Initialize Team and Directory ### Step 0.1: Create Team @@ -70,25 +61,12 @@ TeamCreate(team_name: "sigint-{topic_slug}-research") ``` If TeamCreate fails, retry once. If it fails again, report the error and stop. -### Step 0.2: Create Research Directory and Blackboard +### Step 0.2: Create Research Directory ```bash mkdir -p ./reports/{topic_slug} ``` -``` -blackboard_create(scope="{topic_slug}", ttl=86400) -``` -Store as `blackboard_scope = "{topic_slug}"`. - -**File-first default:** For EVERY data write in this agent, write to file FIRST with schema validation, THEN write to blackboard. File writes are mandatory and gating; blackboard writes are optional coordination aids with 24h TTL. - -> **Blackboard failure fallback:** If `blackboard_create` fails (Atlatl MCP unavailable), set `blackboard_scope = null`. All data is already persisted via file writes — blackboard ops are simply skipped. - -**Blackboard null-guard (standing instruction):** Before every `blackboard_write(...)` or `blackboard_read(...)` call in this agent: -- If `blackboard_scope` is null: skip the blackboard op (file is already written/readable) -- If `blackboard_write` fails at runtime: log warning to `research-progress.md` (file is already written) - ### Step 0.3: Create Phase Tasks ``` @@ -151,7 +129,7 @@ After elicitation: jq -e -f schemas/state.jq "./reports/$TOPIC_SLUG/state.json" > /dev/null ``` -2. Write elicitation — file first, then blackboard: +2. 
Write elicitation to file: **File write (mandatory):** ```bash @@ -159,14 +137,7 @@ After elicitation: jq -e -f schemas/elicitation.jq "./reports/$TOPIC_SLUG/elicitation.json" > /dev/null ``` - **Blackboard write (optional, for live coordination):** - ``` - blackboard_write(scope="{topic_slug}", key="elicitation", value={elicitation}) - ``` - -3. Capture to Atlatl memory. - -4. Update progress file: +3. Update progress file: ```markdown ## {ISO_DATE} — Elicitation Complete - Decision context: {brief} @@ -239,14 +210,10 @@ jq -e -f schemas/state.jq "./reports/$TOPIC_SLUG/state.json" > /dev/null ``` Where `$SELECTED_DIMS_JSON` is `["competitive", "sizing", ...]` (string array) and `$CUSTOM_DIMS_JSON` is `["custom_dim_name", ...]` (string array of non-standard dimension names, empty `[]` if none). -Also update `elicitation.json` and blackboard: +Also update `elicitation.json`: ```bash jq '.elicitation' "./reports/$TOPIC_SLUG/state.json" > "./reports/$TOPIC_SLUG/elicitation.json" ``` -``` -blackboard_write(scope="{topic_slug}", key="elicitation", value={updated elicitation}) -``` - Update progress file: ```markdown ## {ISO_DATE} — Dimension Selection Complete @@ -278,11 +245,9 @@ Agent( run_in_background=true, prompt="[TASK DISCOVERY PROTOCOL] You are a dimension-analyst for {dimension} research on '{topic}'. - BLACKBOARD: {topic_slug} TOPIC_SLUG: {topic_slug} REPORTS_DIR: ./reports/{topic_slug} Skill to load: skills/{skill-directory}/SKILL.md - Your blackboard key: findings_{dimension} Your task ID: #{taskId} CRITICAL: Use REPORTS_DIR exactly as provided for ALL file writes. 
@@ -315,22 +280,22 @@ For each dimension: ### Dimension-to-Skill Mapping -| Dimension | Skill Directory | Blackboard Key | -|-----------|----------------|----------------| -| competitive | competitive-analysis | `findings_competitive` | -| sizing | market-sizing | `findings_sizing` | -| trends | trend-analysis | `findings_trends` | -| customer | customer-research | `findings_customer` | -| tech | tech-assessment | `findings_tech` | -| financial | financial-analysis | `findings_financial` | -| regulatory | regulatory-review | `findings_regulatory` | -| trend_modeling | trend-modeling | `findings_trend_modeling` | +| Dimension | Skill Directory | Findings File | +|-----------|----------------|---------------| +| competitive | competitive-analysis | `findings_competitive.json` | +| sizing | market-sizing | `findings_sizing.json` | +| trends | trend-analysis | `findings_trends.json` | +| customer | customer-research | `findings_customer.json` | +| tech | tech-assessment | `findings_tech.json` | +| financial | financial-analysis | `findings_financial.json` | +| regulatory | regulatory-review | `findings_regulatory.json` | +| trend_modeling | trend-modeling | `findings_trend_modeling.json` | --- ## Phase 2.5: Methodology Verification Gate -For each analyst, check `./reports/{topic_slug}/methodology_plan_{dimension}.json` exists. If not found, fall back to `blackboard_read(scope="{topic_slug}", key="methodology_plan_{dimension}")` and **write recovered data to file immediately**. If not present after 3 checks (5 seconds apart), proceed without it and log a warning. +For each analyst, check `./reports/{topic_slug}/methodology_plan_{dimension}.json` exists. If not present after 3 checks (5 seconds apart), proceed without it and log a warning. Surface methodology table to user. If any analyst misses the window, log warning but do not block. @@ -386,8 +351,7 @@ fi **Recovery rules (fail-closed):** - **Single candidate**: Relocate and log warning. 
The file will be reviewed in Phase 2.75. - **Multiple candidates**: Refuse relocation. Log all candidate paths. Exclude dimension from merge. Alert user. -- **No candidates, blackboard has data**: Read from `blackboard_read(scope="{topic_slug}", key="findings_{dim}")` and **write recovered data to file** using jq + schema validation. Log as "recovered from blackboard". -- **No candidates, no blackboard data**: Log as missing. Exclude dimension from merge. +- **No candidates**: Log as missing. Exclude dimension from merge. - **Never relocate from a directory belonging to a different active topic** (check `sigint.config.json` topics to verify ownership). Update progress file: @@ -408,7 +372,7 @@ Update progress file: ### Step 2.75.1: For Each Completed Dimension -1. Read `findings_{dimension}` from blackboard (and `./reports/{topic_slug}/findings_{dimension}.json`). +1. Read `./reports/{topic_slug}/findings_{dimension}.json`. 2. Spawn codex review agent: ``` @@ -501,7 +465,6 @@ WHILE gate == "fail" due to methodology gaps AND methodology_retry_count < 2: name="dimension-analyst-{dimension}-retry{methodology_retry_count}", prompt="Gap-fill retry #{methodology_retry_count} for {dimension} analysis on '{topic}'. - BLACKBOARD: {topic_slug} TOPIC_SLUG: {topic_slug} REPORTS_DIR: ./reports/{topic_slug} Skill to load: skills/{skill-directory}/SKILL.md @@ -550,11 +513,11 @@ Wait for all dimension-analysts to complete (or timeout after `dimensionTimeout` ### Step 3.1: Read All Findings -For each dimension, read from `./reports/{topic_slug}/findings_{dimension}.json` (primary). If file is missing, fall back to `blackboard_read(scope="{topic_slug}", key="findings_{dimension}")` and **immediately write recovered data to file** with schema validation. +For each dimension, read from `./reports/{topic_slug}/findings_{dimension}.json`. If file is missing, log a warning and exclude that dimension from the merge. 
### Step 3.2: Check Cross-Dimension Conflicts -Read `conflicts` key from blackboard. Surface any contradictions. +Check `./reports/{topic_slug}/conflicts.json` if it exists. Surface any contradictions. ### Step 3.3: Build Methodology Coverage Matrix @@ -644,18 +607,6 @@ echo "$MERGED_FINDINGS_JSON" | jq '.' > "./reports/$SLUG/merged_findings.json" jq -e -f schemas/merged-findings.jq "./reports/$SLUG/merged_findings.json" > /dev/null ``` -**Blackboard write (optional, for live coordination):** -``` -blackboard_write(scope="{topic_slug}", key="merged_findings", value={...}) -``` - -### Step 3.6: Capture to Atlatl - -``` -capture_memory(title="Research complete: {topic}", ...) -enrich_memory(id) -``` - Update progress file: ```markdown ## {ISO_DATE} — Findings Merged @@ -749,7 +700,7 @@ Update progress file. ### Step 4.1: Update Topic in Config -Update the topic entry in `sigint.config.json` with completion status, findings count, and optional Atlatl memory ID in a **single atomic jq call** (per Structured Data Protocol): +Update the topic entry in `sigint.config.json` with completion status and findings count in a **single atomic jq call** (per Structured Data Protocol): ```bash FINDING_COUNT=$(jq '.findings | length' "./reports/$SLUG/state.json") DIMENSIONS_JSON=$(jq -c '[.findings[].dimension // empty] | unique' "./reports/$SLUG/state.json") @@ -757,17 +708,15 @@ jq --arg slug "$SLUG" \ --arg date "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ --argjson count "$FINDING_COUNT" \ --argjson dims "$DIMENSIONS_JSON" \ - --arg mid "${ATLATL_MEMORY_ID:-}" \ '.topics[$slug].status = "complete" | .topics[$slug].updated = $date | .topics[$slug].findings_count = $count | - .topics[$slug].dimensions = $dims | - if $mid != "" then .topics[$slug].atlatl_memory_id = $mid else . 
end' \ + .topics[$slug].dimensions = $dims' \ ./sigint.config.json > tmp.$$ && mv tmp.$$ ./sigint.config.json jq -e -f schemas/sigint-config.jq ./sigint.config.json > /dev/null ``` -The `atlatl_memory_id` is set only if a memory was captured for this session (non-empty `$ATLATL_MEMORY_ID`). All fields are written atomically to avoid race conditions with concurrent config writes. +All fields are written atomically to avoid race conditions with concurrent config writes. ### Step 4.2: Shutdown Team @@ -923,19 +872,6 @@ Dimension-analysts populate `provenance` during research. Codex review gates ver --- -## Blackboard Key Inventory - -| Key | Written By | Read By | Primary File (mandatory) | -|-----|-----------|---------|--------------------------| -| `elicitation` | orchestrator | all analysts | `elicitation.json` | -| `team_status` | orchestrator | `/sigint:status` | `team_status.json` | -| `methodology_plan_{dim}` | each analyst | orchestrator | `methodology_plan_{dim}.json` | -| `findings_{dim}` | each analyst | orchestrator, other analysts | `findings_{dim}.json` | -| `conflicts` | analysts | orchestrator | `conflicts.json` | -| `merged_findings` | orchestrator | report-synthesizer | `merged_findings.json` | - -All file paths are relative to `./reports/{topic_slug}/`. 
- --- ## Source-Chunker Coordination diff --git a/commands/augment.md b/commands/augment.md index b98d700..91b9271 100644 --- a/commands/augment.md +++ b/commands/augment.md @@ -2,7 +2,7 @@ description: Deep-dive into a specific area of current research version: 0.2.0 argument-hint: [--methodology ] -allowed-tools: Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, mcp__atlatl__capture_memory, mcp__atlatl__recall_memories, mcp__atlatl__enrich_memory, mcp__atlatl__blackboard_create, mcp__atlatl__blackboard_write, mcp__atlatl__blackboard_read, mcp__atlatl__blackboard_alert, mcp__atlatl__blackboard_pending_alerts, mcp__atlatl__blackboard_ack_alert, mcp__claude_ai_Mermaid_Chart__validate_and_render_mermaid_diagram, mcp__claude_ai_Mermaid_Chart__get_mermaid_syntax_document +allowed-tools: Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, mcp__claude_ai_Mermaid_Chart__validate_and_render_mermaid_diagram, mcp__claude_ai_Mermaid_Chart__get_mermaid_syntax_document --- Load and execute the sigint:augment skill. diff --git a/commands/init.md b/commands/init.md index 9595bb6..f1eefba 100644 --- a/commands/init.md +++ b/commands/init.md @@ -1,42 +1,26 @@ --- -description: Manually initialize or reload Atlatl memory context for sigint +description: Manually initialize or reload research context for sigint version: 0.1.0 argument-hint: [--full] [--topic ] -allowed-tools: Bash, Glob, Grep, Read, Write, mcp__atlatl__recall_memories +allowed-tools: Bash, Glob, Grep, Read, Write --- -Manually initialize the Atlatl memory context for sigint research. +Manually initialize the research context for sigint. 
**Arguments:** -- `--full` - Load all sigint-related memories (not just current session) -- `--topic` - Load memories for a specific topic +- `--full` - Load all sigint-related context (not just current session) +- `--topic` - Load context for a specific topic **Process:** -1. **Search Atlatl memories:** - Search for existing sigint memories using `recall_memories(query="sigint research", tags=["sigint-research"])`. - Atlatl namespace mapping: - - `_semantic/knowledge` with tag `sigint-research` - Research session state and findings - - `_procedural/patterns` with tag `sigint-methodology` - Learned methodologies and approaches - - `_semantic/knowledge` with tag `sigint-sources` - Trusted sources and data quality notes - - `_procedural/patterns` with tag `sigint-patterns` - Recognized market patterns +1. **If `--topic` specified:** + Load only context related to that topic from `./reports/[topic_slug]/`. -2. **Load relevant memories:** - Use `recall_memories` to retrieve: - - Active research sessions - - Recent findings and insights - - Methodology preferences - - Source reliability notes - -3. **If `--topic` specified:** - Load only memories related to that topic. - Include related topics if semantically similar. - -4. **If `--full` specified:** - Load comprehensive sigint memory context. +2. **If `--full` specified:** + Load comprehensive sigint context from all topics. May use more context window but provides full history. -5. **Load configuration (Config Resolution Protocol):** +3. **Load configuration (Config Resolution Protocol):** Apply the **Config Resolution Protocol**: 1. Read `protocols/CONFIG-RESOLUTION.md` and follow all steps. @@ -49,8 +33,7 @@ Manually initialize the Atlatl memory context for sigint research. 
version: "2.0", defaults: { report_format: "markdown", - audiences: ["technical"], - auto_atlatl: true + audiences: ["technical"] }, research: { maxDimensions: 5, @@ -70,35 +53,30 @@ Manually initialize the Atlatl memory context for sigint research. ``` Continue initialization regardless. -6. **Display loaded context:** +4. **Display loaded context:** ``` - Atlatl Context Loaded - ───────────────────── + Research Context Loaded + ─────────────────────── Research Sessions: [count] Active Session: [topic or "none"] - Methodology Notes: [count] - Source Notes: [count] - Patterns: [count] Configuration (sigint.config.json v2.0): - Default Repo: [config.default_repo or "not set"] - Report Format: [config.report_format] - Audiences: [config.audiences] - - Auto Atlatl: [config.auto_atlatl] - Topics configured: [count of keys in project_config.topics, or 0] ``` -7. **Suggest next action:** +5. **Suggest next action:** - If active session: suggest `/sigint:status` - If no session: suggest `/sigint:start ` - If stale session: suggest `/sigint:update` **Note:** This command supplements the SessionStart hook which provides basic awareness. -Use this command to manually reload full Atlatl context mid-session or after configuration changes. +Use this command to manually reload full research context mid-session or after configuration changes. 
**Output:** - Confirmation of context loaded -- Summary of available memories - Configuration status - Suggested next action diff --git a/commands/issues.md b/commands/issues.md index 6a1415b..9a0ddbb 100644 --- a/commands/issues.md +++ b/commands/issues.md @@ -2,7 +2,7 @@ description: Create GitHub issues from research findings as atomic deliverables version: 0.2.0 argument-hint: [--repo ] [--dry-run] [--labels ] -allowed-tools: Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, ToolSearch, mcp__atlatl__capture_memory, mcp__atlatl__recall_memories, mcp__atlatl__enrich_memory, mcp__atlatl__blackboard_create, mcp__atlatl__blackboard_write, mcp__atlatl__blackboard_read, mcp__github__issue_write, mcp__github__issue_read +allowed-tools: Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, ToolSearch, mcp__github__issue_write, mcp__github__issue_read --- Load and execute the `sigint:issues` skill with these arguments: $ARGUMENTS diff --git a/commands/report.md b/commands/report.md index 1376656..289fb08 100644 --- a/commands/report.md +++ b/commands/report.md @@ -2,7 +2,7 @@ description: Generate comprehensive research report in multiple formats version: 0.2.0 argument-hint: "[--format ] [--audience ] [--sections ]" -allowed-tools: Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, mcp__atlatl__capture_memory, mcp__atlatl__recall_memories, mcp__atlatl__enrich_memory, mcp__atlatl__blackboard_create, mcp__atlatl__blackboard_write, mcp__atlatl__blackboard_read, mcp__claude_ai_Mermaid_Chart__validate_and_render_mermaid_diagram +allowed-tools: Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, 
mcp__claude_ai_Mermaid_Chart__validate_and_render_mermaid_diagram --- Load and execute the sigint:report skill. diff --git a/commands/resume.md b/commands/resume.md index d8e235b..7670c71 100644 --- a/commands/resume.md +++ b/commands/resume.md @@ -1,8 +1,8 @@ --- -description: Resume a previous research session from progress file and Atlatl +description: Resume a previous research session from progress file version: 0.5.0 argument-hint: "[] [--list]" -allowed-tools: AskUserQuestion, Glob, Grep, Read, Write, mcp__atlatl__inject_context, mcp__atlatl__recall_memories +allowed-tools: AskUserQuestion, Glob, Grep, Read, Write --- Resume a previous sigint research session following the harness initialization protocol. @@ -22,17 +22,15 @@ The resume command follows the Anthropic long-running agent harness pattern: rea Fall back to scanning `./reports/*/state.json` for sessions not registered in the config (legacy sessions predating topic registration). - Recall Atlatl memories: `recall_memories(query="sigint research sessions", tags=["sigint-research"])` Display table: ``` - | Topic | Status | Last Updated | Dimensions | Findings | Atlatl Memory | - |-------|--------|--------------|------------|----------|---------------| + | Topic | Status | Last Updated | Dimensions | Findings | + |-------|--------|--------------|------------|----------| ``` 2. **If topic specified:** Load `./reports/[topic]/research-progress.md` **FIRST** (harness init protocol). Then load `./reports/[topic]/state.json` for structured data. - Recall related Atlatl memories: `recall_memories(query="sigint {topic}", tags=["sigint-research"])` 3. **If no topic specified:** Check for single active session. 
diff --git a/commands/start.md b/commands/start.md index 8ec86ec..c3e179a 100644 --- a/commands/start.md +++ b/commands/start.md @@ -2,7 +2,7 @@ description: Begin a new market research session with comprehensive scoping version: 0.2.0 argument-hint: "[]" -allowed-tools: Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, mcp__atlatl__capture_memory, mcp__atlatl__recall_memories, mcp__atlatl__enrich_memory, mcp__atlatl__blackboard_create, mcp__atlatl__blackboard_write, mcp__atlatl__blackboard_read, mcp__atlatl__blackboard_alert, mcp__atlatl__blackboard_pending_alerts, mcp__atlatl__blackboard_ack_alert +allowed-tools: Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion --- Load and execute the sigint:start skill. diff --git a/commands/status.md b/commands/status.md index a5ab8cb..472a453 100644 --- a/commands/status.md +++ b/commands/status.md @@ -2,7 +2,7 @@ description: Show current research session state and progress version: 0.1.0 argument-hint: [--verbose] -allowed-tools: Glob, Grep, Read, mcp__atlatl__blackboard_read +allowed-tools: Glob, Grep, Read --- Display the current sigint research session status and progress. @@ -25,13 +25,6 @@ Display the current sigint research session status and progress. - Sources count - Last activity -2b. **Check blackboard for live progress:** - If a research session is active, check blackboard for real-time team status: - ``` - blackboard_read(scope="{topic_slug}", key="team_status") - ``` - If blackboard data exists, show live analyst progress in the dashboard. - 3. 
**Calculate progress metrics:** - Research completeness (based on methodology coverage) - Data freshness (age of newest finding) diff --git a/commands/update.md b/commands/update.md index 00e8ff2..a305354 100644 --- a/commands/update.md +++ b/commands/update.md @@ -2,7 +2,7 @@ description: Refresh data and findings for existing research using swarm orchestration version: 0.5.0 argument-hint: "[--topic ] [--area ] [--since ] [--no-delta] [--dimensions ]" -allowed-tools: Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, mcp__atlatl__capture_memory, mcp__atlatl__recall_memories, mcp__atlatl__enrich_memory, mcp__atlatl__blackboard_create, mcp__atlatl__blackboard_write, mcp__atlatl__blackboard_read, mcp__atlatl__blackboard_alert, mcp__atlatl__blackboard_pending_alerts, mcp__atlatl__blackboard_ack_alert +allowed-tools: Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion --- Load and execute the sigint:update skill. diff --git a/docs/README.md b/docs/README.md index bd33bbd..9b0a6e9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -39,5 +39,5 @@ Information-oriented descriptions for lookup. Understanding-oriented discussion of concepts and design. 
-- [Architecture](explanation/architecture.md) — Swarm orchestration, blackboard, and memory systems +- [Architecture](explanation/architecture.md) — Swarm orchestration and coordination systems - [Research Methodology](explanation/methodology.md) — Three-valued logic and analytical frameworks diff --git a/docs/explanation/architecture.md b/docs/explanation/architecture.md index 790bb2b..96922a5 100644 --- a/docs/explanation/architecture.md +++ b/docs/explanation/architecture.md @@ -1,12 +1,12 @@ --- diataxis_type: explanation title: Architecture -description: How sigint's swarm orchestration, blackboard coordination, and memory systems work +description: How sigint's swarm orchestration and coordination systems work --- # Architecture -Sigint uses a swarm-orchestrated architecture where a research-orchestrator coordinates parallel dimension-analysts via an ephemeral blackboard, with long-term knowledge persisted through Atlatl memory. +Sigint uses a swarm-orchestrated architecture where a research-orchestrator coordinates parallel dimension-analysts via the task system and file-based state. ## Agent hierarchy @@ -93,43 +93,26 @@ The protocol mandates: 1. **Write-then-validate**: Every JSON write is immediately followed by schema validation using the corresponding `schemas/*.jq` file 2. **Retry-and-correct**: If validation fails, diagnose, fix with jq, and re-validate (max 2 retries). Invalid data never proceeds through the pipeline. -3. **Dual-write**: Blackboard MCP + file persistence for all findings. Files are the durable store; blackboard is the coordination layer with 24h TTL. +3. **File persistence**: All findings written to validated JSON files. Files are the sole durable store. 4. **Safe interpolation**: Shell variables enter jq via `--arg`/`--argjson`, never via string interpolation. 
Twelve schema validators exist in `schemas/`: state, findings, elicitation, methodology-plan, reflection, quarantine, merged-findings, report-metadata, issues, team-status, conflicts, and sigint-config.

-## Blackboard coordination
+## File-based coordination

-The blackboard is an ephemeral key-value store (TTL 24h) created per research session. It serves as the shared scratchpad between the orchestrator and its analysts.
+All inter-agent coordination uses file-based state in the `reports/{topic_slug}/` directory. Each research session produces structured JSON files that serve as the shared state between the orchestrator and its analysts.

-### Blackboard schema
+### State files

-| Key | Written by | Read by | Content |
-|-----|-----------|---------|---------|
-| `elicitation` | orchestrator | all analysts | Full elicitation context from state.json |
-| `team_status` | orchestrator | status command | `{analysts: {dimension: status, ...}}` |
-| `methodology_plan_{dim}` | dimension-analyst | orchestrator | Planned frameworks per dimension |
-| `findings_{dimension}` | dimension-analyst | orchestrator, report-synthesizer | Structured findings with sources and gaps |
-| `conflicts` | dimension-analyst | orchestrator | Cross-dimension contradictions |
-| `merged_findings` | orchestrator | report-synthesizer | Consolidated post-merge findings |
+| File | Written by | Read by | Content |
+|------|-----------|---------|---------|
+| `state.json` | orchestrator | all agents | Full session state including elicitation |
+| `methodology_plan_{dim}.json` | dimension-analyst | orchestrator | Planned frameworks per dimension |
+| `findings_{dimension}.json` | dimension-analyst | orchestrator, report-synthesizer | Structured findings with sources and gaps |
+| `conflicts.json` | dimension-analyst | orchestrator | Cross-dimension contradictions |
+| `merged_findings.json` | orchestrator | report-synthesizer | Consolidated post-merge findings |

-### Alert channels

-| 
Channel | Publisher | Subscriber | Trigger | -|---------|----------|------------|---------| -| `finding_discovered` | analyst | orchestrator | Significant finding (>20% share shift, etc.) | -| `conflict_detected` | analyst | orchestrator | Cross-dimension contradiction | -| `phase_complete` | analyst | orchestrator | Dimension research finished | -| `source_shared` | analyst | other analysts | High-value source useful across dimensions | - -## Blackboard vs Atlatl memory - -These serve different purposes and should not be confused: - -- **Blackboard** = ephemeral session coordination. Inter-agent scratchpad. Dies after 24 hours. Used during active research for team communication. -- **Atlatl memory** = permanent cross-session knowledge. Key findings, decisions, methodology patterns. Survives across sessions. Used for recall when starting related research. - -**Rule:** Every finding written to the blackboard should also be captured to Atlatl memory if it has cross-session value. +Agents signal completion via TaskUpdate and SendMessage. ## Source chunking (RLM) @@ -146,13 +129,13 @@ This enables analysis of documents that would otherwise exceed a single agent's ## Research flow 1. `/sigint:start` conducts elicitation, creates state.json -2. research-orchestrator creates blackboard, writes elicitation +2. research-orchestrator creates session directory, writes elicitation to state.json 3. Parallel dimension-analysts read elicitation, load skill methodologies, conduct web research -4. Each analyst writes findings to blackboard + file (dual-write), alerts orchestrator on completion +4. Each analyst writes findings to validated JSON files, signals orchestrator on completion 5. Post-findings codex review gate runs per dimension; failures quarantined 6. Orchestrator merges all findings into state.json (with delta detection in update mode) 7. Post-merge codex review gate checks cross-dimension consistency -8. 
`/sigint:report` spawns report-synthesizer, which reads state.json + blackboard findings +8. `/sigint:report` spawns report-synthesizer, which reads state.json + dimension findings files 9. Post-report codex review gate verifies claim traceability 10. `/sigint:issues` spawns issue-architect to atomize findings into GitHub issues 11. Post-issues codex review gate verifies issue-finding linkage diff --git a/docs/how-to/configure-plugin.md b/docs/how-to/configure-plugin.md index 8fd7e69..f7a3219 100644 --- a/docs/how-to/configure-plugin.md +++ b/docs/how-to/configure-plugin.md @@ -30,8 +30,7 @@ Create `sigint.config.json` in your project root: "defaults": { "default_repo": "myorg/myrepo", "report_format": "markdown", - "audiences": ["developers", "product-managers"], - "auto_atlatl": true + "audiences": ["developers", "product-managers"] }, "research": { "maxDimensions": 5, @@ -51,8 +50,7 @@ Create `~/.claude/sigint.config.json` for user-wide defaults: "version": "2.0", "defaults": { "report_format": "markdown", - "audiences": ["executives"], - "auto_atlatl": true + "audiences": ["executives"] }, "research": { "maxDimensions": 5, @@ -73,8 +71,7 @@ Override defaults for specific research topics in the `topics` block: "defaults": { "default_repo": "myorg/myrepo", "report_format": "markdown", - "audiences": ["technical"], - "auto_atlatl": true + "audiences": ["technical"] }, "research": { "maxDimensions": 5, @@ -109,7 +106,6 @@ When you run `/sigint:start`, the topic is automatically registered in `sigint.c "updated": "2026-04-03T09:15:00Z", "reports_dir": "./reports/ag-grants-research", "findings_count": 42, - "atlatl_memory_id": "mem_abc123", "context_file": "./reports/ag-grants-research/CONTEXT.md" } } @@ -123,7 +119,6 @@ When you run `/sigint:start`, the topic is automatically registered in `sigint.c | `updated` | all lifecycle commands | Last activity timestamp | | `reports_dir` | start | Path to reports directory | | `findings_count` | orchestrator/augment | Total 
active findings | -| `atlatl_memory_id` | orchestrator | Atlatl memory ID linking session findings | | `context_file` | migrate/manual | Optional CONTEXT.md path | These fields enable `/sigint:status` and `/sigint:resume --list` to discover sessions from the config without globbing report directories. @@ -159,7 +154,6 @@ The context file is loaded by `/sigint:start` and passed to the research orchest | `default_repo` | `string or null` | `null` | GitHub repo for issue creation (`owner/repo`) | | `report_format` | `markdown, html, or both` | `"markdown"` | Report output format | | `audiences` | `string[]` | `["technical"]` | Default report audiences | -| `auto_atlatl` | `boolean` | `true` | Auto-persist findings to Atlatl memory | ### Research block @@ -175,7 +169,7 @@ The context file is loaded by `/sigint:start` and passed to the research orchest /sigint:init ``` -Creates `sigint.config.json` with default template if it does not exist, and loads Atlatl memory context. +Creates `sigint.config.json` with default template if it does not exist. ## Migrate from legacy configuration diff --git a/docs/how-to/deploy-to-cowork.md b/docs/how-to/deploy-to-cowork.md index 59bdf11..fd2c0fe 100644 --- a/docs/how-to/deploy-to-cowork.md +++ b/docs/how-to/deploy-to-cowork.md @@ -49,18 +49,6 @@ To connect GitHub in Cowork: 2. Enable the **GitHub** connector 3. Authorize access to your target repositories -### Atlatl MCP (Optional) - -Sigint uses Atlatl MCP for blackboard coordination and persistent memory. If Atlatl is not available in your Cowork environment: - -- **Blackboard coordination** falls back to file-based state in `./reports/{topic-slug}/` (per-key files) -- **Persistent memory** falls back to `./reports/{topic-slug}/state.json` per research session -- **All core research workflows** continue to function without Atlatl - -To add Atlatl in Cowork (if available): -1. Go to **Customize > Connectors** -2. 
Enable the **Atlatl** connector - ### File System Cowork provides a sandboxed filesystem. Sigint writes all output to `./reports/` within the active project directory. This works normally in Cowork's sandbox. @@ -78,10 +66,6 @@ Usage is identical to Claude Code. All commands work the same way: ## Troubleshooting -### "Blackboard tools not available" - -Atlatl MCP is not connected. Sigint will automatically fall back to file-based coordination. Research workflows are unaffected. - ### "Cannot create GitHub issues" Ensure the GitHub connector is enabled in **Customize > Connectors** and you have authorized access to the target repository. diff --git a/docs/how-to/migrate-config.md b/docs/how-to/migrate-config.md index 5bd48bc..9e9317d 100644 --- a/docs/how-to/migrate-config.md +++ b/docs/how-to/migrate-config.md @@ -40,7 +40,7 @@ This displays: The migrate skill performs these steps: 1. **Detects legacy config files** -- scans for `.claude/sigint.local.md` (project and global) and `.sigint.config.json` v1.0 -2. **Parses legacy settings** -- extracts `default_repo`, `report_format`, `audiences`, `auto_atlatl` from YAML frontmatter, and `maxDimensions`, `dimensionTimeout`, `defaultPriorities` from v1.0 JSON +2. **Parses legacy settings** -- extracts `default_repo`, `report_format`, `audiences` from YAML frontmatter, and `maxDimensions`, `dimensionTimeout`, `defaultPriorities` from v1.0 JSON 3. **Discovers existing topics** -- scans `./reports/*/state.json` for previously researched topics 4. **Builds v2.0 config** -- assembles `defaults`, `research`, and `topics` blocks from parsed values (with hardcoded defaults for missing fields) 5. 
**Creates CONTEXT.md files** -- generates a context file for each discovered topic at `./reports/{slug}/CONTEXT.md` (skips if already exists) diff --git a/docs/how-to/research-workflow.md b/docs/how-to/research-workflow.md index e2cae5c..8517ba6 100644 --- a/docs/how-to/research-workflow.md +++ b/docs/how-to/research-workflow.md @@ -69,7 +69,7 @@ The issue-architect atomizes findings into sprint-sized issues with acceptance c /sigint:resume --list ``` -Restores research state from files and Atlatl memory. +Restores research state from files. ## Check session status diff --git a/docs/how-to/troubleshooting.md b/docs/how-to/troubleshooting.md index 14e152f..07f2cd3 100644 --- a/docs/how-to/troubleshooting.md +++ b/docs/how-to/troubleshooting.md @@ -157,55 +157,20 @@ Or specify at runtime: /sigint:issues --repo owner/repo ``` -## Atlatl Memory Issues - -### Memory Not Persisting - -**Symptom:** Research doesn't recall previous sessions - -**Causes:** -- Atlatl MCP server not running -- Different namespace or tags - -**Solutions:** - -1. Check Atlatl status: - ``` - Use system_status MCP tool - ``` - -2. Manually search: - ``` - recall_memories(query="sigint research", tags=["sigint-research"]) - ``` - -3. Re-initialize: - ``` - /sigint:init - ``` - -### Atlatl Not Available - -**Symptom:** Memory tools not found - -**Cause:** Atlatl MCP server not configured - -**Solution:** sigint works without Atlatl — memory just won't persist across sessions. Research still saves to `./reports/`. - -## Blackboard Issues +## Team Status Issues ### Team Status Not Updating **Symptom:** `/sigint:status` doesn't show analyst progress **Causes:** -- Blackboard expired (TTL is 24h) - Research orchestrator hasn't started yet +- Findings files not yet written by analysts **Solutions:** -1. Check if research is active — orchestrator creates blackboard on start -2. If blackboard expired, findings are still in state.json -3. Re-run research to create fresh blackboard +1. 
Check if research is active — orchestrator creates session directory on start +2. Findings are persisted in state.json and dimension findings files +3. Re-run research if session state is missing ### Analyst Not Completing diff --git a/docs/reference/agents.md b/docs/reference/agents.md index 2c15cb4..5b06204 100644 --- a/docs/reference/agents.md +++ b/docs/reference/agents.md @@ -23,8 +23,6 @@ Orchestrator agent for sigint research sessions. Owns all phase management: team **Tools:** Agent, AskUserQuestion, Bash, Glob, Grep, Read, SendMessage, TaskCreate, TaskGet, TaskList, TaskUpdate, TeamCreate, TeamDelete, Write -**Atlatl tools:** blackboard_ack_alert, blackboard_alert, blackboard_create, blackboard_pending_alerts, blackboard_read, blackboard_write, capture_memory, enrich_memory, recall_memories - **Modes**: `full` (start), `update` (update), `augment` (augment) **Key capabilities**: @@ -33,7 +31,7 @@ Orchestrator agent for sigint research sessions. Owns all phase management: team - Delta detection protocol for update mode (NEW/UPDATED/CONFIRMED/POTENTIALLY_REMOVED/TREND_REVERSAL) - Progress file generation (`research-progress.md`) for cross-session continuity - Lineage tracking in `state.json` for full provenance chain -- Blackboard dual-write (blackboard + file) as default behavior +- File-based persistence as default behavior --- @@ -50,11 +48,9 @@ Focused research on a single market dimension, parameterized by skill. 
**Tools:** Bash, Glob, Grep, Read, SendMessage, Skill, TaskCreate, TaskGet, TaskList, TaskUpdate, WebFetch, WebSearch, Write -**Atlatl tools:** blackboard_alert, blackboard_read, blackboard_write, capture_memory, enrich_memory, recall_memories - **Dimension-to-skill mapping:** -| Dimension | Skill | Blackboard Key | +| Dimension | Skill | Findings File Key | |-----------|-------|---------------| | competitive | competitive-analysis | `findings_competitive` | | sizing | market-sizing | `findings_sizing` | @@ -106,8 +102,6 @@ Transforms research findings into executive-ready documents with visualizations. **Tools:** Bash, Glob, Grep, Read, SendMessage, Skill, TaskGet, TaskList, TaskUpdate, WebFetch, Write -**Atlatl tools:** blackboard_read, capture_memory, enrich_memory, recall_memories - **Report sections:** Executive Summary, Market Overview, Market Sizing (TAM/SAM/SOM), Competitive Landscape, Trend Analysis, SWOT Analysis, Recommendations, Risk Assessment, Appendix **Audience tailoring:** Executives (strategic), Product Managers (features/roadmap), Investors (opportunity/growth), Developers (technical feasibility) @@ -127,8 +121,6 @@ Converts research findings into sprint-sized GitHub issues. **Tools:** Bash, Glob, Grep, Read, SendMessage, TaskGet, TaskList, TaskUpdate, ToolSearch, Write -**Atlatl tools:** capture_memory, enrich_memory, recall_memories - **GitHub tools:** issue_read, issue_write **Issue categories:** Feature requests, Enhancements, Research tasks, Action items diff --git a/docs/reference/commands.md b/docs/reference/commands.md index 14abc96..d10d5ca 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -16,7 +16,6 @@ Begin a new market research session with structured elicitation. 
|----------|-------| | **Arguments** | `[]` | | **Allowed tools** | Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion | -| **Atlatl tools** | capture_memory, recall_memories, enrich_memory, blackboard_create, blackboard_write, blackboard_read, blackboard_alert, blackboard_pending_alerts, blackboard_ack_alert | | **Delegates to** | research-orchestrator agent | **Behavior:** Conducts 8-section elicitation (decision context, audience, scope, competitive context, priorities, success criteria, constraints), synthesizes research brief, creates `./reports//state.json`, spawns research-orchestrator with prioritized dimensions. @@ -34,7 +33,6 @@ Deep-dive into a specific area of current research. |----------|-------| | **Arguments** | ` [--methodology ]` | | **Allowed tools** | Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion | -| **Atlatl tools** | capture_memory, recall_memories, enrich_memory, blackboard_create, blackboard_write, blackboard_read, blackboard_alert, blackboard_pending_alerts, blackboard_ack_alert | | **Delegates to** | dimension-analyst agent (via research-orchestrator in augment mode) | **Areas:** `competitive landscape`, `market sizing`, `trends`, `customer research`, `technology assessment`, `financial analysis`, `regulatory review`, `trend modeling` @@ -51,7 +49,6 @@ Refresh data and findings for existing research using swarm orchestration. 
|----------|-------| | **Arguments** | `[--topic ] [--area ] [--since ] [--no-delta] [--dimensions ]` | | **Allowed tools** | Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion | -| **Atlatl tools** | capture_memory, recall_memories, enrich_memory, blackboard_create, blackboard_write, blackboard_read, blackboard_alert, blackboard_pending_alerts, blackboard_ack_alert | | **Delegates to** | research-orchestrator agent (update mode) | **Behavior:** Loads prior state, spawns dimension-analysts for specified dimensions, runs delta detection protocol (classifying findings as NEW/UPDATED/CONFIRMED/POTENTIALLY_REMOVED/TREND_REVERSAL), merges findings with reconciliation, generates delta report. @@ -66,7 +63,6 @@ Generate comprehensive research report. |----------|-------| | **Arguments** | `[--format ] [--audience ] [--sections ]` | | **Allowed tools** | Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion | -| **Atlatl tools** | capture_memory, recall_memories, enrich_memory, blackboard_create, blackboard_write, blackboard_read | | **Delegates to** | report-synthesizer agent | **Formats:** `markdown` (default), `html`, `both` @@ -85,7 +81,6 @@ Create GitHub issues from research findings. |----------|-------| | **Arguments** | `[--repo ] [--dry-run] [--labels ]` | | **Allowed tools** | Read, Write, Bash, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, ToolSearch | -| **Atlatl tools** | capture_memory, recall_memories, enrich_memory, blackboard_create, blackboard_write, blackboard_read | | **GitHub tools** | issue_write, issue_read | | **Delegates to** | issue-architect agent | @@ -103,9 +98,8 @@ Resume a previous research session. 
|----------|-------| | **Arguments** | `[] [--list]` | | **Allowed tools** | Read, Write, Grep, Glob, AskUserQuestion | -| **Atlatl tools** | inject_context, recall_memories | -**Behavior:** Follows the harness initialization protocol -- reads `research-progress.md` first to understand prior work state. Scans `./reports/*/state.json` for sessions, recalls Atlatl memories for context. With `--list`, displays table of all sessions. +**Behavior:** Follows the harness initialization protocol -- reads `research-progress.md` first to understand prior work state. Scans `./reports/*/state.json` for sessions. With `--list`, displays table of all sessions. --- @@ -117,23 +111,21 @@ Show current research session state and progress. |----------|-------| | **Arguments** | `[--verbose]` | | **Allowed tools** | Read, Grep, Glob | -| **Atlatl tools** | blackboard_read | -**Behavior:** Finds active session, loads state.json, checks blackboard for live team status, calculates progress metrics, displays dashboard with findings coverage and suggested next actions. +**Behavior:** Finds active session, loads state.json, calculates progress metrics, displays dashboard with findings coverage and suggested next actions. --- ## /sigint:init -Initialize or reload Atlatl memory context. +Initialize or reload plugin configuration. | Property | Value | |----------|-------| | **Arguments** | `[--full] [--topic ]` | | **Allowed tools** | Bash, Glob, Grep, Read, Write | -| **Atlatl tools** | recall_memories | -**Behavior:** Searches Atlatl memories with sigint-research tags, loads cascading configuration (project > global > defaults), creates project config if missing, displays context summary. +**Behavior:** Loads cascading configuration (project > global > defaults), creates project config if missing, displays context summary. 
## See also diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 2e356c7..471fcf3 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -36,7 +36,6 @@ For any field and topic, values resolve via this cascade: | `default_repo` | `string or null` | `null` | GitHub repo for issue creation (owner/repo) | | `report_format` | `markdown, html, or both` | `"markdown"` | Report output format | | `audiences` | `string[]` | `["technical"]` | Default report audiences | -| `auto_atlatl` | `boolean` | `true` | Auto-persist findings to Atlatl memory | ### Research Runtime Fields (`research` block) @@ -63,8 +62,7 @@ All user preference fields above, plus: "defaults": { "default_repo": "owner/repo", "report_format": "markdown", - "audiences": ["technical"], - "auto_atlatl": true + "audiences": ["technical"] }, "research": { "maxDimensions": 5, diff --git a/docs/reference/protocols.md b/docs/reference/protocols.md index a09da7b..9a7222e 100644 --- a/docs/reference/protocols.md +++ b/docs/reference/protocols.md @@ -26,7 +26,6 @@ All `.json` file creation, mutation, and extraction must use `jq` via Bash. The | Mutate JSON file | `jq '.key = "val"' file.json > tmp.$$ && mv tmp.$$ file.json` | Always | | Extract fields | `jq '.field' file.json` | Always | | Comprehension-only read | `Read` tool | Understanding file structure, not transforming | -| Blackboard operations | MCP tools | Exempt from jq requirement (MCP handles serialization) | ### Variable interpolation @@ -87,12 +86,6 @@ If schema validation fails, the agent must not continue the pipeline. Instead: 3. **Re-validate**: Run the schema check again 4. **Retry limit**: Maximum 2 correction attempts. If validation still fails after 2 fixes, log the failure, write a `.invalid` sidecar, and report the error. Do not proceed with invalid data. -### Dual-write pattern - -Sigint uses dual-write as the default: blackboard MCP + file persistence. 
Blackboard writes are exempt from jq (MCP handles serialization). File writes must use jq and pass schema validation. - -If blackboard is unavailable, the file write is the sole persistence path. - --- ## Config Resolution Protocol diff --git a/docs/reference/skills.md b/docs/reference/skills.md index 881e242..c60e87f 100644 --- a/docs/reference/skills.md +++ b/docs/reference/skills.md @@ -55,7 +55,7 @@ Operational skills have a simpler structure (typically just `SKILL.md`) since th ## Orchestration hints Every methodology SKILL.md includes an `## Orchestration Hints` section that tells dimension-analysts: -- **Blackboard key** for writing findings +- **Findings file key** for writing findings - **Cross-reference dimensions** to validate against - **Alert triggers** for significant findings - **Confidence rules** for source requirements diff --git a/docs/tutorials/getting-started.md b/docs/tutorials/getting-started.md index ecb8e69..1966e3e 100644 --- a/docs/tutorials/getting-started.md +++ b/docs/tutorials/getting-started.md @@ -21,7 +21,6 @@ By the end of this tutorial, you will have: - Claude Code installed and running - `jq` installed (used for all JSON file operations) - (Optional) GitHub CLI (`gh`) for issue creation -- (Optional) Atlatl MCP server for cross-session memory ## Step 1: Install the plugin diff --git a/evals/agents/dimension-analyst/evals.json b/evals/agents/dimension-analyst/evals.json index 1ca091d..a5612c2 100644 --- a/evals/agents/dimension-analyst/evals.json +++ b/evals/agents/dimension-analyst/evals.json @@ -1,258 +1,4 @@ [ - { - "id": "analyst-competitive-happy-path", - "description": "Dimension analyst correctly loads the competitive-analysis skill and writes findings to file and blackboard", - "prompt": "You are a dimension-analyst for competitive research on 'AI code review tools'. Blackboard task_id: ai-code-review. Skill to load: skills/competitive-analysis/SKILL.md. Your blackboard key: findings_competitive. 
Analyze the competitive landscape.", - "expectations": [ - { - "description": "The competitive analysis skill is loaded", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["competitive-analysis", "competitive-analysis/SKILL.md"] - } - ] - }, - { - "description": "Findings are written to file (mandatory) and blackboard key", - "deterministic_checks": [ - { "type": "output_contains", "value": "findings_competitive" }, - { "type": "output_contains_any", "values": ["findings_competitive.json", "jq", "REPORTS_DIR"] } - ] - }, - { - "description": "Web search is used for current data gathering", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["WebSearch", "web search", "search"] - } - ] - }, - { - "description": "Deprecated v0.3 patterns are not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "TodoWrite" }, - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Follows competitive analysis methodology from the loaded skill including competitor profiles, matrix, and strategic positioning" - } - ] - }, - { - "id": "analyst-sizing-happy-path", - "description": "Dimension analyst loads market-sizing skill and produces quantified TAM/SAM/SOM estimates", - "prompt": "You are a dimension-analyst for sizing research on 'autonomous vehicle fleet management'. Blackboard task_id: av-fleet-mgmt. Skill to load: skills/market-sizing/SKILL.md. Your blackboard key: findings_sizing. 
Estimate the market size.", - "expectations": [ - { - "description": "The market-sizing skill is loaded", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["market-sizing", "market-sizing/SKILL.md"] - } - ] - }, - { - "description": "Findings are written to the correct blackboard key", - "deterministic_checks": [ - { "type": "output_contains", "value": "findings_sizing" } - ] - }, - { - "description": "Market sizing methodology is applied with TAM/SAM/SOM", - "deterministic_checks": [ - { "type": "output_contains_any", "values": ["TAM", "SAM", "SOM"] } - ] - }, - { - "description": "Produces quantified market size estimates with dollar figures or growth rates" - } - ] - }, - { - "id": "analyst-trends-happy-path", - "description": "Dimension analyst loads trend-analysis skill and identifies trends with direction indicators", - "prompt": "You are a dimension-analyst for trends research on 'generative AI in healthcare'. Blackboard task_id: gen-ai-healthcare. Skill to load: skills/trend-analysis/SKILL.md. Your blackboard key: findings_trends. 
Identify market trends.", - "expectations": [ - { - "description": "The trend-analysis skill is loaded", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["trend-analysis", "trend-analysis/SKILL.md"] - } - ] - }, - { - "description": "Findings are written to the correct blackboard key", - "deterministic_checks": [ - { "type": "output_contains", "value": "findings_trends" } - ] - }, - { - "description": "Trend direction indicators are used", - "deterministic_checks": [ - { "type": "output_contains_any", "values": ["INC", "DEC", "CONST"] } - ] - }, - { - "description": "Identifies both macro trends (economic, regulatory) and micro trends (industry-specific, adoption patterns) with direction indicators" - } - ] - }, - { - "id": "analyst-customer-happy-path", - "description": "Dimension analyst loads customer-research skill and identifies customer segments", - "prompt": "You are a dimension-analyst for customer research on 'developer productivity tools'. Blackboard task_id: dev-prod-tools. Skill to load: skills/customer-research/SKILL.md. Your blackboard key: findings_customer. Research the target customers.", - "expectations": [ - { - "description": "The customer-research skill is loaded", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["customer-research", "customer-research/SKILL.md"] - } - ] - }, - { - "description": "Findings are written to the correct blackboard key", - "deterministic_checks": [ - { "type": "output_contains", "value": "findings_customer" } - ] - }, - { - "description": "Identifies distinct customer segments with needs, pain points, and buying behaviors" - } - ] - }, - { - "id": "analyst-tech-happy-path", - "description": "Dimension analyst loads tech-assessment skill and evaluates technology landscape", - "prompt": "You are a dimension-analyst for tech research on 'WebAssembly in server-side computing'. Blackboard task_id: wasm-server. Skill to load: skills/tech-assessment/SKILL.md. 
Your blackboard key: findings_tech. Assess the technology landscape.", - "expectations": [ - { - "description": "The tech-assessment skill is loaded", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["tech-assessment", "tech-assessment/SKILL.md"] - } - ] - }, - { - "description": "Findings are written to the correct blackboard key", - "deterministic_checks": [ - { "type": "output_contains", "value": "findings_tech" } - ] - }, - { - "description": "Evaluates technology readiness, feasibility, and adoption landscape with specific technical findings" - } - ] - }, - { - "id": "analyst-financial-happy-path", - "description": "Dimension analyst loads financial-analysis skill and produces unit economics or revenue analysis", - "prompt": "You are a dimension-analyst for financial research on 'subscription-based SaaS for logistics'. Blackboard task_id: saas-logistics. Skill to load: skills/financial-analysis/SKILL.md. Your blackboard key: findings_financial. Analyze the financial dynamics.", - "expectations": [ - { - "description": "The financial-analysis skill is loaded", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["financial-analysis", "financial-analysis/SKILL.md"] - } - ] - }, - { - "description": "Findings are written to the correct blackboard key", - "deterministic_checks": [ - { "type": "output_contains", "value": "findings_financial" } - ] - }, - { - "description": "Produces unit economics, revenue models, or financial projections relevant to the SaaS logistics domain" - } - ] - }, - { - "id": "analyst-regulatory-happy-path", - "description": "Dimension analyst loads regulatory-review skill and identifies compliance requirements", - "prompt": "You are a dimension-analyst for regulatory research on 'digital health platforms in the US and EU'. Blackboard task_id: digital-health. Skill to load: skills/regulatory-review/SKILL.md. Your blackboard key: findings_regulatory. 
Review the regulatory landscape.", - "expectations": [ - { - "description": "The regulatory-review skill is loaded", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["regulatory-review", "regulatory-review/SKILL.md"] - } - ] - }, - { - "description": "Findings are written to the correct blackboard key", - "deterministic_checks": [ - { "type": "output_contains", "value": "findings_regulatory" } - ] - }, - { - "description": "Identifies compliance requirements, regulatory bodies, and risk factors for the specified jurisdictions" - } - ] - }, - { - "id": "analyst-elicitation-read", - "description": "Analyst reads elicitation context from file (primary) or blackboard (fallback) to shape its research approach", - "prompt": "You are a dimension-analyst for competitive research on 'cloud-native observability'. Blackboard task_id: cloud-observability. Read the elicitation from the blackboard first to understand the research scope and priorities before conducting research.", - "expectations": [ - { - "description": "Elicitation context is loaded from file or blackboard", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["elicitation.json", "state.json", "elicitation", "blackboard_read"] - } - ] - }, - { - "description": "Research scope or priorities from the elicitation are referenced", - "deterministic_checks": [ - { "type": "output_contains_any", "values": ["scope", "priorities"] } - ] - }, - { - "description": "Research queries and focus areas are shaped by the elicitation context rather than being generic" - } - ] - }, - { - "id": "analyst-conflict-detection", - "description": "Analyst detects contradictions with another dimension's findings and raises a blackboard alert", - "prompt": "You are a dimension-analyst for sizing research on 'quantum computing services'. Blackboard task_id: quantum-computing. After writing your findings, check the competitive dimension's findings on the blackboard. 
The competitive analysis says the market has 50+ active players suggesting maturity, but your sizing data shows the market is under $500M suggesting it is still nascent. Flag the conflict.", - "expectations": [ - { - "description": "Cross-dimension conflict is detected and reported", - "deterministic_checks": [ - { "type": "output_contains", "value": "conflict" } - ] - }, - { - "description": "A blackboard alert is raised for the conflict", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["blackboard_alert", "alert"] - } - ] - }, - { - "description": "Detects the specific contradiction between competitive maturity signals and early-stage sizing data and reports it to the orchestrator" - } - ] - }, { "id": "analyst-large-source-delegation", "description": "Analyst delegates large documents to the source-chunker agent instead of processing directly", @@ -260,256 +6,29 @@ "expectations": [ { "description": "The source-chunker agent is referenced for delegation", - "deterministic_checks": [ - { "type": "output_contains", "value": "source-chunker" } - ] - }, - { - "description": "The token threshold for chunking is referenced", "deterministic_checks": [ { - "type": "output_contains_any", - "values": ["15K", "15,000", "15000", "token"] + "type": "output_contains", + "value": "source-chunker" } ] }, { - "description": "Delegates to the source-chunker agent for parallel chunk processing instead of attempting single-pass analysis" - } - ] - }, - { - "id": "analyst-methodology-gate-competitive", - "description": "Analyst writes a methodology plan to the blackboard BEFORE conducting any web research for competitive dimension", - "prompt": "You are a dimension-analyst for competitive research on 'cloud storage providers'. Blackboard scope: cloud-storage. Skill to load: skills/competitive-analysis/SKILL.md. Your blackboard key: findings_competitive. 
Before researching, you must write a methodology plan to the blackboard.", - "expectations": [ - { - "description": "Methodology plan is written to file before any WebSearch", - "deterministic_checks": [ - { "type": "output_contains", "value": "methodology_plan_competitive" } - ] - }, - { - "description": "Plan references Porter's 5 Forces framework", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["Porter", "5 Forces", "Five Forces"] - } - ] - }, - { - "description": "Plan references Competitor Matrix framework", - "deterministic_checks": [ - { "type": "output_contains", "value": "Competitor Matrix" } - ] - }, - { - "description": "methodology_plan file is written before any WebSearch invocation in execution order" - } - ] - }, - { - "id": "analyst-methodology-gate-sizing", - "description": "Analyst writes a methodology plan referencing TAM/SAM/SOM before conducting sizing research", - "prompt": "You are a dimension-analyst for sizing research on 'enterprise data lakehouse platforms'. Blackboard scope: data-lakehouse. Skill to load: skills/market-sizing/SKILL.md. Your blackboard key: findings_sizing. 
Before researching, you must write a methodology plan to the blackboard.", - "expectations": [ - { - "description": "Methodology plan is written to file before any WebSearch", - "deterministic_checks": [ - { "type": "output_contains", "value": "methodology_plan_sizing" } - ] - }, - { - "description": "Plan references TAM/SAM/SOM hierarchy", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["TAM", "SAM", "SOM"] - } - ] - }, - { - "description": "methodology_plan file is written before any WebSearch invocation in execution order" - } - ] - }, - { - "id": "analyst-methodology-gate-trends", - "description": "Analyst writes a methodology plan referencing Macro Trends, Micro Trends, and Transitional Scenario Graph before conducting trends research", - "prompt": "You are a dimension-analyst for trends research on 'digital twin technology in manufacturing'. Blackboard scope: digital-twin-mfg. Skill to load: skills/trend-analysis/SKILL.md. Your blackboard key: findings_trends. 
Before researching, you must write a methodology plan to the blackboard.", - "expectations": [ - { - "description": "Methodology plan is written to file before any WebSearch", - "deterministic_checks": [ - { "type": "output_contains", "value": "methodology_plan_trends" } - ] - }, - { - "description": "Plan references Macro Trends framework", - "deterministic_checks": [ - { "type": "output_contains", "value": "Macro Trends" } - ] - }, - { - "description": "Plan references Micro Trends framework", - "deterministic_checks": [ - { "type": "output_contains", "value": "Micro Trends" } - ] - }, - { - "description": "Plan references Transitional Scenario Graph", - "deterministic_checks": [ - { "type": "output_contains", "value": "Transitional Scenario Graph" } - ] - }, - { - "description": "methodology_plan file is written before any WebSearch invocation in execution order" - } - ] - }, - { - "id": "analyst-methodology-gate-customer", - "description": "Analyst writes a methodology plan referencing Personas, JTBD, and Journey Mapping before conducting customer research", - "prompt": "You are a dimension-analyst for customer research on 'B2B fintech payment solutions'. Blackboard scope: b2b-fintech. Skill to load: skills/customer-research/SKILL.md. Your blackboard key: findings_customer. 
Before researching, you must write a methodology plan to the blackboard.", - "expectations": [ - { - "description": "Methodology plan is written to file before any WebSearch", - "deterministic_checks": [ - { "type": "output_contains", "value": "methodology_plan_customer" } - ] - }, - { - "description": "Plan references Personas framework", - "deterministic_checks": [ - { "type": "output_contains", "value": "Personas" } - ] - }, - { - "description": "Plan references Jobs To Be Done framework", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["JTBD", "Jobs To Be Done", "Jobs-to-be-Done"] - } - ] - }, - { - "description": "Plan references Journey Mapping framework", - "deterministic_checks": [ - { "type": "output_contains", "value": "Journey Mapping" } - ] - }, - { - "description": "methodology_plan file is written before any WebSearch invocation in execution order" - } - ] - }, - { - "id": "analyst-methodology-gate-tech", - "description": "Analyst writes a methodology plan referencing TRL, Hype Cycle, and Build vs Buy before conducting tech research", - "prompt": "You are a dimension-analyst for tech research on 'federated learning platforms'. Blackboard scope: federated-learning. Skill to load: skills/tech-assessment/SKILL.md. Your blackboard key: findings_tech. 
Before researching, you must write a methodology plan to the blackboard.", - "expectations": [ - { - "description": "Methodology plan is written to file before any WebSearch", - "deterministic_checks": [ - { "type": "output_contains", "value": "methodology_plan_tech" } - ] - }, - { - "description": "Plan references Technology Readiness Level", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["TRL", "Technology Readiness Level"] - } - ] - }, - { - "description": "Plan references Hype Cycle framework", - "deterministic_checks": [ - { "type": "output_contains", "value": "Hype Cycle" } - ] - }, - { - "description": "Plan references Build vs Buy analysis", + "description": "The token threshold for chunking is referenced", "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Build vs Buy", "Build-vs-Buy", "Build versus Buy"] + "values": [ + "15K", + "15,000", + "15000", + "token" + ] } ] }, { - "description": "methodology_plan file is written before any WebSearch invocation in execution order" - } - ] - }, - { - "id": "analyst-methodology-gate-financial", - "description": "Analyst writes a methodology plan referencing Unit Economics, Revenue Model, and Rule of 40 before conducting financial research", - "prompt": "You are a dimension-analyst for financial research on 'vertical SaaS for construction management'. Blackboard scope: construction-saas. Skill to load: skills/financial-analysis/SKILL.md. Your blackboard key: findings_financial. 
Before researching, you must write a methodology plan to the blackboard.", - "expectations": [ - { - "description": "Methodology plan is written to file before any WebSearch", - "deterministic_checks": [ - { "type": "output_contains", "value": "methodology_plan_financial" } - ] - }, - { - "description": "Plan references Unit Economics framework", - "deterministic_checks": [ - { "type": "output_contains", "value": "Unit Economics" } - ] - }, - { - "description": "Plan references Revenue Model framework", - "deterministic_checks": [ - { "type": "output_contains", "value": "Revenue Model" } - ] - }, - { - "description": "Plan references Rule of 40", - "deterministic_checks": [ - { "type": "output_contains", "value": "Rule of 40" } - ] - }, - { - "description": "methodology_plan file is written before any WebSearch invocation in execution order" - } - ] - }, - { - "id": "analyst-methodology-gate-regulatory", - "description": "Analyst writes a methodology plan referencing Framework Identification, Penalty Ranges, and Risk Matrix before conducting regulatory research", - "prompt": "You are a dimension-analyst for regulatory research on 'cross-border cryptocurrency exchanges'. Blackboard scope: crypto-exchanges. Skill to load: skills/regulatory-review/SKILL.md. Your blackboard key: findings_regulatory. 
Before researching, you must write a methodology plan to the blackboard.", - "expectations": [ - { - "description": "Methodology plan is written to file before any WebSearch", - "deterministic_checks": [ - { "type": "output_contains", "value": "methodology_plan_regulatory" } - ] - }, - { - "description": "Plan references Framework Identification", - "deterministic_checks": [ - { "type": "output_contains", "value": "Framework Identification" } - ] - }, - { - "description": "Plan references Penalty Ranges", - "deterministic_checks": [ - { "type": "output_contains", "value": "Penalty Ranges" } - ] - }, - { - "description": "Plan references Risk Matrix", - "deterministic_checks": [ - { "type": "output_contains", "value": "Risk Matrix" } - ] - }, - { - "description": "methodology_plan file is written before any WebSearch invocation in execution order" + "description": "Delegates to the source-chunker agent for parallel chunk processing instead of attempting single-pass analysis" } ] }, @@ -523,7 +42,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Competitive Rivalry", "competitive rivalry"] + "values": [ + "Competitive Rivalry", + "competitive rivalry" + ] } ] }, @@ -532,7 +54,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Supplier Power", "supplier power", "Bargaining Power of Suppliers"] + "values": [ + "Supplier Power", + "supplier power", + "Bargaining Power of Suppliers" + ] } ] }, @@ -541,7 +67,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Buyer Power", "buyer power", "Bargaining Power of Buyers"] + "values": [ + "Buyer Power", + "buyer power", + "Bargaining Power of Buyers" + ] } ] }, @@ -550,7 +80,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Threat of Substitution", "threat of substitution", "Threat of Substitutes"] + "values": [ + "Threat of Substitution", + "threat of substitution", + "Threat of Substitutes" + ] } ] }, @@ -559,7 
+93,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Threat of New Entry", "threat of new entry", "Threat of New Entrants"] + "values": [ + "Threat of New Entry", + "threat of new entry", + "Threat of New Entrants" + ] } ] }, @@ -568,7 +106,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["HIGH", "MODERATE", "LOW"] + "values": [ + "HIGH", + "MODERATE", + "LOW" + ] } ] }, @@ -585,7 +127,10 @@ { "description": "Mermaid quadrantChart syntax is used", "deterministic_checks": [ - { "type": "output_contains", "value": "quadrantChart" } + { + "type": "output_contains", + "value": "quadrantChart" + } ] }, { @@ -593,7 +138,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["```mermaid", "mermaid"] + "values": [ + "```mermaid", + "mermaid" + ] } ] }, @@ -612,7 +160,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Macro Trends", "Macro trends", "macro trends"] + "values": [ + "Macro Trends", + "Macro trends", + "macro trends" + ] } ] }, @@ -621,7 +173,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Micro Trends", "Micro trends", "micro trends"] + "values": [ + "Micro Trends", + "Micro trends", + "micro trends" + ] } ] }, @@ -630,14 +186,21 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Emerging Signals", "emerging signals", "Emerging signals"] + "values": [ + "Emerging Signals", + "emerging signals", + "Emerging signals" + ] } ] }, { "description": "Mermaid stateDiagram-v2 is used for scenario transitions", "deterministic_checks": [ - { "type": "output_contains", "value": "stateDiagram-v2" } + { + "type": "output_contains", + "value": "stateDiagram-v2" + } ] }, { @@ -645,7 +208,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Terminal Scenarios", "terminal scenarios", "Terminal scenarios"] + "values": [ + "Terminal Scenarios", + "terminal scenarios", + "Terminal scenarios" 
+ ] } ] }, @@ -654,7 +221,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Monitoring Indicators", "monitoring indicators", "Monitoring indicators"] + "values": [ + "Monitoring Indicators", + "monitoring indicators", + "Monitoring indicators" + ] } ] }, @@ -673,7 +244,12 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["r > 0.3", "r < -0.3", "|r| > 0.3", "0.3"] + "values": [ + "r > 0.3", + "r < -0.3", + "|r| > 0.3", + "0.3" + ] } ] }, @@ -690,32 +266,50 @@ { "description": "TAM is present with dollar figure", "deterministic_checks": [ - { "type": "output_contains", "value": "TAM" } + { + "type": "output_contains", + "value": "TAM" + } ] }, { "description": "SAM is present with dollar figure", "deterministic_checks": [ - { "type": "output_contains", "value": "SAM" } + { + "type": "output_contains", + "value": "SAM" + } ] }, { "description": "SOM is present with dollar figure", "deterministic_checks": [ - { "type": "output_contains", "value": "SOM" } + { + "type": "output_contains", + "value": "SOM" + } ] }, { "description": "Dollar figures are present", "deterministic_checks": [ - { "type": "output_contains", "value": "$" } + { + "type": "output_contains", + "value": "$" + } ] }, { "description": "No placeholder values are used", "deterministic_checks": [ - { "type": "output_not_contains", "value": "$X.XB" }, - { "type": "output_not_contains", "value": "$X.X" } + { + "type": "output_not_contains", + "value": "$X.XB" + }, + { + "type": "output_not_contains", + "value": "$X.X" + } ] }, { @@ -733,7 +327,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Bear", "bear", "Conservative"] + "values": [ + "Bear", + "bear", + "Conservative" + ] } ] }, @@ -742,7 +340,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Base", "base", "Moderate"] + "values": [ + "Base", + "base", + "Moderate" + ] } ] }, @@ -751,7 +353,11 @@ "deterministic_checks": [ { "type": 
"output_contains_any", - "values": ["Bull", "bull", "Aggressive"] + "values": [ + "Bull", + "bull", + "Aggressive" + ] } ] }, @@ -768,13 +374,19 @@ { "description": "CAC (Customer Acquisition Cost) is present", "deterministic_checks": [ - { "type": "output_contains", "value": "CAC" } + { + "type": "output_contains", + "value": "CAC" + } ] }, { "description": "LTV (Lifetime Value) is present", "deterministic_checks": [ - { "type": "output_contains", "value": "LTV" } + { + "type": "output_contains", + "value": "LTV" + } ] }, { @@ -782,7 +394,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["LTV:CAC", "LTV/CAC", "LTV to CAC"] + "values": [ + "LTV:CAC", + "LTV/CAC", + "LTV to CAC" + ] } ] }, @@ -791,7 +407,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["payback", "Payback", "payback period"] + "values": [ + "payback", + "Payback", + "payback period" + ] } ] }, @@ -808,7 +428,10 @@ { "description": "Rule of 40 is referenced", "deterministic_checks": [ - { "type": "output_contains", "value": "Rule of 40" } + { + "type": "output_contains", + "value": "Rule of 40" + } ] }, { @@ -826,7 +449,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["TRL", "Technology Readiness Level"] + "values": [ + "TRL", + "Technology Readiness Level" + ] } ] }, @@ -854,7 +480,14 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["FDA", "MDR", "HIPAA", "CE", "EU AI Act", "framework"] + "values": [ + "FDA", + "MDR", + "HIPAA", + "CE", + "EU AI Act", + "framework" + ] } ] }, @@ -863,7 +496,12 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["|", "table", "Framework", "Jurisdiction"] + "values": [ + "|", + "table", + "Framework", + "Jurisdiction" + ] } ] }, @@ -882,55 +520,34 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["SendMessage", "team-lead", "source_chunking_request"] + "values": [ + "SendMessage", + "team-lead", + 
"source_chunking_request" + ] } ] }, { "description": "Does not attempt direct processing of 80K tokens", "deterministic_checks": [ - { "type": "output_not_contains", "value": "process directly" } + { + "type": "output_not_contains", + "value": "process directly" + } ] }, { "description": "Does not reference spawning sub-agents", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "spawn" } - ] - }, - { - "description": "Analyst routes the large document to team-lead via SendMessage with a source_chunking_request, delegating chunking coordination to the orchestrator rather than handling it itself" - } - ] - }, - { - "id": "analyst-methodology-plan-before-research", - "description": "Analyst always writes methodology_plan to file before any WebSearch regardless of dimension", - "prompt": "You are a dimension-analyst for competitive research on 'edge AI inference chips'. Blackboard scope: edge-ai-chips. Skill to load: skills/competitive-analysis/SKILL.md. Your blackboard key: findings_competitive. 
Conduct your research following proper methodology gating.", - "expectations": [ - { - "description": "Methodology plan is written to file", - "deterministic_checks": [ - { "type": "output_contains", "value": "methodology_plan" } - ] - }, - { - "description": "Methodology plan file write occurs before any WebSearch in logical execution order", "deterministic_checks": [ { - "type": "output_contains_any", - "values": ["methodology_plan", "jq", "REPORTS_DIR"] + "type": "output_not_contains", + "value": "spawn" } ] }, { - "description": "Does not skip methodology planning step", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "skip methodology" } - ] - }, - { - "description": "The methodology plan is written to file as the first research action, before any WebSearch or WebFetch calls" + "description": "Analyst routes the large document to team-lead via SendMessage with a source_chunking_request, delegating chunking coordination to the orchestrator rather than handling it itself" } ] } diff --git a/evals/agents/issue-architect/evals.json b/evals/agents/issue-architect/evals.json index deadda2..f1022ae 100644 --- a/evals/agents/issue-architect/evals.json +++ b/evals/agents/issue-architect/evals.json @@ -6,61 +6,36 @@ "expectations": [ { "description": "Issues include acceptance criteria", - "deterministic_checks": [ - { "type": "output_contains", "value": "acceptance criteria" } - ] - }, - { - "description": "Issues are scoped to sprint size", "deterministic_checks": [ { - "type": "output_contains_any", - "values": ["sprint", "sprint-sized"] + "type": "output_contains", + "value": "acceptance criteria" } ] }, { - "description": "Deprecated v0.3 patterns are not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Each issue is atomic, completable in 1-2 weeks, with specific measurable acceptance criteria and clear business context" - } - ] - }, - { - "id": 
"issues-atlatl-integration", - "description": "Issue architect uses Atlatl memory for recall and capture of issue context", - "prompt": "Convert our AI code review market research into GitHub issues. Check Atlatl for any prior issue sets we created for this topic. Use the issue-architect agent.", - "expectations": [ - { - "description": "Atlatl memory tools are referenced for recall or capture", + "description": "Issues are scoped to sprint size", "deterministic_checks": [ { "type": "output_contains_any", - "values": ["capture_memory", "Atlatl", "recall_memories"] + "values": [ + "sprint", + "sprint-sized" + ] } ] }, { - "description": "Correct namespace or tag for sigint research is used", + "description": "Deprecated v0.3 patterns are not used", "deterministic_checks": [ { - "type": "output_contains_any", - "values": ["_semantic/knowledge", "sigint-research"] + "type": "output_not_contains", + "value": "Subcog" } ] }, { - "description": "Deprecated patterns are not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Prior research findings are recalled from Atlatl to inform issue creation, and new issues are captured back for future reference" + "description": "Each issue is atomic, completable in 1-2 weeks, with specific measurable acceptance criteria and clear business context" } ] }, @@ -74,7 +49,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["elicitation", "decision_context", "priorities"] + "values": [ + "elicitation", + "decision_context", + "priorities" + ] } ] }, @@ -91,7 +70,13 @@ { "description": "Preview or dry-run mode is acknowledged", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["preview", "dry"] } + { + "type": "output_contains_any", + "values": [ + "preview", + "dry" + ] + } ] }, { @@ -109,7 +94,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["enhancement", "research", "action-item"] + 
"values": [ + "enhancement", + "research", + "action-item" + ] } ] }, @@ -128,7 +117,12 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["dependency", "dependencies", "blocks", "blocked by"] + "values": [ + "dependency", + "dependencies", + "blocks", + "blocked by" + ] } ] }, diff --git a/evals/agents/report-synthesizer/evals.json b/evals/agents/report-synthesizer/evals.json index 1a2c92d..fa2cfff 100644 --- a/evals/agents/report-synthesizer/evals.json +++ b/evals/agents/report-synthesizer/evals.json @@ -6,91 +6,48 @@ "expectations": [ { "description": "Report includes an Executive Summary section", - "deterministic_checks": [ - { "type": "output_contains", "value": "Executive Summary" } - ] - }, - { - "description": "Report includes a Recommendations section", "deterministic_checks": [ { - "type": "output_contains_any", - "values": ["Recommendation", "recommendation"] + "type": "output_contains", + "value": "Executive Summary" } ] }, { - "description": "Report includes Mermaid visualizations", - "deterministic_checks": [ - { "type": "output_contains_any", "values": ["mermaid", "Mermaid"] } - ] - }, - { - "description": "Deprecated v0.3 patterns are not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Produces a full report with Executive Summary, Market Overview, Competitive Landscape, Trend Analysis, SWOT, Recommendations, Risk Assessment, and Appendix sections" - } - ] - }, - { - "id": "report-blackboard-integration", - "description": "Report synthesizer reads dimension findings from the blackboard in addition to state.json", - "prompt": "Generate a report from the research session. The blackboard task_id is 'ai-code-review' and contains findings from competitive, sizing, and trends analysts. 
Use the report-synthesizer agent.", - "expectations": [ - { - "description": "Blackboard is read for findings data", + "description": "Report includes a Recommendations section", "deterministic_checks": [ { "type": "output_contains_any", - "values": ["blackboard_read", "blackboard"] + "values": [ + "Recommendation", + "recommendation" + ] } ] }, { - "description": "Dimension findings keys are referenced", + "description": "Report includes Mermaid visualizations", "deterministic_checks": [ { "type": "output_contains_any", "values": [ - "findings_competitive", - "findings_sizing", - "findings_trends", - "findings_" + "mermaid", + "Mermaid" ] } ] }, { - "description": "Incorporates blackboard dimension findings alongside state.json data for complete coverage in the report" - } - ] - }, - { - "id": "report-atlatl-integration", - "description": "Report synthesizer recalls prior context and captures report completion to Atlatl", - "prompt": "Generate a report for the cloud infrastructure market research. Check Atlatl for any prior research on this topic first. 
Use the report-synthesizer agent.", - "expectations": [ - { - "description": "Atlatl memory recall or capture is referenced", + "description": "Deprecated v0.3 patterns are not used", "deterministic_checks": [ { - "type": "output_contains_any", - "values": ["recall_memories", "capture_memory"] + "type": "output_not_contains", + "value": "Subcog" } ] }, { - "description": "Deprecated patterns are not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Prior context is recalled from Atlatl to enrich the report, and report completion is captured back for future reference" + "description": "Produces a full report with Executive Summary, Market Overview, Competitive Landscape, Trend Analysis, SWOT, Recommendations, Risk Assessment, and Appendix sections" } ] }, @@ -104,7 +61,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["executive", "Executive"] + "values": [ + "executive", + "Executive" + ] } ] }, @@ -113,7 +73,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["strategic", "bottom-line"] + "values": [ + "strategic", + "bottom-line" + ] } ] }, @@ -130,7 +93,13 @@ { "description": "Investor audience is acknowledged", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["investor", "Investor"] } + { + "type": "output_contains_any", + "values": [ + "investor", + "Investor" + ] + } ] }, { @@ -138,7 +107,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["TAM", "market opportunity"] + "values": [ + "TAM", + "market opportunity" + ] } ] }, @@ -157,7 +129,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Hypothesis", "hypothesis"] + "values": [ + "Hypothesis", + "hypothesis" + ] } ] }, @@ -166,7 +141,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["SUPPORTED", "CHALLENGED", "INCONCLUSIVE"] + "values": [ + "SUPPORTED", + "CHALLENGED", + 
"INCONCLUSIVE" + ] } ] }, @@ -185,14 +164,21 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["quadrantChart", "stateDiagram", "pie"] + "values": [ + "quadrantChart", + "stateDiagram", + "pie" + ] } ] }, { "description": "Mermaid syntax is used for visualizations", "deterministic_checks": [ - { "type": "output_contains", "value": "mermaid" } + { + "type": "output_contains", + "value": "mermaid" + } ] }, { diff --git a/evals/agents/research-orchestrator/evals.json b/evals/agents/research-orchestrator/evals.json index 5880976..6dd0539 100644 --- a/evals/agents/research-orchestrator/evals.json +++ b/evals/agents/research-orchestrator/evals.json @@ -1,81 +1,4 @@ [ - { - "id": "orchestrator-happy-path-3-dimensions", - "description": "Orchestrator spawns exactly 3 dimension analysts for a prompt with 3 priorities, coordinates via blackboard, and avoids deprecated tools", - "prompt": "Research the AI code review market. Focus on competitive landscape, market sizing, and trends. 
Use the research-orchestrator agent.", - "expectations": [ - { - "description": "Blackboard is created or referenced for team coordination", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["blackboard_create", "blackboard"] - } - ] - }, - { - "description": "Dimension analyst agents are referenced as the worker pattern", - "deterministic_checks": [ - { "type": "output_contains", "value": "dimension-analyst" } - ] - }, - { - "description": "At least one dimension findings key is referenced", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": [ - "findings_competitive", - "findings_sizing", - "findings_trends" - ] - } - ] - }, - { - "description": "Team status tracking is referenced", - "deterministic_checks": [ - { "type": "output_contains", "value": "team_status" } - ] - }, - { - "description": "Deprecated v0.3 patterns are not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "TodoWrite" }, - { "type": "output_not_contains", "value": "Subcog" }, - { "type": "output_not_contains", "value": "market-researcher" } - ] - }, - { - "description": "Spawns exactly 3 analysts (one per priority: competitive, sizing, trends) and not all 7 dimensions" - }, - { - "description": "Waits for all analysts to complete before merging findings into unified output" - } - ] - }, - { - "id": "orchestrator-single-dimension", - "description": "Orchestrator spawns only one analyst when a single research priority is given", - "prompt": "Research the electric vehicle charging market. Only focus on competitive analysis. 
Use the research-orchestrator agent.", - "expectations": [ - { - "description": "Only one dimension analyst is referenced since only one priority was given", - "deterministic_checks": [ - { "type": "output_contains", "value": "dimension-analyst" } - ] - }, - { - "description": "Blackboard coordination is still used even for a single dimension", - "deterministic_checks": [ - { "type": "output_contains", "value": "blackboard" } - ] - }, - { - "description": "Does not spawn unnecessary analysts for dimensions not requested by the user" - } - ] - }, { "id": "orchestrator-all-dimensions", "description": "Orchestrator handles all 8 dimensions with batching to respect the max 5 concurrent limit", @@ -83,124 +6,29 @@ "expectations": [ { "description": "Multiple dimension analysts are referenced for the full set", - "deterministic_checks": [ - { "type": "output_contains", "value": "dimension-analyst" } - ] - }, - { - "description": "Concurrency limit or batching is acknowledged", "deterministic_checks": [ { - "type": "output_contains_any", - "values": ["max 5", "batch", "concurrent", "5 concurrent"] + "type": "output_contains", + "value": "dimension-analyst" } ] }, { - "description": "Batches analyst spawning so no more than 5 run concurrently, with remaining 2 queued" - } - ] - }, - { - "id": "orchestrator-blackboard-lifecycle", - "description": "Verifies the full blackboard lifecycle: create, write elicitation, track status, read findings", - "prompt": "Research the autonomous drone delivery market focusing on regulatory and competitive dimensions. 
Use the research-orchestrator agent and show the blackboard coordination steps.", - "expectations": [ - { - "description": "Blackboard creation is referenced", + "description": "Concurrency limit or batching is acknowledged", "deterministic_checks": [ { "type": "output_contains_any", - "values": ["blackboard_create", "blackboard"] + "values": [ + "max 5", + "batch", + "concurrent", + "5 concurrent" + ] } ] }, { - "description": "Elicitation context is written to the blackboard", - "deterministic_checks": [ - { "type": "output_contains", "value": "elicitation" } - ] - }, - { - "description": "Team status is tracked on the blackboard", - "deterministic_checks": [ - { "type": "output_contains", "value": "team_status" } - ] - }, - { - "description": "Blackboard is created before analysts are spawned, and findings are read from it after all analysts complete" - } - ] - }, - { - "id": "orchestrator-atlatl-recall", - "description": "Verifies orchestrator recalls prior Atlatl memories before beginning research", - "prompt": "Research the AI code review market with competitive and sizing focus. We researched this topic last month. Use the research-orchestrator agent.", - "expectations": [ - { - "description": "Atlatl recall is invoked to search for prior research", - "deterministic_checks": [ - { "type": "output_contains", "value": "recall_memories" } - ] - }, - { - "description": "The sigint-research tag is used for memory recall", - "deterministic_checks": [ - { "type": "output_contains", "value": "sigint-research" } - ] - }, - { - "description": "Prior findings from Atlatl inform the current research plan rather than starting from scratch" - } - ] - }, - { - "id": "orchestrator-atlatl-capture", - "description": "Verifies orchestrator captures research summary to Atlatl after completion", - "prompt": "Research the edge computing market with sizing and trends focus. 
Use the research-orchestrator agent and persist findings to memory.", - "expectations": [ - { - "description": "Memory capture is invoked after research completes", - "deterministic_checks": [ - { "type": "output_contains", "value": "capture_memory" } - ] - }, - { - "description": "Memory enrichment is invoked after capture", - "deterministic_checks": [ - { "type": "output_contains", "value": "enrich_memory" } - ] - }, - { - "description": "The semantic knowledge namespace is used for storing findings", - "deterministic_checks": [ - { "type": "output_contains", "value": "_semantic/knowledge" } - ] - }, - { - "description": "A summary of key findings is captured to Atlatl, not raw unprocessed data dumps" - } - ] - }, - { - "id": "orchestrator-conflict-handling", - "description": "Verifies orchestrator detects and resolves conflicting findings from different dimension analysts", - "prompt": "Research the quantum computing market focusing on sizing and financial dimensions. The sizing data may conflict with financial projections given the early stage of the market. 
Use the research-orchestrator agent.", - "expectations": [ - { - "description": "Conflict detection is referenced in the orchestration flow", - "deterministic_checks": [ - { "type": "output_contains", "value": "conflict" } - ] - }, - { - "description": "Conflict resolution is addressed", - "deterministic_checks": [ - { "type": "output_contains", "value": "resolution" } - ] - }, - { - "description": "Conflicts detected from the blackboard are read and resolved with explicit rationale for the chosen resolution" + "description": "Batches analyst spawning so no more than 5 run concurrently, with remaining 2 queued" } ] }, @@ -212,13 +40,19 @@ { "description": "state.json is referenced as the output target", "deterministic_checks": [ - { "type": "output_contains", "value": "state.json" } + { + "type": "output_contains", + "value": "state.json" + } ] }, { "description": "Findings are written to the state file", "deterministic_checks": [ - { "type": "output_contains", "value": "findings" } + { + "type": "output_contains", + "value": "findings" + } ] }, { diff --git a/evals/commands/evals.json b/evals/commands/evals.json index 09f1ded..9bae7a1 100644 --- a/evals/commands/evals.json +++ b/evals/commands/evals.json @@ -18,7 +18,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["research-orchestrator", "research orchestrator"] + "values": [ + "research-orchestrator", + "research orchestrator" + ] } ] }, @@ -113,7 +116,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["research-orchestrator", "research orchestrator"] + "values": [ + "research-orchestrator", + "research orchestrator" + ] } ] }, @@ -178,99 +184,6 @@ } ] }, - { - "id": "start-no-subcog", - "description": "Verify Subcog is never referenced in /sigint:start transcript", - "prompt": "/sigint:start Digital health platforms for chronic disease management", - "expectations": [ - { - "description": "Subcog never appears in the output", - "deterministic_checks": [ - 
{ - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subcog" - }, - { - "type": "output_not_contains", - "value": "SUBCOG" - } - ] - }, - { - "description": "Output uses Atlatl memory system instead of Subcog", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["Atlatl", "atlatl", "capture_memory", "recall_memories"] - } - ] - }, - { - "description": "Output does not contain other deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, - { - "id": "start-atlatl-capture", - "description": "Verify /sigint:start references Atlatl memory capture for persisting research context", - "prompt": "/sigint:start Autonomous vehicle regulations in the EU", - "expectations": [ - { - "description": "Output references Atlatl memory capture for research persistence", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": [ - "capture_memory", - "Atlatl", - "atlatl", - "memory capture", - "persist" - ] - } - ] - }, - { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, { "id": "start-orchestrator-delegation", "description": "Verify /sigint:start delegates to research-orchestrator, not market-researcher", @@ -281,7 +194,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["research-orchestrator", "research orchestrator"] + "values": [ + "research-orchestrator", + "research orchestrator" + ] } ] }, @@ -315,263 +231,31 @@ }, 
{ "id": "start-task-create", - "description": "Verify /sigint:start references TaskCreate for research plan execution", - "prompt": "/sigint:start Precision fermentation market for alternative proteins", - "expectations": [ - { - "description": "Output references TaskCreate or task creation for research plan", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["TaskCreate", "task", "research plan"] - } - ] - }, - { - "description": "Output references research-orchestrator", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["research-orchestrator", "research orchestrator"] - } - ] - }, - { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, - { - "id": "init-happy-path", - "description": "Default /sigint:init loads Atlatl context and displays readiness", - "prompt": "/sigint:init", - "expectations": [ - { - "description": "Output indicates Atlatl context loading or initialization", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": [ - "Atlatl", - "atlatl", - "context loaded", - "initialized", - "memory" - ] - } - ] - }, - { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, - { - "id": "init-full-flag", - "description": "/sigint:init --full triggers comprehensive memory load", - "prompt": "/sigint:init --full", - "expectations": [ - { - 
"description": "Output indicates comprehensive or full memory load", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": [ - "full", - "comprehensive", - "all memories", - "complete context", - "recall_memories" - ] - } - ] - }, - { - "description": "Output references Atlatl memory system", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["Atlatl", "atlatl", "recall_memories", "memory"] - } - ] - }, - { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, - { - "id": "init-topic-flag", - "description": "/sigint:init --topic scopes recall to a specific topic", - "prompt": "/sigint:init --topic \"autonomous vehicles\"", - "expectations": [ - { - "description": "Output scopes context loading to the specified topic", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["autonomous vehicle", "autonomous vehicles", "topic"] - } - ] - }, - { - "description": "Output references recall_memories or Atlatl for topic-scoped retrieval", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["recall_memories", "Atlatl", "atlatl", "memory"] - } - ] - }, - { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, - { - "id": "init-no-subcog", - "description": "Verify /sigint:init uses Atlatl namespaces, not Subcog namespaces", - "prompt": 
"/sigint:init --full", - "expectations": [ - { - "description": "Output never references Subcog namespace format", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subcog" - } - ] - }, - { - "description": "Output references Atlatl namespace conventions", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": [ - "_semantic", - "_procedural", - "_episodic", - "Atlatl", - "atlatl" - ] - } - ] - }, - { - "description": "Output does not contain other deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, - { - "id": "init-atlatl-namespaces", - "description": "Verify /sigint:init references correct Atlatl namespaces", - "prompt": "/sigint:init", + "description": "Verify /sigint:start references TaskCreate for research plan execution", + "prompt": "/sigint:start Precision fermentation market for alternative proteins", "expectations": [ { - "description": "Output references _semantic/knowledge namespace", + "description": "Output references TaskCreate or task creation for research plan", "deterministic_checks": [ { "type": "output_contains_any", - "values": ["_semantic/knowledge", "_semantic", "semantic"] + "values": [ + "TaskCreate", + "task", + "research plan" + ] } ] }, { - "description": "Output references _procedural/patterns namespace", + "description": "Output references research-orchestrator", "deterministic_checks": [ { "type": "output_contains_any", - "values": ["_procedural/patterns", "_procedural", "procedural"] + "values": [ + "research-orchestrator", + "research orchestrator" + ] } ] }, @@ -631,7 +315,12 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["version", "2.0", "defaults", "topics"] + "values": [ + 
"version", + "2.0", + "defaults", + "topics" + ] } ] }, @@ -668,7 +357,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["report-synthesizer", "report synthesizer"] + "values": [ + "report-synthesizer", + "report synthesizer" + ] } ] }, @@ -705,7 +397,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["executive-brief", "executive brief", "executive"] + "values": [ + "executive-brief", + "executive brief", + "executive" + ] } ] }, @@ -714,7 +410,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["C-suite", "c-suite", "C-Suite"] + "values": [ + "C-suite", + "c-suite", + "C-Suite" + ] } ] }, @@ -723,7 +423,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["report-synthesizer", "report synthesizer"] + "values": [ + "report-synthesizer", + "report synthesizer" + ] } ] }, @@ -801,7 +504,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Agent", "agent", "report-synthesizer"] + "values": [ + "Agent", + "agent", + "report-synthesizer" + ] } ] }, @@ -833,43 +540,6 @@ } ] }, - { - "id": "report-blackboard-read", - "description": "Verify /sigint:report references blackboard for reading research findings", - "prompt": "/sigint:report", - "expectations": [ - { - "description": "Output references blackboard_read or blackboard for gathering findings", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["blackboard_read", "blackboard", "findings"] - } - ] - }, - { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, { "id": "report-no-market-researcher", "description": "Verify /sigint:report never 
references deprecated market-researcher", @@ -889,7 +559,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["report-synthesizer", "report synthesizer"] + "values": [ + "report-synthesizer", + "report synthesizer" + ] } ] }, @@ -922,7 +595,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["dimension-analyst", "dimension analyst"] + "values": [ + "dimension-analyst", + "dimension analyst" + ] } ] }, @@ -931,7 +607,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["competitive", "landscape", "competitive-landscape"] + "values": [ + "competitive", + "landscape", + "competitive-landscape" + ] } ] }, @@ -965,104 +645,25 @@ "expectations": [ { "description": "Output references the regulatory-review methodology or skill", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["regulatory-review", "regulatory review", "regulatory"] - } - ] - }, - { - "description": "Output references dimension-analyst for execution", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["dimension-analyst", "dimension analyst"] - } - ] - }, - { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, - { - "id": "augment-atlatl-recall", - "description": "Verify /sigint:augment uses recall_memories for prior research context", - "prompt": "/sigint:augment financial-analysis", - "expectations": [ - { - "description": "Output references recall_memories or Atlatl recall for prior context", "deterministic_checks": [ { "type": "output_contains_any", "values": [ - "recall_memories", - "Atlatl", - "atlatl", - "prior research", - "existing findings" + 
"regulatory-review", + "regulatory review", + "regulatory" ] } ] }, { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, - { - "id": "augment-atlatl-capture", - "description": "Verify /sigint:augment uses capture_memory for persisting findings", - "prompt": "/sigint:augment market-sizing", - "expectations": [ - { - "description": "Output references capture_memory or Atlatl capture for findings persistence", + "description": "Output references dimension-analyst for execution", "deterministic_checks": [ { "type": "output_contains_any", "values": [ - "capture_memory", - "Atlatl", - "atlatl", - "capture", - "persist" + "dimension-analyst", + "dimension analyst" ] } ] @@ -1090,56 +691,6 @@ } ] }, - { - "id": "augment-no-subcog", - "description": "Verify Subcog never appears in /sigint:augment transcript", - "prompt": "/sigint:augment tech-assessment", - "expectations": [ - { - "description": "Subcog never appears in any form", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subcog" - }, - { - "type": "output_not_contains", - "value": "SUBCOG" - } - ] - }, - { - "description": "Output uses Atlatl memory system", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["Atlatl", "atlatl", "capture_memory", "recall_memories"] - } - ] - }, - { - "description": "Output does not contain other deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - 
] - } - ] - }, { "id": "augment-no-market-researcher", "description": "Verify market-researcher is never referenced in /sigint:augment", @@ -1159,7 +710,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["dimension-analyst", "dimension analyst"] + "values": [ + "dimension-analyst", + "dimension analyst" + ] } ] }, @@ -1192,7 +746,12 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["source-chunker", "source chunker", "chunking", "chunk"] + "values": [ + "source-chunker", + "source chunker", + "chunking", + "chunk" + ] } ] }, @@ -1229,7 +788,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["issue-architect", "issue architect"] + "values": [ + "issue-architect", + "issue architect" + ] } ] }, @@ -1281,7 +843,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["issue-architect", "issue architect"] + "values": [ + "issue-architect", + "issue architect" + ] } ] }, @@ -1318,7 +883,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Agent", "agent", "issue-architect"] + "values": [ + "Agent", + "agent", + "issue-architect" + ] } ] }, @@ -1350,43 +919,6 @@ } ] }, - { - "id": "issues-atlatl-capture", - "description": "Verify /sigint:issues uses Atlatl for memory persistence", - "prompt": "/sigint:issues", - "expectations": [ - { - "description": "Output references Atlatl memory for persisting issue creation context", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["Atlatl", "atlatl", "capture_memory", "blackboard"] - } - ] - }, - { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, { "id": 
"issues-no-subcog", "description": "Verify Subcog never appears in /sigint:issues transcript", @@ -1396,63 +928,26 @@ "description": "Subcog never appears in any form", "deterministic_checks": [ { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subcog" - }, - { - "type": "output_not_contains", - "value": "SUBCOG" - } - ] - }, - { - "description": "Output does not contain other deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, - { - "id": "update-happy-path", - "description": "Default /sigint:update triggers research refresh flow", - "prompt": "/sigint:update", - "expectations": [ - { - "description": "Output indicates research refresh or update process", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["refresh", "update", "re-research", "latest", "current"] + "type": "output_not_contains", + "value": "Subcog" + }, + { + "type": "output_not_contains", + "value": "subcog" + }, + { + "type": "output_not_contains", + "value": "SUBCOG" } ] }, { - "description": "Output does not contain deprecated patterns", + "description": "Output does not contain other deprecated patterns", "deterministic_checks": [ { "type": "output_not_contains", "value": "TodoWrite" }, - { - "type": "output_not_contains", - "value": "Subcog" - }, { "type": "output_not_contains", "value": "subagent_type" @@ -1466,22 +961,25 @@ ] }, { - "id": "update-area-flag", - "description": "/sigint:update --area targets update to specific research area", - "prompt": "/sigint:update --area competitive-landscape", + "id": "update-happy-path", + "description": "Default /sigint:update triggers research refresh flow", + "prompt": "/sigint:update", "expectations": [ { - "description": "Output acknowledges the 
targeted area", + "description": "Output indicates research refresh or update process", "deterministic_checks": [ { "type": "output_contains_any", - "values": ["competitive", "landscape", "competitive-landscape"] + "values": [ + "refresh", + "update", + "re-research", + "latest", + "current" + ] } ] }, - { - "description": "Output indicates targeted rather than full refresh" - }, { "description": "Output does not contain deprecated patterns", "deterministic_checks": [ @@ -1506,25 +1004,26 @@ ] }, { - "id": "update-atlatl-capture", - "description": "Verify /sigint:update references capture_memory for updated findings", - "prompt": "/sigint:update", + "id": "update-area-flag", + "description": "/sigint:update --area targets update to specific research area", + "prompt": "/sigint:update --area competitive-landscape", "expectations": [ { - "description": "Output references capture_memory or Atlatl for persisting updates", + "description": "Output acknowledges the targeted area", "deterministic_checks": [ { "type": "output_contains_any", "values": [ - "capture_memory", - "Atlatl", - "atlatl", - "update_memory", - "capture" + "competitive", + "landscape", + "competitive-landscape" ] } ] }, + { + "description": "Output indicates targeted rather than full refresh" + }, { "description": "Output does not contain deprecated patterns", "deterministic_checks": [ @@ -1599,7 +1098,14 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["INC", "DEC", "CONST", "trend", "Trend", "recalcul"] + "values": [ + "INC", + "DEC", + "CONST", + "trend", + "Trend", + "recalcul" + ] } ] }, @@ -1636,7 +1142,13 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["resum", "restor", "session", "previous", "continu"] + "values": [ + "resum", + "restor", + "session", + "previous", + "continu" + ] } ] }, @@ -1673,7 +1185,13 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["session", "Session", "list", "available", "topic"] + 
"values": [ + "session", + "Session", + "list", + "available", + "topic" + ] } ] }, @@ -1703,49 +1221,6 @@ } ] }, - { - "id": "resume-atlatl-recall", - "description": "Verify /sigint:resume uses recall_memories to restore session context", - "prompt": "/sigint:resume", - "expectations": [ - { - "description": "Output references recall_memories or Atlatl for session restoration", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": [ - "recall_memories", - "Atlatl", - "atlatl", - "memory", - "recall" - ] - } - ] - }, - { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, { "id": "resume-no-subcog", "description": "Verify Subcog never appears in /sigint:resume transcript", @@ -1787,58 +1262,6 @@ } ] }, - { - "id": "resume-topic-specified", - "description": "/sigint:resume with topic loads topic-specific context", - "prompt": "/sigint:resume autonomous vehicles", - "expectations": [ - { - "description": "Output scopes restoration to the specified topic", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["autonomous vehicle", "autonomous vehicles"] - } - ] - }, - { - "description": "Output references recall_memories or Atlatl for topic-specific recall", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": [ - "recall_memories", - "Atlatl", - "atlatl", - "recall", - "memory" - ] - } - ] - }, - { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - 
}, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, { "id": "status-happy-path", "description": "Default /sigint:status renders a research dashboard", @@ -1930,49 +1353,6 @@ } ] }, - { - "id": "status-team-progress", - "description": "Verify /sigint:status shows team progress via blackboard", - "prompt": "/sigint:status", - "expectations": [ - { - "description": "Output references blackboard or team status tracking", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": [ - "blackboard", - "team_status", - "team progress", - "team", - "Team" - ] - } - ] - }, - { - "description": "Output does not contain deprecated patterns", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "TodoWrite" - }, - { - "type": "output_not_contains", - "value": "Subcog" - }, - { - "type": "output_not_contains", - "value": "subagent_type" - }, - { - "type": "output_not_contains", - "value": "market-researcher" - } - ] - } - ] - }, { "id": "status-coverage-gaps", "description": "Verify /sigint:status identifies coverage gaps in research", @@ -2072,19 +1452,31 @@ { "description": "Output references sigint.config.json as the created file", "deterministic_checks": [ - { "type": "output_contains", "value": "sigint.config.json" } + { + "type": "output_contains", + "value": "sigint.config.json" + } ] }, { "description": "Output does NOT create sigint.local.md", "deterministic_checks": [ - { "type": "output_not_contains", "value": "sigint.local.md" } + { + "type": "output_not_contains", + "value": "sigint.local.md" + } ] }, { "description": "Schema version 2.0 is used", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["2.0", "\"version\""] } + { + "type": "output_contains_any", + "values": [ + "2.0", + "\"version\"" + ] + } ] }, { @@ -2103,13 +1495,27 @@ { "description": "Output mentions sigint:migrate as the recommended next step", "deterministic_checks": [ - { "type": 
"output_contains_any", "values": ["sigint:migrate", "migrate"] } + { + "type": "output_contains_any", + "values": [ + "sigint:migrate", + "migrate" + ] + } ] }, { "description": "Output warns that a legacy configuration was found", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["legacy", "existing", "already exists", "found"] } + { + "type": "output_contains_any", + "values": [ + "legacy", + "existing", + "already exists", + "found" + ] + } ] }, { @@ -2125,19 +1531,31 @@ { "description": "Output references the topic slug in the created config", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["ai-code-assistants", "topics"] } + { + "type": "output_contains_any", + "values": [ + "ai-code-assistants", + "topics" + ] + } ] }, { "description": "Output references sigint.config.json as target", "deterministic_checks": [ - { "type": "output_contains", "value": "sigint.config.json" } + { + "type": "output_contains", + "value": "sigint.config.json" + } ] }, { "description": "Output does not create sigint.local.md", "deterministic_checks": [ - { "type": "output_not_contains", "value": "sigint.local.md" } + { + "type": "output_not_contains", + "value": "sigint.local.md" + } ] } ] @@ -2150,13 +1568,19 @@ { "description": "Output does NOT reference creating sigint.local.md", "deterministic_checks": [ - { "type": "output_not_contains", "value": "sigint.local.md" } + { + "type": "output_not_contains", + "value": "sigint.local.md" + } ] }, { "description": "Output does NOT reference .claude/ directory as config target", "deterministic_checks": [ - { "type": "output_not_contains", "value": ".claude/sigint" } + { + "type": "output_not_contains", + "value": ".claude/sigint" + } ] } ] @@ -2169,19 +1593,32 @@ { "description": "Output references writing sigint.config.json", "deterministic_checks": [ - { "type": "output_contains", "value": "sigint.config.json" } + { + "type": "output_contains", + "value": "sigint.config.json" + } ] }, { 
"description": "Output references writing CONTEXT.md", "deterministic_checks": [ - { "type": "output_contains", "value": "CONTEXT.md" } + { + "type": "output_contains", + "value": "CONTEXT.md" + } ] }, { "description": "Output references renaming original to .bak", "deterministic_checks": [ - { "type": "output_contains_any", "values": [".bak", "backup", "renamed"] } + { + "type": "output_contains_any", + "values": [ + ".bak", + "backup", + "renamed" + ] + } ] }, { @@ -2197,19 +1634,38 @@ { "description": "Output shows what would be written without committing", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["dry-run", "dry run", "would", "preview"] } + { + "type": "output_contains_any", + "values": [ + "dry-run", + "dry run", + "would", + "preview" + ] + } ] }, { "description": "Output shows sigint.config.json content that would be created", "deterministic_checks": [ - { "type": "output_contains", "value": "sigint.config.json" } + { + "type": "output_contains", + "value": "sigint.config.json" + } ] }, { "description": "Output explicitly states no files were written", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["no files", "not written", "dry run", "preview only"] } + { + "type": "output_contains_any", + "values": [ + "no files", + "not written", + "dry run", + "preview only" + ] + } ] } ] @@ -2222,13 +1678,30 @@ { "description": "Output detects that v2.0 config already exists", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["already exists", "existing", "found", "v2", "2.0"] } + { + "type": "output_contains_any", + "values": [ + "already exists", + "existing", + "found", + "v2", + "2.0" + ] + } ] }, { "description": "Output offers merge mode or asks for confirmation before overwriting", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["merge", "overwrite", "confirm", "already"] } + { + "type": "output_contains_any", + "values": [ + "merge", + "overwrite", + 
"confirm", + "already" + ] + } ] }, { @@ -2244,13 +1717,27 @@ { "description": "Output reports no source config files found", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["not found", "no config", "nothing to migrate", "does not exist"] } + { + "type": "output_contains_any", + "values": [ + "not found", + "no config", + "nothing to migrate", + "does not exist" + ] + } ] }, { "description": "Output suggests running /sigint:init to create a fresh config", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["sigint:init", "init"] } + { + "type": "output_contains_any", + "values": [ + "sigint:init", + "init" + ] + } ] } ] @@ -2263,19 +1750,37 @@ { "description": "Output references migrating runtime config fields", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["maxDimensions", "dimensionTimeout", "research", "runtime"] } + { + "type": "output_contains_any", + "values": [ + "maxDimensions", + "dimensionTimeout", + "research", + "runtime" + ] + } ] }, { "description": "Output produces sigint.config.json v2.0", "deterministic_checks": [ - { "type": "output_contains", "value": "sigint.config.json" } + { + "type": "output_contains", + "value": "sigint.config.json" + } ] }, { "description": "Old .sigint.config.json is renamed to .bak", "deterministic_checks": [ - { "type": "output_contains_any", "values": [".bak", "backed up", "renamed"] } + { + "type": "output_contains_any", + "values": [ + ".bak", + "backed up", + "renamed" + ] + } ] } ] diff --git a/evals/integration/evals.json b/evals/integration/evals.json index 8ccdebd..723f0d2 100644 --- a/evals/integration/evals.json +++ b/evals/integration/evals.json @@ -1,151 +1,4 @@ [ - { - "id": "e2e-start-to-findings", - "description": "Full /sigint:start flow from elicitation through orchestration to findings output", - "prompt": "Research the AI code assistant market. Priorities: competitive landscape, market sizing, trend analysis. Audience: investors. 
Timeline: this month.", - "expectations": [ - { - "description": "Research orchestrator is referenced as the coordination agent", - "deterministic_checks": [ - { "type": "output_contains", "value": "research-orchestrator" } - ] - }, - { - "description": "Dimension analyst agents are referenced as workers", - "deterministic_checks": [ - { "type": "output_contains", "value": "dimension-analyst" } - ] - }, - { - "description": "Blackboard is used for team coordination", - "deterministic_checks": [ - { "type": "output_contains", "value": "blackboard" } - ] - }, - { - "description": "Findings are produced from the research", - "deterministic_checks": [ - { "type": "output_contains", "value": "findings" } - ] - }, - { - "description": "State file is referenced for persistence", - "deterministic_checks": [ - { "type": "output_contains", "value": "state.json" } - ] - }, - { - "description": "Deprecated market-researcher agent pattern is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "market-researcher" } - ] - }, - { - "description": "Deprecated TodoWrite tool is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "TodoWrite" } - ] - }, - { - "description": "Deprecated Subcog pattern is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Deprecated subagent_type pattern is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "subagent_type" } - ] - }, - { - "description": "Elicitation captures all user context (audience, timeline, priorities), orchestrator spawns 3 dimension analysts (competitive, sizing, trends), and findings are written to state.json" - } - ] - }, - { - "id": "e2e-start-to-report", - "description": "Research findings are synthesized into an investor-tailored report", - "prompt": "Generate a report from the AI code assistant research for investor audience in markdown format", - 
"expectations": [ - { - "description": "Report synthesizer agent is referenced", - "deterministic_checks": [ - { "type": "output_contains", "value": "report-synthesizer" } - ] - }, - { - "description": "Executive Summary section is included in the report", - "deterministic_checks": [ - { "type": "output_contains", "value": "Executive Summary" } - ] - }, - { - "description": "Data source is blackboard or state.json", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["blackboard", "state.json"] - } - ] - }, - { - "description": "Investor audience is acknowledged in the report", - "deterministic_checks": [ - { "type": "output_contains", "value": "investor" } - ] - }, - { - "description": "Deprecated Subcog pattern is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Report synthesizer reads findings from state or blackboard, generates investor-tailored report with Executive Summary, competitive landscape, market sizing, and trend analysis sections" - } - ] - }, - { - "id": "e2e-augment-single-dimension", - "description": "Deep dive augmentation into a single regulatory dimension", - "prompt": "/sigint:augment regulatory landscape for the AI code assistant research", - "expectations": [ - { - "description": "Dimension analyst agent is referenced for the deep dive", - "deterministic_checks": [ - { "type": "output_contains", "value": "dimension-analyst" } - ] - }, - { - "description": "Regulatory dimension is targeted", - "deterministic_checks": [ - { "type": "output_contains", "value": "regulatory" } - ] - }, - { - "description": "Regulatory-review skill methodology is referenced", - "deterministic_checks": [ - { "type": "output_contains", "value": "regulatory-review" } - ] - }, - { - "description": "Regulatory findings blackboard key is used", - "deterministic_checks": [ - { "type": "output_contains", "value": "findings_regulatory" } - ] - }, - { - "description": 
"Deprecated market-researcher pattern is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "market-researcher" } - ] - }, - { - "description": "Single dimension-analyst is spawned for the regulatory dimension using regulatory-review skill methodology, findings stored under findings_regulatory blackboard key" - } - ] - }, { "id": "e2e-start-to-issues", "description": "Research findings are converted into structured GitHub issues", @@ -154,129 +7,41 @@ { "description": "Issue architect agent is referenced", "deterministic_checks": [ - { "type": "output_contains", "value": "issue-architect" } + { + "type": "output_contains", + "value": "issue-architect" + } ] }, { "description": "Acceptance criteria are included in issues", "deterministic_checks": [ - { "type": "output_contains", "value": "acceptance criteria" } + { + "type": "output_contains", + "value": "acceptance criteria" + } ] }, { "description": "Sprint planning is referenced", - "deterministic_checks": [ - { "type": "output_contains", "value": "sprint" } - ] - }, - { - "description": "Deprecated Subcog pattern is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Issue architect extracts actionable items from research findings, creates categorized issues with clear acceptance criteria and sprint assignments" - } - ] - }, - { - "id": "e2e-cross-session-recall", - "description": "New research session recalls prior research from Atlatl memory", - "prompt": "Research developer productivity tools. 
I previously researched AI code assistants - use those findings as context.", - "expectations": [ - { - "description": "Atlatl recall is invoked to find prior research", - "deterministic_checks": [ - { "type": "output_contains", "value": "recall_memories" } - ] - }, - { - "description": "Research tag is used for memory search", - "deterministic_checks": [ - { "type": "output_contains", "value": "sigint-research" } - ] - }, - { - "description": "Prior AI code assistant research is referenced", "deterministic_checks": [ { - "type": "output_contains_any", - "values": [ - "AI code assistant", - "AI code assistants", - "prior research" - ] + "type": "output_contains", + "value": "sprint" } ] }, - { - "description": "Atlatl recall successfully finds prior AI code assistant research and uses those findings to inform the new developer productivity tools research session" - } - ] - }, - { - "id": "e2e-resume-with-atlatl", - "description": "Resume session combines state.json file restoration with Atlatl memory enhancement", - "prompt": "/sigint:resume AI code assistants", - "expectations": [ - { - "description": "State file is referenced for session restoration", - "deterministic_checks": [ - { "type": "output_contains", "value": "state.json" } - ] - }, - { - "description": "Atlatl recall is used to enhance restored session", - "deterministic_checks": [ - { "type": "output_contains", "value": "recall_memories" } - ] - }, { "description": "Deprecated Subcog pattern is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Session is restored from state.json file state and enhanced with Atlatl memories for richer context, suggesting next actions based on combined data" - } - ] - }, - { - "id": "e2e-init-atlatl-context", - "description": "Init command loads Atlatl context and identifies active sessions", - "prompt": "/sigint:init --topic 'AI code assistants'", - "expectations": [ - { - "description": "Atlatl 
recall is invoked during initialization", - "deterministic_checks": [ - { "type": "output_contains", "value": "recall_memories" } - ] - }, - { - "description": "Semantic knowledge or research tag namespace is referenced", "deterministic_checks": [ { - "type": "output_contains_any", - "values": ["_semantic/knowledge", "sigint-research"] + "type": "output_not_contains", + "value": "Subcog" } ] }, { - "description": "Atlatl is explicitly named as the memory source", - "deterministic_checks": [ - { "type": "output_contains", "value": "Atlatl" } - ] - }, - { - "description": "Deprecated Subcog pattern is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Initialization loads Atlatl context for the topic, identifies any active research sessions, and suggests the next action (start new or resume existing)" + "description": "Issue architect extracts actionable items from research findings, creates categorized issues with clear acceptance criteria and sprint assignments" } ] }, @@ -288,19 +53,28 @@ { "description": "Source chunker is referenced for large document processing", "deterministic_checks": [ - { "type": "output_contains", "value": "source-chunker" } + { + "type": "output_contains", + "value": "source-chunker" + } ] }, { "description": "Chunking strategy is referenced", "deterministic_checks": [ - { "type": "output_contains", "value": "chunk" } + { + "type": "output_contains", + "value": "chunk" + } ] }, { "description": "Dimension analyst processes the chunked content", "deterministic_checks": [ - { "type": "output_contains", "value": "dimension-analyst" } + { + "type": "output_contains", + "value": "dimension-analyst" + } ] }, { @@ -316,13 +90,19 @@ { "description": "Dimension analyst processes the source directly", "deterministic_checks": [ - { "type": "output_contains", "value": "dimension-analyst" } + { + "type": "output_contains", + "value": "dimension-analyst" + } ] }, { "description": 
"Source chunker is not required for small documents", "deterministic_checks": [ - { "type": "output_not_contains", "value": "source-chunker" } + { + "type": "output_not_contains", + "value": "source-chunker" + } ] }, { @@ -338,54 +118,40 @@ { "description": "Conflict detection is referenced in the orchestration flow", "deterministic_checks": [ - { "type": "output_contains", "value": "conflict" } + { + "type": "output_contains", + "value": "conflict" + } ] }, { "description": "Conflicting dimensions are identified by name", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["competitive", "sizing", "financial"] } + { + "type": "output_contains_any", + "values": [ + "competitive", + "sizing", + "financial" + ] + } ] }, { "description": "Resolution rationale is provided", - "deterministic_checks": [ - { "type": "output_contains_any", "values": ["resolution", "rationale", "reconcil"] } - ] - }, - { - "description": "When competitive analysis finds concentrated market share (few major players controlling most of the market) but sizing estimates suggest high fragmentation, the conflict is detected with specific dimension names, logged with structured context, and resolved with explicit rationale" - } - ] - }, - { - "id": "e2e-update-stale-research", - "description": "Stale research findings are identified and refreshed with current data", - "prompt": "/sigint:update --area 'competitive landscape'", - "expectations": [ - { - "description": "Staleness or freshness of data is referenced", "deterministic_checks": [ { "type": "output_contains_any", - "values": ["stale", "fresh", "recent"] + "values": [ + "resolution", + "rationale", + "reconcil" + ] } ] }, { - "description": "Memory capture is invoked to persist updated findings", - "deterministic_checks": [ - { "type": "output_contains", "value": "capture_memory" } - ] - }, - { - "description": "Deprecated Subcog pattern is not used", - "deterministic_checks": [ - { "type": "output_not_contains", 
"value": "Subcog" } - ] - }, - { - "description": "Stale findings are identified by comparing timestamps, fresh data is fetched, compared with existing findings, state.json is updated, and new findings are captured to Atlatl" + "description": "When competitive analysis finds concentrated market share (few major players controlling most of the market) but sizing estimates suggest high fragmentation, the conflict is detected with specific dimension names, logged with structured context, and resolved with explicit rationale" } ] }, @@ -399,7 +165,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["team_status", "TEAM STATUS"] + "values": [ + "team_status", + "TEAM STATUS" + ] } ] }, @@ -408,7 +177,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["complete", "in_progress"] + "values": [ + "complete", + "in_progress" + ] } ] }, @@ -427,7 +199,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["no research", "no session", "no findings"] + "values": [ + "no research", + "no session", + "no findings" + ] } ] }, @@ -436,131 +212,6 @@ } ] }, - { - "id": "e2e-zero-deprecated-patterns-start", - "description": "Full start flow uses exclusively v0.4.0 patterns with zero deprecated references", - "prompt": "/sigint:start quantum computing hardware market", - "expectations": [ - { - "description": "Deprecated TodoWrite is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "TodoWrite" } - ] - }, - { - "description": "Deprecated Subcog is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Deprecated subagent_type is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "subagent_type" } - ] - }, - { - "description": "Deprecated market-researcher delegation is not used", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "market-researcher" } - ] - 
}, - { - "description": "Research orchestrator is used as the coordination agent", - "deterministic_checks": [ - { "type": "output_contains", "value": "research-orchestrator" } - ] - }, - { - "description": "Atlatl memory integration is present", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["Atlatl", "capture_memory", "recall_memories"] - } - ] - }, - { - "description": "Task or Agent orchestration tools are used", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["TaskCreate", "Agent"] - } - ] - }, - { - "description": "Entire start flow uses only v0.4.0 architecture: research-orchestrator for coordination, dimension-analyst for workers, blackboard for state, Atlatl for memory, and TaskCreate/Agent for orchestration" - } - ] - }, - { - "id": "e2e-zero-deprecated-patterns-full-cycle", - "description": "Complete research cycle (start, augment, report, issues) uses exclusively v0.4.0 patterns", - "prompt": "Run complete research cycle: start with quantum computing, augment competitive, generate report, create issues", - "expectations": [ - { - "description": "Deprecated TodoWrite is not used anywhere in the cycle", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "TodoWrite" } - ] - }, - { - "description": "Deprecated Subcog is not used anywhere in the cycle", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "Subcog" } - ] - }, - { - "description": "Deprecated subagent_type is not used anywhere in the cycle", - "deterministic_checks": [ - { "type": "output_not_contains", "value": "subagent_type" } - ] - }, - { - "description": "Research orchestrator coordinates the start phase", - "deterministic_checks": [ - { "type": "output_contains", "value": "research-orchestrator" } - ] - }, - { - "description": "Dimension analyst handles augmentation and initial research", - "deterministic_checks": [ - { "type": "output_contains", "value": "dimension-analyst" } - ] - }, - 
{ - "description": "Report synthesizer generates the report", - "deterministic_checks": [ - { "type": "output_contains", "value": "report-synthesizer" } - ] - }, - { - "description": "Issue architect creates the GitHub issues", - "deterministic_checks": [ - { "type": "output_contains", "value": "issue-architect" } - ] - }, - { - "description": "Blackboard is used for inter-agent coordination", - "deterministic_checks": [ - { "type": "output_contains", "value": "blackboard" } - ] - }, - { - "description": "Atlatl memory is integrated", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["Atlatl", "capture_memory"] - } - ] - }, - { - "description": "Full research cycle completes through all four phases (start, augment, report, issues) using only v0.4.0 agents and patterns without any deprecated tool, agent, or memory references" - } - ] - }, { "id": "e2e-methodology-compliance", "description": "End-to-end research with methodology plan, framework application, and coverage matrix", @@ -569,7 +220,10 @@ { "description": "Methodology plan is written per dimension", "deterministic_checks": [ - { "type": "output_contains", "value": "methodology_plan" } + { + "type": "output_contains", + "value": "methodology_plan" + } ] }, { @@ -577,7 +231,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Frameworks Planned", "methodology plan"] + "values": [ + "Frameworks Planned", + "methodology plan" + ] } ] }, @@ -586,20 +243,29 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Coverage Matrix", "coverage matrix"] + "values": [ + "Coverage Matrix", + "coverage matrix" + ] } ] }, { "description": "Deprecated TodoWrite is not used", "deterministic_checks": [ - { "type": "output_not_contains", "value": "TodoWrite" } + { + "type": "output_not_contains", + "value": "TodoWrite" + } ] }, { "description": "Deprecated Subcog is not used", "deterministic_checks": [ - { "type": "output_not_contains", "value": 
"Subcog" } + { + "type": "output_not_contains", + "value": "Subcog" + } ] }, { @@ -615,7 +281,10 @@ { "description": "State diagram is included in output", "deterministic_checks": [ - { "type": "output_contains", "value": "stateDiagram" } + { + "type": "output_contains", + "value": "stateDiagram" + } ] }, { @@ -623,7 +292,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["INC", "DEC", "CONST"] + "values": [ + "INC", + "DEC", + "CONST" + ] } ] }, @@ -632,7 +305,10 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Scenario", "scenario"] + "values": [ + "Scenario", + "scenario" + ] } ] }, @@ -641,7 +317,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Terminal", "terminal", "equilibrium"] + "values": [ + "Terminal", + "terminal", + "equilibrium" + ] } ] }, @@ -660,7 +340,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Porter", "5 Forces", "Five Forces"] + "values": [ + "Porter", + "5 Forces", + "Five Forces" + ] } ] }, @@ -669,7 +353,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["HIGH", "MODERATE", "LOW"] + "values": [ + "HIGH", + "MODERATE", + "LOW" + ] } ] }, @@ -678,7 +366,13 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Competitive Rivalry", "Supplier", "Buyer", "Substitution", "New Entry"] + "values": [ + "Competitive Rivalry", + "Supplier", + "Buyer", + "Substitution", + "New Entry" + ] } ] }, @@ -695,19 +389,32 @@ { "description": "Output references config lookup using the topic slug", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["ai-code-assistants", "topic", "sigint.config.json"] } + { + "type": "output_contains_any", + "values": [ + "ai-code-assistants", + "topic", + "sigint.config.json" + ] + } ] }, { "description": "Output passes config to research-orchestrator", "deterministic_checks": [ - { "type": "output_contains", "value": "research-orchestrator" } + 
{ + "type": "output_contains", + "value": "research-orchestrator" + } ] }, { "description": "Output does NOT read sigint.local.md as config source", "deterministic_checks": [ - { "type": "output_not_contains", "value": "sigint.local.md" } + { + "type": "output_not_contains", + "value": "sigint.local.md" + } ] } ] @@ -720,13 +427,25 @@ { "description": "Output warns that the config is using the old v1.0 format", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["v1.0", "1.0", "migrate", "old format", "upgrade"] } + { + "type": "output_contains_any", + "values": [ + "v1.0", + "1.0", + "migrate", + "old format", + "upgrade" + ] + } ] }, { "description": "Output continues to function (does not fail) despite old format", "deterministic_checks": [ - { "type": "output_contains", "value": "research-orchestrator" } + { + "type": "output_contains", + "value": "research-orchestrator" + } ] } ] @@ -739,19 +458,28 @@ { "description": "Output proceeds to research-orchestrator delegation without config errors", "deterministic_checks": [ - { "type": "output_contains", "value": "research-orchestrator" } + { + "type": "output_contains", + "value": "research-orchestrator" + } ] }, { "description": "Output does NOT report a config error or missing file error", "deterministic_checks": [ - { "type": "output_not_contains", "value": "config not found" } + { + "type": "output_not_contains", + "value": "config not found" + } ] }, { "description": "Output does NOT reference sigint.local.md as a config source", "deterministic_checks": [ - { "type": "output_not_contains", "value": "sigint.local.md" } + { + "type": "output_not_contains", + "value": "sigint.local.md" + } ] } ] @@ -764,13 +492,23 @@ { "description": "Output references loading or reading the CONTEXT.md file", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["CONTEXT.md", "context_file", "context file"] } + { + "type": "output_contains_any", + "values": [ + "CONTEXT.md", + 
"context_file", + "context file" + ] + } ] }, { "description": "Output passes context to the research-orchestrator", "deterministic_checks": [ - { "type": "output_contains", "value": "research-orchestrator" } + { + "type": "output_contains", + "value": "research-orchestrator" + } ] } ] @@ -783,13 +521,24 @@ { "description": "Output warns that the context file was not found", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["not found", "missing", "does not exist", "context"] } + { + "type": "output_contains_any", + "values": [ + "not found", + "missing", + "does not exist", + "context" + ] + } ] }, { "description": "Output continues to research-orchestrator delegation despite missing context file", "deterministic_checks": [ - { "type": "output_contains", "value": "research-orchestrator" } + { + "type": "output_contains", + "value": "research-orchestrator" + } ] } ] @@ -802,14 +551,23 @@ { "description": "Output does not error or abort on missing topic slug", "deterministic_checks": [ - { "type": "output_not_contains", "value": "error" }, - { "type": "output_not_contains", "value": "abort" } + { + "type": "output_not_contains", + "value": "error" + }, + { + "type": "output_not_contains", + "value": "abort" + } ] }, { "description": "Research session proceeds normally using defaults", "deterministic_checks": [ - { "type": "output_contains", "value": "research-orchestrator" } + { + "type": "output_contains", + "value": "research-orchestrator" + } ] } ] @@ -822,13 +580,23 @@ { "description": "Output references the repository from config rather than prompting user", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["owner/repo", "repository", "default_repo"] } + { + "type": "output_contains_any", + "values": [ + "owner/repo", + "repository", + "default_repo" + ] + } ] }, { "description": "Output does NOT reference sigint.local.md as repo source", "deterministic_checks": [ - { "type": "output_not_contains", "value": 
"sigint.local.md" } + { + "type": "output_not_contains", + "value": "sigint.local.md" + } ] } ] @@ -841,13 +609,24 @@ { "description": "Issue creation proceeds using the defaults block repo", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["default_repo", "defaults", "repository", "repo"] } + { + "type": "output_contains_any", + "values": [ + "default_repo", + "defaults", + "repository", + "repo" + ] + } ] }, { "description": "Output does NOT reference sigint.local.md as the source", "deterministic_checks": [ - { "type": "output_not_contains", "value": "sigint.local.md" } + { + "type": "output_not_contains", + "value": "sigint.local.md" + } ] } ] @@ -860,13 +639,19 @@ { "description": "Output references sigint.config.json as the configuration file", "deterministic_checks": [ - { "type": "output_contains", "value": "sigint.config.json" } + { + "type": "output_contains", + "value": "sigint.config.json" + } ] }, { "description": "Output does NOT reference sigint.local.md as a config file", "deterministic_checks": [ - { "type": "output_not_contains", "value": "sigint.local.md" } + { + "type": "output_not_contains", + "value": "sigint.local.md" + } ] } ] @@ -879,14 +664,28 @@ { "description": "The topic-specific maxDimensions value (3) is used", "deterministic_checks": [ - { "type": "output_contains", "value": "maxDimensions" }, - { "type": "output_contains_any", "values": ["MAX_DIMENSIONS: 3", "max_dimensions: 3", "maxDimensions: 3", "max_dimensions = 3"] } + { + "type": "output_contains", + "value": "maxDimensions" + }, + { + "type": "output_contains_any", + "values": [ + "MAX_DIMENSIONS: 3", + "max_dimensions: 3", + "maxDimensions: 3", + "max_dimensions = 3" + ] + } ] }, { "description": "The defaults value (5) is not used for maxDimensions", "deterministic_checks": [ - { "type": "output_not_contains", "value": "MAX_DIMENSIONS: 5" } + { + "type": "output_not_contains", + "value": "MAX_DIMENSIONS: 5" + } ] } ] @@ -899,14 +698,27 @@ { 
"description": "Project config value (4) takes precedence over global (7)", "deterministic_checks": [ - { "type": "output_contains_any", "values": ["MAX_DIMENSIONS: 4", "max_dimensions: 4", "maxDimensions: 4"] }, - { "type": "output_not_contains", "value": "MAX_DIMENSIONS: 7" } + { + "type": "output_contains_any", + "values": [ + "MAX_DIMENSIONS: 4", + "max_dimensions: 4", + "maxDimensions: 4" + ] + }, + { + "type": "output_not_contains", + "value": "MAX_DIMENSIONS: 7" + } ] }, { "description": "Research session proceeds normally", "deterministic_checks": [ - { "type": "output_contains", "value": "research-orchestrator" } + { + "type": "output_contains", + "value": "research-orchestrator" + } ] } ] diff --git a/evals/orchestration/evals.json b/evals/orchestration/evals.json index 4194472..54fd2a1 100644 --- a/evals/orchestration/evals.json +++ b/evals/orchestration/evals.json @@ -1,167 +1,4 @@ [ - { - "id": "blackboard-creation-before-spawning", - "description": "Verify blackboard is created before any analyst is spawned, with appropriate TTL and task reference", - "prompt": "Research the competitive landscape for autonomous vehicle LiDAR suppliers. 
Focus on market sizing and competitive analysis dimensions.", - "expectations": [ - { - "description": "Blackboard creation is invoked", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["blackboard_create", "blackboard create"] - } - ] - }, - { - "description": "Task or topic identifier is associated with the blackboard", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["task_id", "topic_slug", "lidar", "autonomous-vehicle"] - } - ] - }, - { - "description": "TTL is configured for the research session", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["ttl", "TTL", "24", "expir"] - } - ] - }, - { - "description": "Blackboard creation happens temporally before any analyst spawning - the orchestrator must set up shared state before launching parallel workers" - }, - { - "description": "TTL is appropriate for a research session (typically 24 hours), not too short (minutes) or too long (weeks)" - } - ] - }, - { - "id": "blackboard-elicitation-propagation", - "description": "Verify elicitation context is written to blackboard and subsequently read by spawned analysts", - "prompt": "Research the European electric vehicle charging infrastructure market. Priorities: market sizing, regulatory review, competitive analysis. Time horizon: 2024-2030. 
Geographic scope: EU27 only.", - "expectations": [ - { - "description": "Elicitation data is written to blackboard", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["elicitation", "scope", "priorities"] - } - ] - }, - { - "description": "A blackboard write operation occurs for elicitation context", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["blackboard_write", "blackboard write", "write"] - } - ] - }, - { - "description": "Analysts read the blackboard to retrieve elicitation context", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["blackboard_read", "blackboard read", "read"] - } - ] - }, - { - "description": "The full elicitation object is propagated to analysts, not just the topic name - including time horizon (2024-2030), geographic scope (EU27), and dimension priorities" - }, - { - "description": "Analysts shape their research based on elicitation scope and priorities, e.g., market sizing focuses on EU27 charging infrastructure specifically, not global EV market" - } - ] - }, - { - "id": "blackboard-team-status-tracking", - "description": "Verify team_status key is updated throughout the research lifecycle with per-analyst status tracking", - "prompt": "Research quantum computing hardware vendors. Dimensions: competitive analysis, tech assessment, trend analysis. 
Show me status updates as analysts work.", - "expectations": [ - { - "description": "Team status key is referenced on the blackboard", - "deterministic_checks": [ - { - "type": "output_contains", - "value": "team_status" - } - ] - }, - { - "description": "Status values reflect lifecycle states", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": [ - "pending", - "in_progress", - "complete", - "running", - "completed" - ] - } - ] - }, - { - "description": "Status transitions follow the correct lifecycle: pending (spawned but not started) -> in_progress (actively researching) -> complete (findings written) for each analyst" - }, - { - "description": "Status is queryable by the /sigint:status command, allowing the user to check progress of individual dimension analysts at any time" - } - ] - }, - { - "id": "blackboard-findings-key-schema", - "description": "Verify findings written to blackboard follow the required schema with dimension, confidence, sources, and gaps", - "prompt": "Research the global cybersecurity mesh architecture market. Run competitive analysis and market sizing dimensions. 
Show me the raw findings structure.", - "expectations": [ - { - "description": "Findings keys use the findings_ prefix pattern per dimension", - "deterministic_checks": [ - { - "type": "regex_match", - "pattern": "findings_(competitive|sizing|regulatory|trend|financial|tech|customer)" - } - ] - }, - { - "description": "Findings include a dimension field", - "deterministic_checks": [ - { - "type": "output_contains", - "value": "dimension" - } - ] - }, - { - "description": "Findings include a confidence field", - "deterministic_checks": [ - { - "type": "output_contains", - "value": "confidence" - } - ] - }, - { - "description": "Findings include a sources field", - "deterministic_checks": [ - { - "type": "output_contains", - "value": "sources" - } - ] - }, - { - "description": "Each findings object contains the full schema: dimension (string), status (complete/partial), findings (array of individual findings), sources (array of URLs/references), and gaps (array of identified knowledge gaps)" - } - ] - }, { "id": "parallel-analyst-spawning", "description": "Verify multiple dimension analysts are spawned concurrently rather than sequentially", @@ -172,7 +9,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["dimension-analyst", "analyst", "dimension analyst"] + "values": [ + "dimension-analyst", + "analyst", + "dimension analyst" + ] } ] }, @@ -181,7 +22,12 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["parallel", "concurrent", "background", "simultaneously"] + "values": [ + "parallel", + "concurrent", + "background", + "simultaneously" + ] } ] }, @@ -190,7 +36,12 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["run_in_background", "background", "async", "spawn"] + "values": [ + "run_in_background", + "background", + "async", + "spawn" + ] } ] }, @@ -212,7 +63,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["market sizing", "market-sizing", "sizing"] + 
"values": [ + "market sizing", + "market-sizing", + "sizing" + ] } ] }, @@ -221,7 +76,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["trend analysis", "trend-analysis", "trend"] + "values": [ + "trend analysis", + "trend-analysis", + "trend" + ] } ] }, @@ -242,251 +101,6 @@ } ] }, - { - "id": "parallel-completion-synchronization", - "description": "Verify orchestrator waits for all analysts to complete before initiating findings merge", - "prompt": "Research the satellite internet market. Run competitive analysis, market sizing, and trend analysis. Do not merge until all dimensions are complete.", - "expectations": [ - { - "description": "Completion signal or alert is used for synchronization", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["phase_complete", "complete", "all_complete", "finished"] - } - ] - }, - { - "description": "Merge operation is referenced after completion", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["merge", "merging", "synthesiz", "consolidat"] - } - ] - }, - { - "description": "The orchestrator waits for all spawned analysts to report completion before proceeding to the merge/synthesis phase - no partial merges" - }, - { - "description": "If one analyst completes significantly before others, its findings are stored on the blackboard but the merge does not begin until the slowest analyst also completes" - } - ] - }, - { - "id": "cross-dimension-conflict-detection", - "description": "Verify conflicting findings across dimensions are detected and flagged via blackboard alerts", - "prompt": "Research the global drone delivery market. Run market sizing and competitive analysis. 
If market sizing estimates TAM at $10B but competitive analysis revenue sums imply $5B, flag the conflict.", - "expectations": [ - { - "description": "Conflict detection is referenced", - "deterministic_checks": [ - { - "type": "output_contains", - "value": "conflict" - } - ] - }, - { - "description": "Conflict alert channel is used", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["conflict_detected", "conflict alert", "alert"] - } - ] - }, - { - "description": "Conflicting dimensions are identified", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["dimension_a", "dimension_b", "sizing", "competitive"] - } - ] - }, - { - "description": "When market sizing says TAM is $10B but competitive analysis revenue sums only reach $5B, the system flags this as a cross-dimension conflict requiring reconciliation" - }, - { - "description": "The conflict report includes both dimensions involved, the specific data points that conflict, and a description of the discrepancy" - } - ] - }, - { - "id": "cross-dimension-source-sharing", - "description": "Verify high-value sources discovered by one analyst are shared with other active analysts via blackboard alerts", - "prompt": "Research the industrial IoT platform market. Run competitive analysis and market sizing. 
If one analyst finds a comprehensive Gartner Magic Quadrant report, share it with the other analyst.", - "expectations": [ - { - "description": "Source sharing alert channel is used", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": [ - "source_shared", - "shared_source", - "share source", - "source sharing" - ] - } - ] - }, - { - "description": "When one analyst discovers a comprehensive industry report (e.g., Gartner Magic Quadrant, IDC MarketScape), it broadcasts an alert so other active analysts can leverage the same source" - }, - { - "description": "Shared sources reduce duplicate web searches and API calls across analysts, improving both speed and cost efficiency of the research session" - } - ] - }, - { - "id": "cross-dimension-finding-discovery-alert", - "description": "Verify significant findings trigger immediate alerts to the orchestrator via blackboard alert channels", - "prompt": "Research the generative AI chip market. Run trend analysis and competitive analysis. 
Alert immediately if a major market shift is discovered (e.g., >20% share change, new dominant player).", - "expectations": [ - { - "description": "Finding discovery alert channel is used", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": [ - "finding_discovered", - "major_finding", - "significant_finding", - "alert" - ] - } - ] - }, - { - "description": "Major findings such as >20% market share shifts, trend reversals, new dominant entrants, or regulatory disruptions trigger an immediate alert to the orchestrator rather than waiting for the full analysis to complete" - }, - { - "description": "The alert includes a brief description of what was found, which dimension discovered it, and a preliminary confidence level so the orchestrator can decide whether to adjust other analysts' priorities" - } - ] - }, - { - "id": "atlatl-recall-before-research", - "description": "Verify Atlatl memory recall is performed before spawning analysts to leverage prior research on the same or related topics", - "prompt": "Research the autonomous vehicle LiDAR market. 
Check if we have prior research on this topic before starting new analysts.", - "expectations": [ - { - "description": "recall_memories is invoked", - "deterministic_checks": [ - { - "type": "output_contains", - "value": "recall_memories" - } - ] - }, - { - "description": "Query includes sigint-relevant terms", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["sigint", "lidar", "autonomous", "research"] - } - ] - }, - { - "description": "Should not reference unrelated tools", - "deterministic_checks": [ - { - "type": "output_not_contains", - "value": "Subcog" - } - ] - }, - { - "description": "If prior research exists on the topic (or a closely related topic), those findings are loaded from Atlatl and shared via the blackboard so analysts can build on existing knowledge rather than starting from scratch" - }, - { - "description": "Prior findings reduce duplicate research effort - analysts skip re-investigating well-established facts and focus on gaps, updates, or newly requested dimensions" - } - ] - }, - { - "id": "atlatl-capture-after-merge", - "description": "Verify key cross-dimension findings are captured to Atlatl permanent memory after the merge/synthesis phase completes", - "prompt": "Research the space tourism market. Run market sizing and trend analysis. 
After merging findings, save key insights to permanent memory for future sessions.", - "expectations": [ - { - "description": "capture_memory is invoked after merge", - "deterministic_checks": [ - { - "type": "output_contains", - "value": "capture_memory" - } - ] - }, - { - "description": "enrich_memory is invoked to improve discoverability", - "deterministic_checks": [ - { - "type": "output_contains", - "value": "enrich_memory" - } - ] - }, - { - "description": "Correct namespace is used for knowledge storage", - "deterministic_checks": [ - { - "type": "output_contains", - "value": "_semantic/knowledge" - } - ] - }, - { - "description": "Sigint research tag is applied", - "deterministic_checks": [ - { - "type": "output_contains", - "value": "sigint-research" - } - ] - }, - { - "description": "The captured memory contains cross-dimension insights and synthesized conclusions, not raw data dumps from individual analysts" - }, - { - "description": "The captured memory is enriched with tags, entity extraction, and embeddings so future sessions can discover and leverage these findings via semantic search" - } - ] - }, - { - "id": "methodology-plan-collection", - "description": "Orchestrator collects methodology plans from analysts via file reads (primary) or blackboard reads (fallback) to verify planning occurred", - "prompt": "Research AI-powered legal tech. Run competitive and sizing dimensions. 
Show methodology plans.", - "expectations": [ - { - "description": "Competitive methodology plan key is referenced", - "deterministic_checks": [ - { "type": "output_contains", "value": "methodology_plan_competitive" } - ] - }, - { - "description": "Sizing methodology plan key is referenced", - "deterministic_checks": [ - { "type": "output_contains", "value": "methodology_plan_sizing" } - ] - }, - { - "description": "File read or blackboard read is used to collect methodology plans", - "deterministic_checks": [ - { - "type": "output_contains_any", - "values": ["methodology_plan_competitive.json", "methodology_plan_sizing.json", "blackboard_read", "methodology_plan"] - } - ] - }, - { - "description": "Orchestrator reads methodology_plan files (primary) or blackboard keys (fallback) for each spawned analyst dimension to verify that planning occurred before research" - } - ] - }, { "id": "methodology-plan-user-display", "description": "Orchestrator displays framework plans per dimension in table format for user visibility", @@ -497,7 +111,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Frameworks Planned", "frameworks", "methodology"] + "values": [ + "Frameworks Planned", + "frameworks", + "methodology" + ] } ] }, @@ -506,7 +124,15 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["competitive", "sizing", "trends", "customer", "tech", "financial", "regulatory"] + "values": [ + "competitive", + "sizing", + "trends", + "customer", + "tech", + "financial", + "regulatory" + ] } ] }, @@ -525,7 +151,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Coverage Matrix", "coverage matrix", "Methodology Coverage"] + "values": [ + "Coverage Matrix", + "coverage matrix", + "Methodology Coverage" + ] } ] }, @@ -534,7 +164,12 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["Planned", "Applied", "planned", "applied"] + "values": [ + "Planned", + "Applied", + "planned", + 
"applied" + ] } ] }, @@ -543,7 +178,12 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["competitive", "sizing", "trends", "dimension"] + "values": [ + "competitive", + "sizing", + "trends", + "dimension" + ] } ] }, @@ -560,7 +200,10 @@ { "description": "Source-chunker is spawned by team-lead", "deterministic_checks": [ - { "type": "output_contains", "value": "source-chunker" } + { + "type": "output_contains", + "value": "source-chunker" + } ] }, { @@ -568,14 +211,21 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["SendMessage", "route", "findings"] + "values": [ + "SendMessage", + "route", + "findings" + ] } ] }, { "description": "Analyst does not spawn source-chunker directly", "deterministic_checks": [ - { "type": "output_not_contains", "value": "analyst spawns" } + { + "type": "output_not_contains", + "value": "analyst spawns" + } ] }, { @@ -593,14 +243,21 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["warning", "Warning", "\u26a0"] + "values": [ + "warning", + "Warning", + "\u26a0" + ] } ] }, { "description": "Session continues rather than aborting", "deterministic_checks": [ - { "type": "output_not_contains", "value": "abort" } + { + "type": "output_not_contains", + "value": "abort" + } ] }, { @@ -608,7 +265,11 @@ "deterministic_checks": [ { "type": "output_contains_any", - "values": ["gap", "unverified", "missing"] + "values": [ + "gap", + "unverified", + "missing" + ] } ] }, @@ -625,14 +286,23 @@ { "description": "Only topic-a config values are used, not topic-b values", "deterministic_checks": [ - { "type": "output_contains", "value": "topic-a" }, - { "type": "output_not_contains", "value": "topic-b" } + { + "type": "output_contains", + "value": "topic-a" + }, + { + "type": "output_not_contains", + "value": "topic-b" + } ] }, { "description": "Research orchestrator is invoked with the correct scope", "deterministic_checks": [ - { "type": "output_contains", "value": 
"research-orchestrator" } + { + "type": "output_contains", + "value": "research-orchestrator" + } ] } ] diff --git a/hooks/hooks.json b/hooks/hooks.json index 582b334..8a37e21 100644 --- a/hooks/hooks.json +++ b/hooks/hooks.json @@ -6,7 +6,7 @@ "hooks": [ { "type": "prompt", - "prompt": "Sigint plugin is installed. Active research sessions may exist in ./reports/*/state.json. Atlatl memories are stored with tag sigint-research. Configuration file: ./sigint.config.json (project, schema v2.0) or ~/.claude/sigint.config.json (global fallback)." + "prompt": "Sigint plugin is installed. Active research sessions may exist in ./reports/*/state.json. Configuration file: ./sigint.config.json (project, schema v2.0) or ~/.claude/sigint.config.json (global fallback)." } ] } diff --git a/protocols/STRUCTURED-DATA.md b/protocols/STRUCTURED-DATA.md index 2d56798..c73dfcb 100644 --- a/protocols/STRUCTURED-DATA.md +++ b/protocols/STRUCTURED-DATA.md @@ -25,7 +25,6 @@ This protocol defines how sigint agents handle JSON file operations. All agents **Out of scope:** - YAML frontmatter in `.md` files (embedded structured data in non-structured files) -- Blackboard MCP tool calls (`blackboard_write`, `blackboard_read`) — MCP handles serialization internally - Markdown, HTML, and plain-text files - `plugin.json` (read-only, never mutated by agents) @@ -143,7 +142,7 @@ sigint.config.json → schemas/sigint-config.jq ## File-First Write Pattern -File writes are the authoritative persistence path. Blackboard writes are optional coordination aids with 24h TTL. Always write to file first, validate, then optionally write to blackboard. +File writes are the authoritative persistence path. Always write to file first, then validate. 
```bash # Step 1: File write (MANDATORY — must use jq) @@ -154,13 +153,8 @@ jq -e -f "schemas/${SCHEMA}.jq" "./reports/{topic_slug}/{key}.json" > /dev/null echo "SCHEMA VIOLATION: {key}.json failed validation" exit 1 } - -# Step 3: Blackboard (optional — MCP handles serialization, exempt from jq requirement) -blackboard_write(scope="{topic_slug}", key="{key}", value={object}) ``` -If blackboard is unavailable, no action needed — the file is already written and validated. - --- ## Recipes diff --git a/schemas/sigint-config.jq b/schemas/sigint-config.jq index caefb2a..e92b0df 100644 --- a/schemas/sigint-config.jq +++ b/schemas/sigint-config.jq @@ -16,7 +16,6 @@ has("topics") and (.topics | type == "object") and (.defaults | has("report_format") and (.report_format | type == "string" and IN("markdown", "html", "both")) and has("audiences") and (.audiences | type == "array" and length > 0 and all(type == "string")) and - has("auto_atlatl") and (.auto_atlatl | type == "boolean") ) and (.research | @@ -39,7 +38,6 @@ has("topics") and (.topics | type == "object") and # Optional fields validated when present (if has("updated") then (.updated | type == "string") else true end) and (if has("findings_count") then (.findings_count | type == "number" and . 
>= 0) else true end) and - (if has("atlatl_memory_id") then (.atlatl_memory_id | type == "string") else true end) and (if has("context_file") then (.context_file | type == "string") else true end) ) ) diff --git a/skills/augment/SKILL.md b/skills/augment/SKILL.md index d125e9a..b2d6aee 100644 --- a/skills/augment/SKILL.md +++ b/skills/augment/SKILL.md @@ -17,15 +17,6 @@ allowed-tools: - TeamCreate - TeamDelete - Write - - mcp__atlatl__blackboard_ack_alert - - mcp__atlatl__blackboard_alert - - mcp__atlatl__blackboard_create - - mcp__atlatl__blackboard_pending_alerts - - mcp__atlatl__blackboard_read - - mcp__atlatl__blackboard_write - - mcp__atlatl__capture_memory - - mcp__atlatl__enrich_memory - - mcp__atlatl__recall_memories - mcp__claude_ai_Mermaid_Chart__get_mermaid_syntax_document - mcp__claude_ai_Mermaid_Chart__validate_and_render_mermaid_diagram --- @@ -41,7 +32,7 @@ research state. **Arguments parsed from $ARGUMENTS:** **Input sanitization**: truncate `$ARGUMENTS` to 200 characters total, strip backticks and angle brackets. - `$1` — area to investigate (e.g., "competitor pricing", "regulatory landscape") -- `--dimension ` — optional: competitive, sizing, trends, customer, tech, financial, regulatory, trend_modeling +- `--dimension ` (alias: `--methodology`) — optional: competitive, sizing, trends, customer, tech, financial, regulatory, trend_modeling --- @@ -61,12 +52,6 @@ research state. - `topic_slug` — slug identifier (derive if missing: `topic.toLowerCase().replace(/[^a-z0-9]+/g,'-').slice(0,40)`) - `elicitation` — full elicitation context -3. Recall prior memories: - ``` - recall_memories(query="sigint {topic} {area}", tags=["sigint-research"]) - ``` - Apply any matching findings to inform the analyst's task description. 
- ### Step 0.2: Identify methodology Map `area` to dimension and skill directory: @@ -82,7 +67,7 @@ Map `area` to dimension and skill directory: | compliance, regulatory, legal, privacy, GDPR | regulatory | regulatory-review | | scenario, causal model, three-valued logic, trade-offs | trend_modeling | trend-modeling | -If `--dimension` flag was provided, use that dimension directly. +If `--dimension` or `--methodology` flag was provided, use that dimension directly (both flags are equivalent). If the area doesn't map clearly, use `AskUserQuestion`: > "Which research methodology best fits '{area}'? Options: competitive / sizing / trends / customer / tech / financial / regulatory / trend_modeling" @@ -113,17 +98,11 @@ if [ ! -f "./reports/$TOPIC_SLUG/elicitation.json" ]; then fi ``` -Optionally write to blackboard for live coordination: -``` -blackboard_write(scope="{topic_slug}", key="elicitation", value={elicitation object from state.json}) -``` - --- ## Analyst Prompt Template: Task Discovery Protocol ``` -BLACKBOARD: {topic_slug} TASK DISCOVERY PROTOCOL: 1. Call TaskList to find tasks assigned to you (owner = your name). 2. Call TaskGet on your task to read the full description. @@ -150,15 +129,13 @@ Agent( run_in_background: true, prompt: "You are a dimension-analyst for {dimension} research on '{topic}'. - BLACKBOARD: {topic_slug} TOPIC_SLUG: {topic_slug} REPORTS_DIR: ./reports/{topic_slug} - Read key: elicitation (or fall back to ./reports/{topic_slug}/state.json) + Read elicitation from: ./reports/{topic_slug}/elicitation.json (or fall back to ./reports/{topic_slug}/state.json) Skill to load: skills/{skill_dir}/SKILL.md Your task ID: {task_id} Focus area: {area} - Prior context from memories: {summary of recalled memories, if any} CRITICAL: Use REPORTS_DIR exactly as provided for ALL file writes. Do NOT derive or re-slugify the output directory from the topic title. 
@@ -168,7 +145,6 @@ Agent( Write findings to: - File (mandatory): {REPORTS_DIR}/findings_{dimension}.json (with schema validation — STOP CHECK before proceeding) - - Blackboard (optional): blackboard_write(scope='{topic_slug}', key='findings_{dimension}', value={structured JSON}) {TASK DISCOVERY PROTOCOL from Phase 0.2} @@ -207,7 +183,7 @@ Wait for `SendMessage` from `dimension-analyst-{dimension}`. When message arrives: 1. Extract `findings_path` and `finding_count` from message. -2. Read findings from file: `./reports/{topic_slug}/findings_{dimension}.json` (primary). Fall back to `blackboard_read(scope="{topic_slug}", key="findings_{dimension}")` only if file is missing. +2. Read findings from file: `./reports/{topic_slug}/findings_{dimension}.json`. --- @@ -271,21 +247,7 @@ jq --arg slug "$TOPIC_SLUG" \ jq -e -f schemas/sigint-config.jq ./sigint.config.json > /dev/null ``` -### Step 3.4: Persist to Atlatl - -``` -capture_memory( - title: "{dimension} augment: {topic} — {area}", - namespace: "_semantic/knowledge", - memory_type: "semantic", - tags: ["sigint-research", "{topic_slug}", "{dimension}", "augment"], - confidence: 0.8, - content: "Key findings from {dimension} augmentation of {topic} on {area}: ..." -) -enrich_memory(id) -``` - -### Step 3.5: Present findings to user +### Step 3.4: Present findings to user Present a summary including: - Number of new findings (`finding_count`) @@ -323,8 +285,7 @@ TeamDelete("{team_name}") **If analyst doesn't complete within a reasonable time:** 1. Check for findings file: `./reports/{topic_slug}/findings_{dimension}.json` 2. If file exists → analyst wrote but didn't message → treat as complete, proceed to Phase 3 -3. If file missing, check blackboard: `blackboard_read(scope="{topic_slug}", key="findings_{dimension}")`. If found, **write recovered data to file** and proceed. -4. If no findings anywhere → inform user: "Augment analysis did not complete. The analyst may have encountered an error. 
You can retry with /sigint:augment." +3. If no findings file → inform user: "Augment analysis did not complete. The analyst may have encountered an error. You can retry with /sigint:augment." **If state.json is missing:** - "No active research session found for this topic. Run /sigint:start first." diff --git a/skills/competitive-analysis/SKILL.md b/skills/competitive-analysis/SKILL.md index b508ce7..6e06910 100644 --- a/skills/competitive-analysis/SKILL.md +++ b/skills/competitive-analysis/SKILL.md @@ -212,7 +212,6 @@ For detailed frameworks and templates, see: Dimension-specific confidence criteria below REFINE (not replace) these universal definitions. -- **Blackboard key**: `findings_competitive` - **Cross-reference dimensions**: sizing (validate market share figures), customer (switching costs, satisfaction gaps) - **Alert triggers**: - Major undiscovered competitor with >10% market share diff --git a/skills/customer-research/SKILL.md b/skills/customer-research/SKILL.md index c027d47..108d9d7 100644 --- a/skills/customer-research/SKILL.md +++ b/skills/customer-research/SKILL.md @@ -297,7 +297,6 @@ For detailed frameworks, see: Dimension-specific confidence criteria below REFINE (not replace) these universal definitions. -- **Blackboard key**: `findings_customer` - **Cross-reference dimensions**: competitive (feature gaps map to unmet needs), financial (willingness to pay, price sensitivity) - **Alert triggers**: - Unmet customer need with no existing solution in market diff --git a/skills/financial-analysis/SKILL.md b/skills/financial-analysis/SKILL.md index 97e9bda..494efb0 100644 --- a/skills/financial-analysis/SKILL.md +++ b/skills/financial-analysis/SKILL.md @@ -255,7 +255,6 @@ For detailed templates, see: Dimension-specific confidence criteria below REFINE (not replace) these universal definitions. 
-- **Blackboard key**: `findings_financial` - **Cross-reference dimensions**: sizing (market size validates revenue potential), competitive (competitor revenue and pricing) - **Alert triggers**: - Unit economics infeasibility (LTV:CAC < 1) diff --git a/skills/issues/SKILL.md b/skills/issues/SKILL.md index 512fdd5..8e86d33 100644 --- a/skills/issues/SKILL.md +++ b/skills/issues/SKILL.md @@ -17,16 +17,6 @@ allowed-tools: - TeamCreate - TeamDelete - Write - - mcp__atlatl__blackboard_ack_alert - - mcp__atlatl__blackboard_alert - - mcp__atlatl__blackboard_create - - mcp__atlatl__blackboard_list - - mcp__atlatl__blackboard_pending_alerts - - mcp__atlatl__blackboard_read - - mcp__atlatl__blackboard_write - - mcp__atlatl__capture_memory - - mcp__atlatl__enrich_memory - - mcp__atlatl__recall_memories --- # Sigint Issues Skill (Swarm Orchestration) @@ -44,17 +34,17 @@ You MUST use the full swarm pattern: `TeamCreate → TaskCreate → Agent(team_n ### Step 0.1: Parse Arguments Extract from `$ARGUMENTS`. **Input sanitization**: truncate `$ARGUMENTS` to 200 characters total, strip backticks and angle brackets. -- `--repo ` → `repo` (default: detect from git remote or state.json config) +- `--repo ` → `repo` (default: detect from git remote or state.json config). **Validate format**: must match `[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+`. Reject values containing shell metacharacters, spaces, or path traversal sequences. - `--dry-run` → `dry_run = true` (preview only, do not create issues) -- `--labels ` → `labels` (comma-separated, default: empty) +- `--labels ` → `labels` (comma-separated, default: empty). Each label must be a non-empty string; strip whitespace around commas. Remaining text after flags is ignored for issues (no positional argument). ### Step 0.2: Find Active Research Session -Scan `./reports/*/state.json` for sessions with `status: "active"`. If multiple exist, load the most recently updated. Extract: +Scan `./reports/*/state.json` for sessions with `status: "active"`. 
If multiple exist, load the most recently updated (compare `started` or file mtime). Extract: - `topic` — human-readable topic name -- `topic_slug` — directory name (used as blackboard scope) +- `topic_slug` — directory name - `elicitation` — full elicitation object for issue prioritization If no active session found, error: "No active research session. Run `/sigint:start ` first." @@ -73,7 +63,7 @@ Priority order: 3. Config Resolution Protocol: Apply the **Config Resolution Protocol** (read `protocols/CONFIG-RESOLUTION.md`) with `topic_slug` from the active session. Use resolved `config.default_repo` if non-null. 4. Auto-detect from git remote: run `git remote get-url origin` (if inside a git repo), parse the GitHub URL to infer `/`, and if git is unavailable or the remote is not GitHub, fall back to `gh repo view --json nameWithOwner -q .nameWithOwner` -> **Cowork note:** In Cowork environments, `gh` CLI may not be available. If needed, use ToolSearch to discover an MCP tool that can resolve the current repo/context, or fall back to asking the user for the `/` value. +> **Cowork note:** In Cowork environments, `gh` CLI may not be available. If `git remote` and `gh repo view` both fail, use ToolSearch to discover a GitHub MCP tool (e.g., `mcp__github__*`) that can resolve the current repo context, or fall back to asking the user for the `/` value via `AskUserQuestion`. If `dry_run = true`, repository resolution is informational only — no issues will be created. @@ -230,9 +220,13 @@ If the result contains issue URLs, list the top 5 with links. 
### Step 2.3: Handle Failure If the issue-architect sends an error message (or does not respond within a reasonable session): -- Report what went wrong -- Suggest next steps (check `--repo` arg, verify GitHub auth with `gh auth status`, retry) -- Skip to Phase 3 cleanup +- Report what went wrong with the specific error details +- Suggest next steps based on the error: + - **Auth failure**: "Verify GitHub auth with `gh auth status`" + - **Repo not found**: "Check `--repo` argument — ensure the repository exists and you have write access" + - **Rate limited**: "GitHub API rate limit hit — wait and retry, or use `--dry-run` to preview without API calls" + - **Generic failure**: "Retry with `/sigint:issues --repo {repo}`" +- Skip to Phase 3 cleanup (TeamDelete MUST still run even on failure) --- diff --git a/skills/issues/evals/evals.json b/skills/issues/evals/evals.json index b683536..b9c00be 100644 --- a/skills/issues/evals/evals.json +++ b/skills/issues/evals/evals.json @@ -309,6 +309,49 @@ "Results are displayed as a preview with category breakdown and issue titles/priorities", "TeamDelete is called for cleanup even though this was a dry run" ] + }, + { + "id": 9, + "prompt": "--repo testorg/testrepo create issues from my edge case research", + "expected_output": "The SKILL.md file contains NO references to Atlatl, blackboard, mcp__atlatl, recall_memories, capture_memory, inject_context, or blackboard scope annotations. 
The allowed-tools list contains only standard orchestration tools (Agent, Bash, Glob, Grep, Read, Write, SendMessage, TaskCreate, TaskGet, TaskList, TaskUpdate, TeamCreate, TeamDelete, AskUserQuestion).", + "files": [ + { + "path": "reports/edge-case-testing/state.json", + "content": "{\n \"topic\": \"Edge Case Testing\",\n \"topic_slug\": \"edge-case-testing\",\n \"started\": \"2026-04-10\",\n \"status\": \"active\",\n \"phase\": \"discovery\",\n \"elicitation\": {\n \"scope\": \"Edge case testing for issue creation\",\n \"decision_context\": \"QA validation\",\n \"priorities\": [\"Regression testing\"]\n }\n}" + } + ], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "testorg/testrepo", + "description": "--repo argument is parsed and used" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "TeamCreate.*sigint-edge-case-testing-issues", + "description": "Team name correctly uses the topic slug" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "issue-architect", + "description": "issue-architect agent is spawned" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "TeamDelete", + "description": "Cleanup phase executes TeamDelete" + } + ], + "expectations": [ + "The skill execution contains no references to Atlatl, blackboard, or memory tools", + "The allowed-tools in the skill frontmatter do not include any mcp__atlatl tools", + "The repo format validation accepts testorg/testrepo as a valid owner/repo pattern", + "The full swarm lifecycle completes: TeamCreate → TaskCreate → Agent → SendMessage → TeamDelete" + ] } ] } diff --git a/skills/market-sizing/SKILL.md b/skills/market-sizing/SKILL.md index 94ec557..a3ca594 100644 --- a/skills/market-sizing/SKILL.md +++ b/skills/market-sizing/SKILL.md @@ -271,7 +271,6 @@ For detailed templates and examples, see: Dimension-specific confidence criteria below REFINE (not replace) these universal definitions. 
-- **Blackboard key**: `findings_sizing` - **Cross-reference dimensions**: financial (revenue validation), competitive (player count and share) - **Alert triggers**: - TAM deviation >30% from initial expectations diff --git a/skills/migrate/SKILL.md b/skills/migrate/SKILL.md index b7255c1..55eb6e6 100644 --- a/skills/migrate/SKILL.md +++ b/skills/migrate/SKILL.md @@ -1,10 +1,11 @@ --- name: migrate -description: Migrate legacy sigint configuration (sigint.local.md or .sigint.config.json v1.0) to sigint.config.json v2.0 with per-topic support. Safe, idempotent, supports dry-run preview. +description: Migrate legacy sigint configuration (sigint.local.md or .sigint.config.json v1.0) to sigint.config.json v2.0 with per-topic support and CONTEXT.md generation. Safe, idempotent, supports --dry-run preview and --global flag. Handles both YAML frontmatter and markdown section formats. argument-hint: "[--dry-run] [--global]" allowed-tools: - Read - Write + - Glob - Bash - AskUserQuestion @@ -31,7 +32,7 @@ Parse `$ARGUMENTS`: Check for each (silent, no errors if missing): - `project_local_md` — `./.claude/sigint.local.md` exists - `global_local_md` — `~/.claude/sigint.local.md` exists (relevant only if migrate_global) -- `project_config_v1` — `./.sigint.config.json` exists and its version field is "1.0" +- `project_config_v1` — `./.sigint.config.json` exists and its version field is "1.0" (if version is missing or not "1.0", ignore this file and emit: "Skipping .sigint.config.json — version is not 1.0.") - `target_exists` — `./sigint.config.json` exists ### Step 0.2: Check for existing v2.0 target @@ -64,13 +65,22 @@ If no source files found AND no v1.0 target: ### Step 1.1: Parse project sigint.local.md (if exists) -Read `./.claude/sigint.local.md`. Extract YAML frontmatter: +Read `./.claude/sigint.local.md`. The file may use either format: + +**Format A — YAML frontmatter** (between `---` delimiters): +Extract `default_repo`, `report_format`, `audiences` from frontmatter. 
+Extract markdown body (everything after closing `---` separator). Store as `local_md_body`. + +**Format B — Markdown sections** (## headings with values on next line): +Parse `## Report Format`, `## Default Repo`, `## Audiences` headings. +The value is the text on the line(s) following the heading. +For `audiences`, split comma-separated values into an array. +Store remaining content as `local_md_body`. + +Extract: - `default_repo` (string or null) - `report_format` (string or null) - `audiences` (array or null) -- `auto_atlatl` (boolean or null) - -Extract markdown body (everything after closing `---` separator). Store as `local_md_body`. ### Step 1.2: Parse global sigint.local.md (if migrate_global AND exists) @@ -78,7 +88,11 @@ Same extraction from `~/.claude/sigint.local.md`. Store as `global_defaults_raw` ### Step 1.3: Parse .sigint.config.json v1.0 (if exists) -Read and parse. Extract `research.maxDimensions`, `research.dimensionTimeout`, `research.defaultPriorities`. Store as `v1_research_config`. +Read and parse as JSON. If the file is malformed JSON: +- Output: "WARNING: .sigint.config.json is not valid JSON — skipping. Will use defaults." +- Continue with empty `v1_research_config`. + +If valid, extract `research.maxDimensions`, `research.dimensionTimeout`, `research.defaultPriorities`. Store as `v1_research_config`. ### Step 1.4: Discover existing topics from reports @@ -98,8 +112,7 @@ For each match, read and extract `topic_slug` (from directory name in glob path) "defaults": { "default_repo": , "report_format": , - "audiences": , - "auto_atlatl": + "audiences": } ``` @@ -175,7 +188,6 @@ Resolved defaults: default_repo: {value or "not set"} report_format: {value} audiences: {value} - auto_atlatl: {value} maxDimensions: {value} ``` @@ -199,14 +211,27 @@ AskUserQuestion( (Skipped if dry_run = true.) 
-### Step 5.1: Write CONTEXT.md files +### Step 5.1: Back up legacy files FIRST + +**Back up before writing anything** to ensure no data loss if a subsequent step fails. + +If a `.bak` file already exists at the target path, use a timestamped suffix instead (e.g., `.bak.20260402`) to avoid overwriting previous backups. + +``` +Bash: mv ./.claude/sigint.local.md ./.claude/sigint.local.md.bak (if existed) +Bash: mv ./.sigint.config.json ./.sigint.config.json.bak (if existed) +If migrate_global AND ~/.claude/sigint.local.md exists: + Bash: mv ~/.claude/sigint.local.md ~/.claude/sigint.local.md.bak +``` + +### Step 5.2: Write CONTEXT.md files For each topic where CONTEXT.md does not already exist: ``` Write("./reports/{slug}/CONTEXT.md", content) ``` -### Step 5.2: Write sigint.config.json +### Step 5.3: Write sigint.config.json Write using jq (per Structured Data Protocol): ```bash @@ -219,22 +244,24 @@ jq -n \ jq -e -f schemas/sigint-config.jq ./sigint.config.json > /dev/null ``` -### Step 5.3: Rename legacy files to .bak - -If a `.bak` file already exists at the target path, use a timestamped suffix instead (e.g., `.bak.20260402`) to avoid overwriting previous backups. - +If `schemas/sigint-config.jq` does not exist, skip schema validation and emit a warning: ``` -Bash: mv ./.claude/sigint.local.md ./.claude/sigint.local.md.bak (if existed) -Bash: mv ./.sigint.config.json ./.sigint.config.json.bak (if existed) -If migrate_global AND ~/.claude/sigint.local.md exists: - Bash: mv ~/.claude/sigint.local.md ~/.claude/sigint.local.md.bak +WARNING: Schema file schemas/sigint-config.jq not found — skipping validation. Run /sigint:init to generate it. ``` ### Step 5.4: Update .gitignore -Read `.gitignore`. Find the `.claude/sigint.local.md` entry: -- Replace it with `sigint.config.json` -- If not found, append: +If `.gitignore` exists: +- Read it. 
Find a line containing `.claude/sigint.local.md`: + - If found, use `sed` via Bash to replace that line with `sigint.config.json` + - If not found, check if `sigint.config.json` is already listed. If not, append via Bash: + ``` + # Sigint local config (contains user-specific settings) + sigint.config.json + ``` + +If `.gitignore` does not exist: +- Create it with: ``` # Sigint local config (contains user-specific settings) sigint.config.json @@ -245,7 +272,10 @@ Read `.gitignore`. Find the `.claude/sigint.local.md` entry: ## Phase 6: Completion Output ``` -Migration complete. +Migration complete (v1.0 → v2.0). + +Sources migrated: + {list of source files that were parsed} Written: sigint.config.json (v2.0) @@ -254,6 +284,12 @@ Written: Backed up: {each .bak file created} +Settings migrated: + default_repo: {value or "not set"} + report_format: {value} + audiences: {comma-separated list} + maxDimensions: {value} + {count} topic(s) registered: {slug list} Next steps: diff --git a/skills/migrate/evals/evals.json b/skills/migrate/evals/evals.json index 9131502..0322ccb 100644 --- a/skills/migrate/evals/evals.json +++ b/skills/migrate/evals/evals.json @@ -29,13 +29,32 @@ "file": "transcript.md", "pattern": "\\.bak|backup", "description": "A backup is created before migration" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "zircote/sigint", + "description": "The default_repo value is preserved in migration" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "executives", + "description": "The audiences value is preserved in migration" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "Migration complete|migration complete", + "description": "The completion message is shown" } ], "expectations": [ "The skill detects .claude/sigint.local.md as a legacy config source", - "Settings are parsed from the legacy markdown format", - "A .bak backup is created before overwriting", - "The new 
sigint.config.json has version 2.0 with migrated settings" + "Settings are parsed from the legacy markdown format (## headings)", + "A .bak backup is created BEFORE writing new files", + "The new sigint.config.json has version 2.0 with migrated settings", + "The default_repo, report_format, and audiences values are preserved" ] }, { @@ -49,11 +68,18 @@ "file": "transcript.md", "pattern": "[Nn]othing to migrate|[Nn]o legacy|not found", "description": "The skill reports no legacy config found" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "/sigint:init", + "description": "The skill suggests running /sigint:init for fresh config" } ], "expectations": [ "The skill checks for legacy config files and finds none", "A clear message indicates nothing to migrate", + "The skill suggests /sigint:init as the next step", "No files are written or modified" ] }, @@ -73,13 +99,142 @@ "file": "transcript.md", "pattern": "dry.run|preview|would", "description": "The output indicates dry-run mode (preview only)" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "DRY RUN|[Nn]o files modified", + "description": "The dry-run exit message is shown" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "2\\.0|v2", + "description": "The preview references v2.0 format" } ], "expectations": [ "The --dry-run flag is parsed and respected", "Legacy config is detected and parsed", "The would-be output is shown but no files are written", - "The preview shows the v2.0 JSON structure" + "The preview shows the v2.0 JSON structure", + "The DRY RUN exit message is displayed" + ] + }, + { + "id": 4, + "prompt": "migrate config", + "expected_output": "When sigint.config.json v2.0 already exists and a legacy source is present, the skill asks the user whether to merge, overwrite, or cancel.", + "files": [ + { + "path": "sigint.config.json", + "content": "{\n \"version\": \"2.0\",\n \"defaults\": {\n \"default_repo\": 
\"zircote/sigint\",\n \"report_format\": \"markdown\",\n \"audiences\": [\"technical\"]\n },\n \"research\": {},\n \"topics\": {}\n}" + }, + { + "path": ".sigint.config.json", + "content": "{\n \"version\": \"1.0\",\n \"report_format\": \"html\",\n \"default_repo\": \"zircote/sigint\"\n}" + } + ], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "already exists|v2\\.0 already", + "description": "The skill detects existing v2.0 config" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "[Mm]erge|[Oo]verwrite|[Cc]ancel", + "description": "The skill offers merge/overwrite/cancel options" + } + ], + "expectations": [ + "The skill detects sigint.config.json v2.0 already exists", + "The skill detects .sigint.config.json v1.0 as a legacy source", + "AskUserQuestion is called with Merge/Overwrite/Cancel options", + "The skill handles the user's choice appropriately" + ] + }, + { + "id": 5, + "prompt": "migrate config", + "expected_output": "When .sigint.config.json exists but contains malformed JSON, the skill warns about the invalid file and continues with defaults.", + "files": [ + { + "path": ".sigint.config.json", + "content": "{ invalid json here }" + } + ], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "[Ww]arning|WARNING|invalid|malformed|not valid JSON", + "description": "The skill warns about malformed JSON" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "default|skip", + "description": "The skill falls back to defaults or skips the file" + } + ], + "expectations": [ + "The skill detects .sigint.config.json but fails to parse it", + "A warning about invalid JSON is emitted", + "The skill continues gracefully with defaults rather than crashing", + "Migration still produces a valid v2.0 config if other sources exist, or exits cleanly if none" + ] + }, + { + "id": 6, + "prompt": "migrate my sigint config", + 
"expected_output": "Skill handles YAML frontmatter format in sigint.local.md, extracting settings from frontmatter and preserving markdown body as CONTEXT.md content.", + "files": [ + { + "path": ".claude/sigint.local.md", + "content": "---\ndefault_repo: zircote/sigint\nreport_format: html\naudiences:\n - technical\n - executives\n - investors\n---\n\n# My Research Context\n\nThis project focuses on competitive intelligence in the cloud infrastructure market." + } + ], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "sigint\\.local\\.md", + "description": "The legacy sigint.local.md is detected" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "frontmatter|Format A|YAML", + "description": "YAML frontmatter format is recognized" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "investors", + "description": "The investors audience is extracted from YAML array" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "html", + "description": "The html report_format is preserved" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "\\.bak|backup", + "description": "Backup is created" + } + ], + "expectations": [ + "YAML frontmatter is detected and parsed (Format A)", + "All three audiences are extracted from the YAML array", + "report_format: html is preserved", + "Markdown body is stored for CONTEXT.md generation", + "Migration completes successfully with v2.0 config" ] } ] diff --git a/skills/regulatory-review/SKILL.md b/skills/regulatory-review/SKILL.md index b916f65..23ec8b4 100644 --- a/skills/regulatory-review/SKILL.md +++ b/skills/regulatory-review/SKILL.md @@ -382,7 +382,6 @@ For detailed frameworks, see: Dimension-specific confidence criteria below REFINE (not replace) these universal definitions. 
-- **Blackboard key**: `findings_regulatory` - **Cross-reference dimensions**: trends (regulatory trends and policy direction), competitive (compliance status of competitors) - **Alert triggers**: - New regulation with <12 months to compliance deadline diff --git a/skills/report-autoresearch/evals.json b/skills/report-autoresearch/evals.json deleted file mode 100644 index da79d3d..0000000 --- a/skills/report-autoresearch/evals.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "skill_name": "report-autoresearch", - "evals": [ - { - "id": "9-section-report", - "name": "eval-9-section-report", - "description": "Verify report generates all 9 sections with not-assessed placeholders for missing dimensions", - "path": "iteration-0/eval-9-section-report" - }, - { - "id": "audience-executive", - "name": "eval-audience-executive", - "description": "Verify --audience executives transform reorders sections and generates executive-summary.md", - "path": "iteration-0/eval-audience-executive" - }, - { - "id": "swot-mermaid", - "name": "eval-swot-mermaid", - "description": "Verify SWOT quadrant Mermaid diagram is generated when 2+ dimensions have findings", - "path": "iteration-0/eval-swot-mermaid" - }, - { - "id": "positioning-map", - "name": "eval-positioning-map", - "description": "Verify competitive positioning map is generated when 2+ competitors with 2+ attributes", - "path": "iteration-0/eval-positioning-map" - }, - { - "id": "html-format", - "name": "eval-html-format", - "description": "Verify --format html produces valid HTML output with inline CSS", - "path": "iteration-0/eval-html-format" - } - ] -} diff --git a/skills/report-autoresearch/iteration-0/eval-9-section-report/grading.json b/skills/report-autoresearch/iteration-0/eval-9-section-report/grading.json deleted file mode 100644 index 075b631..0000000 --- a/skills/report-autoresearch/iteration-0/eval-9-section-report/grading.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "eval_id": "9-section-report", - "skill": "report", - 
"description": "Verify report generates all 9 sections with not-assessed placeholders for missing dimensions", - "input": "Generate report with competitive and sizing findings only", - "context": { - "findings_present": ["competitive", "sizing"], - "findings_absent": ["trends", "customer", "tech", "financial", "regulatory"] - }, - "expected": { - "sections_generated": 9, - "sections_with_data": ["executive-summary", "market-overview", "market-sizing", "competitive"], - "sections_with_placeholder": ["trends", "swot", "recommendations", "risk", "appendix"], - "placeholder_contains_augment_suggestion": true - }, - "grading_criteria": [ - "All 9 sections are present in the generated report", - "Sections with findings contain real content", - "Sections without findings contain 'not assessed' placeholder and /sigint:augment suggestion", - "Executive summary is always generated regardless of dimension coverage", - "SWOT is generated (partial) from available competitive and sizing data" - ] -} diff --git a/skills/report-autoresearch/iteration-0/eval-9-section-report/outputs/.gitkeep b/skills/report-autoresearch/iteration-0/eval-9-section-report/outputs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/skills/report-autoresearch/iteration-0/eval-audience-executive/grading.json b/skills/report-autoresearch/iteration-0/eval-audience-executive/grading.json deleted file mode 100644 index 9a06526..0000000 --- a/skills/report-autoresearch/iteration-0/eval-audience-executive/grading.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "eval_id": "audience-executive", - "skill": "report", - "description": "Verify --audience executives transform reorders sections and generates executive-summary.md", - "input": "--audience executives", - "context": { - "findings_present": ["competitive", "sizing", "trends", "regulatory"] - }, - "expected": { - "section_order_starts_with": ["executive-summary", "recommendations", "risk"], - "standalone_executive_summary_generated": true, - 
"technical_jargon_replaced": true, - "strategic_implication_labels_present": true, - "methodology_notes_in_appendix": true - }, - "grading_criteria": [ - "Section order places executive-summary, recommendations, risk first", - "Standalone YYYY-MM-DD-executive-summary.md is generated", - "TAM is replaced with 'total market opportunity' in executive output", - "Strategic Implication labels prefix key findings", - "Methodology notes are moved to appendix" - ] -} diff --git a/skills/report-autoresearch/iteration-0/eval-audience-executive/outputs/.gitkeep b/skills/report-autoresearch/iteration-0/eval-audience-executive/outputs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/skills/report-autoresearch/iteration-0/eval-html-format/grading.json b/skills/report-autoresearch/iteration-0/eval-html-format/grading.json deleted file mode 100644 index 3737075..0000000 --- a/skills/report-autoresearch/iteration-0/eval-html-format/grading.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "eval_id": "html-format", - "skill": "report", - "description": "Verify --format html produces valid HTML output with inline CSS", - "input": "--format html", - "context": { - "findings_present": ["competitive", "sizing"] - }, - "expected": { - "html_file_generated": true, - "valid_html_structure": true, - "inline_css_only": true, - "mermaid_in_div_tags": true, - "no_external_css_files": true - }, - "grading_criteria": [ - "YYYY-MM-DD-report.html is generated in the reports directory", - "File contains valid HTML with DOCTYPE, head, body tags", - "Styles are inline only — no external .css file references", - "Mermaid code blocks are wrapped in
", - "Markdown tables are converted to HTML elements" - ] -} diff --git a/skills/report-autoresearch/iteration-0/eval-html-format/outputs/.gitkeep b/skills/report-autoresearch/iteration-0/eval-html-format/outputs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/skills/report-autoresearch/iteration-0/eval-positioning-map/grading.json b/skills/report-autoresearch/iteration-0/eval-positioning-map/grading.json deleted file mode 100644 index a2248ac..0000000 --- a/skills/report-autoresearch/iteration-0/eval-positioning-map/grading.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "eval_id": "positioning-map", - "skill": "report", - "description": "Verify competitive positioning map is generated when 2+ competitors with 2+ attributes", - "input": "Generate competitive section with 3 competitors", - "context": { - "findings_present": ["competitive"], - "competitors": [ - {"name": "Competitor A", "feature_score": 0.8, "price_score": 0.9}, - {"name": "Competitor B", "feature_score": 0.6, "price_score": 0.4}, - {"name": "Competitor C", "feature_score": 0.4, "price_score": 0.3} - ] - }, - "expected": { - "positioning_map_generated": true, - "diagram_type": "quadrantChart", - "all_competitors_plotted": true, - "axes_labeled": true - }, - "grading_criteria": [ - "Competitive positioning quadrantChart is generated with 2+ competitors", - "All 3 competitors appear as data points", - "Axes are labeled (feature set vs price)", - "Diagram is omitted when fewer than 2 competitors have comparable attributes" - ] -} diff --git a/skills/report-autoresearch/iteration-0/eval-positioning-map/outputs/.gitkeep b/skills/report-autoresearch/iteration-0/eval-positioning-map/outputs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/skills/report-autoresearch/iteration-0/eval-swot-mermaid/grading.json b/skills/report-autoresearch/iteration-0/eval-swot-mermaid/grading.json deleted file mode 100644 index e6dceb3..0000000 --- 
a/skills/report-autoresearch/iteration-0/eval-swot-mermaid/grading.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "eval_id": "swot-mermaid", - "skill": "report", - "description": "Verify SWOT quadrant Mermaid diagram is generated when 2+ dimensions have findings", - "input": "Generate report with competitive and trends findings", - "context": { - "findings_present": ["competitive", "trends"], - "cross_dimension_synthesis": true - }, - "expected": { - "swot_section_generated": true, - "mermaid_diagram_present": true, - "diagram_type": "quadrantChart", - "swot_quadrants": ["Strengths", "Weaknesses", "Opportunities", "Threats"] - }, - "grading_criteria": [ - "SWOT quadrantChart diagram is generated when cross-dimension synthesis is possible", - "All 4 SWOT quadrants are labeled correctly", - "Findings from multiple dimensions are synthesized into SWOT entries", - "Diagram follows the existing quadrantChart template pattern" - ] -} diff --git a/skills/report-autoresearch/iteration-0/eval-swot-mermaid/outputs/.gitkeep b/skills/report-autoresearch/iteration-0/eval-swot-mermaid/outputs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/skills/report-writing/SKILL.md b/skills/report-writing/SKILL.md index 9a15d47..cdedf88 100644 --- a/skills/report-writing/SKILL.md +++ b/skills/report-writing/SKILL.md @@ -328,7 +328,6 @@ For detailed templates, see: ## Orchestration Hints -- **Blackboard key**: N/A (report-writing is a synthesis skill, not a research dimension) - **Cross-reference dimensions**: N/A — consumes all dimensions' findings - **Alert triggers**: N/A - **Confidence rules**: Report confidence inherits from source findings; flag any section relying on low-confidence data diff --git a/skills/report/SKILL.md b/skills/report/SKILL.md index 796eb75..465689e 100644 --- a/skills/report/SKILL.md +++ b/skills/report/SKILL.md @@ -2,7 +2,7 @@ name: report description: Generate a comprehensive market research report from current findings. 
Orchestrates report-synthesizer using full swarm pattern with TeamCreate, TaskCreate, SendMessage, and TeamDelete. argument-hint: "[--format markdown|html|both] [--audience executives|pm|investors|dev|all] [--sections executive-summary,market-overview,market-sizing,competitive,trends,swot,recommendations,risk,appendix|all]" -allowed-tools: Agent, AskUserQuestion, Glob, Grep, Read, SendMessage, TaskCreate, TaskGet, TaskList, TaskUpdate, TeamCreate, TeamDelete, Write +allowed-tools: Agent, AskUserQuestion, Bash, Glob, Grep, Read, SendMessage, TaskCreate, TaskGet, TaskList, TaskUpdate, TeamCreate, TeamDelete, Write --- Generate a comprehensive market research report from current research findings. @@ -29,13 +29,15 @@ Generate a comprehensive market research report from current research findings. ## Phase 0: Parse Arguments + Initialize Parse `$ARGUMENTS` to extract: -- `format` → default `markdown` -- `audience` → default `all` -- `sections` → default `all` +- `format` → default `markdown`. Validate: must be one of `markdown`, `html`, `both`. If invalid, warn user and fall back to `markdown`. +- `audience` → default `all`. Validate: must be one of `executives`, `pm`, `investors`, `dev`, `all`. If invalid, warn user and fall back to `all`. +- `sections` → default `all`. Validate: each comma-separated value must be one of: `executive-summary`, `market-overview`, `market-sizing`, `competitive`, `trends`, `swot`, `recommendations`, `risk`, `appendix`, or `all`. Strip whitespace around each value. If any value is unrecognized, warn the user with the invalid value and list valid options, then proceed with only the valid sections. 
**Determine topic slug and reports directory:** -- Read `./reports/` directory to find the most recent report folder (or read `state.json` from the most recent session) +- Read `./reports/` directory to find report folders containing `state.json` +- If multiple sessions exist, select the most recently modified `state.json` (by file timestamp) - Extract `topic_slug` from state.json's `topic_slug` field +- **Validate state.json**: Confirm `topic_slug` is a non-empty string and `status` is one of `"active"` or `"complete"`. If state.json is malformed or missing required fields, inform the user: "Research session state.json is corrupted or incomplete. Re-run `/sigint:start <topic>` to reinitialize." - If no reports directory exists, inform the user: "No research session found. Run `/sigint:start <topic>` first." - **Resolve `reports_dir` from config** (REQUIRED — do not hardcode paths): ```bash @@ -55,7 +57,7 @@ Step 0.2 — **TaskCreate** the synthesizer task: ``` TaskCreate({ subject: "Generate report: {format} / {audience}", owner: "report-synthesizer", - description: "Synthesize all findings from state.json and blackboard into a complete report." + description: "Synthesize all findings from state.json into a complete report." }) ``` Note the returned task ID as `{reportTaskId}`. @@ -73,12 +75,6 @@ Agent( name: "report-synthesizer", run_in_background: true, prompt: """ - [ATLATL CONTEXT] - Atlatl MCP tools are available for persistent memory. - Search: recall_memories(query="sigint {topic} report") before starting. - Capture findings after completing. - - BLACKBOARD: {topic_slug} Task Discovery Protocol: 1. TaskList → find tasks assigned to you (owner: "report-synthesizer") 2. 
TaskGet → read full task description @@ -93,9 +89,13 @@ Agent( - audience: {audience} - sections: {sections} - state_file: {reports_dir}/state.json - - blackboard scope: {topic_slug} (read findings_* keys for dimension data) - output_dir: {reports_dir}/ - - date: Replace YYYY-MM-DD in file names with today's date in ISO format (e.g., 2026-04-02) + - date: Replace YYYY-MM-DD in file names with today's actual date in ISO format + + FORMAT-SPECIFIC OUTPUT: + - If format is "markdown": write {reports_dir}/YYYY-MM-DD-report.md + - If format is "html": write {reports_dir}/YYYY-MM-DD-report.html + - If format is "both": write BOTH .md and .html files TASK: #{reportTaskId} — Generate report: {format} / {audience} @@ -103,8 +103,8 @@ Agent( SendMessage( to: "team-lead", message: { - files: ["{reports_dir}/YYYY-MM-DD-report.md", ...], # Replace YYYY-MM-DD with today's date in ISO format - formats_generated: ["{format}"], + files: ["list of all written file paths"], + formats_generated: ["list of formats actually written"], summary: "one-line summary of the key finding" }, summary: "Report generated: {N} sections, {format}" @@ -131,8 +131,14 @@ Wait for SendMessage from `report-synthesizer` containing: - `formats_generated` — list of formats written - `summary` — one-line finding summary +**Timeout handling**: If no response is received within 120 seconds, send a status check: +``` +SendMessage(to: "report-synthesizer", message: "Status check — are you still working on task #{reportTaskId}?", summary: "Status check") +``` +If still no response after an additional 60 seconds, inform the user: "Report generation timed out. The report-synthesizer may have encountered an error. Check the reports directory for partial output, then retry with `/sigint:report`." + Once received: -1. Verify files exist using Read or Glob +1. Verify files exist using Read or Glob. If any reported file is missing, warn the user about the missing file(s) but still present any files that do exist. 2. 
Present to user: ``` @@ -153,6 +159,8 @@ Next steps: ## Phase 3: Cleanup +Always attempt cleanup even if Phase 2 encountered errors: + ``` SendMessage( to: "report-synthesizer", @@ -160,3 +168,5 @@ SendMessage( ) TeamDelete("sigint-{topic_slug}-report") ``` + +If TeamDelete fails (e.g., team already cleaned up), log the warning but do not surface it to the user — the report was already delivered. diff --git a/skills/report/evals/evals.json b/skills/report/evals/evals.json index 6571c66..2d699ae 100644 --- a/skills/report/evals/evals.json +++ b/skills/report/evals/evals.json @@ -92,6 +92,126 @@ "The --audience executives argument is parsed and passed to the synthesizer", "The synthesizer prompt includes both parameters" ] + }, + { + "id": 4, + "prompt": "--sections competitive,trends generate report", + "expected_output": "Skill validates the --sections argument, confirms competitive and trends are valid section names, and passes only those sections to the report-synthesizer.", + "files": [ + { + "path": "reports/test-topic/state.json", + "content": "{\n \"topic\": \"Test Topic\",\n \"topic_slug\": \"test-topic\",\n \"started\": \"2026-04-01\",\n \"status\": \"active\",\n \"findings\": [],\n \"elicitation\": {}\n}" + } + ], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "competitive", + "description": "competitive section is included" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "trends", + "description": "trends section is included" + } + ], + "expectations": [ + "The --sections competitive,trends argument is validated against valid section names", + "Only competitive and trends sections are passed to the synthesizer", + "The synthesizer prompt specifies sections: competitive,trends (not all)" + ] + }, + { + "id": 5, + "prompt": "generate report from corrupted session", + "expected_output": "When state.json is malformed (missing topic_slug), the skill detects the error and informs the 
user about the corrupted state.", + "files": [ + { + "path": "reports/broken-session/state.json", + "content": "{\n \"topic\": \"Broken\",\n \"started\": \"2026-04-01\"\n}" + } + ], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "corrupted|incomplete|malformed|missing.*field|invalid.*state", + "description": "Error message mentions corrupted/incomplete state" + } + ], + "expectations": [ + "The skill detects that state.json is missing the topic_slug field", + "An informative error message is displayed", + "No swarm orchestration is attempted with invalid state" + ] + }, + { + "id": 6, + "prompt": "--format both generate report", + "expected_output": "Skill parses --format both and instructs the report-synthesizer to generate both markdown and HTML output files.", + "files": [ + { + "path": "reports/test-topic/state.json", + "content": "{\n \"topic\": \"Test Topic\",\n \"topic_slug\": \"test-topic\",\n \"started\": \"2026-04-01\",\n \"status\": \"active\",\n \"findings\": [],\n \"elicitation\": {}\n}" + } + ], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "both", + "description": "Both format is referenced in the transcript" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "\\.md.*\\.html|\\.html.*\\.md", + "description": "Both .md and .html file extensions appear in output" + } + ], + "expectations": [ + "The --format both argument is parsed correctly", + "The synthesizer is instructed to produce both markdown and HTML files", + "The completion message lists both .md and .html files" + ] + }, + { + "id": 7, + "prompt": "--sections competitive,bogus-section,trends generate report", + "expected_output": "Skill validates section names, warns about 'bogus-section' being unrecognized, lists valid options, and proceeds with only competitive and trends.", + "files": [ + { + "path": "reports/test-topic/state.json", + "content": "{\n \"topic\": 
\"Test Topic\",\n \"topic_slug\": \"test-topic\",\n \"started\": \"2026-04-01\",\n \"status\": \"active\",\n \"findings\": [],\n \"elicitation\": {}\n}" + } + ], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "bogus-section", + "description": "The invalid section name is mentioned in the warning" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "unrecognized|invalid|unknown", + "description": "A warning about the invalid section is present" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "competitive.*trends|trends.*competitive", + "description": "Valid sections competitive and trends are still used" + } + ], + "expectations": [ + "The invalid section bogus-section is flagged with a warning", + "Valid section names are listed for the user", + "The report proceeds with only the valid sections (competitive, trends)" + ] } ] } diff --git a/skills/start-autoresearch/evals.json b/skills/start-autoresearch/evals.json deleted file mode 100644 index b4acbf5..0000000 --- a/skills/start-autoresearch/evals.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "skill_name": "start-autoresearch", - "evals": [ - { - "id": "dimension-selection", - "name": "eval-dimension-selection", - "description": "Verify Phase 1.5 presents dimension selection UI and respects user confirmation", - "path": "iteration-0/eval-dimension-selection" - }, - { - "id": "methodology-loading", - "name": "eval-methodology-loading", - "description": "Verify dimension-analysts load SKILL.md methodology before conducting research", - "path": "iteration-0/eval-methodology-loading" - }, - { - "id": "methodology-gate", - "name": "eval-methodology-gate", - "description": "Verify Phase 2.75 hard-fails on missing required frameworks and retries with gap targeting", - "path": "iteration-0/eval-methodology-gate" - } - ] -} diff --git a/skills/start-autoresearch/iteration-0/eval-dimension-selection/grading.json 
b/skills/start-autoresearch/iteration-0/eval-dimension-selection/grading.json deleted file mode 100644 index df3adc9..0000000 --- a/skills/start-autoresearch/iteration-0/eval-dimension-selection/grading.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "eval_id": "dimension-selection", - "skill": "start", - "description": "Verify Phase 1.5 presents dimension selection UI and respects user confirmation", - "input": "Research the market for enterprise Kubernetes security tooling", - "context": { - "max_dimensions": 5, - "elicitation_complete": true, - "topic": "enterprise kubernetes security tooling", - "topic_slug": "enterprise-kubernetes-security-tooling" - }, - "expected": { - "dimension_selection_presented": true, - "dimensions_shown": ["competitive", "sizing", "trends", "tech", "regulatory", "customer", "financial", "trend_modeling"], - "rationale_per_dimension": true, - "selected_count_lte_max": true, - "user_confirm_requested": true - }, - "grading_criteria": [ - "Phase 1.5 dimension selection UI is presented after elicitation", - "All 8 standard dimensions are shown with include/exclude rationale", - "Selected count does not exceed max_dimensions (5)", - "AskUserQuestion is used to capture user confirmation", - "State.json elicitation.dimensions is updated with confirmed selection" - ] -} diff --git a/skills/start-autoresearch/iteration-0/eval-dimension-selection/outputs/.gitkeep b/skills/start-autoresearch/iteration-0/eval-dimension-selection/outputs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/skills/start-autoresearch/iteration-0/eval-methodology-gate/grading.json b/skills/start-autoresearch/iteration-0/eval-methodology-gate/grading.json deleted file mode 100644 index de69463..0000000 --- a/skills/start-autoresearch/iteration-0/eval-methodology-gate/grading.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "eval_id": "methodology-gate", - "skill": "start", - "description": "Verify Phase 2.75 hard-fails on missing required frameworks and retries with 
gap targeting", - "input": "Research market sizing for edge computing platforms", - "context": { - "dimension": "sizing", - "methodology_gaps": ["TAM/SAM/SOM Hierarchy", "Scenario Modeling"], - "retry_count": 0 - }, - "expected": { - "gate_overridden_to_fail": true, - "retry_spawned": true, - "retry_prompt_contains_gap_list": true, - "max_retries_respected": 2 - }, - "grading_criteria": [ - "Codex review gate overrides to fail when required frameworks are missing", - "Retry analyst is spawned with specific gap list in prompt", - "Retry analyst focuses research on missing TAM/SAM/SOM and Scenario Modeling frameworks", - "After 2 failed retries, findings proceed with methodology_gaps_unresolved field set" - ] -} diff --git a/skills/start-autoresearch/iteration-0/eval-methodology-gate/outputs/.gitkeep b/skills/start-autoresearch/iteration-0/eval-methodology-gate/outputs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/skills/start-autoresearch/iteration-0/eval-methodology-loading/grading.json b/skills/start-autoresearch/iteration-0/eval-methodology-loading/grading.json deleted file mode 100644 index 24c0b37..0000000 --- a/skills/start-autoresearch/iteration-0/eval-methodology-loading/grading.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "eval_id": "methodology-loading", - "skill": "start", - "description": "Verify dimension-analysts load SKILL.md methodology before conducting research", - "input": "Research competitive landscape of AI observability platforms", - "context": { - "dimension": "competitive", - "skill_dir": "competitive-analysis" - }, - "expected": { - "skill_md_loaded": true, - "methodology_plan_written": true, - "required_frameworks_extracted": ["Porter's 5 Forces", "Competitor Matrix", "Positioning Map", "Trend Indicators"], - "research_starts_after_plan": true - }, - "grading_criteria": [ - "Analyst reads skills/competitive-analysis/SKILL.md before starting research", - "methodology_plan_competitive.json is written before any WebSearch 
calls", - "methodology_plan includes all 4 required frameworks from the skill", - "Analyst proceeds to research only after Step 4 STOP CHECK passes" - ] -} diff --git a/skills/start-autoresearch/iteration-0/eval-methodology-loading/outputs/.gitkeep b/skills/start-autoresearch/iteration-0/eval-methodology-loading/outputs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/skills/start/SKILL.md b/skills/start/SKILL.md index 4e0a498..21cc689 100644 --- a/skills/start/SKILL.md +++ b/skills/start/SKILL.md @@ -17,15 +17,6 @@ allowed-tools: - TeamCreate - TeamDelete - Write - - mcp__atlatl__blackboard_ack_alert - - mcp__atlatl__blackboard_alert - - mcp__atlatl__blackboard_create - - mcp__atlatl__blackboard_pending_alerts - - mcp__atlatl__blackboard_read - - mcp__atlatl__blackboard_write - - mcp__atlatl__capture_memory - - mcp__atlatl__enrich_memory - - mcp__atlatl__recall_memories --- # Sigint Start Skill (Launcher) @@ -120,7 +111,7 @@ Agent( {If resuming: PRIOR_ELICITATION: {prior elicitation JSON}} Execute the full research orchestration: - 1. Initialize team and blackboard (Phase 0) + 1. Initialize team (Phase 0) 2. Run elicitation (Phase 1) {quick mode note if applicable} 3. Spawn dimension-analysts (Phase 2) 4. Verify methodology plans (Phase 2.5) diff --git a/skills/tech-assessment/SKILL.md b/skills/tech-assessment/SKILL.md index 03b296e..81838cc 100644 --- a/skills/tech-assessment/SKILL.md +++ b/skills/tech-assessment/SKILL.md @@ -349,7 +349,6 @@ For detailed frameworks, see: Dimension-specific confidence criteria below REFINE (not replace) these universal definitions. 
-- **Blackboard key**: `findings_tech` - **Cross-reference dimensions**: trends (technology adoption curves), competitive (competitor tech stacks and capabilities) - **Alert triggers**: - Disruptive technology at TRL 7+ (system prototype demonstrated) diff --git a/skills/trend-analysis-autoresearch/evals.json b/skills/trend-analysis-autoresearch/evals.json deleted file mode 100644 index a6d9655..0000000 --- a/skills/trend-analysis-autoresearch/evals.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "skill_name": "trend-analysis-autoresearch", - "evals": [ - { - "id": "mermaid-scenario", - "name": "eval-mermaid-scenario", - "description": "Verify report generates trend scenario state diagram when INC/DEC/CONST signals present", - "path": "iteration-0/eval-mermaid-scenario" - }, - { - "id": "trend-tables", - "name": "eval-trend-tables", - "description": "Verify trends section generates macro and micro trend tables from findings", - "path": "iteration-0/eval-trend-tables" - } - ] -} diff --git a/skills/trend-analysis-autoresearch/iteration-0/eval-mermaid-scenario/grading.json b/skills/trend-analysis-autoresearch/iteration-0/eval-mermaid-scenario/grading.json deleted file mode 100644 index 67bf3af..0000000 --- a/skills/trend-analysis-autoresearch/iteration-0/eval-mermaid-scenario/grading.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "eval_id": "mermaid-scenario", - "skill": "trend-analysis", - "description": "Verify report generates trend scenario state diagram when INC/DEC/CONST signals present", - "input": "Generate report with trend findings containing INC and DEC signals", - "context": { - "findings_present": ["trends"], - "trend_signals": [ - {"signal": "INC", "driver": "AI adoption"}, - {"signal": "DEC", "driver": "Legacy spend"}, - {"signal": "CONST", "driver": "Regulatory pace"} - ] - }, - "expected": { - "mermaid_diagram_generated": true, - "diagram_type": "stateDiagram-v2", - "states_present": ["Current", "GrowthScenario", "ConsolidationScenario"], - 
"transitions_labeled_with_signals": true - }, - "grading_criteria": [ - "Trend scenario graph is generated when INC/DEC/CONST signals exist in findings", - "Mermaid stateDiagram-v2 format is used", - "Each transition is labeled with the signal direction and driver", - "Terminal scenario states are present" - ] -} diff --git a/skills/trend-analysis-autoresearch/iteration-0/eval-mermaid-scenario/outputs/.gitkeep b/skills/trend-analysis-autoresearch/iteration-0/eval-mermaid-scenario/outputs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/skills/trend-analysis-autoresearch/iteration-0/eval-trend-tables/grading.json b/skills/trend-analysis-autoresearch/iteration-0/eval-trend-tables/grading.json deleted file mode 100644 index cf91f4c..0000000 --- a/skills/trend-analysis-autoresearch/iteration-0/eval-trend-tables/grading.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "eval_id": "trend-tables", - "skill": "trend-analysis", - "description": "Verify trends section generates macro and micro trend tables from findings", - "input": "Generate trends section from trend findings", - "context": { - "findings_present": ["trends"], - "macro_trends": ["AI regulation", "Cloud cost pressure"], - "micro_trends": ["Edge inference", "Model compression"] - }, - "expected": { - "macro_trends_table": true, - "micro_trends_table": true, - "trend_indicators_present": true - }, - "grading_criteria": [ - "Macro Trends table is generated with findings from trends dimension", - "Micro Trends table is generated separately from macro", - "INC/DEC/CONST indicators are present on each trend", - "Not-assessed placeholder is NOT generated when trend findings exist" - ] -} diff --git a/skills/trend-analysis-autoresearch/iteration-0/eval-trend-tables/outputs/.gitkeep b/skills/trend-analysis-autoresearch/iteration-0/eval-trend-tables/outputs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/skills/trend-analysis/SKILL.md b/skills/trend-analysis/SKILL.md index 
a691ec3..0b0f08c 100644 --- a/skills/trend-analysis/SKILL.md +++ b/skills/trend-analysis/SKILL.md @@ -202,7 +202,6 @@ For detailed methodologies, see: Dimension-specific confidence criteria below REFINE (not replace) these universal definitions. -- **Blackboard key**: `findings_trends` (trend-modeling uses separate key `findings_trend_modeling`) - **Cross-reference dimensions**: tech (adoption curves, technology maturity), regulatory (regulatory shifts impacting trends) - **Alert triggers**: - Trend reversal detected (INC→DEC or DEC→INC) diff --git a/skills/trend-modeling/SKILL.md b/skills/trend-modeling/SKILL.md index 4a85dc1..5b136bc 100644 --- a/skills/trend-modeling/SKILL.md +++ b/skills/trend-modeling/SKILL.md @@ -198,7 +198,6 @@ For theoretical background and advanced techniques, see: Dimension-specific confidence criteria below REFINE (not replace) these universal definitions. -- **Blackboard key**: `findings_trend_modeling` (separate from trend-analysis which uses `findings_trends` — trend-modeling produces scenario models that complement but do not overwrite trend-analysis findings) - **Cross-reference dimensions**: All dimensions provide input variables for scenario modeling - **Alert triggers**: - Scenario with >50% probability of adverse outcome diff --git a/skills/update/SKILL.md b/skills/update/SKILL.md index 076f543..13a5414 100644 --- a/skills/update/SKILL.md +++ b/skills/update/SKILL.md @@ -17,15 +17,6 @@ allowed-tools: - TeamCreate - TeamDelete - Write - - mcp__atlatl__blackboard_ack_alert - - mcp__atlatl__blackboard_alert - - mcp__atlatl__blackboard_create - - mcp__atlatl__blackboard_pending_alerts - - mcp__atlatl__blackboard_read - - mcp__atlatl__blackboard_write - - mcp__atlatl__capture_memory - - mcp__atlatl__enrich_memory - - mcp__atlatl__recall_memories --- # Sigint Update Skill (Swarm Orchestration) @@ -144,10 +135,9 @@ Agent( ELICITATION: {prior elicitation JSON from state.json} Execute the update orchestration: - 1. 
Initialize team and blackboard (Phase 0) — reuse topic_slug + 1. Initialize team (Phase 0) — reuse topic_slug 2. Skip elicitation — load from ELICITATION above - 3. Write elicitation to blackboard for analysts - 4. Spawn dimension-analysts for DIMENSIONS (Phase 2) + 3. Spawn dimension-analysts for DIMENSIONS (Phase 2) 5. Verify methodology plans (Phase 2.5) 6. Run post-findings codex review gates (Phase 2.75) 7. Run delta detection BEFORE merge (Delta Protocol) — classify findings as NEW/UPDATED/CONFIRMED/POTENTIALLY_REMOVED/TREND_REVERSAL