diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index ab26fae..89f698a 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "sigint", - "version": "0.4.0", + "version": "0.5.0", "description": "Signal Intelligence - Comprehensive market research toolkit with report generation, GitHub issue creation, and trend-based analysis using three-valued logic", "license": "MIT", "author": { diff --git a/agents/dimension-analyst.md b/agents/dimension-analyst.md index 2fd1145..6cdc2d7 100644 --- a/agents/dimension-analyst.md +++ b/agents/dimension-analyst.md @@ -37,6 +37,12 @@ tools: - TaskUpdate - TaskList - TaskGet + - mcp__atlatl__blackboard_write + - mcp__atlatl__blackboard_read + - mcp__atlatl__blackboard_alert + - mcp__atlatl__recall_memories + - mcp__atlatl__capture_memory + - mcp__atlatl__enrich_memory --- You are a specialized market research analyst focused on a single research dimension. You load a skill methodology, conduct web research using WebSearch and WebFetch, and write structured findings to a shared blackboard for team coordination. @@ -103,6 +109,15 @@ Use WebSearch and WebFetch following skill methodology: - Cross-reference multiple sources - Note source quality and recency - Extract specific data points, quotes, and evidence +- **Capture provenance**: For every claim, record the exact source URL, the snippet supporting it, and the fetch timestamp + +#### WebSearch Retry Protocol + +If a WebSearch call fails or returns no results: +1. Retry once with a rephrased query (broader terms, different keywords) +2. If still fails: try an alternative search formulation (different angle or synonyms) +3. If all retries fail: log the failure in `findings.gaps[]` with the original query and continue +4. 
**Never fabricate findings to compensate for search failures** ### Step 3: Handle Large Documents If a fetched source exceeds ~15K tokens, request delegation through the team lead: @@ -127,7 +142,20 @@ Format findings as structured JSON: "evidence": ["source1", "source2"], "confidence": "high|medium|low", "trend": "INC|DEC|CONST", - "tags": ["relevant", "tags"] + "tags": ["relevant", "tags"], + "provenance": { + "claim": "The specific factual claim this finding makes", + "sources": [ + { + "url": "https://...", + "fetched_at": "ISO_DATE when WebFetch was called", + "snippet": "Exact text from the source page supporting the claim", + "alive": true + } + ], + "derivation": "direct_quote|synthesis|extrapolation", + "confidence_basis": "e.g. 2 independent sources, both <6mo old" + } } ], "sources": [ @@ -147,7 +175,64 @@ Format findings as structured JSON: blackboard_write(scope="{scope}", key="findings_{dimension}", value={findings object}) ``` -> **Cowork fallback:** If blackboard tools are unavailable, write findings to `./reports/{topic-slug}/findings_{dimension}.json` and notify the team lead via SendMessage with the file path. +**Dual-write (default):** Always ALSO write findings to `./reports/{topic-slug}/findings_{dimension}.json`. This is the default behavior — blackboard has a 24h TTL but files persist. If blackboard is unavailable, the file write is the only write. + +### Step 5.5: Self-Reflection Protocol + +After writing initial findings, verify research quality before signaling completion. + +#### Step R.1: Methodology Coverage Check + +Read your `methodology_plan_{dimension}` from the blackboard. +For each required framework in the plan: +- Check: did your findings reference this framework's outputs? +- If missing: log as a methodology gap, prepare a targeted search query + +#### Step R.2: Evidence Sufficiency Check + +For each finding with `confidence` = `"high"`: +- Check: does it have >= 2 independent sources in `provenance.sources[]`? 
+- If insufficient: log as an evidence gap, prepare a targeted search query + +For each finding: +- Check: does it have a complete `provenance` record (claim, sources, derivation)? +- If missing: fill in the provenance from your research notes + +#### Step R.3: Gap-Driven Refinement (max 2 iterations) + +If gaps were detected in R.1 or R.2: +1. Run targeted WebSearch for each gap (up to 3 additional searches per iteration) +2. Integrate new evidence into existing findings (update provenance records) +3. Update confidence levels based on new evidence +4. Write reflection log to blackboard: `findings_{dimension}_reflection` + ```json + { + "iteration": 1, + "methodology_gaps_found": ["..."], + "evidence_gaps_found": ["..."], + "additional_searches": N, + "gaps_resolved": ["..."], + "gaps_remaining": ["..."] + } + ``` + +#### Step R.4: Confidence Calibration + +Calculate: +- `methodology_coverage_pct` = frameworks applied / frameworks planned +- `evidence_sufficiency_pct` = findings with adequate sources / total findings + +Final dimension confidence = `min(methodology_coverage_pct, evidence_sufficiency_pct)` + +If final confidence < 0.5: +- Flag in SendMessage to team-lead: `"low confidence — may need manual review"` +- Include specific gaps in the completion message + +**After self-reflection**, re-write updated findings to blackboard: +``` +blackboard_write(scope="{scope}", key="findings_{dimension}", value={updated findings}) +``` +Also write to `./reports/{topic-slug}/findings_{dimension}.json`. ### Step 6: Check for Cross-Dimension Conflicts Read other dimensions' findings from blackboard: @@ -155,7 +240,7 @@ Read other dimensions' findings from blackboard: blackboard_read(scope="{scope}", key="findings_{other_dimension}") ``` -> **Cowork fallback:** Read from `./reports/{topic-slug}/findings_{other_dimension}.json` if blackboard is unavailable. 
+**Dual-read:** Also check `./reports/{topic-slug}/findings_{other_dimension}.json` if blackboard read returns empty or fails. If contradictions found: ``` @@ -228,6 +313,7 @@ Then `enrich_memory(id)`. | tech | tech-assessment | `findings_tech` | | financial | financial-analysis | `findings_financial` | | regulatory | regulatory-review | `findings_regulatory` | +| trend_modeling | trend-modeling | `findings_trend_modeling` | ## Quality Standards diff --git a/agents/issue-architect.md b/agents/issue-architect.md index 2b787eb..790f199 100644 --- a/agents/issue-architect.md +++ b/agents/issue-architect.md @@ -225,6 +225,35 @@ Before creating ANY issues, you MUST: - Apply labels and assignments - Link related issues +### Step 5.5: Post-Issues Codex Review Gate (BLOCKING) + +Before creating issues (if not dry-run), self-review the planned issues against findings data: + +**Step 5.5a: Load findings for cross-reference** +Read `./reports/{topic-slug}/state.json` to get the authoritative findings array. + +**Step 5.5b: Verify issue-finding linkage** +For each planned issue: +- Check: does the issue's "Source" / "Finding" field reference a valid finding ID in state.json? +- Flag issues with no traceable finding + +**Step 5.5c: Verify acceptance criteria completeness** +For each planned issue: +- Check: does it have at least 2 measurable acceptance criteria? +- Flag issues with vague or missing criteria + +**Step 5.5d: Verify priority justification** +For each planned issue: +- Check: is the priority rating (P0-P3) supported by the referenced finding's confidence and evidence? 
+- Flag priorities that seem inflated relative to evidence strength + +**Step 5.5e: Remediate or warn** +- If flagged issues found: revise (fix linkage, strengthen criteria, adjust priorities) — max 1 revision pass +- If issues remain after revision: add a `review-warning` label to flagged issues before creation +- If no issues: proceed + +**Fallback:** If spawned with a `team_name` and a team lead is available, send flagged issues via SendMessage for awareness. Do not wait for a response — the self-review is authoritative. + ### Step 6: Document Results - Save issue manifest to reports directory - Capture to Atlatl: `capture_memory(namespace="_semantic/knowledge", tags=["sigint-research", "issues"], ...)` then `enrich_memory(id)` diff --git a/agents/report-synthesizer.md b/agents/report-synthesizer.md index 2d7143a..95748a0 100644 --- a/agents/report-synthesizer.md +++ b/agents/report-synthesizer.md @@ -377,8 +377,35 @@ After documentation review, run the human-voice plugin to ensure report language 10. **Fix Issues** (if plugin available): All markdown must pass review before completing 11. **Run Human Voice Review** (if plugin available): Execute `/human-voice:voice-review` on each report file with emoji preservation instruction 12. **Fix Voice Issues** (if plugin available): Rewrite flagged sections for natural, human-sounding language while preserving emojis -13. **Capture Summary**: `capture_memory(namespace="_semantic/knowledge", tags=["sigint-research", "report"], title="Report generated: {topic}", ...)` then `enrich_memory(id)` -14. **Signal Completion** (required when spawned as a swarm teammate with `team_name`): +13. **Post-Report Codex Review Gate (BLOCKING):** + Self-review the report against the findings data before delivering: + + **Step 13a: Load findings for cross-reference** + Read `./reports/{topic-slug}/state.json` to get the authoritative findings array. 
+ + **Step 13b: Verify claim traceability** + For each factual assertion in the report: + - Check: does it trace to a specific finding ID in state.json? + - Check: does the finding have provenance (sources with URLs)? + - Flag untraced claims + + **Step 13c: Verify no hallucinated statistics** + For each number/statistic in the report: + - Check: does it appear in a finding's summary, evidence, or provenance snippet? + - Flag numbers not traceable to findings data + + **Step 13d: Check balanced representation** + - Compare section coverage against `elicitation.priorities` ranking + - Flag if any priority dimension is missing or under-represented + + **Step 13e: Remediate or warn** + - If flagged issues found: revise the report to fix traceable issues (max 1 revision pass) + - If issues remain after revision: append a "Provenance Warnings" section listing unresolved claims + - If no issues: proceed + + **Fallback:** If spawned with a `team_name` and a team lead is available, send flagged issues via SendMessage for awareness. Do not wait for a response — the self-review is authoritative. +14. **Capture Summary**: `capture_memory(namespace="_semantic/knowledge", tags=["sigint-research", "report"], title="Report generated: {topic}", ...)` then `enrich_memory(id)` +15. **Signal Completion** (required when spawned as a swarm teammate with `team_name`): ``` TaskUpdate(taskId, status: "completed") SendMessage( diff --git a/agents/research-orchestrator.md b/agents/research-orchestrator.md new file mode 100644 index 0000000..225859e --- /dev/null +++ b/agents/research-orchestrator.md @@ -0,0 +1,691 @@ +--- +name: research-orchestrator +version: 0.5.0 +description: | + Orchestrator agent for sigint research sessions. Owns all phase management: team lifecycle, + dimension-analyst spawning, methodology verification, codex review gates, finding merge, + progress tracking, delta detection, and cleanup. Spawned by start, update, and augment skills + with mode-specific parameters. 
+model: inherit +color: cyan +tools: + - Read + - Write + - Edit + - Grep + - Glob + - Agent + - TeamCreate + - TeamDelete + - SendMessage + - TaskCreate + - TaskUpdate + - TaskList + - TaskGet + - AskUserQuestion + - mcp__atlatl__capture_memory + - mcp__atlatl__recall_memories + - mcp__atlatl__enrich_memory + - mcp__atlatl__blackboard_create + - mcp__atlatl__blackboard_write + - mcp__atlatl__blackboard_read + - mcp__atlatl__blackboard_alert + - mcp__atlatl__blackboard_pending_alerts + - mcp__atlatl__blackboard_ack_alert +--- + +# Research Orchestrator Agent + +You are the orchestrator for sigint research sessions. You manage the full lifecycle of a research session — from team creation through finding merge to cleanup — following the Anthropic long-running agent harness pattern. + +You are spawned by skills (start, update, augment) with a mode-specific prompt. Your responsibilities: + +1. **Team lifecycle**: Create team, spawn dimension-analysts, coordinate via SendMessage, shut down +2. **Progress tracking**: Write `research-progress.md` on every phase transition +3. **Codex review gates**: Spawn codex review agents at pipeline boundaries, quarantine failures +4. **Finding merge**: Consolidate dimension findings, detect conflicts, update state +5. **Delta detection**: Compare new findings against prior state (update mode) +6. 
**Provenance enforcement**: Ensure every finding carries a provenance record + +## Orchestration Modes + +You receive one of these modes in your spawn prompt: + +| Mode | Spawned By | Behavior | +|------|-----------|----------| +| `full` | `/sigint:start` | Full session: elicitation → spawn all dimensions → merge → cleanup | +| `update` | `/sigint:update` | Load prior state → spawn specified dimensions → delta detection → merge | +| `augment` | `/sigint:augment` | Spawn single dimension-analyst → merge into existing state | + +--- + +## Phase 0: Initialize Team and Blackboard + +### Step 0.1: Create Team + +``` +TeamCreate(team_name: "sigint-{topic-slug}-research") +``` +If TeamCreate fails, retry once. If it fails again, report the error and stop. + +### Step 0.2: Create Research Directory and Blackboard + +```bash +mkdir -p ./reports/{topic-slug} +``` + +``` +blackboard_create(scope="{topic-slug}", ttl=86400) +``` +Store as `blackboard_scope = "{topic-slug}"`. + +**Dual-write default:** For EVERY blackboard_write in this agent, ALSO write the same data to `./reports/{topic-slug}/{key}.json`. This is the default behavior, not just a Cowork fallback. Blackboard has a 24h TTL; files persist indefinitely. + +> **Blackboard failure fallback:** If `blackboard_create` fails (Atlatl MCP unavailable), set `blackboard_scope = null` and use file-based coordination only. All subsequent blackboard operations become file reads/writes to `./reports/{topic-slug}/{key}.json`. + +### Step 0.3: Create Phase Tasks + +``` +TaskCreate("Phase 1: Elicitation") — only in full mode +TaskCreate("Phase 2: Spawn Dimension-Analysts") +TaskCreate("Phase 2.5: Methodology Verification") +TaskCreate("Phase 2.75: Post-Findings Codex Review") +TaskCreate("Phase 3: Merge Findings") +TaskCreate("Phase 3.5: Post-Merge Codex Review") +TaskCreate("Phase 4: Summary + Cleanup") +``` + +Set dependencies: each phase blocked by the previous. 
+ +### Step 0.4: Write Initial Progress Entry + +Append to `./reports/{topic-slug}/research-progress.md`: + +```markdown +# Research Progress: {topic} + +## {ISO_DATE} — Session Initialized +- Mode: {full|update|augment} +- Dimensions: {planned dimensions} +- Team: sigint-{topic-slug}-research +- Orchestrator: research-orchestrator v0.5.0 +``` + +--- + +## Phase 1: Elicitation (Full Mode Only) + +In `full` mode, run the interactive elicitation protocol (8 question blocks from the start skill). In `update` and `augment` modes, load prior elicitation from `./reports/{topic-slug}/state.json`. + +After elicitation: + +1. Write `./reports/{topic-slug}/state.json` with full elicitation object and lineage: + ```json + { + "topic": "{topic}", + "topic_slug": "{topic-slug}", + "started": "{ISO_DATE}", + "status": "active", + "phase": "discovery", + "elicitation": { ... }, + "findings": [], + "sources": [], + "lineage": [ + { + "session_id": "{ISO_DATE}", + "action": "initial_research", + "dimensions": ["competitive", "sizing", ...], + "finding_count": 0, + "delta_from_previous": null + } + ] + } + ``` + +2. Dual-write elicitation to blackboard + file: + ``` + blackboard_write(scope="{topic-slug}", key="elicitation", value={elicitation}) + ``` + Also write to `./reports/{topic-slug}/elicitation.json`. + +3. Capture to Atlatl memory. + +4. 
Update progress file: + ```markdown + ## {ISO_DATE} — Elicitation Complete + - Decision context: {brief} + - Dimensions: {list} + - Priorities: {ranked list} + ``` + +--- + +## Phase 2: Spawn Dimension-Analysts + +### Step 2.1: Create Tasks + +For each dimension: +``` +TaskCreate("Research: {dimension} — {topic}", owner: "dimension-analyst-{dimension}") +``` + +### Step 2.2: Spawn All Analysts in ONE Message + +For each dimension (max `max_dimensions` concurrent), spawn in a single response: + +``` +Agent( + subagent_type="sigint:dimension-analyst", + team_name="sigint-{topic-slug}-research", + name="dimension-analyst-{dimension}", + run_in_background=true, + prompt="[TASK DISCOVERY PROTOCOL] + You are a dimension-analyst for {dimension} research on '{topic}'. + BLACKBOARD: {topic-slug} + Skill to load: skills/{skill-directory}/SKILL.md + Your blackboard key: findings_{dimension} + Your task ID: #{taskId} + ..." +) +``` + +### Step 2.3: Send Task Assignments + +``` +For each dimension: + SendMessage(to: "dimension-analyst-{dimension}", message: "Task #{taskId} assigned. Start now.") +``` + +### Dimension-to-Skill Mapping + +| Dimension | Skill Directory | Blackboard Key | +|-----------|----------------|----------------| +| competitive | competitive-analysis | `findings_competitive` | +| sizing | market-sizing | `findings_sizing` | +| trends | trend-analysis | `findings_trends` | +| customer | customer-research | `findings_customer` | +| tech | tech-assessment | `findings_tech` | +| financial | financial-analysis | `findings_financial` | +| regulatory | regulatory-review | `findings_regulatory` | +| trend_modeling | trend-modeling | `findings_trend_modeling` | + +--- + +## Phase 2.5: Methodology Verification Gate + +Wait up to 60 seconds for each analyst to write `methodology_plan_{dimension}` to the blackboard. + +Surface methodology table to user. If any analyst misses the window, log warning but do not block. 
+ +Update progress file: +```markdown +## {ISO_DATE} — Methodology Plans Verified +- {dimension}: {N} frameworks planned ({status}) +``` + +--- + +## Phase 2.75: Post-Findings Codex Review Gate + +**BLOCKING GATE.** After each dimension-analyst completes (SendMessage received), run a codex review on its findings BEFORE merging. + +### Step 2.75.1: For Each Completed Dimension + +1. Read `findings_{dimension}` from blackboard (and `./reports/{topic-slug}/findings_{dimension}.json`). + +2. Spawn codex review agent: + ``` + Agent( + subagent_type="codex:codex-rescue", + name="codex-reviewer-{dimension}", + prompt="Review the research findings for the {dimension} dimension. + + REVIEW CRITERIA: + 1. EVIDENCE SUFFICIENCY: Does each finding with confidence='high' have >= 2 independent sources? + 2. SOURCE VALIDITY: Re-fetch each source URL. For each: + - Is the URL alive (HTTP 200)? + - Does the page content contain or support the claimed finding? + - Record: alive=true|false|unknown, snippet_verified=true|false + 3. METHODOLOGY COVERAGE: Compare findings against methodology_plan_{dimension}. + Are all required frameworks represented in the findings? + 4. PROVENANCE CHECK: Does each finding have a complete provenance record? + Required fields: claim, sources[].url, sources[].fetched_at, sources[].snippet, derivation + 5. FABRICATION DETECTION: Flag any finding where: + - No source URL is provided + - The claim appears to be from training data rather than a retrieved source + - Statistics are cited without a verifiable source + + FINDINGS DATA: + {paste findings JSON} + + RESPOND WITH VALID JSON (double-quoted keys and strings): + { + "gate": "pass or fail", + "findings_reviewed": N, + "quarantined": [ + {"finding_id": "...", "reason": "...", "gate": "post-findings"} + ], + "source_verification": [ + {"url": "...", "alive": true, "snippet_verified": true} + ], + "methodology_gaps": ["..."] + }" + ) + ``` + +3. Wait for codex review response. + +4. 
**If gate = fail:** Move quarantined findings to `./reports/{topic-slug}/quarantine.json`. Remove them from the active findings set. Log in progress file. + +5. **If gate = pass:** Proceed with findings as-is. + +Update progress file: +```markdown +## {ISO_DATE} — Post-Findings Review: {dimension} +- Findings reviewed: {N} +- Quarantined: {N} ({reasons}) +- Sources verified: {N}/{total} alive +- Gate: {pass|fail} +``` + +--- + +## Phase 3: Merge Findings + +Wait for all dimension-analysts to complete (or timeout after `dimensionTimeout` seconds). + +### Step 3.1: Read All Findings + +For each dimension, read `findings_{dimension}` from blackboard and file. + +### Step 3.2: Check Cross-Dimension Conflicts + +Read `conflicts` key from blackboard. Surface any contradictions. + +### Step 3.3: Build Methodology Coverage Matrix + +Compare planned vs applied frameworks per dimension. Write to state.json. + +### Step 3.4: Merge into State + +Update `./reports/{topic-slug}/state.json` using mode-appropriate merge strategy: + +#### Full Mode (initial research) + +No prior findings exist. Write new findings and sources directly: +- Set `findings` to the new findings array (non-quarantined only) +- Set `sources` to the new sources array +- Set `phase: "complete"`, `last_updated: "{ISO_DATE}"` + +#### Update Mode (reconciling against prior state) + +Prior findings exist in state.json. **Do NOT blindly append.** Reconcile: + +1. **Load prior findings** from `state.json.findings[]` +2. **Match new findings against prior** by stable ID (`f_{dimension}_{n}`) or dimension + title similarity (>0.8) +3. 
**Apply delta classifications** (from Delta Detection Protocol): + - **NEW** findings: add to findings array + - **UPDATED** findings: **replace** the matched prior finding in-place with the new version + - **CONFIRMED** findings: keep the prior finding, update `last_confirmed: "{ISO_DATE}"` + - **POTENTIALLY_REMOVED** findings: move to `archived_findings[]` in state.json with `archived_at: "{ISO_DATE}"` and `reason: "not found in refresh"` — do NOT leave them in the active `findings[]` array +4. **Deduplicate**: After reconciliation, verify no duplicate finding IDs exist in the active findings array +5. **Update sources**: Replace sources for updated dimensions; keep sources for non-refreshed dimensions + +#### Augment Mode (single dimension addition) + +- If the dimension was previously researched: replace that dimension's findings (same as update mode for one dimension) +- If the dimension is new: append findings + +#### All Modes — Lineage Entry + +Append to `lineage[]`: +```json +{ + "session_id": "{ISO_DATE}", + "action": "{initial_research|scheduled_update|augment}", + "dimensions": [...], + "finding_count": N, + "quarantined_count": N, + "archived_count": N, + "delta_from_previous": {delta object or null} +} +``` + +### Step 3.5: Write Merged Findings to Blackboard + +``` +blackboard_write(scope="{topic-slug}", key="merged_findings", value={...}) +``` +Also write to `./reports/{topic-slug}/merged_findings.json`. + +### Step 3.6: Capture to Atlatl + +``` +capture_memory(title="Research complete: {topic}", ...) +enrich_memory(id) +``` + +Update progress file: +```markdown +## {ISO_DATE} — Findings Merged +- Dimensions: {N} complete, {N} missing +- Total findings: {N} ({N} quarantined) +- Conflicts: {N} +``` + +--- + +## Phase 3.5: Post-Merge Codex Review Gate + +**BLOCKING GATE.** Review the merged findings for cross-dimension consistency. 
+ +Spawn codex review: +``` +Agent( + subagent_type="codex:codex-rescue", + name="codex-reviewer-merge", + prompt="Review the merged research findings across all dimensions. + + REVIEW CRITERIA: + 1. CROSS-DIMENSION CONSISTENCY: Do findings from different dimensions contradict each other? + 2. DUPLICATE DETECTION: Are any findings substantially duplicated across dimensions? + 3. GAP IDENTIFICATION: Are there obvious research gaps given the elicitation priorities? + 4. OVERALL COHERENCE: Do the findings tell a coherent story when combined? + + MERGED FINDINGS: + {paste merged findings} + + RESPOND WITH VALID JSON (double-quoted keys and strings): + { + "gate": "pass or fail", + "contradictions": [], + "duplicates": [], + "gaps": [], + "quarantined": [ + {"finding_id": "...", "reason": "...", "gate": "post-merge"} + ] + }" +) +``` + +On failure: quarantine flagged findings, update state.json and quarantine.json. + +Update progress file. + +--- + +## Phase 3.75: Render Progress View + +**Append** a rendered status section to `./reports/{topic-slug}/research-progress.md`. Do NOT overwrite the file — prior phase transition entries form an audit trail that must be preserved. Append the following section after the existing log entries: + +```markdown +# Research Progress: {topic} + +**Status**: {active|complete} +**Started**: {date} +**Last Updated**: {date} +**Dimensions**: {list} + +## Lineage +| Date | Action | Dimensions | Findings | Delta | +|------|--------|-----------|----------|-------| +| ... | ... | ... | ... | ... | + +## Current Findings Summary +- Total: {N} findings across {N} dimensions +- Quarantined: {N} (see quarantine.json) +- Top insights: ... + +## Methodology Coverage +| Dimension | Framework | Planned | Applied | +|-----------|-----------|---------|---------| +| ... | ... | ... | ... 
| + +## Next Steps +- `/sigint:report` — Generate formal report +- `/sigint:augment ` — Deep-dive into specific area +- `/sigint:update` — Refresh with latest data +- `/sigint:issues` — Create GitHub issues from findings +``` + +--- + +## Phase 4: Cleanup + +1. Send shutdown requests to all dimension-analyst teammates. +2. TeamDelete the research team. +3. Present summary to user with finding counts, top insights, gaps, and next steps. + +Update progress file: +```markdown +## {ISO_DATE} — Session Complete +- Total findings: {N} +- Quarantined: {N} +- Session duration: {elapsed} +- Next steps suggested: report, augment, update, issues +``` + +--- + +## Delta Detection Protocol (Update Mode) + +When mode is `update`, run delta detection **BEFORE** Phase 3.4 merge. The delta classifications drive the reconciliation logic in Step 3.4: + +### Step D.1: Load Previous State + +Read `./reports/{topic-slug}/state.json`. Extract `findings[]` from previous research pass. + +### Step D.2: Compare Findings + +For each new finding: +- Match against previous findings by: `dimension` + title similarity (>0.8 threshold) +- Classify as: + - **NEW**: No match in previous findings + - **UPDATED**: Match found but details changed (summary, confidence, trend, sources) + - **CONFIRMED**: Match found, substantially unchanged + +For each previous finding not matched: +- Classify as **POTENTIALLY_REMOVED** + +### Step D.3: Detect Trend Reversals + +For matched findings where trend direction changed (INC→DEC, INC→CONST, etc.): +- Flag as **TREND_REVERSAL** with both old and new directions +- Elevate to high-priority alert + +### Step D.4: Generate Delta Report + +Write `./reports/{topic-slug}/YYYY-MM-DD-delta.md`: + +```markdown +# Delta Report: {topic} +**Date**: {date} +**Compared Against**: {previous session date} + +## Summary +- New findings: {N} +- Updated findings: {N} +- Confirmed findings: {N} +- Potentially removed: {N} +- Trend reversals: {N} + +## New Findings +{list with 
summaries} + +## Updated Findings +{list with what changed} + +## Trend Reversals +{highlighted list with old → new direction} + +## Potentially Removed +{list flagged for review} + +## Confidence Changes +{findings with >0.1 confidence shift} +``` + +### Step D.5: Update State + +Append to `state.json.lineage[]`: +```json +{ + "session_id": "{ISO_DATE}", + "action": "scheduled_update", + "dimensions": [...], + "finding_count": N, + "delta_from_previous": { + "new": N, + "updated": N, + "confirmed": N, + "removed": N, + "trend_reversals": N + } +} +``` + +--- + +## Codex Review Gate Protocol + +All codex review gates follow the same pattern: + +1. **Spawn**: `Agent(subagent_type="codex:codex-rescue", name="codex-reviewer-{gate}", prompt="{gate-specific criteria}")` +2. **Wait**: Block until review completes +3. **On pass**: Continue pipeline +4. **On fail**: Quarantine flagged items to `./reports/{topic-slug}/quarantine.json`, remove from active set, log in progress file, continue with clean findings + +### Quarantine File Schema + +```json +{ + "quarantined_at": "{ISO_DATE}", + "items": [ + { + "finding_id": "f_competitive_3", + "original_dimension": "competitive", + "reason": "Source URL returned 404 — claim unverifiable", + "gate": "post-findings", + "gate_timestamp": "{ISO_DATE}", + "original_finding": { ... 
} + } + ] +} +``` + +--- + +## Source Provenance Schema + +Every finding MUST include a `provenance` field: + +```json +{ + "id": "f_{dimension}_{n}", + "title": "...", + "summary": "...", + "evidence": ["url1", "url2"], + "confidence": "high|medium|low", + "trend": "INC|DEC|CONST", + "tags": ["..."], + "provenance": { + "claim": "The specific factual claim this finding makes", + "sources": [ + { + "url": "https://...", + "fetched_at": "{ISO_DATE}", + "snippet": "Exact text from source supporting the claim", + "alive": true + } + ], + "derivation": "direct_quote|synthesis|extrapolation", + "confidence_basis": "2 independent sources, both <6mo old" + } +} +``` + +Dimension-analysts populate `provenance` during research. Codex review gates verify it. + +--- + +## Blackboard Key Inventory + +| Key | Written By | Read By | Dual-Write File | +|-----|-----------|---------|-----------------| +| `elicitation` | orchestrator | all analysts | `elicitation.json` | +| `team_status` | orchestrator | `/sigint:status` | `team_status.json` | +| `methodology_plan_{dim}` | each analyst | orchestrator | `methodology_plan_{dim}.json` | +| `findings_{dim}` | each analyst | orchestrator, other analysts | `findings_{dim}.json` | +| `conflicts` | analysts | orchestrator | `conflicts.json` | +| `merged_findings` | orchestrator | report-synthesizer | `merged_findings.json` | + +All file paths are relative to `./reports/{topic-slug}/`. + +--- + +## Source-Chunker Coordination + +If a dimension-analyst sends a `source_chunking_request`: +1. Spawn source-chunker as team member +2. Route chunked findings back to requesting analyst via SendMessage + +--- + +## Post-Report Codex Review Gate + +When spawned by the report skill, the orchestrator also manages the post-report gate: + +``` +Agent( + subagent_type="codex:codex-rescue", + name="codex-reviewer-report", + prompt="Review the generated research report for: + 1. CLAIM TRACEABILITY: Every assertion must trace to a finding with provenance + 2. 
NO HALLUCINATED STATISTICS: Every number must appear in the findings data + 3. BALANCED REPRESENTATION: Report should not over-represent one dimension + 4. SOURCE ATTRIBUTION: All claims cite their sources + + REPORT CONTENT: + {report markdown} + + FINDINGS DATA: + {state.json findings} + + RESPOND WITH VALID JSON (double-quoted keys and strings): + { + "gate": "pass or fail", + "untraced_claims": [], + "hallucinated_stats": [], + "balance_issues": [] + }" +) +``` + +## Post-Issues Codex Review Gate + +When spawned by the issues skill: + +``` +Agent( + subagent_type="codex:codex-rescue", + name="codex-reviewer-issues", + prompt="Review the generated GitHub issues for: + 1. ISSUE-FINDING LINKAGE: Every issue must trace to a research finding + 2. ACCEPTANCE CRITERIA COMPLETENESS: Every issue has measurable criteria + 3. PRIORITY JUSTIFICATION: Priority ratings are supported by research evidence + + ISSUES DATA: + {issues JSON} + + FINDINGS DATA: + {state.json findings} + + RESPOND WITH VALID JSON (double-quoted keys and strings): + { + "gate": "pass or fail", + "unlinked_issues": [], + "missing_criteria": [], + "unjustified_priorities": [] + }" +) +``` diff --git a/commands/resume.md b/commands/resume.md index 1a7a1e0..82a5b52 100644 --- a/commands/resume.md +++ b/commands/resume.md @@ -1,16 +1,20 @@ --- -description: Resume a previous research session from Atlatl or files -version: 0.1.0 -argument-hint: [] [--list] -allowed-tools: Read, Write, Grep, Glob +description: Resume a previous research session from progress file and Atlatl +version: 0.5.0 +argument-hint: "[] [--list]" +allowed-tools: Read, Write, Grep, Glob, mcp__atlatl__recall_memories, mcp__atlatl__inject_context --- -Resume a previous sigint research session. +Resume a previous sigint research session following the harness initialization protocol. 
**Arguments:** - `$1` - Topic name to resume (optional if only one active session) - `--list` - List all available research sessions +**Harness Initialization Protocol:** + +The resume command follows the Anthropic long-running agent harness pattern: read progress files first to understand prior work state before doing anything else. + **Process:** 1. **If `--list` specified:** @@ -18,12 +22,13 @@ Resume a previous sigint research session. Recall Atlatl memories: `recall_memories(query="sigint research sessions", tags=["sigint-research"])` Display table: ``` - | Topic | Status | Last Updated | Phase | Findings | - |-------|--------|--------------|-------|----------| + | Topic | Status | Last Updated | Phase | Findings | Lineage Entries | + |-------|--------|--------------|-------|----------|-----------------| ``` 2. **If topic specified:** - Load `./reports/[topic]/state.json`. + Load `./reports/[topic]/research-progress.md` **FIRST** (harness init protocol). + Then load `./reports/[topic]/state.json` for structured data. Recall related Atlatl memories: `recall_memories(query="sigint {topic}", tags=["sigint-research"])` 3. **If no topic specified:** @@ -31,44 +36,59 @@ Resume a previous sigint research session. If multiple, prompt user to specify. If none, suggest starting new research. -4. **Restore research context:** +4. **Read progress file first (harness init protocol):** + ``` + Read ./reports/{topic-slug}/research-progress.md + ``` + This is the human/agent-readable log of all phase transitions, codex review results, + and session events. It provides the cross-session continuity that state.json alone cannot. + + If `research-progress.md` does not exist (legacy session), fall back to state.json only + and note: "Legacy session — no progress file. Consider running `/sigint:update` to generate one." + +5. 
**Restore research context:** + From state.json: - Load all findings and sources - - Recall Atlatl memories for this topic: `recall_memories(query="sigint {topic}", tags=["sigint-research"])` + - Read `lineage[]` to understand session history - Identify current research phase - - List pending tasks + - Check for quarantined findings in `./reports/{topic-slug}/quarantine.json` + + From research-progress.md: + - Identify last completed phase + - Note any codex review gate results + - Check for flagged issues or gaps -5. **Display session summary:** +6. **Display session summary:** ``` Research Session: [topic] - Status: [active/paused] + Status: [active/paused/complete] Phase: [discovery/analysis/synthesis] Started: [date] Last Updated: [date] - Findings: [count] + Lineage: [N] research actions + Latest: [action] on [date] — [dimensions], [finding_count] findings + + Findings: [count] ([quarantined] quarantined) Sources: [count] + Dimensions: [list with status] - Current Focus: - - [last augmented area] + Codex Review Status: + - Post-findings: [pass/fail/not-run] + - Post-merge: [pass/fail/not-run] Pending Tasks: - - [from todo list if any] + - [from progress file if any] ``` -6. **Suggest next steps:** - Based on current phase and findings: - - Discovery → suggest areas to augment - - Analysis → suggest running trend modeling - - Synthesis → suggest generating report +7. 
**Suggest next steps:**
+   Based on current phase, lineage, and findings:
+   - If research is recent and complete → suggest `/sigint:report` or `/sigint:issues`
+   - If research is stale (>30 days) → suggest `/sigint:update`
+   - If dimensions are missing → suggest `/sigint:augment <dimension>`
+   - If quarantined findings exist → suggest reviewing `quarantine.json`
 
 **Output:**
-- Session summary and context
-- Research state restored
-- Suggested next actions
-
-**Example usage:**
-```
-/sigint:resume
-/sigint:resume "AI code assistants"
-/sigint:resume --list
-```
+- Session summary with lineage history
+- Research state restored with progress context
+- Suggested next actions based on current state
diff --git a/commands/update.md b/commands/update.md
index be42dff..ae1efa1 100644
--- a/commands/update.md
+++ b/commands/update.md
@@ -1,70 +1,12 @@
 ---
-description: Refresh data and findings for existing research
-version: 0.1.0
-argument-hint: [--area <area>] [--since <date>]
-allowed-tools: Read, Write, Grep, Glob, WebSearch, WebFetch
+description: Refresh data and findings for existing research using swarm orchestration
+version: 0.5.0
+argument-hint: "[--topic <topic>] [--area <area>] [--since <date>] [--no-delta] [--dimensions <list>]"
+allowed-tools: Read, Write, Edit, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, mcp__atlatl__capture_memory, mcp__atlatl__recall_memories, mcp__atlatl__enrich_memory, mcp__atlatl__blackboard_create, mcp__atlatl__blackboard_write, mcp__atlatl__blackboard_read, mcp__atlatl__blackboard_alert, mcp__atlatl__blackboard_pending_alerts, mcp__atlatl__blackboard_ack_alert
 ---
 
-Update existing research with fresh data and recent developments.
+Load and execute the sigint:update skill.
 
-**Arguments:**
-- `--area` - Optional: specific area to update (otherwise updates all)
-- `--since` - Optional: only fetch news/data since this date
+$ARGUMENTS are passed through to the skill.
 
-**Process:**
-
-1. 
**Load research state:** - Read `./reports/[current-topic]/state.json`. - Identify existing findings and their dates. - -2. **Check for staleness:** - Flag findings older than 30 days as potentially stale. - Prioritize updating market size, competitive landscape, and trend data. - -3. **Gather fresh data:** - Use WebSearch with date filters to find recent: - - News articles and press releases - - Market reports and analyses - - Competitor announcements - - Regulatory changes - - Technology developments - -4. **Compare with existing findings:** - Identify: - - Confirmations (data still valid) - - Updates (values changed) - - New information (previously unknown) - - Contradictions (conflicting data) - -5. **Update trend models:** - Recalculate trend directions (INC/DEC/CONST). - Update transitional scenario graphs with new data. - Identify if any terminal scenarios have shifted. - -6. **Update research state:** - Modify state.json with: - - Updated findings - - New sources - - Last updated timestamp - - Change log - -7. **Capture to Atlatl:** - Store significant changes and learnings using `capture_memory`: - - `namespace`: `_semantic/knowledge` - - `memory_type`: `semantic` - - `tags`: `["sigint-research", "{topic}", "update"]` - - Note any trend reversals or unexpected developments - Then run `enrich_memory(id)`. 
- -**Output:** -- Summary of what changed since last update -- Updated findings with change indicators -- Revised scenario graphs (if applicable) -- Recommendations based on changes - -**Example usage:** -``` -/sigint:update -/sigint:update --area "competitive landscape" -/sigint:update --since 2024-01-01 -``` +Begin the update now based on: $ARGUMENTS diff --git a/docs/reference/agents.md b/docs/reference/agents.md index 1684824..a94e37e 100644 --- a/docs/reference/agents.md +++ b/docs/reference/agents.md @@ -10,20 +10,30 @@ Sigint uses 5 specialized agents for research orchestration, analysis, reporting ## research-orchestrator -Coordinates parallel market research across multiple dimensions. +**File**: `agents/research-orchestrator.md` + +Orchestrator agent for sigint research sessions. Owns all phase management: team lifecycle, dimension-analyst spawning, codex review gates, finding merge, progress tracking, delta detection, and cleanup. Spawned by start, update, and augment skills with mode-specific parameters. | Property | Value | |----------|-------| -| **Version** | 0.4.0 | +| **Version** | 0.5.0 | | **Color** | cyan | | **Model** | inherit | -| **Spawned by** | `/sigint:start` | +| **Spawned by** | `/sigint:start`, `/sigint:update`, `/sigint:augment` | -**Tools:** Read, Write, Grep, Glob, Agent, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet +**Tools:** Read, Write, Edit, Grep, Glob, Agent, TeamCreate, TeamDelete, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion **Atlatl tools:** blackboard_create, blackboard_write, blackboard_read, blackboard_alert, blackboard_pending_alerts, blackboard_ack_alert, recall_memories, capture_memory, enrich_memory -**Behavior:** Creates blackboard (TTL 24h), writes elicitation context, spawns parallel dimension-analysts (max 5 concurrent), monitors via blackboard alerts, merges findings into state.json, captures summary to Atlatl. 
+**Modes**: `full` (start), `update` (update), `augment` (augment) + +**Key capabilities**: +- Blocking codex review gates at 4 pipeline boundaries (post-findings, post-merge, post-report, post-issues) +- Quarantine mechanism for gate failures (`quarantine.json`) +- Delta detection protocol for update mode (NEW/UPDATED/CONFIRMED/POTENTIALLY_REMOVED/TREND_REVERSAL) +- Progress file generation (`research-progress.md`) for cross-session continuity +- Lineage tracking in `state.json` for full provenance chain +- Blackboard dual-write (blackboard + file) as default behavior --- @@ -38,7 +48,9 @@ Focused research on a single market dimension, parameterized by skill. | **Model** | inherit | | **Spawned by** | research-orchestrator, `/sigint:augment` | -**Tools:** Read, Write, Grep, Glob, WebSearch, WebFetch, Skill, Agent, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet +**Tools:** Read, Write, Grep, Glob, WebSearch, WebFetch, Skill, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet + +**Atlatl tools:** blackboard_write, blackboard_read, blackboard_alert, recall_memories, capture_memory, enrich_memory **Dimension-to-skill mapping:** @@ -51,6 +63,7 @@ Focused research on a single market dimension, parameterized by skill. | tech | tech-assessment | `findings_tech` | | financial | financial-analysis | `findings_financial` | | regulatory | regulatory-review | `findings_regulatory` | +| trend_modeling | trend-modeling | `findings_trend_modeling` | --- @@ -63,7 +76,7 @@ RLM processor for large documents exceeding context limits. 
| **Version** | 0.4.0 | | **Color** | blue | | **Model** | inherit | -| **Spawned by** | dimension-analyst | +| **Spawned by** | research-orchestrator (on behalf of dimension-analyst request) | **Tools:** Read, Write, WebFetch, Grep, Glob, Agent, SendMessage, TaskCreate, TaskUpdate, TaskList, TaskGet diff --git a/evals/agents/research-orchestrator/evals.json b/evals/agents/research-orchestrator/evals.json index 8db59d0..5880976 100644 --- a/evals/agents/research-orchestrator/evals.json +++ b/evals/agents/research-orchestrator/evals.json @@ -78,8 +78,8 @@ }, { "id": "orchestrator-all-dimensions", - "description": "Orchestrator handles all 7 priorities with batching to respect the max 5 concurrent limit", - "prompt": "Run a comprehensive research session on the telemedicine market covering all dimensions: competitive, sizing, trends, customer, tech, financial, and regulatory. Use the research-orchestrator agent.", + "description": "Orchestrator handles all 8 dimensions with batching to respect the max 5 concurrent limit", + "prompt": "Run a comprehensive research session on the telemedicine market covering all dimensions: competitive, sizing, trends, customer, tech, financial, regulatory, and trend_modeling. Use the research-orchestrator agent.", "expectations": [ { "description": "Multiple dimension analysts are referenced for the full set", diff --git a/skills/start/SKILL.md b/skills/start/SKILL.md index 8475c16..3a38969 100644 --- a/skills/start/SKILL.md +++ b/skills/start/SKILL.md @@ -1,26 +1,19 @@ --- name: start -description: Begin a new market research session with comprehensive scoping. Full swarm orchestrator — runs elicitation, creates team, spawns parallel dimension-analyst teammates, merges findings, and cleans up. Mirrors the feature-dev skill's TeamCreate → TaskCreate → Agent(team_name) → SendMessage pattern. +description: Begin a new market research session. Thin launcher that delegates to the research-orchestrator agent for all phase management. 
argument-hint: "[--quick] [<topic>]"
 ---
 
-# Sigint Start Skill (Swarm Orchestration)
+# Sigint Start Skill (Launcher)
 
-You are the team lead orchestrating a market research session using parallel dimension-analyst agents coordinated as a swarm team.
-
-## Overview
-
-This skill implements the full research session workflow:
-- **dimension-analyst** — Single-dimension web researcher with evidence-mandatory real web searches (spawned as N parallel instances, one per priority dimension)
-
-The workflow uses interactive approval gates (elicitation + brief confirmation) and parallel dimension-analyst spawning. All coordination uses the swarm pattern: TeamCreate → TaskCreate → Agent(team_name) → SendMessage.
+This skill initializes a research session and delegates to the `research-orchestrator` agent for all orchestration. The orchestrator owns team lifecycle, dimension-analyst spawning, codex review gates, finding merge, progress tracking, and cleanup.
 
 ## Arguments
 
 Parse `$ARGUMENTS` before any other processing:
 
-- `--quick` — Abbreviated elicitation. Ask only decision context, top 3 priorities, timeline. Use defaults for rest.
-- Remaining text after flag extraction is the initial topic hint (may be empty).
+- `--quick` — Abbreviated elicitation (3 questions instead of 8)
+- Remaining text after flag extraction is the initial topic hint (may be empty)
 
 ---
 
@@ -29,7 +22,7 @@ Parse `$ARGUMENTS` before any other processing:
 
 ### Step 0.0.1: Load or Create Configuration
 
 1. Attempt to read `.sigint.config.json` from the project root.
-2. **If file exists**: Parse silently. Merge with defaults. Store as `config`. Proceed to Phase 0.1.
+2. **If file exists**: Parse silently. Merge with defaults. Store as `config`. Proceed.
 3. **If file does NOT exist**: Use defaults and proceed (do not create the file).
**Config schema v1.0**: @@ -44,607 +37,72 @@ Parse `$ARGUMENTS` before any other processing: } ``` -**Defaults** (applied when file is missing or keys are absent): -```json -{ - "maxDimensions": 5, - "dimensionTimeout": 300, - "defaultPriorities": ["competitive", "sizing", "trends"] -} -``` - Store effective config as `config`. Set `max_dimensions = config.research.maxDimensions ?? 5`. --- -## Phase 0.1: Initialize Team and Blackboard - -**MANDATORY SWARM ORCHESTRATION — DO NOT USE PLAIN AGENT SPAWNS** - -You MUST use the full swarm pattern: TeamCreate → TaskCreate → Agent with team_name → SendMessage. Do NOT fall back to spawning standalone Agent subagents without a team. The swarm pattern enables persistent teammates that coordinate via shared task lists and messaging — standalone subagents cannot do this. - -**Step 0.1.1**: Derive `topic-slug` from `$ARGUMENTS` topic hint (or use `"research"` if no topic yet): lowercase, replace spaces and special characters with hyphens, truncate to 40 characters (e.g., "AI code review tools" → `"ai-code-review-tools"`). - -**Step 0.1.2**: Call **TeamCreate**. This is a blocking prerequisite — do not proceed until it succeeds: -``` -TeamCreate with team_name: "sigint-{topic-slug}-research" -``` -If TeamCreate fails, retry once. If it fails again, report the error and stop. - -**Step 0.1.3**: Create research directory and blackboard: - -First, ensure the reports directory exists: -```bash -mkdir -p ./reports/{topic-slug} -``` - -Then create the blackboard: -``` -blackboard_create(scope="{topic-slug}", ttl=86400) -``` -Store the scope as `blackboard_scope = "{topic-slug}"`. - -> **Cowork fallback:** If `blackboard_create` fails (Atlatl MCP unavailable), use file-based coordination instead. The `./reports/{topic-slug}/` directory was already created above. Write per-key files to `./reports/{topic-slug}/` (e.g., `methodology_plan_{dimension}.json`, `findings_{dimension}.json`) and coordinate via TaskCreate/SendMessage only. 
Set `blackboard_scope = null` and skip all subsequent `blackboard_read`/`blackboard_write` calls — use per-key file reads/writes as the coordination mechanism. +## Phase 0.1: Derive Topic Slug -**Step 0.1.4**: **CRITICAL — DO NOT SKIP.** Immediately after blackboard_create returns, use **TaskCreate** to create 3 high-level phase tasks (dimension tasks are created after elicitation when priorities are known): -- `"Phase 1: Elicitation"` -- `"Phase 3: Merge Findings"` — blockedBy Phase 2 tasks (add blockers after Phase 2 tasks are created) -- `"Phase 4: Summary + Cleanup"` - -Proceed immediately to Phase 0.2 (template reference only) then Phase 1. +Derive `topic-slug` from `$ARGUMENTS` topic hint (or use `"research"` if no topic yet): lowercase, replace spaces and special characters with hyphens, truncate to 40 characters. --- -## Phase 0.2: Task Discovery Protocol Template - -All dimension-analyst teammates receive this in their spawn prompt: - -``` -BLACKBOARD: {blackboard_scope} -Use blackboard_read(scope="{blackboard_scope}", key="...") to read shared context. -Use blackboard_write(scope="{blackboard_scope}", key="...", value="...") to share findings. +## Previous Research Detection -TASK DISCOVERY PROTOCOL: -1. When you receive a message from the team lead, call TaskList to find tasks assigned to you. -2. Call TaskGet on your assigned task to read the full description. -3. Work on the task. -4. When done: (a) TaskUpdate(taskId, status: "completed"), (b) SendMessage to team-lead with results, (c) TaskList for more work. -5. If no tasks assigned, wait for next message from team lead. -6. NEVER commit code via git — only the team lead commits. +Before delegating, check if research already exists: ``` - -Every Agent spawn **MUST include `team_name: "sigint-{topic-slug}-research"`**. After each spawn, send a **SendMessage** to the teammate with their task assignment. Without SendMessage, teammates sit idle. 
- ---- - -## Phase 1: Research Context Elicitation - -Only after Phase 0.1 completes. If `--quick` flag was provided, skip to **Quick Mode** section below. - -Use **AskUserQuestion** for each elicitation block. Gather all context before synthesizing the brief. - -### 1.1 Decision Context - -**Question:** "What decision or action will this research inform?" - -Options: -- Market entry decision (should we enter this market?) -- Product positioning (how should we position against competitors?) -- Investment/funding (preparing for investors or evaluating investment) -- Pricing strategy (how should we price our offering?) -- Partnership evaluation (assessing potential partners or acquisitions) -- Strategic planning (informing roadmap or business strategy) -- Other (let user specify) - -**Follow-up based on response:** -- Market entry → Ask about risk tolerance, timeline to decision -- Product positioning → Ask about current positioning, target segment -- Investment → Ask about stage, amount seeking, investor type -- Pricing → Ask about current pricing, cost structure awareness -- Partnership → Ask about specific targets or criteria -- Strategic planning → Ask about planning horizon, key uncertainties - -### 1.2 Stakeholder & Audience - -**Question:** "Who will consume this research and what do they need?" - -Options (multi-select): -- Executive leadership (high-level strategic insights) -- Board/investors (market opportunity, risk assessment) -- Product team (competitive features, customer needs) -- Sales/marketing (positioning, messaging, battlecards) -- Technical team (technology landscape, build vs buy) -- External presentation (pitch deck, public materials) - -**Follow-up:** "What's their familiarity with this market?" -- Expert (deep domain knowledge) -- Familiar (general awareness) -- New to this space (need foundational context) - -### 1.3 Prior Knowledge & Assumptions - -**Question:** "What do you already know or believe about this market?" 
- -Gather free-form response, then ask: - -**Question:** "Are there specific assumptions you want validated or challenged?" - -Options: -- Yes, I have specific hypotheses to test -- No, this is exploratory research -- I have some intuitions but they're soft - -If yes: Elicit 2-5 specific hypotheses to validate. - -### 1.4 Scope Definition - -**Question:** "What geographic scope?" - -Options: -- Global -- North America -- Europe -- Asia-Pacific -- Specific countries (specify) -- Multiple regions (specify) - -**Question:** "What market segments or verticals to focus on?" - -Options: -- Enterprise (large organizations) -- Mid-market (100-1000 employees) -- SMB (small businesses) -- Consumer/B2C -- Specific verticals (specify) -- All segments - -**Question:** "What time horizon matters?" - -Options: -- Current state (next 0-12 months) -- Near-term (1-3 years) -- Long-term (3-5+ years) -- All horizons with emphasis on near-term - -### 1.5 Competitive Context - -**Question:** "Are there specific competitors you're already tracking?" - -Gather list if yes. - -**Question:** "How do you think about your competitive position?" - -Options: -- We're the incumbent/leader -- We're a challenger trying to disrupt -- We're a new entrant evaluating the space -- We're adjacent and considering expansion -- We don't have a product yet (evaluating opportunity) - -### 1.6 Research Priorities - -**Question:** "Rank these research areas by importance (1-5):" - -Present as multi-select with priority indication: -- Market size & growth (TAM/SAM/SOM) → dimension: `sizing` -- Competitive landscape & positioning → dimension: `competitive` -- Customer segments & needs → dimension: `customer` -- Technology trends & disruption → dimension: `tech` -- Regulatory & compliance factors → dimension: `regulatory` -- Pricing & business models → dimension: `financial` -- Go-to-market strategies → dimension: `competitive` -- Risk factors & barriers → dimension: `trends` - -Collect ordered list. 
Map to dimensions using the table above. Deduplicate mapped dimensions. Cap at `max_dimensions` (default 5). - -### 1.7 Success Criteria - -**Question:** "What would make this research highly valuable to you?" - -Options (multi-select): -- Clear go/no-go recommendation -- Specific competitive insights I didn't have -- Quantified market opportunity -- Identified risks I hadn't considered -- Actionable next steps -- Data to support a specific argument -- Surprising insights that change my thinking - -**Question:** "What would make this research fail or be unhelpful?" - -Gather free-form to understand anti-patterns. - -### 1.8 Constraints & Practicalities - -**Question:** "What's your timeline for this research?" - -Options: -- Urgent (need insights today) -- This week -- This month -- No rush, thoroughness over speed - -**Question:** "Any specific sources you trust or distrust?" - -Gather if applicable. - -**Question:** "Budget context that affects recommendations?" - -Options: -- Bootstrapped/limited resources -- Funded startup -- Established company with budget -- Enterprise with significant resources -- Not relevant to share - ---- - -### Quick Mode (--quick flag) - -Ask only: -1. "What decision does this inform and what's the topic?" -2. "Top 3 research priorities (from: sizing, competitive, customer, tech, regulatory, financial, trends)?" -3. "What's your timeline?" - -Use defaults for geography (global), segments (all), and other fields. - ---- - -## Phase 1 Synthesis: Research Brief - -After gathering all context, synthesize into a research brief: - -```markdown -## Research Brief: [TOPIC] - -### Decision Context -[What decision this informs and key success criteria] - -### Target Audience -[Who will use this, their expertise level, what they need] - -### Scope -- Geography: [regions] -- Segments: [target segments] -- Time horizon: [timeframe] -- Competitive position: [their position] - -### Research Priorities (ranked) -1. [Highest priority dimension] -2. 
[Second priority] -3. [Third priority] - -### Hypotheses to Test -- [Hypothesis 1 if provided] - -### Known Competitors -- [Competitor list if provided] - -### Constraints -- Timeline: [urgency] -- Budget context: [if shared] - -### Success Criteria -[What makes this valuable] - -### Anti-patterns to Avoid -[What would make this unhelpful] +Glob("./reports/*/state.json") ``` -**Present brief to user and confirm:** -"Does this capture your research needs? Anything to adjust before we begin?" - -Only proceed to Phase 2 after confirmation (or user says "proceed"). - -**After confirmation:** - -1. **Create research directory:** `./reports/{topic-slug}/` - -2. **Write state file:** `./reports/{topic-slug}/state.json` - ```json - { - "topic": "{topic}", - "topic_slug": "{topic-slug}", - "started": "{ISO_DATE}", - "status": "active", - "phase": "discovery", - "elicitation": { - "decision_context": "...", - "audience": ["..."], - "audience_expertise": "...", - "prior_knowledge": "...", - "hypotheses": ["..."], - "scope": { - "geography": "...", - "segments": ["..."], - "time_horizon": "..." - }, - "competitive_position": "...", - "known_competitors": ["..."], - "priorities": ["..."], - "dimensions": ["competitive", "sizing", "..."], - "success_criteria": ["..."], - "anti_patterns": "...", - "timeline": "...", - "budget_context": "..." - }, - "findings": [], - "sources": [] - } - ``` - -3. **Write elicitation to blackboard:** - ``` - blackboard_write(scope="{topic-slug}", key="elicitation", value={full elicitation object}) - blackboard_write(scope="{topic-slug}", key="team_status", value={"analysts": {"competitive": "pending", ...}}) - ``` - -4. **Capture to Atlatl:** - ``` - capture_memory( - title="Research brief: {topic}", - namespace="_semantic/knowledge", - memory_type="semantic", - tags=["sigint-research", "{topic-slug}"], - confidence=0.9, - content="{research brief markdown}" - ) - ``` - Then `enrich_memory(id)`. 
+If `./reports/{topic-slug}/state.json` exists: +- Load prior elicitation from state.json +- Ask: "Previous research found for '{topic}'. Use prior research context as starting point, or start completely fresh?" +- If "use prior": Pass `--resume-from={topic-slug}` context to orchestrator +- If "start fresh": Proceed normally (prior state.json will be overwritten after confirmation) --- -## Phase 2: Spawn Dimension-Analysts - -**Step 2.1**: For each prioritized dimension, create a TaskCreate: -``` -For each dimension in dimensions (up to max_dimensions): - TaskCreate({ - subject: "Research: {dimension} — {topic}", - owner: "dimension-analyst-{dimension}" - }) -``` -Store each returned task ID. Then update the "Phase 3: Merge Findings" task: -``` -TaskUpdate("Phase 3: Merge Findings", addBlockedBy: [all Phase 2 task IDs]) -``` +## Phase 0.2: Delegate to Research Orchestrator -**Step 2.2**: Spawn ALL dimension-analysts **in ONE message** (so they launch in parallel). Include ALL Agent calls in a single response: +Spawn the research-orchestrator agent with full mode: -For each dimension (max `max_dimensions` concurrent): ``` Agent( - subagent_type="sigint:dimension-analyst", - team_name="sigint-{topic-slug}-research", - name="dimension-analyst-{dimension}", - run_in_background=true, - prompt="[TASK DISCOVERY PROTOCOL from Phase 0.2] - - You are a dimension-analyst for {dimension} research on '{topic}'. - - BLACKBOARD: {topic-slug} - - IMPORTANT: Use WebSearch and WebFetch for real web research. Minimum 5 searches. - Do NOT fabricate findings. Every claim must cite a source you actually retrieved. 
- - State file: ./reports/{topic-slug}/state.json - Skill to load: skills/{skill-directory}/SKILL.md - - METHODOLOGY CHECKLIST (verify against SKILL.md): - {For competitive: "Porter's 5 Forces (with ratings), Competitor Matrix (with trends), Positioning Map (if axes available)"} - {For sizing: "Methodology Selection (Top-Down/Bottom-Up/Value Theory), TAM > SAM > SOM hierarchy, Scenario Modeling (Bear/Base/Bull)"} - {For trends: "Macro/Micro Trends Tables (INC/DEC/CONST), Emerging Signals, Transitional Scenario Graph (Mermaid stateDiagram), Terminal Scenarios"} - {For customer: "Personas, Jobs-to-be-Done, Journey Mapping, Segmentation & Prioritization"} - {For tech: "TRL Levels, Hype Cycle Mapping, Build vs Buy Matrix"} - {For financial: "Unit Economics (CAC/LTV/LTV:CAC/payback), Revenue Model, Cost Structure, Rule of 40"} - {For regulatory: "Framework Identification, Industry-to-Framework Mapping, Penalty Ranges, Risk Matrix"} - - IMPORTANT: You MUST write methodology_plan_{dimension} to the blackboard BEFORE any WebSearch call. - Read your SKILL.md, extract the Required Frameworks table, and write your plan first. - - Your blackboard key: findings_{dimension} - Your task ID: #{taskId} - - Procedure: - 1. blackboard_read(scope='{topic-slug}', key='elicitation') — load research context - 2. Read skills/{skill-directory}/SKILL.md — load your methodology - 3. recall_memories(query='sigint {topic} {dimension}') — check prior knowledge - 4. WebSearch + WebFetch — minimum 5 searches following skill methodology - 5. blackboard_write(scope='{topic-slug}', key='findings_{dimension}', value={structured findings JSON}) - 6. Write findings to ./reports/{topic-slug}/{dimension}-findings.md - 7. TaskUpdate(#{taskId}, status: 'completed') - 8. 
SendMessage(to: 'team-lead', message: {dimension, findings_key, findings_path, finding_count, confidence_avg}, summary: '{dimension} analysis complete')" + subagent_type="sigint:research-orchestrator", + name="research-orchestrator", + prompt="You are the research orchestrator for a new research session. + + MODE: full + TOPIC: {topic from $ARGUMENTS} + TOPIC_SLUG: {topic-slug} + CONFIG: {serialized config} + MAX_DIMENSIONS: {max_dimensions} + QUICK_MODE: {true if --quick flag} + {If resuming: PRIOR_ELICITATION: {prior elicitation JSON}} + + Execute the full research orchestration: + 1. Initialize team and blackboard (Phase 0) + 2. Run elicitation (Phase 1) {quick mode note if applicable} + 3. Spawn dimension-analysts (Phase 2) + 4. Verify methodology plans (Phase 2.5) + 5. Run post-findings codex review gates (Phase 2.75) + 6. Merge findings (Phase 3) + 7. Run post-merge codex review gate (Phase 3.5) + 8. Render progress view (Phase 3.75) + 9. Cleanup (Phase 4) + + Follow all protocols defined in your agent definition." ) ``` -**Dimension-to-skill mapping:** - -| Dimension | Skill Directory | -|-----------|----------------| -| competitive | competitive-analysis | -| sizing | market-sizing | -| trends | trend-analysis | -| customer | customer-research | -| tech | tech-assessment | -| financial | financial-analysis | -| regulatory | regulatory-review | - -**Step 2.3**: Immediately after spawning all agents in a single message, send each one a task assignment message: -``` -For each dimension: - SendMessage( - to: "dimension-analyst-{dimension}", - message: "Task #{taskId} assigned: {dimension} analysis on '{topic}'. Start now.", - summary: "Task #{taskId} assigned" - ) -``` - -**Do NOT execute research yourself.** Only dimension-analyst agents have WebSearch/WebFetch. Your role is to coordinate. - -If more than `max_dimensions` priorities exist, batch the remaining dimensions after the first batch completes. 
- ---- - -## Phase 2.5: Methodology Verification Gate - -After spawning all analysts and sending task assignments, verify that each analyst has produced a methodology plan. - -**Step 2.5.1**: Wait up to 60 seconds per analyst for methodology plans to appear on the blackboard. -For each dimension: -``` -blackboard_read(scope="{topic-slug}", key="methodology_plan_{dimension}") -``` - -**Step 2.5.2**: Surface methodology plans to user in a table: -"📋 Methodology plans confirmed: -| Dimension | Frameworks Planned | Status | -|-----------|-------------------|--------| -| competitive | Porter's 5 Forces, Competitor Matrix, Positioning Map | ✓ plan written | -| sizing | TAM/SAM/SOM, Scenario Modeling | ✓ plan written | -| trends | Macro/Micro Tables, Scenario Graph, Terminal Scenarios | ✓ plan written | -..." - -**Step 2.5.3**: If any analyst fails to produce a methodology plan within 60 seconds: -- Log warning: "⚠️ {dimension} analyst did not produce methodology plan. Research will proceed but methodology compliance is unverified." -- Do NOT block the session. Continue with partial verification. -- Note the gap in the team_status blackboard entry. - ---- - -## Phase 3: Wait and Merge - -Wait for SendMessages from each dimension-analyst (one per dimension). You will receive each as a background notification. - -**For each message received:** -1. Read `findings_{dimension}` from blackboard: - ``` - blackboard_read(scope="{topic-slug}", key="findings_{dimension}") - ``` -2. Append findings to local buffer. -3. Update team_status: - ``` - blackboard_write(scope="{topic-slug}", key="team_status", - value={"analysts": {..., "{dimension}": "complete"}}) - ``` - -**Error handling**: If an analyst has not reported within `dimensionTimeout` seconds (default 300) after the last received message, continue with partial results. Note missing dimensions in state.json as gaps. Do not wait indefinitely. - -**After ALL expected dimensions have reported (or timeout):** - -1. 
Read any cross-dimension conflicts: - ``` - blackboard_read(scope="{topic-slug}", key="conflicts") - ``` - -2. Merge all findings into `./reports/{topic-slug}/state.json`: - - Append all `findings` arrays - - Append all `sources` arrays - - Set `phase: "complete"`, `last_updated: "{ISO_DATE}"` - - Note any gaps from failed/timed-out analysts - -3. Build Methodology Coverage Matrix: - For each dimension: - - Read methodology_plan_{dimension} from blackboard (frameworks planned) - - Read findings_{dimension} from blackboard (check which framework outputs are present) - - Compare planned vs evidenced frameworks - - Present to user: - "📊 Methodology Coverage Matrix: - | Dimension | Framework | Planned | Applied | Notes | - |-----------|-----------|---------|---------|-------| - | competitive | Porter's 5 Forces | ✓ | ✓ | 5 forces with ratings | - | competitive | Competitor Matrix | ✓ | ✓ | 6 competitors compared | - | competitive | Positioning Map | ✓ | ✗ | skipped — no clear axes from elicitation | - | sizing | TAM/SAM/SOM | ✓ | ✓ | $4.2B / $1.8B / $450M | - ..." - - Write coverage matrix to state.json under "methodology_coverage" key. - -4. Write merged summary to blackboard for downstream agents: - ``` - blackboard_write(scope="{topic-slug}", key="merged_findings", value={ - "dimensions_complete": [...], - "dimensions_missing": [...], - "total_findings": N, - "top_insights": ["..."], - "conflicts": [...] - }) - ``` - -5. Capture summary to Atlatl: - ``` - capture_memory( - title="Research complete: {topic}", - namespace="_semantic/knowledge", - memory_type="semantic", - tags=["sigint-research", "{topic-slug}"], - confidence=0.85, - content="Merged findings summary across {N} dimensions..." - ) - ``` - Then `enrich_memory(id)`. - -### Source-Chunker Coordination - -If a dimension-analyst sends a message with type "source_chunking_request": -1. 
Spawn source-chunker: Agent(subagent_type="sigint:source-chunker", team_name="sigint-{topic-slug}-research", name="source-chunker-{request_id}", run_in_background=true, prompt="...") -2. Create task for source-chunker with the URL, extraction focus, and target dimension -3. When source-chunker completes, route chunked findings back to the requesting analyst via SendMessage - -6. Present merged findings summary to user: - - Number of dimensions researched - - Total findings count - - Top 3-5 key insights across all dimensions - - Any conflicts or gaps to be aware of - - Suggested next steps: `/sigint:report` or `/sigint:augment` - ---- - -## Phase 4: Cleanup - -1. Send shutdown requests to all dimension-analyst teammates: - ``` - For each dimension: - SendMessage( - to: "dimension-analyst-{dimension}", - message: {"type": "shutdown_request", "reason": "Research session complete"}, - summary: "Shutdown: research complete" - ) - ``` - -2. Delete the team: - ``` - TeamDelete("sigint-{topic-slug}-research") - ``` - -3. Final status update to user: - - Research session complete - - State saved to `./reports/{topic-slug}/state.json` - - Findings available for report generation: `/sigint:report` - - Issue creation available: `/sigint:issues` - ---- - -## Previous Research Detection - -Before Phase 0.1, check if research already exists on this topic: -``` -Glob("./reports/*/state.json") -``` - -If `./reports/{topic-slug}/state.json` exists: -- Load prior elicitation from state.json -- Ask: "Previous research found for '{topic}'. Use prior research context as starting point, or start completely fresh?" -- If "use prior": Pre-populate elicitation with prior values, ask only for gaps/updates -- If "start fresh": Proceed normally (prior state.json will be overwritten after confirmation) +Wait for the orchestrator to complete. The orchestrator handles all interaction with the user (elicitation questions, confirmations, progress updates). 
--- ## Output -After complete session: -- Confirmed research brief -- Initialized state with full elicitation context -- Findings across all prioritized dimensions -- Merged state.json with all findings and sources -- Next step prompts for report and issue generation +After orchestrator completes: +- Research session state saved to `./reports/{topic-slug}/state.json` +- Progress view at `./reports/{topic-slug}/research-progress.md` +- Quarantined findings (if any) at `./reports/{topic-slug}/quarantine.json` +- Next steps: `/sigint:report`, `/sigint:augment`, `/sigint:update`, `/sigint:issues` diff --git a/skills/start/evals/evals.json b/skills/start/evals/evals.json new file mode 100644 index 0000000..372ebb4 --- /dev/null +++ b/skills/start/evals/evals.json @@ -0,0 +1,209 @@ +{ + "skill_name": "start", + "evals": [ + { + "id": 1, + "prompt": "Research the competitive landscape of AI code assistants", + "expected_output": "Skill parses 'the competitive landscape of AI code assistants' as the topic, derives a topic-slug, checks for .sigint.config.json, checks for prior research via Glob, and delegates to the research-orchestrator agent in full mode with QUICK_MODE: false", + "files": [], + "deterministic_checks": [ + { + "type": "output_contains", + "value": "research-orchestrator", + "description": "Delegates to the research-orchestrator agent" + }, + { + "type": "output_contains", + "value": "MODE: full", + "description": "Spawns orchestrator in full mode" + } + ], + "expectations": [ + "The skill derives a topic-slug from the input topic using lowercase and hyphens", + "The skill checks for .sigint.config.json configuration before delegating", + "The skill checks for previous research via Glob on ./reports/*/state.json", + "The orchestrator agent is spawned with the topic, topic-slug, config, and max_dimensions parameters" + ] + }, + { + "id": 2, + "prompt": "--quick AI code assistants market sizing", + "expected_output": "Skill extracts the --quick flag, sets 
QUICK_MODE to true, and uses 'AI code assistants market sizing' as the remaining topic. Delegates to research-orchestrator with abbreviated elicitation (3 questions instead of 8).", + "files": [], + "deterministic_checks": [ + { + "type": "output_contains", + "value": "QUICK_MODE", + "description": "Quick mode flag is passed to the orchestrator" + }, + { + "type": "output_contains", + "value": "research-orchestrator", + "description": "Delegates to the research-orchestrator agent" + } + ], + "expectations": [ + "The --quick flag is extracted from arguments and QUICK_MODE is set to true", + "The remaining text after --quick is used as the topic hint, not '--quick' itself", + "Abbreviated elicitation with 3 questions is indicated in the orchestrator prompt" + ] + }, + { + "id": 3, + "prompt": "Research Cloud-Native Database Solutions for Enterprise!!!", + "expected_output": "Skill normalizes the topic to a slug: lowercase, special characters replaced with hyphens, truncated to 40 characters. The slug should be something like 'cloud-native-database-solutions-for-ent' (truncated at 40 chars).", + "files": [], + "deterministic_checks": [ + { + "type": "output_contains", + "value": "TOPIC_SLUG", + "description": "Topic slug is passed to the orchestrator" + } + ], + "expectations": [ + "The topic-slug is derived as lowercase with hyphens replacing spaces and special characters", + "The topic-slug is truncated to 40 characters maximum", + "Exclamation marks and other special characters are replaced with hyphens or removed in the slug", + "The original topic text is preserved separately from the normalized slug" + ] + }, + { + "id": 4, + "prompt": "", + "expected_output": "When no topic is provided, the skill uses 'research' as the default topic-slug and proceeds to delegate to the research-orchestrator. 
The orchestrator handles elicitation to determine the actual topic.", + "files": [], + "deterministic_checks": [ + { + "type": "output_contains", + "value": "research-orchestrator", + "description": "Still delegates to orchestrator even with no topic" + } + ], + "expectations": [ + "When no topic hint is provided, the skill uses 'research' as the default topic-slug", + "The skill still proceeds to delegate to the research-orchestrator without error", + "The orchestrator is spawned and will handle topic elicitation interactively" + ] + }, + { + "id": 5, + "prompt": "Research autonomous vehicle regulations", + "expected_output": "Skill loads .sigint.config.json if it exists and merges with defaults. If the config file does not exist, defaults are used silently without creating the file. Default maxDimensions is 5.", + "files": [ + { + "path": ".sigint.config.json", + "content": "{\n \"version\": \"1.0\",\n \"research\": {\n \"maxDimensions\": 3,\n \"dimensionTimeout\": 600,\n \"defaultPriorities\": [\"regulatory\", \"competitive\"]\n }\n}" + } + ], + "deterministic_checks": [ + { + "type": "output_contains", + "value": "MAX_DIMENSIONS", + "description": "Max dimensions parameter is passed to orchestrator" + }, + { + "type": "output_contains", + "value": "CONFIG", + "description": "Config is serialized and passed to orchestrator" + } + ], + "expectations": [ + "The skill reads and parses .sigint.config.json successfully", + "MAX_DIMENSIONS is set to 3 (from the custom config, not the default 5)", + "The config is serialized and passed to the orchestrator agent prompt", + "The custom defaultPriorities from config are preserved" + ] + }, + { + "id": 6, + "prompt": "Research supply chain optimization", + "expected_output": "Skill checks for previous research at ./reports/supply-chain-optimization/state.json via Glob. If prior research exists, the user is asked whether to resume or start fresh. 
If no prior research, proceeds directly to orchestrator delegation.", + "files": [ + { + "path": "reports/supply-chain-optimization/state.json", + "content": "{\n \"topic\": \"supply chain optimization\",\n \"phase\": \"complete\",\n \"elicitation\": {\"questions\": [], \"answers\": []}\n}" + } + ], + "deterministic_checks": [ + { + "type": "output_contains", + "value": "state.json", + "description": "Checks for prior state.json via Glob" + } + ], + "expectations": [ + "The skill detects existing research at reports/supply-chain-optimization/state.json", + "The user is asked whether to use prior research context or start completely fresh", + "If resuming, --resume-from context is passed to the orchestrator", + "The prior elicitation data from state.json is loaded for potential reuse" + ] + }, + { + "id": 7, + "prompt": "--quick", + "expected_output": "When only --quick is passed with no topic, the skill extracts the flag and uses 'research' as the default topic-slug. Delegates to orchestrator with QUICK_MODE: true.", + "files": [], + "deterministic_checks": [ + { + "type": "output_contains", + "value": "QUICK_MODE", + "description": "Quick mode is set even with no topic" + }, + { + "type": "output_contains", + "value": "research-orchestrator", + "description": "Delegates to orchestrator" + } + ], + "expectations": [ + "The --quick flag is extracted leaving no remaining topic text", + "The default topic-slug 'research' is used when no topic remains after flag extraction", + "QUICK_MODE is set to true in the orchestrator prompt", + "The orchestrator is spawned successfully despite no explicit topic" + ] + }, + { + "id": 8, + "prompt": "Research the emerging trends in quantum computing hardware & software ecosystems across North America and Europe", + "expected_output": "Long topic text is handled correctly. The slug is truncated to 40 characters. 
The full topic text is preserved in the TOPIC parameter passed to the orchestrator.", + "files": [], + "deterministic_checks": [ + { + "type": "output_contains", + "value": "TOPIC:", + "description": "Full topic is passed to orchestrator" + }, + { + "type": "output_contains", + "value": "TOPIC_SLUG:", + "description": "Truncated slug is passed to orchestrator" + } + ], + "expectations": [ + "The topic-slug is truncated to exactly 40 characters", + "The full untruncated topic text is passed separately as the TOPIC parameter", + "Ampersands and special characters in the topic are handled in slug normalization", + "The orchestrator receives both the full topic and the normalized slug" + ] + }, + { + "id": 9, + "prompt": "Research fintech payment processing", + "expected_output": "After the orchestrator completes, the skill indicates where outputs are saved: state.json, research-progress.md, and optionally quarantine.json in the reports/{topic-slug}/ directory. Next steps reference /sigint:report, /sigint:augment, /sigint:update, /sigint:issues.", + "files": [], + "deterministic_checks": [ + { + "type": "output_contains", + "value": "reports/", + "description": "Output references the reports directory" + } + ], + "expectations": [ + "The skill indicates research session state is saved to ./reports/{topic-slug}/state.json", + "Progress view location at ./reports/{topic-slug}/research-progress.md is mentioned", + "Next steps reference available follow-up commands: /sigint:report, /sigint:augment, /sigint:update, /sigint:issues", + "The orchestrator handles all 9 phases from initialization through cleanup" + ] + } + ] +} diff --git a/skills/trend-analysis/SKILL.md b/skills/trend-analysis/SKILL.md index 69bcbbf..bd3c1ee 100644 --- a/skills/trend-analysis/SKILL.md +++ b/skills/trend-analysis/SKILL.md @@ -24,12 +24,14 @@ Trend analysis identifies patterns of change over time to anticipate future mark ## Trend Categories ### Macro Trends (3-10+ years) -Large-scale shifts 
affecting multiple industries: -- **Economic**: Interest rates, inflation, employment -- **Technological**: AI, blockchain, quantum computing -- **Social**: Demographics, values, behaviors -- **Environmental**: Climate, sustainability, resources -- **Political**: Regulation, trade, governance +Large-scale shifts affecting multiple industries. **Always classify each macro trend into one of these five categories and label it explicitly**: +- **Economic**: Interest rates, inflation, employment, trade flows, pricing pressure +- **Technological**: AI, blockchain, quantum computing, automation, platform shifts +- **Social**: Demographics, values, behaviors, workforce changes, cultural shifts +- **Environmental**: Climate, sustainability, resources, regulatory carbon targets +- **Political**: Regulation, trade policy, governance, geopolitical risk + +When presenting macro trends, include the category label (e.g., "Technological: AI-driven precision agriculture") so the classification is visible in the output. ### Micro Trends (1-3 years) Industry or segment-specific patterns: @@ -47,9 +49,11 @@ Early indicators of potential trends: - Early adopter behavior - Influencer attention +**For each emerging signal, always state what it may indicate or suggest** using language such as "This signal suggests...", "This may indicate...", "The implication is...", or "This points to...". Every signal must be paired with its potential implication. + ## Three-Valued Trend Logic -From the trend-based modeling research, apply minimal-information quantifiers: +From the trend-based modeling research, apply minimal-information quantifiers. The three values are **INC** (Increasing), **DEC** (Decreasing), and **CONST** (Constant). When explaining the system, always introduce all three values together in a single summary before elaborating on each. 
**INC (Increasing)** - Measurable upward movement @@ -158,9 +162,9 @@ Rate trend confidence: ## Trend Analysis Summary ### Macro Trends -| Trend | Direction | Confidence | Timeframe | -|-------|-----------|------------|-----------| -| [Name] | INC/DEC/CONST | High/Med/Low | X years | +| Category | Trend | Direction | Confidence | Timeframe | +|----------|-------|-----------|------------|-----------| +| Economic/Technological/Social/Environmental/Political | [Name] | INC/DEC/CONST | High/Med/Low | X years | ### Micro Trends | Trend | Direction | Confidence | Timeframe | @@ -168,8 +172,8 @@ Rate trend confidence: | [Name] | INC/DEC/CONST | High/Med/Low | X months | ### Emerging Signals -- [Signal 1]: [Potential implication] -- [Signal 2]: [Potential implication] +- [Signal 1]: This suggests/indicates [potential implication] +- [Signal 2]: This suggests/indicates [potential implication] ## Transitional Scenario Graph [Mermaid diagram] @@ -213,7 +217,7 @@ For detailed methodologies, see: ## Orchestration Hints -- **Blackboard key**: `findings_trends` +- **Blackboard key**: `findings_trends` (trend-modeling uses separate key `findings_trend_modeling`) - **Cross-reference dimensions**: tech (adoption curves, technology maturity), regulatory (regulatory shifts impacting trends) - **Alert triggers**: - Trend reversal detected (INC→DEC or DEC→INC) diff --git a/skills/trend-modeling/SKILL.md b/skills/trend-modeling/SKILL.md index 2985915..6e5d0b9 100644 --- a/skills/trend-modeling/SKILL.md +++ b/skills/trend-modeling/SKILL.md @@ -48,7 +48,7 @@ Traditional market analysis requires extensive quantitative data. Three-valued l ## Extended Notation -For more nuanced analysis: +For more nuanced analysis, use acceleration/deceleration modifiers. 
**When using extended notation, ALWAYS include this definition table in the output** so the reader understands the codes: | Code | Meaning | Description | |------|---------|-------------| @@ -57,24 +57,37 @@ For more nuanced analysis: | AD | Accelerating Decrease | DEC with increasing rate | | DD | Decelerating Decrease | DEC with decreasing rate | +When a user requests extended notation, the output MUST: +1. Include the definition table above (with the "Accelerating Growth", "Decelerating Growth", etc. labels) +2. Use AG/DG/AD/DD codes in scenario assignments +3. Explain which extended code applies to each variable and why + ## Correlation-to-Trend Conversion -Transform correlation relationships into trend relationships: +Transform correlation relationships into trend relationships using the **required notation format**: **If variables X and Y have positive correlation:** - When X is INC → Y is INC - When X is DEC → Y is DEC -- Notation: INC(X, Y) +- **MUST use notation**: `INC(X, Y)` — meaning X and Y trend in the same direction **If variables X and Y have negative correlation:** - When X is INC → Y is DEC - When X is DEC → Y is INC -- Notation: DEC(X, Y) +- **MUST use notation**: `DEC(X, Y)` — meaning X and Y trend in opposite directions + +**CRITICAL**: When the user provides correlation data, you MUST: +1. Explicitly label each correlation as "positive correlation" or "negative correlation" +2. Use the `INC(X, Y)` or `DEC(X, Y)` notation format for each relationship +3. 
Show the conversion step clearly before building the relationship matrix **Example:** -- Market size and competition have positive correlation +- Market size and competition have **positive correlation** → `INC(Market Size, Competition)` - If Market Size = INC, then Competition = INC - If Market Size = DEC, then Competition = DEC +- Price and demand have **negative correlation** → `DEC(Price, Demand)` +- If Price = INC, then Demand = DEC +- If Price = DEC, then Demand = INC ## Trend Model Construction @@ -203,6 +216,7 @@ When analyzing terminal scenarios: - **Document uncertainty**: Note where relationships are speculative - **Update iteratively**: Refine model as new information emerges - **Focus on transitions**: The paths between scenarios often matter more than endpoints +- **Large models (7+ variables)**: Focus the relationship matrix on direct relationships only. Not every variable pair needs a relationship — use CONST for pairs without clear correlation. Generate 3-5 key scenarios rather than exhaustively enumerating all combinations. Prioritize terminal scenarios and the most likely transitional paths. 
## Advantages of This Approach

@@ -220,7 +234,7 @@ For theoretical background and advanced techniques, see:

## Orchestration Hints

-- **Blackboard key**: `findings_trends` (shared with trend-analysis — trend-modeling extends trend-analysis findings with scenario modeling)
+- **Blackboard key**: `findings_trend_modeling` (separate from trend-analysis which uses `findings_trends` — trend-modeling produces scenario models that complement but do not overwrite trend-analysis findings)
- **Cross-reference dimensions**: All dimensions provide input variables for scenario modeling
- **Alert triggers**:
  - Scenario with >50% probability of adverse outcome
diff --git a/skills/update/SKILL.md b/skills/update/SKILL.md
new file mode 100644
index 0000000..feee489
--- /dev/null
+++ b/skills/update/SKILL.md
@@ -0,0 +1,121 @@
+---
+name: update
+description: Refresh existing research with latest data using swarm orchestration and delta detection. Delegates to the research-orchestrator agent in update mode.
+argument-hint: "[--topic <topic-slug>] [--area <area>] [--since <YYYY-MM-DD>] [--no-delta] [--dimensions <dim1,dim2,...>]"
+---
+
+# Sigint Update Skill (Swarm Orchestration)
+
+This skill refreshes existing research by delegating to the research-orchestrator agent in update mode. The orchestrator spawns dimension-analysts for the specified dimensions, runs codex review gates, performs delta detection against prior findings, and updates state.
+
+## Arguments
+
+Parse `$ARGUMENTS` before any other processing. Always echo the parsed result so the user sees what was resolved:
+
+- `--topic <topic-slug>` — Optional: specify which research session to update. Required when multiple sessions exist.
+- `--area <area>` — Optional: specific area to update. Maps to the matching dimension from prior elicitation (e.g., `--area regulatory` resolves to the dimension whose name contains "regulatory").
+- `--since <YYYY-MM-DD>` — Optional: SINCE_DATE for date-filtered queries. Only fetch data since this date. 
+- `--no-delta` — Disable delta detection (DELTA_ENABLED: false). By default, delta detection is enabled (DELTA_ENABLED: true).
+- `--dimensions <dim1,dim2,...>` — Optional: comma-separated list of specific dimensions to update. Only these are passed to the orchestrator, not all dimensions.
+
+---
+
+## Phase 0: Pre-flight
+
+### Step 0.1: Locate Active Research Session
+
+Find the active research state:
+```
+Glob("./reports/*/state.json")
+```
+
+If no state.json found:
+1. Inform user: "No active research session found. Use `/sigint:start` to begin."
+2. Show what the update would have done based on the parsed arguments. Use this exact format:
+
+   **Planned update workflow (blocked — no session data):**
+   - **Topic**: {from --topic, or "auto-detect from single session"}
+   - **Dimensions**: {from --dimensions listing each one, or --area mapped to its matching dimension, or "all dimensions from prior elicitation"}
+   - **SINCE_DATE**: {from --since, or "none (fetch all available data)"}
+   - **DELTA_ENABLED**: {"false — findings would be replaced wholesale (--no-delta specified)" if --no-delta, or "true — delta detection would classify findings as NEW, UPDATED, CONFIRMED, POTENTIALLY_REMOVED, or TREND_REVERSAL"}
+   - **Orchestrator**: research-orchestrator would be spawned in MODE: update
+   - **Elicitation**: Prior elicitation from state.json would be reused (not re-run)
+   - **Reconciliation**: {If DELTA_ENABLED: "Reconcile merge — replace updated findings, archive removed, add new (not append blindly). A new lineage entry would be added." | If not: "Wholesale replacement — findings replaced entirely, no reconciliation against prior findings."}
+
+3. Stop execution. Do NOT proceed further.
+
+If multiple sessions found:
+- If `--topic` was provided: select that topic-slug
+- Otherwise: list all available sessions and ask the user to choose which one to update. Output: "Multiple sessions found. Please specify which session to update." Do NOT arbitrarily pick one. 
+ +### Step 0.2: Load Prior State + +Read `./reports/{topic-slug}/state.json` (where `topic-slug` is resolved from `--topic` argument, single-session auto-detect, or user selection). Extract: +- `topic`, `topic_slug` +- `elicitation` (reuse for dimension-analysts) +- `findings[]` (for delta detection baseline) +- `lineage[]` (to append new entry) +- Prior `dimensions` list + +### Step 0.3: Resolve Dimensions + +Priority order for which dimensions to update: +1. `--dimensions` argument (if provided) +2. `--area` mapped to dimension (if provided) +3. All dimensions from prior `elicitation.dimensions` + +--- + +## Phase 1: Delegate to Research Orchestrator + +Spawn the research-orchestrator in update mode: + +``` +Agent( + subagent_type="sigint:research-orchestrator", + name="research-orchestrator", + prompt="You are the research orchestrator for a research UPDATE session. + + MODE: update + TOPIC: {topic} + TOPIC_SLUG: {topic-slug} + DIMENSIONS: {resolved dimensions list} + SINCE_DATE: {--since value or null} + DELTA_ENABLED: {false if --no-delta, otherwise true} + PRIOR_STATE: {summary of prior state — finding count, dimensions, last updated} + ELICITATION: {prior elicitation JSON from state.json} + + Execute the update orchestration: + 1. Initialize team and blackboard (Phase 0) — reuse topic-slug + 2. Skip elicitation — load from ELICITATION above + 3. Write elicitation to blackboard for analysts + 4. Spawn dimension-analysts for DIMENSIONS (Phase 2) + 5. Verify methodology plans (Phase 2.5) + 6. Run post-findings codex review gates (Phase 2.75) + 7. Run delta detection BEFORE merge (Delta Protocol) — classify findings as NEW/UPDATED/CONFIRMED/POTENTIALLY_REMOVED/TREND_REVERSAL + 8. Reconcile merge (Phase 3.4) — replace updated, archive removed, add new. Do NOT append blindly; reconcile against prior findings. + 9. Run post-merge codex review gate (Phase 3.5) + 10. Render progress view (Phase 3.75) + 11. 
Cleanup (Phase 4) + + IMPORTANT: Include SINCE_DATE context in analyst spawn prompts so they + prioritize recent data. Analysts should use date-filtered WebSearch queries. + + Follow all protocols defined in your agent definition." +) +``` + +Wait for orchestrator to complete. + +--- + +## Output + +After orchestrator completes: +- Updated findings in `./reports/{topic-slug}/state.json` with new lineage entry +- Delta report at `./reports/{topic-slug}/YYYY-MM-DD-delta.md` (if delta enabled) +- Updated `./reports/{topic-slug}/research-progress.md` +- Quarantined findings (if any) at `./reports/{topic-slug}/quarantine.json` +- Summary of what changed: new, updated, confirmed, removed findings +- Trend reversals highlighted +- Next steps: `/sigint:report`, `/sigint:augment` diff --git a/skills/update/evals/evals.json b/skills/update/evals/evals.json new file mode 100644 index 0000000..943f3fe --- /dev/null +++ b/skills/update/evals/evals.json @@ -0,0 +1,281 @@ +{ + "skill_name": "update", + "evals": [ + { + "id": 1, + "prompt": "Update my research on the enterprise observability market. 
I want to refresh all dimensions with the latest data.", + "expected_output": "The skill locates the active research session via state.json, loads prior state and elicitation, delegates to research-orchestrator in update mode with all dimensions, performs delta detection, and produces an updated state.json with a new lineage entry and delta report", + "files": [], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "state\\.json", + "description": "Skill searches for or reads state.json to locate the active research session" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "(research-orchestrator|orchestrator)", + "description": "Skill delegates to the research-orchestrator agent" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "MODE:\\s*update|mode.*update|update.*mode", + "description": "Orchestrator is spawned in update mode" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "DELTA_ENABLED.*true|delta.*detect|delta.*enabled", + "description": "Delta detection is enabled by default" + } + ], + "expectations": [ + "The skill reads state.json to load prior findings, elicitation, lineage, and dimensions", + "The research-orchestrator is spawned with MODE: update and receives the prior elicitation", + "All dimensions from the prior research are included in the update delegation", + "Delta detection is enabled by default since --no-delta was not specified", + "The output references a summary of changes: new, updated, confirmed, or removed findings" + ] + }, + { + "id": 2, + "prompt": "/sigint:update --topic enterprise-observability --dimensions market-size,competitive-landscape --since 2026-01-01", + "expected_output": "The skill uses the specified topic slug, updates only the market-size and competitive-landscape dimensions, and passes the since date to the orchestrator for date-filtered queries", + "files": [], + "deterministic_checks": [ + { + 
"type": "regex_match", + "file": "transcript.md", + "pattern": "enterprise-observability", + "description": "Skill resolves to the enterprise-observability topic slug" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "market-size", + "description": "market-size dimension is included in the update" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "competitive-landscape", + "description": "competitive-landscape dimension is included in the update" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "2026-01-01|SINCE_DATE", + "description": "The since date is passed to the orchestrator" + } + ], + "expectations": [ + "Only market-size and competitive-landscape dimensions are updated, not all dimensions", + "The SINCE_DATE parameter is passed to the orchestrator so analysts prioritize data from 2026-01-01 onward", + "The skill does NOT re-run elicitation -- it reuses the prior elicitation from state.json" + ] + }, + { + "id": 3, + "prompt": "/sigint:update --no-delta", + "expected_output": "The skill performs a full refresh of existing research with delta detection disabled, meaning findings are replaced wholesale without NEW/UPDATED/CONFIRMED/POTENTIALLY_REMOVED classification", + "files": [], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "state\\.json", + "description": "Skill locates and reads state.json" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "DELTA_ENABLED.*false|no.delta|delta.*disabled", + "description": "Delta detection is explicitly disabled" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "(research-orchestrator|orchestrator)", + "description": "Skill still delegates to the research-orchestrator" + } + ], + "expectations": [ + "Delta detection is disabled because --no-delta was specified", + "The orchestrator does NOT classify findings as 
NEW/UPDATED/CONFIRMED/POTENTIALLY_REMOVED", + "Findings are replaced wholesale rather than reconciled against prior findings", + "The rest of the update pipeline (orchestrator delegation, dimension analysts, merge) still executes normally" + ] + }, + { + "id": 4, + "prompt": "/sigint:update --area regulatory", + "expected_output": "The skill maps the --area argument to the corresponding dimension and updates only that single dimension", + "files": [], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "regulatory|regulation", + "description": "The regulatory area is identified and mapped to a dimension" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "state\\.json", + "description": "Skill loads prior state from state.json" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "(research-orchestrator|orchestrator)", + "description": "Skill delegates to the research-orchestrator" + } + ], + "expectations": [ + "The --area regulatory argument is mapped to a specific dimension from the prior elicitation", + "Only the resolved dimension is passed to the orchestrator, not all dimensions", + "Delta detection is enabled by default for the single-dimension update" + ] + }, + { + "id": 5, + "prompt": "Update my research.", + "expected_output": "When no state.json exists, the skill informs the user that no active research session was found and suggests using /sigint:start", + "files": [], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "state\\.json|Glob", + "description": "Skill searches for state.json files" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "[Nn]o active research|[Nn]o.*session|not found", + "description": "Skill reports that no active research session was found" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "sigint:start|/start", + "description": "Skill suggests 
using /sigint:start to begin a new session" + } + ], + "expectations": [ + "The skill searches for state.json files using Glob", + "When no state.json is found, the skill stops and does NOT attempt to create a new session", + "The error message suggests using /sigint:start to begin research" + ] + }, + { + "id": 6, + "prompt": "Update my research. I have sessions for both enterprise-observability and ai-coding-assistants.", + "expected_output": "When multiple sessions exist and no --topic is specified, the skill lists all sessions and asks the user to specify which one to update", + "files": [], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "state\\.json|Glob", + "description": "Skill searches for state.json files" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "(multiple|which|specify|select|choose)", + "description": "Skill asks the user to choose which session to update" + } + ], + "expectations": [ + "The skill finds multiple state.json files across different topic directories", + "The skill lists all available sessions for the user to choose from", + "The skill does NOT arbitrarily pick one session -- it asks the user to specify" + ] + }, + { + "id": 7, + "prompt": "/sigint:update --topic enterprise-observability --dimensions market-size", + "expected_output": "The skill performs an update on a single dimension, delegates to the orchestrator, receives delta detection results classifying findings as NEW/UPDATED/CONFIRMED/POTENTIALLY_REMOVED, and reconciles by replacing updated findings rather than appending", + "files": [], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "enterprise-observability", + "description": "Skill uses the specified topic slug" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "market-size", + "description": "Only market-size dimension is updated" + }, + { + "type": "regex_match", + 
"file": "transcript.md", + "pattern": "(NEW|UPDATED|CONFIRMED|POTENTIALLY_REMOVED|TREND_REVERSAL)", + "description": "Delta detection classifies findings using the defined categories" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "(reconcil|replace|merge|not append)", + "description": "Findings are reconciled (replaced, not appended blindly)" + } + ], + "expectations": [ + "Delta detection classifies findings using at least some of: NEW, UPDATED, CONFIRMED, POTENTIALLY_REMOVED, TREND_REVERSAL", + "Reconciliation replaces updated findings rather than blindly appending new ones alongside old", + "The updated state.json includes a new lineage entry recording this update" + ] + }, + { + "id": 8, + "prompt": "/sigint:update --topic enterprise-observability --since 2026-03-01 --dimensions competitive-landscape,customer-research", + "expected_output": "The skill combines --since date filtering with specific dimensions, passing SINCE_DATE to the orchestrator so analysts use date-filtered web searches. 
A delta report is generated showing changes since the prior update.", + "files": [], + "deterministic_checks": [ + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "2026-03-01|SINCE_DATE", + "description": "The since date is propagated to the orchestrator" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "competitive-landscape", + "description": "competitive-landscape dimension is included" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "customer-research", + "description": "customer-research dimension is included" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "delta|DELTA", + "description": "Delta detection is referenced in the output" + }, + { + "type": "regex_match", + "file": "transcript.md", + "pattern": "elicitation|ELICITATION", + "description": "Prior elicitation is loaded and passed to the orchestrator" + } + ], + "expectations": [ + "The SINCE_DATE is included in analyst spawn prompts so they prioritize recent data via date-filtered searches", + "Only competitive-landscape and customer-research dimensions are updated", + "Prior elicitation from state.json is reused without re-running the elicitation phase", + "A delta report summarizing changes is generated or referenced in the output" + ] + } + ] +}