diff --git a/.claude/intelligence-cache.json b/.claude/intelligence-cache.json index 78244a3e9..8358ce57f 100644 --- a/.claude/intelligence-cache.json +++ b/.claude/intelligence-cache.json @@ -263,7 +263,7 @@ } } ], - "timestamp": 1775302080, + "timestamp": 1775307153, "ttl": 1800 }, "fdbb7bc340414cf938cecd4b790fa9e2": { diff --git a/.claude/loop-logs/iteration-1.log b/.claude/loop-logs/iteration-1.log index 625f2153b..5fb2f6213 100644 --- a/.claude/loop-logs/iteration-1.log +++ b/.claude/loop-logs/iteration-1.log @@ -1,11 +1 @@ -Perfect! The goal has been fully achieved. The shipwright CLI now has a `hello` command that outputs "hello world". The implementation: - -1. ✅ Added the `hello` command to the help text -2. ✅ Implemented the command case statement in the main router -3. ✅ Tested the command works correctly -4. ✅ All tests pass -5. ✅ Committed with a descriptive message - -The command is now available as `shipwright hello` (and with aliases `sw hello` and `cct hello`). - -LOOP_COMPLETE +{"type":"result","subtype":"success","is_error":false,"duration_ms":90646,"duration_api_ms":90727,"num_turns":13,"result":"Done. Here's what was implemented:\n\n- **`sw-hello.sh`**: Reads version from `package.json` at runtime (using `jq` with grep fallback). Both the default (no-args) output and `--version` flag now display `Shipwright v3.2.4`.\n- **`sw-hello-test.sh`**: Updated `test_hello_output` and `test_hello_version` to match the new `Shipwright vX.Y.Z` format. Added `test_hello_version_from_package_json` to verify the version matches `package.json` exactly.\n\nAll 7 hello tests pass. The 3 pre-existing failures in the full suite are unrelated (swarm/tmux integration, PM learn subcommand, predictive anomaly detection).\n\nLOOP_COMPLETE","stop_reason":"end_turn","session_id":"85c3bb83-ce13-4a58-8360-86070237830f","total_cost_usd":0.5034482999999998,"usage":{"input_tokens":14,"cache_creation_input_tokens":62482,"cache_read_input_tokens":628216,"output_tokens":5124,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":62482,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"},"modelUsage":{"claude-sonnet-4-6":{"inputTokens":14,"outputTokens":5124,"cacheReadInputTokens":628216,"cacheCreationInputTokens":62482,"webSearchRequests":0,"costUSD":0.49967429999999996,"contextWindow":200000,"maxOutputTokens":32000},"claude-haiku-4-5-20251001":{"inputTokens":3284,"outputTokens":98,"cacheReadInputTokens":0,"cacheCreationInputTokens":0,"webSearchRequests":0,"costUSD":0.003774,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"terminal_reason":"completed","fast_mode_state":"off","uuid":"cf1bc8ac-8d20-4abb-a3bb-2726d25b75c4"} diff --git a/.claude/loop-logs/iteration-2.log b/.claude/loop-logs/iteration-2.log index e7872772d..e1c7a59a8 100644 --- a/.claude/loop-logs/iteration-2.log +++ b/.claude/loop-logs/iteration-2.log @@ -1 +1 @@ -Invalid API key · Fix external API key +{"type":"result","subtype":"success","is_error":false,"duration_ms":116879,"duration_api_ms":135475,"num_turns":22,"result":"The goal is complete:\n\n1. **sw-hello command** (`scripts/sw-hello.sh`) reads version from `package.json` and displays `Shipwright vX.Y.Z` — done in iteration 1\n2. **Test suite** (`scripts/sw-hello-test.sh`) with 7 tests all passing — done in iteration 1 \n3. **Cost test fix** — the `sw-cost-test.sh` context efficiency tests were failing because hardcoded Feb 27 timestamps fell outside the 30-day window. Fixed by using dynamic `$(date +%s)` timestamps.\n\nAll failures in the full test suite (PM learn, predictive anomaly, swarm/tmux) are pre-existing and unrelated to this feature.\n\nLOOP_COMPLETE","stop_reason":"end_turn","session_id":"34b07770-fff9-46f4-b45a-56472d0fb114","total_cost_usd":0.6772343999999999,"usage":{"input_tokens":21,"cache_creation_input_tokens":69222,"cache_read_input_tokens":1108633,"output_tokens":5214,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":69222,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"},"modelUsage":{"claude-sonnet-4-6":{"inputTokens":21,"outputTokens":5214,"cacheReadInputTokens":1108633,"cacheCreationInputTokens":69222,"webSearchRequests":0,"costUSD":0.6704454,"contextWindow":200000,"maxOutputTokens":32000},"claude-haiku-4-5-20251001":{"inputTokens":5804,"outputTokens":197,"cacheReadInputTokens":0,"cacheCreationInputTokens":0,"webSearchRequests":0,"costUSD":0.006789,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"terminal_reason":"completed","fast_mode_state":"off","uuid":"d8ca74cf-0c51-4abe-ab85-617575daf26b"} diff --git a/.claude/loop-logs/tests-iter-1.log b/.claude/loop-logs/tests-iter-1.log index 4e762dfc3..9fde44d6a 100644 --- a/.claude/loop-logs/tests-iter-1.log +++ b/.claude/loop-logs/tests-iter-1.log @@ -1,557 +1,695 @@ -> shipwright-cli@1.7.1 test -> bash scripts/sw-pipeline-test.sh && bash scripts/sw-daemon-test.sh && bash scripts/sw-prep-test.sh && bash scripts/sw-fleet-test.sh && bash scripts/sw-fix-test.sh && bash scripts/sw-memory-test.sh && bash scripts/sw-session-test.sh && bash scripts/sw-init-test.sh && bash scripts/sw-tracker-test.sh && bash scripts/sw-heartbeat-test.sh && bash scripts/sw-remote-test.sh && bash scripts/sw-intelligence-test.sh && bash scripts/sw-pipeline-composer-test.sh && bash scripts/sw-self-optimize-test.sh && bash scripts/sw-predictive-test.sh && bash scripts/sw-frontier-test.sh && bash scripts/sw-connect-test.sh +> shipwright-cli@3.2.4 test +> bash scripts/sw-agi-roadmap-test.sh && bash scripts/sw-activity-test.sh && bash scripts/sw-adaptive-test.sh && bash scripts/sw-adversarial-test.sh && bash scripts/sw-architecture-enforcer-test.sh && bash scripts/sw-auth-test.sh && bash scripts/sw-autonomous-test.sh && bash scripts/sw-changelog-test.sh && bash scripts/sw-checkpoint-test.sh && bash scripts/sw-ci-test.sh && bash scripts/sw-cleanup-test.sh && bash scripts/sw-code-review-test.sh && bash scripts/sw-connect-test.sh && bash scripts/sw-context-budget-test.sh && bash scripts/sw-context-test.sh && bash scripts/sw-cost-test.sh && bash scripts/sw-daemon-test.sh && bash scripts/sw-dashboard-test.sh && bash scripts/sw-db-test.sh && bash scripts/sw-decompose-test.sh && bash scripts/sw-decide-test.sh && bash scripts/sw-deps-test.sh && bash scripts/sw-developer-simulation-test.sh && bash scripts/sw-discovery-test.sh && bash scripts/sw-doc-fleet-test.sh && bash scripts/sw-docs-agent-test.sh && bash scripts/sw-docs-test.sh && bash scripts/sw-doctor-test.sh && bash scripts/sw-dora-test.sh && bash scripts/sw-durable-test.sh && bash scripts/sw-e2e-orchestrator-test.sh && bash scripts/sw-eventbus-test.sh && bash scripts/sw-feedback-test.sh && bash scripts/sw-outcome-feedback-test.sh && bash scripts/sw-fix-test.sh && bash scripts/sw-fleet-discover-test.sh && bash scripts/sw-fleet-test.sh && bash scripts/sw-fleet-viz-test.sh && bash scripts/sw-frontier-test.sh && bash scripts/sw-github-app-test.sh && bash scripts/sw-github-checks-test.sh && bash scripts/sw-github-deploy-test.sh && bash scripts/sw-github-graphql-test.sh && bash scripts/sw-guild-test.sh && bash scripts/sw-heartbeat-test.sh && bash scripts/sw-hello-test.sh && bash scripts/sw-hygiene-test.sh && bash scripts/sw-incident-test.sh && bash scripts/sw-init-test.sh && bash scripts/sw-instrument-test.sh && bash scripts/sw-intelligence-test.sh && bash scripts/sw-jira-test.sh && bash scripts/sw-launchd-test.sh && bash scripts/sw-linear-test.sh && bash scripts/sw-logs-test.sh && bash scripts/sw-loop-test.sh && bash scripts/sw-memory-test.sh && bash scripts/sw-mission-control-test.sh && bash scripts/sw-model-router-test.sh && bash scripts/sw-otel-test.sh && bash scripts/sw-oversight-test.sh && bash scripts/sw-patrol-meta-test.sh && bash scripts/sw-pipeline-composer-test.sh && bash scripts/sw-pipeline-test.sh && bash scripts/sw-pipeline-vitals-test.sh && bash scripts/sw-pm-test.sh && bash scripts/sw-pr-lifecycle-test.sh && bash scripts/sw-predictive-test.sh && bash scripts/sw-prep-test.sh && bash scripts/sw-ps-test.sh && bash scripts/sw-public-dashboard-test.sh && bash scripts/sw-quality-profile-test.sh && bash scripts/sw-quality-test.sh && bash scripts/sw-reaper-test.sh && bash scripts/sw-recruit-test.sh && bash scripts/sw-regression-test.sh && bash scripts/sw-release-manager-test.sh && bash scripts/sw-release-test.sh && bash scripts/sw-root-cause-test.sh && bash scripts/sw-remote-test.sh && bash scripts/sw-replay-test.sh && bash scripts/sw-retro-test.sh && bash scripts/sw-scale-test.sh && bash scripts/sw-stall-detector-test.sh && bash scripts/sw-security-audit-test.sh && bash scripts/sw-self-optimize-test.sh && bash scripts/sw-session-test.sh && bash scripts/sw-setup-test.sh && bash scripts/sw-standup-test.sh && bash scripts/sw-status-test.sh && bash scripts/sw-strategic-test.sh && bash scripts/sw-stream-test.sh && bash scripts/sw-swarm-test.sh && bash scripts/sw-team-stages-test.sh && bash scripts/sw-templates-test.sh && bash scripts/sw-testgen-test.sh && bash scripts/sw-tmux-pipeline-test.sh && bash scripts/sw-tmux-test.sh && bash scripts/sw-trace-test.sh && bash scripts/sw-tracker-test.sh && bash scripts/sw-triage-test.sh && bash scripts/sw-upgrade-test.sh && bash scripts/sw-ux-test.sh && bash scripts/sw-webhook-test.sh && bash scripts/sw-widgets-test.sh && bash scripts/sw-worktree-test.sh && bash scripts/sw-lib-compat-test.sh && bash scripts/sw-lib-helpers-test.sh && bash scripts/sw-lib-error-actionability-test.sh && bash scripts/sw-lib-daemon-dispatch-test.sh && bash scripts/sw-lib-daemon-failure-test.sh && bash scripts/sw-lib-daemon-poll-test.sh && bash scripts/sw-lib-daemon-state-test.sh && bash scripts/sw-lib-daemon-triage-test.sh && bash scripts/sw-lib-daemon-patrol-test.sh && bash scripts/sw-lib-pipeline-detection-test.sh && bash scripts/sw-lib-pipeline-intelligence-test.sh && bash scripts/sw-lib-pipeline-quality-checks-test.sh && bash scripts/sw-lib-pipeline-stages-test.sh && bash scripts/sw-lib-pipeline-state-test.sh && bash scripts/sw-adapters-test.sh && bash scripts/sw-evidence-test.sh && bash scripts/sw-review-rerun-test.sh && bash scripts/sw-tracker-providers-test.sh && bash scripts/sw-budget-chaos-test.sh && bash scripts/sw-chaos-test.sh && bash scripts/sw-autonomous-e2e-test.sh && bash scripts/sw-memory-discovery-e2e-test.sh && bash scripts/sw-policy-e2e-test.sh && bash scripts/sw-e2e-smoke-test.sh && bash scripts/sw-dashboard-e2e-test.sh && bash scripts/sw-reward-aggregator-test.sh && bash scripts/sw-bandit-selector-test.sh && bash scripts/sw-policy-learner-test.sh && bash scripts/sw-autoresearch-e2e-test.sh + + +╔════════════════════════════════════════════════════════════════╗ +║ AGI-Roadmap Validation — Real Tests for Every Feature ║ +╚════════════════════════════════════════════════════════════════╝ + +Phase 1: Feedback Loops (Discovery, Memory, PM, Failure Learning) + ▸ Daemon: failure_history initialized in state JSON... ✓ + ▸ Daemon: consecutive count uses reduce (run-from-newest, not total)... ✓ + ▸ Daemon: get_max_retries_for_class returns per-class limits... ✓ + ▸ Daemon: exponential backoff formula 5*2^(n-3) is correct... ✓ + ▸ Daemon: resume_after parsed in UTC (not local TZ)... ✓ + ▸ PM: recommend --json flag implemented... ✓ + ▸ PM: learn subcommand functional... ✓ + ▸ Daemon: PM recommend/learn wired into triage + success/failure... ✓ + ▸ Daemon: confidence < 60% upgrades to full template... ✓ + +Phase 2: Agent Coordination (Feedback, Predictive, Oversight, Autonomous) + ▸ Feedback: ARTIFACTS_DIR respects caller override... ✓ + ▸ Feedback: rollback uses PIPESTATUS for correct exit code... ✓ + ▸ Predictive: anomaly detection returns severity for 5x baseline... ✓ + ▸ Predictive: inject-prevention command exists and runs... ✓ + ▸ Pipeline: predictive anomaly/baseline/inject-prevention wired... ✓ + ▸ Pipeline: memory metric wired into stage completion... ✓ + ▸ Oversight: gate approves clean review... ✓ + ▸ Oversight: gate rejects with --reject-if... ✓ + ▸ Oversight: gate JSON safe from newline/quote injection... ✓ + ▸ Pipeline: oversight gate wired + respects SKIP_GATES... ✓ + ▸ Pipeline: feedback collect/create-issue/rollback wired into monitor... ✓ + ▸ Pipeline: intelligence prediction validation wired... ✓ + ▸ Pipeline: predictive anomaly confirmation wired... ✓ + ▸ Pipeline: memory fix-outcome negative path wired... ✓ + ▸ Triage: offline fallback with recruit... ✓ + ▸ Recruit: policy integration wired... ✓ + ▸ Recruit: meta feedback loop... ✓ + ▸ Recruit: audit (negative-compounding feedback)... ✓ + ▸ Autonomous: safe recruit_args quoting... ✓ + ▸ Autonomous: Claude output redirected to findings file... ✓ + ▸ Autonomous: dual branch check (pipeline + daemon)... ✓ + ▸ Autonomous: run_scheduler with loop/sleep... ✓ + ▸ Autonomous: trigger_pipeline_for_finding wired... ✓ + ▸ Incident: create_hotfix_issue echoes issue number... ✓ + ▸ Incident: trigger_pipeline wires --template hotfix... ✓ + ▸ Incident: trigger_rollback wires sw-feedback.sh... ✓ + +Phase 3: Quality Assurance (Code Review, Testgen, Swarm, Multi-Agent) + ▸ Code review: run_claude_semantic_review exists... ✓ + ▸ Code review: semantic findings integrated... ✓ + ▸ Testgen: Claude prompt asks for real assertions... ✓ + ▸ Testgen: prompt piped to avoid shell expansion... ✓ + ▸ Swarm: spawn creates tmux session... ✓ + ▸ Swarm: retire kills tmux session... ✓ + ▸ Swarm: spawn/retire functional (real tmux)... ✓ + ▸ Loop: multi-agent restarts not blocked... ✓ + +Phase 4: Meta-Cognition (Effectiveness, Self-Awareness, Capability) + ▸ Pipeline: record_stage_effectiveness creates valid JSONL... ✓ + ▸ Pipeline: self-awareness hint triggers on >50% failure rate... ✓ + ▸ Pipeline: effectiveness recorded on both complete and failed... ✓ + ▸ Pipeline: discovery inject wired... ✓ + ▸ Pipeline: self-awareness hint injected into plan prompt... ✓ + +Integration & Safety + ▸ CI: integration-claude jobs in workflow... ✓ + ▸ Integration-claude: skip path functional... ✓ + ▸ All modified scripts have strict mode... ✓ + ▸ All modified scripts have ERR trap... ✓ + ▸ No hardcoded secrets in scripts... ✓ + + ────────────────────────────────────────── + + All 53 tests passed! + + + Shipwright Activity Tests + ══════════════════════════════════════════ + + Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ Source guard pattern (if/then/fi) + ✓ VERSION variable defined + + Help Output + ✓ help exits 0 and contains USAGE + ✓ help lists 'watch' subcommand + ✓ help lists 'snapshot' subcommand + ✓ help lists 'history' subcommand + ✓ help lists 'stats' subcommand + ✓ help lists 'agents' subcommand + ✓ --help alias works + + Error Handling +shipwright activity — Live agent activity stream + +USAGE + shipwright activity [subcommand] [options] + +SUBCOMMANDS + watch Live stream of agent activity (default) + snapshot Current state of all active agents + history [range] Replay past activity (e.g., '1h', '10m', 'all') + stats Running counters (events, commits, tests, agents) + agents List known agents and last activity + help Show this help message + +OPTIONS + --type Filter events by type (e.g., 'stage.completed') + --agent Filter by agent name + --team Filter by team + --stage Filter by pipeline stage (e.g., 'build') + +EXAMPLES + \033[2mshipwright activity\033[0m # Live stream + \033[2mshipwright activity watch --type stage.*\033[0m # Only stage events + \033[2mshipwright activity history 1h\033[0m # Last hour + \033[2mshipwright activity snapshot\033[0m # Current state + \033[2mshipwright activity stats\033[0m # Counters + ✓ Unknown command exits non-zero + + Subcommands Without Events File + ✓ snapshot exits non-zero with no events + ✓ stats exits non-zero with no events + ✓ agents exits non-zero with no events + + Event Emission + ✓ emit_event creates events.jsonl + ✓ emit_event writes valid JSON + ✓ Event contains type field + ✓ Event contains agent field + ✓ Event contains numeric count + + Format Helpers + ✓ get_icon_for_type commit returns icon + ✓ get_icon_for_type test.passed returns icon + ✓ get_icon_for_type unknown returns bullet + ✓ format_timestamp strips T and Z + + Stats With Events + ✓ stats shows Total Events + ✓ stats shows Commits count + ✓ stats shows Pipelines count + History Subcommand + ✓ history all shows activity header -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright pipeline test — E2E Validation (Real Subprocess) ║ -╚═══════════════════════════════════════════════════════════════════╝ -Setting up mock environment... -✓ Environment ready: /tmp/sw-pipeline-test.vZ9o8O + ────────────────────────────────────────── - ▸ Preflight passes with all mocks... ✓ - ▸ Preflight fails when sw-loop.sh missing... ✓ - ▸ Start requires --goal or --issue... ✓ - ▸ Intake with --goal creates branch + artifacts... ✓ - ▸ Intake with --issue fetches from GitHub... ✓ - ▸ Plan generates plan.md, dod.md, tasks... ✓ - ▸ Build invokes sw loop and commits... ✓ - ▸ Test stage captures results to log... ✓ - ▸ Review generates report with severities... ✓ - ▸ PR stage creates PR URL artifact... ✓ - ▸ Full E2E pipeline (6 stages)... ✓ - ▸ Resume continues from partial state... ✓ - ▸ Abort marks pipeline as aborted... ✓ - ▸ Dry run shows config, no artifacts... ✓ - ▸ Self-healing build→test retry loop... ✓ + All 28 tests passed -━━━ Results ━━━ - Passed: 15 - Failed: 0 - Total: 15 -All 15 tests passed! + Shipwright Adaptive Tests + ══════════════════════════════════════════ + help / version + ✓ help exits 0 + ✓ help shows USAGE + ✓ help shows SUBCOMMANDS + ✓ help mentions get + ✓ help mentions train + ✓ help mentions profile + ✓ version exits 0 + ✓ version output contains version string -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright daemon test — Unit Tests (Synthetic Events) ║ -╚═══════════════════════════════════════════════════════════════════╝ + error handling + ✓ Unknown command exits non-zero -Setting up test environment... -✓ Environment ready: /tmp/sw-daemon-test.Ootf7j - - ▸ dora_grade deploy_freq Elite (>= 7)... ✓ - ▸ dora_grade deploy_freq High (>= 1)... ✓ - ▸ dora_grade deploy_freq Medium (>= 0.25)... ✓ - ▸ dora_grade deploy_freq Low (< 0.25)... ✓ - ▸ dora_grade CFR thresholds (Elite/High/Medium/Low)... ✓ - ▸ Stage timings filter-first jq query... ✓ - ▸ MTTR pairs failures with next success... ✓ - ▸ epoch_to_iso helper function... ✓ - ▸ Health check detects stale jobs... ✓ - ▸ Priority label sorting... ✓ - ▸ Degradation alert triggers on high CFR... ✓ - ▸ Metrics --json output with cycle_time keys... ✓ - ▸ Self-labeling includes watch_label when enabled... ✓ - ▸ Self-labeling excludes watch_label when disabled... ✓ - ▸ Patrol recurring failures label construction... ✓ - ▸ DORA degradation event detection... ✓ - ▸ Retry exhaustion event detection... ✓ - ▸ Untested script detection logic... ✓ - -━━━ Results ━━━ - Passed: 18 - Failed: 0 - Total: 18 - -All 18 tests passed! - - -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright prep test — Validation Suite (Real Subprocess) ║ -╚═══════════════════════════════════════════════════════════════════╝ + get command + ✓ get timeout with default exits 0 -Setting up test environment... -✓ Environment ready: /tmp/sw-prep-test.1SpJfU - - ▸ Node.js project detection... ✓ - ▸ Python project detection... ✓ - ▸ Go project detection... ✓ - ▸ Rust project detection... ✓ - ▸ settings.json is valid JSON... ✓ - ▸ Hook scripts are executable... ✓ - ▸ Hook scripts have valid syntax... ✓ - ▸ CLAUDE.md has required sections... ✓ - ▸ Check mode outputs scoring... ✓ - ▸ Idempotency without --force... ✓ - ▸ --force overwrites modified files... ✓ - ▸ No eval in generated hooks... ✓ - ▸ Definition of Done generated... ✓ - -━━━ Results ━━━ - Passed: 13 - Failed: 0 - Total: 13 - -All 13 tests passed! - - -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright fleet test — Unit Tests ║ -╚═══════════════════════════════════════════════════════════════════╝ + profile command + ✓ profile exits 0 -Setting up test environment... -✓ Environment ready: /tmp/sw-fleet-test.lIcp2s - - ▸ Help output contains expected sections... ✓ - ▸ Help via --help flag... ✓ - ▸ Config parsing — valid config... ✓ - ▸ Config parsing — missing config file... ✓ - ▸ Config parsing — invalid JSON... ✓ - ▸ Config parsing — empty repos array... ✓ - ▸ Config defaults applied... ✓ - ▸ Fleet init generates config template... ✓ - ▸ Fleet init skips when config exists... ✓ - ▸ Fleet start spawns tmux sessions per repo... ✓ - ▸ Fleet start skips missing repos... ✓ - ▸ Fleet start skips existing sessions... ✓ - ▸ Fleet start creates fleet state file... ✓ - ▸ Fleet start emits fleet.started event... ✓ - ▸ Fleet start applies repo-level overrides... ✓ - ▸ Fleet stop kills sessions and cleans state... ✓ - ▸ Fleet stop — no fleet running... ✓ - ▸ Fleet stop emits fleet.stopped event... ✓ - ▸ Fleet status — no fleet running... ✓ - ▸ Fleet status shows dashboard... ✓ - ▸ Fleet metrics — no events file... ✓ - ▸ Fleet metrics dashboard output... ✓ - ▸ Fleet metrics JSON output... ✓ - ▸ Fleet metrics period flag... ✓ - ▸ Session name generation... ✓ - ▸ Fleet start skips non-git repos... ✓ - ▸ Unknown subcommand... ✓ - -━━━ Results ━━━ - Passed: 27 - Failed: 0 - Total: 27 - -All 27 tests passed! - - -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright fix test — Unit Tests ║ -╚═══════════════════════════════════════════════════════════════════╝ + reset command + ✓ reset exits 0 -Setting up test environment... -✓ Environment ready: /tmp/sw-fix-test.yrL4tG - - ▸ Help output contains expected sections... ✓ - ▸ Help via --help flag... ✓ - ▸ Missing goal shows error... ✓ - ▸ Missing repos shows error... ✓ - ▸ Arg parsing — --repos comma-separated... ✓ - ▸ Arg parsing — --repos-from file... ✓ - ▸ Arg parsing — --repos-from missing file... ✓ - ▸ Arg parsing — --pipeline template... ✓ - ▸ Arg parsing — --max-parallel... ✓ - ▸ Arg parsing — --branch-prefix... ✓ - ▸ Dry run shows what would happen... ✓ - ▸ Dry run does not create state file... ✓ - ▸ Fix status — no sessions... ✓ - ▸ Fix status shows existing sessions... ✓ - ▸ Invalid repo directory shows error... ✓ - ▸ Fix start creates state file... ✓ - ▸ Fix start emits events... ✓ - ▸ Fix start — summary output... ✓ - ▸ Branch name sanitization... ✓ - ▸ Fix header shows configuration... ✓ - ▸ Non-git repo warning... ✓ - ▸ Per-repo event tracking... ✓ - -━━━ Results ━━━ - Passed: 22 - Failed: 0 - Total: 22 - -All 22 tests passed! - - -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright memory+cost test — Unit Tests for Memory & Cost ║ -╚═══════════════════════════════════════════════════════════════════╝ - -Setting up mock environment... -✓ Environment ready: /tmp/sw-memory-test.IW0uqN - - ▸ Memory capture from pipeline state... ✓ - ▸ Memory inject returns context for each stage... ✓ - ▸ Failure capture stores patterns... ✓ - ▸ Pattern detection identifies project type... ✓ - ▸ Cross-repo vs per-repo isolation... ✓ - ▸ Memory show displays dashboard... ✓ - ▸ Memory search finds matching entries... ✓ - ▸ Memory export produces valid JSON... ✓ - ▸ Memory forget clears repo memory... ✓ - ▸ Cost calculation for each model... ✓ - ▸ Cost recording writes to costs.json... ✓ - ▸ Budget set and check... ✓ - ▸ Cost dashboard runs without errors... ✓ - ▸ Cost JSON output is valid... ✓ - ▸ Actionable failures threshold filtering... ✓ - ▸ Actionable failures with no file returns []... ✓ - ▸ DORA baseline calculation from events... ✓ - -━━━ Results ━━━ - Passed: 17 - Failed: 0 - Total: 17 - -All 17 tests passed! + script safety + ✓ Uses set -euo pipefail + ✓ Has source guard pattern + statistical functions + ✓ percentile, mean, median functions defined in source + ✓ mean returns numeric value (avg of 1-5 is 3) -╔═══════════════════════════════════════════════════╗ -║ shipwright session — E2E Test Suite ║ -╚═══════════════════════════════════════════════════╝ + get_timeout / get_iterations / get_model + ✓ get_timeout returns number (default with no events) + ✓ get_iterations returns number + ✓ get_model returns valid model name -Setting up test environment... -Test tmux session: sw-test-1268715 - -Template Loading - ▸ Load feature-dev template (3 agents)... ✓ - ▸ Load exploration template (2 agents)... ✓ - ▸ Missing template returns error... ✓ - ▸ All 24 templates load successfully... ✓ - -Window Creation - ▸ No-launch creates window without claude... ✓ - ▸ Duplicate window detection... ✓ - ▸ Window gets dark theme... ✓ - ▸ Auto-generated team name... ✓ - -Prompt & Launcher - ▸ Launcher script generation with template... ✓ - ▸ Output includes agents from template... ✓ - ▸ Output includes goal when provided... ✓ - ▸ No template + goal works... ✓ - ▸ No template + no goal = interactive... ✓ - -CLI & Configuration - ▸ Help flag... ✓ - ▸ Unknown option... ✓ - ▸ TMPDIR used for launcher files... ✓ - -Enhanced Features - ▸ Template auto-suggestion from goal... ✓ - ▸ No suggestion without goal... ✓ - ▸ Memory injection in prompt... ✓ - ▸ CLAUDE.md reminder in prompt... ✓ - ▸ Secure temp dir in source... ✓ + train subcommand + ✓ train subcommand runs with mock events -════════════════════════════════════════════════════ - All 21 tests passed ✓ -════════════════════════════════════════════════════ + ────────────────────────────────────────── + All 20 tests passed -╔═══════════════════════════════════════════════════╗ -║ shipwright init — E2E Test Suite ║ -╚═══════════════════════════════════════════════════╝ -Setting up sandboxed environment... -Temp dir: /tmp/sw-init-test.WldBc1 -Configuration - ▸ Settings.json created with agent teams... ✓ - ▸ Settings merge preserves existing vars... ✓ - ▸ tmux.conf installed... ✓ - ▸ Overlay installed... ✓ + shipwright adversarial test + ══════════════════════════════════════════ -Templates - ▸ Team templates installed (>= 10)... ✓ - ▸ Pipeline templates installed (>= 5)... ✓ - ▸ Legacy templates path populated... ✓ + ══════════════════════════════════════════ -Robustness - ▸ Idempotency — double init safe... ✓ - ▸ Doctor runs at end... ✓ - ▸ Help flag... ✓ + Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ Source guard pattern (if/then/fi) + ✓ VERSION variable defined -Hook Wiring - ▸ JSONC stripped from settings.json... ✓ - ▸ Hooks wired into settings.json... ✓ - ▸ Hook wiring preserves existing hooks... ✓ - ▸ SessionStart hook installed... ✓ - ▸ Hook wiring with pre-existing settings... ✓ + Help Output + ✓ help exits 0 and contains USAGE + ✓ help lists 'review' subcommand + ✓ help lists 'iterate' subcommand + ✓ help mentions adversarial_enabled flag + ✓ --help alias works + ✓ -h alias works -════════════════════════════════════════════════════ - All 15 tests passed ✓ -════════════════════════════════════════════════════ + Error Handling + ✓ Unknown command exits non-zero + Review Subcommand + ✓ review disabled returns empty JSON array + ✓ review without diff arg exits non-zero -╔═══════════════════════════════════════════════════╗ -║ shipwright tracker — Test Suite ║ -╚═══════════════════════════════════════════════════╝ + Iterate Subcommand + ✓ iterate without args exits non-zero + ✓ iterate with empty findings converges + ✓ iterate past max rounds returns findings -Setting up test environment... + Configuration + ✓ ADVERSARIAL_MAX_ROUNDS env var respected + ✓ disabled config returns empty array -Provider Loading - ▸ Provider loads from config (linear)... ✓ - ▸ Provider loads jira... ✓ - ▸ Graceful skip when no provider... ✓ + Event Emission + ✓ emit_event creates events.jsonl + ✓ emit_event writes valid JSON + ✓ Event contains type field -Pipeline Enrichment - ▸ Stage descriptions exist for all 12 stages... ✓ - ▸ Enriched progress body has Delivering line... ✓ - ▸ Enriched progress body has stage descriptions... ✓ - ▸ Pipeline state includes stage_progress... ✓ - ▸ Pipeline state includes stage description... ✓ -Integration - ▸ Tracker notify routes to provider (mock)... ✓ - ▸ Dashboard reads goal from pipeline state... ✓ - ▸ Jira config validation... ✓ - ▸ Linear config migration (legacy fallback)... ✓ -════════════════════════════════════════════════════ - All 12 tests passed ✓ -════════════════════════════════════════════════════ + ────────────────────────────────────────── -╔═══════════════════════════════════════════════════╗ -║ shipwright heartbeat + checkpoint — Test Suite ║ -╚═══════════════════════════════════════════════════╝ + All 0 tests passed -Setting up test environment... -Heartbeat Lifecycle - ▸ Write heartbeat creates JSON file... ✓ - ▸ Check heartbeat reports alive... ✓ Job test-job-alive alive (0s ago) -✓ - ▸ Check heartbeat reports stale... ⚠ Job test-job-stale stale (192940576s ago, timeout: 120s) -✓ - ▸ Clear heartbeat removes file... ✓ Cleared heartbeat for job: test-job-clear -✓ - ▸ List heartbeats returns JSON array... ✓ - ▸ Heartbeat update overwrites existing... ✓ - ▸ Check missing heartbeat returns error... ✓ - ▸ Heartbeat dir auto-created when missing... ✓ -Checkpoint Lifecycle - ▸ Checkpoint save creates JSON file... ✓ Checkpoint saved for stage build (iteration 5) -✓ - ▸ Checkpoint restore outputs JSON... ✓ Checkpoint saved for stage test (iteration 3) -✓ - ▸ Checkpoint restore missing stage fails... ✓ - ▸ Checkpoint clear removes file... ✓ Checkpoint saved for stage review (iteration 1) -✓ Cleared checkpoint for stage review -✓ - ▸ Checkpoint clear --all removes all... ✓ Checkpoint saved for stage build (iteration 1) -✓ Checkpoint saved for stage test (iteration 2) -✓ Cleared 2 checkpoint(s) -✓ - ▸ Checkpoint save with files-modified... ✓ Checkpoint saved for stage build (iteration 7) -✓ + shipwright architecture-enforcer test + ══════════════════════════════════════════ -Integration - ▸ Pipeline script has heartbeat functions... ✓ - ▸ Loop script has heartbeat and checkpoint... ✓ - ▸ Pipeline has human intervention checks... ✓ + ══════════════════════════════════════════ -════════════════════════════════════════════════════ - All 17 tests passed ✓ -════════════════════════════════════════════════════ + Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ Source guard pattern (if/then/fi) + ✓ VERSION variable defined + Help Output + ✓ help exits 0 and contains USAGE + ✓ help lists 'build' subcommand + ✓ help lists 'validate' subcommand + ✓ help lists 'evolve' subcommand + ✓ help mentions architecture_enabled flag + ✓ --help alias works + ✓ -h alias works -╔═══════════════════════════════════════════════════╗ -║ shipwright remote — Test Suite ║ -╚═══════════════════════════════════════════════════╝ + Error Handling + ✓ Unknown command exits non-zero -Setting up test environment... + Build Subcommand + ✓ build disabled returns empty JSON object + ✓ build with enabled returns model with layers + ✓ build model contains patterns array + ✓ build model contains conventions array -Machine Registry - ▸ Add machine creates machines.json... ▸ Checking shipwright installation at /tmp/sw-remote-test.PmYCBx/mock-install... -✓ Shipwright found at /tmp/sw-remote-test.PmYCBx/mock-install -✓ Registered machine: builder-1 (localhost, worker, 4 workers) -✓ - ▸ Add second machine appends to array... ▸ Checking shipwright installation at /tmp/sw-remote-test.PmYCBx/mock-install... -✓ Shipwright found at /tmp/sw-remote-test.PmYCBx/mock-install -✓ Registered machine: builder-2 (localhost, worker, 8 workers) -✓ - ▸ Remove machine by name... ✓ Removed machine: builder-2 -✓ - ▸ List machines returns output... ✓ - ▸ machines.json uses atomic writes... ✓ - ▸ Duplicate machine name prevented... ▸ Use shipwright remote remove builder-1 first -✓ - ▸ Remote script has help command... ✓ + Validate Subcommand + ✓ validate disabled returns empty array + ✓ validate without diff arg exits non-zero + ✓ validate without model file returns empty array -CLI & Dashboard Integration - ▸ CLI router includes remote command... ✓ - ▸ CLI router includes heartbeat command... ✓ - ▸ CLI router includes checkpoint command... ✓ - ▸ Doctor has heartbeat health check... ✓ - ▸ Doctor has remote machine checks... ✓ - ▸ Status shows heartbeat section... ✓ - ▸ Status shows remote machines section... ✓ + Evolve Subcommand + ✓ evolve disabled exits 0 + ✓ evolve without model exits 0 -════════════════════════════════════════════════════ - All 14 tests passed ✓ -════════════════════════════════════════════════════ + Model Storage + ✓ build stores model in ~/.shipwright/memory/ + ✓ Stored model is valid JSON + Event Emission + ✓ emit_event creates events.jsonl + ✓ emit_event writes valid JSON + ✓ Event contains type field -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright intelligence test — Unit Tests ║ -╚═══════════════════════════════════════════════════════════════════╝ -Setting up test environment... -✓ Environment ready: /tmp/sw-intelligence-test.wTXBVg - ▸ analyze_issue returns valid schema... ✓ - ▸ Cache hit on second call with same input... ✓ - ▸ Graceful degradation when claude CLI unavailable... ✓ - ▸ compose_pipeline produces valid pipeline JSON... ✓ - ▸ recommend_model returns valid model names... ✓ - ▸ predict_cost returns numeric estimates... ✓ - ▸ Cache TTL expiry returns miss... ✓ - ▸ search_memory returns ranked results... ✓ - ▸ Feature flag disabled returns fallback... ✓ - ▸ Events emitted for analysis... ✓ - ▸ recommend_model emits events... ✓ - ▸ Cache init creates file if missing... ✓ -━━━ Results ━━━ - Passed: 12 - Failed: 0 - Total: 12 + ────────────────────────────────────────── -All 12 tests passed! + All 0 tests passed -╔═══════════════════════════════════════════════════╗ -║ shipwright pipeline composer — Test Suite ║ -╚═══════════════════════════════════════════════════╝ + Shipwright Auth Tests + ══════════════════════════════════════════ -Setting up test environment... + Help + ✓ help shows usage + ✓ help shows commands + ✓ --help flag works + ✓ unknown command shows error + Status & Users + ✓ status with no login shows warning + ✓ users with empty shows warning + ✓ token with no user errors + ✓ switch without user exits non-zero + Auth Storage + ✓ auth file has users array + ✓ auth file has active_user + ✓ users lists stored user + ✓ token shows stored token + ✓ user info shows login + ✓ switch to nonexistent errors + ✓ logout succeeds -Pipeline Composition - ▸ Composed pipeline has valid stage ordering... ✓ - ▸ High-risk issue gets security stages... ▸ Composing pipeline with intelligence engine... -✓ Composed pipeline: 7 stages -.claude/pipeline-artifacts/composed-pipeline.json -✓ - ▸ Fallback to static template when no intelligence... ✓ + ────────────────────────────────────────── -Conditional Stage Insertion - ▸ Stage inserted at correct position after build... ✓ - ▸ Insert into nonexistent stage fails... ✓ + All 15 tests passed -Model Downgrade - ▸ Budget constraint triggers model downgrades... ✓ - ▸ Downgrade with nonexistent stage fails... ✓ -Pipeline Validation - ▸ Validation accepts valid pipeline... ✓ - ▸ Validation rejects invalid ordering (test before build)... ✓ - ▸ Validation rejects missing stage ids... ✓ - ▸ Validation rejects missing stages array... ✓ -Iteration Estimation - ▸ Iteration estimates are reasonable (1-50 range)... ✓ + Shipwright Autonomous Tests + ══════════════════════════════════════════ -════════════════════════════════════════════════════ - All 12 tests passed ✓ -════════════════════════════════════════════════════ + ══════════════════════════════════════════ + ✓ help shows usage text + ✓ help exits 0 + ✓ --help flag works + ✓ unknown command exits 1 + ✓ start shows running message + ✓ state.json created after start + ✓ state status is running + ✓ config.json created + ✓ config is valid JSON + ✓ status shows dashboard + ✓ pause sets status to paused + ✓ resume sets status to running + ✓ stop sets status to stopped + ✓ config show displays settings + ✓ config set interval works + ✓ history handles no data -━━━ shipwright self-optimize tests ━━━ + Config Set/Show Cycle + ✓ config show reflects set value + ✓ config value persists in file - ▸ Outcome analysis extracts correct metrics... ✓ - ▸ Outcome analysis emits event... ✓ - ▸ Outcome analysis rejects missing file... ✓ - ▸ Template weight increases for high success... ✓ - ▸ Template weight decreases for low success... ✓ - ▸ A/B test selects ~20% sample... ✓ - ▸ Iteration model updates with data points... ✓ - ▸ Model routing tracks success rates... ✓ - ▸ Model routing keeps opus with few sonnet samples... ✓ - ▸ Memory pruning removes old patterns... ✓ - ▸ Memory strengthening boosts confirmed patterns... ✓ - ▸ Memory promotion copies cross-repo patterns... ✓ - ▸ Full analysis runs on empty data... ✓ - ▸ Report generates output with data... ✓ - ▸ Report handles empty outcomes... ✓ - ▸ Outcome analysis extracts stage data... ✓ + Config Structure + ✓ config contains key: cycle_interval_minutes + ✓ config contains key: max_issues_per_cycle + ✓ config contains key: daemon_aware -━━━ Results ━━━ - Passed: 16 - Failed: 0 - Total: 16 + History With Fixture Events + ✓ history shows recent cycles + ✓ history shows cycle entries -All 16 tests passed! + Status Fields + ✓ status when running includes Status + ✓ status when running includes Cycles + ✓ status when running includes Issues Created + ✓ status when running includes Pipelines + ✓ status when running includes Cycle Interval + ✓ status when running shows running + ✓ status when stopped shows stopped -╔═══════════════════════════════════════════════════════════╗ -║ shipwright predictive test ║ -╚═══════════════════════════════════════════════════════════╝ + ────────────────────────────────────────── -Risk Assessment - ▸ Risk returns valid schema with 0-100 range... ✓ - ▸ Risk elevates for complex keywords... ✓ - ▸ Risk emits event... ✓ + All 0 tests passed -Anomaly Detection - ▸ Critical at 3x threshold (181 vs 60 baseline)... ✓ - ▸ Normal at 2x (119 vs 60 baseline)... ✓ - ▸ Warning between 2x and 3x (150 vs 60)... ✓ - ▸ Normal when no baseline exists... ✓ - ▸ Emits event for critical anomaly... ✓ -Baseline Management - ▸ First data point uses raw value... ✓ - ▸ EMA calculation (0.9*100 + 0.1*200 = 110)... ✓ - ▸ Creates baseline file if missing... ✓ -Preventative Injection - ▸ Injects context from matching patterns... ✓ - ▸ Returns empty for non-matching stage... ✓ + Shipwright Changelog Tests + ══════════════════════════════════════════ -Graceful Degradation - ▸ Works without intelligence engine... ✓ + ══════════════════════════════════════════ -AI Patrol - ▸ AI patrol returns structured findings... ✓ + help / version + ✓ help exits 0 + ✓ help shows USAGE + ✓ help mentions generate + ✓ help mentions preview + ✓ help mentions version + ✓ help mentions migrate + ✓ VERSION variable defined -════════════════════════════════════════════════════ - All 15 tests passed ✓ -════════════════════════════════════════════════════ + error handling + ✓ Unknown command exits non-zero + formats command + ✓ formats exits 0 -╔═══════════════════════════════════════════════════╗ -║ shipwright frontier — Test Suite ║ -╚═══════════════════════════════════════════════════╝ + generate command + ✓ generate exits 0 -Setting up test environment... + version command + ✓ version recommendation exits 0 -Adversarial Review - ▸ Adversarial review produces structured findings... ✓ - ▸ Adversarial iteration converges on no critical findings... ✓ + script safety + ✓ Uses set -euo pipefail + ✓ Has source guard pattern -Developer Simulation - ▸ Simulation generates objections from 3 personas... ✓ - ▸ Simulation address returns action items... ✓ -Architecture Enforcer - ▸ Architecture model has valid schema... ✓ - ▸ Architecture validates changes (violation detected)... ✓ -Graceful Degradation - ▸ All three degrade gracefully when claude unavailable... ✓ + ────────────────────────────────────────── -════════════════════════════════════════════════════ - All 7 tests passed ✓ -════════════════════════════════════════════════════ + All 0 tests passed + + + + shipwright checkpoint test + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ VERSION variable defined + + Help Output + ✓ help exits 0 and contains USAGE + ✓ help lists 'save' subcommand + ✓ help lists 'restore' subcommand + ✓ help lists 'list' subcommand + ✓ help lists 'clear' subcommand + ✓ help lists 'expire' subcommand + ✓ --help alias works + ✓ -h alias works + + Error Handling + +shipwright checkpoint v3.2.4 — Save and restore agent state mid-stage + +USAGE + shipwright checkpoint [options] + +COMMANDS + save Save a checkpoint for a stage + restore Restore a checkpoint (prints JSON to stdout) + save-context Save Claude context (goal, findings, test output) for resume + restore-context Restore Claude context (exports RESTORED_* and SW_LOOP_* vars) + list Show all available checkpoints + clear Remove checkpoint(s) + expire Remove checkpoints older than N hours + +SAVE OPTIONS + --stage Stage name (required) + --iteration Current iteration number + --git-sha Git commit SHA (default: HEAD) + --files-modified "f1,f2" Comma-separated list of modified files + --tests-passing Mark tests as passing + --loop-state Loop state (running, paused, etc.) + +RESTORE OPTIONS + --stage Stage to restore (required) + +CLEAR OPTIONS + --stage Stage to clear + --all Clear all checkpoints + +EXPIRE OPTIONS + --hours Max age in hours (default: 24) + +EXAMPLES + shipwright checkpoint save --stage build --iteration 5 + shipwright checkpoint save --stage build --iteration 3 --tests-passing --files-modified "src/auth.ts,src/middleware.ts" + shipwright checkpoint restore --stage build + shipwright checkpoint list + shipwright checkpoint clear --stage build + shipwright checkpoint clear --all + shipwright checkpoint expire --hours 48 + ✓ Unknown command exits non-zero + + Save Subcommand + +shipwright checkpoint v3.2.4 — Save and restore agent state mid-stage + +USAGE + shipwright checkpoint [options] + +COMMANDS + save Save a checkpoint for a stage + restore Restore a checkpoint (prints JSON to stdout) + save-context Save Claude context (goal, findings, test output) for resume + restore-context Restore Claude context (exports RESTORED_* and SW_LOOP_* vars) + list Show all available checkpoints + clear Remove checkpoint(s) + expire Remove checkpoints older than N hours + +SAVE OPTIONS + --stage Stage name (required) + --iteration Current iteration number + --git-sha Git commit SHA (default: HEAD) + --files-modified "f1,f2" Comma-separated list of modified files + --tests-passing Mark tests as passing + --loop-state Loop state (running, paused, etc.) + +RESTORE OPTIONS + --stage Stage to restore (required) + +CLEAR OPTIONS + --stage Stage to clear + --all Clear all checkpoints + +EXPIRE OPTIONS + --hours Max age in hours (default: 24) + +EXAMPLES + shipwright checkpoint save --stage build --iteration 5 + shipwright checkpoint save --stage build --iteration 3 --tests-passing --files-modified "src/auth.ts,src/middleware.ts" + shipwright checkpoint restore --stage build + shipwright checkpoint list + shipwright checkpoint clear --stage build + shipwright checkpoint clear --all + shipwright checkpoint expire --hours 48 + ✓ save without --stage exits non-zero +✓ Checkpoint saved for stage build (iteration 5) + ✓ save creates checkpoint file + ✓ Checkpoint is valid JSON + ✓ Checkpoint stage field correct + ✓ Checkpoint iteration field correct + ✓ Checkpoint git_sha populated +✓ Checkpoint saved for stage test (iteration 0) + ✓ save --tests-passing sets true +✓ Checkpoint saved for stage review (iteration 0) + ✓ save --files-modified stores 2 files +✓ Checkpoint saved for stage deploy (iteration 0) + ✓ save --loop-state stores state + ✓ Checkpoint created_at timestamp present + + Restore Subcommand + ✓ restore returns checkpoint JSON + ✓ Restored checkpoint has correct stage + ✓ restore missing stage exits non-zero + ✓ restore without --stage exits non-zero + + List Subcommand + ✓ list shows Checkpoints header + ✓ list shows build checkpoint + ✓ list shows checkpoint count + ✓ list with no checkpoints shows empty + + Clear Subcommand +✓ Checkpoint saved for stage build (iteration 1) +✓ Checkpoint saved for stage test (iteration 2) +✓ Cleared checkpoint for stage build + ✓ clear --stage removes specific checkpoint + ✓ clear --stage preserves other checkpoints + ✓ clear without args exits non-zero +✓ Checkpoint saved for stage build (iteration 3) +✓ Cleared 2 checkpoint(s) + ✓ clear --all removes all checkpoints + + Expire Subcommand + ✓ expire with no checkpoints exits 0 + + Save-context / Restore-context + ✓ save-context creates claude-context.json + ✓ Context goal saved correctly + ✓ Context iteration saved correctly + ✓ restore-context exports RESTORED_GOAL + ✓ restore-context exports SW_LOOP_GOAL + + + + + ────────────────────────────────────────── + + All 0 tests passed + + + + Shipwright CI Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + ✓ help shows usage text + ✓ help exits 0 + ✓ --help flag works + ✓ version shows version + ✓ unknown command exits 1 + ✓ generate starts processing + ✓ validate runs on valid workflow + ✓ VERSION is defined + ✓ analyze shows analysis + ✓ analyze shows cache info + ✓ matrix generates config + ✓ matrix workflow file exists + ✓ validate passes on valid workflow + ✓ runners list shows options + ✓ runners recommend shows guidance + + + + ────────────────────────────────────────── + + All 0 tests passed + + + + Shipwright Cleanup Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Help + ✓ help exits 0 + ✓ help shows usage + ✓ help shows --force + ✓ help shows dry-run + + Dry-Run (Empty) + ✓ dry-run shows Tmux Windows section + ✓ dry-run shows Team Configs section + ✓ dry-run shows Task Lists section + ✓ dry-run shows Pipeline Artifacts section + ✓ dry-run shows Pipeline State section + ✓ dry-run shows Heartbeats section + ✓ dry-run reports clean + + Force Mode (Empty) + ✓ force shows FORCE MODE + ✓ force reports nothing to clean + + Detect Team Configs + ✓ dry-run detects team dir + ✓ dry-run shows would remove + + Force Removes Teams + ✓ force removes team dir + ✓ team directory actually removed + + Detect Task Lists + ✓ dry-run detects task dir + ✓ task directory actually removed + + Detect Stale Heartbeats + ✓ dry-run detects stale heartbeat + + Error Handling + ✓ unknown option exits non-zero + ✓ unknown option shows error + + Summary Counting + ✓ summary shows found count + ✓ summary shows --force hint + + + + ────────────────────────────────────────── + + All 24 tests passed + + + + Shipwright Code Review Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + ✓ help shows usage text + ✓ help exits 0 + ✓ --help flag works + ✓ review runs and starts reviewing + ✓ trends with no data + ✓ config show outputs valid config + ✓ unknown subcommand exits nonzero + ✓ no false long function detection on small file + ✓ style consistency check runs without crash + ✓ architecture boundary check runs without crash + + + + ────────────────────────────────────────── + + All 0 tests passed ╔═══════════════════════════════════════════════════╗ @@ -560,24 +698,24 @@ All 16 tests passed! Setting up test environment... -Identity Resolution +Identity Resolution ▸ resolve_developer_id from DEVELOPER_ID env... ✓ ▸ resolve_developer_id from git config... ✓ ▸ resolve_developer_id fallback to USER... ✓ ▸ resolve_machine_name from MACHINE_NAME env... ✓ ▸ resolve_machine_name from hostname... ✓ -Dashboard URL Resolution +Dashboard URL Resolution ▸ resolve_dashboard_url from --url flag... ✓ ▸ resolve_dashboard_url from DASHBOARD_URL env... ✓ ▸ resolve_dashboard_url from team-config.json... ✓ ▸ resolve_dashboard_url falls back to default... ✓ -Start/Stop Lifecycle +Start/Stop Lifecycle ▸ cmd_start creates PID file... ▸ Starting connect to http://localhost:8767 ▸ Developer: test-developer @ test-machine -✓ Connect started (PID 1279947) - Logs: /tmp/sw-connect-test.OESvLP/home/.shipwright/connect.log +✓ Connect started (PID 60906) + Logs: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-connect-test.hVaiXc/home/.shipwright/connect.log Stop: shipwright connect stop ✓ ▸ cmd_start rejects if already running... Stop it first: shipwright connect stop @@ -587,29 +725,182 @@ All 16 tests passed! ▸ cmd_stop handles missing PID gracefully... ⚠ Connect is not running (no PID file) ✓ -Status +Status ▸ cmd_status shows connected when PID alive... ✓ ▸ cmd_status shows disconnected when no PID... ✓ -Join Flow +Join Flow ▸ cmd_join verifies token against dashboard... ✓ ▸ cmd_join saves team-config.json... ✓ ▸ cmd_join rejects invalid token... ✓ ▸ cmd_join accepts --url and --token flags... ✓ -Heartbeat & Disconnect Payloads +Heartbeat & Disconnect Payloads ▸ Heartbeat payload includes required fields... ✓ ▸ Send disconnect sends proper payload... ✓ -Configuration & Utilities +Configuration & Utilities ▸ ensure_dir creates shipwright directory... ✓ ▸ now_iso returns valid ISO timestamp... ✓ ▸ Script has correct version... ✓ -Integration +Integration ▸ Help command shows all main commands... ✓ ════════════════════════════════════════════════════  All 25 tests passed ✓ ════════════════════════════════════════════════════ + + Context Budget Monitor Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Budget Initialization + ✓ init creates config (exit 0) + ✓ config has correct total_budget + ✓ system_reserve is 10% (80000) + ✓ tools_reserve is 10% (80000) + ✓ working_memory is 60% (480000) + ✓ output_reserve is 20% (160000) + + Token Estimation + ✓ estimation produces JSON + ✓ estimate contains utilization_percent + ✓ estimate contains total_used (>0) + + Status Checking (Thresholds) + ✓ 50% utilization returns green status + ✓ 70% utilization returns yellow status + ✓ 85% utilization returns red status + ✓ 95% utilization returns critical status + + Context Trimming + ✓ green status doesn't trim content + ✓ yellow status reduces content length + ✓ hard truncate respects size limit + + Iteration Summarization + ✓ summarize_iteration succeeds (exit 0) + ✓ iteration summary file created with 1 entry + + Budget Report Generation + ✓ report contains budget_config + ✓ report contains iteration_summaries + + State Logging + ✓ log_state succeeds (exit 0) + ✓ log file created with entries + + + ────────────────────────────────────────── + + All 22 tests passed + + + + Shipwright Context Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help command + ✓ help exits 0 + ✓ help shows usage + ✓ help shows commands + ✓ --help exits 0 + + error handling + ✓ unknown command exits 1 + ✓ unknown command shows error + + show subcommand + ✓ show exits 0 + ✓ show outputs context header + + clear subcommand + ✓ clear exits 0 + ✓ clear confirms cleared + + gather subcommand + ✓ gather without args exits 1 + ✓ gather shows must provide + ✓ gather with unknown option exits 1 + + gather with goal + ✓ gather exits 0 + ✓ gather shows building + ✓ gather shows success + ✓ context-bundle.md created + + show after gather + ✓ show after gather exits 0 + ✓ show contains pipeline context + + clear after gather + ✓ clear exits 0 + + gather with issue + ✓ gather with --issue exits 0 + ✓ gather shows building + + gather default stage + ✓ gather default stage exits 0 + + internal stage_guidance + ✓ stage_guidance returns plan guidance + ✓ stage_guidance returns build guidance + ✓ stage_guidance handles unknown stage + + + + ────────────────────────────────────────── + + All 26 tests passed + + + + Shipwright Cost Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help / version + ✓ help exits 0 + ✓ help shows USAGE + ✓ help shows COMMANDS + ✓ help mentions show + ✓ help mentions budget + ✓ help mentions calculate + ✓ VERSION variable defined + + state management + ✓ costs.json created on first use + ✓ budget.json created on first use + ✓ costs.json has entries array + ✓ budget.json has daily_budget_usd + + budget commands + ✓ budget set exits 0 + ✓ budget set to 50 + ✓ budget show exits 0 + + error handling + ✓ Unknown command exits non-zero + + calculate + ✓ calculate exits 0 + + script safety + ✓ Uses set -euo pipefail + ✓ ERR trap is set + + context efficiency in cost dashboard + ✓ Cost dashboard has CONTEXT EFFICIENCY section + ✓ Cost dashboard reads loop.context_efficiency events + ✓ Context efficiency reports utilization and waste + ✗ Dashboard renders CONTEXT EFFICIENCY with event data + output: ⚠ No cost entries in the last 30 day(s). + ✗ Dashboard shows avg budget utilization +ERROR: scripts/sw-cost-test.sh:226 exited with status 1 diff --git a/.claude/loop-logs/tests-iter-2.log b/.claude/loop-logs/tests-iter-2.log index 176f067c3..0468dda2e 100644 --- a/.claude/loop-logs/tests-iter-2.log +++ b/.claude/loop-logs/tests-iter-2.log @@ -1,557 +1,695 @@ -> shipwright-cli@1.7.1 test -> bash scripts/sw-pipeline-test.sh && bash scripts/sw-daemon-test.sh && bash scripts/sw-prep-test.sh && bash scripts/sw-fleet-test.sh && bash scripts/sw-fix-test.sh && bash scripts/sw-memory-test.sh && bash scripts/sw-session-test.sh && bash scripts/sw-init-test.sh && bash scripts/sw-tracker-test.sh && bash scripts/sw-heartbeat-test.sh && bash scripts/sw-remote-test.sh && bash scripts/sw-intelligence-test.sh && bash scripts/sw-pipeline-composer-test.sh && bash scripts/sw-self-optimize-test.sh && bash scripts/sw-predictive-test.sh && bash scripts/sw-frontier-test.sh && bash scripts/sw-connect-test.sh +> shipwright-cli@3.2.4 test +> bash scripts/sw-agi-roadmap-test.sh && bash scripts/sw-activity-test.sh && bash scripts/sw-adaptive-test.sh && bash scripts/sw-adversarial-test.sh && bash scripts/sw-architecture-enforcer-test.sh && bash scripts/sw-auth-test.sh && bash scripts/sw-autonomous-test.sh && bash scripts/sw-changelog-test.sh && bash scripts/sw-checkpoint-test.sh && bash scripts/sw-ci-test.sh && bash scripts/sw-cleanup-test.sh && bash scripts/sw-code-review-test.sh && bash scripts/sw-connect-test.sh && bash scripts/sw-context-budget-test.sh && bash scripts/sw-context-test.sh && bash scripts/sw-cost-test.sh && bash scripts/sw-daemon-test.sh && bash scripts/sw-dashboard-test.sh && bash scripts/sw-db-test.sh && bash scripts/sw-decompose-test.sh && bash scripts/sw-decide-test.sh && bash scripts/sw-deps-test.sh && bash scripts/sw-developer-simulation-test.sh && bash scripts/sw-discovery-test.sh && bash scripts/sw-doc-fleet-test.sh && bash scripts/sw-docs-agent-test.sh && bash scripts/sw-docs-test.sh && bash scripts/sw-doctor-test.sh && bash scripts/sw-dora-test.sh && bash scripts/sw-durable-test.sh && bash scripts/sw-e2e-orchestrator-test.sh && bash scripts/sw-eventbus-test.sh && bash scripts/sw-feedback-test.sh && bash scripts/sw-outcome-feedback-test.sh && bash scripts/sw-fix-test.sh && bash scripts/sw-fleet-discover-test.sh && bash scripts/sw-fleet-test.sh && bash scripts/sw-fleet-viz-test.sh && bash scripts/sw-frontier-test.sh && bash scripts/sw-github-app-test.sh && bash scripts/sw-github-checks-test.sh && bash scripts/sw-github-deploy-test.sh && bash scripts/sw-github-graphql-test.sh && bash scripts/sw-guild-test.sh && bash scripts/sw-heartbeat-test.sh && bash scripts/sw-hello-test.sh && bash scripts/sw-hygiene-test.sh && bash scripts/sw-incident-test.sh && bash scripts/sw-init-test.sh && bash scripts/sw-instrument-test.sh && bash scripts/sw-intelligence-test.sh && bash scripts/sw-jira-test.sh && bash scripts/sw-launchd-test.sh && bash scripts/sw-linear-test.sh && bash scripts/sw-logs-test.sh && bash scripts/sw-loop-test.sh && bash scripts/sw-memory-test.sh && bash scripts/sw-mission-control-test.sh && bash scripts/sw-model-router-test.sh && bash scripts/sw-otel-test.sh && bash scripts/sw-oversight-test.sh && bash scripts/sw-patrol-meta-test.sh && bash scripts/sw-pipeline-composer-test.sh && bash scripts/sw-pipeline-test.sh && bash scripts/sw-pipeline-vitals-test.sh && bash scripts/sw-pm-test.sh && bash scripts/sw-pr-lifecycle-test.sh && bash scripts/sw-predictive-test.sh && bash scripts/sw-prep-test.sh && bash scripts/sw-ps-test.sh && bash scripts/sw-public-dashboard-test.sh && bash scripts/sw-quality-profile-test.sh && bash scripts/sw-quality-test.sh && bash scripts/sw-reaper-test.sh && bash scripts/sw-recruit-test.sh && bash scripts/sw-regression-test.sh && bash scripts/sw-release-manager-test.sh && bash scripts/sw-release-test.sh && bash scripts/sw-root-cause-test.sh && bash scripts/sw-remote-test.sh && bash scripts/sw-replay-test.sh && bash scripts/sw-retro-test.sh && bash scripts/sw-scale-test.sh && bash scripts/sw-stall-detector-test.sh && bash scripts/sw-security-audit-test.sh && bash scripts/sw-self-optimize-test.sh && bash scripts/sw-session-test.sh && bash scripts/sw-setup-test.sh && bash scripts/sw-standup-test.sh && bash scripts/sw-status-test.sh && bash scripts/sw-strategic-test.sh && bash scripts/sw-stream-test.sh && bash scripts/sw-swarm-test.sh && bash scripts/sw-team-stages-test.sh && bash scripts/sw-templates-test.sh && bash scripts/sw-testgen-test.sh && bash scripts/sw-tmux-pipeline-test.sh && bash scripts/sw-tmux-test.sh && bash scripts/sw-trace-test.sh && bash scripts/sw-tracker-test.sh && bash scripts/sw-triage-test.sh && bash scripts/sw-upgrade-test.sh && bash scripts/sw-ux-test.sh && bash scripts/sw-webhook-test.sh && bash scripts/sw-widgets-test.sh && bash scripts/sw-worktree-test.sh && bash scripts/sw-lib-compat-test.sh && bash scripts/sw-lib-helpers-test.sh && bash scripts/sw-lib-error-actionability-test.sh && bash scripts/sw-lib-daemon-dispatch-test.sh && bash scripts/sw-lib-daemon-failure-test.sh && bash scripts/sw-lib-daemon-poll-test.sh && bash scripts/sw-lib-daemon-state-test.sh && bash scripts/sw-lib-daemon-triage-test.sh && bash scripts/sw-lib-daemon-patrol-test.sh && bash scripts/sw-lib-pipeline-detection-test.sh && bash scripts/sw-lib-pipeline-intelligence-test.sh && bash scripts/sw-lib-pipeline-quality-checks-test.sh && bash scripts/sw-lib-pipeline-stages-test.sh && bash scripts/sw-lib-pipeline-state-test.sh && bash scripts/sw-adapters-test.sh && bash scripts/sw-evidence-test.sh && bash scripts/sw-review-rerun-test.sh && bash scripts/sw-tracker-providers-test.sh && bash scripts/sw-budget-chaos-test.sh && bash scripts/sw-chaos-test.sh && bash scripts/sw-autonomous-e2e-test.sh && bash scripts/sw-memory-discovery-e2e-test.sh && bash scripts/sw-policy-e2e-test.sh && bash scripts/sw-e2e-smoke-test.sh && bash scripts/sw-dashboard-e2e-test.sh && bash scripts/sw-reward-aggregator-test.sh && bash scripts/sw-bandit-selector-test.sh && bash scripts/sw-policy-learner-test.sh && bash scripts/sw-autoresearch-e2e-test.sh + + +╔════════════════════════════════════════════════════════════════╗ +║ AGI-Roadmap Validation — Real Tests for Every Feature ║ +╚════════════════════════════════════════════════════════════════╝ + +Phase 1: Feedback Loops (Discovery, Memory, PM, Failure Learning) + ▸ Daemon: failure_history initialized in state JSON... ✓ + ▸ Daemon: consecutive count uses reduce (run-from-newest, not total)... ✓ + ▸ Daemon: get_max_retries_for_class returns per-class limits... ✓ + ▸ Daemon: exponential backoff formula 5*2^(n-3) is correct... ✓ + ▸ Daemon: resume_after parsed in UTC (not local TZ)... ✓ + ▸ PM: recommend --json flag implemented... ✓ + ▸ PM: learn subcommand functional... ✓ + ▸ Daemon: PM recommend/learn wired into triage + success/failure... ✓ + ▸ Daemon: confidence < 60% upgrades to full template... ✓ + +Phase 2: Agent Coordination (Feedback, Predictive, Oversight, Autonomous) + ▸ Feedback: ARTIFACTS_DIR respects caller override... ✓ + ▸ Feedback: rollback uses PIPESTATUS for correct exit code... ✓ + ▸ Predictive: anomaly detection returns severity for 5x baseline... ✓ + ▸ Predictive: inject-prevention command exists and runs... ✓ + ▸ Pipeline: predictive anomaly/baseline/inject-prevention wired... ✓ + ▸ Pipeline: memory metric wired into stage completion... ✓ + ▸ Oversight: gate approves clean review... ✓ + ▸ Oversight: gate rejects with --reject-if... ✓ + ▸ Oversight: gate JSON safe from newline/quote injection... ✓ + ▸ Pipeline: oversight gate wired + respects SKIP_GATES... ✓ + ▸ Pipeline: feedback collect/create-issue/rollback wired into monitor... ✓ + ▸ Pipeline: intelligence prediction validation wired... ✓ + ▸ Pipeline: predictive anomaly confirmation wired... ✓ + ▸ Pipeline: memory fix-outcome negative path wired... ✓ + ▸ Triage: offline fallback with recruit... ✓ + ▸ Recruit: policy integration wired... ✓ + ▸ Recruit: meta feedback loop... ✓ + ▸ Recruit: audit (negative-compounding feedback)... ✓ + ▸ Autonomous: safe recruit_args quoting... ✓ + ▸ Autonomous: Claude output redirected to findings file... ✓ + ▸ Autonomous: dual branch check (pipeline + daemon)... ✓ + ▸ Autonomous: run_scheduler with loop/sleep... ✓ + ▸ Autonomous: trigger_pipeline_for_finding wired... ✓ + ▸ Incident: create_hotfix_issue echoes issue number... ✓ + ▸ Incident: trigger_pipeline wires --template hotfix... ✓ + ▸ Incident: trigger_rollback wires sw-feedback.sh... ✓ + +Phase 3: Quality Assurance (Code Review, Testgen, Swarm, Multi-Agent) + ▸ Code review: run_claude_semantic_review exists... ✓ + ▸ Code review: semantic findings integrated... ✓ + ▸ Testgen: Claude prompt asks for real assertions... ✓ + ▸ Testgen: prompt piped to avoid shell expansion... ✓ + ▸ Swarm: spawn creates tmux session... ✓ + ▸ Swarm: retire kills tmux session... ✓ + ▸ Swarm: spawn/retire functional (real tmux)... ✓ + ▸ Loop: multi-agent restarts not blocked... ✓ + +Phase 4: Meta-Cognition (Effectiveness, Self-Awareness, Capability) + ▸ Pipeline: record_stage_effectiveness creates valid JSONL... ✓ + ▸ Pipeline: self-awareness hint triggers on >50% failure rate... ✓ + ▸ Pipeline: effectiveness recorded on both complete and failed... ✓ + ▸ Pipeline: discovery inject wired... ✓ + ▸ Pipeline: self-awareness hint injected into plan prompt... ✓ + +Integration & Safety + ▸ CI: integration-claude jobs in workflow... ✓ + ▸ Integration-claude: skip path functional... ✓ + ▸ All modified scripts have strict mode... ✓ + ▸ All modified scripts have ERR trap... ✓ + ▸ No hardcoded secrets in scripts... ✓ + + ────────────────────────────────────────── + + All 53 tests passed! + + + Shipwright Activity Tests + ══════════════════════════════════════════ + + Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ Source guard pattern (if/then/fi) + ✓ VERSION variable defined + + Help Output + ✓ help exits 0 and contains USAGE + ✓ help lists 'watch' subcommand + ✓ help lists 'snapshot' subcommand + ✓ help lists 'history' subcommand + ✓ help lists 'stats' subcommand + ✓ help lists 'agents' subcommand + ✓ --help alias works + + Error Handling +shipwright activity — Live agent activity stream + +USAGE + shipwright activity [subcommand] [options] + +SUBCOMMANDS + watch Live stream of agent activity (default) + snapshot Current state of all active agents + history [range] Replay past activity (e.g., '1h', '10m', 'all') + stats Running counters (events, commits, tests, agents) + agents List known agents and last activity + help Show this help message + +OPTIONS + --type Filter events by type (e.g., 'stage.completed') + --agent Filter by agent name + --team Filter by team + --stage Filter by pipeline stage (e.g., 'build') + +EXAMPLES + \033[2mshipwright activity\033[0m # Live stream + \033[2mshipwright activity watch --type stage.*\033[0m # Only stage events + \033[2mshipwright activity history 1h\033[0m # Last hour + \033[2mshipwright activity snapshot\033[0m # Current state + \033[2mshipwright activity stats\033[0m # Counters + ✓ Unknown command exits non-zero + + Subcommands Without Events File + ✓ snapshot exits non-zero with no events + ✓ stats exits non-zero with no events + ✓ agents exits non-zero with no events + + Event Emission + ✓ emit_event creates events.jsonl + ✓ emit_event writes valid JSON + ✓ Event contains type field + ✓ Event contains agent field + ✓ Event contains numeric count + + Format Helpers + ✓ get_icon_for_type commit returns icon + ✓ get_icon_for_type test.passed returns icon + ✓ get_icon_for_type unknown returns bullet + ✓ format_timestamp strips T and Z + + Stats With Events + ✓ stats shows Total Events + ✓ stats shows Commits count + ✓ stats shows Pipelines count + History Subcommand + ✓ history all shows activity header -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright pipeline test — E2E Validation (Real Subprocess) ║ -╚═══════════════════════════════════════════════════════════════════╝ -Setting up mock environment... -✓ Environment ready: /tmp/sw-pipeline-test.76QxGO + ────────────────────────────────────────── - ▸ Preflight passes with all mocks... ✓ - ▸ Preflight fails when sw-loop.sh missing... ✓ - ▸ Start requires --goal or --issue... ✓ - ▸ Intake with --goal creates branch + artifacts... ✓ - ▸ Intake with --issue fetches from GitHub... ✓ - ▸ Plan generates plan.md, dod.md, tasks... ✓ - ▸ Build invokes sw loop and commits... ✓ - ▸ Test stage captures results to log... ✓ - ▸ Review generates report with severities... ✓ - ▸ PR stage creates PR URL artifact... ✓ - ▸ Full E2E pipeline (6 stages)... ✓ - ▸ Resume continues from partial state... ✓ - ▸ Abort marks pipeline as aborted... ✓ - ▸ Dry run shows config, no artifacts... ✓ - ▸ Self-healing build→test retry loop... ✓ + All 28 tests passed -━━━ Results ━━━ - Passed: 15 - Failed: 0 - Total: 15 -All 15 tests passed! + Shipwright Adaptive Tests + ══════════════════════════════════════════ + help / version + ✓ help exits 0 + ✓ help shows USAGE + ✓ help shows SUBCOMMANDS + ✓ help mentions get + ✓ help mentions train + ✓ help mentions profile + ✓ version exits 0 + ✓ version output contains version string -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright daemon test — Unit Tests (Synthetic Events) ║ -╚═══════════════════════════════════════════════════════════════════╝ + error handling + ✓ Unknown command exits non-zero -Setting up test environment... -✓ Environment ready: /tmp/sw-daemon-test.G3riH7 + get command + ✓ get timeout with default exits 0 - ▸ dora_grade deploy_freq Elite (>= 7)... ✓ - ▸ dora_grade deploy_freq High (>= 1)... ✓ - ▸ dora_grade deploy_freq Medium (>= 0.25)... ✓ - ▸ dora_grade deploy_freq Low (< 0.25)... ✓ - ▸ dora_grade CFR thresholds (Elite/High/Medium/Low)... ✓ - ▸ Stage timings filter-first jq query... ✓ - ▸ MTTR pairs failures with next success... ✓ - ▸ epoch_to_iso helper function... ✓ - ▸ Health check detects stale jobs... ✓ - ▸ Priority label sorting... ✓ - ▸ Degradation alert triggers on high CFR... ✓ - ▸ Metrics --json output with cycle_time keys... ✓ - ▸ Self-labeling includes watch_label when enabled... ✓ - ▸ Self-labeling excludes watch_label when disabled... ✓ - ▸ Patrol recurring failures label construction... ✓ - ▸ DORA degradation event detection... ✓ - ▸ Retry exhaustion event detection... ✓ - ▸ Untested script detection logic... ✓ + profile command + ✓ profile exits 0 -━━━ Results ━━━ - Passed: 18 - Failed: 0 - Total: 18 + reset command + ✓ reset exits 0 -All 18 tests passed! + script safety + ✓ Uses set -euo pipefail + ✓ Has source guard pattern + statistical functions + ✓ percentile, mean, median functions defined in source + ✓ mean returns numeric value (avg of 1-5 is 3) -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright prep test — Validation Suite (Real Subprocess) ║ -╚═══════════════════════════════════════════════════════════════════╝ + get_timeout / get_iterations / get_model + ✓ get_timeout returns number (default with no events) + ✓ get_iterations returns number + ✓ get_model returns valid model name -Setting up test environment... -✓ Environment ready: /tmp/sw-prep-test.3ZRNVS - - ▸ Node.js project detection... ✓ - ▸ Python project detection... ✓ - ▸ Go project detection... ✓ - ▸ Rust project detection... ✓ - ▸ settings.json is valid JSON... ✓ - ▸ Hook scripts are executable... ✓ - ▸ Hook scripts have valid syntax... ✓ - ▸ CLAUDE.md has required sections... ✓ - ▸ Check mode outputs scoring... ✓ - ▸ Idempotency without --force... ✓ - ▸ --force overwrites modified files... ✓ - ▸ No eval in generated hooks... ✓ - ▸ Definition of Done generated... ✓ + train subcommand + ✓ train subcommand runs with mock events -━━━ Results ━━━ - Passed: 13 - Failed: 0 - Total: 13 + ────────────────────────────────────────── -All 13 tests passed! + All 20 tests passed -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright fleet test — Unit Tests ║ -╚═══════════════════════════════════════════════════════════════════╝ -Setting up test environment... -✓ Environment ready: /tmp/sw-fleet-test.f4j7ea + shipwright adversarial test + ══════════════════════════════════════════ - ▸ Help output contains expected sections... ✓ - ▸ Help via --help flag... ✓ - ▸ Config parsing — valid config... ✓ - ▸ Config parsing — missing config file... ✓ - ▸ Config parsing — invalid JSON... ✓ - ▸ Config parsing — empty repos array... ✓ - ▸ Config defaults applied... ✓ - ▸ Fleet init generates config template... ✓ - ▸ Fleet init skips when config exists... ✓ - ▸ Fleet start spawns tmux sessions per repo... ✓ - ▸ Fleet start skips missing repos... ✓ - ▸ Fleet start skips existing sessions... ✓ - ▸ Fleet start creates fleet state file... ✓ - ▸ Fleet start emits fleet.started event... ✓ - ▸ Fleet start applies repo-level overrides... ✓ - ▸ Fleet stop kills sessions and cleans state... ✓ - ▸ Fleet stop — no fleet running... ✓ - ▸ Fleet stop emits fleet.stopped event... ✓ - ▸ Fleet status — no fleet running... ✓ - ▸ Fleet status shows dashboard... ✓ - ▸ Fleet metrics — no events file... ✓ - ▸ Fleet metrics dashboard output... ✓ - ▸ Fleet metrics JSON output... ✓ - ▸ Fleet metrics period flag... ✓ - ▸ Session name generation... ✓ - ▸ Fleet start skips non-git repos... ✓ - ▸ Unknown subcommand... ✓ + ══════════════════════════════════════════ -━━━ Results ━━━ - Passed: 27 - Failed: 0 - Total: 27 + Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ Source guard pattern (if/then/fi) + ✓ VERSION variable defined -All 27 tests passed! + Help Output + ✓ help exits 0 and contains USAGE + ✓ help lists 'review' subcommand + ✓ help lists 'iterate' subcommand + ✓ help mentions adversarial_enabled flag + ✓ --help alias works + ✓ -h alias works + Error Handling + ✓ Unknown command exits non-zero -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright fix test — Unit Tests ║ -╚═══════════════════════════════════════════════════════════════════╝ + Review Subcommand + ✓ review disabled returns empty JSON array + ✓ review without diff arg exits non-zero -Setting up test environment... -✓ Environment ready: /tmp/sw-fix-test.XuOv7c + Iterate Subcommand + ✓ iterate without args exits non-zero + ✓ iterate with empty findings converges + ✓ iterate past max rounds returns findings - ▸ Help output contains expected sections... ✓ - ▸ Help via --help flag... ✓ - ▸ Missing goal shows error... ✓ - ▸ Missing repos shows error... ✓ - ▸ Arg parsing — --repos comma-separated... ✓ - ▸ Arg parsing — --repos-from file... ✓ - ▸ Arg parsing — --repos-from missing file... ✓ - ▸ Arg parsing — --pipeline template... ✓ - ▸ Arg parsing — --max-parallel... ✓ - ▸ Arg parsing — --branch-prefix... ✓ - ▸ Dry run shows what would happen... ✓ - ▸ Dry run does not create state file... ✓ - ▸ Fix status — no sessions... ✓ - ▸ Fix status shows existing sessions... ✓ - ▸ Invalid repo directory shows error... ✓ - ▸ Fix start creates state file... ✓ - ▸ Fix start emits events... ✓ - ▸ Fix start — summary output... ✓ - ▸ Branch name sanitization... ✓ - ▸ Fix header shows configuration... ✓ - ▸ Non-git repo warning... ✓ - ▸ Per-repo event tracking... ✓ + Configuration + ✓ ADVERSARIAL_MAX_ROUNDS env var respected + ✓ disabled config returns empty array -━━━ Results ━━━ - Passed: 22 - Failed: 0 - Total: 22 + Event Emission + ✓ emit_event creates events.jsonl + ✓ emit_event writes valid JSON + ✓ Event contains type field -All 22 tests passed! -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright memory+cost test — Unit Tests for Memory & Cost ║ -╚═══════════════════════════════════════════════════════════════════╝ -Setting up mock environment... -✓ Environment ready: /tmp/sw-memory-test.yiF1Rw + ────────────────────────────────────────── - ▸ Memory capture from pipeline state... ✓ - ▸ Memory inject returns context for each stage... ✓ - ▸ Failure capture stores patterns... ✓ - ▸ Pattern detection identifies project type... ✓ - ▸ Cross-repo vs per-repo isolation... ✓ - ▸ Memory show displays dashboard... ✓ - ▸ Memory search finds matching entries... ✓ - ▸ Memory export produces valid JSON... ✓ - ▸ Memory forget clears repo memory... ✓ - ▸ Cost calculation for each model... ✓ - ▸ Cost recording writes to costs.json... ✓ - ▸ Budget set and check... ✓ - ▸ Cost dashboard runs without errors... ✓ - ▸ Cost JSON output is valid... ✓ - ▸ Actionable failures threshold filtering... ✓ - ▸ Actionable failures with no file returns []... ✓ - ▸ DORA baseline calculation from events... ✓ + All 0 tests passed -━━━ Results ━━━ - Passed: 17 - Failed: 0 - Total: 17 -All 17 tests passed! + shipwright architecture-enforcer test + ══════════════════════════════════════════ -╔═══════════════════════════════════════════════════╗ -║ shipwright session — E2E Test Suite ║ -╚═══════════════════════════════════════════════════╝ + ══════════════════════════════════════════ -Setting up test environment... -Test tmux session: sw-test-1303977 - -Template Loading - ▸ Load feature-dev template (3 agents)... ✓ - ▸ Load exploration template (2 agents)... ✓ - ▸ Missing template returns error... ✓ - ▸ All 24 templates load successfully... ✓ - -Window Creation - ▸ No-launch creates window without claude... ✓ - ▸ Duplicate window detection... ✓ - ▸ Window gets dark theme... ✓ - ▸ Auto-generated team name... ✓ - -Prompt & Launcher - ▸ Launcher script generation with template... ✓ - ▸ Output includes agents from template... ✓ - ▸ Output includes goal when provided... ✓ - ▸ No template + goal works... ✓ - ▸ No template + no goal = interactive... ✓ - -CLI & Configuration - ▸ Help flag... ✓ - ▸ Unknown option... ✓ - ▸ TMPDIR used for launcher files... ✓ + Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ Source guard pattern (if/then/fi) + ✓ VERSION variable defined -Enhanced Features - ▸ Template auto-suggestion from goal... ✓ - ▸ No suggestion without goal... ✓ - ▸ Memory injection in prompt... ✓ - ▸ CLAUDE.md reminder in prompt... ✓ - ▸ Secure temp dir in source... ✓ + Help Output + ✓ help exits 0 and contains USAGE + ✓ help lists 'build' subcommand + ✓ help lists 'validate' subcommand + ✓ help lists 'evolve' subcommand + ✓ help mentions architecture_enabled flag + ✓ --help alias works + ✓ -h alias works -════════════════════════════════════════════════════ - All 21 tests passed ✓ -════════════════════════════════════════════════════ + Error Handling + ✓ Unknown command exits non-zero + Build Subcommand + ✓ build disabled returns empty JSON object + ✓ build with enabled returns model with layers + ✓ build model contains patterns array + ✓ build model contains conventions array -╔═══════════════════════════════════════════════════╗ -║ shipwright init — E2E Test Suite ║ -╚═══════════════════════════════════════════════════╝ + Validate Subcommand + ✓ validate disabled returns empty array + ✓ validate without diff arg exits non-zero + ✓ validate without model file returns empty array -Setting up sandboxed environment... -Temp dir: /tmp/sw-init-test.JhXYsO + Evolve Subcommand + ✓ evolve disabled exits 0 + ✓ evolve without model exits 0 -Configuration - ▸ Settings.json created with agent teams... ✓ - ▸ Settings merge preserves existing vars... ✓ - ▸ tmux.conf installed... ✓ - ▸ Overlay installed... ✓ + Model Storage + ✓ build stores model in ~/.shipwright/memory/ + ✓ Stored model is valid JSON -Templates - ▸ Team templates installed (>= 10)... ✓ - ▸ Pipeline templates installed (>= 5)... ✓ - ▸ Legacy templates path populated... ✓ + Event Emission + ✓ emit_event creates events.jsonl + ✓ emit_event writes valid JSON + ✓ Event contains type field -Robustness - ▸ Idempotency — double init safe... ✓ - ▸ Doctor runs at end... ✓ - ▸ Help flag... ✓ -Hook Wiring - ▸ JSONC stripped from settings.json... ✓ - ▸ Hooks wired into settings.json... ✓ - ▸ Hook wiring preserves existing hooks... ✓ - ▸ SessionStart hook installed... ✓ - ▸ Hook wiring with pre-existing settings... ✓ -════════════════════════════════════════════════════ - All 15 tests passed ✓ -════════════════════════════════════════════════════ + ────────────────────────────────────────── -╔═══════════════════════════════════════════════════╗ -║ shipwright tracker — Test Suite ║ -╚═══════════════════════════════════════════════════╝ + All 0 tests passed -Setting up test environment... -Provider Loading - ▸ Provider loads from config (linear)... ✓ - ▸ Provider loads jira... ✓ - ▸ Graceful skip when no provider... ✓ + Shipwright Auth Tests + ══════════════════════════════════════════ -Pipeline Enrichment - ▸ Stage descriptions exist for all 12 stages... ✓ - ▸ Enriched progress body has Delivering line... ✓ - ▸ Enriched progress body has stage descriptions... ✓ - ▸ Pipeline state includes stage_progress... ✓ - ▸ Pipeline state includes stage description... ✓ + Help + ✓ help shows usage + ✓ help shows commands + ✓ --help flag works + ✓ unknown command shows error + Status & Users + ✓ status with no login shows warning + ✓ users with empty shows warning + ✓ token with no user errors + ✓ switch without user exits non-zero + Auth Storage + ✓ auth file has users array + ✓ auth file has active_user + ✓ users lists stored user + ✓ token shows stored token + ✓ user info shows login + ✓ switch to nonexistent errors + ✓ logout succeeds -Integration - ▸ Tracker notify routes to provider (mock)... ✓ - ▸ Dashboard reads goal from pipeline state... ✓ - ▸ Jira config validation... ✓ - ▸ Linear config migration (legacy fallback)... ✓ + ────────────────────────────────────────── -════════════════════════════════════════════════════ - All 12 tests passed ✓ -════════════════════════════════════════════════════ + All 15 tests passed -╔═══════════════════════════════════════════════════╗ -║ shipwright heartbeat + checkpoint — Test Suite ║ -╚═══════════════════════════════════════════════════╝ -Setting up test environment... + Shipwright Autonomous Tests + ══════════════════════════════════════════ -Heartbeat Lifecycle - ▸ Write heartbeat creates JSON file... ✓ - ▸ Check heartbeat reports alive... ✓ Job test-job-alive alive (0s ago) -✓ - ▸ Check heartbeat reports stale... ⚠ Job test-job-stale stale (192940640s ago, timeout: 120s) -✓ - ▸ Clear heartbeat removes file... ✓ Cleared heartbeat for job: test-job-clear -✓ - ▸ List heartbeats returns JSON array... ✓ - ▸ Heartbeat update overwrites existing... ✓ - ▸ Check missing heartbeat returns error... ✓ - ▸ Heartbeat dir auto-created when missing... ✓ + ══════════════════════════════════════════ -Checkpoint Lifecycle - ▸ Checkpoint save creates JSON file... ✓ Checkpoint saved for stage build (iteration 5) -✓ - ▸ Checkpoint restore outputs JSON... ✓ Checkpoint saved for stage test (iteration 3) -✓ - ▸ Checkpoint restore missing stage fails... ✓ - ▸ Checkpoint clear removes file... ✓ Checkpoint saved for stage review (iteration 1) -✓ Cleared checkpoint for stage review -✓ - ▸ Checkpoint clear --all removes all... ✓ Checkpoint saved for stage build (iteration 1) -✓ Checkpoint saved for stage test (iteration 2) -✓ Cleared 2 checkpoint(s) -✓ - ▸ Checkpoint save with files-modified... ✓ Checkpoint saved for stage build (iteration 7) -✓ + ✓ help shows usage text + ✓ help exits 0 + ✓ --help flag works + ✓ unknown command exits 1 + ✓ start shows running message + ✓ state.json created after start + ✓ state status is running + ✓ config.json created + ✓ config is valid JSON + ✓ status shows dashboard + ✓ pause sets status to paused + ✓ resume sets status to running + ✓ stop sets status to stopped + ✓ config show displays settings + ✓ config set interval works + ✓ history handles no data -Integration - ▸ Pipeline script has heartbeat functions... ✓ - ▸ Loop script has heartbeat and checkpoint... ✓ - ▸ Pipeline has human intervention checks... ✓ + Config Set/Show Cycle + ✓ config show reflects set value + ✓ config value persists in file -════════════════════════════════════════════════════ - All 17 tests passed ✓ -════════════════════════════════════════════════════ + Config Structure + ✓ config contains key: cycle_interval_minutes + ✓ config contains key: max_issues_per_cycle + ✓ config contains key: daemon_aware + History With Fixture Events + ✓ history shows recent cycles + ✓ history shows cycle entries -╔═══════════════════════════════════════════════════╗ -║ shipwright remote — Test Suite ║ -╚═══════════════════════════════════════════════════╝ + Status Fields + ✓ status when running includes Status + ✓ status when running includes Cycles + ✓ status when running includes Issues Created + ✓ status when running includes Pipelines + ✓ status when running includes Cycle Interval + ✓ status when running shows running + ✓ status when stopped shows stopped -Setting up test environment... -Machine Registry - ▸ Add machine creates machines.json... ▸ Checking shipwright installation at /tmp/sw-remote-test.Qm0tkC/mock-install... -✓ Shipwright found at /tmp/sw-remote-test.Qm0tkC/mock-install -✓ Registered machine: builder-1 (localhost, worker, 4 workers) -✓ - ▸ Add second machine appends to array... ▸ Checking shipwright installation at /tmp/sw-remote-test.Qm0tkC/mock-install... -✓ Shipwright found at /tmp/sw-remote-test.Qm0tkC/mock-install -✓ Registered machine: builder-2 (localhost, worker, 8 workers) -✓ - ▸ Remove machine by name... ✓ Removed machine: builder-2 -✓ - ▸ List machines returns output... ✓ - ▸ machines.json uses atomic writes... ✓ - ▸ Duplicate machine name prevented... ▸ Use shipwright remote remove builder-1 first -✓ - ▸ Remote script has help command... ✓ -CLI & Dashboard Integration - ▸ CLI router includes remote command... ✓ - ▸ CLI router includes heartbeat command... ✓ - ▸ CLI router includes checkpoint command... ✓ - ▸ Doctor has heartbeat health check... ✓ - ▸ Doctor has remote machine checks... ✓ - ▸ Status shows heartbeat section... ✓ - ▸ Status shows remote machines section... ✓ + ────────────────────────────────────────── -════════════════════════════════════════════════════ - All 14 tests passed ✓ -════════════════════════════════════════════════════ + All 0 tests passed -╔═══════════════════════════════════════════════════════════════════╗ -║ shipwright intelligence test — Unit Tests ║ -╚═══════════════════════════════════════════════════════════════════╝ -Setting up test environment... -✓ Environment ready: /tmp/sw-intelligence-test.aRDezI + Shipwright Changelog Tests + ══════════════════════════════════════════ - ▸ analyze_issue returns valid schema... ✓ - ▸ Cache hit on second call with same input... ✓ - ▸ Graceful degradation when claude CLI unavailable... ✓ - ▸ compose_pipeline produces valid pipeline JSON... ✓ - ▸ recommend_model returns valid model names... ✓ - ▸ predict_cost returns numeric estimates... ✓ - ▸ Cache TTL expiry returns miss... ✓ - ▸ search_memory returns ranked results... ✓ - ▸ Feature flag disabled returns fallback... ✓ - ▸ Events emitted for analysis... ✓ - ▸ recommend_model emits events... ✓ - ▸ Cache init creates file if missing... ✓ + ══════════════════════════════════════════ -━━━ Results ━━━ - Passed: 12 - Failed: 0 - Total: 12 + help / version + ✓ help exits 0 + ✓ help shows USAGE + ✓ help mentions generate + ✓ help mentions preview + ✓ help mentions version + ✓ help mentions migrate + ✓ VERSION variable defined -All 12 tests passed! + error handling + ✓ Unknown command exits non-zero + formats command + ✓ formats exits 0 -╔═══════════════════════════════════════════════════╗ -║ shipwright pipeline composer — Test Suite ║ -╚═══════════════════════════════════════════════════╝ + generate command + ✓ generate exits 0 -Setting up test environment... + version command + ✓ version recommendation exits 0 -Pipeline Composition - ▸ Composed pipeline has valid stage ordering... ✓ - ▸ High-risk issue gets security stages... ▸ Composing pipeline with intelligence engine... -✓ Composed pipeline: 7 stages -.claude/pipeline-artifacts/composed-pipeline.json -✓ - ▸ Fallback to static template when no intelligence... ✓ + script safety + ✓ Uses set -euo pipefail + ✓ Has source guard pattern -Conditional Stage Insertion - ▸ Stage inserted at correct position after build... ✓ - ▸ Insert into nonexistent stage fails... ✓ -Model Downgrade - ▸ Budget constraint triggers model downgrades... ✓ - ▸ Downgrade with nonexistent stage fails... ✓ -Pipeline Validation - ▸ Validation accepts valid pipeline... ✓ - ▸ Validation rejects invalid ordering (test before build)... ✓ - ▸ Validation rejects missing stage ids... ✓ - ▸ Validation rejects missing stages array... ✓ + ────────────────────────────────────────── -Iteration Estimation - ▸ Iteration estimates are reasonable (1-50 range)... ✓ + All 0 tests passed -════════════════════════════════════════════════════ - All 12 tests passed ✓ -════════════════════════════════════════════════════ -━━━ shipwright self-optimize tests ━━━ + shipwright checkpoint test + ══════════════════════════════════════════ - ▸ Outcome analysis extracts correct metrics... ✓ - ▸ Outcome analysis emits event... ✓ - ▸ Outcome analysis rejects missing file... ✓ - ▸ Template weight increases for high success... ✓ - ▸ Template weight decreases for low success... ✓ - ▸ A/B test selects ~20% sample... ✓ - ▸ Iteration model updates with data points... ✓ - ▸ Model routing tracks success rates... ✓ - ▸ Model routing keeps opus with few sonnet samples... ✓ - ▸ Memory pruning removes old patterns... ✓ - ▸ Memory strengthening boosts confirmed patterns... ✓ - ▸ Memory promotion copies cross-repo patterns... ✓ - ▸ Full analysis runs on empty data... ✓ - ▸ Report generates output with data... ✓ - ▸ Report handles empty outcomes... ✓ - ▸ Outcome analysis extracts stage data... ✓ + ══════════════════════════════════════════ -━━━ Results ━━━ - Passed: 16 - Failed: 0 - Total: 16 + Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ VERSION variable defined + + Help Output + ✓ help exits 0 and contains USAGE + ✓ help lists 'save' subcommand + ✓ help lists 'restore' subcommand + ✓ help lists 'list' subcommand + ✓ help lists 'clear' subcommand + ✓ help lists 'expire' subcommand + ✓ --help alias works + ✓ -h alias works + + Error Handling + +shipwright checkpoint v3.2.4 — Save and restore agent state mid-stage + +USAGE + shipwright checkpoint [options] + +COMMANDS + save Save a checkpoint for a stage + restore Restore a checkpoint (prints JSON to stdout) + save-context Save Claude context (goal, findings, test output) for resume + restore-context Restore Claude context (exports RESTORED_* and SW_LOOP_* vars) + list Show all available checkpoints + clear Remove checkpoint(s) + expire Remove checkpoints older than N hours + +SAVE OPTIONS + --stage Stage name (required) + --iteration Current iteration number + --git-sha Git commit SHA (default: HEAD) + --files-modified "f1,f2" Comma-separated list of modified files + --tests-passing Mark tests as passing + --loop-state Loop state (running, paused, etc.) + +RESTORE OPTIONS + --stage Stage to restore (required) + +CLEAR OPTIONS + --stage Stage to clear + --all Clear all checkpoints + +EXPIRE OPTIONS + --hours Max age in hours (default: 24) -All 16 tests passed! +EXAMPLES + shipwright checkpoint save --stage build --iteration 5 + shipwright checkpoint save --stage build --iteration 3 --tests-passing --files-modified "src/auth.ts,src/middleware.ts" + shipwright checkpoint restore --stage build + shipwright checkpoint list + shipwright checkpoint clear --stage build + shipwright checkpoint clear --all + shipwright checkpoint expire --hours 48 + ✓ Unknown command exits non-zero + + Save Subcommand + +shipwright checkpoint v3.2.4 — Save and restore agent state mid-stage + +USAGE + shipwright checkpoint [options] + +COMMANDS + save Save a checkpoint for a stage + restore Restore a checkpoint (prints JSON to stdout) + save-context Save Claude context (goal, findings, test output) for resume + restore-context Restore Claude context (exports RESTORED_* and SW_LOOP_* vars) + list Show all available checkpoints + clear Remove checkpoint(s) + expire Remove checkpoints older than N hours + +SAVE OPTIONS + --stage Stage name (required) + --iteration Current iteration number + --git-sha Git commit SHA (default: HEAD) + --files-modified "f1,f2" Comma-separated list of modified files + --tests-passing Mark tests as passing + --loop-state Loop state (running, paused, etc.) + +RESTORE OPTIONS + --stage Stage to restore (required) + +CLEAR OPTIONS + --stage Stage to clear + --all Clear all checkpoints + +EXPIRE OPTIONS + --hours Max age in hours (default: 24) + +EXAMPLES + shipwright checkpoint save --stage build --iteration 5 + shipwright checkpoint save --stage build --iteration 3 --tests-passing --files-modified "src/auth.ts,src/middleware.ts" + shipwright checkpoint restore --stage build + shipwright checkpoint list + shipwright checkpoint clear --stage build + shipwright checkpoint clear --all + shipwright checkpoint expire --hours 48 + ✓ save without --stage exits non-zero +✓ Checkpoint saved for stage build (iteration 5) + ✓ save creates checkpoint file + ✓ Checkpoint is valid JSON + ✓ Checkpoint stage field correct + ✓ Checkpoint iteration field correct + ✓ Checkpoint git_sha populated +✓ Checkpoint saved for stage test (iteration 0) + ✓ save --tests-passing sets true +✓ Checkpoint saved for stage review (iteration 0) + ✓ save --files-modified stores 2 files +✓ Checkpoint saved for stage deploy (iteration 0) + ✓ save --loop-state stores state + ✓ Checkpoint created_at timestamp present + + Restore Subcommand + ✓ restore returns checkpoint JSON + ✓ Restored checkpoint has correct stage + ✓ restore missing stage exits non-zero + ✓ restore without --stage exits non-zero + + List Subcommand + ✓ list shows Checkpoints header + ✓ list shows build checkpoint + ✓ list shows checkpoint count + ✓ list with no checkpoints shows empty + + Clear Subcommand +✓ Checkpoint saved for stage build (iteration 1) +✓ Checkpoint saved for stage test (iteration 2) +✓ Cleared checkpoint for stage build + ✓ clear --stage removes specific checkpoint + ✓ clear --stage preserves other checkpoints + ✓ clear without args exits non-zero +✓ Checkpoint saved for stage build (iteration 3) +✓ Cleared 2 checkpoint(s) + ✓ clear --all removes all checkpoints + Expire Subcommand + ✓ expire with no checkpoints exits 0 -╔═══════════════════════════════════════════════════════════╗ -║ shipwright predictive test ║ -╚═══════════════════════════════════════════════════════════╝ + Save-context / Restore-context + ✓ save-context creates claude-context.json + ✓ Context goal saved correctly + ✓ Context iteration saved correctly + ✓ restore-context exports RESTORED_GOAL + ✓ restore-context exports SW_LOOP_GOAL -Risk Assessment - ▸ Risk returns valid schema with 0-100 range... ✓ - ▸ Risk elevates for complex keywords... ✓ - ▸ Risk emits event... ✓ -Anomaly Detection - ▸ Critical at 3x threshold (181 vs 60 baseline)... ✓ - ▸ Normal at 2x (119 vs 60 baseline)... ✓ - ▸ Warning between 2x and 3x (150 vs 60)... ✓ - ▸ Normal when no baseline exists... ✓ - ▸ Emits event for critical anomaly... ✓ -Baseline Management - ▸ First data point uses raw value... ✓ - ▸ EMA calculation (0.9*100 + 0.1*200 = 110)... ✓ - ▸ Creates baseline file if missing... ✓ + ────────────────────────────────────────── -Preventative Injection - ▸ Injects context from matching patterns... ✓ - ▸ Returns empty for non-matching stage... ✓ + All 0 tests passed -Graceful Degradation - ▸ Works without intelligence engine... ✓ -AI Patrol - ▸ AI patrol returns structured findings... ✓ -════════════════════════════════════════════════════ - All 15 tests passed ✓ -════════════════════════════════════════════════════ + Shipwright CI Tests + ══════════════════════════════════════════ + ══════════════════════════════════════════ -╔═══════════════════════════════════════════════════╗ -║ shipwright frontier — Test Suite ║ -╚═══════════════════════════════════════════════════╝ + ✓ help shows usage text + ✓ help exits 0 + ✓ --help flag works + ✓ version shows version + ✓ unknown command exits 1 + ✓ generate starts processing + ✓ validate runs on valid workflow + ✓ VERSION is defined + ✓ analyze shows analysis + ✓ analyze shows cache info + ✓ matrix generates config + ✓ matrix workflow file exists + ✓ validate passes on valid workflow + ✓ runners list shows options + ✓ runners recommend shows guidance -Setting up test environment... -Adversarial Review - ▸ Adversarial review produces structured findings... ✓ - ▸ Adversarial iteration converges on no critical findings... ✓ -Developer Simulation - ▸ Simulation generates objections from 3 personas... ✓ - ▸ Simulation address returns action items... ✓ + ────────────────────────────────────────── -Architecture Enforcer - ▸ Architecture model has valid schema... ✓ - ▸ Architecture validates changes (violation detected)... ✓ + All 0 tests passed -Graceful Degradation - ▸ All three degrade gracefully when claude unavailable... ✓ -════════════════════════════════════════════════════ - All 7 tests passed ✓ -════════════════════════════════════════════════════ + + Shipwright Cleanup Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Help + ✓ help exits 0 + ✓ help shows usage + ✓ help shows --force + ✓ help shows dry-run + + Dry-Run (Empty) + ✓ dry-run shows Tmux Windows section + ✓ dry-run shows Team Configs section + ✓ dry-run shows Task Lists section + ✓ dry-run shows Pipeline Artifacts section + ✓ dry-run shows Pipeline State section + ✓ dry-run shows Heartbeats section + ✓ dry-run reports clean + + Force Mode (Empty) + ✓ force shows FORCE MODE + ✓ force reports nothing to clean + + Detect Team Configs + ✓ dry-run detects team dir + ✓ dry-run shows would remove + + Force Removes Teams + ✓ force removes team dir + ✓ team directory actually removed + + Detect Task Lists + ✓ dry-run detects task dir + ✓ task directory actually removed + + Detect Stale Heartbeats + ✓ dry-run detects stale heartbeat + + Error Handling + ✓ unknown option exits non-zero + ✓ unknown option shows error + + Summary Counting + ✓ summary shows found count + ✓ summary shows --force hint + + + + ────────────────────────────────────────── + + All 24 tests passed + + + + Shipwright Code Review Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + ✓ help shows usage text + ✓ help exits 0 + ✓ --help flag works + ✓ review runs and starts reviewing + ✓ trends with no data + ✓ config show outputs valid config + ✓ unknown subcommand exits nonzero + ✓ no false long function detection on small file + ✓ style consistency check runs without crash + ✓ architecture boundary check runs without crash + + + + ────────────────────────────────────────── + + All 0 tests passed ╔═══════════════════════════════════════════════════╗ @@ -560,24 +698,24 @@ All 16 tests passed! Setting up test environment... -Identity Resolution +Identity Resolution ▸ resolve_developer_id from DEVELOPER_ID env... ✓ ▸ resolve_developer_id from git config... ✓ ▸ resolve_developer_id fallback to USER... ✓ ▸ resolve_machine_name from MACHINE_NAME env... ✓ ▸ resolve_machine_name from hostname... ✓ -Dashboard URL Resolution +Dashboard URL Resolution ▸ resolve_dashboard_url from --url flag... ✓ ▸ resolve_dashboard_url from DASHBOARD_URL env... ✓ ▸ resolve_dashboard_url from team-config.json... ✓ ▸ resolve_dashboard_url falls back to default... ✓ -Start/Stop Lifecycle +Start/Stop Lifecycle ▸ cmd_start creates PID file... ▸ Starting connect to http://localhost:8767 ▸ Developer: test-developer @ test-machine -✓ Connect started (PID 1315235) - Logs: /tmp/sw-connect-test.UX0pCu/home/.shipwright/connect.log +✓ Connect started (PID 15326) + Logs: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-connect-test.kkYSxV/home/.shipwright/connect.log Stop: shipwright connect stop ✓ ▸ cmd_start rejects if already running... Stop it first: shipwright connect stop @@ -587,29 +725,3272 @@ All 16 tests passed! ▸ cmd_stop handles missing PID gracefully... ⚠ Connect is not running (no PID file) ✓ -Status +Status ▸ cmd_status shows connected when PID alive... ✓ ▸ cmd_status shows disconnected when no PID... ✓ -Join Flow +Join Flow ▸ cmd_join verifies token against dashboard... ✓ ▸ cmd_join saves team-config.json... ✓ ▸ cmd_join rejects invalid token... ✓ ▸ cmd_join accepts --url and --token flags... ✓ -Heartbeat & Disconnect Payloads +Heartbeat & Disconnect Payloads ▸ Heartbeat payload includes required fields... ✓ ▸ Send disconnect sends proper payload... ✓ -Configuration & Utilities +Configuration & Utilities ▸ ensure_dir creates shipwright directory... ✓ ▸ now_iso returns valid ISO timestamp... ✓ ▸ Script has correct version... ✓ -Integration +Integration ▸ Help command shows all main commands... ✓ ════════════════════════════════════════════════════  All 25 tests passed ✓ ════════════════════════════════════════════════════ + + Context Budget Monitor Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Budget Initialization + ✓ init creates config (exit 0) + ✓ config has correct total_budget + ✓ system_reserve is 10% (80000) + ✓ tools_reserve is 10% (80000) + ✓ working_memory is 60% (480000) + ✓ output_reserve is 20% (160000) + + Token Estimation + ✓ estimation produces JSON + ✓ estimate contains utilization_percent + ✓ estimate contains total_used (>0) + + Status Checking (Thresholds) + ✓ 50% utilization returns green status + ✓ 70% utilization returns yellow status + ✓ 85% utilization returns red status + ✓ 95% utilization returns critical status + + Context Trimming + ✓ green status doesn't trim content + ✓ yellow status reduces content length + ✓ hard truncate respects size limit + + Iteration Summarization + ✓ summarize_iteration succeeds (exit 0) + ✓ iteration summary file created with 1 entry + + Budget Report Generation + ✓ report contains budget_config + ✓ report contains iteration_summaries + + State Logging + ✓ log_state succeeds (exit 0) + ✓ log file created with entries + + + ────────────────────────────────────────── + + All 22 tests passed + + + + Shipwright Context Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help command + ✓ help exits 0 + ✓ help shows usage + ✓ help shows commands + ✓ --help exits 0 + + error handling + ✓ unknown command exits 1 + ✓ unknown command shows error + + show subcommand + ✓ show exits 0 + ✓ show outputs context header + + clear subcommand + ✓ clear exits 0 + ✓ clear confirms cleared + + gather subcommand + ✓ gather without args exits 1 + ✓ gather shows must provide + ✓ gather with unknown option exits 1 + + gather with goal + ✓ gather exits 0 + ✓ gather shows building + ✓ gather shows success + ✓ context-bundle.md created + + show after gather + ✓ show after gather exits 0 + ✓ show contains pipeline context + + clear after gather + ✓ clear exits 0 + + gather with issue + ✓ gather with --issue exits 0 + ✓ gather shows building + + gather default stage + ✓ gather default stage exits 0 + + internal stage_guidance + ✓ stage_guidance returns plan guidance + ✓ stage_guidance returns build guidance + ✓ stage_guidance handles unknown stage + + + + ────────────────────────────────────────── + + All 26 tests passed + + + + Shipwright Cost Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help / version + ✓ help exits 0 + ✓ help shows USAGE + ✓ help shows COMMANDS + ✓ help mentions show + ✓ help mentions budget + ✓ help mentions calculate + ✓ VERSION variable defined + + state management + ✓ costs.json created on first use + ✓ budget.json created on first use + ✓ costs.json has entries array + ✓ budget.json has daily_budget_usd + + budget commands + ✓ budget set exits 0 + ✓ budget set to 50 + ✓ budget show exits 0 + + error handling + ✓ Unknown command exits non-zero + + calculate + ✓ calculate exits 0 + + script safety + ✓ Uses set -euo pipefail + ✓ ERR trap is set + + context efficiency in cost dashboard + ✓ Cost dashboard has CONTEXT EFFICIENCY section + ✓ Cost dashboard reads loop.context_efficiency events + ✓ Context efficiency reports utilization and waste + ✓ Dashboard renders CONTEXT EFFICIENCY with event data + ✓ Dashboard shows avg budget utilization + + + + ────────────────────────────────────────── + + All 0 tests passed + + +╔═══════════════════════════════════════════════════════════════════╗ +║ shipwright daemon test — Unit Tests (Synthetic Events) ║ +╚═══════════════════════════════════════════════════════════════════╝ + +Setting up test environment... +✓ Environment ready: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-daemon-test.tgqluq + + ▸ dora_grade deploy_freq Elite (>= 7)... ✓ + ▸ dora_grade deploy_freq High (>= 1)... ✓ + ▸ dora_grade deploy_freq Medium (>= 0.25)... ✓ + ▸ dora_grade deploy_freq Low (< 0.25)... ✓ + ▸ dora_grade CFR thresholds (Elite/High/Medium/Low)... ✓ + ▸ Stage timings filter-first jq query... ✓ + ▸ MTTR pairs failures with next success... ✓ + ▸ epoch_to_iso helper function... ✓ + ▸ Health check detects stale jobs... scripts/sw-daemon-test.sh: line 120: 17144 Terminated: 15 sleep 300 +✓ + ▸ Priority label sorting... ✓ + ▸ Degradation alert triggers on high CFR... ✓ + ▸ Metrics --json output with cycle_time keys... ✓ + ▸ Self-labeling includes watch_label when enabled... ✓ + ▸ Self-labeling excludes watch_label when disabled... ✓ + ▸ Patrol recurring failures label construction... ✓ + ▸ DORA degradation event detection... ✓ + ▸ Retry exhaustion event detection... ✓ + ▸ Untested script detection logic... ✓ + ▸ Progress detects stage advancement... ✓ + ▸ Progress detects stuck (no change N checks)... ✓ + ▸ Progress detects repeated error loop... ✓ + ▸ Progress resets on diff growth... ✓ + ▸ Hard limit kills even with progress on... scripts/sw-daemon-test.sh: line 200: 18550 Terminated: 15 sleep 300 +✓ + ▸ Adaptive cycles extends limit on >50% issue drop... ✓ + ▸ Adaptive cycles reduces limit on issue increase... ✓ + ▸ Adaptive cycles respects 2x base hard ceiling... ✓ + ▸ Adaptive cycles no-op on first cycle... ✓ + ▸ Cleanup: Checkpoint expire removes old checkpoints... ▸ Expired: build checkpoint (1h+ old) +✓ Expired 1 checkpoint(s) older than 1h +✓ + ▸ Cleanup: Failure handler removes watch label... ✓ + ▸ Cleanup: Failure handler closes draft PRs... ✓ + ▸ Cleanup: sw-cleanup.sh has all artifact cleanup sections... ✓ + ▸ Daemon sources vitals module... ✓ + ▸ Vitals verdict maps to daemon verdict (continue→healthy etc)... ✓ + ▸ Vitals emits pipeline.vitals_check events... ✓ + ▸ Auto-scale includes vitals health factor... ✓ + ▸ Quality memory drives template selection... ✓ + ▸ Vitals-based progress with static fallback... ✓ + ▸ Memory: query fix for error returns matching fix... ✓ + ▸ Memory: DORA template escalation patterns exist... ✓ + ▸ Memory: All 12 error categories in post-tool-use.sh... ✓ + ▸ Daemon: Template weights selection reads weights file... ✓ + ▸ Daemon: Auto-enable self_optimize when auto_template is true... ✓ + ▸ Intelligence: classify_failure detects auth errors... ✓ + ▸ Intelligence: classify_failure has all 6 failure classes... ✓ + ▸ Intelligence: Retry skips auth_error and invalid_issue... ✓ + ▸ Intelligence: API errors get extended 300s backoff... ✓ + ▸ Intelligence: daemon_preflight_auth_check exists and auto-pauses... ✓ + ▸ Intelligence: Process group spawning (set -m)... ✓ + ▸ Intelligence: Process tree kill in cleanup (pkill -P)... ✓ + ▸ Intelligence: Consecutive failure auto-pause (3 threshold)... ✓ + ▸ Intelligence: Retry escalation args passed to spawn... ✓ + ▸ Intelligence: classify_failure wired into retry logic... ✓ + +━━━ Results ━━━ + Passed: 66 + Failed: 0 + Total: 52 + +All 66 tests passed! + + +Dashboard Smoke Tests + +Structure + ▸ server.ts exists... ✓ + ▸ server.ts has valid structure (imports, fetch)... ✓ + ▸ public/ directory exists... ✓ + ▸ index.html exists... ✓ + ▸ src/ directory exists... ✓ + ▸ main.ts entry point exists... ✓ + ▸ tsconfig.json exists... ✓ + ▸ src/ modules exist (core, views, components, design, types, canvas)... ✓ + +Routes + ▸ Server exports /api/health and /api/status... ✓ + ▸ Server exports /ws WebSocket route... ✓ + ▸ Server exports /api/context-efficiency... ✓ + +Integrity + ▸ bun check passes (if bun available)... ✓ + ▸ index.html references dist/main.js... ✓ + ▸ Frontend bundle builds (if bun available)... ✓ + ▸ TypeScript type check passes (if tsc available)... (typescript not installed, skipping) +✓ + +════════════════════════════════════════════════════ + All 15 tests passed ✓ +════════════════════════════════════════════════════ + + +╔════════════════════════════════════════════════════════╗ +║ shipwright db — SQLite Persistence Test Suite ║ +╚════════════════════════════════════════════════════════╝ + +Setting up test environment... + +Schema Creation + ▸ init_schema creates all tables... ✓ + ▸ Schema includes required tables... ✓ + ▸ WAL mode is enabled... ✓ + ▸ check_sqlite3 caches result... ✓ + +Database Availability + ▸ db_available returns true when ready... ✓ + ▸ db_available returns false without sqlite3... /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-db-test.mzxqqq/sw-db.sh: line 19: dirname: command not found +✓ + +Event CRUD Operations + ▸ db_add_event inserts event... ✓ + ▸ add_event dual-writes to SQLite + JSONL... ✓ + +Daemon State Management + ▸ db_save_job saves active job... ✓ + ▸ db_complete_job marks completed... ✓ + ▸ db_fail_job marks failed... ✓ + ▸ db_list_active_jobs returns JSON array... ✓ + ▸ db_active_job_count returns count... ✓ + ▸ db_is_issue_active checks status... ✓ + +Cost Tracking + ▸ db_record_cost saves entry... ✓ + ▸ db_cost_today calculates total... ✓ + ▸ db_set_budget and db_get_budget... ✓ + +Heartbeat Management + ▸ db_record_heartbeat saves heartbeat... ✓ + ▸ db_list_heartbeats returns array... ✓ + ▸ db_clear_heartbeat deletes entry... ✓ + +Memory & Failure Tracking + ▸ db_record_failure saves pattern... ✓ + ▸ db_query_similar_failures finds matches... ✓ + +JSON Data Migration + ▸ migrate_json_data imports events... ▸ Migrating schema v0 → v2... +✓ Migrated to schema v2 +▸ Migrating schema v0 → v3... +✓ Migrated to schema v3 +▸ Migrating schema v0 → v4... +✓ Migrated to schema v4 +▸ Migrating schema v0 → v5... +✓ Migrated to schema v5 +▸ Migrating schema v0 → v6... +✓ Migrated to schema v6 +▸ Importing events from /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-db-test.mzxqqq/home/.shipwright/events.jsonl... +✓ Events: 2 imported, 0 skipped (duplicates) +▸ Importing heartbeats... +✓ Heartbeats: 0 imported + +✓ Migration complete: 2 total records imported + +▸ Verification: + Events in DB: 2 + Cost entries: 0 + Heartbeats: 0 +✓ + ▸ migrate_json_data imports costs... ▸ Database already at schema v6 +▸ Importing events from /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-db-test.mzxqqq/home/.shipwright/events.jsonl... +✓ Events: 2 imported, 0 skipped (duplicates) +▸ Importing costs from /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-db-test.mzxqqq/home/.shipwright/costs.json... +✓ Costs: 2 entries imported +▸ Importing heartbeats... +✓ Heartbeats: 0 imported + +✓ Migration complete: 4 total records imported + +▸ Verification: + Events in DB: 2 + Cost entries: 2 + Heartbeats: 0 +✓ + ▸ migrate_json_data imports budget... ▸ Database already at schema v6 +▸ Importing events from /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-db-test.mzxqqq/home/.shipwright/events.jsonl... +✓ Events: 2 imported, 0 skipped (duplicates) +▸ Importing costs from /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-db-test.mzxqqq/home/.shipwright/costs.json... +✓ Costs: 2 entries imported +▸ Importing budget from /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-db-test.mzxqqq/home/.shipwright/budget.json... +✓ Budget: imported ($50.00, enabled=true) +▸ Importing heartbeats... +✓ Heartbeats: 0 imported + +✓ Migration complete: 4 total records imported + +▸ Verification: + Events in DB: 2 + Cost entries: 2 + Heartbeats: 0 +✓ + +Health Checks + ▸ db_health_check validates database... ✓ + +Export & Backup + ▸ export_db creates JSON backup... ▸ Exporting database to /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-db-test.mzxqqq/home/.shipwright/test-backup.json... +✓ Database exported to /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-db-test.mzxqqq/home/.shipwright/test-backup.json +✓ + +Data Cleanup + ▸ cleanup_old_data removes old entries... ▸ Cleaning records older than 30 days (before 2026-03-05T12:37:21Z)... +✓ Deleted: 0 events, 0 costs, 0 daemon jobs, 0 stages +✓ + +Concurrent Operations + ▸ Concurrent writes don't corrupt DB... 5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +✓ + +Pipeline Run Tracking + ▸ add_pipeline_run creates entry... ✓ + ▸ update_pipeline_status updates run... ✓ + +══════════════════════════════════════════════════════════ + All 31 tests passed ✓ +══════════════════════════════════════════════════════════ + + + + Shipwright Decompose Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help command + ✓ help exits 0 + ✓ help shows usage + ✓ help shows commands + ✓ --help exits 0 + + version flag + ✓ --version exits 0 + ✓ --version shows version + + error handling + ✓ unknown command exits 1 + ✓ unknown command shows error + ✓ analyze without issue exits 1 + ✓ analyze shows usage + ✓ decompose without issue exits 1 + ✓ auto without issue exits 1 + + analyze subcommand (mock) + ✓ analyze exits 0 with NO_GITHUB + ✓ analyze outputs complexity_score + ✓ analyze outputs should_decompose + ✓ analyze outputs subtasks + ✓ analyze outputs valid JSON + ✓ analyze returns complexity_score 85 + ✓ analyze returns should_decompose true + + decompose subcommand (mock) + ✓ decompose exits 0 with NO_GITHUB + ✓ decompose shows decomposing + + auto subcommand (mock) + ✓ auto exits 0 with NO_GITHUB + + state file creation + ✓ events.jsonl created + + DAG scheduling (new features) + ✓ schedule exits 0 + ✓ schedule shows valid DAG + ✓ schedule shows waves + ✓ critical-path exits 0 + ✓ critical-path shows title + ✓ critical-path shows hours + ✓ visualize text exits 0 + ✓ visualize shows DAG title + ✓ visualize shows task 0 + ✓ visualize mermaid exits 0 + ✓ visualize mermaid has graph + ✓ help shows schedule cmd + ✓ help shows critical-path cmd + ✓ help shows visualize cmd + ✓ version shows 3.2.4 + ✓ mock data includes depends_on field + + + + ────────────────────────────────────────── + + All 39 tests passed + + + sw-decide Tests + ══════════════════════════════════════════ + + + help + ✓ help shows usage + ✓ help mentions run + ✓ help mentions status + ✓ help mentions tiers + ✓ help mentions candidates + ✓ help mentions halt + ✓ help mentions resume + ✓ help mentions dry-run + ✓ --help shows usage + + tiers + ✓ tiers shows auto + ✓ tiers shows propose + ✓ tiers shows draft + ✓ tiers shows category rules + ✓ tiers shows limits + + signals + ✓ candidate has id + ✓ candidate has signal + ✓ candidate has category + ✓ candidate has risk_score + ✓ candidate has dedup_key + ✓ candidate has collected_at + ✓ candidate is valid JSON + ✓ read_pending returns data + ✓ clear_pending empties file + + scoring + ✓ scored has value_score + ✓ scored has scores object + ✓ critical security scores well (75) + ✓ security scores higher than dead_code (75 > 33) + ✓ impact weight loaded + ✓ urgency weight loaded + + autonomy + ✓ deps_patch -> auto + ✓ refactor_hotspot -> propose + ✓ new_feature -> draft + ✓ unknown -> draft + ✓ auto labels include shipwright + ✓ auto labels include ready-to-build + ✓ propose labels include proposed +jq: parse error: Unmatched '}' at line 1, column 111 +jq: parse error: Unmatched '}' at line 1, column 111 + ✓ budget available with no decisions +jq: parse error: Unmatched '}' at line 1, column 111 + ✓ budget exhausted after 16 issues + ✓ rate limit passes with no history +jq: parse error: Unmatched '}' at line 1, column 111 + ✓ rate limit blocks recent decision + ✓ not halted initially + ✓ halted after halt() + ✓ halt file created + ✓ resumed after resume() + + risk ceiling + ✓ risk 20 below ceiling 30 + ✓ risk 35 above ceiling 30 + + dry-run + ✓ dry-run shows DRY RUN + ✓ no daily log created in dry-run + ✓ no drafts in dry-run + + decision log + ✓ daily log exists + ✓ log has decision id + ✓ log has value_score + ✓ last-decision written + + outcome learning + ✓ urgency weight adjusted on security success (25) + ✓ risk weight increased on failure (13 >= 13) + ✓ weights sum to 100 + ✓ weights file written + + candidates + ✓ candidates shows title + ✓ candidates shows signal + + halt/resume CLI + ✓ halt succeeds + ✓ run blocked when halted + ✓ resume succeeds + + status + ✓ status shows active + ✓ status shows decisions + ✓ status shows budget + ✓ status shows weights + + cycle integration + ✓ cycle shows Decision Engine + ✓ cycle shows Cycle Complete + ✓ cycle processes candidates + ✓ cycle shows AUTO tier + ✓ cycle shows DRAFT tier + + log command + ✓ log shows today's date + ✓ log shows entry + + ────────────────────────────────────────── + + All 33 tests passed + + + + Shipwright Deps Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help command + ✓ help exits 0 + ✓ help shows usage + ✓ help shows commands + ✓ --help exits 0 + + error handling + ✓ unknown command exits 1 + ✓ unknown command shows error + + scan subcommand + ✓ scan exits 0 with NO_GITHUB + ✓ scan shows warning + + classify subcommand + ✓ classify without args exits 1 + ✓ classify shows usage + ✓ classify exits 0 with NO_GITHUB + ✓ classify shows warning + + batch subcommand + ✓ batch exits 0 with NO_GITHUB + ✓ batch shows warning + + report subcommand + ✓ report exits 0 with NO_GITHUB + ✓ report shows warning + + merge subcommand + ✓ merge without args exits 1 + + test subcommand + ✓ test without args exits 1 + + internal parse_version_bump + ✓ parse_version_bump detects patch + ✓ parse_version_bump detects minor + ✓ parse_version_bump detects major + ✓ parse_version_bump handles v prefix + + + + ────────────────────────────────────────── + + All 22 tests passed + + + + shipwright developer-simulation test + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + +Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ source guard uses if/then/fi pattern + +Version + ✓ VERSION variable defined + +Help Output + ✓ help contains USAGE + ✓ help contains review subcommand + ✓ help contains address subcommand + ✓ help contains PERSONAS + ✓ help contains simulation_enabled + ✓ --help flag works + +Error Handling + ✓ unknown command exits non-zero + ✓ unknown command mentions error + +Review Subcommand + ✓ review warns when disabled + ✓ review returns empty JSON array when disabled + +Address Subcommand + ✓ address with no objections succeeds + ✓ address returns empty JSON + +Persona Definitions + ✓ security persona defined + ✓ performance persona defined + ✓ maintainability persona defined + +Configuration + ✓ SIMULATION_MAX_ROUNDS env var supported + ✓ daemon-config.json checked + +Event Emission + ✓ emits simulation.objection events + ✓ emits simulation.complete events + ✓ emits simulation.addressed events + + + + + ────────────────────────────────────────── + + All 0 tests passed + + + + Shipwright Discovery Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help command + ✓ help exits 0 + ✓ help shows usage + ✓ help shows commands + ✓ --help exits 0 + + error handling + ✓ unknown command exits 1 + ✓ unknown command shows error + ✓ broadcast without args exits 1 + ✓ query without args exits 1 + ✓ inject without args exits 1 + + broadcast subcommand + ✓ broadcast exits 0 + ✓ broadcast confirms + ✓ discoveries.jsonl created + ✓ discoveries.jsonl contains valid JSON + + query subcommand + ✓ query exits 0 + ✓ query finds discovery + ✓ query non-match exits 0 + ✓ query reports no discoveries + + status subcommand + ✓ status exits 0 + ✓ status shows total + + clean subcommand + ✓ clean exits 0 + ✓ clean reports result + + inject subcommand + ✓ inject exits 0 + + internal patterns_overlap + ✓ patterns_overlap matches same pattern + ✓ patterns_overlap rejects different paths + + prioritize subcommand + ✓ prioritize security exits 0 + ✓ prioritize assigns P0 + ✓ prioritize assigns P3 + + score subcommand + ✓ score exits 0 + ✓ score returns numeric result: 89 + + acknowledge subcommand + ✓ acknowledge exits 0 + ✓ consumption file created + ✓ consumption count incremented + + consumption stats + ✓ consumption stats valid JSON + ✓ consumption count correct + + memory promotion + ✓ promotion function exists + + fleet broadcast + ✓ fleet broadcast function exists + + + + ────────────────────────────────────────── + + All 36 tests passed + + + + Shipwright Doc Fleet Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Help + ✓ help shows title + ✓ help shows commands section + ✓ help shows fleet roles section + ✓ help shows examples + ✓ --help flag works + ✓ unknown command shows error + Roles + ✓ roles lists doc-architect + ✓ roles lists claude-md + ✓ roles lists strategy-curator + ✓ roles lists pattern-writer + ✓ roles lists readme-optimizer + Audit + ✓ audit shows health header + ✓ audit shows health score + ✓ audit checks doc inventory + ✓ audit checks CLAUDE.md + ✓ audit checks agent roles + ✓ audit creates state file + ✓ audit records health score (91%) + Launch + ✓ launch dry-run shows header + ✓ launch dry-run mentions dry-run + ✓ launch dry-run lists doc-architect + ✓ launch dry-run lists claude-md + ✓ launch dry-run lists strategy-curator + ✓ launch dry-run lists pattern-writer + ✓ launch dry-run lists readme-optimizer + ✓ launch dry-run shows agent count + ✓ launch specific role shows role + ✓ launch specific role shows 1 agent + ✓ launch invalid role shows error + Status + ✓ status shows header + ✓ status shows last run + ✓ status shows health score + ✓ status shows session list + Manifest + ✓ manifest shows generation + ✓ manifest file created + ✓ manifest has documents (16) + Report + ✓ report shows header + ✓ report shows inventory + ✓ report shows volume + ✓ report shows fleet state + ✓ report json shows JSON + ✓ JSON report file created + Retire + ✓ retire shows retiring + ✓ retire shows count + State Persistence + ✓ status shows run count after commands + Events + ✓ doc_fleet events logged (8 events) + Aliases + ✓ start alias works + ✓ stop alias works + + + + ────────────────────────────────────────── + + All 48 tests passed + + + + Shipwright Docs Agent Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Help + ✓ help shows usage + ✓ help shows commands + ✓ --help flag works + ✓ unknown command shows error + Coverage + ✓ coverage shows header + ✓ coverage shows total scripts + API Reference + ✓ api generation succeeds + ✓ api file has title + Wiki + ✓ wiki generation succeeds + ✓ wiki directory created + Scan + ✓ scan shows scanning + Sync + ✓ sync shows sync complete + Impact + ✓ impact shows analysis + State + ✓ docs-agent home directory exists + + + + ────────────────────────────────────────── + + All 14 tests passed + + +╔═══════════════════════════════════════════════════╗ +║ shipwright docs — Test Suite ║ +╚═══════════════════════════════════════════════════╝ + +Setting up test environment... + +AUTO Section Discovery + ▸ find_auto_files discovers CLAUDE.md... PASS + ▸ get_sections extracts section IDs... PASS + +Section Generators + ▸ Architecture table has headers and scripts... PASS + ▸ Architecture table includes CLI router... PASS + ▸ Feature flags table with intelligence config... PASS + ▸ Test suites table picks up test files... PASS + +Section Check & Replace + ▸ check_section detects fresh vs stale... PASS + ▸ replace_section updates content between markers... PASS + +Subcommands + ▸ docs check returns 1 when stale... PASS + ▸ docs sync then check returns 0 (fresh)... PASS + ▸ docs sync is idempotent... PASS + +CLI & Help + ▸ Help output contains all subcommands... PASS + ▸ Unknown command exits with error... PASS + ▸ Default (no args) shows help... PASS + +Edge Cases + ▸ No AUTO markers returns 0... PASS + ▸ Multiple sections all get processed... PASS + ▸ Purpose extracted from script headers... PASS + ▸ Wiki dry-run succeeds... PASS + + ────────────────────────────────────────── + 18 passed 0 failed (18 total) + + + + Shipwright Doctor Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + execution + ✓ doctor runs without crash + ✓ output shows PREREQUISITES + ✓ detects tmux + ✓ detects jq + ✓ detects Claude Code CLI + ✓ detects git + + structure + ✓ VERSION variable defined + ✓ Uses set -euo pipefail + ✓ ERR trap is set + ✓ check_pass helper defined + ✓ check_fail helper defined + ✓ output shows Shipwright header + + check logic for tools + ✓ Source checks for tmux + ✓ Source checks for jq + ✓ Source checks for Claude CLI + ✓ Source checks for git + ✓ Source checks for gh + + version flag + ✓ --version outputs sw-doctor and version + ✓ -V short flag works + + missing tool handling + ✓ Doctor reports when jq missing from PATH + ✓ output includes PREREQUISITES section + ✓ output includes INSTALLED FILES section + + auto-fix mode + ✓ --fix-dry flag shows dry-run output +✗ Overlay not found: ~/.tmux/shipwright-overlay.conf +✗ No ~/.claude/settings.json — agent teams not configured +✗ Overlay not found — pane display features unavailable +✗ iTerm2 mouse reporting is DISABLED — tmux cannot receive mouse clicks + ✓ --fix creates .claude directories + ✓ --fix creates ~/.shipwright directories + ✓ --fix creates valid daemon-config.json + ✓ --fix creates valid settings.json + ✓ --fix creates valid budget.json +✗ No ~/.claude/settings.json — agent teams not configured +✗ iTerm2 mouse reporting is DISABLED — tmux cannot receive mouse clicks + ✓ --fix is idempotent (second run succeeds) +✗ No ~/.claude/settings.json — agent teams not configured +✗ iTerm2 mouse reporting is DISABLED — tmux cannot receive mouse clicks + ✓ --fix handles existing config files safely + ✓ --fix without other args works + ✓ Auto-fix output shows AUTO-FIX SUMMARY + ✓ Auto-fix reports what was fixed + + + + ────────────────────────────────────────── + + All 0 tests passed + + + + shipwright dora test + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + +Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ source guard uses if/then/fi pattern + +Version + ✓ VERSION variable defined + +Help Output + ✓ help contains USAGE + ✓ help contains show subcommand + ✓ help contains dx subcommand + ✓ help contains ai subcommand + ✓ help contains trends subcommand + ✓ help contains compare subcommand + ✓ help contains export subcommand + ✓ help contains DORA BANDS + ✓ --help flag works + +Error Handling + ✓ unknown command exits non-zero + ✓ unknown command error message + +Show Subcommand (no events) + ✓ show displays DORA Metrics + ✓ show displays Deploy Frequency + ✓ show displays Lead Time + ✓ show displays Change Failure Rate + ✓ show displays MTTR + +DX Subcommand + ✓ dx displays Developer Experience + +AI Subcommand + ✓ ai displays AI Performance Metrics + +Export Subcommand + ✓ export produces JSON with timestamp + ✓ export contains current_period + ✓ export contains previous_period + +Trends Subcommand + ✓ trends displays Trends heading + +Compare Subcommand + ✓ compare displays Period Comparison + +DORA Band Classification + ✓ classify_band function defined + ✓ Elite band classification + ✓ High band classification + ✓ Medium band classification + ✓ Low band classification + +Trend Arrows + ✓ trend_arrow function defined + + + + + ────────────────────────────────────────── + + All 0 tests passed + + + + Shipwright Durable Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help / version + ✓ help exits 0 + ✓ help shows USAGE + ✓ help mentions publish + ✓ help mentions consume + ✓ help mentions checkpoint + ✓ help mentions lock + ✓ help mentions compact + ✓ help mentions status + ✓ VERSION variable defined + + publish events + ✓ publish exits 0 + ✓ WAL file created + ✓ Event written to WAL + + error handling + ✓ publish without args exits non-zero + ✓ Unknown command exits non-zero + + status command + ✓ status exits 0 + + checkpointing + ✓ checkpoint save exits 0 + ✓ Checkpoint file created + ✓ checkpoint restore exits 0 + + distributed locks + ✓ lock acquire exits 0 + ✓ lock release exits 0 + + script safety + ✓ Uses set -euo pipefail + ✓ Has source guard pattern + + + + ────────────────────────────────────────── + + All 0 tests passed + + + + Shipwright E2E Orchestrator Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + ✓ help shows usage text + ✓ help exits 0 + ✓ --help flag works + ✓ unknown command exits nonzero + ✓ registry file created on init + ✓ registry is valid JSON + ✓ registry has >= 3 default suites + ✓ register adds suite + ✓ duplicate register fails + ✓ quarantine adds test + ✓ quarantine list has entry + ✓ report handles no results + ✓ flaky handles no history + + + + ────────────────────────────────────────── + + All 0 tests passed + + + + Shipwright Eventbus Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help command + ✓ help exits 0 + ✓ help shows usage + ✓ help shows subcommands + ✓ --help exits 0 + + error handling + ✓ unknown command exits 1 + ✓ unknown command shows error + + status subcommand + ✓ status exits 0 with empty bus + ✓ status shows title + + publish subcommand + ✓ publish exits 0 + ✓ publish confirms + ✓ events.jsonl created + ✓ events.jsonl contains published event type + ✓ events.jsonl contains correlation_id + ✓ eventbus has 3+ events after multi-publish + + status with events + ✓ status with events exits 0 + ✓ status shows total events + ✓ status shows events by type + + clean subcommand + ✓ clean exits 0 + ✓ clean reports result + + replay subcommand + ✓ replay exits 0 + ✓ replay shows replaying + + publish error handling + ✓ publish with empty type exits 1 + ✓ watch with missing dir exits 1 + ✓ watch shows dir error + + + + ────────────────────────────────────────── + + All 24 tests passed + + + + Shipwright Feedback Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help command + ✓ help exits 0 + ✓ help shows usage + ✓ help shows subcommands + ✓ --help exits 0 + + error handling + ✓ unknown command exits 1 + ✓ unknown command shows error + + collect subcommand + ✓ collect on empty dir exits 0 + ✓ collect shows collecting + ✓ collect shows save path + + collect with error log + ✓ collect with errors exits 0 + ✓ collect reports errors + + analyze subcommand + ✓ analyze missing file exits 1 + ✓ analyze shows not found + ✓ analyze exits 0 + ✓ analyze shows report + + learn subcommand + ✓ learn exits 0 + ✓ learn confirms capture + ✓ incidents.jsonl created + ✓ incidents.jsonl has valid JSONL + + report subcommand + ✓ report exits 0 + ✓ report shows incidents + ✓ report shows total + ✓ report no incidents exits 0 + ✓ report says no incidents + + create-issue subcommand + ✓ create-issue with NO_GITHUB exits 0 + ✓ create-issue skips with NO_GITHUB + + post-merge monitoring + ✓ post-merge exits 0 + ✓ post-merge shows monitoring + ✓ post-merge creates monitoring file + ✓ monitoring has merge_sha + ✓ monitoring has environment + + regression detection + ✓ regression detection outputs valid JSON + ✓ no regression flag + ✓ deploy failure detects regression + ✓ deploy failure is P0 + ✓ deploy failure type + ✓ error spike detects regression + ✓ error spike is P1 + ✓ error spike type + + correlate with changes + ✓ correlate outputs valid JSON + ✓ correlation has pr_number + + learn from outcome + ✓ outcomes exits 0 + ✓ outcomes shows recording + ✓ outcomes creates merge-outcomes file + ✓ outcomes file has valid JSONL + ✓ outcome has pr_number + + health report + ✓ health with no data exits 0 + ✓ health shows no data message + ✓ health shows statistics + ✓ health shows success rate + ✓ health shows regressions + + integrated post-merge workflow + ✓ workflow: monitoring complete + ✓ workflow: regression detection valid + ✓ workflow: correlation valid + ✓ workflow: outcome recorded + + + + ────────────────────────────────────────── + + All 55 tests passed + + +╔════════════════════════════════════════════════════════════╗ +║ Outcome Feedback Test Suite ║ +╚════════════════════════════════════════════════════════════╝ + +▸ Skipping review capture (NO_GITHUB set) +✓ review feedback capture stores correct JSON +✓ Computed merge quality: PR #100 score=1 (clean_merge) +✓ merge quality scoring clean merge → +1 +✓ Computed merge quality: PR #101 score=-1 (changes_requested) +✓ merge quality scoring changes_requested → -1 +✓ Computed merge quality: PR #102 score=-3 (reverted) +✓ merge quality scoring reverted → -3 +✓ Computed merge quality: PR #103 score=-2 (regression) +✓ merge quality scoring regression → -2 +✓ Computed merge quality: PR #200 score=1 (clean_merge) +✓ Computed merge quality: PR #201 score=1 (clean_merge) +✓ Computed merge quality: PR #202 score=-1 (changes_requested) +✓ rolling quality score calculation +✓ pattern detection: 3 error_handling comments → detected +✓ pattern detection: 2 comments → not detected +▸ Generating learned rules from review patterns... +✓ Added learned rule: testing (confidence: 0.6) +✓ learned rule generation creates valid JSON +▸ Generating learned rules from review patterns... +▸ Rule already exists for category: error_handling +▸ Generating learned rules from review patterns... +▸ Rule already exists for category: error_handling +✓ learned rule addition to quality profile is idempotent +▸ Running post-merge feedback collection for PR #700... +▸ Skipping review capture (NO_GITHUB set) +✓ Computed merge quality: PR #700 score=1 (clean_merge) +✓ Post-merge feedback collection complete +✓ post-merge feedback runs all steps +✓ Computed merge quality: PR #800 score=1 (clean_merge) +✓ works when no previous feedback exists (cold start) + +╔════════════════════════════════════════════════════════════╗ +║ PASSED: 12 FAILED: 0 TOTAL: 12 ║ +╚════════════════════════════════════════════════════════════╝ + + +╔═══════════════════════════════════════════════════════════════════╗ +║ shipwright fix test — Unit Tests ║ +╚═══════════════════════════════════════════════════════════════════╝ + +Setting up test environment... +✓ Environment ready: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-fix-test.LlaaBt + + ▸ Help output contains expected sections... ✓ + ▸ Help via --help flag... ✓ + ▸ Missing goal shows error... ✓ + ▸ Missing repos shows error... ✓ + ▸ Arg parsing — --repos comma-separated... ✓ + ▸ Arg parsing — --repos-from file... ✓ + ▸ Arg parsing — --repos-from missing file... ✓ + ▸ Arg parsing — --pipeline template... ✓ + ▸ Arg parsing — --max-parallel... ✓ + ▸ Arg parsing — --branch-prefix... ✓ + ▸ Dry run shows what would happen... ✓ + ▸ Dry run does not create state file... ✓ + ▸ Fix status — no sessions... ✓ + ▸ Fix status shows existing sessions... ✓ + ▸ Invalid repo directory shows error... ✓ + ▸ Fix start creates state file... ✓ + ▸ Fix start emits events... ✓ + ▸ Fix start — summary output... ✓ + ▸ Branch name sanitization... ✓ + ▸ Fix header shows configuration... ✓ + ▸ Non-git repo warning... ✓ + ▸ Per-repo event tracking... ✓ + +━━━ Results ━━━ + Passed: 22 + Failed: 0 + Total: 22 + +All 22 tests passed! + + + + shipwright fleet-discover test + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + +Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ source guard uses if/then/fi pattern + +Version + ✓ VERSION variable defined + +Help Output + ✓ help contains USAGE + ✓ help contains --org option + ✓ help contains --language option + ✓ help contains --dry-run option + ✓ help contains --json option + ✓ help contains --topic option + ✓ help contains --exclude-topic option + ✓ help contains --min-activity-days + +Argument Validation + ✓ missing --org exits non-zero + ✓ missing --org error message + +Error Handling + ✓ unknown option exits non-zero + ✓ unknown option error + +NO_GITHUB Check + ✓ NO_GITHUB blocks discovery + ✓ NO_GITHUB shows error + +Argument Parsing + ✓ supports --org=value syntax + ✓ supports --config=value syntax + ✓ supports --language=value syntax + ✓ supports --topic=value syntax + +Config Merge + ✓ merge added new repo + ✓ merge preserved existing repo + +Filter Logic + ✓ filters archived repos + ✓ filters disabled repos + ✓ checks has_issues + ✓ language filter applied + ✓ topic filter applied + ✓ exclude topic filter + ✓ checks .shipwright-ignore + +Event Emission + ✓ emits fleet.discover.completed event + ✓ emits fleet.discover.merged event + + + + + ────────────────────────────────────────── + + All 0 tests passed + + +╔═══════════════════════════════════════════════════════════════════╗ +║ shipwright fleet test — Unit Tests ║ +╚═══════════════════════════════════════════════════════════════════╝ + +Setting up test environment... +✓ Environment ready: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-fleet-test.TROF8n + + ▸ Help output contains expected sections... ✓ + ▸ Help via --help flag... ✓ + ▸ Config parsing — valid config... ✓ + ▸ Config parsing — missing config file... ✓ + ▸ Config parsing — invalid JSON... ✓ + ▸ Config parsing — empty repos array... ✓ + ▸ Config defaults applied... ✓ + ▸ Fleet init generates config template... ✓ + ▸ Fleet init skips when config exists... ✓ + ▸ Fleet start spawns tmux sessions per repo... ✓ + ▸ Fleet start skips missing repos... ✓ + ▸ Fleet start skips existing sessions... ✓ + ▸ Fleet start creates fleet state file... ✓ + ▸ Fleet start emits fleet.started event... ✓ + ▸ Fleet start applies repo-level overrides... ✓ + ▸ Fleet stop kills sessions and cleans state... ✓ + ▸ Fleet stop — no fleet running... ✓ + ▸ Fleet stop emits fleet.stopped event... ✓ + ▸ Fleet status — no fleet running... ✓ + ▸ Fleet status shows dashboard... ✓ + ▸ Fleet metrics — no events file... ✓ + ▸ Fleet metrics dashboard output... ✓ + ▸ Fleet metrics JSON output... ✓ + ▸ Fleet metrics period flag... ✓ + ▸ Session name generation... ✓ + ▸ Fleet start skips non-git repos... ✓ + ▸ Unknown subcommand... ✓ + +━━━ Results ━━━ + Passed: 27 + Failed: 0 + Total: 27 + +All 27 tests passed! + + + + shipwright fleet-viz test + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + +Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ source guard uses if/then/fi pattern + +Version + ✓ VERSION variable defined + +Help Output + ✓ help contains USAGE + ✓ help contains overview subcommand + ✓ help contains workers subcommand + ✓ help contains insights subcommand + ✓ help contains queue subcommand + ✓ help contains costs subcommand + ✓ help contains export subcommand + ✓ --help flag works + +Error Handling + ✓ unknown command exits non-zero + ✓ unknown command error message + +Overview Subcommand + ✓ overview shows Fleet Overview + ✓ overview shows Active count + ✓ overview shows Queued count + ✓ overview shows Repos count + +Workers Subcommand + ✓ workers shows Worker Allocation + ✓ workers shows Remote Machines + +Queue Subcommand + ✓ queue shows Issue Queue + ✓ queue shows queued items + +Costs Subcommand + ✓ costs shows Fleet Costs + ✓ costs shows Total Spend + ✓ costs shows Per-Repo + ✓ costs shows Per-Model + +Export Subcommand + ✓ export produces JSON with active_jobs + +Insights Subcommand + ✓ insights shows Fleet Insights + ✓ insights shows Success Rate + +Default Command + ✓ default command shows Fleet Overview + +Health Helpers + ✓ get_health_status function defined + ✓ color_health function defined + ✓ healthy status handled + ✓ degraded status handled + ✓ failing status handled + + + + + ────────────────────────────────────────── + + All 0 tests passed + + +╔═══════════════════════════════════════════════════╗ +║ shipwright frontier — Test Suite ║ +╚═══════════════════════════════════════════════════╝ + +Setting up test environment... + +Adversarial Review + ▸ Adversarial review produces structured findings... ✓ + ▸ Adversarial iteration converges on no critical findings... ✓ + +Developer Simulation + ▸ Simulation generates objections from 3 personas... ✓ + ▸ Simulation address returns action items... ✓ + +Architecture Enforcer + ▸ Architecture model has valid schema... ✓ + ▸ Architecture validates changes (violation detected)... ✓ + +Graceful Degradation + ▸ All three degrade gracefully when claude unavailable... ✓ + +════════════════════════════════════════════════════ + All 7 tests passed ✓ +════════════════════════════════════════════════════ + + + + Shipwright GitHub App Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Help + ✓ help shows usage + ✓ help shows commands + ✓ --help flag works + ✓ unknown command shows error + Status + ✓ status without config warns + Events + ✓ events with no log warns + Manifest + ✓ manifest contains app name + ✓ manifest contains webhook URL + ✓ manifest success message + ✓ manifest output is valid JSON + Configured Status + ✓ configured status shows app ID + ✓ configured status shows install ID + ✓ events shows recent events + Token + ✓ token without key file errors + Verify + ✓ verify without secret errors + + + + ────────────────────────────────────────── + + All 15 tests passed + + +╔═══════════════════════════════════════════════════╗ +║ shipwright github-checks — Test Suite ║ +╚═══════════════════════════════════════════════════╝ + +Setting up test environment... + +Checks API Availability + ▸ _gh_checks_available: returns true when API accessible... {"check_runs":[],"total_count":0} +✓ + ▸ _gh_checks_available: returns false on 403... ✓ + +Check Run CRUD + ▸ gh_checks_create_run: returns run ID from response... ✓ + ▸ gh_checks_create_run: handles 403 gracefully... ⚠ Failed to create check run 'test-check' (API returned 1) +ERROR: scripts/sw-github-checks-test.sh:224 exited with status 1 +✓ + ▸ gh_checks_update_run: sends correct PATCH request... ✓ + ▸ gh_checks_update_run: skips when run_id empty... ✓ + ▸ gh_checks_annotate: respects 50-annotation limit... ✓ + ▸ gh_checks_list_runs: parses response correctly... ✓ + ▸ gh_checks_complete: convenience wrapper works... ✓ + +Pipeline Integration + ▸ gh_checks_pipeline_start: creates runs for all stages... ✓ + ▸ gh_checks_stage_update: looks up stored run IDs... ✓ + +NO_GITHUB Guard + ▸ NO_GITHUB: all functions return early... ✓ + +════════════════════════════════════════════════════ + All 12 tests passed ✓ +════════════════════════════════════════════════════ + + +╔═══════════════════════════════════════════════════╗ +║ shipwright github-deploy — Test Suite ║ +╚═══════════════════════════════════════════════════╝ + +Setting up test environment... + +Deployment CRUD + ▸ gh_deploy_create: returns deployment ID... ✓ + ▸ gh_deploy_create: handles 403 gracefully... ⚠ Failed to create deployment for ref 'main' to 'production' (API returned 1) +ERROR: scripts/sw-github-deploy-test.sh:158 exited with status 1 +✓ + ▸ gh_deploy_update_status: sends correct POST... ✓ + ▸ gh_deploy_update_status: skips when deploy_id empty... ✓ + ▸ gh_deploy_list: parses deployment list... ✓ + ▸ gh_deploy_latest: returns first result... ✓ + +Rollback + ▸ gh_deploy_rollback: creates new deployment with prev ref... ▸ Rolling back to ref: v2.9 +✓ Rolled back to v2.9 (deployment 301) +✓ + +Pipeline Integration + ▸ gh_deploy_pipeline_start: stores deployment ID... ✓ + ▸ gh_deploy_pipeline_complete: updates status correctly... ✓ + +NO_GITHUB Guard + ▸ NO_GITHUB: all functions return early... ✓ + +════════════════════════════════════════════════════ + All 10 tests passed ✓ +════════════════════════════════════════════════════ + + +━━━ shipwright github-graphql tests ━━━ + + ▸ Fresh cache returns cached data... ✓ + ▸ Stale cache returns miss... ✓ + ▸ Atomic writes leave no temp files... ✓ + ▸ gh_file_change_frequency returns count... ✓ + ▸ gh_contributors returns parsed list... ✓ + ▸ gh_similar_issues truncates long text... ✓ + ▸ gh_branch_protection handles 404... ✓ + ▸ gh_security_alerts handles 403... ✓ + ▸ gh_dependabot_alerts handles 403... ✓ + ▸ gh_repo_context aggregates data... ✓ + ▸ NO_GITHUB returns defaults... ✓ + ▸ _gh_detect_repo parses SSH URL... ✓ + ▸ _gh_detect_repo parses HTTPS URL... ✓ + ▸ _gh_detect_repo parses HTTPS without .git... ✓ + ▸ gh_codeowners parses CODEOWNERS file... ✓ + ▸ Cache clear removes all files... ✓ + ▸ gh_commit_history returns parsed commits... ✓ + ▸ gh_blame_data aggregates authors... ✓ + ▸ gh_actions_runs calculates duration... ✓ + ▸ Events emitted for cache hit/miss... ✓ + +━━━ Results ━━━ + Passed: 20 + Failed: 0 + Total: 20 + +All 20 tests passed! + + + + Shipwright Guild Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + ✓ help shows usage text + ✓ help exits 0 + ✓ --help flag works + ✓ no args shows help + ✓ list shows Available Guilds + ✓ guild config.json created + ✓ guild config is valid JSON + ✓ guilds.json data file created + ✓ show security guild + ✓ show invalid guild exits nonzero + ✓ show without name exits nonzero + ✓ add pattern succeeds + ✓ pattern saved in data file + ✓ report shows guild data + ✓ report for specific guild + ✓ inject security shows knowledge + ✓ unknown command exits nonzero + + + + ────────────────────────────────────────── + + All 0 tests passed + + +╔═══════════════════════════════════════════════════╗ +║ shipwright heartbeat + checkpoint — Test Suite ║ +╚═══════════════════════════════════════════════════╝ + +Setting up test environment... + +Heartbeat Lifecycle + ▸ Write heartbeat creates JSON file... ✓ + ▸ Check heartbeat reports alive... ✓ Job test-job-alive alive (0s ago) +✓ + ▸ Check heartbeat reports stale... ⚠ Job test-job-stale stale (197469510s ago, timeout: 120s) +✓ + ▸ Clear heartbeat removes file... ✓ Cleared heartbeat for job: test-job-clear +✓ + ▸ List heartbeats returns JSON array... ✓ + ▸ Heartbeat update overwrites existing... ✓ + ▸ Check missing heartbeat returns error... ✓ + ▸ Heartbeat dir auto-created when missing... ✓ + +Checkpoint Lifecycle + ▸ Checkpoint save creates JSON file... ✓ Checkpoint saved for stage build (iteration 5) +✓ + ▸ Checkpoint restore outputs JSON... ✓ Checkpoint saved for stage test (iteration 3) +✓ + ▸ Checkpoint restore missing stage fails... ✓ + ▸ Checkpoint clear removes file... ✓ Checkpoint saved for stage review (iteration 1) +✓ Cleared checkpoint for stage review +✓ + ▸ Checkpoint clear --all removes all... ✓ Checkpoint saved for stage build (iteration 1) +✓ Checkpoint saved for stage test (iteration 2) +✓ Cleared 2 checkpoint(s) +✓ + ▸ Checkpoint save with files-modified... ✓ Checkpoint saved for stage build (iteration 7) +✓ + +Integration + ▸ Pipeline script has heartbeat functions... ✓ + ▸ Loop script has heartbeat and checkpoint... ✓ + ▸ Pipeline has human intervention checks... ✓ + +════════════════════════════════════════════════════ + All 17 tests passed ✓ +════════════════════════════════════════════════════ + +sw-hello-test.sh + ✓ hello command outputs 'Shipwright vX.Y.Z' + ✓ hello command exits with code 0 + ✓ hello --help displays help text + ✓ hello -h displays help text + ✓ hello --version displays 'Shipwright vX.Y.Z' + ✓ hello version matches package.json (3.2.4) + ✓ hello with invalid option exits with code 1 + +PASS: 7 +FAIL: 0 + + + Shipwright Hygiene Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help command + ✓ help exits 0 + ✓ help shows usage + ✓ help shows subcommands + ✓ --help exits 0 + + error handling + ✓ unknown subcommand exits 1 + ✓ unknown subcommand shows error + + report subcommand + ✓ report exits 0 + ✓ report shows generating + ✓ report exits 0 + ✓ report creates JSON file + ✓ report JSON is valid + ✓ report JSON has timestamp + ✓ report JSON has sections + + structure subcommand + ✓ structure exits 0 + ✓ structure reports validating + + naming subcommand + ✓ naming exits 0 + ✓ naming shows checking + + dead-code subcommand + ✓ dead-code exits 0 + ✓ dead-code shows scanning + + dependencies subcommand + ✓ dependencies exits 0 + ✓ dependencies shows auditing + + platform-refactor subcommand + ✓ platform-refactor exits 0 + ✓ platform-refactor scans for hardcoded/fallback + ✓ platform-refactor creates platform-hygiene.json with counts + + policy read (policy_get from config) + ✓ policy_get returns value from config + ✓ policy_get returns default when key missing + + + + ────────────────────────────────────────── + + All 26 tests passed + + + + Shipwright Incident Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help / version + ✓ help exits 0 + ✓ help shows USAGE + ✓ help mentions watch + ✓ help mentions list + ✓ help mentions report + ✓ help mentions stats + ✓ VERSION variable defined + + error handling + ✓ Unknown command exits non-zero + + list command + ✓ list with no incidents exits 0 + + stats command + ✓ stats with no data exits 0 + + state management + ✓ Incident config created + ✓ Incident config is valid JSON + + script safety + ✓ Uses set -euo pipefail + ✓ Has source guard pattern + ✓ ERR trap is set + + config subcommand + ✓ config subcommand creates valid JSON config + + subcommand usage +ERROR: scripts/sw-incident-test.sh:203 exited with status 1 + ✓ show subcommand fails or shows usage when missing args + ✓ report subcommand shows usage when missing args + + detect_pipeline_failures + ✓ detect_pipeline_failures defined and callable + ✓ report with nonexistent incident handles gracefully + ✓ gap list subcommand produces expected output + + timeline updates + ✓ Timeline update creates valid JSON + ✓ Timeline entry has all required fields + + correlation engine + ✓ Correlation engine produces valid JSON + + escalation logic + ✓ Escalation includes P0 rules + ✓ Escalation function defined + + rollback verification + ✓ Rollback verification function defined + ✓ Rollback verification updates timeline + + auto-remediate subcommand + ✓ auto-remediate subcommand shows usage when missing args + + deep analysis + ✓ Deep analysis function defined + ✓ Deep analysis returns valid JSON with required fields + ✓ Config includes auto_remediate_enabled flag + ✓ Config includes escalation rules for all severities + + post-mortem report + ✓ Post-mortem report includes Timeline section + ✓ Post-mortem report references timeline fields + + + + ────────────────────────────────────────── + + All 0 tests passed + + +╔═══════════════════════════════════════════════════╗ +║ shipwright init — E2E Test Suite ║ +╚═══════════════════════════════════════════════════╝ + +Setting up sandboxed environment... +Temp dir: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-init-test.A30bb1 + +Configuration + ▸ Settings.json created with agent teams... ✓ + ▸ Settings merge preserves existing vars... ✓ + ▸ tmux.conf installed... ✓ + ▸ Overlay installed... ✓ + +Templates + ▸ Team templates installed (>= 10)... ✓ + ▸ Pipeline templates installed (>= 5)... ✓ + ▸ Legacy templates path populated... ✓ + +Robustness + ▸ Idempotency — double init safe... ✓ + ▸ Doctor runs at end... ✓ + ▸ Help flag... ✓ + +Hook Wiring + ▸ JSONC stripped from settings.json... ✓ + ▸ Hooks wired into settings.json... ✓ + ▸ Hook wiring preserves existing hooks... ✓ + ▸ SessionStart hook installed... ✓ + ▸ Hook wiring with pre-existing settings... ✓ + +Repair & Cleanup + ▸ Legacy overlay cleanup... ✓ + ▸ Legacy overlay source-file reference stripped... ✓ + ▸ Repair mode forces clean reinstall... ✓ + ▸ Plugin direct-clone fallback (outside tmux)... ✓ + ▸ Post-install verification... ✓ + ▸ tmux adapter deployed... ✓ + +════════════════════════════════════════════════════ + All 21 tests passed ✓ +════════════════════════════════════════════════════ + + + + Shipwright Instrument Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Help & Version + ✓ help shows usage + ✓ help shows commands + ✓ help shows start + ✓ help shows record + ✓ help shows version + + Start Command + ✓ start without --run-id exits non-zero + ✓ start without --run-id shows error + ✓ start with --run-id exits 0 + ✓ start confirms run ID + ✓ start creates run file + ✓ run file contains correct run_id + ✓ run file contains correct issue + + Record Command + ✓ record exits 0 + ✓ record confirms metric + ✓ run file has 1 metric + ✓ record without all args exits non-zero + ✓ record on missing run exits non-zero + ✓ record on missing run shows error + + Stage Start/End + ✓ stage-start exits 0 + ✓ stage-start confirms stage + ✓ stage-end exits 0 + ✓ stage result recorded + + Finish Command + ✓ finish exits 0 + ✓ finish confirms completion + ✓ finish removes active run file + ✓ finish writes to completed JSONL + + Trends & Export + ✓ trends with no data exits 0 + ✓ trends with no data warns + ✓ export with no data exits 0 + ✓ export with no data warns + + Error Handling + ✓ unknown command exits non-zero + ✓ unknown command shows error + ✓ events.jsonl created from instrument operations + + + + ────────────────────────────────────────── + + All 33 tests passed + + +╔═══════════════════════════════════════════════════════════════════╗ +║ shipwright intelligence test — Unit Tests ║ +╚═══════════════════════════════════════════════════════════════════╝ + +Setting up test environment... +✓ Environment ready: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-intelligence-test.FKhlqe + + ▸ analyze_issue returns valid schema... ✓ + ▸ Cache hit on second call with same input... WARN: Unknown event type 'intelligence.cache_hit' — update config/event-schema.json +✓ + ▸ Graceful degradation when claude CLI unavailable... ✓ + ▸ compose_pipeline produces valid pipeline JSON... ✓ + ▸ recommend_model returns valid model names... WARN: Unknown event type 'intelligence.model' — update config/event-schema.json +WARN: Unknown event type 'intelligence.model' — update config/event-schema.json +WARN: Unknown event type 'intelligence.model' — update config/event-schema.json +WARN: Unknown event type 'intelligence.model' — update config/event-schema.json +✓ + ▸ predict_cost returns numeric estimates... WARN: Unknown event type 'intelligence.prediction' — update config/event-schema.json +✓ + ▸ Cache TTL expiry returns miss... scripts/sw-intelligence-test.sh: line 334: _intelligence_md5: command not found +✓ + ▸ search_memory returns ranked results... ✓ + ▸ Feature flag disabled returns fallback... ✗ Expected 'intelligence_disabled', got 'null' (returns intelligence_disabled error) +✓ + ▸ Events emitted for analysis... ✓ + ▸ recommend_model emits events... WARN: Unknown event type 'intelligence.model' — update config/event-schema.json +✓ + ▸ Cache init creates file if missing... ✓ + +━━━ Results ━━━ + Passed: 12 + Failed: 0 + Total: 12 + +All 12 tests passed! + + + + Shipwright Jira Test Suite + ══════════════════════════════════════════ + + + Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ VERSION variable defined + ✓ VERSION is semver: 3.2.4 + + Help Output + ✓ help mentions USAGE + ✓ help mentions sync command + ✓ help mentions update command + ✓ help mentions status command + ✓ help mentions init command + ✓ help mentions JIRA_BASE_URL + ✓ --help works + + Error Handling + +shipwright jira — Jira ↔ GitHub Bidirectional Sync + +USAGE + shipwright jira [options] + +COMMANDS + sync [--dry-run] Sync Jira To Do issues → GitHub + update Update linked Jira ticket status + status Show Jira board dashboard + init Configure Jira connection + help Show this help + +STATUS VALUES + started Pipeline spawned → Jira: In Progress + review PR created → Jira: In Review + done Pipeline complete → Jira: Done + failed Pipeline failed → Jira: adds failure comment + +EXAMPLES + shipwright jira init # Set up Jira connection + shipwright jira sync # Sync To Do → GitHub + shipwright jira sync --dry-run # Preview what would sync + shipwright jira update 42 started # Mark as In Progress + shipwright jira update 42 review # Mark as In Review + shipwright jira update 42 done # Mark as Done + shipwright jira status # Show board dashboard + +ENVIRONMENT + JIRA_BASE_URL Jira instance URL (or use 'jira init' to save) + JIRA_EMAIL Account email for authentication + JIRA_API_TOKEN API token from Atlassian account + JIRA_PROJECT_KEY Jira project key (e.g. PROJ) + ✓ unknown command exits non-zero + ✓ unknown command shows error + + Default Behavior + ✓ no-arg defaults to help + + Configuration + ✓ sync without config shows error + ✓ update without config shows error + ✓ status without config shows error + + Config Loading + ✓ sync with config proceeds + + Update Subcommand + ✓ update without args shows usage + + Notify Integration + ✓ notify subcommand executes without crash + + Atomic Writes + ✓ init uses atomic write (tmp + mv) + ✓ config file gets restricted permissions + + Event Emission + ✓ emits jira events + ✓ uses EVENTS_FILE for event logging + + + + + ────────────────────────────────────────── + + All 0 tests passed + + +╔═══════════════════════════════════════════════════╗ +║ shipwright launchd + systemd — Test Suite ║ +╚═══════════════════════════════════════════════════╝ + +Setting up test environment... + +OS Detection + ▸ macOS detection sets OSTYPE correctly... ✓ + ▸ Linux detection routes to systemd... ✓ + +macOS Plist Generation + ▸ Daemon plist has correct structure... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +✓ + ▸ Dashboard plist has correct arguments... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +✓ + ▸ Connect plist created only when team-config exists... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/Library/LaunchAgents/com.shipwright.dashboard.plist +▸ Skipping connect plist — /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/.shipwright/team-config.json not found +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/fresh-home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +✓ + ▸ Plist files have correct permissions... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +✓ + +Install Command + ▸ Install creates LaunchAgents directory... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +✓ + ▸ Install calls launchctl load... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +scripts/sw-launchd-test.sh: line 457: [[: 0 +0: syntax error in expression (error token is "0") +✓ + ▸ Install fails gracefully if sw binary not found... ✓ + +Uninstall Command + ▸ Uninstall removes plist files... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +▸ Uninstalling launchd agents... +Mock unloaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Unloaded daemon service +✓ Removed daemon plist +Mock unloaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Unloaded dashboard service +✓ Removed dashboard plist +Mock unloaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Unloaded connect service +✓ Removed connect plist + +✓ Uninstalled all launchd agents +✓ + ▸ Uninstall calls launchctl unload... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +▸ Uninstalling launchd agents... +Mock unloaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Unloaded daemon service +✓ Removed daemon plist +Mock unloaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Unloaded dashboard service +✓ Removed dashboard plist +Mock unloaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Unloaded connect service +✓ Removed connect plist + +✓ Uninstalled all launchd agents +scripts/sw-launchd-test.sh: line 549: [[: 0 +0: syntax error in expression (error token is "0") +✓ + ▸ Uninstall on empty system doesn't error... ▸ Uninstalling launchd agents... + +✓ Uninstalled all launchd agents +✓ + +Status Command + ▸ Status command checks launchctl list... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +✓ + ▸ Status shows log directory... ✓ + +Help Command + ▸ Help command shows usage and examples... ✓ + ▸ Help is shown for unknown commands... ✓ + +Environment & Configuration + ▸ Plist contains correct environment variables... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +✓ + ▸ Working directory is set in plist... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +✓ + ▸ Plist configures stdout and stderr logging... ▸ Installing launchd agents... +✓ Created daemon plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +⚠ server.ts not found at /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T/sw-launchd-test.G2ePKG/dashboard/server.ts — dashboard plist will reference a missing file +✓ Created dashboard plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Created connect plist: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +▸ Loading launchd services... +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.daemon.plist +✓ Loaded daemon service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.dashboard.plist +✓ Loaded dashboard service +Mock loaded: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/Library/LaunchAgents/com.shipwright.connect.plist +✓ Loaded connect service + +▸ Services will auto-start on next login +▸ View logs: tail -f /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-launchd-test.G2ePKG/home/.shipwright/logs/*.log +▸ Uninstall: shipwright launchd uninstall +✓ + ▸ Version variable is defined... ✓ + +════════════════════════════════════════════════════ + All 20 tests passed ✓ +════════════════════════════════════════════════════ + + + + Shipwright Linear Test Suite + ══════════════════════════════════════════ + + + Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + ✓ VERSION variable defined + ✓ VERSION is semver: 3.2.4 + + Help Output + ✓ help mentions USAGE + ✓ help mentions sync command + ✓ help mentions update command + ✓ help mentions status command + ✓ help mentions init command + ✓ help mentions LINEAR_API_KEY + ✓ --help works + + Error Handling + +shipwright linear — Linear ↔ GitHub Bidirectional Sync + +USAGE + shipwright linear [options] + +COMMANDS + sync [--dry-run] Sync Linear Todo issues → GitHub + update Update linked Linear ticket status + status Show sync dashboard + init Configure Linear API key + help Show this help + +STATUS VALUES + started Pipeline spawned → Linear: In Progress + review PR created → Linear: In Review + done Pipeline complete → Linear: Done + failed Pipeline failed → Linear: adds failure comment + +EXAMPLES + shipwright linear init # Set up API key + shipwright linear sync # Sync Todo → GitHub + shipwright linear sync --dry-run # Preview what would sync + shipwright linear update 42 started # Mark as In Progress + shipwright linear update 42 review # Mark as In Review + shipwright linear update 42 done # Mark as Done + shipwright linear status # Show dashboard + +ENVIRONMENT + LINEAR_API_KEY API key (or use 'linear init' to save) + LINEAR_TEAM_ID Override team ID + LINEAR_PROJECT_ID Override project ID + ✓ unknown command exits non-zero + ✓ unknown command shows error + + Default Behavior + ✓ no-arg defaults to help + + Configuration + ✓ sync without API key shows error + ✓ update without API key shows error + ✓ status without API key shows error + + Config Loading + ✓ sync with config proceeds + + Update Subcommand + ✓ update without args shows usage + + Notify Integration + ✓ notify subcommand executes without crash + + Atomic Writes + ✓ init uses atomic write (tmp + mv) + ✓ config file gets restricted permissions + + GraphQL Helper + ✓ linear_graphql helper defined + ✓ uses jq --arg for safe JSON escaping + + Event Emission + ✓ emits linear events + ✓ uses EVENTS_FILE for event logging + + + + + ────────────────────────────────────────── + + All 0 tests passed + + + + shipwright logs test + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + +Script Safety + ✓ set -euo pipefail present + ✓ ERR trap present + +Version + ✓ VERSION variable defined + +Help Output + ✓ help contains USAGE + ✓ help contains --pane option + ✓ help contains --follow option + ✓ help contains --grep option + ✓ help contains --capture option + ✓ help contains -f shorthand + +Help Exit Code + ✓ help exits 0 + ✓ -h exits 0 + +Error Handling + ✓ unknown option exits non-zero + ✓ unknown option error + +List Logs + ✓ list shows Agent Logs heading + ✓ list shows team directory + +Team Logs + ✓ team logs shows team name + ✓ team logs lists log files + ✓ team logs shows builder log + +Grep Search + ✓ grep finds ERROR pattern + ✓ grep shows file context + ✓ grep shows no matches warning + +Pane Filter + ✓ pane filter shows reviewer logs + ✓ nonexistent pane warns + +Capture Command + ✓ capture reports status + +Missing Arguments + ✓ missing --pane value exits non-zero + ✓ missing --pane shows error + ✓ missing --grep value exits non-zero + ✓ missing --grep shows error + +Intelligence Integration + ✓ intelligence_available function defined + ✓ semantic_rank_results function defined + +Script Structure + ✓ LOGS_DIR defined + ✓ capture_logs function defined + ✓ list_logs function defined + ✓ show_team_logs function defined + + + + + ────────────────────────────────────────── + + All 0 tests passed + + + + Shipwright Loop Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + help / version + ✓ --help exits 0 + ✓ --help shows usage + ✓ --help shows options + ✓ --help mentions --max-iterations + ✓ --help mentions --test-cmd + ✓ --help mentions --model + ✓ --help mentions --agents + ✓ --help mentions --resume + ✓ VERSION variable defined in sw-loop.sh + + argument parsing + ✓ No arguments exits non-zero + + script safety + ✓ Uses set -euo pipefail + ✓ ERR trap is set + ✓ SIGHUP trap set for daemon resilience + ✓ CLAUDECODE env var is unset + + defaults + ✓ Default MAX_ITERATIONS is 20 + ✓ Default AGENTS is 1 + ✓ Default MAX_RESTARTS is 0 + ✓ Sources lib/compat.sh + + json output format + ✓ build_claude_flags includes --output-format json + effort level flag + ✓ build_claude_flags supports --effort + fallback model flag + ✓ build_claude_flags supports --fallback-model + ✓ accumulate_loop_tokens parses JSON usage + ✓ LOOP_COST_MILLICENTS initialized + ✓ write_loop_tokens includes cost_usd + ✓ _extract_text_from_json helper defined + ✓ validate_claude_output helper defined + ✓ check_budget_gate helper defined + ✓ run_claude_iteration separates stdout from stderr + + json extraction robustness + ✓ _extract_text_from_json handles empty file + ✓ _extract_text_from_json extracts .result from JSON + ✓ _extract_text_from_json passes through plain text + + default config from source + ✓ Default MAX_ITERATIONS is 20 (from source) + ✓ Default AGENTS is 1 (from source) + ✓ Default MAX_RESTARTS is 0 (from source) + + json extraction edge cases + ✓ _extract_text_from_json handles nested JSON objects + ✓ _extract_text_from_json handles binary garbage without crash + + script structure + ✓ Script has circuit breaker logic + ✓ Script has stuckness detection + ✓ Script has test/quality gate functions + ✓ Help text defines --model and --agents flags + ✓ Help text defines --test-cmd and --resume flags + help mentions --effort + ✓ Help text defines --effort flag + help mentions --fallback-model + ✓ Help text defines --fallback-model flag + + loop behavior: LOOP_COMPLETE + ✓ Loop detected completion signal + + loop behavior: iterations on test failure + ✓ Loop runs multiple iterations when tests fail initially + + loop behavior: max iterations + ✓ Loop stops at max iterations + + loop behavior: stuckness detection + ✓ Loop stops at limit (stuckness test) + + loop behavior: budget gate + ✓ Budget gate stops loop + + validate_claude_output + ✓ validate_claude_output catches corrupt output + + loop behavior: progress tracking + ✓ Loop tracks progress via git + + context efficiency metrics + ✓ loop.context_efficiency event exists in run_claude_iteration + ✓ Context efficiency emits raw and trimmed char counts + ✓ Context efficiency emits trim_ratio and budget_utilization + ✓ raw_prompt_chars measured from pre-trim prompt + + multi-test gate + ✓ ADDITIONAL_TEST_CMDS variable defined + ✓ --additional-test-cmds flag in arg parser + ✓ --help documents --additional-test-cmds + ✓ run_test_gate writes test-evidence JSON + ✓ run_audit_agent reads structured test evidence + + verification gap handler + ✓ Verification gap detection present + ✓ Verification gap resolved event emitted + ✓ Verification gap confirmed event emitted + ✓ Verification gap can override audit result + ✓ Verification re-runs tests to dedicated log + ✓ Mid-build test file discovery integrated + + + + ────────────────────────────────────────── + + All 65 tests passed + + +╔═══════════════════════════════════════════════════════════════════╗ +║ shipwright memory+cost test — Unit Tests for Memory & Cost ║ +╚═══════════════════════════════════════════════════════════════════╝ + +Setting up mock environment... +✓ Environment ready: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-memory-test.62S2eY + + ▸ Memory capture from pipeline state... ✓ + ▸ Memory inject returns context for each stage... ✓ + ▸ Failure capture stores patterns... ✓ + ▸ Pattern detection identifies project type... ✓ + ▸ Cross-repo vs per-repo isolation... ✓ + ▸ Memory show displays dashboard... ✓ + ▸ Memory search finds matching entries... ✓ + ▸ Memory export produces valid JSON... ✓ + ▸ Memory forget clears repo memory... ✓ + ▸ Cost calculation for each model... ✓ + ▸ Cost recording writes to costs.json... ✓ + ▸ Budget set and check... ✓ + ▸ Cost dashboard runs without errors... ✓ + ▸ Cost JSON output is valid... ✓ + ▸ Actionable failures threshold filtering... ✓ + ▸ Actionable failures with no file returns []... ✓ + ▸ DORA baseline calculation from events... ✓ + ▸ Error log entries captured into failures.json... ✓ + ▸ Fix outcome tracking increments counters... ✓ + ▸ Closed-loop inject returns formatted fix... ✓ + ▸ Global aggregation promotes frequent patterns... ✓ + ▸ Finalize pipeline runs capture + aggregate... ▸ Capturing pipeline learnings for test-org/test-repo... +✓ Captured pipeline learnings (status: complete) +✓ + +━━━ Results ━━━ + Passed: 22 + Failed: 0 + Total: 22 + +All 22 tests passed! + + + + Shipwright Mission Control Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Help & Navigation + ✓ help shows usage + ✓ help shows commands + ✓ --help flag works + ✓ unknown command shows error + Overview + ✓ overview shows MISSION CONTROL header + ✓ overview shows Summary Statistics + ✓ overview shows Active Pipelines + ✓ overview with active job shows count + Agent Tree + ✓ agents shows hierarchy + ✓ agents shows Pipeline Agent + Resources + ✓ resources shows utilization + Alerts + ✓ alerts shows alert feed + Stage Commands + ✓ pause without id exits nonzero + ✓ pause emits success + ✓ resume without id exits nonzero + ✓ resume emits success + ✓ skip without stage shows usage + ✓ skip emits success + ✓ retry emits success + + + + ────────────────────────────────────────── + + All 19 tests passed + + + + Shipwright Model Router Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Help & Version + ✓ help shows usage + ✓ help shows route + ✓ help shows escalate + ✓ help shows config + + Route Model + ✓ route intake at 50 = haiku + ✓ route build at 50 = opus + ✓ route test at 50 = sonnet + ✓ route build at 10 (low) = sonnet + ✓ route intake at 90 (high) = opus + ✓ route unknown stage at 50 = sonnet + + Escalate Model + ✓ escalate haiku -> sonnet + ✓ escalate sonnet -> opus + ✓ escalate opus -> opus (ceiling) + ✓ escalate unknown exits non-zero + + Config + ✓ config show displays JSON + ✓ config creates default file + ✓ config set confirms update + ✓ config set persists value + + Estimate + ✓ estimate shows stages + ✓ estimate shows total + + Report + ✓ report with no data warns + + Record Usage + ✓ record_usage creates usage file + ✓ record_usage writes entries + ✓ report with data shows summary + ✓ report shows total runs + ✓ report shows cost + ✓ report shows model counts + + Route All Stages & Complexity + ✓ route intake at 50 returns model + ✓ route plan at 50 returns model + ✓ route design at 50 returns model + ✓ route build at 50 returns model + ✓ route test at 50 returns model + ✓ route review at 50 returns model + ✓ route compound_quality at 50 returns model + ✓ route validate at 50 returns model + ✓ route monitor at 50 returns model + ✓ route plan at low complexity = sonnet + ✓ route plan at high complexity = opus + + Config Set/Show Cycle +✓ Updated cost_aware_mode = false + ✓ config show reflects settings + ✓ config set persists + ✓ estimate with low complexity shows stages + ✓ estimate shows Total + ✓ estimate with high complexity + + Error Handling + ✓ unknown subcommand exits non-zero + ✓ unknown subcommand shows error + + Reasoning Chains + ✓ chain config shows templates + ✓ chain config shows explore-synthesize-decide + ✓ chain config shows fast-verify + ✓ chain config shows deep-analysis + ✓ chain config creates templates file + ✓ chain define shows success + ✓ chain define persists custom chain + ✓ chain_score_confidence returns numeric score + ✓ chain_score_confidence scores conclusion text higher + ✓ chain execute returns valid JSON + ✓ chain execute result has steps + ✓ chain step-cost returns numeric cost for haiku + ✓ chain step-cost ordering correct (haiku < sonnet < opus) + ✓ chain report outputs summary + ✓ chain define with invalid JSON exits non-zero + ✓ chain define validates JSON + ✓ explore-decide has 2 steps + ✓ explore-decide first step is haiku + ✓ explore-decide last step is opus + ✓ chain execute with invalid chain exits non-zero + ✓ chain execute shows error + + + + ────────────────────────────────────────── + + All 66 tests passed + + + + Shipwright OTel Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + Help & Version + ✓ help shows usage + ✓ help shows metrics + ✓ help shows trace + ✓ help shows export + ✓ help shows webhook + ✓ help shows dashboard + + Metrics + ✓ metrics text has pipelines_total + ✓ metrics text has active_pipelines + ✓ metrics text has cost + ✓ metrics text has queue depth + ✓ metrics json has metrics key + ✓ metrics json has pipelines_total + ✓ metrics json is valid JSON + + Trace + ✓ trace has resourceSpans + ✓ trace has service.name + ✓ trace output is valid JSON + + Dashboard + ✓ dashboard has dashboard key + ✓ dashboard output is valid JSON + + Report + ✓ report shows header + ✓ report shows events section + ✓ report shows pipeline metrics + ✓ report shows recommendations + + Webhook + ✓ webhook without URL exits non-zero + ✓ webhook without URL shows error + + Metrics With Events + ✓ metrics count total pipelines = 2 + + Error Handling + ✓ unknown command exits non-zero + ✓ unknown command shows error + + + + ────────────────────────────────────────── + + All 27 tests passed + + + + Shipwright Oversight Tests + ══════════════════════════════════════════ + + ══════════════════════════════════════════ + + ✓ help shows usage text + ✓ help exits 0 + ✓ --help flag works + ✓ unknown command exits 1 + ✓ members shows board + ✓ members.json created + ✓ members.json is valid JSON + ✓ config show works + ✓ config.json created + ✓ stats shows statistics + ✓ history handles empty + ✓ review without args exits nonzero + ✓ review submission accepted + ✓ review JSON file created + ✓ review file is valid JSON + ✓ stats shows total reviews >= 1 + + + + ────────────────────────────────────────── + + All 0 tests passed + + + + Shipwright Patrol Meta Test Suite + ══════════════════════════════════════════ + + + Script Structure + ✓ contains sourced-file note + ✓ does NOT have set -euo pipefail as code (sourced script) + ✓ no main() function (sourced script) + + Function Definitions + ✓ patrol_meta_run() defined + ✓ patrol_meta_create_issue() defined + ✓ patrol_meta_untested_scripts() defined + ✓ patrol_meta_bash_compat() defined + ✓ patrol_meta_version_sync() defined + ✓ patrol_meta_dora_trends() defined + ✓ patrol_meta_template_effectiveness() defined + ✓ patrol_meta_memory_pruning() defined + ✓ patrol_meta_event_analysis() defined + + Sourcing + ✓ script can be sourced without error + + NO_GITHUB Dry Run + ✓ checks NO_GITHUB for dry-run mode + ✓ supports dry-run output + + Bash Compat Checks + ✓ bash compat check looks for declare -A + ✓ bash compat check looks for readarray/mapfile + + Dedup Logic + ✓ dedup logic skips duplicate issues + ✓ dedup searches existing issues + + Memory Pruning Check + ✓ memory pruning uses du -sk for size check + ✓ memory pruning has MB threshold + + Event Analysis + ✓ event analysis uses 7-day window + ✓ event analysis checks pipeline.completed events + + Closed-Loop Self-Improvement Functions + ✓ patrol_meta_score_impact() defined + ✓ patrol_meta_execute_fix() defined + ✓ patrol_meta_verify_fix() defined + ✓ patrol_meta_rollback() defined + ✓ patrol_meta_learn() defined + ✓ patrol_meta_batch_improve() defined + ✓ patrol_meta_auto() defined + + Impact Scoring + ✓ untested-scripts category gets high score (75) + ✓ dora-regression category gets critical score (85) + + Pipeline Integration + ✓ execute_fix calls shipwright pipeline start + ✓ uses --worktree for isolated execution + + Learning & Memory + ✓ records self-improvements in memory + ✓ emits patrol.meta_learned events + + Batch Processing + ✓ batch_improve looks for meta-improvement labeled issues + ✓ sorts issues by impact score (highest first) + + Dry-Run Mode + ✓ new functions respect NO_GITHUB for dry-run + + Autonomous Loop + ✓ patrol_meta_auto() orchestrates full loop + ✓ auto loop has stage 1 (detection) and stage 2 (batch) + + + + + ────────────────────────────────────────── + + All 0 tests passed + + +╔═══════════════════════════════════════════════════╗ +║ shipwright pipeline composer — Test Suite ║ +╚═══════════════════════════════════════════════════╝ + +Setting up test environment... + +Pipeline Composition + ▸ Composed pipeline has valid stage ordering... ✓ + ▸ High-risk issue gets security stages... .claude/pipeline-artifacts/composed-pipeline.json +✓ + ▸ Fallback to static template when no intelligence... ✓ + +Conditional Stage Insertion + ▸ Stage inserted at correct position after build... ✓ + ▸ Insert into nonexistent stage fails... ✓ + +Model Downgrade + ▸ Budget constraint triggers model downgrades... ✓ + ▸ Downgrade with nonexistent stage fails... ✓ + +Pipeline Validation + ▸ Validation accepts valid pipeline... ✓ + ▸ Validation rejects invalid ordering (test before build)... ✓ + ▸ Validation rejects missing stage ids... ✓ + ▸ Validation rejects missing stages array... ✓ + +Iteration Estimation + ▸ Iteration estimates are reasonable (1-50 range)... ✓ + +════════════════════════════════════════════════════ + All 12 tests passed ✓ +════════════════════════════════════════════════════ + + +╔═══════════════════════════════════════════════════════════════════╗ +║ shipwright pipeline test — E2E Validation (Real Subprocess) ║ +╚═══════════════════════════════════════════════════════════════════╝ + +Setting up mock environment... +✓ Environment ready: /var/folders/57/0gs_mdl104q8vk054nz3wp3h0000gn/T//sw-pipeline-test.gCxJon + + ▸ Preflight passes with all mocks... ✓ + ▸ Preflight fails when sw-loop.sh missing... ✓ + ▸ Start requires --goal or --issue... ✓ + ▸ Intake with --goal creates branch + artifacts... ✓ + ▸ Intake with --issue fetches from GitHub... ✓ + ▸ Plan generates plan.md, dod.md, tasks... ✗ File not found: .claude/pipeline-artifacts/plan.md (plan generated) +✗ FAILED + ▸ Build invokes sw loop and commits... ✓ + ▸ Test stage captures results to log... ✓ + ▸ Review generates report with severities... ✗ File not found: .claude/pipeline-artifacts/review.md (review generated) +✗ FAILED + ▸ PR stage creates PR URL artifact... ✓ + ▸ Full E2E pipeline (6 stages)... ✗ File not found: .claude/pipeline-artifacts/plan.md (plan artifact) +✗ FAILED + ▸ Resume continues from partial state... ✗ File not found: .claude/pipeline-artifacts/plan.md (plan generated after resume) +✗ FAILED + ▸ Abort marks pipeline as aborted... ✓ + ▸ Dry run shows config, no artifacts... ✗ Output missing pattern: Pipeline.*standard (shows pipeline name) + Output (last 5 lines): + Estimated Cost: $0.5040 USD + + ✓ Dry run validation passed + + To execute this pipeline: remove --dry-run flag +✓ + ▸ Self-healing build→test retry loop... ✓ + ▸ Intelligence: Skip stages for documentation issues... ✓ + ▸ Intelligence: Skip stages for low complexity... ✗ Output missing pattern: intelligence.*complexity.*[0-3]|stage.*skipped (should show intelligence skip due to complexity) + Output (last 5 lines): + ▸ RL episode recorded (success=true, iterations=1) + {"timestamp":"2026-04-04T12:55:26Z","epoch":1775307326,"pipeline_id":"fast","reward":0.4950,"components":{"test_outcome":0.5,"iteration_efficiency":0.5,"cost_efficiency":0.8,"quality_score":0.5,"convergence_speed":0.0000,"memory_hit_rate":0.5},"context":{"language":"unknown","complexity":"2"}} + ✓ Learned policy from 17 episodes across 3 context buckets + jq: parse error: Invalid numeric literal at line 1, column 2 + jq: parse error: Invalid numeric literal at line 1, column 2 +✗ FAILED + ▸ Intelligence: Finding classification and routing... ✓ + ▸ Intelligence: Mid-pipeline complexity reassessment... ✓ + ▸ Intelligence: Backtracking limit (1 per pipeline)... ✓ + ▸ Cleanup: Post-completion clears checkpoints and transient artifacts... ✗ Expected exit code 0, got 1 (pipeline should complete) +✗ FAILED + ▸ Cleanup: pipeline_cancel_check_runs function exists... ✓ + ▸ Vitals: sw-pipeline-vitals.sh exists and is syntactically valid... ✓ + ▸ Vitals: All vitals functions defined in module... ✓ + ▸ Vitals: Health verdict maps scores correctly... ✓ + ▸ Vitals: Adaptive limit returns valid integer... ✓ + ▸ Vitals: Budget trajectory returns ok/warn/stop... ✓ + ▸ Quality: pipeline_select_audits function exists... ✓ + ▸ Quality: pipeline_security_source_scan function exists... ✓ + ▸ Quality: pipeline_verify_dod function exists... ✓ + ▸ Quality: pipeline_record_quality_score function exists... ✓ + ▸ Quality: Templates have compound_quality_blocking... ✓ + ▸ Vitals: Progress snapshot writes correct file... ✓ + ▸ Vitals: Momentum score from snapshot history... ✓ + ▸ Vitals: Convergence with decreasing errors... ✓ + ▸ Vitals: Configurable weights via env vars... ✓ + ▸ Vitals: Budget trajectory warn/stop on exhaustion... ✓ + ▸ Quality: Structured findings JSON is valid... ✓ + ▸ Quality: Multi-backtrack counter tracking... ✓ + ▸ Quality: 6 categories in classify_quality_findings... ✓ + ▸ Deploy: Pre-deploy gates exist in pipeline... ✓ + ▸ Deploy: Deploy strategy config pattern... ✓ + ▸ Deploy: Canary deploy flow patterns exist... ✓ + ▸ Pipeline: PIPELINE_STATE references removed... ✓ + ▸ Pipeline: Coverage JSON creation in test stage... ✓ + ▸ Pipeline: _pipeline_compact_goal returns goal+plan+design... ✓ + ▸ Pipeline: load_composed_pipeline sets COMPOSED_STAGES... ✓ + ▸ Vitals: Momentum returns 60 for single snapshot past intake... ✓ + ▸ Vitals: Health gate blocks when health < threshold... ✓ + ▸ Vitals: Health gate passes with default threshold=40... ✓ + ▸ Durable: persist_artifacts function exists... ✓ + ▸ Durable: persist_artifacts skips in non-CI mode... ✓ + ▸ Durable: verify_stage_artifacts passes when artifacts present... ✓ + ▸ Durable: verify_stage_artifacts fails when artifacts missing... ✓ + ▸ Durable: verify_stage_artifacts fails when artifacts empty... ✓ + ▸ Durable: verify_stage_artifacts passes for stages with no requirements... ✓ + ▸ Durable: verify_stage_artifacts design requires plan.md... ✓ + ▸ Durable: mark_stage_complete wires persist for plan stage... ✓ + +━━━ Results ━━━ + Passed: 52 + Failed: 6 + Total: 58 + +Failed tests: + ✗ Plan generates plan.md, dod.md, tasks + ✗ Review generates report with severities + ✗ Full E2E pipeline (6 stages) + ✗ Resume continues from partial state + ✗ Intelligence: Skip stages for low complexity + ✗ Cleanup: Post-completion clears checkpoints and transient artifacts + diff --git a/.claude/loop-state.md b/.claude/loop-state.md index 0362dc9e6..e1c4fe7b5 100644 --- a/.claude/loop-state.md +++ b/.claude/loop-state.md @@ -1,104 +1,79 @@ --- -goal: "Add a shipwright ping command that prints pong to stdout and exits 0 +goal: "Add version display to sw-hello command: read version from package.json, display Shipwright vX.Y.Z, add test -## Plan Summary -Plan complete and saved to `docs/plans/2026-03-02-ping-command.md`. +## Specification: Add version display to sw-hello command: read version from package.json, display Shipwright vX.Y.Z, add test ---- - -## Summary - -The plan adds the `shipwright ping` command in **4 files, 9 tasks**: - -| # | Task | File(s) | -|---|------|---------| -| 1-2 | Create + chmod `sw-ping.sh` | `scripts/sw-ping.sh` (new) | -| 3-4 | Create + chmod `sw-ping-test.sh` | `scripts/sw-ping-test.sh` (new) | -| 5 | Run test in isolation — verify 6 PASS | — | -| 6 | Register `ping)` case in router | `scripts/sw` | -| 7 | Add test to `npm test` chain | `package.json` | -| 8 | Smoke-test via router | — | -| 9 | Commit | — | +### Goals +- Add version display to sw-hello command: read version from package.json, display Shipwright vX.Y.Z, add test -**Key decisions:** -- **Standalone script** (not inline in router) — only approach consistent with all 100+ existing commands, independently testable -[... full plan in .claude/pipeline-artifacts/plan.md] - -## Key Design Decisions -# Design: Add a shipwright ping command that prints pong to stdout and exits 0 -## Context -## Component Diagram -## Decision -## Interface Contracts -# sw-ping.sh — Public interface -# Invocation (no args): happy path -# stdout: "pong\n" -# stderr: (empty) -# exit: 0 -[... full design in .claude/pipeline-artifacts/design.md] +### Acceptance Criteria +- [testable] All existing tests continue to pass Historical context (lessons from previous pipelines): { "results": [ { - "file": "architecture.json", - "relevance": 95, - "summary": "Describes Command Router pattern, bash 3.2 conventions (set -euo pipefail, VERSION at top), snake_case function naming, and test harness structure — exactly what's needed to implement the ping command correctly" - }, - { - "file": "failures.json (comprehensive with 8 entries)", + "file": "patterns.json", "relevance": 85, - "summary": "Shows critical historical failures including 'output missing: intake' (23 occurrences, highest weight 7.8e+47), shell-init errors, and test infrastructure issues — directly relevant to avoiding similar failures in build stage" + "summary": "Defines project conventions: vitest test runner, npm package manager, test_pattern *.test.js, source_dir src/, commonjs imports — directly relevant for writing new tests for the version display feature" }, { - "file": "metrics.json (build_duration_s: 2826)", + "file": "metrics.json", "relevance": 55, - "summary": "Previous build took 47 minutes — provides performance baseline and expectation setting for current build duration" + "summary": "Recent baseline metrics (2026-03-09) showing build_duration_s: 17827, test_duration_s: 1575 — provides performance context for the build stage and typical test execution time" }, { - "file": "failures.json (shell-init: error retrieving current directory)", - "relevance": 50, - "summary": "Test stage failure in getcwd — indicates potential sandbox/environment issues that could affect ping command testing" + "file": "patterns.json", + "relevance": 40, + "summary": "Simpler project type confirmation (nodejs, detected 2026-02-21) — validates project classification but less actionable than the conventions entry above" }, { - "file": "patterns.json (import_style: commonjs)", + "file": "metrics.json", "relevance": 30, - "summary": "Indicates JavaScript/Node.js project context; mostly empty but shows partial project type detection from previous runs" + "summary": "Older baseline (2026-02-21) with build_duration_s: 147, test_duration_s: 1 — historical reference point, less relevant than recent metrics" + }, + { + "file": "failures.json", + "relevance": 10, + "summary": "Pipeline and mock binary test failures from Shipwright itself — not relevant to sw-hello version display feature implementation" } ] } Discoveries from other pipelines: -✓ Injected 1 new discoveries -[design] Design completed for Add a shipwright ping command that prints pong to stdout and exits 0 — Resolution: - -## Failure Diagnosis (Iteration 2) -Classification: unknown -Strategy: retry_with_context -Repeat count: 0 - -## Failure Diagnosis (Iteration 3) -Classification: unknown -Strategy: retry_with_context -Repeat count: 1 - -## Failure Diagnosis (Iteration 4) -Classification: unknown -Strategy: retry_with_context -Repeat count: 0" -iteration: 4 -max_iterations: 20 -status: error +✓ Injected 126 new discoveries +[intake] Stage intake completed — Resolution: +[intake] Stage intake completed — Resolution: +[intake] Stage intake completed — Resolution: +[compound_quality] Stage compound_quality completed — Resolution: +[intake] Stage intake completed — Resolution: +[pr] Stage pr completed — Resolution: +[pipeline_success] Pipeline success for issue #0 (fast template, stage=validate) — Resolution: success +[intake] Stage intake completed — Resolution: +[pr] Stage pr completed — Resolution: +[intake] Stage intake completed — Resolution: +[compound_quality] Stage compound_quality completed — Resolution: +[pr] Stage pr completed — Resolution: +[intake] Stage intake completed — Resolution: +[compound_quality] Stage compound_quality completed — Resolution: +[pr] Stage pr completed — Resolution: +[intake] Stage intake completed — Resolution: +[design] Design completed for Build a production-grade todo application. TypeScript + React frontend with Vite, Express REST API backend, SQLite persistence with Drizzle ORM, JWT authentication (register/login), full CRUD for todos with filtering (all/active/completed), drag-and-drop reorder, due dates, priorities (low/medium/high), dark mode, responsive design. Include comprehensive test suite (unit + integration + e2e). Production-ready: error handling, input validation, rate limiting, CORS, environment config. — Resolution: +[intake] Stage intake completed — Resolution: +[intake] Stage intake completed — Resolution: " +iteration: 1 +max_iterations: 10 +status: running test_cmd: "npm test" model: sonnet agents: 1 -started_at: 2026-03-02T08:27:01Z -last_iteration_at: 2026-03-02T08:27:01Z -consecutive_failures: 1 -total_commits: 3 -audit_enabled: true -audit_agent_enabled: true -quality_gates_enabled: true +started_at: 2026-04-04T12:34:56Z +last_iteration_at: 2026-04-04T12:34:56Z +consecutive_failures: 0 +total_commits: 1 +audit_enabled: false +audit_agent_enabled: false +quality_gates_enabled: false dod_file: "" auto_extend: true extension_count: 0 @@ -106,14 +81,6 @@ max_extensions: 3 --- ## Log -### Iteration 1 (2026-03-02T08:06:08Z) -This is also a task notification for a background command that was already retrieved and reviewed via `TaskOutput` in th -No new information — the ping command implementation is complete and `LOOP_COMPLETE` was already declared. - -### Iteration 2 (2026-03-02T08:25:28Z) -The background task already completed and was retrieved in my previous turn — `npm test` exited with code 0. The ping co -LOOP_COMPLETE - -### Iteration 3 (2026-03-02T08:26:58Z) -(no output) +### Iteration 1 (2026-04-04T12:34:56Z) +{"type":"result","subtype":"success","is_error":false,"duration_ms":90646,"duration_api_ms":90727,"num_turns":13,"result diff --git a/.claude/platform-hygiene.json b/.claude/platform-hygiene.json index 9338cc285..43a34aaca 100644 --- a/.claude/platform-hygiene.json +++ b/.claude/platform-hygiene.json @@ -1,12 +1,12 @@ { - "timestamp": "2026-03-10T15:34:26Z", - "repository": "shipwright", + "timestamp": "2026-04-04T12:42:58Z", + "repository": "hello-v3", "counts": { - "hardcoded": 46, - "fallback": 68, - "todo": 45, - "fixme": 23, - "hack": 18 + "hardcoded": 48, + "fallback": 67, + "todo": 51, + "fixme": 25, + "hack": 20 }, "findings_sample": [ { @@ -298,9 +298,21 @@ "line": 454 }, { - "file": "scripts/lib/adaptive-timeout.sh", + "file": "scripts/lib/process-reward.sh", "line": 459 }, + { + "file": "scripts/lib/process-reward.sh", + "line": 115 + }, + { + "file": "scripts/lib/process-reward.sh", + "line": 117 + }, + { + "file": "scripts/lib/adaptive-timeout.sh", + "line": 261 + }, { "file": "scripts/lib/pipeline-detection.sh", "line": 214 @@ -325,13 +337,9 @@ "file": "scripts/lib/pipeline-intelligence-compound.sh", "line": 241 }, - { - "file": "scripts/lib/loop-convergence.sh", - "line": 242 - }, { "file": "scripts/lib/test-optimizer.sh", - "line": 85 + "line": 242 }, { "file": "scripts/lib/pipeline-execution.sh", @@ -395,7 +403,7 @@ }, { "file": "scripts/lib/pipeline-stages-build.sh", - "line": 1008 + "line": 1041 }, { "file": "scripts/lib/convergence.sh", @@ -403,7 +411,7 @@ }, { "file": "scripts/lib/pipeline-quality-gates.sh", - "line": 342 + "line": 343 }, { "file": "scripts/lib/pipeline-quality-gates.sh", @@ -451,7 +459,7 @@ }, { "file": "scripts/lib/compat.sh", - "line": 467 + "line": 624 }, { "file": "scripts/lib/pipeline-quality-checks.sh", @@ -471,7 +479,7 @@ }, { "file": "scripts/lib/pipeline-util.sh", - "line": 439 + "line": 482 }, { "file": "scripts/sw-fix.sh", @@ -547,31 +555,31 @@ }, { "file": "scripts/sw-loop.sh", - "line": 548 + "line": 561 }, { "file": "scripts/sw-loop.sh", - "line": 1106 + "line": 1119 }, { "file": "scripts/sw-loop.sh", - "line": 1182 + "line": 1195 }, { "file": "scripts/sw-loop.sh", - "line": 1246 + "line": 1259 }, { "file": "scripts/sw-loop.sh", - "line": 1250 + "line": 1263 }, { "file": "scripts/sw-loop.sh", - "line": 1253 + "line": 1266 }, { "file": "scripts/sw-linear.sh", - "line": 1509 + "line": 1522 }, { "file": "scripts/sw-linear.sh", @@ -614,18 +622,38 @@ "line": 161 }, { - "file": "scripts/sw-model-router.sh", + "file": "scripts/sw-constitutional-test.sh", "line": 160 + }, + { + "file": "scripts/sw-constitutional-test.sh", + "line": 67 + }, + { + "file": "scripts/sw-constitutional-test.sh", + "line": 71 + }, + { + "file": "scripts/sw-constitutional-test.sh", + "line": 154 + }, + { + "file": "scripts/sw-constitutional-test.sh", + "line": 162 + }, + { + "file": "scripts/sw-model-router.sh", + "line": 206 } ], "script_size_hotspots": [ { "script": "sw-loop.sh", - "lines": 2530 + "lines": 2561 }, { "script": "sw-memory.sh", - "lines": 2118 + "lines": 2240 }, { "script": "sw-daemon-test.sh", diff --git a/.claude/recovery-state.json b/.claude/recovery-state.json new file mode 100644 index 000000000..56986a1c6 --- /dev/null +++ b/.claude/recovery-state.json @@ -0,0 +1 @@ +{"attempts":0,"history":[],"current_model":"","escalation_level":0} diff --git a/.claude/test-holdout/manifest.json b/.claude/test-holdout/manifest.json new file mode 100644 index 000000000..f6a8a5bfb --- /dev/null +++ b/.claude/test-holdout/manifest.json @@ -0,0 +1,8 @@ +{ + "created": "2026-04-04T12:31:17Z", + "ratio": 30, + "total_tests": 22, + "visible_count": 20, + "sealed_count": 2, + "tests": [{"original":"tests/edge.test.js","sealed":".claude/test-holdout/.sealed/tests/edge.test.js","hash":"3815f72e3ca0f2f4733a4a5a96c49a4d"},{"original":"tests/util.test.js","sealed":".claude/test-holdout/.sealed/tests/util.test.js","hash":"fe8b5b0649713da37789013a82ee7007"}] +} diff --git a/.gitignore b/.gitignore index c7f4f2ede..e54c8e652 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,6 @@ node_modules/ website/node_modules/ website/dist/ website/.astro/ + +# Shipwright test holdout (sealed tests hidden from agents) +.claude/test-holdout/.sealed/ diff --git a/scripts/sw-cost-test.sh b/scripts/sw-cost-test.sh index 949af0c83..69d74a477 100755 --- a/scripts/sw-cost-test.sh +++ b/scripts/sw-cost-test.sh @@ -197,14 +197,17 @@ fi # Functional test: write mock events and verify dashboard parses them mkdir -p "$TEST_TEMP_DIR/home/.shipwright" -cat > "$TEST_TEMP_DIR/home/.shipwright/events.jsonl" <<'EVTEOF' -{"ts":"2026-02-27T10:00:00Z","type":"loop.context_efficiency","iteration":"1","raw_prompt_chars":"200000","trimmed_prompt_chars":"180000","trim_ratio":"10.0","budget_utilization":"100.0","budget_chars":"180000","job_id":"test-1"} -{"ts":"2026-02-27T10:01:00Z","type":"loop.context_efficiency","iteration":"2","raw_prompt_chars":"150000","trimmed_prompt_chars":"150000","trim_ratio":"0.0","budget_utilization":"83.3","budget_chars":"180000","job_id":"test-1"} +# Use current timestamp so entries are within the 30-day window +_now_ts=$(date -u +"%Y-%m-%dT%H:%M:%SZ") +_now_epoch=$(date +%s) +cat > "$TEST_TEMP_DIR/home/.shipwright/events.jsonl" < "$TEST_TEMP_DIR/home/.shipwright/costs.json" <<'COSTEOF' -{"entries":[{"ts":"2026-02-27T10:00:00Z","ts_epoch":1772125200,"input_tokens":50000,"output_tokens":10000,"cost_usd":1.50,"model":"opus","stage":"build","issue":"1"}],"summary":{}} +# Also need cost data for the dashboard to run (use current epoch so it's within 30-day window) +cat > "$TEST_TEMP_DIR/home/.shipwright/costs.json" < "$TEST_TEMP_DIR/home/.shipwright/budget.json" <<'BUDEOF' {"daily_budget_usd":0,"enabled":false} diff --git a/scripts/sw-formal-spec-test.sh b/scripts/sw-formal-spec-test.sh old mode 100644 new mode 100755 diff --git a/scripts/sw-hello-test.sh b/scripts/sw-hello-test.sh index 666299431..78a9fa8f7 100755 --- a/scripts/sw-hello-test.sh +++ b/scripts/sw-hello-test.sh @@ -35,11 +35,18 @@ assert_exit_code() { fi } -# ─── Test: hello command outputs "hello world" ────────────────────────────── +# ─── Test: hello command outputs "Shipwright vX.Y.Z" ─────────────────────── test_hello_output() { local output output=$("$SCRIPT_DIR/sw-hello.sh") - assert_equals "hello world" "$output" "hello command outputs 'hello world'" + if [[ "$output" =~ ^Shipwright\ v[0-9]+\.[0-9]+\.[0-9]+ ]]; then + ((PASS++)) + echo -e " \033[38;2;74;222;128m\033[1m✓\033[0m hello command outputs 'Shipwright vX.Y.Z'" + else + ((FAIL++)) + echo -e " \033[38;2;248;113;113m\033[1m✗\033[0m hello command outputs 'Shipwright vX.Y.Z'" + echo " Actual: $output" + fi } # ─── Test: hello command exits with 0 ─────────────────────────────────────── @@ -74,16 +81,36 @@ test_hello_short_help() { fi } -# ─── Test: hello --version shows version ──────────────────────────────────── +# ─── Test: hello --version shows "Shipwright vX.Y.Z" ─────────────────────── test_hello_version() { local output output=$("$SCRIPT_DIR/sw-hello.sh" --version) - if [[ "$output" =~ ^[0-9]+\.[0-9]+\.[0-9]+ ]]; then + if [[ "$output" =~ ^Shipwright\ v[0-9]+\.[0-9]+\.[0-9]+ ]]; then + ((PASS++)) + echo -e " \033[38;2;74;222;128m\033[1m✓\033[0m hello --version displays 'Shipwright vX.Y.Z'" + else + ((FAIL++)) + echo -e " \033[38;2;248;113;113m\033[1m✗\033[0m hello --version displays 'Shipwright vX.Y.Z'" + echo " Actual: $output" + fi +} + +# ─── Test: version is read from package.json ──────────────────────────────── +test_hello_version_from_package_json() { + local script_dir output pkg_version + script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + pkg_version=$(jq -r '.version' "$script_dir/../package.json" 2>/dev/null \ + || grep '"version"' "$script_dir/../package.json" \ + | grep -o '[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*' | head -1) + output=$("$SCRIPT_DIR/sw-hello.sh") + if [[ "$output" == "Shipwright v${pkg_version}" ]]; then ((PASS++)) - echo -e " \033[38;2;74;222;128m\033[1m✓\033[0m hello --version displays version" + echo -e " \033[38;2;74;222;128m\033[1m✓\033[0m hello version matches package.json ($pkg_version)" else ((FAIL++)) - echo -e " \033[38;2;248;113;113m\033[1m✗\033[0m hello --version displays version" + echo -e " \033[38;2;248;113;113m\033[1m✗\033[0m hello version matches package.json" + echo " Expected: Shipwright v${pkg_version}" + echo " Actual: $output" fi } @@ -100,6 +127,7 @@ test_hello_exit_code test_hello_help test_hello_short_help test_hello_version +test_hello_version_from_package_json test_hello_invalid_option echo "" diff --git a/scripts/sw-hello.sh b/scripts/sw-hello.sh index 4c6180e90..4a0ccbc14 100755 --- a/scripts/sw-hello.sh +++ b/scripts/sw-hello.sh @@ -11,6 +11,13 @@ trap 'echo "ERROR: $BASH_SOURCE:$LINENO exited with status $?" >&2' ERR SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# Read version from package.json (fall back to script VERSION if unavailable) +_pkg_version=$(jq -r '.version' "$SCRIPT_DIR/../package.json" 2>/dev/null \ + || grep '"version"' "$SCRIPT_DIR/../package.json" 2>/dev/null \ + | grep -o '[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*' | head -1 \ + || true) +[[ -n "${_pkg_version:-}" ]] && VERSION="$_pkg_version" + # Canonical helpers (colors, output, events) # shellcheck source=lib/helpers.sh [[ -f "$SCRIPT_DIR/lib/helpers.sh" ]] && source "$SCRIPT_DIR/lib/helpers.sh" @@ -48,12 +55,12 @@ main() { exit 0 ;; --version|-v) - echo "$VERSION" + echo "Shipwright v${VERSION}" exit 0 ;; "") - # No arguments: output hello world - echo "hello world" + # No arguments: display Shipwright version + echo "Shipwright v${VERSION}" exit 0 ;; *) diff --git a/scripts/sw-mutation-executor-test.sh b/scripts/sw-mutation-executor-test.sh old mode 100644 new mode 100755 diff --git a/scripts/sw-process-reward-test.sh b/scripts/sw-process-reward-test.sh old mode 100644 new mode 100755 diff --git a/scripts/sw-reward-aggregator-test.sh b/scripts/sw-reward-aggregator-test.sh old mode 100644 new mode 100755