diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 45bda5e..68a7647 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -8,7 +8,6 @@ on: env: REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }} - FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true jobs: verify: @@ -29,22 +28,8 @@ jobs: with: bun-version: ${{ matrix.bun-version }} - - name: Cache Bun dependencies - uses: actions/cache@v4 - with: - path: ~/.bun/install/cache - key: ${{ runner.os }}-bun-${{ hashFiles('**/bun.lockb', '**/package.json') }} - restore-keys: | - ${{ runner.os }}-bun- - - name: Install dependencies - run: bun install --frozen-lockfile - - - name: Compile runtime context - run: bun run compile:context - - - name: Regenerate skills index - run: bun run skills:index + run: bun install - name: Run tests run: bun test @@ -101,18 +86,12 @@ jobs: with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - - name: Set up Docker Buildx - if: steps.check.outputs.new == 'true' - uses: docker/setup-buildx-action@v3 - - name: Build and push Docker image if: steps.check.outputs.new == 'true' - uses: docker/build-push-action@v6 + uses: docker/build-push-action@v5 with: context: . push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max diff --git a/.gitignore b/.gitignore index 13d3f48..d4a1c53 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,6 @@ node_modules/ dist/ *.js.map -# Research archive - lives at ../knowledgebase/ outside this repo. +# Research archive — lives at ../knowledgebase/ outside this repo. # Ignored here as a safety net in case it gets re-introduced by mistake. knowledgebase/ diff --git a/AGENTS.md b/AGENTS.md index 8f25497..ad6484b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,55 +1 @@ -# final steering doc - -## 1. core law - -* facts first. -* no bluff. -* no fake results. -* no puppy talk. -* less talk. more work. -* evidence beats guess. 
-* fast isolated test beats hypothesis. -* suggestion is not truth. treat research hits as maybe true until verified. -* if unsure, say so. -* if change gives no gain, stop, alert user, suggest better path. - -## 2. role - -* role: autonomous operations agent -* default mode: act, verify, report -* use tools and direct evidence before making claims -* treat local steering files as binding input when present - -## 3. source priority - -use this order: - -1. explicit user instruction in current task -2. system and platform hard limits -3. local steering docs and skill docs -4. repository code and config -5. tests, command output, logs, api responses -6. docs and research notes -7. prior belief or intuition - -rules: - -* do not impose beliefs as facts. -* do not claim cause and effect without proof. -* do not present hypothesis as result. -* one verified fact beats ten plausible guesses. - -## 4. local steering files - -load and obey when present: - -* `~/.gemini/settings.json`, `.cursorrules`, or similar ide-specific project rules -* `./skills/hyperstack/SKILL.md` -* any task-specific skill doc the user points to -* repo-local agent or steering docs - -rules: - -* if user says `recall memory`, also read the agent's global rules file. -* if using codemode or exploring codebase, follow codemode fully. no shortcuts. -* read files before semantic linking. no context = no real linking. +@./skills/using-hyperstack/SKILL.md diff --git a/GEMINI.md b/GEMINI.md index 25e616d..ad6484b 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -1,6 +1 @@ -# Hyperstack for Gemini -Disciplined MCP server + skill system with adversarial enforcement. -Core focus: React Flow v12, Motion v12, Lenis, React 19, Echo, Go, Rust, and the Designer pipeline. - -@./skills/hyperstack/SKILL.md - +@./skills/using-hyperstack/SKILL.md diff --git a/README.md b/README.md index ee1b212..46f7369 100755 --- a/README.md +++ b/README.md @@ -1,182 +1,83 @@
-![Hyperstack Banner](assets/banner.jpeg) +# hyperstack -**A disciplined engineering harness that forces AI agents to use ground-truth docs, precise designs, and programmatic verification.** +**A disciplined MCP server and AI skill system that forces your agent to use real docs, real designs, and real verification before shipping.**

MIT TypeScript + MCP + Node Docker - MCP

- Plugins + Plugins Skills Hook

+

+ React Flow + Motion + Lenis + Tailwind + shadcn + Echo + Rust +

+
--- -## πŸš€ What is Hyperstack? +## ⚑ What is this? -**Hyperstack is a disciplined engineering harness for AI coding agents.** +Hyperstack is two things bolted together: -It provides the necessary **Ground Truth** (via 79 specialized MCP tools) and **Adversarial Enforcement** (via 21 rigorous skills) to transform a generic LLM into a high-precision Senior Engineer. +1. **A TypeScript MCP server** with 11 plugins and 79 tools. Your AI calls these for ground-truth API signatures, component specs, design decisions, and architectural patterns. No hallucinated imports. -Unlike standard "polite" instructions, Hyperstack uses **Iron Laws** and a **SessionStart hook** to force agents to check real documentation, follow precise design specs, and provide binary verification before shipping. +2. **A skill system with enforcement teeth.** 21 skills with Iron Laws, rationalization tables, and a SessionStart hook that force-injects discipline on every session. Your AI cannot "just try one thing" without the gate firing. -## πŸš€ Installation +The combination turns a generic coding assistant into a Senior Staff Engineer who checks docs before writing code, writes a DESIGN.md before any visual work, and refuses to claim completion without verification evidence. -### 1. Recommended: Agentic (autopilot) +**You should use this if** you are tired of AI agents inventing API shapes, shipping AI-slop UIs, or claiming "tests pass" without running them. -The fastest way to install Hyperstack is to let your AI agent do it for you. This works with **Cursor, Windsurf, Roo Code, Claude Code, or Gemini**. Simply paste this command: - -```text -Fetch and follow the instructions at https://raw.githubusercontent.com/orkait/hyperstack/main/install.md -``` - -The autopilot will autonomously detect your environment, install the MCP server (Docker or Local), and **automatically link the Skills repository**. This is the only step required. - ---- - -### 2. 
Manual Configuration (Advanced) - -If you aren't using the Autopilot, follow the **Unified Bootstrap** to set up both the MCP server and the Skills: - -1. **Clone & Initialize**: - ```bash - git clone https://github.com/orkait/hyperstack.git ~/.hyperstack - cd ~/.hyperstack && bun install - ``` - -2. **Run the Setup Script**: - ```bash - bun scripts/setup.ts - ``` - -3. **Follow the CLI**: The script auto-detects your IDE, generates the JSON patch, and outputs the symlink command for your skill directory. - -**Supported platforms** (verified April 2026, from official docs): - -| IDE / CLI | MCP Config Path | Skill Path | Schema | -|---|---|---|---| -| **Claude Code** | `~/.claude.json` | `~/.claude/skills/hyperstack` | JSON `mcpServers` | -| **Gemini CLI** | `~/.gemini/settings.json` | - | JSON `mcpServers` | -| **Qwen Code** | `~/.qwen/settings.json` | `~/.qwen/skills/hyperstack` | JSON `mcpServers` | -| **Codex CLI** | `~/.codex/config.toml` | - | TOML `mcp_servers` | -| **Cursor** | `~/.cursor/mcp.json` | `.cursor/rules/` (project) | JSON `mcpServers` | -| **Windsurf** | `~/.codeium/windsurf/mcp_config.json` | - | JSON `mcpServers` | -| **Kiro** | `~/.kiro/settings/mcp.json` | - | JSON `mcpServers` | -| **Zed** | `~/.config/zed/settings.json` | - | JSON `context_servers` | -| **VS Code + Copilot** | `~/.config/Code/User/mcp.json` | `.vscode/` (project) | JSON `mcpServers` | -| **Roo Code** | `.roo/mcp.json` (project) | `.roo/rules/` (project) | JSON `mcpServers` | -| **Cline** | `~/.config/Code/User/globalStorage/.../cline_mcp_settings.json` | - | JSON `mcpServers` | -| **Continue.dev** | `.continue/mcpServers/mcp.json` (project) | - | JSON `mcpServers` | - -> [!TIP] -> Run `bun scripts/setup.ts` and it will auto-detect your platform and output the exact patch - in the right format for your IDE. Codex users get TOML, everyone else gets JSON. 
- -## ⚑ The Manifesto - -It is not just a library; it is a **disciplined harness** made of three tightly-coupled layers: - -1. **The Harness**: Bootstraps the agent, routes internal specialist roles (`hyper`, `website-builder`), and enforces global development invariants. -2. **The MCP Ground Truth**: 12 TypeScript plugins (80 tools) that provide deterministic data. No hallucinated imports or invented component specs. -3. **The Adversarial Gates**: 21 skills with **"Enforcement Teeth"**. These aren't suggestions; they are Iron Laws supported by rationalization tables that counter every excuse an agent uses to skip quality gates. - -> [!IMPORTANT] -> **Use Hyperstack if** you want to force your agent to check docs, write designs first, and prove its work. -> **Skip Hyperstack if** you want frictionless autocomplete. We prioritize intentional friction that catches bugs before they ship. - -## πŸ—οΈ Architecture - -```mermaid -graph TD - User([User Request]) --> Harness[Internal Harness] - Harness --> Gates{Adversarial Gates} - - subgraph "The Enforcement Layer" - Gates -->|Iron Laws| Skills[21 Skills with Enforcement Teeth] - Skills -->|SessionStart| Injection[Auto-Context Injection] - end - - subgraph "The Knowledge Layer" - Skills -->|Ground Truth| MCP[12 MCP Plugins / 80 Tools] - MCP -->|Designer| Design[DESIGN.md Pipeline] - MCP -->|Verified| Code[Programmatic Compliance Check] - end - - Code -->|Verification Evidence| User - - linkStyle 0 stroke:#9ca3af,stroke-width:2px - linkStyle 1 stroke:#9ca3af,stroke-width:2px - linkStyle 2 stroke:#9ca3af,stroke-width:2px - linkStyle 3 stroke:#9ca3af,stroke-width:2px - linkStyle 4 stroke:#9ca3af,stroke-width:2px - linkStyle 5 stroke:#9ca3af,stroke-width:2px - linkStyle 6 stroke:#9ca3af,stroke-width:2px - linkStyle 7 stroke:#9ca3af,stroke-width:2px -``` - -## πŸ€– Agent Model - -If you are an agent reading this, your mental model must be: **Process is Priority.** You are operating within a system that requires 
deterministic grounding and formal design before implementation. - -- **Conductance**: `hyper` owners classification and gate verification. -- **Specialization**: `website-builder` handles visual and frontend implementation. -- **Persistence**: You run in a stable Docker container with a persistent lifecycle. +**Skip this if** you want a frictionless autocomplete. Hyperstack is the opposite of frictionless - it is intentional friction that catches bugs before they ship. --- ## πŸš€ Quickstart -### 🐳 Docker (Default) - -Hyperstack uses a persistent container plus `docker exec`. This keeps startup cheap across sessions and ensures 100% environment stability. +### πŸ€– Agent-first install -1. Pull the image: +If you are using Claude Code, Cursor, Gemini CLI, Copilot CLI, OpenCode, or Codex, paste this at your agent: -```bash -docker pull ghcr.io/orkait/hyperstack:main -``` +> Fetch and follow the instructions at https://raw.githubusercontent.com/orkait/hyperstack/main/install.md -2. Start the persistent container: +The agent will pull the Docker image and configure your MCP client. -```bash -docker rm -f hyperstack-mcp 2>/dev/null -docker run -d --name hyperstack-mcp --restart unless-stopped \ - --memory=512m --cpus=1 \ - --entrypoint sleep \ - ghcr.io/orkait/hyperstack:main infinity -``` +### 🐳 Docker (manual) -3. Add this to your MCP settings (`~/.claude.json`, Cursor, Windsurf, etc.): +Add this to `~/.claude.json`, Cursor config, or equivalent: ```json { "mcpServers": { "hyperstack": { "command": "docker", - "args": ["exec", "-i", "hyperstack-mcp", "bun", "/app/src/index.ts"] + "args": [ + "run", "-i", "--rm", + "--memory=256m", "--cpus=0.5", + "ghcr.io/orkait/hyperstack:main" + ] } } } ``` -### πŸ€– Agent Autopilot - -If you are using Claude Code, Cursor, Windsurf, Roo Code, or Gemini, you can use the autopilot to self-configure. 
Paste this at your agent: - -```text -Fetch and follow the instructions at https://raw.githubusercontent.com/orkait/hyperstack/main/install.md -``` -The autopilot will detect your environment and propose the correct Docker-based configuration block. - - +The `--memory=256m` and `--cpus=0.5` flags enforce resource limits. Keep them. ### πŸ”§ Install the skills @@ -186,7 +87,7 @@ The MCP server gives you tools. The skills give you discipline. Install both: git clone https://github.com/orkait/hyperstack.git ~/.claude/skills/hyperstack ``` -After installing, the SessionStart hook (at `hooks/session-start.mjs`) will auto-inject the `hyperstack` skill into every session. No manual activation needed. +After installing, the SessionStart hook (at `hooks/session-start.mjs`) will auto-inject the `using-hyperstack` skill into every session. No manual activation needed. ### πŸ’» From source @@ -204,46 +105,33 @@ Node 18+ required. --- -## 🧠 The Three-Layer System - -Hyperstack's strength comes from the friction between **Ground Truth** (MCP), **Enforcement** (Skills), and **Orchestration** (Agents). - -### Layer 1: MCP Plugins (Ground Truth) - -Your AI calls these for deterministic data. Memory is not acceptable. Every plugin serves curated TypeScript data and architectural patterns. - -| Category | Plugins | Domain Coverage | -|---|---|---| -| πŸ› οΈ **System** | `hyperstack` | Autonomous Environment Detection, MCP Configuration Patching, Lifecycle | -| 🎨 **UI Engine** | `designer`, `design-tokens`, `ui-ux`, `shadcn` | Design Systems, OKLCH, Typography, Accessibility, Component Specs | -| βš›οΈ **Frontend** | `react`, `reactflow`, `motion`, `lenis` | Next.js 15, RSC, Animation Curves, Smooth Scroll, DAG Layouts | -| 🐹 **Backend** | `echo`, `golang`, `rust` | Professional Go Recipes, Rust Borrow Checker patterns, Clean Architecture | - -> [!TIP] -> **80 Tools Total**. Every tool is designed to provide the "Senior Engineer" answer, bypassing the "AI Slop" default. 
+## 🧠 The two-layer system -### Layer 2: Skills (Enforcement Teeth) +### Layer 1: MCP Plugins (deterministic knowledge) -Markdown with adversarial enforcement. Each skill contains an **Iron Law** that the agent is bound to follow. +Your AI calls these for exact API data. Memory is not acceptable. Every plugin serves typed TypeScript data + `.txt` snippets bundled with the plugin. -> [!CAUTION] -> ### βš–οΈ The Iron Laws of Hyperstack -> - **NO CODE** without MCP grounding. -> - **NO VISUAL CODE** without an approved `DESIGN.md`. -> - **NO COMPLETION CLAIMS** without programmatic verification evidence. -> - **NO REFACTOR** without a failing test first. -> - **NO PATTERN** without a named Force. +| Plugin | Tools | Domain | +|---|:---:|---| +| 🎨 **designer** | 19 | Design decision engine - 6 personality clusters, 15 industry rules, 11 cognitive laws, 13 page templates, 9 presets (Linear, Stripe, Vercel, Apple, Carbon, shadcn, Notion, Supabase, Figma), 21 font pairings, DESIGN.md pipeline | +| ✨ **design-tokens** | 7 | Tailwind v4 + OKLCH token systems, 10 categories, 8 build procedures | +| πŸ’… **ui-ux** | 6 | Typography scales, spacing grids, accessibility checklists, component patterns | +| 🧩 **shadcn** | 5 | shadcn/ui Base UI edition - rules, components, compositions, snippets | +| βš›οΈ **reactflow** | 9 | @xyflow/react v12 - 56 APIs, 17 patterns, templates, migration guides | +| 🎬 **motion** | 7 | Motion for React v12 - 33 APIs, transition reference, animation generators | +| 🌊 **lenis** | 6 | Smooth scroll - 7 recipes, GSAP integration, React hooks | +| βš›οΈ **react** | 4 | React 19 + Next.js - RSC patterns, Zustand hierarchy, data fetching rules | +| 🐹 **echo** | 6 | Echo Go framework - 19 recipes, 13 middleware, decision matrices | +| 🐹 **golang** | 6 | Go - 18 practices, 10 design patterns, anti-patterns | +| πŸ¦€ **rust** | 4 | Rust - 18 practices, ownership guide, performance tips | -These laws are backed by **Rationalization Tables**-pre-written 
counters to every excuse an AI agent uses to skip quality gates. +**79 tools total.** -### Layer 3: Agents (Orchestration & Routing) +### Layer 2: Skills (process enforcement) -The internal harness is what ties the public layers together by managing process and domains: +Markdown with adversarial enforcement. Each gate skill has an Iron Law, a 1% Rule, and a rationalization table that names the exact excuses your AI will use to skip the gate and counters each one. -- bootstrap is injected at session start from generated runtime context -- `hyper` owns classification, routing, gates, and verification -- `website-builder` specializes in website-facing design and implementation work -- roles are internal and auto-called, not user-invoked commands +The `using-hyperstack` skill is injected into every session by `hooks/session-start.mjs`. You do not have to invoke it manually.
🧱 Core (13) - workflow, discipline, gates used on every task @@ -285,7 +173,7 @@ The internal harness is what ties the public layers together by managing process | Skill | Role | |---|---| -| `hyperstack` | Force-injected at session start via hook - the enforcement payload | +| `using-hyperstack` | Force-injected at session start via hook - the enforcement payload | | `testing-skills` | RED-GREEN-REFACTOR pressure testing for skills using subagents |
@@ -294,60 +182,52 @@ Full index at `skills/INDEX.md`. Regenerate with `bash scripts/generate-skills-i --- -## πŸ”’ Adversarial Enforcement +## πŸ”’ Why adversarial enforcement? -Ordinary skill markdown is a polite suggestion. Polite suggestion fails when an AI model is under pressure to "be helpful fast." Hyperstack skills are written adversarially: +Ordinary skill markdown is polite suggestion. Polite suggestion fails when the model is under pressure to "be helpful fast." Hyperstack gate skills are written adversarially, inspired by [obra/superpowers](https://github.com/obra/superpowers): -- **1% Rule**: If there is even a 1% chance a skill applies, the agent **must** invoke it. -- **Rationalization Tables**: We have already written down every excuse your AI will use to skip a gate, with a firm technical counter for each. -- **Loophole Closure**: The "Spirit of the Law" is explicitly defined as the "Letter of the Law" to prevent shortcut-hunting. +- **Iron Laws** in all-caps that spell out the non-negotiable rule +- **1% Rule** - if there is even a 1% chance a skill applies, invoke it +- **Rationalization tables** listing the exact excuses your AI will use to skip the gate, with counters +- **"Spirit of the rule is the letter of the rule"** clause to close loophole-hunting +- **SessionStart hook** that injects `using-hyperstack` into every new session so the AI cannot forget the system exists +Examples of Iron Laws enforced today: + +``` +NO CODE WITHOUT MCP GROUND-TRUTH DATA +NO VISUAL CODE WITHOUT AN APPROVED DESIGN.md +NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE +NO REFACTOR WITHOUT A FAILING TEST FIRST +NO PATTERN WITHOUT A NAMED FORCE +``` --- -## 🎨 The designer agent +## 🎨 The designer workflow (flagship example) -When you say, **β€œbuild me a SaaS dashboard”**: +The designer plugin + skill is the clearest illustration of how hyperstack composes all three layers. -1. **SessionStart** already puts in `hyperstack`, so AI know system is there. -2. 
**Blueprint skill** sees visual job and sends it to `hyperstack:designer`. -3. **Designer skill** runs `designer_resolve_intent(product)` to guess industry, personality, style, density, and mode. -4. Designer asks **3 questions** in base mode, or **12 questions** in advanced mode. -5. Like **Q11b** will ask what component library to use: shadcn, raw Tailwind, MUI, Mantine, Chakra, Ant Design, or custom. -6. Designer makes a **DESIGN.md** contract with 10 parts: theme, colors, type, spacing, components, motion, elevation, do/don’ts, responsive rules, and anti-patterns. -7. User approves the **DESIGN.md**. -8. **Forge-plan** reads it and makes one task for each section. If user picked shadcn, it calls `shadcn_get_component`. If not, it builds from the DESIGN.md spec. -9. Build tasks run with MCP tools as ground truth. -10. **designer_verify_implementation** checks build against **DESIGN.md**. -11. **Ship-gate** blocks final completion unless build passes the **DESIGN.md** rules. +When you say *"build me a SaaS dashboard"*: -AI cannot jump ahead. Every step has hard gate. Excuses already blocked by rationalization tables. +1. **SessionStart hook** has already injected `using-hyperstack` - the AI knows the system exists +2. **Blueprint skill** detects visual work and routes to `hyperstack:designer` +3. **Designer skill** calls `designer_resolve_intent(product)` to auto-detect industry, personality, style, density, mode +4. **Designer asks 3 questions** (base mode) or 12 questions (advanced mode) +5. **Q11b** asks which component library - shadcn, raw Tailwind, MUI, Mantine, Chakra, Ant Design, or custom +6. **Designer produces a DESIGN.md contract** with 10 sections (theme, colors, typography, spacing, components, motion, elevation, do/don'ts, responsive, anti-patterns) +7. **User approves** the DESIGN.md +8. **Forge-plan skill** reads the DESIGN.md and generates one task per section. 
For Section 5 (components), it calls `shadcn_get_component` for each component (only if Q11b chose shadcn - otherwise hand-builds from DESIGN.md spec) +9. **Implementation tasks** execute with the ground truth from MCP tools +10. **designer_verify_implementation** runs a programmatic compliance check against DESIGN.md before ship-gate +11. **Ship-gate** enforces the DESIGN.md compliance table (10 sections x specific rules) before allowing any completion claim +At every step, the AI cannot skip ahead. The hard gates are enforced by rationalization tables that have already written down every excuse your AI will try. --- ## πŸ› οΈ Available Tools -### πŸ“¦ Supported Versions - -Opinionated stack support for the latest stable stable releases. This prevents "AI Slop" by enforcing modern patterns (e.g., React 19 Actions, Tailwind v4 tokens). - -| Technology | Supported Version | Role | -| :--- | :--- | :--- | -| **React** | 19.x | Core Library | -| **Next.js** | 15.x | Application Framework | -| **Tailwind CSS** | v4.x | Design Tokens & Constraints | -| **Motion** | 12.x (fka Framer Motion) | Orchestrated Animations | -| **React Flow** | 12.x | Node-based Systems | -| **Lenis** | 1.1.x+ | Smooth Scroll Engine | -| **Zustand** | 5.x | State Management | -| **shadcn/ui** | Base UI Edition | Component Primitive Patterns | -| **Echo (Go)** | v4.x | Backend Framework | -| **Go** | 1.22+ | Backend Language | -| **Rust** | 1.77+ | Systems Language | -| **Bun** | 1.1.x+ | Runtime Environment | - -
🎨 Designer - designer_* (19 tools) @@ -480,22 +360,84 @@ Only invoked when the user explicitly chose shadcn in designer Q11b. --- +## πŸ—οΈ Architecture + +Everything runs from source. The published `hyperstack` bin is a small Node wrapper that boots `src/index.ts` through `tsx`, and Docker uses the same source-first runtime. No `dist/` output, no build step for deployment - just type checking. + +```text +bin/ +└── hyperstack.mjs # Published CLI wrapper - boots src/index.ts via tsx + +src/ +β”œβ”€β”€ index.ts # Entry - creates McpServer, loads all 11 plugins +β”œβ”€β”€ registry.ts # Plugin interface + loadPlugins() +β”œβ”€β”€ shared/ +β”‚ └── loader-factory.ts # createSnippetLoader() reads .txt at module init +└── plugins/ + β”œβ”€β”€ designer/ # 19 tools, data.ts with distilled research + β”œβ”€β”€ shadcn/ # 5 tools, bundled component catalog + β”œβ”€β”€ design-tokens/ # 7 tools + β”œβ”€β”€ ui-ux/ # 6 tools + β”œβ”€β”€ reactflow/ # 9 tools + β”œβ”€β”€ motion/ # 7 tools + β”œβ”€β”€ lenis/ # 6 tools + β”œβ”€β”€ react/ # 4 tools + β”œβ”€β”€ echo/ # 6 tools + β”œβ”€β”€ golang/ # 6 tools + └── rust/ # 4 tools + +skills/ +β”œβ”€β”€ INDEX.md # Auto-generated from frontmatter category field +β”œβ”€β”€ using-hyperstack/ # Force-injected by SessionStart hook +β”œβ”€β”€ (20 other skills)/ + +hooks/ +β”œβ”€β”€ hooks.json # Registers the SessionStart hook +β”œβ”€β”€ session-start.mjs # Cross-platform hook entrypoint for auto-injecting using-hyperstack +β”œβ”€β”€ session-start # Legacy shell helper +└── run-hook.cmd # Windows dispatcher + +scripts/ +└── generate-skills-index.sh # Regenerates skills/INDEX.md from frontmatter +``` + +Each plugin follows the same structure: `index.ts` registers tools from `tools/`, data lives in `data.ts`, code snippets live in `snippets/*.txt` and are loaded at module init time via `loader.ts`. 
+ +--- + +## 🚧 Boundaries and current status + +- **Platform:** Claude Code, Cursor, Gemini CLI, Copilot CLI, OpenCode, Codex, and any MCP-compatible client. Tested primarily on Claude Code. +- **Node:** 18 or newer. +- **No build step:** runs via `tsx`. Do not add a `dist/` folder. +- **Knowledgebase:** The original 25 research files that seeded the designer plugin are NOT in this repo anymore. They live at `../knowledgebase/` outside the repo, gitignored for safety. All actionable content is distilled into `src/plugins/designer/data.ts`. +- **shadcn plugin:** Ships with 4 curated components (Button, Dialog, Field, Select) as reference. For a full catalog, you still need your project's own shadcn files. +- **Enforcement vs suggestion:** Hyperstack skills are markdown-based prose gates. They depend on the AI reading them. The SessionStart hook makes this harder to skip, but it is not a hard runtime block. True enforcement would require tool-level hooks. +- **Testing skills:** `testing-skills` defines a RED-GREEN-REFACTOR methodology for pressure-testing skills with subagents. Scenario files exist for 3 skills (ship-gate, designer, blueprint). Other gate skills need their own scenarios. + +--- + ## 🀝 Contributing -We welcome contributions that follow the **Disciplined Engineering** standard. +Fork, branch, open a PR. All new plugins must follow the existing file structure (`index.ts` + `data.ts` + `tools/` + `snippets/`). All new skills must include a `category:` frontmatter field (core, domain, or meta) so they appear in `skills/INDEX.md`. -1. **Plugins**: Must follow the `index.ts` + `data.ts` + `tools/` + `snippets/` pattern. -2. **Skills**: Must include `category` frontmatter and adhere to the Adversarial Enforcement style. -3. **Verification**: All PRs must pass the full `npm run build` (Type-check) and CI suite. 
+After adding or editing any skill: ```bash -# Regenerate the skills index after editing bash scripts/generate-skills-index.sh ``` +Run type checking before opening a PR: + +```bash +npm run build # tsc --noEmit +``` + +--- + ## 📄 License -MIT © [Orkait](https://github.com/orkait) | Adversarial philosophy inspired by [Jesse Vincent's Superpowers](https://github.com/obra/superpowers). +MIT © [Orkait](https://github.com/orkait) --- diff --git a/SKILL.md b/SKILL.md index 2d09ef1..6ea62ac 100755 --- a/SKILL.md +++ b/SKILL.md @@ -111,7 +111,7 @@ Use these tools for **100% accurate** API details, props, code examples, and pat - **Rust Practices** (`rust_*`): Borrowing rules, Error handling (anyhow/thiserror), Performance. ### 💅 Design Systems -- **Designer** (`designer_*`): Decision layer - 17 tools. 6 personality clusters, 15 industry rules, 11 cognitive laws, 13 page templates, 9 code-ready presets (Linear/Stripe/Vercel/Apple/Carbon/shadcn/Notion/Supabase/Figma), 21 font pairings, 50+ anti-patterns. Call `designer_resolve_intent` first for any visual task. +- **Designer** (`designer_*`): Decision layer — 17 tools. 6 personality clusters, 15 industry rules, 11 cognitive laws, 13 page templates, 9 code-ready presets (Linear/Stripe/Vercel/Apple/Carbon/shadcn/Notion/Supabase/Figma), 21 font pairings, 50+ anti-patterns. Call `designer_resolve_intent` first for any visual task. - **Design Tokens** (`design_tokens_*`): Tailwind v4 + OKLCH templates, Color ramp math. - **UI/UX Principles** (`ui_ux_*`): WCAG contrast, Typography scales, 4px grid rules. 
diff --git a/agents/hyper/CHECKS.md b/agents/hyper/CHECKS.md deleted file mode 100644 index a3ce313..0000000 --- a/agents/hyper/CHECKS.md +++ /dev/null @@ -1,27 +0,0 @@ -# Hyper Agent Checks - -## Preconditions - -- Request has been classified -- Required specialist routing decision is explicit -- Required gates are known - -## Required Evidence - -- Why the request stayed with `hyper` or routed to a specialist -- What workspace/package evidence informed that routing decision -- What MCP or skill gates were required -- What verification command proves any completion claim - -## Done Criteria - -- Correct role selected -- Specialist handoff or direct execution stayed within scope -- Verification ownership remained with `hyper` - -## Red Flags - -- `hyper` doing specialist website work without delegation logic -- Silent scope widening -- Completion claims without evidence -- Routing directly from user request to a specialist without `hyper` diff --git a/agents/hyper/CONTEXT.md b/agents/hyper/CONTEXT.md deleted file mode 100644 index e6c7336..0000000 --- a/agents/hyper/CONTEXT.md +++ /dev/null @@ -1,27 +0,0 @@ -# Hyper Agent Context Policy - -## Hot Context - -- Current user request -- Active design or plan status -- Global Hyperstack invariants -- Required MCP-first rules -- Role routing and transition rules - -## Warm Context - -- Relevant skill contracts -- Recent verification results -- Current diff or changed surface summary - -## Cold Context - -- Deep reference docs -- Large examples -- Historical research notes - -## Never Load - -- Entire reference forests unless required -- Unrelated plugin docs -- Full large files when a targeted slice is enough diff --git a/agents/hyper/LIFECYCLE.md b/agents/hyper/LIFECYCLE.md deleted file mode 100644 index 4b88421..0000000 --- a/agents/hyper/LIFECYCLE.md +++ /dev/null @@ -1,42 +0,0 @@ -# Hyper Agent Lifecycle - -## Entry Criteria - -- A new user request exists -- Hyperstack bootstrap is active -- No other internal role 
currently owns the request lifecycle - -## Steps - -1. Read the request and inspect the workspace before routing -2. Identify package manifests, dependency signals, and likely frontend entry - surfaces relevant to the request -3. Classify the work using both the request and the workspace reality -4. Determine whether a specialist role is required -5. Enforce MCP-first and design/plan gates before implementation -6. Route website-facing work to `website-builder` -7. Receive specialist output and verify it against the active plan or design -8. Run review, verification, and ship gates -9. Deliver the result or report blockers with evidence - -## Handoffs - -- `hyper -> website-builder` for website pages, landing pages, dashboards, - redesigns, and website-experience-heavy UI work -- `website-builder -> hyper` after specialist design or implementation output is - ready for review and verification - -## Exit Criteria - -- A specialist has been selected and briefed, or -- `hyper` has completed the request itself, or -- `hyper` has blocked safely and reported the blocker with evidence - -## Failure Escalation - -- If classification is ambiguous, default to `hyper` and require explicit - delegation criteria before specialist routing -- If a specialist widens scope or attempts to self-ship, reclaim control and - route back through verification -- If verification fails, route to the appropriate corrective path before any - completion claim diff --git a/agents/hyper/PROFILE.md b/agents/hyper/PROFILE.md deleted file mode 100644 index 793718a..0000000 --- a/agents/hyper/PROFILE.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -name: hyper -kind: core -auto_invoke_when: - - every user request - - any task that requires classification, orchestration, verification, or delivery -owns: - - request classification - - internal role routing - - gate enforcement - - lifecycle transitions - - final verification - - delivery orchestration -must_not_do: - - silently bypass required specialists - - 
skip MCP-first grounding - - allow completion claims without verification evidence -delegates_to: - - website-builder -requires: - - current bootstrap invariants from hyperstack - - approved design before implementation when required - - verification evidence before completion or delivery ---- - -# Hyper Agent Profile - -## Mission - -`hyper` is Hyperstack's conductor. It owns request classification, internal role -routing, gate enforcement, lifecycle transitions, and final verification. - -## Authority - -- Receives every user request first -- Decides whether the work stays with `hyper` or routes to a specialist -- Reuses existing Hyperstack skills and MCP plugins as the execution substrate -- Owns final review, ship-gate, and delivery authority - -## Boundaries - -`hyper` does not exist to absorb all work. It delegates specialist work when the -request is clearly in a specialist domain. - -For website-facing work, `hyper` routes to `website-builder` and later regains -control for review, verification, and delivery. 
diff --git a/agents/website-builder/CHECKS.md b/agents/website-builder/CHECKS.md deleted file mode 100644 index c20bcd4..0000000 --- a/agents/website-builder/CHECKS.md +++ /dev/null @@ -1,32 +0,0 @@ -# Website Builder Checks - -## Preconditions - -- Delegation from `hyper` exists -- Website-facing scope is explicit -- Required design/plan gate is active - -## Required Evidence - -- The package manifests and dependency signals that describe the active frontend stack -- The core frontend file map for the active surface: routes, layouts, major - components, styles, tokens, navigation -- What primary user task the page or flow serves -- CTA hierarchy and page structure decisions -- State coverage for loading, empty, error, success, disabled, or destructive states -- Responsive and accessibility implications -- MCP-backed grounding for stack-specific implementation choices - -## Done Criteria - -- Workspace and frontend inventory are explicit and tied to the delegated task -- Website-facing scope completed without widening -- Specialist output is ready for `hyper` to review -- No shipping or completion claim made directly by `website-builder` - -## Red Flags - -- Acting like a generic frontend builder instead of a website specialist -- Implementing outside delegated scope -- Missing state coverage or CTA hierarchy -- Claiming completion without handing back to `hyper` diff --git a/agents/website-builder/CONTEXT.md b/agents/website-builder/CONTEXT.md deleted file mode 100644 index f7010f1..0000000 --- a/agents/website-builder/CONTEXT.md +++ /dev/null @@ -1,31 +0,0 @@ -# Website Builder Context Policy - -## Hot Context - -- Current delegated website task -- Workspace inventory for the active website surface -- Relevant package manifests and dependency signals -- Core frontend files for the active page or flow -- Relevant page intent and audience -- Active website-specific design constraints -- Relevant website-experience checklist items -- Targeted MCP outputs for design, 
UI/UX, tokens, motion, and stack choices - -## Warm Context - -- Approved `DESIGN.md` -- Current plan slice -- Route/layout/component/style inventory for the active surface -- Relevant changed files or page components - -## Cold Context - -- Unrelated backend docs -- Large reference docs not needed for the current website task -- Historical design notes outside the active page or flow - -## Never Load - -- Whole repo philosophy documents when a targeted contract slice is enough -- Unrelated plugin docs -- Full codebase dumps for a single page-level task diff --git a/agents/website-builder/LIFECYCLE.md b/agents/website-builder/LIFECYCLE.md deleted file mode 100644 index bb08ee6..0000000 --- a/agents/website-builder/LIFECYCLE.md +++ /dev/null @@ -1,42 +0,0 @@ -# Website Builder Lifecycle - -## Entry Criteria - -- `hyper` has classified the request as website-facing work -- Delegation to `website-builder` is explicit -- Required design or planning gate is active - -## Steps - -1. Read the user workspace before making website decisions -2. Inspect package manifests and dependencies (`package.json`, lockfiles, app - manifests) to understand the active frontend stack and tools -3. Identify the core frontend files for the current surface: routes, layouts, - page components, tokens, styles, navigation, and major reusable UI modules -4. Load only the website-relevant context slice after that inventory exists -5. Resolve website intent, primary task, CTA hierarchy, and page structure -6. Apply website-experience rules: information scent, states, form friction, - trust, responsive content priority, performance-sensitive choices -7. Produce or refine website design outputs such as `DESIGN.md` -8. Implement website-facing code only when delegated and within scope -9. 
Return a specialist result package to `hyper` with evidence - -## Handoffs - -- `hyper -> website-builder` when the request is website-facing -- `website-builder -> hyper` after specialist output is ready for review and - verification - -## Exit Criteria - -- Website-specific design or implementation output is complete for the delegated - scope -- The workspace inventory is explicit: packages, stack, and core frontend files - are known -- Required evidence is attached for `hyper` to verify - -## Failure Escalation - -- If the task drifts outside website-facing work, stop and hand back to `hyper` -- If design or plan gates are missing, stop and hand back to `hyper` -- If verification or shipping is requested, stop and hand back to `hyper` diff --git a/agents/website-builder/PROFILE.md b/agents/website-builder/PROFILE.md deleted file mode 100644 index f98c53d..0000000 --- a/agents/website-builder/PROFILE.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -name: website-builder -kind: specialist -auto_invoke_when: - - landing pages - - dashboards - - marketing sites - - website redesigns - - website-experience-heavy page work -owns: - - website-specific design work - - website-specific implementation work when delegated - - page structure and CTA hierarchy - - website experience quality -must_not_do: - - self-ship - - bypass main - - widen scope outside website-facing work -delegates_to: - - hyper -requires: - - delegation from hyper - - active design and plan gates from Hyperstack - - MCP-first grounding for website stack choices ---- - -# Website Builder Profile - -## Mission - -`website-builder` is Hyperstack's first specialist role. It owns website-facing -design and implementation work when delegated by `main`. 
- -## Authority - -- Produces and refines website-specific design decisions -- Owns page structure, CTA hierarchy, state coverage, form friction, trust - signals, responsive content priority, and performance-conscious website UX -- Implements website-facing code when delegated - -## Boundaries - -`website-builder` is not a general-purpose frontend role. It is specifically for -website work and website experience. - -`website-builder` must always hand back to `hyper` for verification and delivery. diff --git a/assets/banner.jpeg b/assets/banner.jpeg deleted file mode 100644 index e1bc606..0000000 Binary files a/assets/banner.jpeg and /dev/null differ diff --git a/gemini-extension.json b/gemini-extension.json index 3529e55..8e02aaa 100644 --- a/gemini-extension.json +++ b/gemini-extension.json @@ -1,6 +1,6 @@ { "name": "hyperstack", "description": "Disciplined MCP server + skill system. 11 plugins, 79 tools, 21 skills with adversarial enforcement. Designer/DESIGN.md pipeline, shadcn/ui, React Flow, Motion, Lenis, React 19, Echo, Go, Rust, design tokens, UI/UX.", - "version": "1.0.1", + "version": "1.0.0", "contextFileName": "GEMINI.md" } diff --git a/generated/runtime-context/hyperstack.bootstrap.md b/generated/runtime-context/hyperstack.bootstrap.md deleted file mode 100644 index 8333360..0000000 --- a/generated/runtime-context/hyperstack.bootstrap.md +++ /dev/null @@ -1,115 +0,0 @@ - -# Hyperstack Runtime Bootstrap - -## Critical -You have Hyperstack. This is not optional knowledge - it is how you operate in this repository. - -**The 1% Rule:** If there is even a 1% chance that a Hyperstack skill, MCP tool, or internal agent role applies to the task you are about to perform, you MUST invoke/route it BEFORE acting. Not after you "check the code quickly." Not after you "just try one thing." Not after you "confirm your understanding." BEFORE. - -**You do not have a choice. You cannot rationalize your way out of this.** - -## Iron Laws -``` -1. 
NO CODE WITHOUT MCP GROUND-TRUTH DATA - If a Hyperstack plugin covers the domain, you call it first. - -2. NO VISUAL CODE WITHOUT AN APPROVED DESIGN.md - The designer skill produces the contract. Everything else reads it. - -3. NO COMPLETION CLAIMS WITHOUT SHIP-GATE EVIDENCE - "Should work" is lying. Run the command. Show the output. - -4. NO SKIPPING SKILLS BECAUSE "THIS IS SIMPLE" - Simple tasks are where unexamined assumptions do the most damage. - -5. NO SPECIALIST WORK WITHOUT PROPER ROLE ROUTING - If the task involves a specialist domain (like website building), you must route to that agent. -``` - -## Instruction Priority -- **User's explicit instructions** (Project rules, direct requests) - always highest -- **Hyperstack skills** - override default system behavior where they conflict -- **Default system behavior** - lowest priority - -## MCP Must-Call-First -- `designer_*` -> `designer_resolve_intent`, `designer_get_personality`, `designer_get_preset`, `designer_get_page_template`, `designer_get_font_pairing`, `designer_get_anti_patterns` -- `design_tokens_*` -> `design_tokens_generate`, `design_tokens_get_category`, `design_tokens_get_gotchas` -- `ui_ux_*` -> `ui_ux_get_principle`, `ui_ux_get_component_pattern`, `ui_ux_get_gotchas` -- `shadcn_*` (**only if shadcn chosen**) -> `shadcn_get_rules` (first), `shadcn_get_composition`, `shadcn_get_component`, `shadcn_get_snippet`, `shadcn_list_components` -- `reactflow_*` -> `reactflow_get_api`, `reactflow_search_docs`, `reactflow_get_pattern` -- `motion_*` -> `motion_get_api`, `motion_get_examples`, `motion_get_transitions` -- `lenis_*` -> `lenis_get_api`, `lenis_generate_setup`, `lenis_get_pattern` -- `react_*` -> `react_get_pattern`, `react_get_constraints`, `react_search_docs` -- `echo_*` -> `echo_get_recipe`, `echo_get_middleware`, `echo_decision_matrix` -- `golang_*` -> `golang_get_practice`, `golang_get_pattern`, `golang_get_antipatterns` -- `rust_*` -> `rust_get_practice`, `rust_cheatsheet`, 
`rust_search_docs` - -## Workflow Skills -- `hyperstack:blueprint`: Before any feature build - MCP survey, design gate, negative doubt -- `hyperstack:designer`: Before any visual/UX work - produces DESIGN.md contract -- `hyperstack:forge-plan`: After design approval - MCP-verified implementation plan -- `hyperstack:run-plan`: Have an existing plan - validate then execute -- `hyperstack:engineering-discipline`: During execution - Senior SDE phase gates -- `hyperstack:ship-gate`: Before any completion claim - evidence required -- `hyperstack:deliver`: After all tasks complete - final verification and delivery -- `hyperstack:autonomous-mode`: Full autonomous execution - runs end-to-end, only stops on failure -- `hyperstack:subagent-ops`: Plans with independent tasks - fresh agent per task, two-stage review -- `hyperstack:test-first`: Before writing any implementation code - red-green-refactor -- `hyperstack:worktree-isolation`: Before feature work - clean workspace isolation -- `hyperstack:code-review`: After completing tasks - dispatch reviewer subagent -- `hyperstack:parallel-dispatch`: 2+ independent failures or tasks - concurrent agent dispatch -- `hyperstack:designer`: Before any visual/UX work - produces DESIGN.md -- `hyperstack:debug-discipline`: Any bug or unexpected behaviour - root cause first -- `hyperstack:behaviour-analysis`: UI/UX audits, state machine correctness -- `hyperstack:design-patterns-skill`: Selecting the right abstraction or design pattern -- `hyperstack:security-review`: OWASP audits, API and infrastructure security -- `hyperstack:readme-writer`: Evidence-based documentation - -## Layer 3: Agents (Orchestration & Routing) -- Hyperstack uses internal roles to manage complexity. These roles are internal and auto-invoked. -- `hyper` - Core: Classification, routing, gate enforcement, final verification, delivery. -- `website-builder` - Specialist: Website-facing design/implementation, CTA hierarchy, page structure. 
-- Every request starts in `hyper`. -- `hyper` classifies and delegates to specialists (e.g., `website-builder`) when domain-specific work is detected. -- Specialists MUST hand back to `hyper` for final verification and ship-gate. - -## Routing Summary -- Every request enters through `hyper` -- `hyper` inspects the workspace first: package manifests, dependency signals, -- `hyper -> website-builder` for website-facing work: landing pages, dashboards, -- `website-builder -> hyper` after specialist output is ready for review and -- If classification is ambiguous, stay in `hyper` - -## Allowed Transitions -- `user request -> hyper` -- `hyper -> website-builder` -- `website-builder -> hyper` -- `hyper -> existing Hyperstack skills/plugins` -- `hyper -> verification and delivery gates` - -## Disallowed Transitions -- `user request -> website-builder` -- `website-builder -> ship` -- `website-builder -> deliver` -- `website-builder` claiming final completion directly - -## High-Signal Red Flags -- "I know this React Flow API from memory" -> Memory drifts. v11 and v12 are different. -- "This is a simple animation" -> Simple animations need `prefers-reduced-motion`, correct easing, and GPU-only properties -- "Go error handling is straightforward" -> Straightforward code is where anti-patterns ship -- "I'll check docs after I write it" -> You will ship before you check. Every time. -- "I know the OKLCH token pattern" -> OKLCH has specific rules about alpha, chroma peaks, dark mode lightness -- "This pattern looks common, I'll adapt it" -> Adaptation hides drift - -## Degraded Mode -- If MCP unavailable, tell the user explicitly: "MCP unavailable" and flag answers as uncertain. - -## Announcement Rule -- Before invoking any Hyperstack skill, announce it with the exact format and purpose so the user can audit it. - -## Final Check -- [ ] Did I check whether any Hyperstack skill applies to this task? (1% rule) -- [ ] Did I call any relevant MCP tool for ground-truth data? 
(memory is not acceptable) -- [ ] If this involves visual work, did I invoke designer BEFORE writing any code? -- [ ] If I'm claiming something is done, did I run the verification command THIS message? -- [ ] Did I announce every skill invocation with the exact format? diff --git a/harness/context-policy.md b/harness/context-policy.md deleted file mode 100644 index 9b2a717..0000000 --- a/harness/context-policy.md +++ /dev/null @@ -1,31 +0,0 @@ -# Harness Context Policy - -## Principle - -Each internal role should load only the context slice it needs. - -## Role Slices - -- `hyper` - - classification, routing, gates, verification, delivery -- `website-builder` - - workspace inventory - - package manifests and dependency signals - - core frontend files for the active surface - - website intent, page structure, website-experience constraints, website code - -## Tiers - -- Hot - - current task - - active role contract - - active design/plan slice -- Warm - - targeted skill or MCP outputs - - changed surface summary -- Cold - - deep references and examples -- Never load by default - - unrelated plugin docs - - whole reference forests - - full repo dumps for narrow tasks diff --git a/harness/observability.md b/harness/observability.md deleted file mode 100644 index 66660b0..0000000 --- a/harness/observability.md +++ /dev/null @@ -1,19 +0,0 @@ -# Harness Observability - -## V1 Event Contract - -Future harness traces should emit these event names: - -- `request_classified` -- `role_selected` -- `role_handoff` -- `required_skill_invoked` -- `required_mcp_tool_invoked` -- `verification_gate_entered` -- `verification_gate_passed` -- `verification_gate_failed` - -## V1 Scope - -No external telemetry backend is required in v1. This file defines the event -contract so future traces and tests can rely on stable names. 
diff --git a/harness/router.md b/harness/router.md deleted file mode 100644 index f04c611..0000000 --- a/harness/router.md +++ /dev/null @@ -1,41 +0,0 @@ -# Harness Router - -## Default Rule - -Every user request enters through `hyper`. - -Users do not invoke internal roles directly. Roles are internal and auto-called. - -## Routing Matrix - -Route `hyper -> website-builder` when the request is primarily about: - -- landing pages -- dashboards -- marketing or product websites -- page redesigns -- website page structure -- CTA hierarchy -- trust signals -- form friction -- responsive content priority -- "make the website/page feel better" style requests - -Before routing, `hyper` must inspect the workspace enough to know: - -- which package manifests and dependency signals define the active frontend stack -- which core frontend files likely own the affected surface -- whether the request is actually website-facing rather than generic frontend or - backend work - -Keep work in `hyper` when the request is primarily about: - -- backend or infra -- pure MCP/plugin behavior -- verification, review, or delivery -- non-website specialist domains not yet modeled as roles - -## Safety Rule - -If the request is ambiguous, keep ownership in `hyper` until delegation criteria -are explicit. diff --git a/harness/transitions.md b/harness/transitions.md deleted file mode 100644 index a023723..0000000 --- a/harness/transitions.md +++ /dev/null @@ -1,21 +0,0 @@ -# Harness Transitions - -## Allowed - -- `user request -> hyper` -- `hyper -> website-builder` -- `website-builder -> hyper` -- `hyper -> existing Hyperstack skills/plugins` -- `hyper -> verification and delivery gates` - -## Disallowed - -- `user request -> website-builder` -- `website-builder -> ship` -- `website-builder -> deliver` -- `website-builder` claiming final completion directly - -## V1 Principle - -The new role harness is layered on top of the current Hyperstack skills and MCP -plugins. 
It does not replace them in v1. diff --git a/hooks/run-hook.cmd b/hooks/run-hook.cmd new file mode 100644 index 0000000..cc7daa3 --- /dev/null +++ b/hooks/run-hook.cmd @@ -0,0 +1,4 @@ +@echo off +:: Cross-platform hook dispatcher for Hyperstack +:: Usage: run-hook.cmd +bash "%~dp0%~1" %2 %3 %4 %5 diff --git a/hooks/session-start b/hooks/session-start new file mode 100755 index 0000000..378f033 --- /dev/null +++ b/hooks/session-start @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# SessionStart hook for Hyperstack plugin +# Injects using-hyperstack skill as EXTREMELY_IMPORTANT context at session start + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" + +if ! skill_content=$(cat "${PLUGIN_ROOT}/skills/using-hyperstack/SKILL.md" 2>&1); then + error_msg="Hyperstack session-start hook failed: could not read ${PLUGIN_ROOT}/skills/using-hyperstack/SKILL.md" + printf '{"error": "%s"}\n' "$error_msg" >&2 + exit 1 +fi + +# Escape string for JSON embedding +escape_for_json() { + local s="$1" + s="${s//\\/\\\\}" + s="${s//\"/\\\"}" + s="${s//$'\n'/\\n}" + s="${s//$'\r'/\\r}" + s="${s//$'\t'/\\t}" + printf '%s' "$s" +} + +skill_escaped=$(escape_for_json "$skill_content") +session_context="\nYou have Hyperstack.\n\n**Below is the full content of your 'hyperstack:using-hyperstack' skill - your introduction to using Hyperstack. 
For all other skills, use the 'Skill' tool:**\n\n${skill_escaped}\n" + +# Platform detection - emit the correct JSON field for the current harness +if [ -n "${CURSOR_PLUGIN_ROOT:-}" ]; then + # Cursor + printf '{\n "additional_context": "%s"\n}\n' "$session_context" +elif [ -n "${CLAUDE_PLUGIN_ROOT:-}" ] && [ -z "${COPILOT_CLI:-}" ]; then + # Claude Code + printf '{\n "hookSpecificOutput": {\n "hookEventName": "SessionStart",\n "additionalContext": "%s"\n }\n}\n' "$session_context" +else + # Copilot CLI or unknown platform - SDK standard format + printf '{\n "additionalContext": "%s"\n}\n' "$session_context" +fi + +exit 0 diff --git a/hooks/session-start.mjs b/hooks/session-start.mjs index 6f04cdf..f35b6df 100644 --- a/hooks/session-start.mjs +++ b/hooks/session-start.mjs @@ -12,21 +12,9 @@ function emit(payload) { } try { - const compiledBootstrapPath = join(pluginRoot, "generated", "runtime-context", "hyperstack.bootstrap.md"); - const fallbackSkillPath = join(pluginRoot, "skills", "hyperstack", "SKILL.md"); - - let bootstrapContent; - let bootstrapLabel; - - try { - bootstrapContent = readFileSync(compiledBootstrapPath, "utf8"); - bootstrapLabel = "compiled runtime bootstrap"; - } catch { - bootstrapContent = readFileSync(fallbackSkillPath, "utf8"); - bootstrapLabel = "full content of your 'hyperstack:hyperstack' skill"; - } - - const sessionContext = `\nYou have Hyperstack.\n\n**Below is the ${bootstrapLabel} - your introduction to using Hyperstack. For all other skills, use the 'Skill' tool:**\n\n${bootstrapContent}\n`; + const skillPath = join(pluginRoot, "skills", "using-hyperstack", "SKILL.md"); + const skillContent = readFileSync(skillPath, "utf8"); + const sessionContext = `\nYou have Hyperstack.\n\n**Below is the full content of your 'hyperstack:using-hyperstack' skill - your introduction to using Hyperstack. 
For all other skills, use the 'Skill' tool:**\n\n${skillContent}\n`; if (process.env.CURSOR_PLUGIN_ROOT) { emit({ additional_context: sessionContext }); diff --git a/install.md b/install.md index ae652a2..67ce11f 100644 --- a/install.md +++ b/install.md @@ -2,133 +2,61 @@ If you are an AI assistant reading this file, please perform the following steps to install Hyperstack on the user's system. You must adapt these steps based on the environment/CLI you are currently running in (e.g., Claude Code, Cursor, Gemini CLI, Copilot CLI, OpenCode, Codex, Windsurf, etc.). -## Read This First - -If you only remember four things, remember these: - -1. Hyperstack is a harness, not just an MCP server. -2. Both the MCP server and the skills must be installed. -3. The preferred Docker setup is a persistent `hyperstack-mcp` container plus - `docker exec`, not `docker run --rm` per session. -4. Installation is not complete until the MCP server responds and the skills are - visible on disk. - ## What Hyperstack Gives the User -Three tightly-coupled pieces, installed together: - -1. **An internal harness** - bootstrap, internal role routing, and workflow control. Current internal roles include `main` and `website-builder`. -2. **An MCP server** with 12 plugins and 80 tools - deterministic knowledge for React Flow v12, Motion v12, Lenis, React 19 / Next.js, Echo, Go, Rust, design tokens, UI/UX principles, shadcn/ui (Base UI edition), and the `designer` DESIGN.md pipeline. -3. **A skill system** with 21 skills including adversarial enforcement gates (`blueprint`, `designer`, `forge-plan`, `ship-gate`, `engineering-discipline`) and a SessionStart hook that face-injects the `hyperstack` skill at every session start. - -The install steps below wire the public pieces the user actually needs: the MCP -server and the skills. The internal harness is shipped inside the repository and -becomes active through the bootstrap hook and runtime context artifacts. 
- -If you install only one public piece: - -- MCP only -> the user gets tools without discipline -- skills only -> the user gets prose rules without tools - -## Step 1: Detect and Configure (Autopilot) +Two things, installed together: -Hyperstack features an **Autonomous Research-Driven Installer** that identifies your environment and handles the entire configuration automatically. +1. **An MCP server** with 11 plugins and 79 tools - deterministic knowledge for React Flow v12, Motion v12, Lenis, React 19 / Next.js, Echo, Go, Rust, design tokens, UI/UX principles, shadcn/ui (Base UI edition), and the `designer` DESIGN.md pipeline. +2. **A skill system** with 21 skills including adversarial enforcement gates (`blueprint`, `designer`, `forge-plan`, `ship-gate`, `engineering-discipline`) and a SessionStart hook that force-injects the `using-hyperstack` skill at every session start. -**If you are an AI assistant reading this file:** -1. Call `run_command` with: `bun run setup` inside the repository. -2. The script will automatically detect the IDE, symlink the skills, deep-merge the MCP config, and provision the Docker container. -3. Inform the user that setup is complete and ask them to restart the session. +Both must be installed for the system to work correctly. The MCP server alone gives the user tools; the skills alone give the user prose rules with no tools to call. -> [!CAUTION] -> **CRITICAL**: RUN ONLY if the installer is not able to do it correctly. If you must install manually for the current IDE/CLI, you must validate that the MCP connection is successful before proceeding. +## Step 1: Detect Environment -### 1. Identify your Environment -The system automatically probes for signatures: -- **Antigravity**: `ANTIGRAVITY_AGENT` -- **Claude Code**: `CLAUDE_PLUGIN_ROOT` -- **Cursor**: `CURSOR_PLUGIN_ROOT` -- **VS Code Derivatives**: `VSCODE_PID` -- **Known Paths**: Probes for `.claude.json`, `.cursor/`, `.codeium/windsurf/`, `.roo/`, and `.gemini/`. 
+Determine which CLI or IDE you are currently operating within. This dictates where the skills are cloned and which configuration file holds the MCP server definition. ## Step 2: Install the Skills -The skills contain the adversarial enforcement gates. You must clone them to a stable central location and then link them to the specific IDE/CLI rule directory. +Clone this repository into the appropriate skills directory for your environment: -### 1. Central Repository +| Environment | Command | +|---|---| +| **Claude Code** | `git clone https://github.com/orkait/hyperstack.git ~/.claude/skills/hyperstack` | +| **Cursor** | `git clone https://github.com/orkait/hyperstack.git ~/.cursor/skills/hyperstack` | +| **Gemini CLI** | `git clone https://github.com/orkait/hyperstack.git ~/.gemini/skills/hyperstack` | +| **Copilot CLI** | Use plugin marketplace if available, otherwise clone into the user's configured skills path | +| **OpenCode / Codex** | Follow the platform's file-based skill installation path | +| **Other** | If your environment supports local file-based skills, clone the repo to the standard skills/extensions directory for your platform. If not supported, you may skip this step and proceed to installing the MCP server, but note that the SessionStart hook and adversarial enforcement will not activate. | -**macOS / Linux (Shell):** -```bash -EXT_DIR="$HOME/.hyperstack" && \ - ([ -d "$EXT_DIR" ] && git -C "$EXT_DIR" pull || git clone https://github.com/orkait/hyperstack.git "$EXT_DIR") -``` - -**Windows (PowerShell):** -```powershell -$EXT_DIR = Join-Path $HOME ".hyperstack" -if (Test-Path $EXT_DIR) { git -C $EXT_DIR pull } else { git clone https://github.com/orkait/hyperstack.git $EXT_DIR } -``` - -### 2. Environment-Specific Linking +The repository includes a `hooks/session-start.mjs` entrypoint that injects the `using-hyperstack` skill content into every session automatically. 
This is how the enforcement (Iron Laws, 1% Rule, rationalization tables) reaches the agent without manual invocation. On platforms that support `.claude-plugin/hooks.json`, this happens automatically. On others, the skill is still readable but the hook will not fire. -Based on the environment you detected in Step 1, identify the correct target directory to enable the skills: +## Step 3: Configure the MCP Server -| IDE/CLI | Target Skill Path | Type | -|---|---|---| -| **Claude Code** | `~/.claude/skills/hyperstack` | Global | -| **Kiro Code** | `~/.kiro/skills/hyperstack` | Global | -| **Antigravity** | `~/.gemini/antigravity/skills/hyperstack` | Global | -| **Cursor** | `.cursor/rules/` | Project-level | -| **Roo Code** | `.roo/rules/` | Project-level | -| **Windsurf** | `.codeium/windsurf/` | Project-level | +Check if Docker is installed and running on the user's system. -As an agent, you should **symlink** the `~/.hyperstack/skills` directory to the target path above. This ensures updates to the central repo are immediately reflected in your active session. +### Option A: Docker (Preferred) -## Step 3: Configure the MCP Server +If Docker is available, add the following configuration to the appropriate MCP config file for the current environment (e.g., `~/.claude.json`, `~/.gemini/config.json`, or the relevant IDE config for Cursor/Windsurf): ```json { "mcpServers": { "hyperstack": { "command": "docker", - "args": ["exec", "-i", "hyperstack-mcp", "bun", "/app/src/index.ts"] + "args": [ + "run", + "-i", + "--rm", + "--memory=256m", + "--cpus=0.5", + "ghcr.io/orkait/hyperstack:main" + ] } } } ``` -Each CLI invocation spawns a new `bun` process inside the existing `hyperstack-mcp` container - no new container, no startup cost. - -**Important:** Some environments (like Qwen Code) use `settings.json` at the root level rather than a dedicated `.mcp.json` file. The `mcpServers` object goes at the top level of the settings file. Do not nest it inside another key. 
- -**Name mismatch repair:** If the MCP config references `hyperstack-mcp` but the container has a different name (e.g., a random Docker name like `kind_bouman` from a previous `docker run`), fix it before proceeding: - -```bash -# Find the actual running hyperstack container -ACTUAL_NAME=$(docker ps --filter "ancestor=ghcr.io/orkait/hyperstack:main" --format "{{.Names}}" | head -1) -if [ -n "$ACTUAL_NAME" ] && [ "$ACTUAL_NAME" != "hyperstack-mcp" ]; then - docker rename "$ACTUAL_NAME" hyperstack-mcp - echo "Renamed container '$ACTUAL_NAME' → 'hyperstack-mcp'" -fi -``` - -This is a common issue when the user ran `docker run` directly (without `--name`) before following these instructions, or when the image was pulled under a different container name. The `docker exec` MCP config requires the name to match exactly. - -**Why not `docker run --rm` per session?** `docker run` creates a brand-new container on every invocation. Over several sessions this piles up container state, spends 100–300ms per session on cold startup, and (without proper stdin lifecycle handling) can leave orphaned containers running after Claude exits. The `exec` pattern has none of these problems. - -**Upgrading the image:** - -```bash -docker pull ghcr.io/orkait/hyperstack:main -docker rm -f hyperstack-mcp -docker run -d --name hyperstack-mcp --restart unless-stopped \ - --memory=512m --cpus=1 \ - --entrypoint sleep \ - ghcr.io/orkait/hyperstack:main infinity -``` - -Always delete the old container before creating a new one - the `sleep infinity` pattern means the container never exits, so `docker run` with the same name will fail if the old one still exists. - -Then restart the CLI/IDE so open sessions reconnect to the new container. +The `--memory=256m` and `--cpus=0.5` flags are intentional resource limits. Do not remove them. The server runs fine within these constraints. ### Option B: Local Bun (Fallback) @@ -154,154 +82,49 @@ There is no build step.
Bun runs TypeScript directly from source. ## Step 4: Verify Installation -**Pre-check: confirm the MCP server starts before opening the IDE.** - -For Docker (Option A), first confirm the persistent container is running AND the name matches the config: - -```bash -# Step 1 - Check container is running -docker ps --filter name=hyperstack-mcp - -# Step 2 - If empty, check if a differently-named hyperstack container exists -ACTUAL_NAME=$(docker ps --filter "ancestor=ghcr.io/orkait/hyperstack:main" --format "{{.Names}}" | head -1) -if [ -n "$ACTUAL_NAME" ] && [ "$ACTUAL_NAME" != "hyperstack-mcp" ]; then - docker rename "$ACTUAL_NAME" hyperstack-mcp - echo "Renamed '$ACTUAL_NAME' → 'hyperstack-mcp' - config will now work" -fi -``` - -If no hyperstack container is running at all, go back to Step 2 of Option A. - -Then test the exec path directly: -```bash -echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"test","version":"1.0"}}}' | docker exec -i hyperstack-mcp bun /app/src/index.ts -``` - -Expected output (server is working): -```json -{"result":{"protocolVersion":"2024-11-05","capabilities":{"tools":{"listChanged":true},"resources":{"listChanged":true}},"serverInfo":{"name":"hyperstack","version":"1.0.0"}},"jsonrpc":"2.0","id":1} -``` - -If this command hangs or errors, the MCP server is not working. Fix it before proceeding - the IDE will show the same failure. - -For Local Bun (Option B): -```bash -echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"test","version":"1.0"}}}' | bun /path/to/hyperstack/bin/hyperstack.mjs -``` - ---- +Start a fresh session in the target environment (or restart the current one so the SessionStart hook fires). -Once the pre-check passes, start a fresh session in the target environment (or restart so the SessionStart hook fires).
+**Verification A: Skills are loaded.** The agent should receive the `using-hyperstack` skill content at session start. Ask: *"What Hyperstack skills are available?"* The agent should list skills from `skills/INDEX.md` (21 total, grouped into core / domain / meta). -**Verification 0: Installation sanity check.** Before trusting the install, run these checks: +**Verification B: MCP tools respond.** Ask: *"Call designer_list_personalities."* The server should return 6 personality clusters (premium-precision, technical-developer, warm-editorial, bold-energetic, cinematic-dark, enterprise-trust). -1. **MCP server responds:** Ask the agent to call `designer_list_personalities`. If it returns 6 clusters (premium-precision, technical-developer, warm-editorial, bold-energetic, cinematic-dark, enterprise-trust), the MCP server is connected and working. If the tool is unknown or fails, the MCP config is wrong or the session wasn't restarted. - -2. **Skills are on disk:** Confirm the skills directory exists and has content: - ```bash - ls ~/.hyperstack/skills/ - ``` - Should show 21 directories plus `INDEX.md`. If missing or empty, the clone failed. - -3. **Skills are auto-loaded (platforms with hooks only):** Ask: *"What Hyperstack skills are available?"* The agent should list skills from `skills/INDEX.md` (21 total, grouped into core / domain / meta). On platforms without hook support, skip this - skills are on disk but not auto-injected. - -If any of these three checks fail, do not proceed. Fix the issue first: -- MCP tool unknown → verify config file location and JSON syntax, then restart the session -- Skills missing → re-run the clone command and confirm the path -- Skills not auto-loaded → check if your tool (e.g. Claude Code) supports hooks/plugin auto-loading. - ---- - -**Verification A: SessionStart hook fires (platforms with hooks only).** On Claude Code and platforms with hook support, the agent should receive the Hyperstack bootstrap at session start. 
Ask: *"What Hyperstack skills are available?"* The agent should list skills from `skills/INDEX.md` (21 total, grouped into core / domain / meta). On platforms without hook support (e.g., Qwen Code), this step does not apply - skills are on disk but not auto-injected. - -**Verification B: Designer workflow triggers.** Ask: *"Help me design a SaaS dashboard for DevOps engineers."* On platforms with the SessionStart hook, the agent should invoke `hyperstack:designer` BEFORE writing any code. If it jumps straight to JSX, the hook did not fire - restart the client and try again. On platforms without hook support, this step is manual (the agent won't auto-invoke designer). +**Verification C: The designer workflow triggers.** Ask: *"Help me design a SaaS dashboard for DevOps engineers."* The agent should invoke `hyperstack:designer` BEFORE writing any code. If it jumps straight to JSX, the SessionStart hook did not fire - restart the client and try again. If any verification step fails: - For skill issues: confirm the repo was cloned to the correct skills directory for the environment -- For MCP issues: run the pre-check command above to confirm the server starts independently of the IDE -- For hook issues: confirm the environment supports `.claude-plugin/hooks.json`, otherwise the enforcement is reduced to documentation rather than automatic injection. Platforms without hook support: Qwen Code. +- For MCP issues: confirm the config file path, check Docker is running if using Option A, or verify the absolute path in Option B +- For hook issues: confirm the environment supports `.claude-plugin/hooks.json`, otherwise the enforcement is reduced to documentation rather than automatic injection ## Step 5: Inform the User Tell the user: 1. Which environment you detected 2. Where the repository was cloned -3. Which MCP config file was updated (Docker or Bun fallback) -4. 
Whether the SessionStart hook is expected to fire on their platform (yes for Claude Code / platforms with hooks, no for Qwen Code / others) +3. Which MCP config file was updated (Docker or local Node) +4. Whether the SessionStart hook is expected to fire on their platform 5. Which verification step they should run first If installation failed at any step, report the specific error and what would need to be fixed, rather than claiming success. ## Troubleshooting -### MCP server shows as failed on first use - -Most common causes: - -1. **Container name mismatch.** The MCP config says `hyperstack-mcp` but the container has a random Docker name (e.g., `kind_bouman`). Fix: - ```bash - ACTUAL=$(docker ps --filter "ancestor=ghcr.io/orkait/hyperstack:main" --format "{{.Names}}" | head -1) - [ -n "$ACTUAL" ] && [ "$ACTUAL" != "hyperstack-mcp" ] && docker rename "$ACTUAL" hyperstack-mcp - ``` - This is the #1 cause of "tool not found" errors on fresh installs where the user ran `docker run` without `--name` at some point. - -2. **Persistent container not running.** Check: `docker ps --filter name=hyperstack-mcp`. If empty, run Step 2 from Option A to start it. -3. **Image not pulled.** Run `docker pull ghcr.io/orkait/hyperstack:main` and retry. -4. **Wrong container name in config.** The config must use `hyperstack-mcp` as the exec target - must match the `--name` used in Step 2. - ### MCP server shows as failed / cannot pull the Docker image Verify the image is accessible: `docker pull ghcr.io/orkait/hyperstack:main` -If the pull fails, confirm Docker is running and you have an internet connection. The image is public on ghcr.io - no authentication is required to pull it. +If the pull fails, confirm Docker is running and you have an internet connection. The image is public on Docker Hub - no authentication is required to pull it. ### MCP server starts but tools return no results The MCP config file may point to the wrong binary or the server is not running. 
Verify: -- Docker: run `docker exec -i hyperstack-mcp bun /app/src/index.ts` manually - it should accept JSON-RPC on stdin and respond. If the container isn't running, start it per Step 2 of Option A. +- Docker: run `docker run -i --rm ghcr.io/orkait/hyperstack:main` and confirm it starts without error - Local Bun: confirm the absolute path in `args` exists (`ls /path/to/hyperstack/bin/hyperstack.mjs`) - Restart the CLI/IDE after any config change - MCP servers are loaded at startup -- **Qwen Code:** Uses `~/.qwen/settings.json` (global) or `.qwen/settings.json` (project-level), NOT `.mcp.json`. The `mcpServers` key goes at the root of the settings file. - -### Too many hyperstack containers piling up - -If you see multiple `ghcr.io/orkait/hyperstack` containers running: - -```bash -docker ps -a --filter "ancestor=ghcr.io/orkait/hyperstack:main" -``` - -Your MCP config is using the legacy `docker run --rm` pattern instead of `docker exec`. Clean up and switch to the new config: - -```bash -docker ps -aq --filter "ancestor=ghcr.io/orkait/hyperstack:main" | xargs -r docker rm -f -``` - -Then follow Step 2 of Option A to start the single persistent `hyperstack-mcp` container, and update your MCP config to the `docker exec` form shown in Step 3. ### SessionStart hook does not fire -On Claude Code, hooks live in `.claude/hooks.json`. Confirm the file exists in the repository root and references `session-start.mjs`. If the hook is missing or malformed, the `hyperstack` skill will not be injected automatically. You can still invoke skills manually with `/hyperstack`. - -On Qwen Code, there is no plugin system or hook mechanism. Skills are available on disk at `~/.qwen/skills/hyperstack/skills/INDEX.md` but must be referenced manually by the agent - no auto-injection occurs. +On Claude Code, hooks live in `.claude/hooks.json`. Confirm the file exists in the repository root and references `session-start.mjs`. 
If the hook is missing or malformed, the `using-hyperstack` skill will not be injected automatically. You can still invoke skills manually with `/using-hyperstack`. ### `bun: command not found` when using Option B Install Bun: `curl -fsSL https://bun.sh/install | bash`, then open a new shell so the path update takes effect. - -## Step 5: Welcome the User to Hyperstack - -Once all verifications pass, your final action is to explain the new reality to the user. Do not just say "installed." Explain the **Disciplined Engineering Harness**. - -**Recommended Success Message:** - -> "System Synchronized. Hyperstack is now active. -> -> I have installed the **Disciplined Engineering Harness** with 12 plugins and 80 tools. I have also established your **Iron Laws** (~/.hyperstack/skills). -> -> From now on: -> - I will not refactor code without a failing test. -> - I will not propose designs without a DESIGN.md contract. -> - I will use the ground-truth primitives provided by the Hyperstack vault. -> -> We are now operating under a professional engineering discipline. How should we begin our first high-integrity task?" diff --git a/package.json b/package.json index 3b92ae2..dd1e010 100644 --- a/package.json +++ b/package.json @@ -1,21 +1,16 @@ { "name": "@orkait-ai/hyperstack", - "version": "1.1.5", - "description": "Disciplined MCP server + skill system. 12 plugins, 80 tools, 21 skills with adversarial enforcement. Designer/DESIGN.md pipeline, shadcn/ui, React Flow, Motion, Lenis, React 19, Echo, Go, Rust, design tokens, UI/UX.", + "version": "1.0.0", + "description": "Disciplined MCP server + skill system. 11 plugins, 79 tools, 21 skills with adversarial enforcement. 
Designer/DESIGN.md pipeline, shadcn/ui, React Flow, Motion, Lenis, React 19, Echo, Go, Rust, design tokens, UI/UX.", "bin": { "hyperstack": "bin/hyperstack.mjs" }, "type": "module", "scripts": { "build": "tsc --noEmit", - "compile:context": "tsx src/internal/compile-runtime-context.ts", "test": "bun test", "start": "bun src/index.ts", - "dev": "bun --watch src/index.ts", - "docker:run": "bun scripts/ensure-singleton.ts", - "skills:index": "tsx scripts/generate-skills-index.ts", - "mcp:start": "bun scripts/start-mcp.ts", - "setup": "tsx scripts/setup.ts" + "dev": "bun --watch src/index.ts" }, "author": "Orkait", "license": "MIT", diff --git a/scripts/ensure-singleton.ts b/scripts/ensure-singleton.ts deleted file mode 100644 index 635f519..0000000 --- a/scripts/ensure-singleton.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { execSync } from "child_process"; - -const IMAGE = "ghcr.io/orkait/hyperstack:main"; -const CONTAINER_NAME = "hyperstack-mcp"; - -function run() { - console.log("Checking for existing Hyperstack containers (cross-platform)..."); - - try { - // 1. Find containers by image and name - const byImage = execSync(`docker ps -aq --filter "ancestor=${IMAGE}"`).toString().trim().split(/\s+/).filter(Boolean); - const byName = execSync(`docker ps -aq --filter "name=${CONTAINER_NAME}"`).toString().trim().split(/\s+/).filter(Boolean); - - // 2. Combine and uniq - const allStale = Array.from(new Set([...byImage, ...byName])); - - if (allStale.length > 0) { - console.log(`Removing stale Hyperstack containers: ${allStale.join(", ")}`); - // We use a loop or join with space since 'docker rm -f' accepts multiple IDs - execSync(`docker rm -f ${allStale.join(" ")}`, { stdio: "inherit" }); - } else { - console.log("No stale containers found."); - } - - // 3. 
Start fresh container - console.log(`Starting fresh Hyperstack container: ${CONTAINER_NAME}`); - execSync( - `docker run -d --name ${CONTAINER_NAME} --restart unless-stopped \ - --memory=512m --cpus=1 \ - --entrypoint sleep \ - ${IMAGE} infinity`, - { stdio: "inherit" } - ); - - console.log("\nVerification:"); - execSync(`docker ps --filter name=${CONTAINER_NAME}`, { stdio: "inherit" }); - - } catch (error: any) { - console.error("Error ensuring singleton container:", error.message); - process.exit(1); - } -} - -run(); diff --git a/scripts/generate-skills-index.sh b/scripts/generate-skills-index.sh new file mode 100755 index 0000000..bb27321 --- /dev/null +++ b/scripts/generate-skills-index.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# Generates skills/INDEX.md from skill frontmatter category fields. +# Run: bash scripts/generate-skills-index.sh +# Categories: core (workflow/discipline), domain (specialized), meta (skills about skills) + +set -uo pipefail + +REPO_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" +SKILLS_DIR="${REPO_ROOT}/skills" +INDEX="${SKILLS_DIR}/INDEX.md" + +# Collect skills by category +core_list="" +domain_list="" +meta_list="" +uncategorized_list="" + +for dir in "${SKILLS_DIR}"/*/; do + skill_name=$(basename "$dir") + skill_file="${dir}SKILL.md" + [ -f "$skill_file" ] || continue + + category=$(grep "^category:" "$skill_file" 2>/dev/null | head -1 | sed 's/category:[[:space:]]*//' | tr -d '\r') + + # Extract first line of description + desc_line=$(awk '/^description:/{flag=1; sub(/^description:[[:space:]]*/,""); sub(/^>-?/,""); if($0!="") print; flag=1; next} flag && /^ / {sub(/^[[:space:]]+/,""); print; exit} flag && /^[a-z]/ {exit}' "$skill_file" | head -1 | tr -d '"') + desc_short=$(printf '%s' "$desc_line" | cut -c 1-120) + + line="| \`${skill_name}\` | ${desc_short} |" + case "$category" in + core) core_list+="${line}"$'\n' ;; + domain) domain_list+="${line}"$'\n' ;; + meta) meta_list+="${line}"$'\n' ;; + *) uncategorized_list+="${line}"$'\n' ;; + esac +done + +# Write index +cat > "$INDEX" <> "$INDEX" < dirent.isDirectory()); - - for (const dir of dirs) { - const skillPath = path.join(SKILLS_DIR, dir.name, "SKILL.md"); - if (!fs.existsSync(skillPath)) continue; - - const content = fs.readFileSync(skillPath, "utf8"); - - // Simple frontmatter parser - const categoryMatch = content.match(/^category:\s*(.*)/m); - const category = categoryMatch ? categoryMatch[1].trim() : "uncategorized"; - - // Extract description (handles multi-line and single-line) - const descMatch = content.match(/^description:\s*(?:>|-)?\s*(.*)/m); - let description = descMatch ? descMatch[1].trim() : ""; - - // If it's multi-line, we might need a more robust parser, - // but for the index, the first line is usually enough. 
- description = description.replace(/^["'>\-\s]+/, "").substring(0, 120); - - skills.push({ name: dir.name, category, description }); - } - - const core = skills.filter(s => s.category === "core"); - const domain = skills.filter(s => s.category === "domain"); - const meta = skills.filter(s => s.category === "meta"); - const uncategorized = skills.filter(s => !["core", "domain", "meta"].includes(s.category)); - - const formatTable = (list: SkillInfo[]) => { - if (list.length === 0) return ""; - return list.map(s => `| \`${s.name}\` | ${s.description} |`).sort().join("\n") + "\n"; - }; - - const output = `# Hyperstack Skills Index - -Auto-generated from each skill's frontmatter \`category\` field. -Regenerate with: \`bun scripts/generate-skills-index.ts\` or \`npm run skills:index\` - -Categories: -- **core** - workflow, discipline, and gates used on every task -- **domain** - specialized skills for specific contexts (visual, components, security, docs) -- **meta** - skills about skills (bootstrap, testing) - ---- - -## Core (workflow + discipline) - -| Skill | Description | -|---|---| -${formatTable(core)} -## Domain (specialized context) - -| Skill | Description | -|---|---| -${formatTable(domain)} -## Meta (skills about skills) - -| Skill | Description | -|---|---| -${formatTable(meta)} -${uncategorized.length > 0 ? ` -## Uncategorized (missing \`category:\` field) - -| Skill | Description | -|---|---| -${formatTable(uncategorized)} - -These skills need a \`category:\` added to their frontmatter. 
-` : ""} -`; - - fs.writeFileSync(INDEX_FILE, output); - console.log(`Wrote ${INDEX_FILE}`); -} - -// Only run if called directly -if (import.meta.url === `file://${process.argv[1]}`) { - generateIndex(); -} diff --git a/scripts/setup.ts b/scripts/setup.ts deleted file mode 100644 index 382d46f..0000000 --- a/scripts/setup.ts +++ /dev/null @@ -1,57 +0,0 @@ -import * as setup from "../src/internal/setup-hyperstack.js"; -import * as fs from "node:fs"; -import * as path from "node:path"; - -async function main() { - console.log("\n🚀 Hyperstack Autonomous Setup (CLI)"); - console.log("=====================================\n"); - - const hintedPlatform = setup.detectEnvironment(); - console.log(`📡 Hinted platform: ${hintedPlatform}`); - - const configPath = setup.findConfigFile(hintedPlatform); - - if (!configPath) { - console.warn("⚠️ Could not find an MCP configuration file in any known location."); - console.log("Tried: .claude.json, .cursor/mcp.json, .codeium/windsurf/mcp_config.json, .roo/mcp.json, .gemini/settings.json, .kiro/settings/mcp.json, .qwen/settings.json"); - console.log("\n💡 OpenAI Codex CLI? 
Run: codex mcp add hyperstack -- bun ~/.hyperstack/bin/hyperstack.mjs"); - console.log(" For any unknown IDE, use the Agentic Autopilot instead."); - process.exit(1); - } - - // Resolve the actual platform from the found config path - const platform = setup.detectPlatformFromConfigPath(configPath); - console.log(`✅ Found config: ${configPath} (${platform})`); - - const skillPath = setup.findSkillPath(platform); - if (skillPath) { - const hyperstackSkills = path.join(process.cwd(), "skills"); - const skillTarget = path.join(skillPath, "hyperstack"); - console.log(`\n📚 Skill target: ${skillTarget}`); - console.log(`Run this to activate adversarial gates:`); - console.log(` ln -s "${hyperstackSkills}" "${skillTarget}"`); - } - - const pluginRoot = process.cwd(); - - // Attempt to proactively self-heal/upgrade the docker setup - setup.selfHealDocker(); - - const patch = setup.generateMcpPatch(configPath, pluginRoot, platform); - - // Proactively apply the patch - setup.applyMcpPatch(configPath, patch); - - console.log("\n📋 Configuration Summary:"); - console.log("---------------------------------"); - console.log(`✅ Environment: ${platform}`); - console.log(`✅ Config Path: ${configPath}`); - if (skillPath) { - console.log(`✅ Skill Target: ${path.join(skillPath, "hyperstack")}`); - } - console.log("---------------------------------\n"); - - console.log("🚀 Setup Complete! You must restart your AI client to pick up the new tools."); -} - -main().catch(console.error); diff --git a/scripts/start-mcp.sh b/scripts/start-mcp.sh new file mode 100755 index 0000000..0aab4c2 --- /dev/null +++ b/scripts/start-mcp.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +set -euo pipefail + +CONTAINER="hyperstack-daemon" +IMAGE="hyperstack" +LOCK_FILE="/tmp/${CONTAINER}.lock" + +# Use a file lock so concurrent session startups don't race to create the container. 
+# Only one process runs the startup block at a time; the rest wait, then see the +# container already running and skip straight to docker exec. +( + flock -x 200 + + if ! docker ps -q --filter "name=^${CONTAINER}$" 2>/dev/null | grep -q .; then + # Remove any stopped container with the same name + docker rm -f "$CONTAINER" 2>/dev/null || true + + # Start a long-running daemon container (tail keeps it alive). + # Each MCP session gets its own node process via docker exec below. + docker run -d \ + --name "$CONTAINER" \ + --restart unless-stopped \ + --entrypoint tail \ + "$IMAGE" -f /dev/null + + sleep 0.3 + fi + +) 200>"$LOCK_FILE" + +exec docker exec -i "$CONTAINER" npx tsx src/index.ts diff --git a/scripts/start-mcp.ts b/scripts/start-mcp.ts deleted file mode 100644 index ffb7de1..0000000 --- a/scripts/start-mcp.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { execSync, spawn } from "child_process"; -import * as fs from "fs"; -import * as path from "path"; -import * as os from "os"; - -const IMAGE = "ghcr.io/orkait/hyperstack:main"; -const CONTAINER_NAME = "hyperstack-mcp"; -const LOCK_FILE = path.join(os.tmpdir(), "hyperstack-mcp-startup.lock"); - -/** - * Standardized Cross-Platform MCP Startup Logic - * - Ensures the persistent container is running. - * - Bridges the local I/O to the containerized MCP process via 'docker exec'. - */ -async function startMcp() { - let lockFd: number | null = null; - - try { - // 1. 
Concurrency control via file lock - try { - lockFd = fs.openSync(LOCK_FILE, 'wx'); - - // Check if the container is already running - const running = execSync(`docker ps -q --filter "name=^${CONTAINER_NAME}$"`, { stdio: "pipe" }).toString().trim(); - - if (!running) { - // If not running, ensure it's removed and start fresh - // Suppress errors if container doesn't exist - try { execSync(`docker rm -f ${CONTAINER_NAME}`, { stdio: "ignore" }); } catch {} - - execSync( - `docker run -d --name ${CONTAINER_NAME} --restart unless-stopped \ - --memory=512m --cpus=1 \ - --entrypoint sleep \ - ${IMAGE} infinity`, - { stdio: "inherit" } - ); - - // Brief pause to allow Docker to initialize the process - await new Promise(r => setTimeout(r, 300)); - } - } catch (e: any) { - if (e.code === 'EEXIST') { - // Another process is handling startup, wait briefly - await new Promise(r => setTimeout(r, 500)); - } else { - throw e; - } - } finally { - if (lockFd !== null) { - fs.closeSync(lockFd); - try { fs.unlinkSync(LOCK_FILE); } catch {} - } - } - - // 2. Connect to the MCP server inside the container - // This allows the local agent/IDE to communicate with the containerized Bun process. - const proc = spawn("docker", ["exec", "-i", CONTAINER_NAME, "bun", "/app/src/index.ts"], { - stdio: "inherit" - }); - - proc.on("exit", (code) => process.exit(code || 0)); - proc.on("error", (err) => { - console.error("Failed to bridge to containerized MCP:", err.message); - process.exit(1); - }); - - } catch (error: any) { - console.error("Critical error starting Hyperstack MCP:", error.message); - process.exit(1); - } -} - -startMcp(); diff --git a/skills/INDEX.md b/skills/INDEX.md index e467193..2f9ad7b 100644 --- a/skills/INDEX.md +++ b/skills/INDEX.md @@ -1,12 +1,12 @@ # Hyperstack Skills Index Auto-generated from each skill's frontmatter `category` field. 
-Regenerate with: `bun scripts/generate-skills-index.ts` or `npm run skills:index` +Regenerate with: `bash scripts/generate-skills-index.sh` Categories: -- **core** - workflow, discipline, and gates used on every task -- **domain** - specialized skills for specific contexts (visual, components, security, docs) -- **meta** - skills about skills (bootstrap, testing) +- **core** — workflow, discipline, and gates used on every task +- **domain** — specialized skills for specific contexts (visual, components, security, docs) +- **meta** — skills about skills (bootstrap, testing) --- @@ -33,10 +33,10 @@ Categories: | Skill | Description | |---|---| | `behaviour-analysis` | Systematic UI/UX behaviour analysis for interactive applications. Audits every user action, state transition, view mode, | +| `designer` | Evidence-based design decision engine. An intention gate that produces non-slop | | `design-patterns-skill` | Apply core programming principles and design patterns from Clean Code, The Pragmatic Programmer, Code Complete, Refactor | -| `designer` | | | `readme-writer` | Writes or rewrites project README files using repository evidence instead of generic filler. Use when creating a new REA | -| `security-review` | Security code review for vulnerabilities. Use when asked to "security review", "find vulnerabilities", "check for securi | +| `security-review` | Security code review for vulnerabilities. Use when asked to security review, find vulnerabilities, check for security is | | `shadcn-expert` | Advanced shadcn/ui architect specializing in Base UI, Tailwind v4, data-slot patterns, and component composition. Use wh | ## Meta (skills about skills) @@ -44,6 +44,4 @@ Categories: | Skill | Description | |---|---| | `testing-skills` | Use when creating or editing Hyperstack skills, before shipping them, to verify they actually work under pressure and re | -| `hyperstack` | Bootstrap - establishes Hyperstack MCP tools and skills before any technical work. 
Auto-loaded at session start via Sess | - - +| `using-hyperstack` | Bootstrap — establishes Hyperstack MCP tools and skills before any technical work. Auto-loaded at session start via Se | diff --git a/skills/autonomous-mode/SKILL.md b/skills/autonomous-mode/SKILL.md index f69afd9..9dc8831 100644 --- a/skills/autonomous-mode/SKILL.md +++ b/skills/autonomous-mode/SKILL.md @@ -8,9 +8,9 @@ description: Use when the user chooses fully autonomous execution. Aggressively ## What This Is -You are unleashed. Execute the full plan end-to-end, using every Hyperstack MCP tool, web search, and skill to make evidence-backed decisions. No asking for review, clarification, or permission between tasks. Think → verify → implement → move. +You are unleashed. Execute the full plan end-to-end, aggressively using every Hyperstack MCP tool, web search, and skill to make evidence-backed decisions. You do not ask the user for review, clarification on covered topics, or permission between tasks. You think, you verify, you implement, you move. -User gets the finished product. Not questions. Not checkpoints. +The user gets the finished product. Not questions. Not checkpoints. Not "does this look right?" ## The Iron Law @@ -19,42 +19,47 @@ AUTONOMOUS DOES NOT MEAN UNDISCIPLINED. AUTONOMOUS MEANS YOU ARE THE DISCIPLINE. 
``` -- Every MCP tool that could be relevant → call it -- Every quality gate → run it yourself -- Every decision point → make it with evidence, log reasoning, keep moving -- Every ambiguity → resolve via MCP data, web search, codebase patterns, engineering judgment -- Every uncertainty → ground it with a deterministic check before proceeding +You use the entire Hyperstack aggressively: +- Every MCP tool that could be relevant -- call it +- Every quality gate -- run it yourself +- Every decision point -- make it with evidence, log the reasoning, keep moving +- Every ambiguity -- resolve it using MCP data, web search, codebase patterns, and engineering judgment +- Every uncertainty -- ground it with a deterministic check before proceeding -You are the senior engineer, reviewer, QA, and decision-maker. +You are the senior engineer, the reviewer, the QA, and the decision-maker. The user trusts you to use Hyperstack to its full capacity and deliver a correct solution. ## When to Use - User explicitly chose autonomous execution -- Plan approved via `blueprint` or `run-plan` -- User said "just do it", "go ahead", "autonomous", "don't ask, build it" +- Plan has been approved (via `blueprint` or `run-plan` validation) +- User said something like "just do it", "go ahead", "autonomous", "don't ask, build it" ## The Autonomous Loop: Reason-Act-Verify +Every action in autonomous mode follows this tight loop. This is not optional -- it is the structure that prevents drift. 
``` -REASON: State what you're about to do and why (one line, logged) -ACT: Execute (write code, run command, call MCP tool) -VERIFY: Check result against deterministic signal (test output, exit code, MCP data) - PASS → next action - FAIL → self-correct (see Self-Correction Hierarchy) +REASON: State what you're about to do and why (one line, logged in decision log) +ACT: Execute the action (write code, run command, call MCP tool) +VERIFY: Check the result against a deterministic signal (test output, exit code, MCP data, type check) + If PASS → next action + If FAIL → course-correct (see Self-Correction Hierarchy) ``` -Never skip VERIFY. "It looks right" is not verification. +Never skip the VERIFY step. "It looks right" is not verification. A passing test, a zero exit code, a matching MCP output -- those are verification. ## Process ### Step 1: Pre-Flight -1. **Worktree** → `hyperstack:worktree-isolation` -2. **MCP survey** → for every domain the plan touches, call tools upfront: +Before writing any code: + +1. **Worktree** -- set up a clean workspace via `hyperstack:worktree-isolation` +2. **Aggressive MCP survey** -- for EVERY domain the plan touches, call the MCP tools proactively. Do not wait until you need them. Load ground truth for all relevant APIs upfront: - | Domain | Call NOW | + | Domain in plan | Call NOW | |---|---| - | React Flow | `reactflow_search_docs` + `reactflow_list_apis` + `reactflow_get_api` per component | + | React Flow | `reactflow_search_docs` + `reactflow_list_apis` + `reactflow_get_api` for each component | | Motion | `motion_search_docs` + `motion_list_apis` | | Go / Echo | `golang_search_docs` + `echo_list_recipes` + `echo_list_middleware` | | Rust | `rust_search_docs` + `rust_list_practices` | @@ -62,153 +67,146 @@ Never skip VERIFY. "It looks right" is not verification. | Design tokens | `design_tokens_list_categories` + `design_tokens_get_gotchas` | | UI/UX | `ui_ux_list_principles` + `ui_ux_get_gotchas` | -3. 
**Baseline** → run full test suite, record pass count -4. **Task list** → create from plan, all pending -5. **Decision log** → format: `Decision: [what] | Evidence: [source] | Alternatives rejected: [why]` +3. **Baseline** -- run the full test suite. Record the pass count. This is your before-state. +4. **Task list** -- create tasks from the plan. All visible, all pending. +5. **Decision log** -- start a running log of autonomous decisions. Format: `Decision: [what] | Evidence: [source] | Alternatives rejected: [what and why]`. This log is presented to the user at delivery. ### Step 2: Execute All Tasks -For each task: +For each task in order: 1. Mark in progress -2. **MCP verify** → call specific tools for APIs/patterns in THIS task -3. **Test first** → write failing test, run it, confirm it fails for the right reason -4. **Implement** → minimal code to pass, MCP-verified API shapes only -5. **Verify** → run test + full suite, zero regressions -6. **Self-review** → diff matches plan? No debug artifacts? No scope creep? -7. **Commit** → atomic, descriptive -8. Mark complete +2. **MCP verify** -- call specific MCP tools for the APIs/patterns used in THIS task. Cross-reference against Step 1 survey. If anything was missed, update now. +3. **Test first** -- write the failing test. Run it. Confirm it fails for the right reason. (Inline `test-first` discipline -- you execute the discipline directly, not via Skill tool.) +4. **Implement** -- write minimal code to pass. Use MCP-verified API shapes. No guessing. +5. **Verify** -- run the test. Run full suite. Zero regressions. Check exit codes, not vibes. +6. **Self-review** -- read your own diff for this task. Check: matches plan? No debug artifacts? No unintended scope? If you spot something wrong, fix it immediately. +7. **Commit** -- atomic commit with descriptive message +8. 
Mark complete, move to next ### Self-Correction Hierarchy +When something goes wrong during execution, follow this hierarchy in order. Each level is tried before escalating to the next. + ``` -Level 1: MCP GROUND TRUTH → call the relevant tool -Level 2: CODEBASE PATTERN → grep for similar working implementations -Level 3: WEB SEARCH → targeted: "[library] [version] [error]" - cross-reference against MCP; reject outdated/wrong-version results -Level 4: DEBUG DISCIPLINE → root cause → hypothesis → minimal test → fix - fix within 2 attempts → continue - 3rd attempt fails → ABORT -Level 5: ABORT → stop, report to user +Level 1: MCP GROUND TRUTH + Call the relevant MCP tool. The answer is usually in the docs. + +Level 2: CODEBASE PATTERN MATCH + Grep for similar working implementations in the existing codebase. + What works elsewhere that's similar to what's broken? + +Level 3: WEB SEARCH + Search the web for the specific error, API, or pattern. + Use targeted queries: "[library] [version] [specific error or API name]" + Cross-reference results against MCP data -- web results can be outdated. + Prefer: official docs, GitHub issues, Stack Overflow answers with accepted solutions. + Reject: blog posts with no version info, AI-generated content, results for wrong versions. + +Level 4: DEBUG DISCIPLINE + Full root cause investigation via debug-discipline: + Read error in full → reproduce → check recent changes → trace data flow + Form hypothesis → test minimally → fix + If fixed within 2 attempts: continue + If 3rd attempt fails: ABORT (see abort conditions) + +Level 5: ABORT + You've exhausted self-correction. Stop and report to user. ``` -Web search is NOT first resort. MCP → codebase → web → debug → abort. +**Web search is not a first resort.** MCP data is ground truth. Codebase patterns are proven. 
Web search is for when those two are insufficient -- unfamiliar errors, third-party library quirks, platform-specific issues, or gaps in MCP coverage. **Web search IS mandatory when:** -- Error not in MCP data -- Library not covered by Hyperstack MCP -- MCP data seems outdated for the version in use -- Platform-specific issue (OS, browser, runtime) +- You encounter an error message you don't recognize and MCP has no relevant data +- You're using a library or API not covered by Hyperstack's MCP namespaces +- The MCP data seems outdated or incomplete for the specific version in use +- You're debugging a platform-specific issue (OS, browser, runtime version) ### Decision-Making Without the User -**On ambiguity:** Resolve via MCP β†’ codebase patterns β†’ web search β†’ engineering judgment (simpler/more maintainable). Log every decision. +**On ambiguity in the plan:** Do not ask the user. Resolve using: +1. MCP tool output (ground truth) +2. Existing codebase patterns (grep for similar implementations) +3. Web search (if MCP and codebase are insufficient) +4. Engineering judgment (pick the simpler, more maintainable option) +5. Log every decision: `Decision: [what] | Evidence: [source] | Alternatives rejected: [why]` -**On missing info:** Exhaust self-correction hierarchy first. All 4 levels fail β†’ abort condition. +**On missing information:** Do not guess. Exhaust the self-correction hierarchy first. If all 4 levels fail, this becomes an abort condition. -**On style/approach:** Follow existing codebase conventions. No convention β†’ simpler option + log it. +**On style/approach choices:** Follow existing codebase conventions. If no convention exists, pick the simpler option and log it. ### Step 3: Final Verification -1. `git diff ..HEAD` β†’ full diff review -2. Diff matches plan? Fix any drift. -3. Remove debug artifacts, console.logs, temp code -4. Full test suite β†’ all green -5. Type/lint check β†’ zero errors -6. 
`hyperstack:ship-gate` +After all tasks complete: + +1. `git diff ..HEAD` -- full diff review +2. Does the diff match the plan? Fix any drift. +3. Debug artifacts scan: remove console.logs, TODO comments, temporary code +4. Full test suite -- all green +5. Type/lint check -- zero errors +6. Run `hyperstack:ship-gate` -- evidence-backed completion verification + +All of this runs without asking the user anything. ### Step 4: Deliver -Invoke `hyperstack:deliver`. Only human touchpoint. +Invoke `hyperstack:deliver`. This is the ONLY human touchpoint. -Present: -- Summary of what was built (per-task, one line) -- Decision log with evidence and rejected alternatives +Present the user with: +- Summary of what was built (per-task, one line each) +- **Decision log** -- every autonomous decision with evidence and rejected alternatives - Test results (before/after pass counts) - Delivery options (PR / squash / branch) -## What Runs Automatically +## What Runs Automatically (Everything) | Gate | How | |---|---| -| MCP API verification | Per-domain upfront + per-task inline | +| MCP API verification | Aggressively, per-domain upfront + per-task inline | | Web search | On unfamiliar errors, uncovered APIs, version-specific issues | -| Test-first | Every task, no exceptions | +| Test-first discipline | Inline, every task, no exceptions | | Full test suite | After every task + final | -| Debug-discipline | On any failure, up to 3 attempts | -| Self-review | After every task + final diff | +| Debug-discipline | Inline on any failure, up to 3 attempts | +| Self-review | After every task + final diff review | | Ship-gate | Final gate before delivery | -| Decision logging | Every autonomous choice with evidence | +| Decision logging | Every autonomous choice recorded with evidence | -## Abort Conditions +## Abort Conditions (the ONLY things that stop you) -1. **3-strike escalation** β†’ 3 failed fix attempts after exhausting self-correction hierarchy -2. 
**MCP down for critical domain** β†’ try web search; if insufficient β†’ stop and report -3. **Test suite collapse** β†’ 3+ unrelated failures after a single task -4. **Scope impossibility** β†’ missing dependency, incompatible versions, circular requirement -5. **Security concern** β†’ vulnerability discovered β†’ never ship insecure code autonomously -6. **Information exhaustion** β†’ all 4 self-correction levels failed +1. **3-strike escalation** -- 3 failed fix attempts on a single task after exhausting the full self-correction hierarchy. Architectural problem you cannot solve alone. Stop and report with full evidence of what you tried. +2. **MCP down for critical domain** -- you cannot verify API shapes and the task requires domain-specific code. Try web search as fallback. If web results are insufficient or untrustworthy, stop and report. +3. **Test suite collapse** -- 3+ unrelated failures after a single task. Something systemic broke. Stop and report. +4. **Scope impossibility** -- you discover the plan requires something fundamentally impossible (missing dependency, incompatible library versions, circular requirement). Stop and report. +5. **Security concern** -- you discover the implementation would introduce a vulnerability (injection, auth bypass, data leak). Stop and report. Never ship insecure code autonomously. +6. **Information exhaustion** -- all 4 levels of self-correction (MCP, codebase, web search, debug-discipline) failed to resolve the issue. Stop and report what you tried at each level. -Everything else β†’ you handle it. +**Everything else -- you handle it.** Ambiguity, minor gaps, style decisions, refactoring choices, test strategy -- you decide, you document, you move. ## Drift Prevention -1. **Per-task plan check** β†’ re-read plan requirement before starting, verify diff matches after -2. **Scope fence** β†’ changes outside plan's listed files β†’ log as out-of-scope, mention at delivery -3. 
**Decision log review** → every 3 tasks, scan log for repeated reversals (signals drift) -4. **Deterministic over probabilistic** → if you can check with a command, do that instead of reasoning +Autonomous execution is vulnerable to drift -- gradually deviating from the plan's intent through accumulated small decisions. Prevent this: + +1. **Per-task plan check** -- before starting each task, re-read the plan's requirement for that task. After completing, verify the diff matches the requirement, not your interpretation of it. +2. **Scope fence** -- if you realize a task needs changes outside the plan's listed files, log it as out-of-scope and mention it at delivery. Do not silently expand scope. +3. **Decision log review** -- after every 3 tasks, scan your decision log. Are the decisions trending in a consistent direction, or are you course-correcting against your own earlier decisions? Repeated reversals signal you're drifting. +4. **Deterministic over probabilistic** -- when you can check something with a command (test, type check, lint, MCP call), always do that instead of reasoning about whether it's probably fine. -## Red Flags - STOP +## Red Flags -- STOP | Thought | Reality | |---|---| -| "I'll skip the MCP check, I remember the API" | Autonomous mode → MORE responsibility to verify, not less | -| "I'll skip the test for this task" | Autonomous ≠ undisciplined. Write the test. | -| "I'll ask the user about this" | Resolve with evidence. Only abort conditions reach the user. | -| "Test failed, I'll fix it in the next task" | Fix now. No debt carried forward. | -| "I'll skip self-review, ship-gate will catch it" | Self-review → task-level. Ship-gate → composition. Both run. | -| "This needs a change outside the plan's scope" | Log it, finish plan, mention at delivery. No scope creep. | -| "I'm confused but I'll figure it out as I code" | Stop. Hit self-correction hierarchy: MCP → codebase → web → debug.
| -| "The web search result looks right" | Cross-reference against MCP data and library version. | -| "I've been making a lot of decisions, that's fine" | Review decision log. Too many decisions may signal plan gaps. | +| "I'll skip the MCP check, I remember the API" | You are in autonomous mode. You have MORE responsibility to verify, not less. | +| "I'll skip the test for this task" | Autonomous does not mean undisciplined. Write the test. | +| "I'll ask the user about this" | Resolve it yourself with evidence. Only abort conditions reach the user. | +| "The test failed, I'll fix it in the next task" | Fix now. Autonomous mode does not carry debt forward. | +| "I'll skip self-review, ship-gate will catch it" | Self-review catches task-level issues. Ship-gate catches composition issues. Both run. | +| "This needs a change outside the plan's scope" | Log it, finish the plan, mention it at delivery. Do not scope-creep. | +| "I'm confused but I'll figure it out as I code" | Stop coding. Hit the self-correction hierarchy: MCP → codebase → web search → debug. | +| "The web search result looks right" | Cross-reference against MCP data and library version. Web results can be outdated. | +| "I've been making a lot of decisions, that's fine" | Review your decision log. Too many decisions may signal plan gaps.
| ## Integration -- **Requires:** Approved plan from `hyperstack:forge-plan` or `hyperstack:run-plan` +- **Requires:** Approved plan from `hyperstack:forge-plan` or validated plan from `hyperstack:run-plan` - **Uses aggressively:** All MCP tools, web search, `hyperstack:worktree-isolation`, `hyperstack:test-first` (inline), `hyperstack:debug-discipline` (inline on failure), `hyperstack:ship-gate` (final) - **Completes via:** `hyperstack:deliver` (only human touchpoint) - - -## Lifecycle Integration - -### Agent Workflow Chains - -**Full autonomous execution:** -``` -forge-plan β†’ autonomous-mode (THIS) β†’ ship-gate β†’ deliver - ↓ - [uses all skills inline] - ↓ - worktree-isolation β†’ test-first β†’ debug-discipline (on failure) -``` - -### Upstream Dependencies -- `forge-plan` β†’ approved MCP-verified plan -- `run-plan` β†’ validated existing plan - -### Skills Used Inline (not invoked, applied directly) -- `worktree-isolation` β†’ pre-flight -- `test-first` β†’ every task (red-green-refactor) -- `debug-discipline` β†’ on any failure (self-correction hierarchy) -- `ship-gate` β†’ final gate before delivery - -### Downstream Consumers -- `deliver` β†’ only human touchpoint - -### Abort Escalation -| Condition | Escalate to | Action | -|---|---|---| -| 3 failed fix attempts | User | Report findings, suggest architectural change | -| MCP down for critical domain | User | Cannot verify, ask to proceed or wait | -| Test suite collapse | User | 3+ unrelated failures, stop | -| Security concern | User | Never ship insecure code autonomously | diff --git a/skills/behaviour-analysis/SKILL.md b/skills/behaviour-analysis/SKILL.md index e117c0f..34a1848 100755 --- a/skills/behaviour-analysis/SKILL.md +++ b/skills/behaviour-analysis/SKILL.md @@ -15,47 +15,66 @@ Systematic interaction audit combining UX heuristics, QA state-machine thinking, ## When to Use - After implementing a feature with multiple interaction modes -- User reports something "doesn't feel right" or "is 
inconsistent" -- Before shipping β†’ final behavioural review -- Adding a new view mode, action, or state to an existing system +- When the user reports something "doesn't feel right" or "is inconsistent" +- Before shipping β€” final behavioural review +- When adding a new view mode, action, or state to an existing system ## Integration with hyperstack:designer -**If DESIGN.md exists** β†’ use it as "expected behaviour" ground truth for the interaction matrix in Phase 2. +**If a DESIGN.md exists** (produced by `hyperstack:designer`), use it as the "expected behaviour" ground truth for the interaction matrix in Phase 2. + +Mapping DESIGN.md sections to behaviour-analysis inputs: | DESIGN.md Section | Use as... | |---|---| -| 5. Component Specifications | Expected states per component. Every listed state MUST exist and be visually distinct. | -| 6. Motion | Expected timing for transitions. Matrix "expected" column cites DESIGN.md durations. | -| 8. Do's and Don'ts | Heuristic audit assertions. Each Do = check; each Don't = violation to search for. | -| 9. Responsive Breakpoints | Composition states for Phase 4 edge case sweep. Test every listed breakpoint. | -| 10. Anti-Patterns | Violations to search for in Phase 4. Fail audit if any found. | +| 5. Component Specifications | **Expected states** for each component in the matrix. Every listed state MUST exist and be visually distinct. | +| 6. Motion | **Expected timing** for transitions. The matrix "expected behaviour" column cites DESIGN.md durations. | +| 8. Do's and Don'ts | **Heuristic audit assertions**. Each Do is a check; each Don't is a violation to search for. | +| 9. Responsive Breakpoints | **Composition states** for Phase 4 edge case sweep. Test every listed breakpoint. | +| 10. Anti-Patterns | **Violations to search for** in Phase 4. Fail the audit if any found. | -**Without DESIGN.md:** Fall back to industry standards via WebSearch or general heuristics. 
+**Without a DESIGN.md:** Fall back to industry standards via WebSearch or general heuristics (the default behaviour described below). -**Reverse escalation:** Audit finds a gap DESIGN.md doesn't specify β†’ escalate back to `hyperstack:designer`. +**Reverse escalation:** If the audit finds a gap that the DESIGN.md doesn't specify (e.g., expected behaviour is ambiguous), escalate back to `hyperstack:designer` β€” the DESIGN.md may need to be updated. ## Process -### Phase 1: Inventory +### Phase 1: Inventory (read code, build the map) + +Before judging anything, build a complete picture: + +1. **Identify all state variables** that affect UI behaviour + - Read the store/state management files + - List every piece of state: data, config, transient UI state + - Note which are persisted vs ephemeral + +2. **Identify all user actions** that modify state + - Buttons, clicks, drags, keyboard shortcuts, sliders, toggles + - API calls triggered by actions + - Implicit actions (hover, scroll, resize, mode switch) + +3. **Identify all view modes / display states** + - Tabs, toggles, conditional rendering branches + - How different modes compose (layout mode x view mode x highlight state) -Build a complete picture before judging anything: +4. **Identify all feedback mechanisms** + - Visual feedback (highlighting, dimming, borders, badges, glow) + - Textual feedback (labels, counts, status text) + - Animated feedback (transitions, physics, spring effects) + - Absence of feedback (silent failures, no-ops) -1. **State variables** β†’ read store/state management files, list every piece of state (data, config, transient UI), note persisted vs ephemeral -2. **User actions** β†’ buttons, clicks, drags, keyboard shortcuts, sliders, toggles, API calls, implicit actions (hover, scroll, resize) -3. **View modes / display states** β†’ tabs, toggles, conditional rendering branches, how modes compose -4. 
**Feedback mechanisms** β†’ visual (highlighting, dimming, borders, badges), textual (labels, counts, status), animated (transitions, spring), absence of feedback (silent failures, no-ops) +Output: A **state inventory table** and an **action inventory table**. -Output: state inventory table + action inventory table. +### Phase 2: Interaction Matrix (the core analysis) -### Phase 2: Interaction Matrix +Build a matrix: **every action x every relevant state combination**. -Build matrix: every action Γ— every relevant state combination. +For each cell ask: +- **What should happen?** (expected behaviour β€” think like a UX designer) +- **What does happen?** (actual behaviour β€” read the code path) +- **Match?** OK / BUG / UX-ISSUE / MISSING-FEEDBACK -For each cell: -- What should happen? (expected - think like UX designer) -- What does happen? (actual - read the code path) -- Match? β†’ OK / BUG / UX-ISSUE / MISSING-FEEDBACK +Structure the matrix by category: ```markdown | # | Action | Context/State | Expected | Actual | Status | @@ -63,44 +82,63 @@ For each cell: ``` Categories to cover: -- CRUD actions -- Selection & highlighting -- View mode transitions -- Layout mode transitions -- Configuration changes (sliders, toggles, settings) -- Drag & interaction -- Reset & cleanup -- Edge cases (empty, max, conflicting states) +- **CRUD actions** (create, read, update, delete of primary data) +- **Selection & highlighting** (what gets selected, how, clear) +- **View mode transitions** (switching between modes) +- **Layout mode transitions** (switching layout engines) +- **Configuration changes** (sliders, toggles, settings) +- **Drag & interaction** (drag, hover, click targets) +- **Reset & cleanup** (what gets cleared, what persists) +- **Edge cases** (empty state, max state, conflicting states) ### Phase 3: Heuristic Audit -Apply Nielsen's 10 heuristics: +Apply Nielsen's 10 heuristics (adapted for interactive visualizations): -1. 
**Visibility of system status** → does UI show what's active, selected, loading? -2. **Match between system and real world** → labels make sense? actions named clearly? -3. **User control and freedom** → can user undo/escape from any state? -4. **Consistency and standards** → similar actions behave the same everywhere? -5. **Error prevention** → can user reach a broken/dead state? -6. **Recognition rather than recall** → current mode/state visible without memorizing? -7. **Flexibility and efficiency** → shortcuts for power users? -8. **Aesthetic and minimalist design** → information at right density? -9. **Help users recover from errors** → what happens on API failure, empty results, bad input? -10. **Accessibility** → keyboard navigation, screen reader, reduced motion? +1. **Visibility of system status** — Does the UI show what's active, selected, loading? +2. **Match between system and real world** — Do labels make sense? Are actions named clearly? +3. **User control and freedom** — Can the user undo/escape from any state? Is there always a way back? +4. **Consistency and standards** — Do similar actions behave the same way everywhere? +5. **Error prevention** — Can the user reach a broken/dead state? +6. **Recognition rather than recall** — Is the current mode/state visible without memorizing? +7. **Flexibility and efficiency** — Are there shortcuts for power users? +8. **Aesthetic and minimalist design** — Is information presented at the right density? +9. **Help users recover from errors** — What happens on API failure, empty results, bad input? +10. **Accessibility** — Keyboard navigation, screen reader, reduced motion? -See [references/heuristics.md](references/heuristics.md) for detailed questions per heuristic. +Refer to [references/heuristics.md](references/heuristics.md) for detailed questions per heuristic.
### Phase 4: Edge Case Sweep -**Empty states:** no data, no results, no highlights, empty search filter results +Systematically check: -**Boundary states:** 100+ nodes, single node/no edges, all nodes highlighted, all sliders at min/max +**Empty states:** +- No data loaded +- No results +- No highlights active +- Empty search filter results -**Transition states:** mode switch with active highlights, mode switch mid-drag, query execution while loading, rapid repeated actions (double-click, spam slider) +**Boundary states:** +- Maximum data (100+ nodes) +- Single node, no edges +- All nodes highlighted +- All sliders at min/max -**Composition states:** every view mode Γ— every layout mode, highlight + search filter active simultaneously, collapsed groups + highlighting + path results +**Transition states:** +- Mode switch with active highlights +- Mode switch mid-drag +- Query execution while loading +- Rapid repeated actions (double-click, spam slider) + +**Composition states:** +- Every view mode x every layout mode +- Highlight + search filter active simultaneously +- Collapsed groups + highlighting + path results ### Phase 5: Report +Output a structured report: + ```markdown ## State Inventory [table of all state variables] @@ -118,25 +156,29 @@ See [references/heuristics.md](references/heuristics.md) for detailed questions [summary: how many behaviours tested, how many correct, critical issues] ``` -Severity: **CRITICAL** β†’ broken/data loss/unreachable state | **HIGH** β†’ major UX inconsistency | **MEDIUM** β†’ minor inconsistency/missing feedback | **LOW** β†’ cosmetic +Severity levels: +- **CRITICAL** β€” broken functionality, data loss, unreachable state +- **HIGH** β€” major UX inconsistency, confusing behaviour +- **MEDIUM** β€” minor inconsistency, missing feedback +- **LOW** β€” cosmetic, nice-to-have ## Research Enhancement -Before starting, search for: -- Current best practices for the specific UI pattern (graph viz, form, dashboard, etc.) 
+Before starting the analysis, search for: +- Current best practices for the specific UI pattern being analyzed (graph viz, form, dashboard, etc.) - Known UX patterns for the interaction model (drag-and-drop, force-directed graphs, etc.) - Accessibility guidelines for the specific component type -Use findings to set expectations in the matrix - "expected behaviour" should be informed by industry standards, not gut feeling. +Use findings to set expectations in the matrix — "expected behaviour" should be informed by industry standards, not just gut feeling. ## Key Principles -- Think like a user first → what would someone expect when they click this? -- Think like QA second → what's the worst that could happen? -- Think like a developer third → read the code to verify, don't assume -- Every action must have visible feedback → silent no-op = bug -- Every state must be escapable → user should never be stuck -- Composition must be tested → features that work alone often break together +- **Think like a user first** — what would someone expect when they click this? +- **Think like QA second** — what's the worst thing that could happen? +- **Think like a developer third** — read the code to verify, don't assume +- **Every action must have visible feedback** — if clicking something does nothing visibly, that's a bug +- **Every state must be escapable** — the user should never be "stuck" +- **Composition must be tested** — features that work alone often break in combination ## The Iron Law @@ -144,49 +186,23 @@ Use findings to set expectations in the matrix - "expected behaviour" should be NO BEHAVIOUR CLAIM WITHOUT READING THE CODE PATH ``` -You cannot say "this should work" - trace the actual code path and confirm. Reading code is not optional. +You cannot say "this should work" — you must trace the actual code path and confirm. Reading code is not optional. Assumptions are bugs waiting to ship.
+ +## Red Flags β€” STOP -## Red Flags - STOP +These are the rationalizations you will have when you want to skip parts of the analysis. Every one is wrong. | Thought | Reality | |---|---| -| "I'll check a few interactions, not the full matrix" | Partial coverage misses composition bugs. Full matrix. | +| "I'll just check a few interactions, not the full matrix" | Partial coverage misses composition bugs. Do the full matrix. | | "This state combination is unlikely" | Unlikely states are where bugs live. Test them. | -| "Nielsen's heuristics are common sense" | Common sense β‰  verification. Apply them explicitly. | -| "I already know this code" | Code drifts. Mental models drift faster. Read it. | -| "Empty states are trivial" | Empty states = #1 place products feel broken. Audit them. | -| "Transition states will be fine" | Mid-drag/mid-animation/mid-load = where race conditions live. | -| "The user will report any issues" | Users don't report vague discomfort. They leave. | -| "Full audit is overkill for a simple component" | Simple components compose into complex flows. Audit it. | +| "Nielsen's heuristics are common sense" | Common sense is pattern-matching without verification. Apply them explicitly. | +| "I already know this code, I don't need to read it" | Code drifts. Mental models drift faster. Read it. | +| "Empty states are trivial, I'll skip them" | Empty states are the #1 place where products feel broken. Audit them. | +| "Transition states will be fine" | Mid-drag, mid-animation, mid-load states are where race conditions live. Audit them. | +| "The user will report any issues" | Users don't report feeling vague discomfort. They leave. | +| "This is for a simple component, full audit is overkill" | Simple components compose into complex flows. Audit it. | | "I'll skip heuristics I don't remember exactly" | Open the reference. All 10 get applied. | -| "The behaviour feels right" | Feelings β‰  evidence. Read the code. 
| -| "I tested the happy path manually" | Happy path = 20% of the matrix. Audit the unhappy paths. | -| "No DESIGN.md β†’ no ground truth" | Search for one. Escalate to designer if missing. | - - -## Lifecycle Integration - -### Agent Workflow Chains - -**UI/UX audit (after implementation):** -``` -[execution complete] β†’ behaviour-analysis (THIS) β†’ [fix issues] β†’ ship-gate -``` - -**DESIGN.md integration:** -``` -designer β†’ DESIGN.md β†’ forge-plan β†’ [execution] β†’ behaviour-analysis (uses DESIGN.md as ground truth) -``` - -### Upstream Dependencies -- Implemented feature with multiple interaction modes -- `designer` β†’ DESIGN.md as expected behaviour ground truth (if exists) - -### Downstream Consumers -- `ship-gate` β†’ final verification after fixes - -### Reverse Escalation -| Discovery | Escalate to | Action | -|---|---|---| -| DESIGN.md doesn't specify expected behaviour | `designer` | Append clarification to DESIGN.md | -| Audit finds gap DESIGN.md doesn't cover | `designer` | Add to DESIGN.md | +| "The behaviour feels right" | Feelings are not evidence. Read the code. | +| "I tested the happy path manually" | The happy path is 20% of the matrix. Audit the unhappy paths. | +| "There is no DESIGN.md, so I have no ground truth" | Search for one. Escalate to designer if missing. Do not audit against gut feeling. | diff --git a/skills/behaviour-analysis/references/heuristics.md b/skills/behaviour-analysis/references/heuristics.md index 881875d..9d846e0 100755 --- a/skills/behaviour-analysis/references/heuristics.md +++ b/skills/behaviour-analysis/references/heuristics.md @@ -15,7 +15,7 @@ Detailed questions per Nielsen's heuristic, adapted for interactive data visuali ## 2. Match Between System and Real World - Do button labels describe what they DO, not what they ARE? ("Clear" not "X") -- Are view mode names intuitive? ("Live" vs "Results" vs "Highlight" - does a new user understand these?) +- Are view mode names intuitive? 
("Live" vs "Results" vs "Highlight" — does a new user understand these?) - Do edge/node labels match the domain vocabulary? - Are slider labels clear about what they control? @@ -62,7 +62,7 @@ Detailed questions per Nielsen's heuristic, adapted for interactive data visuali ## 8. Aesthetic and Minimalist Design - Are controls only shown when relevant? (dagre sliders hidden in cluster mode) -- Is information density appropriate - not too sparse, not overwhelming? +- Is information density appropriate — not too sparse, not overwhelming? - Are animations purposeful (communicate state change) or decorative (just pretty)? - Do hover/highlight effects add information or just noise? @@ -71,7 +71,7 @@ Detailed questions per Nielsen's heuristic, adapted for interactive data visuali - What happens when the API is unreachable? - What happens when a query returns an error? - What happens when the graph data is malformed? -- Are error messages actionable ("server unreachable - is it running?")? +- Are error messages actionable ("server unreachable — is it running?")? - Can the user retry failed operations? ## 10. Accessibility diff --git a/skills/blueprint/SKILL.md b/skills/blueprint/SKILL.md index f107311..575b6df 100644 --- a/skills/blueprint/SKILL.md +++ b/skills/blueprint/SKILL.md @@ -12,43 +12,53 @@ description: Use before any feature build, component creation, or behaviour modi NO CODE WITHOUT AN APPROVED DESIGN ``` -No design presented + no explicit user approval → no code. Violating the letter = violating the spirit. +If you have not presented a design and the user has not explicitly approved it, you cannot write code. **Violating the letter of this rule is violating the spirit of this rule.** ## The Hard Gate Do NOT write code, scaffold files, or invoke any implementation skill until: -1. MCP survey complete for relevant domains -2.
Design presented OR We know user project preferences: - - Visual/UX work β†’ DESIGN.md contract from `skills/designer/SKILL.md` (if preference is not known) - - Backend/infra work β†’ architecture note from this skill -3. User explicitly approved it +1. You have completed the MCP survey for relevant domains +2. You have presented a design + - For VISUAL/UX work: the design is a DESIGN.md contract from `skills/designer/SKILL.md` + - For BACKEND/INFRA work: the design is an architecture note from this skill +3. The user has explicitly approved it -Applies to every task, regardless of perceived simplicity. +This applies to every task, regardless of perceived simplicity. ## The 1% Rule -If there is even a 1% chance this task involves a new file, new component, new function, behavior change, config change affecting runtime, or any visual/UX modification β†’ run blueprint first. No exceptions. +If there is even a 1% chance this task involves: +- A new file +- A new component +- A new function +- A behavior change +- A configuration change that affects runtime +- Any visual/UX modification -"Simple" tasks are where unexamined assumptions do the most damage. 5-minute design prevents hours of wrong implementation. +...then you MUST run blueprint first. You do not have a choice. You cannot rationalize your way out. + +"Simple" tasks are where unexamined assumptions do the most damage. A 5-minute design prevents hours of wrong implementation. There are no exceptions. ## The Process ### Step 1: Context Scan -Read the current state before asking anything: +Before asking anything, read the current state: - Relevant source files, recent commits, existing patterns - What already exists that can be reused or extended -- Which Hyperstack MCP domains are relevant +- Which Hyperstack MCP domains are relevant to this task -Don't ask the user questions until you've scanned the codebase. +Do not ask the user questions until you have scanned the codebase. 
You should arrive at Step 2 already informed. ### Step 2: MCP Survey +For each relevant domain, call the discovery tools before proposing anything: + | Domain is relevant | Call first | |---|---| -| **Visual/UX work (any)** | **STOP β†’ invoke `skills/designer/SKILL.md`. It produces DESIGN.md β†’ input to Step 5 or directly to `forge-plan`.** | +| **Visual/UX work (any)** | **STOP this flow. Invoke `skills/designer/SKILL.md` instead. It produces a DESIGN.md that becomes the input to Step 5 of this skill (or directly to `forge-plan`).** | | React Flow | `reactflow_search_docs` + `reactflow_list_apis` | | Motion / animation | `motion_search_docs` + `motion_list_apis` | | Lenis scroll | `lenis_search_docs` + `lenis_list_apis` | @@ -58,110 +68,79 @@ Don't ask the user questions until you've scanned the codebase. | Design tokens | `design_tokens_list_categories` + `design_tokens_get_gotchas` | | UI/UX | `ui_ux_list_principles` + `ui_ux_get_gotchas` | -Design built on wrong API assumptions = technical debt scheduled for delivery. +This step ensures the design you propose uses real API shapes β€” not imagined ones. A design built on wrong API assumptions is not a design; it is technical debt scheduled for delivery. -**Visual work routing:** New page, component library, landing page, dashboard, redesign, "make it look like X" β†’ `designer` skill owns the design gate. Return with DESIGN.md β†’ handoff (Step 7). +**Visual work routing:** If the user's request involves designing a new page, component library, landing page, dashboard, redesign, or any "make it look like X" task β€” the `designer` skill owns the design gate. Invoke it instead of running Step 4-6 here. Return with a DESIGN.md contract and proceed to handoff (Step 7). ### Step 3: Clarify Requirements -Ask one clarifying question at a time: -- Purpose and success criteria β†’ what does done look like? 
-- Constraints β†’ performance targets, accessibility requirements, existing patterns -- Scope boundary β†’ what is explicitly NOT included? +Ask clarifying questions one at a time: +- Purpose and success criteria β€” what does done look like? +- Constraints β€” performance targets, accessibility requirements, existing patterns to follow +- Scope boundary β€” what is explicitly NOT included in this task? -Wait for answer before asking the next. Multiple independent subsystems β†’ flag before proceeding, decompose first. +One question per message. Wait for the answer before asking the next one. + +If the request describes multiple independent subsystems, flag this before proceeding. One design β†’ one implementation cycle. Large requests must be decomposed into sub-projects first. ### Step 4: Propose 2-3 Approaches -For each approach: -- Trade-offs -- MCP-backed APIs and patterns used (cite tool output from Step 2) +Present options with: +- Trade-offs for each approach +- Which MCP-backed APIs and patterns each approach uses (cite the tool output from Step 2) - Your recommendation with reasoning -Lead with your recommended option. No options without a recommendation. +Lead with your recommended option. Do not present options without a recommendation. ### Step 5: Present Design Scale each section to its complexity: -- **Architecture** β†’ module boundaries, data flow, key abstractions -- **Invariants** β†’ what must always be true at runtime -- **Interfaces** β†’ public APIs between modules, including types -- **Error paths** β†’ what happens when dependencies fail, inputs are invalid, async times out +- **Architecture** β€” module boundaries, data flow, key abstractions +- **Invariants** β€” what must always be true at runtime +- **Interfaces** β€” public APIs between modules, including types +- **Error paths** β€” what happens when dependencies fail, inputs are invalid, or async operations time out -Get user confirmation. Revise if needed. 
Don't proceed until approved. +Get user confirmation after presenting. Revise if needed. Do not proceed until the user approves. ### Step 6: Negative Doubt -List at least 5 failure modes before finalizing: +Before finalising, list at least 5 failure modes: - What breaks at runtime under normal usage? - What edge cases does this design not handle? - Which invariants could be violated by concurrent operations or unexpected state? -- What does MCP `get_gotchas` say about this domain? -- What external dependency could change and break this? +- What does the MCP `get_gotchas` data say about this domain? +- What external dependency (API, library version, browser API) could change and break this? -Address each explicitly β†’ design around it or record the accepted risk. +Address each failure mode explicitly β€” either design around it or record the accepted risk. ### Step 7: Handoff to Implementation -Once approved: -- Save design note to relevant docs directory if non-trivial -- Visual/UX work β†’ DESIGN.md already exists. Save at `docs/DESIGN.md` or `/DESIGN.md`. -- Invoke `hyperstack:forge-plan` β†’ builds MCP-verified implementation plan from approved design -- DESIGN.md present β†’ forge-plan reads it as input spec, each of 10 sections β†’ one or more tasks +Once the design is approved: +- Save a short design note to the relevant docs directory if the task is non-trivial +- For visual/UX work: DESIGN.md already exists (produced by `designer` skill). Save it at `docs/DESIGN.md` or `/DESIGN.md`. +- Invoke `hyperstack:forge-plan` to build a fully MCP-verified implementation plan from the approved design +- **If DESIGN.md exists:** forge-plan reads it as its input spec. Each of the 10 sections becomes one or more tasks. +- The approved design is the spec β€” `forge-plan` translates it into traceable tasks, `engineering-discipline` executes them + +## Red Flags β€” STOP -## Red Flags - STOP +These are the exact thoughts you will have when you want to skip this skill. 
Every one is a rationalization. Every one has been used before to build wrong architectures. Every one has a counter. | Thought | Reality | |---|---| -| "I know React Flow well enough to skip the survey" | MCP has v12-specific API shapes. Memory has v11. Call the tool. | -| "This is too simple for a design" | Simple tasks → unexamined assumptions → most damage. Return to Hard Gate. | -| "Let me just start with a file and design as we go" | How wrong architectures get built. Design FIRST. | -| "User seems impatient, I'll skip Step 6" | User impatience ≠ permission to ship slop. Negative Doubt is not optional. | -| "I'll propose one approach - the obvious one" | Two approaches exist for every non-trivial design. Find both. | -| "The task is a single-line change" | Single line at the wrong place destroys invariants. Design first. | +| "I know React Flow well enough to skip the survey" | MCP data has v12-specific API shapes. Memory has v11. Call the tool. | +| "This is too simple for a design" | Simple tasks are where unexamined assumptions do the most damage. Return to the Hard Gate. | +| "Let me just start with a file and we'll design as we go" | This is how wrong architectures get built. Do the design FIRST. | +| "The user seems impatient, I'll skip Step 6" | User impatience is not permission to ship slop. Negative Doubt is not optional. | +| "I'll propose one approach — the obvious one" | Two approaches exist for every non-trivial design. Find both. | +| "The task is a single-line change" | A single line at the wrong place destroys invariants. Design first. | | "This is a bug fix, not a feature" | Bug fixes change behavior. Behavior changes need designs. | -| "I'm just refactoring" | Refactors move responsibility. Moving responsibility is architectural. | -| "The design will slow us down" | Wrong code ships → fix it → fix it again. That is slow. Design once, ship right.
| -| "I can reason about this without external tools" | MCP data contains gotchas you won't remember. Call the tool. | +| "I'm just refactoring" | Refactors move responsibility. Moving responsibility is architectural. Design first. | +| "The design will slow us down" | No. Wrong code ships. Then you fix it. Then fix it again. That is slow. Design once, ship right. | +| "I can reason about this without external tools" | MCP data contains edge cases and gotchas you will not remember. Call the tool. | | "The user will tell me if I'm wrong" | The user hired you to prevent that. Do the design. | | "I already did a similar design last week" | State drifts. Codebase changes. Do the current survey. | +| "This is not my call, I'm just executing instructions" | Executing instructions with no design is how bad instructions become shipped bugs. Design first. | | "Let me start with a prototype" | Prototypes become production. Design the prototype. | - - -## Lifecycle Integration - -### Agent Workflow Chains - -**Website/Frontend Agent:** -``` -blueprint (THIS) → designer → forge-plan → [execution] → ship-gate → deliver - ↓ visual routing -``` - -**Backend/Infra Agent:** -``` -blueprint (THIS) → forge-plan → [execution] → ship-gate → deliver - ↓ architecture note -``` - -**Execution Options (chosen at forge-plan handoff):** -- `autonomous-mode` → full auto, stops only on failure -- `subagent-ops` → fresh agent per task, two-stage review -- `engineering-discipline` → manual with phase gates - -### Upstream Dependencies -- None (entry point for feature work) -- `hyperstack` → 1% rule enforcement - -### Downstream Consumers -- `forge-plan` → reads approved design, builds MCP-verified task plan -- `designer` → if visual/UX routing detected -- `run-plan` → if resuming existing plan - -### Reverse Escalation -| Discovery | Escalate to | Action | -|---|---|---| -| Visual/UX work detected mid-task | `designer` | Pause, get DESIGN.md, resume | -|
Architecture gap (non-visual) | `blueprint` | Re-enter for architecture decision | diff --git a/skills/code-review/SKILL.md b/skills/code-review/SKILL.md index 5eae65b..5460905 100644 --- a/skills/code-review/SKILL.md +++ b/skills/code-review/SKILL.md @@ -8,15 +8,17 @@ description: Use when completing tasks, implementing features, or before merging ## Two Modes -1. **Requesting** β†’ dispatching a reviewer subagent to evaluate your work -2. **Receiving** β†’ handling review feedback with technical rigor, not performative agreement +This skill covers both sides of code review: + +1. **Requesting** -- dispatching a reviewer subagent to evaluate your work +2. **Receiving** -- handling review feedback with technical rigor, not performative agreement ## Requesting Review ### When to Request **Mandatory:** -- After each task in `subagent-ops` (handled automatically) +- After each task in `subagent-ops` (handled automatically by that skill) - After completing a major feature - Before merge to main @@ -30,17 +32,18 @@ description: Use when completing tasks, implementing features, or before merging **1. Get the diff range:** ```bash -BASE_SHA=$(git merge-base HEAD main) +BASE_SHA=$(git merge-base HEAD main) # or master/develop HEAD_SHA=$(git rev-parse HEAD) ``` **2. Dispatch a review subagent with:** + - What was implemented (one sentence) -- Requirements or spec it should match +- The requirements or spec it should match - The git diff (`git diff $BASE_SHA..$HEAD_SHA`) - Specific question: "Does this match the spec? Flag missing, extra, or incorrect code." -**Note:** Review subagents get raw diff + spec only. Do not load bootstrap (`hyperstack`) β†’ `` gate prevents it anyway. Provide exactly what they need to evaluate. +**Note:** Review subagents receive raw diff and spec only. Do not load bootstrap (`using-hyperstack`) into review subagents -- the `` gate prevents it, and review subagents do not need the full skill catalogue. Provide exactly what they need to evaluate. 
**3. Act on results:** @@ -53,28 +56,28 @@ HEAD_SHA=$(git rev-parse HEAD) ### MCP-Enhanced Review -For domain-specific code, include MCP verification in the review prompt: +When reviewing domain-specific code, include MCP verification in the review prompt: > "For any React Flow API usage, verify against `reactflow_get_api`. For any Go patterns, verify against `golang_get_practice`. Flag any API usage that doesn't match MCP output." -This catches API drift a generic reviewer would miss. +This catches API drift that a generic code reviewer would miss. ## Receiving Review ### The Response Pattern ``` -1. READ: Complete feedback without reacting +1. READ: Complete feedback without reacting 2. UNDERSTAND: Restate the requirement (or ask) -3. VERIFY: Check against codebase reality -4. EVALUATE: Technically sound for THIS codebase? -5. RESPOND: Technical acknowledgment or reasoned pushback -6. IMPLEMENT: One item at a time, test each +3. VERIFY: Check against codebase reality +4. EVALUATE: Technically sound for THIS codebase? +5. RESPOND: Technical acknowledgment or reasoned pushback +6. IMPLEMENT: One item at a time, test each ``` ### Forbidden Responses -Never: +Never respond with: - "You're absolutely right!" - "Great point!" - "Thanks for catching that!" @@ -91,17 +94,17 @@ Instead: Push back when: - Suggestion breaks existing functionality - Reviewer lacks full context -- Violates YAGNI +- Violates YAGNI (unused feature) - Technically incorrect for this stack - Conflicts with user's architectural decisions -**How:** Technical reasoning, not defensiveness. Reference working tests/code. Involve user if architectural. +**How:** Use technical reasoning, not defensiveness. Reference working tests/code. Involve the user if architectural. ### Handling Unclear Feedback -Any item unclear β†’ stop. Do not implement anything yet. Ask for clarification first. +If any item is unclear: **stop.** Do not implement anything yet. Ask for clarification on the unclear items first. 
-Items may be related. Partial understanding β†’ wrong implementation. +Items may be related. Partial understanding leads to wrong implementation. ### Implementation Order @@ -113,14 +116,14 @@ For multi-item feedback: 5. Test each fix individually 6. Verify no regressions -## Red Flags - STOP +## Red Flags -- STOP | Thought | Reality | |---|---| | "Skip review, it's simple" | Simple code has bugs. Review catches them. | -| "I'll review my own code" | Self-review β‰  code review. Dispatch a subagent. | +| "I'll review my own code" | Self-review is not code review. Dispatch a subagent. | | "Reviewer is wrong, ignore it" | Push back with reasoning. Don't silently ignore. | -| "I agree with everything" | Performative agreement β‰  technical evaluation. | +| "I agree with everything" | Performative agreement is not technical evaluation. | | "I'll implement all feedback at once" | One item at a time, test each. | ## Integration @@ -128,29 +131,3 @@ For multi-item feedback: - **Called by:** `hyperstack:subagent-ops` (per-task review cycle), `hyperstack:deliver` (pre-merge review) - **Pairs with:** `hyperstack:ship-gate` (verification after fixes) - **Escalate to:** User if reviewer and implementer disagree on architectural decisions - - -## Lifecycle Integration - -### Agent Workflow Chains - -**Per-task review (subagent-ops):** -``` -subagent-ops β†’ implementer β†’ code-review (THIS) β†’ [fix loop] β†’ next task -``` - -**Pre-merge review:** -``` -[autonomous-mode | engineering-discipline] β†’ code-review (THIS) β†’ deliver -``` - -### Upstream Dependencies -- `subagent-ops` β†’ per-task review cycle (automatic) -- `engineering-discipline` β†’ after completing major features -- `deliver` β†’ pre-merge review - -### Skills Used With -- `ship-gate` β†’ verification after review fixes applied - -### MCP-Enhanced Review -Include MCP verification in review prompts for domain-specific code (reactflow_get_api, golang_get_practice, etc.) 
diff --git a/skills/debug-discipline/SKILL.md b/skills/debug-discipline/SKILL.md index 774af20..05246de 100644 --- a/skills/debug-discipline/SKILL.md +++ b/skills/debug-discipline/SKILL.md @@ -12,146 +12,126 @@ description: Use when encountering any bug, test failure, or unexpected behaviou NO FIXES WITHOUT ROOT CAUSE FIRST. ``` -Symptom fix = failure. Random changes = thrashing. Every fix attempt without confirmed root cause β†’ higher probability of a second bug. +A symptom fix is a failure. Random changes are not debugging β€” they are thrashing. Every fix attempt without a confirmed root cause increases the probability of a second bug. -Phase 1 not complete β†’ no fix proposed. +**If you have not completed Phase 1, you cannot propose a fix.** ## When to Use -Any technical failure: test failures, runtime errors/panics, unexpected behaviour, build failures, performance regressions, integration issues. +Use this for any technical failure: +- Test failures +- Runtime errors or panics +- Unexpected behaviour (wrong output, wrong rendering, wrong state) +- Build failures +- Performance regressions +- Integration issues -Use **especially** when: -- Under time pressure β†’ urgency makes guessing tempting -- "The fix is obvious" β†’ obvious fixes often address the wrong layer -- You already tried something and it didn't work -- Error message points to a dependency or library function +Use it **especially** when: +- Under time pressure β€” urgency makes guessing tempting +- "The fix is obvious" β€” obvious fixes often address the wrong layer +- You have already tried something and it did not work +- The error message points to a dependency or library function ## The Four Phases ### Phase 1: Root Cause Investigation -**BEFORE any fix attempt:** +**BEFORE attempting any fix:** **1. Read the error in full** -Stack trace, error message, line numbers, exit codes β†’ read every line. The exact wording often contains the fix. 
+Stack trace, error message, line numbers, exit codes β€” read every line. Do not skim. The exact wording often contains the fix. **2. Reproduce consistently** -Can you trigger it reliably? Exact steps? Can't reproduce β†’ gather more data first, don't guess. +Can you trigger it reliably? What are the exact steps? If you cannot reproduce it, do not guess β€” gather more data first. **3. Check recent changes** -`git diff`, recent commits. Most recent change is guilty until proven otherwise. +`git diff`, recent commits. What changed that could have caused this? Assume the most recent change is guilty until proven otherwise. **4. Check MCP docs for the failing domain** -| Domain | Call | +Before assuming you understand how a library or API behaves, verify: + +| Domain | What to call | |---|---| -| React Flow component behaves unexpectedly | `reactflow_get_api` for the component, `reactflow_get_pattern` for usage | +| React Flow component behaves unexpectedly | `reactflow_get_api` for the component, `reactflow_get_pattern` for the usage pattern | | Go runtime error (goroutine, context, nil pointer) | `golang_get_practice` for the relevant topic | | Rust borrow checker / lifetime error | `rust_get_practice` + `rust_cheatsheet` | | Echo middleware or routing issue | `echo_get_middleware` or `echo_get_recipe` | | Motion animation not firing | `motion_get_api` for the failing hook or component | | CSS/token rendering wrong | `design_tokens_get_gotchas` or `ui_ux_get_gotchas` | -MCP gotchas data frequently contains the exact failure mode you're looking at. +The MCP gotchas data frequently contains the exact failure mode you are looking at. Check it before forming a hypothesis. **5. Trace the data flow** -Where does the bad value originate? Trace backwards from symptom to source. Add diagnostic logging at each layer boundary if needed. Run once to see which layer breaks β†’ investigate that layer. + +Where does the bad value originate? 
Trace backwards from the symptom to the source. Add diagnostic logging at each layer boundary if needed. Run once to see which layer breaks. Then investigate that specific layer. Fix at the source. Never at the symptom. ### Phase 2: Pattern Analysis -Before writing a fix: +Before writing a fix, find the correct pattern: 1. Locate similar working code in the same codebase -2. Compare failing code against working example β†’ list every difference, however small -3. Check MCP reference for the correct pattern (`[domain]_get_pattern`) +2. Compare the failing code against the working example β€” list every difference, however small +3. Check the MCP reference for the correct pattern (`[domain]_get_pattern`) 4. Understand what the failing code assumed that the working code does not ### Phase 3: Hypothesis and Test -One variable at a time: - -1. **State hypothesis explicitly:** "I believe X is the root cause because Y" -2. **Design minimal test:** smallest change that confirms or refutes the hypothesis -3. **Make one change** β†’ don't bundle multiple fixes -4. **Verify result:** - - Confirms β†’ Phase 4 - - Refutes β†’ new hypothesis, return to top of Phase 3 (count as failed attempt) - - After 2 refuted hypotheses β†’ return to Phase 1 with all new information - - After 3 failed hypotheses total β†’ stop, go to Escalation Rule +Scientific method β€” one variable at a time: -Don't stack a second change on top of a failed one. +1. **State the hypothesis explicitly:** "I believe X is the root cause because Y" +2. **Design the minimal test:** the smallest change that would confirm or refute the hypothesis +3. **Make one change** β€” do not bundle multiple fixes +4. 
**Verify the result:** + - Confirms hypothesis β†’ Phase 4 + - Refutes hypothesis β†’ form a new hypothesis, return to top of Phase 3 (count this as a failed attempt) + - After 2 refuted hypotheses: return to Phase 1 with all new information before forming another hypothesis + - After 3 failed hypotheses total: stop β€” go directly to the Escalation Rule below + - Do NOT stack a second change on top of a failed one -If you genuinely don't know the root cause after Phase 1 and 2 β†’ say so explicitly. Proposing a fix you don't understand is not debugging. +If you genuinely do not know what the root cause is after Phase 1 and 2, say so explicitly. "I don't understand why X behaves this way" is correct. Proposing a fix you don't understand is not. ### Phase 4: Fix Fix the root cause. Not the symptom. -1. **Write failing test first** β†’ simplest possible reproduction. Run it. Confirm it fails. -2. **Implement one fix** β†’ address confirmed root cause. One change. -3. **Run the test** β†’ confirm it passes. -4. **Check for regressions** β†’ run full test suite. +1. **Write a failing test first** β€” the simplest possible reproduction. Run it. Confirm it fails. +2. **Implement one fix** β€” address the confirmed root cause. One change. +3. **Run the test** β€” confirm it now passes. +4. **Check for regressions** β€” run the full test suite. 5. **Invoke `hyperstack:ship-gate`** before claiming the bug is fixed. -**Attempt counter - mandatory:** -- Attempts 1-2: fix doesn't work β†’ return to Phase 1 with new information +**Attempt counter β€” mandatory:** +- Attempts 1-2: if the fix does not work, return to Phase 1 with the new information - **Attempt 3: STOP. Do not attempt a fourth fix.** ### Escalation Rule (3+ Failed Fixes) -Three failed attempts β†’ architectural problem, not a surface bug. +Three failed attempts signals an architectural problem, not a surface bug. 
-Signals: -- Each fix reveals new coupling or unexpected shared state elsewhere -- Correct fix would require "significant refactoring" → current structure can't accommodate correct behaviour +Diagnostic pattern: +- Each fix reveals new coupling or unexpected shared state in a different location +- The correct fix would require "significant refactoring" — which means the current structure cannot accommodate the correct behaviour - Each fix creates a new symptom elsewhere -Stop fixing. Present findings to user: what you tried, what each attempt revealed, what architectural change appears required. +At this point, stop fixing. Present the findings to the user: what you tried, what each attempt revealed, and what architectural change appears to be required. Do not continue patching. -## Red Flags - STOP +## Red Flags — STOP | Thought | Reality | |---|---| -| "Let me just try changing X" | No root cause → don't touch it | -| "It's probably a race condition" | "Probably" ≠ root cause | +| "Let me just try changing X" | You do not have a root cause | +| "It's probably a race condition" | "Probably" is not a root cause | | "Quick fix now, investigate later" | There is no later | -| "Multiple small changes at once" | Can't isolate what worked | -| "The library is broken" | Check MCP docs first | +| "Multiple small changes at once" | You cannot isolate what worked | +| "The library is broken" | Check the MCP docs first | | "One more attempt" (after 2 failures) | Stop. Escalate.
| -| "I fixed it - the error is gone" | Run `hyperstack:ship-gate` | +| "I fixed it — the error is gone" | Run `hyperstack:ship-gate` | ## Integration - Use `hyperstack:ship-gate` before claiming any bug is fixed -- Use `hyperstack:engineering-discipline` if Phase 4 escalation reveals architectural change needed -- Use `hyperstack:blueprint` if fix requires building new functionality rather than correcting existing behaviour - - -## Lifecycle Integration - -### Agent Workflow Chains - -**Used inline during execution:** -``` -[autonomous-mode | subagent-ops | engineering-discipline] → debug-discipline (THIS) - ↓ - [self-correction hierarchy] - ↓ - [fix → ship-gate] -``` - -### Upstream Dependencies -- Any execution mode encountering failure - -### Skills Used Inline -- `test-first` → Phase 4 (write failing test before fix) -- `ship-gate` → Phase 4 (verify fix before claiming complete) - -### Escalation Paths -| Condition | Escalate to | Action | -|---|---|---| -| 3 failed fix attempts | User | Architectural problem, not surface bug | -| Fix requires new functionality | `blueprint` | Not a bug fix, needs design | -| Fix requires architectural change | `engineering-discipline` | Step 3 architecture reasoning | +- Use `hyperstack:engineering-discipline` if Phase 4 escalation reveals an architectural change is needed +- Use `hyperstack:blueprint` if the fix requires building new functionality rather than correcting existing behaviour diff --git a/skills/deliver/SKILL.md b/skills/deliver/SKILL.md index 4f7ee14..8740b7f 100644 --- a/skills/deliver/SKILL.md +++ b/skills/deliver/SKILL.md @@ -8,20 +8,22 @@ description: Use after all implementation tasks are complete. Runs final verific ## When to Use -After every task in the implementation plan is marked complete and all verification has passed. Terminal state of every Hyperstack workflow. +After every task in the implementation plan is marked complete and all verification has passed.
This is the terminal state of every Hyperstack workflow. -Do NOT invoke until all tasks are done. It is a gate, not a shortcut. +Do NOT invoke this skill until all tasks are done. It is a gate, not a shortcut. ## The Process ### Step 1: Full Verification -Run the complete test suite. Not a subset. Not just the tests you wrote. All of them. +Run the complete test suite. Not a subset. Not the tests you just wrote. All of them. -Show the output. Anything fails → stop, invoke `hyperstack:debug-discipline`, resolve before continuing. +Show the output. If anything fails, stop here — invoke `hyperstack:debug-discipline` and resolve before continuing. ### Step 2: Type / Lint Check +Run the appropriate check for the project's language: + | Language | Command | |---|---| | TypeScript / Next.js | `npx tsc --noEmit` | @@ -29,90 +31,70 @@ Show the output. Anything fails → stop, invoke `hyperstack:debug-discipline`, | Go | `go vet ./...` | | Python | `mypy .` (if configured) | -Zero errors required. Pre-existing warnings acceptable if documented. +Zero errors required. Warnings are acceptable if pre-existing and documented. ### Step 3: Diff Review -Run `git diff <base>..HEAD`. +Run `git diff <base>..HEAD` where `<base>` is main, master, or develop — whichever this branch was cut from. Check: -- Diff matches plan or approved design? -- Unintended changes (files outside plan's scope)? -- Debug statements, console.logs, or temp code left in? +- Does the diff match the plan or approved design? +- Are there any unintended changes (modified files outside the plan's scope)? +- Are there any debug statements, console.logs, or temporary code left in? -Anything unintended → revert before continuing. +If anything is unintended, revert it before continuing. ### Step 4: Ship Gate Invoke `hyperstack:ship-gate` on the overall implementation. -Don't skip. Passing individual task verifications ≠ final gate on the whole. +Do not skip this.
Passing individual task verifications does not replace a final gate on the whole. ### Step 5: Present Options -Once Steps 1-4 pass: +Once Steps 1-4 pass, present the delivery options to the user: > "All verification passed. How do you want to deliver this? > -> 1. **PR** - push branch and open a pull request (`gh pr create`) -> 2. **Squash** - squash all commits into one and merge -> 3. **Leave as branch** - push branch only, no PR yet" +> 1. **PR** — push branch and open a pull request (`gh pr create`) +> 2. **Squash** — squash all commits into one and merge +> 3. **Leave as branch** — push branch only, no PR yet" Wait for the user's choice. ### Step 6: Execute -Execute exactly the chosen option. No extra steps. No "cleaning up other things while you're at it." +Execute exactly the chosen option. Do not add steps. Do not clean up other things "while you're at it." -**Option 1 - PR:** +**Option 1 — PR:** ```bash git push -u origin [branch-name] gh pr create --title "[feature name]" --body "[summary of changes]" ``` -**Option 2 - Squash:** +**Option 2 — Squash:** ```bash git checkout main git merge --squash [branch-name] git commit -m "[single descriptive commit message]" ``` -**Option 3 - Leave as branch:** +**Option 3 — Leave as branch:** ```bash git push -u origin [branch-name] ``` -## Red Flags - STOP +## Red Flags — STOP | Thought | Reality | |---|---| -| "Tests mostly pass, I'll fix the rest in a follow-up" | Fix them now or don't deliver. | -| "The type errors are pre-existing" | Verify with `git stash`. Pre-existing → document it. Not pre-existing → fix it. | -| "I'll skip ship-gate, I just ran individual verifications" | Individual gates ≠ composition. Run ship-gate. | -| "Let me also clean up X while I'm here" | Scope creep. Out-of-plan changes → new branch. | +| "Tests mostly pass, I'll fix the rest in a follow-up" | No. Fix them now or don't deliver.
| +| "The type errors are pre-existing" | Verify with `git stash` — if they existed before your change, document it. If not, fix them. | +| "I'll skip ship-gate, I just ran individual verifications" | Individual gates do not cover composition. Run ship-gate. | +| "Let me also clean up X while I'm here" | Scope creep. Out-of-plan changes go on a new branch. | ## Integration -- **Requires:** All tasks in `forge-plan` or `run-plan` complete and individually verified +- **Requires:** All tasks in `forge-plan` or `run-plan` complete and individually verified (via `autonomous-mode`, `subagent-ops`, or `engineering-discipline`) - **Requires:** `hyperstack:ship-gate` passing on full implementation - **Invoked after:** `hyperstack:autonomous-mode`, `hyperstack:subagent-ops`, or `hyperstack:engineering-discipline` completes - - -## Lifecycle Integration - -### Agent Workflow Chains - -**Terminal state of all workflows:** -``` -[autonomous-mode | subagent-ops | engineering-discipline] → ship-gate → deliver (THIS) -``` - -### Upstream Dependencies -- `ship-gate` → must pass before deliver invoked -- All tasks in plan marked complete - -### Downstream Consumers -- None (terminal state) - -### Cleanup -- `worktree-isolation` → cleanup after delivery (if worktree used) diff --git a/skills/design-patterns-skill/SKILL.md b/skills/design-patterns-skill/SKILL.md index 7506dd2..747209b 100755 --- a/skills/design-patterns-skill/SKILL.md +++ b/skills/design-patterns-skill/SKILL.md @@ -37,57 +37,69 @@ references: # Design Patterns & Programming Principles +## Overview + +Structured guidance on programming principles and design patterns from foundational software engineering books. Ensures code follows industry-standard practices for readability, maintainability, simplicity, and architectural soundness.
+ ## When to Apply -- **Code Generation** → writing new functions, classes, or modules -- **Code Review** → evaluating PRs or existing codebases -- **Refactoring** → improving code structure and clarity -- **Architecture Design** → choosing appropriate patterns and abstractions +- **Code Generation:** Writing new functions, classes, or modules +- **Code Review:** Evaluating pull requests or existing codebases +- **Refactoring:** Improving code structure and clarity +- **Architecture Design:** Choosing appropriate patterns and abstractions + +--- ## Core Philosophy -1. Readability over cleverness → code is read more than written -2. Simplicity over complexity → simplest solution that works -3. Testability by design → write code that's easy to test -4. Incremental improvement → leave code better than you found it -5. Patterns as tools → apply when they clarify, not by default +1. **Readability over cleverness** — Code is read more than written +2. **Simplicity over complexity** — Use the simplest solution that works +3. **Testability by design** — Write code that's easy to test +4. **Incremental improvement** — Leave code better than you found it +5. **Patterns as tools** — Apply patterns when they clarify, not by default + +--- ## Principle Categories ### 1. Readability & Clarity -Descriptive naming, consistent formatting, self-documenting code, small focused functions -→ `references/patterns/readability.md` +- Descriptive naming, consistent formatting, self-documenting code, small focused functions +- **Reference:** `references/patterns/readability.md` ### 2. Simplicity & Efficiency -KISS, DRY, YAGNI -→ `references/patterns/simplicity.md` +- KISS, DRY, YAGNI +- **Reference:** `references/patterns/simplicity.md` ### 3.
Design & Architecture -SRP, composition over inheritance, program to interfaces -Patterns: Factory, Strategy, Observer, Decorator, Adapter, Command, Singleton -→ `references/patterns/design-architecture.md` +- SRP, composition over inheritance, program to interfaces +- Patterns: Factory, Strategy, Observer, Decorator, Adapter, Command, Singleton +- **Reference:** `references/patterns/design-architecture.md` ### 4. Testing & Quality -Automated testing, focused assertions, edge case coverage -→ `references/patterns/testing.md` +- Automated testing, focused assertions, edge case coverage +- **Reference:** `references/patterns/testing.md` ### 5. Error Handling -Clear error messages, early validation, proper exception usage -→ `references/patterns/error-handling.md` +- Clear error messages, early validation, proper exception usage +- **Reference:** `references/patterns/error-handling.md` ### 6. Maintainability -Boy Scout Rule, continuous refactoring, atomic commits, automation -→ `references/patterns/maintainability.md` +- Boy Scout Rule, continuous refactoring, atomic commits, automation +- **Reference:** `references/patterns/maintainability.md` + +--- ## AI-Specific Guidance -When generating or reviewing code: +When generating or reviewing code, always: 1. Check for AI pitfalls listed in each principle -2. Avoid pattern prediction bias → don't use patterns just because they're common -3. Question generic naming → resist `data`, `temp`, `result` without context -4. Validate edge cases → don't skip error handling -5. Keep functions focused → resist combining unrelated operations -6. Match project conventions → maintain consistency with existing codebase +2. Avoid pattern prediction bias — don't use patterns just because they're common +3. Question generic naming — resist `data`, `temp`, `result` without context +4. Validate edge cases — don't skip error handling +5. Keep functions focused — resist combining unrelated operations +6. 
Match project conventions — maintain consistency with existing codebase + +--- ## Quick Reference @@ -102,10 +114,12 @@ When generating or reviewing code: | Need undo/logging | Command pattern | | Global access point | Singleton (use sparingly) | +--- + ## Sources -- *Clean Code* - Robert C. Martin -- *The Pragmatic Programmer* - Andrew Hunt & David Thomas -- *Code Complete* - Steve McConnell -- *Refactoring* - Martin Fowler -- *Design Patterns* - Gang of Four +- *Clean Code* — Robert C. Martin +- *The Pragmatic Programmer* — Andrew Hunt & David Thomas +- *Code Complete* — Steve McConnell +- *Refactoring* — Martin Fowler +- *Design Patterns* — Gang of Four diff --git a/skills/design-patterns-skill/references/patterns/simplicity.md b/skills/design-patterns-skill/references/patterns/simplicity.md index 54b8d1d..f0533d2 100755 --- a/skills/design-patterns-skill/references/patterns/simplicity.md +++ b/skills/design-patterns-skill/references/patterns/simplicity.md @@ -215,7 +215,7 @@ class DatabaseConfig { **Supported by:** *The Pragmatic Programmer*, Donald Knuth's famous quote -> "Premature optimization is the root of all evil" - Donald Knuth +> "Premature optimization is the root of all evil" — Donald Knuth ### Examples diff --git a/skills/designer/SKILL.md b/skills/designer/SKILL.md index caf30c8..fac5e63 100644 --- a/skills/designer/SKILL.md +++ b/skills/designer/SKILL.md @@ -2,10 +2,10 @@ name: designer category: domain description: >- - Evidence-based design decision engine. Intention gate that produces non-slop + Evidence-based design decision engine. An intention gate that produces non-slop UI/UX by forcing every visual choice through industry context, cognitive science, design master principles, and anti-pattern detection before code generation. - Outputs DESIGN.md contract all subsequent implementation must follow. + Outputs a DESIGN.md contract that all subsequent implementation must follow. 
metadata: author: booleanstack version: "3.0.0" @@ -28,240 +28,286 @@ triggers: activation: mode: fuzzy priority: high + triggers: + - design + - landing page + - dashboard + - visual + - DESIGN.md + - ui + - ux references: - references/design-md-template.md - - references/website-experience-cheatsheet.md - examples/saas-dashboard.md - examples/developer-tool.md - examples/ecommerce-checkout.md --- -# Designer Skill - Intention Gate +# Designer Skill β€” Intention Gate -> AI UIs all look same because AI skip decision process, jump to code. -> Skill force every design decision through evidence before code generation. -> No visual code until DESIGN.md contract produced and approved. +> AI-generated UIs all look the same because AI skips the decision process and jumps to code. +> This skill forces every design decision through evidence before code generation. +> No visual code until a DESIGN.md contract is produced and approved. -## IRON LAW +--- + +## The Iron Law ``` -NO VISUAL CODE WITHOUT APPROVED DESIGN.md +NO VISUAL CODE WITHOUT AN APPROVED DESIGN.md ``` -Single line JSX, CSS, or styling β†’ no DESIGN.md β†’ BREAKING THIS RULE. No exceptions. "Simple button" still needs personality, color, state decisions. +**Violating the letter of this rule is violating the spirit of this rule.** + +If you are about to write a single line of JSX, CSS, or styling code, and there is no approved DESIGN.md, you are breaking this rule. There are no exceptions. A "simple button" still needs personality, color, and state decisions. -## HARD GATE +## Hard Gate ``` -DO NOT GENERATE VISUAL CODE UNTIL: +DO NOT GENERATE ANY VISUAL CODE UNTIL: 1. Intent extracted (Phase 1) 2. MCP tools consulted (Phase 2) 3. Anti-patterns checked (Phase 3) - 4. DESIGN.md generated + presented (Phase 4) - 5. User approved DESIGN.md + 4. DESIGN.md generated and presented (Phase 4) + 5. User has approved the DESIGN.md + +No exceptions. A "simple button" still needs personality, color, and state decisions. 
``` -## 1% RULE +## The 1% Rule + +If there is even a 1% chance that the task involves: +- A new page or view +- A new component +- Changing how something looks +- Changing how something moves (animation, transition, scroll) +- Changing how something responds to user input +- A landing page, dashboard, form, or data display +- "Make it look more like X" +- "Redesign" anything -1% chance task involves new page/view, new component, changing look/feel/motion/interaction, landing page, dashboard, form, data display, "make it look like X", "redesign" β†’ invoke skill BEFORE writing any code. +...then you MUST invoke this skill BEFORE writing any code. You cannot rationalize your way out. -**Apply when:** task changes how something **looks, feels, moves, or is interacted with.** -**Skip when:** pure backend, single CSS bug fix (same colors/spacing), adding to existing design system with established tokens, perf optimization no visual change, infrastructure. +**Apply when:** the task changes how something **looks, feels, moves, or is interacted with**. +**Skip when:** pure backend with no frontend impact, single CSS bug fix (with the same colors/spacing), adding to existing design system with established tokens, performance optimization with no visual change, infrastructure. -## RED FLAGS - STOP +## Red Flags β€” STOP + +These are the rationalizations you will have when you want to skip this skill. Every one is wrong. | Thought | Reality | |---|---| -| "Small component, no full DESIGN.md needed" | Wrong decisions ship. Design it. | -| "I'll use default shadcn styles" | Unexamined defaults = AI slop. | -| "User said 'just make it work'" | Means "make sense visually." Needs design. | -| "I know what SaaS dashboard looks like" | Know AI-slop version. Designer prevents that. | -| "I'll fix design after user sees code" | AI slop fingerprint sticky. Users stop caring first. | -| "MCP tools overkill" | You don't decide. Call them. 
| -| "I'll generate DESIGN.md after coding" | Post-hoc justification β‰  design. Design FIRST. | -| "User iterating fast, no time for gate" | Speed β‰  permission to ship slop. Gate first. | -| "Quick mockup only" | Quick mockups become shipped products. | -| "Figma has design, I'll translate" | No design resolution = absolute/relative dump. | -| "I'll pick colors as I go" | How AI slop made. Pick deliberately. | -| "Dark mode = invert light mode" | No. Exact anti-pattern this skill prevents. | -| "Skill is slow" | 2 min. Wrong design = 2 weeks to undo. | +| "This is a small component, it doesn't need a full DESIGN.md" | Small components with wrong decisions ship to production. Design it. | +| "I'll just use the default shadcn styles" | Defaults are decisions. Unexamined defaults produce AI slop. Design intentionally. | +| "The user said 'just make it work'" | "Just make it work" means "make something that makes sense visually." That needs design. | +| "I know what a SaaS dashboard looks like" | You know the AI-slop version. Designer prevents that specifically. | +| "I can fix the design after the user sees the code" | No. The AI slop fingerprint is sticky. Users will stop caring before you fix it. | +| "The MCP tools are overkill for this" | You don't get to decide. Call them. | +| "I'll generate a DESIGN.md after coding" | Then it is post-hoc justification, not design. Design FIRST. | +| "The user is iterating quickly, they don't want a gate" | User speed is not permission to ship slop. Gate first, iterate fast inside the gate. | +| "This is just a quick mockup" | Quick mockups become shipped products. Design them. | +| "Figma already has the design, I'll just translate" | Translating from Figma without design resolution creates absolute/relative dumps. Use designer anyway. | +| "I'll pick colors and fonts as I go" | That is how AI slop is made. Pick them deliberately via designer. | +| "Dark mode will just invert the light mode colors" | No it will not. 
This is the exact anti-pattern designer exists to prevent. | +| "The designer skill is slow" | The skill takes 2 minutes. Shipping wrong design takes 2 weeks to undo. | --- -## Position in Hyperstack Workflow +## Position in the Hyperstack Workflow ``` -user request β†’ blueprint (visual routing) β†’ designer (THIS) β†’ DESIGN.md - ↓ - forge-plan β†’ execution β†’ ship-gate β†’ deliver - -Downstream: forge-plan, shadcn-expert, motion_generate_animation, design_tokens_generate, behaviour-analysis, ship-gate -Reverse escalation: forge-plan β†’ designer (gap), behaviour-analysis β†’ designer (unclear), ship-gate β†’ designer (compliance fail) + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + user request β”‚ β”‚ + β”‚ β”‚ Upstream: β”‚ + β–Ό β”‚ - hyperstack (root orchestrator) β”‚ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ - blueprint (visual routing) β”‚ + β”‚ blueprint │─── visual? ──┼─▢ designer (THIS SKILL) β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ + β”‚ β”‚ Produces: β”‚ + β”‚ non-visual β”‚ - DESIGN.md contract (file) β”‚ + β”‚ β”‚ β”‚ + β–Ό β”‚ Downstream consumers: β”‚ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ - forge-plan (reads DESIGN.md) β”‚ + β”‚ forge-plan│◀─ DESIGN.md ── - shadcn-expert (per-section code) β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ - motion_generate_animation β”‚ + β”‚ β”‚ - design_tokens_generate β”‚ + β–Ό β”‚ - behaviour-analysis (audit spec) β”‚ + execution β”‚ - ship-gate (compliance check) β”‚ + β”‚ β”‚ β”‚ + β–Ό β”‚ Reverse escalation (allowed): β”‚ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ - forge-plan β†’ designer β”‚ + β”‚ ship-gate β”‚ β”‚ (if visual gap discovered) β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ - behaviour-analysis β†’ designer β”‚ + β”‚ β”‚ (if expected behavior unclear) β”‚ + β–Ό β”‚ β”‚ + deliver 
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ``` -## Three-Layer Stack +## The Three-Layer Stack | Layer | Plugin | Question | Tools | |---|---|---|---| -| Decision | `designer` (this) | Which design? | 17 MCP tools | -| Rules | `ui-ux` | What principles? | 6 MCP tools | -| Values | `design-tokens` | What exact CSS? | 7 MCP tools | -| Components | `shadcn` | Which components? | 4 MCP tools | -| Motion | `motion` | Exact animation code? | 7 MCP tools | - ---- - -## Website Experience Non-Negotiables - -Every DESIGN.md must resolve these 7: - -1. **Primary path** - user's main JTBD + single primary action -2. **Information scent** - "Where am I, what can I do, what happens next?" -3. **State coverage** - loading, empty, error, success, disabled, destructive -4. **Form/auth friction** - labels persistent, validation humane, paste allowed, password managers supported -5. **Performance budget** - LCP, INP, CLS, payload-sensitive media targets -6. **Accessibility floor** - focus visibility, focus not obscured, target size, reduced motion, keyboard usage -7. **Responsive content priority** - what survives first on mobile, what deferred - -Use [website-experience-cheatsheet](references/website-experience-cheatsheet.md). - -## User Preferences Override Defaults - -Priority order: -1. Explicit user preferences + constraints -2. Existing workspace reality (framework, component lib, design system, tokens, frontend patterns) -3. Approved product/brand requirements -4. Designer auto-resolved defaults - -User says "use these colors", "keep current design system", "match this app shell", "no shadcn" β†’ preference wins. +| **Decision** | `designer` (this skill) | Which design? | 17 MCP tools | +| **Rules** | `ui-ux` | What principles? | 6 MCP tools | +| **Values** | `design-tokens` | What exact CSS? | 7 MCP tools | +| **Components** | `shadcn` | Which components to compose? 
| 4 MCP tools | +| **Motion** | `motion` | Exact animation code? | 7 MCP tools | --- # PHASE 1: INTENT EXTRACTION -Two modes. Default **Base** unless user says "advanced" or "detailed." +Two modes. Default to **Base** unless user says "advanced" or "detailed." ## Base Mode (3 Questions + Confirm) -**Step 0:** Existing project β†’ inspect workspace: framework, package manifests, component lib, token system, core frontend files, explicit visual prefs in repo. - -**Step 1:** Call `designer_resolve_intent(product_description)`. Auto-detects: industry, personality, style, mode, density, color mood, must-haves, never-uses. +**Step 1:** Call `designer_resolve_intent` with product description. Auto-detects: industry, personality, style, mode, density, color mood, must-haves, never-uses. **Step 2:** Ask 3 essential questions: | # | Question | Why | |---|---|---| -| 1 | What is product? (1 sentence) | Everything derives from this | -| 2 | Brand color? (hex, name, or "generate") | Can't guess brand | +| 1 | What is the product? (1 sentence) | Everything derives from this | +| 2 | Brand color? (hex, name, or "generate") | Can't guess someone's brand | | 3 | What sections/pages to build? | What to implement | -**Step 3:** Present auto-resolved defaults as suggestions. Ask if user prefs or workspace patterns override. Offer: *"Say 'advanced' for full control, or pick preset to start."* +**Step 3:** Present auto-resolved defaults AND suggest a preset. Offer: *"Say 'advanced' for full control, or pick a preset to start from."* ## Presets (Fast Start) -User says "make it feel like Linear" or "start from Stripe" β†’ `designer_get_preset(name)`, use as DESIGN.md foundation, customize brand color only. +If user says "make it feel like Linear" or "start from Stripe" or "use the Notion style" β€” call `designer_get_preset(name)` and use it as the DESIGN.md foundation. Customize brand color only. 
| Preset | Best For | Key Trait | |---|---|---| -| `linear` | SaaS, productivity | Opacity hierarchy, 8px grid, 150ms | -| `stripe` | Payment, docs, premium SaaS | Weight 300/500, CIELAB contrast | -| `vercel` | Dev tools, technical | -0.04em tracking, zero chromatic bias | +| `linear` | SaaS, productivity tools | Opacity hierarchy, 8px grid, snappy 150ms | +| `stripe` | Payment, docs, premium SaaS | Weight 300/500, CIELAB contrast, editorial polish | +| `vercel` | Dev tools, technical products | -0.04em tracking, zero chromatic bias, 96px sections | | `apple` | Consumer, mobile-first | 17px body, spring physics, 44pt targets | -| `carbon` | Enterprise, regulated | Zero radius, IBM Plex, WCAG AA | -| `shadcn` | React + Tailwind | OKLCH, opacity borders, brand-agnostic | -| `notion` | Content, editorial | Warm cream bg, serif headings, 65ch prose | -| `supabase` | Dev tools, dark-first | Emerald on black, compact, code-native | +| `carbon` | Enterprise, regulated industries | Zero radius, IBM Plex, WCAG AA out of box | +| `shadcn` | Any React + Tailwind project | OKLCH, opacity borders, brand-agnostic default | +| `notion` | Content, editorial, notes | Warm cream bg, serif headings, 65ch prose | +| `supabase` | Developer tools, dark-first | Emerald on black, compact, code-native | | `figma` | Creative tools, startups | Multi-color, spring animations, vivid | -Call `designer_list_presets` to show all. Preset fills Sections 1-7 automatically. Customize: brand color, sections/pages, industry do's/don'ts. +Call `designer_list_presets` to show all with details. Call `designer_get_preset(name)` for full token config + CSS. + +**Preset workflow:** Preset fills Sections 1-7 of DESIGN.md automatically. You only need to customize: brand color, specific sections/pages, and industry-specific do's/don'ts. ## Advanced Mode (12 Questions) -Call `designer_resolve_intent` first. Show suggested default per question. Present in batches of 3-4 (Hick's Law). 
+Call `designer_resolve_intent` first. Show suggested default alongside each question. Present in batches of 3-4 (Hick's Law). -**Q1:** Product? (1 sentence) β†’ determines industry, anti-pattern set, style priority +### Q1: What is the product? (1 sentence) +Determines industry category, anti-pattern set, style priority. + +### Q2: Who is the primary user? -**Q2:** Primary user? | User Type | Defaults | |---|---| -| Developer | Dark default, monospace accents, keyboard-first, compact | -| Consumer | Light default, friendly typography, mobile-first, comfortable | +| Developer | Dark default, monospace accents, keyboard-first, compact density | +| Consumer | Light default, friendly typography, mobile-first, comfortable density | | Enterprise | Structured, conservative, data-dense, normal density | -| Child | Playful, 48px+ targets, high contrast, claymorphism | +| Child | Playful, large touch targets (48px+), high contrast, claymorphism | | Creative | Rich motion, bold colors, portfolio-native | -| Healthcare | Calm, AAA, large text, minimal motion | +| Healthcare | Calm, accessible (AAA), large text, minimal motion | + +### Q3: What emotional target? -**Q3:** Emotional target? | Target | Visual Direction | |---|---| -| Trustworthy | Professional palette, serif/clean sans, conservative radius | -| Playful | Vivid colors, 16-24px radius, spring animations | -| Premium | -0.02em+ tracking, generous whitespace, single accent, subtle shadows | -| Energetic | C 0.15+, 32px+ headings, rich motion | -| Calm | Muted palette, warm neutrals, generous lh, minimal motion | -| Technical | Dark default, monospace accents, compact, snappy motion | -| Bold | Max contrast, large type, strong color blocks | -| Editorial | Serif headings, 18px body 1.75lh, warm bg | - -**Q4:** Light or dark default? Product decision, not preference. Developer tools β†’ dark. Marketing β†’ light. Editorial β†’ light. Gaming β†’ dark. - -**Q5:** Brand color? 
Given: extract hue, derive OKLCH ramp (11 stops). "generate": pick from industry color mood. +| Trustworthy | Professional palette, serif or clean sans, conservative radius | +| Playful | Vivid colors, rounded shapes (16-24px), spring animations | +| Premium | Tight tracking (-0.02em+), generous whitespace, single accent, subtle shadows | +| Energetic | High chroma (C 0.15+), large type (32px+ headings), rich motion | +| Calm | Muted palette, warm neutrals, generous line height, minimal motion | +| Technical | Dark default, monospace accents, compact density, snappy motion | +| Bold | Maximum contrast, large type, strong color blocks | +| Editorial | Serif headings, generous reading (18px body, 1.75 line-height), warm backgrounds | + +### Q4: Light or dark default? +Not a preference β€” a product decision. Developer tools β†’ dark. Marketing β†’ light. Editorial β†’ light. Gaming β†’ dark. Dashboards β†’ either, but intentional. + +### Q5: Brand color? +If given: extract hue, derive OKLCH ramp (11 stops). If "generate": pick from industry color mood. + | Industry | Color Mood | |---|---| | SaaS | Trust blue + single accent | | Healthcare | Calm blue + health green | | Fintech | Navy + trust blue + gold | -| Luxury | Black + gold, minimal | +| Luxury | Black + gold, minimal palette | | AI/Tech | Neutral + one distinct (NOT #6366F1) | | Education | Friendly pastels, warm accents | -| Wellness | Earth tones, sage, soft coral | +| Wellness | Earth tones, sage green, soft coral | -**Q6:** Density? -| Mode | Section Padding | Card Padding | Body | Use | +### Q6: Density? + +| Mode | Section Padding | Card Padding | Body Size | Use | |---|---|---|---|---| | Comfortable | 96px | 40px | 18px | Marketing, editorial, consumer | | Normal | 64px | 28px | 16px | SaaS, dashboards, apps | | Compact | 48px | 20px | 14px | Data tables, admin, dev tools | -**Q7:** Style? minimalism / glassmorphism / soft-ui / dark-oled / vibrant-block / claymorphism / aurora-ui. 
"recommend": resolved from industry + emotional target. +### Q7: Design style? +7 primary: minimalism, glassmorphism, soft-ui, dark-oled, vibrant-block, claymorphism, aurora-ui. If "recommend": resolved from industry + emotional target. + +### Q8: Font personality? -**Q8:** Font personality? | Personality | Pairing | Use | |---|---|---| | Technical | Geist + Geist Mono | Dev tools, SaaS, dashboards | | Elegant | Cormorant + Montserrat | Luxury, editorial, premium | | Friendly | Plus Jakarta Sans + mono | Consumer, education, SaaS | -| System | Inter (or system stack) | Universal | -| Editorial | Playfair Display + Lora | Content, blogs, news | +| System | Inter (or system stack) | Universal, no strong personality | +| Editorial | Playfair Display + Lora | Content sites, blogs, news | -**Q9:** Motion level? -| Level | Includes | +### Q9: Motion level? + +| Level | What It Includes | |---|---| -| Static | No animations | -| Subtle | Hover states + transitions (150-200ms) | +| Static | No animations at all | +| Subtle | Hover states + transitions only (150-200ms) | | Moderate | + scroll reveals, micro-interactions (200-300ms) | -| Rich | + parallax, page transitions, animated bg (300-500ms) | +| Rich | + parallax, page transitions, animated backgrounds (300-500ms) | + +Always respects `prefers-reduced-motion` regardless of level. -Always respects `prefers-reduced-motion`. +### Q10: Sections/pages? +Landing: Hero, Features, Testimonials, CTA, Footer, Pricing, FAQ. Dashboard: Sidebar, Header, Content, Data panels. Apps: Navigation, Content, Modals, Forms, Empty states. -**Q10:** Sections/pages? Landing: Hero, Features, Testimonials, CTA, Footer, Pricing, FAQ. Dashboard: Sidebar, Header, Content, Data panels. +### Q11: Framework + Component Library? 
-**Q11: Framework + Component Library (TWO sub-questions):** +**Two sub-questions β€” ask both:** -Q11a Framework: React + Tailwind v4 / Next.js + Tailwind v4 / Vue + Tailwind / Svelte + Tailwind / HTML + Tailwind / Other +**Q11a β€” Framework:** +- React + Tailwind v4 (most common) +- Next.js + Tailwind v4 +- Vue + Tailwind +- Svelte + Tailwind +- HTML + Tailwind (no framework) +- Other (specify) -Q11b Component Library: -- **shadcn/ui (Base UI)** β†’ invokes `hyperstack:shadcn-expert`, uses `shadcn_*` MCP tools -- **Raw Tailwind** β†’ hand-built from DESIGN.md, no lib -- **MUI / Mantine / Chakra / Ant Design** β†’ use library's own docs (no hyperstack plugin) -- **Custom / existing** β†’ read user's components, match patterns -- **Ask me to recommend** β†’ recommend shadcn/ui for React+Tailwind, or raw Tailwind for max control +**Q11b β€” Component Library:** +- **shadcn/ui (Base UI edition)** β€” invokes `hyperstack:shadcn-expert`, uses `shadcn_*` MCP tools +- **Raw Tailwind** β€” no component library, hand-built primitives from DESIGN.md +- **Material UI** β€” use its component catalog (no hyperstack plugin yet) +- **Mantine** β€” use its component catalog (no hyperstack plugin yet) +- **Chakra UI** β€” use its component catalog (no hyperstack plugin yet) +- **Ant Design** β€” enterprise component library (no hyperstack plugin yet) +- **Custom / existing design system** β€” user's own components +- **Ask me to recommend** β€” designer picks based on personality + industry -**DO NOT assume shadcn by default.** Ask explicitly. Libraries have incompatible architectures. +**Do NOT assume shadcn by default.** If the user doesn't answer, ask explicitly. Different component libraries have incompatible architectures (Radix vs Base UI vs MUI primitives vs handcrafted). 
-Routing: `shadcn/ui` β†’ `hyperstack:shadcn-expert` | `Raw Tailwind` β†’ forge-plan hand-writes | `Other` β†’ library's own docs, flag to user | `Custom` β†’ read existing first +**Routing based on Q11b answer:** +- `shadcn/ui` β†’ `hyperstack:shadcn-expert` handles component work; forge-plan calls `shadcn_*` tools +- `Raw Tailwind` β†’ forge-plan hand-writes components from DESIGN.md Section 5 spec directly (no library wrapper) +- `Other library` β†’ forge-plan uses the library's own docs; hyperstack has no plugin; flag this to user +- `Custom/existing` β†’ read user's existing components first; match their patterns +- `Ask me to recommend` β†’ recommend shadcn/ui for React+Tailwind, or raw Tailwind if user wants maximum control -**Q12:** Constraints? WCAG AA (default) or AAA. Performance budget (< 150KB JS, < 2s load). Dark mode required. Brand keywords. +### Q12: Constraints? +WCAG AA (default) or AAA. Performance budget (< 150KB JS, < 2s load). Dark mode required. Brand keywords. **Do NOT proceed to Phase 2 until Q1, Q5, Q10 answered.** @@ -269,177 +315,187 @@ Routing: `shadcn/ui` β†’ `hyperstack:shadcn-expert` | `Raw Tailwind` β†’ forge-p # PHASE 2: DESIGN SYSTEM RESOLUTION -Every MCP call fills specific DESIGN.md section. No call without purpose. +Every MCP call must fill a specific section of the DESIGN.md. No call without a purpose. -## Core Calls (Every Design Task - 4 calls, run in parallel) +## Core Calls (Every Design Task β€” 4 calls) + +These 4 calls fill 80% of the DESIGN.md. Run them in parallel. ### Call 1: `designer_resolve_intent(product_description)` -**FILLS:** All sections (defaults) -**PURPOSE:** Auto-detects industry, personality, style, mode, density, color mood, must-haves, never-uses. -**USE:** Set defaults for entire DESIGN.md. Present to user in Phase 1. +**FILLS:** All sections (defaults for everything) +**PURPOSE:** Auto-detects industry, personality, style, mode, density, color mood, must-haves, never-uses. 
Without this, you're guessing. +**USE RESULT TO:** Set defaults for the entire DESIGN.md. Present to user for confirmation in Phase 1. ### Call 2: `designer_get_personality(resolved_cluster)` -**FILLS:** Sections 1, 2, 3, 4, 6, 7 -**PURPOSE:** Concrete visual vocabulary - tracking, radius range, shadow style, motion timing, density, CSS example. Single most important data source. -**USE:** Set every visual property. Personality vocabulary IS design system skeleton. +**FILLS:** Section 1 (theme), Section 2 (color direction), Section 3 (typography), Section 4 (spacing), Section 6 (motion), Section 7 (elevation) +**PURPOSE:** Returns the concrete visual vocabulary β€” specific tracking values, radius range, shadow style, motion timing, density, CSS example. This is the single most important data source for the DESIGN.md. +**USE RESULT TO:** Set every visual property. The personality vocabulary IS the design system skeleton. ### Call 3: `designer_get_page_template(page_type)` -**FILLS:** Sections 5, 9 -**PURPOSE:** Section anatomy with component inventory + cognitive laws for this page type. -**USE:** Define sections to build, components each needs, responsive behavior. +**FILLS:** Section 5 (components), Section 9 (responsive) +**PURPOSE:** Returns section anatomy with component inventory and which cognitive laws apply to this page type. Without this, you're inventing sections from scratch. +**USE RESULT TO:** Define what sections to build, what components each needs, what responsive behavior each requires. ### Call 4: `designer_get_anti_patterns(industry: resolved_industry)` -**FILLS:** Sections 8, 10 -**PURPOSE:** Specific violations this industry must avoid. -**USE:** Write Do's/Don'ts + anti-pattern checklist. Every "Don't" must come from this list. +**FILLS:** Section 8 (do's/don'ts), Section 10 (anti-patterns) +**PURPOSE:** Returns the specific violations this industry must avoid. Without this, you might put AI purple on a bank or neon on a healthcare app. 
+**USE RESULT TO:** Write the Do's/Don'ts section and the anti-pattern checklist. Every "Don't" must come from this list. -## Context Calls (Only When Product Needs Them) +## Context Calls (Only When the Product Needs Them) -NOT routine. Call ONLY when product has these specific features: +These are NOT routine. Call ONLY when the product has these specific features. -| Product Feature | Call | FILLS | WHY (what decision changes) | +| Product Feature | Call | FILLS | WHY (what decision it changes) | |---|---|---|---| -| Landing page | `designer_get_landing_pattern("hero-section")` | S5 | Conversion stats change hero layout: value prop 3s, CTA above fold, 40-80px bleed | -| Landing page | `designer_get_landing_pattern("section-ordering")` | S5 | Unbounce 41K pages: Heroβ†’Proofβ†’Problemβ†’Featuresβ†’Testimonialsβ†’Pricingβ†’FAQβ†’CTA | -| Landing page | `designer_get_landing_pattern("social-proof")` | S5 | Named metrics (+30-70%) vs logos (+260%) vs badges (+55%) | -| Landing page | `designer_get_landing_pattern("cta-optimization")` | S8 | First-person CTAs +90%, single CTA +266%, "no credit card" +34% | -| Pricing page | `designer_get_landing_pattern("pricing-psychology")` | S5 | Ariely decoy: 3 tiers, highlight middle, expensive first | -| Forms | `designer_get_interaction_pattern("form-design")` | S5 | Validation timing (blur not input), label placement (top not placeholder) | -| Navigation | `designer_get_interaction_pattern("navigation")` | S5 | Hamburger 39% slower on desktop (NNG). Tab bars +58% engagement. | -| Onboarding | `designer_get_interaction_pattern("onboarding")` | S5 | 3-5 checklist items > 8+. Interactive > passive. 
| -| Data tables | `designer_get_interaction_pattern("skeleton-vs-spinner")` | S6 | Skeleton for known structure, spinner for discrete actions | -| Error handling | `designer_get_ux_writing("error-messages")` | S8 | NNG rubric: what happened + why + how to fix | -| CTAs/buttons | `designer_get_ux_writing("button-labels")` | S8 | "Start my trial" +90% vs "Start your trial" | -| Premium feel | `designer_get_design_system("stripe")` or `("vercel-geist")` | S1 | Stripe weight 300/500, Vercel -0.04em tracking | -| Enterprise | `designer_get_design_system("ibm-carbon")` | S1 | Carbon 12px spacing-04, IBM Plex, a11y-first | - -## Token Calls (Phase 5 only - when generating code) - -Do NOT call during design resolution: +| **Landing page** | `designer_get_landing_pattern("hero-section")` | Section 5 | Conversion stats change hero layout: value prop in 3s, CTA above fold, 40-80px bleed | +| **Landing page** | `designer_get_landing_pattern("section-ordering")` | Section 5 | Unbounce 41K pages: Heroβ†’Proofβ†’Problemβ†’Featuresβ†’Testimonialsβ†’Pricingβ†’FAQβ†’CTA | +| **Landing page** | `designer_get_landing_pattern("social-proof")` | Section 5 | Named metrics (+30-70%) vs logos (+260%) vs badges (+55%) changes proof section design | +| **Landing page** | `designer_get_landing_pattern("cta-optimization")` | Section 8 | First-person CTAs +90%, single CTA +266%, "no credit card" +34% | +| **Pricing page** | `designer_get_landing_pattern("pricing-psychology")` | Section 5 | Ariely decoy changes tier structure: 3 tiers, highlight middle, expensive first | +| **Forms** | `designer_get_interaction_pattern("form-design")` | Section 5 | Validation timing (blur not input), label placement (top not placeholder), max field count | +| **Navigation** | `designer_get_interaction_pattern("navigation")` | Section 5 | Hamburger is 39% slower on desktop (NNG). Tab bars +58% engagement. Changes nav type. 
| +| **Onboarding** | `designer_get_interaction_pattern("onboarding")` | Section 5 | 3-5 checklist items outperform 8+. Interactive > passive. Changes onboarding structure. | +| **Data tables** | `designer_get_interaction_pattern("skeleton-vs-spinner")` | Section 6 | Skeleton for known structure, spinner for discrete actions. Changes loading pattern. | +| **Error handling** | `designer_get_ux_writing("error-messages")` | Section 8 | NNG rubric: what happened + why + how to fix. Changes error message format. | +| **CTAs/buttons** | `designer_get_ux_writing("button-labels")` | Section 8 | "Start my trial" +90% vs "Start your trial". Changes button copy strategy. | +| **Premium feel** | `designer_get_design_system("stripe")` or `("vercel-geist")` | Section 1 | Specific values to reference: Stripe weight 300/500, Vercel -0.04em tracking | +| **Enterprise** | `designer_get_design_system("ibm-carbon")` | Section 1 | Carbon's 12px spacing-04, IBM Plex, a11y-first component architecture | + +## Token Calls (Phase 5 only — when generating code) + +Do NOT call these during design resolution. Call them when writing actual CSS. + ``` -design_tokens_get_category("colors") → OKLCH ramp construction -design_tokens_get_category("typography") → type scale token defs -design_tokens_get_category("spacing") → 4px grid token defs -design_tokens_generate(description) → complete Tailwind v4 CSS +design_tokens_get_category("colors") → OKLCH ramp construction procedure +design_tokens_get_category("typography") → type scale token definitions +design_tokens_get_category("spacing") → 4px grid token definitions +design_tokens_generate(description) → generate complete Tailwind v4 CSS ``` --- # PHASE 3: CONSTRAINT APPLICATION -Cross-reference every decision against rules below. P1 → P10. Higher = fix first. +Cross-reference every decision against the rules below. + +--- + +# DESIGN RULES BY PRIORITY + +*Follow P1→P10. Higher priority = fix first. 
Every rule has a source.* ## P1: Accessibility (CRITICAL) | Rule | Standard | Avoid | |---|---|---| -| `contrast-body` | 4.5:1 body (AA); 7:1 AAA | Testing light mode only | -| `contrast-large` | 3:1 for ≥18px bold or ≥24px | Assuming brand colors pass | -| `contrast-ui` | 3:1 UI components, borders, icons | Low-contrast borders in dark | -| `focus-rings` | 2px ring, 2px offset, primary color, ALL interactive | `outline: none` without replacement | -| `touch-targets` | 44x44px min (WCAG); 48x48px recommended; 8px gap | Targets < 44px mobile | -| `color-not-only` | Color + icon/text for every state | Red border as sole error indicator | -| `reduced-motion` | `prefers-reduced-motion: reduce` with `!important` in `@layer base` | Missing media query | +| `contrast-body` | 4.5:1 minimum for body text (AA); 7:1 for AAA | Testing only in light mode | +| `contrast-large` | 3:1 for text >= 18px bold or >= 24px | Assuming brand colors pass | +| `contrast-ui` | 3:1 for UI components, borders, icons | Low-contrast borders in dark mode | +| `focus-rings` | 2px ring, 2px offset, primary color on ALL interactive elements | `outline: none` without replacement | +| `touch-targets` | Min 44x44px (WCAG 2.5.5); recommended 48x48px; gap >= 8px | Touch targets < 44px on mobile | +| `color-not-only` | Color + icon/text for every state (error, success, warning) | Red border as sole error indicator | +| `reduced-motion` | `prefers-reduced-motion: reduce` with `!important` in `@layer base` | Missing media query (WCAG 2.3.3) | | `keyboard-nav` | Tab order = visual order; Enter/Space activates; Escape closes | Unreachable interactive elements | -| `skip-links` | `Skip to main content` first body element | No skip link on nav-heavy pages | -| `alt-text` | Descriptive for informational; `alt=""` decorative | `alt="image"` or missing | -| `aria-labels` | `aria-label` on icon-only buttons | Unlabeled icon buttons | -| `heading-hierarchy` | Sequential h1→h2→h3, no skipping | h1 → h3 | -| 
`zoom-support` | Works at 400% zoom; never `user-scalable=no` | Disabling pinch-to-zoom | -| `semantic-html` | `