From 5d220a766da84b4602f3a7b72502eeba813194df Mon Sep 17 00:00:00 2001
From: Claude <claude@anthropic.com>
Date: Sat, 4 Apr 2026 02:46:38 +0000
Subject: [PATCH] feat: add reference command and fix README check
 documentation

- Add `skill-bench help <type>` subcommand for check/setup docs
- Fix README TOML examples to use inline table format
- Remove non-existent checks (file-content, output-contains, text-contains, skill-not-invoked)
- Add missing checks (message-contains, tool-param with value)
- Add proper TOML examples for each check category

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 README.md        |  96 +++++++++++++++++------
 src/cli/args.rs  |   6 ++
 src/main.rs      |   4 +
 src/reference.rs | 193 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 275 insertions(+), 24 deletions(-)
 create mode 100644 src/reference.rs
diff --git a/README.md b/README.md
index dada6b7..e9d1562 100644
--- a/README.md
+++ b/README.md
@@ -151,8 +151,7 @@ command = "mkdir -p subdir && echo 'done' > subdir/file.txt"
 
 [[checks]]
 name = "check_name"
-command = "skill-invoked"
-skill = "skill-name"
+command = { command = "skill-invoked", skill = "skill-name" }
 
 [answers]
 "question_key" = "answer_value"
@@ -180,49 +179,98 @@ name = "optional-descriptive-name"
 command = "echo 'Hello' > greeting.txt && mkdir -p output"
 ```
 
-## Assertion Reference
+## Check Reference
 
-Assertions use structured TOML format:
+Run `skill-bench help <type>` for detailed help on any check type.
 
 ### Skill Verification
-- `skill-loaded` - Skill was loaded
-- `skill-invoked` - Skill was invoked
-- `skill-not-invoked` - Skill was NOT invoked
+
+- `skill-loaded` — Skill was loaded
+- `skill-invoked` — Skill was invoked
+
+```toml
+[[checks]]
+name = "check-name"
+command = { command = "skill-invoked", skill = "my-skill" }
+```
 
 ### MCP Verification
-- `mcp-loaded` - MCP server was loaded
-- `mcp-tool-invoked` - MCP tool was invoked
-- `mcp-success` - MCP tool succeeded
+
+- `mcp-loaded` — MCP server was loaded
+- `mcp-tool-invoked` — MCP tool was invoked
+- `mcp-success` — MCP tool succeeded
+
+```toml
+[[checks]]
+name = "check-name"
+command = { command = "mcp-loaded", server = "filesystem" }
+```
 
 ### Tool Verification
-- `tool-use` - Tool was used
-- `param` - Parameter value verification
+
+- `tool-use` — Tool was called (partial match on tool name)
+- `tool-param` — Tool was called with a specific parameter
+
+```toml
+[[checks]]
+name = "check-name"
+command = { command = "tool-use", tool = "Read" }
+
+[[checks]]
+name = "check-param"
+command = { command = "tool-param", tool = "Read", param = "file_path", value = "test.txt" }
+```
 
 ### File Verification
-- `file-content` - Verify file content
-- `file-contains` - File contains string
-- `workspace-file` - File exists
-- `workspace-dir` - Directory exists
+
+- `workspace-file` — File exists in workspace
+- `workspace-dir` — Directory exists in workspace
+- `file-contains` — File contains string
+
+```toml
+[[checks]]
+name = "check-name"
+command = { command = "workspace-file", path = "output.txt" }
+
+[[checks]]
+name = "check-content"
+command = { command = "file-contains", file = "output.txt", contains = "expected text" }
+```
 
 ### Log Verification
-- `output-contains` - Output contains string
-- `log-contains` - Log contains pattern
-- `text-contains` - Text content search
+
+- `log-contains` — Log contains regex pattern
+- `message-contains` — Assistant output contains text
+
+```toml
+[[checks]]
+name = "check-name"
+command = { command = "log-contains", pattern = "error|failed" }
+
+[[checks]]
+name = "check-output"
+command = { command = "message-contains", text = "expected output" }
+```
 
 ### Database Verification
-- `db-query` - SQL query result verification
+
+- `db-query` — SQL query result verification
   - Numeric comparisons: `">0"`, `">=5"`, `"=10"`, `"<3"`, `"<=2"`
 
+```toml
+[[checks]]
+name = "check-name"
+command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM patents", expected = ">0" }
+```
+
 ### Negative Assertions
 
-Use `deny = true` on any assertion for negative verification:
+Use `deny = true` on any check to invert the assertion:
 
 ```toml
 [[checks]]
 name = "should-not-contain-error"
-command = "file-contains"
-file = "output.txt"
-contains = "error"
+command = { command = "file-contains", file = "output.txt", contains = "error" }
 deny = true
 ```
 
diff --git a/src/cli/args.rs b/src/cli/args.rs
index 73bf1e7..6785331 100644
--- a/src/cli/args.rs
+++ b/src/cli/args.rs
@@ -6,6 +6,7 @@ use clap::{Parser, Subcommand};
 #[command(name = "skill-bench")]
 #[command(about = "TOML-based test runner for skill testing", long_about = None)]
 #[command(version)]
+#[command(disable_help_subcommand = true)]
 pub struct Cli {
     #[command(subcommand)]
     pub command: Commands,
@@ -57,6 +58,11 @@ pub enum Commands {
         #[arg(short, long)]
         verbose: bool,
     },
+    /// Show reference for check types and setup
+    Help {
+        /// Check type or "setup" (e.g., skill-invoked, file-contains, db-query)
+        check_type: Option<String>,
+    },
 }
 
 use std::path::PathBuf;
diff --git a/src/main.rs b/src/main.rs
index 20dedb7..9b72378 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,6 +2,7 @@ mod assertions;
 mod cli;
 mod models;
 mod output;
+mod reference;
 mod runtime;
 mod state;
 mod timeline;
@@ -68,6 +69,9 @@ async fn main() -> Result<()> {
         Commands::Timeline { log_file, verbose } => {
             timeline::display_timeline(&log_file, verbose)?;
         }
+        Commands::Help { check_type } => {
+            reference::print_help(check_type.as_deref())?;
+        }
     }
 
     Ok(())
diff --git a/src/reference.rs b/src/reference.rs
new file mode 100644
index 0000000..5323f9f
--- /dev/null
+++ b/src/reference.rs
@@ -0,0 +1,193 @@
+//! Reference documentation for check types and setup
+
+use anyhow::Result;
+
+struct CheckDoc {
+    name: &'static str, // check type identifier; matched against the CLI argument
+    description: &'static str, // one-line summary shown in the listing and detail view
+    required: &'static [&'static str], // field names printed under "Required fields:"
+    optional: &'static [&'static str], // names printed under "Optional fields:"; may be empty
+    example: &'static str, // TOML `command = { ... }` line printed in both examples
+}
+
+fn check_docs() -> Vec<CheckDoc> { // rebuilt on every call; entry order is the listing order
+    vec![
+        CheckDoc {
+            name: "skill-loaded",
+            description: "Verify a skill was loaded during initialization",
+            required: &["skill"],
+            optional: &[],
+            example: "command = { command = \"skill-loaded\", skill = \"my-skill\" }",
+        },
+        CheckDoc {
+            name: "skill-invoked",
+            description: "Verify a skill was invoked during execution",
+            required: &["skill"],
+            optional: &[],
+            example: "command = { command = \"skill-invoked\", skill = \"my-skill\" }",
+        },
+        CheckDoc {
+            name: "mcp-loaded",
+            description: "Verify an MCP server was loaded",
+            required: &["server"],
+            optional: &[],
+            example: "command = { command = \"mcp-loaded\", server = \"filesystem\" }",
+        },
+        CheckDoc {
+            name: "mcp-tool-invoked",
+            description: "Verify an MCP tool was invoked",
+            required: &["tool"],
+            optional: &[],
+            example: "command = { command = \"mcp-tool-invoked\", tool = \"read_file\" }",
+        },
+        CheckDoc {
+            name: "mcp-success",
+            description: "Verify MCP tool calls succeeded (no errors)",
+            required: &["tool"],
+            optional: &[],
+            example: "command = { command = \"mcp-success\", tool = \"read_file\" }",
+        },
+        CheckDoc {
+            name: "tool-use",
+            description: "Verify a tool was called (partial match on tool name)",
+            required: &["tool"],
+            optional: &[],
+            example: "command = { command = \"tool-use\", tool = \"Read\" }",
+        },
+        CheckDoc {
+            name: "tool-param",
+            description: "Verify a tool was called with a specific parameter value",
+            required: &["tool", "param"],
+            optional: &["value"],
+            example: "command = { command = \"tool-param\", tool = \"Read\", param = \"file_path\", value = \"test.txt\" }",
+        },
+        CheckDoc {
+            name: "workspace-file",
+            description: "Verify a file exists in the workspace",
+            required: &["path"],
+            optional: &[],
+            example: "command = { command = \"workspace-file\", path = \"output.txt\" }",
+        },
+        CheckDoc {
+            name: "workspace-dir",
+            description: "Verify a directory exists in the workspace",
+            required: &["path"],
+            optional: &[],
+            example: "command = { command = \"workspace-dir\", path = \"output\" }",
+        },
+        CheckDoc {
+            name: "file-contains",
+            description: "Verify a file contains specific text",
+            required: &["file", "contains"],
+            optional: &[],
+            example: "command = { command = \"file-contains\", file = \"output.txt\", contains = \"expected text\" }",
+        },
+        CheckDoc {
+            name: "log-contains",
+            description: "Verify the log contains a regex pattern",
+            required: &["pattern"],
+            optional: &[],
+            example: "command = { command = \"log-contains\", pattern = \"error|failed\" }",
+        },
+        CheckDoc {
+            name: "message-contains",
+            description: "Verify assistant output contains specific text",
+            required: &["text"],
+            optional: &[],
+            example: "command = { command = \"message-contains\", text = \"expected output\" }",
+        },
+        CheckDoc {
+            name: "db-query",
+            description: "Execute a SQL query and verify the result",
+            required: &["query", "expected"],
+            optional: &["db"],
+            example: "command = { command = \"db-query\", db = \"patents.db\", query = \"SELECT COUNT(*) FROM patents\", expected = \">0\" }",
+        },
+    ]
+}
+
+/// Print reference help: `None` lists all check types, `"setup"` prints
+/// the setup documentation, anything else is looked up as a check type.
+///
+/// # Errors
+/// Returns an error when `check_type` names an unknown check type.
+pub fn print_help(check_type: Option<&str>) -> Result<()> {
+    match check_type {
+        None => print_all(),
+        Some("setup") => print_setup(),
+        Some(name) => return print_check(name),
+    }
+    Ok(())
+}
+
+/// Print the usage line and a one-line summary of every help topic.
+fn print_all() {
+    println!("Usage: skill-bench help <type>\n");
+    println!("Check types:");
+    let entries = check_docs();
+    entries.iter().for_each(|entry| println!("  {:<20} {}", entry.name, entry.description));
+    println!("\nOther:");
+    println!("  {:<20} Setup step documentation", "setup");
+}
+
+/// Print the description, fields, and TOML examples for one check type.
+///
+/// # Errors
+/// Returns an error when `name` matches no documented check type.
+fn print_check(name: &str) -> Result<()> {
+    let catalog = check_docs();
+    let entry = match catalog.iter().find(|candidate| candidate.name == name) {
+        Some(found) => found,
+        None => anyhow::bail!(
+            "Unknown check type: '{}'\nRun 'skill-bench help' for available types",
+            name
+        ),
+    };
+
+    println!("{}\n", entry.name);
+    println!("  {}\n", entry.description);
+
+    println!("  Required fields:");
+    entry.required.iter().for_each(|field| println!("    - {}", field));
+
+    if !entry.optional.is_empty() {
+        println!("\n  Optional fields:");
+        entry.optional.iter().for_each(|field| println!("    - {}", field));
+    }
+
+    println!("\n  Example:");
+    println!("    [[checks]]");
+    println!("    name = \"check-name\"");
+    println!("    {}", entry.example);
+
+    println!("\n  Negative assertion (deny = true inverts the check):");
+    println!("    [[checks]]");
+    println!("    name = \"check-name\"");
+    println!("    {}", entry.example);
+    println!("    deny = true");
+    Ok(())
+}
+
+/// Print the reference text for `[[setup]]` steps: file steps first, then script steps.
+fn print_setup() {
+    println!("Setup\n");
+    println!("  Setup steps run in the test workspace before the test prompt.");
+    println!("  Steps are executed in order. Failure in any step fails the test.\n");
+
+    // Each variant prints a TOML example followed by its required fields.
+    println!("  File setup (creates a file with content):");
+    println!("    [[setup]]");
+    println!("    name = \"optional-name\"");
+    println!("    path = \"file.txt\"");
+    println!("    content = \"File content\"\n");
+    println!("  Required fields:");
+    println!("    - path: File path in workspace");
+    println!("    - content: File content to write\n");
+
+    println!("  Script setup (executes a shell command via bash -c):");
+    println!("    [[setup]]");
+    println!("    name = \"optional-name\"");
+    println!("    command = \"echo 'Hello' > greeting.txt\"\n");
+    println!("  Required fields:");
+    println!("    - command: Shell command to execute");
+}