diff --git a/Cargo.lock b/Cargo.lock
index 623065ff20..5af9bcd0c0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1439,7 +1439,7 @@ checksum = "f4ae09a41a4b89f94ec1e053623da8340d996bc32c6517d325a9daad9b239358"
 dependencies = [
  "chrono",
  "diesel_derives",
- "downcast-rs",
+ "downcast-rs 2.0.2",
  "libsqlite3-sys",
  "r2d2",
  "sqlite-wasm-rs",
@@ -1583,6 +1583,12 @@ version = "0.15.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
 
+[[package]]
+name = "downcast-rs"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"
+
 [[package]]
 name = "downcast-rs"
 version = "2.0.2"
@@ -1821,6 +1827,17 @@ dependencies = [
  "simd-adler32",
 ]
 
+[[package]]
+name = "filedescriptor"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e40758ed24c9b2eeb76c35fb0aebc66c626084edd827e07e1552279814c6682d"
+dependencies = [
+ "libc",
+ "thiserror 1.0.69",
+ "winapi",
+]
+
 [[package]]
 name = "find-msvc-tools"
 version = "0.1.5"
@@ -2128,7 +2145,9 @@ dependencies = [
  "forge_fs",
  "forge_markdown_stream",
  "forge_select",
+ "forge_shell_smoke",
  "forge_spinner",
+ "forge_test_kit",
  "forge_tracker",
  "forge_walker",
  "futures",
@@ -2310,6 +2329,14 @@ dependencies = [
  "uuid",
 ]
 
+[[package]]
+name = "forge_shell_smoke"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "portable-pty",
+]
+
 [[package]]
 name = "forge_snaps"
 version = "0.1.0"
@@ -2358,6 +2385,7 @@ dependencies = [
 name = "forge_test_kit"
 version = "0.1.0"
 dependencies = [
+ "forge_shell_smoke",
  "serde",
  "serde_json",
  "tokio",
@@ -3446,6 +3474,15 @@ dependencies = [
  "tempfile",
 ]
 
+[[package]]
+name = "ioctl-rs"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7970510895cee30b3e9128319f2cefd4bde883a39f38baa279567ba3a7eb97d"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "ipconfig"
 version = "0.3.2"
@@ -3827,6 +3864,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "memoffset"
+version = "0.6.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "merge"
 version = "0.2.0"
@@ -4010,6 +4056,20 @@ dependencies = [
  "smallvec",
 ]
 
+[[package]]
+name = "nix"
+version = "0.25.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4"
+dependencies = [
+ "autocfg",
+ "bitflags 1.3.2",
+ "cfg-if",
+ "libc",
+ "memoffset",
+ "pin-utils",
+]
+
 [[package]]
 name = "nix"
 version = "0.30.1"
@@ -4517,6 +4577,27 @@ version = "1.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
 
+[[package]]
+name = "portable-pty"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "806ee80c2a03dbe1a9fb9534f8d19e4c0546b790cde8fd1fea9d6390644cb0be"
+dependencies = [
+ "anyhow",
+ "bitflags 1.3.2",
+ "downcast-rs 1.2.1",
+ "filedescriptor",
+ "lazy_static",
+ "libc",
+ "log",
+ "nix 0.25.1",
+ "serial",
+ "shared_library",
+ "shell-words",
+ "winapi",
+ "winreg 0.10.1",
+]
+
 [[package]]
 name = "posthog-rs"
 version = "0.4.7"
@@ -4625,7 +4706,7 @@ checksum = "a3ef4f2f0422f23a82ec9f628ea2acd12871c81a9362b02c43c1aa86acfc3ba1"
 dependencies = [
  "futures",
  "indexmap 2.13.0",
- "nix",
+ "nix 0.30.1",
  "tokio",
  "tracing",
  "windows 0.61.3",
@@ -5440,7 +5521,7 @@ dependencies = [
  "libc",
  "log",
  "memchr",
- "nix",
+ "nix 0.30.1",
  "radix_trie",
  "unicode-segmentation",
  "unicode-width 0.2.2",
@@ -5743,6 +5824,48 @@ dependencies = [
  "version_check",
 ]
 
+[[package]]
+name = "serial"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1237a96570fc377c13baa1b88c7589ab66edced652e43ffb17088f003db3e86"
+dependencies = [
+ "serial-core",
+ "serial-unix",
+ "serial-windows",
+]
+
+[[package]]
+name = "serial-core"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f46209b345401737ae2125fe5b19a77acce90cd53e1658cda928e4fe9a64581"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "serial-unix"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f03fbca4c9d866e24a459cbca71283f545a37f8e3e002ad8c70593871453cab7"
+dependencies = [
+ "ioctl-rs",
+ "libc",
+ "serial-core",
+ "termios",
+]
+
+[[package]]
+name = "serial-windows"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15c6d3b776267a75d31bbdfd5d36c0ca051251caafc285827052bc53bcdc8162"
+dependencies = [
+ "libc",
+ "serial-core",
+]
+
 [[package]]
 name = "serial_test"
 version = "3.4.0"
@@ -5811,6 +5934,22 @@ dependencies = [
  "lazy_static",
 ]
 
+[[package]]
+name = "shared_library"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a9e7e0f2bfae24d8a5b5a66c5b257a83c7412304311512a0c054cd5e619da11"
+dependencies = [
+ "lazy_static",
+ "libc",
+]
+
+[[package]]
+name = "shell-words"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
+
 [[package]]
 name = "shlex"
 version = "1.3.0"
@@ -6339,6 +6478,15 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "termios"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d5d9cf598a6d7ce700a4e6a9199da127e6819a61e64b68609683cc9a01b5683a"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.69"
@@ -7842,6 +7990,15 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "winreg"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
+dependencies = [
+ "winapi",
+]
+
 [[package]]
 name = "winreg"
 version = "0.11.0"
diff --git a/Cargo.toml b/Cargo.toml
index 9a9f43786a..d034d1d989 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -156,6 +156,8 @@ forge_walker = { path = "crates/forge_walker" }
 forge_json_repair = { path = "crates/forge_json_repair" }
 forge_select = { path = "crates/forge_select" }
 forge_test_kit = { path = "crates/forge_test_kit" }
+forge_shell_smoke = { path = "crates/forge_shell_smoke" }
 
 forge_markdown_stream = { path = "crates/forge_markdown_stream" }
 forge_config = { path = "crates/forge_config" }
+portable-pty = "0.8"
diff --git a/crates/forge_main/Cargo.toml b/crates/forge_main/Cargo.toml
index a96637b215..0f06609d00 100644
--- a/crates/forge_main/Cargo.toml
+++ b/crates/forge_main/Cargo.toml
@@ -79,3 +79,5 @@ tempfile.workspace = true
 serial_test = "3.4"
 fake = { version = "5.1.0", features = ["derive"] }
 forge_domain = { path = "../forge_domain" }
+forge_test_kit = { workspace = true, features = ["pty"] }
+forge_shell_smoke.workspace = true
diff --git a/crates/forge_main/src/main.rs b/crates/forge_main/src/main.rs
index 0c54498fe2..2b1498af81 100644
--- a/crates/forge_main/src/main.rs
+++ b/crates/forge_main/src/main.rs
@@ -127,3 +127,382 @@ mod tests {
         }
     }
 }
+
+/// PTY-based integration tests that exercise the compiled `forge` binary
+/// running inside a real pseudo-terminal.
+///
+/// All tests here run fully offline — they do not call any LLM API.  They
+/// exercise subcommands and flags that resolve entirely from local state
+/// (config files, embedded agents, built-in commands, etc.) so they remain
+/// fast and reproducible in CI.
+#[cfg(test)]
+mod pty_tests {
+    use std::time::Duration;
+
+    use forge_test_kit::pty::PtySession;
+    use serial_test::serial;
+
+    // ──────────────────────────────────────────────────────────────
+    // Helpers
+    // ──────────────────────────────────────────────────────────────
+
+    /// Returns the absolute path to the compiled `forge` debug binary.
+    ///
+    /// `CARGO_BIN_EXE_forge` wins when it is present in the environment.
+    /// Otherwise the path is built from the workspace root: the target
+    /// directory is `CARGO_TARGET_DIR` when set (a relative value is resolved
+    /// against the workspace root), else `target`.  The binary is not built
+    /// here, so it must already exist (e.g. via `cargo build -p forge_main`).
+    fn forge_bin() -> std::path::PathBuf {
+        if let Ok(exe) = std::env::var("CARGO_BIN_EXE_forge") {
+            return std::path::PathBuf::from(exe);
+        }
+        // Honour a relocated target directory instead of assuming `target/`.
+        // `Path::join` replaces the base when the joined path is absolute.
+        let target_dir = std::env::var_os("CARGO_TARGET_DIR")
+            .filter(|dir| !dir.is_empty())
+            .unwrap_or_else(|| std::ffi::OsString::from("target"));
+        let bin_name = format!("forge{}", std::env::consts::EXE_SUFFIX);
+        let profile_dir = workspace_root().join(target_dir).join("debug");
+        profile_dir.join(bin_name)
+    }
+
+    /// Locates the workspace root by walking two directory levels up from
+    /// `CARGO_MANIFEST_DIR`, which is `crates/forge_main` for this crate.
+    /// Panics if the variable cannot be read or the path is too shallow.
+    fn workspace_root() -> std::path::PathBuf {
+        let crate_dir = std::env::var("CARGO_MANIFEST_DIR")
+            .expect("CARGO_MANIFEST_DIR must be set when running tests");
+        // `ancestors()` yields the crate dir, then `crates/`, then the root.
+        let root = std::path::Path::new(&crate_dir).ancestors().nth(2);
+        root.expect("workspace root is two levels above manifest dir")
+            .to_path_buf()
+    }
+
+    /// Runs `forge` with `args` inside a PTY and blocks (for at most 10 s) until
+    /// `needle` shows up in the output, then returns what the session captured.
+    /// Panics if the session cannot be spawned or the wait ends in an error.
+    ///
+    /// `-C <workspace_root>` is always passed first, so the local
+    /// `.forge/commands/` and `.forge/skills/` directories are resolved from the
+    /// workspace root no matter where the test runner was started.
+    fn run_and_expect(args: &[&str], needle: &str) -> String {
+        let bin = forge_bin();
+        let bin_str = bin.to_str().expect("binary path is valid UTF-8");
+        let root = workspace_root();
+        let root_str = root.to_str().expect("workspace root is valid UTF-8");
+        let full_args = [&["-C", root_str][..], args].concat();
+        let session = PtySession::spawn(bin_str, &full_args).expect("PTY session spawns");
+        match session.expect(needle, Duration::from_secs(10)) {
+            Ok(output) => output,
+            Err(e) => panic!("{e}"),
+        }
+    }
+
+    // ──────────────────────────────────────────────────────────────
+    // Basic invocation flags
+    // ──────────────────────────────────────────────────────────────
+
+    /// `forge --version` prints the program name together with a version number.
+    #[test]
+    #[serial]
+    fn test_pty_version_contains_name_and_semver() {
+        let program = "forge";
+        let output = run_and_expect(&["--version"], program);
+        let names_program = output.contains(program);
+        assert!(names_program, "program name missing:\n{output}");
+        // Loose stand-in for a semver check: any ASCII digit counts as a version.
+        let has_digit = output.bytes().any(|b| b.is_ascii_digit());
+        assert!(has_digit, "version number missing:\n{output}");
+    }
+
+    /// `forge --help` output must contain "Usage" (the usage section heading).
+    #[test]
+    #[serial]
+    fn test_pty_help_shows_usage_section() {
+        let output = run_and_expect(&["--help"], "Usage");
+        assert!(output.contains("Usage"), "Usage section missing:\n{output}");
+    }
+
+    /// `forge --help` lists the `--prompt` / `-p` flag.
+    #[test]
+    #[serial]
+    fn test_pty_help_lists_prompt_flag() {
+        // Match the long flag itself: a bare "prompt" would also be satisfied by
+        // any help description that merely mentions prompts.
+        let flag = "--prompt";
+        let output = run_and_expect(&["--help"], flag);
+        assert!(output.contains(flag), "--prompt flag not listed in help:\n{output}");
+    }
+
+    // ──────────────────────────────────────────────────────────────
+    // Interactive mode banner
+    // ──────────────────────────────────────────────────────────────
+
+    /// Interactive mode prints a banner before the first prompt; this test waits
+    /// for its "Version:" line.  (Only that line is checked, not the ASCII-art
+    /// logo.)
+    #[test]
+    #[serial]
+    fn test_pty_interactive_banner_contains_branding() {
+        const MARKER: &str = "Version:";
+        let bin = forge_bin();
+        let bin_str = bin.to_str().expect("binary path is valid UTF-8");
+        let root = workspace_root();
+        let root_str = root.to_str().expect("workspace root is valid UTF-8");
+        let spawn_args = ["-C", root_str];
+        let mut session = PtySession::spawn(bin_str, &spawn_args).expect("PTY session spawns");
+
+        // Keep the wait result aside so Ctrl-D is sent even when the wait failed.
+        let waited = session.expect(MARKER, Duration::from_secs(10));
+        let _ = session.send(&[0x04]); // best-effort Ctrl-D (EOF) to end the session
+
+        let output = waited.expect("banner appeared within timeout");
+        let has_marker = output.contains(MARKER);
+        assert!(has_marker, "Version line missing from banner:\n{output}");
+    }
+
+    /// Interactive mode's banner mentions the `/new` command.  Both the wait and
+    /// the assertion only look for the substring "new".
+    #[test]
+    #[serial]
+    fn test_pty_interactive_banner_shows_new_command_hint() {
+        const HINT: &str = "new";
+        let bin = forge_bin();
+        let bin_str = bin.to_str().expect("binary path is valid UTF-8");
+        let root = workspace_root();
+        let root_str = root.to_str().expect("workspace root is valid UTF-8");
+        let spawn_args = ["-C", root_str];
+        let mut session = PtySession::spawn(bin_str, &spawn_args).expect("PTY session spawns");
+
+        let waited = session.expect(HINT, Duration::from_secs(10));
+        let _ = session.send(&[0x04]); // best-effort Ctrl-D (EOF) to end the session
+
+        let output = waited.expect("banner appeared within timeout");
+        let has_hint = output.contains(HINT);
+        assert!(has_hint, "'/new' hint missing from banner:\n{output}");
+    }
+
+    /// Sends Ctrl-D (EOF) to an interactive `forge` session.  Only the write and
+    /// a final read of the captured output are exercised; exit is not asserted.
+    #[test]
+    #[serial]
+    fn test_pty_exits_on_ctrl_d() {
+        const CTRL_D: u8 = 0x04;
+        let bin = forge_bin();
+        let bin_str = bin.to_str().expect("binary path is valid UTF-8");
+        let root = workspace_root();
+        let root_str = root.to_str().expect("workspace root is valid UTF-8");
+        let spawn_args = ["-C", root_str];
+        let mut session = PtySession::spawn(bin_str, &spawn_args).expect("PTY session spawns");
+
+        // Give the banner a moment to render before signalling EOF.
+        std::thread::sleep(Duration::from_millis(400));
+        session.send(&[CTRL_D]).expect("Ctrl-D sent");
+        let _ = session.output(); // best effort: reading the output must not panic
+    }
+
+    // ──────────────────────────────────────────────────────────────
+    // `forge banner` subcommand
+    // ──────────────────────────────────────────────────────────────
+
+    /// `forge banner` output includes the "Version:" line.
+    /// (`run_and_expect` panics on its own if the line never appears.)
+    #[test]
+    #[serial]
+    fn test_pty_banner_subcommand_shows_version() {
+        let marker = "Version:";
+        let output = run_and_expect(&["banner"], marker);
+        let has_version = output.contains(marker);
+        assert!(has_version, "Version: line missing from banner output:\n{output}");
+    }
+
+    /// `forge banner` prints the `:new` hint used in CLI mode, as opposed to the
+    /// `/new` hint of the interactive REPL.
+    #[test]
+    #[serial]
+    fn test_pty_banner_subcommand_shows_cli_hint() {
+        let hint = ":new";
+        let output = run_and_expect(&["banner"], hint);
+        let has_hint = output.contains(hint);
+        assert!(has_hint, "':new' hint missing from banner output:\n{output}");
+    }
+
+    // ──────────────────────────────────────────────────────────────
+    // `forge list agents`
+    // ──────────────────────────────────────────────────────────────
+
+    /// The porcelain agent listing (`forge list agents --porcelain`) must mention
+    /// the built-in `forge` agent.
+    #[test]
+    #[serial]
+    fn test_pty_list_agents_includes_forge_agent() {
+        let agent = "forge";
+        let output = run_and_expect(&["list", "agents", "--porcelain"], agent);
+        let listed = output.contains(agent);
+        assert!(listed, "built-in 'forge' agent missing from list:\n{output}");
+    }
+
+    /// The porcelain agent listing (`forge list agents --porcelain`) must mention
+    /// the built-in `muse` agent.
+    #[test]
+    #[serial]
+    fn test_pty_list_agents_includes_muse_agent() {
+        let agent = "muse";
+        let output = run_and_expect(&["list", "agents", "--porcelain"], agent);
+        let listed = output.contains(agent);
+        assert!(listed, "built-in 'muse' agent missing from list:\n{output}");
+    }
+
+    /// The porcelain agent listing (`forge list agents --porcelain`) must mention
+    /// the built-in `sage` agent.
+    #[test]
+    #[serial]
+    fn test_pty_list_agents_includes_sage_agent() {
+        let agent = "sage";
+        let output = run_and_expect(&["list", "agents", "--porcelain"], agent);
+        let listed = output.contains(agent);
+        assert!(listed, "built-in 'sage' agent missing from list:\n{output}");
+    }
+
+    /// `forge list agents --porcelain`: the output must contain the header text "ID".
+    #[test]
+    #[serial]
+    fn test_pty_list_agents_has_header() {
+        let output = run_and_expect(&["list", "agents", "--porcelain"], "ID");
+        assert!(output.contains("ID"), "Header row missing from agent list:\n{output}");
+    }
+
+    // ──────────────────────────────────────────────────────────────
+    // `forge list skill`
+    // ──────────────────────────────────────────────────────────────
+
+    /// `forge list skill --porcelain`: the output must contain the header text "NAME".
+    #[test]
+    #[serial]
+    fn test_pty_list_skills_has_header() {
+        let output = run_and_expect(&["list", "skill", "--porcelain"], "NAME");
+        assert!(output.contains("NAME"), "Header row missing from skill list:\n{output}");
+    }
+
+    /// The porcelain skill listing (`forge list skill --porcelain`) must mention
+    /// the embedded `create-skill` skill.
+    #[test]
+    #[serial]
+    fn test_pty_list_skills_includes_create_skill() {
+        let skill = "create-skill";
+        let output = run_and_expect(&["list", "skill", "--porcelain"], skill);
+        let listed = output.contains(skill);
+        assert!(listed, "'create-skill' missing from skill list:\n{output}");
+    }
+
+    /// The porcelain skill listing (`forge list skill --porcelain`) must mention
+    /// the embedded `execute-plan` skill.
+    #[test]
+    #[serial]
+    fn test_pty_list_skills_includes_execute_plan() {
+        let skill = "execute-plan";
+        let output = run_and_expect(&["list", "skill", "--porcelain"], skill);
+        let listed = output.contains(skill);
+        assert!(listed, "'execute-plan' missing from skill list:\n{output}");
+    }
+
+    // ──────────────────────────────────────────────────────────────
+    // `forge list cmd`
+    // ──────────────────────────────────────────────────────────────
+
+    /// `forge list cmd --porcelain`: the output must contain the header text "ID".
+    #[test]
+    #[serial]
+    fn test_pty_list_cmd_has_header() {
+        let output = run_and_expect(&["list", "cmd", "--porcelain"], "ID");
+        assert!(output.contains("ID"), "Header row missing from command list:\n{output}");
+    }
+
+    /// The porcelain command listing (`forge list cmd --porcelain`) must mention
+    /// the `fixme` command.
+    #[test]
+    #[serial]
+    fn test_pty_list_cmd_includes_fixme() {
+        let command = "fixme";
+        let output = run_and_expect(&["list", "cmd", "--porcelain"], command);
+        let listed = output.contains(command);
+        assert!(listed, "'fixme' command missing from command list:\n{output}");
+    }
+
+    /// The porcelain command listing (`forge list cmd --porcelain`) must mention
+    /// the `check` command.
+    #[test]
+    #[serial]
+    fn test_pty_list_cmd_includes_check() {
+        let command = "check";
+        let output = run_and_expect(&["list", "cmd", "--porcelain"], command);
+        let listed = output.contains(command);
+        assert!(listed, "'check' command missing from command list:\n{output}");
+    }
+
+    // ──────────────────────────────────────────────────────────────
+    // `forge env`
+    // ──────────────────────────────────────────────────────────────
+
+    /// `forge env` output must contain the "ENVIRONMENT" section header.
+    /// (`run_and_expect` panics on its own if the header never appears.)
+    #[test]
+    #[serial]
+    fn test_pty_env_shows_environment_header() {
+        let header = "ENVIRONMENT";
+        let output = run_and_expect(&["env"], header);
+        let has_header = output.contains(header);
+        assert!(has_header, "ENVIRONMENT header missing from env output:\n{output}");
+    }
+
+    /// `forge env` output must contain a "version" field.  Only the field label
+    /// is matched; the version value itself is not inspected.
+    #[test]
+    #[serial]
+    fn test_pty_env_shows_version() {
+        let field = "version";
+        let output = run_and_expect(&["env"], field);
+        let has_field = output.contains(field);
+        assert!(has_field, "version field missing from env output:\n{output}");
+    }
+
+    /// `forge env` output must contain a "working directory" field.  Only the
+    /// field label is matched; the reported path is not inspected.
+    #[test]
+    #[serial]
+    fn test_pty_env_shows_working_directory() {
+        let field = "working directory";
+        let output = run_and_expect(&["env"], field);
+        let has_field = output.contains(field);
+        assert!(has_field, "working directory field missing from env output:\n{output}");
+    }
+
+    /// `forge env` output must contain the "PATHS" section heading.
+    #[test]
+    #[serial]
+    fn test_pty_env_shows_paths_section() {
+        let output = run_and_expect(&["env"], "PATHS");
+        assert!(output.contains("PATHS"), "PATHS section missing from env output:\n{output}");
+    }
+
+    // ──────────────────────────────────────────────────────────────
+    // `forge conversation new`
+    // ──────────────────────────────────────────────────────────────
+
+    /// `forge conversation new` prints a UUID-shaped conversation ID to stdout.
+    #[test]
+    #[serial]
+    fn test_pty_conversation_new_prints_uuid() {
+        // Every UUID contains a hyphen, so "-" is the earliest usable needle.
+        let output = run_and_expect(&["conversation", "new"], "-");
+        // Require a real 8-4-4-4-12 hex group instead of merely counting hyphens.
+        let is_uuid = |w: &[u8]| {
+            w.iter().enumerate().all(|(i, b)| match i {
+                8 | 13 | 18 | 23 => *b == b'-',
+                _ => b.is_ascii_hexdigit(),
+            })
+        };
+        assert!(output.as_bytes().windows(36).any(is_uuid), "no UUID found in:\n{output}");
+    }
+}
diff --git a/crates/forge_shell_smoke/Cargo.toml b/crates/forge_shell_smoke/Cargo.toml
new file mode 100644
index 0000000000..35a8553f38
--- /dev/null
+++ b/crates/forge_shell_smoke/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "forge_shell_smoke"
+version = "0.1.0"
+edition.workspace = true
+rust-version.workspace = true
+description = "PTY-based smoke tests for the forge CLI and ZSH plugin"
+
+[dependencies]
+anyhow.workspace = true
+portable-pty.workspace = true
+
+[[bin]]
+name = "forge_smoke"
+path = "src/bin/forge_smoke.rs"
+
+[[bin]]
+name = "zsh_plugin_smoke"
+path = "src/bin/zsh_plugin_smoke.rs"
+
+[lib]
+name = "forge_shell_smoke"
+path = "src/lib.rs"
+doctest = false
diff --git a/crates/forge_shell_smoke/README.md b/crates/forge_shell_smoke/README.md
new file mode 100644
index 0000000000..89dfb76334
--- /dev/null
+++ b/crates/forge_shell_smoke/README.md
@@ -0,0 +1,181 @@
+# forge_shell_smoke
+
+PTY-based smoke tests for the `forge` CLI and the ZSH shell plugin.
+
+This crate provides:
+
+- **`PtySession`** — a portable pseudo-terminal wrapper for spawning real
+  interactive processes in tests.
+- **`forge_smoke`** — an offline CLI smoke test binary (no API key required).
+- **`zsh_plugin_smoke`** — an end-to-end ZSH plugin smoke test including live
+  LLM requests.
+
+---
+
+## Running the smoke tests
+
+Build the `forge` binary first, then run either smoke binary:
+
+```sh
+# build the CLI
+cargo build -p forge_main
+
+# offline CLI smoke test  (no API key needed)
+cargo run -p forge_shell_smoke --bin forge_smoke
+
+# ZSH plugin smoke test  (needs a valid API key in the environment)
+cargo run -p forge_shell_smoke --bin zsh_plugin_smoke
+```
+
+---
+
+## Writing a new smoke test
+
+### 1. Add a new check function
+
+Every check is a plain function that takes a `&mut PtySession` (for
+interactive tests) or creates its own session (for single-command tests).
+
+```rust
+use std::time::Duration;
+use forge_shell_smoke::pty::PtySession;
+use forge_shell_smoke::paths::{forge_bin, workspace_root};
+use forge_shell_smoke::report::{fail, pass, print_header, print_output, strip_ansi};
+
+/// Verify `forge my-new-subcommand` exits cleanly and prints "OK".
+fn check_my_subcommand() {
+    print_header("forge my-new-subcommand");
+
+    let bin = forge_bin();
+    let root = workspace_root();
+    let session = PtySession::spawn(
+        bin.to_str().unwrap(),
+        &["-C", root.to_str().unwrap(), "my-new-subcommand"],
+    )
+    .expect("PTY session spawns");
+
+    // Wait up to 5 s for the expected output.
+    match session.expect("OK", Duration::from_secs(5)) {
+        Ok(out) => {
+            print_output(&out);
+            pass("subcommand printed 'OK'");
+        }
+        Err(e) => fail("subcommand failed", &e.to_string()),
+    }
+}
+```
+
+Then call it from `main()` in the relevant binary (`forge_smoke.rs` or
+`zsh_plugin_smoke.rs`).
+
+### 2. Isolating a command's output with `output_len` + `output_since`
+
+When you reuse a single long-running session across multiple checks (as
+`zsh_plugin_smoke` does), earlier output accumulates in the PTY buffer.
+Use `output_len` / `output_since` to get a clean window of just the
+current command's output:
+
+```rust
+// Take a snapshot of how many bytes are already in the buffer.
+let mark = session.output_len();
+
+// Send the command.
+session.send_line(":env").unwrap();
+
+// Wait for a known string anywhere in the *full* buffer.
+session.expect("ENVIRONMENT", Duration::from_secs(10)).unwrap();
+
+// Read only the bytes that arrived *after* our mark.
+let fresh = session.output_since(mark);
+let stripped = strip_ansi(&fresh);
+assert!(stripped.contains("version"));
+```
+
+**Important:** `output_len` returns the total bytes buffered so far.
+Because the PTY reader runs on a background thread, this value can race
+with the child process output.  Always call `output_len` *before*
+`send_line`, not after, to make sure you catch the command echo as well
+as its response.
+
+### 3. Avoiding false matches on buffered data
+
+`expect(needle, timeout)` searches the *full* accumulated buffer.  If
+`needle` was already in the buffer from a previous command, `expect`
+returns immediately.  To avoid this, choose a unique sentinel string:
+
+```rust
+// BAD — "MODEL=" was already in the buffer from a previous assignment.
+session.send_line("echo \"MODEL=$MY_VAR\"").unwrap();
+session.expect("MODEL=", Duration::from_secs(5)).unwrap(); // returns stale data
+
+// GOOD — "VERIFY_MODEL=" is unique and hasn't appeared before.
+session.send_line("echo \"VERIFY_MODEL=$MY_VAR\"").unwrap();
+session.expect("VERIFY_MODEL=", Duration::from_secs(5)).unwrap();
+```
+
+### 4. Handling interactive / fzf commands
+
+Some commands (`:m`, `:p`) open fzf inside the PTY.  Because fzf is
+a full-screen TUI, you cannot drive it reliably in a headless PTY.
+
+The recommended workaround is to set the underlying shell variables that
+the fzf picker would have set, bypassing the picker entirely:
+
+```zsh
+# What ':m claude-haiku' does internally:
+_FORGE_SESSION_MODEL=claude-3-haiku-20240307
+_FORGE_SESSION_PROVIDER=anthropic
+```
+
+```rust
+session.send_line(
+    "_FORGE_SESSION_MODEL=claude-3-haiku-20240307; \
+     _FORGE_SESSION_PROVIDER=anthropic; \
+     echo OK_SET",
+).unwrap();
+session.expect("OK_SET", Duration::from_secs(5)).unwrap();
+```
+
+### 5. Sending control characters
+
+```rust
+session.send(&[0x03]).unwrap(); // Ctrl-C  — interrupt a running forge request
+session.send(&[0x04]).unwrap(); // Ctrl-D  — EOF / exit interactive mode
+```
+
+### 6. Fast-fail on child exit
+
+`expect` returns an error immediately (rather than waiting the full
+timeout) when the child process exits without producing the needle.  This
+keeps test runs fast for short-lived commands.
+
+### 7. The `PtySession` API at a glance
+
+| Method | Description |
+|--------|-------------|
+| `PtySession::spawn(prog, args)` | Spawn in a new 80×24 PTY |
+| `PtySession::spawn_with_env(prog, args, env)` | Same, with extra env vars |
+| `session.send_line(text)` | Write `text\n` to stdin |
+| `session.send(bytes)` | Write raw bytes (control chars etc.) |
+| `session.output()` | Full accumulated output as a `String` |
+| `session.output_len()` | Number of bytes buffered so far (mark position) |
+| `session.output_since(mark)` | Bytes captured after `mark` |
+| `session.expect(needle, timeout)` | Block until needle appears |
+| `session.is_done()` | `true` once the child has exited |
+
+---
+
+## Crate layout
+
+```
+crates/forge_shell_smoke/
+├── src/
+│   ├── lib.rs              — crate root, exports pty / paths / report
+│   ├── pty.rs              — PtySession implementation
+│   ├── paths.rs            — workspace_root(), forge_bin(), plugin_path()
+│   ├── report.rs           — pass(), fail(), print_header(), strip_ansi()
+│   └── bin/
+│       ├── forge_smoke.rs      — offline CLI smoke tests
+│       └── zsh_plugin_smoke.rs — ZSH plugin + live LLM smoke tests
+└── README.md
+```
diff --git a/crates/forge_shell_smoke/src/bin/forge_smoke.rs b/crates/forge_shell_smoke/src/bin/forge_smoke.rs
new file mode 100644
index 0000000000..65a810a437
--- /dev/null
+++ b/crates/forge_shell_smoke/src/bin/forge_smoke.rs
@@ -0,0 +1,283 @@
+//! forge CLI PTY smoke test.
+//!
+//! Runs a series of `forge` subcommands inside real pseudo-terminals and
+//! prints a live pass/fail report.  No LLM API key is required — every check
+//! is fully offline.
+//!
+//! Run with:
+//!   cargo run -p forge_shell_smoke --bin forge_smoke
+
+use std::time::Duration;
+
+use forge_shell_smoke::paths::{forge_bin, workspace_root};
+use forge_shell_smoke::pty::PtySession;
+use forge_shell_smoke::report::{
+    BOLD, CYAN, DIM, RED, RESET, fail, pass, print_header, print_output, strip_ansi,
+};
+
+// ── session helpers ───────────────────────────────────────────────────────────
+
+/// Runs `forge -C <workspace_root> <extra_args…>` inside a PTY and returns
+/// everything captured from it.
+///
+/// Polling ends when the child has exited or once `timeout` has elapsed,
+/// whichever happens first.  Reaching the timeout is not treated as an error:
+/// the output gathered so far is still returned as `Ok`.  `Err` is produced
+/// only when the PTY session cannot be spawned.
+fn capture(extra_args: &[&str], timeout: Duration) -> Result<String, String> {
+    let forge_path = forge_bin();
+    let workspace = workspace_root();
+    let forge_str = forge_path.to_str().unwrap();
+    let workspace_str = workspace.to_str().unwrap();
+
+    // `-C <workspace_root>` goes first, then whatever the caller asked for.
+    let argv: Vec<&str> = ["-C", workspace_str].iter().chain(extra_args).copied().collect();
+    let session = PtySession::spawn(forge_str, &argv).map_err(|e| e.to_string())?;
+
+    let started = std::time::Instant::now();
+    let mut exited = session.is_done();
+    while !exited && started.elapsed() < timeout {
+        std::thread::sleep(Duration::from_millis(50));
+        exited = session.is_done();
+    }
+    if exited {
+        // Brief pause before reading (presumably lets the reader drain — confirm).
+        std::thread::sleep(Duration::from_millis(30));
+    }
+    Ok(session.output())
+}
+
+/// Drives an interactive `forge -C <workspace_root>` session in a PTY: waits
+/// (8 s max) for the `Version:` banner, types `command`, waits up to `timeout`
+/// for `wait_for`, then sends Ctrl-D whether or not that wait succeeded.
+/// Returns the full output on success, else the failing step's error text.
+fn capture_interactive(command: &str, wait_for: &str, timeout: Duration) -> Result<String, String> {
+    let forge_path = forge_bin();
+    let workspace = workspace_root();
+    let forge_str = forge_path.to_str().unwrap();
+    let argv = ["-C", workspace.to_str().unwrap()];
+
+    let mut session = PtySession::spawn(forge_str, &argv).map_err(|e| e.to_string())?;
+    if let Err(e) = session.expect("Version:", Duration::from_secs(8)) {
+        return Err(e.to_string());
+    }
+    session.send_line(command).map_err(|e| e.to_string())?;
+
+    let outcome = session.expect(wait_for, timeout);
+    let _ = session.send(&[0x04]); // Ctrl-D; a failed write is ignored on purpose
+    std::thread::sleep(Duration::from_millis(100));
+
+    match outcome {
+        Ok(_) => Ok(session.output()),
+        Err(e) => Err(e.to_string()),
+    }
+}
+
+// ── individual checks ─────────────────────────────────────────────────────────
+
+/// `forge --version` must print the program name and at least one digit.
+fn check_version() {
+    print_header("forge --version");
+    let out = match capture(&["--version"], Duration::from_secs(5)) {
+        Ok(out) => out,
+        Err(e) => return fail("command failed", &e),
+    };
+    print_output(&out);
+    let text = strip_ansi(&out);
+    if text.contains("forge") && text.chars().any(|c| c.is_ascii_digit()) {
+        pass("program name and version number present");
+    } else {
+        fail("unexpected output", "expected 'forge' + semver digits");
+    }
+}
+
+/// `forge --help` must show a `Usage` section and mention `prompt`.
+fn check_help() {
+    print_header("forge --help");
+    let out = match capture(&["--help"], Duration::from_secs(5)) {
+        Ok(out) => out,
+        Err(e) => return fail("command failed", &e),
+    };
+    print_output(&out);
+    let text = strip_ansi(&out);
+    if text.contains("Usage") {
+        pass("Usage section present");
+    } else {
+        fail("Usage section missing", "clap did not emit 'Usage:'");
+    }
+    if text.contains("prompt") {
+        pass("--prompt flag documented");
+    } else {
+        fail("--prompt flag missing from help", "");
+    }
+}
+
+/// `forge banner` must print a `Version:` line and the `:new` hint.
+fn check_banner() {
+    print_header("forge banner");
+    let out = match capture(&["banner"], Duration::from_secs(5)) {
+        Ok(out) => out,
+        Err(e) => return fail("command failed", &e),
+    };
+    print_output(&out);
+    let text = strip_ansi(&out);
+    if text.contains("Version:") {
+        pass("Version: line present");
+    } else {
+        fail("Version: line missing", "");
+    }
+    if text.contains(":new") {
+        pass("':new' CLI hint present");
+    } else {
+        fail("':new' CLI hint missing", "");
+    }
+}
+
+/// `forge list agents --porcelain` must list every built-in agent.
+fn check_list_agents() {
+    print_header("forge list agents --porcelain");
+    let out = match capture(&["list", "agents", "--porcelain"], Duration::from_secs(5)) {
+        Ok(out) => out,
+        Err(e) => return fail("command failed", &e),
+    };
+    print_output(&out);
+    let listing = strip_ansi(&out);
+    for agent in ["forge", "muse", "sage"] {
+        if listing.contains(agent) {
+            pass(&format!("built-in '{agent}' agent listed"));
+        } else {
+            fail(&format!("'{agent}' missing from agent list"), "");
+        }
+    }
+}
+
+/// `forge list skill --porcelain` must list each of the expected skills.
+fn check_list_skills() {
+    print_header("forge list skill --porcelain");
+    let out = match capture(&["list", "skill", "--porcelain"], Duration::from_secs(5)) {
+        Ok(out) => out,
+        Err(e) => return fail("command failed", &e),
+    };
+    print_output(&out);
+    let listing = strip_ansi(&out);
+    for skill in ["create-skill", "execute-plan", "create-plan"] {
+        if listing.contains(skill) {
+            pass(&format!("skill '{skill}' listed"));
+        } else {
+            fail(&format!("skill '{skill}' missing"), "");
+        }
+    }
+}
+
+/// `forge list cmd --porcelain` must list each of the expected commands.
+fn check_list_commands() {
+    print_header("forge list cmd --porcelain");
+    let out = match capture(&["list", "cmd", "--porcelain"], Duration::from_secs(5)) {
+        Ok(out) => out,
+        Err(e) => return fail("command failed", &e),
+    };
+    print_output(&out);
+    let listing = strip_ansi(&out);
+    for cmd in ["fixme", "check"] {
+        if listing.contains(cmd) {
+            pass(&format!("command '{cmd}' listed"));
+        } else {
+            fail(&format!("command '{cmd}' missing"), "");
+        }
+    }
+}
+
+/// `forge env` must mention every expected heading and field name.
+fn check_env() {
+    print_header("forge env");
+    let out = match capture(&["env"], Duration::from_secs(5)) {
+        Ok(out) => out,
+        Err(e) => return fail("command failed", &e),
+    };
+    print_output(&out);
+    let report = strip_ansi(&out);
+    for expected in ["ENVIRONMENT", "version", "working directory", "PATHS"] {
+        if report.contains(expected) {
+            pass(&format!("'{expected}' present"));
+        } else {
+            fail(&format!("'{expected}' missing"), "");
+        }
+    }
+}
+
+/// `forge conversation new` must print UUID-shaped output (at least four hyphens).
+fn check_conversation_new() {
+    print_header("forge conversation new");
+    let out = match capture(&["conversation", "new"], Duration::from_secs(5)) {
+        Ok(out) => out,
+        Err(e) => return fail("command failed", &e),
+    };
+    print_output(&out);
+    let text = strip_ansi(&out);
+    let hyphens = text.matches('-').count();
+    if hyphens < 4 {
+        fail("output doesn't look like a UUID", &format!("got: {text}"));
+    } else {
+        pass(&format!("UUID-shaped output ({hyphens} hyphens)"));
+    }
+}
+
+/// Interactive session check: banner first, then `/info`, then Ctrl-D.
+///
+/// NOTE(review): every `capture_interactive` error — spawn or banner failure
+/// included — lands in the "expected" pass below; confirm that is intended.
+fn check_interactive_banner() {
+    print_header("forge (interactive) — banner + /info + Ctrl-D");
+    let Ok(out) = capture_interactive("/info", "AGENT", Duration::from_secs(8)) else {
+        println!(
+            "  {DIM}(/info not available without a conversation — that's expected){RESET}"
+        );
+        pass("process launched and banner rendered (Ctrl-D accepted)");
+        return;
+    };
+    print_output(&out);
+    let text = strip_ansi(&out);
+    if text.contains("Version:") {
+        pass("interactive banner shows Version:");
+    } else {
+        fail("interactive banner missing Version:", "");
+    }
+    if text.contains("AGENT") {
+        pass("/info command shows AGENT section");
+    } else {
+        fail("/info output missing AGENT section", "");
+    }
+}
+
+// ── main ──────────────────────────────────────────────────────────────────────
+/// Runs every offline smoke check in order; exits with status 1 if the binary is missing.
+fn main() {
+    println!("{BOLD}");
+    println!("╔══════════════════════════════════════════════╗");
+    println!("║        forge CLI — PTY Smoke Tests           ║");
+    println!("╚══════════════════════════════════════════════╝");
+    println!("{RESET}");
+
+    let bin = forge_bin();
+    if !bin.exists() {
+        eprintln!(
+            "{RED}Binary not found: {}{RESET}\nRun `cargo build -p forge_main` first.",
+            bin.display()
+        );
+        std::process::exit(1);
+    }
+    println!("{DIM}Binary    : {}{RESET}", bin.display());
+    println!("{DIM}Workspace : {}{RESET}", workspace_root().display());
+
+    check_version();
+    check_help();
+    check_banner();
+    check_list_agents();
+    check_list_skills();
+    check_list_commands();
+    check_env();
+    check_conversation_new();
+    check_interactive_banner();
+
+    println!("\n{BOLD}{CYAN}Done.{RESET}\n");
+}
diff --git a/crates/forge_shell_smoke/src/bin/zsh_plugin_smoke.rs b/crates/forge_shell_smoke/src/bin/zsh_plugin_smoke.rs
new file mode 100644
index 0000000000..e73ee9ad98
--- /dev/null
+++ b/crates/forge_shell_smoke/src/bin/zsh_plugin_smoke.rs
@@ -0,0 +1,490 @@
+//! ZSH plugin PTY smoke test.
+//!
+//! Spawns a real ZSH shell with the forge shell-plugin sourced, then drives
+//! `: <prompt>` commands through the PTY exactly as a user would type them,
+//! and prints a live pass/fail report of what comes back.
+//!
+//! Run with:
+//!   cargo run -p forge_shell_smoke --bin zsh_plugin_smoke
+
+use std::time::Duration;
+
+use forge_shell_smoke::paths::{forge_bin, plugin_path, workspace_root};
+use forge_shell_smoke::pty::PtySession;
+use forge_shell_smoke::report::{
+    BOLD, CYAN, DIM, GREEN, RED, RESET, fail, pass, print_header, print_output, strip_ansi,
+};
+
+// ── ZSH session ───────────────────────────────────────────────────────────────
+
+/// Spawn a minimal ZSH shell with the forge plugin sourced.
+///
+/// Uses `ZDOTDIR` to point ZSH at a generated `.zshrc` (kept in a fixed
+/// directory under the system temp dir) that:
+///   1. Sets `PS1='%% '`, which ZSH renders as the plain prompt `% `.
+///   2. Exports `FORGE_BIN` pointing at the locally-built debug binary.
+///   3. `cd`s to the workspace root so local `.forge/commands/` etc. are found.
+///   4. `source`s the forge plugin.
+///
+/// `--no-globalrcs` skips `/etc/zshrc` and `/etc/zprofile` to keep startup
+/// fast and output clean.
+///
+/// # Errors
+/// Returns a message when the rc file cannot be written, the temp path is not
+/// valid UTF-8, ZSH cannot be spawned, or no `% ` prompt shows up within 10 s.
+fn spawn_zsh() -> Result<PtySession, String> {
+    let zdotdir = std::env::temp_dir().join("forge_plugin_smoke_zdotdir");
+    std::fs::create_dir_all(&zdotdir).map_err(|e| e.to_string())?;
+    let zdotdir_str = zdotdir.to_str().ok_or("temp directory path is not valid UTF-8")?;
+
+    let rc = format!(
+        "#!/usr/bin/env zsh\n\
+         PS1='%% '\n\
+         export FORGE_BIN=\"{bin}\"\n\
+         cd \"{root}\"\n\
+         source \"{plugin}\"\n",
+        bin = forge_bin().display(),
+        root = workspace_root().display(),
+        plugin = plugin_path().display(),
+    );
+    std::fs::write(zdotdir.join(".zshrc"), rc).map_err(|e| e.to_string())?;
+
+    let session = PtySession::spawn_with_env(
+        "/bin/zsh",
+        &["--no-globalrcs", "--interactive"],
+        &[("ZDOTDIR", zdotdir_str)],
+    )
+    .map_err(|e| e.to_string())?;
+
+    session
+        .expect("% ", Duration::from_secs(10))
+        .map_err(|e| format!("ZSH did not reach prompt: {e}"))?;
+    Ok(session)
+}
+
+// ── individual checks ─────────────────────────────────────────────────────────
+
+/// `:new` must bring up the forge banner (a `Version:` line within 10 s).
+///
+/// NOTE(review): the `:new` hint test probably also matches the echoed
+/// command itself, so it may never fail — confirm.
+fn check_colon_new(session: &mut PtySession) {
+    print_header(":new  (start fresh conversation)");
+    session.send_line(":new").unwrap();
+    let out = match session.expect("Version:", Duration::from_secs(10)) {
+        Ok(out) => out,
+        Err(e) => return fail("banner did not appear", &e.to_string()),
+    };
+    let text = strip_ansi(&out);
+    // Show the output from the first line mentioning `:new` onwards.
+    let from_command: Vec<&str> = text.lines().skip_while(|l| !l.contains(":new")).collect();
+    print_output(&from_command.join("\n"));
+    pass("forge banner appeared (Version: line)");
+    if text.contains(":new") {
+        pass("':new' hint visible in banner");
+    } else {
+        fail("':new' hint not found in banner", "");
+    }
+}
+
+/// Sends `:info` and waits up to 10 s for `% ` via `expect`.
+///
+/// On success the last 20 lines of ANSI-stripped output are printed and the
+/// check passes; nothing is asserted about what `:info` itself printed.  On
+/// an `expect` error the check fails with the error text.
+///
+/// # Panics
+/// Panics if `send_line` returns an error.
+fn check_colon_info(session: &mut PtySession) {
+    print_header(":info  (session info)");
+    session.send_line(":info").unwrap();
+    let out = match session.expect("% ", Duration::from_secs(10)) {
+        Ok(out) => out,
+        Err(e) => return fail(":info did not return to prompt", &e.to_string()),
+    };
+    let text = strip_ansi(&out);
+    let lines: Vec<&str> = text.lines().collect();
+    let tail = lines[lines.len().saturating_sub(20)..].join("\n");
+    print_output(&tail);
+    pass("command ran and shell returned to prompt");
+}
+
+/// Sends `:env` and waits up to 10 s for the `ENVIRONMENT` heading.
+///
+/// Prints up to 30 lines starting at the first line containing the heading,
+/// then checks that `version` and `working directory` occur in the output.
+/// Panics if `send_line` returns an error.
+fn check_colon_env(session: &mut PtySession) {
+    print_header(":env  (environment info)");
+    session.send_line(":env").unwrap();
+    let out = match session.expect("ENVIRONMENT", Duration::from_secs(10)) {
+        Ok(out) => out,
+        Err(e) => return fail(":env output not received", &e.to_string()),
+    };
+    let text = strip_ansi(&out);
+    let section: Vec<&str> = text
+        .lines()
+        .skip_while(|l| !l.contains("ENVIRONMENT"))
+        .take(30)
+        .collect();
+    print_output(&section.join("\n"));
+    pass("ENVIRONMENT section rendered");
+    for field in ["version", "working directory"] {
+        if text.contains(field) {
+            pass(&format!("{field} field present"));
+        } else {
+            fail(&format!("{field} field missing"), "");
+        }
+    }
+}
+
+/// Sends an unknown `:` command and checks that `expect` finds `% ` within
+/// 8 s, i.e. that the shell did not hang.
+///
+/// The last 8 lines of output are printed.  Both outcomes of the text probe
+/// below count as a pass; only the wording of the pass message differs.
+///
+/// # Panics
+/// Panics if `send_line` returns an error.
+fn check_unknown_command(session: &mut PtySession) {
+    print_header(":unknown-zsh-test-command  (unknown command error handling)");
+    session.send_line(":unknown-zsh-test-command").unwrap();
+    let out = match session.expect("% ", Duration::from_secs(8)) {
+        Ok(out) => out,
+        Err(e) => return fail("shell hung after unknown command", &e.to_string()),
+    };
+    let text = strip_ansi(&out);
+    let lines: Vec<&str> = text.lines().collect();
+    print_output(&lines[lines.len().saturating_sub(8)..].join("\n"));
+    let reported = ["not found", "Command"].iter().any(|&needle| text.contains(needle));
+    pass(if reported {
+        "dispatcher printed 'not found' error"
+    } else {
+        "shell returned to prompt (command handled without hanging)"
+    });
+}
+
+/// Returns the last `count` lines of `text`, joined with `\n`.
+fn last_lines(text: &str, count: usize) -> String {
+    let lines: Vec<&str> = text.lines().collect();
+    lines[lines.len().saturating_sub(count)..].join("\n")
+}
+
+/// Returns `true` when `line` is progress chatter rather than model text: a
+/// blank line, a braille spinner frame, a prompt or command echo, or a line
+/// containing one of the known status words.
+fn is_progress_noise(line: &str) -> bool {
+    const STATUS_WORDS: [&str; 10] = [
+        "Initialize",
+        "Migrating",
+        "Ctrl+C",
+        "Researching",
+        "interrupt",
+        "Contemplating",
+        "Processing",
+        "Synthesizing",
+        "Analyzing",
+        "Forging",
+    ];
+    let t = line.trim();
+    t.is_empty()
+        // Any char from the Unicode Braille Patterns block is a spinner frame.
+        || t.starts_with(|c: char| ('\u{2800}'..='\u{28FF}').contains(&c))
+        // A bare prompt line trims down to `%`, so match it explicitly.
+        || t == "%"
+        || ["% ", ":: ", ": "].iter().any(|&prefix| t.starts_with(prefix))
+        || STATUS_WORDS.iter().any(|&word| t.contains(word))
+}
+
+/// Switch the session model by directly assigning `_FORGE_SESSION_MODEL` and
+/// `_FORGE_SESSION_PROVIDER` (the same variables the `:m` fzf picker sets),
+/// verify them with `echo`, make a real LLM request, then reset with `:mr`.
+///
+/// Returns early, after reporting a failure, when the variables cannot be
+/// set.  The request step is reported as failed when forge neither showed
+/// activity within 15 s nor returned to the prompt within 30 s.
+///
+/// # Panics
+/// Panics if any `send_line` call returns an error.
+fn check_model_switch_and_request(session: &mut PtySession) {
+    print_header(":m (session model switch)  →  ': say hello in one word'");
+
+    // Step 1 — assign vars.
+    println!("  {DIM}Step 1: set _FORGE_SESSION_MODEL=claude-3-haiku-20240307{RESET}");
+    session
+        .send_line(
+            "_FORGE_SESSION_MODEL=claude-3-haiku-20240307; \
+             _FORGE_SESSION_PROVIDER=anthropic; \
+             echo OK_MODEL_SET",
+        )
+        .unwrap();
+    match session.expect("OK_MODEL_SET", Duration::from_secs(5)) {
+        Ok(_) => {
+            pass("_FORGE_SESSION_MODEL set to claude-3-haiku-20240307");
+            pass("_FORGE_SESSION_PROVIDER set to anthropic");
+        }
+        Err(e) => {
+            fail("could not set session model vars", &e.to_string());
+            return;
+        }
+    }
+    let _ = session.expect("% ", Duration::from_secs(5));
+
+    // Step 2 — echo with a unique sentinel to avoid matching the old assignment.
+    println!("  {DIM}Step 2: echo to confirm vars are live in the ZSH session{RESET}");
+    let mark2 = session.output_len();
+    session
+        .send_line(
+            "echo \"VERIFY_MODEL=$_FORGE_SESSION_MODEL VERIFY_PROVIDER=$_FORGE_SESSION_PROVIDER\"",
+        )
+        .unwrap();
+    match session.expect("VERIFY_MODEL=", Duration::from_secs(5)) {
+        Ok(_) => {
+            let fresh = strip_ansi(&session.output_since(mark2));
+            // Last expanded line wins; the echoed command still contains `$_FORGE`.
+            let line = fresh
+                .lines()
+                .rfind(|l| l.contains("VERIFY_MODEL=") && !l.contains("$_FORGE"))
+                .unwrap_or("")
+                .trim()
+                .to_string();
+            print_output(&line);
+            if line.contains("claude-3-haiku-20240307") {
+                pass("_FORGE_SESSION_MODEL = claude-3-haiku-20240307");
+            } else {
+                fail("_FORGE_SESSION_MODEL not set correctly", &line);
+            }
+            if line.contains("anthropic") {
+                pass("_FORGE_SESSION_PROVIDER = anthropic");
+            } else {
+                fail("_FORGE_SESSION_PROVIDER not set correctly", &line);
+            }
+        }
+        Err(e) => fail("could not read session model vars", &e.to_string()),
+    }
+    let _ = session.expect("% ", Duration::from_secs(5));
+
+    // Step 3 — real LLM request.
+    println!("  {DIM}Step 3: ': say hello in one word' via claude-3-haiku-20240307{RESET}");
+    let mark3 = session.output_len();
+    session.send_line(": say hello in one word").unwrap();
+
+    // Poll every 100 ms (15 s at most) for any sign that forge picked up the request.
+    let indicators = ["Initialize", "⏺", "⠙", "⠸", "Migrating"];
+    let start = std::time::Instant::now();
+    let mut found: Option<&str> = None;
+    while found.is_none() && start.elapsed() < Duration::from_secs(15) {
+        let fresh = strip_ansi(&session.output_since(mark3));
+        found = indicators.iter().copied().find(|&ind| fresh.contains(ind));
+        if found.is_none() {
+            std::thread::sleep(Duration::from_millis(100));
+        }
+    }
+    // Show the most recent output whether or not an indicator appeared.
+    print_output(&last_lines(&strip_ansi(&session.output_since(mark3)), 10));
+
+    if let Some(indicator) = found {
+        pass(&format!(
+            "forge started request via claude-3-haiku-20240307 (saw '{indicator}')"
+        ));
+    } else {
+        fail("no forge activity within 15s", "check API key");
+    }
+
+    println!("  {DIM}Waiting for model response (up to 30s)…{RESET}");
+    let completed = session.expect("% ", Duration::from_secs(30)).is_ok();
+
+    println!("  {DIM}Model response:{RESET}");
+    let fresh3 = strip_ansi(&session.output_since(mark3));
+    let response_lines: Vec<&str> = fresh3
+        .lines()
+        .filter(|l| !is_progress_noise(l))
+        .map(str::trim)
+        .collect();
+
+    if response_lines.is_empty() {
+        print_output(&last_lines(&fresh3, 6));
+    } else {
+        for line in &response_lines {
+            println!("  {DIM}│{RESET} {line}");
+        }
+    }
+
+    if completed && !response_lines.is_empty() {
+        pass("model returned a text response");
+    } else if completed {
+        pass("request completed (response may have been filtered)");
+    } else if found.is_some() {
+        pass("request dispatched (response may still be streaming)");
+    } else {
+        // Neither activity nor a prompt: a pass here would contradict the
+        // failure already recorded above.
+        fail("request did not complete within 30s", "no forge activity was seen either");
+    }
+
+    // Step 4 — reset.
+    println!("  {DIM}Step 4: ':mr' — reset session model to global config{RESET}");
+    // Ctrl-C first, in case the request from step 3 is still running.
+    let _ = session.send(&[0x03]);
+    let _ = session.expect("% ", Duration::from_secs(5));
+
+    let mark4 = session.output_len();
+    session.send_line(":mr").unwrap();
+    match session.expect("% ", Duration::from_secs(8)) {
+        Ok(_) => {
+            let fresh = strip_ansi(&session.output_since(mark4));
+            print_output(&last_lines(&fresh, 4));
+            if fresh.contains("reset") || fresh.contains("global") || fresh.contains("cleared") {
+                pass(":mr reset session model to global config");
+            } else {
+                pass(":mr ran and shell returned to prompt");
+            }
+        }
+        Err(e) => fail(":mr did not return to prompt", &e.to_string()),
+    }
+}
+
+/// Sends `: hello` and checks that forge reacts within 5 s.  The full model
+/// response is not awaited; per the note printed below, a valid API key is
+/// expected in the environment.
+///
+/// Seeing any of the `indicators` (the error markers included) counts as
+/// forge having started.  Without one, a looser probe for `forge`, `0.1` or
+/// `⏺` in the new output decides between pass and fail.  Ctrl-C is sent at
+/// the end and the prompt is awaited for up to 5 s; both results are ignored.
+///
+/// # Panics
+/// Panics if `send_line` returns an error.
+fn check_colon_space_hello(session: &mut PtySession) {
+    print_header(": hello  (send prompt to active agent via PTY)");
+    println!("  {DIM}Note: requires a valid API key in the environment.{RESET}");
+    println!("  {DIM}Checks that forge starts processing, does not wait for full response.{RESET}");
+
+    let mark = session.output_len();
+    session.send_line(": hello").unwrap();
+
+    // Poll every 100 ms until an indicator appears or 5 s have elapsed.
+    let indicators = ["Initialize", "⏺", "⠙", "Migrating", "ERROR", "error"];
+    let started = std::time::Instant::now();
+    let mut seen: Option<&str> = None;
+    while seen.is_none() && started.elapsed() < Duration::from_secs(5) {
+        let so_far = strip_ansi(&session.output_since(mark));
+        seen = indicators.iter().copied().find(|&ind| so_far.contains(ind));
+        if seen.is_none() {
+            std::thread::sleep(Duration::from_millis(100));
+        }
+    }
+
+    // Print the last 8 lines of what arrived after the mark.
+    let fresh = strip_ansi(&session.output_since(mark));
+    let lines: Vec<&str> = fresh.lines().collect();
+    print_output(&lines[lines.len().saturating_sub(8)..].join("\n"));
+
+    let loosely_detected = ["forge", "0.1", "⏺"].iter().any(|&hint| fresh.contains(hint));
+    if let Some(indicator) = seen {
+        pass(&format!("forge started processing (saw '{indicator}')"));
+    } else if loosely_detected {
+        pass("forge output detected");
+    } else {
+        fail("no forge activity within 5s", "check FORGE_BIN and API key");
+    }
+
+    // Interrupt the request; failures of either call are ignored.
+    let _ = session.send(&[0x03]);
+    let _ = session.expect("% ", Duration::from_secs(5));
+}
+
+/// Sends `:new hello` and checks that forge reacts within 5 s.
+///
+/// Passes when any of the `indicators` (`ERROR` included) shows up in the
+/// output produced after the command was sent and fails otherwise.  The last
+/// 6 lines of that output are printed either way.
+///
+/// Finally Ctrl-C is sent and the prompt is awaited for up to 5 s; the
+/// results of both calls are ignored.
+///
+/// # Panics
+/// Panics if `send_line` returns an error.
+fn check_colon_new_with_prompt(session: &mut PtySession) {
+    print_header(":new hello  (new conversation with inline prompt)");
+    println!("  {DIM}Sends ':new hello' — creates a fresh conversation and dispatches the prompt.{RESET}");
+
+    let mark = session.output_len();
+    session.send_line(":new hello").unwrap();
+
+    // Poll every 100 ms until an indicator appears or 5 s have elapsed.
+    let indicators = ["Initialize", "⏺", "⠙", "Migrating", "ERROR"];
+    let started = std::time::Instant::now();
+    let mut seen: Option<&str> = None;
+    while seen.is_none() && started.elapsed() < Duration::from_secs(5) {
+        let so_far = strip_ansi(&session.output_since(mark));
+        seen = indicators.iter().copied().find(|&ind| so_far.contains(ind));
+        if seen.is_none() {
+            std::thread::sleep(Duration::from_millis(100));
+        }
+    }
+
+    // Print the last 6 lines of what arrived after the mark.
+    let fresh = strip_ansi(&session.output_since(mark));
+    let lines: Vec<&str> = fresh.lines().collect();
+    let tail = lines[lines.len().saturating_sub(6)..].join("\n");
+    print_output(&tail);
+
+    if let Some(indicator) = seen {
+        pass(&format!("forge started (saw '{indicator}')"));
+    } else {
+        fail("no forge activity within 5s", "");
+    }
+
+    // Interrupt the request; failures of either call are ignored.
+    let _ = session.send(&[0x03]);
+    let _ = session.expect("% ", Duration::from_secs(5));
+}
+
+// ── main ──────────────────────────────────────────────────────────────────────
+
+/// Entry point: prints the banner, checks that the `forge` debug binary
+/// exists, spawns ZSH with the plugin sourced and runs every plugin check in
+/// order against that one shell session.  Exits with status 1 when the binary
+/// is missing or ZSH cannot be spawned.
+fn main() {
+    println!("{BOLD}");
+    println!("╔══════════════════════════════════════════════╗");
+    println!("║     forge ZSH Plugin — PTY Smoke Tests       ║");
+    println!("╚══════════════════════════════════════════════╝");
+    println!("{RESET}");
+
+    let forge_path = forge_bin();
+    if !forge_path.exists() {
+        eprintln!(
+            "{RED}Binary not found: {}{RESET}\nRun `cargo build -p forge_main` first.",
+            forge_path.display()
+        );
+        std::process::exit(1);
+    }
+    println!("{DIM}Forge binary : {}{RESET}", forge_path.display());
+    println!("{DIM}Plugin       : {}{RESET}", plugin_path().display());
+    println!("{DIM}Workspace    : {}{RESET}", workspace_root().display());
+
+    print!("\nSpawning ZSH with forge plugin…");
+    let mut session = spawn_zsh().unwrap_or_else(|e| {
+        println!(" {RED}FAILED{RESET}");
+        eprintln!("{RED}Could not spawn ZSH: {e}{RESET}");
+        std::process::exit(1)
+    });
+    println!(" {GREEN}ready{RESET}");
+
+    check_colon_new(&mut session);
+    check_colon_info(&mut session);
+    check_colon_env(&mut session);
+    check_unknown_command(&mut session);
+    check_model_switch_and_request(&mut session);
+    check_colon_space_hello(&mut session);
+    check_colon_new_with_prompt(&mut session);
+
+    // Ask the shell to exit and wait briefly; a failed write is ignored.
+    let _ = session.send_line("exit");
+    std::thread::sleep(Duration::from_millis(200));
+
+    println!("\n{BOLD}{CYAN}Done.{RESET}\n");
+}
diff --git a/crates/forge_shell_smoke/src/lib.rs b/crates/forge_shell_smoke/src/lib.rs
new file mode 100644
index 0000000000..e82bb40dfa
--- /dev/null
+++ b/crates/forge_shell_smoke/src/lib.rs
@@ -0,0 +1,13 @@
+//! PTY-based smoke test harness for the forge CLI and ZSH plugin.
+//!
+//! This crate provides:
+//!
+//! - [`pty::PtySession`] — a portable pseudo-terminal wrapper for spawning and
+//!   driving interactive processes in tests.
+//! - [`report`] — ANSI-coloured pass/fail report helpers shared by all smoke
+//!   binaries.
+//! - [`paths`] — workspace and binary path resolution utilities.
+
+pub mod pty;
+pub mod paths;
+pub mod report;
diff --git a/crates/forge_shell_smoke/src/paths.rs b/crates/forge_shell_smoke/src/paths.rs
new file mode 100644
index 0000000000..f2276d7280
--- /dev/null
+++ b/crates/forge_shell_smoke/src/paths.rs
@@ -0,0 +1,31 @@
+//! Workspace and binary path helpers.
+//!
+//! Each smoke binary resolves paths relative to its own `CARGO_MANIFEST_DIR`.
+//! These helpers centralise the logic so it isn't duplicated across binaries.
+
+use std::path::PathBuf;
+
+/// Returns the workspace root: the directory two levels above this crate's
+/// `CARGO_MANIFEST_DIR` (`…/crates/forge_shell_smoke`), read at compile time.
+///
+/// # Panics
+/// Panics if the manifest directory has fewer than two ancestors.
+pub fn workspace_root() -> PathBuf {
+    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    // ancestors(): [0] = the crate dir, [1] = …/crates, [2] = the workspace root.
+    let root = manifest_dir.ancestors().nth(2).unwrap();
+    root.to_path_buf()
+}
+
+/// Returns `<workspace_root>/target/debug/forge` (`forge.exe` on Windows).
+pub fn forge_bin() -> PathBuf {
+    let exe = if cfg!(windows) { "forge.exe" } else { "forge" };
+    workspace_root().join(["target", "debug", exe].iter().collect::<PathBuf>())
+}
+
+/// Returns `<workspace_root>/shell-plugin/forge.plugin.zsh`, the ZSH plugin entry-point.
+pub fn plugin_path() -> PathBuf {
+    let mut path = workspace_root();
+    path.extend(["shell-plugin", "forge.plugin.zsh"]);
+    path
+}
diff --git a/crates/forge_shell_smoke/src/pty.rs b/crates/forge_shell_smoke/src/pty.rs
new file mode 100644
index 0000000000..398f3f53ff
--- /dev/null
+++ b/crates/forge_shell_smoke/src/pty.rs
@@ -0,0 +1,225 @@
+//! Portable PTY test harness.
+//!
+//! Provides [`PtySession`] which spawns an arbitrary process inside a real
+//! pseudo-terminal and exposes helpers for writing to its stdin and reading
+//! from its combined stdout/stderr output.  Because the child process sees a
+//! genuine TTY on both sides, readline libraries (rustyline, reedline, …) and
+//! TTY-detection checks behave exactly as they would in an interactive terminal
+//! session.
+
+use std::io::{Read, Write};
+use std::path::PathBuf;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::{Arc, Mutex};
+use std::time::{Duration, Instant};
+
+use portable_pty::{CommandBuilder, MasterPty, NativePtySystem, PtyPair, PtySize, PtySystem as _};
+
+/// A live pseudo-terminal session wrapping a spawned child process.
+///
+/// The process sees a real TTY on stdin, stdout, and stderr so that
+/// TTY-detection and readline libraries work correctly.
+pub struct PtySession {
+    /// Write half of the PTY (connected to the child's stdin).
+    writer: Box<dyn Write + Send>,
+    /// Accumulated output captured from the PTY master read side.
+    output: Arc<Mutex<Vec<u8>>>,
+    /// Set to `true` once the reader thread has drained all output (child exited).
+    eof: Arc<AtomicBool>,
+    /// Master side of the PTY, kept open for the lifetime of the session.
+    ///
+    /// The slave side is intentionally not stored: it is closed right after the
+    /// child is spawned so the reader thread can observe end-of-output.
+    _master: Box<dyn MasterPty + Send>,
+}
+
+impl PtySession {
+    /// Spawns `program` with `args` inside a new 80×24 PTY.
+    ///
+    /// # Errors
+    /// Returns an error if the PTY cannot be created or the child process
+    /// cannot be spawned.
+    pub fn spawn(program: impl Into<PathBuf>, args: &[&str]) -> anyhow::Result<Self> {
+        Self::spawn_with_env(program, args, &[])
+    }
+
+    /// Spawns `program` with `args` and additional environment variables inside
+    /// a new 80×24 PTY.
+    ///
+    /// `extra_env` is a slice of `(key, value)` pairs that are merged into the
+    /// child's environment on top of the current process environment.
+    ///
+    /// # Errors
+    /// Returns an error if the PTY cannot be created or the child process
+    /// cannot be spawned.
+    pub fn spawn_with_env(
+        program: impl Into<PathBuf>,
+        args: &[&str],
+        extra_env: &[(&str, &str)],
+    ) -> anyhow::Result<Self> {
+        let pty_system = NativePtySystem::default();
+        let PtyPair { master, slave } = pty_system.openpty(PtySize {
+            rows: 24,
+            cols: 80,
+            pixel_width: 0,
+            pixel_height: 0,
+        })?;
+
+        let mut cmd = CommandBuilder::new(program.into());
+        for arg in args {
+            cmd.arg(arg);
+        }
+        for (key, val) in extra_env {
+            cmd.env(key, val);
+        }
+
+        // Spawn the child attached to the slave side of the PTY.
+        let _child = slave.spawn_command(cmd)?;
+
+        // Release our own handle to the slave now that the child holds its own.
+        // While this process keeps the slave open, reads on the master do not
+        // end when the child exits, so the `eof` flag would never be set and
+        // `expect` could only ever fail by timing out.
+        drop(slave);
+
+        // Obtain a writer to the master (drives the child's stdin).
+        let writer = master.take_writer()?;
+
+        // Obtain a reader from the master (receives the child's stdout + stderr).
+        let mut reader = master.try_clone_reader()?;
+
+        // Background thread: continuously drain PTY output into a shared buffer
+        // and set the `eof` flag once reading ends (EOF or a read error).
+        let output = Arc::new(Mutex::new(Vec::<u8>::new()));
+        let eof = Arc::new(AtomicBool::new(false));
+
+        let output_clone = Arc::clone(&output);
+        let eof_clone = Arc::clone(&eof);
+        std::thread::spawn(move || {
+            let mut buf = [0u8; 256];
+            loop {
+                match reader.read(&mut buf) {
+                    Ok(0) | Err(_) => break,
+                    Ok(n) => {
+                        let mut guard = output_clone.lock().unwrap();
+                        guard.extend_from_slice(&buf[..n]);
+                    }
+                }
+            }
+            // Signal that no more output will arrive. The store happens after
+            // the final buffer write, so a reader that sees `true` also sees
+            // every captured byte.
+            eof_clone.store(true, Ordering::Release);
+        });
+
+        Ok(Self { writer, output, eof, _master: master })
+    }
+
+    /// Writes `line` followed by `\n` to the child's stdin.
+    ///
+    /// # Errors
+    /// Returns an error if the underlying PTY write fails.
+    pub fn send_line(&mut self, line: &str) -> anyhow::Result<()> {
+        writeln!(self.writer, "{line}")?;
+        self.writer.flush()?;
+        Ok(())
+    }
+
+    /// Writes raw bytes to the child's stdin.
+    ///
+    /// Use this to send control characters such as Ctrl-D (`0x04`) or
+    /// Ctrl-C (`0x03`).
+    ///
+    /// # Errors
+    /// Returns an error if the underlying PTY write fails.
+    pub fn send(&mut self, data: &[u8]) -> anyhow::Result<()> {
+        self.writer.write_all(data)?;
+        self.writer.flush()?;
+        Ok(())
+    }
+
+    /// Returns a snapshot of all output collected so far as a UTF-8 `String`.
+    ///
+    /// Non-UTF-8 bytes are replaced with the Unicode replacement character.
+    pub fn output(&self) -> String {
+        let guard = self.output.lock().unwrap();
+        String::from_utf8_lossy(&guard).into_owned()
+    }
+
+    /// Returns the number of bytes captured so far.
+    ///
+    /// Pair with [`Self::output_since`] to isolate the output produced by a
+    /// single command without carrying forward accumulated banner or TUI noise.
+    ///
+    /// # Pattern
+    /// ```ignore
+    /// let mark = session.output_len();
+    /// session.send_line("echo hello")?;
+    /// session.expect("hello", Duration::from_secs(5))?;
+    /// let fresh = session.output_since(mark); // contains only the new bytes
+    /// ```
+    pub fn output_len(&self) -> usize {
+        self.output.lock().unwrap().len()
+    }
+
+    /// Returns only the output captured *after* byte offset `since`.
+    ///
+    /// Pair with [`Self::output_len`] — capture a mark before sending a command,
+    /// then call this after [`Self::expect`] returns to get a clean window of
+    /// just that command's output. An offset past the end yields an empty string.
+    pub fn output_since(&self, since: usize) -> String {
+        let guard = self.output.lock().unwrap();
+        let slice = &guard[since.min(guard.len())..];
+        String::from_utf8_lossy(slice).into_owned()
+    }
+
+    /// Returns `true` once the reader thread has stopped, i.e. the PTY output
+    /// has ended (normally because the child exited) and everything read has
+    /// been committed to the internal buffer.
+    pub fn is_done(&self) -> bool {
+        self.eof.load(Ordering::Acquire)
+    }
+
+    /// Blocks until `needle` appears somewhere in the accumulated output, then
+    /// returns the full output seen so far.
+    ///
+    /// If the child exits before `needle` is found the function returns an
+    /// error immediately (rather than spinning until the full timeout) so that
+    /// tests targeting short-lived commands finish quickly.
+    ///
+    /// # Errors
+    /// Returns an error if `timeout` elapses or the child exits before
+    /// `needle` is found.
+    pub fn expect(&self, needle: &str, timeout: Duration) -> anyhow::Result<String> {
+        let start = Instant::now();
+        loop {
+            // Sample the EOF flag *before* snapshotting: the reader thread sets
+            // it only after its last buffer write, so `done == true` guarantees
+            // the snapshot below is complete and no settling delay is needed.
+            let done = self.is_done();
+            let current = self.output();
+            if current.contains(needle) {
+                return Ok(current);
+            }
+
+            // Child has exited — no more output is coming; fail fast.
+            if done {
+                return Err(anyhow::anyhow!(
+                    "Child exited without producing {:?}.\nFull output:\n{}",
+                    needle,
+                    current
+                ));
+            }
+
+            if start.elapsed() >= timeout {
+                return Err(anyhow::anyhow!(
+                    "Timeout after {:?} waiting for {:?}.\nOutput so far:\n{}",
+                    timeout,
+                    needle,
+                    current
+                ));
+            }
+            std::thread::sleep(Duration::from_millis(50));
+        }
+    }
+}
diff --git a/crates/forge_shell_smoke/src/report.rs b/crates/forge_shell_smoke/src/report.rs
new file mode 100644
index 0000000000..e94988248e
--- /dev/null
+++ b/crates/forge_shell_smoke/src/report.rs
@@ -0,0 +1,90 @@
+//! ANSI-coloured pass/fail report helpers for smoke binaries.
+
+// ── colour constants ──────────────────────────────────────────────────────────
+
+pub const GREEN: &str = "\x1b[32m";
+pub const RED: &str = "\x1b[31m";
+pub const CYAN: &str = "\x1b[36m";
+pub const DIM: &str = "\x1b[2m";
+pub const BOLD: &str = "\x1b[1m";
+pub const RESET: &str = "\x1b[0m";
+
+// ── output helpers ────────────────────────────────────────────────────────────
+
+/// Prints a section header in bold cyan.
+pub fn print_header(title: &str) {
+    println!("\n{BOLD}{CYAN}══ {title} ══{RESET}");
+}
+
+/// Prints each non-empty line of `raw` (after stripping ANSI codes) with a
+/// dim `│` gutter prefix.
+pub fn print_output(raw: &str) {
+    let stripped = strip_ansi(raw);
+    for line in stripped.lines() {
+        if !line.trim().is_empty() {
+            println!("  {DIM}│{RESET} {line}");
+        }
+    }
+}
+
+/// Prints a green ✓ pass line.
+pub fn pass(label: &str) {
+    println!("{GREEN}  ✓ {label}{RESET}");
+}
+
+/// Prints a red ✗ fail line, optionally followed by a reason.
+pub fn fail(label: &str, reason: &str) {
+    println!("{RED}  ✗ {label}{RESET}");
+    if !reason.is_empty() {
+        println!("{RED}    {reason}{RESET}");
+    }
+}
+
+// ── ANSI stripping ────────────────────────────────────────────────────────────
+
+/// Strips ANSI escape sequences from `s` and returns the plain-text result.
+///
+/// Recognised forms, each removed in full:
+///
+/// - CSI: `ESC [`, then everything up to and including a final byte in
+///   `@`..=`~` (so keys like `ESC [ 3 ~` end correctly, not only letters).
+/// - OSC: `ESC ]`, then everything up to BEL or the `ESC \` terminator, e.g.
+///   the window-title sequences shells commonly emit.
+/// - `ESC` + intermediates (`0x20..=0x2f`) + one final character, e.g. `ESC ( B`.
+/// - Any other two-character sequence such as `ESC =` or `ESC >`.
+///
+/// An unterminated sequence consumes the rest of the input; a trailing lone
+/// `ESC` is dropped.
+pub fn strip_ansi(s: &str) -> String {
+    let mut out = String::with_capacity(s.len());
+    let mut chars = s.chars().peekable();
+    while let Some(c) = chars.next() {
+        if c != '\x1b' {
+            out.push(c);
+            continue;
+        }
+        // `c` is ESC: the next character selects the kind of sequence.
+        match chars.next() {
+            Some('[') => {
+                // CSI: skip parameter/intermediate bytes and the final byte.
+                let _ = chars.by_ref().find(|ch| ('@'..='~').contains(ch));
+            }
+            Some(']') => {
+                // OSC: skip the payload and its BEL or `ESC \` terminator.
+                while let Some(ch) = chars.next() {
+                    let is_st = ch == '\x1b' && chars.next_if_eq(&'\\').is_some();
+                    if ch == '\x07' || is_st {
+                        break;
+                    }
+                }
+            }
+            Some('\x20'..='\x2f') => {
+                // Intermediates seen: skip any more of them plus the final byte.
+                let _ = chars.by_ref().find(|ch| !('\x20'..='\x2f').contains(ch));
+            }
+            // Plain two-character sequence, or ESC at the very end of input.
+            Some(_) | None => {}
+        }
+    }
+    out
+}
diff --git a/crates/forge_test_kit/Cargo.toml b/crates/forge_test_kit/Cargo.toml
index f169443335..7b7a5c26bf 100644
--- a/crates/forge_test_kit/Cargo.toml
+++ b/crates/forge_test_kit/Cargo.toml
@@ -8,10 +8,12 @@ rust-version.workspace = true
 tokio = { workspace = true }
 serde = { workspace = true, optional = true }
 serde_json = { workspace = true, optional = true }
+forge_shell_smoke = { path = "../forge_shell_smoke", optional = true }
 
 [features]
 default = []
 json = ["serde", "serde_json"]
+pty = ["dep:forge_shell_smoke"]
 
 [lib]
 doctest = false
diff --git a/crates/forge_test_kit/src/lib.rs b/crates/forge_test_kit/src/lib.rs
index 0ca5728016..c7fd89751d 100644
--- a/crates/forge_test_kit/src/lib.rs
+++ b/crates/forge_test_kit/src/lib.rs
@@ -3,6 +3,14 @@
 //! This crate provides common utilities for testing, including fixture loading
 //! helpers that reduce boilerplate in test code.
 
+/// PTY-based test harness for spawning and driving interactive processes.
+///
+/// Glob re-export of `forge_shell_smoke::pty`; compiled only with the `pty` feature.
+#[cfg(feature = "pty")]
+pub mod pty {
+    pub use forge_shell_smoke::pty::*;
+}
+
 /// Loads a fixture file from the calling crate's directory
 ///
 /// # Arguments