From 6bcb8744539c0fd2a968283afd15189778f465f9 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 22:51:52 -0800
Subject: [PATCH 01/14] chore: simplify contributing workflow and mise task
 visibility

Restructure mise tasks so only primary contributor commands are public.
Rewrite CONTRIBUTING.md to focus on prerequisites, quick start, and main
tasks. Move code/CLI style guidance into STYLE_GUIDE.md. Add fmt, lint,
ci, check, version, and test:e2e as top-level public tasks. Hide
internal build/publish/docker tasks behind hide=true. Extract sandbox
runner script to build/scripts/run-sandbox.sh and unify cluster:sandbox
as the public entrypoint.

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 .gitlab-ci.yml                          |   4 +-
 CONTRIBUTING.md                         | 474 +++---------------------
 STYLE_GUIDE.md                          |  54 +++
 architecture/build-containers.md        |  22 +-
 architecture/cluster-single-node.md     |   6 +-
 architecture/inference-routing.md       |   6 +-
 build/ci.toml                           |  60 +--
 build/cluster.toml                      |  22 +-
 build/docker.toml                       |   8 +
 build/gator.toml                        |   2 +
 build/helm.toml                         |   1 +
 build/license.toml                      |   2 +
 build/publish.toml                      |   4 +
 build/python.toml                       |  15 +
 build/rust.toml                         |  10 +
 build/scripts/cluster-push-component.sh |   2 +-
 build/scripts/run-sandbox.sh            |  38 ++
 build/test.toml                         |  18 +-
 build/version.toml                      |   5 +
 19 files changed, 271 insertions(+), 482 deletions(-)
 create mode 100644 STYLE_GUIDE.md
 create mode 100644 build/scripts/run-sandbox.sh

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index d9477597..080f3df3 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -247,8 +247,8 @@ python_e2e_sandbox_test:
     - socat UNIX-LISTEN:/var/run/docker.sock,fork,reuseaddr TCP:docker:2375 &
     - sleep 1
     - mise run --no-prepare docker:build:cluster
-    - mise run --no-prepare cluster:build
-    - mise run --no-prepare test:e2e:sandbox
+    - mise run --no-prepare cluster:build:full
+    - mise run --no-prepare test:e2e
 
 # =============================================================================
 # Publish Jobs
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9e459977..a7edc539 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,459 +2,93 @@
 
 ## Prerequisites
 
-Install [mise](https://mise.jdx.dev/). This is used to setup the development environment.
+Install [mise](https://mise.jdx.dev/). This is used to set up the development environment.
 
 ```bash
 # Install mise (macOS/Linux)
 curl https://mise.run | sh
 ```
 
+After installing `mise`, activate it with `mise activate` or [add it to your shell](https://mise.jdx.dev/getting-started.html).
 
-After installing `mise` be sure to activate the environment by running `mise activate` or [add it to your shell](https://mise.jdx.dev/getting-started.html).
-
-Shell installation examples:
-
-Fish:
+Shell setup examples:
 
 ```bash
+# Fish
 echo '~/.local/bin/mise activate fish | source' >> ~/.config/fish/config.fish
-```
 
-Zsh (Mac OS Default):
-
-```bash
+# Zsh
 echo 'eval "$(~/.local/bin/mise activate zsh)"' >> ~/.zshrc
 ```
 
-Project uses Rust 1.88+ and Python 3.12+. Docker must be running for cluster and sandbox workflows.
-
-## Developer Certificate of Origin (DCO)
+Project requirements:
+- Rust 1.88+
+- Python 3.12+
+- Docker (running)
 
-All contributions to this project must include a `Signed-off-by` line in the commit message, certifying that you wrote or have the right to submit the code under the project's open-source license. This is the [Developer Certificate of Origin (DCO)](https://developercertificate.org/).
-
-Add the sign-off automatically with `git commit -s`:
+## Getting Started
 
 ```bash
-git commit -s -m "feat(sandbox): add new capability"
-```
-
-This appends a line like:
-
-```
-Signed-off-by: Your Name <your.email@example.com>
-```
-
-A DCO check runs on every pull request and will fail if any commit is missing the sign-off.
-
-## License Headers
-
-All source files must include an SPDX copyright header. Use the license header script to add or check headers:
-
-```bash
-# Add/update headers on all source files
-mise run license:update
-
-# Check that all files have headers (runs in CI and pre-commit)
-mise run license:check
-```
-
-## Getting started
-
-```bash
-# Trust the project config (one-time)
+# Trust this project's mise config (one-time)
 mise trust
 
-# Fast local cluster recreate (reuses prebuilt images)
+# Start/recreate local cluster
 mise run cluster
 
-# Build images and deploy (recommended for CI/first setup)
+# Iterative deploy after code changes
 mise run cluster:build
 
-# Create a sandbox with Claude (or opencode / codex)
+# Launch a sandbox
 ncl sandbox create -- claude
 ```
 
-Note: `ncl` builds the CLI from source on first run, which takes several minutes while Rust compiles. Subsequent runs are fast.
-
-### Other useful commands
-
-```bash
-ncl --help                        # CLI help
-mise build                        # Debug build (without running)
-mise test                         # Run all project tests
-mise run sandbox                  # Run sandbox container interactively
-```
-
-## Shell Completions
-
-The CLI supports dynamic shell completions. Run `nemoclaw completions --help` for full per-shell setup instructions.
-
-For the `ncl` wrapper, generate completions from the real binary and rewrite the registration to target `ncl`:
-
-**Fish:**
-
-```bash
-nemoclaw completions fish | sed 's/--command nemoclaw/--command ncl/' > ~/.config/fish/completions/ncl.fish
-```
-
-**Bash:**
-
-```bash
-nemoclaw completions bash | sed 's/_clap_complete_nemoclaw/_clap_complete_ncl/g; s/ nemoclaw$/ ncl/' > ~/.local/share/bash-completion/completions/ncl
-```
-
-**Zsh:**
-
-```bash
-nemoclaw completions zsh | sed 's/_clap_dynamic_completer_nemoclaw/_clap_dynamic_completer_ncl/g; s/ nemoclaw$/ ncl/' > ~/.zfunc/_ncl
-```
-
-## Sandbox SSH access
+## `ncl` Shortcut
 
-To connect to a running sandbox with SSH, use:
+Inside this repository, `ncl` is a local shortcut script at `scripts/bin/ncl`.
 
-```bash
-nemoclaw sandbox connect <sandbox-id>
-```
+It:
+1. Builds `navigator-cli` if needed.
+2. Runs the local debug CLI binary (`target/debug/nemoclaw`).
 
-To forward a local port into a sandbox (e.g., port 18789):
+Because `mise` adds `scripts/bin` to `PATH` for this project, you can run `ncl` directly from the repo.
 
 ```bash
-nemoclaw sandbox forward start 18789 <sandbox-name>
+ncl --help
+ncl sandbox create -- codex
 ```
 
-This opens a local SSH tunnel so connections to `127.0.0.1:18789` on the host
-are forwarded to `127.0.0.1:18789` inside the sandbox. The command stays
-attached until interrupted (Ctrl+C). Add `-d` to run in the background.
+## Main Tasks
 
-Relevant environment variables:
+These are the primary `mise` tasks for day-to-day development:
 
-- `NEMOCLAW_SSH_GATEWAY_HOST`, `NEMOCLAW_SSH_GATEWAY_PORT`, `NEMOCLAW_SSH_CONNECT_PATH`
-- `NEMOCLAW_SANDBOX_SSH_PORT`, `NEMOCLAW_SSH_HANDSHAKE_SECRET`, `NEMOCLAW_SSH_HANDSHAKE_SKEW_SECS`
-- `NEMOCLAW_SSH_LISTEN_ADDR` (set inside sandbox pods)
+| Task | Purpose |
+|---|---|
+| `mise run cluster` | Fast local cluster recreate |
+| `mise run cluster:build` | Incremental deploy of changed components |
+| `mise run cluster:sandbox` | Run sandbox container interactively |
+| `mise run fmt` | Format Rust and Python |
+| `mise run lint` | Repository lint checks |
+| `mise run check` | Fast compile and type checks |
+| `mise run test` | Default test suite |
+| `mise run test:e2e` | Default end-to-end test lane |
+| `mise run ci` | Full local CI checks |
+| `mise run clean` | Clean build artifacts |
+| `mise run version` | Print git-derived version |
 
 ## Project Structure
 
-```
-crates/
-├── navigator-core/      # Core library
-├── navigator-server/    # Main gateway server, ingress for all operations
-├── navigator-sandbox/   # Sandbox execution environment
-├── navigator-bootstrap/ # Local cluster bootstrap (Docker)
-└── navigator-cli/       # Command-line interface
-python/                  # Python bindings
-proto/                   # Protocol buffer definitions
-architecture/            # Architecture documentation and design plans
-build/                   # mise task definitions and build scripts
-├── *.toml               # Task includes (loaded by mise.toml task_config)
-└── scripts/             # Shared build scripts used by tasks
-deploy/
-├── docker/              # Dockerfiles and build artifacts
-├── helm/navigator/      # NemoClaw Helm chart
-└── kube/manifests/      # Kubernetes manifests for k3s auto-deploy
-```
-
-## Development Workflow
-
-### Building
-
-```bash
-mise run build           # Debug build
-mise run build:release   # Release build
-mise run check           # Quick compile check
-```
-
-### Testing
-
-```bash
-mise run test            # All tests (Rust + Python)
-mise run test:rust       # Rust tests only
-mise run test:python     # Python tests only
-mise run test:e2e:sandbox # Sandbox Python e2e tests
-```
-
-### Python E2E Test Patterns
-
-- Put sandbox SDK e2e tests in `e2e/python/`.
-- Prefer `Sandbox.exec_python(...)` with Python callables over inline `python -c` strings.
-- Define callable helpers inside the test function when possible so they serialize cleanly in sandbox.
-- Keep scenarios focused: one test for happy path and separate tests for negative/policy enforcement behavior.
-- Use `mise run test:e2e:sandbox` to run this suite locally.
-
-### Linting & Formatting
-
-```bash
-# Rust
-mise run rust:format         # Format code
-mise run rust:format:check   # Check formatting
-mise run rust:lint           # Lint with Clippy
-
-# Python
-mise run python:format   # Format with ruff
-mise run python:lint     # Lint with ruff
-mise run python:typecheck # Type check with ty
-
-# Helm
-mise run helm:lint       # Lint the nemoclaw helm chart
-```
-
-### Running Components
-
-```bash
-mise run sandbox         # Run sandbox container with interactive shell
-```
-
-### Custom Container Images
-
-Use `--image` to run a sandbox with any Linux container image:
-
-```bash
-# Run an interactive shell in an Ubuntu sandbox
-ncl sandbox create --image ubuntu:24.04
-
-# Run a command in a custom image
-ncl sandbox create --image python:3.12-slim -- python3 -c "print('hello')"
-
-# Sync local files and run in a custom image
-ncl sandbox create --image node:22 --sync -- npm test
-```
-
-The supervisor binary is side-loaded from the standard sandbox image via a Kubernetes init
-container. The default `run_as_user`/`run_as_group` policy is cleared for custom images to
-avoid failures on images that lack the `sandbox` user. See `architecture/sandbox.md` for
-details on the bootstrap flow and constraints.
-
-#### Building and Pushing Custom Images
-
-Use `ncl sandbox image push` to build a Dockerfile and push the resulting image into the
-cluster's containerd runtime so it can be used with `--image`:
-
-```bash
-# Build and push from a Dockerfile
-ncl sandbox image push --dockerfile ./Dockerfile
-
-# Specify a custom tag
-ncl sandbox image push --dockerfile ./Dockerfile --tag my-sandbox:latest
-
-# Specify a build context directory
-ncl sandbox image push --dockerfile ./build/Dockerfile --context ./build
-
-# Pass build arguments
-ncl sandbox image push --dockerfile ./Dockerfile --build-arg PYTHON_VERSION=3.12
-
-# Use the pushed image
-ncl sandbox create --image my-sandbox:latest
-```
-
-The command builds the image using the local Docker daemon and pushes it into the cluster
-via the same `docker save` / `ctr images import` pipeline used for component images. A
-`.dockerignore` file in the build context directory is respected.
-
-### Git Hooks (Pre-commit)
-
-We use `mise generate git-pre-commit` for local pre-commit checks.
-
-Generate a Git pre-commit hook that runs the `pre-commit` task:
-
-```bash
-mise generate git-pre-commit --write --task=pre-commit
-```
-
-### Kubernetes Development
-
-The project uses the NemoClaw CLI to provision a local k3s-in-container cluster. Docker is the only external dependency for cluster bootstrap.
-
-```bash
-mise run cluster          # Recreate local cluster quickly using prebuilt images
-mise run cluster:build    # Build component images, then deploy cluster (CI-friendly)
-mise run cluster:deploy   # Fast deploy: rebuild changed components and skip unnecessary helm work
-mise run cluster:deploy:sandbox # Fast deploy sandbox-only changes
-mise run cluster:push:server    # Push local server image to configured pull registry
-mise run cluster:push:sandbox   # Push local sandbox image to configured pull registry
-mise run cluster:deploy:pull    # Force full pull-mode deploy flow
-mise run cluster:push           # Legacy image-import fallback workflow
-```
-
-`mise run cluster` uses local `.env` values when present and appends missing keys:
-`CLUSTER_NAME`, `GATEWAY_PORT`, and `NEMOCLAW_CLUSTER`.
-If `GATEWAY_PORT` is missing, it picks a free local port and persists it to `.env`.
-Existing `.env` values are not overwritten.
-Fast `mise run cluster` flow:
-1. Recreate cluster.
-2. Ensure local registry (`127.0.0.1:5000`) is running in pull-through-cache mode.
-3. Deploy with local image refs (`127.0.0.1:5000/navigator/*`, tag `latest` unless `IMAGE_TAG` is set) while k3s pulls through `host.docker.internal:5000`.
-4. Use `mise run cluster:deploy` (or `cluster:deploy:sandbox`) to push local changes to that registry and redeploy only relevant components.
-
-This keeps iterative local push workflows working while still caching remote pulls.
-`mise run cluster:build` keeps the local build-and-push flow for development/CI.
-Cluster bootstrap pulls the cluster image from the published remote registry by default.
-Set `NEMOCLAW_CLUSTER_IMAGE` to override the image reference explicitly.
-
-Default local cluster workflow uses pull mode with a local Docker registry at `127.0.0.1:5000`.
-Local clusters also bind host port `6443` for the Kubernetes API, so only one
-local NemoClaw cluster can run at a time on a given Docker host.
-You can override repository settings with:
-
-- `IMAGE_REPO_BASE` (for example `127.0.0.1:5000/navigator`)
-- `NEMOCLAW_REGISTRY_HOST`, `NEMOCLAW_REGISTRY_NAMESPACE`
-- `NEMOCLAW_REGISTRY_ENDPOINT` (optional mirror endpoint override, e.g. `host.docker.internal:5000`)
-- `NEMOCLAW_REGISTRY_USERNAME`, `NEMOCLAW_REGISTRY_PASSWORD`
-- `NEMOCLAW_REGISTRY_INSECURE=true|false`
-
-Useful env flags for fast deploy:
-
-- `FORCE_HELM_UPGRADE=1` - run Helm upgrade even when chart files are unchanged
-- `DEPLOY_FAST_HELM_WAIT=1` - wait for Helm upgrade completion (`helm --wait`)
-- `DEPLOY_FAST_MODE=full` - force full component rebuild behavior through fast deploy
-- `DOCKER_BUILD_CACHE_DIR=.cache/buildkit` - local BuildKit cache directory used by component image builds
-
-GitHub Container Registry mapping (CI or shared dev):
-
-```bash
-export NEMOCLAW_REGISTRY_HOST=ghcr.io
-export NEMOCLAW_REGISTRY_NAMESPACE=${GITHUB_REPOSITORY}
-export NEMOCLAW_REGISTRY_USERNAME=${GITHUB_ACTOR}
-export NEMOCLAW_REGISTRY_PASSWORD=${GITHUB_TOKEN}
-export IMAGE_REPO_BASE=ghcr.io/${GITHUB_REPOSITORY}
-```
-
-The cluster exposes ports 80/443 for gateway traffic and 6443 for the Kubernetes API.
-
-Once the cluster is deployed. You can interact with the cluster using standard `ncl` CLI commands.
-
-### Gateway mTLS for CLI
-
-When the cluster is configured to terminate TLS at the Gateway with client authentication, the
-CLI needs the generated client certificate bundle. The chart creates a `navigator-cli-client`
-Secret containing `ca.crt`, `tls.crt`, and `tls.key`. During `ncl cluster admin deploy`, the
-CLI bundle is automatically copied into `~/.config/nemoclaw/clusters/<name>/mtls`, where
-`<name>` comes from `NEMOCLAW_CLUSTER_NAME` or the host in `NEMOCLAW_CLUSTER` (localhost
-defaults to `nemoclaw`).
-
-### Debugging Cluster Issues
-
-If a cluster fails to start or is unhealthy after `ncl cluster admin deploy`, use the `debug-navigator-cluster` skill (located at `.agent/skills/debug-navigator-cluster/SKILL.md`) to diagnose the issue. This skill provides step-by-step instructions for troubleshooting cluster bootstrap failures, health check errors, and other infrastructure problems.
-
-### Docker Build Tasks
-
-```bash
-mise run docker:build           # Build all Docker images
-mise run docker:build:sandbox   # Build the sandbox Docker image
-mise run docker:build:server    # Build the server Docker image
-mise run docker:build:cluster   # Build the airgapped k3s cluster image
-```
-
-### Python Development
-
-```bash
-mise run python:dev      # Install Python package in development mode (builds CLI binary)
-mise run python:build    # Build Python wheel with CLI binary
-```
-
-Python protobuf stubs in `python/navigator/_proto/` are generated artifacts and are gitignored
-(except `__init__.py`). `mise` Python build/test/lint/typecheck tasks run `python:proto`
-automatically, so you generally do not need to generate stubs manually.
-
-### Publishing
-
-Versions are derived from git tags using `setuptools_scm`. No version bumps need to be committed.
-Python wheel builds inject version at build time via
-`NEMOCLAW_CARGO_VERSION` (Cargo/SemVer), applied inside wheel-builder Docker
-layers, so publish flows do not edit `Cargo.toml`/`Cargo.lock` in the working
-tree.
-
-**Version commands:**
-
-```bash
-mise run version:print             # Show computed versions (python, cargo, docker)
-mise run version:print -- --cargo  # Show cargo version only
-```
-
-**Publishing credentials (one-time setup):**
-
-```bash
-echo "
-NAV_PYPI_USERNAME=$USER
-NAV_PYPI_PASSWORD=$ARTIFACTORY_PASSWORD" >> .env
-```
-
-Docker publishing in CI uses AWS credentials for ECR. Python publishing uses a
-two-stage flow: wheels are uploaded to S3, then an internal-network runner
-publishes them to Artifactory with `NAV_PYPI_*` credentials.
-
-**Main branch publish (CI):**
-
-- Publishes Docker multiarch images to ECR as `:dev`, `:latest`, and a versioned dev tag.
-- Builds Linux + macOS (arm64) Python wheels and uploads them to
-  `s3://navigator-pypi-artifacts/navigator/<wheel-version>/`.
-- Runs a publish job on the `nv` runner to list that version prefix, download
-  the wheels, and publish them to Artifactory.
-
-**Tag release publish (CI):**
-
-- Push a semver tag (`vX.Y.Z`) to trigger release jobs.
-- CI publishes Docker multiarch images to ECR as `:X.Y.Z` (no `:latest`).
-- CI stages Linux + macOS (arm64) Python wheels in S3 and publishes to
-  Artifactory from the `nv` runner.
-
-**Tagging a release:**
-
-```bash
-git tag v0.1.1
-git push --tags
-# CI will build and publish Docker + Linux/macOS Python wheels.
-```
-
-**Local macOS wheel publish (arm64):**
-
-```bash
-# Native on macOS host:
-mise run python:publish:macos
-
-# Cross-compile from Linux via Docker:
-mise run python:build:macos:docker
-```
-
-### Cleaning
-
-```bash
-mise run clean           # Clean build artifacts
-```
-
-## Code Style
-
-• **Rust**: Formatted with `rustfmt`, linted with Clippy (pedantic + nursery)
-• **Python**: Formatted and linted with `ruff`, type-checked with `ty`
-
-Run `mise run all` before committing to check everything (runs `fmt:check`, `clippy`, `test`, `python:lint`).
-
-## CLI Output Style
-
-When printing structured output from CLI commands, follow these conventions:
-
-• **Blank line after headings**: Always print an empty line between a heading and its key-value fields. This improves readability in the terminal.
-• **Indented fields**: Key-value fields should be indented with 2 spaces.
-• **Dimmed keys**: Use `.dimmed()` for field labels (e.g., `"Id:".dimmed()`).
-• **Colored headings**: Use `.cyan().bold()` for primary headings.
-
-**Good:**
-
-```
-Created sandbox:
-
-  Id: cddeeb6d-a4d3-4158-a4d1-bd931f743700
-  Name: sandbox-cddeeb6d
-  Namespace: navigator
-```
-
-**Bad** (no blank line after heading):
-
-```
-Created sandbox:
-  Id: cddeeb6d-a4d3-4158-a4d1-bd931f743700
-  Name: sandbox-cddeeb6d
-  Namespace: navigator
-```
+| Path | Purpose |
+|---|---|
+| `crates/navigator-core/` | Shared core library |
+| `crates/navigator-server/` | Gateway/control plane server |
+| `crates/navigator-sandbox/` | Sandbox runtime |
+| `crates/navigator-bootstrap/` | Cluster bootstrap logic |
+| `crates/navigator-cli/` | CLI implementation |
+| `python/` | Python SDK/bindings |
+| `proto/` | Protocol buffer definitions |
+| `build/` | `mise` tasks and build scripts |
+| `deploy/` | Dockerfiles, Helm chart, Kubernetes manifests |
+| `architecture/` | Architecture docs and plans |
 
 ## Commit Messages
 
@@ -492,7 +126,15 @@ chore(deps): bump tokio to 1.40
 
 1. Create a feature branch from `main`
 2. Make your changes with tests
-3. Run `mise run all` to verify
+3. Run `mise run ci` to verify
 4. Open a PR with a clear description
 
+### DCO
+
+All contributions must include a `Signed-off-by` line in each commit message. This certifies that you wrote the work, or otherwise have the right to submit it, under the project license. See the [Developer Certificate of Origin](https://developercertificate.org/).
+
+```bash
+git commit -s -m "feat(sandbox): add new capability"
+```
+
 Use the `create-github-pr` skill to help with opening your pull request.
diff --git a/STYLE_GUIDE.md b/STYLE_GUIDE.md
new file mode 100644
index 00000000..bb6e357b
--- /dev/null
+++ b/STYLE_GUIDE.md
@@ -0,0 +1,54 @@
+# Style Guide
+
+## License Headers
+
+All source files must include SPDX copyright headers.
+
+```bash
+# Add/update headers
+mise run license:update
+
+# Check headers
+mise run license:check
+```
+
+## Code Style
+
+- Rust: format with `rustfmt`, lint with Clippy.
+- Python: format and lint with `ruff`, type-check with `ty`.
+
+Recommended workflow before opening a PR:
+
+```bash
+mise run fmt
+mise run lint
+mise run ci
+```
+
+## CLI Output Style
+
+When printing structured CLI output:
+
+- Add a blank line after headings.
+- Indent key-value fields by 2 spaces.
+- Use dimmed labels for field keys (for example, `"Id:".dimmed()`).
+- Use cyan + bold for primary headings.
+
+Good:
+
+```text
+Created sandbox:
+
+  Id: cddeeb6d-a4d3-4158-a4d1-bd931f743700
+  Name: sandbox-cddeeb6d
+  Namespace: navigator
+```
+
+Bad:
+
+```text
+Created sandbox:
+  Id: cddeeb6d-a4d3-4158-a4d1-bd931f743700
+  Name: sandbox-cddeeb6d
+  Namespace: navigator
+```
diff --git a/architecture/build-containers.md b/architecture/build-containers.md
index 7289a679..0a781ae8 100644
--- a/architecture/build-containers.md
+++ b/architecture/build-containers.md
@@ -38,7 +38,7 @@ build/
   cluster.toml                     # Cluster bootstrap and deploy tasks
   helm.toml                        # Helm lint task
   rust.toml                        # Rust build/lint/format tasks
-  ci.toml                          # Pre-commit, lint, sandbox runner tasks
+  ci.toml                          # Public quality tasks and CI entrypoint
   test.toml                        # Test tasks (Rust + Python)
   python.toml                      # Python build/lint/format tasks
   publish.toml                     # Release publishing tasks
@@ -317,19 +317,17 @@ All builds use mise tasks defined in `build/*.toml` (included from `mise.toml`).
 | Task | Description |
 |---|---|
 | `mise run cluster` | Fast local recreate: push prebuilt local component images and deploy via local registry |
-| `mise run cluster:deploy` | Fast deploy: rebuild changed components only |
-| `mise run cluster:deploy:all` | Full deploy: rebuild all components via local registry |
-| `mise run cluster:push:server` | Tag and push gateway image to local registry |
-| `mise run cluster:push:sandbox` | Tag and push sandbox image to local registry |
+| `mise run cluster:build` | Fast incremental deploy: rebuild changed components only |
+| `mise run cluster:build:full` | Full build + deploy path (advanced/CI) |
 
 ### Other Tasks
 
 | Task | Description |
 |---|---|
-| `mise run sandbox` | Run sandbox container interactively (builds image first) |
+| `mise run cluster:sandbox` | Run sandbox container interactively (builds image first) |
 | `mise run helm:lint` | Lint the Helm chart |
 
-### How `cluster:deploy` Works
+### How `cluster:build` Works
 
 `build/scripts/cluster-deploy-fast.sh` supports two modes:
 
@@ -346,7 +344,7 @@ All builds use mise tasks defined in `build/*.toml` (included from `mise.toml`).
 
 **Explicit target mode** (arguments: `server`, `sandbox`, `chart`, `all`): Rebuilds only the specified components.
 
-Auto mode persists the last deployed fingerprints in `.cache/cluster-deploy-fast.state` (or `$DEPLOY_FAST_STATE_FILE`). Re-running `mise run cluster:deploy` without new local changes prints `No new local changes since last deploy.` and skips rebuild/upgrade work.
+Auto mode persists the last deployed fingerprints in `.cache/cluster-deploy-fast.state` (or `$DEPLOY_FAST_STATE_FILE`). Re-running `mise run cluster:build` without new local changes prints `No new local changes since last deploy.` and skips rebuild/upgrade work.
 
 After building, the script:
 
@@ -429,13 +427,13 @@ In CI pipelines:
 mise run cluster
 
 # Incremental rebuild of changed components only
-mise run cluster:deploy
+mise run cluster:build
 
-# Rebuild specific component(s)
-mise run cluster:deploy server sandbox
+# Full build + deploy path (advanced/CI)
+mise run cluster:build:full
 
 # Run sandbox container interactively (for testing sandbox code)
-mise run sandbox
+mise run cluster:sandbox
 ```
 
 ### Multi-Arch Publishing
diff --git a/architecture/cluster-single-node.md b/architecture/cluster-single-node.md
index 3137efb0..b4ab676b 100644
--- a/architecture/cluster-single-node.md
+++ b/architecture/cluster-single-node.md
@@ -61,11 +61,11 @@ Development task entrypoints split bootstrap behavior:
 | Task | Behavior |
 |---|---|
 | `mise run cluster` | Fast recreate path: destroys existing local cluster resources for `CLUSTER_NAME` (if present), removes conflicting local NemoClaw clusters that occupy host port `6443`, pushes prebuilt local component images to the local registry, and deploys using local-registry image refs (`127.0.0.1:5000/navigator/*`) |
-| `mise run cluster:build` | Full build path: builds cluster/server/sandbox images, pushes components, then deploys (preferred in CI) |
-| `mise run cluster:deploy` | Iterative deploy path: detects changed files and rebuilds/pushes only impacted components |
+| `mise run cluster:build` | Iterative deploy path: detects changed files and rebuilds/pushes only impacted components |
+| `mise run cluster:build:full` | Full build path: builds cluster/server/sandbox images, pushes components, then deploys (preferred in CI) |
 
 For `mise run cluster`, `.env` acts as local source-of-truth for `CLUSTER_NAME`, `GATEWAY_PORT`, and `NEMOCLAW_CLUSTER`. Missing keys are appended; existing values are preserved. If `GATEWAY_PORT` is missing, the task selects a free local port and persists it.
-Fast mode ensures a local registry (`127.0.0.1:5000`) is running and configures k3s to mirror pulls via `host.docker.internal:5000`, so `cluster` and `cluster:deploy` can push/pull local component images consistently.
+Fast mode ensures a local registry (`127.0.0.1:5000`) is running and configures k3s to mirror pulls via `host.docker.internal:5000`, so `cluster` and `cluster:build` can push/pull local component images consistently.
 
 ## Bootstrap Sequence Diagram
 
diff --git a/architecture/inference-routing.md b/architecture/inference-routing.md
index dce2b609..82cfd9d3 100644
--- a/architecture/inference-routing.md
+++ b/architecture/inference-routing.md
@@ -563,9 +563,9 @@ The `create` and `update` commands perform protocol auto-detection when `--proto
 
 ## Dev Sandbox Workflow
 
-**File:** `build/ci.toml` (task `[sandbox]`), `inference-routes.yaml` (repo root)
+**File:** `build/cluster.toml` (task `["cluster:sandbox"]`), `inference-routes.yaml` (repo root)
 
-Running `mise run sandbox` starts a standalone sandbox container with inference routing pre-configured. The task mounts three files into the container:
+Running `mise run cluster:sandbox` starts a standalone sandbox container with inference routing pre-configured. The task mounts three files into the container:
 
 - `dev-sandbox-policy.rego` as `/var/navigator/policy.rego`
 - `dev-sandbox-policy.yaml` as `/var/navigator/data.yaml`
@@ -589,7 +589,7 @@ routes:
 The `-e` flag forwards arbitrary host environment variables into the container:
 
 ```bash
-mise run sandbox -e ANTHROPIC_API_KEY -- /bin/bash
+mise run cluster:sandbox -e ANTHROPIC_API_KEY -- /bin/bash
 ```
 
 This checks whether the named variable is set in the host environment and passes it through. Unset variables produce a warning and are skipped.
diff --git a/build/ci.toml b/build/ci.toml
index f653faf1..cefd5615 100644
--- a/build/ci.toml
+++ b/build/ci.toml
@@ -1,57 +1,37 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-# CI, pre-commit, and sandbox runner tasks
+# CI and quality tasks
+
+[fmt]
+description = "Format Rust and Python code"
+depends = ["rust:format", "python:format"]
 
 [lint]
-description = "Run all linters and format checks"
-depends = ["license:check", "rust:format:check", "rust:lint", "python:lint"]
+description = "Run repository lint checks"
+depends = ["license:check", "rust:format:check", "rust:lint", "python:format:check", "python:lint", "helm:lint"]
+
+[ci]
+description = "Run full checks (lint, compile/type checks, and tests)"
+depends = ["lint", "check", "test"]
 
 [all]
-description = "Build, lint, and test everything"
-depends = ["lint", "test"]
+description = "Alias for ci"
+depends = ["ci"]
+hide = true
 
 ["pre-commit"]
-description = "Pre-commit checks (format, lint, and tests)"
-depends = ["lint", "test:rust", "test:python"]
+description = "Alias for ci"
+depends = ["ci"]
+hide = true
 
 [sandbox]
-description = "Run the sandbox container with an interactive shell"
+description = "Alias for cluster:sandbox"
 depends = ["docker:build:sandbox"]
 raw = true
 usage = """
 flag "-e --env <env>" var=#true help="Environment variables to pass into the sandbox"
 arg "[command]" var=#true help="Command to run in the sandbox (default: /bin/bash)"
 """
-run = """
-#!/usr/bin/env bash
-TTY_FLAG=""
-if [ -t 0 ]; then
-  TTY_FLAG="-it"
-fi
-CMD=(${usage_command:-/bin/bash})
-ENV_FLAGS=""
-for var in ${usage_env}; do
-  if [ -n "${!var+x}" ]; then
-    ENV_FLAGS="${ENV_FLAGS} -e ${var}=${!var}"
-  else
-    echo "Warning: ${var} is not set in your environment, skipping" >&2
-  fi
-done
-docker run ${TTY_FLAG} \
-  --cap-add=SYS_ADMIN \
-  --cap-add=NET_ADMIN \
-  --cap-add=SYS_PTRACE \
-  -v ${PWD}/dev-sandbox-policy.rego:/var/navigator/policy.rego:ro \
-  -v ${PWD}/dev-sandbox-policy.yaml:/var/navigator/data.yaml:ro \
-  -v ${PWD}/inference-routes.yaml:/var/navigator/inference-routes.yaml:ro \
-  -v ${PWD}/tmp:/sandbox/tmp \
-  -e HOME=/sandbox \
-  -w /sandbox \
-  -e NEMOCLAW_POLICY_RULES=/var/navigator/policy.rego \
-  -e NEMOCLAW_POLICY_DATA=/var/navigator/data.yaml \
-  -e NEMOCLAW_INFERENCE_ROUTES=/var/navigator/inference-routes.yaml \
-  -e NVIDIA_API_KEY="${NVIDIA_API_KEY:-}" \
-  ${ENV_FLAGS} \
-  navigator/sandbox:${IMAGE_TAG:-dev} -i -- ${CMD[@]}
-"""
+run = "bash build/scripts/run-sandbox.sh"
+hide = true
diff --git a/build/cluster.toml b/build/cluster.toml
index 7daaed80..f514f605 100644
--- a/build/cluster.toml
+++ b/build/cluster.toml
@@ -8,29 +8,49 @@ description = "Fast local cluster recreate using prebuilt images"
 run = "build/scripts/cluster-bootstrap.sh fast"
 
 ["cluster:build"]
+description = "Incremental deploy: rebuild changed components and skip unnecessary helm work"
+run = "build/scripts/cluster-deploy-fast.sh"
+
+["cluster:build:full"]
 description = "Build and deploy local k3s cluster with NemoClaw"
 depends = [
   "docker:build:server",
   "docker:build:sandbox",
 ]
 run = "build/scripts/cluster-bootstrap.sh build"
+hide = true
+
+["cluster:sandbox"]
+description = "Run the sandbox container with an interactive shell"
+depends = ["docker:build:sandbox"]
+raw = true
+usage = """
+flag "-e --env <env>" var=#true help="Environment variables to pass into the sandbox"
+arg "[command]" var=#true help="Command to run in the sandbox (default: /bin/bash)"
+"""
+run = "bash build/scripts/run-sandbox.sh"
 
 ["cluster:deploy"]
-description = "Fast deploy: rebuild changed components and skip unnecessary helm work"
+description = "Alias for cluster:build (incremental deploy)"
 run = "build/scripts/cluster-deploy-fast.sh"
+hide = true
 
 ["cluster:deploy:sandbox"]
 description = "Fast deploy sandbox-only changes"
 run = "build/scripts/cluster-deploy-fast.sh sandbox"
+hide = true
 
 ["cluster:deploy:all"]
 description = "Pull-mode deploy using local registry pushes"
 run = "build/scripts/cluster-deploy-fast.sh all"
+hide = true
 
 ["cluster:push:server"]
 description = "Tag and push server image to pull registry"
 run = "build/scripts/cluster-push-component.sh server"
+hide = true
 
 ["cluster:push:sandbox"]
 description = "Tag and push sandbox image to pull registry"
 run = "build/scripts/cluster-push-component.sh sandbox"
+hide = true
diff --git a/build/docker.toml b/build/docker.toml
index 74e2daed..dae126ea 100644
--- a/build/docker.toml
+++ b/build/docker.toml
@@ -10,32 +10,40 @@ depends = [
   "docker:build:server",
   "docker:build:cluster",
 ]
+hide = true
 
 ["docker:build:ci"]
 description = "Build the CI Docker image"
 run = "build/scripts/docker-build-component.sh ci"
+hide = true
 
 ["docker:build:sandbox"]
 description = "Build the sandbox Docker image (base variant)"
 run = "build/scripts/docker-build-component.sh sandbox --build-arg RUST_BUILD_PROFILE=${RUST_BUILD_PROFILE:-debug}"
+hide = true
 
 ["docker:build:sandbox:nvidia"]
 description = "Build the NVIDIA GPU sandbox Docker image"
 depends = ["docker:build:sandbox"]
 run = "build/scripts/docker-build-component.sh sandbox nvidia --build-arg BASE_IMAGE=navigator/sandbox:${IMAGE_TAG:-dev}"
+hide = true
 
 ["docker:build:server"]
 description = "Build the server Docker image"
 run = "build/scripts/docker-build-component.sh server"
+hide = true
 
 ["docker:build:cluster"]
 description = "Build the k3s cluster image (component images pulled at runtime from registry)"
 run = "build/scripts/docker-build-cluster.sh"
+hide = true
 
 ["docker:build:cluster:multiarch"]
 description = "Build multi-arch cluster image and push to a registry"
 run = "build/scripts/docker-publish-multiarch.sh --mode registry"
+hide = true
 
 ["docker:publish:cluster:multiarch"]
 description = "Build and publish multi-arch cluster image to ECR"
 run = "build/scripts/docker-publish-multiarch.sh --mode ecr"
+hide = true
diff --git a/build/gator.toml b/build/gator.toml
index ee2a90cf..e64c04ce 100644
--- a/build/gator.toml
+++ b/build/gator.toml
@@ -6,7 +6,9 @@
 [gator]
 description = "Launch the Gator TUI"
 run = "ncl gator"
+hide = true
 
 ["gator:dev"]
 description = "Launch the Gator TUI with hot-reload on file changes"
 run = "cargo watch -s 'ncl gator'"
+hide = true
diff --git a/build/helm.toml b/build/helm.toml
index fb5627f3..d8059d69 100644
--- a/build/helm.toml
+++ b/build/helm.toml
@@ -6,3 +6,4 @@
 ["helm:lint"]
 description = "Lint the nemoclaw helm chart"
 run = "helm lint deploy/helm/navigator"
+hide = true
diff --git a/build/license.toml b/build/license.toml
index d783d6ad..09bd88c0 100644
--- a/build/license.toml
+++ b/build/license.toml
@@ -6,7 +6,9 @@
 ["license:check"]
 description = "Check that all source files have SPDX license headers"
 run = "uv run python scripts/update_license_headers.py --check"
+hide = true
 
 ["license:update"]
 description = "Add or update SPDX license headers on all source files"
 run = "uv run python scripts/update_license_headers.py"
+hide = true
diff --git a/build/publish.toml b/build/publish.toml
index 87ee660b..422cef99 100644
--- a/build/publish.toml
+++ b/build/publish.toml
@@ -13,6 +13,7 @@ CARGO_VERSION=$(uv run python build/scripts/release.py get-version --cargo)
 NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:all
 uv run python build/scripts/release.py python-publish --version "$VERSION"
 """
+hide = true
 
 ["python:publish:macos"]
 description = "Build and publish macOS arm64 Python wheel"
@@ -24,6 +25,7 @@ CARGO_VERSION=$(uv run python build/scripts/release.py get-version --cargo)
 NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:macos
 uv run python build/scripts/release.py python-publish --version "$VERSION" --wheel-glob "*macosx*arm64.whl"
 """
+hide = true
 
 ["publish:main"]
 description = "Main branch publish job (images with :dev, :latest, and version tag)"
@@ -38,6 +40,7 @@ NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:multiarch
 NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:macos
 uv run python build/scripts/release.py python-publish --version "$VERSION_PYTHON"
 """
+hide = true
 
 ["publish:tag"]
 description = "Tag release publish: versioned Docker to ECR and Python to GitHub Packages"
@@ -52,3 +55,4 @@ NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:multiarch
 NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:macos
 uv run python build/scripts/release.py python-publish --version "$VERSION_PYTHON"
 """
+hide = true
diff --git a/build/python.toml b/build/python.toml
index 4a5c5487..c5d6412e 100644
--- a/build/python.toml
+++ b/build/python.toml
@@ -7,11 +7,13 @@
 description = "Install Python package in development mode (builds CLI binary)"
 depends = ["python:proto"]
 run = "uv sync --group dev && uv pip install ."
+hide = true
 
 ["python:build"]
 description = "Build Python wheel with CLI binary (native)"
 depends = ["python:proto"]
 run = "uv run maturin build --release"
+hide = true
 
 ["python:build:multiarch"]
 description = "Build Python wheels for Linux amd64/arm64 with buildx"
@@ -97,6 +99,7 @@ done
 
 ls -la target/wheels/*.whl
 """
+hide = true
 
 ["python:build:macos"]
 description = "Build Python wheel for macOS arm64"
@@ -125,6 +128,7 @@ fi
 
 ls -la target/wheels/*.whl
 """
+hide = true
 
 ["python:build:macos:docker"]
 description = "Build Python wheel for macOS arm64 from Docker"
@@ -173,25 +177,35 @@ docker build \
 
 ls -la target/wheels/*macosx*arm64.whl
 """
+hide = true
 
 ["python:build:all"]
 description = "Build Python wheels for Linux and macOS"
 depends = ["python:build", "python:build:multiarch", "python:build:macos"]
+hide = true
 
 ["python:lint"]
 description = "Lint Python code with ruff"
 depends = ["python:proto"]
 env = { UV_NO_SYNC = "1" }
 run = "uv run ruff check {{vars.python_paths}}"
+hide = true
 
 ["python:format"]
 description = "Format Python code with ruff"
 run = "uv run ruff format {{vars.python_paths}}"
+hide = true
+
+["python:format:check"]
+description = "Check Python formatting with ruff"
+run = "uv run ruff format --check {{vars.python_paths}}"
+hide = true
 
 ["python:typecheck"]
 description = "Type check Python code with ty"
 depends = ["python:proto"]
 run = "uv run ty check {{vars.python_paths}}"
+hide = true
 
 ["python:proto"]
 description = "Generate Python protobuf stubs from .proto files"
@@ -245,3 +259,4 @@ for path, rules in line_rewrites.items():
     file_path.write_text(text)
 PY
 """
+hide = true
diff --git a/build/rust.toml b/build/rust.toml
index 6bb90b2e..5f12084c 100644
--- a/build/rust.toml
+++ b/build/rust.toml
@@ -6,26 +6,36 @@
 [build]
 description = "Build all Rust crates"
 run = "cargo build --workspace"
+hide = true
 
 ["build:release"]
 description = "Build all Rust crates in release mode"
 run = "cargo build --workspace --release"
+hide = true
 
 [check]
+description = "Run fast compile and type checks"
+depends = ["rust:check", "python:typecheck"]
+
+["rust:check"]
 description = "Check all Rust crates for errors"
 run = "cargo check --workspace"
+hide = true
 
 ["rust:lint"]
 description = "Lint Rust code with Clippy"
 run = "cargo clippy --workspace --all-targets"
+hide = true
 
 ["rust:format"]
 description = "Format Rust code"
 run = "cargo fmt --all"
+hide = true
 
 ["rust:format:check"]
 description = "Check Rust formatting"
 run = "cargo fmt --all -- --check"
+hide = true
 
 [clean]
 description = "Clean build artifacts"
diff --git a/build/scripts/cluster-push-component.sh b/build/scripts/cluster-push-component.sh
index a6fdd00d..bb308f75 100755
--- a/build/scripts/cluster-push-component.sh
+++ b/build/scripts/cluster-push-component.sh
@@ -49,7 +49,7 @@ if [ -z "${resolved_source_image}" ]; then
   done
   echo "build it first with either:" >&2
   echo "  mise run docker:build:${component}" >&2
-  echo "  mise run cluster:build" >&2
+  echo "  mise run cluster:build:full" >&2
   exit 1
 fi
 
diff --git a/build/scripts/run-sandbox.sh b/build/scripts/run-sandbox.sh
new file mode 100644
index 00000000..45d2df9e
--- /dev/null
+++ b/build/scripts/run-sandbox.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Run the sandbox image via docker; reads usage_env and usage_command.
+set -euo pipefail
+
+# Build argv as an array so paths and values with spaces stay single words.
+docker_args=(run)
+if [ -t 0 ]; then
+  docker_args+=(-it)
+fi
+docker_args+=(
+  --cap-add=SYS_ADMIN
+  --cap-add=NET_ADMIN
+  --cap-add=SYS_PTRACE
+  -v "${PWD}/dev-sandbox-policy.rego:/var/navigator/policy.rego:ro"
+  -v "${PWD}/dev-sandbox-policy.yaml:/var/navigator/data.yaml:ro"
+  -v "${PWD}/inference-routes.yaml:/var/navigator/inference-routes.yaml:ro"
+  -v "${PWD}/tmp:/sandbox/tmp"
+  -e HOME=/sandbox
+  -w /sandbox
+  -e NEMOCLAW_POLICY_RULES=/var/navigator/policy.rego
+  -e NEMOCLAW_POLICY_DATA=/var/navigator/data.yaml
+  -e NEMOCLAW_INFERENCE_ROUTES=/var/navigator/inference-routes.yaml
+  -e "NVIDIA_API_KEY=${NVIDIA_API_KEY:-}"
+)
+# usage_env holds variable names; word splitting here is intentional.
+for var in ${usage_env:-}; do
+  if [ -n "${!var+x}" ]; then
+    docker_args+=(-e "${var}=${!var}")
+  else
+    echo "Warning: ${var} is not set in your environment, skipping" >&2
+  fi
+done
+CMD=(${usage_command:-/bin/bash}) # intentionally split into argv words
+docker "${docker_args[@]}" "navigator/sandbox:${IMAGE_TAG:-dev}" -i -- "${CMD[@]}"
diff --git a/build/test.toml b/build/test.toml
index cbc0f13b..891e651d 100644
--- a/build/test.toml
+++ b/build/test.toml
@@ -7,33 +7,43 @@
 description = "Run all tests (Rust + Python)"
 depends = ["test:rust", "test:python"]
 
+["test:e2e"]
+description = "Run default end-to-end test lane"
+depends = ["test:e2e:sandbox"]
+
 ["test:rust"]
 description = "Run Rust tests"
 run = "cargo test --workspace"
+hide = true
 
 ["test:python"]
 description = "Run Python tests"
 depends = ["python:proto"]
 env = { UV_NO_SYNC = "1" }
 run = "uv run pytest python/"
+hide = true
 
 ["test:e2e:sandbox"]
 description = "Run sandbox end-to-end tests"
-depends = ["python:proto", "cluster:deploy"]
+depends = ["python:proto", "cluster:build"]
 env = { UV_NO_SYNC = "1", PYTHONPATH = "python" }
 run = "uv run pytest -o python_files='test_*.py' e2e/python"
+hide = true
 
 ["test:e2e:port-forward"]
 description = "Run port-forward integration test"
-depends = ["cluster:deploy"]
+depends = ["cluster:build"]
 run = "bash e2e/bash/test_port_forward.sh"
+hide = true
 
 ["test:e2e:custom-image"]
 description = "Run custom image build and sandbox e2e test"
-depends = ["cluster:deploy"]
+depends = ["cluster:build"]
 run = "bash e2e/bash/test_sandbox_custom_image.sh"
+hide = true
 
 ["test:e2e:sync"]
 description = "Run sandbox file sync e2e test"
-depends = ["cluster:deploy"]
+depends = ["cluster:build"]
 run = "bash e2e/bash/test_sandbox_sync.sh"
+hide = true
diff --git a/build/version.toml b/build/version.toml
index 1b21d763..8f0acb4b 100644
--- a/build/version.toml
+++ b/build/version.toml
@@ -3,6 +3,11 @@
 
 # Version management tasks
 
+["version"]
+description = "Print git-derived version"
+run = "uv run python build/scripts/release.py get-version"
+
 ["version:print"]
 description = "Print git-derived version"
 run = "uv run python build/scripts/release.py get-version"
+hide = true

From 0c6fa08f150afa7d3302ede197fc259e8dc7e474 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:03:33 -0800
Subject: [PATCH 02/14] refactor: rename build/ to tasks/ and simplify cluster
 workflow

Rename the build/ directory to tasks/ for clarity, updating all
references across mise config, CI pipelines, Dockerfiles, architecture
docs, and internal scripts.

Consolidate mise run cluster and mise run cluster:build into a single
cluster task that bootstraps if no cluster is running, then performs
incremental deploy. Add a public sandbox task (mise run sandbox) that
creates a sandbox on the running cluster via ncl.

Move commit message guidance under the Pull Requests section in
CONTRIBUTING.md, replace ncl sandbox create with mise run sandbox in
Getting Started, and remove version from the main tasks table.

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 .../agent-memory/arch-doc-writer/MEMORY.md    |  2 +-
 .github/workflows/ci-image.yml                |  2 +-
 .github/workflows/publish.yml                 |  6 +--
 .gitlab-ci.yml                                | 24 +++++-----
 CONTRIBUTING.md                               | 30 ++++++------
 architecture/build-containers.md              | 46 +++++++++----------
 architecture/cluster-single-node.md           |  5 +-
 architecture/inference-routing.md             |  4 +-
 deploy/docker/Dockerfile.ci                   |  4 +-
 mise.toml                                     |  6 +--
 {build => tasks}/ci.toml                      |  9 ++--
 {build => tasks}/cluster.toml                 | 24 ++++------
 {build => tasks}/docker.toml                  | 14 +++---
 {build => tasks}/gator.toml                   |  0
 {build => tasks}/helm.toml                    |  0
 {build => tasks}/license.toml                 |  0
 {build => tasks}/publish.toml                 | 28 +++++------
 {build => tasks}/python.toml                  |  4 +-
 {build => tasks}/rust.toml                    |  0
 {build => tasks}/scripts/cluster-bootstrap.sh |  2 +-
 .../scripts/cluster-deploy-fast.sh            |  8 ++--
 .../scripts/cluster-push-component.sh         |  2 +-
 tasks/scripts/cluster.sh                      | 19 ++++++++
 .../scripts/docker-build-cluster.sh           |  0
 .../scripts/docker-build-component.sh         |  0
 .../scripts/docker-publish-multiarch.sh       |  0
 {build => tasks}/scripts/release.py           |  0
 {build => tasks}/scripts/run-sandbox.sh       |  0
 {build => tasks}/test.toml                    |  8 ++--
 {build => tasks}/version.toml                 |  4 +-
 30 files changed, 127 insertions(+), 124 deletions(-)
 rename {build => tasks}/ci.toml (76%)
 rename {build => tasks}/cluster.toml (61%)
 rename {build => tasks}/docker.toml (74%)
 rename {build => tasks}/gator.toml (100%)
 rename {build => tasks}/helm.toml (100%)
 rename {build => tasks}/license.toml (100%)
 rename {build => tasks}/publish.toml (64%)
 rename {build => tasks}/python.toml (98%)
 rename {build => tasks}/rust.toml (100%)
 rename {build => tasks}/scripts/cluster-bootstrap.sh (99%)
 rename {build => tasks}/scripts/cluster-deploy-fast.sh (97%)
 rename {build => tasks}/scripts/cluster-push-component.sh (97%)
 create mode 100755 tasks/scripts/cluster.sh
 rename {build => tasks}/scripts/docker-build-cluster.sh (100%)
 rename {build => tasks}/scripts/docker-build-component.sh (100%)
 rename {build => tasks}/scripts/docker-publish-multiarch.sh (100%)
 rename {build => tasks}/scripts/release.py (100%)
 rename {build => tasks}/scripts/run-sandbox.sh (100%)
 rename {build => tasks}/test.toml (90%)
 rename {build => tasks}/version.toml (71%)

diff --git a/.claude/agent-memory/arch-doc-writer/MEMORY.md b/.claude/agent-memory/arch-doc-writer/MEMORY.md
index 72dfcde6..1dc66bbf 100644
--- a/.claude/agent-memory/arch-doc-writer/MEMORY.md
+++ b/.claude/agent-memory/arch-doc-writer/MEMORY.md
@@ -63,7 +63,7 @@
 - Helm chart deploys a StatefulSet (NOT Deployment), PVC 1Gi at /var/navigator
 - Cluster image does NOT bundle image tarballs -- components pulled at runtime from distribution registry
 - PKI job generates CA + server cert + client cert for mTLS (RSA 2048, 10yr, Helm pre-install hook)
-- Build tasks in `build/*.toml`; scripts in `build/scripts/`
+- Build tasks in `tasks/*.toml`; scripts in `tasks/scripts/`
 - `cluster-deploy-fast.sh` supports both auto mode (git diff) and explicit targets (server/sandbox/pki-job/chart/all)
 - `cluster-bootstrap.sh` ensures local Docker registry on port 5000, pushes all components, then deploys
 - Default values.yaml: repository is CloudFront-backed CDN, tag: "latest", pullPolicy: Always
diff --git a/.github/workflows/ci-image.yml b/.github/workflows/ci-image.yml
index c7f0d2c3..83e74b84 100644
--- a/.github/workflows/ci-image.yml
+++ b/.github/workflows/ci-image.yml
@@ -6,7 +6,7 @@ on:
     paths:
       - 'deploy/docker/Dockerfile.ci'
       - 'mise.toml'
-      - 'build/**'
+      - 'tasks/**'
       - '.github/workflows/ci-image.yml'
   workflow_dispatch:
 
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 926da940..ac0afc6e 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -120,14 +120,14 @@ jobs:
         id: version
         run: |
           set -euo pipefail
-          WHEEL_VERSION=$(uv run python build/scripts/release.py get-version --python)
+          WHEEL_VERSION=$(uv run python tasks/scripts/release.py get-version --python)
           echo "wheel_version=${WHEEL_VERSION}" >> "$GITHUB_OUTPUT"
 
       - name: Build Python wheels
         run: |
           set -euo pipefail
           WHEEL_VERSION="${{ steps.version.outputs.wheel_version }}"
-          CARGO_VERSION=$(uv run python build/scripts/release.py get-version --cargo)
+          CARGO_VERSION=$(uv run python tasks/scripts/release.py get-version --cargo)
           NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:multiarch
           NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:macos
           ls -la target/wheels/*.whl
@@ -216,4 +216,4 @@ jobs:
       run: |
         set -euo pipefail
         WHEEL_VERSION="${{ needs.build-python-wheels.outputs.wheel_version }}"
-        uv run python build/scripts/release.py python-publish --version "$WHEEL_VERSION"
+        uv run python tasks/scripts/release.py python-publish --version "$WHEEL_VERSION"
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 080f3df3..c747eebf 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -43,7 +43,7 @@ cache:
   - key:
       files:
         - Cargo.lock
-        - build/rust.toml
+        - tasks/rust.toml
       prefix: "target-$CI_RUNNER_EXECUTABLE_ARCH"
     paths:
       - target/
@@ -58,9 +58,9 @@ cache:
         - Cargo.lock
         - crates/**/*
         - proto/**/*
-        - build/rust.toml
-        - build/test.toml
-        - build/ci.toml
+        - tasks/rust.toml
+        - tasks/test.toml
+        - tasks/ci.toml
         - mise.toml
         - .gitlab-ci.yml
     - when: never
@@ -73,9 +73,9 @@ cache:
         - python/**/*
         - scripts/**/*
         - proto/**/*
-        - build/python.toml
-        - build/test.toml
-        - build/ci.toml
+        - tasks/python.toml
+        - tasks/test.toml
+        - tasks/ci.toml
         - mise.toml
         - .gitlab-ci.yml
     - when: never
@@ -87,10 +87,10 @@ cache:
         - deploy/docker/**/*
         - deploy/helm/**/*
         - deploy/kube/**/*
-        - build/cluster.toml
-        - build/docker.toml
-        - build/test.toml
-        - build/scripts/**/*
+        - tasks/cluster.toml
+        - tasks/docker.toml
+        - tasks/test.toml
+        - tasks/scripts/**/*
         - crates/**/*
         - proto/**/*
         - mise.toml
@@ -119,7 +119,7 @@ build_ci_image:
     - changes:
         - deploy/docker/Dockerfile.ci
         - mise.toml
-        - build/**/*
+        - tasks/**/*
         - .gitlab-ci.yml
     - when: never
   script:
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a7edc539..583c7851 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -32,14 +32,11 @@ Project requirements:
 # One-time trust
 mise trust
 
-# Start/recreate local cluster
+# Bootstrap or incremental deploy
 mise run cluster
 
-# Iterative deploy after code changes
-mise run cluster:build
-
 # Launch a sandbox
-ncl sandbox create -- claude
+mise run sandbox
 ```
 
 ## `ncl` Shortcut
@@ -63,8 +60,8 @@ These are the primary `mise` tasks for day-to-day development:
 
 | Task | Purpose |
 |---|---|
-| `mise run cluster` | Fast local cluster recreate |
-| `mise run cluster:build` | Incremental deploy of changed components |
+| `mise run cluster` | Bootstrap or incremental deploy |
+| `mise run sandbox` | Create a sandbox on the running cluster |
 | `mise run cluster:sandbox` | Run sandbox container interactively |
 | `mise run fmt` | Format Rust and Python |
 | `mise run lint` | Repository lint checks |
@@ -73,7 +70,6 @@ These are the primary `mise` tasks for day-to-day development:
 | `mise run test:e2e` | Default end-to-end test lane |
 | `mise run ci` | Full local CI checks |
 | `mise run clean` | Clean build artifacts |
-| `mise run version` | Print git-derived version |
 
 ## Project Structure
 
@@ -86,11 +82,18 @@ These are the primary `mise` tasks for day-to-day development:
 | `crates/navigator-cli/` | CLI implementation |
 | `python/` | Python SDK/bindings |
 | `proto/` | Protocol buffer definitions |
-| `build/` | `mise` tasks and build scripts |
+| `tasks/` | `mise` tasks and build scripts |
 | `deploy/` | Dockerfiles, Helm chart, Kubernetes manifests |
 | `architecture/` | Architecture docs and plans |
 
-## Commit Messages
+## Pull Requests
+
+1. Create a feature branch from `main`
+2. Make your changes with tests
+3. Run `mise run ci` to verify
+4. Open a PR with a clear description
+
+### Commit Messages
 
 This project uses [Conventional Commits](https://www.conventionalcommits.org/). All commit messages must follow the format:
 
@@ -122,13 +125,6 @@ docs: update installation instructions
 chore(deps): bump tokio to 1.40
 ```
 
-## Pull Requests
-
-1. Create a feature branch from `main`
-2. Make your changes with tests
-3. Run `mise run ci` to verify
-4. Open a PR with a clear description
-
 ### DCO
 
 All contributions must include a `Signed-off-by` line in each commit message. This certifies you have the right to submit the work under the project license. See the [Developer Certificate of Origin](https://developercertificate.org/).
diff --git a/architecture/build-containers.md b/architecture/build-containers.md
index 0a781ae8..8ac6c1f9 100644
--- a/architecture/build-containers.md
+++ b/architecture/build-containers.md
@@ -33,7 +33,7 @@ deploy/
     manifests/                     # Kubernetes manifests for k3s auto-deploy
       navigator-helmchart.yaml
       agent-sandbox.yaml           # Agent Sandbox CRD controller RBAC
-build/
+tasks/
   docker.toml                      # Docker image build tasks
   cluster.toml                     # Cluster bootstrap and deploy tasks
   helm.toml                        # Helm lint task
@@ -140,7 +140,7 @@ A pre-built Ubuntu 24.04 image for CI pipeline jobs, defined in `deploy/docker/D
 | sccache | Rust compilation cache (amd64 only; skipped on arm64) |
 | socat | Docker socket forwarding in sandbox e2e tests |
 
-The build context must include `build/` because the Dockerfile copies mise task includes from that directory (`mise.toml` + `build/*.toml`).
+The build context must include `tasks/` because the Dockerfile copies mise task includes from that directory (`mise.toml` + `tasks/*.toml`).
 
 ## Cross-Compilation Support
 
@@ -298,7 +298,7 @@ The chart creates a Role and RoleBinding granting the gateway's ServiceAccount p
 
 ## Build Tasks (mise)
 
-All builds use mise tasks defined in `build/*.toml` (included from `mise.toml`).
+All builds use mise tasks defined in `tasks/*.toml` (included from `mise.toml`).
 
 ### Docker Image Tasks
 
@@ -316,8 +316,7 @@ All builds use mise tasks defined in `build/*.toml` (included from `mise.toml`).
 
 | Task | Description |
 |---|---|
-| `mise run cluster` | Fast local recreate: push prebuilt local component images and deploy via local registry |
-| `mise run cluster:build` | Fast incremental deploy: rebuild changed components only |
+| `mise run cluster` | Bootstrap or incremental deploy: creates cluster if needed, rebuilds changed components |
 | `mise run cluster:build:full` | Full build + deploy path (advanced/CI) |
 
 ### Other Tasks
@@ -327,9 +326,9 @@ All builds use mise tasks defined in `build/*.toml` (included from `mise.toml`).
 | `mise run cluster:sandbox` | Run sandbox container interactively (builds image first) |
 | `mise run helm:lint` | Lint the Helm chart |
 
-### How `cluster:build` Works
+### How `cluster` Works (Incremental Deploy)
 
-`build/scripts/cluster-deploy-fast.sh` supports two modes:
+`tasks/scripts/cluster-deploy-fast.sh` supports two modes:
 
 **Auto mode** (no arguments): Detects changed files from Git (unstaged, staged, and untracked), fingerprints the relevant local changes for each component, and rebuilds only components whose fingerprint changed since the last successful deploy.
 
@@ -344,7 +343,7 @@ All builds use mise tasks defined in `build/*.toml` (included from `mise.toml`).
 
 **Explicit target mode** (arguments: `server`, `sandbox`, `chart`, `all`): Rebuilds only the specified components.
 
-Auto mode persists the last deployed fingerprints in `.cache/cluster-deploy-fast.state` (or `$DEPLOY_FAST_STATE_FILE`). Re-running `mise run cluster:build` without new local changes prints `No new local changes since last deploy.` and skips rebuild/upgrade work.
+Auto mode persists the last deployed fingerprints in `.cache/cluster-deploy-fast.state` (or `$DEPLOY_FAST_STATE_FILE`). Re-running `mise run cluster` without new local changes prints `No new local changes since last deploy.` and skips rebuild/upgrade work.
 
 After building, the script:
 
@@ -354,9 +353,9 @@ After building, the script:
 4. Restarts the gateway StatefulSet (or Deployment, if present) and waits for rollout completion.
 5. On success, updates the local deploy fingerprint state file for the next incremental deploy.
 
-### How `mise run cluster` Works
+### How `mise run cluster` Bootstrap Works
 
-`build/scripts/cluster-bootstrap.sh` performs a full cluster bootstrap for local development:
+`tasks/scripts/cluster-bootstrap.sh` performs a full cluster bootstrap for local development:
 
 1. Resolves the local registry address (defaults to `127.0.0.1:5000/navigator`). In CI, uses `$CI_REGISTRY_IMAGE`.
 2. Ensures a local Docker registry container (`navigator-local-registry`) is running on port 5000 (creates one if needed).
@@ -381,8 +380,8 @@ After building, the script:
 
 Container builds use Docker BuildKit with local cache directories:
 
-- `build/scripts/docker-build-component.sh` stores per-component caches in `.cache/buildkit/<component>`.
-- `build/scripts/docker-build-cluster.sh` stores the cluster image cache in `.cache/buildkit/cluster`.
+- `tasks/scripts/docker-build-component.sh` stores per-component caches in `.cache/buildkit/<component>`.
+- `tasks/scripts/docker-build-cluster.sh` stores the cluster image cache in `.cache/buildkit/cluster`.
 - `mise run python:build:multiarch` stores per-platform wheel caches in `.cache/buildkit/python-wheels/<platform>` for local builds when using a `docker-container` buildx driver.
 - Rust-heavy Dockerfiles use BuildKit cache mounts for cargo registry, cargo target, and sccache local disk directories. Cargo target cache mounts are keyed by image name, `TARGETARCH`, and a computed scope hash derived from `Cargo.lock` plus a Rust toolchain hint, with `sharing=locked` to prevent concurrent cache corruption in parallel CI builds. This reduces reuse of stale `target/` artifacts across dependency or toolchain changes while preserving incremental rebuilds within a compatible scope. sccache uses memcached in CI (`SCCACHE_MEMCACHED_ENDPOINT`) and falls back to the local disk cache mount for local dev builds, providing a second layer of caching at the compilation unit level.
 - When the active buildx driver is `docker` (not `docker-container`), local cache import/export flags are skipped automatically because the docker driver cannot export local caches. In CI, cache export is also skipped.
@@ -400,7 +399,7 @@ In CI pipelines:
 
 ## Multi-Arch Publishing
 
-`build/scripts/docker-publish-multiarch.sh` builds and pushes all images for multiple architectures.
+`tasks/scripts/docker-publish-multiarch.sh` builds and pushes all images for multiple architectures.
 
 **Two modes:**
 
@@ -423,12 +422,9 @@ In CI pipelines:
 ### Local Development
 
 ```bash
-# Full build + deploy (builds all images, starts local registry, bootstraps cluster)
+# Bootstrap or incremental deploy (creates cluster if needed, rebuilds changed components)
 mise run cluster
 
-# Incremental rebuild of changed components only
-mise run cluster:build
-
 # Full build + deploy path (advanced/CI)
 mise run cluster:build:full
 
@@ -475,11 +471,11 @@ When the cluster container starts, k3s automatically deploys these HelmChart CRs
 - `deploy/docker/cluster-healthcheck.sh` -- Cluster health check script
 - `deploy/helm/navigator/` -- Helm chart directory
 - `deploy/kube/manifests/` -- Auto-deployed Kubernetes manifests
-- `build/docker.toml` -- Docker build task definitions
-- `build/cluster.toml` -- Cluster lifecycle task definitions
-- `build/scripts/docker-build-component.sh` -- Generic component image builder
-- `build/scripts/docker-build-cluster.sh` -- Cluster image builder
-- `build/scripts/docker-publish-multiarch.sh` -- Multi-arch publish script
-- `build/scripts/cluster-bootstrap.sh` -- Full local cluster bootstrap
-- `build/scripts/cluster-deploy-fast.sh` -- Incremental deploy script
-- `build/scripts/cluster-push-component.sh` -- Single component push to registry
+- `tasks/docker.toml` -- Docker build task definitions
+- `tasks/cluster.toml` -- Cluster lifecycle task definitions
+- `tasks/scripts/docker-build-component.sh` -- Generic component image builder
+- `tasks/scripts/docker-build-cluster.sh` -- Cluster image builder
+- `tasks/scripts/docker-publish-multiarch.sh` -- Multi-arch publish script
+- `tasks/scripts/cluster-bootstrap.sh` -- Full local cluster bootstrap
+- `tasks/scripts/cluster-deploy-fast.sh` -- Incremental deploy script
+- `tasks/scripts/cluster-push-component.sh` -- Single component push to registry
diff --git a/architecture/cluster-single-node.md b/architecture/cluster-single-node.md
index b4ab676b..be4de4eb 100644
--- a/architecture/cluster-single-node.md
+++ b/architecture/cluster-single-node.md
@@ -60,12 +60,11 @@ Development task entrypoints split bootstrap behavior:
 
 | Task | Behavior |
 |---|---|
-| `mise run cluster` | Fast recreate path: destroys existing local cluster resources for `CLUSTER_NAME` (if present), removes conflicting local NemoClaw clusters that occupy host port `6443`, pushes prebuilt local component images to the local registry, and deploys using local-registry image refs (`127.0.0.1:5000/navigator/*`) |
-| `mise run cluster:build` | Iterative deploy path: detects changed files and rebuilds/pushes only impacted components |
+| `mise run cluster` | Bootstrap or incremental deploy: if no cluster container is running, bootstraps one via the fast recreate path; otherwise detects changed files and rebuilds/pushes only impacted components |
 | `mise run cluster:build:full` | Full build path: builds cluster/server/sandbox images, pushes components, then deploys (preferred in CI) |
 
 For `mise run cluster`, `.env` acts as local source-of-truth for `CLUSTER_NAME`, `GATEWAY_PORT`, and `NEMOCLAW_CLUSTER`. Missing keys are appended; existing values are preserved. If `GATEWAY_PORT` is missing, the task selects a free local port and persists it.
-Fast mode ensures a local registry (`127.0.0.1:5000`) is running and configures k3s to mirror pulls via `host.docker.internal:5000`, so `cluster` and `cluster:build` can push/pull local component images consistently.
+Fast mode ensures a local registry (`127.0.0.1:5000`) is running and configures k3s to mirror pulls via `host.docker.internal:5000`, so the cluster task can push/pull local component images consistently.
 
 ## Bootstrap Sequence Diagram
 
diff --git a/architecture/inference-routing.md b/architecture/inference-routing.md
index 82cfd9d3..3ec8d30b 100644
--- a/architecture/inference-routing.md
+++ b/architecture/inference-routing.md
@@ -20,7 +20,7 @@ The inference routing system transparently intercepts AI inference API calls fro
 | `proto/inference.proto` | Protobuf definitions: `InferenceRoute`, `InferenceRouteSpec`, `GetSandboxInferenceBundle` RPC, CRUD RPCs |
 | `proto/sandbox.proto` | `InferencePolicy` message (field on `SandboxPolicy`) |
 | `crates/navigator-sandbox/src/main.rs` | Sandbox binary CLI: `--inference-routes` / `NEMOCLAW_INFERENCE_ROUTES` flag definition |
-| `build/ci.toml` | `[sandbox]` task: mounts `inference-routes.yaml`, sets env vars for dev sandbox |
+| `tasks/ci.toml` | `[sandbox]` task: mounts `inference-routes.yaml`, sets env vars for dev sandbox |
 | `inference-routes.yaml` | Default standalone routes for dev sandbox (NVIDIA API endpoint) |
 | `dev-sandbox-policy.rego` | `network_action` Rego rule -- tri-state decision logic |
 
@@ -563,7 +563,7 @@ The `create` and `update` commands perform protocol auto-detection when `--proto
 
 ## Dev Sandbox Workflow
 
-**File:** `build/cluster.toml` (task `["cluster:sandbox"]`), `inference-routes.yaml` (repo root)
+**File:** `tasks/cluster.toml` (task `["cluster:sandbox"]`), `inference-routes.yaml` (repo root)
 
 Running `mise run cluster:sandbox` starts a standalone sandbox container with inference routing pre-configured. The task mounts three files into the container:
 
diff --git a/deploy/docker/Dockerfile.ci b/deploy/docker/Dockerfile.ci
index ab5ca680..4bd2a595 100644
--- a/deploy/docker/Dockerfile.ci
+++ b/deploy/docker/Dockerfile.ci
@@ -60,9 +60,9 @@ RUN case "$TARGETARCH" in \
 # Install mise
 RUN curl https://mise.run | sh
 
-# Copy mise.toml and build task includes, then install all tools via mise
+# Copy mise.toml and task includes, then install all tools via mise
 COPY mise.toml /opt/mise/mise.toml
-COPY build/ /opt/mise/build/
+COPY tasks/ /opt/mise/tasks/
 WORKDIR /opt/mise
 ARG MISE_GITHUB_TOKEN
 RUN mise trust /opt/mise/mise.toml && \
diff --git a/mise.toml b/mise.toml
index abbb5c88..34baf1d7 100644
--- a/mise.toml
+++ b/mise.toml
@@ -4,7 +4,7 @@
 # NemoClaw mise configuration
 # See https://mise.jdx.dev/ for documentation
 #
-# Tasks are defined in build/*.toml — run `mise tasks` to list them all.
+# Tasks are defined in tasks/*.toml — run `mise tasks` to list them all.
 
 redactions = ["*_TOKEN", "*_PASSWORD"]
 
@@ -43,7 +43,7 @@ DOCKER_BUILDKIT = "1"
 
 [vars]
 # Python paths to include in formatting/linting
-python_paths = "python/ build/scripts/*.py"
+python_paths = "python/ tasks/scripts/*.py"
 
 [task_config]
-includes = ["build/*.toml"]
+includes = ["tasks/*.toml"]
diff --git a/build/ci.toml b/tasks/ci.toml
similarity index 76%
rename from build/ci.toml
rename to tasks/ci.toml
index cefd5615..75db9fee 100644
--- a/build/ci.toml
+++ b/tasks/ci.toml
@@ -26,12 +26,9 @@ depends = ["ci"]
 hide = true
 
 [sandbox]
-description = "Alias for cluster:sandbox"
-depends = ["docker:build:sandbox"]
+description = "Create a sandbox on the running cluster"
 raw = true
 usage = """
-flag "-e --env <env>" var=#true help="Environment variables to pass into the sandbox"
-arg "[command]" var=#true help="Command to run in the sandbox (default: /bin/bash)"
+arg "[command]" var=#true help="Command to run in the sandbox (default: interactive agent)"
 """
-run = "bash build/scripts/run-sandbox.sh"
-hide = true
+run = "ncl sandbox create -- ${usage_command:-claude}"
diff --git a/build/cluster.toml b/tasks/cluster.toml
similarity index 61%
rename from build/cluster.toml
rename to tasks/cluster.toml
index f514f605..a0beb00d 100644
--- a/build/cluster.toml
+++ b/tasks/cluster.toml
@@ -4,12 +4,8 @@
 # Cluster bootstrap and deploy tasks
 
 [cluster]
-description = "Fast local cluster recreate using prebuilt images"
-run = "build/scripts/cluster-bootstrap.sh fast"
-
-["cluster:build"]
-description = "Incremental deploy: rebuild changed components and skip unnecessary helm work"
-run = "build/scripts/cluster-deploy-fast.sh"
+description = "Bootstrap or incremental deploy (creates cluster if needed, rebuilds changed components)"
+run = "tasks/scripts/cluster.sh"
 
 ["cluster:build:full"]
 description = "Build and deploy local k3s cluster with NemoClaw"
@@ -17,7 +13,7 @@ depends = [
   "docker:build:server",
   "docker:build:sandbox",
 ]
-run = "build/scripts/cluster-bootstrap.sh build"
+run = "tasks/scripts/cluster-bootstrap.sh build"
 hide = true
 
 ["cluster:sandbox"]
@@ -28,29 +24,29 @@ usage = """
 flag "-e --env <env>" var=#true help="Environment variables to pass into the sandbox"
 arg "[command]" var=#true help="Command to run in the sandbox (default: /bin/bash)"
 """
-run = "bash build/scripts/run-sandbox.sh"
+run = "bash tasks/scripts/run-sandbox.sh"
 
 ["cluster:deploy"]
-description = "Alias for cluster:build (incremental deploy)"
-run = "build/scripts/cluster-deploy-fast.sh"
+description = "Alias for cluster (incremental deploy)"
+run = "tasks/scripts/cluster.sh"
 hide = true
 
 ["cluster:deploy:sandbox"]
 description = "Fast deploy sandbox-only changes"
-run = "build/scripts/cluster-deploy-fast.sh sandbox"
+run = "tasks/scripts/cluster-deploy-fast.sh sandbox"
 hide = true
 
 ["cluster:deploy:all"]
 description = "Pull-mode deploy using local registry pushes"
-run = "build/scripts/cluster-deploy-fast.sh all"
+run = "tasks/scripts/cluster-deploy-fast.sh all"
 hide = true
 
 ["cluster:push:server"]
 description = "Tag and push server image to pull registry"
-run = "build/scripts/cluster-push-component.sh server"
+run = "tasks/scripts/cluster-push-component.sh server"
 hide = true
 
 ["cluster:push:sandbox"]
 description = "Tag and push sandbox image to pull registry"
-run = "build/scripts/cluster-push-component.sh sandbox"
+run = "tasks/scripts/cluster-push-component.sh sandbox"
 hide = true
diff --git a/build/docker.toml b/tasks/docker.toml
similarity index 74%
rename from build/docker.toml
rename to tasks/docker.toml
index dae126ea..32d09bb1 100644
--- a/build/docker.toml
+++ b/tasks/docker.toml
@@ -14,36 +14,36 @@ hide = true
 
 ["docker:build:ci"]
 description = "Build the CI Docker image"
-run = "build/scripts/docker-build-component.sh ci"
+run = "tasks/scripts/docker-build-component.sh ci"
 hide = true
 
 ["docker:build:sandbox"]
 description = "Build the sandbox Docker image (base variant)"
-run = "build/scripts/docker-build-component.sh sandbox --build-arg RUST_BUILD_PROFILE=${RUST_BUILD_PROFILE:-debug}"
+run = "tasks/scripts/docker-build-component.sh sandbox --build-arg RUST_BUILD_PROFILE=${RUST_BUILD_PROFILE:-debug}"
 hide = true
 
 ["docker:build:sandbox:nvidia"]
 description = "Build the NVIDIA GPU sandbox Docker image"
 depends = ["docker:build:sandbox"]
-run = "build/scripts/docker-build-component.sh sandbox nvidia --build-arg BASE_IMAGE=navigator/sandbox:${IMAGE_TAG:-dev}"
+run = "tasks/scripts/docker-build-component.sh sandbox nvidia --build-arg BASE_IMAGE=navigator/sandbox:${IMAGE_TAG:-dev}"
 hide = true
 
 ["docker:build:server"]
 description = "Build the server Docker image"
-run = "build/scripts/docker-build-component.sh server"
+run = "tasks/scripts/docker-build-component.sh server"
 hide = true
 
 ["docker:build:cluster"]
 description = "Build the k3s cluster image (component images pulled at runtime from registry)"
-run = "build/scripts/docker-build-cluster.sh"
+run = "tasks/scripts/docker-build-cluster.sh"
 hide = true
 
 ["docker:build:cluster:multiarch"]
 description = "Build multi-arch cluster image and push to a registry"
-run = "build/scripts/docker-publish-multiarch.sh --mode registry"
+run = "tasks/scripts/docker-publish-multiarch.sh --mode registry"
 hide = true
 
 ["docker:publish:cluster:multiarch"]
 description = "Build and publish multi-arch cluster image to ECR"
-run = "build/scripts/docker-publish-multiarch.sh --mode ecr"
+run = "tasks/scripts/docker-publish-multiarch.sh --mode ecr"
 hide = true
diff --git a/build/gator.toml b/tasks/gator.toml
similarity index 100%
rename from build/gator.toml
rename to tasks/gator.toml
diff --git a/build/helm.toml b/tasks/helm.toml
similarity index 100%
rename from build/helm.toml
rename to tasks/helm.toml
diff --git a/build/license.toml b/tasks/license.toml
similarity index 100%
rename from build/license.toml
rename to tasks/license.toml
diff --git a/build/publish.toml b/tasks/publish.toml
similarity index 64%
rename from build/publish.toml
rename to tasks/publish.toml
index 422cef99..b4ac7955 100644
--- a/build/publish.toml
+++ b/tasks/publish.toml
@@ -8,10 +8,10 @@ description = "Build and publish Python wheels"
 run = """
 #!/usr/bin/env bash
 set -euo pipefail
-VERSION=$(uv run python build/scripts/release.py get-version --python)
-CARGO_VERSION=$(uv run python build/scripts/release.py get-version --cargo)
+VERSION=$(uv run python tasks/scripts/release.py get-version --python)
+CARGO_VERSION=$(uv run python tasks/scripts/release.py get-version --cargo)
 NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:all
-uv run python build/scripts/release.py python-publish --version "$VERSION"
+uv run python tasks/scripts/release.py python-publish --version "$VERSION"
 """
 hide = true
 
@@ -20,10 +20,10 @@ description = "Build and publish macOS arm64 Python wheel"
 run = """
 #!/usr/bin/env bash
 set -euo pipefail
-VERSION=$(uv run python build/scripts/release.py get-version --python)
-CARGO_VERSION=$(uv run python build/scripts/release.py get-version --cargo)
+VERSION=$(uv run python tasks/scripts/release.py get-version --python)
+CARGO_VERSION=$(uv run python tasks/scripts/release.py get-version --cargo)
 NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:macos
-uv run python build/scripts/release.py python-publish --version "$VERSION" --wheel-glob "*macosx*arm64.whl"
+uv run python tasks/scripts/release.py python-publish --version "$VERSION" --wheel-glob "*macosx*arm64.whl"
 """
 hide = true
 
@@ -32,13 +32,13 @@ description = "Main branch publish job (images with :dev, :latest, and version t
 run = """
 #!/usr/bin/env bash
 set -euo pipefail
-VERSION_DOCKER=$(uv run python build/scripts/release.py get-version --docker)
-VERSION_PYTHON=$(uv run python build/scripts/release.py get-version --python)
-CARGO_VERSION=$(uv run python build/scripts/release.py get-version --cargo)
+VERSION_DOCKER=$(uv run python tasks/scripts/release.py get-version --docker)
+VERSION_PYTHON=$(uv run python tasks/scripts/release.py get-version --python)
+CARGO_VERSION=$(uv run python tasks/scripts/release.py get-version --cargo)
 IMAGE_TAG=dev TAG_LATEST=true EXTRA_DOCKER_TAGS="$VERSION_DOCKER" mise run docker:publish:cluster:multiarch
 NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:multiarch
 NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:macos
-uv run python build/scripts/release.py python-publish --version "$VERSION_PYTHON"
+uv run python tasks/scripts/release.py python-publish --version "$VERSION_PYTHON"
 """
 hide = true
 
@@ -47,12 +47,12 @@ description = "Tag release publish: versioned Docker to ECR and Python to GitHub
 run = """
 #!/usr/bin/env bash
 set -euo pipefail
-VERSION_DOCKER=$(uv run python build/scripts/release.py get-version --docker)
-VERSION_PYTHON=$(uv run python build/scripts/release.py get-version --python)
-CARGO_VERSION=$(uv run python build/scripts/release.py get-version --cargo)
+VERSION_DOCKER=$(uv run python tasks/scripts/release.py get-version --docker)
+VERSION_PYTHON=$(uv run python tasks/scripts/release.py get-version --python)
+CARGO_VERSION=$(uv run python tasks/scripts/release.py get-version --cargo)
 IMAGE_TAG="$VERSION_DOCKER" TAG_LATEST=false mise run docker:publish:cluster:multiarch
 NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:multiarch
 NEMOCLAW_CARGO_VERSION="$CARGO_VERSION" mise run python:build:macos
-uv run python build/scripts/release.py python-publish --version "$VERSION_PYTHON"
+uv run python tasks/scripts/release.py python-publish --version "$VERSION_PYTHON"
 """
 hide = true
diff --git a/build/python.toml b/tasks/python.toml
similarity index 98%
rename from build/python.toml
rename to tasks/python.toml
index c5d6412e..fd4fab37 100644
--- a/build/python.toml
+++ b/tasks/python.toml
@@ -41,7 +41,7 @@ sha256_16_stdin() {
 PLATFORMS=${DOCKER_PLATFORMS:-linux/amd64,linux/arm64}
 CARGO_VERSION=${NEMOCLAW_CARGO_VERSION:-}
 if [ -z "$CARGO_VERSION" ]; then
-  CARGO_VERSION=$(uv run python build/scripts/release.py get-version --cargo)
+  CARGO_VERSION=$(uv run python tasks/scripts/release.py get-version --cargo)
 fi
 
 LOCK_HASH=$(sha256_16 Cargo.lock)
@@ -156,7 +156,7 @@ sha256_16_stdin() {
 CARGO_VERSION=${NEMOCLAW_CARGO_VERSION:-}
 OSXCROSS_IMAGE_REF=${OSXCROSS_IMAGE:-crazymax/osxcross:latest}
 if [ -z "$CARGO_VERSION" ]; then
-  CARGO_VERSION=$(uv run python build/scripts/release.py get-version --cargo)
+  CARGO_VERSION=$(uv run python tasks/scripts/release.py get-version --cargo)
 fi
 
 LOCK_HASH=$(sha256_16 Cargo.lock)
diff --git a/build/rust.toml b/tasks/rust.toml
similarity index 100%
rename from build/rust.toml
rename to tasks/rust.toml
diff --git a/build/scripts/cluster-bootstrap.sh b/tasks/scripts/cluster-bootstrap.sh
similarity index 99%
rename from build/scripts/cluster-bootstrap.sh
rename to tasks/scripts/cluster-bootstrap.sh
index 5f79e1d8..1149ab66 100755
--- a/build/scripts/cluster-bootstrap.sh
+++ b/tasks/scripts/cluster-bootstrap.sh
@@ -217,7 +217,7 @@ if [ "${SKIP_IMAGE_PUSH:-}" = "1" ]; then
   echo "Skipping image push (SKIP_IMAGE_PUSH=1; images already in registry)."
 elif [ "${MODE}" = "build" ] || [ "${MODE}" = "fast" ]; then
   for component in server sandbox; do
-    build/scripts/cluster-push-component.sh "${component}"
+    tasks/scripts/cluster-push-component.sh "${component}"
   done
 fi
 
diff --git a/build/scripts/cluster-deploy-fast.sh b/tasks/scripts/cluster-deploy-fast.sh
similarity index 97%
rename from build/scripts/cluster-deploy-fast.sh
rename to tasks/scripts/cluster-deploy-fast.sh
index 51c65916..7a8cfe88 100755
--- a/build/scripts/cluster-deploy-fast.sh
+++ b/tasks/scripts/cluster-deploy-fast.sh
@@ -256,19 +256,19 @@ sandbox_pid=""
 
 if [[ "${build_server}" == "1" ]]; then
   if [[ "${build_sandbox}" == "1" ]]; then
-    build/scripts/docker-build-component.sh server &
+    tasks/scripts/docker-build-component.sh server &
     server_pid=$!
   else
-    build/scripts/docker-build-component.sh server
+    tasks/scripts/docker-build-component.sh server
   fi
 fi
 
 if [[ "${build_sandbox}" == "1" ]]; then
   if [[ -n "${server_pid}" ]]; then
-    build/scripts/docker-build-component.sh sandbox --build-arg RUST_BUILD_PROFILE=${RUST_BUILD_PROFILE} &
+    tasks/scripts/docker-build-component.sh sandbox --build-arg RUST_BUILD_PROFILE=${RUST_BUILD_PROFILE} &
     sandbox_pid=$!
   else
-    build/scripts/docker-build-component.sh sandbox --build-arg RUST_BUILD_PROFILE=${RUST_BUILD_PROFILE}
+    tasks/scripts/docker-build-component.sh sandbox --build-arg RUST_BUILD_PROFILE=${RUST_BUILD_PROFILE}
   fi
 fi
 
diff --git a/build/scripts/cluster-push-component.sh b/tasks/scripts/cluster-push-component.sh
similarity index 97%
rename from build/scripts/cluster-push-component.sh
rename to tasks/scripts/cluster-push-component.sh
index bb308f75..5858ecb7 100755
--- a/build/scripts/cluster-push-component.sh
+++ b/tasks/scripts/cluster-push-component.sh
@@ -49,7 +49,7 @@ if [ -z "${resolved_source_image}" ]; then
   done
   echo "build it first with either:" >&2
   echo "  mise run docker:build:${component}" >&2
-  echo "  mise run cluster:build:full" >&2
+  echo "  mise run cluster" >&2
   exit 1
 fi
 
diff --git a/tasks/scripts/cluster.sh b/tasks/scripts/cluster.sh
new file mode 100755
index 00000000..5cb3f5f3
--- /dev/null
+++ b/tasks/scripts/cluster.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Unified cluster entrypoint: bootstrap when no cluster container is running,
+# otherwise incremental deploy. Name filter is anchored: docker matches substrings.
+
+set -euo pipefail
+
+CLUSTER_NAME=${CLUSTER_NAME:-$(basename "$PWD")}
+CONTAINER_NAME="navigator-cluster-${CLUSTER_NAME}"
+
+if ! docker ps -q --filter "name=^/?${CONTAINER_NAME}\$" | grep -q .; then
+  echo "No running cluster found. Bootstrapping..."
+  exec tasks/scripts/cluster-bootstrap.sh fast
+fi
+
+exec tasks/scripts/cluster-deploy-fast.sh "$@"
diff --git a/build/scripts/docker-build-cluster.sh b/tasks/scripts/docker-build-cluster.sh
similarity index 100%
rename from build/scripts/docker-build-cluster.sh
rename to tasks/scripts/docker-build-cluster.sh
diff --git a/build/scripts/docker-build-component.sh b/tasks/scripts/docker-build-component.sh
similarity index 100%
rename from build/scripts/docker-build-component.sh
rename to tasks/scripts/docker-build-component.sh
diff --git a/build/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh
similarity index 100%
rename from build/scripts/docker-publish-multiarch.sh
rename to tasks/scripts/docker-publish-multiarch.sh
diff --git a/build/scripts/release.py b/tasks/scripts/release.py
similarity index 100%
rename from build/scripts/release.py
rename to tasks/scripts/release.py
diff --git a/build/scripts/run-sandbox.sh b/tasks/scripts/run-sandbox.sh
similarity index 100%
rename from build/scripts/run-sandbox.sh
rename to tasks/scripts/run-sandbox.sh
diff --git a/build/test.toml b/tasks/test.toml
similarity index 90%
rename from build/test.toml
rename to tasks/test.toml
index 891e651d..f086260e 100644
--- a/build/test.toml
+++ b/tasks/test.toml
@@ -25,25 +25,25 @@ hide = true
 
 ["test:e2e:sandbox"]
 description = "Run sandbox end-to-end tests"
-depends = ["python:proto", "cluster:build"]
+depends = ["python:proto", "cluster"]
 env = { UV_NO_SYNC = "1", PYTHONPATH = "python" }
 run = "uv run pytest -o python_files='test_*.py' e2e/python"
 hide = true
 
 ["test:e2e:port-forward"]
 description = "Run port-forward integration test"
-depends = ["cluster:build"]
+depends = ["cluster"]
 run = "bash e2e/bash/test_port_forward.sh"
 hide = true
 
 ["test:e2e:custom-image"]
 description = "Run custom image build and sandbox e2e test"
-depends = ["cluster:build"]
+depends = ["cluster"]
 run = "bash e2e/bash/test_sandbox_custom_image.sh"
 hide = true
 
 ["test:e2e:sync"]
 description = "Run sandbox file sync e2e test"
-depends = ["cluster:build"]
+depends = ["cluster"]
 run = "bash e2e/bash/test_sandbox_sync.sh"
 hide = true
diff --git a/build/version.toml b/tasks/version.toml
similarity index 71%
rename from build/version.toml
rename to tasks/version.toml
index 8f0acb4b..49a8e09b 100644
--- a/build/version.toml
+++ b/tasks/version.toml
@@ -5,9 +5,9 @@
 
 ["version"]
 description = "Print git-derived version"
-run = "uv run python build/scripts/release.py get-version"
+run = "uv run python tasks/scripts/release.py get-version"
 
 ["version:print"]
 description = "Print git-derived version"
-run = "uv run python build/scripts/release.py get-version"
+run = "uv run python tasks/scripts/release.py get-version"
 hide = true

From 1518e98f4bcb43ca3af01ec503456313b3e6ac5d Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:03:56 -0800
Subject: [PATCH 03/14] chore: hide fmt, lint, and check tasks (subsumed by ci)

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 CONTRIBUTING.md | 5 +----
 tasks/ci.toml   | 2 ++
 tasks/rust.toml | 1 +
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 583c7851..b6259a7a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -63,12 +63,9 @@ These are the primary `mise` tasks for day-to-day development:
 | `mise run cluster` | Bootstrap or incremental deploy |
 | `mise run sandbox` | Create a sandbox on the running cluster |
 | `mise run cluster:sandbox` | Run sandbox container interactively |
-| `mise run fmt` | Format Rust and Python |
-| `mise run lint` | Repository lint checks |
-| `mise run check` | Fast compile and type checks |
 | `mise run test` | Default test suite |
 | `mise run test:e2e` | Default end-to-end test lane |
-| `mise run ci` | Full local CI checks |
+| `mise run ci` | Full local CI checks (lint, compile/type checks, tests) |
 | `mise run clean` | Clean build artifacts |
 
 ## Project Structure
diff --git a/tasks/ci.toml b/tasks/ci.toml
index 75db9fee..caf960ed 100644
--- a/tasks/ci.toml
+++ b/tasks/ci.toml
@@ -6,10 +6,12 @@
 [fmt]
 description = "Format Rust and Python code"
 depends = ["rust:format", "python:format"]
+hide = true
 
 [lint]
 description = "Run repository lint checks"
 depends = ["license:check", "rust:format:check", "rust:lint", "python:format:check", "python:lint", "helm:lint"]
+hide = true
 
 [ci]
 description = "Run full checks (lint, compile/type checks, and tests)"
diff --git a/tasks/rust.toml b/tasks/rust.toml
index 5f12084c..8d708f46 100644
--- a/tasks/rust.toml
+++ b/tasks/rust.toml
@@ -16,6 +16,7 @@ hide = true
 [check]
 description = "Run fast compile and type checks"
 depends = ["rust:check", "python:typecheck"]
+hide = true
 
 ["rust:check"]
 description = "Check all Rust crates for errors"

From 304f7d0ad4c7eca44a624d8a215bc7b139a6a194 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:04:24 -0800
Subject: [PATCH 04/14] chore: hide cluster:sandbox, use sandbox as public
 entrypoint

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 CONTRIBUTING.md    | 1 -
 tasks/cluster.toml | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b6259a7a..8dabeea1 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -62,7 +62,6 @@ These are the primary `mise` tasks for day-to-day development:
 |---|---|
 | `mise run cluster` | Bootstrap or incremental deploy |
 | `mise run sandbox` | Create a sandbox on the running cluster |
-| `mise run cluster:sandbox` | Run sandbox container interactively |
 | `mise run test` | Default test suite |
 | `mise run test:e2e` | Default end-to-end test lane |
 | `mise run ci` | Full local CI checks (lint, compile/type checks, tests) |
diff --git a/tasks/cluster.toml b/tasks/cluster.toml
index a0beb00d..46ba4393 100644
--- a/tasks/cluster.toml
+++ b/tasks/cluster.toml
@@ -25,6 +25,7 @@ flag "-e --env <env>" var=#true help="Environment variables to pass into the san
 arg "[command]" var=#true help="Command to run in the sandbox (default: /bin/bash)"
 """
 run = "bash tasks/scripts/run-sandbox.sh"
+hide = true
 
 ["cluster:deploy"]
 description = "Alias for cluster (incremental deploy)"

From eb5598aa720bb29c1020cdc68862b1eac11b6356 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:08:49 -0800
Subject: [PATCH 05/14] chore: rename test:e2e to e2e as public task

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 CONTRIBUTING.md | 2 +-
 tasks/test.toml | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 8dabeea1..12e41779 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -63,7 +63,7 @@ These are the primary `mise` tasks for day-to-day development:
 | `mise run cluster` | Bootstrap or incremental deploy |
 | `mise run sandbox` | Create a sandbox on the running cluster |
 | `mise run test` | Default test suite |
-| `mise run test:e2e` | Default end-to-end test lane |
+| `mise run e2e` | Default end-to-end test lane |
 | `mise run ci` | Full local CI checks (lint, compile/type checks, tests) |
 | `mise run clean` | Clean build artifacts |
 
diff --git a/tasks/test.toml b/tasks/test.toml
index f086260e..9408d91f 100644
--- a/tasks/test.toml
+++ b/tasks/test.toml
@@ -7,10 +7,15 @@
 description = "Run all tests (Rust + Python)"
 depends = ["test:rust", "test:python"]
 
-["test:e2e"]
+[e2e]
 description = "Run default end-to-end test lane"
 depends = ["test:e2e:sandbox"]
 
+["test:e2e"]
+description = "Alias for e2e"
+depends = ["e2e"]
+hide = true
+
 ["test:rust"]
 description = "Run Rust tests"
 run = "cargo test --workspace"

From a2feba0a17cc3c245e6434b74eaec2c35eb4f292 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:12:12 -0800
Subject: [PATCH 06/14] chore: hide version task and align public tasks with
 docs

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 CONTRIBUTING.md    | 44 ++++++++++++++++++++++----------------------
 tasks/version.toml |  1 +
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 12e41779..711bb079 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -22,6 +22,7 @@ echo 'eval "$(~/.local/bin/mise activate zsh)"' >> ~/.zshrc
 ```
 
 Project requirements:
+
 - Rust 1.88+
 - Python 3.12+
 - Docker (running)
@@ -41,9 +42,8 @@ mise run sandbox
 
 ## `ncl` Shortcut
 
-Inside this repository, `ncl` is a local shortcut script at `scripts/bin/ncl`.
+Inside this repository, `ncl` is a local shortcut script at `scripts/bin/ncl`. The script:
 
-It:
 1. Builds `navigator-cli` if needed.
 2. Runs the local debug CLI binary (`target/debug/nemoclaw`).
 
@@ -58,29 +58,29 @@ ncl sandbox create -- codex
 
 These are the primary `mise` tasks for day-to-day development:
 
-| Task | Purpose |
-|---|---|
-| `mise run cluster` | Bootstrap or incremental deploy |
-| `mise run sandbox` | Create a sandbox on the running cluster |
-| `mise run test` | Default test suite |
-| `mise run e2e` | Default end-to-end test lane |
-| `mise run ci` | Full local CI checks (lint, compile/type checks, tests) |
-| `mise run clean` | Clean build artifacts |
+| Task               | Purpose                                                 |
+| ------------------ | ------------------------------------------------------- |
+| `mise run cluster` | Bootstrap or incremental deploy                         |
+| `mise run sandbox` | Create a sandbox on the running cluster                 |
+| `mise run test`    | Default test suite                                      |
+| `mise run e2e`     | Default end-to-end test lane                            |
+| `mise run ci`      | Full local CI checks (lint, compile/type checks, tests) |
+| `mise run clean`   | Clean build artifacts                                   |
 
 ## Project Structure
 
-| Path | Purpose |
-|---|---|
-| `crates/navigator-core/` | Shared core library |
-| `crates/navigator-server/` | Gateway/control plane server |
-| `crates/navigator-sandbox/` | Sandbox runtime |
-| `crates/navigator-bootstrap/` | Cluster bootstrap logic |
-| `crates/navigator-cli/` | CLI implementation |
-| `python/` | Python SDK/bindings |
-| `proto/` | Protocol buffer definitions |
-| `tasks/` | `mise` tasks and build scripts |
-| `deploy/` | Dockerfiles, Helm chart, Kubernetes manifests |
-| `architecture/` | Architecture docs and plans |
+| Path                          | Purpose                                       |
+| ----------------------------- | --------------------------------------------- |
+| `crates/navigator-core/`      | Shared core library                           |
+| `crates/navigator-server/`    | Gateway/control plane server                  |
+| `crates/navigator-sandbox/`   | Sandbox runtime                               |
+| `crates/navigator-bootstrap/` | Cluster bootstrap logic                       |
+| `crates/navigator-cli/`       | CLI implementation                            |
+| `python/`                     | Python SDK/bindings                           |
+| `proto/`                      | Protocol buffer definitions                   |
+| `tasks/`                      | `mise` tasks and build scripts                |
+| `deploy/`                     | Dockerfiles, Helm chart, Kubernetes manifests |
+| `architecture/`               | Architecture docs and plans                   |
 
 ## Pull Requests
 
diff --git a/tasks/version.toml b/tasks/version.toml
index 49a8e09b..e0486279 100644
--- a/tasks/version.toml
+++ b/tasks/version.toml
@@ -6,6 +6,7 @@
 ["version"]
 description = "Print git-derived version"
 run = "uv run python tasks/scripts/release.py get-version"
+hide = true
 
 ["version:print"]
 description = "Print git-derived version"

From 4de705071c0379d9369fede988c3a9a02a71cfdd Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:13:26 -0800
Subject: [PATCH 07/14] docs: use tree-style project structure in
 CONTRIBUTING.md

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 CONTRIBUTING.md | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 711bb079..f3f1c920 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -69,18 +69,17 @@ These are the primary `mise` tasks for day-to-day development:
 
 ## Project Structure
 
-| Path                          | Purpose                                       |
-| ----------------------------- | --------------------------------------------- |
-| `crates/navigator-core/`      | Shared core library                           |
-| `crates/navigator-server/`    | Gateway/control plane server                  |
-| `crates/navigator-sandbox/`   | Sandbox runtime                               |
-| `crates/navigator-bootstrap/` | Cluster bootstrap logic                       |
-| `crates/navigator-cli/`       | CLI implementation                            |
-| `python/`                     | Python SDK/bindings                           |
-| `proto/`                      | Protocol buffer definitions                   |
-| `tasks/`                      | `mise` tasks and build scripts                |
-| `deploy/`                     | Dockerfiles, Helm chart, Kubernetes manifests |
-| `architecture/`               | Architecture docs and plans                   |
+```
+crates/                  # Rust crates
+python/                  # Python SDK and bindings
+proto/                   # Protocol buffer definitions
+tasks/                   # mise task definitions and build scripts
+deploy/
+  docker/                # Dockerfiles (sandbox, server, cluster, CI, wheels)
+  helm/navigator/        # Helm chart for the gateway
+  kube/manifests/        # Kubernetes manifests for k3s auto-deploy
+architecture/            # Architecture docs and plans
+```
 
 ## Pull Requests
 

From 51e88f4c5e0831f10e88b59643eb35a77616bd21 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:13:58 -0800
Subject: [PATCH 08/14] docs: enumerate crates and python in project structure

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 CONTRIBUTING.md | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f3f1c920..231c3e30 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -70,8 +70,18 @@ These are the primary `mise` tasks for day-to-day development:
 ## Project Structure
 
 ```
-crates/                  # Rust crates
-python/                  # Python SDK and bindings
+crates/
+  navigator-cli/         # CLI implementation
+  navigator-server/      # Gateway / control-plane server
+  navigator-sandbox/     # Sandbox runtime
+  navigator-bootstrap/   # Cluster bootstrap logic
+  navigator-core/        # Shared core library
+  navigator-policy/      # Policy engine
+  navigator-providers/   # Agent provider integrations
+  navigator-router/      # Inference routing
+  navigator-tui/         # Terminal UI (Gator)
+python/
+  navigator/             # Python SDK and sandbox helpers
 proto/                   # Protocol buffer definitions
 tasks/                   # mise task definitions and build scripts
 deploy/

From db8e7fa8a68fbd4e5d9c28592c523ae98bfdab59 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:14:20 -0800
Subject: [PATCH 09/14] docs: simplify project structure to top-level table

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 CONTRIBUTING.md | 29 ++++++++---------------------
 1 file changed, 8 insertions(+), 21 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 231c3e30..ec2546d8 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -69,27 +69,14 @@ These are the primary `mise` tasks for day-to-day development:
 
 ## Project Structure
 
-```
-crates/
-  navigator-cli/         # CLI implementation
-  navigator-server/      # Gateway / control-plane server
-  navigator-sandbox/     # Sandbox runtime
-  navigator-bootstrap/   # Cluster bootstrap logic
-  navigator-core/        # Shared core library
-  navigator-policy/      # Policy engine
-  navigator-providers/   # Agent provider integrations
-  navigator-router/      # Inference routing
-  navigator-tui/         # Terminal UI (Gator)
-python/
-  navigator/             # Python SDK and sandbox helpers
-proto/                   # Protocol buffer definitions
-tasks/                   # mise task definitions and build scripts
-deploy/
-  docker/                # Dockerfiles (sandbox, server, cluster, CI, wheels)
-  helm/navigator/        # Helm chart for the gateway
-  kube/manifests/        # Kubernetes manifests for k3s auto-deploy
-architecture/            # Architecture docs and plans
-```
+| Path | Purpose |
+|---|---|
+| `crates/` | Rust crates (CLI, server, sandbox, bootstrap, core, policy, providers, router, TUI) |
+| `python/` | Python SDK and bindings |
+| `proto/` | Protocol buffer definitions |
+| `tasks/` | `mise` task definitions and build scripts |
+| `deploy/` | Dockerfiles, Helm chart, Kubernetes manifests |
+| `architecture/` | Architecture docs and plans |
 
 ## Pull Requests
 

From 8fcf02d1d5ac7cdf611e3352e1a071a11c7119c1 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:26:20 -0800
Subject: [PATCH 10/14] chore: sandbox task depends on cluster, simplify
 getting started

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 CONTRIBUTING.md | 5 +----
 tasks/ci.toml   | 1 +
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ec2546d8..c1a27993 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -33,10 +33,7 @@ Project requirements:
 # One-time trust
 mise trust
 
-# Bootstrap or incremental deploy
-mise run cluster
-
-# Launch a sandbox
+# Launch a sandbox (deploys a cluster if one isn't running)
 mise run sandbox
 ```
 
diff --git a/tasks/ci.toml b/tasks/ci.toml
index caf960ed..5793264d 100644
--- a/tasks/ci.toml
+++ b/tasks/ci.toml
@@ -29,6 +29,7 @@ hide = true
 
 [sandbox]
 description = "Create a sandbox on the running cluster"
+depends = ["cluster"]
 raw = true
 usage = """
 arg "[command]" var=#true help="Command to run in the sandbox (default: interactive agent)"

From 0b523cb718e76452c201474c57478cd0a4a51876 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:30:55 -0800
Subject: [PATCH 11/14] chore: sandbox task checks for running cluster instead
 of depending on it

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 tasks/ci.toml | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tasks/ci.toml b/tasks/ci.toml
index 5793264d..00841987 100644
--- a/tasks/ci.toml
+++ b/tasks/ci.toml
@@ -29,9 +29,17 @@ hide = true
 
 [sandbox]
 description = "Create a sandbox on the running cluster"
-depends = ["cluster"]
 raw = true
 usage = """
 arg "[command]" var=#true help="Command to run in the sandbox (default: interactive agent)"
 """
-run = "ncl sandbox create -- ${usage_command:-claude}"
+run = """
+#!/usr/bin/env bash
+set -euo pipefail
+CLUSTER_NAME=${CLUSTER_NAME:-$(basename "$PWD")}
+CONTAINER_NAME="navigator-cluster-${CLUSTER_NAME}"
+if ! docker ps -q --filter "name=${CONTAINER_NAME}" | grep -q .; then
+  mise run cluster
+fi
+ncl sandbox create -- ${usage_command:-claude}
+"""

From caf11af1991a6044ef575fa07722204100e6dd94 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:35:48 -0800
Subject: [PATCH 12/14] docs: add TESTING.md with test patterns and conventions

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 TESTING.md | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 TESTING.md

diff --git a/TESTING.md b/TESTING.md
new file mode 100644
index 00000000..69d21137
--- /dev/null
+++ b/TESTING.md
@@ -0,0 +1,149 @@
+# Testing
+
+## Running Tests
+
+```bash
+mise run test          # Rust + Python unit tests
+mise run e2e           # End-to-end tests (requires a running cluster)
+mise run ci            # Everything: lint, compile checks, and tests
+```
+
+## Test Layout
+
+```
+crates/*/src/          # Inline #[cfg(test)] modules
+crates/*/tests/        # Rust integration tests
+python/navigator/      # Python unit tests (*_test.py suffix)
+e2e/python/            # Python E2E tests (test_*.py prefix)
+e2e/bash/              # Bash E2E scripts
+```
+
+## Rust Tests
+
+Unit tests live inline with `#[cfg(test)] mod tests` blocks. Integration tests
+go in `crates/*/tests/` and are named `*_integration.rs`.
+
+Use `#[tokio::test]` for anything async:
+
+```rust
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn store_round_trip() {
+        let store = Store::connect("sqlite::memory:").await.unwrap();
+        store.put("sandbox", "abc", "my-sandbox", b"payload").await.unwrap();
+        let record = store.get("sandbox", "abc").await.unwrap().unwrap();
+        assert_eq!(record.payload, b"payload");
+    }
+}
+```
+
+Run Rust tests only:
+
+```bash
+mise run test:rust     # cargo test --workspace
+```
+
+## Python Unit Tests
+
+Python unit tests use the `*_test.py` suffix convention (not `test_*` prefix)
+and live alongside the source in `python/navigator/`. They use mock-based
+patterns with fake gRPC stubs:
+
+```python
+def test_exec_python_serializes_callable_payload() -> None:
+    stub = _FakeStub()
+    client = _client_with_fake_stub(stub)
+
+    def add(a: int, b: int) -> int:
+        return a + b
+
+    result = client.exec_python("sandbox-1", add, args=(2, 3))
+    assert result.exit_code == 0
+```
+
+Run Python unit tests only:
+
+```bash
+mise run test:python   # uv run pytest python/
+```
+
+## E2E Tests
+
+E2E tests run against a live cluster. `mise run e2e` deploys changed components
+before running the suite.
+
+### Python E2E (`e2e/python/`)
+
+Tests use the `sandbox` fixture from `conftest.py` to create real sandboxes:
+
+```python
+def test_exec_returns_stdout(sandbox):
+    with sandbox(delete_on_exit=True) as sb:
+        result = sb.exec(["echo", "hello"])
+        assert result.exit_code == 0
+        assert "hello" in result.stdout
+```
+
+#### `Sandbox.exec_python`
+
+`exec_python` serializes a Python callable with `cloudpickle`, sends it to the
+sandbox, and returns the result. Because cloudpickle serializes module-level
+functions by reference (which fails inside the sandbox), use one of these
+patterns:
+
+**Closures from factory functions:**
+
+```python
+def _make_adder():
+    def add(a, b):
+        return a + b
+    return add
+
+def test_addition(sandbox):
+    with sandbox(delete_on_exit=True) as sb:
+        result = sb.exec_python(_make_adder(), args=(2, 3))
+        assert result.stdout.strip() == "5"
+```
+
+**Bound methods on local classes:**
+
+```python
+def test_multiply(sandbox):
+    class Calculator:
+        def multiply(self, a, b):
+            return a * b
+
+    with sandbox(delete_on_exit=True) as sb:
+        result = sb.exec_python(Calculator().multiply, args=(6, 7))
+        assert result.stdout.strip() == "42"
+```
+
+#### Shared Fixtures (`e2e/python/conftest.py`)
+
+| Fixture | Scope | Purpose |
+|---|---|---|
+| `sandbox_client` | session | gRPC client connected to the active cluster |
+| `sandbox` | function | Factory returning a `Sandbox` context manager |
+| `inference_client` | session | Client for managing inference routes |
+| `mock_inference_route` | session | Creates a mock OpenAI-protocol route for tests |
+
+### Bash E2E (`e2e/bash/`)
+
+Self-contained shell scripts that exercise the CLI directly:
+
+- `test_sandbox_sync.sh` — file sync round-trip
+- `test_sandbox_custom_image.sh` — custom Docker image build and run
+- `test_port_forward.sh` — TCP port forwarding through a sandbox
+
+Pattern: `set -euo pipefail`, cleanup via `trap`, poll-based readiness checks
+parsing CLI output.
+
+## Environment Variables
+
+| Variable | Purpose |
+|---|---|
+| `NEMOCLAW_CLUSTER` | Override active cluster name for E2E tests |
+| `NAV_BIN` | Override `ncl` binary path in bash E2E tests |

From 740cf298064272b202e51d831255d5e44fe8f52d Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 23:36:26 -0800
Subject: [PATCH 13/14] docs: simplify crates description in project structure

Signed-off-by: Drew Newberry <anewberry@nvidia.com>
---
 CONTRIBUTING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c1a27993..8cdadacf 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -68,7 +68,7 @@ These are the primary `mise` tasks for day-to-day development:
 
 | Path | Purpose |
 |---|---|
-| `crates/` | Rust crates (CLI, server, sandbox, bootstrap, core, policy, providers, router, TUI) |
+| `crates/` | Rust crates |
 | `python/` | Python SDK and bindings |
 | `proto/` | Protocol buffer definitions |
 | `tasks/` | `mise` task definitions and build scripts |

From 6efd8ef59bb1aa0f798568d2adc21c7ab324c71b Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Wed, 4 Mar 2026 12:45:11 -0800
Subject: [PATCH 14/14] refactor: rename ncl shortcut script to nemoclaw

Rename scripts/bin/ncl to scripts/bin/nemoclaw so the local dev
shortcut matches the actual binary name. Update all references across
task definitions, shell scripts, e2e tests, architecture docs, examples,
agent skills, and config files.
---
 .../skills/debug-navigator-cluster/SKILL.md   |  14 +-
 .agents/skills/tui-development/SKILL.md       |  14 +-
 .env.example                                  |   2 +-
 CONTRIBUTING.md                               |  12 +-
 TESTING.md                                    |   2 +-
 architecture/README.md                        |  20 +--
 architecture/cluster-single-node.md           |  24 +--
 architecture/sandbox-connect.md               |  18 +--
 architecture/sandbox-custom-containers.md     |  10 +-
 architecture/sandbox-providers.md             |  18 +--
 architecture/system-architecture.md           |   4 +-
 e2e/bash/test_port_forward.sh                 |   6 +-
 e2e/bash/test_sandbox_custom_image.sh         |  10 +-
 e2e/bash/test_sandbox_sync.sh                 |  10 +-
 examples/bring-your-own-container/README.md   |   8 +-
 examples/openclaw.md                          |   2 +-
 scripts/bin/ncl                               |  11 --
 scripts/bin/nemoclaw                          | 143 ++++++++++++++++++
 tasks/gator.toml                              |   4 +-
 tasks/scripts/cluster-bootstrap.sh            |  26 +++-
 tasks/scripts/sandbox.sh                      | 111 ++++++++++++++
 21 files changed, 367 insertions(+), 102 deletions(-)
 delete mode 100755 scripts/bin/ncl
 create mode 100755 scripts/bin/nemoclaw
 create mode 100755 tasks/scripts/sandbox.sh

diff --git a/.agents/skills/debug-navigator-cluster/SKILL.md b/.agents/skills/debug-navigator-cluster/SKILL.md
index 94da5920..238c10cb 100644
--- a/.agents/skills/debug-navigator-cluster/SKILL.md
+++ b/.agents/skills/debug-navigator-cluster/SKILL.md
@@ -1,17 +1,17 @@
 ---
 name: debug-navigator-cluster
-description: Debug why a nemoclaw cluster failed to start or is unhealthy. Use when the user has a failed `ncl cluster admin deploy`, cluster health check failure, or wants to diagnose cluster infrastructure issues. Trigger keywords - debug cluster, cluster failing, cluster not starting, deploy failed, cluster troubleshoot, cluster health, cluster diagnose, why won't my cluster start, health check failed.
+description: Debug why a nemoclaw cluster failed to start or is unhealthy. Use when the user has a failed `nemoclaw cluster admin deploy`, cluster health check failure, or wants to diagnose cluster infrastructure issues. Trigger keywords - debug cluster, cluster failing, cluster not starting, deploy failed, cluster troubleshoot, cluster health, cluster diagnose, why won't my cluster start, health check failed.
 ---
 
 # Debug NemoClaw Cluster
 
-Diagnose why a nemoclaw cluster failed to start after `ncl cluster admin deploy`.
+Diagnose why a nemoclaw cluster failed to start after `nemoclaw cluster admin deploy`.
 
 ## Overview
 
-`ncl cluster admin deploy` creates a Docker container running k3s with the NemoClaw server and Envoy Gateway deployed via Helm. The deployment stages, in order, are:
+`nemoclaw cluster admin deploy` creates a Docker container running k3s with the NemoClaw server and Envoy Gateway deployed via Helm. The deployment stages, in order, are:
 
-1. **Pre-deploy check**: `ncl cluster admin deploy` in interactive mode prompts to **reuse** (keep volume, clean stale nodes) or **recreate** (destroy everything, fresh start). `mise run cluster` always recreates before deploy.
+1. **Pre-deploy check**: `nemoclaw cluster admin deploy` in interactive mode prompts to **reuse** (keep volume, clean stale nodes) or **recreate** (destroy everything, fresh start). `mise run cluster` always recreates before deploy.
 2. Ensure cluster image is available (local build or remote pull)
 3. Create Docker network (`navigator-cluster`) and volume (`navigator-cluster-{name}`)
 4. Create and start a privileged Docker container (`navigator-cluster-{name}`)
@@ -31,7 +31,7 @@ For local deploys, metadata endpoint selection now depends on Docker connectivit
 - default local Docker socket (`unix:///var/run/docker.sock`): `https://127.0.0.1:{port}` (default port 8080)
 - TCP Docker daemon (`DOCKER_HOST=tcp://<host>:<port>`): `https://<host>:{port}` for non-loopback hosts
 
-The host port is configurable via `--port` on `ncl cluster admin deploy` (default 8080) and is stored in `ClusterMetadata.gateway_port`.
+The host port is configurable via `--port` on `nemoclaw cluster admin deploy` (default 8080) and is stored in `ClusterMetadata.gateway_port`.
 
 The TCP host is also added as an extra gateway TLS SAN so mTLS hostname validation succeeds.
 
@@ -40,7 +40,7 @@ The default cluster name is `nemoclaw`. The container is `navigator-cluster-{nam
 ## Prerequisites
 
 - Docker must be running (locally or on the remote host)
-- The `ncl` CLI must be available
+- The `nemoclaw` CLI must be available
 - For remote clusters: SSH access to the remote host
 
 ## Workflow
@@ -331,7 +331,7 @@ docker -H ssh://<host> logs navigator-cluster-<name>
 **Setting up kubectl access** (requires tunnel):
 
 ```bash
-ncl cluster admin tunnel --name <name> --remote <host>
+nemoclaw cluster admin tunnel --name <name> --remote <host>
 # Then in another terminal:
 export KUBECONFIG=~/.config/nemoclaw/clusters/<name>/kubeconfig
 kubectl get pods -A
diff --git a/.agents/skills/tui-development/SKILL.md b/.agents/skills/tui-development/SKILL.md
index 5a1de658..f2986948 100644
--- a/.agents/skills/tui-development/SKILL.md
+++ b/.agents/skills/tui-development/SKILL.md
@@ -9,9 +9,9 @@ Comprehensive reference for any agent working on the Gator TUI.
 
 ## 1. Overview
 
-Gator is a ratatui-based terminal UI for the NemoClaw platform. It provides a keyboard-driven interface for managing clusters, sandboxes, and logs — the same operations available via the `ncl` CLI, but with a live, interactive dashboard.
+Gator is a ratatui-based terminal UI for the NemoClaw platform. It provides a keyboard-driven interface for managing clusters, sandboxes, and logs — the same operations available via the `nemoclaw` CLI, but with a live, interactive dashboard.
 
-- **Launched via:** `ncl gator` or `mise run gator`
+- **Launched via:** `nemoclaw gator` or `mise run gator`
 - **Crate:** `crates/navigator-tui/`
 - **Key dependencies:**
   - `ratatui` (workspace version) — uses `frame.size()` (not `frame.area()`)
@@ -225,14 +225,14 @@ The `confirm_delete` flag in `App` gates destructive key handling — while true
 
 ### CLI parity
 
-Gator actions should parallel `ncl` CLI commands so users have familiar mental models:
+Gator actions should parallel `nemoclaw` CLI commands so users have familiar mental models:
 
 | CLI Command | Gator Equivalent |
 | --- | --- |
-| `ncl sandbox list` | Sandbox table on Dashboard |
-| `ncl sandbox delete <name>` | `[d]` on sandbox detail, then `[y]` to confirm |
-| `ncl sandbox logs <name>` | `[l]` on sandbox detail to open log viewer |
-| `ncl cluster health` | Status in title bar + cluster list |
+| `nemoclaw sandbox list` | Sandbox table on Dashboard |
+| `nemoclaw sandbox delete <name>` | `[d]` on sandbox detail, then `[y]` to confirm |
+| `nemoclaw sandbox logs <name>` | `[l]` on sandbox detail to open log viewer |
+| `nemoclaw cluster health` | Status in title bar + cluster list |
 
 When adding new TUI features, check what the CLI offers and maintain consistency.
 
diff --git a/.env.example b/.env.example
index fed22216..3c52d421 100644
--- a/.env.example
+++ b/.env.example
@@ -11,7 +11,7 @@
 # basename (e.g. "nemoclaw-c").
 #CLUSTER_NAME=nemoclaw-c
 
-# Default cluster name used by `ncl` commands in this repo when `--cluster`
+# Default cluster name used by `nemoclaw` commands in this repo when `--cluster`
 # is not provided. Usually matches CLUSTER_NAME.
 #NEMOCLAW_CLUSTER=nemoclaw-c
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 8cdadacf..aaa7b71c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -37,18 +37,18 @@ mise trust
 mise run sandbox
 ```
 
-## `ncl` Shortcut
+## `nemoclaw` Shortcut
 
-Inside this repository, `ncl` is a local shortcut script at `scripts/bin/ncl`. The script will
+Inside this repository, `nemoclaw` is a local shortcut script at `scripts/bin/nemoclaw`. The script:
 
 1. Builds `navigator-cli` if needed.
 2. Runs the local debug CLI binary (`target/debug/nemoclaw`).
 
-Because `mise` adds `scripts/bin` to `PATH` for this project, you can run `ncl` directly from the repo.
+Because `mise` adds `scripts/bin` to `PATH` for this project, you can run `nemoclaw` directly from the repo.
 
 ```bash
-ncl --help
-ncl sandbox create -- codex
+nemoclaw --help
+nemoclaw sandbox create -- codex
 ```
 
 ## Main Tasks
@@ -108,7 +108,7 @@ This project uses [Conventional Commits](https://www.conventionalcommits.org/).
 **Examples:**
 
 ```
-feat(cli): add --verbose flag to ncl run
+feat(cli): add --verbose flag to nemoclaw run
 fix(sandbox): handle timeout errors gracefully
 docs: update installation instructions
 chore(deps): bump tokio to 1.40
diff --git a/TESTING.md b/TESTING.md
index 69d21137..bdde8105 100644
--- a/TESTING.md
+++ b/TESTING.md
@@ -146,4 +146,4 @@ parsing CLI output.
 | Variable | Purpose |
 |---|---|
 | `NEMOCLAW_CLUSTER` | Override active cluster name for E2E tests |
-| `NAV_BIN` | Override `ncl` binary path in bash E2E tests |
+| `NAV_BIN` | Override `nemoclaw` binary path in bash E2E tests |
diff --git a/architecture/README.md b/architecture/README.md
index 562262b5..99ca9908 100644
--- a/architecture/README.md
+++ b/architecture/README.md
@@ -120,7 +120,7 @@ The target onboarding experience is two commands:
 
 ```bash
 pip install <package>
-ncl sandbox create --remote user@host -- claude
+nemoclaw sandbox create --remote user@host -- claude
 ```
 
 The first command installs the CLI. The second command bootstraps the cluster on the remote host (if needed) and launches a sandbox running the specified agent.
@@ -190,7 +190,7 @@ The inference routing system transparently intercepts AI inference API calls fro
 - Agents need zero code changes -- standard OpenAI/Anthropic SDK calls work transparently.
 - The sandbox never sees the real API key for the backend -- credential isolation is maintained.
 - Policy controls which routes a sandbox can access via `inference.allowed_routes`.
-- Routes are managed as server-side resources via CLI (`ncl inference create/update/delete/list`).
+- Routes are managed as server-side resources via CLI (`nemoclaw inference create/update/delete/list`).
 
 **Inference routes** are stored on the gateway as protobuf objects (`InferenceRoute` in `proto/inference.proto`) and have these fields: `routing_hint` (name for policy matching), `base_url` (backend endpoint), `protocols` (supported API protocols like `openai_chat_completions` or `anthropic_messages`), `api_key`, `model_id`, and `enabled` flag.
 
@@ -239,12 +239,12 @@ For more detail, see [Policy Language](security-policy.md).
 
 The CLI is the primary way users interact with the platform. It provides commands organized into four groups:
 
-- **Cluster management** (`ncl cluster`): Deploy, stop, destroy, and inspect clusters. Supports both local and remote (SSH) targets. Includes a tunnel command for accessing the Kubernetes API on remote clusters.
-- **Sandbox management** (`ncl sandbox`): Create sandboxes (with optional file sync and provider auto-discovery), list running sandboxes, connect to sandboxes via SSH, and delete sandboxes.
-- **Provider management** (`ncl provider`): Create, update, list, and delete external service credentials.
-- **Inference management** (`ncl inference`): Configure routing rules for AI model API endpoints.
+- **Cluster management** (`nemoclaw cluster`): Deploy, stop, destroy, and inspect clusters. Supports both local and remote (SSH) targets. Includes a tunnel command for accessing the Kubernetes API on remote clusters.
+- **Sandbox management** (`nemoclaw sandbox`): Create sandboxes (with optional file sync and provider auto-discovery), list running sandboxes, connect to sandboxes via SSH, and delete sandboxes.
+- **Provider management** (`nemoclaw provider`): Create, update, list, and delete external service credentials.
+- **Inference management** (`nemoclaw inference`): Configure routing rules for AI model API endpoints.
 
-The CLI resolves which cluster to operate on through a priority chain: explicit `--cluster` flag, then the `NEMOCLAW_CLUSTER` environment variable, then the active cluster set by `ncl cluster use`. It supports TLS client certificates for mutual authentication with the gateway.
+The CLI resolves which cluster to operate on through a priority chain: explicit `--cluster` flag, then the `NEMOCLAW_CLUSTER` environment variable, then the active cluster set by `nemoclaw cluster use`. It supports TLS client certificates for mutual authentication with the gateway.
 
 ## How Users Get Started
 
@@ -259,7 +259,7 @@ pip install <package>
 **Step 2: Create a sandbox.**
 
 ```bash
-ncl sandbox create -- claude
+nemoclaw sandbox create -- claude
 ```
 
 If no cluster exists, the CLI automatically bootstraps one. It provisions a local Kubernetes cluster inside a Docker container, waits for it to become healthy, discovers the user's AI provider credentials from local configuration files, uploads them to the gateway, and launches a sandbox running the specified agent -- all from a single command.
@@ -267,7 +267,7 @@ If no cluster exists, the CLI automatically bootstraps one. It provisions a loca
 For remote deployment (running the sandbox on a different machine):
 
 ```bash
-ncl sandbox create --remote user@hostname -- claude
+nemoclaw sandbox create --remote user@hostname -- claude
 ```
 
 This performs the same bootstrap flow on the remote host via SSH.
@@ -275,7 +275,7 @@ This performs the same bootstrap flow on the remote host via SSH.
 **Step 3: Connect to a running sandbox.**
 
 ```bash
-ncl sandbox connect <sandbox-name>
+nemoclaw sandbox connect <sandbox-name>
 ```
 
 This opens an interactive SSH session into the sandbox, with all provider credentials available as environment variables.
diff --git a/architecture/cluster-single-node.md b/architecture/cluster-single-node.md
index be4de4eb..208dc1c0 100644
--- a/architecture/cluster-single-node.md
+++ b/architecture/cluster-single-node.md
@@ -39,18 +39,18 @@ Out of scope:
 
 ## CLI Commands
 
-All cluster lifecycle commands live under `ncl cluster admin`:
+All cluster lifecycle commands live under `nemoclaw cluster admin`:
 
 | Command | Description |
 |---|---|
-| `ncl cluster admin deploy [--name NAME] [--remote user@host] [--ssh-key PATH]` | Provision or update a cluster |
-| `ncl cluster admin stop [--name NAME] [--remote user@host]` | Stop the container (preserves state) |
-| `ncl cluster admin destroy [--name NAME] [--remote user@host]` | Destroy container, attached volumes, kubeconfig directory, metadata, and network |
-| `ncl cluster admin info [--name NAME]` | Show deployment details (endpoint, kubeconfig path, SSH host) |
-| `ncl cluster admin tunnel [--name NAME] [--remote user@host] [--print-command]` | Start or print SSH tunnel for kubectl access |
-| `ncl cluster status` | Show gateway health via gRPC/HTTP |
-| `ncl cluster use <name>` | Set the active cluster |
-| `ncl cluster list` | List all clusters with metadata |
+| `nemoclaw cluster admin deploy [--name NAME] [--remote user@host] [--ssh-key PATH]` | Provision or update a cluster |
+| `nemoclaw cluster admin stop [--name NAME] [--remote user@host]` | Stop the container (preserves state) |
+| `nemoclaw cluster admin destroy [--name NAME] [--remote user@host]` | Destroy container, attached volumes, kubeconfig directory, metadata, and network |
+| `nemoclaw cluster admin info [--name NAME]` | Show deployment details (endpoint, kubeconfig path, SSH host) |
+| `nemoclaw cluster admin tunnel [--name NAME] [--remote user@host] [--print-command]` | Start or print SSH tunnel for kubectl access |
+| `nemoclaw cluster status` | Show gateway health via gRPC/HTTP |
+| `nemoclaw cluster use <name>` | Set the active cluster |
+| `nemoclaw cluster list` | List all clusters with metadata |
 
 The `--name` flag defaults to `"nemoclaw"`. When omitted on commands that accept it, the CLI resolves the active cluster via: `--cluster` flag, then `NEMOCLAW_CLUSTER` env, then `~/.config/nemoclaw/active_cluster` file.
 
@@ -76,7 +76,7 @@ sequenceDiagram
   participant L as Local Docker daemon
   participant R as Remote Docker daemon (SSH)
 
-  U->>C: ncl cluster admin deploy --remote user@host
+  U->>C: nemoclaw cluster admin deploy --remote user@host
   C->>B: deploy_cluster(DeployOptions)
 
   B->>B: create_ssh_docker_client (ssh://, 600s timeout)
@@ -328,7 +328,7 @@ ssh -L 6443:127.0.0.1:6443 -N user@host
 CLI helper:
 
 ```bash
-ncl cluster admin tunnel --name <name>
+nemoclaw cluster admin tunnel --name <name>
 ```
 
 The `--remote` flag is optional; the CLI resolves the SSH destination from stored cluster metadata. Pass `--print-command` to print the SSH command without executing it.
@@ -378,7 +378,7 @@ The `--remote` flag is optional; the CLI resolves the SSH destination from store
 
 ## Auto-Bootstrap from `sandbox create`
 
-When `ncl sandbox create` cannot connect to a cluster (connection refused, DNS error, missing default TLS certs), the CLI offers to bootstrap one automatically:
+When `nemoclaw sandbox create` cannot connect to a cluster (connection refused, DNS error, missing default TLS certs), the CLI offers to bootstrap one automatically:
 
 1. `should_attempt_bootstrap()` in `crates/navigator-cli/src/bootstrap.rs` checks the error type. It returns `true` for connectivity errors and missing default TLS materials, but `false` for TLS handshake/auth errors.
 2. If running in a terminal, the user is prompted to confirm.
diff --git a/architecture/sandbox-connect.md b/architecture/sandbox-connect.md
index 6867949c..a8201e61 100644
--- a/architecture/sandbox-connect.md
+++ b/architecture/sandbox-connect.md
@@ -152,7 +152,7 @@ The `sandbox exec` path is identical to interactive connect except:
 
 ### Port Forwarding (`sandbox forward start`)
 
-`ncl sandbox forward start <port> <name>` opens a local SSH tunnel so connections to `127.0.0.1:<port>`
+`nemoclaw sandbox forward start <port> <name>` opens a local SSH tunnel so connections to `127.0.0.1:<port>`
 on the host are forwarded to `127.0.0.1:<port>` inside the sandbox.
 
 #### CLI
@@ -162,13 +162,13 @@ on the host are forwarded to `127.0.0.1:<port>` inside the sandbox.
 - By default stays attached in foreground until interrupted (Ctrl+C).
 - With `-d`/`--background`, SSH forks after auth and the CLI exits. The PID is
   tracked in `~/.config/nemoclaw/forwards/<name>-<port>.pid` along with sandbox id metadata.
-- `ncl sandbox forward stop <port> <name>` validates PID ownership and then kills a background forward.
-- `ncl sandbox forward list` shows all tracked forwards.
-- `ncl sandbox forward stop` and `ncl sandbox forward list` are local operations and do not require
+- `nemoclaw sandbox forward stop <port> <name>` validates PID ownership and then kills a background forward.
+- `nemoclaw sandbox forward list` shows all tracked forwards.
+- `nemoclaw sandbox forward stop` and `nemoclaw sandbox forward list` are local operations and do not require
   resolving an active cluster.
-- `ncl sandbox create --forward <port>` starts a background forward before connect/exec, including
+- `nemoclaw sandbox create --forward <port>` starts a background forward before connect/exec, including
   when no trailing command is provided.
-- `ncl sandbox delete` auto-stops any active forwards for the deleted sandbox.
+- `nemoclaw sandbox delete` auto-stops any active forwards for the deleted sandbox.
 
 #### TUI (Gator)
 
@@ -287,16 +287,16 @@ When `--sync` is passed to `sandbox create`, the CLI pushes local git-tracked fi
 3. `sandbox_sync_up_files()` creates an SSH session config, spawns `ssh <proxy> sandbox "tar xf - -C /sandbox"`, and streams a tar archive of the file list to the SSH child's stdin using the `tar` crate
 4. Files land in `/sandbox` inside the container
 
-#### `ncl sandbox sync` command
+#### `nemoclaw sandbox sync` command
 
 The standalone `sandbox sync` subcommand supports bidirectional file transfer:
 
 ```bash
 # Push local files up to sandbox
-ncl sandbox sync <name> --up <local-path> [<sandbox-path>]
+nemoclaw sandbox sync <name> --up <local-path> [<sandbox-path>]
 
 # Pull sandbox files down to local
-ncl sandbox sync <name> --down <sandbox-path> [<local-path>]
+nemoclaw sandbox sync <name> --down <sandbox-path> [<local-path>]
 ```
 
 - **Push (`--up`)**: `sandbox_sync_up()` streams a tar archive of the local path to `ssh ... tar xf - -C <dest>` on the sandbox side. Default destination: `/sandbox`.
diff --git a/architecture/sandbox-custom-containers.md b/architecture/sandbox-custom-containers.md
index 8e183760..4eced89b 100644
--- a/architecture/sandbox-custom-containers.md
+++ b/architecture/sandbox-custom-containers.md
@@ -1,6 +1,6 @@
 # Sandbox Custom Containers
 
-Users can run `ncl sandbox create --image <any-linux-image>` to launch a sandbox with an arbitrary container image while keeping the `navigator-sandbox` process supervisor in control.
+Users can run `nemoclaw sandbox create --image <any-linux-image>` to launch a sandbox with an arbitrary container image while keeping the `navigator-sandbox` process supervisor in control.
 
 ## How It Works
 
@@ -40,7 +40,7 @@ These transforms apply to both generated templates and user-provided `pod_templa
 ### Creating a sandbox with a custom image
 
 ```bash
-ncl sandbox create --image myimage:latest -- echo "hello from custom container"
+nemoclaw sandbox create --image myimage:latest -- echo "hello from custom container"
 ```
 
 When `--image` is set the CLI clears the default `run_as_user`/`run_as_group` policy (which expects a `sandbox` user) so that arbitrary images that lack that user can start without error.
@@ -48,11 +48,11 @@ When `--image` is set the CLI clears the default `run_as_user`/`run_as_group` po
 ### Pushing custom images into the cluster
 
 ```bash
-ncl sandbox image push --dockerfile ./Dockerfile --tag my-sandbox:latest
-ncl sandbox create --image my-sandbox:latest
+nemoclaw sandbox image push --dockerfile ./Dockerfile --tag my-sandbox:latest
+nemoclaw sandbox create --image my-sandbox:latest
 ```
 
-`ncl sandbox image push` accepts:
+`nemoclaw sandbox image push` accepts:
 
 | Flag | Description |
 |------|-------------|
diff --git a/architecture/sandbox-providers.md b/architecture/sandbox-providers.md
index 32f607bc..a30f9bb3 100644
--- a/architecture/sandbox-providers.md
+++ b/architecture/sandbox-providers.md
@@ -49,7 +49,7 @@ The gRPC surface is defined in `proto/navigator.proto`:
   - provider registry and per-provider discovery plugins,
   - shared discovery engine and context abstraction for testability.
 - `crates/navigator-cli`
-  - `ncl provider ...` command handlers,
+  - `nemoclaw provider ...` command handlers,
   - sandbox provider requirement resolution in `sandbox create`.
 - `crates/navigator-server` (gateway)
   - provider CRUD gRPC handlers,
@@ -158,7 +158,7 @@ This keeps provider tests isolated from host environment and filesystem.
 
 ### Provider CRUD
 
-`ncl provider create --type <type> --name <name> [--from-existing] [--credential k=v]... [--config k=v]...`
+`nemoclaw provider create --type <type> --name <name> [--from-existing] [--credential k=v]... [--config k=v]...`
 
 - `--credential` supports `KEY=VALUE` and `KEY` forms.
   - `KEY=VALUE` sets an explicit credential value.
@@ -169,14 +169,14 @@ This keeps provider tests isolated from host environment and filesystem.
 
 Also supported:
 
-- `ncl provider get <name>`
-- `ncl provider list`
-- `ncl provider update <name> ...`
-- `ncl provider delete <name> [<name>...]`
+- `nemoclaw provider get <name>`
+- `nemoclaw provider list`
+- `nemoclaw provider update <name> ...`
+- `nemoclaw provider delete <name> [<name>...]`
 
 ### Sandbox Create
 
-`ncl sandbox create --provider gitlab -- claude`
+`nemoclaw sandbox create --provider gitlab -- claude`
 
 Resolution logic (CLI side, `crates/navigator-cli/src/run.rs`):
 
@@ -272,7 +272,7 @@ isolation, privilege dropping, seccomp, and Landlock restrictions via `pre_exec`
 
 **2. SSH shell sessions** (`crates/navigator-sandbox/src/ssh.rs`):
 
-When a user connects via `ncl sandbox connect`, a PTY shell is spawned:
+When a user connects via `nemoclaw sandbox connect`, a PTY shell is spawned:
 
 ```rust
 let mut cmd = Command::new(shell);
@@ -293,7 +293,7 @@ passes it to `spawn_pty_shell()` for each new shell or exec request.
 ### End-to-End Flow
 
 ```
-CLI: ncl sandbox create -- claude
+CLI: nemoclaw sandbox create -- claude
   |
   +-- detect_provider_from_command(["claude"]) -> "claude"
   +-- ensure_required_providers() -> discovers local ANTHROPIC_API_KEY
diff --git a/architecture/system-architecture.md b/architecture/system-architecture.md
index 07335191..ad92f000 100644
--- a/architecture/system-architecture.md
+++ b/architecture/system-architecture.md
@@ -6,8 +6,8 @@ graph TB
     %% USER'S MACHINE
     %% ============================================================
     subgraph UserMachine["User's Machine"]
-        CLI["NemoClaw CLI<br/>(ncl)"]
-        TUI["Gator TUI<br/>(ncl gator)"]
+        CLI["NemoClaw CLI<br/>(nemoclaw)"]
+        TUI["Gator TUI<br/>(nemoclaw gator)"]
         SDK["Python SDK<br/>(nemoclaw)"]
         LocalConfig["~/.config/nemoclaw/<br/>clusters, mTLS certs,<br/>active_cluster"]
     end
diff --git a/e2e/bash/test_port_forward.sh b/e2e/bash/test_port_forward.sh
index 125fdd08..67b5214a 100755
--- a/e2e/bash/test_port_forward.sh
+++ b/e2e/bash/test_port_forward.sh
@@ -6,8 +6,8 @@
 # Integration test for port forwarding through a sandbox.
 #
 # Prerequisites:
-#   - A running nemoclaw cluster (ncl cluster admin deploy)
-#   - The `ncl` binary on PATH (or set NAV_BIN)
+#   - A running nemoclaw cluster (nemoclaw cluster admin deploy)
+#   - The `nemoclaw` binary on PATH (or set NAV_BIN)
 #
 # Usage:
 #   ./e2e/bash/test_port_forward.sh
@@ -27,7 +27,7 @@ if [[ -n "${NAV_BIN:-}" ]]; then
 elif [[ -x "${PROJECT_ROOT}/target/debug/nemoclaw" ]]; then
   NAV="${PROJECT_ROOT}/target/debug/nemoclaw"
 else
-  NAV="ncl"
+  NAV="nemoclaw"
 fi
 
 FORWARD_PORT="${FORWARD_PORT:-19876}"
diff --git a/e2e/bash/test_sandbox_custom_image.sh b/e2e/bash/test_sandbox_custom_image.sh
index 433a07de..a92fa9f4 100755
--- a/e2e/bash/test_sandbox_custom_image.sh
+++ b/e2e/bash/test_sandbox_custom_image.sh
@@ -7,13 +7,13 @@
 # with it.
 #
 # Verifies the full flow:
-#   1. ncl sandbox image push --dockerfile <path>  (build + import into cluster)
-#   2. ncl sandbox create --image <tag> -- <cmd>   (run sandbox with custom image)
+#   1. nemoclaw sandbox image push --dockerfile <path>  (build + import into cluster)
+#   2. nemoclaw sandbox create --image <tag> -- <cmd>   (run sandbox with custom image)
 #
 # Prerequisites:
-#   - A running nemoclaw cluster (ncl cluster admin deploy)
+#   - A running nemoclaw cluster (nemoclaw cluster admin deploy)
 #   - Docker daemon running (for image build)
-#   - The `ncl` binary on PATH (or set NAV_BIN)
+#   - The `nemoclaw` binary on PATH (or set NAV_BIN)
 #
 # Usage:
 #   ./e2e/bash/test_sandbox_custom_image.sh
@@ -32,7 +32,7 @@ if [[ -n "${NAV_BIN:-}" ]]; then
 elif [[ -x "${PROJECT_ROOT}/target/debug/nemoclaw" ]]; then
   NAV="${PROJECT_ROOT}/target/debug/nemoclaw"
 else
-  NAV="ncl"
+  NAV="nemoclaw"
 fi
 
 IMAGE_TAG="e2e-custom-image:test-$(date +%s)"
diff --git a/e2e/bash/test_sandbox_sync.sh b/e2e/bash/test_sandbox_sync.sh
index 717a8228..dee01b51 100755
--- a/e2e/bash/test_sandbox_sync.sh
+++ b/e2e/bash/test_sandbox_sync.sh
@@ -6,14 +6,14 @@
 # Integration test for bidirectional file sync with a sandbox.
 #
 # Verifies the full flow:
-#   1. ncl sandbox create --keep  (long-running sandbox for sync tests)
-#   2. ncl sandbox sync <name> --up <local> <sandbox-dest>  (push)
-#   3. ncl sandbox sync <name> --down <sandbox-path> <local-dest>  (pull)
+#   1. nemoclaw sandbox create --keep  (long-running sandbox for sync tests)
+#   2. nemoclaw sandbox sync <name> --up <local> <sandbox-dest>  (push)
+#   3. nemoclaw sandbox sync <name> --down <sandbox-path> <local-dest>  (pull)
 #   4. Single-file round-trip
 #
 # Prerequisites:
 #   - A running nemoclaw cluster (nemoclaw cluster admin deploy)
-#   - The `ncl` binary on PATH (or set NAV_BIN)
+#   - The `nemoclaw` binary on PATH (or set NAV_BIN)
 #
 # Usage:
 #   ./e2e/bash/test_sandbox_sync.sh
@@ -32,7 +32,7 @@ if [[ -n "${NAV_BIN:-}" ]]; then
 elif [[ -x "${PROJECT_ROOT}/target/debug/nemoclaw" ]]; then
   NAV="${PROJECT_ROOT}/target/debug/nemoclaw"
 else
-  NAV="ncl"
+  NAV="nemoclaw"
 fi
 
 SANDBOX_NAME=""
diff --git a/examples/bring-your-own-container/README.md b/examples/bring-your-own-container/README.md
index bae94fa9..72c4e3fc 100644
--- a/examples/bring-your-own-container/README.md
+++ b/examples/bring-your-own-container/README.md
@@ -6,7 +6,7 @@ your local machine through port forwarding.
 
 ## Prerequisites
 
-- A running NemoClaw cluster (`ncl cluster admin deploy`)
+- A running NemoClaw cluster (`nemoclaw cluster admin deploy`)
 - Docker daemon running
 
 ## What's in this example
@@ -21,7 +21,7 @@ your local machine through port forwarding.
 ### 1. Build and push the image
 
 ```bash
-ncl sandbox image push \
+nemoclaw sandbox image push \
     --dockerfile examples/bring-your-own-container/Dockerfile \
     --tag        byoc-demo:latest
 ```
@@ -29,7 +29,7 @@ ncl sandbox image push \
 ### 2. Create a sandbox with port forwarding
 
 ```bash
-ncl sandbox create --image byoc-demo:latest --forward 8080 -- python /sandbox/app.py
+nemoclaw sandbox create --image byoc-demo:latest --forward 8080 -- python /sandbox/app.py
 ```
 
 The `--forward 8080` flag opens an SSH tunnel so `localhost:8080` on your
@@ -94,5 +94,5 @@ bridges the tunnel to `127.0.0.1:<port>` inside the container.
 Delete the sandbox when you're done (this also stops port forwards):
 
 ```bash
-ncl sandbox delete <sandbox-name>
+nemoclaw sandbox delete <sandbox-name>
 ```
diff --git a/examples/openclaw.md b/examples/openclaw.md
index 39eccaac..a804df8c 100644
--- a/examples/openclaw.md
+++ b/examples/openclaw.md
@@ -3,7 +3,7 @@
 ## Quick start
 
 ```sh
-ncl sandbox create --forward 18789 -- openclaw-start
+nemoclaw sandbox create --forward 18789 -- openclaw-start
 ```
 
 `openclaw-start` is a helper script pre-installed in the sandbox that runs the
diff --git a/scripts/bin/ncl b/scripts/bin/ncl
deleted file mode 100755
index 0ca043bb..00000000
--- a/scripts/bin/ncl
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
-BINARY="$PROJECT_ROOT/target/debug/nemoclaw"
-
-# Build if needed (cargo handles change detection)
-cargo build --package navigator-cli --quiet
-
-exec "$BINARY" "$@"
diff --git a/scripts/bin/nemoclaw b/scripts/bin/nemoclaw
new file mode 100755
index 00000000..748833d7
--- /dev/null
+++ b/scripts/bin/nemoclaw
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+#
+# Development wrapper for the nemoclaw CLI.
+#
+# Rebuilds target/debug/nemoclaw when the sources it depends on have changed
+# since the last recorded build, then execs the binary with the caller's
+# arguments.  The caller's working directory is never changed, so relative
+# path arguments resolve where the user expects.
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+BINARY="$PROJECT_ROOT/target/debug/nemoclaw"
+STATE_FILE="$PROJECT_ROOT/.cache/nemoclaw-build.state"
+
+# ---------------------------------------------------------------------------
+# Fingerprint-based rebuild check
+#
+# Mirrors the approach in tasks/scripts/cluster-deploy-fast.sh: collect dirty
+# files from git, filter to paths in the navigator-cli dependency closure,
+# hash their contents, and compare against a persisted state file.  We also
+# track HEAD so that branch switches / pulls are detected.
+# ---------------------------------------------------------------------------
+
+# Succeed when the repo-relative path $1 can affect the navigator-cli binary.
+matches_cli() {
+  local path=$1
+  case "$path" in
+    Cargo.toml|Cargo.lock|proto/*)
+      return 0 ;;
+    crates/navigator-cli/*|crates/navigator-core/*|crates/navigator-bootstrap/*)
+      return 0 ;;
+    crates/navigator-policy/*|crates/navigator-providers/*|crates/navigator-tui/*)
+      return 0 ;;
+    *)
+      return 1 ;;
+  esac
+}
+
+# Print the current HEAD commit (detects branch switches, pulls, rebases),
+# or "unknown" when git cannot resolve it.
+resolve_head() {
+  git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "unknown"
+}
+
+# Print a composite SHA-256 over the dirty (modified, staged, untracked)
+# files accepted by matches_cli, or nothing when there are none.
+#
+# The body is a subshell so the `cd` needed to resolve git's relative paths
+# never leaks into the caller.
+compute_fingerprint() (
+  cd "$PROJECT_ROOT" || exit 1
+
+  payload=""
+  # A read loop (rather than mapfile) keeps this working on bash 3.2.
+  while IFS= read -r path; do
+    matches_cli "$path" || continue
+    if [[ -e "$path" ]]; then
+      digest=$(shasum -a 256 "$path" | cut -d ' ' -f 1)
+    else
+      # Deleted files still contribute, so a deletion changes the result.
+      digest="__MISSING__"
+    fi
+    payload+="${path}:${digest}"$'\n'
+  done < <(
+    {
+      git diff --name-only 2>/dev/null
+      git diff --name-only --cached 2>/dev/null
+      git ls-files --others --exclude-standard 2>/dev/null
+    } | sort -u
+  )
+
+  if [[ -n "$payload" ]]; then
+    printf '%s' "$payload" | shasum -a 256 | cut -d ' ' -f 1
+  fi
+)
+
+needs_build=0
+
+if [[ ! -x "$BINARY" ]]; then
+  needs_build=1
+else
+  current_head=$(resolve_head)
+  current_fingerprint=$(compute_fingerprint)
+
+  # Load previous state
+  previous_head=""
+  previous_fingerprint=""
+  if [[ -f "$STATE_FILE" ]]; then
+    while IFS='=' read -r key value; do
+      case "$key" in
+        head) previous_head=$value ;;
+        fingerprint) previous_fingerprint=$value ;;
+      esac
+    done < "$STATE_FILE"
+  fi
+
+  # Decide whether to rebuild
+  if [[ "$current_head" != "$previous_head" ]]; then
+    needs_build=1
+  elif [[ "$current_fingerprint" != "$previous_fingerprint" ]]; then
+    needs_build=1
+  fi
+fi
+
+if [[ "$needs_build" == "1" ]]; then
+  echo "Recompiling navigator-cli..." >&2
+  # Build from the project root inside a subshell: cargo finds the workspace
+  # wherever the wrapper was invoked from, and our own cwd stays unchanged.
+  (cd "$PROJECT_ROOT" && cargo build --package navigator-cli --quiet)
+
+  # Persist state after successful build
+  new_head=$(resolve_head)
+  new_fingerprint=$(compute_fingerprint)
+  mkdir -p "$(dirname "$STATE_FILE")"
+  cat > "$STATE_FILE" <<EOF
+head=${new_head}
+fingerprint=${new_fingerprint}
+EOF
+fi
+
+exec "$BINARY" "$@"
diff --git a/tasks/gator.toml b/tasks/gator.toml
index e64c04ce..1420d204 100644
--- a/tasks/gator.toml
+++ b/tasks/gator.toml
@@ -5,10 +5,10 @@
 
 [gator]
 description = "Launch the Gator TUI"
-run = "ncl gator"
+run = "nemoclaw gator"
 hide = true
 
 ["gator:dev"]
 description = "Launch the Gator TUI with hot-reload on file changes"
-run = "cargo watch -s 'ncl gator'"
+run = "cargo watch -s 'nemoclaw gator'"
 hide = true
diff --git a/tasks/scripts/cluster-bootstrap.sh b/tasks/scripts/cluster-bootstrap.sh
index 1149ab66..7f1129b4 100755
--- a/tasks/scripts/cluster-bootstrap.sh
+++ b/tasks/scripts/cluster-bootstrap.sh
@@ -209,7 +209,7 @@ VOLUME_NAME="navigator-cluster-${CLUSTER_NAME}"
 if [ "${MODE}" = "fast" ]; then
   if docker inspect "${CONTAINER_NAME}" >/dev/null 2>&1 || docker volume inspect "${VOLUME_NAME}" >/dev/null 2>&1; then
     echo "Recreating cluster '${CLUSTER_NAME}' from scratch..."
-    ncl cluster admin destroy --name "${CLUSTER_NAME}"
+    nemoclaw cluster admin destroy --name "${CLUSTER_NAME}"
   fi
 fi
 
@@ -221,7 +221,22 @@ elif [ "${MODE}" = "build" ] || [ "${MODE}" = "fast" ]; then
   done
 fi
 
-DEPLOY_CMD=(ncl cluster admin deploy --name "${CLUSTER_NAME}" --port "${GATEWAY_PORT}" --update-kube-config)
+# Build the cluster image so it contains the latest Helm chart, manifests,
+# and entrypoint from the working tree.  This ensures the k3s container
+# always starts with the correct chart version.
+if [ "${SKIP_CLUSTER_IMAGE_BUILD:-}" != "1" ]; then
+  tasks/scripts/docker-build-cluster.sh
+fi
+
+# In fast/build modes, use the locally-built cluster image rather than the
+# remote distribution registry image.  The local image is built by
+# `docker-build-cluster.sh` and contains the bundled Helm chart and
+# manifests from the current working tree.
+if [ -z "${NEMOCLAW_CLUSTER_IMAGE:-}" ]; then
+  export NEMOCLAW_CLUSTER_IMAGE="navigator/cluster:${IMAGE_TAG}"
+fi
+
+DEPLOY_CMD=(nemoclaw cluster admin deploy --name "${CLUSTER_NAME}" --port "${GATEWAY_PORT}" --update-kube-config)
 
 if [ -n "${GATEWAY_HOST:-}" ]; then
   DEPLOY_CMD+=(--gateway-host "${GATEWAY_HOST}")
@@ -249,6 +264,13 @@ fi
 
 "${DEPLOY_CMD[@]}"
 
+# Clear the fast-deploy state file so the next incremental deploy
+# recalculates from scratch.  This prevents stale fingerprints from a
+# prior session from masking changes that the bootstrap has already baked
+# into the freshly pushed images.
+DEPLOY_FAST_STATE_FILE=${DEPLOY_FAST_STATE_FILE:-.cache/cluster-deploy-fast.state}
+rm -f "${DEPLOY_FAST_STATE_FILE}"
+
 echo ""
 echo "Cluster '${CLUSTER_NAME}' is ready."
 echo "KUBECONFIG has been updated."
diff --git a/tasks/scripts/sandbox.sh b/tasks/scripts/sandbox.sh
new file mode 100755
index 00000000..820a18f4
--- /dev/null
+++ b/tasks/scripts/sandbox.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Create or reconnect to the persistent "dev" sandbox.
+#
+# - Ensures the cluster is running (bootstraps if needed).
+# - Redeploys if local source has changed since last deploy.
+# - Recreates the sandbox if the cluster was redeployed since the sandbox
+#   was last created.
+# - Provisions an "anthropic" provider from $ANTHROPIC_API_KEY when available.
+#
+# Environment (all optional):
+#   CLUSTER_NAME            cluster name (default: basename of $PWD)
+#   SANDBOX_STATE_DIR       directory for state files (default: .cache)
+#   DEPLOY_FAST_STATE_FILE  deploy state file to fingerprint
+#                           (default: <state dir>/cluster-deploy-fast.state)
+#   usage_command           command for a newly created sandbox (default: claude)
+#   ANTHROPIC_API_KEY       when set, used to register the anthropic provider
+
+set -euo pipefail
+
+SANDBOX_NAME="dev"
+CLUSTER_NAME=${CLUSTER_NAME:-$(basename "$PWD")}
+CONTAINER_NAME="navigator-cluster-${CLUSTER_NAME}"
+STATE_DIR=${SANDBOX_STATE_DIR:-.cache}
+SANDBOX_STATE_FILE=${STATE_DIR}/sandbox-dev.state
+DEPLOY_STATE_FILE=${DEPLOY_FAST_STATE_FILE:-${STATE_DIR}/cluster-deploy-fast.state}
+# Split the command into words with read -a rather than an unquoted array
+# assignment, so words such as `*` are not glob-expanded against the cwd.
+read -r -a CMD <<< "${usage_command:-claude}"
+
+# -------------------------------------------------------------------
+# 1. Ensure the cluster is running; redeploy if dirty
+# -------------------------------------------------------------------
+if [[ -z "$(docker ps -q --filter "name=${CONTAINER_NAME}")" ]]; then
+  echo "No running cluster found. Bootstrapping..."
+fi
+# The same task serves both cases: it bootstraps a missing cluster, and for a
+# running one it is an incremental deploy that no-ops when nothing has changed.
+mise run cluster
+
+# Capture the current deploy fingerprint so we can tell later whether the
+# sandbox predates the most recent deploy.
+deploy_fingerprint=""
+if [[ -f "${DEPLOY_STATE_FILE}" ]]; then
+  deploy_fingerprint=$(shasum -a 256 "${DEPLOY_STATE_FILE}" | cut -d ' ' -f 1)
+fi
+
+# -------------------------------------------------------------------
+# 2. Decide whether to (re)create the sandbox
+# -------------------------------------------------------------------
+need_create=1
+
+if nemoclaw sandbox get "${SANDBOX_NAME}" >/dev/null 2>&1; then
+  # Sandbox exists — only recreate if the cluster has been redeployed.
+  # The command passed via `-- <cmd>` only affects the SSH exec session,
+  # not the sandbox pod itself (which always runs `sleep infinity`), so
+  # a command change never requires recreation.
+  previous_deploy_fingerprint=""
+  if [[ -f "${SANDBOX_STATE_FILE}" ]]; then
+    while IFS='=' read -r key value; do
+      case "${key}" in
+        deploy) previous_deploy_fingerprint="${value}" ;;
+      esac
+    done < "${SANDBOX_STATE_FILE}"
+  fi
+
+  # An empty fingerprint (no deploy state file) never counts as a match.
+  if [[ -n "${deploy_fingerprint}" && "${deploy_fingerprint}" == "${previous_deploy_fingerprint}" ]]; then
+    need_create=0
+  else
+    echo "Cluster has been redeployed since sandbox '${SANDBOX_NAME}' was created. Recreating..."
+    nemoclaw sandbox delete "${SANDBOX_NAME}" || true
+  fi
+fi
+
+# -------------------------------------------------------------------
+# 3. Ensure the anthropic provider exists when the key is available
+# -------------------------------------------------------------------
+ensure_anthropic_provider() {
+  if [[ -z "${ANTHROPIC_API_KEY:-}" ]]; then
+    return
+  fi
+
+  if nemoclaw provider get anthropic >/dev/null 2>&1; then
+    # Provider already registered — nothing to do.
+    return
+  fi
+
+  echo "Registering anthropic provider..."
+  # NOTE(review): KEY=VALUE puts the secret on the command line, where it may
+  # be visible in the process list.  The provider docs mention a bare KEY
+  # form; confirm whether it reads the environment and prefer it if so.
+  nemoclaw provider create \
+    --name anthropic \
+    --type claude \
+    --credential "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}"
+}
+
+ensure_anthropic_provider
+
+# -------------------------------------------------------------------
+# 4. Create or connect to the sandbox
+# -------------------------------------------------------------------
+PROVIDER_ARGS=()
+if nemoclaw provider get anthropic >/dev/null 2>&1; then
+  PROVIDER_ARGS+=(--provider anthropic)
+fi
+
+if [[ "${need_create}" == "1" ]]; then
+  echo "Creating sandbox '${SANDBOX_NAME}'..."
+  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array; a plain
+  # "${arr[@]}" aborts under `set -u` on bash older than 4.4.
+  nemoclaw sandbox create --name "${SANDBOX_NAME}" \
+    ${PROVIDER_ARGS[@]+"${PROVIDER_ARGS[@]}"} -- ${CMD[@]+"${CMD[@]}"}
+else
+  echo "Connecting to existing sandbox '${SANDBOX_NAME}'..."
+  nemoclaw sandbox connect "${SANDBOX_NAME}"
+fi
+
+# Record state so we know this sandbox matches the current deploy.
+# NOTE(review): under `set -e` this is skipped when the session above exits
+# non-zero, so the next run recreates the sandbox — confirm that is intended.
+mkdir -p "$(dirname "${SANDBOX_STATE_FILE}")"
+cat > "${SANDBOX_STATE_FILE}" <<EOF
+deploy=${deploy_fingerprint}
+EOF