From 6b322d048d4965d59ea1bd0300144fdcc9746b9b Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 18:01:20 -0800
Subject: [PATCH 1/8] feat(sandbox): support policy discovery and restrictive
 defaults on sandbox containers

Allow sandboxes to operate without a pre-configured policy by supporting
three resolution modes:

1. Policy provided at create time - sandbox loads from gateway (unchanged)
2. Policy null, valid policy on disk at /etc/navigator/policy.yaml -
   sandbox reads it from disk, syncs it to the gateway, then reads back
   the canonical version
3. Policy null, disk policy missing or invalid - sandbox uses the
   hardcoded restrictive default (all network blocked), syncs it to the
   gateway, then reads back the canonical version

Key changes:
- Add restrictive_default_policy() and CONTAINER_POLICY_PATH to navigator-policy
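- Make spec.policy optional in gateway create_sandbox
- Return an empty GetSandboxPolicy response (version 0, no policy) when
  the sandbox has no policy; fetch_policy() surfaces this as Ok(None)
- Modify UpdateSandboxPolicy to handle no-baseline case (backfill spec.policy)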
- Pass NEMOCLAW_SANDBOX_NAME env var to sandbox containers
- Add sync_policy() gRPC client method for sandbox-to-gateway policy sync
- Add disk discovery fallback in sandbox load_policy()

Closes #82
---
 Cargo.lock                                  |   1 +
 crates/navigator-policy/src/lib.rs          | 157 ++++++++++++++
 crates/navigator-sandbox/Cargo.toml         |   1 +
 crates/navigator-sandbox/src/grpc_client.rs |  53 ++++-
 crates/navigator-sandbox/src/lib.rs         | 150 ++++++++++++-
 crates/navigator-sandbox/src/main.rs        |   6 +
 crates/navigator-server/src/grpc.rs         | 220 ++++++++++++++++++--
 crates/navigator-server/src/sandbox/mod.rs  |  15 ++
 8 files changed, 579 insertions(+), 24 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 7c667886..114f9e57 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2777,6 +2777,7 @@ dependencies = [
  "libc",
  "miette",
  "navigator-core",
+ "navigator-policy",
  "navigator-router",
  "nix",
  "rand_core 0.6.4",
diff --git a/crates/navigator-policy/src/lib.rs b/crates/navigator-policy/src/lib.rs
index cec8a1ec..da43c746 100644
--- a/crates/navigator-policy/src/lib.rs
+++ b/crates/navigator-policy/src/lib.rs
@@ -381,6 +381,46 @@ pub fn default_sandbox_policy() -> SandboxPolicy {
         .expect("built-in dev-sandbox-policy.yaml must be valid")
 }
 
+/// Well-known path where a sandbox container image can ship a policy YAML file.
+///
+/// When the gateway provides no policy at sandbox creation time, the sandbox
+/// supervisor probes this path before falling back to [`restrictive_default_policy`].
+pub const CONTAINER_POLICY_PATH: &str = "/etc/navigator/policy.yaml";
+
+/// Build the fallback policy applied to sandboxes that were given no explicit
+/// policy.
+///
+/// The result allows read-only access to common system paths and read-write
+/// access to `/sandbox`, `/tmp` and `/dev/null`, runs as `sandbox:sandbox`,
+/// requests best-effort Landlock, and defines no network policies and no
+/// inference routing, so **all network access is blocked**.
+pub fn restrictive_default_policy() -> SandboxPolicy {
+    const READ_ONLY: [&str; 7] = [
+        "/usr", "/lib", "/proc", "/dev/urandom", "/app", "/etc", "/var/log",
+    ];
+    const READ_WRITE: [&str; 3] = ["/sandbox", "/tmp", "/dev/null"];
+    let filesystem = FilesystemPolicy {
+        include_workdir: true,
+        read_only: READ_ONLY.iter().map(|&p| p.into()).collect(),
+        read_write: READ_WRITE.iter().map(|&p| p.into()).collect(),
+    };
+    let landlock = LandlockPolicy {
+        compatibility: "best_effort".into(),
+    };
+    let process = ProcessPolicy {
+        run_as_user: "sandbox".into(),
+        run_as_group: "sandbox".into(),
+    };
+    SandboxPolicy {
+        version: 1,
+        filesystem: Some(filesystem),
+        landlock: Some(landlock),
+        process: Some(process),
+        network_policies: HashMap::new(),
+        inference: None,
+    }
+}
+
 /// Clear `run_as_user` / `run_as_group` from the policy's process section.
 ///
 /// Call this when a custom image is specified, since the image may lack the
@@ -527,4 +567,121 @@ inference:
         assert_eq!(patterns1[0].protocol, patterns2[0].protocol);
         assert_eq!(patterns1[0].kind, patterns2[0].kind);
     }
+
+    #[test]
+    fn restrictive_default_has_no_network_policies() {
+        let policy = restrictive_default_policy();
+        assert!(
+            policy.network_policies.is_empty(),
+            "restrictive default must block all network"
+        );
+    }
+
+    #[test]
+    fn restrictive_default_has_no_inference() {
+        let policy = restrictive_default_policy();
+        assert!(policy.inference.is_none());
+    }
+
+    #[test]
+    fn restrictive_default_has_filesystem_policy() {
+        let policy = restrictive_default_policy();
+        let fs = policy.filesystem.expect("must have filesystem policy");
+        assert!(fs.include_workdir);
+        assert!(
+            fs.read_only.iter().any(|p| p == "/usr"),
+            "read_only should contain /usr"
+        );
+        assert!(
+            fs.read_write.iter().any(|p| p == "/sandbox"),
+            "read_write should contain /sandbox"
+        );
+        assert!(
+            fs.read_write.iter().any(|p| p == "/tmp"),
+            "read_write should contain /tmp"
+        );
+    }
+
+    #[test]
+    fn restrictive_default_has_process_identity() {
+        let policy = restrictive_default_policy();
+        let proc = policy.process.expect("must have process policy");
+        assert_eq!(proc.run_as_user, "sandbox");
+        assert_eq!(proc.run_as_group, "sandbox");
+    }
+
+    #[test]
+    fn restrictive_default_has_landlock() {
+        let policy = restrictive_default_policy();
+        let ll = policy.landlock.expect("must have landlock policy");
+        assert_eq!(ll.compatibility, "best_effort");
+    }
+
+    #[test]
+    fn restrictive_default_version_is_one() {
+        let policy = restrictive_default_policy();
+        assert_eq!(policy.version, 1);
+    }
+
+    #[test]
+    fn parse_minimal_policy_yaml() {
+        let yaml = "version: 1\n";
+        let policy = parse_sandbox_policy(yaml).expect("should parse");
+        assert_eq!(policy.version, 1);
+        assert!(policy.network_policies.is_empty());
+        assert!(policy.filesystem.is_none());
+        assert!(policy.inference.is_none());
+    }
+
+    #[test]
+    fn parse_policy_with_network_rules() {
+        let yaml = r#"
+version: 1
+network_policies:
+  test:
+    name: test_policy
+    endpoints:
+      - { host: example.com, port: 443 }
+    binaries:
+      - { path: /usr/bin/curl }
+"#;
+        let policy = parse_sandbox_policy(yaml).expect("should parse");
+        assert_eq!(policy.network_policies.len(), 1);
+        let rule = &policy.network_policies["test"];
+        assert_eq!(rule.name, "test_policy");
+        assert_eq!(rule.endpoints.len(), 1);
+        assert_eq!(rule.endpoints[0].host, "example.com");
+        assert_eq!(rule.endpoints[0].port, 443);
+        assert_eq!(rule.binaries.len(), 1);
+        assert_eq!(rule.binaries[0].path, "/usr/bin/curl");
+    }
+
+    #[test]
+    fn parse_rejects_unknown_fields() {
+        let yaml = "version: 1\nbogus_field: true\n";
+        assert!(parse_sandbox_policy(yaml).is_err());
+    }
+
+    #[test]
+    fn default_sandbox_policy_is_valid() {
+        let policy = default_sandbox_policy();
+        assert_eq!(policy.version, 1);
+        // Dev default has network policies (unlike the restrictive default).
+        assert!(!policy.network_policies.is_empty());
+    }
+
+    #[test]
+    fn clear_process_identity_clears_fields() {
+        let mut policy = restrictive_default_policy();
+        assert_eq!(policy.process.as_ref().unwrap().run_as_user, "sandbox");
+        clear_process_identity(&mut policy);
+        let proc = policy.process.unwrap();
+        assert!(proc.run_as_user.is_empty());
+        assert!(proc.run_as_group.is_empty());
+    }
+
+    #[test]
+    fn container_policy_path_is_expected() {
+        assert_eq!(CONTAINER_POLICY_PATH, "/etc/navigator/policy.yaml");
+    }
 }
diff --git a/crates/navigator-sandbox/Cargo.toml b/crates/navigator-sandbox/Cargo.toml
index 041da4c4..a99fca02 100644
--- a/crates/navigator-sandbox/Cargo.toml
+++ b/crates/navigator-sandbox/Cargo.toml
@@ -16,6 +16,7 @@ path = "src/main.rs"
 
 [dependencies]
 navigator-core = { path = "../navigator-core" }
+navigator-policy = { path = "../navigator-policy" }
 navigator-router = { path = "../navigator-router" }
 
 # Async runtime
diff --git a/crates/navigator-sandbox/src/grpc_client.rs b/crates/navigator-sandbox/src/grpc_client.rs
index ceac3743..bdc46c23 100644
--- a/crates/navigator-sandbox/src/grpc_client.rs
+++ b/crates/navigator-sandbox/src/grpc_client.rs
@@ -11,8 +11,8 @@ use miette::{IntoDiagnostic, Result, WrapErr};
 use navigator_core::proto::{
     GetSandboxInferenceBundleRequest, GetSandboxInferenceBundleResponse, GetSandboxPolicyRequest,
     GetSandboxProviderEnvironmentRequest, PolicyStatus, ReportPolicyStatusRequest,
-    SandboxPolicy as ProtoSandboxPolicy, inference_client::InferenceClient,
-    navigator_client::NavigatorClient,
+    SandboxPolicy as ProtoSandboxPolicy, UpdateSandboxPolicyRequest,
+    inference_client::InferenceClient, navigator_client::NavigatorClient,
 };
 use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint, Identity};
 use tracing::debug;
@@ -74,7 +74,11 @@ async fn connect(endpoint: &str) -> Result<NavigatorClient<Channel>> {
 }
 
 /// Fetch sandbox policy from NemoClaw server via gRPC.
-pub async fn fetch_policy(endpoint: &str, sandbox_id: &str) -> Result<ProtoSandboxPolicy> {
+///
+/// Returns `Ok(Some(policy))` when the server has a policy configured,
+/// or `Ok(None)` when the sandbox was created without a policy (the sandbox
+/// should discover one from disk or use the restrictive default).
+pub async fn fetch_policy(endpoint: &str, sandbox_id: &str) -> Result<Option<ProtoSandboxPolicy>> {
     debug!(endpoint = %endpoint, sandbox_id = %sandbox_id, "Connecting to NemoClaw server");
 
     let mut client = connect(endpoint).await?;
@@ -88,10 +92,45 @@ pub async fn fetch_policy(endpoint: &str, sandbox_id: &str) -> Result<ProtoSandb
         .await
         .into_diagnostic()?;
 
-    response
-        .into_inner()
-        .policy
-        .ok_or_else(|| miette::miette!("Server returned empty policy"))
+    let inner = response.into_inner();
+
+    // version 0 with no policy means the sandbox was created without one.
+    if inner.version == 0 && inner.policy.is_none() {
+        return Ok(None);
+    }
+
+    Ok(Some(inner.policy.ok_or_else(|| {
+        miette::miette!("Server returned non-zero version but empty policy")
+    })?))
+}
+
+/// Push a locally-resolved policy to the NemoClaw server with an
+/// `UpdateSandboxPolicyRequest` addressed by `sandbox_name`.
+///
+/// Called after the sandbox loads a policy from disk or falls back to the
+/// restrictive default, so the server can record it as the baseline policy.
+/// Returns an error if connecting fails or the RPC returns an error.
+pub async fn sync_policy(
+    endpoint: &str,
+    sandbox_name: &str,
+    policy: &ProtoSandboxPolicy,
+) -> Result<()> {
+    debug!(
+        endpoint = %endpoint,
+        sandbox_name = %sandbox_name,
+        "Syncing locally-discovered policy to server"
+    );
+
+    let mut gateway = connect(endpoint).await?;
+    let request = UpdateSandboxPolicyRequest {
+        name: sandbox_name.to_owned(),
+        policy: Some(policy.clone()),
+    };
+    let outcome = gateway.update_sandbox_policy(request).await;
+    outcome
+        .map(drop)
+        .into_diagnostic()
+        .wrap_err("failed to sync policy to server")
+}
 
 /// Fetch provider environment variables for a sandbox from NemoClaw server via gRPC.
diff --git a/crates/navigator-sandbox/src/lib.rs b/crates/navigator-sandbox/src/lib.rs
index da1f7fab..b4889490 100644
--- a/crates/navigator-sandbox/src/lib.rs
+++ b/crates/navigator-sandbox/src/lib.rs
@@ -120,6 +120,7 @@ pub async fn run_sandbox(
     timeout_secs: u64,
     interactive: bool,
     sandbox_id: Option<String>,
+    sandbox_name: Option<String>,
     navigator_endpoint: Option<String>,
     policy_rules: Option<String>,
     policy_data: Option<String>,
@@ -138,6 +139,7 @@ pub async fn run_sandbox(
     let navigator_endpoint_for_proxy = navigator_endpoint.clone();
     let (mut policy, opa_engine) = load_policy(
         sandbox_id.clone(),
+        sandbox_name,
         navigator_endpoint.clone(),
         policy_rules,
         policy_data,
@@ -681,9 +683,11 @@ fn spawn_route_refresh(
 /// Priority:
 /// 1. If `policy_rules` and `policy_data` are provided, load OPA engine from local files
 /// 2. If `sandbox_id` and `navigator_endpoint` are provided, fetch via gRPC
-/// 3. Otherwise, return an error
+/// 3. If the server returns no policy, discover from disk or use restrictive default
+/// 4. Otherwise, return an error
 async fn load_policy(
     sandbox_id: Option<String>,
+    sandbox_name: Option<String>,
     navigator_endpoint: Option<String>,
     policy_rules: Option<String>,
     policy_data: Option<String>,
@@ -722,6 +726,27 @@ async fn load_policy(
         );
         let proto_policy = grpc_client::fetch_policy(endpoint, id).await?;
 
+        let proto_policy = match proto_policy {
+            Some(p) => p,
+            None => {
+                // No policy configured on the server. Discover from disk or
+                // fall back to the restrictive default, then sync to the
+                // gateway so it becomes the authoritative baseline.
+                info!("Server returned no policy; attempting local discovery");
+                let discovered = discover_policy_from_disk_or_default();
+                sync_discovered_policy(endpoint, sandbox_name.as_deref(), &discovered).await?;
+
+                // Re-fetch from gateway so we get the canonical version/hash.
+                grpc_client::fetch_policy(endpoint, id)
+                    .await?
+                    .ok_or_else(|| {
+                        miette::miette!(
+                            "Server still returned no policy after sync — this is a bug"
+                        )
+                    })?
+            }
+        };
+
         // Build OPA engine from baked-in rules + typed proto data.
         // The engine is needed when network policies exist OR inference routing
         // is configured (inference routing uses OPA to decide inspect_for_inference).
@@ -750,6 +775,62 @@ async fn load_policy(
     ))
 }
 
+/// Resolve the sandbox policy from [`navigator_policy::CONTAINER_POLICY_PATH`],
+/// or return the restrictive default when that file is missing or invalid.
+fn discover_policy_from_disk_or_default() -> navigator_core::proto::SandboxPolicy {
+    use std::path::Path;
+    let well_known = Path::new(navigator_policy::CONTAINER_POLICY_PATH);
+    discover_policy_from_path(well_known)
+}
+
+/// Read a sandbox policy YAML from `path`, falling back to the hardcoded
+/// restrictive default if the file is missing, unreadable, or invalid.
+///
+/// The "loaded" message is logged only after the YAML parses, and a read
+/// failure other than "not found" (e.g. permission denied) is reported as a
+/// warning with its cause instead of being mistaken for an absent file.
+/// This function never fails; every fallback path is logged.
+fn discover_policy_from_path(path: &std::path::Path) -> navigator_core::proto::SandboxPolicy {
+    use navigator_policy::{parse_sandbox_policy, restrictive_default_policy};
+
+    match std::fs::read_to_string(path).map(|yaml| parse_sandbox_policy(&yaml)) {
+        Ok(Ok(policy)) => {
+            info!(path = %path.display(), "Loaded sandbox policy from container disk");
+            return policy;
+        }
+        Ok(Err(e)) => warn!(
+            path = %path.display(),
+            error = %e,
+            "Failed to parse disk policy, using restrictive default"
+        ),
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => info!(
+            path = %path.display(),
+            "No policy file on disk, using restrictive default"
+        ),
+        Err(e) => warn!(
+            path = %path.display(),
+            error = %e,
+            "Failed to read disk policy, using restrictive default"
+        ),
+    }
+    restrictive_default_policy()
+}
+
+/// Forward a locally-discovered policy to the gateway; requires a sandbox name.
+async fn sync_discovered_policy(
+    endpoint: &str,
+    sandbox_name: Option<&str>,
+    policy: &navigator_core::proto::SandboxPolicy,
+) -> Result<()> {
+    let Some(name) = sandbox_name else {
+        return Err(miette::miette!(
+            "Cannot sync discovered policy: sandbox name not available.\n\
+             Set NEMOCLAW_SANDBOX_NAME or --sandbox-name to enable policy sync."
+        ));
+    };
+    grpc_client::sync_policy(endpoint, name, policy).await
+}
+
 /// Prepare filesystem for the sandboxed process.
 ///
 /// Creates `read_write` directories if they don't exist and sets ownership
@@ -1078,4 +1159,71 @@ routes:
             InferenceRouteSource::None
         ));
     }
+
+    // ---- Policy disk discovery tests ----
+
+    #[test]
+    fn discover_policy_from_nonexistent_path_returns_restrictive_default() {
+        let path = std::path::Path::new("/nonexistent/policy.yaml");
+        let policy = discover_policy_from_path(path);
+        // Restrictive default has no network policies.
+        assert!(policy.network_policies.is_empty());
+        assert!(policy.inference.is_none());
+        // But does have filesystem and process policies.
+        assert!(policy.filesystem.is_some());
+        assert!(policy.process.is_some());
+    }
+
+    #[test]
+    fn discover_policy_from_valid_yaml_file() {
+        let dir = tempfile::tempdir().unwrap();
+        let path = dir.path().join("policy.yaml");
+        std::fs::write(
+            &path,
+            r#"
+version: 1
+filesystem_policy:
+  include_workdir: false
+  read_only:
+    - /usr
+  read_write:
+    - /tmp
+network_policies:
+  test:
+    name: test
+    endpoints:
+      - { host: example.com, port: 443 }
+    binaries:
+      - { path: /usr/bin/curl }
+"#,
+        )
+        .unwrap();
+
+        let policy = discover_policy_from_path(&path);
+        assert_eq!(policy.network_policies.len(), 1);
+        assert!(policy.network_policies.contains_key("test"));
+        let fs = policy.filesystem.unwrap();
+        assert!(!fs.include_workdir);
+    }
+
+    #[test]
+    fn discover_policy_from_invalid_yaml_returns_restrictive_default() {
+        let dir = tempfile::tempdir().unwrap();
+        let path = dir.path().join("policy.yaml");
+        std::fs::write(&path, "this is not valid yaml: [[[").unwrap();
+
+        let policy = discover_policy_from_path(&path);
+        // Falls back to restrictive default.
+        assert!(policy.network_policies.is_empty());
+        assert!(policy.filesystem.is_some());
+    }
+
+    #[test]
+    fn discover_policy_restrictive_default_blocks_network() {
+        // Verify that the restrictive default results in NetworkMode::Block
+        // when converted to the sandbox-local SandboxPolicy type.
+        let proto = navigator_policy::restrictive_default_policy();
+        let local_policy = SandboxPolicy::try_from(proto).expect("conversion should succeed");
+        assert!(matches!(local_policy.network.mode, NetworkMode::Block));
+    }
 }
diff --git a/crates/navigator-sandbox/src/main.rs b/crates/navigator-sandbox/src/main.rs
index 88a0c96c..1b61851b 100644
--- a/crates/navigator-sandbox/src/main.rs
+++ b/crates/navigator-sandbox/src/main.rs
@@ -39,6 +39,11 @@ struct Args {
     #[arg(long, env = "NEMOCLAW_SANDBOX_ID")]
     sandbox_id: Option<String>,
 
+    /// Sandbox name (used for policy sync when the sandbox discovers policy
+    /// from disk or falls back to the restrictive default).
+    #[arg(long, env = "NEMOCLAW_SANDBOX_NAME")]
+    sandbox_name: Option<String>,
+
     /// NemoClaw server gRPC endpoint for fetching policy.
     /// Required when using --sandbox-id.
     #[arg(long, env = "NEMOCLAW_ENDPOINT")]
@@ -172,6 +177,7 @@ async fn main() -> Result<()> {
         args.timeout,
         args.interactive,
         args.sandbox_id,
+        args.sandbox_name,
         args.navigator_endpoint,
         args.policy_rules,
         args.policy_data,
diff --git a/crates/navigator-server/src/grpc.rs b/crates/navigator-server/src/grpc.rs
index b92c8b71..c8af0c8e 100644
--- a/crates/navigator-server/src/grpc.rs
+++ b/crates/navigator-server/src/grpc.rs
@@ -77,10 +77,6 @@ impl Navigator for NavigatorService {
         let spec = request
             .spec
             .ok_or_else(|| Status::invalid_argument("spec is required"))?;
-        if spec.policy.is_none() {
-            return Err(Status::invalid_argument("spec.policy is required"));
-        }
-
         // Validate provider names exist (fail fast). Credentials are fetched at
         // runtime by the sandbox supervisor via GetSandboxProviderEnvironment.
         for name in &spec.providers {
@@ -595,14 +591,27 @@ impl Navigator for NavigatorService {
             }));
         }
 
-        // Lazy backfill: no policy history exists yet, create version 1 from spec.policy.
+        // Lazy backfill: no policy history exists yet.
         let spec = sandbox
             .spec
             .ok_or_else(|| Status::internal("sandbox has no spec"))?;
-        let policy = spec
-            .policy
-            .ok_or_else(|| Status::failed_precondition("sandbox has no policy configured"))?;
 
+        // If spec.policy is None, the sandbox was created without a policy.
+        // Return an empty response so the sandbox can discover policy from disk
+        // or fall back to its restrictive default.
+        let Some(policy) = spec.policy else {
+            debug!(
+                sandbox_id = %sandbox_id,
+                "GetSandboxPolicy: no policy configured, returning empty response"
+            );
+            return Ok(Response::new(GetSandboxPolicyResponse {
+                policy: None,
+                version: 0,
+                policy_hash: String::new(),
+            }));
+        };
+
+        // Create version 1 from spec.policy.
         let payload = policy.encode_to_vec();
         let hash = deterministic_policy_hash(&policy);
         let policy_id = uuid::Uuid::new_v4().to_string();
@@ -842,16 +851,32 @@ impl Navigator for NavigatorService {
             .spec
             .as_ref()
             .ok_or_else(|| Status::internal("sandbox has no spec"))?;
-        let baseline_policy = spec
-            .policy
-            .as_ref()
-            .ok_or_else(|| Status::failed_precondition("sandbox has no policy configured"))?;
 
-        // Validate static fields haven't changed.
-        validate_static_fields_unchanged(baseline_policy, &new_policy)?;
+        if let Some(baseline_policy) = spec.policy.as_ref() {
+            // Validate static fields haven't changed.
+            validate_static_fields_unchanged(baseline_policy, &new_policy)?;
 
-        // Validate network mode hasn't changed (Block ↔ Proxy).
-        validate_network_mode_unchanged(baseline_policy, &new_policy)?;
+            // Validate network mode hasn't changed (Block ↔ Proxy).
+            validate_network_mode_unchanged(baseline_policy, &new_policy)?;
+        } else {
+            // No baseline policy exists (sandbox created without one). The
+            // sandbox is syncing a locally-discovered or restrictive-default
+            // policy. Backfill spec.policy so future updates can validate
+            // against it.
+            let mut sandbox = sandbox;
+            if let Some(ref mut spec) = sandbox.spec {
+                spec.policy = Some(new_policy.clone());
+            }
+            self.state
+                .store
+                .put_message(&sandbox)
+                .await
+                .map_err(|e| Status::internal(format!("backfill spec.policy failed: {e}")))?;
+            info!(
+                sandbox_id = %sandbox_id,
+                "UpdateSandboxPolicy: backfilled spec.policy from sandbox-discovered policy"
+            );
+        }
 
         // Determine next version number.
         let latest = self
@@ -2294,4 +2319,167 @@ mod tests {
         let result = store.get_message::<Sandbox>("nonexistent").await.unwrap();
         assert!(result.is_none());
     }
+
+    // ---- Policy validation tests ----
+
+    #[test]
+    fn validate_static_fields_allows_unchanged() {
+        use super::{validate_network_mode_unchanged, validate_static_fields_unchanged};
+        use navigator_core::proto::{
+            FilesystemPolicy, LandlockPolicy, ProcessPolicy, SandboxPolicy as ProtoSandboxPolicy,
+        };
+
+        let policy = ProtoSandboxPolicy {
+            version: 1,
+            filesystem: Some(FilesystemPolicy {
+                include_workdir: true,
+                read_only: vec!["/usr".into()],
+                read_write: vec!["/tmp".into()],
+            }),
+            landlock: Some(LandlockPolicy {
+                compatibility: "best_effort".into(),
+            }),
+            process: Some(ProcessPolicy {
+                run_as_user: "sandbox".into(),
+                run_as_group: "sandbox".into(),
+            }),
+            ..Default::default()
+        };
+        assert!(validate_static_fields_unchanged(&policy, &policy).is_ok());
+        assert!(validate_network_mode_unchanged(&policy, &policy).is_ok());
+    }
+
+    #[test]
+    fn validate_static_fields_rejects_filesystem_change() {
+        use super::validate_static_fields_unchanged;
+        use navigator_core::proto::{FilesystemPolicy, SandboxPolicy as ProtoSandboxPolicy};
+
+        let baseline = ProtoSandboxPolicy {
+            filesystem: Some(FilesystemPolicy {
+                read_only: vec!["/usr".into()],
+                ..Default::default()
+            }),
+            ..Default::default()
+        };
+        let changed = ProtoSandboxPolicy {
+            filesystem: Some(FilesystemPolicy {
+                read_only: vec!["/usr".into(), "/lib".into()],
+                ..Default::default()
+            }),
+            ..Default::default()
+        };
+        let result = validate_static_fields_unchanged(&baseline, &changed);
+        assert!(result.is_err());
+        assert!(result.unwrap_err().message().contains("filesystem"));
+    }
+
+    #[test]
+    fn validate_network_mode_rejects_block_to_proxy() {
+        use super::validate_network_mode_unchanged;
+        use navigator_core::proto::{NetworkPolicyRule, SandboxPolicy as ProtoSandboxPolicy};
+
+        let baseline = ProtoSandboxPolicy::default(); // no network policies = Block
+        let mut changed = ProtoSandboxPolicy::default();
+        changed.network_policies.insert(
+            "test".into(),
+            NetworkPolicyRule {
+                name: "test".into(),
+                ..Default::default()
+            },
+        );
+        assert!(validate_network_mode_unchanged(&baseline, &changed).is_err());
+    }
+
+    // ---- Sandbox creation without policy ----
+
+    #[tokio::test]
+    async fn sandbox_without_policy_stores_successfully() {
+        use navigator_core::proto::{Sandbox, SandboxPhase, SandboxSpec};
+
+        let store = Store::connect("sqlite::memory:").await.unwrap();
+
+        let sandbox = Sandbox {
+            id: "sb-no-policy".to_string(),
+            name: "no-policy-sandbox".to_string(),
+            namespace: "default".to_string(),
+            spec: Some(SandboxSpec {
+                policy: None, // no policy
+                ..Default::default()
+            }),
+            phase: SandboxPhase::Provisioning as i32,
+            ..Default::default()
+        };
+        store.put_message(&sandbox).await.unwrap();
+
+        let loaded = store
+            .get_message::<Sandbox>("sb-no-policy")
+            .await
+            .unwrap()
+            .unwrap();
+        assert!(loaded.spec.unwrap().policy.is_none());
+    }
+
+    #[tokio::test]
+    async fn sandbox_policy_backfill_on_update_when_no_baseline() {
+        use navigator_core::proto::{
+            FilesystemPolicy, LandlockPolicy, ProcessPolicy, Sandbox, SandboxPhase,
+            SandboxPolicy as ProtoSandboxPolicy, SandboxSpec,
+        };
+
+        let store = Store::connect("sqlite::memory:").await.unwrap();
+
+        // Create sandbox without policy.
+        let sandbox = Sandbox {
+            id: "sb-backfill".to_string(),
+            name: "backfill-sandbox".to_string(),
+            namespace: "default".to_string(),
+            spec: Some(SandboxSpec {
+                policy: None,
+                ..Default::default()
+            }),
+            phase: SandboxPhase::Provisioning as i32,
+            ..Default::default()
+        };
+        store.put_message(&sandbox).await.unwrap();
+
+        // Simulate what update_sandbox_policy does when spec.policy is None:
+        // backfill spec.policy with the new policy.
+        let new_policy = ProtoSandboxPolicy {
+            version: 1,
+            filesystem: Some(FilesystemPolicy {
+                include_workdir: true,
+                read_only: vec!["/usr".into()],
+                read_write: vec!["/tmp".into()],
+            }),
+            landlock: Some(LandlockPolicy {
+                compatibility: "best_effort".into(),
+            }),
+            process: Some(ProcessPolicy {
+                run_as_user: "sandbox".into(),
+                run_as_group: "sandbox".into(),
+            }),
+            ..Default::default()
+        };
+
+        let mut sandbox = store
+            .get_message::<Sandbox>("sb-backfill")
+            .await
+            .unwrap()
+            .unwrap();
+        if let Some(ref mut spec) = sandbox.spec {
+            spec.policy = Some(new_policy.clone());
+        }
+        store.put_message(&sandbox).await.unwrap();
+
+        // Verify backfill succeeded.
+        let loaded = store
+            .get_message::<Sandbox>("sb-backfill")
+            .await
+            .unwrap()
+            .unwrap();
+        let policy = loaded.spec.unwrap().policy.unwrap();
+        assert_eq!(policy.version, 1);
+        assert!(policy.filesystem.is_some());
+        assert_eq!(policy.process.unwrap().run_as_user, "sandbox");
+    }
 }
diff --git a/crates/navigator-server/src/sandbox/mod.rs b/crates/navigator-server/src/sandbox/mod.rs
index b5a4cef1..6c142a16 100644
--- a/crates/navigator-server/src/sandbox/mod.rs
+++ b/crates/navigator-server/src/sandbox/mod.rs
@@ -135,6 +135,7 @@ impl SandboxClient {
             sandbox.spec.as_ref(),
             &self.default_image,
             &sandbox.id,
+            &sandbox.name,
             &self.grpc_endpoint,
             self.ssh_listen_addr(),
             self.ssh_handshake_secret(),
@@ -517,6 +518,7 @@ fn sandbox_to_k8s_spec(
     spec: Option<&SandboxSpec>,
     default_image: &str,
     sandbox_id: &str,
+    sandbox_name: &str,
     grpc_endpoint: &str,
     ssh_listen_addr: &str,
     ssh_handshake_secret: &str,
@@ -541,6 +543,7 @@ fn sandbox_to_k8s_spec(
                     template,
                     default_image,
                     sandbox_id,
+                    sandbox_name,
                     grpc_endpoint,
                     ssh_listen_addr,
                     ssh_handshake_secret,
@@ -571,6 +574,7 @@ fn sandbox_to_k8s_spec(
                 &SandboxTemplate::default(),
                 default_image,
                 sandbox_id,
+                sandbox_name,
                 grpc_endpoint,
                 ssh_listen_addr,
                 ssh_handshake_secret,
@@ -591,6 +595,7 @@ fn sandbox_template_to_k8s(
     template: &SandboxTemplate,
     default_image: &str,
     sandbox_id: &str,
+    sandbox_name: &str,
     grpc_endpoint: &str,
     ssh_listen_addr: &str,
     ssh_handshake_secret: &str,
@@ -604,6 +609,7 @@ fn sandbox_template_to_k8s(
             template,
             default_image,
             sandbox_id,
+            sandbox_name,
             grpc_endpoint,
             ssh_listen_addr,
             ssh_handshake_secret,
@@ -659,6 +665,7 @@ fn sandbox_template_to_k8s(
         &template.environment,
         spec_environment,
         sandbox_id,
+        sandbox_name,
         grpc_endpoint,
         ssh_listen_addr,
         ssh_handshake_secret,
@@ -734,6 +741,7 @@ fn inject_pod_template(
     template: &SandboxTemplate,
     default_image: &str,
     sandbox_id: &str,
+    sandbox_name: &str,
     grpc_endpoint: &str,
     ssh_listen_addr: &str,
     ssh_handshake_secret: &str,
@@ -784,6 +792,7 @@ fn inject_pod_template(
             container,
             template,
             sandbox_id,
+            sandbox_name,
             grpc_endpoint,
             ssh_listen_addr,
             ssh_handshake_secret,
@@ -826,6 +835,7 @@ fn update_container_env(
     container: &mut serde_json::Value,
     template: &SandboxTemplate,
     sandbox_id: &str,
+    sandbox_name: &str,
     grpc_endpoint: &str,
     ssh_listen_addr: &str,
     ssh_handshake_secret: &str,
@@ -844,6 +854,7 @@ fn update_container_env(
         &template.environment,
         spec_environment,
         sandbox_id,
+        sandbox_name,
         grpc_endpoint,
         ssh_listen_addr,
         ssh_handshake_secret,
@@ -858,6 +869,7 @@ fn build_env_list(
     template_environment: &std::collections::HashMap<String, String>,
     spec_environment: &std::collections::HashMap<String, String>,
     sandbox_id: &str,
+    sandbox_name: &str,
     grpc_endpoint: &str,
     ssh_listen_addr: &str,
     ssh_handshake_secret: &str,
@@ -869,6 +881,7 @@ fn build_env_list(
     apply_required_env(
         &mut env,
         sandbox_id,
+        sandbox_name,
         grpc_endpoint,
         ssh_listen_addr,
         ssh_handshake_secret,
@@ -889,12 +902,14 @@ fn apply_env_map(
 fn apply_required_env(
     env: &mut Vec<serde_json::Value>,
     sandbox_id: &str,
+    sandbox_name: &str,
     grpc_endpoint: &str,
     ssh_listen_addr: &str,
     ssh_handshake_secret: &str,
     ssh_handshake_skew_secs: u64,
 ) {
     upsert_env(env, "NEMOCLAW_SANDBOX_ID", sandbox_id);
+    upsert_env(env, "NEMOCLAW_SANDBOX_NAME", sandbox_name);
     upsert_env(env, "NEMOCLAW_ENDPOINT", grpc_endpoint);
     upsert_env(env, "NEMOCLAW_SANDBOX_COMMAND", "sleep infinity");
     if !ssh_listen_addr.is_empty() {

From c5571aac3495ef5ae5c5babe1da631201d6e118d Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Tue, 3 Mar 2026 20:37:15 -0800
Subject: [PATCH 2/8] perf(sandbox): reuse single gRPC connection for policy
 discovery flow

Consolidate the sync + re-fetch calls during policy discovery into a
single TLS channel, reducing startup from 3 separate connections to 2.
---
 crates/navigator-sandbox/src/grpc_client.rs | 55 +++++++++++++++------
 crates/navigator-sandbox/src/lib.rs         | 35 ++++---------
 2 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/crates/navigator-sandbox/src/grpc_client.rs b/crates/navigator-sandbox/src/grpc_client.rs
index bdc46c23..7de9896d 100644
--- a/crates/navigator-sandbox/src/grpc_client.rs
+++ b/crates/navigator-sandbox/src/grpc_client.rs
@@ -85,6 +85,14 @@ pub async fn fetch_policy(endpoint: &str, sandbox_id: &str) -> Result<Option<Pro
 
     debug!("Connected, fetching sandbox policy");
 
+    fetch_policy_with_client(&mut client, sandbox_id).await
+}
+
+/// Fetch sandbox policy using an existing client connection.
+async fn fetch_policy_with_client(
+    client: &mut NavigatorClient<Channel>,
+    sandbox_id: &str,
+) -> Result<Option<ProtoSandboxPolicy>> {
     let response = client
         .get_sandbox_policy(GetSandboxPolicyRequest {
             sandbox_id: sandbox_id.to_string(),
@@ -104,23 +112,12 @@ pub async fn fetch_policy(endpoint: &str, sandbox_id: &str) -> Result<Option<Pro
     })?))
 }
 
-/// Sync a locally-discovered policy to the NemoClaw server.
-///
-/// Used when the sandbox discovers a policy from disk or falls back to the
-/// restrictive default. The server will store this as the baseline policy.
-pub async fn sync_policy(
-    endpoint: &str,
+/// Sync a locally-discovered policy using an existing client connection.
+async fn sync_policy_with_client(
+    client: &mut NavigatorClient<Channel>,
     sandbox_name: &str,
     policy: &ProtoSandboxPolicy,
 ) -> Result<()> {
-    debug!(
-        endpoint = %endpoint,
-        sandbox_name = %sandbox_name,
-        "Syncing locally-discovered policy to server"
-    );
-
-    let mut client = connect(endpoint).await?;
-
     client
         .update_sandbox_policy(UpdateSandboxPolicyRequest {
             name: sandbox_name.to_string(),
@@ -133,6 +130,36 @@ pub async fn sync_policy(
     Ok(())
 }
 
+/// Sync a locally discovered policy and re-fetch it over one gRPC connection.
+///
+/// Pushes `discovered_policy` to the gateway, then reads the stored policy
+/// back on the same channel; errors if the gateway still returns no policy.
+pub async fn discover_and_sync_policy(
+    endpoint: &str,
+    sandbox_id: &str,
+    sandbox_name: &str,
+    discovered_policy: &ProtoSandboxPolicy,
+) -> Result<ProtoSandboxPolicy> {
+    debug!(
+        endpoint = %endpoint,
+        sandbox_id = %sandbox_id,
+        sandbox_name = %sandbox_name,
+        "Syncing discovered policy and re-fetching canonical version"
+    );
+
+    let mut client = connect(endpoint).await?;
+
+    // Sync the discovered policy to the gateway.
+    sync_policy_with_client(&mut client, sandbox_name, discovered_policy).await?;
+
+    // Re-fetch from the gateway to get the canonical version/hash.
+    fetch_policy_with_client(&mut client, sandbox_id)
+        .await?
+        .ok_or_else(|| {
+            miette::miette!("Server still returned no policy after sync — this is a bug")
+        })
+}
+
 /// Fetch provider environment variables for a sandbox from NemoClaw server via gRPC.
 ///
 /// Returns a map of environment variable names to values derived from provider
diff --git a/crates/navigator-sandbox/src/lib.rs b/crates/navigator-sandbox/src/lib.rs
index b4889490..0b3ccb6d 100644
--- a/crates/navigator-sandbox/src/lib.rs
+++ b/crates/navigator-sandbox/src/lib.rs
@@ -734,16 +734,16 @@ async fn load_policy(
                 // gateway so it becomes the authoritative baseline.
                 info!("Server returned no policy; attempting local discovery");
                 let discovered = discover_policy_from_disk_or_default();
-                sync_discovered_policy(endpoint, sandbox_name.as_deref(), &discovered).await?;
-
-                // Re-fetch from gateway so we get the canonical version/hash.
-                grpc_client::fetch_policy(endpoint, id)
-                    .await?
-                    .ok_or_else(|| {
-                        miette::miette!(
-                            "Server still returned no policy after sync — this is a bug"
-                        )
-                    })?
+                let name = sandbox_name.as_deref().ok_or_else(|| {
+                    miette::miette!(
+                        "Cannot sync discovered policy: sandbox name not available.\n\
+                         Set NEMOCLAW_SANDBOX_NAME or --sandbox-name to enable policy sync."
+                    )
+                })?;
+
+                // Sync and re-fetch over a single connection to avoid extra
+                // TLS handshakes.
+                grpc_client::discover_and_sync_policy(endpoint, id, name, &discovered).await?
             }
         };
 
@@ -816,21 +816,6 @@ fn discover_policy_from_path(path: &std::path::Path) -> navigator_core::proto::S
     }
 }
 
-/// Sync a locally-discovered policy to the gateway server.
-async fn sync_discovered_policy(
-    endpoint: &str,
-    sandbox_name: Option<&str>,
-    policy: &navigator_core::proto::SandboxPolicy,
-) -> Result<()> {
-    let name = sandbox_name.ok_or_else(|| {
-        miette::miette!(
-            "Cannot sync discovered policy: sandbox name not available.\n\
-             Set NEMOCLAW_SANDBOX_NAME or --sandbox-name to enable policy sync."
-        )
-    })?;
-    grpc_client::sync_policy(endpoint, name, policy).await
-}
-
 /// Prepare filesystem for the sandboxed process.
 ///
 /// Creates `read_write` directories if they don't exist and sets ownership

From 1ead5eb29452fffba10fc38d64666b9e99f8b01f Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Wed, 4 Mar 2026 14:33:07 -0800
Subject: [PATCH 3/8] chore(sandbox): add navigator-policy to docker build, fix
 clippy warnings, harden scripts

- Add navigator-policy crate to Dockerfile.base build cache layer
- Add dev-sandbox-policy.yaml to Dockerfile.base COPY step
- Use exact container name matching with health checks in cluster-deploy-fast
- Add navigator-policy and dev-sandbox-policy.yaml to sandbox fingerprint
- Implement fail-fast for parallel image builds in cluster-deploy-fast
- Collapse nested if-let in kubeconfig rewrite (clippy collapsible_if)
- Backtick-quote NemoClaw in doc comment (clippy doc_markdown)
---
 crates/navigator-bootstrap/src/kubeconfig.rs | 14 ++++-----
 crates/navigator-bootstrap/src/lib.rs        |  2 +-
 deploy/docker/sandbox/Dockerfile.base        |  5 ++-
 tasks/scripts/cluster-deploy-fast.sh         | 32 +++++++++++++++-----
 4 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/crates/navigator-bootstrap/src/kubeconfig.rs b/crates/navigator-bootstrap/src/kubeconfig.rs
index bf712f86..c705b577 100644
--- a/crates/navigator-bootstrap/src/kubeconfig.rs
+++ b/crates/navigator-bootstrap/src/kubeconfig.rs
@@ -120,13 +120,13 @@ pub fn rewrite_kubeconfig(contents: &str, cluster_name: &str, kube_port: Option<
     let mut replaced = Vec::new();
     for line in contents.lines() {
         let trimmed = line.trim_start();
-        if let Some(kp) = kube_port {
-            if trimmed.starts_with("server:") {
-                let indent_len = line.len() - trimmed.len();
-                let indent = &line[..indent_len];
-                replaced.push(format!("{indent}server: https://127.0.0.1:{kp}"));
-                continue;
-            }
+        if let Some(kp) = kube_port
+            && trimmed.starts_with("server:")
+        {
+            let indent_len = line.len() - trimmed.len();
+            let indent = &line[..indent_len];
+            replaced.push(format!("{indent}server: https://127.0.0.1:{kp}"));
+            continue;
         }
         // Rename default cluster/context/user to the cluster name
         // Handle both "name: default" and "- name: default" (YAML list item)
diff --git a/crates/navigator-bootstrap/src/lib.rs b/crates/navigator-bootstrap/src/lib.rs
index f562d8ba..c2f2dd0a 100644
--- a/crates/navigator-bootstrap/src/lib.rs
+++ b/crates/navigator-bootstrap/src/lib.rs
@@ -462,7 +462,7 @@ fn default_cluster_image_ref() -> String {
     image::pull_registry_image()
 }
 
-/// Create the three TLS K8s secrets required by the NemoClaw server and sandbox pods.
+/// Create the three TLS K8s secrets required by the `NemoClaw` server and sandbox pods.
 ///
 /// Secrets are created via `kubectl` exec'd inside the cluster container:
 /// - `navigator-server-tls` (kubernetes.io/tls): server cert + key
diff --git a/deploy/docker/sandbox/Dockerfile.base b/deploy/docker/sandbox/Dockerfile.base
index 4fc6f885..45accd5f 100644
--- a/deploy/docker/sandbox/Dockerfile.base
+++ b/deploy/docker/sandbox/Dockerfile.base
@@ -29,6 +29,7 @@ WORKDIR /build
 COPY Cargo.toml Cargo.lock ./
 COPY crates/navigator-cli/Cargo.toml crates/navigator-cli/Cargo.toml
 COPY crates/navigator-core/Cargo.toml crates/navigator-core/Cargo.toml
+COPY crates/navigator-policy/Cargo.toml crates/navigator-policy/Cargo.toml
 COPY crates/navigator-providers/Cargo.toml crates/navigator-providers/Cargo.toml
 COPY crates/navigator-router/Cargo.toml crates/navigator-router/Cargo.toml
 COPY crates/navigator-sandbox/Cargo.toml crates/navigator-sandbox/Cargo.toml
@@ -36,11 +37,12 @@ COPY crates/navigator-server/Cargo.toml crates/navigator-server/Cargo.toml
 COPY crates/navigator-bootstrap/Cargo.toml crates/navigator-bootstrap/Cargo.toml
 
 # Create dummy source files to build dependencies
-RUN mkdir -p crates/navigator-cli/src crates/navigator-core/src crates/navigator-providers/src crates/navigator-router/src crates/navigator-sandbox/src crates/navigator-server/src crates/navigator-bootstrap/src && \
+RUN mkdir -p crates/navigator-cli/src crates/navigator-core/src crates/navigator-policy/src crates/navigator-providers/src crates/navigator-router/src crates/navigator-sandbox/src crates/navigator-server/src crates/navigator-bootstrap/src && \
     echo "fn main() {}" > crates/navigator-cli/src/main.rs && \
     echo "fn main() {}" > crates/navigator-sandbox/src/main.rs && \
     echo "fn main() {}" > crates/navigator-server/src/main.rs && \
     touch crates/navigator-core/src/lib.rs && \
+    touch crates/navigator-policy/src/lib.rs && \
     touch crates/navigator-providers/src/lib.rs && \
     touch crates/navigator-router/src/lib.rs && \
     touch crates/navigator-bootstrap/src/lib.rs
@@ -48,6 +50,7 @@ RUN mkdir -p crates/navigator-cli/src crates/navigator-core/src crates/navigator
 # Copy proto files needed for build
 COPY proto/ proto/
 COPY dev-sandbox-policy.rego ./
+COPY dev-sandbox-policy.yaml ./
 
 # Build dependencies only (cached unless Cargo.toml/lock changes).
 # sccache uses memcached in CI or the local disk cache mount for local dev.
diff --git a/tasks/scripts/cluster-deploy-fast.sh b/tasks/scripts/cluster-deploy-fast.sh
index 104fce6d..9d137d70 100755
--- a/tasks/scripts/cluster-deploy-fast.sh
+++ b/tasks/scripts/cluster-deploy-fast.sh
@@ -24,8 +24,8 @@ log_duration() {
   echo "${label} took $((end - start))s"
 }
 
-if ! docker ps -q --filter "name=${CONTAINER_NAME}" | grep -q .; then
-  echo "Error: Cluster container '${CONTAINER_NAME}' is not running."
+if ! docker ps -q --filter "name=^${CONTAINER_NAME}$" --filter "health=healthy" | grep -q .; then
+  echo "Error: Cluster container '${CONTAINER_NAME}' is not running or not healthy."
   echo "Start the cluster first with: mise run cluster"
   exit 1
 fi
@@ -139,7 +139,7 @@ matches_sandbox() {
     crates/navigator-core/*|crates/navigator-providers/*)
       return 0
       ;;
-    crates/navigator-sandbox/*|deploy/docker/sandbox/*|deploy/docker/openclaw-start.sh|python/*|pyproject.toml|uv.lock|dev-sandbox-policy.rego)
+    crates/navigator-policy/*|crates/navigator-sandbox/*|deploy/docker/sandbox/*|deploy/docker/openclaw-start.sh|python/*|pyproject.toml|uv.lock|dev-sandbox-policy.rego|dev-sandbox-policy.yaml)
       return 0
       ;;
     *)
@@ -273,6 +273,7 @@ done
 
 server_pid=""
 sandbox_pid=""
+build_failed=0
 
 if [[ "${build_server}" == "1" ]]; then
   if [[ "${build_sandbox}" == "1" ]]; then
@@ -292,11 +293,28 @@ if [[ "${build_sandbox}" == "1" ]]; then
   fi
 fi
 
-if [[ -n "${server_pid}" ]]; then
+# Wait for parallel builds and fail fast: if either build fails, kill the
+# other one immediately instead of letting it run to completion.
+if [[ -n "${server_pid}" && -n "${sandbox_pid}" ]]; then
+  # Both running in parallel — wait for either to finish first.
+  if ! wait -n "${server_pid}" "${sandbox_pid}" 2>/dev/null; then
+    build_failed=1
+  fi
+  # Whichever finished, wait for the other (or kill it on failure).
+  if [[ "${build_failed}" == "1" ]]; then
+    echo "Error: a parallel image build failed. Killing remaining build..." >&2
+    kill "${server_pid}" "${sandbox_pid}" 2>/dev/null || true
+    wait "${server_pid}" "${sandbox_pid}" 2>/dev/null || true
+    exit 1
+  fi
+  # First build succeeded; wait for the second.
+  if ! wait -n "${server_pid}" "${sandbox_pid}" 2>/dev/null; then
+    echo "Error: a parallel image build failed." >&2
+    exit 1
+  fi
+elif [[ -n "${server_pid}" ]]; then
   wait "${server_pid}"
-fi
-
-if [[ -n "${sandbox_pid}" ]]; then
+elif [[ -n "${sandbox_pid}" ]]; then
   wait "${sandbox_pid}"
 fi
 

From 642c4cb5452000804a994039521204a1bd49f719 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Wed, 4 Mar 2026 14:49:55 -0800
Subject: [PATCH 4/8] refactor: move sandbox policy files out of repo root

Move dev-sandbox-policy.rego into crates/navigator-policy/ (the
canonical policy crate), and move dev-sandbox-policy.yaml and
dev-sandbox-policy-empty.yaml into deploy/docker/sandbox/, where
dev-sandbox-policy.yaml is baked into the container image at
/etc/navigator/policy.yaml.

This eliminates loose config files from the repo root and co-locates
the rego rules with the policy crate that owns them. The default
policy YAML now ships inside the sandbox container so sandboxes
without an explicit gateway-provided policy can discover it on disk.

Updated all include_str! paths, Dockerfiles, build-script fingerprints,
architecture docs, and agent skill references.
---
 .agents/skills/create-spike/SKILL.md               |  2 +-
 .agents/skills/generate-sandbox-policy/SKILL.md    | 14 +++++++-------
 .agents/skills/generate-sandbox-policy/examples.md | 12 ++++++------
 .claude/agent-memory/arch-doc-writer/MEMORY.md     |  2 +-
 architecture/README.md                             |  2 +-
 architecture/build-containers.md                   |  2 +-
 architecture/inference-routing.md                  |  8 ++++----
 architecture/sandbox.md                            |  2 +-
 .../navigator-policy/dev-sandbox-policy.rego       |  0
 crates/navigator-policy/src/lib.rs                 |  5 +++--
 crates/navigator-sandbox/src/opa.rs                |  7 ++++---
 deploy/docker/Dockerfile.python-wheels             |  2 +-
 deploy/docker/Dockerfile.python-wheels-macos       |  2 +-
 deploy/docker/sandbox/Dockerfile.base              |  8 ++++++--
 .../docker/sandbox/dev-sandbox-policy-empty.yaml   |  0
 .../docker/sandbox/dev-sandbox-policy.yaml         |  0
 examples/private-ip-routing/README.md              |  2 +-
 tasks/scripts/cluster-deploy-fast.sh               |  4 ++--
 18 files changed, 40 insertions(+), 34 deletions(-)
 rename dev-sandbox-policy.rego => crates/navigator-policy/dev-sandbox-policy.rego (100%)
 rename dev-sandbox-policy-empty.yaml => deploy/docker/sandbox/dev-sandbox-policy-empty.yaml (100%)
 rename dev-sandbox-policy.yaml => deploy/docker/sandbox/dev-sandbox-policy.yaml (100%)

diff --git a/.agents/skills/create-spike/SKILL.md b/.agents/skills/create-spike/SKILL.md
index da0f09b8..8c5ce0b5 100644
--- a/.agents/skills/create-spike/SKILL.md
+++ b/.agents/skills/create-spike/SKILL.md
@@ -289,7 +289,7 @@ User says: "Allow sandbox egress to private IP space via networking policy"
 1. Problem is clear — no clarification needed
 2. Fire `principal-engineer-reviewer` to investigate:
    - Finds `is_internal_ip()` SSRF check in `proxy.rs` that blocks RFC 1918 addresses
-   - Reads OPA policy evaluation pipeline in `opa.rs` and `dev-sandbox-policy.rego`
+   - Reads OPA policy evaluation pipeline in `opa.rs` and `crates/navigator-policy/dev-sandbox-policy.rego`
    - Reads proto definitions in `sandbox.proto` for `NetworkEndpoint`
    - Maps the 4-layer defense model: netns, seccomp, OPA, SSRF check
    - Reads `architecture/security-policy.md` and `architecture/sandbox.md`
diff --git a/.agents/skills/generate-sandbox-policy/SKILL.md b/.agents/skills/generate-sandbox-policy/SKILL.md
index 8782886f..5c842533 100644
--- a/.agents/skills/generate-sandbox-policy/SKILL.md
+++ b/.agents/skills/generate-sandbox-policy/SKILL.md
@@ -170,7 +170,7 @@ Key sections to reference:
 Also read the example policy for real-world patterns:
 
 ```
-Read dev-sandbox-policy.yaml
+Read deploy/docker/sandbox/dev-sandbox-policy.yaml
 ```
 
 ## Step 4: Choose Policy Shape
@@ -355,8 +355,8 @@ The policy needs to go somewhere. Determine which mode applies:
 
 | Signal | Mode |
 |--------|------|
-| User names an existing policy file (e.g., "add to dev-sandbox-policy.yaml") | **Update existing file** |
-| User says "update my policy", "add this to my policy file" | **Update existing file** — look for `dev-sandbox-policy.yaml` or ask which file |
+| User names an existing policy file (e.g., "add to deploy/docker/sandbox/dev-sandbox-policy.yaml") | **Update existing file** |
+| User says "update my policy", "add this to my policy file" | **Update existing file** — look for `deploy/docker/sandbox/dev-sandbox-policy.yaml` or ask which file |
 | User asks to modify an existing policy rule by name | **Update existing file** — edit the named policy in place |
 | User says "create a new policy file" or names a file that doesn't exist | **Create new file** |
 | No file context given | **Present only** — show the YAML and ask if the user wants it written to a file |
@@ -418,7 +418,7 @@ inference:
 
 The `filesystem_policy`, `landlock`, `process`, and `inference` sections above are sensible defaults. Tell the user these are defaults and may need adjustment for their environment. The generated `network_policies` block is the primary output.
 
-If the user provides a file path, write to it. Otherwise, suggest `dev-sandbox-policy.yaml` for local development or ask where to place it.
+If the user provides a file path, write to it. Otherwise, suggest `deploy/docker/sandbox/dev-sandbox-policy.yaml` for local development or ask where to place it.
 
 ### Mode C: Present Only (no file write)
 
@@ -427,7 +427,7 @@ Show the generated policy YAML with:
 1. **Summary** — what the policy allows and denies, in plain language
 2. **The YAML** — the complete `network_policies` block, ready to paste
 3. **Integration guidance**:
-   - For local dev: add to `dev-sandbox-policy.yaml` under `network_policies`
+   - For local dev: add to `deploy/docker/sandbox/dev-sandbox-policy.yaml` under `network_policies`
    - For production: configure via the gateway
 4. **Caveats** — any assumptions made, anything the user should verify
 
@@ -545,6 +545,6 @@ private_services:
 ## Additional Resources
 
 - Full policy schema: [architecture/security-policy.md](../../../architecture/security-policy.md)
-- Example policy file: [dev-sandbox-policy.yaml](../../../dev-sandbox-policy.yaml)
-- Rego evaluation rules: [dev-sandbox-policy.rego](../../../dev-sandbox-policy.rego)
+- Example policy file: [dev-sandbox-policy.yaml](../../../deploy/docker/sandbox/dev-sandbox-policy.yaml)
+- Rego evaluation rules: [dev-sandbox-policy.rego](../../../crates/navigator-policy/dev-sandbox-policy.rego)
 - For translation examples from real API docs, see [examples.md](examples.md)
diff --git a/.agents/skills/generate-sandbox-policy/examples.md b/.agents/skills/generate-sandbox-policy/examples.md
index c9611e2a..7fda8191 100644
--- a/.agents/skills/generate-sandbox-policy/examples.md
+++ b/.agents/skills/generate-sandbox-policy/examples.md
@@ -729,11 +729,11 @@ An exact IP is treated as `/32` — only that specific address is permitted.
 
 ### Example F1: Add a New Policy to an Existing File
 
-**User**: "Add read-only access to api.github.com for curl to my dev-sandbox-policy.yaml"
+**User**: "Add read-only access to api.github.com for curl to my deploy/docker/sandbox/dev-sandbox-policy.yaml"
 
 **Agent workflow**:
 
-1. Read `dev-sandbox-policy.yaml`
+1. Read `deploy/docker/sandbox/dev-sandbox-policy.yaml`
 2. Check that no existing policy already covers `api.github.com:443` — if one does, warn about overlap
 3. Check that the key `github_readonly` doesn't already exist
 4. Insert the new policy under `network_policies`:
@@ -760,11 +760,11 @@ The agent uses `StrReplace` to insert after the last existing policy in the `net
 
 ### Example F2: Modify an Existing Policy (Add an Endpoint)
 
-**User**: "Add sentry.io to the claude_code policy in dev-sandbox-policy.yaml"
+**User**: "Add sentry.io to the claude_code policy in deploy/docker/sandbox/dev-sandbox-policy.yaml"
 
 **Agent workflow**:
 
-1. Read `dev-sandbox-policy.yaml`
+1. Read `deploy/docker/sandbox/dev-sandbox-policy.yaml`
 2. Find the `claude_code` policy
 3. Check that `sentry.io:443` isn't already listed in its endpoints
 4. Add the new endpoint to the existing `endpoints` list:
@@ -878,11 +878,11 @@ The agent notes that `filesystem_policy`, `landlock`, `process`, and `inference`
 
 ### Example F5: Handle a Key Conflict
 
-**User**: "Add an nvidia policy to dev-sandbox-policy.yaml"
+**User**: "Add an nvidia policy to deploy/docker/sandbox/dev-sandbox-policy.yaml"
 
 **Agent workflow**:
 
-1. Read `dev-sandbox-policy.yaml`
+1. Read `deploy/docker/sandbox/dev-sandbox-policy.yaml`
 2. Find that a policy key `nvidia` already exists
 3. **Ask the user**: "A policy named `nvidia` already exists. Do you want to replace it, add endpoints to it, or use a different name (e.g., `nvidia_inference_v2`)?"
 4. Proceed based on the user's answer
diff --git a/.claude/agent-memory/arch-doc-writer/MEMORY.md b/.claude/agent-memory/arch-doc-writer/MEMORY.md
index 1dc66bbf..d1000eed 100644
--- a/.claude/agent-memory/arch-doc-writer/MEMORY.md
+++ b/.claude/agent-memory/arch-doc-writer/MEMORY.md
@@ -23,7 +23,7 @@
 - Naming convention: "gateway" in prose for the control plane component; code identifiers like `navigator-server` stay unchanged
 
 ## Key Patterns
-- OPA baked-in rules: `include_str!("../../../dev-sandbox-policy.rego")` in opa.rs
+- OPA baked-in rules: `include_str!("../../navigator-policy/dev-sandbox-policy.rego")` in opa.rs
 - Policy loading: gRPC mode (NAVIGATOR_SANDBOX_ID + NAVIGATOR_ENDPOINT) or file mode (--policy-rules + --policy-data)
 - Provider env injection: both entrypoint process (tokio Command) and SSH shell (std Command)
 - Cluster bootstrap: `sandbox_create_with_bootstrap()` auto-deploys when no cluster exists (main.rs ~line 632)
diff --git a/architecture/README.md b/architecture/README.md
index 99ca9908..a16d98fa 100644
--- a/architecture/README.md
+++ b/architecture/README.md
@@ -198,7 +198,7 @@ The inference routing system transparently intercepts AI inference API calls fro
 
 | Component | Location | Role |
 |---|---|---|
-| OPA `network_action` rule | `dev-sandbox-policy.rego` | Returns `inspect_for_inference` when no explicit policy match and inference routes exist |
+| OPA `network_action` rule | `crates/navigator-policy/dev-sandbox-policy.rego` | Returns `inspect_for_inference` when no explicit policy match and inference routes exist |
 | Proxy interception | `crates/navigator-sandbox/src/proxy.rs` | TLS-terminates intercepted connections, parses HTTP, calls gateway |
 | Inference pattern detection | `crates/navigator-sandbox/src/l7/inference.rs` | Matches HTTP method + path against known inference API patterns |
 | gRPC forwarding | `crates/navigator-sandbox/src/grpc_client.rs` | Sends `ProxyInferenceRequest` to the gateway |
diff --git a/architecture/build-containers.md b/architecture/build-containers.md
index 8ac6c1f9..ad64f75e 100644
--- a/architecture/build-containers.md
+++ b/architecture/build-containers.md
@@ -338,7 +338,7 @@ All builds use mise tasks defined in `tasks/*.toml` (included from `mise.toml`).
 | `crates/navigator-core/*`, `crates/navigator-providers/*` | Gateway + sandbox rebuild |
 | `crates/navigator-router/*` | Gateway rebuild |
 | `crates/navigator-server/*`, `deploy/docker/Dockerfile.server` | Gateway rebuild |
-| `crates/navigator-sandbox/*`, `deploy/docker/sandbox/*`, `deploy/docker/openclaw-start.sh`, `python/*`, `pyproject.toml`, `uv.lock`, `dev-sandbox-policy.rego` | Sandbox rebuild |
+| `crates/navigator-policy/*` (including `dev-sandbox-policy.rego`), `crates/navigator-sandbox/*`, `deploy/docker/sandbox/*`, `deploy/docker/openclaw-start.sh`, `python/*`, `pyproject.toml`, `uv.lock` | Sandbox rebuild |
 | `deploy/helm/navigator/*` | Helm upgrade |
 
 **Explicit target mode** (arguments: `server`, `sandbox`, `chart`, `all`): Rebuilds only the specified components.
diff --git a/architecture/inference-routing.md b/architecture/inference-routing.md
index 3ec8d30b..142c8628 100644
--- a/architecture/inference-routing.md
+++ b/architecture/inference-routing.md
@@ -22,7 +22,7 @@ The inference routing system transparently intercepts AI inference API calls fro
 | `crates/navigator-sandbox/src/main.rs` | Sandbox binary CLI: `--inference-routes` / `NEMOCLAW_INFERENCE_ROUTES` flag definition |
 | `tasks/ci.toml` | `[sandbox]` task: mounts `inference-routes.yaml`, sets env vars for dev sandbox |
 | `inference-routes.yaml` | Default standalone routes for dev sandbox (NVIDIA API endpoint) |
-| `dev-sandbox-policy.rego` | `network_action` Rego rule -- tri-state decision logic |
+| `crates/navigator-policy/dev-sandbox-policy.rego` | `network_action` Rego rule -- tri-state decision logic |
 
 ## Architecture Overview
 
@@ -174,7 +174,7 @@ The `evaluate_network_action()` method evaluates `data.navigator.sandbox.network
 
 ### Rego rules
 
-**File:** `dev-sandbox-policy.rego`
+**File:** `crates/navigator-policy/dev-sandbox-policy.rego`
 
 ```rego
 default network_action := "deny"
@@ -567,8 +567,8 @@ The `create` and `update` commands perform protocol auto-detection when `--proto
 
 Running `mise run cluster:sandbox` starts a standalone sandbox container with inference routing pre-configured. The task mounts three files into the container:
 
-- `dev-sandbox-policy.rego` as `/var/navigator/policy.rego`
-- `dev-sandbox-policy.yaml` as `/var/navigator/data.yaml`
+- `crates/navigator-policy/dev-sandbox-policy.rego` as `/var/navigator/policy.rego`
+- `deploy/docker/sandbox/dev-sandbox-policy.yaml` as `/var/navigator/data.yaml`
 - `inference-routes.yaml` as `/var/navigator/inference-routes.yaml`
 
 The container receives `NEMOCLAW_INFERENCE_ROUTES=/var/navigator/inference-routes.yaml` to enable standalone inference routing. `NVIDIA_API_KEY` is always forwarded from the host environment (empty string if unset).
diff --git a/architecture/sandbox.md b/architecture/sandbox.md
index 9f61aacb..6bcb44ed 100644
--- a/architecture/sandbox.md
+++ b/architecture/sandbox.md
@@ -191,7 +191,7 @@ The OPA engine lives in `crates/navigator-sandbox/src/opa.rs` and uses the `rego
 
 ### Baked-in rules
 
-The Rego rules are compiled into the binary via `include_str!("../../../dev-sandbox-policy.rego")`. The package is `navigator.sandbox`. Key rules:
+The Rego rules are compiled into the binary via `include_str!("../../navigator-policy/dev-sandbox-policy.rego")`. The package is `navigator.sandbox`. Key rules:
 
 | Rule | Type | Purpose |
 |------|------|---------|
diff --git a/dev-sandbox-policy.rego b/crates/navigator-policy/dev-sandbox-policy.rego
similarity index 100%
rename from dev-sandbox-policy.rego
rename to crates/navigator-policy/dev-sandbox-policy.rego
diff --git a/crates/navigator-policy/src/lib.rs b/crates/navigator-policy/src/lib.rs
index da43c746..9948e98d 100644
--- a/crates/navigator-policy/src/lib.rs
+++ b/crates/navigator-policy/src/lib.rs
@@ -10,7 +10,7 @@
 //! policy schema. Both parsing (YAML→proto) and serialization (proto→YAML) use
 //! these types, ensuring round-trip fidelity.
 
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashMap};
 
 use miette::{IntoDiagnostic, Result, WrapErr};
 use navigator_core::proto::{
@@ -20,7 +20,8 @@ use navigator_core::proto::{
 use serde::{Deserialize, Serialize};
 
 /// Built-in default sandbox policy YAML (embedded at compile time).
-const DEFAULT_SANDBOX_POLICY_YAML: &str = include_str!("../../../dev-sandbox-policy.yaml");
+const DEFAULT_SANDBOX_POLICY_YAML: &str =
+    include_str!("../../../deploy/docker/sandbox/dev-sandbox-policy.yaml");
 
 // ---------------------------------------------------------------------------
 // YAML serde types (canonical — used for both parsing and serialization)
diff --git a/crates/navigator-sandbox/src/opa.rs b/crates/navigator-sandbox/src/opa.rs
index 07b178e3..ba5b3551 100644
--- a/crates/navigator-sandbox/src/opa.rs
+++ b/crates/navigator-sandbox/src/opa.rs
@@ -16,7 +16,7 @@ use std::sync::Mutex;
 /// Baked-in rego rules for OPA policy evaluation.
 /// These rules define the network access decision logic and static config
 /// passthroughs. They reference `data.sandbox.*` for policy data.
-const BAKED_POLICY_RULES: &str = include_str!("../../../dev-sandbox-policy.rego");
+const BAKED_POLICY_RULES: &str = include_str!("../../navigator-policy/dev-sandbox-policy.rego");
 
 /// Result of evaluating a network access request against OPA policy.
 pub struct PolicyDecision {
@@ -671,8 +671,9 @@ mod tests {
         ProcessPolicy as ProtoProc, SandboxPolicy as ProtoSandboxPolicy,
     };
 
-    const TEST_POLICY: &str = include_str!("../../../dev-sandbox-policy.rego");
-    const TEST_DATA_YAML: &str = include_str!("../../../dev-sandbox-policy.yaml");
+    const TEST_POLICY: &str = include_str!("../../navigator-policy/dev-sandbox-policy.rego");
+    const TEST_DATA_YAML: &str =
+        include_str!("../../../deploy/docker/sandbox/dev-sandbox-policy.yaml");
 
     fn test_engine() -> OpaEngine {
         OpaEngine::from_strings(TEST_POLICY, TEST_DATA_YAML).expect("Failed to load test policy")
diff --git a/deploy/docker/Dockerfile.python-wheels b/deploy/docker/Dockerfile.python-wheels
index dd678ad0..2e411b2f 100644
--- a/deploy/docker/Dockerfile.python-wheels
+++ b/deploy/docker/Dockerfile.python-wheels
@@ -71,7 +71,7 @@ RUN --mount=type=cache,id=cargo-registry-python-wheels-${TARGETARCH},sharing=loc
 # Copy actual source code and Python packaging files.
 COPY crates/ crates/
 COPY pyproject.toml README.md ./
-COPY dev-sandbox-policy.yaml ./
+COPY deploy/docker/sandbox/dev-sandbox-policy.yaml ./deploy/docker/sandbox/
 COPY python/ python/
 
 # Touch source files to ensure they're rebuilt (not the cached dummy).
diff --git a/deploy/docker/Dockerfile.python-wheels-macos b/deploy/docker/Dockerfile.python-wheels-macos
index 46a4c6b1..e8d6327a 100644
--- a/deploy/docker/Dockerfile.python-wheels-macos
+++ b/deploy/docker/Dockerfile.python-wheels-macos
@@ -76,7 +76,7 @@ RUN --mount=type=cache,id=cargo-registry-python-wheels-macos-${TARGETARCH},shari
 # Copy actual source code and Python packaging files.
 COPY crates/ crates/
 COPY pyproject.toml README.md ./
-COPY dev-sandbox-policy.yaml ./
+COPY deploy/docker/sandbox/dev-sandbox-policy.yaml ./deploy/docker/sandbox/
 COPY python/ python/
 
 # Touch source files to ensure they're rebuilt (not the cached dummy).
diff --git a/deploy/docker/sandbox/Dockerfile.base b/deploy/docker/sandbox/Dockerfile.base
index 45accd5f..f0e7ebde 100644
--- a/deploy/docker/sandbox/Dockerfile.base
+++ b/deploy/docker/sandbox/Dockerfile.base
@@ -49,8 +49,8 @@ RUN mkdir -p crates/navigator-cli/src crates/navigator-core/src crates/navigator
 
 # Copy proto files needed for build
 COPY proto/ proto/
-COPY dev-sandbox-policy.rego ./
-COPY dev-sandbox-policy.yaml ./
+COPY crates/navigator-policy/dev-sandbox-policy.rego ./crates/navigator-policy/
+COPY deploy/docker/sandbox/dev-sandbox-policy.yaml ./deploy/docker/sandbox/
 
 # Build dependencies only (cached unless Cargo.toml/lock changes).
 # sccache uses memcached in CI or the local disk cache mount for local dev.
@@ -172,6 +172,10 @@ ENV PATH="/app/.venv/bin:$PATH"
 # both ~/.claude/skills/ and ~/.agents/skills/ for agent discovery.
 COPY deploy/docker/sandbox/skills/ /sandbox/.openclaw/skills/
 
+# Bake the default sandbox policy into the image so that sandboxes without an
+# explicit policy from the gateway can discover it on disk.
+COPY deploy/docker/sandbox/dev-sandbox-policy.yaml /etc/navigator/policy.yaml
+
 RUN mkdir -p /var/navigator /sandbox /var/log && \
     touch /var/log/navigator.log && \
     chown supervisor:supervisor /var/log/navigator.log && \
diff --git a/dev-sandbox-policy-empty.yaml b/deploy/docker/sandbox/dev-sandbox-policy-empty.yaml
similarity index 100%
rename from dev-sandbox-policy-empty.yaml
rename to deploy/docker/sandbox/dev-sandbox-policy-empty.yaml
diff --git a/dev-sandbox-policy.yaml b/deploy/docker/sandbox/dev-sandbox-policy.yaml
similarity index 100%
rename from dev-sandbox-policy.yaml
rename to deploy/docker/sandbox/dev-sandbox-policy.yaml
diff --git a/examples/private-ip-routing/README.md b/examples/private-ip-routing/README.md
index 2a265780..1bdfa084 100644
--- a/examples/private-ip-routing/README.md
+++ b/examples/private-ip-routing/README.md
@@ -11,7 +11,7 @@ When an endpoint in the sandbox policy includes an `allowed_ips` field, the
 proxy validates the resolved IP against that CIDR allowlist instead of
 blanket-blocking. Loopback and link-local remain always-blocked regardless.
 
-The dev sandbox policy (`dev-sandbox-policy.yaml`) includes a `cluster_pods`
+The dev sandbox policy (`deploy/docker/sandbox/dev-sandbox-policy.yaml`) includes a `cluster_pods`
 entry that allows any binary to reach port 8080 on the k3s pod network:
 
 ```yaml
diff --git a/tasks/scripts/cluster-deploy-fast.sh b/tasks/scripts/cluster-deploy-fast.sh
index 9d137d70..cd099dbb 100755
--- a/tasks/scripts/cluster-deploy-fast.sh
+++ b/tasks/scripts/cluster-deploy-fast.sh
@@ -139,7 +139,7 @@ matches_sandbox() {
     crates/navigator-core/*|crates/navigator-providers/*)
       return 0
       ;;
-    crates/navigator-policy/*|crates/navigator-sandbox/*|deploy/docker/sandbox/*|deploy/docker/openclaw-start.sh|python/*|pyproject.toml|uv.lock|dev-sandbox-policy.rego|dev-sandbox-policy.yaml)
+    crates/navigator-policy/*|crates/navigator-sandbox/*|deploy/docker/sandbox/*|deploy/docker/openclaw-start.sh|python/*|pyproject.toml|uv.lock)
       return 0
       ;;
     *)
@@ -175,7 +175,7 @@ compute_fingerprint() {
       committed_trees=$(git ls-tree HEAD Cargo.toml Cargo.lock proto/ deploy/docker/cross-build.sh crates/navigator-core/ crates/navigator-providers/ crates/navigator-router/ crates/navigator-server/ deploy/docker/Dockerfile.server 2>/dev/null || true)
       ;;
     sandbox)
-      committed_trees=$(git ls-tree HEAD Cargo.toml Cargo.lock proto/ deploy/docker/cross-build.sh crates/navigator-core/ crates/navigator-providers/ crates/navigator-sandbox/ deploy/docker/sandbox/ deploy/docker/openclaw-start.sh python/ pyproject.toml uv.lock dev-sandbox-policy.rego 2>/dev/null || true)
+      committed_trees=$(git ls-tree HEAD Cargo.toml Cargo.lock proto/ deploy/docker/cross-build.sh crates/navigator-core/ crates/navigator-policy/ crates/navigator-providers/ crates/navigator-sandbox/ deploy/docker/sandbox/ deploy/docker/openclaw-start.sh python/ pyproject.toml uv.lock 2>/dev/null || true)
       ;;
     helm)
       committed_trees=$(git ls-tree HEAD deploy/helm/navigator/ 2>/dev/null || true)

From 5f726a269217e49a9d731ccbfab80fd22bf22416 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Wed, 4 Mar 2026 14:51:31 -0800
Subject: [PATCH 5/8] chore: remove unused dev-sandbox-policy-empty.yaml

---
 .../sandbox/dev-sandbox-policy-empty.yaml     | 36 -------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 deploy/docker/sandbox/dev-sandbox-policy-empty.yaml

diff --git a/deploy/docker/sandbox/dev-sandbox-policy-empty.yaml b/deploy/docker/sandbox/dev-sandbox-policy-empty.yaml
deleted file mode 100644
index f8a00760..00000000
--- a/deploy/docker/sandbox/dev-sandbox-policy-empty.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-# Empty sandbox policy — no network access.
-# Use this as a starting point for iterative policy development.
-# Network policies and inference routes can be added at runtime
-# via `ncl sandbox policy set`.
-
-version: 1
-
-filesystem_policy:
-  include_workdir: true
-  read_only:
-    - /usr
-    - /lib
-    - /proc
-    - /dev/urandom
-    - /app
-    - /etc
-    - /var/log
-  read_write:
-    - /sandbox
-    - /tmp
-    - /dev/null
-
-landlock:
-  compatibility: best_effort
-
-process:
-  run_as_user: sandbox
-  run_as_group: sandbox
-
-# No network policies — all outbound connections will be denied.
-# Add policies here or push updates at runtime with:
-#   ncl sandbox policy set <name> --policy <file>
-network_policies: {}

From 3066b4f4fc3200c6f0d5084827335ea67a4b51aa Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Wed, 4 Mar 2026 14:56:16 -0800
Subject: [PATCH 6/8] chore: remove inference-routes.yaml config

---
 inference-routes.yaml | 16 ----------------
 1 file changed, 16 deletions(-)
 delete mode 100644 inference-routes.yaml

diff --git a/inference-routes.yaml b/inference-routes.yaml
deleted file mode 100644
index b4af339e..00000000
--- a/inference-routes.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-# Default inference routes for `mise run sandbox`.
-#
-# Points the "local" route at NVIDIA's hosted API.
-# Set NVIDIA_API_KEY in your environment (or .env) before running the sandbox.
-
-routes:
-  - routing_hint: local
-    endpoint: https://integrate.api.nvidia.com/
-    model: nvidia/nemotron-3-nano-30b-a3b
-    protocols:
-      - openai_chat_completions
-      - openai_completions
-    api_key_env: NVIDIA_API_KEY

From b7faf5a6c4582c6a5b7c62696f72f58d4452d224 Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Wed, 4 Mar 2026 15:43:28 -0800
Subject: [PATCH 7/8] refactor: stop shipping dev policy in the wheel and
 rename sandbox_name to sandbox

Remove the compile-time embed of dev-sandbox-policy.yaml from the
navigator-policy crate so the CLI and TUI no longer implicitly fall
back to the dev policy. Users must now explicitly pass --policy or set
NEMOCLAW_SANDBOX_POLICY; otherwise no policy is sent and the server /
sandbox applies its own default (disk discovery or restrictive default).

Also rename the sandbox_name parameter to sandbox throughout
navigator-sandbox and update the env var from NEMOCLAW_SANDBOX_NAME
to NEMOCLAW_SANDBOX.
---
 crates/navigator-cli/src/run.rs              | 22 +++---
 crates/navigator-policy/src/lib.rs           | 72 +++-----------------
 crates/navigator-sandbox/src/grpc_client.rs  | 10 +--
 crates/navigator-sandbox/src/lib.rs          | 14 ++--
 crates/navigator-sandbox/src/main.rs         |  8 +--
 crates/navigator-tui/src/lib.rs              | 16 +++--
 deploy/docker/Dockerfile.python-wheels       |  1 -
 deploy/docker/Dockerfile.python-wheels-macos |  1 -
 8 files changed, 49 insertions(+), 95 deletions(-)

diff --git a/crates/navigator-cli/src/run.rs b/crates/navigator-cli/src/run.rs
index 4cceb407..f3ec49e0 100644
--- a/crates/navigator-cli/src/run.rs
+++ b/crates/navigator-cli/src/run.rs
@@ -1037,11 +1037,10 @@ pub async fn sandbox_create(
 
     // When a custom image is specified, clear the default run_as_user/group
     // to prevent failures on images that lack the "sandbox" user/group.
-    if image.is_some()
-        && let Some(ref mut process) = policy.process
-    {
-        process.run_as_user = String::new();
-        process.run_as_group = String::new();
+    if image.is_some() {
+        if let Some(ref mut p) = policy {
+            navigator_policy::clear_process_identity(p);
+        }
     }
 
     let template = image.map(|img| SandboxTemplate {
@@ -1051,7 +1050,7 @@ pub async fn sandbox_create(
 
     let request = CreateSandboxRequest {
         spec: Some(SandboxSpec {
-            policy: Some(policy),
+            policy,
             providers: configured_providers,
             template,
             ..SandboxSpec::default()
@@ -1299,12 +1298,12 @@ pub async fn sandbox_create(
     }
 }
 
-/// Default sandbox policy YAML, baked in at compile time.
 /// Load sandbox policy YAML.
 ///
-/// Resolution order: `--policy` flag > `NEMOCLAW_SANDBOX_POLICY` env var > built-in default.
-/// Delegates to `navigator_policy::load_sandbox_policy`.
-fn load_sandbox_policy(cli_path: Option<&str>) -> Result<SandboxPolicy> {
+/// Resolution order: `--policy` flag > `NEMOCLAW_SANDBOX_POLICY` env var.
+/// Returns `None` when no policy source is configured, allowing the server
+/// to apply its own default.
+fn load_sandbox_policy(cli_path: Option<&str>) -> Result<Option<SandboxPolicy>> {
     navigator_policy::load_sandbox_policy(cli_path)
 }
 
@@ -2606,7 +2605,8 @@ pub async fn sandbox_policy_set(
     timeout_secs: u64,
     tls: &TlsOptions,
 ) -> Result<()> {
-    let policy = load_sandbox_policy(Some(policy_path))?;
+    let policy = load_sandbox_policy(Some(policy_path))?
+        .ok_or_else(|| miette::miette!("No policy loaded from {policy_path}"))?;
 
     let mut client = grpc_client(server, tls).await?;
 
diff --git a/crates/navigator-policy/src/lib.rs b/crates/navigator-policy/src/lib.rs
index 9948e98d..ba0ab2f3 100644
--- a/crates/navigator-policy/src/lib.rs
+++ b/crates/navigator-policy/src/lib.rs
@@ -3,8 +3,7 @@
 
 //! Shared sandbox policy parsing and defaults for NemoClaw.
 //!
-//! Provides bidirectional YAML↔proto conversion for sandbox policies, with a
-//! built-in default policy embedded from `dev-sandbox-policy.yaml`.
+//! Provides bidirectional YAML↔proto conversion for sandbox policies.
 //!
 //! The serde types here are the **single canonical representation** of the YAML
 //! policy schema. Both parsing (YAML→proto) and serialization (proto→YAML) use
@@ -19,10 +18,6 @@ use navigator_core::proto::{
 };
 use serde::{Deserialize, Serialize};
 
-/// Built-in default sandbox policy YAML (embedded at compile time).
-const DEFAULT_SANDBOX_POLICY_YAML: &str =
-    include_str!("../../../deploy/docker/sandbox/dev-sandbox-policy.yaml");
-
 // ---------------------------------------------------------------------------
 // YAML serde types (canonical — used for both parsing and serialization)
 // ---------------------------------------------------------------------------
@@ -353,12 +348,16 @@ pub fn serialize_sandbox_policy(policy: &SandboxPolicy) -> Result<String> {
         .wrap_err("failed to serialize policy to YAML")
 }
 
-/// Load a sandbox policy with the standard resolution order:
+/// Load a sandbox policy from an explicit source.
 ///
+/// Resolution order:
 /// 1. `cli_path` argument (e.g. from a `--policy` flag)
 /// 2. `NEMOCLAW_SANDBOX_POLICY` environment variable
-/// 3. Built-in default (`dev-sandbox-policy.yaml`)
-pub fn load_sandbox_policy(cli_path: Option<&str>) -> Result<SandboxPolicy> {
+///
+/// Returns `Ok(None)` when no policy source is configured, allowing the
+/// caller to omit the policy and let the server / sandbox apply its own
+/// default.
+pub fn load_sandbox_policy(cli_path: Option<&str>) -> Result<Option<SandboxPolicy>> {
     let contents = if let Some(p) = cli_path {
         let path = std::path::Path::new(p);
         std::fs::read_to_string(path)
@@ -370,16 +369,9 @@ pub fn load_sandbox_policy(cli_path: Option<&str>) -> Result<SandboxPolicy> {
             .into_diagnostic()
             .wrap_err_with(|| format!("failed to read sandbox policy from {}", path.display()))?
     } else {
-        DEFAULT_SANDBOX_POLICY_YAML.to_string()
+        return Ok(None);
     };
-    parse_sandbox_policy(&contents)
-}
-
-/// Return the built-in default sandbox policy.
-pub fn default_sandbox_policy() -> SandboxPolicy {
-    // The embedded YAML is known-good; unwrap is safe.
-    parse_sandbox_policy(DEFAULT_SANDBOX_POLICY_YAML)
-        .expect("built-in dev-sandbox-policy.yaml must be valid")
+    parse_sandbox_policy(&contents).map(Some)
 }
 
 /// Well-known path where a sandbox container image can ship a policy YAML file.
@@ -441,45 +433,11 @@ pub fn clear_process_identity(policy: &mut SandboxPolicy) {
 mod tests {
     use super::*;
 
-    /// Round-trip: parse the built-in default policy YAML → proto → YAML → proto
-    /// and verify the two proto representations are identical.
-    #[test]
-    fn round_trip_default_policy() {
-        let proto1 = parse_sandbox_policy(DEFAULT_SANDBOX_POLICY_YAML)
-            .expect("failed to parse default policy");
-
-        let yaml_str =
-            serialize_sandbox_policy(&proto1).expect("failed to serialize policy to YAML");
-
-        let proto2 = parse_sandbox_policy(&yaml_str).expect("failed to re-parse serialized policy");
-
-        assert_eq!(proto1.version, proto2.version);
-        assert_eq!(proto1.filesystem, proto2.filesystem);
-        assert_eq!(proto1.landlock, proto2.landlock);
-        assert_eq!(proto1.process, proto2.process);
-        assert_eq!(proto1.inference, proto2.inference);
-
-        // Compare network policies (proto HashMap ordering may differ, so
-        // compare key-by-key).
-        assert_eq!(
-            proto1.network_policies.len(),
-            proto2.network_policies.len(),
-            "network policy count mismatch"
-        );
-        for (key, rule1) in &proto1.network_policies {
-            let rule2 = proto2
-                .network_policies
-                .get(key)
-                .unwrap_or_else(|| panic!("missing network policy key: {key}"));
-            assert_eq!(rule1, rule2, "network policy mismatch for key: {key}");
-        }
-    }
-
     /// Verify that the serialized YAML uses `filesystem_policy` (not
     /// `filesystem`) so it can be fed back to `parse_sandbox_policy`.
     #[test]
     fn serialized_yaml_uses_filesystem_policy_key() {
-        let proto = default_sandbox_policy();
+        let proto = restrictive_default_policy();
         let yaml = serialize_sandbox_policy(&proto).expect("serialize failed");
         assert!(
             yaml.contains("filesystem_policy:"),
@@ -663,14 +621,6 @@ network_policies:
         assert!(parse_sandbox_policy(yaml).is_err());
     }
 
-    #[test]
-    fn default_sandbox_policy_is_valid() {
-        let policy = default_sandbox_policy();
-        assert_eq!(policy.version, 1);
-        // Dev default has network policies (unlike the restrictive default).
-        assert!(!policy.network_policies.is_empty());
-    }
-
     #[test]
     fn clear_process_identity_clears_fields() {
         let mut policy = restrictive_default_policy();
diff --git a/crates/navigator-sandbox/src/grpc_client.rs b/crates/navigator-sandbox/src/grpc_client.rs
index 7de9896d..167abd42 100644
--- a/crates/navigator-sandbox/src/grpc_client.rs
+++ b/crates/navigator-sandbox/src/grpc_client.rs
@@ -115,12 +115,12 @@ async fn fetch_policy_with_client(
 /// Sync a locally-discovered policy using an existing client connection.
 async fn sync_policy_with_client(
     client: &mut NavigatorClient<Channel>,
-    sandbox_name: &str,
+    sandbox: &str,
     policy: &ProtoSandboxPolicy,
 ) -> Result<()> {
     client
         .update_sandbox_policy(UpdateSandboxPolicyRequest {
-            name: sandbox_name.to_string(),
+            name: sandbox.to_string(),
             policy: Some(policy.clone()),
         })
         .await
@@ -137,20 +137,20 @@ async fn sync_policy_with_client(
 pub async fn discover_and_sync_policy(
     endpoint: &str,
     sandbox_id: &str,
-    sandbox_name: &str,
+    sandbox: &str,
     discovered_policy: &ProtoSandboxPolicy,
 ) -> Result<ProtoSandboxPolicy> {
     debug!(
         endpoint = %endpoint,
         sandbox_id = %sandbox_id,
-        sandbox_name = %sandbox_name,
+        sandbox = %sandbox,
         "Syncing discovered policy and re-fetching canonical version"
     );
 
     let mut client = connect(endpoint).await?;
 
     // Sync the discovered policy to the gateway.
-    sync_policy_with_client(&mut client, sandbox_name, discovered_policy).await?;
+    sync_policy_with_client(&mut client, sandbox, discovered_policy).await?;
 
     // Re-fetch from the gateway to get the canonical version/hash.
     fetch_policy_with_client(&mut client, sandbox_id)
diff --git a/crates/navigator-sandbox/src/lib.rs b/crates/navigator-sandbox/src/lib.rs
index 0b3ccb6d..1d7471b9 100644
--- a/crates/navigator-sandbox/src/lib.rs
+++ b/crates/navigator-sandbox/src/lib.rs
@@ -120,7 +120,7 @@ pub async fn run_sandbox(
     timeout_secs: u64,
     interactive: bool,
     sandbox_id: Option<String>,
-    sandbox_name: Option<String>,
+    sandbox: Option<String>,
     navigator_endpoint: Option<String>,
     policy_rules: Option<String>,
     policy_data: Option<String>,
@@ -139,7 +139,7 @@ pub async fn run_sandbox(
     let navigator_endpoint_for_proxy = navigator_endpoint.clone();
     let (mut policy, opa_engine) = load_policy(
         sandbox_id.clone(),
-        sandbox_name,
+        sandbox,
         navigator_endpoint.clone(),
         policy_rules,
         policy_data,
@@ -687,7 +687,7 @@ fn spawn_route_refresh(
 /// 4. Otherwise, return an error
 async fn load_policy(
     sandbox_id: Option<String>,
-    sandbox_name: Option<String>,
+    sandbox: Option<String>,
     navigator_endpoint: Option<String>,
     policy_rules: Option<String>,
     policy_data: Option<String>,
@@ -734,16 +734,16 @@ async fn load_policy(
                 // gateway so it becomes the authoritative baseline.
                 info!("Server returned no policy; attempting local discovery");
                 let discovered = discover_policy_from_disk_or_default();
-                let name = sandbox_name.as_deref().ok_or_else(|| {
+                let sandbox = sandbox.as_deref().ok_or_else(|| {
                     miette::miette!(
-                        "Cannot sync discovered policy: sandbox name not available.\n\
-                         Set NEMOCLAW_SANDBOX_NAME or --sandbox-name to enable policy sync."
+                        "Cannot sync discovered policy: sandbox not available.\n\
+                         Set NEMOCLAW_SANDBOX or --sandbox to enable policy sync."
                     )
                 })?;
 
                 // Sync and re-fetch over a single connection to avoid extra
                 // TLS handshakes.
-                grpc_client::discover_and_sync_policy(endpoint, id, name, &discovered).await?
+                grpc_client::discover_and_sync_policy(endpoint, id, sandbox, &discovered).await?
             }
         };
 
diff --git a/crates/navigator-sandbox/src/main.rs b/crates/navigator-sandbox/src/main.rs
index 1b61851b..783f9241 100644
--- a/crates/navigator-sandbox/src/main.rs
+++ b/crates/navigator-sandbox/src/main.rs
@@ -39,10 +39,10 @@ struct Args {
     #[arg(long, env = "NEMOCLAW_SANDBOX_ID")]
     sandbox_id: Option<String>,
 
-    /// Sandbox name (used for policy sync when the sandbox discovers policy
+    /// Sandbox (used for policy sync when the sandbox discovers policy
     /// from disk or falls back to the restrictive default).
-    #[arg(long, env = "NEMOCLAW_SANDBOX_NAME")]
-    sandbox_name: Option<String>,
+    #[arg(long, env = "NEMOCLAW_SANDBOX")]
+    sandbox: Option<String>,
 
     /// NemoClaw server gRPC endpoint for fetching policy.
     /// Required when using --sandbox-id.
@@ -177,7 +177,7 @@ async fn main() -> Result<()> {
         args.timeout,
         args.interactive,
         args.sandbox_id,
-        args.sandbox_name,
+        args.sandbox,
         args.navigator_endpoint,
         args.policy_rules,
         args.policy_data,
diff --git a/crates/navigator-tui/src/lib.rs b/crates/navigator-tui/src/lib.rs
index 972b2675..2bdbcaae 100644
--- a/crates/navigator-tui/src/lib.rs
+++ b/crates/navigator-tui/src/lib.rs
@@ -1150,17 +1150,23 @@ fn spawn_create_sandbox(app: &mut App, tx: mpsc::UnboundedSender<Event>) {
             None
         };
 
-        let mut policy = navigator_policy::default_sandbox_policy();
-        if has_custom_image {
-            navigator_policy::clear_process_identity(&mut policy);
-        }
+        let policy = if has_custom_image {
+            // Custom images may lack the default "sandbox" user/group, so
+            // use a restrictive default with cleared process identity.
+            let mut p = navigator_policy::restrictive_default_policy();
+            navigator_policy::clear_process_identity(&mut p);
+            Some(p)
+        } else {
+            // Let the server apply the sandbox's own default policy.
+            None
+        };
 
         let req = navigator_core::proto::CreateSandboxRequest {
             name,
             spec: Some(navigator_core::proto::SandboxSpec {
                 providers: selected_providers,
                 template,
-                policy: Some(policy),
+                policy,
                 ..Default::default()
             }),
         };
diff --git a/deploy/docker/Dockerfile.python-wheels b/deploy/docker/Dockerfile.python-wheels
index 2e411b2f..e8fe3db9 100644
--- a/deploy/docker/Dockerfile.python-wheels
+++ b/deploy/docker/Dockerfile.python-wheels
@@ -71,7 +71,6 @@ RUN --mount=type=cache,id=cargo-registry-python-wheels-${TARGETARCH},sharing=loc
 # Copy actual source code and Python packaging files.
 COPY crates/ crates/
 COPY pyproject.toml README.md ./
-COPY deploy/docker/sandbox/dev-sandbox-policy.yaml ./deploy/docker/sandbox/
 COPY python/ python/
 
 # Touch source files to ensure they're rebuilt (not the cached dummy).
diff --git a/deploy/docker/Dockerfile.python-wheels-macos b/deploy/docker/Dockerfile.python-wheels-macos
index e8d6327a..a074cb2f 100644
--- a/deploy/docker/Dockerfile.python-wheels-macos
+++ b/deploy/docker/Dockerfile.python-wheels-macos
@@ -76,7 +76,6 @@ RUN --mount=type=cache,id=cargo-registry-python-wheels-macos-${TARGETARCH},shari
 # Copy actual source code and Python packaging files.
 COPY crates/ crates/
 COPY pyproject.toml README.md ./
-COPY deploy/docker/sandbox/dev-sandbox-policy.yaml ./deploy/docker/sandbox/
 COPY python/ python/
 
 # Touch source files to ensure they're rebuilt (not the cached dummy).

From 62702c56a6058f9c401223593a3aae50db35f49e Mon Sep 17 00:00:00 2001
From: Drew Newberry <anewberry@nvidia.com>
Date: Wed, 4 Mar 2026 16:54:43 -0800
Subject: [PATCH 8/8] feat(sandbox): add --tty/--no-tty flags and fix SSH
 tunnel reliability

Fix sandbox_exec TTY detection so interactive commands like claude work
when launched through mise or other non-terminal wrappers. The old code
relied solely on stdout.is_terminal(), which returns false in many valid
interactive contexts. Add explicit --tty/--no-tty overrides and default
sandbox.sh to --tty since it always intends interactive use.

Also fix env var mismatch (NEMOCLAW_SANDBOX_NAME -> NEMOCLAW_SANDBOX)
that caused sandbox pods to crash on startup, improve deploy state
tracking with container ID detection, simplify image eviction logic,
and add tracing to the SSH tunnel handshake path.
---
 .agents/skills/create-spike/SKILL.md          |  2 +-
 .../skills/generate-sandbox-policy/SKILL.md   |  2 +-
 .../agent-memory/arch-doc-writer/MEMORY.md    |  2 +-
 architecture/README.md                        |  2 +-
 architecture/build-containers.md              |  2 +-
 architecture/inference-routing.md             |  6 +-
 architecture/sandbox.md                       |  2 +-
 architecture/security-policy.md               | 12 ++--
 crates/navigator-cli/src/main.rs              | 24 +++++++
 crates/navigator-cli/src/run.rs               | 22 +++++-
 .../data/sandbox-policy.rego}                 |  0
 crates/navigator-sandbox/src/opa.rs           |  7 +-
 crates/navigator-sandbox/src/ssh.rs           |  3 +
 .../testdata/sandbox-policy.yaml              | 69 +++++++++++++++++++
 crates/navigator-server/src/sandbox/mod.rs    |  2 +-
 crates/navigator-server/src/ssh_tunnel.rs     | 22 ++++--
 deploy/docker/sandbox/Dockerfile.base         |  2 +-
 tasks/scripts/cluster-deploy-fast.sh          | 56 ++++++++-------
 tasks/scripts/sandbox.sh                      |  2 +-
 19 files changed, 184 insertions(+), 55 deletions(-)
 rename crates/{navigator-policy/dev-sandbox-policy.rego => navigator-sandbox/data/sandbox-policy.rego} (100%)
 create mode 100644 crates/navigator-sandbox/testdata/sandbox-policy.yaml

diff --git a/.agents/skills/create-spike/SKILL.md b/.agents/skills/create-spike/SKILL.md
index 8c5ce0b5..96dd8cf4 100644
--- a/.agents/skills/create-spike/SKILL.md
+++ b/.agents/skills/create-spike/SKILL.md
@@ -289,7 +289,7 @@ User says: "Allow sandbox egress to private IP space via networking policy"
 1. Problem is clear — no clarification needed
 2. Fire `principal-engineer-reviewer` to investigate:
    - Finds `is_internal_ip()` SSRF check in `proxy.rs` that blocks RFC 1918 addresses
-   - Reads OPA policy evaluation pipeline in `opa.rs` and `crates/navigator-policy/dev-sandbox-policy.rego`
+   - Reads OPA policy evaluation pipeline in `opa.rs` and `crates/navigator-sandbox/data/sandbox-policy.rego`
    - Reads proto definitions in `sandbox.proto` for `NetworkEndpoint`
    - Maps the 4-layer defense model: netns, seccomp, OPA, SSRF check
    - Reads `architecture/security-policy.md` and `architecture/sandbox.md`
diff --git a/.agents/skills/generate-sandbox-policy/SKILL.md b/.agents/skills/generate-sandbox-policy/SKILL.md
index 5c842533..bb9f5d43 100644
--- a/.agents/skills/generate-sandbox-policy/SKILL.md
+++ b/.agents/skills/generate-sandbox-policy/SKILL.md
@@ -546,5 +546,5 @@ private_services:
 
 - Full policy schema: [architecture/security-policy.md](../../../architecture/security-policy.md)
 - Example policy file: [dev-sandbox-policy.yaml](../../../deploy/docker/sandbox/dev-sandbox-policy.yaml)
-- Rego evaluation rules: [dev-sandbox-policy.rego](../../../crates/navigator-policy/dev-sandbox-policy.rego)
+- Rego evaluation rules: [sandbox-policy.rego](../../../crates/navigator-sandbox/data/sandbox-policy.rego)
 - For translation examples from real API docs, see [examples.md](examples.md)
diff --git a/.claude/agent-memory/arch-doc-writer/MEMORY.md b/.claude/agent-memory/arch-doc-writer/MEMORY.md
index d1000eed..8035ba37 100644
--- a/.claude/agent-memory/arch-doc-writer/MEMORY.md
+++ b/.claude/agent-memory/arch-doc-writer/MEMORY.md
@@ -23,7 +23,7 @@
 - Naming convention: "gateway" in prose for the control plane component; code identifiers like `navigator-server` stay unchanged
 
 ## Key Patterns
-- OPA baked-in rules: `include_str!("../../navigator-policy/dev-sandbox-policy.rego")` in opa.rs
+- OPA baked-in rules: `include_str!("../data/sandbox-policy.rego")` in opa.rs
 - Policy loading: gRPC mode (NAVIGATOR_SANDBOX_ID + NAVIGATOR_ENDPOINT) or file mode (--policy-rules + --policy-data)
 - Provider env injection: both entrypoint process (tokio Command) and SSH shell (std Command)
 - Cluster bootstrap: `sandbox_create_with_bootstrap()` auto-deploys when no cluster exists (main.rs ~line 632)
diff --git a/architecture/README.md b/architecture/README.md
index a16d98fa..431f86e9 100644
--- a/architecture/README.md
+++ b/architecture/README.md
@@ -198,7 +198,7 @@ The inference routing system transparently intercepts AI inference API calls fro
 
 | Component | Location | Role |
 |---|---|---|
-| OPA `network_action` rule | `crates/navigator-policy/dev-sandbox-policy.rego` | Returns `inspect_for_inference` when no explicit policy match and inference routes exist |
+| OPA `network_action` rule | `crates/navigator-sandbox/data/sandbox-policy.rego` | Returns `inspect_for_inference` when no explicit policy match and inference routes exist |
 | Proxy interception | `crates/navigator-sandbox/src/proxy.rs` | TLS-terminates intercepted connections, parses HTTP, calls gateway |
 | Inference pattern detection | `crates/navigator-sandbox/src/l7/inference.rs` | Matches HTTP method + path against known inference API patterns |
 | gRPC forwarding | `crates/navigator-sandbox/src/grpc_client.rs` | Sends `ProxyInferenceRequest` to the gateway |
diff --git a/architecture/build-containers.md b/architecture/build-containers.md
index ad64f75e..d4e9d83e 100644
--- a/architecture/build-containers.md
+++ b/architecture/build-containers.md
@@ -338,7 +338,7 @@ All builds use mise tasks defined in `tasks/*.toml` (included from `mise.toml`).
 | `crates/navigator-core/*`, `crates/navigator-providers/*` | Gateway + sandbox rebuild |
 | `crates/navigator-router/*` | Gateway rebuild |
 | `crates/navigator-server/*`, `deploy/docker/Dockerfile.server` | Gateway rebuild |
-| `crates/navigator-sandbox/*`, `deploy/docker/sandbox/*`, `deploy/docker/openclaw-start.sh`, `python/*`, `pyproject.toml`, `uv.lock`, `crates/navigator-policy/dev-sandbox-policy.rego` | Sandbox rebuild |
+| `crates/navigator-sandbox/*`, `deploy/docker/sandbox/*`, `deploy/docker/openclaw-start.sh`, `python/*`, `pyproject.toml`, `uv.lock`, `crates/navigator-sandbox/data/sandbox-policy.rego` | Sandbox rebuild |
 | `deploy/helm/navigator/*` | Helm upgrade |
 
 **Explicit target mode** (arguments: `server`, `sandbox`, `chart`, `all`): Rebuilds only the specified components.
diff --git a/architecture/inference-routing.md b/architecture/inference-routing.md
index 142c8628..03997253 100644
--- a/architecture/inference-routing.md
+++ b/architecture/inference-routing.md
@@ -22,7 +22,7 @@ The inference routing system transparently intercepts AI inference API calls fro
 | `crates/navigator-sandbox/src/main.rs` | Sandbox binary CLI: `--inference-routes` / `NEMOCLAW_INFERENCE_ROUTES` flag definition |
 | `tasks/ci.toml` | `[sandbox]` task: mounts `inference-routes.yaml`, sets env vars for dev sandbox |
 | `inference-routes.yaml` | Default standalone routes for dev sandbox (NVIDIA API endpoint) |
-| `crates/navigator-policy/dev-sandbox-policy.rego` | `network_action` Rego rule -- tri-state decision logic |
+| `crates/navigator-sandbox/data/sandbox-policy.rego` | `network_action` Rego rule -- tri-state decision logic |
 
 ## Architecture Overview
 
@@ -174,7 +174,7 @@ The `evaluate_network_action()` method evaluates `data.navigator.sandbox.network
 
 ### Rego rules
 
-**File:** `crates/navigator-policy/dev-sandbox-policy.rego`
+**File:** `crates/navigator-sandbox/data/sandbox-policy.rego`
 
 ```rego
 default network_action := "deny"
@@ -567,7 +567,7 @@ The `create` and `update` commands perform protocol auto-detection when `--proto
 
 Running `mise run cluster:sandbox` starts a standalone sandbox container with inference routing pre-configured. The task mounts three files into the container:
 
-- `crates/navigator-policy/dev-sandbox-policy.rego` as `/var/navigator/policy.rego`
+- `crates/navigator-sandbox/data/sandbox-policy.rego` as `/var/navigator/policy.rego`
 - `deploy/docker/sandbox/dev-sandbox-policy.yaml` as `/var/navigator/data.yaml`
 - `inference-routes.yaml` as `/var/navigator/inference-routes.yaml`
 
diff --git a/architecture/sandbox.md b/architecture/sandbox.md
index 6bcb44ed..31bb629c 100644
--- a/architecture/sandbox.md
+++ b/architecture/sandbox.md
@@ -191,7 +191,7 @@ The OPA engine lives in `crates/navigator-sandbox/src/opa.rs` and uses the `rego
 
 ### Baked-in rules
 
-The Rego rules are compiled into the binary via `include_str!("../../navigator-policy/dev-sandbox-policy.rego")`. The package is `navigator.sandbox`. Key rules:
+The Rego rules are compiled into the binary via `include_str!("../data/sandbox-policy.rego")`. The package is `navigator.sandbox`. Key rules:
 
 | Rule | Type | Purpose |
 |------|------|---------|
diff --git a/architecture/security-policy.md b/architecture/security-policy.md
index dce09cd3..31b34562 100644
--- a/architecture/security-policy.md
+++ b/architecture/security-policy.md
@@ -17,7 +17,7 @@ Provide a Rego rules file and a YAML data file via CLI flags or environment vari
 
 ```bash
 navigator-sandbox \
-  --policy-rules dev-sandbox-policy.rego \
+  --policy-rules sandbox-policy.rego \
   --policy-data dev-sandbox-policy.yaml \
   -- /bin/bash
 ```
@@ -45,7 +45,7 @@ navigator-sandbox \
 | `--sandbox-id`         | `NEMOCLAW_SANDBOX_ID` | Sandbox ID for policy lookup |
 | `--nemoclaw-endpoint` | `NEMOCLAW_ENDPOINT`   | Gateway gRPC endpoint        |
 
-The gateway returns a `SandboxPolicy` protobuf message (defined in `proto/sandbox.proto`). The sandbox supervisor converts this proto into JSON, validates L7 config, expands presets, and loads it into the OPA engine using baked-in Rego rules (`dev-sandbox-policy.rego` compiled via `include_str!`). See `crates/navigator-sandbox/src/opa.rs` -- `OpaEngine::from_proto()`.
+The gateway returns a `SandboxPolicy` protobuf message (defined in `proto/sandbox.proto`). The sandbox supervisor converts this proto into JSON, validates L7 config, expands presets, and loads it into the OPA engine using baked-in Rego rules (`sandbox-policy.rego` compiled via `include_str!`). See `crates/navigator-sandbox/src/opa.rs` -- `OpaEngine::from_proto()`.
 
 ### Policy Loading Sequence
 
@@ -421,7 +421,7 @@ Each endpoint defines a network destination and, optionally, L7 inspection behav
 | ------ | -------- | -------- | ------------------------------------------------------------------ |
 | `path` | `string` | Yes      | Filesystem path of the binary. Supports glob patterns (`*`, `**`). |
 
-**Binary identity matching** is evaluated in the Rego rules (`dev-sandbox-policy.rego`) using four strategies, tried in order:
+**Binary identity matching** is evaluated in the Rego rules (`sandbox-policy.rego`) using four strategies, tried in order:
 
 1. **Direct path match** -- `exec.path == binary.path`
 2. **Ancestor match** -- any entry in `exec.ancestors` matches `binary.path`
@@ -450,7 +450,7 @@ rules:
 | `path`    | `string` | URL path glob pattern: `**` matches everything, otherwise `glob.match` with `/` delimiter.                                   |
 | `command` | `string` | SQL command: `SELECT`, `INSERT`, `UPDATE`, `DELETE`, or `*` (any). Case-insensitive matching. For `protocol: sql` endpoints. |
 
-Method and command fields use `*` as wildcard for "any". Path patterns use `**` for "match everything" and standard glob patterns with `/` as a delimiter otherwise. See `dev-sandbox-policy.rego` -- `method_matches()`, `path_matches()`, `command_matches()`.
+Method and command fields use `*` as wildcard for "any". Path patterns use `**` for "match everything" and standard glob patterns with `/` as a delimiter otherwise. See `sandbox-policy.rego` -- `method_matches()`, `path_matches()`, `command_matches()`.
 
 #### Access Presets
 
@@ -524,7 +524,7 @@ flowchart LR
 
 This is the single most important behavioral trigger in the policy language. An endpoint with no `protocol` field passes traffic opaquely after the L4 (CONNECT) check. Adding `protocol: rest` activates per-request HTTP parsing and policy evaluation inside the proxy.
 
-**Implementation path**: After L4 CONNECT is allowed, the proxy calls `query_l7_config()` which evaluates the Rego rule `data.navigator.sandbox.matched_endpoint_config`. This rule only matches endpoints that have a `protocol` field set (see `dev-sandbox-policy.rego` line `ep.protocol`). If a config is returned, the proxy enters `relay_with_inspection()` instead of `copy_bidirectional()`. See `crates/navigator-sandbox/src/proxy.rs` -- `handle_tcp_connection()`.
+**Implementation path**: After L4 CONNECT is allowed, the proxy calls `query_l7_config()` which evaluates the Rego rule `data.navigator.sandbox.matched_endpoint_config`. This rule only matches endpoints that have a `protocol` field set (see `sandbox-policy.rego` line `ep.protocol`). If a config is returned, the proxy enters `relay_with_inspection()` instead of `copy_bidirectional()`. See `crates/navigator-sandbox/src/proxy.rs` -- `handle_tcp_connection()`.
 
 **Validation requirement**: When `protocol` is set, either `rules` or `access` must also be present. An endpoint with `protocol` but no rules/access is rejected at validation time because it would deny all traffic (no allow rules means nothing matches). See `crates/navigator-sandbox/src/l7/mod.rs` -- `validate_l7_policies()`.
 
@@ -1055,7 +1055,7 @@ The OPA engine evaluates two categories of rules:
 | `allow_request`       | `input.network.*`, `input.exec.*`, `input.request.method`, `input.request.path` | `true` if the request matches any rule in the matched endpoint |
 | `request_deny_reason` | Same input                                                                      | Human-readable deny message                                    |
 
-See `dev-sandbox-policy.rego` for the full Rego implementation.
+See `sandbox-policy.rego` for the full Rego implementation.
 
 ---
 
diff --git a/crates/navigator-cli/src/main.rs b/crates/navigator-cli/src/main.rs
index 84abec76..bb9d1ae9 100644
--- a/crates/navigator-cli/src/main.rs
+++ b/crates/navigator-cli/src/main.rs
@@ -516,6 +516,17 @@ enum SandboxCommands {
         #[arg(long)]
         forward: Option<u16>,
 
+        /// Allocate a pseudo-terminal for the remote command.
+        /// Defaults to auto-detection (on when stdin and stdout are terminals).
+        /// Use --tty to force a PTY even when auto-detection fails, or
+        /// --no-tty to disable.
+        #[arg(long, overrides_with = "no_tty")]
+        tty: bool,
+
+        /// Disable pseudo-terminal allocation.
+        #[arg(long, overrides_with = "tty")]
+        no_tty: bool,
+
         /// Command to run after "--" (defaults to an interactive shell).
         #[arg(trailing_var_arg = true)]
         command: Vec<String>,
@@ -918,8 +929,19 @@ async fn main() -> Result<()> {
                     providers,
                     policy,
                     forward,
+                    tty,
+                    no_tty,
                     command,
                 } => {
+                    // Resolve --tty / --no-tty into an Option<bool> override.
+                    let tty_override = if no_tty {
+                        Some(false)
+                    } else if tty {
+                        Some(true)
+                    } else {
+                        None // auto-detect
+                    };
+
                     // For `sandbox create`, a missing cluster is not fatal — the
                     // bootstrap flow inside `sandbox_create` can deploy one.
                     match resolve_cluster(&cli.cluster) {
@@ -947,6 +969,7 @@ async fn main() -> Result<()> {
                                 policy.as_deref(),
                                 forward,
                                 &command,
+                                tty_override,
                                 &tls,
                             )
                             .await?;
@@ -964,6 +987,7 @@ async fn main() -> Result<()> {
                                 policy.as_deref(),
                                 forward,
                                 &command,
+                                tty_override,
                             )
                             .await?;
                         }
diff --git a/crates/navigator-cli/src/run.rs b/crates/navigator-cli/src/run.rs
index f3ec49e0..301d0c9c 100644
--- a/crates/navigator-cli/src/run.rs
+++ b/crates/navigator-cli/src/run.rs
@@ -978,6 +978,7 @@ pub async fn sandbox_create_with_bootstrap(
     policy: Option<&str>,
     forward: Option<u16>,
     command: &[String],
+    tty_override: Option<bool>,
 ) -> Result<()> {
     if !crate::bootstrap::confirm_bootstrap()? {
         return Err(miette::miette!(
@@ -988,7 +989,18 @@ pub async fn sandbox_create_with_bootstrap(
     }
     let (tls, server) = crate::bootstrap::run_bootstrap(remote, ssh_key).await?;
     sandbox_create(
-        &server, name, image, sync, keep, remote, ssh_key, providers, policy, forward, command,
+        &server,
+        name,
+        image,
+        sync,
+        keep,
+        remote,
+        ssh_key,
+        providers,
+        policy,
+        forward,
+        command,
+        tty_override,
         &tls,
     )
     .await
@@ -1008,6 +1020,7 @@ pub async fn sandbox_create(
     policy: Option<&str>,
     forward: Option<u16>,
     command: &[String],
+    tty_override: Option<bool>,
     tls: &TlsOptions,
 ) -> Result<()> {
     // Try connecting to the cluster. If it fails due to an unreachable cluster,
@@ -1249,11 +1262,16 @@ pub async fn sandbox_create(
             }
 
             eprintln!("Connecting...");
+            // Resolve TTY mode: explicit --tty / --no-tty wins, otherwise
+            // auto-detect from the local terminal.
+            let tty = tty_override.unwrap_or_else(|| {
+                std::io::stdin().is_terminal() && std::io::stdout().is_terminal()
+            });
             let exec_result = sandbox_exec(
                 &effective_server,
                 &sandbox_name,
                 command,
-                interactive,
+                tty,
                 &effective_tls,
             )
             .await;
diff --git a/crates/navigator-policy/dev-sandbox-policy.rego b/crates/navigator-sandbox/data/sandbox-policy.rego
similarity index 100%
rename from crates/navigator-policy/dev-sandbox-policy.rego
rename to crates/navigator-sandbox/data/sandbox-policy.rego
diff --git a/crates/navigator-sandbox/src/opa.rs b/crates/navigator-sandbox/src/opa.rs
index ba5b3551..94eae6b1 100644
--- a/crates/navigator-sandbox/src/opa.rs
+++ b/crates/navigator-sandbox/src/opa.rs
@@ -16,7 +16,7 @@ use std::sync::Mutex;
 /// Baked-in rego rules for OPA policy evaluation.
 /// These rules define the network access decision logic and static config
 /// passthroughs. They reference `data.sandbox.*` for policy data.
-const BAKED_POLICY_RULES: &str = include_str!("../../navigator-policy/dev-sandbox-policy.rego");
+const BAKED_POLICY_RULES: &str = include_str!("../data/sandbox-policy.rego");
 
 /// Result of evaluating a network access request against OPA policy.
 pub struct PolicyDecision {
@@ -671,9 +671,8 @@ mod tests {
         ProcessPolicy as ProtoProc, SandboxPolicy as ProtoSandboxPolicy,
     };
 
-    const TEST_POLICY: &str = include_str!("../../navigator-policy/dev-sandbox-policy.rego");
-    const TEST_DATA_YAML: &str =
-        include_str!("../../../deploy/docker/sandbox/dev-sandbox-policy.yaml");
+    const TEST_POLICY: &str = include_str!("../data/sandbox-policy.rego");
+    const TEST_DATA_YAML: &str = include_str!("../testdata/sandbox-policy.yaml");
 
     fn test_engine() -> OpaEngine {
         OpaEngine::from_strings(TEST_POLICY, TEST_DATA_YAML).expect("Failed to load test policy")
diff --git a/crates/navigator-sandbox/src/ssh.rs b/crates/navigator-sandbox/src/ssh.rs
index 8c7a4847..03192df1 100644
--- a/crates/navigator-sandbox/src/ssh.rs
+++ b/crates/navigator-sandbox/src/ssh.rs
@@ -132,9 +132,12 @@ async fn handle_connection(
     ca_file_paths: Option<Arc<(PathBuf, PathBuf)>>,
     provider_env: HashMap<String, String>,
 ) -> Result<()> {
+    info!(peer = %peer, "SSH connection: reading handshake preface");
     let mut line = String::new();
     read_line(&mut stream, &mut line).await?;
+    info!(peer = %peer, preface_len = line.len(), "SSH connection: preface received, verifying");
     if !verify_preface(&line, secret, handshake_skew_secs)? {
+        warn!(peer = %peer, "SSH connection: handshake verification failed");
         let _ = stream.write_all(b"ERR\n").await;
         return Ok(());
     }
diff --git a/crates/navigator-sandbox/testdata/sandbox-policy.yaml b/crates/navigator-sandbox/testdata/sandbox-policy.yaml
new file mode 100644
index 00000000..55ceaad2
--- /dev/null
+++ b/crates/navigator-sandbox/testdata/sandbox-policy.yaml
@@ -0,0 +1,69 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Minimal sandbox policy fixture for OPA engine tests.
+# Covers the network policies, filesystem, process, and inference config
+# exercised by opa.rs unit tests.
+
+version: 1
+
+filesystem_policy:
+  include_workdir: true
+  read_only:
+    - /usr
+    - /lib
+    - /proc
+    - /dev/urandom
+    - /app
+    - /etc
+    - /var/log
+  read_write:
+    - /sandbox
+    - /tmp
+    - /dev/null
+
+landlock:
+  compatibility: best_effort
+
+process:
+  run_as_user: sandbox
+  run_as_group: sandbox
+
+network_policies:
+  claude_code:
+    name: claude_code
+    endpoints:
+      - { host: api.anthropic.com, port: 443 }
+      - { host: statsig.anthropic.com, port: 443 }
+    binaries:
+      - { path: /usr/local/bin/claude }
+      - { path: /usr/bin/node }
+
+  github_ssh_over_https:
+    name: github-ssh-over-https
+    endpoints:
+      - host: github.com
+        port: 443
+        protocol: rest
+        tls: terminate
+        enforcement: enforce
+        rules:
+          - allow:
+              method: GET
+              path: "/**/info/refs*"
+          - allow:
+              method: POST
+              path: "/**/git-upload-pack"
+    binaries:
+      - { path: /usr/bin/git }
+
+  gitlab:
+    name: gitlab
+    endpoints:
+      - { host: gitlab.com, port: 443 }
+    binaries:
+      - { path: /usr/bin/glab }
+
+inference:
+  allowed_routes:
+    - local
diff --git a/crates/navigator-server/src/sandbox/mod.rs b/crates/navigator-server/src/sandbox/mod.rs
index 6c142a16..63881900 100644
--- a/crates/navigator-server/src/sandbox/mod.rs
+++ b/crates/navigator-server/src/sandbox/mod.rs
@@ -909,7 +909,7 @@ fn apply_required_env(
     ssh_handshake_skew_secs: u64,
 ) {
     upsert_env(env, "NEMOCLAW_SANDBOX_ID", sandbox_id);
-    upsert_env(env, "NEMOCLAW_SANDBOX_NAME", sandbox_name);
+    upsert_env(env, "NEMOCLAW_SANDBOX", sandbox_name);
     upsert_env(env, "NEMOCLAW_ENDPOINT", grpc_endpoint);
     upsert_env(env, "NEMOCLAW_SANDBOX_COMMAND", "sleep infinity");
     if !ssh_listen_addr.is_empty() {
diff --git a/crates/navigator-server/src/ssh_tunnel.rs b/crates/navigator-server/src/ssh_tunnel.rs
index caf8171e..24eae022 100644
--- a/crates/navigator-server/src/ssh_tunnel.rs
+++ b/crates/navigator-server/src/ssh_tunnel.rs
@@ -143,11 +143,17 @@ async fn handle_tunnel(
         Duration::from_secs(10),
         Duration::from_secs(15),
     ];
+    let target_desc = match &target {
+        ConnectTarget::Ip(addr) => format!("{addr}"),
+        ConnectTarget::Host(host, port) => format!("{host}:{port}"),
+    };
+    info!(sandbox_id = %sandbox_id, target = %target_desc, "SSH tunnel: connecting to sandbox");
     for (attempt, delay) in std::iter::once(&Duration::ZERO)
         .chain(delays.iter())
         .enumerate()
     {
         if !delay.is_zero() {
+            info!(sandbox_id = %sandbox_id, attempt = attempt + 1, delay_ms = delay.as_millis() as u64, "SSH tunnel: retrying TCP connect");
             tokio::time::sleep(*delay).await;
         }
         let result = match &target {
@@ -156,17 +162,16 @@ async fn handle_tunnel(
         };
         match result {
             Ok(stream) => {
-                if attempt > 0 {
-                    info!(
-                        sandbox_id = %sandbox_id,
-                        attempts = attempt + 1,
-                        "SSH tunnel connected after retry"
-                    );
-                }
+                info!(
+                    sandbox_id = %sandbox_id,
+                    attempts = attempt + 1,
+                    "SSH tunnel: TCP connected to sandbox"
+                );
                 upstream = Some(stream);
                 break;
             }
             Err(err) => {
+                info!(sandbox_id = %sandbox_id, attempt = attempt + 1, error = %err, "SSH tunnel: TCP connect failed");
                 last_err = Some(err);
             }
         }
@@ -176,11 +181,14 @@ async fn handle_tunnel(
         format!("failed to connect to sandbox after retries: {err}")
     })?;
     upstream.set_nodelay(true)?;
+    info!(sandbox_id = %sandbox_id, "SSH tunnel: sending NSSH1 handshake preface");
     let preface = build_preface(token, secret)?;
     upstream.write_all(preface.as_bytes()).await?;
 
+    info!(sandbox_id = %sandbox_id, "SSH tunnel: waiting for handshake response");
     let mut response = String::new();
     read_line(&mut upstream, &mut response).await?;
+    info!(sandbox_id = %sandbox_id, response = %response.trim(), "SSH tunnel: handshake response received");
     if response.trim() != "OK" {
         return Err("sandbox handshake rejected".into());
     }
diff --git a/deploy/docker/sandbox/Dockerfile.base b/deploy/docker/sandbox/Dockerfile.base
index f0e7ebde..f9879b5c 100644
--- a/deploy/docker/sandbox/Dockerfile.base
+++ b/deploy/docker/sandbox/Dockerfile.base
@@ -49,7 +49,7 @@ RUN mkdir -p crates/navigator-cli/src crates/navigator-core/src crates/navigator
 
 # Copy proto files needed for build
 COPY proto/ proto/
-COPY crates/navigator-policy/dev-sandbox-policy.rego ./crates/navigator-policy/
+COPY crates/navigator-sandbox/data/sandbox-policy.rego ./crates/navigator-sandbox/data/
 COPY deploy/docker/sandbox/dev-sandbox-policy.yaml ./deploy/docker/sandbox/
 
 # Build dependencies only (cached unless Cargo.toml/lock changes).
diff --git a/tasks/scripts/cluster-deploy-fast.sh b/tasks/scripts/cluster-deploy-fast.sh
index cd099dbb..d29e0102 100755
--- a/tasks/scripts/cluster-deploy-fast.sh
+++ b/tasks/scripts/cluster-deploy-fast.sh
@@ -84,12 +84,21 @@ mapfile -t changed_files < <(
 detect_end=$(date +%s)
 log_duration "Change detection" "${detect_start}" "${detect_end}"
 
+# Track the cluster container ID so we can detect when the cluster was
+# recreated (e.g. via bootstrap).  A new container means the k3s state is
+# fresh and all images must be rebuilt and pushed regardless of source
+# fingerprints.
+current_container_id=$(docker inspect --format '{{.Id}}' "${CONTAINER_NAME}" 2>/dev/null || true)
+
 if [[ -f "${DEPLOY_FAST_STATE_FILE}" ]]; then
   while IFS='=' read -r key value; do
     case "${key}" in
       cluster_name)
         previous_cluster_name=${value}
         ;;
+      container_id)
+        previous_container_id=${value}
+        ;;
       server)
         previous_server_fingerprint=${value}
         ;;
@@ -107,6 +116,15 @@ if [[ -f "${DEPLOY_FAST_STATE_FILE}" ]]; then
     previous_sandbox_fingerprint=""
     previous_helm_fingerprint=""
   fi
+
+  # Invalidate all previous fingerprints when the cluster container has
+  # changed (recreated or replaced).  The new k3s instance has no pushed
+  # images so everything must be rebuilt.
+  if [[ -n "${current_container_id}" && "${current_container_id}" != "${previous_container_id:-}" ]]; then
+    previous_server_fingerprint=""
+    previous_sandbox_fingerprint=""
+    previous_helm_fingerprint=""
+  fi
 fi
 
 matches_server() {
@@ -262,14 +280,9 @@ fi
 
 build_start=$(date +%s)
 
-# Capture image IDs before rebuild so we can detect what changed.
-declare -A image_id_before=()
-for component in server sandbox; do
-  var="build_${component//-/_}"
-  if [[ "${!var}" == "1" ]]; then
-    image_id_before[${component}]=$(docker images -q "navigator/${component}:${IMAGE_TAG}" 2>/dev/null || true)
-  fi
-done
+# Track which components are being rebuilt so we can evict their images
+# from the k3s containerd cache after pushing.
+declare -a built_components=()
 
 server_pid=""
 sandbox_pid=""
@@ -322,20 +335,15 @@ build_end=$(date +%s)
 log_duration "Image builds" "${build_start}" "${build_end}"
 
 declare -a pushed_images=()
-declare -a changed_images=()
 
 for component in server sandbox; do
   var="build_${component//-/_}"
   if [[ "${!var}" == "1" ]]; then
-    docker tag "navigator/${component}:${IMAGE_TAG}" "${IMAGE_REPO_BASE}/${component}:${IMAGE_TAG}"
+    # Tag may fail with AlreadyExists when the image digest hasn't changed;
+    # this is harmless — the target tag already refers to the correct image.
+    docker tag "navigator/${component}:${IMAGE_TAG}" "${IMAGE_REPO_BASE}/${component}:${IMAGE_TAG}" 2>/dev/null || true
     pushed_images+=("${IMAGE_REPO_BASE}/${component}:${IMAGE_TAG}")
-
-    # Detect whether the image actually changed by comparing Docker image IDs.
-    id_after=$(docker images -q "navigator/${component}:${IMAGE_TAG}" 2>/dev/null || true)
-    id_before=${image_id_before[${component}]:-}
-    if [[ -z "${id_before}" || "${id_before}" != "${id_after}" ]]; then
-      changed_images+=("${component}")
-    fi
+    built_components+=("${component}")
   fi
 done
 
@@ -349,13 +357,12 @@ if [[ "${#pushed_images[@]}" -gt 0 ]]; then
   log_duration "Image push" "${push_start}" "${push_end}"
 fi
 
-# Evict stale images from k3s's containerd store so new pods pull the
-# updated image from the registry.  Without this, k3s uses its cached copy
-# (imagePullPolicy defaults to IfNotPresent for non-:latest tags) and pods
-# run stale code.
-if [[ "${#changed_images[@]}" -gt 0 ]]; then
-  echo "Evicting stale images from k3s: ${changed_images[*]}"
-  for component in "${changed_images[@]}"; do
+# Always evict rebuilt images from k3s's containerd store so new pods pull
+# the updated image from the registry.  Without this, k3s may use a cached
+# copy even when the registry has a newer version with the same tag.
+if [[ "${#built_components[@]}" -gt 0 ]]; then
+  echo "Evicting stale images from k3s: ${built_components[*]}"
+  for component in "${built_components[@]}"; do
     docker exec "${CONTAINER_NAME}" crictl rmi "${IMAGE_REPO_BASE}/${component}:${IMAGE_TAG}" >/dev/null 2>&1 || true
   done
 fi
@@ -409,6 +416,7 @@ if [[ "${explicit_target}" == "0" ]]; then
   mkdir -p "$(dirname "${DEPLOY_FAST_STATE_FILE}")"
   cat > "${DEPLOY_FAST_STATE_FILE}" <<EOF
 cluster_name=${CLUSTER_NAME}
+container_id=${current_container_id}
 server=${current_server_fingerprint}
 sandbox=${current_sandbox_fingerprint}
 helm=${current_helm_fingerprint}
diff --git a/tasks/scripts/sandbox.sh b/tasks/scripts/sandbox.sh
index 820a18f4..3b8f8def 100755
--- a/tasks/scripts/sandbox.sh
+++ b/tasks/scripts/sandbox.sh
@@ -98,7 +98,7 @@ fi
 
 if [[ "${need_create}" == "1" ]]; then
   echo "Creating sandbox '${SANDBOX_NAME}'..."
-  nemoclaw sandbox create --name "${SANDBOX_NAME}" "${PROVIDER_ARGS[@]}" -- "${CMD[@]}"
+  nemoclaw sandbox create --name "${SANDBOX_NAME}" "${PROVIDER_ARGS[@]}" --tty -- "${CMD[@]}"
 else
   echo "Connecting to existing sandbox '${SANDBOX_NAME}'..."
   nemoclaw sandbox connect "${SANDBOX_NAME}"