From b52783a20d62badc4257b38b033510ebab27a72e Mon Sep 17 00:00:00 2001
From: Piotr Mlocek <pmlocek@nvidia.com>
Date: Tue, 3 Mar 2026 23:27:00 -0800
Subject: [PATCH 1/3] fix(sandbox): fix data corruption in sync --down and hang
 in sync --up

The sandbox SSH server used tokio::spawn fire-and-forget for each stdout/stderr
chunk, which provided no ordering guarantee under tokio's work-stealing scheduler.
Under load, chunks arrived out-of-order, corrupting tar streams during download.

Replace tokio::spawn with runtime.block_on() to ensure each chunk is fully sent
before reading the next. Remove AsyncWriteExt::shutdown() from the SSH proxy's
copy helper to prevent gateways from treating TCP half-close as full-close and
truncating in-flight downloads.

Implement channel_eof handler on the sandbox SSH server to close the child
process stdin when the client signals EOF. Without this, commands like
'cat | tar xf -' used by sync --up would hang waiting for stdin EOF
indefinitely. Add ValueHint::AnyPath to the --up argument for shell completion.

Signed-off-by: Piotr Mlocek <pmlocek@nvidia.com>
---
 crates/navigator-cli/src/main.rs    | 47 +++++++++++++++++++++++++++++
 crates/navigator-sandbox/src/ssh.rs | 31 +++++++++++--------
 2 files changed, 66 insertions(+), 12 deletions(-)
diff --git a/crates/navigator-cli/src/main.rs b/crates/navigator-cli/src/main.rs
index c7b28a96..84abec76 100644
--- a/crates/navigator-cli/src/main.rs
+++ b/crates/navigator-cli/src/main.rs
@@ -1339,6 +1339,7 @@ async fn main() -> Result<()> {
 mod tests {
     use super::*;
     use std::ffi::OsString;
+    use std::fs;
 
     #[test]
     fn cli_debug_assert() {
@@ -1462,4 +1463,50 @@ mod tests {
             );
         }
     }
+
+    #[test]
+    fn sandbox_sync_up_uses_path_value_hint() {
+        let cmd = Cli::command();
+        let sandbox = cmd
+            .get_subcommands()
+            .find(|c| c.get_name() == "sandbox")
+            .expect("missing sandbox subcommand");
+        let sync = sandbox
+            .get_subcommands()
+            .find(|c| c.get_name() == "sync")
+            .expect("missing sandbox sync subcommand");
+        let up = sync
+            .get_arguments()
+            .find(|arg| arg.get_id() == "up")
+            .expect("missing --up argument");
+
+        assert_eq!(up.get_value_hint(), ValueHint::AnyPath);
+    }
+
+    #[test]
+    fn sandbox_sync_up_completion_suggests_local_paths() {
+        let temp = tempfile::tempdir().expect("failed to create tempdir");
+        fs::write(temp.path().join("sample.txt"), "x").expect("failed to create sample file");
+
+        let mut cmd = Cli::command();
+        let args: Vec<OsString> = vec![
+            "nemoclaw".into(),
+            "sandbox".into(),
+            "sync".into(),
+            "demo".into(),
+            "--up".into(),
+            "sa".into(),
+        ];
+        let candidates = clap_complete::engine::complete(&mut cmd, args, 5, Some(temp.path()))
+            .expect("completion engine failed");
+
+        let names: Vec<String> = candidates
+            .iter()
+            .map(|c| c.get_value().to_string_lossy().into_owned())
+            .collect();
+        assert!(
+            names.iter().any(|name| name.contains("sample.txt")),
+            "expected path completion for --up, got: {names:?}"
+        );
+    }
 }
diff --git a/crates/navigator-sandbox/src/ssh.rs b/crates/navigator-sandbox/src/ssh.rs
index deda9a9a..ff3a8cc6 100644
--- a/crates/navigator-sandbox/src/ssh.rs
+++ b/crates/navigator-sandbox/src/ssh.rs
@@ -418,6 +418,19 @@ impl russh::server::Handler for SshHandler {
         }
         Ok(())
     }
+
+    async fn channel_eof(
+        &mut self,
+        _channel: ChannelId,
+        _session: &mut Session,
+    ) -> Result<(), Self::Error> {
+        // Drop the input sender so the stdin writer thread sees a
+        // disconnected channel and closes the child's stdin pipe.  This
+        // is essential for commands like `cat | tar xf -` which need
+        // stdin EOF to know the input stream is complete.
+        self.input_sender.take();
+        Ok(())
+    }
 }
 
 impl SshHandler {
@@ -670,17 +683,14 @@ fn spawn_pty_shell(
                 Ok(n) => {
                     let data = CryptoVec::from_slice(&buf[..n]);
                     let handle_clone = handle_clone.clone();
-                    drop(runtime_reader.spawn(async move {
-                        let _ = handle_clone.data(channel, data).await;
-                    }));
+                    let _ = runtime_reader
+                        .block_on(async move { handle_clone.data(channel, data).await });
                 }
             }
         }
         // Send EOF to indicate no more data will be sent on this channel.
         let eof_handle = handle_clone.clone();
-        drop(runtime_reader.spawn(async move {
-            let _ = eof_handle.eof(channel).await;
-        }));
+        let _ = runtime_reader.block_on(async move { eof_handle.eof(channel).await });
         // Notify the exit thread that all output has been forwarded.
         let _ = reader_done_tx.send(());
     });
@@ -828,9 +838,7 @@ fn spawn_pipe_exec(
                 Ok(n) => {
                     let data = CryptoVec::from_slice(&buf[..n]);
                     let h = stdout_handle.clone();
-                    drop(stdout_runtime.spawn(async move {
-                        let _ = h.data(channel, data).await;
-                    }));
+                    let _ = stdout_runtime.block_on(async move { h.data(channel, data).await });
                 }
             }
         }
@@ -849,9 +857,8 @@ fn spawn_pipe_exec(
                 Ok(n) => {
                     let data = CryptoVec::from_slice(&buf[..n]);
                     let h = stderr_handle.clone();
-                    drop(stderr_runtime.spawn(async move {
-                        let _ = h.extended_data(channel, 1, data).await;
-                    }));
+                    let _ = stderr_runtime
+                        .block_on(async move { h.extended_data(channel, 1, data).await });
                 }
             }
         }

From 2f2edad30495a363885c062f93dacd0f7ef9d972 Mon Sep 17 00:00:00 2001
From: Piotr Mlocek <pmlocek@nvidia.com>
Date: Tue, 3 Mar 2026 23:53:32 -0800
Subject: [PATCH 2/3] test(sandbox): add tests for SSH stdin EOF delivery and
 large-file sync

Add Rust unit tests verifying that dropping the mpsc sender (the mechanism
channel_eof uses) closes the child's stdin pipe and delivers all buffered
data before EOF. These tests would hang without the channel_eof fix.

Add a 512 KiB large-file round-trip to the E2E sync bash test with SHA-256
checksum verification. Small test files fit in a single SSH chunk and cannot
trigger the data ordering bug that corrupted tar streams.

Signed-off-by: Piotr Mlocek <pmlocek@nvidia.com>
---
 crates/navigator-sandbox/src/ssh.rs | 100 ++++++++++++++++++++++++++++
 e2e/bash/test_sandbox_sync.sh       |  53 ++++++++++++++-
 2 files changed, 152 insertions(+), 1 deletion(-)

diff --git a/crates/navigator-sandbox/src/ssh.rs b/crates/navigator-sandbox/src/ssh.rs
index ff3a8cc6..91cd1488 100644
--- a/crates/navigator-sandbox/src/ssh.rs
+++ b/crates/navigator-sandbox/src/ssh.rs
@@ -975,3 +975,103 @@ mod unsafe_pty {
 fn to_u16(value: u32) -> u16 {
     u16::try_from(value.min(u32::from(u16::MAX))).unwrap_or(u16::MAX)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Verify that dropping the input sender (the operation `channel_eof`
+    /// performs) causes the stdin writer loop to exit and close the child's
+    /// stdin pipe.  Without this, commands like `cat | tar xf -` used by
+    /// `sync --up` hang forever waiting for EOF on stdin.
+    #[test]
+    fn dropping_input_sender_closes_child_stdin() {
+        let (sender, receiver) = mpsc::channel::<Vec<u8>>();
+
+        let mut child = Command::new("cat")
+            .stdin(std::process::Stdio::piped())
+            .stdout(std::process::Stdio::piped())
+            .spawn()
+            .expect("failed to spawn cat");
+
+        let child_stdin = child.stdin.take().expect("stdin must be piped");
+
+        // Replicate the stdin writer loop from spawn_pipe_exec.
+        std::thread::spawn(move || {
+            let mut stdin = child_stdin;
+            while let Ok(bytes) = receiver.recv() {
+                if stdin.write_all(&bytes).is_err() {
+                    break;
+                }
+                let _ = stdin.flush();
+            }
+        });
+
+        sender.send(b"hello".to_vec()).unwrap();
+
+        // Simulate what channel_eof does: drop the sender.
+        drop(sender);
+
+        // cat should see EOF on stdin and exit.  Use a timeout so the test
+        // fails fast instead of hanging if the mechanism is broken.
+        let (done_tx, done_rx) = mpsc::channel();
+        std::thread::spawn(move || {
+            let _ = done_tx.send(child.wait_with_output());
+        });
+        let output = done_rx
+            .recv_timeout(Duration::from_secs(5))
+            .expect("cat hung for 5s — stdin was not closed (channel_eof bug)")
+            .expect("failed to wait for cat");
+
+        assert!(output.status.success(), "cat exited with {:?}", output.status);
+        assert_eq!(output.stdout, b"hello");
+    }
+
+    /// Verify that the stdin writer delivers all buffered data before exiting
+    /// when the sender is dropped.  This ensures channel_eof doesn't cause
+    /// data loss — only signals "no more data after this".
+    #[test]
+    fn stdin_writer_delivers_buffered_data_before_eof() {
+        let (sender, receiver) = mpsc::channel::<Vec<u8>>();
+
+        let mut child = Command::new("wc")
+            .arg("-c")
+            .stdin(std::process::Stdio::piped())
+            .stdout(std::process::Stdio::piped())
+            .spawn()
+            .expect("failed to spawn wc");
+
+        let child_stdin = child.stdin.take().expect("stdin must be piped");
+
+        std::thread::spawn(move || {
+            let mut stdin = child_stdin;
+            while let Ok(bytes) = receiver.recv() {
+                if stdin.write_all(&bytes).is_err() {
+                    break;
+                }
+                let _ = stdin.flush();
+            }
+        });
+
+        // Send multiple chunks, then drop the sender.
+        for _ in 0..100 {
+            sender.send(vec![0u8; 1024]).unwrap();
+        }
+        drop(sender);
+
+        let (done_tx, done_rx) = mpsc::channel();
+        std::thread::spawn(move || {
+            let _ = done_tx.send(child.wait_with_output());
+        });
+        let output = done_rx
+            .recv_timeout(Duration::from_secs(5))
+            .expect("wc hung for 5s — stdin was not closed")
+            .expect("failed to wait for wc");
+
+        let count: usize = String::from_utf8_lossy(&output.stdout)
+            .trim()
+            .parse()
+            .expect("wc output was not a number");
+        assert_eq!(count, 100 * 1024, "expected all 100 KiB delivered before EOF");
+    }
+}
diff --git a/e2e/bash/test_sandbox_sync.sh b/e2e/bash/test_sandbox_sync.sh
index 717a8228..306fe355 100755
--- a/e2e/bash/test_sandbox_sync.sh
+++ b/e2e/bash/test_sandbox_sync.sh
@@ -210,7 +210,58 @@ fi
 info "subdir/nested.txt verified"
 
 ###############################################################################
-# Step 4 — Sync up a single file and round-trip it.
+# Step 4 — Large-file round-trip to exercise multi-chunk SSH transport.
+#
+# The tar archive is streamed through the SSH channel in 4096-byte chunks.
+# Historically, a fire-and-forget tokio::spawn per chunk caused out-of-order
+# delivery that corrupted the tar stream.  A ~512 KiB file spans many chunks
+# and makes such ordering bugs much more likely to surface.
+###############################################################################
+
+info "Generating large test file (~512 KiB)"
+
+LARGE_DIR="${TMPDIR_ROOT}/large_upload"
+mkdir -p "${LARGE_DIR}"
+
+# Deterministic pseudo-random content so we can verify with a checksum.
+dd if=/dev/urandom bs=1024 count=512 2>/dev/null > "${LARGE_DIR}/large.bin"
+EXPECTED_HASH=$(shasum -a 256 "${LARGE_DIR}/large.bin" | awk '{print $1}')
+
+info "Syncing large file up to sandbox"
+if ! "${NAV}" sandbox sync "${SANDBOX_NAME}" --up "${LARGE_DIR}" /sandbox/large_test \
+    > /dev/null 2>&1; then
+  error "sync --up large file failed"
+  exit 1
+fi
+
+info "Syncing large file back down"
+LARGE_DOWN="${TMPDIR_ROOT}/large_download"
+mkdir -p "${LARGE_DOWN}"
+
+if ! "${NAV}" sandbox sync "${SANDBOX_NAME}" --down /sandbox/large_test "${LARGE_DOWN}" \
+    > /dev/null 2>&1; then
+  error "sync --down large file failed"
+  exit 1
+fi
+
+ACTUAL_HASH=$(shasum -a 256 "${LARGE_DOWN}/large.bin" | awk '{print $1}')
+if [[ "${EXPECTED_HASH}" != "${ACTUAL_HASH}" ]]; then
+  error "large.bin checksum mismatch after round-trip"
+  error "  expected: ${EXPECTED_HASH}"
+  error "  actual:   ${ACTUAL_HASH}"
+  exit 1
+fi
+
+ACTUAL_SIZE=$(wc -c < "${LARGE_DOWN}/large.bin" | tr -d ' ')
+if [[ "${ACTUAL_SIZE}" -ne 524288 ]]; then
+  error "large.bin size mismatch: expected 524288, got ${ACTUAL_SIZE}"
+  exit 1
+fi
+
+info "Large file round-trip verified (SHA-256 match, ${ACTUAL_SIZE} bytes)"
+
+###############################################################################
+# Step 5 — Sync up a single file and round-trip it.
 ###############################################################################
 
 info "Testing single-file sync"

From 6abd1a4c5d3ce418655d6846c60df033caefb278 Mon Sep 17 00:00:00 2001
From: Piotr Mlocek <pmlocek@nvidia.com>
Date: Wed, 4 Mar 2026 00:09:54 -0800
Subject: [PATCH 3/3] format

---
 crates/navigator-sandbox/src/ssh.rs | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/crates/navigator-sandbox/src/ssh.rs b/crates/navigator-sandbox/src/ssh.rs
index 91cd1488..8c7a4847 100644
--- a/crates/navigator-sandbox/src/ssh.rs
+++ b/crates/navigator-sandbox/src/ssh.rs
@@ -1023,7 +1023,11 @@ mod tests {
             .expect("cat hung for 5s — stdin was not closed (channel_eof bug)")
             .expect("failed to wait for cat");
 
-        assert!(output.status.success(), "cat exited with {:?}", output.status);
+        assert!(
+            output.status.success(),
+            "cat exited with {:?}",
+            output.status
+        );
         assert_eq!(output.stdout, b"hello");
     }
 
@@ -1072,6 +1076,10 @@ mod tests {
             .trim()
             .parse()
             .expect("wc output was not a number");
-        assert_eq!(count, 100 * 1024, "expected all 100 KiB delivered before EOF");
+        assert_eq!(
+            count,
+            100 * 1024,
+            "expected all 100 KiB delivered before EOF"
+        );
     }
 }