From 51a1564394809e7e48e78e54690a54d7a72978c4 Mon Sep 17 00:00:00 2001 From: Miyoung Choi Date: Mon, 30 Mar 2026 02:31:23 -0500 Subject: [PATCH 1/2] fix: clear stale SSH host keys after gateway restart (#768) After a Docker container restart, the gateway regenerates its mTLS certificates but stale host key entries in ~/.ssh/known_hosts cause SSH handshake verification failures. This fix: - Purges old openshell host key entries from known_hosts when the gateway is destroyed during onboard - Uses ephemeral known_hosts files for sandbox SSH connections in debug.sh to avoid accumulating stale entries Fixes #768 Co-Authored-By: Claude Opus 4.6 --- bin/lib/onboard.js | 15 +++++++++++++++ scripts/debug.sh | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 16f392f17..6df90b6ec 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -1277,6 +1277,21 @@ async function startGateway(gpu) { // Destroy old gateway runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true }); + // Clear stale SSH host keys from previous gateway (fixes #768) + try { + const { execFileSync } = require("child_process"); + execFileSync("ssh-keygen", ["-R", `openshell-${SANDBOX_NAME}`], { stdio: "ignore" }); + } catch {} + // Also purge any known_hosts entries matching the gateway hostname pattern + const knownHostsPath = path.join(os.homedir(), ".ssh", "known_hosts"); + if (fs.existsSync(knownHostsPath)) { + try { + const kh = fs.readFileSync(knownHostsPath, "utf8"); + const cleaned = kh.split("\n").filter(l => !l.includes("openshell-")).join("\n"); + if (cleaned !== kh) fs.writeFileSync(knownHostsPath, cleaned); + } catch {} + } + const gwArgs = ["--name", GATEWAY_NAME]; // Do NOT pass --gpu here. On DGX Spark (and most GPU hosts), inference is // routed through a host-side provider (Ollama, vLLM, or cloud API) — the diff --git a/scripts/debug.sh b/scripts/debug.sh index 045f38fc9..7e4d5c873 100755 --- a/scripts/debug.sh +++ b/scripts/debug.sh @@ -256,7 +256,8 @@ if command -v openshell &>/dev/null \ SANDBOX_SSH_CONFIG=$(mktemp "${TMPDIR_BASE}/nemoclaw-ssh-XXXXXX") if openshell sandbox ssh-config "$SANDBOX_NAME" >"$SANDBOX_SSH_CONFIG" 2>/dev/null; then SANDBOX_SSH_HOST="openshell-${SANDBOX_NAME}" - SANDBOX_SSH_OPTS=(-F "$SANDBOX_SSH_CONFIG" -o StrictHostKeyChecking=no -o ConnectTimeout=10) + SANDBOX_SSH_KNOWN=$(mktemp "${TMPDIR_BASE}/nemoclaw-ssh-known-XXXXXX") + SANDBOX_SSH_OPTS=(-F "$SANDBOX_SSH_CONFIG" -o StrictHostKeyChecking=accept-new -o "UserKnownHostsFile=$SANDBOX_SSH_KNOWN" -o ConnectTimeout=10) collect "sandbox-ps" ssh "${SANDBOX_SSH_OPTS[@]}" "$SANDBOX_SSH_HOST" ps -ef collect "sandbox-free" ssh "${SANDBOX_SSH_OPTS[@]}" "$SANDBOX_SSH_HOST" free -m From e28ce4896932eb39b0abb435ccd91190488e7cd0 Mon Sep 17 00:00:00 2001 From: Miyoung Choi Date: Mon, 30 Mar 2026 05:53:07 -0500 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20address=20CodeRabbit=20review=20?= =?UTF-8?q?=E2=80=94=20scope=20var,=20temp=20cleanup,=20host-field=20filte?= =?UTF-8?q?r?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- bin/lib/onboard.js | 7 +++++-- scripts/debug.sh | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 58f8babc3..7c72c4b14 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -1607,14 +1607,17 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { // Clear stale SSH host keys from previous gateway (fixes #768) try { const { execFileSync } = require("child_process"); - execFileSync("ssh-keygen", ["-R", `openshell-${SANDBOX_NAME}`], { stdio: "ignore" }); + execFileSync("ssh-keygen", ["-R", `openshell-${GATEWAY_NAME}`], { stdio: "ignore" }); } catch {} // Also purge any known_hosts entries matching the gateway hostname pattern const knownHostsPath = path.join(os.homedir(), ".ssh", "known_hosts"); if (fs.existsSync(knownHostsPath)) { try { const kh = fs.readFileSync(knownHostsPath, "utf8"); - const cleaned = kh.split("\n").filter(l => !l.includes("openshell-")).join("\n"); + const cleaned = kh.split("\n").filter(l => { + const host = l.split(/\s/)[0] || ""; + return !host.includes("openshell-"); + }).join("\n"); if (cleaned !== kh) fs.writeFileSync(knownHostsPath, cleaned); } catch {} } diff --git a/scripts/debug.sh b/scripts/debug.sh index 749b216de..c0dca908b 100755 --- a/scripts/debug.sh +++ b/scripts/debug.sh @@ -285,6 +285,7 @@ if command -v openshell &>/dev/null \ if openshell sandbox ssh-config "$SANDBOX_NAME" >"$SANDBOX_SSH_CONFIG" 2>/dev/null; then SANDBOX_SSH_HOST="openshell-${SANDBOX_NAME}" SANDBOX_SSH_KNOWN=$(mktemp "${TMPDIR_BASE}/nemoclaw-ssh-known-XXXXXX") + trap 'rm -f "$SANDBOX_SSH_KNOWN"' EXIT SANDBOX_SSH_OPTS=(-F "$SANDBOX_SSH_CONFIG" -o StrictHostKeyChecking=accept-new -o "UserKnownHostsFile=$SANDBOX_SSH_KNOWN" -o ConnectTimeout=10) collect "sandbox-ps" ssh "${SANDBOX_SSH_OPTS[@]}" "$SANDBOX_SSH_HOST" ps -ef