diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index c440cb9b7..6c4ef1737 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -1616,6 +1616,37 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) {
     runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
   }
 
+  // Clear stale SSH host keys from previous gateway (fixes #768).
+  // Best-effort: a failure here (e.g. ssh-keygen is not installed) is
+  // deliberately ignored so that it never blocks gateway startup.
+  try {
+    const { execFileSync } = require("child_process");
+    execFileSync("ssh-keygen", ["-R", `openshell-${GATEWAY_NAME}`], { stdio: "ignore" });
+  } catch {
+    // Ignored on purpose: host-key cleanup is optional.
+  }
+  // Also purge plain-text known_hosts entries for any "openshell-*" host.
+  // ssh records hosts on a non-default port as "[host]:port", so the leading
+  // bracket is stripped before matching. Comment and blank lines are kept.
+  const knownHostsPath = path.join(os.homedir(), ".ssh", "known_hosts");
+  try {
+    const kh = fs.readFileSync(knownHostsPath, "utf8");
+    const cleaned = kh
+      .split("\n")
+      .filter((line) => {
+        const trimmed = line.trim();
+        if (!trimmed || trimmed.startsWith("#")) return true;
+        const hostField = trimmed.split(/\s+/)[0];
+        return !hostField
+          .split(",")
+          .some((h) => h.replace(/^\[/, "").startsWith("openshell-"));
+      })
+      .join("\n");
+    if (cleaned !== kh) fs.writeFileSync(knownHostsPath, cleaned);
+  } catch {
+    // Missing or unreadable known_hosts: nothing to purge, keep going.
+  }
+
   const gwArgs = ["--name", GATEWAY_NAME];
   // Do NOT pass --gpu here. On DGX Spark (and most GPU hosts), inference is
   // routed through a host-side provider (Ollama, vLLM, or cloud API) — the
diff --git a/scripts/debug.sh b/scripts/debug.sh
index 2426d4287..78d128b10 100755
--- a/scripts/debug.sh
+++ b/scripts/debug.sh
@@ -85,11 +85,15 @@ done
 TMPDIR_BASE="${TMPDIR:-/tmp}"
 COLLECT_DIR=$(mktemp -d "${TMPDIR_BASE}/nemoclaw-debug-XXXXXX")
 SANDBOX_SSH_CONFIG=""
+SANDBOX_SSH_KNOWN=""
 cleanup() {
   rm -rf "$COLLECT_DIR"
   if [ -n "$SANDBOX_SSH_CONFIG" ]; then
     rm -f "$SANDBOX_SSH_CONFIG"
   fi
+  if [ -n "$SANDBOX_SSH_KNOWN" ]; then
+    rm -f "$SANDBOX_SSH_KNOWN"
+  fi
 }
 trap cleanup EXIT
 
@@ -284,7 +288,8 @@ if command -v openshell &>/dev/null \
   SANDBOX_SSH_CONFIG=$(mktemp "${TMPDIR_BASE}/nemoclaw-ssh-XXXXXX")
   if openshell sandbox ssh-config "$SANDBOX_NAME" >"$SANDBOX_SSH_CONFIG" 2>/dev/null; then
     SANDBOX_SSH_HOST="openshell-${SANDBOX_NAME}"
-    SANDBOX_SSH_OPTS=(-F "$SANDBOX_SSH_CONFIG" -o StrictHostKeyChecking=no -o ConnectTimeout=10)
+    SANDBOX_SSH_KNOWN=$(mktemp "${TMPDIR_BASE}/nemoclaw-ssh-known-XXXXXX")
+    SANDBOX_SSH_OPTS=(-F "$SANDBOX_SSH_CONFIG" -o StrictHostKeyChecking=accept-new -o "UserKnownHostsFile=$SANDBOX_SSH_KNOWN" -o ConnectTimeout=10)
 
     collect "sandbox-ps" ssh "${SANDBOX_SSH_OPTS[@]}" "$SANDBOX_SSH_HOST" ps -ef
     collect "sandbox-free" ssh "${SANDBOX_SSH_OPTS[@]}" "$SANDBOX_SSH_HOST" free -m