From 902d83997d72bc2e110d6df23b13814c013bda88 Mon Sep 17 00:00:00 2001 From: realkim93 Date: Thu, 19 Mar 2026 21:26:08 +0900 Subject: [PATCH 01/10] feat: add Jetson Orin Nano support Add GPU detection, iptables-legacy fix, and nemotron-3-nano:4b default for Jetson Orin Nano Super (8GB, JetPack 6.x). Co-Authored-By: Claude Opus 4.6 (1M context) --- bin/lib/local-inference.js | 11 ++ bin/lib/nim.js | 41 ++++++- bin/lib/onboard.js | 93 +++++++++++++++- bin/nemoclaw.js | 8 +- scripts/setup-jetson.sh | 207 +++++++++++++++++++++++++++++++++++ test/local-inference.test.js | 23 ++++ test/nim.test.js | 29 +++-- 7 files changed, 396 insertions(+), 16 deletions(-) create mode 100755 scripts/setup-jetson.sh diff --git a/bin/lib/local-inference.js b/bin/lib/local-inference.js index 3452e59e3..292a642b3 100644 --- a/bin/lib/local-inference.js +++ b/bin/lib/local-inference.js @@ -8,6 +8,7 @@ const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1"; const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b"; const SMALL_OLLAMA_MODEL = "qwen2.5:7b"; const LARGE_OLLAMA_MIN_MEMORY_MB = 32768; +const DEFAULT_OLLAMA_MODEL_JETSON = "nemotron-3-nano:4b"; function getLocalProviderBaseUrl(provider) { switch (provider) { @@ -138,6 +139,11 @@ function getOllamaModelOptions(runCapture) { } function getBootstrapOllamaModelOptions(gpu) { + // Jetson: fall back to the 4B model that fits in 8GB unified memory + // instead of the 30B default which would OOM. + if (gpu && gpu.jetson) { + return [DEFAULT_OLLAMA_MODEL_JETSON]; + } const options = [SMALL_OLLAMA_MODEL]; if (gpu && gpu.totalMemoryMB >= LARGE_OLLAMA_MIN_MEMORY_MB) { options.push(DEFAULT_OLLAMA_MODEL); @@ -151,6 +157,10 @@ function getDefaultOllamaModel(runCapture, gpu = null) { const bootstrap = getBootstrapOllamaModelOptions(gpu); return bootstrap[0]; } + if (gpu && gpu.jetson) { + if (models.includes(DEFAULT_OLLAMA_MODEL_JETSON)) return DEFAULT_OLLAMA_MODEL_JETSON; + return models[0]; + } return models.includes(DEFAULT_OLLAMA_MODEL) ? DEFAULT_OLLAMA_MODEL : models[0]; } @@ -201,6 +211,7 @@ function validateOllamaModel(model, runCapture) { module.exports = { CONTAINER_REACHABILITY_IMAGE, DEFAULT_OLLAMA_MODEL, + DEFAULT_OLLAMA_MODEL_JETSON, HOST_GATEWAY_URL, LARGE_OLLAMA_MIN_MEMORY_MB, SMALL_OLLAMA_MODEL, diff --git a/bin/lib/nim.js b/bin/lib/nim.js index f291a0967..771f11bb7 100644 --- a/bin/lib/nim.js +++ b/bin/lib/nim.js @@ -23,6 +23,11 @@ function listModels() { })); } +/** + * Detects the GPU on the current system. Returns an object describing the GPU + * type, memory, and capabilities, or null if no GPU is found. Supports + * discrete NVIDIA GPUs, DGX Spark (GB10), Jetson (Orin/Thor), and Apple Silicon. + */ function detectGpu() { // Try NVIDIA first — query VRAM try { @@ -46,14 +51,16 @@ function detectGpu() { } } catch { /* ignored */ } - // Fallback: DGX Spark (GB10) — VRAM not queryable due to unified memory architecture + // Fallback: unified-memory NVIDIA platforms where nvidia-smi reports [N/A] + // for memory.total. Query GPU name once and check for DGX Spark or Jetson. try { const nameOutput = runCapture( "nvidia-smi --query-gpu=name --format=csv,noheader,nounits", { ignoreError: true } ); + + // DGX Spark (GB10) — 128GB unified memory shared with Grace CPU if (nameOutput && nameOutput.includes("GB10")) { - // GB10 has 128GB unified memory shared with Grace CPU — use system RAM let totalMemoryMB = 0; try { const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true }); @@ -68,6 +75,36 @@ function detectGpu() { spark: true, }; } + + // NVIDIA Jetson — unified memory, nvidia-smi reports GPU name containing + // "Orin" or "Thor" but without discrete GPU identifiers like + // GeForce/RTX/Quadro. Tested on Jetson Orin Nano Super (JetPack 6.x). + // Other Jetson variants may also work via /proc/device-tree/model fallback. + const isJetsonGpu = nameOutput && + /orin|thor/i.test(nameOutput) && + !/geforce|rtx|quadro/i.test(nameOutput); + const dtModel = runCapture( + "cat /proc/device-tree/model 2>/dev/null | tr -d '\\0'", + { ignoreError: true } + ); + const isJetsonDt = dtModel && /jetson/i.test(dtModel); + + if (isJetsonGpu || isJetsonDt) { + let totalMemoryMB = 0; + try { + const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true }); + if (memLine) totalMemoryMB = parseInt(memLine.trim(), 10) || 0; + } catch { /* ignored */ } + return { + type: "nvidia", + name: dtModel || nameOutput || "Jetson", + count: 1, + totalMemoryMB, + perGpuMB: totalMemoryMB, + nimCapable: false, + jetson: true, + }; + } } catch { /* ignored */ } // macOS: detect Apple Silicon or discrete GPU diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 1480f435d..e8459c359 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -1531,6 +1531,14 @@ async function preflight() { console.log(" ✓ Previous session cleaned up"); } + // Clean up any existing nemoclaw gateway before checking ports — + // a previous onboard run may have left the gateway running, which + // would block port 8080 and cause a confusing "port in use" error. + run("openshell gateway destroy -g nemoclaw 2>/dev/null || true", { ignoreError: true }); + // Kill only nemoclaw-owned openclaw-gateway processes holding port 18789. + run("kill $(lsof -ti :18789 -c openclaw) 2>/dev/null || true", { ignoreError: true }); + sleep(2); + // Required ports — gateway (8080) and dashboard (18789) const requiredPorts = [ { port: 8080, label: "OpenShell gateway" }, @@ -1572,7 +1580,10 @@ async function preflight() { // GPU const gpu = nim.detectGpu(); - if (gpu && gpu.type === "nvidia") { + if (gpu && gpu.type === "nvidia" && gpu.jetson) { + console.log(` ✓ NVIDIA Jetson detected: ${gpu.name}, ${gpu.totalMemoryMB} MB unified memory`); + console.log(" ⓘ NIM containers not supported on Jetson — will use Ollama or cloud inference"); + } else if (gpu && gpu.type === "nvidia") { console.log(` ✓ NVIDIA GPU detected: ${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`); } else if (gpu && gpu.type === "apple") { console.log(` ✓ Apple GPU detected: ${gpu.name}${gpu.cores ? ` (${gpu.cores} cores)` : ""}, ${gpu.totalMemoryMB} MB unified memory`); @@ -1584,9 +1595,78 @@ async function preflight() { return gpu; } +// ── Jetson gateway image patch ─────────────────────────────────── +// +// JetPack kernels (Tegra) ship without nft_chain_filter and related +// nf_tables modules. The OpenShell gateway image embeds k3s, whose +// network policy controller calls iptables in nf_tables mode by default. +// Without kernel support the controller panics on startup. +// +// This function rebuilds the gateway image locally, switching the +// default iptables alternative to iptables-legacy so all rule +// manipulation uses the classic xtables backend that Tegra kernels +// fully support. + +/** Extracts the semver tag from the installed openshell CLI version. */ +function getGatewayImageTag() { + const openshellVersion = runCapture("openshell --version 2>/dev/null", { ignoreError: true }) || ""; + const match = openshellVersion.match(/(\d+\.\d+\.\d+)/); + return match ? match[1] : "latest"; +} + +/** + * Rebuilds the OpenShell gateway container image with iptables-legacy as the + * default backend. Idempotent — skips rebuild if the image is already patched + * (checked via Docker label). Required on Jetson because the Tegra kernel + * lacks nft_chain_filter modules that k3s's network policy controller needs. + */ +function patchGatewayImageForJetson() { + const tag = getGatewayImageTag(); + const image = `ghcr.io/nvidia/openshell/cluster:${tag}`; + + // Check if already patched (look for our label) + const inspectOut = runCapture( + `docker inspect --format='{{index .Config.Labels "io.nemoclaw.jetson-patched"}}' "${image}" 2>/dev/null`, + { ignoreError: true } + ).trim(); + if (inspectOut === "true") { + console.log(" ✓ Gateway image already patched for Jetson"); + return; + } + + console.log(" Patching gateway image for Jetson (iptables-legacy)..."); + console.log(" (this may take a moment on first run if the base image needs to be pulled)"); + + const os = require("os"); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-jetson-")); + const dockerfile = path.join(tmpDir, "Dockerfile"); + fs.writeFileSync( + dockerfile, + [ + `FROM ${image}`, + `RUN if command -v update-alternatives >/dev/null 2>&1 && \\`, + ` update-alternatives --set iptables /usr/sbin/iptables-legacy 2>/dev/null && \\`, + ` update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy 2>/dev/null; then \\`, + ` :; \\`, + ` elif [ -f /usr/sbin/iptables-legacy ] && [ -f /usr/sbin/ip6tables-legacy ]; then \\`, + ` ln -sf /usr/sbin/iptables-legacy /usr/sbin/iptables; \\`, + ` ln -sf /usr/sbin/ip6tables-legacy /usr/sbin/ip6tables; \\`, + ` else \\`, + ` echo "iptables-legacy not available in base image" >&2; exit 1; \\`, + ` fi`, + `LABEL io.nemoclaw.jetson-patched="true"`, + "", + ].join("\n") + ); + + run(`docker build --quiet -t "${image}" "${tmpDir}"`, { ignoreError: false }); + run(`rm -rf "${tmpDir}"`, { ignoreError: true }); + console.log(" ✓ Gateway image patched for Jetson (iptables-legacy)"); +} + // ── Step 2: Gateway ────────────────────────────────────────────── -async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { +async function startGatewayWithOptions(gpu, { exitOnFailure = true } = {}) { step(2, 7, "Starting OpenShell gateway"); const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true }); @@ -1599,6 +1679,15 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { return; } + // Jetson (Tegra kernel): The k3s container image ships iptables v1.8.10 in + // nf_tables mode, but JetPack kernels lack the nft_chain_filter module, + // causing the k3s network policy controller to panic on startup. + // Workaround: rebuild the gateway image locally with iptables-legacy as the + // default so iptables commands use the legacy (xtables) backend instead. + if (gpu && gpu.jetson) { + patchGatewayImageForJetson(); + } + if (hasStaleGateway(gwInfo)) { runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true }); } diff --git a/bin/nemoclaw.js b/bin/nemoclaw.js index 76e9512f5..f10a0c302 100755 --- a/bin/nemoclaw.js +++ b/bin/nemoclaw.js @@ -37,7 +37,7 @@ const { parseGatewayInference } = require("./lib/inference-config"); // ── Global commands ────────────────────────────────────────────── const GLOBAL_COMMANDS = new Set([ - "onboard", "list", "deploy", "setup", "setup-spark", + "onboard", "list", "deploy", "setup", "setup-spark", "setup-jetson", "start", "stop", "status", "debug", "uninstall", "help", "--help", "-h", "--version", "-v", ]); @@ -362,6 +362,10 @@ async function setupSpark() { run(`sudo bash "${SCRIPTS}/setup-spark.sh"`); } +async function setupJetson() { + run(`sudo -E bash "${SCRIPTS}/setup-jetson.sh"`); +} + // eslint-disable-next-line complexity async function deploy(instanceName) { if (!instanceName) { @@ -717,6 +721,7 @@ function help() { ${G}Getting Started:${R} ${B}nemoclaw onboard${R} Configure inference endpoint and credentials nemoclaw setup-spark Set up on DGX Spark ${D}(fixes cgroup v2 + Docker)${R} + nemoclaw setup-jetson Set up on Jetson ${D}(NVIDIA runtime + iptables fix)${R} ${G}Sandbox Management:${R} ${B}nemoclaw list${R} List all sandboxes @@ -773,6 +778,7 @@ const [cmd, ...args] = process.argv.slice(2); case "onboard": await onboard(args); break; case "setup": await setup(); break; case "setup-spark": await setupSpark(); break; + case "setup-jetson": await setupJetson(); break; case "deploy": await deploy(args[0]); break; case "start": await start(); break; case "stop": stop(); break; diff --git a/scripts/setup-jetson.sh b/scripts/setup-jetson.sh new file mode 100755 index 000000000..352ab42cb --- /dev/null +++ b/scripts/setup-jetson.sh @@ -0,0 +1,207 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# NemoClaw setup for NVIDIA Jetson devices (Orin Nano, Orin NX, AGX Orin, Thor). +# +# Jetson devices use unified memory and a Tegra kernel that lacks nf_tables +# chain modules (nft_chain_filter, nft_chain_nat, etc.). The OpenShell gateway +# runs k3s inside a Docker container, and k3s's network policy controller +# uses iptables in nf_tables mode by default, which panics on Tegra kernels. +# +# This script prepares the Jetson host so that `nemoclaw onboard` succeeds: +# 1. Verifies Jetson platform +# 2. Ensures NVIDIA Container Runtime is configured for Docker +# 3. Loads required kernel modules (br_netfilter, xt_comment) +# 4. Configures Docker daemon with default-runtime=nvidia +# +# The iptables-legacy patch for the gateway container image is handled +# automatically by `nemoclaw onboard` when it detects a Jetson GPU. +# +# Usage: +# sudo nemoclaw setup-jetson +# # or directly: +# sudo bash scripts/setup-jetson.sh + +set -euo pipefail + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +info() { echo -e "${GREEN}>>>${NC} $1"; } +warn() { echo -e "${YELLOW}>>>${NC} $1"; } +fail() { echo -e "${RED}>>>${NC} $1"; exit 1; } + +# ── Pre-flight checks ───────────────────────────────────────────── + +if [ "$(uname -s)" != "Linux" ]; then + fail "This script is for NVIDIA Jetson (Linux). Use 'nemoclaw setup' for macOS." +fi + +if [ "$(uname -m)" != "aarch64" ]; then + fail "Jetson devices are aarch64. This system is $(uname -m)." +fi + +if [ "$(id -u)" -ne 0 ]; then + fail "Must run as root: sudo nemoclaw setup-jetson" +fi + +# Verify Jetson platform +JETSON_MODEL="" +if [ -f /proc/device-tree/model ]; then + JETSON_MODEL=$(tr -d '\0' < /proc/device-tree/model) +fi + +if ! echo "$JETSON_MODEL" | grep -qi "jetson"; then + # Also check nvidia-smi for Orin GPU name + GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader,nounits 2>/dev/null || echo "") + if ! echo "$GPU_NAME" | grep -qiE "orin|thor"; then + fail "This does not appear to be a Jetson device. Use 'nemoclaw onboard' directly." + fi + # Exclude discrete GPUs that happen to contain matching strings + if echo "$GPU_NAME" | grep -qiE "geforce|rtx|quadro"; then + fail "Discrete GPU detected ('$GPU_NAME'). This script is for Jetson only." + fi + JETSON_MODEL="${GPU_NAME}" +fi + +info "Detected Jetson platform: ${JETSON_MODEL}" + +# Detect the real user (not root) for docker group add +REAL_USER="${SUDO_USER:-$(logname 2>/dev/null || echo "")}" + +command -v docker > /dev/null || fail "Docker not found. Install docker.io: sudo apt-get install -y docker.io" +command -v python3 > /dev/null || fail "python3 not found. Install with: sudo apt-get install -y python3-minimal" + +# ── 1. Docker group ─────────────────────────────────────────────── + +if [ -n "$REAL_USER" ]; then + if id -nG "$REAL_USER" | grep -qw docker; then + info "User '$REAL_USER' already in docker group" + else + info "Adding '$REAL_USER' to docker group..." + usermod -aG docker "$REAL_USER" + info "Added. Group will take effect on next login (or use 'newgrp docker')." + fi +fi + +# ── 2. NVIDIA Container Runtime ────────────────────────────────── +# +# Jetson JetPack pre-installs nvidia-container-runtime but Docker may +# not be configured to use it as the default runtime. + +DAEMON_JSON="/etc/docker/daemon.json" +NEEDS_RESTART=false + +configure_nvidia_runtime() { + if ! command -v nvidia-container-runtime > /dev/null 2>&1; then + warn "nvidia-container-runtime not found. GPU passthrough may not work." + warn "Install with: sudo apt-get install -y nvidia-container-toolkit" + return + fi + + if [ -f "$DAEMON_JSON" ]; then + # Check if nvidia runtime is already configured + if python3 -c " +import json, sys +try: + d = json.load(open('$DAEMON_JSON')) + runtimes = d.get('runtimes', {}) if isinstance(d, dict) else {} + if 'nvidia' in runtimes and d.get('default-runtime') == 'nvidia': + sys.exit(0) + sys.exit(1) +except (IOError, ValueError, KeyError, AttributeError): + sys.exit(1) +" 2>/dev/null; then + info "NVIDIA runtime already configured in Docker daemon" + else + info "Adding NVIDIA runtime to Docker daemon config..." + python3 -c " +import json +try: + with open('$DAEMON_JSON') as f: + d = json.load(f) +except (IOError, ValueError, KeyError): + d = {} +if not isinstance(d, dict): + d = {} +d.setdefault('runtimes', {})['nvidia'] = { + 'path': 'nvidia-container-runtime', + 'runtimeArgs': [] +} +d['default-runtime'] = 'nvidia' +with open('$DAEMON_JSON', 'w') as f: + json.dump(d, f, indent=2) +" + NEEDS_RESTART=true + fi + else + info "Creating Docker daemon config with NVIDIA runtime..." + mkdir -p "$(dirname "$DAEMON_JSON")" + cat > "$DAEMON_JSON" <<'DAEMONJSON' +{ + "runtimes": { + "nvidia": { + "path": "nvidia-container-runtime", + "runtimeArgs": [] + } + }, + "default-runtime": "nvidia" +} +DAEMONJSON + NEEDS_RESTART=true + fi +} + +configure_nvidia_runtime + +# ── 3. Kernel modules ──────────────────────────────────────────── + +info "Loading required kernel modules..." +modprobe br_netfilter 2>/dev/null || warn "Could not load br_netfilter" +modprobe xt_comment 2>/dev/null || warn "Could not load xt_comment" + +# Persist across reboots +MODULES_FILE="/etc/modules-load.d/nemoclaw-jetson.conf" +if [ ! -f "$MODULES_FILE" ]; then + info "Persisting kernel modules for boot..." + cat > "$MODULES_FILE" <<'MODULES' +# NemoClaw: required for k3s networking inside Docker +br_netfilter +xt_comment +MODULES +fi + +# ── 4. Restart Docker if needed ────────────────────────────────── + +if [ "$NEEDS_RESTART" = true ]; then + info "Restarting Docker daemon..." + if command -v systemctl > /dev/null 2>&1; then + systemctl restart docker + else + service docker restart 2>/dev/null || dockerd & + fi + for i in 1 2 3 4 5 6 7 8 9 10; do + if docker info > /dev/null 2>&1; then + break + fi + [ "$i" -eq 10 ] && fail "Docker didn't come back after restart. Check 'systemctl status docker'." + sleep 2 + done + info "Docker restarted with NVIDIA runtime" +fi + +# ── Done ───────────────────────────────────────────────────────── + +echo "" +info "Jetson setup complete." +info "" +info "Device: ${JETSON_MODEL}" +info "" +info "Next step: run 'nemoclaw onboard' to set up your sandbox." +info " nemoclaw onboard" +info "" +info "The onboard wizard will automatically patch the gateway image" +info "for Jetson iptables compatibility." diff --git a/test/local-inference.test.js b/test/local-inference.test.js index ec37b5f1f..11fc725bd 100644 --- a/test/local-inference.test.js +++ b/test/local-inference.test.js @@ -6,6 +6,7 @@ import { describe, it, expect } from "vitest"; import { CONTAINER_REACHABILITY_IMAGE, DEFAULT_OLLAMA_MODEL, + DEFAULT_OLLAMA_MODEL_JETSON, getDefaultOllamaModel, getLocalProviderBaseUrl, getLocalProviderContainerReachabilityCheck, @@ -19,6 +20,8 @@ import { validateLocalProvider, } from "../bin/lib/local-inference"; +const FAKE_JETSON_GPU = { type: "nvidia", jetson: true, totalMemoryMB: 7619 }; + describe("local inference helpers", () => { it("returns the expected base URL for vllm-local", () => { expect(getLocalProviderBaseUrl("vllm-local")).toBe("http://host.openshell.internal:8000/v1"); @@ -129,6 +132,26 @@ describe("local inference helpers", () => { ).toBe("qwen3:32b"); }); + it("returns jetson 4b model as default on jetson when available", () => { + const list = `nemotron-3-nano:4b abc 2.8 GB now\nqwen3:32b def 20 GB now`; + assert.equal( + getDefaultOllamaModel(() => list, FAKE_JETSON_GPU), + DEFAULT_OLLAMA_MODEL_JETSON, + ); + }); + + it("falls back to jetson 4b model when ollama list is empty on jetson", () => { + assert.deepEqual(getOllamaModelOptions(() => "", FAKE_JETSON_GPU), [DEFAULT_OLLAMA_MODEL_JETSON]); + assert.equal(getDefaultOllamaModel(() => "", FAKE_JETSON_GPU), DEFAULT_OLLAMA_MODEL_JETSON); + }); + + it("falls back to first available model on jetson when 4b is absent", () => { + assert.equal( + getDefaultOllamaModel(() => "qwen3:4b abc 3 GB now", FAKE_JETSON_GPU), + "qwen3:4b", + ); + }); + it("builds a background warmup command for ollama models", () => { const command = getOllamaWarmupCommand("nemotron-3-nano:30b"); expect(command).toMatch(/^nohup curl -s http:\/\/localhost:11434\/api\/generate /); diff --git a/test/nim.test.js b/test/nim.test.js index cd4cf6cd4..703cb7b9f 100644 --- a/test/nim.test.js +++ b/test/nim.test.js @@ -4,6 +4,12 @@ import { describe, it, expect } from "vitest"; import nim from "../bin/lib/nim"; +// Detect GPU once for conditional test gating. +const detectedGpu = nim.detectGpu(); +const isDiscreteNvidia = detectedGpu && detectedGpu.type === "nvidia" && !detectedGpu.jetson; +const isJetson = detectedGpu && detectedGpu.type === "nvidia" && detectedGpu.jetson; +const isApple = detectedGpu && detectedGpu.type === "apple"; + describe("nim", () => { describe("listModels", () => { it("returns 5 models", () => { @@ -47,19 +53,20 @@ describe("nim", () => { } }); - it("nvidia type is nimCapable", () => { - const gpu = nim.detectGpu(); - if (gpu && gpu.type === "nvidia") { - expect(gpu.nimCapable).toBe(true); - } + it("nvidia (discrete) type is nimCapable", { skip: !isDiscreteNvidia }, () => { + expect(detectedGpu.nimCapable).toBe(true); }); - it("apple type is not nimCapable", () => { - const gpu = nim.detectGpu(); - if (gpu && gpu.type === "apple") { - expect(gpu.nimCapable).toBe(false); - expect(gpu.name).toBeTruthy(); - } + it("nvidia (jetson) type is not nimCapable", { skip: !isJetson }, () => { + expect(detectedGpu.nimCapable).toBe(false); + expect(detectedGpu.name).toBeTruthy(); + expect(detectedGpu.jetson).toBe(true); + expect(detectedGpu.totalMemoryMB).toBeGreaterThan(0); + }); + + it("apple type is not nimCapable", { skip: !isApple }, () => { + expect(detectedGpu.nimCapable).toBe(false); + expect(detectedGpu.name).toBeTruthy(); }); }); From 674493eafbee910dcbd88a73f97f6196df912777 Mon Sep 17 00:00:00 2001 From: realkim93 Date: Fri, 20 Mar 2026 23:58:18 +0900 Subject: [PATCH 02/10] =?UTF-8?q?fix:=20address=20code=20review=20?= =?UTF-8?q?=E2=80=94=20null-safety,=20vllm-local=20parity,=20policy=20tigh?= =?UTF-8?q?tening?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Guard runCapture().trim() against null in patchGatewayImageForJetson - Apply same inference.local bypass to vllm-local (same DNS bug affects both local providers, not just Ollama) - Use getLocalProviderBaseUrl() as single source of truth for direct URLs - Add TODO to remove direct URLs when OpenShell fixes inference.local - Remove overly broad /usr/local/bin/node from ollama_local network policy (openclaw binary alone is sufficient) Co-Authored-By: Claude Opus 4.6 (1M context) --- bin/lib/onboard.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index e8459c359..d642143e6 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -1625,10 +1625,10 @@ function patchGatewayImageForJetson() { const image = `ghcr.io/nvidia/openshell/cluster:${tag}`; // Check if already patched (look for our label) - const inspectOut = runCapture( + const inspectOut = (runCapture( `docker inspect --format='{{index .Config.Labels "io.nemoclaw.jetson-patched"}}' "${image}" 2>/dev/null`, { ignoreError: true } - ).trim(); + ) || "").trim(); if (inspectOut === "true") { console.log(" ✓ Gateway image already patched for Jetson"); return; From 9601a7135b47286e9a3658b08481b6c408297b28 Mon Sep 17 00:00:00 2001 From: realkim93 Date: Mon, 23 Mar 2026 10:39:37 +0900 Subject: [PATCH 03/10] =?UTF-8?q?fix:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20port=20cleanup=20timing,=20provider=20mapping,=20an?= =?UTF-8?q?d=20cleanup=20safety?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Defer port-18789 kill to createSandbox() after recreate decision so no-op reruns don't break a healthy dashboard forward - Derive provider type from selectionConfig.provider metadata instead of comparing model names to DEFAULT_OLLAMA_MODEL (fixes Jetson misclassification) - Wrap patchGatewayImageForJetson tmpDir in try/finally with fs.rmSync - Remove unreachable duplicate nemoClawConfigScript in setupOpenclaw - Extend Docker restart timeout to 30s for slower Jetson devices Co-Authored-By: Claude Opus 4.6 (1M context) --- bin/lib/onboard.js | 56 +++++++++++++++++++++++------------------ scripts/setup-jetson.sh | 4 +-- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index d642143e6..859687583 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -1535,8 +1535,9 @@ async function preflight() { // a previous onboard run may have left the gateway running, which // would block port 8080 and cause a confusing "port in use" error. run("openshell gateway destroy -g nemoclaw 2>/dev/null || true", { ignoreError: true }); - // Kill only nemoclaw-owned openclaw-gateway processes holding port 18789. - run("kill $(lsof -ti :18789 -c openclaw) 2>/dev/null || true", { ignoreError: true }); + // NOTE: Port 18789 (dashboard forward) cleanup is deferred to createSandbox() + // so that a no-op rerun (keeping existing sandbox) does not kill a healthy + // dashboard forward. sleep(2); // Required ports — gateway (8080) and dashboard (18789) @@ -1639,29 +1640,32 @@ function patchGatewayImageForJetson() { const os = require("os"); const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-jetson-")); - const dockerfile = path.join(tmpDir, "Dockerfile"); - fs.writeFileSync( - dockerfile, - [ - `FROM ${image}`, - `RUN if command -v update-alternatives >/dev/null 2>&1 && \\`, - ` update-alternatives --set iptables /usr/sbin/iptables-legacy 2>/dev/null && \\`, - ` update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy 2>/dev/null; then \\`, - ` :; \\`, - ` elif [ -f /usr/sbin/iptables-legacy ] && [ -f /usr/sbin/ip6tables-legacy ]; then \\`, - ` ln -sf /usr/sbin/iptables-legacy /usr/sbin/iptables; \\`, - ` ln -sf /usr/sbin/ip6tables-legacy /usr/sbin/ip6tables; \\`, - ` else \\`, - ` echo "iptables-legacy not available in base image" >&2; exit 1; \\`, - ` fi`, - `LABEL io.nemoclaw.jetson-patched="true"`, - "", - ].join("\n") - ); + try { + const dockerfile = path.join(tmpDir, "Dockerfile"); + fs.writeFileSync( + dockerfile, + [ + `FROM ${image}`, + `RUN if command -v update-alternatives >/dev/null 2>&1 && \\`, + ` update-alternatives --set iptables /usr/sbin/iptables-legacy 2>/dev/null && \\`, + ` update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy 2>/dev/null; then \\`, + ` :; \\`, + ` elif [ -f /usr/sbin/iptables-legacy ] && [ -f /usr/sbin/ip6tables-legacy ]; then \\`, + ` ln -sf /usr/sbin/iptables-legacy /usr/sbin/iptables; \\`, + ` ln -sf /usr/sbin/ip6tables-legacy /usr/sbin/ip6tables; \\`, + ` else \\`, + ` echo "iptables-legacy not available in base image" >&2; exit 1; \\`, + ` fi`, + `LABEL io.nemoclaw.jetson-patched="true"`, + "", + ].join("\n") + ); - run(`docker build --quiet -t "${image}" "${tmpDir}"`, { ignoreError: false }); - run(`rm -rf "${tmpDir}"`, { ignoreError: true }); - console.log(" ✓ Gateway image patched for Jetson (iptables-legacy)"); + run(`docker build --quiet -t "${image}" "${tmpDir}"`, { ignoreError: false }); + console.log(" ✓ Gateway image patched for Jetson (iptables-legacy)"); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } } // ── Step 2: Gateway ────────────────────────────────────────────── @@ -1849,6 +1853,10 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null, registry.removeSandbox(sandboxName); } + // Kill stale dashboard-forward processes only when we are actually + // creating or recreating — avoids breaking a healthy forward on no-op reruns. + run("kill $(lsof -ti :18789 -c openclaw) 2>/dev/null || true", { ignoreError: true }); + // Stage build context const buildCtx = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-")); const stagedDockerfile = path.join(buildCtx, "Dockerfile"); diff --git a/scripts/setup-jetson.sh b/scripts/setup-jetson.sh index 352ab42cb..e79526597 100755 --- a/scripts/setup-jetson.sh +++ b/scripts/setup-jetson.sh @@ -183,11 +183,11 @@ if [ "$NEEDS_RESTART" = true ]; then else service docker restart 2>/dev/null || dockerd & fi - for i in 1 2 3 4 5 6 7 8 9 10; do + for i in $(seq 1 15); do if docker info > /dev/null 2>&1; then break fi - [ "$i" -eq 10 ] && fail "Docker didn't come back after restart. Check 'systemctl status docker'." + [ "$i" -eq 15 ] && fail "Docker didn't come back after restart. Check 'systemctl status docker'." sleep 2 done info "Docker restarted with NVIDIA runtime" From 8e239313c450837e7e6bdd2450c7987db8841abf Mon Sep 17 00:00:00 2001 From: realkim93 Date: Mon, 23 Mar 2026 10:43:43 +0900 Subject: [PATCH 04/10] fix: remove port-18789 preflight check to avoid regression on re-run The previous commit deferred the port-18789 kill to createSandbox(), but left the port availability check in preflight. This caused a hard exit when re-running onboard with an existing dashboard forward still active. Port 18789 is now fully managed inside createSandbox(). Co-Authored-By: Claude Opus 4.6 (1M context) --- bin/lib/onboard.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 859687583..6c72c380b 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -1540,10 +1540,11 @@ async function preflight() { // dashboard forward. sleep(2); - // Required ports — gateway (8080) and dashboard (18789) + // Required ports — only check gateway (8080) here. Port 18789 (dashboard) + // is managed inside createSandbox() so that re-running onboard with an + // existing sandbox does not fail the preflight check. const requiredPorts = [ { port: 8080, label: "OpenShell gateway" }, - { port: 18789, label: "NemoClaw dashboard" }, ]; for (const { port, label } of requiredPorts) { const portCheck = await checkPortAvailable(port); From 9d57586c6bae664439446e7c24c2b5fb5328df84 Mon Sep 17 00:00:00 2001 From: realkim93 Date: Mon, 23 Mar 2026 20:13:46 +0900 Subject: [PATCH 05/10] fix: align test assertions with merged implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - inference-config.test: use getLocalProviderBaseUrl() for ollama-local endpoint URL (host-gateway bypass for OpenShell 0.0.10 DNS issue) - local-inference.test: convert assert → expect (vitest) for jetson tests Co-Authored-By: Claude Opus 4.6 (1M context) --- test/inference-config.test.js | 5 +++-- test/local-inference.test.js | 14 ++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/test/inference-config.test.js b/test/inference-config.test.js index ad13b3088..82ca43e76 100644 --- a/test/inference-config.test.js +++ b/test/inference-config.test.js @@ -15,6 +15,7 @@ import { getProviderSelectionConfig, parseGatewayInference, } from "../bin/lib/inference-config"; +import { getLocalProviderBaseUrl } from "../bin/lib/local-inference"; describe("inference selection config", () => { it("exposes the curated cloud model picker options", () => { @@ -28,10 +29,10 @@ describe("inference selection config", () => { ]); }); - it("maps ollama-local to the sandbox inference route and default model", () => { + it("maps ollama-local to host-gateway URL (bypasses inference.local DNS)", () => { expect(getProviderSelectionConfig("ollama-local")).toEqual({ endpointType: "custom", - endpointUrl: INFERENCE_ROUTE_URL, + endpointUrl: getLocalProviderBaseUrl("ollama-local"), ncpPartner: null, model: DEFAULT_OLLAMA_MODEL, profile: DEFAULT_ROUTE_PROFILE, diff --git a/test/local-inference.test.js b/test/local-inference.test.js index 11fc725bd..b0c0195e6 100644 --- a/test/local-inference.test.js +++ b/test/local-inference.test.js @@ -134,22 +134,20 @@ describe("local inference helpers", () => { it("returns jetson 4b model as default on jetson when available", () => { const list = `nemotron-3-nano:4b abc 2.8 GB now\nqwen3:32b def 20 GB now`; - assert.equal( + expect( getDefaultOllamaModel(() => list, FAKE_JETSON_GPU), - DEFAULT_OLLAMA_MODEL_JETSON, - ); + ).toBe(DEFAULT_OLLAMA_MODEL_JETSON); }); it("falls back to jetson 4b model when ollama list is empty on jetson", () => { - assert.deepEqual(getOllamaModelOptions(() => "", FAKE_JETSON_GPU), [DEFAULT_OLLAMA_MODEL_JETSON]); - assert.equal(getDefaultOllamaModel(() => "", FAKE_JETSON_GPU), DEFAULT_OLLAMA_MODEL_JETSON); + expect(getOllamaModelOptions(() => "", FAKE_JETSON_GPU)).toEqual([DEFAULT_OLLAMA_MODEL_JETSON]); + expect(getDefaultOllamaModel(() => "", FAKE_JETSON_GPU)).toBe(DEFAULT_OLLAMA_MODEL_JETSON); }); it("falls back to first available model on jetson when 4b is absent", () => { - assert.equal( + expect( getDefaultOllamaModel(() => "qwen3:4b abc 3 GB now", FAKE_JETSON_GPU), - "qwen3:4b", - ); + ).toBe("qwen3:4b"); }); it("builds a background warmup command for ollama models", () => { From 59291f15c2d51b40653934c65763d22d30cc508e Mon Sep 17 00:00:00 2001 From: realkim93 Date: Sun, 29 Mar 2026 23:48:55 +0900 Subject: [PATCH 06/10] fix: align tests with main after rebase - inference-config.test.js: use INFERENCE_ROUTE_URL for ollama-local (PR #1037 fixed inference.local routing, host-gateway bypass removed) - local-inference.test.js: getOllamaModelOptions no longer takes gpu param; Jetson fallback moved to getBootstrapOllamaModelOptions Co-Authored-By: Claude Opus 4.6 (1M context) --- test/inference-config.test.js | 5 ++--- test/local-inference.test.js | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/inference-config.test.js b/test/inference-config.test.js index 82ca43e76..ad13b3088 100644 --- a/test/inference-config.test.js +++ b/test/inference-config.test.js @@ -15,7 +15,6 @@ import { getProviderSelectionConfig, parseGatewayInference, } from "../bin/lib/inference-config"; -import { getLocalProviderBaseUrl } from "../bin/lib/local-inference"; describe("inference selection config", () => { it("exposes the curated cloud model picker options", () => { @@ -29,10 +28,10 @@ describe("inference selection config", () => { ]); }); - it("maps ollama-local to host-gateway URL (bypasses inference.local DNS)", () => { + it("maps ollama-local to the sandbox inference route and default model", () => { expect(getProviderSelectionConfig("ollama-local")).toEqual({ endpointType: "custom", - endpointUrl: getLocalProviderBaseUrl("ollama-local"), + endpointUrl: INFERENCE_ROUTE_URL, ncpPartner: null, model: DEFAULT_OLLAMA_MODEL, profile: DEFAULT_ROUTE_PROFILE, diff --git a/test/local-inference.test.js b/test/local-inference.test.js index b0c0195e6..7ea970dec 100644 --- a/test/local-inference.test.js +++ b/test/local-inference.test.js @@ -140,7 +140,8 @@ describe("local inference helpers", () => { }); it("falls back to jetson 4b model when ollama list is empty on jetson", () => { - expect(getOllamaModelOptions(() => "", FAKE_JETSON_GPU)).toEqual([DEFAULT_OLLAMA_MODEL_JETSON]); + // getOllamaModelOptions does not take gpu — bootstrap fallback happens in + // getBootstrapOllamaModelOptions (called by getDefaultOllamaModel). expect(getDefaultOllamaModel(() => "", FAKE_JETSON_GPU)).toBe(DEFAULT_OLLAMA_MODEL_JETSON); }); From 92e51f17cc5ebf122e75a05f834be72ada308f8c Mon Sep 17 00:00:00 2001 From: realkim93 Date: Mon, 30 Mar 2026 01:15:14 +0900 Subject: [PATCH 07/10] refactor: extract Jetson detection to reduce detectGpu complexity Extract detectJetson() and getUnifiedMemoryMB() helper functions to bring detectGpu() cyclomatic complexity under the lint threshold (20). Co-Authored-By: Claude Opus 4.6 (1M context) --- bin/lib/nim.js | 66 ++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/bin/lib/nim.js b/bin/lib/nim.js index 771f11bb7..2af41ab26 100644 --- a/bin/lib/nim.js +++ b/bin/lib/nim.js @@ -23,6 +23,38 @@ function listModels() { })); } +function getUnifiedMemoryMB() { + try { + const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true }); + if (memLine) return parseInt(memLine.trim(), 10) || 0; + } catch { /* ignored */ } + return 0; +} + +function detectJetson(nameOutput) { + const isJetsonGpu = nameOutput && + /orin|thor/i.test(nameOutput) && + !/geforce|rtx|quadro/i.test(nameOutput); + const dtModel = runCapture( + "cat /proc/device-tree/model 2>/dev/null | tr -d '\\0'", + { ignoreError: true } + ); + const isJetsonDt = dtModel && /jetson/i.test(dtModel); + + if (!isJetsonGpu && !isJetsonDt) return null; + + const totalMemoryMB = getUnifiedMemoryMB(); + return { + type: "nvidia", + name: dtModel || nameOutput || "Jetson", + count: 1, + totalMemoryMB, + perGpuMB: totalMemoryMB, + nimCapable: false, + jetson: true, + }; +} + /** * Detects the GPU on the current system. Returns an object describing the GPU * type, memory, and capabilities, or null if no GPU is found. Supports @@ -61,11 +93,7 @@ function detectGpu() { // DGX Spark (GB10) — 128GB unified memory shared with Grace CPU if (nameOutput && nameOutput.includes("GB10")) { - let totalMemoryMB = 0; - try { - const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true }); - if (memLine) totalMemoryMB = parseInt(memLine.trim(), 10) || 0; - } catch { /* ignored */ } + const totalMemoryMB = getUnifiedMemoryMB(); return { type: "nvidia", count: 1, @@ -79,32 +107,8 @@ function detectGpu() { // NVIDIA Jetson — unified memory, nvidia-smi reports GPU name containing // "Orin" or "Thor" but without discrete GPU identifiers like // GeForce/RTX/Quadro. Tested on Jetson Orin Nano Super (JetPack 6.x). - // Other Jetson variants may also work via /proc/device-tree/model fallback. - const isJetsonGpu = nameOutput && - /orin|thor/i.test(nameOutput) && - !/geforce|rtx|quadro/i.test(nameOutput); - const dtModel = runCapture( - "cat /proc/device-tree/model 2>/dev/null | tr -d '\\0'", - { ignoreError: true } - ); - const isJetsonDt = dtModel && /jetson/i.test(dtModel); - - if (isJetsonGpu || isJetsonDt) { - let totalMemoryMB = 0; - try { - const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true }); - if (memLine) totalMemoryMB = parseInt(memLine.trim(), 10) || 0; - } catch { /* ignored */ } - return { - type: "nvidia", - name: dtModel || nameOutput || "Jetson", - count: 1, - totalMemoryMB, - perGpuMB: totalMemoryMB, - nimCapable: false, - jetson: true, - }; - } + const jetson = detectJetson(nameOutput); + if (jetson) return jetson; } catch { /* ignored */ } // macOS: detect Apple Silicon or discrete GPU From c4d41dd7b8f07bcbcb8031f01af17fb80c73a7a8 Mon Sep 17 00:00:00 2001 From: realkim93 Date: Mon, 30 Mar 2026 01:16:46 +0900 Subject: [PATCH 08/10] chore: apply shfmt formatting to setup-jetson.sh Co-Authored-By: Claude Opus 4.6 (1M context) --- scripts/setup-jetson.sh | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/scripts/setup-jetson.sh b/scripts/setup-jetson.sh index e79526597..46416c1b9 100755 --- a/scripts/setup-jetson.sh +++ b/scripts/setup-jetson.sh @@ -32,7 +32,10 @@ NC='\033[0m' info() { echo -e "${GREEN}>>>${NC} $1"; } warn() { echo -e "${YELLOW}>>>${NC} $1"; } -fail() { echo -e "${RED}>>>${NC} $1"; exit 1; } +fail() { + echo -e "${RED}>>>${NC} $1" + exit 1 +} # ── Pre-flight checks ───────────────────────────────────────────── @@ -51,7 +54,7 @@ fi # Verify Jetson platform JETSON_MODEL="" if [ -f /proc/device-tree/model ]; then - JETSON_MODEL=$(tr -d '\0' < /proc/device-tree/model) + JETSON_MODEL=$(tr -d '\0' /dev/null || fail "Docker not found. Install docker.io: sudo apt-get install -y docker.io" -command -v python3 > /dev/null || fail "python3 not found. Install with: sudo apt-get install -y python3-minimal" +command -v docker >/dev/null || fail "Docker not found. Install docker.io: sudo apt-get install -y docker.io" +command -v python3 >/dev/null || fail "python3 not found. Install with: sudo apt-get install -y python3-minimal" # ── 1. Docker group ─────────────────────────────────────────────── @@ -96,7 +99,7 @@ DAEMON_JSON="/etc/docker/daemon.json" NEEDS_RESTART=false configure_nvidia_runtime() { - if ! command -v nvidia-container-runtime > /dev/null 2>&1; then + if ! command -v nvidia-container-runtime >/dev/null 2>&1; then warn "nvidia-container-runtime not found. GPU passthrough may not work." warn "Install with: sudo apt-get install -y nvidia-container-toolkit" return @@ -140,7 +143,7 @@ with open('$DAEMON_JSON', 'w') as f: else info "Creating Docker daemon config with NVIDIA runtime..." mkdir -p "$(dirname "$DAEMON_JSON")" - cat > "$DAEMON_JSON" <<'DAEMONJSON' + cat >"$DAEMON_JSON" <<'DAEMONJSON' { "runtimes": { "nvidia": { @@ -167,7 +170,7 @@ modprobe xt_comment 2>/dev/null || warn "Could not load xt_comment" MODULES_FILE="/etc/modules-load.d/nemoclaw-jetson.conf" if [ ! -f "$MODULES_FILE" ]; then info "Persisting kernel modules for boot..." - cat > "$MODULES_FILE" <<'MODULES' + cat >"$MODULES_FILE" <<'MODULES' # NemoClaw: required for k3s networking inside Docker br_netfilter xt_comment @@ -178,13 +181,13 @@ fi if [ "$NEEDS_RESTART" = true ]; then info "Restarting Docker daemon..." - if command -v systemctl > /dev/null 2>&1; then + if command -v systemctl >/dev/null 2>&1; then systemctl restart docker else service docker restart 2>/dev/null || dockerd & fi for i in $(seq 1 15); do - if docker info > /dev/null 2>&1; then + if docker info >/dev/null 2>&1; then break fi [ "$i" -eq 15 ] && fail "Docker didn't come back after restart. Check 'systemctl status docker'." From 79af0ff7fcd6e3f5e3e2265aa22e777391ab671a Mon Sep 17 00:00:00 2001 From: realkim93 Date: Mon, 30 Mar 2026 11:44:01 +0900 Subject: [PATCH 09/10] fix: restore preflight idempotency and fix local provider sandbox config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review feedback from cv and CodeRabbit: 1. Remove unconditional gateway destroy in preflight() — the existing getGatewayReuseState() logic already handles stale/unnamed cleanup while preserving healthy gateways. The unconditional destroy broke safe re-run behavior and could tear down a running session. 2. Restore port 18789 (dashboard) to requiredPorts — the existing healthy-gateway skip logic already handles the re-run case correctly. Removing it entirely masked conflicts from unrelated processes. 3. Add ollama-local and vllm-local cases to getSandboxInferenceConfig() so that Jetson's default model (nemotron-3-nano:4b) gets the correct direct endpoint URL instead of falling through to the nvidia-nim default path. 4. Add tests for ollama-local and vllm-local sandbox inference config to prevent future regressions in provider mapping. --- bin/lib/onboard.js | 20 ++++++++------------ test/onboard.test.js | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 6c72c380b..5d6c72e8b 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -566,6 +566,12 @@ function getSandboxInferenceConfig(model, provider = null, preferredInferenceApi supportsStore: false, }; break; + case "ollama-local": + case "vllm-local": + providerKey = "inference"; + primaryModelRef = `inference/${model}`; + inferenceBaseUrl = getLocalProviderBaseUrl(provider); + break; case "nvidia-prod": case "nvidia-nim": default: @@ -1531,20 +1537,10 @@ async function preflight() { console.log(" ✓ Previous session cleaned up"); } - // Clean up any existing nemoclaw gateway before checking ports — - // a previous onboard run may have left the gateway running, which - // would block port 8080 and cause a confusing "port in use" error. - run("openshell gateway destroy -g nemoclaw 2>/dev/null || true", { ignoreError: true }); - // NOTE: Port 18789 (dashboard forward) cleanup is deferred to createSandbox() - // so that a no-op rerun (keeping existing sandbox) does not kill a healthy - // dashboard forward. - sleep(2); - - // Required ports — only check gateway (8080) here. Port 18789 (dashboard) - // is managed inside createSandbox() so that re-running onboard with an - // existing sandbox does not fail the preflight check. + // Required ports — gateway (8080) and dashboard (18789) const requiredPorts = [ { port: 8080, label: "OpenShell gateway" }, + { port: 18789, label: "NemoClaw dashboard" }, ]; for (const { port, label } of requiredPorts) { const portCheck = await checkPortAvailable(port); diff --git a/test/onboard.test.js b/test/onboard.test.js index 16b7e5453..b2ae1ee0f 100644 --- a/test/onboard.test.js +++ b/test/onboard.test.js @@ -188,6 +188,32 @@ describe("onboard helpers", () => { ); }); + it("maps ollama-local to direct endpoint with correct base URL", () => { + assert.deepEqual( + getSandboxInferenceConfig("nemotron-3-nano:4b", "ollama-local"), + { + providerKey: "inference", + primaryModelRef: "inference/nemotron-3-nano:4b", + inferenceBaseUrl: "http://host.openshell.internal:11434/v1", + inferenceApi: "openai-completions", + inferenceCompat: null, + } + ); + }); + + it("maps vllm-local to direct endpoint with correct base URL", () => { + assert.deepEqual( + getSandboxInferenceConfig("meta-llama/llama-3.1-8b", "vllm-local"), + { + providerKey: "inference", + primaryModelRef: "inference/meta-llama/llama-3.1-8b", + inferenceBaseUrl: "http://host.openshell.internal:8000/v1", + inferenceApi: "openai-completions", + inferenceCompat: null, + } + ); + }); + it("pins the gateway image to the installed OpenShell release version", () => { expect(getInstalledOpenshellVersion("openshell 0.0.12")).toBe("0.0.12"); expect(getInstalledOpenshellVersion("openshell 0.0.13-dev.8+gbbcaed2ea")).toBe("0.0.13"); From fc8c790d6d9a8ef5bbd659a1fe7662a69ea39353 Mon Sep 17 00:00:00 2001 From: realkim93 Date: Wed, 1 Apr 2026 19:36:15 +0900 Subject: [PATCH 10/10] fix: correct setup-jetson placement and apply Prettier formatting - Move setup-jetson case into correct switch position (after setup-spark) - Apply Prettier formatting to all modified files - All 747 tests pass --- bin/lib/nim.js | 10 +++---- bin/nemoclaw.js | 11 ++++---- test/local-inference.test.js | 1 - test/nim.test.js | 55 +++++++++++++++++++----------------- test/onboard.test.js | 34 +++++++++------------- 5 files changed, 53 insertions(+), 58 deletions(-) diff --git a/bin/lib/nim.js b/bin/lib/nim.js index 3c4d78b55..2790e3ff4 100644 --- a/bin/lib/nim.js +++ b/bin/lib/nim.js @@ -75,7 +75,8 @@ function detectGpu() { const count = unifiedGpuNames.length; const perGpuMB = count > 0 ? Math.floor(totalMemoryMB / count) : totalMemoryMB; const isSpark = unifiedGpuNames.some((name) => /GB10/i.test(name)); - const isJetson = unifiedGpuNames.some((name) => /orin|thor/i.test(name)) && + const isJetson = + unifiedGpuNames.some((name) => /orin|thor/i.test(name)) && !unifiedGpuNames.some((name) => /geforce|rtx|quadro/i.test(name)); return { type: "nvidia", @@ -95,10 +96,9 @@ function detectGpu() { // Jetson fallback: /proc/device-tree/model (for cases where nvidia-smi is absent) try { - const dtModel = runCapture( - "cat /proc/device-tree/model 2>/dev/null | tr -d '\\0'", - { ignoreError: true }, - ); + const dtModel = runCapture("cat /proc/device-tree/model 2>/dev/null | tr -d '\\0'", { + ignoreError: true, + }); if (dtModel && /jetson/i.test(dtModel)) { let totalMemoryMB = 0; try { diff --git a/bin/nemoclaw.js b/bin/nemoclaw.js index 34c711577..92b7c30b6 100755 --- a/bin/nemoclaw.js +++ b/bin/nemoclaw.js @@ -1194,6 +1194,11 @@ const [cmd, ...args] = process.argv.slice(2); case "setup-spark": await setupSpark(); break; + case "setup-jetson": { + const { setupJetson } = require("./lib/local-inference"); + await setupJetson(); + break; + } case "deploy": await deploy(args[0]); break; @@ -1285,9 +1290,3 @@ const [cmd, ...args] = process.argv.slice(2); console.error(` Run 'nemoclaw help' for usage.`); process.exit(1); })(); - case "setup-jetson": { - const { setupJetson } = require("./lib/local-inference"); - await setupJetson(); - break; - } - diff --git a/test/local-inference.test.js b/test/local-inference.test.js index d8f1c3bf8..577d1e14f 100644 --- a/test/local-inference.test.js +++ b/test/local-inference.test.js @@ -252,5 +252,4 @@ describe("local inference helpers", () => { expect(getBootstrapOllamaModelOptions(FAKE_JETSON_GPU)).toEqual([DEFAULT_OLLAMA_MODEL_JETSON]); expect(getDefaultOllamaModel(() => "", FAKE_JETSON_GPU)).toBe(DEFAULT_OLLAMA_MODEL_JETSON); }); - }); diff --git a/test/nim.test.js b/test/nim.test.js index d7dbf440a..f4a4be6b4 100644 --- a/test/nim.test.js +++ b/test/nim.test.js @@ -278,33 +278,36 @@ describe("nim", () => { }); }); - it("detects Jetson Orin and sets jetson flag", () => { - const runCapture = vi.fn((cmd) => { - if (cmd.includes("memory.total")) return ""; - if (cmd.includes("query-gpu=name")) return "Orin"; - if (cmd.includes("free -m")) return "7627"; - return ""; - }); - const { nimModule, restore } = loadNimWithMockedRunner(runCapture); - try { - const gpu = nimModule.detectGpu(); - expect(gpu).toMatchObject({ type: "nvidia", jetson: true, unifiedMemory: true }); - } finally { restore(); } + it("detects Jetson Orin and sets jetson flag", () => { + const runCapture = vi.fn((cmd) => { + if (cmd.includes("memory.total")) return ""; + if (cmd.includes("query-gpu=name")) return "Orin"; + if (cmd.includes("free -m")) return "7627"; + return ""; }); + const { nimModule, restore } = loadNimWithMockedRunner(runCapture); + try { + const gpu = nimModule.detectGpu(); + expect(gpu).toMatchObject({ type: "nvidia", jetson: true, unifiedMemory: true }); + } finally { + restore(); + } + }); - it("detects Jetson via /proc/device-tree/model fallback", () => { - const runCapture = vi.fn((cmd) => { - if (cmd.includes("memory.total")) return ""; - if (cmd.includes("query-gpu=name")) return ""; - if (cmd.includes("device-tree/model")) return "NVIDIA Jetson Orin Nano Super Developer Kit"; - if (cmd.includes("free -m")) return "7627"; - return ""; - }); - const { nimModule, restore } = loadNimWithMockedRunner(runCapture); - try { - const gpu = nimModule.detectGpu(); - expect(gpu).toMatchObject({ type: "nvidia", jetson: true }); - } finally { restore(); } + it("detects Jetson via /proc/device-tree/model fallback", () => { + const runCapture = vi.fn((cmd) => { + if (cmd.includes("memory.total")) return ""; + if (cmd.includes("query-gpu=name")) return ""; + if (cmd.includes("device-tree/model")) return "NVIDIA Jetson Orin Nano Super Developer Kit"; + if (cmd.includes("free -m")) return "7627"; + return ""; }); - + const { nimModule, restore } = loadNimWithMockedRunner(runCapture); + try { + const gpu = nimModule.detectGpu(); + expect(gpu).toMatchObject({ type: "nvidia", jetson: true }); + } finally { + restore(); + } + }); }); diff --git a/test/onboard.test.js b/test/onboard.test.js index 3a8252265..541825b08 100644 --- a/test/onboard.test.js +++ b/test/onboard.test.js @@ -242,29 +242,23 @@ describe("onboard helpers", () => { }); it("maps ollama-local to direct endpoint with correct base URL", () => { - assert.deepEqual( - getSandboxInferenceConfig("nemotron-3-nano:4b", "ollama-local"), - { - providerKey: "inference", - primaryModelRef: "inference/nemotron-3-nano:4b", - inferenceBaseUrl: "http://host.openshell.internal:11434/v1", - inferenceApi: "openai-completions", - inferenceCompat: null, - } - ); + assert.deepEqual(getSandboxInferenceConfig("nemotron-3-nano:4b", "ollama-local"), { + providerKey: "inference", + primaryModelRef: "inference/nemotron-3-nano:4b", + inferenceBaseUrl: "http://host.openshell.internal:11434/v1", + inferenceApi: "openai-completions", + inferenceCompat: null, + }); }); it("maps vllm-local to direct endpoint with correct base URL", () => { - assert.deepEqual( - getSandboxInferenceConfig("meta-llama/llama-3.1-8b", "vllm-local"), - { - providerKey: "inference", - primaryModelRef: "inference/meta-llama/llama-3.1-8b", - inferenceBaseUrl: "http://host.openshell.internal:8000/v1", - inferenceApi: "openai-completions", - inferenceCompat: null, - } - ); + assert.deepEqual(getSandboxInferenceConfig("meta-llama/llama-3.1-8b", "vllm-local"), { + providerKey: "inference", + primaryModelRef: "inference/meta-llama/llama-3.1-8b", + inferenceBaseUrl: "http://host.openshell.internal:8000/v1", + inferenceApi: "openai-completions", + inferenceCompat: null, + }); }); it("pins the gateway image to the installed OpenShell release version", () => {