diff --git a/bin/lib/local-inference.js b/bin/lib/local-inference.js index 923363389..1cd8b021c 100644 --- a/bin/lib/local-inference.js +++ b/bin/lib/local-inference.js @@ -8,6 +8,7 @@ const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1"; const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b"; const SMALL_OLLAMA_MODEL = "qwen2.5:7b"; const LARGE_OLLAMA_MIN_MEMORY_MB = 32768; +const DEFAULT_OLLAMA_MODEL_JETSON = "nemotron-3-nano:4b"; function getLocalProviderBaseUrl(provider) { switch (provider) { @@ -144,6 +145,11 @@ function getOllamaModelOptions(runCapture) { } function getBootstrapOllamaModelOptions(gpu) { + // Jetson: fall back to the 4B model that fits in 8GB unified memory + // instead of the 30B default which would OOM. + if (gpu && gpu.jetson) { + return [DEFAULT_OLLAMA_MODEL_JETSON]; + } const options = [SMALL_OLLAMA_MODEL]; if (gpu && gpu.totalMemoryMB >= LARGE_OLLAMA_MIN_MEMORY_MB) { options.push(DEFAULT_OLLAMA_MODEL); @@ -157,6 +163,10 @@ function getDefaultOllamaModel(runCapture, gpu = null) { const bootstrap = getBootstrapOllamaModelOptions(gpu); return bootstrap[0]; } + if (gpu && gpu.jetson) { + if (models.includes(DEFAULT_OLLAMA_MODEL_JETSON)) return DEFAULT_OLLAMA_MODEL_JETSON; + return models[0]; + } return models.includes(DEFAULT_OLLAMA_MODEL) ? DEFAULT_OLLAMA_MODEL : models[0]; } @@ -209,6 +219,7 @@ function validateOllamaModel(model, runCapture) { module.exports = { CONTAINER_REACHABILITY_IMAGE, DEFAULT_OLLAMA_MODEL, + DEFAULT_OLLAMA_MODEL_JETSON, HOST_GATEWAY_URL, LARGE_OLLAMA_MIN_MEMORY_MB, SMALL_OLLAMA_MODEL, diff --git a/bin/lib/nim.js b/bin/lib/nim.js index 25c050097..2790e3ff4 100644 --- a/bin/lib/nim.js +++ b/bin/lib/nim.js @@ -75,6 +75,9 @@ function detectGpu() { const count = unifiedGpuNames.length; const perGpuMB = count > 0 ? 
Math.floor(totalMemoryMB / count) : totalMemoryMB; const isSpark = unifiedGpuNames.some((name) => /GB10/i.test(name)); + const isJetson = + unifiedGpuNames.some((name) => /orin|thor/i.test(name)) && + !unifiedGpuNames.some((name) => /geforce|rtx|quadro/i.test(name)); return { type: "nvidia", name: unifiedGpuNames[0], @@ -84,6 +87,35 @@ function detectGpu() { nimCapable: canRunNimWithMemory(totalMemoryMB), unifiedMemory: true, spark: isSpark, + jetson: isJetson, + }; + } + } catch { + /* ignored */ + } + + // Jetson fallback: /proc/device-tree/model (for cases where nvidia-smi is absent) + try { + const dtModel = runCapture("cat /proc/device-tree/model 2>/dev/null | tr -d '\\0'", { + ignoreError: true, + }); + if (dtModel && /jetson/i.test(dtModel)) { + let totalMemoryMB = 0; + try { + const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true }); + if (memLine) totalMemoryMB = parseInt(memLine.trim(), 10) || 0; + } catch { + /* ignored */ + } + return { + type: "nvidia", + name: dtModel.trim(), + count: 1, + totalMemoryMB, + perGpuMB: totalMemoryMB, + nimCapable: false, + unifiedMemory: true, + jetson: true, }; } } catch { diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index e6e2c0925..3f4c4474f 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -1002,6 +1002,12 @@ function getSandboxInferenceConfig(model, provider = null, preferredInferenceApi supportsStore: false, }; break; + case "ollama-local": + case "vllm-local": + providerKey = "inference"; + primaryModelRef = `inference/${model}`; + inferenceBaseUrl = getLocalProviderBaseUrl(provider); + break; case "nvidia-prod": case "nvidia-nim": default: @@ -2057,7 +2063,10 @@ async function preflight() { // GPU const gpu = nim.detectGpu(); - if (gpu && gpu.type === "nvidia") { + if (gpu && gpu.type === "nvidia" && gpu.jetson) { + console.log(` ✓ NVIDIA Jetson detected: ${gpu.name}, ${gpu.totalMemoryMB} MB unified memory`); + console.log(" ⓘ NIM containers not supported on Jetson 
— will use Ollama or cloud inference"); + } else if (gpu && gpu.type === "nvidia") { console.log(` ✓ NVIDIA GPU detected: ${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`); if (!gpu.nimCapable) { console.log(" ⓘ GPU VRAM too small for local NIM — will use cloud inference"); @@ -2117,9 +2126,81 @@ async function preflight() { return gpu; } +// ── Jetson gateway image patch ─────────────────────────────────── +// +// JetPack kernels (Tegra) ship without nft_chain_filter and related +// nf_tables modules. The OpenShell gateway image embeds k3s, whose +// network policy controller calls iptables in nf_tables mode by default. +// Without kernel support the controller panics on startup. +// +// This function rebuilds the gateway image locally, switching the +// default iptables alternative to iptables-legacy so all rule +// manipulation uses the classic xtables backend that Tegra kernels +// fully support. + +/** Extracts the semver tag from the installed openshell CLI version. */ +function getGatewayImageTag() { + const openshellVersion = runCapture("openshell --version 2>/dev/null", { ignoreError: true }) || ""; + const match = openshellVersion.match(/(\d+\.\d+\.\d+)/); + return match ? match[1] : "latest"; +} + +/** + * Rebuilds the OpenShell gateway container image with iptables-legacy as the + * default backend. Idempotent — skips rebuild if the image is already patched + * (checked via Docker label). Required on Jetson because the Tegra kernel + * lacks nft_chain_filter modules that k3s's network policy controller needs. 
+ */ +function patchGatewayImageForJetson() { + const tag = getGatewayImageTag(); + const image = `ghcr.io/nvidia/openshell/cluster:${tag}`; + + // Check if already patched (look for our label) + const inspectOut = (runCapture( + `docker inspect --format='{{index .Config.Labels "io.nemoclaw.jetson-patched"}}' "${image}" 2>/dev/null`, + { ignoreError: true } + ) || "").trim(); + if (inspectOut === "true") { + console.log(" ✓ Gateway image already patched for Jetson"); + return; + } + + console.log(" Patching gateway image for Jetson (iptables-legacy)..."); + console.log(" (this may take a moment on first run if the base image needs to be pulled)"); + + const os = require("os"); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-jetson-")); + try { + const dockerfile = path.join(tmpDir, "Dockerfile"); + fs.writeFileSync( + dockerfile, + [ + `FROM ${image}`, + `RUN if command -v update-alternatives >/dev/null 2>&1 && \\`, + ` update-alternatives --set iptables /usr/sbin/iptables-legacy 2>/dev/null && \\`, + ` update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy 2>/dev/null; then \\`, + ` :; \\`, + ` elif [ -f /usr/sbin/iptables-legacy ] && [ -f /usr/sbin/ip6tables-legacy ]; then \\`, + ` ln -sf /usr/sbin/iptables-legacy /usr/sbin/iptables; \\`, + ` ln -sf /usr/sbin/ip6tables-legacy /usr/sbin/ip6tables; \\`, + ` else \\`, + ` echo "iptables-legacy not available in base image" >&2; exit 1; \\`, + ` fi`, + `LABEL io.nemoclaw.jetson-patched="true"`, + "", + ].join("\n") + ); + + run(`docker build --quiet -t "${image}" "${tmpDir}"`, { ignoreError: false }); + console.log(" ✓ Gateway image patched for Jetson (iptables-legacy)"); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } +} + // ── Step 2: Gateway ────────────────────────────────────────────── -async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { +async function startGatewayWithOptions(gpu, { exitOnFailure = true } = {}) { step(2, 7, "Starting 
OpenShell gateway"); const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true }); @@ -2134,6 +2215,15 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { return; } + // Jetson (Tegra kernel): The k3s container image ships iptables v1.8.10 in + // nf_tables mode, but JetPack kernels lack the nft_chain_filter module, + // causing the k3s network policy controller to panic on startup. + // Workaround: rebuild the gateway image locally with iptables-legacy as the + // default so iptables commands use the legacy (xtables) backend instead. + if (gpu && gpu.jetson) { + patchGatewayImageForJetson(); + } + if (hasStaleGateway(gwInfo)) { runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true }); } @@ -2346,6 +2436,10 @@ async function createSandbox( registry.removeSandbox(sandboxName); } + // Kill stale dashboard-forward processes only when we are actually + // creating or recreating — avoids breaking a healthy forward on no-op reruns. 
+ run("kill $(lsof -ti :18789 -c openclaw) 2>/dev/null || true", { ignoreError: true }); + // Stage build context const buildCtx = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-")); const stagedDockerfile = path.join(buildCtx, "Dockerfile"); diff --git a/bin/nemoclaw.js b/bin/nemoclaw.js index e45c80cc9..92b7c30b6 100755 --- a/bin/nemoclaw.js +++ b/bin/nemoclaw.js @@ -54,6 +54,7 @@ const GLOBAL_COMMANDS = new Set([ "deploy", "setup", "setup-spark", + "setup-jetson", "start", "stop", "status", @@ -1193,6 +1194,11 @@ const [cmd, ...args] = process.argv.slice(2); case "setup-spark": await setupSpark(); break; + case "setup-jetson": { + const { setupJetson } = require("./lib/local-inference"); + await setupJetson(); + break; + } case "deploy": await deploy(args[0]); break; diff --git a/scripts/setup-jetson.sh b/scripts/setup-jetson.sh new file mode 100755 index 000000000..46416c1b9 --- /dev/null +++ b/scripts/setup-jetson.sh @@ -0,0 +1,210 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# NemoClaw setup for NVIDIA Jetson devices (Orin Nano, Orin NX, AGX Orin, Thor). +# +# Jetson devices use unified memory and a Tegra kernel that lacks nf_tables +# chain modules (nft_chain_filter, nft_chain_nat, etc.). The OpenShell gateway +# runs k3s inside a Docker container, and k3s's network policy controller +# uses iptables in nf_tables mode by default, which panics on Tegra kernels. +# +# This script prepares the Jetson host so that `nemoclaw onboard` succeeds: +# 1. Verifies Jetson platform +# 2. Ensures NVIDIA Container Runtime is configured for Docker +# 3. Loads required kernel modules (br_netfilter, xt_comment) +# 4. Configures Docker daemon with default-runtime=nvidia +# +# The iptables-legacy patch for the gateway container image is handled +# automatically by `nemoclaw onboard` when it detects a Jetson GPU. 
+# +# Usage: +# sudo nemoclaw setup-jetson +# # or directly: +# sudo bash scripts/setup-jetson.sh + +set -euo pipefail + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +info() { echo -e "${GREEN}>>>${NC} $1"; } +warn() { echo -e "${YELLOW}>>>${NC} $1"; } +fail() { + echo -e "${RED}>>>${NC} $1" + exit 1 +} + +# ── Pre-flight checks ───────────────────────────────────────────── + +if [ "$(uname -s)" != "Linux" ]; then + fail "This script is for NVIDIA Jetson (Linux). Use 'nemoclaw setup' for macOS." +fi + +if [ "$(uname -m)" != "aarch64" ]; then + fail "Jetson devices are aarch64. This system is $(uname -m)." +fi + +if [ "$(id -u)" -ne 0 ]; then + fail "Must run as root: sudo nemoclaw setup-jetson" +fi + +# Verify Jetson platform +JETSON_MODEL="" +if [ -f /proc/device-tree/model ]; then + JETSON_MODEL=$(tr -d '\0' </proc/device-tree/model) +else + GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null || echo "") + if ! echo "$GPU_NAME" | grep -qiE "orin|thor"; then + fail "This does not appear to be a Jetson device. Use 'nemoclaw onboard' directly." + fi + # Exclude discrete GPUs that happen to contain matching strings + if echo "$GPU_NAME" | grep -qiE "geforce|rtx|quadro"; then + fail "Discrete GPU detected ('$GPU_NAME'). This script is for Jetson only." + fi + JETSON_MODEL="${GPU_NAME}" +fi + +info "Detected Jetson platform: ${JETSON_MODEL}" + +# Detect the real user (not root) for docker group add +REAL_USER="${SUDO_USER:-$(logname 2>/dev/null || echo "")}" + +command -v docker >/dev/null || fail "Docker not found. Install docker.io: sudo apt-get install -y docker.io" +command -v python3 >/dev/null || fail "python3 not found. Install with: sudo apt-get install -y python3-minimal" + +# ── 1. Docker group ─────────────────────────────────────────────── + +if [ -n "$REAL_USER" ]; then + if id -nG "$REAL_USER" | grep -qw docker; then + info "User '$REAL_USER' already in docker group" + else + info "Adding '$REAL_USER' to docker group..." + usermod -aG docker "$REAL_USER" + info "Added. 
Group will take effect on next login (or use 'newgrp docker')." + fi +fi + +# ── 2. NVIDIA Container Runtime ────────────────────────────────── +# +# Jetson JetPack pre-installs nvidia-container-runtime but Docker may +# not be configured to use it as the default runtime. + +DAEMON_JSON="/etc/docker/daemon.json" +NEEDS_RESTART=false + +configure_nvidia_runtime() { + if ! command -v nvidia-container-runtime >/dev/null 2>&1; then + warn "nvidia-container-runtime not found. GPU passthrough may not work." + warn "Install with: sudo apt-get install -y nvidia-container-toolkit" + return + fi + + if [ -f "$DAEMON_JSON" ]; then + # Check if nvidia runtime is already configured + if python3 -c " +import json, sys +try: + d = json.load(open('$DAEMON_JSON')) + runtimes = d.get('runtimes', {}) if isinstance(d, dict) else {} + if 'nvidia' in runtimes and d.get('default-runtime') == 'nvidia': + sys.exit(0) + sys.exit(1) +except (IOError, ValueError, KeyError, AttributeError): + sys.exit(1) +" 2>/dev/null; then + info "NVIDIA runtime already configured in Docker daemon" + else + info "Adding NVIDIA runtime to Docker daemon config..." + python3 -c " +import json +try: + with open('$DAEMON_JSON') as f: + d = json.load(f) +except (IOError, ValueError, KeyError): + d = {} +if not isinstance(d, dict): + d = {} +d.setdefault('runtimes', {})['nvidia'] = { + 'path': 'nvidia-container-runtime', + 'runtimeArgs': [] +} +d['default-runtime'] = 'nvidia' +with open('$DAEMON_JSON', 'w') as f: + json.dump(d, f, indent=2) +" + NEEDS_RESTART=true + fi + else + info "Creating Docker daemon config with NVIDIA runtime..." + mkdir -p "$(dirname "$DAEMON_JSON")" + cat >"$DAEMON_JSON" <<'DAEMONJSON' +{ + "runtimes": { + "nvidia": { + "path": "nvidia-container-runtime", + "runtimeArgs": [] + } + }, + "default-runtime": "nvidia" +} +DAEMONJSON + NEEDS_RESTART=true + fi +} + +configure_nvidia_runtime + +# ── 3. 
Kernel modules ──────────────────────────────────────────── + +info "Loading required kernel modules..." +modprobe br_netfilter 2>/dev/null || warn "Could not load br_netfilter" +modprobe xt_comment 2>/dev/null || warn "Could not load xt_comment" + +# Persist across reboots +MODULES_FILE="/etc/modules-load.d/nemoclaw-jetson.conf" +if [ ! -f "$MODULES_FILE" ]; then + info "Persisting kernel modules for boot..." + cat >"$MODULES_FILE" <<'MODULES' +# NemoClaw: required for k3s networking inside Docker +br_netfilter +xt_comment +MODULES +fi + +# ── 4. Restart Docker if needed ────────────────────────────────── + +if [ "$NEEDS_RESTART" = true ]; then + info "Restarting Docker daemon..." + if command -v systemctl >/dev/null 2>&1; then + systemctl restart docker + else + service docker restart 2>/dev/null || dockerd & + fi + for i in $(seq 1 15); do + if docker info >/dev/null 2>&1; then + break + fi + [ "$i" -eq 15 ] && fail "Docker didn't come back after restart. Check 'systemctl status docker'." + sleep 2 + done + info "Docker restarted with NVIDIA runtime" +fi + +# ── Done ───────────────────────────────────────────────────────── + +echo "" +info "Jetson setup complete." +info "" +info "Device: ${JETSON_MODEL}" +info "" +info "Next step: run 'nemoclaw onboard' to set up your sandbox." +info " nemoclaw onboard" +info "" +info "The onboard wizard will automatically patch the gateway image" +info "for Jetson iptables compatibility." 
diff --git a/test/local-inference.test.js b/test/local-inference.test.js index e028aa736..577d1e14f 100644 --- a/test/local-inference.test.js +++ b/test/local-inference.test.js @@ -7,6 +7,7 @@ import { CONTAINER_REACHABILITY_IMAGE, DEFAULT_OLLAMA_MODEL, LARGE_OLLAMA_MIN_MEMORY_MB, + DEFAULT_OLLAMA_MODEL_JETSON, getDefaultOllamaModel, getBootstrapOllamaModelOptions, getLocalProviderBaseUrl, @@ -22,6 +23,8 @@ import { validateLocalProvider, } from "../bin/lib/local-inference"; +const FAKE_JETSON_GPU = { type: "nvidia", totalMemoryMB: 7627, jetson: true, unifiedMemory: true }; + describe("local inference helpers", () => { it("returns the expected base URL for vllm-local", () => { expect(getLocalProviderBaseUrl("vllm-local")).toBe("http://host.openshell.internal:8000/v1"); @@ -239,4 +242,14 @@ describe("local inference helpers", () => { it("treats non-JSON probe output as success once the model responds", () => { expect(validateOllamaModel("nemotron-3-nano:30b", () => "ok")).toEqual({ ok: true }); }); + + it("returns jetson 4b model as default on jetson when available", () => { + const list = "nemotron-3-nano:4b abc 2.8 GB now\nqwen3:32b def 20 GB now"; + expect(getDefaultOllamaModel(() => list, FAKE_JETSON_GPU)).toBe(DEFAULT_OLLAMA_MODEL_JETSON); + }); + + it("falls back to jetson 4b model when ollama list is empty on jetson", () => { + expect(getBootstrapOllamaModelOptions(FAKE_JETSON_GPU)).toEqual([DEFAULT_OLLAMA_MODEL_JETSON]); + expect(getDefaultOllamaModel(() => "", FAKE_JETSON_GPU)).toBe(DEFAULT_OLLAMA_MODEL_JETSON); + }); }); diff --git a/test/nim.test.js b/test/nim.test.js index 44f613a55..f4a4be6b4 100644 --- a/test/nim.test.js +++ b/test/nim.test.js @@ -277,4 +277,37 @@ describe("nim", () => { } }); }); + + it("detects Jetson Orin and sets jetson flag", () => { + const runCapture = vi.fn((cmd) => { + if (cmd.includes("memory.total")) return ""; + if (cmd.includes("query-gpu=name")) return "Orin"; + if (cmd.includes("free -m")) return "7627"; + return ""; + 
}); + const { nimModule, restore } = loadNimWithMockedRunner(runCapture); + try { + const gpu = nimModule.detectGpu(); + expect(gpu).toMatchObject({ type: "nvidia", jetson: true, unifiedMemory: true }); + } finally { + restore(); + } + }); + + it("detects Jetson via /proc/device-tree/model fallback", () => { + const runCapture = vi.fn((cmd) => { + if (cmd.includes("memory.total")) return ""; + if (cmd.includes("query-gpu=name")) return ""; + if (cmd.includes("device-tree/model")) return "NVIDIA Jetson Orin Nano Super Developer Kit"; + if (cmd.includes("free -m")) return "7627"; + return ""; + }); + const { nimModule, restore } = loadNimWithMockedRunner(runCapture); + try { + const gpu = nimModule.detectGpu(); + expect(gpu).toMatchObject({ type: "nvidia", jetson: true }); + } finally { + restore(); + } + }); }); diff --git a/test/onboard.test.js b/test/onboard.test.js index 267696119..3c45a502c 100644 --- a/test/onboard.test.js +++ b/test/onboard.test.js @@ -241,6 +241,26 @@ describe("onboard helpers", () => { }); }); + it("maps ollama-local to direct endpoint with correct base URL", () => { + assert.deepEqual(getSandboxInferenceConfig("nemotron-3-nano:4b", "ollama-local"), { + providerKey: "inference", + primaryModelRef: "inference/nemotron-3-nano:4b", + inferenceBaseUrl: "http://host.openshell.internal:11434/v1", + inferenceApi: "openai-completions", + inferenceCompat: null, + }); + }); + + it("maps vllm-local to direct endpoint with correct base URL", () => { + assert.deepEqual(getSandboxInferenceConfig("meta-llama/llama-3.1-8b", "vllm-local"), { + providerKey: "inference", + primaryModelRef: "inference/meta-llama/llama-3.1-8b", + inferenceBaseUrl: "http://host.openshell.internal:8000/v1", + inferenceApi: "openai-completions", + inferenceCompat: null, + }); + }); + it("pins the gateway image to the installed OpenShell release version", () => { expect(getInstalledOpenshellVersion("openshell 0.0.12")).toBe("0.0.12"); expect(getInstalledOpenshellVersion("openshell 
0.0.13-dev.8+gbbcaed2ea")).toBe("0.0.13");