Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions bin/lib/local-inference.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1";
const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b";
const SMALL_OLLAMA_MODEL = "qwen2.5:7b";
const LARGE_OLLAMA_MIN_MEMORY_MB = 32768;
const DEFAULT_OLLAMA_MODEL_JETSON = "nemotron-3-nano:4b";

function getLocalProviderBaseUrl(provider) {
switch (provider) {
Expand Down Expand Up @@ -144,6 +145,11 @@ function getOllamaModelOptions(runCapture) {
}

function getBootstrapOllamaModelOptions(gpu) {
// Jetson: fall back to the 4B model that fits in 8GB unified memory
// instead of the 30B default which would OOM.
if (gpu && gpu.jetson) {
return [DEFAULT_OLLAMA_MODEL_JETSON];
}
const options = [SMALL_OLLAMA_MODEL];
if (gpu && gpu.totalMemoryMB >= LARGE_OLLAMA_MIN_MEMORY_MB) {
options.push(DEFAULT_OLLAMA_MODEL);
Expand All @@ -157,6 +163,10 @@ function getDefaultOllamaModel(runCapture, gpu = null) {
const bootstrap = getBootstrapOllamaModelOptions(gpu);
return bootstrap[0];
}
if (gpu && gpu.jetson) {
if (models.includes(DEFAULT_OLLAMA_MODEL_JETSON)) return DEFAULT_OLLAMA_MODEL_JETSON;
return models[0];
}
return models.includes(DEFAULT_OLLAMA_MODEL) ? DEFAULT_OLLAMA_MODEL : models[0];
}

Expand Down Expand Up @@ -209,6 +219,7 @@ function validateOllamaModel(model, runCapture) {
module.exports = {
CONTAINER_REACHABILITY_IMAGE,
DEFAULT_OLLAMA_MODEL,
DEFAULT_OLLAMA_MODEL_JETSON,
HOST_GATEWAY_URL,
LARGE_OLLAMA_MIN_MEMORY_MB,
SMALL_OLLAMA_MODEL,
Expand Down
32 changes: 32 additions & 0 deletions bin/lib/nim.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
return nimImages.models.some((m) => m.minGpuMemoryMB <= totalMemoryMB);
}

function detectGpu() {

Check failure on line 31 in bin/lib/nim.js

View workflow job for this annotation

GitHub Actions / checks

Function 'detectGpu' has a complexity of 27. Maximum allowed is 20
// Try NVIDIA first — query VRAM
try {
const output = runCapture("nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits", {
Expand Down Expand Up @@ -75,6 +75,8 @@
const count = unifiedGpuNames.length;
const perGpuMB = count > 0 ? Math.floor(totalMemoryMB / count) : totalMemoryMB;
const isSpark = unifiedGpuNames.some((name) => /GB10/i.test(name));
const isJetson = unifiedGpuNames.some((name) => /orin|thor/i.test(name)) &&
!unifiedGpuNames.some((name) => /geforce|rtx|quadro/i.test(name));
return {
type: "nvidia",
name: unifiedGpuNames[0],
Expand All @@ -84,6 +86,36 @@
nimCapable: canRunNimWithMemory(totalMemoryMB),
unifiedMemory: true,
spark: isSpark,
jetson: isJetson,
};
}
} catch {
/* ignored */
}

// Jetson fallback: /proc/device-tree/model (for cases where nvidia-smi is absent)
try {
const dtModel = runCapture(
"cat /proc/device-tree/model 2>/dev/null | tr -d '\\0'",
{ ignoreError: true },
);
if (dtModel && /jetson/i.test(dtModel)) {
let totalMemoryMB = 0;
try {
const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true });
if (memLine) totalMemoryMB = parseInt(memLine.trim(), 10) || 0;
} catch {
/* ignored */
}
return {
type: "nvidia",
name: dtModel.trim(),
count: 1,
totalMemoryMB,
perGpuMB: totalMemoryMB,
nimCapable: false,
unifiedMemory: true,
jetson: true,
};
}
} catch {
Expand Down
98 changes: 96 additions & 2 deletions bin/lib/onboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,12 @@ function getSandboxInferenceConfig(model, provider = null, preferredInferenceApi
supportsStore: false,
};
break;
case "ollama-local":
case "vllm-local":
providerKey = "inference";
primaryModelRef = `inference/${model}`;
inferenceBaseUrl = getLocalProviderBaseUrl(provider);
break;
case "nvidia-prod":
case "nvidia-nim":
default:
Expand Down Expand Up @@ -2051,7 +2057,10 @@ async function preflight() {

// GPU
const gpu = nim.detectGpu();
if (gpu && gpu.type === "nvidia") {
if (gpu && gpu.type === "nvidia" && gpu.jetson) {
console.log(` ✓ NVIDIA Jetson detected: ${gpu.name}, ${gpu.totalMemoryMB} MB unified memory`);
console.log(" ⓘ NIM containers not supported on Jetson — will use Ollama or cloud inference");
} else if (gpu && gpu.type === "nvidia") {
console.log(` ✓ NVIDIA GPU detected: ${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`);
if (!gpu.nimCapable) {
console.log(" ⓘ GPU VRAM too small for local NIM — will use cloud inference");
Expand Down Expand Up @@ -2111,9 +2120,81 @@ async function preflight() {
return gpu;
}

// ── Jetson gateway image patch ───────────────────────────────────
//
// JetPack kernels (Tegra) ship without nft_chain_filter and related
// nf_tables modules. The OpenShell gateway image embeds k3s, whose
// network policy controller calls iptables in nf_tables mode by default.
// Without kernel support the controller panics on startup.
//
// This function rebuilds the gateway image locally, switching the
// default iptables alternative to iptables-legacy so all rule
// manipulation uses the classic xtables backend that Tegra kernels
// fully support.

/** Extracts the semver tag from the installed openshell CLI version. */
/**
 * Derives the gateway image tag from the installed openshell CLI.
 * Runs `openshell --version` (errors ignored) and pulls the first
 * x.y.z semver out of the output; falls back to "latest" when the
 * CLI is missing or prints no recognizable version.
 */
function getGatewayImageTag() {
  const versionOutput =
    runCapture("openshell --version 2>/dev/null", { ignoreError: true }) || "";
  const semver = versionOutput.match(/(\d+\.\d+\.\d+)/);
  if (semver) {
    return semver[1];
  }
  return "latest";
}

/**
* Rebuilds the OpenShell gateway container image with iptables-legacy as the
* default backend. Idempotent — skips rebuild if the image is already patched
* (checked via Docker label). Required on Jetson because the Tegra kernel
* lacks nft_chain_filter modules that k3s's network policy controller needs.
*/
/**
 * Rebuilds the OpenShell gateway container image with iptables-legacy as the
 * default backend. Idempotent — a Docker label stamped by a previous run marks
 * the image as already patched, and repeat invocations skip the rebuild.
 * Required on Jetson because the Tegra kernel lacks the nft_chain_filter
 * modules that k3s's network policy controller needs.
 */
function patchGatewayImageForJetson() {
  const imageRef = `ghcr.io/nvidia/openshell/cluster:${getGatewayImageTag()}`;

  // Fast path: the patched image carries our label — nothing to do.
  const labelValue = (runCapture(
    `docker inspect --format='{{index .Config.Labels "io.nemoclaw.jetson-patched"}}' "${imageRef}" 2>/dev/null`,
    { ignoreError: true }
  ) || "").trim();
  if (labelValue === "true") {
    console.log(" ✓ Gateway image already patched for Jetson");
    return;
  }

  console.log(" Patching gateway image for Jetson (iptables-legacy)...");
  console.log(" (this may take a moment on first run if the base image needs to be pulled)");

  const os = require("os");
  const stagingDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-jetson-"));
  try {
    // Single-layer rebuild: flip the iptables alternative to -legacy, trying
    // update-alternatives first and falling back to direct symlinks. The
    // RUN fails loudly if the base image has no legacy binaries at all.
    const dockerfileText = [
      `FROM ${imageRef}`,
      `RUN if command -v update-alternatives >/dev/null 2>&1 && \\`,
      ` update-alternatives --set iptables /usr/sbin/iptables-legacy 2>/dev/null && \\`,
      ` update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy 2>/dev/null; then \\`,
      ` :; \\`,
      ` elif [ -f /usr/sbin/iptables-legacy ] && [ -f /usr/sbin/ip6tables-legacy ]; then \\`,
      ` ln -sf /usr/sbin/iptables-legacy /usr/sbin/iptables; \\`,
      ` ln -sf /usr/sbin/ip6tables-legacy /usr/sbin/ip6tables; \\`,
      ` else \\`,
      ` echo "iptables-legacy not available in base image" >&2; exit 1; \\`,
      ` fi`,
      `LABEL io.nemoclaw.jetson-patched="true"`,
      "",
    ].join("\n");
    fs.writeFileSync(path.join(stagingDir, "Dockerfile"), dockerfileText);

    // Retag the rebuilt image over the original ref so downstream steps
    // pick it up transparently. A build failure here must abort setup.
    run(`docker build --quiet -t "${imageRef}" "${stagingDir}"`, { ignoreError: false });
    console.log(" ✓ Gateway image patched for Jetson (iptables-legacy)");
  } finally {
    // Always clean up the temp build context, even when the build throws.
    fs.rmSync(stagingDir, { recursive: true, force: true });
  }
}

// ── Step 2: Gateway ──────────────────────────────────────────────

async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) {
async function startGatewayWithOptions(gpu, { exitOnFailure = true } = {}) {
step(2, 7, "Starting OpenShell gateway");

const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true });
Expand All @@ -2128,6 +2209,15 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) {
return;
}

// Jetson (Tegra kernel): The k3s container image ships iptables v1.8.10 in
// nf_tables mode, but JetPack kernels lack the nft_chain_filter module,
// causing the k3s network policy controller to panic on startup.
// Workaround: rebuild the gateway image locally with iptables-legacy as the
// default so iptables commands use the legacy (xtables) backend instead.
if (gpu && gpu.jetson) {
patchGatewayImageForJetson();
}

if (hasStaleGateway(gwInfo)) {
runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
}
Expand Down Expand Up @@ -2340,6 +2430,10 @@ async function createSandbox(
registry.removeSandbox(sandboxName);
}

// Kill stale dashboard-forward processes only when we are actually
// creating or recreating — avoids breaking a healthy forward on no-op reruns.
run("kill $(lsof -ti :18789 -c openclaw) 2>/dev/null || true", { ignoreError: true });

// Stage build context
const buildCtx = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-"));
const stagedDockerfile = path.join(buildCtx, "Dockerfile");
Expand Down
7 changes: 7 additions & 0 deletions bin/nemoclaw.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ const GLOBAL_COMMANDS = new Set([
"deploy",
"setup",
"setup-spark",
"setup-jetson",
"start",
"stop",
"status",
Expand Down Expand Up @@ -1284,3 +1285,9 @@ const [cmd, ...args] = process.argv.slice(2);
console.error(` Run 'nemoclaw help' for usage.`);
process.exit(1);
})();
case "setup-jetson": {
const { setupJetson } = require("./lib/local-inference");
await setupJetson();
break;
}

Loading
Loading