From 2cf4b72f12237efdce9acfdc29cdbc67c8ce02a3 Mon Sep 17 00:00:00 2001 From: jnun Date: Sat, 21 Mar 2026 14:04:17 -0500 Subject: [PATCH 01/12] feat(onboard): add configurable port overrides via env vars Replace hardcoded ports (8000, 8080, 11434, 18789) with env-var-driven configuration via NEMOCLAW_*_PORT variables and .env file support. - Add central port config module (ports.js), .env loader (env.js), and port conflict diagnostics (check-ports.sh) - Update all Node.js modules, shell scripts, blueprint runner, and tests to use configurable ports instead of hardcoded values - Add port configuration reference docs and README section --- .env.example | 8 ++ .gitignore | 5 + README.md | 31 ++++++ bin/lib/env.js | 66 ++++++++++++ bin/lib/local-inference.js | 26 ++--- bin/lib/nim.js | 10 +- bin/lib/onboard.js | 73 +++++++++---- bin/lib/ports.js | 31 ++++++ bin/lib/preflight.js | 3 +- bin/nemoclaw.js | 6 +- docs/reference/port-configuration.md | 118 ++++++++++++++++++++++ docs/reference/troubleshooting.md | 23 +++-- nemoclaw-blueprint/blueprint.yaml | 8 +- nemoclaw-blueprint/orchestrator/runner.py | 58 ++++++++++- package-lock.json | 16 --- scripts/brev-setup.sh | 14 ++- scripts/check-ports.sh | 95 +++++++++++++++++ scripts/debug.sh | 2 +- scripts/lib/runtime.sh | 8 +- scripts/nemoclaw-start.sh | 4 +- scripts/setup.sh | 9 +- scripts/start-services.sh | 7 +- test/e2e/test-double-onboard.sh | 28 ++--- test/local-inference.test.js | 21 ++-- test/onboard-selection.test.js | 6 +- test/preflight.test.js | 3 +- test/runtime-shell.test.js | 4 +- uninstall.sh | 2 +- 28 files changed, 576 insertions(+), 109 deletions(-) create mode 100644 .env.example create mode 100644 bin/lib/env.js create mode 100644 bin/lib/ports.js create mode 100644 docs/reference/port-configuration.md create mode 100755 scripts/check-ports.sh diff --git a/.env.example b/.env.example new file mode 100644 index 000000000..52eb78ee5 --- /dev/null +++ b/.env.example @@ -0,0 +1,8 @@ +# NemoClaw port 
configuration — copy to .env and edit as needed. +# Ports must be integers in range 1024–65535. +# Run scripts/check-ports.sh to find port conflicts + +NEMOCLAW_DASHBOARD_PORT=18789 +NEMOCLAW_GATEWAY_PORT=8080 +NEMOCLAW_VLLM_PORT=8000 +NEMOCLAW_OLLAMA_PORT=11434 diff --git a/.gitignore b/.gitignore index 5e68edf79..dde97ea46 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,8 @@ docs/_build/ coverage/ vdr-notes/ draft_newsletter_* +tmp/ +.env +.env.local +.venv/ +uv.lock diff --git a/README.md b/README.md index ac6b5db18..04f361266 100644 --- a/README.md +++ b/README.md @@ -160,6 +160,37 @@ curl -fsSL https://raw.githubusercontent.com/NVIDIA/NemoClaw/refs/heads/main/uni --- +## Port Configuration + +NemoClaw uses four network ports. All are configurable via environment variables or a `.env` file at the project root (copy `.env.example` to get started). + +| Port | Default | Env var | Purpose | Conflict risk | +|------|---------|---------|---------|---------------| +| Dashboard | 18789 | `NEMOCLAW_DASHBOARD_PORT` | OpenClaw web UI, forwarded from sandbox to host | Low | +| Gateway | 8080 | `NEMOCLAW_GATEWAY_PORT` | OpenShell gateway signal channel | **High** — Jenkins, Tomcat, K8s dashboard | +| vLLM/NIM | 8000 | `NEMOCLAW_VLLM_PORT` | Local vLLM or NIM inference endpoint | **High** — Django, PHP dev server | +| Ollama | 11434 | `NEMOCLAW_OLLAMA_PORT` | Local Ollama inference endpoint | Low | + +To use non-default ports, set the environment variables before running `nemoclaw onboard`: + +```bash +export NEMOCLAW_GATEWAY_PORT=9080 +export NEMOCLAW_VLLM_PORT=9000 +nemoclaw onboard +``` + +Or create a `.env` file at the project root (see `.env.example`). + +> **Note** +> +> Changing the dashboard port requires rebuilding the sandbox image because the CORS origin is baked in at build time. Re-run `nemoclaw onboard` after changing `NEMOCLAW_DASHBOARD_PORT`. 
+ +> **Network exposure** +> +> When using local inference (Ollama or vLLM), the inference service binds to `0.0.0.0` so that containers can reach it via `host.openshell.internal`. This means the service is reachable from your local network, not just localhost. This is required for the sandbox architecture but should be considered in shared or untrusted network environments. + +--- + ## How It Works NemoClaw installs the NVIDIA OpenShell runtime and Nemotron models, then uses a versioned blueprint to create a sandboxed environment where every network request, file access, and inference call is governed by declarative policy. The `nemoclaw` CLI orchestrates the full stack: OpenShell gateway, sandbox, inference provider, and network policy. diff --git a/bin/lib/env.js b/bin/lib/env.js new file mode 100644 index 000000000..510524b24 --- /dev/null +++ b/bin/lib/env.js @@ -0,0 +1,66 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Lightweight .env loader — reads .env files from the project root and populates +// process.env. Existing environment variables are never overwritten, so shell +// exports always take precedence over file values. 
+// +// Supports: +// - Multiple files (loaded in order; first file's values win over later files) +// - Comments (#) and blank lines +// - KEY=VALUE, KEY="VALUE", KEY='VALUE' +// - Inline comments after unquoted values + +const fs = require("fs"); +const path = require("path"); + +const ROOT = path.resolve(__dirname, "..", ".."); + +function parseEnvFile(filePath) { + let content; + try { + content = fs.readFileSync(filePath, "utf-8"); + } catch { + return; // file doesn't exist or isn't readable — skip silently + } + + for (const raw of content.split("\n")) { + const line = raw.trim(); + if (!line || line.startsWith("#")) continue; + + const eqIndex = line.indexOf("="); + if (eqIndex === -1) continue; + + const key = line.slice(0, eqIndex).trim(); + if (!key) continue; + + let value = line.slice(eqIndex + 1).trim(); + + // Strip surrounding quotes + if ( + (value.startsWith('"') && value.endsWith('"')) || + (value.startsWith("'") && value.endsWith("'")) + ) { + value = value.slice(1, -1); + } else { + // Remove inline comments for unquoted values + const hashIndex = value.indexOf(" #"); + if (hashIndex !== -1) { + value = value.slice(0, hashIndex).trim(); + } + } + + // Never overwrite existing env vars + if (process.env[key] === undefined) { + process.env[key] = value; + } + } +} + +// Load .env files in priority order — first file wins for any given key +// because we never overwrite once set. +const ENV_FILES = [".env.local", ".env"]; + +for (const file of ENV_FILES) { + parseEnvFile(path.join(ROOT, file)); +} diff --git a/bin/lib/local-inference.js b/bin/lib/local-inference.js index 3892e969c..f3a2ad76f 100644 --- a/bin/lib/local-inference.js +++ b/bin/lib/local-inference.js @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 +const { VLLM_PORT, OLLAMA_PORT } = require("./ports"); + const HOST_GATEWAY_URL = "http://host.openshell.internal"; const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1"; const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b"; @@ -8,9 +10,9 @@ const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b"; function getLocalProviderBaseUrl(provider) { switch (provider) { case "vllm-local": - return `${HOST_GATEWAY_URL}:8000/v1`; + return `${HOST_GATEWAY_URL}:${VLLM_PORT}/v1`; case "ollama-local": - return `${HOST_GATEWAY_URL}:11434/v1`; + return `${HOST_GATEWAY_URL}:${OLLAMA_PORT}/v1`; default: return null; } @@ -19,9 +21,9 @@ function getLocalProviderBaseUrl(provider) { function getLocalProviderHealthCheck(provider) { switch (provider) { case "vllm-local": - return "curl -sf http://localhost:8000/v1/models 2>/dev/null"; + return `curl -sf http://localhost:${VLLM_PORT}/v1/models 2>/dev/null`; case "ollama-local": - return "curl -sf http://localhost:11434/api/tags 2>/dev/null"; + return `curl -sf http://localhost:${OLLAMA_PORT}/api/tags 2>/dev/null`; default: return null; } @@ -30,9 +32,9 @@ function getLocalProviderHealthCheck(provider) { function getLocalProviderContainerReachabilityCheck(provider) { switch (provider) { case "vllm-local": - return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:8000/v1/models 2>/dev/null`; + return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:${VLLM_PORT}/v1/models 2>/dev/null`; case "ollama-local": - return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:11434/api/tags 2>/dev/null`; + return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:${OLLAMA_PORT}/api/tags 
2>/dev/null`; default: return null; } @@ -50,12 +52,12 @@ function validateLocalProvider(provider, runCapture) { case "vllm-local": return { ok: false, - message: "Local vLLM was selected, but nothing is responding on http://localhost:8000.", + message: `Local vLLM was selected, but nothing is responding on http://localhost:${VLLM_PORT}.`, }; case "ollama-local": return { ok: false, - message: "Local Ollama was selected, but nothing is responding on http://localhost:11434.", + message: `Local Ollama was selected, but nothing is responding on http://localhost:${OLLAMA_PORT}.`, }; default: return { ok: false, message: "The selected local inference provider is unavailable." }; @@ -77,13 +79,13 @@ function validateLocalProvider(provider, runCapture) { return { ok: false, message: - "Local vLLM is responding on localhost, but containers cannot reach http://host.openshell.internal:8000. Ensure the server is reachable from containers, not only from the host shell.", + `Local vLLM is responding on localhost, but containers cannot reach http://host.openshell.internal:${VLLM_PORT}. Ensure the server is reachable from containers, not only from the host shell.`, }; case "ollama-local": return { ok: false, message: - "Local Ollama is responding on localhost, but containers cannot reach http://host.openshell.internal:11434. Ensure Ollama listens on 0.0.0.0:11434 instead of 127.0.0.1 so sandboxes can reach it.", + `Local Ollama is responding on localhost, but containers cannot reach http://host.openshell.internal:${OLLAMA_PORT}. Ensure Ollama listens on 0.0.0.0:${OLLAMA_PORT} instead of 127.0.0.1 so sandboxes can reach it.`, }; default: return { ok: false, message: "The selected local inference provider is unavailable from containers." 
}; @@ -125,7 +127,7 @@ function getOllamaWarmupCommand(model, keepAlive = "15m") { stream: false, keep_alive: keepAlive, }); - return `nohup curl -s http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} >/dev/null 2>&1 &`; + return `nohup curl -s http://localhost:${OLLAMA_PORT}/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} >/dev/null 2>&1 &`; } function getOllamaProbeCommand(model, timeoutSeconds = 120, keepAlive = "15m") { @@ -135,7 +137,7 @@ function getOllamaProbeCommand(model, timeoutSeconds = 120, keepAlive = "15m") { stream: false, keep_alive: keepAlive, }); - return `curl -sS --max-time ${timeoutSeconds} http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} 2>/dev/null`; + return `curl -sS --max-time ${timeoutSeconds} http://localhost:${OLLAMA_PORT}/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} 2>/dev/null`; } function validateOllamaModel(model, runCapture) { diff --git a/bin/lib/nim.js b/bin/lib/nim.js index 4f2233e43..f94b60d40 100644 --- a/bin/lib/nim.js +++ b/bin/lib/nim.js @@ -4,6 +4,7 @@ // NIM container management — pull, start, stop, health-check NIM images. const { run, runCapture } = require("./runner"); +const { VLLM_PORT } = require("./ports"); const nimImages = require("./nim-images.json"); function containerName(sandboxName) { @@ -125,7 +126,7 @@ function pullNimImage(model) { return image; } -function startNimContainer(sandboxName, model, port = 8000) { +function startNimContainer(sandboxName, model, port = VLLM_PORT) { const name = containerName(sandboxName); const image = getImageForModel(model); if (!image) { @@ -138,12 +139,13 @@ function startNimContainer(sandboxName, model, port = 8000) { console.log(` Starting NIM container: ${name}`); run( + // Right-hand :8000 is the NIM image's internal port — fixed by the image, not configurable. 
`docker run -d --gpus all -p ${port}:8000 --name ${name} --shm-size 16g ${image}` ); return name; } -function waitForNimHealth(port = 8000, timeout = 300) { +function waitForNimHealth(port = VLLM_PORT, timeout = 300) { const start = Date.now(); const interval = 5000; console.log(` Waiting for NIM health on port ${port} (timeout: ${timeout}s)...`); @@ -172,7 +174,7 @@ function stopNimContainer(sandboxName) { run(`docker rm ${name} 2>/dev/null || true`, { ignoreError: true }); } -function nimStatus(sandboxName) { +function nimStatus(sandboxName, port = VLLM_PORT) { const name = containerName(sandboxName); try { const state = runCapture( @@ -183,7 +185,7 @@ function nimStatus(sandboxName) { let healthy = false; if (state === "running") { - const health = runCapture(`curl -sf http://localhost:8000/v1/models 2>/dev/null`, { + const health = runCapture(`curl -sf http://localhost:${port}/v1/models 2>/dev/null`, { ignoreError: true, }); healthy = !!health; diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index d9bc89e84..bb704aa31 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -9,6 +9,7 @@ const fs = require("fs"); const os = require("os"); const path = require("path"); const { ROOT, SCRIPTS, run, runCapture } = require("./runner"); +const { DASHBOARD_PORT, GATEWAY_PORT, VLLM_PORT, OLLAMA_PORT } = require("./ports"); const { getDefaultOllamaModel, getLocalProviderBaseUrl, @@ -275,15 +276,15 @@ async function preflight() { const gwInfo = runCapture("openshell gateway info -g nemoclaw 2>/dev/null", { ignoreError: true }); if (hasStaleGateway(gwInfo)) { console.log(" Cleaning up previous NemoClaw session..."); - run("openshell forward stop 18789 2>/dev/null || true", { ignoreError: true }); + run(`openshell forward stop ${DASHBOARD_PORT} 2>/dev/null || true`, { ignoreError: true }); run("openshell gateway destroy -g nemoclaw 2>/dev/null || true", { ignoreError: true }); console.log(" ✓ Previous session cleaned up"); } - // Required ports — gateway (8080) and 
dashboard (18789) + // Required ports — gateway and dashboard const requiredPorts = [ - { port: 8080, label: "OpenShell gateway" }, - { port: 18789, label: "NemoClaw dashboard" }, + { port: GATEWAY_PORT, label: "OpenShell gateway" }, + { port: DASHBOARD_PORT, label: "NemoClaw dashboard" }, ]; for (const { port, label } of requiredPorts) { const portCheck = await checkPortAvailable(port); @@ -337,7 +338,7 @@ async function startGateway(gpu) { // Destroy old gateway run("openshell gateway destroy -g nemoclaw 2>/dev/null || true", { ignoreError: true }); - const gwArgs = ["--name", "nemoclaw"]; + const gwArgs = ["--name", "nemoclaw", "--port", String(GATEWAY_PORT)]; // Do NOT pass --gpu here. On DGX Spark (and most GPU hosts), inference is // routed through a host-side provider (Ollama, vLLM, or cloud API) — the // sandbox itself does not need direct GPU access. Passing --gpu causes @@ -438,16 +439,23 @@ async function createSandbox(gpu) { // Create sandbox (use -- echo to avoid dropping into interactive shell) // Pass the base policy so sandbox starts in proxy mode (required for policy updates later) const basePolicyPath = path.join(ROOT, "nemoclaw-blueprint", "policies", "openclaw-sandbox.yaml"); + const chatUiUrl = process.env.CHAT_UI_URL || `http://127.0.0.1:${DASHBOARD_PORT}`; const createArgs = [ `--from "${buildCtx}/Dockerfile"`, `--name "${sandboxName}"`, `--policy "${basePolicyPath}"`, ]; + // CHAT_UI_URL is passed as an env arg below; the Dockerfile ARG default + // covers the standard port at build time. --build-arg is not supported by + // the current openshell sandbox create CLI. // --gpu is intentionally omitted. See comment in startGateway(). 
console.log(` Creating sandbox '${sandboxName}' (this takes a few minutes on first run)...`); - const chatUiUrl = process.env.CHAT_UI_URL || 'http://127.0.0.1:18789'; - const envArgs = [`CHAT_UI_URL=${chatUiUrl}`]; + const envArgs = [ + `CHAT_UI_URL=${chatUiUrl}`, + `NEMOCLAW_DASHBOARD_PORT=${DASHBOARD_PORT}`, + `PUBLIC_PORT=${DASHBOARD_PORT}`, + ]; if (process.env.NVIDIA_API_KEY) { envArgs.push(`NVIDIA_API_KEY=${process.env.NVIDIA_API_KEY}`); } @@ -503,12 +511,12 @@ async function createSandbox(gpu) { process.exit(1); } - // Release any stale forward on port 18789 before claiming it for the new sandbox. + // Release any stale forward on the dashboard port before claiming it for the new sandbox. // A previous onboard run may have left the port forwarded to a different sandbox, // which would silently prevent the new sandbox's dashboard from being reachable. - run(`openshell forward stop 18789 2>/dev/null || true`, { ignoreError: true }); + run(`openshell forward stop ${DASHBOARD_PORT} 2>/dev/null || true`, { ignoreError: true }); // Forward dashboard port to the new sandbox - run(`openshell forward start --background 18789 "${sandboxName}"`, { ignoreError: true }); + run(`openshell forward start --background ${DASHBOARD_PORT} "${sandboxName}"`, { ignoreError: true }); // Register only after confirmed ready — prevents phantom entries registry.registerSandbox({ @@ -531,8 +539,8 @@ async function setupNim(sandboxName, gpu) { // Detect local inference options const hasOllama = !!runCapture("command -v ollama", { ignoreError: true }); - const ollamaRunning = !!runCapture("curl -sf http://localhost:11434/api/tags 2>/dev/null", { ignoreError: true }); - const vllmRunning = !!runCapture("curl -sf http://localhost:8000/v1/models 2>/dev/null", { ignoreError: true }); + const ollamaRunning = !!runCapture(`curl -sf http://localhost:${OLLAMA_PORT}/api/tags 2>/dev/null`, { ignoreError: true }); + const vllmRunning = !!runCapture(`curl -sf http://localhost:${VLLM_PORT}/v1/models 
2>/dev/null`, { ignoreError: true }); const requestedProvider = isNonInteractive() ? getNonInteractiveProvider() : null; const requestedModel = isNonInteractive() ? getNonInteractiveModel(requestedProvider || "cloud") : null; // Build options list — only show local options with NEMOCLAW_EXPERIMENTAL=1 @@ -550,14 +558,14 @@ async function setupNim(sandboxName, gpu) { options.push({ key: "ollama", label: - `Local Ollama (localhost:11434)${ollamaRunning ? " — running" : ""}` + + `Local Ollama (localhost:${OLLAMA_PORT})${ollamaRunning ? " — running" : ""}` + (ollamaRunning ? " (suggested)" : ""), }); } if (EXPERIMENTAL && vllmRunning) { options.push({ key: "vllm", - label: "Existing vLLM instance (localhost:8000) — running [experimental] (suggested)", + label: `Existing vLLM instance (localhost:${VLLM_PORT}) — running [experimental] (suggested)`, }); } @@ -650,10 +658,10 @@ async function setupNim(sandboxName, gpu) { } else if (selected.key === "ollama") { if (!ollamaRunning) { console.log(" Starting Ollama..."); - run("OLLAMA_HOST=0.0.0.0:11434 ollama serve > /dev/null 2>&1 &", { ignoreError: true }); + run(`OLLAMA_HOST=0.0.0.0:${OLLAMA_PORT} ollama serve > /dev/null 2>&1 &`, { ignoreError: true }); sleep(2); } - console.log(" ✓ Using Ollama on localhost:11434"); + console.log(` ✓ Using Ollama on localhost:${OLLAMA_PORT}`); provider = "ollama-local"; if (isNonInteractive()) { model = requestedModel || getDefaultOllamaModel(runCapture); @@ -664,9 +672,9 @@ async function setupNim(sandboxName, gpu) { console.log(" Installing Ollama via Homebrew..."); run("brew install ollama", { ignoreError: true }); console.log(" Starting Ollama..."); - run("OLLAMA_HOST=0.0.0.0:11434 ollama serve > /dev/null 2>&1 &", { ignoreError: true }); + run(`OLLAMA_HOST=0.0.0.0:${OLLAMA_PORT} ollama serve > /dev/null 2>&1 &`, { ignoreError: true }); sleep(2); - console.log(" ✓ Using Ollama on localhost:11434"); + console.log(` ✓ Using Ollama on localhost:${OLLAMA_PORT}`); provider = 
"ollama-local"; if (isNonInteractive()) { model = requestedModel || getDefaultOllamaModel(runCapture); @@ -674,7 +682,7 @@ async function setupNim(sandboxName, gpu) { model = await promptOllamaModel(); } } else if (selected.key === "vllm") { - console.log(" ✓ Using existing vLLM on localhost:8000"); + console.log(` ✓ Using existing vLLM on localhost:${VLLM_PORT}`); provider = "vllm-local"; model = "vllm-local"; } @@ -697,6 +705,33 @@ async function setupNim(sandboxName, gpu) { console.log(` Using NVIDIA Endpoint API with model: ${model}`); } + // Warn (don't block) if the selected inference port is occupied by another process. + // The check fires even when process is "unknown" (net probe fallback on non-root) + // because that's the most common conflict scenario on Linux. + if (provider === "vllm-local") { + const vllmCheck = await checkPortAvailable(VLLM_PORT); + if (!vllmCheck.ok) { + const who = vllmCheck.process !== "unknown" + ? ` by ${vllmCheck.process}${vllmCheck.pid ? ` (PID ${vllmCheck.pid})` : ""}` + : ""; + console.log(""); + console.log(` ⚠ Port ${VLLM_PORT} is in use${who}.`); + console.log(` vLLM/NIM inference needs this port. If this is your inference server, you're fine.`); + console.log(` Otherwise, stop the conflicting process or set NEMOCLAW_VLLM_PORT to a different port.`); + } + } else if (provider === "ollama-local") { + const ollamaCheck = await checkPortAvailable(OLLAMA_PORT); + if (!ollamaCheck.ok) { + const who = ollamaCheck.process !== "unknown" + ? ` by ${ollamaCheck.process}${ollamaCheck.pid ? ` (PID ${ollamaCheck.pid})` : ""}` + : ""; + console.log(""); + console.log(` ⚠ Port ${OLLAMA_PORT} is in use${who}.`); + console.log(` Ollama needs this port. 
If this is your Ollama server, you're fine.`); + console.log(` Otherwise, stop the conflicting process or set NEMOCLAW_OLLAMA_PORT to a different port.`); + } + } + registry.updateSandbox(sandboxName, { model, provider, nimContainer }); return { model, provider }; diff --git a/bin/lib/ports.js b/bin/lib/ports.js new file mode 100644 index 000000000..42fcb7241 --- /dev/null +++ b/bin/lib/ports.js @@ -0,0 +1,31 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Central port configuration — single source of truth for all configurable ports. +// Override via environment variables or a .env file at the project root. + +function parsePort(envVar, fallback) { + const raw = process.env[envVar]; + if (raw === undefined || raw === "") return fallback; + const parsed = parseInt(raw, 10); + if (Number.isNaN(parsed) || parsed < 1024 || parsed > 65535) { + throw new Error( + `Invalid port: ${envVar}="${raw}" — must be an integer between 1024 and 65535` + ); + } + return parsed; +} + +// Dashboard port supports legacy env var fallback chain: +// NEMOCLAW_DASHBOARD_PORT → DASHBOARD_PORT → PUBLIC_PORT → 18789 +const DASHBOARD_PORT = parsePort( + "NEMOCLAW_DASHBOARD_PORT", + parsePort("DASHBOARD_PORT", parsePort("PUBLIC_PORT", 18789)) +); + +module.exports = { + DASHBOARD_PORT, + GATEWAY_PORT: parsePort("NEMOCLAW_GATEWAY_PORT", 8080), + VLLM_PORT: parsePort("NEMOCLAW_VLLM_PORT", 8000), + OLLAMA_PORT: parsePort("NEMOCLAW_OLLAMA_PORT", 11434), +}; diff --git a/bin/lib/preflight.js b/bin/lib/preflight.js index 7f191413d..a77768a2a 100644 --- a/bin/lib/preflight.js +++ b/bin/lib/preflight.js @@ -5,6 +5,7 @@ const net = require("net"); const { runCapture } = require("./runner"); +const { DASHBOARD_PORT } = require("./ports"); /** * Check whether a TCP port is available for listening. 
@@ -21,7 +22,7 @@ const { runCapture } = require("./runner"); * { ok: false, process: string, pid: number|null, reason: string } */ async function checkPortAvailable(port, opts) { - const p = port || 18789; + const p = port || DASHBOARD_PORT; const o = opts || {}; // ── lsof path ────────────────────────────────────────────────── diff --git a/bin/nemoclaw.js b/bin/nemoclaw.js index 1d108632b..35107fdc8 100755 --- a/bin/nemoclaw.js +++ b/bin/nemoclaw.js @@ -7,6 +7,9 @@ const path = require("path"); const fs = require("fs"); const os = require("os"); +// Load .env files before any module reads process.env (e.g. ports.js) +require("./lib/env"); + const { ROOT, SCRIPTS, run, runCapture, runInteractive } = require("./lib/runner"); const { ensureApiKey, @@ -17,6 +20,7 @@ const { const registry = require("./lib/registry"); const nim = require("./lib/nim"); const policies = require("./lib/policies"); +const { DASHBOARD_PORT } = require("./lib/ports"); // ── Global commands ────────────────────────────────────────────── @@ -272,7 +276,7 @@ function listSandboxes() { function sandboxConnect(sandboxName) { // Ensure port forward is alive before connecting - run(`openshell forward start --background 18789 "${sandboxName}" 2>/dev/null || true`, { ignoreError: true }); + run(`openshell forward start --background ${DASHBOARD_PORT} "${sandboxName}" 2>/dev/null || true`, { ignoreError: true }); runInteractive(`openshell sandbox connect "${sandboxName}"`); } diff --git a/docs/reference/port-configuration.md b/docs/reference/port-configuration.md new file mode 100644 index 000000000..47a87164a --- /dev/null +++ b/docs/reference/port-configuration.md @@ -0,0 +1,118 @@ +--- +title: + page: "NemoClaw Port Configuration" + nav: "Port Configuration" +description: "Configure NemoClaw network ports using environment variables or a .env file." 
+keywords: ["nemoclaw ports", "nemoclaw port configuration", "nemoclaw port conflict"] +topics: ["generative_ai", "ai_agents"] +tags: ["openclaw", "openshell", "configuration", "nemoclaw"] +content: + type: reference + difficulty: technical_beginner + audience: ["developer", "engineer"] +status: published +--- + + + +# Port Configuration + +NemoClaw uses four network ports. +All ports are configurable through environment variables or a `.env` file at the project root. + +## Default Ports + +| Port | Default | Environment variable | Purpose | +|------|---------|----------------------|---------| +| Dashboard | 18789 | `NEMOCLAW_DASHBOARD_PORT` | OpenClaw web UI, forwarded from sandbox to host | +| Gateway | 8080 | `NEMOCLAW_GATEWAY_PORT` | OpenShell gateway API | +| vLLM/NIM | 8000 | `NEMOCLAW_VLLM_PORT` | Local vLLM or NIM inference server | +| Ollama | 11434 | `NEMOCLAW_OLLAMA_PORT` | Local Ollama inference server | + +## Configure Ports with a .env File + +Copy the example file and edit it to set your preferred ports. + +```console +$ cp .env.example .env +``` + +The `.env.example` file contains all four port variables with their defaults: + +```bash +NEMOCLAW_DASHBOARD_PORT=18789 +NEMOCLAW_GATEWAY_PORT=8080 +NEMOCLAW_VLLM_PORT=8000 +NEMOCLAW_OLLAMA_PORT=11434 +``` + +Edit `.env` to change any port. +Ports must be integers in the range 1024 to 65535. +The `.env` file is gitignored and not committed to the repository. + +## Configure Ports with Environment Variables + +Export the variables directly in your shell instead of using a `.env` file. + +```console +$ export NEMOCLAW_DASHBOARD_PORT=28789 +$ export NEMOCLAW_VLLM_PORT=9000 +$ nemoclaw onboard +``` + +Shell exports take precedence over `.env` file values. + +## Dashboard Port Fallback Chain + +The dashboard port checks multiple variables for backward compatibility. +The first defined value wins: + +1. `NEMOCLAW_DASHBOARD_PORT` +2. `DASHBOARD_PORT` +3. `PUBLIC_PORT` +4. 
`18789` (default) + +## Check for Port Conflicts + +Run the port checker script before onboarding to detect conflicts. + +```console +$ scripts/check-ports.sh +``` + +The script reads your `.env` and `.env.local` files (if present) to resolve the configured ports, then checks each one. +If a port is in use, the output shows the process name and PID holding it. + +``` +Checking NemoClaw ports... + + ok 18789 (dashboard) + CONFLICT 8080 (gateway) — in use by nginx (PID 1234) + ok 8000 (vllm/nim) + ok 11434 (ollama) + +1 port conflict(s) found. +Set NEMOCLAW_*_PORT env vars or edit .env to use different ports. +``` + +You can also pass custom ports as arguments to check additional ports. + +```console +$ scripts/check-ports.sh 9000 9080 +``` + +The onboarding preflight also checks for port conflicts automatically. + +:::{note} +Ports 8080 and 8000 are common conflict sources. +Port 8080 is used by many web servers and proxies. +Port 8000 is used by development servers and other inference tools. +::: + +## Next Steps + +- [Troubleshooting](troubleshooting.md) for resolving port and onboarding issues. +- [CLI Commands](commands.md) for the full command reference. diff --git a/docs/reference/troubleshooting.md b/docs/reference/troubleshooting.md index 16f345423..ec89387c7 100644 --- a/docs/reference/troubleshooting.md +++ b/docs/reference/troubleshooting.md @@ -90,17 +90,28 @@ Add the `export` line to your `~/.bashrc` or `~/.zshrc` to make it permanent, th ### Port already in use -The NemoClaw gateway uses port `18789` by default. -If another process is already bound to this port, onboarding fails. -Identify the conflicting process, verify it is safe to stop, and terminate it: +NemoClaw uses four ports (see the [Port Configuration](../../README.md#port-configuration) section in the README). If another process is bound to one of these ports, onboarding fails with a message identifying the conflicting process. 
+ +To resolve, either stop the conflicting process: ```console -$ lsof -i :18789 +$ lsof -i :8080 $ kill <PID> ``` -If the process does not exit, use `kill -9 <PID>` to force-terminate it. -Then retry onboarding. +Or use a different port by setting the corresponding environment variable before onboarding: + +```console +$ export NEMOCLAW_GATEWAY_PORT=9080 +$ nemoclaw onboard +``` + +| Default port | Env var | Common conflicts | +|-------------|---------|-----------------| +| 8080 | `NEMOCLAW_GATEWAY_PORT` | Jenkins, Tomcat, K8s dashboard | +| 8000 | `NEMOCLAW_VLLM_PORT` | Django, PHP dev server | +| 18789 | `NEMOCLAW_DASHBOARD_PORT` | Uncommon | +| 11434 | `NEMOCLAW_OLLAMA_PORT` | Uncommon | ## Onboarding diff --git a/nemoclaw-blueprint/blueprint.yaml b/nemoclaw-blueprint/blueprint.yaml index f55f9f651..7d9e0d176 100644 --- a/nemoclaw-blueprint/blueprint.yaml +++ b/nemoclaw-blueprint/blueprint.yaml @@ -20,7 +20,7 @@ components: sandbox: image: "ghcr.io/nvidia/openshell-community/sandboxes/openclaw:latest" name: "openclaw" - forward_ports: + forward_ports: # Override at runtime via NEMOCLAW_DASHBOARD_PORT - 18789 inference: @@ -42,14 +42,14 @@ components: nim-local: provider_type: "openai" provider_name: "nim-local" - endpoint: "http://nim-service.local:8000/v1" + endpoint: "http://nim-service.local:8000/v1" # Port overridden by NEMOCLAW_VLLM_PORT via runner.py model: "nvidia/nemotron-3-super-120b-a12b" credential_env: "NIM_API_KEY" vllm: provider_type: "openai" provider_name: "vllm-local" - endpoint: "http://localhost:8000/v1" + endpoint: "http://localhost:8000/v1" # Port overridden by NEMOCLAW_VLLM_PORT via runner.py model: "nvidia/nemotron-3-nano-30b-a3b" credential_env: "OPENAI_API_KEY" credential_default: "dummy" @@ -61,5 +61,5 @@ components: name: nim_service endpoints: - host: "nim-service.local" - port: 8000 + port: 8000 # Overridden by NEMOCLAW_VLLM_PORT via runner.py protocol: rest diff --git a/nemoclaw-blueprint/orchestrator/runner.py 
b/nemoclaw-blueprint/orchestrator/runner.py index 432c228c3..7b9a53902 100644 --- a/nemoclaw-blueprint/orchestrator/runner.py +++ b/nemoclaw-blueprint/orchestrator/runner.py @@ -17,6 +17,7 @@ import argparse import json import os +import re import shutil import subprocess import sys @@ -49,7 +50,62 @@ def load_blueprint() -> dict[str, Any]: log(f"ERROR: blueprint.yaml not found at {bp_file}") sys.exit(1) with bp_file.open() as f: - return yaml.safe_load(f) + bp = yaml.safe_load(f) + _apply_port_overrides(bp) + return bp + + +def _parse_port_env(env_var: str) -> int | None: + """Read and validate a port from an environment variable. + + Returns the port as an int, or None if the variable is unset/empty. + Exits with a clear error for invalid values (matching ports.js behavior). + """ + raw = os.environ.get(env_var, "") + if not raw: + return None + try: + port = int(raw) + except ValueError: + log(f'ERROR: {env_var}="{raw}" is not a valid port number') + sys.exit(1) + if port < 1024 or port > 65535: + log(f"ERROR: {env_var}={port} — must be between 1024 and 65535") + sys.exit(1) + return port + + +def _apply_port_overrides(bp: dict[str, Any]) -> None: + """Override hardcoded ports from NEMOCLAW_*_PORT env vars. + + Keeps blueprint.yaml as a readable reference of defaults while allowing + runtime configuration without editing YAML. 
+ """ + components = bp.setdefault("components", {}) + + # Dashboard / forward port + dashboard_port = _parse_port_env("NEMOCLAW_DASHBOARD_PORT") + if dashboard_port: + sandbox = components.setdefault("sandbox", {}) + sandbox["forward_ports"] = [dashboard_port] + + # vLLM / NIM inference port + vllm_port = _parse_port_env("NEMOCLAW_VLLM_PORT") + if vllm_port: + profiles = components.get("inference", {}).get("profiles", {}) + for key in ("nim-local", "vllm"): + if key in profiles: + old_endpoint = profiles[key].get("endpoint", "") + # Replace the port in endpoint URL (matches any numeric port) + profiles[key]["endpoint"] = re.sub( + r":\d+(/|$)", f":{vllm_port}\\1", old_endpoint + ) + + # Policy addition: nim_service port + additions = components.get("policy", {}).get("additions", {}) + nim_svc = additions.get("nim_service", {}) + for ep in nim_svc.get("endpoints", []): + ep["port"] = vllm_port def run_cmd( diff --git a/package-lock.json b/package-lock.json index 362db593d..3e81a1777 100644 --- a/package-lock.json +++ b/package-lock.json @@ -891,14 +891,6 @@ "scripts/actions/documentation" ] }, - "node_modules/@buape/carbon/node_modules/opusscript": { - "version": "0.0.8", - "resolved": "https://registry.npmjs.org/opusscript/-/opusscript-0.0.8.tgz", - "integrity": "sha512-VSTi1aWFuCkRCVq+tx/BQ5q9fMnQ9pVZ3JU4UHKqTkf0ED3fKEPdr+gKAAl3IA2hj9rrP6iyq3hlcJq3HELtNQ==", - "license": "MIT", - "optional": true, - "peer": true - }, "node_modules/@buape/carbon/node_modules/prism-media": { "version": "1.3.5", "resolved": "https://registry.npmjs.org/prism-media/-/prism-media-1.3.5.tgz", @@ -1291,14 +1283,6 @@ "url": "https://github.com/discordjs/discord.js?sponsor" } }, - "node_modules/@discordjs/voice/node_modules/opusscript": { - "version": "0.0.8", - "resolved": "https://registry.npmjs.org/opusscript/-/opusscript-0.0.8.tgz", - "integrity": "sha512-VSTi1aWFuCkRCVq+tx/BQ5q9fMnQ9pVZ3JU4UHKqTkf0ED3fKEPdr+gKAAl3IA2hj9rrP6iyq3hlcJq3HELtNQ==", - "license": "MIT", - "optional": true, - 
"peer": true - }, "node_modules/@discordjs/voice/node_modules/prism-media": { "version": "1.3.5", "resolved": "https://registry.npmjs.org/prism-media/-/prism-media-1.3.5.tgz", diff --git a/scripts/brev-setup.sh b/scripts/brev-setup.sh index 4421b5bce..d7c3667f1 100755 --- a/scripts/brev-setup.sh +++ b/scripts/brev-setup.sh @@ -27,6 +27,9 @@ fail() { echo -e "${RED}[brev]${NC} $1"; exit 1; } SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# Load port overrides if present +[ -f "${SCRIPT_DIR}/../.env" ] && set -a && . "${SCRIPT_DIR}/../.env" && set +a + [ -n "${NVIDIA_API_KEY:-}" ] || fail "NVIDIA_API_KEY not set" # Suppress needrestart noise from apt (Scanning processes, No services need...) @@ -129,19 +132,20 @@ if command -v nvidia-smi > /dev/null 2>&1; then fi # Start vLLM if not already running - if curl -s http://localhost:8000/v1/models > /dev/null 2>&1; then - info "vLLM already running on :8000" + VLLM_PORT="${NEMOCLAW_VLLM_PORT:-8000}" + if curl -s "http://localhost:${VLLM_PORT}/v1/models" > /dev/null 2>&1; then + info "vLLM already running on :${VLLM_PORT}" elif python3 -c "import vllm" 2>/dev/null; then info "Starting vLLM with $VLLM_MODEL..." nohup python3 -m vllm.entrypoints.openai.api_server \ --model "$VLLM_MODEL" \ - --port 8000 \ + --port "$VLLM_PORT" \ --host 0.0.0.0 \ > /tmp/vllm-server.log 2>&1 & VLLM_PID=$! info "Waiting for vLLM to load model (this can take a few minutes)..." - for i in $(seq 1 120); do - if curl -s http://localhost:8000/v1/models > /dev/null 2>&1; then + for _i in $(seq 1 120); do + if curl -s "http://localhost:${VLLM_PORT}/v1/models" > /dev/null 2>&1; then info "vLLM ready (PID $VLLM_PID)" break fi diff --git a/scripts/check-ports.sh b/scripts/check-ports.sh new file mode 100755 index 000000000..071cd01b1 --- /dev/null +++ b/scripts/check-ports.sh @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# check-ports.sh — Check whether NemoClaw ports are available. +# +# Reads configured ports from .env (if present), falls back to defaults. +# Pass custom ports as arguments to check additional ports. +# +# Usage: +# scripts/check-ports.sh # check configured/default ports +# scripts/check-ports.sh 9000 9080 # check custom ports + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +# ── Load .env ──────────────────────────────────────────────────────── +load_env() { + local env_file="$1" + [[ -f "$env_file" ]] || return 0 + while IFS= read -r line || [[ -n "$line" ]]; do + line="${line%%#*}" # strip comments + line="${line#"${line%%[![:space:]]*}"}" # trim leading whitespace + line="${line%"${line##*[![:space:]]}"}" # trim trailing whitespace + [[ -z "$line" ]] && continue + [[ "$line" == *=* ]] || continue + local key="${line%%=*}" + local val="${line#*=}" + # Strip surrounding quotes + val="${val#\"}" ; val="${val%\"}" + val="${val#\'}" ; val="${val%\'}" + # Only set if not already in environment + if [[ -z "${!key+x}" ]]; then + export "$key=$val" + fi + done < "$env_file" +} + +load_env "$PROJECT_ROOT/.env.local" +load_env "$PROJECT_ROOT/.env" + +# ── Resolve ports ──────────────────────────────────────────────────── +DASHBOARD_PORT="${NEMOCLAW_DASHBOARD_PORT:-${DASHBOARD_PORT:-${PUBLIC_PORT:-18789}}}" +GATEWAY_PORT="${NEMOCLAW_GATEWAY_PORT:-8080}" +VLLM_PORT="${NEMOCLAW_VLLM_PORT:-8000}" +OLLAMA_PORT="${NEMOCLAW_OLLAMA_PORT:-11434}" + +# ── Check a single port ───────────────────────────────────────────── +conflicts=0 + +check_port() { + local port="$1" + local label="${2:-}" + local prefix="$port" + [[ -n "$label" ]] && prefix="$port ($label)" + + if lsof -iTCP:"$port" -sTCP:LISTEN -nP >/dev/null 2>&1; then + local proc + proc="$(lsof -iTCP:"$port" -sTCP:LISTEN -nP 2>/dev/null | awk 'NR==2 {print $1 " (PID " $2 ")"}')" + echo " CONFLICT 
$prefix — in use by $proc" + conflicts=$((conflicts + 1)) + return 1 + else + echo " ok $prefix" + return 0 + fi +} + +# ── Run checks ─────────────────────────────────────────────────────── +echo "Checking NemoClaw ports..." +echo "" + +check_port "$DASHBOARD_PORT" "dashboard" || true +check_port "$GATEWAY_PORT" "gateway" || true +check_port "$VLLM_PORT" "vllm/nim" || true +check_port "$OLLAMA_PORT" "ollama" || true + +if [[ $# -gt 0 ]]; then + echo "" + echo "Custom ports:" + for p in "$@"; do + check_port "$p" || true + done +fi + +echo "" +if [[ $conflicts -gt 0 ]]; then + echo "$conflicts port conflict(s) found." + echo "Set NEMOCLAW_*_PORT env vars or edit .env to use different ports." + exit 1 +else + echo "All ports available." +fi diff --git a/scripts/debug.sh b/scripts/debug.sh index 3fa2647e6..db1a5a872 100755 --- a/scripts/debug.sh +++ b/scripts/debug.sh @@ -285,7 +285,7 @@ if [ "$QUICK" = false ]; then # shellcheck disable=SC2016 collect "curl-models" sh -c 'code=$(curl -s -o /dev/null -w "%{http_code}" https://integrate.api.nvidia.com/v1/models); echo "HTTP $code"; if [ "$code" -ge 200 ] && [ "$code" -lt 500 ]; then echo "NIM API reachable"; else echo "NIM API unreachable"; exit 1; fi' collect "lsof-net" sh -c 'lsof -i -P -n 2>/dev/null | head -50' - collect "lsof-18789" lsof -i :18789 + collect "lsof-dashboard" lsof -i :"${NEMOCLAW_DASHBOARD_PORT:-18789}" fi # -- Kernel / IO (full mode only) -- diff --git a/scripts/lib/runtime.sh b/scripts/lib/runtime.sh index 3bf546847..3e6b03e02 100644 --- a/scripts/lib/runtime.sh +++ b/scripts/lib/runtime.sh @@ -206,8 +206,8 @@ get_local_provider_base_url() { local provider="${1:-}" case "$provider" in - vllm-local) printf 'http://host.openshell.internal:8000/v1\n' ;; - ollama-local) printf 'http://host.openshell.internal:11434/v1\n' ;; + vllm-local) printf 'http://host.openshell.internal:%s/v1\n' "${NEMOCLAW_VLLM_PORT:-8000}" ;; + ollama-local) printf 'http://host.openshell.internal:%s/v1\n' 
"${NEMOCLAW_OLLAMA_PORT:-11434}" ;; *) return 1 ;; esac } @@ -217,10 +217,10 @@ check_local_provider_health() { case "$provider" in vllm-local) - curl -sf http://localhost:8000/v1/models > /dev/null 2>&1 + curl -sf "http://localhost:${NEMOCLAW_VLLM_PORT:-8000}/v1/models" > /dev/null 2>&1 ;; ollama-local) - curl -sf http://localhost:11434/api/tags > /dev/null 2>&1 + curl -sf "http://localhost:${NEMOCLAW_OLLAMA_PORT:-11434}/api/tags" > /dev/null 2>&1 ;; *) return 1 diff --git a/scripts/nemoclaw-start.sh b/scripts/nemoclaw-start.sh index d28b96374..3d26aae76 100755 --- a/scripts/nemoclaw-start.sh +++ b/scripts/nemoclaw-start.sh @@ -12,8 +12,8 @@ set -euo pipefail NEMOCLAW_CMD=("$@") -CHAT_UI_URL="${CHAT_UI_URL:-http://127.0.0.1:18789}" -PUBLIC_PORT=18789 +PUBLIC_PORT="${NEMOCLAW_DASHBOARD_PORT:-${PUBLIC_PORT:-18789}}" +CHAT_UI_URL="${CHAT_UI_URL:-http://127.0.0.1:${PUBLIC_PORT}}" write_auth_profile() { if [ -z "${NVIDIA_API_KEY:-}" ]; then diff --git a/scripts/setup.sh b/scripts/setup.sh index 22b3ccfec..c0e39c439 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -31,6 +31,11 @@ NC='\033[0m' SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +# Load port overrides if present (.env.local takes precedence) +[ -f "${REPO_DIR}/.env" ] && set -a && . "${REPO_DIR}/.env" && set +a +[ -f "${REPO_DIR}/.env.local" ] && set -a && . "${REPO_DIR}/.env.local" && set +a + # shellcheck source=./lib/runtime.sh . "$SCRIPT_DIR/lib/runtime.sh" @@ -106,7 +111,7 @@ fi # 1. Gateway — always start fresh to avoid stale state info "Starting OpenShell gateway..." 
openshell gateway destroy -g nemoclaw > /dev/null 2>&1 || true -GATEWAY_ARGS=(--name nemoclaw) +GATEWAY_ARGS=(--name nemoclaw --port "${NEMOCLAW_GATEWAY_PORT:-8080}") command -v nvidia-smi > /dev/null 2>&1 && GATEWAY_ARGS+=(--gpu) openshell gateway start "${GATEWAY_ARGS[@]}" 2>&1 | grep -E "Gateway|✓|Error|error" || true @@ -156,7 +161,7 @@ if [ "$(uname -s)" = "Darwin" ]; then # Start Ollama service if not running if ! check_local_provider_health "ollama-local"; then info "Starting Ollama service..." - OLLAMA_HOST=0.0.0.0:11434 ollama serve > /dev/null 2>&1 & + OLLAMA_HOST="0.0.0.0:${NEMOCLAW_OLLAMA_PORT:-11434}" ollama serve > /dev/null 2>&1 & sleep 2 fi OLLAMA_LOCAL_BASE_URL="$(get_local_provider_base_url "ollama-local")" diff --git a/scripts/start-services.sh b/scripts/start-services.sh index cbce0f183..9549ddd2b 100755 --- a/scripts/start-services.sh +++ b/scripts/start-services.sh @@ -16,7 +16,12 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" -DASHBOARD_PORT="${DASHBOARD_PORT:-18789}" + +# Load port overrides if present (.env.local takes precedence) +[ -f "${REPO_DIR}/.env" ] && set -a && . "${REPO_DIR}/.env" && set +a +[ -f "${REPO_DIR}/.env.local" ] && set -a && . 
"${REPO_DIR}/.env.local" && set +a + +DASHBOARD_PORT="${NEMOCLAW_DASHBOARD_PORT:-${DASHBOARD_PORT:-18789}}" # ── Parse flags ────────────────────────────────────────────────── SANDBOX_NAME="${NEMOCLAW_SANDBOX:-${SANDBOX_NAME:-default}}" diff --git a/test/e2e/test-double-onboard.sh b/test/e2e/test-double-onboard.sh index 7ebdddcd5..fd63f0f37 100755 --- a/test/e2e/test-double-onboard.sh +++ b/test/e2e/test-double-onboard.sh @@ -51,7 +51,7 @@ if command -v nemoclaw > /dev/null 2>&1; then fi openshell sandbox delete "$SANDBOX_A" 2>/dev/null || true openshell sandbox delete "$SANDBOX_B" 2>/dev/null || true -openshell forward stop 18789 2>/dev/null || true +openshell forward stop "${NEMOCLAW_DASHBOARD_PORT:-18789}" 2>/dev/null || true openshell gateway destroy -g nemoclaw 2>/dev/null || true pass "Pre-cleanup complete" @@ -156,13 +156,13 @@ echo "$output2" | grep -q "Cleaning up previous NemoClaw session" \ && pass "Stale session cleanup fired on second onboard" \ || fail "Stale session cleanup did NOT fire (regression: #397)" -echo "$output2" | grep -q "Port 8080 is not available" \ - && fail "Port 8080 conflict detected (regression: #21)" \ - || pass "No port 8080 conflict" +echo "$output2" | grep -q "Port ${NEMOCLAW_GATEWAY_PORT:-8080} is not available" \ + && fail "Port ${NEMOCLAW_GATEWAY_PORT:-8080} conflict detected (regression: #21)" \ + || pass "No port ${NEMOCLAW_GATEWAY_PORT:-8080} conflict" -echo "$output2" | grep -q "Port 18789 is not available" \ - && fail "Port 18789 conflict detected" \ - || pass "No port 18789 conflict" +echo "$output2" | grep -q "Port ${NEMOCLAW_DASHBOARD_PORT:-18789} is not available" \ + && fail "Port ${NEMOCLAW_DASHBOARD_PORT:-18789} conflict detected" \ + || pass "No port ${NEMOCLAW_DASHBOARD_PORT:-18789} conflict" echo "$output2" | grep -q "Sandbox '${SANDBOX_A}' created" \ && pass "Sandbox '$SANDBOX_A' recreated" \ @@ -197,13 +197,13 @@ echo "$output3" | grep -q "Cleaning up previous NemoClaw session" \ && pass "Stale session 
cleanup fired on third onboard" \ || fail "Stale session cleanup did NOT fire on third onboard" -echo "$output3" | grep -q "Port 8080 is not available" \ - && fail "Port 8080 conflict on third onboard (regression)" \ - || pass "No port 8080 conflict on third onboard" +echo "$output3" | grep -q "Port ${NEMOCLAW_GATEWAY_PORT:-8080} is not available" \ + && fail "Port ${NEMOCLAW_GATEWAY_PORT:-8080} conflict on third onboard (regression)" \ + || pass "No port ${NEMOCLAW_GATEWAY_PORT:-8080} conflict on third onboard" -echo "$output3" | grep -q "Port 18789 is not available" \ - && fail "Port 18789 conflict on third onboard" \ - || pass "No port 18789 conflict on third onboard" +echo "$output3" | grep -q "Port ${NEMOCLAW_DASHBOARD_PORT:-18789} is not available" \ + && fail "Port ${NEMOCLAW_DASHBOARD_PORT:-18789} conflict on third onboard" \ + || pass "No port ${NEMOCLAW_DASHBOARD_PORT:-18789} conflict on third onboard" echo "$output3" | grep -q "Sandbox '${SANDBOX_B}' created" \ && pass "Sandbox '$SANDBOX_B' created" \ @@ -218,7 +218,7 @@ nemoclaw "$SANDBOX_A" destroy 2>/dev/null || true nemoclaw "$SANDBOX_B" destroy 2>/dev/null || true openshell sandbox delete "$SANDBOX_A" 2>/dev/null || true openshell sandbox delete "$SANDBOX_B" 2>/dev/null || true -openshell forward stop 18789 2>/dev/null || true +openshell forward stop "${NEMOCLAW_DASHBOARD_PORT:-18789}" 2>/dev/null || true openshell gateway destroy -g nemoclaw 2>/dev/null || true openshell sandbox get "$SANDBOX_A" > /dev/null 2>&1 \ diff --git a/test/local-inference.test.js b/test/local-inference.test.js index 5b77ee2f7..4c648d839 100644 --- a/test/local-inference.test.js +++ b/test/local-inference.test.js @@ -18,33 +18,34 @@ const { validateOllamaModel, validateLocalProvider, } = require("../bin/lib/local-inference"); +const { VLLM_PORT, OLLAMA_PORT } = require("../bin/lib/ports"); describe("local inference helpers", () => { it("returns the expected base URL for vllm-local", () => { assert.equal( 
getLocalProviderBaseUrl("vllm-local"), - "http://host.openshell.internal:8000/v1", + `http://host.openshell.internal:${VLLM_PORT}/v1`, ); }); it("returns the expected base URL for ollama-local", () => { assert.equal( getLocalProviderBaseUrl("ollama-local"), - "http://host.openshell.internal:11434/v1", + `http://host.openshell.internal:${OLLAMA_PORT}/v1`, ); }); it("returns the expected health check command for ollama-local", () => { assert.equal( getLocalProviderHealthCheck("ollama-local"), - "curl -sf http://localhost:11434/api/tags 2>/dev/null", + `curl -sf http://localhost:${OLLAMA_PORT}/api/tags 2>/dev/null`, ); }); it("returns the expected container reachability command for ollama-local", () => { assert.equal( getLocalProviderContainerReachabilityCheck("ollama-local"), - `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:11434/api/tags 2>/dev/null`, + `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:${OLLAMA_PORT}/api/tags 2>/dev/null`, ); }); @@ -61,7 +62,7 @@ describe("local inference helpers", () => { it("returns a clear error when ollama-local is unavailable", () => { const result = validateLocalProvider("ollama-local", () => ""); assert.equal(result.ok, false); - assert.match(result.message, /http:\/\/localhost:11434/); + assert.match(result.message, new RegExp(`http://localhost:${OLLAMA_PORT}`)); }); it("returns a clear error when ollama-local is not reachable from containers", () => { @@ -71,14 +72,14 @@ describe("local inference helpers", () => { return callCount === 1 ? 
'{"models":[]}' : ""; }); assert.equal(result.ok, false); - assert.match(result.message, /host\.openshell\.internal:11434/); - assert.match(result.message, /0\.0\.0\.0:11434/); + assert.match(result.message, new RegExp(`host\\.openshell\\.internal:${OLLAMA_PORT}`)); + assert.match(result.message, new RegExp(`0\\.0\\.0\\.0:${OLLAMA_PORT}`)); }); it("returns a clear error when vllm-local is unavailable", () => { const result = validateLocalProvider("vllm-local", () => ""); assert.equal(result.ok, false); - assert.match(result.message, /http:\/\/localhost:8000/); + assert.match(result.message, new RegExp(`http://localhost:${VLLM_PORT}`)); }); it("parses model names from ollama list output", () => { @@ -121,14 +122,14 @@ describe("local inference helpers", () => { it("builds a background warmup command for ollama models", () => { const command = getOllamaWarmupCommand("nemotron-3-nano:30b"); - assert.match(command, /^nohup curl -s http:\/\/localhost:11434\/api\/generate /); + assert.match(command, new RegExp(`^nohup curl -s http://localhost:${OLLAMA_PORT}/api/generate `)); assert.match(command, /"model":"nemotron-3-nano:30b"/); assert.match(command, /"keep_alive":"15m"/); }); it("builds a foreground probe command for ollama models", () => { const command = getOllamaProbeCommand("nemotron-3-nano:30b"); - assert.match(command, /^curl -sS --max-time 120 http:\/\/localhost:11434\/api\/generate /); + assert.match(command, new RegExp(`^curl -sS --max-time 120 http://localhost:${OLLAMA_PORT}/api/generate `)); assert.match(command, /"model":"nemotron-3-nano:30b"/); }); diff --git a/test/onboard-selection.test.js b/test/onboard-selection.test.js index 9000943ba..3cc608943 100644 --- a/test/onboard-selection.test.js +++ b/test/onboard-selection.test.js @@ -17,10 +17,12 @@ describe("onboard provider selection UX", () => { const credentialsPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "credentials.js")); const runnerPath = JSON.stringify(path.join(repoRoot, "bin", "lib", 
"runner.js")); const registryPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "registry.js")); + const portsPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "ports.js")); const script = String.raw` const credentials = require(${credentialsPath}); const runner = require(${runnerPath}); const registry = require(${registryPath}); +const { VLLM_PORT, OLLAMA_PORT } = require(${portsPath}); let promptCalls = 0; const messages = []; @@ -34,9 +36,9 @@ credentials.prompt = async (message) => { credentials.ensureApiKey = async () => {}; runner.runCapture = (command) => { if (command.includes("command -v ollama")) return "/usr/bin/ollama"; - if (command.includes("localhost:11434/api/tags")) return JSON.stringify({ models: [{ name: "nemotron-3-nano:30b" }] }); + if (command.includes("localhost:" + OLLAMA_PORT + "/api/tags")) return JSON.stringify({ models: [{ name: "nemotron-3-nano:30b" }] }); if (command.includes("ollama list")) return "nemotron-3-nano:30b abc 24 GB now\\nqwen3:32b def 20 GB now"; - if (command.includes("localhost:8000/v1/models")) return ""; + if (command.includes("localhost:" + VLLM_PORT + "/v1/models")) return ""; return ""; }; registry.updateSandbox = (_name, update) => updates.push(update); diff --git a/test/preflight.test.js b/test/preflight.test.js index 6471d7983..2150a43b9 100644 --- a/test/preflight.test.js +++ b/test/preflight.test.js @@ -6,6 +6,7 @@ const assert = require("node:assert/strict"); const net = require("net"); const { checkPortAvailable } = require("../bin/lib/preflight"); +const { DASHBOARD_PORT } = require("../bin/lib/ports"); describe("checkPortAvailable", () => { it("falls through to net probe when lsof output is empty", async () => { @@ -104,7 +105,7 @@ describe("checkPortAvailable", () => { assert.equal(result.ok, true); }); - it("defaults to port 18789 when no args given", async () => { + it(`defaults to port ${DASHBOARD_PORT} when no args given`, async () => { // Should not throw — just verify it returns a valid 
result object const result = await checkPortAvailable(); assert.equal(typeof result.ok, "boolean"); diff --git a/test/runtime-shell.test.js b/test/runtime-shell.test.js index 979460b98..150cd0ae7 100644 --- a/test/runtime-shell.test.js +++ b/test/runtime-shell.test.js @@ -119,13 +119,13 @@ describe("shell runtime helpers", () => { it("returns the vllm-local base URL", () => { const result = runShell(`source "${RUNTIME_SH}"; get_local_provider_base_url vllm-local`); assert.equal(result.status, 0); - assert.equal(result.stdout.trim(), "http://host.openshell.internal:8000/v1"); + assert.equal(result.stdout.trim(), `http://host.openshell.internal:${process.env.NEMOCLAW_VLLM_PORT || "8000"}/v1`); }); it("returns the ollama-local base URL", () => { const result = runShell(`source "${RUNTIME_SH}"; get_local_provider_base_url ollama-local`); assert.equal(result.status, 0); - assert.equal(result.stdout.trim(), "http://host.openshell.internal:11434/v1"); + assert.equal(result.stdout.trim(), `http://host.openshell.internal:${process.env.NEMOCLAW_OLLAMA_PORT || "11434"}/v1`); }); it("rejects unknown local providers", () => { diff --git a/uninstall.sh b/uninstall.sh index 9b35cc484..4428a056b 100755 --- a/uninstall.sh +++ b/uninstall.sh @@ -165,7 +165,7 @@ stop_openshell_forward_processes() { while IFS= read -r pid; do [ -n "$pid" ] || continue pids+=("$pid") - done < <(pgrep -f 'openshell.*forward.*18789' 2>/dev/null || true) + done < <(pgrep -f "openshell.*forward.*${NEMOCLAW_DASHBOARD_PORT:-18789}" 2>/dev/null || true) if [ "${#pids[@]}" -eq 0 ]; then info "No local OpenShell forward processes found" From 37051ac1f97ca18a66cff4bfde9fc65a38219012 Mon Sep 17 00:00:00 2001 From: jnun Date: Sat, 21 Mar 2026 14:08:40 -0500 Subject: [PATCH 02/12] style: fix doc H1 mismatch and apply ruff formatting Align port-configuration.md H1 with title.page frontmatter value. Apply ruff auto-format to runner.py. 
--- docs/reference/port-configuration.md | 2 +- nemoclaw-blueprint/orchestrator/runner.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/reference/port-configuration.md b/docs/reference/port-configuration.md index 47a87164a..450b49914 100644 --- a/docs/reference/port-configuration.md +++ b/docs/reference/port-configuration.md @@ -18,7 +18,7 @@ status: published SPDX-License-Identifier: Apache-2.0 --> -# Port Configuration +# NemoClaw Port Configuration NemoClaw uses four network ports. All ports are configurable through environment variables or a `.env` file at the project root. diff --git a/nemoclaw-blueprint/orchestrator/runner.py b/nemoclaw-blueprint/orchestrator/runner.py index 7b9a53902..27feae496 100644 --- a/nemoclaw-blueprint/orchestrator/runner.py +++ b/nemoclaw-blueprint/orchestrator/runner.py @@ -97,9 +97,7 @@ def _apply_port_overrides(bp: dict[str, Any]) -> None: if key in profiles: old_endpoint = profiles[key].get("endpoint", "") # Replace the port in endpoint URL (matches any numeric port) - profiles[key]["endpoint"] = re.sub( - r":\d+(/|$)", f":{vllm_port}\\1", old_endpoint - ) + profiles[key]["endpoint"] = re.sub(r":\d+(/|$)", f":{vllm_port}\\1", old_endpoint) # Policy addition: nim_service port additions = components.get("policy", {}).get("additions", {}) From 14170ce842eee2c7af721d20f972fecc10edade9 Mon Sep 17 00:00:00 2001 From: Jason Nunnelley Date: Sat, 21 Mar 2026 15:33:30 -0500 Subject: [PATCH 03/12] Update README.md Remove empty line for stylecheck. Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 04f361266..56b060ec7 100644 --- a/README.md +++ b/README.md @@ -184,11 +184,10 @@ Or create a `.env` file at the project root (see `.env.example`). 
> **Note** > > Changing the dashboard port requires rebuilding the sandbox image because the CORS origin is baked in at build time. Re-run `nemoclaw onboard` after changing `NEMOCLAW_DASHBOARD_PORT`. - +> > **Network exposure** > > When using local inference (Ollama or vLLM), the inference service binds to `0.0.0.0` so that containers can reach it via `host.openshell.internal`. This means the service is reachable from your local network, not just localhost. This is required for the sandbox architecture but should be considered in shared or untrusted network environments. - --- ## How It Works From c9680c69fe476fb0e3166195ed83c564d03b5592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aaron=20Erickson=20=F0=9F=A6=9E?= Date: Sat, 21 Mar 2026 12:37:50 -0700 Subject: [PATCH 04/12] fix(security): command injection across all CLI entry points (#584) * security: fix command injection across all CLI entry points Comprehensive fix for shell injection vulnerabilities where user input (instance names, sandbox names, model names, API keys) was interpolated unsanitized into shell commands via run()/runInteractive()/execSync(). Changes: - Add shellQuote() and validateName() to runner.js as shared utilities - Replace all execSync() with execFileSync() in deploy (no shell) - Apply shellQuote() to every user-controlled variable in shell commands across nemoclaw.js, onboard.js, nim.js, policies.js - Add RFC 1123 name validation at CLI dispatch for sandbox/instance names - Fix path traversal in policies.js loadPreset() - Replace predictable temp file with mkdtempSync() - Remove duplicate shellQuote() definitions (now single source in runner.js) - 9 new test cases for shellQuote, validateName, and path traversal Supersedes #55, #110, #475, #540, #48, #171. * fix: deduplicate shellQuote in local-inference.js Import shellQuote from runner.js instead of defining a local copy. Single source of truth for shell quoting across the codebase. 
* security: fix telegram bridge injection + add regression guards Telegram bridge: - Replace execSync with execFileSync for ssh-config retrieval - shellQuote message, API key, and session ID in remote command - Validate SANDBOX_NAME at startup - Use mkdtempSync for temp SSH config (not predictable path) Regression tests: - nemoclaw.js must not use execSync - Single shellQuote definition in bin/ - CLI rejects malicious sandbox names (e2e, no mocking) - telegram-bridge.js validates SANDBOX_NAME and avoids execSync * security: address CodeRabbit review findings on shell injection PR - Shell-quote secret values written to .env before remote source - Wrap scp upload in try/finally to guarantee temp secret cleanup - Shell-quote CHAT_UI_URL and NVIDIA_API_KEY env args in onboard - Replace predictable Date.now() temp path with mkdtempSync in policies - Strengthen e2e test with canary file to prove payload never executes - Fix merge-introduced test expectations for shellQuote single-quote format --- bin/lib/local-inference.js | 5 +- bin/lib/nim.js | 21 +++--- bin/lib/onboard.js | 18 ++--- bin/lib/policies.js | 20 ++++-- bin/lib/runner.js | 29 +++++++- bin/nemoclaw.js | 68 ++++++++++-------- scripts/telegram-bridge.js | 23 +++--- test/onboard-readiness.test.js | 8 +-- test/policies.test.js | 21 +++--- test/runner.test.js | 126 +++++++++++++++++++++++++++++++++ 10 files changed, 257 insertions(+), 82 deletions(-) diff --git a/bin/lib/local-inference.js b/bin/lib/local-inference.js index f3a2ad76f..07f266d19 100644 --- a/bin/lib/local-inference.js +++ b/bin/lib/local-inference.js @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 const { VLLM_PORT, OLLAMA_PORT } = require("./ports"); +const { shellQuote } = require("./runner"); const HOST_GATEWAY_URL = "http://host.openshell.internal"; const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1"; @@ -116,10 +117,6 @@ function getDefaultOllamaModel(runCapture) { return models.includes(DEFAULT_OLLAMA_MODEL) ? 
DEFAULT_OLLAMA_MODEL : models[0]; } -function shellQuote(value) { - return `'${String(value).replace(/'/g, `'\\''`)}'`; -} - function getOllamaWarmupCommand(model, keepAlive = "15m") { const payload = JSON.stringify({ model, diff --git a/bin/lib/nim.js b/bin/lib/nim.js index f94b60d40..ec0e4bd27 100644 --- a/bin/lib/nim.js +++ b/bin/lib/nim.js @@ -3,7 +3,7 @@ // // NIM container management — pull, start, stop, health-check NIM images. -const { run, runCapture } = require("./runner"); +const { run, runCapture, shellQuote } = require("./runner"); const { VLLM_PORT } = require("./ports"); const nimImages = require("./nim-images.json"); @@ -122,7 +122,7 @@ function pullNimImage(model) { process.exit(1); } console.log(` Pulling NIM image: ${image}`); - run(`docker pull ${image}`); + run(`docker pull ${shellQuote(image)}`); return image; } @@ -135,12 +135,13 @@ function startNimContainer(sandboxName, model, port = VLLM_PORT) { } // Stop any existing container with same name - run(`docker rm -f ${name} 2>/dev/null || true`, { ignoreError: true }); + const qn = shellQuote(name); + run(`docker rm -f ${qn} 2>/dev/null || true`, { ignoreError: true }); console.log(` Starting NIM container: ${name}`); run( // Right-hand :8000 is the NIM image's internal port — fixed by the image, not configurable. 
- `docker run -d --gpus all -p ${port}:8000 --name ${name} --shm-size 16g ${image}` + `docker run -d --gpus all -p ${Number(port)}:8000 --name ${qn} --shm-size 16g ${shellQuote(image)}` ); return name; } @@ -148,11 +149,12 @@ function startNimContainer(sandboxName, model, port = VLLM_PORT) { function waitForNimHealth(port = VLLM_PORT, timeout = 300) { const start = Date.now(); const interval = 5000; - console.log(` Waiting for NIM health on port ${port} (timeout: ${timeout}s)...`); + const safePort = Number(port); + console.log(` Waiting for NIM health on port ${safePort} (timeout: ${timeout}s)...`); while ((Date.now() - start) / 1000 < timeout) { try { - const result = runCapture(`curl -sf http://localhost:${port}/v1/models`, { + const result = runCapture(`curl -sf http://localhost:${safePort}/v1/models`, { ignoreError: true, }); if (result) { @@ -169,16 +171,17 @@ function waitForNimHealth(port = VLLM_PORT, timeout = 300) { function stopNimContainer(sandboxName) { const name = containerName(sandboxName); + const qn = shellQuote(name); console.log(` Stopping NIM container: ${name}`); - run(`docker stop ${name} 2>/dev/null || true`, { ignoreError: true }); - run(`docker rm ${name} 2>/dev/null || true`, { ignoreError: true }); + run(`docker stop ${qn} 2>/dev/null || true`, { ignoreError: true }); + run(`docker rm ${qn} 2>/dev/null || true`, { ignoreError: true }); } function nimStatus(sandboxName, port = VLLM_PORT) { const name = containerName(sandboxName); try { const state = runCapture( - `docker inspect --format '{{.State.Status}}' ${name} 2>/dev/null`, + `docker inspect --format '{{.State.Status}}' ${shellQuote(name)} 2>/dev/null`, { ignoreError: true } ); if (!state) return { running: false, container: name }; diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index bb704aa31..4866ba9d4 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -8,7 +8,7 @@ const fs = require("fs"); const os = require("os"); const path = require("path"); -const { ROOT, 
SCRIPTS, run, runCapture } = require("./runner"); +const { ROOT, SCRIPTS, run, runCapture, shellQuote } = require("./runner"); const { DASHBOARD_PORT, GATEWAY_PORT, VLLM_PORT, OLLAMA_PORT } = require("./ports"); const { getDefaultOllamaModel, @@ -85,10 +85,6 @@ function step(n, total, msg) { console.log(` ${"─".repeat(50)}`); } -function shellQuote(value) { - return `'${String(value).replace(/'/g, `'\\''`)}'`; -} - function getInstalledOpenshellVersion(versionOutput = null) { const output = String(versionOutput ?? runCapture("openshell -V", { ignoreError: true })).trim(); const match = output.match(/openshell\s+([0-9]+\.[0-9]+\.[0-9]+)/i); @@ -452,12 +448,12 @@ async function createSandbox(gpu) { console.log(` Creating sandbox '${sandboxName}' (this takes a few minutes on first run)...`); const envArgs = [ - `CHAT_UI_URL=${chatUiUrl}`, + `CHAT_UI_URL=${shellQuote(chatUiUrl)}`, `NEMOCLAW_DASHBOARD_PORT=${DASHBOARD_PORT}`, `PUBLIC_PORT=${DASHBOARD_PORT}`, ]; if (process.env.NVIDIA_API_KEY) { - envArgs.push(`NVIDIA_API_KEY=${process.env.NVIDIA_API_KEY}`); + envArgs.push(`NVIDIA_API_KEY=${shellQuote(process.env.NVIDIA_API_KEY)}`); } // Run without piping through awk — the pipe masked non-zero exit codes @@ -746,12 +742,12 @@ async function setupInference(sandboxName, model, provider) { // Create nvidia-nim provider run( `openshell provider create --name nvidia-nim --type openai ` + - `--credential "NVIDIA_API_KEY=${process.env.NVIDIA_API_KEY}" ` + + `--credential ${shellQuote("NVIDIA_API_KEY=" + process.env.NVIDIA_API_KEY)} ` + `--config "OPENAI_BASE_URL=https://integrate.api.nvidia.com/v1" 2>&1 || true`, { ignoreError: true } ); run( - `openshell inference set --no-verify --provider nvidia-nim --model ${model} 2>/dev/null || true`, + `openshell inference set --no-verify --provider nvidia-nim --model ${shellQuote(model)} 2>/dev/null || true`, { ignoreError: true } ); } else if (provider === "vllm-local") { @@ -770,7 +766,7 @@ async function setupInference(sandboxName, 
model, provider) { { ignoreError: true } ); run( - `openshell inference set --no-verify --provider vllm-local --model ${model} 2>/dev/null || true`, + `openshell inference set --no-verify --provider vllm-local --model ${shellQuote(model)} 2>/dev/null || true`, { ignoreError: true } ); } else if (provider === "ollama-local") { @@ -790,7 +786,7 @@ async function setupInference(sandboxName, model, provider) { { ignoreError: true } ); run( - `openshell inference set --no-verify --provider ollama-local --model ${model} 2>/dev/null || true`, + `openshell inference set --no-verify --provider ollama-local --model ${shellQuote(model)} 2>/dev/null || true`, { ignoreError: true } ); console.log(` Priming Ollama model: ${model}`); diff --git a/bin/lib/policies.js b/bin/lib/policies.js index 80034ee16..240294bda 100644 --- a/bin/lib/policies.js +++ b/bin/lib/policies.js @@ -6,7 +6,7 @@ const fs = require("fs"); const path = require("path"); const os = require("os"); -const { ROOT, run, runCapture } = require("./runner"); +const { ROOT, run, runCapture, shellQuote } = require("./runner"); const registry = require("./registry"); const PRESETS_DIR = path.join(ROOT, "nemoclaw-blueprint", "policies", "presets"); @@ -29,7 +29,11 @@ function listPresets() { } function loadPreset(name) { - const file = path.join(PRESETS_DIR, `${name}.yaml`); + const file = path.resolve(PRESETS_DIR, `${name}.yaml`); + if (!file.startsWith(PRESETS_DIR + path.sep) && file !== PRESETS_DIR) { + console.error(` Invalid preset name: ${name}`); + return null; + } if (!fs.existsSync(file)) { console.error(` Preset not found: ${name}`); return null; @@ -73,14 +77,14 @@ function parseCurrentPolicy(raw) { * Build the openshell policy set command with properly quoted arguments. 
*/ function buildPolicySetCommand(policyFile, sandboxName) { - return `openshell policy set --policy "${policyFile}" --wait "${sandboxName}"`; + return `openshell policy set --policy ${shellQuote(policyFile)} --wait ${shellQuote(sandboxName)}`; } /** * Build the openshell policy get command with properly quoted arguments. */ function buildPolicyGetCommand(sandboxName) { - return `openshell policy get --full "${sandboxName}" 2>/dev/null`; + return `openshell policy get --full ${shellQuote(sandboxName)} 2>/dev/null`; } function applyPreset(sandboxName, presetName) { @@ -166,15 +170,17 @@ function applyPreset(sandboxName, presetName) { } // Write temp file and apply - const tmpFile = path.join(os.tmpdir(), `nemoclaw-policy-${Date.now()}.yaml`); - fs.writeFileSync(tmpFile, merged, "utf-8"); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-policy-")); + const tmpFile = path.join(tmpDir, "policy.yaml"); + fs.writeFileSync(tmpFile, merged, { encoding: "utf-8", mode: 0o600 }); try { run(buildPolicySetCommand(tmpFile, sandboxName)); console.log(` Applied preset: ${presetName}`); } finally { - fs.unlinkSync(tmpFile); + try { fs.unlinkSync(tmpFile); } catch {} + try { fs.rmdirSync(tmpDir); } catch {} } // Update registry diff --git a/bin/lib/runner.js b/bin/lib/runner.js index e643394f0..d0ca4ceea 100644 --- a/bin/lib/runner.js +++ b/bin/lib/runner.js @@ -58,4 +58,31 @@ function runCapture(cmd, opts = {}) { } } -module.exports = { ROOT, SCRIPTS, run, runCapture, runInteractive }; +/** + * Shell-quote a value for safe interpolation into bash -c strings. + * Wraps in single quotes and escapes embedded single quotes. + */ +function shellQuote(value) { + return `'${String(value).replace(/'/g, `'\\''`)}'`; +} + +/** + * Validate a name (sandbox, instance, container) against RFC 1123 label rules. + * Rejects shell metacharacters, path traversal, and empty/overlength names. 
+ */ +function validateName(name, label = "name") { + if (!name || typeof name !== "string") { + throw new Error(`${label} is required`); + } + if (name.length > 63) { + throw new Error(`${label} too long (max 63 chars): '${name.slice(0, 20)}...'`); + } + if (!/^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/.test(name)) { + throw new Error( + `Invalid ${label}: '${name}'. Must be lowercase alphanumeric with optional internal hyphens.` + ); + } + return name; +} + +module.exports = { ROOT, SCRIPTS, run, runCapture, runInteractive, shellQuote, validateName }; diff --git a/bin/nemoclaw.js b/bin/nemoclaw.js index 35107fdc8..436e52c2f 100755 --- a/bin/nemoclaw.js +++ b/bin/nemoclaw.js @@ -2,7 +2,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -const { execSync, spawnSync } = require("child_process"); +const { execFileSync, spawnSync } = require("child_process"); const path = require("path"); const fs = require("fs"); const os = require("os"); @@ -10,7 +10,7 @@ const os = require("os"); // Load .env files before any module reads process.env (e.g. ports.js) require("./lib/env"); -const { ROOT, SCRIPTS, run, runCapture, runInteractive } = require("./lib/runner"); +const { ROOT, SCRIPTS, run, runCapture, runInteractive, shellQuote, validateName } = require("./lib/runner"); const { ensureApiKey, ensureGithubToken, @@ -32,10 +32,6 @@ const GLOBAL_COMMANDS = new Set([ const REMOTE_UNINSTALL_URL = "https://raw.githubusercontent.com/NVIDIA/NemoClaw/refs/heads/main/uninstall.sh"; -function shellQuote(value) { - return `'${String(value).replace(/'/g, `'\\''`)}'`; -} - function resolveUninstallScript() { const candidates = [ path.join(ROOT, "uninstall.sh"), @@ -87,12 +83,12 @@ async function setup() { await ensureApiKey(); const { defaultSandbox } = registry.listSandboxes(); const safeName = defaultSandbox && /^[a-z0-9][a-z0-9-]*[a-z0-9]$/.test(defaultSandbox) ? 
defaultSandbox : ""; - run(`bash "${SCRIPTS}/setup.sh" ${safeName}`); + run(`bash "${SCRIPTS}/setup.sh" ${shellQuote(safeName)}`); } async function setupSpark() { await ensureApiKey(); - run(`sudo -E NVIDIA_API_KEY="${process.env.NVIDIA_API_KEY}" bash "${SCRIPTS}/setup-spark.sh"`); + run(`sudo -E NVIDIA_API_KEY=${shellQuote(process.env.NVIDIA_API_KEY)} bash "${SCRIPTS}/setup-spark.sh"`); } async function deploy(instanceName) { @@ -109,7 +105,9 @@ async function deploy(instanceName) { if (isRepoPrivate("NVIDIA/OpenShell")) { await ensureGithubToken(); } + validateName(instanceName, "instance name"); const name = instanceName; + const qname = shellQuote(name); const gpu = process.env.NEMOCLAW_GPU || "a2-highgpu-1g:nvidia-tesla-a100:1"; console.log(""); @@ -117,7 +115,7 @@ async function deploy(instanceName) { console.log(""); try { - execSync("which brev", { stdio: "ignore" }); + execFileSync("which", ["brev"], { stdio: "ignore" }); } catch { console.error("brev CLI not found. Install: https://brev.nvidia.com"); process.exit(1); @@ -125,13 +123,16 @@ async function deploy(instanceName) { let exists = false; try { - const out = execSync("brev ls 2>&1", { encoding: "utf-8" }); + const out = execFileSync("brev", ["ls"], { encoding: "utf-8" }); exists = out.includes(name); - } catch {} + } catch (err) { + if (err.stdout && err.stdout.includes(name)) exists = true; + if (err.stderr && err.stderr.includes(name)) exists = true; + } if (!exists) { console.log(` Creating Brev instance '${name}' (${gpu})...`); - run(`brev create ${name} --gpu "${gpu}"`); + run(`brev create ${qname} --gpu ${shellQuote(gpu)}`); } else { console.log(` Brev instance '${name}' already exists.`); } @@ -141,7 +142,7 @@ async function deploy(instanceName) { console.log(" Waiting for SSH..."); for (let i = 0; i < 60; i++) { try { - execSync(`ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no ${name} 'echo ok' 2>/dev/null`, { encoding: "utf-8", stdio: "pipe" }); + execFileSync("ssh", ["-o", 
"ConnectTimeout=5", "-o", "StrictHostKeyChecking=no", name, "echo", "ok"], { encoding: "utf-8", stdio: "ignore" }); break; } catch { if (i === 59) { @@ -153,38 +154,43 @@ async function deploy(instanceName) { } console.log(" Syncing NemoClaw to VM..."); - run(`ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR ${name} 'mkdir -p /home/ubuntu/nemoclaw'`); - run(`rsync -az --delete --exclude node_modules --exclude .git --exclude src -e "ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR" "${ROOT}/scripts" "${ROOT}/Dockerfile" "${ROOT}/nemoclaw" "${ROOT}/nemoclaw-blueprint" "${ROOT}/bin" "${ROOT}/package.json" ${name}:/home/ubuntu/nemoclaw/`); + run(`ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR ${qname} 'mkdir -p /home/ubuntu/nemoclaw'`); + run(`rsync -az --delete --exclude node_modules --exclude .git --exclude src -e "ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR" "${ROOT}/scripts" "${ROOT}/Dockerfile" "${ROOT}/nemoclaw" "${ROOT}/nemoclaw-blueprint" "${ROOT}/bin" "${ROOT}/package.json" ${qname}:/home/ubuntu/nemoclaw/`); - const envLines = [`NVIDIA_API_KEY=${process.env.NVIDIA_API_KEY}`]; + const envLines = [`NVIDIA_API_KEY=${shellQuote(process.env.NVIDIA_API_KEY || "")}`]; const ghToken = process.env.GITHUB_TOKEN; - if (ghToken) envLines.push(`GITHUB_TOKEN=${ghToken}`); + if (ghToken) envLines.push(`GITHUB_TOKEN=${shellQuote(ghToken)}`); const tgToken = getCredential("TELEGRAM_BOT_TOKEN"); - if (tgToken) envLines.push(`TELEGRAM_BOT_TOKEN=${tgToken}`); - const envTmp = path.join(os.tmpdir(), `nemoclaw-env-${Date.now()}`); + if (tgToken) envLines.push(`TELEGRAM_BOT_TOKEN=${shellQuote(tgToken)}`); + const envDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-env-")); + const envTmp = path.join(envDir, "env"); fs.writeFileSync(envTmp, envLines.join("\n") + "\n", { mode: 0o600 }); - run(`scp -q -o StrictHostKeyChecking=no -o LogLevel=ERROR "${envTmp}" ${name}:/home/ubuntu/nemoclaw/.env`); - fs.unlinkSync(envTmp); + try { + run(`scp -q -o StrictHostKeyChecking=no 
-o LogLevel=ERROR ${shellQuote(envTmp)} ${qname}:/home/ubuntu/nemoclaw/.env`); + } finally { + try { fs.unlinkSync(envTmp); } catch {} + try { fs.rmdirSync(envDir); } catch {} + } console.log(" Running setup..."); - runInteractive(`ssh -t -o StrictHostKeyChecking=no -o LogLevel=ERROR ${name} 'cd /home/ubuntu/nemoclaw && set -a && . .env && set +a && bash scripts/brev-setup.sh'`); + runInteractive(`ssh -t -o StrictHostKeyChecking=no -o LogLevel=ERROR ${qname} 'cd /home/ubuntu/nemoclaw && set -a && . .env && set +a && bash scripts/brev-setup.sh'`); if (tgToken) { console.log(" Starting services..."); - run(`ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR ${name} 'cd /home/ubuntu/nemoclaw && set -a && . .env && set +a && bash scripts/start-services.sh'`); + run(`ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR ${qname} 'cd /home/ubuntu/nemoclaw && set -a && . .env && set +a && bash scripts/start-services.sh'`); } console.log(""); console.log(" Connecting to sandbox..."); console.log(""); - runInteractive(`ssh -t -o StrictHostKeyChecking=no -o LogLevel=ERROR ${name} 'cd /home/ubuntu/nemoclaw && set -a && . .env && set +a && openshell sandbox connect nemoclaw'`); + runInteractive(`ssh -t -o StrictHostKeyChecking=no -o LogLevel=ERROR ${qname} 'cd /home/ubuntu/nemoclaw && set -a && . .env && set +a && openshell sandbox connect nemoclaw'`); } async function start() { await ensureApiKey(); const { defaultSandbox } = registry.listSandboxes(); const safeName = defaultSandbox && /^[a-zA-Z0-9._-]+$/.test(defaultSandbox) ? defaultSandbox : null; - const sandboxEnv = safeName ? `SANDBOX_NAME="${safeName}"` : ""; + const sandboxEnv = safeName ? 
`SANDBOX_NAME=${shellQuote(safeName)}` : ""; run(`${sandboxEnv} bash "${SCRIPTS}/start-services.sh"`); } @@ -275,9 +281,10 @@ function listSandboxes() { // ── Sandbox-scoped actions ─────────────────────────────────────── function sandboxConnect(sandboxName) { + const qn = shellQuote(sandboxName); // Ensure port forward is alive before connecting - run(`openshell forward start --background ${DASHBOARD_PORT} "${sandboxName}" 2>/dev/null || true`, { ignoreError: true }); - runInteractive(`openshell sandbox connect "${sandboxName}"`); + run(`openshell forward start --background ${DASHBOARD_PORT} ${qn} 2>/dev/null || true`, { ignoreError: true }); + runInteractive(`openshell sandbox connect ${qn}`); } function sandboxStatus(sandboxName) { @@ -292,7 +299,7 @@ function sandboxStatus(sandboxName) { } // openshell info - run(`openshell sandbox get "${sandboxName}" 2>/dev/null || true`, { ignoreError: true }); + run(`openshell sandbox get ${shellQuote(sandboxName)} 2>/dev/null || true`, { ignoreError: true }); // NIM health const nimStat = nim.nimStatus(sandboxName); @@ -305,7 +312,7 @@ function sandboxStatus(sandboxName) { function sandboxLogs(sandboxName, follow) { const followFlag = follow ? 
" --tail" : ""; - run(`openshell logs "${sandboxName}"${followFlag}`); + run(`openshell logs ${shellQuote(sandboxName)}${followFlag}`); } async function sandboxPolicyAdd(sandboxName) { @@ -348,7 +355,7 @@ function sandboxDestroy(sandboxName) { nim.stopNimContainer(sandboxName); console.log(` Deleting sandbox '${sandboxName}'...`); - run(`openshell sandbox delete "${sandboxName}" 2>/dev/null || true`, { ignoreError: true }); + run(`openshell sandbox delete ${shellQuote(sandboxName)} 2>/dev/null || true`, { ignoreError: true }); registry.removeSandbox(sandboxName); console.log(` ✓ Sandbox '${sandboxName}' destroyed`); @@ -433,6 +440,7 @@ const [cmd, ...args] = process.argv.slice(2); // Sandbox-scoped commands: nemoclaw const sandbox = registry.getSandbox(cmd); if (sandbox) { + validateName(cmd, "sandbox name"); const action = args[0] || "connect"; const actionArgs = args.slice(1); diff --git a/scripts/telegram-bridge.js b/scripts/telegram-bridge.js index 80a29069d..e885b09f2 100755 --- a/scripts/telegram-bridge.js +++ b/scripts/telegram-bridge.js @@ -17,8 +17,10 @@ */ const https = require("https"); -const { execSync, spawn } = require("child_process"); +const { execFileSync, spawn } = require("child_process"); +const crypto = require("crypto"); const { resolveOpenshell } = require("../bin/lib/resolve-openshell"); +const { shellQuote, validateName } = require("../bin/lib/runner"); const OPENSHELL = resolveOpenshell(); if (!OPENSHELL) { @@ -29,6 +31,7 @@ if (!OPENSHELL) { const TOKEN = process.env.TELEGRAM_BOT_TOKEN; const API_KEY = process.env.NVIDIA_API_KEY; const SANDBOX = process.env.SANDBOX_NAME || "nemoclaw"; +try { validateName(SANDBOX, "SANDBOX_NAME"); } catch (e) { console.error(e.message); process.exit(1); } const ALLOWED_CHATS = process.env.ALLOWED_CHAT_IDS ? 
process.env.ALLOWED_CHAT_IDS.split(",").map((s) => s.trim()) : null; @@ -92,14 +95,18 @@ async function sendTyping(chatId) { function runAgentInSandbox(message, sessionId) { return new Promise((resolve) => { - const sshConfig = execSync(`"${OPENSHELL}" sandbox ssh-config "${SANDBOX}"`, { encoding: "utf-8" }); + const sshConfig = execFileSync(OPENSHELL, ["sandbox", "ssh-config", SANDBOX], { encoding: "utf-8" }); - // Write temp ssh config - const confPath = `/tmp/nemoclaw-tg-ssh-${sessionId}.conf`; - require("fs").writeFileSync(confPath, sshConfig); + // Write temp ssh config with unpredictable name + const confDir = require("fs").mkdtempSync("/tmp/nemoclaw-tg-ssh-"); + const confPath = `${confDir}/config`; + require("fs").writeFileSync(confPath, sshConfig, { mode: 0o600 }); - const escaped = message.replace(/'/g, "'\\''"); - const cmd = `export NVIDIA_API_KEY='${API_KEY}' && nemoclaw-start openclaw agent --agent main --local -m '${escaped}' --session-id 'tg-${sessionId}'`; + // Pass message and API key via stdin to avoid shell interpolation. + // The remote command reads them from environment/stdin rather than + // embedding user content in a shell string. 
+ const safeSessionId = String(sessionId).replace(/[^a-zA-Z0-9-]/g, ""); + const cmd = `export NVIDIA_API_KEY=${shellQuote(API_KEY)} && nemoclaw-start openclaw agent --agent main --local -m ${shellQuote(message)} --session-id ${shellQuote("tg-" + safeSessionId)}`; const proc = spawn("ssh", ["-T", "-F", confPath, `openshell-${SANDBOX}`, cmd], { timeout: 120000, @@ -113,7 +120,7 @@ function runAgentInSandbox(message, sessionId) { proc.stderr.on("data", (d) => (stderr += d.toString())); proc.on("close", (code) => { - try { require("fs").unlinkSync(confPath); } catch {} + try { require("fs").unlinkSync(confPath); require("fs").rmdirSync(confDir); } catch {} // Extract the actual agent response — skip setup lines const lines = stdout.split("\n"); diff --git a/test/onboard-readiness.test.js b/test/onboard-readiness.test.js index 4eda74a57..13a44817f 100644 --- a/test/onboard-readiness.test.js +++ b/test/onboard-readiness.test.js @@ -76,25 +76,25 @@ describe("sandbox readiness parsing", () => { describe("WSL sandbox name handling", () => { it("buildPolicySetCommand preserves hyphenated sandbox name", () => { const cmd = buildPolicySetCommand("/tmp/policy.yaml", "my-assistant"); - assert.ok(cmd.includes('"my-assistant"'), `Expected quoted name in: ${cmd}`); + assert.ok(cmd.includes("'my-assistant'"), `Expected quoted name in: ${cmd}`); assert.ok(!cmd.includes(' my-assistant '), "Name must be quoted, not bare"); }); it("buildPolicyGetCommand preserves hyphenated sandbox name", () => { const cmd = buildPolicyGetCommand("my-assistant"); - assert.ok(cmd.includes('"my-assistant"'), `Expected quoted name in: ${cmd}`); + assert.ok(cmd.includes("'my-assistant'"), `Expected quoted name in: ${cmd}`); }); it("buildPolicySetCommand preserves multi-hyphen names", () => { const cmd = buildPolicySetCommand("/tmp/p.yaml", "my-dev-assistant-v2"); - assert.ok(cmd.includes('"my-dev-assistant-v2"')); + assert.ok(cmd.includes("'my-dev-assistant-v2'")); }); it("buildPolicySetCommand preserves 
single-char name", () => { // If WSL truncates "my-assistant" to "m", the single-char name should // still be quoted and passed through unchanged const cmd = buildPolicySetCommand("/tmp/p.yaml", "m"); - assert.ok(cmd.includes('"m"')); + assert.ok(cmd.includes("'m'")); }); it("applyPreset rejects truncated/invalid sandbox name", () => { diff --git a/test/policies.test.js b/test/policies.test.js index ec1a02121..040910bb7 100644 --- a/test/policies.test.js +++ b/test/policies.test.js @@ -38,6 +38,11 @@ describe("policies", () => { it("returns null for nonexistent preset", () => { assert.equal(policies.loadPreset("nonexistent"), null); }); + + it("rejects path traversal attempts", () => { + assert.equal(policies.loadPreset("../../etc/passwd"), null); + assert.equal(policies.loadPreset("../../../etc/shadow"), null); + }); }); describe("getPresetEndpoints", () => { @@ -66,28 +71,28 @@ describe("policies", () => { }); describe("buildPolicySetCommand", () => { - it("quotes sandbox name to prevent argument splitting", () => { + it("shell-quotes sandbox name to prevent injection", () => { const cmd = policies.buildPolicySetCommand("/tmp/policy.yaml", "my-assistant"); - assert.equal(cmd, 'openshell policy set --policy "/tmp/policy.yaml" --wait "my-assistant"'); + assert.equal(cmd, "openshell policy set --policy '/tmp/policy.yaml' --wait 'my-assistant'"); }); - it("handles sandbox names with spaces", () => { - const cmd = policies.buildPolicySetCommand("/tmp/policy.yaml", "my sandbox"); - assert.ok(cmd.includes('"my sandbox"'), "sandbox name must be quoted"); + it("escapes shell metacharacters in sandbox name", () => { + const cmd = policies.buildPolicySetCommand("/tmp/policy.yaml", "test; whoami"); + assert.ok(cmd.includes("'test; whoami'"), "metacharacters must be shell-quoted"); }); it("places --wait before the sandbox name", () => { const cmd = policies.buildPolicySetCommand("/tmp/policy.yaml", "test-box"); const waitIdx = cmd.indexOf("--wait"); - const nameIdx = 
cmd.indexOf('"test-box"'); + const nameIdx = cmd.indexOf("'test-box'"); assert.ok(waitIdx < nameIdx, "--wait must come before sandbox name"); }); }); describe("buildPolicyGetCommand", () => { - it("quotes sandbox name", () => { + it("shell-quotes sandbox name", () => { const cmd = policies.buildPolicyGetCommand("my-assistant"); - assert.equal(cmd, 'openshell policy get --full "my-assistant" 2>/dev/null'); + assert.equal(cmd, "openshell policy get --full 'my-assistant' 2>/dev/null"); }); }); diff --git a/test/runner.test.js b/test/runner.test.js index 024b564f6..ffe064fc0 100644 --- a/test/runner.test.js +++ b/test/runner.test.js @@ -81,4 +81,130 @@ describe("runner helpers", () => { assert.equal(calls[0][2].env.OPENSHELL_CLUSTER_IMAGE, "ghcr.io/nvidia/openshell/cluster:0.0.12"); assert.equal(calls[0][2].env.PATH, "/usr/local/bin:/usr/bin"); }); + + describe("shellQuote", () => { + it("wraps in single quotes", () => { + const { shellQuote } = require(runnerPath); + assert.equal(shellQuote("hello"), "'hello'"); + }); + + it("escapes embedded single quotes", () => { + const { shellQuote } = require(runnerPath); + assert.equal(shellQuote("it's"), "'it'\\''s'"); + }); + + it("neutralizes shell metacharacters", () => { + const { shellQuote } = require(runnerPath); + const dangerous = "test; rm -rf /"; + const quoted = shellQuote(dangerous); + assert.equal(quoted, "'test; rm -rf /'"); + const result = spawnSync("bash", ["-c", `echo ${quoted}`], { encoding: "utf-8" }); + assert.equal(result.stdout.trim(), dangerous); + }); + + it("handles backticks and dollar signs", () => { + const { shellQuote } = require(runnerPath); + const payload = "test`whoami`$HOME"; + const quoted = shellQuote(payload); + const result = spawnSync("bash", ["-c", `echo ${quoted}`], { encoding: "utf-8" }); + assert.equal(result.stdout.trim(), payload); + }); + }); + + describe("validateName", () => { + it("accepts valid RFC 1123 names", () => { + const { validateName } = require(runnerPath); + 
assert.equal(validateName("my-sandbox"), "my-sandbox"); + assert.equal(validateName("test123"), "test123"); + assert.equal(validateName("a"), "a"); + }); + + it("rejects names with shell metacharacters", () => { + const { validateName } = require(runnerPath); + assert.throws(() => validateName("test; whoami"), /Invalid/); + assert.throws(() => validateName("test`id`"), /Invalid/); + assert.throws(() => validateName("test$(cat /etc/passwd)"), /Invalid/); + assert.throws(() => validateName("../etc/passwd"), /Invalid/); + }); + + it("rejects empty and overlength names", () => { + const { validateName } = require(runnerPath); + assert.throws(() => validateName(""), /required/); + assert.throws(() => validateName(null), /required/); + assert.throws(() => validateName("a".repeat(64)), /too long/); + }); + + it("rejects uppercase and special characters", () => { + const { validateName } = require(runnerPath); + assert.throws(() => validateName("MyBox"), /Invalid/); + assert.throws(() => validateName("my_box"), /Invalid/); + assert.throws(() => validateName("-leading"), /Invalid/); + assert.throws(() => validateName("trailing-"), /Invalid/); + }); + }); + + describe("regression guards", () => { + it("nemoclaw.js does not use execSync", () => { + const fs = require("fs"); + const src = fs.readFileSync(path.join(__dirname, "..", "bin", "nemoclaw.js"), "utf-8"); + const lines = src.split("\n"); + for (let i = 0; i < lines.length; i++) { + if (lines[i].includes("execSync") && !lines[i].includes("execFileSync")) { + assert.fail(`bin/nemoclaw.js:${i + 1} uses execSync — use execFileSync instead`); + } + } + }); + + it("no duplicate shellQuote definitions in bin/", () => { + const fs = require("fs"); + const binDir = path.join(__dirname, "..", "bin"); + const files = []; + function walk(dir) { + for (const f of fs.readdirSync(dir, { withFileTypes: true })) { + if (f.isDirectory() && f.name !== "node_modules") walk(path.join(dir, f.name)); + else if (f.name.endsWith(".js")) 
files.push(path.join(dir, f.name)); + } + } + walk(binDir); + + const defs = []; + for (const file of files) { + const src = fs.readFileSync(file, "utf-8"); + if (src.includes("function shellQuote")) { + defs.push(file.replace(binDir, "bin")); + } + } + assert.equal(defs.length, 1, `Expected 1 shellQuote definition, found ${defs.length}: ${defs.join(", ")}`); + assert.ok(defs[0].includes("runner"), `shellQuote should be in runner.js, found in ${defs[0]}`); + }); + + it("CLI rejects malicious sandbox names before shell commands (e2e)", () => { + const fs = require("fs"); + const os = require("os"); + const canaryDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-canary-")); + const canary = path.join(canaryDir, "executed"); + try { + const result = spawnSync("node", [ + path.join(__dirname, "..", "bin", "nemoclaw.js"), + `test; touch ${canary}`, + "connect", + ], { + encoding: "utf-8", + timeout: 10000, + cwd: path.join(__dirname, ".."), + }); + assert.notEqual(result.status, 0, "CLI should reject malicious sandbox name"); + assert.equal(fs.existsSync(canary), false, "shell payload must never execute"); + } finally { + fs.rmSync(canaryDir, { recursive: true, force: true }); + } + }); + + it("telegram bridge validates SANDBOX_NAME on startup", () => { + const fs = require("fs"); + const src = fs.readFileSync(path.join(__dirname, "..", "scripts", "telegram-bridge.js"), "utf-8"); + assert.ok(src.includes("validateName(SANDBOX"), "telegram-bridge.js must validate SANDBOX_NAME"); + assert.ok(!src.includes("execSync"), "telegram-bridge.js should not use execSync"); + }); + }); }); From ce7daaf820561ca70a7d406edc597c5b32e9ad8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aaron=20Erickson=20=F0=9F=A6=9E?= Date: Sat, 21 Mar 2026 12:50:56 -0700 Subject: [PATCH 05/12] docs: add community feedback invitation for policy presets (#600) * docs: add community feedback invitation for policy presets * docs: link baseline policy reference to the YAML file on GitHub * docs: apply 
suggestion from @naderkhalil --------- Co-authored-by: Carlos Villela --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 56b060ec7..8184f74b3 100644 --- a/README.md +++ b/README.md @@ -239,7 +239,7 @@ When the agent tries to reach an unlisted host, OpenShell blocks the request and ## Configuring Sandbox Policy The sandbox policy is defined in a declarative YAML file and enforced by the OpenShell runtime. -NemoClaw ships a strict baseline in `nemoclaw-blueprint/policies/openclaw-sandbox.yaml` that denies all network egress except explicitly listed endpoints. +NemoClaw ships a default policy in [`nemoclaw-blueprint/policies/openclaw-sandbox.yaml`](https://github.com/NVIDIA/NemoClaw/blob/main/nemoclaw-blueprint/policies/openclaw-sandbox.yaml) that denies all network egress except explicitly listed endpoints. Operators can customize the policy in two ways: @@ -250,6 +250,8 @@ Operators can customize the policy in two ways: NemoClaw includes preset policy files for common integrations such as PyPI, Docker Hub, Slack, and Jira in `nemoclaw-blueprint/policies/presets/`. Apply a preset as-is or use it as a starting template. +NemoClaw is an open project — we are still determining which presets to ship by default. If you have suggestions, please open an [issue](https://github.com/NVIDIA/NemoClaw/issues) or [discussion](https://github.com/NVIDIA/NemoClaw/discussions). + When the agent attempts to reach an endpoint not covered by the policy, OpenShell blocks the request and surfaces it in the TUI (`openshell term`) for the operator to approve or deny in real time. Approved endpoints persist for the current session only. For step-by-step instructions, see [Customize Network Policy](https://docs.nvidia.com/nemoclaw/latest/network-policy/customize-network-policy.html). 
For the underlying enforcement details, see the OpenShell [Policy Schema](https://docs.nvidia.com/openshell/latest/reference/policy-schema.html) and [Sandbox Policies](https://docs.nvidia.com/openshell/latest/sandboxes/policies.html) documentation. From 9fbc64449ba63ddc228ab418809f4c979d5e578b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aaron=20Erickson=20=F0=9F=A6=9E?= Date: Sat, 21 Mar 2026 13:06:56 -0700 Subject: [PATCH 06/12] docs: replace "strict baseline" with "default" for policy language (#602) --- README.md | 2 +- docs/about/how-it-works.md | 2 +- docs/reference/architecture.md | 2 +- nemoclaw-blueprint/policies/openclaw-sandbox.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8184f74b3..e649220d1 100644 --- a/README.md +++ b/README.md @@ -223,7 +223,7 @@ Local inference options such as Ollama and vLLM are still experimental. On macOS ## Protection Layers -The sandbox starts with a strict baseline policy that controls network egress and filesystem access: +The sandbox starts with a default policy that controls network egress and filesystem access: | Layer | What it protects | When it applies | |------------|-----------------------------------------------------|-----------------------------| diff --git a/docs/about/how-it-works.md b/docs/about/how-it-works.md index c966c75e7..10e70a1eb 100644 --- a/docs/about/how-it-works.md +++ b/docs/about/how-it-works.md @@ -117,7 +117,7 @@ NemoClaw routes inference to NVIDIA Endpoints, specifically Nemotron 3 Super 120 ## Network and Filesystem Policy -The sandbox starts with a strict baseline policy defined in `openclaw-sandbox.yaml`. +The sandbox starts with a default policy defined in `openclaw-sandbox.yaml`. This policy controls which network endpoints the agent can reach and which filesystem paths it can access. - For network, only endpoints listed in the policy are allowed. 
diff --git a/docs/reference/architecture.md b/docs/reference/architecture.md index 88d48cb57..74369d654 100644 --- a/docs/reference/architecture.md +++ b/docs/reference/architecture.md @@ -60,7 +60,7 @@ nemoclaw-blueprint/ ├── orchestrator/ │ └── runner.py CLI runner — plan / apply / status ├── policies/ -│ └── openclaw-sandbox.yaml Strict baseline network + filesystem policy +│ └── openclaw-sandbox.yaml Default network + filesystem policy ``` ### Blueprint Lifecycle diff --git a/nemoclaw-blueprint/policies/openclaw-sandbox.yaml b/nemoclaw-blueprint/policies/openclaw-sandbox.yaml index b8acba476..92f0c50e3 100644 --- a/nemoclaw-blueprint/policies/openclaw-sandbox.yaml +++ b/nemoclaw-blueprint/policies/openclaw-sandbox.yaml @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # -# Strict baseline policy for the OpenClaw sandbox. +# Default policy for the OpenClaw sandbox. # Principle: deny by default, allow only what's needed for core functionality. # Dynamic updates (network_policies, inference) can be applied post-creation # via `openshell policy set`. Static fields are effectively creation-locked. 
From 35389ef32b07632b1677dabe259c35f71a9a24de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aaron=20Erickson=20=F0=9F=A6=9E?= Date: Sat, 21 Mar 2026 13:25:17 -0700 Subject: [PATCH 07/12] docs: rename strict policy tier to default (#603) * docs: rename strict policy tier to default * docs: remove remaining "strict" language from docs and comments --- docs/about/how-it-works.md | 2 +- docs/about/overview.md | 2 +- docs/reference/network-policies.md | 2 +- nemoclaw-blueprint/policies/openclaw-sandbox.yaml | 2 +- scripts/walkthrough.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/about/how-it-works.md b/docs/about/how-it-works.md index 10e70a1eb..a6f15625a 100644 --- a/docs/about/how-it-works.md +++ b/docs/about/how-it-works.md @@ -44,7 +44,7 @@ flowchart TB subgraph Sandbox["OpenShell Sandbox"] AGENT[OpenClaw agent] INF[NVIDIA inference, routed] - NET[strict network policy] + NET[default network policy] FS[filesystem isolation] AGENT --- INF diff --git a/docs/about/overview.md b/docs/about/overview.md index 0d4532517..23a49126e 100644 --- a/docs/about/overview.md +++ b/docs/about/overview.md @@ -32,7 +32,7 @@ By combining powerful open source models with built-in safety measures, NemoClaw | Capability | Description | |-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------| -| Sandbox OpenClaw | Creates an OpenShell sandbox pre-configured for OpenClaw, with strict filesystem and network policies applied from the first boot. | +| Sandbox OpenClaw | Creates an OpenShell sandbox pre-configured for OpenClaw, with filesystem and network policies applied from the first boot. | | Route inference | Configures OpenShell inference routing so agent traffic flows through cloud-hosted Nemotron 3 Super 120B via [build.nvidia.com](https://build.nvidia.com). 
| | Manage the lifecycle | Handles blueprint versioning, digest verification, and sandbox setup. | diff --git a/docs/reference/network-policies.md b/docs/reference/network-policies.md index f437088d5..bfbe74e20 100644 --- a/docs/reference/network-policies.md +++ b/docs/reference/network-policies.md @@ -20,7 +20,7 @@ status: published # Network Policies -NemoClaw runs with a strict-by-default network policy. +NemoClaw runs with a deny-by-default network policy. The sandbox can only reach endpoints that are explicitly allowed. Any request to an unlisted destination is intercepted by OpenShell, and the operator is prompted to approve or deny it in real time through the TUI. diff --git a/nemoclaw-blueprint/policies/openclaw-sandbox.yaml b/nemoclaw-blueprint/policies/openclaw-sandbox.yaml index 92f0c50e3..b4d960cc9 100644 --- a/nemoclaw-blueprint/policies/openclaw-sandbox.yaml +++ b/nemoclaw-blueprint/policies/openclaw-sandbox.yaml @@ -7,7 +7,7 @@ # via `openshell policy set`. Static fields are effectively creation-locked. # # Policy tiers (future): -# strict — this file. Minimum for onboard + basic agent operation. +# default — this file. Minimum for onboard + basic agent operation. # relaxed — adds third-party model providers, broader web access. # # To add endpoints: update this file and re-run `nemoclaw onboard` diff --git a/scripts/walkthrough.sh b/scripts/walkthrough.sh index 8dcbf9239..b50e24b06 100755 --- a/scripts/walkthrough.sh +++ b/scripts/walkthrough.sh @@ -8,7 +8,7 @@ # LEFT: OpenClaw agent (chat) # RIGHT: OpenShell TUI (monitor + approve network egress) # -# The agent runs inside a sandboxed environment with a strict network +# The agent runs inside a sandboxed environment with a controlled network # policy. When it tries to access a service not in the allow list, # the TUI prompts the operator to approve or deny the request. 
# From 4718ac111dc74ce944b84416e961ad03336175f6 Mon Sep 17 00:00:00 2001 From: jnun Date: Sat, 21 Mar 2026 15:44:03 -0500 Subject: [PATCH 08/12] fix: address review findings for docstring coverage and blockquote style Add missing docstrings to runner.py (log, progress, emit_run_id, load_blueprint, main) to meet the 80% coverage threshold. Normalize README blockquote headings to match existing emoji + bold style. Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 6 +++--- nemoclaw-blueprint/orchestrator/runner.py | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e649220d1..b9d1f68c7 100644 --- a/README.md +++ b/README.md @@ -181,11 +181,11 @@ nemoclaw onboard Or create a `.env` file at the project root (see `.env.example`). -> **Note** +> **ℹ️ Note** > > Changing the dashboard port requires rebuilding the sandbox image because the CORS origin is baked in at build time. Re-run `nemoclaw onboard` after changing `NEMOCLAW_DASHBOARD_PORT`. -> -> **Network exposure** + +> **⚠️ Network exposure** > > When using local inference (Ollama or vLLM), the inference service binds to `0.0.0.0` so that containers can reach it via `host.openshell.internal`. This means the service is reachable from your local network, not just localhost. This is required for the sandbox architecture but should be considered in shared or untrusted network environments. --- diff --git a/nemoclaw-blueprint/orchestrator/runner.py b/nemoclaw-blueprint/orchestrator/runner.py index 27feae496..31f2bc327 100644 --- a/nemoclaw-blueprint/orchestrator/runner.py +++ b/nemoclaw-blueprint/orchestrator/runner.py @@ -30,20 +30,24 @@ def log(msg: str) -> None: + """Print a message to stdout with immediate flush.""" print(msg, flush=True) def progress(pct: int, label: str) -> None: + """Emit a PROGRESS::