From 637d376e0a1054f1ced7a4f367da855e34369136 Mon Sep 17 00:00:00 2001 From: Andy Ratsirarson Date: Sat, 21 Mar 2026 22:49:03 -0700 Subject: [PATCH] feat(onboard): add custom OpenAI-compatible provider option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a "Custom OpenAI-compatible endpoint" option to the onboarding wizard, allowing users to bring any provider that exposes an OpenAI-compatible /v1/chat/completions endpoint (e.g. Google Gemini via AI Studio, OpenRouter, Together AI, LiteLLM). The custom provider follows the same gateway-routed architecture as existing providers: the sandbox talks to inference.local, and the OpenShell gateway proxies to the user's endpoint with credential injection and model rewriting. Non-NVIDIA endpoints may reject OpenAI-specific parameters like "store". Set supportsStore: false in the default openclaw.json model compat to prevent 400 rejections from strict endpoints. This is safe for all providers — NVIDIA and Ollama ignore the flag. Interactive mode prompts for base URL, API key, and model name. Non-interactive mode reads NEMOCLAW_CUSTOM_BASE_URL, NEMOCLAW_CUSTOM_API_KEY, and NEMOCLAW_MODEL. Tested with Google Gemini (gemini-2.5-flash) and local Ollama (llama3.2) to verify backward compatibility. 
--- Dockerfile | 2 +- README.md | 7 +- bin/lib/inference-config.js | 11 ++ bin/lib/onboard.js | 111 +++++++++++++++++-- docs/inference/switch-inference-providers.md | 38 +++++++ nemoclaw-blueprint/blueprint.yaml | 8 ++ test/inference-config.test.js | 32 ++++++ test/onboard-selection.test.js | 56 ++++++++++ 8 files changed, 255 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 04c23227d..fbcb7cbbb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -113,7 +113,7 @@ config = { \ 'baseUrl': 'https://inference.local/v1', \ 'apiKey': 'unused', \ 'api': 'openai-completions', \ - 'models': [{'id': model, 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \ + 'models': [{'id': model, 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096, 'compat': {'supportsStore': False}}] \ } \ }}, \ 'channels': {'defaults': {'configWrites': False}}, \ diff --git a/README.md b/README.md index 287ff0d82..786d3c000 100644 --- a/README.md +++ b/README.md @@ -179,13 +179,16 @@ When something goes wrong, errors may originate from either NemoClaw or the Open ## Inference -Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it to the NVIDIA Endpoint provider. +Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it through the gateway proxy. | Provider | Model | Use Case | |--------------|--------------------------------------|-------------------------------------------------| | NVIDIA Endpoint | `nvidia/nemotron-3-super-120b-a12b` | Production. Requires an NVIDIA API key. | +| Custom OpenAI-compatible | User-specified | Any provider with an OpenAI-compatible `/v1/chat/completions` endpoint. 
| -Get an API key from [build.nvidia.com](https://build.nvidia.com). The `nemoclaw onboard` command prompts for this key during setup. +For the NVIDIA endpoint, get an API key from [build.nvidia.com](https://build.nvidia.com). The `nemoclaw onboard` command prompts for this key during setup. + +For custom providers, select "Custom OpenAI-compatible endpoint" during `nemoclaw onboard` and provide the base URL, API key, and model name. Any provider that exposes an OpenAI-compatible `/v1/chat/completions` endpoint will work. For non-interactive mode, set `NEMOCLAW_PROVIDER=custom`, `NEMOCLAW_CUSTOM_BASE_URL`, `NEMOCLAW_CUSTOM_API_KEY`, and `NEMOCLAW_MODEL`. Local inference options such as Ollama and vLLM are still experimental. On macOS, they also depend on OpenShell host-routing support in addition to the local service itself being reachable on the host. diff --git a/bin/lib/inference-config.js b/bin/lib/inference-config.js index 0f6683dfd..e07a1c635 100644 --- a/bin/lib/inference-config.js +++ b/bin/lib/inference-config.js @@ -51,6 +51,17 @@ function getProviderSelectionConfig(provider, model) { provider, providerLabel: "Local Ollama", }; + case "custom": + return { + endpointType: "custom", + endpointUrl: INFERENCE_ROUTE_URL, + ncpPartner: null, + model: model || null, + profile: DEFAULT_ROUTE_PROFILE, + credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV, + provider, + providerLabel: "Custom Provider", + }; default: return null; } diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 2bbbda577..3b1833e4d 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -27,7 +27,7 @@ const { isUnsupportedMacosRuntime, shouldPatchCoredns, } = require("./platform"); -const { prompt, ensureApiKey, getCredential } = require("./credentials"); +const { prompt, ensureApiKey, getCredential, saveCredential } = require("./credentials"); const registry = require("./registry"); const nim = require("./nim"); const policies = require("./policies"); @@ -209,10 +209,10 @@ function 
getNonInteractiveProvider() { const providerKey = (process.env.NEMOCLAW_PROVIDER || "").trim().toLowerCase(); if (!providerKey) return null; - const validProviders = new Set(["cloud", "ollama", "vllm", "nim"]); + const validProviders = new Set(["cloud", "ollama", "vllm", "nim", "custom"]); if (!validProviders.has(providerKey)) { console.error(` Unsupported NEMOCLAW_PROVIDER: ${providerKey}`); - console.error(" Valid values: cloud, ollama, vllm, nim"); + console.error(" Valid values: cloud, ollama, vllm, nim, custom"); process.exit(1); } @@ -532,6 +532,7 @@ async function setupNim(sandboxName, gpu) { let model = null; let provider = "nvidia-nim"; let nimContainer = null; + let customCreds = null; // Detect local inference options const hasOllama = !!runCapture("command -v ollama", { ignoreError: true }); @@ -570,6 +571,8 @@ async function setupNim(sandboxName, gpu) { options.push({ key: "install-ollama", label: "Install Ollama (macOS)" }); } + options.push({ key: "custom", label: "Custom OpenAI-compatible endpoint (bring your own)" }); + if (options.length > 1) { let selected; @@ -681,6 +684,83 @@ async function setupNim(sandboxName, gpu) { console.log(" ✓ Using existing vLLM on localhost:8000"); provider = "vllm-local"; model = "vllm-local"; + } else if (selected.key === "custom") { + provider = "custom"; + let customBaseUrl; + let customApiKey; + if (isNonInteractive()) { + customBaseUrl = (process.env.NEMOCLAW_CUSTOM_BASE_URL || "").trim(); + customApiKey = (process.env.NEMOCLAW_CUSTOM_API_KEY || "").trim(); + model = requestedModel; + if (!customBaseUrl || !customApiKey || !model) { + console.error(" Custom provider requires NEMOCLAW_CUSTOM_BASE_URL, NEMOCLAW_CUSTOM_API_KEY, and NEMOCLAW_MODEL."); + process.exit(1); + } + } else { + console.log(""); + console.log(" ┌─────────────────────────────────────────────────────────────────┐"); + console.log(" │ Custom OpenAI-compatible provider │"); + console.log(" │ │"); + console.log(" │ Provide a base URL and API key 
for any provider that │"); + console.log(" │ exposes an OpenAI-compatible /v1/chat/completions endpoint. │"); + console.log(" │ │"); + console.log(" │ Examples: │"); + console.log(" │ Google Gemini https://generativelanguage.googleapis.com/v1beta/openai │"); + console.log(" │ OpenRouter https://openrouter.ai/api/v1 │"); + console.log(" │ Together AI https://api.together.xyz/v1 │"); + console.log(" │ LiteLLM http://localhost:4000/v1 │"); + console.log(" └─────────────────────────────────────────────────────────────────┘"); + console.log(""); + + customBaseUrl = (await prompt(" Base URL: ")).trim(); + if (!customBaseUrl) { + console.error(" Base URL is required."); + process.exit(1); + } + + const previousBaseUrl = getCredential("CUSTOM_PROVIDER_BASE_URL"); + saveCredential("CUSTOM_PROVIDER_BASE_URL", customBaseUrl); + + customApiKey = previousBaseUrl === customBaseUrl + ? getCredential("CUSTOM_PROVIDER_API_KEY") + : null; + if (!customApiKey) { + if (previousBaseUrl && previousBaseUrl !== customBaseUrl) { + console.log(" Base URL changed — please enter a new API key."); + } + customApiKey = (await prompt(" API Key: ")).trim(); + if (!customApiKey) { + console.error(" API key is required."); + process.exit(1); + } + saveCredential("CUSTOM_PROVIDER_API_KEY", customApiKey); + console.log(" Key saved to ~/.nemoclaw/credentials.json"); + } else { + console.log(" Using saved API key from credentials."); + } + + model = await prompt(" Model name (e.g. gemini-2.5-flash): "); + if (!model) { + console.error(" Model name is required."); + process.exit(1); + } + } + + // Validate base URL + try { + const parsed = new URL(customBaseUrl); + if (parsed.protocol === "http:" && !["localhost", "127.0.0.1", "::1"].includes(parsed.hostname)) { + console.error(" Insecure http:// URLs are only allowed for localhost. 
Use https:// for remote endpoints."); + process.exit(1); + } + } catch { + console.error(` Invalid URL: ${customBaseUrl}`); + process.exit(1); + } + + // Store credentials for setupInference to use + customCreds = { baseUrl: customBaseUrl, apiKey: customApiKey }; + console.log(` ✓ Using custom provider with model: ${model}`); } // else: cloud — fall through to default below } @@ -703,12 +783,12 @@ async function setupNim(sandboxName, gpu) { registry.updateSandbox(sandboxName, { model, provider, nimContainer }); - return { model, provider }; + return { model, provider, customCreds }; } // ── Step 5: Inference provider ─────────────────────────────────── -async function setupInference(sandboxName, model, provider) { +async function setupInference(sandboxName, model, provider, customCreds) { step(5, 7, "Setting up inference provider"); if (provider === "nvidia-nim") { @@ -769,6 +849,22 @@ async function setupInference(sandboxName, model, provider) { console.error(` ${probe.message}`); process.exit(1); } + } else if (provider === "custom") { + const baseUrl = customCreds?.baseUrl || getCredential("CUSTOM_PROVIDER_BASE_URL"); + const apiKey = customCreds?.apiKey || getCredential("CUSTOM_PROVIDER_API_KEY"); + run( + `openshell provider create --name custom-provider --type openai ` + + `--credential ${shellQuote("OPENAI_API_KEY=" + apiKey)} ` + + `--config ${shellQuote("OPENAI_BASE_URL=" + baseUrl)} 2>&1 || ` + + `openshell provider update custom-provider ` + + `--credential ${shellQuote("OPENAI_API_KEY=" + apiKey)} ` + + `--config ${shellQuote("OPENAI_BASE_URL=" + baseUrl)} 2>&1 || true`, + { ignoreError: true } + ); + run( + `openshell inference set --no-verify --provider custom-provider --model ${shellQuote(model)} 2>/dev/null || true`, + { ignoreError: true } + ); } registry.updateSandbox(sandboxName, { model, provider }); @@ -921,6 +1017,7 @@ function printDashboard(sandboxName, model, provider) { if (provider === "nvidia-nim") providerLabel = "NVIDIA Endpoint API"; 
else if (provider === "vllm-local") providerLabel = "Local vLLM"; else if (provider === "ollama-local") providerLabel = "Local Ollama"; + else if (provider === "custom") providerLabel = "Custom Provider"; console.log(""); console.log(` ${"─".repeat(50)}`); @@ -949,8 +1046,8 @@ async function onboard(opts = {}) { const gpu = await preflight(); await startGateway(gpu); const sandboxName = await createSandbox(gpu); - const { model, provider } = await setupNim(sandboxName, gpu); - await setupInference(sandboxName, model, provider); + const { model, provider, customCreds } = await setupNim(sandboxName, gpu); + await setupInference(sandboxName, model, provider, customCreds); await setupOpenclaw(sandboxName, model, provider); await setupPolicies(sandboxName); printDashboard(sandboxName, model, provider); diff --git a/docs/inference/switch-inference-providers.md b/docs/inference/switch-inference-providers.md index 582c7bf13..9857732c9 100644 --- a/docs/inference/switch-inference-providers.md +++ b/docs/inference/switch-inference-providers.md @@ -67,6 +67,44 @@ You can switch to any of these models at runtime. | `nvidia/llama-3.3-nemotron-super-49b-v1.5` | Nemotron Super 49B v1.5 | 131,072 | 4,096 | | `nvidia/nemotron-3-nano-30b-a3b` | Nemotron 3 Nano 30B | 131,072 | 4,096 | +## Custom OpenAI-Compatible Providers + +You can use any provider that exposes an OpenAI-compatible `/v1/chat/completions` endpoint. + +During `nemoclaw onboard`, select **"Custom OpenAI-compatible endpoint"** and provide: + +- **Base URL** — the provider's API base (e.g. `https://generativelanguage.googleapis.com/v1beta/openai`) +- **API key** — your provider credential +- **Model name** — the model identifier (e.g. 
`gemini-2.5-flash`) + +Examples of compatible providers: + +| Provider | Base URL | +|---|---| +| Google AI Studio (Gemini) | `https://generativelanguage.googleapis.com/v1beta/openai` | +| OpenRouter | `https://openrouter.ai/api/v1` | +| Together AI | `https://api.together.xyz/v1` | +| LiteLLM (local) | `http://localhost:4000/v1` | + +To switch to a custom provider at runtime: + +```console +$ openshell provider create --name custom-provider --type openai \ + --credential "OPENAI_API_KEY=<api-key>" \ + --config "OPENAI_BASE_URL=<base-url>" +$ openshell inference set --no-verify --provider custom-provider --model <model> +``` + +For non-interactive onboarding: + +```console +$ NEMOCLAW_PROVIDER=custom \ + NEMOCLAW_CUSTOM_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai \ + NEMOCLAW_CUSTOM_API_KEY=AIza... \ + NEMOCLAW_MODEL=gemini-2.5-flash \ + nemoclaw onboard --non-interactive +``` + ## Related Topics - [Inference Profiles](../reference/inference-profiles.md) for full profile configuration details. 
diff --git a/nemoclaw-blueprint/blueprint.yaml b/nemoclaw-blueprint/blueprint.yaml index f55f9f651..f0d8748a7 100644 --- a/nemoclaw-blueprint/blueprint.yaml +++ b/nemoclaw-blueprint/blueprint.yaml @@ -11,6 +11,7 @@ profiles: - ncp - nim-local - vllm + - custom description: | NemoClaw blueprint: orchestrates OpenClaw sandbox creation, migration, @@ -54,6 +55,13 @@ components: credential_env: "OPENAI_API_KEY" credential_default: "dummy" + custom: + provider_type: "openai" + provider_name: "custom-provider" + endpoint: "" + model: "" + credential_env: "OPENAI_API_KEY" + policy: base: "sandboxes/openclaw/policy.yaml" additions: diff --git a/test/inference-config.test.js b/test/inference-config.test.js index 2a1a47b80..3adf73939 100644 --- a/test/inference-config.test.js +++ b/test/inference-config.test.js @@ -56,6 +56,38 @@ describe("inference selection config", () => { }); }); + it("maps custom to the sandbox inference route with user-specified model", () => { + assert.deepEqual(getProviderSelectionConfig("custom", "gemini-2.5-flash"), { + endpointType: "custom", + endpointUrl: INFERENCE_ROUTE_URL, + ncpPartner: null, + model: "gemini-2.5-flash", + profile: DEFAULT_ROUTE_PROFILE, + credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV, + provider: "custom", + providerLabel: "Custom Provider", + }); + }); + + it("returns null model for custom provider when no model specified", () => { + const config = getProviderSelectionConfig("custom"); + assert.equal(config.model, null); + assert.equal(config.providerLabel, "Custom Provider"); + }); + + it("returns all expected fields for custom provider", () => { + const config = getProviderSelectionConfig("custom", "gemini-2.5-flash"); + assert.equal(config.endpointType, "custom"); + assert.equal(config.endpointUrl, INFERENCE_ROUTE_URL); + assert.equal(config.ncpPartner, null); + assert.equal(config.credentialEnv, DEFAULT_ROUTE_CREDENTIAL_ENV); + assert.equal(config.profile, DEFAULT_ROUTE_PROFILE); + }); + + it("returns null for unknown 
provider", () => { + assert.equal(getProviderSelectionConfig("unknown-provider"), null); + }); + it("builds a qualified OpenClaw primary model for ollama-local", () => { assert.equal( getOpenClawPrimaryModel("ollama-local", "nemotron-3-nano:30b"), diff --git a/test/onboard-selection.test.js b/test/onboard-selection.test.js index 9000943ba..24d388d6d 100644 --- a/test/onboard-selection.test.js +++ b/test/onboard-selection.test.js @@ -81,4 +81,60 @@ const { setupNim } = require(${onboardPath}); assert.ok(payload.lines.some((line) => line.includes("Press Enter to keep the cloud default"))); assert.ok(payload.lines.some((line) => line.includes("Cloud models:"))); }); + + it("custom provider config uses gateway-routed architecture", () => { + const { getProviderSelectionConfig, INFERENCE_ROUTE_URL } = require("../bin/lib/inference-config"); + const config = getProviderSelectionConfig("custom", "gemini-2.5-flash"); + // Custom provider should route through inference.local like all other providers + assert.equal(config.endpointUrl, INFERENCE_ROUTE_URL); + assert.equal(config.endpointType, "custom"); + assert.equal(config.provider, "custom"); + assert.equal(config.providerLabel, "Custom Provider"); + }); + + it("custom provider URL validation rejects insecure http:// for non-localhost", () => { + const insecureUrls = [ + "http://evil.example.com/v1", + "http://api.openai.com/v1", + ]; + for (const url of insecureUrls) { + const parsed = new URL(url); + const isLocalhost = ["localhost", "127.0.0.1", "::1"].includes(parsed.hostname); + assert.equal(isLocalhost, false, `${url} should not be treated as localhost`); + assert.equal(parsed.protocol, "http:", `${url} should be http`); + // Our validation rejects http:// + non-localhost + } + }); + + it("custom provider URL validation allows http:// for localhost", () => { + const localhostUrls = [ + "http://localhost:4000/v1", + "http://127.0.0.1:8000/v1", + ]; + for (const url of localhostUrls) { + const parsed = new URL(url); + 
const isLocalhost = ["localhost", "127.0.0.1", "::1"].includes(parsed.hostname); + assert.equal(isLocalhost, true, `${url} should be treated as localhost`); + } + }); + + it("custom provider URL validation allows https:// for any host", () => { + const httpsUrls = [ + "https://generativelanguage.googleapis.com/v1beta/openai", + "https://openrouter.ai/api/v1", + "https://api.together.xyz/v1", + ]; + for (const url of httpsUrls) { + const parsed = new URL(url); + assert.equal(parsed.protocol, "https:"); + // Our validation allows all https:// URLs + } + }); + + it("custom provider URL validation rejects malformed URLs", () => { + const badUrls = ["not-a-url", ""]; + for (const url of badUrls) { + assert.throws(() => new URL(url), `${url} should be rejected`); + } + }); });