Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ config = { \
'baseUrl': 'https://inference.local/v1', \
'apiKey': 'unused', \
'api': 'openai-completions', \
'models': [{'id': model, 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
'models': [{'id': model, 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096, 'compat': {'supportsStore': False}}] \
} \
}}, \
'channels': {'defaults': {'configWrites': False}}, \
Expand Down
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,13 +179,16 @@ When something goes wrong, errors may originate from either NemoClaw or the Open

## Inference

Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it to the NVIDIA Endpoint provider.
Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it through the gateway proxy.

| Provider | Model | Use Case |
|--------------|--------------------------------------|-------------------------------------------------|
| NVIDIA Endpoint | `nvidia/nemotron-3-super-120b-a12b` | Production. Requires an NVIDIA API key. |
| Custom OpenAI-compatible | User-specified | Any provider with an OpenAI-compatible `/v1/chat/completions` endpoint. |

Get an API key from [build.nvidia.com](https://build.nvidia.com). The `nemoclaw onboard` command prompts for this key during setup.
For the NVIDIA endpoint, get an API key from [build.nvidia.com](https://build.nvidia.com). The `nemoclaw onboard` command prompts for this key during setup.

For custom providers, select "Custom OpenAI-compatible endpoint" during `nemoclaw onboard` and provide the base URL, API key, and model name. Any provider that exposes an OpenAI-compatible `/v1/chat/completions` endpoint will work. For non-interactive mode, set `NEMOCLAW_PROVIDER=custom`, `NEMOCLAW_CUSTOM_BASE_URL`, `NEMOCLAW_CUSTOM_API_KEY`, and `NEMOCLAW_MODEL`.

Local inference options such as Ollama and vLLM are still experimental. On macOS, they also depend on OpenShell host-routing support in addition to the local service itself being reachable on the host.

Expand Down
11 changes: 11 additions & 0 deletions bin/lib/inference-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,17 @@ function getProviderSelectionConfig(provider, model) {
provider,
providerLabel: "Local Ollama",
};
case "custom":
return {
endpointType: "custom",
endpointUrl: INFERENCE_ROUTE_URL,
ncpPartner: null,
model: model || null,
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV,
provider,
providerLabel: "Custom Provider",
};
default:
return null;
}
Expand Down
111 changes: 104 additions & 7 deletions bin/lib/onboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const {
isUnsupportedMacosRuntime,
shouldPatchCoredns,
} = require("./platform");
const { prompt, ensureApiKey, getCredential } = require("./credentials");
const { prompt, ensureApiKey, getCredential, saveCredential } = require("./credentials");
const registry = require("./registry");
const nim = require("./nim");
const policies = require("./policies");
Expand Down Expand Up @@ -209,10 +209,10 @@ function getNonInteractiveProvider() {
const providerKey = (process.env.NEMOCLAW_PROVIDER || "").trim().toLowerCase();
if (!providerKey) return null;

const validProviders = new Set(["cloud", "ollama", "vllm", "nim"]);
const validProviders = new Set(["cloud", "ollama", "vllm", "nim", "custom"]);
if (!validProviders.has(providerKey)) {
console.error(` Unsupported NEMOCLAW_PROVIDER: ${providerKey}`);
console.error(" Valid values: cloud, ollama, vllm, nim");
console.error(" Valid values: cloud, ollama, vllm, nim, custom");
process.exit(1);
}

Expand Down Expand Up @@ -532,6 +532,7 @@ async function setupNim(sandboxName, gpu) {
let model = null;
let provider = "nvidia-nim";
let nimContainer = null;
let customCreds = null;

// Detect local inference options
const hasOllama = !!runCapture("command -v ollama", { ignoreError: true });
Expand Down Expand Up @@ -570,6 +571,8 @@ async function setupNim(sandboxName, gpu) {
options.push({ key: "install-ollama", label: "Install Ollama (macOS)" });
}

options.push({ key: "custom", label: "Custom OpenAI-compatible endpoint (bring your own)" });

if (options.length > 1) {
let selected;

Expand Down Expand Up @@ -681,6 +684,83 @@ async function setupNim(sandboxName, gpu) {
console.log(" ✓ Using existing vLLM on localhost:8000");
provider = "vllm-local";
model = "vllm-local";
} else if (selected.key === "custom") {
provider = "custom";
let customBaseUrl;
let customApiKey;
if (isNonInteractive()) {
customBaseUrl = (process.env.NEMOCLAW_CUSTOM_BASE_URL || "").trim();
customApiKey = (process.env.NEMOCLAW_CUSTOM_API_KEY || "").trim();
model = requestedModel;
if (!customBaseUrl || !customApiKey || !model) {
console.error(" Custom provider requires NEMOCLAW_CUSTOM_BASE_URL, NEMOCLAW_CUSTOM_API_KEY, and NEMOCLAW_MODEL.");
process.exit(1);
}
} else {
console.log("");
console.log(" ┌─────────────────────────────────────────────────────────────────┐");
console.log(" │ Custom OpenAI-compatible provider │");
console.log(" │ │");
console.log(" │ Provide a base URL and API key for any provider that │");
console.log(" │ exposes an OpenAI-compatible /v1/chat/completions endpoint. │");
console.log(" │ │");
console.log(" │ Examples: │");
console.log(" │ Google Gemini https://generativelanguage.googleapis.com/v1beta/openai │");
console.log(" │ OpenRouter https://openrouter.ai/api/v1 │");
console.log(" │ Together AI https://api.together.xyz/v1 │");
console.log(" │ LiteLLM http://localhost:4000/v1 │");
console.log(" └─────────────────────────────────────────────────────────────────┘");
console.log("");

customBaseUrl = (await prompt(" Base URL: ")).trim();
if (!customBaseUrl) {
console.error(" Base URL is required.");
process.exit(1);
}

const previousBaseUrl = getCredential("CUSTOM_PROVIDER_BASE_URL");
saveCredential("CUSTOM_PROVIDER_BASE_URL", customBaseUrl);

customApiKey = previousBaseUrl === customBaseUrl
? getCredential("CUSTOM_PROVIDER_API_KEY")
: null;
if (!customApiKey) {
if (previousBaseUrl && previousBaseUrl !== customBaseUrl) {
console.log(" Base URL changed — please enter a new API key.");
}
customApiKey = (await prompt(" API Key: ")).trim();
if (!customApiKey) {
console.error(" API key is required.");
process.exit(1);
}
saveCredential("CUSTOM_PROVIDER_API_KEY", customApiKey);
console.log(" Key saved to ~/.nemoclaw/credentials.json");
} else {
console.log(" Using saved API key from credentials.");
}

model = await prompt(" Model name (e.g. gemini-2.5-flash): ");
if (!model) {
console.error(" Model name is required.");
process.exit(1);
}
}

// Validate base URL
try {
const parsed = new URL(customBaseUrl);
if (parsed.protocol === "http:" && !["localhost", "127.0.0.1", "::1"].includes(parsed.hostname)) {
console.error(" Insecure http:// URLs are only allowed for localhost. Use https:// for remote endpoints.");
process.exit(1);
}
} catch {
console.error(` Invalid URL: ${customBaseUrl}`);
process.exit(1);
}

// Store credentials for setupInference to use
customCreds = { baseUrl: customBaseUrl, apiKey: customApiKey };
console.log(` ✓ Using custom provider with model: ${model}`);
}
// else: cloud — fall through to default below
}
Expand All @@ -703,12 +783,12 @@ async function setupNim(sandboxName, gpu) {

registry.updateSandbox(sandboxName, { model, provider, nimContainer });

return { model, provider };
return { model, provider, customCreds };
}

// ── Step 5: Inference provider ───────────────────────────────────

async function setupInference(sandboxName, model, provider) {
async function setupInference(sandboxName, model, provider, customCreds) {
step(5, 7, "Setting up inference provider");

if (provider === "nvidia-nim") {
Expand Down Expand Up @@ -769,6 +849,22 @@ async function setupInference(sandboxName, model, provider) {
console.error(` ${probe.message}`);
process.exit(1);
}
} else if (provider === "custom") {
const baseUrl = customCreds?.baseUrl || getCredential("CUSTOM_PROVIDER_BASE_URL");
const apiKey = customCreds?.apiKey || getCredential("CUSTOM_PROVIDER_API_KEY");
run(
`openshell provider create --name custom-provider --type openai ` +
`--credential ${shellQuote("OPENAI_API_KEY=" + apiKey)} ` +
`--config ${shellQuote("OPENAI_BASE_URL=" + baseUrl)} 2>&1 || ` +
`openshell provider update custom-provider ` +
`--credential ${shellQuote("OPENAI_API_KEY=" + apiKey)} ` +
`--config ${shellQuote("OPENAI_BASE_URL=" + baseUrl)} 2>&1 || true`,
{ ignoreError: true }
);
run(
`openshell inference set --no-verify --provider custom-provider --model ${shellQuote(model)} 2>/dev/null || true`,
{ ignoreError: true }
);
}

registry.updateSandbox(sandboxName, { model, provider });
Expand Down Expand Up @@ -921,6 +1017,7 @@ function printDashboard(sandboxName, model, provider) {
if (provider === "nvidia-nim") providerLabel = "NVIDIA Endpoint API";
else if (provider === "vllm-local") providerLabel = "Local vLLM";
else if (provider === "ollama-local") providerLabel = "Local Ollama";
else if (provider === "custom") providerLabel = "Custom Provider";

console.log("");
console.log(` ${"─".repeat(50)}`);
Expand Down Expand Up @@ -949,8 +1046,8 @@ async function onboard(opts = {}) {
const gpu = await preflight();
await startGateway(gpu);
const sandboxName = await createSandbox(gpu);
const { model, provider } = await setupNim(sandboxName, gpu);
await setupInference(sandboxName, model, provider);
const { model, provider, customCreds } = await setupNim(sandboxName, gpu);
await setupInference(sandboxName, model, provider, customCreds);
await setupOpenclaw(sandboxName, model, provider);
await setupPolicies(sandboxName);
printDashboard(sandboxName, model, provider);
Expand Down
38 changes: 38 additions & 0 deletions docs/inference/switch-inference-providers.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,44 @@ You can switch to any of these models at runtime.
| `nvidia/llama-3.3-nemotron-super-49b-v1.5` | Nemotron Super 49B v1.5 | 131,072 | 4,096 |
| `nvidia/nemotron-3-nano-30b-a3b` | Nemotron 3 Nano 30B | 131,072 | 4,096 |

## Custom OpenAI-Compatible Providers

You can use any provider that exposes an OpenAI-compatible `/v1/chat/completions` endpoint.

During `nemoclaw onboard`, select **"Custom OpenAI-compatible endpoint"** and provide:

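- **Base URL** — the provider's API base (e.g. `https://generativelanguage.googleapis.com/v1beta/openai`). Remote endpoints must use `https://`; plain `http://` is accepted only for `localhost` and `127.0.0.1`.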
- **API key** — your provider credential
- **Model name** — the model identifier (e.g. `gemini-2.5-flash`)

Examples of compatible providers:

| Provider | Base URL |
|---|---|
| Google AI Studio (Gemini) | `https://generativelanguage.googleapis.com/v1beta/openai` |
| OpenRouter | `https://openrouter.ai/api/v1` |
| Together AI | `https://api.together.xyz/v1` |
| LiteLLM (local) | `http://localhost:4000/v1` |

To switch to a custom provider at runtime:

```console
$ openshell provider create --name custom-provider --type openai \
--credential "OPENAI_API_KEY=<your-key>" \
--config "OPENAI_BASE_URL=<base-url>"
$ openshell inference set --no-verify --provider custom-provider --model <model-name>
```

For non-interactive onboarding:

```console
$ NEMOCLAW_PROVIDER=custom \
NEMOCLAW_CUSTOM_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai \
NEMOCLAW_CUSTOM_API_KEY=AIza... \
NEMOCLAW_MODEL=gemini-2.5-flash \
nemoclaw onboard --non-interactive
```

## Related Topics

- [Inference Profiles](../reference/inference-profiles.md) for full profile configuration details.
8 changes: 8 additions & 0 deletions nemoclaw-blueprint/blueprint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ profiles:
- ncp
- nim-local
- vllm
- custom

description: |
NemoClaw blueprint: orchestrates OpenClaw sandbox creation, migration,
Expand Down Expand Up @@ -54,6 +55,13 @@ components:
credential_env: "OPENAI_API_KEY"
credential_default: "dummy"

custom:
provider_type: "openai"
provider_name: "custom-provider"
endpoint: ""
model: ""
credential_env: "OPENAI_API_KEY"

policy:
base: "sandboxes/openclaw/policy.yaml"
additions:
Expand Down
32 changes: 32 additions & 0 deletions test/inference-config.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,38 @@ describe("inference selection config", () => {
});
});

it("maps custom to the sandbox inference route with user-specified model", () => {
  // Whole-object comparison: every field of the custom selection config is pinned.
  const expectedConfig = {
    endpointType: "custom",
    endpointUrl: INFERENCE_ROUTE_URL,
    ncpPartner: null,
    model: "gemini-2.5-flash",
    profile: DEFAULT_ROUTE_PROFILE,
    credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV,
    provider: "custom",
    providerLabel: "Custom Provider",
  };
  const actualConfig = getProviderSelectionConfig("custom", "gemini-2.5-flash");
  assert.deepEqual(actualConfig, expectedConfig);
});

it("returns null model for custom provider when no model specified", () => {
  // Omitting the model argument must yield a null model, not undefined.
  const { model: resolvedModel, providerLabel: label } = getProviderSelectionConfig("custom");
  assert.equal(resolvedModel, null);
  assert.equal(label, "Custom Provider");
});

it("returns all expected fields for custom provider", () => {
  // Field-by-field check of the routing-related properties.
  const selection = getProviderSelectionConfig("custom", "gemini-2.5-flash");
  const expectedFields = [
    ["endpointType", "custom"],
    ["endpointUrl", INFERENCE_ROUTE_URL],
    ["ncpPartner", null],
    ["credentialEnv", DEFAULT_ROUTE_CREDENTIAL_ENV],
    ["profile", DEFAULT_ROUTE_PROFILE],
  ];
  for (const [field, expected] of expectedFields) {
    assert.equal(selection[field], expected);
  }
});

it("returns null for unknown provider", () => {
  // Provider keys outside the known set produce no config at all.
  const selection = getProviderSelectionConfig("unknown-provider");
  assert.equal(selection, null);
});

it("builds a qualified OpenClaw primary model for ollama-local", () => {
assert.equal(
getOpenClawPrimaryModel("ollama-local", "nemotron-3-nano:30b"),
Expand Down
56 changes: 56 additions & 0 deletions test/onboard-selection.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,60 @@ const { setupNim } = require(${onboardPath});
assert.ok(payload.lines.some((line) => line.includes("Press Enter to keep the cloud default")));
assert.ok(payload.lines.some((line) => line.includes("Cloud models:")));
});

it("custom provider config uses gateway-routed architecture", () => {
  const inferenceConfig = require("../bin/lib/inference-config");
  const selection = inferenceConfig.getProviderSelectionConfig("custom", "gemini-2.5-flash");
  // The custom provider is expected to share the common inference route URL
  // rather than exposing the user's endpoint directly.
  assert.equal(selection.endpointUrl, inferenceConfig.INFERENCE_ROUTE_URL);
  assert.equal(selection.endpointType, "custom");
  assert.equal(selection.provider, "custom");
  assert.equal(selection.providerLabel, "Custom Provider");
});

it("custom provider URL validation rejects insecure http:// for non-localhost", () => {
  // Re-derives the two facts the onboarding check keys on (http scheme,
  // host outside the loopback allowlist); it does not call onboard.js itself.
  const loopbackHosts = ["localhost", "127.0.0.1", "::1"];
  [
    "http://evil.example.com/v1",
    "http://api.openai.com/v1",
  ].forEach((url) => {
    const { hostname, protocol } = new URL(url);
    const isLocalhost = loopbackHosts.includes(hostname);
    assert.equal(isLocalhost, false, `${url} should not be treated as localhost`);
    assert.equal(protocol, "http:", `${url} should be http`);
  });
});

it("custom provider URL validation allows http:// for localhost", () => {
  // Each sample host must be a member of the loopback allowlist.
  const loopbackHosts = ["localhost", "127.0.0.1", "::1"];
  [
    "http://localhost:4000/v1",
    "http://127.0.0.1:8000/v1",
  ].forEach((url) => {
    const { hostname } = new URL(url);
    const isLocalhost = loopbackHosts.includes(hostname);
    assert.equal(isLocalhost, true, `${url} should be treated as localhost`);
  });
});

it("custom provider URL validation allows https:// for any host", () => {
  // Only the parsed scheme is asserted here; the host is deliberately unchecked.
  [
    "https://generativelanguage.googleapis.com/v1beta/openai",
    "https://openrouter.ai/api/v1",
    "https://api.together.xyz/v1",
  ].forEach((url) => {
    const { protocol } = new URL(url);
    assert.equal(protocol, "https:");
  });
});

it("custom provider URL validation rejects malformed URLs", () => {
  // The URL constructor throws on unparseable input; that is the rejection signal.
  ["not-a-url", ""].forEach((url) => {
    const parse = () => new URL(url);
    assert.throws(parse, `${url} should be rejected`);
  });
});
});