Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 39 additions & 7 deletions bin/lib/local-inference.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ function getLocalProviderBaseUrl(provider) {
function getLocalProviderHealthCheck(provider) {
switch (provider) {
case "vllm-local":
return "curl -sf http://localhost:8000/v1/models 2>/dev/null";
return ["curl", "-sf", "http://localhost:8000/v1/models"];
case "ollama-local":
return "curl -sf http://localhost:11434/api/tags 2>/dev/null";
return ["curl", "-sf", "http://localhost:11434/api/tags"];
default:
return null;
}
Expand All @@ -32,9 +32,27 @@ function getLocalProviderHealthCheck(provider) {
function getLocalProviderContainerReachabilityCheck(provider) {
switch (provider) {
case "vllm-local":
return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:8000/v1/models 2>/dev/null`;
return [
"docker",
"run",
"--rm",
"--add-host",
"host.openshell.internal:host-gateway",
CONTAINER_REACHABILITY_IMAGE,
"-sf",
"http://host.openshell.internal:8000/v1/models",
];
case "ollama-local":
return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:11434/api/tags 2>/dev/null`;
return [
"docker",
"run",
"--rm",
"--add-host",
"host.openshell.internal:host-gateway",
CONTAINER_REACHABILITY_IMAGE,
"-sf",
"http://host.openshell.internal:11434/api/tags",
];
default:
return null;
}
Expand Down Expand Up @@ -103,7 +121,7 @@ function parseOllamaList(output) {
}

function getOllamaModelOptions(runCapture) {
const output = runCapture("ollama list 2>/dev/null", { ignoreError: true });
const output = runCapture(["ollama", "list"], { ignoreError: true });
const parsed = parseOllamaList(output);
if (parsed.length > 0) {
return parsed;
Expand All @@ -123,7 +141,11 @@ function getOllamaWarmupCommand(model, keepAlive = "15m") {
stream: false,
keep_alive: keepAlive,
});
return `nohup curl -s http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} >/dev/null 2>&1 &`;
return [
"bash",
"-c",
`nohup curl -s http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} >/dev/null 2>&1 &`,
];
}

function getOllamaProbeCommand(model, timeoutSeconds = 120, keepAlive = "15m") {
Expand All @@ -133,7 +155,17 @@ function getOllamaProbeCommand(model, timeoutSeconds = 120, keepAlive = "15m") {
stream: false,
keep_alive: keepAlive,
});
return `curl -sS --max-time ${timeoutSeconds} http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} 2>/dev/null`;
return [
"curl",
"-sS",
"--max-time",
String(timeoutSeconds),
"http://localhost:11434/api/generate",
"-H",
"Content-Type: application/json",
"-d",
payload,
];
}

function validateOllamaModel(model, runCapture) {
Expand Down
54 changes: 36 additions & 18 deletions bin/lib/nim.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ function detectGpu() {
// Try NVIDIA first — query VRAM
try {
const output = runCapture(
"nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits",
["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits"],
{ ignoreError: true }
);
if (output) {
Expand All @@ -49,15 +49,21 @@ function detectGpu() {
// Fallback: DGX Spark (GB10) — VRAM not queryable due to unified memory architecture
try {
const nameOutput = runCapture(
"nvidia-smi --query-gpu=name --format=csv,noheader,nounits",
["nvidia-smi", "--query-gpu=name", "--format=csv,noheader,nounits"],
{ ignoreError: true }
);
if (nameOutput && nameOutput.includes("GB10")) {
// GB10 has 128GB unified memory shared with Grace CPU — use system RAM
let totalMemoryMB = 0;
try {
const memLine = runCapture("free -m | awk '/Mem:/ {print $2}'", { ignoreError: true });
if (memLine) totalMemoryMB = parseInt(memLine.trim(), 10) || 0;
const freeOut = runCapture(["free", "-m"], { ignoreError: true });
if (freeOut) {
const memLine = freeOut.split("\n").find((l) => l.includes("Mem:"));
if (memLine) {
const parts = memLine.split(/\s+/);
totalMemoryMB = parseInt(parts[1], 10) || 0;
}
}
} catch {}
return {
type: "nvidia",
Expand All @@ -74,7 +80,7 @@ function detectGpu() {
if (process.platform === "darwin") {
try {
const spOutput = runCapture(
"system_profiler SPDisplaysDataType 2>/dev/null",
["system_profiler", "SPDisplaysDataType"],
{ ignoreError: true }
);
if (spOutput) {
Expand All @@ -92,7 +98,7 @@ function detectGpu() {
} else {
// Apple Silicon shares system RAM — read total memory
try {
const memBytes = runCapture("sysctl -n hw.memsize", { ignoreError: true });
const memBytes = runCapture(["sysctl", "-n", "hw.memsize"], { ignoreError: true });
if (memBytes) memoryMB = Math.floor(parseInt(memBytes, 10) / 1024 / 1024);
} catch {}
}
Expand Down Expand Up @@ -121,7 +127,7 @@ function pullNimImage(model) {
process.exit(1);
}
console.log(` Pulling NIM image: ${image}`);
run(`docker pull ${shellQuote(image)}`);
run(["docker", "pull", image]);
return image;
}

Expand All @@ -134,13 +140,23 @@ function startNimContainer(sandboxName, model, port = 8000) {
}

// Stop any existing container with same name
const qn = shellQuote(name);
run(`docker rm -f ${qn} 2>/dev/null || true`, { ignoreError: true });
run(["docker", "rm", "-f", name], { ignoreError: true });

console.log(` Starting NIM container: ${name}`);
run(
`docker run -d --gpus all -p ${Number(port)}:8000 --name ${qn} --shm-size 16g ${shellQuote(image)}`
);
run([
"docker",
"run",
"-d",
"--gpus",
"all",
"-p",
`${port}:8000`,
"--name",
name,
"--shm-size",
"16g",
image,
]);
return name;
}

Expand All @@ -152,7 +168,7 @@ function waitForNimHealth(port = 8000, timeout = 300) {

while ((Date.now() - start) / 1000 < timeout) {
try {
const result = runCapture(`curl -sf http://localhost:${safePort}/v1/models`, {
const result = runCapture(["curl", "-sf", `http://localhost:${port}/v1/models`], {
ignoreError: true,
});
if (result) {
Expand All @@ -169,24 +185,26 @@ function waitForNimHealth(port = 8000, timeout = 300) {

function stopNimContainer(sandboxName) {
const name = containerName(sandboxName);
const qn = shellQuote(name);
console.log(` Stopping NIM container: ${name}`);
run(`docker stop ${qn} 2>/dev/null || true`, { ignoreError: true });
run(`docker rm ${qn} 2>/dev/null || true`, { ignoreError: true });
run(["docker", "stop", name], { ignoreError: true });
run(["docker", "rm", name], { ignoreError: true });
}

function nimStatus(sandboxName) {
const name = containerName(sandboxName);
try {
const state = runCapture(
`docker inspect --format '{{.State.Status}}' ${shellQuote(name)} 2>/dev/null`,
["docker", "inspect", "--format", "{{.State.Status}}", name],
{ ignoreError: true }
);
if (!state) return { running: false, container: name };

let healthy = false;
if (state === "running") {
const health = runCapture(`curl -sf http://localhost:8000/v1/models 2>/dev/null`, {
const registry = require("./registry");
const sandbox = registry.getSandbox(sandboxName);
const port = sandbox ? sandbox.nimPort || 8000 : 8000;
const health = runCapture(["curl", "-sf", `http://localhost:${port}/v1/models`], {
ignoreError: true,
});
healthy = !!health;
Expand Down
Loading
Loading