Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 20 additions & 14 deletions bin/lib/nim.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//
// NIM container management — pull, start, stop, health-check NIM images.

const { run, runCapture } = require("./runner");
const { run, runArgv, runCapture, runCaptureArgv, assertSafeName } = require("./runner");
const nimImages = require("./nim-images.json");

function containerName(sandboxName) {
Expand Down Expand Up @@ -121,11 +121,12 @@ function pullNimImage(model) {
process.exit(1);
}
console.log(` Pulling NIM image: ${image}`);
run(`docker pull ${image}`);
runArgv("docker", ["pull", image]);
return image;
}

function startNimContainer(sandboxName, model, port = 8000) {
assertSafeName(sandboxName, "sandbox name");
const name = containerName(sandboxName);
const image = getImageForModel(model);
if (!image) {
Expand All @@ -134,12 +135,17 @@ function startNimContainer(sandboxName, model, port = 8000) {
}

// Stop any existing container with same name
run(`docker rm -f ${name} 2>/dev/null || true`, { ignoreError: true });
runArgv("docker", ["rm", "-f", name], { ignoreError: true });

console.log(` Starting NIM container: ${name}`);
run(
`docker run -d --gpus all -p ${port}:8000 --name ${name} --shm-size 16g ${image}`
);
runArgv("docker", [
"run", "-d",
"--gpus", "all",
"-p", `${port}:8000`,
"--name", name,
"--shm-size", "16g",
image,
]);
return name;
}

Expand All @@ -150,7 +156,7 @@ function waitForNimHealth(port = 8000, timeout = 300) {

while ((Date.now() - start) / 1000 < timeout) {
try {
const result = runCapture(`curl -sf http://localhost:${port}/v1/models`, {
const result = runCaptureArgv("curl", ["-sf", `http://localhost:${port}/v1/models`], {
ignoreError: true,
});
if (result) {
Expand All @@ -168,27 +174,27 @@ function waitForNimHealth(port = 8000, timeout = 300) {
function stopNimContainer(sandboxName) {
const name = containerName(sandboxName);
console.log(` Stopping NIM container: ${name}`);
run(`docker stop ${name} 2>/dev/null || true`, { ignoreError: true });
run(`docker rm ${name} 2>/dev/null || true`, { ignoreError: true });
runArgv("docker", ["stop", name], { ignoreError: true });
runArgv("docker", ["rm", name], { ignoreError: true });
}

function nimStatus(sandboxName) {
const name = containerName(sandboxName);
try {
const state = runCapture(
`docker inspect --format '{{.State.Status}}' ${name} 2>/dev/null`,
const state = runCaptureArgv(
"docker", ["inspect", "--format", "{{.State.Status}}", name],
{ ignoreError: true }
);
if (!state) return { running: false, container: name };

let healthy = false;
if (state === "running") {
const health = runCapture(`curl -sf http://localhost:8000/v1/models 2>/dev/null`, {
if (state.trim() === "running") {
const health = runCaptureArgv("curl", ["-sf", "http://localhost:8000/v1/models"], {
ignoreError: true,
});
healthy = !!health;
}
return { running: state === "running", healthy, container: name, state };
return { running: state.trim() === "running", healthy, container: name, state: state.trim() };
} catch {
return { running: false, container: name };
}
Expand Down
Loading
Loading