From 7875f3efe749ac2c4fe520d272e69de778ee1711 Mon Sep 17 00:00:00 2001
From: Aaron Erickson
Date: Sun, 29 Mar 2026 19:17:32 -0700
Subject: [PATCH] fix(onboard): increase endpoint probe timeout for large model inference

The onboard endpoint validation sends a full inference request to
verify the provider is reachable. The 20s max-time was too tight for
large models like nemotron-3-super-120b-a12b on NVIDIA Endpoints,
causing the probe to time out and onboard to fail in non-interactive
mode.

Increase connect-timeout from 5s to 10s and max-time from 20s to 60s.
This only runs once during onboard, so the longer timeout is
acceptable.

This probe was added in #648 (March 24) but has never run successfully
in the nightly e2e because the e2e has been broken since March 23.
---
 bin/lib/onboard.js               | 2 +-
 test/credential-exposure.test.js | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index 1480f435d..d5606ae51 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -448,7 +448,7 @@ function hydrateCredentialEnv(envName) {
 }
 
 function getCurlTimingArgs() {
-  return ["--connect-timeout 5", "--max-time 20"];
+  return ["--connect-timeout 10", "--max-time 60"];
 }
 
 function buildProviderArgs(action, name, type, credentialEnv, baseUrl) {
diff --git a/test/credential-exposure.test.js b/test/credential-exposure.test.js
index 08f880a9d..8c5381cbf 100644
--- a/test/credential-exposure.test.js
+++ b/test/credential-exposure.test.js
@@ -87,7 +87,7 @@ describe("credential exposure in process arguments", () => {
     const src = fs.readFileSync(ONBOARD_JS, "utf-8");
 
     expect(src).toMatch(/function getCurlTimingArgs\(\)/);
-    expect(src).toMatch(/--connect-timeout 5/);
-    expect(src).toMatch(/--max-time 20/);
+    expect(src).toMatch(/--connect-timeout 10/);
+    expect(src).toMatch(/--max-time 60/);
   });
 });