From 7875f3efe749ac2c4fe520d272e69de778ee1711 Mon Sep 17 00:00:00 2001
From: Aaron Erickson <aerickson@nvidia.com>
Date: Sun, 29 Mar 2026 19:17:32 -0700
Subject: [PATCH] fix(onboard): increase endpoint probe timeout for large model
 inference

The onboard endpoint validation sends a full inference request to verify
the provider is reachable. The 20s max-time was too tight for large
models like nemotron-3-super-120b-a12b on NVIDIA Endpoints, causing
the probe to time out and onboard to fail in non-interactive mode.

Increase curl's --connect-timeout from 5s to 10s and --max-time from 20s
to 60s. The probe runs only once during onboard, so the longer timeout
is acceptable.

This probe was added in #648 (March 24) but has never run successfully
in the nightly e2e because the e2e has been broken since March 23.
---
 bin/lib/onboard.js               | 2 +-
 test/credential-exposure.test.js | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index 1480f435d..d5606ae51 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -448,7 +448,7 @@ function hydrateCredentialEnv(envName) {
 }
 
 function getCurlTimingArgs() {
-  return ["--connect-timeout 5", "--max-time 20"];
+  return ["--connect-timeout 10", "--max-time 60"];
 }
 
 function buildProviderArgs(action, name, type, credentialEnv, baseUrl) {
diff --git a/test/credential-exposure.test.js b/test/credential-exposure.test.js
index 08f880a9d..8c5381cbf 100644
--- a/test/credential-exposure.test.js
+++ b/test/credential-exposure.test.js
@@ -87,7 +87,7 @@ describe("credential exposure in process arguments", () => {
     const src = fs.readFileSync(ONBOARD_JS, "utf-8");
 
     expect(src).toMatch(/function getCurlTimingArgs\(\)/);
-    expect(src).toMatch(/--connect-timeout 5/);
-    expect(src).toMatch(/--max-time 20/);
+    expect(src).toMatch(/--connect-timeout 10/);
+    expect(src).toMatch(/--max-time 60/);
   });
 });