Skip to content
84 changes: 79 additions & 5 deletions .github/workflows/nightly-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
# Nightly E2E tests:
#
# cloud-e2e Cloud inference (NVIDIA Endpoint API) on ubuntu-latest.
# cloud-experimental-e2e Experimental cloud inference test.
# cloud-experimental-e2e Experimental cloud inference test (main script skips embedded
# check-docs + final cleanup; follow-up steps run check-docs,
# skip/05-network-policy.sh, then cleanup.sh --verify with if: always()).
# gpu-e2e Local Ollama inference on a GPU self-hosted runner.
# Controlled by the GPU_E2E_ENABLED repository variable.
# Set vars.GPU_E2E_ENABLED to "true" in repo settings to enable.
Expand All @@ -13,7 +15,11 @@
# Runs directly on the runner (not inside Docker) because OpenShell bootstraps
# a K3s cluster inside a privileged Docker container — nesting would break networking.
#
# Requires NVIDIA_API_KEY repository secret (for cloud-e2e and cloud-experimental-e2e).
# NVIDIA_API_KEY for cloud-e2e and cloud-experimental-e2e:
# - Repository secret: Settings → Secrets and variables → Actions → Repository secrets.
# - Environment secret: only available if the job sets `environment: <that environment name>`.
# (Storing the key under Environments / NVIDIA_API_KEY without `environment:` here leaves the
# variable empty in the job — repository secrets and environment secrets are separate.)
# Only runs on schedule and manual dispatch — never on PRs (secret protection).

name: nightly-e2e
Expand Down Expand Up @@ -59,12 +65,14 @@ jobs:
cloud-experimental-e2e:
if: github.repository == 'NVIDIA/NemoClaw'
runs-on: ubuntu-latest
environment: NVIDIA_API_KEY
timeout-minutes: 45
# Main suite + check-docs + network-policy skip script can exceed 45m on cold runners.
timeout-minutes: 90
steps:
- name: Checkout
uses: actions/checkout@v6

# Split Phase 5f (check-docs) and Phase 6 (cleanup) out of the main script so CI shows
# failures in dedicated steps; tear-down always runs last (if: always()).
- name: Run cloud-experimental E2E test
env:
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
Expand All @@ -75,14 +83,80 @@ jobs:
NEMOCLAW_RECREATE_SANDBOX: "1"
NEMOCLAW_POLICY_MODE: "custom"
NEMOCLAW_POLICY_PRESETS: "npm,pypi"
RUN_E2E_CLOUD_EXPERIMENTAL_SKIP_FINAL_CLEANUP: "1"
RUN_E2E_CLOUD_EXPERIMENTAL_SKIP_CHECK_DOCS: "1"
run: bash test/e2e/test-e2e-cloud-experimental.sh

- name: Documentation checks (check-docs.sh)
  # always(): run regardless of the outcome of earlier steps.
  if: always()
  env:
    GITHUB_TOKEN: ${{ github.token }}
  run: |
    set -eo pipefail
    # Source the shell profile and nvm with nounset OFF. A reference to an
    # unset variable inside a sourced file is fatal to a non-interactive
    # shell under `set -u`, and `|| true` cannot catch it.
    set +u
    if [ -f "$HOME/.bashrc" ]; then
      # shellcheck source=/dev/null
      source "$HOME/.bashrc" 2>/dev/null || true
    fi
    export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"
    if [ -s "$NVM_DIR/nvm.sh" ]; then
      # shellcheck source=/dev/null
      . "$NVM_DIR/nvm.sh"
    fi
    # Strict mode for the remaining inline commands.
    set -u
    # Prepend ~/.local/bin only when it exists and is not already on PATH.
    if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
      export PATH="$HOME/.local/bin:$PATH"
    fi
    bash test/e2e/e2e-cloud-experimental/check-docs.sh

- name: Network policy checks (skip/05-network-policy.sh)
  # always(): run regardless of the outcome of earlier steps.
  if: always()
  env:
    NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
    GITHUB_TOKEN: ${{ github.token }}
    SANDBOX_NAME: e2e-cloud-experimental
    NEMOCLAW_SANDBOX_NAME: e2e-cloud-experimental
  run: |
    set -eo pipefail
    # Source the shell profile and nvm with nounset OFF. A reference to an
    # unset variable inside a sourced file is fatal to a non-interactive
    # shell under `set -u`, and `|| true` cannot catch it.
    set +u
    if [ -f "$HOME/.bashrc" ]; then
      # shellcheck source=/dev/null
      source "$HOME/.bashrc" 2>/dev/null || true
    fi
    export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"
    if [ -s "$NVM_DIR/nvm.sh" ]; then
      # shellcheck source=/dev/null
      . "$NVM_DIR/nvm.sh"
    fi
    # Strict mode for the remaining inline commands.
    set -u
    # Prepend ~/.local/bin only when it exists and is not already on PATH.
    if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
      export PATH="$HOME/.local/bin:$PATH"
    fi
    bash test/e2e/e2e-cloud-experimental/skip/05-network-policy.sh

- name: Tear down cloud-experimental sandbox (always)
  # always(): tear-down must run regardless of the outcome of earlier steps.
  if: always()
  env:
    SANDBOX_NAME: e2e-cloud-experimental
    NEMOCLAW_SANDBOX_NAME: e2e-cloud-experimental
  run: |
    set -eo pipefail
    # Source the shell profile and nvm with nounset OFF. A reference to an
    # unset variable inside a sourced file is fatal to a non-interactive
    # shell under `set -u`, and `|| true` cannot catch it. An abort here
    # would skip the teardown entirely.
    set +u
    if [ -f "$HOME/.bashrc" ]; then
      # shellcheck source=/dev/null
      source "$HOME/.bashrc" 2>/dev/null || true
    fi
    export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"
    if [ -s "$NVM_DIR/nvm.sh" ]; then
      # shellcheck source=/dev/null
      . "$NVM_DIR/nvm.sh"
    fi
    # Strict mode for the remaining inline commands.
    set -u
    # Prepend ~/.local/bin only when it exists and is not already on PATH.
    if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
      export PATH="$HOME/.local/bin:$PATH"
    fi
    # --verify: destroy, then assert the sandbox is really gone (exits 1 if not).
    bash test/e2e/e2e-cloud-experimental/cleanup.sh --verify

- name: Upload install log on failure
  # failure(): only runs when an earlier step in this job failed.
  if: failure()
  uses: actions/upload-artifact@v4
  with:
    name: install-log-cloud-experimental
    # Exactly one `path` key: duplicate mapping keys are invalid YAML, and the
    # cloud-experimental log is the value that was effective (it came last).
    path: /tmp/nemoclaw-e2e-cloud-experimental-install.log
    # A missing log is not an error for this step.
    if-no-files-found: ignore

# ── GPU E2E (Ollama local inference) ──────────────────────────
Expand Down
81 changes: 74 additions & 7 deletions test/e2e/e2e-cloud-experimental/check-docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
# Environment:
# CHECK_DOC_LINKS_REMOTE If 0, skip http(s) probes for links check.
# CHECK_DOC_LINKS_VERBOSE If 1, log each URL during curl (same as --verbose).
# CHECK_DOC_LINKS_IGNORE_EXTRA Comma-separated extra http(s) URLs to skip curling (exact match, #fragment ignored).
# CHECK_DOC_LINKS_IGNORE_URL_REGEX If set, skip curl when the whole URL matches this ERE (bash [[ =~ ]]).
# NODE Node for CLI check (default: node).
# CURL curl binary (default: curl).

Expand Down Expand Up @@ -51,7 +53,8 @@ Options:
--verbose Log each URL while curling (link check).
-h, --help Show this help.

Environment: CHECK_DOC_LINKS_REMOTE, CHECK_DOC_LINKS_VERBOSE, NODE, CURL.
Environment: CHECK_DOC_LINKS_REMOTE, CHECK_DOC_LINKS_VERBOSE, CHECK_DOC_LINKS_IGNORE_EXTRA,
CHECK_DOC_LINKS_IGNORE_URL_REGEX, NODE, CURL.
EOF
}

Expand Down Expand Up @@ -268,6 +271,54 @@ check_remote_url() {
return 0
}

# Canonicalize a URL for ignore-list comparison.
#   $1  URL to canonicalize.
# Prints (without a trailing newline) the URL with everything from the first
# '#' onward removed and then at most one trailing '/' dropped.
normalize_url_for_ignore_match() {
  local without_fragment="${1%%\#*}"
  printf '%s' "${without_fragment%/}"
}

# Emit the built-in ignore list, one URL per line.
# These pages often fail in CI (bot wall, redirects, or flakiness) but are
# not critical for doc correctness.
check_docs_default_ignored_urls() {
  local -a builtin_ignored=(
    'https://github.com/NVIDIA/NemoClaw/commits/main'
    'https://github.com/NVIDIA/NemoClaw/pulls?q=is%3Apr+is%3Amerged'
    'https://github.com/NVIDIA/NemoClaw/pulls?q=is:pr+is:merged'
    'https://github.com/openclaw/openclaw/issues/49950'
  )
  printf '%s\n' "${builtin_ignored[@]}"
}

# Decide whether a URL is exempt from the remote (curl) probe.
#   $1  URL as found in the docs.
# Returns 0 (skip) when any of these holds, 1 otherwise:
#   1. the normalized URL equals a normalized built-in ignore entry;
#   2. it equals a normalized, whitespace-trimmed entry of the
#      comma-separated CHECK_DOC_LINKS_IGNORE_EXTRA;
#   3. the raw URL matches CHECK_DOC_LINKS_IGNORE_URL_REGEX (bash `=~`, i.e.
#      an unanchored ERE search unless the pattern anchors itself).
url_should_skip_remote_probe() {
  local url="$1"
  local wanted candidate pattern
  local -a extra_entries=()
  wanted="$(normalize_url_for_ignore_match "$url")"

  # 1) Built-in list (the `|| [[ -n … ]]` keeps a final unterminated line).
  while IFS= read -r candidate || [[ -n "${candidate:-}" ]]; do
    if [[ -n "${candidate:-}" && "$(normalize_url_for_ignore_match "$candidate")" == "$wanted" ]]; then
      return 0
    fi
  done < <(check_docs_default_ignored_urls)

  # 2) Caller-supplied extras; IFS is overridden for this one `read` only.
  if [[ -n "${CHECK_DOC_LINKS_IGNORE_EXTRA:-}" ]]; then
    IFS=',' read -ra extra_entries <<<"${CHECK_DOC_LINKS_IGNORE_EXTRA}"
    for candidate in "${extra_entries[@]}"; do
      # Trim leading, then trailing, whitespace.
      candidate="${candidate#"${candidate%%[![:space:]]*}"}"
      candidate="${candidate%"${candidate##*[![:space:]]}"}"
      if [[ -n "$candidate" && "$(normalize_url_for_ignore_match "$candidate")" == "$wanted" ]]; then
        return 0
      fi
    done
  fi

  # 3) Regex escape hatch, applied to the URL exactly as given.
  pattern="${CHECK_DOC_LINKS_IGNORE_URL_REGEX:-}"
  if [[ -n "$pattern" && "$url" =~ $pattern ]]; then
    return 0
  fi

  return 1
}

run_links_check() {
local -a DOC_FILES
if [[ ${#EXTRA_FILES[@]} -gt 0 ]]; then
Expand All @@ -293,6 +344,7 @@ run_links_check() {
fi
if [[ "$CHECK_DOC_LINKS_REMOTE" != 0 ]]; then
log "[links] remote: curl unique http(s) targets (disable: CHECK_DOC_LINKS_REMOTE=0 or --local-only)"
log "[links] remote: built-in skip list for flaky/GitHub pages (override: CHECK_DOC_LINKS_IGNORE_EXTRA, CHECK_DOC_LINKS_IGNORE_URL_REGEX)"
else
log "[links] remote: skipped (local paths only)"
fi
Expand Down Expand Up @@ -356,18 +408,33 @@ run_links_check() {

if [[ "$CHECK_DOC_LINKS_REMOTE" != 0 ]]; then
if [[ -n "$_deduped" ]]; then
log "[links] phase 2/2: curl ${_unique} URL(s) (GET, -L, fail 4xx/5xx)"
local _probe_list="" _skip_count=0 _probe_n=0
while IFS= read -r url || [[ -n "${url:-}" ]]; do
[[ -z "${url:-}" ]] && continue
if url_should_skip_remote_probe "$url"; then
log "[links] skipped (ignore list): ${url}"
_skip_count=$((_skip_count + 1))
else
_probe_list+="${url}"$'\n'
fi
done <<<"$_deduped"
_probe_n="$(printf '%s\n' "$_probe_list" | grep -c . || true)"
if [[ "$_skip_count" -gt 0 ]]; then
log "[links] phase 2/2: curl ${_probe_n} URL(s), ${_skip_count} skipped (GET, -L, fail 4xx/5xx)"
else
log "[links] phase 2/2: curl ${_probe_n} URL(s) (GET, -L, fail 4xx/5xx)"
fi
_i=0
while IFS= read -r url || [[ -n "$url" ]]; do
[[ -z "$url" ]] && continue
while IFS= read -r url || [[ -n "${url:-}" ]]; do
[[ -z "${url:-}" ]] && continue
_i=$((_i + 1))
if [[ "$VERBOSE" -eq 1 ]]; then
log "[links] [${_i}/${_unique}] ${url}"
log "[links] [${_i}/${_probe_n}] ${url}"
fi
if ! check_remote_url "$url"; then
failures=1
fi
done <<<"$_deduped"
done <<<"$_probe_list"
else
log "[links] phase 2/2: no http(s) links"
fi
Expand All @@ -384,7 +451,7 @@ run_links_check() {
return 1
fi
if [[ "$CHECK_DOC_LINKS_REMOTE" != 0 ]] && [[ ${_unique:-0} -gt 0 ]]; then
log "[links] phase 2 OK (${_unique} URL(s))"
log "[links] phase 2 OK (${_unique} unique http(s); probed those not in ignore list)"
fi
log "[links] summary: ${#DOC_FILES[@]} file(s), local OK$(
[[ "$CHECK_DOC_LINKS_REMOTE" != 0 ]] && [[ ${_unique:-0} -gt 0 ]] && printf ', %s remote OK' "${_unique}"
Expand Down
79 changes: 79 additions & 0 deletions test/e2e/e2e-cloud-experimental/cleanup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Shared teardown for e2e-cloud-experimental (extracted from test-e2e-cloud-experimental.sh Phase 0 + Phase 6).
#
# Destroys nemoclaw sandbox, OpenShell sandbox, port 18789 forward, and nemoclaw gateway.
#
# Usage:
# SANDBOX_NAME=my-sbx bash test/e2e/e2e-cloud-experimental/cleanup.sh
# SANDBOX_NAME=my-sbx bash test/e2e/e2e-cloud-experimental/cleanup.sh --verify
#
# Environment:
# SANDBOX_NAME or NEMOCLAW_SANDBOX_NAME — default: e2e-cloud-experimental
#
# Modes:
# (default) — destroy only (best-effort; always exits 0)
# --verify — destroy then assert sandbox is gone from openshell get + nemoclaw list (exits 1 on failure)

set -uo pipefail

# Status reporters. Each takes the message as $1 and writes one ANSI-coloured
# line to stdout. None of them exits; callers decide what to do next.

# Green PASS line.
pass() {
  local msg="$1"
  printf '\033[32m PASS: %s\033[0m\n' "$msg"
}

# Red FAIL line (printing only).
fail() {
  local msg="$1"
  printf '\033[31m FAIL: %s\033[0m\n' "$msg"
}

# Yellow SKIP line.
skip() {
  local msg="$1"
  printf '\033[33m SKIP: %s\033[0m\n' "$msg"
}

# Bold-blue [info] tag followed by the uncoloured message.
info() {
  local msg="$1"
  printf '\033[1;34m [info]\033[0m %s\n' "$msg"
}

# Target sandbox: NEMOCLAW_SANDBOX_NAME wins over SANDBOX_NAME; falls back to
# e2e-cloud-experimental when neither is set.
# NOTE(review): skip/01-onboard-completion.sh resolves these two variables in
# the opposite order (SANDBOX_NAME first) — confirm which precedence is intended.
SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-${SANDBOX_NAME:-e2e-cloud-experimental}}"

# Argument parsing: no argument means destroy-only, `--verify` adds the
# post-teardown assertions. Anything else is rejected with exit 2 so a typo
# cannot silently disable the verification gate.
VERIFY=0
if [ "$#" -gt 1 ]; then
  printf '%s\n' "cleanup.sh: too many arguments (expected at most: --verify)" >&2
  exit 2
fi
case "${1:-}" in
  "")
    ;;
  --verify)
    VERIFY=1
    ;;
  *)
    printf '%s\n' "cleanup.sh: unknown option: ${1}" >&2
    exit 2
    ;;
esac
Comment on lines +28 to +31
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Reject unexpected args instead of silently downgrading to destroy-only.

Anything other than exact --verify currently falls through to the success path. A typo here disables the verification gate without failing CI.

Suggested fix
 VERIFY=0
-if [ "${1:-}" = "--verify" ]; then
-  VERIFY=1
-fi
+case "${1:-}" in
+  "")
+    ;;
+  --verify)
+    VERIFY=1
+    ;;
+  *)
+    printf '%s\n' "cleanup.sh: unknown option: ${1}" >&2
+    exit 2
+    ;;
+esac
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
VERIFY=0
if [ "${1:-}" = "--verify" ]; then
VERIFY=1
fi
VERIFY=0
case "${1:-}" in
"")
;;
--verify)
VERIFY=1
;;
*)
printf '%s\n' "cleanup.sh: unknown option: ${1}" >&2
exit 2
;;
esac
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/e2e/e2e-cloud-experimental/cleanup.sh` around lines 28 - 31, The script
currently treats any argument other than the exact string "--verify" as no-ops
and silently leaves VERIFY=0; change the argument handling to explicitly reject
unexpected args: check the positional parameters (e.g., test "$#" and "$1") and
if an argument is present and not equal to "--verify" print an error to stderr
and exit non-zero, otherwise set VERIFY=1 only when "$1" == "--verify"; update
the logic around the VERIFY variable and the if condition that reads "$1" so
typos or extra args cause a failing exit instead of silently downgrading to
destroy-only.


# Announce which sandbox is being torn down and whether verification follows.
info "e2e-cloud-experimental cleanup: sandbox='${SANDBOX_NAME}' (verify=${VERIFY})"

# Best-effort destroy: each CLI is used only if it is on PATH, its stderr is
# discarded, and a failing command is ignored (`|| true`).
if command -v nemoclaw >/dev/null 2>&1; then
nemoclaw "$SANDBOX_NAME" destroy --yes 2>/dev/null || true
fi
if command -v openshell >/dev/null 2>&1; then
# Remove the sandbox, stop the port 18789 forward, then destroy the nemoclaw gateway.
openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true
openshell forward stop 18789 2>/dev/null || true
openshell gateway destroy -g nemoclaw 2>/dev/null || true
fi

# Default mode stops here and always reports success; only --verify continues
# to the post-teardown checks below.
if [ "$VERIFY" != "1" ]; then
pass "Cleanup destroy complete (no --verify)"
exit 0
fi

# ── Post-teardown checks (Phase 6 parity) ──
if command -v openshell >/dev/null 2>&1; then
if openshell sandbox get "$SANDBOX_NAME" >/dev/null 2>&1; then
fail "openshell sandbox get '${SANDBOX_NAME}' still succeeds after cleanup"
exit 1
fi
pass "openshell: sandbox '${SANDBOX_NAME}' no longer visible to sandbox get"
Comment on lines +50 to +55
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don't treat CLI failures as proof the sandbox is gone.

openshell sandbox get can fail for auth/API/runtime reasons, and nemoclaw list failures are skipped entirely. In --verify mode that can report green while the sandbox still exists. Please fail unless you can positively identify a real “not found” result.

Also applies to: 60-73

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/e2e/e2e-cloud-experimental/cleanup.sh` around lines 50 - 55, The current
verification treats any nonzero exit from "openshell sandbox get" and skipped
failures from "nemoclaw list" as proof the sandbox is gone; change both checks
to positively detect a "not found" response and otherwise fail: run "openshell
sandbox get $SANDBOX_NAME" capturing stdout/stderr and exit code, and only treat
it as success when the output (or stderr) contains a canonical not-found string
(e.g. "not found", "No such sandbox", or the provider-specific message) — on any
other nonzero exit or ambiguous output, call fail/exit with an error; likewise
run "nemoclaw list" and parse its successful output for SANDBOX_NAME (treat
absence as not found) but if the list command itself errors or returns ambiguous
output, treat that as a test failure rather than success.

else
skip "openshell not on PATH — skipped sandbox get check after cleanup"
fi

# Registry check: after teardown the sandbox must no longer appear in
# `nemoclaw list`. Skipped (not failed) when nemoclaw is not on PATH.
if command -v nemoclaw >/dev/null 2>&1; then
  # Capture output and exit status together; errexit is switched off around
  # the call and the script's baseline options are re-applied afterwards.
  set +e
  list_out=$(nemoclaw list 2>&1)
  list_rc=$?
  set -uo pipefail
  if [ "$list_rc" -eq 0 ]; then
    # Feed grep from a here-string rather than `echo … | grep -q`. With
    # pipefail on, grep -q exiting at the first match can SIGPIPE the writer
    # and turn a real match into a non-zero pipeline status, which would be
    # misread here as "sandbox not listed".
    # NOTE(review): this is a substring match on " <name>", so a longer name
    # sharing this prefix would also match — confirm that is acceptable.
    if grep -Fq " ${SANDBOX_NAME}" <<<"$list_out"; then
      fail "nemoclaw list still lists '${SANDBOX_NAME}' after destroy"
      exit 1
    fi
    pass "nemoclaw list: '${SANDBOX_NAME}' removed from registry"
  else
    # NOTE(review): a failing `nemoclaw list` only yields SKIP, so --verify
    # can still exit 0 without having checked the registry — confirm intended.
    skip "nemoclaw list failed after cleanup — could not verify registry (exit $list_rc)"
  fi
else
  skip "nemoclaw not on PATH — skipped list check after cleanup"
fi

pass "Cleanup + verify complete"
exit 0
11 changes: 7 additions & 4 deletions test/e2e/e2e-cloud-experimental/skip/01-onboard-completion.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,26 @@
# 5) OpenShell sees the sandbox: `openshell sandbox get <sandbox>` succeeds.
# 6) OpenShell list contains the sandbox name.
# 7) `openclaw --help`, `openclaw agent --help`, and `openclaw skills list` succeed inside sandbox.
# 8) `openshell inference get` shows provider `nvidia-nim` and the expected model (VDR3 #12).
# 8) `openshell inference get` shows the expected provider (default nvidia-nim; VDR3 #12) and model.
#
# Requires:
# nemoclaw, openshell, openclaw on PATH.
#
# Env (optional — defaults match test-e2e-cloud-experimental.sh):
# SANDBOX_NAME or NEMOCLAW_SANDBOX_NAME (default: e2e-cloud-experimental)
# CLOUD_EXPERIMENTAL_MODEL (legacy: SCENARIO_A_MODEL, NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL, NEMOCLAW_SCENARIO_A_MODEL)
# CLOUD_EXPERIMENTAL_INFERENCE_PROVIDER — substring matched in `openshell inference get` (default: nvidia-nim; e.g. ollama-local for local gateways)
#
# Example:
# bash test/e2e/e2e-cloud-experimental/checks/01-onboard-completion.sh
# bash test/e2e/e2e-cloud-experimental/skip/01-onboard-completion.sh
# SANDBOX_NAME=my-box CLOUD_EXPERIMENTAL_MODEL=nvidia/nemotron-3-super-120b-a12b bash ...
# SANDBOX_NAME=test01 CLOUD_EXPERIMENTAL_INFERENCE_PROVIDER=ollama-local CLOUD_EXPERIMENTAL_MODEL=nemotron-3-nano:30b bash ...

set -euo pipefail

SANDBOX_NAME="${SANDBOX_NAME:-${NEMOCLAW_SANDBOX_NAME:-e2e-cloud-experimental}}"
CLOUD_EXPERIMENTAL_MODEL="${CLOUD_EXPERIMENTAL_MODEL:-${SCENARIO_A_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-${NEMOCLAW_SCENARIO_A_MODEL:-moonshotai/kimi-k2.5}}}}"
CLOUD_EXPERIMENTAL_INFERENCE_PROVIDER="${CLOUD_EXPERIMENTAL_INFERENCE_PROVIDER:-nvidia-nim}"
die() {
printf '%s\n' "01-onboard-completion: FAIL: $*" >&2
exit 1
Expand Down Expand Up @@ -142,8 +145,8 @@ inf_check=$(openshell inference get 2>&1)
ig=$?
set -e
[ "$ig" -eq 0 ] || die "openshell inference get failed: ${inf_check:0:200}"
echo "$inf_check" | grep -qi "nvidia-nim" \
|| die "openshell inference get missing nvidia-nim provider. Output (first 500 chars): ${inf_check:0:500}"
echo "$inf_check" | grep -Fqi "$CLOUD_EXPERIMENTAL_INFERENCE_PROVIDER" \
|| die "openshell inference get missing provider '${CLOUD_EXPERIMENTAL_INFERENCE_PROVIDER}' (set CLOUD_EXPERIMENTAL_INFERENCE_PROVIDER to match Gateway). Output (first 500 chars): ${inf_check:0:500}"
if ! echo "$inf_check" | grep -Fq "$CLOUD_EXPERIMENTAL_MODEL"; then
die "inference model mismatch: expected substring '${CLOUD_EXPERIMENTAL_MODEL}' (from CLOUD_EXPERIMENTAL_MODEL / NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL) inside 'openshell inference get', but it was not found. If the sandbox was onboarded with another model, export the same id for this check (e.g. NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL=nvidia/nemotron-3-super-120b-a12b). --- openshell inference get (first 800 chars) --- ${inf_check:0:800}"
fi
Expand Down
Loading
Loading