diff --git a/.github/workflows/definition-of-done.yml b/.github/workflows/definition-of-done.yml
new file mode 100644
index 0000000..279e56c
--- /dev/null
+++ b/.github/workflows/definition-of-done.yml
@@ -0,0 +1,152 @@
+name: Definition of Done
+
+# Enforces CLAUDE.md HARD RULE #0: a user-facing feature is not done until
+# demonstrated through its actual runtime. See axonflow-claude-plugin#59
+# for the doctrine + lint-no-mocks rationale.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, edited]
+
+permissions:
+  contents: read
+  pull-requests: read
+
+jobs:
+  lint-no-mocks-in-runtime-e2e:
+    name: Lint — no mocks in runtime-e2e/
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run lint
+        run: |
+          # Test for presence (-f), not the executable bit (-x), and run via
+          # bash: a script that lost its +x must not silently disable the lint.
+          if [ -f scripts/lint-no-mocks-in-runtime-e2e.sh ]; then
+            bash scripts/lint-no-mocks-in-runtime-e2e.sh
+          else
+            echo "lint-no-mocks-in-runtime-e2e.sh not present — skipping (older branch)."
+          fi
+
+  runtime-e2e-required:
+    name: Runtime E2E required for user-facing changes
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Detect user-facing surface changes
+        id: detect
+        env:
+          BASE: ${{ github.event.pull_request.base.sha }}
+          HEAD: ${{ github.event.pull_request.head.sha }}
+        run: |
+          set -uo pipefail
+          # Python SDK user-facing surface: the published package source
+          # (axonflow/), build/install metadata (pyproject.toml), and the
+          # examples/ tree that downstream users copy from. Changes here
+          # are observable from a `pip install axonflow` + `from axonflow
+          # import AxonFlow` flow against a real running agent.
+          USER_FACING_GLOBS=(
+            'axonflow/'
+            'pyproject.toml'
+            'examples/'
+          )
+          # Three-dot diff = merge-base..HEAD, i.e. only what this PR changes
+          # (a two-dot diff also reports files that moved on the base branch).
+          # Fail closed: an empty list from a failed diff would wave the PR
+          # through the gate.
+          if ! CHANGED=$(git diff --name-only "$BASE...$HEAD"); then
+            echo "::error::Could not list the PR's changed files ($BASE...$HEAD)."
+            exit 1
+          fi
+          echo "Changed files in PR:" >&2
+          printf '%s\n' "$CHANGED" | sed 's/^/  /' >&2
+
+          # Read one path per line so names containing spaces or glob
+          # characters are never word-split or expanded.
+          MATCHED=""
+          while IFS= read -r f; do
+            [ -n "$f" ] || continue
+            for pat in "${USER_FACING_GLOBS[@]}"; do
+              case "$f" in
+                "$pat"*|*"/$pat"*)
+                  MATCHED+="$f"$'\n'
+                  break
+                  ;;
+              esac
+            done
+          done <<< "$CHANGED"
+
+          RUNTIME_E2E_TOUCHED=$(grep -c '^runtime-e2e/' <<< "$CHANGED" || true)
+
+          {
+            echo "user_facing_changed=$([ -n "$MATCHED" ] && echo true || echo false)"
+            echo "runtime_e2e_touched=$RUNTIME_E2E_TOUCHED"
+          } >> "$GITHUB_OUTPUT"
+
+          if [ -n "$MATCHED" ]; then
+            echo "User-facing files changed:" >&2
+            printf '%s' "$MATCHED" | sed 's/^/  - /' >&2
+          fi
+
+      - name: Check escape-hatch justification
+        id: hatch
+        if: steps.detect.outputs.user_facing_changed == 'true' && steps.detect.outputs.runtime_e2e_touched == '0'
+        env:
+          PR_TITLE: ${{ github.event.pull_request.title }}
+          PR_BODY: ${{ github.event.pull_request.body }}
+        run: |
+          set -uo pipefail
+          if [[ "$PR_TITLE" == *"[skip-runtime-e2e]"* ]]; then
+            # Here-string rather than `echo | grep -q`: under pipefail, grep -q
+            # exiting early can SIGPIPE the writer and fail a matching body.
+            if grep -qF -- '## Skip-runtime-e2e justification' <<< "$PR_BODY"; then
+              echo "Escape hatch active." >&2
+              echo "skip=true" >> "$GITHUB_OUTPUT"
+            else
+              echo "::error::PR title carries [skip-runtime-e2e] but body has no '## Skip-runtime-e2e justification' section."
+              exit 1
+            fi
+          else
+            echo "skip=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Enforce runtime-e2e/ presence
+        if: steps.detect.outputs.user_facing_changed == 'true' && steps.detect.outputs.runtime_e2e_touched == '0' && steps.hatch.outputs.skip != 'true'
+        run: |
+          cat <<'EOF' >&2
+          ::error::This PR touches real SDK code (axonflow/, pyproject.toml, examples/)
+          without a runtime-e2e/ test in the same PR.
+
+          Per CLAUDE.md HARD RULE #0:
+            A user-facing feature is not done until you have demonstrated it
+            working through its actual runtime — a real `from axonflow import
+            AxonFlow` over real httpx against a real running AxonFlow agent.
+
+          Mocks, stubs, MagicMock, httpx_mock.add_response, and capture-stub
+          harnesses do NOT count as runtime proof.
+
+          To resolve, do ONE of:
+            1. Add a test under runtime-e2e/<feature>/test.py that invokes
+               the SDK against a real running agent and asserts on the
+               agent's response (not on a mocked response object).
+            2. If genuinely internal (build / deps / lint baseline / docs),
+               add `[skip-runtime-e2e]` to PR title AND a
+               `## Skip-runtime-e2e justification` section to PR body.
+
+          See: axonflow-internal-docs/engineering/E2E_EXAMPLES_TESTING_WORKFLOW.md
+          EOF
+          exit 1
+
+      - name: All clear
+        if: steps.detect.outputs.user_facing_changed == 'false' || steps.detect.outputs.runtime_e2e_touched != '0' || steps.hatch.outputs.skip == 'true'
+        run: |
+          if [ "${{ steps.detect.outputs.user_facing_changed }}" = 'false' ]; then
+            echo "No user-facing surface changed. Gate not applicable." >&2
+          elif [ "${{ steps.detect.outputs.runtime_e2e_touched }}" != '0' ]; then
+            echo "User-facing change detected and runtime-e2e/ updated in same PR. ✓" >&2
+          else
+            echo "Escape hatch active with valid justification. ✓" >&2
+          fi
diff --git a/pyproject.toml b/pyproject.toml
index c317f3b..baa254c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -172,6 +172,26 @@ ignore = [
 "examples/**/*.py" = ["T201", "ANN", "S106", "ERA001", "PLC0415", "F541"]  # BLE001 (blind-except) intentionally NOT in this list — examples must demonstrate proper exception handling
 "examples/wcp_retry_idempotency.py" = ["T201", "ANN", "S101", "S106", "ERA001", "BLE001", "PLC0415", "F541", "E501"]  # E2E validation script: bare asserts + long assertion strings are intentional
 "scripts/**/*.py" = ["T201", "ANN", "S106", "BLE001", "UP045", "DTZ005", "PTH123"]
+# runtime-e2e/ is the real-stack assertion harness (CLAUDE.md HARD RULE #0):
+# driver scripts that hit a real running agent and assert on its real
+# responses. These are intentionally not pytest tests; they're standalone
+# scripts. The same carve-outs as tests/heartbeat-real-stack/ apply: print()
+# is the report channel, monkey-patching httpx is how we force scope-mismatch
+# assertions, and broad except is appropriate when inspecting whatever
+# wrapped error the SDK surfaces.
+"runtime-e2e/**/*.py" = [
+    "T201",  # print — the report channel for these scripts
+    "ANN",  # type annotations — driver scripts, not library code
+    "BLE001",  # blind except — we inspect whatever the SDK wraps
+    "S101", "S106",  # assert / hardcoded fixture creds via env
+    "PLC0415",  # lazy/local imports
+    "PLR2004",  # magic numbers — fine in assertion scripts
+    "E501",  # long descriptive lines
+    "F541",  # f-string without placeholders
+    "RET504",  # explicit return for clarity
+    "PLR0911", "PLR0912", "PLR0915",  # complexity in main()
+    "SLF001",  # private member access (httpx.AsyncClient.request monkey-patch)
+]
 
 [tool.ruff.lint.isort]
 known-first-party = ["axonflow"]
diff --git a/runtime-e2e/README.md b/runtime-e2e/README.md
new file mode 100644
index 0000000..7092e45
--- /dev/null
+++ b/runtime-e2e/README.md
@@ -0,0 +1,36 @@
+# SDK runtime tests
+
+Per CLAUDE.md HARD RULE #0: a user-facing feature is not done until you
+have demonstrated it working through the SDK's actual runtime — a real
+httpx call from a real `from axonflow import AxonFlow` against a real
+running AxonFlow agent.
+
+**Tests in this directory MUST hit a real endpoint.** No `unittest.mock`,
+no `MagicMock`, no `httpx_mock.add_response`, no fixture servers. The
+`scripts/lint-no-mocks-in-runtime-e2e.sh` lint enforces this.
+
+<!-- allow-mocks-here: this README names the forbidden patterns only to document them -->
+
+**Convention.** Each test lives in `runtime-e2e/<feature>/test.py`.
+
+**How to run locally.** Set `AXONFLOW_AGENT_URL` (default
+`http://localhost:8080`). Bring up a local agent and register a tenant.
+Then:
+
+```
+export AXONFLOW_AGENT_URL=http://localhost:8080
+RESP=$(curl -s -X POST $AXONFLOW_AGENT_URL/api/v1/register \
+  -H "Content-Type: application/json" -d '{"label":"sdk-runtime-e2e"}')
+export AXONFLOW_TENANT_ID=$(echo "$RESP" | jq -r .tenant_id)
+export AXONFLOW_TENANT_SECRET=$(echo "$RESP" | jq -r .secret)
+# Required by x-axonflow-client/test.py, which sends it as X-License-Token.
+export AXONFLOW_E2E_PLUGIN_TOKEN="<plugin token>"
+
+for d in runtime-e2e/*/; do
+  python3 "${d}test.py" || exit 1
+done
+```
+
+**What counts as a test.** Each test.py exits non-zero if the SDK's real
+wire output to a real agent isn't what you expect. Capture an agent
+log line or response field that echoes a value the SDK sent.
diff --git a/runtime-e2e/x-axonflow-client/test.py b/runtime-e2e/x-axonflow-client/test.py
new file mode 100644
index 0000000..f2e7316
--- /dev/null
+++ b/runtime-e2e/x-axonflow-client/test.py
@@ -0,0 +1,73 @@
+"""Real-stack assertion: SDK emits X-Axonflow-Client: sdk-python/<__version__>.
+
+Per CLAUDE.md HARD RULE #0 — this test MUST hit a real agent.
+"""
+
+import asyncio
+import os
+import sys
+
+import httpx
+
+from axonflow import AxonFlow
+from axonflow._version import __version__
+
+EXPECTED = f"sdk-python/{__version__}"
+ENDPOINT = os.environ.get("AXONFLOW_AGENT_URL", "http://localhost:8080")
+TENANT = os.environ.get("AXONFLOW_TENANT_ID")
+SECRET = os.environ.get("AXONFLOW_TENANT_SECRET")
+PLUGIN_TOKEN = os.environ.get("AXONFLOW_E2E_PLUGIN_TOKEN")
+
+_missing = []
+if not TENANT:
+    _missing.append("AXONFLOW_TENANT_ID")
+if not SECRET:
+    _missing.append("AXONFLOW_TENANT_SECRET")  # noqa: S105
+if not PLUGIN_TOKEN:
+    _missing.append("AXONFLOW_E2E_PLUGIN_TOKEN")  # noqa: S105
+if _missing:
+    sys.stderr.write(f"required env vars not set: {', '.join(_missing)}; see ../README.md\n")
+    sys.exit(2)
+
+# Wrap httpx so we can inject X-License-Token (forces scope-mismatch path
+# so the agent echoes our X-Axonflow-Client value back in its response).
+_orig = httpx.AsyncClient.request
+
+
+async def patched(self, method, url, **kw):
+    """Forward to the real AsyncClient.request with X-License-Token added."""
+    headers = dict(kw.get("headers") or {})
+    headers["X-License-Token"] = PLUGIN_TOKEN
+    kw["headers"] = headers
+    return await _orig(self, method, url, **kw)
+
+
+httpx.AsyncClient.request = patched
+
+
+async def main():
+    """Make one real proxy_llm_call; exit 0 only if the agent's error echoes EXPECTED."""
+    print(f"Asserting wire X-Axonflow-Client = {EXPECTED}")
+    client = AxonFlow(endpoint=ENDPOINT, client_id=TENANT, client_secret=SECRET)
+    async with client:
+        try:
+            await client.proxy_llm_call(user_token="", query="ping", request_type="chat")
+        except Exception as e:
+            msg = str(e)
+            if EXPECTED in msg:
+                print(f"PASS: agent reflected {EXPECTED} in scope_mismatch response")
+                sys.exit(0)
+            # Some error wrappers may strip the agent's body; do a fallback check.
+            if "scope_mismatch" in msg or "Invalid credentials" in msg:
+                print(
+                    f"PARTIAL: agent rejected as expected but response didn't echo client header verbatim ({msg[:200]})",
+                    file=sys.stderr,
+                )
+                sys.exit(1)
+            print(f"FAIL: unexpected error: {msg[:200]}", file=sys.stderr)
+            sys.exit(1)
+        print("UNEXPECTED 200 — agent should have rejected scope_mismatch", file=sys.stderr)
+        sys.exit(1)
+
+
+asyncio.run(main())
diff --git a/scripts/lint-no-mocks-in-runtime-e2e.sh b/scripts/lint-no-mocks-in-runtime-e2e.sh
new file mode 100755
index 0000000..5c76700
--- /dev/null
+++ b/scripts/lint-no-mocks-in-runtime-e2e.sh
@@ -0,0 +1,93 @@
+#!/usr/bin/env bash
+# lint-no-mocks-in-runtime-e2e.sh
+#
+# Per HARD RULE #0 in CLAUDE.md: runtime-e2e/ tests MUST hit a real endpoint
+# (real plugin in real host CLI; real SDK with real fetch against a real
+# running agent). Mocks, stubs, simulators, capture-stub harnesses do NOT
+# count as runtime proof.
+#
+# This script greps runtime-e2e/ for forbidden mock-pattern strings and
+# fails the build if any are found. It runs in CI as part of the
+# definition-of-done.yml gate.
+#
+# To bypass for a specific file (rare, must justify in PR):
+#   add a line `# allow-mocks-here: <reason>` anywhere in that file and
+#   the lint will skip that file. Reviewers should challenge any usage.
+
+set -uo pipefail
+
+SCAN_DIR="${1:-runtime-e2e}"
+
+if [ ! -d "$SCAN_DIR" ]; then
+  echo "lint-no-mocks: $SCAN_DIR not present, nothing to scan."
+  exit 0
+fi
+
+# Forbidden patterns. Each one represents a way to fake a runtime response.
+# Add to this list as new mock libraries arrive in the codebase.
+PATTERNS=(
+  'mockFetch'                 # jest fetch mock
+  'jest\.mock'                # jest module mock
+  'jest\.fn'                  # jest stub
+  'sinon\.stub'               # sinon test double
+  'unittest\.mock'            # python stdlib mock
+  'MagicMock'                 # python mock class
+  'httpx_mock\.add_response'  # python httpx mock
+  'wiremock'                  # java/jvm wiremock
+  'WireMockServer'            # wiremock builder
+  'stubFor'                   # wiremock stub
+  'httptest\.NewServer'       # go httptest stub server
+  'capture-stub\.py'          # local capture harness
+  'fixture-server'            # generic fixture server
+  'msw\.setupServer'          # jsdom mock service worker
+  'nock\.'                    # nock http stubs (node)
+)
+
+EXIT=0
+COUNT=0
+
+# Build a regex from PATTERNS; escape literal dots already in the pattern source
+REGEX=$(IFS='|'; echo "${PATTERNS[*]}")
+
+# Use plain grep -r so we catch untracked files too (CI sees tracked PR
+# content, but local dev/pre-commit may run against new files not yet added).
+# -I skips binary files, whose "Binary file ... matches" line would otherwise
+# be parsed as a hit with a bogus file name.
+matches=$(grep -rnIE "$REGEX" "$SCAN_DIR" 2>/dev/null || true)
+
+if [ -z "$matches" ]; then
+  echo "lint-no-mocks: $SCAN_DIR is clean (no forbidden mock patterns found)."
+  exit 0
+fi
+
+# Drop hits from any file that carries the allow marker anywhere in it.
+while IFS= read -r line; do
+  file=${line%%:*}
+  if [ -n "$file" ] && grep -q "allow-mocks-here:" "$file" 2>/dev/null; then
+    continue
+  fi
+  echo " $line" >&2
+  COUNT=$((COUNT + 1))
+  EXIT=1
+done <<< "$matches"
+
+if [ "$EXIT" -ne 0 ]; then
+  echo "" >&2
+  echo "lint-no-mocks: $COUNT forbidden mock-pattern hit(s) in $SCAN_DIR." >&2
+  echo "" >&2
+  echo "Per CLAUDE.md HARD RULE #0, runtime-e2e/ tests MUST hit a real endpoint." >&2
+  echo "Mocks, stubs, fixture-servers, and capture harnesses do NOT count as" >&2
+  echo "runtime proof. The runtime-e2e/ test for a feature must invoke the" >&2
+  echo "feature through its actual user-facing surface (host CLI tool/skill," >&2
+  echo "real SDK fetch to a running agent, etc.)." >&2
+  echo "" >&2
+  echo "If a specific test legitimately needs a stub (rare — usually means" >&2
+  echo "it's not actually a runtime test and belongs elsewhere), add a" >&2
+  echo "  # allow-mocks-here: <reason>" >&2
+  echo "comment anywhere in that file (the whole file is then skipped); a" >&2
+  echo "reviewer must explicitly approve it." >&2
+else
+  echo "lint-no-mocks: every hit in $SCAN_DIR is in a file marked allow-mocks-here; passing."
+fi
+
+exit "$EXIT"