Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions .github/workflows/definition-of-done.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
name: Definition of Done

# Enforces CLAUDE.md HARD RULE #0: a user-facing feature is not done until
# demonstrated through its actual runtime. See axonflow-claude-plugin#59
# for the doctrine + lint-no-mocks rationale.

on:
pull_request:
# `edited` is listed in addition to the code-change events: the
# escape-hatch step in this workflow reads the PR title and body, so the
# gate presumably needs to re-run when those are edited.
types: [opened, synchronize, reopened, edited]

# Read-only token: the jobs below only check out the repository and read
# pull-request metadata from the event payload.
permissions:
contents: read
pull-requests: read

jobs:
lint-no-mocks-in-runtime-e2e:
  name: Lint — no mocks in runtime-e2e/
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Run lint
      run: |
        # Test for the file itself (-f), not its executable bit (-x), and run
        # it through bash. With -x, a script committed without mode +x was
        # reported as "not present" and the lint was skipped silently.
        if [ -f scripts/lint-no-mocks-in-runtime-e2e.sh ]; then
          bash scripts/lint-no-mocks-in-runtime-e2e.sh
        else
          echo "lint-no-mocks-in-runtime-e2e.sh not present — skipping (older branch)."
        fi

# Gate job: fails a PR that changes user-facing files without also touching
# runtime-e2e/, unless the escape hatch in the PR title/body is used.
runtime-e2e-required:
name: Runtime E2E required for user-facing changes
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
# Fetch full history rather than a shallow clone: the detect step runs
# `git diff` between the PR's base and head SHAs, so both commits must
# be present locally.
fetch-depth: 0

- name: Detect user-facing surface changes
  id: detect
  env:
    BASE: ${{ github.event.pull_request.base.sha }}
    HEAD: ${{ github.event.pull_request.head.sha }}
  run: |
    set -uo pipefail
    # Python SDK user-facing surface: the published package source
    # (axonflow/), build/install metadata (pyproject.toml), and the
    # examples/ tree that downstream users copy from. Changes here
    # are observable from a `pip install axonflow` + `from axonflow
    # import AxonFlow` flow against a real running agent.
    USER_FACING_GLOBS=(
      'axonflow/'
      'pyproject.toml'
      'examples/'
    )

    # Three-dot range: diff from the merge base of BASE and HEAD to HEAD,
    # i.e. only what this PR changed. A two-dot diff would also list files
    # that changed on the base branch after the PR branched off.
    # Fail closed: if the diff cannot be computed, an empty list must not
    # be mistaken for "no user-facing change".
    if ! CHANGED=$(git diff --name-only "$BASE...$HEAD"); then
      echo "::error::git diff $BASE...$HEAD failed; cannot determine changed files."
      exit 1
    fi

    echo "Changed files in PR:" >&2
    MATCHED=()
    RUNTIME_E2E_TOUCHED=0
    # Read one path per line so names containing spaces or glob characters
    # are neither split nor expanded.
    while IFS= read -r f; do
      [ -n "$f" ] || continue
      printf ' %s\n' "$f" >&2

      case "$f" in
        runtime-e2e/*) RUNTIME_E2E_TOUCHED=$((RUNTIME_E2E_TOUCHED + 1)) ;;
      esac

      # A path counts as user-facing when it starts with one of the
      # patterns or contains "/<pattern>" anywhere.
      for pat in "${USER_FACING_GLOBS[@]}"; do
        case "$f" in
          "$pat"*|*"/$pat"*)
            MATCHED+=("$f")
            break
            ;;
        esac
      done
    done <<< "$CHANGED"

    if [ "${#MATCHED[@]}" -gt 0 ]; then
      USER_FACING_CHANGED=true
    else
      USER_FACING_CHANGED=false
    fi

    # Outputs consumed by the later steps' `if:` conditions.
    {
      echo "user_facing_changed=$USER_FACING_CHANGED"
      echo "runtime_e2e_touched=$RUNTIME_E2E_TOUCHED"
    } >> "$GITHUB_OUTPUT"

    if [ "$USER_FACING_CHANGED" = true ]; then
      echo "User-facing files changed:" >&2
      for f in "${MATCHED[@]}"; do echo " - $f" >&2; done
    fi

- name: Check escape-hatch justification
  id: hatch
  if: steps.detect.outputs.user_facing_changed == 'true' && steps.detect.outputs.runtime_e2e_touched == '0'
  env:
    PR_TITLE: ${{ github.event.pull_request.title }}
    PR_BODY: ${{ github.event.pull_request.body }}
  run: |
    set -uo pipefail
    # The hatch needs both the title tag and a justification heading in the
    # body. A tag without the heading is an error, not a silent fall-through.
    if [[ "$PR_TITLE" == *"[skip-runtime-e2e]"* ]]; then
      # Here-string instead of `echo "$PR_BODY" | grep -q`: under pipefail,
      # grep -q exiting at the first match can kill echo with SIGPIPE on a
      # large body, failing the pipeline and rejecting a valid justification.
      # -F because the heading is a literal string, not a regex.
      if grep -qF -- '## Skip-runtime-e2e justification' <<< "$PR_BODY"; then
        echo "Escape hatch active." >&2
        echo "skip=true" >> "$GITHUB_OUTPUT"
      else
        echo "::error::PR title carries [skip-runtime-e2e] but body has no '## Skip-runtime-e2e justification' section."
        exit 1
      fi
    else
      echo "skip=false" >> "$GITHUB_OUTPUT"
    fi

- name: Enforce runtime-e2e/ presence
# Runs only when user-facing files changed, nothing under runtime-e2e/ was
# touched, and the escape hatch was not accepted. It prints the remediation
# text to stderr and always fails the job.
if: steps.detect.outputs.user_facing_changed == 'true' && steps.detect.outputs.runtime_e2e_touched == '0' && steps.hatch.outputs.skip != 'true'
run: |
# Quoted delimiter ('EOF') so the backticks and angle brackets in the
# message are emitted literally, with no shell expansion.
cat <<'EOF' >&2
::error::This PR touches real SDK code (axonflow/, pyproject.toml, examples/)
without runtime-e2e/ test in the same PR.

Per CLAUDE.md HARD RULE #0:
A user-facing feature is not done until you have demonstrated it
working through its actual runtime — a real `from axonflow import
AxonFlow` over real httpx against a real running AxonFlow agent.

Mocks, stubs, MagicMock, httpx_mock.add_response, and capture-stub
harnesses do NOT count as runtime proof.

To resolve, do ONE of:
1. Add a test under runtime-e2e/<feature>/test.py that invokes
the SDK against a real running agent and asserts on the
agent's response (not on a mocked response object).
2. If genuinely internal (build / deps / lint baseline / docs),
add `[skip-runtime-e2e]` to PR title AND a
`## Skip-runtime-e2e justification` section to PR body.

See: axonflow-internal-docs/engineering/E2E_EXAMPLES_TESTING_WORKFLOW.md
EOF
exit 1

- name: All clear
  if: steps.detect.outputs.user_facing_changed == 'false' || steps.detect.outputs.runtime_e2e_touched != '0' || steps.hatch.outputs.skip == 'true'
  run: |
    # Report which of the three passing conditions let this PR through.
    user_facing="${{ steps.detect.outputs.user_facing_changed }}"
    e2e_touched="${{ steps.detect.outputs.runtime_e2e_touched }}"

    if [ "$user_facing" = 'false' ]; then
      verdict="No user-facing surface changed. Gate not applicable."
    elif [ "$e2e_touched" != '0' ]; then
      verdict="User-facing change detected and runtime-e2e/ updated in same PR. ✓"
    else
      verdict="Escape hatch active with valid justification. ✓"
    fi
    echo "$verdict" >&2
20 changes: 20 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,26 @@ ignore = [
"examples/**/*.py" = ["T201", "ANN", "S106", "ERA001", "PLC0415", "F541"] # BLE001 (blind-except) intentionally NOT in this list — examples must demonstrate proper exception handling
"examples/wcp_retry_idempotency.py" = ["T201", "ANN", "S101", "S106", "ERA001", "BLE001", "PLC0415", "F541", "E501"] # E2E validation script: bare asserts + long assertion strings are intentional
"scripts/**/*.py" = ["T201", "ANN", "S106", "BLE001", "UP045", "DTZ005", "PTH123"]
# runtime-e2e/ is the real-stack assertion harness (CLAUDE.md HARD RULE #0):
# driver scripts that hit a real running agent and assert on its real
# responses. These are intentionally not pytest tests; they're standalone
# scripts. The same carve-outs as tests/heartbeat-real-stack/ apply: print()
# is the report channel, monkey-patching httpx is how we force scope-mismatch
# assertions, and broad except is appropriate when inspecting whatever
# wrapped error the SDK surfaces.
"runtime-e2e/**/*.py" = [
"T201", # print found — print() is the report channel for these scripts
"ANN", # flake8-annotations — driver scripts, not library code
"BLE001", # blind except — we inspect whatever error the SDK wraps
"S101", "S106", # use of assert / possible hardcoded password argument (fixture creds come from env)
"PLC0415", # import outside top level — lazy/local imports
"PLR2004", # magic value comparison — fine in assertion scripts
"E501", # line too long — long descriptive lines
"F541", # f-string without placeholders
"RET504", # unnecessary assignment before return — kept for clarity
"PLR0911", "PLR0912", "PLR0915", # too many returns / branches / statements in main()
"SLF001", # private member access (httpx.AsyncClient.request monkey-patch)
]

[tool.ruff.lint.isort]
known-first-party = ["axonflow"]
Expand Down
34 changes: 34 additions & 0 deletions runtime-e2e/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# SDK runtime tests

Per CLAUDE.md HARD RULE #0: a user-facing feature is not done until you
have demonstrated it working through the SDK's actual runtime — a real
httpx call from a real `from axonflow import AxonFlow` against a real
running AxonFlow agent.

**Tests in this directory MUST hit a real endpoint.** No `unittest.mock`,
no `MagicMock`, no `httpx_mock.add_response`, no fixture servers. The
`scripts/lint-no-mocks-in-runtime-e2e.sh` lint enforces this.
<!-- allow-mocks-here: this README documents the forbidden-pattern list itself; no executable usage. -->


**Convention.** Each test lives in `runtime-e2e/<feature>/test.py`.

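**How to run locally.** Set `AXONFLOW_AGENT_URL` (default
`http://localhost:8080`). Bring up a local agent and register a tenant.
Individual tests may need extra variables: `x-axonflow-client/test.py`
also requires `AXONFLOW_E2E_PLUGIN_TOKEN` and exits with status 2 if any
required variable is unset. Then: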

```
export AXONFLOW_AGENT_URL=http://localhost:8080
RESP=$(curl -s -X POST $AXONFLOW_AGENT_URL/api/v1/register \
-H "Content-Type: application/json" -d '{"label":"sdk-runtime-e2e"}')
export AXONFLOW_TENANT_ID=$(echo "$RESP" | jq -r .tenant_id)
export AXONFLOW_TENANT_SECRET=$(echo "$RESP" | jq -r .secret)

for d in runtime-e2e/*/; do
python3 "$d/test.py" || exit 1
done
```

**What counts as a test.** Each test.py exits non-zero if the SDK's real
wire output to a real agent isn't what you expect. Capture an agent
log line or response field that echoes a value the SDK sent.
71 changes: 71 additions & 0 deletions runtime-e2e/x-axonflow-client/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Real-stack assertion: SDK emits X-Axonflow-Client: sdk-python/<__version__>.

Per CLAUDE.md HARD RULE #0 — this test MUST hit a real agent.
"""

import asyncio
import os
import sys

import httpx

from axonflow import AxonFlow
from axonflow._version import __version__

# Header value the SDK is expected to put on the wire.
EXPECTED = f"sdk-python/{__version__}"
# Agent endpoint and credentials, all taken from the environment.
ENDPOINT = os.environ.get("AXONFLOW_AGENT_URL", "http://localhost:8080")
TENANT = os.environ.get("AXONFLOW_TENANT_ID")
SECRET = os.environ.get("AXONFLOW_TENANT_SECRET")
PLUGIN_TOKEN = os.environ.get("AXONFLOW_E2E_PLUGIN_TOKEN")

# Collect the name of every required variable that is unset or empty, then
# fail once (exit status 2) with a message that names them, so the operator
# does not have to guess which one is missing.
_missing = []
if not TENANT:
    _missing.append("AXONFLOW_TENANT_ID")
if not SECRET:
    _missing.append("AXONFLOW_TENANT_SECRET")  # noqa: S105
if not PLUGIN_TOKEN:
    _missing.append("AXONFLOW_E2E_PLUGIN_TOKEN")  # noqa: S105
if _missing:
    sys.stderr.write(
        f"required env vars not set: {', '.join(_missing)}; see ../README.md\n"
    )
    sys.exit(2)

# Replace httpx.AsyncClient.request with a wrapper that adds X-License-Token
# to the headers of every call it receives. Intent, per the test design: this
# forces the scope-mismatch path so the agent echoes our X-Axonflow-Client
# value back in its response.
_orig = httpx.AsyncClient.request


async def patched(self, method, url, **kw):
    """Call the original ``request`` with ``X-License-Token`` set to PLUGIN_TOKEN.

    Any headers supplied by the caller are copied first; all other keyword
    arguments are forwarded unchanged.
    """
    merged = dict(kw.get("headers") or {})
    merged["X-License-Token"] = PLUGIN_TOKEN
    return await _orig(self, method, url, **{**kw, "headers": merged})


httpx.AsyncClient.request = patched


async def main():
    """Issue one ``proxy_llm_call`` and exit according to the error text it raises.

    Exits 0 when the raised error's text contains EXPECTED. Exits 1 in every
    other case: a rejection that does not echo the header (PARTIAL), any other
    error (FAIL), or a call that raises nothing at all.
    """
    print(f"Asserting wire X-Axonflow-Client = {EXPECTED}")
    sdk = AxonFlow(endpoint=ENDPOINT, client_id=TENANT, client_secret=SECRET)
    async with sdk:
        try:
            await sdk.proxy_llm_call(user_token="", query="ping", request_type="chat")
        except Exception as exc:
            detail = str(exc)
            if EXPECTED in detail:
                print(f"PASS: agent reflected {EXPECTED} in scope_mismatch response")
                sys.exit(0)
            # Some error wrappers may strip the agent's body, so a recognised
            # rejection without the echoed header is reported separately
            # from an unrelated error. Both are failures.
            rejected = any(
                marker in detail for marker in ("scope_mismatch", "Invalid credentials")
            )
            if rejected:
                report = f"PARTIAL: agent rejected as expected but response didn't echo client header verbatim ({detail[:200]})"
            else:
                report = f"FAIL: unexpected error: {detail[:200]}"
            print(report, file=sys.stderr)
            sys.exit(1)
        else:
            # No exception means the agent accepted the call.
            print("UNEXPECTED 200 — agent should have rejected scope_mismatch", file=sys.stderr)
            sys.exit(1)


asyncio.run(main())
88 changes: 88 additions & 0 deletions scripts/lint-no-mocks-in-runtime-e2e.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env bash
# lint-no-mocks-in-runtime-e2e.sh
#
# Per HARD RULE #0 in CLAUDE.md: runtime-e2e/ tests MUST hit a real endpoint
# (real plugin in real host CLI; real SDK with real fetch against a real
# running agent). Mocks, stubs, simulators, capture-stub harnesses do NOT
# count as runtime proof.
#
# This script greps runtime-e2e/ for forbidden mock-pattern strings and
# fails the build if any are found. It runs in CI as part of the
# definition-of-done.yml gate.
#
# To bypass for a specific file (rare, must justify in PR):
# add a line `# allow-mocks-here: <reason>` near the offending line and
# the lint will skip that file. Reviewers should challenge any usage.

# No `set -e` on purpose: grep exiting 1 ("no match") is a normal outcome
# here, and every status that matters is checked explicitly below.
set -uo pipefail

# Directory to scan: first positional argument, defaulting to runtime-e2e.
SCAN_DIR="${1:-runtime-e2e}"

if [ ! -d "$SCAN_DIR" ]; then
  echo "lint-no-mocks: $SCAN_DIR not present, nothing to scan."
  exit 0
fi

# Forbidden patterns. Each one represents a way to fake a runtime response.
# Add to this list as new mock libraries arrive in the codebase.
PATTERNS=(
  'mockFetch'                 # jest fetch mock
  'jest\.mock'                # jest module mock
  'jest\.fn'                  # jest stub
  'sinon\.stub'               # sinon test double
  'unittest\.mock'            # python stdlib mock
  'MagicMock'                 # python mock class
  'httpx_mock\.add_response'  # python httpx mock
  'wiremock'                  # java/jvm wiremock
  'WireMockServer'            # wiremock builder
  'stubFor'                   # wiremock stub
  'httptest\.NewServer'       # go httptest stub server
  'capture-stub\.py'          # local capture harness
  'fixture-server'            # generic fixture server
  'msw\.setupServer'          # jsdom mock service worker
  'nock\.'                    # nock http stubs (node)
)

EXIT=0
COUNT=0

# Join PATTERNS into a single ERE alternation; literal dots are already
# escaped in the pattern source.
REGEX=$(IFS='|'; echo "${PATTERNS[*]}")

# Use plain grep -r so we catch untracked files too (CI sees tracked PR
# content, but local dev/pre-commit may run against new files not yet added).
# grep exits 0 on a match, 1 on no match and >1 on error. Only 0 and 1 are
# valid lint outcomes; an error must not be reported as "clean".
grep_status=0
matches=$(grep -rnE -- "$REGEX" "$SCAN_DIR") || grep_status=$?

if [ "$grep_status" -gt 1 ]; then
  echo "lint-no-mocks: grep failed (exit $grep_status) while scanning $SCAN_DIR; refusing to report a clean result." >&2
  exit "$grep_status"
fi

if [ -z "$matches" ]; then
  echo "lint-no-mocks: $SCAN_DIR is clean (no forbidden mock patterns found)."
  exit 0
fi

# Drop hits from files that carry the allow marker. The marker is
# file-scoped: one `allow-mocks-here:` anywhere in a file exempts every hit
# in that file.
while IFS= read -r line; do
  # grep -rn prints "path:lineno:text"; keep everything before the first ':'.
  # (A path that itself contains ':' would be truncated here.)
  file=${line%%:*}
  if [ -n "$file" ] && grep -q -- "allow-mocks-here:" "$file" 2>/dev/null; then
    continue
  fi
  echo " $line"
  COUNT=$((COUNT + 1))
  EXIT=1
done <<< "$matches"

if [ "$EXIT" -ne 0 ]; then
  echo ""
  echo "lint-no-mocks: $COUNT forbidden mock-pattern hit(s) in $SCAN_DIR." >&2
  echo "" >&2
  echo "Per CLAUDE.md HARD RULE #0, runtime-e2e/ tests MUST hit a real endpoint." >&2
  echo "Mocks, stubs, fixture-servers, and capture harnesses do NOT count as" >&2
  echo "runtime proof. The runtime-e2e/ test for a feature must invoke the" >&2
  echo "feature through its actual user-facing surface (host CLI tool/skill," >&2
  echo "real SDK fetch to a running agent, etc.)." >&2
  echo "" >&2
  echo "If a specific test legitimately needs a stub (rare — usually means" >&2
  echo "it's not actually a runtime test and belongs elsewhere), add a" >&2
  echo " # allow-mocks-here: <reason>" >&2
  echo "comment on the line and a reviewer must explicitly approve it." >&2
else
  # Every hit was in an allow-listed file; say so instead of exiting silently.
  echo "lint-no-mocks: $SCAN_DIR is clean (every hit is in a file carrying an allow-mocks-here: marker)."
fi

exit "$EXIT"
Loading