From 342452b94ca1f6daa79c6ed5bd61e38a167b860c Mon Sep 17 00:00:00 2001 From: GitHub CI Date: Thu, 16 Apr 2026 22:06:14 -0700 Subject: [PATCH 1/2] fix(webhook): surface push-notification delivery path at INFO (closes #61) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quinn reportedly accepts `tasks/pushNotificationConfig/set` but never POSTs to the registered callback URL. Investigation found the actual delivery path is likely firing, but nothing in docker logs could confirm it: Python's default root log level is WARNING, and `server.py` never called `logging.basicConfig`, so every `logger.info` in the A2A handler — including "push config registered" and "webhook delivered" — was silently filtered. Looking at the logs directly with log-level instrumentation showed the `opentelemetry.context` logger at WARNING, while the `a2a_handler` logger was NOTSET (inherits root = WARNING). Only WARNING+ lines ever made it through, so we only saw the 2 "webhook request error" warnings from a real DNS blip at 02:16 UTC; everything else — success, registration, late-binding branch, the whole normal path — ran silent. Fix: three changes, scoped to observability only (no behavior change): 1. `server.py` — install `logging.basicConfig(level=INFO)` with a formatter at module import. Operators can tune via `LOG_LEVEL` env var. INFO is the right default for a production agent. 2. `a2a_handler.py::_deliver_webhook` — upgrade successful delivery log from DEBUG → INFO, and include task id + final state + URL in the message so docker-logs grep works naturally. 3. `a2a_handler.py` JSON-RPC `pushNotificationConfig/set` handler — add the matching "push config registered (jsonrpc)" INFO line the REST alias has had since PR #19. Both paths now surface identically. Regression test: `test_webhook_success_is_logged_at_info_level` asserts a successful delivery produces an INFO record — this is the specific visibility gap that masked #61 for a full day of polling fallback. Co-Authored-By: Claude Opus 4.6 (1M context) --- a2a_handler.py | 16 +++++++++++++++- server.py | 11 +++++++++++ tests/test_a2a_handler.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/a2a_handler.py b/a2a_handler.py index 93fb526..db8fee8 100644 --- a/a2a_handler.py +++ b/a2a_handler.py @@ -655,7 +655,14 @@ async def _deliver_webhook(record: TaskRecord, push_config: PushNotificationConf try: resp = await client.post(push_config.url, json=payload, headers=headers) if resp.status_code < 500: - logger.debug("[a2a] webhook delivered → %s (%s)", push_config.url, resp.status_code) + # Upgraded from DEBUG → INFO (quinn#61). The absence of + # this line was the main reason we couldn't tell whether + # webhook delivery was firing at all — the whole path ran + # under the default WARNING threshold. + logger.info( + "[a2a] webhook delivered task=%s state=%s → %s (%s)", + record.id, record.state, push_config.url, resp.status_code, + ) return logger.warning("[a2a] webhook 5xx (attempt %d): %s", attempt + 1, resp.status_code) except httpx.RequestError as exc: @@ -1199,8 +1206,15 @@ def _rpc_result(result): ) async with _store._lock: record.push_config = cfg + # Fire immediately if the task already reached terminal state + # before the caller got around to registering — otherwise the + # webhook would never be delivered (quinn#61). if record.state in _TERMINAL: await _push(record) + logger.info( + "[a2a] push config registered (jsonrpc) task=%s state=%s → %s", + task_id, record.state, cfg.url, + ) return _rpc_result({ "taskId": task_id, "pushNotificationConfig": {"url": cfg.url, "id": cfg.id}, diff --git a/server.py b/server.py index ee43be7..0fd650a 100644 --- a/server.py +++ b/server.py @@ -24,6 +24,17 @@ # otherwise block anyone from importing tiny helpers (e.g. _build_agent_card) # out of this module for tests. Keep the import inside _main(). +# Root-level log config. Python's default is WARNING, which silently filtered +# every `logger.info(...)` call — including "push config registered" and +# "webhook delivered" lines from a2a_handler, making every diagnosis of the +# A2A/webhook path a guess-and-check session (quinn#61). INFO is the right +# default for a production agent; LOG_LEVEL env var lets operators tune up +# or down without a code change. +logging.basicConfig( + level=os.environ.get("LOG_LEVEL", "INFO").upper(), + format="%(asctime)s %(levelname)s %(name)s %(message)s", +) + # Module-level logger — used to surface uncaught exceptions in the LangGraph # stream/invoke code with a full traceback. Without this the A2A handler only # sees str(e), which drops the frame location and makes every runtime bug a diff --git a/tests/test_a2a_handler.py b/tests/test_a2a_handler.py index 0a91f01..22ef87b 100644 --- a/tests/test_a2a_handler.py +++ b/tests/test_a2a_handler.py @@ -660,6 +660,36 @@ async def test_webhook_delivery_success(): assert payload["artifact"]["artifactId"] == "test-task-id" +@pytest.mark.asyncio +async def test_webhook_success_is_logged_at_info_level(caplog): + """Quinn #61 root cause: successful webhook delivery used to log at + DEBUG, which is below the default WARNING threshold the server runs + at. There was no way to tell from docker logs whether delivery was + actually firing. Upgrade locked in — every successful POST must + produce an INFO line carrying the task id, final state, and URL.""" + import logging + record = _make_record(state=COMPLETED) + cfg = PushNotificationConfig(url="https://example.com/hook") + + with patch("a2a_handler.httpx.AsyncClient") as mock_client_cls: + mock_client = AsyncMock() + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_response = MagicMock() + mock_response.status_code = 200 + mock_client.post = AsyncMock(return_value=mock_response) + mock_client_cls.return_value = mock_client + + with caplog.at_level(logging.INFO, logger="a2a_handler"): + await _deliver_webhook(record, cfg) + + info_records = [r for r in caplog.records if r.levelno == logging.INFO] + assert any("webhook delivered" in r.getMessage() for r in info_records), ( + "successful delivery must log at INFO, not DEBUG — otherwise the " + "default WARNING log level silently hides whether delivery is working" + ) + + @pytest.mark.asyncio async def test_webhook_delivery_no_token(): record = _make_record(state=FAILED, error_message="oops") From d481fc7dc472ffe3f38e51e250a023e0b2f1ed28 Mon Sep 17 00:00:00 2001 From: GitHub CI Date: Thu, 16 Apr 2026 22:13:24 -0700 Subject: [PATCH 2/2] fix(a2a): message/send JSON-RPC result must be a Task with kind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spec audit of Quinn against the A2A streaming guide (quinn#61 follow- up) found that the JSON-RPC `message/send` handler built its result dict inline at lines 1124-1129 — every other path that returns a Task (SSE initial frame, REST alias, `tasks/get`, `tasks/resubscribe`) uses `_task_to_response(record)` which correctly sets `kind: "task"`. The inline build omitted it. `@a2a-js/sdk`'s client routes results by the `kind` discriminator; missing it means the SDK may treat the response as a `Message` instead of a Task, which is the same class of bug that Quinn #40 fixed on the streaming side. Fix: replace the inline dict with `_task_to_response(record)` so every Task snapshot goes through a single builder. Updated test_message_send_returns_submitted to assert the kind discriminator + contextId are present on the result. Co-Authored-By: Claude Opus 4.6 (1M context) --- a2a_handler.py | 13 +++++++------ tests/test_a2a_handler.py | 5 +++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/a2a_handler.py b/a2a_handler.py index db8fee8..fd5da5f 100644 --- a/a2a_handler.py +++ b/a2a_handler.py @@ -1121,12 +1121,13 @@ def _rpc_result(result): if method == "message/send": record = await _submit_task(text, context_id, push_config, caller_trace) - return _rpc_result({ - "id": record.id, - "contextId": record.context_id, - "status": {"state": SUBMITTED, - "timestamp": record.created_at}, - }) + # Use _task_to_response so the `kind: "task"` discriminator + # and every other Task field land consistently with the + # SSE / REST / tasks/get paths. Building the dict inline + # omitted `kind`, which `@a2a-js/sdk` uses to route the + # result into its Task handler (spec-compliance with the + # a2a-streaming guide). + return _rpc_result(_task_to_response(record)) # streaming path — SSE frames wrapped in JSON-RPC envelopes return StreamingResponse( diff --git a/tests/test_a2a_handler.py b/tests/test_a2a_handler.py index 22ef87b..8c34d1b 100644 --- a/tests/test_a2a_handler.py +++ b/tests/test_a2a_handler.py @@ -1033,8 +1033,13 @@ async def test_message_send_returns_submitted(): assert resp.status_code == 200 data = resp.json() + # Spec-compliance: the result must be a full Task object with the + # `kind` discriminator — @a2a-js/sdk routes by kind. Inline-dict + # builds have omitted it in the past (quinn#61 investigation). + assert data["result"]["kind"] == "task" assert data["result"]["status"]["state"] == SUBMITTED assert "id" in data["result"] + assert "contextId" in data["result"] @pytest.mark.asyncio