From 342452b94ca1f6daa79c6ed5bd61e38a167b860c Mon Sep 17 00:00:00 2001
From: GitHub CI <ci@example.com>
Date: Thu, 16 Apr 2026 22:06:14 -0700
Subject: [PATCH 1/2] fix(webhook): surface push-notification delivery path at
 INFO (closes #61)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Quinn reportedly accepts `tasks/pushNotificationConfig/set` but never
POSTs to the registered callback URL. Investigation found the actual
delivery path is likely firing, but nothing in docker logs could
confirm it: Python's default root log level is WARNING, and
`server.py` never called `logging.basicConfig`, so every
`logger.info` in the A2A handler — including "push config registered"
and "webhook delivered" — was silently filtered.

Looking at the logs directly with log-level instrumentation showed
the `opentelemetry.context` logger at WARNING, while the
`a2a_handler` logger was NOTSET (inherits root = WARNING). Only
WARNING+ lines ever made it through, so we only saw the 2
"webhook request error" warnings from a real DNS blip at 02:16 UTC;
everything else — success, registration, late-binding branch, the
whole normal path — ran silent.

Fix: three changes, scoped to observability only (no behavior change):

1. `server.py` — install `logging.basicConfig(level=INFO)` with a
   formatter at module import. Operators can tune via `LOG_LEVEL` env
   var. INFO is the right default for a production agent.

2. `a2a_handler.py::_deliver_webhook` — upgrade successful delivery
   log from DEBUG → INFO, and include task id + final state + URL in
   the message so docker-logs grep works naturally.

3. `a2a_handler.py` JSON-RPC `pushNotificationConfig/set` handler —
   add the matching "push config registered (jsonrpc)" INFO line the
   REST alias has had since PR #19. Both paths now surface identically.

Regression test: `test_webhook_success_is_logged_at_info_level` asserts
a successful delivery produces an INFO record — this is the specific
visibility gap that masked #61 for a full day of polling fallback.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 a2a_handler.py            | 16 +++++++++++++++-
 server.py                 | 11 +++++++++++
 tests/test_a2a_handler.py | 30 ++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/a2a_handler.py b/a2a_handler.py
index 93fb526..db8fee8 100644
--- a/a2a_handler.py
+++ b/a2a_handler.py
@@ -655,7 +655,14 @@ async def _deliver_webhook(record: TaskRecord, push_config: PushNotificationConf
             try:
                 resp = await client.post(push_config.url, json=payload, headers=headers)
                 if resp.status_code < 500:
-                    logger.debug("[a2a] webhook delivered → %s (%s)", push_config.url, resp.status_code)
+                    # Upgraded from DEBUG → INFO (quinn#61). The absence of
+                    # this line was the main reason we couldn't tell whether
+                    # webhook delivery was firing at all — the whole path ran
+                    # under the default WARNING threshold.
+                    logger.info(
+                        "[a2a] webhook delivered task=%s state=%s → %s (%s)",
+                        record.id, record.state, push_config.url, resp.status_code,
+                    )
                     return
                 logger.warning("[a2a] webhook 5xx (attempt %d): %s", attempt + 1, resp.status_code)
             except httpx.RequestError as exc:
@@ -1199,8 +1206,15 @@ def _rpc_result(result):
             )
             async with _store._lock:
                 record.push_config = cfg
+            # Fire immediately if the task already reached terminal state
+            # before the caller got around to registering — otherwise the
+            # webhook would never be delivered (quinn#61).
             if record.state in _TERMINAL:
                 await _push(record)
+            logger.info(
+                "[a2a] push config registered (jsonrpc) task=%s state=%s → %s",
+                task_id, record.state, cfg.url,
+            )
             return _rpc_result({
                 "taskId": task_id,
                 "pushNotificationConfig": {"url": cfg.url, "id": cfg.id},
diff --git a/server.py b/server.py
index ee43be7..0fd650a 100644
--- a/server.py
+++ b/server.py
@@ -24,6 +24,17 @@
 # otherwise block anyone from importing tiny helpers (e.g. _build_agent_card)
 # out of this module for tests. Keep the import inside _main().
 
+# Root-level log config. Python's default is WARNING, which silently filtered
+# every `logger.info(...)` call — including "push config registered" and
+# "webhook delivered" lines from a2a_handler, making every diagnosis of the
+# A2A/webhook path a guess-and-check session (quinn#61). INFO is the right
+# default for a production agent; LOG_LEVEL env var lets operators tune up
+# or down without a code change.
+logging.basicConfig(
+    level=os.environ.get("LOG_LEVEL", "INFO").upper(),
+    format="%(asctime)s %(levelname)s %(name)s %(message)s",
+)
+
 # Module-level logger — used to surface uncaught exceptions in the LangGraph
 # stream/invoke code with a full traceback. Without this the A2A handler only
 # sees str(e), which drops the frame location and makes every runtime bug a
diff --git a/tests/test_a2a_handler.py b/tests/test_a2a_handler.py
index 0a91f01..22ef87b 100644
--- a/tests/test_a2a_handler.py
+++ b/tests/test_a2a_handler.py
@@ -660,6 +660,36 @@ async def test_webhook_delivery_success():
         assert payload["artifact"]["artifactId"] == "test-task-id"
 
 
+@pytest.mark.asyncio
+async def test_webhook_success_is_logged_at_info_level(caplog):
+    """Quinn #61 root cause: successful webhook delivery used to log at
+    DEBUG, which is below the default WARNING threshold the server runs
+    at. There was no way to tell from docker logs whether delivery was
+    actually firing. Upgrade locked in — every successful POST must
+    produce an INFO line carrying the task id, final state, and URL."""
+    import logging
+    record = _make_record(state=COMPLETED)
+    cfg = PushNotificationConfig(url="https://example.com/hook")
+
+    with patch("a2a_handler.httpx.AsyncClient") as mock_client_cls:
+        mock_client = AsyncMock()
+        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+        mock_client.__aexit__ = AsyncMock(return_value=None)
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_client.post = AsyncMock(return_value=mock_response)
+        mock_client_cls.return_value = mock_client
+
+        with caplog.at_level(logging.INFO, logger="a2a_handler"):
+            await _deliver_webhook(record, cfg)
+
+    info_records = [r for r in caplog.records if r.levelno == logging.INFO]
+    assert any("webhook delivered" in r.getMessage() for r in info_records), (
+        "successful delivery must log at INFO, not DEBUG — otherwise the "
+        "default WARNING log level silently hides whether delivery is working"
+    )
+
+
 @pytest.mark.asyncio
 async def test_webhook_delivery_no_token():
     record = _make_record(state=FAILED, error_message="oops")

From d481fc7dc472ffe3f38e51e250a023e0b2f1ed28 Mon Sep 17 00:00:00 2001
From: GitHub CI <ci@example.com>
Date: Thu, 16 Apr 2026 22:13:24 -0700
Subject: [PATCH 2/2] fix(a2a): message/send JSON-RPC result must be a Task
 with kind
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Spec audit of Quinn against the A2A streaming guide (quinn#61 follow-
up) found that the JSON-RPC `message/send` handler built its result
dict inline at lines 1124-1129 — every other path that returns a Task
(SSE initial frame, REST alias, `tasks/get`, `tasks/resubscribe`) uses
`_task_to_response(record)` which correctly sets `kind: "task"`. The
inline build omitted it.

`@a2a-js/sdk`'s client routes results by the `kind` discriminator;
missing it means the SDK may treat the response as a `Message`
instead of a Task, which is the same class of bug that
Quinn #40 fixed on the streaming side.

Fix: replace the inline dict with `_task_to_response(record)` so every
Task snapshot goes through a single builder.

Updated test_message_send_returns_submitted to assert the kind
discriminator + contextId are present on the result.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 a2a_handler.py            | 13 +++++++------
 tests/test_a2a_handler.py |  5 +++++
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/a2a_handler.py b/a2a_handler.py
index db8fee8..fd5da5f 100644
--- a/a2a_handler.py
+++ b/a2a_handler.py
@@ -1121,12 +1121,13 @@ def _rpc_result(result):
 
             if method == "message/send":
                 record = await _submit_task(text, context_id, push_config, caller_trace)
-                return _rpc_result({
-                    "id": record.id,
-                    "contextId": record.context_id,
-                    "status": {"state": SUBMITTED,
-                               "timestamp": record.created_at},
-                })
+                # Use _task_to_response so the `kind: "task"` discriminator
+                # and every other Task field land consistently with the
+                # SSE / REST / tasks/get paths. Building the dict inline
+                # omitted `kind`, which `@a2a-js/sdk` uses to route the
+                # result into its Task handler (spec-compliance with the
+                # a2a-streaming guide).
+                return _rpc_result(_task_to_response(record))
 
             # streaming path — SSE frames wrapped in JSON-RPC envelopes
             return StreamingResponse(
diff --git a/tests/test_a2a_handler.py b/tests/test_a2a_handler.py
index 22ef87b..8c34d1b 100644
--- a/tests/test_a2a_handler.py
+++ b/tests/test_a2a_handler.py
@@ -1033,8 +1033,13 @@ async def test_message_send_returns_submitted():
 
     assert resp.status_code == 200
     data = resp.json()
+    # Spec-compliance: the result must be a full Task object with the
+    # `kind` discriminator — @a2a-js/sdk routes by kind. Inline-dict
+    # builds have omitted it in the past (quinn#61 investigation).
+    assert data["result"]["kind"] == "task"
     assert data["result"]["status"]["state"] == SUBMITTED
     assert "id" in data["result"]
+    assert "contextId" in data["result"]
 
 
 @pytest.mark.asyncio