diff --git a/a2a_handler.py b/a2a_handler.py index 93fb526..fd5da5f 100644 --- a/a2a_handler.py +++ b/a2a_handler.py @@ -655,7 +655,14 @@ async def _deliver_webhook(record: TaskRecord, push_config: PushNotificationConf try: resp = await client.post(push_config.url, json=payload, headers=headers) if resp.status_code < 500: - logger.debug("[a2a] webhook delivered → %s (%s)", push_config.url, resp.status_code) + # Upgraded from DEBUG → INFO (quinn#61). The absence of + # this line was the main reason we couldn't tell whether + # webhook delivery was firing at all — the whole path ran + # under the default WARNING threshold. + logger.info( + "[a2a] webhook delivered task=%s state=%s → %s (%s)", + record.id, record.state, push_config.url, resp.status_code, + ) return logger.warning("[a2a] webhook 5xx (attempt %d): %s", attempt + 1, resp.status_code) except httpx.RequestError as exc: @@ -1114,12 +1121,13 @@ def _rpc_result(result): if method == "message/send": record = await _submit_task(text, context_id, push_config, caller_trace) - return _rpc_result({ - "id": record.id, - "contextId": record.context_id, - "status": {"state": SUBMITTED, - "timestamp": record.created_at}, - }) + # Use _task_to_response so the `kind: "task"` discriminator + # and every other Task field land consistently with the + # SSE / REST / tasks/get paths. Building the dict inline + # omitted `kind`, which `@a2a-js/sdk` uses to route the + # result into its Task handler (spec-compliance with the + # a2a-streaming guide). + return _rpc_result(_task_to_response(record)) # streaming path — SSE frames wrapped in JSON-RPC envelopes return StreamingResponse( @@ -1199,8 +1207,15 @@ def _rpc_result(result): ) async with _store._lock: record.push_config = cfg + # Fire immediately if the task already reached terminal state + # before the caller got around to registering — otherwise the + # webhook would never be delivered (quinn#61). if record.state in _TERMINAL: await _push(record) + logger.info( + "[a2a] push config registered (jsonrpc) task=%s state=%s → %s", + task_id, record.state, cfg.url, + ) return _rpc_result({ "taskId": task_id, "pushNotificationConfig": {"url": cfg.url, "id": cfg.id}, diff --git a/server.py b/server.py index ee43be7..0fd650a 100644 --- a/server.py +++ b/server.py @@ -24,6 +24,17 @@ # otherwise block anyone from importing tiny helpers (e.g. _build_agent_card) # out of this module for tests. Keep the import inside _main(). +# Root-level log config. Python's default is WARNING, which silently filtered +# every `logger.info(...)` call — including "push config registered" and +# "webhook delivered" lines from a2a_handler, making every diagnosis of the +# A2A/webhook path a guess-and-check session (quinn#61). INFO is the right +# default for a production agent; LOG_LEVEL env var lets operators tune up +# or down without a code change. +logging.basicConfig( + level=os.environ.get("LOG_LEVEL", "INFO").upper(), + format="%(asctime)s %(levelname)s %(name)s %(message)s", +) + # Module-level logger — used to surface uncaught exceptions in the LangGraph # stream/invoke code with a full traceback. Without this the A2A handler only # sees str(e), which drops the frame location and makes every runtime bug a diff --git a/tests/test_a2a_handler.py b/tests/test_a2a_handler.py index 0a91f01..8c34d1b 100644 --- a/tests/test_a2a_handler.py +++ b/tests/test_a2a_handler.py @@ -660,6 +660,36 @@ async def test_webhook_delivery_success(): assert payload["artifact"]["artifactId"] == "test-task-id" +@pytest.mark.asyncio +async def test_webhook_success_is_logged_at_info_level(caplog): + """Quinn #61 root cause: successful webhook delivery used to log at + DEBUG, which is below the default WARNING threshold the server runs + at. There was no way to tell from docker logs whether delivery was + actually firing. Upgrade locked in — every successful POST must + produce an INFO line carrying the task id, final state, and URL.""" + import logging + record = _make_record(state=COMPLETED) + cfg = PushNotificationConfig(url="https://example.com/hook") + + with patch("a2a_handler.httpx.AsyncClient") as mock_client_cls: + mock_client = AsyncMock() + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=None) + mock_response = MagicMock() + mock_response.status_code = 200 + mock_client.post = AsyncMock(return_value=mock_response) + mock_client_cls.return_value = mock_client + + with caplog.at_level(logging.INFO, logger="a2a_handler"): + await _deliver_webhook(record, cfg) + + info_records = [r for r in caplog.records if r.levelno == logging.INFO] + assert any("webhook delivered" in r.getMessage() for r in info_records), ( + "successful delivery must log at INFO, not DEBUG — otherwise the " + "default WARNING log level silently hides whether delivery is working" + ) + + @pytest.mark.asyncio async def test_webhook_delivery_no_token(): record = _make_record(state=FAILED, error_message="oops") @@ -1003,8 +1033,13 @@ async def test_message_send_returns_submitted(): assert resp.status_code == 200 data = resp.json() + # Spec-compliance: the result must be a full Task object with the + # `kind` discriminator — @a2a-js/sdk routes by kind. Inline-dict + # builds have omitted it in the past (quinn#61 investigation). + assert data["result"]["kind"] == "task" assert data["result"]["status"]["state"] == SUBMITTED assert "id" in data["result"] + assert "contextId" in data["result"] @pytest.mark.asyncio