From 6dff618eca56a8b42f7c42c4a53bfff6b7f2b6e3 Mon Sep 17 00:00:00 2001 From: Ellis Low Date: Tue, 28 Apr 2026 13:55:11 -0400 Subject: [PATCH 1/3] feat(observability): add user_agent to ResponsesEventData for CLA/Goose differentiation Adds a sanitized user_agent field to ResponsesEventData and the Splunk event payload, enabling differentiation between Goose and other clients in telemetry. Extracts and sanitizes the User-Agent header (strips control characters, truncates to 128 chars) before storing. Closes RSPEED-2849 --- src/app/endpoints/responses.py | 42 +++++++++++++ src/observability/formats/responses.py | 4 +- .../app/endpoints/test_responses_splunk.py | 56 +++++++++++++++++ .../observability/formats/test_responses.py | 63 +++++++++++++++++++ 4 files changed, 164 insertions(+), 1 deletion(-) diff --git a/src/app/endpoints/responses.py b/src/app/endpoints/responses.py index 331341279..d999dbed8 100644 --- a/src/app/endpoints/responses.py +++ b/src/app/endpoints/responses.py @@ -112,6 +112,30 @@ logger = get_logger(__name__) router = APIRouter(tags=["responses"]) +_USER_AGENT_MAX_LENGTH = 128 + + +def _get_user_agent(request: Request) -> Optional[str]: + """Extract and sanitize the User-Agent header from the request. + + Parses the raw User-Agent header, strips control characters and newlines, + and truncates to a safe maximum length. Returns None when the header is + absent or empty. + + Args: + request: The FastAPI request object. + + Returns: + Sanitized User-Agent string, or None if the header is absent or empty. + """ + raw = request.headers.get("User-Agent", "") + if not raw: + return None + sanitized = "".join(c for c in raw if ord(c) >= 32 and c not in ("\r", "\n")) + sanitized = sanitized[:_USER_AGENT_MAX_LENGTH] + return sanitized or None + + responses_response: dict[int | str, dict[str, Any]] = { 200: ResponsesResponse.openapi_response(), 401: UnauthorizedResponse.openapi_response( @@ -153,6 +177,7 @@ def _queue_responses_splunk_event( # pylint: disable=too-many-arguments,too-man input_tokens: int = 0, output_tokens: int = 0, fire_and_forget: bool = False, + user_agent: Optional[str] = None, ) -> None: """Build and queue a Splunk telemetry event for the responses endpoint. @@ -173,6 +198,7 @@ def _queue_responses_splunk_event( # pylint: disable=too-many-arguments,too-man fire_and_forget: When True, dispatch via asyncio.create_task() instead of background_tasks. Use for error paths where an HTTPException follows, since FastAPI discards BackgroundTasks on non-2xx responses. + user_agent: Sanitized User-Agent string from the request header, or None. """ if not fire_and_forget and background_tasks is None: return @@ -187,6 +213,7 @@ def _queue_responses_splunk_event( # pylint: disable=too-many-arguments,too-man inference_time=inference_time, input_tokens=input_tokens, output_tokens=output_tokens, + user_agent=user_agent, ) event = build_responses_event(event_data) if fire_and_forget: @@ -360,6 +387,7 @@ async def responses_endpoint_handler( filter_server_tools=filter_server_tools, background_tasks=background_tasks, rh_identity_context=rh_identity_context, + user_agent=_get_user_agent(request), ) @@ -375,6 +403,7 @@ async def handle_streaming_response( filter_server_tools: bool = False, background_tasks: Optional[BackgroundTasks] = None, rh_identity_context: tuple[str, str] = ("", ""), + user_agent: Optional[str] = None, ) -> StreamingResponse: """Handle streaming response from Responses API. @@ -424,6 +453,7 @@ async def handle_streaming_response( rh_identity_context=rh_identity_context, inference_time=(datetime.now(UTC) - started_at).total_seconds(), sourcetype="responses_shield_blocked", + user_agent=user_agent, ) else: try: @@ -452,6 +482,7 @@ async def handle_streaming_response( inference_time=(datetime.now(UTC) - started_at).total_seconds(), sourcetype="responses_error", fire_and_forget=True, + user_agent=user_agent, ) error_response = PromptTooLongResponse(model=api_params.model) raise HTTPException(**error_response.model_dump()) from e @@ -467,6 +498,7 @@ async def handle_streaming_response( inference_time=(datetime.now(UTC) - started_at).total_seconds(), sourcetype="responses_error", fire_and_forget=True, + user_agent=user_agent, ) error_response = ServiceUnavailableResponse( backend_name="Llama Stack", @@ -484,6 +516,7 @@ async def handle_streaming_response( inference_time=(datetime.now(UTC) - started_at).total_seconds(), sourcetype="responses_error", fire_and_forget=True, + user_agent=user_agent, ) error_response = handle_known_apistatus_errors(e, api_params.model) raise HTTPException(**error_response.model_dump()) from e @@ -501,6 +534,7 @@ async def handle_streaming_response( background_tasks=background_tasks, rh_identity_context=rh_identity_context, shield_blocked=(moderation_result.decision == "blocked"), + user_agent=user_agent, ), media_type="text/event-stream", ) @@ -894,6 +928,7 @@ async def generate_response( background_tasks: Optional[BackgroundTasks] = None, rh_identity_context: tuple[str, str] = ("", ""), shield_blocked: bool = False, + user_agent: Optional[str] = None, ) -> AsyncIterator[str]: """Stream the response from the generator and persist conversation details. @@ -956,6 +991,7 @@ async def generate_response( if turn_summary.token_usage else 0 ), + user_agent=user_agent, ) @@ -971,6 +1007,7 @@ async def handle_non_streaming_response( filter_server_tools: bool = False, background_tasks: Optional[BackgroundTasks] = None, rh_identity_context: tuple[str, str] = ("", ""), + user_agent: Optional[str] = None, ) -> ResponsesResponse: """Handle non-streaming response from Responses API. @@ -1019,6 +1056,7 @@ async def handle_non_streaming_response( rh_identity_context=rh_identity_context, inference_time=(datetime.now(UTC) - started_at).total_seconds(), sourcetype="responses_shield_blocked", + user_agent=user_agent, ) else: try: @@ -1057,6 +1095,7 @@ async def handle_non_streaming_response( inference_time=(datetime.now(UTC) - started_at).total_seconds(), sourcetype="responses_error", fire_and_forget=True, + user_agent=user_agent, ) error_response = PromptTooLongResponse(model=api_params.model) raise HTTPException(**error_response.model_dump()) from e @@ -1072,6 +1111,7 @@ async def handle_non_streaming_response( inference_time=(datetime.now(UTC) - started_at).total_seconds(), sourcetype="responses_error", fire_and_forget=True, + user_agent=user_agent, ) error_response = ServiceUnavailableResponse( backend_name="Llama Stack", @@ -1089,6 +1129,7 @@ async def handle_non_streaming_response( inference_time=(datetime.now(UTC) - started_at).total_seconds(), sourcetype="responses_error", fire_and_forget=True, + user_agent=user_agent, ) error_response = handle_known_apistatus_errors(e, api_params.model) raise HTTPException(**error_response.model_dump()) from e @@ -1135,6 +1176,7 @@ async def handle_non_streaming_response( if turn_summary.token_usage else 0 ), + user_agent=user_agent, ) if api_params.store: store_query_results( diff --git a/src/observability/formats/responses.py b/src/observability/formats/responses.py index d8564de8d..380cfd8eb 100644 --- a/src/observability/formats/responses.py +++ b/src/observability/formats/responses.py @@ -6,7 +6,7 @@ """ from dataclasses import dataclass -from typing import Any +from typing import Any, Optional from configuration import configuration @@ -24,6 +24,7 @@ class ResponsesEventData: # pylint: disable=too-many-instance-attributes inference_time: float input_tokens: int = 0 output_tokens: int = 0 + user_agent: Optional[str] = None def build_responses_event(data: ResponsesEventData) -> dict[str, Any]: @@ -45,4 +46,5 @@ def build_responses_event(data: ResponsesEventData) -> dict[str, Any]: "org_id": data.org_id, "system_id": data.system_id, "total_llm_tokens": data.input_tokens + data.output_tokens, + "user_agent": data.user_agent, } diff --git a/tests/unit/app/endpoints/test_responses_splunk.py b/tests/unit/app/endpoints/test_responses_splunk.py index ce1588a75..1b6ccf283 100644 --- a/tests/unit/app/endpoints/test_responses_splunk.py +++ b/tests/unit/app/endpoints/test_responses_splunk.py @@ -16,6 +16,7 @@ from app.endpoints.responses import ( _background_splunk_tasks, + _get_user_agent, _queue_responses_splunk_event, handle_non_streaming_response, handle_streaming_response, @@ -715,3 +716,58 @@ async def test_splunk_disabled_no_background_tasks( mock_queue.assert_called_once() assert mock_queue.call_args[1]["background_tasks"] is None + + +class TestGetUserAgent: + """Tests for _get_user_agent header extraction and sanitization.""" + + def test_returns_user_agent_from_header(self, mocker: MockerFixture) -> None: + """Test that a valid User-Agent header is returned as-is.""" + request = mocker.MagicMock() + request.headers.get.return_value = "goose/1.0.0" + + result = _get_user_agent(request) + + assert result == "goose/1.0.0" + + def test_returns_none_when_header_absent(self, mocker: MockerFixture) -> None: + """Test that None is returned when User-Agent header is empty.""" + request = mocker.MagicMock() + request.headers.get.return_value = "" + + result = _get_user_agent(request) + + assert result is None + + def test_strips_control_characters(self, mocker: MockerFixture) -> None: + """Test that control characters and newlines are stripped from User-Agent.""" + request = mocker.MagicMock() + request.headers.get.return_value = "goose/1.0.0\r\nX-Injected: evil" + + result: str = _get_user_agent(request) or "" + + assert result != "" + assert "\r" not in result + assert "\n" not in result + assert "goose/1.0.0" in result + + def test_truncates_to_128_characters(self, mocker: MockerFixture) -> None: + """Test that User-Agent is truncated to 128 characters.""" + request = mocker.MagicMock() + request.headers.get.return_value = "a" * 200 + + result = _get_user_agent(request) + + assert isinstance(result, str) + assert len(result) == 128 + + def test_returns_none_for_only_control_characters( + self, mocker: MockerFixture + ) -> None: + """Test that None is returned when User-Agent contains only control characters.""" + request = mocker.MagicMock() + request.headers.get.return_value = "\r\n\x01\x02" + + result = _get_user_agent(request) + + assert result is None diff --git a/tests/unit/observability/formats/test_responses.py b/tests/unit/observability/formats/test_responses.py index 03e585076..25d71267e 100644 --- a/tests/unit/observability/formats/test_responses.py +++ b/tests/unit/observability/formats/test_responses.py @@ -20,6 +20,21 @@ def sample_event_data_fixture() -> ResponsesEventData: ) +@pytest.fixture(name="sample_event_data_with_user_agent") +def sample_event_data_with_user_agent_fixture() -> ResponsesEventData: + """Create sample responses event data with user_agent set.""" + return ResponsesEventData( + input_text="How do I configure SSH?", + response_text="To configure SSH, edit /etc/ssh/sshd_config...", + conversation_id="conv-abc-123", + model="granite-3-8b-instruct", + org_id="12345678", + system_id="abc-def-123", + inference_time=2.34, + user_agent="goose/1.0.0", + ) + + def test_builds_event_with_all_fields( mocker: MockerFixture, sample_event_data: ResponsesEventData ) -> None: @@ -97,3 +112,51 @@ def test_default_token_values() -> None: assert data.input_tokens == 0 assert data.output_tokens == 0 + + +def test_user_agent_defaults_to_none() -> None: + """Test user_agent field defaults to None when not provided.""" + data = ResponsesEventData( + input_text="test", + response_text="test", + conversation_id="conv-789", + inference_time=1.0, + model="test-model", + org_id="org1", + system_id="sys1", + ) + + assert data.user_agent is None + + +def test_user_agent_included_in_splunk_event( + mocker: MockerFixture, + sample_event_data_with_user_agent: ResponsesEventData, +) -> None: + """Test user_agent field is included in the Splunk event payload.""" + mock_config = mocker.patch("observability.formats.responses.configuration") + mock_config.deployment_environment = "production" + + event = build_responses_event(sample_event_data_with_user_agent) + + assert event["user_agent"] == "goose/1.0.0" + + +def test_user_agent_none_included_in_splunk_event(mocker: MockerFixture) -> None: + """Test user_agent=None is included in the Splunk event payload.""" + mock_config = mocker.patch("observability.formats.responses.configuration") + mock_config.deployment_environment = "production" + + data = ResponsesEventData( + input_text="test", + response_text="test", + conversation_id="conv-123", + inference_time=1.0, + model="test-model", + org_id="org1", + system_id="sys1", + ) + + event = build_responses_event(data) + + assert event["user_agent"] is None From b1b141dd2f0dcb52991875b7921578e7eba12bfd Mon Sep 17 00:00:00 2001 From: Ellis Low Date: Tue, 28 Apr 2026 14:17:53 -0400 Subject: [PATCH 2/3] fix: add Final annotation and docstring updates for user_agent --- src/app/endpoints/responses.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/app/endpoints/responses.py b/src/app/endpoints/responses.py index d999dbed8..230ce111e 100644 --- a/src/app/endpoints/responses.py +++ b/src/app/endpoints/responses.py @@ -6,7 +6,7 @@ import json from collections.abc import AsyncIterator from datetime import UTC, datetime -from typing import Annotated, Any, Optional, cast +from typing import Annotated, Any, Final, Optional, cast from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request from fastapi.responses import StreamingResponse @@ -112,7 +112,7 @@ logger = get_logger(__name__) router = APIRouter(tags=["responses"]) -_USER_AGENT_MAX_LENGTH = 128 +_USER_AGENT_MAX_LENGTH: Final[int] = 128 def _get_user_agent(request: Request) -> Optional[str]: @@ -418,6 +418,7 @@ async def handle_streaming_response( filter_server_tools: Whether to filter server-deployed MCP tool events from the stream background_tasks: FastAPI background task manager for telemetry events rh_identity_context: Tuple of (org_id, system_id) from RH identity + user_agent: Sanitized User-Agent string from request headers, or None. Returns: StreamingResponse with SSE-formatted events """ @@ -946,6 +947,7 @@ async def generate_response( background_tasks: FastAPI background task manager for telemetry events rh_identity_context: Tuple of (org_id, system_id) from RH identity shield_blocked: Whether the request was blocked by a shield + user_agent: Sanitized User-Agent string from request headers, or None. Yields: SSE-formatted strings from the generator """ @@ -1023,6 +1025,7 @@ async def handle_non_streaming_response( filter_server_tools: Whether to filter server-deployed MCP tool output background_tasks: FastAPI background task manager for telemetry events rh_identity_context: Tuple of (org_id, system_id) from RH identity + user_agent: Sanitized User-Agent string from request headers, or None. Returns: ResponsesResponse with the completed response """ From b0ec99a46ece494f7cd1f68c11936e1a329d5e13 Mon Sep 17 00:00:00 2001 From: Ellis Low Date: Tue, 28 Apr 2026 14:45:05 -0400 Subject: [PATCH 3/3] docs: add PR title prefix requirement to AGENTS.md --- AGENTS.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index d36362f09..119022978 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -193,6 +193,15 @@ uv run make test-e2e # End-to-end tests - **SQLAlchemy**: Database ORM - **Kubernetes**: K8s auth integration +## Pull Request Requirements + +**PR titles MUST start with a JIRA issue key prefix.** CI enforces this via `pr-title-checker` (config: `.github/pr-title-checker-config.json`). + +Allowed prefixes: `LCORE-`, `RSPEED-`, `MGTM-`, `OLS-`, `RHDHPAI-`, `LEADS-` + +- ✅ `RSPEED-2849: add user_agent to ResponsesEventData` +- ❌ `feat(observability): add user_agent to ResponsesEventData` + ## Development Workflow 1. Use `uv sync --group dev --group llslibdev` for dependencies 2. Always use `uv run` prefix for commands