Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,15 @@ uv run make test-e2e # End-to-end tests
- **SQLAlchemy**: Database ORM
- **Kubernetes**: K8s auth integration

## Pull Request Requirements

**PR titles MUST start with a JIRA issue key prefix.** CI enforces this via `pr-title-checker` (config: `.github/pr-title-checker-config.json`).

Allowed prefixes: `LCORE-`, `RSPEED-`, `MGTM-`, `OLS-`, `RHDHPAI-`, `LEADS-`

- ✅ `RSPEED-2849: add user_agent to ResponsesEventData`
- ❌ `feat(observability): add user_agent to ResponsesEventData`

## Development Workflow
1. Use `uv sync --group dev --group llslibdev` for dependencies
2. Always use `uv run` prefix for commands
Expand Down
47 changes: 46 additions & 1 deletion src/app/endpoints/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import json
from collections.abc import AsyncIterator
from datetime import UTC, datetime
from typing import Annotated, Any, Optional, cast
from typing import Annotated, Any, Final, Optional, cast

from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request
from fastapi.responses import StreamingResponse
Expand Down Expand Up @@ -112,6 +112,30 @@
logger = get_logger(__name__)
router = APIRouter(tags=["responses"])

_USER_AGENT_MAX_LENGTH: Final[int] = 128


def _get_user_agent(request: Request) -> Optional[str]:
"""Extract and sanitize the User-Agent header from the request.

Parses the raw User-Agent header, strips control characters and newlines,
and truncates to a safe maximum length. Returns None when the header is
absent or empty.

Args:
request: The FastAPI request object.

Returns:
Sanitized User-Agent string, or None if the header is absent or empty.
"""
raw = request.headers.get("User-Agent", "")
if not raw:
return None
sanitized = "".join(c for c in raw if ord(c) >= 32 and c not in ("\r", "\n"))
sanitized = sanitized[:_USER_AGENT_MAX_LENGTH]
return sanitized or None


responses_response: dict[int | str, dict[str, Any]] = {
200: ResponsesResponse.openapi_response(),
401: UnauthorizedResponse.openapi_response(
Expand Down Expand Up @@ -153,6 +177,7 @@ def _queue_responses_splunk_event( # pylint: disable=too-many-arguments,too-man
input_tokens: int = 0,
output_tokens: int = 0,
fire_and_forget: bool = False,
user_agent: Optional[str] = None,
) -> None:
"""Build and queue a Splunk telemetry event for the responses endpoint.

Expand All @@ -173,6 +198,7 @@ def _queue_responses_splunk_event( # pylint: disable=too-many-arguments,too-man
fire_and_forget: When True, dispatch via asyncio.create_task() instead
of background_tasks. Use for error paths where an HTTPException
follows, since FastAPI discards BackgroundTasks on non-2xx responses.
user_agent: Sanitized User-Agent string from the request header, or None.
"""
if not fire_and_forget and background_tasks is None:
return
Expand All @@ -187,6 +213,7 @@ def _queue_responses_splunk_event( # pylint: disable=too-many-arguments,too-man
inference_time=inference_time,
input_tokens=input_tokens,
output_tokens=output_tokens,
user_agent=user_agent,
)
event = build_responses_event(event_data)
if fire_and_forget:
Expand Down Expand Up @@ -360,6 +387,7 @@ async def responses_endpoint_handler(
filter_server_tools=filter_server_tools,
background_tasks=background_tasks,
rh_identity_context=rh_identity_context,
user_agent=_get_user_agent(request),
)


Expand All @@ -375,6 +403,7 @@ async def handle_streaming_response(
filter_server_tools: bool = False,
background_tasks: Optional[BackgroundTasks] = None,
rh_identity_context: tuple[str, str] = ("", ""),
user_agent: Optional[str] = None,
) -> StreamingResponse:
"""Handle streaming response from Responses API.

Expand All @@ -389,6 +418,7 @@ async def handle_streaming_response(
filter_server_tools: Whether to filter server-deployed MCP tool events from the stream
background_tasks: FastAPI background task manager for telemetry events
rh_identity_context: Tuple of (org_id, system_id) from RH identity
user_agent: Sanitized User-Agent string from request headers, or None.
Returns:
StreamingResponse with SSE-formatted events
"""
Expand Down Expand Up @@ -424,6 +454,7 @@ async def handle_streaming_response(
rh_identity_context=rh_identity_context,
inference_time=(datetime.now(UTC) - started_at).total_seconds(),
sourcetype="responses_shield_blocked",
user_agent=user_agent,
)
else:
try:
Expand Down Expand Up @@ -452,6 +483,7 @@ async def handle_streaming_response(
inference_time=(datetime.now(UTC) - started_at).total_seconds(),
sourcetype="responses_error",
fire_and_forget=True,
user_agent=user_agent,
)
error_response = PromptTooLongResponse(model=api_params.model)
raise HTTPException(**error_response.model_dump()) from e
Expand All @@ -467,6 +499,7 @@ async def handle_streaming_response(
inference_time=(datetime.now(UTC) - started_at).total_seconds(),
sourcetype="responses_error",
fire_and_forget=True,
user_agent=user_agent,
)
error_response = ServiceUnavailableResponse(
backend_name="Llama Stack",
Expand All @@ -484,6 +517,7 @@ async def handle_streaming_response(
inference_time=(datetime.now(UTC) - started_at).total_seconds(),
sourcetype="responses_error",
fire_and_forget=True,
user_agent=user_agent,
)
error_response = handle_known_apistatus_errors(e, api_params.model)
raise HTTPException(**error_response.model_dump()) from e
Expand All @@ -501,6 +535,7 @@ async def handle_streaming_response(
background_tasks=background_tasks,
rh_identity_context=rh_identity_context,
shield_blocked=(moderation_result.decision == "blocked"),
user_agent=user_agent,
),
media_type="text/event-stream",
)
Expand Down Expand Up @@ -894,6 +929,7 @@ async def generate_response(
background_tasks: Optional[BackgroundTasks] = None,
rh_identity_context: tuple[str, str] = ("", ""),
shield_blocked: bool = False,
user_agent: Optional[str] = None,
) -> AsyncIterator[str]:
"""Stream the response from the generator and persist conversation details.

Expand All @@ -911,6 +947,7 @@ async def generate_response(
background_tasks: FastAPI background task manager for telemetry events
rh_identity_context: Tuple of (org_id, system_id) from RH identity
shield_blocked: Whether the request was blocked by a shield
user_agent: Sanitized User-Agent string from request headers, or None.
Yields:
SSE-formatted strings from the generator
"""
Expand Down Expand Up @@ -956,6 +993,7 @@ async def generate_response(
if turn_summary.token_usage
else 0
),
user_agent=user_agent,
)


Expand All @@ -971,6 +1009,7 @@ async def handle_non_streaming_response(
filter_server_tools: bool = False,
background_tasks: Optional[BackgroundTasks] = None,
rh_identity_context: tuple[str, str] = ("", ""),
user_agent: Optional[str] = None,
) -> ResponsesResponse:
"""Handle non-streaming response from Responses API.

Expand All @@ -986,6 +1025,7 @@ async def handle_non_streaming_response(
filter_server_tools: Whether to filter server-deployed MCP tool output
background_tasks: FastAPI background task manager for telemetry events
rh_identity_context: Tuple of (org_id, system_id) from RH identity
user_agent: Sanitized User-Agent string from request headers, or None.
Returns:
ResponsesResponse with the completed response
"""
Expand Down Expand Up @@ -1019,6 +1059,7 @@ async def handle_non_streaming_response(
rh_identity_context=rh_identity_context,
inference_time=(datetime.now(UTC) - started_at).total_seconds(),
sourcetype="responses_shield_blocked",
user_agent=user_agent,
)
else:
try:
Expand Down Expand Up @@ -1057,6 +1098,7 @@ async def handle_non_streaming_response(
inference_time=(datetime.now(UTC) - started_at).total_seconds(),
sourcetype="responses_error",
fire_and_forget=True,
user_agent=user_agent,
)
error_response = PromptTooLongResponse(model=api_params.model)
raise HTTPException(**error_response.model_dump()) from e
Expand All @@ -1072,6 +1114,7 @@ async def handle_non_streaming_response(
inference_time=(datetime.now(UTC) - started_at).total_seconds(),
sourcetype="responses_error",
fire_and_forget=True,
user_agent=user_agent,
)
error_response = ServiceUnavailableResponse(
backend_name="Llama Stack",
Expand All @@ -1089,6 +1132,7 @@ async def handle_non_streaming_response(
inference_time=(datetime.now(UTC) - started_at).total_seconds(),
sourcetype="responses_error",
fire_and_forget=True,
user_agent=user_agent,
)
error_response = handle_known_apistatus_errors(e, api_params.model)
raise HTTPException(**error_response.model_dump()) from e
Expand Down Expand Up @@ -1135,6 +1179,7 @@ async def handle_non_streaming_response(
if turn_summary.token_usage
else 0
),
user_agent=user_agent,
)
if api_params.store:
store_query_results(
Expand Down
4 changes: 3 additions & 1 deletion src/observability/formats/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""

from dataclasses import dataclass
from typing import Any
from typing import Any, Optional

from configuration import configuration

Expand All @@ -24,6 +24,7 @@ class ResponsesEventData: # pylint: disable=too-many-instance-attributes
inference_time: float
input_tokens: int = 0
output_tokens: int = 0
user_agent: Optional[str] = None


def build_responses_event(data: ResponsesEventData) -> dict[str, Any]:
Expand All @@ -45,4 +46,5 @@ def build_responses_event(data: ResponsesEventData) -> dict[str, Any]:
"org_id": data.org_id,
"system_id": data.system_id,
"total_llm_tokens": data.input_tokens + data.output_tokens,
"user_agent": data.user_agent,
}
56 changes: 56 additions & 0 deletions tests/unit/app/endpoints/test_responses_splunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from app.endpoints.responses import (
_background_splunk_tasks,
_get_user_agent,
_queue_responses_splunk_event,
handle_non_streaming_response,
handle_streaming_response,
Expand Down Expand Up @@ -715,3 +716,58 @@ async def test_splunk_disabled_no_background_tasks(

mock_queue.assert_called_once()
assert mock_queue.call_args[1]["background_tasks"] is None


class TestGetUserAgent:
    """Unit tests covering _get_user_agent extraction and sanitization."""

    def test_returns_user_agent_from_header(self, mocker: MockerFixture) -> None:
        """A clean User-Agent value passes through unchanged."""
        fake_request = mocker.MagicMock()
        fake_request.headers.get.return_value = "goose/1.0.0"

        assert _get_user_agent(fake_request) == "goose/1.0.0"

    def test_returns_none_when_header_absent(self, mocker: MockerFixture) -> None:
        """An empty User-Agent header yields None."""
        fake_request = mocker.MagicMock()
        fake_request.headers.get.return_value = ""

        assert _get_user_agent(fake_request) is None

    def test_strips_control_characters(self, mocker: MockerFixture) -> None:
        """CR/LF and other control characters are removed from the value."""
        fake_request = mocker.MagicMock()
        fake_request.headers.get.return_value = "goose/1.0.0\r\nX-Injected: evil"

        sanitized: str = _get_user_agent(fake_request) or ""

        assert sanitized
        assert not any(ch in sanitized for ch in "\r\n")
        assert "goose/1.0.0" in sanitized

    def test_truncates_to_128_characters(self, mocker: MockerFixture) -> None:
        """Overlong values are cut down to the 128-character cap."""
        fake_request = mocker.MagicMock()
        fake_request.headers.get.return_value = "a" * 200

        truncated = _get_user_agent(fake_request)

        assert isinstance(truncated, str)
        assert len(truncated) == 128

    def test_returns_none_for_only_control_characters(
        self, mocker: MockerFixture
    ) -> None:
        """A value made entirely of control characters collapses to None."""
        fake_request = mocker.MagicMock()
        fake_request.headers.get.return_value = "\r\n\x01\x02"

        assert _get_user_agent(fake_request) is None
63 changes: 63 additions & 0 deletions tests/unit/observability/formats/test_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,21 @@ def sample_event_data_fixture() -> ResponsesEventData:
)


@pytest.fixture(name="sample_event_data_with_user_agent")
def sample_event_data_with_user_agent_fixture() -> ResponsesEventData:
    """Build a ResponsesEventData sample that carries a user_agent value."""
    field_values = {
        "input_text": "How do I configure SSH?",
        "response_text": "To configure SSH, edit /etc/ssh/sshd_config...",
        "conversation_id": "conv-abc-123",
        "model": "granite-3-8b-instruct",
        "org_id": "12345678",
        "system_id": "abc-def-123",
        "inference_time": 2.34,
        "user_agent": "goose/1.0.0",
    }
    return ResponsesEventData(**field_values)


def test_builds_event_with_all_fields(
mocker: MockerFixture, sample_event_data: ResponsesEventData
) -> None:
Expand Down Expand Up @@ -97,3 +112,51 @@ def test_default_token_values() -> None:

assert data.input_tokens == 0
assert data.output_tokens == 0


def test_user_agent_defaults_to_none() -> None:
    """Verify user_agent is None when omitted from the constructor."""
    minimal_event = ResponsesEventData(
        conversation_id="conv-789",
        input_text="test",
        response_text="test",
        model="test-model",
        org_id="org1",
        system_id="sys1",
        inference_time=1.0,
    )

    assert minimal_event.user_agent is None


def test_user_agent_included_in_splunk_event(
    mocker: MockerFixture,
    sample_event_data_with_user_agent: ResponsesEventData,
) -> None:
    """Verify the Splunk event payload carries the user_agent value."""
    config_stub = mocker.patch("observability.formats.responses.configuration")
    config_stub.deployment_environment = "production"

    payload = build_responses_event(sample_event_data_with_user_agent)

    assert payload["user_agent"] == "goose/1.0.0"


def test_user_agent_none_included_in_splunk_event(mocker: MockerFixture) -> None:
    """Verify a None user_agent still appears in the Splunk event payload."""
    config_stub = mocker.patch("observability.formats.responses.configuration")
    config_stub.deployment_environment = "production"

    event_without_ua = ResponsesEventData(
        conversation_id="conv-123",
        input_text="test",
        response_text="test",
        model="test-model",
        org_id="org1",
        system_id="sys1",
        inference_time=1.0,
    )

    payload = build_responses_event(event_without_ua)

    assert payload["user_agent"] is None
Loading