From bf6be85da6a0e8004215096514aac9df1cbcd755 Mon Sep 17 00:00:00 2001 From: Ambient Code Bot Date: Mon, 6 Apr 2026 21:29:50 -0400 Subject: [PATCH 1/4] fix(runner,rbac,cli,routes): credential auth reliability and SSE improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - runner: retry credential fetch with fresh CP token on 401 instead of aborting the turn; adds refresh_bot_token() to utils.py and wires it into _fetch_credential() for both the direct BOT_TOKEN and caller-token fallback paths - rbac: fix pathToAction() so GET /credentials/{id}/token resolves to action "token" (matching credential:token-reader role) instead of "read" - cli: improve acpctl session send -f output with reasoning accumulation ([thinking] blocks), tool call annotations ([ToolName] → result), and proper newline handling for streamed text - routes: add haproxy.router.openshift.io/timeout: 10m to all ambient-api-server Routes (production, mpp-openshift, local-dev) to prevent 504s on long-running SSE streams 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../ambient-api-server/pkg/rbac/middleware.go | 18 ++++++- .../ambient-cli/cmd/acpctl/session/send.go | 47 +++++++++++++++++-- .../local-dev/ambient-api-server-route.yaml | 4 ++ .../ambient-api-server-route.yaml | 2 + .../production/ambient-api-server-route.yaml | 4 ++ .../ambient_runner/platform/auth.py | 46 +++++++++++++++--- .../ambient_runner/platform/utils.py | 21 +++++++++ 7 files changed, 129 insertions(+), 13 deletions(-) diff --git a/components/ambient-api-server/pkg/rbac/middleware.go b/components/ambient-api-server/pkg/rbac/middleware.go index 9f27a15a0..a0cb1f5b4 100644 --- a/components/ambient-api-server/pkg/rbac/middleware.go +++ b/components/ambient-api-server/pkg/rbac/middleware.go @@ -57,7 +57,7 @@ func (m *DBAuthorizationMiddleware) AuthorizeApi(next http.Handler) http.Handler } func (m *DBAuthorizationMiddleware) isAllowed(g *gorm.DB, username, method, path string) (bool, error) { - action := httpMethodToAction(method) + action := pathToAction(method, path) resource := pathToResource(path) var bindings []roleBindingRow @@ -130,6 +130,22 @@ func httpMethodToAction(method string) string { } } +func pathToAction(method, path string) string { + parts := strings.Split(strings.TrimPrefix(path, "/"), "/") + for i, p := range parts { + if p == "v1" && i+2 < len(parts) { + last := parts[len(parts)-1] + switch last { + case "token": + return "token" + case "start", "stop", "ignite", "ignition": + return last + } + } + } + return httpMethodToAction(method) +} + func pathToResource(path string) string { parts := strings.Split(strings.TrimPrefix(path, "/"), "/") for i, p := range parts { diff --git a/components/ambient-cli/cmd/acpctl/session/send.go b/components/ambient-cli/cmd/acpctl/session/send.go index 626c6846e..efeff9ece 100644 --- a/components/ambient-cli/cmd/acpctl/session/send.go +++ b/components/ambient-cli/cmd/acpctl/session/send.go @@ -78,6 +78,8 @@ func runSend(cmd *cobra.Command, args []string) error { out := cmd.OutOrStdout() scanner := bufio.NewScanner(stream) + var reasoningBuf strings.Builder + var inText bool for scanner.Scan() { line := scanner.Text() if !strings.HasPrefix(line, "data: ") { @@ -91,18 +93,53 @@ func runSend(cmd *cobra.Command, args []string) error { } var evt struct { - Type string `json:"type"` - Delta string `json:"delta"` + Type string `json:"type"` + Delta string `json:"delta"` + ToolCallName string `json:"toolCallName"` + Content string `json:"content"` } if err := json.Unmarshal([]byte(data), &evt); err != nil { continue } - if evt.Type == "TEXT_MESSAGE_CONTENT" && evt.Delta != "" { - fmt.Fprint(out, evt.Delta) + switch evt.Type { + case "REASONING_MESSAGE_CONTENT": + reasoningBuf.WriteString(evt.Delta) + case "REASONING_END": + if reasoningBuf.Len() > 0 { + fmt.Fprintf(out, "[thinking] %s\n", strings.TrimSpace(reasoningBuf.String())) + reasoningBuf.Reset() + } + case "TEXT_MESSAGE_CONTENT": + if evt.Delta != "" { + inText = true + fmt.Fprint(out, evt.Delta) + } + case "TEXT_MESSAGE_END": + if inText { + fmt.Fprintln(out) + inText = false + } + case "TOOL_CALL_START": + if evt.ToolCallName != "" { + fmt.Fprintf(out, "[%s] ", evt.ToolCallName) + } + case "TOOL_CALL_RESULT": + if evt.Content != "" { + var content string + if err := json.Unmarshal([]byte(evt.Content), &content); err != nil { + content = evt.Content + } + lines := strings.SplitN(strings.TrimSpace(content), "\n", 4) + preview := strings.Join(lines, " | ") + if len(lines) >= 4 { + preview += " ..." + } + fmt.Fprintf(out, "→ %s\n", preview) + } } } - if !sendFollowJSON { + if inText { fmt.Fprintln(out) } diff --git a/components/manifests/overlays/local-dev/ambient-api-server-route.yaml b/components/manifests/overlays/local-dev/ambient-api-server-route.yaml index 1530d558f..1b3c195a9 100644 --- a/components/manifests/overlays/local-dev/ambient-api-server-route.yaml +++ b/components/manifests/overlays/local-dev/ambient-api-server-route.yaml @@ -6,6 +6,8 @@ metadata: labels: app: ambient-api-server component: api + annotations: + haproxy.router.openshift.io/timeout: 10m spec: to: kind: Service @@ -23,6 +25,8 @@ metadata: labels: app: ambient-api-server component: grpc + annotations: + haproxy.router.openshift.io/timeout: 10m spec: to: kind: Service diff --git a/components/manifests/overlays/mpp-openshift/ambient-api-server-route.yaml b/components/manifests/overlays/mpp-openshift/ambient-api-server-route.yaml index 278d21f00..ad44f7b67 100644 --- a/components/manifests/overlays/mpp-openshift/ambient-api-server-route.yaml +++ b/components/manifests/overlays/mpp-openshift/ambient-api-server-route.yaml @@ -7,6 +7,8 @@ metadata: app: ambient-api-server component: api shard: internal + annotations: + haproxy.router.openshift.io/timeout: 10m spec: to: kind: Service diff --git a/components/manifests/overlays/production/ambient-api-server-route.yaml b/components/manifests/overlays/production/ambient-api-server-route.yaml index 1530d558f..1b3c195a9 100644 --- a/components/manifests/overlays/production/ambient-api-server-route.yaml +++ b/components/manifests/overlays/production/ambient-api-server-route.yaml @@ -6,6 +6,8 @@ metadata: labels: app: ambient-api-server component: api + annotations: + haproxy.router.openshift.io/timeout: 10m spec: to: kind: Service @@ -23,6 +25,8 @@ metadata: labels: app: ambient-api-server component: grpc + annotations: + haproxy.router.openshift.io/timeout: 10m spec: to: kind: Service diff --git a/components/runners/ambient-runner/ambient_runner/platform/auth.py b/components/runners/ambient-runner/ambient_runner/platform/auth.py index 757750f45..0ecc5014c 100755 --- a/components/runners/ambient-runner/ambient_runner/platform/auth.py +++ b/components/runners/ambient-runner/ambient_runner/platform/auth.py @@ -17,7 +17,7 @@ from urllib.parse import urlparse from ambient_runner.platform.context import RunnerContext -from ambient_runner.platform.utils import get_bot_token +from ambient_runner.platform.utils import get_bot_token, refresh_bot_token logger = logging.getLogger(__name__) @@ -174,6 +174,16 @@ def _do_req(): try: with _urllib_request.urlopen(fallback_req, timeout=10) as resp: return resp.read().decode("utf-8", errors="replace") + except _urllib_request.HTTPError as fallback_err: + if fallback_err.code in (401, 403): + return _retry_with_fresh_bot_token() + logger.warning( + f"{credential_type} BOT_TOKEN fallback also failed: {fallback_err}" + ) + raise PermissionError( + f"{credential_type} authentication failed: caller token expired " + f"and BOT_TOKEN fallback also failed" + ) from fallback_err except Exception as fallback_err: logger.warning( f"{credential_type} BOT_TOKEN fallback also failed: {fallback_err}" @@ -183,18 +193,40 @@ def _do_req(): f"and BOT_TOKEN fallback also failed" ) from fallback_err if e.code in (401, 403): - logger.warning( - f"{credential_type} credential fetch failed with HTTP {e.code}: {e}" - ) - raise PermissionError( - f"{credential_type} authentication failed with HTTP {e.code}" - ) from e + # BOT_TOKEN may have expired — refresh from CP endpoint and retry once. + return _retry_with_fresh_bot_token() logger.warning(f"{credential_type} credential fetch failed: {e}") return "" except Exception as e: logger.warning(f"{credential_type} credential fetch failed: {e}") return "" + def _retry_with_fresh_bot_token(): + logger.info( + f"{credential_type} got 401 with cached BOT_TOKEN — refreshing from CP endpoint and retrying" + ) + try: + fresh_bot = refresh_bot_token() + except Exception as refresh_err: + logger.warning(f"{credential_type} CP token refresh failed: {refresh_err}") + raise PermissionError( + f"{credential_type} authentication failed with HTTP 401" + ) from refresh_err + retry_req = _urllib_request.Request(url, method="GET") + if fresh_bot: + retry_req.add_header("Authorization", f"Bearer {fresh_bot}") + if context.current_user_id: + retry_req.add_header("X-Runner-Current-User", context.current_user_id) + try: + with _urllib_request.urlopen(retry_req, timeout=10) as resp: + logger.info(f"{credential_type} retry with fresh BOT_TOKEN succeeded") + return resp.read().decode("utf-8", errors="replace") + except Exception as retry_err: + logger.warning(f"{credential_type} retry with fresh BOT_TOKEN failed: {retry_err}") + raise PermissionError( + f"{credential_type} authentication failed with HTTP 401" + ) from retry_err + resp_text = await loop.run_in_executor(None, _do_req) if not resp_text: return {} diff --git a/components/runners/ambient-runner/ambient_runner/platform/utils.py b/components/runners/ambient-runner/ambient_runner/platform/utils.py index c8643e206..5788eb97d 100644 --- a/components/runners/ambient-runner/ambient_runner/platform/utils.py +++ b/components/runners/ambient-runner/ambient_runner/platform/utils.py @@ -71,6 +71,27 @@ def get_bot_token() -> str: return (os.getenv("BOT_TOKEN") or "").strip() +def refresh_bot_token() -> str: + """Fetch a fresh token from the CP token endpoint and update the in-process cache. + + Returns the new token, or the current cached token if the CP endpoint is not + configured (local dev mode). Raises RuntimeError if the CP fetch fails. + """ + cp_token_url = os.getenv("AMBIENT_CP_TOKEN_URL", "") + if not cp_token_url: + return get_bot_token() + + public_key_pem = os.getenv("AMBIENT_CP_TOKEN_PUBLIC_KEY", "") + session_id = os.getenv("SESSION_ID", "") + if not public_key_pem or not session_id: + logger.warning("refresh_bot_token: CP env vars incomplete, skipping refresh") + return get_bot_token() + + from ambient_runner._grpc_client import _fetch_token_from_cp + + return _fetch_token_from_cp(cp_token_url, public_key_pem, session_id) + + def is_env_truthy(value: str) -> bool: """Return True for "1", "true", or "yes" (case-insensitive).""" return value.strip().lower() in _TRUTHY_VALUES From 8af45d4105368ed3029276489d49041a0c4cfe62 Mon Sep 17 00:00:00 2001 From: Ambient Code Bot Date: Mon, 6 Apr 2026 21:48:13 -0400 Subject: [PATCH 2/4] fix(runner): preserve original HTTP status code in credential retry error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _retry_with_fresh_bot_token helper was hardcoding HTTP 401 in the PermissionError message regardless of the actual response code. This broke test_raises_permission_error_on_403_without_caller_token which expects "authentication failed with HTTP 403". Now the original status code is passed through and the retry error message reflects the actual HTTP code from both the initial and retry attempts. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../ambient_runner/platform/auth.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/components/runners/ambient-runner/ambient_runner/platform/auth.py b/components/runners/ambient-runner/ambient_runner/platform/auth.py index 0ecc5014c..76e9f1209 100755 --- a/components/runners/ambient-runner/ambient_runner/platform/auth.py +++ b/components/runners/ambient-runner/ambient_runner/platform/auth.py @@ -176,7 +176,7 @@ def _do_req(): return resp.read().decode("utf-8", errors="replace") except _urllib_request.HTTPError as fallback_err: if fallback_err.code in (401, 403): - return _retry_with_fresh_bot_token() + return _retry_with_fresh_bot_token(fallback_err.code) logger.warning( f"{credential_type} BOT_TOKEN fallback also failed: {fallback_err}" ) @@ -194,23 +194,23 @@ def _do_req(): ) from fallback_err if e.code in (401, 403): # BOT_TOKEN may have expired — refresh from CP endpoint and retry once. - return _retry_with_fresh_bot_token() + return _retry_with_fresh_bot_token(e.code) logger.warning(f"{credential_type} credential fetch failed: {e}") return "" except Exception as e: logger.warning(f"{credential_type} credential fetch failed: {e}") return "" - def _retry_with_fresh_bot_token(): + def _retry_with_fresh_bot_token(original_code: int): logger.info( - f"{credential_type} got 401 with cached BOT_TOKEN — refreshing from CP endpoint and retrying" + f"{credential_type} got {original_code} with cached BOT_TOKEN — refreshing from CP endpoint and retrying" ) try: fresh_bot = refresh_bot_token() except Exception as refresh_err: logger.warning(f"{credential_type} CP token refresh failed: {refresh_err}") raise PermissionError( - f"{credential_type} authentication failed with HTTP 401" + f"{credential_type} authentication failed with HTTP {original_code}" ) from refresh_err retry_req = _urllib_request.Request(url, method="GET") if fresh_bot: @@ -221,10 +221,15 @@ def _retry_with_fresh_bot_token(): with _urllib_request.urlopen(retry_req, timeout=10) as resp: logger.info(f"{credential_type} retry with fresh BOT_TOKEN succeeded") return resp.read().decode("utf-8", errors="replace") + except _urllib_request.HTTPError as retry_err: + logger.warning(f"{credential_type} retry with fresh BOT_TOKEN failed: {retry_err}") + raise PermissionError( + f"{credential_type} authentication failed with HTTP {retry_err.code}" + ) from retry_err except Exception as retry_err: logger.warning(f"{credential_type} retry with fresh BOT_TOKEN failed: {retry_err}") raise PermissionError( - f"{credential_type} authentication failed with HTTP 401" + f"{credential_type} authentication failed with HTTP {original_code}" ) from retry_err resp_text = await loop.run_in_executor(None, _do_req) From f998e546ea6c6cb13dfe41febcb39d5e5963a033 Mon Sep 17 00:00:00 2001 From: Ambient Code Bot Date: Mon, 6 Apr 2026 22:07:22 -0400 Subject: [PATCH 3/4] fix(runner): fix credential retry error message and observability end_turn import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - auth.py: don't retry with fresh CP token when caller-token BOT_TOKEN fallback fails; preserve that path's original error message - observability.py: guard claude_agent_sdk import in end_turn with try/except so Langfuse generation.update() is still called when the SDK is not installed 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../ambient-runner/ambient_runner/observability.py | 11 ++++++++--- .../ambient-runner/ambient_runner/platform/auth.py | 10 ---------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/components/runners/ambient-runner/ambient_runner/observability.py b/components/runners/ambient-runner/ambient_runner/observability.py index 6d1c38469..16653f2b7 100644 --- a/components/runners/ambient-runner/ambient_runner/observability.py +++ b/components/runners/ambient-runner/ambient_runner/observability.py @@ -515,14 +515,19 @@ def end_turn( return try: - from claude_agent_sdk import TextBlock + # Extract text content — TextBlock is optional (claude_agent_sdk may not be installed) + try: + from claude_agent_sdk import TextBlock as _TextBlock + except ImportError: + _TextBlock = None - # Extract text content text_content = [] message_content = getattr(message, "content", []) or [] for blk in message_content: - if isinstance(blk, TextBlock): + if _TextBlock is not None and isinstance(blk, _TextBlock): text_content.append(getattr(blk, "text", "")) + elif hasattr(blk, "text") and isinstance(getattr(blk, "text", None), str): + text_content.append(blk.text) output_text = ( "\n".join(text_content) if text_content else "(no text output)" diff --git a/components/runners/ambient-runner/ambient_runner/platform/auth.py b/components/runners/ambient-runner/ambient_runner/platform/auth.py index 76e9f1209..747b8891f 100755 --- a/components/runners/ambient-runner/ambient_runner/platform/auth.py +++ b/components/runners/ambient-runner/ambient_runner/platform/auth.py @@ -174,16 +174,6 @@ def _do_req(): try: with _urllib_request.urlopen(fallback_req, timeout=10) as resp: return resp.read().decode("utf-8", errors="replace") - except _urllib_request.HTTPError as fallback_err: - if fallback_err.code in (401, 403): - return _retry_with_fresh_bot_token(fallback_err.code) - logger.warning( - f"{credential_type} BOT_TOKEN fallback also failed: {fallback_err}" - ) - raise PermissionError( - f"{credential_type} authentication failed: caller token expired " - f"and BOT_TOKEN fallback also failed" - ) from fallback_err except Exception as fallback_err: logger.warning( f"{credential_type} BOT_TOKEN fallback also failed: {fallback_err}" From 029062db153a1eee699737c85a59499090da1503 Mon Sep 17 00:00:00 2001 From: Ambient Code Bot Date: Tue, 7 Apr 2026 08:24:48 -0400 Subject: [PATCH 4/4] fix(manifests): add haproxy timeout annotation to mpp-openshift grpc route MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../overlays/mpp-openshift/ambient-api-server-route.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/manifests/overlays/mpp-openshift/ambient-api-server-route.yaml b/components/manifests/overlays/mpp-openshift/ambient-api-server-route.yaml index ad44f7b67..52c6a0a98 100644 --- a/components/manifests/overlays/mpp-openshift/ambient-api-server-route.yaml +++ b/components/manifests/overlays/mpp-openshift/ambient-api-server-route.yaml @@ -28,6 +28,8 @@ metadata: app: ambient-api-server component: grpc shard: internal + annotations: + haproxy.router.openshift.io/timeout: 10m spec: to: kind: Service