From 21cbe4d7a91976899b0d983946ec1b62f04a4cd7 Mon Sep 17 00:00:00 2001
From: Eamon O'Reilly <eamono@microsoft.com>
Date: Mon, 26 Jan 2026 15:28:51 -0800
Subject: [PATCH 1/2] [Cognitiveservices] Add console log streaming: `az
 cognitiveservices agent logs show`

Add console log streaming commands and options for hosted agents:

- Add new `az cognitiveservices agent logs show` command to stream
  console output (stdout/stderr) or system events from agent containers
- Add --show-logs flag to `az cognitiveservices agent create` to stream
  logs during deployment for troubleshooting
- Add --show-logs and --timeout flags to `az cognitiveservices agent start`
  to stream logs during startup

Implementation details:
- Implement _stream_agent_logs() using HTTP streaming with Bearer auth
- Create _BackgroundLogStreamer context manager for reusable log streaming
- Add retry logic to handle container startup delays
- Extract helper functions for auth and URL building
- Add unit tests for new functionality
---
 .../cognitiveservices/_help.py                |  40 +++
 .../cognitiveservices/_params.py              |  71 ++++
 .../cognitiveservices/commands.py             |   4 +
 .../cognitiveservices/custom.py               | 323 ++++++++++++++++--
 .../tests/latest/test_agent.py                | 227 ++++++++++++
 5 files changed, 634 insertions(+), 31 deletions(-)

diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/_help.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/_help.py
index 59f09be6d5d..6eacc50580c 100644
--- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/_help.py
+++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/_help.py
@@ -635,6 +635,14 @@
           --name my-agent \\
           --image myregistry.azurecr.io/my-large-agent:v1.0 \\
           --timeout 1200
+  - name: Create agent and stream container logs during deployment
+    text: |
+        az cognitiveservices agent create \\
+          --account-name myAccount \\
+          --project-name myProject \\
+          --name my-agent \\
+          --image myregistry.azurecr.io/my-agent:v1.0 \\
+          --show-logs
 """
 
 helps[
@@ -642,9 +650,41 @@
 ] = """
 type: command
 short-summary: Start a hosted agent deployment.
+long-summary: |
+    Starts a previously stopped agent deployment. Use --show-logs to stream
+    container console logs during startup for troubleshooting.
 examples:
   - name: Start hosted agent deployment.
     text: az cognitiveservices agent start --account-name myAccount --project-name myProject --name myAgent --agent-version 1
+  - name: Start agent and stream console logs during startup.
+    text: az cognitiveservices agent start --account-name myAccount --project-name myProject --name myAgent --agent-version 1 --show-logs
+"""
+
+helps[
+    "cognitiveservices agent logs"
+] = """
+type: group
+short-summary: Manage hosted agent container logs.
+"""
+
+helps[
+    "cognitiveservices agent logs show"
+] = """
+type: command
+short-summary: Show logs from a hosted agent container.
+long-summary: |
+    Streams console output (stdout/stderr) or system events from an agent container.
+    Use --follow to stream logs in real-time, or omit it to fetch recent logs and exit.
+    This is useful for troubleshooting agent startup issues or monitoring agent behavior.
+examples:
+  - name: Fetch the last 50 lines of console logs from an agent.
+    text: az cognitiveservices agent logs show --account-name myAccount --project-name myProject --name myAgent --agent-version 1
+  - name: Stream console logs in real-time.
+    text: az cognitiveservices agent logs show --account-name myAccount --project-name myProject --name myAgent --agent-version 1 --follow
+  - name: Fetch the last 100 lines of system event logs.
+    text: az cognitiveservices agent logs show --account-name myAccount --project-name myProject --name myAgent --agent-version 1 --type system --tail 100
+  - name: Stream logs with custom tail size.
+    text: az cognitiveservices agent logs show --account-name myAccount --project-name myProject --name myAgent --agent-version 1 --follow --tail 200
 """
 
 helps[
diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/_params.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/_params.py
index efc3cf77da0..6edf03f47b2 100644
--- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/_params.py
+++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/_params.py
@@ -511,6 +511,16 @@ def load_arguments(self, _):
             ),
             default=600
         )
+        c.argument(
+            'show_logs',
+            options_list=['--show-logs'],
+            action='store_true',
+            help=(
+                'Stream container console logs during deployment. '
+                'Shows real-time output from the agent container as it starts up. '
+                'Useful for debugging startup issues.'
+            )
+        )
 
     with self.argument_context("cognitiveservices agent update") as c:
         c.argument(
@@ -533,6 +543,67 @@ def load_arguments(self, _):
             help="Cognitive Services hosted agent version. If not provided, deletes all versions.",
             required=False,
         )
+
+    with self.argument_context("cognitiveservices agent start") as c:
+        c.argument(
+            'show_logs',
+            options_list=['--show-logs'],
+            action='store_true',
+            help=(
+                'Stream container console logs during startup. '
+                'Shows real-time output from the agent container as it starts. '
+                'Useful for debugging startup issues.'
+            )
+        )
+        c.argument(
+            'timeout',  # agent_start only waits for readiness (and reads this) when show_logs is set
+            type=int,
+            help=(
+                'Maximum time in seconds to wait for the deployment to be ready. '
+                'Only applies together with --show-logs. Default: 600 seconds (10 minutes).'
+            ),
+            default=600
+        )
+
+    with self.argument_context("cognitiveservices agent logs") as c:
+        c.argument(
+            "account_name",
+            options_list=["--account-name", "-a"],
+            help="Cognitive service account name."
+        )
+        c.argument(
+            "project_name",
+            options_list=["--project-name", "-p"],
+            help="AI project name"
+        )
+        c.argument(
+            "agent_name",
+            options_list=["--name", "-n"],
+            help="Cognitive Services hosted agent name",
+        )
+        c.argument("agent_version", help="Cognitive Services hosted agent version")
+
+    with self.argument_context("cognitiveservices agent logs show") as c:
+        c.argument(
+            'kind',
+            options_list=['--type', '-t'],
+            help="Type of logs to stream. 'console' for stdout/stderr, 'system' for container events.",
+            arg_type=get_enum_type(['console', 'system']),
+            default='console'
+        )
+        c.argument(
+            'tail',
+            type=int,
+            help='Number of trailing log lines to fetch (1-300). Default: 50',
+            default=50
+        )
+        c.argument(
+            'follow',
+            options_list=['--follow', '-f'],
+            action='store_true',
+            help='Stream logs in real-time. Without this flag, fetches recent logs and exits.'
+        )
+
     with self.argument_context('cognitiveservices') as c:
         c.argument('account_name', arg_type=name_arg_type, help='cognitive service account name',
                    completer=get_resource_name_completion_list('Microsoft.CognitiveServices/accounts'))
diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/commands.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/commands.py
index 3432cd5f514..d9a33bec661 100644
--- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/commands.py
+++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/commands.py
@@ -129,6 +129,10 @@ def load_command_table(self, _):
         g.custom_command('list', 'agent_list')
         g.custom_command('list-versions', 'agent_versions_list')
         g.custom_show_command('show', 'agent_show')
+
+    with self.command_group('cognitiveservices agent logs', client_factory=cf_ai_projects, is_preview=True) as g:
+        g.custom_show_command('show', 'agent_logs_show')
+
     with self.command_group(
             'cognitiveservices account project', projects_type,
             client_factory=cf_projects) as g:
diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py
index 9db5eb1c7ff..9521ae57017 100644
--- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py
+++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py
@@ -908,6 +908,162 @@ def _get_agent_container_status(client, agent_name, agent_version):
     return response.json()
 
 
+# Constants for log streaming
+LOG_STREAM_CONNECT_TIMEOUT = 10  # seconds
+LOG_STREAM_READ_TIMEOUT = 5  # seconds for non-follow mode
+LOG_STREAM_RETRY_INTERVAL = 5  # seconds between retries
+LOG_STREAM_MAX_RETRIES = 30  # max retry attempts (~2.5 minutes)
+LOG_STREAM_POST_DEPLOY_WAIT = 15  # seconds to stream after deployment ready
+
+
+def _get_log_stream_auth_header(cmd):
+    """
+    Get authorization header for log stream API.
+
+    Args:
+        cmd: CLI command context
+
+    Returns:
+        dict: Authorization header with Bearer token
+    """
+    from azure.cli.core._profile import Profile
+
+    profile = Profile(cli_ctx=cmd.cli_ctx)
+    credential, _, _ = profile.get_login_credentials(
+        subscription_id=cmd.cli_ctx.data.get("subscription_id")
+    )
+    token = credential.get_token("https://ai.azure.com/.default")
+    return {"Authorization": f"Bearer {token.token}"}
+
+
+def _build_log_stream_url(client, agent_name, agent_version, container_name="default"):
+    """
+    Build the log stream URL for an agent container.
+
+    Args:
+        client: Service client with endpoint configuration
+        agent_name: Name of the agent
+        agent_version: Version of the agent
+        container_name: Container name (default: 'default')
+
+    Returns:
+        str: Full URL for the log stream endpoint
+    """
+    endpoint = client._config.endpoint  # pylint: disable=protected-access
+    return (
+        f"{endpoint}/agents/{urllib.parse.quote(agent_name)}"
+        f"/versions/{urllib.parse.quote(str(agent_version))}"
+        f"/containers/{urllib.parse.quote(container_name)}:logstream"
+    )
+
+
+def _stream_agent_logs(
+    cmd,
+    client,
+    agent_name,
+    agent_version,
+    kind="console",
+    tail=50,
+    follow=True,
+):
+    """
+    Stream logs from an agent container.
+
+    Args:
+        cmd: CLI command context
+        client: Service client (AIProjectClient)
+        agent_name: Name of the agent
+        agent_version: Version of the agent
+        kind: Type of logs - 'console' (stdout/stderr) or 'system' (container events)
+        tail: Number of trailing lines to fetch (1-300)
+        follow: Whether to stream logs in real-time (no read timeout); otherwise stop once reads go idle
+
+    Yields:
+        str: Log lines as they arrive
+
+    Raises:
+        InvalidArgumentValueError: If tail or kind parameters are invalid
+        AzureResponseError: If the connection fails, times out, or returns an HTTP error status
+    """
+    import requests as http_requests
+
+    # Validate parameters
+    if tail is not None and not 1 <= tail <= 300:
+        raise InvalidArgumentValueError("--tail must be between 1 and 300")
+    if kind not in ("console", "system"):
+        raise InvalidArgumentValueError("--type must be 'console' or 'system'")
+
+    log_url = _build_log_stream_url(client, agent_name, agent_version)
+    params = {
+        "api-version": AGENT_API_VERSION_PARAMS["api-version"],
+        "kind": kind,
+        "tail": tail,
+    }
+    headers = _get_log_stream_auth_header(cmd)
+
+    logger.info("Connecting to log stream: %s", log_url)
+
+    # Bound the connect phase in every mode; only reads are left unbounded while following.
+    timeout = (LOG_STREAM_CONNECT_TIMEOUT, None if follow else LOG_STREAM_READ_TIMEOUT)
+
+    response = None
+    try:
+        response = http_requests.get(log_url, params=params, headers=headers, stream=True, timeout=timeout)
+        if not response.ok:
+            error_detail = response.text or f"HTTP {response.status_code}"
+            raise AzureResponseError(f"Failed to connect to log stream: {error_detail}")
+        for line in response.iter_lines():
+            if line:
+                yield line.decode("utf-8", errors="replace")
+    except http_requests.exceptions.ConnectTimeout as e:
+        raise AzureResponseError(f"Failed to connect to log stream: {e}") from e
+    except http_requests.exceptions.Timeout:
+        pass  # Expected when follow=False - read timeout after fetching available logs
+    except http_requests.exceptions.ConnectionError as e:
+        if "timed out" not in str(e).lower():  # read timeouts can surface as ConnectionError
+            raise AzureResponseError(f"Failed to connect to log stream: {e}") from e
+    except KeyboardInterrupt:
+        logger.warning("Log streaming interrupted by user")
+        raise
+    finally:
+        if response is not None:
+            response.close()  # stream=True holds the connection open until it is closed
+
+
+def agent_logs_show(
+    cmd,
+    client,
+    account_name,
+    project_name,
+    agent_name,
+    agent_version,
+    kind="console",
+    tail=50,
+    follow=False,
+):  # pylint: disable=unused-argument
+    """
+    Show logs from a hosted agent container.
+
+    Args:
+        cmd: CLI command context
+        client: Service client
+        account_name: Cognitive Services account name (unused, for CLI routing)
+        project_name: AI Foundry project name (unused, for CLI routing)
+        agent_name: Name of the agent
+        agent_version: Version of the agent
+        kind: Type of logs - 'console' or 'system'
+        tail: Number of trailing lines (1-300)
+        follow: Stream logs in real-time if True
+    """
+    try:
+        for log_line in _stream_agent_logs(
+            cmd, client, agent_name, agent_version, kind=kind, tail=tail, follow=follow
+        ):
+            print(log_line)
+    except KeyboardInterrupt:
+        pass  # Clean exit on Ctrl+C
+
+
 def _wait_for_agent_deployment_ready(
         cmd, client, agent_name, agent_version, timeout=600, poll_interval=5):
     """
@@ -1208,7 +1364,98 @@ def _build_and_push_locally():
     return image
 
 
-def _deploy_agent_version(cmd, client, agent_name, created_version, min_replicas, max_replicas, timeout=600):
+class _BackgroundLogStreamer:
+    """
+    Context manager for streaming logs in a background thread during deployment.
+
+    Usage:
+        with _BackgroundLogStreamer(cmd, client, agent_name, version) as streamer:
+            # deployment operations...
+            streamer.wait_after_ready()  # optional: stream logs after deployment ready
+    """
+
+    def __init__(self, cmd, client, agent_name, agent_version, enabled=True):
+        self.cmd = cmd
+        self.client = client
+        self.agent_name = agent_name
+        self.agent_version = agent_version
+        self.enabled = enabled
+        self._thread = None
+        self._stop_event = None
+
+    def __enter__(self):
+        if not self.enabled:
+            return self
+
+        import threading
+        self._stop_event = threading.Event()
+        self._thread = threading.Thread(target=self._stream_with_retry, daemon=True)
+        self._thread.start()
+        logger.warning("Streaming container logs (Ctrl+C to stop)...")
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self._thread and self._stop_event:
+            self._stop_event.set()
+            self._thread.join(timeout=2)
+        return False  # Don't suppress exceptions
+
+    def _stream_with_retry(self):
+        """Stream logs with retry logic for container startup."""
+        import time
+
+        for attempt in range(LOG_STREAM_MAX_RETRIES):
+            if self._stop_event.is_set():
+                return
+
+            try:
+                # Check if container is in a streamable state
+                if not self._is_container_ready():
+                    time.sleep(LOG_STREAM_RETRY_INTERVAL)
+                    continue
+
+                # Stream logs
+                for log_line in _stream_agent_logs(
+                    self.cmd, self.client, self.agent_name, self.agent_version,
+                    kind="console", tail=100, follow=True
+                ):
+                    if self._stop_event.is_set():
+                        return
+                    print(log_line)
+                return  # Successfully streamed
+
+            except Exception as e:  # pylint: disable=broad-except
+                if self._stop_event.is_set():
+                    return
+                logger.debug("Log stream attempt %d failed: %s", attempt + 1, e)
+                if attempt < LOG_STREAM_MAX_RETRIES - 1:
+                    time.sleep(LOG_STREAM_RETRY_INTERVAL)
+
+    def _is_container_ready(self):
+        """Check if container is in a state where logs can be streamed."""
+        try:
+            status = _get_agent_container_status(self.client, self.agent_name, self.agent_version)
+            return status.get("status", "").lower() in ("running", "starting", "pending")
+        except Exception:  # pylint: disable=broad-except
+            return True  # Try streaming anyway if status check fails
+
+    def wait_after_ready(self, seconds=LOG_STREAM_POST_DEPLOY_WAIT):
+        """Wait for additional log streaming after deployment is ready."""
+        import time
+
+        if not self.enabled or not self._thread or not self._thread.is_alive():
+            return
+
+        logger.warning("Deployment ready. Streaming logs for %d more seconds (Ctrl+C to stop)...", seconds)
+        for _ in range(seconds):
+            if not self._thread.is_alive():
+                break
+            time.sleep(1)
+
+
+def _deploy_agent_version(
+    cmd, client, agent_name, created_version, min_replicas, max_replicas, timeout=600, show_logs=False
+):
     """
     Deploy an agent version with horizontal scaling configuration.
 
@@ -1220,6 +1467,7 @@ def _deploy_agent_version(cmd, client, agent_name, created_version, min_replicas
         min_replicas: Minimum number of replicas (default 0)
         max_replicas: Maximum number of replicas (default 3)
         timeout: Maximum time to wait for deployment (default 600 seconds)
+        show_logs: Stream container logs during deployment (default False)
     """
     effective_min_replicas = min_replicas if min_replicas is not None else 0
     effective_max_replicas = max_replicas if max_replicas is not None else 3
@@ -1229,35 +1477,26 @@ def _deploy_agent_version(cmd, client, agent_name, created_version, min_replicas
         effective_min_replicas,
         effective_max_replicas,
     )
-    try:
-        _invoke_agent_container_operation(
-            client,
-            agent_name,
-            created_version,
-            action="start",
-        )
 
-        _wait_for_agent_deployment_ready(cmd, client, agent_name, created_version, timeout=timeout)
+    with _BackgroundLogStreamer(cmd, client, agent_name, created_version, enabled=show_logs) as streamer:
+        try:
+            _invoke_agent_container_operation(client, agent_name, created_version, action="start")
+            _wait_for_agent_deployment_ready(cmd, client, agent_name, created_version, timeout=timeout)
 
-        if min_replicas is not None or max_replicas is not None:
-            _invoke_agent_container_operation(
-                client,
-                agent_name,
-                created_version,
-                action="update",
-                min_replicas=effective_min_replicas,
-                max_replicas=effective_max_replicas,
-            )
+            if min_replicas is not None or max_replicas is not None:
+                _invoke_agent_container_operation(
+                    client, agent_name, created_version, action="update",
+                    min_replicas=effective_min_replicas, max_replicas=effective_max_replicas
+                )
 
-        logger.info("Agent deployment started successfully")
-    except Exception as deploy_err:
-        recommendation = (
-            "Use 'az cognitiveservices agent start' to retry deployment once the underlying issue is resolved."
-        )
-        raise DeploymentError(
-            f"Agent version '{created_version}' was created but deployment failed: {deploy_err}",
-            recommendation=recommendation,
-        ) from deploy_err
+            logger.info("Agent deployment started successfully")
+            streamer.wait_after_ready()
+
+        except Exception as deploy_err:
+            raise DeploymentError(
+                f"Agent version '{created_version}' was created but deployment failed: {deploy_err}",
+                recommendation="Use 'az cognitiveservices agent start' to retry deployment."
+            ) from deploy_err
 
 
 def agent_update(
@@ -1298,14 +1537,33 @@ def agent_stop(
 
 
 def agent_start(
-    client, account_name, project_name, agent_name, agent_version
+    cmd, client, account_name, project_name, agent_name, agent_version, show_logs=False, timeout=600
 ):  # pylint: disable=unused-argument
     """
     Start hosted agent deployment.
+
+    Args:
+        cmd: CLI command context
+        client: Service client
+        account_name: Cognitive Services account name (unused, for CLI routing)
+        project_name: AI Foundry project name (unused, for CLI routing)
+        agent_name: Name of the agent
+        agent_version: Version of the agent to start
+        show_logs: Stream container logs during startup (default False)
+        timeout: Seconds to wait for readiness; only used when show_logs is True (default 600)
     """
-    return _invoke_agent_container_operation(
-        client, agent_name, agent_version, action="start"
-    )
+    result = _invoke_agent_container_operation(client, agent_name, agent_version, action="start")
+
+    if show_logs:
+        with _BackgroundLogStreamer(cmd, client, agent_name, agent_version) as streamer:
+            try:
+                _wait_for_agent_deployment_ready(cmd, client, agent_name, agent_version, timeout=timeout)
+                logger.warning("Agent deployment is now running")
+                streamer.wait_after_ready()
+            except KeyboardInterrupt:
+                logger.warning("Log streaming interrupted")
+
+    return result
 
 
 def agent_delete_deployment(
@@ -1621,6 +1879,7 @@ def agent_create(  # pylint: disable=too-many-locals
     no_wait=False,
     no_start=False,
     timeout=600,
+    show_logs=False,
 ):
     """
     Create a new hosted agent from a container image or source code.
@@ -1652,6 +1911,7 @@ def agent_create(  # pylint: disable=too-many-locals
         no_wait: Don't wait for operation completion (default False)
         no_start: Skip automatic deployment after version creation (default False)
         timeout: Maximum time in seconds to wait for deployment (default 600)
+        show_logs: Stream container logs during deployment (default False)
 
     Returns:
         dict: Created agent version details including status, version, and configuration
@@ -1779,6 +2039,7 @@ def agent_create(  # pylint: disable=too-many-locals
             min_replicas,
             max_replicas,
             timeout=timeout,
+            show_logs=show_logs,
         )
     elif created_version and no_start:
         logger.info("Agent version created but not deployed (--no-start specified). "
diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/tests/latest/test_agent.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/tests/latest/test_agent.py
index 534545e2801..09c94f2263d 100644
--- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/tests/latest/test_agent.py
+++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/tests/latest/test_agent.py
@@ -298,6 +298,78 @@ def test_validate_path_for_subprocess_empty_path(self):
         with self.assertRaises(InvalidArgumentValueError):
             _validate_path_for_subprocess(None, "test path")
 
+    # =========================================================================
+    # Tests for agent logs functionality
+    # =========================================================================
+
+    def test_stream_agent_logs_function_signature(self):
+        """Test that _stream_agent_logs has correct parameters."""
+        from inspect import signature
+        from azure.cli.command_modules.cognitiveservices.custom import _stream_agent_logs
+        
+        sig = signature(_stream_agent_logs)
+        self.assertIn('cmd', sig.parameters)
+        self.assertIn('client', sig.parameters)
+        self.assertIn('agent_name', sig.parameters)
+        self.assertIn('agent_version', sig.parameters)
+        self.assertIn('kind', sig.parameters)
+        self.assertIn('tail', sig.parameters)
+        self.assertIn('follow', sig.parameters)
+        # Verify defaults
+        self.assertEqual(sig.parameters['kind'].default, "console")
+        self.assertEqual(sig.parameters['tail'].default, 50)
+        self.assertEqual(sig.parameters['follow'].default, True)
+
+    def test_agent_logs_show_function_signature(self):
+        """Test that agent_logs_show has correct parameters."""
+        from inspect import signature
+        from azure.cli.command_modules.cognitiveservices.custom import agent_logs_show
+        
+        sig = signature(agent_logs_show)
+        self.assertIn('cmd', sig.parameters)
+        self.assertIn('client', sig.parameters)
+        self.assertIn('account_name', sig.parameters)
+        self.assertIn('project_name', sig.parameters)
+        self.assertIn('agent_name', sig.parameters)
+        self.assertIn('agent_version', sig.parameters)
+        self.assertIn('kind', sig.parameters)
+        self.assertIn('tail', sig.parameters)
+        self.assertIn('follow', sig.parameters)
+        # Verify follow defaults to False for non-streaming behavior
+        self.assertEqual(sig.parameters['follow'].default, False)
+        self.assertEqual(sig.parameters['kind'].default, "console")
+        self.assertEqual(sig.parameters['tail'].default, 50)
+
+    def test_agent_start_show_logs_parameter(self):
+        """Test that agent_start accepts show_logs and timeout parameters."""
+        from inspect import signature
+        from azure.cli.command_modules.cognitiveservices.custom import agent_start
+        
+        sig = signature(agent_start)
+        self.assertIn('cmd', sig.parameters)
+        self.assertIn('show_logs', sig.parameters)
+        self.assertIn('timeout', sig.parameters)
+        self.assertEqual(sig.parameters['show_logs'].default, False)
+        self.assertEqual(sig.parameters['timeout'].default, 600)
+
+    def test_agent_create_show_logs_parameter(self):
+        """Test that agent_create accepts show_logs parameter."""
+        from inspect import signature
+        from azure.cli.command_modules.cognitiveservices.custom import agent_create
+        
+        sig = signature(agent_create)
+        self.assertIn('show_logs', sig.parameters)
+        self.assertEqual(sig.parameters['show_logs'].default, False)
+
+    def test_deploy_agent_version_show_logs_parameter(self):
+        """Test that _deploy_agent_version accepts show_logs parameter."""
+        from inspect import signature
+        from azure.cli.command_modules.cognitiveservices.custom import _deploy_agent_version
+        
+        sig = signature(_deploy_agent_version)
+        self.assertIn('show_logs', sig.parameters)
+        self.assertEqual(sig.parameters['show_logs'].default, False)
+
 
 class CognitiveServicesAgentTests(ScenarioTest):
     """
@@ -820,6 +892,161 @@ def test_agent_create_errors(self, resource_group):
         # Cleanup
         self.cmd('az cognitiveservices account delete -n {account} -g {rg}')
 
+    # =========================================================================
+    # Integration tests for agent logs functionality
+    # =========================================================================
+
+    @live_only()
+    @serial_test()
+    @ResourceGroupPreparer(location='eastus')
+    def test_agent_logs_show_basic(self, resource_group):
+        """
+        Test basic log streaming without --follow flag.
+
+        Validates:
+        - Log command executes without error
+        - Default parameters (console type, 50 lines tail)
+        - Command exits after fetching initial logs
+        """
+        account_name = self.create_random_name(prefix='cs_logs_', length=20)
+        project_name = self.create_random_name(prefix='proj_', length=15)
+        agent_name = 'test-logs-agent'
+
+        self.kwargs.update({
+            'account': account_name,
+            'project': project_name,
+            'agent': agent_name,
+            'kind': 'AIServices',
+            'sku': 'S0',
+            'location': 'eastus',
+            'image': 'mcr.microsoft.com/azuredocs/aci-helloworld:latest'
+        })
+
+        # Create Cognitive Services account
+        self.cmd('az cognitiveservices account create -n {account} -g {rg} '
+                 '--kind {kind} --sku {sku} -l {location} --yes --manage-projects',
+                 checks=[self.check('properties.provisioningState', 'Succeeded')])
+
+        # Create agent with a sample image
+        self.cmd('az cognitiveservices agent create --skip-acr-check '
+                 '-a {account} --project-name {project} --name {agent} '
+                 '--image {image}',
+                 checks=[self.check('name', '{agent}')])
+
+        # Fetch logs without follow (should return and exit)
+        # This verifies the command runs successfully
+        self.cmd('az cognitiveservices agent logs show '
+                 '-a {account} -p {project} -n {agent} --agent-version 1')
+
+        # Cleanup
+        self.cmd('az cognitiveservices agent delete -a {account} -p {project} -n {agent}')
+        self.cmd('az cognitiveservices account delete -n {account} -g {rg}')
+
+    @live_only()
+    @serial_test()
+    @ResourceGroupPreparer(location='eastus')
+    def test_agent_logs_show_with_options(self, resource_group):
+        """
+        Test log streaming with various options.
+
+        Validates:
+        - --type system option
+        - --tail custom value
+        - Different log type outputs
+        """
+        account_name = self.create_random_name(prefix='cs_logs_', length=20)
+        project_name = self.create_random_name(prefix='proj_', length=15)
+        agent_name = 'test-logs-opts'
+
+        self.kwargs.update({
+            'account': account_name,
+            'project': project_name,
+            'agent': agent_name,
+            'kind': 'AIServices',
+            'sku': 'S0',
+            'location': 'eastus',
+            'image': 'mcr.microsoft.com/azuredocs/aci-helloworld:latest'
+        })
+
+        # Create Cognitive Services account
+        self.cmd('az cognitiveservices account create -n {account} -g {rg} '
+                 '--kind {kind} --sku {sku} -l {location} --yes --manage-projects',
+                 checks=[self.check('properties.provisioningState', 'Succeeded')])
+
+        # Create agent
+        self.cmd('az cognitiveservices agent create --skip-acr-check '
+                 '-a {account} --project-name {project} --name {agent} '
+                 '--image {image}',
+                 checks=[self.check('name', '{agent}')])
+
+        # Test with --type system
+        self.cmd('az cognitiveservices agent logs show '
+                 '-a {account} -p {project} -n {agent} --agent-version 1 '
+                 '--type system')
+
+        # Test with --tail custom value
+        self.cmd('az cognitiveservices agent logs show '
+                 '-a {account} -p {project} -n {agent} --agent-version 1 '
+                 '--tail 100')
+
+        # Test with both options
+        self.cmd('az cognitiveservices agent logs show '
+                 '-a {account} -p {project} -n {agent} --agent-version 1 '
+                 '--type console --tail 200')
+
+        # Cleanup
+        self.cmd('az cognitiveservices agent delete -a {account} -p {project} -n {agent}')
+        self.cmd('az cognitiveservices account delete -n {account} -g {rg}')
+
+    @live_only()
+    @serial_test()
+    @ResourceGroupPreparer(location='eastus')
+    def test_agent_start_with_show_logs(self, resource_group):
+        """
+        Test agent start with --show-logs flag.
+
+        Validates:
+        - Agent can be stopped and started
+        - --show-logs flag streams logs during startup
+        """
+        account_name = self.create_random_name(prefix='cs_start_', length=20)
+        project_name = self.create_random_name(prefix='proj_', length=15)
+        agent_name = 'test-start-logs'
+
+        self.kwargs.update({
+            'account': account_name,
+            'project': project_name,
+            'agent': agent_name,
+            'kind': 'AIServices',
+            'sku': 'S0',
+            'location': 'eastus',
+            'image': 'mcr.microsoft.com/azuredocs/aci-helloworld:latest'
+        })
+
+        # Create Cognitive Services account
+        self.cmd('az cognitiveservices account create -n {account} -g {rg} '
+                 '--kind {kind} --sku {sku} -l {location} --yes --manage-projects',
+                 checks=[self.check('properties.provisioningState', 'Succeeded')])
+
+        # Create agent
+        self.cmd('az cognitiveservices agent create --skip-acr-check '
+                 '-a {account} --project-name {project} --name {agent} '
+                 '--image {image}',
+                 checks=[self.check('name', '{agent}')])
+
+        # Stop the agent first
+        self.cmd('az cognitiveservices agent stop '
+                 '-a {account} -p {project} -n {agent} --agent-version 1')
+
+        # Start with --show-logs
+        self.cmd('az cognitiveservices agent start '
+                 '-a {account} -p {project} -n {agent} --agent-version 1 '
+                 '--show-logs --timeout 120')
+
+        # Cleanup
+        self.cmd('az cognitiveservices agent delete -a {account} -p {project} -n {agent}')
+        self.cmd('az cognitiveservices account delete -n {account} -g {rg}')
+
 
 if __name__ == '__main__':
     unittest.main()

From bc04891a82d41e50d24b2d746f188875ecc0d8e2 Mon Sep 17 00:00:00 2001
From: Eamon O'Reilly <eamono@microsoft.com>
Date: Mon, 26 Jan 2026 16:25:24 -0800
Subject: [PATCH 2/2] Address review: add user warning when log streaming fails

- Distinguish transient errors (ConnectionError, Timeout) from unexpected errors
- Track last error and show warning to user after all retries exhausted
- Provides actionable feedback when --show-logs cannot establish connection
---
 .../cognitiveservices/custom.py               | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py b/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py
index 9521ae57017..443575453e7 100644
--- a/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py
+++ b/src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py
@@ -1403,7 +1403,9 @@ def __exit__(self, exc_type, exc_val, exc_tb):
     def _stream_with_retry(self):
         """Stream logs with retry logic for container startup."""
         import time
+        from requests.exceptions import ConnectionError as RequestsConnectionError, Timeout
 
+        last_error = None
         for attempt in range(LOG_STREAM_MAX_RETRIES):
             if self._stop_event.is_set():
                 return
@@ -1424,13 +1426,32 @@ def _stream_with_retry(self):
                     print(log_line)
                 return  # Successfully streamed
 
+            except (RequestsConnectionError, Timeout) as e:
+                # Expected transient errors during container startup
+                if self._stop_event.is_set():
+                    return
+                last_error = e
+                logger.debug("Log stream attempt %d failed (transient): %s", attempt + 1, e)
+                if attempt < LOG_STREAM_MAX_RETRIES - 1:
+                    time.sleep(LOG_STREAM_RETRY_INTERVAL)
+
             except Exception as e:  # pylint: disable=broad-except
+                # Unexpected errors - log and continue retrying
                 if self._stop_event.is_set():
                     return
+                last_error = e
                 logger.debug("Log stream attempt %d failed: %s", attempt + 1, e)
                 if attempt < LOG_STREAM_MAX_RETRIES - 1:
                     time.sleep(LOG_STREAM_RETRY_INTERVAL)
 
+        # All retries exhausted - warn user
+        if last_error and not self._stop_event.is_set():
+            logger.warning(
+                "Unable to establish log stream after %d attempts. "
+                "The agent may still be starting. Last error: %s",
+                LOG_STREAM_MAX_RETRIES, last_error
+            )
+
     def _is_container_ready(self):
         """Check if container is in a state where logs can be streamed."""
         try: