From b0dbdc11ffefa843a496493bc426213fa5db6a0e Mon Sep 17 00:00:00 2001
From: Nathan Schram <5553883+nathanschram@users.noreply.github.com>
Date: Sun, 22 Mar 2026 04:27:12 +0000
Subject: [PATCH 1/5] feat: emoji buttons + edit-in-place for outline approval
 (#186)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add emoji prefixes to ExitPlanMode and post-outline buttons (✅/❌/📋).
Post-outline approve/deny now edits the "Asked Claude Code to outline
the plan" message in-place instead of creating a second message.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                                  |   1 +
 CLAUDE.md                                     |   2 +-
 src/untether/runners/claude.py                |  15 +-
 .../telegram/commands/claude_control.py       |  76 ++++++--
 tests/test_ask_user_question.py               |   4 +-
 tests/test_claude_control.py                  | 178 +++++++++++++++---
 tests/test_cooldown_bypass.py                 |   8 +-
 7 files changed, 237 insertions(+), 47 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab0594d..2b57ad2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -60,6 +60,7 @@
   - `AutoContinueSettings` with `enabled` (default true) and `max_retries` (default 1) in `[auto_continue]` config section
   - detection based on protocol invariant: normal sessions always end with `last_event_type=result`
   - sends "⚠️ Auto-continuing — Claude stopped before processing tool results" notification before resuming
+- emoji button labels and edit-in-place for outline approval — ExitPlanMode buttons now show ✅/❌/📋 emoji prefixes; post-outline "Approve Plan"/"Deny" edits the "Asked Claude Code to outline the plan" message in-place instead of creating a second message [#186](https://github.com/littlebearapps/untether/issues/186)
 
 ### tests
 
diff --git a/CLAUDE.md b/CLAUDE.md
index c70314f..9e13eb9 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -153,7 +153,7 @@ Rules in `.claude/rules/` auto-load when editing matching files:
 
 ## Tests
 
-1765 unit tests, 80% coverage threshold. Integration testing against `@untether_dev_bot` is **mandatory before every release** — see `docs/reference/integration-testing.md` for the full playbook with per-release-type tier requirements (patch/minor/major). All integration test tiers are fully automated by Claude Code via Telegram MCP tools and Bash.
+1766 unit tests, 80% coverage threshold. Integration testing against `@untether_dev_bot` is **mandatory before every release** — see `docs/reference/integration-testing.md` for the full playbook with per-release-type tier requirements (patch/minor/major). All integration test tiers are fully automated by Claude Code via Telegram MCP tools and Bash.
 
 Key test files:
 
diff --git a/src/untether/runners/claude.py b/src/untether/runners/claude.py
index 45b91ec..1f14a1d 100644
--- a/src/untether/runners/claude.py
+++ b/src/untether/runners/claude.py
@@ -729,11 +729,11 @@ def translate_claude_event(
                                         "buttons": [
                                             [
                                                 {
-                                                    "text": "Approve Plan",
+                                                    "text": "✅ Approve Plan",
                                                     "callback_data": f"claude_control:approve:{button_request_id}",
                                                 },
                                                 {
-                                                    "text": "Deny",
+                                                    "text": "❌ Deny",
                                                     "callback_data": f"claude_control:deny:{button_request_id}",
                                                 },
                                             ],
@@ -839,11 +839,11 @@ def translate_claude_event(
             button_rows: list[list[dict[str, str]]] = [
                 [
                     {
-                        "text": "Approve",
+                        "text": "✅ Approve",
                         "callback_data": f"claude_control:approve:{request_id}",
                     },
                     {
-                        "text": "Deny",
+                        "text": "❌ Deny",
                         "callback_data": f"claude_control:deny:{request_id}",
                     },
                 ],
@@ -855,7 +855,7 @@ def translate_claude_event(
                     button_rows.append(
                         [
                             {
-                                "text": "Pause & Outline Plan",
+                                "text": "📋 Pause & Outline Plan",
                                 "callback_data": f"claude_control:discuss:{request_id}",
                             },
                         ]
@@ -1937,6 +1937,11 @@ def _cleanup_session_registries(session_id: str) -> None:
     if session_id in _OUTLINE_PENDING:
         cleaned.append("outline_pending")
     _OUTLINE_PENDING.discard(session_id)
+    # Clean up discuss feedback ref (post-outline edit-instead-of-send tracking)
+    from ..telegram.commands.claude_control import _DISCUSS_FEEDBACK_REFS
+
+    if _DISCUSS_FEEDBACK_REFS.pop(session_id, None) is not None:
+        cleaned.append("discuss_feedback_ref")
     stale = [k for k, v in _REQUEST_TO_SESSION.items() if v == session_id]
     if stale:
         cleaned.append(f"requests({len(stale)})")
diff --git a/src/untether/telegram/commands/claude_control.py b/src/untether/telegram/commands/claude_control.py
index 5433e3a..64520fd 100644
--- a/src/untether/telegram/commands/claude_control.py
+++ b/src/untether/telegram/commands/claude_control.py
@@ -4,7 +4,7 @@
 
 from ...commands import CommandBackend, CommandContext, CommandResult
 from ...logging import get_logger
-from ...runner_bridge import delete_outline_messages
+from ...runner_bridge import delete_outline_messages, register_ephemeral_message
 from ...runners.claude import (
     _ACTIVE_RUNNERS,
     _DISCUSS_APPROVED,
@@ -15,9 +15,14 @@
     send_claude_control_response,
     set_discuss_cooldown,
 )
+from ...transport import MessageRef
 
 logger = get_logger(__name__)
 
+# Tracks the "📋 Asked Claude Code to outline the plan" message ref per session,
+# so the post-outline approve/deny can edit it instead of sending a 2nd message.
+_DISCUSS_FEEDBACK_REFS: dict[str, MessageRef] = {}
+
 
 _DISCUSS_DENY_MESSAGE = (
     "STOP. Do NOT call ExitPlanMode yet.\n\n"
@@ -136,10 +141,19 @@ async def handle(self, ctx: CommandContext) -> CommandResult | None:
                 request_id=request_id,
                 action=action,
             )
-            return CommandResult(
-                text="📋 Asked Claude Code to outline the plan",
+
+            # Send feedback directly and store ref so post-outline approve/deny
+            # can edit this message instead of creating a second one.
+            ref = await ctx.executor.send(
+                "📋 Asked Claude Code to outline the plan",
                 notify=True,
             )
+            if ref and session_id:
+                _DISCUSS_FEEDBACK_REFS[session_id] = ref
+                register_ephemeral_message(
+                    ctx.message.channel_id, ctx.message.message_id, ref
+                )
+            return None
 
         approved = action == "approve"
 
@@ -156,6 +170,7 @@ async def handle(self, ctx: CommandContext) -> CommandResult | None:
                     "claude_control.discuss_plan_session_ended",
                     session_id=session_id,
                 )
+                _DISCUSS_FEEDBACK_REFS.pop(session_id, None)
                 return CommandResult(
                     text=(
                         "⚠️ Session has ended — start a new run"
@@ -175,11 +190,7 @@ async def handle(self, ctx: CommandContext) -> CommandResult | None:
                     "claude_control.discuss_plan_approved",
                     session_id=session_id,
                 )
-                return CommandResult(
-                    text="✅ Plan approved — Claude Code will proceed",
-                    notify=True,
-                    skip_reply=True,
-                )
+                action_text = "✅ Plan approved — Claude Code will proceed"
             else:
                 _OUTLINE_PENDING.discard(session_id)
                 clear_discuss_cooldown(session_id)
@@ -187,11 +198,26 @@ async def handle(self, ctx: CommandContext) -> CommandResult | None:
                     "claude_control.discuss_plan_denied",
                     session_id=session_id,
                 )
-                return CommandResult(
-                    text="❌ Plan denied — send a follow-up message with feedback",
-                    notify=True,
-                    skip_reply=True,
-                )
+                action_text = "❌ Plan denied — send a follow-up message with feedback"
+
+            # Edit the discuss feedback message instead of sending a new one
+            existing_ref = _DISCUSS_FEEDBACK_REFS.pop(session_id, None)
+            if existing_ref:
+                try:
+                    await ctx.executor.edit(existing_ref, action_text)
+                    return None
+                except Exception:  # noqa: BLE001
+                    logger.debug(
+                        "claude_control.discuss_feedback_edit_failed",
+                        session_id=session_id,
+                        exc_info=True,
+                    )
+            # Fallback: send as new message if edit failed or no ref stored
+            return CommandResult(
+                text=action_text,
+                notify=True,
+                skip_reply=True,
+            )
 
         # Grab session_id before send_claude_control_response deletes it
         session_id = _REQUEST_TO_SESSION.get(request_id)
@@ -233,6 +259,30 @@ async def handle(self, ctx: CommandContext) -> CommandResult | None:
 
             had_outline = session_id in _OUTLINE_REGISTRY
             await delete_outline_messages(session_id)
+            # Try to edit the discuss feedback message for outline-flow
+            # approve/deny (when outline was long enough to use real request_id
+            # instead of da: prefix).
+            existing_ref = _DISCUSS_FEEDBACK_REFS.pop(session_id, None)
+            if existing_ref:
+                action_text = (
+                    "✅ Plan approved — Claude Code will proceed"
+                    if approved
+                    else "❌ Plan denied — send a follow-up message with feedback"
+                )
+                try:
+                    await ctx.executor.edit(existing_ref, action_text)
+                    logger.info(
+                        "claude_control.sent",
+                        request_id=request_id,
+                        approved=approved,
+                    )
+                    return None
+                except Exception:  # noqa: BLE001
+                    logger.debug(
+                        "claude_control.discuss_feedback_edit_failed",
+                        session_id=session_id,
+                        exc_info=True,
+                    )
 
         action_text = "✅ Approved" if approved else "❌ Denied"
         logger.info(
diff --git a/tests/test_ask_user_question.py b/tests/test_ask_user_question.py
index 4e209cb..3c69bc2 100644
--- a/tests/test_ask_user_question.py
+++ b/tests/test_ask_user_question.py
@@ -175,8 +175,8 @@ def test_ask_user_question_has_inline_keyboard() -> None:
     assert "buttons" in kb
     # Should have approve/deny buttons
     button_texts = [b["text"] for row in kb["buttons"] for b in row]
-    assert "Approve" in button_texts
-    assert "Deny" in button_texts
+    assert "✅ Approve" in button_texts
+    assert "❌ Deny" in button_texts
 
 
 # ===========================================================================
diff --git a/tests/test_claude_control.py b/tests/test_claude_control.py
index 52c6062..722a077 100644
--- a/tests/test_claude_control.py
+++ b/tests/test_claude_control.py
@@ -85,6 +85,9 @@ def _clear_registries():
     _REQUEST_TO_INPUT.clear()
     _HANDLED_REQUESTS.clear()
     _DISCUSS_COOLDOWN.clear()
+    from untether.telegram.commands.claude_control import _DISCUSS_FEEDBACK_REFS
+
+    _DISCUSS_FEEDBACK_REFS.clear()
 
 
 # ===========================================================================
@@ -120,13 +123,13 @@ def test_can_use_tool_produces_warning_with_inline_keyboard() -> None:
     buttons = kb["buttons"]
     assert len(buttons) == 2  # two rows for ExitPlanMode
     assert len(buttons[0]) == 2  # Approve + Deny
-    assert buttons[0][0]["text"] == "Approve"
+    assert buttons[0][0]["text"] == "✅ Approve"
     assert "req-1" in buttons[0][0]["callback_data"]
-    assert buttons[0][1]["text"] == "Deny"
+    assert buttons[0][1]["text"] == "❌ Deny"
     assert "req-1" in buttons[0][1]["callback_data"]
     # Second row: Outline Plan
     assert len(buttons[1]) == 1
-    assert buttons[1][0]["text"] == "Pause & Outline Plan"
+    assert buttons[1][0]["text"] == "📋 Pause & Outline Plan"
     assert "discuss" in buttons[1][0]["callback_data"]
     assert "req-1" in buttons[1][0]["callback_data"]
 
@@ -490,6 +493,9 @@ def test_stream_end_events_cleans_registries() -> None:
 
 def test_cleanup_session_registries_clears_all_state() -> None:
     """_cleanup_session_registries clears cooldown, outline, and approval state."""
+    from untether.telegram.commands.claude_control import _DISCUSS_FEEDBACK_REFS
+    from untether.transport import MessageRef
+
     runner = ClaudeRunner(claude_cmd="claude")
     session_id = "sess-full-cleanup"
 
@@ -501,6 +507,7 @@ def test_cleanup_session_registries_clears_all_state() -> None:
     _OUTLINE_PENDING.add(session_id)
     _REQUEST_TO_SESSION["req-a"] = session_id
     _REQUEST_TO_SESSION["req-b"] = session_id
+    _DISCUSS_FEEDBACK_REFS[session_id] = MessageRef(channel_id=1, message_id=1)
 
     _cleanup_session_registries(session_id)
 
@@ -511,6 +518,7 @@ def test_cleanup_session_registries_clears_all_state() -> None:
     assert session_id not in _OUTLINE_PENDING
     assert "req-a" not in _REQUEST_TO_SESSION
     assert "req-b" not in _REQUEST_TO_SESSION
+    assert session_id not in _DISCUSS_FEEDBACK_REFS
 
 
 def test_cleanup_session_registries_idempotent() -> None:
@@ -752,6 +760,7 @@ async def test_discuss_action_sends_deny_with_custom_message() -> None:
     from untether.telegram.commands.claude_control import (
         ClaudeControlCommand,
         _DISCUSS_DENY_MESSAGE,
+        _DISCUSS_FEEDBACK_REFS,
     )
 
     runner = ClaudeRunner(claude_cmd="claude")
@@ -763,10 +772,14 @@ async def test_discuss_action_sends_deny_with_custom_message() -> None:
     _REQUEST_TO_SESSION["req-discuss"] = session_id
     _REQUEST_TO_INPUT["req-discuss"] = {}
 
-    # Build a minimal CommandContext
+    # Build a minimal CommandContext with a fake executor
     from untether.commands import CommandContext
     from untether.transport import MessageRef
 
+    fake_executor = AsyncMock()
+    sent_ref = MessageRef(channel_id=123, message_id=99)
+    fake_executor.send = AsyncMock(return_value=sent_ref)
+
     ctx = CommandContext(
         command="claude_control",
         text="claude_control:discuss:req-discuss",
@@ -778,14 +791,21 @@ async def test_discuss_action_sends_deny_with_custom_message() -> None:
         config_path=None,
         plugin_config=None,  # type: ignore[arg-type]
         runtime=None,  # type: ignore[arg-type]
-        executor=None,  # type: ignore[arg-type]
+        executor=fake_executor,
     )
 
     cmd = ClaudeControlCommand()
     result = await cmd.handle(ctx)
 
-    assert result is not None
-    assert "outline" in result.text.lower()
+    # Handler sends directly and returns None
+    assert result is None
+    fake_executor.send.assert_called_once()
+    sent_text = fake_executor.send.call_args[0][0]
+    assert "outline" in sent_text.lower()
+
+    # Verify the discuss feedback ref was stored for later editing
+    assert session_id in _DISCUSS_FEEDBACK_REFS
+    assert _DISCUSS_FEEDBACK_REFS[session_id] == sent_ref
 
     # Verify the stdin payload
     payload = json.loads(fake_stdin.send.call_args[0][0].decode())
@@ -897,7 +917,7 @@ def test_exit_plan_mode_auto_denied_during_cooldown() -> None:
     buttons = evt.action.detail["inline_keyboard"]["buttons"]
     assert len(buttons) == 1  # One row with Approve + Deny
     assert len(buttons[0]) == 2
-    assert "Approve" in buttons[0][0]["text"]
+    assert "Approve" in buttons[0][0]["text"]  # "✅ Approve Plan"
 
 
 def test_exit_plan_mode_blocked_after_cooldown_expires_without_outline() -> None:
@@ -976,8 +996,8 @@ def test_exit_plan_mode_after_cooldown_expires_with_outline_shows_synthetic_butt
     buttons = detail["inline_keyboard"]["buttons"]
     assert len(buttons) == 1
     assert len(buttons[0]) == 2
-    assert buttons[0][0]["text"] == "Approve Plan"
-    assert buttons[0][1]["text"] == "Deny"
+    assert buttons[0][0]["text"] == "✅ Approve Plan"
+    assert buttons[0][1]["text"] == "❌ Deny"
     # Outline-ready uses real request_id (not da: prefix)
     assert buttons[0][0]["callback_data"] == "claude_control:approve:req-cd-outline"
 
@@ -1046,7 +1066,7 @@ async def test_discuss_handler_sets_cooldown() -> None:
         config_path=None,
         plugin_config=None,  # type: ignore[arg-type]
         runtime=None,  # type: ignore[arg-type]
-        executor=None,  # type: ignore[arg-type]
+        executor=AsyncMock(send=AsyncMock(return_value=None)),
     )
 
     cmd = ClaudeControlCommand()
@@ -1613,16 +1633,24 @@ def test_resumed_session_no_stale_outline_guard(self):
 
 
 @pytest.mark.anyio
-async def test_discuss_approve_result_skips_reply() -> None:
-    """Post-outline 'Approve Plan' returns CommandResult with skip_reply=True."""
+async def test_discuss_approve_edits_feedback_message() -> None:
+    """Post-outline 'Approve Plan' edits the discuss feedback message."""
     from untether.commands import CommandContext
-    from untether.telegram.commands.claude_control import ClaudeControlCommand
+    from untether.telegram.commands.claude_control import (
+        ClaudeControlCommand,
+        _DISCUSS_FEEDBACK_REFS,
+    )
     from untether.transport import MessageRef
 
     runner = ClaudeRunner(claude_cmd="claude")
     session_id = "sess-skip"
     _ACTIVE_RUNNERS[session_id] = (runner, 0.0)
 
+    # Simulate a stored discuss feedback ref
+    feedback_ref = MessageRef(channel_id=123, message_id=99)
+    _DISCUSS_FEEDBACK_REFS[session_id] = feedback_ref
+
+    fake_executor = AsyncMock()
     ctx = CommandContext(
         command="claude_control",
         text=f"claude_control:approve:da:{session_id}",
@@ -1634,27 +1662,41 @@ async def test_discuss_approve_result_skips_reply() -> None:
         config_path=None,
         plugin_config={},
         runtime=None,  # type: ignore[arg-type]
-        executor=None,  # type: ignore[arg-type]
+        executor=fake_executor,
     )
 
     cmd = ClaudeControlCommand()
     result = await cmd.handle(ctx)
-    assert result is not None
-    assert result.skip_reply is True
-    assert "approved" in result.text.lower()
+
+    # Handler edits the feedback message and returns None
+    assert result is None
+    fake_executor.edit.assert_called_once()
+    edit_ref, edit_text = fake_executor.edit.call_args[0]
+    assert edit_ref == feedback_ref
+    assert "approved" in edit_text.lower()
+    # Ref should be cleaned up
+    assert session_id not in _DISCUSS_FEEDBACK_REFS
 
 
 @pytest.mark.anyio
-async def test_discuss_deny_result_skips_reply() -> None:
-    """Post-outline 'Deny' returns CommandResult with skip_reply=True."""
+async def test_discuss_deny_edits_feedback_message() -> None:
+    """Post-outline 'Deny' edits the discuss feedback message."""
     from untether.commands import CommandContext
-    from untether.telegram.commands.claude_control import ClaudeControlCommand
+    from untether.telegram.commands.claude_control import (
+        ClaudeControlCommand,
+        _DISCUSS_FEEDBACK_REFS,
+    )
     from untether.transport import MessageRef
 
     runner = ClaudeRunner(claude_cmd="claude")
     session_id = "sess-skip-deny"
     _ACTIVE_RUNNERS[session_id] = (runner, 0.0)
 
+    # Simulate a stored discuss feedback ref
+    feedback_ref = MessageRef(channel_id=123, message_id=99)
+    _DISCUSS_FEEDBACK_REFS[session_id] = feedback_ref
+
+    fake_executor = AsyncMock()
     ctx = CommandContext(
         command="claude_control",
         text=f"claude_control:deny:da:{session_id}",
@@ -1666,11 +1708,103 @@ async def test_discuss_deny_result_skips_reply() -> None:
         config_path=None,
         plugin_config={},
         runtime=None,  # type: ignore[arg-type]
+        executor=fake_executor,
+    )
+
+    cmd = ClaudeControlCommand()
+    result = await cmd.handle(ctx)
+
+    # Handler edits the feedback message and returns None
+    assert result is None
+    fake_executor.edit.assert_called_once()
+    edit_ref, edit_text = fake_executor.edit.call_args[0]
+    assert edit_ref == feedback_ref
+    assert "denied" in edit_text.lower()
+    # Ref should be cleaned up
+    assert session_id not in _DISCUSS_FEEDBACK_REFS
+
+
+@pytest.mark.anyio
+async def test_discuss_approve_falls_back_without_stored_ref() -> None:
+    """Post-outline approve falls back to CommandResult when no stored ref."""
+    from untether.commands import CommandContext
+    from untether.telegram.commands.claude_control import ClaudeControlCommand
+    from untether.transport import MessageRef
+
+    runner = ClaudeRunner(claude_cmd="claude")
+    session_id = "sess-no-ref"
+    _ACTIVE_RUNNERS[session_id] = (runner, 0.0)
+    # No _DISCUSS_FEEDBACK_REFS entry
+
+    ctx = CommandContext(
+        command="claude_control",
+        text=f"claude_control:approve:da:{session_id}",
+        args_text=f"approve:da:{session_id}",
+        args=(f"approve:da:{session_id}",),
+        message=MessageRef(channel_id=123, message_id=1),
+        reply_to=None,
+        reply_text=None,
+        config_path=None,
+        plugin_config={},
+        runtime=None,  # type: ignore[arg-type]
         executor=None,  # type: ignore[arg-type]
     )
 
     cmd = ClaudeControlCommand()
     result = await cmd.handle(ctx)
+    # Falls back to CommandResult
     assert result is not None
     assert result.skip_reply is True
-    assert "denied" in result.text.lower()
+    assert "approved" in result.text.lower()
+
+
+@pytest.mark.anyio
+async def test_normal_approve_edits_feedback_when_outline_ref_exists() -> None:
+    """Normal approve (real request_id, not da:) edits discuss feedback if ref stored."""
+    from untether.commands import CommandContext
+    from untether.telegram.commands.claude_control import (
+        ClaudeControlCommand,
+        _DISCUSS_FEEDBACK_REFS,
+    )
+    from untether.transport import MessageRef
+
+    runner = ClaudeRunner(claude_cmd="claude")
+    session_id = "sess-normal-outline"
+
+    _ACTIVE_RUNNERS[session_id] = (runner, 0.0)
+    fake_stdin = AsyncMock()
+    _SESSION_STDIN[session_id] = fake_stdin
+    _REQUEST_TO_SESSION["req-outline-real"] = session_id
+    _REQUEST_TO_INPUT["req-outline-real"] = {}
+    _REQUEST_TO_TOOL_NAME["req-outline-real"] = "ExitPlanMode"
+
+    # Simulate a stored discuss feedback ref from the earlier "Pause & Outline" click
+    feedback_ref = MessageRef(channel_id=123, message_id=99)
+    _DISCUSS_FEEDBACK_REFS[session_id] = feedback_ref
+
+    fake_executor = AsyncMock()
+    ctx = CommandContext(
+        command="claude_control",
+        text="claude_control:approve:req-outline-real",
+        args_text="approve:req-outline-real",
+        args=("approve:req-outline-real",),
+        message=MessageRef(channel_id=123, message_id=1),
+        reply_to=None,
+        reply_text=None,
+        config_path=None,
+        plugin_config={},
+        runtime=None,  # type: ignore[arg-type]
+        executor=fake_executor,
+    )
+
+    cmd = ClaudeControlCommand()
+    result = await cmd.handle(ctx)
+
+    # Handler should edit the feedback message and return None
+    assert result is None
+    fake_executor.edit.assert_called_once()
+    edit_ref, edit_text = fake_executor.edit.call_args[0]
+    assert edit_ref == feedback_ref
+    assert "approved" in edit_text.lower()
+    # Ref should be cleaned up
+    assert session_id not in _DISCUSS_FEEDBACK_REFS
diff --git a/tests/test_cooldown_bypass.py b/tests/test_cooldown_bypass.py
index 48bc002..d88fa1a 100644
--- a/tests/test_cooldown_bypass.py
+++ b/tests/test_cooldown_bypass.py
@@ -145,8 +145,8 @@ def test_outline_ready_buttons_use_real_request_id():
     # Only 1 row with 2 buttons: Approve Plan, Deny
     assert len(buttons) == 1
     assert len(buttons[0]) == 2
-    assert buttons[0][0]["text"] == "Approve Plan"
-    assert buttons[0][1]["text"] == "Deny"
+    assert buttons[0][0]["text"] == "✅ Approve Plan"
+    assert buttons[0][1]["text"] == "❌ Deny"
     # Callback data uses REAL request_id (not da: prefix)
     assert buttons[0][0]["callback_data"] == f"claude_control:approve:{request_id}"
     assert buttons[0][1]["callback_data"] == f"claude_control:deny:{request_id}"
@@ -532,8 +532,8 @@ def test_hold_open_after_cooldown_expires_with_outline():
     buttons = detail["inline_keyboard"]["buttons"]
     assert len(buttons) == 1
     assert len(buttons[0]) == 2
-    assert buttons[0][0]["text"] == "Approve Plan"
-    assert buttons[0][1]["text"] == "Deny"
+    assert buttons[0][0]["text"] == "✅ Approve Plan"
+    assert buttons[0][1]["text"] == "❌ Deny"
     # Request should be held open (not auto-denied)
     assert len(state.auto_deny_queue) == 0
     assert request_id in state.pending_control_requests

From 75c13009d02da8ca8d2cdf08c759bb60553c1670 Mon Sep 17 00:00:00 2001
From: Nathan Schram <5553883+nathanschram@users.noreply.github.com>
Date: Sun, 22 Mar 2026 05:05:20 +0000
Subject: [PATCH 2/5] feat: redesign startup message layout (#187)

Split engine info into separate lines, add italic subheadings,
rename "projects" to "directories", add bug report link.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                     |  1 +
 src/untether/telegram/backend.py | 26 ++++++++++++++------------
 tests/test_telegram_backend.py   | 29 +++++++++++++++--------------
 3 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b57ad2..23c75d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -61,6 +61,7 @@
   - detection based on protocol invariant: normal sessions always end with `last_event_type=result`
   - sends "⚠️ Auto-continuing — Claude stopped before processing tool results" notification before resuming
 - emoji button labels and edit-in-place for outline approval — ExitPlanMode buttons now show ✅/❌/📋 emoji prefixes; post-outline "Approve Plan"/"Deny" edits the "Asked Claude Code to outline the plan" message in-place instead of creating a second message [#186](https://github.com/littlebearapps/untether/issues/186)
+- redesign startup message layout — version in parentheses, split engine info into "default engine" and "installed engines" lines, italic subheadings, renamed "projects" to "directories" (matching `dir:` footer label), added bug report link [#187](https://github.com/littlebearapps/untether/issues/187)
 
 ### tests
 
diff --git a/src/untether/telegram/backend.py b/src/untether/telegram/backend.py
index 29908b8..8638f1c 100644
--- a/src/untether/telegram/backend.py
+++ b/src/untether/telegram/backend.py
@@ -111,9 +111,9 @@ def _build_startup_message(
 ) -> str:
     project_aliases = sorted(set(runtime.project_aliases()), key=str.lower)
 
-    header = f"\N{DOG} **untether v{__version__} is ready**"
+    header = f"\N{DOG} **untether is ready** (v{__version__})"
 
-    # engine — merged default + available on one line
+    # engines — separate default and installed lines
     available_engines = list(runtime.available_engine_ids())
     missing_engines = list(runtime.missing_engine_ids())
     misconfigured_engines = list(runtime.engine_ids_with_status("bad_config"))
@@ -128,23 +128,23 @@ def _build_startup_message(
     engine_list = ", ".join(available_engines) if available_engines else "none"
 
     details: list[str] = []
+    details.append(f"_default engine:_ `{runtime.default_engine}`")
     if engine_notes:
         details.append(
-            f"engine: `{runtime.default_engine}`"
-            f" · engines: `{engine_list} ({'; '.join(engine_notes)})`"
+            f"_installed engines:_ `{engine_list}` ({'; '.join(engine_notes)})"
         )
     else:
-        details.append(f"engine: `{runtime.default_engine}` · engines: `{engine_list}`")
+        details.append(f"_installed engines:_ `{engine_list}`")
 
     # mode — derived from session_mode + topics
     mode = _resolve_mode_label(session_mode, topics.enabled)
-    details.append(f"mode: `{mode}`")
+    details.append(f"_mode:_ `{mode}`")
 
-    # projects — listed by name
+    # directories — listed by name
     if project_aliases:
-        details.append(f"projects: `{', '.join(project_aliases)}`")
+        details.append(f"_directories:_ `{', '.join(project_aliases)}`")
     else:
-        details.append("projects: `none`")
+        details.append("_directories:_ `none`")
 
     # topics — only shown when enabled
     if topics.enabled:
@@ -154,18 +154,20 @@ def _build_startup_message(
         scope_label = (
             f"auto ({resolved_scope})" if topics.scope == "auto" else resolved_scope
         )
-        details.append(f"topics: `enabled (scope={scope_label})`")
+        details.append(f"_topics:_ `enabled (scope={scope_label})`")
 
     # triggers — only shown when enabled
     if trigger_config and trigger_config.get("enabled"):
         n_wh = len(trigger_config.get("webhooks", []))
         n_cr = len(trigger_config.get("crons", []))
-        details.append(f"triggers: `enabled ({n_wh} webhooks, {n_cr} crons)`")
+        details.append(f"_triggers:_ `enabled ({n_wh} webhooks, {n_cr} crons)`")
 
     _DOCS_URL = "https://littlebearapps.com/tools/untether/"
+    _ISSUES_URL = "https://github.com/littlebearapps/untether/issues"
     footer = (
         f"\n\nSend a message to start, or /config for settings."
-        f"\n\N{OPEN BOOK} [Click here for help guide]({_DOCS_URL})"
+        f"\n\N{OPEN BOOK} [Click here for help]({_DOCS_URL})"
+        f" | \N{BUG} [Click here to report a bug]({_ISSUES_URL})"
     )
 
     return header + "\n\n" + "\n\n".join(details) + footer
diff --git a/tests/test_telegram_backend.py b/tests/test_telegram_backend.py
index dbde1f5..6b1b5fe 100644
--- a/tests/test_telegram_backend.py
+++ b/tests/test_telegram_backend.py
@@ -47,9 +47,9 @@ def test_build_startup_message_includes_missing_engines(tmp_path: Path) -> None:
         topics=TelegramTopicsSettings(),
     )
 
-    assert "untether" in message and "is ready" in message
+    assert "untether is ready" in message
     assert "not installed: pi" in message
-    assert "projects: `none`" in message
+    assert "_directories:_ `none`" in message
 
 
 def test_build_startup_message_surfaces_unavailable_engine_reasons(
@@ -87,7 +87,7 @@ def test_build_startup_message_surfaces_unavailable_engine_reasons(
         topics=TelegramTopicsSettings(),
     )
 
-    assert "engines:" in message and "codex" in message
+    assert "_installed engines:_" in message and "codex" in message
     assert "misconfigured: pi" in message
     assert "failed to load: claude" in message
 
@@ -135,15 +135,16 @@ def test_startup_message_core_fields() -> None:
         chat_id=123,
         topics=TelegramTopicsSettings(),
     )
-    assert "engine: `claude`" in message
-    assert "engines: `claude`" in message
-    assert "projects: `none`" in message
+    assert "_default engine:_ `claude`" in message
+    assert "_installed engines:_ `claude`" in message
+    assert "_directories:_ `none`" in message
     # Disabled topics/triggers should NOT appear
-    assert "topics:" not in message
-    assert "triggers:" not in message
+    assert "_topics:_" not in message
+    assert "_triggers:_" not in message
     # Quick-start hint and help link
     assert "/config" in message
     assert "littlebearapps.com" in message
+    assert "report a bug" in message
 
 
 def test_startup_message_shows_topics_when_enabled() -> None:
@@ -153,7 +154,7 @@ def test_startup_message_shows_topics_when_enabled() -> None:
         chat_id=123,
         topics=TelegramTopicsSettings(enabled=True, scope="main"),
     )
-    assert "topics:" in message
+    assert "_topics:_" in message
 
 
 def test_startup_message_shows_mode_assistant() -> None:
@@ -164,7 +165,7 @@ def test_startup_message_shows_mode_assistant() -> None:
         topics=TelegramTopicsSettings(),
         session_mode="chat",
     )
-    assert "mode: `assistant`" in message
+    assert "_mode:_ `assistant`" in message
 
 
 def test_startup_message_shows_mode_workspace() -> None:
@@ -175,7 +176,7 @@ def test_startup_message_shows_mode_workspace() -> None:
         topics=TelegramTopicsSettings(enabled=True, scope="main"),
         session_mode="chat",
     )
-    assert "mode: `workspace`" in message
+    assert "_mode:_ `workspace`" in message
 
 
 def test_startup_message_shows_mode_handoff() -> None:
@@ -186,7 +187,7 @@ def test_startup_message_shows_mode_handoff() -> None:
         topics=TelegramTopicsSettings(),
         session_mode="stateless",
     )
-    assert "mode: `handoff`" in message
+    assert "_mode:_ `handoff`" in message
 
 
 def test_startup_message_shows_triggers_when_enabled() -> None:
@@ -197,7 +198,7 @@ def test_startup_message_shows_triggers_when_enabled() -> None:
         topics=TelegramTopicsSettings(),
         trigger_config={"enabled": True, "webhooks": [{}], "crons": []},
     )
-    assert "triggers:" in message
+    assert "_triggers:_" in message
     assert "1 webhooks" in message
 
 
@@ -233,7 +234,7 @@ def test_startup_message_project_count(tmp_path: Path) -> None:
         chat_id=123,
         topics=TelegramTopicsSettings(),
     )
-    assert "projects: `proj-a, proj-b`" in message
+    assert "_directories:_ `proj-a, proj-b`" in message
 
 
 def test_telegram_backend_build_and_run_wires_config(

From 18cbd07c7d4f40e9a1d132cb86a504c541539aab Mon Sep 17 00:00:00 2001
From: Nathan Schram <5553883+nathanschram@users.noreply.github.com>
Date: Sun, 22 Mar 2026 05:10:59 +0000
Subject: [PATCH 3/5] =?UTF-8?q?feat:=20add=20=F0=9F=A7=B9=20emoji=20to=20/?=
 =?UTF-8?q?new=20session=20clear=20messages?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Part of startup message UX improvements (#187).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/untether/telegram/commands/topics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/untether/telegram/commands/topics.py b/src/untether/telegram/commands/topics.py
index 817da09..e09493c 100644
--- a/src/untether/telegram/commands/topics.py
+++ b/src/untether/telegram/commands/topics.py
@@ -241,7 +241,7 @@ async def _handle_new_command(
         await reply(text="this command only works inside a topic.")
         return
     await store.clear_sessions(*tkey)
-    await reply(text="cleared stored sessions for this topic.")
+    await reply(text="\N{BROOM} cleared stored sessions for this topic.")
 
 
 async def _handle_chat_new_command(
@@ -256,9 +256,9 @@ async def _handle_chat_new_command(
         return
     await store.clear_sessions(session_key[0], session_key[1])
     if msg.chat_type == "private":
-        text = "cleared stored sessions for this chat."
+        text = "\N{BROOM} cleared stored sessions for this chat."
     else:
-        text = "cleared stored sessions for you in this chat."
+        text = "\N{BROOM} cleared stored sessions for you in this chat."
     await reply(text=text)
 
 

From ec18e92fbfebcb399c14ce405c5718b9ae931ea8 Mon Sep 17 00:00:00 2001
From: Nathan Schram <5553883+nathanschram@users.noreply.github.com>
Date: Sun, 22 Mar 2026 05:22:28 +0000
Subject: [PATCH 4/5] fix: make stall warnings succinct and accurate for
 long-running tools (#188)

Truncate Last: to 80 chars, recognise command: prefix for Bash tools,
use reassuring "still running" when CPU active, drop PID diagnostics
from Telegram messages, only say "may be stuck" when genuinely stuck.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                  |  1 +
 src/untether/runner_bridge.py | 40 ++++++++++++++++++++++++-----------
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 23c75d8..94d4f71 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,6 +28,7 @@
 - clarify /config default labels and remove redundant "Works with" lines [#119](https://github.com/littlebearapps/untether/issues/119)
 - Codex: always pass `--ask-for-approval` in headless mode — default to `never` (auto-approve all) so Codex never blocks on terminal input; `safe` permission mode still uses `untrusted` [#184](https://github.com/littlebearapps/untether/issues/184)
 - OpenCode: surface unsupported JSONL event types as visible Telegram warnings instead of silently dropping them — prevents silent 5-minute hangs when OpenCode emits new event types (e.g. `question`, `permission`) [#183](https://github.com/littlebearapps/untether/issues/183)
+- stall warnings now succinct and accurate for long-running tools — truncate "Last:" to 80 chars, recognise `command:` prefix (Bash tools), reassuring "still running" message when CPU active, drop PID diagnostics from Telegram messages, only say "may be stuck" when genuinely stuck [#188](https://github.com/littlebearapps/untether/issues/188)
 
 ### changes
 
diff --git a/src/untether/runner_bridge.py b/src/untether/runner_bridge.py
index 3ed696a..8c85468 100644
--- a/src/untether/runner_bridge.py
+++ b/src/untether/runner_bridge.py
@@ -727,7 +727,7 @@ async def _monitor() -> None:
 
     async def _stall_monitor(self) -> None:
         """Periodically check for event stalls, log diagnostics, and notify."""
-        from .utils.proc_diag import collect_proc_diag, format_diag, is_cpu_active
+        from .utils.proc_diag import collect_proc_diag, is_cpu_active
 
         while True:
             await anyio.sleep(self._stall_check_interval)
@@ -934,34 +934,50 @@ async def _stall_monitor(self) -> None:
                     parts = [f"⏳ MCP tool running: {mcp_server} ({mins} min)"]
                 else:
                     # Extract tool name from last running action for
-                    # actionable stall messages ("Bash tool may be stuck"
+                    # actionable stall messages ("Bash command still running"
                     # instead of generic "session may be stuck").
                     _tool_name = None
                     if last_action:
-                        for _prefix in ("tool:", "note:"):
+                        for _prefix in ("tool:", "note:", "command:"):
                             if last_action.startswith(_prefix):
                                 _rest = last_action[len(_prefix) :]
-                                _tool_name = _rest.split(" ", 1)[0].split(":", 1)[0]
+                                _raw = _rest.split(" ", 1)[0].split(":", 1)[0]
+                                # Map kind prefix to user-friendly name
+                                _tool_name = "Bash" if _prefix == "command:" else _raw
                                 break
                     if _tool_name and main_sleeping:
-                        parts = [
-                            f"⏳ {_tool_name} tool may be stuck ({mins} min, process waiting)"
-                        ]
+                        if cpu_active is True:
+                            parts = [
+                                f"⏳ {_tool_name} command still running ({mins} min)"
+                            ]
+                        else:
+                            parts = [
+                                f"⏳ {_tool_name} tool may be stuck ({mins} min, no CPU activity)"
+                            ]
+                    elif cpu_active is True:
+                        parts = [f"⏳ Still working ({mins} min, CPU active)"]
                     else:
                         parts = [f"⏳ No progress for {mins} min"]
                 if self._stall_warn_count > 1:
                     parts[0] += f" (warned {self._stall_warn_count}x)"
-                if (
+                # "session may be stuck" — only when genuinely stuck
+                # (no tool identified, cpu not active, not MCP/frozen)
+                _genuinely_stuck = (
                     not mcp_hung
                     and not frozen_escalate
                     and mcp_server is None
                     and not (_tool_name and main_sleeping)
-                ):
+                    and cpu_active is not True
+                )
+                if _genuinely_stuck:
                     parts.append("— session may be stuck.")
                 if last_action:
-                    parts.append(f"Last: {last_action}")
-                if diag:
-                    parts.append(f"PID {diag.pid}: {format_diag(diag)}")
+                    _summary = (
+                        last_action
+                        if len(last_action) <= 80
+                        else last_action[:77] + "..."
+                    )
+                    parts.append(f"Last: {_summary}")
                 parts.append("/cancel to stop.")
                 text = "\n".join(parts)
                 try:

From c422fb88b1f5215963cd765622f5e6d1e9e05004 Mon Sep 17 00:00:00 2001
From: Nathan Schram <5553883+nathanschram@users.noreply.github.com>
Date: Sun, 22 Mar 2026 05:57:17 +0000
Subject: [PATCH 5/5] fix: frozen ring escalation shows reassuring message for
 long Bash commands (#188)

When a known tool is running (main sleeping, CPU active on children),
frozen ring escalation now shows "Bash command still running" instead
of alarming "No progress" message. Found via wpnav staging session
where benchmark scripts ran for 60+ min with false warnings.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                  |  1 +
 src/untether/runner_bridge.py | 25 ++++++++--
 tests/test_exec_bridge.py     | 90 +++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 94d4f71..a777cbd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@
 - Codex: always pass `--ask-for-approval` in headless mode — default to `never` (auto-approve all) so Codex never blocks on terminal input; `safe` permission mode still uses `untrusted` [#184](https://github.com/littlebearapps/untether/issues/184)
 - OpenCode: surface unsupported JSONL event types as visible Telegram warnings instead of silently dropping them — prevents silent 5-minute hangs when OpenCode emits new event types (e.g. `question`, `permission`) [#183](https://github.com/littlebearapps/untether/issues/183)
 - stall warnings now succinct and accurate for long-running tools — truncate "Last:" to 80 chars, recognise `command:` prefix (Bash tools), reassuring "still running" message when CPU active, drop PID diagnostics from Telegram messages, only say "may be stuck" when genuinely stuck [#188](https://github.com/littlebearapps/untether/issues/188)
+  - frozen ring buffer escalation now uses tool-aware "still running" message when a known tool is actively running (main sleeping, CPU active on children), instead of alarming "No progress" message
 
 ### changes
 
diff --git a/src/untether/runner_bridge.py b/src/untether/runner_bridge.py
index 8c85468..fd1bcb7 100644
--- a/src/untether/runner_bridge.py
+++ b/src/untether/runner_bridge.py
@@ -927,9 +927,28 @@ async def _stall_monitor(self) -> None:
                         seconds_since_last_event=round(elapsed, 1),
                         pid=self.pid,
                     )
-                    parts = [
-                        f"⏳ No progress for {mins} min (CPU active, no new events)"
-                    ]
+                    # When a known tool is running and main process is sleeping
+                    # (waiting for child), use reassuring message instead of
+                    # alarming "No progress" — the tool subprocess is working.
+                    _frozen_tool = None
+                    if last_action:
+                        for _pfx in ("tool:", "note:", "command:"):
+                            if last_action.startswith(_pfx):
+                                _rest = last_action[len(_pfx) :]
+                                _frozen_tool = (
+                                    "Bash"
+                                    if _pfx == "command:"
+                                    else _rest.split(" ", 1)[0].split(":", 1)[0]
+                                )
+                                break
+                    if _frozen_tool and main_sleeping and cpu_active is True:
+                        parts = [
+                            f"⏳ {_frozen_tool} command still running ({mins} min)"
+                        ]
+                    else:
+                        parts = [
+                            f"⏳ No progress for {mins} min (CPU active, no new events)"
+                        ]
                 elif mcp_server is not None:
                     parts = [f"⏳ MCP tool running: {mcp_server} ({mins} min)"]
                 else:
diff --git a/tests/test_exec_bridge.py b/tests/test_exec_bridge.py
index 7de364e..9186953 100644
--- a/tests/test_exec_bridge.py
+++ b/tests/test_exec_bridge.py
@@ -2750,6 +2750,96 @@ async def drive() -> None:
     assert "cpu active" in notify_msgs[0]["message"].text.lower()
 
 
+@pytest.mark.anyio
+async def test_stall_frozen_ring_uses_tool_message_when_bash_running() -> None:
+    """When ring buffer is frozen but a Bash command is running (main sleeping,
+    CPU active on children), show reassuring 'still running' instead of 'No progress'.
+
+    Regression test for #188: frozen_escalate branch fired alarming 'No progress'
+    message even when Claude was legitimately waiting for a long Bash command.
+    """
+    from collections import deque
+    from types import SimpleNamespace
+    from unittest.mock import patch
+
+    from untether.model import Action, ActionEvent
+    from untether.utils.proc_diag import ProcessDiag
+
+    transport = FakeTransport()
+    presenter = _KeyboardPresenter()
+    clock = _FakeClock(start=100.0)
+    edits = _make_edits(transport, presenter, clock=clock)
+    edits._stall_check_interval = 0.01
+    edits._STALL_THRESHOLD_SECONDS = 0.05
+    edits._STALL_THRESHOLD_TOOL = 0.05  # override 600s tool threshold
+    edits._stall_repeat_seconds = 0.0
+    edits._STALL_MAX_WARNINGS = 100
+    edits.pid = 12345
+    edits.event_seq = 5
+
+    # Simulate a running Bash command action
+    await edits.on_event(
+        ActionEvent(
+            engine="claude",
+            action=Action(
+                id="a1",
+                kind="command",
+                title='echo "running benchmarks"',
+            ),
+            phase="started",
+        )
+    )
+
+    # Provide a frozen ring buffer
+    fake_stream = SimpleNamespace(
+        recent_events=deque([(1.0, "assistant"), (2.0, "result")], maxlen=10),
+        last_event_type="result",
+        stderr_capture=[],
+    )
+    edits.stream = fake_stream
+
+    clock.set(100.0)
+    call_count = 0
+
+    def sleeping_cpu_diag(pid: int) -> ProcessDiag:
+        nonlocal call_count
+        call_count += 1
+        return ProcessDiag(
+            pid=pid,
+            alive=True,
+            state="S",  # main process sleeping (waiting for child)
+            cpu_utime=1000 + call_count * 300,
+            cpu_stime=200 + call_count * 50,
+        )
+
+    with patch(
+        "untether.utils.proc_diag.collect_proc_diag",
+        side_effect=sleeping_cpu_diag,
+    ):
+        async with anyio.create_task_group() as tg:
+
+            async def drive() -> None:
+                for i in range(8):
+                    clock.set(100.1 + i * 0.1)
+                    await anyio.sleep(0.03)
+                edits.signal_send.close()
+
+            tg.start_soon(edits.run)
+            tg.start_soon(drive)
+
+    # Should have sent notification with reassuring tool-aware message
+    notify_msgs = [
+        c for c in transport.send_calls if "still running" in c["message"].text.lower()
+    ]
+    assert len(notify_msgs) >= 1, (
+        f"Expected 'Bash command still running' message, got: "
+        f"{[c['message'].text for c in transport.send_calls]}"
+    )
+    # Should mention Bash, NOT "No progress"
+    assert "bash" in notify_msgs[0]["message"].text.lower()
+    assert "no progress" not in notify_msgs[0]["message"].text.lower()
+
+
 def test_frozen_ring_count_resets_on_event() -> None:
     """_frozen_ring_count and _prev_recent_events reset when a real event arrives."""
     transport = FakeTransport()