diff --git a/CHANGELOG.md b/CHANGELOG.md index ab0594d..a777cbd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ - clarify /config default labels and remove redundant "Works with" lines [#119](https://github.com/littlebearapps/untether/issues/119) - Codex: always pass `--ask-for-approval` in headless mode — default to `never` (auto-approve all) so Codex never blocks on terminal input; `safe` permission mode still uses `untrusted` [#184](https://github.com/littlebearapps/untether/issues/184) - OpenCode: surface unsupported JSONL event types as visible Telegram warnings instead of silently dropping them — prevents silent 5-minute hangs when OpenCode emits new event types (e.g. `question`, `permission`) [#183](https://github.com/littlebearapps/untether/issues/183) +- stall warnings now succinct and accurate for long-running tools — truncate "Last:" to 80 chars, recognise `command:` prefix (Bash tools), reassuring "still running" message when CPU active, drop PID diagnostics from Telegram messages, only say "may be stuck" when genuinely stuck [#188](https://github.com/littlebearapps/untether/issues/188) + - frozen ring buffer escalation now uses tool-aware "still running" message when a known tool is actively running (main sleeping, CPU active on children), instead of alarming "No progress" message ### changes @@ -60,6 +62,8 @@ - `AutoContinueSettings` with `enabled` (default true) and `max_retries` (default 1) in `[auto_continue]` config section - detection based on protocol invariant: normal sessions always end with `last_event_type=result` - sends "⚠️ Auto-continuing — Claude stopped before processing tool results" notification before resuming +- emoji button labels and edit-in-place for outline approval — ExitPlanMode buttons now show ✅/❌/📋 emoji prefixes; post-outline "Approve Plan"/"Deny" edits the "Asked Claude Code to outline the plan" message in-place instead of creating a second message [#186](https://github.com/littlebearapps/untether/issues/186) +- redesign startup message layout — version in parentheses, split engine info into "default engine" and "installed engines" lines, italic subheadings, renamed "projects" to "directories" (matching `dir:` footer label), added bug report link [#187](https://github.com/littlebearapps/untether/issues/187) ### tests diff --git a/CLAUDE.md b/CLAUDE.md index c70314f..9e13eb9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -153,7 +153,7 @@ Rules in `.claude/rules/` auto-load when editing matching files: ## Tests -1765 unit tests, 80% coverage threshold. Integration testing against `@untether_dev_bot` is **mandatory before every release** — see `docs/reference/integration-testing.md` for the full playbook with per-release-type tier requirements (patch/minor/major). All integration test tiers are fully automated by Claude Code via Telegram MCP tools and Bash. +1766 unit tests, 80% coverage threshold. Integration testing against `@untether_dev_bot` is **mandatory before every release** — see `docs/reference/integration-testing.md` for the full playbook with per-release-type tier requirements (patch/minor/major). All integration test tiers are fully automated by Claude Code via Telegram MCP tools and Bash. Key test files: diff --git a/src/untether/runner_bridge.py b/src/untether/runner_bridge.py index 3ed696a..fd1bcb7 100644 --- a/src/untether/runner_bridge.py +++ b/src/untether/runner_bridge.py @@ -727,7 +727,7 @@ async def _monitor() -> None: async def _stall_monitor(self) -> None: """Periodically check for event stalls, log diagnostics, and notify.""" - from .utils.proc_diag import collect_proc_diag, format_diag, is_cpu_active + from .utils.proc_diag import collect_proc_diag, is_cpu_active while True: await anyio.sleep(self._stall_check_interval) @@ -927,41 +927,76 @@ async def _stall_monitor(self) -> None: seconds_since_last_event=round(elapsed, 1), pid=self.pid, ) - parts = [ - f"⏳ No progress for {mins} min (CPU active, no new events)" - ] + # When a known tool is running and main process is sleeping + # (waiting for child), use reassuring message instead of + # alarming "No progress" — the tool subprocess is working. + _frozen_tool = None + if last_action: + for _pfx in ("tool:", "note:", "command:"): + if last_action.startswith(_pfx): + _rest = last_action[len(_pfx) :] + _frozen_tool = ( + "Bash" + if _pfx == "command:" + else _rest.split(" ", 1)[0].split(":", 1)[0] + ) + break + if _frozen_tool and main_sleeping and cpu_active is True: + parts = [ + f"⏳ {_frozen_tool} command still running ({mins} min)" + ] + else: + parts = [ + f"⏳ No progress for {mins} min (CPU active, no new events)" + ] elif mcp_server is not None: parts = [f"⏳ MCP tool running: {mcp_server} ({mins} min)"] else: # Extract tool name from last running action for - # actionable stall messages ("Bash tool may be stuck" + # actionable stall messages ("Bash command still running" # instead of generic "session may be stuck"). _tool_name = None if last_action: - for _prefix in ("tool:", "note:"): + for _prefix in ("tool:", "note:", "command:"): if last_action.startswith(_prefix): _rest = last_action[len(_prefix) :] - _tool_name = _rest.split(" ", 1)[0].split(":", 1)[0] + _raw = _rest.split(" ", 1)[0].split(":", 1)[0] + # Map kind prefix to user-friendly name + _tool_name = "Bash" if _prefix == "command:" else _raw break if _tool_name and main_sleeping: - parts = [ - f"⏳ {_tool_name} tool may be stuck ({mins} min, process waiting)" - ] + if cpu_active is True: + parts = [ + f"⏳ {_tool_name} command still running ({mins} min)" + ] + else: + parts = [ + f"⏳ {_tool_name} tool may be stuck ({mins} min, no CPU activity)" + ] + elif cpu_active is True: + parts = [f"⏳ Still working ({mins} min, CPU active)"] else: parts = [f"⏳ No progress for {mins} min"] if self._stall_warn_count > 1: parts[0] += f" (warned {self._stall_warn_count}x)" - if ( + # "session may be stuck" — only when genuinely stuck + # (no tool identified, cpu not active, not MCP/frozen) + _genuinely_stuck = ( not mcp_hung and not frozen_escalate and mcp_server is None and not (_tool_name and main_sleeping) - ): + and cpu_active is not True + ) + if _genuinely_stuck: parts.append("— session may be stuck.") if last_action: - parts.append(f"Last: {last_action}") - if diag: - parts.append(f"PID {diag.pid}: {format_diag(diag)}") + _summary = ( + last_action + if len(last_action) <= 80 + else last_action[:77] + "..." + ) + parts.append(f"Last: {_summary}") parts.append("/cancel to stop.") text = "\n".join(parts) try: diff --git a/src/untether/runners/claude.py b/src/untether/runners/claude.py index 45b91ec..1f14a1d 100644 --- a/src/untether/runners/claude.py +++ b/src/untether/runners/claude.py @@ -729,11 +729,11 @@ def translate_claude_event( "buttons": [ [ { - "text": "Approve Plan", + "text": "✅ Approve Plan", "callback_data": f"claude_control:approve:{button_request_id}", }, { - "text": "Deny", + "text": "❌ Deny", "callback_data": f"claude_control:deny:{button_request_id}", }, ], @@ -839,11 +839,11 @@ def translate_claude_event( button_rows: list[list[dict[str, str]]] = [ [ { - "text": "Approve", + "text": "✅ Approve", "callback_data": f"claude_control:approve:{request_id}", }, { - "text": "Deny", + "text": "❌ Deny", "callback_data": f"claude_control:deny:{request_id}", }, ], @@ -855,7 +855,7 @@ def translate_claude_event( button_rows.append( [ { - "text": "Pause & Outline Plan", + "text": "📋 Pause & Outline Plan", "callback_data": f"claude_control:discuss:{request_id}", }, ] @@ -1937,6 +1937,11 @@ def _cleanup_session_registries(session_id: str) -> None: if session_id in _OUTLINE_PENDING: cleaned.append("outline_pending") _OUTLINE_PENDING.discard(session_id) + # Clean up discuss feedback ref (post-outline edit-instead-of-send tracking) + from ..telegram.commands.claude_control import _DISCUSS_FEEDBACK_REFS + + if _DISCUSS_FEEDBACK_REFS.pop(session_id, None) is not None: + cleaned.append("discuss_feedback_ref") stale = [k for k, v in _REQUEST_TO_SESSION.items() if v == session_id] if stale: cleaned.append(f"requests({len(stale)})") diff --git a/src/untether/telegram/backend.py b/src/untether/telegram/backend.py index 29908b8..8638f1c 100644 --- a/src/untether/telegram/backend.py +++ b/src/untether/telegram/backend.py @@ -111,9 +111,9 @@ def _build_startup_message( ) -> str: project_aliases = sorted(set(runtime.project_aliases()), key=str.lower) - header = f"\N{DOG} **untether v{__version__} is ready**" + header = f"\N{DOG} **untether is ready** (v{__version__})" - # engine — merged default + available on one line + # engines — separate default and installed lines available_engines = list(runtime.available_engine_ids()) missing_engines = list(runtime.missing_engine_ids()) misconfigured_engines = list(runtime.engine_ids_with_status("bad_config")) @@ -128,23 +128,23 @@ def _build_startup_message( engine_list = ", ".join(available_engines) if available_engines else "none" details: list[str] = [] + details.append(f"_default engine:_ `{runtime.default_engine}`") if engine_notes: details.append( - f"engine: `{runtime.default_engine}`" - f" · engines: `{engine_list} ({'; '.join(engine_notes)})`" + f"_installed engines:_ `{engine_list}` ({'; '.join(engine_notes)})" ) else: - details.append(f"engine: `{runtime.default_engine}` · engines: `{engine_list}`") + details.append(f"_installed engines:_ `{engine_list}`") # mode — derived from session_mode + topics mode = _resolve_mode_label(session_mode, topics.enabled) - details.append(f"mode: `{mode}`") + details.append(f"_mode:_ `{mode}`") - # projects — listed by name + # directories — listed by name if project_aliases: - details.append(f"projects: `{', '.join(project_aliases)}`") + details.append(f"_directories:_ `{', '.join(project_aliases)}`") else: - details.append("projects: `none`") + details.append("_directories:_ `none`") # topics — only shown when enabled if topics.enabled: @@ -154,18 +154,20 @@ def _build_startup_message( scope_label = ( f"auto ({resolved_scope})" if topics.scope == "auto" else resolved_scope ) - details.append(f"topics: `enabled (scope={scope_label})`") + details.append(f"_topics:_ `enabled (scope={scope_label})`") # triggers — only shown when enabled if trigger_config and trigger_config.get("enabled"): n_wh = len(trigger_config.get("webhooks", [])) n_cr = len(trigger_config.get("crons", [])) - details.append(f"triggers: `enabled ({n_wh} webhooks, {n_cr} crons)`") + details.append(f"_triggers:_ `enabled ({n_wh} webhooks, {n_cr} crons)`") _DOCS_URL = "https://littlebearapps.com/tools/untether/" + _ISSUES_URL = "https://github.com/littlebearapps/untether/issues" footer = ( f"\n\nSend a message to start, or /config for settings." - f"\n\N{OPEN BOOK} [Click here for help guide]({_DOCS_URL})" + f"\n\N{OPEN BOOK} [Click here for help]({_DOCS_URL})" + f" | \N{BUG} [Click here to report a bug]({_ISSUES_URL})" ) return header + "\n\n" + "\n\n".join(details) + footer diff --git a/src/untether/telegram/commands/claude_control.py b/src/untether/telegram/commands/claude_control.py index 5433e3a..64520fd 100644 --- a/src/untether/telegram/commands/claude_control.py +++ b/src/untether/telegram/commands/claude_control.py @@ -4,7 +4,7 @@ from ...commands import CommandBackend, CommandContext, CommandResult from ...logging import get_logger -from ...runner_bridge import delete_outline_messages +from ...runner_bridge import delete_outline_messages, register_ephemeral_message from ...runners.claude import ( _ACTIVE_RUNNERS, _DISCUSS_APPROVED, @@ -15,9 +15,14 @@ send_claude_control_response, set_discuss_cooldown, ) +from ...transport import MessageRef logger = get_logger(__name__) +# Tracks the "📋 Asked Claude Code to outline the plan" message ref per session, +# so the post-outline approve/deny can edit it instead of sending a 2nd message. +_DISCUSS_FEEDBACK_REFS: dict[str, MessageRef] = {} + _DISCUSS_DENY_MESSAGE = ( "STOP. Do NOT call ExitPlanMode yet.\n\n" @@ -136,10 +141,19 @@ async def handle(self, ctx: CommandContext) -> CommandResult | None: request_id=request_id, action=action, ) - return CommandResult( - text="📋 Asked Claude Code to outline the plan", + + # Send feedback directly and store ref so post-outline approve/deny + # can edit this message instead of creating a second one. + ref = await ctx.executor.send( + "📋 Asked Claude Code to outline the plan", notify=True, ) + if ref and session_id: + _DISCUSS_FEEDBACK_REFS[session_id] = ref + register_ephemeral_message( + ctx.message.channel_id, ctx.message.message_id, ref + ) + return None approved = action == "approve" @@ -156,6 +170,7 @@ async def handle(self, ctx: CommandContext) -> CommandResult | None: "claude_control.discuss_plan_session_ended", session_id=session_id, ) + _DISCUSS_FEEDBACK_REFS.pop(session_id, None) return CommandResult( text=( "⚠️ Session has ended — start a new run" @@ -175,11 +190,7 @@ async def handle(self, ctx: CommandContext) -> CommandResult | None: "claude_control.discuss_plan_approved", session_id=session_id, ) - return CommandResult( - text="✅ Plan approved — Claude Code will proceed", - notify=True, - skip_reply=True, - ) + action_text = "✅ Plan approved — Claude Code will proceed" else: _OUTLINE_PENDING.discard(session_id) clear_discuss_cooldown(session_id) @@ -187,11 +198,26 @@ async def handle(self, ctx: CommandContext) -> CommandResult | None: "claude_control.discuss_plan_denied", session_id=session_id, ) - return CommandResult( - text="❌ Plan denied — send a follow-up message with feedback", - notify=True, - skip_reply=True, - ) + action_text = "❌ Plan denied — send a follow-up message with feedback" + + # Edit the discuss feedback message instead of sending a new one + existing_ref = _DISCUSS_FEEDBACK_REFS.pop(session_id, None) + if existing_ref: + try: + await ctx.executor.edit(existing_ref, action_text) + return None + except Exception: # noqa: BLE001 + logger.debug( + "claude_control.discuss_feedback_edit_failed", + session_id=session_id, + exc_info=True, + ) + # Fallback: send as new message if edit failed or no ref stored + return CommandResult( + text=action_text, + notify=True, + skip_reply=True, + ) # Grab session_id before send_claude_control_response deletes it session_id = _REQUEST_TO_SESSION.get(request_id) @@ -233,6 +259,30 @@ async def handle(self, ctx: CommandContext) -> CommandResult | None: had_outline = session_id in _OUTLINE_REGISTRY await delete_outline_messages(session_id) + # Try to edit the discuss feedback message for outline-flow + # approve/deny (when outline was long enough to use real request_id + # instead of da: prefix). + existing_ref = _DISCUSS_FEEDBACK_REFS.pop(session_id, None) + if existing_ref: + action_text = ( + "✅ Plan approved — Claude Code will proceed" + if approved + else "❌ Plan denied — send a follow-up message with feedback" + ) + try: + await ctx.executor.edit(existing_ref, action_text) + logger.info( + "claude_control.sent", + request_id=request_id, + approved=approved, + ) + return None + except Exception: # noqa: BLE001 + logger.debug( + "claude_control.discuss_feedback_edit_failed", + session_id=session_id, + exc_info=True, + ) action_text = "✅ Approved" if approved else "❌ Denied" logger.info( diff --git a/src/untether/telegram/commands/topics.py b/src/untether/telegram/commands/topics.py index 817da09..e09493c 100644 --- a/src/untether/telegram/commands/topics.py +++ b/src/untether/telegram/commands/topics.py @@ -241,7 +241,7 @@ async def _handle_new_command( await reply(text="this command only works inside a topic.") return await store.clear_sessions(*tkey) - await reply(text="cleared stored sessions for this topic.") + await reply(text="\N{BROOM} cleared stored sessions for this topic.") async def _handle_chat_new_command( @@ -256,9 +256,9 @@ async def _handle_chat_new_command( return await store.clear_sessions(session_key[0], session_key[1]) if msg.chat_type == "private": - text = "cleared stored sessions for this chat." + text = "\N{BROOM} cleared stored sessions for this chat." else: - text = "cleared stored sessions for you in this chat." + text = "\N{BROOM} cleared stored sessions for you in this chat." await reply(text=text) diff --git a/tests/test_ask_user_question.py b/tests/test_ask_user_question.py index 4e209cb..3c69bc2 100644 --- a/tests/test_ask_user_question.py +++ b/tests/test_ask_user_question.py @@ -175,8 +175,8 @@ def test_ask_user_question_has_inline_keyboard() -> None: assert "buttons" in kb # Should have approve/deny buttons button_texts = [b["text"] for row in kb["buttons"] for b in row] - assert "Approve" in button_texts - assert "Deny" in button_texts + assert "✅ Approve" in button_texts + assert "❌ Deny" in button_texts # =========================================================================== diff --git a/tests/test_claude_control.py b/tests/test_claude_control.py index 52c6062..722a077 100644 --- a/tests/test_claude_control.py +++ b/tests/test_claude_control.py @@ -85,6 +85,9 @@ def _clear_registries(): _REQUEST_TO_INPUT.clear() _HANDLED_REQUESTS.clear() _DISCUSS_COOLDOWN.clear() + from untether.telegram.commands.claude_control import _DISCUSS_FEEDBACK_REFS + + _DISCUSS_FEEDBACK_REFS.clear() # =========================================================================== @@ -120,13 +123,13 @@ def test_can_use_tool_produces_warning_with_inline_keyboard() -> None: buttons = kb["buttons"] assert len(buttons) == 2 # two rows for ExitPlanMode assert len(buttons[0]) == 2 # Approve + Deny - assert buttons[0][0]["text"] == "Approve" + assert buttons[0][0]["text"] == "✅ Approve" assert "req-1" in buttons[0][0]["callback_data"] - assert buttons[0][1]["text"] == "Deny" + assert buttons[0][1]["text"] == "❌ Deny" assert "req-1" in buttons[0][1]["callback_data"] # Second row: Outline Plan assert len(buttons[1]) == 1 - assert buttons[1][0]["text"] == "Pause & Outline Plan" + assert buttons[1][0]["text"] == "📋 Pause & Outline Plan" assert "discuss" in buttons[1][0]["callback_data"] assert "req-1" in buttons[1][0]["callback_data"] @@ -490,6 +493,9 @@ def test_stream_end_events_cleans_registries() -> None: def test_cleanup_session_registries_clears_all_state() -> None: """_cleanup_session_registries clears cooldown, outline, and approval state.""" + from untether.telegram.commands.claude_control import _DISCUSS_FEEDBACK_REFS + from untether.transport import MessageRef + runner = ClaudeRunner(claude_cmd="claude") session_id = "sess-full-cleanup" @@ -501,6 +507,7 @@ def test_cleanup_session_registries_clears_all_state() -> None: _OUTLINE_PENDING.add(session_id) _REQUEST_TO_SESSION["req-a"] = session_id _REQUEST_TO_SESSION["req-b"] = session_id + _DISCUSS_FEEDBACK_REFS[session_id] = MessageRef(channel_id=1, message_id=1) _cleanup_session_registries(session_id) @@ -511,6 +518,7 @@ def test_cleanup_session_registries_clears_all_state() -> None: assert session_id not in _OUTLINE_PENDING assert "req-a" not in _REQUEST_TO_SESSION assert "req-b" not in _REQUEST_TO_SESSION + assert session_id not in _DISCUSS_FEEDBACK_REFS def test_cleanup_session_registries_idempotent() -> None: @@ -752,6 +760,7 @@ async def test_discuss_action_sends_deny_with_custom_message() -> None: from untether.telegram.commands.claude_control import ( ClaudeControlCommand, _DISCUSS_DENY_MESSAGE, + _DISCUSS_FEEDBACK_REFS, ) runner = ClaudeRunner(claude_cmd="claude") @@ -763,10 +772,14 @@ async def test_discuss_action_sends_deny_with_custom_message() -> None: _REQUEST_TO_SESSION["req-discuss"] = session_id _REQUEST_TO_INPUT["req-discuss"] = {} - # Build a minimal CommandContext + # Build a minimal CommandContext with a fake executor from untether.commands import CommandContext from untether.transport import MessageRef + fake_executor = AsyncMock() + sent_ref = MessageRef(channel_id=123, message_id=99) + fake_executor.send = AsyncMock(return_value=sent_ref) + ctx = CommandContext( command="claude_control", text="claude_control:discuss:req-discuss", @@ -778,14 +791,21 @@ async def test_discuss_action_sends_deny_with_custom_message() -> None: config_path=None, plugin_config=None, # type: ignore[arg-type] runtime=None, # type: ignore[arg-type] - executor=None, # type: ignore[arg-type] + executor=fake_executor, ) cmd = ClaudeControlCommand() result = await cmd.handle(ctx) - assert result is not None - assert "outline" in result.text.lower() + # Handler sends directly and returns None + assert result is None + fake_executor.send.assert_called_once() + sent_text = fake_executor.send.call_args[0][0] + assert "outline" in sent_text.lower() + + # Verify the discuss feedback ref was stored for later editing + assert session_id in _DISCUSS_FEEDBACK_REFS + assert _DISCUSS_FEEDBACK_REFS[session_id] == sent_ref # Verify the stdin payload payload = json.loads(fake_stdin.send.call_args[0][0].decode()) @@ -897,7 +917,7 @@ def test_exit_plan_mode_auto_denied_during_cooldown() -> None: buttons = evt.action.detail["inline_keyboard"]["buttons"] assert len(buttons) == 1 # One row with Approve + Deny assert len(buttons[0]) == 2 - assert "Approve" in buttons[0][0]["text"] + assert "Approve" in buttons[0][0]["text"] # "✅ Approve Plan" def test_exit_plan_mode_blocked_after_cooldown_expires_without_outline() -> None: @@ -976,8 +996,8 @@ def test_exit_plan_mode_after_cooldown_expires_with_outline_shows_synthetic_butt buttons = detail["inline_keyboard"]["buttons"] assert len(buttons) == 1 assert len(buttons[0]) == 2 - assert buttons[0][0]["text"] == "Approve Plan" - assert buttons[0][1]["text"] == "Deny" + assert buttons[0][0]["text"] == "✅ Approve Plan" + assert buttons[0][1]["text"] == "❌ Deny" # Outline-ready uses real request_id (not da: prefix) assert buttons[0][0]["callback_data"] == "claude_control:approve:req-cd-outline" @@ -1046,7 +1066,7 @@ async def test_discuss_handler_sets_cooldown() -> None: config_path=None, plugin_config=None, # type: ignore[arg-type] runtime=None, # type: ignore[arg-type] - executor=None, # type: ignore[arg-type] + executor=AsyncMock(send=AsyncMock(return_value=None)), ) cmd = ClaudeControlCommand() @@ -1613,16 +1633,24 @@ def test_resumed_session_no_stale_outline_guard(self): @pytest.mark.anyio -async def test_discuss_approve_result_skips_reply() -> None: - """Post-outline 'Approve Plan' returns CommandResult with skip_reply=True.""" +async def test_discuss_approve_edits_feedback_message() -> None: + """Post-outline 'Approve Plan' edits the discuss feedback message.""" from untether.commands import CommandContext - from untether.telegram.commands.claude_control import ClaudeControlCommand + from untether.telegram.commands.claude_control import ( + ClaudeControlCommand, + _DISCUSS_FEEDBACK_REFS, + ) from untether.transport import MessageRef runner = ClaudeRunner(claude_cmd="claude") session_id = "sess-skip" _ACTIVE_RUNNERS[session_id] = (runner, 0.0) + # Simulate a stored discuss feedback ref + feedback_ref = MessageRef(channel_id=123, message_id=99) + _DISCUSS_FEEDBACK_REFS[session_id] = feedback_ref + + fake_executor = AsyncMock() ctx = CommandContext( command="claude_control", text=f"claude_control:approve:da:{session_id}", @@ -1634,27 +1662,41 @@ async def test_discuss_approve_result_skips_reply() -> None: config_path=None, plugin_config={}, runtime=None, # type: ignore[arg-type] - executor=None, # type: ignore[arg-type] + executor=fake_executor, ) cmd = ClaudeControlCommand() result = await cmd.handle(ctx) - assert result is not None - assert result.skip_reply is True - assert "approved" in result.text.lower() + + # Handler edits the feedback message and returns None + assert result is None + fake_executor.edit.assert_called_once() + edit_ref, edit_text = fake_executor.edit.call_args[0] + assert edit_ref == feedback_ref + assert "approved" in edit_text.lower() + # Ref should be cleaned up + assert session_id not in _DISCUSS_FEEDBACK_REFS @pytest.mark.anyio -async def test_discuss_deny_result_skips_reply() -> None: - """Post-outline 'Deny' returns CommandResult with skip_reply=True.""" +async def test_discuss_deny_edits_feedback_message() -> None: + """Post-outline 'Deny' edits the discuss feedback message.""" from untether.commands import CommandContext - from untether.telegram.commands.claude_control import ClaudeControlCommand + from untether.telegram.commands.claude_control import ( + ClaudeControlCommand, + _DISCUSS_FEEDBACK_REFS, + ) from untether.transport import MessageRef runner = ClaudeRunner(claude_cmd="claude") session_id = "sess-skip-deny" _ACTIVE_RUNNERS[session_id] = (runner, 0.0) + # Simulate a stored discuss feedback ref + feedback_ref = MessageRef(channel_id=123, message_id=99) + _DISCUSS_FEEDBACK_REFS[session_id] = feedback_ref + + fake_executor = AsyncMock() ctx = CommandContext( command="claude_control", text=f"claude_control:deny:da:{session_id}", @@ -1666,11 +1708,103 @@ async def test_discuss_deny_result_skips_reply() -> None: config_path=None, plugin_config={}, runtime=None, # type: ignore[arg-type] + executor=fake_executor, + ) + + cmd = ClaudeControlCommand() + result = await cmd.handle(ctx) + + # Handler edits the feedback message and returns None + assert result is None + fake_executor.edit.assert_called_once() + edit_ref, edit_text = fake_executor.edit.call_args[0] + assert edit_ref == feedback_ref + assert "denied" in edit_text.lower() + # Ref should be cleaned up + assert session_id not in _DISCUSS_FEEDBACK_REFS + + +@pytest.mark.anyio +async def test_discuss_approve_falls_back_without_stored_ref() -> None: + """Post-outline approve falls back to CommandResult when no stored ref.""" + from untether.commands import CommandContext + from untether.telegram.commands.claude_control import ClaudeControlCommand + from untether.transport import MessageRef + + runner = ClaudeRunner(claude_cmd="claude") + session_id = "sess-no-ref" + _ACTIVE_RUNNERS[session_id] = (runner, 0.0) + # No _DISCUSS_FEEDBACK_REFS entry + + ctx = CommandContext( + command="claude_control", + text=f"claude_control:approve:da:{session_id}", + args_text=f"approve:da:{session_id}", + args=(f"approve:da:{session_id}",), + message=MessageRef(channel_id=123, message_id=1), + reply_to=None, + reply_text=None, + config_path=None, + plugin_config={}, + runtime=None, # type: ignore[arg-type] executor=None, # type: ignore[arg-type] ) cmd = ClaudeControlCommand() result = await cmd.handle(ctx) + # Falls back to CommandResult assert result is not None assert result.skip_reply is True - assert "denied" in result.text.lower() + assert "approved" in result.text.lower() + + +@pytest.mark.anyio +async def test_normal_approve_edits_feedback_when_outline_ref_exists() -> None: + """Normal approve (real request_id, not da:) edits discuss feedback if ref stored.""" + from untether.commands import CommandContext + from untether.telegram.commands.claude_control import ( + ClaudeControlCommand, + _DISCUSS_FEEDBACK_REFS, + ) + from untether.transport import MessageRef + + runner = ClaudeRunner(claude_cmd="claude") + session_id = "sess-normal-outline" + + _ACTIVE_RUNNERS[session_id] = (runner, 0.0) + fake_stdin = AsyncMock() + _SESSION_STDIN[session_id] = fake_stdin + _REQUEST_TO_SESSION["req-outline-real"] = session_id + _REQUEST_TO_INPUT["req-outline-real"] = {} + _REQUEST_TO_TOOL_NAME["req-outline-real"] = "ExitPlanMode" + + # Simulate a stored discuss feedback ref from the earlier "Pause & Outline" click + feedback_ref = MessageRef(channel_id=123, message_id=99) + _DISCUSS_FEEDBACK_REFS[session_id] = feedback_ref + + fake_executor = AsyncMock() + ctx = CommandContext( + command="claude_control", + text="claude_control:approve:req-outline-real", + args_text="approve:req-outline-real", + args=("approve:req-outline-real",), + message=MessageRef(channel_id=123, message_id=1), + reply_to=None, + reply_text=None, + config_path=None, + plugin_config={}, + runtime=None, # type: ignore[arg-type] + executor=fake_executor, + ) + + cmd = ClaudeControlCommand() + result = await cmd.handle(ctx) + + # Handler should edit the feedback message and return None + assert result is None + fake_executor.edit.assert_called_once() + edit_ref, edit_text = fake_executor.edit.call_args[0] + assert edit_ref == feedback_ref + assert "approved" in edit_text.lower() + # Ref should be cleaned up + assert session_id not in _DISCUSS_FEEDBACK_REFS diff --git a/tests/test_cooldown_bypass.py b/tests/test_cooldown_bypass.py index 48bc002..d88fa1a 100644 --- a/tests/test_cooldown_bypass.py +++ b/tests/test_cooldown_bypass.py @@ -145,8 +145,8 @@ def test_outline_ready_buttons_use_real_request_id(): # Only 1 row with 2 buttons: Approve Plan, Deny assert len(buttons) == 1 assert len(buttons[0]) == 2 - assert buttons[0][0]["text"] == "Approve Plan" - assert buttons[0][1]["text"] == "Deny" + assert buttons[0][0]["text"] == "✅ Approve Plan" + assert buttons[0][1]["text"] == "❌ Deny" # Callback data uses REAL request_id (not da: prefix) assert buttons[0][0]["callback_data"] == f"claude_control:approve:{request_id}" assert buttons[0][1]["callback_data"] == f"claude_control:deny:{request_id}" @@ -532,8 +532,8 @@ def test_hold_open_after_cooldown_expires_with_outline(): buttons = detail["inline_keyboard"]["buttons"] assert len(buttons) == 1 assert len(buttons[0]) == 2 - assert buttons[0][0]["text"] == "Approve Plan" - assert buttons[0][1]["text"] == "Deny" + assert buttons[0][0]["text"] == "✅ Approve Plan" + assert buttons[0][1]["text"] == "❌ Deny" # Request should be held open (not auto-denied) assert len(state.auto_deny_queue) == 0 assert request_id in state.pending_control_requests diff --git a/tests/test_exec_bridge.py b/tests/test_exec_bridge.py index 7de364e..9186953 100644 --- a/tests/test_exec_bridge.py +++ b/tests/test_exec_bridge.py @@ -2750,6 +2750,96 @@ async def drive() -> None: assert "cpu active" in notify_msgs[0]["message"].text.lower() +@pytest.mark.anyio +async def test_stall_frozen_ring_uses_tool_message_when_bash_running() -> None: + """When ring buffer is frozen but a Bash command is running (main sleeping, + CPU active on children), show reassuring 'still running' instead of 'No progress'. + + Regression test for #188: frozen_escalate branch fired alarming 'No progress' + message even when Claude was legitimately waiting for a long Bash command. + """ + from collections import deque + from types import SimpleNamespace + from unittest.mock import patch + + from untether.model import Action, ActionEvent + from untether.utils.proc_diag import ProcessDiag + + transport = FakeTransport() + presenter = _KeyboardPresenter() + clock = _FakeClock(start=100.0) + edits = _make_edits(transport, presenter, clock=clock) + edits._stall_check_interval = 0.01 + edits._STALL_THRESHOLD_SECONDS = 0.05 + edits._STALL_THRESHOLD_TOOL = 0.05 # override 600s tool threshold + edits._stall_repeat_seconds = 0.0 + edits._STALL_MAX_WARNINGS = 100 + edits.pid = 12345 + edits.event_seq = 5 + + # Simulate a running Bash command action + await edits.on_event( + ActionEvent( + engine="claude", + action=Action( + id="a1", + kind="command", + title='echo "running benchmarks"', + ), + phase="started", + ) + ) + + # Provide a frozen ring buffer + fake_stream = SimpleNamespace( + recent_events=deque([(1.0, "assistant"), (2.0, "result")], maxlen=10), + last_event_type="result", + stderr_capture=[], + ) + edits.stream = fake_stream + + clock.set(100.0) + call_count = 0 + + def sleeping_cpu_diag(pid: int) -> ProcessDiag: + nonlocal call_count + call_count += 1 + return ProcessDiag( + pid=pid, + alive=True, + state="S", # main process sleeping (waiting for child) + cpu_utime=1000 + call_count * 300, + cpu_stime=200 + call_count * 50, + ) + + with patch( + "untether.utils.proc_diag.collect_proc_diag", + side_effect=sleeping_cpu_diag, + ): + async with anyio.create_task_group() as tg: + + async def drive() -> None: + for i in range(8): + clock.set(100.1 + i * 0.1) + await anyio.sleep(0.03) + edits.signal_send.close() + + tg.start_soon(edits.run) + tg.start_soon(drive) + + # Should have sent notification with reassuring tool-aware message + notify_msgs = [ + c for c in transport.send_calls if "still running" in c["message"].text.lower() + ] + assert len(notify_msgs) >= 1, ( + f"Expected 'Bash command still running' message, got: " + f"{[c['message'].text for c in transport.send_calls]}" + ) + # Should mention Bash, NOT "No progress" + assert "bash" in notify_msgs[0]["message"].text.lower() + assert "no progress" not in notify_msgs[0]["message"].text.lower() + + def test_frozen_ring_count_resets_on_event() -> None: """_frozen_ring_count and _prev_recent_events reset when a real event arrives.""" transport = FakeTransport() diff --git a/tests/test_telegram_backend.py b/tests/test_telegram_backend.py index dbde1f5..6b1b5fe 100644 --- a/tests/test_telegram_backend.py +++ b/tests/test_telegram_backend.py @@ -47,9 +47,9 @@ def test_build_startup_message_includes_missing_engines(tmp_path: Path) -> None: topics=TelegramTopicsSettings(), ) - assert "untether" in message and "is ready" in message + assert "untether is ready" in message assert "not installed: pi" in message - assert "projects: `none`" in message + assert "_directories:_ `none`" in message def test_build_startup_message_surfaces_unavailable_engine_reasons( @@ -87,7 +87,7 @@ def test_build_startup_message_surfaces_unavailable_engine_reasons( topics=TelegramTopicsSettings(), ) - assert "engines:" in message and "codex" in message + assert "_installed engines:_" in message and "codex" in message assert "misconfigured: pi" in message assert "failed to load: claude" in message @@ -135,15 +135,16 @@ def test_startup_message_core_fields() -> None: chat_id=123, topics=TelegramTopicsSettings(), ) - assert "engine: `claude`" in message - assert "engines: `claude`" in message - assert "projects: `none`" in message + assert "_default engine:_ `claude`" in message + assert "_installed engines:_ `claude`" in message + assert "_directories:_ `none`" in message # Disabled topics/triggers should NOT appear - assert "topics:" not in message - assert "triggers:" not in message + assert "_topics:_" not in message + assert "_triggers:_" not in message # Quick-start hint and help link assert "/config" in message assert "littlebearapps.com" in message + assert "report a bug" in message def test_startup_message_shows_topics_when_enabled() -> None: @@ -153,7 +154,7 @@ def test_startup_message_shows_topics_when_enabled() -> None: chat_id=123, topics=TelegramTopicsSettings(enabled=True, scope="main"), ) - assert "topics:" in message + assert "_topics:_" in message def test_startup_message_shows_mode_assistant() -> None: @@ -164,7 +165,7 @@ def test_startup_message_shows_mode_assistant() -> None: topics=TelegramTopicsSettings(), session_mode="chat", ) - assert "mode: `assistant`" in message + assert "_mode:_ `assistant`" in message def test_startup_message_shows_mode_workspace() -> None: @@ -175,7 +176,7 @@ def test_startup_message_shows_mode_workspace() -> None: topics=TelegramTopicsSettings(enabled=True, scope="main"), session_mode="chat", ) - assert "mode: `workspace`" in message + assert "_mode:_ `workspace`" in message def test_startup_message_shows_mode_handoff() -> None: @@ -186,7 +187,7 @@ def test_startup_message_shows_mode_handoff() -> None: topics=TelegramTopicsSettings(), session_mode="stateless", ) - assert "mode: `handoff`" in message + assert "_mode:_ `handoff`" in message def test_startup_message_shows_triggers_when_enabled() -> None: @@ -197,7 +198,7 @@ def test_startup_message_shows_triggers_when_enabled() -> None: topics=TelegramTopicsSettings(), trigger_config={"enabled": True, "webhooks": [{}], "crons": []}, ) - assert "triggers:" in message + assert "_triggers:_" in message assert "1 webhooks" in message @@ -233,7 +234,7 @@ def test_startup_message_project_count(tmp_path: Path) -> None: chat_id=123, topics=TelegramTopicsSettings(), ) - assert "projects: `proj-a, proj-b`" in message + assert "_directories:_ `proj-a, proj-b`" in message def test_telegram_backend_build_and_run_wires_config(