Filter hook/system boilerplate user messages out of the rendered session tail.

Adds _BOILERPLATE_PATTERNS and _is_boilerplate_user_msg() to new_session.py,
makes _parse_and_render_tail() skip user messages whose text is boilerplate,
and adds tests for both to scripts/test.py.

Only text-only user messages are filtered: a message that carries any
non-text block (for example a tool_result) produces no text to match, and
must not be dropped as if it were empty boilerplate.

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy. The context lines of the second
new_session.py hunk are assumed to be indented 8 spaces; confirm against the
target file before applying. The added lines were revised after the index
lines were generated, so the post-image blob id for new_session.py is stale.

diff --git a/new_session.py b/new_session.py
index 3d0acad..dc90388 100644
--- a/new_session.py
+++ b/new_session.py
@@ -206,6 +206,29 @@ def _tool_summary(name, inp):
     return f"{name}()"
 
 
+# Patterns that indicate a user message is pure boilerplate (hook/system noise)
+# and should be dropped entirely from SESSION_STATE.md
+_BOILERPLATE_PATTERNS = [
+    re.compile(r'^Stop hook feedback:\s*\nDO NOT STOP', re.MULTILINE),
+    re.compile(r'^You are a self-analysis agent\.'),
+    re.compile(r'SESSION START INSTRUCTIONS:'),
+    re.compile(r'^Context was reset\. Do not ask what to do\.'),
+]
+
+
+def _is_boilerplate_user_msg(text):
+    """Return True if the user message text is pure hook/system boilerplate.
+
+    Empty or whitespace-only text counts as boilerplate; otherwise the
+    stripped text is boilerplate when any _BOILERPLATE_PATTERNS entry
+    matches it.
+    """
+    stripped = text.strip()
+    if not stripped:
+        return True
+    return any(pat.search(stripped) for pat in _BOILERPLATE_PATTERNS)
+
+
 def _parse_and_render_tail(jsonl_lines, max_chars=32000):
     """Parse raw JSONL lines into readable conversation text.
 
@@ -269,6 +292,18 @@ def _parse_and_render_tail(jsonl_lines, max_chars=32000):
         if isinstance(content, str):
             content = [{"type": "text", "text": content}]
 
+        # Skip user messages that are pure boilerplate (hooks, system prompts).
+        # Only text-only messages qualify: a message carrying any non-text
+        # block (e.g. tool_result) yields no text here but is not boilerplate.
+        if role == 'user':
+            text_parts = [
+                b.get('text', '') for b in content
+                if isinstance(b, dict) and b.get('type') == 'text'
+            ]
+            text_only = len(text_parts) == len(content)
+            if text_only and _is_boilerplate_user_msg(' '.join(text_parts)):
+                continue
+
         # Format timestamp
         ts_short = ''
         if ts:
diff --git a/scripts/test.py b/scripts/test.py
index 62826f2..50690fd 100644
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -154,6 +154,38 @@ def test(name, condition):
 ctx_boundary = context_reset._parse_and_render_tail(boundary_entries)
 test("boundary shown", "compacted" in ctx_boundary and "150,000" in ctx_boundary)
 
+# --- _is_boilerplate_user_msg ---
+print("\n=== _is_boilerplate_user_msg ===")
+test("empty is boilerplate", context_reset._is_boilerplate_user_msg(""))
+test("whitespace is boilerplate", context_reset._is_boilerplate_user_msg(" \n "))
+test("stop hook is boilerplate", context_reset._is_boilerplate_user_msg(
+    "Stop hook feedback:\nDO NOT STOP. DO NOT SUMMARIZE. DO NOT LIST OPTIONS. Follow this order:\n1) Check TODO.md..."
+))
+test("self-analysis is boilerplate", context_reset._is_boilerplate_user_msg(
+    "You are a self-analysis agent. A user interrupted Claude mid-response."
+))
+test("session start is boilerplate", context_reset._is_boilerplate_user_msg(
+    "SESSION START INSTRUCTIONS: Check TODO.md in $CLAUDE_PROJECT_DIR for pending tasks."
+))
+test("context reset prompt is boilerplate", context_reset._is_boilerplate_user_msg(
+    "Context was reset. Do not ask what to do. Pick up where the last session left off."
+))
+test("real user message is NOT boilerplate", not context_reset._is_boilerplate_user_msg(
+    "please fix the bug in auth.py"
+))
+test("looks good is NOT boilerplate", not context_reset._is_boilerplate_user_msg("looks good"))
+
+# Test boilerplate filtering in _parse_and_render_tail
+boilerplate_entries = [
+    _json.dumps({"type": "user", "message": {"role": "user", "content": [{"type": "text", "text": "Stop hook feedback:\nDO NOT STOP. DO NOT SUMMARIZE. keep going"}]}}),
+    _json.dumps({"type": "assistant", "message": {"role": "assistant", "content": [{"type": "text", "text": "Continuing work"}]}}),
+    _json.dumps({"type": "user", "message": {"role": "user", "content": [{"type": "text", "text": "real feedback here"}]}}),
+]
+ctx_bp = context_reset._parse_and_render_tail(boilerplate_entries)
+test("boilerplate user msg filtered out", "DO NOT STOP" not in ctx_bp)
+test("real user msg kept", "real feedback here" in ctx_bp)
+test("assistant after boilerplate kept", "Continuing work" in ctx_bp)
+
 # --- extract_session_context (integration) ---
 print("\n=== extract_session_context ===")
 with tempfile.TemporaryDirectory() as d: