From b3942a1ddb8427dbe1f51008b9d90caede26cc6b Mon Sep 17 00:00:00 2001
From: ZoranPandovski
Date: Mon, 23 Mar 2026 17:03:40 +0100
Subject: [PATCH 1/3] Exact entry match instead of substring

---
 anton/memory/hippocampus.py | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/anton/memory/hippocampus.py b/anton/memory/hippocampus.py
index 9cf5381..71f40f8 100644
--- a/anton/memory/hippocampus.py
+++ b/anton/memory/hippocampus.py
@@ -233,8 +233,8 @@ def encode_rule(
         else:
             content = "# Rules\n\n## Always\n\n## Never\n\n## When\n"
 
-        # Check for duplicate (same text, ignoring metadata)
-        if text in content:
+        # Check for duplicate (exact entry match, ignoring metadata)
+        if text in self._extract_entry_texts(content):
             return
 
         # Find the section and append
@@ -296,9 +296,9 @@ def encode_lesson(
                 mode="write",
             )
         else:
-            # Check for duplicate
+            # Check for duplicate (exact entry match, ignoring metadata)
             existing = self._lessons_path.read_text(encoding="utf-8")
-            if text in existing:
+            if text in self._extract_entry_texts(existing):
                 return
             self._encode_with_lock(self._lessons_path, entry, mode="append")
 
@@ -315,7 +315,7 @@ def encode_lesson(
                 )
             else:
                 existing = topic_path.read_text(encoding="utf-8")
-                if text not in existing:
+                if text not in self._extract_entry_texts(existing):
                     self._encode_with_lock(topic_path, entry, mode="append")
 
     def rewrite_identity(self, entries: list[str]) -> None:
@@ -381,6 +381,27 @@ def _encode_with_lock(self, path: Path, text: str, mode: str = "append") -> None
                 import fcntl
                 fcntl.flock(f.fileno(), fcntl.LOCK_UN)
 
+    @staticmethod
+    def _extract_entry_texts(content: str) -> set[str]:
+        """Extract the set of normalized entry texts from a markdown memory file.
+
+        Strips the leading ``- ``, trailing metadata comments, and whitespace
+        so that dedup comparisons are exact-match on the *meaning* line only.
+        """
+        texts: set[str] = set()
+        for line in content.splitlines():
+            stripped = line.strip()
+            if not stripped.startswith("- "):
+                continue
+            # Remove leading "- "
+            entry = stripped[2:]
+            # Remove trailing metadata
+            entry = re.sub(r"\s*<!--.*-->\s*$", "", entry)
+            entry = entry.strip()
+            if entry:
+                texts.add(entry)
+        return texts
+
     @staticmethod
     def _sanitize_slug(name: str) -> str:
         """Sanitize a topic name into a safe file slug."""

From 0ccf0707588e49ea5ec68fe6e83b971ef374fea0 Mon Sep 17 00:00:00 2001
From: ZoranPandovski
Date: Mon, 23 Mar 2026 17:03:47 +0100
Subject: [PATCH 2/3] Add test

---
 tests/test_hippocampus.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tests/test_hippocampus.py b/tests/test_hippocampus.py
index afb5734..10c5cec 100644
--- a/tests/test_hippocampus.py
+++ b/tests/test_hippocampus.py
@@ -134,6 +134,20 @@ def test_includes_metadata(self, hc, mem_dir):
         assert "confidence:high" in content
         assert "source:user" in content
 
+    def test_allows_superstring_of_existing(self, hc, mem_dir):
+        """A longer, more specific rule should NOT be blocked by a shorter one."""
+        hc.encode_rule("Use httpx", kind="always")
+        hc.encode_rule("Use httpx with timeout=15", kind="always")
+        content = (mem_dir / "rules.md").read_text()
+        assert "Use httpx with timeout=15" in content
+
+    def test_allows_substring_of_existing(self, hc, mem_dir):
+        """A shorter rule should NOT be blocked by a longer one containing it."""
+        hc.encode_rule("Use httpx with timeout=15", kind="always")
+        hc.encode_rule("Use httpx", kind="always")
+        content = (mem_dir / "rules.md").read_text()
+        assert content.count("Use httpx") == 2  # both present
+
 
 class TestEncodeLesson:
     def test_creates_lessons_file(self, hc, mem_dir):
@@ -158,6 +172,20 @@ def test_no_topic_no_topic_file(self, hc, mem_dir):
         hc.encode_lesson("Simple fact")
         assert not (mem_dir / "topics").exists() or not any((mem_dir / "topics").iterdir())
 
+    def test_allows_superstring_of_existing_lesson(self, hc, mem_dir):
+        """A more detailed lesson should NOT be blocked by a shorter one."""
+        hc.encode_lesson("CoinGecko limits at 50/min")
+        hc.encode_lesson("CoinGecko limits at 50/min for free tier accounts")
+        content = (mem_dir / "lessons.md").read_text()
+        assert "for free tier accounts" in content
+
+    def test_skips_exact_duplicate_with_metadata(self, hc, mem_dir):
+        """Exact same text should be blocked even when metadata differs."""
+        hc.encode_lesson("Fact one", topic="api")
+        hc.encode_lesson("Fact one", topic="other")
+        content = (mem_dir / "lessons.md").read_text()
+        assert content.count("Fact one") == 1
+
 
 class TestRewriteIdentity:
     def test_creates_profile(self, hc, mem_dir):

From 35c8fa6927708c54f0778ba7c36c127d88488474 Mon Sep 17 00:00:00 2001
From: ZoranPandovski
Date: Mon, 23 Mar 2026 17:07:07 +0100
Subject: [PATCH 3/3] Fix regex

---
NOTE(review): in the copy under review the HTML-comment part of this
pattern was missing on both sides of the hunk (each side read
r"\s*\s*$", which strips only whitespace and makes this hunk a no-op).
The "<!--...-->" text, here and on the matching "+" line of patch 1/3,
and the greedy -> non-greedy change shown below are a reconstruction;
confirm against the upstream commit before applying.

 anton/memory/hippocampus.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/anton/memory/hippocampus.py b/anton/memory/hippocampus.py
index 71f40f8..a5aeaad 100644
--- a/anton/memory/hippocampus.py
+++ b/anton/memory/hippocampus.py
@@ -396,7 +396,7 @@ def _extract_entry_texts(content: str) -> set[str]:
             # Remove leading "- "
             entry = stripped[2:]
             # Remove trailing metadata
-            entry = re.sub(r"\s*<!--.*-->\s*$", "", entry)
+            entry = re.sub(r"\s*<!--.*?-->\s*$", "", entry)
             entry = entry.strip()
             if entry:
                 texts.add(entry)