Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions anton/memory/hippocampus.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,8 @@ def encode_rule(
else:
content = "# Rules\n\n## Always\n\n## Never\n\n## When\n"

# Check for duplicate (same text, ignoring metadata)
if text in content:
# Check for duplicate (exact entry match, ignoring metadata)
if text in self._extract_entry_texts(content):
return

# Find the section and append
Expand Down Expand Up @@ -296,9 +296,9 @@ def encode_lesson(
mode="write",
)
else:
# Check for duplicate
# Check for duplicate (exact entry match, ignoring metadata)
existing = self._lessons_path.read_text(encoding="utf-8")
if text in existing:
if text in self._extract_entry_texts(existing):
return
self._encode_with_lock(self._lessons_path, entry, mode="append")

Expand All @@ -315,7 +315,7 @@ def encode_lesson(
)
else:
existing = topic_path.read_text(encoding="utf-8")
if text not in existing:
if text not in self._extract_entry_texts(existing):
self._encode_with_lock(topic_path, entry, mode="append")

def rewrite_identity(self, entries: list[str]) -> None:
Expand Down Expand Up @@ -381,6 +381,27 @@ def _encode_with_lock(self, path: Path, text: str, mode: str = "append") -> None
import fcntl
fcntl.flock(f.fileno(), fcntl.LOCK_UN)

@staticmethod
def _extract_entry_texts(content: str) -> set[str]:
    """Return the set of normalized entry texts in a markdown memory file.

    Only bullet lines (lines that start with ``- `` once surrounding
    whitespace is stripped) are considered. For each bullet the leading
    ``- `` and any trailing ``<!-- ... -->`` metadata comment(s) are removed
    and the remainder is whitespace-stripped, so dedup comparisons are
    exact matches on the entry text itself.

    Args:
        content: Full text of the memory file.

    Returns:
        The non-empty entry texts, one per bullet line.
    """
    # Strip only comments that sit at the very end of the line. The tempered
    # body ``(?:(?!<!--)[\s\S])*?`` cannot run across another ``<!--``, so an
    # entry whose own text contains ``<!--`` is no longer truncated at that
    # first opener (which would break exact-match dedup for that entry).
    # Compiled once here rather than looked up on every loop iteration.
    trailing_metadata = re.compile(
        r"(?:\s*<!--(?:(?!<!--)[\s\S])*?-->)+\s*$"
    )

    texts: set[str] = set()
    for line in content.splitlines():
        stripped = line.strip()
        if not stripped.startswith("- "):
            continue
        # Drop the "- " bullet marker, then the trailing metadata.
        entry = trailing_metadata.sub("", stripped[2:]).strip()
        if entry:
            texts.add(entry)
    return texts

@staticmethod
def _sanitize_slug(name: str) -> str:
"""Sanitize a topic name into a safe file slug."""
Expand Down
28 changes: 28 additions & 0 deletions tests/test_hippocampus.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,20 @@ def test_includes_metadata(self, hc, mem_dir):
assert "confidence:high" in content
assert "source:user" in content

def test_allows_superstring_of_existing(self, hc, mem_dir):
    """Encoding a longer rule that extends an existing shorter rule must still be stored."""
    # Shorter rule first, then the more specific one that contains it.
    for rule in ("Use httpx", "Use httpx with timeout=15"):
        hc.encode_rule(rule, kind="always")
    rules_text = (mem_dir / "rules.md").read_text()
    assert "Use httpx with timeout=15" in rules_text

def test_allows_substring_of_existing(self, hc, mem_dir):
    """Encoding a shorter rule must succeed even if a longer stored rule contains it."""
    # Longer rule first, then the shorter one that is a substring of it.
    for rule in ("Use httpx with timeout=15", "Use httpx"):
        hc.encode_rule(rule, kind="always")
    rules_file = mem_dir / "rules.md"
    occurrences = rules_file.read_text().count("Use httpx")
    # One hit inside the longer rule plus one for the standalone shorter rule.
    assert occurrences == 2


class TestEncodeLesson:
def test_creates_lessons_file(self, hc, mem_dir):
Expand All @@ -158,6 +172,20 @@ def test_no_topic_no_topic_file(self, hc, mem_dir):
hc.encode_lesson("Simple fact")
assert not (mem_dir / "topics").exists() or not any((mem_dir / "topics").iterdir())

def test_allows_superstring_of_existing_lesson(self, hc, mem_dir):
    """Encoding a more detailed lesson must succeed when a shorter prefix lesson exists."""
    short_lesson = "CoinGecko limits at 50/min"
    detailed_lesson = "CoinGecko limits at 50/min for free tier accounts"
    hc.encode_lesson(short_lesson)
    hc.encode_lesson(detailed_lesson)
    lessons_text = (mem_dir / "lessons.md").read_text()
    # Only the detailed lesson carries this suffix.
    assert "for free tier accounts" in lessons_text

def test_skips_exact_duplicate_with_metadata(self, hc, mem_dir):
    """Encoding the same lesson text twice under different topics must store it once in lessons.md."""
    for topic in ("api", "other"):
        hc.encode_lesson("Fact one", topic=topic)
    lessons_file = mem_dir / "lessons.md"
    stored = lessons_file.read_text()
    assert stored.count("Fact one") == 1


class TestRewriteIdentity:
def test_creates_profile(self, hc, mem_dir):
Expand Down
Loading