diff --git a/packages/openai-sdk-python/src/supermemory_openai/__init__.py b/packages/openai-sdk-python/src/supermemory_openai/__init__.py index 8dee08fc0..15adf20c2 100644 --- a/packages/openai-sdk-python/src/supermemory_openai/__init__.py +++ b/packages/openai-sdk-python/src/supermemory_openai/__init__.py @@ -28,6 +28,8 @@ get_last_user_message, get_conversation_content, convert_profile_to_markdown, + deduplicate_memories, + DeduplicatedMemories, ) from .exceptions import ( @@ -64,6 +66,8 @@ "get_last_user_message", "get_conversation_content", "convert_profile_to_markdown", + "deduplicate_memories", + "DeduplicatedMemories", # Exceptions "SupermemoryError", "SupermemoryConfigurationError", diff --git a/packages/openai-sdk-python/src/supermemory_openai/middleware.py b/packages/openai-sdk-python/src/supermemory_openai/middleware.py index a21492aed..e2399bb62 100644 --- a/packages/openai-sdk-python/src/supermemory_openai/middleware.py +++ b/packages/openai-sdk-python/src/supermemory_openai/middleware.py @@ -18,6 +18,7 @@ get_last_user_message, get_conversation_content, convert_profile_to_markdown, + deduplicate_memories, ) from .exceptions import ( SupermemoryConfigurationError, @@ -119,8 +120,11 @@ async def add_system_prompt( container_tag, query_text, api_key ) - memory_count_static = len(memories_response.profile.get("static", [])) - memory_count_dynamic = len(memories_response.profile.get("dynamic", [])) + profile = memories_response.profile or {} + search_results_data = memories_response.search_results or {} + memory_count_static = len(profile.get("static", [])) + memory_count_dynamic = len(profile.get("dynamic", [])) + memory_count_search = len(search_results_data.get("results", [])) logger.info( "Memory search completed", @@ -133,39 +137,39 @@ async def add_system_prompt( }, ) + deduplicated = deduplicate_memories( + static=profile.get("static", []), + dynamic=profile.get("dynamic", []), + search_results=search_results_data.get("results", []), + ) + + 
logger.debug( + "Memory deduplication completed", + { + "static": {"original": memory_count_static, "deduplicated": len(deduplicated.static)}, + "dynamic": {"original": memory_count_dynamic, "deduplicated": len(deduplicated.dynamic)}, + "search_results": {"original": memory_count_search, "deduplicated": len(deduplicated.search_results)}, + }, + ) + profile_data = "" if mode != "query": profile_data = convert_profile_to_markdown( { "profile": { - "static": [ - item.get("memory", "") if isinstance(item, dict) else str(item) - for item in memories_response.profile.get("static", []) - ], - "dynamic": [ - item.get("memory", "") if isinstance(item, dict) else str(item) - for item in memories_response.profile.get("dynamic", []) - ], - }, - "searchResults": { - "results": [ - {"memory": item.get("memory", "") if isinstance(item, dict) else str(item)} - for item in memories_response.search_results.get("results", []) - ], + "static": deduplicated.static, + "dynamic": deduplicated.dynamic, }, + "searchResults": {"results": []}, } ) search_results_memories = "" - if mode != "profile": - search_results = memories_response.search_results.get("results", []) - if search_results: - search_results_memories = ( - f"Search results for user's recent message: \n" - + "\n".join( - f"- {result.get('memory', '') if isinstance(result, dict) else str(result)}" for result in search_results - ) - ) + if mode != "profile" and deduplicated.search_results: + search_results_memories = ( + "Search results for user's recent message: \n" + + "\n".join(f"- {memory}" for memory in deduplicated.search_results) + ) memories = f"{profile_data}\n{search_results_memories}".strip() diff --git a/packages/openai-sdk-python/src/supermemory_openai/utils.py b/packages/openai-sdk-python/src/supermemory_openai/utils.py index d9ea68453..6858e09ee 100644 --- a/packages/openai-sdk-python/src/supermemory_openai/utils.py +++ b/packages/openai-sdk-python/src/supermemory_openai/utils.py @@ -187,6 +187,72 @@ def 
class DeduplicatedMemories:
    """Deduplicated memory strings organized by source.

    Attributes:
        static: Memory texts kept from the static profile list.
        dynamic: Memory texts kept from the dynamic profile list.
        search_results: Memory texts kept from the search results list.
    """

    def __init__(self, static: list[str], dynamic: list[str], search_results: list[str]):
        self.static = static
        self.dynamic = dynamic
        self.search_results = search_results

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(static={self.static!r}, "
            f"dynamic={self.dynamic!r}, search_results={self.search_results!r})"
        )


def _extract_memory_text(item: Any) -> Optional[str]:
    """Return the stripped memory text of *item*, or None when it has none.

    A dict contributes the value stored under its "memory" key; a plain string
    contributes itself. Anything else (None, numbers, a dict whose "memory"
    value is not a string, ...) and blank/whitespace-only text yields None.
    """
    if isinstance(item, dict):
        item = item.get("memory")
    if not isinstance(item, str):
        return None
    return item.strip() or None


def deduplicate_memories(
    static: Optional[list[Any]] = None,
    dynamic: Optional[list[Any]] = None,
    search_results: Optional[list[Any]] = None,
) -> DeduplicatedMemories:
    """
    Deduplicates memory items across sources. Priority: Static > Dynamic > Search Results.

    Each memory text (compared after stripping surrounding whitespace) is kept
    only once: in the highest-priority source it appears in, at the position of
    its first occurrence there. Repeats inside a single source, including the
    static list, are dropped as well.

    Args:
        static: Static profile items (dicts with a "memory" string, or strings).
        dynamic: Dynamic profile items, same accepted shapes.
        search_results: Search result items, same accepted shapes.

    Returns:
        A DeduplicatedMemories holding the surviving memory strings per source.
        Missing (None) or empty inputs produce empty lists.
    """
    seen_memories: set[str] = set()

    def unique_texts(items: Optional[list[Any]]) -> list[str]:
        # Shares `seen_memories` across calls so that later (lower-priority)
        # sources skip anything an earlier source already contributed.
        unique: list[str] = []
        for item in items or []:
            text = _extract_memory_text(item)
            if text is not None and text not in seen_memories:
                seen_memories.add(text)
                unique.append(text)
        return unique

    # Call order defines priority: static first, then dynamic, then search.
    static_memories = unique_texts(static)
    dynamic_memories = unique_texts(dynamic)
    search_memories = unique_texts(search_results)

    return DeduplicatedMemories(
        static=static_memories,
        dynamic=dynamic_memories,
        search_results=search_memories,
    )
dict[str, Any]) -> str: """ Convert profile data to markdown based on profile.static and profile.dynamic properties.