Skip to content

Commit 5373b14

Browse files
authored
Feat: insert fulltext search into pipeline (#567)
* feat: update memos headers * feat: headers add * feat: update search agent * feat: update mem story * feat: update mem scheduler * feat: update deepsearch mem code * feat: update deepsearch agent * feat: update test code * fix: remove dup config * feat: dock search pipeline * fix: code test * feat: add test scripts * feat: add test * feat: update need_raw process * fix: add initter * fix: change agent search func name * feat: update logs and defined * feat: update full text mem search * feat: cp plugin to dev * feat: add one recall for fulltext retrieval * fix: set default for fulltext search
1 parent 5759599 commit 5373b14

File tree

2 files changed

+68
-2
lines changed

2 files changed

+68
-2
lines changed

src/memos/graph_dbs/polardb.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1553,7 +1553,7 @@ def search_by_fulltext(
15531553
"""
15541554

15551555
params = [tsquery_string, tsquery_string]
1556-
1556+
logger.info(f"[search_by_fulltext] query: {query}, params: {params}")
15571557
conn = self._get_connection()
15581558
try:
15591559
with conn.cursor() as cursor:

src/memos/memories/textual/tree_text_memory/retrieve/recall.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,27 @@ def retrieve(
101101
user_name=user_name,
102102
search_filter=id_filter,
103103
)
104+
if use_fast_graph:
105+
future_fulltext = executor.submit(
106+
self._fulltext_recall,
107+
query_words=parsed_goal.keys or [],
108+
memory_scope=memory_scope,
109+
top_k=top_k,
110+
search_filter=search_filter,
111+
search_priority=search_priority,
112+
user_name=user_name,
113+
)
104114

105115
graph_results = future_graph.result()
106116
vector_results = future_vector.result()
107117
bm25_results = future_bm25.result() if self.use_bm25 else []
118+
fulltext_results = future_fulltext.result() if use_fast_graph else []
108119

109120
# Merge and deduplicate by ID
110-
combined = {item.id: item for item in graph_results + vector_results + bm25_results}
121+
combined = {
122+
item.id: item
123+
for item in graph_results + vector_results + bm25_results + fulltext_results
124+
}
111125

112126
return list(combined.values())
113127

@@ -404,3 +418,55 @@ def _bm25_recall(
404418
)
405419

406420
return [TextualMemoryItem.from_dict(n) for n in bm25_results]
421+
422+
def _fulltext_recall(
    self,
    query_words: list[str],
    memory_scope: str,
    top_k: int = 20,
    max_num: int = 5,
    status: str = "activated",
    cube_name: str | None = None,
    search_filter: dict | None = None,
    search_priority: dict | None = None,
    user_name: str | None = None,
):
    """Perform fulltext-based retrieval.

    Runs a fulltext search on the graph store for ``query_words``, then
    loads the matching nodes (without embeddings) and converts them to
    ``TextualMemoryItem`` objects.

    Args:
        query_words: list of query words; an empty list short-circuits
            to an empty result without touching the store.
        memory_scope: memory scope, forwarded to the store as ``scope``.
        top_k: top k results requested from the fulltext search.
        max_num: max number of query words. NOTE(review): accepted for
            interface compatibility but not read by this method at
            present -- confirm whether truncation is intended.
        status: node status forwarded to the fulltext search.
        cube_name: cube name, forwarded to both store calls.
        search_filter: forwarded to the store as ``filter``.
        search_priority: forwarded to the store as ``search_filter``.
        user_name: user name, forwarded to both store calls.

    Returns:
        list of TextualMemoryItem, one per node returned by the store
        for the deduplicated hit ids; empty when there are no query
        words, no hits, or no hit carries an id.
    """
    if not query_words:
        return []
    logger.info(f"[FULLTEXT] query_words: {query_words}")
    all_hits = self.graph_store.search_by_fulltext(
        query_words=query_words,
        top_k=top_k,
        status=status,
        scope=memory_scope,
        cube_name=cube_name,
        # NOTE(review): the store's `search_filter` receives the priority
        # dict and `filter` receives the plain filter -- confirm this
        # mapping against search_by_fulltext's signature.
        search_filter=search_priority,
        filter=search_filter,
        user_name=user_name,
    )
    if not all_hits:
        return []

    # Deduplicate while keeping the order in which the store returned the
    # hits. A plain set would discard that order and, because string
    # hashing is randomized per process, make the id list non-deterministic.
    unique_ids = list(dict.fromkeys(r["id"] for r in all_hits if r.get("id")))
    if not unique_ids:
        # Every hit lacked a usable id; skip the pointless node lookup.
        return []

    node_dicts = (
        self.graph_store.get_nodes(
            unique_ids, include_embedding=False, cube_name=cube_name, user_name=user_name
        )
        or []
    )
    return [TextualMemoryItem.from_dict(n) for n in node_dicts]

0 commit comments

Comments
 (0)