Skip to content

Commit 7d16794

Browse files
authored
feat: multimodal reader (#560)
* fix: multi-model memreader init error * fix: kwargs bug * feat: init examples for each multi-model parser * feat: simple user_parser * feat: add multi-model-parser example * feat: add multi-model-parser example * feat: update user parser: only tackle with ChatCompletionUserMessageParam message * feat: rewrite create source and parse fast for system parser * feat: rewrite create source and parse fast for system parser * feat: rewrite assistant parser * feat: add additional sources to assistant parser * feat: add concat fast-mode memories from multi parsers * refactor: fix name * refactor: fix name * refactor: fix name * refactor: fix name * refactor: fix name * refactor: fix name
1 parent 1e01164 commit 7d16794

30 files changed

+2164
-426
lines changed

examples/mem_reader/multimodel_struct_reader.py renamed to examples/mem_reader/multimodal_struct_reader.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
from dotenv import load_dotenv
99

10-
from memos.configs.mem_reader import MultiModelStructMemReaderConfig
11-
from memos.mem_reader.multi_model_struct import MultiModelStructMemReader
10+
from memos.configs.mem_reader import MultiModalStructMemReaderConfig
11+
from memos.mem_reader.multi_modal_struct import MultiModalStructMemReader
1212
from memos.memories.textual.item import (
1313
SourceMessage,
1414
TextualMemoryItem,
@@ -111,11 +111,11 @@ def get_reader_config() -> dict[str, Any]:
111111
"""
112112
Get reader configuration from environment variables.
113113
114-
Returns a dictionary that can be used to create MultiModelStructMemReaderConfig.
114+
Returns a dictionary that can be used to create MultiModalStructMemReaderConfig.
115115
Similar to APIConfig.get_reader_config() in server_router_api.py.
116116
117117
Returns:
118-
Configuration dictionary for MultiModelStructMemReaderConfig
118+
Configuration dictionary for MultiModalStructMemReaderConfig
119119
"""
120120
openai_api_key = os.getenv("OPENAI_API_KEY")
121121
openai_base_url = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1")
@@ -228,13 +228,13 @@ def main():
228228
if openai_api_key:
229229
# Use environment variables (similar to server_router_api.py)
230230
config_dict = get_reader_config()
231-
reader_config = MultiModelStructMemReaderConfig.model_validate(config_dict)
231+
reader_config = MultiModalStructMemReaderConfig.model_validate(config_dict)
232232
else:
233233
# Fall back to JSON file
234-
reader_config = MultiModelStructMemReaderConfig.from_json_file(
234+
reader_config = MultiModalStructMemReaderConfig.from_json_file(
235235
"examples/data/config/simple_struct_reader_config.json"
236236
)
237-
reader = MultiModelStructMemReader(reader_config)
237+
reader = MultiModalStructMemReader(reader_config)
238238

239239
# 2. Define scene data
240240
scene_data = [
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Parser examples for different message types."""
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
"""Shared configuration utilities for parser examples.
2+
3+
This module provides configuration functions that match the configuration
4+
logic in examples/mem_reader/multimodal_struct_reader.py.
5+
"""
6+
7+
import os
8+
9+
from typing import Any
10+
11+
from memos.configs.embedder import EmbedderConfigFactory
12+
from memos.configs.llm import LLMConfigFactory
13+
from memos.embedders.factory import EmbedderFactory
14+
from memos.llms.factory import LLMFactory
15+
16+
17+
def get_reader_config() -> dict[str, Any]:
    """Build a reader configuration dict from environment variables.

    The returned dict can be passed to
    ``MultiModalStructMemReaderConfig.model_validate`` and mirrors the
    configuration logic in examples/mem_reader/multimodal_struct_reader.py.

    Returns:
        dict with "llm", "embedder", and "chunker" sub-configurations.
    """
    env = os.getenv  # local alias: every setting below is env-driven

    openai_key = env("OPENAI_API_KEY")
    openai_base = env("OPENAI_API_BASE", "https://api.openai.com/v1")
    ollama_base = env("OLLAMA_API_BASE", "http://localhost:11434")

    # Shared LLM options (identical expressions in both backend branches).
    strip_think = env("MEM_READER_LLM_REMOVE_THINK_PREFIX", "true").lower() == "true"
    token_limit = int(env("MEM_READER_LLM_MAX_TOKENS", "8192"))

    if env("MEM_READER_LLM_BACKEND", "openai") == "ollama":
        llm_cfg = {
            "backend": "ollama",
            "config": {
                "model_name_or_path": env("MEM_READER_LLM_MODEL", "qwen3:0.6b"),
                "api_base": ollama_base,
                "temperature": float(env("MEM_READER_LLM_TEMPERATURE", "0.0")),
                "remove_think_prefix": strip_think,
                "max_tokens": token_limit,
            },
        }
    else:  # default backend: openai
        llm_cfg = {
            "backend": "openai",
            "config": {
                "model_name_or_path": env("MEM_READER_LLM_MODEL", "gpt-4o-mini"),
                # NOTE(review): "MEMRADER_API_KEY" looks like a typo of
                # MEMREADER_API_KEY; kept as-is since it is a live env-var
                # name — confirm against deployment before renaming.
                "api_key": openai_key or env("MEMRADER_API_KEY", "EMPTY"),
                "api_base": openai_base,
                "temperature": float(env("MEM_READER_LLM_TEMPERATURE", "0.5")),
                "remove_think_prefix": strip_think,
                "max_tokens": token_limit,
            },
        }

    # Embedder: MEM_READER_* vars take precedence, MOS_* are the fallbacks.
    embedder_backend = env(
        "MEM_READER_EMBEDDER_BACKEND", env("MOS_EMBEDDER_BACKEND", "ollama")
    )
    if embedder_backend == "universal_api":
        embedder_cfg = {
            "backend": "universal_api",
            "config": {
                "provider": env(
                    "MEM_READER_EMBEDDER_PROVIDER",
                    env("MOS_EMBEDDER_PROVIDER", "openai"),
                ),
                "api_key": env(
                    "MEM_READER_EMBEDDER_API_KEY",
                    env("MOS_EMBEDDER_API_KEY", openai_key or "sk-xxxx"),
                ),
                "model_name_or_path": env(
                    "MEM_READER_EMBEDDER_MODEL",
                    env("MOS_EMBEDDER_MODEL", "text-embedding-3-large"),
                ),
                "base_url": env(
                    "MEM_READER_EMBEDDER_API_BASE",
                    env("MOS_EMBEDDER_API_BASE", openai_base),
                ),
            },
        }
    else:  # default backend: ollama
        embedder_cfg = {
            "backend": "ollama",
            "config": {
                "model_name_or_path": env(
                    "MEM_READER_EMBEDDER_MODEL",
                    env("MOS_EMBEDDER_MODEL", "nomic-embed-text:latest"),
                ),
                "api_base": ollama_base,
            },
        }

    # Chunker settings are fixed (not env-configurable).
    chunker_cfg = {
        "backend": "sentence",
        "config": {
            "tokenizer_or_token_counter": "gpt2",
            "chunk_size": 512,
            "chunk_overlap": 128,
            "min_sentences_per_chunk": 1,
        },
    }

    return {"llm": llm_cfg, "embedder": embedder_cfg, "chunker": chunker_cfg}
113+
114+
115+
def init_embedder_and_llm():
    """Create embedder and LLM instances from environment-driven config.

    Reads the shared configuration via :func:`get_reader_config` and runs it
    through the project's config/factory pair for each component.

    Returns:
        Tuple of (embedder, llm) instances.
    """
    cfg = get_reader_config()

    # Validate each sub-config, then hand it to the matching factory.
    embedder = EmbedderFactory.from_config(
        EmbedderConfigFactory.model_validate(cfg["embedder"])
    )
    llm = LLMFactory.from_config(LLMConfigFactory.model_validate(cfg["llm"]))

    return embedder, llm
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
"""Example demonstrating AssistantParser usage.
2+
3+
AssistantParser handles assistant messages in chat conversations.
4+
"""
5+
6+
import sys
7+
8+
from pathlib import Path
9+
10+
from dotenv import load_dotenv
11+
12+
from memos.mem_reader.read_multi_modal.assistant_parser import AssistantParser
13+
14+
15+
# Handle imports for both script and module usage
16+
try:
17+
from .config_utils import init_embedder_and_llm
18+
except ImportError:
19+
# When running as script, add parent directory to path
20+
sys.path.insert(0, str(Path(__file__).parent))
21+
from config_utils import init_embedder_and_llm
22+
23+
# Load environment variables
24+
load_dotenv()
25+
26+
27+
def main():
    """Walk through AssistantParser: create sources, fast-parse, rebuild."""
    print("=== AssistantParser Example ===\n")

    # Shared embedder/LLM built from environment configuration.
    embedder, llm = init_embedder_and_llm()

    parser = AssistantParser(embedder=embedder, llm=llm)

    # Sample assistant turns covering plain chat, document summary, and code.
    demo_messages = [
        {
            "role": "assistant",
            "content": "I'm sorry to hear that you're feeling down. Would you like to talk about what's been going on?",
            "chat_time": "2025-01-15T10:00:30",
            "message_id": "msg_001",
        },
        {
            "role": "assistant",
            "content": "Based on the document you provided, I can see several key points: 1) The project timeline, 2) Budget considerations, and 3) Resource allocation.",
            "chat_time": "2025-01-15T10:05:30",
            "message_id": "msg_002",
        },
        {
            "role": "assistant",
            "content": "Here's a Python solution for your problem:\n```python\ndef solve_problem():\n return 'solution'\n```",
            "chat_time": "2025-01-15T10:10:30",
            "message_id": "msg_003",
        },
    ]

    print("📝 Processing assistant messages:\n")
    for idx, msg in enumerate(demo_messages, 1):
        print(f"Assistant Message {idx}:")
        print(f" Content: {msg['content'][:60]}...")

        # Turn the raw chat message into a SourceMessage.
        ctx = {"user_id": "user1", "session_id": "session1"}
        src = parser.create_source(msg, ctx)

        print(" ✅ Created SourceMessage:")
        print(f" - Type: {src.type}")
        print(f" - Role: {src.role}")
        print(f" - Content: {src.content[:60]}...")
        print(f" - Chat Time: {src.chat_time}")
        print(f" - Message ID: {src.message_id}")
        print()

        # Fast-mode parsing: no LLM round-trip.
        fast_items = parser.parse_fast(msg, ctx)
        print(f" 📊 Fast mode generated {len(fast_items)} memory item(s)")
        if fast_items:
            print(f" - Memory: {fast_items[0].memory[:60]}...")
            print(f" - Memory Type: {fast_items[0].metadata.memory_type}")
            print(f" - Tags: {fast_items[0].metadata.tags}")
        print()

        # Round-trip: reconstruct the chat message from its source.
        restored = parser.rebuild_from_source(src)
        print(f" 🔄 Rebuilt message: role={restored['role']}, content={restored['content'][:40]}...")
        print()

    print("✅ AssistantParser example completed!")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)