Skip to content

Commit 0c0a402

Browse files
authored
feat/complete multi modal (#570)
* fix: multi-modal memreader init error * fix: kwargs bug * feat: init examples for each multi-modal parser * feat: simple user_parser * feat: add multi-modal-parser example * feat: add multi-modal-parser example * feat: update user parser: only handle ChatCompletionUserMessageParam message * feat: rewrite create source and parse fast for system parser * feat: rewrite create source and parse fast for system parser * feat: rewrite assistant parser * feat: add additional sources to assistant parser * feat: add concat fast-mode memories from multi parsers * refactor: fix name * refactor: fix name * refactor: fix name * refactor: fix name * refactor: fix name * refactor: fix name * feat: add fine process path-A in multi_modal_struct * feat: add fine process path-A in multi_modal_struct * feat: add compare simple&multimodal example * feat: add _process_transfer_multi_modal_data in multimodal * feat: add image type * feat: add tool role; update string/text/tool parser * feat: update file_content_parser and multimodal reader * feat: default mem-reader for api is not set to multimodal reader * feat: add examples * feat: temporary fix server router bug
1 parent 5373b14 commit 0c0a402

File tree

3 files changed

+273
-10
lines changed

3 files changed

+273
-10
lines changed

examples/api/server_router_api.py

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,91 @@ def example_03_assistant_with_tool_calls():
181181

182182
# ===========================================================================
183183
# 3b-3c. Tool result and custom tool-format messages
184+
def example_03b_tool_message_with_result():
    """
    Add an assistant tool call together with the tool message that answers it.

    - The first message has `role = assistant`, `content = None` and a single
      `get_weather` function call.
    - The second message has `role = tool`; its `content` carries the tool
      result and its `tool_call_id` repeats the id of the assistant's call.
    """
    # One id shared by the call and its result so the two messages are linked.
    weather_call_id = "tool-call-weather-1"

    assistant_turn = {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": weather_call_id,
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "arguments": '{"location": "北京"}',
                },
            }
        ],
        "chat_time": "2025-11-24T10:12:00Z",
        "message_id": "assistant-with-call-1",
    }
    tool_turn = {
        "role": "tool",
        "content": "北京今天天气晴朗,温度25°C,湿度60%。",
        "tool_call_id": weather_call_id,
        "chat_time": "2025-11-24T10:12:05Z",
        "message_id": "tool-result-1",
    }

    request_body = {
        "user_id": USER_ID,
        "writable_cube_ids": [MEM_CUBE_ID],
        "messages": [assistant_turn, tool_turn],
        "info": {"source_type": "tool_execution"},
    }
    call_add_api("03b_tool_message_with_result", request_body)
223+
224+
225+
def example_03c_tool_description_input_output():
    """
    Add three custom tool items: tool_description, tool_input and tool_output.

    - These items are keyed by `type` instead of `role` (a custom format, not
      the OpenAI-style tool message).
    - `tool_description`: name, description and JSON-schema-like parameters.
    - `tool_input`: the arguments of one call, identified by `call_id`.
    - `tool_output`: the result of that call, carrying the same `call_id`.
    """
    tool_name = "get_weather"
    call_id = "call_123"

    description_item = {
        "type": "tool_description",
        "name": tool_name,
        "description": "获取指定地点的当前天气信息",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string", "description": "城市名称"}},
            "required": ["location"],
        },
    }
    input_item = {
        "type": "tool_input",
        "call_id": call_id,
        "name": tool_name,
        "argument": {"location": "北京"},
    }
    output_item = {
        "type": "tool_output",
        "call_id": call_id,
        "name": tool_name,
        "output": {"weather": "晴朗", "temperature": 25, "humidity": 60},
    }

    request_body = {
        "user_id": USER_ID,
        "writable_cube_ids": [MEM_CUBE_ID],
        "messages": [description_item, input_item, output_item],
        "info": {"source_type": "custom_tool_format"},
    }
    call_add_api("03c_tool_description_input_output", request_body)
265+
266+
267+
# ===========================================================================
268+
# 4. Multimodal messages
184269
# ===========================================================================
185270

186271

@@ -414,6 +499,56 @@ def example_09b_pure_file_input_by_file_data():
414499
call_add_api("09b_pure_file_input_by_file_data", payload)
415500

416501

502+
def example_09c_pure_file_input_by_oss_url():
    """
    Add a standalone file item whose `file_data` is meant to be an OSS URL.

    - The single message is a `type = file` item with `file_data` and `filename`.
    - `file_data` holds a URL-style reference to object storage here, in place
      of base64-encoded content.
    """
    file_item = {
        "type": "file",
        "file": {
            # NOTE(review): "oss_url" looks like a placeholder for a real
            # object-storage URL - confirm before running against a server.
            "file_data": "oss_url",
            "filename": "document.pdf",
        },
    }

    request_body = {
        "user_id": USER_ID,
        "writable_cube_ids": [MEM_CUBE_ID],
        "messages": [file_item],
        "info": {"source_type": "file_ingestion_oss"},
    }
    call_add_api("09c_pure_file_input_by_oss_url", request_body)
525+
526+
527+
def example_09d_pure_image_input():
    """
    Add a standalone image item with no surrounding dialog.

    - The single message is a `type = image_url` item, not a role-based chat
      message.
    - `image_url` carries the image `url` and a `detail` level of "high".
    """
    image_item = {
        "type": "image_url",
        "image_url": {
            "url": "https://example.com/standalone_image.jpg",
            "detail": "high",
        },
    }

    request_body = {
        "user_id": USER_ID,
        "writable_cube_ids": [MEM_CUBE_ID],
        "messages": [image_item],
        "info": {"source_type": "image_ingestion"},
    }
    call_add_api("09d_pure_image_input", request_body)
550+
551+
417552
def example_10_mixed_text_file_image():
418553
"""
419554
Mixed multimodal message: text + file + image in a single user message.
@@ -619,6 +754,96 @@ def example_16_feedback_add():
619754
call_add_api("16_feedback_add", payload)
620755

621756

757+
def example_17_family_travel_conversation():
    """
    Add a six-message user/assistant conversation about a family summer trip.

    - The whole conversation history is sent in a single request.
    - `async_mode` is set to "async".
    - `session_id` and `info.conversation_id` carry the same value.
    """
    conversation_id = "0610"

    # (role, content) pairs in conversation order; expanded into message dicts below.
    turns = [
        ("user", "我想暑假出去玩,你能帮我推荐下吗?"),
        ("assistant", "好的!是自己出行还是和家人朋友一起呢?"),
        ("user", "肯定要带孩子啊,我们家出门都是全家一起。"),
        ("assistant", "明白了,所以你们是父母带孩子一块儿旅行,对吗?"),
        ("user", "对,带上孩子和老人,一般都是全家行动。"),
        ("assistant", "收到,那我会帮你推荐适合家庭出游的目的地。"),
    ]
    messages = [{"role": role, "content": text} for role, text in turns]

    request_body = {
        # NOTE(review): fixed user id rather than USER_ID - confirm this is intentional.
        "user_id": "memos_automated_testing",
        "writable_cube_ids": [MEM_CUBE_ID],
        "session_id": conversation_id,
        "async_mode": "async",
        "messages": messages,
        "custom_tags": [],
        "info": {
            "source_type": "chat",
            "conversation_id": conversation_id,
        },
    }
    call_add_api("17_family_travel_conversation", request_body)
804+
805+
806+
def example_18_add_with_chat_history():
    """
    Add two chat messages and pass earlier turns in a separate `chat_history`.

    - `messages` holds the user/assistant pair to be added.
    - `chat_history` holds a system prompt and one earlier user/assistant
      exchange, sent as context alongside `messages`.
    """

    def as_messages(turns):
        # Expand (role, content) pairs into message dicts, preserving order.
        return [{"role": role, "content": text} for role, text in turns]

    new_messages = as_messages(
        [
            ("user", "我想了解一下这个产品的价格。"),
            ("assistant", "好的,我来为您查询价格信息。"),
        ]
    )
    earlier_turns = as_messages(
        [
            ("system", "You are a helpful product assistant."),
            ("user", "你好,我想咨询产品信息。"),
            ("assistant", "您好!我很乐意为您提供产品信息。"),
        ]
    )

    request_body = {
        "user_id": USER_ID,
        "writable_cube_ids": [MEM_CUBE_ID],
        "session_id": "session_with_history",
        "messages": new_messages,
        "chat_history": earlier_turns,
        "info": {"source_type": "chat_with_history"},
    }
    call_add_api("18_add_with_chat_history", request_body)
845+
846+
622847
# ===========================================================================
623848
# Entry point
624849
# ===========================================================================
@@ -628,17 +853,23 @@ def example_16_feedback_add():
628853
example_01_string_message_minimal()
629854
example_02_standard_chat_triplet()
630855
example_03_assistant_with_tool_calls()
856+
example_03b_tool_message_with_result()
857+
example_03c_tool_description_input_output()
631858
example_04_extreme_multimodal_single_message()
632859
example_05_multimodal_text_and_image()
633860
example_06_multimodal_text_and_file()
634861
example_07_audio_only_message()
635862
example_08_pure_text_input_items()
636863
example_09_pure_file_input_by_file_id()
637864
example_09b_pure_file_input_by_file_data()
865+
example_09c_pure_file_input_by_oss_url()
866+
example_09d_pure_image_input()
638867
example_10_mixed_text_file_image()
639868
example_11_deprecated_memory_content_and_doc_path()
640869
example_12_async_default_pipeline()
641870
example_13_sync_fast_pipeline()
642871
example_14_sync_fine_pipeline()
643872
example_15_async_with_task_id()
644873
example_16_feedback_add()
874+
example_17_family_travel_conversation()
875+
example_18_add_with_chat_history()

examples/mem_reader/multimodal_struct_reader.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,38 @@ def get_info(self) -> dict[str, Any]:
164164
]
165165
],
166166
),
167+
TestCase(
168+
name="chat_with_list_content",
169+
description="",
170+
scene_data=[
171+
[
172+
{
173+
"role": "user",
174+
"content": [
175+
{
176+
"type": "text",
177+
"text": "我是测试base64",
178+
},
179+
{
180+
"type": "file",
181+
"file": {
182+
"file_data": "Hello World",
183+
"filename": "2102b64c-25a2-481c-a940-4325496baf39.txt",
184+
"file_id": "90ee1bcf-5295-4b75-91a4-23fe1f7ab30a",
185+
},
186+
},
187+
{
188+
"type": "image_url",
189+
"image_url": {
190+
"url": "https://play-groud-test-1.oss-cn-shanghai.aliyuncs.com/algorithmImages/2025/12/01/ce545319ba6d4d21a0aebcb75337acc3.jpeg"
191+
},
192+
},
193+
],
194+
"message_id": "1995458892790317057",
195+
}
196+
]
197+
],
198+
),
167199
]
168200

169201
# Tool-related test cases

src/memos/api/product_models.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
# Import message types from core types module
88
from memos.log import get_logger
9-
from memos.types import MessageList, MessagesType, PermissionDict, SearchMode
9+
from memos.types import PermissionDict, SearchMode
1010

1111

1212
logger = get_logger(__name__)
@@ -56,7 +56,7 @@ class Message(BaseModel):
5656

5757
class MemoryCreate(BaseRequest):
5858
user_id: str = Field(..., description="User ID")
59-
messages: list[Message] | None = Field(None, description="List of messages to store.")
59+
messages: list | None = Field(None, description="List of messages to store.")
6060
memory_content: str | None = Field(None, description="Content to store as memory")
6161
doc_path: str | None = Field(None, description="Path to document to store")
6262
mem_cube_id: str | None = Field(None, description="ID of the memory cube")
@@ -83,7 +83,7 @@ class ChatRequest(BaseRequest):
8383
writable_cube_ids: list[str] | None = Field(
8484
None, description="List of cube IDs user can write for multi-cube chat"
8585
)
86-
history: MessageList | None = Field(None, description="Chat history")
86+
history: list | None = Field(None, description="Chat history")
8787
mode: SearchMode = Field(SearchMode.FAST, description="search mode: fast, fine, or mixture")
8888
system_prompt: str | None = Field(None, description="Base system prompt to use for chat")
8989
top_k: int = Field(10, description="Number of results to return")
@@ -165,7 +165,7 @@ class ChatCompleteRequest(BaseRequest):
165165
user_id: str = Field(..., description="User ID")
166166
query: str = Field(..., description="Chat query message")
167167
mem_cube_id: str | None = Field(None, description="Cube ID to use for chat")
168-
history: MessageList | None = Field(None, description="Chat history")
168+
history: list | None = Field(None, description="Chat history")
169169
internet_search: bool = Field(False, description="Whether to use internet search")
170170
system_prompt: str | None = Field(None, description="Base prompt to use for chat")
171171
top_k: int = Field(10, description="Number of results to return")
@@ -251,7 +251,7 @@ class MemoryCreateRequest(BaseRequest):
251251
"""Request model for creating memories."""
252252

253253
user_id: str = Field(..., description="User ID")
254-
messages: MessagesType | None = Field(None, description="List of messages to store.")
254+
messages: str | list | None = Field(None, description="List of messages to store.")
255255
memory_content: str | None = Field(None, description="Memory content to store")
256256
doc_path: str | None = Field(None, description="Path to document to store")
257257
mem_cube_id: str | None = Field(None, description="Cube ID")
@@ -360,7 +360,7 @@ class APISearchRequest(BaseRequest):
360360
)
361361

362362
# ==== Context ====
363-
chat_history: MessageList | None = Field(
363+
chat_history: list | None = Field(
364364
None,
365365
description=(
366366
"Historical chat messages used internally by algorithms. "
@@ -490,7 +490,7 @@ class APIADDRequest(BaseRequest):
490490
)
491491

492492
# ==== Input content ====
493-
messages: MessagesType | None = Field(
493+
messages: str | list | None = Field(
494494
None,
495495
description=(
496496
"List of messages to store. Supports: "
@@ -506,7 +506,7 @@ class APIADDRequest(BaseRequest):
506506
)
507507

508508
# ==== Chat history ====
509-
chat_history: MessageList | None = Field(
509+
chat_history: list | None = Field(
510510
None,
511511
description=(
512512
"Historical chat messages used internally by algorithms. "
@@ -639,7 +639,7 @@ class APIChatCompleteRequest(BaseRequest):
639639
writable_cube_ids: list[str] | None = Field(
640640
None, description="List of cube IDs user can write for multi-cube chat"
641641
)
642-
history: MessageList | None = Field(None, description="Chat history")
642+
history: list | None = Field(None, description="Chat history")
643643
mode: SearchMode = Field(SearchMode.FAST, description="search mode: fast, fine, or mixture")
644644
system_prompt: str | None = Field(None, description="Base system prompt to use for chat")
645645
top_k: int = Field(10, description="Number of results to return")
@@ -707,7 +707,7 @@ class SuggestionRequest(BaseRequest):
707707
user_id: str = Field(..., description="User ID")
708708
mem_cube_id: str = Field(..., description="Cube ID")
709709
language: Literal["zh", "en"] = Field("zh", description="Language for suggestions")
710-
message: MessagesType | None = Field(None, description="List of messages to store.")
710+
message: list | None = Field(None, description="List of messages to store.")
711711

712712

713713
# ─── MemOS Client Response Models ──────────────────────────────────────────────

0 commit comments

Comments
 (0)