-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Added Audio to FastMCP #1130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Added Audio to FastMCP #1130
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
import base64 | ||
from pathlib import Path | ||
|
||
from mcp.types import ImageContent | ||
from mcp.types import AudioContent, ImageContent | ||
|
||
|
||
class Image: | ||
|
@@ -52,3 +52,52 @@ def to_image_content(self) -> ImageContent: | |
raise ValueError("No image data available") | ||
|
||
return ImageContent(type="image", data=data, mimeType=self._mime_type) | ||
|
||
|
||
class Audio:
    """Helper class for returning audio from tools.

    Wraps either an audio file on disk or an in-memory byte string and
    converts it to an MCP ``AudioContent`` block on demand.

    Args:
        path: Location of an audio file. Mutually exclusive with ``data``.
        data: Raw audio bytes. Mutually exclusive with ``path``.
        format: Optional format name such as ``"wav"`` or ``"mp3"``. When
            given, it takes precedence over the file extension when the MIME
            type is chosen.

    Raises:
        ValueError: If neither or both of ``path`` and ``data`` are provided.
    """

    # File suffix -> MIME type. Shared by suffix-based and format-based
    # detection so both routes report the same type for the same format.
    _MIME_TYPES_BY_SUFFIX = {
        ".wav": "audio/wav",
        ".mp3": "audio/mpeg",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".aac": "audio/aac",
        ".m4a": "audio/mp4",
    }

    def __init__(
        self,
        path: str | Path | None = None,
        data: bytes | None = None,
        format: str | None = None,
    ):
        if path is None and data is None:
            raise ValueError("Either path or data must be provided")
        if path is not None and data is not None:
            raise ValueError("Only one of path or data can be provided")

        # A falsy path (e.g. an empty string) is stored as None.
        self.path = Path(path) if path else None
        self.data = data
        self._format = format
        self._mime_type = self._get_mime_type()

    def _get_mime_type(self) -> str:
        """Get MIME type from format or guess from file extension.

        Returns:
            The MIME type for an explicit ``format`` (looked up in the suffix
            table, falling back to ``audio/<format>``), else the type mapped
            from the path suffix (``application/octet-stream`` when the suffix
            is unknown), else ``audio/wav`` for raw bytes.
        """
        if self._format:
            # Normalise "MP3", ".mp3" and " mp3 " to the same key so an
            # explicit format resolves like the matching file extension.
            fmt = self._format.strip().lstrip(".").lower()
            if fmt:
                return self._MIME_TYPES_BY_SUFFIX.get(f".{fmt}", f"audio/{fmt}")

        if self.path:
            suffix = self.path.suffix.lower()
            return self._MIME_TYPES_BY_SUFFIX.get(suffix, "application/octet-stream")
        return "audio/wav"  # default for raw binary data

    def to_audio_content(self) -> AudioContent:
        """Convert to MCP AudioContent.

        Returns:
            An ``AudioContent`` whose ``data`` is the base64 text of the audio
            bytes and whose ``mimeType`` is the type chosen at construction.

        Raises:
            ValueError: If the instance holds neither a path nor data.
        """
        if self.path:
            raw = self.path.read_bytes()
        elif self.data is not None:
            raw = self.data
        else:
            raise ValueError("No audio data available")

        encoded = base64.b64encode(raw).decode()
        return AudioContent(type="audio", data=encoded, mimeType=self._mime_type)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,7 @@ | |
from mcp.server.fastmcp import Context, FastMCP | ||
from mcp.server.fastmcp.prompts.base import Message, UserMessage | ||
from mcp.server.fastmcp.resources import FileResource, FunctionResource | ||
from mcp.server.fastmcp.utilities.types import Image | ||
from mcp.server.fastmcp.utilities.types import Audio, Image | ||
from mcp.shared.exceptions import McpError | ||
from mcp.shared.memory import ( | ||
create_connected_server_and_client_session as client_session, | ||
|
@@ -194,6 +194,10 @@ def image_tool_fn(path: str) -> Image: | |
return Image(path) | ||
|
||
|
||
def audio_tool_fn(path: str) -> Audio:
    """Tool fixture: wrap the file at ``path`` in an ``Audio`` helper."""
    audio = Audio(path)
    return audio
|
||
|
||
def mixed_content_tool_fn() -> list[ContentBlock]: | ||
return [ | ||
TextContent(type="text", text="Hello"), | ||
|
@@ -299,6 +303,60 @@ async def test_tool_image_helper(self, tmp_path: Path): | |
# Check structured content - Image return type should NOT have structured output | ||
assert result.structuredContent is None | ||
|
||
@pytest.mark.anyio
async def test_tool_audio_helper(self, tmp_path: Path):
    """A tool returning ``Audio`` yields a single base64 ``AudioContent``."""
    # Write a small stand-in audio file for the tool to read.
    payload = b"fake wav data"
    wav_file = tmp_path / "test.wav"
    wav_file.write_bytes(payload)

    server = FastMCP()
    server.add_tool(audio_tool_fn)
    async with client_session(server._mcp_server) as client:
        arguments = {"path": str(wav_file)}
        result = await client.call_tool("audio_tool_fn", arguments)
        assert len(result.content) == 1
        audio_block = result.content[0]
        assert isinstance(audio_block, AudioContent)
        assert audio_block.type == "audio"
        assert audio_block.mimeType == "audio/wav"

        # The payload must round-trip through base64 unchanged.
        assert base64.b64decode(audio_block.data) == payload
        # An Audio return type should NOT produce structured output.
        assert result.structuredContent is None
|
||
@pytest.mark.anyio
@pytest.mark.parametrize(
    ("filename", "expected_mime_type"),
    [
        ("test.wav", "audio/wav"),
        ("test.mp3", "audio/mpeg"),
        ("test.ogg", "audio/ogg"),
        ("test.flac", "audio/flac"),
        ("test.aac", "audio/aac"),
        ("test.m4a", "audio/mp4"),
        ("test.unknown", "application/octet-stream"),  # Unknown extension fallback
    ],
)
async def test_tool_audio_suffix_detection(self, tmp_path: Path, filename: str, expected_mime_type: str):
    """The MIME type of a path-based ``Audio`` follows the file extension.

    Each extension is its own parametrized case, so one failing mapping does
    not stop the remaining extensions from being checked and reported.
    """
    # Create a test audio file with the specific extension
    audio_path = tmp_path / filename
    audio_path.write_bytes(b"fake audio data")

    mcp = FastMCP()
    mcp.add_tool(audio_tool_fn)
    async with client_session(mcp._mcp_server) as client:
        result = await client.call_tool("audio_tool_fn", {"path": str(audio_path)})
        assert len(result.content) == 1
        content = result.content[0]
        assert isinstance(content, AudioContent)
        assert content.type == "audio"
        assert content.mimeType == expected_mime_type, (
            f"Expected {expected_mime_type} for {filename}, got {content.mimeType}"
        )
        # Verify base64 encoding
        decoded = base64.b64decode(content.data)
        assert decoded == b"fake audio data"
|
||
@pytest.mark.anyio | ||
async def test_tool_mixed_content(self): | ||
mcp = FastMCP() | ||
|
@@ -371,6 +429,47 @@ def mixed_list_fn() -> list: | |
# Check structured content - untyped list with Image objects should NOT have structured output | ||
assert result.structuredContent is None | ||
|
||
@pytest.mark.anyio
async def test_tool_mixed_list_with_audio(self, tmp_path: Path):
    """An untyped list mixing ``Audio`` with other values converts item by item."""
    # NOTE(review): a reviewer asked for this test to be removed and the Audio
    # case added to the preceding test instead.
    wav_file = tmp_path / "test.wav"
    wav_file.write_bytes(b"test audio data")

    def mixed_list_fn() -> list:
        return [
            "text message",
            Audio(wav_file),
            {"key": "value"},
            TextContent(type="text", text="direct content"),
        ]

    server = FastMCP()
    server.add_tool(mixed_list_fn)
    async with client_session(server._mcp_server) as client:
        result = await client.call_tool("mixed_list_fn", {})
        assert len(result.content) == 4
        text_item, audio_item, dict_item, direct_item = result.content

        # Plain string -> TextContent
        assert isinstance(text_item, TextContent)
        assert text_item.text == "text message"

        # Audio helper -> AudioContent carrying the base64 payload
        assert isinstance(audio_item, AudioContent)
        assert audio_item.mimeType == "audio/wav"
        assert base64.b64decode(audio_item.data) == b"test audio data"

        # Dict -> TextContent whose text contains the key/value pair
        assert isinstance(dict_item, TextContent)
        assert '"key": "value"' in dict_item.text

        # A TextContent passed directly comes back with the same text
        assert isinstance(direct_item, TextContent)
        assert direct_item.text == "direct content"

        # Untyped list with Audio objects should NOT have structured output
        assert result.structuredContent is None
|
||
@pytest.mark.anyio | ||
async def test_tool_structured_output_basemodel(self): | ||
"""Test tool with structured output returning BaseModel""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's use `@typing.override`.