From 1f62b6535216cded10b57f447c0f28930f6d91ca Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Thu, 23 Jan 2025 22:48:28 +0000 Subject: [PATCH 01/21] add callbacks for json validators --- mcp_bridge/config/final.py | 1 + .../inference_engine_mappers/chat/generic.py | 14 ++++++++++++++ .../chat/openrouter/request.py | 6 ++++++ .../chat/openrouter/response.py | 6 ++++++ .../chat/openrouter/stream_response.py | 6 ++++++ .../inference_engine_mappers/chat/requester.py | 15 +++++++++++++++ .../inference_engine_mappers/chat/responder.py | 15 +++++++++++++++ .../chat/stream_responder.py | 16 ++++++++++++++++ mcp_bridge/openai_clients/chatCompletion.py | 13 +++++++++---- .../openai_clients/streamChatCompletion.py | 11 +++++------ 10 files changed, 93 insertions(+), 10 deletions(-) create mode 100644 mcp_bridge/inference_engine_mappers/chat/generic.py create mode 100644 mcp_bridge/inference_engine_mappers/chat/openrouter/request.py create mode 100644 mcp_bridge/inference_engine_mappers/chat/openrouter/response.py create mode 100644 mcp_bridge/inference_engine_mappers/chat/openrouter/stream_response.py create mode 100644 mcp_bridge/inference_engine_mappers/chat/requester.py create mode 100644 mcp_bridge/inference_engine_mappers/chat/responder.py create mode 100644 mcp_bridge/inference_engine_mappers/chat/stream_responder.py diff --git a/mcp_bridge/config/final.py b/mcp_bridge/config/final.py index 7b1b766..1a88915 100644 --- a/mcp_bridge/config/final.py +++ b/mcp_bridge/config/final.py @@ -7,6 +7,7 @@ class InferenceServer(BaseModel): + type: Literal["openai", "openrouter"] = Field("openai", description="Type of inference server") # used to apply data mappers base_url: str = Field( default="http://localhost:11434/v1", description="Base URL of the inference server", diff --git a/mcp_bridge/inference_engine_mappers/chat/generic.py b/mcp_bridge/inference_engine_mappers/chat/generic.py new file mode 100644 index 0000000..ddd7a11 --- /dev/null +++ b/mcp_bridge/inference_engine_mappers/chat/generic.py @@ -0,0 +1,14 @@ +from lmos_openai_types import ( + CreateChatCompletionRequest, + CreateChatCompletionResponse, + CreateChatCompletionStreamResponse +) + +def chat_completion_generic_request(data: CreateChatCompletionRequest) -> dict: + return data.model_dump(exclude_defaults=True, exclude_none=True, exclude_unset=True) + +def chat_completion_generic_response(data: dict) -> CreateChatCompletionResponse: + return CreateChatCompletionResponse.model_validate(data) + +def chat_completion_generic_stream_response(data: dict) -> CreateChatCompletionStreamResponse: + return CreateChatCompletionStreamResponse.model_validate(data) diff --git a/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py b/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py new file mode 100644 index 0000000..d0b2d1f --- /dev/null +++ b/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py @@ -0,0 +1,6 @@ +from lmos_openai_types import ( + CreateChatCompletionRequest +) + +def chat_completion_openrouter_request(data: CreateChatCompletionRequest) -> dict: + return data.model_dump(exclude_defaults=True, exclude_none=True, exclude_unset=True) diff --git a/mcp_bridge/inference_engine_mappers/chat/openrouter/response.py b/mcp_bridge/inference_engine_mappers/chat/openrouter/response.py new file mode 100644 index 0000000..5c6347d --- /dev/null +++ b/mcp_bridge/inference_engine_mappers/chat/openrouter/response.py @@ -0,0 +1,6 @@ +from lmos_openai_types import ( + 
CreateChatCompletionResponse +) + +def chat_completion_openrouter_response(data: dict) -> CreateChatCompletionResponse: + return CreateChatCompletionResponse.model_validate(data) diff --git a/mcp_bridge/inference_engine_mappers/chat/openrouter/stream_response.py b/mcp_bridge/inference_engine_mappers/chat/openrouter/stream_response.py new file mode 100644 index 0000000..8e15b91 --- /dev/null +++ b/mcp_bridge/inference_engine_mappers/chat/openrouter/stream_response.py @@ -0,0 +1,6 @@ +from lmos_openai_types import ( + CreateChatCompletionStreamResponse +) + +def chat_completion_openrouter_stream_response(data: dict) -> CreateChatCompletionStreamResponse: + return CreateChatCompletionStreamResponse.model_validate(data) diff --git a/mcp_bridge/inference_engine_mappers/chat/requester.py b/mcp_bridge/inference_engine_mappers/chat/requester.py new file mode 100644 index 0000000..213d1e1 --- /dev/null +++ b/mcp_bridge/inference_engine_mappers/chat/requester.py @@ -0,0 +1,15 @@ +from .generic import chat_completion_generic_request +from .openrouter.request import chat_completion_openrouter_request +from lmos_openai_types import CreateChatCompletionRequest +from mcp_bridge.config import config + +def chat_completion_requester(data: CreateChatCompletionRequest) -> dict: + client_type = config.inference_server.type + + match client_type: + # apply incoming data mappers + case "openai": + return chat_completion_generic_request(data) + case "openrouter": + # TODO: implement openrouter requester + return chat_completion_openrouter_request(data) diff --git a/mcp_bridge/inference_engine_mappers/chat/responder.py b/mcp_bridge/inference_engine_mappers/chat/responder.py new file mode 100644 index 0000000..ffa63e0 --- /dev/null +++ b/mcp_bridge/inference_engine_mappers/chat/responder.py @@ -0,0 +1,15 @@ +from .generic import chat_completion_generic_response +from .openrouter.response import chat_completion_openrouter_response +from lmos_openai_types import CreateChatCompletionResponse +from mcp_bridge.config import config + +def chat_completion_responder(data: dict) -> CreateChatCompletionResponse: + client_type = config.inference_server.type + + match client_type: + # apply incoming data mappers + case "openai": + return chat_completion_generic_response(data) + case "openrouter": + # TODO: implement openrouter responser + return chat_completion_openrouter_response(data) diff --git a/mcp_bridge/inference_engine_mappers/chat/stream_responder.py b/mcp_bridge/inference_engine_mappers/chat/stream_responder.py new file mode 100644 index 0000000..987c424 --- /dev/null +++ b/mcp_bridge/inference_engine_mappers/chat/stream_responder.py @@ -0,0 +1,16 @@ +from .generic import chat_completion_generic_stream_response +from .openrouter.stream_response import chat_completion_openrouter_stream_response +from lmos_openai_types import CreateChatCompletionStreamResponse +from mcp_bridge.config import config + +def chat_completion_stream_responder(data: dict) -> CreateChatCompletionStreamResponse: + client_type = config.inference_server.type + + match client_type: + # apply incoming data mappers + case "openai": + return chat_completion_generic_stream_response(data) + case "openrouter": + # TODO: implement openrouter responser + return chat_completion_openrouter_stream_response(data) + diff --git a/mcp_bridge/openai_clients/chatCompletion.py b/mcp_bridge/openai_clients/chatCompletion.py index a43d7f6..ce7ea1b 100644 --- a/mcp_bridge/openai_clients/chatCompletion.py +++ b/mcp_bridge/openai_clients/chatCompletion.py @@ -7,7 +7,8 
@@ from .utils import call_tool, chat_completion_add_tools from .genericHttpxClient import client from mcp_bridge.mcp_clients.McpClientManager import ClientManager -from mcp_bridge.tool_mappers import mcp2openai +from mcp_bridge.inference_engine_mappers.chat.requester import chat_completion_requester +from mcp_bridge.inference_engine_mappers.chat.responder import chat_completion_responder from loguru import logger import json @@ -28,16 +29,20 @@ async def chat_completions( #content=request.model_dump_json( # exclude_defaults=True, exclude_none=True, exclude_unset=True #), - json=request.model_dump(exclude_defaults=True, exclude_none=True, exclude_unset=True), + json=chat_completion_requester(request), ) ).text logger.debug(text) try: - response = CreateChatCompletionResponse.model_validate_json(text) + response = chat_completion_responder(json.loads(text)) except Exception as e: logger.error(f"Error parsing response: {text}") logger.error(e) - return + return # type: ignore + + if not response.choices: + logger.error("no choices found in response") + return # type: ignore msg = response.choices[0].message msg = ChatCompletionRequestMessage( diff --git a/mcp_bridge/openai_clients/streamChatCompletion.py b/mcp_bridge/openai_clients/streamChatCompletion.py index 67c7f67..c2c6c27 100644 --- a/mcp_bridge/openai_clients/streamChatCompletion.py +++ b/mcp_bridge/openai_clients/streamChatCompletion.py @@ -8,6 +8,9 @@ CreateChatCompletionStreamResponse, Function1, ) + +from mcp_bridge.inference_engine_mappers.chat.requester import chat_completion_requester +from mcp_bridge.inference_engine_mappers.chat.stream_responder import chat_completion_stream_responder from .utils import call_tool, chat_completion_add_tools from mcp_bridge.models import SSEData from .genericHttpxClient import client @@ -46,9 +49,7 @@ async def chat_completions(request: CreateChatCompletionRequest): # exclude_defaults=True, exclude_none=True, exclude_unset=True # ) - json_data = json.dumps(request.model_dump( - exclude_defaults=True, exclude_none=True, exclude_unset=True - )) + json_data = json.dumps(chat_completion_requester(request)) # logger.debug(json_data) @@ -100,9 +101,7 @@ async def chat_completions(request: CreateChatCompletionRequest): logger.debug(f"failed to lowercase finish_reason: {e}") try: - parsed_data = CreateChatCompletionStreamResponse.model_validate_json( - data - ) + parsed_data = chat_completion_stream_responder(json.loads(data)) except Exception as e: logger.debug(data) raise e From ec6008e80f8c33bf785b12ac6f154d019a3f6c78 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Sat, 25 Jan 2025 14:19:21 +0000 Subject: [PATCH 02/21] dynamic httpx clients --- mcp_bridge/endpoints.py | 8 +++-- mcp_bridge/http_clients/__init__.py | 35 +++++++++++++++++++ .../chat/responder.py | 1 + mcp_bridge/openai_clients/__init__.py | 3 +- mcp_bridge/openai_clients/chatCompletion.py | 4 +-- mcp_bridge/openai_clients/completion.py | 4 +-- .../openai_clients/genericHttpxClient.py | 8 ----- .../openai_clients/streamChatCompletion.py | 9 +++-- mcp_bridge/sampling/sampler.py | 4 +-- 9 files changed, 55 insertions(+), 21 deletions(-) create mode 100644 mcp_bridge/http_clients/__init__.py delete mode 100644 mcp_bridge/openai_clients/genericHttpxClient.py diff --git a/mcp_bridge/endpoints.py b/mcp_bridge/endpoints.py index 23326cf..42bc04e 100644 --- a/mcp_bridge/endpoints.py +++ b/mcp_bridge/endpoints.py @@ -3,11 +3,11 @@ from lmos_openai_types import CreateChatCompletionRequest, 
CreateCompletionRequest from mcp_bridge.openai_clients import ( - client, completions, chat_completions, streaming_chat_completions, ) +from mcp_bridge.http_clients import get_client from mcp_bridge.openapi_tags import Tag @@ -34,6 +34,8 @@ async def openai_chat_completions(request: CreateChatCompletionRequest): @router.get("/models") async def models(): - """List models""" - response = await client.get("/models") + """List models. + + This is a passthrough to the inference server and returns the same response json.""" + response = await get_client().get("/models") return response.json() diff --git a/mcp_bridge/http_clients/__init__.py b/mcp_bridge/http_clients/__init__.py new file mode 100644 index 0000000..2a36ab1 --- /dev/null +++ b/mcp_bridge/http_clients/__init__.py @@ -0,0 +1,35 @@ +from httpx import AsyncClient +from mcp_bridge.config import config + + +# change this if you want to hard fork the repo +# its used to show ranking on openrouter and other inference providers +BRIDGE_REPO_URL = "https://github.com/lmos-io/mcp-bridge" +BRIDGE_APP_TITLE = "MCP Bridge" + + +def get_client() -> AsyncClient: + client: AsyncClient = AsyncClient( + base_url=config.inference_server.base_url, + headers={"Content-Type": "application/json"}, + timeout=10000, + ) + + # generic openai + if config.inference_server.type == "openai": + client.headers["Authorization"] = f"Bearer {config.inference_server.api_key}" + return client + + # openrouter + if config.inference_server.type == "openrouter": + client.headers["Authorization"] = f"Bearer {config.inference_server.api_key}" + client.headers["HTTP-Referer"] = BRIDGE_REPO_URL + client.headers["X-Title"] = BRIDGE_APP_TITLE + return client + + # gemini models + if config.inference_server.type == "google": + pass + # TODO: implement google openai auth + + raise NotImplementedError("Inference Server Type not supported") \ No newline at end of file diff --git a/mcp_bridge/inference_engine_mappers/chat/responder.py b/mcp_bridge/inference_engine_mappers/chat/responder.py index ffa63e0..c286944 100644 --- a/mcp_bridge/inference_engine_mappers/chat/responder.py +++ b/mcp_bridge/inference_engine_mappers/chat/responder.py @@ -13,3 +13,4 @@ def chat_completion_responder(data: dict) -> CreateChatCompletionResponse: case "openrouter": # TODO: implement openrouter responser return chat_completion_openrouter_response(data) + diff --git a/mcp_bridge/openai_clients/__init__.py b/mcp_bridge/openai_clients/__init__.py index b47def7..c7b4a80 100644 --- a/mcp_bridge/openai_clients/__init__.py +++ b/mcp_bridge/openai_clients/__init__.py @@ -1,6 +1,5 @@ -from .genericHttpxClient import client from .completion import completions from .chatCompletion import chat_completions from .streamChatCompletion import streaming_chat_completions -__all__ = ["client", "completions", "chat_completions", "streaming_chat_completions"] +__all__ = ["completions", "chat_completions", "streaming_chat_completions"] diff --git a/mcp_bridge/openai_clients/chatCompletion.py b/mcp_bridge/openai_clients/chatCompletion.py index ce7ea1b..89a0bc6 100644 --- a/mcp_bridge/openai_clients/chatCompletion.py +++ b/mcp_bridge/openai_clients/chatCompletion.py @@ -5,7 +5,7 @@ ) from .utils import call_tool, chat_completion_add_tools -from .genericHttpxClient import client +from mcp_bridge.http_clients import get_client from mcp_bridge.mcp_clients.McpClientManager import ClientManager from mcp_bridge.inference_engine_mappers.chat.requester import chat_completion_requester from 
mcp_bridge.inference_engine_mappers.chat.responder import chat_completion_responder @@ -24,7 +24,7 @@ async def chat_completions( # logger.debug(request.model_dump_json()) text = ( - await client.post( + await get_client().post( "/chat/completions", #content=request.model_dump_json( # exclude_defaults=True, exclude_none=True, exclude_unset=True diff --git a/mcp_bridge/openai_clients/completion.py b/mcp_bridge/openai_clients/completion.py index 42d06f1..8377699 100644 --- a/mcp_bridge/openai_clients/completion.py +++ b/mcp_bridge/openai_clients/completion.py @@ -1,11 +1,11 @@ from lmos_openai_types import CreateCompletionRequest -from .genericHttpxClient import client +from mcp_bridge.http_clients import get_client async def completions(request: CreateCompletionRequest) -> dict: """performs a completion using the inference server""" - response = await client.post( + response = await get_client().post( "/completions", json=request.model_dump( exclude_defaults=True, exclude_none=True, exclude_unset=True diff --git a/mcp_bridge/openai_clients/genericHttpxClient.py b/mcp_bridge/openai_clients/genericHttpxClient.py deleted file mode 100644 index a89506f..0000000 --- a/mcp_bridge/openai_clients/genericHttpxClient.py +++ /dev/null @@ -1,8 +0,0 @@ -from httpx import AsyncClient -from mcp_bridge.config import config - -client: AsyncClient = AsyncClient( - base_url=config.inference_server.base_url, - headers={"Authorization": f"Bearer {config.inference_server.api_key}", "Content-Type": "application/json"}, - timeout=10000, -) diff --git a/mcp_bridge/openai_clients/streamChatCompletion.py b/mcp_bridge/openai_clients/streamChatCompletion.py index c2c6c27..34de54b 100644 --- a/mcp_bridge/openai_clients/streamChatCompletion.py +++ b/mcp_bridge/openai_clients/streamChatCompletion.py @@ -13,7 +13,7 @@ from mcp_bridge.inference_engine_mappers.chat.stream_responder import chat_completion_stream_responder from .utils import call_tool, chat_completion_add_tools from mcp_bridge.models import SSEData -from .genericHttpxClient import client +from mcp_bridge.http_clients import get_client from mcp_bridge.mcp_clients.McpClientManager import ClientManager from mcp_bridge.tool_mappers import mcp2openai from loguru import logger @@ -62,7 +62,7 @@ async def chat_completions(request: CreateChatCompletionRequest): tool_call_id: str = "" async with aconnect_sse( - client, "post", "/chat/completions", content=json_data + get_client(), "post", "/chat/completions", content=json_data ) as event_source: # check if the content type is correct because the aiter_sse method @@ -105,6 +105,11 @@ async def chat_completions(request: CreateChatCompletionRequest): except Exception as e: logger.debug(data) raise e + + # handle empty response (usually caused by "usage" reporting) + if len(parsed_data.choices) == 0: + logger.debug("no choices found in response") + continue # add the delta to the response content content = parsed_data.choices[0].delta.content diff --git a/mcp_bridge/sampling/sampler.py b/mcp_bridge/sampling/sampler.py index 96745bd..c2379ae 100644 --- a/mcp_bridge/sampling/sampler.py +++ b/mcp_bridge/sampling/sampler.py @@ -5,7 +5,7 @@ from mcp.types import CreateMessageRequestParams, CreateMessageResult from mcp_bridge.config import config -from mcp_bridge.openai_clients.genericHttpxClient import client +from mcp_bridge.http_clients import get_client from mcp_bridge.sampling.modelSelector import find_best_model def make_message(x: SamplingMessage): @@ -52,7 +52,7 @@ async def handle_sampling_message( logger.debug(request) 
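# The hunk below swaps the old module-level `client` for the `get_client()`
# factory added in mcp_bridge/http_clients earlier in this patch, so each
# request picks up the provider-specific headers (Authorization, HTTP-Referer,
# X-Title). A minimal sketch of that pattern, assuming httpx and the config
# shape shown above; the function names, URL, key, and explicit close are
# illustrative and not part of this patch:

from httpx import AsyncClient

def get_client_sketch(server_type: str, base_url: str, api_key: str) -> AsyncClient:
    client = AsyncClient(base_url=base_url, timeout=10000)
    client.headers["Authorization"] = f"Bearer {api_key}"
    if server_type == "openrouter":
        # attribution headers used for openrouter rankings, mirroring
        # BRIDGE_REPO_URL / BRIDGE_APP_TITLE in mcp_bridge/http_clients
        client.headers["HTTP-Referer"] = "https://github.com/SecretiveShell/MCP-Bridge"
        client.headers["X-Title"] = "MCP Bridge"
    return client

async def post_chat_sketch(request_body: dict) -> dict:
    # one client per call, mirroring how get_client() is used in this series;
    # closing it explicitly here avoids leaking connections in the sketch
    client = get_client_sketch("openrouter", "https://openrouter.ai/api/v1", "hypothetical-key")
    try:
        resp = await client.post("/chat/completions", json=request_body)
        return resp.json()
    finally:
        await client.aclose()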
- resp = await client.post( + resp = await get_client().post( "/chat/completions", json=request, timeout=config.sampling.timeout, From 6ba2a2ab3fac006fe25b9f5886e412a23cc1f77a Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Sat, 25 Jan 2025 15:12:10 +0000 Subject: [PATCH 03/21] fix url --- mcp_bridge/http_clients/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mcp_bridge/http_clients/__init__.py b/mcp_bridge/http_clients/__init__.py index 2a36ab1..75d0958 100644 --- a/mcp_bridge/http_clients/__init__.py +++ b/mcp_bridge/http_clients/__init__.py @@ -4,7 +4,7 @@ # change this if you want to hard fork the repo # its used to show ranking on openrouter and other inference providers -BRIDGE_REPO_URL = "https://github.com/lmos-io/mcp-bridge" +BRIDGE_REPO_URL = "https://github.com/SecretiveShell/MCP-Bridge" BRIDGE_APP_TITLE = "MCP Bridge" @@ -22,7 +22,7 @@ def get_client() -> AsyncClient: # openrouter if config.inference_server.type == "openrouter": - client.headers["Authorization"] = f"Bearer {config.inference_server.api_key}" + client.headers["authorization"] = f"Bearer {config.inference_server.api_key}" client.headers["HTTP-Referer"] = BRIDGE_REPO_URL client.headers["X-Title"] = BRIDGE_APP_TITLE return client From 9cf9a6b44545ae95b27243dc8f241615aef5c763 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Sat, 25 Jan 2025 18:03:48 +0000 Subject: [PATCH 04/21] minor fixes --- mcp_bridge/http_clients/__init__.py | 2 +- .../openai_clients/streamChatCompletion.py | 20 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/mcp_bridge/http_clients/__init__.py b/mcp_bridge/http_clients/__init__.py index 75d0958..84b53df 100644 --- a/mcp_bridge/http_clients/__init__.py +++ b/mcp_bridge/http_clients/__init__.py @@ -22,7 +22,7 @@ def get_client() -> AsyncClient: # openrouter if config.inference_server.type == "openrouter": - client.headers["authorization"] = f"Bearer {config.inference_server.api_key}" + client.headers["Authorization"] = fr"Bearer {config.inference_server.api_key}" client.headers["HTTP-Referer"] = BRIDGE_REPO_URL client.headers["X-Title"] = BRIDGE_APP_TITLE return client diff --git a/mcp_bridge/openai_clients/streamChatCompletion.py b/mcp_bridge/openai_clients/streamChatCompletion.py index 34de54b..e0a6a3a 100644 --- a/mcp_bridge/openai_clients/streamChatCompletion.py +++ b/mcp_bridge/openai_clients/streamChatCompletion.py @@ -65,10 +65,20 @@ async def chat_completions(request: CreateChatCompletionRequest): get_client(), "post", "/chat/completions", content=json_data ) as event_source: + logger.debug(event_source.response.status_code) + # check if the content type is correct because the aiter_sse method # will raise an exception if the content type is not correct if "Content-Type" in event_source.response.headers: content_type = event_source.response.headers["Content-Type"] + if "application/json" in content_type: + logger.error(f"Unexpected Content-Type: {content_type}") + error_data = await event_source.response.aread() + logger.error(f"Request URL: {event_source.response.url}") + logger.error(f"Response Status: {event_source.response.status_code}") + logger.error(f"Response Data: {error_data.decode(event_source.response.encoding or 'utf-8')}") + raise HTTPException(status_code=500, detail="Unexpected Content-Type") + if "text/event-stream" not in content_type: logger.error(f"Unexpected Content-Type: {content_type}") error_data 
= await event_source.response.aread() @@ -94,12 +104,6 @@ async def chat_completions(request: CreateChatCompletionRequest): logger.debug("inference serverstream done") break - # for some reason openrouter uses uppercase for finish_reason - try: - data['choices'][0]['finish_reason'] = data['choices'][0]['finish_reason'].lower() # type: ignore - except Exception as e: - logger.debug(f"failed to lowercase finish_reason: {e}") - try: parsed_data = chat_completion_stream_responder(json.loads(data)) except Exception as e: @@ -157,7 +161,9 @@ async def chat_completions(request: CreateChatCompletionRequest): # ideally we should check this properly assert last is not None - assert last.choices[0].finish_reason is not None + if last.choices[0].finish_reason is None: + logger.debug("no finish reason found") + continue if last.choices[0].finish_reason.value in ["stop", "length"]: logger.debug("no tool calls found") From 44982a89d5f3d2d770be1bea9ec20c60c1af3667 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Sat, 25 Jan 2025 19:12:43 +0000 Subject: [PATCH 05/21] add better error handling --- .../openai_clients/streamChatCompletion.py | 45 ++++++++++++++----- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/mcp_bridge/openai_clients/streamChatCompletion.py b/mcp_bridge/openai_clients/streamChatCompletion.py index e0a6a3a..331c743 100644 --- a/mcp_bridge/openai_clients/streamChatCompletion.py +++ b/mcp_bridge/openai_clients/streamChatCompletion.py @@ -1,6 +1,7 @@ import json +import time from typing import Optional -from fastapi import HTTPException +from secrets import token_hex from lmos_openai_types import ( ChatCompletionMessageToolCall, ChatCompletionRequestMessage, @@ -14,26 +15,37 @@ from .utils import call_tool, chat_completion_add_tools from mcp_bridge.models import SSEData from mcp_bridge.http_clients import get_client -from mcp_bridge.mcp_clients.McpClientManager import ClientManager -from mcp_bridge.tool_mappers import mcp2openai from loguru import logger from httpx_sse import aconnect_sse -from sse_starlette.sse import EventSourceResponse, ServerSentEvent +from sse_starlette.sse import EventSourceResponse +from sse_starlette.event import ServerSentEvent async def streaming_chat_completions(request: CreateChatCompletionRequest): # raise NotImplementedError("Streaming Chat Completion is not supported") - try: return EventSourceResponse( content=chat_completions(request), media_type="text/event-stream", headers={"Cache-Control": "no-cache"}, ) - except Exception as e: - logger.error(e) +def format_error_as_sse(message: str) -> str: + return SSEData.model_validate({ + "id": f"error-{token_hex(16)}", + "provider": "MCP-Bridge", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": "MCP-Bridge", + "choices": [{ + "index": 0, + "delta": { + "role": "assistant", + "content": message, + }, + }] + }).model_dump_json() async def chat_completions(request: CreateChatCompletionRequest): @@ -74,10 +86,17 @@ async def chat_completions(request: CreateChatCompletionRequest): if "application/json" in content_type: logger.error(f"Unexpected Content-Type: {content_type}") error_data = await event_source.response.aread() - logger.error(f"Request URL: {event_source.response.url}") - logger.error(f"Response Status: {event_source.response.status_code}") - logger.error(f"Response Data: {error_data.decode(event_source.response.encoding or 'utf-8')}") - raise HTTPException(status_code=500, detail="Unexpected Content-Type") + # 
logger.error(f"Request URL: {event_source.response.url}") + # logger.error(f"Response Status: {event_source.response.status_code}") + # logger.error(f"Response Data: {error_data.decode(event_source.response.encoding or 'utf-8')}") + # raise HTTPException(status_code=500, detail="Unexpected Content-Type") + data = json.loads(error_data.decode(event_source.response.encoding or 'utf-8')) + if message := data.get("error", {}).get("message"): + logger.error(f"Upstream error: {message}") + yield format_error_as_sse(message) + yield ['DONE'] # ServerSentEvent(event="message", data="[DONE]", id=None, retry=None) + return + if "text/event-stream" not in content_type: logger.error(f"Unexpected Content-Type: {content_type}") @@ -86,7 +105,9 @@ async def chat_completions(request: CreateChatCompletionRequest): logger.error(f"Request Data: {json_data}") logger.error(f"Response Status: {event_source.response.status_code}") logger.error(f"Response Data: {error_data.decode(event_source.response.encoding or 'utf-8')}") - raise HTTPException(status_code=500, detail="Unexpected Content-Type") + yield format_error_as_sse("Upsteam error: Unexpected Content-Type") + yield ServerSentEvent(event="message", data="[DONE]", id=None, retry=None) + return # iterate over the SSE stream async for sse in event_source.aiter_sse(): From 0d301ec3a343b4aabf5cac259b65b92a46ea2523 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Sat, 25 Jan 2025 21:24:55 +0000 Subject: [PATCH 06/21] format codebase --- mcp_bridge/__init__.py | 2 +- mcp_bridge/config/final.py | 15 +++- mcp_bridge/endpoints.py | 4 +- mcp_bridge/http_clients/__init__.py | 8 +- .../inference_engine_mappers/chat/generic.py | 9 ++- .../chat/openrouter/request.py | 5 +- .../chat/openrouter/response.py | 5 +- .../chat/openrouter/stream_response.py | 15 +++- .../chat/requester.py | 1 + .../chat/responder.py | 2 +- .../chat/stream_responder.py | 2 +- mcp_bridge/main.py | 7 +- mcp_bridge/mcpManagement/prompts.py | 1 - mcp_bridge/mcpManagement/resources.py | 2 +- mcp_bridge/mcp_clients/AbstractClient.py | 13 +++- mcp_bridge/mcp_clients/McpClientManager.py | 4 +- mcp_bridge/mcp_clients/StdioClient.py | 4 +- mcp_bridge/mcp_clients/session.py | 14 ++-- mcp_bridge/openai_clients/chatCompletion.py | 16 ++-- .../openai_clients/streamChatCompletion.py | 73 +++++++++++-------- mcp_bridge/sampling/modelSelector.py | 22 ++++-- mcp_bridge/sampling/sampler.py | 24 +++--- 22 files changed, 152 insertions(+), 96 deletions(-) diff --git a/mcp_bridge/__init__.py b/mcp_bridge/__init__.py index 222c11c..6a9beea 100644 --- a/mcp_bridge/__init__.py +++ b/mcp_bridge/__init__.py @@ -1 +1 @@ -__version__ = '0.4.0' \ No newline at end of file +__version__ = "0.4.0" diff --git a/mcp_bridge/config/final.py b/mcp_bridge/config/final.py index 1a88915..30beabf 100644 --- a/mcp_bridge/config/final.py +++ b/mcp_bridge/config/final.py @@ -7,7 +7,9 @@ class InferenceServer(BaseModel): - type: Literal["openai", "openrouter"] = Field("openai", description="Type of inference server") # used to apply data mappers + type: Literal["openai", "openrouter"] = Field( + "openai", description="Type of inference server" + ) # used to apply data mappers base_url: str = Field( default="http://localhost:11434/v1", description="Base URL of the inference server", @@ -25,14 +27,19 @@ class Logging(BaseModel): class SamplingModel(BaseModel): model: Annotated[str, Field(description="Name of the sampling model")] - intelligence: Annotated[float, Field(description="Intelligence 
of the sampling model")] = 0.5 + intelligence: Annotated[ + float, Field(description="Intelligence of the sampling model") + ] = 0.5 cost: Annotated[float, Field(description="Cost of the sampling model")] = 0.5 speed: Annotated[float, Field(description="Speed of the sampling model")] = 0.5 class Sampling(BaseModel): timeout: Annotated[int, Field(description="Timeout for sampling requests")] = 10 - models: Annotated[list[SamplingModel], Field(description="List of sampling models")] = [] + models: Annotated[ + list[SamplingModel], Field(description="List of sampling models") + ] = [] + class SSEMCPServer(BaseModel): # TODO: expand this once I find a good definition for this @@ -53,7 +60,7 @@ class Network(BaseModel): class Settings(BaseSettings): inference_server: InferenceServer = Field( default_factory=lambda: InferenceServer.model_construct(), - description="Inference server configuration" + description="Inference server configuration", ) mcp_servers: dict[str, MCPServer] = Field( diff --git a/mcp_bridge/endpoints.py b/mcp_bridge/endpoints.py index 42bc04e..77a6f3a 100644 --- a/mcp_bridge/endpoints.py +++ b/mcp_bridge/endpoints.py @@ -34,8 +34,8 @@ async def openai_chat_completions(request: CreateChatCompletionRequest): @router.get("/models") async def models(): - """List models. - + """List models. + This is a passthrough to the inference server and returns the same response json.""" response = await get_client().get("/models") return response.json() diff --git a/mcp_bridge/http_clients/__init__.py b/mcp_bridge/http_clients/__init__.py index 84b53df..9c4c9f1 100644 --- a/mcp_bridge/http_clients/__init__.py +++ b/mcp_bridge/http_clients/__init__.py @@ -19,17 +19,17 @@ def get_client() -> AsyncClient: if config.inference_server.type == "openai": client.headers["Authorization"] = f"Bearer {config.inference_server.api_key}" return client - + # openrouter if config.inference_server.type == "openrouter": - client.headers["Authorization"] = fr"Bearer {config.inference_server.api_key}" + client.headers["Authorization"] = rf"Bearer {config.inference_server.api_key}" client.headers["HTTP-Referer"] = BRIDGE_REPO_URL client.headers["X-Title"] = BRIDGE_APP_TITLE return client - + # gemini models if config.inference_server.type == "google": pass # TODO: implement google openai auth - raise NotImplementedError("Inference Server Type not supported") \ No newline at end of file + raise NotImplementedError("Inference Server Type not supported") diff --git a/mcp_bridge/inference_engine_mappers/chat/generic.py b/mcp_bridge/inference_engine_mappers/chat/generic.py index ddd7a11..16c19a8 100644 --- a/mcp_bridge/inference_engine_mappers/chat/generic.py +++ b/mcp_bridge/inference_engine_mappers/chat/generic.py @@ -1,14 +1,19 @@ from lmos_openai_types import ( CreateChatCompletionRequest, CreateChatCompletionResponse, - CreateChatCompletionStreamResponse + CreateChatCompletionStreamResponse, ) + def chat_completion_generic_request(data: CreateChatCompletionRequest) -> dict: return data.model_dump(exclude_defaults=True, exclude_none=True, exclude_unset=True) + def chat_completion_generic_response(data: dict) -> CreateChatCompletionResponse: return CreateChatCompletionResponse.model_validate(data) -def chat_completion_generic_stream_response(data: dict) -> CreateChatCompletionStreamResponse: + +def chat_completion_generic_stream_response( + data: dict, +) -> CreateChatCompletionStreamResponse: return CreateChatCompletionStreamResponse.model_validate(data) diff --git 
a/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py b/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py index d0b2d1f..4905a37 100644 --- a/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py +++ b/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py @@ -1,6 +1,5 @@ -from lmos_openai_types import ( - CreateChatCompletionRequest -) +from lmos_openai_types import CreateChatCompletionRequest + def chat_completion_openrouter_request(data: CreateChatCompletionRequest) -> dict: return data.model_dump(exclude_defaults=True, exclude_none=True, exclude_unset=True) diff --git a/mcp_bridge/inference_engine_mappers/chat/openrouter/response.py b/mcp_bridge/inference_engine_mappers/chat/openrouter/response.py index 5c6347d..1252650 100644 --- a/mcp_bridge/inference_engine_mappers/chat/openrouter/response.py +++ b/mcp_bridge/inference_engine_mappers/chat/openrouter/response.py @@ -1,6 +1,5 @@ -from lmos_openai_types import ( - CreateChatCompletionResponse -) +from lmos_openai_types import CreateChatCompletionResponse + def chat_completion_openrouter_response(data: dict) -> CreateChatCompletionResponse: return CreateChatCompletionResponse.model_validate(data) diff --git a/mcp_bridge/inference_engine_mappers/chat/openrouter/stream_response.py b/mcp_bridge/inference_engine_mappers/chat/openrouter/stream_response.py index 8e15b91..1c0f628 100644 --- a/mcp_bridge/inference_engine_mappers/chat/openrouter/stream_response.py +++ b/mcp_bridge/inference_engine_mappers/chat/openrouter/stream_response.py @@ -1,6 +1,13 @@ -from lmos_openai_types import ( - CreateChatCompletionStreamResponse -) +from lmos_openai_types import CreateChatCompletionStreamResponse -def chat_completion_openrouter_stream_response(data: dict) -> CreateChatCompletionStreamResponse: + +def chat_completion_openrouter_stream_response( + data: dict, +) -> CreateChatCompletionStreamResponse: # type: ignore + try: + data["choices"][0]["finish_reason"] = data["choices"][0][ + "finish_reason" + ].lower() # type: ignore + except Exception: + pass return CreateChatCompletionStreamResponse.model_validate(data) diff --git a/mcp_bridge/inference_engine_mappers/chat/requester.py b/mcp_bridge/inference_engine_mappers/chat/requester.py index 213d1e1..a231f1f 100644 --- a/mcp_bridge/inference_engine_mappers/chat/requester.py +++ b/mcp_bridge/inference_engine_mappers/chat/requester.py @@ -3,6 +3,7 @@ from lmos_openai_types import CreateChatCompletionRequest from mcp_bridge.config import config + def chat_completion_requester(data: CreateChatCompletionRequest) -> dict: client_type = config.inference_server.type diff --git a/mcp_bridge/inference_engine_mappers/chat/responder.py b/mcp_bridge/inference_engine_mappers/chat/responder.py index c286944..9322d07 100644 --- a/mcp_bridge/inference_engine_mappers/chat/responder.py +++ b/mcp_bridge/inference_engine_mappers/chat/responder.py @@ -3,6 +3,7 @@ from lmos_openai_types import CreateChatCompletionResponse from mcp_bridge.config import config + def chat_completion_responder(data: dict) -> CreateChatCompletionResponse: client_type = config.inference_server.type @@ -13,4 +14,3 @@ def chat_completion_responder(data: dict) -> CreateChatCompletionResponse: case "openrouter": # TODO: implement openrouter responser return chat_completion_openrouter_response(data) - diff --git a/mcp_bridge/inference_engine_mappers/chat/stream_responder.py b/mcp_bridge/inference_engine_mappers/chat/stream_responder.py index 987c424..7d97b57 100644 --- 
a/mcp_bridge/inference_engine_mappers/chat/stream_responder.py +++ b/mcp_bridge/inference_engine_mappers/chat/stream_responder.py @@ -3,6 +3,7 @@ from lmos_openai_types import CreateChatCompletionStreamResponse from mcp_bridge.config import config + def chat_completion_stream_responder(data: dict) -> CreateChatCompletionStreamResponse: client_type = config.inference_server.type @@ -13,4 +14,3 @@ def chat_completion_stream_responder(data: dict) -> CreateChatCompletionStreamRe case "openrouter": # TODO: implement openrouter responser return chat_completion_openrouter_stream_response(data) - diff --git a/mcp_bridge/main.py b/mcp_bridge/main.py index 8add34f..e767c39 100644 --- a/mcp_bridge/main.py +++ b/mcp_bridge/main.py @@ -7,6 +7,7 @@ from mcp_bridge.openapi_tags import tags_metadata from mcp_bridge import __version__ as version + def create_app() -> FastAPI: """ Create and configure the FastAPI application. @@ -26,12 +27,16 @@ def create_app() -> FastAPI: return app + app = create_app() + def run(): import uvicorn from mcp_bridge.config import config + uvicorn.run(app, host=config.network.host, port=config.network.port) + if __name__ == "__main__": - run() \ No newline at end of file + run() diff --git a/mcp_bridge/mcpManagement/prompts.py b/mcp_bridge/mcpManagement/prompts.py index a9e1171..c20f38b 100644 --- a/mcp_bridge/mcpManagement/prompts.py +++ b/mcp_bridge/mcpManagement/prompts.py @@ -1,4 +1,3 @@ -from typing import Any from fastapi import APIRouter, HTTPException from mcp_bridge.mcp_clients.McpClientManager import ClientManager from mcp.types import ListPromptsResult, GetPromptResult diff --git a/mcp_bridge/mcpManagement/resources.py b/mcp_bridge/mcpManagement/resources.py index 0a2c583..c9c7197 100644 --- a/mcp_bridge/mcpManagement/resources.py +++ b/mcp_bridge/mcpManagement/resources.py @@ -1,4 +1,4 @@ -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter from mcp_bridge.mcp_clients.McpClientManager import ClientManager from mcp.types import ListResourcesResult diff --git a/mcp_bridge/mcp_clients/AbstractClient.py b/mcp_bridge/mcp_clients/AbstractClient.py index 2fc1532..b07ab01 100644 --- a/mcp_bridge/mcp_clients/AbstractClient.py +++ b/mcp_bridge/mcp_clients/AbstractClient.py @@ -41,9 +41,13 @@ async def _session_maintainer(self): try: await self._maintain_session() except FileNotFoundError as e: - logger.error(f"failed to maintain session for {self.name}: file {e.filename} not found.") + logger.error( + f"failed to maintain session for {self.name}: file {e.filename} not found." + ) except Exception as e: - logger.error(f"failed to maintain session for {self.name}: {type(e)} {e.args}") + logger.error( + f"failed to maintain session for {self.name}: {type(e)} {e.args}" + ) logger.debug(f"restarting session for {self.name}") await asyncio.sleep(0.5) @@ -139,10 +143,11 @@ async def _wait_for_session(self, timeout: int = 5, http_error: bool = True): except asyncio.TimeoutError: if http_error: raise HTTPException( - status_code=500, detail=f"Could not connect to MCP server \"{self.name}\"." + status_code=500, + detail=f'Could not connect to MCP server "{self.name}".', ) - raise TimeoutError(f"Could not connect to MCP server \"{self.name}\"." 
) + raise TimeoutError(f'Could not connect to MCP server "{self.name}".') assert self.session is not None, "Session is None" diff --git a/mcp_bridge/mcp_clients/McpClientManager.py b/mcp_bridge/mcp_clients/McpClientManager.py index 7bf6c89..f30eff8 100644 --- a/mcp_bridge/mcp_clients/McpClientManager.py +++ b/mcp_bridge/mcp_clients/McpClientManager.py @@ -40,7 +40,7 @@ async def construct_client(self, name, server_config) -> client_types: client = SseClient(name, server_config) # type: ignore await client.start() return client - + if isinstance(server_config, DockerMCPServer): client = DockerClient(name, server_config) await client.start() @@ -56,7 +56,6 @@ def get_clients(self): async def get_client_from_tool(self, tool: str): for name, client in self.get_clients(): - # client cannot have tools if it is not connected if not client.session: continue @@ -71,7 +70,6 @@ async def get_client_from_tool(self, tool: str): async def get_client_from_prompt(self, prompt: str): for name, client in self.get_clients(): - # client cannot have prompts if it is not connected if not client.session: continue diff --git a/mcp_bridge/mcp_clients/StdioClient.py b/mcp_bridge/mcp_clients/StdioClient.py index 1923f40..189ace7 100644 --- a/mcp_bridge/mcp_clients/StdioClient.py +++ b/mcp_bridge/mcp_clients/StdioClient.py @@ -12,6 +12,7 @@ # Keywords to identify virtual environment variables venv_keywords = ["CONDA", "VIRTUAL", "PYTHON"] + class StdioClient(GenericMcpClient): config: StdioServerParameters @@ -25,7 +26,8 @@ def __init__(self, name: str, config: StdioServerParameters) -> None: env = dict(os.environ.copy()) env = { - key: value for key, value in env.items() + key: value + for key, value in env.items() if not any(key.startswith(keyword) for keyword in venv_keywords) } diff --git a/mcp_bridge/mcp_clients/session.py b/mcp_bridge/mcp_clients/session.py index 6b29296..e4c380f 100644 --- a/mcp_bridge/mcp_clients/session.py +++ b/mcp_bridge/mcp_clients/session.py @@ -25,7 +25,6 @@ class McpClientSession( types.ServerNotification, ] ): - def __init__( self, read_stream: MemoryObjectReceiveStream[types.JSONRPCMessage | Exception], @@ -50,11 +49,11 @@ async def initialize(self) -> types.InitializeResult: capabilities=types.ClientCapabilities( sampling=types.SamplingCapability(), experimental=None, - roots=types.RootsCapability( - listChanged=True - ), + roots=types.RootsCapability(listChanged=True), + ), + clientInfo=types.Implementation( + name="MCP-Bridge", version=version ), - clientInfo=types.Implementation(name="MCP-Bridge", version=version), ), ) ), @@ -248,9 +247,10 @@ async def _received_request( client_response = types.ClientResult(**response.model_dump()) await responder.respond(client_response) - async def sample(self, params: types.CreateMessageRequestParams) -> types.CreateMessageResult: + async def sample( + self, params: types.CreateMessageRequestParams + ) -> types.CreateMessageResult: logger.info("got sampling request from mcp server") resp = await handle_sampling_message(params) logger.info("finished sampling request from mcp server") return resp - \ No newline at end of file diff --git a/mcp_bridge/openai_clients/chatCompletion.py b/mcp_bridge/openai_clients/chatCompletion.py index 89a0bc6..c817890 100644 --- a/mcp_bridge/openai_clients/chatCompletion.py +++ b/mcp_bridge/openai_clients/chatCompletion.py @@ -1,3 +1,4 @@ +import secrets from lmos_openai_types import ( CreateChatCompletionRequest, CreateChatCompletionResponse, @@ -6,7 +7,6 @@ from .utils import call_tool, 
chat_completion_add_tools from mcp_bridge.http_clients import get_client -from mcp_bridge.mcp_clients.McpClientManager import ClientManager from mcp_bridge.inference_engine_mappers.chat.requester import chat_completion_requester from mcp_bridge.inference_engine_mappers.chat.responder import chat_completion_responder from loguru import logger @@ -26,9 +26,9 @@ async def chat_completions( text = ( await get_client().post( "/chat/completions", - #content=request.model_dump_json( + # content=request.model_dump_json( # exclude_defaults=True, exclude_none=True, exclude_unset=True - #), + # ), json=chat_completion_requester(request), ) ).text @@ -38,11 +38,11 @@ async def chat_completions( except Exception as e: logger.error(f"Error parsing response: {text}") logger.error(e) - return # type: ignore - + return # type: ignore + if not response.choices: logger.error("no choices found in response") - return # type: ignore + return # type: ignore msg = response.choices[0].message msg = ChatCompletionRequestMessage( @@ -89,9 +89,9 @@ async def chat_completions( { "role": "tool", "content": tools_content, - "tool_call_id": tool_call.id, + "tool_call_id": tool_call.id or secrets.token_hex(16), } ) ) - logger.debug("sending next iteration of chat completion request") + logger.debug("sending next iteration of chat completion request") diff --git a/mcp_bridge/openai_clients/streamChatCompletion.py b/mcp_bridge/openai_clients/streamChatCompletion.py index 331c743..47fd3c8 100644 --- a/mcp_bridge/openai_clients/streamChatCompletion.py +++ b/mcp_bridge/openai_clients/streamChatCompletion.py @@ -11,7 +11,9 @@ ) from mcp_bridge.inference_engine_mappers.chat.requester import chat_completion_requester -from mcp_bridge.inference_engine_mappers.chat.stream_responder import chat_completion_stream_responder +from mcp_bridge.inference_engine_mappers.chat.stream_responder import ( + chat_completion_stream_responder, +) from .utils import call_tool, chat_completion_add_tools from mcp_bridge.models import SSEData from mcp_bridge.http_clients import get_client @@ -25,27 +27,32 @@ async def streaming_chat_completions(request: CreateChatCompletionRequest): # raise NotImplementedError("Streaming Chat Completion is not supported") - return EventSourceResponse( - content=chat_completions(request), - media_type="text/event-stream", - headers={"Cache-Control": "no-cache"}, - ) + return EventSourceResponse( + content=chat_completions(request), + media_type="text/event-stream", + headers={"Cache-Control": "no-cache"}, + ) + def format_error_as_sse(message: str) -> str: - return SSEData.model_validate({ - "id": f"error-{token_hex(16)}", - "provider": "MCP-Bridge", - "object": "chat.completion.chunk", - "created": int(time.time()), - "model": "MCP-Bridge", - "choices": [{ - "index": 0, - "delta": { - "role": "assistant", - "content": message, - }, - }] - }).model_dump_json() + return SSEData.model_validate( + { + "id": f"error-{token_hex(16)}", + "provider": "MCP-Bridge", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": "MCP-Bridge", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": message, + }, + } + ], + } + ).model_dump_json() async def chat_completions(request: CreateChatCompletionRequest): @@ -76,9 +83,8 @@ async def chat_completions(request: CreateChatCompletionRequest): async with aconnect_sse( get_client(), "post", "/chat/completions", content=json_data ) as event_source: - logger.debug(event_source.response.status_code) - + # check if the content type is correct 
because the aiter_sse method # will raise an exception if the content type is not correct if "Content-Type" in event_source.response.headers: @@ -90,23 +96,32 @@ async def chat_completions(request: CreateChatCompletionRequest): # logger.error(f"Response Status: {event_source.response.status_code}") # logger.error(f"Response Data: {error_data.decode(event_source.response.encoding or 'utf-8')}") # raise HTTPException(status_code=500, detail="Unexpected Content-Type") - data = json.loads(error_data.decode(event_source.response.encoding or 'utf-8')) + data = json.loads( + error_data.decode(event_source.response.encoding or "utf-8") + ) if message := data.get("error", {}).get("message"): logger.error(f"Upstream error: {message}") yield format_error_as_sse(message) - yield ['DONE'] # ServerSentEvent(event="message", data="[DONE]", id=None, retry=None) + yield [ + "DONE" + ] # ServerSentEvent(event="message", data="[DONE]", id=None, retry=None) return - if "text/event-stream" not in content_type: logger.error(f"Unexpected Content-Type: {content_type}") error_data = await event_source.response.aread() logger.error(f"Request URL: {event_source.response.url}") logger.error(f"Request Data: {json_data}") - logger.error(f"Response Status: {event_source.response.status_code}") - logger.error(f"Response Data: {error_data.decode(event_source.response.encoding or 'utf-8')}") + logger.error( + f"Response Status: {event_source.response.status_code}" + ) + logger.error( + f"Response Data: {error_data.decode(event_source.response.encoding or 'utf-8')}" + ) yield format_error_as_sse("Upsteam error: Unexpected Content-Type") - yield ServerSentEvent(event="message", data="[DONE]", id=None, retry=None) + yield ServerSentEvent( + event="message", data="[DONE]", id=None, retry=None + ) return # iterate over the SSE stream @@ -130,7 +145,7 @@ async def chat_completions(request: CreateChatCompletionRequest): except Exception as e: logger.debug(data) raise e - + # handle empty response (usually caused by "usage" reporting) if len(parsed_data.choices) == 0: logger.debug("no choices found in response") diff --git a/mcp_bridge/sampling/modelSelector.py b/mcp_bridge/sampling/modelSelector.py index ea70be7..d2e5731 100644 --- a/mcp_bridge/sampling/modelSelector.py +++ b/mcp_bridge/sampling/modelSelector.py @@ -4,21 +4,29 @@ from mcp_bridge.config import config + def euclidean_distance(point1, point2): """ Calculates the Euclidean distance between two points, ignoring None values. 
""" - valid_dimensions = [(p1, p2) for p1, p2 in zip(point1, point2) if p1 is not None and p2 is not None] + valid_dimensions = [ + (p1, p2) for p1, p2 in zip(point1, point2) if p1 is not None and p2 is not None + ] if not valid_dimensions: # No valid dimensions to compare - return float('inf') - + return float("inf") + return math.sqrt(sum((p1 - p2) ** 2 for p1, p2 in valid_dimensions)) + def find_best_model(preferences: ModelPreferences): distance = math.inf preffered_model = None - preference_points = (preferences.intelligencePriority, preferences.speedPriority, preferences.costPriority) + preference_points = ( + preferences.intelligencePriority, + preferences.speedPriority, + preferences.costPriority, + ) if preference_points == (None, None, None): return config.sampling.models[0] @@ -29,8 +37,8 @@ def find_best_model(preferences: ModelPreferences): if model_distance < distance: distance = model_distance preffered_model = model - + if preffered_model is None: preffered_model = config.sampling.models[0] - - return preffered_model \ No newline at end of file + + return preffered_model diff --git a/mcp_bridge/sampling/sampler.py b/mcp_bridge/sampling/sampler.py index c2379ae..85061ca 100644 --- a/mcp_bridge/sampling/sampler.py +++ b/mcp_bridge/sampling/sampler.py @@ -8,31 +8,37 @@ from mcp_bridge.http_clients import get_client from mcp_bridge.sampling.modelSelector import find_best_model + def make_message(x: SamplingMessage): if x.content.type == "text": return { "role": x.role, - "content": [{ - "type": "text", - "text": x.content.text, - }] + "content": [ + { + "type": "text", + "text": x.content.text, + } + ], } if x.content.type == "image": return { "role": x.role, - "content": [{ - "type": "image", - "image_url": x.content.data, - }] + "content": [ + { + "type": "image", + "image_url": x.content.data, + } + ], } + async def handle_sampling_message( message: CreateMessageRequestParams, ) -> CreateMessageResult: """perform sampling""" logger.debug(f"sampling message: {message.modelPreferences}") - + # select model model = config.sampling.models[0] if message.modelPreferences is not None: From 84e32b4e4ad227a885b79ee1b4e836f0db0dc210 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Sat, 25 Jan 2025 21:27:23 +0000 Subject: [PATCH 07/21] make sampler use client mapper --- mcp_bridge/sampling/sampler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mcp_bridge/sampling/sampler.py b/mcp_bridge/sampling/sampler.py index 85061ca..439f3a6 100644 --- a/mcp_bridge/sampling/sampler.py +++ b/mcp_bridge/sampling/sampler.py @@ -1,12 +1,13 @@ +import json from loguru import logger from mcp import SamplingMessage import mcp.types as types -from lmos_openai_types import CreateChatCompletionResponse from mcp.types import CreateMessageRequestParams, CreateMessageResult from mcp_bridge.config import config from mcp_bridge.http_clients import get_client from mcp_bridge.sampling.modelSelector import find_best_model +from mcp_bridge.inference_engine_mappers.chat.generic import chat_completion_generic_response def make_message(x: SamplingMessage): @@ -68,7 +69,7 @@ async def handle_sampling_message( text = resp.text logger.debug(text) - response = CreateChatCompletionResponse.model_validate_json(text) + response = chat_completion_generic_response(json.loads(text)) logger.debug("sampling request received from endpoint") From ba245a070aad6ac4c2ff3b0aad450345e72ed711 Mon Sep 17 00:00:00 2001 From: TerminalMan 
<84923604+SecretiveShell@users.noreply.github.com> Date: Sun, 26 Jan 2025 23:21:52 +0000 Subject: [PATCH 08/21] add error handler to chunk validation --- mcp_bridge/models/upstream_error.py | 10 ++++++++++ .../openai_clients/streamChatCompletion.py | 18 ++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 mcp_bridge/models/upstream_error.py diff --git a/mcp_bridge/models/upstream_error.py b/mcp_bridge/models/upstream_error.py new file mode 100644 index 0000000..67d144a --- /dev/null +++ b/mcp_bridge/models/upstream_error.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel, Field +from typing import Annotated + + +class UpstreamErrorDetails(BaseModel): + message: Annotated[str, Field(description="Error message")] = "An upstream error occurred" + code : Annotated[str | None, Field(description="Error code")] = "UPSTREAM_ERROR" + +class UpstreamError(BaseModel): + error: Annotated[UpstreamErrorDetails, Field(description="Error details")] \ No newline at end of file diff --git a/mcp_bridge/openai_clients/streamChatCompletion.py b/mcp_bridge/openai_clients/streamChatCompletion.py index 47fd3c8..9b53329 100644 --- a/mcp_bridge/openai_clients/streamChatCompletion.py +++ b/mcp_bridge/openai_clients/streamChatCompletion.py @@ -15,7 +15,7 @@ chat_completion_stream_responder, ) from .utils import call_tool, chat_completion_add_tools -from mcp_bridge.models import SSEData +from mcp_bridge.models import SSEData, upstream_error from mcp_bridge.http_clients import get_client from loguru import logger from httpx_sse import aconnect_sse @@ -140,11 +140,21 @@ async def chat_completions(request: CreateChatCompletionRequest): logger.debug("inference serverstream done") break + # try to parse the data as json, if this fails we assume it is an error message + # if parsing fails we send the error message to the client + dict_data = json.loads(data) try: - parsed_data = chat_completion_stream_responder(json.loads(data)) - except Exception as e: + parsed_data = chat_completion_stream_responder(dict_data) + except Exception: logger.debug(data) - raise e + try: + parsed_error_data = upstream_error.UpstreamError.model_validate_json(data) + yield format_error_as_sse(parsed_error_data.error.message) + except Exception: + yield format_error_as_sse(f"Error parsing response: {data}") + + yield ServerSentEvent(event="message", data="[DONE]", id=None, retry=None) + return # handle empty response (usually caused by "usage" reporting) if len(parsed_data.choices) == 0: From cf8face3d59ce9ceb572deddf6924a4078ef3fee Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Mon, 27 Jan 2025 00:17:57 +0000 Subject: [PATCH 09/21] make openrouter call tools properly --- .../openai_clients/streamChatCompletion.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/mcp_bridge/openai_clients/streamChatCompletion.py b/mcp_bridge/openai_clients/streamChatCompletion.py index 9b53329..c87c2cc 100644 --- a/mcp_bridge/openai_clients/streamChatCompletion.py +++ b/mcp_bridge/openai_clients/streamChatCompletion.py @@ -76,6 +76,7 @@ async def chat_completions(request: CreateChatCompletionRequest): tool_call_name: str = "" tool_call_json: str = "" + has_tool_calls: bool = False should_forward: bool = True response_content: str = "" tool_call_id: str = "" @@ -151,7 +152,7 @@ async def chat_completions(request: CreateChatCompletionRequest): parsed_error_data = upstream_error.UpstreamError.model_validate_json(data) yield 
format_error_as_sse(parsed_error_data.error.message) except Exception: - yield format_error_as_sse(f"Error parsing response: {data}") + yield format_error_as_sse(f"Error parsing response: {json.loads(data)}") yield ServerSentEvent(event="message", data="[DONE]", id=None, retry=None) return @@ -175,6 +176,9 @@ async def chat_completions(request: CreateChatCompletionRequest): fully_done = True else: should_forward = False + + if parsed_data.choices[0].finish_reason.value == "tool_calls": + has_tool_calls = True # this manages the incoming tool call schema # most of this is assertions to please mypy @@ -188,6 +192,8 @@ async def chat_completions(request: CreateChatCompletionRequest): name = name if name is not None else "" tool_call_name = name if tool_call_name == "" else tool_call_name + logger.debug(f"ARGS: {parsed_data.choices[0].delta.tool_calls[0].function.arguments}") + call_id = parsed_data.choices[0].delta.tool_calls[0].id call_id = call_id if call_id is not None else "" tool_call_id = id if tool_call_id == "" else tool_call_id @@ -207,13 +213,15 @@ async def chat_completions(request: CreateChatCompletionRequest): # ideally we should check this properly assert last is not None - if last.choices[0].finish_reason is None: - logger.debug("no finish reason found") - continue - if last.choices[0].finish_reason.value in ["stop", "length"]: - logger.debug("no tool calls found") - fully_done = True + if last.choices[0].finish_reason: + if last.choices[0].finish_reason.value in ["stop", "length"]: + logger.debug("no tool calls found") + fully_done = True + continue + + if last.choices[0].finish_reason is None and not has_tool_calls: + logger.debug("no finish reason found") continue logger.debug("tool calls found") From 889f0ff533aa04a7f3d3ee3a7bd6129ff19d602d Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Mon, 27 Jan 2025 20:54:53 +0000 Subject: [PATCH 10/21] fix gemini auth issue --- mcp_bridge/config/final.py | 2 +- mcp_bridge/endpoints.py | 39 +++++++++++++++++++ mcp_bridge/http_clients/__init__.py | 6 +-- .../chat/requester.py | 2 + .../chat/responder.py | 2 + .../chat/stream_responder.py | 2 + 6 files changed, 49 insertions(+), 4 deletions(-) diff --git a/mcp_bridge/config/final.py b/mcp_bridge/config/final.py index 30beabf..93b9575 100644 --- a/mcp_bridge/config/final.py +++ b/mcp_bridge/config/final.py @@ -7,7 +7,7 @@ class InferenceServer(BaseModel): - type: Literal["openai", "openrouter"] = Field( + type: Literal["openai", "openrouter", "gemini"] = Field( "openai", description="Type of inference server" ) # used to apply data mappers base_url: str = Field( diff --git a/mcp_bridge/endpoints.py b/mcp_bridge/endpoints.py index 77a6f3a..e7cb570 100644 --- a/mcp_bridge/endpoints.py +++ b/mcp_bridge/endpoints.py @@ -2,6 +2,7 @@ from lmos_openai_types import CreateChatCompletionRequest, CreateCompletionRequest +from mcp_bridge.config.final import InferenceServer from mcp_bridge.openai_clients import ( completions, chat_completions, @@ -10,6 +11,7 @@ from mcp_bridge.http_clients import get_client from mcp_bridge.openapi_tags import Tag +from mcp_bridge.config import config router = APIRouter(prefix="/v1", tags=[Tag.openai]) @@ -37,5 +39,42 @@ async def models(): """List models. 
From 889f0ff533aa04a7f3d3ee3a7bd6129ff19d602d Mon Sep 17 00:00:00 2001
From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com>
Date: Mon, 27 Jan 2025 20:54:53 +0000
Subject: [PATCH 10/21] fix gemini auth issue

---
 mcp_bridge/config/final.py                  |  2 +-
 mcp_bridge/endpoints.py                     | 39 +++++++++++++++++++
 mcp_bridge/http_clients/__init__.py         |  6 +--
 .../chat/requester.py                       |  2 +
 .../chat/responder.py                       |  2 +
 .../chat/stream_responder.py                |  2 +
 6 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/mcp_bridge/config/final.py b/mcp_bridge/config/final.py
index 30beabf..93b9575 100644
--- a/mcp_bridge/config/final.py
+++ b/mcp_bridge/config/final.py
@@ -7,7 +7,7 @@

 class InferenceServer(BaseModel):
-    type: Literal["openai", "openrouter"] = Field(
+    type: Literal["openai", "openrouter", "gemini"] = Field(
         "openai", description="Type of inference server"
     )  # used to apply data mappers
     base_url: str = Field(
diff --git a/mcp_bridge/endpoints.py b/mcp_bridge/endpoints.py
index 77a6f3a..e7cb570 100644
--- a/mcp_bridge/endpoints.py
+++ b/mcp_bridge/endpoints.py
@@ -2,6 +2,7 @@

 from lmos_openai_types import CreateChatCompletionRequest, CreateCompletionRequest

+from mcp_bridge.config.final import InferenceServer
 from mcp_bridge.openai_clients import (
     completions,
     chat_completions,
@@ -10,6 +11,7 @@

 from mcp_bridge.http_clients import get_client
 from mcp_bridge.openapi_tags import Tag
+from mcp_bridge.config import config

 router = APIRouter(prefix="/v1", tags=[Tag.openai])

@@ -37,5 +39,42 @@ async def models():
     """List models.

     This is a passthrough to the inference server and returns the same response json."""
+
+    # this is an ugly hack to work around an upstream bug in the gemini API
+    if config.inference_server.type == "gemini":
+        return list_gemini_models()
+
     response = await get_client().get("/models")
     return response.json()
+
+def list_gemini_models():
+    """Temporary hack to work around the gemini models endpoint bug."""
+    return {
+        "object": "list",
+        "data": [
+            {
+                "id": "gemini-2.0-flash-exp",
+                "object": "model",
+                "created": 1686935002,
+                "owned_by": "google",
+            },
+            {
+                "id": "gemini-1.5-flash",
+                "object": "model",
+                "created": 1686935002,
+                "owned_by": "google",
+            },
+            {
+                "id": "gemini-1.5-flash-8b",
+                "object": "model",
+                "created": 1686935002,
+                "owned_by": "google",
+            },
+            {
+                "id": "gemini-1.5-pro",
+                "object": "model",
+                "created": 1686935002,
+                "owned_by": "google",
+            }
+        ],
+    }
\ No newline at end of file
diff --git a/mcp_bridge/http_clients/__init__.py b/mcp_bridge/http_clients/__init__.py
index 9c4c9f1..194f102 100644
--- a/mcp_bridge/http_clients/__init__.py
+++ b/mcp_bridge/http_clients/__init__.py
@@ -28,8 +28,8 @@ def get_client() -> AsyncClient:
         return client

     # gemini models
-    if config.inference_server.type == "google":
-        pass
-        # TODO: implement google openai auth
+    if config.inference_server.type == "gemini":
+        client.headers["Authorization"] = rf"Bearer {config.inference_server.api_key}"
+        return client

     raise NotImplementedError("Inference Server Type not supported")
diff --git a/mcp_bridge/inference_engine_mappers/chat/requester.py b/mcp_bridge/inference_engine_mappers/chat/requester.py
index a231f1f..c7b5d22 100644
--- a/mcp_bridge/inference_engine_mappers/chat/requester.py
+++ b/mcp_bridge/inference_engine_mappers/chat/requester.py
@@ -14,3 +14,5 @@ def chat_completion_requester(data: CreateChatCompletionRequest) -> dict:
         case "openrouter":
             # TODO: implement openrouter requester
             return chat_completion_openrouter_request(data)
+        case _:
+            return chat_completion_generic_request(data)
\ No newline at end of file
diff --git a/mcp_bridge/inference_engine_mappers/chat/responder.py b/mcp_bridge/inference_engine_mappers/chat/responder.py
index 9322d07..daac7eb 100644
--- a/mcp_bridge/inference_engine_mappers/chat/responder.py
+++ b/mcp_bridge/inference_engine_mappers/chat/responder.py
@@ -14,3 +14,5 @@ def chat_completion_responder(data: dict) -> CreateChatCompletionResponse:
         case "openrouter":
             # TODO: implement openrouter responder
             return chat_completion_openrouter_response(data)
+        case _:
+            return chat_completion_generic_response(data)
diff --git a/mcp_bridge/inference_engine_mappers/chat/stream_responder.py b/mcp_bridge/inference_engine_mappers/chat/stream_responder.py
index 7d97b57..72f6544 100644
--- a/mcp_bridge/inference_engine_mappers/chat/stream_responder.py
+++ b/mcp_bridge/inference_engine_mappers/chat/stream_responder.py
@@ -14,5 +14,7 @@ def chat_completion_stream_responder(data: dict) -> CreateChatCompletionStreamRe
         case "openrouter":
             # TODO: implement openrouter responder
             return chat_completion_openrouter_stream_response(data)
+        case _:
+            return chat_completion_generic_stream_response(data)
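The case _ fallback added above means an unrecognised server type degrades to the generic OpenAI-compatible mappers instead of failing the match. The dispatch shape reduced to a self-contained toy (the function and field names here are illustrative, not the real mappers):

    def dispatch_request(server_type: str, data: dict) -> dict:
        match server_type:
            case "openrouter":
                return {**data, "mapper": "openrouter"}
            case _:
                return {**data, "mapper": "generic"}  # anything unknown falls through

    assert dispatch_request("some-new-server", {})["mapper"] == "generic"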
From 4009de01004f6c64b3c0964a96305bab6dd858af Mon Sep 17 00:00:00 2001
From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com>
Date: Wed, 29 Jan 2025 22:46:44 +0000
Subject: [PATCH 11/21] add error handler to non streaming chat completion

---
 mcp_bridge/openai_clients/chatCompletion.py | 44 +++++++++++++++++----
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/mcp_bridge/openai_clients/chatCompletion.py b/mcp_bridge/openai_clients/chatCompletion.py
index c817890..8bf21a7 100644
--- a/mcp_bridge/openai_clients/chatCompletion.py
+++ b/mcp_bridge/openai_clients/chatCompletion.py
@@ -1,8 +1,13 @@
 import secrets
+import time
 from lmos_openai_types import (
+    ChatCompletionResponseMessage,
+    Choice1,
     CreateChatCompletionRequest,
     CreateChatCompletionResponse,
     ChatCompletionRequestMessage,
+    FinishReason1,
 )

 from .utils import call_tool, chat_completion_add_tools
@@ -12,6 +17,25 @@
 from loguru import logger
 import json

+def format_error_as_chat_completion(message: str) -> CreateChatCompletionResponse:
+    return CreateChatCompletionResponse.model_validate(
+        {
+            "model": "MCP-Bridge",
+            "choices": [
+                {
+                    "index": 0,
+                    "finish_reason": "stop",
+                    "message": {
+                        "content": message,
+                        "role": "assistant",
+                    }
+                }
+            ],
+            "id": secrets.token_hex(16),
+            "created": int(time.time()),
+            "object": "chat.completion",
+        }
+    )

 async def chat_completions(
     request: CreateChatCompletionRequest,
@@ -23,26 +47,30 @@ async def chat_completions(
     while True:
         # logger.debug(request.model_dump_json())

-        text = (
-            await get_client().post(
+        response = await get_client().post(
             "/chat/completions",
-            # content=request.model_dump_json(
-            #     exclude_defaults=True, exclude_none=True, exclude_unset=True
-            # ),
             json=chat_completion_requester(request),
         )
-        ).text
+        text = response.text
         logger.debug(text)

         try:
             response = chat_completion_responder(json.loads(text))
         except Exception as e:
             logger.error(f"Error parsing response: {text}")
             logger.error(e)
-            return  # type: ignore
+
+            # openrouter returns a json error message
+            try:
+                response = json.loads(text)
+                return format_error_as_chat_completion(f"Upstream error: {response['error']['message']}")
+            except Exception:
+                pass
+
+            return format_error_as_chat_completion(f"Error parsing response: {text}")

         if not response.choices:
             logger.error("no choices found in response")
-            return  # type: ignore
+            return format_error_as_chat_completion("no choices found in response")

         msg = response.choices[0].message
         msg = ChatCompletionRequestMessage(
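Returning a synthetic completion instead of None keeps clients that expect a well-formed response body working even when the upstream reply is garbage. Assuming the lmos_openai_types accessors behave as they do elsewhere in this file, the helper can be exercised like this:

    resp = format_error_as_chat_completion("Upstream error: invalid API key")
    print(resp.model)                       # MCP-Bridge
    print(resp.choices[0].message.content)  # Upstream error: invalid API key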
From 28f85c1516020d4b30305386128a11f0fb01a4ed Mon Sep 17 00:00:00 2001
From: James Brown
Date: Thu, 30 Jan 2025 21:06:17 +0800
Subject: [PATCH 12/21] Update launch.json

---
 .vscode/launch.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 977eeda..2dcb99c 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -10,6 +10,7 @@
             "request": "launch",
             "django": true,
             "module": "mcp_bridge.main",
+            "pythonArgs": ["-Xutf8"]
         }
     ]
-}
\ No newline at end of file
+}

From 8823a93e4a518114c8f4a56d1beda8b8c1f8c8e6 Mon Sep 17 00:00:00 2001
From: James Brown
Date: Thu, 30 Jan 2025 21:09:00 +0800
Subject: [PATCH 13/21] Update utils.py

---
 mcp_bridge/openai_clients/utils.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/mcp_bridge/openai_clients/utils.py b/mcp_bridge/openai_clients/utils.py
index 58c269b..57c1be7 100644
--- a/mcp_bridge/openai_clients/utils.py
+++ b/mcp_bridge/openai_clients/utils.py
@@ -3,6 +3,7 @@
 from lmos_openai_types import CreateChatCompletionRequest
 import mcp.types
 import json
+import traceback

 from mcp_bridge.mcp_clients.McpClientManager import ClientManager
 from mcp_bridge.tool_mappers import mcp2openai
@@ -10,17 +11,22 @@

 async def chat_completion_add_tools(request: CreateChatCompletionRequest):
     request.tools = []
+    logger.info("adding tools to request")

     for _, session in ClientManager.get_clients():
         # if session is None, then the client is not running
         if session.session is None:
-            logger.error(f"session is `None` for {session.name}")
+            logger.error(f"session is `None` for {session.name}")  # Date: 2025/01/25 why not running?
             continue
-
+        logger.debug(f"session ready for {session.name}")
         tools = await session.session.list_tools()
         for tool in tools.tools:
             request.tools.append(mcp2openai(tool))
-
+
+    if request.tools == []:
+        logger.info("this request loads no tools")
+        # raise Exception("no tools found. unable to initiate chat completion.")
+        request.tools = None
     return request

@@ -42,9 +48,10 @@ async def call_tool(
         return None

     try:
-        tool_call_args = json.loads(tool_call_json)
+        tool_call_args = json.loads(tool_call_json)  # Date: 2025/01/26 cannot load this tool call json?
     except json.JSONDecodeError:
         logger.error(f"failed to decode json for {tool_call_name}")
+        traceback.print_exc()
         return None

     return await session.call_tool(tool_call_name, tool_call_args, timeout)
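Resetting request.tools to None (rather than leaving an empty list) matters because the request mappers dump with exclude_none=True, so the key is omitted from the wire format entirely; an explicit "tools": [] would still be serialized, and the likely motivation is that some servers reject it. The difference, sketched:

    body = {"model": "some-model", "messages": [], "tools": None}
    wire = {k: v for k, v in body.items() if v is not None}  # what exclude_none does
    print(wire)  # {'model': 'some-model', 'messages': []} -- no "tools" key sent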
From a0cf387058af746bb95e4fd1306fb5ce20e86186 Mon Sep 17 00:00:00 2001
From: James Brown
Date: Thu, 30 Jan 2025 21:14:53 +0800
Subject: [PATCH 14/21] Update utils.py

---
 mcp_bridge/openai_clients/utils.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/mcp_bridge/openai_clients/utils.py b/mcp_bridge/openai_clients/utils.py
index 57c1be7..73e318d 100644
--- a/mcp_bridge/openai_clients/utils.py
+++ b/mcp_bridge/openai_clients/utils.py
@@ -9,6 +9,21 @@
 from mcp_bridge.tool_mappers import mcp2openai


+def validate_if_json_object_parsable(content: str):
+    try:
+        json.loads(content)
+        return True
+    except ValueError:
+        return False
+
+
+def salvage_parsable_json_object(content: str):
+    # drop characters from the end one at a time until the prefix parses
+    content = content.strip()
+    for i in range(0, len(content)):
+        snippet = content[: len(content) - i]
+        if validate_if_json_object_parsable(snippet):
+            return snippet
+    return None  # nothing salvageable
+
 async def chat_completion_add_tools(request: CreateChatCompletionRequest):
     request.tools = []
     logger.info("adding tools to request")

From 1fb17d7a5a5e4362227f9ff1e223076e8d6b0946 Mon Sep 17 00:00:00 2001
From: James Brown
Date: Thu, 30 Jan 2025 21:16:32 +0800
Subject: [PATCH 15/21] Delete mcp_bridge/openai_clients/streamCompletion.py

---
 mcp_bridge/openai_clients/streamCompletion.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 mcp_bridge/openai_clients/streamCompletion.py

diff --git a/mcp_bridge/openai_clients/streamCompletion.py b/mcp_bridge/openai_clients/streamCompletion.py
deleted file mode 100644
index e69de29..0000000
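salvage_parsable_json_object trims characters off the end of the buffer until the remaining prefix parses, which recovers tool arguments when an engine keeps generating past the closing brace. Trimming one character per attempt costs one json.loads call each, acceptable for short argument strings. Expected behaviour on invented inputs:

    print(salvage_parsable_json_object('{"city": "Paris"}extra tokens'))
    # {"city": "Paris"}
    print(salvage_parsable_json_object('not json at all'))
    # None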
logger.debug(f"arguments:\n{json_pretty_print(tool_call.function.arguments)}") + else: + logger.debug("non-json arguments given: %s" % tool_call.function.arguments) + logger.debug("unable to parse tool call argument as json. skipping...") + continue + # FIXME: this can probably be done in parallel using asyncio gather tool_call_result = await call_tool( tool_call.function.name, tool_call.function.arguments From e21cff532f176a15744b1bfd17a531d0bbc83508 Mon Sep 17 00:00:00 2001 From: James Brown Date: Thu, 30 Jan 2025 21:40:29 +0800 Subject: [PATCH 18/21] Update utils.py --- mcp_bridge/openai_clients/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mcp_bridge/openai_clients/utils.py b/mcp_bridge/openai_clients/utils.py index 73e318d..010974c 100644 --- a/mcp_bridge/openai_clients/utils.py +++ b/mcp_bridge/openai_clients/utils.py @@ -9,6 +9,14 @@ from mcp_bridge.tool_mappers import mcp2openai +def json_pretty_print(obj) -> str: + if type(obj) == bytes: + obj = obj.decode() + if type(obj) == str: + obj = json.loads(obj) + ret = json.dumps(obj, indent=4, ensure_ascii=False) + return ret + def validate_if_json_object_parsable(content: str): try: json.loads(content) From 1a322e5144389f1304af58a635080cbf6e986464 Mon Sep 17 00:00:00 2001 From: James Brown Date: Thu, 30 Jan 2025 21:41:41 +0800 Subject: [PATCH 19/21] Update streamChatCompletion.py --- mcp_bridge/openai_clients/streamChatCompletion.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mcp_bridge/openai_clients/streamChatCompletion.py b/mcp_bridge/openai_clients/streamChatCompletion.py index 7c27b88..494e1d2 100644 --- a/mcp_bridge/openai_clients/streamChatCompletion.py +++ b/mcp_bridge/openai_clients/streamChatCompletion.py @@ -21,8 +21,9 @@ from .utils import ( call_tool, chat_completion_add_tools, - validate_if_json_object_parsable, + json_pretty_print, salvage_parsable_json_object, + validate_if_json_object_parsable, ) from mcp_bridge.models import SSEData, upstream_error from mcp_bridge.http_clients import get_client @@ -77,7 +78,7 @@ async def chat_completions(request: CreateChatCompletionRequest): # exclude_defaults=True, exclude_none=True, exclude_unset=True # ) - json_data = json.dumps(chat_completion_requester(request), indent=4, ensure_ascii=False) + json_data = json_pretty_print(chat_completion_requester(request)) logger.debug("Request JSON:\n%s" % json_data) From a5373084fe74a35eaa589497b6f3170113707c99 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Thu, 6 Feb 2025 16:17:36 +0000 Subject: [PATCH 20/21] enforce tool call ids --- mcp_bridge/http_clients/__init__.py | 2 +- .../chat/openrouter/request.py | 33 ++++++++++++++++++- .../chat/openrouter/response.py | 18 +++++++++- mcp_bridge/openai_clients/chatCompletion.py | 2 +- .../openai_clients/streamChatCompletion.py | 4 +++ 5 files changed, 55 insertions(+), 4 deletions(-) diff --git a/mcp_bridge/http_clients/__init__.py b/mcp_bridge/http_clients/__init__.py index 194f102..81e0d8f 100644 --- a/mcp_bridge/http_clients/__init__.py +++ b/mcp_bridge/http_clients/__init__.py @@ -17,7 +17,7 @@ def get_client() -> AsyncClient: # generic openai if config.inference_server.type == "openai": - client.headers["Authorization"] = f"Bearer {config.inference_server.api_key}" + client.headers["Authorization"] = rf"Bearer {config.inference_server.api_key}" return client # openrouter diff --git a/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py 
From e4a83e08a357de0e613f48cfeba51894eb54c6e9 Mon Sep 17 00:00:00 2001
From: James Brown
Date: Thu, 30 Jan 2025 21:37:42 +0800
Subject: [PATCH 17/21] Update chatCompletion.py

---
 mcp_bridge/openai_clients/chatCompletion.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/mcp_bridge/openai_clients/chatCompletion.py b/mcp_bridge/openai_clients/chatCompletion.py
index 8bf21a7..3241c91 100644
--- a/mcp_bridge/openai_clients/chatCompletion.py
+++ b/mcp_bridge/openai_clients/chatCompletion.py
@@ -10,7 +10,7 @@
     FinishReason1,
 )

-from .utils import call_tool, chat_completion_add_tools
+from .utils import call_tool, chat_completion_add_tools, validate_if_json_object_parsable, json_pretty_print
 from mcp_bridge.http_clients import get_client
 from mcp_bridge.inference_engine_mappers.chat.requester import chat_completion_requester
 from mcp_bridge.inference_engine_mappers.chat.responder import chat_completion_responder
@@ -86,11 +86,22 @@ async def chat_completions(
             return response

         logger.debug("tool calls found")
+
+        logger.debug("clearing tool contexts to prevent tool call loops")
+        request.tools = None
+
         for tool_call in response.choices[0].message.tool_calls.root:
             logger.debug(
-                f"tool call: {tool_call.function.name} arguments: {json.loads(tool_call.function.arguments)}"
+                f"tool call: {tool_call.function.name}"
             )

+            if validate_if_json_object_parsable(tool):
+                logger.debug(f"arguments:\n{json_pretty_print(tool_call.function.arguments)}")
+            else:
+                logger.debug("non-json arguments given: %s" % tool_call.function.arguments)
+                logger.debug("unable to parse tool call argument as json. skipping...")
+                continue
+
             # FIXME: this can probably be done in parallel using asyncio gather
             tool_call_result = await call_tool(
                 tool_call.function.name, tool_call.function.arguments

From e21cff532f176a15744b1bfd17a531d0bbc83508 Mon Sep 17 00:00:00 2001
From: James Brown
Date: Thu, 30 Jan 2025 21:40:29 +0800
Subject: [PATCH 18/21] Update utils.py

---
 mcp_bridge/openai_clients/utils.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/mcp_bridge/openai_clients/utils.py b/mcp_bridge/openai_clients/utils.py
index 73e318d..010974c 100644
--- a/mcp_bridge/openai_clients/utils.py
+++ b/mcp_bridge/openai_clients/utils.py
@@ -9,6 +9,14 @@
 from mcp_bridge.tool_mappers import mcp2openai


+def json_pretty_print(obj) -> str:
+    if isinstance(obj, bytes):
+        obj = obj.decode()
+    if isinstance(obj, str):
+        obj = json.loads(obj)
+    ret = json.dumps(obj, indent=4, ensure_ascii=False)
+    return ret
+
 def validate_if_json_object_parsable(content: str):
     try:
         json.loads(content)

From 1a322e5144389f1304af58a635080cbf6e986464 Mon Sep 17 00:00:00 2001
From: James Brown
Date: Thu, 30 Jan 2025 21:41:41 +0800
Subject: [PATCH 19/21] Update streamChatCompletion.py

---
 mcp_bridge/openai_clients/streamChatCompletion.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mcp_bridge/openai_clients/streamChatCompletion.py b/mcp_bridge/openai_clients/streamChatCompletion.py
index 7c27b88..494e1d2 100644
--- a/mcp_bridge/openai_clients/streamChatCompletion.py
+++ b/mcp_bridge/openai_clients/streamChatCompletion.py
@@ -21,8 +21,9 @@
 from .utils import (
     call_tool,
     chat_completion_add_tools,
-    validate_if_json_object_parsable,
+    json_pretty_print,
     salvage_parsable_json_object,
+    validate_if_json_object_parsable,
 )
 from mcp_bridge.models import SSEData, upstream_error
 from mcp_bridge.http_clients import get_client
@@ -77,7 +78,7 @@ async def chat_completions(request: CreateChatCompletionRequest):
     #     exclude_defaults=True, exclude_none=True, exclude_unset=True
     # )

-    json_data = json.dumps(chat_completion_requester(request), indent=4, ensure_ascii=False)
+    json_data = json_pretty_print(chat_completion_requester(request))

     logger.debug("Request JSON:\n%s" % json_data)
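json_pretty_print accepts bytes, a str, or an already-decoded object, and ensure_ascii=False keeps non-ASCII tool output readable in the logs rather than escaped. For example:

    print(json_pretty_print('{"name": "北京"}'))
    # {
    #     "name": "北京"
    # }
    # with json.dumps' default ensure_ascii=True this would print "\u5317\u4eac"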
From a5373084fe74a35eaa589497b6f3170113707c99 Mon Sep 17 00:00:00 2001
From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com>
Date: Thu, 6 Feb 2025 16:17:36 +0000
Subject: [PATCH 20/21] enforce tool call ids

---
 mcp_bridge/http_clients/__init__.py         |  2 +-
 .../chat/openrouter/request.py              | 33 ++++++++++++++++++-
 .../chat/openrouter/response.py             | 18 +++++++++-
 mcp_bridge/openai_clients/chatCompletion.py |  2 +-
 .../openai_clients/streamChatCompletion.py  |  4 +++
 5 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/mcp_bridge/http_clients/__init__.py b/mcp_bridge/http_clients/__init__.py
index 194f102..81e0d8f 100644
--- a/mcp_bridge/http_clients/__init__.py
+++ b/mcp_bridge/http_clients/__init__.py
@@ -17,7 +17,7 @@ def get_client() -> AsyncClient:

     # generic openai
     if config.inference_server.type == "openai":
-        client.headers["Authorization"] = f"Bearer {config.inference_server.api_key}"
+        client.headers["Authorization"] = rf"Bearer {config.inference_server.api_key}"
         return client

     # openrouter
diff --git a/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py b/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py
index 4905a37..0244d38 100644
--- a/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py
+++ b/mcp_bridge/inference_engine_mappers/chat/openrouter/request.py
@@ -1,5 +1,36 @@
+import json
+import secrets
+from typing import Any, cast
 from lmos_openai_types import CreateChatCompletionRequest
+from loguru import logger


 def chat_completion_openrouter_request(data: CreateChatCompletionRequest) -> dict:
-    return data.model_dump(exclude_defaults=True, exclude_none=True, exclude_unset=True)
+
+    dumped_data = data.model_dump(exclude_defaults=True, exclude_none=True, exclude_unset=True)
+
+    # make sure we have a tool call id for each tool call
+    try:
+        for message in dumped_data["messages"]:
+
+            message = cast(dict[str, Any], message)
+
+            if message["role"] == "assistant":
+                if message.get("tool_calls") is None:
+                    continue
+                for tool_call in message["tool_calls"]:
+                    tool_call["tool_call_id"] = tool_call.get("id", secrets.token_hex(16))
+
+            if message["role"] == "tool":
+                if message.get("tool_call_id") is None:
+                    message["tool_call_id"] = secrets.token_hex(16)
+                if message.get("id") is None:
+                    message["id"] = message["tool_call_id"]
+
+    except Exception as e:
+        logger.error(e)
+
+    logger.debug(f"dumped data: {dumped_data}")
+    logger.debug(f"json dumped data: {json.dumps(dumped_data)}")
+
+    return dumped_data
diff --git a/mcp_bridge/inference_engine_mappers/chat/openrouter/response.py b/mcp_bridge/inference_engine_mappers/chat/openrouter/response.py
index 1252650..1a96bcf 100644
--- a/mcp_bridge/inference_engine_mappers/chat/openrouter/response.py
+++ b/mcp_bridge/inference_engine_mappers/chat/openrouter/response.py
@@ -1,5 +1,21 @@
+import secrets
+from typing import cast
 from lmos_openai_types import CreateChatCompletionResponse
+from loguru import logger


 def chat_completion_openrouter_response(data: dict) -> CreateChatCompletionResponse:
-    return CreateChatCompletionResponse.model_validate(data)
+    validated_data = CreateChatCompletionResponse.model_validate(data)
+
+    # make sure tool call ids are not none
+    for choice in validated_data.choices:
+        if choice.message.tool_calls is None:
+            continue
+        for tool_call in choice.message.tool_calls:
+            logger.debug(f"tool call: {tool_call[1]}")
+            for calls in tool_call[1]:
+                if calls.id is None:
+                    calls.id = secrets.token_hex(16)
+
+    logger.debug(f"validated data: {validated_data.model_dump_json()}")
+    return validated_data
\ No newline at end of file
diff --git a/mcp_bridge/openai_clients/chatCompletion.py b/mcp_bridge/openai_clients/chatCompletion.py
index 3241c91..f67cf60 100644
--- a/mcp_bridge/openai_clients/chatCompletion.py
+++ b/mcp_bridge/openai_clients/chatCompletion.py
@@ -95,7 +95,7 @@ async def chat_completions(
                 f"tool call: {tool_call.function.name}"
             )

-            if validate_if_json_object_parsable(tool):
+            if validate_if_json_object_parsable(tool_call.function.arguments):
                 logger.debug(f"arguments:\n{json_pretty_print(tool_call.function.arguments)}")
             else:
                 logger.debug("non-json arguments given: %s" % tool_call.function.arguments)
diff --git a/mcp_bridge/openai_clients/streamChatCompletion.py b/mcp_bridge/openai_clients/streamChatCompletion.py
index 494e1d2..1862cb0 100644
--- a/mcp_bridge/openai_clients/streamChatCompletion.py
+++ b/mcp_bridge/openai_clients/streamChatCompletion.py
@@ -1,6 +1,7 @@
 import datetime
 import json
 import os
+import secrets
 import time
 import traceback
 from typing import Optional
@@ -265,6 +266,9 @@ async def chat_completions(request: CreateChatCompletionRequest):
         logger.debug("clearing tool contexts to prevent tool call loops")
         request.tools = None

+        if tool_call_id is None or tool_call_id == "":
+            tool_call_id = secrets.token_hex(16)
+
         # add received message to the history
         msg = ChatCompletionRequestMessage(
             role="assistant",
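The id enforcement above exists because the assistant message carrying a tool call and the tool message carrying its result are paired by id; a missing id breaks that pairing on strict servers. The core idea in isolation:

    import secrets

    tool_call = {"function": {"name": "get_weather", "arguments": "{}"}}  # no id given
    tool_call.setdefault("id", secrets.token_hex(16))
    print(len(tool_call["id"]))  # 32 -- a random hex id now pairs call and result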
From 76f5f7981fbc09472d1f1082ecd1a5107176f436 Mon Sep 17 00:00:00 2001
From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com>
Date: Sat, 8 Feb 2025 21:34:27 +0000
Subject: [PATCH 21/21] add gemini response support

---
 .../chat/gemini/request.py                        |  8 ++++++++
 .../chat/gemini/response.py                       | 10 ++++++++++
 .../chat/gemini/stream_response.py                | 14 ++++++++++++++
 .../inference_engine_mappers/chat/requester.py    |  3 +++
 .../inference_engine_mappers/chat/responder.py    |  3 +++
 .../chat/stream_responder.py                      |  3 +++
 mcp_bridge/models/chatCompletionStreamResponse.py |  2 +-
 7 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 mcp_bridge/inference_engine_mappers/chat/gemini/request.py
 create mode 100644 mcp_bridge/inference_engine_mappers/chat/gemini/response.py
 create mode 100644 mcp_bridge/inference_engine_mappers/chat/gemini/stream_response.py

diff --git a/mcp_bridge/inference_engine_mappers/chat/gemini/request.py b/mcp_bridge/inference_engine_mappers/chat/gemini/request.py
new file mode 100644
index 0000000..3298a06
--- /dev/null
+++ b/mcp_bridge/inference_engine_mappers/chat/gemini/request.py
@@ -0,0 +1,8 @@
+from lmos_openai_types import CreateChatCompletionRequest
+
+
+def chat_completion_gemini_request(data: CreateChatCompletionRequest) -> dict:
+
+    dumped_data = data.model_dump(exclude_defaults=True, exclude_none=True, exclude_unset=True)
+
+    return dumped_data
diff --git a/mcp_bridge/inference_engine_mappers/chat/gemini/response.py b/mcp_bridge/inference_engine_mappers/chat/gemini/response.py
new file mode 100644
index 0000000..7dd0d0d
--- /dev/null
+++ b/mcp_bridge/inference_engine_mappers/chat/gemini/response.py
@@ -0,0 +1,10 @@
+from lmos_openai_types import CreateChatCompletionResponse
+
+
+def chat_completion_gemini_response(data: dict) -> CreateChatCompletionResponse:
+
+    if "id" not in data or data["id"] == "":
+        data["id"] = "default-id"
+
+    validated_data = CreateChatCompletionResponse.model_validate(data)
+    return validated_data
\ No newline at end of file
diff --git a/mcp_bridge/inference_engine_mappers/chat/gemini/stream_response.py b/mcp_bridge/inference_engine_mappers/chat/gemini/stream_response.py
new file mode 100644
index 0000000..f1b0ec4
--- /dev/null
+++ b/mcp_bridge/inference_engine_mappers/chat/gemini/stream_response.py
@@ -0,0 +1,14 @@
+from lmos_openai_types import CreateChatCompletionStreamResponse
+from loguru import logger
+
+
+def chat_completion_gemini_stream_response(
+    data: dict,
+) -> CreateChatCompletionStreamResponse:  # type: ignore
+
+    logger.debug(f"data: {data}")
+
+    if "id" not in data or data["id"] == "":
+        data["id"] = "default-id"
+
+    return CreateChatCompletionStreamResponse.model_validate(data)
diff --git a/mcp_bridge/inference_engine_mappers/chat/requester.py b/mcp_bridge/inference_engine_mappers/chat/requester.py
index c7b5d22..aa3a47c 100644
--- a/mcp_bridge/inference_engine_mappers/chat/requester.py
+++ b/mcp_bridge/inference_engine_mappers/chat/requester.py
@@ -1,3 +1,4 @@
+from mcp_bridge.inference_engine_mappers.chat.gemini.request import chat_completion_gemini_request
 from .generic import chat_completion_generic_request
 from .openrouter.request import chat_completion_openrouter_request
 from lmos_openai_types import CreateChatCompletionRequest
@@ -14,5 +15,7 @@ def chat_completion_requester(data: CreateChatCompletionRequest) -> dict:
         case "openrouter":
             # TODO: implement openrouter requester
             return chat_completion_openrouter_request(data)
+        case "gemini":
+            return chat_completion_gemini_request(data)
         case _:
             return chat_completion_generic_request(data)
\ No newline at end of file
diff --git a/mcp_bridge/inference_engine_mappers/chat/responder.py b/mcp_bridge/inference_engine_mappers/chat/responder.py
index daac7eb..f74ea62 100644
--- a/mcp_bridge/inference_engine_mappers/chat/responder.py
+++ b/mcp_bridge/inference_engine_mappers/chat/responder.py
@@ -1,3 +1,4 @@
+from mcp_bridge.inference_engine_mappers.chat.gemini.response import chat_completion_gemini_response
 from .generic import chat_completion_generic_response
 from .openrouter.response import chat_completion_openrouter_response
 from lmos_openai_types import CreateChatCompletionResponse
@@ -14,5 +15,7 @@ def chat_completion_responder(data: dict) -> CreateChatCompletionResponse:
         case "openrouter":
             # TODO: implement openrouter responder
             return chat_completion_openrouter_response(data)
+        case "gemini":
+            return chat_completion_gemini_response(data)
         case _:
             return chat_completion_generic_response(data)
diff --git a/mcp_bridge/inference_engine_mappers/chat/stream_responder.py b/mcp_bridge/inference_engine_mappers/chat/stream_responder.py
index 72f6544..fafb166 100644
--- a/mcp_bridge/inference_engine_mappers/chat/stream_responder.py
+++ b/mcp_bridge/inference_engine_mappers/chat/stream_responder.py
@@ -1,3 +1,4 @@
+from mcp_bridge.inference_engine_mappers.chat.gemini.stream_response import chat_completion_gemini_stream_response
 from .generic import chat_completion_generic_stream_response
 from .openrouter.stream_response import chat_completion_openrouter_stream_response
 from lmos_openai_types import CreateChatCompletionStreamResponse
@@ -14,5 +15,7 @@ def chat_completion_stream_responder(data: dict) -> CreateChatCompletionStreamRe
         case "openrouter":
             # TODO: implement openrouter responder
             return chat_completion_openrouter_stream_response(data)
+        case "gemini":
+            return chat_completion_gemini_stream_response(data)
         case _:
             return chat_completion_generic_stream_response(data)
diff --git a/mcp_bridge/models/chatCompletionStreamResponse.py b/mcp_bridge/models/chatCompletionStreamResponse.py
index a3bcefc..daee7fb 100644
--- a/mcp_bridge/models/chatCompletionStreamResponse.py
+++ b/mcp_bridge/models/chatCompletionStreamResponse.py
@@ -15,7 +15,7 @@ class Choice(BaseModel):

 class SSEData(BaseModel):
-    id: str
+    id: str = "default-id"
     object: str
     created: int
     model: str
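The new SSEData default means gemini stream chunks that omit "id" now validate instead of raising a ValidationError. The effect, shown with a reduced stand-in model since SSEData's full field list is not reproduced here:

    from pydantic import BaseModel

    class MiniChunk(BaseModel):  # stand-in for SSEData's id handling only
        id: str = "default-id"

    print(MiniChunk.model_validate({}).id)  # default-id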