From 324e7aa81c55b91de5467643e3f411d621390588 Mon Sep 17 00:00:00 2001 From: "Lim, Ghim Boon" Date: Thu, 26 Feb 2026 10:28:39 +0800 Subject: [PATCH 1/5] fix: close MCP exit stack on connection failure to prevent resource leaks When an MCP connection fails (network error, auth failure, initialization timeout), the AsyncExitStack is never closed because the except block raises HTTPException before cleanup. The abandoned exit stack is later garbage-collected in a different asyncio task, causing: RuntimeError: Attempted to exit cancel scope in a different task than it was entered in This corrupts anyio's cancel-scope stack and can spin a CPU core at 100%. Changes: - Add safe_mcp_exit_stack_close() helper that suppresses the cross-task cancel scope RuntimeError from anyio during MCP cleanup - connect_mcp: track whether exit_stack was stored via a flag; close it in a finally block when the connection was not successfully stored - connect_mcp: properly delete the old session entry when reconnecting - disconnect_mcp: use safe_mcp_exit_stack_close instead of bare try/except - WebsocketSession.delete: use safe_mcp_exit_stack_close for consistent cleanup - Add tests for safe_mcp_exit_stack_close and cancel-scope error handling Fixes #2182 --- backend/chainlit/server.py | 31 ++++++---- backend/chainlit/session.py | 51 +++++++++++++++-- backend/tests/test_session.py | 105 ++++++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 16 deletions(-) diff --git a/backend/chainlit/server.py b/backend/chainlit/server.py index f9393e5e3b..c923f04905 100644 --- a/backend/chainlit/server.py +++ b/backend/chainlit/server.py @@ -1307,7 +1307,7 @@ async def connect_mcp( StdioMcpConnection, validate_mcp_command, ) - from chainlit.session import WebsocketSession + from chainlit.session import WebsocketSession, safe_mcp_exit_stack_close session = WebsocketSession.get_by_id(payload.sessionId) context = init_ws_context(session) @@ -1327,14 +1327,20 @@ async def connect_mcp( if payload.name in session.mcp_sessions: old_client_session, old_exit_stack = session.mcp_sessions[payload.name] if on_mcp_disconnect := config.code.on_mcp_disconnect: - await on_mcp_disconnect(payload.name, old_client_session) - try: - await old_exit_stack.aclose() - except Exception: - pass + try: + await on_mcp_disconnect(payload.name, old_client_session) + except Exception: + logger.debug( + "Error in on_mcp_disconnect callback for %s", + payload.name, + exc_info=True, + ) + await safe_mcp_exit_stack_close(old_exit_stack) + del session.mcp_sessions[payload.name] + exit_stack = AsyncExitStack() + exit_stack_stored = False try: - exit_stack = AsyncExitStack() mcp_connection: McpConnection if payload.clientType == "sse": @@ -1413,6 +1419,7 @@ async def connect_mcp( # Store the session session.mcp_sessions[mcp_connection.name] = (mcp_session, exit_stack) + exit_stack_stored = True # Call the callback if config.code.on_mcp_connect: @@ -1423,6 +1430,9 @@ async def connect_mcp( status_code=400, detail=f"Could not connect to the MCP: {e!s}", ) + finally: + if not exit_stack_stored: + await safe_mcp_exit_stack_close(exit_stack) else: raise HTTPException( status_code=400, @@ -1459,7 +1469,7 @@ async def disconnect_mcp( current_user: UserParam, ): from chainlit.context import init_ws_context - from chainlit.session import WebsocketSession + from chainlit.session import WebsocketSession, safe_mcp_exit_stack_close session = WebsocketSession.get_by_id(payload.sessionId) context = init_ws_context(session) @@ -1480,10 +1490,7 @@ async def disconnect_mcp( if callback: await callback(payload.name, client_session) - try: - await exit_stack.aclose() - except Exception: - pass + await safe_mcp_exit_stack_close(exit_stack) del session.mcp_sessions[payload.name] except Exception as e: diff --git a/backend/chainlit/session.py b/backend/chainlit/session.py index d6bd3f6214..10c7a45134 100644 --- a/backend/chainlit/session.py +++ b/backend/chainlit/session.py @@ -12,6 +12,52 @@ from chainlit.logger import logger from chainlit.types import AskFileSpec, FileReference + +async def safe_mcp_exit_stack_close(exit_stack: AsyncExitStack) -> None: + """Close an MCP exit stack, suppressing cross-task cancel scope errors. + + AnyIO raises RuntimeError when an AsyncExitStack that was entered in one + asyncio task is closed from a different task (e.g., during HTTP request + handling for disconnect, session deletion, or reconnection). + + The MCP SDK's streamable-http transport wraps this in a + BaseExceptionGroup via its internal TaskGroup, so both forms are caught. + + This helper catches the error so MCP cleanup never propagates a cross-task + cancel scope exception, which would otherwise leave orphaned resources and + can cause 100% CPU spin loops. + + See: https://github.com/Chainlit/chainlit/issues/2182 + """ + try: + await exit_stack.aclose() + except (RuntimeError, BaseExceptionGroup) as exc: + if _is_cancel_scope_error(exc): + logger.debug( + "Suppressed cross-task cancel scope error during MCP cleanup: %s", + exc, + ) + else: + logger.warning( + "Error closing MCP exit stack: %s", exc, exc_info=True + ) + except Exception: + logger.debug("Error closing MCP exit stack", exc_info=True) + + +def _is_cancel_scope_error(exc: BaseException) -> bool: + """Check whether an exception is an anyio cancel-scope cross-task error. + + Handles both a bare RuntimeError and a BaseExceptionGroup wrapping one + (as produced by anyio's TaskGroup when the streamable-http transport + tears down). + """ + if isinstance(exc, RuntimeError): + return "cancel scope" in str(exc) + if isinstance(exc, BaseExceptionGroup): + return any(_is_cancel_scope_error(e) for e in exc.exceptions) + return False + if TYPE_CHECKING: from mcp import ClientSession @@ -322,10 +368,7 @@ async def delete(self): ws_sessions_id.pop(self.id, None) for _, exit_stack in self.mcp_sessions.values(): - try: - await exit_stack.aclose() - except Exception: - pass + await safe_mcp_exit_stack_close(exit_stack) async def flush_method_queue(self): for method_name, queue in self.thread_queues.items(): diff --git a/backend/tests/test_session.py b/backend/tests/test_session.py index e98b7a0994..9ca5a3fdce 100644 --- a/backend/tests/test_session.py +++ b/backend/tests/test_session.py @@ -11,7 +11,9 @@ HTTPSession, JSONEncoderIgnoreNonSerializable, WebsocketSession, + _is_cancel_scope_error, clean_metadata, + safe_mcp_exit_stack_close, ) @@ -620,3 +622,106 @@ async def test_websocket_session_delete_with_mcp_sessions(self): await session.delete() mock_exit_stack.aclose.assert_called_once() + + @pytest.mark.asyncio + async def test_websocket_session_delete_with_cancel_scope_error(self): + """Test that session delete handles cancel scope RuntimeError gracefully.""" + + with tempfile.TemporaryDirectory() as tmpdir: + with patch("chainlit.config.FILES_DIRECTORY", Path(tmpdir)): + session = WebsocketSession( + id="ws_id", + socket_id="socket_123", + emit=Mock(), + emit_call=Mock(), + user_env={}, + client_type="webapp", + ) + + # Mock MCP session with exit stack that raises cancel scope error + mock_exit_stack = AsyncMock() + mock_exit_stack.aclose.side_effect = RuntimeError( + "Attempted to exit cancel scope in a different task" + ) + session.mcp_sessions["mcp1"] = (Mock(), mock_exit_stack) + + # Should not raise + await session.delete() + + mock_exit_stack.aclose.assert_called_once() + + +class TestSafeMcpExitStackClose: + """Test suite for safe_mcp_exit_stack_close helper.""" + + @pytest.mark.asyncio + async def test_closes_exit_stack_normally(self): + """Test normal exit stack close succeeds.""" + mock_exit_stack = AsyncMock() + await safe_mcp_exit_stack_close(mock_exit_stack) + mock_exit_stack.aclose.assert_called_once() + + @pytest.mark.asyncio + async def test_suppresses_cancel_scope_runtime_error(self): + """Test that cancel scope RuntimeError is suppressed.""" + mock_exit_stack = AsyncMock() + mock_exit_stack.aclose.side_effect = RuntimeError( + "Attempted to exit cancel scope in a different task than it was entered in" + ) + # Should not raise + await safe_mcp_exit_stack_close(mock_exit_stack) + mock_exit_stack.aclose.assert_called_once() + + @pytest.mark.asyncio + async def test_logs_warning_for_non_cancel_scope_runtime_error(self): + """Test that non-cancel-scope RuntimeErrors are logged as warnings.""" + mock_exit_stack = AsyncMock() + mock_exit_stack.aclose.side_effect = RuntimeError("something else") + # Should not raise + await safe_mcp_exit_stack_close(mock_exit_stack) + mock_exit_stack.aclose.assert_called_once() + + @pytest.mark.asyncio + async def test_suppresses_other_exceptions(self): + """Test that other exceptions during close are suppressed.""" + mock_exit_stack = AsyncMock() + mock_exit_stack.aclose.side_effect = OSError("connection reset") + # Should not raise + await safe_mcp_exit_stack_close(mock_exit_stack) + mock_exit_stack.aclose.assert_called_once() + + @pytest.mark.asyncio + async def test_suppresses_cancel_scope_wrapped_in_exception_group(self): + """Test that a BaseExceptionGroup wrapping a cancel scope error is suppressed.""" + mock_exit_stack = AsyncMock() + inner = RuntimeError( + "Attempted to exit cancel scope in a different task" + ) + mock_exit_stack.aclose.side_effect = BaseExceptionGroup("errors", [inner]) + # Should not raise + await safe_mcp_exit_stack_close(mock_exit_stack) + mock_exit_stack.aclose.assert_called_once() + + +class TestIsCancelScopeError: + """Test suite for _is_cancel_scope_error helper.""" + + def test_matches_cancel_scope_runtime_error(self): + assert _is_cancel_scope_error( + RuntimeError("Attempted to exit cancel scope in a different task") + ) + + def test_rejects_unrelated_runtime_error(self): + assert not _is_cancel_scope_error(RuntimeError("something unrelated")) + + def test_rejects_non_runtime_error(self): + assert not _is_cancel_scope_error(ValueError("cancel scope in message")) + + def test_matches_wrapped_in_exception_group(self): + inner = RuntimeError("Attempted to exit cancel scope in a different task") + assert _is_cancel_scope_error(BaseExceptionGroup("errors", [inner])) + + def test_rejects_exception_group_without_cancel_scope(self): + assert not _is_cancel_scope_error( + BaseExceptionGroup("errors", [RuntimeError("unrelated")]) + ) From 2a839b8ebc0f9d4dec9f75d85c8435f5b1e6c412 Mon Sep 17 00:00:00 2001 From: "Lim, Ghim Boon" Date: Thu, 26 Feb 2026 13:19:48 +0800 Subject: [PATCH 2/5] fix: ruff --- backend/chainlit/session.py | 17 +++++++++++++---- backend/tests/test_session.py | 14 +++++++++++--- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/backend/chainlit/session.py b/backend/chainlit/session.py index 10c7a45134..5c128bcc6a 100644 --- a/backend/chainlit/session.py +++ b/backend/chainlit/session.py @@ -1,4 +1,5 @@ import asyncio +import builtins import json import mimetypes import re @@ -12,6 +13,8 @@ from chainlit.logger import logger from chainlit.types import AskFileSpec, FileReference +_BASE_EXCEPTION_GROUP = getattr(builtins, "BaseExceptionGroup", None) + async def safe_mcp_exit_stack_close(exit_stack: AsyncExitStack) -> None: """Close an MCP exit stack, suppressing cross-task cancel scope errors. @@ -31,7 +34,7 @@ async def safe_mcp_exit_stack_close(exit_stack: AsyncExitStack) -> None: """ try: await exit_stack.aclose() - except (RuntimeError, BaseExceptionGroup) as exc: + except RuntimeError as exc: if _is_cancel_scope_error(exc): logger.debug( "Suppressed cross-task cancel scope error during MCP cleanup: %s", @@ -41,8 +44,14 @@ async def safe_mcp_exit_stack_close(exit_stack: AsyncExitStack) -> None: logger.warning( "Error closing MCP exit stack: %s", exc, exc_info=True ) - except Exception: - logger.debug("Error closing MCP exit stack", exc_info=True) + except Exception as exc: + if _is_cancel_scope_error(exc): + logger.debug( + "Suppressed cross-task cancel scope error during MCP cleanup: %s", + exc, + ) + else: + logger.debug("Error closing MCP exit stack", exc_info=True) def _is_cancel_scope_error(exc: BaseException) -> bool: @@ -54,7 +63,7 @@ def _is_cancel_scope_error(exc: BaseException) -> bool: """ if isinstance(exc, RuntimeError): return "cancel scope" in str(exc) - if isinstance(exc, BaseExceptionGroup): + if _BASE_EXCEPTION_GROUP and isinstance(exc, _BASE_EXCEPTION_GROUP): return any(_is_cancel_scope_error(e) for e in exc.exceptions) return False diff --git a/backend/tests/test_session.py b/backend/tests/test_session.py index 9ca5a3fdce..27d845cfe5 100644 --- a/backend/tests/test_session.py +++ b/backend/tests/test_session.py @@ -1,3 +1,4 @@ +import builtins import json import tempfile import uuid @@ -17,6 +18,13 @@ ) +def make_exception_group(message: str, exceptions: list[BaseException]): + base_exception_group = getattr(builtins, "BaseExceptionGroup", None) + if not base_exception_group: + pytest.skip("BaseExceptionGroup is unavailable on this Python version") + return base_exception_group(message, exceptions) + + class TestJSONEncoderIgnoreNonSerializable: """Test suite for JSONEncoderIgnoreNonSerializable.""" @@ -697,7 +705,7 @@ async def test_suppresses_cancel_scope_wrapped_in_exception_group(self): inner = RuntimeError( "Attempted to exit cancel scope in a different task" ) - mock_exit_stack.aclose.side_effect = BaseExceptionGroup("errors", [inner]) + mock_exit_stack.aclose.side_effect = make_exception_group("errors", [inner]) # Should not raise await safe_mcp_exit_stack_close(mock_exit_stack) mock_exit_stack.aclose.assert_called_once() @@ -719,9 +727,9 @@ def test_rejects_non_runtime_error(self): def test_matches_wrapped_in_exception_group(self): inner = RuntimeError("Attempted to exit cancel scope in a different task") - assert _is_cancel_scope_error(BaseExceptionGroup("errors", [inner])) + assert _is_cancel_scope_error(make_exception_group("errors", [inner])) def test_rejects_exception_group_without_cancel_scope(self): assert not _is_cancel_scope_error( - BaseExceptionGroup("errors", [RuntimeError("unrelated")]) + make_exception_group("errors", [RuntimeError("unrelated")]) ) From 66f6c447b4f269ab811f9136d3cf9cf836f07618 Mon Sep 17 00:00:00 2001 From: "Lim, Ghim Boon" Date: Thu, 26 Feb 2026 13:23:48 +0800 Subject: [PATCH 3/5] fix: cubic recommended changes --- backend/chainlit/server.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/chainlit/server.py b/backend/chainlit/server.py index c923f04905..c4dd6ba656 100644 --- a/backend/chainlit/server.py +++ b/backend/chainlit/server.py @@ -1417,14 +1417,14 @@ async def connect_mcp( # Initialize the session await mcp_session.initialize() - # Store the session - session.mcp_sessions[mcp_connection.name] = (mcp_session, exit_stack) - exit_stack_stored = True - # Call the callback if config.code.on_mcp_connect: await config.code.on_mcp_connect(mcp_connection, mcp_session) + # Store the session + session.mcp_sessions[mcp_connection.name] = (mcp_session, exit_stack) + exit_stack_stored = True + except Exception as e: raise HTTPException( status_code=400, From 54e4d024fb52bbdc07daf665349e7dd1a4dd0aae Mon Sep 17 00:00:00 2001 From: "Lim, Ghim Boon" Date: Thu, 26 Feb 2026 13:28:28 +0800 Subject: [PATCH 4/5] fix: ruff --- backend/chainlit/session.py | 5 ++--- backend/tests/test_session.py | 4 +--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/backend/chainlit/session.py b/backend/chainlit/session.py index 5c128bcc6a..409906382c 100644 --- a/backend/chainlit/session.py +++ b/backend/chainlit/session.py @@ -41,9 +41,7 @@ async def safe_mcp_exit_stack_close(exit_stack: AsyncExitStack) -> None: exc, ) else: - logger.warning( - "Error closing MCP exit stack: %s", exc, exc_info=True - ) + logger.warning("Error closing MCP exit stack: %s", exc, exc_info=True) except Exception as exc: if _is_cancel_scope_error(exc): logger.debug( @@ -67,6 +65,7 @@ def _is_cancel_scope_error(exc: BaseException) -> bool: return any(_is_cancel_scope_error(e) for e in exc.exceptions) return False + if TYPE_CHECKING: from mcp import ClientSession diff --git a/backend/tests/test_session.py b/backend/tests/test_session.py index 27d845cfe5..7672d7223a 100644 --- a/backend/tests/test_session.py +++ b/backend/tests/test_session.py @@ -702,9 +702,7 @@ async def test_suppresses_other_exceptions(self): async def test_suppresses_cancel_scope_wrapped_in_exception_group(self): """Test that a BaseExceptionGroup wrapping a cancel scope error is suppressed.""" mock_exit_stack = AsyncMock() - inner = RuntimeError( - "Attempted to exit cancel scope in a different task" - ) + inner = RuntimeError("Attempted to exit cancel scope in a different task") mock_exit_stack.aclose.side_effect = make_exception_group("errors", [inner]) # Should not raise await safe_mcp_exit_stack_close(mock_exit_stack) From c4bb6e67aa174fe7994a96c0fef5ed0687e72bb1 Mon Sep 17 00:00:00 2001 From: "Lim, Ghim Boon" Date: Thu, 26 Feb 2026 13:58:15 +0800 Subject: [PATCH 5/5] fix: copilot recommendations --- backend/chainlit/session.py | 7 +++++-- backend/tests/test_session.py | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/backend/chainlit/session.py b/backend/chainlit/session.py index 409906382c..53bf2bed62 100644 --- a/backend/chainlit/session.py +++ b/backend/chainlit/session.py @@ -57,12 +57,15 @@ def _is_cancel_scope_error(exc: BaseException) -> bool: Handles both a bare RuntimeError and a BaseExceptionGroup wrapping one (as produced by anyio's TaskGroup when the streamable-http transport - tears down). + tears down). Only treats a group as a cancel-scope error when *all* + contained exceptions match, so mixed groups surface real failures. """ if isinstance(exc, RuntimeError): return "cancel scope" in str(exc) if _BASE_EXCEPTION_GROUP and isinstance(exc, _BASE_EXCEPTION_GROUP): - return any(_is_cancel_scope_error(e) for e in exc.exceptions) + return bool(exc.exceptions) and all( + _is_cancel_scope_error(e) for e in exc.exceptions + ) return False diff --git a/backend/tests/test_session.py b/backend/tests/test_session.py index 7672d7223a..0532fb2dd3 100644 --- a/backend/tests/test_session.py +++ b/backend/tests/test_session.py @@ -731,3 +731,10 @@ def test_rejects_exception_group_without_cancel_scope(self): assert not _is_cancel_scope_error( make_exception_group("errors", [RuntimeError("unrelated")]) ) + + def test_rejects_mixed_exception_group(self): + cancel = RuntimeError("Attempted to exit cancel scope in a different task") + other = RuntimeError("unrelated failure") + assert not _is_cancel_scope_error( + make_exception_group("errors", [cancel, other]) + )