From 3d5b511022baabeef78d7e0a108e8ff164e7bd8f Mon Sep 17 00:00:00 2001 From: giwaov Date: Mon, 23 Mar 2026 19:01:28 +0100 Subject: [PATCH 1/5] feat: support structured outputs (response_format) in chat completions Wire the OpenAI-compatible response_format parameter through the chat completion pipeline: - Bind response_format to LangChain model via model.bind() for json_object and json_schema types (text is a no-op) - Apply to both streaming and non-streaming code paths - Include response_format in the canonical request dict so TEE hashing covers the requested output format - Add 14 unit tests covering parsing, hash-dict serialization, model binding, and interaction with tool calling Closes #14 --- tee_gateway/controllers/chat_controller.py | 18 ++ tests/test_structured_outputs.py | 313 +++++++++++++++++++++ 2 files changed, 331 insertions(+) create mode 100644 tests/test_structured_outputs.py diff --git a/tee_gateway/controllers/chat_controller.py b/tee_gateway/controllers/chat_controller.py index aed4e4e..af0ff53 100644 --- a/tee_gateway/controllers/chat_controller.py +++ b/tee_gateway/controllers/chat_controller.py @@ -82,6 +82,14 @@ def _create_non_streaming_response(chat_request: CreateChatCompletionRequest): tools_list.append(tool) model = model.bind_tools(tools_list) + # Bind response_format if provided (json_object or json_schema) + if chat_request.response_format: + rf = chat_request.response_format + rf_type = rf.get("type", "text") if isinstance(rf, dict) else getattr(rf, "type", "text") + if rf_type != "text": + rf_dict = rf if isinstance(rf, dict) else {"type": rf_type} + model = model.bind(response_format=rf_dict) + langchain_messages = convert_messages(chat_request.messages) response = model.invoke(langchain_messages) @@ -196,6 +204,14 @@ def _create_streaming_response(chat_request: CreateChatCompletionRequest): tools_list.append(tool) model = model.bind_tools(tools_list) + # Bind response_format if provided (json_object or json_schema) + if chat_request.response_format: + rf = chat_request.response_format + rf_type = rf.get("type", "text") if isinstance(rf, dict) else getattr(rf, "type", "text") + if rf_type != "text": + rf_dict = rf if isinstance(rf, dict) else {"type": rf_type} + model = model.bind(response_format=rf_dict) + langchain_messages = convert_messages(chat_request.messages) tee_keys = get_tee_keys() @@ -481,6 +497,8 @@ def _chat_request_to_dict(chat_request: CreateChatCompletionRequest) -> dict: if isinstance(chat_request.tools, list) else list(chat_request.tools) ) + if chat_request.response_format: + d["response_format"] = chat_request.response_format return d diff --git a/tests/test_structured_outputs.py b/tests/test_structured_outputs.py new file mode 100644 index 0000000..442c5fb --- /dev/null +++ b/tests/test_structured_outputs.py @@ -0,0 +1,313 @@ +import json +import unittest +from unittest.mock import patch, MagicMock + +from tee_gateway.controllers.chat_controller import ( + _parse_chat_request as parse_chat_request, + _chat_request_to_dict as chat_request_to_dict, +) +from tee_gateway.models.create_chat_completion_request import ( + CreateChatCompletionRequest, +) + + +class TestResponseFormatParsing(unittest.TestCase): + """Tests for response_format parsing from request dicts.""" + + def _base_request(self, **overrides): + d = { + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hello"}], + } + d.update(overrides) + return d + + def test_no_response_format(self): + req = parse_chat_request(self._base_request()) + self.assertIsNone(req.response_format) + + def test_text_response_format(self): + req = parse_chat_request(self._base_request(response_format={"type": "text"})) + self.assertEqual(req.response_format, {"type": "text"}) + + def test_json_object_response_format(self): + rf = {"type": "json_object"} + req = parse_chat_request(self._base_request(response_format=rf)) + self.assertEqual(req.response_format, {"type": "json_object"}) + + def test_json_schema_response_format(self): + rf = { + "type": "json_schema", + "json_schema": { + "name": "user_info", + "strict": True, + "schema": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + "required": ["name", "age"], + "additionalProperties": False, + }, + }, + } + req = parse_chat_request(self._base_request(response_format=rf)) + self.assertEqual(req.response_format["type"], "json_schema") + self.assertEqual(req.response_format["json_schema"]["name"], "user_info") + self.assertTrue(req.response_format["json_schema"]["strict"]) + + +class TestResponseFormatInHashDict(unittest.TestCase): + """Tests that response_format is included in the TEE hash dict.""" + + def _make_request(self, response_format=None): + return CreateChatCompletionRequest( + model="gpt-4o", + messages=[], + temperature=1.0, + response_format=response_format, + ) + + def test_no_response_format_omitted(self): + req = self._make_request() + d = chat_request_to_dict(req) + self.assertNotIn("response_format", d) + + def test_json_object_included(self): + req = self._make_request(response_format={"type": "json_object"}) + d = chat_request_to_dict(req) + self.assertIn("response_format", d) + self.assertEqual(d["response_format"]["type"], "json_object") + + def test_json_schema_included(self): + rf = { + "type": "json_schema", + "json_schema": { + "name": "math_answer", + "schema": {"type": "object", "properties": {"answer": {"type": "number"}}}, + }, + } + req = self._make_request(response_format=rf) + d = chat_request_to_dict(req) + self.assertEqual(d["response_format"], rf) + + def test_hash_deterministic_with_response_format(self): + rf = {"type": "json_object"} + req = self._make_request(response_format=rf) + d1 = json.dumps(chat_request_to_dict(req), sort_keys=True) + d2 = json.dumps(chat_request_to_dict(req), sort_keys=True) + self.assertEqual(d1, d2) + + def test_hash_differs_with_and_without_response_format(self): + req_plain = self._make_request() + req_json = self._make_request(response_format={"type": "json_object"}) + h1 = json.dumps(chat_request_to_dict(req_plain), sort_keys=True) + h2 = json.dumps(chat_request_to_dict(req_json), sort_keys=True) + self.assertNotEqual(h1, h2) + + +class TestResponseFormatModelBinding(unittest.TestCase): + """Tests that response_format is bound to the model before invocation.""" + + @patch("tee_gateway.controllers.chat_controller.compute_tee_msg_hash") + @patch("tee_gateway.controllers.chat_controller.get_tee_keys") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_json_object_binds_to_model(self, mock_get_model, mock_convert, mock_tee_keys, mock_hash): + from tee_gateway.controllers.chat_controller import _create_non_streaming_response + + mock_model = MagicMock() + mock_bound = MagicMock() + mock_model.bind.return_value = mock_bound + mock_get_model.return_value = mock_model + + mock_response = MagicMock() + mock_response.content = "test" + mock_response.tool_calls = None + mock_bound.invoke.return_value = mock_response + + mock_convert.return_value = [] + mock_hash.return_value = (b"hash", "input_hex", "output_hex") + mock_keys = MagicMock() + mock_keys.sign_data.return_value = "sig" + mock_keys.get_tee_id.return_value = "abc" + mock_tee_keys.return_value = mock_keys + + req = CreateChatCompletionRequest( + model="gpt-4o", + messages=[], + temperature=1.0, + response_format={"type": "json_object"}, + ) + + _create_non_streaming_response(req) + + mock_model.bind.assert_called_once_with(response_format={"type": "json_object"}) + mock_bound.invoke.assert_called_once() + + @patch("tee_gateway.controllers.chat_controller.compute_tee_msg_hash") + @patch("tee_gateway.controllers.chat_controller.get_tee_keys") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_text_format_does_not_bind(self, mock_get_model, mock_convert, mock_tee_keys, mock_hash): + from tee_gateway.controllers.chat_controller import _create_non_streaming_response + + mock_model = MagicMock() + mock_get_model.return_value = mock_model + + mock_response = MagicMock() + mock_response.content = "test" + mock_response.tool_calls = None + mock_model.invoke.return_value = mock_response + + mock_convert.return_value = [] + mock_hash.return_value = (b"hash", "input_hex", "output_hex") + mock_keys = MagicMock() + mock_keys.sign_data.return_value = "sig" + mock_keys.get_tee_id.return_value = "abc" + mock_tee_keys.return_value = mock_keys + + req = CreateChatCompletionRequest( + model="gpt-4o", + messages=[], + temperature=1.0, + response_format={"type": "text"}, + ) + + _create_non_streaming_response(req) + + mock_model.bind.assert_not_called() + mock_model.invoke.assert_called_once() + + @patch("tee_gateway.controllers.chat_controller.compute_tee_msg_hash") + @patch("tee_gateway.controllers.chat_controller.get_tee_keys") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_no_format_does_not_bind(self, mock_get_model, mock_convert, mock_tee_keys, mock_hash): + from tee_gateway.controllers.chat_controller import _create_non_streaming_response + + mock_model = MagicMock() + mock_get_model.return_value = mock_model + + mock_response = MagicMock() + mock_response.content = "result" + mock_response.tool_calls = None + mock_model.invoke.return_value = mock_response + + mock_convert.return_value = [] + mock_hash.return_value = (b"hash", "input_hex", "output_hex") + mock_keys = MagicMock() + mock_keys.sign_data.return_value = "sig" + mock_keys.get_tee_id.return_value = "abc" + mock_tee_keys.return_value = mock_keys + + req = CreateChatCompletionRequest( + model="gpt-4o", + messages=[], + temperature=1.0, + ) + + _create_non_streaming_response(req) + + mock_model.bind.assert_not_called() + + @patch("tee_gateway.controllers.chat_controller.compute_tee_msg_hash") + @patch("tee_gateway.controllers.chat_controller.get_tee_keys") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_json_schema_binds_full_schema(self, mock_get_model, mock_convert, mock_tee_keys, mock_hash): + from tee_gateway.controllers.chat_controller import _create_non_streaming_response + + mock_model = MagicMock() + mock_bound = MagicMock() + mock_model.bind.return_value = mock_bound + mock_get_model.return_value = mock_model + + mock_response = MagicMock() + mock_response.content = '{"name": "Alice", "age": 30}' + mock_response.tool_calls = None + mock_bound.invoke.return_value = mock_response + + mock_convert.return_value = [] + mock_hash.return_value = (b"hash", "input_hex", "output_hex") + mock_keys = MagicMock() + mock_keys.sign_data.return_value = "sig" + mock_keys.get_tee_id.return_value = "abc" + mock_tee_keys.return_value = mock_keys + + rf = { + "type": "json_schema", + "json_schema": { + "name": "user_info", + "strict": True, + "schema": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + "required": ["name", "age"], + "additionalProperties": False, + }, + }, + } + + req = CreateChatCompletionRequest( + model="gpt-4o", + messages=[], + temperature=1.0, + response_format=rf, + ) + + _create_non_streaming_response(req) + + mock_model.bind.assert_called_once_with(response_format=rf) + + +class TestResponseFormatWithTools(unittest.TestCase): + """Tests that response_format works alongside tool binding.""" + + @patch("tee_gateway.controllers.chat_controller.compute_tee_msg_hash") + @patch("tee_gateway.controllers.chat_controller.get_tee_keys") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_tools_and_response_format_both_bind(self, mock_get_model, mock_convert, mock_tee_keys, mock_hash): + from tee_gateway.controllers.chat_controller import _create_non_streaming_response + + mock_model = MagicMock() + mock_after_tools = MagicMock() + mock_after_format = MagicMock() + mock_model.bind_tools.return_value = mock_after_tools + mock_after_tools.bind.return_value = mock_after_format + mock_get_model.return_value = mock_model + + mock_response = MagicMock() + mock_response.content = '{"result": 42}' + mock_response.tool_calls = None + mock_after_format.invoke.return_value = mock_response + + mock_convert.return_value = [] + mock_hash.return_value = (b"hash", "input_hex", "output_hex") + mock_keys = MagicMock() + mock_keys.sign_data.return_value = "sig" + mock_keys.get_tee_id.return_value = "abc" + mock_tee_keys.return_value = mock_keys + + req = CreateChatCompletionRequest( + model="gpt-4o", + messages=[], + temperature=1.0, + tools=[{"type": "function", "function": {"name": "calc", "parameters": {}}}], + response_format={"type": "json_object"}, + ) + + _create_non_streaming_response(req) + + mock_model.bind_tools.assert_called_once() + mock_after_tools.bind.assert_called_once_with(response_format={"type": "json_object"}) + mock_after_format.invoke.assert_called_once() + + +if __name__ == "__main__": + unittest.main() From bcdaf3b0ef18add82fd9a44a76830b6fb5c05502 Mon Sep 17 00:00:00 2001 From: giwaov Date: Fri, 27 Mar 2026 10:38:54 +0100 Subject: [PATCH 2/5] style: format files with ruff --- tee_gateway/controllers/chat_controller.py | 12 ++++- tests/test_structured_outputs.py | 53 ++++++++++++++++------ 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/tee_gateway/controllers/chat_controller.py b/tee_gateway/controllers/chat_controller.py index af0ff53..004f5be 100644 --- a/tee_gateway/controllers/chat_controller.py +++ b/tee_gateway/controllers/chat_controller.py @@ -85,7 +85,11 @@ def _create_non_streaming_response(chat_request: CreateChatCompletionRequest): # Bind response_format if provided (json_object or json_schema) if chat_request.response_format: rf = chat_request.response_format - rf_type = rf.get("type", "text") if isinstance(rf, dict) else getattr(rf, "type", "text") + rf_type = ( + rf.get("type", "text") + if isinstance(rf, dict) + else getattr(rf, "type", "text") + ) if rf_type != "text": rf_dict = rf if isinstance(rf, dict) else {"type": rf_type} model = model.bind(response_format=rf_dict) @@ -207,7 +211,11 @@ def _create_streaming_response(chat_request: CreateChatCompletionRequest): # Bind response_format if provided (json_object or json_schema) if chat_request.response_format: rf = chat_request.response_format - rf_type = rf.get("type", "text") if isinstance(rf, dict) else getattr(rf, "type", "text") + rf_type = ( + rf.get("type", "text") + if isinstance(rf, dict) + else getattr(rf, "type", "text") + ) if rf_type != "text": rf_dict = rf if isinstance(rf, dict) else {"type": rf_type} model = model.bind(response_format=rf_dict) diff --git a/tests/test_structured_outputs.py b/tests/test_structured_outputs.py index 442c5fb..475e657 100644 --- a/tests/test_structured_outputs.py +++ b/tests/test_structured_outputs.py @@ -85,7 +85,10 @@ def test_json_schema_included(self): "type": "json_schema", "json_schema": { "name": "math_answer", - "schema": {"type": "object", "properties": {"answer": {"type": "number"}}}, + "schema": { + "type": "object", + "properties": {"answer": {"type": "number"}}, + }, }, } req = self._make_request(response_format=rf) @@ -114,8 +117,12 @@ class TestResponseFormatModelBinding(unittest.TestCase): @patch("tee_gateway.controllers.chat_controller.get_tee_keys") @patch("tee_gateway.controllers.chat_controller.convert_messages") @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") - def test_json_object_binds_to_model(self, mock_get_model, mock_convert, mock_tee_keys, mock_hash): - from tee_gateway.controllers.chat_controller import _create_non_streaming_response + def test_json_object_binds_to_model( + self, mock_get_model, mock_convert, mock_tee_keys, mock_hash + ): + from tee_gateway.controllers.chat_controller import ( + _create_non_streaming_response, + ) mock_model = MagicMock() mock_bound = MagicMock() @@ -150,8 +157,12 @@ def test_json_object_binds_to_model(self, mock_get_model, mock_convert, mock_tee @patch("tee_gateway.controllers.chat_controller.get_tee_keys") @patch("tee_gateway.controllers.chat_controller.convert_messages") @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") - def test_text_format_does_not_bind(self, mock_get_model, mock_convert, mock_tee_keys, mock_hash): - from tee_gateway.controllers.chat_controller import _create_non_streaming_response + def test_text_format_does_not_bind( + self, mock_get_model, mock_convert, mock_tee_keys, mock_hash + ): + from tee_gateway.controllers.chat_controller import ( + _create_non_streaming_response, + ) mock_model = MagicMock() mock_get_model.return_value = mock_model @@ -184,8 +195,12 @@ def test_text_format_does_not_bind(self, mock_get_model, mock_convert, mock_tee_ @patch("tee_gateway.controllers.chat_controller.get_tee_keys") @patch("tee_gateway.controllers.chat_controller.convert_messages") @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") - def test_no_format_does_not_bind(self, mock_get_model, mock_convert, mock_tee_keys, mock_hash): - from tee_gateway.controllers.chat_controller import _create_non_streaming_response + def test_no_format_does_not_bind( + self, mock_get_model, mock_convert, mock_tee_keys, mock_hash + ): + from tee_gateway.controllers.chat_controller import ( + _create_non_streaming_response, + ) mock_model = MagicMock() mock_get_model.return_value = mock_model @@ -216,8 +231,12 @@ def test_no_format_does_not_bind(self, mock_get_model, mock_convert, mock_tee_ke @patch("tee_gateway.controllers.chat_controller.get_tee_keys") @patch("tee_gateway.controllers.chat_controller.convert_messages") @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") - def test_json_schema_binds_full_schema(self, mock_get_model, mock_convert, mock_tee_keys, mock_hash): - from tee_gateway.controllers.chat_controller import _create_non_streaming_response + def test_json_schema_binds_full_schema( + self, mock_get_model, mock_convert, mock_tee_keys, mock_hash + ): + from tee_gateway.controllers.chat_controller import ( + _create_non_streaming_response, + ) mock_model = MagicMock() mock_bound = MagicMock() @@ -272,8 +291,12 @@ class TestResponseFormatWithTools(unittest.TestCase): @patch("tee_gateway.controllers.chat_controller.get_tee_keys") @patch("tee_gateway.controllers.chat_controller.convert_messages") @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") - def test_tools_and_response_format_both_bind(self, mock_get_model, mock_convert, mock_tee_keys, mock_hash): - from tee_gateway.controllers.chat_controller import _create_non_streaming_response + def test_tools_and_response_format_both_bind( + self, mock_get_model, mock_convert, mock_tee_keys, mock_hash + ): + from tee_gateway.controllers.chat_controller import ( + _create_non_streaming_response, + ) mock_model = MagicMock() mock_after_tools = MagicMock() @@ -298,14 +321,18 @@ def test_tools_and_response_format_both_bind(self, mock_get_model, mock_convert, model="gpt-4o", messages=[], temperature=1.0, - tools=[{"type": "function", "function": {"name": "calc", "parameters": {}}}], + tools=[ + {"type": "function", "function": {"name": "calc", "parameters": {}}} + ], response_format={"type": "json_object"}, ) _create_non_streaming_response(req) mock_model.bind_tools.assert_called_once() - mock_after_tools.bind.assert_called_once_with(response_format={"type": "json_object"}) + mock_after_tools.bind.assert_called_once_with( + response_format={"type": "json_object"} + ) mock_after_format.invoke.assert_called_once() From 6e0a44884fa2bce02f036c33fbbe6ba78633bc1b Mon Sep 17 00:00:00 2001 From: kylexqian Date: Thu, 2 Apr 2026 02:50:29 -0700 Subject: [PATCH 3/5] fix: Anthropic structured output via with_structured_output + streaming tests - Route Anthropic json_schema requests through with_structured_output() instead of bind(), which Anthropic does not support for response_format. Raise a clear error for json_object (no Anthropic native equivalent). - Inject the json_schema wrapper 'name' as 'title' in the schema dict so LangChain-Anthropic can derive a function name for its tool-use mechanism. - Handle Anthropic structured output in the streaming path by invoking synchronously and emitting the result as a single SSE content chunk. - Fix OpenAPI spec: remove 'type' from required in ResponseFormatJsonSchema so json_schema requests pass connexion validation. - Fix pre-existing test breakage: gpt-4o -> gpt-4.1 (model removed from registry). - Add streaming tests: binding behaviour for all providers, Anthropic SSE chunk output, and TEE hash content correctness. Co-Authored-By: Claude Sonnet 4.6 --- tee_gateway/controllers/chat_controller.py | 104 +++++- tee_gateway/openapi/openapi.yaml | 1 - tests/test_structured_outputs.py | 374 ++++++++++++++++++++- 3 files changed, 465 insertions(+), 14 deletions(-) diff --git a/tee_gateway/controllers/chat_controller.py b/tee_gateway/controllers/chat_controller.py index 004f5be..5b1280a 100644 --- a/tee_gateway/controllers/chat_controller.py +++ b/tee_gateway/controllers/chat_controller.py @@ -21,6 +21,8 @@ ChatCompletionRequestFunctionMessage, ) +from langchain_core.messages import AIMessage + from tee_gateway.tee_manager import get_tee_keys, compute_tee_msg_hash from tee_gateway.llm_backend import ( get_provider_from_model, @@ -50,6 +52,47 @@ def create_chat_completion(body): return _create_non_streaming_response(chat_request) +def _invoke_anthropic_structured( + model, rf: dict, langchain_messages: list +) -> AIMessage: + """ + Use LangChain's with_structured_output() for Anthropic structured output. + + Anthropic does not support response_format via bind(). For json_schema, we use + with_structured_output(schema, method="json_schema") which calls Anthropic's + native structured output API. The parsed dict result is re-wrapped as an AIMessage + so all downstream signing/response-building code stays unchanged. + + json_object (no schema) has no Anthropic native equivalent — callers should use + json_schema with an explicit schema instead. + """ + rf_type = rf.get("type", "text") + if rf_type != "json_schema": + raise ValueError( + f"response_format type '{rf_type}' is not natively supported by Anthropic. " + "Use json_schema with an explicit schema instead." + ) + + schema_obj = rf.get("json_schema", {}) + schema_def = schema_obj.get("schema", {}) + name = schema_obj.get("name", "output") + strict = schema_obj.get("strict", False) + + # LangChain-Anthropic derives the tool function name from the schema's "title" key. + # The OpenAI-compatible json_schema wrapper puts this as "name" one level up, so + # we inject it into the schema dict if it isn't already there. + if "title" not in schema_def: + schema_def = {**schema_def, "title": name} + + structured = model.with_structured_output( + schema_def, method="json_schema", strict=strict + ) + result = structured.invoke(langchain_messages) + + content_str = json.dumps(result) if isinstance(result, dict) else str(result) + return AIMessage(content=content_str) + + def _create_non_streaming_response(chat_request: CreateChatCompletionRequest): """Handle non-streaming chat completion via direct LangChain call.""" try: @@ -82,7 +125,11 @@ def _create_non_streaming_response(chat_request: CreateChatCompletionRequest): tools_list.append(tool) model = model.bind_tools(tools_list) - # Bind response_format if provided (json_object or json_schema) + # Bind response_format if provided (json_object or json_schema). + # Anthropic does not support response_format via bind(); use + # with_structured_output() for json_schema instead (json_object has no + # Anthropic native equivalent and raises a clear error). + rf_dict: dict | None = None if chat_request.response_format: rf = chat_request.response_format rf_type = ( @@ -92,10 +139,14 @@ def _create_non_streaming_response(chat_request: CreateChatCompletionRequest): ) if rf_type != "text": rf_dict = rf if isinstance(rf, dict) else {"type": rf_type} - model = model.bind(response_format=rf_dict) + if get_provider_from_model(chat_request.model) != "anthropic": + model = model.bind(response_format=rf_dict) langchain_messages = convert_messages(chat_request.messages) - response = model.invoke(langchain_messages) + if rf_dict and get_provider_from_model(chat_request.model) == "anthropic": + response = _invoke_anthropic_structured(model, rf_dict, langchain_messages) + else: + response = model.invoke(langchain_messages) # Normalize content (Gemini may return a list of content parts) if isinstance(response.content, list): @@ -208,7 +259,11 @@ def _create_streaming_response(chat_request: CreateChatCompletionRequest): tools_list.append(tool) model = model.bind_tools(tools_list) - # Bind response_format if provided (json_object or json_schema) + # Bind response_format if provided (json_object or json_schema). + # Anthropic does not support response_format via bind(); use + # with_structured_output() for json_schema instead (json_object has no + # Anthropic native equivalent and raises a clear error). + anthropic_structured_rf: dict | None = None if chat_request.response_format: rf = chat_request.response_format rf_type = ( @@ -218,11 +273,30 @@ def _create_streaming_response(chat_request: CreateChatCompletionRequest): ) if rf_type != "text": rf_dict = rf if isinstance(rf, dict) else {"type": rf_type} - model = model.bind(response_format=rf_dict) + if provider == "anthropic": + anthropic_structured_rf = rf_dict + else: + model = model.bind(response_format=rf_dict) langchain_messages = convert_messages(chat_request.messages) tee_keys = get_tee_keys() + # For Anthropic structured output, with_structured_output() invokes + # synchronously and returns a complete dict — streaming partial JSON is + # not meaningful for schema-validated output. We invoke once and emit the + # full result as a single content chunk inside the SSE stream. + if anthropic_structured_rf is not None: + ai_msg = _invoke_anthropic_structured( + model, anthropic_structured_rf, langchain_messages + ) + anthropic_structured_content: str | None = ( + ai_msg.content + if isinstance(ai_msg.content, str) + else json.dumps(ai_msg.content) + ) + else: + anthropic_structured_content = None + def generate(): full_content = "" final_usage = None @@ -230,7 +304,25 @@ def generate(): finish_reason = "stop" try: - for chunk in model.stream(langchain_messages): + if anthropic_structured_content is not None: + # Emit the pre-computed structured result as a single chunk + full_content = anthropic_structured_content + data = { + "choices": [ + { + "delta": {"content": full_content, "role": "assistant"}, + "index": 0, + "finish_reason": None, + } + ], + "model": chat_request.model, + } + yield f"data: {json.dumps(data)}\n\n" + chunks_iter: list = [] + else: + chunks_iter = model.stream(langchain_messages) # type: ignore[assignment] + + for chunk in chunks_iter: # --- Text content --- if chunk.content: if isinstance(chunk.content, str): diff --git a/tee_gateway/openapi/openapi.yaml b/tee_gateway/openapi/openapi.yaml index 7c95073..0db3c22 100644 --- a/tee_gateway/openapi/openapi.yaml +++ b/tee_gateway/openapi/openapi.yaml @@ -18214,7 +18214,6 @@ components: type: boolean required: - name - - type title: ResponseFormatJsonSchema_json_schema type: object RunObject_required_action_submit_tool_outputs: diff --git a/tests/test_structured_outputs.py b/tests/test_structured_outputs.py index 475e657..dda281b 100644 --- a/tests/test_structured_outputs.py +++ b/tests/test_structured_outputs.py @@ -16,7 +16,7 @@ class TestResponseFormatParsing(unittest.TestCase): def _base_request(self, **overrides): d = { - "model": "gpt-4o", + "model": "gpt-4.1", "messages": [{"role": "user", "content": "Hello"}], } d.update(overrides) @@ -63,7 +63,7 @@ class TestResponseFormatInHashDict(unittest.TestCase): def _make_request(self, response_format=None): return CreateChatCompletionRequest( - model="gpt-4o", + model="gpt-4.1", messages=[], temperature=1.0, response_format=response_format, @@ -142,7 +142,7 @@ def test_json_object_binds_to_model( mock_tee_keys.return_value = mock_keys req = CreateChatCompletionRequest( - model="gpt-4o", + model="gpt-4.1", messages=[], temperature=1.0, response_format={"type": "json_object"}, @@ -180,7 +180,7 @@ def test_text_format_does_not_bind( mock_tee_keys.return_value = mock_keys req = CreateChatCompletionRequest( - model="gpt-4o", + model="gpt-4.1", messages=[], temperature=1.0, response_format={"type": "text"}, @@ -218,7 +218,7 @@ def test_no_format_does_not_bind( mock_tee_keys.return_value = mock_keys req = CreateChatCompletionRequest( - model="gpt-4o", + model="gpt-4.1", messages=[], temperature=1.0, ) @@ -273,7 +273,7 @@ def test_json_schema_binds_full_schema( } req = CreateChatCompletionRequest( - model="gpt-4o", + model="gpt-4.1", messages=[], temperature=1.0, response_format=rf, @@ -318,7 +318,7 @@ def test_tools_and_response_format_both_bind( mock_tee_keys.return_value = mock_keys req = CreateChatCompletionRequest( - model="gpt-4o", + model="gpt-4.1", messages=[], temperature=1.0, tools=[ @@ -336,5 +336,365 @@ def test_tools_and_response_format_both_bind( mock_after_format.invoke.assert_called_once() +class TestAnthropicTitleInjection(unittest.TestCase): + """Tests that the schema 'name' is injected as 'title' for LangChain-Anthropic.""" + + @patch("tee_gateway.controllers.chat_controller.get_provider_from_model") + @patch("tee_gateway.controllers.chat_controller.compute_tee_msg_hash") + @patch("tee_gateway.controllers.chat_controller.get_tee_keys") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_name_injected_as_title( + self, mock_get_model, mock_convert, mock_tee_keys, mock_hash, mock_provider + ): + """Schema 'name' from json_schema wrapper is added as 'title' in the schema dict.""" + from tee_gateway.controllers.chat_controller import _invoke_anthropic_structured + from langchain_core.messages import AIMessage + + mock_model = MagicMock() + mock_structured = MagicMock() + mock_model.with_structured_output.return_value = mock_structured + mock_structured.invoke.return_value = {"name": "Alice", "age": 30} + + rf = { + "type": "json_schema", + "json_schema": { + "name": "person", + "strict": True, + "schema": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + "required": ["name", "age"], + "additionalProperties": False, + # NOTE: no "title" key here — must be injected from "name" + }, + }, + } + + result = _invoke_anthropic_structured(mock_model, rf, []) + + called_schema = mock_model.with_structured_output.call_args[0][0] + self.assertEqual(called_schema["title"], "person") + self.assertIsInstance(result, AIMessage) + self.assertEqual(json.loads(result.content), {"name": "Alice", "age": 30}) + + @patch("tee_gateway.controllers.chat_controller.get_provider_from_model") + @patch("tee_gateway.controllers.chat_controller.compute_tee_msg_hash") + @patch("tee_gateway.controllers.chat_controller.get_tee_keys") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_existing_title_not_overwritten( + self, mock_get_model, mock_convert, mock_tee_keys, mock_hash, mock_provider + ): + """If the schema already has a 'title', it is left untouched.""" + from tee_gateway.controllers.chat_controller import _invoke_anthropic_structured + + mock_model = MagicMock() + mock_structured = MagicMock() + mock_model.with_structured_output.return_value = mock_structured + mock_structured.invoke.return_value = {"x": 1} + + rf = { + "type": "json_schema", + "json_schema": { + "name": "wrapper_name", + "schema": { + "type": "object", + "title": "existing_title", + "properties": {"x": {"type": "integer"}}, + }, + }, + } + + _invoke_anthropic_structured(mock_model, rf, []) + + called_schema = mock_model.with_structured_output.call_args[0][0] + self.assertEqual(called_schema["title"], "existing_title") + + def test_json_object_raises_for_anthropic(self): + """json_object raises a clear ValueError for Anthropic.""" + from tee_gateway.controllers.chat_controller import _invoke_anthropic_structured + + mock_model = MagicMock() + with self.assertRaises(ValueError) as ctx: + _invoke_anthropic_structured(mock_model, {"type": "json_object"}, []) + self.assertIn("json_object", str(ctx.exception)) + self.assertIn("json_schema", str(ctx.exception)) + + +class TestStreamingResponseFormatBinding(unittest.TestCase): + """Tests that response_format is bound correctly in the streaming path.""" + + def _base_streaming_request(self, model="gpt-4.1", response_format=None): + return CreateChatCompletionRequest( + model=model, + messages=[], + temperature=1.0, + stream=True, + response_format=response_format, + ) + + @patch("tee_gateway.controllers.chat_controller.get_provider_from_model") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_json_object_binds_in_streaming( + self, mock_get_model, mock_convert, mock_provider + ): + """json_object is bound to the model in the streaming path.""" + from tee_gateway.controllers.chat_controller import _create_streaming_response + + mock_provider.return_value = "openai" + mock_model = MagicMock() + mock_bound = MagicMock() + mock_model.bind.return_value = mock_bound + mock_get_model.return_value = mock_model + mock_convert.return_value = [] + + _create_streaming_response( + self._base_streaming_request(response_format={"type": "json_object"}) + ) + + mock_model.bind.assert_called_once_with(response_format={"type": "json_object"}) + + @patch("tee_gateway.controllers.chat_controller.get_provider_from_model") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_text_format_does_not_bind_in_streaming( + self, mock_get_model, mock_convert, mock_provider + ): + """text format skips binding in the streaming path.""" + from tee_gateway.controllers.chat_controller import _create_streaming_response + + mock_provider.return_value = "openai" + mock_model = MagicMock() + mock_get_model.return_value = mock_model + mock_convert.return_value = [] + + _create_streaming_response( + self._base_streaming_request(response_format={"type": "text"}) + ) + + mock_model.bind.assert_not_called() + + @patch("tee_gateway.controllers.chat_controller.get_provider_from_model") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_no_format_does_not_bind_in_streaming( + self, mock_get_model, mock_convert, mock_provider + ): + """Omitting response_format skips binding in the streaming path.""" + from tee_gateway.controllers.chat_controller import _create_streaming_response + + mock_provider.return_value = "openai" + mock_model = MagicMock() + mock_get_model.return_value = mock_model + mock_convert.return_value = [] + + _create_streaming_response(self._base_streaming_request()) + + mock_model.bind.assert_not_called() + + @patch("tee_gateway.controllers.chat_controller.get_provider_from_model") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_json_schema_binds_full_schema_in_streaming( + self, mock_get_model, mock_convert, mock_provider + ): + """The full json_schema dict is bound to the model in the streaming path.""" + from tee_gateway.controllers.chat_controller import _create_streaming_response + + mock_provider.return_value = "openai" + mock_model = MagicMock() + mock_bound = MagicMock() + mock_model.bind.return_value = mock_bound + mock_get_model.return_value = mock_model + mock_convert.return_value = [] + + rf = { + "type": "json_schema", + "json_schema": { + "name": "person", + "strict": True, + "schema": { + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + "additionalProperties": False, + }, + }, + } + _create_streaming_response(self._base_streaming_request(response_format=rf)) + + mock_model.bind.assert_called_once_with(response_format=rf) + + @patch("tee_gateway.controllers.chat_controller.get_provider_from_model") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_anthropic_does_not_bind_in_streaming( + self, mock_get_model, mock_convert, mock_provider + ): + """Anthropic models never call model.bind() — structured output goes via with_structured_output.""" + from tee_gateway.controllers.chat_controller import _create_streaming_response + from langchain_core.messages import AIMessage + + mock_provider.return_value = "anthropic" + mock_model = MagicMock() + mock_structured = MagicMock() + mock_model.with_structured_output.return_value = mock_structured + mock_structured.invoke.return_value = {"name": "Alice", "age": 30} + mock_get_model.return_value = mock_model + mock_convert.return_value = [] + + rf = { + "type": "json_schema", + "json_schema": { + "name": "person", + "schema": { + "type": "object", + "properties": {"name": {"type": "string"}}, + }, + }, + } + _create_streaming_response( + self._base_streaming_request(model="claude-sonnet-4-5", response_format=rf) + ) + + mock_model.bind.assert_not_called() + mock_model.with_structured_output.assert_called_once() + + +class TestStreamingAnthropicStructuredOutput(unittest.TestCase): + """Tests the SSE output of Anthropic structured output in the streaming path.""" + + def _consume_sse(self, response) -> list[dict]: + """Drain the SSE generator and return parsed data objects (skips [DONE]).""" + events = [] + for line in response.response: + if isinstance(line, bytes): + line = line.decode() + line = line.strip() + if line.startswith("data: "): + payload = line[6:] + if payload != "[DONE]": + events.append(json.loads(payload)) + return events + + @patch("tee_gateway.controllers.chat_controller.get_provider_from_model") + @patch("tee_gateway.controllers.chat_controller.compute_tee_msg_hash") + @patch("tee_gateway.controllers.chat_controller.get_tee_keys") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_anthropic_structured_content_emitted_as_single_chunk( + self, mock_get_model, mock_convert, mock_tee_keys, mock_hash, mock_provider + ): + """Anthropic structured output is emitted as one complete content chunk.""" + from tee_gateway.controllers.chat_controller import _create_streaming_response + + mock_provider.return_value = "anthropic" + mock_model = MagicMock() + mock_structured = MagicMock() + mock_model.with_structured_output.return_value = mock_structured + mock_structured.invoke.return_value = {"name": "Alice", "age": 30} + mock_get_model.return_value = mock_model + mock_convert.return_value = [] + + mock_hash.return_value = (b"hash", "input_hex", "output_hex") + mock_keys = MagicMock() + mock_keys.sign_data.return_value = "sig" + mock_keys.get_tee_id.return_value = "abc" + mock_tee_keys.return_value = mock_keys + + rf = { + "type": "json_schema", + "json_schema": { + "name": "person", + "schema": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + "required": ["name", "age"], + "additionalProperties": False, + }, + }, + } + req = CreateChatCompletionRequest( + model="claude-sonnet-4-5", + messages=[], + temperature=1.0, + stream=True, + response_format=rf, + ) + + resp = _create_streaming_response(req) + events = self._consume_sse(resp) + + # First chunk carries the full structured content + content_chunk = events[0] + delta_content = content_chunk["choices"][0]["delta"]["content"] + self.assertEqual(json.loads(delta_content), {"name": "Alice", "age": 30}) + + # Final chunk carries the TEE signature fields + final_chunk = events[-1] + self.assertIn("tee_signature", final_chunk) + self.assertIn("tee_request_hash", final_chunk) + self.assertIn("tee_output_hash", final_chunk) + + @patch("tee_gateway.controllers.chat_controller.get_provider_from_model") + @patch("tee_gateway.controllers.chat_controller.compute_tee_msg_hash") + @patch("tee_gateway.controllers.chat_controller.get_tee_keys") + @patch("tee_gateway.controllers.chat_controller.convert_messages") + @patch("tee_gateway.controllers.chat_controller.get_chat_model_cached") + def test_anthropic_structured_output_in_tee_hash( + self, mock_get_model, mock_convert, mock_tee_keys, mock_hash, mock_provider + ): + """The structured JSON content (not raw dict repr) is passed to compute_tee_msg_hash.""" + from tee_gateway.controllers.chat_controller import _create_streaming_response + + mock_provider.return_value = "anthropic" + mock_model = MagicMock() + mock_structured = MagicMock() + mock_model.with_structured_output.return_value = mock_structured + mock_structured.invoke.return_value = {"answer": 42} + mock_get_model.return_value = mock_model + mock_convert.return_value = [] + + mock_hash.return_value = (b"hash", "input_hex", "output_hex") + mock_keys = MagicMock() + mock_keys.sign_data.return_value = "sig" + mock_keys.get_tee_id.return_value = "abc" + mock_tee_keys.return_value = mock_keys + + rf = { + "type": "json_schema", + "json_schema": { + "name": "result", + "schema": { + "type": "object", + "properties": {"answer": {"type": "integer"}}, + }, + }, + } + req = CreateChatCompletionRequest( + model="claude-sonnet-4-5", + messages=[], + temperature=1.0, + stream=True, + response_format=rf, + ) + + resp = _create_streaming_response(req) + self._consume_sse(resp) + + # compute_tee_msg_hash must receive a JSON string, not a Python dict repr + _, output_content_arg, _ = mock_hash.call_args[0] + parsed = json.loads(output_content_arg) + self.assertEqual(parsed, {"answer": 42}) + + if __name__ == "__main__": unittest.main() From e846e9d8c4d0a13f5ef85775047dec015430080f Mon Sep 17 00:00:00 2001 From: kylexqian Date: Thu, 2 Apr 2026 03:30:02 -0700 Subject: [PATCH 4/5] fix: remove unused AIMessage import in streaming test Co-Authored-By: Claude Sonnet 4.6 --- tests/test_structured_outputs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_structured_outputs.py b/tests/test_structured_outputs.py index dda281b..011003a 100644 --- a/tests/test_structured_outputs.py +++ b/tests/test_structured_outputs.py @@ -538,7 +538,6 @@ def test_anthropic_does_not_bind_in_streaming( ): """Anthropic models never call model.bind() — structured output goes via with_structured_output.""" from tee_gateway.controllers.chat_controller import _create_streaming_response - from langchain_core.messages import AIMessage mock_provider.return_value = "anthropic" mock_model = MagicMock() From b8235a40e75a7defa2d43dc0eca6918407a9f4be Mon Sep 17 00:00:00 2001 From: kylexqian Date: Thu, 2 Apr 2026 03:24:25 -0700 Subject: [PATCH 5/5] fix: normalize response_format to dict before binding and hashing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add _normalize_response_format() helper that coerces response_format to a plain dict regardless of whether it arrives as a dict, Pydantic model, or other object. Apply it in both streaming/non-streaming binding paths and in the TEE hash dict — preventing silent json_schema payload loss when rf_dict was reconstructed as only {"type": ...}, and preventing a potential json.dumps failure in _chat_request_to_dict on non-dict input. Co-Authored-By: Claude Sonnet 4.6 --- tee_gateway/controllers/chat_controller.py | 36 ++++++++++------------ 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/tee_gateway/controllers/chat_controller.py b/tee_gateway/controllers/chat_controller.py index 5b1280a..391c0fa 100644 --- a/tee_gateway/controllers/chat_controller.py +++ b/tee_gateway/controllers/chat_controller.py @@ -52,6 +52,15 @@ def create_chat_completion(body): return _create_non_streaming_response(chat_request) +def _normalize_response_format(rf) -> dict: + """Coerce response_format to a plain dict, preserving all fields including json_schema.""" + if isinstance(rf, dict): + return rf + if hasattr(rf, "model_dump"): + return rf.model_dump() + return vars(rf) + + def _invoke_anthropic_structured( model, rf: dict, langchain_messages: list ) -> AIMessage: @@ -131,14 +140,9 @@ def _create_non_streaming_response(chat_request: CreateChatCompletionRequest): # Anthropic native equivalent and raises a clear error). rf_dict: dict | None = None if chat_request.response_format: - rf = chat_request.response_format - rf_type = ( - rf.get("type", "text") - if isinstance(rf, dict) - else getattr(rf, "type", "text") - ) - if rf_type != "text": - rf_dict = rf if isinstance(rf, dict) else {"type": rf_type} + rf = _normalize_response_format(chat_request.response_format) + if rf.get("type", "text") != "text": + rf_dict = rf if get_provider_from_model(chat_request.model) != "anthropic": model = model.bind(response_format=rf_dict) @@ -265,18 +269,12 @@ def _create_streaming_response(chat_request: CreateChatCompletionRequest): # Anthropic native equivalent and raises a clear error). anthropic_structured_rf: dict | None = None if chat_request.response_format: - rf = chat_request.response_format - rf_type = ( - rf.get("type", "text") - if isinstance(rf, dict) - else getattr(rf, "type", "text") - ) - if rf_type != "text": - rf_dict = rf if isinstance(rf, dict) else {"type": rf_type} + rf = _normalize_response_format(chat_request.response_format) + if rf.get("type", "text") != "text": if provider == "anthropic": - anthropic_structured_rf = rf_dict + anthropic_structured_rf = rf else: - model = model.bind(response_format=rf_dict) + model = model.bind(response_format=rf) langchain_messages = convert_messages(chat_request.messages) tee_keys = get_tee_keys() @@ -598,7 +596,7 @@ def _chat_request_to_dict(chat_request: CreateChatCompletionRequest) -> dict: else list(chat_request.tools) ) if chat_request.response_format: - d["response_format"] = chat_request.response_format + d["response_format"] = _normalize_response_format(chat_request.response_format) return d