From 0c3a44af6eea533a96272b751f20bdd90d6a5ea9 Mon Sep 17 00:00:00 2001 From: Chad Voegele Date: Fri, 28 Nov 2025 06:10:49 -0600 Subject: [PATCH 1/4] Make invalid schema a user error (400) --- common/json-schema-to-grammar.cpp | 2 +- tests/test-json-schema-to-grammar.cpp | 2 +- .../server/tests/unit/test_chat_completion.py | 23 +++++++++---------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index c8421e1e826..c3b4e5d9dc7 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -974,7 +974,7 @@ class SchemaConverter { void check_errors() { if (!_errors.empty()) { - throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n")); + throw std::invalid_argument("JSON schema conversion failed:\n" + string_join(_errors, "\n")); } if (!_warnings.empty()) { fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str()); diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 1e568219d21..6a4bd8fb4dc 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -1375,7 +1375,7 @@ int main() { try { tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema), true)); tc.verify_status(SUCCESS); - } catch (const std::runtime_error & ex) { + } catch (const std::invalid_argument & ex) { fprintf(stderr, "Error: %s\n", ex.what()); tc.verify_status(FAILURE); } diff --git a/tools/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py index 392e0efecdb..49a9a85bf29 100644 --- a/tools/server/tests/unit/test_chat_completion.py +++ b/tools/server/tests/unit/test_chat_completion.py @@ -171,18 +171,18 @@ def test_apply_chat_template(): assert res.body["prompt"] == "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a test.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" -@pytest.mark.parametrize("response_format,n_predicted,re_content", [ - ({"type": "json_object", "schema": {"const": "42"}}, 6, "\"42\""), - ({"type": "json_object", "schema": {"items": [{"type": "integer"}]}}, 10, "[ -3000 ]"), - ({"type": "json_schema", "json_schema": {"schema": {"const": "foooooo"}}}, 10, "\"foooooo\""), - ({"type": "json_object"}, 10, "(\\{|John)+"), - ({"type": "sound"}, 0, None), +@pytest.mark.parametrize("response_format,n_predicted,re_content,expected_status_code", [ + ({"type": "json_object", "schema": {"const": "42"}}, 6, "\"42\"", 200), + ({"type": "json_object", "schema": {"items": [{"type": "integer"}]}}, 10, "[ -3000 ]", 200), + ({"type": "json_schema", "json_schema": {"schema": {"const": "foooooo"}}}, 10, "\"foooooo\"", 200), + ({"type": "json_object"}, 10, "(\\{|John)+", 200), + ({"type": "sound"}, 0, None, 500), # invalid response format (expected to fail) - ({"type": "json_object", "schema": 123}, 0, None), - ({"type": "json_object", "schema": {"type": 123}}, 0, None), - ({"type": "json_object", "schema": {"type": "hiccup"}}, 0, None), + ({"type": "json_object", "schema": 123}, 0, None, 400), + ({"type": "json_object", "schema": {"type": 123}}, 0, None, 400), + ({"type": "json_object", "schema": {"type": "hiccup"}}, 0, None, 400), ]) -def test_completion_with_response_format(response_format: dict, n_predicted: int, re_content: str | None): +def test_completion_with_response_format(response_format: dict, n_predicted: int, re_content: str | None, expected_status_code: int): global server server.start() res = server.make_request("POST", "/chat/completions", data={ @@ -193,12 +193,11 @@ def test_completion_with_response_format(response_format: dict, n_predicted: int ], "response_format": response_format, }) + assert res.status_code == expected_status_code if re_content is not None: - assert res.status_code == 200 choice = res.body["choices"][0] assert match_regex(re_content, choice["message"]["content"]) else: - assert res.status_code != 200 assert "error" in res.body From 3779ed62cf672a14547d34a2c2f785a1a726ef86 Mon Sep 17 00:00:00 2001 From: Chad Voegele Date: Sun, 30 Nov 2025 06:19:40 -0600 Subject: [PATCH 2/4] Move invalid_argument exception handler to ex_wrapper --- common/chat.cpp | 32 +++++++++++++++--------------- tools/server/server-common.cpp | 36 +++++++++++++++++----------------- tools/server/server.cpp | 8 +++++++- 3 files changed, 41 insertions(+), 35 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index b4a0f985e2e..5baafd6ea3b 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -163,7 +163,7 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin if (tool_choice == "required") { return COMMON_CHAT_TOOL_CHOICE_REQUIRED; } - throw std::runtime_error("Invalid tool_choice: " + tool_choice); + throw std::invalid_argument("Invalid tool_choice: " + tool_choice); } bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) { @@ -186,17 +186,17 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa try { if (!messages.is_array()) { - throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump()); + throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump()); } for (const auto & message : messages) { if (!message.is_object()) { - throw std::runtime_error("Expected 'message' to be an object, got " + message.dump()); + throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump()); } common_chat_msg msg; if (!message.contains("role")) { - throw std::runtime_error("Missing 'role' in message: " + message.dump()); + throw std::invalid_argument("Missing 'role' in message: " + message.dump()); } msg.role = message.at("role"); @@ -209,11 +209,11 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa } else if (content.is_array()) { for (const auto & part : content) { if (!part.contains("type")) { - throw std::runtime_error("Missing content part type: " + part.dump()); + throw std::invalid_argument("Missing content part type: " + part.dump()); } const auto & type = part.at("type"); if (type != "text") { - throw std::runtime_error("Unsupported content part type: " + type.dump()); + throw std::invalid_argument("Unsupported content part type: " + type.dump()); } common_chat_msg_content_part msg_part; msg_part.type = type; @@ -221,25 +221,25 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa msg.content_parts.push_back(msg_part); } } else if (!content.is_null()) { - throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)"); + throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)"); } } if (has_tool_calls) { for (const auto & tool_call : message.at("tool_calls")) { common_chat_tool_call tc; if (!tool_call.contains("type")) { - throw std::runtime_error("Missing tool call type: " + tool_call.dump()); + throw std::invalid_argument("Missing tool call type: " + tool_call.dump()); } const auto & type = tool_call.at("type"); if (type != "function") { - throw std::runtime_error("Unsupported tool call type: " + tool_call.dump()); + throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump()); } if (!tool_call.contains("function")) { - throw std::runtime_error("Missing tool call function: " + tool_call.dump()); + throw std::invalid_argument("Missing tool call function: " + tool_call.dump()); } const auto & fc = tool_call.at("function"); if (!fc.contains("name")) { - throw std::runtime_error("Missing tool call name: " + tool_call.dump()); + throw std::invalid_argument("Missing tool call name: " + tool_call.dump()); } tc.name = fc.at("name"); tc.arguments = fc.at("arguments"); @@ -250,7 +250,7 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa } } if (!has_content && !has_tool_calls) { - throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)"); + throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)"); } if (message.contains("reasoning_content")) { msg.reasoning_content = message.at("reasoning_content"); @@ -353,18 +353,18 @@ std::vector common_chat_tools_parse_oaicompat(const json & too try { if (!tools.is_null()) { if (!tools.is_array()) { - throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump()); + throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump()); } for (const auto & tool : tools) { if (!tool.contains("type")) { - throw std::runtime_error("Missing tool type: " + tool.dump()); + throw std::invalid_argument("Missing tool type: " + tool.dump()); } const auto & type = tool.at("type"); if (!type.is_string() || type != "function") { - throw std::runtime_error("Unsupported tool type: " + tool.dump()); + throw std::invalid_argument("Unsupported tool type: " + tool.dump()); } if (!tool.contains("function")) { - throw std::runtime_error("Missing tool function: " + tool.dump()); + throw std::invalid_argument("Missing tool function: " + tool.dump()); } const auto & function = tool.at("function"); diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 0bbc4e858f2..9c4ae553482 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -819,26 +819,26 @@ json oaicompat_chat_params_parse( auto schema_wrapper = json_value(response_format, "json_schema", json::object()); json_schema = json_value(schema_wrapper, "schema", json::object()); } else if (!response_type.empty() && response_type != "text") { - throw std::runtime_error("response_format type must be one of \"text\" or \"json_object\", but got: " + response_type); + throw std::invalid_argument("response_format type must be one of \"text\" or \"json_object\", but got: " + response_type); } } // get input files if (!body.contains("messages")) { - throw std::runtime_error("'messages' is required"); + throw std::invalid_argument("'messages' is required"); } json & messages = body.at("messages"); if (!messages.is_array()) { - throw std::runtime_error("Expected 'messages' to be an array"); + throw std::invalid_argument("Expected 'messages' to be an array"); } for (auto & msg : messages) { std::string role = json_value(msg, "role", std::string()); if (role != "assistant" && !msg.contains("content")) { - throw std::runtime_error("All non-assistant messages must contain 'content'"); + throw std::invalid_argument("All non-assistant messages must contain 'content'"); } if (role == "assistant") { if (!msg.contains("content") && !msg.contains("tool_calls")) { - throw std::runtime_error("Assistant message must contain either 'content' or 'tool_calls'!"); + throw std::invalid_argument("Assistant message must contain either 'content' or 'tool_calls'!"); } if (!msg.contains("content")) { continue; // avoid errors with no content @@ -850,7 +850,7 @@ json oaicompat_chat_params_parse( } if (!content.is_array()) { - throw std::runtime_error("Expected 'content' to be a string or an array"); + throw std::invalid_argument("Expected 'content' to be a string or an array"); } for (auto & p : content) { @@ -884,11 +884,11 @@ json oaicompat_chat_params_parse( // try to decode base64 image std::vector parts = string_split(url, /*separator*/ ','); if (parts.size() != 2) { - throw std::runtime_error("Invalid image_url.url value"); + throw std::invalid_argument("Invalid image_url.url value"); } else if (!string_starts_with(parts[0], "data:image/")) { - throw std::runtime_error("Invalid image_url.url format: " + parts[0]); + throw std::invalid_argument("Invalid image_url.url format: " + parts[0]); } else if (!string_ends_with(parts[0], "base64")) { - throw std::runtime_error("image_url.url must be base64 encoded"); + throw std::invalid_argument("image_url.url must be base64 encoded"); } else { auto base64_data = parts[1]; auto decoded_data = base64_decode(base64_data); @@ -911,7 +911,7 @@ json oaicompat_chat_params_parse( std::string format = json_value(input_audio, "format", std::string()); // while we also support flac, we don't allow it here so we matches the OAI spec if (format != "wav" && format != "mp3") { - throw std::runtime_error("input_audio.format must be either 'wav' or 'mp3'"); + throw std::invalid_argument("input_audio.format must be either 'wav' or 'mp3'"); } auto decoded_data = base64_decode(data); // expected to be base64 encoded out_files.push_back(decoded_data); @@ -922,7 +922,7 @@ json oaicompat_chat_params_parse( p.erase("input_audio"); } else if (type != "text") { - throw std::runtime_error("unsupported content[].type"); + throw std::invalid_argument("unsupported content[].type"); } } } @@ -940,7 +940,7 @@ json oaicompat_chat_params_parse( inputs.enable_thinking = opt.enable_thinking; if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) { if (body.contains("grammar")) { - throw std::runtime_error("Cannot use custom grammar constraints with tools."); + throw std::invalid_argument("Cannot use custom grammar constraints with tools."); } llama_params["parse_tool_calls"] = true; } @@ -959,7 +959,7 @@ json oaicompat_chat_params_parse( } else if (enable_thinking_kwarg == "false") { inputs.enable_thinking = false; } else if (!enable_thinking_kwarg.empty() && enable_thinking_kwarg[0] == '"') { - throw std::runtime_error("invalid type for \"enable_thinking\" (expected boolean, got string)"); + throw std::invalid_argument("invalid type for \"enable_thinking\" (expected boolean, got string)"); } // if the assistant message appears at the end of list, we do not add end-of-turn token @@ -972,14 +972,14 @@ json oaicompat_chat_params_parse( /* sanity check, max one assistant message at the end of the list */ if (!inputs.messages.empty() && inputs.messages.back().role == "assistant"){ - throw std::runtime_error("Cannot have 2 or more assistant messages at the end of the list."); + throw std::invalid_argument("Cannot have 2 or more assistant messages at the end of the list."); } /* TODO: test this properly */ inputs.reasoning_format = COMMON_REASONING_FORMAT_NONE; if ( inputs.enable_thinking ) { - throw std::runtime_error("Assistant response prefill is incompatible with enable_thinking."); + throw std::invalid_argument("Assistant response prefill is incompatible with enable_thinking."); } inputs.add_generation_prompt = true; @@ -1020,18 +1020,18 @@ json oaicompat_chat_params_parse( // Handle "n" field int n_choices = json_value(body, "n", 1); if (n_choices != 1) { - throw std::runtime_error("Only one completion choice is allowed"); + throw std::invalid_argument("Only one completion choice is allowed"); } // Handle "logprobs" field // TODO: The response format of this option is not yet OAI-compatible, but seems like no one really using it; We may need to fix it in the future if (json_value(body, "logprobs", false)) { if (has_tools && stream) { - throw std::runtime_error("logprobs is not supported with tools + stream"); + throw std::invalid_argument("logprobs is not supported with tools + stream"); } llama_params["n_probs"] = json_value(body, "top_logprobs", 20); } else if (body.contains("top_logprobs") && !body.at("top_logprobs").is_null()) { - throw std::runtime_error("top_logprobs requires logprobs to be set to true"); + throw std::invalid_argument("top_logprobs requires logprobs to be set to true"); } // Copy remaining properties to llama_params diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 950537d82d0..16e52a1a0cd 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -34,18 +34,24 @@ static inline void signal_handler(int signal) { static server_http_context::handler_t ex_wrapper(server_http_context::handler_t func) { return [func = std::move(func)](const server_http_req & req) -> server_http_res_ptr { std::string message; + error_type error; try { return func(req); + } catch (const std::invalid_argument & e) { + error = ERROR_TYPE_INVALID_REQUEST; + message = e.what(); } catch (const std::exception & e) { + error = ERROR_TYPE_SERVER; message = e.what(); } catch (...) { + error = ERROR_TYPE_SERVER; message = "unknown error"; } auto res = std::make_unique(); res->status = 500; try { - json error_data = format_error_response(message, ERROR_TYPE_SERVER); + json error_data = format_error_response(message, error); res->status = json_value(error_data, "code", 500); res->data = safe_json_to_str({{ "error", error_data }}); SRV_WRN("got exception: %s\n", res->data.c_str()); From 3935aead33f5c8844aa6d6abee96cce382153a5b Mon Sep 17 00:00:00 2001 From: Chad Voegele Date: Sun, 30 Nov 2025 09:47:46 -0600 Subject: [PATCH 3/4] Fix test --- tools/server/tests/unit/test_chat_completion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py index 49a9a85bf29..c9a914b1e16 100644 --- a/tools/server/tests/unit/test_chat_completion.py +++ b/tools/server/tests/unit/test_chat_completion.py @@ -176,7 +176,7 @@ def test_apply_chat_template(): ({"type": "json_object", "schema": {"items": [{"type": "integer"}]}}, 10, "[ -3000 ]", 200), ({"type": "json_schema", "json_schema": {"schema": {"const": "foooooo"}}}, 10, "\"foooooo\"", 200), ({"type": "json_object"}, 10, "(\\{|John)+", 200), - ({"type": "sound"}, 0, None, 500), + ({"type": "sound"}, 0, None, 400), # invalid response format (expected to fail) ({"type": "json_object", "schema": 123}, 0, None, 400), ({"type": "json_object", "schema": {"type": 123}}, 0, None, 400), From 9caccbd0582091bc8f0c9f98578bbb835881b4f2 Mon Sep 17 00:00:00 2001 From: Chad Voegele Date: Sun, 30 Nov 2025 21:11:28 -0600 Subject: [PATCH 4/4] Simplify test back to original pattern --- .../server/tests/unit/test_chat_completion.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tools/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py index c9a914b1e16..4050df30c89 100644 --- a/tools/server/tests/unit/test_chat_completion.py +++ b/tools/server/tests/unit/test_chat_completion.py @@ -171,18 +171,18 @@ def test_apply_chat_template(): assert res.body["prompt"] == "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a test.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" -@pytest.mark.parametrize("response_format,n_predicted,re_content,expected_status_code", [ - ({"type": "json_object", "schema": {"const": "42"}}, 6, "\"42\"", 200), - ({"type": "json_object", "schema": {"items": [{"type": "integer"}]}}, 10, "[ -3000 ]", 200), - ({"type": "json_schema", "json_schema": {"schema": {"const": "foooooo"}}}, 10, "\"foooooo\"", 200), - ({"type": "json_object"}, 10, "(\\{|John)+", 200), - ({"type": "sound"}, 0, None, 400), +@pytest.mark.parametrize("response_format,n_predicted,re_content", [ + ({"type": "json_object", "schema": {"const": "42"}}, 6, "\"42\""), + ({"type": "json_object", "schema": {"items": [{"type": "integer"}]}}, 10, "[ -3000 ]"), + ({"type": "json_schema", "json_schema": {"schema": {"const": "foooooo"}}}, 10, "\"foooooo\""), + ({"type": "json_object"}, 10, "(\\{|John)+"), + ({"type": "sound"}, 0, None), # invalid response format (expected to fail) - ({"type": "json_object", "schema": 123}, 0, None, 400), - ({"type": "json_object", "schema": {"type": 123}}, 0, None, 400), - ({"type": "json_object", "schema": {"type": "hiccup"}}, 0, None, 400), + ({"type": "json_object", "schema": 123}, 0, None), + ({"type": "json_object", "schema": {"type": 123}}, 0, None), + ({"type": "json_object", "schema": {"type": "hiccup"}}, 0, None), ]) -def test_completion_with_response_format(response_format: dict, n_predicted: int, re_content: str | None, expected_status_code: int): +def test_completion_with_response_format(response_format: dict, n_predicted: int, re_content: str | None): global server server.start() res = server.make_request("POST", "/chat/completions", data={ @@ -193,11 +193,12 @@ def test_completion_with_response_format(response_format: dict, n_predicted: int ], "response_format": response_format, }) - assert res.status_code == expected_status_code if re_content is not None: + assert res.status_code == 200 choice = res.body["choices"][0] assert match_regex(re_content, choice["message"]["content"]) else: + assert res.status_code == 400 assert "error" in res.body