Skip to content

Commit c4357dc

Browse files
authored
Server: Change Invalid Schema from Server Error (500) to User Error (400) (#17572)
* Make invalid schema a user error (400)
* Move invalid_argument exception handler to ex_wrapper
* Fix test
* Simplify test back to original pattern
1 parent e148380 commit c4357dc

File tree

6 files changed

+44
-38
lines changed

6 files changed

+44
-38
lines changed

common/chat.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
163163
if (tool_choice == "required") {
164164
return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
165165
}
166-
throw std::runtime_error("Invalid tool_choice: " + tool_choice);
166+
throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
167167
}
168168

169169
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
@@ -186,17 +186,17 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
186186
try {
187187

188188
if (!messages.is_array()) {
189-
throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump());
189+
throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
190190
}
191191

192192
for (const auto & message : messages) {
193193
if (!message.is_object()) {
194-
throw std::runtime_error("Expected 'message' to be an object, got " + message.dump());
194+
throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump());
195195
}
196196

197197
common_chat_msg msg;
198198
if (!message.contains("role")) {
199-
throw std::runtime_error("Missing 'role' in message: " + message.dump());
199+
throw std::invalid_argument("Missing 'role' in message: " + message.dump());
200200
}
201201
msg.role = message.at("role");
202202

@@ -209,37 +209,37 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
209209
} else if (content.is_array()) {
210210
for (const auto & part : content) {
211211
if (!part.contains("type")) {
212-
throw std::runtime_error("Missing content part type: " + part.dump());
212+
throw std::invalid_argument("Missing content part type: " + part.dump());
213213
}
214214
const auto & type = part.at("type");
215215
if (type != "text") {
216-
throw std::runtime_error("Unsupported content part type: " + type.dump());
216+
throw std::invalid_argument("Unsupported content part type: " + type.dump());
217217
}
218218
common_chat_msg_content_part msg_part;
219219
msg_part.type = type;
220220
msg_part.text = part.at("text");
221221
msg.content_parts.push_back(msg_part);
222222
}
223223
} else if (!content.is_null()) {
224-
throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
224+
throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
225225
}
226226
}
227227
if (has_tool_calls) {
228228
for (const auto & tool_call : message.at("tool_calls")) {
229229
common_chat_tool_call tc;
230230
if (!tool_call.contains("type")) {
231-
throw std::runtime_error("Missing tool call type: " + tool_call.dump());
231+
throw std::invalid_argument("Missing tool call type: " + tool_call.dump());
232232
}
233233
const auto & type = tool_call.at("type");
234234
if (type != "function") {
235-
throw std::runtime_error("Unsupported tool call type: " + tool_call.dump());
235+
throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump());
236236
}
237237
if (!tool_call.contains("function")) {
238-
throw std::runtime_error("Missing tool call function: " + tool_call.dump());
238+
throw std::invalid_argument("Missing tool call function: " + tool_call.dump());
239239
}
240240
const auto & fc = tool_call.at("function");
241241
if (!fc.contains("name")) {
242-
throw std::runtime_error("Missing tool call name: " + tool_call.dump());
242+
throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
243243
}
244244
tc.name = fc.at("name");
245245
tc.arguments = fc.at("arguments");
@@ -250,7 +250,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
250250
}
251251
}
252252
if (!has_content && !has_tool_calls) {
253-
throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
253+
throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
254254
}
255255
if (message.contains("reasoning_content")) {
256256
msg.reasoning_content = message.at("reasoning_content");
@@ -353,18 +353,18 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
353353
try {
354354
if (!tools.is_null()) {
355355
if (!tools.is_array()) {
356-
throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump());
356+
throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump());
357357
}
358358
for (const auto & tool : tools) {
359359
if (!tool.contains("type")) {
360-
throw std::runtime_error("Missing tool type: " + tool.dump());
360+
throw std::invalid_argument("Missing tool type: " + tool.dump());
361361
}
362362
const auto & type = tool.at("type");
363363
if (!type.is_string() || type != "function") {
364-
throw std::runtime_error("Unsupported tool type: " + tool.dump());
364+
throw std::invalid_argument("Unsupported tool type: " + tool.dump());
365365
}
366366
if (!tool.contains("function")) {
367-
throw std::runtime_error("Missing tool function: " + tool.dump());
367+
throw std::invalid_argument("Missing tool function: " + tool.dump());
368368
}
369369

370370
const auto & function = tool.at("function");

common/json-schema-to-grammar.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -974,7 +974,7 @@ class SchemaConverter {
974974

975975
void check_errors() {
976976
if (!_errors.empty()) {
977-
throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
977+
throw std::invalid_argument("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
978978
}
979979
if (!_warnings.empty()) {
980980
fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str());

tests/test-json-schema-to-grammar.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1375,7 +1375,7 @@ int main() {
13751375
try {
13761376
tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema), true));
13771377
tc.verify_status(SUCCESS);
1378-
} catch (const std::runtime_error & ex) {
1378+
} catch (const std::invalid_argument & ex) {
13791379
fprintf(stderr, "Error: %s\n", ex.what());
13801380
tc.verify_status(FAILURE);
13811381
}

tools/server/server-common.cpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -819,26 +819,26 @@ json oaicompat_chat_params_parse(
819819
auto schema_wrapper = json_value(response_format, "json_schema", json::object());
820820
json_schema = json_value(schema_wrapper, "schema", json::object());
821821
} else if (!response_type.empty() && response_type != "text") {
822-
throw std::runtime_error("response_format type must be one of \"text\" or \"json_object\", but got: " + response_type);
822+
throw std::invalid_argument("response_format type must be one of \"text\" or \"json_object\", but got: " + response_type);
823823
}
824824
}
825825

826826
// get input files
827827
if (!body.contains("messages")) {
828-
throw std::runtime_error("'messages' is required");
828+
throw std::invalid_argument("'messages' is required");
829829
}
830830
json & messages = body.at("messages");
831831
if (!messages.is_array()) {
832-
throw std::runtime_error("Expected 'messages' to be an array");
832+
throw std::invalid_argument("Expected 'messages' to be an array");
833833
}
834834
for (auto & msg : messages) {
835835
std::string role = json_value(msg, "role", std::string());
836836
if (role != "assistant" && !msg.contains("content")) {
837-
throw std::runtime_error("All non-assistant messages must contain 'content'");
837+
throw std::invalid_argument("All non-assistant messages must contain 'content'");
838838
}
839839
if (role == "assistant") {
840840
if (!msg.contains("content") && !msg.contains("tool_calls")) {
841-
throw std::runtime_error("Assistant message must contain either 'content' or 'tool_calls'!");
841+
throw std::invalid_argument("Assistant message must contain either 'content' or 'tool_calls'!");
842842
}
843843
if (!msg.contains("content")) {
844844
continue; // avoid errors with no content
@@ -850,7 +850,7 @@ json oaicompat_chat_params_parse(
850850
}
851851

852852
if (!content.is_array()) {
853-
throw std::runtime_error("Expected 'content' to be a string or an array");
853+
throw std::invalid_argument("Expected 'content' to be a string or an array");
854854
}
855855

856856
for (auto & p : content) {
@@ -884,11 +884,11 @@ json oaicompat_chat_params_parse(
884884
// try to decode base64 image
885885
std::vector<std::string> parts = string_split<std::string>(url, /*separator*/ ',');
886886
if (parts.size() != 2) {
887-
throw std::runtime_error("Invalid image_url.url value");
887+
throw std::invalid_argument("Invalid image_url.url value");
888888
} else if (!string_starts_with(parts[0], "data:image/")) {
889-
throw std::runtime_error("Invalid image_url.url format: " + parts[0]);
889+
throw std::invalid_argument("Invalid image_url.url format: " + parts[0]);
890890
} else if (!string_ends_with(parts[0], "base64")) {
891-
throw std::runtime_error("image_url.url must be base64 encoded");
891+
throw std::invalid_argument("image_url.url must be base64 encoded");
892892
} else {
893893
auto base64_data = parts[1];
894894
auto decoded_data = base64_decode(base64_data);
@@ -911,7 +911,7 @@ json oaicompat_chat_params_parse(
911911
std::string format = json_value(input_audio, "format", std::string());
912912
// while we also support flac, we don't allow it here so we matches the OAI spec
913913
if (format != "wav" && format != "mp3") {
914-
throw std::runtime_error("input_audio.format must be either 'wav' or 'mp3'");
914+
throw std::invalid_argument("input_audio.format must be either 'wav' or 'mp3'");
915915
}
916916
auto decoded_data = base64_decode(data); // expected to be base64 encoded
917917
out_files.push_back(decoded_data);
@@ -922,7 +922,7 @@ json oaicompat_chat_params_parse(
922922
p.erase("input_audio");
923923

924924
} else if (type != "text") {
925-
throw std::runtime_error("unsupported content[].type");
925+
throw std::invalid_argument("unsupported content[].type");
926926
}
927927
}
928928
}
@@ -940,7 +940,7 @@ json oaicompat_chat_params_parse(
940940
inputs.enable_thinking = opt.enable_thinking;
941941
if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
942942
if (body.contains("grammar")) {
943-
throw std::runtime_error("Cannot use custom grammar constraints with tools.");
943+
throw std::invalid_argument("Cannot use custom grammar constraints with tools.");
944944
}
945945
llama_params["parse_tool_calls"] = true;
946946
}
@@ -959,7 +959,7 @@ json oaicompat_chat_params_parse(
959959
} else if (enable_thinking_kwarg == "false") {
960960
inputs.enable_thinking = false;
961961
} else if (!enable_thinking_kwarg.empty() && enable_thinking_kwarg[0] == '"') {
962-
throw std::runtime_error("invalid type for \"enable_thinking\" (expected boolean, got string)");
962+
throw std::invalid_argument("invalid type for \"enable_thinking\" (expected boolean, got string)");
963963
}
964964

965965
// if the assistant message appears at the end of list, we do not add end-of-turn token
@@ -972,14 +972,14 @@ json oaicompat_chat_params_parse(
972972

973973
/* sanity check, max one assistant message at the end of the list */
974974
if (!inputs.messages.empty() && inputs.messages.back().role == "assistant"){
975-
throw std::runtime_error("Cannot have 2 or more assistant messages at the end of the list.");
975+
throw std::invalid_argument("Cannot have 2 or more assistant messages at the end of the list.");
976976
}
977977

978978
/* TODO: test this properly */
979979
inputs.reasoning_format = COMMON_REASONING_FORMAT_NONE;
980980

981981
if ( inputs.enable_thinking ) {
982-
throw std::runtime_error("Assistant response prefill is incompatible with enable_thinking.");
982+
throw std::invalid_argument("Assistant response prefill is incompatible with enable_thinking.");
983983
}
984984

985985
inputs.add_generation_prompt = true;
@@ -1020,18 +1020,18 @@ json oaicompat_chat_params_parse(
10201020
// Handle "n" field
10211021
int n_choices = json_value(body, "n", 1);
10221022
if (n_choices != 1) {
1023-
throw std::runtime_error("Only one completion choice is allowed");
1023+
throw std::invalid_argument("Only one completion choice is allowed");
10241024
}
10251025

10261026
// Handle "logprobs" field
10271027
// TODO: The response format of this option is not yet OAI-compatible, but seems like no one really using it; We may need to fix it in the future
10281028
if (json_value(body, "logprobs", false)) {
10291029
if (has_tools && stream) {
1030-
throw std::runtime_error("logprobs is not supported with tools + stream");
1030+
throw std::invalid_argument("logprobs is not supported with tools + stream");
10311031
}
10321032
llama_params["n_probs"] = json_value(body, "top_logprobs", 20);
10331033
} else if (body.contains("top_logprobs") && !body.at("top_logprobs").is_null()) {
1034-
throw std::runtime_error("top_logprobs requires logprobs to be set to true");
1034+
throw std::invalid_argument("top_logprobs requires logprobs to be set to true");
10351035
}
10361036

10371037
// Copy remaining properties to llama_params

tools/server/server.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,24 @@ static inline void signal_handler(int signal) {
3434
static server_http_context::handler_t ex_wrapper(server_http_context::handler_t func) {
3535
return [func = std::move(func)](const server_http_req & req) -> server_http_res_ptr {
3636
std::string message;
37+
error_type error;
3738
try {
3839
return func(req);
40+
} catch (const std::invalid_argument & e) {
41+
error = ERROR_TYPE_INVALID_REQUEST;
42+
message = e.what();
3943
} catch (const std::exception & e) {
44+
error = ERROR_TYPE_SERVER;
4045
message = e.what();
4146
} catch (...) {
47+
error = ERROR_TYPE_SERVER;
4248
message = "unknown error";
4349
}
4450

4551
auto res = std::make_unique<server_http_res>();
4652
res->status = 500;
4753
try {
48-
json error_data = format_error_response(message, ERROR_TYPE_SERVER);
54+
json error_data = format_error_response(message, error);
4955
res->status = json_value(error_data, "code", 500);
5056
res->data = safe_json_to_str({{ "error", error_data }});
5157
SRV_WRN("got exception: %s\n", res->data.c_str());

tools/server/tests/unit/test_chat_completion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def test_completion_with_response_format(response_format: dict, n_predicted: int
199199
choice = res.body["choices"][0]
200200
assert match_regex(re_content, choice["message"]["content"])
201201
else:
202-
assert res.status_code != 200
202+
assert res.status_code == 400
203203
assert "error" in res.body
204204

205205

0 commit comments

Comments (0)