From e8f8c2c71189e9074b810cb14376c413a1eadcd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= <sigbjorn.skjaeret@scala.com>
Date: Tue, 24 Jun 2025 12:01:37 +0200
Subject: [PATCH 1/3] fix assistant prefilling when content is an array

---
 tools/server/utils.hpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp
index f8fab2c86664e..13e2a893b2c63 100644
--- a/tools/server/utils.hpp
+++ b/tools/server/utils.hpp
@@ -779,7 +779,13 @@ static json oaicompat_chat_params_parse(
 
     /* Append assistant prefilled message */
     if (prefill_assistant_message) {
-         chat_params.prompt += last_message.content;
+        if (last_message.content.is_array()) {
+            for (auto & p : last_message.content) {
+                chat_params.prompt += p["text"];
+            }
+        } else {
+            chat_params.prompt += last_message.content;
+        }
     }
 
     llama_params["chat_format"]      = static_cast<int>(chat_params.format);

From 1500690046cbf67c49200449585f61b40ff4c486 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= <sigbjorn.skjaeret@scala.com>
Date: Tue, 24 Jun 2025 12:21:04 +0200
Subject: [PATCH 2/3] text is stored in content_parts

---
 tools/server/utils.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp
index 13e2a893b2c63..6add39830cba1 100644
--- a/tools/server/utils.hpp
+++ b/tools/server/utils.hpp
@@ -779,9 +779,9 @@ static json oaicompat_chat_params_parse(
 
     /* Append assistant prefilled message */
     if (prefill_assistant_message) {
-        if (last_message.content.is_array()) {
-            for (auto & p : last_message.content) {
-                chat_params.prompt += p["text"];
+        if (!last_message.content_parts.empty()) {
+            for (auto & p : last_message.content_parts) {
+                chat_params.prompt += p.text;
             }
         } else {
             chat_params.prompt += last_message.content;

From 2aac8e81b06c0796369a061a6afe4409e7c2189a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= <sigbjorn.skjaeret@scala.com>
Date: Tue, 24 Jun 2025 14:02:34 +0200
Subject: [PATCH 3/3] add tests

---
 .../server/tests/unit/test_chat_completion.py | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py
index 1b5205f79d610..7ee9a1651400d 100644
--- a/tools/server/tests/unit/test_chat_completion.py
+++ b/tools/server/tests/unit/test_chat_completion.py
@@ -132,6 +132,28 @@ def test_chat_template():
     assert res.body["__verbose"]["prompt"] == "<s> <|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
 
 
+@pytest.mark.parametrize("prefill,re_prefill", [  # (assistant message content, text expected at the end of the prompt)
+    ("Whill", "Whill"),  # content given as a plain string
+    ([{"type": "text", "text": "Wh"}, {"type": "text", "text": "ill"}], "Whill"),  # content given as an array of text parts; expected joined in order
+])
+def test_chat_template_assistant_prefill(prefill, re_prefill):
+    global server  # module-level server object, also used by the other tests in this file
+    server.chat_template = "llama3"  # the prompt asserted below is the rendering expected from this template
+    server.debug = True  # to get the "__verbose" object in the response
+    server.start()
+    res = server.make_request("POST", "/chat/completions", data={
+        "max_tokens": 8,
+        "messages": [
+            {"role": "system", "content": "Book"},
+            {"role": "user", "content": "What is the best book"},
+            {"role": "assistant", "content": prefill},  # conversation ends on an assistant message: the prefill under test
+        ]
+    })
+    assert res.status_code == 200
+    assert "__verbose" in res.body
+    assert res.body["__verbose"]["prompt"] == f"<s> <|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{re_prefill}"  # prompt must end with the prefill text itself, with no <|eot_id|> after it
+
+
 def test_apply_chat_template():
     global server
     server.chat_template = "command-r"
@@ -228,6 +250,7 @@ def test_completion_with_grammar(jinja: bool, grammar: str, n_predicted: int, re
     [{"role": "system", "content": 123}],
     # [{"content": "hello"}], # TODO: should not be a valid case
     [{"role": "system", "content": "test"}, {}],
+    [{"role": "user", "content": "test"}, {"role": "assistant", "content": "test"}, {"role": "assistant", "content": "test"}],
 ])
 def test_invalid_chat_completion_req(messages):
     global server