From cad9a19ed7dce687372806586c65ca605152a887 Mon Sep 17 00:00:00 2001
From: HyunKyun Moon
Date: Fri, 30 Jan 2026 01:50:33 +0000
Subject: [PATCH] fix

Signed-off-by: HyunKyun Moon
---
 .../openai/test_render_no_inference.py        | 24 +++++++++++++++----
 .../openai/chat_completion/api_router.py      |  7 +++++-
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/tests/entrypoints/openai/test_render_no_inference.py b/tests/entrypoints/openai/test_render_no_inference.py
index b2ce89a0b357..8383ccc778b0 100644
--- a/tests/entrypoints/openai/test_render_no_inference.py
+++ b/tests/entrypoints/openai/test_render_no_inference.py
@@ -13,7 +13,7 @@
 
 from ...utils import RemoteOpenAIServer
 
-MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
+MODEL_NAME = "openai/gpt-oss-20b"
 
 
 @pytest.fixture(scope="module")
@@ -60,9 +60,25 @@
 
     conversation, engine_prompts = data
 
-    # Verify conversation is preserved
-    assert conversation[0]["role"] == "user"
-    assert "Hello" in conversation[0]["content"]
+    # Verify conversation contains messages
+    assert len(conversation) > 0
+
+    # Find the user message (Harmony models use 'author' dict instead of 'role')
+    user_msg = None
+    for msg in conversation:
+        # Check for direct 'role' key (standard format)
+        if msg.get("role") == "user":
+            user_msg = msg
+            break
+        # Check for nested 'author' dict (Harmony format)
+        author = msg.get("author")
+        if isinstance(author, dict) and author.get("role") == "user":
+            user_msg = msg
+            break
+
+    assert user_msg is not None, (
+        f"User message not found in conversation: {conversation}"
+    )
 
     # Verify tokenization occurred
     assert len(engine_prompts) > 0
diff --git a/vllm/entrypoints/openai/chat_completion/api_router.py b/vllm/entrypoints/openai/chat_completion/api_router.py
index d3576ab24aea..939b8ff125f9 100644
--- a/vllm/entrypoints/openai/chat_completion/api_router.py
+++ b/vllm/entrypoints/openai/chat_completion/api_router.py
@@ -101,7 +101,12 @@ async def render_chat_completion(request: ChatCompletionRequest, raw_request: Re
 
     if isinstance(result, ErrorResponse):
         return JSONResponse(content=result.model_dump(), status_code=result.error.code)
-    return JSONResponse(content=result)
+    conversation, engine_prompts = result
+    # Serialize Pydantic models (Harmony) or pass through TypedDicts as-is
+    serialized_conversation = [
+        getattr(msg, "model_dump", lambda m=msg: m)() for msg in conversation
+    ]
+    return JSONResponse(content=[serialized_conversation, engine_prompts])
 
 
 def attach_router(app: FastAPI):