Chat API: Force server-generated request_id to avoid collisions; improve clarity and safety #27189
```diff
@@ -70,7 +70,7 @@
     truncate_tool_call_ids,
     validate_request_params,
 )
-from vllm.utils import as_list
+from vllm.utils import as_list, random_uuid
 
 
 logger = init_logger(__name__)
```
```diff
@@ -255,10 +255,19 @@ async def create_chat_completion(
         except (ValueError, TypeError, RuntimeError, jinja2.TemplateError) as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(f"{e} {e.__cause__}")
 
-        request_id = (
-            f"chatcmpl-{self._base_request_id(raw_request, request.request_id)}"
-        )
+        request_id = self._add_prefix(getattr(request, "request_id", None))
+
+        if request.kv_transfer_params:
+            request_id = self._add_prefix(request.kv_transfer_params.get("p_side_request_id", request_id))
+
+        default = request.request_id or request_id
+        if raw_request is None:
+            req_id_head = default
+        else:
+            req_id_head = raw_request.headers.get("X-Request-ID")
+
+        raw_request_id = self._add_prefix(req_id_head) if req_id_head else request_id
```
Contributor commented on lines +259 to +270:

The current logic for determining the internal request ID still lets a client-provided value flow into internal processing. The internal request ID should always be a newly generated, globally unique identifier on the P-side to ensure system stability and clear observability. The client-provided ID should only be used for the response to maintain backward compatibility. I've suggested a refactoring of this logic to correctly and clearly separate the generation of the internal ID from the determination of the response ID. This change makes the code safer and easier to understand.

```python
# P-side always generates a globally unique internal_request_id.
# D-side receives it from P-side.
if request.kv_transfer_params:
    # D-side: use the ID from P-side. If it's not present,
    # generate a new one to avoid using a client-provided ID.
    internal_request_id = self._add_prefix(
        request.kv_transfer_params.get("p_side_request_id"))
else:
    # P-side: always generate a new unique ID for internal processing.
    internal_request_id = self._add_prefix(None)

# For backward compatibility, the request_id in the response is
# determined by the following order of precedence:
# 1. X-Request-ID header
# 2. client-supplied request.request_id
# 3. server-generated internal_request_id
header_id = raw_request.headers.get("X-Request-ID") if raw_request else None
client_id = getattr(request, "request_id", None)

# The ID to be returned in the response.
response_id = self._add_prefix(header_id or client_id or internal_request_id)

# Use the unique internal ID for all internal processing.
request_id = internal_request_id
# Use the determined response ID for the final response.
raw_request_id = response_id
```
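As a quick illustration of the precedence chain in the suggestion above (the values below are hypothetical, not part of the diff):

```python
# Hypothetical inputs to the suggested precedence chain.
header_id = None                        # no X-Request-ID header was sent
client_id = "my-client-id"              # client-supplied request.request_id
internal_request_id = "chatcmpl-1234"   # server-generated internal ID

# Mirrors `header_id or client_id or internal_request_id` from the suggestion:
# the first truthy value wins, so the client ID is echoed back in the response,
# while the server keeps using internal_request_id for all internal processing.
response_id = header_id or client_id or internal_request_id
print(response_id)  # -> "my-client-id" (the chatcmpl- prefix is added by _add_prefix)
```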
Comment on lines +264 to +270:

The response ID returned to the user is computed from …
```diff
 
         request_metadata = RequestResponseMetadata(request_id=request_id)
         if raw_request:
```
```diff
@@ -346,18 +355,20 @@ async def create_chat_completion(
             return self.chat_completion_stream_generator(
                 request,
                 result_generator,
-                request_id,
+                raw_request_id,  # the real request ID to return to user
                 model_name,
                 conversation,
                 tokenizer,
                 request_metadata,
             )
 
         try:
+            # P side in P/D disaggregation always calls the full generator, no need to pass the internal request id to the streaming generator
             return await self.chat_completion_full_generator(
                 request,
                 result_generator,
-                request_id,
+                raw_request_id,  # the real request ID to return to user
+                request_id,  # the internal vLLM request ID, pass it to D side in kv_transfer_params
                 model_name,
                 conversation,
                 tokenizer,
```
```diff
@@ -367,6 +378,13 @@
                 # TODO: Use a vllm-specific Validation Error
                 return self.create_error_response(str(e))
 
+    def _add_prefix(self, request_id, prefix="chatcmpl-"):
+        if request_id and not request_id.startswith(prefix):
+            return prefix + request_id
+        elif not request_id:
+            return prefix + random_uuid()
+        return request_id
+
     def get_chat_request_role(self, request: ChatCompletionRequest) -> str:
         if request.add_generation_prompt:
             return self.response_role
```
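For reference, a minimal standalone sketch of how the new helper behaves. It assumes `random_uuid()` returns a fresh UUID hex string, as `vllm.utils.random_uuid` does; the `uuid`-based stand-in below is only for illustration:

```python
import uuid


def random_uuid() -> str:
    # Stand-in for vllm.utils.random_uuid(); assumed to return a fresh UUID string.
    return str(uuid.uuid4().hex)


def add_prefix(request_id, prefix="chatcmpl-"):
    # Same three branches as the new _add_prefix helper in the diff.
    if request_id and not request_id.startswith(prefix):
        return prefix + request_id        # unprefixed client ID -> prefix it
    elif not request_id:
        return prefix + random_uuid()     # no ID at all -> generate one
    return request_id                     # already prefixed -> returned unchanged


print(add_prefix("abc123"))           # chatcmpl-abc123
print(add_prefix("chatcmpl-abc123"))  # chatcmpl-abc123 (unchanged)
print(add_prefix(None))               # chatcmpl-<random uuid>
```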
```diff
@@ -1251,7 +1269,8 @@ async def chat_completion_full_generator(
         self,
         request: ChatCompletionRequest,
         result_generator: AsyncIterator[RequestOutput],
-        request_id: str,
+        request_id: str,  # the real request ID to return to user
+        vllm_request_id: str,  # the internal vLLM request ID, pass it to D side in kv_transfer_params
         model_name: str,
         conversation: list[ConversationMessage],
         tokenizer: AnyTokenizer,
```
```diff
@@ -1271,6 +1290,10 @@
 
         assert final_res is not None
 
+        # Pass the internal request id from the P side to the D side in kv_transfer_params
+        if final_res.kv_transfer_params:
+            final_res.kv_transfer_params["p_side_request_id"] = vllm_request_id
+
         choices: list[ChatCompletionResponseChoice] = []
         if self.tool_call_id_type == "kimi_k2":
             history_tool_call_cnt = get_history_tool_calls_cnt(conversation)
```
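To see how the two halves fit together, here is a plain-dict sketch of the intended P-to-D handoff. Only the `p_side_request_id` key comes from the diff; the surrounding values and variable names are hypothetical:

```python
# P side (prefill): after the full generator finishes, it attaches its
# server-side internal request ID to the kv_transfer_params it returns.
p_internal_request_id = "chatcmpl-generated-on-p-side"      # hypothetical ID
kv_transfer_params = {"p_side_request_id": p_internal_request_id}

# D side (decode): when the follow-up request arrives, it prefers the ID handed
# over by the P side, mirroring
# request.kv_transfer_params.get("p_side_request_id", request_id) in the diff.
fallback_request_id = "chatcmpl-locally-generated"          # hypothetical fallback
d_internal_request_id = kv_transfer_params.get("p_side_request_id", fallback_request_id)

assert d_internal_request_id == p_internal_request_id
# Both sides now share one internal ID, which makes cross-instance tracing possible.
```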
Review comment:

The new logic still seeds the internal `request_id` from `request.request_id` and from `kv_transfer_params["p_side_request_id"]`, only adding a prefix. When a client provides either of these fields, the value is used verbatim for routing, logging, and the engine call, so the server never generates a unique ID of its own. This leaves the system vulnerable to the same request-id collisions the change set is intended to prevent. The internal ID should be generated with `random_uuid()` regardless of client input, and any client-supplied identifier should be kept separate for response echoing only.
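A minimal, self-contained sketch of the approach this reviewer describes. The function and parameter names are hypothetical, and `uuid.uuid4().hex` stands in for vLLM's `random_uuid()`:

```python
import uuid


def make_request_ids(client_request_id=None, header_request_id=None):
    """Illustrative only: internal ID is always server-generated; the
    client-supplied ID (body field or X-Request-ID header) is kept solely
    for echoing back in the response."""
    internal_request_id = f"chatcmpl-{uuid.uuid4().hex}"  # never client-derived
    response_request_id = (
        header_request_id or client_request_id or internal_request_id
    )
    return internal_request_id, response_request_id


# Two clients reusing the same request_id still get distinct internal IDs.
a_internal, a_response = make_request_ids(client_request_id="dup-id")
b_internal, b_response = make_request_ids(client_request_id="dup-id")
assert a_internal != b_internal               # no collision server-side
assert a_response == b_response == "dup-id"   # each client still sees its own ID echoed
```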