Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 30 additions & 7 deletions vllm/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
truncate_tool_call_ids,
validate_request_params,
)
from vllm.utils import as_list
from vllm.utils import as_list, random_uuid

logger = init_logger(__name__)

Expand Down Expand Up @@ -255,10 +255,19 @@ async def create_chat_completion(
except (ValueError, TypeError, RuntimeError, jinja2.TemplateError) as e:
logger.exception("Error in preprocessing prompt inputs")
return self.create_error_response(f"{e} {e.__cause__}")

request_id = self._add_prefix(getattr(request, "request_id", None))

request_id = (
f"chatcmpl-{self._base_request_id(raw_request, request.request_id)}"
)
if request.kv_transfer_params:
request_id = self._add_prefix(request.kv_transfer_params.get("p_side_request_id", request_id))
Comment on lines +259 to +262

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Stop using client request_id as internal identifier

The new logic still seeds the internal request_id from request.request_id and from kv_transfer_params["p_side_request_id"], only adding a prefix. When a client provides either of these fields, the value is used verbatim for routing, logging and the engine call, so the server never generates a unique ID of its own. This leaves the system vulnerable to the same request-id collisions the change set is intended to prevent. The internal ID should be generated with random_uuid() regardless of client input and any client-supplied identifier should be kept separate for response echoing only.

Useful? React with 👍 / 👎.


default = request.request_id or request_id
if raw_request is None:
req_id_head = default
else:
req_id_head = raw_request.headers.get("X-Request-ID")

raw_request_id = self._add_prefix(req_id_head) if req_id_head else request_id
Comment on lines +259 to +270
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The current logic for determining the internal request_id does not align with the PR's goal of forcing a server-generated ID to prevent collisions. When a client provides a request_id, it is used as the basis for the internal ID, which can lead to the very collisions this change aims to prevent.

The internal request ID should always be a newly generated, globally unique identifier on the P-side to ensure system stability and clear observability. The client-provided ID should only be used for the response to maintain backward compatibility.

I've suggested a refactoring of this logic to correctly and clearly separate the generation of the internal ID from the determination of the response ID. This change makes the code safer and easier to understand.

        # P-side always generates a globally unique internal_request_id.
        # D-side receives it from P-side.
        if request.kv_transfer_params:
            # D-side: use the ID from P-side. If it's not present,
            # generate a new one to avoid using a client-provided ID.
            internal_request_id = self._add_prefix(
                request.kv_transfer_params.get("p_side_request_id"))
        else:
            # P-side: always generate a new unique ID for internal processing.
            internal_request_id = self._add_prefix(None)

        # For backward compatibility, the request_id in the response is
        # determined by the following order of precedence:
        # 1. X-Request-ID header
        # 2. client-supplied request.request_id
        # 3. server-generated internal_request_id
        header_id = raw_request.headers.get("X-Request-ID") if raw_request else None
        client_id = getattr(request, "request_id", None)

        # The ID to be returned in the response.
        response_id = self._add_prefix(header_id or client_id or internal_request_id)

        # Use the unique internal ID for all internal processing.
        request_id = internal_request_id
        # Use the determined response ID for the final response.
        raw_request_id = response_id

Comment on lines +264 to +270

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Response request_id no longer reflects client-provided value

The response ID returned to the user is computed from raw_request.headers['X-Request-ID'] or the internal request_id (which may be random). When a client supplies request_id in the request body but no X-Request-ID header, raw_request_id is set to the internally generated ID, not the client value. This contradicts the stated goal of echoing the client-supplied ID for backward compatibility and may break clients that rely on receiving their original request ID.

Useful? React with 👍 / 👎.


request_metadata = RequestResponseMetadata(request_id=request_id)
if raw_request:
Expand Down Expand Up @@ -346,18 +355,20 @@ async def create_chat_completion(
return self.chat_completion_stream_generator(
request,
result_generator,
request_id,
raw_request_id, # the real request ID to return to user
model_name,
conversation,
tokenizer,
request_metadata,
)

try:
# The P side in P/D disaggregation always calls the full generator, so there is no need to pass the internal request id to the streaming generator
return await self.chat_completion_full_generator(
request,
result_generator,
request_id,
raw_request_id, # the real request ID to return to user
request_id, # the internal vLLM request ID, pass it to D side in kv_transfer_params
model_name,
conversation,
tokenizer,
Expand All @@ -367,6 +378,13 @@ async def create_chat_completion(
# TODO: Use a vllm-specific Validation Error
return self.create_error_response(str(e))

def _add_prefix(self, request_id, prefix="chatcmpl-"):
if request_id and not request_id.startswith(prefix):
return prefix + request_id
elif not request_id:
return prefix + random_uuid()
return request_id

def get_chat_request_role(self, request: ChatCompletionRequest) -> str:
if request.add_generation_prompt:
return self.response_role
Expand Down Expand Up @@ -1251,7 +1269,8 @@ async def chat_completion_full_generator(
self,
request: ChatCompletionRequest,
result_generator: AsyncIterator[RequestOutput],
request_id: str,
request_id: str, # the real request ID to return to user
vllm_request_id: str, # the internal vLLM request ID, pass it to D side in kv_transfer_params
model_name: str,
conversation: list[ConversationMessage],
tokenizer: AnyTokenizer,
Expand All @@ -1271,6 +1290,10 @@ async def chat_completion_full_generator(

assert final_res is not None

# Pass the internal request id in P side to D side in kv_transfer_params
if final_res.kv_transfer_params:
final_res.kv_transfer_params["p_side_request_id"] = vllm_request_id

choices: list[ChatCompletionResponseChoice] = []
if self.tool_call_id_type == "kimi_k2":
history_tool_call_cnt = get_history_tool_calls_cnt(conversation)
Expand Down