From c506ad7f2a1532a581c1008c068e9643de430b2a Mon Sep 17 00:00:00 2001 From: Seiji Eicher Date: Tue, 28 Oct 2025 22:28:48 -0700 Subject: [PATCH 1/3] Make request IDs unique Signed-off-by: Seiji Eicher --- vllm/benchmarks/lib/endpoint_request_func.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/benchmarks/lib/endpoint_request_func.py b/vllm/benchmarks/lib/endpoint_request_func.py index ed0fdec25186..217dee66a739 100644 --- a/vllm/benchmarks/lib/endpoint_request_func.py +++ b/vllm/benchmarks/lib/endpoint_request_func.py @@ -135,7 +135,8 @@ def _update_headers_common( if request_func_input.extra_headers: headers |= request_func_input.extra_headers if request_func_input.request_id: - headers["x-request-id"] = request_func_input.request_id + # Ensure unique request IDs across benchmark clients + headers["x-request-id"] = f"{os.getpid()}-{request_func_input.request_id}" async def async_request_openai_completions( From 903226072f946eaf7a6df29ca9c7035ec65cbeca Mon Sep 17 00:00:00 2001 From: Seiji Eicher Date: Wed, 29 Oct 2025 00:09:10 -0700 Subject: [PATCH 2/3] Verify request ID uniqueness in the scheduler Signed-off-by: Seiji Eicher --- vllm/v1/core/sched/scheduler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index 00b34fe4fbb9..db30c259cdc5 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -1164,8 +1164,12 @@ def get_request_counts(self) -> tuple[int, int]: return len(self.running), len(self.waiting) def add_request(self, request: Request) -> None: + request_id = request.request_id + if request_id in self.requests: + raise ValueError(f"Request id {request_id} already exists.") + self.waiting.add_request(request) - self.requests[request.request_id] = request + self.requests[request_id] = request if self.log_stats: request.record_event(EngineCoreEventType.QUEUED) From 75c913612735751acd0200e6f1213408ce345d2c Mon Sep 17 00:00:00 2001 From: Seiji Eicher Date: Wed, 29 Oct 2025 07:06:17 -0700 Subject: [PATCH 3/3] Make --request-id-prefix default unique Signed-off-by: Seiji Eicher --- vllm/benchmarks/lib/endpoint_request_func.py | 3 +-- vllm/benchmarks/serve.py | 3 ++- vllm/v1/core/sched/scheduler.py | 6 +----- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/vllm/benchmarks/lib/endpoint_request_func.py b/vllm/benchmarks/lib/endpoint_request_func.py index 217dee66a739..ed0fdec25186 100644 --- a/vllm/benchmarks/lib/endpoint_request_func.py +++ b/vllm/benchmarks/lib/endpoint_request_func.py @@ -135,8 +135,7 @@ def _update_headers_common( if request_func_input.extra_headers: headers |= request_func_input.extra_headers if request_func_input.request_id: - # Ensure unique request IDs across benchmark clients - headers["x-request-id"] = f"{os.getpid()}-{request_func_input.request_id}" + headers["x-request-id"] = request_func_input.request_id async def async_request_openai_completions( diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index 71d136d61cea..4b15d8e62913 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -26,6 +26,7 @@ import random import shutil import time +import uuid import warnings from collections.abc import AsyncGenerator, Iterable from dataclasses import dataclass @@ -1160,7 +1161,7 @@ def add_cli_args(parser: argparse.ArgumentParser): "--request-id-prefix", type=str, required=False, - default="benchmark-serving", + default=f"bench-{uuid.uuid4().hex[:8]}-", help="Specify the prefix of request id.", ) diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index db30c259cdc5..00b34fe4fbb9 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -1164,12 +1164,8 @@ def get_request_counts(self) -> tuple[int, int]: return len(self.running), len(self.waiting) def add_request(self, request: Request) -> None: - request_id = request.request_id - if request_id in self.requests: - raise ValueError(f"Request id {request_id} already exists.") - self.waiting.add_request(request) - self.requests[request_id] = request + self.requests[request.request_id] = request if self.log_stats: request.record_event(EngineCoreEventType.QUEUED)