Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tests/detokenizer/test_min_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def test_min_tokens_with_stop(min_tokens: int, stop: str, truth: str):
)
request = EngineCoreRequest(
request_id="",
external_req_id="",
prompt_token_ids=prompt_token_ids,
mm_features=None,
sampling_params=params,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def _make_request(stop, include_stop_str_in_output: bool, min_tokens: int = 0):
# Keep other fields minimal for unit test purposes.
req = EngineCoreRequest(
request_id="test",
external_req_id="test-ext",
prompt_token_ids=[],
mm_features=None,
sampling_params=params,
Expand Down
12 changes: 10 additions & 2 deletions tests/entrypoints/openai/test_serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,9 @@ async def _fake_process_inputs(
trace_headers,
priority,
):
return dict(engine_prompt), {}
mock_request = MagicMock()
mock_request.request_id = request_id
return mock_request, {}

serving_chat._process_inputs = AsyncMock(side_effect=_fake_process_inputs)
return serving_chat
Expand Down Expand Up @@ -662,7 +664,11 @@ async def test_serving_chat_data_parallel_rank_extraction():
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = MockModelConfig()

mock_request = MagicMock()
mock_request.request_id = "test-request-internal"
mock_engine.input_processor = MagicMock()
mock_engine.input_processor.process_inputs.return_value = mock_request
mock_engine.io_processor = MagicMock()

# Mock the generate method to return an async generator
Expand All @@ -689,7 +695,9 @@ async def mock_generate(*args, **kwargs):
finished=True,
)

mock_engine.generate = AsyncMock(side_effect=mock_generate)
mock_engine.generate = MagicMock(
side_effect=lambda *args, **kwargs: mock_generate()
)

serving_chat = _build_serving_chat(mock_engine)

Expand Down
1 change: 1 addition & 0 deletions tests/tokenizers_/test_detokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def _run_incremental_decode(
)
request = EngineCoreRequest(
request_id="",
external_req_id="",
prompt_token_ids=prompt_token_ids,
mm_features=None,
sampling_params=params,
Expand Down
4 changes: 2 additions & 2 deletions tests/v1/engine/test_async_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ async def test_multi_abort(output_kind: RequestOutputKind):

# Use multi-abort to abort multiple requests at once
abort_request_ids = [request_ids[i] for i in REQUEST_IDS_TO_ABORT]
await engine.abort(abort_request_ids)
await engine.abort(abort_request_ids, internal=False)

# Wait for all tasks to complete
results = await asyncio.gather(*tasks, return_exceptions=True)
Expand Down Expand Up @@ -548,7 +548,7 @@ async def test_abort_final_output(output_kind: RequestOutputKind):
await asyncio.sleep(0.5)

# Abort the request
await engine.abort(request_id)
await engine.abort(request_id, internal=False)

# Wait for generation to complete and return final output
final_output = await generated
Expand Down
8 changes: 7 additions & 1 deletion tests/v1/engine/test_engine_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,16 @@
PROMPT = "I am Gyoubu Masataka Oniwa"
PROMPT_TOKENS = TOKENIZER(PROMPT).input_ids

_REQUEST_COUNTER = 0


def make_request() -> EngineCoreRequest:
global _REQUEST_COUNTER
_REQUEST_COUNTER += 1
request_id = f"request-{_REQUEST_COUNTER}"
return EngineCoreRequest(
request_id=str(uuid.uuid4()),
request_id=request_id,
external_req_id=f"{request_id}-{uuid.uuid4()}",
prompt_token_ids=PROMPT_TOKENS,
mm_features=None,
sampling_params=SamplingParams(),
Expand Down
8 changes: 7 additions & 1 deletion tests/v1/engine/test_engine_core_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,21 @@
PROMPT = "Hello my name is Robert and I love quantization kernels"
PROMPT_TOKENS = TOKENIZER(PROMPT).input_ids

_REQUEST_COUNTER = 0


def make_request(
params: SamplingParams, prompt_tokens_ids: list[int] | None = None
) -> EngineCoreRequest:
if not prompt_tokens_ids:
prompt_tokens_ids = PROMPT_TOKENS

global _REQUEST_COUNTER
_REQUEST_COUNTER += 1
request_id = f"request-{_REQUEST_COUNTER}"
return EngineCoreRequest(
request_id=str(uuid.uuid4()),
request_id=request_id,
external_req_id=f"{request_id}-{uuid.uuid4()}",
prompt_token_ids=prompt_tokens_ids,
mm_features=None,
sampling_params=params,
Expand Down
1 change: 1 addition & 0 deletions tests/v1/engine/test_fast_incdec_prefix_err.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def test_fast_inc_detok_invalid_utf8_err_case():
params = SamplingParams(skip_special_tokens=True)
request = EngineCoreRequest(
request_id="test",
external_req_id="test-ext",
prompt_token_ids=prompt_token_ids,
mm_features=None,
sampling_params=params,
Expand Down
Loading