Commit fe3398f

[Chore] Enable passing tokenizer=None into MM processor (#29724)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Parent: ad7f714
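At a glance, the commit replaces the dummy-tokenizer workaround with an explicit None. A minimal before/after sketch — the `TokenizerLike` import and the `InputProcessingContext(..., tokenizer=None)` call are taken from the diffs below; the other import paths and the model id are my assumptions:

from typing import cast

from vllm.config import ModelConfig
from vllm.inputs import InputProcessingContext
from vllm.tokenizers import TokenizerLike

# Before: call sites without a real tokenizer fabricated a placeholder and
# cast it, trusting that no tokenizer method would ever be called on it.
mock_tokenizer = cast(TokenizerLike, object())
ctx_old = InputProcessingContext(
    model_config=ModelConfig("facebook/opt-125m"),  # illustrative model id
    tokenizer=mock_tokenizer,
)

# After: the multimodal processing interfaces accept tokenizer=None directly.
ctx_new = InputProcessingContext(
    model_config=ModelConfig("facebook/opt-125m"),  # illustrative model id
    tokenizer=None,
)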

8 files changed: +68 −91 lines

tests/multimodal/test_processing.py

Lines changed: 8 additions & 33 deletions
@@ -3,7 +3,6 @@
 
 import time
 from contextlib import nullcontext
-from typing import cast
 
 import numpy as np
 import pytest
@@ -24,7 +23,6 @@
     replace_token_matches,
 )
 from vllm.multimodal.profiling import MultiModalProfiler
-from vllm.tokenizers import TokenizerLike
 
 from .utils import random_image
 
@@ -238,15 +236,12 @@ def test_find_token_matches(
     expected_by_key,
     update_type,
 ):
-    # Should not be used since there is nothing to convert to token IDs
-    mock_tokenizer = cast(TokenizerLike, object())
-
     prompt_updates = {
         key: update_type(key, target, []).resolve(0)
         for key, target in target_by_key.items()
     }
     result = {
-        key: list(update.iter_token_matches(prompt, mock_tokenizer))
+        key: list(update.iter_token_matches(prompt, tokenizer=None))
         for key, update in prompt_updates.items()
     }
 
@@ -385,15 +380,12 @@ def test_find_text_matches(
     expected_by_key,
     update_type,
 ):
-    # Should not be used since there is nothing to convert to text
-    mock_tokenizer = cast(TokenizerLike, object())
-
     prompt_updates = {
         key: update_type(key, target, []).resolve(0)
         for key, target in target_by_key.items()
     }
     result = {
-        key: list(update.iter_text_matches(prompt, mock_tokenizer))
+        key: list(update.iter_text_matches(prompt, tokenizer=None))
         for key, update in prompt_updates.items()
     }
 
@@ -545,9 +537,6 @@ def test_find_update_text(
     repl_by_key,
     expected_by_update_type_mm_count,
 ):
-    # Should not be used since there is nothing to convert to text
-    mock_tokenizer = cast(TokenizerLike, object())
-
     for (
         update_type,
         expected_by_mm_count,
@@ -564,7 +553,7 @@ def test_find_update_text(
         new_prompt, result = apply_text_matches(
             prompt,
             mm_prompt_updates,
-            mock_tokenizer,
+            tokenizer=None,
         )
 
         # Only displayed on error
@@ -750,9 +739,6 @@ def test_find_update_tokens(
     repl_by_key,
     expected_by_update_type_mm_count,
 ):
-    # Should not be used since there is nothing to convert to tokens
-    mock_tokenizer = cast(TokenizerLike, object())
-
     for (
         update_type,
         expected_by_mm_count,
@@ -769,7 +755,7 @@ def test_find_update_tokens(
         new_prompt, result = apply_token_matches(
             prompt,
             mm_prompt_updates,
-            mock_tokenizer,
+            tokenizer=None,
         )
 
         # Only displayed on error
@@ -900,15 +886,12 @@ def test_find_mm_placeholders(
     expected,
     update_type,
 ):
-    # Should not be used since there is nothing to convert to tokens
-    mock_tokenizer = cast(TokenizerLike, object())
-
     mm_prompt_updates = {
         key: [[update_type(key, [], repl).resolve(i)] for i in range(3)]
         for key, repl in repl_by_key.items()
     }
 
-    result = find_mm_placeholders(prompt, mm_prompt_updates, mock_tokenizer)
+    result = find_mm_placeholders(prompt, mm_prompt_updates, tokenizer=None)
 
     # Only displayed on error
     print("result:", result)
@@ -1029,12 +1012,9 @@ def test_hf_processor_init_kwargs(
     inference_kwargs,
     expected_kwargs,
 ):
-    # Should not be used since there is nothing to convert to tokens
-    mock_tokenizer = cast(TokenizerLike, object())
-
     ctx = InputProcessingContext(
         model_config=ModelConfig(model_id, mm_processor_kwargs=config_kwargs),
-        tokenizer=mock_tokenizer,
+        tokenizer=None,
     )
 
     processor = ctx.get_hf_processor(
@@ -1065,12 +1045,9 @@ def test_hf_processor_call_kwargs(
     inference_kwargs,
     expected_kwargs,
 ):
-    # Should not be used since there is nothing to convert to tokens
-    mock_tokenizer = cast(TokenizerLike, object())
-
     ctx = InputProcessingContext(
         model_config=ModelConfig(model_id, mm_processor_kwargs=config_kwargs),
-        tokenizer=mock_tokenizer,
+        tokenizer=None,
    )
 
    processor = ctx.get_hf_processor(DummyProcessor)  # type: ignore[arg-type]
@@ -1089,8 +1066,6 @@ def test_apply_matches_no_match_exits_quickly():
 
     With the fix, it should exit immediately when no match is found.
     """
-    mock_tokenizer = cast(TokenizerLike, object())
-
     # Create a long prompt with no placeholder
     long_prompt = "x" * 10000
 
@@ -1103,7 +1078,7 @@ def test_apply_matches_no_match_exits_quickly():
     result, _ = _apply_matches(
         long_prompt,
         mm_prompt_updates,
-        mock_tokenizer,
+        tokenizer=None,
     )
     elapsed = time.perf_counter() - start
 
vllm/entrypoints/openai/serving_engine.py

Lines changed: 1 addition & 1 deletion
@@ -337,7 +337,7 @@ async def beam_search(
         tokenizer = input_processor.tokenizer
         if tokenizer is None:
             raise ValueError(
-                "You cannot use beam search when `skip_tokenizer_init` is True"
+                "You cannot use beam search when `skip_tokenizer_init=True`"
             )
 
         eos_token_id: int = tokenizer.eos_token_id  # type: ignore

vllm/inputs/preprocess.py

Lines changed: 2 additions & 13 deletions
@@ -62,7 +62,7 @@ def __init__(
     def get_tokenizer(self) -> TokenizerLike:
         if self.tokenizer is None:
             raise ValueError(
-                "You cannot pass text prompts when `skip_tokenizer_init` is True"
+                "You cannot pass text prompts when `skip_tokenizer_init=True`"
            )
 
        return self.tokenizer
@@ -228,22 +228,11 @@ def _tokenize_prompt(
 
         return tokenizer.encode(prompt, **tokenization_kwargs)
 
-    def _get_mm_tokenizer(self) -> TokenizerLike:
-        # PrithviGeoSpatialMAE needs to be initialized without a tokenizer
-        # while using also multi-modal input
-        if not self.tokenizer:
-            return cast(TokenizerLike, object())  # Dummy
-
-        tokenizer = self.get_tokenizer()
-        return tokenizer
-
     def _get_mm_processor(self) -> BaseMultiModalProcessor:
         if not hasattr(self, "_mm_processor"):
-            tokenizer = self._get_mm_tokenizer()
-
             self._mm_processor = self.mm_registry.create_processor(
                 self.model_config,
-                tokenizer=tokenizer,
+                tokenizer=self.tokenizer,
                 cache=self.mm_processor_cache,
             )
 
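The deleted `_get_mm_tokenizer` existed only so that models such as PrithviGeoSpatialMAE, which take multimodal input but no tokenizer, could still build a processor. With `create_processor` accepting `tokenizer=None`, that scenario is expressed directly. A hedged sketch of the user-facing setup — the model id is illustrative, not from this commit:

from vllm import LLM

# skip_tokenizer_init=True leaves InputPreprocessor.tokenizer as None; the
# multimodal processor is now created with tokenizer=None rather than a
# dummy cast(TokenizerLike, object()) stand-in.
llm = LLM(
    model="ibm-nasa-geospatial/Prithvi-EO-1.0-100M",  # illustrative model id
    skip_tokenizer_init=True,
)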
vllm/model_executor/models/glm4_1v.py

Lines changed: 0 additions & 6 deletions
@@ -866,12 +866,6 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
 
 class Glm4vProcessingInfo(BaseProcessingInfo):
-    def get_hf_config(self):
-        return self.ctx.get_hf_config()
-
-    def get_tokenizer(self):
-        return self.ctx.tokenizer
-
     def get_supported_mm_limits(self) -> Mapping[str, int | None]:
         return {"image": None, "video": 1}
 
vllm/model_executor/models/qwen3_vl.py

Lines changed: 0 additions & 3 deletions
@@ -615,9 +615,6 @@ def get_hf_processor(self, **kwargs: object) -> Qwen3VLProcessor:
             **kwargs,
         )
 
-    def get_tokenizer(self):
-        return self.ctx.tokenizer
-
     def get_image_processor(self, **kwargs: object) -> Qwen2VLImageProcessorFast:
         return self.get_hf_processor(**kwargs).image_processor
 
vllm/model_executor/models/qwen_vl.py

Lines changed: 1 addition & 1 deletion
@@ -555,7 +555,7 @@ def __call__(
 
 class QwenVLProcessingInfo(BaseProcessingInfo):
     def get_tokenizer(self) -> PreTrainedTokenizer:
-        tokenizer = self.ctx.tokenizer
+        tokenizer = self.ctx.get_tokenizer()
         assert isinstance(tokenizer, PreTrainedTokenizer)
 
         return _get_tokenizer_without_image_pad(tokenizer)
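Since `ctx.tokenizer` may now legitimately be None, call sites that require a real tokenizer go through the raising accessor rather than the raw attribute. A sketch of the assumed accessor shape, mirroring `InputPreprocessor.get_tokenizer` shown above — this is my reconstruction, not code from the diff:

def get_tokenizer(self) -> TokenizerLike:
    # Assumed to mirror InputPreprocessor.get_tokenizer: fail loudly instead
    # of handing callers a None (or dummy) tokenizer.
    if self.tokenizer is None:
        raise ValueError(
            "You cannot pass text prompts when `skip_tokenizer_init=True`"
        )
    return self.tokenizer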
