Skip to content

Commit 494d22d

Browse files
committed
add dummy get_input_embeddings to fix vllm model type check
Signed-off-by: Allen Jia <kuafou@gmail.com>
1 parent 60c14f5 commit 494d22d

File tree

2 files changed

+33
-0
lines changed

2 files changed

+33
-0
lines changed

tests/test_vllm_wrapper.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from tpu_inference.models.common.model_loader import register_model
2+
3+
4+
class DummyModel:
    """Minimal stand-in model used to exercise vLLM wrapper registration.

    Only the constructor and call signatures matter; both are no-ops.
    """

    def __init__(self, vllm_config=None):
        # Accept (and ignore) the config object vLLM passes at construction.
        pass

    def __call__(self, kv_caches=None, input_ids=None, attention_metadata=None):
        # No-op forward pass; tests only need the signature to exist.
        pass
7+
8+
def test_vllm_wrapper_has_required_methods():
    """The registered wrapper class must satisfy vLLM's model type checks.

    Registers a dummy JAX-side model, then verifies the generated wrapper:
    - exposes ``get_input_embeddings`` (required by vLLM's interface probe),
    - whose stub raises ``NotImplementedError`` when actually invoked,
    - and passes ``is_vllm_model``.
    """
    register_model("DummyForCausalLM", DummyModel)

    from vllm.model_executor.models.registry import ModelRegistry
    wrapper_cls = ModelRegistry.models.get("DummyForCausalLM").model_cls
    assert hasattr(wrapper_cls, "get_input_embeddings")

    m = wrapper_cls()
    # The stub must actually raise: a bare try/except-pass would also accept a
    # silent return, which would hide a broken (non-raising) stub.
    try:
        m.get_input_embeddings(input_ids=None, positions=None, inputs_embeds=None)
    except NotImplementedError:
        pass
    else:
        raise AssertionError(
            "wrapper get_input_embeddings should raise NotImplementedError")

    from vllm.model_executor.models.interfaces_base import is_vllm_model
    assert is_vllm_model(wrapper_cls)

tpu_inference/models/common/model_loader.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,17 @@ def unimplemented_forward(
415415
"This is a JAX model and does not implement the PyTorch forward method."
416416
)
417417

418+
def unimplemented_get_input_embeddings(
    self,
    input_ids: "torch.Tensor",
    positions: "torch.Tensor",
    inputs_embeds: Optional["torch.Tensor"] = None,
) -> "torch.Tensor":
    """Dummy stand-in, like `forward`, so the class passes vLLM's type checks.

    Always raises: JAX-backed models have no PyTorch embedding lookup.
    """
    raise NotImplementedError(
        "This is a JAX model and does not implement the PyTorch get_input_embeddings method."
    )
428+
418429
# We need a custom __init__ that only calls torch.nn.Module's init,
419430
# to avoid triggering JAX logic when vLLM inspects the class.
420431
def wrapper_init(self, *args, **kwargs):
@@ -428,6 +439,7 @@ def wrapper_init(self, *args, **kwargs):
428439
{
429440
"__init__": wrapper_init,
430441
"forward": unimplemented_forward,
442+
"get_input_embeddings": unimplemented_get_input_embeddings,
431443
# Prevent vLLM from trying to load weights into this dummy class.
432444
"load_weights": lambda self, *args, **kwargs: None,
433445
})

0 commit comments

Comments
 (0)