From 5c6083e3ed973456236743e83401f8972be01fa8 Mon Sep 17 00:00:00 2001
From: elronbandel
Date: Thu, 7 Aug 2025 10:47:35 +0300
Subject: [PATCH 1/8] Add vllm to cross provider engine

Signed-off-by: elronbandel
---
 src/unitxt/inference.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
index 87488d1da7..1b5d4144da 100644
--- a/src/unitxt/inference.py
+++ b/src/unitxt/inference.py
@@ -3231,6 +3231,7 @@ def get_return_object(self, responses, return_meta_data):
     "vertex-ai",
     "replicate",
     "hf-local",
+    "vllm",
 ]


@@ -3475,6 +3476,7 @@ class CrossProviderInferenceEngine(
     provider_model_map["watsonx"] = {
         k: f"watsonx/{v}" for k, v in provider_model_map["watsonx-sdk"].items()
     }
+    provider_model_map["vllm"] = provider_model_map["hf-local"].items()

     _provider_to_base_class = {
         "watsonx": LiteLLMInferenceEngine,
@@ -3488,6 +3490,7 @@ class CrossProviderInferenceEngine(
         "vertex-ai": LiteLLMInferenceEngine,
         "replicate": LiteLLMInferenceEngine,
         "hf-local": HFAutoModelInferenceEngine,
+        "vllm": VLLMInferenceEngine,
     }

     _provider_param_renaming = {

From b14303893ed90922ba6ba3387f97bfacf0fc06ca Mon Sep 17 00:00:00 2001
From: elronbandel
Date: Thu, 7 Aug 2025 11:13:13 +0300
Subject: [PATCH 2/8] Fix

Signed-off-by: elronbandel
---
 src/unitxt/inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
index 1b5d4144da..484ceccce9 100644
--- a/src/unitxt/inference.py
+++ b/src/unitxt/inference.py
@@ -3476,7 +3476,7 @@ class CrossProviderInferenceEngine(
     provider_model_map["watsonx"] = {
         k: f"watsonx/{v}" for k, v in provider_model_map["watsonx-sdk"].items()
     }
-    provider_model_map["vllm"] = provider_model_map["hf-local"].items()
+    provider_model_map["vllm"] = provider_model_map["hf-local"]

     _provider_to_base_class = {
         "watsonx": LiteLLMInferenceEngine,

From bd9f1a94d930348ceaa8155e096a342c58d2b56e Mon Sep 17 00:00:00 2001
From: elronbandel
Date: Thu, 7 Aug 2025 17:50:33 +0300
Subject: [PATCH 3/8] Fix vllm engine

Signed-off-by: elronbandel
---
 src/unitxt/inference.py                  | 17 ++++++++++-------
 tests/inference/test_inference_engine.py | 15 +++++++++++++++
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
index 484ceccce9..de14392d6a 100644
--- a/src/unitxt/inference.py
+++ b/src/unitxt/inference.py
@@ -3014,23 +3014,26 @@ class VLLMParamsMixin(Artifact):
     model: str
     n: int = 1
     best_of: Optional[int] = None
-    _real_n: Optional[int] = None
-    presence_penalty: float = 0.0
-    frequency_penalty: float = 0.0
-    repetition_penalty: float = 1.0
-    temperature: float = 0.0
+    temperature: float = 1.0
     top_p: float = 1.0
-    top_k: int = -1
+    top_k: int = 0
     min_p: float = 0.0
     seed: Optional[int] = None
+    presence_penalty: float = 0.0
+    frequency_penalty: float = 0.0
+    repetition_penalty: float = 1.0
     stop: Optional[Union[str, List[str]]] = None
     stop_token_ids: Optional[List[int]] = None
     bad_words: Optional[List[str]] = None
+    include_stop_str_in_output: bool = False
     ignore_eos: bool = False
     max_tokens: Optional[int] = 16
     min_tokens: int = 0
     logprobs: Optional[int] = None
     prompt_logprobs: Optional[int] = None
+    detokenize: bool = True
+    skip_special_tokens: bool = True
+    spaces_between_special_tokens: bool = True


 class VLLMInferenceEngine(InferenceEngine, PackageRequirementsMixin, VLLMParamsMixin):
@@ -3047,7 +3050,6 @@ def prepare_engine(self):
         self.sampling_params = SamplingParams(**args)
         self.llm = LLM(
             model=self.model,
-            device="auto",
             trust_remote_code=True,
             max_num_batched_tokens=4096,
             gpu_memory_utilization=0.7,
@@ -3497,6 +3499,7 @@ class CrossProviderInferenceEngine(
         "watsonx-sdk": {"model": "model_name"},
         "rits": {"model": "model_name"},
         "hf-local": {"model": "model_name", "max_tokens": "max_new_tokens"},
+        "vllm": {"top_logprobs": "logprobs", "logprobs": "prompt_logprobs"},
     }

     def get_return_object(self, **kwargs):
diff --git a/tests/inference/test_inference_engine.py b/tests/inference/test_inference_engine.py
index 40fd604f2e..e1c83130c3 100644
--- a/tests/inference/test_inference_engine.py
+++ b/tests/inference/test_inference_engine.py
@@ -19,6 +19,7 @@
     OptionSelectingByLogProbsInferenceEngine,
     RITSInferenceEngine,
     TextGenerationInferenceOutput,
+    VLLMInferenceEngine,
     WMLInferenceEngineChat,
     WMLInferenceEngineGeneration,
 )
@@ -189,6 +190,20 @@ def test_watsonx_chat_inference(self):

         self.assertListEqual(predictions, ["7", "2"])

+    def test_vllm_chat_inference(self):
+        model = VLLMInferenceEngine(
+            model=local_decoder_model,
+            data_classification_policy=["public"],
+            temperature=0,
+            max_tokens=1,
+        )
+
+        dataset = get_text_dataset()
+
+        predictions = model(dataset)
+
+        self.assertListEqual(list(predictions), ["7", "1"])
+
     def test_watsonx_inference_with_external_client(self):
         from ibm_watsonx_ai.client import APIClient, Credentials
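
The test added in the patch above drives VLLMInferenceEngine directly on a unitxt dataset. Outside the test suite, the same pattern looks roughly like the sketch below. This is a sketch under assumptions: the model id is a placeholder (the test's local_decoder_model fixture is defined outside this diff), and plain instances with a "source" field are assumed to be accepted, as with unitxt's other inference engines; the patch itself only confirms that the engine is callable on a dataset.

    from unitxt.inference import VLLMInferenceEngine

    # Sketch under assumptions: the model id is a placeholder, and instances
    # carrying a "source" field are assumed to work as with unitxt's other
    # engines. The diff above only shows: predictions = model(dataset).
    engine = VLLMInferenceEngine(
        model="Qwen/Qwen2.5-0.5B-Instruct",  # assumed small local model
        data_classification_policy=["public"],
        temperature=0,  # greedy decoding, as in the new test
        max_tokens=8,
    )

    dataset = [
        {
            "source": "What is 4 + 3? Answer with one digit.",
            "data_classification_policy": ["public"],
        }
    ]
    predictions = engine(dataset)
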
From d3b3926422ffbda9585cc578b8560c7ac2100c05 Mon Sep 17 00:00:00 2001
From: Yoav Katz
Date: Sun, 12 Oct 2025 10:56:20 +0300
Subject: [PATCH 4/8] Added VLLM requirement list

Signed-off-by: Yoav Katz
---
 src/unitxt/inference.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
index 502f612a5a..164ce25b27 100644
--- a/src/unitxt/inference.py
+++ b/src/unitxt/inference.py
@@ -3037,6 +3037,7 @@ class VLLMParamsMixin(Artifact):


 class VLLMInferenceEngine(InferenceEngine, PackageRequirementsMixin, VLLMParamsMixin):
+    _requirements_list: list = ["vllm"]
     label = "vllm"

     def get_engine_id(self):

From ae1ff773c528ed3d000e2132ebec1a234f4f1a82 Mon Sep 17 00:00:00 2001
From: Yoav Katz
Date: Sun, 12 Oct 2025 11:33:47 +0300
Subject: [PATCH 5/8] Added vllm to cross provider example

Signed-off-by: Yoav Katz
---
 examples/inference_using_cross_provider.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/inference_using_cross_provider.py b/examples/inference_using_cross_provider.py
index 3abc5371c0..292059a272 100644
--- a/examples/inference_using_cross_provider.py
+++ b/examples/inference_using_cross_provider.py
@@ -2,7 +2,7 @@
 from unitxt.text_utils import print_dict

 if __name__ == "__main__":
-    for provider in ["watsonx", "rits", "watsonx-sdk", "hf-local"]:
+    for provider in ["vllm", "watsonx", "rits", "watsonx-sdk", "hf-local"]:
         print()
         print("------------------------------------------------ ")
         print("PROVIDER:", provider)

From 5a9d5a4d3d72c4b6ccbb25a4f7529cf4813e7b75 Mon Sep 17 00:00:00 2001
From: Yoav Katz
Date: Mon, 13 Oct 2025 12:31:31 +0300
Subject: [PATCH 6/8] Updated inference tests

Signed-off-by: Yoav Katz
---
 pyproject.toml                           | 7 +++++--
 tests/inference/test_inference_engine.py | 4 ++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5a7db6c150..5cb2767a79 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -108,7 +108,9 @@ tests = [
     "sqlparse",
     "diskcache",
     "pydantic",
-    "jsonschema_rs"
+    "jsonschema_rs",
+    "evalassist",
+    "vllm"
 ]
 ui = [
     "gradio",
@@ -128,7 +130,8 @@ inference-tests = [
     "tenacity",
     "diskcache",
     "numpy==1.26.4",
-    "ollama"
+    "ollama",
+    "vllm"
 ]
 assistant = [
     "streamlit",
diff --git a/tests/inference/test_inference_engine.py b/tests/inference/test_inference_engine.py
index 848b1927d0..64fc90b8f3 100644
--- a/tests/inference/test_inference_engine.py
+++ b/tests/inference/test_inference_engine.py
@@ -294,7 +294,7 @@ def test_option_selecting_by_log_prob_inference_engines(self):
         ]

         watsonx_engine = WMLInferenceEngineGeneration(
-            model_name="meta-llama/llama-3-2-1b-instruct"
+            model_name="meta-llama/llama-3-3-70b-instruct"
         )

         for engine in [watsonx_engine]:
@@ -398,7 +398,7 @@ def test_lite_llm_inference_engine(self):

     def test_lite_llm_inference_engine_without_task_data_not_failing(self):
         LiteLLMInferenceEngine(
-            model="watsonx/meta-llama/llama-3-2-1b-instruct",
+            model="watsonx/meta-llama/llama-3-3-70b-instruct",
             max_tokens=2,
             temperature=0,
             top_p=1,
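
With the engine's requirements declared and the example updated, provider="vllm" is reachable through the cross-provider path like any other backend. A minimal sketch, assuming "granite-3-8b-instruct" is one of the keys in provider_model_map["hf-local"], which the "vllm" entry now reuses verbatim; substitute any key actually present in that map.

    from unitxt.inference import CrossProviderInferenceEngine

    # Minimal sketch: "granite-3-8b-instruct" is an assumed key in the shared
    # hf-local/vllm model map; any key actually present there works the same.
    engine = CrossProviderInferenceEngine(
        model="granite-3-8b-instruct",
        provider="vllm",
        temperature=0,
        max_tokens=32,
    )
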
From 18a7caec9d5f83d01556af8085b514b743539266 Mon Sep 17 00:00:00 2001
From: Yoav Katz
Date: Mon, 13 Oct 2025 12:33:46 +0300
Subject: [PATCH 7/8] Updated python version in consistency check

Signed-off-by: Yoav Katz
---
 .github/workflows/catalog_consistency.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/catalog_consistency.yml b/.github/workflows/catalog_consistency.yml
index 4b42a8843b..96eab4c6e5 100644
--- a/.github/workflows/catalog_consistency.yml
+++ b/.github/workflows/catalog_consistency.yml
@@ -30,7 +30,7 @@ jobs:

       - uses: actions/setup-python@v5
         with:
-          python-version: '3.9'
+          python-version: '3.10'

       - run: curl -LsSf https://astral.sh/uv/install.sh | sh
       - run: uv pip install --system -e ".[tests]"

From 6ee38e10adbbcc3dcb415278fb292dbdb03f4cfc Mon Sep 17 00:00:00 2001
From: Yoav Katz
Date: Mon, 13 Oct 2025 12:40:36 +0300
Subject: [PATCH 8/8] Removed eval assist from this PR

Signed-off-by: Yoav Katz
---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5cb2767a79..e9430c8ecc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -109,7 +109,6 @@ tests = [
     "diskcache",
     "pydantic",
     "jsonschema_rs",
-    "evalassist",
     "vllm"
 ]
 ui = [
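
One detail worth remembering when calling the new provider through CrossProviderInferenceEngine: argument names pass through the "vllm" entry of _provider_param_renaming added in patch 3, so a caller's top_logprobs becomes vLLM's logprobs, and logprobs becomes prompt_logprobs. A toy illustration of that mapping (not unitxt's actual dispatch code):

    # Toy illustration of the "vllm" _provider_param_renaming entry from
    # patch 3; this is not unitxt's actual dispatch code.
    renaming = {"top_logprobs": "logprobs", "logprobs": "prompt_logprobs"}

    cross_provider_args = {"max_tokens": 16, "top_logprobs": 5}
    engine_args = {renaming.get(k, k): v for k, v in cross_provider_args.items()}
    assert engine_args == {"max_tokens": 16, "logprobs": 5}
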