From 5c6083e3ed973456236743e83401f8972be01fa8 Mon Sep 17 00:00:00 2001
From: elronbandel
Date: Thu, 7 Aug 2025 10:47:35 +0300
Subject: [PATCH 1/8] Add vllm to cross provider engine

Signed-off-by: elronbandel
---
 src/unitxt/inference.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
index 87488d1da7..1b5d4144da 100644
--- a/src/unitxt/inference.py
+++ b/src/unitxt/inference.py
@@ -3231,6 +3231,7 @@ def get_return_object(self, responses, return_meta_data):
     "vertex-ai",
     "replicate",
     "hf-local",
+    "vllm",
 ]


@@ -3475,6 +3476,7 @@ class CrossProviderInferenceEngine(
     provider_model_map["watsonx"] = {
         k: f"watsonx/{v}" for k, v in provider_model_map["watsonx-sdk"].items()
     }
+    provider_model_map["vllm"] = provider_model_map["hf-local"].items()

     _provider_to_base_class = {
         "watsonx": LiteLLMInferenceEngine,
@@ -3488,6 +3490,7 @@ class CrossProviderInferenceEngine(
         "vertex-ai": LiteLLMInferenceEngine,
         "replicate": LiteLLMInferenceEngine,
         "hf-local": HFAutoModelInferenceEngine,
+        "vllm": VLLMInferenceEngine,
     }

     _provider_param_renaming = {

From b14303893ed90922ba6ba3387f97bfacf0fc06ca Mon Sep 17 00:00:00 2001
From: elronbandel
Date: Thu, 7 Aug 2025 11:13:13 +0300
Subject: [PATCH 2/8] Fix

Signed-off-by: elronbandel
---
 src/unitxt/inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
index 1b5d4144da..484ceccce9 100644
--- a/src/unitxt/inference.py
+++ b/src/unitxt/inference.py
@@ -3476,7 +3476,7 @@ class CrossProviderInferenceEngine(
     provider_model_map["watsonx"] = {
         k: f"watsonx/{v}" for k, v in provider_model_map["watsonx-sdk"].items()
     }
-    provider_model_map["vllm"] = provider_model_map["hf-local"].items()
+    provider_model_map["vllm"] = provider_model_map["hf-local"]

     _provider_to_base_class = {
         "watsonx": LiteLLMInferenceEngine,

From bd9f1a94d930348ceaa8155e096a342c58d2b56e Mon Sep 17 00:00:00 2001
From: elronbandel
Date: Thu, 7 Aug 2025 17:50:33 +0300
Subject: [PATCH 3/8] Fix vllm engine

Signed-off-by: elronbandel
---
 src/unitxt/inference.py                  | 17 ++++++++++-------
 tests/inference/test_inference_engine.py | 15 +++++++++++++++
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
index 484ceccce9..de14392d6a 100644
--- a/src/unitxt/inference.py
+++ b/src/unitxt/inference.py
@@ -3014,23 +3014,26 @@ class VLLMParamsMixin(Artifact):
     model: str
     n: int = 1
     best_of: Optional[int] = None
-    _real_n: Optional[int] = None
-    presence_penalty: float = 0.0
-    frequency_penalty: float = 0.0
-    repetition_penalty: float = 1.0
-    temperature: float = 0.0
+    temperature: float = 1.0
     top_p: float = 1.0
-    top_k: int = -1
+    top_k: int = 0
     min_p: float = 0.0
     seed: Optional[int] = None
+    presence_penalty: float = 0.0
+    frequency_penalty: float = 0.0
+    repetition_penalty: float = 1.0
     stop: Optional[Union[str, List[str]]] = None
     stop_token_ids: Optional[List[int]] = None
     bad_words: Optional[List[str]] = None
+    include_stop_str_in_output: bool = False
     ignore_eos: bool = False
     max_tokens: Optional[int] = 16
     min_tokens: int = 0
     logprobs: Optional[int] = None
     prompt_logprobs: Optional[int] = None
+    detokenize: bool = True
+    skip_special_tokens: bool = True
+    spaces_between_special_tokens: bool = True


 class VLLMInferenceEngine(InferenceEngine, PackageRequirementsMixin, VLLMParamsMixin):
@@ -3047,7 +3050,6 @@ def prepare_engine(self):
         self.sampling_params = SamplingParams(**args)
         self.llm = LLM(
             model=self.model,
-            device="auto",
             trust_remote_code=True,
             max_num_batched_tokens=4096,
             gpu_memory_utilization=0.7,
@@ -3497,6 +3499,7 @@ class CrossProviderInferenceEngine(
         "watsonx-sdk": {"model": "model_name"},
         "rits": {"model": "model_name"},
         "hf-local": {"model": "model_name", "max_tokens": "max_new_tokens"},
+        "vllm": {"top_logprobs": "logprobs", "logprobs": "prompt_logprobs"},
     }

     def get_return_object(self, **kwargs):
diff --git a/tests/inference/test_inference_engine.py b/tests/inference/test_inference_engine.py
index 40fd604f2e..e1c83130c3 100644
--- a/tests/inference/test_inference_engine.py
+++ b/tests/inference/test_inference_engine.py
@@ -19,6 +19,7 @@
     OptionSelectingByLogProbsInferenceEngine,
     RITSInferenceEngine,
     TextGenerationInferenceOutput,
+    VLLMInferenceEngine,
     WMLInferenceEngineChat,
     WMLInferenceEngineGeneration,
 )
@@ -189,6 +190,20 @@ def test_watsonx_chat_inference(self):

         self.assertListEqual(predictions, ["7", "2"])

+    def test_vllm_chat_inference(self):
+        model = VLLMInferenceEngine(
+            model=local_decoder_model,
+            data_classification_policy=["public"],
+            temperature=0,
+            max_tokens=1,
+        )
+
+        dataset = get_text_dataset()
+
+        predictions = model(dataset)
+
+        self.assertListEqual(list(predictions), ["7", "1"])
+
     def test_watsonx_inference_with_external_client(self):
         from ibm_watsonx_ai.client import APIClient, Credentials
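
The test added in the patch above drives VLLMInferenceEngine directly on a unitxt dataset. Outside the test suite, the same pattern looks roughly like the sketch below. This is a sketch under assumptions: the model id is a placeholder (the test's local_decoder_model fixture is defined outside this diff), and plain instances with a "source" field are assumed to be accepted, as with unitxt's other inference engines; the patch itself only confirms that the engine is callable on a dataset.

    from unitxt.inference import VLLMInferenceEngine

    # Sketch under assumptions: the model id is a placeholder, and instances
    # carrying a "source" field are assumed to work as with unitxt's other
    # engines. The diff above only shows: predictions = model(dataset).
    engine = VLLMInferenceEngine(
        model="Qwen/Qwen2.5-0.5B-Instruct",  # assumed small local model
        data_classification_policy=["public"],
        temperature=0,  # greedy decoding, as in the new test
        max_tokens=8,
    )

    dataset = [
        {
            "source": "What is 4 + 3? Answer with one digit.",
            "data_classification_policy": ["public"],
        }
    ]
    predictions = engine(dataset)
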
From d3b3926422ffbda9585cc578b8560c7ac2100c05 Mon Sep 17 00:00:00 2001
From: Yoav Katz
Date: Sun, 12 Oct 2025 10:56:20 +0300
Subject: [PATCH 4/8] Added VLLM requirement list

Signed-off-by: Yoav Katz
---
 src/unitxt/inference.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
index 502f612a5a..164ce25b27 100644
--- a/src/unitxt/inference.py
+++ b/src/unitxt/inference.py
@@ -3037,6 +3037,7 @@ class VLLMParamsMixin(Artifact):


 class VLLMInferenceEngine(InferenceEngine, PackageRequirementsMixin, VLLMParamsMixin):
+    _requirements_list: list = ["vllm"]
     label = "vllm"

     def get_engine_id(self):

From ae1ff773c528ed3d000e2132ebec1a234f4f1a82 Mon Sep 17 00:00:00 2001
From: Yoav Katz
Date: Sun, 12 Oct 2025 11:33:47 +0300
Subject: [PATCH 5/8] Added vllm to cross provider example

Signed-off-by: Yoav Katz
---
 examples/inference_using_cross_provider.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/inference_using_cross_provider.py b/examples/inference_using_cross_provider.py
index 3abc5371c0..292059a272 100644
--- a/examples/inference_using_cross_provider.py
+++ b/examples/inference_using_cross_provider.py
@@ -2,7 +2,7 @@
 from unitxt.text_utils import print_dict

 if __name__ == "__main__":
-    for provider in ["watsonx", "rits", "watsonx-sdk", "hf-local"]:
+    for provider in ["vllm", "watsonx", "rits", "watsonx-sdk", "hf-local"]:
         print()
         print("------------------------------------------------ ")
         print("PROVIDER:", provider)

From 5a9d5a4d3d72c4b6ccbb25a4f7529cf4813e7b75 Mon Sep 17 00:00:00 2001
From: Yoav Katz
Date: Mon, 13 Oct 2025 12:31:31 +0300
Subject: [PATCH 6/8] Updated inference tests

Signed-off-by: Yoav Katz
---
 pyproject.toml                           | 7 +++++--
 tests/inference/test_inference_engine.py | 4 ++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5a7db6c150..5cb2767a79 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -108,7 +108,9 @@ tests = [
     "sqlparse",
     "diskcache",
     "pydantic",
-    "jsonschema_rs"
+    "jsonschema_rs",
+    "evalassist",
+    "vllm"
 ]
 ui = [
     "gradio",
@@ -128,7 +130,8 @@ inference-tests = [
     "tenacity",
     "diskcache",
     "numpy==1.26.4",
-    "ollama"
+    "ollama",
+    "vllm"
 ]
 assistant = [
     "streamlit",
diff --git a/tests/inference/test_inference_engine.py b/tests/inference/test_inference_engine.py
index 848b1927d0..64fc90b8f3 100644
--- a/tests/inference/test_inference_engine.py
+++ b/tests/inference/test_inference_engine.py
@@ -294,7 +294,7 @@ def test_option_selecting_by_log_prob_inference_engines(self):
         ]

         watsonx_engine = WMLInferenceEngineGeneration(
-            model_name="meta-llama/llama-3-2-1b-instruct"
+            model_name="meta-llama/llama-3-3-70b-instruct"
         )

         for engine in [watsonx_engine]:
@@ -398,7 +398,7 @@ def test_lite_llm_inference_engine(self):

     def test_lite_llm_inference_engine_without_task_data_not_failing(self):
         LiteLLMInferenceEngine(
-            model="watsonx/meta-llama/llama-3-2-1b-instruct",
+            model="watsonx/meta-llama/llama-3-3-70b-instruct",
             max_tokens=2,
             temperature=0,
             top_p=1,
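
With the engine's requirements declared and the example updated, provider="vllm" is reachable through the cross-provider path like any other backend. A minimal sketch, assuming "granite-3-8b-instruct" is one of the keys in provider_model_map["hf-local"], which the "vllm" entry now reuses verbatim; substitute any key actually present in that map.

    from unitxt.inference import CrossProviderInferenceEngine

    # Minimal sketch: "granite-3-8b-instruct" is an assumed key in the shared
    # hf-local/vllm model map; any key actually present there works the same.
    engine = CrossProviderInferenceEngine(
        model="granite-3-8b-instruct",
        provider="vllm",
        temperature=0,
        max_tokens=32,
    )
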
From 18a7caec9d5f83d01556af8085b514b743539266 Mon Sep 17 00:00:00 2001
From: Yoav Katz
Date: Mon, 13 Oct 2025 12:33:46 +0300
Subject: [PATCH 7/8] Updated python version in consistency check

Signed-off-by: Yoav Katz
---
 .github/workflows/catalog_consistency.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/catalog_consistency.yml b/.github/workflows/catalog_consistency.yml
index 4b42a8843b..96eab4c6e5 100644
--- a/.github/workflows/catalog_consistency.yml
+++ b/.github/workflows/catalog_consistency.yml
@@ -30,7 +30,7 @@ jobs:

       - uses: actions/setup-python@v5
         with:
-          python-version: '3.9'
+          python-version: '3.10'

       - run: curl -LsSf https://astral.sh/uv/install.sh | sh
       - run: uv pip install --system -e ".[tests]"

From 6ee38e10adbbcc3dcb415278fb292dbdb03f4cfc Mon Sep 17 00:00:00 2001
From: Yoav Katz
Date: Mon, 13 Oct 2025 12:40:36 +0300
Subject: [PATCH 8/8] Removed eval assist from this PR

Signed-off-by: Yoav Katz
---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5cb2767a79..e9430c8ecc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -109,7 +109,6 @@ tests = [
     "diskcache",
     "pydantic",
     "jsonschema_rs",
-    "evalassist",
     "vllm"
 ]
 ui = [
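
One detail worth remembering when calling the new provider through CrossProviderInferenceEngine: argument names pass through the "vllm" entry of _provider_param_renaming added in patch 3, so a caller's top_logprobs becomes vLLM's logprobs, and logprobs becomes prompt_logprobs. A toy illustration of that mapping (not unitxt's actual dispatch code):

    # Toy illustration of the "vllm" _provider_param_renaming entry from
    # patch 3; this is not unitxt's actual dispatch code.
    renaming = {"top_logprobs": "logprobs", "logprobs": "prompt_logprobs"}

    cross_provider_args = {"max_tokens": 16, "top_logprobs": 5}
    engine_args = {renaming.get(k, k): v for k, v in cross_provider_args.items()}
    assert engine_args == {"max_tokens": 16, "logprobs": 5}
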