Commit 9bd0122

Add version filter for Llama parallel tool calling
Only Llama 4+ models support parallel tool calling, based on testing.

Parallel tool calling support:
- Llama 4+ - SUPPORTED (tested and verified with real OCI API)
- All Llama 3.x (3.0, 3.1, 3.2, 3.3) - BLOCKED
- Cohere - BLOCKED (existing behavior)
- Other models (xAI Grok, OpenAI, Mistral) - SUPPORTED

Implementation:
- Added a _supports_parallel_tool_calls() helper method with regex version parsing
- Updated bind_tools() to validate the model version before enabling parallel calls
- Raises a clear error message: "only available for Llama 4+ models"

Unit tests added (8 tests, all mocked, no OCI connection):
- test_version_filter_llama_3_0_blocked
- test_version_filter_llama_3_1_blocked
- test_version_filter_llama_3_2_blocked
- test_version_filter_llama_3_3_blocked (Llama 3.3 doesn't support it either)
- test_version_filter_llama_4_allowed
- test_version_filter_other_models_allowed
- test_version_filter_supports_parallel_tool_calls_method
- Plus existing parallel tool calling tests updated to use Llama 4
1 parent cf65baa · commit 9bd0122
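To illustrate the behavior this commit introduces, here is a minimal, hypothetical usage sketch. It assumes ChatOCIGenAI is importable from the module changed below; the get_weather tool and the mocked client are stand-ins, mirroring the setup used in the commit's unit tests:

from unittest.mock import MagicMock

from langchain_oci.chat_models.oci_generative_ai import ChatOCIGenAI

def get_weather(city: str) -> str:
    """Hypothetical example tool."""
    return f"Sunny in {city}"

# Llama 4+: parallel tool calling binds as before.
llm_v4 = ChatOCIGenAI(
    model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
    client=MagicMock(),  # mocked client, no OCI connection needed
)
llm_v4.bind_tools([get_weather], parallel_tool_calls=True)  # OK

# Llama 3.x: bind_tools now fails fast instead of sending a request
# the model cannot honor.
llm_v3 = ChatOCIGenAI(
    model_id="meta.llama-3.3-70b-instruct",
    client=MagicMock(),
)
try:
    llm_v3.bind_tools([get_weather], parallel_tool_calls=True)
except ValueError as err:
    print(err)  # "...only available for Llama 4+ models..."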

File tree

2 files changed: +193, -7 lines


libs/oci/langchain_oci/chat_models/oci_generative_ai.py

Lines changed: 55 additions & 0 deletions

@@ -1200,6 +1200,49 @@ def _prepare_request(
 
         return request
 
+    def _supports_parallel_tool_calls(self, model_id: str) -> bool:
+        """Check if the model supports parallel tool calling.
+
+        Parallel tool calling is supported for:
+        - Llama 4+ only (tested and verified)
+        - Other GenericChatRequest models (xAI Grok, OpenAI, Mistral)
+
+        Not supported for:
+        - All Llama 3.x versions (3.0, 3.1, 3.2, 3.3)
+        - Cohere models
+
+        Args:
+            model_id: The model identifier (e.g., "meta.llama-4-maverick-17b-128e-instruct-fp8")
+
+        Returns:
+            bool: True if the model supports parallel tool calling, False otherwise.
+        """
+        import re
+
+        # Extract provider from model_id (e.g., "meta" from "meta.llama-4-maverick-17b-128e-instruct-fp8")
+        provider = model_id.split(".")[0].lower()
+
+        # Cohere models don't support parallel tool calling
+        if provider == "cohere":
+            return False
+
+        # For Meta/Llama models, check the version
+        if provider == "meta" and "llama" in model_id.lower():
+            # Extract the version number (e.g., "4" from "meta.llama-4-maverick-17b-128e-instruct-fp8")
+            version_match = re.search(r"llama-(\d+)", model_id.lower())
+            if version_match:
+                major = int(version_match.group(1))
+
+                # Only Llama 4+ supports parallel tool calling;
+                # Llama 3.x (including 3.3) does NOT support it, based on testing
+                if major >= 4:
+                    return True
+
+            return False
+
+        # Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) support it
+        return True
+
     def bind_tools(
         self,
         tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
@@ -1251,6 +1294,18 @@ def bind_tools(
             else self.parallel_tool_calls
         )
         if use_parallel:
+            # Validate that the model supports parallel tool calling
+            if not self._supports_parallel_tool_calls(self.model_id):
+                if "llama" in self.model_id.lower():
+                    raise ValueError(
+                        f"Parallel tool calls are not supported for {self.model_id}. "
+                        "This feature is only available for Llama 4+ models. "
+                        "Llama 3.x models (including 3.3) do not support parallel tool calling."
+                    )
+                else:
+                    raise ValueError(
+                        f"Parallel tool calls are not supported for {self.model_id}."
+                    )
             kwargs["is_parallel_tool_calls"] = True
 
         return super().bind(tools=formatted_tools, **kwargs)
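The whole version gate above rests on a single regex over the lowercased model ID. A standalone sketch of just that parsing, runnable without OCI (the llama_major_version helper is hypothetical; the model IDs are taken from this diff):

import re

def llama_major_version(model_id: str):
    """Return the major Llama version parsed from an OCI model ID, or None."""
    match = re.search(r"llama-(\d+)", model_id.lower())
    return int(match.group(1)) if match else None

assert llama_major_version("meta.llama-4-maverick-17b-128e-instruct-fp8") == 4
assert llama_major_version("meta.llama-3.3-70b-instruct") == 3  # every 3.x parses to major 3
assert llama_major_version("cohere.command-r-plus") is None  # no "llama-" segment

Because the regex captures only the digits before the first dot or dash after "llama-", every 3.x variant collapses to major version 3, which is what lets one check block 3.0 through 3.3 uniformly.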

libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py

Lines changed: 138 additions & 7 deletions

@@ -11,7 +11,7 @@ def test_parallel_tool_calls_class_level():
     """Test class-level parallel_tool_calls parameter."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         parallel_tool_calls=True,
         client=oci_gen_ai_client
     )
@@ -23,7 +23,7 @@ def test_parallel_tool_calls_default_false():
     """Test that parallel_tool_calls defaults to False."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         client=oci_gen_ai_client
     )
     assert llm.parallel_tool_calls is False
@@ -34,7 +34,7 @@ def test_parallel_tool_calls_bind_tools_explicit_true():
     """Test parallel_tool_calls=True in bind_tools."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         client=oci_gen_ai_client
     )
 
@@ -59,7 +59,7 @@ def test_parallel_tool_calls_bind_tools_explicit_false():
     """Test parallel_tool_calls=False in bind_tools."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         client=oci_gen_ai_client
     )
 
@@ -81,7 +81,7 @@ def test_parallel_tool_calls_bind_tools_uses_class_default():
     """Test that bind_tools uses class default when not specified."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         parallel_tool_calls=True,  # Set class default
         client=oci_gen_ai_client
     )
@@ -102,7 +102,7 @@ def test_parallel_tool_calls_bind_tools_overrides_class_default():
     """Test that bind_tools parameter overrides class default."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         parallel_tool_calls=True,  # Set class default to True
         client=oci_gen_ai_client
     )
@@ -125,7 +125,7 @@ def test_parallel_tool_calls_passed_to_oci_api_meta():
 
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         client=oci_gen_ai_client
     )
 
@@ -197,3 +197,134 @@ def tool1(x: int) -> int:
         stream=False,
         **llm_with_tools.kwargs
     )
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_0_blocked():
+    """Test that Llama 3.0 models are blocked from parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should raise ValueError when trying to enable parallel tool calling
+    with pytest.raises(ValueError, match="Llama 4\\+"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_1_blocked():
+    """Test that Llama 3.1 models are blocked from parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.1-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should raise ValueError
+    with pytest.raises(ValueError, match="Llama 4\\+"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_2_blocked():
+    """Test that Llama 3.2 models are blocked from parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.2-11b-vision-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should raise ValueError
+    with pytest.raises(ValueError, match="Llama 4\\+"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_3_blocked():
+    """Test that Llama 3.3 models are blocked from parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.3-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should raise ValueError - Llama 3.3 doesn't actually support parallel calls
+    with pytest.raises(ValueError, match="Llama 4\\+"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_4_allowed():
+    """Test that Llama 4 models are allowed parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should NOT raise ValueError
+    llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True)
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_other_models_allowed():
+    """Test that other GenericChatRequest models are allowed parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+
+    # Test with xAI Grok
+    llm_grok = ChatOCIGenAI(
+        model_id="xai.grok-4-fast",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should NOT raise ValueError for Grok
+    llm_with_tools = llm_grok.bind_tools([tool1], parallel_tool_calls=True)
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_supports_parallel_tool_calls_method():
+    """Test the _supports_parallel_tool_calls method directly."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+        client=oci_gen_ai_client
+    )
+
+    # Test various model IDs
+    assert llm._supports_parallel_tool_calls("meta.llama-4-maverick-17b-128e-instruct-fp8") is True
+    assert llm._supports_parallel_tool_calls("meta.llama-3.3-70b-instruct") is False  # Llama 3.3 NOT supported
+    assert llm._supports_parallel_tool_calls("meta.llama-3.2-11b-vision-instruct") is False
+    assert llm._supports_parallel_tool_calls("meta.llama-3.1-70b-instruct") is False
+    assert llm._supports_parallel_tool_calls("meta.llama-3-70b-instruct") is False
+    assert llm._supports_parallel_tool_calls("cohere.command-r-plus") is False
+    assert llm._supports_parallel_tool_calls("xai.grok-4-fast") is True
+    assert llm._supports_parallel_tool_calls("openai.gpt-4") is True
+    assert llm._supports_parallel_tool_calls("mistral.mistral-large") is True
