Commit 98eb54e

fede-kamel and claude committed
Add version filter for Llama parallel tool calling
Parallel tool calling support is now version-aware:

- ✅ Llama 3.3+ (December 2024 onwards) - SUPPORTED
- ✅ Llama 4+ - SUPPORTED
- ❌ Llama 3.0, 3.1, 3.2 - BLOCKED with clear error message
- ❌ Cohere - BLOCKED (existing behavior)
- ✅ Other models (xAI Grok, OpenAI, Mistral) - SUPPORTED

Implementation:

- Added _supports_parallel_tool_calls() helper method with regex version parsing
- Updated bind_tools() to validate the model version before enabling parallel calls
- Provides clear error messages indicating which versions are supported

Tests added (all mocked, no OCI connection):

- test_version_filter_llama_3_0_blocked
- test_version_filter_llama_3_1_blocked
- test_version_filter_llama_3_2_blocked
- test_version_filter_llama_3_3_allowed
- test_version_filter_llama_4_allowed
- test_version_filter_other_models_allowed
- test_version_filter_supports_parallel_tool_calls_method

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 1e00ae9 commit 98eb54e
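
For a concrete feel for the rule this commit enforces, here is a minimal standalone sketch of the same decision logic. The free function name and the sample checks are illustrative; the authoritative implementation is the _supports_parallel_tool_calls helper in the diff below.

    import re

    def supports_parallel_tool_calls(model_id: str) -> bool:
        # The provider is the segment before the first dot, e.g. "meta" or "cohere".
        provider = model_id.split(".")[0].lower()
        if provider == "cohere":
            return False  # Cohere: blocked regardless of version
        if provider == "meta" and "llama" in model_id.lower():
            match = re.search(r"llama-(\d+)\.?(\d+)?", model_id.lower())
            if match:
                major = int(match.group(1))
                minor = int(match.group(2)) if match.group(2) else 0
                # Only Llama 4+ and Llama 3.3+ pass the filter.
                return major >= 4 or (major == 3 and minor >= 3)
            return False  # Llama model with an unparseable version: blocked
        return True  # other GenericChatRequest providers (xAI Grok, OpenAI, Mistral)

    assert supports_parallel_tool_calls("meta.llama-3.3-70b-instruct")
    assert not supports_parallel_tool_calls("meta.llama-3.1-70b-instruct")
    assert not supports_parallel_tool_calls("cohere.command-r-plus")
    assert supports_parallel_tool_calls("xai.grok-4-fast")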

2 files changed · +192 −0 lines changed

libs/oci/langchain_oci/chat_models/oci_generative_ai.py

Lines changed: 61 additions & 0 deletions
@@ -1200,6 +1200,55 @@ def _prepare_request(
 
         return request
 
+    def _supports_parallel_tool_calls(self, model_id: str) -> bool:
+        """Check if the model supports parallel tool calling.
+
+        Parallel tool calling is supported for:
+        - Llama 3.3+ (December 2024 onwards)
+        - Llama 4+
+        - Other GenericChatRequest models (xAI Grok, OpenAI, Mistral)
+
+        Not supported for:
+        - Llama 3.0, 3.1, 3.2
+        - Cohere models
+
+        Args:
+            model_id: The model identifier (e.g., "meta.llama-3.3-70b-instruct")
+
+        Returns:
+            bool: True if model supports parallel tool calling, False otherwise
+        """
+        import re
+
+        # Extract provider from model_id (e.g., "meta" from "meta.llama-3.3-70b-instruct")
+        provider = model_id.split(".")[0].lower()
+
+        # Cohere models don't support parallel tool calling
+        if provider == "cohere":
+            return False
+
+        # For Meta/Llama models, check version
+        if provider == "meta" and "llama" in model_id.lower():
+            # Extract version number (e.g., "3.3" from "meta.llama-3.3-70b-instruct")
+            version_match = re.search(r"llama-(\d+)\.?(\d+)?", model_id.lower())
+            if version_match:
+                major = int(version_match.group(1))
+                minor = int(version_match.group(2)) if version_match.group(2) else 0
+
+                # Llama 4+ supports parallel tool calling
+                if major >= 4:
+                    return True
+
+                # Llama 3.3+ supports parallel tool calling (December 2024 onwards)
+                if major == 3 and minor >= 3:
+                    return True
+
+            # Llama 3.0, 3.1, 3.2 don't support parallel tool calling
+            return False
+
+        # Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) support it
+        return True
+
     def bind_tools(
         self,
         tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
@@ -1251,6 +1300,18 @@ def bind_tools(
            else self.parallel_tool_calls
        )
        if use_parallel:
+           # Validate model supports parallel tool calling
+           if not self._supports_parallel_tool_calls(self.model_id):
+               if "llama" in self.model_id.lower():
+                   raise ValueError(
+                       f"Parallel tool calls are not supported for {self.model_id}. "
+                       "This feature is only available for Llama 3.3+ (December 2024 onwards) "
+                       "and Llama 4+ models."
+                   )
+               else:
+                   raise ValueError(
+                       f"Parallel tool calls are not supported for {self.model_id}."
+                   )
            kwargs["is_parallel_tool_calls"] = True
 
        return super().bind(tools=formatted_tools, **kwargs)
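
To show how the validation surfaces to callers, here is a hedged usage sketch against a mocked client. It is not part of the commit, and the import path is assumed from the repository layout.

    from unittest.mock import MagicMock

    import pytest
    from langchain_oci.chat_models import ChatOCIGenAI  # import path assumed

    def get_weather(city: str) -> str:
        """Toy tool for binding."""
        return f"Sunny in {city}"

    # Pre-3.3 Llama: bind_tools now fails fast with a descriptive error.
    old_llm = ChatOCIGenAI(model_id="meta.llama-3.1-70b-instruct", client=MagicMock())
    with pytest.raises(ValueError, match="not supported"):
        old_llm.bind_tools([get_weather], parallel_tool_calls=True)

    # Llama 3.3: the flag is accepted and forwarded as is_parallel_tool_calls.
    new_llm = ChatOCIGenAI(model_id="meta.llama-3.3-70b-instruct", client=MagicMock())
    runnable = new_llm.bind_tools([get_weather], parallel_tool_calls=True)
    assert runnable.kwargs.get("is_parallel_tool_calls") is True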

libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py

Lines changed: 131 additions & 0 deletions
@@ -197,3 +197,134 @@ def tool1(x: int) -> int:
        stream=False,
        **llm_with_tools.kwargs
    )
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_0_blocked():
+    """Test that Llama 3.0 models are blocked from parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should raise ValueError when trying to enable parallel tool calling
+    with pytest.raises(ValueError, match="Llama 3.3\\+.*Llama 4\\+"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_1_blocked():
+    """Test that Llama 3.1 models are blocked from parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.1-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should raise ValueError
+    with pytest.raises(ValueError, match="Llama 3.3\\+.*Llama 4\\+"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_2_blocked():
+    """Test that Llama 3.2 models are blocked from parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.2-11b-vision-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should raise ValueError
+    with pytest.raises(ValueError, match="Llama 3.3\\+.*Llama 4\\+"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_3_allowed():
+    """Test that Llama 3.3 models are allowed parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.3-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should NOT raise ValueError
+    llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True)
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_4_allowed():
+    """Test that Llama 4 models are allowed parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should NOT raise ValueError
+    llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True)
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_other_models_allowed():
+    """Test that other GenericChatRequest models are allowed parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+
+    # Test with xAI Grok
+    llm_grok = ChatOCIGenAI(
+        model_id="xai.grok-4-fast",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should NOT raise ValueError for Grok
+    llm_with_tools = llm_grok.bind_tools([tool1], parallel_tool_calls=True)
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_supports_parallel_tool_calls_method():
+    """Test the _supports_parallel_tool_calls method directly."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.3-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    # Test various model IDs
+    assert llm._supports_parallel_tool_calls("meta.llama-4-maverick-17b-128e-instruct-fp8") is True
+    assert llm._supports_parallel_tool_calls("meta.llama-3.3-70b-instruct") is True
+    assert llm._supports_parallel_tool_calls("meta.llama-3.2-11b-vision-instruct") is False
+    assert llm._supports_parallel_tool_calls("meta.llama-3.1-70b-instruct") is False
+    assert llm._supports_parallel_tool_calls("meta.llama-3-70b-instruct") is False
+    assert llm._supports_parallel_tool_calls("cohere.command-r-plus") is False
+    assert llm._supports_parallel_tool_calls("xai.grok-4-fast") is True
+    assert llm._supports_parallel_tool_calls("openai.gpt-4") is True
+    assert llm._supports_parallel_tool_calls("mistral.mistral-large") is True
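
Since every client above is a MagicMock, the suite should run without OCI credentials; from the repository root, an invocation along the lines of the following would exercise just these tests (command illustrative, pytest assumed installed):

    python -m pytest libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py -q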
