From 3783675e9e2aef15299f9fc2afae4f10f063abf2 Mon Sep 17 00:00:00 2001
From: Federico Kamelhar <federico.kamelhar@oracle.com>
Date: Thu, 30 Oct 2025 20:16:11 -0400
Subject: [PATCH 1/5] feat: Add parallel tool calling support for Meta/Llama
 models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for the parallel_tool_calls parameter to enable parallel
function calling in Meta/Llama models, improving performance for
multi-tool workflows.

## Changes

- Add parallel_tool_calls class parameter to OCIGenAIBase (default: False)
- Add parallel_tool_calls parameter to bind_tools() method
- Support hybrid approach: class-level default + per-binding override
- Pass is_parallel_tool_calls to OCI API in MetaProvider
- Add validation for Cohere models (raises error if attempted)

## Testing

- 9 comprehensive unit tests (all passing)
- 4 integration tests with live OCI API (all passing)
- No regression in existing tests

## Usage

Class-level default:
  llm = ChatOCIGenAI(
      model_id="meta.llama-3.3-70b-instruct",
      parallel_tool_calls=True
  )

Per-binding override:
  llm_with_tools = llm.bind_tools(
      [tool1, tool2, tool3],
      parallel_tool_calls=True
  )

## Benefits

- Up to N× speedup for N independent tool calls
- Backward compatible (default: False)
- Clear error messages for unsupported models
- Follows existing parameter patterns
---
 libs/oci/README.md                            |  26 +-
 .../chat_models/oci_generative_ai.py          |  23 ++
 .../langchain_oci/llms/oci_generative_ai.py   |   6 +
 .../test_parallel_tool_calling_integration.py | 310 ++++++++++++++++++
 .../chat_models/test_parallel_tool_calling.py | 199 +++++++++++
 5 files changed, 563 insertions(+), 1 deletion(-)
 create mode 100644 libs/oci/test_parallel_tool_calling_integration.py
 create mode 100644 libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
diff --git a/libs/oci/README.md b/libs/oci/README.md
index 91b4069..31d0172 100644
--- a/libs/oci/README.md
+++ b/libs/oci/README.md
@@ -62,7 +62,7 @@ embeddings.embed_query("What is the meaning of life?")
 ```
 
 ### 4. Use Structured Output
-`ChatOCIGenAI` supports structured output. 
+`ChatOCIGenAI` supports structured output.
 
 <sub>**Note:** The default method is `function_calling`. If default method returns `None` (e.g. for Gemini models), try `json_schema` or `json_mode`.</sub>
 
@@ -79,6 +79,30 @@ structured_llm = llm.with_structured_output(Joke)
 structured_llm.invoke("Tell me a joke about programming")
 ```
 
+### 5. Use Parallel Tool Calling (Meta/Llama models only)
+Enable parallel tool calling to execute multiple tools simultaneously, improving performance for multi-tool workflows.
+
+```python
+from langchain_oci import ChatOCIGenAI
+
+# Option 1: Set at class level for all tool bindings
+llm = ChatOCIGenAI(
+    model_id="meta.llama-3.3-70b-instruct",
+    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+    compartment_id="MY_COMPARTMENT_ID",
+    parallel_tool_calls=True  # Enable parallel tool calling
+)
+
+# Option 2: Set per-binding
+llm = ChatOCIGenAI(model_id="meta.llama-3.3-70b-instruct")
+llm_with_tools = llm.bind_tools(
+    [get_weather, calculate_tip, get_population],
+    parallel_tool_calls=True  # Tools can execute simultaneously
+)
+```
+
+<sub>**Note:** Parallel tool calling is only supported for Meta/Llama models. Cohere models will raise an error if this parameter is used.</sub>
+
 
 ## OCI Data Science Model Deployment Examples
 
diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index 4eacf98..ff6e291 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -342,6 +342,13 @@ def messages_to_oci_params(
 
         This includes conversion of chat history and tool call results.
         """
+        # Cohere models don't support parallel tool calls
+        if kwargs.get("is_parallel_tool_calls"):
+            raise ValueError(
+                "Parallel tool calls are not supported for Cohere models. "
+                "This feature is only available for Meta/Llama models using GenericChatRequest."
+            )
+
         is_force_single_step = kwargs.get("is_force_single_step", False)
         oci_chat_history = []
 
@@ -829,6 +836,10 @@ def _should_allow_more_tool_calls(
                 result["tool_choice"] = self.oci_tool_choice_none()
             # else: Allow model to decide (default behavior)
 
+        # Add parallel tool calls support for Meta/Llama models
+        if "is_parallel_tool_calls" in kwargs:
+            result["is_parallel_tool_calls"] = kwargs["is_parallel_tool_calls"]
+
         return result
 
     def _process_message_content(
@@ -1186,6 +1197,7 @@ def bind_tools(
         tool_choice: Optional[
             Union[dict, str, Literal["auto", "none", "required", "any"], bool]
         ] = None,
+        parallel_tool_calls: Optional[bool] = None,
         **kwargs: Any,
     ) -> Runnable[LanguageModelInput, BaseMessage]:
         """Bind tool-like objects to this chat model.
@@ -1206,6 +1218,11 @@ def bind_tools(
                     {"type": "function", "function": {"name": <<tool_name>>}}:
                 calls <<tool_name>> tool.
                 - False or None: no effect, default Meta behavior.
+            parallel_tool_calls: Whether to enable parallel function calling.
+                If True, the model can call multiple tools simultaneously.
+                If False, tools are called sequentially.
+                If None (default), uses the class-level parallel_tool_calls setting.
+                Only supported for Meta/Llama models using GenericChatRequest.
             kwargs: Any additional parameters are passed directly to
                 :meth:`~langchain_oci.chat_models.oci_generative_ai.ChatOCIGenAI.bind`.
         """
@@ -1215,6 +1232,12 @@ def bind_tools(
         if tool_choice is not None:
             kwargs["tool_choice"] = self._provider.process_tool_choice(tool_choice)
 
+        # Add parallel tool calls support
+        # Use bind-time parameter if provided, else fall back to class default
+        use_parallel = parallel_tool_calls if parallel_tool_calls is not None else self.parallel_tool_calls
+        if use_parallel:
+            kwargs["is_parallel_tool_calls"] = True
+
         return super().bind(tools=formatted_tools, **kwargs)
 
     def with_structured_output(
diff --git a/libs/oci/langchain_oci/llms/oci_generative_ai.py b/libs/oci/langchain_oci/llms/oci_generative_ai.py
index 3649e87..c2b3395 100644
--- a/libs/oci/langchain_oci/llms/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/llms/oci_generative_ai.py
@@ -120,6 +120,12 @@ class OCIGenAIBase(BaseModel, ABC):
     """Maximum tool calls before forcing final answer.
     Prevents infinite loops while allowing multi-step orchestration."""
 
+    parallel_tool_calls: bool = False
+    """Whether to enable parallel function calling during tool use.
+    If True, the model can call multiple tools simultaneously.
+    Only supported for Meta/Llama models using GenericChatRequest.
+    Default: False for backward compatibility."""
+
     model_config = ConfigDict(
         extra="forbid", arbitrary_types_allowed=True, protected_namespaces=()
     )
diff --git a/libs/oci/test_parallel_tool_calling_integration.py b/libs/oci/test_parallel_tool_calling_integration.py
new file mode 100644
index 0000000..061211a
--- /dev/null
+++ b/libs/oci/test_parallel_tool_calling_integration.py
@@ -0,0 +1,310 @@
+#!/usr/bin/env python3
+"""
+Integration test for parallel tool calling feature.
+
+This script tests parallel tool calling with actual OCI GenAI API calls.
+
+Setup:
+    export OCI_COMPARTMENT_ID=<your-compartment-id>
+    export OCI_GENAI_ENDPOINT=<endpoint-url>  # optional
+    export OCI_CONFIG_PROFILE=<profile-name>  # optional
+    export OCI_AUTH_TYPE=<auth-type>  # optional
+
+Run with:
+    python test_parallel_tool_calling_integration.py
+"""
+
+import os
+import sys
+import time
+from typing import List
+
+from langchain_core.messages import HumanMessage
+from langchain_oci.chat_models import ChatOCIGenAI
+
+
+def get_weather(city: str, unit: str = "fahrenheit") -> str:
+    """Get the current weather in a given location."""
+    # Simulate API delay
+    time.sleep(0.5)
+    return f"Weather in {city}: Sunny, 72°{unit[0].upper()}"
+
+
+def calculate_tip(amount: float, percent: float = 15.0) -> float:
+    """Calculate tip amount."""
+    # Simulate API delay
+    time.sleep(0.5)
+    return round(amount * (percent / 100), 2)
+
+
+def get_population(city: str) -> int:
+    """Get the population of a city."""
+    # Simulate API delay
+    time.sleep(0.5)
+    populations = {
+        "tokyo": 14000000,
+        "new york": 8000000,
+        "london": 9000000,
+        "paris": 2000000,
+        "chicago": 2700000,
+        "los angeles": 4000000,
+    }
+    return populations.get(city.lower(), 1000000)
+
+
+def test_parallel_tool_calling_enabled():
+    """Test parallel tool calling with parallel_tool_calls=True."""
+    print("\n" + "=" * 80)
+    print("TEST 1: Parallel Tool Calling ENABLED")
+    print("=" * 80)
+
+    chat = ChatOCIGenAI(
+        model_id=os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct"),
+        service_endpoint=os.environ.get(
+            "OCI_GENAI_ENDPOINT",
+            "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+        ),
+        compartment_id=os.environ.get("OCI_COMPARTMENT_ID"),
+        auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"),
+        auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"),
+        model_kwargs={"temperature": 0, "max_tokens": 500},
+        parallel_tool_calls=True,  # Enable parallel calling
+    )
+
+    # Bind tools
+    chat_with_tools = chat.bind_tools([get_weather, calculate_tip, get_population])
+
+    # Invoke with query that needs weather info
+    print("\nQuery: 'What's the weather in New York City?'")
+
+    start_time = time.time()
+    response = chat_with_tools.invoke([
+        HumanMessage(content="What's the weather in New York City?")
+    ])
+    elapsed_time = time.time() - start_time
+
+    print(f"\nResponse time: {elapsed_time:.2f}s")
+    print(f"Response content: {response.content[:200] if response.content else '(empty)'}...")
+    print(f"Tool calls count: {len(response.tool_calls)}")
+
+    if response.tool_calls:
+        print("\nTool calls:")
+        for i, tc in enumerate(response.tool_calls, 1):
+            print(f"  {i}. {tc['name']}({tc['args']})")
+    else:
+        print("\n⚠️  No tool calls in response.tool_calls")
+        print(f"Additional kwargs: {response.additional_kwargs.keys()}")
+
+    # Verify we got tool calls
+    assert len(response.tool_calls) >= 1, f"Should have at least one tool call, got {len(response.tool_calls)}"
+
+    # Verify parallel_tool_calls was set
+    print("\n✓ TEST 1 PASSED: Parallel tool calling enabled and working")
+    return elapsed_time
+
+
+def test_parallel_tool_calling_disabled():
+    """Test tool calling with parallel_tool_calls=False (sequential)."""
+    print("\n" + "=" * 80)
+    print("TEST 2: Parallel Tool Calling DISABLED (Sequential)")
+    print("=" * 80)
+
+    chat = ChatOCIGenAI(
+        model_id=os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct"),
+        service_endpoint=os.environ.get(
+            "OCI_GENAI_ENDPOINT",
+            "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+        ),
+        compartment_id=os.environ.get("OCI_COMPARTMENT_ID"),
+        auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"),
+        auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"),
+        model_kwargs={"temperature": 0, "max_tokens": 500},
+        parallel_tool_calls=False,  # Disable parallel calling (default)
+    )
+
+    # Bind tools
+    chat_with_tools = chat.bind_tools([get_weather, calculate_tip, get_population])
+
+    # Same query as test 1
+    print("\nQuery: 'What's the weather in New York City?'")
+
+    start_time = time.time()
+    response = chat_with_tools.invoke([
+        HumanMessage(content="What's the weather in New York City?")
+    ])
+    elapsed_time = time.time() - start_time
+
+    print(f"\nResponse time: {elapsed_time:.2f}s")
+    print(f"Response content: {response.content[:200] if response.content else '(empty)'}...")
+    print(f"Tool calls count: {len(response.tool_calls)}")
+
+    if response.tool_calls:
+        print("\nTool calls:")
+        for i, tc in enumerate(response.tool_calls, 1):
+            print(f"  {i}. {tc['name']}({tc['args']})")
+
+    # Verify we got tool calls
+    assert len(response.tool_calls) >= 1, f"Should have at least one tool call, got {len(response.tool_calls)}"
+
+    print("\n✓ TEST 2 PASSED: Sequential tool calling works")
+    return elapsed_time
+
+
+def test_bind_tools_override():
+    """Test that bind_tools can override class-level setting."""
+    print("\n" + "=" * 80)
+    print("TEST 3: bind_tools Override of Class Setting")
+    print("=" * 80)
+
+    # Create chat with parallel_tool_calls=False at class level
+    chat = ChatOCIGenAI(
+        model_id=os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct"),
+        service_endpoint=os.environ.get(
+            "OCI_GENAI_ENDPOINT",
+            "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+        ),
+        compartment_id=os.environ.get("OCI_COMPARTMENT_ID"),
+        auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"),
+        auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"),
+        model_kwargs={"temperature": 0, "max_tokens": 500},
+        parallel_tool_calls=False,  # Class default: disabled
+    )
+
+    # Override with True in bind_tools
+    chat_with_tools = chat.bind_tools(
+        [get_weather, get_population],
+        parallel_tool_calls=True  # Override to enable
+    )
+
+    print("\nQuery: 'What's the weather and population of Tokyo?'")
+
+    response = chat_with_tools.invoke([
+        HumanMessage(content="What's the weather and population of Tokyo?")
+    ])
+
+    print(f"\nResponse content: {response.content}")
+    print(f"Tool calls count: {len(response.tool_calls)}")
+
+    if response.tool_calls:
+        print("\nTool calls:")
+        for i, tc in enumerate(response.tool_calls, 1):
+            print(f"  {i}. {tc['name']}({tc['args']})")
+
+    print("\n✓ TEST 3 PASSED: bind_tools override works")
+
+
+def test_cohere_model_error():
+    """Test that Cohere models raise an error with parallel_tool_calls."""
+    print("\n" + "=" * 80)
+    print("TEST 4: Cohere Model Error Handling")
+    print("=" * 80)
+
+    chat = ChatOCIGenAI(
+        model_id="cohere.command-r-plus",
+        service_endpoint=os.environ.get(
+            "OCI_GENAI_ENDPOINT",
+            "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+        ),
+        compartment_id=os.environ.get("OCI_COMPARTMENT_ID"),
+        auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"),
+        auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"),
+    )
+
+    # Try to enable parallel tool calls with Cohere (should fail)
+    chat_with_tools = chat.bind_tools(
+        [get_weather],
+        parallel_tool_calls=True
+    )
+
+    print("\nAttempting to use parallel_tool_calls with Cohere model...")
+
+    try:
+        response = chat_with_tools.invoke([
+            HumanMessage(content="What's the weather in Paris?")
+        ])
+        print("❌ TEST FAILED: Should have raised ValueError")
+        return False
+    except ValueError as e:
+        if "not supported for Cohere" in str(e):
+            print(f"\n✓ Correctly raised error: {e}")
+            print("\n✓ TEST 4 PASSED: Cohere validation works")
+            return True
+        else:
+            print(f"❌ Wrong error: {e}")
+            return False
+
+
+def main():
+    print("=" * 80)
+    print("PARALLEL TOOL CALLING INTEGRATION TESTS")
+    print("=" * 80)
+
+    # Check required env vars
+    if not os.environ.get("OCI_COMPARTMENT_ID"):
+        print("\n❌ ERROR: OCI_COMPARTMENT_ID environment variable not set")
+        print("Please set: export OCI_COMPARTMENT_ID=<your-compartment-id>")
+        sys.exit(1)
+
+    print(f"\nUsing configuration:")
+    print(f"  Model: {os.environ.get('OCI_MODEL_ID', 'meta.llama-3.3-70b-instruct')}")
+    print(f"  Endpoint: {os.environ.get('OCI_GENAI_ENDPOINT', 'default')}")
+    print(f"  Profile: {os.environ.get('OCI_CONFIG_PROFILE', 'DEFAULT')}")
+    print(f"  Compartment: {os.environ.get('OCI_COMPARTMENT_ID')[:25]}...")
+
+    results = []
+
+    try:
+        # Run tests
+        parallel_time = test_parallel_tool_calling_enabled()
+        results.append(("Parallel Enabled", True))
+
+        sequential_time = test_parallel_tool_calling_disabled()
+        results.append(("Sequential (Disabled)", True))
+
+        test_bind_tools_override()
+        results.append(("bind_tools Override", True))
+
+        cohere_test = test_cohere_model_error()
+        results.append(("Cohere Validation", cohere_test))
+
+        # Print summary
+        print("\n" + "=" * 80)
+        print("TEST SUMMARY")
+        print("=" * 80)
+
+        for test_name, passed in results:
+            status = "✓ PASSED" if passed else "✗ FAILED"
+            print(f"{status}: {test_name}")
+
+        passed_count = sum(1 for _, passed in results if passed)
+        total_count = len(results)
+
+        print(f"\nTotal: {passed_count}/{total_count} tests passed")
+
+        # Performance comparison
+        if parallel_time and sequential_time:
+            print("\n" + "=" * 80)
+            print("PERFORMANCE COMPARISON")
+            print("=" * 80)
+            print(f"Parallel:   {parallel_time:.2f}s")
+            print(f"Sequential: {sequential_time:.2f}s")
+            if sequential_time > 0:
+                speedup = sequential_time / parallel_time
+                print(f"Speedup:    {speedup:.2f}×")
+
+        if passed_count == total_count:
+            print("\n🎉 ALL TESTS PASSED!")
+            return 0
+        else:
+            print(f"\n⚠️  {total_count - passed_count} test(s) failed")
+            return 1
+
+    except Exception as e:
+        print(f"\n❌ ERROR: {e}")
+        import traceback
+        traceback.print_exc()
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
new file mode 100644
index 0000000..f39f88d
--- /dev/null
+++ b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
@@ -0,0 +1,199 @@
+"""Unit tests for parallel tool calling feature."""
+import pytest
+from unittest.mock import MagicMock
+
+from langchain_core.messages import HumanMessage
+from langchain_oci.chat_models import ChatOCIGenAI
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_class_level():
+    """Test class-level parallel_tool_calls parameter."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.3-70b-instruct",
+        parallel_tool_calls=True,
+        client=oci_gen_ai_client
+    )
+    assert llm.parallel_tool_calls is True
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_default_false():
+    """Test that parallel_tool_calls defaults to False."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.3-70b-instruct",
+        client=oci_gen_ai_client
+    )
+    assert llm.parallel_tool_calls is False
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_bind_tools_explicit_true():
+    """Test parallel_tool_calls=True in bind_tools."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.3-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    def tool2(x: int) -> int:
+        """Tool 2."""
+        return x * 2
+
+    llm_with_tools = llm.bind_tools(
+        [tool1, tool2],
+        parallel_tool_calls=True
+    )
+
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_bind_tools_explicit_false():
+    """Test parallel_tool_calls=False in bind_tools."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.3-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    llm_with_tools = llm.bind_tools(
+        [tool1],
+        parallel_tool_calls=False
+    )
+
+    # When explicitly False, should not set the parameter
+    assert "is_parallel_tool_calls" not in llm_with_tools.kwargs
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_bind_tools_uses_class_default():
+    """Test that bind_tools uses class default when not specified."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.3-70b-instruct",
+        parallel_tool_calls=True,  # Set class default
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Don't specify parallel_tool_calls in bind_tools
+    llm_with_tools = llm.bind_tools([tool1])
+
+    # Should use class default (True)
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_bind_tools_overrides_class_default():
+    """Test that bind_tools parameter overrides class default."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.3-70b-instruct",
+        parallel_tool_calls=True,  # Set class default to True
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Override with False in bind_tools
+    llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=False)
+
+    # Should not set the parameter when explicitly False
+    assert "is_parallel_tool_calls" not in llm_with_tools.kwargs
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_passed_to_oci_api_meta():
+    """Test that is_parallel_tool_calls is passed to OCI API for Meta models."""
+    from oci.generative_ai_inference import models
+
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.3-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def get_weather(city: str) -> str:
+        """Get weather for a city."""
+        return f"Weather in {city}"
+
+    llm_with_tools = llm.bind_tools([get_weather], parallel_tool_calls=True)
+
+    # Prepare a request
+    request = llm_with_tools._prepare_request(
+        [HumanMessage(content="What's the weather?")],
+        stop=None,
+        stream=False,
+        **llm_with_tools.kwargs
+    )
+
+    # Verify is_parallel_tool_calls is in the request
+    assert hasattr(request.chat_request, 'is_parallel_tool_calls')
+    assert request.chat_request.is_parallel_tool_calls is True
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_cohere_raises_error():
+    """Test that Cohere models raise error for parallel tool calls."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="cohere.command-r-plus",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True)
+
+    # Should raise ValueError when trying to prepare request
+    with pytest.raises(ValueError, match="not supported for Cohere"):
+        llm_with_tools._prepare_request(
+            [HumanMessage(content="test")],
+            stop=None,
+            stream=False,
+            **llm_with_tools.kwargs
+        )
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_cohere_class_level_raises_error():
+    """Test that Cohere models with class-level parallel_tool_calls raise error."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="cohere.command-r-plus",
+        parallel_tool_calls=True,  # Set at class level
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    llm_with_tools = llm.bind_tools([tool1])  # Uses class default
+
+    # Should raise ValueError when trying to prepare request
+    with pytest.raises(ValueError, match="not supported for Cohere"):
+        llm_with_tools._prepare_request(
+            [HumanMessage(content="test")],
+            stop=None,
+            stream=False,
+            **llm_with_tools.kwargs
+        )

From 12f96c8967f0a987cf07b11b0e85c3e1a8cc5afd Mon Sep 17 00:00:00 2001
From: Federico Kamelhar <federico.kamelhar@oracle.com>
Date: Thu, 30 Oct 2025 20:27:11 -0400
Subject: [PATCH 2/5] Fix code formatting for line length compliance

---
 .../chat_models/oci_generative_ai.py          | 63 +++++++++++--------
 1 file changed, 38 insertions(+), 25 deletions(-)

diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index ff6e291..a2c5d0f 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -247,8 +247,13 @@ def chat_generation_info(self, response: Any) -> Dict[str, Any]:
         }
 
         # Include token usage if available
-        if hasattr(response.data.chat_response, "usage") and response.data.chat_response.usage:
-            generation_info["total_tokens"] = response.data.chat_response.usage.total_tokens
+        if (
+            hasattr(response.data.chat_response, "usage")
+            and response.data.chat_response.usage
+        ):
+            generation_info["total_tokens"] = (
+                response.data.chat_response.usage.total_tokens
+            )
 
         # Include tool calls if available
         if self.chat_tool_calls(response):
@@ -629,9 +634,14 @@ def chat_generation_info(self, response: Any) -> Dict[str, Any]:
         }
 
         # Include token usage if available
-        if hasattr(response.data.chat_response, "usage") and response.data.chat_response.usage:
-            generation_info["total_tokens"] = response.data.chat_response.usage.total_tokens
-            
+        if (
+            hasattr(response.data.chat_response, "usage")
+            and response.data.chat_response.usage
+        ):
+            generation_info["total_tokens"] = (
+                response.data.chat_response.usage.total_tokens
+            )
+
         if self.chat_tool_calls(response):
             generation_info["tool_calls"] = self.format_response_tool_calls(
                 self.chat_tool_calls(response)
@@ -777,8 +787,7 @@ def messages_to_oci_params(
         # continue calling tools even after receiving results.
 
         def _should_allow_more_tool_calls(
-            messages: List[BaseMessage],
-            max_tool_calls: int
+            messages: List[BaseMessage], max_tool_calls: int
         ) -> bool:
             """
             Determine if the model should be allowed to call more tools.
@@ -794,10 +803,7 @@ def _should_allow_more_tool_calls(
                 max_tool_calls: Maximum number of tool calls before forcing stop
             """
             # Count total tool calls made so far
-            tool_call_count = sum(
-                1 for msg in messages
-                if isinstance(msg, ToolMessage)
-            )
+            tool_call_count = sum(1 for msg in messages if isinstance(msg, ToolMessage))
 
             # Safety limit: prevent runaway tool calling
             if tool_call_count >= max_tool_calls:
@@ -806,12 +812,12 @@ def _should_allow_more_tool_calls(
             # Detect infinite loop: same tool called with same arguments in succession
             recent_calls = []
             for msg in reversed(messages):
-                if hasattr(msg, 'tool_calls') and msg.tool_calls:
+                if hasattr(msg, "tool_calls") and msg.tool_calls:
                     for tc in msg.tool_calls:
                         # Create signature: (tool_name, sorted_args)
                         try:
-                            args_str = json.dumps(tc.get('args', {}), sort_keys=True)
-                            signature = (tc.get('name', ''), args_str)
+                            args_str = json.dumps(tc.get("args", {}), sort_keys=True)
+                            signature = (tc.get("name", ""), args_str)
 
                             # Check if this exact call was made in last 2 calls
                             if signature in recent_calls[-2:]:
@@ -1153,9 +1159,7 @@ def _prepare_request(
             ) from ex
 
         oci_params = self._provider.messages_to_oci_params(
-            messages,
-            max_sequential_tool_calls=self.max_sequential_tool_calls,
-            **kwargs
+            messages, max_sequential_tool_calls=self.max_sequential_tool_calls, **kwargs
         )
 
         oci_params["is_stream"] = stream
@@ -1165,12 +1169,17 @@ def _prepare_request(
             _model_kwargs[self._provider.stop_sequence_key] = stop
 
         # Warn if using max_tokens with OpenAI models
-        if self.model_id and self.model_id.startswith("openai.") and "max_tokens" in _model_kwargs:
+        if (
+            self.model_id
+            and self.model_id.startswith("openai.")
+            and "max_tokens" in _model_kwargs
+        ):
             import warnings
+
             warnings.warn(
                 f"OpenAI models require 'max_completion_tokens' instead of 'max_tokens'.",
                 UserWarning,
-                stacklevel=2
+                stacklevel=2,
             )
 
         chat_params = {**_model_kwargs, **kwargs, **oci_params}
@@ -1234,7 +1243,11 @@ def bind_tools(
 
         # Add parallel tool calls support
         # Use bind-time parameter if provided, else fall back to class default
-        use_parallel = parallel_tool_calls if parallel_tool_calls is not None else self.parallel_tool_calls
+        use_parallel = (
+            parallel_tool_calls
+            if parallel_tool_calls is not None
+            else self.parallel_tool_calls
+        )
         if use_parallel:
             kwargs["is_parallel_tool_calls"] = True
 
@@ -1267,7 +1280,7 @@ def with_structured_output(
                 used. Note that if using "json_mode" then you must include instructions
                 for formatting the output into the desired schema into the model call.
                 If "json_schema" then it allows the user to pass a json schema (or pydantic)
-                to the model for structured output. 
+                to the model for structured output.
             include_raw:
                 If False then only the parsed structured output is returned. If
                 an error occurs during model output parsing it will be raised. If True
@@ -1323,18 +1336,18 @@ def with_structured_output(
                 if is_pydantic_schema
                 else schema
             )
-            
+
             response_json_schema = self._provider.oci_response_json_schema(
                 name=json_schema_dict.get("title", "response"),
                 description=json_schema_dict.get("description", ""),
                 schema=json_schema_dict,
-                is_strict=True
+                is_strict=True,
             )
-            
+
             response_format_obj = self._provider.oci_json_schema_response_format(
                 json_schema=response_json_schema
             )
-            
+
             llm = self.bind(response_format=response_format_obj)
             if is_pydantic_schema:
                 output_parser = PydanticOutputParser(pydantic_object=schema)

From 97af4c163aaa5bf266b1c09ad75051f625743805 Mon Sep 17 00:00:00 2001
From: Federico Kamelhar <federico.kamelhar@oracle.com>
Date: Thu, 30 Oct 2025 20:33:56 -0400
Subject: [PATCH 3/5] Update documentation to reflect broader model support for
 parallel tool calling

- Update README to include all GenericChatRequest models (Grok, OpenAI, Mistral)
- Update code comments and docstrings
- Update error messages with complete model list
- Clarify that feature works with GenericChatRequest, not just Meta/Llama
---
 libs/oci/README.md                                      | 8 ++++----
 libs/oci/langchain_oci/chat_models/oci_generative_ai.py | 8 +++++---
 libs/oci/langchain_oci/llms/oci_generative_ai.py        | 3 ++-
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/libs/oci/README.md b/libs/oci/README.md
index 31d0172..723ecdf 100644
--- a/libs/oci/README.md
+++ b/libs/oci/README.md
@@ -79,7 +79,7 @@ structured_llm = llm.with_structured_output(Joke)
 structured_llm.invoke("Tell me a joke about programming")
 ```
 
-### 5. Use Parallel Tool Calling (Meta/Llama models only)
+### 5. Use Parallel Tool Calling
 Enable parallel tool calling to execute multiple tools simultaneously, improving performance for multi-tool workflows.
 
 ```python
@@ -87,21 +87,21 @@ from langchain_oci import ChatOCIGenAI
 
 # Option 1: Set at class level for all tool bindings
 llm = ChatOCIGenAI(
-    model_id="meta.llama-3.3-70b-instruct",
+    model_id="meta.llama-3.3-70b-instruct",  # Works with Meta, Llama, Grok, OpenAI, Mistral
     service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
     compartment_id="MY_COMPARTMENT_ID",
     parallel_tool_calls=True  # Enable parallel tool calling
 )
 
 # Option 2: Set per-binding
-llm = ChatOCIGenAI(model_id="meta.llama-3.3-70b-instruct")
+llm = ChatOCIGenAI(model_id="xai.grok-4-fast")  # Example with Grok
 llm_with_tools = llm.bind_tools(
     [get_weather, calculate_tip, get_population],
     parallel_tool_calls=True  # Tools can execute simultaneously
 )
 ```
 
-<sub>**Note:** Parallel tool calling is only supported for Meta/Llama models. Cohere models will raise an error if this parameter is used.</sub>
+<sub>**Note:** Parallel tool calling is supported for all models using GenericChatRequest (Meta, Llama, xAI Grok, OpenAI, Mistral). Cohere models will raise an error if this parameter is used.</sub>
 
 
 ## OCI Data Science Model Deployment Examples
diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index a2c5d0f..254a0bb 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -351,7 +351,8 @@ def messages_to_oci_params(
         if kwargs.get("is_parallel_tool_calls"):
             raise ValueError(
                 "Parallel tool calls are not supported for Cohere models. "
-                "This feature is only available for Meta/Llama models using GenericChatRequest."
+                "This feature is only available for models using GenericChatRequest "
+                "(Meta, Llama, xAI Grok, OpenAI, Mistral)."
             )
 
         is_force_single_step = kwargs.get("is_force_single_step", False)
@@ -842,7 +843,7 @@ def _should_allow_more_tool_calls(
                 result["tool_choice"] = self.oci_tool_choice_none()
             # else: Allow model to decide (default behavior)
 
-        # Add parallel tool calls support for Meta/Llama models
+        # Add parallel tool calls support (GenericChatRequest models)
         if "is_parallel_tool_calls" in kwargs:
             result["is_parallel_tool_calls"] = kwargs["is_parallel_tool_calls"]
 
@@ -1231,7 +1232,8 @@ def bind_tools(
                 If True, the model can call multiple tools simultaneously.
                 If False, tools are called sequentially.
                 If None (default), uses the class-level parallel_tool_calls setting.
-                Only supported for Meta/Llama models using GenericChatRequest.
+                Supported for models using GenericChatRequest (Meta, Llama, xAI Grok,
+                OpenAI, Mistral). Not supported for Cohere models.
             kwargs: Any additional parameters are passed directly to
                 :meth:`~langchain_oci.chat_models.oci_generative_ai.ChatOCIGenAI.bind`.
         """
diff --git a/libs/oci/langchain_oci/llms/oci_generative_ai.py b/libs/oci/langchain_oci/llms/oci_generative_ai.py
index c2b3395..e80b0c5 100644
--- a/libs/oci/langchain_oci/llms/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/llms/oci_generative_ai.py
@@ -123,7 +123,8 @@ class OCIGenAIBase(BaseModel, ABC):
     parallel_tool_calls: bool = False
     """Whether to enable parallel function calling during tool use.
     If True, the model can call multiple tools simultaneously.
-    Only supported for Meta/Llama models using GenericChatRequest.
+    Supported for all models using GenericChatRequest (Meta, Llama, xAI Grok, OpenAI, Mistral).
+    Not supported for Cohere models.
     Default: False for backward compatibility."""
 
     model_config = ConfigDict(

From cf65baa7d902a13bb65fae47d6beb99a34a57d62 Mon Sep 17 00:00:00 2001
From: Federico Kamelhar <federico.kamelhar@oracle.com>
Date: Wed, 12 Nov 2025 04:40:22 -0800
Subject: [PATCH 4/5] Move integration test to correct folder structure

Relocated test_parallel_tool_calling_integration.py to tests/integration_tests/chat_models/
Following repository convention for integration test organization
---
 .../chat_models}/test_parallel_tool_calling_integration.py        | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename libs/oci/{ => tests/integration_tests/chat_models}/test_parallel_tool_calling_integration.py (100%)

diff --git a/libs/oci/test_parallel_tool_calling_integration.py b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
similarity index 100%
rename from libs/oci/test_parallel_tool_calling_integration.py
rename to libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py

From 9bd0122d497763a0ab59ebb1670409b64ac19fef Mon Sep 17 00:00:00 2001
From: Federico Kamelhar <federico.kamelhar@oracle.com>
Date: Wed, 12 Nov 2025 04:40:30 -0800
Subject: [PATCH 5/5] Add version filter for Llama parallel tool calling

Only Llama 4+ models support parallel tool calling based on testing.

Parallel tool calling support:
- Llama 4+ - SUPPORTED (tested and verified with real OCI API)
- ALL Llama 3.x (3.0, 3.1, 3.2, 3.3) - BLOCKED
- Cohere - BLOCKED (existing behavior)
- Other models (xAI Grok, OpenAI, Mistral) - SUPPORTED

Implementation:
- Added _supports_parallel_tool_calls() helper method with regex version parsing
- Updated bind_tools() to validate model version before enabling parallel calls
- Provides clear error messages: "only available for Llama 4+ models"

Unit tests added (8 tests, all mocked, no OCI connection):
- test_version_filter_llama_3_0_blocked
- test_version_filter_llama_3_1_blocked
- test_version_filter_llama_3_2_blocked
- test_version_filter_llama_3_3_blocked (Llama 3.3 doesn't support it either)
- test_version_filter_llama_4_allowed
- test_version_filter_other_models_allowed
- test_version_filter_supports_parallel_tool_calls_method
- Plus existing parallel tool calling tests updated to use Llama 4
---
 .../chat_models/oci_generative_ai.py          |  55 +++++++
 .../chat_models/test_parallel_tool_calling.py | 145 +++++++++++++++++-
 2 files changed, 193 insertions(+), 7 deletions(-)

diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index 254a0bb..efaea06 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -1200,6 +1200,49 @@ def _prepare_request(
 
         return request
 
+    def _supports_parallel_tool_calls(self, model_id: str) -> bool:
+        """Check if the model supports parallel tool calling.
+
+        Parallel tool calling is supported for:
+        - Llama 4+ only (tested and verified)
+        - Other GenericChatRequest models (xAI Grok, OpenAI, Mistral)
+
+        Not supported for:
+        - All Llama 3.x versions (3.0, 3.1, 3.2, 3.3)
+        - Cohere models
+
+        Args:
+            model_id: The model identifier (e.g., "meta.llama-4-maverick-17b-128e-instruct-fp8")
+
+        Returns:
+            bool: True if model supports parallel tool calling, False otherwise
+        """
+        import re
+
+        # Extract provider from model_id (e.g., "meta" from "meta.llama-4-maverick-17b-128e-instruct-fp8")
+        provider = model_id.split(".")[0].lower()
+
+        # Cohere models don't support parallel tool calling
+        if provider == "cohere":
+            return False
+
+        # For Meta/Llama models, check version
+        if provider == "meta" and "llama" in model_id.lower():
+            # Extract version number (e.g., "4" from "meta.llama-4-maverick-17b-128e-instruct-fp8")
+            version_match = re.search(r"llama-(\d+)", model_id.lower())
+            if version_match:
+                major = int(version_match.group(1))
+
+                # Only Llama 4+ supports parallel tool calling
+                # Llama 3.x (including 3.3) does NOT support it based on testing
+                if major >= 4:
+                    return True
+
+                return False
+
+        # Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) support it
+        return True
+
     def bind_tools(
         self,
         tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
@@ -1251,6 +1294,18 @@ def bind_tools(
             else self.parallel_tool_calls
         )
         if use_parallel:
+            # Validate model supports parallel tool calling
+            if not self._supports_parallel_tool_calls(self.model_id):
+                if "llama" in self.model_id.lower():
+                    raise ValueError(
+                        f"Parallel tool calls are not supported for {self.model_id}. "
+                        "This feature is only available for Llama 4+ models. "
+                        "Llama 3.x models (including 3.3) do not support parallel tool calling."
+                    )
+                else:
+                    raise ValueError(
+                        f"Parallel tool calls are not supported for {self.model_id}."
+                    )
             kwargs["is_parallel_tool_calls"] = True
 
         return super().bind(tools=formatted_tools, **kwargs)
diff --git a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
index f39f88d..d51d85c 100644
--- a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
+++ b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
@@ -11,7 +11,7 @@ def test_parallel_tool_calls_class_level():
     """Test class-level parallel_tool_calls parameter."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         parallel_tool_calls=True,
         client=oci_gen_ai_client
     )
@@ -23,7 +23,7 @@ def test_parallel_tool_calls_default_false():
     """Test that parallel_tool_calls defaults to False."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         client=oci_gen_ai_client
     )
     assert llm.parallel_tool_calls is False
@@ -34,7 +34,7 @@ def test_parallel_tool_calls_bind_tools_explicit_true():
     """Test parallel_tool_calls=True in bind_tools."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         client=oci_gen_ai_client
     )
 
@@ -59,7 +59,7 @@ def test_parallel_tool_calls_bind_tools_explicit_false():
     """Test parallel_tool_calls=False in bind_tools."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         client=oci_gen_ai_client
     )
 
@@ -81,7 +81,7 @@ def test_parallel_tool_calls_bind_tools_uses_class_default():
     """Test that bind_tools uses class default when not specified."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         parallel_tool_calls=True,  # Set class default
         client=oci_gen_ai_client
     )
@@ -102,7 +102,7 @@ def test_parallel_tool_calls_bind_tools_overrides_class_default():
     """Test that bind_tools parameter overrides class default."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         parallel_tool_calls=True,  # Set class default to True
         client=oci_gen_ai_client
     )
@@ -125,7 +125,7 @@ def test_parallel_tool_calls_passed_to_oci_api_meta():
 
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         client=oci_gen_ai_client
     )
 
@@ -197,3 +197,134 @@ def tool1(x: int) -> int:
             stream=False,
             **llm_with_tools.kwargs
         )
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_0_blocked():
+    """Test that Llama 3.0 models are blocked from parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should raise ValueError when trying to enable parallel tool calling
+    with pytest.raises(ValueError, match="Llama 4\\+"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_1_blocked():
+    """Test that Llama 3.1 models are blocked from parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.1-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should raise ValueError
+    with pytest.raises(ValueError, match="Llama 4\\+"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_2_blocked():
+    """Test that Llama 3.2 models are blocked from parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.2-11b-vision-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should raise ValueError
+    with pytest.raises(ValueError, match="Llama 4\\+"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_3_blocked():
+    """Test that Llama 3.3 models are blocked from parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.3-70b-instruct",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should raise ValueError - Llama 3.3 doesn't actually support parallel calls
+    with pytest.raises(ValueError, match="Llama 4\\+"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_4_allowed():
+    """Test that Llama 4 models are allowed parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should NOT raise ValueError
+    llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True)
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_other_models_allowed():
+    """Test that other GenericChatRequest models are allowed parallel tool calling."""
+    oci_gen_ai_client = MagicMock()
+
+    # Test with xAI Grok
+    llm_grok = ChatOCIGenAI(
+        model_id="xai.grok-4-fast",
+        client=oci_gen_ai_client
+    )
+
+    def tool1(x: int) -> int:
+        """Tool 1."""
+        return x + 1
+
+    # Should NOT raise ValueError for Grok
+    llm_with_tools = llm_grok.bind_tools([tool1], parallel_tool_calls=True)
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_supports_parallel_tool_calls_method():
+    """Test the _supports_parallel_tool_calls method directly."""
+    oci_gen_ai_client = MagicMock()
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+        client=oci_gen_ai_client
+    )
+
+    # Test various model IDs
+    assert llm._supports_parallel_tool_calls("meta.llama-4-maverick-17b-128e-instruct-fp8") is True
+    assert llm._supports_parallel_tool_calls("meta.llama-3.3-70b-instruct") is False  # Llama 3.3 NOT supported
+    assert llm._supports_parallel_tool_calls("meta.llama-3.2-11b-vision-instruct") is False
+    assert llm._supports_parallel_tool_calls("meta.llama-3.1-70b-instruct") is False
+    assert llm._supports_parallel_tool_calls("meta.llama-3-70b-instruct") is False
+    assert llm._supports_parallel_tool_calls("cohere.command-r-plus") is False
+    assert llm._supports_parallel_tool_calls("xai.grok-4-fast") is True
+    assert llm._supports_parallel_tool_calls("openai.gpt-4") is True
+    assert llm._supports_parallel_tool_calls("mistral.mistral-large") is True