diff --git a/libs/oci/README.md b/libs/oci/README.md
index 91b4069..723ecdf 100644
--- a/libs/oci/README.md
+++ b/libs/oci/README.md
@@ -62,7 +62,7 @@ embeddings.embed_query("What is the meaning of life?")
```
### 4. Use Structured Output
-`ChatOCIGenAI` supports structured output.
+`ChatOCIGenAI` supports structured output.
**Note:** The default method is `function_calling`. If default method returns `None` (e.g. for Gemini models), try `json_schema` or `json_mode`.
@@ -79,6 +79,30 @@ structured_llm = llm.with_structured_output(Joke)
structured_llm.invoke("Tell me a joke about programming")
```
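+
+As noted above, if the default `function_calling` method returns `None` for your model, you can request a different method explicitly. A minimal sketch using the `Joke` schema defined above:
+
+```python
+# Fall back to JSON-schema based structured output
+structured_llm = llm.with_structured_output(Joke, method="json_schema")
+structured_llm.invoke("Tell me a joke about programming")
+```
+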
+### 5. Use Parallel Tool Calling
+Enable parallel tool calling so the model can request multiple tool calls in a single response; you can then execute them concurrently, improving performance for multi-tool workflows.
+
+```python
+from langchain_oci import ChatOCIGenAI
+
+# Option 1: Set at class level for all tool bindings
+llm = ChatOCIGenAI(
+ model_id="meta.llama-3.3-70b-instruct", # Works with Meta, Llama, Grok, OpenAI, Mistral
+ service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+ compartment_id="MY_COMPARTMENT_ID",
+ parallel_tool_calls=True # Enable parallel tool calling
+)
+
+# Option 2: Set per-binding
+llm = ChatOCIGenAI(model_id="xai.grok-4-fast") # Example with Grok
+llm_with_tools = llm.bind_tools(
+ [get_weather, calculate_tip, get_population],
+ parallel_tool_calls=True # Tools can execute simultaneously
+)
+```
+
+**Note:** Parallel tool calling is supported for models using GenericChatRequest (Meta Llama 4 and later, xAI Grok, OpenAI, Mistral). Llama 3.x and Cohere models raise a `ValueError` if this parameter is used.
+
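+With parallel tool calling enabled, a single model response may contain several tool calls. A minimal sketch of handling them (assuming the `llm_with_tools` binding and the tools from the example above):
+
+```python
+from langchain_core.messages import HumanMessage
+
+response = llm_with_tools.invoke(
+    [HumanMessage(content="What's the weather and population of Tokyo?")]
+)
+
+# With parallel calling the model may return multiple tool calls at once;
+# each entry is a dict with "name" and "args" that you can dispatch concurrently.
+for tool_call in response.tool_calls:
+    print(tool_call["name"], tool_call["args"])
+```
+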
## OCI Data Science Model Deployment Examples
diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index 4eacf98..efaea06 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -247,8 +247,13 @@ def chat_generation_info(self, response: Any) -> Dict[str, Any]:
}
# Include token usage if available
- if hasattr(response.data.chat_response, "usage") and response.data.chat_response.usage:
- generation_info["total_tokens"] = response.data.chat_response.usage.total_tokens
+ if (
+ hasattr(response.data.chat_response, "usage")
+ and response.data.chat_response.usage
+ ):
+ generation_info["total_tokens"] = (
+ response.data.chat_response.usage.total_tokens
+ )
# Include tool calls if available
if self.chat_tool_calls(response):
@@ -342,6 +347,14 @@ def messages_to_oci_params(
This includes conversion of chat history and tool call results.
"""
+ # Cohere models don't support parallel tool calls
+ if kwargs.get("is_parallel_tool_calls"):
+ raise ValueError(
+ "Parallel tool calls are not supported for Cohere models. "
+ "This feature is only available for models using GenericChatRequest "
+ "(Meta, Llama, xAI Grok, OpenAI, Mistral)."
+ )
+
is_force_single_step = kwargs.get("is_force_single_step", False)
oci_chat_history = []
@@ -622,9 +635,14 @@ def chat_generation_info(self, response: Any) -> Dict[str, Any]:
}
# Include token usage if available
- if hasattr(response.data.chat_response, "usage") and response.data.chat_response.usage:
- generation_info["total_tokens"] = response.data.chat_response.usage.total_tokens
-
+ if (
+ hasattr(response.data.chat_response, "usage")
+ and response.data.chat_response.usage
+ ):
+ generation_info["total_tokens"] = (
+ response.data.chat_response.usage.total_tokens
+ )
+
if self.chat_tool_calls(response):
generation_info["tool_calls"] = self.format_response_tool_calls(
self.chat_tool_calls(response)
@@ -770,8 +788,7 @@ def messages_to_oci_params(
# continue calling tools even after receiving results.
def _should_allow_more_tool_calls(
- messages: List[BaseMessage],
- max_tool_calls: int
+ messages: List[BaseMessage], max_tool_calls: int
) -> bool:
"""
Determine if the model should be allowed to call more tools.
@@ -787,10 +804,7 @@ def _should_allow_more_tool_calls(
max_tool_calls: Maximum number of tool calls before forcing stop
"""
# Count total tool calls made so far
- tool_call_count = sum(
- 1 for msg in messages
- if isinstance(msg, ToolMessage)
- )
+ tool_call_count = sum(1 for msg in messages if isinstance(msg, ToolMessage))
# Safety limit: prevent runaway tool calling
if tool_call_count >= max_tool_calls:
@@ -799,12 +813,12 @@ def _should_allow_more_tool_calls(
# Detect infinite loop: same tool called with same arguments in succession
recent_calls = []
for msg in reversed(messages):
- if hasattr(msg, 'tool_calls') and msg.tool_calls:
+ if hasattr(msg, "tool_calls") and msg.tool_calls:
for tc in msg.tool_calls:
# Create signature: (tool_name, sorted_args)
try:
- args_str = json.dumps(tc.get('args', {}), sort_keys=True)
- signature = (tc.get('name', ''), args_str)
+ args_str = json.dumps(tc.get("args", {}), sort_keys=True)
+ signature = (tc.get("name", ""), args_str)
# Check if this exact call was made in last 2 calls
if signature in recent_calls[-2:]:
@@ -829,6 +843,10 @@ def _should_allow_more_tool_calls(
result["tool_choice"] = self.oci_tool_choice_none()
# else: Allow model to decide (default behavior)
+ # Add parallel tool calls support (GenericChatRequest models)
+ if "is_parallel_tool_calls" in kwargs:
+ result["is_parallel_tool_calls"] = kwargs["is_parallel_tool_calls"]
+
return result
def _process_message_content(
@@ -1142,9 +1160,7 @@ def _prepare_request(
) from ex
oci_params = self._provider.messages_to_oci_params(
- messages,
- max_sequential_tool_calls=self.max_sequential_tool_calls,
- **kwargs
+ messages, max_sequential_tool_calls=self.max_sequential_tool_calls, **kwargs
)
oci_params["is_stream"] = stream
@@ -1154,12 +1170,17 @@ def _prepare_request(
_model_kwargs[self._provider.stop_sequence_key] = stop
# Warn if using max_tokens with OpenAI models
- if self.model_id and self.model_id.startswith("openai.") and "max_tokens" in _model_kwargs:
+ if (
+ self.model_id
+ and self.model_id.startswith("openai.")
+ and "max_tokens" in _model_kwargs
+ ):
import warnings
+
warnings.warn(
f"OpenAI models require 'max_completion_tokens' instead of 'max_tokens'.",
UserWarning,
- stacklevel=2
+ stacklevel=2,
)
chat_params = {**_model_kwargs, **kwargs, **oci_params}
@@ -1179,6 +1200,49 @@ def _prepare_request(
return request
+ def _supports_parallel_tool_calls(self, model_id: str) -> bool:
+ """Check if the model supports parallel tool calling.
+
+ Parallel tool calling is supported for:
+ - Llama 4+ only (tested and verified)
+ - Other GenericChatRequest models (xAI Grok, OpenAI, Mistral)
+
+ Not supported for:
+ - All Llama 3.x versions (3.0, 3.1, 3.2, 3.3)
+ - Cohere models
+
+ Args:
+ model_id: The model identifier (e.g., "meta.llama-4-maverick-17b-128e-instruct-fp8")
+
+ Returns:
+ bool: True if model supports parallel tool calling, False otherwise
+ """
+ import re
+
+ # Extract provider from model_id (e.g., "meta" from "meta.llama-4-maverick-17b-128e-instruct-fp8")
+ provider = model_id.split(".")[0].lower()
+
+ # Cohere models don't support parallel tool calling
+ if provider == "cohere":
+ return False
+
+ # For Meta/Llama models, check version
+ if provider == "meta" and "llama" in model_id.lower():
+ # Extract version number (e.g., "4" from "meta.llama-4-maverick-17b-128e-instruct-fp8")
+ version_match = re.search(r"llama-(\d+)", model_id.lower())
+ if version_match:
+ major = int(version_match.group(1))
+
+ # Only Llama 4+ supports parallel tool calling
+ # Llama 3.x (including 3.3) does NOT support it based on testing
+ if major >= 4:
+ return True
+
+ return False
+
+ # Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) support it
+ return True
+
def bind_tools(
self,
tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
@@ -1186,6 +1250,7 @@ def bind_tools(
tool_choice: Optional[
Union[dict, str, Literal["auto", "none", "required", "any"], bool]
] = None,
+ parallel_tool_calls: Optional[bool] = None,
**kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]:
"""Bind tool-like objects to this chat model.
@@ -1206,6 +1271,12 @@ def bind_tools(
{"type": "function", "function": {"name": <>}}:
calls <> tool.
- False or None: no effect, default Meta behavior.
+ parallel_tool_calls: Whether to enable parallel function calling.
+ If True, the model can call multiple tools simultaneously.
+ If False, tools are called sequentially.
+ If None (default), uses the class-level parallel_tool_calls setting.
+ Supported for models using GenericChatRequest (Meta, Llama, xAI Grok,
+ OpenAI, Mistral). Not supported for Cohere models.
kwargs: Any additional parameters are passed directly to
:meth:`~langchain_oci.chat_models.oci_generative_ai.ChatOCIGenAI.bind`.
"""
@@ -1215,6 +1286,28 @@ def bind_tools(
if tool_choice is not None:
kwargs["tool_choice"] = self._provider.process_tool_choice(tool_choice)
+ # Add parallel tool calls support
+ # Use bind-time parameter if provided, else fall back to class default
+ use_parallel = (
+ parallel_tool_calls
+ if parallel_tool_calls is not None
+ else self.parallel_tool_calls
+ )
+ if use_parallel:
+ # Validate model supports parallel tool calling
+ if not self._supports_parallel_tool_calls(self.model_id):
+ if "llama" in self.model_id.lower():
+ raise ValueError(
+ f"Parallel tool calls are not supported for {self.model_id}. "
+ "This feature is only available for Llama 4+ models. "
+ "Llama 3.x models (including 3.3) do not support parallel tool calling."
+ )
+ else:
+ raise ValueError(
+ f"Parallel tool calls are not supported for {self.model_id}."
+ )
+ kwargs["is_parallel_tool_calls"] = True
+
return super().bind(tools=formatted_tools, **kwargs)
def with_structured_output(
@@ -1244,7 +1337,7 @@ def with_structured_output(
used. Note that if using "json_mode" then you must include instructions
for formatting the output into the desired schema into the model call.
If "json_schema" then it allows the user to pass a json schema (or pydantic)
- to the model for structured output.
+ to the model for structured output.
include_raw:
If False then only the parsed structured output is returned. If
an error occurs during model output parsing it will be raised. If True
@@ -1300,18 +1393,18 @@ def with_structured_output(
if is_pydantic_schema
else schema
)
-
+
response_json_schema = self._provider.oci_response_json_schema(
name=json_schema_dict.get("title", "response"),
description=json_schema_dict.get("description", ""),
schema=json_schema_dict,
- is_strict=True
+ is_strict=True,
)
-
+
response_format_obj = self._provider.oci_json_schema_response_format(
json_schema=response_json_schema
)
-
+
llm = self.bind(response_format=response_format_obj)
if is_pydantic_schema:
output_parser = PydanticOutputParser(pydantic_object=schema)
diff --git a/libs/oci/langchain_oci/llms/oci_generative_ai.py b/libs/oci/langchain_oci/llms/oci_generative_ai.py
index 3649e87..e80b0c5 100644
--- a/libs/oci/langchain_oci/llms/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/llms/oci_generative_ai.py
@@ -120,6 +120,13 @@ class OCIGenAIBase(BaseModel, ABC):
"""Maximum tool calls before forcing final answer.
Prevents infinite loops while allowing multi-step orchestration."""
+ parallel_tool_calls: bool = False
+ """Whether to enable parallel function calling during tool use.
+ If True, the model can call multiple tools simultaneously.
+ Supported for all models using GenericChatRequest (Meta, Llama, xAI Grok, OpenAI, Mistral).
+ Not supported for Cohere models.
+ Default: False for backward compatibility."""
+
model_config = ConfigDict(
extra="forbid", arbitrary_types_allowed=True, protected_namespaces=()
)
diff --git a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
new file mode 100644
index 0000000..061211a
--- /dev/null
+++ b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
@@ -0,0 +1,310 @@
+#!/usr/bin/env python3
+"""
+Integration test for parallel tool calling feature.
+
+This script tests parallel tool calling with actual OCI GenAI API calls.
+
+Setup:
+    export OCI_COMPARTMENT_ID=
+    export OCI_MODEL_ID= # optional, defaults to a Llama 4 model
+    export OCI_GENAI_ENDPOINT= # optional
+    export OCI_CONFIG_PROFILE= # optional
+    export OCI_AUTH_TYPE= # optional
+
+Run with:
+ python test_parallel_tool_calling_integration.py
+"""
+
+import os
+import sys
+import time
+from typing import List
+
+from langchain_core.messages import HumanMessage
+from langchain_oci.chat_models import ChatOCIGenAI
+
+
+def get_weather(city: str, unit: str = "fahrenheit") -> str:
+ """Get the current weather in a given location."""
+ # Simulate API delay
+ time.sleep(0.5)
+ return f"Weather in {city}: Sunny, 72°{unit[0].upper()}"
+
+
+def calculate_tip(amount: float, percent: float = 15.0) -> float:
+ """Calculate tip amount."""
+ # Simulate API delay
+ time.sleep(0.5)
+ return round(amount * (percent / 100), 2)
+
+
+def get_population(city: str) -> int:
+ """Get the population of a city."""
+ # Simulate API delay
+ time.sleep(0.5)
+ populations = {
+ "tokyo": 14000000,
+ "new york": 8000000,
+ "london": 9000000,
+ "paris": 2000000,
+ "chicago": 2700000,
+ "los angeles": 4000000,
+ }
+ return populations.get(city.lower(), 1000000)
+
+
+def test_parallel_tool_calling_enabled():
+ """Test parallel tool calling with parallel_tool_calls=True."""
+ print("\n" + "=" * 80)
+ print("TEST 1: Parallel Tool Calling ENABLED")
+ print("=" * 80)
+
+ chat = ChatOCIGenAI(
+        model_id=os.environ.get(
+            "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8"
+        ),
+ service_endpoint=os.environ.get(
+ "OCI_GENAI_ENDPOINT",
+ "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+ ),
+ compartment_id=os.environ.get("OCI_COMPARTMENT_ID"),
+ auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"),
+ auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"),
+ model_kwargs={"temperature": 0, "max_tokens": 500},
+ parallel_tool_calls=True, # Enable parallel calling
+ )
+
+ # Bind tools
+ chat_with_tools = chat.bind_tools([get_weather, calculate_tip, get_population])
+
+ # Invoke with query that needs weather info
+ print("\nQuery: 'What's the weather in New York City?'")
+
+ start_time = time.time()
+ response = chat_with_tools.invoke([
+ HumanMessage(content="What's the weather in New York City?")
+ ])
+ elapsed_time = time.time() - start_time
+
+ print(f"\nResponse time: {elapsed_time:.2f}s")
+ print(f"Response content: {response.content[:200] if response.content else '(empty)'}...")
+ print(f"Tool calls count: {len(response.tool_calls)}")
+
+ if response.tool_calls:
+ print("\nTool calls:")
+ for i, tc in enumerate(response.tool_calls, 1):
+ print(f" {i}. {tc['name']}({tc['args']})")
+ else:
+ print("\n⚠️ No tool calls in response.tool_calls")
+ print(f"Additional kwargs: {response.additional_kwargs.keys()}")
+
+ # Verify we got tool calls
+ assert len(response.tool_calls) >= 1, f"Should have at least one tool call, got {len(response.tool_calls)}"
+
+ # Verify parallel_tool_calls was set
+ print("\n✓ TEST 1 PASSED: Parallel tool calling enabled and working")
+ return elapsed_time
+
+
+def test_parallel_tool_calling_disabled():
+ """Test tool calling with parallel_tool_calls=False (sequential)."""
+ print("\n" + "=" * 80)
+ print("TEST 2: Parallel Tool Calling DISABLED (Sequential)")
+ print("=" * 80)
+
+ chat = ChatOCIGenAI(
+        model_id=os.environ.get(
+            "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8"
+        ),
+ service_endpoint=os.environ.get(
+ "OCI_GENAI_ENDPOINT",
+ "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+ ),
+ compartment_id=os.environ.get("OCI_COMPARTMENT_ID"),
+ auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"),
+ auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"),
+ model_kwargs={"temperature": 0, "max_tokens": 500},
+ parallel_tool_calls=False, # Disable parallel calling (default)
+ )
+
+ # Bind tools
+ chat_with_tools = chat.bind_tools([get_weather, calculate_tip, get_population])
+
+ # Same query as test 1
+ print("\nQuery: 'What's the weather in New York City?'")
+
+ start_time = time.time()
+ response = chat_with_tools.invoke([
+ HumanMessage(content="What's the weather in New York City?")
+ ])
+ elapsed_time = time.time() - start_time
+
+ print(f"\nResponse time: {elapsed_time:.2f}s")
+ print(f"Response content: {response.content[:200] if response.content else '(empty)'}...")
+ print(f"Tool calls count: {len(response.tool_calls)}")
+
+ if response.tool_calls:
+ print("\nTool calls:")
+ for i, tc in enumerate(response.tool_calls, 1):
+ print(f" {i}. {tc['name']}({tc['args']})")
+
+ # Verify we got tool calls
+ assert len(response.tool_calls) >= 1, f"Should have at least one tool call, got {len(response.tool_calls)}"
+
+ print("\n✓ TEST 2 PASSED: Sequential tool calling works")
+ return elapsed_time
+
+
+def test_bind_tools_override():
+ """Test that bind_tools can override class-level setting."""
+ print("\n" + "=" * 80)
+ print("TEST 3: bind_tools Override of Class Setting")
+ print("=" * 80)
+
+ # Create chat with parallel_tool_calls=False at class level
+ chat = ChatOCIGenAI(
+        model_id=os.environ.get(
+            "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8"
+        ),
+ service_endpoint=os.environ.get(
+ "OCI_GENAI_ENDPOINT",
+ "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+ ),
+ compartment_id=os.environ.get("OCI_COMPARTMENT_ID"),
+ auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"),
+ auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"),
+ model_kwargs={"temperature": 0, "max_tokens": 500},
+ parallel_tool_calls=False, # Class default: disabled
+ )
+
+ # Override with True in bind_tools
+ chat_with_tools = chat.bind_tools(
+ [get_weather, get_population],
+ parallel_tool_calls=True # Override to enable
+ )
+
+ print("\nQuery: 'What's the weather and population of Tokyo?'")
+
+ response = chat_with_tools.invoke([
+ HumanMessage(content="What's the weather and population of Tokyo?")
+ ])
+
+ print(f"\nResponse content: {response.content}")
+ print(f"Tool calls count: {len(response.tool_calls)}")
+
+ if response.tool_calls:
+ print("\nTool calls:")
+ for i, tc in enumerate(response.tool_calls, 1):
+ print(f" {i}. {tc['name']}({tc['args']})")
+
+ print("\n✓ TEST 3 PASSED: bind_tools override works")
+
+
+def test_cohere_model_error():
+ """Test that Cohere models raise an error with parallel_tool_calls."""
+ print("\n" + "=" * 80)
+ print("TEST 4: Cohere Model Error Handling")
+ print("=" * 80)
+
+ chat = ChatOCIGenAI(
+ model_id="cohere.command-r-plus",
+ service_endpoint=os.environ.get(
+ "OCI_GENAI_ENDPOINT",
+ "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+ ),
+ compartment_id=os.environ.get("OCI_COMPARTMENT_ID"),
+ auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"),
+ auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"),
+ )
+
+    # Binding tools with parallel_tool_calls=True should fail for Cohere models
+    print("\nAttempting to use parallel_tool_calls with a Cohere model...")
+
+    try:
+        chat.bind_tools([get_weather], parallel_tool_calls=True)
+        print("❌ TEST FAILED: Should have raised ValueError")
+        return False
+    except ValueError as e:
+        if "not supported" in str(e):
+            print(f"\n✓ Correctly raised error: {e}")
+            print("\n✓ TEST 4 PASSED: Cohere validation works")
+            return True
+        else:
+            print(f"❌ Wrong error: {e}")
+            return False
+
+
+def main():
+ print("=" * 80)
+ print("PARALLEL TOOL CALLING INTEGRATION TESTS")
+ print("=" * 80)
+
+ # Check required env vars
+ if not os.environ.get("OCI_COMPARTMENT_ID"):
+ print("\n❌ ERROR: OCI_COMPARTMENT_ID environment variable not set")
+ print("Please set: export OCI_COMPARTMENT_ID=")
+ sys.exit(1)
+
+ print(f"\nUsing configuration:")
+ print(f" Model: {os.environ.get('OCI_MODEL_ID', 'meta.llama-3.3-70b-instruct')}")
+ print(f" Endpoint: {os.environ.get('OCI_GENAI_ENDPOINT', 'default')}")
+ print(f" Profile: {os.environ.get('OCI_CONFIG_PROFILE', 'DEFAULT')}")
+ print(f" Compartment: {os.environ.get('OCI_COMPARTMENT_ID')[:25]}...")
+
+ results = []
+
+ try:
+ # Run tests
+ parallel_time = test_parallel_tool_calling_enabled()
+ results.append(("Parallel Enabled", True))
+
+ sequential_time = test_parallel_tool_calling_disabled()
+ results.append(("Sequential (Disabled)", True))
+
+ test_bind_tools_override()
+ results.append(("bind_tools Override", True))
+
+ cohere_test = test_cohere_model_error()
+ results.append(("Cohere Validation", cohere_test))
+
+ # Print summary
+ print("\n" + "=" * 80)
+ print("TEST SUMMARY")
+ print("=" * 80)
+
+ for test_name, passed in results:
+ status = "✓ PASSED" if passed else "✗ FAILED"
+ print(f"{status}: {test_name}")
+
+ passed_count = sum(1 for _, passed in results if passed)
+ total_count = len(results)
+
+ print(f"\nTotal: {passed_count}/{total_count} tests passed")
+
+ # Performance comparison
+ if parallel_time and sequential_time:
+ print("\n" + "=" * 80)
+ print("PERFORMANCE COMPARISON")
+ print("=" * 80)
+ print(f"Parallel: {parallel_time:.2f}s")
+ print(f"Sequential: {sequential_time:.2f}s")
+ if sequential_time > 0:
+ speedup = sequential_time / parallel_time
+ print(f"Speedup: {speedup:.2f}×")
+
+ if passed_count == total_count:
+ print("\n🎉 ALL TESTS PASSED!")
+ return 0
+ else:
+ print(f"\n⚠️ {total_count - passed_count} test(s) failed")
+ return 1
+
+ except Exception as e:
+ print(f"\n❌ ERROR: {e}")
+ import traceback
+ traceback.print_exc()
+ return 1
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
new file mode 100644
index 0000000..d51d85c
--- /dev/null
+++ b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
@@ -0,0 +1,330 @@
+"""Unit tests for parallel tool calling feature."""
+import pytest
+from unittest.mock import MagicMock
+
+from langchain_core.messages import HumanMessage
+from langchain_oci.chat_models import ChatOCIGenAI
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_class_level():
+ """Test class-level parallel_tool_calls parameter."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+ parallel_tool_calls=True,
+ client=oci_gen_ai_client
+ )
+ assert llm.parallel_tool_calls is True
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_default_false():
+ """Test that parallel_tool_calls defaults to False."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+ client=oci_gen_ai_client
+ )
+ assert llm.parallel_tool_calls is False
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_bind_tools_explicit_true():
+ """Test parallel_tool_calls=True in bind_tools."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+ def tool2(x: int) -> int:
+ """Tool 2."""
+ return x * 2
+
+ llm_with_tools = llm.bind_tools(
+ [tool1, tool2],
+ parallel_tool_calls=True
+ )
+
+ assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_bind_tools_explicit_false():
+ """Test parallel_tool_calls=False in bind_tools."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+ llm_with_tools = llm.bind_tools(
+ [tool1],
+ parallel_tool_calls=False
+ )
+
+ # When explicitly False, should not set the parameter
+ assert "is_parallel_tool_calls" not in llm_with_tools.kwargs
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_bind_tools_uses_class_default():
+ """Test that bind_tools uses class default when not specified."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+ parallel_tool_calls=True, # Set class default
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+ # Don't specify parallel_tool_calls in bind_tools
+ llm_with_tools = llm.bind_tools([tool1])
+
+ # Should use class default (True)
+ assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_bind_tools_overrides_class_default():
+ """Test that bind_tools parameter overrides class default."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+ parallel_tool_calls=True, # Set class default to True
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+ # Override with False in bind_tools
+ llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=False)
+
+ # Should not set the parameter when explicitly False
+ assert "is_parallel_tool_calls" not in llm_with_tools.kwargs
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_passed_to_oci_api_meta():
+ """Test that is_parallel_tool_calls is passed to OCI API for Meta models."""
+ from oci.generative_ai_inference import models
+
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+ client=oci_gen_ai_client
+ )
+
+ def get_weather(city: str) -> str:
+ """Get weather for a city."""
+ return f"Weather in {city}"
+
+ llm_with_tools = llm.bind_tools([get_weather], parallel_tool_calls=True)
+
+ # Prepare a request
+ request = llm_with_tools._prepare_request(
+ [HumanMessage(content="What's the weather?")],
+ stop=None,
+ stream=False,
+ **llm_with_tools.kwargs
+ )
+
+ # Verify is_parallel_tool_calls is in the request
+ assert hasattr(request.chat_request, 'is_parallel_tool_calls')
+ assert request.chat_request.is_parallel_tool_calls is True
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_cohere_raises_error():
+ """Test that Cohere models raise error for parallel tool calls."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="cohere.command-r-plus",
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+    # bind_tools validates the model and raises immediately for Cohere
+    with pytest.raises(ValueError, match="not supported"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_cohere_class_level_raises_error():
+ """Test that Cohere models with class-level parallel_tool_calls raise error."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="cohere.command-r-plus",
+ parallel_tool_calls=True, # Set at class level
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+    # bind_tools picks up the class-level default and raises immediately for Cohere
+    with pytest.raises(ValueError, match="not supported"):
+        llm.bind_tools([tool1])
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_0_blocked():
+ """Test that Llama 3.0 models are blocked from parallel tool calling."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-3-70b-instruct",
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+ # Should raise ValueError when trying to enable parallel tool calling
+ with pytest.raises(ValueError, match="Llama 4\\+"):
+ llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_1_blocked():
+ """Test that Llama 3.1 models are blocked from parallel tool calling."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-3.1-70b-instruct",
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+ # Should raise ValueError
+ with pytest.raises(ValueError, match="Llama 4\\+"):
+ llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_2_blocked():
+ """Test that Llama 3.2 models are blocked from parallel tool calling."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-3.2-11b-vision-instruct",
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+ # Should raise ValueError
+ with pytest.raises(ValueError, match="Llama 4\\+"):
+ llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_3_3_blocked():
+ """Test that Llama 3.3 models are blocked from parallel tool calling."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-3.3-70b-instruct",
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+ # Should raise ValueError - Llama 3.3 doesn't actually support parallel calls
+ with pytest.raises(ValueError, match="Llama 4\\+"):
+ llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_llama_4_allowed():
+ """Test that Llama 4 models are allowed parallel tool calling."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+ # Should NOT raise ValueError
+ llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True)
+ assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_other_models_allowed():
+ """Test that other GenericChatRequest models are allowed parallel tool calling."""
+ oci_gen_ai_client = MagicMock()
+
+ # Test with xAI Grok
+ llm_grok = ChatOCIGenAI(
+ model_id="xai.grok-4-fast",
+ client=oci_gen_ai_client
+ )
+
+ def tool1(x: int) -> int:
+ """Tool 1."""
+ return x + 1
+
+ # Should NOT raise ValueError for Grok
+ llm_with_tools = llm_grok.bind_tools([tool1], parallel_tool_calls=True)
+ assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+
+
+@pytest.mark.requires("oci")
+def test_version_filter_supports_parallel_tool_calls_method():
+ """Test the _supports_parallel_tool_calls method directly."""
+ oci_gen_ai_client = MagicMock()
+ llm = ChatOCIGenAI(
+ model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
+ client=oci_gen_ai_client
+ )
+
+ # Test various model IDs
+ assert llm._supports_parallel_tool_calls("meta.llama-4-maverick-17b-128e-instruct-fp8") is True
+ assert llm._supports_parallel_tool_calls("meta.llama-3.3-70b-instruct") is False # Llama 3.3 NOT supported
+ assert llm._supports_parallel_tool_calls("meta.llama-3.2-11b-vision-instruct") is False
+ assert llm._supports_parallel_tool_calls("meta.llama-3.1-70b-instruct") is False
+ assert llm._supports_parallel_tool_calls("meta.llama-3-70b-instruct") is False
+ assert llm._supports_parallel_tool_calls("cohere.command-r-plus") is False
+ assert llm._supports_parallel_tool_calls("xai.grok-4-fast") is True
+ assert llm._supports_parallel_tool_calls("openai.gpt-4") is True
+ assert llm._supports_parallel_tool_calls("mistral.mistral-large") is True