
Commit 8b18374

[BUG] Fix json_schema method in the with_structured_output function. (#54)
* Fix with_structured_output json_schema method: fix the json_schema method in the with_structured_output function.
* Remove import and define an alias for the class in the init.
* Set default structured output method to function_calling: changed the default method for structured output in ChatOCIGenAI from 'json_schema' to 'function_calling'; updated documentation to clarify the default and suggest alternatives if it fails.
* Fix assertions in unit tests to match the new json_schema method in with_structured_output.
* Clarify with_structured_output methods in README.
1 parent 8151628 commit 8b18374

3 files changed, +47 -17 lines changed

libs/oci/README.md

Lines changed: 18 additions & 0 deletions
@@ -61,6 +61,24 @@ embeddings = OCIGenAIEmbeddings()
 embeddings.embed_query("What is the meaning of life?")
 ```
 
+### 4. Use Structured Output
+`ChatOCIGenAI` supports structured output.
+
+<sub>**Note:** The default method is `function_calling`. If the default method returns `None` (e.g. for Gemini models), try `json_schema` or `json_mode`.</sub>
+
+```python
+from langchain_oci import ChatOCIGenAI
+from pydantic import BaseModel
+
+class Joke(BaseModel):
+    setup: str
+    punchline: str
+
+llm = ChatOCIGenAI()
+structured_llm = llm.with_structured_output(Joke)
+structured_llm.invoke("Tell me a joke about programming")
+```
+
 
 ## OCI Data Science Model Deployment Examples
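
For readers following along, a minimal sketch of opting into one of the alternative methods mentioned in the note above; it reuses the `Joke` model from the README example and only adds the `method` argument documented in this commit:

```python
from langchain_oci import ChatOCIGenAI
from pydantic import BaseModel


class Joke(BaseModel):
    setup: str
    punchline: str


# Explicitly request the json_schema method instead of the default
# function_calling, e.g. when the default returns None for a model.
llm = ChatOCIGenAI()
structured_llm = llm.with_structured_output(Joke, method="json_schema")
structured_llm.invoke("Tell me a joke about programming")
```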

libs/oci/langchain_oci/chat_models/oci_generative_ai.py

Lines changed: 25 additions & 13 deletions
@@ -213,6 +213,9 @@ def __init__(self) -> None:
             "SYSTEM": models.CohereSystemMessage,
             "TOOL": models.CohereToolMessage,
         }
+
+        self.oci_response_json_schema = models.ResponseJsonSchema
+        self.oci_json_schema_response_format = models.JsonSchemaResponseFormat
         self.chat_api_format = models.BaseChatRequest.API_FORMAT_COHERE
 
     def chat_response_to_text(self, response: Any) -> str:
@@ -588,6 +591,10 @@ def __init__(self) -> None:
         self.oci_tool_call = models.FunctionCall
         self.oci_tool_message = models.ToolMessage
 
+        # Response format models
+        self.oci_response_json_schema = models.ResponseJsonSchema
+        self.oci_json_schema_response_format = models.JsonSchemaResponseFormat
+
         self.chat_api_format = models.BaseChatRequest.API_FORMAT_GENERIC
 
     def chat_response_to_text(self, response: Any) -> str:
@@ -1230,14 +1237,14 @@ def with_structured_output(
                 `method` is "function_calling" and `schema` is a dict, then the dict
                 must match the OCI Generative AI function-calling spec.
             method:
-                The method for steering model generation, either "function_calling"
-                or "json_mode" or "json_schema. If "function_calling" then the schema
+                The method for steering model generation, either "function_calling" (default method)
+                or "json_mode" or "json_schema". If "function_calling" then the schema
                 will be converted to an OCI function and the returned model will make
                 use of the function-calling API. If "json_mode" then Cohere's JSON mode will be
                 used. Note that if using "json_mode" then you must include instructions
                 for formatting the output into the desired schema into the model call.
                 If "json_schema" then it allows the user to pass a json schema (or pydantic)
-                to the model for structured output. This is the default method.
+                to the model for structured output.
             include_raw:
                 If False then only the parsed structured output is returned. If
                 an error occurs during model output parsing it will be raised. If True
@@ -1288,19 +1295,24 @@ def with_structured_output(
                 else JsonOutputParser()
             )
         elif method == "json_schema":
-            response_format = (
-                dict(
-                    schema.model_json_schema().items()  # type: ignore[union-attr]
-                )
+            json_schema_dict = (
+                schema.model_json_schema()  # type: ignore[union-attr]
                 if is_pydantic_schema
                 else schema
             )
-            llm_response_format: Dict[Any, Any] = {"type": "JSON_OBJECT"}
-            llm_response_format["schema"] = {
-                k: v
-                for k, v in response_format.items()  # type: ignore[union-attr]
-            }
-            llm = self.bind(response_format=llm_response_format)
+
+            response_json_schema = self._provider.oci_response_json_schema(
+                name=json_schema_dict.get("title", "response"),
+                description=json_schema_dict.get("description", ""),
+                schema=json_schema_dict,
+                is_strict=True
+            )
+
+            response_format_obj = self._provider.oci_json_schema_response_format(
+                json_schema=response_json_schema
+            )
+
+            llm = self.bind(response_format=response_format_obj)
             if is_pydantic_schema:
                 output_parser = PydanticOutputParser(pydantic_object=schema)
             else:
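
To make the new `json_schema` branch easier to follow, here is a standalone sketch of the objects it builds, using the same constructor arguments as the diff. The `oci.generative_ai_inference.models` import path is an assumption, since the diff only shows the `models` alias exposed through the provider attributes:

```python
# Standalone sketch of the json_schema branch above (import path assumed).
from oci.generative_ai_inference import models
from pydantic import BaseModel


class Joke(BaseModel):
    """A short joke."""

    setup: str
    punchline: str


# 1. Turn the Pydantic model into a plain JSON schema dict.
json_schema_dict = Joke.model_json_schema()

# 2. Wrap it in the SDK's ResponseJsonSchema, as the provider now does.
response_json_schema = models.ResponseJsonSchema(
    name=json_schema_dict.get("title", "response"),
    description=json_schema_dict.get("description", ""),
    schema=json_schema_dict,
    is_strict=True,
)

# 3. The JsonSchemaResponseFormat object is what gets bound as response_format.
response_format_obj = models.JsonSchemaResponseFormat(
    json_schema=response_json_schema
)
```

Binding this SDK object, rather than the old raw `{"type": "JSON_OBJECT", "schema": ...}` dict, is the substance of the fix.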

libs/oci/tests/unit_tests/chat_models/test_oci_generative_ai.py

Lines changed: 4 additions & 4 deletions
@@ -14,7 +14,7 @@
 
 class MockResponseDict(dict):
     def __getattr__(self, val):  # type: ignore[no-untyped-def]
-        return self[val]
+        return self.get(val)
 
 
 class MockToolCall(dict):
@@ -473,10 +473,10 @@ class WeatherResponse(BaseModel):
     llm = ChatOCIGenAI(model_id="cohere.command-latest", client=oci_gen_ai_client)
 
     def mocked_response(*args, **kwargs):  # type: ignore[no-untyped-def]
-        # Verify that response_format contains the schema
+        # Verify that response_format is a JsonSchemaResponseFormat object
         request = args[0]
-        assert request.chat_request.response_format["type"] == "JSON_OBJECT"
-        assert "schema" in request.chat_request.response_format
+        assert hasattr(request.chat_request, 'response_format')
+        assert request.chat_request.response_format is not None
 
         return MockResponseDict(
             {