From 781212a355d42834a6f1ef6909d57d2c074d93eb Mon Sep 17 00:00:00 2001 From: Ryan Alyn Porter Date: Wed, 29 Apr 2026 20:00:28 -0400 Subject: [PATCH] feat: support GPT-5 reasoning and verbosity controls Add GPT-5-family reasoning_effort and verbosity controls through the Tactus runtime and model call path. --- ..._cf2d0564-6c1e-461b-8292-d4aa3654a056.json | 18 +++++ ..._556c81e3-a2c2-4143-b679-e5001b0b7ddb.json | 18 +++++ ..._d48dcde7-1b94-4f8c-9480-04bab8e02a59.json | 18 +++++ ..._54c134e2-01a3-4ba7-9265-95f5e2a8f911.json | 12 +++ ..._57743a66-6944-41e9-aeca-dc3eb08d71c7.json | 12 +++ ..._9e1ccc24-a941-4b2c-98e0-b142a2bbc816.json | 12 +++ ..._c288b592-34c2-4a83-ae21-8f2626641213.json | 18 +++++ ..._cc92bc88-3666-4900-8a34-30efbc3ba790.json | 12 +++ ..._8ec11433-64d2-4bca-8490-18d8dcc39818.json | 12 +++ ...-5f92fe70-3a01-465c-9982-687ec28025a3.json | 18 +++++ ...-9116f787-cc42-4a34-bdd5-5c4dbd57bac6.json | 40 ++++++++++ ...-d7500513-926c-4253-aef5-6349b2d81dd6.json | 18 +++++ ...-da6da4a2-7331-43cf-b3a5-0ad9be1fc89b.json | 18 +++++ tactus/backends/llm_backend.py | 8 ++ tactus/core/dsl_stubs.py | 22 ++++++ tactus/core/runtime.py | 30 ++++++++ tactus/core/yaml_parser.py | 21 ++++- tactus/dspy/agent.py | 14 ++++ tactus/dspy/config.py | 58 ++++++++++++++ tactus/primitives/model.py | 8 ++ tests/core/test_runtime_helpers.py | 16 +++- .../test_runtime_setup_agents_branches.py | 65 +++++++++++++++- tests/core/test_runtime_tasks.py | 77 +++++-------------- tests/core/test_yaml_parser.py | 6 +- tests/dspy/test_agent_model_normalization.py | 13 +++- tests/dspy/test_config.py | 52 +++++++++++++ tests/models/test_llm_backend.py | 42 ++++++++++ 27 files changed, 592 insertions(+), 66 deletions(-) create mode 100644 project/events/2026-04-29T22:09:04.273Z__cf2d0564-6c1e-461b-8292-d4aa3654a056.json create mode 100644 project/events/2026-04-29T22:09:18.678Z__556c81e3-a2c2-4143-b679-e5001b0b7ddb.json create mode 100644 project/events/2026-04-29T22:09:18.678Z__d48dcde7-1b94-4f8c-9480-04bab8e02a59.json create mode 100644 project/events/2026-04-29T22:09:23.347Z__54c134e2-01a3-4ba7-9265-95f5e2a8f911.json create mode 100644 project/events/2026-04-29T22:09:23.373Z__57743a66-6944-41e9-aeca-dc3eb08d71c7.json create mode 100644 project/events/2026-04-29T22:21:30.856Z__9e1ccc24-a941-4b2c-98e0-b142a2bbc816.json create mode 100644 project/events/2026-04-29T22:21:36.722Z__c288b592-34c2-4a83-ae21-8f2626641213.json create mode 100644 project/events/2026-04-29T22:21:45.981Z__cc92bc88-3666-4900-8a34-30efbc3ba790.json create mode 100644 project/events/2026-04-29T22:21:45.994Z__8ec11433-64d2-4bca-8490-18d8dcc39818.json create mode 100644 project/issues/tcts-5f92fe70-3a01-465c-9982-687ec28025a3.json create mode 100644 project/issues/tcts-9116f787-cc42-4a34-bdd5-5c4dbd57bac6.json create mode 100644 project/issues/tcts-d7500513-926c-4253-aef5-6349b2d81dd6.json create mode 100644 project/issues/tcts-da6da4a2-7331-43cf-b3a5-0ad9be1fc89b.json diff --git a/project/events/2026-04-29T22:09:04.273Z__cf2d0564-6c1e-461b-8292-d4aa3654a056.json b/project/events/2026-04-29T22:09:04.273Z__cf2d0564-6c1e-461b-8292-d4aa3654a056.json new file mode 100644 index 00000000..e1298caf --- /dev/null +++ b/project/events/2026-04-29T22:09:04.273Z__cf2d0564-6c1e-461b-8292-d4aa3654a056.json @@ -0,0 +1,18 @@ +{ + "schema_version": 1, + "event_id": "cf2d0564-6c1e-461b-8292-d4aa3654a056", + "issue_id": "tcts-5f92fe70-3a01-465c-9982-687ec28025a3", + "event_type": "issue_created", + "occurred_at": "2026-04-29T22:09:04.273Z", + "actor_id": "ryan", + "payload": { + "assignee": null, + "description": "Add first-class runtime support for reasoning_effort and verbosity so host applications can configure GPT-5-family controls for Tactus agents and LLM-backed models.\n\nDefinition of Done:\n- Runtime accepts reasoning_effort and verbosity.\n- Settings propagate through agents, LLMModelBackend, ModelPrimitive, and DSPy LM configuration.\n- Chat and Responses API shapes are covered by tests.\n- Targeted tests pass.", + "issue_type": "epic", + "labels": [], + "parent": null, + "priority": 2, + "status": "open", + "title": "Support runtime reasoning and verbosity controls" + } +} \ No newline at end of file diff --git a/project/events/2026-04-29T22:09:18.678Z__556c81e3-a2c2-4143-b679-e5001b0b7ddb.json b/project/events/2026-04-29T22:09:18.678Z__556c81e3-a2c2-4143-b679-e5001b0b7ddb.json new file mode 100644 index 00000000..31269472 --- /dev/null +++ b/project/events/2026-04-29T22:09:18.678Z__556c81e3-a2c2-4143-b679-e5001b0b7ddb.json @@ -0,0 +1,18 @@ +{ + "schema_version": 1, + "event_id": "556c81e3-a2c2-4143-b679-e5001b0b7ddb", + "issue_id": "tcts-d7500513-926c-4253-aef5-6349b2d81dd6", + "event_type": "issue_created", + "occurred_at": "2026-04-29T22:09:18.678Z", + "actor_id": "ryan", + "payload": { + "assignee": null, + "description": "Feature: Runtime GPT-5 controls\n\nScenario: Runtime configures agent model controls\nGiven a runtime has reasoning_effort and verbosity\nWhen an agent configures its DSPy LM\nThen both settings reach the LiteLLM request kwargs\n\nScenario: Runtime configures LLM-backed models\nGiven a ClassifyProcedure uses an LLMModel\nWhen runtime reasoning_effort and verbosity are set\nThen the LLM backend receives and forwards both settings\n\nScenario: Responses mode is used\nGiven model_type is responses\nWhen verbosity is set\nThen verbosity is sent as text.verbosity", + "issue_type": "story", + "labels": [], + "parent": "tcts-5f92fe70-3a01-465c-9982-687ec28025a3", + "priority": 2, + "status": "open", + "title": "Propagate GPT-5 controls through Tactus runtime" + } +} \ No newline at end of file diff --git a/project/events/2026-04-29T22:09:18.678Z__d48dcde7-1b94-4f8c-9480-04bab8e02a59.json b/project/events/2026-04-29T22:09:18.678Z__d48dcde7-1b94-4f8c-9480-04bab8e02a59.json new file mode 100644 index 00000000..09bb6557 --- /dev/null +++ b/project/events/2026-04-29T22:09:18.678Z__d48dcde7-1b94-4f8c-9480-04bab8e02a59.json @@ -0,0 +1,18 @@ +{ + "schema_version": 1, + "event_id": "d48dcde7-1b94-4f8c-9480-04bab8e02a59", + "issue_id": "tcts-9116f787-cc42-4a34-bdd5-5c4dbd57bac6", + "event_type": "issue_created", + "occurred_at": "2026-04-29T22:09:18.678Z", + "actor_id": "ryan", + "payload": { + "assignee": null, + "description": "Add runtime, agent, model, backend, and DSPy config support for reasoning_effort and verbosity.", + "issue_type": "task", + "labels": [], + "parent": "tcts-5f92fe70-3a01-465c-9982-687ec28025a3", + "priority": 2, + "status": "open", + "title": "Implement Tactus runtime GPT-5 controls" + } +} \ No newline at end of file diff --git a/project/events/2026-04-29T22:09:23.347Z__54c134e2-01a3-4ba7-9265-95f5e2a8f911.json b/project/events/2026-04-29T22:09:23.347Z__54c134e2-01a3-4ba7-9265-95f5e2a8f911.json new file mode 100644 index 00000000..37ae0721 --- /dev/null +++ b/project/events/2026-04-29T22:09:23.347Z__54c134e2-01a3-4ba7-9265-95f5e2a8f911.json @@ -0,0 +1,12 @@ +{ + "schema_version": 1, + "event_id": "54c134e2-01a3-4ba7-9265-95f5e2a8f911", + "issue_id": "tcts-9116f787-cc42-4a34-bdd5-5c4dbd57bac6", + "event_type": "state_transition", + "occurred_at": "2026-04-29T22:09:23.347Z", + "actor_id": "ryan", + "payload": { + "from_status": "open", + "to_status": "in_progress" + } +} \ No newline at end of file diff --git a/project/events/2026-04-29T22:09:23.373Z__57743a66-6944-41e9-aeca-dc3eb08d71c7.json b/project/events/2026-04-29T22:09:23.373Z__57743a66-6944-41e9-aeca-dc3eb08d71c7.json new file mode 100644 index 00000000..08ea9df3 --- /dev/null +++ b/project/events/2026-04-29T22:09:23.373Z__57743a66-6944-41e9-aeca-dc3eb08d71c7.json @@ -0,0 +1,12 @@ +{ + "schema_version": 1, + "event_id": "57743a66-6944-41e9-aeca-dc3eb08d71c7", + "issue_id": "tcts-9116f787-cc42-4a34-bdd5-5c4dbd57bac6", + "event_type": "comment_added", + "occurred_at": "2026-04-29T22:09:23.373Z", + "actor_id": "ryan", + "payload": { + "comment_author": "ryan", + "comment_id": "9972f909-5549-4fc8-8708-3a9e7fa0482d" + } +} \ No newline at end of file diff --git a/project/events/2026-04-29T22:21:30.856Z__9e1ccc24-a941-4b2c-98e0-b142a2bbc816.json b/project/events/2026-04-29T22:21:30.856Z__9e1ccc24-a941-4b2c-98e0-b142a2bbc816.json new file mode 100644 index 00000000..58f6c605 --- /dev/null +++ b/project/events/2026-04-29T22:21:30.856Z__9e1ccc24-a941-4b2c-98e0-b142a2bbc816.json @@ -0,0 +1,12 @@ +{ + "schema_version": 1, + "event_id": "9e1ccc24-a941-4b2c-98e0-b142a2bbc816", + "issue_id": "tcts-9116f787-cc42-4a34-bdd5-5c4dbd57bac6", + "event_type": "comment_added", + "occurred_at": "2026-04-29T22:21:30.856Z", + "actor_id": "ryan", + "payload": { + "comment_author": "ryan", + "comment_id": "9d5b1e13-38ff-409d-b32e-a8a6e7644ea0" + } +} \ No newline at end of file diff --git a/project/events/2026-04-29T22:21:36.722Z__c288b592-34c2-4a83-ae21-8f2626641213.json b/project/events/2026-04-29T22:21:36.722Z__c288b592-34c2-4a83-ae21-8f2626641213.json new file mode 100644 index 00000000..982cc812 --- /dev/null +++ b/project/events/2026-04-29T22:21:36.722Z__c288b592-34c2-4a83-ae21-8f2626641213.json @@ -0,0 +1,18 @@ +{ + "schema_version": 1, + "event_id": "c288b592-34c2-4a83-ae21-8f2626641213", + "issue_id": "tcts-da6da4a2-7331-43cf-b3a5-0ad9be1fc89b", + "event_type": "issue_created", + "occurred_at": "2026-04-29T22:21:36.722Z", + "actor_id": "ryan", + "payload": { + "assignee": null, + "description": "Publish a Tactus package version containing runtime reasoning_effort and verbosity support so downstream repos can depend on it from package indexes.\n\nDefinition of Done:\n- Tactus feature branch is merged through the normal review path.\n- A new package version is published and installable.\n- Release notes mention TactusRuntime reasoning_effort and verbosity support.", + "issue_type": "task", + "labels": [], + "parent": "tcts-5f92fe70-3a01-465c-9982-687ec28025a3", + "priority": 2, + "status": "open", + "title": "Release Tactus package with GPT-5 controls" + } +} \ No newline at end of file diff --git a/project/events/2026-04-29T22:21:45.981Z__cc92bc88-3666-4900-8a34-30efbc3ba790.json b/project/events/2026-04-29T22:21:45.981Z__cc92bc88-3666-4900-8a34-30efbc3ba790.json new file mode 100644 index 00000000..3006ffe9 --- /dev/null +++ b/project/events/2026-04-29T22:21:45.981Z__cc92bc88-3666-4900-8a34-30efbc3ba790.json @@ -0,0 +1,12 @@ +{ + "schema_version": 1, + "event_id": "cc92bc88-3666-4900-8a34-30efbc3ba790", + "issue_id": "tcts-9116f787-cc42-4a34-bdd5-5c4dbd57bac6", + "event_type": "comment_added", + "occurred_at": "2026-04-29T22:21:45.981Z", + "actor_id": "ryan", + "payload": { + "comment_author": "ryan", + "comment_id": "0cbe6e11-4712-4d2f-98dc-05396a6bfe5f" + } +} \ No newline at end of file diff --git a/project/events/2026-04-29T22:21:45.994Z__8ec11433-64d2-4bca-8490-18d8dcc39818.json b/project/events/2026-04-29T22:21:45.994Z__8ec11433-64d2-4bca-8490-18d8dcc39818.json new file mode 100644 index 00000000..76c3249b --- /dev/null +++ b/project/events/2026-04-29T22:21:45.994Z__8ec11433-64d2-4bca-8490-18d8dcc39818.json @@ -0,0 +1,12 @@ +{ + "schema_version": 1, + "event_id": "8ec11433-64d2-4bca-8490-18d8dcc39818", + "issue_id": "tcts-9116f787-cc42-4a34-bdd5-5c4dbd57bac6", + "event_type": "state_transition", + "occurred_at": "2026-04-29T22:21:45.994Z", + "actor_id": "ryan", + "payload": { + "from_status": "in_progress", + "to_status": "closed" + } +} \ No newline at end of file diff --git a/project/issues/tcts-5f92fe70-3a01-465c-9982-687ec28025a3.json b/project/issues/tcts-5f92fe70-3a01-465c-9982-687ec28025a3.json new file mode 100644 index 00000000..43af69e9 --- /dev/null +++ b/project/issues/tcts-5f92fe70-3a01-465c-9982-687ec28025a3.json @@ -0,0 +1,18 @@ +{ + "id": "tcts-5f92fe70-3a01-465c-9982-687ec28025a3", + "title": "Support runtime reasoning and verbosity controls", + "description": "Add first-class runtime support for reasoning_effort and verbosity so host applications can configure GPT-5-family controls for Tactus agents and LLM-backed models.\n\nDefinition of Done:\n- Runtime accepts reasoning_effort and verbosity.\n- Settings propagate through agents, LLMModelBackend, ModelPrimitive, and DSPy LM configuration.\n- Chat and Responses API shapes are covered by tests.\n- Targeted tests pass.", + "type": "epic", + "status": "open", + "priority": 2, + "assignee": null, + "creator": null, + "parent": null, + "labels": [], + "dependencies": [], + "comments": [], + "created_at": "2026-04-29T22:09:04.273139Z", + "updated_at": "2026-04-29T22:09:04.273139Z", + "closed_at": null, + "custom": {} +} \ No newline at end of file diff --git a/project/issues/tcts-9116f787-cc42-4a34-bdd5-5c4dbd57bac6.json b/project/issues/tcts-9116f787-cc42-4a34-bdd5-5c4dbd57bac6.json new file mode 100644 index 00000000..b3b72f19 --- /dev/null +++ b/project/issues/tcts-9116f787-cc42-4a34-bdd5-5c4dbd57bac6.json @@ -0,0 +1,40 @@ +{ + "id": "tcts-9116f787-cc42-4a34-bdd5-5c4dbd57bac6", + "title": "Implement Tactus runtime GPT-5 controls", + "description": "Add runtime, agent, model, backend, and DSPy config support for reasoning_effort and verbosity.", + "type": "task", + "status": "closed", + "priority": 2, + "assignee": null, + "creator": null, + "parent": "tcts-5f92fe70-3a01-465c-9982-687ec28025a3", + "labels": [], + "dependencies": [], + "comments": [ + { + "id": "9972f909-5549-4fc8-8708-3a9e7fa0482d", + "author": "ryan", + "text": "Started implementation on feature/tactus-score-reasoning-verbosity. Scope is runtime-level reasoning_effort and verbosity propagation through agent and LLM-backed model execution.", + "created_at": "2026-04-29T22:09:23.372707Z" + }, + { + "id": "9d5b1e13-38ff-409d-b32e-a8a6e7644ea0", + "author": "ryan", + "text": "Implemented runtime support for reasoning_effort and verbosity across TactusRuntime, DSPy agent configuration, configure_lm/create_lm, LLMModelBackend, ModelPrimitive, immediate DSL stubs, and YAML validation. Focused tests pass. Full ruff and black pass. behave --tags=-skip passes. test-ci.sh is blocked locally because /usr/local/bin/python3 lacks Poetry; direct pytest tests/ is blocked by missing moto in the local environment.", + "created_at": "2026-04-29T22:21:30.856720Z" + }, + { + "id": "0cbe6e11-4712-4d2f-98dc-05396a6bfe5f", + "author": "ryan", + "text": "Closing implementation task: code implementation complete. Added Tactus runtime fields, propagation through agents/models/configuration, validation, and focused coverage. Release follow-up filed as tcts-da6da4.", + "created_at": "2026-04-29T22:21:45.981225Z" + } + ], + "created_at": "2026-04-29T22:09:18.678440Z", + "updated_at": "2026-04-29T22:21:45.994208Z", + "closed_at": "2026-04-29T22:21:45.994208Z", + "custom": { + "project_label": "tcts", + "source": "shared" + } +} \ No newline at end of file diff --git a/project/issues/tcts-d7500513-926c-4253-aef5-6349b2d81dd6.json b/project/issues/tcts-d7500513-926c-4253-aef5-6349b2d81dd6.json new file mode 100644 index 00000000..999b6ce7 --- /dev/null +++ b/project/issues/tcts-d7500513-926c-4253-aef5-6349b2d81dd6.json @@ -0,0 +1,18 @@ +{ + "id": "tcts-d7500513-926c-4253-aef5-6349b2d81dd6", + "title": "Propagate GPT-5 controls through Tactus runtime", + "description": "Feature: Runtime GPT-5 controls\n\nScenario: Runtime configures agent model controls\nGiven a runtime has reasoning_effort and verbosity\nWhen an agent configures its DSPy LM\nThen both settings reach the LiteLLM request kwargs\n\nScenario: Runtime configures LLM-backed models\nGiven a ClassifyProcedure uses an LLMModel\nWhen runtime reasoning_effort and verbosity are set\nThen the LLM backend receives and forwards both settings\n\nScenario: Responses mode is used\nGiven model_type is responses\nWhen verbosity is set\nThen verbosity is sent as text.verbosity", + "type": "story", + "status": "open", + "priority": 2, + "assignee": null, + "creator": null, + "parent": "tcts-5f92fe70-3a01-465c-9982-687ec28025a3", + "labels": [], + "dependencies": [], + "comments": [], + "created_at": "2026-04-29T22:09:18.678297Z", + "updated_at": "2026-04-29T22:09:18.678297Z", + "closed_at": null, + "custom": {} +} \ No newline at end of file diff --git a/project/issues/tcts-da6da4a2-7331-43cf-b3a5-0ad9be1fc89b.json b/project/issues/tcts-da6da4a2-7331-43cf-b3a5-0ad9be1fc89b.json new file mode 100644 index 00000000..1d9ddb00 --- /dev/null +++ b/project/issues/tcts-da6da4a2-7331-43cf-b3a5-0ad9be1fc89b.json @@ -0,0 +1,18 @@ +{ + "id": "tcts-da6da4a2-7331-43cf-b3a5-0ad9be1fc89b", + "title": "Release Tactus package with GPT-5 controls", + "description": "Publish a Tactus package version containing runtime reasoning_effort and verbosity support so downstream repos can depend on it from package indexes.\n\nDefinition of Done:\n- Tactus feature branch is merged through the normal review path.\n- A new package version is published and installable.\n- Release notes mention TactusRuntime reasoning_effort and verbosity support.", + "type": "task", + "status": "open", + "priority": 2, + "assignee": null, + "creator": null, + "parent": "tcts-5f92fe70-3a01-465c-9982-687ec28025a3", + "labels": [], + "dependencies": [], + "comments": [], + "created_at": "2026-04-29T22:21:36.722540Z", + "updated_at": "2026-04-29T22:21:36.722540Z", + "closed_at": null, + "custom": {} +} \ No newline at end of file diff --git a/tactus/backends/llm_backend.py b/tactus/backends/llm_backend.py index 30c93be1..73602550 100644 --- a/tactus/backends/llm_backend.py +++ b/tactus/backends/llm_backend.py @@ -26,6 +26,8 @@ def __init__( provider: Optional[str] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, + reasoning_effort: Optional[str] = None, + verbosity: Optional[str] = None, mock_manager: Optional[Any] = None, registry: Optional[Any] = None, execution_context: Optional[Any] = None, @@ -41,6 +43,8 @@ def __init__( temperature: Model temperature. None applies a model-specific default: gpt-5 family omits temperature entirely; all others use 0.0. max_tokens: Maximum tokens for response + reasoning_effort: Optional GPT-5-family reasoning effort control + verbosity: Optional GPT-5-family response verbosity control mock_manager: Optional MockManager instance for testing registry: Optional Registry instance execution_context: Optional ExecutionContext (not used by internal Agent) @@ -54,6 +58,8 @@ def __init__( temperature = default_temperature_for_model(model) self.temperature = temperature self.max_tokens = max_tokens + self.reasoning_effort = reasoning_effort + self.verbosity = verbosity self.mock_manager = mock_manager self.registry = registry # Note: execution_context not passed to Agent - we don't checkpoint internal turns @@ -67,6 +73,8 @@ def __init__( provider=provider, temperature=temperature, max_tokens=max_tokens, + reasoning_effort=reasoning_effort, + verbosity=verbosity, mock_manager=mock_manager, registry=registry, execution_context=None, # Don't checkpoint internal agent turns diff --git a/tactus/core/dsl_stubs.py b/tactus/core/dsl_stubs.py index d2fa0f17..467f7bb4 100644 --- a/tactus/core/dsl_stubs.py +++ b/tactus/core/dsl_stubs.py @@ -626,6 +626,8 @@ def accept_config(config) -> ModelHandle: config=config_dict, context=_runtime_context.get("execution_context"), mock_manager=_runtime_context.get("mock_manager"), + reasoning_effort=_runtime_context.get("reasoning_effort"), + verbosity=_runtime_context.get("verbosity"), ) handle._set_primitive(primitive) _runtime_context.setdefault("_created_models", {})[model_name] = primitive @@ -2013,6 +2015,16 @@ def _process_agent_config(agent_name, config): # Add log_handler from runtime context if "log_handler" in _runtime_context: agent_config["log_handler"] = _runtime_context["log_handler"] + if ( + _runtime_context.get("reasoning_effort") is not None + and "reasoning_effort" not in agent_config + ): + agent_config["reasoning_effort"] = _runtime_context["reasoning_effort"] + if ( + _runtime_context.get("verbosity") is not None + and "verbosity" not in agent_config + ): + agent_config["verbosity"] = _runtime_context["verbosity"] agent_primitive = create_dspy_agent( agent_name, @@ -2199,6 +2211,16 @@ def accept_config(config): # Add log_handler from runtime context if "log_handler" in _runtime_context: agent_config["log_handler"] = _runtime_context["log_handler"] + if ( + _runtime_context.get("reasoning_effort") is not None + and "reasoning_effort" not in agent_config + ): + agent_config["reasoning_effort"] = _runtime_context["reasoning_effort"] + if ( + _runtime_context.get("verbosity") is not None + and "verbosity" not in agent_config + ): + agent_config["verbosity"] = _runtime_context["verbosity"] logger.debug( f"[AGENT_CREATION] Creating agent immediately: name={temporary_agent_name}, has_log_handler={'log_handler' in agent_config}" diff --git a/tactus/core/runtime.py b/tactus/core/runtime.py index 9b254320..e90a0e61 100644 --- a/tactus/core/runtime.py +++ b/tactus/core/runtime.py @@ -19,6 +19,7 @@ from tactus.core.registry import ProcedureRegistry, RegistryBuilder, TaskDeclaration from tactus.core.dsl_stubs import create_dsl_stubs, lua_table_to_dict from tactus.core.template_resolver import TemplateResolver +from tactus.dspy.config import validate_gpt5_controls from tactus.dspy.model_params import default_temperature_for_model from tactus.core.message_history_manager import MessageHistoryManager from tactus.core.lua_sandbox import LuaSandbox, LuaSandboxError, validate_python_module_name @@ -86,6 +87,8 @@ def __init__( external_config: Optional[Dict[str, Any]] = None, run_id: Optional[str] = None, source_file_path: Optional[str] = None, + reasoning_effort: Optional[str] = None, + verbosity: Optional[str] = None, ): """ Initialize the Tactus runtime. @@ -104,7 +107,11 @@ def __init__( external_config: Optional external config (from .tac.yml) to merge with DSL config run_id: Optional run identifier for tagging checkpoints source_file_path: Optional path to the .tac file being executed (for accurate source locations) + reasoning_effort: Optional GPT-5-family reasoning effort control + verbosity: Optional GPT-5-family response verbosity control """ + validate_gpt5_controls(reasoning_effort=reasoning_effort, verbosity=verbosity) + self.procedure_id = procedure_id self.storage_backend = storage_backend @@ -150,6 +157,8 @@ def __init__( self.dependency_prompt_handler = None self.run_id = run_id self.source_file_path = source_file_path + self.reasoning_effort = reasoning_effort + self.verbosity = verbosity self.python_modules: Dict[str, Any] = {} # Will be initialized during setup @@ -2235,6 +2244,19 @@ async def _setup_agents(self, context: dict[str, Any]): else: resolved_temperature = default_temperature_for_model(model_name) + resolved_reasoning_effort = self.reasoning_effort + resolved_verbosity = self.verbosity + if model_settings is not None: + if ( + "reasoning_effort" in model_settings + or "openai_reasoning_effort" in model_settings + ): + resolved_reasoning_effort = model_settings.get( + "reasoning_effort", model_settings.get("openai_reasoning_effort") + ) + if "verbosity" in model_settings: + resolved_verbosity = model_settings["verbosity"] + dspy_config = { "system_prompt": system_prompt_template, "model": model_name, @@ -2262,6 +2284,10 @@ async def _setup_agents(self, context: dict[str, Any]): "message_history_filter": message_history_filter, "response": agent_config.get("response"), } + if resolved_reasoning_effort is not None: + dspy_config["reasoning_effort"] = resolved_reasoning_effort + if resolved_verbosity is not None: + dspy_config["verbosity"] = resolved_verbosity logger.info( f"Agent '{agent_name}' dspy_config has tool_choice={dspy_config.get('tool_choice')}" ) @@ -2314,6 +2340,8 @@ async def _setup_models(self): config=model_config, context=self.execution_context, mock_manager=self.mock_manager, + reasoning_effort=self.reasoning_effort, + verbosity=self.verbosity, ) self.models[model_name] = model_primitive @@ -3588,6 +3616,8 @@ def _parse_declarations( "execution_context": self.execution_context, "log_handler": self.log_handler, "sandbox": sandbox, + "reasoning_effort": self.reasoning_effort, + "verbosity": self.verbosity, "_created_agents": {}, # Will be populated during parsing "is_parsing": True, # Stubs can use this to defer runtime-only behavior } diff --git a/tactus/core/yaml_parser.py b/tactus/core/yaml_parser.py index ddc4c7d1..f64ea25b 100644 --- a/tactus/core/yaml_parser.py +++ b/tactus/core/yaml_parser.py @@ -204,6 +204,8 @@ def _validate_agents(agents: dict[str, Any], parsed_configuration: dict[str, Any "timeout", # OpenAI reasoning models (o1, GPT-5) "openai_reasoning_effort", + "reasoning_effort", + "verbosity", # Extra fields "extra_headers", "extra_body", @@ -246,14 +248,25 @@ def _validate_agents(agents: dict[str, Any], parsed_configuration: dict[str, Any f"Agent '{agent_name}' max_tokens must be a positive integer" ) - if "openai_reasoning_effort" in model_value: - reasoning_effort = model_value["openai_reasoning_effort"] - valid_efforts = ["low", "medium", "high"] + for reasoning_key in ("openai_reasoning_effort", "reasoning_effort"): + if reasoning_key not in model_value: + continue + reasoning_effort = model_value[reasoning_key] + valid_efforts = ["none", "minimal", "low", "medium", "high", "xhigh"] if reasoning_effort not in valid_efforts: raise ProcedureConfigError( - f"Agent '{agent_name}' openai_reasoning_effort must be one of: {', '.join(valid_efforts)}. " + f"Agent '{agent_name}' {reasoning_key} must be one of: {', '.join(valid_efforts)}. " f"Got: {reasoning_effort}" ) + + if "verbosity" in model_value: + verbosity = model_value["verbosity"] + valid_verbosity = ["low", "medium", "high"] + if verbosity not in valid_verbosity: + raise ProcedureConfigError( + f"Agent '{agent_name}' verbosity must be one of: {', '.join(valid_verbosity)}. " + f"Got: {verbosity}" + ) else: raise ProcedureConfigError( f"Agent '{agent_name}' model must be a string or dict with 'name' key" diff --git a/tactus/dspy/agent.py b/tactus/dspy/agent.py index 06bd5d3a..b3a784f7 100644 --- a/tactus/dspy/agent.py +++ b/tactus/dspy/agent.py @@ -181,6 +181,8 @@ def __init__( temperature: Optional[float] = None, max_tokens: Optional[int] = None, model_type: Optional[str] = None, + reasoning_effort: Optional[str] = None, + verbosity: Optional[str] = None, module: str = "Raw", initial_message: Optional[str] = None, registry: Any = None, @@ -207,6 +209,8 @@ def __init__( (0.0 for most models; GPT-5 family omits the parameter). max_tokens: Maximum tokens for response model_type: Model type for DSPy (e.g., "chat", "responses" for reasoning models) + reasoning_effort: Optional GPT-5-family reasoning effort control + verbosity: Optional GPT-5-family response verbosity control module: DSPy module type to use (default: "Raw", case-insensitive). Options: - "Raw": Minimal formatting, direct LM calls (lowest token overhead) - "Predict": Simple pass-through prediction (no reasoning traces) @@ -236,6 +240,8 @@ def __init__( self.temperature = temperature self.max_tokens = max_tokens self.model_type = model_type + self.reasoning_effort = reasoning_effort + self.verbosity = verbosity self.module = module self.initial_message = initial_message self.registry = registry @@ -1545,6 +1551,10 @@ def _execute_turn(self, opts: Dict[str, Any]) -> Any: config_kwargs["max_tokens"] = self.max_tokens if self.model_type is not None: config_kwargs["model_type"] = self.model_type + if self.reasoning_effort is not None: + config_kwargs["reasoning_effort"] = self.reasoning_effort + if self.verbosity is not None: + config_kwargs["verbosity"] = self.verbosity if self.tool_choice is not None and (self.tools or self.toolsets): config_kwargs["tool_choice"] = self.tool_choice logger.debug(f"Configuring LM with tool_choice={self.tool_choice}") @@ -2044,6 +2054,8 @@ def create_dspy_agent( temperature=config.get("temperature"), max_tokens=config.get("max_tokens"), model_type=config.get("model_type"), + reasoning_effort=config.get("reasoning_effort"), + verbosity=config.get("verbosity"), module=config.get("module", "Raw"), initial_message=config.get("initial_message"), registry=registry, @@ -2067,6 +2079,8 @@ def create_dspy_agent( "temperature", "max_tokens", "model_type", + "reasoning_effort", + "verbosity", "module", "initial_message", "log_handler", diff --git a/tactus/dspy/config.py b/tactus/dspy/config.py index e9a19e91..92185154 100644 --- a/tactus/dspy/config.py +++ b/tactus/dspy/config.py @@ -14,6 +14,44 @@ # Global reference to the current LM configuration _current_lm: Optional[dspy.BaseLM] = None +REASONING_EFFORT_VALUES = {"none", "minimal", "low", "medium", "high", "xhigh"} +VERBOSITY_VALUES = {"low", "medium", "high"} + + +def validate_gpt5_controls( + reasoning_effort: Optional[str] = None, + verbosity: Optional[str] = None, +) -> None: + """Validate optional GPT-5-family reasoning and verbosity controls.""" + if reasoning_effort is not None and reasoning_effort not in REASONING_EFFORT_VALUES: + allowed = ", ".join(sorted(REASONING_EFFORT_VALUES)) + raise ValueError(f"reasoning_effort must be one of: {allowed}. Got: {reasoning_effort}") + + if verbosity is not None and verbosity not in VERBOSITY_VALUES: + allowed = ", ".join(sorted(VERBOSITY_VALUES)) + raise ValueError(f"verbosity must be one of: {allowed}. Got: {verbosity}") + + +def _apply_gpt5_controls( + lm_kwargs: dict[str, Any], + *, + reasoning_effort: Optional[str] = None, + verbosity: Optional[str] = None, + model_type: Optional[str] = None, +) -> None: + validate_gpt5_controls(reasoning_effort=reasoning_effort, verbosity=verbosity) + + if reasoning_effort is not None: + lm_kwargs["reasoning_effort"] = reasoning_effort + + if verbosity is not None: + if model_type == "responses": + text_config = dict(lm_kwargs.get("text") or {}) + text_config["verbosity"] = verbosity + lm_kwargs["text"] = text_config + else: + lm_kwargs["verbosity"] = verbosity + def configure_lm( model: str, @@ -22,6 +60,8 @@ def configure_lm( temperature: Optional[float] = None, max_tokens: Optional[int] = None, model_type: Optional[str] = None, + reasoning_effort: Optional[str] = None, + verbosity: Optional[str] = None, **kwargs: Any, ) -> dspy.BaseLM: """ @@ -41,6 +81,8 @@ def configure_lm( parameter; other models default to 0.0 (deterministic when supported). max_tokens: Maximum tokens in response (optional) model_type: Model type (e.g., "chat", "responses" for reasoning models) + reasoning_effort: Optional GPT-5-family reasoning effort control + verbosity: Optional GPT-5-family response verbosity control **kwargs: Additional LiteLLM parameters Returns: @@ -99,6 +141,12 @@ def configure_lm( lm_kwargs["max_tokens"] = max_tokens if model_type: lm_kwargs["model_type"] = model_type + _apply_gpt5_controls( + lm_kwargs, + reasoning_effort=reasoning_effort, + verbosity=verbosity, + model_type=model_type, + ) # If running inside the secretless runtime container, use the brokered LM. if os.environ.get("TACTUS_BROKER_SOCKET"): @@ -184,6 +232,8 @@ def create_lm( temperature: Optional[float] = None, max_tokens: Optional[int] = None, model_type: Optional[str] = None, + reasoning_effort: Optional[str] = None, + verbosity: Optional[str] = None, **kwargs: Any, ) -> dspy.LM: """ @@ -204,6 +254,8 @@ def create_lm( temperature: Sampling temperature; if omitted, same defaults as configure_lm. max_tokens: Maximum tokens in response (optional) model_type: Model type (e.g., "chat", "responses" for reasoning models) + reasoning_effort: Optional GPT-5-family reasoning effort control + verbosity: Optional GPT-5-family response verbosity control **kwargs: Additional LiteLLM parameters Returns: @@ -254,6 +306,12 @@ def create_lm( lm_kwargs["max_tokens"] = max_tokens if model_type: lm_kwargs["model_type"] = model_type + _apply_gpt5_controls( + lm_kwargs, + reasoning_effort=reasoning_effort, + verbosity=verbosity, + model_type=model_type, + ) # Create LM without setting as global default return dspy.LM(model, **lm_kwargs) diff --git a/tactus/primitives/model.py b/tactus/primitives/model.py index 9463b452..1939936f 100644 --- a/tactus/primitives/model.py +++ b/tactus/primitives/model.py @@ -34,6 +34,8 @@ def __init__( config: dict, context: Optional[ExecutionContext] = None, mock_manager: Optional[Any] = None, + reasoning_effort: Optional[str] = None, + verbosity: Optional[str] = None, ): """ Initialize model primitive. @@ -46,11 +48,15 @@ def __init__( - output: Optional output schema - Backend-specific config (endpoint, path, etc.) context: Execution context for checkpointing + reasoning_effort: Optional runtime-level GPT-5-family reasoning effort control + verbosity: Optional runtime-level GPT-5-family response verbosity control """ self.model_name = model_name self.config = config self.context = context self.mock_manager = mock_manager + self.reasoning_effort = reasoning_effort + self.verbosity = verbosity # Resolve input/output schemas to Pydantic models self.input_schema_dict = config.get("input", {}) @@ -149,6 +155,8 @@ def _create_backend(self, config: dict): provider=config.get("provider"), temperature=config.get("temperature"), max_tokens=config.get("max_tokens"), + reasoning_effort=self.reasoning_effort, + verbosity=self.verbosity, mock_manager=self.mock_manager, registry=None, # TODO: Pass registry when available execution_context=None, # Don't checkpoint internal agent turns diff --git a/tests/core/test_runtime_helpers.py b/tests/core/test_runtime_helpers.py index 9cb97063..4bb90045 100644 --- a/tests/core/test_runtime_helpers.py +++ b/tests/core/test_runtime_helpers.py @@ -544,9 +544,20 @@ async def test_setup_models_registers_models(monkeypatch): created = [] + runtime.reasoning_effort = "low" + runtime.verbosity = "high" + class DummyModel: - def __init__(self, model_name, config, context, mock_manager): - created.append((model_name, config, context, mock_manager)) + def __init__( + self, + model_name, + config, + context, + mock_manager, + reasoning_effort=None, + verbosity=None, + ): + created.append((model_name, config, context, mock_manager, reasoning_effort, verbosity)) monkeypatch.setattr("tactus.primitives.model.ModelPrimitive", DummyModel) @@ -554,6 +565,7 @@ def __init__(self, model_name, config, context, mock_manager): assert runtime.models["demo"] assert created[0][0] == "demo" + assert created[0][4:] == ("low", "high") @pytest.mark.asyncio diff --git a/tests/core/test_runtime_setup_agents_branches.py b/tests/core/test_runtime_setup_agents_branches.py index c0655bf8..dcf29791 100644 --- a/tests/core/test_runtime_setup_agents_branches.py +++ b/tests/core/test_runtime_setup_agents_branches.py @@ -26,9 +26,26 @@ def dict(self): return dict(self._data) +@pytest.mark.parametrize( + "kwargs,error_match", + [ + ({"reasoning_effort": "invalid"}, "reasoning_effort"), + ({"verbosity": "invalid"}, "verbosity"), + ], +) +def test_runtime_rejects_invalid_gpt5_controls(kwargs, error_match): + with pytest.raises(ValueError, match=error_match): + runtime_module.TactusRuntime(procedure_id="proc", hitl_handler=object(), **kwargs) + + @pytest.mark.asyncio async def test_setup_agents_accepts_v1_agent_config_and_model_settings(monkeypatch): - runtime = runtime_module.TactusRuntime(procedure_id="proc", hitl_handler=object()) + runtime = runtime_module.TactusRuntime( + procedure_id="proc", + hitl_handler=object(), + reasoning_effort="minimal", + verbosity="high", + ) runtime.lua_sandbox = DummyLuaSandbox() runtime.toolset_registry = {} runtime.config = {} @@ -62,6 +79,52 @@ async def _noop_dependencies(): assert captured["name"] == "agent" assert captured["config"]["model"] == "openai/gpt-4o" assert captured["config"]["temperature"] == 0.5 + assert captured["config"]["reasoning_effort"] == "minimal" + assert captured["config"]["verbosity"] == "high" + + +@pytest.mark.asyncio +async def test_setup_agents_model_settings_override_runtime_gpt5_controls(monkeypatch): + runtime = runtime_module.TactusRuntime( + procedure_id="proc", + hitl_handler=object(), + reasoning_effort="low", + verbosity="medium", + ) + runtime.lua_sandbox = DummyLuaSandbox() + runtime.toolset_registry = {} + runtime.config = {} + runtime.registry = SimpleNamespace(agents={}) + runtime.agents = {} + + captured = {} + + def _create_agent(_name, config, **_kwargs): + captured["config"] = config + return SimpleNamespace() + + runtime.registry.agents = { + "agent": { + "system_prompt": "system", + "provider": "openai", + "model": { + "name": "gpt-5-mini", + "reasoning_effort": "xhigh", + "verbosity": "low", + }, + } + } + + async def _noop_dependencies(): + return None + + monkeypatch.setattr(runtime, "_initialize_dependencies", _noop_dependencies) + monkeypatch.setattr("tactus.dspy.agent.create_dspy_agent", _create_agent) + + await runtime._setup_agents(context={}) + + assert captured["config"]["reasoning_effort"] == "xhigh" + assert captured["config"]["verbosity"] == "low" @pytest.mark.asyncio diff --git a/tests/core/test_runtime_tasks.py b/tests/core/test_runtime_tasks.py index ab07344d..afc9cab3 100644 --- a/tests/core/test_runtime_tasks.py +++ b/tests/core/test_runtime_tasks.py @@ -16,6 +16,19 @@ def _runtime_with_registry(registry): return runtime +def _runtime_for_parse(tmp_path, sandbox): + runtime = TactusRuntime.__new__(TactusRuntime) + runtime.lua_sandbox = sandbox + runtime.mock_manager = None + runtime.execution_context = None + runtime.log_handler = None + runtime.agents = {} + runtime.source_file_path = str(tmp_path / "main.tac") + runtime.reasoning_effort = None + runtime.verbosity = None + return runtime + + def test_execute_workflow_selects_single_task(): registry = SimpleNamespace( tasks={"fetch": TaskDeclaration(name="fetch")}, retrievers={}, named_procedures={} @@ -564,13 +577,7 @@ def execute(self, source): include_file = tmp_path / "tasks.tac" include_file.write_text(include_source) - runtime = TactusRuntime.__new__(TactusRuntime) - runtime.lua_sandbox = FakeSandbox(include_source) - runtime.mock_manager = None - runtime.execution_context = None - runtime.log_handler = None - runtime.agents = {} - runtime.source_file_path = str(tmp_path / "main.tac") + runtime = _runtime_for_parse(tmp_path, FakeSandbox(include_source)) registry = runtime._parse_declarations('IncludeTasks("tasks.tac")') @@ -607,13 +614,7 @@ def execute(self, source): (tmp_path / "tasks.tac").write_text(include_source) (tmp_path / "nested.tac").write_text(nested_source) - runtime = TactusRuntime.__new__(TactusRuntime) - runtime.lua_sandbox = FakeSandbox(include_source, nested_source) - runtime.mock_manager = None - runtime.execution_context = None - runtime.log_handler = None - runtime.agents = {} - runtime.source_file_path = str(tmp_path / "main.tac") + runtime = _runtime_for_parse(tmp_path, FakeSandbox(include_source, nested_source)) registry = runtime._parse_declarations('IncludeTasks("tasks.tac")') @@ -648,13 +649,7 @@ def include_tasks(_path=None, _namespace=None): monkeypatch.setattr("tactus.core.runtime.create_dsl_stubs", fake_create_dsl_stubs) - runtime = TactusRuntime.__new__(TactusRuntime) - runtime.lua_sandbox = FakeSandbox() - runtime.mock_manager = None - runtime.execution_context = None - runtime.log_handler = None - runtime.agents = {} - runtime.source_file_path = str(tmp_path / "main.tac") + runtime = _runtime_for_parse(tmp_path, FakeSandbox()) registry = runtime._parse_declarations("IncludeTasks(nil)") @@ -687,13 +682,7 @@ def execute(self, source): include_file = tmp_path / "tasks.tac" include_file.write_text(include_source) - runtime = TactusRuntime.__new__(TactusRuntime) - runtime.lua_sandbox = FakeSandbox(include_source) - runtime.mock_manager = None - runtime.execution_context = None - runtime.log_handler = None - runtime.agents = {} - runtime.source_file_path = str(tmp_path / "main.tac") + runtime = _runtime_for_parse(tmp_path, FakeSandbox(include_source)) with pytest.raises( TactusRuntimeError, match="IncludeTasks files must only contain Task declarations" @@ -725,13 +714,7 @@ def execute(self, source): include_file = tmp_path / "tasks.tac" include_file.write_text(include_source) - runtime = TactusRuntime.__new__(TactusRuntime) - runtime.lua_sandbox = FakeSandbox(include_source) - runtime.mock_manager = None - runtime.execution_context = None - runtime.log_handler = None - runtime.agents = {} - runtime.source_file_path = str(tmp_path / "main.tac") + runtime = _runtime_for_parse(tmp_path, FakeSandbox(include_source)) with pytest.raises(TactusRuntimeError, match="IncludeTasks cycle detected"): runtime._parse_declarations('IncludeTasks("tasks.tac")') @@ -753,13 +736,7 @@ def execute(self, _source): self._globals["IncludeTasks"]("missing.tac") return None - runtime = TactusRuntime.__new__(TactusRuntime) - runtime.lua_sandbox = FakeSandbox() - runtime.mock_manager = None - runtime.execution_context = None - runtime.log_handler = None - runtime.agents = {} - runtime.source_file_path = str(tmp_path / "main.tac") + runtime = _runtime_for_parse(tmp_path, FakeSandbox()) with pytest.raises(TactusRuntimeError, match="Included tasks file not found"): runtime._parse_declarations('IncludeTasks("missing.tac")') @@ -790,13 +767,7 @@ def execute(self, source): include_file = tmp_path / "tasks.tac" include_file.write_text(include_source) - runtime = TactusRuntime.__new__(TactusRuntime) - runtime.lua_sandbox = FakeSandbox(include_source) - runtime.mock_manager = None - runtime.execution_context = None - runtime.log_handler = None - runtime.agents = {} - runtime.source_file_path = str(tmp_path / "main.tac") + runtime = _runtime_for_parse(tmp_path, FakeSandbox(include_source)) with pytest.raises(TactusRuntimeError, match="Failed to execute IncludeTasks file"): runtime._parse_declarations('IncludeTasks("tasks.tac")') @@ -827,13 +798,7 @@ def execute(self, source): include_file = tmp_path / "tasks.tac" include_file.write_text(include_source) - runtime = TactusRuntime.__new__(TactusRuntime) - runtime.lua_sandbox = FakeSandbox(include_source) - runtime.mock_manager = None - runtime.execution_context = None - runtime.log_handler = None - runtime.agents = {} - runtime.source_file_path = str(tmp_path / "main.tac") + runtime = _runtime_for_parse(tmp_path, FakeSandbox(include_source)) with pytest.raises(TactusRuntimeError, match="Duplicate task namespace"): runtime._parse_declarations('IncludeTasks("tasks.tac", "extras")') diff --git a/tests/core/test_yaml_parser.py b/tests/core/test_yaml_parser.py index 5b366c8e..fd34a0ee 100644 --- a/tests/core/test_yaml_parser.py +++ b/tests/core/test_yaml_parser.py @@ -38,7 +38,9 @@ def test_parse_valid_config_with_params_outputs_and_model_settings(): "temperature": 0.7, "top_p": 0.5, "max_tokens": 10, - "openai_reasoning_effort": "low", + "openai_reasoning_effort": "xhigh", + "reasoning_effort": "minimal", + "verbosity": "low", } yaml_content = yaml.safe_dump(config) @@ -258,6 +260,8 @@ def test_agent_model_name_type_and_empty(): {"name": "gpt-4o", "max_tokens": 0}, {"name": "gpt-4o", "max_tokens": "bad"}, {"name": "gpt-4o", "openai_reasoning_effort": "invalid"}, + {"name": "gpt-4o", "reasoning_effort": "invalid"}, + {"name": "gpt-4o", "verbosity": "invalid"}, ], ) def test_agent_model_invalid_settings(model_value): diff --git a/tests/dspy/test_agent_model_normalization.py b/tests/dspy/test_agent_model_normalization.py index 85418d94..c90eb1d3 100644 --- a/tests/dspy/test_agent_model_normalization.py +++ b/tests/dspy/test_agent_model_normalization.py @@ -35,13 +35,22 @@ def test_agent_auto_config_uses_normalized_model(monkeypatch): called = {} - def fake_configure_lm(model: str, **_kwargs): + def fake_configure_lm(model: str, **kwargs): called["model"] = model + called["kwargs"] = kwargs return None monkeypatch.setattr("tactus.dspy.config.configure_lm", fake_configure_lm) - agent = DSPyAgentHandle(name="agent", provider="openai", model="gpt-4o-mini") + agent = DSPyAgentHandle( + name="agent", + provider="openai", + model="gpt-4o-mini", + reasoning_effort="xhigh", + verbosity="low", + ) agent({"message": "hello"}) assert called["model"] == "openai/gpt-4o-mini" + assert called["kwargs"]["reasoning_effort"] == "xhigh" + assert called["kwargs"]["verbosity"] == "low" diff --git a/tests/dspy/test_config.py b/tests/dspy/test_config.py index 07326093..74c7329a 100644 --- a/tests/dspy/test_config.py +++ b/tests/dspy/test_config.py @@ -80,6 +80,8 @@ def __init__(self, model, **kwargs): temperature=0.2, max_tokens=123, model_type="responses", + reasoning_effort="high", + verbosity="low", ) assert captured["model"] == "openai/gpt-4o" @@ -88,6 +90,33 @@ def __init__(self, model, **kwargs): assert captured["kwargs"]["model_type"] == "responses" assert captured["kwargs"]["api_key"] == "key" assert captured["kwargs"]["api_base"] == "http://base" + assert captured["kwargs"]["reasoning_effort"] == "high" + assert captured["kwargs"]["text"]["verbosity"] == "low" + + +def test_configure_lm_passes_chat_mode_gpt5_controls(monkeypatch): + dspy_config.reset_lm_configuration() + + captured = {} + + class FakeLM: + def __init__(self, model, **kwargs): + captured["model"] = model + captured["kwargs"] = kwargs + + monkeypatch.delenv("TACTUS_BROKER_SOCKET", raising=False) + monkeypatch.setattr(dspy_config.dspy, "LM", FakeLM) + monkeypatch.setattr(dspy_config.dspy, "configure", lambda **_kwargs: None) + + dspy_config.configure_lm( + "openai/gpt-5-mini", + model_type="chat", + reasoning_effort="xhigh", + verbosity="medium", + ) + + assert captured["kwargs"]["reasoning_effort"] == "xhigh" + assert captured["kwargs"]["verbosity"] == "medium" def test_reset_lm_configuration_clears_state(monkeypatch): @@ -129,6 +158,9 @@ def __init__(self, model, **kwargs): temperature=0.1, max_tokens=55, model_type="responses", + reasoning_effort="minimal", + verbosity="high", + text={"format": {"type": "text"}}, extra="value", ) @@ -138,6 +170,11 @@ def __init__(self, model, **kwargs): assert captured["kwargs"]["temperature"] == 0.1 assert captured["kwargs"]["max_tokens"] == 55 assert captured["kwargs"]["model_type"] == "responses" + assert captured["kwargs"]["reasoning_effort"] == "minimal" + assert captured["kwargs"]["text"] == { + "format": {"type": "text"}, + "verbosity": "high", + } assert captured["kwargs"]["extra"] == "value" @@ -159,3 +196,18 @@ def __init__(self, model, **kwargs): assert "api_base" not in captured["kwargs"] assert "max_tokens" not in captured["kwargs"] assert "model_type" not in captured["kwargs"] + assert "reasoning_effort" not in captured["kwargs"] + assert "verbosity" not in captured["kwargs"] + assert "text" not in captured["kwargs"] + + +@pytest.mark.parametrize("reasoning_effort", ["", "max", "extreme"]) +def test_configure_lm_rejects_invalid_reasoning_effort(reasoning_effort): + with pytest.raises(ValueError, match="reasoning_effort"): + dspy_config.configure_lm("openai/gpt-5-mini", reasoning_effort=reasoning_effort) + + +@pytest.mark.parametrize("verbosity", ["", "minimal", "verbose"]) +def test_create_lm_rejects_invalid_verbosity(verbosity): + with pytest.raises(ValueError, match="verbosity"): + dspy_config.create_lm("openai/gpt-5-mini", verbosity=verbosity) diff --git a/tests/models/test_llm_backend.py b/tests/models/test_llm_backend.py index 45852519..5a35f7a0 100644 --- a/tests/models/test_llm_backend.py +++ b/tests/models/test_llm_backend.py @@ -26,6 +26,28 @@ def test_llm_backend_initialization(self): assert backend.system_prompt == "Classify sentiment as positive or negative" assert backend.temperature == 0.0 + def test_llm_backend_passes_gpt5_controls_to_agent(self, monkeypatch): + """Test LLM backend forwards runtime GPT-5 controls to its internal agent.""" + captured = {} + + class FakeAgent: + def __init__(self, **kwargs): + captured.update(kwargs) + + monkeypatch.setattr("tactus.backends.llm_backend.DSPyAgentHandle", FakeAgent) + + backend = LLMModelBackend( + model="openai/gpt-5-mini", + system_prompt="Classify sentiment", + reasoning_effort="minimal", + verbosity="high", + ) + + assert backend.reasoning_effort == "minimal" + assert backend.verbosity == "high" + assert captured["reasoning_effort"] == "minimal" + assert captured["verbosity"] == "high" + def test_llm_backend_predict_success(self): """Test LLM backend successfully predicts with valid response.""" backend = LLMModelBackend( @@ -192,6 +214,26 @@ def test_model_primitive_creates_llm_backend(self): assert isinstance(model.backend, LLMModelBackend) assert model.backend.model == "openai/gpt-4o-mini" + def test_model_primitive_passes_runtime_gpt5_controls_to_llm_backend(self): + """Test Model primitive forwards runtime GPT-5 controls to type='llm' backend.""" + config = { + "type": "llm", + "model": "openai/gpt-5-mini", + "system_prompt": "Classify sentiment", + "input": {"text": "string"}, + "output": {"label": "string", "confidence": "float"}, + } + + model = ModelPrimitive( + "sentiment_classifier", + config, + reasoning_effort="xhigh", + verbosity="low", + ) + + assert model.backend.reasoning_effort == "xhigh" + assert model.backend.verbosity == "low" + def test_model_primitive_llm_predict(self): """Test Model primitive predict with LLM backend.""" config = {