From 81ed7123c597a74283a8e5b01deb76868fbf7de3 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Wed, 14 May 2025 22:40:58 +1000 Subject: [PATCH 01/16] feat: wip - add thinking content to messages --- lib/ruby_llm/message.rb | 3 ++- lib/ruby_llm/model_info.rb | 7 ++++++- lib/ruby_llm/models.json | 1 + lib/ruby_llm/providers/anthropic/chat.rb | 21 +++++++++++++++++---- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/lib/ruby_llm/message.rb b/lib/ruby_llm/message.rb index 6e753b9b1..d0e2c4508 100644 --- a/lib/ruby_llm/message.rb +++ b/lib/ruby_llm/message.rb @@ -7,11 +7,12 @@ module RubyLLM class Message ROLES = %i[system user assistant tool].freeze - attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id + attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :thinking_content def initialize(options = {}) @role = options[:role].to_sym @content = normalize_content(options[:content]) + @thinking_content = options[:thinking_content] @tool_calls = options[:tool_calls] @input_tokens = options[:input_tokens] @output_tokens = options[:output_tokens] diff --git a/lib/ruby_llm/model_info.rb b/lib/ruby_llm/model_info.rb index 5cd95801d..d5bc19127 100644 --- a/lib/ruby_llm/model_info.rb +++ b/lib/ruby_llm/model_info.rb @@ -12,7 +12,7 @@ module RubyLLM # model.input_price_per_million # => 30.0 class ModelInfo attr_reader :id, :name, :provider, :family, :created_at, :context_window, :max_output_tokens, :knowledge_cutoff, - :modalities, :capabilities, :pricing, :metadata + :modalities, :capabilities, :pricing, :metadata, :thinking def initialize(data) @id = data[:id] @@ -22,6 +22,7 @@ def initialize(data) @created_at = data[:created_at] @context_window = data[:context_window] @max_output_tokens = data[:max_output_tokens] + @thinking = data[:thinking] @knowledge_cutoff = data[:knowledge_cutoff] @modalities = Modalities.new(data[:modalities] || {}) @capabilities = data[:capabilities] || [] @@ -57,6 +58,10 @@ def supports_functions? function_calling? end + def supports_thinking? + thinking + end + def input_price_per_million pricing.text_tokens.input end diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index b8046969c..15ae6d1da 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -297,6 +297,7 @@ "created_at": null, "context_window": 200000, "max_output_tokens": 64000, + "thinking": true, "knowledge_cutoff": null, "modalities": { "input": [ diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index 2ba96009d..7320d8b76 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -39,9 +39,13 @@ def build_base_payload(chat_messages, temperature, model, stream) { model: model, messages: chat_messages.map { |msg| format_message(msg) }, - temperature: temperature, + temperature: 1, # TODO: Ensure to maintain this as being configurable - but must be set to 1 to enable thinking stream: stream, - max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096 + max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096, + thinking: { + type: RubyLLM.models.find(model)&.supports_thinking? ? 
'enabled' : 'disabled', # TODO: Make this configurable + budget_tokens: 1024 # TODO: Make this configurable + } } end @@ -52,12 +56,20 @@ def add_optional_fields(payload, system_content:, tools:) def parse_completion_response(response) data = response.body + RubyLLM.logger.debug("Anthropic response: #{data}") + content_blocks = data['content'] || [] + thinking_content = extract_thinking_content(content_blocks) text_content = extract_text_content(content_blocks) tool_use = Tools.find_tool_use(content_blocks) - build_message(data, text_content, tool_use) + build_message(data, text_content, tool_use, thinking_content) + end + + def extract_thinking_content(blocks) + thinking_blocks = blocks.select { |c| c['type'] == 'thinking' } + thinking_blocks.map { |c| c['thinking'] }.join end def extract_text_content(blocks) @@ -65,10 +77,11 @@ def extract_text_content(blocks) text_blocks.map { |c| c['text'] }.join end - def build_message(data, content, tool_use) + def build_message(data, content, tool_use, thinking_content) Message.new( role: :assistant, content: content, + thinking_content: thinking_content, tool_calls: Tools.parse_tool_calls(tool_use), input_tokens: data.dig('usage', 'input_tokens'), output_tokens: data.dig('usage', 'output_tokens'), From b6e1bb039ba2fe44aff569b6e3c2672000001ed5 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 19:06:04 +1000 Subject: [PATCH 02/16] chore: add thinking to capabilities --- lib/ruby_llm/models.json | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index 6ef13ef1e..002cc7381 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -162,7 +162,8 @@ "output": [] }, "capabilities": [ - "function_calling" + "function_calling", + "thinking" ], "pricing": { "text_tokens": { @@ -287,7 +288,8 @@ ] }, "capabilities": [ - "function_calling" + "function_calling", + "thinking" ], "pricing": { "text_tokens": { @@ -319,7 +321,8 @@ ] }, "capabilities": [ - "function_calling" + "function_calling", + "thinking" ], "pricing": { "text_tokens": { @@ -9512,7 +9515,8 @@ }, "capabilities": [ "streaming", - "function_calling" + "function_calling", + "thinking" ], "pricing": { "text_tokens": { From ecb69c9c655d70e268ab5dcc7c3ae179f61484f9 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 19:48:35 +1000 Subject: [PATCH 03/16] chore: pass thinking through from chat initialisation --- lib/ruby_llm/chat.rb | 12 ++++++++++-- lib/ruby_llm/model/info.rb | 2 +- lib/ruby_llm/provider.rb | 3 ++- lib/ruby_llm/providers/anthropic/chat.rb | 18 ++++++++++-------- lib/ruby_llm/providers/bedrock/chat.rb | 2 +- lib/ruby_llm/providers/gemini/chat.rb | 2 +- lib/ruby_llm/providers/openai/chat.rb | 2 +- 7 files changed, 26 insertions(+), 15 deletions(-) diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index 3b5bfa83a..b56c27c7f 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -13,7 +13,7 @@ class Chat attr_reader :model, :messages, :tools - def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil) + def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil, thinking: false) if assume_model_exists && !provider raise ArgumentError, 'Provider must be specified if assume_model_exists is true' end @@ -22,6 +22,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n @config = context&.config || RubyLLM.config model_id = model || @config.default_model with_model(model_id, provider: 
provider, assume_exists: assume_model_exists) + @thinking = thinking @temperature = 0.7 @messages = [] @tools = {} @@ -60,9 +61,15 @@ def with_tools(*tools) self end - def with_model(model_id, provider: nil, assume_exists: false) + def with_model(model_id, provider: nil, thinking: nil, assume_exists: false) @model, @provider = Models.resolve(model_id, provider:, assume_exists:) @connection = @context ? @context.connection_for(@provider) : @provider.connection(@config) + + # Preserve thinking state from initialization + unless thinking.nil? + @thinking = thinking + end + self end @@ -99,6 +106,7 @@ def complete(&) tools: @tools, temperature: @temperature, model: @model.id, + thinking: @thinking, connection: @connection, & ) diff --git a/lib/ruby_llm/model/info.rb b/lib/ruby_llm/model/info.rb index 9c72bcdf3..cdee5a2aa 100644 --- a/lib/ruby_llm/model/info.rb +++ b/lib/ruby_llm/model/info.rb @@ -35,7 +35,7 @@ def supports?(capability) capabilities.include?(capability.to_s) end - %w[function_calling structured_output batch reasoning citations streaming].each do |cap| + %w[function_calling structured_output batch reasoning citations streaming thinking].each do |cap| define_method "#{cap}?" do supports?(cap) end diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb index 2b09cdee1..fe6854f6b 100644 --- a/lib/ruby_llm/provider.rb +++ b/lib/ruby_llm/provider.rb @@ -10,13 +10,14 @@ module Provider module Methods extend Streaming - def complete(messages, tools:, temperature:, model:, connection:, &) + def complete(messages, tools:, temperature:, model:, thinking:, connection:, &) normalized_temperature = maybe_normalize_temperature(temperature, model) payload = render_payload(messages, tools: tools, temperature: normalized_temperature, model: model, + thinking: thinking, stream: block_given?) if block_given? diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index 7320d8b76..0efe4b2d2 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -11,12 +11,12 @@ def completion_url '/v1/messages' end - def render_payload(messages, tools:, temperature:, model:, stream: false) + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) system_messages, chat_messages = separate_messages(messages) system_content = build_system_content(system_messages) build_base_payload(chat_messages, temperature, model, stream).tap do |payload| - add_optional_fields(payload, system_content:, tools:) + add_optional_fields(payload, system_content:, tools:, thinking:) end end @@ -41,17 +41,19 @@ def build_base_payload(chat_messages, temperature, model, stream) messages: chat_messages.map { |msg| format_message(msg) }, temperature: 1, # TODO: Ensure to maintain this as being configurable - but must be set to 1 to enable thinking stream: stream, - max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096, - thinking: { - type: RubyLLM.models.find(model)&.supports_thinking? ? 'enabled' : 'disabled', # TODO: Make this configurable - budget_tokens: 1024 # TODO: Make this configurable - } + max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096 } end - def add_optional_fields(payload, system_content:, tools:) + def add_optional_fields(payload, system_content:, tools:, thinking:) payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any? payload[:system] = system_content unless system_content.empty? 
+ if thinking + payload[:thinking] = { + type: 'enabled', + budget_tokens: 1024, # TODO: default + } + end end def parse_completion_response(response) diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb index 742579558..bb942338b 100644 --- a/lib/ruby_llm/providers/bedrock/chat.rb +++ b/lib/ruby_llm/providers/bedrock/chat.rb @@ -39,7 +39,7 @@ def completion_url "model/#{@model_id}/invoke" end - def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument # Hold model_id in instance variable for use in completion_url and stream_url @model_id = model diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index d6ba1696f..7bb794911 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -11,7 +11,7 @@ def completion_url "models/#{@model}:generateContent" end - def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument @model = model # Store model for completion_url/stream_url payload = { contents: format_messages(messages), diff --git a/lib/ruby_llm/providers/openai/chat.rb b/lib/ruby_llm/providers/openai/chat.rb index 545cc9bee..ffcd8dcf6 100644 --- a/lib/ruby_llm/providers/openai/chat.rb +++ b/lib/ruby_llm/providers/openai/chat.rb @@ -11,7 +11,7 @@ def completion_url module_function - def render_payload(messages, tools:, temperature:, model:, stream: false) + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) { model: model, messages: format_messages(messages), From a014b7746ecfdabe9d1424771e80d6c2681bd046 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 19:57:00 +1000 Subject: [PATCH 04/16] chore: add very basic config for thinking budget through global configuration --- lib/ruby_llm/configuration.rb | 2 ++ lib/ruby_llm/providers/anthropic/chat.rb | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 0fd3bf23c..6bdf465ed 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -28,6 +28,7 @@ class Configuration :default_model, :default_embedding_model, :default_image_model, + :default_thinking_budget # Connection configuration :request_timeout, :max_retries, @@ -53,6 +54,7 @@ def initialize @default_model = 'gpt-4.1-nano' @default_embedding_model = 'text-embedding-3-small' @default_image_model = 'dall-e-3' + @default_thinking_budget = 1024 # Logging configuration @log_file = $stdout diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index 0efe4b2d2..cbf3b75d0 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -51,7 +51,7 @@ def add_optional_fields(payload, system_content:, tools:, thinking:) if thinking payload[:thinking] = { type: 'enabled', - budget_tokens: 1024, # TODO: default + budget_tokens: RubyLLM.configuration.default_thinking_budget || 1024, } end end From ddb0ae1c697b20191790d601d4a19ca07fe5423c Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 20:35:38 +1000 Subject: [PATCH 05/16] bug: fix config missing comma --- 
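Note: the missing comma matters because it silently terminates the
attr_accessor call. Without a trailing comma after :default_thinking_budget,
the accessor symbols that follow it are parsed as a bare comma-separated
expression list, which is a SyntaxError, so the library cannot even be loaded.
A minimal sketch of the failure mode, using a hypothetical Config class rather
than the real RubyLLM::Configuration:

    class Config
      # The attr_accessor call ends on the line below, because it has no
      # trailing comma:
      attr_accessor :default_model,
                    :default_thinking_budget
      # Putting back the symbols that followed it in configuration.rb, e.g.
      #
      #   :request_timeout,
      #   :max_retries
      #
      # leaves a bare `:request_timeout, :max_retries` expression behind,
      # which Ruby rejects as a syntax error at parse time.
    end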
lib/ruby_llm/configuration.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 6bdf465ed..4401548a8 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -28,7 +28,7 @@ class Configuration :default_model, :default_embedding_model, :default_image_model, - :default_thinking_budget + :default_thinking_budget, # Connection configuration :request_timeout, :max_retries, From 6d66491acc1c66128a9b0d5ef3b7797f099d420e Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 20:35:56 +1000 Subject: [PATCH 06/16] chore: add streaming content --- lib/ruby_llm/providers/anthropic/chat.rb | 2 +- lib/ruby_llm/providers/anthropic/streaming.rb | 1 + lib/ruby_llm/stream_accumulator.rb | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index cbf3b75d0..459a825a9 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -51,7 +51,7 @@ def add_optional_fields(payload, system_content:, tools:, thinking:) if thinking payload[:thinking] = { type: 'enabled', - budget_tokens: RubyLLM.configuration.default_thinking_budget || 1024, + budget_tokens: RubyLLM.config.default_thinking_budget || 1024, } end end diff --git a/lib/ruby_llm/providers/anthropic/streaming.rb b/lib/ruby_llm/providers/anthropic/streaming.rb index 3bf842150..a1d5c44f1 100644 --- a/lib/ruby_llm/providers/anthropic/streaming.rb +++ b/lib/ruby_llm/providers/anthropic/streaming.rb @@ -16,6 +16,7 @@ def build_chunk(data) role: :assistant, model_id: extract_model_id(data), content: data.dig('delta', 'text'), + thinking_content: data.dig('delta', 'thinking'), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), tool_calls: extract_tool_calls(data) diff --git a/lib/ruby_llm/stream_accumulator.rb b/lib/ruby_llm/stream_accumulator.rb index 7fca306a2..233ce9cae 100644 --- a/lib/ruby_llm/stream_accumulator.rb +++ b/lib/ruby_llm/stream_accumulator.rb @@ -9,6 +9,7 @@ class StreamAccumulator def initialize @content = String.new + @thinking_content = String.new @tool_calls = {} @input_tokens = 0 @output_tokens = 0 @@ -23,6 +24,7 @@ def add(chunk) accumulate_tool_calls chunk.tool_calls else @content << (chunk.content || '') + @thinking_content << (chunk.thinking_content || '') end count_tokens chunk @@ -33,6 +35,7 @@ def to_message Message.new( role: :assistant, content: content.empty? ? nil : content, + thinking_content: @thinking_content.empty? ? nil : @thinking_content, model_id: model_id, tool_calls: tool_calls_from_stream, input_tokens: @input_tokens.positive? ? 
@input_tokens : nil, From 7da672e7dd8058ef7c9e6687d6bb4a8a6a0511a6 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 22:57:34 +1000 Subject: [PATCH 07/16] chore: rename to use existing reasoning capability --- lib/ruby_llm/chat.rb | 29 +++++++++++------- lib/ruby_llm/configuration.rb | 4 +-- lib/ruby_llm/error.rb | 1 + lib/ruby_llm/message.rb | 4 +-- lib/ruby_llm/model/info.rb | 2 +- lib/ruby_llm/models.json | 6 ++-- lib/ruby_llm/provider.rb | 4 +-- lib/ruby_llm/providers/anthropic/chat.rb | 30 +++++++++---------- lib/ruby_llm/providers/anthropic/streaming.rb | 2 +- lib/ruby_llm/providers/bedrock/chat.rb | 2 +- lib/ruby_llm/providers/gemini/chat.rb | 2 +- lib/ruby_llm/providers/openai/chat.rb | 2 +- lib/ruby_llm/stream_accumulator.rb | 6 ++-- 13 files changed, 52 insertions(+), 42 deletions(-) diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index b56c27c7f..ad77d01cf 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -13,7 +13,7 @@ class Chat attr_reader :model, :messages, :tools - def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil, thinking: false) + def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil) if assume_model_exists && !provider raise ArgumentError, 'Provider must be specified if assume_model_exists is true' end @@ -22,7 +22,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n @config = context&.config || RubyLLM.config model_id = model || @config.default_model with_model(model_id, provider: provider, assume_exists: assume_model_exists) - @thinking = thinking + @reasoning = false @temperature = 0.7 @messages = [] @tools = {} @@ -61,15 +61,11 @@ def with_tools(*tools) self end - def with_model(model_id, provider: nil, thinking: nil, assume_exists: false) + def with_model(model_id, provider: nil, assume_exists: false) @model, @provider = Models.resolve(model_id, provider:, assume_exists:) @connection = @context ? @context.connection_for(@provider) : @provider.connection(@config) - - # Preserve thinking state from initialization - unless thinking.nil? - @thinking = thinking - end - + # TODO: Currently the unsupported errors will not retrigger after model reassignment. + self end @@ -78,6 +74,15 @@ def with_temperature(temperature) self end + def with_reasoning(reasoning = true) + if reasoning && !@model.reasoning? + raise UnsupportedReasoningError, "Model #{@model.id} doesn't support reasoning" + end + + @reasoning = reasoning + self + end + def with_context(context) @context = context @config = context.config @@ -106,7 +111,7 @@ def complete(&) tools: @tools, temperature: @temperature, model: @model.id, - thinking: @thinking, + reasoning: @reasoning, connection: @connection, & ) @@ -130,6 +135,10 @@ def reset_messages! @messages.clear end + def thinking? 
+ @thinking + end + private def handle_tool_calls(response, &) diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 4401548a8..06de5bb97 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -28,7 +28,7 @@ class Configuration :default_model, :default_embedding_model, :default_image_model, - :default_thinking_budget, + :default_reasoning_budget, # Connection configuration :request_timeout, :max_retries, @@ -54,7 +54,7 @@ def initialize @default_model = 'gpt-4.1-nano' @default_embedding_model = 'text-embedding-3-small' @default_image_model = 'dall-e-3' - @default_thinking_budget = 1024 + @default_reasoning_budget = 1024 # Logging configuration @log_file = $stdout diff --git a/lib/ruby_llm/error.rb b/lib/ruby_llm/error.rb index 228053a5e..102ec3b79 100644 --- a/lib/ruby_llm/error.rb +++ b/lib/ruby_llm/error.rb @@ -25,6 +25,7 @@ class InvalidRoleError < StandardError; end class ModelNotFoundError < StandardError; end class UnsupportedFunctionsError < StandardError; end class UnsupportedAttachmentError < StandardError; end + class UnsupportedReasoningError < StandardError; end # Error classes for different HTTP status codes class BadRequestError < Error; end diff --git a/lib/ruby_llm/message.rb b/lib/ruby_llm/message.rb index f7b07bce7..5624920e2 100644 --- a/lib/ruby_llm/message.rb +++ b/lib/ruby_llm/message.rb @@ -7,12 +7,12 @@ module RubyLLM class Message ROLES = %i[system user assistant tool].freeze - attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :thinking_content + attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :reasoning_content def initialize(options = {}) @role = options.fetch(:role).to_sym @content = normalize_content(options.fetch(:content)) - @thinking_content = options[:thinking_content] + @reasoning_content = options[:reasoning_content] @tool_calls = options[:tool_calls] @input_tokens = options[:input_tokens] @output_tokens = options[:output_tokens] diff --git a/lib/ruby_llm/model/info.rb b/lib/ruby_llm/model/info.rb index cdee5a2aa..9c72bcdf3 100644 --- a/lib/ruby_llm/model/info.rb +++ b/lib/ruby_llm/model/info.rb @@ -35,7 +35,7 @@ def supports?(capability) capabilities.include?(capability.to_s) end - %w[function_calling structured_output batch reasoning citations streaming thinking].each do |cap| + %w[function_calling structured_output batch reasoning citations streaming].each do |cap| define_method "#{cap}?" 
do supports?(cap) end diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index 002cc7381..4a0cf19e3 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -163,7 +163,7 @@ }, "capabilities": [ "function_calling", - "thinking" + "reasoning" ], "pricing": { "text_tokens": { @@ -289,7 +289,7 @@ }, "capabilities": [ "function_calling", - "thinking" + "reasoning" ], "pricing": { "text_tokens": { @@ -322,7 +322,7 @@ }, "capabilities": [ "function_calling", - "thinking" + "reasoning" ], "pricing": { "text_tokens": { diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb index fe6854f6b..64a9fd1f9 100644 --- a/lib/ruby_llm/provider.rb +++ b/lib/ruby_llm/provider.rb @@ -10,14 +10,14 @@ module Provider module Methods extend Streaming - def complete(messages, tools:, temperature:, model:, thinking:, connection:, &) + def complete(messages, tools:, temperature:, model:, reasoning:, connection:, &) # rubocop:disable Metrics/ParameterLists normalized_temperature = maybe_normalize_temperature(temperature, model) payload = render_payload(messages, tools: tools, temperature: normalized_temperature, model: model, - thinking: thinking, + reasoning: reasoning, stream: block_given?) if block_given? diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index 459a825a9..d24e799aa 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -11,12 +11,12 @@ def completion_url '/v1/messages' end - def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) + def render_payload(messages, tools:, temperature:, model:, reasoning:, stream: false) # rubocop:disable Metrics/ParameterLists system_messages, chat_messages = separate_messages(messages) system_content = build_system_content(system_messages) build_base_payload(chat_messages, temperature, model, stream).tap do |payload| - add_optional_fields(payload, system_content:, tools:, thinking:) + add_optional_fields(payload, system_content:, tools:, reasoning:) end end @@ -39,21 +39,21 @@ def build_base_payload(chat_messages, temperature, model, stream) { model: model, messages: chat_messages.map { |msg| format_message(msg) }, - temperature: 1, # TODO: Ensure to maintain this as being configurable - but must be set to 1 to enable thinking + temperature: temperature, stream: stream, max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096 } end - def add_optional_fields(payload, system_content:, tools:, thinking:) + def add_optional_fields(payload, system_content:, tools:, reasoning:) payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any? payload[:system] = system_content unless system_content.empty? 
- if thinking - payload[:thinking] = { - type: 'enabled', - budget_tokens: RubyLLM.config.default_thinking_budget || 1024, - } - end + return unless reasoning + + payload[:thinking] = { + type: 'enabled', + budget_tokens: RubyLLM.config.default_reasoning_budget || 1024 + } end def parse_completion_response(response) @@ -62,14 +62,14 @@ def parse_completion_response(response) content_blocks = data['content'] || [] - thinking_content = extract_thinking_content(content_blocks) + reasoning_content = extract_reasoning_content(content_blocks) text_content = extract_text_content(content_blocks) tool_use = Tools.find_tool_use(content_blocks) - build_message(data, text_content, tool_use, thinking_content) + build_message(data, text_content, tool_use, reasoning_content) end - def extract_thinking_content(blocks) + def extract_reasoning_content(blocks) thinking_blocks = blocks.select { |c| c['type'] == 'thinking' } thinking_blocks.map { |c| c['thinking'] }.join end @@ -79,11 +79,11 @@ def extract_text_content(blocks) text_blocks.map { |c| c['text'] }.join end - def build_message(data, content, tool_use, thinking_content) + def build_message(data, content, tool_use, reasoning_content) Message.new( role: :assistant, content: content, - thinking_content: thinking_content, + reasoning_content: reasoning_content, tool_calls: Tools.parse_tool_calls(tool_use), input_tokens: data.dig('usage', 'input_tokens'), output_tokens: data.dig('usage', 'output_tokens'), diff --git a/lib/ruby_llm/providers/anthropic/streaming.rb b/lib/ruby_llm/providers/anthropic/streaming.rb index a1d5c44f1..6ebb07d73 100644 --- a/lib/ruby_llm/providers/anthropic/streaming.rb +++ b/lib/ruby_llm/providers/anthropic/streaming.rb @@ -16,7 +16,7 @@ def build_chunk(data) role: :assistant, model_id: extract_model_id(data), content: data.dig('delta', 'text'), - thinking_content: data.dig('delta', 'thinking'), + reasoning_content: data.dig('delta', 'thinking'), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), tool_calls: extract_tool_calls(data) diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb index bb942338b..62c33683a 100644 --- a/lib/ruby_llm/providers/bedrock/chat.rb +++ b/lib/ruby_llm/providers/bedrock/chat.rb @@ -39,7 +39,7 @@ def completion_url "model/#{@model_id}/invoke" end - def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists # Hold model_id in instance variable for use in completion_url and stream_url @model_id = model diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index 7bb794911..5a18546aa 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -11,7 +11,7 @@ def completion_url "models/#{@model}:generateContent" end - def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists @model = model # Store model for completion_url/stream_url payload = { contents: format_messages(messages), diff --git a/lib/ruby_llm/providers/openai/chat.rb b/lib/ruby_llm/providers/openai/chat.rb index ffcd8dcf6..8170b3849 100644 --- 
a/lib/ruby_llm/providers/openai/chat.rb +++ b/lib/ruby_llm/providers/openai/chat.rb @@ -11,7 +11,7 @@ def completion_url module_function - def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists { model: model, messages: format_messages(messages), diff --git a/lib/ruby_llm/stream_accumulator.rb b/lib/ruby_llm/stream_accumulator.rb index 233ce9cae..5f3bb0e7e 100644 --- a/lib/ruby_llm/stream_accumulator.rb +++ b/lib/ruby_llm/stream_accumulator.rb @@ -9,7 +9,7 @@ class StreamAccumulator def initialize @content = String.new - @thinking_content = String.new + @reasoning_content = String.new @tool_calls = {} @input_tokens = 0 @output_tokens = 0 @@ -24,7 +24,7 @@ def add(chunk) accumulate_tool_calls chunk.tool_calls else @content << (chunk.content || '') - @thinking_content << (chunk.thinking_content || '') + @reasoning_content << (chunk.reasoning_content || '') end count_tokens chunk @@ -35,7 +35,7 @@ def to_message Message.new( role: :assistant, content: content.empty? ? nil : content, - thinking_content: @thinking_content.empty? ? nil : @thinking_content, + reasoning_content: @reasoning_content.empty? ? nil : @reasoning_content, model_id: model_id, tool_calls: tool_calls_from_stream, input_tokens: @input_tokens.positive? ? @input_tokens : nil, From 6b4fb8372bb9f385b3e9272cba8bf69f716295e1 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Sun, 22 Jun 2025 21:51:24 +1000 Subject: [PATCH 08/16] chore: rename to thinking --- lib/ruby_llm/chat.rb | 21 ++++++++++++------- lib/ruby_llm/configuration.rb | 10 +++++++-- lib/ruby_llm/error.rb | 2 +- lib/ruby_llm/message.rb | 4 ++-- lib/ruby_llm/provider.rb | 5 +++-- lib/ruby_llm/providers/anthropic/chat.rb | 20 +++++++++--------- lib/ruby_llm/providers/anthropic/streaming.rb | 2 +- lib/ruby_llm/providers/gemini/chat.rb | 2 +- lib/ruby_llm/stream_accumulator.rb | 6 +++--- 9 files changed, 42 insertions(+), 30 deletions(-) diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index ad77d01cf..e6af95334 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -22,8 +22,9 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n @config = context&.config || RubyLLM.config model_id = model || @config.default_model with_model(model_id, provider: provider, assume_exists: assume_model_exists) - @reasoning = false - @temperature = 0.7 + @thinking = @config.default_thinking + @thinking_budget = @config.default_thinking_budget + @temperature = @config.default_temperature @messages = [] @tools = {} @on = { @@ -74,12 +75,15 @@ def with_temperature(temperature) self end - def with_reasoning(reasoning = true) - if reasoning && !@model.reasoning? - raise UnsupportedReasoningError, "Model #{@model.id} doesn't support reasoning" - end + def with_thinking(thinking: true, budget: nil) + raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.thinking? 
- @reasoning = reasoning + @thinking = thinking + + if budget + @thinking_budget = budget + end + self end @@ -111,7 +115,8 @@ def complete(&) tools: @tools, temperature: @temperature, model: @model.id, - reasoning: @reasoning, + thinking: @thinking, + thinking_budget: @thinking_budget, connection: @connection, & ) diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 886728168..a5ed635b4 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -28,7 +28,9 @@ class Configuration :default_model, :default_embedding_model, :default_image_model, - :default_reasoning_budget, + # Default model settings + :default_thinking, + :default_thinking_budget, # Connection configuration :request_timeout, :max_retries, @@ -55,7 +57,11 @@ def initialize @default_model = 'gpt-4.1-nano' @default_embedding_model = 'text-embedding-3-small' @default_image_model = 'dall-e-3' - @default_reasoning_budget = 1024 + + # Default model settings + @default_thinking = false + @default_thinking_budget = 2048 + @default_temperature = 0.7 # Logging configuration @log_file = $stdout diff --git a/lib/ruby_llm/error.rb b/lib/ruby_llm/error.rb index 102ec3b79..01948dbbe 100644 --- a/lib/ruby_llm/error.rb +++ b/lib/ruby_llm/error.rb @@ -25,7 +25,7 @@ class InvalidRoleError < StandardError; end class ModelNotFoundError < StandardError; end class UnsupportedFunctionsError < StandardError; end class UnsupportedAttachmentError < StandardError; end - class UnsupportedReasoningError < StandardError; end + class UnsupportedThinkingError < StandardError; end # Error classes for different HTTP status codes class BadRequestError < Error; end diff --git a/lib/ruby_llm/message.rb b/lib/ruby_llm/message.rb index 5624920e2..cb337fa36 100644 --- a/lib/ruby_llm/message.rb +++ b/lib/ruby_llm/message.rb @@ -7,12 +7,12 @@ module RubyLLM class Message ROLES = %i[system user assistant tool].freeze - attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :reasoning_content + attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :thinking def initialize(options = {}) @role = options.fetch(:role).to_sym @content = normalize_content(options.fetch(:content)) - @reasoning_content = options[:reasoning_content] + @thinking = options[:thinking] @tool_calls = options[:tool_calls] @input_tokens = options[:input_tokens] @output_tokens = options[:output_tokens] diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb index 64a9fd1f9..1549abd1a 100644 --- a/lib/ruby_llm/provider.rb +++ b/lib/ruby_llm/provider.rb @@ -10,14 +10,15 @@ module Provider module Methods extend Streaming - def complete(messages, tools:, temperature:, model:, reasoning:, connection:, &) # rubocop:disable Metrics/ParameterLists + def complete(messages, tools:, temperature:, model:, thinking:, thinking_budget:, connection:, &) # rubocop:disable Metrics/ParameterLists normalized_temperature = maybe_normalize_temperature(temperature, model) payload = render_payload(messages, tools: tools, temperature: normalized_temperature, model: model, - reasoning: reasoning, + thinking: thinking, + thinking_budget: thinking_budget, stream: block_given?) if block_given? 
diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index d24e799aa..3b7f79956 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -11,12 +11,12 @@ def completion_url '/v1/messages' end - def render_payload(messages, tools:, temperature:, model:, reasoning:, stream: false) # rubocop:disable Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, thinking:, thinking_budget:, stream: false) # rubocop:disable Metrics/ParameterLists system_messages, chat_messages = separate_messages(messages) system_content = build_system_content(system_messages) build_base_payload(chat_messages, temperature, model, stream).tap do |payload| - add_optional_fields(payload, system_content:, tools:, reasoning:) + add_optional_fields(payload, system_content:, tools:, thinking:, thinking_budget:) end end @@ -45,14 +45,14 @@ def build_base_payload(chat_messages, temperature, model, stream) } end - def add_optional_fields(payload, system_content:, tools:, reasoning:) + def add_optional_fields(payload, system_content:, tools:, thinking:, thinking_budget:) payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any? payload[:system] = system_content unless system_content.empty? - return unless reasoning + return unless thinking payload[:thinking] = { type: 'enabled', - budget_tokens: RubyLLM.config.default_reasoning_budget || 1024 + budget_tokens: thinking_budget } end @@ -62,14 +62,14 @@ def parse_completion_response(response) content_blocks = data['content'] || [] - reasoning_content = extract_reasoning_content(content_blocks) + thinking_content = extract_thinking_content(content_blocks) text_content = extract_text_content(content_blocks) tool_use = Tools.find_tool_use(content_blocks) - build_message(data, text_content, tool_use, reasoning_content) + build_message(data, text_content, tool_use, thinking_content) end - def extract_reasoning_content(blocks) + def extract_thinking_content(blocks) thinking_blocks = blocks.select { |c| c['type'] == 'thinking' } thinking_blocks.map { |c| c['thinking'] }.join end @@ -79,11 +79,11 @@ def extract_text_content(blocks) text_blocks.map { |c| c['text'] }.join end - def build_message(data, content, tool_use, reasoning_content) + def build_message(data, content, tool_use, thinking_content) Message.new( role: :assistant, content: content, - reasoning_content: reasoning_content, + thinking: thinking_content, tool_calls: Tools.parse_tool_calls(tool_use), input_tokens: data.dig('usage', 'input_tokens'), output_tokens: data.dig('usage', 'output_tokens'), diff --git a/lib/ruby_llm/providers/anthropic/streaming.rb b/lib/ruby_llm/providers/anthropic/streaming.rb index 6ebb07d73..5a0bc01ff 100644 --- a/lib/ruby_llm/providers/anthropic/streaming.rb +++ b/lib/ruby_llm/providers/anthropic/streaming.rb @@ -16,7 +16,7 @@ def build_chunk(data) role: :assistant, model_id: extract_model_id(data), content: data.dig('delta', 'text'), - reasoning_content: data.dig('delta', 'thinking'), + thinking: data.dig('delta', 'thinking'), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), tool_calls: extract_tool_calls(data) diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index 5a18546aa..fcb8eaa0e 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -11,7 +11,7 @@ def completion_url "models/#{@model}:generateContent" end - def render_payload(messages, 
tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, stream: false, **) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists @model = model # Store model for completion_url/stream_url payload = { contents: format_messages(messages), diff --git a/lib/ruby_llm/stream_accumulator.rb b/lib/ruby_llm/stream_accumulator.rb index 5f3bb0e7e..9eeb10b23 100644 --- a/lib/ruby_llm/stream_accumulator.rb +++ b/lib/ruby_llm/stream_accumulator.rb @@ -9,7 +9,7 @@ class StreamAccumulator def initialize @content = String.new - @reasoning_content = String.new + @thinking = String.new @tool_calls = {} @input_tokens = 0 @output_tokens = 0 @@ -24,7 +24,7 @@ def add(chunk) accumulate_tool_calls chunk.tool_calls else @content << (chunk.content || '') - @reasoning_content << (chunk.reasoning_content || '') + @thinking << (chunk.thinking || '') end count_tokens chunk @@ -35,7 +35,7 @@ def to_message Message.new( role: :assistant, content: content.empty? ? nil : content, - reasoning_content: @reasoning_content.empty? ? nil : @reasoning_content, + thinking: @thinking.empty? ? nil : @thinking, model_id: model_id, tool_calls: tool_calls_from_stream, input_tokens: @input_tokens.positive? ? @input_tokens : nil, From 7ec6733421e1d5a68942a035809b7c052f20d076 Mon Sep 17 00:00:00 2001 From: Hiemanshu Sharma Date: Fri, 27 Jun 2025 22:51:06 +0530 Subject: [PATCH 09/16] Get thinking working with bedrock --- lib/ruby_llm/chat.rb | 9 ++++----- lib/ruby_llm/configuration.rb | 3 ++- lib/ruby_llm/model/info.rb | 4 ++++ lib/ruby_llm/models.json | 12 +++++++---- .../providers/anthropic/capabilities.rb | 20 ++++++++++--------- .../providers/bedrock/capabilities.rb | 8 +++++--- lib/ruby_llm/providers/bedrock/chat.rb | 4 ++-- 7 files changed, 36 insertions(+), 24 deletions(-) diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index e6af95334..52ebb79eb 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -79,11 +79,10 @@ def with_thinking(thinking: true, budget: nil) raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.thinking? @thinking = thinking - - if budget - @thinking_budget = budget - end - + @temperature = 1 if thinking # Thinking requires temperature be set to 1 + + @thinking_budget = budget if budget + self end diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index a5ed635b4..424118244 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -29,6 +29,7 @@ class Configuration :default_embedding_model, :default_image_model, # Default model settings + :default_temperature, :default_thinking, :default_thinking_budget, # Connection configuration @@ -57,7 +58,7 @@ def initialize @default_model = 'gpt-4.1-nano' @default_embedding_model = 'text-embedding-3-small' @default_image_model = 'dall-e-3' - + # Default model settings @default_thinking = false @default_thinking_budget = 2048 diff --git a/lib/ruby_llm/model/info.rb b/lib/ruby_llm/model/info.rb index 9c72bcdf3..768fcede1 100644 --- a/lib/ruby_llm/model/info.rb +++ b/lib/ruby_llm/model/info.rb @@ -58,6 +58,10 @@ def supports_functions? function_calling? end + def thinking? + reasoning? 
+ end + def input_price_per_million pricing.text_tokens.input end diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index 2e8e7bd92..0bb33db62 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -1809,6 +1809,7 @@ }, "capabilities": [ "streaming", + "reasoning", "function_calling", "structured_output" ], @@ -1861,7 +1862,8 @@ "capabilities": [ "streaming", "function_calling", - "structured_output" + "structured_output", + "reasoning" ], "pricing": { "text_tokens": { @@ -9654,7 +9656,8 @@ }, "capabilities": [ "streaming", - "function_calling" + "function_calling", + "reasoning" ], "pricing": { "text_tokens": { @@ -9718,7 +9721,8 @@ }, "capabilities": [ "streaming", - "function_calling" + "function_calling", + "reasoning" ], "pricing": { "text_tokens": { @@ -27665,4 +27669,4 @@ ] } } -] \ No newline at end of file +] diff --git a/lib/ruby_llm/providers/anthropic/capabilities.rb b/lib/ruby_llm/providers/anthropic/capabilities.rb index 78cbf50c4..c1dce1adf 100644 --- a/lib/ruby_llm/providers/anthropic/capabilities.rb +++ b/lib/ruby_llm/providers/anthropic/capabilities.rb @@ -65,7 +65,7 @@ def supports_json_mode?(model_id) # @param model_id [String] the model identifier # @return [Boolean] true if the model supports extended thinking def supports_extended_thinking?(model_id) - model_id.match?(/claude-3-7-sonnet/) + model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4/) end # Determines the model family for a given model ID @@ -73,6 +73,8 @@ def supports_extended_thinking?(model_id) # @return [Symbol] the model family identifier def model_family(model_id) case model_id + when /claude-sonnet-4/ then 'claude-sonnet-4' + when /claude-opus-4/ then 'claude-opus-4' when /claude-3-7-sonnet/ then 'claude-3-7-sonnet' when /claude-3-5-sonnet/ then 'claude-3-5-sonnet' when /claude-3-5-haiku/ then 'claude-3-5-haiku' @@ -131,17 +133,17 @@ def capabilities_for(model_id) capabilities = ['streaming'] # Function calling for Claude 3+ - if model_id.match?(/claude-3/) + if model_id.match?(/claude-3|claude-sonnet-4|claude-opus-4/) capabilities << 'function_calling' capabilities << 'structured_output' capabilities << 'batch' end - # Extended thinking (reasoning) for Claude 3.7 - capabilities << 'reasoning' if model_id.match?(/claude-3-7/) + # Extended thinking for Claude 3.7 and Claude 4 + capabilities << 'reasoning' if supports_extended_thinking?(model_id) # Citations - capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7/) + capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7|claude-sonnet-4|claude-opus-4/) capabilities end @@ -161,10 +163,10 @@ def pricing_for(model_id) output_per_million: prices[:output] * 0.5 } - # Add reasoning output pricing for 3.7 models - if model_id.match?(/claude-3-7/) - standard_pricing[:reasoning_output_per_million] = prices[:output] * 2.5 - batch_pricing[:reasoning_output_per_million] = prices[:output] * 1.25 + # Add thinking output pricing for 3.7 and 4 models + if model_id.match?(/claude-3-7|claude-sonnet-4|claude-opus-4/) + standard_pricing[:thinking_output_per_million] = prices[:output] * 2.5 + batch_pricing[:thinking_output_per_million] = prices[:output] * 1.25 end { diff --git a/lib/ruby_llm/providers/bedrock/capabilities.rb b/lib/ruby_llm/providers/bedrock/capabilities.rb index 342c67897..ae3dbfc17 100644 --- a/lib/ruby_llm/providers/bedrock/capabilities.rb +++ b/lib/ruby_llm/providers/bedrock/capabilities.rb @@ -117,7 +117,9 @@ def supports_structured_output?(model_id) 
/anthropic\.claude-3-haiku/ => :claude3_haiku, /anthropic\.claude-3-5-haiku/ => :claude3_5_haiku, /anthropic\.claude-v2/ => :claude2, - /anthropic\.claude-instant/ => :claude_instant + /anthropic\.claude-instant/ => :claude_instant, + /anthropic\.claude-sonnet-4/ => :claude_sonnet4, + /anthropic\.claude-opus-4/ => :claude_opus4 }.freeze # Determines the model family for pricing and capability lookup @@ -187,8 +189,8 @@ def capabilities_for(model_id) capabilities << 'structured_output' if supports_json_mode?(model_id) - # Extended thinking for 3.7 models - capabilities << 'reasoning' if model_id.match?(/claude-3-7/) + # Extended thinking for 3.7, and 4 models + capabilities << 'reasoning' if model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4/) # Batch capabilities for newer Claude models if model_id.match?(/claude-3\.5|claude-3-7/) diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb index 62c33683a..d6ae8139a 100644 --- a/lib/ruby_llm/providers/bedrock/chat.rb +++ b/lib/ruby_llm/providers/bedrock/chat.rb @@ -39,7 +39,7 @@ def completion_url "model/#{@model_id}/invoke" end - def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, thinking:, thinking_budget:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists # Hold model_id in instance variable for use in completion_url and stream_url @model_id = model @@ -47,7 +47,7 @@ def render_payload(messages, tools:, temperature:, model:, thinking:, stream: fa system_content = Anthropic::Chat.build_system_content(system_messages) build_base_payload(chat_messages, temperature, model).tap do |payload| - Anthropic::Chat.add_optional_fields(payload, system_content:, tools:) + Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, thinking:, thinking_budget:) end end From 5577bae20079e29b16ab124ea7b6c02f2b07dacc Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 18 Jul 2025 14:48:48 +1000 Subject: [PATCH 10/16] chore: update anthropic capabilities with thinking - Add 'thinking' to anthropic capabilities - Add 'thinking' to bedrock capabilities (for anthropic supported models) - Update models.json file to reflect changes --- lib/ruby_llm/models.json | 186 +++++++++++------- .../providers/anthropic/capabilities.rb | 2 +- .../providers/bedrock/capabilities.rb | 6 +- 3 files changed, 120 insertions(+), 74 deletions(-) diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index c942e788a..f36d9e8df 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -71,7 +71,7 @@ "id": "claude-3-5-haiku-20241022", "name": "Claude Haiku 3.5", "provider": "anthropic", - "family": "claude-3-5-haiku", + "family": "claude-haiku-3.5", "created_at": null, "context_window": 200000, "max_output_tokens": 8192, @@ -103,7 +103,7 @@ "id": "claude-3-5-sonnet-20240620", "name": "Claude Sonnet 3.5", "provider": "anthropic", - "family": "claude-3-5-sonnet", + "family": "claude-sonnet-3.5", "created_at": null, "context_window": 200000, "max_output_tokens": 8192, @@ -135,7 +135,7 @@ "id": "claude-3-5-sonnet-20241022", "name": "Claude Sonnet 3.5", "provider": "anthropic", - "family": "claude-3-5-sonnet", + "family": "claude-sonnet-3.5", "created_at": null, "context_window": 200000, "max_output_tokens": 8192, @@ -167,7 +167,7 @@ "id": "claude-3-7-sonnet-20250219", "name": "Claude Sonnet 3.7", "provider": 
"anthropic", - "family": "claude-3-7-sonnet", + "family": "claude-sonnet-3.7", "created_at": null, "context_window": 200000, "max_output_tokens": 64000, @@ -183,7 +183,7 @@ }, "capabilities": [ "function_calling", - "reasoning" + "thinking" ], "pricing": { "text_tokens": { @@ -200,7 +200,7 @@ "id": "claude-3-haiku-20240307", "name": "Claude Haiku 3", "provider": "anthropic", - "family": "claude-3-haiku", + "family": "claude-haiku-3", "created_at": null, "context_window": 200000, "max_output_tokens": 4096, @@ -232,7 +232,7 @@ "id": "claude-3-opus-20240229", "name": "Claude Opus 3", "provider": "anthropic", - "family": "claude-3-opus", + "family": "claude-opus-3", "created_at": null, "context_window": 200000, "max_output_tokens": 4096, @@ -319,7 +319,7 @@ }, "capabilities": [ "function_calling", - "reasoning" + "thinking" ], "pricing": { "text_tokens": { @@ -352,7 +352,7 @@ }, "capabilities": [ "function_calling", - "reasoning" + "thinking" ], "pricing": { "text_tokens": { @@ -1757,7 +1757,7 @@ "streaming", "function_calling", "structured_output", - "reasoning", + "thinking", "batch", "citations" ], @@ -1809,9 +1809,9 @@ }, "capabilities": [ "streaming", - "reasoning", "function_calling", - "structured_output" + "structured_output", + "thinking" ], "pricing": { "text_tokens": { @@ -1863,7 +1863,7 @@ "streaming", "function_calling", "structured_output", - "reasoning" + "thinking" ], "pricing": { "text_tokens": { @@ -3401,7 +3401,7 @@ }, { "id": "gemini-2.5-flash-preview-tts", - "name": "Gemini 2.5 Flash Preview TTS", + "name": "Gemini 2.5 Flash Preview Text-to-Speech", "provider": "gemini", "family": "gemini-2.5-flash-preview-tts", "created_at": null, @@ -3629,7 +3629,7 @@ }, { "id": "gemini-2.5-pro-preview-tts", - "name": "Gemini 2.5 Pro Preview TTS", + "name": "Gemini 2.5 Pro Preview Text-to-Speech", "provider": "gemini", "family": "gemini-2.5-pro-preview-tts", "created_at": null, @@ -4168,7 +4168,7 @@ "family": "imagen-3.0-generate-002", "created_at": null, "context_window": null, - "max_output_tokens": null, + "max_output_tokens": 4, "knowledge_cutoff": null, "modalities": { "input": [ @@ -4179,13 +4179,7 @@ ] }, "capabilities": [], - "pricing": { - "text_tokens": { - "standard": { - "output_per_million": 0.03 - } - } - }, + "pricing": {}, "metadata": { "version": "002", "description": "Vertex served Imagen 3.0 002 model", @@ -4201,7 +4195,7 @@ "family": "imagen-4.0-generate-preview-06-06", "created_at": null, "context_window": 480, - "max_output_tokens": null, + "max_output_tokens": 1, "knowledge_cutoff": null, "modalities": { "input": [ @@ -4212,13 +4206,7 @@ ] }, "capabilities": [], - "pricing": { - "text_tokens": { - "standard": { - "output_per_million": 0.04 - } - } - }, + "pricing": {}, "metadata": { "version": "01", "description": "Vertex served Imagen 4.0 model", @@ -4234,7 +4222,7 @@ "family": "imagen-4.0-generate-preview-06-06", "created_at": null, "context_window": 480, - "max_output_tokens": null, + "max_output_tokens": 1, "knowledge_cutoff": null, "modalities": { "input": [ @@ -4245,13 +4233,7 @@ ] }, "capabilities": [], - "pricing": { - "text_tokens": { - "standard": { - "output_per_million": 0.04 - } - } - }, + "pricing": {}, "metadata": { "version": "01", "description": "Vertex served Imagen 4.0 ultra model", @@ -4341,7 +4323,7 @@ "family": "veo-2.0-generate-001", "created_at": null, "context_window": null, - "max_output_tokens": null, + "max_output_tokens": 2, "knowledge_cutoff": null, "modalities": { "input": [ @@ -4351,13 +4333,7 @@ "output": [] }, "capabilities": [], - 
"pricing": { - "text_tokens": { - "standard": { - "output_per_million": 0.35 - } - } - }, + "pricing": {}, "metadata": {} }, { @@ -7306,7 +7282,7 @@ "pricing": { "text_tokens": { "standard": { - "output_per_million": 30.0 + "input_per_million": 30.0 } } }, @@ -9761,8 +9737,7 @@ }, "capabilities": [ "streaming", - "function_calling", - "thinking" + "function_calling" ], "pricing": { "text_tokens": { @@ -9824,8 +9799,7 @@ }, "capabilities": [ "streaming", - "function_calling", - "reasoning" + "function_calling" ], "pricing": { "text_tokens": { @@ -9889,8 +9863,7 @@ }, "capabilities": [ "streaming", - "function_calling", - "reasoning" + "function_calling" ], "pricing": { "text_tokens": { @@ -17503,22 +17476,22 @@ "max_tokens", "temperature", "top_p", - "tools", - "tool_choice", "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "stop", "frequency_penalty", "presence_penalty", + "top_k", + "repetition_penalty", "logit_bias", "logprobs", - "seed", - "repetition_penalty", - "top_k", "top_logprobs", - "min_p" + "min_p", + "seed", + "tools", + "tool_choice", + "structured_outputs", + "response_format" ] } }, @@ -18786,7 +18759,8 @@ "capabilities": [ "streaming", "function_calling", - "structured_output" + "structured_output", + "predicted_outputs" ], "pricing": { "text_tokens": { @@ -18820,15 +18794,17 @@ "max_tokens", "temperature", "top_p", - "tools", - "tool_choice", "stop", "frequency_penalty", "presence_penalty", - "response_format", - "structured_outputs", + "logit_bias", + "logprobs", "seed", "repetition_penalty", + "tools", + "tool_choice", + "response_format", + "structured_outputs", "top_k", "min_p" ] @@ -19355,16 +19331,16 @@ "top_p", "tools", "tool_choice", + "structured_outputs", + "response_format", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", - "response_format", "top_k", - "seed", - "min_p", - "structured_outputs", + "repetition_penalty", "logit_bias", + "min_p", + "seed", "top_logprobs", "logprobs" ] @@ -20606,6 +20582,72 @@ ] } }, + { + "id": "openai/gpt-3.5-turbo", + "name": "OpenAI: GPT-3.5 Turbo", + "provider": "openrouter", + "family": "openai", + "created_at": "2023-05-28 02:00:00 +0200", + "context_window": 16385, + "max_output_tokens": 4096, + "knowledge_cutoff": null, + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "capabilities": [ + "streaming", + "function_calling", + "structured_output" + ], + "pricing": { + "text_tokens": { + "standard": { + "input_per_million": 0.5, + "output_per_million": 1.5 + } + } + }, + "metadata": { + "description": "GPT-3.5 Turbo is OpenAI's fastest model. 
It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.",
      "architecture": {
        "modality": "text->text",
        "input_modalities": [
          "text"
        ],
        "output_modalities": [
          "text"
        ],
        "tokenizer": "GPT",
        "instruct_type": null
      },
      "top_provider": {
        "context_length": 16385,
        "max_completion_tokens": 4096,
        "is_moderated": true
      },
      "per_request_limits": null,
      "supported_parameters": [
        "max_tokens",
        "temperature",
        "top_p",
        "tools",
        "tool_choice",
        "stop",
        "frequency_penalty",
        "presence_penalty",
        "seed",
        "logit_bias",
        "logprobs",
        "top_logprobs",
        "response_format"
      ]
    }
  },
  {
    "id": "openai/gpt-3.5-turbo-0613",
    "name": "OpenAI: GPT-3.5 Turbo (older v0613)",
diff --git a/lib/ruby_llm/providers/anthropic/capabilities.rb b/lib/ruby_llm/providers/anthropic/capabilities.rb
index c1dce1adf..0cb8bd946 100644
--- a/lib/ruby_llm/providers/anthropic/capabilities.rb
+++ b/lib/ruby_llm/providers/anthropic/capabilities.rb
@@ -140,7 +140,7 @@ def capabilities_for(model_id)
        end

        # Extended thinking for Claude 3.7 and Claude 4
-       capabilities << 'reasoning' if supports_extended_thinking?(model_id)
+       capabilities << 'thinking' if supports_extended_thinking?(model_id)

        # Citations
        capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7|claude-sonnet-4|claude-opus-4/)
diff --git a/lib/ruby_llm/providers/bedrock/capabilities.rb b/lib/ruby_llm/providers/bedrock/capabilities.rb
index ae3dbfc17..976263000 100644
--- a/lib/ruby_llm/providers/bedrock/capabilities.rb
+++ b/lib/ruby_llm/providers/bedrock/capabilities.rb
@@ -108,6 +108,10 @@ def supports_structured_output?(model_id)
        model_id.match?(/anthropic\.claude/)
       end

+      def supports_extended_thinking?(model_id)
+        model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4/)
+      end
+
       # Model family patterns for capability lookup
       MODEL_FAMILIES = {
         /anthropic\.claude-3-opus/ => :claude3_opus,
@@ -190,7 +194,7 @@ def capabilities_for(model_id)
        capabilities << 'structured_output' if supports_json_mode?(model_id)

        # Extended thinking for 3.7, and 4 models
-       capabilities << 'reasoning' if model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4/)
+       capabilities << 'thinking' if supports_extended_thinking?(model_id)

        # Batch capabilities for newer Claude models
        if model_id.match?(/claude-3\.5|claude-3-7/)

From 5c02af2da17153901adcf33779a5270fda33c931 Mon Sep 17 00:00:00 2001
From: Rhys Murray
Date: Fri, 18 Jul 2025 15:17:29 +1000
Subject: [PATCH 11/16] chore: move temperature setting to param

---
 lib/ruby_llm/chat.rb | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb
index 52ebb79eb..cc47adf6f 100644
--- a/lib/ruby_llm/chat.rb
+++ b/lib/ruby_llm/chat.rb
@@ -75,12 +75,13 @@ def with_temperature(temperature)
      end

-     def with_thinking(thinking: true, budget: nil)
-       raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.thinking?
+     def with_thinking(thinking: true, budget: nil, temperature: 1)
+       raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.supports_thinking?

       @thinking = thinking
-       @temperature = 1 if thinking # Thinking requires temperature be set to 1
+       # Most thinking models require temperature to be set to 1, so force it here while still allowing an override via the temperature param.
+      @temperature = temperature
       @thinking_budget = budget if budget
 
       self

From 153440c6b61d5948321ca0ad42db1b7ac59e2b81 Mon Sep 17 00:00:00 2001
From: Rhys Murray
Date: Fri, 18 Jul 2025 15:18:20 +1000
Subject: [PATCH 12/16] chore: use 'thinking' capability instead of reasoning
 in Model::Info

---
 lib/ruby_llm/model/info.rb | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/lib/ruby_llm/model/info.rb b/lib/ruby_llm/model/info.rb
index 768fcede1..cdee5a2aa 100644
--- a/lib/ruby_llm/model/info.rb
+++ b/lib/ruby_llm/model/info.rb
@@ -35,7 +35,7 @@ def supports?(capability)
         capabilities.include?(capability.to_s)
       end
 
-      %w[function_calling structured_output batch reasoning citations streaming].each do |cap|
+      %w[function_calling structured_output batch reasoning citations streaming thinking].each do |cap|
         define_method "#{cap}?" do
           supports?(cap)
         end
@@ -58,10 +58,6 @@ def supports_functions?
         function_calling?
       end
 
-      def thinking?
-        reasoning?
-      end
-
       def input_price_per_million
         pricing.text_tokens.input
       end

From 627ffe070e7b5b8e2eab91cc497f83270c8c275d Mon Sep 17 00:00:00 2001
From: Rhys Murray
Date: Fri, 18 Jul 2025 15:18:42 +1000
Subject: [PATCH 13/16] chore: allow thinking capabilities on assumed models

---
 lib/ruby_llm/models.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/ruby_llm/models.rb b/lib/ruby_llm/models.rb
index b1fa08d74..035c61453 100644
--- a/lib/ruby_llm/models.rb
+++ b/lib/ruby_llm/models.rb
@@ -57,7 +57,7 @@ def resolve(model_id, provider: nil, assume_exists: false) # rubocop:disable Met
         id: model_id,
         name: model_id.gsub('-', ' ').capitalize,
         provider: provider.slug,
-        capabilities: %w[function_calling streaming],
+        capabilities: %w[function_calling streaming thinking],
         modalities: { input: %w[text image], output: %w[text] },
         metadata: { warning: 'Assuming model exists, capabilities may not be accurate' }
       )

From 8a6453dbaf85f43b12ad99184e19998644c82c92 Mon Sep 17 00:00:00 2001
From: Rhys Murray
Date: Fri, 18 Jul 2025 15:25:38 +1000
Subject: [PATCH 14/16] bug: fix call to check if thinking is supported in
 'with_thinking' - incorrectly using 'supports_thinking?' instead of
 'thinking?'

---
 lib/ruby_llm/chat.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb
index cc47adf6f..403df7d1d 100644
--- a/lib/ruby_llm/chat.rb
+++ b/lib/ruby_llm/chat.rb
@@ -76,7 +76,7 @@ def with_temperature(temperature)
     end
 
     def with_thinking(thinking: true, budget: nil, temperature: 1)
-      raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.supports_thinking?
+      raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.thinking?
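+      # `thinking?` comes from Model::Info's generated capability predicates; the
+      # `supports_thinking?` helper appears not to exist on that class, hence this fix.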
@thinking = thinking From cc1ce5f238d4eebb237e67d6153120d2914c6a67 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 18 Jul 2025 16:14:51 +1000 Subject: [PATCH 15/16] test: add basic spec for anthropic models - Adds chat_thinking_spec.rb - Adds THINKING_MODELS and includes anthropic provider models - Adds NON_THINKING_MODELS and includes anthropic provider models - Adds cassette files --- ...sic_conversation_with_thinking_enabled.yml | 88 ++++++++++ ...ns_thinking_mode_across_multiple_turns.yml | 162 ++++++++++++++++++ ...sic_conversation_with_thinking_enabled.yml | 81 +++++++++ ...ns_thinking_mode_across_multiple_turns.yml | 161 +++++++++++++++++ ...sic_conversation_with_thinking_enabled.yml | 93 ++++++++++ ...ns_thinking_mode_across_multiple_turns.yml | 161 +++++++++++++++++ spec/ruby_llm/chat_thinking_spec.rb | 109 ++++++++++++ spec/spec_helper.rb | 11 ++ 8 files changed, 866 insertions(+) create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_can_handle_basic_conversation_with_thinking_enabled.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_maintains_thinking_mode_across_multiple_turns.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_can_handle_basic_conversation_with_thinking_enabled.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_maintains_thinking_mode_across_multiple_turns.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_can_handle_basic_conversation_with_thinking_enabled.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_maintains_thinking_mode_across_multiple_turns.yml create mode 100644 spec/ruby_llm/chat_thinking_spec.rb diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_can_handle_basic_conversation_with_thinking_enabled.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_can_handle_basic_conversation_with_thinking_enabled.yml new file mode 100644 index 000000000..9130d4f3a --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_can_handle_basic_conversation_with_thinking_enabled.yml @@ -0,0 +1,88 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-7-sonnet-20250219","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 2 + 2? 
Think through this step by step."}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:38:50 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:38:47Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:38:51Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:38:47Z' + Anthropic-Ratelimit-Tokens-Limit: + - '28000' + Anthropic-Ratelimit-Tokens-Remaining: + - '28000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:38:47Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01H8phMkgUJm2jRb6mjh1TyE","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[{"type":"thinking","thinking":"This + is a very basic arithmetic problem. Let''s solve it step by step.\n\n2 + 2 + means I need to add the number 2 with the number 2.\n\nTo add these numbers, + I can think of it as:\n- Starting with 2\n- Then adding 2 more\n\nSo:\n2 + + 2 = 4\n\nThat''s the answer: 4.","signature":"ErUBCkYIBRgCIkDYEWR6TDzVtqVJyMdFWFx9CUzT61wYklKVzM6g2GYLr0biNf88UKUY851WNn5+NWQ5BImkpVnBXuPrXYTLXLeoEgwajyIH8En4csCcQXMaDLvR03WVgYn4llf4SyIw9obvmcjnLQiqW1pJbZAYyBVChfUdu+4geQF/17LMVmH0j5hHgJOFlxeJRkKixyZQKh3F2jQ9AjbxG8PVSclPOFIi/2Ckm7Pgy8dzCm/fNxgC"},{"type":"text","text":"To + solve 2 + 2, I''ll break it down:\n\n1. I need to add the number 2 with another + number 2\n2. Addition means combining quantities together\n3. If I have 2 + items and get 2 more items, I''ll have a total of 4 items\n4. 
Therefore, 2 + + 2 = 4\n\nThe answer is 4."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":51,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":190,"service_tier":"standard"}}' + recorded_at: Fri, 18 Jul 2025 05:38:50 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_maintains_thinking_mode_across_multiple_turns.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_maintains_thinking_mode_across_multiple_turns.yml new file mode 100644 index 000000000..40756a8cf --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_maintains_thinking_mode_across_multiple_turns.yml @@ -0,0 +1,162 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-7-sonnet-20250219","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:38:52 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:38:51Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:38:52Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:38:51Z' + Anthropic-Ratelimit-Tokens-Limit: + - '28000' + Anthropic-Ratelimit-Tokens-Remaining: + - '28000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:38:51Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01UVtt5CXncUaY5eMdZz5yXk","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[{"type":"thinking","thinking":"This + is a simple addition problem.\n\n5 + 3 = 8\n\nSo the answer is 8.","signature":"ErUBCkYIBRgCIkCj2IP3xc6RELPu8t9/N1bormyfIlJHWjS35zy3nE/PwNDRooJm43bzvCxFsJji6R7cAImgnsSKdSadE5UTSxybEgxzjihH9btpHvFvYd4aDM2QOkD1VQddEb3W+yIwiBdK30embODvJqFB4RG45ySacd2jmju/7B8PWp2NyZkdtlKXnrA7U3eqqgGqY2rCKh1JUx4LQqzYQjbVSvZjQ7bOCIkln9tg6ZUJFiWCvBgC"},{"type":"text","text":"The + sum of 5 + 3 is 8."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":44,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":51,"service_tier":"standard"}}' + recorded_at: Fri, 18 Jul 2025 05:38:52 GMT +- request: + method: post + uri: 
https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-7-sonnet-20250219","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]},{"role":"assistant","content":[{"type":"text","text":"The sum of + 5 + 3 is 8."}]},{"role":"user","content":[{"type":"text","text":"Now multiply + that result by 2"}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:38:54 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:38:53Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:38:54Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:38:53Z' + Anthropic-Ratelimit-Tokens-Limit: + - '28000' + Anthropic-Ratelimit-Tokens-Remaining: + - '28000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:38:53Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: !binary |- + eyJpZCI6Im1zZ18wMUVHVjc0ZTlhOUpkaXFHY0J6Z1hGNWoiLCJ0eXBlIjoibWVzc2FnZSIsInJvbGUiOiJhc3Npc3RhbnQiLCJtb2RlbCI6ImNsYXVkZS0zLTctc29ubmV0LTIwMjUwMjE5IiwiY29udGVudCI6W3sidHlwZSI6InRoaW5raW5nIiwidGhpbmtpbmciOiJJIG5lZWQgdG8gbXVsdGlwbHkgdGhlIHByZXZpb3VzIHJlc3VsdCAoOCkgYnkgMi5cblxuOCDDlyAyID0gMTYiLCJzaWduYXR1cmUiOiJFclVCQ2tZSUJSZ0NJa0NDeEhjQXVEcU1MMTlCVytFSjNxQmhjKyt0S0w2TlpHNlprN1h6NTNqeDQyVGxybi9xSTNVenVMZE9ZeVdFVm1OekR5T1cvdUhLMS9oMEV1TWxQZVBoRWd3TzlzV25IclJCV0lVOGZJOGFERHVjbXcxcnZndXNKVzRXS2lJdys1elY1NGw2dlRkaU9sOWhsRGVTNTlMUk9od2Q5d2VUb1Y0QTRFbmpKYTJSWlFuTjJGY3VHTVNBK3FvNE5PVy9LaDNyT3RiQzNlUGVvQ25LdkMyS01zaVNpTkliOWxKY2hQT01tQ0hYaFJnQyJ9LHsidHlwZSI6InRleHQiLCJ0ZXh0IjoiVG8gbXVsdGlwbHkgdGhlIHByZXZpb3VzIHJlc3VsdCBieSAyOlxuXG44IMOXIDIgPSAxNlxuXG5UaGUgYW5zd2VyIGlzIDE2LiJ9XSwic3RvcF9yZWFzb24iOiJlbmRfdHVybiIsInN0b3Bfc2VxdWVuY2UiOm51bGwsInVzYWdlIjp7ImlucHV0X3Rva2VucyI6NzEsImNhY2hlX2NyZWF0aW9uX2lucHV0X3Rva2VucyI6MCwiY2FjaGVfcmVhZF9pbnB1dF90b2tlbnMiOjAsIm91dHB1dF90b2tlbnMiOjYyLCJzZXJ2aWNlX3RpZXIiOiJzdGFuZGFyZCJ9fQ== + recorded_at: Fri, 18 Jul 2025 05:38:54 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_can_handle_basic_conversation_with_thinking_enabled.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_can_handle_basic_conversation_with_thinking_enabled.yml new file mode 100644 index 000000000..b3a4e0924 --- /dev/null +++ 
b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_can_handle_basic_conversation_with_thinking_enabled.yml @@ -0,0 +1,81 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-opus-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 2 + 2? Think through this step by step."}]}],"temperature":1,"stream":false,"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:23 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:39:17Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:27Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:39:16Z' + Anthropic-Ratelimit-Tokens-Limit: + - '24000' + Anthropic-Ratelimit-Tokens-Remaining: + - '24000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:39:17Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: !binary |- + 
eyJpZCI6Im1zZ18wMTlDMnA3cVdudzQ3RFpBeXNyc2JlUGkiLCJ0eXBlIjoibWVzc2FnZSIsInJvbGUiOiJhc3Npc3RhbnQiLCJtb2RlbCI6ImNsYXVkZS1vcHVzLTQtMjAyNTA1MTQiLCJjb250ZW50IjpbeyJ0eXBlIjoidGhpbmtpbmciLCJ0aGlua2luZyI6IlRoaXMgaXMgYSB2ZXJ5IHNpbXBsZSBhcml0aG1ldGljIHF1ZXN0aW9uLiBUaGUgdXNlciBpcyBhc2tpbmcgbWUgdG8gYWRkIDIgKyAyLCBidXQgdGhleSd2ZSBhbHNvIGFza2VkIG1lIHRvIHRoaW5rIHRocm91Z2ggaXQgc3RlcCBieSBzdGVwLiBFdmVuIHRob3VnaCB0aGlzIGlzIGVsZW1lbnRhcnksIEkgc2hvdWxkIGhvbm9yIHRoZWlyIHJlcXVlc3QgdG8gc2hvdyB0aGUgc3RlcHMuXG5cbjIgKyAyID0gNFxuXG5JIGNhbiBleHBsYWluIHRoaXMgaW4gYSBmZXcgd2F5czpcbi0gQ291bnRpbmcgdXA6IFN0YXJ0aW5nIGF0IDIsIGNvdW50IHVwIDIgbW9yZTogMywgNFxuLSBHcm91cGluZzogSWYgeW91IGhhdmUgMiBpdGVtcyBhbmQgYWRkIDIgbW9yZSBpdGVtcywgeW91IGhhdmUgNCBpdGVtcyB0b3RhbFxuLSBOdW1iZXIgbGluZTogU3RhcnRpbmcgYXQgMiBvbiBhIG51bWJlciBsaW5lLCBtb3ZlIDIgc3BhY2VzIHRvIHRoZSByaWdodCwgbGFuZGluZyBvbiA0Iiwic2lnbmF0dXJlIjoiRW9vRkNrWUlCUmdDS2tBWEpSVXVJdTlXT00za1NySTl0WHFJRkxwZEhudHg2NWxuc1N3cmIzRU0vbjZpTFd2MmZTVDlFSERvWU1seEtaLzlWZGx3bm04bnhLSHFtYVVwb0NOakVnekJIWVREYlBzajNVQTZ5STRhRExhL2IxSGtaWE5QVmU3U2Z5SXc5OTlucm0zazN1azZkczlES3dSMmhQOFdrbHpGZnBoWHptOFZYdkNJK1AySHRLUmJmYncwc0hrNlFVTGdhQXVoS3ZFRHFITVovd3I0OTFEMHh4Y1hyL2JmRUxuYXFyUkhlbVZvZ2NMc2hlTWxWeDVNdUJURThLM3pYekZQZ1B1ZU9XeFNZb1FFcnVCUnQ4QzN1VjQwTTArMjFsZE5zYndwbEl4QWJhQ2s3dTdteHR2eTRPeFhzVllRTU05UVYwZzlyWFBEY1lFZzR1YTJ1N1B5RXZZTkNhRDZHWVZ2MFp2TU9mSGxxTm11T1FuZGRIYlRac1EreGhJeE5Cb1pwbWVQeU9uSFQwRXkvbUxxN0ppOWdBVlZ2QThlejcwODdlMjIxdW5sS0NMZFVGSjFPNlFWd2ZZTGRIZjBqYzNRR3l4ODlOWXVZamE2MUlRTTlReEJkekx5dUFaYmV4QjBTK3hreWVKdk9xNFA5N05CdjBWT29Zblc5bklPZW1rdDc2Sis5U3RBVnJjT01CRzdaa1ZtNWNPa0ZVS0VsT0o5SVI3SzFPWTZHd2dLbDJpcUFCLzBpcVB1Y01qZnhaQkV4OFRIcjVpMmtLeUtUbHFRK3k5WUpmeWlKcS9BRXMrRW5wTUJDNUxRK2I1dERud0lTeEV2eDNGcnpUNnE0WVcxMU8yVWgvVkdGS0RUUEY4NkZyUDVFaUJBNXFPVW9LUW55NWtlK2trbTM5MWo3Q0dXY1FHWmxkSkpEZnNhUHQ0a0x0WGdvcXR6ZWZjcE9FR1R5dGFRRWxidGhBWjFyaS91ZHh1Rk9FRnZyWm41VExnY0Zpd0lnd2xKamIrb2dnbHkxWmRJUGcrRFI3blVrSEllbHB4L2gvamhZR1dlM3drYXFUMEpINzZ5L3JGTkd0NHpZZ0dVMElwM2xydXdFS0NVbEtYVC90Q1lCZzhBL1N1TkNMNk9IUGFOVFRVaFZORVlBUT09In0seyJ0eXBlIjoidGV4dCIsInRleHQiOiJJJ2xsIHdvcmsgdGhyb3VnaCB0aGlzIGFkZGl0aW9uIHN0ZXAgYnkgc3RlcC5cblxuU3RhcnRpbmcgd2l0aDogMiArIDJcblxuU3RlcCAxOiBUYWtlIHRoZSBmaXJzdCBudW1iZXIsIHdoaWNoIGlzIDJcblN0ZXAgMjogQWRkIHRoZSBzZWNvbmQgbnVtYmVyLCB3aGljaCBpcyBhbHNvIDJcblN0ZXAgMzogQ291bnQgdXAgZnJvbSAyOiBcbi0gU3RhcnQgYXQgMlxuLSBBZGQgMSDihpIgZ2V0IDNcbi0gQWRkIDEgbW9yZSDihpIgZ2V0IDRcblxuVGhlcmVmb3JlOiAyICsgMiA9IDRcblxuQW5vdGhlciB3YXkgdG8gdGhpbmsgYWJvdXQgaXQ6IElmIHlvdSBoYXZlIDIgb2JqZWN0cyBhbmQgc29tZW9uZSBnaXZlcyB5b3UgMiBtb3JlIG9iamVjdHMsIHlvdSBub3cgaGF2ZSA0IG9iamVjdHMgdG90YWwuIn1dLCJzdG9wX3JlYXNvbiI6ImVuZF90dXJuIiwic3RvcF9zZXF1ZW5jZSI6bnVsbCwidXNhZ2UiOnsiaW5wdXRfdG9rZW5zIjo1MSwiY2FjaGVfY3JlYXRpb25faW5wdXRfdG9rZW5zIjowLCJjYWNoZV9yZWFkX2lucHV0X3Rva2VucyI6MCwib3V0cHV0X3Rva2VucyI6Mjk4LCJzZXJ2aWNlX3RpZXIiOiJzdGFuZGFyZCJ9fQ== + recorded_at: Fri, 18 Jul 2025 05:39:23 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_maintains_thinking_mode_across_multiple_turns.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_maintains_thinking_mode_across_multiple_turns.yml new file mode 100644 index 000000000..64f170189 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_maintains_thinking_mode_across_multiple_turns.yml @@ -0,0 +1,161 @@ +--- +http_interactions: 
+- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-opus-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]}],"temperature":1,"stream":false,"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:26 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:39:26Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:27Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:39:25Z' + Anthropic-Ratelimit-Tokens-Limit: + - '24000' + Anthropic-Ratelimit-Tokens-Remaining: + - '24000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:39:26Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01YXCT7MgRWQbxwwfs1bWhN2","type":"message","role":"assistant","model":"claude-opus-4-20250514","content":[{"type":"thinking","thinking":"This + is a simple arithmetic question. 
5 + 3 = 8.","signature":"EtgBCkYIBRgCKkCyqtg4YSovHjJWjT5xWNBV0HDNY0NkeiSISwchPehu+JHqF14GKTlprSnmlk1ohL26KlGnQRhwg33jqkxTjsJiEgz7IAVT6nqF9r6eMC8aDDtFLpYkLlDKnJjnpSIwvHR9G483A2OajVNq3vWQr7SfmZ7p5CnDQNuZp/QkVIQMc8IGCOtLX15SWVC2HKeaKkAcQTrZsHVQM5K8hKFfSDAEngoOyzJ0kus67m+ETlZZL4r1WFKIc9VoOMlD0yej7XnEFDlG1Ck5oCzPH1qrNKD1GAE="},{"type":"text","text":"5 + + 3 = 8"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":44,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":39,"service_tier":"standard"}}' + recorded_at: Fri, 18 Jul 2025 05:39:26 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-opus-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]},{"role":"assistant","content":[{"type":"text","text":"5 + 3 = 8"}]},{"role":"user","content":[{"type":"text","text":"Now + multiply that result by 2"}]}],"temperature":1,"stream":false,"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:29 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:39:28Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:29Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:39:28Z' + Anthropic-Ratelimit-Tokens-Limit: + - '24000' + Anthropic-Ratelimit-Tokens-Remaining: + - '24000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:39:28Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: !binary |- + 
eyJpZCI6Im1zZ18wMVdLY24zeGNYMlJ1Y2JieHdGdTQ0NWgiLCJ0eXBlIjoibWVzc2FnZSIsInJvbGUiOiJhc3Npc3RhbnQiLCJtb2RlbCI6ImNsYXVkZS1vcHVzLTQtMjAyNTA1MTQiLCJjb250ZW50IjpbeyJ0eXBlIjoidGhpbmtpbmciLCJ0aGlua2luZyI6IlRoZSBwcmV2aW91cyByZXN1bHQgd2FzIDggKGZyb20gNSArIDMpLiBOb3cgSSBuZWVkIHRvIG11bHRpcGx5IDggYnkgMi5cbjggw5cgMiA9IDE2Iiwic2lnbmF0dXJlIjoiRXZvQkNrWUlCUmdDS2tDSDBkcWh3Qi9rRUttajBUcHJlellxVmpuOHZES1FLQ3FwK2NNeFBCbHoxb096TFdrNDF0ZUFRYkJ0cnd3bkp4QXA4OUQ4VkdNemxIWEZIa21xYTZKTUVnd2w0MENqSExySDVmV2hxRGdhREptdzdDWXNPTUNUMnVhZDZ5SXdLcmhIb3N3dnNsUmE2UFMxYmZ4VVlhVTk2Rk92WSt2Y2QwRThWV1RYQUN3bHFqWHZkZFJ5d01qckJDbWpPenhZS21LL1N1dnM1dGo4ZWE3Wll0THdzODBYYXNrQ0E5OGw2N0JVVktYZys0cWpQRWZvNjYzZWJSRkRsZEpvR2F5L1ByUjJULzlVL1ZjZStvZGdleCtlSXB4KzcxWTBRdC95T2dJZ2NvbG9XckthaWpKemdFdkltd1RwVXNRWENjK2JJMUV0SUJnQiJ9LHsidHlwZSI6InRleHQiLCJ0ZXh0IjoiOCDDlyAyID0gMTYifV0sInN0b3BfcmVhc29uIjoiZW5kX3R1cm4iLCJzdG9wX3NlcXVlbmNlIjpudWxsLCJ1c2FnZSI6eyJpbnB1dF90b2tlbnMiOjY3LCJjYWNoZV9jcmVhdGlvbl9pbnB1dF90b2tlbnMiOjAsImNhY2hlX3JlYWRfaW5wdXRfdG9rZW5zIjowLCJvdXRwdXRfdG9rZW5zIjo1OSwic2VydmljZV90aWVyIjoic3RhbmRhcmQifX0= + recorded_at: Fri, 18 Jul 2025 05:39:29 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_can_handle_basic_conversation_with_thinking_enabled.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_can_handle_basic_conversation_with_thinking_enabled.yml new file mode 100644 index 000000000..f2d9ca256 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_can_handle_basic_conversation_with_thinking_enabled.yml @@ -0,0 +1,93 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 2 + 2? 
Think through this step by step."}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:01 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:38:55Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:03Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:38:55Z' + Anthropic-Ratelimit-Tokens-Limit: + - '38000' + Anthropic-Ratelimit-Tokens-Remaining: + - '38000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:38:55Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01ELjQtoVzofUkfT2MG4NnvY","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[{"type":"thinking","thinking":"This + is a very basic arithmetic question. Let me think through it step by step + as requested.\n\n2 + 2\n\nThis is asking me to add the number 2 to itself.\n\nStep + 1: I start with the first number, which is 2.\nStep 2: I need to add 2 to + it.\nStep 3: 2 + 2 = 4\n\nI can think of this in several ways:\n- Counting: + If I have 2 items, and I add 2 more items, I have 4 items total\n- On a number + line: Starting at 2, if I move 2 spaces to the right, I land on 4\n- Basic + addition fact: This is one of the fundamental addition facts that 2 + 2 = + 4","signature":"ErkFCkYIBRgCKkAGD5ERRvwl/3LTfinLm7VjiqqC4SUrYGpZtWlYkibMcIFwHN/sTGd8t4tNZmDLjZrsHp4qLIeQ2ZaXrx/cdekJEgwdoMEt+uVUTc41uWkaDNgGvIHy6w6qICAlXiIwPii9Ia+JG/IRms/CawNXQ8IutTtFXpmc5cE0XCSXq3BEohpEaGHVUwzMbnG2KH5OKqAE/u5S+Nw+AsmuwpxOzd+CnFM6yvqYDBbRCZRDJqlWaffXEmeZp5066Yl9CePSoBqP7MXRbJE35kvwrfZZXuwFH88Xmkxv7WOZkL2L8keUXX9LamLnxrQthqSqivuhskIJUiup5gsUTQUztKOztDxcwEfGDOeBIVeZ8GRqXusIy2cZxQNF87fPvj712KyXkrUej05qAAmyLLlRF2U+e0Rmuq2uKVqifmk6uvL+qM/5nVxknMAlBgU3h8wPR0NYQdVGC8Y6qi+IDn5vahBSADCnraBXv/PeY/dLzyIq1WsBFbnOriSIYNhVhulBlGm6e3dIk6L54j1xT41d5RxFXYXZi5ROSAgyapLN0eX/XI+m6Fd09Y953n0YSSKlBiEJvet6ZB/9PD06oQf7zWrQ4Edqm0QBc97UZjPVZp/7aAbpfkSmYq3JkyT0YLUGbDOvEQTdaJLN5dlPkB5fmihZKA2DrqhOrI4+/bBtj4wO66M3wvnQqIBJkivzHVOqwJz4eJ7ZjZFw8dHTmb7/lEY8f3dn42kNwfY735j7vvrvpHCXw5ST3h4fW88/IHymeWtr0U7ohFMbzhxo2vY7nFah5wHd7RUqEOwtSlDvicXj4O0tvp/0cITCn6dqYCAk1fh1EBiw+OfLLRypTfFGSH5KV0Wb3TF3m944/KeSx8Jetmd2xVSBEwfhYolr9UacGENv8eoQAvMm1hMM5lBC+DGTLJZErRgB"},{"type":"text","text":"I''ll + work through this step by step:\n\n**Step 1:** Start with the first number: + 2\n\n**Step 2:** Add the second number: 2\n\n**Step 3:** Combine them: 2 + + 2\n\n**Step 4:** Calculate the sum: 4\n\nSo 2 + 2 = 4.\n\nYou can think of + this as having 2 objects, then adding 2 more 
objects, which gives you 4 objects + total."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":51,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":298,"service_tier":"standard"}}' + recorded_at: Fri, 18 Jul 2025 05:39:01 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_maintains_thinking_mode_across_multiple_turns.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_maintains_thinking_mode_across_multiple_turns.yml new file mode 100644 index 000000000..fa8068eab --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_maintains_thinking_mode_across_multiple_turns.yml @@ -0,0 +1,161 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:03 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:39:03Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:03Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:39:03Z' + Anthropic-Ratelimit-Tokens-Limit: + - '38000' + Anthropic-Ratelimit-Tokens-Remaining: + - '38000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:39:03Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01BzuwUNCKCLie4m3CUR9sRr","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[{"type":"thinking","thinking":"This + is a simple addition problem. 
5 + 3 = 8.","signature":"EtUBCkYIBRgCKkBTKgGEUioRSsJoxhuzo1AT0IuxJmwIxAkRRNe15z10h5e8CMTYppvz7xt2wwXONyqpRoWYmGBQfSlspsv8kJpBEgzBiQiuQU2am3MN3YQaDJow1tAgVZRXRLLi3CIw67uUejmUfdAYNzynxnnHwd6Ba6C4OgDvQ8yqX7Au6xBHmHFYDhUYzbinIo/WSGLjKj0jXruNlLkGAoGxJ0IGvpuX0979sQApiBQaoBJFXMoa4ryX9E77kbLyprbcwg76uE6yCTbh41h0yxU/G+/BGAE="},{"type":"text","text":"5 + + 3 = 8"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":44,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":39,"service_tier":"standard"}}' + recorded_at: Fri, 18 Jul 2025 05:39:03 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]},{"role":"assistant","content":[{"type":"text","text":"5 + 3 = 8"}]},{"role":"user","content":[{"type":"text","text":"Now + multiply that result by 2"}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:14 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:39:14Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:15Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:39:05Z' + Anthropic-Ratelimit-Tokens-Limit: + - '38000' + Anthropic-Ratelimit-Tokens-Remaining: + - '38000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:39:14Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: !binary |- + 
eyJpZCI6Im1zZ18wMThabnY0d1BGYWtWeHJFaUs5Yjd2a3ciLCJ0eXBlIjoibWVzc2FnZSIsInJvbGUiOiJhc3Npc3RhbnQiLCJtb2RlbCI6ImNsYXVkZS1zb25uZXQtNC0yMDI1MDUxNCIsImNvbnRlbnQiOlt7InR5cGUiOiJ0aGlua2luZyIsInRoaW5raW5nIjoiVGhlIHVzZXIgYXNrZWQgbWUgdG8gbXVsdGlwbHkgdGhlIHJlc3VsdCBmcm9tIHRoZSBwcmV2aW91cyBjYWxjdWxhdGlvbiBieSAyLiBUaGUgcHJldmlvdXMgcmVzdWx0IHdhcyA4LCBzbyBJIG5lZWQgdG8gY2FsY3VsYXRlIDggw5cgMi5cblxuOCDDlyAyID0gMTYiLCJzaWduYXR1cmUiOiJFcndDQ2tZSUJSZ0NLa0J4TGdyNWdYVTVyOEJJN000cXVkUFRsdmdUaTBheGFFRWZLdmttaWxkMFQxYjBCajB6cHB6UUc4WE1GU1gyOC91WldnL21QM1UyK1JGZy9PRmpON2RjRWd5ZW0xMDMvbXg1UENhN1QrUWFESGEvVk5oR2RBd2ZwMEc1ZFNJd1lKdU5MbXNBMHptR01IZy8wNjFpZWVxcXRiN3lKQVkxSGE0NU5aVElDZ1M5SkQ1NC9lQ2dJMnIvVlJxckZQbHdLcU1CWDltenU3UU12Q3pXNWJrMjhPRVp5MDA4TS9LSVVqZkZNWHZlSE1DbzFMWWJUaWxFLzBPbEJ5RFdpRVNQRzVPVTE5R3Q3SWpqT3dwUkFSaERGcnNJSTJiNE9WeHhXMzdZc0x2RG1vclFodkdxYTBHQWFYNjQ5dUVFNWkxZExzQURUdTN4a3lWZGRsZGJKeXYyQ290TllNR0JBNW9nNk1FRlpNNVlxUlJCcmdSUlBpVDNFaUNZK2R4TWxKRjRmejZ1U3lHeFRvaktMTEM5L211c0NrYitWQ09vZ2hnQiJ9LHsidHlwZSI6InRleHQiLCJ0ZXh0IjoiOCDDlyAyID0gMTYifV0sInN0b3BfcmVhc29uIjoiZW5kX3R1cm4iLCJzdG9wX3NlcXVlbmNlIjpudWxsLCJ1c2FnZSI6eyJpbnB1dF90b2tlbnMiOjY3LCJjYWNoZV9jcmVhdGlvbl9pbnB1dF90b2tlbnMiOjAsImNhY2hlX3JlYWRfaW5wdXRfdG9rZW5zIjowLCJvdXRwdXRfdG9rZW5zIjo2Niwic2VydmljZV90aWVyIjoic3RhbmRhcmQifX0= + recorded_at: Fri, 18 Jul 2025 05:39:14 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/ruby_llm/chat_thinking_spec.rb b/spec/ruby_llm/chat_thinking_spec.rb new file mode 100644 index 000000000..4f968f25e --- /dev/null +++ b/spec/ruby_llm/chat_thinking_spec.rb @@ -0,0 +1,109 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Chat do + include_context 'with configured RubyLLM' + + describe 'thinking mode functionality' do + describe '#with_thinking' do + context 'with thinking-capable models' do # rubocop:disable RSpec/NestedGroups + THINKING_MODELS.each do |model_info| + model = model_info[:model] + provider = model_info[:provider] + + it "#{provider}/#{model} enables thinking mode successfully" do # rubocop:disable RSpec/MultipleExpectations + chat = RubyLLM.chat(model: model, provider: provider) + + expect { chat.with_thinking }.not_to raise_error + expect(chat.instance_variable_get(:@thinking)).to be true + expect(chat.instance_variable_get(:@temperature)).to eq 1 + end + + it "#{provider}/#{model} accepts custom thinking parameters" do # rubocop:disable RSpec/MultipleExpectations + chat = RubyLLM.chat(model: model, provider: provider) + + chat.with_thinking(budget: 20_000, temperature: 0.8) + + expect(chat.instance_variable_get(:@thinking)).to be true + expect(chat.instance_variable_get(:@thinking_budget)).to eq 20_000 + expect(chat.instance_variable_get(:@temperature)).to eq 0.8 + end + + it "#{provider}/#{model} can disable thinking mode" do + chat = RubyLLM.chat(model: model, provider: provider) + + chat.with_thinking(thinking: false) + + expect(chat.instance_variable_get(:@thinking)).to be false + end + + it "#{provider}/#{model} can chain with other methods" do # rubocop:disable RSpec/MultipleExpectations + chat = RubyLLM.chat(model: model, provider: provider) + + result = chat.with_thinking.with_temperature(0.5) + + expect(result).to be_a(described_class) + expect(chat.instance_variable_get(:@thinking)).to be true + # Temperature should be overridden by the subsequent with_temperature call + expect(chat.instance_variable_get(:@temperature)).to eq 0.5 + end + end + end + + context 'with non-thinking models' do # rubocop:disable RSpec/NestedGroups + NON_THINKING_MODELS.each do 
|model_info|
+          model = model_info[:model]
+          provider = model_info[:provider]
+
+          it "#{provider}/#{model} raises UnsupportedThinkingError when enabling thinking" do
+            chat = RubyLLM.chat(model: model, provider: provider)
+
+            expect { chat.with_thinking }.to raise_error(RubyLLM::UnsupportedThinkingError)
+          end
+
+          it "#{provider}/#{model} allows disabling thinking without error" do # rubocop:disable RSpec/MultipleExpectations
+            chat = RubyLLM.chat(model: model, provider: provider)
+
+            expect { chat.with_thinking(thinking: false) }.not_to raise_error
+            expect(chat.instance_variable_get(:@thinking)).to be false
+          end
+        end
+      end
+    end
+
+    describe 'thinking mode integration with chat' do
+      THINKING_MODELS.each do |model_info|
+        model = model_info[:model]
+        provider = model_info[:provider]
+
+        it "#{provider}/#{model} can handle basic conversation with thinking enabled" do # rubocop:disable RSpec/MultipleExpectations, RSpec/ExampleLength
+          chat = RubyLLM.chat(model: model, provider: provider)
+          chat.with_thinking
+
+          response = chat.ask("What's 2 + 2? Think through this step by step.")
+
+          expect(response.content).to be_present
+          expect(response.thinking).to be_present
+          expect(response.role).to eq(:assistant)
+          expect(response.input_tokens).to be_positive
+          expect(response.output_tokens).to be_positive
+        end
+
+        it "#{provider}/#{model} maintains thinking mode across multiple turns" do # rubocop:disable RSpec/MultipleExpectations, RSpec/ExampleLength
+          chat = RubyLLM.chat(model: model, provider: provider)
+          chat.with_thinking
+
+          first = chat.ask("What's 5 + 3?")
+          expect(first.content).to include('8')
+
+          second = chat.ask('Now multiply that result by 2')
+          expect(second.content).to include('16')
+
+          # Thinking mode should still be enabled
+          expect(chat.instance_variable_get(:@thinking)).to be true
+        end
+      end
+    end
+  end
+end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index f3ac86c06..5d2d6bc73 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -160,3 +160,14 @@
 AUDIO_MODELS = [
   { provider: :openai, model: 'gpt-4o-mini-audio-preview' }
 ].freeze
+
+THINKING_MODELS = [
+  { model: 'claude-3-7-sonnet', provider: 'anthropic' },
+  { model: 'claude-sonnet-4', provider: 'anthropic' },
+  { model: 'claude-opus-4', provider: 'anthropic' }
+].freeze
+
+NON_THINKING_MODELS = [
+  { model: 'claude-3-haiku', provider: 'anthropic' },
+  { model: 'claude-3-sonnet', provider: 'anthropic' }
+].freeze

From 06daa1cd0a1daa5c8eb07f21b744cc8050e8e517 Mon Sep 17 00:00:00 2001
From: Rhys Murray
Date: Fri, 18 Jul 2025 16:30:50 +1000
Subject: [PATCH 16/16] bug: ensure render_payload args compatibility across
 all providers - use ** instead of including unused parameters

---
 lib/ruby_llm/providers/bedrock/chat.rb | 2 +-
 lib/ruby_llm/providers/gemini/chat.rb  | 2 +-
 lib/ruby_llm/providers/openai/chat.rb  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb
index d6ae8139a..9d5486d4a 100644
--- a/lib/ruby_llm/providers/bedrock/chat.rb
+++ b/lib/ruby_llm/providers/bedrock/chat.rb
@@ -39,7 +39,7 @@ def completion_url
         "model/#{@model_id}/invoke"
       end
 
-      def render_payload(messages, tools:, temperature:, model:, thinking:, thinking_budget:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
+      def render_payload(messages, tools:, temperature:, model:, thinking:, thinking_budget:, **) # rubocop:disable Metrics/ParameterLists
        # Hold model_id in instance variable for use in 
completion_url and stream_url @model_id = model diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index fcb8eaa0e..bb0bd5258 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -11,7 +11,7 @@ def completion_url "models/#{@model}:generateContent" end - def render_payload(messages, tools:, temperature:, model:, stream: false, **) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, **) @model = model # Store model for completion_url/stream_url payload = { contents: format_messages(messages), diff --git a/lib/ruby_llm/providers/openai/chat.rb b/lib/ruby_llm/providers/openai/chat.rb index 697442b2f..b33cfffcb 100644 --- a/lib/ruby_llm/providers/openai/chat.rb +++ b/lib/ruby_llm/providers/openai/chat.rb @@ -11,7 +11,7 @@ def completion_url module_function - def render_payload(messages, tools:, temperature:, model:, stream: false) + def render_payload(messages, tools:, temperature:, model:, stream: false, **) # rubocop:disable Metrics/ParameterLists payload = { model: model, messages: format_messages(messages),