From 81ed7123c597a74283a8e5b01deb76868fbf7de3 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Wed, 14 May 2025 22:40:58 +1000 Subject: [PATCH 01/16] feat: wip - add thinking content to messages --- lib/ruby_llm/message.rb | 3 ++- lib/ruby_llm/model_info.rb | 7 ++++++- lib/ruby_llm/models.json | 1 + lib/ruby_llm/providers/anthropic/chat.rb | 21 +++++++++++++++++---- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/lib/ruby_llm/message.rb b/lib/ruby_llm/message.rb index 6e753b9b1..d0e2c4508 100644 --- a/lib/ruby_llm/message.rb +++ b/lib/ruby_llm/message.rb @@ -7,11 +7,12 @@ module RubyLLM class Message ROLES = %i[system user assistant tool].freeze - attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id + attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :thinking_content def initialize(options = {}) @role = options[:role].to_sym @content = normalize_content(options[:content]) + @thinking_content = options[:thinking_content] @tool_calls = options[:tool_calls] @input_tokens = options[:input_tokens] @output_tokens = options[:output_tokens] diff --git a/lib/ruby_llm/model_info.rb b/lib/ruby_llm/model_info.rb index 5cd95801d..d5bc19127 100644 --- a/lib/ruby_llm/model_info.rb +++ b/lib/ruby_llm/model_info.rb @@ -12,7 +12,7 @@ module RubyLLM # model.input_price_per_million # => 30.0 class ModelInfo attr_reader :id, :name, :provider, :family, :created_at, :context_window, :max_output_tokens, :knowledge_cutoff, - :modalities, :capabilities, :pricing, :metadata + :modalities, :capabilities, :pricing, :metadata, :thinking def initialize(data) @id = data[:id] @@ -22,6 +22,7 @@ def initialize(data) @created_at = data[:created_at] @context_window = data[:context_window] @max_output_tokens = data[:max_output_tokens] + @thinking = data[:thinking] @knowledge_cutoff = data[:knowledge_cutoff] @modalities = Modalities.new(data[:modalities] || {}) @capabilities = data[:capabilities] || [] @@ -57,6 +58,10 @@ def supports_functions? function_calling? end + def supports_thinking? + thinking + end + def input_price_per_million pricing.text_tokens.input end diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index b8046969c..15ae6d1da 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -297,6 +297,7 @@ "created_at": null, "context_window": 200000, "max_output_tokens": 64000, + "thinking": true, "knowledge_cutoff": null, "modalities": { "input": [ diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index 2ba96009d..7320d8b76 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -39,9 +39,13 @@ def build_base_payload(chat_messages, temperature, model, stream) { model: model, messages: chat_messages.map { |msg| format_message(msg) }, - temperature: temperature, + temperature: 1, # TODO: Ensure to maintain this as being configurable - but must be set to 1 to enable thinking stream: stream, - max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096 + max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096, + thinking: { + type: RubyLLM.models.find(model)&.supports_thinking? ? 
'enabled' : 'disabled', # TODO: Make this configurable + budget_tokens: 1024 # TODO: Make this configurable + } } end @@ -52,12 +56,20 @@ def add_optional_fields(payload, system_content:, tools:) def parse_completion_response(response) data = response.body + RubyLLM.logger.debug("Anthropic response: #{data}") + content_blocks = data['content'] || [] + thinking_content = extract_thinking_content(content_blocks) text_content = extract_text_content(content_blocks) tool_use = Tools.find_tool_use(content_blocks) - build_message(data, text_content, tool_use) + build_message(data, text_content, tool_use, thinking_content) + end + + def extract_thinking_content(blocks) + thinking_blocks = blocks.select { |c| c['type'] == 'thinking' } + thinking_blocks.map { |c| c['thinking'] }.join end def extract_text_content(blocks) @@ -65,10 +77,11 @@ def extract_text_content(blocks) text_blocks.map { |c| c['text'] }.join end - def build_message(data, content, tool_use) + def build_message(data, content, tool_use, thinking_content) Message.new( role: :assistant, content: content, + thinking_content: thinking_content, tool_calls: Tools.parse_tool_calls(tool_use), input_tokens: data.dig('usage', 'input_tokens'), output_tokens: data.dig('usage', 'output_tokens'), From b6e1bb039ba2fe44aff569b6e3c2672000001ed5 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 19:06:04 +1000 Subject: [PATCH 02/16] chore: add thinking to capabilities --- lib/ruby_llm/models.json | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index 6ef13ef1e..002cc7381 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -162,7 +162,8 @@ "output": [] }, "capabilities": [ - "function_calling" + "function_calling", + "thinking" ], "pricing": { "text_tokens": { @@ -287,7 +288,8 @@ ] }, "capabilities": [ - "function_calling" + "function_calling", + "thinking" ], "pricing": { "text_tokens": { @@ -319,7 +321,8 @@ ] }, "capabilities": [ - "function_calling" + "function_calling", + "thinking" ], "pricing": { "text_tokens": { @@ -9512,7 +9515,8 @@ }, "capabilities": [ "streaming", - "function_calling" + "function_calling", + "thinking" ], "pricing": { "text_tokens": { From ecb69c9c655d70e268ab5dcc7c3ae179f61484f9 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 19:48:35 +1000 Subject: [PATCH 03/16] chore: pass thinking through from chat initialisation --- lib/ruby_llm/chat.rb | 12 ++++++++++-- lib/ruby_llm/model/info.rb | 2 +- lib/ruby_llm/provider.rb | 3 ++- lib/ruby_llm/providers/anthropic/chat.rb | 18 ++++++++++-------- lib/ruby_llm/providers/bedrock/chat.rb | 2 +- lib/ruby_llm/providers/gemini/chat.rb | 2 +- lib/ruby_llm/providers/openai/chat.rb | 2 +- 7 files changed, 26 insertions(+), 15 deletions(-) diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index 3b5bfa83a..b56c27c7f 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -13,7 +13,7 @@ class Chat attr_reader :model, :messages, :tools - def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil) + def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil, thinking: false) if assume_model_exists && !provider raise ArgumentError, 'Provider must be specified if assume_model_exists is true' end @@ -22,6 +22,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n @config = context&.config || RubyLLM.config model_id = model || @config.default_model with_model(model_id, provider: 
provider, assume_exists: assume_model_exists) + @thinking = thinking @temperature = 0.7 @messages = [] @tools = {} @@ -60,9 +61,15 @@ def with_tools(*tools) self end - def with_model(model_id, provider: nil, assume_exists: false) + def with_model(model_id, provider: nil, thinking: nil, assume_exists: false) @model, @provider = Models.resolve(model_id, provider:, assume_exists:) @connection = @context ? @context.connection_for(@provider) : @provider.connection(@config) + + # Preserve thinking state from initialization + unless thinking.nil? + @thinking = thinking + end + self end @@ -99,6 +106,7 @@ def complete(&) tools: @tools, temperature: @temperature, model: @model.id, + thinking: @thinking, connection: @connection, & ) diff --git a/lib/ruby_llm/model/info.rb b/lib/ruby_llm/model/info.rb index 9c72bcdf3..cdee5a2aa 100644 --- a/lib/ruby_llm/model/info.rb +++ b/lib/ruby_llm/model/info.rb @@ -35,7 +35,7 @@ def supports?(capability) capabilities.include?(capability.to_s) end - %w[function_calling structured_output batch reasoning citations streaming].each do |cap| + %w[function_calling structured_output batch reasoning citations streaming thinking].each do |cap| define_method "#{cap}?" do supports?(cap) end diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb index 2b09cdee1..fe6854f6b 100644 --- a/lib/ruby_llm/provider.rb +++ b/lib/ruby_llm/provider.rb @@ -10,13 +10,14 @@ module Provider module Methods extend Streaming - def complete(messages, tools:, temperature:, model:, connection:, &) + def complete(messages, tools:, temperature:, model:, thinking:, connection:, &) normalized_temperature = maybe_normalize_temperature(temperature, model) payload = render_payload(messages, tools: tools, temperature: normalized_temperature, model: model, + thinking: thinking, stream: block_given?) if block_given? diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index 7320d8b76..0efe4b2d2 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -11,12 +11,12 @@ def completion_url '/v1/messages' end - def render_payload(messages, tools:, temperature:, model:, stream: false) + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) system_messages, chat_messages = separate_messages(messages) system_content = build_system_content(system_messages) build_base_payload(chat_messages, temperature, model, stream).tap do |payload| - add_optional_fields(payload, system_content:, tools:) + add_optional_fields(payload, system_content:, tools:, thinking:) end end @@ -41,17 +41,19 @@ def build_base_payload(chat_messages, temperature, model, stream) messages: chat_messages.map { |msg| format_message(msg) }, temperature: 1, # TODO: Ensure to maintain this as being configurable - but must be set to 1 to enable thinking stream: stream, - max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096, - thinking: { - type: RubyLLM.models.find(model)&.supports_thinking? ? 'enabled' : 'disabled', # TODO: Make this configurable - budget_tokens: 1024 # TODO: Make this configurable - } + max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096 } end - def add_optional_fields(payload, system_content:, tools:) + def add_optional_fields(payload, system_content:, tools:, thinking:) payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any? payload[:system] = system_content unless system_content.empty? 
+ if thinking + payload[:thinking] = { + type: 'enabled', + budget_tokens: 1024, # TODO: default + } + end end def parse_completion_response(response) diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb index 742579558..bb942338b 100644 --- a/lib/ruby_llm/providers/bedrock/chat.rb +++ b/lib/ruby_llm/providers/bedrock/chat.rb @@ -39,7 +39,7 @@ def completion_url "model/#{@model_id}/invoke" end - def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument # Hold model_id in instance variable for use in completion_url and stream_url @model_id = model diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index d6ba1696f..7bb794911 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -11,7 +11,7 @@ def completion_url "models/#{@model}:generateContent" end - def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument @model = model # Store model for completion_url/stream_url payload = { contents: format_messages(messages), diff --git a/lib/ruby_llm/providers/openai/chat.rb b/lib/ruby_llm/providers/openai/chat.rb index 545cc9bee..ffcd8dcf6 100644 --- a/lib/ruby_llm/providers/openai/chat.rb +++ b/lib/ruby_llm/providers/openai/chat.rb @@ -11,7 +11,7 @@ def completion_url module_function - def render_payload(messages, tools:, temperature:, model:, stream: false) + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) { model: model, messages: format_messages(messages), From a014b7746ecfdabe9d1424771e80d6c2681bd046 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 19:57:00 +1000 Subject: [PATCH 04/16] chore: add very basic config for thinking budget through global configuration --- lib/ruby_llm/configuration.rb | 2 ++ lib/ruby_llm/providers/anthropic/chat.rb | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 0fd3bf23c..6bdf465ed 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -28,6 +28,7 @@ class Configuration :default_model, :default_embedding_model, :default_image_model, + :default_thinking_budget # Connection configuration :request_timeout, :max_retries, @@ -53,6 +54,7 @@ def initialize @default_model = 'gpt-4.1-nano' @default_embedding_model = 'text-embedding-3-small' @default_image_model = 'dall-e-3' + @default_thinking_budget = 1024 # Logging configuration @log_file = $stdout diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index 0efe4b2d2..cbf3b75d0 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -51,7 +51,7 @@ def add_optional_fields(payload, system_content:, tools:, thinking:) if thinking payload[:thinking] = { type: 'enabled', - budget_tokens: 1024, # TODO: default + budget_tokens: RubyLLM.configuration.default_thinking_budget || 1024, } end end From ddb0ae1c697b20191790d601d4a19ca07fe5423c Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 20:35:38 +1000 Subject: [PATCH 05/16] bug: fix config missing comma --- 
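Note: the missing comma matters because it silently terminates the
attr_accessor call. Without a trailing comma after :default_thinking_budget,
the accessor symbols that follow it are parsed as a bare comma-separated
expression list, which is a SyntaxError, so the library cannot even be loaded.
A minimal sketch of the failure mode, using a hypothetical Config class rather
than the real RubyLLM::Configuration:

    class Config
      # The attr_accessor call ends on the line below, because it has no
      # trailing comma:
      attr_accessor :default_model,
                    :default_thinking_budget
      # Putting back the symbols that followed it in configuration.rb, e.g.
      #
      #   :request_timeout,
      #   :max_retries
      #
      # leaves a bare `:request_timeout, :max_retries` expression behind,
      # which Ruby rejects as a syntax error at parse time.
    end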
lib/ruby_llm/configuration.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 6bdf465ed..4401548a8 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -28,7 +28,7 @@ class Configuration :default_model, :default_embedding_model, :default_image_model, - :default_thinking_budget + :default_thinking_budget, # Connection configuration :request_timeout, :max_retries, From 6d66491acc1c66128a9b0d5ef3b7797f099d420e Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 20:35:56 +1000 Subject: [PATCH 06/16] chore: add streaming content --- lib/ruby_llm/providers/anthropic/chat.rb | 2 +- lib/ruby_llm/providers/anthropic/streaming.rb | 1 + lib/ruby_llm/stream_accumulator.rb | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index cbf3b75d0..459a825a9 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -51,7 +51,7 @@ def add_optional_fields(payload, system_content:, tools:, thinking:) if thinking payload[:thinking] = { type: 'enabled', - budget_tokens: RubyLLM.configuration.default_thinking_budget || 1024, + budget_tokens: RubyLLM.config.default_thinking_budget || 1024, } end end diff --git a/lib/ruby_llm/providers/anthropic/streaming.rb b/lib/ruby_llm/providers/anthropic/streaming.rb index 3bf842150..a1d5c44f1 100644 --- a/lib/ruby_llm/providers/anthropic/streaming.rb +++ b/lib/ruby_llm/providers/anthropic/streaming.rb @@ -16,6 +16,7 @@ def build_chunk(data) role: :assistant, model_id: extract_model_id(data), content: data.dig('delta', 'text'), + thinking_content: data.dig('delta', 'thinking'), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), tool_calls: extract_tool_calls(data) diff --git a/lib/ruby_llm/stream_accumulator.rb b/lib/ruby_llm/stream_accumulator.rb index 7fca306a2..233ce9cae 100644 --- a/lib/ruby_llm/stream_accumulator.rb +++ b/lib/ruby_llm/stream_accumulator.rb @@ -9,6 +9,7 @@ class StreamAccumulator def initialize @content = String.new + @thinking_content = String.new @tool_calls = {} @input_tokens = 0 @output_tokens = 0 @@ -23,6 +24,7 @@ def add(chunk) accumulate_tool_calls chunk.tool_calls else @content << (chunk.content || '') + @thinking_content << (chunk.thinking_content || '') end count_tokens chunk @@ -33,6 +35,7 @@ def to_message Message.new( role: :assistant, content: content.empty? ? nil : content, + thinking_content: @thinking_content.empty? ? nil : @thinking_content, model_id: model_id, tool_calls: tool_calls_from_stream, input_tokens: @input_tokens.positive? ? 
@input_tokens : nil, From 7da672e7dd8058ef7c9e6687d6bb4a8a6a0511a6 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 6 Jun 2025 22:57:34 +1000 Subject: [PATCH 07/16] chore: rename to use existing reasoning capability --- lib/ruby_llm/chat.rb | 29 +++++++++++------- lib/ruby_llm/configuration.rb | 4 +-- lib/ruby_llm/error.rb | 1 + lib/ruby_llm/message.rb | 4 +-- lib/ruby_llm/model/info.rb | 2 +- lib/ruby_llm/models.json | 6 ++-- lib/ruby_llm/provider.rb | 4 +-- lib/ruby_llm/providers/anthropic/chat.rb | 30 +++++++++---------- lib/ruby_llm/providers/anthropic/streaming.rb | 2 +- lib/ruby_llm/providers/bedrock/chat.rb | 2 +- lib/ruby_llm/providers/gemini/chat.rb | 2 +- lib/ruby_llm/providers/openai/chat.rb | 2 +- lib/ruby_llm/stream_accumulator.rb | 6 ++-- 13 files changed, 52 insertions(+), 42 deletions(-) diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index b56c27c7f..ad77d01cf 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -13,7 +13,7 @@ class Chat attr_reader :model, :messages, :tools - def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil, thinking: false) + def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil) if assume_model_exists && !provider raise ArgumentError, 'Provider must be specified if assume_model_exists is true' end @@ -22,7 +22,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n @config = context&.config || RubyLLM.config model_id = model || @config.default_model with_model(model_id, provider: provider, assume_exists: assume_model_exists) - @thinking = thinking + @reasoning = false @temperature = 0.7 @messages = [] @tools = {} @@ -61,15 +61,11 @@ def with_tools(*tools) self end - def with_model(model_id, provider: nil, thinking: nil, assume_exists: false) + def with_model(model_id, provider: nil, assume_exists: false) @model, @provider = Models.resolve(model_id, provider:, assume_exists:) @connection = @context ? @context.connection_for(@provider) : @provider.connection(@config) - - # Preserve thinking state from initialization - unless thinking.nil? - @thinking = thinking - end - + # TODO: Currently the unsupported errors will not retrigger after model reassignment. + self end @@ -78,6 +74,15 @@ def with_temperature(temperature) self end + def with_reasoning(reasoning = true) + if reasoning && !@model.reasoning? + raise UnsupportedReasoningError, "Model #{@model.id} doesn't support reasoning" + end + + @reasoning = reasoning + self + end + def with_context(context) @context = context @config = context.config @@ -106,7 +111,7 @@ def complete(&) tools: @tools, temperature: @temperature, model: @model.id, - thinking: @thinking, + reasoning: @reasoning, connection: @connection, & ) @@ -130,6 +135,10 @@ def reset_messages! @messages.clear end + def thinking? 
+ @thinking + end + private def handle_tool_calls(response, &) diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 4401548a8..06de5bb97 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -28,7 +28,7 @@ class Configuration :default_model, :default_embedding_model, :default_image_model, - :default_thinking_budget, + :default_reasoning_budget, # Connection configuration :request_timeout, :max_retries, @@ -54,7 +54,7 @@ def initialize @default_model = 'gpt-4.1-nano' @default_embedding_model = 'text-embedding-3-small' @default_image_model = 'dall-e-3' - @default_thinking_budget = 1024 + @default_reasoning_budget = 1024 # Logging configuration @log_file = $stdout diff --git a/lib/ruby_llm/error.rb b/lib/ruby_llm/error.rb index 228053a5e..102ec3b79 100644 --- a/lib/ruby_llm/error.rb +++ b/lib/ruby_llm/error.rb @@ -25,6 +25,7 @@ class InvalidRoleError < StandardError; end class ModelNotFoundError < StandardError; end class UnsupportedFunctionsError < StandardError; end class UnsupportedAttachmentError < StandardError; end + class UnsupportedReasoningError < StandardError; end # Error classes for different HTTP status codes class BadRequestError < Error; end diff --git a/lib/ruby_llm/message.rb b/lib/ruby_llm/message.rb index f7b07bce7..5624920e2 100644 --- a/lib/ruby_llm/message.rb +++ b/lib/ruby_llm/message.rb @@ -7,12 +7,12 @@ module RubyLLM class Message ROLES = %i[system user assistant tool].freeze - attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :thinking_content + attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :reasoning_content def initialize(options = {}) @role = options.fetch(:role).to_sym @content = normalize_content(options.fetch(:content)) - @thinking_content = options[:thinking_content] + @reasoning_content = options[:reasoning_content] @tool_calls = options[:tool_calls] @input_tokens = options[:input_tokens] @output_tokens = options[:output_tokens] diff --git a/lib/ruby_llm/model/info.rb b/lib/ruby_llm/model/info.rb index cdee5a2aa..9c72bcdf3 100644 --- a/lib/ruby_llm/model/info.rb +++ b/lib/ruby_llm/model/info.rb @@ -35,7 +35,7 @@ def supports?(capability) capabilities.include?(capability.to_s) end - %w[function_calling structured_output batch reasoning citations streaming thinking].each do |cap| + %w[function_calling structured_output batch reasoning citations streaming].each do |cap| define_method "#{cap}?" 
do supports?(cap) end diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index 002cc7381..4a0cf19e3 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -163,7 +163,7 @@ }, "capabilities": [ "function_calling", - "thinking" + "reasoning" ], "pricing": { "text_tokens": { @@ -289,7 +289,7 @@ }, "capabilities": [ "function_calling", - "thinking" + "reasoning" ], "pricing": { "text_tokens": { @@ -322,7 +322,7 @@ }, "capabilities": [ "function_calling", - "thinking" + "reasoning" ], "pricing": { "text_tokens": { diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb index fe6854f6b..64a9fd1f9 100644 --- a/lib/ruby_llm/provider.rb +++ b/lib/ruby_llm/provider.rb @@ -10,14 +10,14 @@ module Provider module Methods extend Streaming - def complete(messages, tools:, temperature:, model:, thinking:, connection:, &) + def complete(messages, tools:, temperature:, model:, reasoning:, connection:, &) # rubocop:disable Metrics/ParameterLists normalized_temperature = maybe_normalize_temperature(temperature, model) payload = render_payload(messages, tools: tools, temperature: normalized_temperature, model: model, - thinking: thinking, + reasoning: reasoning, stream: block_given?) if block_given? diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index 459a825a9..d24e799aa 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -11,12 +11,12 @@ def completion_url '/v1/messages' end - def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) + def render_payload(messages, tools:, temperature:, model:, reasoning:, stream: false) # rubocop:disable Metrics/ParameterLists system_messages, chat_messages = separate_messages(messages) system_content = build_system_content(system_messages) build_base_payload(chat_messages, temperature, model, stream).tap do |payload| - add_optional_fields(payload, system_content:, tools:, thinking:) + add_optional_fields(payload, system_content:, tools:, reasoning:) end end @@ -39,21 +39,21 @@ def build_base_payload(chat_messages, temperature, model, stream) { model: model, messages: chat_messages.map { |msg| format_message(msg) }, - temperature: 1, # TODO: Ensure to maintain this as being configurable - but must be set to 1 to enable thinking + temperature: temperature, stream: stream, max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096 } end - def add_optional_fields(payload, system_content:, tools:, thinking:) + def add_optional_fields(payload, system_content:, tools:, reasoning:) payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any? payload[:system] = system_content unless system_content.empty? 
- if thinking - payload[:thinking] = { - type: 'enabled', - budget_tokens: RubyLLM.config.default_thinking_budget || 1024, - } - end + return unless reasoning + + payload[:thinking] = { + type: 'enabled', + budget_tokens: RubyLLM.config.default_reasoning_budget || 1024 + } end def parse_completion_response(response) @@ -62,14 +62,14 @@ def parse_completion_response(response) content_blocks = data['content'] || [] - thinking_content = extract_thinking_content(content_blocks) + reasoning_content = extract_reasoning_content(content_blocks) text_content = extract_text_content(content_blocks) tool_use = Tools.find_tool_use(content_blocks) - build_message(data, text_content, tool_use, thinking_content) + build_message(data, text_content, tool_use, reasoning_content) end - def extract_thinking_content(blocks) + def extract_reasoning_content(blocks) thinking_blocks = blocks.select { |c| c['type'] == 'thinking' } thinking_blocks.map { |c| c['thinking'] }.join end @@ -79,11 +79,11 @@ def extract_text_content(blocks) text_blocks.map { |c| c['text'] }.join end - def build_message(data, content, tool_use, thinking_content) + def build_message(data, content, tool_use, reasoning_content) Message.new( role: :assistant, content: content, - thinking_content: thinking_content, + reasoning_content: reasoning_content, tool_calls: Tools.parse_tool_calls(tool_use), input_tokens: data.dig('usage', 'input_tokens'), output_tokens: data.dig('usage', 'output_tokens'), diff --git a/lib/ruby_llm/providers/anthropic/streaming.rb b/lib/ruby_llm/providers/anthropic/streaming.rb index a1d5c44f1..6ebb07d73 100644 --- a/lib/ruby_llm/providers/anthropic/streaming.rb +++ b/lib/ruby_llm/providers/anthropic/streaming.rb @@ -16,7 +16,7 @@ def build_chunk(data) role: :assistant, model_id: extract_model_id(data), content: data.dig('delta', 'text'), - thinking_content: data.dig('delta', 'thinking'), + reasoning_content: data.dig('delta', 'thinking'), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), tool_calls: extract_tool_calls(data) diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb index bb942338b..62c33683a 100644 --- a/lib/ruby_llm/providers/bedrock/chat.rb +++ b/lib/ruby_llm/providers/bedrock/chat.rb @@ -39,7 +39,7 @@ def completion_url "model/#{@model_id}/invoke" end - def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists # Hold model_id in instance variable for use in completion_url and stream_url @model_id = model diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index 7bb794911..5a18546aa 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -11,7 +11,7 @@ def completion_url "models/#{@model}:generateContent" end - def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists @model = model # Store model for completion_url/stream_url payload = { contents: format_messages(messages), diff --git a/lib/ruby_llm/providers/openai/chat.rb b/lib/ruby_llm/providers/openai/chat.rb index ffcd8dcf6..8170b3849 100644 --- 
a/lib/ruby_llm/providers/openai/chat.rb +++ b/lib/ruby_llm/providers/openai/chat.rb @@ -11,7 +11,7 @@ def completion_url module_function - def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) + def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists { model: model, messages: format_messages(messages), diff --git a/lib/ruby_llm/stream_accumulator.rb b/lib/ruby_llm/stream_accumulator.rb index 233ce9cae..5f3bb0e7e 100644 --- a/lib/ruby_llm/stream_accumulator.rb +++ b/lib/ruby_llm/stream_accumulator.rb @@ -9,7 +9,7 @@ class StreamAccumulator def initialize @content = String.new - @thinking_content = String.new + @reasoning_content = String.new @tool_calls = {} @input_tokens = 0 @output_tokens = 0 @@ -24,7 +24,7 @@ def add(chunk) accumulate_tool_calls chunk.tool_calls else @content << (chunk.content || '') - @thinking_content << (chunk.thinking_content || '') + @reasoning_content << (chunk.reasoning_content || '') end count_tokens chunk @@ -35,7 +35,7 @@ def to_message Message.new( role: :assistant, content: content.empty? ? nil : content, - thinking_content: @thinking_content.empty? ? nil : @thinking_content, + reasoning_content: @reasoning_content.empty? ? nil : @reasoning_content, model_id: model_id, tool_calls: tool_calls_from_stream, input_tokens: @input_tokens.positive? ? @input_tokens : nil, From 6b4fb8372bb9f385b3e9272cba8bf69f716295e1 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Sun, 22 Jun 2025 21:51:24 +1000 Subject: [PATCH 08/16] chore: rename to thinking --- lib/ruby_llm/chat.rb | 21 ++++++++++++------- lib/ruby_llm/configuration.rb | 10 +++++++-- lib/ruby_llm/error.rb | 2 +- lib/ruby_llm/message.rb | 4 ++-- lib/ruby_llm/provider.rb | 5 +++-- lib/ruby_llm/providers/anthropic/chat.rb | 20 +++++++++--------- lib/ruby_llm/providers/anthropic/streaming.rb | 2 +- lib/ruby_llm/providers/gemini/chat.rb | 2 +- lib/ruby_llm/stream_accumulator.rb | 6 +++--- 9 files changed, 42 insertions(+), 30 deletions(-) diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index ad77d01cf..e6af95334 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -22,8 +22,9 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n @config = context&.config || RubyLLM.config model_id = model || @config.default_model with_model(model_id, provider: provider, assume_exists: assume_model_exists) - @reasoning = false - @temperature = 0.7 + @thinking = @config.default_thinking + @thinking_budget = @config.default_thinking_budget + @temperature = @config.default_temperature @messages = [] @tools = {} @on = { @@ -74,12 +75,15 @@ def with_temperature(temperature) self end - def with_reasoning(reasoning = true) - if reasoning && !@model.reasoning? - raise UnsupportedReasoningError, "Model #{@model.id} doesn't support reasoning" - end + def with_thinking(thinking: true, budget: nil) + raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.thinking? 
- @reasoning = reasoning + @thinking = thinking + + if budget + @thinking_budget = budget + end + self end @@ -111,7 +115,8 @@ def complete(&) tools: @tools, temperature: @temperature, model: @model.id, - reasoning: @reasoning, + thinking: @thinking, + thinking_budget: @thinking_budget, connection: @connection, & ) diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 886728168..a5ed635b4 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -28,7 +28,9 @@ class Configuration :default_model, :default_embedding_model, :default_image_model, - :default_reasoning_budget, + # Default model settings + :default_thinking, + :default_thinking_budget, # Connection configuration :request_timeout, :max_retries, @@ -55,7 +57,11 @@ def initialize @default_model = 'gpt-4.1-nano' @default_embedding_model = 'text-embedding-3-small' @default_image_model = 'dall-e-3' - @default_reasoning_budget = 1024 + + # Default model settings + @default_thinking = false + @default_thinking_budget = 2048 + @default_temperature = 0.7 # Logging configuration @log_file = $stdout diff --git a/lib/ruby_llm/error.rb b/lib/ruby_llm/error.rb index 102ec3b79..01948dbbe 100644 --- a/lib/ruby_llm/error.rb +++ b/lib/ruby_llm/error.rb @@ -25,7 +25,7 @@ class InvalidRoleError < StandardError; end class ModelNotFoundError < StandardError; end class UnsupportedFunctionsError < StandardError; end class UnsupportedAttachmentError < StandardError; end - class UnsupportedReasoningError < StandardError; end + class UnsupportedThinkingError < StandardError; end # Error classes for different HTTP status codes class BadRequestError < Error; end diff --git a/lib/ruby_llm/message.rb b/lib/ruby_llm/message.rb index 5624920e2..cb337fa36 100644 --- a/lib/ruby_llm/message.rb +++ b/lib/ruby_llm/message.rb @@ -7,12 +7,12 @@ module RubyLLM class Message ROLES = %i[system user assistant tool].freeze - attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :reasoning_content + attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :thinking def initialize(options = {}) @role = options.fetch(:role).to_sym @content = normalize_content(options.fetch(:content)) - @reasoning_content = options[:reasoning_content] + @thinking = options[:thinking] @tool_calls = options[:tool_calls] @input_tokens = options[:input_tokens] @output_tokens = options[:output_tokens] diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb index 64a9fd1f9..1549abd1a 100644 --- a/lib/ruby_llm/provider.rb +++ b/lib/ruby_llm/provider.rb @@ -10,14 +10,15 @@ module Provider module Methods extend Streaming - def complete(messages, tools:, temperature:, model:, reasoning:, connection:, &) # rubocop:disable Metrics/ParameterLists + def complete(messages, tools:, temperature:, model:, thinking:, thinking_budget:, connection:, &) # rubocop:disable Metrics/ParameterLists normalized_temperature = maybe_normalize_temperature(temperature, model) payload = render_payload(messages, tools: tools, temperature: normalized_temperature, model: model, - reasoning: reasoning, + thinking: thinking, + thinking_budget: thinking_budget, stream: block_given?) if block_given? 
diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index d24e799aa..3b7f79956 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -11,12 +11,12 @@ def completion_url '/v1/messages' end - def render_payload(messages, tools:, temperature:, model:, reasoning:, stream: false) # rubocop:disable Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, thinking:, thinking_budget:, stream: false) # rubocop:disable Metrics/ParameterLists system_messages, chat_messages = separate_messages(messages) system_content = build_system_content(system_messages) build_base_payload(chat_messages, temperature, model, stream).tap do |payload| - add_optional_fields(payload, system_content:, tools:, reasoning:) + add_optional_fields(payload, system_content:, tools:, thinking:, thinking_budget:) end end @@ -45,14 +45,14 @@ def build_base_payload(chat_messages, temperature, model, stream) } end - def add_optional_fields(payload, system_content:, tools:, reasoning:) + def add_optional_fields(payload, system_content:, tools:, thinking:, thinking_budget:) payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any? payload[:system] = system_content unless system_content.empty? - return unless reasoning + return unless thinking payload[:thinking] = { type: 'enabled', - budget_tokens: RubyLLM.config.default_reasoning_budget || 1024 + budget_tokens: thinking_budget } end @@ -62,14 +62,14 @@ def parse_completion_response(response) content_blocks = data['content'] || [] - reasoning_content = extract_reasoning_content(content_blocks) + thinking_content = extract_thinking_content(content_blocks) text_content = extract_text_content(content_blocks) tool_use = Tools.find_tool_use(content_blocks) - build_message(data, text_content, tool_use, reasoning_content) + build_message(data, text_content, tool_use, thinking_content) end - def extract_reasoning_content(blocks) + def extract_thinking_content(blocks) thinking_blocks = blocks.select { |c| c['type'] == 'thinking' } thinking_blocks.map { |c| c['thinking'] }.join end @@ -79,11 +79,11 @@ def extract_text_content(blocks) text_blocks.map { |c| c['text'] }.join end - def build_message(data, content, tool_use, reasoning_content) + def build_message(data, content, tool_use, thinking_content) Message.new( role: :assistant, content: content, - reasoning_content: reasoning_content, + thinking: thinking_content, tool_calls: Tools.parse_tool_calls(tool_use), input_tokens: data.dig('usage', 'input_tokens'), output_tokens: data.dig('usage', 'output_tokens'), diff --git a/lib/ruby_llm/providers/anthropic/streaming.rb b/lib/ruby_llm/providers/anthropic/streaming.rb index 6ebb07d73..5a0bc01ff 100644 --- a/lib/ruby_llm/providers/anthropic/streaming.rb +++ b/lib/ruby_llm/providers/anthropic/streaming.rb @@ -16,7 +16,7 @@ def build_chunk(data) role: :assistant, model_id: extract_model_id(data), content: data.dig('delta', 'text'), - reasoning_content: data.dig('delta', 'thinking'), + thinking: data.dig('delta', 'thinking'), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), tool_calls: extract_tool_calls(data) diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index 5a18546aa..fcb8eaa0e 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -11,7 +11,7 @@ def completion_url "models/#{@model}:generateContent" end - def render_payload(messages, 
tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, stream: false, **) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists @model = model # Store model for completion_url/stream_url payload = { contents: format_messages(messages), diff --git a/lib/ruby_llm/stream_accumulator.rb b/lib/ruby_llm/stream_accumulator.rb index 5f3bb0e7e..9eeb10b23 100644 --- a/lib/ruby_llm/stream_accumulator.rb +++ b/lib/ruby_llm/stream_accumulator.rb @@ -9,7 +9,7 @@ class StreamAccumulator def initialize @content = String.new - @reasoning_content = String.new + @thinking = String.new @tool_calls = {} @input_tokens = 0 @output_tokens = 0 @@ -24,7 +24,7 @@ def add(chunk) accumulate_tool_calls chunk.tool_calls else @content << (chunk.content || '') - @reasoning_content << (chunk.reasoning_content || '') + @thinking << (chunk.thinking || '') end count_tokens chunk @@ -35,7 +35,7 @@ def to_message Message.new( role: :assistant, content: content.empty? ? nil : content, - reasoning_content: @reasoning_content.empty? ? nil : @reasoning_content, + thinking: @thinking.empty? ? nil : @thinking, model_id: model_id, tool_calls: tool_calls_from_stream, input_tokens: @input_tokens.positive? ? @input_tokens : nil, From 7ec6733421e1d5a68942a035809b7c052f20d076 Mon Sep 17 00:00:00 2001 From: Hiemanshu Sharma Date: Fri, 27 Jun 2025 22:51:06 +0530 Subject: [PATCH 09/16] Get thinking working with bedrock --- lib/ruby_llm/chat.rb | 9 ++++----- lib/ruby_llm/configuration.rb | 3 ++- lib/ruby_llm/model/info.rb | 4 ++++ lib/ruby_llm/models.json | 12 +++++++---- .../providers/anthropic/capabilities.rb | 20 ++++++++++--------- .../providers/bedrock/capabilities.rb | 8 +++++--- lib/ruby_llm/providers/bedrock/chat.rb | 4 ++-- 7 files changed, 36 insertions(+), 24 deletions(-) diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index e6af95334..52ebb79eb 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -79,11 +79,10 @@ def with_thinking(thinking: true, budget: nil) raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.thinking? @thinking = thinking - - if budget - @thinking_budget = budget - end - + @temperature = 1 if thinking # Thinking requires temperature be set to 1 + + @thinking_budget = budget if budget + self end diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index a5ed635b4..424118244 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -29,6 +29,7 @@ class Configuration :default_embedding_model, :default_image_model, # Default model settings + :default_temperature, :default_thinking, :default_thinking_budget, # Connection configuration @@ -57,7 +58,7 @@ def initialize @default_model = 'gpt-4.1-nano' @default_embedding_model = 'text-embedding-3-small' @default_image_model = 'dall-e-3' - + # Default model settings @default_thinking = false @default_thinking_budget = 2048 diff --git a/lib/ruby_llm/model/info.rb b/lib/ruby_llm/model/info.rb index 9c72bcdf3..768fcede1 100644 --- a/lib/ruby_llm/model/info.rb +++ b/lib/ruby_llm/model/info.rb @@ -58,6 +58,10 @@ def supports_functions? function_calling? end + def thinking? + reasoning? 
+ end + def input_price_per_million pricing.text_tokens.input end diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index 2e8e7bd92..0bb33db62 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -1809,6 +1809,7 @@ }, "capabilities": [ "streaming", + "reasoning", "function_calling", "structured_output" ], @@ -1861,7 +1862,8 @@ "capabilities": [ "streaming", "function_calling", - "structured_output" + "structured_output", + "reasoning" ], "pricing": { "text_tokens": { @@ -9654,7 +9656,8 @@ }, "capabilities": [ "streaming", - "function_calling" + "function_calling", + "reasoning" ], "pricing": { "text_tokens": { @@ -9718,7 +9721,8 @@ }, "capabilities": [ "streaming", - "function_calling" + "function_calling", + "reasoning" ], "pricing": { "text_tokens": { @@ -27665,4 +27669,4 @@ ] } } -] \ No newline at end of file +] diff --git a/lib/ruby_llm/providers/anthropic/capabilities.rb b/lib/ruby_llm/providers/anthropic/capabilities.rb index 78cbf50c4..c1dce1adf 100644 --- a/lib/ruby_llm/providers/anthropic/capabilities.rb +++ b/lib/ruby_llm/providers/anthropic/capabilities.rb @@ -65,7 +65,7 @@ def supports_json_mode?(model_id) # @param model_id [String] the model identifier # @return [Boolean] true if the model supports extended thinking def supports_extended_thinking?(model_id) - model_id.match?(/claude-3-7-sonnet/) + model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4/) end # Determines the model family for a given model ID @@ -73,6 +73,8 @@ def supports_extended_thinking?(model_id) # @return [Symbol] the model family identifier def model_family(model_id) case model_id + when /claude-sonnet-4/ then 'claude-sonnet-4' + when /claude-opus-4/ then 'claude-opus-4' when /claude-3-7-sonnet/ then 'claude-3-7-sonnet' when /claude-3-5-sonnet/ then 'claude-3-5-sonnet' when /claude-3-5-haiku/ then 'claude-3-5-haiku' @@ -131,17 +133,17 @@ def capabilities_for(model_id) capabilities = ['streaming'] # Function calling for Claude 3+ - if model_id.match?(/claude-3/) + if model_id.match?(/claude-3|claude-sonnet-4|claude-opus-4/) capabilities << 'function_calling' capabilities << 'structured_output' capabilities << 'batch' end - # Extended thinking (reasoning) for Claude 3.7 - capabilities << 'reasoning' if model_id.match?(/claude-3-7/) + # Extended thinking for Claude 3.7 and Claude 4 + capabilities << 'reasoning' if supports_extended_thinking?(model_id) # Citations - capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7/) + capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7|claude-sonnet-4|claude-opus-4/) capabilities end @@ -161,10 +163,10 @@ def pricing_for(model_id) output_per_million: prices[:output] * 0.5 } - # Add reasoning output pricing for 3.7 models - if model_id.match?(/claude-3-7/) - standard_pricing[:reasoning_output_per_million] = prices[:output] * 2.5 - batch_pricing[:reasoning_output_per_million] = prices[:output] * 1.25 + # Add thinking output pricing for 3.7 and 4 models + if model_id.match?(/claude-3-7|claude-sonnet-4|claude-opus-4/) + standard_pricing[:thinking_output_per_million] = prices[:output] * 2.5 + batch_pricing[:thinking_output_per_million] = prices[:output] * 1.25 end { diff --git a/lib/ruby_llm/providers/bedrock/capabilities.rb b/lib/ruby_llm/providers/bedrock/capabilities.rb index 342c67897..ae3dbfc17 100644 --- a/lib/ruby_llm/providers/bedrock/capabilities.rb +++ b/lib/ruby_llm/providers/bedrock/capabilities.rb @@ -117,7 +117,9 @@ def supports_structured_output?(model_id) 
/anthropic\.claude-3-haiku/ => :claude3_haiku, /anthropic\.claude-3-5-haiku/ => :claude3_5_haiku, /anthropic\.claude-v2/ => :claude2, - /anthropic\.claude-instant/ => :claude_instant + /anthropic\.claude-instant/ => :claude_instant, + /anthropic\.claude-sonnet-4/ => :claude_sonnet4, + /anthropic\.claude-opus-4/ => :claude_opus4 }.freeze # Determines the model family for pricing and capability lookup @@ -187,8 +189,8 @@ def capabilities_for(model_id) capabilities << 'structured_output' if supports_json_mode?(model_id) - # Extended thinking for 3.7 models - capabilities << 'reasoning' if model_id.match?(/claude-3-7/) + # Extended thinking for 3.7, and 4 models + capabilities << 'reasoning' if model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4/) # Batch capabilities for newer Claude models if model_id.match?(/claude-3\.5|claude-3-7/) diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb index 62c33683a..d6ae8139a 100644 --- a/lib/ruby_llm/providers/bedrock/chat.rb +++ b/lib/ruby_llm/providers/bedrock/chat.rb @@ -39,7 +39,7 @@ def completion_url "model/#{@model_id}/invoke" end - def render_payload(messages, tools:, temperature:, model:, thinking:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, thinking:, thinking_budget:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists # Hold model_id in instance variable for use in completion_url and stream_url @model_id = model @@ -47,7 +47,7 @@ def render_payload(messages, tools:, temperature:, model:, thinking:, stream: fa system_content = Anthropic::Chat.build_system_content(system_messages) build_base_payload(chat_messages, temperature, model).tap do |payload| - Anthropic::Chat.add_optional_fields(payload, system_content:, tools:) + Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, thinking:, thinking_budget:) end end From 5577bae20079e29b16ab124ea7b6c02f2b07dacc Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 18 Jul 2025 14:48:48 +1000 Subject: [PATCH 10/16] chore: update anthropic capabilities with thinking - Add 'thinking' to anthropic capabilities - Add 'thinking' to bedrock capabilities (for anthropic supported models) - Update models.json file to reflect changes --- lib/ruby_llm/models.json | 186 +++++++++++------- .../providers/anthropic/capabilities.rb | 2 +- .../providers/bedrock/capabilities.rb | 6 +- 3 files changed, 120 insertions(+), 74 deletions(-) diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index c942e788a..f36d9e8df 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -71,7 +71,7 @@ "id": "claude-3-5-haiku-20241022", "name": "Claude Haiku 3.5", "provider": "anthropic", - "family": "claude-3-5-haiku", + "family": "claude-haiku-3.5", "created_at": null, "context_window": 200000, "max_output_tokens": 8192, @@ -103,7 +103,7 @@ "id": "claude-3-5-sonnet-20240620", "name": "Claude Sonnet 3.5", "provider": "anthropic", - "family": "claude-3-5-sonnet", + "family": "claude-sonnet-3.5", "created_at": null, "context_window": 200000, "max_output_tokens": 8192, @@ -135,7 +135,7 @@ "id": "claude-3-5-sonnet-20241022", "name": "Claude Sonnet 3.5", "provider": "anthropic", - "family": "claude-3-5-sonnet", + "family": "claude-sonnet-3.5", "created_at": null, "context_window": 200000, "max_output_tokens": 8192, @@ -167,7 +167,7 @@ "id": "claude-3-7-sonnet-20250219", "name": "Claude Sonnet 3.7", "provider": 
"anthropic", - "family": "claude-3-7-sonnet", + "family": "claude-sonnet-3.7", "created_at": null, "context_window": 200000, "max_output_tokens": 64000, @@ -183,7 +183,7 @@ }, "capabilities": [ "function_calling", - "reasoning" + "thinking" ], "pricing": { "text_tokens": { @@ -200,7 +200,7 @@ "id": "claude-3-haiku-20240307", "name": "Claude Haiku 3", "provider": "anthropic", - "family": "claude-3-haiku", + "family": "claude-haiku-3", "created_at": null, "context_window": 200000, "max_output_tokens": 4096, @@ -232,7 +232,7 @@ "id": "claude-3-opus-20240229", "name": "Claude Opus 3", "provider": "anthropic", - "family": "claude-3-opus", + "family": "claude-opus-3", "created_at": null, "context_window": 200000, "max_output_tokens": 4096, @@ -319,7 +319,7 @@ }, "capabilities": [ "function_calling", - "reasoning" + "thinking" ], "pricing": { "text_tokens": { @@ -352,7 +352,7 @@ }, "capabilities": [ "function_calling", - "reasoning" + "thinking" ], "pricing": { "text_tokens": { @@ -1757,7 +1757,7 @@ "streaming", "function_calling", "structured_output", - "reasoning", + "thinking", "batch", "citations" ], @@ -1809,9 +1809,9 @@ }, "capabilities": [ "streaming", - "reasoning", "function_calling", - "structured_output" + "structured_output", + "thinking" ], "pricing": { "text_tokens": { @@ -1863,7 +1863,7 @@ "streaming", "function_calling", "structured_output", - "reasoning" + "thinking" ], "pricing": { "text_tokens": { @@ -3401,7 +3401,7 @@ }, { "id": "gemini-2.5-flash-preview-tts", - "name": "Gemini 2.5 Flash Preview TTS", + "name": "Gemini 2.5 Flash Preview Text-to-Speech", "provider": "gemini", "family": "gemini-2.5-flash-preview-tts", "created_at": null, @@ -3629,7 +3629,7 @@ }, { "id": "gemini-2.5-pro-preview-tts", - "name": "Gemini 2.5 Pro Preview TTS", + "name": "Gemini 2.5 Pro Preview Text-to-Speech", "provider": "gemini", "family": "gemini-2.5-pro-preview-tts", "created_at": null, @@ -4168,7 +4168,7 @@ "family": "imagen-3.0-generate-002", "created_at": null, "context_window": null, - "max_output_tokens": null, + "max_output_tokens": 4, "knowledge_cutoff": null, "modalities": { "input": [ @@ -4179,13 +4179,7 @@ ] }, "capabilities": [], - "pricing": { - "text_tokens": { - "standard": { - "output_per_million": 0.03 - } - } - }, + "pricing": {}, "metadata": { "version": "002", "description": "Vertex served Imagen 3.0 002 model", @@ -4201,7 +4195,7 @@ "family": "imagen-4.0-generate-preview-06-06", "created_at": null, "context_window": 480, - "max_output_tokens": null, + "max_output_tokens": 1, "knowledge_cutoff": null, "modalities": { "input": [ @@ -4212,13 +4206,7 @@ ] }, "capabilities": [], - "pricing": { - "text_tokens": { - "standard": { - "output_per_million": 0.04 - } - } - }, + "pricing": {}, "metadata": { "version": "01", "description": "Vertex served Imagen 4.0 model", @@ -4234,7 +4222,7 @@ "family": "imagen-4.0-generate-preview-06-06", "created_at": null, "context_window": 480, - "max_output_tokens": null, + "max_output_tokens": 1, "knowledge_cutoff": null, "modalities": { "input": [ @@ -4245,13 +4233,7 @@ ] }, "capabilities": [], - "pricing": { - "text_tokens": { - "standard": { - "output_per_million": 0.04 - } - } - }, + "pricing": {}, "metadata": { "version": "01", "description": "Vertex served Imagen 4.0 ultra model", @@ -4341,7 +4323,7 @@ "family": "veo-2.0-generate-001", "created_at": null, "context_window": null, - "max_output_tokens": null, + "max_output_tokens": 2, "knowledge_cutoff": null, "modalities": { "input": [ @@ -4351,13 +4333,7 @@ "output": [] }, "capabilities": [], - 
"pricing": { - "text_tokens": { - "standard": { - "output_per_million": 0.35 - } - } - }, + "pricing": {}, "metadata": {} }, { @@ -7306,7 +7282,7 @@ "pricing": { "text_tokens": { "standard": { - "output_per_million": 30.0 + "input_per_million": 30.0 } } }, @@ -9761,8 +9737,7 @@ }, "capabilities": [ "streaming", - "function_calling", - "thinking" + "function_calling" ], "pricing": { "text_tokens": { @@ -9824,8 +9799,7 @@ }, "capabilities": [ "streaming", - "function_calling", - "reasoning" + "function_calling" ], "pricing": { "text_tokens": { @@ -9889,8 +9863,7 @@ }, "capabilities": [ "streaming", - "function_calling", - "reasoning" + "function_calling" ], "pricing": { "text_tokens": { @@ -17503,22 +17476,22 @@ "max_tokens", "temperature", "top_p", - "tools", - "tool_choice", "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "stop", "frequency_penalty", "presence_penalty", + "top_k", + "repetition_penalty", "logit_bias", "logprobs", - "seed", - "repetition_penalty", - "top_k", "top_logprobs", - "min_p" + "min_p", + "seed", + "tools", + "tool_choice", + "structured_outputs", + "response_format" ] } }, @@ -18786,7 +18759,8 @@ "capabilities": [ "streaming", "function_calling", - "structured_output" + "structured_output", + "predicted_outputs" ], "pricing": { "text_tokens": { @@ -18820,15 +18794,17 @@ "max_tokens", "temperature", "top_p", - "tools", - "tool_choice", "stop", "frequency_penalty", "presence_penalty", - "response_format", - "structured_outputs", + "logit_bias", + "logprobs", "seed", "repetition_penalty", + "tools", + "tool_choice", + "response_format", + "structured_outputs", "top_k", "min_p" ] @@ -19355,16 +19331,16 @@ "top_p", "tools", "tool_choice", + "structured_outputs", + "response_format", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", - "response_format", "top_k", - "seed", - "min_p", - "structured_outputs", + "repetition_penalty", "logit_bias", + "min_p", + "seed", "top_logprobs", "logprobs" ] @@ -20606,6 +20582,72 @@ ] } }, + { + "id": "openai/gpt-3.5-turbo", + "name": "OpenAI: GPT-3.5 Turbo", + "provider": "openrouter", + "family": "openai", + "created_at": "2023-05-28 02:00:00 +0200", + "context_window": 16385, + "max_output_tokens": 4096, + "knowledge_cutoff": null, + "modalities": { + "input": [ + "text" + ], + "output": [ + "text" + ] + }, + "capabilities": [ + "streaming", + "function_calling", + "structured_output" + ], + "pricing": { + "text_tokens": { + "standard": { + "input_per_million": 0.5, + "output_per_million": 1.5 + } + } + }, + "metadata": { + "description": "GPT-3.5 Turbo is OpenAI's fastest model. 
It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.",
      "architecture": {
        "modality": "text->text",
        "input_modalities": [
          "text"
        ],
        "output_modalities": [
          "text"
        ],
        "tokenizer": "GPT",
        "instruct_type": null
      },
      "top_provider": {
        "context_length": 16385,
        "max_completion_tokens": 4096,
        "is_moderated": true
      },
      "per_request_limits": null,
      "supported_parameters": [
        "max_tokens",
        "temperature",
        "top_p",
        "tools",
        "tool_choice",
        "stop",
        "frequency_penalty",
        "presence_penalty",
        "seed",
        "logit_bias",
        "logprobs",
        "top_logprobs",
        "response_format"
      ]
    }
  },
  {
    "id": "openai/gpt-3.5-turbo-0613",
    "name": "OpenAI: GPT-3.5 Turbo (older v0613)",
diff --git a/lib/ruby_llm/providers/anthropic/capabilities.rb b/lib/ruby_llm/providers/anthropic/capabilities.rb
index c1dce1adf..0cb8bd946 100644
--- a/lib/ruby_llm/providers/anthropic/capabilities.rb
+++ b/lib/ruby_llm/providers/anthropic/capabilities.rb
@@ -140,7 +140,7 @@ def capabilities_for(model_id)
        end

        # Extended thinking for Claude 3.7 and Claude 4
-       capabilities << 'reasoning' if supports_extended_thinking?(model_id)
+       capabilities << 'thinking' if supports_extended_thinking?(model_id)

        # Citations
        capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7|claude-sonnet-4|claude-opus-4/)
diff --git a/lib/ruby_llm/providers/bedrock/capabilities.rb b/lib/ruby_llm/providers/bedrock/capabilities.rb
index ae3dbfc17..976263000 100644
--- a/lib/ruby_llm/providers/bedrock/capabilities.rb
+++ b/lib/ruby_llm/providers/bedrock/capabilities.rb
@@ -108,6 +108,10 @@ def supports_structured_output?(model_id)
        model_id.match?(/anthropic\.claude/)
       end

+      def supports_extended_thinking?(model_id)
+        model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4/)
+      end
+
       # Model family patterns for capability lookup
       MODEL_FAMILIES = {
         /anthropic\.claude-3-opus/ => :claude3_opus,
@@ -190,7 +194,7 @@ def capabilities_for(model_id)
        capabilities << 'structured_output' if supports_json_mode?(model_id)

        # Extended thinking for 3.7, and 4 models
-       capabilities << 'reasoning' if model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4/)
+       capabilities << 'thinking' if supports_extended_thinking?(model_id)

        # Batch capabilities for newer Claude models
        if model_id.match?(/claude-3\.5|claude-3-7/)

From 5c02af2da17153901adcf33779a5270fda33c931 Mon Sep 17 00:00:00 2001
From: Rhys Murray
Date: Fri, 18 Jul 2025 15:17:29 +1000
Subject: [PATCH 11/16] chore: move temperature setting to param

---
 lib/ruby_llm/chat.rb | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb
index 52ebb79eb..cc47adf6f 100644
--- a/lib/ruby_llm/chat.rb
+++ b/lib/ruby_llm/chat.rb
@@ -75,12 +75,13 @@ def with_temperature(temperature)
      end

-     def with_thinking(thinking: true, budget: nil)
-       raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.thinking?
+     def with_thinking(thinking: true, budget: nil, temperature: 1)
+       raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.supports_thinking?

       @thinking = thinking
-       @temperature = 1 if thinking # Thinking requires temperature be set to 1
+       # Most thinking models require temperature to be set to 1, so force it here while still allowing an override via the temperature param.
+      @temperature = temperature
       @thinking_budget = budget if budget
 
       self

From 153440c6b61d5948321ca0ad42db1b7ac59e2b81 Mon Sep 17 00:00:00 2001
From: Rhys Murray
Date: Fri, 18 Jul 2025 15:18:20 +1000
Subject: [PATCH 12/16] chore: use 'thinking' capability instead of reasoning
 in Model::Info

---
 lib/ruby_llm/model/info.rb | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/lib/ruby_llm/model/info.rb b/lib/ruby_llm/model/info.rb
index 768fcede1..cdee5a2aa 100644
--- a/lib/ruby_llm/model/info.rb
+++ b/lib/ruby_llm/model/info.rb
@@ -35,7 +35,7 @@ def supports?(capability)
         capabilities.include?(capability.to_s)
       end
 
-      %w[function_calling structured_output batch reasoning citations streaming].each do |cap|
+      %w[function_calling structured_output batch reasoning citations streaming thinking].each do |cap|
         define_method "#{cap}?" do
           supports?(cap)
         end
@@ -58,10 +58,6 @@ def supports_functions?
         function_calling?
       end
 
-      def thinking?
-        reasoning?
-      end
-
       def input_price_per_million
         pricing.text_tokens.input
       end

From 627ffe070e7b5b8e2eab91cc497f83270c8c275d Mon Sep 17 00:00:00 2001
From: Rhys Murray
Date: Fri, 18 Jul 2025 15:18:42 +1000
Subject: [PATCH 13/16] chore: allow thinking capabilities on assumed models

---
 lib/ruby_llm/models.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/ruby_llm/models.rb b/lib/ruby_llm/models.rb
index b1fa08d74..035c61453 100644
--- a/lib/ruby_llm/models.rb
+++ b/lib/ruby_llm/models.rb
@@ -57,7 +57,7 @@ def resolve(model_id, provider: nil, assume_exists: false) # rubocop:disable Met
         id: model_id,
         name: model_id.gsub('-', ' ').capitalize,
         provider: provider.slug,
-        capabilities: %w[function_calling streaming],
+        capabilities: %w[function_calling streaming thinking],
         modalities: { input: %w[text image], output: %w[text] },
         metadata: { warning: 'Assuming model exists, capabilities may not be accurate' }
       )

From 8a6453dbaf85f43b12ad99184e19998644c82c92 Mon Sep 17 00:00:00 2001
From: Rhys Murray
Date: Fri, 18 Jul 2025 15:25:38 +1000
Subject: [PATCH 14/16] bug: fix call to check if thinking is supported in
 'with_thinking' - incorrectly using 'supports_thinking?' instead of
 'thinking?'

---
 lib/ruby_llm/chat.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb
index cc47adf6f..403df7d1d 100644
--- a/lib/ruby_llm/chat.rb
+++ b/lib/ruby_llm/chat.rb
@@ -76,7 +76,7 @@ def with_temperature(temperature)
     end
 
     def with_thinking(thinking: true, budget: nil, temperature: 1)
-      raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.supports_thinking?
+      raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.thinking?
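+      # `thinking?` comes from Model::Info's generated capability predicates; the
+      # `supports_thinking?` helper appears not to exist on that class, hence this fix.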
@thinking = thinking From cc1ce5f238d4eebb237e67d6153120d2914c6a67 Mon Sep 17 00:00:00 2001 From: Rhys Murray Date: Fri, 18 Jul 2025 16:14:51 +1000 Subject: [PATCH 15/16] test: add basic spec for anthropic models - Adds chat_thinking_spec.rb - Adds THINKING_MODELS and includes anthropic provider models - Adds NON_THINKING_MODELS and includes anthropic provider models - Adds cassette files --- ...sic_conversation_with_thinking_enabled.yml | 88 ++++++++++ ...ns_thinking_mode_across_multiple_turns.yml | 162 ++++++++++++++++++ ...sic_conversation_with_thinking_enabled.yml | 81 +++++++++ ...ns_thinking_mode_across_multiple_turns.yml | 161 +++++++++++++++++ ...sic_conversation_with_thinking_enabled.yml | 93 ++++++++++ ...ns_thinking_mode_across_multiple_turns.yml | 161 +++++++++++++++++ spec/ruby_llm/chat_thinking_spec.rb | 109 ++++++++++++ spec/spec_helper.rb | 11 ++ 8 files changed, 866 insertions(+) create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_can_handle_basic_conversation_with_thinking_enabled.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_maintains_thinking_mode_across_multiple_turns.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_can_handle_basic_conversation_with_thinking_enabled.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_maintains_thinking_mode_across_multiple_turns.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_can_handle_basic_conversation_with_thinking_enabled.yml create mode 100644 spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_maintains_thinking_mode_across_multiple_turns.yml create mode 100644 spec/ruby_llm/chat_thinking_spec.rb diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_can_handle_basic_conversation_with_thinking_enabled.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_can_handle_basic_conversation_with_thinking_enabled.yml new file mode 100644 index 000000000..9130d4f3a --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_can_handle_basic_conversation_with_thinking_enabled.yml @@ -0,0 +1,88 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-7-sonnet-20250219","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 2 + 2? 
Think through this step by step."}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:38:50 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:38:47Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:38:51Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:38:47Z' + Anthropic-Ratelimit-Tokens-Limit: + - '28000' + Anthropic-Ratelimit-Tokens-Remaining: + - '28000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:38:47Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01H8phMkgUJm2jRb6mjh1TyE","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[{"type":"thinking","thinking":"This + is a very basic arithmetic problem. Let''s solve it step by step.\n\n2 + 2 + means I need to add the number 2 with the number 2.\n\nTo add these numbers, + I can think of it as:\n- Starting with 2\n- Then adding 2 more\n\nSo:\n2 + + 2 = 4\n\nThat''s the answer: 4.","signature":"ErUBCkYIBRgCIkDYEWR6TDzVtqVJyMdFWFx9CUzT61wYklKVzM6g2GYLr0biNf88UKUY851WNn5+NWQ5BImkpVnBXuPrXYTLXLeoEgwajyIH8En4csCcQXMaDLvR03WVgYn4llf4SyIw9obvmcjnLQiqW1pJbZAYyBVChfUdu+4geQF/17LMVmH0j5hHgJOFlxeJRkKixyZQKh3F2jQ9AjbxG8PVSclPOFIi/2Ckm7Pgy8dzCm/fNxgC"},{"type":"text","text":"To + solve 2 + 2, I''ll break it down:\n\n1. I need to add the number 2 with another + number 2\n2. Addition means combining quantities together\n3. If I have 2 + items and get 2 more items, I''ll have a total of 4 items\n4. 
Therefore, 2 + + 2 = 4\n\nThe answer is 4."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":51,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":190,"service_tier":"standard"}}' + recorded_at: Fri, 18 Jul 2025 05:38:50 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_maintains_thinking_mode_across_multiple_turns.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_maintains_thinking_mode_across_multiple_turns.yml new file mode 100644 index 000000000..40756a8cf --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-3-7-sonnet_maintains_thinking_mode_across_multiple_turns.yml @@ -0,0 +1,162 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-7-sonnet-20250219","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:38:52 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:38:51Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:38:52Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:38:51Z' + Anthropic-Ratelimit-Tokens-Limit: + - '28000' + Anthropic-Ratelimit-Tokens-Remaining: + - '28000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:38:51Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01UVtt5CXncUaY5eMdZz5yXk","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[{"type":"thinking","thinking":"This + is a simple addition problem.\n\n5 + 3 = 8\n\nSo the answer is 8.","signature":"ErUBCkYIBRgCIkCj2IP3xc6RELPu8t9/N1bormyfIlJHWjS35zy3nE/PwNDRooJm43bzvCxFsJji6R7cAImgnsSKdSadE5UTSxybEgxzjihH9btpHvFvYd4aDM2QOkD1VQddEb3W+yIwiBdK30embODvJqFB4RG45ySacd2jmju/7B8PWp2NyZkdtlKXnrA7U3eqqgGqY2rCKh1JUx4LQqzYQjbVSvZjQ7bOCIkln9tg6ZUJFiWCvBgC"},{"type":"text","text":"The + sum of 5 + 3 is 8."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":44,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":51,"service_tier":"standard"}}' + recorded_at: Fri, 18 Jul 2025 05:38:52 GMT +- request: + method: post + uri: 
https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-7-sonnet-20250219","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]},{"role":"assistant","content":[{"type":"text","text":"The sum of + 5 + 3 is 8."}]},{"role":"user","content":[{"type":"text","text":"Now multiply + that result by 2"}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:38:54 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:38:53Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:38:54Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:38:53Z' + Anthropic-Ratelimit-Tokens-Limit: + - '28000' + Anthropic-Ratelimit-Tokens-Remaining: + - '28000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:38:53Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: !binary |- + eyJpZCI6Im1zZ18wMUVHVjc0ZTlhOUpkaXFHY0J6Z1hGNWoiLCJ0eXBlIjoibWVzc2FnZSIsInJvbGUiOiJhc3Npc3RhbnQiLCJtb2RlbCI6ImNsYXVkZS0zLTctc29ubmV0LTIwMjUwMjE5IiwiY29udGVudCI6W3sidHlwZSI6InRoaW5raW5nIiwidGhpbmtpbmciOiJJIG5lZWQgdG8gbXVsdGlwbHkgdGhlIHByZXZpb3VzIHJlc3VsdCAoOCkgYnkgMi5cblxuOCDDlyAyID0gMTYiLCJzaWduYXR1cmUiOiJFclVCQ2tZSUJSZ0NJa0NDeEhjQXVEcU1MMTlCVytFSjNxQmhjKyt0S0w2TlpHNlprN1h6NTNqeDQyVGxybi9xSTNVenVMZE9ZeVdFVm1OekR5T1cvdUhLMS9oMEV1TWxQZVBoRWd3TzlzV25IclJCV0lVOGZJOGFERHVjbXcxcnZndXNKVzRXS2lJdys1elY1NGw2dlRkaU9sOWhsRGVTNTlMUk9od2Q5d2VUb1Y0QTRFbmpKYTJSWlFuTjJGY3VHTVNBK3FvNE5PVy9LaDNyT3RiQzNlUGVvQ25LdkMyS01zaVNpTkliOWxKY2hQT01tQ0hYaFJnQyJ9LHsidHlwZSI6InRleHQiLCJ0ZXh0IjoiVG8gbXVsdGlwbHkgdGhlIHByZXZpb3VzIHJlc3VsdCBieSAyOlxuXG44IMOXIDIgPSAxNlxuXG5UaGUgYW5zd2VyIGlzIDE2LiJ9XSwic3RvcF9yZWFzb24iOiJlbmRfdHVybiIsInN0b3Bfc2VxdWVuY2UiOm51bGwsInVzYWdlIjp7ImlucHV0X3Rva2VucyI6NzEsImNhY2hlX2NyZWF0aW9uX2lucHV0X3Rva2VucyI6MCwiY2FjaGVfcmVhZF9pbnB1dF90b2tlbnMiOjAsIm91dHB1dF90b2tlbnMiOjYyLCJzZXJ2aWNlX3RpZXIiOiJzdGFuZGFyZCJ9fQ== + recorded_at: Fri, 18 Jul 2025 05:38:54 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_can_handle_basic_conversation_with_thinking_enabled.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_can_handle_basic_conversation_with_thinking_enabled.yml new file mode 100644 index 000000000..b3a4e0924 --- /dev/null +++ 
b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_can_handle_basic_conversation_with_thinking_enabled.yml @@ -0,0 +1,81 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-opus-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 2 + 2? Think through this step by step."}]}],"temperature":1,"stream":false,"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:23 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:39:17Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:27Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:39:16Z' + Anthropic-Ratelimit-Tokens-Limit: + - '24000' + Anthropic-Ratelimit-Tokens-Remaining: + - '24000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:39:17Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: !binary |- + 
eyJpZCI6Im1zZ18wMTlDMnA3cVdudzQ3RFpBeXNyc2JlUGkiLCJ0eXBlIjoibWVzc2FnZSIsInJvbGUiOiJhc3Npc3RhbnQiLCJtb2RlbCI6ImNsYXVkZS1vcHVzLTQtMjAyNTA1MTQiLCJjb250ZW50IjpbeyJ0eXBlIjoidGhpbmtpbmciLCJ0aGlua2luZyI6IlRoaXMgaXMgYSB2ZXJ5IHNpbXBsZSBhcml0aG1ldGljIHF1ZXN0aW9uLiBUaGUgdXNlciBpcyBhc2tpbmcgbWUgdG8gYWRkIDIgKyAyLCBidXQgdGhleSd2ZSBhbHNvIGFza2VkIG1lIHRvIHRoaW5rIHRocm91Z2ggaXQgc3RlcCBieSBzdGVwLiBFdmVuIHRob3VnaCB0aGlzIGlzIGVsZW1lbnRhcnksIEkgc2hvdWxkIGhvbm9yIHRoZWlyIHJlcXVlc3QgdG8gc2hvdyB0aGUgc3RlcHMuXG5cbjIgKyAyID0gNFxuXG5JIGNhbiBleHBsYWluIHRoaXMgaW4gYSBmZXcgd2F5czpcbi0gQ291bnRpbmcgdXA6IFN0YXJ0aW5nIGF0IDIsIGNvdW50IHVwIDIgbW9yZTogMywgNFxuLSBHcm91cGluZzogSWYgeW91IGhhdmUgMiBpdGVtcyBhbmQgYWRkIDIgbW9yZSBpdGVtcywgeW91IGhhdmUgNCBpdGVtcyB0b3RhbFxuLSBOdW1iZXIgbGluZTogU3RhcnRpbmcgYXQgMiBvbiBhIG51bWJlciBsaW5lLCBtb3ZlIDIgc3BhY2VzIHRvIHRoZSByaWdodCwgbGFuZGluZyBvbiA0Iiwic2lnbmF0dXJlIjoiRW9vRkNrWUlCUmdDS2tBWEpSVXVJdTlXT00za1NySTl0WHFJRkxwZEhudHg2NWxuc1N3cmIzRU0vbjZpTFd2MmZTVDlFSERvWU1seEtaLzlWZGx3bm04bnhLSHFtYVVwb0NOakVnekJIWVREYlBzajNVQTZ5STRhRExhL2IxSGtaWE5QVmU3U2Z5SXc5OTlucm0zazN1azZkczlES3dSMmhQOFdrbHpGZnBoWHptOFZYdkNJK1AySHRLUmJmYncwc0hrNlFVTGdhQXVoS3ZFRHFITVovd3I0OTFEMHh4Y1hyL2JmRUxuYXFyUkhlbVZvZ2NMc2hlTWxWeDVNdUJURThLM3pYekZQZ1B1ZU9XeFNZb1FFcnVCUnQ4QzN1VjQwTTArMjFsZE5zYndwbEl4QWJhQ2s3dTdteHR2eTRPeFhzVllRTU05UVYwZzlyWFBEY1lFZzR1YTJ1N1B5RXZZTkNhRDZHWVZ2MFp2TU9mSGxxTm11T1FuZGRIYlRac1EreGhJeE5Cb1pwbWVQeU9uSFQwRXkvbUxxN0ppOWdBVlZ2QThlejcwODdlMjIxdW5sS0NMZFVGSjFPNlFWd2ZZTGRIZjBqYzNRR3l4ODlOWXVZamE2MUlRTTlReEJkekx5dUFaYmV4QjBTK3hreWVKdk9xNFA5N05CdjBWT29Zblc5bklPZW1rdDc2Sis5U3RBVnJjT01CRzdaa1ZtNWNPa0ZVS0VsT0o5SVI3SzFPWTZHd2dLbDJpcUFCLzBpcVB1Y01qZnhaQkV4OFRIcjVpMmtLeUtUbHFRK3k5WUpmeWlKcS9BRXMrRW5wTUJDNUxRK2I1dERud0lTeEV2eDNGcnpUNnE0WVcxMU8yVWgvVkdGS0RUUEY4NkZyUDVFaUJBNXFPVW9LUW55NWtlK2trbTM5MWo3Q0dXY1FHWmxkSkpEZnNhUHQ0a0x0WGdvcXR6ZWZjcE9FR1R5dGFRRWxidGhBWjFyaS91ZHh1Rk9FRnZyWm41VExnY0Zpd0lnd2xKamIrb2dnbHkxWmRJUGcrRFI3blVrSEllbHB4L2gvamhZR1dlM3drYXFUMEpINzZ5L3JGTkd0NHpZZ0dVMElwM2xydXdFS0NVbEtYVC90Q1lCZzhBL1N1TkNMNk9IUGFOVFRVaFZORVlBUT09In0seyJ0eXBlIjoidGV4dCIsInRleHQiOiJJJ2xsIHdvcmsgdGhyb3VnaCB0aGlzIGFkZGl0aW9uIHN0ZXAgYnkgc3RlcC5cblxuU3RhcnRpbmcgd2l0aDogMiArIDJcblxuU3RlcCAxOiBUYWtlIHRoZSBmaXJzdCBudW1iZXIsIHdoaWNoIGlzIDJcblN0ZXAgMjogQWRkIHRoZSBzZWNvbmQgbnVtYmVyLCB3aGljaCBpcyBhbHNvIDJcblN0ZXAgMzogQ291bnQgdXAgZnJvbSAyOiBcbi0gU3RhcnQgYXQgMlxuLSBBZGQgMSDihpIgZ2V0IDNcbi0gQWRkIDEgbW9yZSDihpIgZ2V0IDRcblxuVGhlcmVmb3JlOiAyICsgMiA9IDRcblxuQW5vdGhlciB3YXkgdG8gdGhpbmsgYWJvdXQgaXQ6IElmIHlvdSBoYXZlIDIgb2JqZWN0cyBhbmQgc29tZW9uZSBnaXZlcyB5b3UgMiBtb3JlIG9iamVjdHMsIHlvdSBub3cgaGF2ZSA0IG9iamVjdHMgdG90YWwuIn1dLCJzdG9wX3JlYXNvbiI6ImVuZF90dXJuIiwic3RvcF9zZXF1ZW5jZSI6bnVsbCwidXNhZ2UiOnsiaW5wdXRfdG9rZW5zIjo1MSwiY2FjaGVfY3JlYXRpb25faW5wdXRfdG9rZW5zIjowLCJjYWNoZV9yZWFkX2lucHV0X3Rva2VucyI6MCwib3V0cHV0X3Rva2VucyI6Mjk4LCJzZXJ2aWNlX3RpZXIiOiJzdGFuZGFyZCJ9fQ== + recorded_at: Fri, 18 Jul 2025 05:39:23 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_maintains_thinking_mode_across_multiple_turns.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_maintains_thinking_mode_across_multiple_turns.yml new file mode 100644 index 000000000..64f170189 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-opus-4_maintains_thinking_mode_across_multiple_turns.yml @@ -0,0 +1,161 @@ +--- +http_interactions: 
+- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-opus-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]}],"temperature":1,"stream":false,"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:26 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:39:26Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:27Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:39:25Z' + Anthropic-Ratelimit-Tokens-Limit: + - '24000' + Anthropic-Ratelimit-Tokens-Remaining: + - '24000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:39:26Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01YXCT7MgRWQbxwwfs1bWhN2","type":"message","role":"assistant","model":"claude-opus-4-20250514","content":[{"type":"thinking","thinking":"This + is a simple arithmetic question. 
5 + 3 = 8.","signature":"EtgBCkYIBRgCKkCyqtg4YSovHjJWjT5xWNBV0HDNY0NkeiSISwchPehu+JHqF14GKTlprSnmlk1ohL26KlGnQRhwg33jqkxTjsJiEgz7IAVT6nqF9r6eMC8aDDtFLpYkLlDKnJjnpSIwvHR9G483A2OajVNq3vWQr7SfmZ7p5CnDQNuZp/QkVIQMc8IGCOtLX15SWVC2HKeaKkAcQTrZsHVQM5K8hKFfSDAEngoOyzJ0kus67m+ETlZZL4r1WFKIc9VoOMlD0yej7XnEFDlG1Ck5oCzPH1qrNKD1GAE="},{"type":"text","text":"5 + + 3 = 8"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":44,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":39,"service_tier":"standard"}}' + recorded_at: Fri, 18 Jul 2025 05:39:26 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-opus-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]},{"role":"assistant","content":[{"type":"text","text":"5 + 3 = 8"}]},{"role":"user","content":[{"type":"text","text":"Now + multiply that result by 2"}]}],"temperature":1,"stream":false,"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:29 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '20000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:39:28Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '4000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:29Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:39:28Z' + Anthropic-Ratelimit-Tokens-Limit: + - '24000' + Anthropic-Ratelimit-Tokens-Remaining: + - '24000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:39:28Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: !binary |- + 
eyJpZCI6Im1zZ18wMVdLY24zeGNYMlJ1Y2JieHdGdTQ0NWgiLCJ0eXBlIjoibWVzc2FnZSIsInJvbGUiOiJhc3Npc3RhbnQiLCJtb2RlbCI6ImNsYXVkZS1vcHVzLTQtMjAyNTA1MTQiLCJjb250ZW50IjpbeyJ0eXBlIjoidGhpbmtpbmciLCJ0aGlua2luZyI6IlRoZSBwcmV2aW91cyByZXN1bHQgd2FzIDggKGZyb20gNSArIDMpLiBOb3cgSSBuZWVkIHRvIG11bHRpcGx5IDggYnkgMi5cbjggw5cgMiA9IDE2Iiwic2lnbmF0dXJlIjoiRXZvQkNrWUlCUmdDS2tDSDBkcWh3Qi9rRUttajBUcHJlellxVmpuOHZES1FLQ3FwK2NNeFBCbHoxb096TFdrNDF0ZUFRYkJ0cnd3bkp4QXA4OUQ4VkdNemxIWEZIa21xYTZKTUVnd2w0MENqSExySDVmV2hxRGdhREptdzdDWXNPTUNUMnVhZDZ5SXdLcmhIb3N3dnNsUmE2UFMxYmZ4VVlhVTk2Rk92WSt2Y2QwRThWV1RYQUN3bHFqWHZkZFJ5d01qckJDbWpPenhZS21LL1N1dnM1dGo4ZWE3Wll0THdzODBYYXNrQ0E5OGw2N0JVVktYZys0cWpQRWZvNjYzZWJSRkRsZEpvR2F5L1ByUjJULzlVL1ZjZStvZGdleCtlSXB4KzcxWTBRdC95T2dJZ2NvbG9XckthaWpKemdFdkltd1RwVXNRWENjK2JJMUV0SUJnQiJ9LHsidHlwZSI6InRleHQiLCJ0ZXh0IjoiOCDDlyAyID0gMTYifV0sInN0b3BfcmVhc29uIjoiZW5kX3R1cm4iLCJzdG9wX3NlcXVlbmNlIjpudWxsLCJ1c2FnZSI6eyJpbnB1dF90b2tlbnMiOjY3LCJjYWNoZV9jcmVhdGlvbl9pbnB1dF90b2tlbnMiOjAsImNhY2hlX3JlYWRfaW5wdXRfdG9rZW5zIjowLCJvdXRwdXRfdG9rZW5zIjo1OSwic2VydmljZV90aWVyIjoic3RhbmRhcmQifX0= + recorded_at: Fri, 18 Jul 2025 05:39:29 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_can_handle_basic_conversation_with_thinking_enabled.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_can_handle_basic_conversation_with_thinking_enabled.yml new file mode 100644 index 000000000..f2d9ca256 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_can_handle_basic_conversation_with_thinking_enabled.yml @@ -0,0 +1,93 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 2 + 2? 
Think through this step by step."}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:01 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:38:55Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:03Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:38:55Z' + Anthropic-Ratelimit-Tokens-Limit: + - '38000' + Anthropic-Ratelimit-Tokens-Remaining: + - '38000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:38:55Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01ELjQtoVzofUkfT2MG4NnvY","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[{"type":"thinking","thinking":"This + is a very basic arithmetic question. Let me think through it step by step + as requested.\n\n2 + 2\n\nThis is asking me to add the number 2 to itself.\n\nStep + 1: I start with the first number, which is 2.\nStep 2: I need to add 2 to + it.\nStep 3: 2 + 2 = 4\n\nI can think of this in several ways:\n- Counting: + If I have 2 items, and I add 2 more items, I have 4 items total\n- On a number + line: Starting at 2, if I move 2 spaces to the right, I land on 4\n- Basic + addition fact: This is one of the fundamental addition facts that 2 + 2 = + 4","signature":"ErkFCkYIBRgCKkAGD5ERRvwl/3LTfinLm7VjiqqC4SUrYGpZtWlYkibMcIFwHN/sTGd8t4tNZmDLjZrsHp4qLIeQ2ZaXrx/cdekJEgwdoMEt+uVUTc41uWkaDNgGvIHy6w6qICAlXiIwPii9Ia+JG/IRms/CawNXQ8IutTtFXpmc5cE0XCSXq3BEohpEaGHVUwzMbnG2KH5OKqAE/u5S+Nw+AsmuwpxOzd+CnFM6yvqYDBbRCZRDJqlWaffXEmeZp5066Yl9CePSoBqP7MXRbJE35kvwrfZZXuwFH88Xmkxv7WOZkL2L8keUXX9LamLnxrQthqSqivuhskIJUiup5gsUTQUztKOztDxcwEfGDOeBIVeZ8GRqXusIy2cZxQNF87fPvj712KyXkrUej05qAAmyLLlRF2U+e0Rmuq2uKVqifmk6uvL+qM/5nVxknMAlBgU3h8wPR0NYQdVGC8Y6qi+IDn5vahBSADCnraBXv/PeY/dLzyIq1WsBFbnOriSIYNhVhulBlGm6e3dIk6L54j1xT41d5RxFXYXZi5ROSAgyapLN0eX/XI+m6Fd09Y953n0YSSKlBiEJvet6ZB/9PD06oQf7zWrQ4Edqm0QBc97UZjPVZp/7aAbpfkSmYq3JkyT0YLUGbDOvEQTdaJLN5dlPkB5fmihZKA2DrqhOrI4+/bBtj4wO66M3wvnQqIBJkivzHVOqwJz4eJ7ZjZFw8dHTmb7/lEY8f3dn42kNwfY735j7vvrvpHCXw5ST3h4fW88/IHymeWtr0U7ohFMbzhxo2vY7nFah5wHd7RUqEOwtSlDvicXj4O0tvp/0cITCn6dqYCAk1fh1EBiw+OfLLRypTfFGSH5KV0Wb3TF3m944/KeSx8Jetmd2xVSBEwfhYolr9UacGENv8eoQAvMm1hMM5lBC+DGTLJZErRgB"},{"type":"text","text":"I''ll + work through this step by step:\n\n**Step 1:** Start with the first number: + 2\n\n**Step 2:** Add the second number: 2\n\n**Step 3:** Combine them: 2 + + 2\n\n**Step 4:** Calculate the sum: 4\n\nSo 2 + 2 = 4.\n\nYou can think of + this as having 2 objects, then adding 2 more 
objects, which gives you 4 objects + total."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":51,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":298,"service_tier":"standard"}}' + recorded_at: Fri, 18 Jul 2025 05:39:01 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_maintains_thinking_mode_across_multiple_turns.yml b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_maintains_thinking_mode_across_multiple_turns.yml new file mode 100644 index 000000000..fa8068eab --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_thinking_mode_functionality_thinking_mode_integration_with_chat_anthropic_claude-sonnet-4_maintains_thinking_mode_across_multiple_turns.yml @@ -0,0 +1,161 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:03 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:39:03Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:03Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:39:03Z' + Anthropic-Ratelimit-Tokens-Limit: + - '38000' + Anthropic-Ratelimit-Tokens-Remaining: + - '38000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:39:03Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01BzuwUNCKCLie4m3CUR9sRr","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[{"type":"thinking","thinking":"This + is a simple addition problem. 
5 + 3 = 8.","signature":"EtUBCkYIBRgCKkBTKgGEUioRSsJoxhuzo1AT0IuxJmwIxAkRRNe15z10h5e8CMTYppvz7xt2wwXONyqpRoWYmGBQfSlspsv8kJpBEgzBiQiuQU2am3MN3YQaDJow1tAgVZRXRLLi3CIw67uUejmUfdAYNzynxnnHwd6Ba6C4OgDvQ8yqX7Au6xBHmHFYDhUYzbinIo/WSGLjKj0jXruNlLkGAoGxJ0IGvpuX0979sQApiBQaoBJFXMoa4ryX9E77kbLyprbcwg76uE6yCTbh41h0yxU/G+/BGAE="},{"type":"text","text":"5 + + 3 = 8"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":44,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":39,"service_tier":"standard"}}' + recorded_at: Fri, 18 Jul 2025 05:39:03 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"What''s + 5 + 3?"}]},{"role":"assistant","content":[{"type":"text","text":"5 + 3 = 8"}]},{"role":"user","content":[{"type":"text","text":"Now + multiply that result by 2"}]}],"temperature":1,"stream":false,"max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":2048}}' + headers: + User-Agent: + - Faraday v2.13.2 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 18 Jul 2025 05:39:14 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '30000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-07-18T05:39:14Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '8000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-07-18T05:39:15Z' + Anthropic-Ratelimit-Requests-Limit: + - '50' + Anthropic-Ratelimit-Requests-Remaining: + - '49' + Anthropic-Ratelimit-Requests-Reset: + - '2025-07-18T05:39:05Z' + Anthropic-Ratelimit-Tokens-Limit: + - '38000' + Anthropic-Ratelimit-Tokens-Remaining: + - '38000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-07-18T05:39:14Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - a2bc4b1c-6bf0-4b62-b1f6-a1d9821a5e3a + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: !binary |- + 
eyJpZCI6Im1zZ18wMThabnY0d1BGYWtWeHJFaUs5Yjd2a3ciLCJ0eXBlIjoibWVzc2FnZSIsInJvbGUiOiJhc3Npc3RhbnQiLCJtb2RlbCI6ImNsYXVkZS1zb25uZXQtNC0yMDI1MDUxNCIsImNvbnRlbnQiOlt7InR5cGUiOiJ0aGlua2luZyIsInRoaW5raW5nIjoiVGhlIHVzZXIgYXNrZWQgbWUgdG8gbXVsdGlwbHkgdGhlIHJlc3VsdCBmcm9tIHRoZSBwcmV2aW91cyBjYWxjdWxhdGlvbiBieSAyLiBUaGUgcHJldmlvdXMgcmVzdWx0IHdhcyA4LCBzbyBJIG5lZWQgdG8gY2FsY3VsYXRlIDggw5cgMi5cblxuOCDDlyAyID0gMTYiLCJzaWduYXR1cmUiOiJFcndDQ2tZSUJSZ0NLa0J4TGdyNWdYVTVyOEJJN000cXVkUFRsdmdUaTBheGFFRWZLdmttaWxkMFQxYjBCajB6cHB6UUc4WE1GU1gyOC91WldnL21QM1UyK1JGZy9PRmpON2RjRWd5ZW0xMDMvbXg1UENhN1QrUWFESGEvVk5oR2RBd2ZwMEc1ZFNJd1lKdU5MbXNBMHptR01IZy8wNjFpZWVxcXRiN3lKQVkxSGE0NU5aVElDZ1M5SkQ1NC9lQ2dJMnIvVlJxckZQbHdLcU1CWDltenU3UU12Q3pXNWJrMjhPRVp5MDA4TS9LSVVqZkZNWHZlSE1DbzFMWWJUaWxFLzBPbEJ5RFdpRVNQRzVPVTE5R3Q3SWpqT3dwUkFSaERGcnNJSTJiNE9WeHhXMzdZc0x2RG1vclFodkdxYTBHQWFYNjQ5dUVFNWkxZExzQURUdTN4a3lWZGRsZGJKeXYyQ290TllNR0JBNW9nNk1FRlpNNVlxUlJCcmdSUlBpVDNFaUNZK2R4TWxKRjRmejZ1U3lHeFRvaktMTEM5L211c0NrYitWQ09vZ2hnQiJ9LHsidHlwZSI6InRleHQiLCJ0ZXh0IjoiOCDDlyAyID0gMTYifV0sInN0b3BfcmVhc29uIjoiZW5kX3R1cm4iLCJzdG9wX3NlcXVlbmNlIjpudWxsLCJ1c2FnZSI6eyJpbnB1dF90b2tlbnMiOjY3LCJjYWNoZV9jcmVhdGlvbl9pbnB1dF90b2tlbnMiOjAsImNhY2hlX3JlYWRfaW5wdXRfdG9rZW5zIjowLCJvdXRwdXRfdG9rZW5zIjo2Niwic2VydmljZV90aWVyIjoic3RhbmRhcmQifX0= + recorded_at: Fri, 18 Jul 2025 05:39:14 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/ruby_llm/chat_thinking_spec.rb b/spec/ruby_llm/chat_thinking_spec.rb new file mode 100644 index 000000000..4f968f25e --- /dev/null +++ b/spec/ruby_llm/chat_thinking_spec.rb @@ -0,0 +1,109 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Chat do + include_context 'with configured RubyLLM' + + describe 'thinking mode functionality' do + describe '#with_thinking' do + context 'with thinking-capable models' do # rubocop:disable RSpec/NestedGroups + THINKING_MODELS.each do |model_info| + model = model_info[:model] + provider = model_info[:provider] + + it "#{provider}/#{model} enables thinking mode successfully" do # rubocop:disable RSpec/MultipleExpectations + chat = RubyLLM.chat(model: model, provider: provider) + + expect { chat.with_thinking }.not_to raise_error + expect(chat.instance_variable_get(:@thinking)).to be true + expect(chat.instance_variable_get(:@temperature)).to eq 1 + end + + it "#{provider}/#{model} accepts custom thinking parameters" do # rubocop:disable RSpec/MultipleExpectations + chat = RubyLLM.chat(model: model, provider: provider) + + chat.with_thinking(budget: 20_000, temperature: 0.8) + + expect(chat.instance_variable_get(:@thinking)).to be true + expect(chat.instance_variable_get(:@thinking_budget)).to eq 20_000 + expect(chat.instance_variable_get(:@temperature)).to eq 0.8 + end + + it "#{provider}/#{model} can disable thinking mode" do + chat = RubyLLM.chat(model: model, provider: provider) + + chat.with_thinking(thinking: false) + + expect(chat.instance_variable_get(:@thinking)).to be false + end + + it "#{provider}/#{model} can chain with other methods" do # rubocop:disable RSpec/MultipleExpectations + chat = RubyLLM.chat(model: model, provider: provider) + + result = chat.with_thinking.with_temperature(0.5) + + expect(result).to be_a(described_class) + expect(chat.instance_variable_get(:@thinking)).to be true + # Temperature should be overridden by the subsequent with_temperature call + expect(chat.instance_variable_get(:@temperature)).to eq 0.5 + end + end + end + + context 'with non-thinking models' do # rubocop:disable RSpec/NestedGroups + NON_THINKING_MODELS.each do 
|model_info|
+          model = model_info[:model]
+          provider = model_info[:provider]
+
+          it "#{provider}/#{model} raises UnsupportedThinkingError when enabling thinking" do
+            chat = RubyLLM.chat(model: model, provider: provider)
+
+            expect { chat.with_thinking }.to raise_error(RubyLLM::UnsupportedThinkingError)
+          end
+
+          it "#{provider}/#{model} allows disabling thinking without error" do # rubocop:disable RSpec/MultipleExpectations
+            chat = RubyLLM.chat(model: model, provider: provider)
+
+            expect { chat.with_thinking(thinking: false) }.not_to raise_error
+            expect(chat.instance_variable_get(:@thinking)).to be false
+          end
+        end
+      end
+    end
+
+    describe 'thinking mode integration with chat' do
+      THINKING_MODELS.each do |model_info|
+        model = model_info[:model]
+        provider = model_info[:provider]
+
+        it "#{provider}/#{model} can handle basic conversation with thinking enabled" do # rubocop:disable RSpec/MultipleExpectations, RSpec/ExampleLength
+          chat = RubyLLM.chat(model: model, provider: provider)
+          chat.with_thinking
+
+          response = chat.ask("What's 2 + 2? Think through this step by step.")
+
+          expect(response.content).to be_present
+          expect(response.thinking).to be_present
+          expect(response.role).to eq(:assistant)
+          expect(response.input_tokens).to be_positive
+          expect(response.output_tokens).to be_positive
+        end
+
+        it "#{provider}/#{model} maintains thinking mode across multiple turns" do # rubocop:disable RSpec/MultipleExpectations, RSpec/ExampleLength
+          chat = RubyLLM.chat(model: model, provider: provider)
+          chat.with_thinking
+
+          first = chat.ask("What's 5 + 3?")
+          expect(first.content).to include('8')
+
+          second = chat.ask('Now multiply that result by 2')
+          expect(second.content).to include('16')
+
+          # Thinking mode should still be enabled
+          expect(chat.instance_variable_get(:@thinking)).to be true
+        end
+      end
+    end
+  end
+end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index f3ac86c06..5d2d6bc73 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -160,3 +160,14 @@
 AUDIO_MODELS = [
   { provider: :openai, model: 'gpt-4o-mini-audio-preview' }
 ].freeze
+
+THINKING_MODELS = [
+  { model: 'claude-3-7-sonnet', provider: 'anthropic' },
+  { model: 'claude-sonnet-4', provider: 'anthropic' },
+  { model: 'claude-opus-4', provider: 'anthropic' }
+].freeze
+
+NON_THINKING_MODELS = [
+  { model: 'claude-3-haiku', provider: 'anthropic' },
+  { model: 'claude-3-sonnet', provider: 'anthropic' }
+].freeze

From 06daa1cd0a1daa5c8eb07f21b744cc8050e8e517 Mon Sep 17 00:00:00 2001
From: Rhys Murray
Date: Fri, 18 Jul 2025 16:30:50 +1000
Subject: [PATCH 16/16] bug: ensure render_payload args compatibility across
 all providers - use ** instead of including unused parameters

---
 lib/ruby_llm/providers/bedrock/chat.rb | 2 +-
 lib/ruby_llm/providers/gemini/chat.rb  | 2 +-
 lib/ruby_llm/providers/openai/chat.rb  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb
index d6ae8139a..9d5486d4a 100644
--- a/lib/ruby_llm/providers/bedrock/chat.rb
+++ b/lib/ruby_llm/providers/bedrock/chat.rb
@@ -39,7 +39,7 @@ def completion_url
         "model/#{@model_id}/invoke"
       end
 
-      def render_payload(messages, tools:, temperature:, model:, thinking:, thinking_budget:, stream: false) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
+      def render_payload(messages, tools:, temperature:, model:, thinking:, thinking_budget:, **) # rubocop:disable Metrics/ParameterLists
        # Hold model_id in instance variable for use in 
completion_url and stream_url @model_id = model diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index fcb8eaa0e..bb0bd5258 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -11,7 +11,7 @@ def completion_url "models/#{@model}:generateContent" end - def render_payload(messages, tools:, temperature:, model:, stream: false, **) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, **) @model = model # Store model for completion_url/stream_url payload = { contents: format_messages(messages), diff --git a/lib/ruby_llm/providers/openai/chat.rb b/lib/ruby_llm/providers/openai/chat.rb index 697442b2f..b33cfffcb 100644 --- a/lib/ruby_llm/providers/openai/chat.rb +++ b/lib/ruby_llm/providers/openai/chat.rb @@ -11,7 +11,7 @@ def completion_url module_function - def render_payload(messages, tools:, temperature:, model:, stream: false) + def render_payload(messages, tools:, temperature:, model:, stream: false, **) # rubocop:disable Metrics/ParameterLists payload = { model: model, messages: format_messages(messages),