diff --git a/docs/_core_features/chat.md b/docs/_core_features/chat.md
index e79b3ccf9..d706bd283 100644
--- a/docs/_core_features/chat.md
+++ b/docs/_core_features/chat.md
@@ -520,6 +520,42 @@ puts "Total Conversation Tokens: #{total_conversation_tokens}"
 
 Refer to the [Working with Models Guide]({% link _advanced/models.md %}) for details on accessing model-specific pricing.
 
+## Prompt Caching
+
+### Enabling
+For Anthropic models, RubyLLM automatically opts in to prompt caching (for system prompts, user messages, and tool definitions), which is documented more fully in the [Anthropic API docs](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching).
+
+You can disable prompt caching via configuration:
+
+```ruby
+RubyLLM.configure do |config|
+  config.cache_prompts = false # Disable prompt caching with Anthropic models
+end
+```
+
+Or specify exactly which parts of the prompt you want to cache:
+```ruby
+# Enable caching only for specific types of content
+chat = RubyLLM.chat(model: 'claude-3-5-haiku-20241022', cache: :system) # Cache system instructions
+chat = RubyLLM.chat(model: 'claude-3-5-haiku-20241022', cache: :user) # Cache user messages
+chat = RubyLLM.chat(model: 'claude-3-5-haiku-20241022', cache: :tools) # Cache tool definitions
+
+# Or a combination
+chat = RubyLLM.chat(model: 'claude-3-5-haiku-20241022', cache: [:system, :tools]) # Cache system instructions and tool definitions
+
+# Or do the same on the ask method
+chat.ask("What do you think?", cache: :system)
+chat.ask("What do you think?", cache: :user)
+chat.ask("What do you think?", cache: :tools)
+chat.ask("What do you think?", cache: [:system, :tools])
+
+```
+
+### Checking Cached Token Counts
+For Anthropic, OpenAI, and Gemini, you can see the number of tokens read from the cache in the `cached_tokens` property on the returned messages.
+
+For Anthropic, you can also see the number of tokens written to the cache in the `cache_creation_tokens` property.
+
 ## Chat Event Handlers
 
 You can register blocks to be called when certain events occur during the chat lifecycle. This is particularly useful for UI updates, logging, analytics, or building real-time chat interfaces.
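As an end-to-end illustration of the behaviour documented above, here is a minimal usage sketch in Ruby. It assumes the `cache:` option and the `cached_tokens` / `cache_creation_tokens` readers introduced in this diff; the file path, prompts, and model are placeholders, and a cache read only appears once an earlier request has written a sufficiently large prefix.

```ruby
require 'ruby_llm'

RubyLLM.configure do |config|
  config.anthropic_api_key = ENV['ANTHROPIC_API_KEY']
end

# Placeholder content: caching only applies once the prompt exceeds the
# provider's minimum cacheable length.
long_document = File.read('big_reference_doc.txt')

# Cache the (large) user prompt on Anthropic.
chat = RubyLLM.chat(model: 'claude-3-5-haiku-20241022', cache: :user)

first  = chat.ask("#{long_document}\n\nSummarize the document above.")
second = chat.ask('Now list three key takeaways.')

puts first.cache_creation_tokens # tokens written to the cache on the first request (Anthropic only)
puts second.cached_tokens        # tokens read back from the cache on the follow-up request
```

The same `cache:` values apply to Claude models served through Bedrock, since the Bedrock provider reuses the Anthropic cache logic added in this diff.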
diff --git a/gemfiles/rails_7.1.gemfile.lock b/gemfiles/rails_7.1.gemfile.lock index 7188fb72c..d33c1e983 100644 --- a/gemfiles/rails_7.1.gemfile.lock +++ b/gemfiles/rails_7.1.gemfile.lock @@ -148,6 +148,7 @@ GEM concurrent-ruby (~> 1.1) webrick (~> 1.7) websocket-driver (~> 0.7) + ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) fiber-annotation (0.2.0) fiber-local (1.1.0) @@ -180,12 +181,12 @@ GEM ruby-vips (>= 2.0.17, < 3) iniparse (1.5.0) io-console (0.8.1) - io-event (1.14.0) + io-event (1.11.2) irb (1.15.2) pp (>= 0.6.0) rdoc (>= 4.0.0) reline (>= 0.4.2) - json (2.14.1) + json (2.15.0) json-schema (6.0.0) addressable (~> 2.8) bigdecimal (~> 3.1) @@ -224,6 +225,8 @@ GEM net-smtp (0.5.1) net-protocol nio4r (2.7.4) + nokogiri (1.18.10-arm64-darwin) + racc (~> 1.4) nokogiri (1.18.10-x86_64-linux-gnu) racc (~> 1.4) os (1.1.4) @@ -318,7 +321,7 @@ GEM rubocop-ast (>= 1.46.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.47.0) + rubocop-ast (1.47.1) parser (>= 3.3.7.2) prism (~> 1.4) rubocop-performance (1.26.0) @@ -355,6 +358,7 @@ GEM simplecov (~> 0.19) simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) + sqlite3 (2.7.4-arm64-darwin) sqlite3 (2.7.4-x86_64-linux-gnu) stringio (3.1.7) thor (1.4.0) @@ -380,6 +384,7 @@ GEM zeitwerk (2.7.3) PLATFORMS + arm64-darwin-22 x86_64-linux DEPENDENCIES diff --git a/gemfiles/rails_7.2.gemfile.lock b/gemfiles/rails_7.2.gemfile.lock index 574915c29..f39ecd576 100644 --- a/gemfiles/rails_7.2.gemfile.lock +++ b/gemfiles/rails_7.2.gemfile.lock @@ -142,6 +142,7 @@ GEM concurrent-ruby (~> 1.1) webrick (~> 1.7) websocket-driver (~> 0.7) + ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) fiber-annotation (0.2.0) fiber-local (1.1.0) @@ -174,12 +175,12 @@ GEM ruby-vips (>= 2.0.17, < 3) iniparse (1.5.0) io-console (0.8.1) - io-event (1.14.0) + io-event (1.11.2) irb (1.15.2) pp (>= 0.6.0) rdoc (>= 4.0.0) reline (>= 0.4.2) - json (2.14.1) + json (2.15.0) json-schema (6.0.0) addressable (~> 2.8) bigdecimal (~> 3.1) @@ -217,6 +218,8 @@ GEM net-smtp (0.5.1) net-protocol nio4r (2.7.4) + nokogiri (1.18.10-arm64-darwin) + racc (~> 1.4) nokogiri (1.18.10-x86_64-linux-gnu) racc (~> 1.4) os (1.1.4) @@ -311,7 +314,7 @@ GEM rubocop-ast (>= 1.46.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.47.0) + rubocop-ast (1.47.1) parser (>= 3.3.7.2) prism (~> 1.4) rubocop-performance (1.26.0) @@ -348,6 +351,7 @@ GEM simplecov (~> 0.19) simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) + sqlite3 (2.7.4-arm64-darwin) sqlite3 (2.7.4-x86_64-linux-gnu) stringio (3.1.7) thor (1.4.0) @@ -374,6 +378,7 @@ GEM zeitwerk (2.7.3) PLATFORMS + arm64-darwin-22 x86_64-linux DEPENDENCIES diff --git a/gemfiles/rails_8.0.gemfile.lock b/gemfiles/rails_8.0.gemfile.lock index 1bd32e58a..52aee2d95 100644 --- a/gemfiles/rails_8.0.gemfile.lock +++ b/gemfiles/rails_8.0.gemfile.lock @@ -142,6 +142,7 @@ GEM concurrent-ruby (~> 1.1) webrick (~> 1.7) websocket-driver (~> 0.7) + ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) fiber-annotation (0.2.0) fiber-local (1.1.0) @@ -174,12 +175,12 @@ GEM ruby-vips (>= 2.0.17, < 3) iniparse (1.5.0) io-console (0.8.1) - io-event (1.14.0) + io-event (1.11.2) irb (1.15.2) pp (>= 0.6.0) rdoc (>= 4.0.0) reline (>= 0.4.2) - json (2.14.1) + json (2.15.0) json-schema (6.0.0) addressable (~> 2.8) bigdecimal (~> 3.1) @@ -217,6 +218,8 @@ GEM net-smtp (0.5.1) net-protocol nio4r (2.7.4) + nokogiri (1.18.10-arm64-darwin) + racc (~> 1.4) nokogiri (1.18.10-x86_64-linux-gnu) 
racc (~> 1.4) os (1.1.4) @@ -311,7 +314,7 @@ GEM rubocop-ast (>= 1.46.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.47.0) + rubocop-ast (1.47.1) parser (>= 3.3.7.2) prism (~> 1.4) rubocop-performance (1.26.0) @@ -348,6 +351,7 @@ GEM simplecov (~> 0.19) simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) + sqlite3 (2.7.4-arm64-darwin) sqlite3 (2.7.4-x86_64-linux-gnu) stringio (3.1.7) thor (1.4.0) @@ -374,6 +378,7 @@ GEM zeitwerk (2.7.3) PLATFORMS + arm64-darwin-22 x86_64-linux DEPENDENCIES diff --git a/lib/ruby_llm/active_record/chat_methods.rb b/lib/ruby_llm/active_record/chat_methods.rb index 173f14bf6..c75365b46 100644 --- a/lib/ruby_llm/active_record/chat_methods.rb +++ b/lib/ruby_llm/active_record/chat_methods.rb @@ -179,7 +179,8 @@ def create_user_message(content, with: nil) message_record end - def ask(message, with: nil, &) + def ask(message, with: nil, cache: nil, &) + to_llm.instance_variable_set(:@cache_prompts, cache) create_user_message(message, with:) complete(&) end diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index 6b5b94daa..e9a364885 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -7,7 +7,7 @@ class Chat attr_reader :model, :messages, :tools, :params, :headers, :schema - def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil) + def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil, cache: nil) if assume_model_exists && !provider raise ArgumentError, 'Provider must be specified if assume_model_exists is true' end @@ -19,6 +19,7 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n @temperature = nil @messages = [] @tools = {} + @cache_prompts = cache.nil? ? @config.cache_prompts : cache @params = {} @headers = {} @schema = nil @@ -30,7 +31,8 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n } end - def ask(message = nil, with: nil, &) + def ask(message = nil, with: nil, cache: nil, &) + @cache_prompts = cache if cache add_message role: :user, content: Content.new(message, with) complete(&) end @@ -127,6 +129,7 @@ def complete(&) # rubocop:disable Metrics/PerceivedComplexity tools: @tools, temperature: @temperature, model: @model, + cache_prompts: @cache_prompts.dup, params: @params, headers: @headers, schema: @schema, diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index eda2c3354..7aad4652b 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -43,7 +43,8 @@ class Configuration :logger, :log_file, :log_level, - :log_stream_debug + :log_stream_debug, + :cache_prompts def initialize @request_timeout = 120 @@ -64,6 +65,7 @@ def initialize @log_file = $stdout @log_level = ENV['RUBYLLM_DEBUG'] ? 
Logger::DEBUG : Logger::INFO @log_stream_debug = ENV['RUBYLLM_STREAM_DEBUG'] == 'true' + @cache_prompts = true end def instance_variables diff --git a/lib/ruby_llm/message.rb b/lib/ruby_llm/message.rb index 239390215..bbba584ef 100644 --- a/lib/ruby_llm/message.rb +++ b/lib/ruby_llm/message.rb @@ -5,7 +5,8 @@ module RubyLLM class Message ROLES = %i[system user assistant tool].freeze - attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw + attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw, + :cached_tokens, :cache_creation_tokens attr_writer :content def initialize(options = {}) @@ -16,6 +17,8 @@ def initialize(options = {}) @output_tokens = options[:output_tokens] @model_id = options[:model_id] @tool_call_id = options[:tool_call_id] + @cached_tokens = options[:cached_tokens] + @cache_creation_tokens = options[:cache_creation_tokens] @raw = options[:raw] ensure_valid_role @@ -49,7 +52,9 @@ def to_h tool_call_id: tool_call_id, input_tokens: input_tokens, output_tokens: output_tokens, - model_id: model_id + model_id: model_id, + cache_creation_tokens: cache_creation_tokens, + cached_tokens: cached_tokens }.compact end diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb index f3344e57d..c88b050c6 100644 --- a/lib/ruby_llm/provider.rb +++ b/lib/ruby_llm/provider.rb @@ -37,7 +37,8 @@ def configuration_requirements self.class.configuration_requirements end - def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists + def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, # rubocop:disable Metrics/ParameterLists + cache_prompts: nil, &) normalized_temperature = maybe_normalize_temperature(temperature, model) payload = Utils.deep_merge( @@ -46,6 +47,7 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc tools: tools, temperature: normalized_temperature, model: model, + cache_prompts: cache_prompts, stream: block_given?, schema: schema ), diff --git a/lib/ruby_llm/providers/anthropic.rb b/lib/ruby_llm/providers/anthropic.rb index cd7d38055..80145f926 100644 --- a/lib/ruby_llm/providers/anthropic.rb +++ b/lib/ruby_llm/providers/anthropic.rb @@ -10,6 +10,7 @@ class Anthropic < Provider include Anthropic::Models include Anthropic::Streaming include Anthropic::Tools + include Anthropic::Cache def api_base 'https://api.anthropic.com' diff --git a/lib/ruby_llm/providers/anthropic/cache.rb b/lib/ruby_llm/providers/anthropic/cache.rb new file mode 100644 index 000000000..23d129362 --- /dev/null +++ b/lib/ruby_llm/providers/anthropic/cache.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class Anthropic + # Handles caching of prompts for Anthropic + module Cache + def should_cache?(type) + return false unless cache_prompts + return true if cache_prompts == true + return true if cache_prompts.is_a?(Array) && cache_prompts.include?(type) + return true if cache_prompts.is_a?(Symbol) && cache_prompts == type + + false + end + end + end + end +end diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index 85630b0c1..8b0b876ce 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -7,16 +7,21 @@ class Anthropic module Chat module_function + attr_reader :cache_prompts + def completion_url '/v1/messages' end - def render_payload(messages, tools:, 
temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument + cache_prompts: nil) + @cache_prompts = cache_prompts system_messages, chat_messages = separate_messages(messages) - system_content = build_system_content(system_messages) + system_content = build_system_content(system_messages, cache: should_cache?(:system)) - build_base_payload(chat_messages, model, stream).tap do |payload| - add_optional_fields(payload, system_content:, tools:, temperature:) + build_base_payload(chat_messages, model, stream, cache: should_cache?(:user)).tap do |payload| + add_optional_fields(payload, system_content:, tools:, temperature:, + cache_tools: should_cache?(:tools)) end end @@ -24,28 +29,34 @@ def separate_messages(messages) messages.partition { |msg| msg.role == :system } end - def build_system_content(system_messages) - if system_messages.length > 1 - RubyLLM.logger.warn( - "Anthropic's Claude implementation only supports a single system message. " \ - 'Multiple system messages will be combined into one.' - ) + def build_system_content(system_messages, cache: false) + system_messages.flat_map.with_index do |msg, idx| + message_cache = cache if idx == system_messages.size - 1 + format_system_message(msg, cache: message_cache) end - - system_messages.map(&:content).join("\n\n") end - def build_base_payload(chat_messages, model, stream) + def build_base_payload(chat_messages, model, stream, cache: false) + messages = chat_messages.map.with_index do |msg, idx| + message_cache = cache if idx == chat_messages.size - 1 + format_message(msg, cache: message_cache) + end + { model: model.id, - messages: chat_messages.map { |msg| format_message(msg) }, + messages:, stream: stream, max_tokens: model.max_tokens || 4096 } end - def add_optional_fields(payload, system_content:, tools:, temperature:) - payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any? + def add_optional_fields(payload, system_content:, tools:, temperature:, cache_tools: false) + if tools.any? + tool_definitions = tools.values.map { |t| Tools.function_for(t) } + tool_definitions[-1][:cache_control] = { type: 'ephemeral' } if cache_tools + payload[:tools] = tool_definitions + end + payload[:system] = system_content unless system_content.empty? payload[:temperature] = temperature unless temperature.nil? end @@ -73,24 +84,30 @@ def build_message(data, content, tool_use_blocks, response) input_tokens: data.dig('usage', 'input_tokens'), output_tokens: data.dig('usage', 'output_tokens'), model_id: data['model'], + cache_creation_tokens: data.dig('usage', 'cache_creation_input_tokens'), + cached_tokens: data.dig('usage', 'cache_read_input_tokens'), raw: response ) end - def format_message(msg) + def format_message(msg, cache: false) if msg.tool_call? Tools.format_tool_call(msg) elsif msg.tool_result? 
Tools.format_tool_result(msg) else - format_basic_message(msg) + format_basic_message(msg, cache:) end end - def format_basic_message(msg) + def format_system_message(msg, cache: false) + Media.format_content(msg.content, cache:) + end + + def format_basic_message(msg, cache: false) { role: convert_role(msg.role), - content: Media.format_content(msg.content) + content: Media.format_content(msg.content, cache:) } end diff --git a/lib/ruby_llm/providers/anthropic/media.rb b/lib/ruby_llm/providers/anthropic/media.rb index a2e170a0f..788125b2c 100644 --- a/lib/ruby_llm/providers/anthropic/media.rb +++ b/lib/ruby_llm/providers/anthropic/media.rb @@ -7,12 +7,12 @@ class Anthropic module Media module_function - def format_content(content) - return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array) - return [format_text(content)] unless content.is_a?(Content) + def format_content(content, cache: false) + return [format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array) + return [format_text(content, cache:)] unless content.is_a?(Content) parts = [] - parts << format_text(content.text) if content.text + parts << format_text(content.text, cache:) if content.text content.attachments.each do |attachment| case attachment.type @@ -30,60 +30,84 @@ def format_content(content) parts end - def format_text(text) - { - type: 'text', - text: text - } + def format_text(text, cache: false) + with_cache_control( + { + type: 'text', + text: text + }, + cache: + ) end - def format_image(image) + def format_image(image, cache: false) if image.url? - { - type: 'image', - source: { - type: 'url', - url: image.source - } - } + with_cache_control( + { + type: 'image', + source: { + type: 'url', + url: image.source + } + }, + cache: + ) else - { - type: 'image', - source: { - type: 'base64', - media_type: image.mime_type, - data: image.encoded - } - } + with_cache_control( + { + type: 'image', + source: { + type: 'base64', + media_type: image.mime_type, + data: image.encoded + } + }, + cache: + ) end end - def format_pdf(pdf) + def format_pdf(pdf, cache: false) if pdf.url? 
- { - type: 'document', - source: { - type: 'url', - url: pdf.source - } - } + with_cache_control( + { + type: 'document', + source: { + type: 'url', + url: pdf.source + } + }, + cache: + ) else - { - type: 'document', - source: { - type: 'base64', - media_type: pdf.mime_type, - data: pdf.encoded - } - } + with_cache_control( + { + type: 'document', + source: { + type: 'base64', + media_type: pdf.mime_type, + data: pdf.encoded + } + }, + cache: + ) end end - def format_text_file(text_file) - { - type: 'text', - text: text_file.for_llm - } + def format_text_file(text_file, cache: false) + with_cache_control( + { + type: 'text', + text: text_file.for_llm + }, + cache: + ) + end + + def with_cache_control(hash, cache: false) + return hash unless cache + + hash.merge(cache_control: { type: 'ephemeral' }) end end end diff --git a/lib/ruby_llm/providers/anthropic/models.rb b/lib/ruby_llm/providers/anthropic/models.rb index 31066cd75..b3481851f 100644 --- a/lib/ruby_llm/providers/anthropic/models.rb +++ b/lib/ruby_llm/providers/anthropic/models.rb @@ -42,6 +42,14 @@ def extract_input_tokens(data) def extract_output_tokens(data) data.dig('message', 'usage', 'output_tokens') || data.dig('usage', 'output_tokens') end + + def extract_cached_tokens(data) + data.dig('message', 'usage', 'cache_read_input_tokens') + end + + def extract_cache_creation_tokens(data) + data.dig('message', 'usage', 'cache_creation_input_tokens') + end end end end diff --git a/lib/ruby_llm/providers/anthropic/streaming.rb b/lib/ruby_llm/providers/anthropic/streaming.rb index 93b6fdfa9..5fed14710 100644 --- a/lib/ruby_llm/providers/anthropic/streaming.rb +++ b/lib/ruby_llm/providers/anthropic/streaming.rb @@ -18,6 +18,8 @@ def build_chunk(data) content: data.dig('delta', 'text'), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), + cached_tokens: extract_cached_tokens(data), + cache_creation_tokens: extract_cache_creation_tokens(data), tool_calls: extract_tool_calls(data) ) end diff --git a/lib/ruby_llm/providers/bedrock.rb b/lib/ruby_llm/providers/bedrock.rb index 50474b27f..196e59f95 100644 --- a/lib/ruby_llm/providers/bedrock.rb +++ b/lib/ruby_llm/providers/bedrock.rb @@ -13,6 +13,7 @@ class Bedrock < Provider include Bedrock::Signing include Bedrock::Media include Anthropic::Tools + include Anthropic::Cache def api_base "https://bedrock-runtime.#{@config.bedrock_region}.amazonaws.com" diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb index 4abdae3f3..fdb419a0a 100644 --- a/lib/ruby_llm/providers/bedrock/chat.rb +++ b/lib/ruby_llm/providers/bedrock/chat.rb @@ -7,6 +7,8 @@ class Bedrock module Chat module_function + attr_reader :cache_prompts + def sync_response(connection, payload, additional_headers = {}) signature = sign_request("#{connection.connection.url_prefix}#{completion_url}", payload:) response = connection.post completion_url, payload do |req| @@ -16,20 +18,20 @@ def sync_response(connection, payload, additional_headers = {}) Anthropic::Chat.parse_completion_response response end - def format_message(msg) + def format_message(msg, cache: false) if msg.tool_call? Anthropic::Tools.format_tool_call(msg) elsif msg.tool_result? 
Anthropic::Tools.format_tool_result(msg) else - format_basic_message(msg) + format_basic_message(msg, cache:) end end - def format_basic_message(msg) + def format_basic_message(msg, cache: false) { role: Anthropic::Chat.convert_role(msg.role), - content: Media.format_content(msg.content) + content: Media.format_content(msg.content, cache:) } end @@ -39,21 +41,33 @@ def completion_url "model/#{@model_id}/invoke" end - def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists + cache_prompts: nil) @model_id = model.id + @cache_prompts = cache_prompts system_messages, chat_messages = Anthropic::Chat.separate_messages(messages) - system_content = Anthropic::Chat.build_system_content(system_messages) + system_content = Anthropic::Chat.build_system_content(system_messages, cache: should_cache?(:system)) - build_base_payload(chat_messages, model).tap do |payload| - Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, temperature:) + build_base_payload(chat_messages, model, cache: should_cache?(:user)).tap do |payload| + Anthropic::Chat.add_optional_fields( + payload, + system_content:, + tools:, + temperature:, + cache_tools: should_cache?(:tools) + ) end end - def build_base_payload(chat_messages, model) + def build_base_payload(chat_messages, model, cache: false) + messages = chat_messages.map.with_index do |msg, idx| + message_cache = cache if idx == chat_messages.size - 1 + format_message(msg, cache: message_cache) + end { anthropic_version: 'bedrock-2023-05-31', - messages: chat_messages.map { |msg| format_message(msg) }, + messages: messages, max_tokens: model.max_tokens || 4096 } end diff --git a/lib/ruby_llm/providers/bedrock/media.rb b/lib/ruby_llm/providers/bedrock/media.rb index af45e374f..7fcd6d26d 100644 --- a/lib/ruby_llm/providers/bedrock/media.rb +++ b/lib/ruby_llm/providers/bedrock/media.rb @@ -10,21 +10,21 @@ module Media module_function - def format_content(content) - return [Anthropic::Media.format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array) - return [Anthropic::Media.format_text(content)] unless content.is_a?(Content) + def format_content(content, cache: false) + return [Anthropic::Media.format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array) + return [Anthropic::Media.format_text(content, cache:)] unless content.is_a?(Content) parts = [] - parts << Anthropic::Media.format_text(content.text) if content.text + parts << Anthropic::Media.format_text(content.text, cache:) if content.text content.attachments.each do |attachment| case attachment.type when :image - parts << format_image(attachment) + parts << format_image(attachment, cache:) when :pdf - parts << format_pdf(attachment) + parts << format_pdf(attachment, cache:) when :text - parts << Anthropic::Media.format_text_file(attachment) + parts << Anthropic::Media.format_text_file(attachment, cache:) else raise UnsupportedAttachmentError, attachment.type end @@ -33,26 +33,38 @@ def format_content(content) parts end - def format_image(image) - { - type: 'image', - source: { - type: 'base64', - media_type: image.mime_type, - data: image.encoded - } - } + def format_image(image, cache: false) + with_cache_control( + { + type: 'image', + source: { + type: 'base64', + media_type: image.mime_type, + data: 
image.encoded + } + }, + cache: + ) end - def format_pdf(pdf) - { - type: 'document', - source: { - type: 'base64', - media_type: pdf.mime_type, - data: pdf.encoded - } - } + def format_pdf(pdf, cache: false) + with_cache_control( + { + type: 'document', + source: { + type: 'base64', + media_type: pdf.mime_type, + data: pdf.encoded + } + }, + cache: + ) + end + + def with_cache_control(hash, cache: false) + return hash unless cache + + hash.merge(cache_control: { type: 'ephemeral' }) end end end diff --git a/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb b/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb index 4f94d03db..d5baf7745 100644 --- a/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +++ b/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb @@ -32,6 +32,14 @@ def extract_output_tokens(data) data.dig('message', 'usage', 'output_tokens') || data.dig('usage', 'output_tokens') end + def extract_cached_tokens(data) + data.dig('message', 'usage', 'cache_read_input_tokens') + end + + def extract_cache_creation_tokens(data) + data.dig('message', 'usage', 'cache_creation_input_tokens') + end + private def extract_content_by_type(data) diff --git a/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb b/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb index 757afb6ed..4855c66f6 100644 --- a/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +++ b/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb @@ -59,6 +59,8 @@ def extract_chunk_attributes(data) content: extract_streaming_content(data), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), + cached_tokens: extract_cached_tokens(data), + cache_creation_tokens: extract_cache_creation_tokens(data), tool_calls: extract_tool_calls(data) } end diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index c504b08a2..9e07de69b 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -11,7 +11,7 @@ def completion_url "models/#{@model}:generateContent" end - def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument + def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument @model = model.id payload = { contents: format_messages(messages), @@ -81,6 +81,7 @@ def parse_completion_response(response) tool_calls: tool_calls, input_tokens: data.dig('usageMetadata', 'promptTokenCount'), output_tokens: calculate_output_tokens(data), + cached_tokens: data.dig('usageMetadata', 'cacheTokensDetails', 0, 'tokenCount') || 0, model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0], raw: response ) diff --git a/lib/ruby_llm/providers/gemini/streaming.rb b/lib/ruby_llm/providers/gemini/streaming.rb index 8aa630b27..6d41a6f0c 100644 --- a/lib/ruby_llm/providers/gemini/streaming.rb +++ b/lib/ruby_llm/providers/gemini/streaming.rb @@ -16,6 +16,7 @@ def build_chunk(data) content: extract_content(data), input_tokens: extract_input_tokens(data), output_tokens: extract_output_tokens(data), + cached_tokens: extract_cached_tokens(data), tool_calls: extract_tool_calls(data) ) end @@ -48,6 +49,10 @@ def extract_output_tokens(data) total.positive? ? 
total : nil end + def extract_cached_tokens(data) + data.dig('usageMetadata', 'cachedContentTokenCount') + end + def parse_streaming_error(data) error_data = JSON.parse(data) [error_data['error']['code'], error_data['error']['message']] diff --git a/lib/ruby_llm/providers/mistral/chat.rb b/lib/ruby_llm/providers/mistral/chat.rb index 10ec965e4..e71fb5124 100644 --- a/lib/ruby_llm/providers/mistral/chat.rb +++ b/lib/ruby_llm/providers/mistral/chat.rb @@ -12,7 +12,7 @@ def format_role(role) end # rubocop:disable Metrics/ParameterLists - def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) + def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists payload = super payload.delete(:stream_options) payload diff --git a/lib/ruby_llm/providers/openai/chat.rb b/lib/ruby_llm/providers/openai/chat.rb index e6d58ec05..80745060f 100644 --- a/lib/ruby_llm/providers/openai/chat.rb +++ b/lib/ruby_llm/providers/openai/chat.rb @@ -11,7 +11,7 @@ def completion_url module_function - def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Lint/UnusedMethodArgument, Metrics/ParameterLists payload = { model: model.id, messages: format_messages(messages), @@ -53,6 +53,7 @@ def parse_completion_response(response) tool_calls: parse_tool_calls(message_data['tool_calls']), input_tokens: data['usage']['prompt_tokens'], output_tokens: data['usage']['completion_tokens'], + cached_tokens: data.dig('usage', 'prompt_tokens_details', 'cached_tokens'), model_id: data['model'], raw: response ) diff --git a/lib/ruby_llm/providers/openai/streaming.rb b/lib/ruby_llm/providers/openai/streaming.rb index c3932ec3b..d77dbe635 100644 --- a/lib/ruby_llm/providers/openai/streaming.rb +++ b/lib/ruby_llm/providers/openai/streaming.rb @@ -18,7 +18,8 @@ def build_chunk(data) content: data.dig('choices', 0, 'delta', 'content'), tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false), input_tokens: data.dig('usage', 'prompt_tokens'), - output_tokens: data.dig('usage', 'completion_tokens') + output_tokens: data.dig('usage', 'completion_tokens'), + cached_tokens: data.dig('usage', 'cached_tokens') ) end diff --git a/lib/ruby_llm/stream_accumulator.rb b/lib/ruby_llm/stream_accumulator.rb index e5cfcbd93..a39367d61 100644 --- a/lib/ruby_llm/stream_accumulator.rb +++ b/lib/ruby_llm/stream_accumulator.rb @@ -10,6 +10,8 @@ def initialize @tool_calls = {} @input_tokens = 0 @output_tokens = 0 + @cached_tokens = 0 + @cache_creation_tokens = 0 @latest_tool_call_id = nil end @@ -35,6 +37,8 @@ def to_message(response) tool_calls: tool_calls_from_stream, input_tokens: @input_tokens.positive? ? @input_tokens : nil, output_tokens: @output_tokens.positive? ? @output_tokens : nil, + cached_tokens: @cached_tokens.positive? ? @cached_tokens : nil, + cache_creation_tokens: @cache_creation_tokens.positive? ? 
@cache_creation_tokens : nil, raw: response ) end @@ -90,6 +94,8 @@ def find_tool_call(tool_call_id) def count_tokens(chunk) @input_tokens = chunk.input_tokens if chunk.input_tokens @output_tokens = chunk.output_tokens if chunk.output_tokens + @cached_tokens = chunk.cached_tokens if chunk.cached_tokens + @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens end end end diff --git a/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_caching_allows_prompt_caching.yml b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_caching_allows_prompt_caching.yml new file mode 100644 index 000000000..0c18aa85f --- /dev/null +++ b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_caching_allows_prompt_caching.yml @@ -0,0 +1,80 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Hello","cache_control":{"type":"ephemeral"}}]}],"stream":false,"max_tokens":8192,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 15 Aug 2025 21:07:10 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-15T21:07:10Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-15T21:07:10Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-15T21:07:10Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '480000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-15T21:07:10Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01GEK45t8NmPRgvVjrHWCmwU","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Hi + there! How are you doing today? 
Is there anything I can help you with?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":8,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":21,"service_tier":"standard"}}' + recorded_at: Fri, 15 Aug 2025 21:07:10 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_anthropic_claude-3-5-haiku-20241022_successfully_uses_the_system_prompt.yml b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_anthropic_claude-3-5-haiku-20241022_successfully_uses_the_system_prompt.yml index 4f531b151..0e63fb593 100644 --- a/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_anthropic_claude-3-5-haiku-20241022_successfully_uses_the_system_prompt.yml +++ b/spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_anthropic_claude-3-5-haiku-20241022_successfully_uses_the_system_prompt.yml @@ -35,27 +35,27 @@ http_interactions: Connection: - keep-alive Anthropic-Ratelimit-Input-Tokens-Limit: - - '100000' + - '50000' Anthropic-Ratelimit-Input-Tokens-Remaining: - - '100000' + - '50000' Anthropic-Ratelimit-Input-Tokens-Reset: - '2025-09-21T14:42:31Z' Anthropic-Ratelimit-Output-Tokens-Limit: - - '20000' + - '10000' Anthropic-Ratelimit-Output-Tokens-Remaining: - - '20000' + - '10000' Anthropic-Ratelimit-Output-Tokens-Reset: - '2025-09-21T14:42:34Z' Anthropic-Ratelimit-Requests-Limit: - - '1000' + - '50' Anthropic-Ratelimit-Requests-Remaining: - - '999' + - '49' Anthropic-Ratelimit-Requests-Reset: - '2025-09-21T14:42:31Z' Anthropic-Ratelimit-Tokens-Limit: - - '120000' + - '60000' Anthropic-Ratelimit-Tokens-Remaining: - - '120000' + - '60000' Anthropic-Ratelimit-Tokens-Reset: - '2025-09-21T14:42:31Z' Request-Id: diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_multiple_caching_types_handles_multiple_caching_types_together.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_multiple_caching_types_handles_multiple_caching_types_together.yml new file mode 100644 index 000000000..6a8eb59b8 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_multiple_caching_types_handles_multiple_caching_types_together.yml @@ -0,0 +1,175 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby","cache_control":{"type":"ephemeral"}}]}],"stream":false,"max_tokens":8192,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:23:09 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + 
Anthropic-Ratelimit-Input-Tokens-Remaining: + - '398000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:23:08Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:23:09Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:23:07Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '478000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:23:08Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01BDTerm8ELpf9Lh3a8v8bh1","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I''ll + help you get information about Ruby development by using the available tool."},{"type":"tool_use","id":"toolu_01NSq4nB43eJJUAMzoB9DQWv","name":"describe_ruby_dev","input":{}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":8514,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":8514,"ephemeral_1h_input_tokens":0},"output_tokens":54,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:23:09 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I''ll + help you get information about Ruby development by using the available tool."},{"type":"tool_use","id":"toolu_01NSq4nB43eJJUAMzoB9DQWv","name":"describe_ruby_dev","input":{}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01NSq4nB43eJJUAMzoB9DQWv","content":"Ruby + is a great language for building web applications."}]}],"stream":false,"max_tokens":8192,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:23:15 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '395000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:23:11Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:23:15Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:23:09Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + 
Anthropic-Ratelimit-Tokens-Remaining: + - '475000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:23:11Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01JfsgKwnpUj5jL89fSETwdE","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Ruby + is a dynamic, object-oriented programming language known for its simplicity + and readability. Here are some key points about Ruby:\n\n1. Created by Yukihiro + Matsumoto (often called \"Matz\") in 1995\n2. Designed to be programmer-friendly + with a focus on human-readable syntax\n3. Particularly popular for web development, + especially with the Ruby on Rails framework\n4. Supports multiple programming + paradigms, including object-oriented, functional, and imperative programming\n5. + Known for its elegant and concise code\n6. Has a strong standard library and + a vibrant ecosystem of gems (libraries)\n7. Commonly used for:\n - Web development\n - + Scripting\n - Automation\n - Prototyping\n - DevOps tools\n\nSome popular + frameworks and tools in the Ruby ecosystem include:\n- Ruby on Rails (web + application framework)\n- Sinatra (lightweight web framework)\n- RSpec (testing + framework)\n- Bundler (dependency management)\n- Rake (build automation)\n\nRuby + emphasizes the principle of \"developer happiness\" and follows the philosophy + that there should be more than one way to do something, giving developers + flexibility in their coding approach."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":2953,"cache_creation_input_tokens":0,"cache_read_input_tokens":5639,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":271,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:23:15 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_streaming_reports_cached_tokens.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_streaming_reports_cached_tokens.yml new file mode 100644 index 000000000..e75f07e9c --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_streaming_reports_cached_tokens.yml @@ -0,0 +1,236 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 3","cache_control":{"type":"ephemeral"}}]}],"stream":true,"max_tokens":8192,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:24:19 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cf-Ray: + - "" + Cache-Control: + - no-cache + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + 
Anthropic-Ratelimit-Input-Tokens-Remaining: + - '398000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:24:19Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:24:18Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:24:18Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '478000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:24:18Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + body: + encoding: UTF-8 + string: |+ + event: message_start + data: {"type":"message_start","message":{"id":"msg_01ESjJKDSNtD5bt6jcqvxp9e","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":2744,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":2744,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} } + + event: content_block_start + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + event: ping + data: {"type": "ping"} + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Here"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" counting"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" from 1 to"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" 3:\n\n1"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n2\n3"} } + + event: content_block_stop + data: {"type":"content_block_stop","index":0 } + + event: message_delta + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":20} } + + event: message_stop + data: {"type":"message_stop" } + + recorded_at: Thu, 14 Aug 2025 16:24:19 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 3"}]},{"role":"assistant","content":[{"type":"text","text":"Here''s + counting from 1 to 3:\n\n1\n2\n3"}]},{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 3","cache_control":{"type":"ephemeral"}}]}],"stream":true,"max_tokens":8192,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:24:21 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - 
keep-alive + Cf-Ray: + - "" + Cache-Control: + - no-cache + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '396000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:24:20Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:24:19Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:24:20Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '476000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:24:19Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + body: + encoding: UTF-8 + string: |+ + event: message_start + data: {"type":"message_start","message":{"id":"msg_01AgYaKnVSNp9SbL9xW9g4c3","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":2764,"cache_read_input_tokens":2744,"cache_creation":{"ephemeral_5m_input_tokens":2764,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} } + + event: content_block_start + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + event: ping + data: {"type": "ping"} + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Here"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" counting"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" from 1 to"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" 3:\n\n1"} } + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n2\n3"} } + + event: content_block_stop + data: {"type":"content_block_stop","index":0 } + + event: message_delta + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":20}} + + event: message_stop + data: {"type":"message_stop" } + + recorded_at: Thu, 14 Aug 2025 16:24:21 GMT +recorded_with: VCR 6.3.1 +... 
diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml new file mode 100644 index 000000000..0adde294f --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml @@ -0,0 +1,174 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]}],"stream":false,"max_tokens":8192,"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:23:56 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:23:54Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:23:56Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:23:53Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:23:54Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01Fd5rjy7p2WRgRtdjyxnVHG","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + want to be direct and transparent with you. My key principles include being + helpful, honest, avoiding harm, protecting individual privacy, and being respectful. + I aim to give accurate information, acknowledge when I''m uncertain, and not + pretend to have capabilities I don''t. 
I won''t help with anything illegal + or dangerous, and I try to provide balanced, nuanced perspectives."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":15,"cache_creation_input_tokens":2732,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":2732,"ephemeral_1h_input_tokens":0},"output_tokens":79,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:23:56 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]},{"role":"assistant","content":[{"type":"text","text":"I + want to be direct and transparent with you. My key principles include being + helpful, honest, avoiding harm, protecting individual privacy, and being respectful. + I aim to give accurate information, acknowledge when I''m uncertain, and not + pretend to have capabilities I don''t. I won''t help with anything illegal + or dangerous, and I try to provide balanced, nuanced perspectives."}]},{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]}],"stream":false,"max_tokens":8192,"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:23:58 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:23:57Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:23:58Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:23:56Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:23:57Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01W75cVDRb1BVmcCYgaRMi3H","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + aim to be helpful, honest, and ethical. My key principles include:\n\n1. Being + truthful and transparent\n2. Protecting individual privacy \n3. Avoiding potential + harm\n4. Respecting human rights\n5. Providing balanced, factual information\n6. + Acknowledging the limits of my knowledge\n7. Maintaining appropriate boundaries\n8. 
+ Declining requests that could be unethical or dangerous\n\nI strive to be + a responsible AI assistant that supports human wellbeing."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":105,"cache_creation_input_tokens":0,"cache_read_input_tokens":2732,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":109,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:23:58 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml new file mode 100644 index 000000000..7f405820f --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml @@ -0,0 +1,180 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Tell + me about Ruby"}]}],"stream":false,"max_tokens":8192,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:47:14 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:47:14Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:47:14Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:47:13Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:47:14Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01LVwUdFoKF4qiMoy8zpbt6k","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I''ll + use the describe_ruby_dev function to get information about Ruby for 
you."},{"type":"tool_use","id":"toolu_01J9F7Xi77jPzDXuGwkQEv5Q","name":"describe_ruby_dev","input":{}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":145,"cache_creation_input_tokens":2902,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":2902,"ephemeral_1h_input_tokens":0},"output_tokens":57,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:47:14 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Tell + me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I''ll + use the describe_ruby_dev function to get information about Ruby for you."},{"type":"tool_use","id":"toolu_01J9F7Xi77jPzDXuGwkQEv5Q","name":"describe_ruby_dev","input":{}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01J9F7Xi77jPzDXuGwkQEv5Q","content":"Ruby + is a great language for building web applications."}]}],"stream":false,"max_tokens":8192,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:47:22 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:47:15Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:47:22Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:47:15Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:47:15Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_01PDSjZj6jjLSLWwRrU9gX2i","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Let + me provide you with more comprehensive information about Ruby:\n\nRuby is + a dynamic, object-oriented programming language created by Yukihiro Matsumoto + (often called \"Matz\") in Japan during the mid-1990s. Here are some key characteristics:\n\n1. + Design Philosophy:\n- Designed to be programmer-friendly and productive\n- + Follows the principle of \"developer happiness\" and \"least surprise\"\n- + Emphasizes natural and readable syntax\n\n2. Key Features:\n- Fully object-oriented: + Everything in Ruby is an object\n- Dynamic typing\n- Supports functional programming + paradigms\n- Automatic memory management (garbage collection)\n- Extensive + standard library\n- Strong metaprogramming capabilities\n\n3. 
Popular Use + Cases:\n- Web development (especially with Ruby on Rails framework)\n- Scripting + and automation\n- DevOps tools\n- Prototyping\n- Backend web services\n\n4. + Notable Frameworks and Tools:\n- Ruby on Rails (web application framework)\n- + Sinatra (lightweight web framework)\n- RSpec (testing framework)\n- Bundler + (dependency management)\n\n5. Community and Ecosystem:\n- Active and supportive + open-source community\n- Extensive collection of libraries (called \"gems\")\n- + Regular language updates and improvements\n\nRuby is particularly loved by + developers for its elegant syntax, flexibility, and focus on developer productivity. + While it may not be as widely used as some other languages, it remains a powerful + and enjoyable language to work with, especially in web development and scripting + domains."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":222,"cache_creation_input_tokens":0,"cache_read_input_tokens":2902,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":338,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:47:22 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml new file mode 100644 index 000000000..974693754 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_anthropic_provider_claude-3-5-haiku-20241022_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml @@ -0,0 +1,199 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby","cache_control":{"type":"ephemeral"}}]}],"stream":false,"max_tokens":8192,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:24:03 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:23:59Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:24:03Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:23:59Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:23:59Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google 
+ Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_018Kg2eT1LZZbyPTHmzomDEH","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + apologize, but the input you provided is just a very long string of the letter + \"a\" and does not contain any information about Ruby. If you would like to + know about Ruby, I can provide some information:\n\nRuby is a dynamic, object-oriented + programming language created by Yukihiro Matsumoto (often called \"Matz\") + in Japan during the mid-1990s. Some key characteristics of Ruby include:\n\n1. + Designed for programmer productivity and happiness\n2. Supports multiple programming + paradigms (object-oriented, functional, imperative)\n3. Known for its clean + and readable syntax\n4. Widely used for web development, particularly with + the Ruby on Rails framework\n5. Open-source programming language\n6. Interpreted + language\n7. Features like automatic memory management and dynamic typing\n8. + Strong support for metaprogramming\n9. Cross-platform compatibility\n\nWould + you like me to elaborate on any of these points about Ruby?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":2745,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":2745,"ephemeral_1h_input_tokens":0},"output_tokens":207,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:24:03 GMT +- request: + method: post + uri: https://api.anthropic.com/v1/messages + body: + encoding: UTF-8 + string: '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I + apologize, but the input you provided is just a very long string of the letter + \"a\" and does not contain any information about Ruby. If you would like to + know about Ruby, I can provide some information:\n\nRuby is a dynamic, object-oriented + programming language created by Yukihiro Matsumoto (often called \"Matz\") + in Japan during the mid-1990s. Some key characteristics of Ruby include:\n\n1. + Designed for programmer productivity and happiness\n2. Supports multiple programming + paradigms (object-oriented, functional, imperative)\n3. Known for its clean + and readable syntax\n4. Widely used for web development, particularly with + the Ruby on Rails framework\n5. Open-source programming language\n6. Interpreted + language\n7. Features like automatic memory management and dynamic typing\n8. + Strong support for metaprogramming\n9. 
Cross-platform compatibility\n\nWould + you like me to elaborate on any of these points about Ruby?"}]},{"role":"user","content":[{"type":"text","text":"Tell + me more about Ruby","cache_control":{"type":"ephemeral"}}]}],"stream":false,"max_tokens":8192,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Api-Key: + - "" + Anthropic-Version: + - '2023-06-01' + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:24:10 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Anthropic-Ratelimit-Input-Tokens-Limit: + - '400000' + Anthropic-Ratelimit-Input-Tokens-Remaining: + - '399000' + Anthropic-Ratelimit-Input-Tokens-Reset: + - '2025-08-14T16:24:04Z' + Anthropic-Ratelimit-Output-Tokens-Limit: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Remaining: + - '80000' + Anthropic-Ratelimit-Output-Tokens-Reset: + - '2025-08-14T16:24:10Z' + Anthropic-Ratelimit-Requests-Limit: + - '4000' + Anthropic-Ratelimit-Requests-Remaining: + - '3999' + Anthropic-Ratelimit-Requests-Reset: + - '2025-08-14T16:24:03Z' + Anthropic-Ratelimit-Tokens-Limit: + - '480000' + Anthropic-Ratelimit-Tokens-Remaining: + - '479000' + Anthropic-Ratelimit-Tokens-Reset: + - '2025-08-14T16:24:04Z' + Request-Id: + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Anthropic-Organization-Id: + - "" + Via: + - 1.1 google + Cf-Cache-Status: + - DYNAMIC + X-Robots-Tag: + - none + Server: + - cloudflare + Cf-Ray: + - "" + body: + encoding: ASCII-8BIT + string: '{"id":"msg_015rq7u1xDiESviQgHnN16LD","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Here''s + a more detailed overview of Ruby:\n\nLanguage Design:\n- Created by Yukihiro + Matsumoto in 1995\n- Philosophy: \"Principle of least surprise\"\n- Designed + to make programming more enjoyable and productive\n- Fully object-oriented + language (everything is an object)\n\nKey Technical Features:\n- Dynamic typing\n- + Automatic memory management\n- Supports multiple programming paradigms\n- + Interpreted language\n- Uses garbage collection\n- Strong metaprogramming + capabilities\n\nSyntax Characteristics:\n- Clean, readable, and concise\n- + Uses indentation for readability\n- Supports functional programming concepts\n- + Uses snake_case for method and variable names\n- Uses CamelCase for class + and module names\n\nPopular Use Cases:\n- Web development (Ruby on Rails framework)\n- + Scripting\n- Automation\n- DevOps tools\n- Prototyping\n- Backend web services\n\nEcosystem:\n- + RubyGems package manager\n- Large open-source community\n- Extensive library + of pre-built modules\n- Strong testing frameworks\n\nPerformance:\n- Generally + slower than compiled languages\n- Improved performance with JIT compilation + in recent versions\n- Good for rapid development and scripting\n\nMajor Companies + Using Ruby:\n- GitHub\n- Airbnb\n- Shopify\n- Twitch\n- SoundCloud\n\nWould + you like me to elaborate on any of these aspects of Ruby?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":215,"cache_read_input_tokens":2745,"cache_creation":{"ephemeral_5m_input_tokens":215,"ephemeral_1h_input_tokens":0},"output_tokens":315,"service_tier":"standard"}}' + recorded_at: Thu, 14 Aug 2025 16:24:10 GMT +recorded_with: VCR 6.3.1 diff --git 
a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_multiple_caching_types_handles_multiple_caching_types_together.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_multiple_caching_types_handles_multiple_caching_types_together.yml new file mode 100644 index 000000000..9690ba72b --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_multiple_caching_types_handles_multiple_caching_types_together.yml @@ -0,0 +1,133 @@ +--- +http_interactions: +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby","cache_control":{"type":"ephemeral"}}]}],"max_tokens":4096,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163033Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 6c0e81fe3d33cff0f11ffa88ea3dfeaa4417e7c89c536f0633fd312f9aa886ff + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=3cb53bf7294051dbd7f988c48ff0d4ba0cd41a1a08b33175e476d4f83a43c41b + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:36 GMT + Content-Type: + - application/json + Content-Length: + - '492' + Connection: + - keep-alive + X-Amzn-Requestid: + - d7e1a435-0cd0-4274-9a58-9899c1f01431 + X-Amzn-Bedrock-Invocation-Latency: + - '2340' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '5612' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '2902' + X-Amzn-Bedrock-Output-Token-Count: + - '54' + X-Amzn-Bedrock-Input-Token-Count: + - '4' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01RQ1dXhJCRzFJymM59YyfLe","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I''ll + help you get information about Ruby development by using the available tool."},{"type":"tool_use","id":"toolu_bdrk_01GdWT6NQcLBHd2uhMfiZWXH","name":"describe_ruby_dev","input":{}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":5612,"cache_read_input_tokens":2902,"output_tokens":54}}' + recorded_at: Thu, 14 Aug 2025 16:30:36 GMT +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I''ll + help you get information about Ruby development by using the available 
tool."},{"type":"tool_use","id":"toolu_bdrk_01GdWT6NQcLBHd2uhMfiZWXH","name":"describe_ruby_dev","input":{}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_bdrk_01GdWT6NQcLBHd2uhMfiZWXH","content":"Ruby + is a great language for building web applications."}]}],"max_tokens":4096,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163036Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 89c2f0e94fa48d99647af0b714fcef903c5a1613003cb736e9dfd55838470744 + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=98251464b577738328efa3d551a410b2507df5c91294ff6f9945f63c14661e8b + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:41 GMT + Content-Type: + - application/json + Content-Length: + - '1448' + Connection: + - keep-alive + X-Amzn-Requestid: + - c728c6d9-2860-4e9b-a32f-0087597ad0d0 + X-Amzn-Bedrock-Invocation-Latency: + - '5178' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '5639' + X-Amzn-Bedrock-Output-Token-Count: + - '263' + X-Amzn-Bedrock-Input-Token-Count: + - '2953' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01E39r7wzTxKySur92U4XSPp","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Ruby + is a dynamic, object-oriented programming language known for its simplicity + and readability. Here are some key points about Ruby:\n\n1. Language Characteristics:\n- + Created by Yukihiro Matsumoto (Matz) in 1995\n- Designed to be programmer-friendly + and productive\n- Follows the principle of \"developer happiness\"\n\n2. Web + Development:\n- Particularly popular for web development through the Ruby + on Rails framework\n- Rails follows the Model-View-Controller (MVC) architectural + pattern\n- Known for rapid application development and convention over configuration\n\n3. + Key Features:\n- Dynamic typing\n- Garbage collection\n- Support for functional + programming paradigms\n- Extensive standard library\n- Strong metaprogramming + capabilities\n\n4. Common Use Cases:\n- Web applications\n- Scripting\n- Automation\n- + Prototyping\n- DevOps tools\n\n5. 
Popular Frameworks and Tools:\n- Ruby on + Rails\n- Sinatra (lightweight web framework)\n- RSpec (testing)\n- Bundler + (dependency management)\n\nRuby''s elegant syntax and powerful ecosystem make + it a favorite among developers who value clean, maintainable code and rapid + development."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":2953,"cache_creation_input_tokens":0,"cache_read_input_tokens":5639,"output_tokens":263}}' + recorded_at: Thu, 14 Aug 2025 16:30:41 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_streaming_reports_cached_tokens.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_streaming_reports_cached_tokens.yml new file mode 100644 index 000000000..22ef4bcd2 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_streaming_reports_cached_tokens.yml @@ -0,0 +1,102 @@ +--- +http_interactions: +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke-with-response-stream + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 3","cache_control":{"type":"ephemeral"}}]}],"max_tokens":4096,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163041Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 4fc51a216af64f791658e86560c4177bc076f4d7d40a9c9842840568e6ad3975 + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=6e9b4dec74c5d99de161fbd9b4fb416074b769c8c23e189fdcb949d849f8180c + Content-Type: + - application/json + Accept: + - application/vnd.amazon.eventstream + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:42 GMT + Content-Type: + - application/vnd.amazon.eventstream + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Amzn-Requestid: + - 41b1aea6-739b-4232-9450-deb5cde5c285 + X-Amzn-Bedrock-Content-Type: + - application/json + body: + encoding: ASCII-8BIT + string: !binary |- + 
AAACPgAAAEvX9eCdCzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaWJXVnpjMkZuWlY5emRHRnlkQ0lzSW0xbGMzTmhaMlVpT25zaWFXUWlPaUp0YzJkZlltUnlhMTh3TVZGdGJqWmpaRmhXT0c1d2NYUlFNMFZMYUVkeFVsb2lMQ0owZVhCbElqb2liV1Z6YzJGblpTSXNJbkp2YkdVaU9pSmhjM05wYzNSaGJuUWlMQ0p0YjJSbGJDSTZJbU5zWVhWa1pTMHpMVFV0YUdGcGEzVXRNakF5TkRFd01qSWlMQ0pqYjI1MFpXNTBJanBiWFN3aWMzUnZjRjl5WldGemIyNGlPbTUxYkd3c0luTjBiM0JmYzJWeGRXVnVZMlVpT201MWJHd3NJblZ6WVdkbElqcDdJbWx1Y0hWMFgzUnZhMlZ1Y3lJNk5Dd2lZMkZqYUdWZlkzSmxZWFJwYjI1ZmFXNXdkWFJmZEc5clpXNXpJam95TnpRMExDSmpZV05vWlY5eVpXRmtYMmx1Y0hWMFgzUnZhMlZ1Y3lJNk1Dd2liM1YwY0hWMFgzUnZhMlZ1Y3lJNk1YMTlmUT09IiwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVViJ9UFYdEwAAARUAAABL53CMJgs6ZXZlbnQtdHlwZQcABWNodW5rDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiYnl0ZXMiOiJleUowZVhCbElqb2lZMjl1ZEdWdWRGOWliRzlqYTE5emRHRnlkQ0lzSW1sdVpHVjRJam93TENKamIyNTBaVzUwWDJKc2IyTnJJanA3SW5SNWNHVWlPaUowWlhoMElpd2lkR1Y0ZENJNklpSjlmUT09IiwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIiffGwR+QAAADmAAAASyO4dQ4LOmV2ZW50LXR5cGUHAAVjaHVuaw06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImJ5dGVzIjoiZXlKMGVYQmxJam9pWTI5dWRHVnVkRjlpYkc5amExOWtaV3gwWVNJc0ltbHVaR1Y0SWpvd0xDSmtaV3gwWVNJNmV5SjBlWEJsSWpvaWRHVjRkRjlrWld4MFlTSXNJblJsZUhRaU9pSXhYRzR5SW4xOSIsInAiOiJhYmNkZWZnaCJ9H2bm+gAAAQ8AAABLzSADBQs6ZXZlbnQtdHlwZQcABWNodW5rDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiYnl0ZXMiOiJleUowZVhCbElqb2lZMjl1ZEdWdWRGOWliRzlqYTE5a1pXeDBZU0lzSW1sdVpHVjRJam93TENKa1pXeDBZU0k2ZXlKMGVYQmxJam9pZEdWNGRGOWtaV3gwWVNJc0luUmxlSFFpT2lKY2JqTWlmWDA9IiwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVlcifdzMmQ4AAACwAAAAS5TrT2ULOmV2ZW50LXR5cGUHAAVjaHVuaw06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImJ5dGVzIjoiZXlKMGVYQmxJam9pWTI5dWRHVnVkRjlpYkc5amExOXpkRzl3SWl3aWFXNWtaWGdpT2pCOSIsInAiOiJhYmNkZWZnaGlqa2xtbiJ9aUr6HgAAASwAAABLS0FW0Qs6ZXZlbnQtdHlwZQcABWNodW5rDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiYnl0ZXMiOiJleUowZVhCbElqb2liV1Z6YzJGblpWOWtaV3gwWVNJc0ltUmxiSFJoSWpwN0luTjBiM0JmY21WaGMyOXVJam9pWlc1a1gzUjFjbTRpTENKemRHOXdYM05sY1hWbGJtTmxJanB1ZFd4c2ZTd2lkWE5oWjJVaU9uc2liM1YwY0hWMFgzUnZhMlZ1Y3lJNk9YMTkiLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVCJ9SVplJwAAAbQAAABLqjc6AAs6ZXZlbnQtdHlwZQcABWNodW5rDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiYnl0ZXMiOiJleUowZVhCbElqb2liV1Z6YzJGblpWOXpkRzl3SWl3aVlXMWhlbTl1TFdKbFpISnZZMnN0YVc1MmIyTmhkR2x2YmsxbGRISnBZM01pT25zaWFXNXdkWFJVYjJ0bGJrTnZkVzUwSWpvMExDSnZkWFJ3ZFhSVWIydGxia052ZFc1MElqbzVMQ0pwYm5adlkyRjBhVzl1VEdGMFpXNWplU0k2TVRJd09Dd2labWx5YzNSQ2VYUmxUR0YwWlc1amVTSTZNVEl3Tml3aVkyRmphR1ZTWldGa1NXNXdkWFJVYjJ0bGJrTnZkVzUwSWpvd0xDSmpZV05vWlZkeWFYUmxTVzV3ZFhSVWIydGxia052ZFc1MElqb3lOelEwZlgwPSIsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1AifZsqRpI= + recorded_at: Thu, 14 Aug 2025 16:30:43 GMT +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke-with-response-stream + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 3"}]},{"role":"assistant","content":[{"type":"text","text":"1\n2\n3"}]},{"role":"user","content":[{"type":"text","text":"\n\nCount + from 1 to 
3","cache_control":{"type":"ephemeral"}}]}],"max_tokens":4096,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163043Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 050bbdfaa51d7829d07b5d3ccbd5826da2a52574004a35e999b54a15a2ee4357 + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=44a676dbab70c7b3d98e297b7d46b7ef443c5ee515f219ad2eebf87c04e49b5a + Content-Type: + - application/json + Accept: + - application/vnd.amazon.eventstream + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:45 GMT + Content-Type: + - application/vnd.amazon.eventstream + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Amzn-Requestid: + - e55f4a17-7d39-45bb-90d9-da017c79be85 + X-Amzn-Bedrock-Content-Type: + - application/json + body: + encoding: ASCII-8BIT + string: !binary |- + AAACGwAAAEve1EDpCzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaWJXVnpjMkZuWlY5emRHRnlkQ0lzSW0xbGMzTmhaMlVpT25zaWFXUWlPaUp0YzJkZlltUnlhMTh3TVZabFkzVjFaMjB6VFdoT09WWkthMk5DVEU1bk5tUWlMQ0owZVhCbElqb2liV1Z6YzJGblpTSXNJbkp2YkdVaU9pSmhjM05wYzNSaGJuUWlMQ0p0YjJSbGJDSTZJbU5zWVhWa1pTMHpMVFV0YUdGcGEzVXRNakF5TkRFd01qSWlMQ0pqYjI1MFpXNTBJanBiWFN3aWMzUnZjRjl5WldGemIyNGlPbTUxYkd3c0luTjBiM0JmYzJWeGRXVnVZMlVpT201MWJHd3NJblZ6WVdkbElqcDdJbWx1Y0hWMFgzUnZhMlZ1Y3lJNk5Dd2lZMkZqYUdWZlkzSmxZWFJwYjI1ZmFXNXdkWFJmZEc5clpXNXpJam95TnpVekxDSmpZV05vWlY5eVpXRmtYMmx1Y0hWMFgzUnZhMlZ1Y3lJNk1qYzBOQ3dpYjNWMGNIVjBYM1J2YTJWdWN5STZNWDE5ZlE9PSIsInAiOiJhYmNkZWZnaGkiffKXxhcAAAEPAAAAS80gAwULOmV2ZW50LXR5cGUHAAVjaHVuaw06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImJ5dGVzIjoiZXlKMGVYQmxJam9pWTI5dWRHVnVkRjlpYkc5amExOXpkR0Z5ZENJc0ltbHVaR1Y0SWpvd0xDSmpiMjUwWlc1MFgySnNiMk5ySWpwN0luUjVjR1VpT2lKMFpYaDBJaXdpZEdWNGRDSTZJaUo5ZlE9PSIsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXIn0hUuprAAAA/wAAAEtOSID9CzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaVkyOXVkR1Z1ZEY5aWJHOWphMTlrWld4MFlTSXNJbWx1WkdWNElqb3dMQ0prWld4MFlTSTZleUowZVhCbElqb2lkR1Y0ZEY5a1pXeDBZU0lzSW5SbGVIUWlPaUl4WEc0eUluMTkiLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHIn37nPuKAAAA/gAAAEtzKKlNCzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaVkyOXVkR1Z1ZEY5aWJHOWphMTlrWld4MFlTSXNJbWx1WkdWNElqb3dMQ0prWld4MFlTSTZleUowZVhCbElqb2lkR1Y0ZEY5a1pXeDBZU0lzSW5SbGVIUWlPaUpjYmpNaWZYMD0iLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUYifRiiJ98AAADKAAAASyeJtwsLOmV2ZW50LXR5cGUHAAVjaHVuaw06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImJ5dGVzIjoiZXlKMGVYQmxJam9pWTI5dWRHVnVkRjlpYkc5amExOXpkRzl3SWl3aWFXNWtaWGdpT2pCOSIsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OIn1nPrWkAAABEQAAAEsS8CrmCzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaWJXVnpjMkZuWlY5a1pXeDBZU0lzSW1SbGJIUmhJanA3SW5OMGIzQmZjbVZoYzI5dUlqb2laVzVrWDNSMWNtNGlMQ0p6ZEc5d1gzTmxjWFZsYm1ObElqcHVkV3hzZlN3aWRYTmhaMlVpT25zaWIzVjBjSFYwWDNSdmEyVnVjeUk2T1gxOSIsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzIn07vfzYAAABvQAAAEunJ1hxCzpldmVudC10eXBlBwAFY2h1bmsNOmNvbnRlbnQtdHlwZQcAEGFw
cGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJieXRlcyI6ImV5SjBlWEJsSWpvaWJXVnpjMkZuWlY5emRHOXdJaXdpWVcxaGVtOXVMV0psWkhKdlkyc3RhVzUyYjJOaGRHbHZiazFsZEhKcFkzTWlPbnNpYVc1d2RYUlViMnRsYmtOdmRXNTBJam8wTENKdmRYUndkWFJVYjJ0bGJrTnZkVzUwSWpvNUxDSnBiblp2WTJGMGFXOXVUR0YwWlc1amVTSTZNVGd5T0N3aVptbHljM1JDZVhSbFRHRjBaVzVqZVNJNk1UZ3lOU3dpWTJGamFHVlNaV0ZrU1c1d2RYUlViMnRsYmtOdmRXNTBJam95TnpRMExDSmpZV05vWlZkeWFYUmxTVzV3ZFhSVWIydGxia052ZFc1MElqb3lOelV6ZlgwPSIsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRUlNUVSJ99PzsnA== + recorded_at: Thu, 14 Aug 2025 16:30:45 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml new file mode 100644 index 000000000..d6c31ce26 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_system_message_caching_adds_cache_control_to_the_last_system_message_when_system_caching_is_requested.yml @@ -0,0 +1,134 @@ +--- +http_interactions: +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]}],"max_tokens":4096,"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163008Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 965bbb410cd806c839d6d17bc77733a814091b20d819e2363f76aa404c7e40b7 + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=66bc9900487e201394c7b44faa2df13d13012e2b052cdfe750549417bc8ebdb0 + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:11 GMT + Content-Type: + - application/json + Content-Length: + - '736' + Connection: + - keep-alive + X-Amzn-Requestid: + - 8e32664d-1e16-4efd-8a97-ad76b4fd082e + X-Amzn-Bedrock-Invocation-Latency: + - '2771' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '2732' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Output-Token-Count: + - '91' + X-Amzn-Bedrock-Input-Token-Count: + - '15' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01AVm5QS4yCyB45uXUDGHYUB","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + want to be direct and transparent. My key principles include:\n\n1. Being + helpful while avoiding harm\n2. Being honest about my capabilities and limitations + \n3. Protecting individual privacy\n4. Respecting ethical boundaries\n5. Providing + accurate information\n6. Being objective and balanced\n7. 
Maintaining user + confidentiality\n\nI aim to be a supportive tool that assists humans while + maintaining clear ethical standards."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":15,"cache_creation_input_tokens":2732,"cache_read_input_tokens":0,"output_tokens":91}}' + recorded_at: Thu, 14 Aug 2025 16:30:11 GMT +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]},{"role":"assistant","content":[{"type":"text","text":"I + want to be direct and transparent. My key principles include:\n\n1. Being + helpful while avoiding harm\n2. Being honest about my capabilities and limitations + \n3. Protecting individual privacy\n4. Respecting ethical boundaries\n5. Providing + accurate information\n6. Being objective and balanced\n7. Maintaining user + confidentiality\n\nI aim to be a supportive tool that assists humans while + maintaining clear ethical standards."}]},{"role":"user","content":[{"type":"text","text":"What + are the key principles you follow?"}]}],"max_tokens":4096,"system":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163011Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 1d77e990628c90735a3bfaf8a70a59a82d78d1baa8d76c016890743da433a58c + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=b8bd67a1f6e725a12099851544c233e8e3739713fdb313d510bb4c911234be20 + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:13 GMT + Content-Type: + - application/json + Content-Length: + - '786' + Connection: + - keep-alive + X-Amzn-Requestid: + - 80933c39-82bc-43e7-a827-82f86a6e150f + X-Amzn-Bedrock-Invocation-Latency: + - '2074' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '2732' + X-Amzn-Bedrock-Output-Token-Count: + - '108' + X-Amzn-Bedrock-Input-Token-Count: + - '117' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_0128v6SkYpy2RHfJAHKLTAyc","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + aim to be helpful while following key ethical principles:\n\n1. Be honest + and direct\n2. Protect individual privacy\n3. Avoid potential harm\n4. Provide + accurate information\n5. Respect ethical boundaries\n6. Be objective and balanced\n7. + Maintain user confidentiality\n8. Acknowledge my limitations\n9. Prioritize + human wellbeing\n10. 
Refuse inappropriate requests\n\nI strive to be a responsible + AI assistant that supports humans while maintaining strong moral guidelines."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":117,"cache_creation_input_tokens":0,"cache_read_input_tokens":2732,"output_tokens":108}}' + recorded_at: Thu, 14 Aug 2025 16:30:13 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml new file mode 100644 index 000000000..a2d4245ec --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_tool_definition_caching_adds_cache_control_to_tool_definitions_when_tools_caching_is_requested.yml @@ -0,0 +1,135 @@ +--- +http_interactions: +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"Tell + me about Ruby"}]}],"max_tokens":4096,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163025Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 66d69cc12e95a1abcd338456d7ede496a815bdd198660e9d21b3e9606fa4db5a + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=6182b63f783094de2554de58a126beee0dd8f6e2f2acfa858895b4643308c085 + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:26 GMT + Content-Type: + - application/json + Content-Length: + - '480' + Connection: + - keep-alive + X-Amzn-Requestid: + - 712b2d45-e506-4968-aa87-965bc58983a5 + X-Amzn-Bedrock-Invocation-Latency: + - '1565' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '2902' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Output-Token-Count: + - '55' + X-Amzn-Bedrock-Input-Token-Count: + - '145' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01Unm4o5kEo9PyDRXtnwzYQe","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I''ll + use the describe_ruby_dev function to get information about Ruby."},{"type":"tool_use","id":"toolu_bdrk_01HogbJT5garLoQbRin6Yi71","name":"describe_ruby_dev","input":{}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":145,"cache_creation_input_tokens":2902,"cache_read_input_tokens":0,"output_tokens":55}}' + recorded_at: Thu, 14 Aug 2025 16:30:27 GMT +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: 
'{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"Tell + me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I''ll + use the describe_ruby_dev function to get information about Ruby."},{"type":"tool_use","id":"toolu_bdrk_01HogbJT5garLoQbRin6Yi71","name":"describe_ruby_dev","input":{}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_bdrk_01HogbJT5garLoQbRin6Yi71","content":"Ruby + is a great language for building web applications."}]}],"max_tokens":4096,"tools":[{"name":"describe_ruby_dev","description":"","input_schema":{"type":"object","properties":{},"required":[]},"cache_control":{"type":"ephemeral"}}],"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163027Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 5fac1dda31038ff29ab45910ff27f5b62d41efd66a9ecc55c1c16dba749ed7de + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=62a82c52f27e503a4ccb759ca4e591cd43babf12ad9e5b8ffdbb3807abc42fc5 + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:33 GMT + Content-Type: + - application/json + Content-Length: + - '1618' + Connection: + - keep-alive + X-Amzn-Requestid: + - 007070a2-fa7c-41a0-9fad-58ed8723f848 + X-Amzn-Bedrock-Invocation-Latency: + - '6420' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '2902' + X-Amzn-Bedrock-Output-Token-Count: + - '302' + X-Amzn-Bedrock-Input-Token-Count: + - '220' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_015ca9q3c8ZiNZSBdQu3bbYD","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Let + me provide you with more comprehensive information about Ruby:\n\nRuby is + a dynamic, object-oriented programming language created by Yukihiro Matsumoto + (often called \"Matz\") in Japan during the mid-1990s. Here are some key characteristics:\n\n1. + Design Philosophy:\n- Designed to be programmer-friendly and productive\n- + Follows the principle of \"developer happiness\" and \"least surprise\"\n- + Emphasizes natural, readable syntax\n\n2. Key Features:\n- Fully object-oriented: + Everything in Ruby is an object\n- Dynamic typing\n- Supports functional programming + paradigms\n- Automatic memory management (garbage collection)\n- Strong metaprogramming + capabilities\n\n3. Popular Use Cases:\n- Web development (especially with + Ruby on Rails framework)\n- Scripting and automation\n- DevOps tools\n- Prototyping\n- + Data processing\n\n4. Strengths:\n- Elegant and concise syntax\n- Highly productive\n- + Extensive libraries and gems\n- Strong community support\n- Cross-platform + compatibility\n\n5. 
Notable Frameworks and Tools:\n- Ruby on Rails (web application + framework)\n- Sinatra (lightweight web framework)\n- RSpec (testing framework)\n- + Bundler (dependency management)\n\nRuby continues to be a popular language, + particularly in web development and among developers who value clean, expressive + code."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":220,"cache_creation_input_tokens":0,"cache_read_input_tokens":2902,"output_tokens":302}}' + recorded_at: Thu, 14 Aug 2025 16:30:33 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml new file mode 100644 index 000000000..3ca5390bd --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_bedrock_provider_anthropic_claude-3-5-haiku-20241022-v1_0_with_user_message_caching_adds_cache_control_to_user_messages_when_user_caching_is_requested.yml @@ -0,0 +1,156 @@ +--- +http_interactions: +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby","cache_control":{"type":"ephemeral"}}]}],"max_tokens":4096,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163013Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 7369a818a412d229a72fcc73315b2c66fa39b4517242db86bcab3c2c7a6a9b3a + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=b1abc025aff6fbe749e7ebbb11c5c7fb2035c33d4bb85af92baa5738fccf5fb8 + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:18 GMT + Content-Type: + - application/json + Content-Length: + - '1172' + Connection: + - keep-alive + X-Amzn-Requestid: + - eadc536f-27c0-4a67-89e2-13a1b1a47ee2 + X-Amzn-Bedrock-Invocation-Latency: + - '4680' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '2745' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '0' + X-Amzn-Bedrock-Output-Token-Count: + - '195' + X-Amzn-Bedrock-Input-Token-Count: + - '4' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01DnvM7rCLxyNBRhW4MniqQs","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"I + apologize, but the input you provided is just a very long string of the letter + \"a\" which doesn''t contain any meaningful information about Ruby. If you''re + looking to learn about Ruby, I can provide some information:\n\nRuby is a + dynamic, object-oriented programming language created by Yukihiro Matsumoto + (often called \"Matz\") in Japan in the mid-1990s. Some key characteristics + of Ruby include:\n\n1. Designed for programmer productivity and happiness\n2. 
+ Supports multiple programming paradigms (object-oriented, functional, imperative)\n3. + Known for its clean, readable syntax\n4. Widely used for web development (especially + with Ruby on Rails framework)\n5. Open-source programming language\n6. Interpreted + language\n7. Supports automatic memory management\n8. Strong and dynamic typing\n\nWould + you like me to elaborate on any of these points about Ruby?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":2745,"cache_read_input_tokens":0,"output_tokens":195}}' + recorded_at: Thu, 14 Aug 2025 16:30:18 GMT +- request: + method: post + uri: https://bedrock-runtime..amazonaws.com/model/anthropic.claude-3-5-haiku-20241022-v1:0/invoke + body: + encoding: UTF-8 + string: '{"anthropic_version":"bedrock-2023-05-31","messages":[{"role":"user","content":[{"type":"text","text":"\n\nBased + on the above, tell me about Ruby"}]},{"role":"assistant","content":[{"type":"text","text":"I + apologize, but the input you provided is just a very long string of the letter + \"a\" which doesn''t contain any meaningful information about Ruby. If you''re + looking to learn about Ruby, I can provide some information:\n\nRuby is a + dynamic, object-oriented programming language created by Yukihiro Matsumoto + (often called \"Matz\") in Japan in the mid-1990s. Some key characteristics + of Ruby include:\n\n1. Designed for programmer productivity and happiness\n2. + Supports multiple programming paradigms (object-oriented, functional, imperative)\n3. + Known for its clean, readable syntax\n4. Widely used for web development (especially + with Ruby on Rails framework)\n5. Open-source programming language\n6. Interpreted + language\n7. Supports automatic memory management\n8. 
Strong and dynamic typing\n\nWould + you like me to elaborate on any of these points about Ruby?"}]},{"role":"user","content":[{"type":"text","text":"Tell + me more about Ruby","cache_control":{"type":"ephemeral"}}]}],"max_tokens":4096,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Host: + - bedrock-runtime..amazonaws.com + X-Amz-Date: + - 20250814T163018Z + X-Amz-Security-Token: + - "" + X-Amz-Content-Sha256: + - 4a6501317564f06251210000c287d721c5e7ffc6fcc43e78c536c83e7f05c59d + Authorization: + - AWS4-HMAC-SHA256 Credential=/20250814//bedrock/aws4_request, + SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=ad565dd91729662df099cafef1da0ac46caa51c4898cd052c96437ffd123dbff + Content-Type: + - application/json + Accept: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:30:25 GMT + Content-Type: + - application/json + Content-Length: + - '1729' + Connection: + - keep-alive + X-Amzn-Requestid: + - a2de6c3a-edb9-4053-ab79-9090b851a001 + X-Amzn-Bedrock-Invocation-Latency: + - '6605' + X-Amzn-Bedrock-Cache-Write-Input-Token-Count: + - '203' + X-Amzn-Bedrock-Cache-Read-Input-Token-Count: + - '2745' + X-Amzn-Bedrock-Output-Token-Count: + - '327' + X-Amzn-Bedrock-Input-Token-Count: + - '4' + body: + encoding: UTF-8 + string: '{"id":"msg_bdrk_01QBRJLbWgYrLtbtwmcqjhpN","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[{"type":"text","text":"Here''s + a more detailed overview of Ruby:\n\nLanguage Design and Philosophy:\n- Created + by Yukihiro Matsumoto in 1995\n- Designed with the principle of \"developer + happiness\"\n- Follows the principle of \"least surprise\" - code should behave + intuitively\n- Emphasizes human-readable code and programmer productivity\n\nKey + Technical Features:\n- Fully object-oriented: everything is an object\n- Dynamic + typing\n- Automatic memory management (garbage collection)\n- Support for + functional programming paradigms\n- Built-in support for metaprogramming\n- + Supports multiple inheritance through modules\n\nPopular Use Cases:\n1. Web + Development\n- Ruby on Rails framework is extremely popular\n- Used by companies + like Airbnb, GitHub, Shopify\n\n2. Scripting and Automation\n- Great for writing + quick scripts\n- Used in system administration\n- Supports cross-platform + scripting\n\n3. 
DevOps and Infrastructure\n- Used in configuration management\n- + Tools like Chef and Puppet are written in Ruby\n\nSyntax Characteristics:\n- + Clean and expressive syntax\n- Uses snake_case for method and variable names\n- + Optional parentheses in method calls\n- Blocks and lambdas are first-class + citizens\n\nCommunity and Ecosystem:\n- Strong, supportive open-source community\n- + RubyGems package manager\n- Regular language updates\n- Annual RubyConf and + regional conferences\n\nWould you like me to elaborate on any of these aspects?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":203,"cache_read_input_tokens":2745,"output_tokens":327}}' + recorded_at: Thu, 14 Aug 2025 16:30:25 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_gemini_provider_gemini-2_5-flash_reports_cached_tokens.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_gemini_provider_gemini-2_5-flash_reports_cached_tokens.yml new file mode 100644 index 000000000..b99386a88 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_gemini_provider_gemini-2_5-flash_reports_cached_tokens.yml @@ -0,0 +1,167 @@ +--- +http_interactions: +- request: + method: post + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + body: + encoding: UTF-8 + string: '{"contents":[{"role":"user","parts":[{"text":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\nBased + on the above, tell me about Ruby"}]}],"generationConfig":{"temperature":0.7}}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Goog-Api-Key: + - "" + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Content-Type: + - application/json; charset=UTF-8 + Vary: + - Origin + - Referer + - X-Origin + Date: + - Thu, 14 Aug 2025 16:24:24 GMT + Server: + - scaffolding on HTTPServer2 + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + X-Content-Type-Options: + - nosniff + Server-Timing: + - gfet4t7; dur=1217 + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Transfer-Encoding: + - chunked + body: + encoding: ASCII-8BIT + string: | + { + "candidates": [ + { + "content": { + "parts": [ + { + "text": "You've provided a long string of \"b\" characters. 
This doesn't contain any information about \"Ruby\".\n\nIf you'd like to know about Ruby, please provide some text or context that actually mentions her, or ask a question directly about her. For example, you could ask:\n\n* \"Tell me about the programming language Ruby.\"\n* \"Who is Ruby Bridges?\"\n* \"What is the gemstone Ruby known for?\"\n\nWithout any relevant information, I can't tell you anything about her." + } + ], + "role": "model" + }, + "finishReason": "STOP", + "index": 0 + } + ], + "usageMetadata": { + "promptTokenCount": 2572, + "candidatesTokenCount": 107, + "totalTokenCount": 2679, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 2572 + } + ] + }, + "modelVersion": "gemini-2.5-flash", + "responseId": "uA2eaMWqFt-Hz7IPke-Z2Aw" + } + recorded_at: Thu, 14 Aug 2025 16:24:24 GMT +- request: + method: post + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + body: + encoding: UTF-8 + string: '{"contents":[{"role":"user","parts":[{"text":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\nBased + on the above, tell me about Ruby"}]}],"generationConfig":{"temperature":0.7}}' + headers: + User-Agent: + - Faraday v2.13.1 + X-Goog-Api-Key: + - "" + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Content-Type: + - application/json; charset=UTF-8 + Vary: + - Origin + - Referer + - X-Origin + Date: + - Thu, 14 Aug 2025 16:24:27 GMT + Server: + - scaffolding on HTTPServer2 + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + X-Content-Type-Options: + - nosniff + Server-Timing: + - gfet4t7; dur=3019 + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Transfer-Encoding: + - chunked + body: + encoding: ASCII-8BIT + string: | + { + "candidates": [ + { + "content": { + "parts": [ + { + "text": "Based on the text you provided, which consists solely of repeated 'a' and 'b' characters, I cannot tell you anything about \"Ruby.\"\n\nThe provided text does not contain any information related to the Ruby programming language, gemstones, or any other topic that \"Ruby\" might refer to.\n\nIf you'd like to know about Ruby, please provide some relevant text or ask me a general question about it!" 
+ } + ], + "role": "model" + }, + "finishReason": "STOP", + "index": 0 + } + ], + "usageMetadata": { + "promptTokenCount": 2572, + "candidatesTokenCount": 85, + "totalTokenCount": 3080, + "cachedContentTokenCount": 2030, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 2572 + } + ], + "cacheTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 2030 + } + ], + "thoughtsTokenCount": 423 + }, + "modelVersion": "gemini-2.5-flash", + "responseId": "uw2eaOjOG7itz7IPo7Ot0A8" + } + recorded_at: Thu, 14 Aug 2025 16:24:27 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_openai_provider_gpt-4_1-nano_reports_cached_tokens.yml b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_openai_provider_gpt-4_1-nano_reports_cached_tokens.yml new file mode 100644 index 000000000..35116cca2 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/chat_complete_with_prompt_caching_with_openai_provider_gpt-4_1-nano_reports_cached_tokens.yml @@ -0,0 +1,227 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\nBased + on the above, tell me about Ruby"}],"stream":false,"temperature":0.7}' + headers: + User-Agent: + - Faraday v2.13.1 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 14 Aug 2025 16:24:30 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - "" + Openai-Processing-Ms: + - '2368' + Openai-Project: + - proj_j3YWwie2yjmMHTGYtUxoTOJ7 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '2742' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149995637' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 1ms + X-Request-Id: + - "" + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + 
+      string: |
+        {
+          "id": "chatcmpl-C4V2auul1KMB2q1MI602aVLBEYb82",
+          "object": "chat.completion",
+          "created": 1755188668,
+          "model": "gpt-4.1-nano-2025-04-14",
+          "choices": [
+            {
+              "index": 0,
+              "message": {
+                "role": "assistant",
+                "content": "The provided text is an extremely long string composed mostly of the letters \"a\" and \"b\" repeated many times. It does not contain any specific information, words, or data related to \"Ruby\" or any other subject. Therefore, I cannot extract or infer any details about Ruby from this text.\n\nIf you have a specific question about Ruby (the programming language, the gemstone, or any other context), please provide more details or clarify your request!",
+                "refusal": null,
+                "annotations": []
+              },
+              "logprobs": null,
+              "finish_reason": "stop"
+            }
+          ],
+          "usage": {
+            "prompt_tokens": 2323,
+            "completion_tokens": 91,
+            "total_tokens": 2414,
+            "prompt_tokens_details": {
+              "cached_tokens": 0,
+              "audio_tokens": 0
+            },
+            "completion_tokens_details": {
+              "reasoning_tokens": 0,
+              "audio_tokens": 0,
+              "accepted_prediction_tokens": 0,
+              "rejected_prediction_tokens": 0
+            }
+          },
+          "service_tier": "default",
+          "system_fingerprint": "fp_f12167b370"
+        }
+  recorded_at: Thu, 14 Aug 2025 16:24:30 GMT
+- request:
+    method: post
+    uri: https://api.openai.com/v1/chat/completions
+    body:
+      encoding: UTF-8
+      string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\nBased
+        on the above, tell me about Ruby"}],"stream":false,"temperature":0.7}'
+    headers:
+      User-Agent:
+      - Faraday v2.13.1
+      Authorization:
+      - Bearer
+      Content-Type:
+      - application/json
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Date:
+      - Thu, 14 Aug 2025 16:24:33 GMT
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+      Connection:
+      - keep-alive
+      Access-Control-Expose-Headers:
+      - X-Request-ID
+      Openai-Organization:
+      - ""
+      Openai-Processing-Ms:
+      - '2485'
+      Openai-Project:
+      - proj_j3YWwie2yjmMHTGYtUxoTOJ7
+      Openai-Version:
+      - '2020-10-01'
+      X-Envoy-Upstream-Service-Time:
+      - '2664'
+      X-Ratelimit-Limit-Requests:
+      - '30000'
+      X-Ratelimit-Limit-Tokens:
+      - '150000000'
+      X-Ratelimit-Remaining-Requests:
+      - '29999'
+      X-Ratelimit-Remaining-Tokens:
+      - '149995637'
+      X-Ratelimit-Reset-Requests:
+      - 2ms
+      X-Ratelimit-Reset-Tokens:
+      - 1ms
+      X-Request-Id:
+      - ""
+      Cf-Cache-Status:
+      - DYNAMIC
+      Set-Cookie:
+      - ""
+      - ""
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      Server:
+      - cloudflare
+      Cf-Ray:
+      - ""
+      Alt-Svc:
+      - h3=":443"; ma=86400
+    body:
+      encoding: ASCII-8BIT
+      string: |
+        {
+          "id": "chatcmpl-C4V2dbv3q1PATuk3Hd2iN2BgaCrtE",
+          "object": "chat.completion",
+          "created": 1755188671,
+          "model": "gpt-4.1-nano-2025-04-14",
+          "choices": [
+            {
+              "index": 0,
+              "message": {
+                "role": "assistant",
+                "content": "The provided text is an extremely long string consisting primarily of repeated 'a's followed by repeated 'b's, with no meaningful information or context related to Ruby. Therefore, I do not have any specific details about Ruby from this data.\n\nIf you are referring to Ruby as a programming language, I can tell you that:\n- Ruby is a dynamic, open-source programming language focused on simplicity and productivity.\n- It was created by Yukihiro \"Matz\" Matsumoto in the mid-1990s.\n- Ruby is known for its elegant syntax that is natural to read and write.\n- It is widely used for web development, particularly with the Ruby on Rails framework.\n- Ruby supports multiple programming paradigms, including procedural, object-oriented, and functional programming.\n\nPlease let me know if you want more specific information about Ruby or if there's a particular aspect you're interested in!",
+                "refusal": null,
+                "annotations": []
+              },
+              "logprobs": null,
+              "finish_reason": "stop"
+            }
+          ],
+          "usage": {
+            "prompt_tokens": 2323,
+            "completion_tokens": 176,
+            "total_tokens": 2499,
+            "prompt_tokens_details": {
+              "cached_tokens": 2176,
+              "audio_tokens": 0
+            },
+            "completion_tokens_details": {
+              "reasoning_tokens": 0,
+              "audio_tokens": 0,
+              "accepted_prediction_tokens": 0,
+              "rejected_prediction_tokens": 0
+            }
+          },
+          "service_tier": "default",
+          "system_fingerprint": "fp_f12167b370"
+        }
+  recorded_at: Thu, 14 Aug 2025 16:24:33 GMT
+recorded_with: VCR 6.3.1
diff --git a/spec/ruby_llm/active_record/acts_as_spec.rb b/spec/ruby_llm/active_record/acts_as_spec.rb
index df8376d9f..8f3be1757 100644
--- a/spec/ruby_llm/active_record/acts_as_spec.rb
+++ b/spec/ruby_llm/active_record/acts_as_spec.rb
@@ -617,6 +617,17 @@ def uploaded_file(path, type)
     end
   end
 
+  describe 'prompt caching' do
+    let(:model) { 'claude-3-5-haiku-20241022' }
+
+    it 'allows prompt caching' do
+      chat = Chat.create!(model_id: model)
+
+      response = chat.ask('Hello', cache: true)
+      expect(response.raw.env.request_body).to include('"cache_control":{"type":"ephemeral"}')
+    end
+  end
+
   describe 'assume_model_exists' do
     it 'creates a Model record when assume_model_exists is true' do
       chat = Chat.new
diff --git a/spec/ruby_llm/chat_complete_with_prompt_caching_spec.rb b/spec/ruby_llm/chat_complete_with_prompt_caching_spec.rb
new file mode 100644
index 000000000..17fa1d353
--- /dev/null
+++ b/spec/ruby_llm/chat_complete_with_prompt_caching_spec.rb
@@ -0,0 +1,174 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+class DummyProvider
+  include RubyLLM::Providers::Anthropic::Cache
+
+  attr_reader :cache_prompts
+
+  def render_payload(cache_prompts:)
+    @cache_prompts = cache_prompts
+  end
+end
+
+RSpec.describe RubyLLM::Chat, '.complete with prompt caching' do
+  include_context 'with configured RubyLLM'
+
+  class DescribeRubyDev < RubyLLM::Tool # rubocop:disable Lint/ConstantDefinitionInBlock,RSpec/LeakyConstantDeclaration
+    description MASSIVE_TEXT_FOR_PROMPT_CACHING
+
+    def execute
+      'Ruby is a great language for building web applications.'
+    end
+  end
+
+  context 'with cache setting' do
+    it 'caches by default' do
+      chat = RubyLLM.chat
+      expect(chat.instance_variable_get(:@cache_prompts)).to be_truthy
+    end
+
+    it 'honors setting' do
+      RubyLLM.configure do |config|
+        config.cache_prompts = false
+      end
+
+      chat = RubyLLM.chat
+      expect(chat.instance_variable_get(:@cache_prompts)).to be_falsey
+    end
+  end
+
+  context 'with cache specification' do
+    it 'supports true' do
+      provider = DummyProvider.new
+      provider.render_payload(cache_prompts: true)
+      expect(provider).to be_should_cache(:system)
+      expect(provider).to be_should_cache(:user)
+      expect(provider).to be_should_cache(:tools)
+    end
+
+    it 'supports false' do
+      provider = DummyProvider.new
+      provider.render_payload(cache_prompts: false)
+      expect(provider).not_to be_should_cache(:system)
+      expect(provider).not_to be_should_cache(:user)
+      expect(provider).not_to be_should_cache(:tools)
+    end
+
+    it 'supports array' do
+      provider = DummyProvider.new
+      provider.render_payload(cache_prompts: %i[system tools])
+      expect(provider).to be_should_cache(:system)
+      expect(provider).not_to be_should_cache(:user)
+      expect(provider).to be_should_cache(:tools)
+    end
+
+    it 'supports symbol' do
+      provider = DummyProvider.new
+      provider.render_payload(cache_prompts: :tools)
+      expect(provider).not_to be_should_cache(:system)
+      expect(provider).not_to be_should_cache(:user)
+      expect(provider).to be_should_cache(:tools)
+    end
+  end
+
+  CACHING_MODELS.each do |model_info|
+    provider = model_info[:provider]
+    model = model_info[:model]
+
+    describe "with #{provider} provider (#{model})" do
+      let(:chat) { RubyLLM.chat(model: model, provider: provider, cache: false).with_temperature(0.7) }
+
+      context 'with system message caching' do
+        it 'adds cache_control to the last system message when system caching is requested' do
+          chat.with_instructions(MASSIVE_TEXT_FOR_PROMPT_CACHING)
+
+          response = chat.ask('What are the key principles you follow?', cache: :system)
+
+          expect(response.cache_creation_tokens).to be_positive
+
+          response = chat.ask('What are the key principles you follow?', cache: :system)
+
+          expect(response.cached_tokens).to be_positive
+        end
+      end
+
+      context 'with user message caching' do
+        it 'adds cache_control to user messages when user caching is requested' do
+          response = chat.ask(
+            "#{MASSIVE_TEXT_FOR_PROMPT_CACHING}\n\nBased on the above, tell me about Ruby",
+            cache: :user
+          )
+
+          expect(response.cache_creation_tokens).to be_positive
+
+          response = chat.ask('Tell me more about Ruby', cache: :user)
+
+          expect(response.cached_tokens).to be_positive
+        end
+      end
+
+      context 'with tool definition caching' do
+        it 'adds cache_control to tool definitions when tools caching is requested' do
+          chat.with_tools(DescribeRubyDev)
+
+          response = chat.ask('Tell me about Ruby', cache: :tools)
+
+          expect(chat.messages[1].cache_creation_tokens).to be_positive
+          expect(response.cached_tokens).to be_positive
+        end
+      end
+
+      context 'with multiple caching types' do
+        it 'handles multiple caching types together' do
+          chat.with_tools(DescribeRubyDev)
+          chat.with_instructions(MASSIVE_TEXT_FOR_PROMPT_CACHING)
+
+          response = chat.ask(
+            "#{MASSIVE_TEXT_FOR_PROMPT_CACHING}\n\nBased on the above, tell me about Ruby",
+            cache: %i[system tools user]
+          )
+
+          expect(chat.messages[2].cache_creation_tokens).to be_positive
+          expect(response.cached_tokens).to be_positive
+        end
+      end
+
+      context 'with streaming' do
+        it 'reports cached tokens' do
+          response = chat.ask("#{MASSIVE_TEXT_FOR_PROMPT_CACHING}\n\nCount from 1 to 3", cache: :user) do |chunk|
+            # do nothing
+          end
+
+          expect(response.cache_creation_tokens).to be_positive
+
+          response = chat.ask("#{MASSIVE_TEXT_FOR_PROMPT_CACHING}\n\nCount from 1 to 3", cache: :user) do |chunk|
+            # do nothing
+          end
+
+          expect(response.cached_tokens).to be_positive
+        end
+      end
+    end
+  end
+
+  CACHED_MODELS.each do |model_info|
+    provider = model_info[:provider]
+    model = model_info[:model]
+
+    describe "with #{provider} provider (#{model})" do
+      let(:chat_first) { RubyLLM.chat(model: model, provider: provider).with_temperature(0.7) }
+      let(:chat_second) { RubyLLM.chat(model: model, provider: provider).with_temperature(0.7) }
+
+      it 'reports cached tokens' do
+        question = "#{MASSIVE_TEXT_FOR_PROMPT_CACHE_REPORTING}\n\nBased on the above, tell me about Ruby"
+        response_first = chat_first.ask question
+        response_second = chat_second.ask question
+
+        expect(response_first.cached_tokens).to be_zero
+        expect(response_second.cached_tokens).to be_positive
+      end
+    end
+  end
+end
diff --git a/spec/ruby_llm/chat_error_spec.rb b/spec/ruby_llm/chat_error_spec.rb
index a5dfd8a74..fc8ccaf52 100644
--- a/spec/ruby_llm/chat_error_spec.rb
+++ b/spec/ruby_llm/chat_error_spec.rb
@@ -80,7 +80,7 @@
       Psych::Parser.code_point_limit = 20_000_000 if Psych::Parser.respond_to?(:code_point_limit=)
 
       # Create a huge conversation (matching in spec_helper)
-      massive_text = 'a' * 1_000_000
+      massive_text = MASSIVE_TEXT
 
       # Create a few copies in the conversation
       5.times do
diff --git a/spec/support/models_to_test.rb b/spec/support/models_to_test.rb
index da9232516..3cf7d822a 100644
--- a/spec/support/models_to_test.rb
+++ b/spec/support/models_to_test.rb
@@ -50,3 +50,15 @@
   { provider: :mistral, model: 'mistral-embed' },
   { provider: :vertexai, model: 'text-embedding-004' }
 ].freeze
+
+# Models that require prompt caching configuration
+CACHING_MODELS = [
+  { provider: :anthropic, model: 'claude-3-5-haiku-20241022' },
+  { provider: :bedrock, model: 'anthropic.claude-3-5-haiku-20241022-v1:0' }
+].freeze
+
+# Models that report cached tokens
+CACHED_MODELS = [
+  { provider: :gemini, model: 'gemini-2.5-flash' },
+  { provider: :openai, model: 'gpt-4.1-nano' }
+].freeze
diff --git a/spec/support/vcr_configuration.rb b/spec/support/vcr_configuration.rb
index 0cdd63e7f..d205ae5eb 100644
--- a/spec/support/vcr_configuration.rb
+++ b/spec/support/vcr_configuration.rb
@@ -1,5 +1,9 @@
 # frozen_string_literal: true
 
+MASSIVE_TEXT = 'a' * 1_000_000
+MASSIVE_TEXT_FOR_PROMPT_CACHING = 'a' * (2048 * 4)
+MASSIVE_TEXT_FOR_PROMPT_CACHE_REPORTING = (MASSIVE_TEXT_FOR_PROMPT_CACHING * 2) + ('b' * 1024)
+
 # VCR Configuration
 VCR.configure do |config|
   config.cassette_library_dir = 'spec/fixtures/vcr_cassettes'
@@ -88,7 +92,11 @@
   config.filter_sensitive_data('') { |interaction| interaction.response.headers['Cf-Ray']&.first }
 
   # Filter large strings used to test "context length exceeded" error handling
-  config.filter_sensitive_data('') { 'a' * 1_000_000 }
+  config.filter_sensitive_data('') { MASSIVE_TEXT }
+
+  # Filter large strings used to test prompt caching
+  config.filter_sensitive_data('') { MASSIVE_TEXT_FOR_PROMPT_CACHING }
+  config.filter_sensitive_data('') { MASSIVE_TEXT_FOR_PROMPT_CACHE_REPORTING }
 
   # Filter cookies
   config.before_record do |interaction|