diff --git a/.rubocop.yml b/.rubocop.yml index f16c8f8..8eb5e87 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -51,3 +51,8 @@ Style/FrozenStringLiteralComment: Style/MultilineBlockChain: Enabled: false + +Style/Documentation: + Exclude: + - "examples/**/*" + - "spec/**/*" diff --git a/CHANGELOG.md b/CHANGELOG.md index b4d3d74..cbb477a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,30 @@ +## [2.1.0] - 2026-02-11 + +### Breaking Changes +- **Standalone runtime** - Removed `ruby_llm` runtime dependency. Raix now ships its own chat runtime. +- **Configuration migration** - Primary API-key setup moved to `Raix.configure` (`openai_api_key`, `openrouter_api_key`). + +### Added +- Native runtime components: + - `Raix::Runtime::Client` + - `Raix::Runtime::Transport` + - `Raix::Runtime::Providers::OpenAI` + - `Raix::Runtime::Providers::OpenRouter` + - `Raix::Runtime::StreamParser` + - `Raix::Runtime::StreamAccumulator` +- `Raix::TranscriptStore` for thread-safe transcript handling without RubyLLM objects. +- Migration guide: `docs/migration/standalone-runtime-migration.md`. +- Comprehensive runtime unit tests (providers, transport, streaming parser/accumulator, transcript store, client routing). + +### Changed +- `ChatCompletion` now delegates network execution to the internal runtime while preserving API compatibility. +- Predicted outputs integration test enabled for OpenAI. +- Examples and README updated to use Raix-native configuration. + +### Deprecated +- Legacy `openai_client` / `openrouter_client` configuration paths. +- `ruby_llm_config` shim remains temporarily for migration and emits deprecation warnings. + ## [2.0.0] - 2025-12-17 ### Breaking Changes diff --git a/Gemfile.lock b/Gemfile.lock index 4378b7d..b8f4068 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,11 +1,11 @@ PATH remote: . 
specs: - raix (2.0.0) + raix (2.1.0) activesupport (>= 6.0) + faraday (~> 2.0) faraday-retry (~> 2.0) ostruct - ruby_llm (~> 1.9) GEM remote: https://rubygems.org/ @@ -37,11 +37,8 @@ GEM drb (2.2.1) e2mmap (0.1.0) erubi (1.13.0) - event_stream_parser (1.0.0) faraday (2.9.2) faraday-net_http (>= 2.0, < 3.2) - faraday-multipart (1.0.4) - multipart-post (~> 2) faraday-net_http (3.1.0) net-http faraday-retry (2.3.1) @@ -77,10 +74,8 @@ GEM rb-fsevent (~> 0.10, >= 0.10.3) rb-inotify (~> 0.9, >= 0.9.10) lumberjack (1.2.10) - marcel (1.1.0) method_source (1.1.0) minitest (5.24.0) - multipart-post (2.4.1) mutex_m (0.2.0) nenv (0.3.0) net-http (0.4.1) @@ -144,17 +139,6 @@ GEM rubocop-ast (1.31.2) parser (>= 3.3.0.4) ruby-progressbar (1.13.0) - ruby_llm (1.9.1) - base64 - event_stream_parser (~> 1) - faraday (>= 1.10.0) - faraday-multipart (>= 1) - faraday-net_http (>= 1) - faraday-retry (>= 1) - marcel (~> 1.0) - ruby_llm-schema (~> 0.2.1) - zeitwerk (~> 2) - ruby_llm-schema (0.2.5) shellany (0.0.1) solargraph (0.50.0) backport (~> 1.2) @@ -212,7 +196,6 @@ GEM yard-sorbet (0.8.1) sorbet-runtime (>= 0.5) yard (>= 0.9) - zeitwerk (2.7.3) PLATFORMS arm64-darwin-21 diff --git a/README.md b/README.md index afa16a8..463bf13 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Raix (pronounced "ray" because the x is silent) is a library that gives you ever Understanding how to use discrete AI components in otherwise normal code is key to productively leveraging Raix, and the subject of a book written by Raix's author Obie Fernandez, titled [Patterns of Application Development Using AI](https://leanpub.com/patterns-of-application-development-using-ai). You can easily support the ongoing development of this project by buying the book at Leanpub. -Raix 2.0 is powered by [RubyLLM](https://github.com/crmne/ruby_llm), giving you unified access to OpenAI, Anthropic, Google Gemini, and dozens of other providers through OpenRouter. 
Note that you can use Raix to add AI capabilities to non-Rails applications as long as you include ActiveSupport as a dependency. +Raix includes a standalone runtime with first-class support for OpenAI and OpenRouter. Note that you can use Raix to add AI capabilities to non-Rails applications as long as you include ActiveSupport as a dependency. ### Chat Completions @@ -855,16 +855,16 @@ If bundler is not being used to manage dependencies, install the gem by executin ### Configuration -Raix 2.0 uses [RubyLLM](https://github.com/crmne/ruby_llm) as its backend for LLM provider connections. Configure your API keys through RubyLLM: +Configure API keys directly in Raix: ```ruby # config/initializers/raix.rb -RubyLLM.configure do |config| +Raix.configure do |config| config.openrouter_api_key = ENV["OPENROUTER_API_KEY"] config.openai_api_key = ENV["OPENAI_API_KEY"] - # Optional: configure other providers - # config.anthropic_api_key = ENV["ANTHROPIC_API_KEY"] - # config.gemini_api_key = ENV["GEMINI_API_KEY"] + # Optional OpenAI headers + # config.openai_organization_id = ENV["OPENAI_ORG_ID"] + # config.openai_project_id = ENV["OPENAI_PROJECT_ID"] end ``` @@ -896,7 +896,7 @@ class MyAssistant end ``` -### Upgrading from Raix 1.x +### Upgrading If upgrading from Raix 1.x, update your configuration from: @@ -908,7 +908,9 @@ Raix.configure do |config| end ``` -To the new RubyLLM-based configuration shown above. +To the Raix-native configuration shown above. + +For migration details from RubyLLM-backed Raix 2.0, see `docs/migration/standalone-runtime-migration.md`. ## Development diff --git a/docs/migration/standalone-runtime-migration.md b/docs/migration/standalone-runtime-migration.md new file mode 100644 index 0000000..a4e7083 --- /dev/null +++ b/docs/migration/standalone-runtime-migration.md @@ -0,0 +1,56 @@ +# Raix Standalone Runtime Migration Guide + +This guide covers migration from RubyLLM-backed Raix 2.0 to the standalone runtime. 
+ +## What Changed + +- Raix no longer depends on `ruby_llm` at runtime. +- `Raix::ChatCompletion` now uses native OpenAI/OpenRouter provider adapters. +- `Raix.configure` is now the primary API-key configuration surface. + +## New Configuration + +```ruby +Raix.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] + config.openrouter_api_key = ENV["OPENROUTER_API_KEY"] + config.openai_organization_id = ENV["OPENAI_ORG_ID"] # optional + config.openai_project_id = ENV["OPENAI_PROJECT_ID"] # optional +end +``` + +## Legacy Compatibility (Temporary) + +Raix still reads legacy settings for one major-version migration window: + +- `config.openai_client` / `config.openrouter_client` (deprecated) +- `config.ruby_llm_config` (deprecated shim) + +When legacy settings are used, Raix emits deprecation warnings. + +## Provider Selection Rules + +Provider selection is unchanged: + +- explicit `openai: "gpt-4o"` forces OpenAI +- models starting with `gpt-` or `o` use OpenAI +- all other models use OpenRouter + +## Behavior Compatibility + +The following behavior is preserved: + +- `chat_completion` signature and defaults +- transcript acceptance of abbreviated and standard message formats +- tool declaration/filtering/dispatch security checks +- automatic tool-call continuation loop +- `max_tool_calls` and `stop_tool_calls_and_respond!` +- `before_completion` hook order (global -> class -> instance) +- `Thread.current[:chat_completion_response]` assignment + +## Suggested Upgrade Steps + +1. Remove `ruby_llm` initialization from app boot code. +2. Move API keys into `Raix.configure`. +3. Run your test suite and watch for deprecation warnings. +4. Remove deprecated legacy client or RubyLLM config usage. 
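The provider selection rules above can be sketched in a few lines of Ruby. This is an illustrative sketch only: the method name echoes the `determine_provider` helper mentioned in the PRD, but the body here is reconstructed from the stated rules, not copied from Raix's internals.

```ruby
# Sketch of the documented selection rules (assumption: not Raix's actual code).
def determine_provider(model, openai_override: nil)
  return :openai if openai_override                # explicit openai: forces OpenAI
  return :openai if model.start_with?("gpt-", "o") # gpt-* and o* model IDs use OpenAI
  :openrouter                                      # all other models route via OpenRouter
end

determine_provider("gpt-4o")                              # => :openai
determine_provider("o3-mini")                             # => :openai
determine_provider("claude-3.5-sonnet")                   # => :openrouter
determine_provider("claude-3.5-sonnet", openai_override: "gpt-4o") # => :openai
```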
diff --git a/docs/prd/raix-standalone-runtime-prd.md b/docs/prd/raix-standalone-runtime-prd.md new file mode 100644 index 0000000..341ac64 --- /dev/null +++ b/docs/prd/raix-standalone-runtime-prd.md @@ -0,0 +1,455 @@ +# PRD: Raix Standalone Runtime (Decouple from RubyLLM) + +Date: 2026-02-11 +Status: Draft for implementation planning +Owner: Raix maintainer + +## 1) Context + +Raix currently depends on `ruby_llm (~> 1.9)` and uses it for: + +- chat object lifecycle and execution (`RubyLLM.chat`, `Chat#ask/#complete`) +- message model and transcript bridging (`RubyLLM::Message`, `TranscriptAdapter`) +- tool model bridging (`RubyLLM::Tool`, `FunctionToolAdapter`) +- provider transport and request/response parsing (OpenAI/OpenRouter through RubyLLM) +- global API key configuration (`RubyLLM.config`) + +At the same time, Raix already re-implements major chat orchestration behavior: + +- `chat_completion` request parameter assembly +- tool-call security checks and function dispatch +- multi-turn continuation loop after tool calls +- JSON mode handling and parsing +- prompt caching transform behavior for Anthropic-style cache controls +- hook pipeline (`before_completion`) with mutable context + +This overlap means Raix is coupled to RubyLLM while still carrying its own runtime semantics. The goal is to absorb required runtime functionality into Raix so the gem is standalone and controlled by Raix maintainers. + +## 2) Problem Statement + +Raix carries product and roadmap risk from a hard runtime dependency whose maintainership and architecture direction sit outside the project. This creates: + +- roadmap blocking for Raix-specific features +- forced adaptation to upstream design choices +- reduced ability to guarantee behavior stability for Raix users +- duplicated logic across two chat runtimes + +## 3) Goals + +1. Remove hard dependency on `ruby_llm` for core Raix behavior. +2. Preserve public Raix behavior and API compatibility for existing users. +3.
Keep provider support needed by Raix use-cases (OpenAI direct and OpenRouter routing). +4. Preserve module-level features: `ChatCompletion`, `FunctionDispatch`, `PromptDeclarations`, `Predicate`, `MCP`, `ResponseFormat`, `before_completion`. +5. Improve Raix autonomy for future features (including features currently blocked by RubyLLM gaps). + +## 4) Non-Goals + +1. Recreate all RubyLLM product surface (embeddings, moderation, images, transcription, ActiveRecord integrations, provider catalog and model registry tooling). +2. Build a full chat UI framework. +3. Introduce breaking API changes in this extraction phase unless explicitly versioned and documented. + +## 5) Scope + +### In Scope + +- A new internal Raix runtime for chat completion and tool orchestration. +- Provider adapters for OpenAI and OpenRouter. +- Internal transport layer (Faraday-based), retries, streaming parsing, error mapping. +- Internal message and tool-call representations. +- Replacement/removal of `TranscriptAdapter` and `FunctionToolAdapter`. +- Configuration migration from RubyLLM-based keys to Raix-native keys. +- Backward compatibility shim for migration window. +- Updated tests, docs, examples, and changelog. + +### Out of Scope (for initial standalone release) + +- Native direct adapters for Anthropic, Gemini, Bedrock, etc. (can be added later via adapter pattern). +- Model discovery and remote model registry refresh features. +- Any broad DSL redesign. + +## 6) Current Dependency Inventory + +### Coupling points in current code + +- `lib/raix.rb`: hard `require "ruby_llm"`. +- `lib/raix/chat_completion.rb`: `RubyLLM.chat`, `ruby_llm_request`, `ruby_llm_chat`. +- `lib/raix/transcript_adapter.rb`: depends on `RubyLLM::Chat#messages`. +- `lib/raix/function_tool_adapter.rb`: subclasses `RubyLLM::Tool`. +- `lib/raix/configuration.rb`: `ruby_llm_config`, checks `RubyLLM.config` keys. +- `raix.gemspec`: runtime dependency on `ruby_llm`. 
+ +### Practical behavior gap to address + +- Raix currently routes non-OpenAI models to OpenRouter (`determine_provider`), so direct provider support promised by RubyLLM is not actually fully exploited by Raix. +- Predicted outputs support is partially blocked in tests due to RubyLLM behavior. +- Two tool loop implementations overlap (`RubyLLM::Chat#complete` tool loop + Raix loop in `chat_completion`). + +## 7) Target Product Definition + +Raix ships with an internal, provider-agnostic chat runtime: + +- `Raix::Runtime::ChatSession` +- `Raix::Runtime::Message`, `Raix::Runtime::ToolCall`, `Raix::Runtime::Chunk` +- `Raix::Runtime::Providers::{OpenAI, OpenRouter}` +- `Raix::Runtime::Transport` (HTTP, retries, streaming parser, errors) + +`Raix::ChatCompletion` remains the public API surface and delegates to runtime internals. + +## 8) Functional Requirements + +### A. API Compatibility + +FR-001: `ChatCompletion#chat_completion` must keep current signature and defaults (`params:`, `loop:`, `json:`, `raw:`, `openai:`, `save_response:`, `messages:`, `available_tools:`, `max_tool_calls:`). +Acceptance: Existing specs using this signature continue to pass without call-site changes. + +FR-002: `loop:` must remain accepted and emit a deprecation warning, without changing behavior. +Acceptance: Passing `loop: true` does not break request execution. + +FR-003: `transcript` must continue to accept both abbreviated and standard message formats. +Acceptance: Existing transcript usage patterns in README/examples/specs remain valid. + +FR-004: `save_response` semantics must be preserved. +Acceptance: Response is appended to transcript only when `save_response: true`. + +FR-005: `messages:` override must bypass object transcript for that call while preserving existing race-safety behavior. +Acceptance: Concurrent calls with explicit `messages:` do not corrupt transcript ordering.
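The two transcript formats named in FR-003 can be illustrated with a small normalizer. `normalize_message` is a hypothetical helper written for this document; Raix's internal handling may differ.

```ruby
# Hypothetical normalizer for the abbreviated ({ user: "..." }) and
# standard ({ role: "user", content: "..." }) transcript formats (FR-003).
def normalize_message(message)
  msg = message.transform_keys(&:to_s)
  return msg if msg.key?("role") # already in standard format

  role, content = msg.first      # abbreviated format: single role => content pair
  { "role" => role, "content" => content }
end

normalize_message({ user: "Hello" })
# => { "role" => "user", "content" => "Hello" }
normalize_message({ "role" => "assistant", "content" => "Hi" })
# => { "role" => "assistant", "content" => "Hi" }
```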
+ +FR-006: `raw: true` returns provider-normalized raw response object in current OpenAI-compatible envelope shape. +Acceptance: Existing consumers expecting `choices[0].message` continue to work. + +FR-007: Class-level `configure` and fallback behavior must remain available. +Acceptance: Per-class config override of global config remains intact. + +FR-008: `Configuration#client?` continues to report readiness, but against Raix-native config fields. +Acceptance: Equivalent true/false outcomes for configured and unconfigured states. + +FR-009: Public modules and constants remain available (`ChatCompletion`, `FunctionDispatch`, `PromptDeclarations`, `Predicate`, `MCP`, `ResponseFormat`, `CompletionContext`, `UndeclaredToolError`). +Acceptance: Existing includes and references compile and behave. + +### B. Chat Runtime and Message Handling + +FR-010: Implement internal message model with roles `system|user|assistant|tool`, content, tool_calls, tool_call_id, usage metadata, and raw payload. +Acceptance: All existing message transforms and transcript operations supported. + +FR-011: Preserve content handling for both string and structured/multipart content arrays (needed for prompt caching and tool messages). +Acceptance: Anthropic-style multipart payload paths continue to work. + +FR-012: Normalize outputs to OpenAI-compatible shape used by current Raix logic. +Acceptance: `response.dig("choices", 0, "message", "content")` continues to work. + +FR-013: Preserve `Thread.current[:chat_completion_response]` assignment behavior. +Acceptance: Prompt caching and existing debugging workflows can still access last raw response. + +FR-014: Preserve `strip` behavior for non-raw string responses. +Acceptance: Trailing whitespace is stripped while internal line breaks remain. + +FR-015: Preserve empty transcript guard (`Can't complete an empty transcript`). +Acceptance: Call fails fast for empty request context. 
+ +FR-016: Provide an internal transcript store that removes dependence on RubyLLM message objects. +Acceptance: `TranscriptAdapter` no longer required for core operation. + +### C. Provider and Transport + +FR-017: Implement OpenAI provider adapter (chat completions sync + stream) with API key auth and optional org/project headers. +Acceptance: Current OpenAI tests and examples pass. + +FR-018: Implement OpenRouter provider adapter (chat completions sync + stream) with API key auth. +Acceptance: Current OpenRouter-based tests and examples pass. + +FR-019: Provider selection must preserve current behavior: +- explicit `openai:` selects OpenAI +- `gpt-*` and `o*` model IDs select OpenAI +- all other models select OpenRouter +Acceptance: Selection matches existing `determine_provider` outcomes. + +FR-020: Implement retry, timeout, and error mapping comparable to current behavior. +Acceptance: network failures raise stable Raix runtime errors with useful provider messages. + +FR-021: Streaming must support incremental callback tokens/chunks with existing `self.stream = lambda { |chunk| ... }` usage. +Acceptance: streaming examples continue to work without API change. + +FR-022: Streaming accumulator must reconstruct final content/tool calls for downstream logic. +Acceptance: final response object in non-stream and stream modes is consistent. + +FR-023: Pass-through params support for existing attr-backed generation controls: +`cache_at`, `frequency_penalty`, `logit_bias`, `logprobs`, `max_completion_tokens`, `max_tokens`, `min_p`, `prediction`, `presence_penalty`, `provider`, `repetition_penalty`, `response_format`, `seed`, `stop`, `temperature`, `tool_choice`, `top_a`, `top_k`, `top_logprobs`, `top_p`. +Acceptance: parameters are included in payload where applicable and ignored safely otherwise. + +FR-024: Preserve JSON parse retry behavior for blank/invalid JSON in JSON mode. +Acceptance: retry path and error behavior remain compatible. + +### D. 
Tools and Function Dispatch + +FR-025: Preserve function declaration DSL and schema generation (`function :name, description, **params`). +Acceptance: existing function declarations produce equivalent tool schemas. + +FR-026: Preserve parameter flags `required` and `optional`. +Acceptance: required arrays and properties remain as today. + +FR-027: Preserve tool filtering via `available_tools`: +- `nil` => all declared tools +- `false` => no tools +- array => filtered tools, error on undeclared tools +Acceptance: `UndeclaredToolError` behavior preserved. + +FR-028: Preserve security check that only declared function names may be dispatched. +Acceptance: unauthorized tool name raises and is never `public_send`-ed. + +FR-029: Preserve automatic continuation loop after tool calls until text answer is produced. +Acceptance: function-dispatch integration tests keep returning final assistant text. + +FR-030: Preserve support for multiple tool calls in one assistant message. +Acceptance: all tool calls in a single turn are executed before continuation. + +FR-031: Preserve `max_tool_calls` and `stop_tool_calls_and_respond!` behavior. +Acceptance: forced non-tool final response path remains functional. + +### E. Hooks, Context, and Mutation + +FR-032: Preserve `before_completion` at global, class, and instance levels. +Acceptance: merge order global -> class -> instance remains unchanged. + +FR-033: Preserve mutable `CompletionContext` contract (`messages`, `params`, helpers). +Acceptance: message mutation use cases (redaction/injection/filtering) continue to work. + +FR-034: Allow hooks to override model and arbitrary params. +Acceptance: overridden model is used in provider call. + +FR-035: Ignore non-callable hooks and non-hash hook return values safely. +Acceptance: no exceptions from benign hook misuse. + +FR-036: Preserve hook execution timing (post-transform, pre-request). +Acceptance: hooks receive OpenAI-format request messages as now. + +### F. 
Higher-Level Modules + +FR-037: `PromptDeclarations` behavior remains compatible, including prompt ordering, conditions (`if`, `unless`, `until`), callbacks, stream handling, and `chat_completion_from_superclass`. +Acceptance: prompt declaration specs continue to pass. + +FR-038: `Predicate` behavior remains compatible (`yes?`, `no?`, `maybe?`, required handler validation). +Acceptance: predicate specs continue to pass. + +FR-039: `MCP` integration remains functionally unchanged (tool discovery, proxy calls, transcript logging, type coercion). +Acceptance: MCP specs continue to pass without RubyLLM runtime dependency. + +FR-040: `ResponseFormat` integration remains compatible with `chat_completion` JSON/structured output behavior. +Acceptance: response format specs and examples remain valid. + +FR-041: Preserve current public error classes where possible; new runtime errors must be namespaced and documented. +Acceptance: common rescue paths remain stable or receive documented replacements. + +### G. Migration and Packaging + +FR-042: Remove runtime dependency on `ruby_llm` from gemspec. +Acceptance: gem installs and runs without RubyLLM present. + +FR-043: Remove `require "ruby_llm"` from Raix runtime files. +Acceptance: require graph resolves cleanly. + +FR-044: Update docs/examples to use Raix-native configuration. +Acceptance: no required `RubyLLM.configure` in primary setup docs. + +FR-045: Provide migration shim for one major version window: +- Read legacy config where practical +- Emit deprecation warnings for RubyLLM-specific config usage +Acceptance: existing apps can migrate with guided warnings. + +FR-046: Publish migration guide with old/new configuration mappings and behavioral notes. +Acceptance: guide covers all breaking and non-breaking deltas. + +## 9) Non-Functional Requirements + +NFR-001 Reliability: error handling must classify provider/network failures and return actionable messages. 
+ +NFR-002 Availability: no new single point of failure compared to current architecture. + +NFR-003 Performance: added Raix-side overhead for request orchestration should be minimal relative to provider latency. + +NFR-004 Concurrency safety: transcript updates and tool call append operations must remain race-safe. + +NFR-005 Backward compatibility: existing public API usage should continue to work for the targeted major release. + +NFR-006 Security: no dynamic dispatch beyond declared functions; avoid leaking API keys in logs/exceptions. + +NFR-007 Observability: request/response logging hooks remain feasible without monkey patches. + +NFR-008 Testability: runtime components must be unit-testable in isolation (provider adapter, parser, tool loop, hook pipeline). + +NFR-009 Maintainability: provider adapters follow shared interface with low coupling. + +NFR-010 Extensibility: adding a new provider should not require edits to core chat orchestration. + +NFR-011 Documentation quality: examples and README must accurately reflect runtime behavior and configuration. + +NFR-012 Determinism: seed and core params pass-through behavior remains stable where providers support it. + +NFR-013 Memory discipline: long transcript and streaming paths should avoid unnecessary object churn. + +NFR-014 Compliance with semantic versioning: breaking changes are explicit and documented. + +NFR-015 Governance autonomy: Raix release cadence and feature roadmap are not blocked by RubyLLM changes. + +## 10) Architecture Decisions (High Level) + +1. Create an internal provider abstraction (`ProviderAdapter`) with OpenAI and OpenRouter implementations first. +2. Keep Raix public API stable; replace internals incrementally behind existing modules. +3. Normalize provider outputs into a consistent OpenAI-like envelope used by current Raix flow. +4. Keep tool loop orchestration in Raix (single source of truth). +5. 
Avoid rebuilding non-chat RubyLLM domains (embeddings/moderation/images/transcription) in this phase. + +## 11) Delivery Plan (Tasks and Dependencies) + +No time estimates are included by design. + +### Phase 0: Contract Freeze + +T01: Build behavior contract from current specs and README examples. +Depends on: none + +T02: Freeze compatibility matrix (API, params, transcript, hooks, tool loop). +Depends on: T01 + +### Phase 1: Runtime Foundation + +T03: Introduce `Raix::Runtime::Config` and map current `Configuration` fields. +Depends on: T02 + +T04: Implement internal runtime data models (`Message`, `ToolCall`, `Chunk`, `ResponseEnvelope`). +Depends on: T02 + +T05: Implement transcript store replacement and remove RubyLLM message coupling. +Depends on: T04 + +T06: Implement transport layer (Faraday connection setup, timeout, retry, error middleware). +Depends on: T03 + +T07: Define provider adapter interface and registry. +Depends on: T04, T06 + +### Phase 2: Provider Adapters + +T08: Implement OpenAI adapter (sync request/response + streaming). +Depends on: T07 + +T09: Implement OpenRouter adapter (sync request/response + streaming). +Depends on: T07 + +T10: Implement streaming parser and accumulator shared by adapters. +Depends on: T06 + +T11: Implement provider error normalization and response usage mapping. +Depends on: T08, T09 + +### Phase 3: Chat Orchestration + +T12: Replace `ruby_llm_request` with runtime request executor. +Depends on: T05, T08, T09, T10, T11 + +T13: Port parameter mapping and defaults from existing `chat_completion`. +Depends on: T12 + +T14: Port JSON mode, response_format behavior, and retry parsing logic. +Depends on: T13 + +T15: Port prompt caching transforms (`cache_at` multipart handling). +Depends on: T13 + +T16: Port stream callback behavior and final response handling. +Depends on: T12, T10 + +### Phase 4: Tools and Hooks + +T17: Replace `FunctionToolAdapter` with native tool schema serializer. 
+Depends on: T04, T12 + +T18: Preserve available_tools filtering, undeclared-tool validation, and dispatch security checks. +Depends on: T17 + +T19: Preserve multi-tool loop, max_tool_calls, and stop flag behavior. +Depends on: T12, T18 + +T20: Preserve hook pipeline (`before_completion`) and `CompletionContext` mutation contract. +Depends on: T13 + +### Phase 5: Module Compatibility + +T21: Verify and adapt `PromptDeclarations` integration with new runtime. +Depends on: T13, T20 + +T22: Verify and adapt `Predicate` integration with new runtime. +Depends on: T13 + +T23: Validate `MCP` behavior remains unchanged under new runtime. +Depends on: T19 + +T24: Keep `ResponseFormat` behavior and ensure integration tests pass. +Depends on: T14 + +### Phase 6: Migration and Cleanup + +T25: Remove `ruby_llm` requires and runtime references (`ruby_llm_chat`, adapters). +Depends on: T12, T17 + +T26: Remove `ruby_llm` gem dependency from `raix.gemspec`; update Gemfile lock expectations. +Depends on: T25 + +T27: Add migration shim/deprecation warnings for legacy configuration. +Depends on: T03, T25 + +T28: Update README, examples, and upgrade notes to Raix-native setup. +Depends on: T26, T27 + +T29: Update changelog and publish migration guide. +Depends on: T28 + +### Phase 7: Verification and Release Readiness + +T30: Add/refresh unit tests for runtime internals and provider adapters. +Depends on: T08, T09, T10, T11, T12 + +T31: Run compatibility test matrix across existing specs and representative examples. +Depends on: T21, T22, T23, T24, T30 + +T32: Add concurrency/regression tests for transcript and tool loop race-safety. +Depends on: T19, T30 + +T33: Release candidate checklist and cut standalone-ready version. +Depends on: T29, T31, T32 + +## 12) Critical Path + +T01 -> T02 -> T03/T04/T06/T07 -> T08/T09/T10/T11 -> T12 -> T13 -> T17/T18/T19 -> T25/T26 -> T28 -> T31 -> T33 + +## 13) Risks and Mitigations + +R1: Behavior drift in tool-loop orchestration. 
+Mitigation: lock behavior with contract tests copied from current specs and VCR fixtures. + +R2: Streaming regressions across providers. +Mitigation: adapter-level streaming integration tests and accumulator unit tests. + +R3: Configuration migration breakage for existing apps. +Mitigation: migration shim + warnings + explicit upgrade guide. + +R4: Hidden coupling in examples/docs/test helpers. +Mitigation: CI checks for `require "ruby_llm"` in runtime/docs/examples after migration. + +R5: Loss of future provider breadth previously implied by RubyLLM. +Mitigation: keep adapter architecture and document supported providers clearly (OpenAI + OpenRouter initially). + +## 14) Open Decisions + +D1: Should Raix vNext include direct Anthropic/Gemini adapters or keep OpenRouter as primary route for non-OpenAI models? + +D2: Should the migration shim for RubyLLM-style configuration exist for one minor or one major cycle? + +D3: Should Raix introduce its own namespaced error taxonomy now, or mirror current errors for one release and migrate later? + +## 15) Success Criteria + +1. Raix installs and runs without RubyLLM dependency. +2. Existing public API and primary behavior remain compatible for targeted modules. +3. Specs for chat, tools, hooks, predicate, prompt declarations, and MCP pass against the new runtime. +4. Migration guide exists and covers all changed setup paths. +5. Raix can ship runtime changes without being blocked by RubyLLM maintainership or release cycle. diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..ed33fbd --- /dev/null +++ b/examples/README.md @@ -0,0 +1,634 @@ +# Raix Examples + +This directory contains working examples demonstrating various Raix features and modules. + +## Setup + +All examples require API keys to be configured. 
Create a `.env` file in the project root with: + +```bash +OR_ACCESS_TOKEN=your_openrouter_api_key +OAI_ACCESS_TOKEN=your_openai_api_key +``` + +Make sure to run examples with `bundle exec`: + +```bash +bundle exec ruby examples/example_name.rb +``` + +## Environment Variables + +### Common Variables + +- **`RAIX_DEBUG`** - Set to any value to enable detailed logging of LLM interactions (requests/responses) +- **`RAIX_EXAMPLE_MODEL`** - Override the default model for examples that support it + +### Example Usage + +```bash +# Run with debug logging enabled +RAIX_DEBUG=1 bundle exec ruby examples/live_chat_completion.rb + +# Use a different model +RAIX_EXAMPLE_MODEL="gpt-4o" bundle exec ruby examples/live_chat_completion.rb + +# Combine both +RAIX_DEBUG=1 RAIX_EXAMPLE_MODEL="gpt-4o" bundle exec ruby examples/live_chat_completion.rb +``` + +## Available Examples + +### 1. Live Chat Completion + +**File:** `live_chat_completion.rb` + +**Demonstrates:** +- Basic chat completion with `ChatCompletion` module +- Function/tool definitions using `FunctionDispatch` +- Tool calls and automatic continuation until text response + +**Features:** +- Defines a `current_time` function that returns UTC time +- Configurable via `RAIX_EXAMPLE_MODEL` and `RAIX_EXAMPLE_PROMPT` +- Shows how functions are automatically called when the AI requests them + +**Usage:** + +```bash +# Default prompt +bundle exec ruby examples/live_chat_completion.rb + +# Custom prompt +RAIX_EXAMPLE_PROMPT="What time is it?" bundle exec ruby examples/live_chat_completion.rb + +# With debug logging +RAIX_DEBUG=1 bundle exec ruby examples/live_chat_completion.rb + +# Use OpenAI model +RAIX_EXAMPLE_MODEL="gpt-4o-mini" bundle exec ruby examples/live_chat_completion.rb +``` + +### 2. 
Code Reviewer + +**File:** `code_reviewer.rb` + +**Demonstrates:** +- The `Predicate` module for yes/no questions +- Handler blocks for yes/no/maybe responses +- Processing multiple questions in sequence +- Tracking results across multiple calls + +**Features:** +- Reviews three different code snippets +- Provides approval/rejection with detailed explanations +- Shows summary statistics at the end +- Non-interactive demonstration + +**Usage:** + +```bash +# Run code review demo +bundle exec ruby examples/code_reviewer.rb + +# With debug logging to see full AI interactions +RAIX_DEBUG=1 bundle exec ruby examples/code_reviewer.rb +``` + +**Example Output:** + +``` +šŸ¤– AI Code Reviewer Demo +============================================================ + +Example 1: Clean Ruby code +šŸ” Reviewing code... +------------------------------------------------------------ +āœ… APPROVED + the code is following Ruby best practices... + +============================================================ +šŸ“Š Review Summary: + āœ… Approved: 1 + āŒ Rejected: 2 + āš ļø Unclear: 0 + šŸ“ Total: 3 +``` + +### 3. Trivia Game + +**File:** `trivia_game.rb` + +**Demonstrates:** +- The `Predicate` module in an interactive context +- User input handling +- Dynamic yes/no question evaluation +- Score tracking and result presentation + +**Features:** +- Interactive command-line trivia game +- AI judges whether your answers are correct +- Tracks score and shows percentage +- Supports custom questions via command line + +**Usage:** + +```bash +# Play with default questions +bundle exec ruby examples/trivia_game.rb + +# Provide your own questions +bundle exec ruby examples/trivia_game.rb \ + "Ruby was invented in Japan" \ + "Python was released before Ruby" \ + "Rails was created in 2004" + +# With debug logging +RAIX_DEBUG=1 bundle exec ruby examples/trivia_game.rb +``` + +**Example Session:** + +``` +šŸŽ® Welcome to AI Trivia Game! 
+================================================== + +Question 1: Ruby was created by Yukihiro Matsumoto +Your answer (true/false): true + +āœ“ CORRECT! The user answered 'true', which is correct... + +šŸ† Final Score: 4/5 +šŸ“Š Percentage: 80% +😊 Great job! Keep it up! +``` + +### 4. Structured Data Extraction + +**File:** `structured_data_extraction.rb` + +**Demonstrates:** +- The `ResponseFormat` module for schema-validated JSON output +- Extracting structured data from unstructured text +- Defining complex nested schemas +- Multiple extraction scenarios (people, products, meetings) + +**Features:** +- Enforces strict JSON schema compliance +- Perfect for data pipelines and form filling +- Shows person, product, and meeting note extraction +- Schema definition with types, arrays, and nested objects + +**Usage:** + +```bash +# Run all extraction examples +bundle exec ruby examples/structured_data_extraction.rb + +# With debug logging +RAIX_DEBUG=1 bundle exec ruby examples/structured_data_extraction.rb +``` + +**Example Output:** + +``` +šŸ“„ Input Text: +John Smith is a 32-year-old senior software engineer... + +āœ… Extracted Data: +{ + "full_name": "John Smith", + "age": 32, + "email": "john.smith@example.com", + "occupation": "senior software engineer", + "skills": ["Ruby", "Python", "JavaScript"], + "experience_years": 8 +} +``` + +### 5. 
Streaming Chat + +**File:** `streaming_chat.rb` + +**Demonstrates:** +- Real-time token-by-token response streaming +- Progress indicators during generation +- Performance metrics (tokens/second, word count) +- Interactive chat mode + +**Features:** +- Simple streaming with character-by-character output +- Streaming with "thinking" progress indicator +- Performance metrics tracking +- Optional interactive chat session + +**Usage:** + +```bash +# Run streaming demos +bundle exec ruby examples/streaming_chat.rb + +# Interactive chat mode +bundle exec ruby examples/streaming_chat.rb --interactive + +# With debug logging +RAIX_DEBUG=1 bundle exec ruby examples/streaming_chat.rb +``` + +**Use Cases:** +- Responsive chat interfaces +- Progress indicators for long responses +- Live transcription displays +- Real-time content generation + +### 6. JSON Mode Demo + +**File:** `json_mode_demo.rb` + +**Demonstrates:** +- JSON mode for flexible structured output +- Comparison with ResponseFormat's strict schemas +- Various use cases (sentiment analysis, comparisons, quizzes, recipes) +- AI-determined JSON structure + +**Features:** +- Valid JSON without strict schema enforcement +- AI chooses optimal structure for the task +- Works across different providers +- Simpler than ResponseFormat when flexibility is needed + +**Usage:** + +```bash +# Run all JSON mode examples +bundle exec ruby examples/json_mode_demo.rb + +# With debug logging +RAIX_DEBUG=1 bundle exec ruby examples/json_mode_demo.rb +``` + +**Key Difference:** +- **JSON Mode**: Flexible structure, AI decides format +- **ResponseFormat**: Strict schema, enforced validation + +### 7. 
Prompt Chain Workflow + +**File:** `prompt_chain_workflow.rb` + +**Demonstrates:** +- The `PromptDeclarations` module for multi-step workflows +- Conditional prompt execution with `if:` conditions +- Success callbacks for processing responses +- Retry logic with `until:` loops +- State management between prompts + +**Features:** +- Two complete workflow examples (research/writing and data processing) +- Shows sequential prompt execution +- Demonstrates conditional logic and callbacks +- Includes retry/loop capabilities + +**Usage:** + +```bash +# Run workflow demos +bundle exec ruby examples/prompt_chain_workflow.rb + +# Custom research topic +RESEARCH_TOPIC="Machine Learning Trends" bundle exec ruby examples/prompt_chain_workflow.rb + +# With debug logging +RAIX_DEBUG=1 bundle exec ruby examples/prompt_chain_workflow.rb +``` + +**Use Cases:** +- Multi-step research tasks +- Complex data processing pipelines +- Conversational sequences +- Automated content generation + +### 8. Prompt Caching Demo + +**File:** `prompt_caching_demo.rb` + +**Demonstrates:** +- Anthropic-style prompt caching for cost reduction +- Caching large context (documents, character cards, knowledge bases) +- Cache hit benefits (speed and cost) +- Multiple queries against cached context + +**Features:** +- Employee handbook Q&A example +- Character roleplaying with cached personality +- Shows cache performance benefits +- Demonstrates cost savings (up to 98% reduction) + +**Usage:** + +```bash +# Run caching demos (requires Anthropic model) +RAIX_EXAMPLE_MODEL="anthropic/claude-3-5-sonnet" bundle exec ruby examples/prompt_caching_demo.rb + +# With debug logging +RAIX_DEBUG=1 RAIX_EXAMPLE_MODEL="anthropic/claude-3-5-sonnet" bundle exec ruby examples/prompt_caching_demo.rb +``` + +**Cost Savings:** +- First request: Full context processed (~1200 tokens) +- Cached requests: Only new message processed (~10-20 tokens) +- ~98% reduction in input token costs + +**Use Cases:** +- RAG 
(Retrieval-Augmented Generation) applications +- Document analysis with multiple queries +- AI chatbots with large knowledge bases +- Character-based AI roleplaying + +### 9. Advanced Tool Control + +**File:** `advanced_tool_control.rb` + +**Demonstrates:** +- Multiple tool calls in a single AI response +- `max_tool_calls` parameter for limiting function execution +- `stop_tool_calls_and_respond!` for early termination +- Autonomous agent patterns with safeguards + +**Features:** +- Multiple parallel tool calls +- Configurable call limits to prevent runaway loops +- Early termination from within functions +- Realistic order processing agent example + +**Usage:** + +```bash +# Run tool control demos +bundle exec ruby examples/advanced_tool_control.rb + +# With debug logging +RAIX_DEBUG=1 bundle exec ruby examples/advanced_tool_control.rb +``` + +**Example Output:** + +``` +šŸ“‹ Tool Call Log: +------------------------------------------------------------ +1. search_database - 10:23:45.123 + Args: {:query=>"Q4 sales", :limit=>10} +2. calculate_stats - 10:23:45.456 + Args: {:dataset=>"sales", :metric=>"average"} +3. check_status - 10:23:45.789 + Args: {:system=>"analytics"} +``` + +**Use Cases:** +- Task automation with multiple steps +- Autonomous agents with safeguards +- Complex workflow orchestration +- Order processing with validations + +### 10. Rails Initializer Example + +**File:** `rails_initializer_example.rb` + +**Demonstrates:** +- How to configure Raix in a Rails application +- Raix standalone runtime configuration options +- Logging setup with custom filtering +- Retry and timeout configuration + +**Features:** +- Complete Rails initializer template +- Shows all available configuration options +- Includes custom logger example with sensitive data filtering +- Documents differences from previous Raix versions + +**Usage:** + +This file is a reference/template. Copy relevant sections to your Rails app's `config/initializers/raix.rb`. 
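+Since that file is a reference template rather than something you run, here is a minimal sketch of what the copied initializer might look like, assuming the same `Raix.configure` keys used by the examples in this directory (`openai_api_key`, `openrouter_api_key`). The credential paths are placeholders — adapt them to however your application stores secrets:
+
+```ruby
+# config/initializers/raix.rb
+#
+# Minimal sketch — the credentials paths below are placeholders; adapt them
+# to your app's secrets management (encrypted credentials, ENV, vault, etc.).
+Raix.configure do |config|
+  # Prefer encrypted Rails credentials, falling back to the same environment
+  # variables the examples use.
+  config.openai_api_key     = Rails.application.credentials.dig(:openai, :api_key) ||
+                              ENV.fetch("OAI_ACCESS_TOKEN", nil)
+  config.openrouter_api_key = Rails.application.credentials.dig(:openrouter, :api_key) ||
+                              ENV.fetch("OR_ACCESS_TOKEN", nil)
+end
+```
+
+See `rails_initializer_example.rb` for the full set of options, including the logging, retry, and timeout configuration it documents.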
+ +## Module Overview + +### ChatCompletion + +The foundation module for LLM chat interactions. + +**Key Features:** +- Transcript management +- Message formatting +- Automatic tool call handling +- Streaming support +- Multiple provider support (OpenAI, OpenRouter) + +**Configuration:** +```ruby +class MyBot + include Raix::ChatCompletion + + configure do |config| + config.model = "gpt-4o" + config.temperature = 0.7 + config.max_tokens = 1000 + end +end +``` + +### FunctionDispatch + +DSL for declaring functions/tools that the AI can call. + +**Key Features:** +- Clean function definition syntax +- Automatic JSON schema generation +- Type validation +- Integration with ChatCompletion + +**Example:** +```ruby +class Assistant + include Raix::ChatCompletion + include Raix::FunctionDispatch + + function :get_weather, + "Get weather for a location", + location: { type: "string", required: true } do |args| + # Implementation + "Sunny, 72°F" + end +end +``` + +### Predicate + +Module for handling yes/no/maybe questions with custom handlers. + +**Key Features:** +- Simple yes?/no?/maybe? block syntax +- Automatic response parsing +- Pattern matching on AI responses +- Clean separation of concerns + +**Example:** +```ruby +class Decision + include Raix::Predicate + + yes? { |explanation| puts "Approved: #{explanation}" } + no? { |explanation| puts "Rejected: #{explanation}" } + maybe? { |explanation| puts "Unclear: #{explanation}" } +end + +decision = Decision.new +decision.ask("Is this code thread-safe?") +``` + +### ResponseFormat + +Module for defining strict JSON schemas that the AI must follow in its response. 
+ +**Key Features:** +- Schema validation and enforcement +- Converts Ruby hash structures to OpenAI-compatible JSON schemas +- Perfect for structured data extraction +- Supports nested objects and arrays + +**Example:** +```ruby +class DataExtractor + include Raix::ChatCompletion + + def extract_person(text) + format = Raix::ResponseFormat.new("Person", { + name: { type: "string" }, + age: { type: "integer" }, + skills: ["string"] + }) + + transcript << { user: "Extract person info: #{text}" } + chat_completion(params: { response_format: format }) + end +end +``` + +### PromptDeclarations + +Module for building multi-step AI workflows with conditional execution and callbacks. + +**Key Features:** +- Sequential prompt execution +- Conditional prompts with `if:` clauses +- Success callbacks for processing responses +- Retry logic with `until:` loops +- State management between steps + +**Example:** +```ruby +class ResearchWorkflow + include Raix::ChatCompletion + include Raix::PromptDeclarations + + attr_accessor :research_data + + prompt text: -> { "Research topic: AI" } + + prompt text: -> { "Analyze findings" }, + if: -> { research_data }, + success: ->(response) { process_analysis(response) } + + prompt text: -> { "Generate report" }, + until: -> { report_complete? } +end +``` + +## Tips and Best Practices + +1. **Always use `bundle exec`** to ensure you're using the local development version of Raix +2. **Enable debug logging** during development with `RAIX_DEBUG=1` +3. **Use environment variables** for API keys, never commit them +4. **Start with small examples** and build up to complex use cases +5. **Check token usage** in debug logs to optimize prompts +6. **Test with different models** to find the best balance of cost/quality + +## Troubleshooting + +### "Missing configuration for OpenRouter/OpenAI" + +Make sure your `.env` file is in the project root and contains valid API keys: + +```bash +OR_ACCESS_TOKEN=sk-or-v1-... +OAI_ACCESS_TOKEN=sk-... 
+``` + +### "No endpoints found for model" + +The model might not be available on OpenRouter. Try using an OpenAI model instead: + +```bash +RAIX_EXAMPLE_MODEL="gpt-4o-mini" bundle exec ruby examples/live_chat_completion.rb +``` + +### Examples not using local Raix code + +Always use `bundle exec` to run examples. Without it, Ruby will load the installed gem instead of your local development code. + +### Debug logging not showing + +Make sure you're setting the environment variable before the command: + +```bash +RAIX_DEBUG=1 bundle exec ruby examples/code_reviewer.rb +``` + +## Creating Your Own Examples + +When creating new examples, follow this template: + +```ruby +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "dotenv" +require "raix" + +# Load environment variables +Dotenv.load + +# Configure Raix +Raix.configure do |config| + config.openrouter_api_key = ENV.fetch("OR_ACCESS_TOKEN", nil) + config.openai_api_key = ENV.fetch("OAI_ACCESS_TOKEN", nil) +end + +module Examples + class YourExample + include Raix::ChatCompletion + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "gpt-4o-mini") + config.temperature = 0.7 + end + + def run + transcript << { user: "Your prompt here" } + chat_completion + end + + def self.run! + new.run + end + end +end + +if $PROGRAM_NAME == __FILE__ + Examples::YourExample.run! 
+end +``` + +## Additional Resources + +- [Raix Documentation](../README.md) +- [OpenRouter Models](https://openrouter.ai/models) +- [OpenAI Models](https://platform.openai.com/docs/models) diff --git a/examples/advanced_tool_control.rb b/examples/advanced_tool_control.rb new file mode 100755 index 0000000..c2ac62a --- /dev/null +++ b/examples/advanced_tool_control.rb @@ -0,0 +1,323 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "dotenv" +require "raix" + +# Load environment variables from .env file +Dotenv.load + +# Configure Raix with API keys +Raix.configure do |config| + config.openrouter_api_key = ENV.fetch("OR_ACCESS_TOKEN", nil) + config.openai_api_key = ENV.fetch("OAI_ACCESS_TOKEN", nil) +end + +module Examples + # Demonstrates advanced tool/function control features: + # - Multiple tool calls in one response + # - max_tool_calls limiting + # - stop_tool_calls_and_respond! for early termination + class AdvancedToolControl + include Raix::ChatCompletion + include Raix::FunctionDispatch + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "gpt-4o-mini") + config.temperature = 0.7 + config.max_tool_calls = 10 # Default limit + end + + attr_reader :call_log + + def initialize + @call_log = [] + end + + # Tool 1: Search database + function :search_database, + "Search the database for information", + query: { type: "string", required: true }, + limit: { type: "integer", default: 10 } do |args| + log_call(:search_database, args) + "Found #{rand(3..8)} results for '#{args[:query]}'" + end + + # Tool 2: Calculate statistics + function :calculate_stats, + "Calculate statistics on data", + dataset: { type: "string", required: true }, + metric: { type: "string", required: true } do |args| + log_call(:calculate_stats, args) + "#{args[:metric].capitalize}: #{rand(10..100)}" + end + + # Tool 3: Send notification + function :send_notification, + "Send a notification to user", + message: { type: "string", required: true }, + 
priority: { type: "string", enum: %w[low medium high] } do |args| + log_call(:send_notification, args) + "Notification sent: #{args[:message]} (priority: #{args[:priority]})" + end + + # Tool 4: Check status + function :check_status, + "Check the status of a system", + system: { type: "string", required: true } do |args| + log_call(:check_status, args) + "System '#{args[:system]}' is operational" + end + + # Tool 5: Complete task + function :complete_task, + "Mark task as complete and stop further processing", + task_id: { type: "string", required: true }, + summary: { type: "string", required: true } do |args| + log_call(:complete_task, args) + + # Signal to stop tool calls and provide final response + stop_tool_calls_and_respond! + + "Task #{args[:task_id]} completed: #{args[:summary]}" + end + + def log_call(function_name, args) + @call_log << { function: function_name, args:, timestamp: Time.now } + puts " šŸ”§ Called: #{function_name}(#{args.inspect})" + end + + def print_call_log + puts "\nšŸ“‹ Tool Call Log:" + puts "-" * 60 + call_log.each_with_index do |entry, i| + puts "#{i + 1}. #{entry[:function]} - #{entry[:timestamp].strftime("%H:%M:%S.%L")}" + puts " Args: #{entry[:args]}" + end + puts + end + + # Example 1: Multiple tool calls in one response + def demo_multiple_calls + puts "Demo 1: Multiple Tool Calls" + puts "=" * 60 + puts "Task: Analyze sales data and notify team" + puts + + transcript << { + user: "Search for Q4 sales data, calculate the average revenue, " \ + "check the analytics system status, and send a high-priority " \ + "notification with the results." 
+ } + + result = chat_completion + print_call_log + + puts "āœ… Final Response:" + puts result + puts + end + + # Example 2: Limited tool calls + def demo_limited_calls + puts "Demo 2: Limited Tool Calls" + puts "=" * 60 + puts "Max tool calls: 3" + puts + + transcript << { + user: "I need you to search for customer data, calculate conversion rates, " \ + "check system status, search for product data, calculate product stats, " \ + "and send notifications to the team." + } + + result = chat_completion(max_tool_calls: 3) + print_call_log + + puts "āœ… Final Response:" + puts result + puts + puts "Note: AI was forced to respond after 3 tool calls" + puts + end + + # Example 3: Early termination with stop_tool_calls_and_respond! + def demo_early_termination + puts "Demo 3: Early Termination" + puts "=" * 60 + puts "Using complete_task() to stop tool execution" + puts + + transcript << { + user: "Search the database for order #12345, check its status, " \ + "verify payment, and then complete the task with ID 'ORDER-CHECK' " \ + "once you have the information." + } + + result = chat_completion + print_call_log + + puts "āœ… Final Response:" + puts result + puts + puts "Note: complete_task() called stop_tool_calls_and_respond!" + puts + end + + def self.run! + puts "šŸŽÆ Raix Advanced Tool Control Demo" + puts "Multiple calls, limiting, and early termination" + puts "=" * 60 + puts + + # Demo 1: Multiple calls + demo1 = new + demo1.demo_multiple_calls + + puts "\n#{"=" * 60}\n\n" + + # Demo 2: Limited calls + demo2 = new + demo2.demo_limited_calls + + puts "\n#{"=" * 60}\n\n" + + # Demo 3: Early termination + demo3 = new + demo3.demo_early_termination + + puts "=" * 60 + puts "✨ Tool control demo complete!" 
+ puts + puts "šŸ’” Key Features Demonstrated:" + puts " • Multiple tool calls: AI can call several functions in one turn" + puts " • max_tool_calls: Limit function execution to prevent runaway loops" + puts " • stop_tool_calls_and_respond!: Exit tool loop early from within a function" + puts + puts "šŸŽÆ Use Cases:" + puts " • Task automation with multiple steps" + puts " • Autonomous agents with safeguards" + puts " • Complex workflows requiring orchestration" + puts " • Order processing with multiple validations" + end + end + + # Example showing a more realistic autonomous agent scenario + class OrderProcessingAgent + include Raix::ChatCompletion + include Raix::FunctionDispatch + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "gpt-4o-mini") + config.temperature = 0.3 + config.max_tool_calls = 8 # Reasonable limit for order processing + end + + attr_accessor :order_status + + def initialize + @order_status = { + validated: false, + payment_verified: false, + inventory_checked: false, + shipping_calculated: false, + processed: false + } + end + + function :validate_order, + "Validate order details and customer information", + order_id: { type: "string", required: true } do |args| + puts " āœ“ Validating order #{args[:order_id]}..." + @order_status[:validated] = true + "Order #{args[:order_id]} validated successfully" + end + + function :verify_payment, + "Verify payment method and process payment", + order_id: { type: "string", required: true }, + amount: { type: "number", required: true } do |args| + puts " āœ“ Verifying payment of $#{args[:amount]}..." + @order_status[:payment_verified] = true + "Payment of $#{args[:amount]} verified and authorized" + end + + function :check_inventory, + "Check if items are in stock", + items: { type: "array", required: true } do |args| + puts " āœ“ Checking inventory for #{args[:items].length} items..." 
+ @order_status[:inventory_checked] = true + "All items in stock and reserved" + end + + function :calculate_shipping, + "Calculate shipping cost and estimated delivery", + zip_code: { type: "string", required: true } do |args| + puts " āœ“ Calculating shipping to #{args[:zip_code]}..." + @order_status[:shipping_calculated] = true + "Shipping: $12.50, Estimated delivery: 3-5 business days" + end + + function :process_order, + "Finalize order processing", + order_id: { type: "string", required: true } do |args| + if order_status.values.take(4).all? + puts " āœ“ All validations passed, processing order..." + @order_status[:processed] = true + stop_tool_calls_and_respond! + "Order #{args[:order_id]} processed successfully. Confirmation email sent." + else + "Cannot process: Missing validations #{order_status.reject { |_k, v| v }.keys}" + end + end + + def process_customer_order(order_details) + puts "šŸ“¦ Processing Order: #{order_details[:order_id]}" + puts "=" * 60 + puts + + transcript << { + system: "You are an order processing agent. Validate, verify payment, " \ + "check inventory, calculate shipping, then process the order." + } + transcript << { + user: "Process this order: #{order_details.inspect}" + } + + result = chat_completion + + puts "\nšŸ“Š Order Status:" + order_status.each { |k, v| puts " #{k}: #{v ? "āœ“" : "āœ—"}" } + puts + puts "āœ… Result: #{result}" + puts + end + + def self.run! + puts "\n\n#{"=" * 60}" + puts "šŸ¤– Autonomous Order Processing Agent" + puts "=" * 60 + puts + + agent = new + agent.process_customer_order( + order_id: "ORD-2024-001", + customer: "John Doe", + items: %w[PROD-123 PROD-456], + total: 149.99, + zip_code: "94105" + ) + + puts "šŸ’” Agent Features:" + puts " • Autonomous decision making" + puts " • Multiple validation steps" + puts " • Automatic termination when complete" + puts " • Safety limits with max_tool_calls" + end + end +end + +if $PROGRAM_NAME == __FILE__ + Examples::AdvancedToolControl.run! 
+ Examples::OrderProcessingAgent.run! +end diff --git a/examples/code_reviewer.rb b/examples/code_reviewer.rb new file mode 100755 index 0000000..e1c83c7 --- /dev/null +++ b/examples/code_reviewer.rb @@ -0,0 +1,121 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "dotenv" +require "raix" + +# Load environment variables from .env file +Dotenv.load + +# Configure Raix with API keys +Raix.configure do |config| + config.openrouter_api_key = ENV.fetch("OR_ACCESS_TOKEN", nil) + config.openai_api_key = ENV.fetch("OAI_ACCESS_TOKEN", nil) +end + +module Examples + # An AI-powered code reviewer that answers yes/no questions about code quality + class CodeReviewer + include Raix::Predicate + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "gpt-4o-mini") + config.temperature = 0.2 + end + + attr_reader :approved_count, :rejected_count, :unclear_count + + def initialize + @approved_count = 0 + @rejected_count = 0 + @unclear_count = 0 + end + + # When the code passes review + yes? do |response| + @approved_count += 1 + puts "āœ… APPROVED" + puts " #{response.sub(/^yes,\s*/i, "").strip}" + puts + end + + # When the code fails review + no? do |response| + @rejected_count += 1 + puts "āŒ REJECTED" + puts " #{response.sub(/^no,\s*/i, "").strip}" + puts + end + + # When the answer is unclear + maybe? do |response| + @unclear_count += 1 + puts "āš ļø NEEDS REVIEW" + puts " #{response.sub(/^maybe,\s*/i, "").strip}" + puts + end + + def review_code(code, question = nil) + question ||= "Is this code following Ruby best practices and free of obvious issues?" + full_question = "#{question}\n\nCode:\n```ruby\n#{code}\n```" + + puts "šŸ” Reviewing code..." + puts "-" * 60 + ask(full_question) + end + + def summary + total = approved_count + rejected_count + unclear_count + return if total.zero? 
+ + puts "=" * 60 + puts "šŸ“Š Review Summary:" + puts " āœ… Approved: #{approved_count}" + puts " āŒ Rejected: #{rejected_count}" + puts " āš ļø Unclear: #{unclear_count}" + puts " šŸ“ Total: #{total}" + end + + def self.run! + reviewer = new + + puts "šŸ¤– AI Code Reviewer Demo" + puts "=" * 60 + puts + + # Example 1: Good code + puts "Example 1: Clean Ruby code" + reviewer.review_code(<<~RUBY) + def calculate_total(items) + items.sum(&:price) + end + RUBY + + # Example 2: Code with issues + puts "Example 2: Code with potential issues" + reviewer.review_code(<<~RUBY) + def calc(x) + sum = 0 + for i in 0..x.length-1 + sum = sum + x[i] + end + return sum + end + RUBY + + # Example 3: Complex case + puts "Example 3: Code that might be ambiguous" + reviewer.review_code(<<~RUBY, "Does this code properly handle nil values?") + def process_user(user) + user.name.upcase + end + RUBY + + reviewer.summary + end + end +end + +if $PROGRAM_NAME == __FILE__ + Examples::CodeReviewer.run! +end diff --git a/examples/json_mode_demo.rb b/examples/json_mode_demo.rb new file mode 100755 index 0000000..4d6c409 --- /dev/null +++ b/examples/json_mode_demo.rb @@ -0,0 +1,210 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "dotenv" +require "json" +require "raix" + +# Load environment variables from .env file +Dotenv.load + +# Configure Raix with API keys +Raix.configure do |config| + config.openrouter_api_key = ENV.fetch("OR_ACCESS_TOKEN", nil) + config.openai_api_key = ENV.fetch("OAI_ACCESS_TOKEN", nil) +end + +module Examples + # Demonstrates JSON mode for getting flexible structured responses without + # strict schema validation. Unlike ResponseFormat, JSON mode lets the AI + # determine the structure while ensuring valid JSON output. 
+ class JsonModeDemo + include Raix::ChatCompletion + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "gpt-4o-mini") + config.temperature = 0.3 + end + + # Analyze text sentiment with flexible JSON response + def analyze_sentiment(text) + puts "šŸ“ Text to Analyze:" + puts text + puts "\n#{"-" * 60}" + + transcript << { + system: "You are a sentiment analysis expert. Return your analysis as JSON." + } + transcript << { + user: <<~PROMPT + Analyze the sentiment of this text and return a JSON object with: + - overall_sentiment (positive/negative/neutral/mixed) + - confidence_score (0-1) + - key_emotions (array) + - summary (brief explanation) + + Text: #{text} + PROMPT + } + + puts "šŸ” Analyzing sentiment...\n\n" + result = chat_completion(json: true) + + puts "āœ… Analysis Result:" + puts JSON.pretty_generate(result) + puts + + result + end + + # Compare items and return flexible comparison data + def compare_items(item1, item2, criteria) + puts "āš–ļø Comparing: #{item1} vs #{item2}" + puts "Criteria: #{criteria.join(", ")}" + puts "\n#{"-" * 60}" + + transcript << { + system: "You are a comparison expert. Return detailed comparisons as JSON." + } + transcript << { + user: <<~PROMPT + Compare #{item1} and #{item2} based on these criteria: #{criteria.join(", ")}. + + Return a JSON object with: + - winner (which is better overall) + - comparison (object with each criterion as a key, containing scores and notes) + - recommendation (who should choose which option) + PROMPT + } + + puts "šŸ” Comparing...\n\n" + result = chat_completion(json: true) + + puts "āœ… Comparison Result:" + puts JSON.pretty_generate(result) + puts + + result + end + + # Generate a quiz with flexible structure + def generate_quiz(topic, num_questions) + puts "šŸ“š Generating Quiz on: #{topic}" + puts "Number of questions: #{num_questions}" + puts "\n#{"-" * 60}" + + transcript << { + system: "You create educational quizzes. Return quiz data as JSON." 
+ } + transcript << { + user: <<~PROMPT + Create a #{num_questions}-question quiz about #{topic}. + + Return JSON with: + - title + - questions (array of objects with question, options, correct_answer, explanation) + - difficulty_level + PROMPT + } + + puts "šŸ” Creating quiz...\n\n" + result = chat_completion(json: true) + + puts "āœ… Generated Quiz:" + puts JSON.pretty_generate(result) + puts + + result + end + + # Recipe generation with flexible JSON structure + def generate_recipe(dish, dietary_restrictions = []) + puts "šŸ³ Generating Recipe for: #{dish}" + puts "Dietary restrictions: #{dietary_restrictions.any? ? dietary_restrictions.join(", ") : "None"}" + puts "\n#{"-" * 60}" + + restrictions = dietary_restrictions.any? ? " (#{dietary_restrictions.join(", ")})" : "" + + transcript << { + system: "You are a chef. Create recipes as structured JSON." + } + transcript << { + user: <<~PROMPT + Create a recipe for #{dish}#{restrictions}. + + Return JSON with whatever structure makes sense, but include: + - name, description, servings, prep_time, cook_time + - ingredients (with amounts) + - instructions (step by step) + - nutritional_info (if relevant) + - tips + PROMPT + } + + puts "šŸ” Creating recipe...\n\n" + result = chat_completion(json: true) + + puts "āœ… Generated Recipe:" + puts JSON.pretty_generate(result) + puts + + result + end + + def self.run! + demo = new + + puts "šŸŽÆ Raix JSON Mode Demo" + puts "Flexible structured responses without strict schemas" + puts "=" * 60 + puts "\nNote: JSON mode ensures valid JSON but lets the AI determine structure." + puts "Compare with ResponseFormat which enforces a specific schema." + puts "=" * 60 + puts + + # Example 1: Sentiment Analysis + puts "Example 1: Sentiment Analysis" + puts "=" * 60 + review = "I absolutely loved this product! The quality exceeded my expectations, " \ + "though I wish the shipping had been a bit faster. Overall, highly recommended!" 
+ demo.analyze_sentiment(review) + + puts "\n#{"=" * 60}\n\n" + + # Example 2: Item Comparison + puts "Example 2: Product Comparison" + puts "=" * 60 + demo.compare_items( + "MacBook Pro", + "Dell XPS 15", + %w[performance price portability ecosystem] + ) + + puts "\n#{"=" * 60}\n\n" + + # Example 3: Quiz Generation + puts "Example 3: Quiz Generation" + puts "=" * 60 + demo.generate_quiz("Ruby Programming Basics", 3) + + puts "\n#{"=" * 60}\n\n" + + # Example 4: Recipe Generation + puts "Example 4: Recipe Generation" + puts "=" * 60 + demo.generate_recipe("Chocolate Chip Cookies", ["gluten-free"]) + + puts "\n#{"=" * 60}" + puts "✨ JSON mode demo complete!" + puts "\nšŸ’” Key Differences:" + puts " • JSON Mode: Flexible structure, AI decides format" + puts " • ResponseFormat: Strict schema, enforced validation" + puts " • Use JSON mode when you want structured data but flexibility in format" + puts " • Use ResponseFormat when you need exact schema compliance" + end + end +end + +if $PROGRAM_NAME == __FILE__ + Examples::JsonModeDemo.run! 
+end diff --git a/examples/live_chat_completion.rb b/examples/live_chat_completion.rb new file mode 100755 index 0000000..f10ab7c --- /dev/null +++ b/examples/live_chat_completion.rb @@ -0,0 +1,81 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "dotenv" +require "time" +require "raix" + +# Load environment variables from .env file +Dotenv.load + +# Configure Raix with API keys +Raix.configure do |config| + config.openrouter_api_key = ENV.fetch("OR_ACCESS_TOKEN", nil) + config.openai_api_key = ENV.fetch("OAI_ACCESS_TOKEN", nil) +end + +module Examples + class LiveChatCompletion + include Raix::ChatCompletion + include Raix::FunctionDispatch + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "meta-llama/llama-3.3-8b-instruct:free") + config.temperature = 0.2 + end + + function :current_time, "Return the current UTC time", format: { type: "string" } do |_arguments| + Time.now.utc.iso8601 + end + + def self.run!(prompt: nil, enable_tools: true, **chat_params) + new(prompt:, enable_tools:, **chat_params).run! + end + + def initialize(prompt: nil, enable_tools: true, **chat_params) + ENV["RAIX_EXAMPLE_PROMPT"]&.strip => env_prompt if ENV["RAIX_EXAMPLE_PROMPT"] + prompt ||= env_prompt || default_prompt + + transcript << { system: system_instructions } + transcript << { user: prompt } + + @enable_tools = enable_tools + @chat_params = build_chat_params(chat_params) + end + + def run! + chat_completion( + params: @chat_params, + available_tools: @enable_tools ? nil : false + ) + end + + private + + def system_instructions + <<~SYSTEM.squish + You are a production readiness check. Confirm that Raix can reach the configured + model and produce a succinct answer. Use the current_time tool when relevant. + SYSTEM + end + + def default_prompt + <<~PROMPT.squish + Explain in one or two sentences how I can confirm Raix is wired up correctly. 
+ PROMPT + end + + def build_chat_params(overrides) + overrides.compact => params + + params[:max_tokens] ||= configuration.max_tokens + params[:temperature] ||= configuration.temperature + + params + end + end +end + +if $PROGRAM_NAME == __FILE__ + puts Examples::LiveChatCompletion.run! +end diff --git a/examples/prompt_caching_demo.rb b/examples/prompt_caching_demo.rb new file mode 100755 index 0000000..f914c9a --- /dev/null +++ b/examples/prompt_caching_demo.rb @@ -0,0 +1,305 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "dotenv" +require "raix" + +# Load environment variables from .env file +Dotenv.load + +# Configure Raix with API keys +Raix.configure do |config| + config.openrouter_api_key = ENV.fetch("OR_ACCESS_TOKEN", nil) + config.openai_api_key = ENV.fetch("OAI_ACCESS_TOKEN", nil) +end + +module Examples + # Demonstrates prompt caching to reduce costs when repeatedly using + # large context (documents, character cards, knowledge bases). + # Uses Anthropic's cache control feature. + class PromptCachingDemo + include Raix::ChatCompletion + + configure do |config| + # NOTE: Caching works best with Anthropic models + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "anthropic/claude-3-5-sonnet") + config.temperature = 0.5 + end + + # Large document to be cached (simulating a knowledge base) + COMPANY_HANDBOOK = <<~HANDBOOK + # Company Employee Handbook + + ## Introduction + Welcome to TechCorp! This handbook contains essential information about our company + policies, culture, and procedures. Please familiarize yourself with its contents. + + ## Company Values + 1. Innovation First: We encourage creative thinking and calculated risks + 2. Customer Success: Our customers' success is our success + 3. Collaborative Spirit: We work together to achieve great things + 4. Continuous Learning: We invest in our people's growth + 5. 
Work-Life Balance: We believe in sustainable productivity + + ## Work Policies + + ### Remote Work + - Employees may work remotely up to 3 days per week + - Core hours are 10 AM - 3 PM in your local timezone + - Must be available for team meetings during core hours + - Home office stipend: $500 annually + + ### Time Off + - Vacation: 20 days per year, increasing to 25 after 3 years + - Sick Leave: 10 days per year (unused days roll over) + - Parental Leave: 16 weeks paid for primary caregiver + - Holidays: 10 company holidays plus your birthday + + ### Professional Development + - Conference Budget: $2,000 per year + - Online Learning: Unlimited access to learning platforms + - Mentorship Program: All employees can participate + - Innovation Time: 10% of work time for personal projects + + ### Health & Wellness + - Health Insurance: Comprehensive coverage, company pays 90% + - Dental & Vision: Full coverage included + - Mental Health: Unlimited therapy sessions + - Gym Membership: $100/month reimbursement + - Wellness Days: 4 additional days off per year + + ### Equipment & Tools + - Laptop: Choose between Mac or PC, replaced every 3 years + - Monitors: Up to 2 external monitors + - Accessories: Keyboard, mouse, headphones covered + - Software: Any tools needed for your role + + ## Code of Conduct + - Treat all colleagues with respect and dignity + - Embrace diversity and inclusion + - Maintain confidentiality of sensitive information + - Report concerns to HR immediately + - No tolerance for harassment or discrimination + + ## Performance Reviews + - Conducted quarterly with your manager + - 360-degree feedback from peers + - Clear goals and development plans + - Compensation reviews annually + + ## Contact Information + - HR: hr@techcorp.com + - IT Support: support@techcorp.com + - Facilities: facilities@techcorp.com + - Emergency: Call security at ext. 
911 + + Last Updated: January 2024 + Version 3.2 + HANDBOOK + + def initialize + setup_cached_context + end + + # Setup the cached context (this part gets cached) + def setup_cached_context + # Add the large document with cache control + transcript << { + role: "system", + content: [ + { + type: "text", + text: "You are TechCorp's HR assistant. Answer questions based on the employee handbook.", + cache_control: { type: "ephemeral" } + }, + { + type: "text", + text: COMPANY_HANDBOOK, + cache_control: { type: "ephemeral" } + } + ] + } + end + + def ask_question(question) + puts "ā“ Question: #{question}" + + transcript << { user: question } + + start_time = Time.now + response = chat_completion(params: { cache_at: 1000 }) + duration = (Time.now - start_time).round(2) + + puts "šŸ’¬ Answer: #{response}" + puts "ā±ļø Response time: #{duration}s" + puts + + response + end + + def self.run! + puts "šŸ’¾ Raix Prompt Caching Demo" + puts "Reducing costs with cached context" + puts "=" * 60 + puts + + puts "šŸ“š Context Size: #{COMPANY_HANDBOOK.split.length} words" + puts "šŸŽÆ Model: Using Anthropic Claude (supports caching)" + puts + puts "Note: The first request will cache the handbook." + puts " Subsequent requests will use the cache (faster & cheaper)." 
+ puts "=" * 60 + puts + + assistant = new + + # First question - will cache the handbook + puts "Request 1: Establishing cache" + puts "-" * 60 + assistant.ask_question("How many vacation days do I get?") + puts + + # Second question - should use cache + puts "Request 2: Using cached context" + puts "-" * 60 + assistant.ask_question("What is the remote work policy?") + puts + + # Third question - should use cache + puts "Request 3: Using cached context" + puts "-" * 60 + assistant.ask_question("How much is the professional development budget?") + puts + + # Fourth question - should use cache + puts "Request 4: Using cached context" + puts "-" * 60 + assistant.ask_question("What are the company values?") + puts + + puts "=" * 60 + puts "✨ Caching demo complete!" + puts + puts "šŸ’” How Prompt Caching Works:" + puts " • Large context (handbook) is cached after first request" + puts " • Subsequent requests reuse the cached context" + puts " • Only the new user message is processed" + puts " • Results in faster responses and lower costs" + puts + puts "šŸ’° Cost Savings:" + puts " • First request: Full context processed (~1200 tokens)" + puts " • Cached requests: Only new message processed (~10-20 tokens)" + puts " • ~98% reduction in input token costs for follow-up questions" + puts + puts "⚔ Performance:" + puts " • Cached requests are typically 2-3x faster" + puts " • Cache is valid for 5 minutes (Anthropic)" + puts " • Perfect for: RAG, chatbots, document analysis" + end + end + + # Example showing caching with a character card (AI roleplaying) + class CharacterCachingDemo + include Raix::ChatCompletion + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "anthropic/claude-3-5-sonnet") + config.temperature = 0.8 + end + + CHARACTER_CARD = <<~CARD + # Character: Dr. Elena Rodriguez + + ## Background + Dr. Elena Rodriguez is a brilliant astrophysicist in her mid-40s who works at + the International Space Observatory. 
She has dedicated her life to studying + exoplanets and the search for extraterrestrial life. + + ## Personality + - Passionate and enthusiastic about space exploration + - Patient teacher who loves explaining complex concepts + - Slightly eccentric, often uses space metaphors + - Optimistic about humanity's future among the stars + - Has a dry sense of humor + + ## Speaking Style + - Uses scientific terminology but explains things clearly + - Often references astronomical phenomena in conversation + - Speaks with quiet confidence and wonder + - Makes space puns occasionally + + ## Knowledge Areas + - Expert in exoplanet detection and characterization + - Deep knowledge of stellar evolution + - Understanding of astrobiology and habitability + - Familiar with space mission design and technology + + ## Current Projects + - Leading the TERRA-FIND mission to discover Earth-like planets + - Developing new spectroscopic analysis techniques + - Mentoring graduate students in astrophysics + CARD + + def initialize + transcript << { + role: "system", + content: [ + { + type: "text", + text: "You are roleplaying as this character. Stay in character and respond naturally.", + cache_control: { type: "ephemeral" } + }, + { + type: "text", + text: CHARACTER_CARD, + cache_control: { type: "ephemeral" } + } + ] + } + end + + def chat(message) + transcript << { user: message } + chat_completion(params: { cache_at: 1000 }) + end + + def self.run! + puts "\n\n#{"=" * 60}" + puts "šŸŽ­ Character Caching Demo" + puts "=" * 60 + puts + + character = new + + puts "Character: Dr. Elena Rodriguez (Astrophysicist)" + puts "-" * 60 + puts + + conversation = [ + "Hello Dr. Rodriguez! What are you working on today?", + "What's the most exciting exoplanet you've discovered?", + "Do you think we'll find alien life in our lifetime?" + ] + + conversation.each_with_index do |message, _i| + puts "You: #{message}" + response = character.chat(message) + puts "Dr. 
Rodriguez: #{response}" + puts + end + + puts "=" * 60 + puts "šŸ’” Character Card Caching Benefits:" + puts " • Large character description cached once" + puts " • Consistent personality across conversation" + puts " • Lower costs for multi-turn conversations" + puts " • Perfect for AI roleplaying and chatbots" + end + end +end + +if $PROGRAM_NAME == __FILE__ + # Run both demos + Examples::PromptCachingDemo.run! + Examples::CharacterCachingDemo.run! +end diff --git a/examples/prompt_chain_workflow.rb b/examples/prompt_chain_workflow.rb new file mode 100755 index 0000000..e8016b8 --- /dev/null +++ b/examples/prompt_chain_workflow.rb @@ -0,0 +1,255 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "dotenv" +require "raix" + +# Load environment variables from .env file +Dotenv.load + +# Configure Raix with API keys +Raix.configure do |config| + config.openrouter_api_key = ENV.fetch("OR_ACCESS_TOKEN", nil) + config.openai_api_key = ENV.fetch("OAI_ACCESS_TOKEN", nil) +end + +module Examples + # Demonstrates PromptDeclarations for building multi-step AI workflows + # with conditional execution, callbacks, and looping. + class PromptChainWorkflow + include Raix::ChatCompletion + include Raix::PromptDeclarations + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "gpt-4o-mini") + config.temperature = 0.7 + end + + attr_accessor :research_topic, :key_points, :outline_approved, :draft_sections + + def initialize(topic) + @research_topic = topic + @key_points = [] + @outline_approved = false + @draft_sections = [] + end + + # Step 1: Initial research + prompt text: lambda { + "Research the topic '#{research_topic}' and identify 3-5 key points that should be covered. " \ + "List each point clearly." 
+ }, success: lambda { |response| + puts "āœ… Research complete" + @key_points = response.split("\n").grep(/^\d+\./).map { |line| line.sub(/^\d+\.\s*/, "") } + puts " Found #{@key_points.length} key points" + puts + } + + # Step 2: Create outline (only if we have key points) + prompt text: lambda { + "Based on these key points:\n#{key_points.map.with_index { |p, i| "#{i + 1}. #{p}" }.join("\n")}\n\n" \ + "Create a detailed outline for an article about #{research_topic}." + }, if: -> { key_points.any? }, success: lambda { |response| + puts "āœ… Outline created" + puts " Validating structure..." + @outline_approved = response.include?("Introduction") || response.include?("Overview") + puts " Outline #{outline_approved ? "approved āœ“" : "needs revision āœ—"}" + puts + } + + # Step 3: Write introduction (only if outline approved) + prompt text: lambda { + "Write an engaging introduction for an article about #{research_topic}. " \ + "Keep it concise (2-3 paragraphs)." + }, if: -> { outline_approved }, success: lambda { |response| + puts "āœ… Introduction written" + @draft_sections << { section: "Introduction", content: response } + puts " Added to draft (#{draft_sections.length} sections)" + puts + } + + # Step 4: Write main content sections + prompt text: lambda { + "Write the main body section covering these key points:\n" \ + "#{key_points.map.with_index { |p, i| "#{i + 1}. #{p}" }.join("\n")}\n\n" \ + "Make it informative and well-structured." + }, if: -> { draft_sections.any? }, success: lambda { |response| + puts "āœ… Main content written" + @draft_sections << { section: "Main Content", content: response } + puts " Added to draft (#{draft_sections.length} sections)" + puts + } + + # Step 5: Write conclusion + prompt text: lambda { + "Write a conclusion that summarizes the key insights about #{research_topic} " \ + "and provides a forward-looking perspective." 
+ }, if: -> { draft_sections.length >= 2 }, success: lambda { |response| + puts "āœ… Conclusion written" + @draft_sections << { section: "Conclusion", content: response } + puts " Added to draft (#{draft_sections.length} sections)" + puts + } + + def run! + puts "šŸ“ AI Research & Writing Workflow" + puts "Topic: #{research_topic}" + puts "=" * 60 + puts + + # Execute the prompt chain + execute_prompt_chain + + # Display final article + puts "\n#{"=" * 60}" + puts "šŸ“„ Complete Article" + puts "=" * 60 + puts + + draft_sections.each do |section| + puts "### #{section[:section]}" + puts + puts section[:content] + puts + puts "-" * 60 + puts + end + + puts "✨ Workflow complete!" + puts " Generated #{draft_sections.length} sections" + end + + def self.run!(topic = nil) + topic ||= ENV["RESEARCH_TOPIC"] || "The Future of AI in Education" + new(topic).run! + end + end + + # Example 2: Data processing workflow with error handling + class DataProcessingWorkflow + include Raix::ChatCompletion + include Raix::PromptDeclarations + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "gpt-4o-mini") + config.temperature = 0.3 + end + + attr_accessor :raw_data, :cleaned_data, :analysis_results, :retry_count + + def initialize(data) + @raw_data = data + @cleaned_data = nil + @analysis_results = {} + @retry_count = 0 + end + + # Step 1: Validate and clean data + prompt text: lambda { + "Analyze this data and identify any issues or inconsistencies:\n\n#{raw_data}\n\n" \ + "Return 'CLEAN' if data is valid, or describe the issues found." 
+ }, success: lambda { |response| + if response.include?("CLEAN") + @cleaned_data = raw_data + puts "āœ… Data validation passed" + else + puts "āš ļø Data issues found: #{response}" + @cleaned_data = raw_data # In real app, you'd fix issues + end + puts + } + + # Step 2: Perform analysis (with retry capability) + prompt text: lambda { + "Analyze this dataset and provide insights:\n\n#{cleaned_data}\n\n" \ + "Focus on trends, patterns, and key statistics." + }, if: -> { !cleaned_data.nil? }, until: -> { analysis_results.any? || retry_count > 2 }, success: lambda { |response| + if response.length > 50 # Simple validation + @analysis_results[:insights] = response + puts "āœ… Analysis complete" + else + @retry_count += 1 + puts "āš ļø Analysis insufficient, retry #{retry_count}/3" + end + puts + } + + # Step 3: Generate summary + prompt text: lambda { + "Based on this analysis:\n\n#{analysis_results[:insights]}\n\n" \ + "Create a concise executive summary (2-3 sentences)." + }, if: -> { analysis_results[:insights] }, success: lambda { |response| + @analysis_results[:summary] = response + puts "āœ… Summary generated" + puts + } + + def run! + puts "šŸ“Š Data Processing Workflow" + puts "=" * 60 + puts + + execute_prompt_chain + + puts "=" * 60 + puts "šŸ“ˆ Final Results" + puts "=" * 60 + puts + + if analysis_results[:summary] + puts "Summary:" + puts analysis_results[:summary] + puts + puts "-" * 60 + puts + puts "Full Analysis:" + puts analysis_results[:insights] + else + puts "āŒ Analysis failed after #{retry_count} attempts" + end + + puts + puts "✨ Processing complete!" + end + + def self.run!(data = nil) + data ||= <<~DATA + Sales Data Q1 2024: + January: $125,000 (target: $120,000) + February: $118,000 (target: $120,000) + March: $142,000 (target: $120,000) + Total: $385,000 (target: $360,000) + DATA + + new(data).run! 
+ end + end +end + +if $PROGRAM_NAME == __FILE__ + puts "šŸŽÆ Raix Prompt Chain Workflow Demo" + puts "Multi-step AI workflows with conditional execution" + puts "=" * 60 + puts + + # Demo 1: Research & Writing Workflow + puts "Demo 1: Research & Writing Workflow" + puts "=" * 60 + Examples::PromptChainWorkflow.run! + + puts "\n\n#{"=" * 60}\n\n" + + # Demo 2: Data Processing Workflow + puts "Demo 2: Data Processing Workflow" + puts "=" * 60 + Examples::DataProcessingWorkflow.run! + + puts "\n#{"=" * 60}" + puts "✨ All workflow demos complete!" + puts "\nšŸ’” PromptDeclarations Features Demonstrated:" + puts " • Sequential prompt execution" + puts " • Conditional prompts (if:)" + puts " • Success callbacks for handling responses" + puts " • Retry logic (until:)" + puts " • State management between steps" +end diff --git a/examples/rails_initializer_example.rb b/examples/rails_initializer_example.rb new file mode 100644 index 0000000..a905169 --- /dev/null +++ b/examples/rails_initializer_example.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +# Example Rails initializer for Raix standalone runtime. 
+# Place this in config/initializers/raix.rb + +Raix.configure do |config| + # Provider API keys + config.openrouter_api_key = Rails.application.credentials.dig(:open_router, :api_key) + config.openai_api_key = Rails.application.credentials.dig(:open_ai, :api_key) + + # Optional OpenAI headers + # config.openai_organization_id = Rails.application.credentials.dig(:open_ai, :organization_id) + # config.openai_project_id = Rails.application.credentials.dig(:open_ai, :project_id) + + # Runtime defaults + config.model = "gpt-4o-mini" + config.temperature = 0.2 + config.max_tokens = 1000 + config.max_tool_calls = 25 + + # Transport behavior + config.request_timeout = 120 + config.open_timeout = 30 + config.request_retries = 2 +end + +# Optional request logging/redaction hook +Raix.configure do |config| + config.before_completion = lambda do |context| + Rails.logger.info( + event: "raix.chat_completion", + model: context.current_model, + message_count: context.messages.length + ) + {} + end +end diff --git a/examples/streaming_chat.rb b/examples/streaming_chat.rb new file mode 100755 index 0000000..b629eae --- /dev/null +++ b/examples/streaming_chat.rb @@ -0,0 +1,210 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "dotenv" +require "io/console" +require "raix" + +# Load environment variables from .env file +Dotenv.load + +# Configure Raix with API keys +Raix.configure do |config| + config.openrouter_api_key = ENV.fetch("OR_ACCESS_TOKEN", nil) + config.openai_api_key = ENV.fetch("OAI_ACCESS_TOKEN", nil) +end + +module Examples + # Demonstrates real-time streaming of AI responses token-by-token. + # Perfect for building responsive chat interfaces and showing progress. 
+ class StreamingChat + include Raix::ChatCompletion + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "gpt-4o-mini") + config.temperature = 0.7 + end + + attr_reader :response_text + + def initialize + @response_text = "" + end + + # Example 1: Simple streaming with character-by-character output + def simple_stream(prompt) + puts "šŸ’¬ Prompt: #{prompt}" + puts "šŸ¤– Response (streaming):" + puts + + @response_text = "" + + # Set the stream handler + self.stream = lambda do |chunk| + @response_text += chunk + print chunk + $stdout.flush + end + + transcript << { user: prompt } + chat_completion + + puts "\n" + end + + # Example 2: Streaming with progress indicator + def stream_with_progress(prompt) + puts "šŸ’¬ Prompt: #{prompt}" + print "šŸ¤– Thinking" + + @response_text = "" + started = false + + self.stream = lambda do |chunk| + unless started + # Clear the "Thinking..." line + print "\r#{" " * 20}\r" + puts "šŸ¤– Response:" + puts + started = true + end + + @response_text += chunk + print chunk + $stdout.flush + end + + # Show thinking dots while waiting + thread = Thread.new do + sleep 0.5 + 3.times do + print "." 
+ $stdout.flush + sleep 0.3 + end + end + + transcript << { user: prompt } + chat_completion + + thread.kill + puts "\n" + end + + # Example 3: Streaming with word count and timing + def stream_with_metrics(prompt) + puts "šŸ’¬ Prompt: #{prompt}" + puts "šŸ¤– Response (with metrics):" + puts + + @response_text = "" + start_time = Time.now + token_count = 0 + + self.stream = lambda do |chunk| + @response_text += chunk + token_count += 1 + print chunk + $stdout.flush + end + + transcript << { user: prompt } + chat_completion + + end_time = Time.now + duration = (end_time - start_time).round(2) + word_count = @response_text.split.length + tokens_per_second = (token_count / duration).round(1) + + puts "\n" + puts "-" * 60 + puts "šŸ“Š Metrics:" + puts " Words: #{word_count} | Tokens: #{token_count} | Time: #{duration}s | Speed: #{tokens_per_second} tok/s" + puts + end + + # Example 4: Interactive streaming chat + def interactive_chat + puts "šŸŽ® Interactive Streaming Chat" + puts "=" * 60 + puts "Type your messages below. Type 'exit' to quit." + puts "=" * 60 + puts + + loop do + print "You: " + user_input = $stdin.gets&.chomp + break if user_input.nil? || user_input.downcase == "exit" + + next if user_input.strip.empty? + + print "AI: " + @response_text = "" + + self.stream = lambda do |chunk| + @response_text += chunk + print chunk + $stdout.flush + end + + transcript << { user: user_input } + chat_completion + + # Save the AI's response to transcript + # (streaming mode doesn't auto-save) + transcript << { assistant: @response_text } + + puts "\n" + end + + puts "\nšŸ‘‹ Chat ended. Goodbye!" + end + + def self.run! 
+ chat = new + + puts "🌊 Raix Streaming Chat Demo" + puts "Real-time token-by-token response generation" + puts "=" * 60 + puts + + # Example 1: Simple streaming + puts "Example 1: Basic Streaming" + puts "-" * 60 + chat.simple_stream("Write a haiku about Ruby programming") + puts + + sleep 1 + + # Example 2: Streaming with progress indicator + puts "Example 2: Streaming with Progress Indicator" + puts "-" * 60 + chat.stream_with_progress("Explain what makes a good software engineer in 2-3 sentences") + puts + + sleep 1 + + # Example 3: Streaming with metrics + puts "Example 3: Streaming with Performance Metrics" + puts "-" * 60 + chat.stream_with_metrics("List 5 benefits of using Ruby on Rails") + puts + + # Example 4: Interactive chat (optional) + if ARGV.include?("--interactive") + sleep 1 + puts "\n#{"=" * 60}\n" + chat.interactive_chat + else + puts "\n#{"=" * 60}" + puts "✨ Streaming demo complete!" + puts "\nTip: Run with --interactive flag for an interactive chat session:" + puts " bundle exec ruby examples/streaming_chat.rb --interactive" + end + end + end +end + +if $PROGRAM_NAME == __FILE__ + Examples::StreamingChat.run! +end diff --git a/examples/structured_data_extraction.rb b/examples/structured_data_extraction.rb new file mode 100755 index 0000000..deaeb42 --- /dev/null +++ b/examples/structured_data_extraction.rb @@ -0,0 +1,205 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "dotenv" +require "json" +require "raix" + +# Load environment variables from .env file +Dotenv.load + +# Configure Raix with API keys +Raix.configure do |config| + config.openrouter_api_key = ENV.fetch("OR_ACCESS_TOKEN", nil) + config.openai_api_key = ENV.fetch("OAI_ACCESS_TOKEN", nil) +end + +module Examples + # Demonstrates the ResponseFormat feature for extracting structured data + # from unstructured text using a strict JSON schema. 
+ class StructuredDataExtraction + include Raix::ChatCompletion + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "gpt-4o-mini") + config.temperature = 0.1 # Low temperature for consistent extraction + end + + # Extract person information from text + def extract_person(text) + puts "šŸ“„ Input Text:" + puts text + puts "\n#{"=" * 60}" + + # Define the schema for person data + format = Raix::ResponseFormat.new("Person", { + full_name: { type: "string" }, + age: { type: "integer" }, + email: { type: "string" }, + occupation: { type: "string" }, + skills: ["string"], + experience_years: { type: "integer" } + }) + + transcript << { + system: "You are a data extraction assistant. Extract information accurately from the provided text." + } + transcript << { + user: "Extract the person's information from this text:\n\n#{text}" + } + + puts "šŸ” Extracting structured data...\n\n" + result = chat_completion(params: { response_format: format }) + + puts "āœ… Extracted Data:" + puts JSON.pretty_generate(result) + puts + + result + end + + # Extract product information from a description + def extract_product(description) + puts "šŸ“¦ Product Description:" + puts description + puts "\n#{"=" * 60}" + + # Define schema for product data + format = Raix::ResponseFormat.new("Product", { + name: { type: "string" }, + category: { type: "string" }, + price: { type: "number" }, + currency: { type: "string" }, + features: ["string"], + specifications: { + type: "object", + properties: { + dimensions: { type: "string" }, + weight: { type: "string" }, + color: { type: "string" } + } + }, + in_stock: { type: "boolean" } + }) + + transcript << { + system: "Extract product information from descriptions. If information is missing, use null." 
+ } + transcript << { + user: "Extract product details:\n\n#{description}" + } + + puts "šŸ” Extracting product data...\n\n" + result = chat_completion(params: { response_format: format }) + + puts "āœ… Extracted Product:" + puts JSON.pretty_generate(result) + puts + + result + end + + # Extract meeting notes into structured format + def extract_meeting_notes(notes) + puts "šŸ“ Meeting Notes:" + puts notes + puts "\n#{"=" * 60}" + + format = Raix::ResponseFormat.new("MeetingNotes", { + date: { type: "string" }, + attendees: ["string"], + topics_discussed: ["string"], + action_items: [ + { + type: "object", + properties: { + task: { type: "string" }, + assignee: { type: "string" }, + due_date: { type: "string" } + } + } + ], + decisions_made: ["string"], + next_meeting: { type: "string" } + }) + + transcript << { + system: "Extract structured information from meeting notes. Be thorough and accurate." + } + transcript << { + user: "Structure these meeting notes:\n\n#{notes}" + } + + puts "šŸ” Structuring meeting notes...\n\n" + result = chat_completion(params: { response_format: format }) + + puts "āœ… Structured Notes:" + puts JSON.pretty_generate(result) + puts + + result + end + + def self.run! + extractor = new + + puts "šŸŽÆ Raix Structured Data Extraction Demo" + puts "Using ResponseFormat for schema-validated JSON output" + puts "=" * 60 + puts + + # Example 1: Person data extraction + puts "Example 1: Extract Person Information" + puts "-" * 60 + person_text = <<~TEXT + John Smith is a 32-year-old senior software engineer at TechCorp. + He has 8 years of experience and specializes in Ruby, Python, and JavaScript. + His email is john.smith@example.com and he's passionate about AI and automation. 
+ TEXT + extractor.extract_person(person_text) + + puts "\n#{"=" * 60}\n\n" + + # Example 2: Product data extraction + puts "Example 2: Extract Product Information" + puts "-" * 60 + product_text = <<~TEXT + The UltraBook Pro 15 is a premium laptop in the Electronics category. + Priced at $1,299 USD, it features a stunning 15.6" display, 16GB RAM, + and 512GB SSD storage. The sleek aluminum chassis comes in Space Gray, + weighs just 3.5 lbs, and measures 14 x 9.7 x 0.6 inches. Currently in stock + with free shipping. + TEXT + extractor.extract_product(product_text) + + puts "\n#{"=" * 60}\n\n" + + # Example 3: Meeting notes extraction + puts "Example 3: Extract Meeting Notes" + puts "-" * 60 + meeting_text = <<~TEXT + Team meeting on December 15, 2023. Present: Alice, Bob, and Carol. + + We discussed the Q4 roadmap and decided to prioritize the API refactoring project. + Alice will update the documentation by December 20th. + Bob agreed to review the security audit findings and present recommendations + at our next meeting on December 22nd. + Carol will coordinate with the design team for the new dashboard mockups, + due before year-end. + + We also covered the budget allocation for next quarter and decided to + increase the infrastructure budget by 15%. + TEXT + extractor.extract_meeting_notes(meeting_text) + + puts "\n#{"=" * 60}" + puts "✨ All extractions complete!" + puts "\nNote: ResponseFormat ensures the AI returns data matching your exact schema." + puts "This is perfect for data pipelines, form filling, and structured analysis." + end + end +end + +if $PROGRAM_NAME == __FILE__ + Examples::StructuredDataExtraction.run! 
+end diff --git a/examples/trivia_game.rb b/examples/trivia_game.rb new file mode 100755 index 0000000..515f326 --- /dev/null +++ b/examples/trivia_game.rb @@ -0,0 +1,112 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "dotenv" +require "raix" + +# Load environment variables from .env file +Dotenv.load + +# Configure Raix with API keys +Raix.configure do |config| + config.openrouter_api_key = ENV.fetch("OR_ACCESS_TOKEN", nil) + config.openai_api_key = ENV.fetch("OAI_ACCESS_TOKEN", nil) +end + +module Examples + # A trivia game that uses AI to answer true/false questions + class TriviaGame + include Raix::Predicate + + configure do |config| + config.model = ENV.fetch("RAIX_EXAMPLE_MODEL", "gpt-4o-mini") + config.temperature = 0.3 + end + + attr_reader :score, :questions_asked + + def initialize + @score = 0 + @questions_asked = 0 + end + + # Define what happens when the answer is "yes" + yes? do |response| + @questions_asked += 1 + puts "\nāœ“ CORRECT! #{response.sub(/^yes,\s*/i, "")}" + @score += 1 + end + + # Define what happens when the answer is "no" + no? do |response| + @questions_asked += 1 + puts "\nāœ— INCORRECT! #{response.sub(/^no,\s*/i, "")}" + end + + # Define what happens when the answer is "maybe" + maybe? do |response| + @questions_asked += 1 + puts "\n? UNCLEAR! #{response.sub(/^maybe,\s*/i, "")}" + end + + def play(questions) + puts "šŸŽ® Welcome to AI Trivia Game!" + puts "=" * 50 + + questions.each_with_index do |question, index| + puts "\nQuestion #{index + 1}: #{question}" + print "Your answer (true/false): " + user_answer = $stdin.gets.chomp.downcase + + # Convert user's answer to a question for the AI + ai_question = "Is this statement true or false: #{question}. The user answered '#{user_answer}'. Is the user correct?" 
+ + ask(ai_question) + end + + show_results + end + + def show_results + puts "\n#{"=" * 50}" + puts "šŸ† Final Score: #{score}/#{questions_asked}" + percentage = (score.to_f / questions_asked * 100).round + puts "šŸ“Š Percentage: #{percentage}%" + + case percentage + when 90..100 + puts "🌟 Outstanding! You're a trivia master!" + when 70..89 + puts "😊 Great job! Keep it up!" + when 50..69 + puts "šŸ¤” Not bad! Room for improvement." + else + puts "šŸ“š Better luck next time!" + end + end + + def self.run!(questions: nil) + questions ||= default_questions + new.play(questions) + end + + def self.default_questions + [ + "Ruby was created by Yukihiro Matsumoto", + "Python was released before Ruby", + "The Ruby programming language is named after the gemstone", + "Rails was created in 2004", + "Ruby uses curly braces for blocks exclusively" + ] + end + end +end + +if $PROGRAM_NAME == __FILE__ + # You can provide custom questions or use the defaults + if ARGV.any? + Examples::TriviaGame.run!(questions: ARGV) + else + Examples::TriviaGame.run! 
+ end +end diff --git a/lib/raix.rb b/lib/raix.rb index 59eff59..bc7f5d8 100644 --- a/lib/raix.rb +++ b/lib/raix.rb @@ -1,12 +1,9 @@ # frozen_string_literal: true -require "ruby_llm" - require_relative "raix/completion_context" require_relative "raix/configuration" require_relative "raix/version" -require_relative "raix/transcript_adapter" -require_relative "raix/function_tool_adapter" +require_relative "raix/transcript_store" require_relative "raix/chat_completion" require_relative "raix/function_dispatch" require_relative "raix/prompt_declarations" diff --git a/lib/raix/chat_completion.rb b/lib/raix/chat_completion.rb index 87a4bf4..8add426 100644 --- a/lib/raix/chat_completion.rb +++ b/lib/raix/chat_completion.rb @@ -2,43 +2,23 @@ require "active_support/concern" require "active_support/core_ext/object/blank" -require "active_support/core_ext/string/filters" require "active_support/core_ext/hash/indifferent_access" -require "ruby_llm" require_relative "message_adapters/base" -require_relative "transcript_adapter" -require_relative "function_tool_adapter" +require_relative "transcript_store" +require_relative "runtime/errors" +require_relative "runtime/transport" +require_relative "runtime/stream_parser" +require_relative "runtime/stream_accumulator" +require_relative "runtime/providers/base" +require_relative "runtime/providers/openai" +require_relative "runtime/providers/openrouter" +require_relative "runtime/client" module Raix class UndeclaredToolError < StandardError; end - # The `ChatCompletion` module is a Rails concern that provides a way to interact - # with the OpenRouter Chat Completion API via its client. The module includes a few - # methods that allow you to build a transcript of messages and then send them to - # the API for completion. The API will return a response that you can use however - # you see fit. - # - # When the AI responds with tool function calls instead of a text message, this - # module automatically: - # 1. 
Executes the requested tool functions - # 2. Adds the function results to the conversation transcript - # 3. Sends the updated transcript back to the AI for another completion - # 4. Repeats this process until the AI responds with a regular text message - # - # This automatic continuation ensures that tool calls are seamlessly integrated - # into the conversation flow. The AI can use tool results to formulate its final - # response to the user. You can limit the number of tool calls using the - # `max_tool_calls` parameter to prevent excessive function invocations. - # - # Tool functions must be defined on the class that includes this module. The - # `FunctionDispatch` module provides a Rails-like DSL for declaring these - # functions at the class level, which is cleaner than implementing them as - # instance methods. - # - # Note that some AI models can make multiple tool function calls in a single - # response. When that happens, the module executes all requested functions - # before continuing the conversation. + # Chat completion concern with tool orchestration and hook support. module ChatCompletion extend ActiveSupport::Concern @@ -48,35 +28,94 @@ module ChatCompletion :available_tools, :tool_choice, :provider, :max_tool_calls, :stop_tool_calls_and_respond class_methods do - # Returns the current configuration of this class. Falls back to global configuration for unset values. def configuration @configuration ||= Configuration.new(fallback: Raix.configuration) end - # Let's you configure the class-level configuration using a block. def configure yield(configuration) end end - # Instance level access to the class-level configuration. def configuration self.class.configuration end - # This method performs chat completion based on the provided transcript and parameters. - # - # @param params [Hash] The parameters for chat completion. - # @option loop [Boolean] :loop (false) DEPRECATED - The system now automatically continues after tool calls. 
- # @option params [Boolean] :json (false) Whether to parse the response as a JSON object. Will search for <json> tags in the response first, then fall back to the default JSON parsing of the entire response. - # @option params [String] :openai (nil) If non-nil, use OpenAI with the model specified in this param. - # @option params [Boolean] :raw (false) Whether to return the raw response or dig the text content. - # @option params [Array] :messages (nil) An array of messages to use instead of the transcript. - # @option tools [Array|false] :available_tools (nil) Tools to pass to the LLM. Ignored if nil (default). If false, no tools are passed. If an array, only declared tools in the array are passed. - # @option max_tool_calls [Integer] :max_tool_calls Maximum number of tool calls before forcing a text response. Defaults to the configured value. - # @return [String|Hash] The completed chat response. def chat_completion(params: {}, loop: false, json: false, raw: false, openai: nil, save_response: true, messages: nil, available_tools: nil, max_tool_calls: nil) - # set params to default values if not provided + params = build_request_params(params.dup, available_tools:) + json = true if params[:response_format].is_a?(Raix::ResponseFormat) + params[:response_format] = params[:response_format].to_schema if params[:response_format].is_a?(Raix::ResponseFormat) + params = apply_json_mode_params(params, json:, openai:) + + warn_deprecated_loop(loop) + self.max_tool_calls = max_tool_calls || configuration.max_tool_calls + @stop_tool_calls_and_respond = false + self.model ||= configuration.model + + adapter = MessageAdapters::Base.new(self) + messages ||= transcript.flatten.compact + messages = messages.map { |msg| adapter.transform(msg) }.dup + raise "Can't complete an empty transcript" if messages.blank? 
+ + run_before_completion_hooks(params, messages) + + retry_count = 0 + tool_call_count = 0 + content = nil + + begin + response = execute_runtime_request(params:, model: openai || model, messages:, openai_override: openai) + return if stream && response.blank? + + Thread.current[:chat_completion_response] = response.is_a?(Hash) ? response.with_indifferent_access : response + + tool_calls = response.dig("choices", 0, "message", "tool_calls") || [] + if tool_calls.any? + tool_call_count += tool_calls.size + return handle_tool_calls(tool_calls:, tool_call_count:, params:, json:, raw:, openai:, save_response:, available_tools:) + end + + content = response.dig("choices", 0, "message", "content") + transcript << { assistant: content } if save_response + return response if raw + + content = content.to_s.strip + return parse_json_response_content(content) if json + + content + rescue JSON::ParserError => e + if json && retry_count < 3 + retry_count += 1 + warn "Retrying JSON response parse (#{retry_count}/3): #{e.message}" + sleep retry_count + retry + end + + warn "Bad JSON received: #{content}" + raise e + rescue Faraday::BadRequestError => e + warn "Chat completion failed: #{e.response&.dig(:body) || e.message}" + raise e + end + end + + # Transcript array for this conversation. + def transcript + @transcript ||= TranscriptStore.new + end + + # Dispatches a tool function call. Override for custom behavior. 
+ def dispatch_tool_function(function_name, arguments, cache: nil) + public_send(function_name, arguments, cache) + end + + private + + def runtime_client + @runtime_client ||= Runtime::Client.new(configuration:) + end + + def build_request_params(params, available_tools:) params[:cache_at] ||= cache_at.presence params[:frequency_penalty] ||= frequency_penalty.presence params[:logit_bias] ||= logit_bias.presence @@ -105,206 +144,87 @@ def chat_completion(params: {}, loop: false, json: false, raw: false, openai: ni params[:top_logprobs] ||= top_logprobs.presence params[:top_p] ||= top_p.presence - json = true if params[:response_format].is_a?(Raix::ResponseFormat) + params + end - if json - unless openai - params[:provider] ||= {} - params[:provider][:require_parameters] = true - end - if params[:response_format].blank? - params[:response_format] ||= {} - params[:response_format][:type] = "json_object" - end - end + def apply_json_mode_params(params, json:, openai:) + return params unless json - # Deprecation warning for loop parameter - if loop - warn "\n\nWARNING: The 'loop' parameter is DEPRECATED and will be ignored.\nChat completions now automatically continue after tool calls until the AI provides a text response.\nUse 'max_tool_calls' to limit the number of tool calls (default: #{configuration.max_tool_calls}).\n\n" + unless openai + params[:provider] ||= {} + params[:provider][:require_parameters] = true end + if params[:response_format].blank? 
+ params[:response_format] ||= {} + params[:response_format][:type] = "json_object" + end + params + end - # Set max_tool_calls from parameter or configuration default - self.max_tool_calls = max_tool_calls || configuration.max_tool_calls - - # Reset stop_tool_calls_and_respond flag - @stop_tool_calls_and_respond = false - - # Track tool call count - tool_call_count = 0 - - # set the model to the default if not provided - self.model ||= configuration.model - - adapter = MessageAdapters::Base.new(self) - - # duplicate the transcript to avoid race conditions in situations where - # chat_completion is called multiple times in parallel - # TODO: Defensive programming, ensure messages is an array - messages ||= transcript.flatten.compact - messages = messages.map { |msg| adapter.transform(msg) }.dup - raise "Can't complete an empty transcript" if messages.blank? - - # Run before_completion hooks (global -> class -> instance) - # Hooks can modify params and messages for logging, filtering, PII redaction, etc. - run_before_completion_hooks(params, messages) - - begin - response = ruby_llm_request(params:, model: openai || model, messages:, openai_override: openai) - retry_count = 0 - content = nil - - # no need for additional processing if streaming - return if stream && response.blank? - - # tuck the full response into a thread local in case needed - Thread.current[:chat_completion_response] = response.is_a?(Hash) ? response.with_indifferent_access : response - - # TODO: add a standardized callback hook for usage events - # broadcast(:usage_event, usage_subject, self.class.name.to_s, response, premium?) - - tool_calls = response.dig("choices", 0, "message", "tool_calls") || [] - if tool_calls.any? - tool_call_count += tool_calls.size - - # Check if we've exceeded max_tool_calls - if tool_call_count > self.max_tool_calls - # Add system message about hitting the limit - messages << { role: "system", content: "Maximum tool calls (#{self.max_tool_calls}) exceeded. 
Please provide a final response to the user without calling any more tools." } - - # Force a final response without tools - params[:tools] = nil - response = ruby_llm_request(params:, model: openai || model, messages:, openai_override: openai) - - # Process the final response - content = response.dig("choices", 0, "message", "content") - transcript << { assistant: content } if save_response - return raw ? response : content.strip - end - - # Dispatch tool calls - tool_calls.each do |tool_call| # TODO: parallelize this? - # dispatch the called function - function_name = tool_call["function"]["name"] - arguments = JSON.parse(tool_call["function"]["arguments"].presence || "{}") - raise "Unauthorized function call: #{function_name}" unless self.class.functions.map { |f| f[:name].to_sym }.include?(function_name.to_sym) - - dispatch_tool_function(function_name, arguments.with_indifferent_access) - end - - # After executing tool calls, we need to continue the conversation - # to let the AI process the results and provide a text response. - # We continue until the AI responds with a regular assistant message - # (not another tool call request), unless stop_tool_calls_and_respond! was called. - - # Use the updated transcript for the next call, not the original messages - updated_messages = transcript.flatten.compact - last_message = updated_messages.last - - if !@stop_tool_calls_and_respond && (last_message[:role] != "assistant" || last_message[:tool_calls].present?) 
- # Send the updated transcript back to the AI - return chat_completion( - params:, - json:, - raw:, - openai:, - save_response:, - messages: nil, # Use transcript instead - available_tools:, - max_tool_calls: self.max_tool_calls - tool_call_count - ) - elsif @stop_tool_calls_and_respond - # If stop_tool_calls_and_respond was set, force a final response without tools - params[:tools] = nil - response = ruby_llm_request(params:, model: openai || model, messages:, openai_override: openai) - - content = response.dig("choices", 0, "message", "content") - transcript << { assistant: content } if save_response - return raw ? response : content.strip - end - end - - response.tap do |res| - content = res.dig("choices", 0, "message", "content") + def warn_deprecated_loop(loop) + return unless loop - transcript << { assistant: content } if save_response - content = content.strip + warn "\n\nWARNING: The 'loop' parameter is DEPRECATED and will be ignored.\nChat completions now automatically continue after tool calls until the AI provides a text response.\nUse 'max_tool_calls' to limit the number of tool calls (default: #{configuration.max_tool_calls}).\n\n" + end - if json - # Make automatic JSON parsing available to non-OpenAI providers that don't support the response_format parameter - content = content.match(%r{<json>(.*?)</json>}m)[1] if content.include?("<json>") + def handle_tool_calls(tool_calls:, tool_call_count:, params:, json:, raw:, openai:, save_response:, available_tools:) + if tool_call_count > max_tool_calls + messages = transcript.flatten.compact.map { |msg| MessageAdapters::Base.new(self).transform(msg) } + messages << { role: "system", content: "Maximum tool calls (#{max_tool_calls}) exceeded. Please provide a final response to the user without calling any more tools." 
} + params[:tools] = nil - return JSON.parse(content) - end + final_response = execute_runtime_request(params:, model: openai || model, messages:, openai_override: openai) + content = final_response.dig("choices", 0, "message", "content") + transcript << { assistant: content } if save_response + return raw ? final_response : content.to_s.strip + end - return content unless raw - end - rescue JSON::ParserError => e - if e.message.include?("not a valid") # blank JSON - warn "Retrying blank JSON response... (#{retry_count} attempts) #{e.message}" - retry_count += 1 - sleep 1 * retry_count # backoff - retry if retry_count < 3 + tool_calls.each do |tool_call| + function_name = tool_call.dig("function", "name") + arguments = JSON.parse(tool_call.dig("function", "arguments").presence || "{}") - raise e # just fail if we can't get content after 3 attempts - end + declared = Array(self.class.functions).map { |f| f[:name].to_sym } + raise "Unauthorized function call: #{function_name}" unless declared.include?(function_name.to_sym) - warn "Bad JSON received!!!!!!: #{content}" - raise e - rescue Faraday::BadRequestError => e - # make sure we see the actual error message on console or Honeybadger - warn "Chat completion failed!!!!!!!!!!!!!!!!: #{e.response[:body]}" - raise e + dispatch_tool_function(function_name, arguments.with_indifferent_access) end - end - # This method returns the transcript array. - # Manually add your messages to it in the following abbreviated format - # before calling `chat_completion`. - # - # { system: "You are a pumpkin" }, - # { user: "Hey what time is it?" }, - # { assistant: "Sorry, pumpkins do not wear watches" } - # - # to add a function call use the following format: - # { function: { name: 'fancy_pants_function', arguments: { param: 'value' } } } - # - # to add a function result use the following format: - # { function: result, name: 'fancy_pants_function' } - # - # @return [Array] The transcript array. 
- def transcript - @transcript ||= TranscriptAdapter.new(ruby_llm_chat) - end + updated_messages = transcript.flatten.compact + last_message = updated_messages.last + + if !@stop_tool_calls_and_respond && (last_message[:role] != "assistant" || last_message[:tool_calls].present?) + return chat_completion( + params:, + json:, + raw:, + openai:, + save_response:, + messages: nil, + available_tools:, + max_tool_calls: max_tool_calls - tool_call_count + ) + end - # Returns the RubyLLM::Chat instance for this conversation - def ruby_llm_chat - @ruby_llm_chat ||= begin - model_id = model || configuration.model + return unless @stop_tool_calls_and_respond - # Determine provider based on model format or explicit openai flag - provider = if model_id.to_s.start_with?("openai/") || model_id.to_s.match?(/^gpt-/) - :openai - else - :openrouter - end + continuation_messages = updated_messages.map { |msg| MessageAdapters::Base.new(self).transform(msg) } + params[:tools] = nil + final_response = execute_runtime_request(params:, model: openai || model, messages: continuation_messages, openai_override: openai) + content = final_response.dig("choices", 0, "message", "content") + transcript << { assistant: content } if save_response + raw ? final_response : content.to_s.strip + end - RubyLLM.chat(model: model_id, provider:, assume_model_exists: true) + def parse_json_response_content(content) + extracted = content + if extracted.include?("<json>") + match = extracted.match(%r{<json>(.*?)</json>}m) + extracted = match[1] if match end - end - # Dispatches a tool function call with the given function name and arguments. - # This method can be overridden in subclasses to customize how function calls are handled. 
- # - # @param function_name [String] The name of the function to call - # @param arguments [Hash] The arguments to pass to the function - # @param cache [ActiveSupport::Cache] Optional cache object - # @return [Object] The result of the function call - def dispatch_tool_function(function_name, arguments, cache: nil) - public_send(function_name, arguments, cache) + JSON.parse(extracted) end - private - def filtered_tools(tool_names) return nil if tool_names.blank? @@ -326,112 +246,28 @@ def run_before_completion_hooks(params, messages) return if hooks.empty? - context = CompletionContext.new( - chat_completion: self, - messages:, - params: - ) + context = CompletionContext.new(chat_completion: self, messages:, params:) hooks.each do |hook| result = hook.call(context) if hook.respond_to?(:call) next unless result.is_a?(Hash) - # Handle model separately since it's passed as a keyword arg to ruby_llm_request self.model = result[:model] if result.key?(:model) params.merge!(result.compact) end end - def ruby_llm_request(params:, model:, messages:, openai_override: nil) - # Create a temporary chat instance for this request - provider = determine_provider(model, openai_override) - chat = RubyLLM.chat(model:, provider:, assume_model_exists: true) - - # Apply messages to the chat - # Track if we have a user message to determine how to call ask - has_user_message = false - - messages.each do |msg| - role = msg[:role] || msg["role"] - content = msg[:content] || msg["content"] - - case role.to_s - when "system" - chat.with_instructions(content) - when "user" - has_user_message = true - chat.add_message(role: :user, content:) - when "assistant" - if msg[:tool_calls] || msg["tool_calls"] - chat.add_message(role: :assistant, content:, tool_calls: msg[:tool_calls] || msg["tool_calls"]) - else - chat.add_message(role: :assistant, content:) - end - when "tool" - chat.add_message( - role: :tool, - content:, - tool_call_id: msg[:tool_call_id] || msg["tool_call_id"] - ) - end - end - - 
# Apply configuration parameters - chat.with_temperature(params[:temperature]) if params[:temperature] - - # Apply additional params (RubyLLM with_params expects keyword args) - additional_params = params.compact.except(:temperature, :tools, :max_tokens, :max_completion_tokens) - chat.with_params(**additional_params) if additional_params.any? - - # Handle tools - convert Raix function declarations to RubyLLM tools - if params[:tools].present? && respond_to?(:class) && self.class.respond_to?(:functions) - ruby_llm_tools = FunctionToolAdapter.convert_tools_for_ruby_llm(self) - ruby_llm_tools.each { |tool| chat.with_tool(tool) } - end - - # Execute the completion - if stream.present? - # Streaming mode - if has_user_message - chat.complete(&stream) - else - chat.ask(&stream) - end - nil # Return nil for streaming as per original behavior - else - # Non-streaming mode - return OpenAI-compatible response format - response_message = has_user_message ? chat.complete : chat.ask - - # Convert RubyLLM response to OpenAI format for compatibility - { - "choices" => [ - { - "message" => { - "role" => "assistant", - "content" => response_message.content, - "tool_calls" => response_message.tool_calls - }, - "finish_reason" => response_message.tool_call? ? 
"tool_calls" : "stop" - } - ], - "usage" => { - "prompt_tokens" => response_message.input_tokens, - "completion_tokens" => response_message.output_tokens, - "total_tokens" => (response_message.input_tokens || 0) + (response_message.output_tokens || 0) - } - } - end - rescue StandardError => e - warn "RubyLLM request failed: #{e.message}" + def execute_runtime_request(params:, model:, messages:, openai_override:) + runtime_client.complete( + model:, + messages:, + params: params.compact, + stream:, + openai_override: + ) + rescue Runtime::Error => e + warn e.message raise e end - - def determine_provider(model, openai_override) - return :openai if openai_override - return :openai if model.to_s.match?(/^gpt-/) || model.to_s.match?(/^o\d/) - - # Default to openrouter for model IDs with provider prefix - :openrouter - end end end diff --git a/lib/raix/configuration.rb b/lib/raix/configuration.rb index bb8859c..0d8afb4 100644 --- a/lib/raix/configuration.rb +++ b/lib/raix/configuration.rb @@ -30,19 +30,27 @@ def self.attr_accessor_with_fallback(method_name) # is normally set in each class that includes the ChatCompletion module. attr_accessor_with_fallback :model - # DEPRECATED: Use ruby_llm_config.openrouter_api_key instead + # DEPRECATED: Prefer openrouter_api_key attr_accessor_with_fallback :openrouter_client - # DEPRECATED: Use ruby_llm_config.openai_api_key instead + # DEPRECATED: Prefer openai_api_key attr_accessor_with_fallback :openai_client + # Native API configuration for the standalone runtime. 
+ attr_accessor_with_fallback :openai_api_key + attr_accessor_with_fallback :openrouter_api_key + attr_accessor_with_fallback :openai_organization_id + attr_accessor_with_fallback :openai_project_id + attr_accessor_with_fallback :openai_base_url + attr_accessor_with_fallback :openrouter_base_url + attr_accessor_with_fallback :request_timeout + attr_accessor_with_fallback :open_timeout + attr_accessor_with_fallback :request_retries + # The max_tool_calls option determines the maximum number of tool calls # before forcing a text response to prevent excessive function invocations. attr_accessor_with_fallback :max_tool_calls - # Access to RubyLLM configuration - attr_accessor_with_fallback :ruby_llm_config - # A callable hook that runs before each chat completion request. # Receives a CompletionContext and can modify params and messages. # Use for: dynamic parameter resolution, logging, content filtering, PII redaction, etc. @@ -53,6 +61,9 @@ def self.attr_accessor_with_fallback(method_name) DEFAULT_MODEL = "meta-llama/llama-3.3-8b-instruct:free" DEFAULT_TEMPERATURE = 0.0 DEFAULT_MAX_TOOL_CALLS = 25 + DEFAULT_REQUEST_TIMEOUT = 120 + DEFAULT_OPEN_TIMEOUT = 30 + DEFAULT_REQUEST_RETRIES = 2 # Initializes a new instance of the Configuration class with default values. def initialize(fallback: nil) @@ -61,30 +72,75 @@ def initialize(fallback: nil) self.max_tokens = DEFAULT_MAX_TOKENS self.model = DEFAULT_MODEL self.max_tool_calls = DEFAULT_MAX_TOOL_CALLS - self.ruby_llm_config = RubyLLM.config + self.request_timeout = DEFAULT_REQUEST_TIMEOUT + self.open_timeout = DEFAULT_OPEN_TIMEOUT + self.request_retries = DEFAULT_REQUEST_RETRIES self.fallback = fallback + + @legacy_client_warnings = {} + @legacy_config_warning_emitted = false + load_legacy_ruby_llm_config! end def client? - # Support legacy openrouter_client/openai_client or new RubyLLM config - !!(openrouter_client || openai_client || ruby_llm_configured?) 
+ # Support legacy client objects, standalone API keys, and RubyLLM migration shim. + !!(openrouter_client || openai_client || openai_api_key || openrouter_api_key || ruby_llm_configured?) end def ruby_llm_configured? - ruby_llm_config&.openai_api_key || ruby_llm_config&.openrouter_api_key || - ruby_llm_config&.anthropic_api_key || ruby_llm_config&.gemini_api_key + legacy_config = ruby_llm_config + legacy_config&.openai_api_key || legacy_config&.openrouter_api_key || + legacy_config&.anthropic_api_key || legacy_config&.gemini_api_key + end + + # Migration shim for RubyLLM-based configuration. Supported for one major cycle. + def ruby_llm_config + value = instance_variable_get("@ruby_llm_config") + return value if value + return unless fallback + + fallback.ruby_llm_config + end + + def ruby_llm_config=(value) + emit_legacy_config_warning_once! + instance_variable_set("@ruby_llm_config", value) + migrate_from_legacy_config(value) + end + + def legacy_client_warning_emitted?(provider_key) + @legacy_client_warnings[provider_key] + end + + def mark_legacy_client_warning_emitted!(provider_key) + @legacy_client_warnings[provider_key] = true end private attr_accessor :fallback - def get_with_fallback(method) - value = instance_variable_get("@#{method}") - return value if value - return unless fallback + def load_legacy_ruby_llm_config! + return unless defined?(::RubyLLM) + return unless ::RubyLLM.respond_to?(:config) + + self.ruby_llm_config = ::RubyLLM.config + rescue StandardError + nil + end + + def migrate_from_legacy_config(legacy_config) + return unless legacy_config + + self.openai_api_key ||= legacy_config.openai_api_key if legacy_config.respond_to?(:openai_api_key) + self.openrouter_api_key ||= legacy_config.openrouter_api_key if legacy_config.respond_to?(:openrouter_api_key) + end + + def emit_legacy_config_warning_once! + return if @legacy_config_warning_emitted - fallback.public_send(method) + warn "DEPRECATION: RubyLLM config is deprecated in Raix. 
Configure API keys with `Raix.configure`." + @legacy_config_warning_emitted = true end end end diff --git a/lib/raix/function_tool_adapter.rb b/lib/raix/function_tool_adapter.rb deleted file mode 100644 index 807d7d8..0000000 --- a/lib/raix/function_tool_adapter.rb +++ /dev/null @@ -1,51 +0,0 @@ -# frozen_string_literal: true - -module Raix - # Adapter to convert Raix function declarations to RubyLLM::Tool instances - class FunctionToolAdapter - def self.create_tool_from_function(function_def, instance) - tool_class = Class.new(RubyLLM::Tool) do - description function_def[:description] if function_def[:description] - - # Define parameters based on function definition - function_def[:parameters][:properties]&.each do |param_name, param_def| - required = function_def[:parameters][:required]&.include?(param_name) - param param_name.to_sym, type: param_def[:type], desc: param_def[:description], required: - end - - # Store reference to the instance and function name - define_method(:raix_instance) { instance } - define_method(:raix_function_name) { function_def[:name] } - - # Override execute to call the Raix function - define_method(:execute) do |**args| - raix_instance.public_send(raix_function_name, args.with_indifferent_access, nil) - end - end - - # Set a meaningful name for the tool class - tool_class.define_singleton_method(:name) do - "Raix::GeneratedTool::#{function_def[:name].to_s.camelize}" - end - - tool_instance = tool_class.new - - # Override the name method to return the original function name - # This ensures RubyLLM can match the tool call from the AI - tool_instance.define_singleton_method(:name) do - function_def[:name].to_s - end - - tool_instance - end - - def self.convert_tools_for_ruby_llm(raix_instance) - return [] unless raix_instance.class.respond_to?(:functions) - return [] if raix_instance.class.functions.blank? 
- - raix_instance.class.functions.map do |function_def| - create_tool_from_function(function_def, raix_instance) - end - end - end -end diff --git a/lib/raix/runtime/client.rb b/lib/raix/runtime/client.rb new file mode 100644 index 0000000..df950e6 --- /dev/null +++ b/lib/raix/runtime/client.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module Raix + module Runtime + # Selects a provider and executes chat completion requests. + class Client + def initialize(configuration:) + @configuration = configuration + end + + def complete(model:, messages:, params:, stream:, openai_override:) + provider_key = determine_provider(model, openai_override) + legacy_response = complete_with_legacy_client(provider_key:, model:, messages:, params:, stream:) + return legacy_response if legacy_response + + provider(provider_key).chat_completions(model:, messages:, params:, stream:) + end + + private + + attr_reader :configuration + + def provider(key) + @providers ||= {} + @providers[key] ||= case key + when :openai + Providers::OpenAI.new(configuration:) + else + Providers::OpenRouter.new(configuration:) + end + end + + def determine_provider(model, openai_override) + return :openai if openai_override + return :openai if model.to_s.match?(/^gpt-/) || model.to_s.match?(/^o\d/) + + :openrouter + end + + def complete_with_legacy_client(provider_key:, model:, messages:, params:, stream:) + client = provider_key == :openai ? configuration.openai_client : configuration.openrouter_client + return nil unless client + + warn_deprecated_legacy_client(provider_key) + + return client.complete(model:, messages:, params:, stream:) if client.respond_to?(:complete) + return client.chat(model:, messages:, params:, stream:) if client.respond_to?(:chat) + + nil + end + + def warn_deprecated_legacy_client(provider_key) + return if configuration.legacy_client_warning_emitted?(provider_key) + + warn "DEPRECATION: `#{provider_key}_client` is deprecated; configure `#{provider_key}_api_key` instead." 
+ configuration.mark_legacy_client_warning_emitted!(provider_key) + end + end + end +end diff --git a/lib/raix/runtime/errors.rb b/lib/raix/runtime/errors.rb new file mode 100644 index 0000000..92548b7 --- /dev/null +++ b/lib/raix/runtime/errors.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module Raix + module Runtime + class Error < StandardError; end + + class ConfigurationError < Error; end + + # Wraps provider/transport errors with normalized metadata. + class TransportError < Error + attr_reader :status, :provider, :body + + def initialize(message, status: nil, provider: nil, body: nil) + super(message) + @status = status + @provider = provider + @body = body + end + end + end +end diff --git a/lib/raix/runtime/providers/base.rb b/lib/raix/runtime/providers/base.rb new file mode 100644 index 0000000..b14b658 --- /dev/null +++ b/lib/raix/runtime/providers/base.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Raix + module Runtime + module Providers + # Shared provider adapter base class. + class Base + attr_reader :configuration, :transport + + def initialize(configuration:, transport: nil) + @configuration = configuration + @transport = transport || Transport.new + end + end + end + end +end diff --git a/lib/raix/runtime/providers/openai.rb b/lib/raix/runtime/providers/openai.rb new file mode 100644 index 0000000..8ad2f10 --- /dev/null +++ b/lib/raix/runtime/providers/openai.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +module Raix + module Runtime + module Providers + # OpenAI Chat Completions adapter. 
+ class OpenAI < Base + DEFAULT_URL = "https://api.openai.com/v1/chat/completions" + + def chat_completions(model:, messages:, params:, stream: nil) + payload = { model:, messages: }.merge(filtered_params(params)) + + if stream + transport.post_stream(url: endpoint, headers: auth_headers, payload:, provider: "openai", &stream) + else + transport.post_json(url: endpoint, headers: auth_headers, payload:, provider: "openai") + end + end + + private + + def endpoint + configuration.openai_base_url.presence || DEFAULT_URL + end + + def auth_headers + api_key = configuration.openai_api_key + raise ConfigurationError, "Missing OpenAI API key. Set `Raix.configure { |c| c.openai_api_key = ... }`." if api_key.blank? + + {}.tap do |headers| + headers["Authorization"] = "Bearer #{api_key}" + headers["OpenAI-Organization"] = configuration.openai_organization_id if configuration.openai_organization_id.present? + headers["OpenAI-Project"] = configuration.openai_project_id if configuration.openai_project_id.present? + end + end + + def filtered_params(params) + params.except(:cache_at, :model).compact + end + end + end + end +end diff --git a/lib/raix/runtime/providers/openrouter.rb b/lib/raix/runtime/providers/openrouter.rb new file mode 100644 index 0000000..2ca2487 --- /dev/null +++ b/lib/raix/runtime/providers/openrouter.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module Raix + module Runtime + module Providers + # OpenRouter Chat Completions adapter. 
+ class OpenRouter < Base + DEFAULT_URL = "https://openrouter.ai/api/v1/chat/completions" + + def chat_completions(model:, messages:, params:, stream: nil) + payload = { model:, messages: }.merge(filtered_params(params)) + + if stream + transport.post_stream(url: endpoint, headers: auth_headers, payload:, provider: "openrouter", &stream) + else + transport.post_json(url: endpoint, headers: auth_headers, payload:, provider: "openrouter") + end + end + + private + + def endpoint + configuration.openrouter_base_url.presence || DEFAULT_URL + end + + def auth_headers + api_key = configuration.openrouter_api_key + raise ConfigurationError, "Missing OpenRouter API key. Set `Raix.configure { |c| c.openrouter_api_key = ... }`." if api_key.blank? + + { "Authorization" => "Bearer #{api_key}" } + end + + def filtered_params(params) + params.except(:cache_at, :model).compact + end + end + end + end +end diff --git a/lib/raix/runtime/stream_accumulator.rb b/lib/raix/runtime/stream_accumulator.rb new file mode 100644 index 0000000..7f81216 --- /dev/null +++ b/lib/raix/runtime/stream_accumulator.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module Raix + module Runtime + # Reconstructs a final OpenAI-style assistant message from streaming deltas. + class StreamAccumulator + def initialize + @content = +"" + @tool_calls = {} + @finish_reason = nil + @usage = nil + end + + def consume(chunk) + @usage = chunk["usage"] if chunk["usage"].is_a?(Hash) + + choice = chunk.dig("choices", 0) || {} + delta = choice["delta"] || {} + @finish_reason = choice["finish_reason"] if choice.key?("finish_reason") + + append_content(delta["content"]) + append_tool_calls(delta["tool_calls"]) + end + + def envelope + tool_calls = @tool_calls.keys.sort.map { |index| @tool_calls[index] } + + { + "choices" => [ + { + "message" => { + "role" => "assistant", + "content" => @content.empty? ? nil : @content, + "tool_calls" => tool_calls.empty? ? 
nil : tool_calls + }, + "finish_reason" => @finish_reason || (tool_calls.any? ? "tool_calls" : "stop") + } + ], + "usage" => @usage + } + end + + private + + def append_content(content) + @content << content.to_s if content + end + + def append_tool_calls(tool_calls) + return unless tool_calls.is_a?(Array) + + tool_calls.each do |call| + index = call["index"] || 0 + @tool_calls[index] ||= { + "id" => call["id"], + "type" => call["type"] || "function", + "function" => { "name" => +"", "arguments" => +"" } + } + + existing = @tool_calls[index] + existing["id"] ||= call["id"] + existing["type"] ||= call["type"] if call["type"] + + function = call["function"] || {} + existing_function = existing["function"] ||= {} + existing_function["name"] ||= +"" + existing_function["arguments"] ||= +"" + existing_function["name"] << function["name"].to_s if function["name"] + existing_function["arguments"] << function["arguments"].to_s if function["arguments"] + end + end + end + end +end diff --git a/lib/raix/runtime/stream_parser.rb b/lib/raix/runtime/stream_parser.rb new file mode 100644 index 0000000..53ad62b --- /dev/null +++ b/lib/raix/runtime/stream_parser.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module Raix + module Runtime + # Minimal SSE parser for OpenAI-compatible stream events. + class StreamParser + def initialize + @buffer = +"" + end + + def feed(chunk) + @buffer << chunk.to_s + events = [] + + while (delimiter_index = event_delimiter_index) + raw_event = @buffer.slice!(0, delimiter_index + delimiter_size) + events.concat(parse_event(raw_event)) + end + + events + end + + private + + def event_delimiter_index + @buffer.index("\r\n\r\n") || @buffer.index("\n\n") + end + + def delimiter_size + @buffer.include?("\r\n\r\n") ? 4 : 2 + end + + def parse_event(raw_event) + raw_event.each_line.filter_map do |line| + stripped = line.strip + next unless stripped.start_with?("data:") + + value = stripped.sub(/\Adata:\s?/, "") + next if value.empty? 
+ + value + end + end + end + end +end diff --git a/lib/raix/runtime/transport.rb b/lib/raix/runtime/transport.rb new file mode 100644 index 0000000..0a01dd8 --- /dev/null +++ b/lib/raix/runtime/transport.rb @@ -0,0 +1,100 @@ +# frozen_string_literal: true + +require "faraday" +require "faraday/retry" + +module Raix + module Runtime + # HTTP transport wrapper with retries, timeout handling, and stream parsing. + class Transport + DEFAULT_TIMEOUT = 120 + DEFAULT_OPEN_TIMEOUT = 30 + DEFAULT_RETRIES = 2 + + def initialize(timeout: DEFAULT_TIMEOUT, open_timeout: DEFAULT_OPEN_TIMEOUT, retries: DEFAULT_RETRIES) + @timeout = timeout + @open_timeout = open_timeout + @retries = retries + end + + def post_json(url:, headers:, payload:, provider:) + response = connection.post(url) do |req| + req.options.timeout = @timeout + req.options.open_timeout = @open_timeout + req.headers.update(default_headers.merge(headers)) + req.body = JSON.generate(payload) + end + + parse_json_response(response, provider:) + rescue Faraday::TimeoutError, Faraday::ConnectionFailed => e + raise TransportError.new("#{provider} request failed: #{e.message}", provider:) + end + + def post_stream(url:, headers:, payload:, provider:) + parser = StreamParser.new + accumulator = StreamAccumulator.new + + connection.post(url) do |req| + req.options.timeout = @timeout + req.options.open_timeout = @open_timeout + req.headers.update(default_headers.merge(headers).merge("Accept" => "text/event-stream")) + req.body = JSON.generate(payload.merge(stream: true)) + req.options.on_data = lambda do |chunk, _overall_received_bytes, _env| + parser.feed(chunk).each do |event| + next if event == "[DONE]" + + parsed = JSON.parse(event) + accumulator.consume(parsed) + + delta_content = parsed.dig("choices", 0, "delta", "content") + yield delta_content if delta_content && block_given? 
+ rescue JSON::ParserError + next + end + end + end + + accumulator.envelope + rescue Faraday::TimeoutError, Faraday::ConnectionFailed => e + raise TransportError.new("#{provider} stream request failed: #{e.message}", provider:) + end + + private + + def connection + @connection ||= Faraday.new do |f| + f.request :retry, + max: @retries, + interval: 0.2, + interval_randomness: 0.5, + backoff_factor: 2, + methods: %i[get post] + f.adapter Faraday.default_adapter + end + end + + def default_headers + { "Content-Type" => "application/json" } + end + + def parse_json_response(response, provider:) + body = response.body.to_s + parsed = body.empty? ? {} : JSON.parse(body) + + if response.status.to_i >= 400 + error_message = parsed.dig("error", "message") || parsed["message"] || "HTTP #{response.status}" + raise TransportError.new( + "#{provider} request failed (#{response.status}): #{error_message}", + status: response.status, + provider:, + body: parsed + ) + end + + parsed + rescue JSON::ParserError + raise TransportError.new("#{provider} request returned non-JSON response", status: response.status, provider:, body:) + end + end + end +end diff --git a/lib/raix/transcript_adapter.rb b/lib/raix/transcript_adapter.rb deleted file mode 100644 index 68b86f5..0000000 --- a/lib/raix/transcript_adapter.rb +++ /dev/null @@ -1,121 +0,0 @@ -# frozen_string_literal: true - -module Raix - # Adapter to convert between Raix's transcript array format and RubyLLM's Message objects - class TranscriptAdapter - attr_reader :ruby_llm_chat - - def initialize(ruby_llm_chat) - @ruby_llm_chat = ruby_llm_chat - @pending_messages = [] - end - - # Add a message in Raix format (hash) to the transcript - def <<(message_hash) - case message_hash - when Array - # Handle nested arrays (from function dispatch) - message_hash.each { |msg| self << msg } - when Hash - add_message_from_hash(message_hash) - end - self - end - - # Return all messages in Raix-compatible format - def flatten - 
ruby_llm_messages = @ruby_llm_chat.messages.map { |msg| message_to_raix_format(msg) } - pending = @pending_messages.map { |msg| normalize_message_format(msg) } - (ruby_llm_messages + pending).flatten - end - - # Get all messages including pending ones - def to_a - flatten - end - - # Allow iteration - def compact - flatten.compact - end - - # Clear all messages - def clear - @ruby_llm_chat.reset_messages! - @pending_messages.clear - self - end - - # Get last message - def last - flatten.last - end - - # Get size of transcript - def size - flatten.size - end - - alias length size - - private - - def add_message_from_hash(hash) - # Raix abbreviated format: { system: "text" }, { user: "text" }, { assistant: "text" } - if hash.key?(:system) || hash.key?("system") - content = hash[:system] || hash["system"] - @ruby_llm_chat.with_instructions(content) - @pending_messages << { role: "system", content: } - elsif hash.key?(:user) || hash.key?("user") - content = hash[:user] || hash["user"] - # Don't add to ruby_llm_chat yet - wait for chat_completion call - @pending_messages << { role: "user", content: } - elsif hash.key?(:assistant) || hash.key?("assistant") - content = hash[:assistant] || hash["assistant"] - @pending_messages << { role: "assistant", content: } - elsif hash[:role] || hash["role"] - # Standard OpenAI format (tool messages, assistant with tool_calls, etc.) - @pending_messages << hash.with_indifferent_access - end - end - - def message_to_raix_format(message) - # Return in Raix abbreviated format { system: "...", user: "...", assistant: "..." } - # unless it's a tool message which needs full format - if message.tool_call? || message.tool_result? - result = { - role: message.role.to_s, - content: message.content - } - result[:tool_calls] = message.tool_calls if message.tool_call? - result[:tool_call_id] = message.tool_call_id if message.tool_result? 
- result - else - # Use abbreviated format - { message.role.to_sym => message.content } - end - end - - def normalize_message_format(msg) - # If already in abbreviated format, return as-is - return msg if msg.key?(:system) || msg.key?(:user) || msg.key?(:assistant) - return msg if msg["system"] || msg["user"] || msg["assistant"] - - # If in standard format with role/content, convert to abbreviated - if msg[:role] || msg["role"] - role = (msg[:role] || msg["role"]).to_sym - content = msg[:content] || msg["content"] - - # Tool messages stay in full format - if msg[:tool_calls] || msg["tool_calls"] || msg[:tool_call_id] || msg["tool_call_id"] - return msg - end - - # Convert to abbreviated format - { role => content } - else - msg - end - end - end -end diff --git a/lib/raix/transcript_store.rb b/lib/raix/transcript_store.rb new file mode 100644 index 0000000..e6ef46d --- /dev/null +++ b/lib/raix/transcript_store.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +module Raix + # Thread-safe transcript store used by ChatCompletion. + # Stores messages in abbreviated format when possible while preserving tool payloads. 
+ class TranscriptStore + def initialize + @entries = [] + @mutex = Mutex.new + end + + def <<(message) + @mutex.synchronize do + @entries << normalize_entry(message) + end + self + end + + def flatten + snapshot = @mutex.synchronize { @entries.dup } + snapshot.flatten.compact.map { |entry| normalize_output(entry) } + end + + def to_a + flatten + end + + def compact + flatten.compact + end + + def clear + @mutex.synchronize { @entries.clear } + self + end + + def last + flatten.last + end + + def size + @mutex.synchronize { @entries.flatten.compact.size } + end + + alias length size + + private + + def normalize_entry(entry) + case entry + when Array + entry.map { |message| normalize_entry(message) } + when Hash + normalize_message(entry) + else + entry + end + end + + def normalize_message(message) + hash = message.transform_keys(&:to_sym) + + if hash.key?(:role) + role = hash[:role].to_s + content = hash[:content] + tool_payload = hash.key?(:tool_calls) || hash.key?(:tool_call_id) || hash.key?(:name) + + if %w[system user assistant].include?(role) && !tool_payload + { role.to_sym => content } + else + hash + end + else + hash + end + end + + def normalize_output(entry) + return entry unless entry.is_a?(Hash) + + hash = entry.transform_keys(&:to_sym) + return hash if hash.key?(:system) || hash.key?(:user) || hash.key?(:assistant) + + role = hash[:role]&.to_s + tool_payload = hash.key?(:tool_calls) || hash.key?(:tool_call_id) || hash.key?(:name) + return hash unless %w[system user assistant].include?(role) && !tool_payload + + { role.to_sym => hash[:content] } + end + end +end diff --git a/lib/raix/version.rb b/lib/raix/version.rb index 6276131..abfd35f 100644 --- a/lib/raix/version.rb +++ b/lib/raix/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Raix - VERSION = "2.0.0" + VERSION = "2.1.0" end diff --git a/raix.gemspec b/raix.gemspec index ec382da..657aa5c 100644 --- a/raix.gemspec +++ b/raix.gemspec @@ -29,7 +29,7 @@ Gem::Specification.new do 
|spec| spec.require_paths = ["lib"] spec.add_dependency "activesupport", ">= 6.0" + spec.add_dependency "faraday", "~> 2.0" spec.add_dependency "faraday-retry", "~> 2.0" spec.add_dependency "ostruct" - spec.add_dependency "ruby_llm", "~> 1.9" end diff --git a/spec/raix/before_completion_spec.rb b/spec/raix/before_completion_spec.rb index 88d0f68..4ca8707 100644 --- a/spec/raix/before_completion_spec.rb +++ b/spec/raix/before_completion_spec.rb @@ -56,7 +56,7 @@ def initialize end instance = chat_class.new - allow(instance).to receive(:ruby_llm_request).and_return(mock_response) + allow(instance).to receive(:execute_runtime_request).and_return(mock_response) instance.chat_completion @@ -86,7 +86,7 @@ def initialize it "calls the class-level hook" do instance = chat_class.new - allow(instance).to receive(:ruby_llm_request).and_return(mock_response) + allow(instance).to receive(:execute_runtime_request).and_return(mock_response) expect(instance.chat_completion).to eq("test response") end @@ -115,7 +115,7 @@ def initialize it "calls the instance-level hook" do instance = chat_class.new instance.before_completion = ->(_context) { { temperature: 0.5 } } - allow(instance).to receive(:ruby_llm_request).and_return(mock_response) + allow(instance).to receive(:execute_runtime_request).and_return(mock_response) expect(instance.chat_completion).to eq("test response") end @@ -148,7 +148,7 @@ def initialize # Track what params are passed via a spy params_received = nil - allow(instance).to receive(:ruby_llm_request) do |args| + allow(instance).to receive(:execute_runtime_request) do |args| params_received = args[:params] mock_response end @@ -187,7 +187,7 @@ def initialize end instance = chat_class.new - allow(instance).to receive(:ruby_llm_request).and_return(mock_response) + allow(instance).to receive(:execute_runtime_request).and_return(mock_response) instance.chat_completion @@ -209,7 +209,7 @@ def initialize end instance = chat_class.new - allow(instance).to 
receive(:ruby_llm_request).and_return(mock_response) + allow(instance).to receive(:execute_runtime_request).and_return(mock_response) instance.chat_completion @@ -238,7 +238,7 @@ def initialize end instance = chat_class.new - allow(instance).to receive(:ruby_llm_request).and_return(mock_response) + allow(instance).to receive(:execute_runtime_request).and_return(mock_response) # Should not raise an error expect { instance.chat_completion }.not_to raise_error @@ -263,7 +263,7 @@ def initialize end instance = chat_class.new - allow(instance).to receive(:ruby_llm_request).and_return(mock_response) + allow(instance).to receive(:execute_runtime_request).and_return(mock_response) # Should not raise an error expect { instance.chat_completion }.not_to raise_error @@ -294,7 +294,7 @@ def call(_context) instance = chat_class.new instance.before_completion = hook_class.new - allow(instance).to receive(:ruby_llm_request) do |args| + allow(instance).to receive(:execute_runtime_request) do |args| params_received = args[:params] mock_response end @@ -323,14 +323,14 @@ def initialize instance = chat_class.new instance.before_completion = ->(_context) { { model: "different-model" } } - allow(instance).to receive(:ruby_llm_request) do |args| + allow(instance).to receive(:execute_runtime_request) do |args| params_received = args mock_response end instance.chat_completion - # Model is passed separately in ruby_llm_request + # Model is passed separately in execute_runtime_request expect(params_received[:model]).to eq("different-model") end @@ -348,7 +348,7 @@ def initialize } } - allow(instance).to receive(:ruby_llm_request) do |args| + allow(instance).to receive(:execute_runtime_request) do |args| params_received = args[:params] mock_response end @@ -389,7 +389,7 @@ def initialize {} } - allow(instance).to receive(:ruby_llm_request) do |args| + allow(instance).to receive(:execute_runtime_request) do |args| messages_sent = args[:messages] mock_response end @@ -408,7 +408,7 @@ def 
initialize {} } - allow(instance).to receive(:ruby_llm_request) do |args| + allow(instance).to receive(:execute_runtime_request) do |args| messages_sent = args[:messages] mock_response end @@ -433,7 +433,7 @@ def initialize {} } - allow(instance).to receive(:ruby_llm_request) do |args| + allow(instance).to receive(:execute_runtime_request) do |args| messages_sent = args[:messages] mock_response end @@ -470,7 +470,7 @@ def initialize {} # Return empty hash, just logging } - allow(instance).to receive(:ruby_llm_request).and_return(mock_response) + allow(instance).to receive(:execute_runtime_request).and_return(mock_response) instance.chat_completion diff --git a/spec/raix/chat_completion_spec.rb b/spec/raix/chat_completion_spec.rb index 8a8ee1f..d11eabd 100644 --- a/spec/raix/chat_completion_spec.rb +++ b/spec/raix/chat_completion_spec.rb @@ -50,9 +50,7 @@ def initialize subject.transcript << { user: "WHAT IS THE MEANING OF LIFE?" } end - # TODO: RubyLLM doesn't support OpenAI's predicted outputs feature yet - # This feature needs to be added to RubyLLM or we need a workaround - xit "does a completion with OpenAI" do + it "does a completion with OpenAI" do expect(completion).to start_with("THE MEANING OF LIFE") expect(subject.transcript.last).to eq({ assistant: completion }) expect(response.dig("usage", "completion_tokens_details", "accepted_prediction_tokens")).to be > 0 diff --git a/spec/raix/configuration_spec.rb b/spec/raix/configuration_spec.rb index e8b9ea7..60c34a8 100644 --- a/spec/raix/configuration_spec.rb +++ b/spec/raix/configuration_spec.rb @@ -2,20 +2,27 @@ RSpec.describe Raix::Configuration do describe "#client?" 
do - context "with RubyLLM configured via OpenRouter API key" do + context "with native OpenRouter API key" do it "returns true" do configuration = described_class.new(fallback: nil) - configuration.ruby_llm_config = RubyLLM::Configuration.new - configuration.ruby_llm_config.openrouter_api_key = "test_key" + configuration.openrouter_api_key = "test_key" expect(configuration.client?).to eq true end end - context "with RubyLLM configured via OpenAI API key" do + context "with native OpenAI API key" do it "returns true" do configuration = described_class.new(fallback: nil) - configuration.ruby_llm_config = RubyLLM::Configuration.new - configuration.ruby_llm_config.openai_api_key = "test_key" + configuration.openai_api_key = "test_key" + expect(configuration.client?).to eq true + end + end + + context "with legacy RubyLLM config shim" do + it "returns true when keys are present" do + configuration = described_class.new(fallback: nil) + legacy_config = Struct.new(:openai_api_key, :openrouter_api_key, :anthropic_api_key, :gemini_api_key).new(nil, "test_key", nil, nil) + configuration.ruby_llm_config = legacy_config expect(configuration.client?).to eq true end end @@ -23,12 +30,8 @@ context "without any API configuration" do it "returns false" do configuration = described_class.new(fallback: nil) - configuration.ruby_llm_config = RubyLLM::Configuration.new - # Clear all API keys - configuration.ruby_llm_config.openai_api_key = nil - configuration.ruby_llm_config.openrouter_api_key = nil - configuration.ruby_llm_config.anthropic_api_key = nil - configuration.ruby_llm_config.gemini_api_key = nil + configuration.openai_api_key = nil + configuration.openrouter_api_key = nil expect(configuration.client?).to eq false end end diff --git a/spec/raix/function_dispatch_spec.rb b/spec/raix/function_dispatch_spec.rb index 13ed635..c485828 100644 --- a/spec/raix/function_dispatch_spec.rb +++ b/spec/raix/function_dispatch_spec.rb @@ -89,46 +89,15 @@ class SearchForFile 
expect(params[:required]).not_to include(:path) end - # This simulates a middleman on the network that rewrites the function name to anything else - def decorate_clients_with_fake_middleman! - result = { openai: Raix.configuration.openai_client, openrouter: Raix.configuration.openrouter_client } - mocked_middleman = - Class.new(SimpleDelegator) do - def chat(...) - __getobj__.chat(...).tap do |result| - result.dig("choices", 0, "message", "tool_calls")&.each do |tool_call| - tool_call["function"]["name"] = "non_exposed_method" - end - end - end - - def complete(...) - __getobj__.complete(...).tap do |result| - result.dig("choices", 0, "message", "tool_calls")&.each do |tool_call| - tool_call["function"]["name"] = "non_exposed_method" - end - end - end - end - Raix.configuration.openai_client = mocked_middleman.new(Raix.configuration.openai_client) - Raix.configuration.openrouter_client = mocked_middleman.new(Raix.configuration.openrouter_client) - result - end - # Since we are using the send method to execute tool calls, we have to make sure # that the method was explicitly defined as a tool function. # # Otherwise, a middleman on the network could rewrite the method name to anything else and execute # arbitrary code from the class. it "does not allow non exposed methods to be called" do - # With RubyLLM, the security is still enforced in ChatCompletion#chat_completion - # when it checks if the function name is in self.class.functions - # We test this by directly simulating what would happen if a middleman changed the response - weather = WhatIsTheWeather.new # Simulate what chat_completion does when it receives a tool call - # This mimics the check at line 191 in chat_completion.rb fake_tool_call = { "function" => { "name" => "non_exposed_method", "arguments" => "{}" } } function_name = fake_tool_call["function"]["name"] allowed_functions = weather.class.functions.map { |f| f[:name].to_sym } @@ -139,31 +108,44 @@ def complete(...)
end it "respects max_tool_calls parameter" do - # Create a mock that simulates multiple tool calls weather = WhatIsTheWeather.new weather.transcript.clear weather.transcript << { user: "Check the weather for multiple cities repeatedly" } - # Mock the client to always return tool calls - allow(Raix.configuration.openrouter_client).to receive(:complete).and_return({ - "choices" => [{ - "message" => { - "tool_calls" => [ - { - "id" => "call_1", - "type" => "function", - "function" => { - "name" => "check_weather", - "arguments" => '{"location": "City"}' - } - } - ] - } - }] - }).and_call_original - - # With max_tool_calls set to 2, it should stop after 2 calls and provide a final response - response = weather.chat_completion(max_tool_calls: 2) + tool_call_response = lambda do |id| + { + "choices" => [{ + "message" => { + "role" => "assistant", + "content" => nil, + "tool_calls" => [ + { + "id" => id, + "type" => "function", + "function" => { + "name" => "check_weather", + "arguments" => '{"location":"City"}' + } + } + ] + }, + "finish_reason" => "tool_calls" + }] + } + end + + final_response = { + "choices" => [{ + "message" => { "role" => "assistant", "content" => "Final answer without more tools", "tool_calls" => nil }, + "finish_reason" => "stop" + }] + } + + responses = [tool_call_response.call("call_1"), tool_call_response.call("call_2"), final_response] + allow(weather).to receive(:execute_runtime_request) { responses.shift } + + response = weather.chat_completion(max_tool_calls: 1) expect(response).to be_a(String) + expect(response).to include("Final answer") end end diff --git a/spec/raix/runtime/client_spec.rb b/spec/raix/runtime/client_spec.rb new file mode 100644 index 0000000..3bcd109 --- /dev/null +++ b/spec/raix/runtime/client_spec.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require "spec_helper" + +RSpec.describe Raix::Runtime::Client do + let(:configuration) { Raix::Configuration.new(fallback: nil) } + subject(:client) { 
described_class.new(configuration:) } + + before do + configuration.openai_api_key = "openai-key" + configuration.openrouter_api_key = "openrouter-key" + end + + it "routes gpt-* models to OpenAI" do + openai_provider = instance_double(Raix::Runtime::Providers::OpenAI) + allow(Raix::Runtime::Providers::OpenAI).to receive(:new).and_return(openai_provider) + allow(openai_provider).to receive(:chat_completions).and_return({ "choices" => [] }) + + client.complete(model: "gpt-4o", messages: [], params: {}, stream: nil, openai_override: nil) + + expect(openai_provider).to have_received(:chat_completions) + end + + it "routes non-openai models to OpenRouter" do + openrouter_provider = instance_double(Raix::Runtime::Providers::OpenRouter) + allow(Raix::Runtime::Providers::OpenRouter).to receive(:new).and_return(openrouter_provider) + allow(openrouter_provider).to receive(:chat_completions).and_return({ "choices" => [] }) + + client.complete(model: "meta-llama/llama-3.3-8b-instruct:free", messages: [], params: {}, stream: nil, openai_override: nil) + + expect(openrouter_provider).to have_received(:chat_completions) + end + + it "forces OpenAI when openai_override is provided" do + openai_provider = instance_double(Raix::Runtime::Providers::OpenAI) + allow(Raix::Runtime::Providers::OpenAI).to receive(:new).and_return(openai_provider) + allow(openai_provider).to receive(:chat_completions).and_return({ "choices" => [] }) + + client.complete(model: "meta-llama/llama-3.3-8b-instruct:free", messages: [], params: {}, stream: nil, openai_override: "gpt-4o-mini") + + expect(openai_provider).to have_received(:chat_completions) + end + + it "uses a legacy client when configured" do + legacy_client = double("legacy_client") + configuration.openrouter_client = legacy_client + + expect(legacy_client).to receive(:complete).with( + model: "meta-llama/llama-3.3-8b-instruct:free", + messages: [], + params: {}, + stream: nil + ).and_return({ "choices" => [] }) + + expect do + 
client.complete(model: "meta-llama/llama-3.3-8b-instruct:free", messages: [], params: {}, stream: nil, openai_override: nil) + end.to output(/DEPRECATION: `openrouter_client`/).to_stderr + end +end diff --git a/spec/raix/runtime/providers/openai_spec.rb b/spec/raix/runtime/providers/openai_spec.rb new file mode 100644 index 0000000..ecd1320 --- /dev/null +++ b/spec/raix/runtime/providers/openai_spec.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +require "spec_helper" + +RSpec.describe Raix::Runtime::Providers::OpenAI do + let(:configuration) { Raix::Configuration.new(fallback: nil) } + let(:transport) { instance_double(Raix::Runtime::Transport) } + subject(:provider) { described_class.new(configuration:, transport:) } + + before do + configuration.openai_api_key = "openai-key" + configuration.openai_organization_id = "org_123" + configuration.openai_project_id = "proj_123" + end + + it "sends sync requests through transport.post_json" do + expect(transport).to receive(:post_json).with( + url: "https://api.openai.com/v1/chat/completions", + headers: hash_including( + "Authorization" => "Bearer openai-key", + "OpenAI-Organization" => "org_123", + "OpenAI-Project" => "proj_123" + ), + payload: hash_including(model: "gpt-4o", messages: [{ role: "user", content: "Hi" }]), + provider: "openai" + ).and_return({ "choices" => [] }) + + provider.chat_completions(model: "gpt-4o", messages: [{ role: "user", content: "Hi" }], params: {}, stream: nil) + end + + it "sends stream requests through transport.post_stream" do + callback = proc {} + + expect(transport).to receive(:post_stream).and_return({ "choices" => [] }) + provider.chat_completions(model: "gpt-4o", messages: [], params: {}, stream: callback) + end + + it "raises when API key is missing" do + configuration.openai_api_key = nil + + expect do + provider.chat_completions(model: "gpt-4o", messages: [], params: {}, stream: nil) + end.to raise_error(Raix::Runtime::ConfigurationError, /Missing OpenAI API key/) + end 
+end diff --git a/spec/raix/runtime/providers/openrouter_spec.rb b/spec/raix/runtime/providers/openrouter_spec.rb new file mode 100644 index 0000000..f303b34 --- /dev/null +++ b/spec/raix/runtime/providers/openrouter_spec.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +require "spec_helper" + +RSpec.describe Raix::Runtime::Providers::OpenRouter do + let(:configuration) { Raix::Configuration.new(fallback: nil) } + let(:transport) { instance_double(Raix::Runtime::Transport) } + subject(:provider) { described_class.new(configuration:, transport:) } + + before do + configuration.openrouter_api_key = "openrouter-key" + end + + it "sends sync requests through transport.post_json" do + expect(transport).to receive(:post_json).with( + url: "https://openrouter.ai/api/v1/chat/completions", + headers: hash_including("Authorization" => "Bearer openrouter-key"), + payload: hash_including(model: "meta-llama/llama-3.3-8b-instruct:free", messages: [{ role: "user", content: "Hi" }]), + provider: "openrouter" + ).and_return({ "choices" => [] }) + + provider.chat_completions(model: "meta-llama/llama-3.3-8b-instruct:free", messages: [{ role: "user", content: "Hi" }], params: {}, stream: nil) + end + + it "sends stream requests through transport.post_stream" do + callback = proc {} + + expect(transport).to receive(:post_stream).and_return({ "choices" => [] }) + provider.chat_completions(model: "meta-llama/llama-3.3-8b-instruct:free", messages: [], params: {}, stream: callback) + end + + it "raises when API key is missing" do + configuration.openrouter_api_key = nil + + expect do + provider.chat_completions(model: "meta-llama/llama-3.3-8b-instruct:free", messages: [], params: {}, stream: nil) + end.to raise_error(Raix::Runtime::ConfigurationError, /Missing OpenRouter API key/) + end +end diff --git a/spec/raix/runtime/stream_accumulator_spec.rb b/spec/raix/runtime/stream_accumulator_spec.rb new file mode 100644 index 0000000..794cc48 --- /dev/null +++ 
b/spec/raix/runtime/stream_accumulator_spec.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +require "spec_helper" + +RSpec.describe Raix::Runtime::StreamAccumulator do + subject(:accumulator) { described_class.new } + + it "accumulates assistant content chunks" do + accumulator.consume({ "choices" => [{ "delta" => { "content" => "Hello " }, "finish_reason" => nil }] }) + accumulator.consume({ "choices" => [{ "delta" => { "content" => "world" }, "finish_reason" => "stop" }] }) + + envelope = accumulator.envelope + expect(envelope.dig("choices", 0, "message", "content")).to eq("Hello world") + expect(envelope.dig("choices", 0, "finish_reason")).to eq("stop") + end + + it "accumulates fragmented tool call deltas" do + accumulator.consume( + { + "choices" => [ + { + "delta" => { + "tool_calls" => [ + { + "index" => 0, + "id" => "call_123", + "type" => "function", + "function" => { "name" => "check_", "arguments" => "{\"loc" } + } + ] + }, + "finish_reason" => nil + } + ] + } + ) + + accumulator.consume( + { + "choices" => [ + { + "delta" => { + "tool_calls" => [ + { + "index" => 0, + "function" => { "name" => "weather", "arguments" => "ation\":\"Paris\"}" } + } + ] + }, + "finish_reason" => "tool_calls" + } + ] + } + ) + + envelope = accumulator.envelope + tool_call = envelope.dig("choices", 0, "message", "tool_calls", 0) + expect(tool_call["id"]).to eq("call_123") + expect(tool_call.dig("function", "name")).to eq("check_weather") + expect(tool_call.dig("function", "arguments")).to eq("{\"location\":\"Paris\"}") + expect(envelope.dig("choices", 0, "finish_reason")).to eq("tool_calls") + end +end diff --git a/spec/raix/runtime/stream_parser_spec.rb b/spec/raix/runtime/stream_parser_spec.rb new file mode 100644 index 0000000..9151e4b --- /dev/null +++ b/spec/raix/runtime/stream_parser_spec.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require "spec_helper" + +RSpec.describe Raix::Runtime::StreamParser do + subject(:parser) { described_class.new } + + it 
"extracts data events separated by LF" do + chunk = "data: {\"a\":1}\n\ndata: {\"b\":2}\n\n" + expect(parser.feed(chunk)).to eq(['{"a":1}', '{"b":2}']) + end + + it "extracts data events separated by CRLF" do + chunk = "data: {\"a\":1}\r\n\r\ndata: {\"b\":2}\r\n\r\n" + expect(parser.feed(chunk)).to eq(['{"a":1}', '{"b":2}']) + end + + it "buffers incomplete events until full delimiter arrives" do + expect(parser.feed("data: {\"a\":1}\n")).to eq([]) + expect(parser.feed("\n")).to eq(['{"a":1}']) + end + + it "ignores non-data lines" do + chunk = ":comment\nevent: message\ndata: hello\n\n" + expect(parser.feed(chunk)).to eq(["hello"]) + end +end diff --git a/spec/raix/runtime/transport_spec.rb b/spec/raix/runtime/transport_spec.rb new file mode 100644 index 0000000..5fa5f7c --- /dev/null +++ b/spec/raix/runtime/transport_spec.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require "spec_helper" + +RSpec.describe Raix::Runtime::Transport do + subject(:transport) { described_class.new } + + let(:connection) { instance_double(Faraday::Connection) } + + before do + allow(transport).to receive(:connection).and_return(connection) + end + + describe "#post_json" do + it "parses a successful JSON response" do + response = instance_double(Faraday::Response, status: 200, body: '{"choices":[{"message":{"content":"ok"}}]}') + allow(connection).to receive(:post).and_return(response) + + result = transport.post_json(url: "https://example.com", headers: {}, payload: {}, provider: "test") + expect(result.dig("choices", 0, "message", "content")).to eq("ok") + end + + it "raises a transport error for provider error responses" do + response = instance_double(Faraday::Response, status: 400, body: '{"error":{"message":"bad request"}}') + allow(connection).to receive(:post).and_return(response) + + expect do + transport.post_json(url: "https://example.com", headers: {}, payload: {}, provider: "test") + end.to raise_error(Raix::Runtime::TransportError, /bad request/) + end + + it 
"raises a transport error for non-json responses" do + response = instance_double(Faraday::Response, status: 502, body: "upstream failure") + allow(connection).to receive(:post).and_return(response) + + expect do + transport.post_json(url: "https://example.com", headers: {}, payload: {}, provider: "test") + end.to raise_error(Raix::Runtime::TransportError, /non-JSON/) + end + end + + describe "#post_stream" do + it "parses stream events and returns an assembled envelope" do + allow(connection).to receive(:post) do |_url, &block| + request = Struct.new(:options, :headers, :body).new(Faraday::RequestOptions.new, {}, nil) + block.call(request) + + request.options.on_data.call("data: {\"choices\":[{\"delta\":{\"content\":\"Hel\"},\"finish_reason\":null}]}\n\n", 0, nil) + request.options.on_data.call("data: {\"choices\":[{\"delta\":{\"content\":\"lo\"},\"finish_reason\":\"stop\"}]}\n\n", 0, nil) + request.options.on_data.call("data: [DONE]\n\n", 0, nil) + end + + emitted = +"" + result = transport.post_stream(url: "https://example.com", headers: {}, payload: {}, provider: "test") { |chunk| emitted << chunk } + + expect(emitted).to eq("Hello") + expect(result.dig("choices", 0, "message", "content")).to eq("Hello") + expect(result.dig("choices", 0, "finish_reason")).to eq("stop") + end + end +end diff --git a/spec/raix/transcript_store_spec.rb b/spec/raix/transcript_store_spec.rb new file mode 100644 index 0000000..ac1c004 --- /dev/null +++ b/spec/raix/transcript_store_spec.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +require "spec_helper" + +RSpec.describe Raix::TranscriptStore do + subject(:store) { described_class.new } + + it "stores abbreviated messages as-is" do + store << { user: "Hello" } + expect(store.flatten).to eq([{ user: "Hello" }]) + end + + it "normalizes standard role/content format into abbreviated format when possible" do + store << { role: "assistant", content: "Hi there" } + expect(store.flatten).to eq([{ assistant: "Hi there" }]) + end + + 
it "preserves tool payload messages in full format" do + store << { role: "tool", tool_call_id: "call_1", name: "check_weather", content: "Sunny" } + + expect(store.flatten).to eq([ + { role: "tool", tool_call_id: "call_1", name: "check_weather", content: "Sunny" } + ]) + end + + it "supports atomic array appends and flattening" do + store << [{ user: "A" }, { assistant: "B" }] + expect(store.flatten).to eq([{ user: "A" }, { assistant: "B" }]) + expect(store.size).to eq(2) + end + + it "clears the transcript" do + store << { user: "Hello" } + store.clear + expect(store.flatten).to eq([]) + end + + it "is safe for concurrent appends" do + threads = 10.times.map do |i| + Thread.new do + 50.times { |j| store << { user: "t#{i}-#{j}" } } + end + end + threads.each(&:join) + + expect(store.size).to eq(500) + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 8f13d49..0a21057 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -3,7 +3,6 @@ require "dotenv" require "faraday" require "faraday/retry" -require "ruby_llm" require "pry" require "raix" @@ -24,16 +23,9 @@ Dotenv.load -RubyLLM.configure do |config| +Raix.configure do |config| config.openrouter_api_key = ENV.fetch("OR_ACCESS_TOKEN", nil) config.openai_api_key = ENV.fetch("OAI_ACCESS_TOKEN", nil) - config.log_level = Logger::DEBUG -end - -Raix.configure do |config| - # Legacy support - can still set these if needed - # config.openrouter_client = OpenRouter::Client.new(access_token: ENV.fetch("OR_ACCESS_TOKEN", nil)) - # config.openai_client = OpenAI::Client.new(access_token: ENV.fetch("OAI_ACCESS_TOKEN", nil)) end RSpec.configure do |config|