diff --git a/apps/docs/content/docs/providers/togetherai.mdx b/apps/docs/content/docs/providers/togetherai.mdx
index 320484e..b7e43f7 100644
--- a/apps/docs/content/docs/providers/togetherai.mdx
+++ b/apps/docs/content/docs/providers/togetherai.mdx
@@ -31,26 +31,49 @@ Sign up and get your API key at [api.together.xyz/settings/api-keys](https://api
TOGETHER_API_KEY=your-key-here
```
-### 4. Streaming API route
+### 4. Create runtime API route
```ts title="app/api/chat/route.ts"
-import { streamText } from '@yourgpt/llm-sdk';
-import { togetherai } from '@yourgpt/llm-sdk/togetherai';
+import { createRuntime } from '@yourgpt/llm-sdk';
+import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai';
-export async function POST(req: Request) {
- const { messages } = await req.json();
+const together = createTogetherAI({
+ apiKey: process.env.TOGETHER_API_KEY,
+});
- const result = await streamText({
- model: togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo'),
- system: 'You are a helpful assistant.',
- messages,
- });
+const runtime = createRuntime({
+ provider: together,
+ model: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
+ systemPrompt: 'You are a helpful assistant.',
+});
- return result.toTextStreamResponse();
+export async function POST(request: Request) {
+ return runtime.handleRequest(request);
}
```
-### 5. Generate text
+### 5. Connect Copilot UI
+
+```tsx title="app/page.tsx"
+'use client';
+
+import { CopilotProvider } from '@yourgpt/copilot-sdk/react';
+import { CopilotChat } from '@yourgpt/copilot-sdk/ui';
+
+export default function Page() {
+ return (
+    <CopilotProvider runtimeUrl="/api/chat">
+      <CopilotChat />
+    </CopilotProvider>
+ );
+}
+```
+
+---
+
+## Modern Pattern (Direct)
+
+For simpler use cases without the runtime, use `togetherai()` directly with `generateText` or `streamText`:
```ts
import { generateText } from '@yourgpt/llm-sdk';
@@ -64,6 +87,19 @@ const result = await generateText({
console.log(result.text);
```
+```ts
+import { streamText } from '@yourgpt/llm-sdk';
+import { togetherai } from '@yourgpt/llm-sdk/togetherai';
+
+const result = await streamText({
+ model: togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo'),
+ system: 'You are a helpful assistant.',
+ messages,
+});
+
+return result.toTextStreamResponse();
+```
+
---
## Available Models
@@ -76,9 +112,6 @@ togetherai('deepseek-ai/DeepSeek-R1') // reasoning model
// Llama
togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo') // 131K ctx, fast
-togetherai('meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo') // 130K ctx
-togetherai('meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo')
-togetherai('meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo')
// Qwen
togetherai('Qwen/Qwen3.5-397B-A17B') // 262K ctx
@@ -87,11 +120,10 @@ togetherai('Qwen/Qwen3.5-9B')
// Gemma
togetherai('google/gemma-4-31B-it')
-// Kimi
+// Other
+togetherai('openai/gpt-oss-120b')
togetherai('moonshotai/Kimi-K2.5') // 262K ctx
-
-// GLM
-togetherai('zai-org/GLM-5.1') // 202K ctx
+togetherai('MiniMaxAI/MiniMax-M2.5')
```
Any model ID listed on [together.ai/models](https://api.together.xyz/models) works.
@@ -101,21 +133,79 @@ Any model ID listed on [together.ai/models](https://api.together.xyz/models) wor
## Configuration
```ts
-import { togetherai } from '@yourgpt/llm-sdk/togetherai';
+import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai';
-// Explicit API key
-const model = togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo', {
+// With explicit API key
+const together = createTogetherAI({
apiKey: 'your-key',
});
// Custom base URL (e.g. self-hosted or proxy)
+const togetherProxy = createTogetherAI({
+  apiKey: 'your-key',
+  baseURL: 'https://my-proxy.example.com/v1',
+});
+```
+
+Or with the modern pattern:
+
+```ts
+import { togetherai } from '@yourgpt/llm-sdk/togetherai';
+
const model = togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo', {
+ apiKey: 'your-key',
baseURL: 'https://my-proxy.example.com/v1',
});
```
---
+## Fallback Chain
+
+Automatically fail over to backup models when the primary is unavailable or rate-limited:
+
+```ts title="app/api/chat/route.ts"
+import { createRuntime } from '@yourgpt/llm-sdk';
+import { createFallbackChain } from '@yourgpt/llm-sdk/fallback';
+import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai';
+
+const together = createTogetherAI({
+ apiKey: process.env.TOGETHER_API_KEY,
+});
+
+const chain = createFallbackChain({
+ models: [
+ together.languageModel('meta-llama/Llama-3.3-70B-Instruct-Turbo'),
+ together.languageModel('deepseek-ai/DeepSeek-V3'),
+ together.languageModel('Qwen/Qwen3.5-9B'),
+ together.languageModel('google/gemma-4-31B-it'),
+ ],
+ strategy: 'priority',
+ retries: 1,
+ retryDelay: 500,
+ retryBackoff: 'exponential',
+ onFallback: ({ attemptedModel, nextModel, error }) => {
+ console.warn(`[fallback] ${attemptedModel} → ${nextModel} | ${error.message}`);
+ },
+});
+
+const runtime = createRuntime({
+ adapter: chain,
+ systemPrompt: 'You are a helpful assistant.',
+});
+
+export async function POST(request: Request) {
+ return runtime.handleRequest(request);
+}
+```
+
+
+With `strategy: 'priority'`, the first model handles all traffic until it fails.
+Use `strategy: 'round-robin'` to distribute load evenly across models.
+
+
+---
+
## Tool Calling
Many Together AI models support tool calling:
@@ -145,24 +235,6 @@ const result = await generateText({
---
-## With Copilot UI
-
-```tsx title="app/providers.tsx"
-'use client';
-
-import { CopilotProvider } from '@yourgpt/copilot-sdk/react';
-
-export function Providers({ children }: { children: React.ReactNode }) {
- return (
-
- {children}
-
- );
-}
-```
-
----
-
## Next Steps
- [Fireworks](/docs/providers/fireworks) - Another fast open-source model platform
diff --git a/examples/playground/app/api/yourgpt-server/route.ts b/examples/playground/app/api/yourgpt-server/route.ts
deleted file mode 100644
index 683b478..0000000
--- a/examples/playground/app/api/yourgpt-server/route.ts
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Proxy to local yourgpt-server-demo for testing SDK stream/non-stream endpoints.
- *
- * Routes based on `streaming` field in the request body:
- * streaming: true → /api/copilot/stream (SSE)
- * streaming: false → /api/copilot/chat (JSON)
- *
- * Set YOURGPT_SERVER_URL in .env.local to point at your local server.
- * Default: http://localhost:3001
- */
-
-const SERVER_URL = process.env.YOURGPT_SERVER_URL || "http://localhost:3001";
-
-export async function POST(request: Request) {
- const body = await request.json();
- const isStreaming = body.streaming !== false;
- const endpoint = isStreaming ? "/api/copilot/stream" : "/api/copilot/chat";
- const targetUrl = `${SERVER_URL}${endpoint}`;
-
- const upstream = await fetch(targetUrl, {
- method: "POST",
- headers: { "Content-Type": "application/json" },
- body: JSON.stringify(body),
- });
-
- // Pass the response body (streamed or JSON) straight through
- return new Response(upstream.body, {
- status: upstream.status,
- headers: {
- "Content-Type":
- upstream.headers.get("Content-Type") ?? "application/json",
- // Forward cache-control so SSE isn't buffered
- "Cache-Control": "no-cache",
- "X-Accel-Buffering": "no",
- },
- });
-}
diff --git a/examples/playground/app/page.tsx b/examples/playground/app/page.tsx
index 5d6dc05..ba2d863 100644
--- a/examples/playground/app/page.tsx
+++ b/examples/playground/app/page.tsx
@@ -119,8 +119,7 @@ export default function PlaygroundPage() {
}, [actions]);
// Derived state
- const hasApiKey =
- selectedProvider === "yourgpt-server" || !!apiKeys[selectedProvider];
+ const hasApiKey = !!apiKeys[selectedProvider];
// Don't render until mounted (avoid hydration issues)
if (!mounted) return null;
diff --git a/examples/playground/lib/constants.ts b/examples/playground/lib/constants.ts
index 71ee2f5..4d4f1b8 100644
--- a/examples/playground/lib/constants.ts
+++ b/examples/playground/lib/constants.ts
@@ -102,18 +102,6 @@ export const providers: ProviderConfig[] = [
createProvider: "createOpenRouter",
importPath: "@yourgpt/llm-sdk/openrouter",
},
- {
- id: "yourgpt-server",
- name: "YourGPT Server",
- model: "local demo",
- color: "#f59e0b",
- keyPlaceholder: "",
- keyLink: "",
- keyLinkText: "",
- envVar: "",
- createProvider: "",
- importPath: "",
- },
];
// Sample person data for useAIContext demo
@@ -173,7 +161,6 @@ export const INITIAL_API_KEYS: ApiKeys = {
google: "",
xai: "",
openrouter: "",
- "yourgpt-server": "",
};
// OpenRouter model options for the model selector (static fallback)
diff --git a/examples/playground/lib/types.ts b/examples/playground/lib/types.ts
index a31fd21..5129216 100644
--- a/examples/playground/lib/types.ts
+++ b/examples/playground/lib/types.ts
@@ -35,7 +35,6 @@ export interface ApiKeys {
google: string;
xai: string;
openrouter: string;
- "yourgpt-server"?: string;
}
export type ProviderId =
@@ -43,8 +42,7 @@ export type ProviderId =
| "anthropic"
| "google"
| "xai"
- | "openrouter"
- | "yourgpt-server";
+ | "openrouter";
export interface ProviderConfig {
id: ProviderId;
diff --git a/examples/togetherai-demo/app/api/chat/route.ts b/examples/togetherai-demo/app/api/chat/route.ts
index c19fbf8..6a45d57 100644
--- a/examples/togetherai-demo/app/api/chat/route.ts
+++ b/examples/togetherai-demo/app/api/chat/route.ts
@@ -1,6 +1,7 @@
import { createRuntime } from "@yourgpt/llm-sdk";
+import { createFallbackChain } from "@yourgpt/llm-sdk/fallback";
import { createTogetherAI } from "@yourgpt/llm-sdk/togetherai";
-import { DEFAULT_MODEL } from "@/lib/models";
+import { DEFAULT_MODEL, FALLBACK_MODELS } from "@/lib/models";
const SYSTEM_PROMPT = `You are a helpful AI assistant powered by Together AI.
You have access to many different open-source AI models and can help with a wide variety of tasks.
@@ -12,6 +13,7 @@ export async function POST(request: Request) {
// Get model from query param
const model = url.searchParams.get("model") || DEFAULT_MODEL;
+ const useFallback = url.searchParams.get("fallback") === "true";
// Get API key from environment
const apiKey = process.env.TOGETHER_API_KEY;
@@ -26,10 +28,43 @@ export async function POST(request: Request) {
);
}
- // Create Together AI provider
const together = createTogetherAI({ apiKey });
- // Create runtime with the selected model
+ if (useFallback) {
+ // Fallback chain: primary model → fallback models
+ const fallbackModelIds = FALLBACK_MODELS.filter((id) => id !== model);
+ const models = [model, ...fallbackModelIds].map((id) =>
+ together.languageModel(id),
+ );
+
+ const chain = createFallbackChain({
+ models,
+ strategy: "priority",
+ retries: 1,
+ retryDelay: 500,
+ retryBackoff: "exponential",
+ onRetry: ({ model, retryAttempt, maxRetries, delayMs, error }) => {
+ console.warn(
+ `[retry] ${model} attempt ${retryAttempt}/${maxRetries} — waiting ${delayMs}ms | ${(error as Error).message}`,
+ );
+ },
+ onFallback: ({ attemptedModel, nextModel, error, attempt }) => {
+ console.warn(
+ `[fallback] attempt ${attempt}: ${attemptedModel} → ${nextModel} | ${(error as Error).message}`,
+ );
+ },
+ });
+
+ const runtime = createRuntime({
+ adapter: chain,
+ systemPrompt: SYSTEM_PROMPT,
+ debug: process.env.NODE_ENV === "development",
+ });
+
+ return await runtime.handleRequest(request);
+ }
+
+ // Single model (no fallback)
const runtime = createRuntime({
provider: together,
model,
@@ -37,8 +72,7 @@ export async function POST(request: Request) {
debug: process.env.NODE_ENV === "development",
});
- const response = await runtime.handleRequest(request);
- return response;
+ return await runtime.handleRequest(request);
} catch (error) {
console.error("[Chat Route] Error:", error);
return Response.json(
@@ -51,13 +85,14 @@ export async function POST(request: Request) {
export async function GET(request: Request) {
const url = new URL(request.url);
const model = url.searchParams.get("model") || DEFAULT_MODEL;
-
- const hasEnvKey = !!process.env.TOGETHER_API_KEY;
+ const useFallback = url.searchParams.get("fallback") === "true";
return Response.json({
status: "ok",
provider: "togetherai",
model,
- configured: hasEnvKey,
+ fallback: useFallback,
+ fallbackModels: useFallback ? FALLBACK_MODELS : [],
+ configured: !!process.env.TOGETHER_API_KEY,
});
}
diff --git a/examples/togetherai-demo/app/page.tsx b/examples/togetherai-demo/app/page.tsx
index 6859084..21cf3dc 100644
--- a/examples/togetherai-demo/app/page.tsx
+++ b/examples/togetherai-demo/app/page.tsx
@@ -3,7 +3,12 @@
import { useState, useMemo, useEffect } from "react";
import { CopilotProvider } from "@yourgpt/copilot-sdk/react";
import { CopilotChat } from "@yourgpt/copilot-sdk/ui";
-import { MODEL_GROUPS, ALL_MODELS, DEFAULT_MODEL } from "@/lib/models";
+import {
+ MODEL_GROUPS,
+ ALL_MODELS,
+ DEFAULT_MODEL,
+ FALLBACK_MODELS,
+} from "@/lib/models";
import {
ExternalLink,
Github,
@@ -11,11 +16,13 @@ import {
Copy,
Check,
ChevronDown,
+ Shield,
} from "lucide-react";
export default function TogetherAIDemo() {
const [mounted, setMounted] = useState(false);
const [selectedModel, setSelectedModel] = useState(DEFAULT_MODEL);
+ const [fallbackEnabled, setFallbackEnabled] = useState(false);
const [copied, setCopied] = useState(false);
useEffect(() => {
@@ -31,8 +38,9 @@ export default function TogetherAIDemo() {
const runtimeUrl = useMemo(() => {
const params = new URLSearchParams();
params.set("model", selectedModel);
+ if (fallbackEnabled) params.set("fallback", "true");
return `/api/chat?${params.toString()}`;
- }, [selectedModel]);
+ }, [selectedModel, fallbackEnabled]);
const selectedModelInfo = ALL_MODELS.find((m) => m.id === selectedModel);
@@ -96,6 +104,63 @@ export default function TogetherAIDemo() {
)}
+ {/* Fallback Chain */}
+
+
+
+
+
+ {fallbackEnabled ? (
+
+
+
+ Auto-failover enabled
+
+
+ If the primary model fails, the request automatically falls
+ through to the next model in the chain:
+
+
+ {FALLBACK_MODELS.map((id, i) => (
+
+
+ {i + 1}.
+
+
+ {id === selectedModel ? `${id} (primary)` : id}
+
+
+ ))}
+
+
+ ) : (
+
+ Enable to automatically try backup models when the primary model
+ is unavailable or rate-limited.
+
+ )}
+
+
{/* Setup Guide */}