diff --git a/apps/docs/content/docs/providers/togetherai.mdx b/apps/docs/content/docs/providers/togetherai.mdx
index 320484e..b7e43f7 100644
--- a/apps/docs/content/docs/providers/togetherai.mdx
+++ b/apps/docs/content/docs/providers/togetherai.mdx
@@ -31,26 +31,49 @@ Sign up and get your API key at [api.together.xyz/settings/api-keys](https://api
TOGETHER_API_KEY=your-key-here
```
-### 4. Streaming API route
+### 4. Create runtime API route
```ts title="app/api/chat/route.ts"
-import { streamText } from '@yourgpt/llm-sdk';
-import { togetherai } from '@yourgpt/llm-sdk/togetherai';
+import { createRuntime } from '@yourgpt/llm-sdk';
+import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai';
-export async function POST(req: Request) {
- const { messages } = await req.json();
+const together = createTogetherAI({
+ apiKey: process.env.TOGETHER_API_KEY,
+});
- const result = await streamText({
- model: togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo'),
- system: 'You are a helpful assistant.',
- messages,
- });
+const runtime = createRuntime({
+ provider: together,
+ model: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
+ systemPrompt: 'You are a helpful assistant.',
+});
- return result.toTextStreamResponse();
+export async function POST(request: Request) {
+ return runtime.handleRequest(request);
}
```
-### 5. Generate text
+### 5. Connect Copilot UI
+
+```tsx title="app/page.tsx"
+'use client';
+
+import { CopilotProvider } from '@yourgpt/copilot-sdk/react';
+import { CopilotChat } from '@yourgpt/copilot-sdk/ui';
+
+export default function Page() {
+ return (
+    <CopilotProvider runtimeUrl="/api/chat">
+      <CopilotChat />
+    </CopilotProvider>
+ );
+}
+```
+
+---
+
+## Modern Pattern (Direct)
+
+For simpler use cases without the runtime, use `togetherai()` directly with `generateText` or `streamText`:
```ts
import { generateText } from '@yourgpt/llm-sdk';
@@ -64,6 +87,19 @@ const result = await generateText({
console.log(result.text);
```
+```ts
+import { streamText } from '@yourgpt/llm-sdk';
+import { togetherai } from '@yourgpt/llm-sdk/togetherai';
+
+const result = await streamText({
+ model: togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo'),
+ system: 'You are a helpful assistant.',
+ messages,
+});
+
+return result.toTextStreamResponse();
+```
+
---
## Available Models
@@ -76,9 +112,6 @@ togetherai('deepseek-ai/DeepSeek-R1') // reasoning model
// Llama
togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo') // 131K ctx, fast
-togetherai('meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo') // 130K ctx
-togetherai('meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo')
-togetherai('meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo')
// Qwen
togetherai('Qwen/Qwen3.5-397B-A17B') // 262K ctx
@@ -87,11 +120,10 @@ togetherai('Qwen/Qwen3.5-9B')
// Gemma
togetherai('google/gemma-4-31B-it')
-// Kimi
+// Other
+togetherai('openai/gpt-oss-120b')
togetherai('moonshotai/Kimi-K2.5') // 262K ctx
-
-// GLM
-togetherai('zai-org/GLM-5.1') // 202K ctx
+togetherai('MiniMaxAI/MiniMax-M2.5')
```
Any model ID listed on [together.ai/models](https://api.together.xyz/models) works.
@@ -101,21 +133,79 @@ Any model ID listed on [together.ai/models](https://api.together.xyz/models) wor
## Configuration
```ts
-import { togetherai } from '@yourgpt/llm-sdk/togetherai';
+import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai';
-// Explicit API key
-const model = togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo', {
+// With explicit API key
+const together = createTogetherAI({
apiKey: 'your-key',
});
// Custom base URL (e.g. self-hosted or proxy)
+const togetherProxy = createTogetherAI({
+  apiKey: 'your-key',
+  baseURL: 'https://my-proxy.example.com/v1',
+});
+```
+
+Or with the modern pattern:
+
+```ts
+import { togetherai } from '@yourgpt/llm-sdk/togetherai';
+
const model = togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo', {
+ apiKey: 'your-key',
baseURL: 'https://my-proxy.example.com/v1',
});
```
---
+## Fallback Chain
+
+Automatically fail over to backup models when the primary is unavailable or rate-limited:
+
+```ts title="app/api/chat/route.ts"
+import { createRuntime } from '@yourgpt/llm-sdk';
+import { createFallbackChain } from '@yourgpt/llm-sdk/fallback';
+import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai';
+
+const together = createTogetherAI({
+ apiKey: process.env.TOGETHER_API_KEY,
+});
+
+const chain = createFallbackChain({
+ models: [
+ together.languageModel('meta-llama/Llama-3.3-70B-Instruct-Turbo'),
+ together.languageModel('deepseek-ai/DeepSeek-V3'),
+ together.languageModel('Qwen/Qwen3.5-9B'),
+ together.languageModel('google/gemma-4-31B-it'),
+ ],
+ strategy: 'priority',
+ retries: 1,
+ retryDelay: 500,
+ retryBackoff: 'exponential',
+ onFallback: ({ attemptedModel, nextModel, error }) => {
+ console.warn(`[fallback] ${attemptedModel} → ${nextModel} | ${error.message}`);
+ },
+});
+
+const runtime = createRuntime({
+ adapter: chain,
+ systemPrompt: 'You are a helpful assistant.',
+});
+
+export async function POST(request: Request) {
+ return runtime.handleRequest(request);
+}
+```
+
+
+With `strategy: 'priority'`, the first model handles all traffic until it fails.
+Use `strategy: 'round-robin'` to distribute load evenly across models.
+
+
+---
+
## Tool Calling
Many Together AI models support tool calling:
@@ -145,24 +235,6 @@ const result = await generateText({
---
-## With Copilot UI
-
-```tsx title="app/providers.tsx"
-'use client';
-
-import { CopilotProvider } from '@yourgpt/copilot-sdk/react';
-
-export function Providers({ children }: { children: React.ReactNode }) {
- return (
-
- {children}
-
- );
-}
-```
-
----
-
## Next Steps
- [Fireworks](/docs/providers/fireworks) - Another fast open-source model platform
diff --git a/examples/playground/app/api/yourgpt-server/route.ts b/examples/playground/app/api/yourgpt-server/route.ts
deleted file mode 100644
index 683b478..0000000
--- a/examples/playground/app/api/yourgpt-server/route.ts
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Proxy to local yourgpt-server-demo for testing SDK stream/non-stream endpoints.
- *
- * Routes based on `streaming` field in the request body:
- * streaming: true → /api/copilot/stream (SSE)
- * streaming: false → /api/copilot/chat (JSON)
- *
- * Set YOURGPT_SERVER_URL in .env.local to point at your local server.
- * Default: http://localhost:3001
- */
-
-const SERVER_URL = process.env.YOURGPT_SERVER_URL || "http://localhost:3001";
-
-export async function POST(request: Request) {
- const body = await request.json();
- const isStreaming = body.streaming !== false;
- const endpoint = isStreaming ? "/api/copilot/stream" : "/api/copilot/chat";
- const targetUrl = `${SERVER_URL}${endpoint}`;
-
- const upstream = await fetch(targetUrl, {
- method: "POST",
- headers: { "Content-Type": "application/json" },
- body: JSON.stringify(body),
- });
-
- // Pass the response body (streamed or JSON) straight through
- return new Response(upstream.body, {
- status: upstream.status,
- headers: {
- "Content-Type":
- upstream.headers.get("Content-Type") ?? "application/json",
- // Forward cache-control so SSE isn't buffered
- "Cache-Control": "no-cache",
- "X-Accel-Buffering": "no",
- },
- });
-}
diff --git a/examples/playground/app/page.tsx b/examples/playground/app/page.tsx
index 5d6dc05..ba2d863 100644
--- a/examples/playground/app/page.tsx
+++ b/examples/playground/app/page.tsx
@@ -119,8 +119,7 @@ export default function PlaygroundPage() {
}, [actions]);
// Derived state
- const hasApiKey =
- selectedProvider === "yourgpt-server" || !!apiKeys[selectedProvider];
+ const hasApiKey = !!apiKeys[selectedProvider];
// Don't render until mounted (avoid hydration issues)
if (!mounted) return null;
diff --git a/examples/playground/lib/constants.ts b/examples/playground/lib/constants.ts
index 71ee2f5..4d4f1b8 100644
--- a/examples/playground/lib/constants.ts
+++ b/examples/playground/lib/constants.ts
@@ -102,18 +102,6 @@ export const providers: ProviderConfig[] = [
createProvider: "createOpenRouter",
importPath: "@yourgpt/llm-sdk/openrouter",
},
- {
- id: "yourgpt-server",
- name: "YourGPT Server",
- model: "local demo",
- color: "#f59e0b",
- keyPlaceholder: "",
- keyLink: "",
- keyLinkText: "",
- envVar: "",
- createProvider: "",
- importPath: "",
- },
];
// Sample person data for useAIContext demo
@@ -173,7 +161,6 @@ export const INITIAL_API_KEYS: ApiKeys = {
google: "",
xai: "",
openrouter: "",
- "yourgpt-server": "",
};
// OpenRouter model options for the model selector (static fallback)
diff --git a/examples/playground/lib/types.ts b/examples/playground/lib/types.ts
index a31fd21..5129216 100644
--- a/examples/playground/lib/types.ts
+++ b/examples/playground/lib/types.ts
@@ -35,7 +35,6 @@ export interface ApiKeys {
google: string;
xai: string;
openrouter: string;
- "yourgpt-server"?: string;
}
export type ProviderId =
@@ -43,8 +42,7 @@ export type ProviderId =
| "anthropic"
| "google"
| "xai"
- | "openrouter"
- | "yourgpt-server";
+ | "openrouter";
export interface ProviderConfig {
id: ProviderId;
diff --git a/examples/togetherai-demo/app/api/chat/route.ts b/examples/togetherai-demo/app/api/chat/route.ts
index c19fbf8..6a45d57 100644
--- a/examples/togetherai-demo/app/api/chat/route.ts
+++ b/examples/togetherai-demo/app/api/chat/route.ts
@@ -1,6 +1,7 @@
import { createRuntime } from "@yourgpt/llm-sdk";
+import { createFallbackChain } from "@yourgpt/llm-sdk/fallback";
import { createTogetherAI } from "@yourgpt/llm-sdk/togetherai";
-import { DEFAULT_MODEL } from "@/lib/models";
+import { DEFAULT_MODEL, FALLBACK_MODELS } from "@/lib/models";
const SYSTEM_PROMPT = `You are a helpful AI assistant powered by Together AI.
You have access to many different open-source AI models and can help with a wide variety of tasks.
@@ -12,6 +13,7 @@ export async function POST(request: Request) {
// Get model from query param
const model = url.searchParams.get("model") || DEFAULT_MODEL;
+ const useFallback = url.searchParams.get("fallback") === "true";
// Get API key from environment
const apiKey = process.env.TOGETHER_API_KEY;
@@ -26,10 +28,43 @@ export async function POST(request: Request) {
);
}
- // Create Together AI provider
const together = createTogetherAI({ apiKey });
- // Create runtime with the selected model
+ if (useFallback) {
+ // Fallback chain: primary model → fallback models
+ const fallbackModelIds = FALLBACK_MODELS.filter((id) => id !== model);
+ const models = [model, ...fallbackModelIds].map((id) =>
+ together.languageModel(id),
+ );
+
+ const chain = createFallbackChain({
+ models,
+ strategy: "priority",
+ retries: 1,
+ retryDelay: 500,
+ retryBackoff: "exponential",
+ onRetry: ({ model, retryAttempt, maxRetries, delayMs, error }) => {
+ console.warn(
+ `[retry] ${model} attempt ${retryAttempt}/${maxRetries} — waiting ${delayMs}ms | ${(error as Error).message}`,
+ );
+ },
+ onFallback: ({ attemptedModel, nextModel, error, attempt }) => {
+ console.warn(
+ `[fallback] attempt ${attempt}: ${attemptedModel} → ${nextModel} | ${(error as Error).message}`,
+ );
+ },
+ });
+
+ const runtime = createRuntime({
+ adapter: chain,
+ systemPrompt: SYSTEM_PROMPT,
+ debug: process.env.NODE_ENV === "development",
+ });
+
+ return await runtime.handleRequest(request);
+ }
+
+ // Single model (no fallback)
const runtime = createRuntime({
provider: together,
model,
@@ -37,8 +72,7 @@ export async function POST(request: Request) {
debug: process.env.NODE_ENV === "development",
});
- const response = await runtime.handleRequest(request);
- return response;
+ return await runtime.handleRequest(request);
} catch (error) {
console.error("[Chat Route] Error:", error);
return Response.json(
@@ -51,13 +85,14 @@ export async function POST(request: Request) {
export async function GET(request: Request) {
const url = new URL(request.url);
const model = url.searchParams.get("model") || DEFAULT_MODEL;
-
- const hasEnvKey = !!process.env.TOGETHER_API_KEY;
+ const useFallback = url.searchParams.get("fallback") === "true";
return Response.json({
status: "ok",
provider: "togetherai",
model,
- configured: hasEnvKey,
+ fallback: useFallback,
+ fallbackModels: useFallback ? FALLBACK_MODELS : [],
+ configured: !!process.env.TOGETHER_API_KEY,
});
}
diff --git a/examples/togetherai-demo/app/page.tsx b/examples/togetherai-demo/app/page.tsx
index 6859084..21cf3dc 100644
--- a/examples/togetherai-demo/app/page.tsx
+++ b/examples/togetherai-demo/app/page.tsx
@@ -3,7 +3,12 @@
import { useState, useMemo, useEffect } from "react";
import { CopilotProvider } from "@yourgpt/copilot-sdk/react";
import { CopilotChat } from "@yourgpt/copilot-sdk/ui";
-import { MODEL_GROUPS, ALL_MODELS, DEFAULT_MODEL } from "@/lib/models";
+import {
+ MODEL_GROUPS,
+ ALL_MODELS,
+ DEFAULT_MODEL,
+ FALLBACK_MODELS,
+} from "@/lib/models";
import {
ExternalLink,
Github,
@@ -11,11 +16,13 @@ import {
Copy,
Check,
ChevronDown,
+ Shield,
} from "lucide-react";
export default function TogetherAIDemo() {
const [mounted, setMounted] = useState(false);
const [selectedModel, setSelectedModel] = useState(DEFAULT_MODEL);
+ const [fallbackEnabled, setFallbackEnabled] = useState(false);
const [copied, setCopied] = useState(false);
useEffect(() => {
@@ -31,8 +38,9 @@ export default function TogetherAIDemo() {
const runtimeUrl = useMemo(() => {
const params = new URLSearchParams();
params.set("model", selectedModel);
+ if (fallbackEnabled) params.set("fallback", "true");
return `/api/chat?${params.toString()}`;
- }, [selectedModel]);
+ }, [selectedModel, fallbackEnabled]);
const selectedModelInfo = ALL_MODELS.find((m) => m.id === selectedModel);
@@ -96,6 +104,63 @@ export default function TogetherAIDemo() {
)}
+ {/* Fallback Chain */}
+
+
+
+
+
+ {fallbackEnabled ? (
+
+
+
+ Auto-failover enabled
+
+
+ If the primary model fails, the request automatically falls
+ through to the next model in the chain:
+
+
+ {FALLBACK_MODELS.map((id, i) => (
+
+
+ {i + 1}.
+
+
+ {id === selectedModel ? `${id} (primary)` : id}
+
+
+ ))}
+
+
+ ) : (
+
+ Enable to automatically try backup models when the primary model
+ is unavailable or rate-limited.
+
+ )}
+
+
{/* Setup Guide */}