From 4562116739c937e94b5a2b6fc7581230c10baec6 Mon Sep 17 00:00:00 2001 From: Sahil Date: Fri, 10 Apr 2026 12:52:42 +0530 Subject: [PATCH 1/4] feat(togetherai): add fallback chain support, update docs with createRuntime pattern - Add fallback chain with priority strategy + retry logic to demo API route - Add fallback toggle UI in sidebar showing chain order - Update docs: createTogetherAI + createRuntime as primary pattern, fallback chain section, cleaned up model list - Add REST test script for all models - Bump llm-sdk to 2.1.9 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../content/docs/providers/togetherai.mdx | 152 +++++++++++++----- .../togetherai-demo/app/api/chat/route.ts | 51 +++++- examples/togetherai-demo/app/page.tsx | 71 +++++++- examples/togetherai-demo/lib/models.ts | 12 ++ examples/togetherai-demo/test-rest.sh | 81 ++++++++++ packages/llm-sdk/package.json | 2 +- 6 files changed, 317 insertions(+), 52 deletions(-) create mode 100755 examples/togetherai-demo/test-rest.sh diff --git a/apps/docs/content/docs/providers/togetherai.mdx b/apps/docs/content/docs/providers/togetherai.mdx index 320484e..b7e43f7 100644 --- a/apps/docs/content/docs/providers/togetherai.mdx +++ b/apps/docs/content/docs/providers/togetherai.mdx @@ -31,26 +31,49 @@ Sign up and get your API key at [api.together.xyz/settings/api-keys](https://api TOGETHER_API_KEY=your-key-here ``` -### 4. Streaming API route +### 4. 
Create runtime API route ```ts title="app/api/chat/route.ts" -import { streamText } from '@yourgpt/llm-sdk'; -import { togetherai } from '@yourgpt/llm-sdk/togetherai'; +import { createRuntime } from '@yourgpt/llm-sdk'; +import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai'; -export async function POST(req: Request) { - const { messages } = await req.json(); +const together = createTogetherAI({ + apiKey: process.env.TOGETHER_API_KEY, +}); - const result = await streamText({ - model: togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo'), - system: 'You are a helpful assistant.', - messages, - }); +const runtime = createRuntime({ + provider: together, + model: 'meta-llama/Llama-3.3-70B-Instruct-Turbo', + systemPrompt: 'You are a helpful assistant.', +}); - return result.toTextStreamResponse(); +export async function POST(request: Request) { + return runtime.handleRequest(request); } ``` -### 5. Generate text +### 5. Connect Copilot UI + +```tsx title="app/page.tsx" +'use client'; + +import { CopilotProvider } from '@yourgpt/copilot-sdk/react'; +import { CopilotChat } from '@yourgpt/copilot-sdk/ui'; + +export default function Page() { + return ( + + + + ); +} +``` + +--- + +## Modern Pattern (Direct) + +For simpler use cases without the runtime, use `togetherai()` directly with `generateText` or `streamText`: ```ts import { generateText } from '@yourgpt/llm-sdk'; @@ -64,6 +87,19 @@ const result = await generateText({ console.log(result.text); ``` +```ts +import { streamText } from '@yourgpt/llm-sdk'; +import { togetherai } from '@yourgpt/llm-sdk/togetherai'; + +const result = await streamText({ + model: togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo'), + system: 'You are a helpful assistant.', + messages, +}); + +return result.toTextStreamResponse(); +``` + --- ## Available Models @@ -76,9 +112,6 @@ togetherai('deepseek-ai/DeepSeek-R1') // reasoning model // Llama togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo') // 131K ctx, fast 
-togetherai('meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo') // 130K ctx -togetherai('meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo') -togetherai('meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo') // Qwen togetherai('Qwen/Qwen3.5-397B-A17B') // 262K ctx @@ -87,11 +120,10 @@ togetherai('Qwen/Qwen3.5-9B') // Gemma togetherai('google/gemma-4-31B-it') -// Kimi +// Other +togetherai('openai/gpt-oss-120b') togetherai('moonshotai/Kimi-K2.5') // 262K ctx - -// GLM -togetherai('zai-org/GLM-5.1') // 202K ctx +togetherai('MiniMaxAI/MiniMax-M2.5') ``` Any model ID listed on [together.ai/models](https://api.together.xyz/models) works. @@ -101,21 +133,79 @@ Any model ID listed on [together.ai/models](https://api.together.xyz/models) wor ## Configuration ```ts -import { togetherai } from '@yourgpt/llm-sdk/togetherai'; +import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai'; -// Explicit API key -const model = togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo', { +// With explicit API key +const together = createTogetherAI({ apiKey: 'your-key', }); // Custom base URL (e.g. 
self-hosted or proxy) +const proxiedTogether = createTogetherAI({ + apiKey: 'your-key', + baseUrl: 'https://my-proxy.example.com/v1', +}); +``` + +Or with the modern pattern: + +```ts +import { togetherai } from '@yourgpt/llm-sdk/togetherai'; + const model = togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo', { + apiKey: 'your-key', baseURL: 'https://my-proxy.example.com/v1', }); ``` --- +## Fallback Chain + +Automatically fail over to backup models when the primary is unavailable or rate-limited: + +```ts title="app/api/chat/route.ts" +import { createRuntime } from '@yourgpt/llm-sdk'; +import { createFallbackChain } from '@yourgpt/llm-sdk/fallback'; +import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai'; + +const together = createTogetherAI({ + apiKey: process.env.TOGETHER_API_KEY, +}); + +const chain = createFallbackChain({ + models: [ + together.languageModel('meta-llama/Llama-3.3-70B-Instruct-Turbo'), + together.languageModel('deepseek-ai/DeepSeek-V3'), + together.languageModel('Qwen/Qwen3.5-9B'), + together.languageModel('google/gemma-4-31B-it'), + ], + strategy: 'priority', + retries: 1, + retryDelay: 500, + retryBackoff: 'exponential', + onFallback: ({ attemptedModel, nextModel, error }) => { + console.warn(`[fallback] ${attemptedModel} → ${nextModel} | ${error.message}`); + }, +}); + +const runtime = createRuntime({ + adapter: chain, + systemPrompt: 'You are a helpful assistant.', +}); + +export async function POST(request: Request) { + return runtime.handleRequest(request); +} +``` + + +With `strategy: 'priority'`, the first model handles all traffic until it fails. +Use `strategy: 'round-robin'` to distribute load evenly across models. 
+ + +--- + ## Tool Calling Many Together AI models support tool calling: @@ -145,24 +235,6 @@ const result = await generateText({ --- -## With Copilot UI - -```tsx title="app/providers.tsx" -'use client'; - -import { CopilotProvider } from '@yourgpt/copilot-sdk/react'; - -export function Providers({ children }: { children: React.ReactNode }) { - return ( - - {children} - - ); -} -``` - ---- - ## Next Steps - [Fireworks](/docs/providers/fireworks) - Another fast open-source model platform diff --git a/examples/togetherai-demo/app/api/chat/route.ts b/examples/togetherai-demo/app/api/chat/route.ts index c19fbf8..6a45d57 100644 --- a/examples/togetherai-demo/app/api/chat/route.ts +++ b/examples/togetherai-demo/app/api/chat/route.ts @@ -1,6 +1,7 @@ import { createRuntime } from "@yourgpt/llm-sdk"; +import { createFallbackChain } from "@yourgpt/llm-sdk/fallback"; import { createTogetherAI } from "@yourgpt/llm-sdk/togetherai"; -import { DEFAULT_MODEL } from "@/lib/models"; +import { DEFAULT_MODEL, FALLBACK_MODELS } from "@/lib/models"; const SYSTEM_PROMPT = `You are a helpful AI assistant powered by Together AI. You have access to many different open-source AI models and can help with a wide variety of tasks. 
@@ -12,6 +13,7 @@ export async function POST(request: Request) { // Get model from query param const model = url.searchParams.get("model") || DEFAULT_MODEL; + const useFallback = url.searchParams.get("fallback") === "true"; // Get API key from environment const apiKey = process.env.TOGETHER_API_KEY; @@ -26,10 +28,43 @@ export async function POST(request: Request) { ); } - // Create Together AI provider const together = createTogetherAI({ apiKey }); - // Create runtime with the selected model + if (useFallback) { + // Fallback chain: primary model → fallback models + const fallbackModelIds = FALLBACK_MODELS.filter((id) => id !== model); + const models = [model, ...fallbackModelIds].map((id) => + together.languageModel(id), + ); + + const chain = createFallbackChain({ + models, + strategy: "priority", + retries: 1, + retryDelay: 500, + retryBackoff: "exponential", + onRetry: ({ model, retryAttempt, maxRetries, delayMs, error }) => { + console.warn( + `[retry] ${model} attempt ${retryAttempt}/${maxRetries} — waiting ${delayMs}ms | ${(error as Error).message}`, + ); + }, + onFallback: ({ attemptedModel, nextModel, error, attempt }) => { + console.warn( + `[fallback] attempt ${attempt}: ${attemptedModel} → ${nextModel} | ${(error as Error).message}`, + ); + }, + }); + + const runtime = createRuntime({ + adapter: chain, + systemPrompt: SYSTEM_PROMPT, + debug: process.env.NODE_ENV === "development", + }); + + return await runtime.handleRequest(request); + } + + // Single model (no fallback) const runtime = createRuntime({ provider: together, model, @@ -37,8 +72,7 @@ export async function POST(request: Request) { debug: process.env.NODE_ENV === "development", }); - const response = await runtime.handleRequest(request); - return response; + return await runtime.handleRequest(request); } catch (error) { console.error("[Chat Route] Error:", error); return Response.json( @@ -51,13 +85,14 @@ export async function POST(request: Request) { export async function GET(request: 
Request) { const url = new URL(request.url); const model = url.searchParams.get("model") || DEFAULT_MODEL; - - const hasEnvKey = !!process.env.TOGETHER_API_KEY; + const useFallback = url.searchParams.get("fallback") === "true"; return Response.json({ status: "ok", provider: "togetherai", model, - configured: hasEnvKey, + fallback: useFallback, + fallbackModels: useFallback ? FALLBACK_MODELS : [], + configured: !!process.env.TOGETHER_API_KEY, }); } diff --git a/examples/togetherai-demo/app/page.tsx b/examples/togetherai-demo/app/page.tsx index 6859084..21cf3dc 100644 --- a/examples/togetherai-demo/app/page.tsx +++ b/examples/togetherai-demo/app/page.tsx @@ -3,7 +3,12 @@ import { useState, useMemo, useEffect } from "react"; import { CopilotProvider } from "@yourgpt/copilot-sdk/react"; import { CopilotChat } from "@yourgpt/copilot-sdk/ui"; -import { MODEL_GROUPS, ALL_MODELS, DEFAULT_MODEL } from "@/lib/models"; +import { + MODEL_GROUPS, + ALL_MODELS, + DEFAULT_MODEL, + FALLBACK_MODELS, +} from "@/lib/models"; import { ExternalLink, Github, @@ -11,11 +16,13 @@ import { Copy, Check, ChevronDown, + Shield, } from "lucide-react"; export default function TogetherAIDemo() { const [mounted, setMounted] = useState(false); const [selectedModel, setSelectedModel] = useState(DEFAULT_MODEL); + const [fallbackEnabled, setFallbackEnabled] = useState(false); const [copied, setCopied] = useState(false); useEffect(() => { @@ -31,8 +38,9 @@ export default function TogetherAIDemo() { const runtimeUrl = useMemo(() => { const params = new URLSearchParams(); params.set("model", selectedModel); + if (fallbackEnabled) params.set("fallback", "true"); return `/api/chat?${params.toString()}`; - }, [selectedModel]); + }, [selectedModel, fallbackEnabled]); const selectedModelInfo = ALL_MODELS.find((m) => m.id === selectedModel); @@ -96,6 +104,63 @@ export default function TogetherAIDemo() { )} + {/* Fallback Chain */} +
+
+ + +
+ {fallbackEnabled ? ( +
+
+ + Auto-failover enabled +
+

+ If the primary model fails, the request automatically falls + through to the next model in the chain: +

+
+ {FALLBACK_MODELS.map((id, i) => ( +
+ + {i + 1}. + + + {id === selectedModel ? `${id} (primary)` : id} + +
+ ))} +
+
+ ) : ( +

+ Enable to automatically try backup models when the primary model + is unavailable or rate-limited. +

+ )} +
+ {/* Setup Guide */}