diff --git a/apps/docs/content/docs/providers/togetherai.mdx b/apps/docs/content/docs/providers/togetherai.mdx index 320484e..b7e43f7 100644 --- a/apps/docs/content/docs/providers/togetherai.mdx +++ b/apps/docs/content/docs/providers/togetherai.mdx @@ -31,26 +31,49 @@ Sign up and get your API key at [api.together.xyz/settings/api-keys](https://api TOGETHER_API_KEY=your-key-here ``` -### 4. Streaming API route +### 4. Create runtime API route ```ts title="app/api/chat/route.ts" -import { streamText } from '@yourgpt/llm-sdk'; -import { togetherai } from '@yourgpt/llm-sdk/togetherai'; +import { createRuntime } from '@yourgpt/llm-sdk'; +import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai'; -export async function POST(req: Request) { - const { messages } = await req.json(); +const together = createTogetherAI({ + apiKey: process.env.TOGETHER_API_KEY, +}); - const result = await streamText({ - model: togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo'), - system: 'You are a helpful assistant.', - messages, - }); +const runtime = createRuntime({ + provider: together, + model: 'meta-llama/Llama-3.3-70B-Instruct-Turbo', + systemPrompt: 'You are a helpful assistant.', +}); - return result.toTextStreamResponse(); +export async function POST(request: Request) { + return runtime.handleRequest(request); } ``` -### 5. Generate text +### 5. 
Connect Copilot UI + +```tsx title="app/page.tsx" +'use client'; + +import { CopilotProvider } from '@yourgpt/copilot-sdk/react'; +import { CopilotChat } from '@yourgpt/copilot-sdk/ui'; + +export default function Page() { + return ( + + + + ); +} +``` + +--- + +## Modern Pattern (Direct) + +For simpler use cases without the runtime, use `togetherai()` directly with `generateText` or `streamText`: ```ts import { generateText } from '@yourgpt/llm-sdk'; @@ -64,6 +87,19 @@ const result = await generateText({ console.log(result.text); ``` +```ts +import { streamText } from '@yourgpt/llm-sdk'; +import { togetherai } from '@yourgpt/llm-sdk/togetherai'; + +const result = await streamText({ + model: togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo'), + system: 'You are a helpful assistant.', + messages, +}); + +return result.toTextStreamResponse(); +``` + --- ## Available Models @@ -76,9 +112,6 @@ togetherai('deepseek-ai/DeepSeek-R1') // reasoning model // Llama togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo') // 131K ctx, fast -togetherai('meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo') // 130K ctx -togetherai('meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo') -togetherai('meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo') // Qwen togetherai('Qwen/Qwen3.5-397B-A17B') // 262K ctx @@ -87,11 +120,10 @@ togetherai('Qwen/Qwen3.5-9B') // Gemma togetherai('google/gemma-4-31B-it') -// Kimi +// Other +togetherai('openai/gpt-oss-120b') togetherai('moonshotai/Kimi-K2.5') // 262K ctx - -// GLM -togetherai('zai-org/GLM-5.1') // 202K ctx +togetherai('MiniMaxAI/MiniMax-M2.5') ``` Any model ID listed on [together.ai/models](https://api.together.xyz/models) works. 
@@ -101,21 +133,79 @@ Any model ID listed on [together.ai/models](https://api.together.xyz/models) wor ## Configuration ```ts -import { togetherai } from '@yourgpt/llm-sdk/togetherai'; +import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai'; -// Explicit API key -const model = togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo', { +// With explicit API key +const together = createTogetherAI({ apiKey: 'your-key', }); // Custom base URL (e.g. self-hosted or proxy) +const together = createTogetherAI({ + apiKey: 'your-key', + baseUrl: 'https://my-proxy.example.com/v1', +}); +``` + +Or with the modern pattern: + +```ts +import { togetherai } from '@yourgpt/llm-sdk/togetherai'; + const model = togetherai('meta-llama/Llama-3.3-70B-Instruct-Turbo', { + apiKey: 'your-key', baseURL: 'https://my-proxy.example.com/v1', }); ``` --- +## Fallback Chain + +Automatically fail over to backup models when the primary is unavailable or rate-limited: + +```ts title="app/api/chat/route.ts" +import { createRuntime } from '@yourgpt/llm-sdk'; +import { createFallbackChain } from '@yourgpt/llm-sdk/fallback'; +import { createTogetherAI } from '@yourgpt/llm-sdk/togetherai'; + +const together = createTogetherAI({ + apiKey: process.env.TOGETHER_API_KEY, +}); + +const chain = createFallbackChain({ + models: [ + together.languageModel('meta-llama/Llama-3.3-70B-Instruct-Turbo'), + together.languageModel('deepseek-ai/DeepSeek-V3'), + together.languageModel('Qwen/Qwen3.5-9B'), + together.languageModel('google/gemma-4-31B-it'), + ], + strategy: 'priority', + retries: 1, + retryDelay: 500, + retryBackoff: 'exponential', + onFallback: ({ attemptedModel, nextModel, error }) => { + console.warn(`[fallback] ${attemptedModel} → ${nextModel} | ${error.message}`); + }, +}); + +const runtime = createRuntime({ + adapter: chain, + systemPrompt: 'You are a helpful assistant.', +}); + +export async function POST(request: Request) { + return runtime.handleRequest(request); +} +``` + + +With `strategy: 
'priority'`, the first model handles all traffic until it fails. +Use `strategy: 'round-robin'` to distribute load evenly across models. + + +--- + ## Tool Calling Many Together AI models support tool calling: @@ -145,24 +235,6 @@ const result = await generateText({ --- -## With Copilot UI - -```tsx title="app/providers.tsx" -'use client'; - -import { CopilotProvider } from '@yourgpt/copilot-sdk/react'; - -export function Providers({ children }: { children: React.ReactNode }) { - return ( - - {children} - - ); -} -``` - ---- - ## Next Steps - [Fireworks](/docs/providers/fireworks) - Another fast open-source model platform diff --git a/examples/playground/app/api/yourgpt-server/route.ts b/examples/playground/app/api/yourgpt-server/route.ts deleted file mode 100644 index 683b478..0000000 --- a/examples/playground/app/api/yourgpt-server/route.ts +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Proxy to local yourgpt-server-demo for testing SDK stream/non-stream endpoints. - * - * Routes based on `streaming` field in the request body: - * streaming: true → /api/copilot/stream (SSE) - * streaming: false → /api/copilot/chat (JSON) - * - * Set YOURGPT_SERVER_URL in .env.local to point at your local server. - * Default: http://localhost:3001 - */ - -const SERVER_URL = process.env.YOURGPT_SERVER_URL || "http://localhost:3001"; - -export async function POST(request: Request) { - const body = await request.json(); - const isStreaming = body.streaming !== false; - const endpoint = isStreaming ? "/api/copilot/stream" : "/api/copilot/chat"; - const targetUrl = `${SERVER_URL}${endpoint}`; - - const upstream = await fetch(targetUrl, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(body), - }); - - // Pass the response body (streamed or JSON) straight through - return new Response(upstream.body, { - status: upstream.status, - headers: { - "Content-Type": - upstream.headers.get("Content-Type") ?? 
"application/json", - // Forward cache-control so SSE isn't buffered - "Cache-Control": "no-cache", - "X-Accel-Buffering": "no", - }, - }); -} diff --git a/examples/playground/app/page.tsx b/examples/playground/app/page.tsx index 5d6dc05..ba2d863 100644 --- a/examples/playground/app/page.tsx +++ b/examples/playground/app/page.tsx @@ -119,8 +119,7 @@ export default function PlaygroundPage() { }, [actions]); // Derived state - const hasApiKey = - selectedProvider === "yourgpt-server" || !!apiKeys[selectedProvider]; + const hasApiKey = !!apiKeys[selectedProvider]; // Don't render until mounted (avoid hydration issues) if (!mounted) return null; diff --git a/examples/playground/lib/constants.ts b/examples/playground/lib/constants.ts index 71ee2f5..4d4f1b8 100644 --- a/examples/playground/lib/constants.ts +++ b/examples/playground/lib/constants.ts @@ -102,18 +102,6 @@ export const providers: ProviderConfig[] = [ createProvider: "createOpenRouter", importPath: "@yourgpt/llm-sdk/openrouter", }, - { - id: "yourgpt-server", - name: "YourGPT Server", - model: "local demo", - color: "#f59e0b", - keyPlaceholder: "", - keyLink: "", - keyLinkText: "", - envVar: "", - createProvider: "", - importPath: "", - }, ]; // Sample person data for useAIContext demo @@ -173,7 +161,6 @@ export const INITIAL_API_KEYS: ApiKeys = { google: "", xai: "", openrouter: "", - "yourgpt-server": "", }; // OpenRouter model options for the model selector (static fallback) diff --git a/examples/playground/lib/types.ts b/examples/playground/lib/types.ts index a31fd21..5129216 100644 --- a/examples/playground/lib/types.ts +++ b/examples/playground/lib/types.ts @@ -35,7 +35,6 @@ export interface ApiKeys { google: string; xai: string; openrouter: string; - "yourgpt-server"?: string; } export type ProviderId = @@ -43,8 +42,7 @@ export type ProviderId = | "anthropic" | "google" | "xai" - | "openrouter" - | "yourgpt-server"; + | "openrouter"; export interface ProviderConfig { id: ProviderId; diff --git 
a/examples/togetherai-demo/app/api/chat/route.ts b/examples/togetherai-demo/app/api/chat/route.ts index c19fbf8..6a45d57 100644 --- a/examples/togetherai-demo/app/api/chat/route.ts +++ b/examples/togetherai-demo/app/api/chat/route.ts @@ -1,6 +1,7 @@ import { createRuntime } from "@yourgpt/llm-sdk"; +import { createFallbackChain } from "@yourgpt/llm-sdk/fallback"; import { createTogetherAI } from "@yourgpt/llm-sdk/togetherai"; -import { DEFAULT_MODEL } from "@/lib/models"; +import { DEFAULT_MODEL, FALLBACK_MODELS } from "@/lib/models"; const SYSTEM_PROMPT = `You are a helpful AI assistant powered by Together AI. You have access to many different open-source AI models and can help with a wide variety of tasks. @@ -12,6 +13,7 @@ export async function POST(request: Request) { // Get model from query param const model = url.searchParams.get("model") || DEFAULT_MODEL; + const useFallback = url.searchParams.get("fallback") === "true"; // Get API key from environment const apiKey = process.env.TOGETHER_API_KEY; @@ -26,10 +28,43 @@ export async function POST(request: Request) { ); } - // Create Together AI provider const together = createTogetherAI({ apiKey }); - // Create runtime with the selected model + if (useFallback) { + // Fallback chain: primary model → fallback models + const fallbackModelIds = FALLBACK_MODELS.filter((id) => id !== model); + const models = [model, ...fallbackModelIds].map((id) => + together.languageModel(id), + ); + + const chain = createFallbackChain({ + models, + strategy: "priority", + retries: 1, + retryDelay: 500, + retryBackoff: "exponential", + onRetry: ({ model, retryAttempt, maxRetries, delayMs, error }) => { + console.warn( + `[retry] ${model} attempt ${retryAttempt}/${maxRetries} — waiting ${delayMs}ms | ${(error as Error).message}`, + ); + }, + onFallback: ({ attemptedModel, nextModel, error, attempt }) => { + console.warn( + `[fallback] attempt ${attempt}: ${attemptedModel} → ${nextModel} | ${(error as Error).message}`, + ); + }, + }); 
+ + const runtime = createRuntime({ + adapter: chain, + systemPrompt: SYSTEM_PROMPT, + debug: process.env.NODE_ENV === "development", + }); + + return await runtime.handleRequest(request); + } + + // Single model (no fallback) const runtime = createRuntime({ provider: together, model, @@ -37,8 +72,7 @@ export async function POST(request: Request) { debug: process.env.NODE_ENV === "development", }); - const response = await runtime.handleRequest(request); - return response; + return await runtime.handleRequest(request); } catch (error) { console.error("[Chat Route] Error:", error); return Response.json( @@ -51,13 +85,14 @@ export async function POST(request: Request) { export async function GET(request: Request) { const url = new URL(request.url); const model = url.searchParams.get("model") || DEFAULT_MODEL; - - const hasEnvKey = !!process.env.TOGETHER_API_KEY; + const useFallback = url.searchParams.get("fallback") === "true"; return Response.json({ status: "ok", provider: "togetherai", model, - configured: hasEnvKey, + fallback: useFallback, + fallbackModels: useFallback ? 
FALLBACK_MODELS : [], + configured: !!process.env.TOGETHER_API_KEY, }); } diff --git a/examples/togetherai-demo/app/page.tsx b/examples/togetherai-demo/app/page.tsx index 6859084..21cf3dc 100644 --- a/examples/togetherai-demo/app/page.tsx +++ b/examples/togetherai-demo/app/page.tsx @@ -3,7 +3,12 @@ import { useState, useMemo, useEffect } from "react"; import { CopilotProvider } from "@yourgpt/copilot-sdk/react"; import { CopilotChat } from "@yourgpt/copilot-sdk/ui"; -import { MODEL_GROUPS, ALL_MODELS, DEFAULT_MODEL } from "@/lib/models"; +import { + MODEL_GROUPS, + ALL_MODELS, + DEFAULT_MODEL, + FALLBACK_MODELS, +} from "@/lib/models"; import { ExternalLink, Github, @@ -11,11 +16,13 @@ import { Copy, Check, ChevronDown, + Shield, } from "lucide-react"; export default function TogetherAIDemo() { const [mounted, setMounted] = useState(false); const [selectedModel, setSelectedModel] = useState(DEFAULT_MODEL); + const [fallbackEnabled, setFallbackEnabled] = useState(false); const [copied, setCopied] = useState(false); useEffect(() => { @@ -31,8 +38,9 @@ export default function TogetherAIDemo() { const runtimeUrl = useMemo(() => { const params = new URLSearchParams(); params.set("model", selectedModel); + if (fallbackEnabled) params.set("fallback", "true"); return `/api/chat?${params.toString()}`; - }, [selectedModel]); + }, [selectedModel, fallbackEnabled]); const selectedModelInfo = ALL_MODELS.find((m) => m.id === selectedModel); @@ -96,6 +104,63 @@ export default function TogetherAIDemo() { )} + {/* Fallback Chain */} +
+
+ + +
+ {fallbackEnabled ? ( +
+
+ + Auto-failover enabled +
+

+ If the primary model fails, the request automatically falls + through to the next model in the chain: +

+
+ {/* Same order the API route builds: selected model first, then the remaining fallbacks. */ [selectedModel, ...FALLBACK_MODELS.filter((id) => id !== selectedModel)].map((id, i) => ( +
+ + {i + 1}. + + + {i === 0 ? `${id} (primary)` : id} + +
+ ))} +
+
+ ) : ( +

+ Enable to automatically try backup models when the primary model + is unavailable or rate-limited. +

+ )} +
+ {/* Setup Guide */}