From fb6762bdad35f3150a9b2ca42f4816ed05b08a47 Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 6 May 2026 15:39:39 -0500 Subject: [PATCH 1/8] feat: add AI Gateway SDK and CLI support --- README.md | 1 + apps/docs/scripts/generate-api-reference.ts | 10 +- apps/docs/src/web/components/docs/nav-data.ts | 6 + .../web/content/reference/api/ai-gateway.mdx | 425 +++++++++++++ .../src/web/content/reference/api/index.mdx | 8 +- .../src/web/content/reference/api/meta.json | 1 + .../web/content/reference/api/sandboxes.mdx | 18 + apps/docs/src/web/routeTree.gen.ts | 21 + .../routes/_docs/reference/api/ai-gateway.tsx | 7 + bun.lock | 65 +- examples/README.md | 17 +- examples/services-aigateway/README.md | 80 +++ examples/services-aigateway/app.ts | 6 + examples/services-aigateway/package.json | 18 + .../src/agent/aigateway/agent.ts | 106 ++++ package.json | 4 +- packages/aigateway/README.md | 50 ++ packages/aigateway/package.json | 42 ++ packages/aigateway/src/index.ts | 79 +++ packages/aigateway/tsconfig.json | 11 + packages/cli/src/agent-detection.ts | 1 + packages/cli/src/ai-help.ts | 20 +- packages/cli/src/cmd/ai/capabilities/show.ts | 8 + packages/cli/src/cmd/ai/intro.ts | 1 + .../cli/src/cmd/cloud/aigateway/complete.ts | 432 ++++++++++++++ packages/cli/src/cmd/cloud/aigateway/index.ts | 21 + .../src/cmd/cloud/aigateway/model-cache.ts | 88 +++ .../cli/src/cmd/cloud/aigateway/models.ts | 213 +++++++ packages/cli/src/cmd/cloud/aigateway/util.ts | 86 +++ packages/cli/src/cmd/cloud/index.ts | 2 + packages/cli/src/config.ts | 3 + packages/cli/src/types.ts | 1 + packages/cli/test/cmd/cloud/aigateway.test.ts | 560 ++++++++++++++++++ .../cli/test/config/profile-creation.test.ts | 1 + packages/core/src/env.d.ts | 6 + .../src/services/aigateway/api-reference.ts | 163 +++++ packages/core/src/services/aigateway/index.ts | 1 + .../core/src/services/aigateway/service.ts | 301 ++++++++++ packages/core/src/services/config.ts | 2 + packages/core/src/services/index.ts | 1 + 
packages/core/test/aigateway.test.ts | 152 +++++ packages/server/README.md | 1 + packages/server/src/config.ts | 3 + packages/server/test/config.test.ts | 6 + tsconfig.json | 1 + 45 files changed, 3010 insertions(+), 39 deletions(-) create mode 100644 apps/docs/src/web/content/reference/api/ai-gateway.mdx create mode 100644 apps/docs/src/web/routes/_docs/reference/api/ai-gateway.tsx create mode 100644 examples/services-aigateway/README.md create mode 100644 examples/services-aigateway/app.ts create mode 100644 examples/services-aigateway/package.json create mode 100644 examples/services-aigateway/src/agent/aigateway/agent.ts create mode 100644 packages/aigateway/README.md create mode 100644 packages/aigateway/package.json create mode 100644 packages/aigateway/src/index.ts create mode 100644 packages/aigateway/tsconfig.json create mode 100644 packages/cli/src/cmd/cloud/aigateway/complete.ts create mode 100644 packages/cli/src/cmd/cloud/aigateway/index.ts create mode 100644 packages/cli/src/cmd/cloud/aigateway/model-cache.ts create mode 100644 packages/cli/src/cmd/cloud/aigateway/models.ts create mode 100644 packages/cli/src/cmd/cloud/aigateway/util.ts create mode 100644 packages/cli/test/cmd/cloud/aigateway.test.ts create mode 100644 packages/core/src/services/aigateway/api-reference.ts create mode 100644 packages/core/src/services/aigateway/index.ts create mode 100644 packages/core/src/services/aigateway/service.ts create mode 100644 packages/core/test/aigateway.test.ts diff --git a/README.md b/README.md index 2475e28b9..42d87c398 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ To chat with other community members you can join the [Agentuity Discord server] The structure of this mono repository: +- `packages/aigateway` - AI Gateway service client for model discovery and LLM completions - `packages/auth` - Agentuity unified Authentication package - `packages/claude-code` - Claude Code plugin with multi-agent coding team - `packages/cli` - the Agentuity 
command line tool diff --git a/apps/docs/scripts/generate-api-reference.ts b/apps/docs/scripts/generate-api-reference.ts index 74d7c91cb..6579a3a6c 100644 --- a/apps/docs/scripts/generate-api-reference.ts +++ b/apps/docs/scripts/generate-api-reference.ts @@ -9,6 +9,7 @@ import type { Service, } from '../../../packages/core/src/services/api-reference.ts'; import { resolveFields } from '../../../packages/core/src/services/api-reference.ts'; +import aiGatewayService from '../../../packages/core/src/services/aigateway/api-reference.ts'; import apiKeysService from '../../../packages/core/src/services/apikey/api-reference.ts'; import coderService from '../../../packages/core/src/services/coder/api-reference.ts'; import databaseService from '../../../packages/core/src/services/db/api-reference.ts'; @@ -32,6 +33,7 @@ import vectorService from '../../../packages/core/src/services/vector/api-refere import webhooksService from '../../../packages/core/src/services/webhook/api-reference.ts'; const services: Service[] = [ + aiGatewayService, apiKeysService, coderService, databaseService, @@ -243,11 +245,17 @@ description: Direct HTTP access to Agentuity platform services {/* This file is auto-generated from Zod schemas. Do not edit manually. Run scripts/generate-api-reference.ts to regenerate. */} -import { Activity, Box, BrainCircuit, Building, Clock, Database, FolderKanban, Globe, HardDrive, Key, Layers, ListTodo, Mail, MessageSquare, Search, Server, Shield, Table, Timer, User, Webhook } from 'lucide-react'; +import { Activity, Box, BrainCircuit, Building, Clock, Cpu, Database, FolderKanban, Globe, HardDrive, Key, Layers, ListTodo, Mail, MessageSquare, Search, Server, Shield, Table, Timer, User, Webhook } from 'lucide-react'; Access any Agentuity Platform Service directly via REST APIs, the TypeScript SDK or the CLI. + } + /> + +## Authentication + +All requests require a Bearer token. Pass your API or SDK key in the `Authorization` header. 
+ +| Header | Value | +|--------|-------| +| `Authorization` | `Bearer YOUR_SDK_KEY` | + +You can find your SDK key in the [Agentuity Console](https://app.agentuity.com) under your project settings. + +--- + +## List Models + +List model metadata for LLM providers available through AI Gateway, grouped by provider. + + + + + +### Response + +JSON response containing provider keys mapped to arrays of supported model metadata. + +| Status | Description | +|--------|-------------| +| 200 | Model catalog returned | +| 401 | Unauthorized — invalid or missing API key | +| 402 | Payment required — upgrade to a paid plan | + +### Response Fields + + + +### Example + + + +--- + +## Create Completion + +Create a completion through the AI Gateway auto-router. The gateway routes by model and request shape, so chat `messages` and legacy `prompt` payloads are both supported. + + + + + +### Request Body + +Completion request. Use `messages` for chat-compatible models and `prompt` for legacy OpenAI completions-compatible models. Additional provider-specific fields are passed through. + + + +### Response + +Provider-compatible completion response. + +| Status | Description | +|--------|-------------| +| 200 | Completion created | +| 400 | Invalid completion request | +| 401 | Unauthorized — invalid or missing API key | +| 402 | Payment required — upgrade to a paid plan | + +### Response Headers + +| Header | Description | +|--------|-------------| +| `X-Gateway-Cost` | Estimated total gateway cost in USD, when billing metadata is available. | +| `X-Gateway-Prompt-Tokens` | Prompt token count used for gateway billing. | +| `X-Gateway-Completion-Tokens` | Completion token count used for gateway billing. | + +### Response Fields + + + +### Example + + + +--- + +## Stream Completion + +Create a streaming completion through the AI Gateway auto-router. Set `stream: true` to receive Server-Sent Events token deltas. 
+ + + + + +### Request Body + +Completion request with `stream` set to `true`. + + + +### Response + +Server-Sent Events stream. Each `data:` frame contains an OpenAI-compatible delta payload. The stream ends with `data: [DONE]`. + +| Status | Description | +|--------|-------------| +| 200 | Streaming completion started | +| 400 | Invalid completion request | +| 401 | Unauthorized — invalid or missing API key | +| 402 | Payment required — upgrade to a paid plan | + +### Response Headers + +| Header | Description | +|--------|-------------| +| `Trailer` | Declares billing trailers such as `X-Gateway-Cost`, `X-Gateway-Prompt-Tokens`, and `X-Gateway-Completion-Tokens` for streamed responses. | +| `X-Gateway-Cost` | Estimated total gateway cost in USD. For streaming responses this may be delivered as an HTTP trailer after the body completes. | +| `X-Gateway-Prompt-Tokens` | Prompt token count used for gateway billing. For streaming responses this may be delivered as an HTTP trailer. | +| `X-Gateway-Completion-Tokens` | Completion token count used for gateway billing. For streaming responses this may be delivered as an HTTP trailer. | + +### Response Fields + + + +### Example + + + +--- diff --git a/apps/docs/src/web/content/reference/api/index.mdx b/apps/docs/src/web/content/reference/api/index.mdx index b762f319f..883bc9b1b 100644 --- a/apps/docs/src/web/content/reference/api/index.mdx +++ b/apps/docs/src/web/content/reference/api/index.mdx @@ -5,11 +5,17 @@ description: Direct HTTP access to Agentuity platform services {/* This file is auto-generated from Zod schemas. Do not edit manually. Run scripts/generate-api-reference.ts to regenerate. 
*/} -import { Activity, Box, BrainCircuit, Building, Clock, Database, FolderKanban, Globe, HardDrive, Key, Layers, ListTodo, Mail, MessageSquare, Search, Server, Shield, Table, Timer, User, Webhook } from 'lucide-react'; +import { Activity, Box, BrainCircuit, Building, Clock, Cpu, Database, FolderKanban, Globe, HardDrive, Key, Layers, ListTodo, Mail, MessageSquare, Search, Server, Shield, Table, Timer, User, Webhook } from 'lucide-react'; Access any Agentuity Platform Service directly via REST APIs, the TypeScript SDK or the CLI. + } + /> DocsReferenceApiRouteRoute, } as any); +const DocsReferenceApiAiGatewayRoute = DocsReferenceApiAiGatewayRouteImport.update({ + id: '/ai-gateway', + path: '/ai-gateway', + getParentRoute: () => DocsReferenceApiRouteRoute, +} as any); const DocsCookbookTutorialsUnderstandingAgentsRoute = DocsCookbookTutorialsUnderstandingAgentsRouteImport.update({ id: '/cookbook/tutorials/understanding-agents', @@ -1281,6 +1287,7 @@ export interface FileRoutesByFullPath { '/cookbook/patterns/webhook-handler': typeof DocsCookbookPatternsWebhookHandlerRoute; '/cookbook/tutorials/rag-agent': typeof DocsCookbookTutorialsRagAgentRoute; '/cookbook/tutorials/understanding-agents': typeof DocsCookbookTutorialsUnderstandingAgentsRoute; + '/reference/api/ai-gateway': typeof DocsReferenceApiAiGatewayRoute; '/reference/api/api-keys': typeof DocsReferenceApiApiKeysRoute; '/reference/api/coder': typeof DocsReferenceApiCoderRoute; '/reference/api/database': typeof DocsReferenceApiDatabaseRoute; @@ -1468,6 +1475,7 @@ export interface FileRoutesByTo { '/cookbook/patterns/webhook-handler': typeof DocsCookbookPatternsWebhookHandlerRoute; '/cookbook/tutorials/rag-agent': typeof DocsCookbookTutorialsRagAgentRoute; '/cookbook/tutorials/understanding-agents': typeof DocsCookbookTutorialsUnderstandingAgentsRoute; + '/reference/api/ai-gateway': typeof DocsReferenceApiAiGatewayRoute; '/reference/api/api-keys': typeof DocsReferenceApiApiKeysRoute; '/reference/api/coder': 
typeof DocsReferenceApiCoderRoute; '/reference/api/database': typeof DocsReferenceApiDatabaseRoute; @@ -1659,6 +1667,7 @@ export interface FileRoutesById { '/_docs/cookbook/patterns/webhook-handler': typeof DocsCookbookPatternsWebhookHandlerRoute; '/_docs/cookbook/tutorials/rag-agent': typeof DocsCookbookTutorialsRagAgentRoute; '/_docs/cookbook/tutorials/understanding-agents': typeof DocsCookbookTutorialsUnderstandingAgentsRoute; + '/_docs/reference/api/ai-gateway': typeof DocsReferenceApiAiGatewayRoute; '/_docs/reference/api/api-keys': typeof DocsReferenceApiApiKeysRoute; '/_docs/reference/api/coder': typeof DocsReferenceApiCoderRoute; '/_docs/reference/api/database': typeof DocsReferenceApiDatabaseRoute; @@ -1850,6 +1859,7 @@ export interface FileRouteTypes { | '/cookbook/patterns/webhook-handler' | '/cookbook/tutorials/rag-agent' | '/cookbook/tutorials/understanding-agents' + | '/reference/api/ai-gateway' | '/reference/api/api-keys' | '/reference/api/coder' | '/reference/api/database' @@ -2037,6 +2047,7 @@ export interface FileRouteTypes { | '/cookbook/patterns/webhook-handler' | '/cookbook/tutorials/rag-agent' | '/cookbook/tutorials/understanding-agents' + | '/reference/api/ai-gateway' | '/reference/api/api-keys' | '/reference/api/coder' | '/reference/api/database' @@ -2227,6 +2238,7 @@ export interface FileRouteTypes { | '/_docs/cookbook/patterns/webhook-handler' | '/_docs/cookbook/tutorials/rag-agent' | '/_docs/cookbook/tutorials/understanding-agents' + | '/_docs/reference/api/ai-gateway' | '/_docs/reference/api/api-keys' | '/_docs/reference/api/coder' | '/_docs/reference/api/database' @@ -3455,6 +3467,13 @@ declare module '@tanstack/react-router' { preLoaderRoute: typeof DocsReferenceApiApiKeysRouteImport; parentRoute: typeof DocsReferenceApiRouteRoute; }; + '/_docs/reference/api/ai-gateway': { + id: '/_docs/reference/api/ai-gateway'; + path: '/ai-gateway'; + fullPath: '/reference/api/ai-gateway'; + preLoaderRoute: typeof 
DocsReferenceApiAiGatewayRouteImport; + parentRoute: typeof DocsReferenceApiRouteRoute; + }; '/_docs/cookbook/tutorials/understanding-agents': { id: '/_docs/cookbook/tutorials/understanding-agents'; path: '/cookbook/tutorials/understanding-agents'; @@ -3655,6 +3674,7 @@ declare module '@tanstack/react-router' { } interface DocsReferenceApiRouteRouteChildren { + DocsReferenceApiAiGatewayRoute: typeof DocsReferenceApiAiGatewayRoute; DocsReferenceApiApiKeysRoute: typeof DocsReferenceApiApiKeysRoute; DocsReferenceApiCoderRoute: typeof DocsReferenceApiCoderRoute; DocsReferenceApiDatabaseRoute: typeof DocsReferenceApiDatabaseRoute; @@ -3681,6 +3701,7 @@ interface DocsReferenceApiRouteRouteChildren { } const DocsReferenceApiRouteRouteChildren: DocsReferenceApiRouteRouteChildren = { + DocsReferenceApiAiGatewayRoute: DocsReferenceApiAiGatewayRoute, DocsReferenceApiApiKeysRoute: DocsReferenceApiApiKeysRoute, DocsReferenceApiCoderRoute: DocsReferenceApiCoderRoute, DocsReferenceApiDatabaseRoute: DocsReferenceApiDatabaseRoute, diff --git a/apps/docs/src/web/routes/_docs/reference/api/ai-gateway.tsx b/apps/docs/src/web/routes/_docs/reference/api/ai-gateway.tsx new file mode 100644 index 000000000..ab125c685 --- /dev/null +++ b/apps/docs/src/web/routes/_docs/reference/api/ai-gateway.tsx @@ -0,0 +1,7 @@ +import { createFileRoute } from '@tanstack/react-router'; +import { MDXPage } from '../../../../components/docs/mdx-page'; + +export const Route = createFileRoute('/_docs/reference/api/ai-gateway')({ + component: () => , + staticData: { crumb: 'AI Gateway' }, +}); diff --git a/bun.lock b/bun.lock index 724e443e9..57c6bd099 100644 --- a/bun.lock +++ b/bun.lock @@ -418,6 +418,21 @@ "vite": "^7.2.7", }, }, + "packages/aigateway": { + "name": "@agentuity/aigateway", + "version": "2.0.14", + "dependencies": { + "@agentuity/core": "workspace:*", + "@agentuity/server": "workspace:*", + "zod": "^4.3.5", + }, + "devDependencies": { + "@types/bun": "latest", + "@types/node": "^22.0.0", + 
"bun-types": "latest", + "typescript": "^5.9.0", + }, + }, "packages/auth": { "name": "@agentuity/auth", "version": "2.0.14", @@ -967,6 +982,8 @@ "@adobe/css-tools": ["@adobe/css-tools@4.4.4", "", {}, "sha512-Elp+iwUx5rN5+Y8xLt5/GRoG20WGoDCQ/1Fb+1LiGtvwbDavuSk0jhD/eZdckHAuzcDzccnkv+rEjyWfRx18gg=="], + "@agentuity/aigateway": ["@agentuity/aigateway@workspace:packages/aigateway"], + "@agentuity/auth": ["@agentuity/auth@workspace:packages/auth"], "@agentuity/claude-code": ["@agentuity/claude-code@workspace:packages/claude-code"], @@ -3001,7 +3018,7 @@ "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], - "gaxios": ["gaxios@7.1.4", "", { "dependencies": { "extend": "^3.0.2", "https-proxy-agent": "^7.0.1", "node-fetch": "^3.3.2" } }, "sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA=="], + "gaxios": ["gaxios@6.7.1", "", { "dependencies": { "extend": "^3.0.2", "https-proxy-agent": "^7.0.1", "is-stream": "^2.0.0", "node-fetch": "^2.6.9", "uuid": "^9.0.1" } }, "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ=="], "gcp-metadata": ["gcp-metadata@6.1.1", "", { "dependencies": { "gaxios": "^6.1.1", "google-logging-utils": "^0.0.2", "json-bigint": "^1.0.0" } }, "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A=="], @@ -3047,7 +3064,7 @@ "google-auth-library": ["google-auth-library@10.6.2", "", { "dependencies": { "base64-js": "^1.3.0", "ecdsa-sig-formatter": "^1.0.11", "gaxios": "^7.1.4", "gcp-metadata": "8.1.2", "google-logging-utils": "1.1.3", "jws": "^4.0.0" } }, "sha512-e27Z6EThmVNNvtYASwQxose/G57rkRuaRbQyxM2bvYLLX/GqWZ5chWq2EBoUchJbCc57eC9ArzO5wMsEmWftCw=="], - "google-logging-utils": ["google-logging-utils@1.1.3", "", {}, "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA=="], + 
"google-logging-utils": ["google-logging-utils@0.0.2", "", {}, "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ=="], "gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="], @@ -3129,7 +3146,7 @@ "husky": ["husky@9.1.7", "", { "bin": { "husky": "bin.js" } }, "sha512-5gs5ytaNjBrh5Ow3zrvdUUY+0VxIuWVL4i9irt6friV+BqdCfmV11CQTWMiBYWHbXhco+J1kHfTOUkePhCDvMA=="], - "iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="], + "iconv-lite": ["iconv-lite@0.7.2", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw=="], "ieee754": ["ieee754@1.2.1", "", {}, "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA=="], @@ -3553,7 +3570,7 @@ "node-domexception": ["node-domexception@1.0.0", "", {}, "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ=="], - "node-fetch": ["node-fetch@3.3.2", "", { "dependencies": { "data-uri-to-buffer": "^4.0.0", "fetch-blob": "^3.1.4", "formdata-polyfill": "^4.0.10" } }, "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA=="], + "node-fetch": ["node-fetch@2.7.0", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A=="], "node-gyp-build-optional-packages": ["node-gyp-build-optional-packages@5.2.2", "", { "dependencies": { "detect-libc": "^2.0.1" }, "bin": { "node-gyp-build-optional-packages": "bin.js", "node-gyp-build-optional-packages-optional": "optional.js", 
"node-gyp-build-optional-packages-test": "build-test.js" } }, "sha512-s+w+rBWnpTMwSFbaE0UXsRlg7hU4FjekKU4eyAih5T8nJuNZT1nNsskXpxmeqSK9UzkBl6UgRlnKc8hz8IEqOw=="], @@ -3669,7 +3686,7 @@ "picomatch": ["picomatch@4.0.4", "", {}, "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A=="], - "pkg-types": ["pkg-types@1.3.1", "", { "dependencies": { "confbox": "^0.1.8", "mlly": "^1.7.4", "pathe": "^2.0.1" } }, "sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ=="], + "pkg-types": ["pkg-types@2.3.1", "", { "dependencies": { "confbox": "^0.2.4", "exsolve": "^1.0.8", "pathe": "^2.0.3" } }, "sha512-y+ichcgc2LrADuhLNAx8DFjVfgz91pRxfZdI3UDhxHvcVEZsenLO+7XaU5vOp0u/7V/wZ+plyuQxtrDlZJ+yeg=="], "playwright": ["playwright@1.59.1", "", { "dependencies": { "playwright-core": "1.59.1" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw=="], @@ -4523,8 +4540,6 @@ "c12/chokidar": ["chokidar@5.0.0", "", { "dependencies": { "readdirp": "^5.0.0" } }, "sha512-TQMmc3w+5AxjpL8iIiwebF73dRDF4fBIieAqGn9RGCWaEVwQ6Fb2cGe31Yns0RRIzii5goJ1Y7xbMwo1TxMplw=="], - "c12/pkg-types": ["pkg-types@2.3.1", "", { "dependencies": { "confbox": "^0.2.4", "exsolve": "^1.0.8", "pathe": "^2.0.3" } }, "sha512-y+ichcgc2LrADuhLNAx8DFjVfgz91pRxfZdI3UDhxHvcVEZsenLO+7XaU5vOp0u/7V/wZ+plyuQxtrDlZJ+yeg=="], - "chalk/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], "chalk/supports-color": ["supports-color@7.2.0", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw=="], @@ -4547,6 +4562,8 @@ "cytoscape-fcose/cose-base": ["cose-base@2.2.0", "", { "dependencies": { "layout-base": "^2.0.0" 
} }, "sha512-AzlgcsCbUMymkADOJtQm3wO9S3ltPfYOFD5033keQn9NJzIbtnZj+UdBJe7DYml/8TdbtHJW3j58SOnKhWY/5g=="], + "d3-dsv/iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="], + "d3-sankey/d3-array": ["d3-array@2.12.1", "", { "dependencies": { "internmap": "^1.0.0" } }, "sha512-B0ErZK/66mHtEsR1TkPEEkwdy+WDesimkM5gpZr5Dsg54BiTA5RXtYW5qTLIAcekaS9xfZrzBLF/OAkB3Qn1YQ=="], "d3-sankey/d3-shape": ["d3-shape@1.3.7", "", { "dependencies": { "d3-path": "1" } }, "sha512-EUkvKjqPFUAZyOlhY5gzCxCeI0Aep04LwIRpsZ/mLFelJiUfnK56jo5JMDSE7yyP2kLSb6LtF+S5chMk7uqPqw=="], @@ -4571,6 +4588,8 @@ "effect/uuid": ["uuid@13.0.2", "", { "bin": { "uuid": "dist-node/bin/uuid" } }, "sha512-vzi9uRZ926x4XV73S/4qQaTwPXM2JBj6/6lI/byHH1jOpCzb0zDbfytgA9LcN/hzb2l7WQSQnxITOVx5un/wGw=="], + "encoding-sniffer/iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="], + "escodegen/source-map": ["source-map@0.6.1", "", {}, "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g=="], "extract-zip/yauzl": ["yauzl@2.10.0", "", { "dependencies": { "buffer-crc32": "~0.2.3", "fd-slicer": "~1.1.0" } }, "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g=="], @@ -4579,12 +4598,14 @@ "form-data/mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="], - "gcp-metadata/gaxios": ["gaxios@6.7.1", "", { "dependencies": { "extend": "^3.0.2", "https-proxy-agent": "^7.0.1", "is-stream": "^2.0.0", "node-fetch": "^2.6.9", "uuid": "^9.0.1" } }, "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ=="], + 
"gaxios/uuid": ["uuid@9.0.1", "", { "bin": { "uuid": "dist/bin/uuid" } }, "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA=="], - "gcp-metadata/google-logging-utils": ["google-logging-utils@0.0.2", "", {}, "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ=="], + "google-auth-library/gaxios": ["gaxios@7.1.4", "", { "dependencies": { "extend": "^3.0.2", "https-proxy-agent": "^7.0.1", "node-fetch": "^3.3.2" } }, "sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA=="], "google-auth-library/gcp-metadata": ["gcp-metadata@8.1.2", "", { "dependencies": { "gaxios": "^7.0.0", "google-logging-utils": "^1.0.0", "json-bigint": "^1.0.0" } }, "sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg=="], + "google-auth-library/google-logging-utils": ["google-logging-utils@1.1.3", "", {}, "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA=="], + "happy-dom/whatwg-mimetype": ["whatwg-mimetype@3.0.0", "", {}, "sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q=="], "hast-util-from-html/parse5": ["parse5@7.3.0", "", { "dependencies": { "entities": "^6.0.0" } }, "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw=="], @@ -4617,15 +4638,15 @@ "micromatch/picomatch": ["picomatch@2.3.2", "", {}, "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA=="], - "mongodb-connection-string-url/whatwg-url": ["whatwg-url@14.2.0", "", { "dependencies": { "tr46": "^5.1.0", "webidl-conversions": "^7.0.0" } }, "sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw=="], + "mlly/pkg-types": ["pkg-types@1.3.1", "", { "dependencies": { "confbox": "^0.1.8", "mlly": "^1.7.4", "pathe": "^2.0.1" } }, 
"sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ=="], - "mysql2/iconv-lite": ["iconv-lite@0.7.2", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw=="], + "mongodb-connection-string-url/whatwg-url": ["whatwg-url@14.2.0", "", { "dependencies": { "tr46": "^5.1.0", "webidl-conversions": "^7.0.0" } }, "sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw=="], "next/postcss": ["postcss@8.4.31", "", { "dependencies": { "nanoid": "^3.3.6", "picocolors": "^1.0.0", "source-map-js": "^1.0.2" } }, "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ=="], "nextjs-app-agentuity/@vitejs/plugin-react": ["@vitejs/plugin-react@4.7.0", "", { "dependencies": { "@babel/core": "^7.28.0", "@babel/plugin-transform-react-jsx-self": "^7.27.1", "@babel/plugin-transform-react-jsx-source": "^7.27.1", "@rolldown/pluginutils": "1.0.0-beta.27", "@types/babel__core": "^7.20.5", "react-refresh": "^0.17.0" }, "peerDependencies": { "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, "sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA=="], - "node-fetch/data-uri-to-buffer": ["data-uri-to-buffer@4.0.1", "", {}, "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A=="], + "node-fetch/whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="], "normalize-package-data/hosted-git-info": ["hosted-git-info@7.0.2", "", { "dependencies": { "lru-cache": "^10.0.1" } }, "sha512-puUZAUKT5m8Zzvs72XWy3HtvVbTWljRE66cP60bxJzAqf2DgICo7lYTY2IHUmLnNpjYvw5bvmoHvPc0QO2a62w=="], @@ -4645,8 +4666,6 @@ "path-scurry/lru-cache": ["lru-cache@11.3.6", "", {}, 
"sha512-Gf/KoL3C/MlI7Bt0PGI9I+TeTC/I6r/csU58N4BSNc4lppLBeKsOdFYkK+dX0ABDUMJNfCHTyPpzwwO21Awd3A=="], - "pkg-types/confbox": ["confbox@0.1.8", "", {}, "sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w=="], - "postcss/nanoid": ["nanoid@3.3.12", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-ZB9RH/39qpq5Vu6Y+NmUaFhQR6pp+M2Xt76XBnEwDaGcVAqhlvxrl3B2bKS5D3NH3QR76v3aSrKaF/Kiy7lEtQ=="], "prebuild-install/tar-fs": ["tar-fs@2.1.4", "", { "dependencies": { "chownr": "^1.1.1", "mkdirp-classic": "^0.5.2", "pump": "^3.0.0", "tar-stream": "^2.1.4" } }, "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ=="], @@ -4709,6 +4728,8 @@ "webrtc-test/@vitejs/plugin-react": ["@vitejs/plugin-react@4.7.0", "", { "dependencies": { "@babel/core": "^7.28.0", "@babel/plugin-transform-react-jsx-self": "^7.27.1", "@babel/plugin-transform-react-jsx-source": "^7.27.1", "@rolldown/pluginutils": "1.0.0-beta.27", "@types/babel__core": "^7.20.5", "react-refresh": "^0.17.0" }, "peerDependencies": { "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, "sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA=="], + "whatwg-encoding/iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="], + "wrap-ansi/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], "wrap-ansi-cjs/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], @@ -4869,9 +4890,7 @@ "form-data/mime-types/mime-db": ["mime-db@1.52.0", "", {}, 
"sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="], - "gcp-metadata/gaxios/node-fetch": ["node-fetch@2.7.0", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A=="], - - "gcp-metadata/gaxios/uuid": ["uuid@9.0.1", "", { "bin": { "uuid": "dist/bin/uuid" } }, "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA=="], + "google-auth-library/gaxios/node-fetch": ["node-fetch@3.3.2", "", { "dependencies": { "data-uri-to-buffer": "^4.0.0", "fetch-blob": "^3.1.4", "formdata-polyfill": "^4.0.10" } }, "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA=="], "hast-util-from-html/parse5/entities": ["entities@6.0.1", "", {}, "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g=="], @@ -4885,6 +4904,8 @@ "lazystream/readable-stream/string_decoder": ["string_decoder@1.1.1", "", { "dependencies": { "safe-buffer": "~5.1.0" } }, "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg=="], + "mlly/pkg-types/confbox": ["confbox@0.1.8", "", {}, "sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w=="], + "mongodb-connection-string-url/whatwg-url/tr46": ["tr46@5.1.1", "", { "dependencies": { "punycode": "^2.3.1" } }, "sha512-hdF5ZgjTqgAntKkklYw0R03MG2x/bSzTtkxmIRw/sTNV8YXsCJ1tfLAX23lhxhHJlEf3CRCOCGGWw3vI3GaSPw=="], "mongodb-connection-string-url/whatwg-url/webidl-conversions": ["webidl-conversions@7.0.0", "", {}, "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g=="], @@ -4895,6 +4916,10 @@ "nextjs-app-agentuity/@vitejs/plugin-react/react-refresh": ["react-refresh@0.17.0", "", {}, 
"sha512-z6F7K9bV85EfseRCp2bzrpyQ0Gkw1uLoCel9XBVWPg/TjRj94SkJzUTGfOa4bs7iJvBWtQG0Wq7wnI0syw3EBQ=="], + "node-fetch/whatwg-url/tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], + + "node-fetch/whatwg-url/webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="], + "normalize-package-data/hosted-git-info/lru-cache": ["lru-cache@10.4.3", "", {}, "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="], "oauth/@vitejs/plugin-react/@rolldown/pluginutils": ["@rolldown/pluginutils@1.0.0-beta.27", "", {}, "sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA=="], @@ -5111,7 +5136,7 @@ "docs/ai/@ai-sdk/gateway/@vercel/oidc": ["@vercel/oidc@3.1.0", "", {}, "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w=="], - "gcp-metadata/gaxios/node-fetch/whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="], + "google-auth-library/gaxios/node-fetch/data-uri-to-buffer": ["data-uri-to-buffer@4.0.1", "", {}, "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A=="], "prebuild-install/tar-fs/tar-stream/readable-stream": ["readable-stream@3.6.2", "", { "dependencies": { "inherits": "^2.0.3", "string_decoder": "^1.1.1", "util-deprecate": "^1.0.1" } }, "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA=="], @@ -5137,10 +5162,6 @@ "create-agentuity/@agentuity/cli/@agentuity/coder-tui/@mariozechner/pi-tui/chalk": ["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="], - 
"gcp-metadata/gaxios/node-fetch/whatwg-url/tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], - - "gcp-metadata/gaxios/node-fetch/whatwg-url/webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="], - "archiver-utils/glob/jackspeak/@isaacs/cliui/string-width/emoji-regex": ["emoji-regex@9.2.2", "", {}, "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="], "archiver-utils/glob/jackspeak/@isaacs/cliui/strip-ansi/ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="], diff --git a/examples/README.md b/examples/README.md index 7cf6dd6ee..861b7a25b 100644 --- a/examples/README.md +++ b/examples/README.md @@ -16,6 +16,16 @@ AI SDK integration with streaming LLM responses using Vercel AI SDK. - Streaming AI responses - Type-safe LLM interactions +#### [services-aigateway](./services-aigateway/) + +AI Gateway service usage with the standalone TypeScript API. + +**Demonstrates:** + +- Model discovery +- Model metadata filtering +- OpenAI-compatible chat completions + #### [streaming](./streaming/) Streaming responses using ReadableStream for chunked data delivery. 
@@ -147,7 +157,7 @@ When adding a new example: | Category | Examples | Description | | ------------------ | ------------------------ | ------------------------------------------- | -| **AI & Streaming** | ai-sdk, streaming | AI integration and data streaming | +| **AI & Streaming** | ai-sdk, services-aigateway, streaming | AI integration and data streaming | | **Real-Time** | websocket, sse | Bidirectional and server-push communication | | **Agent Patterns** | events, evals, lifecycle | Agent features and lifecycle | | **Storage** | services-keyvalue | Data persistence patterns | @@ -163,8 +173,9 @@ When adding a new example: **Advanced patterns?** Check out: 1. [ai-sdk](./ai-sdk/) - AI integration -2. [websocket](./websocket/) - Real-time communication -3. [evals](./evals/) - Quality testing +2. [services-aigateway](./services-aigateway/) - AI Gateway model discovery and completions +3. [websocket](./websocket/) - Real-time communication +4. [evals](./evals/) - Quality testing ## Resources diff --git a/examples/services-aigateway/README.md b/examples/services-aigateway/README.md new file mode 100644 index 000000000..3466f439c --- /dev/null +++ b/examples/services-aigateway/README.md @@ -0,0 +1,80 @@ +# AI Gateway Service Example + +This example demonstrates how to use the standalone `@agentuity/aigateway` TypeScript API from an Agentuity agent. 
+ +## Features Demonstrated + +- **Model discovery** - List AI Gateway models grouped by provider +- **Filtering** - Filter models by provider, input modality, and reasoning support +- **Completions** - Run OpenAI-compatible chat completions through AI Gateway +- **Standalone client** - Use `AIGatewayClient` inside an Agentuity runtime app + +## Running the Example + +```bash +cd examples/services-aigateway +bun install +bun run dev +``` + +## Testing + +```bash +# List all models +curl http://localhost:3500/agent/aigateway \ + --json '{"operation":"models"}' + +# List OpenAI models +curl http://localhost:3500/agent/aigateway \ + --json '{"operation":"models","provider":"openai"}' + +# List models that accept image input +curl http://localhost:3500/agent/aigateway \ + --json '{"operation":"models","input":"image"}' + +# Run a completion +curl http://localhost:3500/agent/aigateway \ + --json '{"operation":"complete","model":"openai/gpt-4.1-mini","prompt":"Say hello in one sentence."}' +``` + +## Key Concepts + +### Client Setup + +```typescript +import { AIGatewayClient } from '@agentuity/aigateway'; + +const client = new AIGatewayClient(); +``` + +The client uses standard Agentuity environment variables: + +- `AGENTUITY_AIGATEWAY_KEY` +- `AGENTUITY_SDK_KEY` +- `AGENTUITY_REGION` +- `AGENTUITY_AIGATEWAY_URL` + +### Model Discovery + +```typescript +const catalog = await client.listModels(); +for (const [provider, models] of Object.entries(catalog)) { + console.log(provider, models.map((model) => model.id)); +} +``` + +### Chat Completion + +```typescript +const completion = await client.complete({ + model: 'openai/gpt-4.1-mini', + messages: [{ role: 'user', content: 'Say hello' }], +}); +``` + +## Common Use Cases + +- **Provider-agnostic LLM calls** - Route completion requests through AI Gateway +- **Model picker UIs** - Populate dropdowns from live model metadata +- **Capability filtering** - Select models by modality or reasoning support +- **Centralized billing 
and auth** - Use Agentuity credentials instead of provider-specific keys diff --git a/examples/services-aigateway/app.ts b/examples/services-aigateway/app.ts new file mode 100644 index 000000000..f2d5ea0ba --- /dev/null +++ b/examples/services-aigateway/app.ts @@ -0,0 +1,6 @@ +import { createApp } from '@agentuity/runtime'; +import aigateway from './src/agent/aigateway/agent'; + +export default await createApp({ + agents: [aigateway], +}); diff --git a/examples/services-aigateway/package.json b/examples/services-aigateway/package.json new file mode 100644 index 000000000..044c05bb2 --- /dev/null +++ b/examples/services-aigateway/package.json @@ -0,0 +1,18 @@ +{ + "name": "example-services-aigateway", + "version": "0.0.1", + "license": "Apache-2.0", + "private": true, + "type": "module", + "scripts": { + "build": "agentuity build --dir .", + "dev": "agentuity dev --dir ." + }, + "dependencies": { + "@agentuity/aigateway": "workspace:*", + "@agentuity/cli": "workspace:*", + "@agentuity/core": "workspace:*", + "@agentuity/runtime": "workspace:*", + "@agentuity/schema": "workspace:*" + } +} diff --git a/examples/services-aigateway/src/agent/aigateway/agent.ts b/examples/services-aigateway/src/agent/aigateway/agent.ts new file mode 100644 index 000000000..242322f49 --- /dev/null +++ b/examples/services-aigateway/src/agent/aigateway/agent.ts @@ -0,0 +1,106 @@ +/** + * AI Gateway Example + * + * Demonstrates how to use the standalone @agentuity/aigateway TypeScript API + * from an Agentuity agent. 
+ */ + +import { AIGatewayClient } from '@agentuity/aigateway'; +import { createAgent } from '@agentuity/runtime'; +import { s } from '@agentuity/schema'; + +const inputSchema = s.union([ + s.object({ + operation: s.literal('models'), + provider: s.string().optional(), + input: s.string().optional(), + reasoning: s.boolean().optional(), + }), + s.object({ + operation: s.literal('complete'), + model: s.string(), + prompt: s.string(), + system: s.string().optional(), + temperature: s.number().optional(), + maxTokens: s.number().optional(), + }), +]); + +function getCompletionText(response: unknown): string { + const choices = (response as { choices?: unknown }).choices; + if (!Array.isArray(choices) || choices.length === 0) { + return ''; + } + const first = choices[0] as { message?: { content?: unknown }; text?: unknown }; + const content = first.message?.content ?? first.text; + if (typeof content === 'string') { + return content; + } + if (Array.isArray(content)) { + return content + .map((part) => { + if (typeof part === 'string') return part; + if (part && typeof part === 'object' && 'text' in part) { + const text = (part as { text?: unknown }).text; + return typeof text === 'string' ? 
text : ''; + } + return ''; + }) + .join(''); + } + return ''; +} + +export default createAgent('aigateway', { + description: 'Example agent demonstrating AI Gateway model discovery and completions', + schema: { + input: inputSchema, + output: s.any(), + }, + handler: async (ctx, input) => { + const client = new AIGatewayClient({ logger: ctx.logger }); + + switch (input.operation) { + case 'models': { + const catalog = await client.listModels(); + const models = Object.entries(catalog) + .filter(([provider]) => !input.provider || provider === input.provider) + .flatMap(([provider, providerModels]) => + providerModels + .filter( + (model) => !input.input || model.input_modalities?.includes(input.input) + ) + .filter((model) => !input.reasoning || model.reasoning) + .map((model) => ({ + provider, + id: model.id, + name: model.name, + api: model.api, + contextWindow: model.context_window, + maxOutputTokens: model.max_output_tokens, + reasoning: model.reasoning, + inputModalities: model.input_modalities, + outputModalities: model.output_modalities, + })) + ); + return { models, count: models.length }; + } + + case 'complete': { + const response = await client.complete({ + model: input.model, + messages: [ + ...(input.system ? 
[{ role: 'system' as const, content: input.system }] : []), + { role: 'user' as const, content: input.prompt }, + ], + temperature: input.temperature, + max_tokens: input.maxTokens, + }); + return { + text: getCompletionText(response), + response, + }; + } + } + }, +}); diff --git a/package.json b/package.json index fbef8b775..d6239f6f6 100644 --- a/package.json +++ b/package.json @@ -9,8 +9,8 @@ ], "scripts": { "prepare": "husky", - "build": "bunx tsc --build && bun run --filter='./packages/core' build && bun run --filter='./packages/schema' build && bun run --filter='./packages/frontend' build && bun run --filter='./packages/server' build && bun run --filter='./packages/react' build && bun run --filter='./packages/postgres' build && bun run --filter='./packages/drizzle' build && bun run --filter='./packages/auth' build && bun run --filter='./packages/evals' build && bun run --filter='./packages/workbench' build && bun run --filter='./packages/runtime' build && bun packages/frontend/scripts/build-beacon.ts && bun run --filter='./packages/cli' build && bun run --filter='./packages/opencode' build && bun run --filter='./apps/testing/integration-suite' build && bun run --filter='./apps/testing/cloud-deployment' build && bun run --filter='./apps/testing/e2e-web' build && bun run --filter='./apps/testing/svelte-web' build", - "build:packages": "bunx tsc --build && bun run --filter='./packages/core' build && bun run --filter='./packages/schema' build && bun run --filter='./packages/frontend' build && bun run --filter='./packages/server' build && bun run --filter='./packages/react' build && bun run --filter='./packages/postgres' build && bun run --filter='./packages/drizzle' build && bun run --filter='./packages/auth' build && bun run --filter='./packages/evals' build && bun run --filter='./packages/workbench' build && bun run --filter='./packages/runtime' build && bun packages/frontend/scripts/build-beacon.ts && bun run --filter='./packages/cli' build && bun run 
--filter='./packages/opencode' build", + "build": "bunx tsc --build && bun run --filter='./packages/core' build && bun run --filter='./packages/schema' build && bun run --filter='./packages/frontend' build && bun run --filter='./packages/server' build && bun run --filter='./packages/react' build && bun run --filter='./packages/postgres' build && bun run --filter='./packages/drizzle' build && bun run --filter='./packages/auth' build && bun run --filter='./packages/aigateway' build && bun run --filter='./packages/evals' build && bun run --filter='./packages/workbench' build && bun run --filter='./packages/runtime' build && bun packages/frontend/scripts/build-beacon.ts && bun run --filter='./packages/cli' build && bun run --filter='./packages/opencode' build && bun run --filter='./apps/testing/integration-suite' build && bun run --filter='./apps/testing/cloud-deployment' build && bun run --filter='./apps/testing/e2e-web' build && bun run --filter='./apps/testing/svelte-web' build", + "build:packages": "bunx tsc --build && bun run --filter='./packages/core' build && bun run --filter='./packages/schema' build && bun run --filter='./packages/frontend' build && bun run --filter='./packages/server' build && bun run --filter='./packages/react' build && bun run --filter='./packages/postgres' build && bun run --filter='./packages/drizzle' build && bun run --filter='./packages/auth' build && bun run --filter='./packages/aigateway' build && bun run --filter='./packages/evals' build && bun run --filter='./packages/workbench' build && bun run --filter='./packages/runtime' build && bun packages/frontend/scripts/build-beacon.ts && bun run --filter='./packages/cli' build && bun run --filter='./packages/opencode' build", "dev:workbench": "concurrently \"cd packages/workbench && bun run dev:app\" \"cd apps/testing/integration-suite && bun run dev\" --names \"workbench,integration-suite\" --prefix-colors \"blue,green\"", "test": "bun test:packages && cd packages/cli && bun run test && 
cd ../.. && bun test:templates && bun test:pkginstall && bun test:create", "test:packages": "cd packages/core && bun test && cd ../schema && bun test && cd ../frontend && bun test && cd ../server && bun test && cd ../react && bun test && cd ../postgres && bun test && cd ../drizzle && bun test && cd ../auth && bun test && cd ../runtime && bun test --max-concurrency=1 && cd ../opencode && bun test", diff --git a/packages/aigateway/README.md b/packages/aigateway/README.md new file mode 100644 index 000000000..d74029f88 --- /dev/null +++ b/packages/aigateway/README.md @@ -0,0 +1,50 @@ +# @agentuity/aigateway + +A standalone package for the Agentuity AI Gateway service. + +## Installation + +```bash +npm install @agentuity/aigateway +``` + +## Quick Start + +```typescript +import { AIGatewayClient } from '@agentuity/aigateway'; + +const client = new AIGatewayClient(); + +const models = await client.listModels(); +console.log(Object.keys(models)); + +const completion = await client.complete({ + model: 'openai/gpt-4.1-mini', + messages: [{ role: 'user', content: 'Say hello' }], +}); + +console.log(completion.choices?.[0]); +``` + +## Configuration + +```typescript +const client = new AIGatewayClient({ + apiKey: 'your-api-key', + orgId: 'your-org-id', + url: 'https://aigateway-usc.agentuity.cloud', +}); +``` + +### Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `AGENTUITY_AIGATEWAY_KEY` | AI Gateway API key override | Optional | +| `AGENTUITY_SDK_KEY` | API key for authentication | Required | +| `AGENTUITY_REGION` | Region for API endpoints | `usc` | +| `AGENTUITY_AIGATEWAY_URL` | Override AI Gateway API URL | Auto-detected | + +## License + +Apache-2.0 diff --git a/packages/aigateway/package.json b/packages/aigateway/package.json new file mode 100644 index 000000000..121dd26e3 --- /dev/null +++ b/packages/aigateway/package.json @@ -0,0 +1,42 @@ +{ + "name": "@agentuity/aigateway", + "version": "2.0.14", + "license": 
"Apache-2.0", + "author": "Agentuity employees and contributors", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "files": [ + "AGENTS.md", + "README.md", + "src", + "dist" + ], + "exports": { + ".": { + "import": "./dist/index.js", + "types": "./dist/index.d.ts" + } + }, + "scripts": { + "clean": "rm -rf dist tsconfig.tsbuildinfo", + "build": "bunx tsc --build --force", + "typecheck": "bunx tsc --noEmit", + "prepublishOnly": "bun run clean && bun run build" + }, + "dependencies": { + "@agentuity/core": "workspace:*", + "@agentuity/server": "workspace:*", + "zod": "^4.3.5" + }, + "devDependencies": { + "@types/bun": "latest", + "@types/node": "^22.0.0", + "bun-types": "latest", + "typescript": "^5.9.0" + }, + "publishConfig": { + "access": "public" + }, + "sideEffects": false +} diff --git a/packages/aigateway/src/index.ts b/packages/aigateway/src/index.ts new file mode 100644 index 000000000..a634d0146 --- /dev/null +++ b/packages/aigateway/src/index.ts @@ -0,0 +1,79 @@ +export { + AIGatewayService, + type AIGatewayChatCompletion, + type AIGatewayChatCompletionParams, + type AIGatewayChatMessage, + type AIGatewayModel, + type AIGatewayModelProvider, + type AIGatewayModels, + type AIGatewayModelsResponse, + type AIGatewayPricing, + AIGatewayChatCompletionParamsSchema, + AIGatewayChatCompletionSchema, + AIGatewayChatMessageSchema, + AIGatewayModelProviderSchema, + AIGatewayModelSchema, + AIGatewayModelsResponseSchema, + AIGatewayModelsSchema, + AIGatewayPricingSchema, +} from '@agentuity/core/aigateway'; + +import { + AIGatewayService, + type AIGatewayChatCompletion, + type AIGatewayChatCompletionParams, + type AIGatewayModels, +} from '@agentuity/core/aigateway'; +import { createMinimalLogger, getEnv } from '@agentuity/core'; +import { getServiceUrls } from '@agentuity/core/config'; +import { buildClientHeaders, createServerFetchAdapter, type Logger } from '@agentuity/server'; +import { z } from 'zod'; + +const isLogger = (val: 
unknown): val is Logger =>
+	typeof val === 'object' &&
+	val !== null &&
+	['info', 'warn', 'error', 'debug', 'trace'].every(
+		(m) => typeof (val as Record<string, unknown>)[m] === 'function'
+	);
+
+export const AIGatewayClientOptionsSchema = z.object({
+	apiKey: z.string().optional().describe('API key for authentication'),
+	url: z.string().optional().describe('Base URL for the AI Gateway API'),
+	orgId: z.string().optional().describe('Organization ID for multi-tenant operations'),
+	logger: z.custom<Logger>(isLogger).optional().describe('Custom logger instance'),
+});
+
+export type AIGatewayClientOptions = z.infer<typeof AIGatewayClientOptionsSchema>;
+
+export class AIGatewayClient {
+	readonly #service: AIGatewayService;
+
+	constructor(options: AIGatewayClientOptions = {}) {
+		const validatedOptions = AIGatewayClientOptionsSchema.parse(options);
+		const apiKey =
+			validatedOptions.apiKey ||
+			getEnv('AGENTUITY_AIGATEWAY_KEY') ||
+			getEnv('AGENTUITY_SDK_KEY') ||
+			getEnv('AGENTUITY_CLI_KEY');
+		const region = getEnv('AGENTUITY_REGION') ?? 'usc';
+		const serviceUrls = getServiceUrls(region);
+		const url =
+			validatedOptions.url || getEnv('AGENTUITY_AIGATEWAY_URL') || serviceUrls.aigateway;
+		const logger = validatedOptions.logger ??
createMinimalLogger(); + const headers = buildClientHeaders({ + apiKey, + orgId: validatedOptions.orgId, + }); + + const adapter = createServerFetchAdapter({ headers }, logger); + this.#service = new AIGatewayService(url, adapter); + } + + async listModels(): Promise { + return this.#service.listModels(); + } + + async complete(params: AIGatewayChatCompletionParams): Promise { + return this.#service.complete(params); + } +} diff --git a/packages/aigateway/tsconfig.json b/packages/aigateway/tsconfig.json new file mode 100644 index 000000000..39771b3c1 --- /dev/null +++ b/packages/aigateway/tsconfig.json @@ -0,0 +1,11 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "composite": true, + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"], + "references": [{ "path": "../core" }, { "path": "../server" }] +} diff --git a/packages/cli/src/agent-detection.ts b/packages/cli/src/agent-detection.ts index ed0f565ba..4eeb6aaba 100644 --- a/packages/cli/src/agent-detection.ts +++ b/packages/cli/src/agent-detection.ts @@ -33,6 +33,7 @@ export const KNOWN_AGENTS: [string, string][] = [ ['amp', 'amp'], ['warp', 'warp'], ['pi', 'pi'], + ['coder', 'coder'], // TODO: VSCode Agent Mode detection - need to find a reliable way to detect // when VSCode's built-in agent (Copilot Chat) is running commands vs just // running in VSCode's integrated terminal. May need env var detection. 
diff --git a/packages/cli/src/ai-help.ts b/packages/cli/src/ai-help.ts index f1210886f..1f8810de3 100644 --- a/packages/cli/src/ai-help.ts +++ b/packages/cli/src/ai-help.ts @@ -115,7 +115,7 @@ function buildWhenToUse(): string { Use this CLI when the user asks to: - Create, build, or deploy AI agents - Manage Agentuity projects and organizations -- Access cloud services (KV, Vector, Postgres, Storage, Sandboxes) +- Access cloud services (KV, Vector, AI Gateway, Postgres, Storage, Sandboxes) - Debug or troubleshoot agent deployments - Run agents locally in development mode - Manage environment variables and secrets @@ -228,15 +228,15 @@ function collectQuickReferenceCommands(schema: CLISchema): QuickCommand[] { { path: 'cloud kv get', label: 'KV get', priority: 9 }, { path: 'cloud kv set', label: 'KV set', priority: 10 }, { path: 'cloud vector search', label: 'Vector search', priority: 11 }, - { path: 'cloud db list', label: 'List databases', priority: 12 }, - { path: 'cloud db exec', label: 'Execute SQL', priority: 13 }, - { path: 'env list', label: 'List env vars', priority: 14 }, - { path: 'env set', label: 'Set env var', priority: 15 }, - { path: 'cloud deployment list', label: 'List deployments', priority: 16 }, - { path: 'cloud deployment rollback', label: 'Rollback deployment', priority: 17 }, - { path: 'project list', label: 'List projects', priority: 18 }, - { path: 'auth whoami', label: 'Show current user', priority: 19 }, - { path: 'version', label: 'Show version', priority: 20 }, + { path: 'cloud aigateway models', label: 'List AI models', priority: 12 }, + { path: 'cloud aigateway complete', label: 'AI completion', priority: 13 }, + { path: 'cloud db list', label: 'List databases', priority: 14 }, + { path: 'cloud db exec', label: 'Execute SQL', priority: 15 }, + { path: 'env list', label: 'List env vars', priority: 16 }, + { path: 'env set', label: 'Set env var', priority: 17 }, + { path: 'cloud deployment list', label: 'List deployments', priority: 18 
}, + { path: 'cloud deployment rollback', label: 'Rollback deployment', priority: 19 }, + { path: 'project list', label: 'List projects', priority: 20 }, ]; // Build command signatures from schema diff --git a/packages/cli/src/cmd/ai/capabilities/show.ts b/packages/cli/src/cmd/ai/capabilities/show.ts index 7e4cafbbe..06a5ddaa9 100644 --- a/packages/cli/src/cmd/ai/capabilities/show.ts +++ b/packages/cli/src/cmd/ai/capabilities/show.ts @@ -128,6 +128,14 @@ export const showSubcommand = createSubcommand({ requiresAuth: true, requiresProject: true, }, + { + id: 'aigateway', + name: 'AI Gateway', + description: 'List supported AI models and run LLM completions', + commands: ['cloud aigateway models', 'cloud aigateway complete'], + requiresAuth: true, + requiresProject: true, + }, { id: 'databases', name: 'Cloud Databases', diff --git a/packages/cli/src/cmd/ai/intro.ts b/packages/cli/src/cmd/ai/intro.ts index 7433badd2..4bdd9cb36 100644 --- a/packages/cli/src/cmd/ai/intro.ts +++ b/packages/cli/src/cmd/ai/intro.ts @@ -55,6 +55,7 @@ ${getCommand('cloud deployment logs')} # View deployment logs \`\`\`bash ${getCommand('cloud kv')} # Key-value storage operations ${getCommand('cloud vector')} # Vector database operations +${getCommand('cloud aigateway')} # AI Gateway model and completion operations ${getCommand('cloud storage')} # Object storage operations ${getCommand('env set KEY value')} # Set environment variables ${getCommand('env set KEY value --secret')} # Set secrets (encrypted) diff --git a/packages/cli/src/cmd/cloud/aigateway/complete.ts b/packages/cli/src/cmd/cloud/aigateway/complete.ts new file mode 100644 index 000000000..c3e8a1f5b --- /dev/null +++ b/packages/cli/src/cmd/cloud/aigateway/complete.ts @@ -0,0 +1,432 @@ +import { z } from 'zod'; +import type { AIGatewayModels, AIGatewayService } from '@agentuity/core'; +import { createCommand } from '../../../types'; +import * as tui from '../../../tui'; +import { getCommand } from '../../../command-prefix'; +import 
{ getExecutingAgent } from '../../../agent-detection'; +import { createAIGatewayService, getAIGatewayUrl, getCompletionText } from './util'; +import { getCachedAIGatewayModels, setCachedAIGatewayModels } from './model-cache'; + +const CompletionResponseSchema = z.object({ + text: z.string(), + response: z.unknown(), + cost: z.unknown().optional(), +}); + +const defaultModel = 'openai/gpt-4o-mini'; + +function isAgentOutputMode(): boolean { + return Boolean(getExecutingAgent()) && process.env.AGENTUITY_AIGATEWAY_AGENT_OUTPUT !== 'false'; +} + +async function readPromptFromStdin(): Promise { + if (process.stdin.isTTY) { + return undefined; + } + const text = await Bun.stdin.text(); + const trimmed = text.trim(); + return trimmed.length > 0 ? trimmed : undefined; +} + +async function readPromptFromFile(filename?: string): Promise { + if (!filename) { + return undefined; + } + const text = await Bun.file(filename).text(); + const trimmed = text.trim(); + return trimmed.length > 0 ? trimmed : undefined; +} + +export function combinePromptInput(opts: { + explicitPrompt?: string; + stdinPrompt?: string; + stdinMode?: 'append' | 'replace'; +}): string | undefined { + if (!opts.stdinPrompt) { + return opts.explicitPrompt; + } + if (!opts.explicitPrompt || opts.stdinMode === 'replace') { + return opts.stdinPrompt; + } + if (!opts.stdinMode || opts.stdinMode === 'append') { + return `${opts.explicitPrompt}\n\n${opts.stdinPrompt}`; + } + return opts.explicitPrompt; +} + +function getUsageText(response: unknown): string | undefined { + if (!response || typeof response !== 'object') { + return undefined; + } + const usage = (response as { usage?: unknown }).usage; + if (!usage || typeof usage !== 'object') { + return undefined; + } + const input = + (usage as { prompt_tokens?: unknown; input_tokens?: unknown }).prompt_tokens ?? 
+ (usage as { input_tokens?: unknown }).input_tokens; + const output = + (usage as { completion_tokens?: unknown; output_tokens?: unknown }).completion_tokens ?? + (usage as { output_tokens?: unknown }).output_tokens; + const total = (usage as { total_tokens?: unknown }).total_tokens; + const parts = [ + typeof input === 'number' ? `input=${input}` : undefined, + typeof output === 'number' ? `output=${output}` : undefined, + typeof total === 'number' ? `total=${total}` : undefined, + ].filter(Boolean); + return parts.length > 0 ? `Usage: ${parts.join(' ')}` : undefined; +} + +function getCostInfo(response: unknown): unknown | undefined { + if (!response || typeof response !== 'object') { + return undefined; + } + const agentuity = (response as { agentuity?: unknown }).agentuity; + if (!agentuity || typeof agentuity !== 'object') { + return undefined; + } + return (agentuity as { cost?: unknown }).cost; +} + +function getCostText(response: unknown): string | undefined { + const cost = getCostInfo(response); + if (!cost || typeof cost !== 'object') { + return undefined; + } + const total = (cost as { total?: unknown }).total; + const promptTokens = (cost as { promptTokens?: unknown }).promptTokens; + const completionTokens = (cost as { completionTokens?: unknown }).completionTokens; + const parts = [ + typeof total === 'number' ? `total=$${total.toFixed(6)}` : undefined, + typeof promptTokens === 'number' ? `prompt=${promptTokens}` : undefined, + typeof completionTokens === 'number' ? `completion=${completionTokens}` : undefined, + ].filter(Boolean); + return parts.length > 0 ? 
`Cost: ${parts.join(' ')}` : undefined; +} + +type CompletionModelInfo = { + id: string; + api?: string; + provider?: string; +}; + +function matchesModel(provider: string, candidateId: string, model: string): boolean { + return candidateId === model || `${provider}/${candidateId}` === model; +} + +async function getCompletionModelInfo( + model: string, + models: AIGatewayModels +): Promise { + for (const [provider, providerModels] of Object.entries(models)) { + const match = providerModels.find((candidate) => matchesModel(provider, candidate.id, model)); + if (match) { + return { id: match.id, api: match.api, provider }; + } + } + return undefined; +} + +async function loadModelsForCompletion(opts: { + service: AIGatewayService; + profile: string; + cacheKey: string; + refresh?: boolean; +}): Promise { + if (!opts.refresh) { + const cached = await getCachedAIGatewayModels(opts.profile, opts.cacheKey); + if (cached) { + return cached; + } + } + const models = await opts.service.listModels(); + await setCachedAIGatewayModels(opts.profile, opts.cacheKey, models); + return models; +} + +function buildCompletionRequest(opts: { + model: string; + prompt: string; + system?: string; + api?: string; + temperature?: number; + maxTokens?: number; + stream?: boolean; +}) { + const common = { + model: opts.model, + temperature: opts.temperature, + max_tokens: opts.maxTokens, + ...(opts.stream ? { stream: true } : {}), + }; + return { + ...common, + messages: [ + ...(opts.system ? [{ role: 'system' as const, content: opts.system }] : []), + { role: 'user' as const, content: opts.prompt }, + ], + }; +} + +async function resolvePrompt(opts: { + optionPrompt?: string; + argPrompt?: string; + file?: string; + stdinMode?: 'append' | 'replace'; +}): Promise { + const explicitPrompt = + opts.optionPrompt ?? opts.argPrompt ?? 
(await readPromptFromFile(opts.file)); + const stdinPrompt = await readPromptFromStdin(); + return combinePromptInput({ explicitPrompt, stdinPrompt, stdinMode: opts.stdinMode }); +} + +function getStreamDeltaText(payload: unknown): string { + if (!payload || typeof payload !== 'object') { + return ''; + } + const choices = (payload as { choices?: unknown }).choices; + if (!Array.isArray(choices)) { + return ''; + } + return choices + .map((choice) => { + if (!choice || typeof choice !== 'object') { + return ''; + } + const delta = (choice as { delta?: { content?: unknown } }).delta; + if (typeof delta?.content === 'string') { + return delta.content; + } + const text = (choice as { text?: unknown }).text; + return typeof text === 'string' ? text : ''; + }) + .join(''); +} + +async function consumeCompletionStream( + stream: ReadableStream, + options: { json?: boolean; raw?: boolean } +): Promise { + const reader = stream.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + let text = ''; + + const consumeFrame = (frame: string) => { + const dataLines = frame + .split(/\r?\n/) + .filter((line) => line.startsWith('data:')) + .map((line) => line.slice(5).trimStart()); + for (const data of dataLines) { + if (!data || data === '[DONE]') { + continue; + } + if (options.raw) { + if (!options.json) { + console.log(data); + } + continue; + } + try { + const delta = getStreamDeltaText(JSON.parse(data)); + if (delta) { + text += delta; + if (!options.json) { + process.stdout.write(delta); + } + } + } catch { + // Ignore malformed stream frames and continue consuming the stream. + } + } + }; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + buffer += decoder.decode(value, { stream: true }); + const frames = buffer.split(/\r?\n\r?\n/); + buffer = frames.pop() ?? 
''; + for (const frame of frames) { + consumeFrame(frame); + } + } + buffer += decoder.decode(); + if (buffer.trim()) { + consumeFrame(buffer); + } + } finally { + reader.releaseLock(); + } + if (!options.json && !options.raw && text) { + process.stdout.write('\n'); + } + return text; +} + +export const completeSubcommand = createCommand({ + name: 'complete', + aliases: ['completion', 'chat'], + description: 'Run an AI Gateway chat completion', + tags: ['write', 'slow', 'requires-auth', 'uses-stdin'], + requires: { auth: true }, + optional: { project: true, region: true }, + examples: [ + { + command: getCommand('cloud aigateway complete --model openai/gpt-4.1-mini "Hello"'), + description: 'Run a completion', + }, + { + command: `echo "Hello" | ${getCommand('cloud aigateway complete --model openai/gpt-4.1-mini')}`, + description: 'Read the prompt from stdin', + }, + { + command: getCommand( + 'cloud aigateway complete --model openai/gpt-4.1-mini --file prompt.txt' + ), + description: 'Read the prompt from a file', + }, + { + command: getCommand( + 'cloud aigateway complete --model openai/gpt-4.1-mini --stream "Hello"' + ), + description: 'Stream token output as it arrives', + }, + ], + schema: { + args: z.object({ + prompt: z.string().optional().describe('prompt text'), + }), + options: z.object({ + model: z.string().min(1).optional().describe('model id'), + prompt: z.string().optional().describe('prompt text'), + file: z.string().optional().describe('read prompt text from a file'), + system: z.string().optional().describe('optional system message'), + systemFile: z.string().optional().describe('read the system message from a file'), + refreshModels: z + .boolean() + .optional() + .describe('refresh the cached AI Gateway model catalog before choosing request format'), + temperature: z.number().optional().describe('sampling temperature'), + maxTokens: z.number().optional().describe('maximum output tokens'), + stream: z.boolean().optional().describe('stream token 
output as it arrives'), + save: z.string().optional().describe('write assistant text to a file'), + format: z + .enum(['text', 'json', 'raw']) + .optional() + .describe('output format for non-json mode'), + stdinMode: z + .enum(['append', 'replace']) + .optional() + .describe('how to combine stdin with prompt text'), + usage: z.boolean().optional().describe('print usage details when available'), + cost: z.boolean().optional().describe('print AI Gateway cost details when available'), + raw: z.boolean().optional().describe('print the raw completion response'), + }), + response: CompletionResponseSchema, + }, + async handler(ctx) { + const prompt = await resolvePrompt({ + optionPrompt: ctx.opts.prompt, + argPrompt: ctx.args.prompt, + file: ctx.opts.file, + stdinMode: ctx.opts.stdinMode, + }); + if (!prompt) { + tui.fatal( + 'Prompt is required. Pass it as an argument, use --prompt, use --file, or pipe it through stdin.' + ); + } + + const service = createAIGatewayService(ctx); + const model = ctx.opts.model ?? process.env.AGENTUITY_AIGATEWAY_MODEL ?? defaultModel; + const system = ctx.opts.system ?? (await readPromptFromFile(ctx.opts.systemFile)); + const profile = ctx.config?.name ?? 'default'; + const cacheKey = getAIGatewayUrl(ctx.region, ctx.config?.overrides); + let models = await loadModelsForCompletion({ + service, + profile, + cacheKey, + refresh: ctx.opts.refreshModels, + }); + let modelInfo = await getCompletionModelInfo(model, models); + if (!modelInfo && !ctx.opts.refreshModels) { + models = await loadModelsForCompletion({ service, profile, cacheKey, refresh: true }); + modelInfo = await getCompletionModelInfo(model, models); + } + const request = buildCompletionRequest({ + model, + prompt, + system, + api: modelInfo?.api, + temperature: ctx.opts.temperature, + maxTokens: ctx.opts.maxTokens, + }); + const format = ctx.opts.raw + ? 'raw' + : (ctx.opts.format ?? (isAgentOutputMode() ? 
'json' : 'text')); + + if (ctx.opts.stream) { + const streamed = await service.streamCompleteWithMetadata({ ...request, stream: true }); + const text = await consumeCompletionStream(streamed.stream, { + json: ctx.options.json || format === 'json', + raw: format === 'raw', + }); + const metadata = await streamed.metadata; + const cost = metadata.cost; + if (ctx.opts.save) { + await Bun.write(ctx.opts.save, text); + } + if (!ctx.options.json && format === 'json') { + console.log(JSON.stringify({ text, cost, response: { stream: true, model } }, null, 2)); + } + if (!ctx.options.json && ctx.opts.cost) { + const costText = getCostText({ agentuity: metadata }); + if (costText) { + console.error(costText); + } + } + return { text, response: { stream: true }, cost }; + } + + const response = await service.complete(request); + const text = getCompletionText(response); + const cost = getCostInfo(response); + if (ctx.opts.save) { + await Bun.write(ctx.opts.save, text); + } + + if (!ctx.options.json) { + if (format === 'raw') { + console.log(JSON.stringify(response, null, 2)); + } else if (format === 'json') { + console.log( + JSON.stringify( + { text, model, usage: (response as { usage?: unknown }).usage, cost, response }, + null, + 2 + ) + ); + } else { + console.log(text); + } + if (ctx.opts.usage) { + const usage = getUsageText(response); + if (usage) { + console.error(usage); + } + } + if (ctx.opts.cost) { + const costText = getCostText(response); + if (costText) { + console.error(costText); + } + } + } + + return { text, response, cost }; + }, +}); diff --git a/packages/cli/src/cmd/cloud/aigateway/index.ts b/packages/cli/src/cmd/cloud/aigateway/index.ts new file mode 100644 index 000000000..450e09ce9 --- /dev/null +++ b/packages/cli/src/cmd/cloud/aigateway/index.ts @@ -0,0 +1,21 @@ +import { createCommand } from '../../../types'; +import { getCommand } from '../../../command-prefix'; +import { completeSubcommand } from './complete'; +import { modelsSubcommand } from 
'./models'; + +export const aigatewayCommand = createCommand({ + name: 'aigateway', + aliases: ['ai-gateway', 'ai'], + description: 'Use the Agentuity AI Gateway', + tags: ['slow'], + examples: [ + { command: getCommand('cloud aigateway models'), description: 'List supported models' }, + { + command: getCommand('cloud aigateway complete --model openai/gpt-4.1-mini "Hello"'), + description: 'Run a chat completion', + }, + ], + subcommands: [modelsSubcommand, completeSubcommand], +}); + +export default aigatewayCommand; diff --git a/packages/cli/src/cmd/cloud/aigateway/model-cache.ts b/packages/cli/src/cmd/cloud/aigateway/model-cache.ts new file mode 100644 index 000000000..acc96c2a3 --- /dev/null +++ b/packages/cli/src/cmd/cloud/aigateway/model-cache.ts @@ -0,0 +1,88 @@ +import { Database } from 'bun:sqlite'; +import { mkdir } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { AIGatewayModels } from '@agentuity/core'; +import { getDefaultConfigDir } from '../../../config'; + +const TTL_MS = 6 * 60 * 60 * 1000; + +let db: Database | null = null; + +async function getDatabase(): Promise { + if (db) { + return db; + } + + const configDir = getDefaultConfigDir(); + await mkdir(configDir, { recursive: true }); + + db = new Database(join(configDir, 'resource.db')); + db.run('PRAGMA journal_mode = WAL'); + db.run('PRAGMA busy_timeout = 5000'); + db.run('PRAGMA synchronous = NORMAL'); + db.run(` + CREATE TABLE IF NOT EXISTS aigateway_model_cache ( + profile TEXT NOT NULL, + cache_key TEXT NOT NULL, + models_json TEXT NOT NULL, + cached_at INTEGER NOT NULL, + PRIMARY KEY (profile, cache_key) + ) + `); + db.run(` + CREATE INDEX IF NOT EXISTS idx_aigateway_model_cache_cached_at + ON aigateway_model_cache(cached_at) + `); + + return db; +} + +export async function getCachedAIGatewayModels( + profile: string, + cacheKey: string +): Promise { + try { + const database = await getDatabase(); + const cutoff = Date.now() - TTL_MS; + const row = database + 
.query<{ models_json: string; cached_at: number }, [string, string]>( + 'SELECT models_json, cached_at FROM aigateway_model_cache WHERE profile = ? AND cache_key = ?' + ) + .get(profile, cacheKey); + if (!row) { + return null; + } + if (row.cached_at < cutoff) { + database.run('DELETE FROM aigateway_model_cache WHERE profile = ? AND cache_key = ?', [ + profile, + cacheKey, + ]); + return null; + } + return JSON.parse(row.models_json) as AIGatewayModels; + } catch { + return null; + } +} + +export async function setCachedAIGatewayModels( + profile: string, + cacheKey: string, + models: AIGatewayModels +): Promise { + try { + const database = await getDatabase(); + const cutoff = Date.now() - TTL_MS; + database.run('DELETE FROM aigateway_model_cache WHERE cached_at < ?', [cutoff]); + database.run( + `INSERT INTO aigateway_model_cache (profile, cache_key, models_json, cached_at) + VALUES (?, ?, ?, ?) + ON CONFLICT(profile, cache_key) DO UPDATE SET + models_json = excluded.models_json, + cached_at = excluded.cached_at`, + [profile, cacheKey, JSON.stringify(models), Date.now()] + ); + } catch { + // Non-critical cache failure should never block the CLI. 
+ } +} diff --git a/packages/cli/src/cmd/cloud/aigateway/models.ts b/packages/cli/src/cmd/cloud/aigateway/models.ts new file mode 100644 index 000000000..77019f1f7 --- /dev/null +++ b/packages/cli/src/cmd/cloud/aigateway/models.ts @@ -0,0 +1,213 @@ +import { z } from 'zod'; +import { createCommand } from '../../../types'; +import * as tui from '../../../tui'; +import { getCommand } from '../../../command-prefix'; +import { getExecutingAgent } from '../../../agent-detection'; +import { createPublicAIGatewayService, getAIGatewayUrl } from './util'; +import { getCachedAIGatewayModels, setCachedAIGatewayModels } from './model-cache'; + +const ModelRowSchema = z.object({ + provider: z.string(), + id: z.string(), + name: z.string(), + api: z.string().optional(), + reasoning: z.boolean().optional(), + contextWindow: z.number().optional(), + maxOutputTokens: z.number().optional(), +}); + +const ModelsResponseSchema = z.object({ + models: z.array(ModelRowSchema), + count: z.number(), + model: ModelRowSchema.nullable().optional(), +}); + +const recommendedModels = [ + { use: 'fast', candidates: ['openai/gpt-4o-mini', 'openai/gpt-4.1-mini'] }, + { use: 'reasoning', candidates: ['openai/gpt-5-mini', 'openai/o4-mini'] }, + { use: 'coding', candidates: ['anthropic/claude-opus-4-7', 'openai/gpt-5-codex'] }, + { use: 'cheap', candidates: ['openai/gpt-4.1-nano', 'openai/gpt-5-nano'] }, +]; + +function isAgentOutputMode(): boolean { + return Boolean(getExecutingAgent()) && process.env.AGENTUITY_AIGATEWAY_AGENT_OUTPUT !== 'false'; +} + +function getRecommendations(rows: z.infer[]) { + const byId = new Map(rows.map((row) => [row.id, row])); + return recommendedModels + .map((rec) => { + const model = rec.candidates.map((id) => byId.get(id)).find(Boolean); + return model ? 
{ use: rec.use, model: model.id, name: model.name } : undefined; + }) + .filter((row): row is { use: string; model: string; name: string } => Boolean(row)); +} + +function matchesProviderFilter( + provider: string, + modelId: string, + providerFilter?: string +): boolean { + if (!providerFilter) { + return true; + } + return provider === providerFilter || modelId.startsWith(`${providerFilter}/`); +} + +function matchesModelFilter(provider: string, modelId: string, modelFilter?: string): boolean { + if (!modelFilter) { + return true; + } + return modelId === modelFilter || `${provider}/${modelId}` === modelFilter; +} + +function matchesNameFilter(modelId: string, modelName: string, nameFilter?: string): boolean { + if (!nameFilter) { + return true; + } + const normalized = nameFilter.toLowerCase(); + return ( + modelId.toLowerCase() === normalized || + modelId.split('/').pop()?.toLowerCase() === normalized || + modelName.toLowerCase() === normalized + ); +} + +export const modelsSubcommand = createCommand({ + name: 'models', + aliases: ['list', 'ls'], + description: 'List AI Gateway models', + tags: ['read-only', 'fast'], + idempotent: true, + examples: [ + { command: getCommand('cloud aigateway models'), description: 'List all models' }, + { + command: getCommand('cloud aigateway models --provider openai'), + description: 'List OpenAI models', + }, + { + command: getCommand('cloud aigateway models --model anthropic/claude-opus-4-7'), + description: 'Show one model by id', + }, + ], + schema: { + options: z.object({ + model: z.string().optional().describe('show one model by full provider/id'), + provider: z.string().optional().describe('filter by provider'), + name: z + .string() + .optional() + .describe('show one model by id or display name with --provider'), + reasoning: z.boolean().optional().describe('only show reasoning models'), + input: z.string().optional().describe('filter by input modality, such as text or image'), + output: 
z.string().optional().describe('filter by output modality, such as text or image'), + ids: z.boolean().optional().describe('only print model ids'), + simple: z.boolean().optional().describe('print a compact model list'), + recommended: z.boolean().optional().describe('show recommended models for common uses'), + refreshModels: z + .boolean() + .optional() + .describe('refresh the cached AI Gateway model catalog'), + }), + response: ModelsResponseSchema, + }, + async handler(ctx) { + const service = createPublicAIGatewayService(ctx); + const profile = ctx.config?.name ?? 'default'; + const cacheKey = getAIGatewayUrl(ctx.region, ctx.config?.overrides); + const cached = ctx.opts.refreshModels + ? null + : await getCachedAIGatewayModels(profile, cacheKey); + const catalog = cached ?? (await service.listModels()); + if (!cached) { + await setCachedAIGatewayModels(profile, cacheKey, catalog); + } + const rows = Object.entries(catalog).flatMap(([provider, models]) => + models + .filter((model) => matchesProviderFilter(provider, model.id, ctx.opts.provider)) + .filter((model) => matchesModelFilter(provider, model.id, ctx.opts.model)) + .filter((model) => matchesNameFilter(model.id, model.name, ctx.opts.name)) + .filter((model) => !ctx.opts.reasoning || model.reasoning) + .filter((model) => !ctx.opts.input || model.input_modalities?.includes(ctx.opts.input)) + .filter( + (model) => !ctx.opts.output || model.output_modalities?.includes(ctx.opts.output) + ) + .map((model) => ({ + provider, + id: model.id, + name: model.name, + api: model.api, + reasoning: model.reasoning, + contextWindow: model.context_window, + maxOutputTokens: model.max_output_tokens, + })) + ); + const singleLookup = Boolean(ctx.opts.model || ctx.opts.name); + const selectedModel = singleLookup ? (rows[0] ?? 
null) : undefined; + + const agentOutput = isAgentOutputMode(); + if (ctx.options.json || agentOutput) { + if (agentOutput && !ctx.options.json) { + if (ctx.opts.ids) { + console.log( + JSON.stringify({ ids: rows.map((row) => row.id), count: rows.length }, null, 2) + ); + } else if (ctx.opts.recommended) { + console.log(JSON.stringify({ recommendations: getRecommendations(rows) }, null, 2)); + } else if (singleLookup) { + console.log( + JSON.stringify( + { model: selectedModel, models: rows, count: rows.length }, + null, + 2 + ) + ); + } else { + console.log(JSON.stringify({ models: rows, count: rows.length }, null, 2)); + } + } + } else { + if (rows.length === 0) { + tui.info('No AI Gateway models found'); + } else if (ctx.opts.ids) { + for (const row of rows) { + console.log(row.id); + } + } else if (ctx.opts.recommended) { + const recommendations = getRecommendations(rows).map((row) => ({ + Use: row.use, + Model: row.model, + Name: row.name, + })); + if (recommendations.length === 0) { + tui.info('No recommended AI Gateway models found'); + } else { + tui.table(recommendations, ['Use', 'Model', 'Name']); + } + } else if (ctx.opts.simple) { + tui.table( + rows.map((row) => ({ + Model: row.id, + Name: row.name, + })), + ['Model', 'Name'] + ); + } else { + tui.info(`Found ${rows.length} AI Gateway model(s):`); + tui.table( + rows.map((row) => ({ + Provider: row.provider, + Model: row.id, + Name: row.name, + API: row.api ?? '-', + Reasoning: row.reasoning ? 'yes' : 'no', + Context: row.contextWindow ?? 
'-', + })), + ['Provider', 'Model', 'Name', 'API', 'Reasoning', 'Context'] + ); + } + } + + return { models: rows, count: rows.length, model: selectedModel }; + }, +}); diff --git a/packages/cli/src/cmd/cloud/aigateway/util.ts b/packages/cli/src/cmd/cloud/aigateway/util.ts new file mode 100644 index 000000000..9a27432bb --- /dev/null +++ b/packages/cli/src/cmd/cloud/aigateway/util.ts @@ -0,0 +1,86 @@ +import { AIGatewayService, type Logger } from '@agentuity/core'; +import { createServerFetchAdapter, getServiceUrls } from '@agentuity/server'; +import * as tui from '../../../tui'; +import type { AuthData, Config, GlobalOptions, ProjectConfig } from '../../../types'; + +const defaultAIGatewayRegion = 'usc'; + +export function getAIGatewayUrl( + region?: string, + overrides?: { aigateway_url?: string } | null +): string { + if (process.env.AGENTUITY_AIGATEWAY_URL) { + return process.env.AGENTUITY_AIGATEWAY_URL; + } + if (overrides?.aigateway_url) { + return overrides.aigateway_url; + } + return getServiceUrls(region || process.env.AGENTUITY_REGION || defaultAIGatewayRegion) + .aigateway; +} + +export function createAIGatewayService(ctx: { + logger: Logger; + auth: AuthData; + region?: string; + project?: ProjectConfig; + config: Config | null; + options: GlobalOptions; +}) { + const orgId = + ctx.project?.orgId ?? + ctx.options.orgId ?? + (process.env.AGENTUITY_CLOUD_ORG_ID || ctx.config?.preferences?.orgId); + if (!orgId) { + tui.fatal( + 'Organization ID is required. Either run from a project directory or use --org-id flag.' 
+ ); + } + + const adapter = createServerFetchAdapter( + { + headers: { + Authorization: `Bearer ${ctx.auth.apiKey}`, + 'x-agentuity-orgid': orgId, + }, + }, + ctx.logger + ); + + return new AIGatewayService(getAIGatewayUrl(ctx.region, ctx.config?.overrides), adapter); +} + +export function createPublicAIGatewayService(ctx: { + logger: Logger; + region?: string; + config: Config | null; +}) { + const adapter = createServerFetchAdapter({ headers: {} }, ctx.logger); + return new AIGatewayService(getAIGatewayUrl(ctx.region, ctx.config?.overrides), adapter); +} + +export function getCompletionText(response: unknown): string { + const choices = (response as { choices?: unknown }).choices; + const first = + Array.isArray(choices) && choices.length > 0 + ? (choices[0] as { message?: { content?: unknown }; text?: unknown; delta?: unknown }) + : undefined; + const content = + first?.message?.content ?? first?.text ?? (response as { content?: unknown }).content; + if (typeof content === 'string') { + return content; + } + if (Array.isArray(content)) { + return content + .map((part) => { + if (typeof part === 'string') return part; + if (part && typeof part === 'object' && 'text' in part) { + const text = (part as { text?: unknown }).text; + return typeof text === 'string' ? 
text : ''; + } + return ''; + }) + .join(''); + } + return ''; +} diff --git a/packages/cli/src/cmd/cloud/index.ts b/packages/cli/src/cmd/cloud/index.ts index 36c7479a6..3801e4816 100644 --- a/packages/cli/src/cmd/cloud/index.ts +++ b/packages/cli/src/cmd/cloud/index.ts @@ -14,6 +14,7 @@ import webhookCommand from './webhook'; import { agentCommand } from './agent'; import envCommand from './env'; import apikeyCommand from './apikey'; +import aigatewayCommand from './aigateway'; import oidcCommand from './oidc'; import streamCommand from './stream'; import vectorCommand from './vector'; @@ -41,6 +42,7 @@ export const command = createCommand({ ], subcommands: [ apikeyCommand, + aigatewayCommand, oidcCommand, keyvalueCommand, queueCommand, diff --git a/packages/cli/src/config.ts b/packages/cli/src/config.ts index ab506acca..948545438 100644 --- a/packages/cli/src/config.ts +++ b/packages/cli/src/config.ts @@ -226,6 +226,9 @@ export async function loadConfig( if (process.env.AGENTUITY_VECTOR_URL) { overrides.vector_url = process.env.AGENTUITY_VECTOR_URL; } + if (process.env.AGENTUITY_AIGATEWAY_URL) { + overrides.aigateway_url = process.env.AGENTUITY_AIGATEWAY_URL; + } if (process.env.AGENTUITY_STREAM_URL) { overrides.stream_url = process.env.AGENTUITY_STREAM_URL; } diff --git a/packages/cli/src/types.ts b/packages/cli/src/types.ts index 8d40915f1..6ee99cecf 100644 --- a/packages/cli/src/types.ts +++ b/packages/cli/src/types.ts @@ -41,6 +41,7 @@ export const ConfigSchema = zod.object({ kv_url: zod.url().optional().describe('Override keyvalue URL'), sandbox_url: zod.url().optional().describe('Override sandbox URL'), vector_url: zod.url().optional().describe('Override vector store URL'), + aigateway_url: zod.url().optional().describe('Override AI Gateway URL'), catalyst_url: zod.url().optional().describe('Override catalyst URL'), ion_url: zod.url().optional().describe('Override ion URL'), gravity_url: zod.url().optional().describe('Override gravity URL'), diff --git 
a/packages/cli/test/cmd/cloud/aigateway.test.ts b/packages/cli/test/cmd/cloud/aigateway.test.ts new file mode 100644 index 000000000..12cea97db --- /dev/null +++ b/packages/cli/test/cmd/cloud/aigateway.test.ts @@ -0,0 +1,560 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { afterEach, describe, expect, test } from 'bun:test'; +import { createMinimalLogger } from '@agentuity/core'; +import { aigatewayCommand } from '../../../src/cmd/cloud/aigateway'; +import { combinePromptInput, completeSubcommand } from '../../../src/cmd/cloud/aigateway/complete'; +import { modelsSubcommand } from '../../../src/cmd/cloud/aigateway/models'; +import { getCompletionText } from '../../../src/cmd/cloud/aigateway/util'; + +let server: ReturnType | undefined; + +afterEach(() => { + server?.stop(true); + server = undefined; + delete process.env.AGENTUITY_AIGATEWAY_URL; + delete process.env.AGENTUITY_AIGATEWAY_MODEL; +}); + +function baseCtx(url: string) { + delete process.env.AGENTUITY_AIGATEWAY_URL; + return { + auth: { apiKey: 'sdk_test' }, + logger: createMinimalLogger(), + region: 'usc', + project: { orgId: 'org_test' }, + config: { overrides: { aigateway_url: url }, preferences: {} }, + options: { json: true }, + }; +} + +function completionModelCatalog(api = 'openai-responses') { + return Response.json({ + success: true, + data: { + openai: [{ id: 'gpt-4.1-mini', name: 'GPT 4.1 Mini', api }], + }, + }); +} + +describe('cloud aigateway command', () => { + test('registers expected subcommands', () => { + expect(aigatewayCommand.name).toBe('aigateway'); + expect(aigatewayCommand.aliases).toContain('ai-gateway'); + expect(aigatewayCommand.subcommands?.map((cmd) => cmd.name)).toEqual(['models', 'complete']); + expect(aigatewayCommand.requires?.auth).toBeUndefined(); + }); + + test('models subcommand is public', () => { + expect(modelsSubcommand.requires?.auth).toBeUndefined(); + 
expect(modelsSubcommand.requires?.region).toBeUndefined(); + expect(modelsSubcommand.idempotent).toBe(true); + expect(modelsSubcommand.schema?.response).toBeDefined(); + }); + + test('complete subcommand exposes prompt, model, stream, and convenience schemas', () => { + const shape = completeSubcommand.schema?.options?.def.shape; + expect(completeSubcommand.requires?.auth).toBe(true); + expect(completeSubcommand.requires?.region).toBeUndefined(); + expect(completeSubcommand.optional?.region).toBe(true); + expect(completeSubcommand.tags).toContain('uses-stdin'); + expect(shape?.model).toBeDefined(); + expect(shape?.prompt).toBeDefined(); + expect(shape?.stream).toBeDefined(); + expect(shape?.maxTokens).toBeDefined(); + expect(shape?.file).toBeDefined(); + expect(shape?.systemFile).toBeDefined(); + expect(shape?.save).toBeDefined(); + expect(shape?.format).toBeDefined(); + expect(shape?.stdinMode).toBeDefined(); + expect(shape?.cost).toBeDefined(); + }); + + test('extracts assistant text from OpenAI-compatible completion responses', () => { + expect( + getCompletionText({ + choices: [{ message: { role: 'assistant', content: 'hello' } }], + }) + ).toBe('hello'); + expect(getCompletionText({ choices: [{ text: 'fallback' }] })).toBe('fallback'); + expect( + getCompletionText({ + content: [{ type: 'text', text: 'anthropic text' }], + }) + ).toBe('anthropic text'); + }); + + test('combines explicit prompt and piped stdin by default', () => { + expect( + combinePromptInput({ + explicitPrompt: 'Summarize these records.', + stdinPrompt: '[{"name":"Ada"}]', + }) + ).toBe('Summarize these records.\n\n[{"name":"Ada"}]'); + expect( + combinePromptInput({ + explicitPrompt: 'Ignore this', + stdinPrompt: '[{"name":"Ada"}]', + stdinMode: 'replace', + }) + ).toBe('[{"name":"Ada"}]'); + }); + + test('models handler calls the configured gateway and returns flattened rows', async () => { + const requests: Request[] = []; + server = Bun.serve({ + port: 0, + fetch(request) { + 
requests.push(request); + return Response.json({ + success: true, + data: { + openai: [ + { + id: 'gpt-4.1-mini', + name: 'GPT 4.1 Mini', + api: 'openai-responses', + reasoning: false, + input_modalities: ['text'], + output_modalities: ['text'], + }, + { + id: 'gpt-4.1-vision', + name: 'GPT 4.1 Vision', + api: 'openai-responses', + reasoning: false, + input_modalities: ['text', 'image'], + output_modalities: ['text'], + }, + ], + }, + }); + }, + }); + + const result = await modelsSubcommand.handler({ + ...baseCtx(`http://127.0.0.1:${server.port}`), + opts: { input: 'image' }, + args: {}, + } as never); + + expect(requests).toHaveLength(1); + expect(requests[0]!.method).toBe('GET'); + expect(requests[0]!.headers.get('authorization')).toBeNull(); + expect(requests[0]!.headers.get('x-agentuity-orgid')).toBeNull(); + expect(result.count).toBe(1); + expect(result.models[0]?.id).toBe('gpt-4.1-vision'); + }); + + test('models handler does not require auth, org, project, or region', async () => { + const requests: Request[] = []; + server = Bun.serve({ + port: 0, + fetch(request) { + requests.push(request); + return Response.json({ + success: true, + data: { + openai: [{ id: 'openai/gpt-4.1-mini', name: 'GPT 4.1 Mini' }], + }, + }); + }, + }); + + const result = await modelsSubcommand.handler({ + logger: createMinimalLogger(), + config: { + overrides: { aigateway_url: `http://127.0.0.1:${server.port}` }, + preferences: {}, + }, + options: { json: true }, + opts: {}, + args: {}, + } as never); + + expect(requests).toHaveLength(1); + expect(requests[0]!.headers.get('authorization')).toBeNull(); + expect(requests[0]!.headers.get('x-agentuity-orgid')).toBeNull(); + expect(result.count).toBe(1); + }); + + test('models handler filters by provider', async () => { + server = Bun.serve({ + port: 0, + fetch() { + return Response.json({ + success: true, + data: { + openai: [ + { + id: 'openai/gpt-4.1-mini', + name: 'GPT 4.1 Mini', + api: 'openai-responses', + }, + ], + anthropic: [ + 
{ + id: 'anthropic/claude-sonnet-4-5-20250929', + name: 'Claude Sonnet 4.5', + api: 'anthropic-messages', + }, + ], + }, + }); + }, + }); + + const result = await modelsSubcommand.handler({ + ...baseCtx(`http://127.0.0.1:${server.port}`), + opts: { provider: 'anthropic' }, + args: {}, + } as never); + + expect(result.count).toBe(1); + expect(result.models[0]?.provider).toBe('anthropic'); + expect(result.models[0]?.id).toBe('anthropic/claude-sonnet-4-5-20250929'); + }); + + test('models handler returns a single model by full model id', async () => { + server = Bun.serve({ + port: 0, + fetch() { + return Response.json({ + success: true, + data: { + anthropic: [ + { + id: 'claude-opus-4-7', + name: 'Claude Opus 4.7', + api: 'anthropic-messages', + }, + { + id: 'claude-sonnet-4-5-20250929', + name: 'Claude Sonnet 4.5', + api: 'anthropic-messages', + }, + ], + }, + }); + }, + }); + + const result = await modelsSubcommand.handler({ + ...baseCtx(`http://127.0.0.1:${server.port}`), + opts: { model: 'anthropic/claude-opus-4-7' }, + args: {}, + } as never); + + expect(result.count).toBe(1); + expect(result.model?.provider).toBe('anthropic'); + expect(result.model?.id).toBe('claude-opus-4-7'); + }); + + test('models handler returns a single model by provider and name', async () => { + server = Bun.serve({ + port: 0, + fetch() { + return Response.json({ + success: true, + data: { + openai: [{ id: 'openai/gpt-4.1-mini', name: 'GPT 4.1 Mini' }], + anthropic: [{ id: 'anthropic/claude-opus-4-7', name: 'Claude Opus 4.7' }], + }, + }); + }, + }); + + const result = await modelsSubcommand.handler({ + ...baseCtx(`http://127.0.0.1:${server.port}`), + opts: { provider: 'anthropic', name: 'Claude Opus 4.7' }, + args: {}, + } as never); + + expect(result.count).toBe(1); + expect(result.model?.id).toBe('anthropic/claude-opus-4-7'); + }); + + test('models subcommand exposes compact list options', () => { + const shape = modelsSubcommand.schema?.options?.def.shape; + 
expect(shape?.model).toBeDefined(); + expect(shape?.name).toBeDefined(); + expect(shape?.ids).toBeDefined(); + expect(shape?.simple).toBeDefined(); + expect(shape?.recommended).toBeDefined(); + }); + + test('complete handler posts an OpenAI-compatible chat completion request', async () => { + let body: unknown; + server = Bun.serve({ + port: 0, + async fetch(request) { + if (request.method === 'GET') { + return completionModelCatalog(); + } + body = await request.json(); + return Response.json( + { + id: 'chatcmpl_test', + model: 'gpt-4.1-mini', + choices: [{ message: { role: 'assistant', content: 'done' } }], + }, + { + headers: { + 'x-gateway-cost': '0.000456', + 'x-gateway-prompt-tokens': '12', + 'x-gateway-completion-tokens': '6', + }, + } + ); + }, + }); + + const result = await completeSubcommand.handler({ + ...baseCtx(`http://127.0.0.1:${server.port}`), + opts: { model: 'gpt-4.1-mini', temperature: 0.2, maxTokens: 128, refreshModels: true }, + args: { prompt: 'Say done' }, + } as never); + + expect(body).toEqual({ + model: 'gpt-4.1-mini', + messages: [{ role: 'user', content: 'Say done' }], + temperature: 0.2, + max_tokens: 128, + }); + expect(result.text).toBe('done'); + expect(result.cost).toEqual({ + total: 0.000456, + promptTokens: 12, + completionTokens: 6, + }); + }); + + test('complete handler accepts prompt from --prompt option', async () => { + let body: unknown; + server = Bun.serve({ + port: 0, + async fetch(request) { + if (request.method === 'GET') { + return completionModelCatalog(); + } + body = await request.json(); + return Response.json({ + choices: [{ message: { role: 'assistant', content: 'from option' } }], + }); + }, + }); + + const result = await completeSubcommand.handler({ + ...baseCtx(`http://127.0.0.1:${server.port}`), + opts: { model: 'gpt-4.1-mini', prompt: 'Prompt from option', refreshModels: true }, + args: {}, + } as never); + + expect(body).toEqual({ + model: 'gpt-4.1-mini', + messages: [{ role: 'user', content: 'Prompt from 
option' }], + }); + expect(result.text).toBe('from option'); + }); + + test('complete handler accepts prompt from --file option', async () => { + let body: unknown; + const dir = await mkdtemp(join(tmpdir(), 'agentuity-aigateway-')); + const promptFile = join(dir, 'prompt.txt'); + await Bun.write(promptFile, 'Prompt from file\n'); + server = Bun.serve({ + port: 0, + async fetch(request) { + if (request.method === 'GET') { + return completionModelCatalog(); + } + body = await request.json(); + return Response.json({ + choices: [{ message: { role: 'assistant', content: 'from file' } }], + }); + }, + }); + + try { + const result = await completeSubcommand.handler({ + ...baseCtx(`http://127.0.0.1:${server.port}`), + opts: { model: 'gpt-4.1-mini', file: promptFile, refreshModels: true }, + args: {}, + } as never); + + expect(body).toEqual({ + model: 'gpt-4.1-mini', + messages: [{ role: 'user', content: 'Prompt from file' }], + }); + expect(result.text).toBe('from file'); + } finally { + await rm(dir, { recursive: true, force: true }); + } + }); + + test('complete handler uses messages payload for openai-compatible completions models', async () => { + let body: unknown; + server = Bun.serve({ + port: 0, + async fetch(request) { + if (request.method === 'GET') { + return Response.json({ + success: true, + data: { + poolside: [ + { + id: 'poolside/laguna-xs.2:free', + name: 'Laguna XS', + api: 'openai-completions', + }, + ], + }, + }); + } + body = await request.json(); + return Response.json({ + choices: [{ message: { role: 'assistant', content: 'compatible done' } }], + }); + }, + }); + + const result = await completeSubcommand.handler({ + ...baseCtx(`http://127.0.0.1:${server.port}`), + opts: { + model: 'poolside/laguna-xs.2:free', + system: 'Be concise.', + refreshModels: true, + }, + args: { prompt: 'Say done' }, + } as never); + + expect(body).toEqual({ + model: 'poolside/laguna-xs.2:free', + messages: [ + { role: 'system', content: 'Be concise.' 
}, + { role: 'user', content: 'Say done' }, + ], + }); + expect(result.text).toBe('compatible done'); + }); + + test('complete handler uses AGENTUITY_AIGATEWAY_MODEL when model is omitted', async () => { + let body: unknown; + process.env.AGENTUITY_AIGATEWAY_MODEL = 'openai/gpt-env'; + server = Bun.serve({ + port: 0, + async fetch(request) { + if (request.method === 'GET') { + return completionModelCatalog(); + } + body = await request.json(); + return Response.json({ + choices: [{ message: { role: 'assistant', content: 'from env model' } }], + }); + }, + }); + + const result = await completeSubcommand.handler({ + ...baseCtx(`http://127.0.0.1:${server.port}`), + opts: {}, + args: { prompt: 'Say done' }, + } as never); + + expect(body).toEqual({ + model: 'openai/gpt-env', + messages: [{ role: 'user', content: 'Say done' }], + }); + expect(result.text).toBe('from env model'); + }); + + test('complete handler reads system prompt from --system-file and saves output', async () => { + let body: unknown; + const dir = await mkdtemp(join(tmpdir(), 'agentuity-aigateway-')); + const systemFile = join(dir, 'system.txt'); + const outputFile = join(dir, 'output.txt'); + await Bun.write(systemFile, 'Be concise.\n'); + server = Bun.serve({ + port: 0, + async fetch(request) { + if (request.method === 'GET') { + return completionModelCatalog(); + } + body = await request.json(); + return Response.json({ + choices: [{ message: { role: 'assistant', content: 'saved output' } }], + }); + }, + }); + + try { + const result = await completeSubcommand.handler({ + ...baseCtx(`http://127.0.0.1:${server.port}`), + opts: { model: 'gpt-4.1-mini', systemFile, save: outputFile, refreshModels: true }, + args: { prompt: 'Say done' }, + } as never); + + expect(body).toEqual({ + model: 'gpt-4.1-mini', + messages: [ + { role: 'system', content: 'Be concise.' 
}, + { role: 'user', content: 'Say done' }, + ], + }); + expect(result.text).toBe('saved output'); + expect(await Bun.file(outputFile).text()).toBe('saved output'); + } finally { + await rm(dir, { recursive: true, force: true }); + } + }); + + test('complete handler streams token output when --stream is set', async () => { + let body: unknown; + server = Bun.serve({ + port: 0, + async fetch(request) { + if (request.method === 'GET') { + return completionModelCatalog(); + } + body = await request.json(); + return new Response( + [ + 'data: {"choices":[{"delta":{"content":"hel"}}]}', + '', + 'data: {"choices":[{"delta":{"content":"lo"}}]}', + '', + 'data: [DONE]', + '', + ].join('\n'), + { + headers: { + 'content-type': 'text/event-stream', + 'x-gateway-cost': '0.000789', + 'x-gateway-prompt-tokens': '20', + 'x-gateway-completion-tokens': '10', + }, + } + ); + }, + }); + + const result = await completeSubcommand.handler({ + ...baseCtx(`http://127.0.0.1:${server.port}`), + opts: { model: 'gpt-4.1-mini', stream: true, refreshModels: true }, + args: { prompt: 'Say hello' }, + } as never); + + expect(body).toEqual({ + model: 'gpt-4.1-mini', + messages: [{ role: 'user', content: 'Say hello' }], + stream: true, + }); + expect(result.text).toBe('hello'); + expect(result.response).toEqual({ stream: true }); + expect(result.cost).toEqual({ + total: 0.000789, + promptTokens: 20, + completionTokens: 10, + }); + }); +}); diff --git a/packages/cli/test/config/profile-creation.test.ts b/packages/cli/test/config/profile-creation.test.ts index bd914e651..3e732716c 100644 --- a/packages/cli/test/config/profile-creation.test.ts +++ b/packages/cli/test/config/profile-creation.test.ts @@ -24,6 +24,7 @@ const ENV_VARS_TO_CLEAR = [ 'AGENTUITY_KEYVALUE_URL', 'AGENTUITY_SANDBOX_URL', 'AGENTUITY_VECTOR_URL', + 'AGENTUITY_AIGATEWAY_URL', 'AGENTUITY_STREAM_URL', 'AGENTUITY_REGION', 'AGENTUITY_CLI_API_KEY', diff --git a/packages/core/src/env.d.ts b/packages/core/src/env.d.ts index 
88cd902d5..477549b90 100644 --- a/packages/core/src/env.d.ts +++ b/packages/core/src/env.d.ts @@ -70,6 +70,12 @@ declare global { /** AI Gateway URL for model routing */ AGENTUITY_AIGATEWAY_URL?: string; + /** AI Gateway API key override */ + AGENTUITY_AIGATEWAY_KEY?: string; + + /** Default AI Gateway model for simple completions */ + AGENTUITY_AIGATEWAY_MODEL?: string; + /** Main API URL for Agentuity services */ AGENTUITY_API_URL?: string; diff --git a/packages/core/src/services/aigateway/api-reference.ts b/packages/core/src/services/aigateway/api-reference.ts new file mode 100644 index 000000000..c49340c3a --- /dev/null +++ b/packages/core/src/services/aigateway/api-reference.ts @@ -0,0 +1,163 @@ +import { z } from 'zod'; +import { + AIGatewayChatCompletionParamsSchema, + AIGatewayChatCompletionSchema, + AIGatewayModelsResponseSchema, +} from './service.ts'; +import type { Service } from '../api-reference.ts'; + +const AIGatewayStreamCompletionSchema = z + .object({ + choices: z + .array( + z + .object({ + delta: z + .object({ + role: z + .string() + .optional() + .describe('Role for the streamed message delta.'), + content: z.string().optional().describe('Token or text delta.'), + }) + .optional() + .describe('Incremental assistant message content.'), + finish_reason: z + .string() + .nullable() + .optional() + .describe('Reason the model stopped generating, when available.'), + }) + .catchall(z.unknown()) + ) + .describe('Streamed completion choices.'), + }) + .catchall(z.unknown()) + .describe('A single Server-Sent Events data frame for streamed completions.'); + +const service: Service = { + name: 'AI Gateway', + slug: 'ai-gateway', + description: 'List supported LLM models and run OpenAI-compatible chat completions', + host: 'aigateway', + endpoints: [ + { + id: 'list-models', + title: 'List Models', + method: 'GET', + path: '/models', + description: + 'List model metadata for LLM providers available through AI Gateway, grouped by provider.', + pathParams: 
[], + queryParams: [], + requestBody: null, + responseDescription: + 'JSON response containing provider keys mapped to arrays of supported model metadata.', + responseFields: { schema: AIGatewayModelsResponseSchema }, + statuses: [ + { code: 200, description: 'Model catalog returned' }, + { code: 401, description: 'Unauthorized — invalid or missing API key' }, + { code: 402, description: 'Payment required — upgrade to a paid plan' }, + ], + examplePath: '/models', + }, + { + id: 'create-chat-completion', + title: 'Create Completion', + method: 'POST', + path: '/', + description: + 'Create a completion through the AI Gateway auto-router. The gateway routes by model and request shape, so chat `messages` and legacy `prompt` payloads are both supported.', + pathParams: [], + queryParams: [], + requestBody: { + description: + 'Completion request. Use `messages` for chat-compatible models and `prompt` for legacy OpenAI completions-compatible models. Additional provider-specific fields are passed through.', + fields: { schema: AIGatewayChatCompletionParamsSchema }, + }, + responseDescription: 'Provider-compatible completion response.', + responseHeaders: [ + { + name: 'X-Gateway-Cost', + description: + 'Estimated total gateway cost in USD, when billing metadata is available.', + }, + { + name: 'X-Gateway-Prompt-Tokens', + description: 'Prompt token count used for gateway billing.', + }, + { + name: 'X-Gateway-Completion-Tokens', + description: 'Completion token count used for gateway billing.', + }, + ], + responseFields: { schema: AIGatewayChatCompletionSchema, stripRequired: true }, + statuses: [ + { code: 200, description: 'Completion created' }, + { code: 400, description: 'Invalid completion request' }, + { code: 401, description: 'Unauthorized — invalid or missing API key' }, + { code: 402, description: 'Payment required — upgrade to a paid plan' }, + ], + examplePath: '/', + exampleBody: { + model: 'openai/gpt-4o-mini', + messages: [{ role: 'user', content: 'Say 
hello in one sentence.' }], + max_tokens: 64, + }, + }, + { + id: 'stream-chat-completion', + title: 'Stream Completion', + method: 'POST', + path: '/', + description: + 'Create a streaming completion through the AI Gateway auto-router. Set `stream: true` to receive Server-Sent Events token deltas.', + pathParams: [], + queryParams: [], + requestBody: { + description: 'Completion request with `stream` set to `true`.', + fields: { schema: AIGatewayChatCompletionParamsSchema }, + }, + responseDescription: + 'Server-Sent Events stream. Each `data:` frame contains an OpenAI-compatible delta payload. The stream ends with `data: [DONE]`.', + responseHeaders: [ + { + name: 'Trailer', + description: + 'Declares billing trailers such as `X-Gateway-Cost`, `X-Gateway-Prompt-Tokens`, and `X-Gateway-Completion-Tokens` for streamed responses.', + }, + { + name: 'X-Gateway-Cost', + description: + 'Estimated total gateway cost in USD. For streaming responses this may be delivered as an HTTP trailer after the body completes.', + }, + { + name: 'X-Gateway-Prompt-Tokens', + description: + 'Prompt token count used for gateway billing. For streaming responses this may be delivered as an HTTP trailer.', + }, + { + name: 'X-Gateway-Completion-Tokens', + description: + 'Completion token count used for gateway billing. For streaming responses this may be delivered as an HTTP trailer.', + }, + ], + responseFields: { schema: AIGatewayStreamCompletionSchema, stripRequired: true }, + statuses: [ + { code: 200, description: 'Streaming completion started' }, + { code: 400, description: 'Invalid completion request' }, + { code: 401, description: 'Unauthorized — invalid or missing API key' }, + { code: 402, description: 'Payment required — upgrade to a paid plan' }, + ], + examplePath: '/', + exampleHeaders: { Accept: 'text/event-stream' }, + exampleBody: { + model: 'openai/gpt-4o-mini', + messages: [{ role: 'user', content: 'Count to three.' 
}], + stream: true, + }, + }, + ], +}; + +export default service; diff --git a/packages/core/src/services/aigateway/index.ts b/packages/core/src/services/aigateway/index.ts new file mode 100644 index 000000000..d61218d48 --- /dev/null +++ b/packages/core/src/services/aigateway/index.ts @@ -0,0 +1 @@ +export * from './service.ts'; diff --git a/packages/core/src/services/aigateway/service.ts b/packages/core/src/services/aigateway/service.ts new file mode 100644 index 000000000..42359bb6c --- /dev/null +++ b/packages/core/src/services/aigateway/service.ts @@ -0,0 +1,301 @@ +import { z } from 'zod'; +import { FetchAdapter } from '../adapter.ts'; +import { buildUrl, toServiceException, toPayload } from '../_util.ts'; + +export const AIGatewayPricingSchema = z.object({ + input: z.number().describe('Input token price.'), + output: z.number().describe('Output token price.'), + cached_input: z.number().optional().describe('Cached input token price.'), + unit: z.string().describe('Pricing unit.'), + currency: z.string().describe('Pricing currency.'), +}); + +export type AIGatewayPricing = z.infer; + +export const AIGatewayModelProviderSchema = z.object({ + env: z.array(z.string()).optional().describe('Environment variables used by this provider.'), + api: z.string().optional().describe('Provider API URL.'), + doc: z.string().optional().describe('Provider documentation URL.'), + logo_url: z.string().optional().describe('Provider logo URL.'), +}); + +export type AIGatewayModelProvider = z.infer; + +export const AIGatewayModelSchema = z.object({ + id: z.string().describe('Model identifier.'), + name: z.string().describe('Display name.'), + created: z.number().optional().describe('Unix timestamp when the model was created.'), + api: z.string().optional().describe('Compatible provider API shape.'), + family: z.string().optional().describe('Model family.'), + context_window: z.number().optional().describe('Maximum context window.'), + max_output_tokens: 
z.number().optional().describe('Maximum output token count.'), + input_modalities: z.array(z.string()).optional().describe('Supported input modalities.'), + output_modalities: z.array(z.string()).optional().describe('Supported output modalities.'), + attachment: z.boolean().optional().describe('Whether the model supports attachments.'), + reasoning: z.boolean().optional().describe('Whether the model supports reasoning.'), + tool_call: z.boolean().optional().describe('Whether the model supports tool calls.'), + temperature: z.boolean().optional().describe('Whether the model supports temperature.'), + knowledge: z.string().optional().describe('Knowledge cutoff or label.'), + open_weights: z.boolean().optional().describe('Whether the model has open weights.'), + provider: AIGatewayModelProviderSchema.optional().describe('Provider metadata.'), + pricing: AIGatewayPricingSchema.optional().describe('Model pricing.'), +}); + +export type AIGatewayModel = z.infer; + +export const AIGatewayModelsSchema = z.record(z.string(), z.array(AIGatewayModelSchema)); +export type AIGatewayModels = z.infer; + +export const AIGatewayModelsResponseSchema = z.object({ + success: z.boolean(), + data: AIGatewayModelsSchema, + message: z.string().optional(), + error: z.string().optional(), +}); + +export type AIGatewayModelsResponse = z.infer; + +export const AIGatewayChatMessageSchema = z.object({ + role: z.enum(['system', 'developer', 'user', 'assistant', 'tool']), + content: z + .union([ + z.string(), + z.array( + z + .object({ + type: z.string(), + }) + .catchall(z.unknown()) + ), + z.null(), + ]) + .optional(), + name: z.string().optional(), + tool_call_id: z.string().optional(), + tool_calls: z.array(z.unknown()).optional(), +}); + +export type AIGatewayChatMessage = z.infer; + +export const AIGatewayChatCompletionParamsSchema = z + .object({ + model: z.string().describe('Model to use for the completion.'), + messages: z.array(AIGatewayChatMessageSchema).optional().describe('Messages 
to complete.'), + prompt: z + .union([z.string(), z.array(z.string())]) + .optional() + .describe('Prompt to complete.'), + temperature: z.number().optional(), + top_p: z.number().optional(), + max_tokens: z.number().optional(), + stream: z.boolean().optional(), + stop: z.union([z.string(), z.array(z.string())]).optional(), + }) + .catchall(z.unknown()); + +export type AIGatewayChatCompletionParams = z.infer; + +export const AIGatewayChatCompletionSchema = z + .object({ + id: z.string().optional(), + object: z.string().optional(), + created: z.number().optional(), + model: z.string().optional(), + choices: z.array(z.unknown()).optional(), + usage: z.unknown().optional(), + agentuity: z + .object({ + headers: z + .record(z.string(), z.string()) + .optional() + .describe('AI Gateway response headers captured from the HTTP response.'), + cost: z + .object({ + total: z.number().optional().describe('Total estimated gateway cost in USD.'), + promptTokens: z + .number() + .optional() + .describe('Prompt token count used for gateway billing.'), + completionTokens: z + .number() + .optional() + .describe('Completion token count used for gateway billing.'), + }) + .optional() + .describe('Parsed AI Gateway cost information when available.'), + }) + .optional() + .describe('Agentuity AI Gateway metadata.'), + }) + .catchall(z.unknown()); + +export type AIGatewayChatCompletion = z.infer; + +export const AIGatewayResponseMetadataSchema = z.object({ + headers: z.record(z.string(), z.string()).optional(), + cost: z + .object({ + total: z.number().optional(), + promptTokens: z.number().optional(), + completionTokens: z.number().optional(), + }) + .optional(), +}); + +export type AIGatewayResponseMetadata = z.infer; + +export type AIGatewayStreamingCompletion = { + stream: ReadableStream; + metadata: Promise; +}; + +function parseNumber(value: string | undefined): number | undefined { + if (value === undefined || value.trim() === '') { + return undefined; + } + const parsed = 
Number(value); + return Number.isFinite(parsed) ? parsed : undefined; +} + +function extractGatewayMetadataFromHeaders(headers: Headers): AIGatewayResponseMetadata { + const captured: Record = {}; + for (const [key, value] of headers.entries()) { + const lower = key.toLowerCase(); + if ( + lower.startsWith('x-gateway-') || + (lower.startsWith('x-agentuity-') && + (lower.includes('cost') || lower.includes('token') || lower.includes('usage'))) + ) { + captured[lower] = value; + } + } + + const total = parseNumber(captured['x-gateway-cost']); + const promptTokens = parseNumber(captured['x-gateway-prompt-tokens']); + const completionTokens = parseNumber(captured['x-gateway-completion-tokens']); + const cost = + total !== undefined || promptTokens !== undefined || completionTokens !== undefined + ? { total, promptTokens, completionTokens } + : undefined; + + return { + ...(Object.keys(captured).length > 0 ? { headers: captured } : {}), + ...(cost ? { cost } : {}), + }; +} + +async function extractGatewayMetadata(response: Response): Promise { + const metadata = extractGatewayMetadataFromHeaders(response.headers); + const trailers = (response as Response & { trailers?: Promise }).trailers; + if (trailers) { + try { + const trailerMetadata = extractGatewayMetadataFromHeaders(await trailers); + return { + headers: { ...metadata.headers, ...trailerMetadata.headers }, + cost: trailerMetadata.cost ?? metadata.cost, + }; + } catch { + // Some runtimes expose a trailers promise but reject when trailers are unavailable. + } + } + return metadata; +} + +function attachGatewayMetadata>( + payload: T, + metadata: AIGatewayResponseMetadata +): T { + if (!metadata.headers && !metadata.cost) { + return payload; + } + return { + ...payload, + agentuity: { + ...(typeof payload.agentuity === 'object' && payload.agentuity !== null + ? 
payload.agentuity + : {}), + ...metadata, + }, + }; +} + +export class AIGatewayService { + constructor( + readonly baseUrl: string, + readonly adapter: FetchAdapter + ) {} + + async listModels(): Promise { + const method = 'GET'; + const url = buildUrl(this.baseUrl, '/models'); + const response = await this.adapter.invoke(url, { + method, + telemetry: { name: 'aigateway.models.list' }, + }); + if (!response.ok) { + throw await toServiceException(method, url, response.response); + } + const payload = AIGatewayModelsResponseSchema.parse(response.data); + return payload.data; + } + + async complete(params: AIGatewayChatCompletionParams): Promise { + const method = 'POST'; + const url = buildUrl(this.baseUrl, '/'); + const [body, contentType] = await toPayload( + AIGatewayChatCompletionParamsSchema.parse(params) + ); + const response = await this.adapter.invoke(url, { + method, + body, + contentType, + telemetry: { name: 'aigateway.completions.create' }, + }); + if (!response.ok) { + throw await toServiceException(method, url, response.response); + } + const payload = attachGatewayMetadata( + response.data as Record, + await extractGatewayMetadata(response.response) + ); + return AIGatewayChatCompletionSchema.parse(payload); + } + + async streamComplete( + params: AIGatewayChatCompletionParams + ): Promise> { + return (await this.streamCompleteWithMetadata(params)).stream; + } + + async streamCompleteWithMetadata( + params: AIGatewayChatCompletionParams + ): Promise { + const method = 'POST'; + const url = buildUrl(this.baseUrl, '/'); + const [body, contentType] = await toPayload( + AIGatewayChatCompletionParamsSchema.parse({ ...params, stream: true }) + ); + const response = await this.adapter.invoke(url, { + method, + body, + contentType, + headers: { Accept: 'text/event-stream' }, + binary: true, + telemetry: { name: 'aigateway.completions.stream' }, + }); + if (!response.ok) { + throw await toServiceException(method, url, response.response); + } + if 
(!response.response.body) { + throw await toServiceException( + method, + url, + new Response('Streaming response did not include a body', { status: 502 }) + ); + } + return { + stream: response.response.body, + metadata: extractGatewayMetadata(response.response), + }; + } +} diff --git a/packages/core/src/services/config.ts b/packages/core/src/services/config.ts index 6c0d2fa34..933549c35 100644 --- a/packages/core/src/services/config.ts +++ b/packages/core/src/services/config.ts @@ -6,6 +6,7 @@ export const ServiceUrlsSchema = z keyvalue: z.string().describe('URL for the key-value storage service.'), stream: z.string().describe('URL for the stream service.'), vector: z.string().describe('URL for the vector storage service.'), + aigateway: z.string().describe('URL for the AI Gateway service.'), catalyst: z.string().describe('URL for the Catalyst API gateway.'), otel: z.string().describe('URL for the OpenTelemetry collector.'), sandbox: z.string().describe('URL for the sandbox service.'), @@ -42,6 +43,7 @@ export function getServiceUrls(region?: string): ServiceUrls { keyvalue: getEnv('AGENTUITY_KEYVALUE_URL') || transportUrl, stream: getEnv('AGENTUITY_STREAM_URL') || buildRegionalURL(resolvedRegion, 'streams'), vector: getEnv('AGENTUITY_VECTOR_URL') || transportUrl, + aigateway: getEnv('AGENTUITY_AIGATEWAY_URL') || buildRegionalURL(resolvedRegion, 'aigateway'), catalyst: getEnv('AGENTUITY_CATALYST_URL') || transportUrl, otel: getEnv('AGENTUITY_OTLP_URL') || buildRegionalURL(resolvedRegion, 'otel'), sandbox: getEnv('AGENTUITY_SANDBOX_URL') || transportUrl, diff --git a/packages/core/src/services/index.ts b/packages/core/src/services/index.ts index 56aae5fc3..c841cc844 100644 --- a/packages/core/src/services/index.ts +++ b/packages/core/src/services/index.ts @@ -1,4 +1,5 @@ export * from './adapter.ts'; +export * from './aigateway/index.ts'; export * from './auth/index.ts'; export * from './email/index.ts'; export * from './exception.ts'; diff --git 
a/packages/core/test/aigateway.test.ts b/packages/core/test/aigateway.test.ts new file mode 100644 index 000000000..7fe720121 --- /dev/null +++ b/packages/core/test/aigateway.test.ts @@ -0,0 +1,152 @@ +import { describe, expect, test } from 'bun:test'; +import { createMockAdapter } from '@agentuity/test-utils'; +import { AIGatewayService } from '../src/services/aigateway/index.ts'; + +describe('AIGatewayService', () => { + const baseUrl = 'https://aigateway.example.com'; + + test('lists models from the gateway catalog', async () => { + const { adapter, calls } = createMockAdapter([ + { + ok: true, + data: { + success: true, + data: { + openai: [ + { + id: 'gpt-4.1-mini', + name: 'GPT 4.1 Mini', + reasoning: false, + input_modalities: ['text'], + output_modalities: ['text'], + provider: { api: 'https://api.openai.com' }, + pricing: { + input: 0.4, + output: 1.6, + unit: 'per_million_tokens', + currency: 'USD', + }, + }, + ], + }, + }, + }, + ]); + const service = new AIGatewayService(baseUrl, adapter); + + const models = await service.listModels(); + + expect(calls).toHaveLength(1); + expect(calls[0]?.url).toBe(`${baseUrl}/models`); + expect(calls[0]?.options.method).toBe('GET'); + expect(models.openai?.[0]?.id).toBe('gpt-4.1-mini'); + }); + + test('creates completions through the AI Gateway auto-router endpoint', async () => { + const { adapter, calls } = createMockAdapter([ + { + ok: true, + data: { + id: 'chatcmpl_123', + model: 'gpt-4.1-mini', + choices: [{ message: { role: 'assistant', content: 'Hello' } }], + }, + headers: { + 'x-gateway-cost': '0.000123', + 'x-gateway-prompt-tokens': '10', + 'x-gateway-completion-tokens': '5', + }, + }, + ]); + const service = new AIGatewayService(baseUrl, adapter); + + const completion = await service.complete({ + model: 'gpt-4.1-mini', + messages: [{ role: 'user', content: 'Say hello' }], + temperature: 0.2, + }); + + expect(calls).toHaveLength(1); + expect(calls[0]?.url).toBe(`${baseUrl}/`); + 
expect(calls[0]?.options.method).toBe('POST'); + expect(calls[0]?.options.contentType).toBe('application/json'); + expect(JSON.parse(String(calls[0]?.options.body))).toEqual({ + model: 'gpt-4.1-mini', + messages: [{ role: 'user', content: 'Say hello' }], + temperature: 0.2, + }); + expect(completion.id).toBe('chatcmpl_123'); + expect(completion.agentuity?.cost).toEqual({ + total: 0.000123, + promptTokens: 10, + completionTokens: 5, + }); + expect(completion.agentuity?.headers?.['x-gateway-cost']).toBe('0.000123'); + }); + + test('streams completions through the AI Gateway auto-router endpoint', async () => { + const { adapter, calls } = createMockAdapter([ + { + ok: true, + data: undefined, + headers: { 'content-type': 'text/event-stream' }, + body: 'data: {"choices":[{"delta":{"content":"Hello"}}]}\n\n', + }, + ]); + const service = new AIGatewayService(baseUrl, adapter); + + const stream = await service.streamComplete({ + model: 'gpt-4.1-mini', + messages: [{ role: 'user', content: 'Say hello' }], + }); + + expect(calls).toHaveLength(1); + expect(calls[0]?.url).toBe(`${baseUrl}/`); + expect(calls[0]?.options.method).toBe('POST'); + expect(calls[0]?.options.contentType).toBe('application/json'); + expect(calls[0]?.options.headers).toEqual({ Accept: 'text/event-stream' }); + expect(calls[0]?.options.binary).toBe(true); + expect(JSON.parse(String(calls[0]?.options.body))).toEqual({ + model: 'gpt-4.1-mini', + messages: [{ role: 'user', content: 'Say hello' }], + stream: true, + }); + expect(stream).toBeInstanceOf(ReadableStream); + }); + + test('streams chat completions with gateway metadata', async () => { + const { adapter } = createMockAdapter([ + { + ok: true, + data: undefined, + headers: { + 'content-type': 'text/event-stream', + 'x-gateway-cost': '0.000234', + 'x-gateway-prompt-tokens': '11', + 'x-gateway-completion-tokens': '7', + }, + body: 'data: {"choices":[{"delta":{"content":"Hello"}}]}\n\n', + }, + ]); + const service = new AIGatewayService(baseUrl, 
adapter); + + const completion = await service.streamCompleteWithMetadata({ + model: 'gpt-4.1-mini', + messages: [{ role: 'user', content: 'Say hello' }], + }); + + expect(completion.stream).toBeInstanceOf(ReadableStream); + expect(await completion.metadata).toEqual({ + headers: { + 'x-gateway-cost': '0.000234', + 'x-gateway-prompt-tokens': '11', + 'x-gateway-completion-tokens': '7', + }, + cost: { + total: 0.000234, + promptTokens: 11, + completionTokens: 7, + }, + }); + }); +}); diff --git a/packages/server/README.md b/packages/server/README.md index 54053fb5d..be5a2805a 100644 --- a/packages/server/README.md +++ b/packages/server/README.md @@ -25,6 +25,7 @@ const urls: ServiceUrls = getServiceUrls(region); console.log(urls.keyvalue); // https://agentuity.ai (or AGENTUITY_KEYVALUE_URL) console.log(urls.stream); // https://streams.agentuity.cloud (or AGENTUITY_STREAM_URL) console.log(urls.vector); // https://agentuity.ai (or AGENTUITY_VECTOR_URL) +console.log(urls.aigateway); // https://aigateway-usc.agentuity.cloud (or AGENTUITY_AIGATEWAY_URL) ``` ### Server Fetch Adapter diff --git a/packages/server/src/config.ts b/packages/server/src/config.ts index 596f11346..d3e00120f 100644 --- a/packages/server/src/config.ts +++ b/packages/server/src/config.ts @@ -2,6 +2,7 @@ export interface ServiceUrls { keyvalue: string; stream: string; vector: string; + aigateway: string; catalyst: string; otel: string; sandbox: string; @@ -35,6 +36,8 @@ export function getServiceUrls(region?: string): ServiceUrls { keyvalue: process.env.AGENTUITY_KEYVALUE_URL || transportUrl, stream: process.env.AGENTUITY_STREAM_URL || buildRegionalURL(resolvedRegion, 'streams'), vector: process.env.AGENTUITY_VECTOR_URL || transportUrl, + aigateway: + process.env.AGENTUITY_AIGATEWAY_URL || buildRegionalURL(resolvedRegion, 'aigateway'), catalyst: process.env.AGENTUITY_CATALYST_URL || transportUrl, otel: process.env.AGENTUITY_OTLP_URL || buildRegionalURL(resolvedRegion, 'otel'), sandbox: 
process.env.AGENTUITY_SANDBOX_URL || transportUrl, diff --git a/packages/server/test/config.test.ts b/packages/server/test/config.test.ts index cd1e21b9a..64a44ce0c 100644 --- a/packages/server/test/config.test.ts +++ b/packages/server/test/config.test.ts @@ -36,6 +36,7 @@ describe('getServiceUrls', () => { delete process.env.AGENTUITY_OBJECTSTORE_URL; delete process.env.AGENTUITY_STREAM_URL; delete process.env.AGENTUITY_VECTOR_URL; + delete process.env.AGENTUITY_AIGATEWAY_URL; delete process.env.AGENTUITY_CATALYST_URL; delete process.env.AGENTUITY_OTLP_URL; }); @@ -51,6 +52,7 @@ describe('getServiceUrls', () => { const urls = getServiceUrls(); expect(urls.catalyst).toBe('https://catalyst-us-west.agentuity.cloud'); expect(urls.stream).toBe('https://streams-us-west.agentuity.cloud'); + expect(urls.aigateway).toBe('https://aigateway-us-west.agentuity.cloud'); }); test('should build URLs for us-east region', () => { @@ -58,12 +60,14 @@ describe('getServiceUrls', () => { expect(urls.catalyst).toBe('https://catalyst-us-east.agentuity.cloud'); expect(urls.keyvalue).toBe('https://catalyst-us-east.agentuity.cloud'); expect(urls.stream).toBe('https://streams-us-east.agentuity.cloud'); + expect(urls.aigateway).toBe('https://aigateway-us-east.agentuity.cloud'); }); test('should use agentuity.io for local region', () => { const urls = getServiceUrls('local'); expect(urls.catalyst).toBe('https://catalyst.agentuity.io'); expect(urls.stream).toBe('https://streams.agentuity.io'); + expect(urls.aigateway).toBe('https://aigateway.agentuity.io'); }); test('should override with AGENTUITY_TRANSPORT_URL', () => { @@ -77,9 +81,11 @@ describe('getServiceUrls', () => { test('should override individual service URLs', () => { process.env.AGENTUITY_SANDBOX_URL = 'https://custom-sandbox.example.com'; process.env.AGENTUITY_KEYVALUE_URL = 'https://custom-kv.example.com'; + process.env.AGENTUITY_AIGATEWAY_URL = 'https://custom-ai.example.com'; const urls = getServiceUrls('us-east'); 
expect(urls.keyvalue).toBe('https://custom-kv.example.com'); + expect(urls.aigateway).toBe('https://custom-ai.example.com'); expect(urls.catalyst).toBe('https://catalyst-us-east.agentuity.cloud'); expect(urls.sandbox).toBe('https://custom-sandbox.example.com'); }); diff --git a/tsconfig.json b/tsconfig.json index 6e8a61812..abec9854f 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -2,6 +2,7 @@ "files": [], "references": [ { "path": "./packages/auth" }, + { "path": "./packages/aigateway" }, { "path": "./packages/claude-code" }, { "path": "./packages/cli" }, { "path": "./packages/coder" }, From 9cd6eda18a1ef511c1821f67534ac9c8be6df69d Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 6 May 2026 15:47:54 -0500 Subject: [PATCH 2/8] refactor: use AI Gateway SDK in coder tui --- bun.lock | 2 + packages/coder-tui/package.json | 2 + packages/coder-tui/src/aigateway.ts | 73 ++++++----------------------- packages/coder-tui/tsconfig.json | 9 +++- 4 files changed, 25 insertions(+), 61 deletions(-) diff --git a/bun.lock b/bun.lock index 57c6bd099..7c7757a0a 100644 --- a/bun.lock +++ b/bun.lock @@ -535,6 +535,8 @@ "name": "@agentuity/coder-tui", "version": "2.0.14", "dependencies": { + "@agentuity/core": "workspace:*", + "@agentuity/server": "workspace:*", "@mariozechner/pi-coding-agent": "^0.72.1", "@mariozechner/pi-tui": "^0.72.1", "@sinclair/typebox": "^0.34.49", diff --git a/packages/coder-tui/package.json b/packages/coder-tui/package.json index 76ba1979d..abdf8cf7b 100644 --- a/packages/coder-tui/package.json +++ b/packages/coder-tui/package.json @@ -25,6 +25,8 @@ "prepublishOnly": "bun run clean && bun run build" }, "dependencies": { + "@agentuity/core": "workspace:*", + "@agentuity/server": "workspace:*", "@mariozechner/pi-coding-agent": "^0.72.1", "@mariozechner/pi-tui": "^0.72.1", "@sinclair/typebox": "^0.34.49" diff --git a/packages/coder-tui/src/aigateway.ts b/packages/coder-tui/src/aigateway.ts index 11fd1d2d1..7a4ee41a1 100644 --- 
a/packages/coder-tui/src/aigateway.ts +++ b/packages/coder-tui/src/aigateway.ts @@ -10,6 +10,13 @@ import { delimiter, join } from 'node:path'; import { existsSync } from 'node:fs'; import { execFileSync } from 'node:child_process'; +import { createMinimalLogger } from '@agentuity/core'; +import { + AIGatewayService, + type AIGatewayModel, + type AIGatewayModels, +} from '@agentuity/core/aigateway'; +import { createServerFetchAdapter } from '@agentuity/server'; import type { ExtensionAPI, ProviderModelConfig } from '@mariozechner/pi-coding-agent'; export type KnownApi = @@ -24,8 +31,6 @@ export type KnownApi = | 'google-gemini-cli' | 'google-vertex'; -const MODEL_CATALOG_TIMEOUT_MS = 5_000; - const KNOWN_APIS = new Set([ 'openai-completions', 'mistral-conversations', @@ -39,34 +44,6 @@ const KNOWN_APIS = new Set([ 'google-vertex', ] satisfies KnownApi[]); -interface AIGatewayModels { - [key: string]: AIGatewayModel[]; -} - -interface AIGatewayModelResponse { - success: boolean; - data: AIGatewayModels; - message?: string; - error?: string; -} - -interface AIGatewayModel { - id: string; - name: string; - api: KnownApi; - reasoning: boolean; - input_modalities?: ('text' | 'image')[]; - context_window?: number; - max_output_tokens?: number; - pricing?: { - input: number; - output: number; - cached_input: number; - unit: 'per_million_tokens'; - currency: 'USD'; - }; -} - function getEnv(...keys: string[]): string | undefined { for (const key of keys) { if (process.env[key]) { @@ -155,37 +132,15 @@ async function fetchModels(): Promise { process.env.AGENTUITY_AIGATEWAY_ORGID = orgId; } - const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), MODEL_CATALOG_TIMEOUT_MS); - try { - const response = await fetch(`${baseUrl}/models`, { signal: controller.signal }); - - if (!response.ok) { - console.warn( - `Failed to fetch models from AI Gateway: ${response.status} ${response.statusText}` - ); - return {}; - } - - const payload = 
(await response.json()) as AIGatewayModelResponse; - - if (!payload.success) { - console.warn(`Failed to load models. ${payload.message} ${payload}`); - } - - return payload.data; + const service = new AIGatewayService( + baseUrl, + createServerFetchAdapter({ headers: {} }, createMinimalLogger()) + ); + return await service.listModels(); } catch (error) { - if (error instanceof Error && error.name === 'AbortError') { - console.warn( - `Timed out fetching models from AI Gateway after ${MODEL_CATALOG_TIMEOUT_MS}ms` - ); - return {}; - } console.warn('Failed to fetch models from AI Gateway:', error); return {}; - } finally { - clearTimeout(timeout); } } @@ -193,8 +148,8 @@ function toPiModel(m: AIGatewayModel): ProviderModelConfig { return { id: m.id, name: m.name, - reasoning: m.reasoning, - input: m.input_modalities as ('text' | 'image')[], + reasoning: m.reasoning ?? false, + input: (m.input_modalities as ('text' | 'image')[] | undefined) ?? ['text'], contextWindow: m.context_window ?? 40000, maxTokens: m.max_output_tokens ?? 
64000, cost: { diff --git a/packages/coder-tui/tsconfig.json b/packages/coder-tui/tsconfig.json index dc1ef5181..5156d1abb 100644 --- a/packages/coder-tui/tsconfig.json +++ b/packages/coder-tui/tsconfig.json @@ -4,9 +4,14 @@ "composite": true, "outDir": "./dist", "rootDir": "./src", - "rewriteRelativeImportExtensions": true + "rewriteRelativeImportExtensions": true, + "paths": { + "@agentuity/core": ["../core/src"], + "@agentuity/core/*": ["../core/src/services/*"], + "@agentuity/server": ["../server/src"] + } }, "include": ["src/**/*"], "exclude": ["test/**/*"], - "references": [] + "references": [{ "path": "../core" }, { "path": "../server" }] } From 963748e4e2da19323c97f89a59b6738b3f11ca5e Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 6 May 2026 15:50:56 -0500 Subject: [PATCH 3/8] docs: generalize AI Gateway completion wording --- apps/docs/src/web/content/reference/api/ai-gateway.mdx | 4 ++-- examples/README.md | 2 +- examples/services-aigateway/README.md | 4 ++-- packages/cli/src/cmd/cloud/aigateway/complete.ts | 2 +- packages/cli/src/cmd/cloud/aigateway/index.ts | 2 +- packages/core/src/services/aigateway/api-reference.ts | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/apps/docs/src/web/content/reference/api/ai-gateway.mdx b/apps/docs/src/web/content/reference/api/ai-gateway.mdx index 5df6ac52c..bc9354f86 100644 --- a/apps/docs/src/web/content/reference/api/ai-gateway.mdx +++ b/apps/docs/src/web/content/reference/api/ai-gateway.mdx @@ -1,7 +1,7 @@ --- title: AI Gateway API short_title: AI Gateway -description: List supported LLM models and run OpenAI-compatible chat completions +description: List supported LLM models and run routed AI Gateway completions --- {/* This file is auto-generated from Zod schemas. Do not edit manually. Run scripts/generate-api-reference.ts to regenerate. */} @@ -361,7 +361,7 @@ Completion request with `stream` set to `true`. ### Response -Server-Sent Events stream. 
Each `data:` frame contains an OpenAI-compatible delta payload. The stream ends with `data: [DONE]`. +Server-Sent Events stream. Each `data:` frame contains a provider-compatible delta payload. The stream ends with `data: [DONE]`. | Status | Description | |--------|-------------| diff --git a/examples/README.md b/examples/README.md index 861b7a25b..9af19cc82 100644 --- a/examples/README.md +++ b/examples/README.md @@ -24,7 +24,7 @@ AI Gateway service usage with the standalone TypeScript API. - Model discovery - Model metadata filtering -- OpenAI-compatible chat completions +- Routed LLM completions #### [streaming](./streaming/) diff --git a/examples/services-aigateway/README.md b/examples/services-aigateway/README.md index 3466f439c..0374a8bb3 100644 --- a/examples/services-aigateway/README.md +++ b/examples/services-aigateway/README.md @@ -6,7 +6,7 @@ This example demonstrates how to use the standalone `@agentuity/aigateway` TypeS - **Model discovery** - List AI Gateway models grouped by provider - **Filtering** - Filter models by provider, input modality, and reasoning support -- **Completions** - Run OpenAI-compatible chat completions through AI Gateway +- **Completions** - Run routed LLM completions through AI Gateway - **Standalone client** - Use `AIGatewayClient` inside an Agentuity runtime app ## Running the Example @@ -63,7 +63,7 @@ for (const [provider, models] of Object.entries(catalog)) { } ``` -### Chat Completion +### Completion ```typescript const completion = await client.complete({ diff --git a/packages/cli/src/cmd/cloud/aigateway/complete.ts b/packages/cli/src/cmd/cloud/aigateway/complete.ts index c3e8a1f5b..5b8482f58 100644 --- a/packages/cli/src/cmd/cloud/aigateway/complete.ts +++ b/packages/cli/src/cmd/cloud/aigateway/complete.ts @@ -270,7 +270,7 @@ async function consumeCompletionStream( export const completeSubcommand = createCommand({ name: 'complete', aliases: ['completion', 'chat'], - description: 'Run an AI Gateway chat completion', + 
description: 'Run an AI Gateway completion', tags: ['write', 'slow', 'requires-auth', 'uses-stdin'], requires: { auth: true }, optional: { project: true, region: true }, diff --git a/packages/cli/src/cmd/cloud/aigateway/index.ts b/packages/cli/src/cmd/cloud/aigateway/index.ts index 450e09ce9..0370acf08 100644 --- a/packages/cli/src/cmd/cloud/aigateway/index.ts +++ b/packages/cli/src/cmd/cloud/aigateway/index.ts @@ -12,7 +12,7 @@ export const aigatewayCommand = createCommand({ { command: getCommand('cloud aigateway models'), description: 'List supported models' }, { command: getCommand('cloud aigateway complete --model openai/gpt-4.1-mini "Hello"'), - description: 'Run a chat completion', + description: 'Run a completion', }, ], subcommands: [modelsSubcommand, completeSubcommand], diff --git a/packages/core/src/services/aigateway/api-reference.ts b/packages/core/src/services/aigateway/api-reference.ts index c49340c3a..f75e0be99 100644 --- a/packages/core/src/services/aigateway/api-reference.ts +++ b/packages/core/src/services/aigateway/api-reference.ts @@ -38,7 +38,7 @@ const AIGatewayStreamCompletionSchema = z const service: Service = { name: 'AI Gateway', slug: 'ai-gateway', - description: 'List supported LLM models and run OpenAI-compatible chat completions', + description: 'List supported LLM models and run routed AI Gateway completions', host: 'aigateway', endpoints: [ { @@ -119,7 +119,7 @@ const service: Service = { fields: { schema: AIGatewayChatCompletionParamsSchema }, }, responseDescription: - 'Server-Sent Events stream. Each `data:` frame contains an OpenAI-compatible delta payload. The stream ends with `data: [DONE]`.', + 'Server-Sent Events stream. Each `data:` frame contains a provider-compatible delta payload. 
The stream ends with `data: [DONE]`.', responseHeaders: [ { name: 'Trailer', From 793adf813c9c044f62a3870464be3ab2ab31336e Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 6 May 2026 15:53:43 -0500 Subject: [PATCH 4/8] docs: use real AI Gateway API in example --- examples/services-aigateway/README.md | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/examples/services-aigateway/README.md b/examples/services-aigateway/README.md index 0374a8bb3..6335a4ef3 100644 --- a/examples/services-aigateway/README.md +++ b/examples/services-aigateway/README.md @@ -17,24 +17,20 @@ bun install bun run dev ``` -## Testing +## Testing the AI Gateway API Directly ```bash # List all models -curl http://localhost:3500/agent/aigateway \ - --json '{"operation":"models"}' +curl https://aigateway-usc.agentuity.cloud/models # List OpenAI models -curl http://localhost:3500/agent/aigateway \ - --json '{"operation":"models","provider":"openai"}' - -# List models that accept image input -curl http://localhost:3500/agent/aigateway \ - --json '{"operation":"models","input":"image"}' +curl https://aigateway-usc.agentuity.cloud/models/openai # Run a completion -curl http://localhost:3500/agent/aigateway \ - --json '{"operation":"complete","model":"openai/gpt-4.1-mini","prompt":"Say hello in one sentence."}' +curl https://aigateway-usc.agentuity.cloud/ \ + -H "Authorization: Bearer $AGENTUITY_AIGATEWAY_KEY" \ + -H "x-agentuity-orgid: $AGENTUITY_CLOUD_ORG_ID" \ + --json '{"model":"openai/gpt-4.1-mini","messages":[{"role":"user","content":"Say hello in one sentence."}]}' ``` ## Key Concepts From 99aa54c45602a7a04559d36f027ee2fd02aff622 Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 6 May 2026 16:18:29 -0500 Subject: [PATCH 5/8] fix: address AI Gateway PR feedback --- apps/docs/scripts/generate-api-reference.ts | 13 ++++- .../web/content/reference/api/ai-gateway.mdx | 10 ++-- .../src/agent/aigateway/agent.ts | 11 ++-- packages/cli/src/agent-detection.ts | 
8 +-- packages/cli/src/cmd/ai/capabilities/show.ts | 2 - .../cli/src/cmd/cloud/aigateway/complete.ts | 55 ++++++++++++++----- .../src/cmd/cloud/aigateway/model-cache.ts | 5 +- .../cli/src/cmd/cloud/aigateway/models.ts | 10 +++- packages/cli/src/cmd/cloud/index.ts | 2 +- packages/cli/test/cmd/cloud/aigateway.test.ts | 2 +- packages/coder-tui/src/aigateway.ts | 28 ++++++++-- .../src/services/aigateway/api-reference.ts | 6 +- packages/core/src/services/aigateway/index.ts | 23 +++++++- .../core/src/services/aigateway/service.ts | 25 ++++++++- packages/core/src/services/api-reference.ts | 1 + 15 files changed, 151 insertions(+), 50 deletions(-) diff --git a/apps/docs/scripts/generate-api-reference.ts b/apps/docs/scripts/generate-api-reference.ts index 6579a3a6c..813fc13b7 100644 --- a/apps/docs/scripts/generate-api-reference.ts +++ b/apps/docs/scripts/generate-api-reference.ts @@ -107,6 +107,16 @@ function renderResponseHeaders(headers: ResponseHeader[], subHeading: string): s ].join('\n'); } +function renderAuthentication(service: Service): string { + if (service.hasPublicEndpoints) { + if (service.slug !== 'ai-gateway') { + return 'Most requests require a Bearer token. Pass your API or SDK key in the `Authorization` header. Public endpoints (such as listing and fetching public snapshots) are noted below and do not require authentication.'; + } + return 'Most requests require a Bearer token. Pass your API or SDK key in the `Authorization` header. Public endpoints are noted below and do not require authentication.'; + } + return 'All requests require a Bearer token. Pass your API or SDK key in the `Authorization` header.'; +} + function renderEndpointSection(endpoint: Endpoint, headingLevel = 2, host?: string): string { const subHeading = '#'.repeat(headingLevel + 1); const pathParams = toParamTableInput(endpoint.pathParams, 'path'); @@ -182,6 +192,7 @@ function renderEndpointSection(endpoint: Endpoint, headingLevel = 2, host?: stri '', ``, '', + ...(endpoint.public ? 
['**Authentication:** Public. No auth required.', ''] : []), paramSection, requestBodyParts.join('\n'), responseParts.join('\n'), @@ -224,7 +235,7 @@ description: ${service.description} ## Authentication -${service.hasPublicEndpoints ? 'Most requests require a Bearer token. Pass your API or SDK key in the `Authorization` header. Public endpoints (such as listing and fetching public snapshots) are noted below and do not require authentication.' : 'All requests require a Bearer token. Pass your API or SDK key in the `Authorization` header.'} +${renderAuthentication(service)} | Header | Value | |--------|-------| diff --git a/apps/docs/src/web/content/reference/api/ai-gateway.mdx b/apps/docs/src/web/content/reference/api/ai-gateway.mdx index bc9354f86..428cf90d4 100644 --- a/apps/docs/src/web/content/reference/api/ai-gateway.mdx +++ b/apps/docs/src/web/content/reference/api/ai-gateway.mdx @@ -11,7 +11,7 @@ description: List supported LLM models and run routed AI Gateway completions ## Authentication -All requests require a Bearer token. Pass your API or SDK key in the `Authorization` header. +Most requests require a Bearer token. Pass your API or SDK key in the `Authorization` header. Public endpoints are noted below and do not require authentication. | Header | Value | |--------|-------| @@ -27,6 +27,8 @@ List model metadata for LLM providers available through AI Gateway, grouped by p +**Authentication:** Public. No auth required. + ### Response @@ -35,9 +37,7 @@ JSON response containing provider keys mapped to arrays of supported model metad | Status | Description | |--------|-------------| -| 200 | Model catalog returned | -| 401 | Unauthorized — invalid or missing API key | -| 402 | Payment required — upgrade to a paid plan | +| 200 | Model catalog returned. Public — no auth required. 
| ### Response Fields @@ -52,7 +52,7 @@ JSON response containing provider keys mapped to arrays of supported model metad "name": "data", "type": "object", "description": "", - "required": true + "required": false }, { "name": "message", diff --git a/examples/services-aigateway/src/agent/aigateway/agent.ts b/examples/services-aigateway/src/agent/aigateway/agent.ts index 242322f49..8b78ca585 100644 --- a/examples/services-aigateway/src/agent/aigateway/agent.ts +++ b/examples/services-aigateway/src/agent/aigateway/agent.ts @@ -28,11 +28,12 @@ const inputSchema = s.union([ function getCompletionText(response: unknown): string { const choices = (response as { choices?: unknown }).choices; - if (!Array.isArray(choices) || choices.length === 0) { - return ''; - } - const first = choices[0] as { message?: { content?: unknown }; text?: unknown }; - const content = first.message?.content ?? first.text; + const first = + Array.isArray(choices) && choices.length > 0 + ? (choices[0] as { message?: { content?: unknown }; text?: unknown }) + : undefined; + const content = + first?.message?.content ?? first?.text ?? (response as { content?: unknown }).content; if (typeof content === 'string') { return content; } diff --git a/packages/cli/src/agent-detection.ts b/packages/cli/src/agent-detection.ts index 4eeb6aaba..73e4215a5 100644 --- a/packages/cli/src/agent-detection.ts +++ b/packages/cli/src/agent-detection.ts @@ -343,14 +343,10 @@ let cachedResult: string | undefined | null = null; /** * Check if a basename matches a known agent process name. - * Short tokens (≤2 chars) require an exact match to avoid false positives - * (e.g., 'pi' matching 'pip', 'spin'). Longer tokens use substring matching. + * Match exact executable names or names split on non-alphanumeric boundaries. 
*/ function matchesProcessName(basename: string, processName: string): boolean { - if (processName.length <= 2) { - return basename === processName; - } - return basename.includes(processName); + return basename === processName || basename.split(/[^a-z0-9]+/i).includes(processName); } /** diff --git a/packages/cli/src/cmd/ai/capabilities/show.ts b/packages/cli/src/cmd/ai/capabilities/show.ts index 06a5ddaa9..0804118f4 100644 --- a/packages/cli/src/cmd/ai/capabilities/show.ts +++ b/packages/cli/src/cmd/ai/capabilities/show.ts @@ -133,8 +133,6 @@ export const showSubcommand = createSubcommand({ name: 'AI Gateway', description: 'List supported AI models and run LLM completions', commands: ['cloud aigateway models', 'cloud aigateway complete'], - requiresAuth: true, - requiresProject: true, }, { id: 'databases', diff --git a/packages/cli/src/cmd/cloud/aigateway/complete.ts b/packages/cli/src/cmd/cloud/aigateway/complete.ts index 5b8482f58..505f2507f 100644 --- a/packages/cli/src/cmd/cloud/aigateway/complete.ts +++ b/packages/cli/src/cmd/cloud/aigateway/complete.ts @@ -1,7 +1,6 @@ import { z } from 'zod'; -import type { AIGatewayModels, AIGatewayService } from '@agentuity/core'; +import { StructuredError, type AIGatewayModels, type AIGatewayService } from '@agentuity/core'; import { createCommand } from '../../../types'; -import * as tui from '../../../tui'; import { getCommand } from '../../../command-prefix'; import { getExecutingAgent } from '../../../agent-detection'; import { createAIGatewayService, getAIGatewayUrl, getCompletionText } from './util'; @@ -14,6 +13,13 @@ const CompletionResponseSchema = z.object({ }); const defaultModel = 'openai/gpt-4o-mini'; +const PromptRequiredError = StructuredError( + 'AIGatewayPromptRequired', + 'Prompt is required. Pass it as an argument, use --prompt, use --file, or pipe it through stdin.' 
+); +const PromptFileNotFoundError = StructuredError('AIGatewayPromptFileNotFound')<{ + filename: string; +}>(); function isAgentOutputMode(): boolean { return Boolean(getExecutingAgent()) && process.env.AGENTUITY_AIGATEWAY_AGENT_OUTPUT !== 'false'; @@ -32,7 +38,14 @@ async function readPromptFromFile(filename?: string): Promise 0 ? trimmed : undefined; } @@ -148,7 +161,6 @@ function buildCompletionRequest(opts: { model: string; prompt: string; system?: string; - api?: string; temperature?: number; maxTokens?: number; stream?: boolean; @@ -221,6 +233,14 @@ async function consumeCompletionStream( if (!data || data === '[DONE]') { continue; } + try { + const delta = getStreamDeltaText(JSON.parse(data)); + if (delta) { + text += delta; + } + } catch { + // Ignore malformed stream frames and continue consuming the stream. + } if (options.raw) { if (!options.json) { console.log(data); @@ -230,7 +250,6 @@ async function consumeCompletionStream( try { const delta = getStreamDeltaText(JSON.parse(data)); if (delta) { - text += delta; if (!options.json) { process.stdout.write(delta); } @@ -336,9 +355,7 @@ export const completeSubcommand = createCommand({ stdinMode: ctx.opts.stdinMode, }); if (!prompt) { - tui.fatal( - 'Prompt is required. Pass it as an argument, use --prompt, use --file, or pipe it through stdin.' - ); + throw new PromptRequiredError(); } const service = createAIGatewayService(ctx); @@ -357,11 +374,11 @@ export const completeSubcommand = createCommand({ models = await loadModelsForCompletion({ service, profile, cacheKey, refresh: true }); modelInfo = await getCompletionModelInfo(model, models); } + const requestModel = modelInfo?.id ?? 
model; const request = buildCompletionRequest({ - model, + model: requestModel, prompt, system, - api: modelInfo?.api, temperature: ctx.opts.temperature, maxTokens: ctx.opts.maxTokens, }); @@ -381,7 +398,13 @@ export const completeSubcommand = createCommand({ await Bun.write(ctx.opts.save, text); } if (!ctx.options.json && format === 'json') { - console.log(JSON.stringify({ text, cost, response: { stream: true, model } }, null, 2)); + console.log( + JSON.stringify( + { text, cost, response: { stream: true, model: requestModel } }, + null, + 2 + ) + ); } if (!ctx.options.json && ctx.opts.cost) { const costText = getCostText({ agentuity: metadata }); @@ -389,7 +412,7 @@ export const completeSubcommand = createCommand({ console.error(costText); } } - return { text, response: { stream: true }, cost }; + return { text, response: { stream: true, model: requestModel }, cost }; } const response = await service.complete(request); @@ -405,7 +428,13 @@ export const completeSubcommand = createCommand({ } else if (format === 'json') { console.log( JSON.stringify( - { text, model, usage: (response as { usage?: unknown }).usage, cost, response }, + { + text, + model: requestModel, + usage: (response as { usage?: unknown }).usage, + cost, + response, + }, null, 2 ) diff --git a/packages/cli/src/cmd/cloud/aigateway/model-cache.ts b/packages/cli/src/cmd/cloud/aigateway/model-cache.ts index acc96c2a3..41c4a1053 100644 --- a/packages/cli/src/cmd/cloud/aigateway/model-cache.ts +++ b/packages/cli/src/cmd/cloud/aigateway/model-cache.ts @@ -1,7 +1,7 @@ import { Database } from 'bun:sqlite'; import { mkdir } from 'node:fs/promises'; import { join } from 'node:path'; -import type { AIGatewayModels } from '@agentuity/core'; +import { AIGatewayModelsSchema, type AIGatewayModels } from '@agentuity/core'; import { getDefaultConfigDir } from '../../../config'; const TTL_MS = 6 * 60 * 60 * 1000; @@ -59,7 +59,8 @@ export async function getCachedAIGatewayModels( ]); return null; } - return 
JSON.parse(row.models_json) as AIGatewayModels; + const parsed = AIGatewayModelsSchema.safeParse(JSON.parse(row.models_json)); + return parsed.success ? parsed.data : null; } catch { return null; } diff --git a/packages/cli/src/cmd/cloud/aigateway/models.ts b/packages/cli/src/cmd/cloud/aigateway/models.ts index 77019f1f7..c160ec4f0 100644 --- a/packages/cli/src/cmd/cloud/aigateway/models.ts +++ b/packages/cli/src/cmd/cloud/aigateway/models.ts @@ -34,15 +34,21 @@ function isAgentOutputMode(): boolean { } function getRecommendations(rows: z.infer[]) { - const byId = new Map(rows.map((row) => [row.id, row])); + const byId = new Map(rows.map((row) => [normalizeModelId(row.id), row])); return recommendedModels .map((rec) => { - const model = rec.candidates.map((id) => byId.get(id)).find(Boolean); + const model = rec.candidates.map((id) => byId.get(normalizeModelId(id))).find(Boolean); return model ? { use: rec.use, model: model.id, name: model.name } : undefined; }) .filter((row): row is { use: string; model: string; name: string } => Boolean(row)); } +function normalizeModelId(id: string): string { + const normalized = id.toLowerCase(); + const parts = normalized.split('/'); + return parts.length > 1 ? (parts.at(-1) ?? 
normalized) : normalized; +} + function matchesProviderFilter( provider: string, modelId: string, diff --git a/packages/cli/src/cmd/cloud/index.ts b/packages/cli/src/cmd/cloud/index.ts index 3801e4816..d2e4d4c0b 100644 --- a/packages/cli/src/cmd/cloud/index.ts +++ b/packages/cli/src/cmd/cloud/index.ts @@ -14,7 +14,7 @@ import webhookCommand from './webhook'; import { agentCommand } from './agent'; import envCommand from './env'; import apikeyCommand from './apikey'; -import aigatewayCommand from './aigateway'; +import { aigatewayCommand } from './aigateway'; import oidcCommand from './oidc'; import streamCommand from './stream'; import vectorCommand from './vector'; diff --git a/packages/cli/test/cmd/cloud/aigateway.test.ts b/packages/cli/test/cmd/cloud/aigateway.test.ts index 12cea97db..87b2ef06c 100644 --- a/packages/cli/test/cmd/cloud/aigateway.test.ts +++ b/packages/cli/test/cmd/cloud/aigateway.test.ts @@ -550,7 +550,7 @@ describe('cloud aigateway command', () => { stream: true, }); expect(result.text).toBe('hello'); - expect(result.response).toEqual({ stream: true }); + expect(result.response).toEqual({ stream: true, model: 'gpt-4.1-mini' }); expect(result.cost).toEqual({ total: 0.000789, promptTokens: 20, diff --git a/packages/coder-tui/src/aigateway.ts b/packages/coder-tui/src/aigateway.ts index 7a4ee41a1..22a8f5fff 100644 --- a/packages/coder-tui/src/aigateway.ts +++ b/packages/coder-tui/src/aigateway.ts @@ -10,7 +10,7 @@ import { delimiter, join } from 'node:path'; import { existsSync } from 'node:fs'; import { execFileSync } from 'node:child_process'; -import { createMinimalLogger } from '@agentuity/core'; +import { createMinimalLogger, StructuredError } from '@agentuity/core'; import { AIGatewayService, type AIGatewayModel, @@ -44,6 +44,10 @@ const KNOWN_APIS = new Set([ 'google-vertex', ] satisfies KnownApi[]); +const AIGatewayModelFetchError = StructuredError('AIGatewayModelFetchError')<{ + cause?: unknown; +}>(); + function getEnv(...keys: string[]): 
string | undefined { for (const key of keys) { if (process.env[key]) { @@ -109,8 +113,11 @@ async function fetchModels(): Promise { } } break; - } catch (_ex) { - // + } catch (error) { + throw new AIGatewayModelFetchError({ + message: 'Failed to fetch models from AI Gateway', + cause: error, + }); } } } @@ -139,17 +146,26 @@ async function fetchModels(): Promise { ); return await service.listModels(); } catch (error) { - console.warn('Failed to fetch models from AI Gateway:', error); - return {}; + throw new AIGatewayModelFetchError({ + message: 'Failed to fetch models from AI Gateway', + cause: error, + }); } } +function sanitizeModalities(modalities: string[] | undefined): ('text' | 'image')[] { + const sanitized = (modalities ?? []).filter( + (modality): modality is 'text' | 'image' => modality === 'text' || modality === 'image' + ); + return sanitized.length > 0 ? sanitized : ['text']; +} + function toPiModel(m: AIGatewayModel): ProviderModelConfig { return { id: m.id, name: m.name, reasoning: m.reasoning ?? false, - input: (m.input_modalities as ('text' | 'image')[] | undefined) ?? ['text'], + input: sanitizeModalities(m.input_modalities), contextWindow: m.context_window ?? 40000, maxTokens: m.max_output_tokens ?? 
64000, cost: { diff --git a/packages/core/src/services/aigateway/api-reference.ts b/packages/core/src/services/aigateway/api-reference.ts index f75e0be99..7eba7c0ce 100644 --- a/packages/core/src/services/aigateway/api-reference.ts +++ b/packages/core/src/services/aigateway/api-reference.ts @@ -40,6 +40,7 @@ const service: Service = { slug: 'ai-gateway', description: 'List supported LLM models and run routed AI Gateway completions', host: 'aigateway', + hasPublicEndpoints: true, endpoints: [ { id: 'list-models', @@ -55,11 +56,10 @@ const service: Service = { 'JSON response containing provider keys mapped to arrays of supported model metadata.', responseFields: { schema: AIGatewayModelsResponseSchema }, statuses: [ - { code: 200, description: 'Model catalog returned' }, - { code: 401, description: 'Unauthorized — invalid or missing API key' }, - { code: 402, description: 'Payment required — upgrade to a paid plan' }, + { code: 200, description: 'Model catalog returned. Public — no auth required.' 
}, ], examplePath: '/models', + public: true, }, { id: 'create-chat-completion', diff --git a/packages/core/src/services/aigateway/index.ts b/packages/core/src/services/aigateway/index.ts index d61218d48..546312f05 100644 --- a/packages/core/src/services/aigateway/index.ts +++ b/packages/core/src/services/aigateway/index.ts @@ -1 +1,22 @@ -export * from './service.ts'; +export { + AIGatewayChatCompletionParamsSchema, + AIGatewayChatCompletionSchema, + AIGatewayChatMessageSchema, + AIGatewayModelProviderSchema, + AIGatewayModelSchema, + AIGatewayModelsResponseSchema, + AIGatewayModelsSchema, + AIGatewayPricingSchema, + AIGatewayResponseMetadataSchema, + AIGatewayService, + type AIGatewayChatCompletion, + type AIGatewayChatCompletionParams, + type AIGatewayChatMessage, + type AIGatewayModel, + type AIGatewayModelProvider, + type AIGatewayModels, + type AIGatewayModelsResponse, + type AIGatewayPricing, + type AIGatewayResponseMetadata, + type AIGatewayStreamingCompletion, +} from './service.ts'; diff --git a/packages/core/src/services/aigateway/service.ts b/packages/core/src/services/aigateway/service.ts index 42359bb6c..d69ef930a 100644 --- a/packages/core/src/services/aigateway/service.ts +++ b/packages/core/src/services/aigateway/service.ts @@ -1,7 +1,13 @@ import { z } from 'zod'; +import { StructuredError } from '../../error.ts'; import { FetchAdapter } from '../adapter.ts'; import { buildUrl, toServiceException, toPayload } from '../_util.ts'; +const AIGatewayModelsResponseError = StructuredError('AIGatewayModelsResponseError')<{ + error?: string; + message?: string; +}>(); + export const AIGatewayPricingSchema = z.object({ input: z.number().describe('Input token price.'), output: z.number().describe('Output token price.'), @@ -48,7 +54,7 @@ export type AIGatewayModels = z.infer; export const AIGatewayModelsResponseSchema = z.object({ success: z.boolean(), - data: AIGatewayModelsSchema, + data: AIGatewayModelsSchema.optional(), message: z.string().optional(), 
error: z.string().optional(), }); @@ -190,9 +196,13 @@ async function extractGatewayMetadata(response: Response): Promise; ttlNote?: string; + public?: boolean; } interface Service { From 2aa3c8ed5b65a08e56e1c98eed71ff08b9698553 Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 6 May 2026 16:41:46 -0500 Subject: [PATCH 6/8] docs: clarify AI Gateway models response envelope --- apps/docs/src/web/content/reference/api/ai-gateway.mdx | 2 +- packages/core/src/services/aigateway/api-reference.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/docs/src/web/content/reference/api/ai-gateway.mdx b/apps/docs/src/web/content/reference/api/ai-gateway.mdx index 428cf90d4..da0df5864 100644 --- a/apps/docs/src/web/content/reference/api/ai-gateway.mdx +++ b/apps/docs/src/web/content/reference/api/ai-gateway.mdx @@ -33,7 +33,7 @@ List model metadata for LLM providers available through AI Gateway, grouped by p ### Response -JSON response containing provider keys mapped to arrays of supported model metadata. +JSON response with provider keys mapped to arrays of supported model metadata under the `data` envelope. | Status | Description | |--------|-------------| diff --git a/packages/core/src/services/aigateway/api-reference.ts b/packages/core/src/services/aigateway/api-reference.ts index 7eba7c0ce..4b80cba3d 100644 --- a/packages/core/src/services/aigateway/api-reference.ts +++ b/packages/core/src/services/aigateway/api-reference.ts @@ -53,7 +53,7 @@ const service: Service = { queryParams: [], requestBody: null, responseDescription: - 'JSON response containing provider keys mapped to arrays of supported model metadata.', + 'JSON response with provider keys mapped to arrays of supported model metadata under the `data` envelope.', responseFields: { schema: AIGatewayModelsResponseSchema }, statuses: [ { code: 200, description: 'Model catalog returned. Public — no auth required.' 
}, From e1ce45869b55f9d83a114eafff3228056bc42b56 Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 6 May 2026 16:48:17 -0500 Subject: [PATCH 7/8] docs: require stream true for AI Gateway stream reference --- apps/docs/src/web/content/reference/api/ai-gateway.mdx | 6 +++--- packages/core/src/services/aigateway/api-reference.ts | 6 +++++- packages/core/src/services/aigateway/index.ts | 2 ++ packages/core/src/services/aigateway/service.ts | 10 ++++++++++ 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/apps/docs/src/web/content/reference/api/ai-gateway.mdx b/apps/docs/src/web/content/reference/api/ai-gateway.mdx index da0df5864..2e778783f 100644 --- a/apps/docs/src/web/content/reference/api/ai-gateway.mdx +++ b/apps/docs/src/web/content/reference/api/ai-gateway.mdx @@ -347,9 +347,9 @@ Completion request with `stream` set to `true`. }, { "name": "stream", - "type": "boolean", - "description": "", - "required": false + "type": "true", + "description": "Enable Server-Sent Events streaming.", + "required": true }, { "name": "stop", diff --git a/packages/core/src/services/aigateway/api-reference.ts b/packages/core/src/services/aigateway/api-reference.ts index 4b80cba3d..90df52d2d 100644 --- a/packages/core/src/services/aigateway/api-reference.ts +++ b/packages/core/src/services/aigateway/api-reference.ts @@ -1,6 +1,7 @@ import { z } from 'zod'; import { AIGatewayChatCompletionParamsSchema, + AIGatewayChatCompletionStreamParamsSchema, AIGatewayChatCompletionSchema, AIGatewayModelsResponseSchema, } from './service.ts'; @@ -116,7 +117,10 @@ const service: Service = { queryParams: [], requestBody: { description: 'Completion request with `stream` set to `true`.', - fields: { schema: AIGatewayChatCompletionParamsSchema }, + fields: { + schema: AIGatewayChatCompletionStreamParamsSchema, + overrides: { stream: { type: 'true' } }, + }, }, responseDescription: 'Server-Sent Events stream. Each `data:` frame contains a provider-compatible delta payload. 
The stream ends with `data: [DONE]`.', diff --git a/packages/core/src/services/aigateway/index.ts b/packages/core/src/services/aigateway/index.ts index 546312f05..465611af3 100644 --- a/packages/core/src/services/aigateway/index.ts +++ b/packages/core/src/services/aigateway/index.ts @@ -1,5 +1,6 @@ export { AIGatewayChatCompletionParamsSchema, + AIGatewayChatCompletionStreamParamsSchema, AIGatewayChatCompletionSchema, AIGatewayChatMessageSchema, AIGatewayModelProviderSchema, @@ -11,6 +12,7 @@ export { AIGatewayService, type AIGatewayChatCompletion, type AIGatewayChatCompletionParams, + type AIGatewayChatCompletionStreamParams, type AIGatewayChatMessage, type AIGatewayModel, type AIGatewayModelProvider, diff --git a/packages/core/src/services/aigateway/service.ts b/packages/core/src/services/aigateway/service.ts index d69ef930a..33af203e8 100644 --- a/packages/core/src/services/aigateway/service.ts +++ b/packages/core/src/services/aigateway/service.ts @@ -101,6 +101,16 @@ export const AIGatewayChatCompletionParamsSchema = z export type AIGatewayChatCompletionParams = z.infer; +export const AIGatewayChatCompletionStreamParamsSchema = AIGatewayChatCompletionParamsSchema.extend( + { + stream: z.literal(true).describe('Enable Server-Sent Events streaming.'), + } +); + +export type AIGatewayChatCompletionStreamParams = z.infer< + typeof AIGatewayChatCompletionStreamParamsSchema +>; + export const AIGatewayChatCompletionSchema = z .object({ id: z.string().optional(), From 9ad8ade40563181f2c8690ef8a6f34d895f24c62 Mon Sep 17 00:00:00 2001 From: Jeff Haynie Date: Wed, 6 May 2026 16:59:31 -0500 Subject: [PATCH 8/8] fix: require AI Gateway completion input --- .../core/src/services/aigateway/service.ts | 33 ++++++++++++++++--- packages/core/test/aigateway.test.ts | 23 ++++++++++++- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/packages/core/src/services/aigateway/service.ts b/packages/core/src/services/aigateway/service.ts index 33af203e8..be8faf7a2 100644 --- 
a/packages/core/src/services/aigateway/service.ts +++ b/packages/core/src/services/aigateway/service.ts @@ -83,6 +83,21 @@ export const AIGatewayChatMessageSchema = z.object({ export type AIGatewayChatMessage = z.infer; +const missingCompletionInputMessage = 'either prompt or messages must be provided'; + +function hasCompletionInput(params: { prompt?: string | string[]; messages?: unknown[] }): boolean { + if (params.messages && params.messages.length > 0) { + return true; + } + if (typeof params.prompt === 'string') { + return params.prompt.trim().length > 0; + } + if (Array.isArray(params.prompt)) { + return params.prompt.length > 0 && params.prompt.every((item) => item.trim().length > 0); + } + return false; +} + export const AIGatewayChatCompletionParamsSchema = z .object({ model: z.string().describe('Model to use for the completion.'), @@ -97,15 +112,23 @@ export const AIGatewayChatCompletionParamsSchema = z stream: z.boolean().optional(), stop: z.union([z.string(), z.array(z.string())]).optional(), }) - .catchall(z.unknown()); + .catchall(z.unknown()) + .superRefine((params, ctx) => { + if (!hasCompletionInput(params)) { + ctx.addIssue({ + code: 'custom', + message: missingCompletionInputMessage, + path: ['messages'], + }); + } + }); export type AIGatewayChatCompletionParams = z.infer; -export const AIGatewayChatCompletionStreamParamsSchema = AIGatewayChatCompletionParamsSchema.extend( - { +export const AIGatewayChatCompletionStreamParamsSchema = + AIGatewayChatCompletionParamsSchema.safeExtend({ stream: z.literal(true).describe('Enable Server-Sent Events streaming.'), - } -); + }); export type AIGatewayChatCompletionStreamParams = z.infer< typeof AIGatewayChatCompletionStreamParamsSchema diff --git a/packages/core/test/aigateway.test.ts b/packages/core/test/aigateway.test.ts index 7fe720121..23722d15b 100644 --- a/packages/core/test/aigateway.test.ts +++ b/packages/core/test/aigateway.test.ts @@ -1,10 +1,31 @@ import { describe, expect, test } from 
'bun:test'; import { createMockAdapter } from '@agentuity/test-utils'; -import { AIGatewayService } from '../src/services/aigateway/index.ts'; +import { + AIGatewayChatCompletionParamsSchema, + AIGatewayService, +} from '../src/services/aigateway/index.ts'; describe('AIGatewayService', () => { const baseUrl = 'https://aigateway.example.com'; + test('requires prompt or messages for completion params', () => { + expect(AIGatewayChatCompletionParamsSchema.safeParse({ model: 'gpt-4.1-mini' }).success).toBe( + false + ); + expect( + AIGatewayChatCompletionParamsSchema.safeParse({ + model: 'gpt-4.1-mini', + prompt: ' ', + }).success + ).toBe(false); + expect( + AIGatewayChatCompletionParamsSchema.safeParse({ + model: 'gpt-4.1-mini', + prompt: ['Say hello'], + }).success + ).toBe(true); + }); + test('lists models from the gateway catalog', async () => { const { adapter, calls } = createMockAdapter([ {