truffle-ai · rahulkarajgikar · Apr 14, 2026 · Apr 14, 2026 · Apr 14, 2026
diff --git a/.changeset/few-apes-judge.md b/.changeset/few-apes-judge.md
@@ -0,0 +1,16 @@
+---
+'dexto': patch
+'@dexto/analytics': patch
+'@dexto/core': patch
+'@dexto/server': patch
+'@dexto/tui': patch
+'@dexto/webui': patch
+---
+
+Publish LLM usage analytics cost metrics.
+
+- `dexto` / `@dexto/tui`: include estimated USD cost and per-bucket cost fields in CLI LLM usage analytics.
+- `@dexto/webui`: include estimated USD cost and per-bucket cost fields in WebUI LLM usage analytics.
+- `@dexto/analytics`: extend the shared `dexto_llm_tokens_consumed` event payload with cost fields.
+- `@dexto/core`: emit `costBreakdown` alongside `estimatedCost` from shared LLM pricing metadata.
+- `@dexto/server`: forward the emitted cost breakdown through usage delivery and A2A SSE events.
diff --git a/docs/static/openapi/openapi.json b/docs/static/openapi/openapi.json
@@ -2,7 +2,7 @@
   "openapi": "3.0.0",
   "info": {
     "title": "Dexto API",
-    "version": "1.6.21",
+    "version": "1.6.22",
     "description": "OpenAPI spec for the Dexto REST API server"
   },
   "servers": [

diff --git a/packages/analytics/src/events.ts b/packages/analytics/src/events.ts
@@ -24,6 +24,18 @@ export interface LLMTokensConsumedEvent {
     totalTokens?: number | undefined;
     cacheReadTokens?: number | undefined;
     cacheWriteTokens?: number | undefined;
+    /** Total estimated cost in USD for the response, when pricing is available. */
+    estimatedCostUsd?: number | undefined;
+    /** Estimated input-token cost in USD for the response, when pricing is available. */
+    inputCostUsd?: number | undefined;
+    /** Estimated output-token cost in USD for the response, when pricing is available. */
+    outputCostUsd?: number | undefined;
+    /** Estimated reasoning-token cost in USD for the response, when pricing is available. */
+    reasoningCostUsd?: number | undefined;
+    /** Estimated cache-read cost in USD for the response, when pricing is available. */
+    cacheReadCostUsd?: number | undefined;
+    /** Estimated cache-write cost in USD for the response, when pricing is available. */
+    cacheWriteCostUsd?: number | undefined;
     /** Estimated input tokens (before LLM call, using length/4 heuristic) */
     estimatedInputTokens?: number | undefined;
     /** Accuracy of estimate vs actual: (estimated - actual) / actual * 100 */

diff --git a/packages/core/src/events/index.ts b/packages/core/src/events/index.ts
@@ -1,5 +1,6 @@
 import { EventEmitter } from 'events';
 import type { LLMProvider, LLMPricingStatus, ReasoningVariant, TokenUsage } from '../llm/types.js';
+import type { TokenUsageCostBreakdown } from '../llm/registry/index.js';
 import type { AgentRuntimeSettings } from '../agent/runtime-config.js';
 import type { ApprovalRequest, ApprovalResponse } from '../approval/types.js';
 import type { SanitizedToolResult } from '../context/types.js';
@@ -371,6 +372,8 @@ export interface AgentEventMap {
         usageScopeId?: string;
         /** Estimated cost in USD for this response, when pricing is available. */
         estimatedCost?: number;
+        /** Estimated token-cost breakdown in USD for this response, when pricing is available. */
+        costBreakdown?: TokenUsageCostBreakdown;
         /** Whether pricing was resolved for this response. */
         pricingStatus?: LLMPricingStatus;
         /** Estimated input tokens before LLM call (for analytics/calibration) */
@@ -649,6 +652,8 @@ export interface SessionEventMap {
         usageScopeId?: string;
         /** Estimated cost in USD for this response, when pricing is available. */
         estimatedCost?: number;
+        /** Estimated token-cost breakdown in USD for this response, when pricing is available. */
+        costBreakdown?: TokenUsageCostBreakdown;
         /** Whether pricing was resolved for this response. */
         pricingStatus?: LLMPricingStatus;
         /** Estimated input tokens before LLM call (for analytics/calibration) */

diff --git a/packages/core/src/llm/executor/stream-processor.test.ts b/packages/core/src/llm/executor/stream-processor.test.ts
@@ -1105,6 +1105,11 @@ describe('StreamProcessor', () => {
                 messageId: 'msg-1',
                 provider: 'openai',
                 model: 'gpt-4',
+                costBreakdown: {
+                    inputUsd: expect.any(Number),
+                    outputUsd: expect.any(Number),
+                    totalUsd: expect.any(Number),
+                },
                 pricingStatus: 'estimated',
                 tokenUsage: {
                     inputTokens: 100,
@@ -1303,6 +1308,11 @@ describe('StreamProcessor', () => {
             const responseEvent = mocks.emittedEvents.find((e) => e.name === 'llm:response');
             expect(responseEvent?.payload).toMatchObject({
                 finishReason: 'cancelled',
+                costBreakdown: {
+                    inputUsd: expect.any(Number),
+                    outputUsd: expect.any(Number),
+                    totalUsd: expect.any(Number),
+                },
                 pricingStatus: 'estimated',
                 tokenUsage: {
                     inputTokens: 12,

diff --git a/packages/core/src/llm/executor/stream-processor.ts b/packages/core/src/llm/executor/stream-processor.ts
@@ -10,6 +10,7 @@ import type { Logger } from '../../logger/v2/types.js';
 import { DextoLogComponent } from '../../logger/v2/types.js';
 import type { ToolPresentationSnapshotV1 } from '../../tools/types.js';
 import { getUsagePricingMetadata } from '../usage-metadata.js';
+import type { TokenUsageCostBreakdown } from '../registry/index.js';
 import type { LLMProvider, LLMPricingStatus, ReasoningVariant, TokenUsage } from '../types.js';
 
 type UsageLike = {
@@ -714,6 +715,7 @@ export class StreamProcessor {
         tokenUsage: TokenUsage;
         finishReason: LLMFinishReason;
         estimatedCost?: number;
+        costBreakdown?: TokenUsageCostBreakdown;
         pricingStatus?: LLMPricingStatus;
     }): void {
         this.eventBus.emit('llm:response', {
@@ -728,6 +730,9 @@ export class StreamProcessor {
             ...(config.estimatedCost !== undefined && {
                 estimatedCost: config.estimatedCost,
             }),
+            ...(config.costBreakdown && {
+                costBreakdown: config.costBreakdown,
+            }),
             ...(config.pricingStatus && { pricingStatus: config.pricingStatus }),
             ...(this.config.estimatedInputTokens !== undefined && {
                 estimatedInputTokens: this.config.estimatedInputTokens,

diff --git a/packages/core/src/llm/usage-metadata.ts b/packages/core/src/llm/usage-metadata.ts
@@ -1,9 +1,14 @@
-import { calculateCost, getModelPricing } from './registry/index.js';
+import {
+    calculateCostBreakdown,
+    getModelPricing,
+    type TokenUsageCostBreakdown,
+} from './registry/index.js';
 import type { LLMProvider, LLMPricingStatus, TokenUsage } from './types.js';
 
 export interface LLMUsagePricingMetadata {
     estimatedCost?: number;
     pricingStatus?: LLMPricingStatus;
+    costBreakdown?: TokenUsageCostBreakdown;
 }
 
 export function hasMeaningfulTokenUsage(tokenUsage: TokenUsage | undefined): boolean {
@@ -38,10 +43,12 @@ export function getUsagePricingMetadata(config: {
     }
 
     // TODO(llm-pricing): Handle totalTokens-only usage without reporting a false zero-cost
-    // estimate. calculateCost() prices detailed token buckets only, so this path should
+    // estimate. calculateCostBreakdown() prices detailed token buckets only, so this path should
     // eventually distinguish "insufficient token detail" from a real zero-cost estimate.
+    const costBreakdown = calculateCostBreakdown(tokenUsage, pricing);
     return {
-        estimatedCost: calculateCost(tokenUsage, pricing),
+        estimatedCost: costBreakdown.totalUsd,
         pricingStatus: 'estimated',
+        costBreakdown,
     };
 }
diff --git a/packages/server/src/events/a2a-sse-subscriber.ts b/packages/server/src/events/a2a-sse-subscriber.ts
@@ -149,6 +149,9 @@ export class A2ASseEventSubscriber {
                     ...(payload.estimatedCost !== undefined && {
                         estimatedCost: payload.estimatedCost,
                     }),
+                    ...(payload.costBreakdown && {
+                        costBreakdown: payload.costBreakdown,
+                    }),
                     ...(payload.pricingStatus && { pricingStatus: payload.pricingStatus }),
                 });
             },

diff --git a/packages/server/src/events/usage-event-subscriber.ts b/packages/server/src/events/usage-event-subscriber.ts
@@ -154,7 +154,8 @@ export class UsageEventSubscriber implements EventSubscriber {
         }
 
         const resolvedCostBreakdown =
-            payload.provider && payload.model
+            payload.costBreakdown ??
+            (payload.provider && payload.model
                 ? (() => {
                       const pricing = getModelPricing(payload.provider, payload.model);
                       if (!pricing) {
@@ -163,7 +164,7 @@ export class UsageEventSubscriber implements EventSubscriber {
 
                       return calculateCostBreakdown(payload.tokenUsage, pricing);
                   })()
-                : undefined;
+                : undefined);
         const resolvedEstimatedCost = payload.estimatedCost ?? resolvedCostBreakdown?.totalUsd;
 
         return {

diff --git a/packages/tui/src/services/processStream.test.ts b/packages/tui/src/services/processStream.test.ts
@@ -1,10 +1,18 @@
-import { describe, expect, it, vi } from 'vitest';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
 import type React from 'react';
 import type { QueuedMessage, StreamingEvent } from '@dexto/core';
 import type { Message, UIState, SessionState } from '../state/types.js';
 import { processStream } from './processStream.js';
 import type { ApprovalRequest } from '../components/ApprovalPrompt.js';
 
+const { captureAnalyticsMock } = vi.hoisted(() => ({
+    captureAnalyticsMock: vi.fn(),
+}));
+
+vi.mock('../host/index.js', () => ({
+    captureAnalytics: captureAnalyticsMock,
+}));
+
 type SetStateAction<T> = React.SetStateAction<T>;
 type Dispatch<T> = React.Dispatch<SetStateAction<T>>;
 
@@ -93,6 +101,10 @@ function createSetters() {
 }
 
 describe('processStream (reasoning)', () => {
+    beforeEach(() => {
+        captureAnalyticsMock.mockClear();
+    });
+
     it('attaches streamed reasoning chunks to the assistant message', async () => {
         const { getMessages, getPendingMessages, setters } = createSetters();
 
@@ -295,4 +307,61 @@ describe('processStream (reasoning)', () => {
         expect(assistantMessages[1]?.content).toBe('Final');
         expect(assistantMessages[1]?.reasoning).toBeUndefined();
     });
+
+    it('captures analytics cost fields for priced llm responses', async () => {
+        const { setters } = createSetters();
+
+        const events: StreamingEvent[] = [
+            { name: 'llm:thinking', sessionId: 'test-session' },
+            {
+                name: 'llm:response',
+                sessionId: 'test-session',
+                content: 'Priced response',
+                provider: 'openai',
+                model: 'gpt-4',
+                estimatedCost: 0.0015,
+                costBreakdown: {
+                    inputUsd: 0.001,
+                    outputUsd: 0.0005,
+                    reasoningUsd: 0,
+                    cacheReadUsd: 0,
+                    cacheWriteUsd: 0,
+                    totalUsd: 0.0015,
+                },
+                tokenUsage: {
+                    inputTokens: 10,
+                    outputTokens: 20,
+                    totalTokens: 30,
+                },
+            },
+            {
+                name: 'run:complete',
+                sessionId: 'test-session',
+                finishReason: 'stop',
+                stepCount: 1,
+                durationMs: 1,
+            },
+        ];
+
+        await processStream(eventStream(events), setters, {
+            useStreaming: false,
+            autoApproveEditsRef: { current: false },
+            bypassPermissionsRef: { current: false },
+            eventBus: { emit: vi.fn() },
+        });
+
+        expect(captureAnalyticsMock).toHaveBeenCalledWith(
+            'dexto_llm_tokens_consumed',
+            expect.objectContaining({
+                source: 'cli',
+                sessionId: 'test-session',
+                estimatedCostUsd: 0.0015,
+                inputCostUsd: 0.001,
+                outputCostUsd: 0.0005,
+                reasoningCostUsd: 0,
+                cacheReadCostUsd: 0,
+                cacheWriteCostUsd: 0,
+            })
+        );
+    });
 });
diff --git a/packages/tui/src/services/processStream.ts b/packages/tui/src/services/processStream.ts
@@ -116,6 +116,28 @@ interface StreamState {
     nonStreamingAccumulatedReasoning: string;
 }
 
+/** True when an llm:response has a cost estimate or any non-zero token count. */
+function hasMeaningfulTokenUsageForAnalytics(
+    tokenUsage: Extract<StreamingEvent, { name: 'llm:response' }>['tokenUsage'],
+    estimatedCost?: number
+): boolean {
+    if (estimatedCost !== undefined) {
+        return true;
+    }
+    if (!tokenUsage) {
+        return false;
+    }
+    const tokenCounts = [
+        tokenUsage.inputTokens,
+        tokenUsage.outputTokens,
+        tokenUsage.reasoningTokens,
+        tokenUsage.cacheReadTokens,
+        tokenUsage.cacheWriteTokens,
+        tokenUsage.totalTokens,
+    ];
+    return tokenCounts.some((count) => (count ?? 0) > 0);
+}
+
 /**
  * Processes the async iterator from agent.stream() and updates UI state.
  *
@@ -546,19 +568,14 @@ export async function processStream(
 
                     // Track token usage analytics
                     if (
-                        event.tokenUsage &&
-                        (event.tokenUsage.inputTokens || event.tokenUsage.outputTokens)
+                        hasMeaningfulTokenUsageForAnalytics(event.tokenUsage, event.estimatedCost)
                     ) {
                         // Calculate estimate accuracy if both estimate and actual are available
                         let estimateAccuracyPercent: number | undefined;
-                        if (
-                            event.estimatedInputTokens !== undefined &&
-                            event.tokenUsage.inputTokens
-                        ) {
-                            const diff = event.estimatedInputTokens - event.tokenUsage.inputTokens;
-                            estimateAccuracyPercent = Math.round(
-                                (diff / event.tokenUsage.inputTokens) * 100
-                            );
+                        const actualInputTokens = event.tokenUsage?.inputTokens;
+                        if (event.estimatedInputTokens !== undefined && actualInputTokens) {
+                            const diff = event.estimatedInputTokens - actualInputTokens;
+                            estimateAccuracyPercent = Math.round((diff / actualInputTokens) * 100);
                         }
 
                         captureAnalytics('dexto_llm_tokens_consumed', {
@@ -568,12 +585,18 @@ export async function processStream(
                             model: event.model,
                             reasoningVariant: event.reasoningVariant ?? undefined,
                             reasoningBudgetTokens: event.reasoningBudgetTokens ?? undefined,
-                            inputTokens: event.tokenUsage.inputTokens,
-                            outputTokens: event.tokenUsage.outputTokens,
-                            reasoningTokens: event.tokenUsage.reasoningTokens,
-                            totalTokens: event.tokenUsage.totalTokens,
-                            cacheReadTokens: event.tokenUsage.cacheReadTokens,
-                            cacheWriteTokens: event.tokenUsage.cacheWriteTokens,
+                            inputTokens: event.tokenUsage?.inputTokens,
+                            outputTokens: event.tokenUsage?.outputTokens,
+                            reasoningTokens: event.tokenUsage?.reasoningTokens,
+                            totalTokens: event.tokenUsage?.totalTokens,
+                            cacheReadTokens: event.tokenUsage?.cacheReadTokens,
+                            cacheWriteTokens: event.tokenUsage?.cacheWriteTokens,
+                            estimatedCostUsd: event.estimatedCost,
+                            inputCostUsd: event.costBreakdown?.inputUsd,
+                            outputCostUsd: event.costBreakdown?.outputUsd,
+                            reasoningCostUsd: event.costBreakdown?.reasoningUsd,
+                            cacheReadCostUsd: event.costBreakdown?.cacheReadUsd,
+                            cacheWriteCostUsd: event.costBreakdown?.cacheWriteUsd,
                             estimatedInputTokens: event.estimatedInputTokens,
                             estimateAccuracyPercent,
                         });