Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .changeset/few-apes-judge.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
'dexto': patch
'@dexto/analytics': patch
'@dexto/core': patch
'@dexto/server': patch
'@dexto/tui': patch
'@dexto/webui': patch
---

Publish LLM usage analytics cost metrics.

- `dexto` / `@dexto/tui`: include estimated USD cost and per-bucket cost fields in CLI LLM usage analytics.
- `@dexto/webui`: include estimated USD cost and per-bucket cost fields in WebUI LLM usage analytics.
- `@dexto/analytics`: extend the shared `dexto_llm_tokens_consumed` event payload with cost fields.
- `@dexto/core`: emit `costBreakdown` alongside `estimatedCost` from shared LLM pricing metadata.
- `@dexto/server`: forward the emitted cost breakdown through usage delivery and A2A SSE events.
2 changes: 1 addition & 1 deletion docs/static/openapi/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"openapi": "3.0.0",
"info": {
"title": "Dexto API",
"version": "1.6.21",
"version": "1.6.22",
"description": "OpenAPI spec for the Dexto REST API server"
},
"servers": [
Expand Down
12 changes: 12 additions & 0 deletions packages/analytics/src/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@ export interface LLMTokensConsumedEvent {
totalTokens?: number | undefined;
cacheReadTokens?: number | undefined;
cacheWriteTokens?: number | undefined;
/** Total estimated cost in USD for the response, when pricing is available. */
estimatedCostUsd?: number | undefined;
/** Estimated input-token cost in USD for the response, when pricing is available. */
inputCostUsd?: number | undefined;
/** Estimated output-token cost in USD for the response, when pricing is available. */
outputCostUsd?: number | undefined;
/** Estimated reasoning-token cost in USD for the response, when pricing is available. */
reasoningCostUsd?: number | undefined;
/** Estimated cache-read cost in USD for the response, when pricing is available. */
cacheReadCostUsd?: number | undefined;
/** Estimated cache-write cost in USD for the response, when pricing is available. */
cacheWriteCostUsd?: number | undefined;
/** Estimated input tokens (before LLM call, using length/4 heuristic) */
estimatedInputTokens?: number | undefined;
/** Accuracy of estimate vs actual: (estimated - actual) / actual * 100 */
Expand Down
5 changes: 5 additions & 0 deletions packages/core/src/events/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { EventEmitter } from 'events';
import type { LLMProvider, LLMPricingStatus, ReasoningVariant, TokenUsage } from '../llm/types.js';
import type { TokenUsageCostBreakdown } from '../llm/registry/index.js';
import type { AgentRuntimeSettings } from '../agent/runtime-config.js';
import type { ApprovalRequest, ApprovalResponse } from '../approval/types.js';
import type { SanitizedToolResult } from '../context/types.js';
Expand Down Expand Up @@ -371,6 +372,8 @@ export interface AgentEventMap {
usageScopeId?: string;
/** Estimated cost in USD for this response, when pricing is available. */
estimatedCost?: number;
/** Estimated token-cost breakdown in USD for this response, when pricing is available. */
costBreakdown?: TokenUsageCostBreakdown;
/** Whether pricing was resolved for this response. */
pricingStatus?: LLMPricingStatus;
/** Estimated input tokens before LLM call (for analytics/calibration) */
Expand Down Expand Up @@ -649,6 +652,8 @@ export interface SessionEventMap {
usageScopeId?: string;
/** Estimated cost in USD for this response, when pricing is available. */
estimatedCost?: number;
/** Estimated token-cost breakdown in USD for this response, when pricing is available. */
costBreakdown?: TokenUsageCostBreakdown;
/** Whether pricing was resolved for this response. */
pricingStatus?: LLMPricingStatus;
/** Estimated input tokens before LLM call (for analytics/calibration) */
Expand Down
10 changes: 10 additions & 0 deletions packages/core/src/llm/executor/stream-processor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,11 @@ describe('StreamProcessor', () => {
messageId: 'msg-1',
provider: 'openai',
model: 'gpt-4',
costBreakdown: {
inputUsd: expect.any(Number),
outputUsd: expect.any(Number),
totalUsd: expect.any(Number),
},
pricingStatus: 'estimated',
tokenUsage: {
inputTokens: 100,
Expand Down Expand Up @@ -1303,6 +1308,11 @@ describe('StreamProcessor', () => {
const responseEvent = mocks.emittedEvents.find((e) => e.name === 'llm:response');
expect(responseEvent?.payload).toMatchObject({
finishReason: 'cancelled',
costBreakdown: {
inputUsd: expect.any(Number),
outputUsd: expect.any(Number),
totalUsd: expect.any(Number),
},
pricingStatus: 'estimated',
tokenUsage: {
inputTokens: 12,
Expand Down
5 changes: 5 additions & 0 deletions packages/core/src/llm/executor/stream-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import type { Logger } from '../../logger/v2/types.js';
import { DextoLogComponent } from '../../logger/v2/types.js';
import type { ToolPresentationSnapshotV1 } from '../../tools/types.js';
import { getUsagePricingMetadata } from '../usage-metadata.js';
import type { TokenUsageCostBreakdown } from '../registry/index.js';
import type { LLMProvider, LLMPricingStatus, ReasoningVariant, TokenUsage } from '../types.js';

type UsageLike = {
Expand Down Expand Up @@ -714,6 +715,7 @@ export class StreamProcessor {
tokenUsage: TokenUsage;
finishReason: LLMFinishReason;
estimatedCost?: number;
costBreakdown?: TokenUsageCostBreakdown;
pricingStatus?: LLMPricingStatus;
}): void {
this.eventBus.emit('llm:response', {
Expand All @@ -728,6 +730,9 @@ export class StreamProcessor {
...(config.estimatedCost !== undefined && {
estimatedCost: config.estimatedCost,
}),
...(config.costBreakdown && {
costBreakdown: config.costBreakdown,
}),
...(config.pricingStatus && { pricingStatus: config.pricingStatus }),
...(this.config.estimatedInputTokens !== undefined && {
estimatedInputTokens: this.config.estimatedInputTokens,
Expand Down
13 changes: 10 additions & 3 deletions packages/core/src/llm/usage-metadata.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import { calculateCost, getModelPricing } from './registry/index.js';
import {
calculateCostBreakdown,
getModelPricing,
type TokenUsageCostBreakdown,
} from './registry/index.js';
import type { LLMProvider, LLMPricingStatus, TokenUsage } from './types.js';

/**
 * Pricing information attached to a single LLM usage record.
 *
 * All fields are optional so callers can represent usage for which no pricing
 * metadata is being reported.
 */
export interface LLMUsagePricingMetadata {
/** Estimated total cost. When a breakdown is computed this mirrors its `totalUsd`. */
estimatedCost?: number;
/** Pricing resolution status; set to `'estimated'` when a cost breakdown was calculated. */
pricingStatus?: LLMPricingStatus;
/** Per-bucket cost breakdown as returned by `calculateCostBreakdown()`. */
costBreakdown?: TokenUsageCostBreakdown;
}

export function hasMeaningfulTokenUsage(tokenUsage: TokenUsage | undefined): boolean {
Expand Down Expand Up @@ -38,10 +43,12 @@ export function getUsagePricingMetadata(config: {
}

// TODO(llm-pricing): Handle totalTokens-only usage without reporting a false zero-cost
// estimate. calculateCost() prices detailed token buckets only, so this path should
// estimate. calculateCostBreakdown() prices detailed token buckets only, so this path should
// eventually distinguish "insufficient token detail" from a real zero-cost estimate.
const costBreakdown = calculateCostBreakdown(tokenUsage, pricing);
return {
estimatedCost: calculateCost(tokenUsage, pricing),
estimatedCost: costBreakdown.totalUsd,
pricingStatus: 'estimated',
costBreakdown,
};
}
3 changes: 3 additions & 0 deletions packages/server/src/events/a2a-sse-subscriber.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ export class A2ASseEventSubscriber {
...(payload.estimatedCost !== undefined && {
estimatedCost: payload.estimatedCost,
}),
...(payload.costBreakdown && {
costBreakdown: payload.costBreakdown,
}),
...(payload.pricingStatus && { pricingStatus: payload.pricingStatus }),
});
},
Expand Down
5 changes: 3 additions & 2 deletions packages/server/src/events/usage-event-subscriber.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ export class UsageEventSubscriber implements EventSubscriber {
}

const resolvedCostBreakdown =
payload.provider && payload.model
payload.costBreakdown ??
(payload.provider && payload.model
? (() => {
const pricing = getModelPricing(payload.provider, payload.model);
if (!pricing) {
Expand All @@ -163,7 +164,7 @@ export class UsageEventSubscriber implements EventSubscriber {

return calculateCostBreakdown(payload.tokenUsage, pricing);
})()
: undefined;
: undefined);
const resolvedEstimatedCost = payload.estimatedCost ?? resolvedCostBreakdown?.totalUsd;

return {
Expand Down
71 changes: 70 additions & 1 deletion packages/tui/src/services/processStream.test.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import { describe, expect, it, vi } from 'vitest';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import type React from 'react';
import type { QueuedMessage, StreamingEvent } from '@dexto/core';
import type { Message, UIState, SessionState } from '../state/types.js';
import { processStream } from './processStream.js';
import type { ApprovalRequest } from '../components/ApprovalPrompt.js';

// Create the analytics spy via vi.hoisted so it can be referenced from the
// vi.mock factory below.
const { captureAnalyticsMock } = vi.hoisted(() => ({
captureAnalyticsMock: vi.fn(),
}));

// Replace the host module's captureAnalytics export with the spy so tests can
// assert on the analytics payloads that are captured through it.
vi.mock('../host/index.js', () => ({
captureAnalytics: captureAnalyticsMock,
}));

type SetStateAction<T> = React.SetStateAction<T>;
type Dispatch<T> = React.Dispatch<SetStateAction<T>>;

Expand Down Expand Up @@ -93,6 +101,10 @@ function createSetters() {
}

describe('processStream (reasoning)', () => {
// Clear recorded analytics calls so each test only sees its own captures.
beforeEach(() => {
captureAnalyticsMock.mockClear();
});

it('attaches streamed reasoning chunks to the assistant message', async () => {
const { getMessages, getPendingMessages, setters } = createSetters();

Expand Down Expand Up @@ -295,4 +307,61 @@ describe('processStream (reasoning)', () => {
expect(assistantMessages[1]?.content).toBe('Final');
expect(assistantMessages[1]?.reasoning).toBeUndefined();
});

// Verifies that an llm:response event carrying estimatedCost and costBreakdown
// results in a dexto_llm_tokens_consumed capture with the matching *CostUsd fields.
it('captures analytics cost fields for priced llm responses', async () => {
const { setters } = createSetters();

// Minimal stream: thinking -> priced response -> run completion.
const events: StreamingEvent[] = [
{ name: 'llm:thinking', sessionId: 'test-session' },
{
name: 'llm:response',
sessionId: 'test-session',
content: 'Priced response',
provider: 'openai',
model: 'gpt-4',
estimatedCost: 0.0015,
// Bucket values are chosen so inputUsd + outputUsd equals totalUsd / estimatedCost.
costBreakdown: {
inputUsd: 0.001,
outputUsd: 0.0005,
reasoningUsd: 0,
cacheReadUsd: 0,
cacheWriteUsd: 0,
totalUsd: 0.0015,
},
tokenUsage: {
inputTokens: 10,
outputTokens: 20,
totalTokens: 30,
},
},
{
name: 'run:complete',
sessionId: 'test-session',
finishReason: 'stop',
stepCount: 1,
durationMs: 1,
},
];

await processStream(eventStream(events), setters, {
useStreaming: false,
autoApproveEditsRef: { current: false },
bypassPermissionsRef: { current: false },
eventBus: { emit: vi.fn() },
});

// objectContaining: only the cost-related fields (plus source/session) are pinned here;
// other payload fields are allowed to be present.
expect(captureAnalyticsMock).toHaveBeenCalledWith(
'dexto_llm_tokens_consumed',
expect.objectContaining({
source: 'cli',
sessionId: 'test-session',
estimatedCostUsd: 0.0015,
inputCostUsd: 0.001,
outputCostUsd: 0.0005,
reasoningCostUsd: 0,
cacheReadCostUsd: 0,
cacheWriteCostUsd: 0,
})
);
});
});
55 changes: 39 additions & 16 deletions packages/tui/src/services/processStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,28 @@ interface StreamState {
nonStreamingAccumulatedReasoning: string;
}

/**
 * Decides whether an `llm:response` event carries anything worth reporting to analytics.
 *
 * A defined `estimatedCost` (including `0`) always qualifies. Without one, the event
 * qualifies only when at least one token counter is strictly greater than zero.
 *
 * @param tokenUsage - Token counters from the response event; may be absent.
 * @param estimatedCost - Estimated cost for the response, if one was provided.
 * @returns `true` when the usage analytics event should be captured.
 */
function hasMeaningfulTokenUsageForAnalytics(
    tokenUsage: Extract<StreamingEvent, { name: 'llm:response' }>['tokenUsage'],
    estimatedCost?: number
): boolean {
    // A cost estimate on its own is sufficient, even when token counters are missing.
    const hasCostEstimate = estimatedCost !== undefined;
    if (hasCostEstimate) {
        return true;
    }

    if (!tokenUsage) {
        return false;
    }

    // Missing counters are treated as zero; any positive counter makes the usage meaningful.
    const counters = [
        tokenUsage.inputTokens,
        tokenUsage.outputTokens,
        tokenUsage.reasoningTokens,
        tokenUsage.cacheReadTokens,
        tokenUsage.cacheWriteTokens,
        tokenUsage.totalTokens,
    ];
    return counters.some((count) => (count ?? 0) > 0);
}

/**
* Processes the async iterator from agent.stream() and updates UI state.
*
Expand Down Expand Up @@ -546,19 +568,14 @@ export async function processStream(

// Track token usage analytics
if (
event.tokenUsage &&
(event.tokenUsage.inputTokens || event.tokenUsage.outputTokens)
hasMeaningfulTokenUsageForAnalytics(event.tokenUsage, event.estimatedCost)
) {
// Calculate estimate accuracy if both estimate and actual are available
let estimateAccuracyPercent: number | undefined;
if (
event.estimatedInputTokens !== undefined &&
event.tokenUsage.inputTokens
) {
const diff = event.estimatedInputTokens - event.tokenUsage.inputTokens;
estimateAccuracyPercent = Math.round(
(diff / event.tokenUsage.inputTokens) * 100
);
const actualInputTokens = event.tokenUsage?.inputTokens;
if (event.estimatedInputTokens !== undefined && actualInputTokens) {
const diff = event.estimatedInputTokens - actualInputTokens;
estimateAccuracyPercent = Math.round((diff / actualInputTokens) * 100);
}

captureAnalytics('dexto_llm_tokens_consumed', {
Expand All @@ -568,12 +585,18 @@ export async function processStream(
model: event.model,
reasoningVariant: event.reasoningVariant ?? undefined,
reasoningBudgetTokens: event.reasoningBudgetTokens ?? undefined,
inputTokens: event.tokenUsage.inputTokens,
outputTokens: event.tokenUsage.outputTokens,
reasoningTokens: event.tokenUsage.reasoningTokens,
totalTokens: event.tokenUsage.totalTokens,
cacheReadTokens: event.tokenUsage.cacheReadTokens,
cacheWriteTokens: event.tokenUsage.cacheWriteTokens,
inputTokens: event.tokenUsage?.inputTokens,
outputTokens: event.tokenUsage?.outputTokens,
reasoningTokens: event.tokenUsage?.reasoningTokens,
totalTokens: event.tokenUsage?.totalTokens,
cacheReadTokens: event.tokenUsage?.cacheReadTokens,
cacheWriteTokens: event.tokenUsage?.cacheWriteTokens,
estimatedCostUsd: event.estimatedCost,
inputCostUsd: event.costBreakdown?.inputUsd,
outputCostUsd: event.costBreakdown?.outputUsd,
reasoningCostUsd: event.costBreakdown?.reasoningUsd,
cacheReadCostUsd: event.costBreakdown?.cacheReadUsd,
cacheWriteCostUsd: event.costBreakdown?.cacheWriteUsd,
estimatedInputTokens: event.estimatedInputTokens,
estimateAccuracyPercent,
});
Expand Down
Loading
Loading