Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ export class Agent {
agents: opts.agents,
hookRegistry: this.hookRegistry,
sessionId: this.sid,
contextWindowSize: opts.contextWindowSize,
pricingPerMillion: opts.pricingPerMillion,
})
this.currentEngine = engine

Expand Down
13 changes: 11 additions & 2 deletions src/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,12 @@ export class QueryEngine {
}

// Auto-compact if context is too large
if (shouldAutoCompact(this.messages as any[], this.config.model, this.compactState)) {
if (shouldAutoCompact(
this.messages as any[],
this.config.model,
this.compactState,
this.config.contextWindowSize,
)) {
await this.executeHooks('PreCompact')
try {
const result = await compactConversation(
Expand Down Expand Up @@ -342,7 +347,11 @@ export class QueryEngine {
(this.totalUsage.cache_read_input_tokens || 0) +
response.usage.cache_read_input_tokens
}
this.totalCost += estimateCost(this.config.model, response.usage)
this.totalCost += estimateCost(
this.config.model,
response.usage,
this.config.pricingPerMillion,
)
}

// Add assistant message to conversation
Expand Down
12 changes: 12 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,13 @@ export interface AgentOptions {
hooks: Array<(input: any, toolUseId: string, context: { signal: AbortSignal }) => Promise<any>>
timeout?: number
}>>
/** Context window size of the model, in tokens. When unset, the existing model-name matching logic is used. */
contextWindowSize?: number
/** Model pricing in USD per million tokens. When unset, the existing model-name matching logic is used. */
pricingPerMillion?: {
input: number
output: number
}
}

export interface QueryResult {
Expand Down Expand Up @@ -483,4 +490,9 @@ export interface QueryEngineConfig {
hookRegistry?: import('./hooks.js').HookRegistry
/** Session ID for hook context */
sessionId?: string
contextWindowSize?: number
pricingPerMillion?: {
input: number
output: number
}
}
3 changes: 2 additions & 1 deletion src/utils/compact.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,12 @@ export function shouldAutoCompact(
messages: any[],
model: string,
state: AutoCompactState,
contextWindowSize?: number,
): boolean {
if (state.consecutiveFailures >= 3) return false

const estimatedTokens = estimateMessagesTokens(messages)
const threshold = getAutoCompactThreshold(model)
const threshold = getAutoCompactThreshold(model, contextWindowSize)

return estimatedTokens >= threshold
}
Expand Down
31 changes: 24 additions & 7 deletions src/utils/tokens.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ export function getTokenCountFromUsage(usage: {
/**
* Get the context window size for a model.
*/
export function getContextWindowSize(model: string): number {
export function getContextWindowSize(
model: string,
contextWindowSize?: number,
): number {
if (contextWindowSize !== undefined) return contextWindowSize
// Anthropic model context windows
if (model.includes('opus-4') && model.includes('1m')) return 1_000_000
if (model.includes('opus-4')) return 200_000
Expand Down Expand Up @@ -98,8 +102,11 @@ export const AUTOCOMPACT_BUFFER_TOKENS = 13_000
/**
 * Get the auto-compact threshold for a model.
 *
 * The threshold is the context window size (as resolved by
 * getContextWindowSize) minus AUTOCOMPACT_BUFFER_TOKENS.
 *
 * @param model - Model identifier, forwarded to getContextWindowSize.
 * @param contextWindowSize - Optional explicit context window size. Only a
 *   finite, positive number is forwarded as an override; anything else
 *   (0, negative, NaN, Infinity) is dropped so the model-based lookup applies.
 * @returns The resolved context window size minus the buffer.
 */
export function getAutoCompactThreshold(
  model: string,
  contextWindowSize?: number,
): number {
  // A zero/negative override would yield a negative threshold and a NaN
  // override a NaN threshold; neither gives a meaningful comparison against
  // an estimated token count, so such values are not forwarded.
  const override =
    contextWindowSize !== undefined &&
    Number.isFinite(contextWindowSize) &&
    contextWindowSize > 0
      ? contextWindowSize
      : undefined

  return getContextWindowSize(model, override) - AUTOCOMPACT_BUFFER_TOKENS
}

/**
Expand Down Expand Up @@ -136,10 +143,20 @@ export const MODEL_PRICING: Record<string, { input: number; output: number }> =
/**
 * Estimate the cost of a response from its token usage.
 *
 * @param model - Model identifier. When no explicit pricing is given, the
 *   first MODEL_PRICING key that is a substring of this value selects the
 *   rates.
 * @param usage - Input and output token counts to price.
 * @param pricing - Optional explicit rates, expressed per million tokens.
 *   When provided it takes precedence over the MODEL_PRICING lookup.
 * @returns usage.input_tokens times the input rate plus usage.output_tokens
 *   times the output rate, with both rates expressed per token.
 */
export function estimateCost(
  model: string,
  usage: { input_tokens: number; output_tokens: number },
  pricing?: { input: number; output: number },
): number {
  // Normalise to per-token rates once. Caller-supplied pricing is quoted per
  // million tokens and must be scaled; MODEL_PRICING entries and the fallback
  // literal are multiplied by token counts directly, i.e. already per token.
  const perToken = pricing
    ? {
        input: pricing.input / 1_000_000,
        output: pricing.output / 1_000_000,
      }
    : (Object.entries(MODEL_PRICING).find(([key]) =>
        model.includes(key),
      )?.[1] ?? { input: 3 / 1_000_000, output: 15 / 1_000_000 })

  return (
    usage.input_tokens * perToken.input +
    usage.output_tokens * perToken.output
  )
}