codeany-ai · YangLuYang · Apr 7, 2026
diff --git a/src/agent.ts b/src/agent.ts
@@ -310,6 +310,8 @@ export class Agent {
       agents: opts.agents,
       hookRegistry: this.hookRegistry,
       sessionId: this.sid,
+      contextWindowSize: opts.contextWindowSize,
+      pricingPerMillion: opts.pricingPerMillion,
     })
     this.currentEngine = engine
 

diff --git a/src/engine.ts b/src/engine.ts
@@ -245,7 +245,12 @@ export class QueryEngine {
       }
 
       // Auto-compact if context is too large
-      if (shouldAutoCompact(this.messages as any[], this.config.model, this.compactState)) {
+      if (shouldAutoCompact(
+        this.messages as any[],
+        this.config.model,
+        this.compactState,
+        this.config.contextWindowSize,
+      )) {
         await this.executeHooks('PreCompact')
         try {
           const result = await compactConversation(
@@ -342,7 +347,11 @@ export class QueryEngine {
             (this.totalUsage.cache_read_input_tokens || 0) +
             response.usage.cache_read_input_tokens
         }
-        this.totalCost += estimateCost(this.config.model, response.usage)
+        this.totalCost += estimateCost(
+          this.config.model,
+          response.usage,
+          this.config.pricingPerMillion,
+        )
       }
 
       // Add assistant message to conversation

diff --git a/src/types.ts b/src/types.ts
@@ -443,6 +443,13 @@ export interface AgentOptions {
     hooks: Array<(input: any, toolUseId: string, context: { signal: AbortSignal }) => Promise<any>>
     timeout?: number
   }>>
+  /** Model context window size, in tokens; when unset, the existing model-name matching logic is used. */
+  contextWindowSize?: number
+  /** Model pricing in USD per million tokens; when unset, the existing model-name matching logic is used. */
+  pricingPerMillion?: {
+    input: number
+    output: number
+  }
 }
 
 export interface QueryResult {
@@ -483,4 +490,9 @@ export interface QueryEngineConfig {
   hookRegistry?: import('./hooks.js').HookRegistry
   /** Session ID for hook context */
   sessionId?: string
+  contextWindowSize?: number
+  pricingPerMillion?: {
+    input: number
+    output: number
+  }
 }
diff --git a/src/utils/compact.ts b/src/utils/compact.ts
@@ -42,11 +42,12 @@ export function shouldAutoCompact(
   messages: any[],
   model: string,
   state: AutoCompactState,
+  contextWindowSize?: number,
 ): boolean {
   if (state.consecutiveFailures >= 3) return false
 
   const estimatedTokens = estimateMessagesTokens(messages)
-  const threshold = getAutoCompactThreshold(model)
+  const threshold = getAutoCompactThreshold(model, contextWindowSize)
 
   return estimatedTokens >= threshold
 }

diff --git a/src/utils/tokens.ts b/src/utils/tokens.ts
@@ -65,7 +65,11 @@ export function getTokenCountFromUsage(usage: {
 /**
  * Get the context window size for a model.
  */
-export function getContextWindowSize(model: string): number {
+export function getContextWindowSize(
+  model: string,
+  contextWindowSize?: number,
+): number {
+  if (contextWindowSize !== undefined) return contextWindowSize
   // Anthropic model context windows
   if (model.includes('opus-4') && model.includes('1m')) return 1_000_000
   if (model.includes('opus-4')) return 200_000
@@ -98,8 +102,11 @@ export const AUTOCOMPACT_BUFFER_TOKENS = 13_000
 /**
  * Get the auto-compact threshold for a model.
  */
-export function getAutoCompactThreshold(model: string): number {
-  return getContextWindowSize(model) - AUTOCOMPACT_BUFFER_TOKENS
+export function getAutoCompactThreshold(
+  model: string,
+  contextWindowSize?: number,
+): number {
+  return getContextWindowSize(model, contextWindowSize) - AUTOCOMPACT_BUFFER_TOKENS
 }
 
 /**
@@ -136,10 +143,20 @@ export const MODEL_PRICING: Record<string, { input: number; output: number }> =
 export function estimateCost(
   model: string,
   usage: { input_tokens: number; output_tokens: number },
+  pricing?: { input: number; output: number },
 ): number {
-  const pricing = Object.entries(MODEL_PRICING).find(([key]) =>
-    model.includes(key),
-  )?.[1] ?? { input: 3 / 1_000_000, output: 15 / 1_000_000 }
+  if (pricing) {
+    return (
+      usage.input_tokens * (pricing.input / 1_000_000) +
+      usage.output_tokens * (pricing.output / 1_000_000)
+    )
+  }
+  const resolved =
+    Object.entries(MODEL_PRICING).find(([key]) =>
+      model.includes(key),
+    )?.[1] ?? { input: 3 / 1_000_000, output: 15 / 1_000_000 }
 
-  return usage.input_tokens * pricing.input + usage.output_tokens * pricing.output
+  return (
+    usage.input_tokens * resolved.input + usage.output_tokens * resolved.output
+  )
 }