diff --git a/examples/01-single-agent.ts b/examples/01-single-agent.ts
index b7ac765..af20d90 100644
--- a/examples/01-single-agent.ts
+++ b/examples/01-single-agent.ts
@@ -114,6 +114,8 @@ const conversationAgent = new Agent(
     model: 'claude-sonnet-4-6',
     systemPrompt: 'You are a TypeScript tutor. Give short, direct answers.',
     maxTurns: 2,
+    // Keep only the most recent turn in long prompt() conversations.
+    contextStrategy: { type: 'sliding-window', maxTurns: 1 },
   },
   new ToolRegistry(), // no tools needed for this conversation
   new ToolExecutor(new ToolRegistry()),
diff --git a/src/agent/agent.ts b/src/agent/agent.ts
index 8c1007c..7270e93 100644
--- a/src/agent/agent.ts
+++ b/src/agent/agent.ts
@@ -153,6 +153,7 @@ export class Agent {
       agentRole: this.config.systemPrompt?.slice(0, 50) ?? 'assistant',
       loopDetection: this.config.loopDetection,
       maxTokenBudget: this.config.maxTokenBudget,
+      contextStrategy: this.config.contextStrategy,
     }
 
     this.runner = new AgentRunner(
diff --git a/src/agent/runner.ts b/src/agent/runner.ts
index 81155e8..2b8fbce 100644
--- a/src/agent/runner.ts
+++ b/src/agent/runner.ts
@@ -29,10 +29,12 @@ import type {
   LoopDetectionConfig,
   LoopDetectionInfo,
   LLMToolDef,
+  ContextStrategy,
 } from '../types.js'
 import { TokenBudgetExceededError } from '../errors.js'
 import { LoopDetector } from './loop-detector.js'
 import { emitTrace } from '../utils/trace.js'
+import { estimateTokens } from '../utils/tokens.js'
 import type { ToolRegistry } from '../tool/framework.js'
 import type { ToolExecutor } from '../tool/executor.js'
 
@@ -94,6 +96,8 @@ export interface RunnerOptions {
   readonly loopDetection?: LoopDetectionConfig
   /** Maximum cumulative tokens (input + output) allowed for this run. */
   readonly maxTokenBudget?: number
+  /** Optional context compression strategy for long multi-turn runs. */
+  readonly contextStrategy?: ContextStrategy
 }
 
 /**
@@ -172,6 +176,31 @@ function addTokenUsage(a: TokenUsage, b: TokenUsage): TokenUsage {
 
 const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 }
 
+/**
+ * Prepends synthetic framing text to the first user message so we never emit
+ * consecutive `user` turns (Bedrock) and summaries do not concatenate onto
+ * the original user prompt (direct API). If there is no user message yet,
+ * inserts a single assistant text preamble.
+ */
+function prependSyntheticPrefixToFirstUser(
+  messages: LLMMessage[],
+  prefix: string,
+): LLMMessage[] {
+  const userIdx = messages.findIndex(m => m.role === 'user')
+  if (userIdx < 0) {
+    return [{
+      role: 'assistant',
+      content: [{ type: 'text', text: prefix.trimEnd() }],
+    }, ...messages]
+  }
+  const target = messages[userIdx]!
+  const merged: LLMMessage = {
+    role: 'user',
+    content: [{ type: 'text', text: prefix }, ...target.content],
+  }
+  return [...messages.slice(0, userIdx), merged, ...messages.slice(userIdx + 1)]
+}
+
 // ---------------------------------------------------------------------------
 // AgentRunner
 // ---------------------------------------------------------------------------
@@ -191,6 +220,10 @@ const ZERO_USAGE: TokenUsage = { input_tokens: 0, output_tokens: 0 }
  */
 export class AgentRunner {
   private readonly maxTurns: number
+  private summarizeCache: {
+    oldSignature: string
+    summaryPrefix: string
+  } | null = null
 
   constructor(
     private readonly adapter: LLMAdapter,
@@ -201,6 +234,172 @@ export class AgentRunner {
     this.maxTurns = options.maxTurns ?? 10
   }
 
+  private serializeMessage(message: LLMMessage): string {
+    return JSON.stringify(message)
+  }
+
+  private truncateToSlidingWindow(messages: LLMMessage[], maxTurns: number): LLMMessage[] {
+    if (maxTurns <= 0) {
+      return messages
+    }
+
+    const firstUserIndex = messages.findIndex(m => m.role === 'user')
+    const firstUser = firstUserIndex >= 0 ? messages[firstUserIndex]!
+      : null
+    const afterFirst = firstUserIndex >= 0
+      ? messages.slice(firstUserIndex + 1)
+      : messages.slice()
+
+    if (afterFirst.length <= maxTurns * 2) {
+      return messages
+    }
+
+    const kept = afterFirst.slice(-maxTurns * 2)
+    const result: LLMMessage[] = []
+
+    if (firstUser !== null) {
+      result.push(firstUser)
+    }
+
+    const droppedPairs = Math.floor((afterFirst.length - kept.length) / 2)
+    if (droppedPairs > 0) {
+      const notice =
+        `[Earlier conversation history truncated — ${droppedPairs} turn(s) removed]\n\n`
+      result.push(...prependSyntheticPrefixToFirstUser(kept, notice))
+      return result
+    }
+
+    result.push(...kept)
+    return result
+  }
+
+  private async summarizeMessages(
+    messages: LLMMessage[],
+    maxTokens: number,
+    summaryModel: string | undefined,
+    baseChatOptions: LLMChatOptions,
+    turns: number,
+    options: RunOptions,
+  ): Promise<{ messages: LLMMessage[]; usage: TokenUsage }> {
+    const estimated = estimateTokens(messages)
+    if (estimated <= maxTokens || messages.length < 4) {
+      return { messages, usage: ZERO_USAGE }
+    }
+
+    const firstUserIndex = messages.findIndex(m => m.role === 'user')
+    if (firstUserIndex < 0 || firstUserIndex === messages.length - 1) {
+      return { messages, usage: ZERO_USAGE }
+    }
+
+    const firstUser = messages[firstUserIndex]!
+    const rest = messages.slice(firstUserIndex + 1)
+    if (rest.length < 2) {
+      return { messages, usage: ZERO_USAGE }
+    }
+
+    // Split on an even boundary so we never separate a tool_use assistant turn
+    // from its tool_result user message (rest is user/assistant pairs).
+    const splitAt = Math.max(2, Math.floor(rest.length / 4) * 2)
+    const oldPortion = rest.slice(0, splitAt)
+    const recentPortion = rest.slice(splitAt)
+
+    const oldSignature = oldPortion.map(m => this.serializeMessage(m)).join('\n')
+    if (this.summarizeCache !== null && this.summarizeCache.oldSignature === oldSignature) {
+      const mergedRecent = prependSyntheticPrefixToFirstUser(
+        recentPortion,
+        `${this.summarizeCache.summaryPrefix}\n\n`,
+      )
+      return { messages: [firstUser, ...mergedRecent], usage: ZERO_USAGE }
+    }
+
+    const summaryPrompt = [
+      'Summarize the following conversation history for an LLM.',
+      '- Preserve user goals, constraints, and decisions.',
+      '- Keep key tool outputs and unresolved questions.',
+      '- Use concise bullets.',
+      '- Do not fabricate details.',
+    ].join('\n')
+
+    const summaryInput: LLMMessage[] = [
+      {
+        role: 'user',
+        content: [
+          { type: 'text', text: summaryPrompt },
+          { type: 'text', text: `\n\nConversation:\n${oldSignature}` },
+        ],
+      },
+    ]
+
+    const summaryOptions: LLMChatOptions = {
+      ...baseChatOptions,
+      model: summaryModel ?? this.options.model,
+      tools: undefined,
+    }
+
+    const summaryStartMs = Date.now()
+    const summaryResponse = await this.adapter.chat(summaryInput, summaryOptions)
+    if (options.onTrace) {
+      const summaryEndMs = Date.now()
+      emitTrace(options.onTrace, {
+        type: 'llm_call',
+        runId: options.runId ?? '',
+        taskId: options.taskId,
+        agent: options.traceAgent ?? this.options.agentName ?? 'unknown',
+        model: summaryOptions.model,
+        phase: 'summary',
+        turn: turns,
+        tokens: summaryResponse.usage,
+        startMs: summaryStartMs,
+        endMs: summaryEndMs,
+        durationMs: summaryEndMs - summaryStartMs,
+      })
+    }
+
+    const summaryText = extractText(summaryResponse.content).trim()
+    const summaryPrefix = summaryText.length > 0
+      ? `[Conversation summary]\n${summaryText}`
+      : '[Conversation summary unavailable]'
+
+    this.summarizeCache = { oldSignature, summaryPrefix }
+    const mergedRecent = prependSyntheticPrefixToFirstUser(
+      recentPortion,
+      `${summaryPrefix}\n\n`,
+    )
+    return {
+      messages: [firstUser, ...mergedRecent],
+      usage: summaryResponse.usage,
+    }
+  }
+
+  private async applyContextStrategy(
+    messages: LLMMessage[],
+    strategy: ContextStrategy,
+    baseChatOptions: LLMChatOptions,
+    turns: number,
+    options: RunOptions,
+  ): Promise<{ messages: LLMMessage[]; usage: TokenUsage }> {
+    if (strategy.type === 'sliding-window') {
+      return { messages: this.truncateToSlidingWindow(messages, strategy.maxTurns), usage: ZERO_USAGE }
+    }
+
+    if (strategy.type === 'summarize') {
+      return this.summarizeMessages(
+        messages,
+        strategy.maxTokens,
+        strategy.summaryModel,
+        baseChatOptions,
+        turns,
+        options,
+      )
+    }
+
+    const estimated = estimateTokens(messages)
+    const compressed = await strategy.compress(messages, estimated)
+    if (!Array.isArray(compressed) || compressed.length === 0) {
+      throw new Error('contextStrategy.custom.compress must return a non-empty LLMMessage[]')
+    }
+    return { messages: compressed, usage: ZERO_USAGE }
+  }
+
   // -------------------------------------------------------------------------
   // Tool resolution
   // -------------------------------------------------------------------------
@@ -313,7 +512,7 @@
     options: RunOptions = {},
   ): AsyncGenerator {
     // Working copy of the conversation — mutated as turns progress.
-    const conversationMessages: LLMMessage[] = [...initialMessages]
+    let conversationMessages: LLMMessage[] = [...initialMessages]
 
     // Accumulated state across all turns.
     let totalUsage: TokenUsage = ZERO_USAGE
@@ -363,6 +562,19 @@
 
       turns++
 
+      // Optionally compact context before each LLM call after the first turn.
+      if (this.options.contextStrategy && turns > 1) {
+        const compacted = await this.applyContextStrategy(
+          conversationMessages,
+          this.options.contextStrategy,
+          baseChatOptions,
+          turns,
+          options,
+        )
+        conversationMessages = compacted.messages
+        totalUsage = addTokenUsage(totalUsage, compacted.usage)
+      }
+
       // ------------------------------------------------------------------
       // Step 1: Call the LLM and collect the full response for this turn.
       // ------------------------------------------------------------------
@@ -376,6 +588,7 @@
           taskId: options.taskId,
           agent: options.traceAgent ?? this.options.agentName ?? 'unknown',
           model: this.options.model,
+          phase: 'turn',
           turn: turns,
           tokens: response.usage,
           startMs: llmStartMs,
diff --git a/src/index.ts b/src/index.ts
index bc28b96..0d1b16d 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -153,6 +153,7 @@ export type {
   ToolCallRecord,
   LoopDetectionConfig,
   LoopDetectionInfo,
+  ContextStrategy,
 
   // Team
   TeamConfig,
diff --git a/src/types.ts b/src/types.ts
index 98f0397..2864400 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -65,6 +65,18 @@ export interface LLMMessage {
   readonly content: ContentBlock[]
 }
 
+/** Context management strategy for long-running agent conversations. */
+export type ContextStrategy =
+  | { type: 'sliding-window'; maxTurns: number }
+  | { type: 'summarize'; maxTokens: number; summaryModel?: string }
+  | {
+      type: 'custom'
+      compress: (
+        messages: LLMMessage[],
+        estimatedTokens: number,
+      ) => Promise<LLMMessage[]> | LLMMessage[]
+    }
+
 /** Token accounting for a single API call. */
 export interface TokenUsage {
   readonly input_tokens: number
@@ -215,6 +227,8 @@ export interface AgentConfig {
   readonly maxTokens?: number
   /** Maximum cumulative tokens (input + output) allowed for this run. */
   readonly maxTokenBudget?: number
+  /** Optional context compression policy to control input growth across turns. */
+  readonly contextStrategy?: ContextStrategy
   readonly temperature?: number
   /**
    * Maximum wall-clock time (in milliseconds) for the entire agent run.
@@ -487,6 +501,8 @@ export interface TraceEventBase {
 export interface LLMCallTrace extends TraceEventBase {
   readonly type: 'llm_call'
   readonly model: string
+  /** Distinguishes normal turn calls from context-summary calls. */
+  readonly phase?: 'turn' | 'summary'
   readonly turn: number
   readonly tokens: TokenUsage
 }
diff --git a/src/utils/tokens.ts b/src/utils/tokens.ts
new file mode 100644
index 0000000..1dc57cf
--- /dev/null
+++ b/src/utils/tokens.ts
@@ -0,0 +1,27 @@
+import type { LLMMessage } from '../types.js'
+
+/**
+ * Estimate token count using a lightweight character heuristic.
+ * This intentionally avoids model-specific tokenizer dependencies.
+ */
+export function estimateTokens(messages: LLMMessage[]): number {
+  let chars = 0
+
+  for (const message of messages) {
+    for (const block of message.content) {
+      if (block.type === 'text') {
+        chars += block.text.length
+      } else if (block.type === 'tool_result') {
+        chars += block.content.length
+      } else if (block.type === 'tool_use') {
+        chars += JSON.stringify(block.input).length
+      } else if (block.type === 'image') {
+        // Account for non-text payloads with a small fixed cost.
+        chars += 64
+      }
+    }
+  }
+
+  // Conservative English heuristic: ~4 chars per token.
+  return Math.ceil(chars / 4)
+}
diff --git a/tests/context-strategy.test.ts b/tests/context-strategy.test.ts
new file mode 100644
index 0000000..7c847b0
--- /dev/null
+++ b/tests/context-strategy.test.ts
@@ -0,0 +1,202 @@
+import { describe, it, expect, vi } from 'vitest'
+import { z } from 'zod'
+import { AgentRunner } from '../src/agent/runner.js'
+import { ToolRegistry, defineTool } from '../src/tool/framework.js'
+import { ToolExecutor } from '../src/tool/executor.js'
+import type { LLMAdapter, LLMChatOptions, LLMMessage, LLMResponse, TraceEvent } from '../src/types.js'
+
+function textResponse(text: string): LLMResponse {
+  return {
+    id: `resp-${Math.random().toString(36).slice(2)}`,
+    content: [{ type: 'text', text }],
+    model: 'mock-model',
+    stop_reason: 'end_turn',
+    usage: { input_tokens: 10, output_tokens: 20 },
+  }
+}
+
+function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
+  return {
+    id: `resp-${Math.random().toString(36).slice(2)}`,
+    content: [{
+      type: 'tool_use',
+      id: `tu-${Math.random().toString(36).slice(2)}`,
+      name: toolName,
+      input,
+    }],
+    model: 'mock-model',
+    stop_reason: 'tool_use',
+    usage: { input_tokens: 15, output_tokens: 25 },
+  }
+}
+
+function buildRegistryAndExecutor(): { registry: ToolRegistry; executor: ToolExecutor } {
+  const registry = new ToolRegistry()
+  registry.register(
+    defineTool({
+      name: 'echo',
+      description: 'Echo input',
+      inputSchema: z.object({ message: z.string() }),
+      async execute({ message }) {
+        return { data: message }
+      },
+    }),
+  )
+  return { registry, executor: new ToolExecutor(registry) }
+}
+
+describe('AgentRunner contextStrategy', () => {
+  it('keeps baseline behavior when contextStrategy is not set', async () => {
+    const calls: LLMMessage[][] = []
+    const adapter: LLMAdapter = {
+      name: 'mock',
+      async chat(messages) {
+        calls.push(messages.map(m => ({ role: m.role, content: m.content })))
+        return calls.length === 1
+          ? toolUseResponse('echo', { message: 'hello' })
+          : textResponse('done')
+      },
+      async *stream() {
+        /* unused */
+      },
+    }
+    const { registry, executor } = buildRegistryAndExecutor()
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 4,
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'start' }] }])
+    expect(calls).toHaveLength(2)
+    expect(calls[0]).toHaveLength(1)
+    expect(calls[1]!.length).toBeGreaterThan(calls[0]!.length)
+  })
+
+  it('sliding-window truncates old turns and preserves the first user message', async () => {
+    const calls: LLMMessage[][] = []
+    const responses = [
+      toolUseResponse('echo', { message: 't1' }),
+      toolUseResponse('echo', { message: 't2' }),
+      toolUseResponse('echo', { message: 't3' }),
+      textResponse('done'),
+    ]
+    let idx = 0
+    const adapter: LLMAdapter = {
+      name: 'mock',
+      async chat(messages) {
+        calls.push(messages.map(m => ({ role: m.role, content: m.content })))
+        return responses[idx++]!
+      },
+      async *stream() {
+        /* unused */
+      },
+    }
+    const { registry, executor } = buildRegistryAndExecutor()
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 8,
+      contextStrategy: { type: 'sliding-window', maxTurns: 1 },
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'original prompt' }] }])
+
+    const laterCall = calls[calls.length - 1]!
+    const firstUserText = laterCall[0]!.content[0]
+    expect(firstUserText).toMatchObject({ type: 'text', text: 'original prompt' })
+    const flattenedText = laterCall.flatMap(m => m.content.filter(c => c.type === 'text'))
+    expect(flattenedText.some(c => c.type === 'text' && c.text.includes('truncated'))).toBe(true)
+  })
+
+  it('summarize strategy replaces old context and emits summary trace call', async () => {
+    const calls: Array<{ messages: LLMMessage[]; options: LLMChatOptions }> = []
+    const traces: TraceEvent[] = []
+    const responses = [
+      toolUseResponse('echo', { message: 'first turn payload '.repeat(20) }),
+      toolUseResponse('echo', { message: 'second turn payload '.repeat(20) }),
+      textResponse('This is a concise summary.'),
+      textResponse('final answer'),
+    ]
+    let idx = 0
+    const adapter: LLMAdapter = {
+      name: 'mock',
+      async chat(messages, options) {
+        calls.push({ messages: messages.map(m => ({ role: m.role, content: m.content })), options })
+        return responses[idx++]!
+      },
+      async *stream() {
+        /* unused */
+      },
+    }
+    const { registry, executor } = buildRegistryAndExecutor()
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 8,
+      contextStrategy: { type: 'summarize', maxTokens: 20 },
+    })
+
+    const result = await runner.run(
+      [{ role: 'user', content: [{ type: 'text', text: 'start' }] }],
+      { onTrace: (e) => { traces.push(e) }, runId: 'run-summary', traceAgent: 'context-agent' },
+    )
+
+    const summaryCall = calls.find(c => c.messages.length === 1 && c.options.tools === undefined)
+    expect(summaryCall).toBeDefined()
+    const llmTraces = traces.filter(t => t.type === 'llm_call')
+    expect(llmTraces.some(t => t.type === 'llm_call' && t.phase === 'summary')).toBe(true)
+
+    // Summary adapter usage must count toward RunResult.tokenUsage (maxTokenBudget).
+    expect(result.tokenUsage.input_tokens).toBe(15 + 15 + 10 + 10)
+    expect(result.tokenUsage.output_tokens).toBe(25 + 25 + 20 + 20)
+
+    // After compaction, summary text is folded into the next user turn (not a
+    // standalone user message), preserving user/assistant alternation.
+    const turnAfterSummary = calls.find(
+      c => c.messages.some(
+        m => m.role === 'user' && m.content.some(
+          b => b.type === 'text' && b.text.includes('[Conversation summary]'),
+        ),
+      ),
+    )
+    expect(turnAfterSummary).toBeDefined()
+    const rolesAfterFirstUser = turnAfterSummary!.messages.map(m => m.role).join(',')
+    expect(rolesAfterFirstUser).not.toMatch(/^user,user/)
+  })
+
+  it('custom strategy calls compress callback and uses returned messages', async () => {
+    const compress = vi.fn((messages: LLMMessage[]) => messages.slice(-1))
+    const calls: LLMMessage[][] = []
+    const responses = [
+      toolUseResponse('echo', { message: 'hello' }),
+      textResponse('done'),
+    ]
+    let idx = 0
+    const adapter: LLMAdapter = {
+      name: 'mock',
+      async chat(messages) {
+        calls.push(messages.map(m => ({ role: m.role, content: m.content })))
+        return responses[idx++]!
+      },
+      async *stream() {
+        /* unused */
+      },
+    }
+    const { registry, executor } = buildRegistryAndExecutor()
+    const runner = new AgentRunner(adapter, registry, executor, {
+      model: 'mock-model',
+      allowedTools: ['echo'],
+      maxTurns: 4,
+      contextStrategy: {
+        type: 'custom',
+        compress,
+      },
+    })
+
+    await runner.run([{ role: 'user', content: [{ type: 'text', text: 'custom prompt' }] }])
+
+    expect(compress).toHaveBeenCalledOnce()
+    expect(calls[1]).toHaveLength(1)
+  })
+})