From 2fb146d583a69a2e17e47b28353a2f25717f6e7c Mon Sep 17 00:00:00 2001
From: guunergooner <tongchao0923@gmail.com>
Date: Wed, 8 Apr 2026 19:43:13 +0800
Subject: [PATCH 1/2] feat: Add DeepSeek thinking mode support for OpenAI
 compatibility layer

Support DeepSeek reasoning models (deepseek-reasoner and DeepSeek-V3.2)
in OpenAI-compatible API layer with automatic thinking mode detection.

Key changes:
- Add `isOpenAIThinkingEnabled()` for auto-detecting thinking-capable models
- Inject thinking parameters in request body (both official API and vLLM formats)
- Preserve reasoning_content in message conversion for tool call iterations
- Strip reasoning_content from previous turns to save bandwidth
- Add comprehensive test coverage for thinking mode detection

Thinking mode is enabled when:
1. OPENAI_ENABLE_THINKING=1 is set (explicit enable), OR
2. Model name contains "deepseek-reasoner" or "deepseek-v3.2" (auto-detect)

Signed-off-by: guunergooner <tongchao0923@gmail.com>
---
 .../openai/__tests__/convertMessages.test.ts  | 206 ++++++++++++++++++
 .../api/openai/__tests__/thinking.test.ts     | 192 ++++++++++++++++
 src/services/api/openai/convertMessages.ts    |  65 +++++-
 src/services/api/openai/index.ts              |  59 ++++-
 4 files changed, 509 insertions(+), 13 deletions(-)
 create mode 100644 src/services/api/openai/__tests__/thinking.test.ts

diff --git a/src/services/api/openai/__tests__/convertMessages.test.ts b/src/services/api/openai/__tests__/convertMessages.test.ts
index 0ea52757b..39811c7c8 100644
--- a/src/services/api/openai/__tests__/convertMessages.test.ts
+++ b/src/services/api/openai/__tests__/convertMessages.test.ts
@@ -249,3 +249,209 @@ describe('anthropicMessagesToOpenAI', () => {
     )
   })
 })
+
+describe('DeepSeek thinking mode (enableThinking)', () => {
+  test('preserves thinking block as reasoning_content when enabled', () => {
+    const result = anthropicMessagesToOpenAI(
+      [makeUserMsg('question'), makeAssistantMsg([
+        { type: 'thinking' as const, thinking: 'Let me reason about this...' },
+        { type: 'text', text: 'The answer is 42.' },
+      ])],
+      [] as any,
+      { enableThinking: true },
+    )
+    // Should have: user, assistant with reasoning_content
+    expect(result).toHaveLength(2)
+    expect(result[0].role).toBe('user')
+    const assistant = result[1] as any
+    expect(assistant.role).toBe('assistant')
+    expect(assistant.content).toBe('The answer is 42.')
+    expect(assistant.reasoning_content).toBe('Let me reason about this...')
+  })
+
+  test('drops thinking block when enableThinking is false (default)', () => {
+    const result = anthropicMessagesToOpenAI(
+      [makeAssistantMsg([
+        { type: 'thinking' as const, thinking: 'internal thoughts...' },
+        { type: 'text', text: 'visible response' },
+      ])],
+      [] as any,
+    )
+    const assistant = result[0] as any
+    expect(assistant.content).toBe('visible response')
+    expect(assistant.reasoning_content).toBeUndefined()
+  })
+
+  test('preserves reasoning_content with tool_calls in same turn', () => {
+    const result = anthropicMessagesToOpenAI(
+      [
+        makeUserMsg('what is the weather?'),
+        makeAssistantMsg([
+          { type: 'thinking' as const, thinking: 'I need to call the weather tool.' },
+          { type: 'text', text: '' },
+          {
+            type: 'tool_use' as const,
+            id: 'toolu_001',
+            name: 'get_weather',
+            input: { location: 'Hangzhou' },
+          },
+        ]),
+        makeUserMsg([
+          {
+            type: 'tool_result' as const,
+            tool_use_id: 'toolu_001',
+            content: 'Cloudy 7~13°C',
+          },
+        ]),
+      ],
+      [] as any,
+      { enableThinking: true },
+    )
+
+    // Find the assistant message
+    const assistants = result.filter(m => m.role === 'assistant')
+    expect(assistants.length).toBe(1)
+    const assistant = assistants[0] as any
+    expect(assistant.reasoning_content).toBe('I need to call the weather tool.')
+    expect(assistant.tool_calls).toBeDefined()
+    expect(assistant.tool_calls[0].function.name).toBe('get_weather')
+  })
+
+  test('strips reasoning_content from previous turns', () => {
+    const result = anthropicMessagesToOpenAI(
+      [
+        // Turn 1: user → assistant (with thinking)
+        makeUserMsg('question 1'),
+        makeAssistantMsg([
+          { type: 'thinking' as const, thinking: 'Turn 1 reasoning...' },
+          { type: 'text', text: 'Turn 1 answer' },
+        ]),
+        // Turn 2: new user message → previous reasoning should be stripped
+        makeUserMsg('question 2'),
+        makeAssistantMsg([
+          { type: 'thinking' as const, thinking: 'Turn 2 reasoning...' },
+          { type: 'text', text: 'Turn 2 answer' },
+        ]),
+      ],
+      [] as any,
+      { enableThinking: true },
+    )
+
+    const assistants = result.filter(m => m.role === 'assistant')
+    // Turn 1 assistant: reasoning should be stripped (previous turn)
+    expect((assistants[0] as any).reasoning_content).toBeUndefined()
+    expect((assistants[0] as any).content).toBe('Turn 1 answer')
+    // Turn 2 assistant: reasoning should be preserved (current turn)
+    expect((assistants[1] as any).reasoning_content).toBe('Turn 2 reasoning...')
+    expect((assistants[1] as any).content).toBe('Turn 2 answer')
+  })
+
+  test('preserves reasoning_content in multi-iteration tool call within same turn', () => {
+    // Simulates a full DeepSeek tool call iteration:
+    // user → assistant(thinking+tool_call) → tool_result → assistant(thinking+tool_call) → tool_result → assistant(thinking+text)
+    const result = anthropicMessagesToOpenAI(
+      [
+        makeUserMsg("tomorrow's weather in Hangzhou"),
+        // Iteration 1: thinking + tool call
+        makeAssistantMsg([
+          { type: 'thinking' as const, thinking: 'I need the date first.' },
+          {
+            type: 'tool_use' as const,
+            id: 'toolu_001',
+            name: 'get_date',
+            input: {},
+          },
+        ]),
+        makeUserMsg([
+          {
+            type: 'tool_result' as const,
+            tool_use_id: 'toolu_001',
+            content: '2026-04-08',
+          },
+        ]),
+        // Iteration 2: thinking + tool call
+        makeAssistantMsg([
+          { type: 'thinking' as const, thinking: 'Now I can get the weather.' },
+          {
+            type: 'tool_use' as const,
+            id: 'toolu_002',
+            name: 'get_weather',
+            input: { location: 'Hangzhou', date: '2026-04-08' },
+          },
+        ]),
+        makeUserMsg([
+          {
+            type: 'tool_result' as const,
+            tool_use_id: 'toolu_002',
+            content: 'Cloudy 7~13°C',
+          },
+        ]),
+        // Iteration 3: thinking + final answer
+        makeAssistantMsg([
+          { type: 'thinking' as const, thinking: 'I have the info now.' },
+          { type: 'text', text: 'Tomorrow will be cloudy, 7-13°C.' },
+        ]),
+      ],
+      [] as any,
+      { enableThinking: true },
+    )
+
+    // All 3 assistant messages are in the current turn (after last user msg is the last tool_result,
+    // but the "last user message" boundary logic finds the last user-typed message).
+    // Actually, tool_result messages are also UserMessage type, so the last user message
+    // is the one with tool_result for toolu_002. All assistant messages after that should have reasoning.
+    const assistants = result.filter(m => m.role === 'assistant')
+    expect(assistants.length).toBe(3)
+    // All iterations within the same turn preserve reasoning
+    expect((assistants[0] as any).reasoning_content).toBe('I need the date first.')
+    expect((assistants[1] as any).reasoning_content).toBe('Now I can get the weather.')
+    expect((assistants[2] as any).reasoning_content).toBe('I have the info now.')
+  })
+
+  test('handles multiple thinking blocks in single assistant message', () => {
+    const result = anthropicMessagesToOpenAI(
+      [makeUserMsg('question'), makeAssistantMsg([
+        { type: 'thinking' as const, thinking: 'First thought.' },
+        { type: 'thinking' as const, thinking: 'Second thought.' },
+        { type: 'text', text: 'Final answer.' },
+      ])],
+      [] as any,
+      { enableThinking: true },
+    )
+    const assistant = result.filter(m => m.role === 'assistant')[0] as any
+    expect(assistant.reasoning_content).toBe('First thought.\nSecond thought.')
+  })
+
+  test('skips empty thinking blocks', () => {
+    const result = anthropicMessagesToOpenAI(
+      [makeUserMsg('question'), makeAssistantMsg([
+        { type: 'thinking' as const, thinking: '' },
+        { type: 'text', text: 'Answer.' },
+      ])],
+      [] as any,
+      { enableThinking: true },
+    )
+    const assistant = result.filter(m => m.role === 'assistant')[0] as any
+    expect(assistant.reasoning_content).toBeUndefined()
+  })
+
+  test('sets content to null when only thinking and tool_calls present', () => {
+    const result = anthropicMessagesToOpenAI(
+      [makeUserMsg('question'), makeAssistantMsg([
+        { type: 'thinking' as const, thinking: 'Reasoning only.' },
+        {
+          type: 'tool_use' as const,
+          id: 'toolu_001',
+          name: 'bash',
+          input: { command: 'ls' },
+        },
+      ])],
+      [] as any,
+      { enableThinking: true },
+    )
+    const assistant = result.filter(m => m.role === 'assistant')[0] as any
+    expect(assistant.content).toBeNull()
+    expect(assistant.reasoning_content).toBe('Reasoning only.')
+    expect(assistant.tool_calls).toHaveLength(1)
+  })
+})
diff --git a/src/services/api/openai/__tests__/thinking.test.ts b/src/services/api/openai/__tests__/thinking.test.ts
new file mode 100644
index 000000000..f079ae052
--- /dev/null
+++ b/src/services/api/openai/__tests__/thinking.test.ts
@@ -0,0 +1,192 @@
+import { describe, expect, test, beforeEach, afterEach } from 'bun:test'
+import { isOpenAIThinkingEnabled } from '../index.js'
+
+describe('isOpenAIThinkingEnabled', () => {
+  const originalEnv = {
+    OPENAI_ENABLE_THINKING: process.env.OPENAI_ENABLE_THINKING,
+  }
+
+  beforeEach(() => {
+    // Clear env var before each test
+    delete process.env.OPENAI_ENABLE_THINKING
+  })
+
+  afterEach(() => {
+    // Restore original env var
+    process.env.OPENAI_ENABLE_THINKING = originalEnv.OPENAI_ENABLE_THINKING
+  })
+
+  describe('OPENAI_ENABLE_THINKING env var', () => {
+    test('returns true when OPENAI_ENABLE_THINKING=1', () => {
+      process.env.OPENAI_ENABLE_THINKING = '1'
+      expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
+    })
+
+    test('returns true when OPENAI_ENABLE_THINKING=true', () => {
+      process.env.OPENAI_ENABLE_THINKING = 'true'
+      expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
+    })
+
+    test('returns true when OPENAI_ENABLE_THINKING=yes', () => {
+      process.env.OPENAI_ENABLE_THINKING = 'yes'
+      expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
+    })
+
+    test('returns true when OPENAI_ENABLE_THINKING=on', () => {
+      process.env.OPENAI_ENABLE_THINKING = 'on'
+      expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
+    })
+
+    test('returns true when OPENAI_ENABLE_THINKING=TRUE (case insensitive)', () => {
+      process.env.OPENAI_ENABLE_THINKING = 'TRUE'
+      expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
+    })
+
+    test('returns false when OPENAI_ENABLE_THINKING=0', () => {
+      process.env.OPENAI_ENABLE_THINKING = '0'
+      expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false)
+    })
+
+    test('returns false when OPENAI_ENABLE_THINKING=false', () => {
+      process.env.OPENAI_ENABLE_THINKING = 'false'
+      expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false)
+    })
+
+    test('returns false when OPENAI_ENABLE_THINKING is empty', () => {
+      process.env.OPENAI_ENABLE_THINKING = ''
+      expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false)
+    })
+
+    test('returns false when OPENAI_ENABLE_THINKING is not set', () => {
+      expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false)
+    })
+  })
+
+  describe('model name auto-detect', () => {
+    test('returns true when model name is "deepseek-reasoner"', () => {
+      expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(true)
+    })
+
+    test('returns true when model name contains "deepseek-reasoner" (case insensitive)', () => {
+      expect(isOpenAIThinkingEnabled('DeepSeek-Reasoner')).toBe(true)
+    })
+
+    test('returns true when model name has prefix/suffix for deepseek-reasoner', () => {
+      expect(isOpenAIThinkingEnabled('my-deepseek-reasoner-v1')).toBe(true)
+    })
+
+    test('returns true when model name is namespaced for deepseek-reasoner', () => {
+      expect(isOpenAIThinkingEnabled('TokenService/deepseek-reasoner')).toBe(true)
+    })
+
+    test('returns true when model name is "deepseek-v3.2"', () => {
+      expect(isOpenAIThinkingEnabled('deepseek-v3.2')).toBe(true)
+    })
+
+    test('returns true when model name contains "deepseek-v3.2" (case insensitive)', () => {
+      expect(isOpenAIThinkingEnabled('DeepSeek-V3.2')).toBe(true)
+    })
+
+    test('returns true when model name has prefix/suffix for deepseek-v3.2', () => {
+      expect(isOpenAIThinkingEnabled('my-deepseek-v3.2-v1')).toBe(true)
+    })
+
+    test('returns true when model name is namespaced for deepseek-v3.2', () => {
+      expect(isOpenAIThinkingEnabled('TokenService/deepseek-v3.2')).toBe(true)
+    })
+
+    test('returns false when model name is "deepseek-chat"', () => {
+      expect(isOpenAIThinkingEnabled('deepseek-chat')).toBe(false)
+    })
+
+    test('returns false when model name is "deepseek-v3"', () => {
+      expect(isOpenAIThinkingEnabled('deepseek-v3')).toBe(false)
+    })
+
+    test('returns false when model name contains "deepseek" but not "reasoner" or "v3.2"', () => {
+      expect(isOpenAIThinkingEnabled('deepseek-coder')).toBe(false)
+    })
+
+    test('returns false when model name is "gpt-4o"', () => {
+      expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false)
+    })
+
+    test('returns false when model name is empty', () => {
+      expect(isOpenAIThinkingEnabled('')).toBe(false)
+    })
+  })
+
+  describe('priority and combined detection', () => {
+    test('OPENAI_ENABLE_THINKING=1 enables thinking for any model', () => {
+      process.env.OPENAI_ENABLE_THINKING = '1'
+      expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true)
+      expect(isOpenAIThinkingEnabled('deepseek-v3')).toBe(true)
+    })
+
+    test('OPENAI_ENABLE_THINKING=false disables thinking even for deepseek-reasoner', () => {
+      process.env.OPENAI_ENABLE_THINKING = 'false'
+      expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false)
+    })
+
+    test('OPENAI_ENABLE_THINKING=0 disables thinking even for deepseek-reasoner', () => {
+      process.env.OPENAI_ENABLE_THINKING = '0'
+      expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false)
+    })
+
+    test('both conditions can enable thinking', () => {
+      process.env.OPENAI_ENABLE_THINKING = '1'
+      expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(true)
+    })
+  })
+})
+
+describe('thinking request parameters', () => {
+  // Note: These tests verify the request body structure indirectly.
+  // The actual API call is mocked in integration tests.
+  // Here we document the expected parameter formats:
+
+  test('documents official DeepSeek API format: thinking: { type: "enabled" }', () => {
+    // Official DeepSeek API expects:
+    const officialFormat = {
+      thinking: { type: 'enabled' },
+    }
+    expect(officialFormat.thinking.type).toBe('enabled')
+  })
+
+  test('documents vLLM/self-hosted format: enable_thinking + chat_template_kwargs', () => {
+    // Self-hosted DeepSeek-V3.2/vLLM expects:
+    const vllmFormat = {
+      enable_thinking: true,
+      chat_template_kwargs: { thinking: true },
+    }
+    expect(vllmFormat.enable_thinking).toBe(true)
+    expect(vllmFormat.chat_template_kwargs.thinking).toBe(true)
+  })
+
+  test('both formats are added simultaneously when thinking is enabled', () => {
+    // The implementation adds both formats so each endpoint
+    // can use the one it recognizes:
+    const combinedFormat = {
+      // Official DeepSeek API format
+      thinking: { type: 'enabled' },
+      // Self-hosted DeepSeek-V3.2/vLLM format
+      enable_thinking: true,
+      chat_template_kwargs: { thinking: true },
+    }
+    expect(combinedFormat.thinking.type).toBe('enabled')
+    expect(combinedFormat.enable_thinking).toBe(true)
+    expect(combinedFormat.chat_template_kwargs.thinking).toBe(true)
+  })
+
+  test('thinking params are NOT added when thinking is disabled', () => {
+    // When thinking is disabled, none of these params should be present:
+    const disabledFormat = {
+      model: 'gpt-4o',
+      messages: [],
+      stream: true,
+    }
+    expect((disabledFormat as any).thinking).toBeUndefined()
+    expect((disabledFormat as any).enable_thinking).toBeUndefined()
+    expect((disabledFormat as any).chat_template_kwargs).toBeUndefined()
+  })
+})
\ No newline at end of file
diff --git a/src/services/api/openai/convertMessages.ts b/src/services/api/openai/convertMessages.ts
index 3869120eb..623eeb234 100644
--- a/src/services/api/openai/convertMessages.ts
+++ b/src/services/api/openai/convertMessages.ts
@@ -13,6 +13,12 @@ import type {
 import type { AssistantMessage, UserMessage } from '../../../types/message.js'
 import type { SystemPrompt } from '../../../utils/systemPromptType.js'
 
+export interface ConvertMessagesOptions {
+  /** When true, preserve thinking blocks as reasoning_content on assistant messages
+   *  (required for DeepSeek thinking mode with tool calls). */
+  enableThinking?: boolean
+}
+
 /**
  * Convert internal (UserMessage | AssistantMessage)[] to OpenAI-format messages.
  *
@@ -20,14 +26,16 @@ import type { SystemPrompt } from '../../../utils/systemPromptType.js'
  * - system prompt → role: "system" message prepended
  * - tool_use blocks → tool_calls[] on assistant message
  * - tool_result blocks → role: "tool" messages
- * - thinking blocks → silently dropped
+ * - thinking blocks → silently dropped (or preserved as reasoning_content when enableThinking=true)
  * - cache_control → stripped
  */
 export function anthropicMessagesToOpenAI(
   messages: (UserMessage | AssistantMessage)[],
   systemPrompt: SystemPrompt,
+  options?: ConvertMessagesOptions,
 ): ChatCompletionMessageParam[] {
   const result: ChatCompletionMessageParam[] = []
+  const enableThinking = options?.enableThinking ?? false
 
   // Prepend system prompt as system message
   const systemText = systemPromptToText(systemPrompt)
@@ -38,13 +46,42 @@ export function anthropicMessagesToOpenAI(
     } satisfies ChatCompletionSystemMessageParam)
   }
 
-  for (const msg of messages) {
+  // When thinking mode is on, detect turn boundaries so that reasoning_content
+  // from *previous* user turns is stripped (saves bandwidth; DeepSeek ignores it).
+  // A "new turn" starts when a user text message appears after at least one assistant response.
+  const turnBoundaries = new Set<number>()
+  if (enableThinking) {
+    let hasSeenAssistant = false
+    for (let i = 0; i < messages.length; i++) {
+      const msg = messages[i]
+      if (msg.type === 'assistant') {
+        hasSeenAssistant = true
+      }
+      if (msg.type === 'user' && hasSeenAssistant) {
+        const content = msg.message.content
+        const hasText = typeof content === 'string'
+          ? content.length > 0
+          : Array.isArray(content) && content.some(
+              (b: any) => typeof b === 'string' || b.type === 'text',
+            )
+        if (hasText) {
+          turnBoundaries.add(i)
+        }
+      }
+    }
+  }
+
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i]
     switch (msg.type) {
       case 'user':
         result.push(...convertInternalUserMessage(msg))
         break
       case 'assistant':
-        result.push(...convertInternalAssistantMessage(msg))
+        // Preserve reasoning_content unless we're before a turn boundary
+        // (i.e., from a previous user Q&A round)
+        const preserveReasoning = enableThinking && !isBeforeAnyTurnBoundary(i, turnBoundaries)
+        result.push(...convertInternalAssistantMessage(msg, preserveReasoning))
         break
       default:
         break
@@ -61,6 +98,17 @@ function systemPromptToText(systemPrompt: SystemPrompt): string {
     .join('\n\n')
 }
 
+/**
+ * Check if index `i` falls before any turn boundary (i.e. it belongs to a previous turn).
+ * A message at index i is "before" a boundary if there exists a boundary j where i < j.
+ */
+function isBeforeAnyTurnBoundary(i: number, boundaries: Set<number>): boolean {
+  for (const b of boundaries) {
+    if (i < b) return true
+  }
+  return false
+}
+
 function convertInternalUserMessage(
   msg: UserMessage,
 ): ChatCompletionMessageParam[] {
@@ -151,6 +199,7 @@ function convertToolResult(
 
 function convertInternalAssistantMessage(
   msg: AssistantMessage,
+  preserveReasoning = false,
 ): ChatCompletionMessageParam[] {
   const content = msg.message.content
 
@@ -174,6 +223,7 @@ function convertInternalAssistantMessage(
 
   const textParts: string[] = []
   const toolCalls: NonNullable<ChatCompletionAssistantMessageParam['tool_calls']> = []
+  const reasoningParts: string[] = []
 
   for (const block of content) {
     if (typeof block === 'string') {
@@ -191,14 +241,21 @@ function convertInternalAssistantMessage(
             typeof tu.input === 'string' ? tu.input : JSON.stringify(tu.input),
         },
       })
+    } else if (block.type === 'thinking' && preserveReasoning) {
+      // DeepSeek thinking mode: preserve reasoning_content for tool call iterations
+      const thinkingText = (block as Record<string, unknown>).thinking
+      if (typeof thinkingText === 'string' && thinkingText) {
+        reasoningParts.push(thinkingText)
+      }
     }
-    // Skip thinking, redacted_thinking, server_tool_use, etc.
+    // Skip redacted_thinking, server_tool_use, etc.
   }
 
   const result: ChatCompletionAssistantMessageParam = {
     role: 'assistant',
     content: textParts.length > 0 ? textParts.join('\n') : null,
     ...(toolCalls.length > 0 && { tool_calls: toolCalls }),
+    ...(reasoningParts.length > 0 && { reasoning_content: reasoningParts.join('\n') }),
   }
 
   return [result]
diff --git a/src/services/api/openai/index.ts b/src/services/api/openai/index.ts
index 251e89f7d..9c90d79c8 100644
--- a/src/services/api/openai/index.ts
+++ b/src/services/api/openai/index.ts
@@ -24,6 +24,7 @@ import {
 import { logForDebugging } from '../../../utils/debug.js'
 import { addToTotalSessionCost } from '../../../cost-tracker.js'
 import { calculateUSDCost } from '../../../utils/modelCost.js'
+import { isEnvTruthy, isEnvDefinedFalsy } from '../../../utils/envUtils.js'
 import type { Options } from '../claude.js'
 import { randomUUID } from 'crypto'
 import {
@@ -39,6 +40,29 @@ import {
   TOOL_SEARCH_TOOL_NAME,
 } from '../../../tools/ToolSearchTool/prompt.js'
 
+/**
+ * Detect whether DeepSeek-style thinking mode should be enabled.
+ *
+ * Enabled when:
+ * 1. OPENAI_ENABLE_THINKING=1 is set (explicit enable), OR
+ * 2. Model name contains "deepseek-reasoner" OR "DeepSeek-V3.2" (auto-detect, case-insensitive)
+ *
+ * Disabled when:
+ * - OPENAI_ENABLE_THINKING=0/false/no/off is explicitly set (overrides model detection)
+ *
+ * @param model - The resolved OpenAI model name
+ * @internal Exported for testing purposes only
+ */
+export function isOpenAIThinkingEnabled(model: string): boolean {
+  // Explicit disable takes priority (overrides model auto-detect)
+  if (isEnvDefinedFalsy(process.env.OPENAI_ENABLE_THINKING)) return false
+  // Explicit enable
+  if (isEnvTruthy(process.env.OPENAI_ENABLE_THINKING)) return true
+  // Auto-detect from model name (deepseek-reasoner and DeepSeek-V3.2 support thinking mode)
+  const modelLower = model.toLowerCase()
+  return modelLower.includes('deepseek-reasoner') || modelLower.includes('deepseek-v3.2')
+}
+
 /**
  * OpenAI-compatible query path. Converts Anthropic-format messages/tools to
  * OpenAI format, calls the OpenAI-compatible endpoint, and converts the
@@ -120,10 +144,10 @@ export async function* queryModelOpenAI(
     )
 
     // 8. Convert messages and tools to OpenAI format
-    const openaiMessages = anthropicMessagesToOpenAI(
-      messagesForAPI,
-      systemPrompt,
-    )
+    const enableThinking = isOpenAIThinkingEnabled(openaiModel)
+    const openaiMessages = anthropicMessagesToOpenAI(messagesForAPI, systemPrompt, {
+      enableThinking,
+    })
     const openaiTools = anthropicToolsToOpenAI(standardTools)
     const openaiToolChoice = anthropicToolChoiceToOpenAI(options.toolChoice)
 
@@ -149,10 +173,16 @@ export async function* queryModelOpenAI(
     })
 
     logForDebugging(
-      `[OpenAI] Calling model=${openaiModel}, messages=${openaiMessages.length}, tools=${openaiTools.length}`,
+      `[OpenAI] Calling model=${openaiModel}, messages=${openaiMessages.length}, tools=${openaiTools.length}, thinking=${enableThinking}`,
     )
 
     // 11. Call OpenAI API with streaming
+    // DeepSeek thinking mode: inject thinking params via request body.
+    // Two formats are added simultaneously to support different deployments:
+    // - Official DeepSeek API: `thinking: { type: 'enabled' }`
+    // - Self-hosted DeepSeek-V3.2: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }`
+    // OpenAI SDK passes unknown keys through to the HTTP body.
+    // Each endpoint will use the format it recognizes and ignore the others.
     const stream = await client.chat.completions.create(
       {
         model: openaiModel,
@@ -163,7 +193,18 @@ export async function* queryModelOpenAI(
         }),
         stream: true,
         stream_options: { include_usage: true },
-        ...(options.temperatureOverride !== undefined && {
+        // DeepSeek thinking mode: enable chain-of-thought output.
+        // When active, temperature/top_p/presence_penalty/frequency_penalty are ignored by DeepSeek.
+        ...(enableThinking && {
+          // Official DeepSeek API format
+          thinking: { type: 'enabled' },
+          // Self-hosted DeepSeek-V3.2 format
+          enable_thinking: true,
+          chat_template_kwargs: { thinking: true },
+        }),
+        // Only send temperature when thinking mode is off (DeepSeek ignores it anyway,
+        // but other providers may respect it)
+        ...(!enableThinking && options.temperatureOverride !== undefined && {
           temperature: options.temperatureOverride,
         }),
       },
@@ -172,8 +213,8 @@ export async function* queryModelOpenAI(
       },
     )
 
-    // 7. Convert OpenAI stream to Anthropic events, then process into
-    //    AssistantMessage + StreamEvent (matching the Anthropic path behavior)
+    // 12. Convert OpenAI stream to Anthropic events, then process into
+    //     AssistantMessage + StreamEvent (matching the Anthropic path behavior)
     const adaptedStream = adaptOpenAIStreamToAnthropic(stream, openaiModel)
 
     // Accumulate content blocks and usage, same as the Anthropic path in claude.ts
@@ -287,4 +328,4 @@ export async function* queryModelOpenAI(
       error: error instanceof Error ? error : new Error(String(error)),
     })
   }
-}
+}
\ No newline at end of file

From be38691f68a82fe6a73b19c66322db21cec3f7a2 Mon Sep 17 00:00:00 2001
From: guunergooner <tongchao0923@gmail.com>
Date: Wed, 8 Apr 2026 20:08:49 +0800
Subject: [PATCH 2/2] fix: Address PR #206 review feedback for OpenAI thinking
 mode

- Fix afterEach env var leak: use delete instead of assigning undefined
- Fix turn boundary detection: treat multimodal inputs (e.g. images)
  as new turns, not just text content
- Extract buildOpenAIRequestBody() for testability, replace constant-
  assertion tests with real function output verification

Signed-off-by: guunergooner <tongchao0923@gmail.com>
---
 .../api/openai/__tests__/thinking.test.ts     | 122 +++++++++++-------
 src/services/api/openai/convertMessages.ts    |  14 +-
 src/services/api/openai/index.ts              |  90 ++++++++-----
 3 files changed, 146 insertions(+), 80 deletions(-)

diff --git a/src/services/api/openai/__tests__/thinking.test.ts b/src/services/api/openai/__tests__/thinking.test.ts
index f079ae052..c53abaf36 100644
--- a/src/services/api/openai/__tests__/thinking.test.ts
+++ b/src/services/api/openai/__tests__/thinking.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, test, beforeEach, afterEach } from 'bun:test'
-import { isOpenAIThinkingEnabled } from '../index.js'
+import { isOpenAIThinkingEnabled, buildOpenAIRequestBody } from '../index.js'
 
 describe('isOpenAIThinkingEnabled', () => {
   const originalEnv = {
@@ -12,8 +12,13 @@ describe('isOpenAIThinkingEnabled', () => {
   })
 
   afterEach(() => {
-    // Restore original env var
-    process.env.OPENAI_ENABLE_THINKING = originalEnv.OPENAI_ENABLE_THINKING
+    // Restore original env var — delete key if it was originally undefined
+    // to avoid leaking the env key into subsequent tests
+    if (originalEnv.OPENAI_ENABLE_THINKING === undefined) {
+      delete process.env.OPENAI_ENABLE_THINKING
+    } else {
+      process.env.OPENAI_ENABLE_THINKING = originalEnv.OPENAI_ENABLE_THINKING
+    }
   })
 
   describe('OPENAI_ENABLE_THINKING env var', () => {
@@ -140,53 +145,82 @@ describe('isOpenAIThinkingEnabled', () => {
   })
 })
 
-describe('thinking request parameters', () => {
-  // Note: These tests verify the request body structure indirectly.
-  // The actual API call is mocked in integration tests.
-  // Here we document the expected parameter formats:
+describe('buildOpenAIRequestBody — thinking params', () => {
+  const baseParams = {
+    model: 'deepseek-reasoner',
+    messages: [{ role: 'user', content: 'hello' }],
+    tools: [] as any[],
+    toolChoice: undefined as any,
+  }
 
-  test('documents official DeepSeek API format: thinking: { type: "enabled" }', () => {
-    // Official DeepSeek API expects:
-    const officialFormat = {
-      thinking: { type: 'enabled' },
-    }
-    expect(officialFormat.thinking.type).toBe('enabled')
+  test('includes official DeepSeek API thinking format when enabled', () => {
+    const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true })
+    expect(body.thinking).toEqual({ type: 'enabled' })
   })
 
-  test('documents vLLM/self-hosted format: enable_thinking + chat_template_kwargs', () => {
-    // Self-hosted DeepSeek-V3.2/vLLM expects:
-    const vllmFormat = {
-      enable_thinking: true,
-      chat_template_kwargs: { thinking: true },
-    }
-    expect(vllmFormat.enable_thinking).toBe(true)
-    expect(vllmFormat.chat_template_kwargs.thinking).toBe(true)
+  test('includes vLLM/self-hosted thinking format when enabled', () => {
+    const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true })
+    expect(body.enable_thinking).toBe(true)
+    expect(body.chat_template_kwargs).toEqual({ thinking: true })
   })
 
-  test('both formats are added simultaneously when thinking is enabled', () => {
-    // The implementation adds both formats so each endpoint
-    // can use the one it recognizes:
-    const combinedFormat = {
-      // Official DeepSeek API format
-      thinking: { type: 'enabled' },
-      // Self-hosted DeepSeek-V3.2/vLLM format
-      enable_thinking: true,
-      chat_template_kwargs: { thinking: true },
-    }
-    expect(combinedFormat.thinking.type).toBe('enabled')
-    expect(combinedFormat.enable_thinking).toBe(true)
-    expect(combinedFormat.chat_template_kwargs.thinking).toBe(true)
+  test('includes both formats simultaneously when enabled', () => {
+    const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true })
+    expect(body.thinking).toEqual({ type: 'enabled' })
+    expect(body.enable_thinking).toBe(true)
+    expect(body.chat_template_kwargs.thinking).toBe(true)
   })
 
-  test('thinking params are NOT added when thinking is disabled', () => {
-    // When thinking is disabled, none of these params should be present:
-    const disabledFormat = {
-      model: 'gpt-4o',
-      messages: [],
-      stream: true,
-    }
-    expect((disabledFormat as any).thinking).toBeUndefined()
-    expect((disabledFormat as any).enable_thinking).toBeUndefined()
-    expect((disabledFormat as any).chat_template_kwargs).toBeUndefined()
+  test('does NOT include thinking params when disabled', () => {
+    const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false })
+    expect(body.thinking).toBeUndefined()
+    expect(body.enable_thinking).toBeUndefined()
+    expect(body.chat_template_kwargs).toBeUndefined()
+  })
+
+  test('always includes stream and stream_options', () => {
+    const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false })
+    expect(body.stream).toBe(true)
+    expect(body.stream_options).toEqual({ include_usage: true })
+  })
+
+  test('includes temperature when thinking is off and override is set', () => {
+    const body = buildOpenAIRequestBody({
+      ...baseParams,
+      enableThinking: false,
+      temperatureOverride: 0.7,
+    })
+    expect(body.temperature).toBe(0.7)
+  })
+
+  test('excludes temperature when thinking is on even if override is set', () => {
+    const body = buildOpenAIRequestBody({
+      ...baseParams,
+      enableThinking: true,
+      temperatureOverride: 0.7,
+    })
+    expect(body.temperature).toBeUndefined()
+  })
+
+  test('excludes temperature when thinking is off and no override', () => {
+    const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false })
+    expect(body.temperature).toBeUndefined()
+  })
+
+  test('includes tools and tool_choice when tools are provided', () => {
+    const body = buildOpenAIRequestBody({
+      ...baseParams,
+      tools: [{ type: 'function', function: { name: 'test' } }],
+      toolChoice: 'auto',
+      enableThinking: false,
+    })
+    expect(body.tools).toHaveLength(1)
+    expect(body.tool_choice).toBe('auto')
+  })
+
+  test('excludes tools when empty', () => {
+    const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false })
+    expect(body.tools).toBeUndefined()
+    expect(body.tool_choice).toBeUndefined()
   })
 })
\ No newline at end of file
diff --git a/src/services/api/openai/convertMessages.ts b/src/services/api/openai/convertMessages.ts
index 623eeb234..1dbf1791f 100644
--- a/src/services/api/openai/convertMessages.ts
+++ b/src/services/api/openai/convertMessages.ts
@@ -59,12 +59,20 @@ export function anthropicMessagesToOpenAI(
       }
       if (msg.type === 'user' && hasSeenAssistant) {
         const content = msg.message.content
-        const hasText = typeof content === 'string'
+        // A user message starts a new turn if it contains any non-tool_result content
+        // (text, image, or other media). Tool results alone do NOT start a new turn
+        // because they are continuations of the previous assistant tool call.
+        const startsNewUserTurn = typeof content === 'string'
           ? content.length > 0
           : Array.isArray(content) && content.some(
-              (b: any) => typeof b === 'string' || b.type === 'text',
+              (b: any) =>
+                typeof b === 'string' ||
+                (b &&
+                  typeof b === 'object' &&
+                  'type' in b &&
+                  b.type !== 'tool_result'),
             )
-        if (hasText) {
+        if (startsNewUserTurn) {
           turnBoundaries.add(i)
         }
       }
diff --git a/src/services/api/openai/index.ts b/src/services/api/openai/index.ts
index 9c90d79c8..958f0b3d4 100644
--- a/src/services/api/openai/index.ts
+++ b/src/services/api/openai/index.ts
@@ -63,6 +63,53 @@ export function isOpenAIThinkingEnabled(model: string): boolean {
   return modelLower.includes('deepseek-reasoner') || modelLower.includes('deepseek-v3.2')
 }
 
+/**
+ * Build the request body for OpenAI chat.completions.create().
+ * Extracted for testability — the thinking mode params are injected here.
+ *
+ * DeepSeek thinking mode: inject thinking params via request body.
+ * Two formats are added simultaneously to support different deployments:
+ * - Official DeepSeek API: `thinking: { type: 'enabled' }`
+ * - Self-hosted DeepSeek-V3.2: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }`
+ * OpenAI SDK passes unknown keys through to the HTTP body.
+ * Each endpoint will use the format it recognizes and ignore the others.
+ * @internal Exported for testing purposes only
+ */
+export function buildOpenAIRequestBody(params: {
+  model: string
+  messages: any[]
+  tools: any[]
+  toolChoice: any
+  enableThinking: boolean
+  temperatureOverride?: number
+}): Record<string, any> {
+  const { model, messages, tools, toolChoice, enableThinking, temperatureOverride } = params
+  return {
+    model,
+    messages,
+    ...(tools.length > 0 && {
+      tools,
+      ...(toolChoice && { tool_choice: toolChoice }),
+    }),
+    stream: true,
+    stream_options: { include_usage: true },
+    // DeepSeek thinking mode: enable chain-of-thought output.
+    // When active, temperature/top_p/presence_penalty/frequency_penalty are ignored by DeepSeek.
+    ...(enableThinking && {
+      // Official DeepSeek API format
+      thinking: { type: 'enabled' },
+      // Self-hosted DeepSeek-V3.2 format
+      enable_thinking: true,
+      chat_template_kwargs: { thinking: true },
+    }),
+    // Only send temperature when thinking mode is off (DeepSeek ignores it anyway,
+    // but other providers may respect it)
+    ...(!enableThinking && temperatureOverride !== undefined && {
+      temperature: temperatureOverride,
+    }),
+  }
+}
+
 /**
  * OpenAI-compatible query path. Converts Anthropic-format messages/tools to
  * OpenAI format, calls the OpenAI-compatible endpoint, and converts the
@@ -177,40 +224,17 @@ export async function* queryModelOpenAI(
     )
 
     // 11. Call OpenAI API with streaming
-    // DeepSeek thinking mode: inject thinking params via request body.
-    // Two formats are added simultaneously to support different deployments:
-    // - Official DeepSeek API: `thinking: { type: 'enabled' }`
-    // - Self-hosted DeepSeek-V3.2: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }`
-    // OpenAI SDK passes unknown keys through to the HTTP body.
-    // Each endpoint will use the format it recognizes and ignore the others.
+    const requestBody = buildOpenAIRequestBody({
+      model: openaiModel,
+      messages: openaiMessages,
+      tools: openaiTools,
+      toolChoice: openaiToolChoice,
+      enableThinking,
+      temperatureOverride: options.temperatureOverride,
+    })
     const stream = await client.chat.completions.create(
-      {
-        model: openaiModel,
-        messages: openaiMessages,
-        ...(openaiTools.length > 0 && {
-          tools: openaiTools,
-          ...(openaiToolChoice && { tool_choice: openaiToolChoice }),
-        }),
-        stream: true,
-        stream_options: { include_usage: true },
-        // DeepSeek thinking mode: enable chain-of-thought output.
-        // When active, temperature/top_p/presence_penalty/frequency_penalty are ignored by DeepSeek.
-        ...(enableThinking && {
-          // Official DeepSeek API format
-          thinking: { type: 'enabled' },
-          // Self-hosted DeepSeek-V3.2 format
-          enable_thinking: true,
-          chat_template_kwargs: { thinking: true },
-        }),
-        // Only send temperature when thinking mode is off (DeepSeek ignores it anyway,
-        // but other providers may respect it)
-        ...(!enableThinking && options.temperatureOverride !== undefined && {
-          temperature: options.temperatureOverride,
-        }),
-      },
-      {
-        signal,
-      },
+      requestBody,
+      { signal },
     )
 
     // 12. Convert OpenAI stream to Anthropic events, then process into