From 2fb146d583a69a2e17e47b28353a2f25717f6e7c Mon Sep 17 00:00:00 2001 From: guunergooner Date: Wed, 8 Apr 2026 19:43:13 +0800 Subject: [PATCH 1/2] feat: Add DeepSeek thinking mode support for OpenAI compatibility layer Support DeepSeek reasoning models (deepseek-reasoner and DeepSeek-V3.2) in OpenAI-compatible API layer with automatic thinking mode detection. Key changes: - Add `isOpenAIThinkingEnabled()` for auto-detecting thinking-capable models - Inject thinking parameters in request body (both official API and vLLM formats) - Preserve reasoning_content in message conversion for tool call iterations - Strip reasoning_content from previous turns to save bandwidth - Add comprehensive test coverage for thinking mode detection Thinking mode is enabled when: 1. OPENAI_ENABLE_THINKING=1 is set (explicit enable), OR 2. Model name contains "deepseek-reasoner" or "deepseek-v3.2" (auto-detect) Signed-off-by: guunergooner --- .../openai/__tests__/convertMessages.test.ts | 206 ++++++++++++++++++ .../api/openai/__tests__/thinking.test.ts | 192 ++++++++++++++++ src/services/api/openai/convertMessages.ts | 65 +++++- src/services/api/openai/index.ts | 59 ++++- 4 files changed, 509 insertions(+), 13 deletions(-) create mode 100644 src/services/api/openai/__tests__/thinking.test.ts diff --git a/src/services/api/openai/__tests__/convertMessages.test.ts b/src/services/api/openai/__tests__/convertMessages.test.ts index 0ea52757b..39811c7c8 100644 --- a/src/services/api/openai/__tests__/convertMessages.test.ts +++ b/src/services/api/openai/__tests__/convertMessages.test.ts @@ -249,3 +249,209 @@ describe('anthropicMessagesToOpenAI', () => { ) }) }) + +describe('DeepSeek thinking mode (enableThinking)', () => { + test('preserves thinking block as reasoning_content when enabled', () => { + const result = anthropicMessagesToOpenAI( + [makeUserMsg('question'), makeAssistantMsg([ + { type: 'thinking' as const, thinking: 'Let me reason about this...' }, + { type: 'text', text: 'The answer is 42.' }, + ])], + [] as any, + { enableThinking: true }, + ) + // Should have: user, assistant with reasoning_content + expect(result).toHaveLength(2) + expect(result[0].role).toBe('user') + const assistant = result[1] as any + expect(assistant.role).toBe('assistant') + expect(assistant.content).toBe('The answer is 42.') + expect(assistant.reasoning_content).toBe('Let me reason about this...') + }) + + test('drops thinking block when enableThinking is false (default)', () => { + const result = anthropicMessagesToOpenAI( + [makeAssistantMsg([ + { type: 'thinking' as const, thinking: 'internal thoughts...' }, + { type: 'text', text: 'visible response' }, + ])], + [] as any, + ) + const assistant = result[0] as any + expect(assistant.content).toBe('visible response') + expect(assistant.reasoning_content).toBeUndefined() + }) + + test('preserves reasoning_content with tool_calls in same turn', () => { + const result = anthropicMessagesToOpenAI( + [ + makeUserMsg('what is the weather?'), + makeAssistantMsg([ + { type: 'thinking' as const, thinking: 'I need to call the weather tool.' }, + { type: 'text', text: '' }, + { + type: 'tool_use' as const, + id: 'toolu_001', + name: 'get_weather', + input: { location: 'Hangzhou' }, + }, + ]), + makeUserMsg([ + { + type: 'tool_result' as const, + tool_use_id: 'toolu_001', + content: 'Cloudy 7~13°C', + }, + ]), + ], + [] as any, + { enableThinking: true }, + ) + + // Find the assistant message + const assistants = result.filter(m => m.role === 'assistant') + expect(assistants.length).toBe(1) + const assistant = assistants[0] as any + expect(assistant.reasoning_content).toBe('I need to call the weather tool.') + expect(assistant.tool_calls).toBeDefined() + expect(assistant.tool_calls[0].function.name).toBe('get_weather') + }) + + test('strips reasoning_content from previous turns', () => { + const result = anthropicMessagesToOpenAI( + [ + // Turn 1: user → assistant (with thinking) + makeUserMsg('question 1'), + makeAssistantMsg([ + { type: 'thinking' as const, thinking: 'Turn 1 reasoning...' }, + { type: 'text', text: 'Turn 1 answer' }, + ]), + // Turn 2: new user message → previous reasoning should be stripped + makeUserMsg('question 2'), + makeAssistantMsg([ + { type: 'thinking' as const, thinking: 'Turn 2 reasoning...' }, + { type: 'text', text: 'Turn 2 answer' }, + ]), + ], + [] as any, + { enableThinking: true }, + ) + + const assistants = result.filter(m => m.role === 'assistant') + // Turn 1 assistant: reasoning should be stripped (previous turn) + expect((assistants[0] as any).reasoning_content).toBeUndefined() + expect((assistants[0] as any).content).toBe('Turn 1 answer') + // Turn 2 assistant: reasoning should be preserved (current turn) + expect((assistants[1] as any).reasoning_content).toBe('Turn 2 reasoning...') + expect((assistants[1] as any).content).toBe('Turn 2 answer') + }) + + test('preserves reasoning_content in multi-iteration tool call within same turn', () => { + // Simulates a full DeepSeek tool call iteration: + // user → assistant(thinking+tool_call) → tool_result → assistant(thinking+tool_call) → tool_result → assistant(thinking+text) + const result = anthropicMessagesToOpenAI( + [ + makeUserMsg("tomorrow's weather in Hangzhou"), + // Iteration 1: thinking + tool call + makeAssistantMsg([ + { type: 'thinking' as const, thinking: 'I need the date first.' }, + { + type: 'tool_use' as const, + id: 'toolu_001', + name: 'get_date', + input: {}, + }, + ]), + makeUserMsg([ + { + type: 'tool_result' as const, + tool_use_id: 'toolu_001', + content: '2026-04-08', + }, + ]), + // Iteration 2: thinking + tool call + makeAssistantMsg([ + { type: 'thinking' as const, thinking: 'Now I can get the weather.' }, + { + type: 'tool_use' as const, + id: 'toolu_002', + name: 'get_weather', + input: { location: 'Hangzhou', date: '2026-04-08' }, + }, + ]), + makeUserMsg([ + { + type: 'tool_result' as const, + tool_use_id: 'toolu_002', + content: 'Cloudy 7~13°C', + }, + ]), + // Iteration 3: thinking + final answer + makeAssistantMsg([ + { type: 'thinking' as const, thinking: 'I have the info now.' }, + { type: 'text', text: 'Tomorrow will be cloudy, 7-13°C.' }, + ]), + ], + [] as any, + { enableThinking: true }, + ) + + // All 3 assistant messages are in the current turn (after last user msg is the last tool_result, + // but the "last user message" boundary logic finds the last user-typed message). + // Actually, tool_result messages are also UserMessage type, so the last user message + // is the one with tool_result for toolu_002. All assistant messages after that should have reasoning. + const assistants = result.filter(m => m.role === 'assistant') + expect(assistants.length).toBe(3) + // All iterations within the same turn preserve reasoning + expect((assistants[0] as any).reasoning_content).toBe('I need the date first.') + expect((assistants[1] as any).reasoning_content).toBe('Now I can get the weather.') + expect((assistants[2] as any).reasoning_content).toBe('I have the info now.') + }) + + test('handles multiple thinking blocks in single assistant message', () => { + const result = anthropicMessagesToOpenAI( + [makeUserMsg('question'), makeAssistantMsg([ + { type: 'thinking' as const, thinking: 'First thought.' }, + { type: 'thinking' as const, thinking: 'Second thought.' }, + { type: 'text', text: 'Final answer.' }, + ])], + [] as any, + { enableThinking: true }, + ) + const assistant = result.filter(m => m.role === 'assistant')[0] as any + expect(assistant.reasoning_content).toBe('First thought.\nSecond thought.') + }) + + test('skips empty thinking blocks', () => { + const result = anthropicMessagesToOpenAI( + [makeUserMsg('question'), makeAssistantMsg([ + { type: 'thinking' as const, thinking: '' }, + { type: 'text', text: 'Answer.' }, + ])], + [] as any, + { enableThinking: true }, + ) + const assistant = result.filter(m => m.role === 'assistant')[0] as any + expect(assistant.reasoning_content).toBeUndefined() + }) + + test('sets content to null when only thinking and tool_calls present', () => { + const result = anthropicMessagesToOpenAI( + [makeUserMsg('question'), makeAssistantMsg([ + { type: 'thinking' as const, thinking: 'Reasoning only.' }, + { + type: 'tool_use' as const, + id: 'toolu_001', + name: 'bash', + input: { command: 'ls' }, + }, + ])], + [] as any, + { enableThinking: true }, + ) + const assistant = result.filter(m => m.role === 'assistant')[0] as any + expect(assistant.content).toBeNull() + expect(assistant.reasoning_content).toBe('Reasoning only.') + expect(assistant.tool_calls).toHaveLength(1) + }) +}) diff --git a/src/services/api/openai/__tests__/thinking.test.ts b/src/services/api/openai/__tests__/thinking.test.ts new file mode 100644 index 000000000..f079ae052 --- /dev/null +++ b/src/services/api/openai/__tests__/thinking.test.ts @@ -0,0 +1,192 @@ +import { describe, expect, test, beforeEach, afterEach } from 'bun:test' +import { isOpenAIThinkingEnabled } from '../index.js' + +describe('isOpenAIThinkingEnabled', () => { + const originalEnv = { + OPENAI_ENABLE_THINKING: process.env.OPENAI_ENABLE_THINKING, + } + + beforeEach(() => { + // Clear env var before each test + delete process.env.OPENAI_ENABLE_THINKING + }) + + afterEach(() => { + // Restore original env var + process.env.OPENAI_ENABLE_THINKING = originalEnv.OPENAI_ENABLE_THINKING + }) + + describe('OPENAI_ENABLE_THINKING env var', () => { + test('returns true when OPENAI_ENABLE_THINKING=1', () => { + process.env.OPENAI_ENABLE_THINKING = '1' + expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true) + }) + + test('returns true when OPENAI_ENABLE_THINKING=true', () => { + process.env.OPENAI_ENABLE_THINKING = 'true' + expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true) + }) + + test('returns true when OPENAI_ENABLE_THINKING=yes', () => { + process.env.OPENAI_ENABLE_THINKING = 'yes' + expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true) + }) + + test('returns true when OPENAI_ENABLE_THINKING=on', () => { + process.env.OPENAI_ENABLE_THINKING = 'on' + expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true) + }) + + test('returns true when OPENAI_ENABLE_THINKING=TRUE (case insensitive)', () => { + process.env.OPENAI_ENABLE_THINKING = 'TRUE' + expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true) + }) + + test('returns false when OPENAI_ENABLE_THINKING=0', () => { + process.env.OPENAI_ENABLE_THINKING = '0' + expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false) + }) + + test('returns false when OPENAI_ENABLE_THINKING=false', () => { + process.env.OPENAI_ENABLE_THINKING = 'false' + expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false) + }) + + test('returns false when OPENAI_ENABLE_THINKING is empty', () => { + process.env.OPENAI_ENABLE_THINKING = '' + expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false) + }) + + test('returns false when OPENAI_ENABLE_THINKING is not set', () => { + expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false) + }) + }) + + describe('model name auto-detect', () => { + test('returns true when model name is "deepseek-reasoner"', () => { + expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(true) + }) + + test('returns true when model name contains "deepseek-reasoner" (case insensitive)', () => { + expect(isOpenAIThinkingEnabled('DeepSeek-Reasoner')).toBe(true) + }) + + test('returns true when model name has prefix/suffix for deepseek-reasoner', () => { + expect(isOpenAIThinkingEnabled('my-deepseek-reasoner-v1')).toBe(true) + }) + + test('returns true when model name is namespaced for deepseek-reasoner', () => { + expect(isOpenAIThinkingEnabled('TokenService/deepseek-reasoner')).toBe(true) + }) + + test('returns true when model name is "deepseek-v3.2"', () => { + expect(isOpenAIThinkingEnabled('deepseek-v3.2')).toBe(true) + }) + + test('returns true when model name contains "deepseek-v3.2" (case insensitive)', () => { + expect(isOpenAIThinkingEnabled('DeepSeek-V3.2')).toBe(true) + }) + + test('returns true when model name has prefix/suffix for deepseek-v3.2', () => { + expect(isOpenAIThinkingEnabled('my-deepseek-v3.2-v1')).toBe(true) + }) + + test('returns true when model name is namespaced for deepseek-v3.2', () => { + expect(isOpenAIThinkingEnabled('TokenService/deepseek-v3.2')).toBe(true) + }) + + test('returns false when model name is "deepseek-chat"', () => { + expect(isOpenAIThinkingEnabled('deepseek-chat')).toBe(false) + }) + + test('returns false when model name is "deepseek-v3"', () => { + expect(isOpenAIThinkingEnabled('deepseek-v3')).toBe(false) + }) + + test('returns false when model name contains "deepseek" but not "reasoner" or "v3.2"', () => { + expect(isOpenAIThinkingEnabled('deepseek-coder')).toBe(false) + }) + + test('returns false when model name is "gpt-4o"', () => { + expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false) + }) + + test('returns false when model name is empty', () => { + expect(isOpenAIThinkingEnabled('')).toBe(false) + }) + }) + + describe('priority and combined detection', () => { + test('OPENAI_ENABLE_THINKING=1 enables thinking for any model', () => { + process.env.OPENAI_ENABLE_THINKING = '1' + expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(true) + expect(isOpenAIThinkingEnabled('deepseek-v3')).toBe(true) + }) + + test('OPENAI_ENABLE_THINKING=false disables thinking even for deepseek-reasoner', () => { + process.env.OPENAI_ENABLE_THINKING = 'false' + expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false) + }) + + test('OPENAI_ENABLE_THINKING=0 disables thinking even for deepseek-reasoner', () => { + process.env.OPENAI_ENABLE_THINKING = '0' + expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(false) + }) + + test('both conditions can enable thinking', () => { + process.env.OPENAI_ENABLE_THINKING = '1' + expect(isOpenAIThinkingEnabled('deepseek-reasoner')).toBe(true) + }) + }) +}) + +describe('thinking request parameters', () => { + // Note: These tests verify the request body structure indirectly. + // The actual API call is mocked in integration tests. + // Here we document the expected parameter formats: + + test('documents official DeepSeek API format: thinking: { type: "enabled" }', () => { + // Official DeepSeek API expects: + const officialFormat = { + thinking: { type: 'enabled' }, + } + expect(officialFormat.thinking.type).toBe('enabled') + }) + + test('documents vLLM/self-hosted format: enable_thinking + chat_template_kwargs', () => { + // Self-hosted DeepSeek-V3.2/vLLM expects: + const vllmFormat = { + enable_thinking: true, + chat_template_kwargs: { thinking: true }, + } + expect(vllmFormat.enable_thinking).toBe(true) + expect(vllmFormat.chat_template_kwargs.thinking).toBe(true) + }) + + test('both formats are added simultaneously when thinking is enabled', () => { + // The implementation adds both formats so each endpoint + // can use the one it recognizes: + const combinedFormat = { + // Official DeepSeek API format + thinking: { type: 'enabled' }, + // Self-hosted DeepSeek-V3.2/vLLM format + enable_thinking: true, + chat_template_kwargs: { thinking: true }, + } + expect(combinedFormat.thinking.type).toBe('enabled') + expect(combinedFormat.enable_thinking).toBe(true) + expect(combinedFormat.chat_template_kwargs.thinking).toBe(true) + }) + + test('thinking params are NOT added when thinking is disabled', () => { + // When thinking is disabled, none of these params should be present: + const disabledFormat = { + model: 'gpt-4o', + messages: [], + stream: true, + } + expect((disabledFormat as any).thinking).toBeUndefined() + expect((disabledFormat as any).enable_thinking).toBeUndefined() + expect((disabledFormat as any).chat_template_kwargs).toBeUndefined() + }) +}) \ No newline at end of file diff --git a/src/services/api/openai/convertMessages.ts b/src/services/api/openai/convertMessages.ts index 3869120eb..623eeb234 100644 --- a/src/services/api/openai/convertMessages.ts +++ b/src/services/api/openai/convertMessages.ts @@ -13,6 +13,12 @@ import type { import type { AssistantMessage, UserMessage } from '../../../types/message.js' import type { SystemPrompt } from '../../../utils/systemPromptType.js' +export interface ConvertMessagesOptions { + /** When true, preserve thinking blocks as reasoning_content on assistant messages + * (required for DeepSeek thinking mode with tool calls). */ + enableThinking?: boolean +} + /** * Convert internal (UserMessage | AssistantMessage)[] to OpenAI-format messages. * @@ -20,14 +26,16 @@ import type { SystemPrompt } from '../../../utils/systemPromptType.js' * - system prompt → role: "system" message prepended * - tool_use blocks → tool_calls[] on assistant message * - tool_result blocks → role: "tool" messages - * - thinking blocks → silently dropped + * - thinking blocks → silently dropped (or preserved as reasoning_content when enableThinking=true) * - cache_control → stripped */ export function anthropicMessagesToOpenAI( messages: (UserMessage | AssistantMessage)[], systemPrompt: SystemPrompt, + options?: ConvertMessagesOptions, ): ChatCompletionMessageParam[] { const result: ChatCompletionMessageParam[] = [] + const enableThinking = options?.enableThinking ?? false // Prepend system prompt as system message const systemText = systemPromptToText(systemPrompt) @@ -38,13 +46,42 @@ export function anthropicMessagesToOpenAI( } satisfies ChatCompletionSystemMessageParam) } - for (const msg of messages) { + // When thinking mode is on, detect turn boundaries so that reasoning_content + // from *previous* user turns is stripped (saves bandwidth; DeepSeek ignores it). + // A "new turn" starts when a user text message appears after at least one assistant response. + const turnBoundaries = new Set() + if (enableThinking) { + let hasSeenAssistant = false + for (let i = 0; i < messages.length; i++) { + const msg = messages[i] + if (msg.type === 'assistant') { + hasSeenAssistant = true + } + if (msg.type === 'user' && hasSeenAssistant) { + const content = msg.message.content + const hasText = typeof content === 'string' + ? content.length > 0 + : Array.isArray(content) && content.some( + (b: any) => typeof b === 'string' || b.type === 'text', + ) + if (hasText) { + turnBoundaries.add(i) + } + } + } + } + + for (let i = 0; i < messages.length; i++) { + const msg = messages[i] switch (msg.type) { case 'user': result.push(...convertInternalUserMessage(msg)) break case 'assistant': - result.push(...convertInternalAssistantMessage(msg)) + // Preserve reasoning_content unless we're before a turn boundary + // (i.e., from a previous user Q&A round) + const preserveReasoning = enableThinking && !isBeforeAnyTurnBoundary(i, turnBoundaries) + result.push(...convertInternalAssistantMessage(msg, preserveReasoning)) break default: break @@ -61,6 +98,17 @@ function systemPromptToText(systemPrompt: SystemPrompt): string { .join('\n\n') } +/** + * Check if index `i` falls before any turn boundary (i.e. it belongs to a previous turn). + * A message at index i is "before" a boundary if there exists a boundary j where i < j. + */ +function isBeforeAnyTurnBoundary(i: number, boundaries: Set): boolean { + for (const b of boundaries) { + if (i < b) return true + } + return false +} + function convertInternalUserMessage( msg: UserMessage, ): ChatCompletionMessageParam[] { @@ -151,6 +199,7 @@ function convertToolResult( function convertInternalAssistantMessage( msg: AssistantMessage, + preserveReasoning = false, ): ChatCompletionMessageParam[] { const content = msg.message.content @@ -174,6 +223,7 @@ function convertInternalAssistantMessage( const textParts: string[] = [] const toolCalls: NonNullable = [] + const reasoningParts: string[] = [] for (const block of content) { if (typeof block === 'string') { @@ -191,14 +241,21 @@ function convertInternalAssistantMessage( typeof tu.input === 'string' ? tu.input : JSON.stringify(tu.input), }, }) + } else if (block.type === 'thinking' && preserveReasoning) { + // DeepSeek thinking mode: preserve reasoning_content for tool call iterations + const thinkingText = (block as Record).thinking + if (typeof thinkingText === 'string' && thinkingText) { + reasoningParts.push(thinkingText) + } } - // Skip thinking, redacted_thinking, server_tool_use, etc. + // Skip redacted_thinking, server_tool_use, etc. } const result: ChatCompletionAssistantMessageParam = { role: 'assistant', content: textParts.length > 0 ? textParts.join('\n') : null, ...(toolCalls.length > 0 && { tool_calls: toolCalls }), + ...(reasoningParts.length > 0 && { reasoning_content: reasoningParts.join('\n') }), } return [result] diff --git a/src/services/api/openai/index.ts b/src/services/api/openai/index.ts index 251e89f7d..9c90d79c8 100644 --- a/src/services/api/openai/index.ts +++ b/src/services/api/openai/index.ts @@ -24,6 +24,7 @@ import { import { logForDebugging } from '../../../utils/debug.js' import { addToTotalSessionCost } from '../../../cost-tracker.js' import { calculateUSDCost } from '../../../utils/modelCost.js' +import { isEnvTruthy, isEnvDefinedFalsy } from '../../../utils/envUtils.js' import type { Options } from '../claude.js' import { randomUUID } from 'crypto' import { @@ -39,6 +40,29 @@ import { TOOL_SEARCH_TOOL_NAME, } from '../../../tools/ToolSearchTool/prompt.js' +/** + * Detect whether DeepSeek-style thinking mode should be enabled. + * + * Enabled when: + * 1. OPENAI_ENABLE_THINKING=1 is set (explicit enable), OR + * 2. Model name contains "deepseek-reasoner" OR "DeepSeek-V3.2" (auto-detect, case-insensitive) + * + * Disabled when: + * - OPENAI_ENABLE_THINKING=0/false/no/off is explicitly set (overrides model detection) + * + * @param model - The resolved OpenAI model name + * @internal Exported for testing purposes only + */ +export function isOpenAIThinkingEnabled(model: string): boolean { + // Explicit disable takes priority (overrides model auto-detect) + if (isEnvDefinedFalsy(process.env.OPENAI_ENABLE_THINKING)) return false + // Explicit enable + if (isEnvTruthy(process.env.OPENAI_ENABLE_THINKING)) return true + // Auto-detect from model name (deepseek-reasoner and DeepSeek-V3.2 support thinking mode) + const modelLower = model.toLowerCase() + return modelLower.includes('deepseek-reasoner') || modelLower.includes('deepseek-v3.2') +} + /** * OpenAI-compatible query path. Converts Anthropic-format messages/tools to * OpenAI format, calls the OpenAI-compatible endpoint, and converts the @@ -120,10 +144,10 @@ export async function* queryModelOpenAI( ) // 8. Convert messages and tools to OpenAI format - const openaiMessages = anthropicMessagesToOpenAI( - messagesForAPI, - systemPrompt, - ) + const enableThinking = isOpenAIThinkingEnabled(openaiModel) + const openaiMessages = anthropicMessagesToOpenAI(messagesForAPI, systemPrompt, { + enableThinking, + }) const openaiTools = anthropicToolsToOpenAI(standardTools) const openaiToolChoice = anthropicToolChoiceToOpenAI(options.toolChoice) @@ -149,10 +173,16 @@ export async function* queryModelOpenAI( }) logForDebugging( - `[OpenAI] Calling model=${openaiModel}, messages=${openaiMessages.length}, tools=${openaiTools.length}`, + `[OpenAI] Calling model=${openaiModel}, messages=${openaiMessages.length}, tools=${openaiTools.length}, thinking=${enableThinking}`, ) // 11. Call OpenAI API with streaming + // DeepSeek thinking mode: inject thinking params via request body. + // Two formats are added simultaneously to support different deployments: + // - Official DeepSeek API: `thinking: { type: 'enabled' }` + // - Self-hosted DeepSeek-V3.2: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }` + // OpenAI SDK passes unknown keys through to the HTTP body. + // Each endpoint will use the format it recognizes and ignore the others. const stream = await client.chat.completions.create( { model: openaiModel, @@ -163,7 +193,18 @@ export async function* queryModelOpenAI( }), stream: true, stream_options: { include_usage: true }, - ...(options.temperatureOverride !== undefined && { + // DeepSeek thinking mode: enable chain-of-thought output. + // When active, temperature/top_p/presence_penalty/frequency_penalty are ignored by DeepSeek. + ...(enableThinking && { + // Official DeepSeek API format + thinking: { type: 'enabled' }, + // Self-hosted DeepSeek-V3.2 format + enable_thinking: true, + chat_template_kwargs: { thinking: true }, + }), + // Only send temperature when thinking mode is off (DeepSeek ignores it anyway, + // but other providers may respect it) + ...(!enableThinking && options.temperatureOverride !== undefined && { temperature: options.temperatureOverride, }), }, @@ -172,8 +213,8 @@ export async function* queryModelOpenAI( }, ) - // 7. Convert OpenAI stream to Anthropic events, then process into - // AssistantMessage + StreamEvent (matching the Anthropic path behavior) + // 12. Convert OpenAI stream to Anthropic events, then process into + // AssistantMessage + StreamEvent (matching the Anthropic path behavior) const adaptedStream = adaptOpenAIStreamToAnthropic(stream, openaiModel) // Accumulate content blocks and usage, same as the Anthropic path in claude.ts @@ -287,4 +328,4 @@ export async function* queryModelOpenAI( error: error instanceof Error ? error : new Error(String(error)), }) } -} +} \ No newline at end of file From be38691f68a82fe6a73b19c66322db21cec3f7a2 Mon Sep 17 00:00:00 2001 From: guunergooner Date: Wed, 8 Apr 2026 20:08:49 +0800 Subject: [PATCH 2/2] fix: Address PR #206 review feedback for OpenAI thinking mode - Fix afterEach env var leak: use delete instead of assigning undefined - Fix turn boundary detection: treat multimodal inputs (e.g. images) as new turns, not just text content - Extract buildOpenAIRequestBody() for testability, replace constant- assertion tests with real function output verification Signed-off-by: guunergooner --- .../api/openai/__tests__/thinking.test.ts | 122 +++++++++++------- src/services/api/openai/convertMessages.ts | 14 +- src/services/api/openai/index.ts | 90 ++++++++----- 3 files changed, 146 insertions(+), 80 deletions(-) diff --git a/src/services/api/openai/__tests__/thinking.test.ts b/src/services/api/openai/__tests__/thinking.test.ts index f079ae052..c53abaf36 100644 --- a/src/services/api/openai/__tests__/thinking.test.ts +++ b/src/services/api/openai/__tests__/thinking.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test, beforeEach, afterEach } from 'bun:test' -import { isOpenAIThinkingEnabled } from '../index.js' +import { isOpenAIThinkingEnabled, buildOpenAIRequestBody } from '../index.js' describe('isOpenAIThinkingEnabled', () => { const originalEnv = { @@ -12,8 +12,13 @@ describe('isOpenAIThinkingEnabled', () => { }) afterEach(() => { - // Restore original env var - process.env.OPENAI_ENABLE_THINKING = originalEnv.OPENAI_ENABLE_THINKING + // Restore original env var — delete key if it was originally undefined + // to avoid leaking the env key into subsequent tests + if (originalEnv.OPENAI_ENABLE_THINKING === undefined) { + delete process.env.OPENAI_ENABLE_THINKING + } else { + process.env.OPENAI_ENABLE_THINKING = originalEnv.OPENAI_ENABLE_THINKING + } }) describe('OPENAI_ENABLE_THINKING env var', () => { @@ -140,53 +145,82 @@ describe('isOpenAIThinkingEnabled', () => { }) }) -describe('thinking request parameters', () => { - // Note: These tests verify the request body structure indirectly. - // The actual API call is mocked in integration tests. - // Here we document the expected parameter formats: +describe('buildOpenAIRequestBody — thinking params', () => { + const baseParams = { + model: 'deepseek-reasoner', + messages: [{ role: 'user', content: 'hello' }], + tools: [] as any[], + toolChoice: undefined as any, + } - test('documents official DeepSeek API format: thinking: { type: "enabled" }', () => { - // Official DeepSeek API expects: - const officialFormat = { - thinking: { type: 'enabled' }, - } - expect(officialFormat.thinking.type).toBe('enabled') + test('includes official DeepSeek API thinking format when enabled', () => { + const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true }) + expect(body.thinking).toEqual({ type: 'enabled' }) }) - test('documents vLLM/self-hosted format: enable_thinking + chat_template_kwargs', () => { - // Self-hosted DeepSeek-V3.2/vLLM expects: - const vllmFormat = { - enable_thinking: true, - chat_template_kwargs: { thinking: true }, - } - expect(vllmFormat.enable_thinking).toBe(true) - expect(vllmFormat.chat_template_kwargs.thinking).toBe(true) + test('includes vLLM/self-hosted thinking format when enabled', () => { + const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true }) + expect(body.enable_thinking).toBe(true) + expect(body.chat_template_kwargs).toEqual({ thinking: true }) }) - test('both formats are added simultaneously when thinking is enabled', () => { - // The implementation adds both formats so each endpoint - // can use the one it recognizes: - const combinedFormat = { - // Official DeepSeek API format - thinking: { type: 'enabled' }, - // Self-hosted DeepSeek-V3.2/vLLM format - enable_thinking: true, - chat_template_kwargs: { thinking: true }, - } - expect(combinedFormat.thinking.type).toBe('enabled') - expect(combinedFormat.enable_thinking).toBe(true) - expect(combinedFormat.chat_template_kwargs.thinking).toBe(true) + test('includes both formats simultaneously when enabled', () => { + const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true }) + expect(body.thinking).toEqual({ type: 'enabled' }) + expect(body.enable_thinking).toBe(true) + expect(body.chat_template_kwargs.thinking).toBe(true) }) - test('thinking params are NOT added when thinking is disabled', () => { - // When thinking is disabled, none of these params should be present: - const disabledFormat = { - model: 'gpt-4o', - messages: [], - stream: true, - } - expect((disabledFormat as any).thinking).toBeUndefined() - expect((disabledFormat as any).enable_thinking).toBeUndefined() - expect((disabledFormat as any).chat_template_kwargs).toBeUndefined() + test('does NOT include thinking params when disabled', () => { + const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false }) + expect(body.thinking).toBeUndefined() + expect(body.enable_thinking).toBeUndefined() + expect(body.chat_template_kwargs).toBeUndefined() + }) + + test('always includes stream and stream_options', () => { + const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false }) + expect(body.stream).toBe(true) + expect(body.stream_options).toEqual({ include_usage: true }) + }) + + test('includes temperature when thinking is off and override is set', () => { + const body = buildOpenAIRequestBody({ + ...baseParams, + enableThinking: false, + temperatureOverride: 0.7, + }) + expect(body.temperature).toBe(0.7) + }) + + test('excludes temperature when thinking is on even if override is set', () => { + const body = buildOpenAIRequestBody({ + ...baseParams, + enableThinking: true, + temperatureOverride: 0.7, + }) + expect(body.temperature).toBeUndefined() + }) + + test('excludes temperature when thinking is off and no override', () => { + const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false }) + expect(body.temperature).toBeUndefined() + }) + + test('includes tools and tool_choice when tools are provided', () => { + const body = buildOpenAIRequestBody({ + ...baseParams, + tools: [{ type: 'function', function: { name: 'test' } }], + toolChoice: 'auto', + enableThinking: false, + }) + expect(body.tools).toHaveLength(1) + expect(body.tool_choice).toBe('auto') + }) + + test('excludes tools when empty', () => { + const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: false }) + expect(body.tools).toBeUndefined() + expect(body.tool_choice).toBeUndefined() }) }) \ No newline at end of file diff --git a/src/services/api/openai/convertMessages.ts b/src/services/api/openai/convertMessages.ts index 623eeb234..1dbf1791f 100644 --- a/src/services/api/openai/convertMessages.ts +++ b/src/services/api/openai/convertMessages.ts @@ -59,12 +59,20 @@ export function anthropicMessagesToOpenAI( } if (msg.type === 'user' && hasSeenAssistant) { const content = msg.message.content - const hasText = typeof content === 'string' + // A user message starts a new turn if it contains any non-tool_result content + // (text, image, or other media). Tool results alone do NOT start a new turn + // because they are continuations of the previous assistant tool call. + const startsNewUserTurn = typeof content === 'string' ? content.length > 0 : Array.isArray(content) && content.some( - (b: any) => typeof b === 'string' || b.type === 'text', + (b: any) => + typeof b === 'string' || + (b && + typeof b === 'object' && + 'type' in b && + b.type !== 'tool_result'), ) - if (hasText) { + if (startsNewUserTurn) { turnBoundaries.add(i) } } diff --git a/src/services/api/openai/index.ts b/src/services/api/openai/index.ts index 9c90d79c8..958f0b3d4 100644 --- a/src/services/api/openai/index.ts +++ b/src/services/api/openai/index.ts @@ -63,6 +63,53 @@ export function isOpenAIThinkingEnabled(model: string): boolean { return modelLower.includes('deepseek-reasoner') || modelLower.includes('deepseek-v3.2') } +/** + * Build the request body for OpenAI chat.completions.create(). + * Extracted for testability — the thinking mode params are injected here. + * + * DeepSeek thinking mode: inject thinking params via request body. + * Two formats are added simultaneously to support different deployments: + * - Official DeepSeek API: `thinking: { type: 'enabled' }` + * - Self-hosted DeepSeek-V3.2: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }` + * OpenAI SDK passes unknown keys through to the HTTP body. + * Each endpoint will use the format it recognizes and ignore the others. + * @internal Exported for testing purposes only + */ +export function buildOpenAIRequestBody(params: { + model: string + messages: any[] + tools: any[] + toolChoice: any + enableThinking: boolean + temperatureOverride?: number +}): Record { + const { model, messages, tools, toolChoice, enableThinking, temperatureOverride } = params + return { + model, + messages, + ...(tools.length > 0 && { + tools, + ...(toolChoice && { tool_choice: toolChoice }), + }), + stream: true, + stream_options: { include_usage: true }, + // DeepSeek thinking mode: enable chain-of-thought output. + // When active, temperature/top_p/presence_penalty/frequency_penalty are ignored by DeepSeek. + ...(enableThinking && { + // Official DeepSeek API format + thinking: { type: 'enabled' }, + // Self-hosted DeepSeek-V3.2 format + enable_thinking: true, + chat_template_kwargs: { thinking: true }, + }), + // Only send temperature when thinking mode is off (DeepSeek ignores it anyway, + // but other providers may respect it) + ...(!enableThinking && temperatureOverride !== undefined && { + temperature: temperatureOverride, + }), + } +} + /** * OpenAI-compatible query path. Converts Anthropic-format messages/tools to * OpenAI format, calls the OpenAI-compatible endpoint, and converts the @@ -177,40 +224,17 @@ export async function* queryModelOpenAI( ) // 11. Call OpenAI API with streaming - // DeepSeek thinking mode: inject thinking params via request body. - // Two formats are added simultaneously to support different deployments: - // - Official DeepSeek API: `thinking: { type: 'enabled' }` - // - Self-hosted DeepSeek-V3.2: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }` - // OpenAI SDK passes unknown keys through to the HTTP body. - // Each endpoint will use the format it recognizes and ignore the others. + const requestBody = buildOpenAIRequestBody({ + model: openaiModel, + messages: openaiMessages, + tools: openaiTools, + toolChoice: openaiToolChoice, + enableThinking, + temperatureOverride: options.temperatureOverride, + }) const stream = await client.chat.completions.create( - { - model: openaiModel, - messages: openaiMessages, - ...(openaiTools.length > 0 && { - tools: openaiTools, - ...(openaiToolChoice && { tool_choice: openaiToolChoice }), - }), - stream: true, - stream_options: { include_usage: true }, - // DeepSeek thinking mode: enable chain-of-thought output. - // When active, temperature/top_p/presence_penalty/frequency_penalty are ignored by DeepSeek. - ...(enableThinking && { - // Official DeepSeek API format - thinking: { type: 'enabled' }, - // Self-hosted DeepSeek-V3.2 format - enable_thinking: true, - chat_template_kwargs: { thinking: true }, - }), - // Only send temperature when thinking mode is off (DeepSeek ignores it anyway, - // but other providers may respect it) - ...(!enableThinking && options.temperatureOverride !== undefined && { - temperature: options.temperatureOverride, - }), - }, - { - signal, - }, + requestBody, + { signal }, ) // 12. Convert OpenAI stream to Anthropic events, then process into