Skip to content

Commit ad735ee

Browse files
authored
Merge pull request #273 from CJackHwang/dev
Release: Add Thinking Level, Fix Docker & Type Safety
2 parents d7b3703 + ca188bc commit ad735ee

File tree

11 files changed

+138
-66
lines changed

11 files changed

+138
-66
lines changed

.env.example

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,11 @@ ENABLE_THINKING_BUDGET=false
102102
# 当 API 请求中未提供 reasoning_effort 参数时,将使用此值。
103103
DEFAULT_THINKING_BUDGET=8192
104104

105+
# "指定思考等级" 的默认值 (high/low)
106+
# 仅适用于 gemini-3-pro-preview 等使用思考等级的模型。
107+
# 当 API 请求中未提供 reasoning_effort 参数时,将使用此值。
108+
DEFAULT_THINKING_LEVEL=high
109+
105110
# 是否默认启用 "Google Search" 功能 (true/false)
106111
# 当 API 请求中未提供 tools 参数时,将使用此设置作为 Google Search 的默认开关状态。
107112
ENABLE_GOOGLE_SEARCH=false

api_utils/context_types.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import logging
22
from asyncio import Future, Lock
3-
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict
3+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union
44

55
from playwright.async_api import Page as AsyncPage
66

77
if TYPE_CHECKING:
88
from fastapi import Request
9-
from fastapi.responses import JSONResponse
9+
from fastapi.responses import JSONResponse, StreamingResponse
1010

1111
from models.chat import ChatCompletionRequest
1212

@@ -21,7 +21,7 @@ class QueueItem(TypedDict):
2121
req_id: str
2222
request_data: "ChatCompletionRequest"
2323
http_request: "Request"
24-
result_future: "Future[JSONResponse]"
24+
result_future: "Future[Union[JSONResponse, StreamingResponse]]"
2525
enqueue_time: float
2626
cancelled: bool
2727

@@ -34,6 +34,7 @@ class RequestContext(TypedDict):
3434
"""
3535

3636
# Core components (always set by context_init.py)
37+
req_id: str
3738
logger: logging.Logger
3839
page: Optional[AsyncPage] # Value can be None if browser not ready
3940
is_page_ready: bool

api_utils/queue_worker.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from logging_utils import set_request_id, set_source
1717
from models import ChatCompletionRequest
1818

19+
from api_utils.context_types import QueueItem
20+
1921
from .error_utils import (
2022
client_cancelled,
2123
client_disconnected,
@@ -31,7 +33,7 @@ def __init__(self):
3133
self.last_request_completion_time = 0.0
3234

3335
# These will be initialized from server.py or created if missing
34-
self.request_queue: Optional[Queue[Dict[str, Any]]] = None
36+
self.request_queue: Optional[Queue[QueueItem]] = None
3537
self.processing_lock: Optional[Lock] = None
3638
self.model_switching_lock: Optional[Lock] = None
3739
self.params_cache_lock: Optional[Lock] = None
@@ -82,15 +84,15 @@ async def check_queue_disconnects(self) -> None:
8284
return
8385

8486
checked_count = 0
85-
items_to_requeue: List[Dict[str, Any]] = []
87+
items_to_requeue: List[QueueItem] = []
8688
processed_ids: Set[str] = set()
8789

8890
# Limit check to 10 items or queue size
8991
limit = min(queue_size, 10)
9092

9193
while checked_count < limit:
9294
try:
93-
item: Dict[str, Any] = self.request_queue.get_nowait()
95+
item: QueueItem = self.request_queue.get_nowait()
9496
item_req_id = str(item.get("req_id", "unknown"))
9597

9698
if item_req_id in processed_ids:
@@ -133,7 +135,7 @@ async def check_queue_disconnects(self) -> None:
133135
for item in items_to_requeue:
134136
await self.request_queue.put(item)
135137

136-
async def get_next_request(self) -> Optional[Dict[str, Any]]:
138+
async def get_next_request(self) -> Optional[QueueItem]:
137139
"""Get the next request from the queue with timeout."""
138140
if not self.request_queue:
139141
await asyncio.sleep(1)
@@ -162,7 +164,7 @@ async def handle_streaming_delay(
162164
)
163165
await asyncio.sleep(delay_time)
164166

165-
async def process_request(self, request_item: Dict[str, Any]) -> None:
167+
async def process_request(self, request_item: QueueItem) -> None:
166168
"""Process a single request item."""
167169
req_id = str(request_item["req_id"])
168170
request_data: ChatCompletionRequest = request_item["request_data"]

browser_utils/page_controller_modules/thinking.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
)
1212
from config import (
1313
CLICK_TIMEOUT_MS,
14+
DEFAULT_THINKING_LEVEL,
1415
ENABLE_THINKING_MODE_TOGGLE_SELECTOR,
1516
SET_THINKING_BUDGET_TOGGLE_SELECTOR,
1617
THINKING_BUDGET_INPUT_SELECTOR,
@@ -69,6 +70,11 @@ def _should_enable_from_raw(rv: Any) -> bool:
6970
reasoning_effort
7071
)
7172

73+
# 特殊逻辑:对于使用等级的模型(Gemini 3 Pro),如果未指定 reasoning_effort,
74+
# 我们默认认为应该开启(或者至少应该检查并应用默认等级)
75+
if reasoning_effort is None and uses_level:
76+
desired_enabled = True
77+
7278
has_main_toggle = self._model_has_main_thinking_toggle(model_id_to_use)
7379
if has_main_toggle:
7480
self.logger.info(
@@ -118,6 +124,9 @@ def _should_enable_from_raw(rv: Any) -> bool:
118124
elif isinstance(rv, int):
119125
level_to_set = "high" if rv >= 8000 or rv == -1 else "low"
120126

127+
if level_to_set is None and rv is None:
128+
level_to_set = DEFAULT_THINKING_LEVEL
129+
121130
if level_to_set is None:
122131
self.logger.info(" 无法解析等级,保持当前等级。")
123132
else:

config/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
"DEFAULT_TEMPERATURE",
1919
"DEFAULT_MAX_OUTPUT_TOKENS",
2020
"DEFAULT_TOP_P",
21+
"ENABLE_THINKING_BUDGET",
22+
"DEFAULT_THINKING_BUDGET",
23+
"DEFAULT_THINKING_LEVEL",
24+
"ENABLE_GOOGLE_SEARCH",
2125
"DEFAULT_STOP_SEQUENCES",
2226
"AI_STUDIO_URL_PATTERN",
2327
"MODELS_ENDPOINT_URL_CONTAINS",

config/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
"yes",
3333
)
3434
DEFAULT_THINKING_BUDGET = int(os.environ.get("DEFAULT_THINKING_BUDGET", "8192"))
35+
DEFAULT_THINKING_LEVEL = os.environ.get("DEFAULT_THINKING_LEVEL", "high").lower()
3536
ENABLE_GOOGLE_SEARCH = os.environ.get("ENABLE_GOOGLE_SEARCH", "false").lower() in (
3637
"true",
3738
"1",

launcher/runner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ def __init__(self):
5858

5959
def run(self):
6060
# 检查是否是内部启动调用
61-
is_internal_call = any(arg.startswith("--internal-") for arg in sys.argv)
61+
# 注意:不能只检查 startswith("--internal-"),因为 --internal-camoufox-proxy 是主进程参数
62+
is_internal_call = self.args.internal_launch_mode is not None
6263

6364
if is_internal_call:
6465
# 处理内部 Camoufox 启动逻辑

pyrightconfig.json

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@
3333
"reportInvalidStringEscapeSequence": "error",
3434
"reportInvalidTypeVarUse": "error",
3535
"reportMissingTypeArgument": "none",
36-
"reportPrivateUsage": "warning",
37-
"reportUnknownParameterType": "warning",
38-
"reportUnknownArgumentType": "warning",
36+
"reportPrivateUsage": "none",
37+
"reportUnknownParameterType": "none",
38+
"reportUnknownArgumentType": "none",
3939
"reportUnknownLambdaType": "none",
40-
"reportUnknownVariableType": "warning",
41-
"reportUnknownMemberType": "warning",
42-
"reportMissingParameterType": "warning",
43-
"reportUnnecessaryIsInstance": "warning",
44-
"reportUnnecessaryCast": "warning",
40+
"reportUnknownVariableType": "none",
41+
"reportUnknownMemberType": "none",
42+
"reportMissingParameterType": "none",
43+
"reportUnnecessaryIsInstance": "none",
44+
"reportUnnecessaryCast": "warning",
4545
"reportUnnecessaryComparison": "warning",
4646
"reportConstantRedefinition": "error",
4747
"reportDuplicateImport": "warning",

tests/api_utils/test_queue_worker.py

Lines changed: 46 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616

1717
import asyncio
1818
import time
19+
from typing import cast
1920
from unittest.mock import AsyncMock, MagicMock, patch
2021

2122
import pytest
2223
from fastapi import HTTPException
2324

2425
from api_utils.queue_worker import QueueManager, queue_worker
26+
from api_utils.context_types import QueueItem
2527

2628
# ==================== Test Classes ====================
2729

@@ -137,20 +139,24 @@ async def test_check_queue_disconnects_marks_disconnected_requests(
137139
assert queue_manager.request_queue is not None
138140

139141
# Create two items: one disconnected, one connected
140-
item1 = {
142+
item1 = cast(QueueItem, {
141143
"req_id": "req1",
142144
"http_request": MagicMock(),
143145
"cancelled": False,
144146
"result_future": asyncio.Future(),
145-
}
147+
"request_data": None,
148+
"enqueue_time": 0.0,
149+
})
146150
item1["http_request"].is_disconnected = AsyncMock(return_value=True)
147151

148-
item2 = {
152+
item2 = cast(QueueItem, {
149153
"req_id": "req2",
150154
"http_request": MagicMock(),
151155
"cancelled": False,
152156
"result_future": asyncio.Future(),
153-
}
157+
"request_data": None,
158+
"enqueue_time": 0.0,
159+
})
154160
item2["http_request"].is_disconnected = AsyncMock(return_value=False)
155161

156162
# Add to queue
@@ -180,12 +186,14 @@ async def test_check_queue_disconnects_handles_exceptions(
180186
assert queue_manager.request_queue is not None
181187
queue_manager.logger = MagicMock()
182188

183-
item = {
189+
item = cast(QueueItem, {
184190
"req_id": "req1",
185191
"http_request": MagicMock(),
186192
"cancelled": False,
187193
"result_future": asyncio.Future(),
188-
}
194+
"request_data": None,
195+
"enqueue_time": 0.0,
196+
})
189197
item["http_request"].is_disconnected = AsyncMock(
190198
side_effect=Exception("Connection check failed")
191199
)
@@ -213,13 +221,14 @@ async def test_process_request_skips_cancelled_requests(
213221
# Mock task_done since we're not using queue.get()
214222
queue_manager.request_queue.task_done = MagicMock()
215223

216-
req_item = {
224+
req_item = cast(QueueItem, {
217225
"req_id": "req1",
218226
"request_data": MagicMock(),
219227
"http_request": MagicMock(),
220228
"result_future": asyncio.Future(),
221229
"cancelled": True, # Already cancelled
222-
}
230+
"enqueue_time": 0.0,
231+
})
223232

224233
await queue_manager.process_request(req_item)
225234

@@ -240,13 +249,14 @@ async def test_process_request_detects_early_disconnect(
240249
queue_manager.request_queue.task_done = MagicMock() # Mock task_done
241250
queue_manager.processing_lock = real_locks_mock_browser.processing_lock
242251

243-
req_item = {
252+
req_item = cast(QueueItem, {
244253
"req_id": "req1",
245254
"request_data": MagicMock(),
246255
"http_request": MagicMock(),
247256
"result_future": asyncio.Future(),
248257
"cancelled": False,
249-
}
258+
"enqueue_time": 0.0,
259+
})
250260

251261
# Mock client as disconnected
252262
with patch(
@@ -273,13 +283,14 @@ async def test_process_request_fails_when_lock_missing(
273283
queue_manager.request_queue.task_done = MagicMock() # Mock task_done
274284
queue_manager.processing_lock = None # Not initialized
275285

276-
req_item = {
286+
req_item = cast(QueueItem, {
277287
"req_id": "req1",
278288
"request_data": MagicMock(),
279289
"http_request": MagicMock(),
280290
"result_future": asyncio.Future(),
281291
"cancelled": False,
282-
}
292+
"enqueue_time": 0.0,
293+
})
283294

284295
with patch(
285296
"api_utils.request_processor._check_client_connection",
@@ -305,13 +316,14 @@ async def test_process_request_successful_flow(self, real_locks_mock_browser):
305316
queue_manager.processing_lock = real_locks_mock_browser.processing_lock
306317
queue_manager.logger = MagicMock()
307318

308-
req_item = {
319+
req_item = cast(QueueItem, {
309320
"req_id": "req1",
310321
"request_data": MagicMock(),
311322
"http_request": MagicMock(),
312323
"result_future": asyncio.Future(),
313324
"cancelled": False,
314-
}
325+
"enqueue_time": 0.0,
326+
})
315327
req_item["request_data"].stream = False
316328

317329
with (
@@ -348,13 +360,14 @@ async def test_tier1_recovery_page_refresh(self, real_locks_mock_browser):
348360
queue_manager.processing_lock = real_locks_mock_browser.processing_lock
349361
queue_manager.logger = MagicMock()
350362

351-
req_item = {
363+
req_item = cast(QueueItem, {
352364
"req_id": "req1",
353365
"request_data": MagicMock(),
354366
"http_request": MagicMock(),
355367
"result_future": asyncio.Future(),
356368
"cancelled": False,
357-
}
369+
"enqueue_time": 0.0,
370+
})
358371

359372
with (
360373
patch(
@@ -392,13 +405,14 @@ async def test_tier2_recovery_profile_switch(self, real_locks_mock_browser):
392405
queue_manager.processing_lock = real_locks_mock_browser.processing_lock
393406
queue_manager.logger = MagicMock()
394407

395-
req_item = {
408+
req_item = cast(QueueItem, {
396409
"req_id": "req1",
397410
"request_data": MagicMock(),
398411
"http_request": MagicMock(),
399412
"result_future": asyncio.Future(),
400413
"cancelled": False,
401-
}
414+
"enqueue_time": 0.0,
415+
})
402416

403417
with (
404418
patch(
@@ -445,13 +459,14 @@ async def test_quota_error_immediate_profile_switch(self, real_locks_mock_browse
445459
queue_manager.processing_lock = real_locks_mock_browser.processing_lock
446460
queue_manager.logger = MagicMock()
447461

448-
req_item = {
462+
req_item = cast(QueueItem, {
449463
"req_id": "req1",
450464
"request_data": MagicMock(),
451465
"http_request": MagicMock(),
452466
"result_future": asyncio.Future(),
453467
"cancelled": False,
454-
}
468+
"enqueue_time": 0.0,
469+
})
455470

456471
with (
457472
patch(
@@ -493,13 +508,14 @@ async def test_recovery_exhausted_raises_exception(self, real_locks_mock_browser
493508
queue_manager.processing_lock = real_locks_mock_browser.processing_lock
494509
queue_manager.logger = MagicMock()
495510

496-
req_item = {
511+
req_item = cast(QueueItem, {
497512
"req_id": "req1",
498513
"request_data": MagicMock(),
499514
"http_request": MagicMock(),
500515
"result_future": asyncio.Future(),
501516
"cancelled": False,
502-
}
517+
"enqueue_time": 0.0,
518+
})
503519

504520
with (
505521
patch(
@@ -969,7 +985,14 @@ async def test_get_next_request_success(self, real_locks_mock_browser):
969985
queue_manager.request_queue = real_locks_mock_browser.request_queue
970986
assert queue_manager.request_queue is not None
971987

972-
item = {"req_id": "req1", "request_data": MagicMock()}
988+
item = cast(QueueItem, {
989+
"req_id": "req1",
990+
"request_data": MagicMock(),
991+
"http_request": MagicMock(),
992+
"cancelled": False,
993+
"result_future": asyncio.Future(),
994+
"enqueue_time": 0.0,
995+
})
973996
await queue_manager.request_queue.put(item)
974997

975998
result = await queue_manager.get_next_request()

0 commit comments

Comments
 (0)