Merge pull request #273 from CJackHwang/dev

NikkeTryHard · web-flow · commit ad735ee4e196 · 2025-12-02T23:08:26.000-06:00
Release: Add Thinking Level, Fix Docker & Type Safety
diff --git a/.env.example b/.env.example
@@ -102,6 +102,11 @@ ENABLE_THINKING_BUDGET=false
 # 当 API 请求中未提供 reasoning_effort 参数时，将使用此值。
 DEFAULT_THINKING_BUDGET=8192
 
+# "指定思考等级" 的默认值 (high/low)
+# 仅适用于 gemini-3-pro-preview 等使用思考等级的模型。
+# 当 API 请求中未提供 reasoning_effort 参数时，将使用此值。
+DEFAULT_THINKING_LEVEL=high
+
 # 是否默认启用 "Google Search" 功能 (true/false)
 # 当 API 请求中未提供 tools 参数时，将使用此设置作为 Google Search 的默认开关状态。
 ENABLE_GOOGLE_SEARCH=false
diff --git a/api_utils/context_types.py b/api_utils/context_types.py
@@ -1,12 +1,12 @@
 import logging
 from asyncio import Future, Lock
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union
 
 from playwright.async_api import Page as AsyncPage
 
 if TYPE_CHECKING:
     from fastapi import Request
-    from fastapi.responses import JSONResponse
+    from fastapi.responses import JSONResponse, StreamingResponse
 
     from models.chat import ChatCompletionRequest
 
@@ -21,7 +21,7 @@ class QueueItem(TypedDict):
     req_id: str
     request_data: "ChatCompletionRequest"
     http_request: "Request"
-    result_future: "Future[JSONResponse]"
+    result_future: "Future[Union[JSONResponse, StreamingResponse]]"
     enqueue_time: float
     cancelled: bool
 
@@ -34,6 +34,7 @@ class RequestContext(TypedDict):
     """
 
     # Core components (always set by context_init.py)
+    req_id: str
     logger: logging.Logger
     page: Optional[AsyncPage]  # Value can be None if browser not ready
     is_page_ready: bool
diff --git a/api_utils/queue_worker.py b/api_utils/queue_worker.py
@@ -16,6 +16,8 @@
 from logging_utils import set_request_id, set_source
 from models import ChatCompletionRequest
 
+from api_utils.context_types import QueueItem
+
 from .error_utils import (
     client_cancelled,
     client_disconnected,
@@ -31,7 +33,7 @@ def __init__(self):
         self.last_request_completion_time = 0.0
 
         # These will be initialized from server.py or created if missing
-        self.request_queue: Optional[Queue[Dict[str, Any]]] = None
+        self.request_queue: Optional[Queue[QueueItem]] = None
         self.processing_lock: Optional[Lock] = None
         self.model_switching_lock: Optional[Lock] = None
         self.params_cache_lock: Optional[Lock] = None
@@ -82,15 +84,15 @@ async def check_queue_disconnects(self) -> None:
             return
 
         checked_count = 0
-        items_to_requeue: List[Dict[str, Any]] = []
+        items_to_requeue: List[QueueItem] = []
         processed_ids: Set[str] = set()
 
         # Limit check to 10 items or queue size
         limit = min(queue_size, 10)
 
         while checked_count < limit:
             try:
-                item: Dict[str, Any] = self.request_queue.get_nowait()
+                item: QueueItem = self.request_queue.get_nowait()
                 item_req_id = str(item.get("req_id", "unknown"))
 
                 if item_req_id in processed_ids:
@@ -133,7 +135,7 @@ async def check_queue_disconnects(self) -> None:
         for item in items_to_requeue:
             await self.request_queue.put(item)
 
-    async def get_next_request(self) -> Optional[Dict[str, Any]]:
+    async def get_next_request(self) -> Optional[QueueItem]:
         """Get the next request from the queue with timeout."""
         if not self.request_queue:
             await asyncio.sleep(1)
@@ -162,7 +164,7 @@ async def handle_streaming_delay(
             )
             await asyncio.sleep(delay_time)
 
-    async def process_request(self, request_item: Dict[str, Any]) -> None:
+    async def process_request(self, request_item: QueueItem) -> None:
         """Process a single request item."""
         req_id = str(request_item["req_id"])
         request_data: ChatCompletionRequest = request_item["request_data"]
diff --git a/browser_utils/page_controller_modules/thinking.py b/browser_utils/page_controller_modules/thinking.py
@@ -11,6 +11,7 @@
 )
 from config import (
     CLICK_TIMEOUT_MS,
+    DEFAULT_THINKING_LEVEL,
     ENABLE_THINKING_MODE_TOGGLE_SELECTOR,
     SET_THINKING_BUDGET_TOGGLE_SELECTOR,
     THINKING_BUDGET_INPUT_SELECTOR,
@@ -69,6 +70,11 @@ def _should_enable_from_raw(rv: Any) -> bool:
             reasoning_effort
         )
 
+        # 特殊逻辑：对于使用等级的模型（Gemini 3 Pro），如果未指定 reasoning_effort，
+        # 我们默认认为应该开启（或者至少应该检查并应用默认等级）
+        if reasoning_effort is None and uses_level:
+            desired_enabled = True
+
         has_main_toggle = self._model_has_main_thinking_toggle(model_id_to_use)
         if has_main_toggle:
             self.logger.info(
@@ -118,6 +124,9 @@ def _should_enable_from_raw(rv: Any) -> bool:
             elif isinstance(rv, int):
                 level_to_set = "high" if rv >= 8000 or rv == -1 else "low"
 
+            if level_to_set is None and rv is None:
+                level_to_set = DEFAULT_THINKING_LEVEL
+
             if level_to_set is None:
                 self.logger.info(" 无法解析等级，保持当前等级。")
             else:
diff --git a/config/__init__.py b/config/__init__.py
@@ -18,6 +18,10 @@
     "DEFAULT_TEMPERATURE",
     "DEFAULT_MAX_OUTPUT_TOKENS",
     "DEFAULT_TOP_P",
+    "ENABLE_THINKING_BUDGET",
+    "DEFAULT_THINKING_BUDGET",
+    "DEFAULT_THINKING_LEVEL",
+    "ENABLE_GOOGLE_SEARCH",
     "DEFAULT_STOP_SEQUENCES",
     "AI_STUDIO_URL_PATTERN",
     "MODELS_ENDPOINT_URL_CONTAINS",
diff --git a/config/constants.py b/config/constants.py
@@ -32,6 +32,7 @@
     "yes",
 )
 DEFAULT_THINKING_BUDGET = int(os.environ.get("DEFAULT_THINKING_BUDGET", "8192"))
+DEFAULT_THINKING_LEVEL = os.environ.get("DEFAULT_THINKING_LEVEL", "high").lower()
 ENABLE_GOOGLE_SEARCH = os.environ.get("ENABLE_GOOGLE_SEARCH", "false").lower() in (
     "true",
     "1",
diff --git a/launcher/runner.py b/launcher/runner.py
@@ -58,7 +58,8 @@ def __init__(self):
 
     def run(self):
         # 检查是否是内部启动调用
-        is_internal_call = any(arg.startswith("--internal-") for arg in sys.argv)
+        # 注意：不能只检查 startswith("--internal-")，因为 --internal-camoufox-proxy 是主进程参数
+        is_internal_call = self.args.internal_launch_mode is not None
 
         if is_internal_call:
             # 处理内部 Camoufox 启动逻辑
diff --git a/pyrightconfig.json b/pyrightconfig.json
@@ -33,15 +33,15 @@
   "reportInvalidStringEscapeSequence": "error",
   "reportInvalidTypeVarUse": "error",
   "reportMissingTypeArgument": "none",
-  "reportPrivateUsage": "warning",
-  "reportUnknownParameterType": "warning",
-  "reportUnknownArgumentType": "warning",
+    "reportPrivateUsage": "none",
+    "reportUnknownParameterType": "none",
+  "reportUnknownArgumentType": "none",
   "reportUnknownLambdaType": "none",
-  "reportUnknownVariableType": "warning",
-  "reportUnknownMemberType": "warning",
-  "reportMissingParameterType": "warning",
-  "reportUnnecessaryIsInstance": "warning",
-  "reportUnnecessaryCast": "warning",
+  "reportUnknownVariableType": "none",
+  "reportUnknownMemberType": "none",
+  "reportMissingParameterType": "none",
+    "reportUnnecessaryIsInstance": "none",
+    "reportUnnecessaryCast": "warning",
   "reportUnnecessaryComparison": "warning",
   "reportConstantRedefinition": "error",
   "reportDuplicateImport": "warning",
diff --git a/tests/api_utils/test_queue_worker.py b/tests/api_utils/test_queue_worker.py
@@ -16,12 +16,14 @@
 
 import asyncio
 import time
+from typing import cast
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 from fastapi import HTTPException
 
 from api_utils.queue_worker import QueueManager, queue_worker
+from api_utils.context_types import QueueItem
 
 # ==================== Test Classes ====================
 
@@ -137,20 +139,24 @@ async def test_check_queue_disconnects_marks_disconnected_requests(
         assert queue_manager.request_queue is not None
 
         # Create two items: one disconnected, one connected
-        item1 = {
+        item1 = cast(QueueItem, {
             "req_id": "req1",
             "http_request": MagicMock(),
             "cancelled": False,
             "result_future": asyncio.Future(),
-        }
+            "request_data": None,
+            "enqueue_time": 0.0,
+        })
         item1["http_request"].is_disconnected = AsyncMock(return_value=True)
 
-        item2 = {
+        item2 = cast(QueueItem, {
             "req_id": "req2",
             "http_request": MagicMock(),
             "cancelled": False,
             "result_future": asyncio.Future(),
-        }
+            "request_data": None,
+            "enqueue_time": 0.0,
+        })
         item2["http_request"].is_disconnected = AsyncMock(return_value=False)
 
         # Add to queue
@@ -180,12 +186,14 @@ async def test_check_queue_disconnects_handles_exceptions(
         assert queue_manager.request_queue is not None
         queue_manager.logger = MagicMock()
 
-        item = {
+        item = cast(QueueItem, {
             "req_id": "req1",
             "http_request": MagicMock(),
             "cancelled": False,
             "result_future": asyncio.Future(),
-        }
+            "request_data": None,
+            "enqueue_time": 0.0,
+        })
         item["http_request"].is_disconnected = AsyncMock(
             side_effect=Exception("Connection check failed")
         )
@@ -213,13 +221,14 @@ async def test_process_request_skips_cancelled_requests(
         # Mock task_done since we're not using queue.get()
         queue_manager.request_queue.task_done = MagicMock()
 
-        req_item = {
+        req_item = cast(QueueItem, {
             "req_id": "req1",
             "request_data": MagicMock(),
             "http_request": MagicMock(),
             "result_future": asyncio.Future(),
             "cancelled": True,  # Already cancelled
-        }
+            "enqueue_time": 0.0,
+        })
 
         await queue_manager.process_request(req_item)
 
@@ -240,13 +249,14 @@ async def test_process_request_detects_early_disconnect(
         queue_manager.request_queue.task_done = MagicMock()  # Mock task_done
         queue_manager.processing_lock = real_locks_mock_browser.processing_lock
 
-        req_item = {
+        req_item = cast(QueueItem, {
             "req_id": "req1",
             "request_data": MagicMock(),
             "http_request": MagicMock(),
             "result_future": asyncio.Future(),
             "cancelled": False,
-        }
+            "enqueue_time": 0.0,
+        })
 
         # Mock client as disconnected
         with patch(
@@ -273,13 +283,14 @@ async def test_process_request_fails_when_lock_missing(
         queue_manager.request_queue.task_done = MagicMock()  # Mock task_done
         queue_manager.processing_lock = None  # Not initialized
 
-        req_item = {
+        req_item = cast(QueueItem, {
             "req_id": "req1",
             "request_data": MagicMock(),
             "http_request": MagicMock(),
             "result_future": asyncio.Future(),
             "cancelled": False,
-        }
+            "enqueue_time": 0.0,
+        })
 
         with patch(
             "api_utils.request_processor._check_client_connection",
@@ -305,13 +316,14 @@ async def test_process_request_successful_flow(self, real_locks_mock_browser):
         queue_manager.processing_lock = real_locks_mock_browser.processing_lock
         queue_manager.logger = MagicMock()
 
-        req_item = {
+        req_item = cast(QueueItem, {
             "req_id": "req1",
             "request_data": MagicMock(),
             "http_request": MagicMock(),
             "result_future": asyncio.Future(),
             "cancelled": False,
-        }
+            "enqueue_time": 0.0,
+        })
         req_item["request_data"].stream = False
 
         with (
@@ -348,13 +360,14 @@ async def test_tier1_recovery_page_refresh(self, real_locks_mock_browser):
         queue_manager.processing_lock = real_locks_mock_browser.processing_lock
         queue_manager.logger = MagicMock()
 
-        req_item = {
+        req_item = cast(QueueItem, {
             "req_id": "req1",
             "request_data": MagicMock(),
             "http_request": MagicMock(),
             "result_future": asyncio.Future(),
             "cancelled": False,
-        }
+            "enqueue_time": 0.0,
+        })
 
         with (
             patch(
@@ -392,13 +405,14 @@ async def test_tier2_recovery_profile_switch(self, real_locks_mock_browser):
         queue_manager.processing_lock = real_locks_mock_browser.processing_lock
         queue_manager.logger = MagicMock()
 
-        req_item = {
+        req_item = cast(QueueItem, {
             "req_id": "req1",
             "request_data": MagicMock(),
             "http_request": MagicMock(),
             "result_future": asyncio.Future(),
             "cancelled": False,
-        }
+            "enqueue_time": 0.0,
+        })
 
         with (
             patch(
@@ -445,13 +459,14 @@ async def test_quota_error_immediate_profile_switch(self, real_locks_mock_browse
         queue_manager.processing_lock = real_locks_mock_browser.processing_lock
         queue_manager.logger = MagicMock()
 
-        req_item = {
+        req_item = cast(QueueItem, {
             "req_id": "req1",
             "request_data": MagicMock(),
             "http_request": MagicMock(),
             "result_future": asyncio.Future(),
             "cancelled": False,
-        }
+            "enqueue_time": 0.0,
+        })
 
         with (
             patch(
@@ -493,13 +508,14 @@ async def test_recovery_exhausted_raises_exception(self, real_locks_mock_browser
         queue_manager.processing_lock = real_locks_mock_browser.processing_lock
         queue_manager.logger = MagicMock()
 
-        req_item = {
+        req_item = cast(QueueItem, {
             "req_id": "req1",
             "request_data": MagicMock(),
             "http_request": MagicMock(),
             "result_future": asyncio.Future(),
             "cancelled": False,
-        }
+            "enqueue_time": 0.0,
+        })
 
         with (
             patch(
@@ -969,7 +985,14 @@ async def test_get_next_request_success(self, real_locks_mock_browser):
         queue_manager.request_queue = real_locks_mock_browser.request_queue
         assert queue_manager.request_queue is not None
 
-        item = {"req_id": "req1", "request_data": MagicMock()}
+        item = cast(QueueItem, {
+            "req_id": "req1",
+            "request_data": MagicMock(),
+            "http_request": MagicMock(),
+            "cancelled": False,
+            "result_future": asyncio.Future(),
+            "enqueue_time": 0.0,
+        })
         await queue_manager.request_queue.put(item)
 
         result = await queue_manager.get_next_request()
diff --git a/tests/api_utils/test_queue_worker_recovery.py b/tests/api_utils/test_queue_worker_recovery.py
diff --git a/tests/integration/test_client_disconnect_advanced.py b/tests/integration/test_client_disconnect_advanced.py

Original file line number	Diff line number	Diff line change
`@@ -32,6 +32,7 @@`
`32`	`32`	`"yes",`
`33`	`33`	`)`
`34`	`34`	`DEFAULT_THINKING_BUDGET = int(os.environ.get("DEFAULT_THINKING_BUDGET", "8192"))`
	`35`	`+DEFAULT_THINKING_LEVEL = os.environ.get("DEFAULT_THINKING_LEVEL", "high").lower()`
`35`	`36`	`ENABLE_GOOGLE_SEARCH = os.environ.get("ENABLE_GOOGLE_SEARCH", "false").lower() in (`
`36`	`37`	`"true",`
`37`	`38`	`"1",`