diff --git a/Dockerfile.headed b/Dockerfile.headed index 42d8674..6b0f25f 100644 --- a/Dockerfile.headed +++ b/Dockerfile.headed @@ -4,33 +4,14 @@ WORKDIR /app ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 \ - ALLOW_DOCKER_HEADED_CAPTCHA=true + ALLOW_DOCKER_HEADED_CAPTCHA=true \ + PLAYWRIGHT_BROWSERS_PATH=0 -# 安装 Chrome 运行依赖(无头模式仍需要这些系统库) +# 在镜像构建阶段预装 Playwright Chromium,供 personal/browser 模式复用 COPY requirements.txt ./ -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - ca-certificates \ - curl \ - libnss3 \ - libnspr4 \ - libatk1.0-0 \ - libatk-bridge2.0-0 \ - libcups2 \ - libdrm2 \ - libxkbcommon0 \ - libxcomposite1 \ - libxdamage1 \ - libxfixes3 \ - libxrandr2 \ - libgbm1 \ - libpango-1.0-0 \ - libcairo2 \ - libasound2 \ - && rm -rf /var/lib/apt/lists/* - -RUN pip install --no-cache-dir --root-user-action=ignore -r requirements.txt +RUN pip install --no-cache-dir --root-user-action=ignore -r requirements.txt \ + && python -m playwright install --with-deps chromium COPY . . COPY docker/entrypoint.headed.sh /usr/local/bin/entrypoint.headed.sh diff --git a/README.md b/README.md index f1cf91c..acfd382 100644 --- a/README.md +++ b/README.md @@ -183,6 +183,8 @@ python main.py | `veo_3_1_t2v_fast_ultra_relaxed` | 文生视频 | 横屏 | | `veo_3_1_t2v_portrait` | 文生视频 | 竖屏 | | `veo_3_1_t2v_landscape` | 文生视频 | 横屏 | +| `veo_3_1_t2v_lite_portrait` | 文生视频 Lite | 竖屏 | +| `veo_3_1_t2v_lite_landscape` | 文生视频 Lite | 横屏 | #### 首尾帧模型 (I2V - Image to Video) 📸 **支持1-2张图片:1张作为首帧,2张作为首尾帧** @@ -190,6 +192,8 @@ python main.py > 💡 **自动适配**:系统会根据图片数量自动选择对应的 model_key > - **单帧模式**(1张图):使用首帧生成视频 > - **双帧模式**(2张图):使用首帧+尾帧生成过渡视频 +> - `veo_3_1_i2v_lite_*` 仅支持 **1 张** 首帧图片 +> - `veo_3_1_interpolation_lite_*` 仅支持 **2 张** 首尾帧图片 | 模型名称 | 说明| 尺寸 | |---------|---------|--------| @@ -205,6 +209,10 @@ python main.py | `veo_3_1_i2v_s_fast_ultra_relaxed` | 图生视频 | 横屏 | | `veo_3_1_i2v_s_portrait` | 图生视频 | 竖屏 | | `veo_3_1_i2v_s_landscape` | 图生视频 | 横屏 | +| `veo_3_1_i2v_lite_portrait` | 图生视频 Lite(仅首帧) | 竖屏 | +| `veo_3_1_i2v_lite_landscape` | 图生视频 Lite(仅首帧) | 横屏 | +| `veo_3_1_interpolation_lite_portrait` | 图生视频 Lite(首尾帧过渡) | 竖屏 | +| `veo_3_1_interpolation_lite_landscape` | 图生视频 Lite(首尾帧过渡) | 横屏 | #### 多图生成 (R2V - Reference Images to Video) 🖼️ **支持多张图片** diff --git a/docker/entrypoint.headed.sh b/docker/entrypoint.headed.sh index c238bdd..aa6518a 100644 --- a/docker/entrypoint.headed.sh +++ b/docker/entrypoint.headed.sh @@ -1,5 +1,28 @@ #!/bin/sh set -eu +resolve_browser_path() { +python - <<'PY' +from playwright.sync_api import sync_playwright + +with sync_playwright() as p: + print(p.chromium.executable_path or "") +PY +} + +if [ -z "${BROWSER_EXECUTABLE_PATH:-}" ] || [ ! -x "${BROWSER_EXECUTABLE_PATH:-}" ]; then + detected_browser_path="$(resolve_browser_path 2>/dev/null | tr -d '\r' | tail -n 1)" + if [ -n "${detected_browser_path}" ] && [ -x "${detected_browser_path}" ]; then + export BROWSER_EXECUTABLE_PATH="${detected_browser_path}" + fi +fi + echo "[entrypoint] starting flow2api (headless browser mode)" +if [ -n "${BROWSER_EXECUTABLE_PATH:-}" ] && [ -x "${BROWSER_EXECUTABLE_PATH}" ]; then + echo "[entrypoint] browser executable: ${BROWSER_EXECUTABLE_PATH}" + "${BROWSER_EXECUTABLE_PATH}" --version || true +else + echo "[entrypoint] warning: no valid browser executable found for personal/browser captcha" >&2 +fi + exec python main.py diff --git a/src/services/browser_captcha_personal.py b/src/services/browser_captcha_personal.py index 94094bb..80e6751 100644 --- a/src/services/browser_captcha_personal.py +++ b/src/services/browser_captcha_personal.py @@ -19,6 +19,9 @@ from ..core.logger import debug_logger from ..core.config import config +# 复用 browser 模式的浏览器缓存目录约定,避免容器内每次换位置。 +os.environ.setdefault("PLAYWRIGHT_BROWSERS_PATH", "0") + # ==================== Docker 环境检测 ==================== def _is_running_in_docker() -> bool: @@ -118,6 +121,111 @@ def _ensure_nodriver_installed() -> bool: return False +def _run_playwright_install(use_mirror: bool = False) -> bool: + """安装 playwright chromium 浏览器,复用 browser 模式的安装方式。""" + cmd = [sys.executable, '-m', 'playwright', 'install', 'chromium'] + env = os.environ.copy() + + if use_mirror: + env['PLAYWRIGHT_DOWNLOAD_HOST'] = 'https://npmmirror.com/mirrors/playwright' + + try: + debug_logger.log_info("[BrowserCaptcha] 正在安装 chromium 浏览器...") + print("[BrowserCaptcha] 正在安装 chromium 浏览器...") + result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env) + if result.returncode == 0: + debug_logger.log_info("[BrowserCaptcha] ✅ chromium 浏览器安装成功") + print("[BrowserCaptcha] ✅ chromium 浏览器安装成功") + return True + + debug_logger.log_warning(f"[BrowserCaptcha] chromium 安装失败: {result.stderr[:200]}") + return False + except Exception as e: + debug_logger.log_warning(f"[BrowserCaptcha] chromium 安装异常: {e}") + return False + + +def _ensure_playwright_installed() -> bool: + """确保 playwright 可用,便于复用其 chromium 二进制。""" + try: + import playwright # noqa: F401 + debug_logger.log_info("[BrowserCaptcha] playwright 已安装") + return True + except ImportError: + pass + + debug_logger.log_info("[BrowserCaptcha] playwright 未安装,开始自动安装...") + print("[BrowserCaptcha] playwright 未安装,开始自动安装...") + + if _run_pip_install('playwright', use_mirror=False): + return True + + debug_logger.log_info("[BrowserCaptcha] 官方源安装失败,尝试国内镜像...") + print("[BrowserCaptcha] 官方源安装失败,尝试国内镜像...") + if _run_pip_install('playwright', use_mirror=True): + return True + + debug_logger.log_error("[BrowserCaptcha] ❌ playwright 自动安装失败,请手动安装: pip install playwright") + print("[BrowserCaptcha] ❌ playwright 自动安装失败,请手动安装: pip install playwright") + return False + + +def _detect_playwright_browser_path() -> Optional[str]: + """读取 playwright 管理的 chromium 可执行文件路径。""" + detect_script = ( + "from playwright.sync_api import sync_playwright\n" + "with sync_playwright() as p:\n" + " print(p.chromium.executable_path or '')\n" + ) + env = os.environ.copy() + env.setdefault("PLAYWRIGHT_BROWSERS_PATH", os.environ.get("PLAYWRIGHT_BROWSERS_PATH", "0") or "0") + + try: + result = subprocess.run( + [sys.executable, "-c", detect_script], + capture_output=True, + text=True, + timeout=60, + env=env, + ) + browser_path_lines = (result.stdout or "").strip().splitlines() + browser_path = browser_path_lines[-1].strip() if browser_path_lines else "" + if result.returncode == 0 and browser_path and os.path.exists(browser_path): + debug_logger.log_info(f"[BrowserCaptcha] 检测到 playwright chromium: {browser_path}") + return browser_path + + stderr_text = (result.stderr or "").strip() + if stderr_text: + debug_logger.log_warning(f"[BrowserCaptcha] 检测 playwright chromium 失败: {stderr_text[:200]}") + except Exception as e: + debug_logger.log_info(f"[BrowserCaptcha] 检测 playwright chromium 时出错: {e}") + + return None + + +def _ensure_playwright_browser_path() -> Optional[str]: + """确保存在可复用的 chromium 二进制,并返回路径。""" + browser_path = _detect_playwright_browser_path() + if browser_path: + return browser_path + + if not _ensure_playwright_installed(): + return None + + debug_logger.log_info("[BrowserCaptcha] playwright chromium 未安装,开始自动安装...") + print("[BrowserCaptcha] playwright chromium 未安装,开始自动安装...") + + if not _run_playwright_install(use_mirror=False): + debug_logger.log_info("[BrowserCaptcha] 官方源安装失败,尝试国内镜像...") + print("[BrowserCaptcha] 官方源安装失败,尝试国内镜像...") + if not _run_playwright_install(use_mirror=True): + debug_logger.log_error("[BrowserCaptcha] ❌ chromium 浏览器自动安装失败,请手动安装: python -m playwright install chromium") + print("[BrowserCaptcha] ❌ chromium 浏览器自动安装失败,请手动安装: python -m playwright install chromium") + return None + + return _detect_playwright_browser_path() + + # 尝试导入 nodriver uc = None NODRIVER_AVAILABLE = False @@ -126,14 +234,15 @@ def _ensure_nodriver_installed() -> bool: if DOCKER_HEADED_BLOCKED: debug_logger.log_warning( "[BrowserCaptcha] 检测到 Docker 环境,默认禁用内置浏览器打码。" - "如需启用请设置 ALLOW_DOCKER_HEADED_CAPTCHA=true,并提供 DISPLAY/Xvfb。" + "如需启用请设置 ALLOW_DOCKER_HEADED_CAPTCHA=true。" + "personal 模式默认支持无头,不强制依赖 DISPLAY/Xvfb。" ) print("[BrowserCaptcha] ⚠️ 检测到 Docker 环境,默认禁用内置浏览器打码") - print("[BrowserCaptcha] 如需启用请设置 ALLOW_DOCKER_HEADED_CAPTCHA=true,并提供 DISPLAY/Xvfb") + print("[BrowserCaptcha] 如需启用请设置 ALLOW_DOCKER_HEADED_CAPTCHA=true") else: if IS_DOCKER and ALLOW_DOCKER_HEADED: debug_logger.log_warning( - "[BrowserCaptcha] Docker 内置浏览器打码白名单已启用,请确保 DISPLAY/Xvfb 可用" + "[BrowserCaptcha] Docker 内置浏览器打码白名单已启用,personal 模式将按 headless 配置决定是否需要 DISPLAY/Xvfb" ) print("[BrowserCaptcha] ✅ Docker 内置浏览器打码白名单已启用") if _ensure_nodriver_installed(): @@ -549,14 +658,18 @@ def _refresh_runtime_tunables(self): except Exception: self._fingerprint_cache_ttl_seconds = 3600.0 + def _requires_virtual_display(self) -> bool: + """仅在显式有头模式下要求 Docker/Linux 提供 DISPLAY/Xvfb。""" + return bool(IS_DOCKER and os.name == "posix" and not self.headless) + def _check_available(self): """检查服务是否可用""" if DOCKER_HEADED_BLOCKED: raise RuntimeError( "检测到 Docker 环境,默认禁用内置浏览器打码。" - "如需启用请设置环境变量 ALLOW_DOCKER_HEADED_CAPTCHA=true,并提供 DISPLAY/Xvfb。" + "如需启用请设置环境变量 ALLOW_DOCKER_HEADED_CAPTCHA=true。" ) - if IS_DOCKER and not os.environ.get("DISPLAY"): + if self._requires_virtual_display() and not os.environ.get("DISPLAY"): raise RuntimeError( "Docker 内置浏览器打码已启用,但 DISPLAY 未设置。" "请设置 DISPLAY(例如 :99)并启动 Xvfb。" @@ -1294,6 +1407,13 @@ async def initialize(self): f"[BrowserCaptcha] 指定浏览器不存在,改为自动发现: {browser_executable_path}" ) browser_executable_path = None + if not browser_executable_path: + playwright_browser_path = _ensure_playwright_browser_path() + if playwright_browser_path: + browser_executable_path = playwright_browser_path + debug_logger.log_info( + f"[BrowserCaptcha] 复用 playwright chromium 作为 nodriver 浏览器: {browser_executable_path}" + ) if browser_executable_path: debug_logger.log_info( f"[BrowserCaptcha] 使用指定浏览器可执行文件: {browser_executable_path}" @@ -1361,7 +1481,8 @@ async def initialize(self): browser_args.append('--disable-extensions') effective_launch_args = list(browser_args) - await self._wait_for_display_ready(display_value) + if self._requires_virtual_display(): + await self._wait_for_display_ready(display_value) effective_uid = "n/a" if hasattr(os, "geteuid"): diff --git a/src/services/flow_client.py b/src/services/flow_client.py index e8f981b..3ce228a 100644 --- a/src/services/flow_client.py +++ b/src/services/flow_client.py @@ -809,11 +809,12 @@ async def upload_image( ext = "png" if "png" in mime_type else "jpg" upload_file_name = f"flow2api_upload_{int(time.time() * 1000)}.{ext}" new_url = f"{self.api_base_url}/flow/uploadImage" + normalized_project_id = str(project_id or "").strip() new_client_context = { "tool": "PINHOLE" } - if project_id: - new_client_context["projectId"] = project_id + if normalized_project_id: + new_client_context["projectId"] = normalized_project_id new_json_data = { "clientContext": new_client_context, @@ -860,6 +861,23 @@ async def upload_image( raise Exception(f"Invalid upload response: missing media id, keys={list(new_result.keys())}") except Exception as new_upload_error: last_error = new_upload_error + retry_reason = "网络超时" if self._is_timeout_error(new_upload_error) else self._get_retry_reason(str(new_upload_error)) + + # 旧接口不携带 projectId,带项目上下文的上传一旦回退就可能把图片挂到错误项目。 + if normalized_project_id: + if retry_reason and retry_attempt < max_retries - 1: + debug_logger.log_warning( + f"[UPLOAD] Project-scoped upload 遇到{retry_reason},准备重试新版接口 " + f"({retry_attempt + 2}/{max_retries}, project_id={normalized_project_id})..." + ) + await asyncio.sleep(1) + continue + raise RuntimeError( + "Project-scoped image upload failed via /flow/uploadImage; " + "legacy :uploadUserImage fallback is disabled because it may attach media " + f"to a different project (project_id={normalized_project_id})." + ) from new_upload_error + debug_logger.log_warning( f"[UPLOAD] New upload API failed, fallback to legacy endpoint: {new_upload_error}" ) diff --git a/tests/test_flow_client_upload.py b/tests/test_flow_client_upload.py new file mode 100644 index 0000000..a9d6f5f --- /dev/null +++ b/tests/test_flow_client_upload.py @@ -0,0 +1,103 @@ +import unittest +from unittest.mock import AsyncMock + +from src.services.flow_client import FlowClient + + +JPEG_BYTES = b"\xff\xd8\xff" + b"0" * 16 + + +class FlowClientUploadImageTests(unittest.IsolatedAsyncioTestCase): + async def test_project_scoped_upload_uses_new_endpoint_with_project_id(self): + client = FlowClient(proxy_manager=None) + + request_calls = [] + + async def fake_make_request(**kwargs): + request_calls.append(kwargs) + return { + "media": { + "name": "new-media-id", + } + } + + client._make_request = AsyncMock(side_effect=fake_make_request) + + media_id = await client.upload_image( + at="test-at", + image_bytes=JPEG_BYTES, + aspect_ratio="IMAGE_ASPECT_RATIO_LANDSCAPE", + project_id="project-123", + ) + + self.assertEqual(media_id, "new-media-id") + self.assertEqual(len(request_calls), 1) + self.assertTrue(request_calls[0]["url"].endswith("/flow/uploadImage")) + self.assertEqual( + request_calls[0]["json_data"]["clientContext"]["projectId"], + "project-123", + ) + + async def test_project_scoped_upload_does_not_fallback_to_legacy_endpoint(self): + client = FlowClient(proxy_manager=None) + + request_calls = [] + + async def fake_make_request(**kwargs): + request_calls.append(kwargs) + if kwargs["url"].endswith("/flow/uploadImage"): + raise RuntimeError("HTTP 500: upstream failed") + self.fail("带 project_id 的上传不应回退到 legacy 接口") + + client._make_request = AsyncMock(side_effect=fake_make_request) + + with self.assertRaisesRegex(RuntimeError, "legacy :uploadUserImage fallback is disabled"): + await client.upload_image( + at="test-at", + image_bytes=JPEG_BYTES, + aspect_ratio="IMAGE_ASPECT_RATIO_LANDSCAPE", + project_id="project-123", + ) + + self.assertEqual(len(request_calls), 1) + self.assertEqual( + request_calls[0]["json_data"]["clientContext"]["projectId"], + "project-123", + ) + + async def test_upload_without_project_id_keeps_legacy_fallback(self): + client = FlowClient(proxy_manager=None) + + request_calls = [] + + async def fake_make_request(**kwargs): + request_calls.append(kwargs) + if kwargs["url"].endswith("/flow/uploadImage"): + raise RuntimeError("HTTP 500: upstream failed") + if kwargs["url"].endswith(":uploadUserImage"): + return { + "mediaGenerationId": { + "mediaGenerationId": "legacy-media-id", + } + } + self.fail(f"Unexpected url: {kwargs['url']}") + + client._make_request = AsyncMock(side_effect=fake_make_request) + + media_id = await client.upload_image( + at="test-at", + image_bytes=JPEG_BYTES, + aspect_ratio="IMAGE_ASPECT_RATIO_LANDSCAPE", + project_id=None, + ) + + self.assertEqual(media_id, "legacy-media-id") + self.assertEqual(len(request_calls), 2) + self.assertNotIn( + "projectId", + request_calls[1]["json_data"]["clientContext"], + ) + + +if __name__ == "__main__": + unittest.main()