Skip to content
Merged

Beta #116

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 5 additions & 24 deletions Dockerfile.headed
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,14 @@ WORKDIR /app

ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
ALLOW_DOCKER_HEADED_CAPTCHA=true
ALLOW_DOCKER_HEADED_CAPTCHA=true \
PLAYWRIGHT_BROWSERS_PATH=0

# 安装 Chrome 运行依赖(无头模式仍需要这些系统库)
# 在镜像构建阶段预装 Playwright Chromium,供 personal/browser 模式复用
COPY requirements.txt ./

RUN apt-get update \
&& apt-get install -y --no-install-recommends \
ca-certificates \
curl \
libnss3 \
libnspr4 \
libatk1.0-0 \
libatk-bridge2.0-0 \
libcups2 \
libdrm2 \
libxkbcommon0 \
libxcomposite1 \
libxdamage1 \
libxfixes3 \
libxrandr2 \
libgbm1 \
libpango-1.0-0 \
libcairo2 \
libasound2 \
&& rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir --root-user-action=ignore -r requirements.txt
RUN pip install --no-cache-dir --root-user-action=ignore -r requirements.txt \
&& python -m playwright install --with-deps chromium

COPY . .
COPY docker/entrypoint.headed.sh /usr/local/bin/entrypoint.headed.sh
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,17 @@ python main.py
| `veo_3_1_t2v_fast_ultra_relaxed` | 文生视频 | 横屏 |
| `veo_3_1_t2v_portrait` | 文生视频 | 竖屏 |
| `veo_3_1_t2v_landscape` | 文生视频 | 横屏 |
| `veo_3_1_t2v_lite_portrait` | 文生视频 Lite | 竖屏 |
| `veo_3_1_t2v_lite_landscape` | 文生视频 Lite | 横屏 |

#### 首尾帧模型 (I2V - Image to Video)
📸 **支持1-2张图片:1张作为首帧,2张作为首尾帧**

> 💡 **自动适配**:系统会根据图片数量自动选择对应的 model_key
> - **单帧模式**(1张图):使用首帧生成视频
> - **双帧模式**(2张图):使用首帧+尾帧生成过渡视频
> - `veo_3_1_i2v_lite_*` 仅支持 **1 张** 首帧图片
> - `veo_3_1_interpolation_lite_*` 仅支持 **2 张** 首尾帧图片

| 模型名称 | 说明| 尺寸 |
|---------|---------|--------|
Expand All @@ -205,6 +209,10 @@ python main.py
| `veo_3_1_i2v_s_fast_ultra_relaxed` | 图生视频 | 横屏 |
| `veo_3_1_i2v_s_portrait` | 图生视频 | 竖屏 |
| `veo_3_1_i2v_s_landscape` | 图生视频 | 横屏 |
| `veo_3_1_i2v_lite_portrait` | 图生视频 Lite(仅首帧) | 竖屏 |
| `veo_3_1_i2v_lite_landscape` | 图生视频 Lite(仅首帧) | 横屏 |
| `veo_3_1_interpolation_lite_portrait` | 图生视频 Lite(首尾帧过渡) | 竖屏 |
| `veo_3_1_interpolation_lite_landscape` | 图生视频 Lite(首尾帧过渡) | 横屏 |

#### 多图生成 (R2V - Reference Images to Video)
🖼️ **支持多张图片**
Expand Down
23 changes: 23 additions & 0 deletions docker/entrypoint.headed.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
#!/bin/sh
# Container entrypoint: make sure BROWSER_EXECUTABLE_PATH points at a usable
# browser (falling back to the Chromium build managed by Playwright), report
# what was found, then hand the process over to the application.
set -eu

# Print the Chromium executable path known to Playwright (possibly empty).
resolve_browser_path() {
    python - <<'PY'
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    print(p.chromium.executable_path or "")
PY
}

# Succeed only when BROWSER_EXECUTABLE_PATH is non-empty and executable.
browser_is_usable() {
    [ -n "${BROWSER_EXECUTABLE_PATH:-}" ] && [ -x "${BROWSER_EXECUTABLE_PATH:-}" ]
}

# Only probe Playwright when the caller did not supply a working browser.
if ! browser_is_usable; then
    # Keep just the last stdout line and drop CRs so stray output cannot
    # corrupt the detected path; probe errors are deliberately silenced.
    candidate_path="$(resolve_browser_path 2>/dev/null | tr -d '\r' | tail -n 1)"
    if [ -n "${candidate_path}" ] && [ -x "${candidate_path}" ]; then
        export BROWSER_EXECUTABLE_PATH="${candidate_path}"
    fi
fi

echo "[entrypoint] starting flow2api (headless browser mode)"
if browser_is_usable; then
    echo "[entrypoint] browser executable: ${BROWSER_EXECUTABLE_PATH}"
    # The version banner is informational; never abort startup because of it.
    "${BROWSER_EXECUTABLE_PATH}" --version || true
else
    echo "[entrypoint] warning: no valid browser executable found for personal/browser captcha" >&2
fi

exec python main.py
133 changes: 127 additions & 6 deletions src/services/browser_captcha_personal.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
from ..core.logger import debug_logger
from ..core.config import config

# 复用 browser 模式的浏览器缓存目录约定,避免容器内每次换位置。
os.environ.setdefault("PLAYWRIGHT_BROWSERS_PATH", "0")


# ==================== Docker 环境检测 ====================
def _is_running_in_docker() -> bool:
Expand Down Expand Up @@ -118,6 +121,111 @@ def _ensure_nodriver_installed() -> bool:
return False


def _run_playwright_install(use_mirror: bool = False) -> bool:
    """Install the Playwright-managed Chromium build in a child process.

    Args:
        use_mirror: When true, set ``PLAYWRIGHT_DOWNLOAD_HOST`` to the
            npmmirror Playwright mirror for the child process only.

    Returns:
        True when ``playwright install chromium`` exits with status 0;
        False when it exits non-zero or when running it raises (including
        the 600-second timeout).
    """
    install_env = os.environ.copy()
    if use_mirror:
        install_env['PLAYWRIGHT_DOWNLOAD_HOST'] = 'https://npmmirror.com/mirrors/playwright'

    install_cmd = [sys.executable, '-m', 'playwright', 'install', 'chromium']

    try:
        debug_logger.log_info("[BrowserCaptcha] 正在安装 chromium 浏览器...")
        print("[BrowserCaptcha] 正在安装 chromium 浏览器...")
        completed = subprocess.run(
            install_cmd,
            capture_output=True,
            text=True,
            timeout=600,
            env=install_env,
        )
        if completed.returncode != 0:
            # Only the head of stderr is logged to keep the log line short.
            debug_logger.log_warning(f"[BrowserCaptcha] chromium 安装失败: {completed.stderr[:200]}")
            return False

        debug_logger.log_info("[BrowserCaptcha] ✅ chromium 浏览器安装成功")
        print("[BrowserCaptcha] ✅ chromium 浏览器安装成功")
        return True
    except Exception as e:
        # Best effort: any failure to run the installer is reported as False.
        debug_logger.log_warning(f"[BrowserCaptcha] chromium 安装异常: {e}")
        return False


def _ensure_playwright_installed() -> bool:
    """Make sure the ``playwright`` package can be imported.

    If the import fails, the package is installed through
    ``_run_pip_install``: first without the mirror, then with it.

    Returns:
        True when the package was already importable or one of the install
        attempts reported success; False when both attempts failed.
    """
    try:
        import playwright  # noqa: F401
        debug_logger.log_info("[BrowserCaptcha] playwright 已安装")
        return True
    except ImportError:
        pass

    debug_logger.log_info("[BrowserCaptcha] playwright 未安装,开始自动安装...")
    print("[BrowserCaptcha] playwright 未安装,开始自动安装...")

    # Try the official index first, then fall back to the mirror.
    for mirror_enabled in (False, True):
        if mirror_enabled:
            debug_logger.log_info("[BrowserCaptcha] 官方源安装失败,尝试国内镜像...")
            print("[BrowserCaptcha] 官方源安装失败,尝试国内镜像...")
        if _run_pip_install('playwright', use_mirror=mirror_enabled):
            return True

    debug_logger.log_error("[BrowserCaptcha] ❌ playwright 自动安装失败,请手动安装: pip install playwright")
    print("[BrowserCaptcha] ❌ playwright 自动安装失败,请手动安装: pip install playwright")
    return False


def _detect_playwright_browser_path() -> Optional[str]:
    """Ask Playwright, in a child interpreter, where its Chromium binary is.

    The probe runs as a separate ``python -c`` process with a 60-second
    timeout, and the last line of its stdout is taken as the path.

    Returns:
        The reported path when the probe exits with status 0 and the path
        exists on disk; otherwise None.
    """
    detect_script = (
        "from playwright.sync_api import sync_playwright\n"
        "with sync_playwright() as p:\n"
        " print(p.chromium.executable_path or '')\n"
    )

    child_env = os.environ.copy()
    # Fill in the browsers path only when the variable is absent.
    if "PLAYWRIGHT_BROWSERS_PATH" not in child_env:
        child_env["PLAYWRIGHT_BROWSERS_PATH"] = os.environ.get("PLAYWRIGHT_BROWSERS_PATH", "0") or "0"

    try:
        probe = subprocess.run(
            [sys.executable, "-c", detect_script],
            capture_output=True,
            text=True,
            timeout=60,
            env=child_env,
        )
        stdout_lines = (probe.stdout or "").strip().splitlines()
        # Only the final stdout line is treated as the path.
        candidate = stdout_lines[-1].strip() if stdout_lines else ""
        probe_succeeded = probe.returncode == 0
        if probe_succeeded and candidate and os.path.exists(candidate):
            debug_logger.log_info(f"[BrowserCaptcha] 检测到 playwright chromium: {candidate}")
            return candidate

        stderr_text = (probe.stderr or "").strip()
        if stderr_text:
            debug_logger.log_warning(f"[BrowserCaptcha] 检测 playwright chromium 失败: {stderr_text[:200]}")
    except Exception as e:
        # Best effort: a failed probe means "not detected", never a crash.
        debug_logger.log_info(f"[BrowserCaptcha] 检测 playwright chromium 时出错: {e}")

    return None


def _ensure_playwright_browser_path() -> Optional[str]:
    """Return a reusable Chromium path, installing Chromium if necessary.

    Steps: detect an existing binary; otherwise make sure the ``playwright``
    package is available, run the Chromium install (without the mirror
    first, then with it), and detect again.

    Returns:
        The detected executable path, or None when the package or the
        browser could not be installed or detection still finds nothing.
    """
    already_present = _detect_playwright_browser_path()
    if already_present:
        return already_present

    if not _ensure_playwright_installed():
        return None

    debug_logger.log_info("[BrowserCaptcha] playwright chromium 未安装,开始自动安装...")
    print("[BrowserCaptcha] playwright chromium 未安装,开始自动安装...")

    if _run_playwright_install(use_mirror=False):
        return _detect_playwright_browser_path()

    debug_logger.log_info("[BrowserCaptcha] 官方源安装失败,尝试国内镜像...")
    print("[BrowserCaptcha] 官方源安装失败,尝试国内镜像...")
    if _run_playwright_install(use_mirror=True):
        return _detect_playwright_browser_path()

    debug_logger.log_error("[BrowserCaptcha] ❌ chromium 浏览器自动安装失败,请手动安装: python -m playwright install chromium")
    print("[BrowserCaptcha] ❌ chromium 浏览器自动安装失败,请手动安装: python -m playwright install chromium")
    return None


# 尝试导入 nodriver
uc = None
NODRIVER_AVAILABLE = False
Expand All @@ -126,14 +234,15 @@ def _ensure_nodriver_installed() -> bool:
if DOCKER_HEADED_BLOCKED:
debug_logger.log_warning(
"[BrowserCaptcha] 检测到 Docker 环境,默认禁用内置浏览器打码。"
"如需启用请设置 ALLOW_DOCKER_HEADED_CAPTCHA=true,并提供 DISPLAY/Xvfb。"
"如需启用请设置 ALLOW_DOCKER_HEADED_CAPTCHA=true。"
"personal 模式默认支持无头,不强制依赖 DISPLAY/Xvfb。"
)
print("[BrowserCaptcha] ⚠️ 检测到 Docker 环境,默认禁用内置浏览器打码")
print("[BrowserCaptcha] 如需启用请设置 ALLOW_DOCKER_HEADED_CAPTCHA=true,并提供 DISPLAY/Xvfb")
print("[BrowserCaptcha] 如需启用请设置 ALLOW_DOCKER_HEADED_CAPTCHA=true")
else:
if IS_DOCKER and ALLOW_DOCKER_HEADED:
debug_logger.log_warning(
"[BrowserCaptcha] Docker 内置浏览器打码白名单已启用,请确保 DISPLAY/Xvfb 可用"
"[BrowserCaptcha] Docker 内置浏览器打码白名单已启用,personal 模式将按 headless 配置决定是否需要 DISPLAY/Xvfb"
)
print("[BrowserCaptcha] ✅ Docker 内置浏览器打码白名单已启用")
if _ensure_nodriver_installed():
Expand Down Expand Up @@ -549,14 +658,18 @@ def _refresh_runtime_tunables(self):
except Exception:
self._fingerprint_cache_ttl_seconds = 3600.0

def _requires_virtual_display(self) -> bool:
    """Tell whether a DISPLAY/Xvfb is required for this instance.

    Returns:
        True only when running in Docker on a POSIX system with
        ``self.headless`` falsy; False otherwise.
    """
    if not IS_DOCKER or os.name != "posix":
        return False
    return not self.headless

def _check_available(self):
"""检查服务是否可用"""
if DOCKER_HEADED_BLOCKED:
raise RuntimeError(
"检测到 Docker 环境,默认禁用内置浏览器打码。"
"如需启用请设置环境变量 ALLOW_DOCKER_HEADED_CAPTCHA=true,并提供 DISPLAY/Xvfb。"
"如需启用请设置环境变量 ALLOW_DOCKER_HEADED_CAPTCHA=true。"
)
if IS_DOCKER and not os.environ.get("DISPLAY"):
if self._requires_virtual_display() and not os.environ.get("DISPLAY"):
raise RuntimeError(
"Docker 内置浏览器打码已启用,但 DISPLAY 未设置。"
"请设置 DISPLAY(例如 :99)并启动 Xvfb。"
Expand Down Expand Up @@ -1294,6 +1407,13 @@ async def initialize(self):
f"[BrowserCaptcha] 指定浏览器不存在,改为自动发现: {browser_executable_path}"
)
browser_executable_path = None
if not browser_executable_path:
playwright_browser_path = _ensure_playwright_browser_path()
if playwright_browser_path:
browser_executable_path = playwright_browser_path
debug_logger.log_info(
f"[BrowserCaptcha] 复用 playwright chromium 作为 nodriver 浏览器: {browser_executable_path}"
)
if browser_executable_path:
debug_logger.log_info(
f"[BrowserCaptcha] 使用指定浏览器可执行文件: {browser_executable_path}"
Expand Down Expand Up @@ -1361,7 +1481,8 @@ async def initialize(self):
browser_args.append('--disable-extensions')

effective_launch_args = list(browser_args)
await self._wait_for_display_ready(display_value)
if self._requires_virtual_display():
await self._wait_for_display_ready(display_value)

effective_uid = "n/a"
if hasattr(os, "geteuid"):
Expand Down
22 changes: 20 additions & 2 deletions src/services/flow_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,11 +809,12 @@ async def upload_image(
ext = "png" if "png" in mime_type else "jpg"
upload_file_name = f"flow2api_upload_{int(time.time() * 1000)}.{ext}"
new_url = f"{self.api_base_url}/flow/uploadImage"
normalized_project_id = str(project_id or "").strip()
new_client_context = {
"tool": "PINHOLE"
}
if project_id:
new_client_context["projectId"] = project_id
if normalized_project_id:
new_client_context["projectId"] = normalized_project_id

new_json_data = {
"clientContext": new_client_context,
Expand Down Expand Up @@ -860,6 +861,23 @@ async def upload_image(
raise Exception(f"Invalid upload response: missing media id, keys={list(new_result.keys())}")
except Exception as new_upload_error:
last_error = new_upload_error
retry_reason = "网络超时" if self._is_timeout_error(new_upload_error) else self._get_retry_reason(str(new_upload_error))

# 旧接口不携带 projectId,带项目上下文的上传一旦回退就可能把图片挂到错误项目。
if normalized_project_id:
if retry_reason and retry_attempt < max_retries - 1:
debug_logger.log_warning(
f"[UPLOAD] Project-scoped upload 遇到{retry_reason},准备重试新版接口 "
f"({retry_attempt + 2}/{max_retries}, project_id={normalized_project_id})..."
)
await asyncio.sleep(1)
continue
raise RuntimeError(
"Project-scoped image upload failed via /flow/uploadImage; "
"legacy :uploadUserImage fallback is disabled because it may attach media "
f"to a different project (project_id={normalized_project_id})."
) from new_upload_error

debug_logger.log_warning(
f"[UPLOAD] New upload API failed, fallback to legacy endpoint: {new_upload_error}"
)
Expand Down
Loading
Loading