diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f33ccba
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,220 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+/lib/
+/lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a Python script from a template
+# before PyInstaller builds the exe, so as to inject date/other info into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+*.lcov
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in collaborative projects with platform-specific dependencies or dependencies
+# that lack cross-platform support, pipenv may install dependencies that don't work, or
+# fail to install all needed dependencies.
+# Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+# poetry.lock
+# poetry.toml
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+# pdm.lock
+# pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+# pixi.lock
+# Pixi creates a virtual environment in the .pixi directory, just like the venv module creates
+# one in the .venv directory. It is recommended not to include this directory in version control.
+.pixi/*
+!.pixi/config.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule*
+celerybeat.pid
+
+# Redis
+*.rdb
+*.aof
+*.pid
+
+# RabbitMQ
+mnesia/
+rabbitmq/
+rabbitmq-data/
+
+# ActiveMQ
+activemq-data/
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended), the entire .idea folder is ignored here:
+.idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+# and can be added to the global gitignore or merged into this file. Here, the entire
+# .vscode folder is ignored:
+.vscode/
+# Temporary file for partial code execution
+tempCodeRunnerFile.py
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+
+# Streamlit
+.streamlit/secrets.toml
diff --git a/README.md b/README.md
index 49ff1ba..3141424 100644
--- a/README.md
+++ b/README.md
@@ -36,14 +36,14 @@ https://github.com/user-attachments/assets/d0c954a9-9cf3-4c8b-8b04-71d75a68854c
## ✨ Features
-| Feature | Description |
-| :--- | :--- |
-| 📝 **Text-to-Figure** | Generate figures directly from natural language descriptions. |
-| 📄 **Paper-to-Figure** | Extract methodology from PDFs and create visual diagrams automatically. |
+| Feature | Description |
+| :------------------------- | :------------------------------------------------------------------------------- |
+| 📝 **Text-to-Figure** | Generate figures directly from natural language descriptions. |
+| 📄 **Paper-to-Figure** | Extract methodology from PDFs and create visual diagrams automatically. |
| 🔄 **Iterative Refinement** | Dual-agent system (Generation + Evaluation) for continuous quality optimization. |
-| 🎨 **Multiple Formats** | Output as **SVG** or **mxGraph XML** (fully compatible with draw.io). |
-| 💅 **Image Enhancement** | Optional AI-powered post-processing for aesthetic beautification. |
-| 🖥️ **Web Interface** | Interactive Next.js frontend for easy generation and editing. |
+| 🎨 **Multiple Formats** | Output as **SVG** or **mxGraph XML** (fully compatible with draw.io). |
+| 💅 **Image Enhancement** | Optional AI-powered post-processing for aesthetic beautification. |
+| 🖥️ **Web Interface** | Interactive Next.js frontend for easy generation and editing. |
---
@@ -66,11 +66,11 @@ AutoFigure employs a **Review-Refine** loop to ensure high accuracy and aestheti
Here are examples of figures generated by AutoFigure across different domains, showcasing its versatility in handling various levels of complexity.
-| Category & Visualization |
-| :---: |
-| **📄 Paper Case** |
-| **📊 Survey Case** |
-| **📝 Blog Case** |
+| Category & Visualization |
+| :----------------------------------------------------------------------------------------: |
+| **📄 Paper Case** |
+| **📊 Survey Case** |
+| **📝 Blog Case** |
| **📘 Textbook Case** |
---
@@ -153,6 +153,45 @@ if result.success:
Ideally suited for visual interaction and editing.
+#### Windows
+
+Run the backend and frontend in two terminals. This is the most explicit and portable way to start the web app.
+
+1. Terminal 1 - backend (replace `YOUR_ENV_NAME` with the Python environment where the AutoFigure dependencies are installed; if that environment is already active, skip the `conda activate` line):
+```powershell
+conda activate YOUR_ENV_NAME
+$env:AUTOFIGURE_BACKEND_PORT = "8796"
+$env:AUTOFIGURE_HOST = "127.0.0.1"
+cd backend
+python app.py
+```
+
+2. Terminal 2 - frontend:
+```powershell
+cd frontend
+$env:NEXT_PUBLIC_AUTOFIGURE_BACKEND_URL = "http://127.0.0.1:8796"
+# Optional: remote PDF-to-Markdown service. If unset, local browser extraction is used.
+# $env:PDF_API_URL = "https://your-pdf-service.example.com/pdf-to-markdown"
+npm run dev
+```
+
+3. Open `http://127.0.0.1:6002` in your browser.
+4. To stop the web app, press `Ctrl+C` in both terminals. If a previous run left ports occupied, stop those processes explicitly:
+
+```powershell
+$ports = @(8796, 6002)
+Get-NetTCPConnection -LocalPort $ports -State Listen -ErrorAction SilentlyContinue |
+ Select-Object -ExpandProperty OwningProcess -Unique |
+ ForEach-Object { Stop-Process -Id $_ -Force }
+```
+
+`PDF_API_URL` is optional. Configure it only if you have a remote service that
+accepts a `POST` form field named `pdf_file` and returns JSON with a `markdown`
+field. When it is not configured, uploaded PDFs are processed with local browser
+text extraction.
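+
+As a quick sanity check of such a service, here is a minimal sketch using Python `requests`, assuming a multipart upload with the field names described above (the service URL is a placeholder):
+
+```python
+import requests
+
+# Hypothetical endpoint; replace with your own PDF-to-Markdown service.
+url = "https://your-pdf-service.example.com/pdf-to-markdown"
+
+with open("paper.pdf", "rb") as f:
+    # The service is expected to accept a multipart form field named `pdf_file`...
+    resp = requests.post(url, files={"pdf_file": f}, timeout=120)
+resp.raise_for_status()
+
+# ...and to return JSON with a `markdown` field containing the extracted text.
+print(resp.json()["markdown"][:200])
+```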
+
+#### Linux/macOS
+
```bash
./start.sh
# Then open http://localhost:6002 in your browser
@@ -170,13 +209,13 @@ We introduce **FigureBench**, the first large-scale benchmark for generating sci
### Dataset Overview
-| Category | Samples | Avg. Tokens | Text Density | Complexity |
-|:---|:---:|:---:|:---:|:---:|
-| 📄 **Paper** | 3,200 | 12,732 | 42.1% | High |
-| 📝 **Blog** | 20 | 4,047 | 46.0% | Med |
-| 📊 **Survey** | 40 | 2,179 | 43.8% | High |
-| 📘 **Textbook** | 40 | 352 | 25.0% | Low |
-| **Total** | **3,300** | **10k+** | **41.2%** | **~5.3 Components** |
+| Category | Samples | Avg. Tokens | Text Density | Complexity |
+| :------------- | :-------: | :---------: | :----------: | :-----------------: |
+| 📄 **Paper** | 3,200 | 12,732 | 42.1% | High |
+| 📝 **Blog** | 20 | 4,047 | 46.0% | Med |
+| 📊 **Survey** | 40 | 2,179 | 43.8% | High |
+| 📘 **Textbook** | 40 | 352 | 25.0% | Low |
+| **Total** | **3,300** | **10k+** | **41.2%** | **~5.3 Components** |
### Download
@@ -198,48 +237,82 @@ AutoFigure is highly configurable. You can set these in `Config()` or via enviro
### Supported LLM Providers
-| Provider | Base URL | Recommended Text / SVG Model | Recommended Image Model |
-|----------|----------|------------------------------|-------------------------|
-| **OpenRouter** | `openrouter.ai/api/v1` | `google/gemini-3.1-pro-preview` | `google/gemini-3.1-flash-image-preview` |
-| **Bianxie** | `api.bianxie.ai/v1` | `gemini-3.1-pro-preview` | `gemini-3.1-flash-image-preview` |
-| **Google** | `generativelanguage...` | `gemini-3.1-pro-preview` | `gemini-3.1-flash-image-preview` |
+| Provider | Base URL | Recommended Text / SVG Model | Recommended Image Model |
+| -------------- | ----------------------- | ------------------------------- | --------------------------------------- |
+| **OpenRouter** | `openrouter.ai/api/v1` | `google/gemini-3.1-pro-preview` | `google/gemini-3.1-flash-image-preview` |
+| **Bianxie** | `api.bianxie.ai/v1` | `gemini-3.1-pro-preview` | `gemini-3.1-flash-image-preview` |
+| **Google** | `generativelanguage...` | `gemini-3.1-pro-preview` | `gemini-3.1-flash-image-preview` |
+
+### Third-Party API Compatibility
+
+AutoFigure can work with third-party model providers as long as their endpoint matches one of the supported API protocols:
+
+| Third-Party API Type | Web UI Provider | Protocol | Base URL Format |
+| ---------------------------------- | --------------- | ----------------- | ------------------------------------------------------------------------------------------------------- |
+| OpenAI-compatible chat/completions | Custom | OpenAI Compatible | Base path such as `https://provider.example.com/v1`; AutoFigure appends `/chat/completions` when needed |
+| Gemini native generateContent | Custom | Gemini Native | Base path ending at the Gemini API version, such as `https://provider.example.com/api/v1/gemini/v1beta` |
+
+For Gemini Native providers, do not include the model name in the Base URL field. Endpoints ending in `/models` are normalized automatically, but the recommended input is the version-level base URL.
+For example, if a provider documents `https://provider.example.com/api/v1/gemini/v1beta/models`, enter:
+```text
+Provider: Custom
+Protocol: Gemini Native
+Base URL: https://provider.example.com/api/v1/gemini/v1beta
+Model: gemini-3.1-pro-preview
+```
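+
+The base-URL handling described above is performed by helpers in `autofigure/utils/api_protocol.py`; a minimal sketch of the expected behavior (the provider URLs are placeholders):
+
+```python
+from autofigure.utils.api_protocol import chat_completions_url, normalize_gemini_base_url
+
+# A trailing /models segment is stripped back to the version-level base URL.
+normalize_gemini_base_url("https://provider.example.com/api/v1/gemini/v1beta/models")
+# -> "https://provider.example.com/api/v1/gemini/v1beta"
+
+# OpenAI-compatible base paths get /chat/completions appended for direct requests.
+chat_completions_url("https://provider.example.com/v1")
+# -> "https://provider.example.com/v1/chat/completions"
+```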
+
+### Web UI Model Mapping
+
+The web interface separates text/SVG generation models from image generation
+models across different dialogs:
+
+| Web UI Location | Purpose | Use This README Column |
+| --------------------------------------------- | ---------------------------------------- | ---------------------------- |
+| Settings -> General -> Methodology Extraction | Extract core method text from papers | Recommended Text / SVG Model |
+| Settings -> LLM -> Layout Generation LLM | Generate and iterate mxGraph XML layouts | Recommended Text / SVG Model |
+| Beautification -> Code2Prompt LLM API | Convert XML/code into an image prompt | Recommended Text / SVG Model |
+| Beautification -> Image Generation API | Generate beautified raster images | Recommended Image Model |
+
+If generation fails while creating the initial mxGraph XML, check the
+`Settings -> LLM -> Layout Generation LLM` configuration. The image model is
+only used later during beautification.
### Generation Settings
-| Option | Description | Default |
-|--------|-------------|---------|
-| `generation_api_key` | API key for figure generation | Required |
-| `generation_base_url` | Base URL for API | Provider default |
-| `generation_model` | Model name | Provider default |
-| `generation_provider` | Provider: 'openrouter', 'bianxie', 'gemini' | 'openrouter' |
+| Option | Description | Default |
+| --------------------- | ------------------------------------------- | ---------------- |
+| `generation_api_key` | API key for figure generation | Required |
+| `generation_base_url` | Base URL for API | Provider default |
+| `generation_model` | Model name | Provider default |
+| `generation_provider` | Provider: 'openrouter', 'bianxie', 'gemini' | 'openrouter' |
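+
+In addition to the options above, `Config` accepts a `generation_protocol` field (`'openai-compatible'` or `'gemini-native'`) for third-party endpoints; when unset, it defaults to the protocol implied by the provider. A minimal sketch of configuring a custom OpenAI-compatible provider (the URL, key, and provider name are placeholders):
+
+```python
+from autofigure.config import Config
+
+config = Config(
+    generation_api_key="sk-...",                            # placeholder key
+    generation_base_url="https://provider.example.com/v1",  # placeholder endpoint
+    generation_model="gemini-3.1-pro-preview",
+    generation_provider="custom",
+    generation_protocol="openai-compatible",                # or "gemini-native"
+)
+```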
### Methodology Extraction Settings
-| Option | Description | Default |
-|--------|-------------|---------|
-| `methodology_api_key` | API key for methodology extraction | Same as generation |
-| `methodology_model` | Model for methodology extraction | Same as generation |
+| Option | Description | Default |
+| ---------------------- | ----------------------------------- | ------------------ |
+| `methodology_api_key` | API key for methodology extraction | Same as generation |
+| `methodology_model` | Model for methodology extraction | Same as generation |
| `methodology_provider` | Provider for methodology extraction | Same as generation |
### Enhancement Settings
-| Option | Description | Default |
-|--------|-------------|---------|
-| `enhancement_api_key` | API key for image enhancement | None |
-| `enhancement_provider` | Enhancement provider | 'openrouter' |
-| `enhancement_model` | Model for image enhancement | Provider default |
-| `enhancement_input_type` | Input type: 'none', 'code', 'code2prompt' | 'code2prompt' |
-| `enhancement_count` | Number of enhanced variants to generate | 1 |
-| `art_style` | Art style description for enhancement | '' |
+| Option | Description | Default |
+| ------------------------ | ----------------------------------------- | ---------------- |
+| `enhancement_api_key` | API key for image enhancement | None |
+| `enhancement_provider` | Enhancement provider | 'openrouter' |
+| `enhancement_model` | Model for image enhancement | Provider default |
+| `enhancement_input_type` | Input type: 'none', 'code', 'code2prompt' | 'code2prompt' |
+| `enhancement_count` | Number of enhanced variants to generate | 1 |
+| `art_style` | Art style description for enhancement | '' |
### Pipeline Settings
-| Option | Description | Default |
-|--------|-------------|---------|
-| `max_iterations` | Maximum refinement iterations | 5 |
-| `quality_threshold` | Quality threshold (0-10) | 9.0 |
-| `output_dir` | Output directory | './autofigure_output' |
-| `custom_references` | Custom reference figure paths | None |
+| Option | Description | Default |
+| ------------------- | ----------------------------- | --------------------- |
+| `max_iterations` | Maximum refinement iterations | 5 |
+| `quality_threshold` | Quality threshold (0-10) | 9.0 |
+| `output_dir` | Output directory | './autofigure_output' |
+| `custom_references` | Custom reference figure paths | None |
---
@@ -247,48 +320,48 @@ AutoFigure is highly configurable. You can set these in `Config()` or via enviro
### `generate()` Parameters
-| Parameter | Description |
-|-----------|-------------|
-| `description` | Text description of the figure to generate |
-| `max_iterations` | Maximum iterations (overrides config) |
-| `output_format` | 'svg' or 'mxgraphxml' |
-| `quality_threshold` | Quality threshold (overrides config) |
-| `enable_enhancement` | Whether to enhance the final image |
-| `art_style` | Art style for enhancement (overrides config) |
+| Parameter | Description |
+| ------------------------ | --------------------------------------------------- |
+| `description` | Text description of the figure to generate |
+| `max_iterations` | Maximum iterations (overrides config) |
+| `output_format` | 'svg' or 'mxgraphxml' |
+| `quality_threshold` | Quality threshold (overrides config) |
+| `enable_enhancement` | Whether to enhance the final image |
+| `art_style` | Art style for enhancement (overrides config) |
| `enhancement_input_type` | 'none', 'code', or 'code2prompt' (overrides config) |
-| `enhancement_count` | Number of enhanced variants (overrides config) |
-| `topic` | Content type: 'paper', 'survey', 'blog', 'textbook' |
-| `custom_references` | Custom reference figure paths |
+| `enhancement_count` | Number of enhanced variants (overrides config) |
+| `topic` | Content type: 'paper', 'survey', 'blog', 'textbook' |
+| `custom_references` | Custom reference figure paths |
### `generate_from_paper()` Parameters
Accepts all parameters from `generate()` plus:
-| Parameter | Description |
-|-----------|-------------|
-| `paper_path` | Path to paper file (PDF or Markdown) |
-| `methodology_api_key` | API key for extraction (overrides config) |
+| Parameter | Description |
+| ---------------------- | ------------------------------------------ |
+| `paper_path` | Path to paper file (PDF or Markdown) |
+| `methodology_api_key` | API key for extraction (overrides config) |
| `methodology_provider` | Provider for extraction (overrides config) |
### Result Object (`GenerationResult`)
-| Attribute | Description |
-|-----------|-------------|
-| `success` | Whether generation was successful |
-| `svg_path` | Path to generated SVG file |
-| `mxgraph_path` | Path to generated mxGraph XML file |
-| `preview_path` | Path to PNG preview image |
-| `enhanced_paths` | List of all enhanced image paths |
-| `final_score` | Final quality score (0-10) |
+| Attribute | Description |
+| ------------------ | ---------------------------------- |
+| `success` | Whether generation was successful |
+| `svg_path` | Path to generated SVG file |
+| `mxgraph_path` | Path to generated mxGraph XML file |
+| `preview_path` | Path to PNG preview image |
+| `enhanced_paths` | List of all enhanced image paths |
+| `final_score` | Final quality score (0-10) |
| `methodology_text` | Extracted methodology (from paper) |
-| `error` | Error message if failed |
+| `error` | Error message if failed |
### Enhancement Modes
-| Mode | Description |
-|------|-------------|
-| `none` | Direct beautification without code reference |
-| `code` | Use generated code (SVG/XML) as reference |
+| Mode | Description |
+| ------------- | ------------------------------------------------------------------ |
+| `none` | Direct beautification without code reference |
+| `code` | Use generated code (SVG/XML) as reference |
| `code2prompt` | Use LLM to analyze code and generate detailed prompt (recommended) |
---
@@ -316,7 +389,7 @@ AutoFigure/
## 🤝 Community & Support
-**WeChat Discussion Group**
+**WeChat Discussion Group**
Scan the QR code to join our community. If the code is expired, please add WeChat ID `nauhcutnil` or contact `tuchuan@mail.hfut.edu.cn`.
@@ -330,23 +403,22 @@ Scan the QR code to join our community. If the code is expired, please add WeCha
If you use **AutoFigure**, **AutoFigure-Edit**, or **FigureBench** in your research, please cite:
```bibtex
-@inproceedings{
-zhu2026autofigure,
-title={AutoFigure: Generating and Refining Publication-Ready Scientific Illustrations},
-author={Minjun Zhu and Zhen Lin and Yixuan Weng and Panzhong Lu and Qiujie Xie and Yifan Wei and Sifan Liu and Qiyao Sun and Yue Zhang},
-booktitle={The Fourteenth International Conference on Learning Representations},
-year={2026},
-url={https://openreview.net/forum?id=5N3z9JQJKq}
+@inproceedings{zhu2026autofigure,
+ title={AutoFigure: Generating and Refining Publication-Ready Scientific Illustrations},
+ author={Minjun Zhu and Zhen Lin and Yixuan Weng and Panzhong Lu and Qiujie Xie and Yifan Wei and Sifan Liu and Qiyao Sun and Yue Zhang},
+ booktitle={The Fourteenth International Conference on Learning Representations},
+ year={2026},
+ url={https://openreview.net/forum?id=5N3z9JQJKq}
}
@misc{lin2026autofigureeditgeneratingeditablescientific,
- title={AutoFigure-Edit: Generating Editable Scientific Illustration},
- author={Zhen Lin and Qiujie Xie and Minjun Zhu and Shichen Li and Qiyao Sun and Enhao Gu and Yiran Ding and Ke Sun and Fang Guo and Panzhong Lu and Zhiyuan Ning and Yixuan Weng and Yue Zhang},
- year={2026},
- eprint={2603.06674},
- archivePrefix={arXiv},
- primaryClass={cs.CV},
- url={https://arxiv.org/abs/2603.06674},
+ title={AutoFigure-Edit: Generating Editable Scientific Illustration},
+ author={Zhen Lin and Qiujie Xie and Minjun Zhu and Shichen Li and Qiyao Sun and Enhao Gu and Yiran Ding and Ke Sun and Fang Guo and Panzhong Lu and Zhiyuan Ning and Yixuan Weng and Yue Zhang},
+ year={2026},
+ eprint={2603.06674},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV},
+ url={https://arxiv.org/abs/2603.06674},
}
```
@@ -365,12 +437,12 @@ Name and logo usage are covered separately in `TRADEMARK.md`.
Explore more open-source research tools from ResearAI:
-| Project | What it does |
-|---|---|
-| [DeepScientist](https://github.com/ResearAI/DeepScientist) | autonomous scientific discovery system |
-| [AutoFigure-Edit](https://github.com/ResearAI/AutoFigure-Edit) | editable vector paper figures |
-| [DeepReviewer-v2](https://github.com/ResearAI/DeepReviewer-v2) | review papers and drafts |
-| [Awesome-AI-Scientist](https://github.com/ResearAI/Awesome-AI-Scientist) | curated AI scientist landscape |
+| Project | What it does |
+| ------------------------------------------------------------------------ | -------------------------------------- |
+| [DeepScientist](https://github.com/ResearAI/DeepScientist) | autonomous scientific discovery system |
+| [AutoFigure-Edit](https://github.com/ResearAI/AutoFigure-Edit) | editable vector paper figures |
+| [DeepReviewer-v2](https://github.com/ResearAI/DeepReviewer-v2) | review papers and drafts |
+| [Awesome-AI-Scientist](https://github.com/ResearAI/Awesome-AI-Scientist) | curated AI scientist landscape |
---
diff --git a/autofigure/config.py b/autofigure/config.py
index 5506cba..af3b59b 100644
--- a/autofigure/config.py
+++ b/autofigure/config.py
@@ -10,6 +10,8 @@
from pathlib import Path
import os
+from .utils.api_protocol import default_base_url, normalize_protocol
+
@dataclass
class Config:
@@ -38,16 +40,19 @@ class Config:
generation_base_url: Optional[str] = None
generation_model: Optional[str] = None
generation_provider: str = "openrouter" # openrouter, bianxie, gemini
+ generation_protocol: Optional[str] = None # openai-compatible, gemini-native
# Methodology extraction LLM settings (defaults to generation settings)
methodology_api_key: Optional[str] = None
methodology_base_url: Optional[str] = None
methodology_model: Optional[str] = None
methodology_provider: Optional[str] = None
+ methodology_protocol: Optional[str] = None
# Image enhancement settings
enhancement_api_key: Optional[str] = None
enhancement_provider: str = "openrouter" # openrouter, bianxie, gemini
+ enhancement_protocol: Optional[str] = None
enhancement_model: Optional[str] = None
enhancement_base_url: Optional[str] = None
enhancement_input_type: str = "code2prompt" # none, code, code2prompt
@@ -74,12 +79,18 @@ def __post_init__(self):
self.methodology_model = self.generation_model
if self.methodology_provider is None:
self.methodology_provider = self.generation_provider
+ if self.generation_protocol is None:
+ self.generation_protocol = normalize_protocol(self.generation_provider)
+ if self.methodology_protocol is None:
+ self.methodology_protocol = self.generation_protocol
+ if self.enhancement_protocol is None:
+ self.enhancement_protocol = normalize_protocol(self.enhancement_provider)
# Set default base URLs based on provider
if self.generation_base_url is None:
- self.generation_base_url = self._get_default_base_url(self.generation_provider)
+ self.generation_base_url = self._get_default_base_url(self.generation_provider, self.generation_protocol)
if self.methodology_base_url is None:
- self.methodology_base_url = self._get_default_base_url(self.methodology_provider)
+ self.methodology_base_url = self._get_default_base_url(self.methodology_provider, self.methodology_protocol)
# Set default models
if self.generation_model is None:
@@ -91,16 +102,11 @@ def __post_init__(self):
if self.enhancement_model is None:
self.enhancement_model = self._get_default_enhancement_model(self.enhancement_provider)
if self.enhancement_base_url is None:
- self.enhancement_base_url = self._get_default_base_url(self.enhancement_provider)
+ self.enhancement_base_url = self._get_default_base_url(self.enhancement_provider, self.enhancement_protocol)
- def _get_default_base_url(self, provider: str) -> str:
+ def _get_default_base_url(self, provider: str, protocol: Optional[str] = None) -> str:
"""Get default base URL for a provider."""
- urls = {
- "openrouter": "https://openrouter.ai/api/v1",
- "bianxie": "https://api.bianxie.ai/v1",
- "gemini": "https://generativelanguage.googleapis.com/v1beta/openai/",
- }
- return urls.get(provider, "https://openrouter.ai/api/v1")
+ return default_base_url(provider, protocol) or "https://openrouter.ai/api/v1"
def _get_default_model(self, provider: str) -> str:
"""Get default model for a provider."""
@@ -182,9 +188,12 @@ def from_env(cls) -> "Config":
generation_base_url=os.environ.get("AUTOFIGURE_BASE_URL"),
generation_model=os.environ.get("AUTOFIGURE_MODEL"),
generation_provider=os.environ.get("AUTOFIGURE_PROVIDER", "openrouter"),
+ generation_protocol=os.environ.get("AUTOFIGURE_PROTOCOL"),
methodology_api_key=os.environ.get("AUTOFIGURE_METHODOLOGY_API_KEY"),
+ methodology_protocol=os.environ.get("AUTOFIGURE_METHODOLOGY_PROTOCOL"),
enhancement_api_key=os.environ.get("AUTOFIGURE_ENHANCEMENT_API_KEY"),
enhancement_provider=os.environ.get("AUTOFIGURE_ENHANCEMENT_PROVIDER", "openrouter"),
+ enhancement_protocol=os.environ.get("AUTOFIGURE_ENHANCEMENT_PROTOCOL"),
enhancement_model=os.environ.get("AUTOFIGURE_ENHANCEMENT_MODEL"),
enhancement_base_url=os.environ.get("AUTOFIGURE_ENHANCEMENT_BASE_URL"),
enhancement_input_type=os.environ.get("AUTOFIGURE_ENHANCEMENT_INPUT_TYPE", "code2prompt"),
diff --git a/autofigure/enhancer.py b/autofigure/enhancer.py
index 795a345..89e8058 100644
--- a/autofigure/enhancer.py
+++ b/autofigure/enhancer.py
@@ -18,6 +18,15 @@
import requests
+from .utils.api_protocol import (
+ GEMINI_NATIVE,
+ chat_completions_url,
+ default_base_url,
+ normalize_gemini_base_url,
+ normalize_protocol,
+)
+from .utils.llm_client import LLMClient
+
if TYPE_CHECKING:
from .config import Config
@@ -31,6 +40,7 @@ def convert_code_to_text2image_prompt(
base_url: str = "",
model: str = "",
provider: str = "bianxie",
+ protocol: Optional[str] = None,
) -> Optional[str]:
"""
Convert code (mxgraphxml, HTML, etc.) to text2image prompt using LLM.
@@ -52,8 +62,6 @@ def convert_code_to_text2image_prompt(
Text2image prompt string, or None on failure
"""
try:
- from openai import OpenAI
-
if not api_key:
print("[Code2Prompt] ERROR: API key not provided!")
return None
@@ -183,37 +191,22 @@ def convert_code_to_text2image_prompt(
Now analyze this {code_format.upper()} code and create the comprehensive text-to-image prompt following the exact format above. Focus especially on converting every programmatic element into a specific, detailed visual description that perfectly matches the "{art_style}" artistic style while maintaining visual clarity and professional quality."""
- # Adjust base_url for different providers
- actual_base_url = base_url
- if provider == "gemini" and base_url:
- if not base_url.endswith("/openai/") and not base_url.endswith("/openai"):
- if base_url.endswith("/"):
- actual_base_url = base_url + "openai/"
- else:
- actual_base_url = base_url + "/openai/"
- elif not actual_base_url:
- if provider == "openrouter":
- actual_base_url = "https://openrouter.ai/api/v1"
- elif provider == "gemini":
- actual_base_url = "https://generativelanguage.googleapis.com/v1beta/openai/"
- else:
- actual_base_url = "https://api.bianxie.ai/v1"
-
- print(f"[Code2Prompt] Using provider: {provider}, model: {model}")
-
- client = OpenAI(base_url=actual_base_url, api_key=api_key)
-
- completion = client.chat.completions.create(
+ effective_protocol = normalize_protocol(provider, protocol)
+ actual_base_url = base_url or default_base_url(provider, effective_protocol)
+
+ print(f"[Code2Prompt] Using provider: {provider}, protocol: {effective_protocol}, model: {model}")
+
+ client = LLMClient(
+ api_key=api_key,
+ base_url=actual_base_url,
model=model,
- messages=[{"role": "user", "content": conversion_prompt}],
- temperature=0.7,
+ provider=provider,
+ protocol=effective_protocol,
)
-
- if completion and completion.choices:
- response = completion.choices[0].message.content
- if response and len(response.strip()) > 0:
- print(f"[Code2Prompt] Generated prompt length: {len(response)} chars")
- return response.strip()
+ response = client.call([conversion_prompt], temperature=0.7)
+ if response and len(response.strip()) > 0:
+ print(f"[Code2Prompt] Generated prompt length: {len(response)} chars")
+ return response.strip()
print("[Code2Prompt] LLM returned empty response")
return None
@@ -303,29 +296,31 @@ def enhance(
output_path = str(input_path.parent / f"{input_path.stem}_enhanced.png")
provider = self.config.enhancement_provider
+ protocol = normalize_protocol(provider, getattr(self.config, "enhancement_protocol", None))
model = self.config.enhancement_model
base_url = self.config.enhancement_base_url
art_style = style or self.config.art_style or ""
print(f"[ImageEnhancer] Provider: {provider}")
+ print(f"[ImageEnhancer] Protocol: {protocol}")
print(f"[ImageEnhancer] Model: {model}")
print(f"[ImageEnhancer] Style: {art_style or '(default)'}")
print(f"[ImageEnhancer] Input type: {input_type}")
# Route to provider-specific function
- if provider == "openrouter":
- return self._enhance_with_openrouter(
+ if protocol == GEMINI_NATIVE:
+ return self._enhance_with_gemini(
str(input_path), enhancement_input, output_path,
style=art_style, input_type=input_type,
api_key=api_key, base_url=base_url, model=model
)
- elif provider == "gemini":
- return self._enhance_with_gemini(
+ elif provider == "openrouter":
+ return self._enhance_with_openrouter(
str(input_path), enhancement_input, output_path,
style=art_style, input_type=input_type,
api_key=api_key, base_url=base_url, model=model
)
- else: # bianxie (default)
+ else: # OpenAI-compatible (BianXie/custom/default)
return self._enhance_with_bianxie(
str(input_path), enhancement_input, output_path,
style=art_style, input_type=input_type,
@@ -568,12 +563,7 @@ def _enhance_with_bianxie(
if not model:
model = "gemini-3.1-flash-image-preview"
- actual_base_url = base_url or "https://api.bianxie.ai/v1/chat/completions"
- if not actual_base_url.endswith("/chat/completions"):
- if actual_base_url.endswith("/"):
- actual_base_url = actual_base_url + "chat/completions"
- else:
- actual_base_url = actual_base_url + "/chat/completions"
+ actual_base_url = chat_completions_url(base_url or "https://api.bianxie.ai/v1")
print(f"[BianXie] API: {actual_base_url}")
print(f"[BianXie] Model: {model}")
@@ -651,12 +641,7 @@ def _enhance_with_openrouter(
if not model:
model = "google/gemini-3.1-flash-image-preview"
- actual_base_url = base_url or "https://openrouter.ai/api/v1"
- if not actual_base_url.endswith("/chat/completions"):
- if actual_base_url.endswith("/"):
- actual_base_url = actual_base_url + "chat/completions"
- else:
- actual_base_url = actual_base_url + "/chat/completions"
+ actual_base_url = chat_completions_url(base_url or "https://openrouter.ai/api/v1")
print(f"[OpenRouter] API: {actual_base_url}")
print(f"[OpenRouter] Model: {model}")
@@ -738,17 +723,7 @@ def _enhance_with_gemini(
if not base_url:
base_url = "https://generativelanguage.googleapis.com/v1beta"
- # Clean up base_url
- base_url = base_url.rstrip("/")
- for suffix in ["/chat/completions", "/completions", "/v1/chat", "/openai"]:
- if base_url.endswith(suffix):
- base_url = base_url[:-len(suffix)]
-
- if not base_url.endswith("/v1beta") and "generativelanguage.googleapis.com" in base_url:
- if "/v1beta" not in base_url:
- base_url = base_url.rstrip("/") + "/v1beta"
-
- api_url = f"{base_url}/models/{model}:generateContent?key={api_key}"
+ api_url = f"{normalize_gemini_base_url(base_url)}/models/{model}:generateContent?key={api_key}"
print(f"[Gemini] Model: {model}")
print(f"[Gemini] Input type: {input_type}")
diff --git a/autofigure/generator.py b/autofigure/generator.py
index 030fcab..eaf0213 100644
--- a/autofigure/generator.py
+++ b/autofigure/generator.py
@@ -11,6 +11,14 @@
import webbrowser
from openai import OpenAI
+from .utils.api_protocol import (
+ GEMINI_NATIVE,
+ call_gemini_native_text,
+ default_base_url,
+ normalize_openai_base_url,
+ normalize_protocol,
+)
+
CONFIG = {
'MAX_ITERATIONS': 5,
@@ -28,6 +36,7 @@
# LLM Configuration
# =========================
'LLM_PROVIDER': 'openrouter', # openrouter, bianxie, gemini
+ 'LLM_PROTOCOL': 'openai-compatible', # openai-compatible, gemini-native
# OpenRouter
'OPENROUTER_BASE_URL': 'https://openrouter.ai/api/v1',
@@ -65,7 +74,9 @@ def update_config_from_sdk(sdk_config) -> None:
# LLM settings based on provider
provider = sdk_config.generation_provider
+ protocol = sdk_config.generation_protocol or normalize_protocol(provider)
CONFIG['LLM_PROVIDER'] = provider
+ CONFIG['LLM_PROTOCOL'] = protocol
if provider == 'openrouter':
CONFIG['OPENROUTER_API_KEY'] = sdk_config.generation_api_key
@@ -77,8 +88,12 @@ def update_config_from_sdk(sdk_config) -> None:
CONFIG['BIANXIE_CHAT_MODEL'] = sdk_config.generation_model or 'gemini-3.1-pro-preview'
elif provider == 'gemini':
CONFIG['GOOGLE_API_KEY'] = sdk_config.generation_api_key
- CONFIG['GEMINI_BASE_URL'] = sdk_config.generation_base_url or 'https://generativelanguage.googleapis.com/v1beta/openai/'
+ CONFIG['GEMINI_BASE_URL'] = sdk_config.generation_base_url or default_base_url(provider, protocol)
CONFIG['GEMINI_MODEL'] = sdk_config.generation_model or 'gemini-3.1-pro-preview'
+ else:
+ CONFIG['BIANXIE_API_KEY'] = sdk_config.generation_api_key
+ CONFIG['BIANXIE_BASE_URL'] = sdk_config.generation_base_url or default_base_url(provider, protocol) or ''
+ CONFIG['BIANXIE_CHAT_MODEL'] = sdk_config.generation_model or 'gemini-3.1-pro-preview'
# Pipeline settings
CONFIG['MAX_ITERATIONS'] = sdk_config.max_iterations
@@ -92,7 +107,8 @@ def update_config_from_sdk(sdk_config) -> None:
def call_unified_llm(contents: List[Any], provider: Optional[str] = None,
api_key: Optional[str] = None, model: Optional[str] = None,
- base_url: Optional[str] = None) -> Optional[str]:
+ base_url: Optional[str] = None,
+ protocol: Optional[str] = None) -> Optional[str]:
"""
Unified LLM call interface - supports multiple providers (bianxie, openrouter, gemini)
@@ -111,22 +127,15 @@ def call_unified_llm(contents: List[Any], provider: Optional[str] = None,
"""
# Determine the actual provider from parameter or CONFIG
actual_provider = provider or CONFIG.get('LLM_PROVIDER', 'bianxie')
+ actual_protocol = normalize_protocol(actual_provider, protocol or CONFIG.get('LLM_PROTOCOL'))
# Determine base_url, api_key, model based on provider
if actual_provider == 'gemini':
- # Gemini uses its own OpenAI-compatible endpoint
actual_api_key = api_key or CONFIG.get('GOOGLE_API_KEY')
actual_model = model or CONFIG.get('GEMINI_MODEL') or 'gemini-3.1-pro-preview'
- # Gemini OpenAI-compatible endpoint: https://generativelanguage.googleapis.com/v1beta/openai/
actual_base_url = base_url or CONFIG.get('GEMINI_BASE_URL')
if not actual_base_url:
- actual_base_url = 'https://generativelanguage.googleapis.com/v1beta/openai/'
- # Ensure the URL ends with /openai/ for OpenAI-compatible calls
- if actual_base_url and not actual_base_url.endswith('/openai/') and not actual_base_url.endswith('/openai'):
- if actual_base_url.endswith('/'):
- actual_base_url = actual_base_url + 'openai/'
- else:
- actual_base_url = actual_base_url + '/openai/'
+ actual_base_url = default_base_url(actual_provider, actual_protocol)
elif actual_provider == 'openrouter':
actual_base_url = base_url or CONFIG.get('OPENROUTER_BASE_URL') or CONFIG.get('BIANXIE_BASE_URL')
actual_api_key = api_key or CONFIG.get('OPENROUTER_API_KEY') or CONFIG.get('BIANXIE_API_KEY')
@@ -140,14 +149,19 @@ def call_unified_llm(contents: List[Any], provider: Optional[str] = None,
actual_model = model or CONFIG.get('BIANXIE_CHAT_MODEL')
# Default Bianxie URL if nothing is set
if not actual_base_url:
- actual_base_url = 'https://api.bianxie.ai/v1'
+ actual_base_url = default_base_url(actual_provider, actual_protocol) or 'https://api.bianxie.ai/v1'
# Debug logging
print(f"[svg_figure_generator.call_unified_llm] provider: {actual_provider}")
+ print(f"[svg_figure_generator.call_unified_llm] protocol: {actual_protocol}")
print(f"[svg_figure_generator.call_unified_llm] base_url: {actual_base_url}")
print(f"[svg_figure_generator.call_unified_llm] model: {actual_model}")
print(f"[svg_figure_generator.call_unified_llm] api_key present: {bool(actual_api_key)}, suffix: {'...' + actual_api_key[-4:] if actual_api_key and len(actual_api_key) > 4 else 'N/A'}")
+ if actual_protocol == GEMINI_NATIVE:
+ return call_gemini_native_text(contents, actual_api_key, actual_model, actual_base_url)
+
+ actual_base_url = normalize_openai_base_url(actual_base_url)
return _call_openai_compatible(contents, actual_api_key, actual_model, actual_base_url)
diff --git a/autofigure/utils/api_protocol.py b/autofigure/utils/api_protocol.py
new file mode 100644
index 0000000..8419448
--- /dev/null
+++ b/autofigure/utils/api_protocol.py
@@ -0,0 +1,166 @@
+"""
+API endpoint and protocol helpers.
+
+Provider names are UI presets or legacy compatibility labels. The protocol is
+what determines the request shape.
+"""
+
+import base64
+import io
+from typing import Any, Dict, List, Optional
+
+import requests
+from PIL import Image
+
+
+OPENAI_COMPATIBLE = "openai-compatible"
+GEMINI_NATIVE = "gemini-native"
+
+
+def normalize_protocol(provider: Optional[str] = None, protocol: Optional[str] = None) -> str:
+ """Return the effective API protocol for a provider/config pair."""
+ if protocol:
+ value = protocol.strip().lower().replace("_", "-")
+ if value in {"openai", "openai-compatible", "openai-compatible-chat"}:
+ return OPENAI_COMPATIBLE
+ if value in {"gemini", "gemini-native", "google-gemini"}:
+ return GEMINI_NATIVE
+
+ provider_value = (provider or "").strip().lower().replace("_", "-")
+ if provider_value in {"gemini", "google-gemini"}:
+ return GEMINI_NATIVE
+ return OPENAI_COMPATIBLE
+
+
+def default_base_url(provider: Optional[str], protocol: Optional[str] = None) -> str:
+ """Default base URL for presets. User-supplied base_url should override this."""
+ provider_value = (provider or "").strip().lower().replace("_", "-")
+ effective_protocol = normalize_protocol(provider_value, protocol)
+
+ if provider_value == "openrouter":
+ return "https://openrouter.ai/api/v1"
+ if provider_value == "bianxie":
+ return "https://api.bianxie.ai/v1"
+ if effective_protocol == GEMINI_NATIVE:
+ return "https://generativelanguage.googleapis.com/v1beta"
+ if provider_value in {"gemini", "google-gemini"}:
+ return "https://generativelanguage.googleapis.com/v1beta/openai/"
+ return ""
+
+
+def normalize_openai_base_url(base_url: str) -> str:
+ """Normalize an OpenAI-compatible base URL for SDK clients."""
+ value = (base_url or "").strip().rstrip("/")
+ for suffix in ("/chat/completions", "/completions"):
+ if value.endswith(suffix):
+ value = value[: -len(suffix)]
+ break
+ return value
+
+
+def chat_completions_url(base_url: str) -> str:
+ """Return a direct /chat/completions URL for requests-based calls."""
+ value = normalize_openai_base_url(base_url)
+ if not value:
+ return value
+ return f"{value}/chat/completions"
+
+
+def normalize_gemini_base_url(base_url: str) -> str:
+ """Normalize a Gemini native base URL before adding /models/...:generateContent."""
+ value = (base_url or "").strip().split("?", 1)[0].rstrip("/")
+
+ model_marker = "/models/"
+ if model_marker in value:
+ value = value.split(model_marker, 1)[0].rstrip("/")
+
+ for suffix in ("/chat/completions", "/completions", "/v1/chat", "/openai"):
+ if value.endswith(suffix):
+ value = value[: -len(suffix)].rstrip("/")
+
+ if value.endswith("/models"):
+ value = value[: -len("/models")].rstrip("/")
+
+ if "generativelanguage.googleapis.com" in value and "/v1beta" not in value:
+ value = f"{value}/v1beta"
+ elif value.endswith("/gemini") and "/v1beta" not in value:
+ value = f"{value}/v1beta"
+
+ return value
+
+
+def build_gemini_parts(contents: List[Any]) -> List[Dict[str, Any]]:
+ """Convert text/PIL content parts into Gemini native request parts."""
+ parts: List[Dict[str, Any]] = []
+ for part in contents:
+ if isinstance(part, str):
+ parts.append({"text": part})
+ elif isinstance(part, Image.Image):
+ buf = io.BytesIO()
+ part.save(buf, format="PNG")
+ image_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+ parts.append({
+ "inlineData": {
+ "mimeType": "image/png",
+ "data": image_b64,
+ }
+ })
+ else:
+ print(f"[APIProtocol] Skipping unsupported content type: {type(part)}")
+ return parts
+
+
+def call_gemini_native_text(
+ contents: List[Any],
+ api_key: str,
+ model: str,
+ base_url: str,
+ system_prompt: Optional[str] = None,
+ temperature: float = 0.7,
+ max_tokens: Optional[int] = None,
+) -> Optional[str]:
+ """Call Gemini native generateContent and return text from the first candidate."""
+ if not api_key:
+ print("[GeminiNative] ERROR: API key not provided!")
+ return None
+ if not model:
+ print("[GeminiNative] ERROR: Model not specified!")
+ return None
+ if not base_url:
+ print("[GeminiNative] ERROR: Base URL not specified!")
+ return None
+
+ api_url = f"{normalize_gemini_base_url(base_url)}/models/{model}:generateContent?key={api_key}"
+ print(f"[GeminiNative] API URL: {api_url.split('?key=', 1)[0]}?key=***")
+ payload: Dict[str, Any] = {
+ "contents": [{"role": "user", "parts": build_gemini_parts(contents)}],
+ "generationConfig": {"temperature": temperature},
+ }
+ if system_prompt:
+ payload["systemInstruction"] = {"parts": [{"text": system_prompt}]}
+ if max_tokens:
+ payload["generationConfig"]["maxOutputTokens"] = max_tokens
+
+ response = requests.post(
+ api_url,
+ headers={"Content-Type": "application/json"},
+ json=payload,
+ timeout=300,
+ )
+ if response.status_code != 200:
+ raise Exception(f"Gemini native API request failed: {response.status_code} - {response.text[:500]}")
+
+ result = response.json()
+ if "error" in result:
+ error_msg = result.get("error", {})
+ if isinstance(error_msg, dict):
+ error_msg = error_msg.get("message", str(error_msg))
+ raise Exception(f"Gemini native API error: {error_msg}")
+
+ text_parts: List[str] = []
+ for candidate in result.get("candidates", []):
+ for part in candidate.get("content", {}).get("parts", []):
+ if "text" in part:
+ text_parts.append(part["text"])
+
+ return "\n".join(text_parts).strip() or None
diff --git a/autofigure/utils/llm_client.py b/autofigure/utils/llm_client.py
index 1fc6c15..762c9c3 100644
--- a/autofigure/utils/llm_client.py
+++ b/autofigure/utils/llm_client.py
@@ -11,6 +11,15 @@
from PIL import Image
+from .api_protocol import (
+ GEMINI_NATIVE,
+ OPENAI_COMPATIBLE,
+ call_gemini_native_text,
+ default_base_url,
+ normalize_openai_base_url,
+ normalize_protocol,
+)
+
class LLMClient:
"""
@@ -29,6 +38,7 @@ def __init__(
base_url: str = "https://openrouter.ai/api/v1",
model: str = "google/gemini-3.1-pro-preview",
provider: str = "openrouter",
+ protocol: Optional[str] = None,
):
"""
Initialize LLM client.
@@ -37,20 +47,17 @@ def __init__(
api_key: API key for the provider
base_url: Base URL for the API endpoint
model: Model name to use
- provider: Provider name (openrouter, bianxie, gemini)
+ provider: Provider name/preset (openrouter, bianxie, gemini, custom)
+ protocol: API protocol (openai-compatible, gemini-native)
"""
self.api_key = api_key
- self.base_url = base_url
+ self.protocol = normalize_protocol(provider, protocol)
+ self.base_url = base_url or default_base_url(provider, self.protocol)
self.model = model
self.provider = provider
- # Adjust base_url for Gemini
- if provider == "gemini" and base_url:
- if not base_url.endswith("/openai/") and not base_url.endswith("/openai"):
- if base_url.endswith("/"):
- self.base_url = base_url + "openai/"
- else:
- self.base_url = base_url + "/openai/"
+ if self.protocol == OPENAI_COMPATIBLE:
+ self.base_url = normalize_openai_base_url(self.base_url)
def call(
self,
@@ -70,12 +77,22 @@ def call(
Response text, or None on failure
"""
try:
- from openai import OpenAI
-
if not self.api_key:
print("[LLMClient] ERROR: API key not provided!")
return None
+ if self.protocol == GEMINI_NATIVE:
+ return call_gemini_native_text(
+ contents=contents,
+ api_key=self.api_key,
+ model=self.model,
+ base_url=self.base_url,
+ temperature=temperature,
+ max_tokens=max_tokens,
+ )
+
+ from openai import OpenAI
+
client = OpenAI(base_url=self.base_url, api_key=self.api_key)
# Build message content
@@ -134,12 +151,23 @@ def call_with_system(
Response text, or None on failure
"""
try:
- from openai import OpenAI
-
if not self.api_key:
print("[LLMClient] ERROR: API key not provided!")
return None
+ if self.protocol == GEMINI_NATIVE:
+ return call_gemini_native_text(
+ contents=user_contents,
+ api_key=self.api_key,
+ model=self.model,
+ base_url=self.base_url,
+ system_prompt=system_prompt,
+ temperature=temperature,
+ max_tokens=max_tokens,
+ )
+
+ from openai import OpenAI
+
client = OpenAI(base_url=self.base_url, api_key=self.api_key)
# Build user message content
@@ -197,6 +225,7 @@ def create_client_from_config(config: "Config", purpose: str = "generation") ->
base_url=config.methodology_base_url,
model=config.methodology_model,
provider=config.methodology_provider,
+ protocol=config.methodology_protocol,
)
else:
return LLMClient(
@@ -204,4 +233,5 @@ def create_client_from_config(config: "Config", purpose: str = "generation") ->
base_url=config.generation_base_url,
model=config.generation_model,
provider=config.generation_provider,
+ protocol=config.generation_protocol,
)
diff --git a/backend/autofigure_routes.py b/backend/autofigure_routes.py
index 00e33a2..b74a329 100644
--- a/backend/autofigure_routes.py
+++ b/backend/autofigure_routes.py
@@ -34,6 +34,13 @@
)
from autofigure.config import Config
from autofigure.utils.llm_client import LLMClient
+ from autofigure.utils.api_protocol import (
+ GEMINI_NATIVE,
+ chat_completions_url,
+ default_base_url,
+ normalize_gemini_base_url,
+ normalize_protocol,
+ )
from autofigure.enhancer import ImageEnhancer, convert_code_to_text2image_prompt
AUTOFIGURE_AVAILABLE = True
ENHANCEMENT_AVAILABLE = True
@@ -55,9 +62,11 @@ def extract_methodology(markdown_content: str, config: Dict[str, Any]) -> Option
try:
# Support both camelCase (from frontend) and snake_case formats
provider = config.get('methodologyLlmProvider') or config.get('methodology_llm_provider', 'bianxie')
+ protocol = config.get('methodologyLlmProtocol') or config.get('methodology_llm_protocol')
api_key = config.get('methodologyLlmApiKey') or config.get('methodology_llm_api_key', '')
model = config.get('methodologyLlmModel') or config.get('methodology_llm_model', 'gemini-3.1-pro-preview')
base_url = config.get('methodologyLlmBaseUrl') or config.get('methodology_llm_base_url', '')
+ protocol = normalize_protocol(provider, protocol)
if not api_key:
print("[AutoFigure] No methodology LLM API key provided, skipping extraction")
@@ -99,19 +108,15 @@ def extract_methodology(markdown_content: str, config: Dict[str, Any]) -> Option
# Set default base_url based on provider if not specified
if not base_url:
- if provider == 'openrouter':
- base_url = 'https://openrouter.ai/api/v1'
- elif provider == 'gemini':
- base_url = 'https://generativelanguage.googleapis.com/v1beta'
- else:
- base_url = 'https://api.bianxie.ai/v1'
+ base_url = default_base_url(provider, protocol)
# Create LLMClient with the configuration
client = LLMClient(
api_key=api_key,
base_url=base_url,
model=model,
- provider=provider
+ provider=provider,
+ protocol=protocol,
)
response = client.call([prompt])
@@ -239,6 +244,7 @@ def reset_autofigure_config():
# Reset LLM provider
AUTOFIGURE_CONFIG['LLM_PROVIDER'] = 'bianxie'
+ AUTOFIGURE_CONFIG['LLM_PROTOCOL'] = 'openai-compatible'
print("[AutoFigure] AUTOFIGURE_CONFIG reset to default values")
@@ -296,6 +302,7 @@ def create_session():
'min_improvement': config.get('minImprovement', 0.2),
'human_in_loop': config.get('humanInLoop', True),
'llm_provider': config.get('llmProvider', 'claude'),
+ 'llm_protocol': config.get('llmProtocol'),
'api_key': config.get('apiKey', ''),
'base_url': config.get('baseUrl', ''),
'model': config.get('model', ''),
@@ -304,6 +311,7 @@ def create_session():
# Methodology extraction configuration
'enable_methodology_extraction': config.get('enableMethodologyExtraction', True),
'methodology_llm_provider': config.get('methodologyLlmProvider', 'bianxie'),
+ 'methodology_llm_protocol': config.get('methodologyLlmProtocol'),
'methodology_llm_api_key': config.get('methodologyLlmApiKey', ''),
'methodology_llm_base_url': config.get('methodologyLlmBaseUrl', ''),
'methodology_llm_model': config.get('methodologyLlmModel', 'gemini-3.1-pro-preview'),
@@ -370,6 +378,10 @@ def start_generation(session_id: str):
}), 400
AUTOFIGURE_CONFIG['LLM_PROVIDER'] = config['llm_provider']
+ AUTOFIGURE_CONFIG['LLM_PROTOCOL'] = normalize_protocol(
+ config.get('llm_provider'),
+ config.get('llm_protocol'),
+ )
AUTOFIGURE_CONFIG['MAX_ITERATIONS'] = config['max_iterations']
AUTOFIGURE_CONFIG['QUALITY_THRESHOLD'] = config['quality_threshold']
AUTOFIGURE_CONFIG['MIN_IMPROVEMENT'] = config['min_improvement']
@@ -382,11 +394,10 @@ def start_generation(session_id: str):
if config['api_key']:
if config['llm_provider'] == 'gemini':
AUTOFIGURE_CONFIG['GOOGLE_API_KEY'] = config['api_key']
- # Set base URL for Gemini (OpenAI-compatible endpoint)
- if config.get('base_url'):
- AUTOFIGURE_CONFIG['GEMINI_BASE_URL'] = config['base_url']
- else:
- AUTOFIGURE_CONFIG['GEMINI_BASE_URL'] = 'https://generativelanguage.googleapis.com/v1beta'
+ AUTOFIGURE_CONFIG['GEMINI_BASE_URL'] = (
+ config.get('base_url') or
+ default_base_url('gemini', AUTOFIGURE_CONFIG['LLM_PROTOCOL'])
+ )
elif config['llm_provider'] == 'claude':
AUTOFIGURE_CONFIG['CLAUDE_API_KEY'] = config['api_key']
if config['base_url']:
@@ -408,6 +419,11 @@ def start_generation(session_id: str):
AUTOFIGURE_CONFIG['BIANXIE_BASE_URL'] = config['base_url']
elif config['llm_provider'] == 'aigcbest':
AUTOFIGURE_CONFIG['AIGCBEST_API_KEY'] = config['api_key']
+ else:
+ # Custom/OpenAI-compatible providers use the BianXie-compatible slot
+ # because generator.call_unified_llm resolves unknown providers there.
+ AUTOFIGURE_CONFIG['BIANXIE_API_KEY'] = config['api_key']
+ AUTOFIGURE_CONFIG['BIANXIE_BASE_URL'] = config.get('base_url') or ''
if config['model']:
if config['llm_provider'] == 'gemini':
@@ -422,6 +438,8 @@ def start_generation(session_id: str):
AUTOFIGURE_CONFIG['BIANXIE_CHAT_MODEL'] = config['model']
elif config['llm_provider'] == 'aigcbest':
AUTOFIGURE_CONFIG['AIGCBEST_CHAT_MODEL'] = config['model']
+ else:
+ AUTOFIGURE_CONFIG['BIANXIE_CHAT_MODEL'] = config['model']
# Get input content and apply methodology extraction if content type is 'paper'
input_content = session['input_content']
@@ -464,7 +482,7 @@ def start_generation(session_id: str):
)
except Exception as gen_error:
error_msg = str(gen_error)
- print(f"[AutoFigure] Generation error: {error_msg}")
+ print(f"[AutoFigure] Generation error: {error_msg}", flush=True)
with get_session_lock(session_id):
session['status'] = 'error'
session['error'] = error_msg
@@ -495,7 +513,7 @@ def start_generation(session_id: str):
else:
print(f"[AutoFigure] PNG conversion failed: {error_msg}")
except Exception as png_error:
- print(f"[AutoFigure] PNG conversion error: {png_error}")
+ print(f"[AutoFigure] PNG conversion error: {png_error}", flush=True)
# Evaluate initial code (like continue_iteration does for subsequent iterations)
evaluation = None
@@ -513,7 +531,7 @@ def start_generation(session_id: str):
quality_score, evaluation = eval_result if eval_result else (0.0, None)
print(f"[AutoFigure] Initial evaluation complete, score: {quality_score}")
except Exception as eval_error:
- print(f"[AutoFigure] Initial evaluation error: {eval_error}")
+ print(f"[AutoFigure] Initial evaluation error: {eval_error}", flush=True)
# Clean up
if current_png:
@@ -856,6 +874,7 @@ def start_enhancement(session_id: str):
# User-provided LLM config for code2prompt (required for code2prompt mode)
enhancement_llm_config = {
'provider': data.get('enhancement_llm_provider', 'bianxie'),
+ 'protocol': data.get('enhancement_llm_protocol'),
'api_key': data.get('enhancement_llm_api_key', ''),
'base_url': data.get('enhancement_llm_base_url', ''),
'model': data.get('enhancement_llm_model', 'gemini-3.1-pro-preview'),
@@ -864,6 +883,7 @@ def start_enhancement(session_id: str):
# User-provided image generation config (required)
image_gen_config = {
'provider': data.get('image_gen_provider', 'bianxie'),
+ 'protocol': data.get('image_gen_protocol'),
'api_key': data.get('image_gen_api_key', ''),
'base_url': data.get('image_gen_base_url', ''),
'model': data.get('image_gen_model', ''),
@@ -1082,33 +1102,25 @@ def _run_enhancement(session_id: str, final_xml: str, mode: str, art_style: str,
# Extract LLM config for code2prompt
llm_provider = enhancement_llm_config.get('provider', 'bianxie')
+ llm_protocol = normalize_protocol(llm_provider, enhancement_llm_config.get('protocol'))
llm_api_key = enhancement_llm_config.get('api_key', '')
llm_base_url = enhancement_llm_config.get('base_url', '')
llm_model = enhancement_llm_config.get('model', 'gemini-3.1-pro-preview')
# Set default base URL for LLM if not specified
if not llm_base_url:
- if llm_provider == 'openrouter':
- llm_base_url = 'https://openrouter.ai/api/v1'
- elif llm_provider == 'gemini':
- llm_base_url = 'https://generativelanguage.googleapis.com/v1beta'
- else:
- llm_base_url = 'https://api.bianxie.ai/v1'
+ llm_base_url = default_base_url(llm_provider, llm_protocol)
# Extract image generation config
img_provider = image_gen_config.get('provider', 'bianxie')
+ img_protocol = normalize_protocol(img_provider, image_gen_config.get('protocol'))
img_api_key = image_gen_config.get('api_key', '')
img_base_url = image_gen_config.get('base_url', '')
img_model = image_gen_config.get('model', '')
# Set default base URL for image gen if not specified
if not img_base_url:
- if img_provider == 'openrouter':
- img_base_url = 'https://openrouter.ai/api/v1'
- elif img_provider == 'gemini':
- img_base_url = 'https://generativelanguage.googleapis.com/v1beta'
- else:
- img_base_url = 'https://api.bianxie.ai/v1'
+ img_base_url = default_base_url(img_provider, img_protocol)
# Step 1: Convert final XML to PNG (layout image for enhancement)
layout_png_path = None
@@ -1160,7 +1172,8 @@ def _run_enhancement(session_id: str, final_xml: str, mode: str, art_style: str,
api_key=llm_api_key,
base_url=llm_base_url,
model=llm_model,
- provider=llm_provider
+ provider=llm_provider,
+ protocol=llm_protocol,
)
if text2image_prompt:
@@ -1192,6 +1205,7 @@ def _run_enhancement(session_id: str, final_xml: str, mode: str, art_style: str,
enhancement_base_url=img_base_url,
enhancement_model=img_model,
enhancement_provider=img_provider,
+ enhancement_protocol=img_protocol,
art_style=art_style,
)
enhancer = ImageEnhancer(enhancement_config)
@@ -1298,6 +1312,7 @@ def generate_image():
prompt = data.get('prompt', '').strip()
provider = data.get('provider', 'bianxie').strip().lower()
+ protocol = normalize_protocol(provider, data.get('protocol'))
api_key = data.get('api_key', '').strip()
model = data.get('model', '').strip()
base_url = data.get('base_url', '').strip()
@@ -1313,7 +1328,7 @@ def generate_image():
return jsonify({'error': 'Base URL is required'}), 400
_log(f"[AutoFigure] Generating image with prompt: {prompt[:100]}...")
- _log(f"[AutoFigure] Using provider: {provider}, model: {model}, base_url: {base_url}")
+ _log(f"[AutoFigure] Using provider: {provider}, protocol: {protocol}, model: {model}, base_url: {base_url}")
# Build prompt for icon/image generation
full_prompt = f"""Generate a high-quality image based on the following description.
@@ -1325,16 +1340,16 @@ def generate_image():
Generate the image now."""
# Route to provider-specific handling
- if provider == 'openrouter':
- image_base64, image_format = _generate_image_openrouter(
+ if protocol == GEMINI_NATIVE:
+ image_base64, image_format = _generate_image_gemini(
full_prompt, api_key, model, base_url
)
- elif provider == 'gemini':
- image_base64, image_format = _generate_image_gemini(
+ elif provider == 'openrouter':
+ image_base64, image_format = _generate_image_openrouter(
full_prompt, api_key, model, base_url
)
else:
- # Default to BianXie
+ # Default/custom OpenAI-compatible path
image_base64, image_format = _generate_image_bianxie(
full_prompt, api_key, model, base_url
)
@@ -1385,6 +1400,8 @@ def _generate_image_bianxie(prompt: str, api_key: str, model: str, base_url: str
import re
import requests
+ base_url = chat_completions_url(base_url)
+
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {api_key}'
@@ -1483,13 +1500,7 @@ def _generate_image_openrouter(prompt: str, api_key: str, model: str, base_url:
import requests
import re
- # Ensure base_url ends with /chat/completions for direct requests
- # (OpenAI SDK auto-appends this, but we use requests directly)
- if not base_url.endswith('/chat/completions'):
- if base_url.endswith('/'):
- base_url = base_url + 'chat/completions'
- else:
- base_url = base_url + '/chat/completions'
+ base_url = chat_completions_url(base_url)
headers = {
'Content-Type': 'application/json',
@@ -1647,18 +1658,7 @@ def _generate_image_gemini(prompt: str, api_key: str, model: str, base_url: str)
# Construct Gemini API URL
# Format: {base_url}/models/{model}:generateContent?key={api_key}
- # Clean up base_url - remove any OpenAI-compatible suffixes that may have been added
- base_url = base_url.rstrip('/')
- for suffix in ['/chat/completions', '/completions', '/v1/chat', '/openai']:
- if base_url.endswith(suffix):
- base_url = base_url[:-len(suffix)]
-
- # Ensure base_url ends with /v1beta for Gemini
- if not base_url.endswith('/v1beta') and 'generativelanguage.googleapis.com' in base_url:
- if '/v1beta' not in base_url:
- base_url = base_url.rstrip('/') + '/v1beta'
-
- api_url = f"{base_url}/models/{model}:generateContent?key={api_key}"
+ api_url = f"{normalize_gemini_base_url(base_url)}/models/{model}:generateContent?key={api_key}"
headers = {
'Content-Type': 'application/json'
diff --git a/frontend/components/autofigure/BeautificationDialog.tsx b/frontend/components/autofigure/BeautificationDialog.tsx
index fb47dfc..48f9509 100644
--- a/frontend/components/autofigure/BeautificationDialog.tsx
+++ b/frontend/components/autofigure/BeautificationDialog.tsx
@@ -13,7 +13,7 @@ import {
Sparkles,
} from "lucide-react"
import { useAutoFigure } from "@/contexts/autofigure-context"
-import { type EnhancementMode, type LLMProvider, LLM_PROVIDERS } from "@/lib/autofigure-types"
+import { type ApiProtocol, type EnhancementMode, type LLMProvider, LLM_PROVIDERS } from "@/lib/autofigure-types"
interface BeautificationDialogProps {
isOpen: boolean
@@ -45,6 +45,10 @@ export default function BeautificationDialog({
const canStart = config.artStyle.trim() &&
config.imageGenApiKey.trim() &&
(selectedMode === 'none' || config.enhancementLlmApiKey.trim())
+ const protocolOptions: { value: ApiProtocol; label: string }[] = [
+ { value: "openai-compatible", label: "OpenAI Compatible" },
+ { value: "gemini-native", label: "Gemini Native" },
+ ]
return (