2 changes: 2 additions & 0 deletions chandra/model/__init__.py
@@ -29,13 +29,15 @@ def generate(
         )
         bbox_scale = kwargs.pop("bbox_scale", settings.BBOX_SCALE)
         vllm_api_base = kwargs.pop("vllm_api_base", settings.VLLM_API_BASE)
+        vllm_api_key = kwargs.pop("vllm_api_key", settings.VLLM_API_KEY)

         if self.method == "vllm":
             results = generate_vllm(
                 batch,
                 max_output_tokens=max_output_tokens,
                 bbox_scale=bbox_scale,
                 vllm_api_base=vllm_api_base,
+                vllm_api_key=vllm_api_key,
                 **kwargs,
             )
         else:
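
For context, a caller-side sketch of how the new kwarg threads through InferenceManager.generate: anything not passed explicitly falls back to chandra.settings via kwargs.pop. The constructor signature and batch construction below are assumptions for illustration, not taken from this PR.

from chandra.model import InferenceManager
from chandra.model.schema import BatchInputItem

manager = InferenceManager(method="vllm")      # assumed constructor signature
batch = [BatchInputItem(...)]                  # construction elided; fields depend on the schema
results = manager.generate(
    batch,
    vllm_api_base="http://localhost:8000/v1",  # example endpoint
    vllm_api_key="sk-local-dev",               # example key; popped and forwarded to generate_vllm
)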
13 changes: 12 additions & 1 deletion chandra/model/vllm.py
@@ -4,6 +4,7 @@
 from concurrent.futures import ThreadPoolExecutor
 from itertools import repeat
 from typing import List
+import logging

 from PIL import Image
 from openai import OpenAI
@@ -14,6 +15,9 @@
 from chandra.settings import settings


+logger = logging.getLogger(__name__)
+
+
 def image_to_base64(image: Image.Image) -> str:
     """Convert PIL Image to base64 string."""
     buffered = io.BytesIO()
@@ -30,11 +34,16 @@ def generate_vllm(
     max_failure_retries: int | None = None,
     bbox_scale: int = settings.BBOX_SCALE,
     vllm_api_base: str = settings.VLLM_API_BASE,
+    vllm_api_key: str = settings.VLLM_API_KEY,
     temperature: float = 0.0,
     top_p: float = 0.1,
 ) -> List[GenerationResult]:
+    if not vllm_api_base.endswith("/v1"):
+        # this can fail with
+        # Exception: Unexpected endpoint or method. (POST /chat/completions)
+        logger.warning(f"vllm_api_base does not end with '/v1': {vllm_api_base!r}")
     client = OpenAI(
-        api_key=settings.VLLM_API_KEY,
+        api_key=vllm_api_key,
         base_url=vllm_api_base,
         default_headers=custom_headers,
     )
@@ -78,6 +87,8 @@ def _generate(item: BatchInputItem, temperature, top_p) -> GenerationResult:
         temperature=temperature,
         top_p=top_p,
     )
+    if hasattr(completion, "error"):
+        raise Exception(completion.error)
     raw = completion.choices[0].message.content
     result = GenerationResult(
         raw=raw,
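
The new '/v1' check matters because the OpenAI SDK appends route paths such as /chat/completions to whatever base_url it is given, so a vLLM server has to be addressed at its versioned root; the exception quoted in the comment is what the mismatch produces. A minimal sketch of the client setup, with placeholder URL, key, and model name:

from openai import OpenAI

client = OpenAI(
    api_key="EMPTY",                      # vLLM ignores the key unless the server was started with --api-key
    base_url="http://localhost:8000/v1",  # without '/v1', POST /chat/completions hits the wrong route
)
completion = client.chat.completions.create(
    model="example/model",                # placeholder; not chandra's actual model name
    messages=[{"role": "user", "content": "ping"}],
)
print(completion.choices[0].message.content)

The hasattr(completion, "error") guard presumably covers deployments that return an error payload inside an otherwise well-formed response body, which the SDK parses into an attribute instead of raising.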
20 changes: 20 additions & 0 deletions chandra/scripts/cli.py
@@ -7,6 +7,7 @@
 from chandra.input import load_file
 from chandra.model import InferenceManager
 from chandra.model.schema import BatchInputItem
+from chandra.settings import settings


 def get_supported_files(input_path: Path) -> List[Path]:
@@ -20,6 +21,7 @@ def get_supported_files(input_path: Path) -> List[Path]:
         ".webp",
         ".tiff",
         ".bmp",
+        ".avif",
     }

     if input_path.is_file():
@@ -159,6 +161,18 @@ def save_merged_output(
     default=None,
     help="Maximum number of retries for vLLM inference.",
 )
+@click.option(
+    "--vllm-api-base",
+    type=str,
+    default=settings.VLLM_API_BASE,
+    help=f"default: {settings.VLLM_API_BASE!r}",
+)
+@click.option(
+    "--vllm-api-key",
+    type=str,
+    default=settings.VLLM_API_KEY,
+    help=f"default: {settings.VLLM_API_KEY!r}",
+)
 @click.option(
     "--include-images/--no-images",
     default=True,
@@ -193,6 +207,8 @@ def main(
     max_output_tokens: int,
     max_workers: int,
     max_retries: int,
+    vllm_api_base: str,
+    vllm_api_key: str,
     include_images: bool,
     include_headers_footers: bool,
     save_html: bool,
@@ -273,6 +289,10 @@ def main(
         generate_kwargs["max_workers"] = max_workers
     if max_retries is not None:
         generate_kwargs["max_retries"] = max_retries
+    if vllm_api_base is not None:
+        generate_kwargs["vllm_api_base"] = vllm_api_base
+    if vllm_api_key is not None:
+        generate_kwargs["vllm_api_key"] = vllm_api_key

     results = model.generate(batch, **generate_kwargs)
     all_results.extend(results)
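
A hypothetical smoke test of the two new flags via click's test runner; the positional argument below is a stand-in, since the command's real arguments are outside this diff:

from click.testing import CliRunner

from chandra.scripts.cli import main

runner = CliRunner()
result = runner.invoke(main, [
    "docs/page.png",                      # stand-in input path; real positionals may differ
    "--vllm-api-base", "http://localhost:8000/v1",
    "--vllm-api-key", "sk-local-dev",
])
print(result.exit_code, result.output)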