Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 46 additions & 13 deletions app/api/v1/timepoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,12 @@ class GenerateRequest(BaseModel):
)
image_model: str | None = Field(
default=None,
description="Custom image model override (e.g., 'google/imagen-3'). Overrides preset.",
examples=["google/imagen-3", "black-forest-labs/flux-1.1-pro"],
description=(
"Image model ID. OpenRouter format ('org/model') or Google native "
"(gemini-2.5-flash-image, gemini-3-pro-image-preview)."
),
examples=["gemini-2.5-flash-image", "gemini-3-pro-image-preview",
"google/gemini-2.5-flash-image-preview"],
)
write_blob: bool = Field(
default=False,
Expand Down Expand Up @@ -210,12 +214,10 @@ class GenerateRequest(BaseModel):
)


# Default permissive models — used when model_policy="permissive" and no
# explicit text_model/image_model is provided. These must be open-weight
# models available on OpenRouter (or Pollinations for images).
# Default permissive text model — used when model_policy="permissive" and no
# explicit text_model is provided. Must be an open-weight model on OpenRouter.
# Image model is resolved at runtime via get_image_fallback_model().
_DEFAULT_PERMISSIVE_TEXT_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"
_DEFAULT_PERMISSIVE_IMAGE_MODEL = "pollinations" # Free, open, always available


def _get_permissive_text_model() -> str:
"""Pick the best available permissive text model from the registry."""
Expand All @@ -226,15 +228,17 @@ def _get_permissive_text_model() -> str:
if registry.model_count == 0:
return _DEFAULT_PERMISSIVE_TEXT_MODEL

# Walk preference list; return first that's in the live registry
# Walk preference list; return first that's in the live registry.
# Prioritize fast non-thinking models — DeepSeek R1 is a thinking
# model that takes 30-60s per call and causes pipeline timeouts.
preference = [
"meta-llama/llama-4-scout-17b-16e-instruct",
"meta-llama/llama-4-maverick-17b-128e-instruct",
"deepseek/deepseek-r1-0528",
"deepseek/deepseek-chat-v3-0324",
"qwen/qwen3-235b-a22b",
"qwen/qwen3-30b-a3b",
"deepseek/deepseek-chat-v3-0324", # Fast chat model
"qwen/qwen3-30b-a3b", # Fast MoE model
"mistralai/mistral-small-3.2-24b-instruct",
"qwen/qwen3-235b-a22b", # Large but non-thinking
"deepseek/deepseek-r1-0528", # Thinking model — slow, last resort
]
for model_id in preference:
if registry.is_model_available(model_id):
Expand All @@ -254,18 +258,47 @@ def resolve_model_policy(
2. model_policy="permissive" → auto-select open-weight models
3. None → let preset / settings defaults handle it

When model_policy="permissive", explicit models are validated against
the PERMISSIVE_PREFIXES allowlist. Proprietary models (OpenAI, Anthropic,
Google Gemini) are rejected with 422.

Returns:
(text_model, image_model) to pass to the pipeline.

Raises:
HTTPException: 422 if explicit models violate permissive policy.
"""
from app.core.model_policy import is_model_permissive

text_model = request.text_model
image_model = request.image_model

if request.model_policy and request.model_policy.lower() == "permissive":
# Validate explicit models against permissive allowlist
if text_model and not is_model_permissive(text_model):
raise HTTPException(
status_code=422,
detail=(
f"model_policy='permissive' requires open-weight models. "
f"'{text_model}' is proprietary. Use models from: "
f"meta-llama/, deepseek/, qwen/, mistralai/, microsoft/, google/gemma, allenai/, nvidia/"
),
)
if image_model and not is_model_permissive(image_model):
raise HTTPException(
status_code=422,
detail=(
f"model_policy='permissive' requires open-weight models. "
f"'{image_model}' is proprietary."
),
)

if not text_model:
text_model = _get_permissive_text_model()
logger.info("model_policy=permissive → text_model=%s", text_model)
if not image_model:
image_model = _DEFAULT_PERMISSIVE_IMAGE_MODEL
from app.core.llm_router import get_image_fallback_model
image_model = get_image_fallback_model(permissive_only=True)
logger.info("model_policy=permissive → image_model=%s", image_model)

return text_model, image_model
Expand Down
164 changes: 51 additions & 113 deletions app/core/llm_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,11 @@
"""

import asyncio
import base64
import logging
import time
from collections.abc import AsyncIterator
from enum import Enum
from typing import Any, TypeVar
from urllib.parse import quote

import httpx
from pydantic import BaseModel

from app.config import (
Expand Down Expand Up @@ -85,24 +81,22 @@ def get_paid_fallback_model() -> str:
return _PAID_FALLBACK_DEFAULT


def get_image_fallback_model(permissive_only: bool = False) -> str:
    """Get the best image fallback model, consulting the registry first.

    Args:
        permissive_only: If True, only return open-weight image models.

    Returns:
        The registry's best image model ID, or ``_IMAGE_FALLBACK_DEFAULT``
        when the registry is unavailable or offers no candidate.
    """
    try:
        # Imported lazily to avoid a hard dependency at module import time.
        from app.core.model_registry import OpenRouterModelRegistry

        registry = OpenRouterModelRegistry.get_instance()
        best = registry.get_best_image_model(permissive_only=permissive_only)
        if best:
            return best
    except Exception:
        # Registry lookup is best-effort; fall through to the static default.
        pass
    return _IMAGE_FALLBACK_DEFAULT

# Pollinations.ai - Ultimate free fallback for image generation
# No API key required, always available, decent quality
# NOTE: URL changed from image.pollinations.ai to gen.pollinations.ai in early 2026
POLLINATIONS_URL = "https://gen.pollinations.ai/image/{prompt}"
POLLINATIONS_TIMEOUT = 60.0 # Image generation can take time

# Rate limit retry settings
MAX_RETRIES = 5
INITIAL_BACKOFF = 2.0 # seconds
Expand Down Expand Up @@ -195,6 +189,7 @@ def __init__(
preset: QualityPreset | None = None,
text_model: str | None = None,
image_model: str | None = None,
model_policy: str | None = None,
) -> None:
"""Initialize LLM router.

Expand All @@ -203,12 +198,14 @@ def __init__(
preset: Quality preset (HD, HYPER, BALANCED). Overrides config models.
text_model: Custom text model override (overrides preset).
image_model: Custom image model override (overrides preset).
model_policy: Model policy (e.g. "permissive" blocks Google fallback).
"""
settings = get_settings()
self.preset = preset
self._preset_config = PRESET_CONFIGS.get(preset) if preset else None
self._custom_text_model = text_model
self._custom_image_model = image_model
self._model_policy = model_policy

# Build config from settings if not provided
if config is None:
Expand Down Expand Up @@ -624,7 +621,15 @@ async def call(
logger.warning(f"Paid model fallback also failed: {e2}")

# Try Google provider as ultimate fallback using verified model
if ProviderType.GOOGLE in self.providers and self.config.primary != ProviderType.GOOGLE:
# (blocked in permissive mode — must stay Google-free)
is_permissive = bool(
self._model_policy and self._model_policy.lower() == "permissive"
)
if (
ProviderType.GOOGLE in self.providers
and self.config.primary != ProviderType.GOOGLE
and not is_permissive
):
logger.info("Falling back to Google provider with verified model")
try:
provider = self._get_provider(ProviderType.GOOGLE)
Expand All @@ -635,6 +640,8 @@ async def call(
)
except ProviderError as e3:
logger.warning(f"Google provider fallback failed: {e3}")
elif is_permissive:
logger.info("Skipping Google fallback: model_policy=permissive")

# All fallbacks exhausted
raise ProviderError(
Expand Down Expand Up @@ -749,7 +756,15 @@ async def call_structured(
logger.warning(f"Paid model fallback also failed: {e2}")

# Try Google provider as ultimate fallback using verified model
if ProviderType.GOOGLE in self.providers and self.config.primary != ProviderType.GOOGLE:
# (blocked in permissive mode — must stay Google-free)
is_permissive = bool(
self._model_policy and self._model_policy.lower() == "permissive"
)
if (
ProviderType.GOOGLE in self.providers
and self.config.primary != ProviderType.GOOGLE
and not is_permissive
):
logger.info("Falling back to Google provider with verified model")
try:
provider = self._get_provider(ProviderType.GOOGLE)
Expand All @@ -761,6 +776,8 @@ async def call_structured(
)
except ProviderError as e3:
logger.warning(f"Google provider fallback failed: {e3}")
elif is_permissive:
logger.info("Skipping Google fallback: model_policy=permissive")

# All fallbacks exhausted
raise ProviderError(
Expand All @@ -786,73 +803,6 @@ async def call_structured(

raise

async def _generate_image_pollinations(
    self,
    prompt: str,
    **kwargs: Any,
) -> LLMResponse[str]:
    """Generate an image via Pollinations.ai (free, no API key required).

    This is the ultimate fallback for image generation: Pollinations.ai
    serves Stable Diffusion-based generation with no API key, no rate
    limits, and decent quality.

    Args:
        prompt: The image generation prompt.
        **kwargs: Additional parameters (currently unused).

    Returns:
        LLMResponse whose content is the base64-encoded image.

    Raises:
        ProviderError: If the request fails or returns a non-200 status.
    """
    started = time.perf_counter()

    # Percent-encode everything so the prompt embeds safely in the URL path.
    request_url = POLLINATIONS_URL.format(prompt=quote(prompt, safe=""))
    # nologo=true removes watermark, width/height for resolution, model=flux for best quality
    request_url += "?nologo=true&width=1024&height=1024&model=flux"

    logger.info(f"Pollinations.ai fallback: generating image for prompt (first 50 chars): {prompt[:50]}...")

    try:
        async with httpx.AsyncClient(timeout=POLLINATIONS_TIMEOUT) as client:
            response = await client.get(request_url)

            if response.status_code != 200:
                raise ProviderError(
                    message=f"Pollinations.ai returned status {response.status_code}",
                    provider=ProviderType.OPENROUTER,  # Use OPENROUTER as proxy
                    status_code=response.status_code,
                    # 5xx errors are worth retrying; 4xx are not.
                    retryable=response.status_code >= 500,
                )

            # Body is raw image bytes (JPEG); ship it back as base64 text.
            encoded = base64.b64encode(response.content).decode("utf-8")

            elapsed_ms = int((time.perf_counter() - started) * 1000)
            logger.info(f"Pollinations.ai image generated successfully in {elapsed_ms}ms")

            return LLMResponse(
                content=encoded,
                model="pollinations-ai",
                provider=ProviderType.OPENROUTER,  # Use OPENROUTER as proxy type
                latency_ms=elapsed_ms,
            )

    except httpx.HTTPError as e:
        logger.error(f"Pollinations.ai request failed: {e}")
        raise ProviderError(
            message=f"Pollinations.ai request failed: {e}",
            provider=ProviderType.OPENROUTER,
            retryable=True,
        ) from e

async def _generate_image_with_retry(
self,
provider: LLMProvider,
Expand Down Expand Up @@ -972,16 +922,15 @@ async def generate_image(
Raises:
ProviderError: If image generation fails after all retries and fallbacks.
"""
# Direct Pollinations path — when caller explicitly requests it
# (e.g. model_policy="permissive" sets image_model="pollinations")
image_model_id = self._get_model_for_capability(ModelCapability.IMAGE, self.config.primary)
if image_model_id and "pollinations" in image_model_id.lower():
logger.info("Image model is Pollinations — using direct Pollinations path")
return await self._generate_image_pollinations(prompt)

# Determine provider for image generation
# Prefer preset's image_provider, then Google native, then fallback
if self._preset_config and "image_provider" in self._preset_config:
# Prefer preset's image_provider, then Google native, then OpenRouter
is_permissive = bool(
self._model_policy and self._model_policy.lower() == "permissive"
)
if is_permissive and ProviderType.OPENROUTER in self.providers:
# Permissive mode: always use OpenRouter for images (Google-free)
image_provider = ProviderType.OPENROUTER
elif self._preset_config and "image_provider" in self._preset_config:
image_provider = self._preset_config["image_provider"]
elif ProviderType.GOOGLE in self.providers:
image_provider = ProviderType.GOOGLE
Expand Down Expand Up @@ -1010,20 +959,15 @@ async def generate_image(
should_fallback = (
image_provider != ProviderType.OPENROUTER
and ProviderType.OPENROUTER in self.providers
and not is_permissive # Already on OpenRouter in permissive mode
)

if not should_fallback:
# No OpenRouter fallback, but try Pollinations.ai as ultimate fallback
logger.info("No OpenRouter configured, falling back to Pollinations.ai")
try:
return await self._generate_image_pollinations(prompt)
except ProviderError as e2:
logger.error(f"Pollinations.ai fallback failed: {e2}")
raise ProviderError(
message=f"Image generation failed. Primary: {e}, Pollinations: {e2}",
provider=image_provider,
retryable=False,
) from e
raise ProviderError(
message=f"Image generation failed: {e}",
provider=image_provider,
retryable=False,
) from e

# Log appropriately based on error type
image_fallback = get_image_fallback_model()
Expand Down Expand Up @@ -1053,17 +997,11 @@ async def generate_image(
)
except (RateLimitError, ProviderError) as e2:
logger.warning(f"OpenRouter image fallback also failed: {e2}")
# Try Pollinations.ai as ultimate free fallback
logger.info("Falling back to Pollinations.ai (free, no API key required)")
try:
return await self._generate_image_pollinations(prompt)
except ProviderError as e3:
logger.error(f"Pollinations.ai fallback also failed: {e3}")
raise ProviderError(
message=f"All image providers failed. Primary: {e}, OpenRouter: {e2}, Pollinations: {e3}",
provider=image_provider,
retryable=False,
) from e
raise ProviderError(
message=f"All image providers failed. Primary: {e}, OpenRouter: {e2}",
provider=image_provider,
retryable=False,
) from e

async def analyze_image(
self,
Expand Down
Loading
Loading