From c53d0c7d95854b249e97e100bc936eed7185176b Mon Sep 17 00:00:00 2001
From: realityinspector
Date: Wed, 11 Mar 2026 13:46:32 -0600
Subject: [PATCH 1/3] feat: add model_policy="permissive" for open-weight
 model routing

Clockchain and other callers can now pass model_policy: "permissive" to
force generation through open-weight, distillable models (Llama, DeepSeek,
Qwen, etc.) instead of restricted frontier models (Gemini, Anthropic).

Changes:
- New app/core/model_policy.py: shared permissiveness/provider derivation
- GenerateRequest accepts model_policy field (all 3 endpoints)
- resolve_model_policy() auto-selects best permissive text + Pollinations image
- LLM router shortcuts to Pollinations when image_model contains "pollinations"
- Pipeline TDF payload derives model_permissiveness dynamically (was hardcoded)
---
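A sketch of how a caller might exercise the new field. The base URL and
route here are illustrative assumptions (neither is defined in this series),
and the response shape depends on which of the three endpoints is hit:

    # Hypothetical client call: endpoint path and port are assumptions
    import httpx

    resp = httpx.post(
        "http://localhost:8000/api/v1/timepoints/generate",
        json={
            "query": "the signing of the Treaty of Westphalia",
            "generate_image": True,
            "model_policy": "permissive",
        },
        timeout=120.0,
    )
    print(resp.json())

Because no explicit text_model/image_model is set, resolve_model_policy()
falls back to the permissive defaults: Llama 4 Scout for text, Pollinations
for images.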
 app/api/v1/timepoints.py | 131 +++++++++++++++++++++++++++------------
 app/core/llm_router.py   |   7 +++
 app/core/model_policy.py |  54 ++++++++++++++++
 app/core/pipeline.py     |   3 +-
 4 files changed, 154 insertions(+), 41 deletions(-)
 create mode 100644 app/core/model_policy.py

diff --git a/app/api/v1/timepoints.py b/app/api/v1/timepoints.py
index 65c2cd4..439f845 100644
--- a/app/api/v1/timepoints.py
+++ b/app/api/v1/timepoints.py
@@ -43,6 +43,12 @@
 from app.auth.credits import CREDIT_COSTS, spend_credits
 from app.auth.dependencies import get_current_user, require_credits
 from app.config import QualityPreset, get_settings
+from app.core.model_policy import (
+    derive_model_permissiveness as _derive_model_permissiveness,
+)
+from app.core.model_policy import (
+    derive_model_provider as _derive_model_provider,
+)
 from app.core.pipeline import GenerationPipeline, PipelineStep
 from app.database import get_db_session
 from app.models import GenerationLog, Timepoint, TimepointStatus, TimepointVisibility
@@ -58,40 +64,6 @@
 # Model provenance helpers (Clockchain schema v0.2)
 # ---------------------------------------------------------------------------
 
-_GOOGLE_MODEL_PREFIXES = ("gemini", "imagen", "flux-schnell")
-_OPENROUTER_PREFIXES = ("meta-llama/", "anthropic/", "mistralai/", "openai/")
-
-
-def _derive_model_provider(model_id: str | None) -> str:
-    """Derive the routing provider from a model ID string."""
-    if not model_id:
-        return "unknown"
-    lower = model_id.lower()
-    if any(lower.startswith(p) for p in _GOOGLE_MODEL_PREFIXES):
-        return "google"
-    if any(lower.startswith(p) for p in _OPENROUTER_PREFIXES):
-        return "openrouter"
-    if "pollinations" in lower:
-        return "pollinations"
-    # Flash defaults to Google for all generation
-    return "google"
-
-
-def _derive_model_permissiveness(model_id: str | None) -> str:
-    """Derive distillation licensing permissiveness from a model ID.
-
-    Flash uses frontier models (Google Gemini) for quality — these are
-    'restricted' for distillation. Open-weight models routed through
-    OpenRouter (e.g. Llama) are 'permissive'.
-    """
-    if not model_id:
-        return "unknown"
-    lower = model_id.lower()
-    if any(lower.startswith(p) for p in ("meta-llama/",)):
-        return "permissive"
-    # Google Gemini, Imagen, Anthropic, OpenAI, Mistral — all restricted
-    return "restricted"
-
 
 # Request/Response Models
@@ -145,12 +117,82 @@ class GenerateRequest(BaseModel):
         default=None,
         description="URL to POST results to when generation completes (async only)",
     )
+    model_policy: str | None = Field(
+        default=None,
+        description=(
+            "Model licensing policy: 'permissive' selects only open-weight, "
+            "distillable models (e.g. Llama, DeepSeek, Qwen). Overrides preset "
+            "but not explicit text_model/image_model."
+        ),
+        examples=["permissive"],
+    )
     request_context: dict[str, Any] | None = Field(
         default=None,
        description="Opaque context passed through to response (e.g. source, job_id, user_id)",
     )
 
 
+# Default permissive models — used when model_policy="permissive" and no
+# explicit text_model/image_model is provided. These must be open-weight
+# models available on OpenRouter (or Pollinations for images).
+_DEFAULT_PERMISSIVE_TEXT_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"
+_DEFAULT_PERMISSIVE_IMAGE_MODEL = "pollinations"  # Free, open, always available
+
+
+def _get_permissive_text_model() -> str:
+    """Pick the best available permissive text model from the registry."""
+    try:
+        from app.core.model_registry import OpenRouterModelRegistry
+
+        registry = OpenRouterModelRegistry.get_instance()
+        if registry.model_count == 0:
+            return _DEFAULT_PERMISSIVE_TEXT_MODEL
+
+        # Walk preference list; return first that's in the live registry
+        preference = [
+            "meta-llama/llama-4-scout-17b-16e-instruct",
+            "meta-llama/llama-4-maverick-17b-128e-instruct",
+            "deepseek/deepseek-r1-0528",
+            "deepseek/deepseek-chat-v3-0324",
+            "qwen/qwen3-235b-a22b",
+            "qwen/qwen3-30b-a3b",
+            "mistralai/mistral-small-3.2-24b-instruct",
+        ]
+        for model_id in preference:
+            if registry.is_model_available(model_id):
+                return model_id
+    except Exception:
+        pass
+    return _DEFAULT_PERMISSIVE_TEXT_MODEL
+
+
+def resolve_model_policy(
+    request: GenerateRequest,
+) -> tuple[str | None, str | None]:
+    """Resolve model_policy into concrete text_model / image_model.
+
+    Priority (highest first):
+    1. Explicit text_model / image_model on the request (pass-through)
+    2. model_policy="permissive" → auto-select open-weight models
+    3. None → let preset / settings defaults handle it
+
+    Returns:
+        (text_model, image_model) to pass to the pipeline.
+    """
+    text_model = request.text_model
+    image_model = request.image_model
+
+    if request.model_policy and request.model_policy.lower() == "permissive":
+        if not text_model:
+            text_model = _get_permissive_text_model()
+            logger.info("model_policy=permissive → text_model=%s", text_model)
+        if not image_model:
+            image_model = _DEFAULT_PERMISSIVE_IMAGE_MODEL
+            logger.info("model_policy=permissive → image_model=%s", image_model)
+
+    return text_model, image_model
+
+
 class StreamEvent(BaseModel):
     """Server-Sent Event for streaming generation.
 
@@ -769,6 +811,9 @@ async def generate_timepoint(
         await session.commit()
         await session.refresh(timepoint)
 
+    # Resolve model_policy → concrete models
+    text_model, image_model = resolve_model_policy(request)
+
     # Start background generation
     background_tasks.add_task(
         run_generation_task,
@@ -777,8 +822,8 @@
         None,  # session_factory not needed with get_session()
         generate_image=request.generate_image,
         preset=request.preset,
-        text_model=request.text_model,
-        image_model=request.image_model,
+        text_model=text_model,
+        image_model=image_model,
         callback_url=request.callback_url,
         request_context=request.request_context,
     )
@@ -834,11 +879,14 @@ async def generate_timepoint_sync(
     except ValueError:
         logger.warning(f"Invalid preset '{request.preset}', using default")
 
+    # Resolve model_policy → concrete models
+    text_model, image_model = resolve_model_policy(request)
+
     # Run pipeline
     pipeline = GenerationPipeline(
         preset=preset,
-        text_model=request.text_model,
-        image_model=request.image_model,
+        text_model=text_model,
+        image_model=image_model,
     )
 
     state = await pipeline.run(request.query, request.generate_image)
@@ -1212,13 +1260,16 @@ async def generate_timepoint_stream(
     else:
         logger.info(f"Stream generate request: {request.query}")
 
+    # Resolve model_policy → concrete models
+    text_model, image_model = resolve_model_policy(request)
+
     return StreamingResponse(
         stream_generation(
             request.query,
             request.generate_image,
             preset,
-            text_model=request.text_model,
-            image_model=request.image_model,
+            text_model=text_model,
+            image_model=image_model,
         ),
         media_type="text/event-stream",
         headers={
diff --git a/app/core/llm_router.py b/app/core/llm_router.py
index b43a5cc..686ea33 100644
--- a/app/core/llm_router.py
+++ b/app/core/llm_router.py
@@ -972,6 +972,13 @@ async def generate_image(
         Raises:
             ProviderError: If image generation fails after all retries and fallbacks.
         """
+        # Direct Pollinations path — when caller explicitly requests it
+        # (e.g. model_policy="permissive" sets image_model="pollinations")
+        image_model_id = self._get_model_for_capability(ModelCapability.IMAGE, self.config.primary)
+        if image_model_id and "pollinations" in image_model_id.lower():
+            logger.info("Image model is Pollinations — using direct Pollinations path")
+            return await self._generate_image_pollinations(prompt)
+
         # Determine provider for image generation
         # Prefer preset's image_provider, then Google native, then fallback
         if self._preset_config and "image_provider" in self._preset_config:
diff --git a/app/core/model_policy.py b/app/core/model_policy.py
new file mode 100644
index 0000000..0e853ef
--- /dev/null
+++ b/app/core/model_policy.py
@@ -0,0 +1,54 @@
+"""Model policy helpers — permissiveness classification and default selection.
+
+Used by both the API layer (timepoints.py) and the pipeline (pipeline.py)
+to keep provenance logic in one place.
+"""
+
+# Prefixes for open-weight / distillable model families on OpenRouter
+PERMISSIVE_PREFIXES = (
+    "meta-llama/",
+    "deepseek/",
+    "qwen/",
+    "mistralai/",  # Mistral open-weight models (Apache 2.0)
+    "microsoft/",  # Phi family
+    "google/gemma",  # Gemma open-weight
+    "allenai/",
+    "nvidia/",
+)
+
+# Google-native model prefixes (always restricted)
+GOOGLE_MODEL_PREFIXES = ("gemini", "imagen", "flux-schnell")
+
+# Prefixes routed through OpenRouter (may be restricted or permissive)
+OPENROUTER_PREFIXES = ("meta-llama/", "anthropic/", "mistralai/", "openai/", "deepseek/", "qwen/", "microsoft/")
+
+
+def derive_model_provider(model_id: str | None) -> str:
+    """Derive the routing provider from a model ID string."""
+    if not model_id:
+        return "unknown"
+    lower = model_id.lower()
+    if any(lower.startswith(p) for p in GOOGLE_MODEL_PREFIXES):
+        return "google"
+    if any(lower.startswith(p) for p in OPENROUTER_PREFIXES):
+        return "openrouter"
+    if "pollinations" in lower:
+        return "pollinations"
+    return "google"
+
+
+def derive_model_permissiveness(model_id: str | None) -> str:
+    """Derive distillation licensing permissiveness from a model ID.
+
+    Open-weight models (Llama, DeepSeek, Qwen, Mistral, Phi, Gemma) are
+    'permissive' — safe for distillation and derivative works.
+    Frontier models (Google Gemini, Anthropic, OpenAI) are 'restricted'.
+    """
+    if not model_id:
+        return "unknown"
+    lower = model_id.lower()
+    if any(lower.startswith(p) for p in PERMISSIVE_PREFIXES):
+        return "permissive"
+    if "pollinations" in lower:
+        return "permissive"  # Pollinations uses open models
+    return "restricted"
diff --git a/app/core/pipeline.py b/app/core/pipeline.py
index fced402..621bdf0 100644
--- a/app/core/pipeline.py
+++ b/app/core/pipeline.py
@@ -1755,8 +1755,9 @@ def state_to_timepoint(self, state: PipelineState) -> Timepoint:
             payload["image_model_used"] = image_model
 
         # Model provenance (Clockchain schema v0.2)
+        from app.core.model_policy import derive_model_permissiveness
         payload["model_provider"] = self.router.config.primary.value if self.router.config else "unknown"
-        payload["model_permissiveness"] = "restricted"  # Flash uses frontier models
+        payload["model_permissiveness"] = derive_model_permissiveness(text_model)
 
         # Store image generation warning in payload if applicable
         if state.image_generation_failed:

From ecb663041211ae92a507882e0a61c366e4df7374 Mon Sep 17 00:00:00 2001
From: realityinspector
Date: Wed, 11 Mar 2026 17:19:54 -0600
Subject: [PATCH 2/3] fix: skip Google grounding in permissive mode for fully
 Google-free generation

When model_policy="permissive", the pipeline now skips the grounding step
(which requires Google Search API) so that the entire generation is routed
through OpenRouter + Pollinations with zero Google dependencies.
---
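One way to confirm that a permissive run made no grounding call is the skip
marker this patch records. A minimal sketch: PipelineStep.GROUNDING is an
assumed member name, while step_results, StepResult.step, and StepResult.data
are the names used in the diff below:

    # Assumes the types added/used in this series are importable
    from app.core.pipeline import GenerationPipeline, PipelineStep

    async def assert_google_free(query: str) -> None:
        pipeline = GenerationPipeline(model_policy="permissive")
        state = await pipeline.run(query, False)
        grounding = [r for r in state.step_results if r.step == PipelineStep.GROUNDING]
        assert grounding, "grounding step should still record a StepResult"
        assert grounding[0].data.get("skipped") is True
        assert grounding[0].data.get("reason") == "model_policy=permissive"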
+""" + +# Prefixes for open-weight / distillable model families on OpenRouter +PERMISSIVE_PREFIXES = ( + "meta-llama/", + "deepseek/", + "qwen/", + "mistralai/", # Mistral open-weight models (Apache 2.0) + "microsoft/", # Phi family + "google/gemma", # Gemma open-weight + "allenai/", + "nvidia/", +) + +# Google-native model prefixes (always restricted) +GOOGLE_MODEL_PREFIXES = ("gemini", "imagen", "flux-schnell") + +# Prefixes routed through OpenRouter (may be restricted or permissive) +OPENROUTER_PREFIXES = ("meta-llama/", "anthropic/", "mistralai/", "openai/", "deepseek/", "qwen/", "microsoft/") + + +def derive_model_provider(model_id: str | None) -> str: + """Derive the routing provider from a model ID string.""" + if not model_id: + return "unknown" + lower = model_id.lower() + if any(lower.startswith(p) for p in GOOGLE_MODEL_PREFIXES): + return "google" + if any(lower.startswith(p) for p in OPENROUTER_PREFIXES): + return "openrouter" + if "pollinations" in lower: + return "pollinations" + return "google" + + +def derive_model_permissiveness(model_id: str | None) -> str: + """Derive distillation licensing permissiveness from a model ID. + + Open-weight models (Llama, DeepSeek, Qwen, Mistral, Phi, Gemma) are + 'permissive' — safe for distillation and derivative works. + Frontier models (Google Gemini, Anthropic, OpenAI) are 'restricted'. + """ + if not model_id: + return "unknown" + lower = model_id.lower() + if any(lower.startswith(p) for p in PERMISSIVE_PREFIXES): + return "permissive" + if "pollinations" in lower: + return "permissive" # Pollinations uses open models + return "restricted" diff --git a/app/core/pipeline.py b/app/core/pipeline.py index fced402..621bdf0 100644 --- a/app/core/pipeline.py +++ b/app/core/pipeline.py @@ -1755,8 +1755,9 @@ def state_to_timepoint(self, state: PipelineState) -> Timepoint: payload["image_model_used"] = image_model # Model provenance (Clockchain schema v0.2) + from app.core.model_policy import derive_model_permissiveness payload["model_provider"] = self.router.config.primary.value if self.router.config else "unknown" - payload["model_permissiveness"] = "restricted" # Flash uses frontier models + payload["model_permissiveness"] = derive_model_permissiveness(text_model) # Store image generation warning in payload if applicable if state.image_generation_failed: From ecb663041211ae92a507882e0a61c366e4df7374 Mon Sep 17 00:00:00 2001 From: realityinspector Date: Wed, 11 Mar 2026 17:19:54 -0600 Subject: [PATCH 2/3] fix: skip Google grounding in permissive mode for fully Google-free generation When model_policy="permissive", the pipeline now skips the grounding step (which requires Google Search API) so that the entire generation is routed through OpenRouter + Pollinations with zero Google dependencies. --- app/api/v1/timepoints.py | 9 +++++++++ app/core/pipeline.py | 16 ++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/app/api/v1/timepoints.py b/app/api/v1/timepoints.py index 439f845..357a06a 100644 --- a/app/api/v1/timepoints.py +++ b/app/api/v1/timepoints.py @@ -485,6 +485,7 @@ async def stream_generation( preset: QualityPreset | None = None, text_model: str | None = None, image_model: str | None = None, + model_policy: str | None = None, ) -> AsyncGenerator[str, None]: """Generate SSE events for pipeline progress with real-time streaming. 
 app/agents/base.py               | 19 ++++++-
 app/api/v1/timepoints.py         | 97 ++++++++++++++++++++++++++++--
 app/core/pipeline.py             | 17 +++++-
 app/core/providers/google.py     |  6 ++
 app/core/providers/openrouter.py | 10 ++--
 5 files changed, 137 insertions(+), 12 deletions(-)

diff --git a/app/agents/base.py b/app/agents/base.py
index 5944b9a..05743c7 100644
--- a/app/agents/base.py
+++ b/app/agents/base.py
@@ -108,15 +108,18 @@ def __init__(
         self,
         router: LLMRouter | None = None,
         name: str | None = None,
+        llm_params: dict[str, Any] | None = None,
     ) -> None:
         """Initialize agent.
 
         Args:
             router: LLM router (creates one if not provided)
             name: Agent name for logging (defaults to class name)
+            llm_params: Request-level LLM params that override agent defaults
         """
         self.router = router or LLMRouter()
         self.name = name or self.__class__.__name__
+        self._llm_params: dict[str, Any] = llm_params or {}
 
     @abstractmethod
     def get_system_prompt(self) -> str:
@@ -161,6 +164,10 @@ async def _call_llm(
         Uses the router to call the LLM with structured output and
         handles errors gracefully.
 
+        Merging priority (highest first):
+        1. self._llm_params (request-level overrides from caller)
+        2. kwargs (agent-level defaults like temperature=0.3)
+
         Args:
             input_data: Input data for prompt generation
             **kwargs: Additional parameters for the LLM call
@@ -178,6 +185,16 @@
         prompt = self.get_prompt(input_data)
         system = self.get_system_prompt()
 
+        # Apply request-level llm_params (overrides agent defaults)
+        merged = {**kwargs}  # Start with agent defaults
+        for k, v in self._llm_params.items():
+            if k == "system_prompt_prefix" and v:
+                system = v + "\n\n" + system
+            elif k == "system_prompt_suffix" and v:
+                system = system + "\n\n" + v
+            elif k not in ("system_prompt_prefix", "system_prompt_suffix") and v is not None:
+                merged[k] = v  # Request params override agent defaults
+
         logger.debug(f"{self.name}: calling LLM")
 
         response = await self.router.call_structured(
@@ -185,7 +202,7 @@
             prompt=prompt,
             response_model=self.response_model,
             capability=self.capability,
             system=system,
-            **kwargs,
+            **merged,
         )
 
         latency = int((time.perf_counter() - start_time) * 1000)
diff --git a/app/api/v1/timepoints.py b/app/api/v1/timepoints.py
index 357a06a..424b7ca 100644
--- a/app/api/v1/timepoints.py
+++ b/app/api/v1/timepoints.py
@@ -68,15 +68,86 @@
 # Request/Response Models
 
+class LLMParams(BaseModel):
+    """LLM generation parameters for fine-grained control.
+
+    All fields are optional — unset fields use agent/preset defaults.
+    These parameters flow through to the underlying provider (OpenRouter or Google).
+    """
+
+    temperature: float | None = Field(
+        default=None,
+        ge=0.0,
+        le=2.0,
+        description="Sampling temperature. Lower = more deterministic, higher = more creative. Agent defaults range 0.2-0.85.",
+    )
+    max_tokens: int | None = Field(
+        default=None,
+        ge=1,
+        le=32768,
+        description="Maximum output tokens per agent call. Preset defaults: hyper=1024, balanced=2048, hd=8192.",
+    )
+    top_p: float | None = Field(
+        default=None,
+        ge=0.0,
+        le=1.0,
+        description="Nucleus sampling: only consider tokens with cumulative probability <= top_p.",
+    )
+    top_k: int | None = Field(
+        default=None,
+        ge=1,
+        description="Top-k sampling: only consider the k most likely tokens.",
+    )
+    frequency_penalty: float | None = Field(
+        default=None,
+        ge=-2.0,
+        le=2.0,
+        description="Penalize tokens by their frequency in the output so far. OpenRouter only.",
+    )
+    presence_penalty: float | None = Field(
+        default=None,
+        ge=-2.0,
+        le=2.0,
+        description="Penalize tokens that have appeared at all in the output. OpenRouter only.",
+    )
+    repetition_penalty: float | None = Field(
+        default=None,
+        ge=0.0,
+        le=2.0,
+        description="Multiplicative penalty for repeated tokens. OpenRouter only.",
+    )
+    stop: list[str] | None = Field(
+        default=None,
+        max_length=4,
+        description="Stop sequences — generation stops when any of these strings is produced.",
+    )
+    thinking_level: str | None = Field(
+        default=None,
+        description="Reasoning depth for thinking models: 'none', 'low', 'medium', 'high'. Google Gemini only.",
+        examples=["medium", "high"],
+    )
+    system_prompt_prefix: str | None = Field(
+        default=None,
+        max_length=2000,
+        description="Text prepended to every agent's system prompt. Use for tone/style injection.",
+    )
+    system_prompt_suffix: str | None = Field(
+        default=None,
+        max_length=2000,
+        description="Text appended to every agent's system prompt. Use for constraints/instructions.",
+    )
+
+
 class GenerateRequest(BaseModel):
     """Request to generate a timepoint.
 
-    Attributes:
-        query: The temporal query to generate
-        generate_image: Whether to generate the image
-        preset: Quality preset (hd, hyper, balanced)
-        text_model: Custom text model override (ignores preset)
-        image_model: Custom image model override (ignores preset)
+    Model selection priority (highest first):
+    1. Explicit text_model/image_model — use exactly these models
+    2. model_policy="permissive" — auto-select open-weight models
+    3. preset (hd/hyper/balanced) — use preset's default models
+    4. Settings defaults — server-configured defaults
+
+    llm_params override generation hyperparameters across all pipeline agents.
     """
 
     query: str = Field(
@@ -126,6 +197,13 @@
         ),
         examples=["permissive"],
     )
+    llm_params: LLMParams | None = Field(
+        default=None,
+        description=(
+            "Fine-grained LLM parameters applied to all pipeline agents. "
+            "Overrides preset and agent defaults. Unset fields keep defaults."
+        ),
+    )
     request_context: dict[str, Any] | None = Field(
         default=None,
         description="Opaque context passed through to response (e.g. source, job_id, user_id)",
@@ -486,6 +564,7 @@ async def stream_generation(
     text_model: str | None = None,
     image_model: str | None = None,
     model_policy: str | None = None,
+    llm_params: dict[str, Any] | None = None,
 ) -> AsyncGenerator[str, None]:
     """Generate SSE events for pipeline progress with real-time streaming.
 
@@ -529,6 +608,7 @@ def format_sse(event: StreamEvent) -> str:
             text_model=text_model,
             image_model=image_model,
             model_policy=model_policy,
+            llm_params=llm_params,
         )
         state = None
         start_time = time.perf_counter()
@@ -648,6 +728,7 @@ async def run_generation_task(
     callback_url: str | None = None,
     request_context: dict[str, Any] | None = None,
     model_policy: str | None = None,
+    llm_params: dict[str, Any] | None = None,
 ) -> None:
     """Background task to run generation pipeline.
 
@@ -683,6 +764,7 @@
             text_model=text_model,
             image_model=image_model,
             model_policy=model_policy,
+            llm_params=llm_params,
         )
 
         state = await pipeline.run(query, generate_image)
@@ -833,6 +915,7 @@
         callback_url=request.callback_url,
         request_context=request.request_context,
         model_policy=request.model_policy,
+        llm_params=request.llm_params.model_dump(exclude_none=True) if request.llm_params else None,
     )
 
     return GenerateResponse(
@@ -895,6 +978,7 @@
         text_model=text_model,
         image_model=image_model,
         model_policy=request.model_policy,
+        llm_params=request.llm_params.model_dump(exclude_none=True) if request.llm_params else None,
     )
 
     state = await pipeline.run(request.query, request.generate_image)
@@ -1279,6 +1363,7 @@
             text_model=text_model,
             image_model=image_model,
             model_policy=request.model_policy,
+            llm_params=request.llm_params.model_dump(exclude_none=True) if request.llm_params else None,
         ),
         media_type="text/event-stream",
         headers={
diff --git a/app/core/pipeline.py b/app/core/pipeline.py
index c6ecb5d..f2de686 100644
--- a/app/core/pipeline.py
+++ b/app/core/pipeline.py
@@ -272,6 +272,7 @@ def __init__(
         image_model: str | None = None,
         max_parallelism: int | None = None,
         model_policy: str | None = None,
+        llm_params: dict[str, Any] | None = None,
     ) -> None:
         """Initialize pipeline.
 
@@ -282,6 +283,7 @@
             image_model: Custom image model override (overrides preset)
             max_parallelism: Maximum parallel LLM calls (default from settings)
             model_policy: Model licensing policy (e.g. "permissive" for Google-free)
+            llm_params: LLM hyperparameters that override agent/preset defaults
         """
 
         self._router = router
@@ -289,6 +291,7 @@
         self._text_model = text_model
         self._image_model = image_model
         self._model_policy = model_policy
+        self._llm_params: dict[str, Any] = llm_params or {}
         self._max_parallelism_override = max_parallelism
         self._max_parallelism: int | None = None  # Set during execution planning
         self._semaphore: asyncio.Semaphore | None = None
@@ -325,7 +328,7 @@ def router(self) -> LLMRouter:
         return self._router
 
     def _init_agents(self) -> None:
-        """Initialize all agents with the router."""
+        """Initialize all agents with the router and optional llm_params."""
         if self._agents_initialized:
             return
@@ -345,6 +348,18 @@
         self._image_prompt_optimizer_agent = ImagePromptOptimizerAgent(router=router)
         self._image_gen_agent = ImageGenAgent(router=router)
         self._critique_agent = CritiqueAgent(router=router)
+
+        # Inject request-level llm_params into all agents
+        if self._llm_params:
+            for agent in [
+                self._judge_agent, self._timeline_agent, self._scene_agent,
+                self._characters_agent, self._char_id_agent, self._char_bio_agent,
+                self._moment_agent, self._dialog_agent, self._camera_agent,
+                self._graph_agent, self._image_prompt_agent,
+                self._image_prompt_optimizer_agent, self._critique_agent,
+            ]:
+                agent._llm_params = self._llm_params  # noqa: SLF001
+
         self._agents_initialized = True
 
     def _plan_execution(self) -> None:
diff --git a/app/core/providers/google.py b/app/core/providers/google.py
index 6a3c0a3..808fbe1 100644
--- a/app/core/providers/google.py
+++ b/app/core/providers/google.py
@@ -221,6 +221,12 @@ async def call_text(
             config_params["temperature"] = kwargs["temperature"]
         if "max_tokens" in kwargs:
             config_params["max_output_tokens"] = kwargs["max_tokens"]
+        if "top_p" in kwargs:
+            config_params["top_p"] = kwargs["top_p"]
+        if "top_k" in kwargs:
+            config_params["top_k"] = kwargs["top_k"]
+        if "stop" in kwargs:
+            config_params["stop_sequences"] = kwargs["stop"]
         if "thinking_level" in kwargs:
             config_params["thinking_config"] = types.ThinkingConfig(
                 thinking_budget=kwargs["thinking_level"]
diff --git a/app/core/providers/openrouter.py b/app/core/providers/openrouter.py
index 6fbcc28..c138067 100644
--- a/app/core/providers/openrouter.py
+++ b/app/core/providers/openrouter.py
@@ -248,10 +248,12 @@ async def call_text(
             "messages": messages,
         }
 
-        if "temperature" in kwargs:
-            payload["temperature"] = kwargs["temperature"]
-        if "max_tokens" in kwargs:
-            payload["max_tokens"] = kwargs["max_tokens"]
+        # Standard OpenRouter parameters
+        for param in ("temperature", "max_tokens", "top_p", "top_k",
+                      "frequency_penalty", "presence_penalty",
+                      "repetition_penalty", "stop"):
+            if param in kwargs:
+                payload[param] = kwargs[param]
 
         # Add response format for structured output
         if response_model is not None: