4 changes: 2 additions & 2 deletions app/agents/image_prompt_optimizer.py
```diff
@@ -100,7 +100,7 @@ class ImagePromptOptimizerOutput(BaseModel):
         description="Elements removed to reduce complexity"
     )

-    issues_found: list[PromptIssue] = Field(
+    issues_found: list[PromptIssue | str] = Field(
         default_factory=list,
         description="Quality issues detected and addressed"
     )
@@ -282,7 +282,7 @@ async def run(
         # Log significant issues
         critical_issues = [
             i for i in result.content.issues_found
-            if i.severity == "critical"
+            if isinstance(i, PromptIssue) and i.severity == "critical"
         ]
         if critical_issues:
             logger.warning(
```
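The widened `issues_found: list[PromptIssue | str]` type means downstream code can no longer assume every entry has a `.severity` attribute, which is why the hunk above adds an `isinstance` guard. A minimal sketch of the pattern, using a hypothetical dataclass as a stand-in for the real Pydantic `PromptIssue` model (only the `severity` field is taken from the diff; the helper name is illustrative):

```python
from dataclasses import dataclass


@dataclass
class PromptIssue:
    # Stand-in for the real Pydantic model; only `severity` is assumed from the diff.
    severity: str
    message: str


def critical_only(issues: list) -> list:
    # Plain strings (now permitted by `list[PromptIssue | str]`) have no
    # `severity` attribute, so guard with isinstance before filtering.
    return [
        i for i in issues
        if isinstance(i, PromptIssue) and i.severity == "critical"
    ]
```

Without the guard, a single string entry in `issues_found` would raise `AttributeError` during logging.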
2 changes: 2 additions & 0 deletions app/api/v1/timepoints.py
```diff
@@ -1063,6 +1063,8 @@ async def generate_timepoint_sync(
         resp.request_context = request.request_context
         return resp

+    except HTTPException:
+        raise  # Let validation errors (e.g. 422 from model policy) pass through
     except Exception as e:
         logger.error(f"Sync generation failed: {e}")
         raise HTTPException(status_code=500, detail=str(e)) from e
```
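The new `except HTTPException: raise` clause matters because a bare `except Exception` would otherwise catch FastAPI's `HTTPException` too and re-wrap a 422 validation error as a 500. A minimal sketch of the ordering, with a hypothetical stand-in class rather than the real `fastapi.HTTPException`:

```python
class HTTPException(Exception):
    # Stand-in for fastapi.HTTPException, just enough to show the pattern.
    def __init__(self, status_code: int, detail: str = ""):
        self.status_code = status_code
        self.detail = detail


def endpoint(raise_validation: bool) -> str:
    try:
        if raise_validation:
            raise HTTPException(status_code=422, detail="model policy violation")
        return "ok"
    except HTTPException:
        raise  # re-raise untouched so the original status code (422) survives
    except Exception as e:
        # Only genuinely unexpected errors become a 500
        raise HTTPException(status_code=500, detail=str(e)) from e
```

The narrower `except` must come first; Python matches clauses top to bottom.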
5 changes: 5 additions & 0 deletions app/core/llm_router.py
```diff
@@ -66,6 +66,8 @@
 # Static fallback defaults (used when model registry has no data)
 _PAID_FALLBACK_DEFAULT = VerifiedModels.OPENROUTER_TEXT[0]  # google/gemini-2.0-flash-001
 _IMAGE_FALLBACK_DEFAULT = "google/gemini-2.5-flash-image-preview"
+# FLUX for permissive mode — fully open-weight image generation via OpenRouter
+_IMAGE_FALLBACK_PERMISSIVE = "black-forest-labs/flux.2-pro"


 def get_paid_fallback_model() -> str:
@@ -95,6 +97,9 @@ def get_image_fallback_model(permissive_only: bool = False) -> str:
            return best
     except Exception:
         pass
+    # Registry has no usable image data — fall back to the static defaults above
+    if permissive_only:
+        return _IMAGE_FALLBACK_PERMISSIVE
     return _IMAGE_FALLBACK_DEFAULT

 # Rate limit retry settings
```
9 changes: 5 additions & 4 deletions app/core/model_policy.py
```diff
@@ -9,18 +9,19 @@
     "meta-llama/",
     "deepseek/",
     "qwen/",
-    "mistralai/", # Mistral open-weight models (Apache 2.0)
-    "microsoft/", # Phi family
-    "google/gemma", # Gemma open-weight
+    "mistralai/",          # Mistral open-weight models (Apache 2.0)
+    "microsoft/",          # Phi family
+    "google/gemma",        # Gemma open-weight
     "allenai/",
     "nvidia/",
+    "black-forest-labs/",  # FLUX open-weight image models
 )

 # Google-native model prefixes (always restricted)
 GOOGLE_MODEL_PREFIXES = ("gemini", "imagen", "flux-schnell")

 # Prefixes routed through OpenRouter (may be restricted or permissive)
-OPENROUTER_PREFIXES = ("meta-llama/", "anthropic/", "mistralai/", "openai/", "deepseek/", "qwen/", "microsoft/")
+OPENROUTER_PREFIXES = ("meta-llama/", "anthropic/", "mistralai/", "openai/", "deepseek/", "qwen/", "microsoft/", "black-forest-labs/")


 def derive_model_provider(model_id: str | None) -> str:
```
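Because `str.startswith` accepts a tuple of prefixes, the whole allowlist check reduces to a single call. A sketch with a hypothetical `is_open_weight` helper (the prefixes are taken from the tuple above; the function name is illustrative and may not match `model_policy.py`):

```python
# Prefix allowlist as it appears in the diff above
OPEN_WEIGHT_PREFIXES = (
    "meta-llama/", "deepseek/", "qwen/", "mistralai/", "microsoft/",
    "google/gemma", "allenai/", "nvidia/", "black-forest-labs/",
)


def is_open_weight(model_id: str) -> bool:
    # str.startswith accepts a tuple, so one call covers every prefix.
    return model_id.startswith(OPEN_WEIGHT_PREFIXES)
```

Note that `"google/gemma"` matches Gemma model IDs without also matching Google-native IDs like `gemini-2.5-flash`, which carry no `google/` prefix.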
3 changes: 2 additions & 1 deletion app/core/providers/openrouter.py
```diff
@@ -391,7 +391,8 @@ async def generate_image(
                 "content": f"Generate an image: {prompt}",
             }
         ],
-        "modalities": ["image", "text"],  # Request image output
+        # Image-only models (FLUX) need ["image"]; multimodal models need both
+        "modalities": ["image"] if "flux" in model.lower() else ["image", "text"],
     }

     try:
```
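The conditional `modalities` expression above can be isolated for testing. A sketch with a hypothetical `image_modalities` helper; the case-insensitive substring test on the model ID is the logic from the diff, the function name is an assumption:

```python
def image_modalities(model: str) -> list[str]:
    # FLUX endpoints are image-only, so request just ["image"] for them;
    # multimodal models (e.g. Gemini image preview) expect both modalities.
    return ["image"] if "flux" in model.lower() else ["image", "text"]
```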
2 changes: 1 addition & 1 deletion docs/DOWNSTREAM_MODEL_CONTROL.md
```diff
@@ -2,7 +2,7 @@

 **For teams building on TIMEPOINT Flash (Web App, iPhone App, Clockchain, Billing, Enterprise integrations)**

-TIMEPOINT Flash now supports full downstream control of model selection and generation hyperparameters on every generation request. Downstream apps can set `model_policy: "permissive"` to route all 14 pipeline agents through open-weight models (DeepSeek, Llama, Qwen, Mistral) via OpenRouter for both text and images — making the entire pipeline fully Google-free with zero Google API calls, including grounding. Apps can also specify exact models by name using `text_model` and `image_model` (any OpenRouter-compatible model ID like `qwen/qwen3-235b-a22b` or Google native like `gemini-2.5-flash`), and these explicit overrides take priority over `model_policy`, which in turn takes priority over `preset`. In addition, the new `llm_params` object provides fine-grained control over generation hyperparameters — temperature, max_tokens, top_p, top_k, frequency/presence/repetition penalties, stop sequences, thinking level, and system prompt injection (prefix/suffix) — all applied uniformly across every agent in the pipeline. Request-level `llm_params` override each agent's built-in defaults, so setting `temperature: 0.3` overrides the scene agent's default of 0.7, the dialog agent's default of 0.85, etc. All of these controls are composable: you can combine `model_policy`, explicit models, `preset`, and `llm_params` in the same request.
+TIMEPOINT Flash now supports full downstream control of model selection and generation hyperparameters on every generation request. Downstream apps can set `model_policy: "permissive"` to route all 14 pipeline agents through open-weight models (DeepSeek, Llama, Qwen, Mistral) via OpenRouter for text, and FLUX (Black Forest Labs) for images — making the entire pipeline fully Google-free with zero Google API calls, including grounding. Apps can also specify exact models by name using `text_model` and `image_model` (any OpenRouter-compatible model ID like `qwen/qwen3-235b-a22b` or Google native like `gemini-2.5-flash`), and these explicit overrides take priority over `model_policy`, which in turn takes priority over `preset`. In addition, the new `llm_params` object provides fine-grained control over generation hyperparameters — temperature, max_tokens, top_p, top_k, frequency/presence/repetition penalties, stop sequences, thinking level, and system prompt injection (prefix/suffix) — all applied uniformly across every agent in the pipeline. Request-level `llm_params` override each agent's built-in defaults, so setting `temperature: 0.3` overrides the scene agent's default of 0.7, the dialog agent's default of 0.85, etc. All of these controls are composable: you can combine `model_policy`, explicit models, `preset`, and `llm_params` in the same request.

 ## Request Parameters
```
105 changes: 105 additions & 0 deletions docs/DOWNSTREAM_NOTICE_2026-03-11.md
# DOWNSTREAM NOTICE — 2026-03-11

**Breaking changes to `model_policy: "permissive"` and image generation**

Affects: Web App, iPhone App, Clockchain, Billing, Enterprise integrations

---

## What changed

### 1. Permissive mode now enforces open-weight models (BREAKING)

Previously, `model_policy: "permissive"` was advisory — it labeled model provenance in the response but did not block proprietary models. **It now enforces.**

If you send `model_policy: "permissive"` with an explicit `text_model` or `image_model` that is proprietary, the request will be rejected with **HTTP 422**:

```json
{
"detail": "model_policy='permissive' requires open-weight models. 'openai/gpt-4o' is proprietary. Use models from: meta-llama/, deepseek/, qwen/, mistralai/, microsoft/, google/gemma, allenai/, nvidia/"
}
```

**Action required:** If you pass explicit model IDs alongside `model_policy: "permissive"`, ensure they are from the open-weight allowlist:

| Prefix | Examples |
|--------|----------|
| `meta-llama/` | `meta-llama/llama-4-scout-17b-16e-instruct` |
| `deepseek/` | `deepseek/deepseek-chat-v3-0324` |
| `qwen/` | `qwen/qwen3-235b-a22b`, `qwen/qwen3-30b-a3b` |
| `mistralai/` | `mistralai/mistral-small-3.2-24b-instruct` |
| `microsoft/` | Phi family |
| `google/gemma` | Gemma open-weight models only (not Gemini) |
| `allenai/` | OLMo family |
| `nvidia/` | Nemotron family |
| `black-forest-labs/` | FLUX image models (`flux.2-pro`, `flux.2-max`, `flux.2-flex`) |

If you omit `text_model` / `image_model` with permissive mode, Flash auto-selects the best available open-weight model from the registry. No action needed in that case.
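To avoid a round trip that ends in a 422, downstream apps can pre-validate explicit model IDs against the allowlist before sending the request. A hedged client-side sketch — `validate_permissive` and the prefix tuple are illustrative helpers, not part of any Flash SDK:

```python
# Allowlist prefixes from the table above (illustrative copy, not an SDK export)
ALLOWED = (
    "meta-llama/", "deepseek/", "qwen/", "mistralai/", "microsoft/",
    "google/gemma", "allenai/", "nvidia/", "black-forest-labs/",
)


def validate_permissive(request: dict) -> list[str]:
    """Return the offending model IDs; empty list means the request is safe to send."""
    if request.get("model_policy") != "permissive":
        return []  # enforcement only applies in permissive mode
    return [
        m for key in ("text_model", "image_model")
        if (m := request.get(key)) and not m.startswith(ALLOWED)
    ]
```

A non-empty result predicts the 422 shown above, so you can surface the error locally instead of handling the HTTP response.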

### 2. Pollinations removed — all images via OpenRouter (BREAKING if you used `image_model: "pollinations"`)

Pollinations.ai has been removed entirely as an image provider. The image fallback chain is now:

- **Before:** Google → OpenRouter → Pollinations (3-tier)
- **After:** Google → OpenRouter (2-tier)

If you were passing `image_model: "pollinations"` in requests, this will now fall through to OpenRouter image models. Remove any explicit `"pollinations"` references.

### 3. Permissive mode is dramatically faster

Generation with `model_policy: "permissive"` was timing out at 600s due to DeepSeek R1 (a thinking model, 30-60s per call) being selected early in the fallback chain. Fixed:

- **Model preference reordered:** Fast chat models first (Llama 4 Scout, DeepSeek Chat V3, Qwen3-30B). DeepSeek R1 is now last resort.
- **Dialog batched:** Permissive mode uses batch dialog (1 LLM call) instead of sequential (7 calls).
- **Critique loop skipped:** Permissive mode skips the dialog critique/refinement pass.
- **Default max_tokens=2048:** Applied when no preset is specified, preventing unbounded generation.

Expected latency: ~2 minutes (down from 10+ minutes / timeout).

### 4. Google fallback blocked in permissive mode

The LLM router previously fell back to Google Gemini when OpenRouter calls failed, even in permissive mode. This violated the Google-free guarantee. The router now skips Google fallback entirely when `model_policy: "permissive"`.

---

## Migration checklist

- [ ] **Search for `"pollinations"` in your codebase** — remove any explicit references as an `image_model` value
- [ ] **If you send explicit models with `model_policy: "permissive"`** — verify they match the allowlist prefixes above, or handle 422 responses
- [ ] **If you had long timeouts for permissive mode** — you can likely reduce them (2-3 minutes is sufficient now)
- [ ] **No action needed if** you use `model_policy: "permissive"` without explicit model overrides — auto-selection handles everything

## Request examples

**Simplest permissive request (recommended):**
```json
{
"query": "The signing of the Magna Carta, 1215",
"generate_image": true,
"model_policy": "permissive"
}
```

**Permissive with explicit open-weight model:**
```json
{
"query": "Apollo 11 Moon Landing, 1969",
"model_policy": "permissive",
"text_model": "qwen/qwen3-235b-a22b",
"generate_image": true
}
```

**This will now fail (422):**
```json
{
"query": "D-Day, 1944",
"model_policy": "permissive",
"text_model": "openai/gpt-4o"
}
```

---

*Deployed to production: 2026-03-11*
*PR (open-source): https://github.com/timepointai/timepoint-flash/pull/16*