From eb7202ae7717080052251de54cb6bcfeba3dd764 Mon Sep 17 00:00:00 2001
From: timepointai <info@timepointai.com>
Date: Mon, 16 Mar 2026 08:36:30 -0600
Subject: [PATCH] feat: add autoresearch image prompt optimization loop

Karpathy-style autoresearch that explores Flash's image prompt mutation
space (style, lighting, composition, detail, cfg_scale). Includes
dry-run mode with synthetic CLIP scores, Pareto frontier tracking of
quality vs cost, and JSONL result output.
---
 autoresearch/__init__.py           |   6 +
 autoresearch/flash_autoresearch.py | 465 +++++++++++++++++++++++++++++
 2 files changed, 471 insertions(+)
 create mode 100644 autoresearch/__init__.py
 create mode 100644 autoresearch/flash_autoresearch.py

diff --git a/autoresearch/__init__.py b/autoresearch/__init__.py
new file mode 100644
index 0000000..c62cb9e
--- /dev/null
+++ b/autoresearch/__init__.py
@@ -0,0 +1,6 @@
+"""Autoresearch module for Timepoint Flash.
+
+Karpathy-style autonomous optimization loops for Flash's image generation
+pipeline. Explores prompt mutation spaces, measures quality metrics, and
+tracks Pareto frontiers of quality vs cost.
+"""
diff --git a/autoresearch/flash_autoresearch.py b/autoresearch/flash_autoresearch.py
new file mode 100644
index 0000000..cb2749d
--- /dev/null
+++ b/autoresearch/flash_autoresearch.py
@@ -0,0 +1,465 @@
+"""Flash Autoresearch: image prompt optimization loop.
+
+Karpathy-style autoresearch that explores the prompt mutation space for
+Flash's image generation pipeline. Mutates style, lighting, composition,
+and detail keywords; measures image quality via CLIP similarity (or
+synthetic scores in dry-run mode); and tracks the Pareto frontier of
+quality vs cost.
+
+Usage:
+    python -m autoresearch.flash_autoresearch --dry-run --iterations 10
+    python -m autoresearch.flash_autoresearch --iterations 50 --flash-url http://localhost:8000  # live mode: scoring is not implemented yet
+"""
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+import math
+import random
+import sys
+import time
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+
+# ---------------------------------------------------------------------------
+# Prompt mutation space
+# ---------------------------------------------------------------------------
+
+STYLE_KEYWORDS: List[str] = [  # pool sampled with rng.choice for PromptConfig.style
+    "photorealistic",
+    "cinematic",
+    "oil painting",
+    "watercolor",
+    "digital art",
+    "hyperrealistic",
+    "impressionist",
+    "concept art",
+    "matte painting",
+    "cel shaded",
+    "noir",
+    "retro futurism",
+    "vaporwave",
+    "documentary",
+    "graphic novel",
+]
+
+LIGHTING_TERMS: List[str] = [  # pool sampled with rng.choice for PromptConfig.lighting
+    "golden hour",
+    "dramatic chiaroscuro",
+    "soft diffused light",
+    "harsh overhead sun",
+    "neon glow",
+    "candlelight",
+    "backlit silhouette",
+    "volumetric fog",
+    "studio lighting",
+    "moonlight",
+    "fluorescent",
+    "rim lighting",
+    "ambient occlusion",
+    "overcast flat",
+]
+
+COMPOSITION_TERMS: List[str] = [  # pool sampled with rng.choice for PromptConfig.composition
+    "rule of thirds",
+    "centered symmetry",
+    "extreme close-up",
+    "wide establishing shot",
+    "dutch angle",
+    "bird's eye view",
+    "worm's eye view",
+    "over the shoulder",
+    "leading lines",
+    "frame within frame",
+    "shallow depth of field",
+    "deep focus",
+    "split screen",
+]
+
+DETAIL_LEVELS: List[str] = [  # pool for PromptConfig.detail; synthetic cost keys on "ultra"/"high"/"minimal"
+    "minimal detail, clean lines",
+    "moderate detail, balanced textures",
+    "high detail, intricate textures",
+    "ultra detail, 8k, micro textures",
+    "photographic grain, film emulation",
+    "painterly brushstrokes, loose detail",
+]
+
+NEGATIVE_PROMPTS: List[str] = [  # pool sampled with rng.choice for PromptConfig.negative
+    "blurry, out of focus",
+    "bad anatomy, distorted",
+    "low quality, jpeg artifacts",
+    "watermark, text overlay",
+    "oversaturated, blown highlights",
+    "",  # no negative prompt
+]
+
+
+# ---------------------------------------------------------------------------
+# Data structures
+# ---------------------------------------------------------------------------
+
+@dataclass
+class PromptConfig:
+    """One candidate prompt: four positive keywords, a negative prompt, cfg_scale and seed."""
+
+    style: str
+    lighting: str
+    composition: str
+    detail: str
+    negative: str
+    cfg_scale: float = 7.5  # guidance scale 3.0–15.0
+    seed: int = 42
+
+    def config_hash(self) -> str:  # 12 hex chars of SHA-256; ``seed`` is not part of the key
+        keyed = (self.style, self.lighting, self.composition, self.detail, self.negative, self.cfg_scale)
+        return hashlib.sha256("|".join(map(str, keyed)).encode()).hexdigest()[:12]
+
+    def to_prompt_text(self) -> str:
+        # filter(None, ...) skips empty keywords, so no dangling ", " separators appear.
+        return ", ".join(filter(None, (self.style, self.lighting, self.composition, self.detail)))
+
+    def to_negative_text(self) -> str:
+        return self.negative  # may be "" when no negative prompt is used
+
+
+@dataclass
+class RunResult:
+    """Outcome of scoring one config: score, cost, timing, and the config itself."""
+
+    iteration: int
+    config: PromptConfig
+    config_hash: str
+    clip_score: float
+    cost_usd: float
+    duration_s: float
+    is_dry_run: bool
+    timestamp: float = field(default_factory=time.time)
+
+    def quality_per_dollar(self) -> float:
+        """Return ``clip_score / cost_usd``, or ``inf`` when the cost is not positive."""
+        free = self.cost_usd <= 0
+        return float("inf") if free else self.clip_score / self.cost_usd
+
+
+@dataclass
+class ParetoPoint:
+    """A point on the Pareto frontier (quality vs cost)."""
+
+    config_hash: str  # identifier of the evaluated config
+    clip_score: float  # quality axis: higher is better
+    cost_usd: float  # cost axis: lower is better
+    config: Dict[str, Any]  # the config as a plain dict, so the point can be dumped to JSON
+
+
+# ---------------------------------------------------------------------------
+# Mutation engine
+# ---------------------------------------------------------------------------
+
+def random_config(rng: random.Random) -> PromptConfig:
+    """Draw every dimension independently and uniformly from its pool."""
+    # Draw order is fixed (style first, cfg_scale last) so a seeded rng reproduces runs.
+    pools = (STYLE_KEYWORDS, LIGHTING_TERMS, COMPOSITION_TERMS, DETAIL_LEVELS, NEGATIVE_PROMPTS)
+    style, lighting, composition, detail, negative = (rng.choice(pool) for pool in pools)
+    guidance = round(rng.uniform(3.0, 15.0), 1)
+    return PromptConfig(
+        style=style, lighting=lighting, composition=composition,
+        detail=detail, negative=negative, cfg_scale=guidance,
+    )
+
+
+def mutate_config(cfg: PromptConfig, rng: random.Random) -> PromptConfig:
+    """Return a copy of ``cfg`` with one or two randomly picked fields resampled.
+
+    ``cfg`` is left untouched and ``seed`` is carried over. A resampled field
+    can land on its old value, so the result may equal the input.
+    """
+    # Keyword pool per categorical field; insertion order fixes the sampling order.
+    pools = {
+        "style": STYLE_KEYWORDS,
+        "lighting": LIGHTING_TERMS,
+        "composition": COMPOSITION_TERMS,
+        "detail": DETAIL_LEVELS,
+        "negative": NEGATIVE_PROMPTS,
+    }
+    dimensions = [*pools, "cfg_scale"]
+
+    # The count is drawn first, then the field names, as a seeded run expects.
+    how_many = rng.randint(1, 2)
+    picked = rng.sample(dimensions, k=how_many)
+
+    # asdict() yields every field (seed included), giving an independent copy.
+    mutated = PromptConfig(**asdict(cfg))
+    for name in picked:
+        if name == "cfg_scale":
+            value = round(rng.uniform(3.0, 15.0), 1)  # continuous guidance scale
+        else:
+            value = rng.choice(pools[name])
+        setattr(mutated, name, value)
+    return mutated
+
+
+# ---------------------------------------------------------------------------
+# Scoring
+# ---------------------------------------------------------------------------
+
+def synthetic_clip_score(cfg: PromptConfig, rng: random.Random) -> Tuple[float, float]:
+    """Fabricate a ``(clip_score, cost_usd)`` pair for dry-run mode.
+
+    The score starts from a value keyed on ``cfg.config_hash()`` (0.55 up to
+    just under 0.85), is nudged by a few keyword/guidance rules, then gets
+    Gaussian noise from ``rng`` and is clamped to [0, 1]. The cost depends
+    only on the detail keyword and ``cfg_scale``, so it carries no noise.
+    """
+    # Hash value -> bucket 0..999 -> base score inside the documented range.
+    bucket = int(cfg.config_hash(), 16) % 1000
+    score = 0.55 + bucket / 1000.0 * 0.30
+
+    # Rule-based adjustments, applied in a fixed order.
+    adjustments = (
+        ("cinematic" in cfg.style and "golden hour" in cfg.lighting, 0.04),
+        ("photorealistic" in cfg.style and "high detail" in cfg.detail, 0.03),
+        (cfg.cfg_scale > 10.0, -0.02),  # too high guidance hurts quality
+        (cfg.cfg_scale < 5.0, -0.01),
+    )
+    for applies, delta in adjustments:
+        if applies:
+            score += delta
+
+    # One Gaussian draw (sigma 0.015), then clamp into [0, 1].
+    jittered = score + rng.gauss(0, 0.015)
+    clip_score = max(0.0, min(1.0, jittered))
+
+    # First matching keyword sets the cost multiplier; 1.0 when none match.
+    detail_mult = 1.0
+    for keyword, factor in (("ultra", 1.5), ("high", 1.2), ("minimal", 0.7)):
+        if keyword in cfg.detail:
+            detail_mult = factor
+            break
+    # Synthetic cost grows with the detail multiplier and with cfg_scale.
+    cost = round(0.30 * detail_mult * (0.8 + cfg.cfg_scale / 30.0), 4)
+
+    return clip_score, cost
+
+
+def live_score(
+    cfg: PromptConfig,
+    flash_url: str,
+    query: str = "A dramatic historical moment captured in time",
+) -> Tuple[float, float]:
+    """Placeholder for live scoring (Flash image generation plus CLIP scoring).
+
+    No request is made and the arguments are unused: every call raises
+    ``NotImplementedError`` pointing the user at ``--dry-run``.
+    """
+    hint = "Use --dry-run for synthetic scoring."
+    reason = "Live scoring requires Flash API integration. "
+    raise NotImplementedError(reason + hint)
+
+
+# ---------------------------------------------------------------------------
+# Pareto frontier
+# ---------------------------------------------------------------------------
+
+def update_pareto(frontier: List[ParetoPoint], candidate: ParetoPoint) -> List[ParetoPoint]:
+    """Return a new frontier with ``candidate`` merged in; ``frontier`` is not mutated.
+
+    ``a`` dominates ``b`` when ``a`` is at least as good on both axes
+    (``clip_score`` no lower, ``cost_usd`` no higher) and strictly better on
+    at least one. Points dominated by the candidate are dropped. The candidate
+    is appended unless an existing point dominates it or already has the same
+    score and cost (so exact duplicates do not pile up on the frontier).
+    """
+    def covers(a: ParetoPoint, b: ParetoPoint) -> bool:
+        # True when ``a`` is no worse than ``b`` on both quality and cost.
+        return a.clip_score >= b.clip_score and a.cost_usd <= b.cost_usd
+
+    kept: List[ParetoPoint] = []
+    redundant = False
+    for point in frontier:
+        if covers(point, candidate):
+            redundant = True  # dominated by, or identical to, an existing point
+            kept.append(point)
+        elif not covers(candidate, point):
+            kept.append(point)  # neither dominates: a genuine trade-off
+    if not redundant:
+        kept.append(candidate)
+    return kept
+
+
+# ---------------------------------------------------------------------------
+# Main loop
+# ---------------------------------------------------------------------------
+
+def run_autoresearch(
+    iterations: int,
+    dry_run: bool = True,
+    flash_url: Optional[str] = None,
+    output_dir: str = "autoresearch/results",
+    seed: int = 42,
+) -> None:
+    """Run the score, record, mutate loop and write its outputs.
+
+    Every iteration appends one JSON line to ``<output_dir>/results.jsonl``
+    and updates the Pareto frontier; afterwards the frontier is written to
+    ``<output_dir>/pareto.json`` (replacing any previous file) and a summary
+    is printed. ``seed`` seeds the single ``random.Random`` used throughout.
+
+    Raises:
+        ValueError: live mode (``dry_run`` false) without ``flash_url``;
+            raised before any directory or file is created.
+        NotImplementedError: propagated from ``live_score`` in live mode.
+    """
+    # Fail fast: a bad live-mode setup should not leave output files behind.
+    if not dry_run and not flash_url:
+        raise ValueError("--flash-url required for live mode")
+
+    rng = random.Random(seed)
+    out = Path(output_dir)
+    out.mkdir(parents=True, exist_ok=True)
+
+    results_file = out / "results.jsonl"
+    pareto_file = out / "pareto.json"
+
+    frontier: List[ParetoPoint] = []
+    best_score = 0.0
+    best_config: Optional[PromptConfig] = None
+    current_config = random_config(rng)
+
+    print(f"Flash Autoresearch — {'DRY RUN' if dry_run else 'LIVE'}")
+    print(f"Iterations: {iterations}")
+    print(f"Output: {out}")
+    print("-" * 60)
+
+    # Append mode keeps earlier runs; explicit UTF-8 avoids locale surprises.
+    with open(results_file, "a", encoding="utf-8") as f:
+        for i in range(iterations):
+            t0 = time.time()
+            if dry_run:
+                clip_score, cost = synthetic_clip_score(current_config, rng)
+            else:
+                clip_score, cost = live_score(current_config, flash_url)
+            duration = time.time() - t0
+
+            result = RunResult(
+                iteration=i, config=current_config, config_hash=current_config.config_hash(),
+                clip_score=round(clip_score, 4), cost_usd=cost,
+                duration_s=round(duration, 3), is_dry_run=dry_run,
+            )
+
+            # A non-positive cost has no finite ratio; store null rather than inf,
+            # which json.dumps would emit as the non-standard token Infinity.
+            record = {
+                "iteration": result.iteration,
+                "config_hash": result.config_hash,
+                "clip_score": result.clip_score,
+                "cost_usd": result.cost_usd,
+                "quality_per_dollar": round(result.quality_per_dollar(), 4) if result.cost_usd > 0 else None,
+                "duration_s": result.duration_s,
+                "is_dry_run": result.is_dry_run,
+                "timestamp": result.timestamp,
+                "config": asdict(result.config),
+            }
+            f.write(json.dumps(record) + "\n")
+            f.flush()  # push each record to the file as soon as it is produced
+
+            candidate = ParetoPoint(
+                config_hash=result.config_hash, clip_score=result.clip_score,
+                cost_usd=result.cost_usd, config=asdict(current_config),
+            )
+            frontier = update_pareto(frontier, candidate)
+
+            # Compare the rounded score that is actually reported, and accept
+            # the first result unconditionally so a 0.0 score still sets a best.
+            improved = ""
+            if best_config is None or result.clip_score > best_score:
+                best_score = result.clip_score
+                best_config = current_config
+                improved = " *BEST*"
+
+            print(
+                f"[{i+1:>4}/{iterations}] CLIP={result.clip_score:.4f}  "
+                f"cost=${result.cost_usd:.4f}  Q/$={result.quality_per_dollar():.2f}  "
+                f"pareto={len(frontier)}  hash={result.config_hash}{improved}"
+            )
+
+            # Mutate: 70% mutate best, 20% mutate current, 10% random
+            roll = rng.random()
+            if best_config is not None and roll < 0.7:
+                current_config = mutate_config(best_config, rng)
+            elif roll < 0.9:
+                current_config = mutate_config(current_config, rng)
+            else:
+                current_config = random_config(rng)
+
+    # Write Pareto frontier, highest CLIP score first.
+    pareto_data = {
+        "frontier_size": len(frontier),
+        "best_clip_score": best_score,
+        "iterations": iterations,
+        "dry_run": dry_run,
+        "points": [asdict(p) for p in sorted(frontier, key=lambda x: x.clip_score, reverse=True)],
+    }
+    with open(pareto_file, "w", encoding="utf-8") as f:
+        json.dump(pareto_data, f, indent=2)
+
+    print("-" * 60)
+    print(f"Best CLIP score: {best_score:.4f}")
+    print(f"Pareto frontier: {len(frontier)} points")
+    print(f"Results: {results_file}")
+    print(f"Pareto:  {pareto_file}")
+
+    if best_config is not None:
+        print("\nBest config:")
+        print(f"  Style:       {best_config.style}")
+        print(f"  Lighting:    {best_config.lighting}")
+        print(f"  Composition: {best_config.composition}")
+        print(f"  Detail:      {best_config.detail}")
+        print(f"  CFG scale:   {best_config.cfg_scale}")
+        print(f"  Negative:    {best_config.negative or '(none)'}")
+        print(f"  Prompt:      {best_config.to_prompt_text()}")
+
+
+def main() -> None:
+    """Parse the command-line flags and hand them to ``run_autoresearch``."""
+    cli = argparse.ArgumentParser(
+        description="Flash Autoresearch: image prompt optimization loop"
+    )
+    # (flag, add_argument keyword arguments), registered in this order.
+    options = [
+        ("--iterations", {"type": int, "default": 50, "help": "Number of iterations (default: 50)"}),
+        (
+            "--dry-run",
+            {
+                "action": "store_true",
+                "help": "Use synthetic CLIP scores instead of calling Flash API",
+            },
+        ),
+        ("--flash-url", {"type": str, "default": None, "help": "Flash API base URL for live mode"}),
+        (
+            "--output-dir",
+            {
+                "type": str,
+                "default": "autoresearch/results",
+                "help": "Directory for output files (default: autoresearch/results)",
+            },
+        ),
+        ("--seed", {"type": int, "default": 42, "help": "Random seed (default: 42)"}),
+    ]
+    for flag, spec in options:
+        cli.add_argument(flag, **spec)
+
+    # --dry-run is a store_true flag, so leaving it off selects live mode.
+    opts = cli.parse_args()
+
+    run_autoresearch(
+        iterations=opts.iterations, dry_run=opts.dry_run, flash_url=opts.flash_url,
+        output_dir=opts.output_dir, seed=opts.seed,
+    )
+
+
+if __name__ == "__main__":  # true when run as a script or via ``python -m``, not on import
+    main()