From 051210ab2c1938af811d381db499036198679693 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 17 Jan 2026 20:23:09 +0000 Subject: [PATCH 1/3] feat(media): Add multimedia editing services and CLI This commit adds comprehensive multimedia generation and editing capabilities to Kurt, supporting both AI-powered generation and traditional media processing. New services (src/kurt/services/): - AIGenerationService: Unified interface to image/video AI APIs - fal.ai: Fast inference, Flux models - Leonardo.ai: Nano Banana, Phoenix models - Replicate: Large model library - Runway: Gen-3/Gen-4 video generation - MediaEditService: FFmpeg/ImageMagick wrapper for editing - Image: resize, crop, rotate, filters, composite - Video: trim, resize, extract audio, add audio, thumbnails - Format conversion for all media types New CLI commands (kurt media): - kurt media generate image: AI image generation - kurt media generate video: AI video generation - kurt media edit resize/crop/rotate/filter/trim/thumbnail - kurt media convert: Format conversion - kurt media info: Get media file information - kurt media providers: Show configured AI providers API endpoints for web UI: - POST /api/media/generate/image - POST /api/media/generate/video - POST /api/media/edit - GET /api/media/info - GET /api/media/providers Also fixes .gitignore to not ignore src/kurt/workflows/ directory. 
--- .gitignore | 4 +- src/kurt/cli/main.py | 5 +- src/kurt/services/__init__.py | 6 + src/kurt/services/ai_generation.py | 846 ++++++++++++++ src/kurt/services/media_edit.py | 1012 +++++++++++++++++ src/kurt/web/api/server.py | 300 +++++ src/kurt/workflows/media/__init__.py | 75 ++ src/kurt/workflows/media/cli.py | 914 +++++++++++++++ src/kurt/workflows/media/tests/__init__.py | 1 + .../workflows/media/tests/test_services.py | 243 ++++ 10 files changed, 3402 insertions(+), 4 deletions(-) create mode 100644 src/kurt/services/__init__.py create mode 100644 src/kurt/services/ai_generation.py create mode 100644 src/kurt/services/media_edit.py create mode 100644 src/kurt/workflows/media/__init__.py create mode 100644 src/kurt/workflows/media/cli.py create mode 100644 src/kurt/workflows/media/tests/__init__.py create mode 100644 src/kurt/workflows/media/tests/test_services.py diff --git a/.gitignore b/.gitignore index a3af2c4e..6a9ff5d5 100644 --- a/.gitignore +++ b/.gitignore @@ -82,8 +82,8 @@ yarn-error.log\* kurt-demo/ kurt-demo2/ -# Agent workflow definitions (user-specific) -workflows/ +# Agent workflow definitions (user-specific, top-level only) +/workflows/ # Reports (local demo reports) reports/ diff --git a/src/kurt/cli/main.py b/src/kurt/cli/main.py index 81c64b76..d2a525b9 100644 --- a/src/kurt/cli/main.py +++ b/src/kurt/cli/main.py @@ -43,6 +43,7 @@ def get_command(self, ctx, name): "research": ("kurt.workflows.research.cli", "research_group"), "signals": ("kurt.workflows.signals.cli", "signals_group"), "agents": ("kurt.workflows.agents.cli", "agents_group"), + "media": ("kurt.workflows.media.cli", "media_group"), "admin": ("kurt.cli.admin", "admin"), "show": ("kurt.cli.show", "show_group"), "auth": ("kurt.cli.auth", "auth"), @@ -58,8 +59,8 @@ def main(ctx): """ from kurt.config import config_file_exists - # Skip migration check for init, admin, and auth (which don't need local DB) - if ctx.invoked_subcommand in ["init", "admin", "auth"]: + # Skip migration 
class Provider(str, Enum):
    """AI generation providers this service can talk to."""

    FAL = "fal"
    LEONARDO = "leonardo"
    REPLICATE = "replicate"
    RUNWAY = "runway"


class MediaType(str, Enum):
    """Kind of media a provider is asked to produce."""

    IMAGE = "image"
    VIDEO = "video"


@dataclass
class GenerationResult:
    """Outcome of a single AI generation request.

    Attributes:
        success: True when the provider returned usable output.
        url: Primary output URL, when the provider returns exactly one.
        urls: All output URLs (may duplicate ``url`` as its first entry).
        job_id: Provider-side job/prediction/task identifier, if any.
        provider: Name of the provider that served the request.
        model: Model identifier that was used.
        error: Human-readable failure description when ``success`` is False.
        metadata: Provider-specific extras (seeds, raw payloads, ...).
    """

    success: bool
    url: str | None = None
    urls: list[str] = field(default_factory=list)
    job_id: str | None = None
    provider: str | None = None
    model: str | None = None
    error: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def primary_url(self) -> str | None:
        """Return the single best output URL, preferring ``url``."""
        if self.url:
            return self.url
        if self.urls:
            return self.urls[0]
        return None
+ + Args: + fal_key: fal.ai API key (or FAL_KEY env var) + leonardo_key: Leonardo.ai API key (or LEONARDO_API_KEY env var) + replicate_token: Replicate token (or REPLICATE_API_TOKEN env var) + runway_key: Runway API key (or RUNWAY_API_KEY env var) + default_image_provider: Default provider for image generation + default_video_provider: Default provider for video generation + """ + self.fal_key = fal_key or os.environ.get("FAL_KEY") + self.leonardo_key = leonardo_key or os.environ.get("LEONARDO_API_KEY") + self.replicate_token = replicate_token or os.environ.get("REPLICATE_API_TOKEN") + self.runway_key = runway_key or os.environ.get("RUNWAY_API_KEY") + + self.default_image_provider = default_image_provider + self.default_video_provider = default_video_provider + + self._client: httpx.AsyncClient | None = None + + @property + def client(self) -> httpx.AsyncClient: + """Get or create the HTTP client.""" + if self._client is None: + self._client = httpx.AsyncClient(timeout=300.0) + return self._client + + async def close(self) -> None: + """Close the HTTP client.""" + if self._client is not None: + await self._client.aclose() + self._client = None + + def _get_provider_key(self, provider: Provider) -> str | None: + """Get the API key for a provider.""" + return { + Provider.FAL: self.fal_key, + Provider.LEONARDO: self.leonardo_key, + Provider.REPLICATE: self.replicate_token, + Provider.RUNWAY: self.runway_key, + }.get(provider) + + async def generate_image( + self, + prompt: str, + model: str | None = None, + provider: Provider | str | None = None, + width: int = 1024, + height: int = 1024, + num_images: int = 1, + negative_prompt: str | None = None, + **kwargs: Any, + ) -> GenerationResult: + """Generate an image using AI. 
+ + Args: + prompt: Text description of the image to generate + model: Model identifier (provider-specific) + provider: Provider to use (fal, leonardo, replicate) + width: Image width in pixels + height: Image height in pixels + num_images: Number of images to generate + negative_prompt: Things to avoid in the image + **kwargs: Additional provider-specific parameters + + Returns: + GenerationResult with URL(s) of generated images + """ + if provider is None: + provider = self.default_image_provider + elif isinstance(provider, str): + provider = Provider(provider) + + if model is None: + model = self.DEFAULT_MODELS.get(provider, {}).get(MediaType.IMAGE) + + if provider == Provider.FAL: + return await self._fal_generate_image( + prompt=prompt, + model=model, + width=width, + height=height, + num_images=num_images, + negative_prompt=negative_prompt, + **kwargs, + ) + elif provider == Provider.LEONARDO: + return await self._leonardo_generate_image( + prompt=prompt, + model=model, + width=width, + height=height, + num_images=num_images, + negative_prompt=negative_prompt, + **kwargs, + ) + elif provider == Provider.REPLICATE: + return await self._replicate_generate_image( + prompt=prompt, + model=model, + width=width, + height=height, + num_images=num_images, + negative_prompt=negative_prompt, + **kwargs, + ) + else: + return GenerationResult( + success=False, + error=f"Provider {provider} does not support image generation", + ) + + async def generate_video( + self, + prompt: str, + image_url: str | None = None, + model: str | None = None, + provider: Provider | str | None = None, + duration: int = 5, + **kwargs: Any, + ) -> GenerationResult: + """Generate a video using AI. 
+ + Args: + prompt: Text description of the video motion/content + image_url: Source image URL (for image-to-video) + model: Model identifier (provider-specific) + provider: Provider to use (fal, runway, replicate) + duration: Video duration in seconds + **kwargs: Additional provider-specific parameters + + Returns: + GenerationResult with URL of generated video + """ + if provider is None: + provider = self.default_video_provider + elif isinstance(provider, str): + provider = Provider(provider) + + if model is None: + model = self.DEFAULT_MODELS.get(provider, {}).get(MediaType.VIDEO) + + if provider == Provider.FAL: + return await self._fal_generate_video( + prompt=prompt, + image_url=image_url, + model=model, + duration=duration, + **kwargs, + ) + elif provider == Provider.RUNWAY: + return await self._runway_generate_video( + prompt=prompt, + image_url=image_url, + model=model, + duration=duration, + **kwargs, + ) + elif provider == Provider.REPLICATE: + return await self._replicate_generate_video( + prompt=prompt, + image_url=image_url, + model=model, + **kwargs, + ) + else: + return GenerationResult( + success=False, + error=f"Provider {provider} does not support video generation", + ) + + # ------------------------------------------------------------------------- + # fal.ai Implementation + # ------------------------------------------------------------------------- + + async def _fal_generate_image( + self, + prompt: str, + model: str, + width: int, + height: int, + num_images: int, + negative_prompt: str | None, + **kwargs: Any, + ) -> GenerationResult: + """Generate image via fal.ai.""" + if not self.fal_key: + return GenerationResult( + success=False, + error="FAL_KEY not configured", + ) + + # fal.ai uses model paths like "fal-ai/flux/dev" + if not model.startswith("fal-ai/"): + model = f"fal-ai/{model}" + + url = f"https://fal.run/{model}" + + payload: dict[str, Any] = { + "prompt": prompt, + "image_size": {"width": width, "height": height}, + 
"num_images": num_images, + } + if negative_prompt: + payload["negative_prompt"] = negative_prompt + payload.update(kwargs) + + try: + response = await self.client.post( + url, + headers={ + "Authorization": f"Key {self.fal_key}", + "Content-Type": "application/json", + }, + json=payload, + ) + response.raise_for_status() + data = response.json() + + images = data.get("images", []) + urls = [img.get("url") for img in images if img.get("url")] + + return GenerationResult( + success=True, + url=urls[0] if urls else None, + urls=urls, + provider="fal", + model=model, + metadata={"seed": data.get("seed")}, + ) + except httpx.HTTPStatusError as e: + return GenerationResult( + success=False, + error=f"fal.ai API error: {e.response.status_code} - {e.response.text}", + provider="fal", + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"fal.ai request failed: {e}", + provider="fal", + ) + + async def _fal_generate_video( + self, + prompt: str, + image_url: str | None, + model: str, + duration: int, + **kwargs: Any, + ) -> GenerationResult: + """Generate video via fal.ai.""" + if not self.fal_key: + return GenerationResult( + success=False, + error="FAL_KEY not configured", + ) + + if not model.startswith("fal-ai/"): + model = f"fal-ai/{model}" + + url = f"https://fal.run/{model}" + + payload: dict[str, Any] = {"prompt": prompt} + if image_url: + payload["image_url"] = image_url + if "num_frames" not in kwargs: + # Approximate frames from duration (assuming ~24fps output) + payload["num_frames"] = min(duration * 24, 257) + payload.update(kwargs) + + try: + response = await self.client.post( + url, + headers={ + "Authorization": f"Key {self.fal_key}", + "Content-Type": "application/json", + }, + json=payload, + ) + response.raise_for_status() + data = response.json() + + video_url = data.get("video", {}).get("url") + + return GenerationResult( + success=True, + url=video_url, + provider="fal", + model=model, + metadata=data, + ) + except 
httpx.HTTPStatusError as e: + return GenerationResult( + success=False, + error=f"fal.ai API error: {e.response.status_code} - {e.response.text}", + provider="fal", + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"fal.ai request failed: {e}", + provider="fal", + ) + + # ------------------------------------------------------------------------- + # Leonardo.ai Implementation + # ------------------------------------------------------------------------- + + async def _leonardo_generate_image( + self, + prompt: str, + model: str, + width: int, + height: int, + num_images: int, + negative_prompt: str | None, + **kwargs: Any, + ) -> GenerationResult: + """Generate image via Leonardo.ai.""" + if not self.leonardo_key: + return GenerationResult( + success=False, + error="LEONARDO_API_KEY not configured", + ) + + base_url = "https://cloud.leonardo.ai/api/rest/v1" + + # Model name to ID mapping (common models) + model_ids = { + "phoenix": "6b645e3a-d64f-4341-a6d8-7a3690fbf042", + "nano-banana": "aa77f04e-3eec-4034-9c07-d0f619684628", + "nano-banana-pro": "faf3e8d3-6d19-4e98-8c3a-5c17e9f67a28", + "sdxl": "1e60896f-3c26-4296-8ecc-53e2afecc132", + } + + model_id = model_ids.get(model, model) + + payload: dict[str, Any] = { + "prompt": prompt, + "modelId": model_id, + "width": width, + "height": height, + "num_images": num_images, + } + if negative_prompt: + payload["negative_prompt"] = negative_prompt + payload.update(kwargs) + + try: + # Start generation + response = await self.client.post( + f"{base_url}/generations", + headers={ + "Authorization": f"Bearer {self.leonardo_key}", + "Content-Type": "application/json", + }, + json=payload, + ) + response.raise_for_status() + data = response.json() + + generation_id = data.get("sdGenerationJob", {}).get("generationId") + if not generation_id: + return GenerationResult( + success=False, + error="No generation ID returned", + provider="leonardo", + ) + + # Poll for completion + urls = await 
self._leonardo_poll_generation(generation_id) + + return GenerationResult( + success=True, + url=urls[0] if urls else None, + urls=urls, + job_id=generation_id, + provider="leonardo", + model=model, + ) + except httpx.HTTPStatusError as e: + return GenerationResult( + success=False, + error=f"Leonardo API error: {e.response.status_code} - {e.response.text}", + provider="leonardo", + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"Leonardo request failed: {e}", + provider="leonardo", + ) + + async def _leonardo_poll_generation( + self, + generation_id: str, + max_wait: int = 120, + poll_interval: float = 2.0, + ) -> list[str]: + """Poll Leonardo.ai for generation completion.""" + base_url = "https://cloud.leonardo.ai/api/rest/v1" + start_time = time.time() + + while time.time() - start_time < max_wait: + response = await self.client.get( + f"{base_url}/generations/{generation_id}", + headers={"Authorization": f"Bearer {self.leonardo_key}"}, + ) + response.raise_for_status() + data = response.json() + + generation = data.get("generations_by_pk", {}) + status = generation.get("status") + + if status == "COMPLETE": + images = generation.get("generated_images", []) + return [img.get("url") for img in images if img.get("url")] + elif status == "FAILED": + raise Exception("Generation failed") + + await asyncio.sleep(poll_interval) + + raise Exception("Generation timed out") + + # ------------------------------------------------------------------------- + # Replicate Implementation + # ------------------------------------------------------------------------- + + async def _replicate_generate_image( + self, + prompt: str, + model: str, + width: int, + height: int, + num_images: int, + negative_prompt: str | None, + **kwargs: Any, + ) -> GenerationResult: + """Generate image via Replicate.""" + if not self.replicate_token: + return GenerationResult( + success=False, + error="REPLICATE_API_TOKEN not configured", + ) + + base_url = 
"https://api.replicate.com/v1" + + # Build input based on model + input_data: dict[str, Any] = { + "prompt": prompt, + "width": width, + "height": height, + "num_outputs": num_images, + } + if negative_prompt: + input_data["negative_prompt"] = negative_prompt + input_data.update(kwargs) + + try: + # Start prediction + response = await self.client.post( + f"{base_url}/predictions", + headers={ + "Authorization": f"Token {self.replicate_token}", + "Content-Type": "application/json", + }, + json={"version": model, "input": input_data}, + ) + response.raise_for_status() + data = response.json() + + prediction_id = data.get("id") + if not prediction_id: + return GenerationResult( + success=False, + error="No prediction ID returned", + provider="replicate", + ) + + # Poll for completion + urls = await self._replicate_poll_prediction(prediction_id) + + return GenerationResult( + success=True, + url=urls[0] if urls else None, + urls=urls, + job_id=prediction_id, + provider="replicate", + model=model, + ) + except httpx.HTTPStatusError as e: + return GenerationResult( + success=False, + error=f"Replicate API error: {e.response.status_code} - {e.response.text}", + provider="replicate", + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"Replicate request failed: {e}", + provider="replicate", + ) + + async def _replicate_poll_prediction( + self, + prediction_id: str, + max_wait: int = 300, + poll_interval: float = 2.0, + ) -> list[str]: + """Poll Replicate for prediction completion.""" + base_url = "https://api.replicate.com/v1" + start_time = time.time() + + while time.time() - start_time < max_wait: + response = await self.client.get( + f"{base_url}/predictions/{prediction_id}", + headers={"Authorization": f"Token {self.replicate_token}"}, + ) + response.raise_for_status() + data = response.json() + + status = data.get("status") + + if status == "succeeded": + output = data.get("output", []) + if isinstance(output, list): + return output + 
return [output] if output else [] + elif status in ("failed", "canceled"): + raise Exception(f"Prediction {status}: {data.get('error')}") + + await asyncio.sleep(poll_interval) + + raise Exception("Prediction timed out") + + async def _replicate_generate_video( + self, + prompt: str, + image_url: str | None, + model: str, + **kwargs: Any, + ) -> GenerationResult: + """Generate video via Replicate.""" + if not self.replicate_token: + return GenerationResult( + success=False, + error="REPLICATE_API_TOKEN not configured", + ) + + base_url = "https://api.replicate.com/v1" + + input_data: dict[str, Any] = {} + if image_url: + input_data["image"] = image_url + if prompt: + input_data["prompt"] = prompt + input_data.update(kwargs) + + try: + response = await self.client.post( + f"{base_url}/predictions", + headers={ + "Authorization": f"Token {self.replicate_token}", + "Content-Type": "application/json", + }, + json={"version": model, "input": input_data}, + ) + response.raise_for_status() + data = response.json() + + prediction_id = data.get("id") + if not prediction_id: + return GenerationResult( + success=False, + error="No prediction ID returned", + provider="replicate", + ) + + urls = await self._replicate_poll_prediction(prediction_id) + + return GenerationResult( + success=True, + url=urls[0] if urls else None, + urls=urls, + job_id=prediction_id, + provider="replicate", + model=model, + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"Replicate request failed: {e}", + provider="replicate", + ) + + # ------------------------------------------------------------------------- + # Runway Implementation + # ------------------------------------------------------------------------- + + async def _runway_generate_video( + self, + prompt: str, + image_url: str | None, + model: str, + duration: int, + **kwargs: Any, + ) -> GenerationResult: + """Generate video via Runway.""" + if not self.runway_key: + return GenerationResult( + success=False, 
+ error="RUNWAY_API_KEY not configured", + ) + + base_url = "https://api.dev.runwayml.com/v1" + + payload: dict[str, Any] = { + "model": model, + "promptText": prompt, + "duration": duration, + } + if image_url: + payload["promptImage"] = image_url + payload.update(kwargs) + + try: + # Start generation + response = await self.client.post( + f"{base_url}/image_to_video" if image_url else f"{base_url}/text_to_video", + headers={ + "Authorization": f"Bearer {self.runway_key}", + "Content-Type": "application/json", + "X-Runway-Version": "2024-11-06", + }, + json=payload, + ) + response.raise_for_status() + data = response.json() + + task_id = data.get("id") + if not task_id: + return GenerationResult( + success=False, + error="No task ID returned", + provider="runway", + ) + + # Poll for completion + video_url = await self._runway_poll_task(task_id) + + return GenerationResult( + success=True, + url=video_url, + job_id=task_id, + provider="runway", + model=model, + ) + except httpx.HTTPStatusError as e: + return GenerationResult( + success=False, + error=f"Runway API error: {e.response.status_code} - {e.response.text}", + provider="runway", + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"Runway request failed: {e}", + provider="runway", + ) + + async def _runway_poll_task( + self, + task_id: str, + max_wait: int = 600, + poll_interval: float = 5.0, + ) -> str: + """Poll Runway for task completion.""" + base_url = "https://api.dev.runwayml.com/v1" + start_time = time.time() + + while time.time() - start_time < max_wait: + response = await self.client.get( + f"{base_url}/tasks/{task_id}", + headers={ + "Authorization": f"Bearer {self.runway_key}", + "X-Runway-Version": "2024-11-06", + }, + ) + response.raise_for_status() + data = response.json() + + status = data.get("status") + + if status == "SUCCEEDED": + output = data.get("output", []) + if output: + return output[0] + raise Exception("No output URL in completed task") + elif status 
== "FAILED": + raise Exception(f"Task failed: {data.get('failure')}") + + await asyncio.sleep(poll_interval) + + raise Exception("Task timed out") + + +# Convenience function for synchronous usage +def generate_image_sync( + prompt: str, + model: str | None = None, + provider: str | None = None, + **kwargs: Any, +) -> GenerationResult: + """Synchronous wrapper for image generation.""" + service = AIGenerationService() + return asyncio.run( + service.generate_image(prompt=prompt, model=model, provider=provider, **kwargs) + ) + + +def generate_video_sync( + prompt: str, + image_url: str | None = None, + model: str | None = None, + provider: str | None = None, + **kwargs: Any, +) -> GenerationResult: + """Synchronous wrapper for video generation.""" + service = AIGenerationService() + return asyncio.run( + service.generate_video( + prompt=prompt, image_url=image_url, model=model, provider=provider, **kwargs + ) + ) diff --git a/src/kurt/services/media_edit.py b/src/kurt/services/media_edit.py new file mode 100644 index 00000000..1103d00a --- /dev/null +++ b/src/kurt/services/media_edit.py @@ -0,0 +1,1012 @@ +"""Media Edit Service - wrapper around FFmpeg and ImageMagick. 
class MediaFormat(str, Enum):
    """Output formats accepted by the editing operations."""

    # --- image formats ---
    JPEG = "jpeg"
    JPG = "jpg"
    PNG = "png"
    WEBP = "webp"
    GIF = "gif"
    AVIF = "avif"
    TIFF = "tiff"

    # --- video formats ---
    MP4 = "mp4"
    WEBM = "webm"
    MOV = "mov"
    AVI = "avi"
    MKV = "mkv"

    # --- audio formats ---
    MP3 = "mp3"
    WAV = "wav"
    AAC = "aac"
    OGG = "ogg"
    FLAC = "flac"


@dataclass
class EditResult:
    """Outcome of a single media editing command."""

    success: bool
    # Where the edited file was written (None on failure).
    output_path: str | None = None
    # Human-readable failure description when success is False.
    error: str | None = None
    # The exact CLI invocation that was run, for debugging.
    command: str | None = None
    stdout: str | None = None
    stderr: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)


@dataclass
class MediaInfo:
    """Probed properties of a media file; fields are None when unknown."""

    path: str
    format: str | None = None
    width: int | None = None
    height: int | None = None
    # presumably seconds — confirm against the probing code
    duration: float | None = None
    bitrate: int | None = None
    codec: str | None = None
    audio_codec: str | None = None
    fps: float | None = None
    size_bytes: int | None = None
+ + Example: + service = MediaEditService() + + # Resize image + result = await service.resize_image( + "input.jpg", + output_path="output.jpg", + width=800, + height=600, + ) + + # Trim video + result = await service.trim_video( + "input.mp4", + output_path="output.mp4", + start="00:00:30", + end="00:01:00", + ) + + # Convert format + result = await service.convert( + "input.png", + output_path="output.webp", + format=MediaFormat.WEBP, + ) + """ + + def __init__( + self, + ffmpeg_path: str | None = None, + magick_path: str | None = None, + ): + """Initialize the media edit service. + + Args: + ffmpeg_path: Path to ffmpeg binary (or FFMPEG_PATH env var) + magick_path: Path to magick binary (or MAGICK_PATH env var) + """ + self.ffmpeg_path = ffmpeg_path or os.environ.get("FFMPEG_PATH", "ffmpeg") + self.magick_path = magick_path or os.environ.get("MAGICK_PATH", "magick") + + def _check_ffmpeg(self) -> bool: + """Check if FFmpeg is available.""" + return shutil.which(self.ffmpeg_path) is not None + + def _check_imagemagick(self) -> bool: + """Check if ImageMagick is available.""" + return shutil.which(self.magick_path) is not None + + async def _run_command( + self, + cmd: list[str], + check: bool = True, + ) -> tuple[int, str, str]: + """Run a command asynchronously. 
+ + Returns: + Tuple of (return_code, stdout, stderr) + """ + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await process.communicate() + return ( + process.returncode or 0, + stdout.decode("utf-8", errors="replace"), + stderr.decode("utf-8", errors="replace"), + ) + + def _ensure_output_path( + self, + input_path: str, + output_path: str | None, + suffix: str | None = None, + ) -> str: + """Generate output path if not provided.""" + if output_path: + return output_path + + input_p = Path(input_path) + if suffix: + return str(input_p.parent / f"{input_p.stem}_edited{suffix}") + return str(input_p.parent / f"{input_p.stem}_edited{input_p.suffix}") + + # ------------------------------------------------------------------------- + # Image Operations (ImageMagick) + # ------------------------------------------------------------------------- + + async def resize_image( + self, + input_path: str, + output_path: str | None = None, + width: int | None = None, + height: int | None = None, + scale: float | None = None, + maintain_aspect: bool = True, + quality: int = 85, + ) -> EditResult: + """Resize an image. + + Args: + input_path: Path to input image + output_path: Path for output (auto-generated if not provided) + width: Target width in pixels + height: Target height in pixels + scale: Scale factor (e.g., 0.5 for half size) + maintain_aspect: Keep aspect ratio (default True) + quality: Output quality 1-100 (for JPEG/WebP) + + Returns: + EditResult with output path + """ + if not self._check_imagemagick(): + return EditResult( + success=False, + error="ImageMagick not found. 
Install with: apt install imagemagick", + ) + + output_path = self._ensure_output_path(input_path, output_path) + cmd = [self.magick_path, input_path] + + if scale: + cmd.extend(["-resize", f"{int(scale * 100)}%"]) + elif width and height: + resize_op = f"{width}x{height}" if maintain_aspect else f"{width}x{height}!" + cmd.extend(["-resize", resize_op]) + elif width: + cmd.extend(["-resize", f"{width}x"]) + elif height: + cmd.extend(["-resize", f"x{height}"]) + + cmd.extend(["-quality", str(quality)]) + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + stdout=stdout, + stderr=stderr, + ) + + async def crop_image( + self, + input_path: str, + output_path: str | None = None, + width: int | None = None, + height: int | None = None, + x: int = 0, + y: int = 0, + gravity: str | None = None, + ) -> EditResult: + """Crop an image. + + Args: + input_path: Path to input image + output_path: Path for output + width: Crop width + height: Crop height + x: X offset from left (or from gravity point) + y: Y offset from top (or from gravity point) + gravity: Gravity point (Center, North, South, East, West, etc.) 
+ + Returns: + EditResult with output path + """ + if not self._check_imagemagick(): + return EditResult( + success=False, + error="ImageMagick not found", + ) + + output_path = self._ensure_output_path(input_path, output_path) + cmd = [self.magick_path, input_path] + + if gravity: + cmd.extend(["-gravity", gravity]) + + crop_spec = f"{width}x{height}+{x}+{y}" + cmd.extend(["-crop", crop_spec, "+repage"]) + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def rotate_image( + self, + input_path: str, + output_path: str | None = None, + degrees: float = 90, + background: str = "white", + ) -> EditResult: + """Rotate an image. + + Args: + input_path: Path to input image + output_path: Path for output + degrees: Rotation angle (positive = clockwise) + background: Background color for corners + + Returns: + EditResult with output path + """ + if not self._check_imagemagick(): + return EditResult(success=False, error="ImageMagick not found") + + output_path = self._ensure_output_path(input_path, output_path) + cmd = [ + self.magick_path, + input_path, + "-background", + background, + "-rotate", + str(degrees), + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def apply_filter( + self, + input_path: str, + output_path: str | None = None, + filter_name: str = "grayscale", + **kwargs: Any, + ) -> EditResult: + """Apply a filter to an image. + + Args: + input_path: Path to input image + output_path: Path for output + filter_name: Filter to apply (grayscale, blur, sharpen, etc.) 
+ **kwargs: Filter-specific parameters + + Returns: + EditResult with output path + """ + if not self._check_imagemagick(): + return EditResult(success=False, error="ImageMagick not found") + + output_path = self._ensure_output_path(input_path, output_path) + cmd = [self.magick_path, input_path] + + # Map filter names to ImageMagick operations + filter_ops = { + "grayscale": ["-colorspace", "Gray"], + "sepia": ["-sepia-tone", str(kwargs.get("intensity", 80)) + "%"], + "blur": ["-blur", f"0x{kwargs.get('radius', 3)}"], + "sharpen": ["-sharpen", f"0x{kwargs.get('radius', 1)}"], + "negate": ["-negate"], + "normalize": ["-normalize"], + "equalize": ["-equalize"], + "brightness": [ + "-modulate", + f"{kwargs.get('brightness', 100)},{kwargs.get('saturation', 100)}", + ], + "contrast": ["-contrast-stretch", f"{kwargs.get('black', 0)}x{kwargs.get('white', 0)}%"], + } + + if filter_name in filter_ops: + cmd.extend(filter_ops[filter_name]) + else: + return EditResult( + success=False, + error=f"Unknown filter: {filter_name}. Available: {list(filter_ops.keys())}", + ) + + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def composite_images( + self, + background_path: str, + overlay_path: str, + output_path: str | None = None, + x: int = 0, + y: int = 0, + gravity: str = "NorthWest", + opacity: float = 1.0, + ) -> EditResult: + """Composite two images (overlay one on another). 
+ + Args: + background_path: Path to background image + overlay_path: Path to overlay image + output_path: Path for output + x: X offset + y: Y offset + gravity: Placement gravity + opacity: Overlay opacity (0.0-1.0) + + Returns: + EditResult with output path + """ + if not self._check_imagemagick(): + return EditResult(success=False, error="ImageMagick not found") + + output_path = self._ensure_output_path(background_path, output_path, "_composite.png") + + # Build composite command + cmd = [ + self.magick_path, + background_path, + "(", + overlay_path, + "-alpha", + "set", + "-channel", + "A", + "-evaluate", + "multiply", + str(opacity), + "+channel", + ")", + "-gravity", + gravity, + "-geometry", + f"+{x}+{y}", + "-composite", + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + # ------------------------------------------------------------------------- + # Video Operations (FFmpeg) + # ------------------------------------------------------------------------- + + async def trim_video( + self, + input_path: str, + output_path: str | None = None, + start: str | float | None = None, + end: str | float | None = None, + duration: float | None = None, + copy_codec: bool = True, + ) -> EditResult: + """Trim a video to a specific segment. + + Args: + input_path: Path to input video + output_path: Path for output + start: Start time (e.g., "00:00:30" or 30.0) + end: End time (e.g., "00:01:00" or 60.0) + duration: Duration in seconds (alternative to end) + copy_codec: Copy codecs without re-encoding (fast but less precise) + + Returns: + EditResult with output path + """ + if not self._check_ffmpeg(): + return EditResult( + success=False, + error="FFmpeg not found. 
Install with: apt install ffmpeg", + ) + + output_path = self._ensure_output_path(input_path, output_path, "_trimmed.mp4") + cmd = [self.ffmpeg_path, "-y"] + + if start: + cmd.extend(["-ss", str(start)]) + + cmd.extend(["-i", input_path]) + + if end: + cmd.extend(["-to", str(end)]) + elif duration: + cmd.extend(["-t", str(duration)]) + + if copy_codec: + cmd.extend(["-c", "copy"]) + + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + stderr=stderr, + ) + + async def resize_video( + self, + input_path: str, + output_path: str | None = None, + width: int | None = None, + height: int | None = None, + scale: str | None = None, + preset: str | None = None, + ) -> EditResult: + """Resize a video. + + Args: + input_path: Path to input video + output_path: Path for output + width: Target width (use -1 for auto based on height) + height: Target height (use -1 for auto based on width) + scale: FFmpeg scale filter (e.g., "1280:720", "iw/2:ih/2") + preset: Preset resolution ("480p", "720p", "1080p", "4k") + + Returns: + EditResult with output path + """ + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(input_path, output_path, "_resized.mp4") + + # Handle presets + presets = { + "480p": "854:480", + "720p": "1280:720", + "1080p": "1920:1080", + "4k": "3840:2160", + } + + if preset: + scale = presets.get(preset, scale) + elif width and height: + scale = f"{width}:{height}" + elif width: + scale = f"{width}:-2" + elif height: + scale = f"-2:{height}" + + if not scale: + return EditResult(success=False, error="Must specify width, height, scale, or preset") + + cmd = [ + self.ffmpeg_path, + "-y", + "-i", + input_path, + "-vf", + f"scale={scale}", + "-c:a", + "copy", + output_path, + ] + 
+ returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def extract_audio( + self, + input_path: str, + output_path: str | None = None, + format: str = "mp3", + bitrate: str = "192k", + ) -> EditResult: + """Extract audio track from video. + + Args: + input_path: Path to input video + output_path: Path for output audio + format: Output format (mp3, wav, aac, etc.) + bitrate: Audio bitrate + + Returns: + EditResult with output path + """ + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(input_path, output_path, f".{format}") + + cmd = [ + self.ffmpeg_path, + "-y", + "-i", + input_path, + "-vn", + "-acodec", + "libmp3lame" if format == "mp3" else "copy", + "-ab", + bitrate, + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def add_audio( + self, + video_path: str, + audio_path: str, + output_path: str | None = None, + replace: bool = True, + volume: float = 1.0, + ) -> EditResult: + """Add or replace audio in a video. 
+ + Args: + video_path: Path to input video + audio_path: Path to audio file + output_path: Path for output + replace: Replace existing audio (True) or mix (False) + volume: Audio volume multiplier + + Returns: + EditResult with output path + """ + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(video_path, output_path, "_audio.mp4") + + if replace: + cmd = [ + self.ffmpeg_path, + "-y", + "-i", + video_path, + "-i", + audio_path, + "-c:v", + "copy", + "-map", + "0:v:0", + "-map", + "1:a:0", + "-shortest", + output_path, + ] + else: + # Mix audio + cmd = [ + self.ffmpeg_path, + "-y", + "-i", + video_path, + "-i", + audio_path, + "-c:v", + "copy", + "-filter_complex", + f"[0:a][1:a]amerge=inputs=2,volume={volume}[a]", + "-map", + "0:v", + "-map", + "[a]", + "-shortest", + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def extract_frames( + self, + input_path: str, + output_dir: str | None = None, + fps: float = 1.0, + format: str = "jpg", + quality: int = 2, + ) -> EditResult: + """Extract frames from video as images. 
+ + Args: + input_path: Path to input video + output_dir: Directory for output frames + fps: Frames per second to extract + format: Output image format + quality: JPEG quality (2-31, lower is better) + + Returns: + EditResult with output directory + """ + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + if output_dir is None: + output_dir = tempfile.mkdtemp(prefix="frames_") + else: + os.makedirs(output_dir, exist_ok=True) + + output_pattern = os.path.join(output_dir, f"frame_%04d.{format}") + + cmd = [ + self.ffmpeg_path, + "-y", + "-i", + input_path, + "-vf", + f"fps={fps}", + "-q:v", + str(quality), + output_pattern, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_dir if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + metadata={"pattern": output_pattern}, + ) + + async def create_thumbnail( + self, + input_path: str, + output_path: str | None = None, + time: str | float = "00:00:01", + width: int = 320, + height: int | None = None, + ) -> EditResult: + """Create a thumbnail from video. 
+ + Args: + input_path: Path to input video + output_path: Path for output image + time: Time position to capture + width: Thumbnail width + height: Thumbnail height (auto if not specified) + + Returns: + EditResult with output path + """ + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(input_path, output_path, "_thumb.jpg") + + scale = f"{width}:-1" if height is None else f"{width}:{height}" + + cmd = [ + self.ffmpeg_path, + "-y", + "-ss", + str(time), + "-i", + input_path, + "-vframes", + "1", + "-vf", + f"scale={scale}", + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + # ------------------------------------------------------------------------- + # Format Conversion + # ------------------------------------------------------------------------- + + async def convert( + self, + input_path: str, + output_path: str | None = None, + format: MediaFormat | str | None = None, + quality: int = 85, + **kwargs: Any, + ) -> EditResult: + """Convert media to different format. 
+ + Args: + input_path: Path to input file + output_path: Path for output (format inferred from extension) + format: Target format + quality: Quality setting (for lossy formats) + **kwargs: Additional format-specific options + + Returns: + EditResult with output path + """ + if format is None and output_path: + format = Path(output_path).suffix.lstrip(".") + + if format is None: + return EditResult(success=False, error="Must specify format or output_path with extension") + + if isinstance(format, MediaFormat): + format = format.value + + # Determine if it's an image or video format + image_formats = {"jpeg", "jpg", "png", "webp", "gif", "avif", "tiff"} + video_formats = {"mp4", "webm", "mov", "avi", "mkv"} + audio_formats = {"mp3", "wav", "aac", "ogg", "flac"} + + if format in image_formats: + return await self._convert_image(input_path, output_path, format, quality) + elif format in video_formats: + return await self._convert_video(input_path, output_path, format, **kwargs) + elif format in audio_formats: + return await self._convert_audio(input_path, output_path, format, **kwargs) + else: + return EditResult(success=False, error=f"Unsupported format: {format}") + + async def _convert_image( + self, + input_path: str, + output_path: str | None, + format: str, + quality: int, + ) -> EditResult: + """Convert image format using ImageMagick.""" + if not self._check_imagemagick(): + return EditResult(success=False, error="ImageMagick not found") + + output_path = self._ensure_output_path(input_path, output_path, f".{format}") + + cmd = [ + self.magick_path, + input_path, + "-quality", + str(quality), + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def _convert_video( + self, + input_path: str, + output_path: str | None, + format: str, + **kwargs: 
Any, + ) -> EditResult: + """Convert video format using FFmpeg.""" + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(input_path, output_path, f".{format}") + + cmd = [self.ffmpeg_path, "-y", "-i", input_path] + + # Add codec options based on format + if format == "webm": + cmd.extend(["-c:v", "libvpx-vp9", "-c:a", "libopus"]) + elif format == "mp4": + cmd.extend(["-c:v", "libx264", "-c:a", "aac"]) + + # Add any additional options + crf = kwargs.get("crf", 23) + cmd.extend(["-crf", str(crf)]) + + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def _convert_audio( + self, + input_path: str, + output_path: str | None, + format: str, + **kwargs: Any, + ) -> EditResult: + """Convert audio format using FFmpeg.""" + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(input_path, output_path, f".{format}") + + cmd = [self.ffmpeg_path, "-y", "-i", input_path] + + bitrate = kwargs.get("bitrate", "192k") + cmd.extend(["-ab", bitrate]) + + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + # ------------------------------------------------------------------------- + # Media Information + # ------------------------------------------------------------------------- + + async def get_info(self, input_path: str) -> MediaInfo: + """Get information about a media file. 
+ + Args: + input_path: Path to media file + + Returns: + MediaInfo with file details + """ + if not self._check_ffmpeg(): + # Fallback to basic info + path = Path(input_path) + return MediaInfo( + path=input_path, + format=path.suffix.lstrip("."), + size_bytes=path.stat().st_size if path.exists() else None, + ) + + cmd = [ + self.ffmpeg_path, + "-i", + input_path, + "-hide_banner", + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + # FFmpeg outputs info to stderr + info = MediaInfo(path=input_path) + + # Parse format + path = Path(input_path) + info.format = path.suffix.lstrip(".") + if path.exists(): + info.size_bytes = path.stat().st_size + + # Parse dimensions (from "Stream #0:0: Video: ... 1920x1080") + import re + + dimension_match = re.search(r"(\d{2,5})x(\d{2,5})", stderr) + if dimension_match: + info.width = int(dimension_match.group(1)) + info.height = int(dimension_match.group(2)) + + # Parse duration + duration_match = re.search(r"Duration: (\d+):(\d+):(\d+)\.(\d+)", stderr) + if duration_match: + h, m, s, ms = duration_match.groups() + info.duration = int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 100 + + # Parse FPS + fps_match = re.search(r"(\d+(?:\.\d+)?)\s*fps", stderr) + if fps_match: + info.fps = float(fps_match.group(1)) + + return info + + +# Convenience functions for synchronous usage +def resize_image_sync(input_path: str, **kwargs: Any) -> EditResult: + """Synchronous wrapper for image resize.""" + service = MediaEditService() + return asyncio.run(service.resize_image(input_path, **kwargs)) + + +def trim_video_sync(input_path: str, **kwargs: Any) -> EditResult: + """Synchronous wrapper for video trim.""" + service = MediaEditService() + return asyncio.run(service.trim_video(input_path, **kwargs)) + + +def convert_sync(input_path: str, **kwargs: Any) -> EditResult: + """Synchronous wrapper for format conversion.""" + service = MediaEditService() + return asyncio.run(service.convert(input_path, **kwargs)) diff --git 
# --- Media API endpoints ---


class MediaGeneratePayload(BaseModel):
    """Request body shared by the image and video generation endpoints."""

    prompt: str
    model: str | None = None
    provider: str | None = None  # fal, leonardo, replicate, runway
    width: int = 1024
    height: int = 1024
    num_images: int = 1
    negative_prompt: str | None = None
    # Video-specific
    image_url: str | None = None
    duration: int = 5


class MediaEditPayload(BaseModel):
    """Request body for the media editing endpoint."""

    input_path: str
    output_path: str | None = None
    operation: str  # resize, crop, rotate, filter, trim, convert
    # Operation-specific params
    width: int | None = None
    height: int | None = None
    scale: float | None = None
    preset: str | None = None  # 480p, 720p, 1080p, 4k
    quality: int = 85
    # Crop params
    x: int = 0
    y: int = 0
    gravity: str | None = None
    # Rotate params
    degrees: float = 90
    background: str = "white"
    # Filter params
    filter_name: str | None = None
    # Trim params
    start: str | None = None
    end: str | None = None
    duration_seconds: float | None = None
    # Convert params
    format: str | None = None


@app.post("/api/media/generate/image")
async def api_media_generate_image(payload: MediaGeneratePayload):
    """Generate an image using AI.

    Supports providers: fal (default), leonardo, replicate
    """
    try:
        from kurt.services.ai_generation import AIGenerationService

        request_args = dict(
            prompt=payload.prompt,
            model=payload.model,
            provider=payload.provider,
            width=payload.width,
            height=payload.height,
            num_images=payload.num_images,
            negative_prompt=payload.negative_prompt,
        )

        service = AIGenerationService()
        try:
            result = await service.generate_image(**request_args)
        finally:
            # Release the provider's HTTP client even on failure.
            await service.close()

        if not result.success:
            raise HTTPException(status_code=400, detail=result.error)

        response = {
            "success": True,
            "url": result.url,
            "urls": result.urls,
            "provider": result.provider,
            "model": result.model,
            "job_id": result.job_id,
        }
        return response
    except HTTPException:
        raise
    except ImportError as e:
        raise HTTPException(status_code=503, detail=f"Media service not available: {e}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/api/media/generate/video")
async def api_media_generate_video(payload: MediaGeneratePayload):
    """Generate a video using AI.

    Supports providers: fal (default), runway, replicate
    """
    try:
        from kurt.services.ai_generation import AIGenerationService

        request_args = dict(
            prompt=payload.prompt,
            image_url=payload.image_url,
            model=payload.model,
            provider=payload.provider,
            duration=payload.duration,
        )

        service = AIGenerationService()
        try:
            result = await service.generate_video(**request_args)
        finally:
            await service.close()

        if not result.success:
            raise HTTPException(status_code=400, detail=result.error)

        response = {
            "success": True,
            "url": result.url,
            "provider": result.provider,
            "model": result.model,
            "job_id": result.job_id,
        }
        return response
    except HTTPException:
        raise
    except ImportError as e:
        raise HTTPException(status_code=503, detail=f"Media service not available: {e}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/media/edit")
async def api_media_edit(payload: MediaEditPayload):
    """Edit an image or video file.

    Operations: resize, crop, rotate, filter, trim, convert
    """
    try:
        from kurt.services.media_edit import MediaEditService

        service = MediaEditService()
        result = None

        if payload.operation == "resize":
            # Route by extension: images go through ImageMagick, everything
            # else is assumed to be video and goes through FFmpeg.
            path = Path(payload.input_path)
            image_exts = {".jpg", ".jpeg", ".png", ".webp", ".gif", ".tiff", ".avif"}
            if path.suffix.lower() in image_exts:
                result = await service.resize_image(
                    payload.input_path,
                    output_path=payload.output_path,
                    width=payload.width,
                    height=payload.height,
                    scale=payload.scale,
                    quality=payload.quality,
                )
            else:
                result = await service.resize_video(
                    payload.input_path,
                    output_path=payload.output_path,
                    width=payload.width,
                    height=payload.height,
                    preset=payload.preset,
                )

        elif payload.operation == "crop":
            result = await service.crop_image(
                payload.input_path,
                output_path=payload.output_path,
                width=payload.width,
                height=payload.height,
                x=payload.x,
                y=payload.y,
                gravity=payload.gravity,
            )

        elif payload.operation == "rotate":
            result = await service.rotate_image(
                payload.input_path,
                output_path=payload.output_path,
                degrees=payload.degrees,
                background=payload.background,
            )

        elif payload.operation == "filter":
            if not payload.filter_name:
                raise HTTPException(status_code=400, detail="filter_name required for filter operation")
            result = await service.apply_filter(
                payload.input_path,
                output_path=payload.output_path,
                filter_name=payload.filter_name,
            )

        elif payload.operation == "trim":
            result = await service.trim_video(
                payload.input_path,
                output_path=payload.output_path,
                start=payload.start,
                end=payload.end,
                duration=payload.duration_seconds,
            )

        elif payload.operation == "convert":
            if not payload.format:
                raise HTTPException(status_code=400, detail="format required for convert operation")
            result = await service.convert(
                payload.input_path,
                output_path=payload.output_path,
                format=payload.format,
                quality=payload.quality,
            )

        else:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown operation: {payload.operation}. "
                "Supported: resize, crop, rotate, filter, trim, convert",
            )

        if result is None:
            raise HTTPException(status_code=500, detail="Operation returned no result")

        if not result.success:
            raise HTTPException(status_code=400, detail=result.error)

        return {
            "success": True,
            "output_path": result.output_path,
            "command": result.command,
        }
    except HTTPException:
        raise
    except ImportError as e:
        raise HTTPException(status_code=503, detail=f"Media service not available: {e}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/api/media/info")
async def api_media_info(path: str = Query(...)):
    """Get information about a media file."""
    try:
        # get_info() never raises for a missing file (it guards stat() with
        # exists() and returns partial info), so check explicitly here —
        # otherwise the documented 404 below would be unreachable.
        if not Path(path).exists():
            raise HTTPException(status_code=404, detail="File not found")

        from kurt.services.media_edit import MediaEditService

        service = MediaEditService()
        info = await service.get_info(path)

        return {
            "path": info.path,
            "format": info.format,
            "width": info.width,
            "height": info.height,
            "duration": info.duration,
            "fps": info.fps,
            "size_bytes": info.size_bytes,
        }
    except HTTPException:
        raise
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="File not found")
    except ImportError as e:
        raise HTTPException(status_code=503, detail=f"Media service not available: {e}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/api/media/providers")
def api_media_providers():
    """Get available AI providers and their configuration status."""
    # "configured" only checks that the env var is set, not that the key works.
    providers = [
        {
            "name": "fal",
            "display_name": "fal.ai",
            "env_var": "FAL_KEY",
            "configured": bool(os.environ.get("FAL_KEY")),
            "capabilities": ["image", "video"],
        },
        {
            "name": "leonardo",
            "display_name": "Leonardo.ai",
            "env_var": "LEONARDO_API_KEY",
            "configured": bool(os.environ.get("LEONARDO_API_KEY")),
            "capabilities": ["image"],
        },
        {
            "name": "replicate",
            "display_name": "Replicate",
            "env_var": "REPLICATE_API_TOKEN",
            "configured": bool(os.environ.get("REPLICATE_API_TOKEN")),
            "capabilities": ["image", "video"],
        },
        {
            "name": "runway",
            "display_name": "Runway",
            "env_var": "RUNWAY_API_KEY",
            "configured": bool(os.environ.get("RUNWAY_API_KEY")),
            "capabilities": ["video"],
        },
    ]

    # Local editing tools: ImageMagick 7 installs "magick", older versions "convert".
    tools = {
        "ffmpeg": which("ffmpeg") is not None,
        "imagemagick": which("magick") is not None or which("convert") is not None,
    }

    return {"providers": providers, "tools": tools}
"""Media workflow - AI-powered image and video generation and editing.

Capabilities:
- AI image generation (fal.ai, Leonardo.ai, Replicate)
- AI video generation (Runway, fal.ai, Replicate)
- Local media editing via FFmpeg and ImageMagick

CLI Usage:
    # Generate images
    kurt media generate image --prompt "A sunset over mountains" --model flux-dev

    # Generate video from image
    kurt media generate video --image hero.png --prompt "Slow zoom" --duration 5

    # Edit images
    kurt media edit resize input.jpg --width 800 --height 600
    kurt media edit crop input.jpg --width 400 --height 400 --gravity center
    kurt media edit filter input.jpg --filter grayscale

    # Edit videos
    kurt media edit trim video.mp4 --start 00:00:30 --end 00:01:00
    kurt media edit resize video.mp4 --preset 720p
    kurt media edit thumbnail video.mp4 --time 5

    # Convert formats
    kurt media convert input.png --format webp
    kurt media convert video.mp4 --format webm

Programmatic Usage:
    from kurt.services import AIGenerationService, MediaEditService

    # Generate image
    service = AIGenerationService()
    result = await service.generate_image(prompt="...", model="flux-dev")

    # Edit video
    editor = MediaEditService()
    result = await editor.trim_video("input.mp4", start="00:00:30", duration=30)
"""

from kurt.services.ai_generation import (
    AIGenerationService,
    GenerationResult,
    MediaType,
    Provider,
    generate_image_sync,
    generate_video_sync,
)
from kurt.services.media_edit import (
    EditResult,
    MediaEditService,
    MediaFormat,
    MediaInfo,
    convert_sync,
    resize_image_sync,
    trim_video_sync,
)

__all__ = [
    # AI Generation
    "AIGenerationService",
    "GenerationResult",
    "MediaType",
    "Provider",
    "generate_image_sync",
    "generate_video_sync",
    # Media Editing
    "EditResult",
    "MediaEditService",
    "MediaFormat",
    "MediaInfo",
    "convert_sync",
    "resize_image_sync",
    "trim_video_sync",
]
# =============================================================================
# Generate Commands
# =============================================================================


@media_group.group(name="generate")
def generate_group():
    """Generate images and videos using AI models.

    \b
    Supported providers:
      - fal.ai: Fast inference, Flux models (FAL_KEY)
      - Leonardo.ai: Nano Banana, Phoenix (LEONARDO_API_KEY)
      - Replicate: Large model library (REPLICATE_API_TOKEN)
      - Runway: Video generation (RUNWAY_API_KEY)

    Set the appropriate API key as an environment variable.
    """
    pass


@generate_group.command(name="image")
@click.option(
    "--prompt",
    "-p",
    required=True,
    help="Text description of the image to generate",
)
@click.option(
    "--model",
    "-m",
    default="flux/dev",
    help="Model to use (e.g., flux/dev, nano-banana, sdxl)",
)
@click.option(
    "--provider",
    type=click.Choice(["fal", "leonardo", "replicate"]),
    default="fal",
    help="AI provider to use",
)
@click.option("--width", "-w", type=int, default=1024, help="Image width in pixels")
@click.option("--height", "-h", type=int, default=1024, help="Image height in pixels")
@click.option("--num", "-n", type=int, default=1, help="Number of images to generate")
@click.option("--negative", help="Negative prompt (things to avoid)")
@click.option(
    "--output",
    "-o",
    type=click.Path(),
    help="Output file path (default: prints URLs)",
)
@click.option("--json", "as_json", is_flag=True, help="Output result as JSON")
def generate_image(
    prompt: str,
    model: str,
    provider: str,
    width: int,
    height: int,
    num: int,
    negative: str | None,
    output: str | None,
    as_json: bool,
):
    """Generate images using AI models.

    \b
    Examples:
      # Generate with fal.ai (default)
      kurt media generate image --prompt "A sunset over mountains"

      # Use Leonardo.ai with Nano Banana
      kurt media generate image -p "Product photo" --provider leonardo --model nano-banana

      # Generate multiple images
      kurt media generate image -p "Abstract art" --num 4 --width 512 --height 512

      # Save to file (multiple results get numbered suffixes)
      kurt media generate image -p "Logo design" -o logo.png
    """
    from kurt.services.ai_generation import AIGenerationService

    async def _generate():
        service = AIGenerationService()
        try:
            return await service.generate_image(
                prompt=prompt,
                model=model,
                provider=provider,
                width=width,
                height=height,
                num_images=num,
                negative_prompt=negative,
            )
        finally:
            await service.close()

    with console.status("[bold blue]Generating image..."):
        result = asyncio.run(_generate())

    if not result.success:
        console.print(f"[red]Error:[/red] {result.error}")
        raise SystemExit(1)

    if as_json:
        click.echo(
            json.dumps(
                {
                    "success": result.success,
                    "url": result.url,
                    "urls": result.urls,
                    "provider": result.provider,
                    "model": result.model,
                },
                indent=2,
            )
        )
        return

    # Download if an output path was specified.
    urls = result.urls or ([result.url] if result.url else [])
    if output and urls:
        import httpx

        # Previously only the first image was saved when --num > 1; save
        # them all, numbering the extra files (logo.png, logo_2.png, ...).
        out = Path(output)
        for i, url in enumerate(urls, 1):
            target = out if i == 1 else out.with_name(f"{out.stem}_{i}{out.suffix}")
            console.print(f"[dim]Downloading to {target}...[/dim]")
            response = httpx.get(url)
            target.write_bytes(response.content)
            console.print(f"[green]Saved to:[/green] {target}")
    else:
        console.print(f"[green]Generated {len(result.urls)} image(s)[/green]")
        for i, url in enumerate(result.urls, 1):
            console.print(f"  [{i}] {url}")

    console.print(f"[dim]Provider: {result.provider} | Model: {result.model}[/dim]")


@generate_group.command(name="video")
@click.option(
    "--prompt",
    "-p",
    required=True,
    help="Text description of the video motion/content",
)
@click.option(
    "--image",
    "-i",
    type=click.Path(exists=True),
    help="Source image for image-to-video",
)
@click.option(
    "--image-url",
    help="Source image URL for image-to-video",
)
@click.option(
    "--model",
    "-m",
    default=None,
    help="Model to use (e.g., gen3a_turbo, ltx-video)",
)
@click.option(
    "--provider",
    type=click.Choice(["fal", "runway", "replicate"]),
    default="fal",
    help="AI provider to use",
)
@click.option(
    "--duration",
    "-d",
    type=int,
    default=5,
    help="Video duration in seconds",
)
@click.option(
    "--output",
    "-o",
    type=click.Path(),
    help="Output file path",
)
@click.option("--json", "as_json", is_flag=True, help="Output result as JSON")
def generate_video(
    prompt: str,
    image: str | None,
    image_url: str | None,
    model: str | None,
    provider: str,
    duration: int,
    output: str | None,
    as_json: bool,
):
    """Generate videos using AI models.

    \b
    Examples:
      # Text-to-video with fal.ai
      kurt media generate video --prompt "Ocean waves crashing"

      # Image-to-video with a hosted image
      kurt media generate video --image-url https://... -p "Slow zoom in" -d 5

      # Use Runway for high quality
      kurt media generate video -p "Camera pan" --provider runway --image-url https://...

      # Save to file
      kurt media generate video -p "Particles floating" -o intro.mp4
    """
    from kurt.services.ai_generation import AIGenerationService

    # Local files are not supported yet: providers need a reachable URL,
    # and no upload step is implemented. Fail with guidance instead of
    # sending a request that can only error out later.
    source_url = image_url
    if image and not image_url:
        console.print(
            "[yellow]Note:[/yellow] Local image files require upload. "
            "Use --image-url with a hosted image URL instead."
        )
        console.print("[dim]Tip: Upload to imgur, cloudinary, or your own server[/dim]")
        raise SystemExit(1)

    async def _generate():
        service = AIGenerationService()
        try:
            return await service.generate_video(
                prompt=prompt,
                image_url=source_url,
                model=model,
                provider=provider,
                duration=duration,
            )
        finally:
            await service.close()

    with console.status("[bold blue]Generating video (this may take a few minutes)..."):
        result = asyncio.run(_generate())

    if not result.success:
        console.print(f"[red]Error:[/red] {result.error}")
        raise SystemExit(1)

    if as_json:
        click.echo(
            json.dumps(
                {
                    "success": result.success,
                    "url": result.url,
                    "job_id": result.job_id,
                    "provider": result.provider,
                    "model": result.model,
                },
                indent=2,
            )
        )
        return

    # Download if an output path was specified.
    if output and result.url:
        import httpx

        console.print(f"[dim]Downloading to {output}...[/dim]")
        response = httpx.get(result.url)
        Path(output).write_bytes(response.content)
        console.print(f"[green]Saved to:[/green] {output}")
    else:
        # Plain string: the original used an f-string with no placeholders.
        console.print("[green]Video generated[/green]")
        console.print(f"  URL: {result.url}")

    console.print(f"[dim]Provider: {result.provider} | Model: {result.model}[/dim]")


# =============================================================================
# Edit Commands
# =============================================================================


@media_group.group(name="edit")
def edit_group():
    """Edit images and videos using FFmpeg/ImageMagick.

    \b
    Requirements:
      - ImageMagick: apt install imagemagick (images)
      - FFmpeg: apt install ffmpeg (video/audio)
    """
    pass
+ + \b + Requirements: + - ImageMagick: apt install imagemagick (images) + - FFmpeg: apt install ffmpeg (video/audio) + """ + pass + + +@edit_group.command(name="resize") +@click.argument("input_path", type=click.Path(exists=True)) +@click.option("--width", "-w", type=int, help="Target width in pixels") +@click.option("--height", "-h", type=int, help="Target height in pixels") +@click.option("--scale", "-s", type=float, help="Scale factor (e.g., 0.5 for half)") +@click.option( + "--preset", + type=click.Choice(["480p", "720p", "1080p", "4k"]), + help="Video resolution preset", +) +@click.option("--output", "-o", type=click.Path(), help="Output file path") +@click.option("--quality", "-q", type=int, default=85, help="Output quality (1-100)") +def edit_resize( + input_path: str, + width: int | None, + height: int | None, + scale: float | None, + preset: str | None, + output: str | None, + quality: int, +): + """Resize an image or video. + + \b + Examples: + # Resize image to specific dimensions + kurt media edit resize photo.jpg --width 800 --height 600 + + # Scale image by factor + kurt media edit resize photo.jpg --scale 0.5 + + # Resize video to 720p + kurt media edit resize video.mp4 --preset 720p + + # Specify output path + kurt media edit resize photo.jpg -w 1200 -o thumbnail.jpg + """ + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + path = Path(input_path) + + # Determine if image or video + image_exts = {".jpg", ".jpeg", ".png", ".webp", ".gif", ".tiff", ".avif"} + video_exts = {".mp4", ".webm", ".mov", ".avi", ".mkv"} + + async def _resize(): + if path.suffix.lower() in image_exts: + return await service.resize_image( + input_path, + output_path=output, + width=width, + height=height, + scale=scale, + quality=quality, + ) + elif path.suffix.lower() in video_exts: + return await service.resize_video( + input_path, + output_path=output, + width=width, + height=height, + preset=preset, + ) + else: + return None + + with 
console.status("[bold blue]Resizing..."): + result = asyncio.run(_resize()) + + if result is None: + console.print(f"[red]Error:[/red] Unsupported file type: {path.suffix}") + raise SystemExit(1) + + if not result.success: + console.print(f"[red]Error:[/red] {result.error}") + raise SystemExit(1) + + console.print(f"[green]Resized:[/green] {result.output_path}") + + +@edit_group.command(name="crop") +@click.argument("input_path", type=click.Path(exists=True)) +@click.option("--width", "-w", type=int, required=True, help="Crop width") +@click.option("--height", "-h", type=int, required=True, help="Crop height") +@click.option("--x", type=int, default=0, help="X offset") +@click.option("--y", type=int, default=0, help="Y offset") +@click.option( + "--gravity", + "-g", + type=click.Choice( + ["NorthWest", "North", "NorthEast", "West", "Center", "East", "SouthWest", "South", "SouthEast"] + ), + help="Crop from gravity point", +) +@click.option("--output", "-o", type=click.Path(), help="Output file path") +def edit_crop( + input_path: str, + width: int, + height: int, + x: int, + y: int, + gravity: str | None, + output: str | None, +): + """Crop an image. 
+ + \b + Examples: + # Crop from top-left + kurt media edit crop photo.jpg --width 400 --height 400 + + # Crop from center + kurt media edit crop photo.jpg -w 400 -h 400 --gravity Center + + # Crop with offset + kurt media edit crop photo.jpg -w 200 -h 200 --x 100 --y 50 + """ + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + + with console.status("[bold blue]Cropping..."): + result = asyncio.run( + service.crop_image( + input_path, + output_path=output, + width=width, + height=height, + x=x, + y=y, + gravity=gravity, + ) + ) + + if not result.success: + console.print(f"[red]Error:[/red] {result.error}") + raise SystemExit(1) + + console.print(f"[green]Cropped:[/green] {result.output_path}") + + +@edit_group.command(name="rotate") +@click.argument("input_path", type=click.Path(exists=True)) +@click.option("--degrees", "-d", type=float, default=90, help="Rotation angle (positive = clockwise)") +@click.option("--background", "-b", default="white", help="Background color for corners") +@click.option("--output", "-o", type=click.Path(), help="Output file path") +def edit_rotate( + input_path: str, + degrees: float, + background: str, + output: str | None, +): + """Rotate an image. 
+ + \b + Examples: + kurt media edit rotate photo.jpg --degrees 90 + kurt media edit rotate photo.jpg -d -45 --background transparent + """ + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + + with console.status("[bold blue]Rotating..."): + result = asyncio.run( + service.rotate_image( + input_path, + output_path=output, + degrees=degrees, + background=background, + ) + ) + + if not result.success: + console.print(f"[red]Error:[/red] {result.error}") + raise SystemExit(1) + + console.print(f"[green]Rotated:[/green] {result.output_path}") + + +@edit_group.command(name="filter") +@click.argument("input_path", type=click.Path(exists=True)) +@click.option( + "--filter", + "-f", + "filter_name", + required=True, + type=click.Choice( + ["grayscale", "sepia", "blur", "sharpen", "negate", "normalize", "equalize", "brightness", "contrast"] + ), + help="Filter to apply", +) +@click.option("--intensity", type=int, default=80, help="Filter intensity (for sepia)") +@click.option("--radius", type=float, default=3, help="Radius (for blur/sharpen)") +@click.option("--output", "-o", type=click.Path(), help="Output file path") +def edit_filter( + input_path: str, + filter_name: str, + intensity: int, + radius: float, + output: str | None, +): + """Apply a filter to an image. 
+ + \b + Examples: + kurt media edit filter photo.jpg --filter grayscale + kurt media edit filter photo.jpg -f blur --radius 5 + kurt media edit filter photo.jpg -f sepia --intensity 90 + """ + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + + with console.status("[bold blue]Applying filter..."): + result = asyncio.run( + service.apply_filter( + input_path, + output_path=output, + filter_name=filter_name, + intensity=intensity, + radius=radius, + ) + ) + + if not result.success: + console.print(f"[red]Error:[/red] {result.error}") + raise SystemExit(1) + + console.print(f"[green]Filter applied:[/green] {result.output_path}") + + +@edit_group.command(name="trim") +@click.argument("input_path", type=click.Path(exists=True)) +@click.option("--start", "-s", help="Start time (e.g., 00:00:30 or 30)") +@click.option("--end", "-e", help="End time (e.g., 00:01:00 or 60)") +@click.option("--duration", "-d", type=float, help="Duration in seconds (alternative to --end)") +@click.option("--output", "-o", type=click.Path(), help="Output file path") +@click.option("--reencode", is_flag=True, help="Re-encode for frame-accurate cuts") +def edit_trim( + input_path: str, + start: str | None, + end: str | None, + duration: float | None, + output: str | None, + reencode: bool, +): + """Trim a video to a specific segment. 
+ + \b + Examples: + # Trim from 30s to 1 minute + kurt media edit trim video.mp4 --start 00:00:30 --end 00:01:00 + + # Trim first 30 seconds + kurt media edit trim video.mp4 --duration 30 + + # Trim with re-encoding for precise cuts + kurt media edit trim video.mp4 -s 10 -e 20 --reencode + """ + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + + with console.status("[bold blue]Trimming video..."): + result = asyncio.run( + service.trim_video( + input_path, + output_path=output, + start=start, + end=end, + duration=duration, + copy_codec=not reencode, + ) + ) + + if not result.success: + console.print(f"[red]Error:[/red] {result.error}") + raise SystemExit(1) + + console.print(f"[green]Trimmed:[/green] {result.output_path}") + + +@edit_group.command(name="thumbnail") +@click.argument("input_path", type=click.Path(exists=True)) +@click.option("--time", "-t", default="00:00:01", help="Time position to capture") +@click.option("--width", "-w", type=int, default=320, help="Thumbnail width") +@click.option("--height", "-h", type=int, help="Thumbnail height (auto if not set)") +@click.option("--output", "-o", type=click.Path(), help="Output file path") +def edit_thumbnail( + input_path: str, + time: str, + width: int, + height: int | None, + output: str | None, +): + """Create a thumbnail from a video. 
+ + \b + Examples: + kurt media edit thumbnail video.mp4 --time 5 + kurt media edit thumbnail video.mp4 -t 00:01:30 -w 640 -o thumb.jpg + """ + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + + with console.status("[bold blue]Creating thumbnail..."): + result = asyncio.run( + service.create_thumbnail( + input_path, + output_path=output, + time=time, + width=width, + height=height, + ) + ) + + if not result.success: + console.print(f"[red]Error:[/red] {result.error}") + raise SystemExit(1) + + console.print(f"[green]Thumbnail created:[/green] {result.output_path}") + + +@edit_group.command(name="extract-audio") +@click.argument("input_path", type=click.Path(exists=True)) +@click.option( + "--format", + "-f", + default="mp3", + type=click.Choice(["mp3", "wav", "aac", "ogg", "flac"]), + help="Output audio format", +) +@click.option("--bitrate", "-b", default="192k", help="Audio bitrate") +@click.option("--output", "-o", type=click.Path(), help="Output file path") +def edit_extract_audio( + input_path: str, + format: str, + bitrate: str, + output: str | None, +): + """Extract audio from a video file. 
+ + \b + Examples: + kurt media edit extract-audio video.mp4 + kurt media edit extract-audio video.mp4 --format wav --bitrate 320k + """ + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + + with console.status("[bold blue]Extracting audio..."): + result = asyncio.run( + service.extract_audio( + input_path, + output_path=output, + format=format, + bitrate=bitrate, + ) + ) + + if not result.success: + console.print(f"[red]Error:[/red] {result.error}") + raise SystemExit(1) + + console.print(f"[green]Audio extracted:[/green] {result.output_path}") + + +@edit_group.command(name="add-audio") +@click.argument("video_path", type=click.Path(exists=True)) +@click.argument("audio_path", type=click.Path(exists=True)) +@click.option("--output", "-o", type=click.Path(), help="Output file path") +@click.option("--mix", is_flag=True, help="Mix with existing audio instead of replacing") +@click.option("--volume", "-v", type=float, default=1.0, help="Audio volume multiplier") +def edit_add_audio( + video_path: str, + audio_path: str, + output: str | None, + mix: bool, + volume: float, +): + """Add or replace audio in a video. 
+ + \b + Examples: + # Replace audio + kurt media edit add-audio video.mp4 music.mp3 + + # Mix with existing audio + kurt media edit add-audio video.mp4 voiceover.mp3 --mix --volume 0.8 + """ + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + + with console.status("[bold blue]Adding audio..."): + result = asyncio.run( + service.add_audio( + video_path, + audio_path, + output_path=output, + replace=not mix, + volume=volume, + ) + ) + + if not result.success: + console.print(f"[red]Error:[/red] {result.error}") + raise SystemExit(1) + + console.print(f"[green]Audio added:[/green] {result.output_path}") + + +# ============================================================================= +# Convert Command +# ============================================================================= + + +@media_group.command(name="convert") +@click.argument("input_path", type=click.Path(exists=True)) +@click.option( + "--format", + "-f", + required=True, + type=click.Choice( + [ + "jpeg", + "jpg", + "png", + "webp", + "gif", + "avif", + "mp4", + "webm", + "mov", + "mp3", + "wav", + "aac", + "ogg", + ] + ), + help="Target format", +) +@click.option("--quality", "-q", type=int, default=85, help="Output quality (for lossy formats)") +@click.option("--output", "-o", type=click.Path(), help="Output file path") +def convert( + input_path: str, + format: str, + quality: int, + output: str | None, +): + """Convert media files between formats. 
+ + \b + Examples: + # Convert image to WebP + kurt media convert photo.jpg --format webp + + # Convert video to WebM + kurt media convert video.mp4 --format webm + + # Convert with custom quality + kurt media convert photo.png -f jpeg -q 90 -o compressed.jpg + """ + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + + with console.status(f"[bold blue]Converting to {format}..."): + result = asyncio.run( + service.convert( + input_path, + output_path=output, + format=format, + quality=quality, + ) + ) + + if not result.success: + console.print(f"[red]Error:[/red] {result.error}") + raise SystemExit(1) + + console.print(f"[green]Converted:[/green] {result.output_path}") + + +# ============================================================================= +# Info Command +# ============================================================================= + + +@media_group.command(name="info") +@click.argument("input_path", type=click.Path(exists=True)) +@click.option("--json", "as_json", is_flag=True, help="Output as JSON") +def info(input_path: str, as_json: bool): + """Get information about a media file. 
+ + \b + Examples: + kurt media info video.mp4 + kurt media info photo.jpg --json + """ + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + + result = asyncio.run(service.get_info(input_path)) + + if as_json: + click.echo( + json.dumps( + { + "path": result.path, + "format": result.format, + "width": result.width, + "height": result.height, + "duration": result.duration, + "fps": result.fps, + "size_bytes": result.size_bytes, + }, + indent=2, + ) + ) + return + + table = Table(title=f"Media Info: {Path(input_path).name}") + table.add_column("Property", style="cyan") + table.add_column("Value", style="green") + + table.add_row("Path", result.path) + table.add_row("Format", result.format or "unknown") + + if result.width and result.height: + table.add_row("Dimensions", f"{result.width}x{result.height}") + + if result.duration: + mins, secs = divmod(result.duration, 60) + table.add_row("Duration", f"{int(mins)}:{secs:05.2f}") + + if result.fps: + table.add_row("FPS", f"{result.fps:.2f}") + + if result.size_bytes: + size_mb = result.size_bytes / (1024 * 1024) + table.add_row("Size", f"{size_mb:.2f} MB") + + console.print(table) + + +# ============================================================================= +# Providers Command +# ============================================================================= + + +@media_group.command(name="providers") +def providers(): + """Show available AI providers and their status. + + Checks which providers have API keys configured. 
+ """ + table = Table(title="AI Generation Providers") + table.add_column("Provider", style="cyan") + table.add_column("Status", style="green") + table.add_column("Env Variable") + table.add_column("Capabilities") + + providers_info = [ + ("fal.ai", "FAL_KEY", "Image, Video"), + ("Leonardo.ai", "LEONARDO_API_KEY", "Image"), + ("Replicate", "REPLICATE_API_TOKEN", "Image, Video"), + ("Runway", "RUNWAY_API_KEY", "Video"), + ] + + for name, env_var, caps in providers_info: + is_configured = bool(os.environ.get(env_var)) + status = "[green]Configured[/green]" if is_configured else "[dim]Not set[/dim]" + table.add_row(name, status, env_var, caps) + + console.print(table) + console.print() + console.print("[dim]Set environment variables to enable providers.[/dim]") + console.print("[dim]Example: export FAL_KEY=your_api_key[/dim]") diff --git a/src/kurt/workflows/media/tests/__init__.py b/src/kurt/workflows/media/tests/__init__.py new file mode 100644 index 00000000..fbe5c9eb --- /dev/null +++ b/src/kurt/workflows/media/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for media workflow.""" diff --git a/src/kurt/workflows/media/tests/test_services.py b/src/kurt/workflows/media/tests/test_services.py new file mode 100644 index 00000000..139ebb75 --- /dev/null +++ b/src/kurt/workflows/media/tests/test_services.py @@ -0,0 +1,243 @@ +"""Tests for media services.""" + +import os +import tempfile +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from kurt.services.ai_generation import ( + AIGenerationService, + GenerationResult, + Provider, +) +from kurt.services.media_edit import ( + EditResult, + MediaEditService, + MediaFormat, +) + + +class TestAIGenerationService: + """Tests for AIGenerationService.""" + + def test_init_reads_env_vars(self, monkeypatch): + """Test that service reads API keys from environment.""" + monkeypatch.setenv("FAL_KEY", "test-fal-key") + monkeypatch.setenv("LEONARDO_API_KEY", "test-leonardo-key") + + 
service = AIGenerationService() + + assert service.fal_key == "test-fal-key" + assert service.leonardo_key == "test-leonardo-key" + + def test_init_with_explicit_keys(self): + """Test that explicit keys override env vars.""" + service = AIGenerationService( + fal_key="explicit-fal", + leonardo_key="explicit-leonardo", + ) + + assert service.fal_key == "explicit-fal" + assert service.leonardo_key == "explicit-leonardo" + + def test_default_providers(self): + """Test default provider settings.""" + service = AIGenerationService() + + assert service.default_image_provider == Provider.FAL + assert service.default_video_provider == Provider.FAL + + @pytest.mark.asyncio + async def test_generate_image_no_api_key(self): + """Test that missing API key returns error.""" + service = AIGenerationService(fal_key=None) + + result = await service.generate_image( + prompt="test prompt", + provider=Provider.FAL, + ) + + assert not result.success + assert "FAL_KEY not configured" in result.error + + @pytest.mark.asyncio + async def test_generate_video_no_api_key(self): + """Test that missing API key returns error for video.""" + service = AIGenerationService(runway_key=None) + + result = await service.generate_video( + prompt="test prompt", + provider=Provider.RUNWAY, + ) + + assert not result.success + assert "RUNWAY_API_KEY not configured" in result.error + + +class TestMediaEditService: + """Tests for MediaEditService.""" + + def test_init_defaults(self): + """Test default binary paths.""" + service = MediaEditService() + + assert service.ffmpeg_path == "ffmpeg" + assert service.magick_path == "magick" + + def test_init_with_custom_paths(self): + """Test custom binary paths.""" + service = MediaEditService( + ffmpeg_path="/usr/local/bin/ffmpeg", + magick_path="/usr/local/bin/magick", + ) + + assert service.ffmpeg_path == "/usr/local/bin/ffmpeg" + assert service.magick_path == "/usr/local/bin/magick" + + def test_ensure_output_path_with_provided(self): + """Test output path when 
explicitly provided.""" + service = MediaEditService() + + result = service._ensure_output_path( + input_path="input.jpg", + output_path="custom_output.jpg", + ) + + assert result == "custom_output.jpg" + + def test_ensure_output_path_auto_generated(self): + """Test auto-generated output path.""" + service = MediaEditService() + + result = service._ensure_output_path( + input_path="/path/to/input.jpg", + output_path=None, + ) + + assert result == "/path/to/input_edited.jpg" + + def test_ensure_output_path_with_suffix(self): + """Test output path with custom suffix.""" + service = MediaEditService() + + result = service._ensure_output_path( + input_path="/path/to/video.mp4", + output_path=None, + suffix="_trimmed.mp4", + ) + + assert result == "/path/to/video_trimmed.mp4" + + @pytest.mark.asyncio + async def test_resize_image_no_imagemagick(self): + """Test resize when ImageMagick not available.""" + service = MediaEditService() + + with patch.object(service, "_check_imagemagick", return_value=False): + result = await service.resize_image( + "input.jpg", + width=800, + height=600, + ) + + assert not result.success + assert "ImageMagick not found" in result.error + + @pytest.mark.asyncio + async def test_trim_video_no_ffmpeg(self): + """Test trim when FFmpeg not available.""" + service = MediaEditService() + + with patch.object(service, "_check_ffmpeg", return_value=False): + result = await service.trim_video( + "input.mp4", + start="00:00:30", + end="00:01:00", + ) + + assert not result.success + assert "FFmpeg not found" in result.error + + @pytest.mark.asyncio + async def test_convert_unsupported_format(self): + """Test convert with unsupported format.""" + service = MediaEditService() + + result = await service.convert( + "input.xyz", + format="unsupported", + ) + + assert not result.success + assert "Unsupported format" in result.error + + @pytest.mark.asyncio + async def test_get_info_basic(self, tmp_path): + """Test getting basic file info.""" + # Create a 
test file + test_file = tmp_path / "test.txt" + test_file.write_text("test content") + + service = MediaEditService() + + with patch.object(service, "_check_ffmpeg", return_value=False): + info = await service.get_info(str(test_file)) + + assert info.path == str(test_file) + assert info.format == "txt" + assert info.size_bytes > 0 + + +class TestGenerationResult: + """Tests for GenerationResult dataclass.""" + + def test_primary_url_from_url(self): + """Test primary_url when url is set.""" + result = GenerationResult( + success=True, + url="https://example.com/image.png", + urls=["https://example.com/other.png"], + ) + + assert result.primary_url == "https://example.com/image.png" + + def test_primary_url_from_urls(self): + """Test primary_url falls back to first urls entry.""" + result = GenerationResult( + success=True, + url=None, + urls=["https://example.com/first.png", "https://example.com/second.png"], + ) + + assert result.primary_url == "https://example.com/first.png" + + def test_primary_url_none(self): + """Test primary_url when nothing available.""" + result = GenerationResult( + success=False, + error="Some error", + ) + + assert result.primary_url is None + + +class TestMediaFormat: + """Tests for MediaFormat enum.""" + + def test_image_formats(self): + """Test image format values.""" + assert MediaFormat.JPEG.value == "jpeg" + assert MediaFormat.PNG.value == "png" + assert MediaFormat.WEBP.value == "webp" + + def test_video_formats(self): + """Test video format values.""" + assert MediaFormat.MP4.value == "mp4" + assert MediaFormat.WEBM.value == "webm" + + def test_audio_formats(self): + """Test audio format values.""" + assert MediaFormat.MP3.value == "mp3" + assert MediaFormat.WAV.value == "wav" From 4f0cf52ca71acad4f860479bb1c9816fe86db12b Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 17 Jan 2026 21:07:08 +0000 Subject: [PATCH 2/3] feat(ui): Add Image and Video Editor panels - Add ImageEditor component using Konva canvas - Shape tools: 
rectangle, circle, text - Transform/drag support for shapes - Color picker for fill/stroke - AI generation dialog - Export to PNG/JPEG functionality - Add VideoEditor component with custom player - Play/pause, seek, volume controls - Playback speed selection - Trim markers on timeline - Keyboard shortcuts (space, arrows, i/o, m) - AI video generation dialog - Add panel wrappers for dockview integration - Add dependencies: konva, react-konva, @remotion/player - Add ~530 lines of CSS for editor styling --- src/kurt/web/client/package.json | 3 + src/kurt/web/client/src/App.jsx | 4 + .../web/client/src/components/ImageEditor.jsx | 513 +++++++++++++++++ .../web/client/src/components/VideoEditor.jsx | 515 +++++++++++++++++ .../client/src/panels/ImageEditorPanel.jsx | 140 +++++ .../client/src/panels/VideoEditorPanel.jsx | 93 ++++ src/kurt/web/client/src/styles.css | 527 ++++++++++++++++++ 7 files changed, 1795 insertions(+) create mode 100644 src/kurt/web/client/src/components/ImageEditor.jsx create mode 100644 src/kurt/web/client/src/components/VideoEditor.jsx create mode 100644 src/kurt/web/client/src/panels/ImageEditorPanel.jsx create mode 100644 src/kurt/web/client/src/panels/VideoEditorPanel.jsx diff --git a/src/kurt/web/client/package.json b/src/kurt/web/client/package.json index e43728e9..a58daf89 100644 --- a/src/kurt/web/client/package.json +++ b/src/kurt/web/client/package.json @@ -14,6 +14,7 @@ "test:ui": "vitest --ui" }, "dependencies": { + "@remotion/player": "^4.0.250", "@tiptap/core": "^3.15.2", "@tiptap/extension-highlight": "^3.15.2", "@tiptap/extension-image": "^3.15.3", @@ -29,11 +30,13 @@ "@tiptap/starter-kit": "^3.15.2", "diff": "^8.0.2", "dockview-react": "^4.13.1", + "konva": "^9.3.18", "markdown-it": "^14.0.0", "prism-react-renderer": "^2.4.1", "react": "^18.2.0", "react-diff-view": "^3.2.0", "react-dom": "^18.2.0", + "react-konva": "^18.2.10", "react-simple-code-editor": "^0.14.1", "turndown": "^7.1.2", "turndown-plugin-gfm": "^1.0.2", diff --git 
a/src/kurt/web/client/src/App.jsx b/src/kurt/web/client/src/App.jsx index 2aca41d7..1aa40c50 100644 --- a/src/kurt/web/client/src/App.jsx +++ b/src/kurt/web/client/src/App.jsx @@ -10,6 +10,8 @@ import EmptyPanel from './panels/EmptyPanel' import ReviewPanel from './panels/ReviewPanel' import WorkflowsPanel from './panels/WorkflowsPanel' import WorkflowTerminalPanel from './panels/WorkflowTerminalPanel' +import ImageEditorPanel from './panels/ImageEditorPanel' +import VideoEditorPanel from './panels/VideoEditorPanel' import DiffHighlightPOC from './components/DiffHighlightPOC' import TiptapDiffPOC from './components/TiptapDiffPOC' @@ -28,6 +30,8 @@ const components = { review: ReviewPanel, workflows: WorkflowsPanel, workflowTerminal: WorkflowTerminalPanel, + imageEditor: ImageEditorPanel, + videoEditor: VideoEditorPanel, } const KNOWN_COMPONENTS = new Set(Object.keys(components)) diff --git a/src/kurt/web/client/src/components/ImageEditor.jsx b/src/kurt/web/client/src/components/ImageEditor.jsx new file mode 100644 index 00000000..1ac70623 --- /dev/null +++ b/src/kurt/web/client/src/components/ImageEditor.jsx @@ -0,0 +1,513 @@ +import React, { useState, useEffect, useRef, useCallback } from 'react' +import { Stage, Layer, Image as KonvaImage, Rect, Circle, Text, Transformer } from 'react-konva' + +const apiBase = import.meta.env.VITE_API_URL || '' +const apiUrl = (path) => `${apiBase}${path}` + +// Load image from URL or file +const useImage = (src) => { + const [image, setImage] = useState(null) + const [status, setStatus] = useState('loading') + + useEffect(() => { + if (!src) { + setImage(null) + setStatus('idle') + return + } + + setStatus('loading') + const img = new window.Image() + img.crossOrigin = 'anonymous' + + img.onload = () => { + setImage(img) + setStatus('loaded') + } + + img.onerror = () => { + setImage(null) + setStatus('error') + } + + img.src = src + }, [src]) + + return [image, status] +} + +// Shape component that can be selected and 
transformed +const Shape = ({ shapeProps, isSelected, onSelect, onChange }) => { + const shapeRef = useRef() + const trRef = useRef() + + useEffect(() => { + if (isSelected && trRef.current && shapeRef.current) { + trRef.current.nodes([shapeRef.current]) + trRef.current.getLayer().batchDraw() + } + }, [isSelected]) + + const ShapeComponent = shapeProps.type === 'circle' ? Circle : Rect + + return ( + <> + { + onChange({ + ...shapeProps, + x: e.target.x(), + y: e.target.y(), + }) + }} + onTransformEnd={(e) => { + const node = shapeRef.current + const scaleX = node.scaleX() + const scaleY = node.scaleY() + + node.scaleX(1) + node.scaleY(1) + + onChange({ + ...shapeProps, + x: node.x(), + y: node.y(), + width: Math.max(5, node.width() * scaleX), + height: Math.max(5, node.height() * scaleY), + rotation: node.rotation(), + }) + }} + /> + {isSelected && ( + { + if (newBox.width < 5 || newBox.height < 5) { + return oldBox + } + return newBox + }} + /> + )} + + ) +} + +// Text element component +const TextElement = ({ textProps, isSelected, onSelect, onChange }) => { + const textRef = useRef() + const trRef = useRef() + + useEffect(() => { + if (isSelected && trRef.current && textRef.current) { + trRef.current.nodes([textRef.current]) + trRef.current.getLayer().batchDraw() + } + }, [isSelected]) + + return ( + <> + { + onChange({ + ...textProps, + x: e.target.x(), + y: e.target.y(), + }) + }} + onTransformEnd={(e) => { + const node = textRef.current + onChange({ + ...textProps, + x: node.x(), + y: node.y(), + fontSize: Math.max(8, textProps.fontSize * node.scaleY()), + rotation: node.rotation(), + }) + node.scaleX(1) + node.scaleY(1) + }} + /> + {isSelected && ( + { + newBox.width = Math.max(30, newBox.width) + return newBox + }} + /> + )} + + ) +} + +export default function ImageEditor({ + imageSrc, + onSave, + onGenerate, + width = 800, + height = 600, +}) { + const [image, imageStatus] = useImage(imageSrc) + const [shapes, setShapes] = useState([]) + const [texts, 
setTexts] = useState([]) + const [selectedId, setSelectedId] = useState(null) + const [tool, setTool] = useState('select') // select, rect, circle, text + const [fillColor, setFillColor] = useState('#3b82f6') + const [strokeColor, setStrokeColor] = useState('#1e40af') + const [textContent, setTextContent] = useState('Text') + const [isGenerating, setIsGenerating] = useState(false) + const [generatePrompt, setGeneratePrompt] = useState('') + const [showGenerateDialog, setShowGenerateDialog] = useState(false) + const stageRef = useRef() + + // Calculate image dimensions to fit canvas while maintaining aspect ratio + const getImageDimensions = useCallback(() => { + if (!image) return { x: 0, y: 0, width: 0, height: 0 } + + const imgRatio = image.width / image.height + const canvasRatio = width / height + + let imgWidth, imgHeight, imgX, imgY + + if (imgRatio > canvasRatio) { + imgWidth = width + imgHeight = width / imgRatio + imgX = 0 + imgY = (height - imgHeight) / 2 + } else { + imgHeight = height + imgWidth = height * imgRatio + imgX = (width - imgWidth) / 2 + imgY = 0 + } + + return { x: imgX, y: imgY, width: imgWidth, height: imgHeight } + }, [image, width, height]) + + // Handle stage click + const handleStageClick = (e) => { + const clickedOnEmpty = e.target === e.target.getStage() + if (clickedOnEmpty) { + setSelectedId(null) + return + } + + const pos = e.target.getStage().getPointerPosition() + + if (tool === 'rect') { + const newShape = { + id: `rect-${Date.now()}`, + type: 'rect', + x: pos.x - 50, + y: pos.y - 25, + width: 100, + height: 50, + fill: fillColor, + stroke: strokeColor, + strokeWidth: 2, + } + setShapes([...shapes, newShape]) + setSelectedId(newShape.id) + setTool('select') + } else if (tool === 'circle') { + const newShape = { + id: `circle-${Date.now()}`, + type: 'circle', + x: pos.x, + y: pos.y, + radius: 40, + fill: fillColor, + stroke: strokeColor, + strokeWidth: 2, + } + setShapes([...shapes, newShape]) + setSelectedId(newShape.id) + 
setTool('select') + } else if (tool === 'text') { + const newText = { + id: `text-${Date.now()}`, + type: 'text', + x: pos.x, + y: pos.y, + text: textContent, + fontSize: 24, + fill: fillColor, + fontFamily: 'Arial', + } + setTexts([...texts, newText]) + setSelectedId(newText.id) + setTool('select') + } + } + + // Delete selected element + const handleDelete = useCallback(() => { + if (!selectedId) return + setShapes(shapes.filter((s) => s.id !== selectedId)) + setTexts(texts.filter((t) => t.id !== selectedId)) + setSelectedId(null) + }, [selectedId, shapes, texts]) + + // Keyboard shortcuts + useEffect(() => { + const handleKeyDown = (e) => { + if (e.key === 'Delete' || e.key === 'Backspace') { + if (document.activeElement.tagName !== 'INPUT') { + handleDelete() + } + } + } + window.addEventListener('keydown', handleKeyDown) + return () => window.removeEventListener('keydown', handleKeyDown) + }, [handleDelete]) + + // Export canvas as image + const handleExport = useCallback(() => { + if (!stageRef.current) return + const uri = stageRef.current.toDataURL() + const link = document.createElement('a') + link.download = 'image-export.png' + link.href = uri + document.body.appendChild(link) + link.click() + document.body.removeChild(link) + }, []) + + // Generate image with AI + const handleGenerate = async () => { + if (!generatePrompt.trim()) return + + setIsGenerating(true) + try { + const response = await fetch(apiUrl('/api/media/generate/image'), { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + prompt: generatePrompt, + width: 1024, + height: 1024, + }), + }) + + if (!response.ok) { + const data = await response.json() + throw new Error(data.detail || 'Failed to generate image') + } + + const data = await response.json() + if (data.url && onGenerate) { + onGenerate(data.url) + } + setShowGenerateDialog(false) + setGeneratePrompt('') + } catch (err) { + alert(`Generation failed: ${err.message}`) + } finally { + 
setIsGenerating(false) + } + } + + const imgDims = getImageDimensions() + + return ( +
+ {/* Toolbar */} +
+
+ + + + +
+ +
+ + +
+ + {tool === 'text' && ( +
+ setTextContent(e.target.value)} + placeholder="Text content" + className="text-input" + /> +
+ )} + +
+ {selectedId && ( + + )} + + +
+
+ + {/* Canvas */} +
+ {imageStatus === 'loading' && ( +
Loading image...
+ )} + {imageStatus === 'error' && ( +
Failed to load image
+ )} + + + {/* Background image */} + {image && ( + + )} + + {/* Shapes */} + {shapes.map((shape) => ( + setSelectedId(shape.id)} + onChange={(newAttrs) => { + setShapes(shapes.map((s) => (s.id === shape.id ? newAttrs : s))) + }} + /> + ))} + + {/* Text elements */} + {texts.map((text) => ( + setSelectedId(text.id)} + onChange={(newAttrs) => { + setTexts(texts.map((t) => (t.id === text.id ? newAttrs : t))) + }} + /> + ))} + + +
+ + {/* Generate Dialog */} + {showGenerateDialog && ( +
setShowGenerateDialog(false)}> +
e.stopPropagation()}> +

Generate Image with AI

+