diff --git a/.gitignore b/.gitignore index a3af2c4e..6a9ff5d5 100644 --- a/.gitignore +++ b/.gitignore @@ -82,8 +82,8 @@ yarn-error.log\* kurt-demo/ kurt-demo2/ -# Agent workflow definitions (user-specific) -workflows/ +# Agent workflow definitions (user-specific, top-level only) +/workflows/ # Reports (local demo reports) reports/ diff --git a/src/kurt/cli/main.py b/src/kurt/cli/main.py index 81c64b76..d2a525b9 100644 --- a/src/kurt/cli/main.py +++ b/src/kurt/cli/main.py @@ -43,6 +43,7 @@ def get_command(self, ctx, name): "research": ("kurt.workflows.research.cli", "research_group"), "signals": ("kurt.workflows.signals.cli", "signals_group"), "agents": ("kurt.workflows.agents.cli", "agents_group"), + "media": ("kurt.workflows.media.cli", "media_group"), "admin": ("kurt.cli.admin", "admin"), "show": ("kurt.cli.show", "show_group"), "auth": ("kurt.cli.auth", "auth"), @@ -58,8 +59,8 @@ def main(ctx): """ from kurt.config import config_file_exists - # Skip migration check for init, admin, and auth (which don't need local DB) - if ctx.invoked_subcommand in ["init", "admin", "auth"]: + # Skip migration check for init, admin, auth, and media (which don't need local DB) + if ctx.invoked_subcommand in ["init", "admin", "auth", "media"]: return # Skip migration check if running in hook mode diff --git a/src/kurt/services/__init__.py b/src/kurt/services/__init__.py new file mode 100644 index 00000000..42cdedff --- /dev/null +++ b/src/kurt/services/__init__.py @@ -0,0 +1,6 @@ +"""Kurt services - external API integrations and utilities.""" + +from kurt.services.ai_generation import AIGenerationService +from kurt.services.media_edit import MediaEditService + +__all__ = ["AIGenerationService", "MediaEditService"] diff --git a/src/kurt/services/ai_generation.py b/src/kurt/services/ai_generation.py new file mode 100644 index 00000000..29d35afa --- /dev/null +++ b/src/kurt/services/ai_generation.py @@ -0,0 +1,846 @@ +"""AI Generation Service - unified interface to image/video 
generation APIs. + +Supported providers: +- fal.ai: Fast inference, Flux models, video generation +- Leonardo.ai: Nano Banana, Phoenix, commercial-grade +- Replicate: Huge model library, pay-per-use +- Runway: Video generation (Gen-3, Gen-4) + +Environment variables: +- FAL_KEY: fal.ai API key +- LEONARDO_API_KEY: Leonardo.ai API key +- REPLICATE_API_TOKEN: Replicate API token +- RUNWAY_API_KEY: Runway API key +""" + +from __future__ import annotations + +import asyncio +import os +import time +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + +import httpx + + +class Provider(str, Enum): + """Supported AI generation providers.""" + + FAL = "fal" + LEONARDO = "leonardo" + REPLICATE = "replicate" + RUNWAY = "runway" + + +class MediaType(str, Enum): + """Type of media to generate.""" + + IMAGE = "image" + VIDEO = "video" + + +@dataclass +class GenerationResult: + """Result from an AI generation request.""" + + success: bool + url: str | None = None + urls: list[str] = field(default_factory=list) + job_id: str | None = None + provider: str | None = None + model: str | None = None + error: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + @property + def primary_url(self) -> str | None: + """Get the primary output URL.""" + return self.url or (self.urls[0] if self.urls else None) + + +class AIGenerationService: + """Unified interface to AI generation APIs. 
+ + Example: + service = AIGenerationService() + + # Generate image + result = await service.generate_image( + prompt="A futuristic city at sunset", + model="flux-dev", + ) + print(result.url) + + # Generate video from image + result = await service.generate_video( + image_url=result.url, + prompt="Slow zoom in with particles floating", + duration=5, + ) + print(result.url) + """ + + # Default models for each provider + DEFAULT_MODELS = { + Provider.FAL: { + MediaType.IMAGE: "flux/dev", + MediaType.VIDEO: "ltx-video/image-to-video", + }, + Provider.LEONARDO: { + MediaType.IMAGE: "phoenix", + }, + Provider.REPLICATE: { + MediaType.IMAGE: "stability-ai/sdxl", + MediaType.VIDEO: "stability-ai/stable-video-diffusion", + }, + Provider.RUNWAY: { + MediaType.VIDEO: "gen3a_turbo", + }, + } + + def __init__( + self, + fal_key: str | None = None, + leonardo_key: str | None = None, + replicate_token: str | None = None, + runway_key: str | None = None, + default_image_provider: Provider = Provider.FAL, + default_video_provider: Provider = Provider.FAL, + ): + """Initialize the AI generation service. 
+ + Args: + fal_key: fal.ai API key (or FAL_KEY env var) + leonardo_key: Leonardo.ai API key (or LEONARDO_API_KEY env var) + replicate_token: Replicate token (or REPLICATE_API_TOKEN env var) + runway_key: Runway API key (or RUNWAY_API_KEY env var) + default_image_provider: Default provider for image generation + default_video_provider: Default provider for video generation + """ + self.fal_key = fal_key or os.environ.get("FAL_KEY") + self.leonardo_key = leonardo_key or os.environ.get("LEONARDO_API_KEY") + self.replicate_token = replicate_token or os.environ.get("REPLICATE_API_TOKEN") + self.runway_key = runway_key or os.environ.get("RUNWAY_API_KEY") + + self.default_image_provider = default_image_provider + self.default_video_provider = default_video_provider + + self._client: httpx.AsyncClient | None = None + + @property + def client(self) -> httpx.AsyncClient: + """Get or create the HTTP client.""" + if self._client is None: + self._client = httpx.AsyncClient(timeout=300.0) + return self._client + + async def close(self) -> None: + """Close the HTTP client.""" + if self._client is not None: + await self._client.aclose() + self._client = None + + def _get_provider_key(self, provider: Provider) -> str | None: + """Get the API key for a provider.""" + return { + Provider.FAL: self.fal_key, + Provider.LEONARDO: self.leonardo_key, + Provider.REPLICATE: self.replicate_token, + Provider.RUNWAY: self.runway_key, + }.get(provider) + + async def generate_image( + self, + prompt: str, + model: str | None = None, + provider: Provider | str | None = None, + width: int = 1024, + height: int = 1024, + num_images: int = 1, + negative_prompt: str | None = None, + **kwargs: Any, + ) -> GenerationResult: + """Generate an image using AI. 
+ + Args: + prompt: Text description of the image to generate + model: Model identifier (provider-specific) + provider: Provider to use (fal, leonardo, replicate) + width: Image width in pixels + height: Image height in pixels + num_images: Number of images to generate + negative_prompt: Things to avoid in the image + **kwargs: Additional provider-specific parameters + + Returns: + GenerationResult with URL(s) of generated images + """ + if provider is None: + provider = self.default_image_provider + elif isinstance(provider, str): + provider = Provider(provider) + + if model is None: + model = self.DEFAULT_MODELS.get(provider, {}).get(MediaType.IMAGE) + + if provider == Provider.FAL: + return await self._fal_generate_image( + prompt=prompt, + model=model, + width=width, + height=height, + num_images=num_images, + negative_prompt=negative_prompt, + **kwargs, + ) + elif provider == Provider.LEONARDO: + return await self._leonardo_generate_image( + prompt=prompt, + model=model, + width=width, + height=height, + num_images=num_images, + negative_prompt=negative_prompt, + **kwargs, + ) + elif provider == Provider.REPLICATE: + return await self._replicate_generate_image( + prompt=prompt, + model=model, + width=width, + height=height, + num_images=num_images, + negative_prompt=negative_prompt, + **kwargs, + ) + else: + return GenerationResult( + success=False, + error=f"Provider {provider} does not support image generation", + ) + + async def generate_video( + self, + prompt: str, + image_url: str | None = None, + model: str | None = None, + provider: Provider | str | None = None, + duration: int = 5, + **kwargs: Any, + ) -> GenerationResult: + """Generate a video using AI. 
+ + Args: + prompt: Text description of the video motion/content + image_url: Source image URL (for image-to-video) + model: Model identifier (provider-specific) + provider: Provider to use (fal, runway, replicate) + duration: Video duration in seconds + **kwargs: Additional provider-specific parameters + + Returns: + GenerationResult with URL of generated video + """ + if provider is None: + provider = self.default_video_provider + elif isinstance(provider, str): + provider = Provider(provider) + + if model is None: + model = self.DEFAULT_MODELS.get(provider, {}).get(MediaType.VIDEO) + + if provider == Provider.FAL: + return await self._fal_generate_video( + prompt=prompt, + image_url=image_url, + model=model, + duration=duration, + **kwargs, + ) + elif provider == Provider.RUNWAY: + return await self._runway_generate_video( + prompt=prompt, + image_url=image_url, + model=model, + duration=duration, + **kwargs, + ) + elif provider == Provider.REPLICATE: + return await self._replicate_generate_video( + prompt=prompt, + image_url=image_url, + model=model, + **kwargs, + ) + else: + return GenerationResult( + success=False, + error=f"Provider {provider} does not support video generation", + ) + + # ------------------------------------------------------------------------- + # fal.ai Implementation + # ------------------------------------------------------------------------- + + async def _fal_generate_image( + self, + prompt: str, + model: str, + width: int, + height: int, + num_images: int, + negative_prompt: str | None, + **kwargs: Any, + ) -> GenerationResult: + """Generate image via fal.ai.""" + if not self.fal_key: + return GenerationResult( + success=False, + error="FAL_KEY not configured", + ) + + # fal.ai uses model paths like "fal-ai/flux/dev" + if not model.startswith("fal-ai/"): + model = f"fal-ai/{model}" + + url = f"https://fal.run/{model}" + + payload: dict[str, Any] = { + "prompt": prompt, + "image_size": {"width": width, "height": height}, + 
"num_images": num_images, + } + if negative_prompt: + payload["negative_prompt"] = negative_prompt + payload.update(kwargs) + + try: + response = await self.client.post( + url, + headers={ + "Authorization": f"Key {self.fal_key}", + "Content-Type": "application/json", + }, + json=payload, + ) + response.raise_for_status() + data = response.json() + + images = data.get("images", []) + urls = [img.get("url") for img in images if img.get("url")] + + return GenerationResult( + success=True, + url=urls[0] if urls else None, + urls=urls, + provider="fal", + model=model, + metadata={"seed": data.get("seed")}, + ) + except httpx.HTTPStatusError as e: + return GenerationResult( + success=False, + error=f"fal.ai API error: {e.response.status_code} - {e.response.text}", + provider="fal", + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"fal.ai request failed: {e}", + provider="fal", + ) + + async def _fal_generate_video( + self, + prompt: str, + image_url: str | None, + model: str, + duration: int, + **kwargs: Any, + ) -> GenerationResult: + """Generate video via fal.ai.""" + if not self.fal_key: + return GenerationResult( + success=False, + error="FAL_KEY not configured", + ) + + if not model.startswith("fal-ai/"): + model = f"fal-ai/{model}" + + url = f"https://fal.run/{model}" + + payload: dict[str, Any] = {"prompt": prompt} + if image_url: + payload["image_url"] = image_url + if "num_frames" not in kwargs: + # Approximate frames from duration (assuming ~24fps output) + payload["num_frames"] = min(duration * 24, 257) + payload.update(kwargs) + + try: + response = await self.client.post( + url, + headers={ + "Authorization": f"Key {self.fal_key}", + "Content-Type": "application/json", + }, + json=payload, + ) + response.raise_for_status() + data = response.json() + + video_url = data.get("video", {}).get("url") + + return GenerationResult( + success=True, + url=video_url, + provider="fal", + model=model, + metadata=data, + ) + except 
httpx.HTTPStatusError as e: + return GenerationResult( + success=False, + error=f"fal.ai API error: {e.response.status_code} - {e.response.text}", + provider="fal", + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"fal.ai request failed: {e}", + provider="fal", + ) + + # ------------------------------------------------------------------------- + # Leonardo.ai Implementation + # ------------------------------------------------------------------------- + + async def _leonardo_generate_image( + self, + prompt: str, + model: str, + width: int, + height: int, + num_images: int, + negative_prompt: str | None, + **kwargs: Any, + ) -> GenerationResult: + """Generate image via Leonardo.ai.""" + if not self.leonardo_key: + return GenerationResult( + success=False, + error="LEONARDO_API_KEY not configured", + ) + + base_url = "https://cloud.leonardo.ai/api/rest/v1" + + # Model name to ID mapping (common models) + model_ids = { + "phoenix": "6b645e3a-d64f-4341-a6d8-7a3690fbf042", + "nano-banana": "aa77f04e-3eec-4034-9c07-d0f619684628", + "nano-banana-pro": "faf3e8d3-6d19-4e98-8c3a-5c17e9f67a28", + "sdxl": "1e60896f-3c26-4296-8ecc-53e2afecc132", + } + + model_id = model_ids.get(model, model) + + payload: dict[str, Any] = { + "prompt": prompt, + "modelId": model_id, + "width": width, + "height": height, + "num_images": num_images, + } + if negative_prompt: + payload["negative_prompt"] = negative_prompt + payload.update(kwargs) + + try: + # Start generation + response = await self.client.post( + f"{base_url}/generations", + headers={ + "Authorization": f"Bearer {self.leonardo_key}", + "Content-Type": "application/json", + }, + json=payload, + ) + response.raise_for_status() + data = response.json() + + generation_id = data.get("sdGenerationJob", {}).get("generationId") + if not generation_id: + return GenerationResult( + success=False, + error="No generation ID returned", + provider="leonardo", + ) + + # Poll for completion + urls = await 
self._leonardo_poll_generation(generation_id) + + return GenerationResult( + success=True, + url=urls[0] if urls else None, + urls=urls, + job_id=generation_id, + provider="leonardo", + model=model, + ) + except httpx.HTTPStatusError as e: + return GenerationResult( + success=False, + error=f"Leonardo API error: {e.response.status_code} - {e.response.text}", + provider="leonardo", + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"Leonardo request failed: {e}", + provider="leonardo", + ) + + async def _leonardo_poll_generation( + self, + generation_id: str, + max_wait: int = 120, + poll_interval: float = 2.0, + ) -> list[str]: + """Poll Leonardo.ai for generation completion.""" + base_url = "https://cloud.leonardo.ai/api/rest/v1" + start_time = time.time() + + while time.time() - start_time < max_wait: + response = await self.client.get( + f"{base_url}/generations/{generation_id}", + headers={"Authorization": f"Bearer {self.leonardo_key}"}, + ) + response.raise_for_status() + data = response.json() + + generation = data.get("generations_by_pk", {}) + status = generation.get("status") + + if status == "COMPLETE": + images = generation.get("generated_images", []) + return [img.get("url") for img in images if img.get("url")] + elif status == "FAILED": + raise Exception("Generation failed") + + await asyncio.sleep(poll_interval) + + raise Exception("Generation timed out") + + # ------------------------------------------------------------------------- + # Replicate Implementation + # ------------------------------------------------------------------------- + + async def _replicate_generate_image( + self, + prompt: str, + model: str, + width: int, + height: int, + num_images: int, + negative_prompt: str | None, + **kwargs: Any, + ) -> GenerationResult: + """Generate image via Replicate.""" + if not self.replicate_token: + return GenerationResult( + success=False, + error="REPLICATE_API_TOKEN not configured", + ) + + base_url = 
"https://api.replicate.com/v1" + + # Build input based on model + input_data: dict[str, Any] = { + "prompt": prompt, + "width": width, + "height": height, + "num_outputs": num_images, + } + if negative_prompt: + input_data["negative_prompt"] = negative_prompt + input_data.update(kwargs) + + try: + # Start prediction + response = await self.client.post( + f"{base_url}/predictions", + headers={ + "Authorization": f"Token {self.replicate_token}", + "Content-Type": "application/json", + }, + json={"version": model, "input": input_data}, + ) + response.raise_for_status() + data = response.json() + + prediction_id = data.get("id") + if not prediction_id: + return GenerationResult( + success=False, + error="No prediction ID returned", + provider="replicate", + ) + + # Poll for completion + urls = await self._replicate_poll_prediction(prediction_id) + + return GenerationResult( + success=True, + url=urls[0] if urls else None, + urls=urls, + job_id=prediction_id, + provider="replicate", + model=model, + ) + except httpx.HTTPStatusError as e: + return GenerationResult( + success=False, + error=f"Replicate API error: {e.response.status_code} - {e.response.text}", + provider="replicate", + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"Replicate request failed: {e}", + provider="replicate", + ) + + async def _replicate_poll_prediction( + self, + prediction_id: str, + max_wait: int = 300, + poll_interval: float = 2.0, + ) -> list[str]: + """Poll Replicate for prediction completion.""" + base_url = "https://api.replicate.com/v1" + start_time = time.time() + + while time.time() - start_time < max_wait: + response = await self.client.get( + f"{base_url}/predictions/{prediction_id}", + headers={"Authorization": f"Token {self.replicate_token}"}, + ) + response.raise_for_status() + data = response.json() + + status = data.get("status") + + if status == "succeeded": + output = data.get("output", []) + if isinstance(output, list): + return output + 
return [output] if output else [] + elif status in ("failed", "canceled"): + raise Exception(f"Prediction {status}: {data.get('error')}") + + await asyncio.sleep(poll_interval) + + raise Exception("Prediction timed out") + + async def _replicate_generate_video( + self, + prompt: str, + image_url: str | None, + model: str, + **kwargs: Any, + ) -> GenerationResult: + """Generate video via Replicate.""" + if not self.replicate_token: + return GenerationResult( + success=False, + error="REPLICATE_API_TOKEN not configured", + ) + + base_url = "https://api.replicate.com/v1" + + input_data: dict[str, Any] = {} + if image_url: + input_data["image"] = image_url + if prompt: + input_data["prompt"] = prompt + input_data.update(kwargs) + + try: + response = await self.client.post( + f"{base_url}/predictions", + headers={ + "Authorization": f"Token {self.replicate_token}", + "Content-Type": "application/json", + }, + json={"version": model, "input": input_data}, + ) + response.raise_for_status() + data = response.json() + + prediction_id = data.get("id") + if not prediction_id: + return GenerationResult( + success=False, + error="No prediction ID returned", + provider="replicate", + ) + + urls = await self._replicate_poll_prediction(prediction_id) + + return GenerationResult( + success=True, + url=urls[0] if urls else None, + urls=urls, + job_id=prediction_id, + provider="replicate", + model=model, + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"Replicate request failed: {e}", + provider="replicate", + ) + + # ------------------------------------------------------------------------- + # Runway Implementation + # ------------------------------------------------------------------------- + + async def _runway_generate_video( + self, + prompt: str, + image_url: str | None, + model: str, + duration: int, + **kwargs: Any, + ) -> GenerationResult: + """Generate video via Runway.""" + if not self.runway_key: + return GenerationResult( + success=False, 
+ error="RUNWAY_API_KEY not configured", + ) + + base_url = "https://api.dev.runwayml.com/v1" + + payload: dict[str, Any] = { + "model": model, + "promptText": prompt, + "duration": duration, + } + if image_url: + payload["promptImage"] = image_url + payload.update(kwargs) + + try: + # Start generation + response = await self.client.post( + f"{base_url}/image_to_video" if image_url else f"{base_url}/text_to_video", + headers={ + "Authorization": f"Bearer {self.runway_key}", + "Content-Type": "application/json", + "X-Runway-Version": "2024-11-06", + }, + json=payload, + ) + response.raise_for_status() + data = response.json() + + task_id = data.get("id") + if not task_id: + return GenerationResult( + success=False, + error="No task ID returned", + provider="runway", + ) + + # Poll for completion + video_url = await self._runway_poll_task(task_id) + + return GenerationResult( + success=True, + url=video_url, + job_id=task_id, + provider="runway", + model=model, + ) + except httpx.HTTPStatusError as e: + return GenerationResult( + success=False, + error=f"Runway API error: {e.response.status_code} - {e.response.text}", + provider="runway", + ) + except Exception as e: + return GenerationResult( + success=False, + error=f"Runway request failed: {e}", + provider="runway", + ) + + async def _runway_poll_task( + self, + task_id: str, + max_wait: int = 600, + poll_interval: float = 5.0, + ) -> str: + """Poll Runway for task completion.""" + base_url = "https://api.dev.runwayml.com/v1" + start_time = time.time() + + while time.time() - start_time < max_wait: + response = await self.client.get( + f"{base_url}/tasks/{task_id}", + headers={ + "Authorization": f"Bearer {self.runway_key}", + "X-Runway-Version": "2024-11-06", + }, + ) + response.raise_for_status() + data = response.json() + + status = data.get("status") + + if status == "SUCCEEDED": + output = data.get("output", []) + if output: + return output[0] + raise Exception("No output URL in completed task") + elif status 
== "FAILED": + raise Exception(f"Task failed: {data.get('failure')}") + + await asyncio.sleep(poll_interval) + + raise Exception("Task timed out") + + +# Convenience function for synchronous usage +def generate_image_sync( + prompt: str, + model: str | None = None, + provider: str | None = None, + **kwargs: Any, +) -> GenerationResult: + """Synchronous wrapper for image generation.""" + service = AIGenerationService() + return asyncio.run( + service.generate_image(prompt=prompt, model=model, provider=provider, **kwargs) + ) + + +def generate_video_sync( + prompt: str, + image_url: str | None = None, + model: str | None = None, + provider: str | None = None, + **kwargs: Any, +) -> GenerationResult: + """Synchronous wrapper for video generation.""" + service = AIGenerationService() + return asyncio.run( + service.generate_video( + prompt=prompt, image_url=image_url, model=model, provider=provider, **kwargs + ) + ) diff --git a/src/kurt/services/media_edit.py b/src/kurt/services/media_edit.py new file mode 100644 index 00000000..1103d00a --- /dev/null +++ b/src/kurt/services/media_edit.py @@ -0,0 +1,1012 @@ +"""Media Edit Service - wrapper around FFmpeg and ImageMagick. 
+ +Provides a unified interface for common media editing operations: +- Image: resize, crop, rotate, format conversion, filters +- Video: trim, resize, extract audio, add audio, format conversion +- Audio: trim, convert, extract from video + +Requirements: +- FFmpeg: video/audio processing (install via apt/brew/choco) +- ImageMagick: image processing (install via apt/brew/choco) + +Environment variables: +- FFMPEG_PATH: Path to ffmpeg binary (default: "ffmpeg") +- MAGICK_PATH: Path to magick binary (default: "magick") +""" + +from __future__ import annotations + +import asyncio +import os +import shutil +import tempfile +from dataclasses import dataclass, field +from enum import Enum +from pathlib import Path +from typing import Any + + +class MediaFormat(str, Enum): + """Supported output formats.""" + + # Image formats + JPEG = "jpeg" + JPG = "jpg" + PNG = "png" + WEBP = "webp" + GIF = "gif" + AVIF = "avif" + TIFF = "tiff" + + # Video formats + MP4 = "mp4" + WEBM = "webm" + MOV = "mov" + AVI = "avi" + MKV = "mkv" + + # Audio formats + MP3 = "mp3" + WAV = "wav" + AAC = "aac" + OGG = "ogg" + FLAC = "flac" + + +@dataclass +class EditResult: + """Result from a media editing operation.""" + + success: bool + output_path: str | None = None + error: str | None = None + command: str | None = None + stdout: str | None = None + stderr: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class MediaInfo: + """Information about a media file.""" + + path: str + format: str | None = None + width: int | None = None + height: int | None = None + duration: float | None = None + bitrate: int | None = None + codec: str | None = None + audio_codec: str | None = None + fps: float | None = None + size_bytes: int | None = None + + +class MediaEditService: + """Service for editing media files using FFmpeg and ImageMagick. 
+ + Example: + service = MediaEditService() + + # Resize image + result = await service.resize_image( + "input.jpg", + output_path="output.jpg", + width=800, + height=600, + ) + + # Trim video + result = await service.trim_video( + "input.mp4", + output_path="output.mp4", + start="00:00:30", + end="00:01:00", + ) + + # Convert format + result = await service.convert( + "input.png", + output_path="output.webp", + format=MediaFormat.WEBP, + ) + """ + + def __init__( + self, + ffmpeg_path: str | None = None, + magick_path: str | None = None, + ): + """Initialize the media edit service. + + Args: + ffmpeg_path: Path to ffmpeg binary (or FFMPEG_PATH env var) + magick_path: Path to magick binary (or MAGICK_PATH env var) + """ + self.ffmpeg_path = ffmpeg_path or os.environ.get("FFMPEG_PATH", "ffmpeg") + self.magick_path = magick_path or os.environ.get("MAGICK_PATH", "magick") + + def _check_ffmpeg(self) -> bool: + """Check if FFmpeg is available.""" + return shutil.which(self.ffmpeg_path) is not None + + def _check_imagemagick(self) -> bool: + """Check if ImageMagick is available.""" + return shutil.which(self.magick_path) is not None + + async def _run_command( + self, + cmd: list[str], + check: bool = True, + ) -> tuple[int, str, str]: + """Run a command asynchronously. 
+ + Returns: + Tuple of (return_code, stdout, stderr) + """ + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await process.communicate() + return ( + process.returncode or 0, + stdout.decode("utf-8", errors="replace"), + stderr.decode("utf-8", errors="replace"), + ) + + def _ensure_output_path( + self, + input_path: str, + output_path: str | None, + suffix: str | None = None, + ) -> str: + """Generate output path if not provided.""" + if output_path: + return output_path + + input_p = Path(input_path) + if suffix: + return str(input_p.parent / f"{input_p.stem}_edited{suffix}") + return str(input_p.parent / f"{input_p.stem}_edited{input_p.suffix}") + + # ------------------------------------------------------------------------- + # Image Operations (ImageMagick) + # ------------------------------------------------------------------------- + + async def resize_image( + self, + input_path: str, + output_path: str | None = None, + width: int | None = None, + height: int | None = None, + scale: float | None = None, + maintain_aspect: bool = True, + quality: int = 85, + ) -> EditResult: + """Resize an image. + + Args: + input_path: Path to input image + output_path: Path for output (auto-generated if not provided) + width: Target width in pixels + height: Target height in pixels + scale: Scale factor (e.g., 0.5 for half size) + maintain_aspect: Keep aspect ratio (default True) + quality: Output quality 1-100 (for JPEG/WebP) + + Returns: + EditResult with output path + """ + if not self._check_imagemagick(): + return EditResult( + success=False, + error="ImageMagick not found. 
Install with: apt install imagemagick", + ) + + output_path = self._ensure_output_path(input_path, output_path) + cmd = [self.magick_path, input_path] + + if scale: + cmd.extend(["-resize", f"{int(scale * 100)}%"]) + elif width and height: + resize_op = f"{width}x{height}" if maintain_aspect else f"{width}x{height}!" + cmd.extend(["-resize", resize_op]) + elif width: + cmd.extend(["-resize", f"{width}x"]) + elif height: + cmd.extend(["-resize", f"x{height}"]) + + cmd.extend(["-quality", str(quality)]) + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + stdout=stdout, + stderr=stderr, + ) + + async def crop_image( + self, + input_path: str, + output_path: str | None = None, + width: int | None = None, + height: int | None = None, + x: int = 0, + y: int = 0, + gravity: str | None = None, + ) -> EditResult: + """Crop an image. + + Args: + input_path: Path to input image + output_path: Path for output + width: Crop width + height: Crop height + x: X offset from left (or from gravity point) + y: Y offset from top (or from gravity point) + gravity: Gravity point (Center, North, South, East, West, etc.) 
+ + Returns: + EditResult with output path + """ + if not self._check_imagemagick(): + return EditResult( + success=False, + error="ImageMagick not found", + ) + + output_path = self._ensure_output_path(input_path, output_path) + cmd = [self.magick_path, input_path] + + if gravity: + cmd.extend(["-gravity", gravity]) + + crop_spec = f"{width}x{height}+{x}+{y}" + cmd.extend(["-crop", crop_spec, "+repage"]) + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def rotate_image( + self, + input_path: str, + output_path: str | None = None, + degrees: float = 90, + background: str = "white", + ) -> EditResult: + """Rotate an image. + + Args: + input_path: Path to input image + output_path: Path for output + degrees: Rotation angle (positive = clockwise) + background: Background color for corners + + Returns: + EditResult with output path + """ + if not self._check_imagemagick(): + return EditResult(success=False, error="ImageMagick not found") + + output_path = self._ensure_output_path(input_path, output_path) + cmd = [ + self.magick_path, + input_path, + "-background", + background, + "-rotate", + str(degrees), + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def apply_filter( + self, + input_path: str, + output_path: str | None = None, + filter_name: str = "grayscale", + **kwargs: Any, + ) -> EditResult: + """Apply a filter to an image. + + Args: + input_path: Path to input image + output_path: Path for output + filter_name: Filter to apply (grayscale, blur, sharpen, etc.) 
+ **kwargs: Filter-specific parameters + + Returns: + EditResult with output path + """ + if not self._check_imagemagick(): + return EditResult(success=False, error="ImageMagick not found") + + output_path = self._ensure_output_path(input_path, output_path) + cmd = [self.magick_path, input_path] + + # Map filter names to ImageMagick operations + filter_ops = { + "grayscale": ["-colorspace", "Gray"], + "sepia": ["-sepia-tone", str(kwargs.get("intensity", 80)) + "%"], + "blur": ["-blur", f"0x{kwargs.get('radius', 3)}"], + "sharpen": ["-sharpen", f"0x{kwargs.get('radius', 1)}"], + "negate": ["-negate"], + "normalize": ["-normalize"], + "equalize": ["-equalize"], + "brightness": [ + "-modulate", + f"{kwargs.get('brightness', 100)},{kwargs.get('saturation', 100)}", + ], + "contrast": ["-contrast-stretch", f"{kwargs.get('black', 0)}x{kwargs.get('white', 0)}%"], + } + + if filter_name in filter_ops: + cmd.extend(filter_ops[filter_name]) + else: + return EditResult( + success=False, + error=f"Unknown filter: {filter_name}. Available: {list(filter_ops.keys())}", + ) + + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def composite_images( + self, + background_path: str, + overlay_path: str, + output_path: str | None = None, + x: int = 0, + y: int = 0, + gravity: str = "NorthWest", + opacity: float = 1.0, + ) -> EditResult: + """Composite two images (overlay one on another). 
+ + Args: + background_path: Path to background image + overlay_path: Path to overlay image + output_path: Path for output + x: X offset + y: Y offset + gravity: Placement gravity + opacity: Overlay opacity (0.0-1.0) + + Returns: + EditResult with output path + """ + if not self._check_imagemagick(): + return EditResult(success=False, error="ImageMagick not found") + + output_path = self._ensure_output_path(background_path, output_path, "_composite.png") + + # Build composite command + cmd = [ + self.magick_path, + background_path, + "(", + overlay_path, + "-alpha", + "set", + "-channel", + "A", + "-evaluate", + "multiply", + str(opacity), + "+channel", + ")", + "-gravity", + gravity, + "-geometry", + f"+{x}+{y}", + "-composite", + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + # ------------------------------------------------------------------------- + # Video Operations (FFmpeg) + # ------------------------------------------------------------------------- + + async def trim_video( + self, + input_path: str, + output_path: str | None = None, + start: str | float | None = None, + end: str | float | None = None, + duration: float | None = None, + copy_codec: bool = True, + ) -> EditResult: + """Trim a video to a specific segment. + + Args: + input_path: Path to input video + output_path: Path for output + start: Start time (e.g., "00:00:30" or 30.0) + end: End time (e.g., "00:01:00" or 60.0) + duration: Duration in seconds (alternative to end) + copy_codec: Copy codecs without re-encoding (fast but less precise) + + Returns: + EditResult with output path + """ + if not self._check_ffmpeg(): + return EditResult( + success=False, + error="FFmpeg not found. 
Install with: apt install ffmpeg", + ) + + output_path = self._ensure_output_path(input_path, output_path, "_trimmed.mp4") + cmd = [self.ffmpeg_path, "-y"] + + if start: + cmd.extend(["-ss", str(start)]) + + cmd.extend(["-i", input_path]) + + if end: + cmd.extend(["-to", str(end)]) + elif duration: + cmd.extend(["-t", str(duration)]) + + if copy_codec: + cmd.extend(["-c", "copy"]) + + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + stderr=stderr, + ) + + async def resize_video( + self, + input_path: str, + output_path: str | None = None, + width: int | None = None, + height: int | None = None, + scale: str | None = None, + preset: str | None = None, + ) -> EditResult: + """Resize a video. + + Args: + input_path: Path to input video + output_path: Path for output + width: Target width (use -1 for auto based on height) + height: Target height (use -1 for auto based on width) + scale: FFmpeg scale filter (e.g., "1280:720", "iw/2:ih/2") + preset: Preset resolution ("480p", "720p", "1080p", "4k") + + Returns: + EditResult with output path + """ + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(input_path, output_path, "_resized.mp4") + + # Handle presets + presets = { + "480p": "854:480", + "720p": "1280:720", + "1080p": "1920:1080", + "4k": "3840:2160", + } + + if preset: + scale = presets.get(preset, scale) + elif width and height: + scale = f"{width}:{height}" + elif width: + scale = f"{width}:-2" + elif height: + scale = f"-2:{height}" + + if not scale: + return EditResult(success=False, error="Must specify width, height, scale, or preset") + + cmd = [ + self.ffmpeg_path, + "-y", + "-i", + input_path, + "-vf", + f"scale={scale}", + "-c:a", + "copy", + output_path, + ] + 
+ returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def extract_audio( + self, + input_path: str, + output_path: str | None = None, + format: str = "mp3", + bitrate: str = "192k", + ) -> EditResult: + """Extract audio track from video. + + Args: + input_path: Path to input video + output_path: Path for output audio + format: Output format (mp3, wav, aac, etc.) + bitrate: Audio bitrate + + Returns: + EditResult with output path + """ + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(input_path, output_path, f".{format}") + + cmd = [ + self.ffmpeg_path, + "-y", + "-i", + input_path, + "-vn", + "-acodec", + "libmp3lame" if format == "mp3" else "copy", + "-ab", + bitrate, + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def add_audio( + self, + video_path: str, + audio_path: str, + output_path: str | None = None, + replace: bool = True, + volume: float = 1.0, + ) -> EditResult: + """Add or replace audio in a video. 
+ + Args: + video_path: Path to input video + audio_path: Path to audio file + output_path: Path for output + replace: Replace existing audio (True) or mix (False) + volume: Audio volume multiplier + + Returns: + EditResult with output path + """ + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(video_path, output_path, "_audio.mp4") + + if replace: + cmd = [ + self.ffmpeg_path, + "-y", + "-i", + video_path, + "-i", + audio_path, + "-c:v", + "copy", + "-map", + "0:v:0", + "-map", + "1:a:0", + "-shortest", + output_path, + ] + else: + # Mix audio + cmd = [ + self.ffmpeg_path, + "-y", + "-i", + video_path, + "-i", + audio_path, + "-c:v", + "copy", + "-filter_complex", + f"[0:a][1:a]amerge=inputs=2,volume={volume}[a]", + "-map", + "0:v", + "-map", + "[a]", + "-shortest", + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def extract_frames( + self, + input_path: str, + output_dir: str | None = None, + fps: float = 1.0, + format: str = "jpg", + quality: int = 2, + ) -> EditResult: + """Extract frames from video as images. 
+ + Args: + input_path: Path to input video + output_dir: Directory for output frames + fps: Frames per second to extract + format: Output image format + quality: JPEG quality (2-31, lower is better) + + Returns: + EditResult with output directory + """ + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + if output_dir is None: + output_dir = tempfile.mkdtemp(prefix="frames_") + else: + os.makedirs(output_dir, exist_ok=True) + + output_pattern = os.path.join(output_dir, f"frame_%04d.{format}") + + cmd = [ + self.ffmpeg_path, + "-y", + "-i", + input_path, + "-vf", + f"fps={fps}", + "-q:v", + str(quality), + output_pattern, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_dir if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + metadata={"pattern": output_pattern}, + ) + + async def create_thumbnail( + self, + input_path: str, + output_path: str | None = None, + time: str | float = "00:00:01", + width: int = 320, + height: int | None = None, + ) -> EditResult: + """Create a thumbnail from video. 
+ + Args: + input_path: Path to input video + output_path: Path for output image + time: Time position to capture + width: Thumbnail width + height: Thumbnail height (auto if not specified) + + Returns: + EditResult with output path + """ + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(input_path, output_path, "_thumb.jpg") + + scale = f"{width}:-1" if height is None else f"{width}:{height}" + + cmd = [ + self.ffmpeg_path, + "-y", + "-ss", + str(time), + "-i", + input_path, + "-vframes", + "1", + "-vf", + f"scale={scale}", + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + # ------------------------------------------------------------------------- + # Format Conversion + # ------------------------------------------------------------------------- + + async def convert( + self, + input_path: str, + output_path: str | None = None, + format: MediaFormat | str | None = None, + quality: int = 85, + **kwargs: Any, + ) -> EditResult: + """Convert media to different format. 
+ + Args: + input_path: Path to input file + output_path: Path for output (format inferred from extension) + format: Target format + quality: Quality setting (for lossy formats) + **kwargs: Additional format-specific options + + Returns: + EditResult with output path + """ + if format is None and output_path: + format = Path(output_path).suffix.lstrip(".") + + if format is None: + return EditResult(success=False, error="Must specify format or output_path with extension") + + if isinstance(format, MediaFormat): + format = format.value + + # Determine if it's an image or video format + image_formats = {"jpeg", "jpg", "png", "webp", "gif", "avif", "tiff"} + video_formats = {"mp4", "webm", "mov", "avi", "mkv"} + audio_formats = {"mp3", "wav", "aac", "ogg", "flac"} + + if format in image_formats: + return await self._convert_image(input_path, output_path, format, quality) + elif format in video_formats: + return await self._convert_video(input_path, output_path, format, **kwargs) + elif format in audio_formats: + return await self._convert_audio(input_path, output_path, format, **kwargs) + else: + return EditResult(success=False, error=f"Unsupported format: {format}") + + async def _convert_image( + self, + input_path: str, + output_path: str | None, + format: str, + quality: int, + ) -> EditResult: + """Convert image format using ImageMagick.""" + if not self._check_imagemagick(): + return EditResult(success=False, error="ImageMagick not found") + + output_path = self._ensure_output_path(input_path, output_path, f".{format}") + + cmd = [ + self.magick_path, + input_path, + "-quality", + str(quality), + output_path, + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def _convert_video( + self, + input_path: str, + output_path: str | None, + format: str, + **kwargs: 
Any, + ) -> EditResult: + """Convert video format using FFmpeg.""" + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(input_path, output_path, f".{format}") + + cmd = [self.ffmpeg_path, "-y", "-i", input_path] + + # Add codec options based on format + if format == "webm": + cmd.extend(["-c:v", "libvpx-vp9", "-c:a", "libopus"]) + elif format == "mp4": + cmd.extend(["-c:v", "libx264", "-c:a", "aac"]) + + # Add any additional options + crf = kwargs.get("crf", 23) + cmd.extend(["-crf", str(crf)]) + + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + async def _convert_audio( + self, + input_path: str, + output_path: str | None, + format: str, + **kwargs: Any, + ) -> EditResult: + """Convert audio format using FFmpeg.""" + if not self._check_ffmpeg(): + return EditResult(success=False, error="FFmpeg not found") + + output_path = self._ensure_output_path(input_path, output_path, f".{format}") + + cmd = [self.ffmpeg_path, "-y", "-i", input_path] + + bitrate = kwargs.get("bitrate", "192k") + cmd.extend(["-ab", bitrate]) + + cmd.append(output_path) + + returncode, stdout, stderr = await self._run_command(cmd) + + return EditResult( + success=returncode == 0, + output_path=output_path if returncode == 0 else None, + error=stderr if returncode != 0 else None, + command=" ".join(cmd), + ) + + # ------------------------------------------------------------------------- + # Media Information + # ------------------------------------------------------------------------- + + async def get_info(self, input_path: str) -> MediaInfo: + """Get information about a media file. 
+ + Args: + input_path: Path to media file + + Returns: + MediaInfo with file details + """ + if not self._check_ffmpeg(): + # Fallback to basic info + path = Path(input_path) + return MediaInfo( + path=input_path, + format=path.suffix.lstrip("."), + size_bytes=path.stat().st_size if path.exists() else None, + ) + + cmd = [ + self.ffmpeg_path, + "-i", + input_path, + "-hide_banner", + ] + + returncode, stdout, stderr = await self._run_command(cmd) + + # FFmpeg outputs info to stderr + info = MediaInfo(path=input_path) + + # Parse format + path = Path(input_path) + info.format = path.suffix.lstrip(".") + if path.exists(): + info.size_bytes = path.stat().st_size + + # Parse dimensions (from "Stream #0:0: Video: ... 1920x1080") + import re + + dimension_match = re.search(r"(\d{2,5})x(\d{2,5})", stderr) + if dimension_match: + info.width = int(dimension_match.group(1)) + info.height = int(dimension_match.group(2)) + + # Parse duration + duration_match = re.search(r"Duration: (\d+):(\d+):(\d+)\.(\d+)", stderr) + if duration_match: + h, m, s, ms = duration_match.groups() + info.duration = int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 100 + + # Parse FPS + fps_match = re.search(r"(\d+(?:\.\d+)?)\s*fps", stderr) + if fps_match: + info.fps = float(fps_match.group(1)) + + return info + + +# Convenience functions for synchronous usage +def resize_image_sync(input_path: str, **kwargs: Any) -> EditResult: + """Synchronous wrapper for image resize.""" + service = MediaEditService() + return asyncio.run(service.resize_image(input_path, **kwargs)) + + +def trim_video_sync(input_path: str, **kwargs: Any) -> EditResult: + """Synchronous wrapper for video trim.""" + service = MediaEditService() + return asyncio.run(service.trim_video(input_path, **kwargs)) + + +def convert_sync(input_path: str, **kwargs: Any) -> EditResult: + """Synchronous wrapper for format conversion.""" + service = MediaEditService() + return asyncio.run(service.convert(input_path, **kwargs)) diff --git 
# MIME type mapping for common media files
MEDIA_MIME_TYPES = {
    # Images
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".avif": "image/avif",
    ".svg": "image/svg+xml",
    ".bmp": "image/bmp",
    ".tiff": "image/tiff",
    ".tif": "image/tiff",
    ".ico": "image/x-icon",
    # Videos
    ".mp4": "video/mp4",
    ".webm": "video/webm",
    ".mov": "video/quicktime",
    ".avi": "video/x-msvideo",
    ".mkv": "video/x-matroska",
    ".m4v": "video/x-m4v",
    ".ogv": "video/ogg",
    # Audio
    ".mp3": "audio/mpeg",
    ".wav": "audio/wav",
    ".ogg": "audio/ogg",
    ".m4a": "audio/mp4",
    ".flac": "audio/flac",
    ".aac": "audio/aac",
}


@app.get("/api/file/raw")
def api_get_file_raw(path: str = Query(...)):
    """Serve raw binary files (images, videos, audio) directly.

    The requested path is resolved relative to the project root (cwd) and
    must stay inside it; symlinks are resolved before the containment check.
    """
    try:
        # Validate path to prevent directory traversal. Path.is_relative_to
        # does a component-wise check; the previous str.startswith comparison
        # wrongly accepted sibling dirs sharing a prefix (e.g. /app vs /app2).
        root = Path.cwd().resolve()
        resolved = (Path.cwd() / path).resolve()
        if not resolved.is_relative_to(root):
            raise HTTPException(status_code=400, detail="Invalid path")

        if not resolved.exists():
            raise HTTPException(status_code=404, detail="File not found")

        if not resolved.is_file():
            raise HTTPException(status_code=400, detail="Path is not a file")

        # Determine MIME type from the extension; unknown types fall back to
        # a generic binary stream.
        ext = resolved.suffix.lower()
        media_type = MEDIA_MIME_TYPES.get(ext, "application/octet-stream")

        return FileResponse(
            path=resolved,
            media_type=media_type,
            filename=resolved.name,
        )
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


class BinaryFilePayload(BaseModel):
    # Base64 encoded binary content
    content_base64: str
    filename: str | None = None
+@app.put("/api/file/raw") +def api_put_file_raw(path: str = Query(...), payload: BinaryFilePayload = None): + """Save raw binary files (for image/video editor exports).""" + import base64 + + try: + if payload is None: + raise HTTPException(status_code=400, detail="No payload provided") + + # Validate path to prevent directory traversal + resolved = (Path.cwd() / path).resolve() + if not str(resolved).startswith(str(Path.cwd())): + raise HTTPException(status_code=400, detail="Invalid path") + + # Ensure parent directory exists + resolved.parent.mkdir(parents=True, exist_ok=True) + + # Decode and save binary content + try: + content = base64.b64decode(payload.content_base64) + except Exception: + raise HTTPException(status_code=400, detail="Invalid base64 content") + + resolved.write_bytes(content) + return {"path": path, "status": "ok", "size": len(content)} + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + @app.post("/api/file/rename") def api_rename_file(payload: RenamePayload): try: @@ -870,6 +968,306 @@ def api_git_show(path: str = Query(..., description="File path relative to repo raise HTTPException(status_code=500, detail=str(e)) +# --- Media API endpoints --- + + +class MediaGeneratePayload(BaseModel): + prompt: str + model: str | None = None + provider: str | None = None # fal, leonardo, replicate, runway + width: int = 1024 + height: int = 1024 + num_images: int = 1 + negative_prompt: str | None = None + # Video-specific + image_url: str | None = None + duration: int = 5 + + +class MediaEditPayload(BaseModel): + input_path: str + output_path: str | None = None + operation: str # resize, crop, rotate, filter, trim, convert + # Operation-specific params + width: int | None = None + height: int | None = None + scale: float | None = None + preset: str | None = None # 480p, 720p, 1080p, 4k + quality: int = 85 + # Crop params + x: int = 0 + y: int = 0 + gravity: str | None = None + # Rotate 
params + degrees: float = 90 + background: str = "white" + # Filter params + filter_name: str | None = None + # Trim params + start: str | None = None + end: str | None = None + duration_seconds: float | None = None + # Convert params + format: str | None = None + + +@app.post("/api/media/generate/image") +async def api_media_generate_image(payload: MediaGeneratePayload): + """Generate an image using AI. + + Supports providers: fal (default), leonardo, replicate + """ + try: + from kurt.services.ai_generation import AIGenerationService + + service = AIGenerationService() + try: + result = await service.generate_image( + prompt=payload.prompt, + model=payload.model, + provider=payload.provider, + width=payload.width, + height=payload.height, + num_images=payload.num_images, + negative_prompt=payload.negative_prompt, + ) + finally: + await service.close() + + if not result.success: + raise HTTPException(status_code=400, detail=result.error) + + return { + "success": True, + "url": result.url, + "urls": result.urls, + "provider": result.provider, + "model": result.model, + "job_id": result.job_id, + } + except HTTPException: + raise + except ImportError as e: + raise HTTPException(status_code=503, detail=f"Media service not available: {e}") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/media/generate/video") +async def api_media_generate_video(payload: MediaGeneratePayload): + """Generate a video using AI. 
+ + Supports providers: fal (default), runway, replicate + """ + try: + from kurt.services.ai_generation import AIGenerationService + + service = AIGenerationService() + try: + result = await service.generate_video( + prompt=payload.prompt, + image_url=payload.image_url, + model=payload.model, + provider=payload.provider, + duration=payload.duration, + ) + finally: + await service.close() + + if not result.success: + raise HTTPException(status_code=400, detail=result.error) + + return { + "success": True, + "url": result.url, + "provider": result.provider, + "model": result.model, + "job_id": result.job_id, + } + except HTTPException: + raise + except ImportError as e: + raise HTTPException(status_code=503, detail=f"Media service not available: {e}") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/media/edit") +async def api_media_edit(payload: MediaEditPayload): + """Edit an image or video file. + + Operations: resize, crop, rotate, filter, trim, convert + """ + try: + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + result = None + + if payload.operation == "resize": + # Detect if image or video based on extension + path = Path(payload.input_path) + image_exts = {".jpg", ".jpeg", ".png", ".webp", ".gif", ".tiff", ".avif"} + if path.suffix.lower() in image_exts: + result = await service.resize_image( + payload.input_path, + output_path=payload.output_path, + width=payload.width, + height=payload.height, + scale=payload.scale, + quality=payload.quality, + ) + else: + result = await service.resize_video( + payload.input_path, + output_path=payload.output_path, + width=payload.width, + height=payload.height, + preset=payload.preset, + ) + + elif payload.operation == "crop": + result = await service.crop_image( + payload.input_path, + output_path=payload.output_path, + width=payload.width, + height=payload.height, + x=payload.x, + y=payload.y, + gravity=payload.gravity, + ) + + 
elif payload.operation == "rotate": + result = await service.rotate_image( + payload.input_path, + output_path=payload.output_path, + degrees=payload.degrees, + background=payload.background, + ) + + elif payload.operation == "filter": + if not payload.filter_name: + raise HTTPException(status_code=400, detail="filter_name required for filter operation") + result = await service.apply_filter( + payload.input_path, + output_path=payload.output_path, + filter_name=payload.filter_name, + ) + + elif payload.operation == "trim": + result = await service.trim_video( + payload.input_path, + output_path=payload.output_path, + start=payload.start, + end=payload.end, + duration=payload.duration_seconds, + ) + + elif payload.operation == "convert": + if not payload.format: + raise HTTPException(status_code=400, detail="format required for convert operation") + result = await service.convert( + payload.input_path, + output_path=payload.output_path, + format=payload.format, + quality=payload.quality, + ) + + else: + raise HTTPException( + status_code=400, + detail=f"Unknown operation: {payload.operation}. 
" + "Supported: resize, crop, rotate, filter, trim, convert", + ) + + if result is None: + raise HTTPException(status_code=500, detail="Operation returned no result") + + if not result.success: + raise HTTPException(status_code=400, detail=result.error) + + return { + "success": True, + "output_path": result.output_path, + "command": result.command, + } + except HTTPException: + raise + except ImportError as e: + raise HTTPException(status_code=503, detail=f"Media service not available: {e}") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/media/info") +async def api_media_info(path: str = Query(...)): + """Get information about a media file.""" + try: + from kurt.services.media_edit import MediaEditService + + service = MediaEditService() + info = await service.get_info(path) + + return { + "path": info.path, + "format": info.format, + "width": info.width, + "height": info.height, + "duration": info.duration, + "fps": info.fps, + "size_bytes": info.size_bytes, + } + except FileNotFoundError: + raise HTTPException(status_code=404, detail="File not found") + except ImportError as e: + raise HTTPException(status_code=503, detail=f"Media service not available: {e}") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/media/providers") +def api_media_providers(): + """Get available AI providers and their configuration status.""" + providers = [ + { + "name": "fal", + "display_name": "fal.ai", + "env_var": "FAL_KEY", + "configured": bool(os.environ.get("FAL_KEY")), + "capabilities": ["image", "video"], + }, + { + "name": "leonardo", + "display_name": "Leonardo.ai", + "env_var": "LEONARDO_API_KEY", + "configured": bool(os.environ.get("LEONARDO_API_KEY")), + "capabilities": ["image"], + }, + { + "name": "replicate", + "display_name": "Replicate", + "env_var": "REPLICATE_API_TOKEN", + "configured": bool(os.environ.get("REPLICATE_API_TOKEN")), + "capabilities": ["image", 
"video"], + }, + { + "name": "runway", + "display_name": "Runway", + "env_var": "RUNWAY_API_KEY", + "configured": bool(os.environ.get("RUNWAY_API_KEY")), + "capabilities": ["video"], + }, + ] + + # Check for FFmpeg and ImageMagick + tools = { + "ffmpeg": which("ffmpeg") is not None, + "imagemagick": which("magick") is not None or which("convert") is not None, + } + + return {"providers": providers, "tools": tools} + + # --- Workflow API endpoints --- diff --git a/src/kurt/web/client/package.json b/src/kurt/web/client/package.json index e43728e9..a58daf89 100644 --- a/src/kurt/web/client/package.json +++ b/src/kurt/web/client/package.json @@ -14,6 +14,7 @@ "test:ui": "vitest --ui" }, "dependencies": { + "@remotion/player": "^4.0.250", "@tiptap/core": "^3.15.2", "@tiptap/extension-highlight": "^3.15.2", "@tiptap/extension-image": "^3.15.3", @@ -29,11 +30,13 @@ "@tiptap/starter-kit": "^3.15.2", "diff": "^8.0.2", "dockview-react": "^4.13.1", + "konva": "^9.3.18", "markdown-it": "^14.0.0", "prism-react-renderer": "^2.4.1", "react": "^18.2.0", "react-diff-view": "^3.2.0", "react-dom": "^18.2.0", + "react-konva": "^18.2.10", "react-simple-code-editor": "^0.14.1", "turndown": "^7.1.2", "turndown-plugin-gfm": "^1.0.2", diff --git a/src/kurt/web/client/src/App.jsx b/src/kurt/web/client/src/App.jsx index 2aca41d7..cf878f71 100644 --- a/src/kurt/web/client/src/App.jsx +++ b/src/kurt/web/client/src/App.jsx @@ -10,6 +10,8 @@ import EmptyPanel from './panels/EmptyPanel' import ReviewPanel from './panels/ReviewPanel' import WorkflowsPanel from './panels/WorkflowsPanel' import WorkflowTerminalPanel from './panels/WorkflowTerminalPanel' +import ImageEditorPanel from './panels/ImageEditorPanel' +import VideoEditorPanel from './panels/VideoEditorPanel' import DiffHighlightPOC from './components/DiffHighlightPOC' import TiptapDiffPOC from './components/TiptapDiffPOC' @@ -28,6 +30,8 @@ const components = { review: ReviewPanel, workflows: WorkflowsPanel, workflowTerminal: 
WorkflowTerminalPanel, + imageEditor: ImageEditorPanel, + videoEditor: VideoEditorPanel, } const KNOWN_COMPONENTS = new Set(Object.keys(components)) @@ -149,6 +153,18 @@ const getFileName = (path) => { return parts[parts.length - 1] } +// Media file type detection +const IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'webp', 'avif', 'tiff', 'tif', 'bmp', 'svg'] +const VIDEO_EXTENSIONS = ['mp4', 'webm', 'mov', 'avi', 'mkv', 'm4v', 'ogv'] + +const getFileExtension = (path) => { + const parts = path.split('.') + return parts.length > 1 ? parts.pop().toLowerCase() : '' +} + +const isImageFile = (path) => IMAGE_EXTENSIONS.includes(getFileExtension(path)) +const isVideoFile = (path) => VIDEO_EXTENSIONS.includes(getFileExtension(path)) + const LAYOUT_VERSION = 21 // Increment to force layout reset // Generate a short hash from the project root path for localStorage keys @@ -821,6 +837,142 @@ export default function App() { [dockApi, openFileAtPosition] ) + // Open image file in image editor panel + const openImageEditor = useCallback( + (path) => { + if (!dockApi) return + + const panelId = `imageEditor-${path}` + const existingPanel = dockApi.getPanel(panelId) + + if (existingPanel) { + existingPanel.api.setActive() + return + } + + const emptyPanel = dockApi.getPanel('empty-center') + const workflowsPanel = dockApi.getPanel('workflows') + const centerGroup = centerGroupRef.current + + // Find existing editor panels to add as sibling tab + const allPanels = Array.isArray(dockApi.panels) ? 
dockApi.panels : [] + const existingEditorPanel = allPanels.find(p => + p.id.startsWith('editor-') || p.id.startsWith('imageEditor-') || p.id.startsWith('videoEditor-') + ) + + let position + if (existingEditorPanel?.group) { + position = { referenceGroup: existingEditorPanel.group } + } else if (centerGroup) { + position = { referenceGroup: centerGroup } + } else if (emptyPanel?.group) { + position = { referenceGroup: emptyPanel.group } + } else if (workflowsPanel?.group) { + position = { direction: 'above', referenceGroup: workflowsPanel.group } + } else { + position = { direction: 'right', referencePanel: 'filetree' } + } + + const panel = dockApi.addPanel({ + id: panelId, + component: 'imageEditor', + title: `🖼 ${getFileName(path)}`, + position, + params: { path }, + }) + + if (emptyPanel) { + emptyPanel.api.close() + } + + if (panel?.group) { + panel.group.header.hidden = false + centerGroupRef.current = panel.group + panel.group.api.setConstraints({ + minimumHeight: 200, + maximumHeight: Infinity, + }) + } + }, + [dockApi] + ) + + // Open video file in video editor panel + const openVideoEditor = useCallback( + (path) => { + if (!dockApi) return + + const panelId = `videoEditor-${path}` + const existingPanel = dockApi.getPanel(panelId) + + if (existingPanel) { + existingPanel.api.setActive() + return + } + + const emptyPanel = dockApi.getPanel('empty-center') + const workflowsPanel = dockApi.getPanel('workflows') + const centerGroup = centerGroupRef.current + + // Find existing editor panels to add as sibling tab + const allPanels = Array.isArray(dockApi.panels) ? 
dockApi.panels : [] + const existingEditorPanel = allPanels.find(p => + p.id.startsWith('editor-') || p.id.startsWith('imageEditor-') || p.id.startsWith('videoEditor-') + ) + + let position + if (existingEditorPanel?.group) { + position = { referenceGroup: existingEditorPanel.group } + } else if (centerGroup) { + position = { referenceGroup: centerGroup } + } else if (emptyPanel?.group) { + position = { referenceGroup: emptyPanel.group } + } else if (workflowsPanel?.group) { + position = { direction: 'above', referenceGroup: workflowsPanel.group } + } else { + position = { direction: 'right', referencePanel: 'filetree' } + } + + const panel = dockApi.addPanel({ + id: panelId, + component: 'videoEditor', + title: `🎬 ${getFileName(path)}`, + position, + params: { path }, + }) + + if (emptyPanel) { + emptyPanel.api.close() + } + + if (panel?.group) { + panel.group.header.hidden = false + centerGroupRef.current = panel.group + panel.group.api.setConstraints({ + minimumHeight: 200, + maximumHeight: Infinity, + }) + } + }, + [dockApi] + ) + + // Smart file opener that routes to appropriate editor based on file type + const openMediaFile = useCallback( + (path) => { + if (isImageFile(path)) { + openImageEditor(path) + return true + } + if (isVideoFile(path)) { + openVideoEditor(path) + return true + } + return false + }, + [openImageEditor, openVideoEditor] + ) + useEffect(() => { if (!dockApi || !approvalsLoaded) return const pendingIds = new Set(approvals.map((req) => req.id)) @@ -1456,6 +1608,9 @@ export default function App() { onOpenFile: openFile, onOpenFileToSide: openFileToSide, onOpenDiff: openDiff, + onOpenImageEditor: openImageEditor, + onOpenVideoEditor: openVideoEditor, + onOpenMediaFile: openMediaFile, projectRoot, activeFile, activeDiffFile, @@ -1463,7 +1618,7 @@ export default function App() { onToggleCollapse: toggleFiletree, }) } - }, [dockApi, openFile, openFileToSide, openDiff, projectRoot, activeFile, activeDiffFile, collapsed.filetree, 
toggleFiletree]) + }, [dockApi, openFile, openFileToSide, openDiff, openImageEditor, openVideoEditor, openMediaFile, projectRoot, activeFile, activeDiffFile, collapsed.filetree, toggleFiletree]) // Helper to focus a review panel const focusReviewPanel = useCallback( diff --git a/src/kurt/web/client/src/components/FileTree.jsx b/src/kurt/web/client/src/components/FileTree.jsx index 2e3b643c..69e8bf32 100644 --- a/src/kurt/web/client/src/components/FileTree.jsx +++ b/src/kurt/web/client/src/components/FileTree.jsx @@ -17,7 +17,20 @@ const formatSectionLabel = (path) => { return name.charAt(0).toUpperCase() + name.slice(1) } -export default function FileTree({ onOpen, onOpenToSide, onFileDeleted, onFileRenamed, onFileMoved, projectRoot, activeFile, creatingFile, onFileCreated, onCancelCreate }) { +// Media file type detection +const IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'webp', 'avif', 'tiff', 'tif', 'bmp', 'svg'] +const VIDEO_EXTENSIONS = ['mp4', 'webm', 'mov', 'avi', 'mkv', 'm4v', 'ogv'] + +const getFileExtension = (path) => { + const parts = path.split('.') + return parts.length > 1 ? 
parts.pop().toLowerCase() : '' +} + +const isImageFile = (path) => IMAGE_EXTENSIONS.includes(getFileExtension(path)) +const isVideoFile = (path) => VIDEO_EXTENSIONS.includes(getFileExtension(path)) +const isMediaFile = (path) => isImageFile(path) || isVideoFile(path) + +export default function FileTree({ onOpen, onOpenToSide, onOpenImageEditor, onOpenVideoEditor, onOpenMediaFile, onFileDeleted, onFileRenamed, onFileMoved, projectRoot, activeFile, creatingFile, onFileCreated, onCancelCreate }) { const [entries, setEntries] = useState([]) const [expandedDirs, setExpandedDirs] = useState({}) const [searchQuery, setSearchQuery] = useState('') @@ -210,7 +223,12 @@ export default function FileTree({ onOpen, onOpenToSide, onFileDeleted, onFileRe })) } } else { - onOpen(entry.path) + // Route media files to their respective editors + if (onOpenMediaFile && isMediaFile(entry.path)) { + onOpenMediaFile(entry.path) + } else { + onOpen(entry.path) + } } } @@ -850,6 +868,16 @@ export default function FileTree({ onOpen, onOpenToSide, onFileDeleted, onFileRe
{ onOpenToSide?.(contextMenu.entry.path); setContextMenu(null) }}> Open to the Side
+ {isImageFile(contextMenu.entry.path) && onOpenImageEditor && ( +
{ onOpenImageEditor(contextMenu.entry.path); setContextMenu(null) }}> + Open in Image Editor +
+ )} + {isVideoFile(contextMenu.entry.path) && onOpenVideoEditor && ( +
{ onOpenVideoEditor(contextMenu.entry.path); setContextMenu(null) }}> + Open in Video Editor +
+ )}
)} diff --git a/src/kurt/web/client/src/components/ImageEditor.jsx b/src/kurt/web/client/src/components/ImageEditor.jsx new file mode 100644 index 00000000..05cc1b68 --- /dev/null +++ b/src/kurt/web/client/src/components/ImageEditor.jsx @@ -0,0 +1,812 @@ +import React, { useState, useEffect, useRef, useCallback } from 'react' +import { Stage, Layer, Image as KonvaImage, Rect, Circle, Text, Line, Transformer } from 'react-konva' + +const apiBase = import.meta.env.VITE_API_URL || '' +const apiUrl = (path) => `${apiBase}${path}` + +// Load image from URL or file +const useImage = (src) => { + const [image, setImage] = useState(null) + const [status, setStatus] = useState('loading') + + useEffect(() => { + if (!src) { + setImage(null) + setStatus('idle') + return + } + + setStatus('loading') + const img = new window.Image() + img.crossOrigin = 'anonymous' + + img.onload = () => { + setImage(img) + setStatus('loaded') + } + + img.onerror = () => { + setImage(null) + setStatus('error') + } + + img.src = src + }, [src]) + + return [image, status] +} + +// Shape component that can be selected and transformed +const Shape = ({ shapeProps, isSelected, onSelect, onChange }) => { + const shapeRef = useRef() + const trRef = useRef() + + useEffect(() => { + if (isSelected && trRef.current && shapeRef.current) { + trRef.current.nodes([shapeRef.current]) + trRef.current.getLayer().batchDraw() + } + }, [isSelected]) + + const ShapeComponent = shapeProps.type === 'circle' ? 
Circle : Rect + + return ( + <> + { + onChange({ + ...shapeProps, + x: e.target.x(), + y: e.target.y(), + }) + }} + onTransformEnd={(e) => { + const node = shapeRef.current + const scaleX = node.scaleX() + const scaleY = node.scaleY() + + node.scaleX(1) + node.scaleY(1) + + onChange({ + ...shapeProps, + x: node.x(), + y: node.y(), + width: Math.max(5, node.width() * scaleX), + height: Math.max(5, node.height() * scaleY), + rotation: node.rotation(), + }) + }} + /> + {isSelected && ( + { + if (newBox.width < 5 || newBox.height < 5) { + return oldBox + } + return newBox + }} + /> + )} + + ) +} + +// Text element component +const TextElement = ({ textProps, isSelected, onSelect, onChange }) => { + const textRef = useRef() + const trRef = useRef() + + useEffect(() => { + if (isSelected && trRef.current && textRef.current) { + trRef.current.nodes([textRef.current]) + trRef.current.getLayer().batchDraw() + } + }, [isSelected]) + + return ( + <> + { + onChange({ + ...textProps, + x: e.target.x(), + y: e.target.y(), + }) + }} + onTransformEnd={(e) => { + const node = textRef.current + onChange({ + ...textProps, + x: node.x(), + y: node.y(), + fontSize: Math.max(8, textProps.fontSize * node.scaleY()), + rotation: node.rotation(), + }) + node.scaleX(1) + node.scaleY(1) + }} + /> + {isSelected && ( + { + newBox.width = Math.max(30, newBox.width) + return newBox + }} + /> + )} + + ) +} + +// History state for undo/redo +const MAX_HISTORY = 50 + +export default function ImageEditor({ + imageSrc, + onSave, + onGenerate, + width = 800, + height = 600, +}) { + const [image, imageStatus] = useImage(imageSrc) + const [shapes, setShapes] = useState([]) + const [texts, setTexts] = useState([]) + const [selectedId, setSelectedId] = useState(null) + const [tool, setTool] = useState('select') // select, rect, circle, text, brush + const [fillColor, setFillColor] = useState('#3b82f6') + const [strokeColor, setStrokeColor] = useState('#1e40af') + const [textContent, setTextContent] = 
useState('Text') + const [brushSize, setBrushSize] = useState(5) + const [lines, setLines] = useState([]) + const [isDrawing, setIsDrawing] = useState(false) + const [filters, setFilters] = useState({ + brightness: 100, + contrast: 100, + saturate: 100, + blur: 0, + grayscale: 0, + sepia: 0, + }) + const [showFilters, setShowFilters] = useState(false) + const [isGenerating, setIsGenerating] = useState(false) + const [generatePrompt, setGeneratePrompt] = useState('') + const [showGenerateDialog, setShowGenerateDialog] = useState(false) + const stageRef = useRef() + + // Build CSS filter string + const getFilterString = useCallback(() => { + const { brightness, contrast, saturate, blur, grayscale, sepia } = filters + return `brightness(${brightness}%) contrast(${contrast}%) saturate(${saturate}%) blur(${blur}px) grayscale(${grayscale}%) sepia(${sepia}%)` + }, [filters]) + + // History for undo/redo + const [history, setHistory] = useState([{ shapes: [], texts: [], lines: [] }]) + const [historyIndex, setHistoryIndex] = useState(0) + + // Save state to history + const saveToHistory = useCallback((newShapes, newTexts, newLines) => { + setHistory((prev) => { + // Remove any future history if we're not at the end + const newHistory = prev.slice(0, historyIndex + 1) + // Add new state + newHistory.push({ shapes: newShapes, texts: newTexts, lines: newLines || lines }) + // Limit history size + if (newHistory.length > MAX_HISTORY) { + newHistory.shift() + return newHistory + } + return newHistory + }) + setHistoryIndex((prev) => Math.min(prev + 1, MAX_HISTORY - 1)) + }, [historyIndex, lines]) + + // Undo + const handleUndo = useCallback(() => { + if (historyIndex > 0) { + const newIndex = historyIndex - 1 + setHistoryIndex(newIndex) + const state = history[newIndex] + setShapes(state.shapes) + setTexts(state.texts) + setLines(state.lines || []) + setSelectedId(null) + } + }, [historyIndex, history]) + + // Redo + const handleRedo = useCallback(() => { + if (historyIndex < 
history.length - 1) { + const newIndex = historyIndex + 1 + setHistoryIndex(newIndex) + const state = history[newIndex] + setShapes(state.shapes) + setTexts(state.texts) + setLines(state.lines || []) + setSelectedId(null) + } + }, [historyIndex, history]) + + const canUndo = historyIndex > 0 + const canRedo = historyIndex < history.length - 1 + + // Brush drawing handlers + const handleMouseDown = useCallback((e) => { + if (tool !== 'brush') return + setIsDrawing(true) + const pos = e.target.getStage().getPointerPosition() + setLines([...lines, { + id: `line-${Date.now()}`, + points: [pos.x, pos.y], + stroke: strokeColor, + strokeWidth: brushSize, + }]) + }, [tool, lines, strokeColor, brushSize]) + + const handleMouseMove = useCallback((e) => { + if (!isDrawing || tool !== 'brush') return + const stage = e.target.getStage() + const point = stage.getPointerPosition() + setLines((prevLines) => { + const lastLine = prevLines[prevLines.length - 1] + if (!lastLine) return prevLines + // Add point to the last line + const newLines = prevLines.slice(0, -1) + newLines.push({ + ...lastLine, + points: [...lastLine.points, point.x, point.y], + }) + return newLines + }) + }, [isDrawing, tool]) + + const handleMouseUp = useCallback(() => { + if (isDrawing && tool === 'brush') { + setIsDrawing(false) + // Save to history after drawing is complete + saveToHistory(shapes, texts, lines) + } + }, [isDrawing, tool, shapes, texts, lines, saveToHistory]) + + // Calculate image dimensions to fit canvas while maintaining aspect ratio + const getImageDimensions = useCallback(() => { + if (!image) return { x: 0, y: 0, width: 0, height: 0 } + + const imgRatio = image.width / image.height + const canvasRatio = width / height + + let imgWidth, imgHeight, imgX, imgY + + if (imgRatio > canvasRatio) { + imgWidth = width + imgHeight = width / imgRatio + imgX = 0 + imgY = (height - imgHeight) / 2 + } else { + imgHeight = height + imgWidth = height * imgRatio + imgX = (width - imgWidth) / 2 + imgY 
= 0 + } + + return { x: imgX, y: imgY, width: imgWidth, height: imgHeight } + }, [image, width, height]) + + // Handle stage click + const handleStageClick = (e) => { + const clickedOnEmpty = e.target === e.target.getStage() + if (clickedOnEmpty) { + setSelectedId(null) + return + } + + const pos = e.target.getStage().getPointerPosition() + + if (tool === 'rect') { + const newShape = { + id: `rect-${Date.now()}`, + type: 'rect', + x: pos.x - 50, + y: pos.y - 25, + width: 100, + height: 50, + fill: fillColor, + stroke: strokeColor, + strokeWidth: 2, + } + const newShapes = [...shapes, newShape] + setShapes(newShapes) + saveToHistory(newShapes, texts) + setSelectedId(newShape.id) + setTool('select') + } else if (tool === 'circle') { + const newShape = { + id: `circle-${Date.now()}`, + type: 'circle', + x: pos.x, + y: pos.y, + radius: 40, + fill: fillColor, + stroke: strokeColor, + strokeWidth: 2, + } + const newShapes = [...shapes, newShape] + setShapes(newShapes) + saveToHistory(newShapes, texts) + setSelectedId(newShape.id) + setTool('select') + } else if (tool === 'text') { + const newText = { + id: `text-${Date.now()}`, + type: 'text', + x: pos.x, + y: pos.y, + text: textContent, + fontSize: 24, + fill: fillColor, + fontFamily: 'Arial', + } + const newTexts = [...texts, newText] + setTexts(newTexts) + saveToHistory(shapes, newTexts) + setSelectedId(newText.id) + setTool('select') + } + } + + // Delete selected element + const handleDelete = useCallback(() => { + if (!selectedId) return + const newShapes = shapes.filter((s) => s.id !== selectedId) + const newTexts = texts.filter((t) => t.id !== selectedId) + setShapes(newShapes) + setTexts(newTexts) + saveToHistory(newShapes, newTexts) + setSelectedId(null) + }, [selectedId, shapes, texts, saveToHistory]) + + // Keyboard shortcuts + useEffect(() => { + const handleKeyDown = (e) => { + // Skip if typing in an input + if (document.activeElement.tagName === 'INPUT' || document.activeElement.tagName === 'TEXTAREA') { 
+ return + } + + if (e.key === 'Delete' || e.key === 'Backspace') { + handleDelete() + } + // Undo: Ctrl+Z / Cmd+Z + if ((e.ctrlKey || e.metaKey) && e.key === 'z' && !e.shiftKey) { + e.preventDefault() + handleUndo() + } + // Redo: Ctrl+Shift+Z / Cmd+Shift+Z or Ctrl+Y / Cmd+Y + if ((e.ctrlKey || e.metaKey) && (e.key === 'y' || (e.key === 'z' && e.shiftKey))) { + e.preventDefault() + handleRedo() + } + } + window.addEventListener('keydown', handleKeyDown) + return () => window.removeEventListener('keydown', handleKeyDown) + }, [handleDelete, handleUndo, handleRedo]) + + // Export canvas as image + const handleExport = useCallback(() => { + if (!stageRef.current) return + const uri = stageRef.current.toDataURL() + const link = document.createElement('a') + link.download = 'image-export.png' + link.href = uri + document.body.appendChild(link) + link.click() + document.body.removeChild(link) + }, []) + + // Generate image with AI + const handleGenerate = async () => { + if (!generatePrompt.trim()) return + + setIsGenerating(true) + try { + const response = await fetch(apiUrl('/api/media/generate/image'), { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + prompt: generatePrompt, + width: 1024, + height: 1024, + }), + }) + + if (!response.ok) { + const data = await response.json() + throw new Error(data.detail || 'Failed to generate image') + } + + const data = await response.json() + if (data.url && onGenerate) { + onGenerate(data.url) + } + setShowGenerateDialog(false) + setGeneratePrompt('') + } catch (err) { + alert(`Generation failed: ${err.message}`) + } finally { + setIsGenerating(false) + } + } + + const imgDims = getImageDimensions() + + return ( +
+ {/* Toolbar */} +
+ {/* Undo/Redo */} +
+ + +
+ +
+ + + + + +
+ +
+ + +
+ + {tool === 'brush' && ( +
+ +
+ )} + +
+ +
+ + {tool === 'text' && ( +
+ setTextContent(e.target.value)} + placeholder="Text content" + className="text-input" + /> +
+ )} + +
+ {selectedId && ( + + )} + + +
+
+ + {/* Canvas */} +
+ {imageStatus === 'loading' && ( +
Loading image...
+ )} + {imageStatus === 'error' && ( +
Failed to load image
+ )} + + + {/* Background image */} + {image && ( + + )} + + {/* Shapes */} + {shapes.map((shape) => ( + setSelectedId(shape.id)} + onChange={(newAttrs) => { + setShapes(shapes.map((s) => (s.id === shape.id ? newAttrs : s))) + }} + /> + ))} + + {/* Text elements */} + {texts.map((text) => ( + setSelectedId(text.id)} + onChange={(newAttrs) => { + setTexts(texts.map((t) => (t.id === text.id ? newAttrs : t))) + }} + /> + ))} + + {/* Brush lines */} + {lines.map((line) => ( + + ))} + + +
+ + {/* Generate Dialog */} + {showGenerateDialog && ( +
setShowGenerateDialog(false)}> +
e.stopPropagation()}> +

Generate Image with AI

+