diff --git a/Firebase_Genkit_Issue_3280_Analysis.md b/Firebase_Genkit_Issue_3280_Analysis.md new file mode 100644 index 0000000000..fcbe8a7e30 --- /dev/null +++ b/Firebase_Genkit_Issue_3280_Analysis.md @@ -0,0 +1,501 @@ +# Firebase Genkit Issue #3280: Integrate dotprompt to Python Implementation + +## Executive Summary + +This document provides a comprehensive analysis and execution plan for implementing dotprompt functionality in the Python version of Firebase Genkit. The goal is to achieve feature parity with the stable JavaScript/TypeScript implementation by adding support for `.prompt` files, template processing, and file-based prompt management. + +## Problem Analysis + +### Issue Overview +**Issue #3280**: Integrate dotprompt standalone library to the Python project + +The Firebase Genkit project supports multiple programming languages, with JavaScript/TypeScript being the stable, feature-complete implementation. The Python implementation is in early development and lacks several key features, particularly dotprompt support for file-based prompt management and templating. + +### Current State Analysis + +#### JavaScript/TypeScript Implementation (Stable) +- Full dotprompt support with `.prompt` file loading and parsing +- Handlebars-based templating engine +- YAML frontmatter for metadata, model config, and schemas +- Runtime prompt compilation and execution +- Prompt variants and namespace support +- Helper functions and partials +- Automatic prompt folder scanning + +#### Python Implementation (Early Development) +- ✅ Basic `ExecutablePrompt` class exists +- ✅ Programmatic prompt definition via `define_prompt()` +- ❌ No `.prompt` file support +- ❌ No template processing capabilities +- ❌ No file-based prompt management +- ❌ TODO comment in code: "run str prompt/system/message through dotprompt using input" + +### Gap Analysis + +The main gaps between JavaScript and Python implementations: + +1. 
**File Format Support**: No parsing of `.prompt` files with YAML frontmatter +2. **Template Engine**: No Handlebars-compatible templating system +3. **File Management**: No automatic loading and scanning of prompt directories +4. **Variants**: No support for prompt variants (e.g., `prompt.variant.prompt`) +5. **Helpers & Partials**: No support for reusable template components + +## dotprompt Functionality Overview + +### File Format Structure +```yaml +--- +model: googleai/gemini-1.5-flash +config: + temperature: 0.9 +input: + schema: + location: string + style?: string + name?: string + default: + location: a restaurant +--- + +You are the world's most welcoming AI assistant and are currently working at {{location}}. + +Greet a guest{{#if name}} named {{name}}{{/if}}{{#if style}} in the style of {{style}}{{/if}}. +``` + +### Key Features +- **YAML Frontmatter**: Model configuration, input/output schemas, metadata +- **Handlebars Templates**: Dynamic content with `{{variable}}`, `{{#if}}`, `{{#each}}` +- **File-based Management**: Organized prompt libraries in directories +- **Variants**: Multiple versions of prompts (formal, casual, etc.) 
+- **Helpers**: Custom template functions for advanced logic +- **Partials**: Reusable template components + +## Detailed Execution Plan + +### Phase 1: Core Infrastructure Setup + +#### 1.1 Module Structure Creation +Create the following directory structure: +``` +py/packages/genkit/src/genkit/dotprompt/ +├── __init__.py # Public API exports +├── parser.py # YAML frontmatter + template parsing +├── template.py # Handlebars-like templating engine +├── loader.py # .prompt file loading and folder scanning +├── helpers.py # Built-in template helpers +├── types.py # Type definitions and schemas +└── exceptions.py # Custom exception classes +``` + +#### 1.2 Dependencies Addition +Add to `pyproject.toml`: +```toml +dependencies = [ + "pyyaml>=6.0", # YAML frontmatter parsing + "pybars3>=0.9.7", # Handlebars-compatible templating + "pathlib", # File system operations (built-in) + "typing-extensions", # Enhanced typing support +] +``` + +### Phase 2: Core Components Implementation + +#### 2.1 Prompt File Parser (`parser.py`) +```python +class PromptFile: + """Represents a parsed .prompt file with metadata and template.""" + + def __init__(self, metadata: dict, template: str, file_path: str): + self.metadata = metadata + self.template = template + self.file_path = file_path + self.model = metadata.get('model') + self.config = metadata.get('config', {}) + self.input_schema = metadata.get('input', {}).get('schema') + self.output_schema = metadata.get('output', {}).get('schema') + +class PromptParser: + """Parses .prompt files with YAML frontmatter and template content.""" + + def parse_file(self, file_path: str) -> PromptFile: + """Parse a .prompt file and return PromptFile object.""" + pass + + def parse_content(self, content: str) -> PromptFile: + """Parse prompt content string.""" + pass +``` + +#### 2.2 Template Engine (`template.py`) +```python +class CompiledTemplate: + """A compiled template ready for rendering.""" + pass + +class TemplateEngine: + 
"""Handlebars-compatible template processing engine.""" + + def __init__(self): + self.helpers = {} + self.partials = {} + + def compile(self, template: str) -> CompiledTemplate: + """Compile a template string into executable form.""" + pass + + def render(self, template: CompiledTemplate, context: dict) -> str: + """Render a compiled template with given context.""" + pass + + def register_helper(self, name: str, helper_fn: callable): + """Register a template helper function.""" + pass + + def register_partial(self, name: str, template: str): + """Register a template partial.""" + pass +``` + +#### 2.3 File Loader (`loader.py`) +```python +class PromptLoader: + """Loads and manages .prompt files from directories.""" + + def __init__(self, template_engine: TemplateEngine): + self.template_engine = template_engine + self.cache = {} + + def load_prompt_folder(self, dir_path: str, namespace: str = '') -> dict: + """Recursively load all .prompt files from directory.""" + pass + + def load_prompt_file(self, file_path: str) -> PromptFile: + """Load a single .prompt file.""" + pass + + def get_prompt(self, name: str, variant: str = None) -> PromptFile: + """Retrieve a loaded prompt by name and variant.""" + pass +``` + +### Phase 3: Integration with Existing Prompt System + +#### 3.1 Enhance ExecutablePrompt Class +Update `py/packages/genkit/src/genkit/blocks/prompt.py`: + +```python +class ExecutablePrompt: + def __init__(self, ...): + # Existing initialization + self._template_engine = None + self._prompt_file = None + + def render(self, input: Any | None = None, config: dict | None = None) -> GenerateActionOptions: + """Enhanced render method with template processing.""" + # Process templates using dotprompt if string templates are provided + processed_system = self._process_template(self._system, input) + processed_prompt = self._process_template(self._prompt, input) + processed_messages = self._process_messages(self._messages, input) + + return 
to_generate_action_options( + registry=self._registry, + model=self._model, + prompt=processed_prompt, + system=processed_system, + messages=processed_messages, + # ... rest of parameters + ) + + def _process_template(self, template: str | Part | list[Part] | None, input: Any) -> str | Part | list[Part] | None: + """Process template strings with dotprompt engine.""" + if isinstance(template, str) and self._template_engine: + compiled = self._template_engine.compile(template) + return self._template_engine.render(compiled, input or {}) + return template +``` + +#### 3.2 Add New API Functions + +The dotprompt API functions will be organized across multiple files and exposed through `__init__.py`: + +**File Structure**: +``` +py/packages/genkit/src/genkit/dotprompt/ +├── __init__.py # Public API exports and convenience functions +├── api.py # Main API functions implementation +├── parser.py # File parsing logic +├── template.py # Template engine +├── loader.py # File loading logic +└── types.py # Type definitions +``` + +**`py/packages/genkit/src/genkit/dotprompt/__init__.py`**: +```python +"""Dotprompt module for file-based prompt management.""" + +from .api import ( + load_prompt_folder, + define_helper, + define_partial, + prompt, + create_prompt_from_file, +) +from .types import PromptFile, CompiledTemplate +from .template import TemplateEngine +from .loader import PromptLoader + +# Re-export main API functions for easy importing +__all__ = [ + 'load_prompt_folder', + 'define_helper', + 'define_partial', + 'prompt', + 'create_prompt_from_file', + 'PromptFile', + 'CompiledTemplate', + 'TemplateEngine', + 'PromptLoader', +] +``` + +**`py/packages/genkit/src/genkit/dotprompt/api.py`**: +```python +"""Main API functions for dotprompt functionality.""" + +from genkit.core.registry import Registry +from genkit.blocks.prompt import ExecutablePrompt +from .loader import PromptLoader +from .types import PromptFile + +def load_prompt_folder(registry: Registry, dir: str = 
'./prompts', ns: str = ''): + """Load all .prompt files from directory into registry.""" + loader = registry.prompt_loader + loaded_prompts = loader.load_prompt_folder(dir, ns) + + for name, prompt_file in loaded_prompts.items(): + registry.register_prompt_from_file(name, prompt_file) + +def define_helper(registry: Registry, name: str, fn: callable): + """Register a template helper function.""" + registry.dotprompt.register_helper(name, fn) + +def define_partial(registry: Registry, name: str, source: str): + """Register a template partial.""" + registry.dotprompt.register_partial(name, source) + +def prompt(registry: Registry, name: str, variant: str = None, dir: str = './prompts') -> ExecutablePrompt: + """Load and return an executable prompt from .prompt file.""" + return registry.get_prompt(name, variant) + +def create_prompt_from_file(registry: Registry, file_path: str) -> ExecutablePrompt: + """Create ExecutablePrompt from .prompt file.""" + loader = registry.prompt_loader + prompt_file = loader.load_prompt_file(file_path) + return _create_executable_from_prompt_file(registry, prompt_file) + +def _create_executable_from_prompt_file(registry: Registry, prompt_file: PromptFile) -> ExecutablePrompt: + """Internal helper to create ExecutablePrompt from PromptFile.""" + # Implementation details... 
+ pass +``` + +**Usage Examples**: +```python +# Import the functions from the dotprompt module +from genkit.dotprompt import load_prompt_folder, define_helper, prompt + +# Or import the entire module +import genkit.dotprompt as dotprompt + +# Usage in application code +ai = genkit({'plugins': [google_genai()]}) + +# Load all prompts from directory +load_prompt_folder(ai.registry, './prompts') + +# Define custom helper +define_helper(ai.registry, 'uppercase', lambda text: text.upper()) + +# Use a loaded prompt +greeting = prompt(ai.registry, 'greeting', variant='formal') +response = await greeting({'name': 'Alice'}) +``` + +### Phase 4: Registry Integration + +#### 4.1 Extend Registry Class +Update `py/packages/genkit/src/genkit/core/registry.py`: + +```python +class Registry: + def __init__(self): + # Existing initialization + self.dotprompt = TemplateEngine() + self.prompt_loader = PromptLoader(self.dotprompt) + self.loaded_prompts = {} + + def register_prompt_from_file(self, name: str, prompt_file: PromptFile): + """Register a prompt loaded from .prompt file.""" + pass + + def get_prompt(self, name: str, variant: str = None) -> ExecutablePrompt: + """Retrieve a registered prompt.""" + pass +``` + +#### 4.2 Auto-loading Integration +Add to main Genkit initialization: + +```python +class Genkit: + def __init__(self, options: GenkitOptions): + # Existing initialization + if options.get('prompt_dir'): + self.load_prompt_folder(options['prompt_dir']) + + def load_prompt_folder(self, dir: str = './prompts'): + """Load all .prompt files from directory.""" + load_prompt_folder(self.registry, dir) +``` + +### Phase 5: Testing and Validation + +#### 5.1 Unit Tests Structure +``` +py/packages/genkit/tests/dotprompt/ +├── test_parser.py # Test YAML parsing and validation +├── test_template.py # Test template rendering +├── test_loader.py # Test file loading and caching +├── test_integration.py # Test ExecutablePrompt integration +├── fixtures/ +│ ├── simple.prompt # Basic 
test prompt +│ ├── complex.prompt # Advanced features test +│ └── variant.test.prompt # Variant testing +└── helpers.py # Test utilities +``` + +#### 5.2 Test Cases Coverage +- **Parser Tests**: YAML frontmatter parsing, error handling, schema validation +- **Template Tests**: Variable substitution, conditionals, loops, helpers +- **Loader Tests**: Directory scanning, file caching, variant resolution +- **Integration Tests**: End-to-end prompt execution, compatibility with existing API + +#### 5.3 Performance Testing +- Template compilation and caching performance +- File loading and scanning benchmarks +- Memory usage with large prompt libraries + +### Phase 6: Documentation and Examples + +#### 6.1 Documentation Updates +- Add dotprompt section to Python documentation +- Create migration guide from programmatic to file-based prompts +- API reference documentation +- Best practices guide + +#### 6.2 Example Implementation +Create sample prompts and usage examples: + +```python +# examples/dotprompt_example.py +from genkit import genkit +from genkit.dotprompt import load_prompt_folder + +# Initialize with prompt directory +ai = genkit({ + 'plugins': [google_genai()], + 'prompt_dir': './prompts' +}) + +# Use file-based prompt +greeting_prompt = ai.prompt('greeting', variant='formal') +response = await greeting_prompt({'name': 'Alice', 'location': 'hotel lobby'}) +``` + +## Implementation Timeline + +### Week 1-2: Foundation +- Set up module structure and dependencies +- Implement basic parser and template engine +- Create initial test framework + +### Week 3-4: Core Features +- Complete file loader implementation +- Integrate with ExecutablePrompt class +- Add registry support + +### Week 5-6: Advanced Features +- Implement helpers and partials +- Add variant support +- Performance optimization + +### Week 7-8: Testing and Polish +- Comprehensive testing suite +- Documentation and examples +- Performance benchmarking + +## Risk Assessment and Mitigation + +### 
Technical Risks + +1. **Template Engine Compatibility** + - **Risk**: Python Handlebars libraries may not match JavaScript behavior exactly + - **Mitigation**: Create compatibility tests with shared .prompt files, implement custom engine if needed + +2. **Performance Impact** + - **Risk**: File scanning and template compilation may slow startup + - **Mitigation**: Implement intelligent caching, lazy loading, and production optimization + +3. **Schema Validation** + - **Risk**: Python typing system differences from JavaScript schemas + - **Mitigation**: Create schema translation layer, use Pydantic for validation + +### Project Risks + +1. **Breaking Changes** + - **Risk**: Integration might break existing Python code + - **Mitigation**: Maintain backward compatibility, gradual rollout strategy + +2. **Maintenance Overhead** + - **Risk**: Additional complexity in codebase + - **Mitigation**: Comprehensive documentation, clear separation of concerns + +## Success Criteria + +### Functional Requirements +- ✅ Parse .prompt files with YAML frontmatter +- ✅ Process Handlebars-compatible templates +- ✅ Load prompts from directories automatically +- ✅ Support prompt variants and namespaces +- ✅ Maintain backward compatibility with existing API + +### Performance Requirements +- Template compilation under 10ms per prompt +- Directory scanning under 100ms for 100 prompts +- Memory usage increase under 10MB for typical usage + +### Quality Requirements +- 95%+ test coverage for new code +- Zero breaking changes to existing API +- Documentation coverage for all new features + +## Conclusion + +This implementation plan provides a comprehensive roadmap for integrating dotprompt functionality into the Python version of Firebase Genkit. The phased approach ensures systematic development while maintaining backward compatibility and code quality. 
+ +The integration will significantly enhance the Python implementation's capabilities, bringing it closer to feature parity with the stable JavaScript version and providing developers with a consistent, file-based approach to prompt management across both language implementations. + +Key benefits of this implementation: +- **Developer Experience**: File-based prompt management with version control +- **Maintainability**: Separation of prompts from code logic +- **Reusability**: Shared prompts across different parts of applications +- **Collaboration**: Non-technical team members can edit prompts +- **Testing**: Easier prompt testing and validation + +The successful completion of this integration will mark a significant milestone in the Python implementation's maturity and adoption potential. + +--- + +**Document Version**: 1.0 +**Date**: January 2025 +**Issue Reference**: [Firebase Genkit #3280](https://github.com/firebase/genkit/issues/3280) diff --git a/py/packages/genkit/src/genkit/ai/_base.py b/py/packages/genkit/src/genkit/ai/_base.py index 4433597b47..53c380b84a 100644 --- a/py/packages/genkit/src/genkit/ai/_base.py +++ b/py/packages/genkit/src/genkit/ai/_base.py @@ -48,6 +48,8 @@ def __init__( plugins: list[Plugin] | None = None, model: str | None = None, reflection_server_spec: ServerSpec | None = None, + prompt_dir: str | None = None, + prompt_ns: str | None = None, ) -> None: """Initialize a new Genkit instance. 
@@ -60,6 +62,13 @@ def __init__( super().__init__() self._initialize_server(reflection_server_spec) self._initialize_registry(model, plugins) + # Optional, non-breaking .prompt folder load (no auto-registration) + if prompt_dir: + try: + self.registry.load_prompt_folder(prompt_dir, prompt_ns) + except Exception: + # TODO: Consider logging a warning; keep non-fatal + pass define_generate_action(self.registry) def run_main(self, coro: Coroutine[Any, Any, T] | None = None) -> T: diff --git a/py/packages/genkit/src/genkit/ai/_base_async.py b/py/packages/genkit/src/genkit/ai/_base_async.py index 7229c54642..4368e7a076 100644 --- a/py/packages/genkit/src/genkit/ai/_base_async.py +++ b/py/packages/genkit/src/genkit/ai/_base_async.py @@ -47,6 +47,8 @@ def __init__( plugins: list[Plugin] | None = None, model: str | None = None, reflection_server_spec: ServerSpec | None = None, + prompt_dir: str | None = None, + prompt_ns: str | None = None, ) -> None: """Initialize a new Genkit instance. @@ -59,6 +61,13 @@ def __init__( super().__init__() self._reflection_server_spec = reflection_server_spec self._initialize_registry(model, plugins) + # Optional, non-breaking .prompt folder load (no auto-registration) + if prompt_dir: + try: + self.registry.load_prompt_folder(prompt_dir, prompt_ns) + except Exception: + # TODO: Consider logging a warning; keep non-fatal + pass def _initialize_registry(self, model: str | None, plugins: list[Plugin] | None) -> None: """Initialize the registry for the Genkit instance. 
diff --git a/py/packages/genkit/src/genkit/ai/_registry.py b/py/packages/genkit/src/genkit/ai/_registry.py index 4bb19db11e..83d9df5c97 100644 --- a/py/packages/genkit/src/genkit/ai/_registry.py +++ b/py/packages/genkit/src/genkit/ai/_registry.py @@ -74,6 +74,15 @@ ToolChoice, ) +# Optional, non-invasive helpers to load `.prompt` files via standalone handler +from genkit.dotprompt import ( + load_prompt_dir as dp_load_prompt_dir, + aload_prompt_dir as dp_aload_prompt_dir, + load_prompt_file as dp_load_prompt_file, + aload_prompt_file as dp_aload_prompt_file, +) +from genkit.dotprompt.types import LoadedPrompt + EVALUATOR_METADATA_KEY_DISPLAY_NAME = 'evaluatorDisplayName' EVALUATOR_METADATA_KEY_DEFINITION = 'evaluatorDefinition' EVALUATOR_METADATA_KEY_IS_BILLED = 'evaluatorIsBilled' @@ -103,6 +112,32 @@ def __init__(self): """Initialize the Genkit registry.""" self.registry: Registry = Registry() + # --- Dotprompt file-loading helpers (no registration, no side-effects) --- + def load_prompt_dir(self, dir: str, ns: str | None = None) -> dict[str, LoadedPrompt]: + """Synchronously scan a directory and parse `.prompt` files. + + Mirrors JS folder scanning behavior (partials, subdir prefixing), but does + not auto-register or render metadata. 
+ """ + return dp_load_prompt_dir(self.registry.dotprompt, dir, ns) + + async def aload_prompt_dir(self, dir: str, ns: str | None = None, *, with_metadata: bool = True) -> dict[str, LoadedPrompt]: + """Asynchronously scan a directory and optionally render metadata.""" + return await dp_aload_prompt_dir(self.registry.dotprompt, dir, ns, with_metadata=with_metadata) + + def load_prompt_file(self, file_path: str, ns: str | None = None) -> LoadedPrompt: + """Synchronously parse a single `.prompt` file (no metadata).""" + return dp_load_prompt_file(self.registry.dotprompt, file_path, ns) + + async def aload_prompt_file(self, file_path: str, ns: str | None = None, *, with_metadata: bool = True) -> LoadedPrompt: + """Asynchronously parse a single `.prompt` file and optionally render metadata.""" + return await dp_aload_prompt_file(self.registry.dotprompt, file_path, ns, with_metadata=with_metadata) + + # --- Lookup helpers matching JS key rules --- + def lookup_loaded_prompt(self, name: str, variant: str | None = None, ns: str | None = None): + """Lookup a previously loaded .prompt by name/variant/ns.""" + return self.registry.lookup_loaded_prompt(name, variant, ns) + def flow(self, name: str | None = None, description: str | None = None) -> Callable[[Callable], Callable]: """Decorator to register a function as a flow. diff --git a/py/packages/genkit/src/genkit/core/registry.py b/py/packages/genkit/src/genkit/core/registry.py index 316c8c0ba2..a97e7a9b63 100644 --- a/py/packages/genkit/src/genkit/core/registry.py +++ b/py/packages/genkit/src/genkit/core/registry.py @@ -32,7 +32,13 @@ from typing import Any import structlog -from dotpromptz.dotprompt import Dotprompt + +# Import Dotprompt optionally to keep module import-safe without the dependency. 
+try: + from dotpromptz.dotprompt import Dotprompt # type: ignore +except Exception: # pragma: no cover + class Dotprompt: # type: ignore + pass from genkit.core.action import ( Action, @@ -43,6 +49,16 @@ ) from genkit.core.action.types import ActionKind, ActionName, ActionResolver +# Optional imports for dotprompt file loading +try: + from genkit.dotprompt import load_prompt_dir as dp_load_prompt_dir # type: ignore + from genkit.dotprompt.file_loader import registry_definition_key # type: ignore + from genkit.dotprompt.types import LoadedPrompt # type: ignore +except Exception: # pragma: no cover + dp_load_prompt_dir = None # type: ignore + registry_definition_key = None # type: ignore + LoadedPrompt = object # type: ignore + logger = structlog.get_logger(__name__) # An action store is a nested dictionary mapping ActionKind to a dictionary of @@ -86,6 +102,8 @@ def __init__(self): self._value_by_kind_and_name: dict[str, dict[str, Any]] = {} self._lock = threading.RLock() self.dotprompt = Dotprompt() + # Storage for prompts loaded from .prompt files (definition key -> LoadedPrompt) + self._loaded_prompts: dict[str, LoadedPrompt] = {} # TODO: Figure out how to set this. self.api_stability: str = 'stable' @@ -271,6 +289,40 @@ def list_actions( } return actions + # --- Dotprompt file-based prompt management (safe, opt-in) --- + def load_prompt_folder(self, dir: str, ns: str | None = None) -> dict[str, 'LoadedPrompt']: + """Load .prompt files into in-memory storage without registration. + + This mirrors JS folder scanning behavior but intentionally avoids + registering actions. Use this to prepare for later integration. 
+ """ + if dp_load_prompt_dir is None: + raise RuntimeError('dotprompt loader not available') + loaded = dp_load_prompt_dir(self.dotprompt, dir, ns) + with self._lock: + self._loaded_prompts.update(loaded) + return loaded + + def list_loaded_prompts(self) -> list[str]: + """Return keys of loaded .prompt definitions.""" + with self._lock: + return list(self._loaded_prompts.keys()) + + def get_loaded_prompt(self, key: str) -> 'LoadedPrompt | None': + """Get a previously loaded .prompt by its definition key.""" + with self._lock: + return self._loaded_prompts.get(key) + + def lookup_loaded_prompt(self, name: str, variant: str | None = None, ns: str | None = None) -> 'LoadedPrompt | None': + """Lookup a loaded .prompt by name/variant/ns using JS definition key rules. + + Mirrors JS `registryDefinitionKey(name, variant, ns)` composition. + """ + if registry_definition_key is None: + return None + key = registry_definition_key(name, variant, ns) + return self.get_loaded_prompt(key) + def register_value(self, kind: str, name: str, value: Any): """Registers a value with a given kind and name. 
diff --git a/py/packages/genkit/src/genkit/dotprompt/__init__.py b/py/packages/genkit/src/genkit/dotprompt/__init__.py new file mode 100644 index 0000000000..c380b5d857 --- /dev/null +++ b/py/packages/genkit/src/genkit/dotprompt/__init__.py @@ -0,0 +1,29 @@ +"""Standalone .prompt file handling utilities (no registry integration).""" + +from typing import Any, Callable, Dict +from dotpromptz.dotprompt import Dotprompt + +from .types import LoadedPrompt, PromptFileId +from .file_loader import load_prompt_dir, load_prompt_file, registry_definition_key +from .file_loader import define_partial, define_helper +from .file_loader import ( + aload_prompt_dir, + aload_prompt_file, + render_prompt_metadata, +) + +__all__ = [ + "LoadedPrompt", + "PromptFileId", + "registry_definition_key", + "load_prompt_dir", + "load_prompt_file", + "aload_prompt_dir", + "aload_prompt_file", + "render_prompt_metadata", + "define_partial", + "define_helper", + "Dotprompt", +] + + diff --git a/py/packages/genkit/src/genkit/dotprompt/file_loader.py b/py/packages/genkit/src/genkit/dotprompt/file_loader.py new file mode 100644 index 0000000000..d84f79a9ad --- /dev/null +++ b/py/packages/genkit/src/genkit/dotprompt/file_loader.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +import os +from pathlib import Path +from typing import Any, Dict, Iterable, Tuple +from dotpromptz.dotprompt import Dotprompt + +from .types import LoadedPrompt, PromptFileId + + +# TODO: Confirm canonical namespace rules when scanning nested directories. +def registry_definition_key(name: str, variant: str | None = None, ns: str | None = None) -> str: + """Build a definition key "ns/name.variant" where ns/variant are optional.""" + prefix = f"{ns}/" if ns else "" + suffix = f".{variant}" if variant else "" + return f"{prefix}{name}{suffix}" + + +def _parse_name_and_variant(filename: str) -> Tuple[str, str | None]: + """Extract base name and optional variant from a `.prompt` filename. 
+ + Behavior: + - strip `.prompt` + - if remaining contains a `.` split name and variant at the first dot + """ + base = filename[:-7] if filename.endswith('.prompt') else filename + if '.' in base: + parts = base.split('.') + return parts[0], parts[1] + return base, None + + +def define_partial(dp: Dotprompt, name: str, source: str) -> None: + """Register a Handlebars partial with the provided `Dotprompt` instance.""" + # Support both camelCase and snake_case for Python bindings. + if hasattr(dp, 'definePartial'): + getattr(dp, 'definePartial')(name, source) # type: ignore[attr-defined] + else: + getattr(dp, 'define_partial')(name, source) + + +def define_helper(dp: Dotprompt, name: str, fn: Any) -> None: + """Register a helper on the provided `Dotprompt` instance.""" + dp.defineHelper(name, fn) + + +def load_prompt_file(dp: Dotprompt, file_path: str, ns: str | None = None) -> LoadedPrompt: + """Load and parse a single `.prompt` file using dotpromptz. + + - Reads file as UTF-8 + - Parses source via `dp.parse` + - Does NOT eagerly compile; compilation can be done by caller + - Returns a LoadedPrompt instance + """ + path = Path(file_path) + source = path.read_text(encoding='utf-8') + template = dp.parse(source) + name, variant = _parse_name_and_variant(path.name) + return LoadedPrompt( + id=PromptFileId(name=name, variant=variant, ns=ns), + template=template, + source=source, + ) + + +async def render_prompt_metadata(dp: Dotprompt, loaded: LoadedPrompt) -> dict[str, Any]: + """Render metadata for a parsed template using dotpromptz. + + Performs cleanup for null schema descriptions. + """ + # Support both camelCase and snake_case for Python bindings. 
+ if hasattr(dp, 'renderMetadata'): + metadata: dict[str, Any] = await getattr(dp, 'renderMetadata')(loaded.template) # type: ignore[attr-defined] + else: + metadata = await getattr(dp, 'render_metadata')(loaded.template) + + # Remove null descriptions + try: + if metadata.get('output', {}).get('schema', {}).get('description', None) is None: + metadata['output']['schema'].pop('description', None) + except Exception: + pass + try: + if metadata.get('input', {}).get('schema', {}).get('description', None) is None: + metadata['input']['schema'].pop('description', None) + except Exception: + pass + + loaded.metadata = metadata + return metadata + + +def _iter_prompt_dir(dir_path: str) -> Iterable[Tuple[Path, str]]: + """Yield (path, subdir) for files under dir recursively. + + subdir is the relative directory from the root, used for namespacing. + """ + root = Path(dir_path).resolve() + for current_dir, _dirs, files in os.walk(root): + rel = os.path.relpath(current_dir, root) + subdir = '' if rel == '.' else rel + for fname in files: + if fname.endswith('.prompt'): + yield Path(current_dir) / fname, subdir + + +def load_prompt_dir(dp: Dotprompt, dir_path: str, ns: str | None = None) -> Dict[str, LoadedPrompt]: + """Recursively scan a directory, registering partials and loading prompts. + + - Files starting with `_` are treated as partials; register via definePartial + - Other `.prompt` files are parsed and returned + - If a file is in a subdirectory, that subdirectory is prefixed to the prompt name + using the definition key semantics ("ns/subdir/name.variant") + + Returns a dict mapping definition keys to `LoadedPrompt`. 
+ """ + loaded: Dict[str, LoadedPrompt] = {} + for file_path, subdir in _iter_prompt_dir(dir_path): + fname = file_path.name + parent = file_path.parent + if fname.startswith('_') and fname.endswith('.prompt'): + partial_name = fname[1:-7] + define_partial(dp, partial_name, (parent / fname).read_text(encoding='utf-8')) + continue + + # Regular prompt file + name, variant = _parse_name_and_variant(fname) + + # Include subdir in the prompt "name" prefix, not in ns. + name_with_prefix = f"{subdir}/{name}" if subdir else name + + loaded_prompt = load_prompt_file(dp, str(file_path), ns=ns) + # Update the id.name to include the subdir prefix. + loaded_prompt.id = PromptFileId(name=name_with_prefix, variant=variant, ns=ns) + + key = registry_definition_key(name_with_prefix, variant, ns) + loaded[key] = loaded_prompt + return loaded + + +async def aload_prompt_file(dp: Dotprompt, file_path: str, ns: str | None = None, *, with_metadata: bool = True) -> LoadedPrompt: + """Async variant that also renders metadata when requested.""" + loaded = load_prompt_file(dp, file_path, ns) + if with_metadata: + await render_prompt_metadata(dp, loaded) + return loaded + + +async def aload_prompt_dir(dp: Dotprompt, dir_path: str, ns: str | None = None, *, with_metadata: bool = True) -> Dict[str, LoadedPrompt]: + """Async directory loader that optionally renders metadata for each prompt.""" + loaded = load_prompt_dir(dp, dir_path, ns) + if with_metadata: + for key, prompt in loaded.items(): + await render_prompt_metadata(dp, prompt) + return loaded + + diff --git a/py/packages/genkit/src/genkit/dotprompt/types.py b/py/packages/genkit/src/genkit/dotprompt/types.py new file mode 100644 index 0000000000..2ec0d5bd49 --- /dev/null +++ b/py/packages/genkit/src/genkit/dotprompt/types.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Optional + + +@dataclass(frozen=True) +class PromptFileId: + """Represents a unique identifier for 
@dataclass
class LoadedPrompt:
    """A prompt after parsing, with optional compiled form and metadata.

    Attributes:
        id: Identifier of the prompt.
        template: Template object for the prompt; its concrete type is not
            constrained by this class.
        source: Source string for the prompt (presumably the raw prompt text;
            confirm against the loader).
        compiled: Compiled form of the prompt, ``None`` by default.
        metadata: Metadata for the prompt, ``None`` by default.
    """

    # Required fields: declaration order defines the positional __init__ order.
    id: PromptFileId
    template: Any
    source: str

    # Optional fields, both defaulting to None.
    compiled: Optional[Any] = None
    # NOTE(review): loaders may store a non-dict metadata object here;
    # confirm whether the annotation should be widened.
    metadata: Optional[dict[str, Any]] = None
@pytest.mark.asyncio
async def test_aload_prompt_dir_renders_metadata(tmp_path: Path) -> None:
    """Check that ``aload_prompt_dir`` attaches metadata when asked to.

    Writes a single ``info.prompt`` file, loads the directory with
    ``with_metadata=True`` and asserts that the loaded prompt carries a
    non-``None`` metadata value of an accepted type.
    """
    prompts_dir = tmp_path / 'prompts'

    _write(
        prompts_dir / 'info.prompt',
        _simple_prompt_frontmatter() + "This is a prompt that renders metadata.\n",
    )

    dp = _dp()
    loaded = await aload_prompt_dir(dp, str(prompts_dir), with_metadata=True)

    assert "info" in loaded
    metadata = loaded["info"].metadata
    assert metadata is not None

    # Accept PromptMetadata object from dotpromptz or a dict.
    # Only the optional import is guarded: wrapping the assertion itself in a
    # broad ``except Exception`` would swallow the AssertionError and replace
    # it with a different, misleading failure.
    try:
        from dotpromptz.typing import PromptMetadata as DpPromptMetadata  # type: ignore
    except ImportError:
        accepted_types = (dict,)
    else:
        accepted_types = (dict, DpPromptMetadata)

    assert isinstance(metadata, accepted_types)
@pytest.mark.asyncio
async def test_variant_in_subdir_with_ns_and_metadata(tmp_path: Path) -> None:
    """Load a variant prompt from a subdirectory under a namespace.

    Writes ``nested/foo.bar.prompt``, loads the directory with ``ns='nsX'``
    and metadata rendering enabled, then checks that one loaded prompt's id
    composes to ``nsX/nested/foo.bar``.
    """
    root = tmp_path / 'prompts'
    _write(
        root / 'nested' / 'foo.bar.prompt',
        _simple_prompt_frontmatter() + "Nested Variant.\n",
    )

    from genkit.dotprompt import aload_prompt_dir

    prompts = await aload_prompt_dir(_dp(), str(root), ns='nsX', with_metadata=True)

    # Expect key: nsX/subdir/name.variant with our builder behavior
    composed_ids = []
    for prompt in prompts.values():
        pid = prompt.id
        # The variant suffix is only appended when a variant is present.
        suffix = '.' + pid.variant if pid.variant else ''
        composed_ids.append(f"{pid.ns}/{pid.name}{suffix}")

    assert 'nsX/nested/foo.bar' in composed_ids