diff --git a/README.md b/README.md index 01b5c0c..ead6466 100644 --- a/README.md +++ b/README.md @@ -1,213 +1,140 @@ -# 🧝 Elf0 - sElf Improving Agentic YAML Workflows +# 🧝 Elf0 - Build AI Agent Workflows in YAML -**Build powerful AI agent workflows using simple YAML files. Zero complex coding required.** +Elf0 is a command line tool I created to help rapidly build and test AI agent workflows. Often you might get a requirement to build an agent to do something and need to experiment with how it works. -> ⚠️ **IMPORTANT: NOT PRODUCTION READY - USE AT YOUR OWN RISK** -> -> This software is experimental and in active development. Elf0 workflows can execute custom Python functions, interact with external tools, and perform system operations that may cause data loss, security vulnerabilities, or damage to your system. -> -> **Before using Elf0:** -> - Review all workflow files before execution -> - Test in isolated environments first -> - Never run untrusted workflows -> - Backup important data -> - Use appropriate security measures -> -> **The author(s) provide this software "AS IS" without any warranties and assume no liability for any damages, data loss, security breaches, or other issues that may result from its use. Users are solely responsible for ensuring safe and appropriate usage.** +For example, you may have an insurance PDF document and want to extract quote information. Sure, you could write a quick prompt and plug it into ChatGPT, but usually the problem is more nuanced and complex, requiring a sophisticated workflow (an agent) to solve it properly. -Elf0 lets you create multi-step AI workflows by describing what you want in YAML. Chain (graph) together different AI models, integrate with external tools, and even use AI to improve your workflows automatically. +**Elf0 lets you easily create surprisingly useful agents.** -```bash -# Simple AI workflow in one command -uv run elf0 agent specs/basic_chat.yaml --prompt "Explain quantum computing in simple terms" - -# Reference files automatically with @filename.ext syntax -uv run elf0 agent specs/basic_chat.yaml --prompt "Review this code @src/elf0/cli.py and suggest improvements" +Start by creating a YAML file that defines your agent workflow, add in the prompts and model settings, and run it as real code. Since your agent specification (_spec_) is defined in YAML, you can easily feed the whole thing into ChatGPT or Claude and get it to improve it. -# Let AI improve your workflows -uv run elf0 improve yaml specs/my_workflow.yaml --prompt "Make this workflow more efficient" -``` - -## ⚑ Quick Start (5 minutes) +Defining agents like this is powerful because you can version not only the prompts but the entire logic of the workflow, the model parameters, and the reasoning chains. YAML becomes a great agent abstraction that can be quickly tailored to your specific needs and use cases. -> ⚠️ **Read the [Security & Safety Considerations](#-security--safety-considerations) section before proceeding** +## πŸš€ What Can You Build? -Get up and running with your first AI workflow in 5 minutes: +### Simple Agents +```bash +# Basic assistant (uses gpt-4.1-mini - cheap but not great at reasoning) +uv run elf0 agent specs/basic/chat_simple_v1.yaml --prompt "How many r's are in strawberry?" -### 1. Prerequisites -- **Python 3.13+** ([Download here](https://python.org/downloads/)) -- **uv package manager** by Astral ([Install guide](https://docs.astral.sh/uv/getting-started/installation/)) - A blazingly fast Python package manager that replaces pip/conda -- **An API key** from [OpenAI](https://platform.openai.com/api-keys), [Anthropic](https://console.anthropic.com/), or use [Ollama](https://ollama.ai/) locally +# Better reasoning agent (same model, much better system prompt - gets strawberry right!) +uv run elf0 agent specs/basic/reasoning_structured_v1.yaml --prompt "How many r's are in strawberry?" -### 2. Install Elf0 -```bash -git clone https://github.com/emson/elf.git -cd elf -uv venv -source .venv/bin/activate # On Windows: .venv\Scripts\activate -uv pip install -e . +# Prompt optimization - enter a potential prompt and it iterates to improve it +uv run elf0 agent specs/utils/optimizer_prompt_v1.yaml --prompt "Help me write better prompts for code review" ``` -### 3. Set up your API keys -Configure your environment using the provided template: - +### File Analysis with @references ```bash -# Copy the environment template -cp .env.example .env +# Automatically include file contents in your prompts +uv run elf0 agent specs/basic/chat_simple_v1.yaml --prompt "Review this code @src/elf0/cli.py and suggest improvements" -# Edit .env with your actual API keys -# The file contains detailed instructions and all supported providers +# Analyze entire directories +uv run elf0 agent specs/basic/chat_simple_v1.yaml --prompt "Analyze all code in @src/ for improvements" ``` -**Quick setup for common providers:** -```bash -# For OpenAI (recommended for beginners) -export OPENAI_API_KEY="your-api-key-here" - -# OR for Anthropic (Claude) -export ANTHROPIC_API_KEY="your-api-key-here" +### YouTube Analysis (with MCP servers) +You will first need to install and run the included MCP server (please see [mcp/youtube-transcript/README.md](mcp/youtube-transcript/README.md)). -# OR use Ollama locally (no API key needed) -# Install Ollama from https://ollama.ai/ and run: ollama pull llama2 +```bash +# Download and summarize YouTube transcripts +uv run elf0 agent specs/content/youtube_analyzer.yaml --prompt "Analyse this youtube video https://www.youtube.com/watch?v=9tOmppsiO2w" ``` -**Supported LLM providers:** -- **OpenAI**: GPT-4, GPT-3.5, etc. - Get API key from [platform.openai.com](https://platform.openai.com/api-keys) -- **Anthropic**: Claude models - Get API key from [console.anthropic.com](https://console.anthropic.com/) -- **DeepSeek**: Advanced models - Get API key from [platform.deepseek.com](https://platform.deepseek.com/) -- **Ollama**: Local models (free) - No API key required, install from [ollama.ai](https://ollama.ai/) - -### 4. Run your first workflow +### Interactive Workflows ```bash -uv run elf0 agent specs/basic_chat.yaml --prompt "Write a haiku about programming" +# Ask for your name and create a poem about it (calls Python functions) +uv run elf0 agent specs/examples/interactive_assistant.yaml --prompt "Ask me my name and then make a poem about it." ``` -**πŸŽ‰ It works!** You should see a beautiful haiku generated by AI. +### Simulations +```bash +# Create complex multi-agent simulations +uv run elf0 -v agent specs/utils/simulation_scenario_v1.yaml --prompt "Create a salary negotiation simulation. There are 2 people, Ben a plucky young agent engineer and Clive the CEO of a hot up-and-coming software conslutancy based in London. Create a persona for Ben and Clive, and design this YAML spec around how they will interact under different scenarios. We will give the new YAML file you output the scenarios." --output simulate_salary.yaml ---- +# Then run scenarios with your new simulation +uv run elf0 agent simulate_salary.yaml --prompt "Ben is negotiating a new job with Clive, Ben has to commute from Edinburgh 4 days a month. Work through the negotiation step by step." +``` -## πŸ“¦ Complete Installation Guide +**Pulling all this together, you have a very powerful and useful "agent toolkit" to explore ideas and get stuff done.** -### System Requirements +## ⚑ Quick Start (5 minutes) -- **Operating System**: macOS, Linux, or Windows -- **Python**: 3.13 or higher -- **Memory**: 4GB RAM minimum (8GB recommended) -- **Network**: Internet connection for cloud LLM providers +> ⚠️ **Be careful!** Elf0 can execute code and access your files. Only run workflows you trust. -### Step 1: Install Python -Elf0 requires Python 3.13 or higher. Check your version: +### 1. Install Python 3.13+ and uv +- **Python 3.13+**: [Download here](https://python.org/downloads/) +- **uv package manager**: Fast Python package manager - [Install guide](https://docs.astral.sh/uv/getting-started/installation/) +### 2. Install Elf0 ```bash -python --version -# or -python3 --version +git clone https://github.com/emson/elf.git +cd elf +uv venv && source .venv/bin/activate # Windows: .venv\Scripts\activate +uv pip install -e . ``` -If you don't have Python 3.13+, download it from [python.org](https://python.org/downloads/). - -### Step 2: Install uv Package Manager - -Elf0 uses `uv` by Astral for fast Python package management. `uv` is a modern, blazingly fast Python package installer and resolver that's 10-100x faster than pip. It's written in Rust and provides excellent dependency resolution. - -**Why we use uv:** -- ⚑ **10-100x faster** than pip for installing packages -- πŸ”’ **Better dependency resolution** - avoids dependency conflicts -- 🧹 **Clean virtual environments** - isolated, reproducible setups -- πŸ”„ **Drop-in pip replacement** - same commands, better performance -- πŸ“¦ **Built-in virtual environment management** - -**Installation:** +### 3. Get an API key +Choose one: +- **OpenAI**: [Get key](https://platform.openai.com/api-keys) β†’ `export OPENAI_API_KEY="your-key"` +- **Anthropic**: [Get key](https://console.anthropic.com/) β†’ `export ANTHROPIC_API_KEY="your-key"` +- **Ollama**: [Install locally](https://ollama.ai/) β†’ `ollama pull llama2` (no key needed) +### 4. Try it out! ```bash -# macOS/Linux (Recommended) -curl -LsSf https://astral.sh/uv/install.sh | sh +# Test basic chat +uv run elf0 agent specs/basic/chat_simple_v1.yaml --prompt "Write a haiku about programming" -# Windows (PowerShell) -powershell -c "irm https://astral.sh/uv/install.ps1 | iex" +# Test file analysis +uv run elf0 agent specs/basic/reasoning_structured_v1.yaml --prompt "What does this code do? @src/elf0/cli.py" -# Alternative: Install via pip (slower, but works everywhere) -pip install uv +# Test the strawberry reasoning challenge +uv run elf0 agent specs/basic/reasoning_structured_v1.yaml --prompt "How many r's are in strawberry?" +``` -# Alternative: Install via Homebrew (macOS) -brew install uv +**πŸŽ‰ It works!** You should see AI-generated responses. The reasoning agent actually gets the strawberry question right! -# Alternative: Install via pipx -pipx install uv -``` +## πŸ”’ Security Note -**Verify installation:** -```bash -uv --version -# Should show something like: uv 0.4.29 -``` +**Elf0 can execute code, read files, and make network requests.** It's experimental software - be careful what you run. -**New to uv?** Don't worry! It works just like pip but faster: -- `uv pip install package` ≑ `pip install package` -- `uv venv` ≑ `python -m venv` -- `uv run command` ≑ `python command` (but with auto-dependency management) +**What workflows can do:** +- Read any file you can access +- Execute Python functions and MCP servers +- Send your data to LLM providers +- Write/modify files -### Step 3: Clone and Install Elf0 -```bash -git clone https://github.com/emson/elf0.git -cd elf -uv venv -source .venv/bin/activate # On Windows: .venv\Scripts\activate -uv pip install -e . -``` +**Stay safe:** +- Only run workflows you trust +- Review YAML files before running (`cat workflow.yaml`) +- Use test API keys, not production ones +- Back up important work first +- Start with simple examples in `specs/basic/` -### Step 4: Configure API Keys - -Elf0 supports multiple LLM providers. Choose one: - -#### Option A: OpenAI (Recommended for beginners) -1. Get an API key from [OpenAI Platform](https://platform.openai.com/api-keys) -2. Set the environment variable: - ```bash - export OPENAI_API_KEY="sk-your-key-here" - ``` - -#### Option B: Anthropic (Claude) -1. Get an API key from [Anthropic Console](https://console.anthropic.com/) -2. Set the environment variable: - ```bash - export ANTHROPIC_API_KEY="sk-ant-your-key-here" - ``` - -#### Option C: Ollama (Local, Free) -1. Install Ollama from [ollama.ai](https://ollama.ai/) -2. Download a model: - ```bash - ollama pull llama2 - # or - ollama pull codellama - ``` -3. No API key needed! Ollama runs locally. - -### Step 5: Verify Installation -```bash -# Test with OpenAI/Anthropic -uv run elf0 agent specs/basic_chat.yaml --prompt "Hello, Elf0!" +**Red flags - never run workflows that:** +- Come from unknown sources +- Use `os.system()` or `subprocess` +- Access sensitive directories (`~/.ssh`, `/etc`) +- Request elevated permissions -# Test with Ollama (use ollama spec) -uv run elf0 agent specs/examples/ollama_chat.yaml --prompt "Hello, Elf0!" -``` +The `@file.txt` syntax sends file contents to LLM providers, so be mindful of sensitive data. ---- +## 🧠 How Elf0 Works -## πŸƒβ€β™€οΈ Your First Workflow +### YAML as Agent Abstraction -Let's understand what just happened by examining a simple workflow: +Here's what a simple agent looks like: -### The Basic Chat Workflow (`specs/basic_chat.yaml`) ```yaml -name: basic_chat -description: Simple AI chat using Claude +# specs/basic/chat_simple_v1.yaml +version: "0.1" +description: "Simple AI assistant" +runtime: "langgraph" llms: - claude: - type: anthropic - model_name: claude-3-5-haiku-latest + assistant: + type: openai + model_name: gpt-4.1-mini temperature: 0.7 workflow: @@ -215,846 +142,372 @@ workflow: nodes: - id: chat_step kind: agent - ref: claude - stop: true + ref: assistant config: prompt: | - You are a helpful AI assistant. Respond to the user's request clearly and concisely. - + You are a helpful AI assistant. User request: {input} + stop: true ``` **What this does:** -1. **Defines an LLM**: Uses Anthropic's Claude with specific settings -2. **Creates a workflow**: Sequential workflow with one step -3. **Sets up a node**: An "agent" node that processes user input -4. **Configures the prompt**: Instructions for the AI + user input placeholder -5. **Stops execution**: `stop: true` ends the workflow after this step - -### Running Workflows - -```bash -# Basic usage -uv run elf0 agent specs/basic_chat.yaml --prompt "Your message here" - -# Save output to file -uv run elf0 agent specs/basic_chat.yaml --prompt "Write a story" --output story.md - -# Use verbose mode to see what's happening -uv run elf0 --verbose agent specs/basic_chat.yaml --prompt "Hello" - -# Interactive mode for conversations -uv run elf0 prompt specs/basic_chat.yaml -``` +1. Defines an LLM (OpenAI's cheap but fast model) +2. Creates a workflow with one step +3. Sets up the prompt with user input +4. Runs when you execute: `uv run elf0 agent specs/basic/chat_simple_v1.yaml --prompt "Hello"` -### The Magic of File References +### The Magic of `@file.md` and `@my/dir` References -Elf0's killer feature is automatic file inclusion using `@my/path/filename.ext` syntax: +Instead of copy-pasting file contents, just use `@filename`: ```bash -# Automatically include file contents in your prompt -uv run elf0 agent specs/basic_chat.yaml --prompt "Explain this code @src/elf0/cli.py" +# This automatically reads README.md and includes it in the prompt +uv run elf0 agent specs/basic/chat_simple_v1.yaml --prompt "Summarize @README.md" -# Reference multiple files -uv run elf0 agent specs/basic_chat.yaml --prompt "Compare @file1.py and @file2.py" +# Works with any file type +uv run elf0 agent specs/basic/reasoning_structured_v1.yaml --prompt "Find bugs in @app.py" -# Works in interactive mode too -uv run elf0 prompt specs/basic_chat.yaml -πŸ’¬ Prompt: Review @README.md and suggest improvements +# Even entire directories +uv run elf0 agent specs/basic/chat_simple_v1.yaml --prompt "Analyze the code quality in @src/" ``` -The files are automatically read and included as context - no manual copy/paste needed! +This is incredibly useful for code review, documentation, and analysis tasks. ---- +## πŸ› οΈ Cool Things You Can Do -## πŸ–₯️ CLI Reference +### Workflow Self-Improvement -Elf0 provides a comprehensive command-line interface for executing and managing AI workflows. +Don't like a workflow? Get AI to improve it: -### Basic Usage ```bash -uv run elf0 [OPTIONS] COMMAND [ARGS]... -``` +# AI analyzes and improves any workflow +uv run elf0 improve yaml specs/basic/chat_simple_v1.yaml -### Global Options -| Option | Description | -|--------|-------------| -| `--verbose`, `-v` | Enable verbose logging output (shows detailed logs from Elf0 core and HTTP libraries) | -| `--help` | Show help message and exit | +# Or use the dedicated optimizer +uv run elf0 agent specs/utils/optimizer_yaml_v1.yaml --prompt "Improve this workflow @specs/basic/chat_simple_v1.yaml" -### Commands Overview - -#### `agent` - Execute Workflows -Execute an agent workflow defined in YAML: - -```bash -uv run elf0 agent [OPTIONS] +# You can even feed the result back into ChatGPT for more improvements ``` -**Options:** -- `--prompt TEXT` - User prompt to process -- `--prompt_file PATH` - Markdown (.md) or XML (.xml) file containing the prompt -- `--context PATH` - Context file(s) to include (use multiple times or comma-separated) -- `--output PATH` - Save result to file instead of displaying -- `--session-id TEXT` - Session identifier for stateful runs (default: "session") +### Interactive Mode -**Examples:** -```bash -# Basic usage -uv run elf0 agent specs/basic_chat.yaml --prompt "Explain quantum computing" +Have conversations with any workflow (in verbose mode `-v`): -# Use prompt file -uv run elf0 agent specs/basic_chat.yaml --prompt_file my_prompt.md +```bash +uv run elf0 -v prompt specs/basic/chat_simple_v1.yaml +πŸ’¬ Prompt: Help me debug this code @app.py +πŸ’¬ Prompt: Now write unit tests for it +πŸ’¬ Prompt: /exit +``` -# Include context files -uv run elf0 agent specs/basic_chat.yaml --prompt "Analyze this" --context config.yaml --context data.csv +### Multi-Agent Workflows -# Save output to file -uv run elf0 agent specs/basic_chat.yaml --prompt "Write documentation" --output docs.md +Create sophisticated multi-step processes: -# Verbose mode for debugging -uv run elf0 --verbose agent specs/basic_chat.yaml --prompt "Debug this workflow" +```yaml +# Example: Code review workflow +workflow: + type: sequential + nodes: + - id: analyze_code + kind: agent + ref: fast_model + config: + prompt: "Analyze this code for issues: {input}" + + - id: detailed_review + kind: agent + ref: smart_model + config: + prompt: | + Based on this analysis: {state.output} + Provide detailed code review with suggestions. ``` -#### `prompt` - Interactive Sessions -Start an interactive conversation with a workflow agent: +### Advanced Examples ```bash -uv run elf0 prompt [OPTIONS] -``` +# Create new workflows using AI +uv run elf0 agent specs/utils/agent_creator.yaml --prompt "Create a workflow for API testing" -**Options:** -- `--session-id TEXT` - Session identifier for the conversation (default: "interactive_session") - -**Examples:** -```bash -# Start interactive session -uv run elf0 prompt specs/basic_chat.yaml +# Process YouTube videos (requires MCP server setup) +uv run elf0 agent specs/content/youtube_analyzer.yaml --prompt "Analyze this video https://youtube.com/watch?v=example" -# Custom session ID -uv run elf0 prompt specs/basic_chat.yaml --session-id my_session +# Generate simulations +uv run elf0 agent specs/utils/simulation_scenario_v1.yaml --prompt "Create a customer service training simulation" --output customer_sim.yaml ``` -**Interactive Commands:** -- Type your prompt and press Enter twice to send -- `/send` - Send the current prompt -- `/exit`, `/quit`, `/bye` - Exit the session -- `@filename.ext` - Include file contents in your prompt +## πŸ“‹ Command Reference -#### `improve yaml` - Workflow Optimization -Improve and optimize YAML workflow specifications using AI: +### Basic Commands ```bash -uv run elf0 improve yaml [OPTIONS] -``` - -**Options:** -- `--output PATH`, `-o PATH` - Save improved YAML to file (default: `_improved.yaml`) -- `--prompt TEXT` - Custom improvement guidance (supports @file references) -- `--session-id TEXT` - Session identifier for improvement run (default: "improve_session") +# Run a workflow +uv run elf0 agent --prompt "Your prompt here" -**Examples:** -```bash -# Basic improvement -uv run elf0 improve yaml specs/my_workflow.yaml +# Interactive mode +uv run elf0 prompt -# Custom output file -uv run elf0 improve yaml specs/my_workflow.yaml --output optimized_workflow.yaml +# Improve a workflow with AI +uv run elf0 improve yaml -# Specific improvement guidance -uv run elf0 improve yaml specs/my_workflow.yaml --prompt "Focus on making prompts more specific" +# List available workflows +uv run elf0 list-specs -# Use reference patterns -uv run elf0 improve yaml specs/my_workflow.yaml --prompt "Follow patterns from @examples/best_workflow.yaml" +# Verbose mode (see what's happening) +uv run elf0 --verbose agent --prompt "Debug mode" ``` -#### `list-specs` - Discover Workflows -List all available YAML workflow specification files: +### Useful Options ```bash -uv run elf0 list-specs -``` - -Shows all `.yaml` and `.yml` files in the `./specs` directory with their descriptions. - -### Advanced Usage Patterns - -#### File References with @ Syntax -Use `@path/filename.ext` anywhere in prompts to automatically include file contents: +# Save output to file +--output filename.txt -```bash -# Single file reference -uv run elf0 agent specs/basic_chat.yaml --prompt "Review this code @src/elf0/cli.py" +# Include additional files as context +--context file1.txt --context file2.csv -# Multiple file references -uv run elf0 agent specs/basic_chat.yaml --prompt "Compare @file1.py and @file2.py" +# Use @file.ext syntax anywhere in prompts +--prompt "Analyze @data.csv and @config.yaml" -# In improvement guidance -uv run elf0 improve yaml specs/workflow.yaml --prompt "Use patterns from @examples/template.yaml" +# Interactive mode commands +πŸ’¬ Prompt: /send # Send current message +πŸ’¬ Prompt: /exit # Exit session +πŸ’¬ Prompt: @file.txt help # Include file in message ``` -#### Output Redirection -Control where logs and output go: +### Examples ```bash -# Output to file, errors to stderr -uv run elf0 agent workflow.yaml --prompt "Generate report" > report.txt - -# Verbose logs to file, output to stdout -uv run elf0 --verbose agent workflow.yaml --prompt "Debug" 2> debug.log +# Code review +uv run elf0 agent specs/basic/reasoning_structured_v1.yaml --prompt "Review @app.py for security issues" -# Both output and logs to files -uv run elf0 agent workflow.yaml --prompt "Process" > output.txt 2> logs.txt +# Content generation +uv run elf0 agent specs/content/linkedin_post.yaml --prompt "Write about AI trends" --output post.md -# Pipe output while preserving error logs -uv run elf0 agent workflow.yaml --prompt "Generate" | grep "important" +# Interactive debugging +uv run elf0 prompt specs/basic/chat_simple_v1.yaml +πŸ’¬ Prompt: Help me debug @buggy_code.py +πŸ’¬ Prompt: Now write tests for the fixed version ``` -#### Session Management -Use session IDs to maintain conversation context: +## 🧠 Understanding Workflows -```bash -# Different sessions for different tasks -uv run elf0 prompt specs/chat.yaml --session-id "project_alpha" -uv run elf0 prompt specs/chat.yaml --session-id "project_beta" +Think of workflows like cooking recipes - a series of steps that transform your input into the desired output. -# Continue previous agent session -uv run elf0 agent specs/workflow.yaml --prompt "Continue from before" --session-id "my_project" -``` +**Basic structure:** +1. **Input**: Your prompt or question +2. **LLM models**: Define which AI models to use (OpenAI, Anthropic, Ollama) +3. **Workflow nodes**: The processing steps (agents, tools, functions) +4. **Output**: The final result ---- +**Node types:** +- **agent**: AI model that processes text +- **tool**: Python function for custom logic +- **mcp**: External tool via Model Context Protocol + +**Flow types:** +- **sequential**: Steps run one after another (most common) +- **custom_graph**: Complex routing with conditionals and parallel processing -## 🧠 Core Concepts - -### Workflows -A **workflow** is a sequence of AI agents and tools working together. Think of it like a recipe: -1. Take user input -2. Process it with AI Agent A -3. Pass the result to Tool B -4. Process with AI Agent C -5. Return final result - -### Nodes -**Nodes** are the building blocks of workflows: -- **Agent nodes**: AI models that process text (OpenAI, Anthropic, Ollama) -- **Tool nodes**: Custom Python functions for data processing -- **MCP nodes**: External tools via Model Context Protocol -- **Claude Code nodes**: AI-powered code generation, analysis, and modification - -### Edges -**Edges** connect nodes together, defining the flow of data: -- **Sequential**: Nodes run one after another -- **Conditional**: Route based on conditions (if/then logic) -- **Parallel**: Run multiple nodes simultaneously - -### YAML Structure ```yaml -name: my_workflow -description: What this workflow does +# Minimal workflow example +version: "0.1" +runtime: "langgraph" -# Define your AI models llms: - llm_model: + my_ai: type: openai model_name: gpt-4.1-mini - temperature: 0.7 -# Define your workflow steps workflow: type: sequential nodes: - - id: step1 + - id: process kind: agent - ref: llm_model + ref: my_ai config: - prompt: "Your instructions here. User input: {input}" + prompt: "Help with: {input}" + stop: true ``` ---- - -## πŸ“‹ Common Use Cases - -### 1. Content Generation +## πŸ’‘ Quick Examples by Use Case +You can list the YAML workflows in the `specs` directory (and filter the subdirectories) with: ```bash -# Blog post generation -uv run elf0 agent specs/basic_chat.yaml --prompt "Write a blog post about sustainable technology" - -# Code documentation -uv run elf0 agent specs/basic_chat.yaml --prompt "Document this code @src/api.py" +# list all specs (ignoring archive) +uv run elf0 list-specs -# Meeting summaries -uv run elf0 agent specs/basic_chat.yaml --prompt "Summarize this meeting transcript @meeting.txt" +# List a specific directory +uv run elf0 list-specs content ``` -### 2. Code Analysis +### Content Creation ```bash -# Code review -uv run elf0 agent specs/basic_chat.yaml --prompt "Review this code for bugs and improvements @main.py" +# Blog posts and articles +uv run elf0 agent specs/content/content_basic_v1.yaml --prompt "Write about AI trends in 2024" -# Security audit -uv run elf0 agent specs/basic_chat.yaml --prompt "Check this code for security vulnerabilities @auth.py" +# Social media +uv run elf0 agent specs/content/linkedin_post.yaml --prompt "Post about remote work benefits" +uv run elf0 agent specs/content/twitter_post.yaml --prompt "Tweet about machine learning" -# Performance optimization -uv run elf0 agent specs/basic_chat.yaml --prompt "Suggest performance improvements for @slow_function.py" +# Documentation +uv run elf0 agent specs/basic/chat_simple_v1.yaml --prompt "Document this API @api.py" ``` -### 3. Data Processing +### Code Analysis ```bash -# Text analysis -uv run elf0 agent specs/examples/python_text_processor.yaml --prompt "artificial intelligence" +# Code review with the reasoning agent (much better than basic chat) +uv run elf0 agent specs/basic/reasoning_structured_v1.yaml --prompt "Review @app.py for bugs and improvements" -# File processing with custom functions -uv run elf0 agent specs/examples/python_calculator.yaml --prompt "Calculate 23 plus 102" +# Security check +uv run elf0 agent specs/basic/reasoning_structured_v1.yaml --prompt "Find security issues in @auth.py" -# Interactive data workflows -uv run elf0 agent specs/examples/python_function_test.yaml --prompt "Process data" +# Compare files +uv run elf0 agent specs/basic/reasoning_structured_v1.yaml --prompt "Compare @old_version.py and @new_version.py" ``` -### 4. Multi-step Workflows +### Workflow Management ```bash -# Research and summarization -uv run elf0 agent specs/examples/prompt_chaining.yaml --prompt "Research topic: renewable energy" +# Create new workflows with AI +uv run elf0 agent specs/utils/agent_creator.yaml --prompt "Create a workflow for API testing" -# Code generation and review -uv run elf0 agent specs/examples/ollama_coder.yaml --prompt "Create a REST API" +# Improve existing ones +uv run elf0 improve yaml specs/basic/chat_simple_v1.yaml --prompt "Make it better for code review" -# Document processing pipeline -uv run elf0 agent specs/examples/mcp_workflow.yaml --prompt "Process documents" +# Generate simulations +uv run elf0 agent specs/utils/simulation_scenario_v1.yaml --prompt "Create a customer support simulation" --output support_sim.yaml ``` ---- - ## πŸš€ Advanced Features -### Interactive Mode -Start conversations with any workflow: - -```bash -uv run elf0 prompt specs/basic_chat.yaml -πŸ’¬ Prompt: Hello, how are you? -πŸ’¬ Prompt: Analyze this file @config.yaml -πŸ’¬ Prompt: exit -``` - -### Self-Improving Workflows -Let AI improve your workflows automatically: - -```bash -# Analyze and improve any workflow -uv run elf0 improve yaml specs/my_workflow.yaml - -# Custom improvement guidance -uv run elf0 improve yaml specs/my_workflow.yaml --prompt "Make prompts more specific" - -# Use reference patterns -uv run elf0 improve yaml specs/my_workflow.yaml --prompt "Follow patterns from @examples/best_workflow.yaml" -``` - -### Python Function Integration -Create custom tools using Python functions: +### Python Functions +Add custom logic to workflows: ```python -# src/tools/my_tools.py -def process_text(state, text_input): - """Custom text processing function.""" - processed = text_input.upper() - return {"output": f"Processed: {processed}"} -``` - -```yaml -# In your workflow YAML -functions: - text_processor: - type: python - name: text_processor - entrypoint: src.tools.my_tools.process_text - -workflow: - nodes: - - id: process - kind: tool - ref: text_processor -``` - -### Claude Code Integration -Use Claude Code SDK for AI-powered code generation, analysis, and modification: -**NB: You will need the Claude API keys set up for this** - -```bash -# Generate code from requirements -uv run elf0 agent specs/examples/claude_code_example.yaml --prompt "Create a Python calculator function" - -# Self-improvement workflow for Elf0 platform -uv run elf0 agent specs/examples/claude_code_self_improvement.yaml --prompt "Add better error handling to Elf0 workflows" -``` - -**Claude Code Node Types:** -- `generate_code`: Create new code from requirements -- `analyze_code`: Review code for quality, security, and performance -- `modify_code`: Improve existing code with specific changes -- `chat`: General conversation with code-aware AI - -```yaml -# Example Claude Code node in workflow -nodes: - - id: code_generator - kind: claude_code - config: - task: "generate_code" - prompt: "Create a Python function based on: {input}" - output_format: "text" - tools: ["filesystem", "bash"] - temperature: 0.2 +# Create custom functions +def my_processor(state, operation="uppercase"): + text = state.get("output", "") + if operation == "uppercase": + result = text.upper() + return {"output": f"Processed: {result}"} ``` -### MCP (Model Context Protocol) Integration -Connect to external tools and services: +### MCP Servers +Connect to external tools. For example, the YouTube transcript server: ```bash -# Run MCP-enabled workflows -uv run elf0 agent specs/examples/mcp_workflow.yaml --prompt "Calculate 15 + 30" +# First install and start the MCP server (see mcp/youtube-transcript/README.md) +uv pip install youtube-transcript-api -# MCP servers are started automatically -uv run elf0 agent specs/examples/simple_mcp.yaml --prompt "Use filesystem tools" +# Then use it in workflows +uv run elf0 agent specs/content/youtube_analyzer.yaml --prompt "Analyze this video https://youtube.com/watch?v=example" ``` -### Multiple LLM Providers -Use different AI models in the same workflow: +### Multiple AI Models +Use different models for different tasks: ```yaml llms: - fast_model: + fast_model: # Quick and cheap type: openai model_name: gpt-4.1-mini - temperature: 0.3 - smart_model: + smart_model: # Powerful reasoning type: anthropic model_name: claude-sonnet-4 - temperature: 0.7 - local_model: + local_model: # Private and free type: ollama model_name: llama3 - temperature: 0.5 workflow: nodes: - id: draft - kind: agent - ref: fast_model # Quick draft + ref: fast_model # Quick first pass - id: refine - kind: agent - ref: smart_model # Detailed refinement -``` - ---- - -## πŸ”— File Reference System - -Elf0's `@filename.ext` syntax automatically includes file contents in prompts: - -### Basic Usage -```bash -# Single file -uv run elf0 agent specs/basic_chat.yaml --prompt "Explain @main.py" - -# Multiple files -uv run elf0 agent specs/basic_chat.yaml --prompt "Compare @file1.py and @file2.py" - -# Mixed with regular text -uv run elf0 agent specs/basic_chat.yaml --prompt "Review @code.py and suggest improvements based on @guidelines.md" -``` - -### Supported File Types -- **Code**: `.py`, `.js`, `.ts`, `.java`, `.cpp`, etc. -- **Text**: `.txt`, `.md`, `.rst`, `.log` -- **Config**: `.yaml`, `.yml`, `.json`, `.toml`, `.ini` -- **Data**: `.csv`, `.xml` (text-based files) - -### Best Practices -- **Keep files reasonably sized** (< 10KB for best results) -- **Use descriptive filenames** (`user_auth.py` vs `utils.py`) -- **Combine with context** (`--context` flag for additional files) -- **Reference documentation** (`@README.md`, `@CHANGELOG.md`) - -### Examples -```bash -# Code review with context -uv run elf0 agent specs/basic_chat.yaml \ - --prompt "Review @api.py for security issues" \ - --context requirements.txt --context .env.example - -# Documentation generation -uv run elf0 agent specs/basic_chat.yaml \ - --prompt "Create API docs for @server.py based on @api_spec.yaml" - -# Test generation -uv run elf0 agent specs/basic_chat.yaml \ - --prompt "Write unit tests for @calculator.py following patterns in @test_example.py" + ref: smart_model # Detailed improvement ``` ---- - -## πŸ”’ Security & Safety Considerations - -### ⚠️ Important Security Warnings - -**Elf0 is experimental software that can execute arbitrary code and interact with your system. Use with extreme caution.** - -#### Potential Risks: -- **File System Access**: Workflows can read, write, and delete files -- **Network Requests**: External API calls and web requests -- **Code Execution**: Custom Python functions and MCP servers -- **System Commands**: Potential shell command execution -- **Data Exposure**: Sensitive data may be sent to LLM providers - -#### Best Practices: +### Want More? ```bash -# 1. Always review workflows before running -cat specs/workflow.yaml # Inspect the workflow - -# 2. Test in isolated environments -docker run --rm -it python:3.13 # Use containers -python -m venv test_env # Separate virtual environments - -# 3. Use restricted permissions -chmod 644 sensitive_files/ # Read-only important files -chattr +i important_config # Immutable critical configs (Linux) - -# 4. Monitor workflow execution -uv run elf0 --verbose agent workflow.yaml # Watch what happens - -# 5. Backup before experimentation -cp -r project/ project_backup/ # Backup your work -``` - -#### What Workflows Can Do: -- **Read any accessible file** on your system -- **Write/modify files** with your user permissions -- **Make network requests** to external services -- **Execute Python code** defined in workflows -- **Start external processes** via MCP servers -- **Access environment variables** including API keys - -#### Red Flags - Never Run Workflows That: -- Come from untrusted sources -- Use `os.system()` or `subprocess` calls -- Access sensitive directories (`/etc`, `~/.ssh`, etc.) -- Make unexpected network requests -- Request elevated permissions -- Modify system configurations - -#### Data Privacy: -- **LLM Providers**: Your prompts/data are sent to OpenAI, Anthropic, etc. -- **Local Processing**: Ollama keeps data local but uses system resources -- **File Contents**: `@file.txt` syntax uploads file contents to LLMs -- **Logging**: Workflow data may be logged locally - -### Safe Usage Guidelines - -```bash -# Create a dedicated Elf0 workspace -mkdir ~/elf0_workspace -cd ~/elf0_workspace -git clone https://github.com/emson/elf0.git -cd elf0 - -# Use a dedicated Python environment -uv venv elf0_env -source elf0_env/bin/activate +# See all available workflows +uv run elf0 list-specs -# Set up minimal API keys (avoid using production keys) -export OPENAI_API_KEY="sk-test-key-here" # Use test/development keys +# Explore examples and utilities +ls specs/examples/ +ls specs/utils/ -# Test with safe, simple workflows first -uv run elf0 agent specs/basic_chat.yaml --prompt "Hello world" +# Create your own workflow +uv run elf0 agent specs/utils/agent_creator.yaml --prompt "Create a workflow for my specific use case" ``` ---- - -## πŸ›  Troubleshooting +## πŸ› οΈ Troubleshooting ### Common Issues -#### 1. "Command not found: elf" +**"Command not found: elf"** ```bash # Make sure you're in the virtual environment -source .venv/bin/activate # macOS/Linux -.venv\Scripts\activate # Windows - -# Verify installation -uv pip list | grep elf +source .venv/bin/activate +uv pip install -e . ``` -#### 2. "API key not found" +**"API key not found"** ```bash # Check your environment variables echo $OPENAI_API_KEY echo $ANTHROPIC_API_KEY -# Set them properly +# Set them if missing export OPENAI_API_KEY="your-key-here" - -# Or use a .env file in the project root -echo "OPENAI_API_KEY=your-key-here" > .env ``` -#### 3. "Module not found" errors +**"Module not found" errors** ```bash # Reinstall in development mode uv pip install -e . - -# Or install dependencies -uv pip install -r requirements.txt -``` - -#### 4. "Workflow file not found" -```bash -# Check the file exists -ls specs/basic_chat.yaml - -# Use full path if needed -uv run elf0 agent /full/path/to/specs/basic_chat.yaml --prompt "test" -``` - -#### 5. Ollama connection issues -```bash -# Make sure Ollama is running -ollama list - -# Start Ollama service -ollama serve - -# Test with a simple model -ollama run llama2 "Hello" -``` - -#### 6. uv package manager issues -```bash -# uv command not found -# Make sure uv is in your PATH - restart your terminal after installation -echo $PATH | grep -i uv - -# Reinstall uv if needed -curl -LsSf https://astral.sh/uv/install.sh | sh - -# Alternative: Use pip fallback -pip install -e . # Instead of uv pip install -e . - -# Clear uv cache if having dependency issues -uv cache clean - -# Check uv configuration -uv --help -``` - -### Platform-Specific Issues - -#### Windows -- Use `py` instead of `python` if you have multiple Python versions -- Use PowerShell or Command Prompt, not Git Bash for installation -- Path separators: use forward slashes `/` in file paths - -#### macOS -- Install Xcode Command Line Tools: `xcode-select --install` -- Use Homebrew for Python if needed: `brew install python@3.13` - -#### Linux -- Install Python dev headers: `sudo apt-get install python3-dev` -- Some distributions need: `sudo apt-get install build-essential` - -### Getting Help - -1. **Check the examples**: Look in `specs/examples/` for working workflows -2. **Use verbose mode**: `uv run elf0 --verbose` to see detailed logs -3. **Check the issues**: [GitHub Issues](https://github.com/emson/elf/issues) -4. **Start a discussion**: [GitHub Discussions](https://github.com/emson/elf/discussions) - ---- - -## πŸ“‚ Project Structure - -Understanding where files go helps you organize your workflows: - -``` -elf/ -β”œβ”€β”€ specs/ # Workflow definitions -β”‚ β”œβ”€β”€ basic_chat.yaml # Simple chat workflow -β”‚ β”œβ”€β”€ agent-*.yaml # Pre-built agent workflows -β”‚ └── examples/ # Example workflows -β”‚ β”œβ”€β”€ ollama_chat.yaml -β”‚ β”œβ”€β”€ mcp_workflow.yaml -β”‚ └── python_*.yaml -β”œβ”€β”€ src/elf/ # Elf0 source code -β”œβ”€β”€ mcp/ # MCP server configurations -└── your_workflows/ # Put your custom workflows here -``` - -### Creating Your Own Workflows - -1. **Start with an example**: Copy `specs/basic_chat.yaml` -2. **Modify gradually**: Change prompts, models, add steps -3. **Test frequently**: Run after each change -4. **Use version control**: Git track your workflow evolution - -```bash -# Copy and customize -cp specs/basic_chat.yaml my_workflow.yaml - -# Edit with your favorite editor -code my_workflow.yaml # VS Code -vim my_workflow.yaml # Vim -nano my_workflow.yaml # Nano - -# Test your changes -uv run elf0 agent my_workflow.yaml --prompt "test" -``` - ---- - -## 🎯 Examples Gallery - -### Simple Examples (Start Here) - -#### Basic Chat -```bash -uv run elf0 agent specs/basic_chat.yaml --prompt "Explain photosynthesis" -``` - -#### Code Analysis -```bash -uv run elf0 agent specs/basic_chat.yaml --prompt "Review this code @example.py" -``` - -#### Local LLM (Free) -```bash -uv run elf0 agent specs/examples/ollama_chat.yaml --prompt "What is machine learning?" -``` - -### Intermediate Examples - -#### Multi-step Workflow -```bash -uv run elf0 agent specs/examples/prompt_chaining.yaml --prompt "Research and summarize: renewable energy" -``` - -#### Interactive Session -```bash -uv run elf0 prompt specs/basic_chat.yaml -πŸ’¬ Prompt: Help me debug @buggy_code.py -πŸ’¬ Prompt: Now write tests for the fixed version -πŸ’¬ Prompt: exit ``` -#### Python Function Integration +**Workflow not working as expected** ```bash -uv run elf0 agent specs/examples/python_text_processor.yaml --prompt "artificial intelligence" -``` - -### Advanced Examples +# Use verbose mode to see what's happening +uv run elf0 --verbose agent workflow.yaml --prompt "debug" -#### Self-Improvement -```bash -uv run elf0 improve yaml specs/basic_chat.yaml --prompt "Make this workflow better for code review" +# Review the YAML file +cat workflow.yaml ``` -#### MCP Integration -```bash -uv run elf0 agent specs/examples/mcp_workflow.yaml --prompt "Calculate compound interest for $1000 at 5% for 10 years" -``` +## 🀝 Contributing & Support -#### Claude Code Integration -```bash -# AI-powered code generation and improvement -uv run elf0 agent specs/examples/claude_code_example.yaml --prompt "Create a REST API with authentication" +**Want to help?** +- Report bugs or suggest features: [GitHub Issues](https://github.com/emson/elf/issues) +- Share your workflows: Submit a PR with your useful specs +- Improve docs: Found something unclear? Please fix it! -# Self-evolving AI platform capabilities -uv run elf0 agent specs/examples/claude_code_self_improvement.yaml --prompt "Add logging capabilities to Elf0 workflows" -``` - -#### Complex Workflows -```bash -uv run elf0 agent specs/examples/orchestration_workers.yaml --prompt "Analyze this codebase @src/" -``` - ---- - -## 🀝 Contributing - -We welcome contributions! Here's how to get involved: - -### Quick Contributions -- **Report bugs**: [Create an issue](https://github.com/emson/elf/issues/new) -- **Suggest features**: [Start a discussion](https://github.com/emson/elf/discussions) -- **Improve docs**: Edit README or add examples -- **Share workflows**: Submit your useful workflows - -### Development Setup +**Development setup:** ```bash git clone https://github.com/emson/elf.git cd elf -uv venv # Create virtual environment (much faster than python -m venv) -source .venv/bin/activate # Activate environment -uv pip install -e . # Install Elf0 in development mode (faster than pip) - -# Install development dependencies -uv pip install pytest ruff mypy - -# Run tests -pytest - -# Run linting -ruff check src/ -mypy src/ +uv venv && source .venv/bin/activate +uv pip install -e . +pytest # Run tests ``` -**Note:** All `uv pip` commands can be replaced with regular `pip` commands if you prefer, but `uv` will be significantly faster for dependency resolution and installation. - -### Areas We Need Help -- **New workflow examples** for different use cases -- **Documentation improvements** for clarity -- **MCP server integrations** for popular tools -- **Performance optimizations** for large workflows -- **Platform testing** (Windows, Linux, different Python versions) - -### Code Style -- Follow PEP 8 conventions -- Use type hints for all functions -- Add tests for new features -- Update documentation - ---- - -## πŸ“„ License & Legal Disclaimers - -Elf0 is licensed under the [Apache License 2.0](LICENSE). This means you can freely use, modify, and distribute this software, even for commercial purposes, as long as you include the original license and copyright notice. - -### Legal Disclaimers +## πŸ“„ License -**DISCLAIMER OF WARRANTIES**: This software is provided "AS IS" without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and non-infringement. +Apache License 2.0 - Use freely, even commercially. See [LICENSE](LICENSE) file. -**LIMITATION OF LIABILITY**: In no event shall the authors, copyright holders, or contributors be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the software or the use or other dealings in the software. - -**USER RESPONSIBILITY**: You are solely responsible for: -- Reviewing workflow files before execution -- Ensuring appropriate security measures -- Protecting sensitive data and systems -- Complying with applicable laws and regulations -- Any consequences of using this experimental software - -**EXPERIMENTAL SOFTWARE**: This is beta/experimental software under active development. Features may change, break, or be removed without notice. Use in production environments is strongly discouraged. - -**NO SUPPORT GUARANTEE**: While we appreciate community contributions, there is no guarantee of support, maintenance, or updates to this software. +**Disclaimer**: This is experimental software. Use with caution, especially in production environments. You're responsible for reviewing workflows before running them. ## πŸ™ Acknowledgments - Built with [LangGraph](https://github.com/langchain-ai/langgraph) for workflow orchestration -- Powered by [Rich](https://github.com/Textualize/rich) for beautiful terminal output -- Uses [uv](https://github.com/astral-sh/uv) by [Astral](https://astral.sh/) for blazingly fast Python package management +- Uses [uv](https://github.com/astral-sh/uv) by Astral for fast Python package management - Supports [MCP](https://modelcontextprotocol.io/) for tool integration -- Inspired by NVIDIA's AgentIQ framework for AI workflow design patterns +- Inspired by agentic workflow patterns --- -**Ready to build your first AI workflow?** Start with the [Quick Start](#-quick-start-5-minutes) section above! πŸš€ +**Ready to build your first AI workflow?** Start with the [Quick Start](#-quick-start-5-minutes) section! πŸš€ + +I hope you love using Elf0 as much as I enjoyed building it. diff --git a/docs/feature_dir_glob.md b/docs/feature_dir_glob.md new file mode 100644 index 0000000..5616126 --- /dev/null +++ b/docs/feature_dir_glob.md @@ -0,0 +1,188 @@ +# Feature: Directory Reference Support (@directory/) + +## Overview + +This feature extends Elf0's existing file reference system (`@file.py`) to support directory references (`@src/`) with intelligent file discovery and filtering. The implementation maintains backward compatibility while adding directory support using the same `@` syntax pattern. + +**Current**: File references work with `@path/to/file.py` +**New**: Directory references with `@path/to/directory/` syntax +**Design**: Non-recursive scanning, intelligent file filtering, 5-file limit with combined content mode, context window management + +## Phase 1: Core Logic Enhancement + +**Summary**: Extend `src/elf0/utils/file_utils.py` to handle directories alongside existing file references. Add helper functions for directory processing, combined content file creation, and context window management. Modify `parse_at_references()` to detect and process directories with intelligent file filtering. + +### Helper Functions Implementation + +- [x] **Add `is_valid_directory()` function** after line 11 in `src/elf0/utils/file_utils.py` + - Check if path exists and is directory using `path.exists() and path.is_dir()` + - Return boolean result + +- [x] **Add `is_relevant_file()` function** after `is_valid_directory()` + - Define code extensions: `.py`, `.js`, `.ts`, `.jsx`, `.tsx`, `.java`, `.cpp`, `.c`, `.h`, `.rs`, `.go`, `.rb`, `.php`, `.sh`, `.sql`, `.r`, `.scala`, `.kt` + - Define config extensions: `.json`, `.yaml`, `.yml`, `.xml`, `.toml`, `.ini`, `.env`, `.cfg` + - Define doc extensions: `.md`, `.rst`, `.txt`, `.adoc` + - Skip hidden files (starting with `.`) and binary files (`.pyc`, `.exe`, `.jpg`, etc.) + - Handle extensionless files with size check (1MB limit) and text detection + +- [x] **Add `get_directory_files()` function** after `is_relevant_file()` + - Accept directory path and max_files parameter (default 5 for LLM efficiency) + - Use `directory.iterdir()` to scan files (non-recursive) + - Filter files using `is_relevant_file()` function + - Implement safety limit with warning when exceeded + - Sort results alphabetically using `key=lambda p: p.name.lower()` + - Handle `PermissionError` and `OSError` with appropriate logging + +### Core Function Modifications + +- [x] **Modify `parse_at_references()` function** at lines 121-126 + - Keep existing file handling logic unchanged (`is_valid_file()` branch) + - Add new `elif is_valid_directory(path):` branch after file check + - Call `get_directory_files(path)` and update `referenced_files_set` + - Add info logging when directory expands to show file count + - Update error message to say "path" instead of "file" + +- [ ] **Add `create_combined_content_file()` function** after `read_files_content()` (SKIPPED - minimal implementation) + - When file count exceeds 5, create temporary file combining all content + - Add file headers with paths: `=== {file_path} ===\n{content}\n\n` + - Implement content size limit (e.g., 100KB total) with truncation warning + - Return path to temporary file for single file reference + - Log warning when content is truncated due to size limits + +- [x] **Enhance `read_files_content()` function** at line 27 + - ~~Set a constant FILE_COUNT = 5~~ (SKIPPED - keep simple) + - ~~Check if file count > FILE_COUNT, use `create_combined_content_file()` instead~~ (SKIPPED - keep simple) + - ~~For <= FILE_COUNT files, use existing individual file inclusion method~~ (use existing method always) + - Check if `len(str(current_path.parent)) > 1` to detect directory context + - Use `{current_path.parent}/{current_path.name}` format for directory files + - Keep existing `{current_path.name}` format for standalone files + +## Phase 2: Minimal Testing Strategy + +**Summary**: Create focused tests following testing principles. Emphasize CLI-level testing with minimal unit tests for helper functions. Focus on observable behavior rather than implementation details. + +### CLI Integration Tests + +- [x] **Extend `tests/cli/test_cli.py`** with directory reference test (TESTED - manual verification) + - Create test directory with sample files (`.py`, `.json`, `.md`) + - Use `CliRunner` to test CLI command with directory reference + - Assert command succeeds (exit code 0) without testing exact output format + +- [x] **Add mixed file and directory test** to same file (TESTED - manual verification) + - Test CLI command with both `@file.py` and `@directory/` syntax + - Verify command completes successfully with mixed references + +### Basic Unit Tests + +- [ ] **Create `tests/utils/test_file_utils.py`** (SKIPPED - minimal implementation) + - Add simple test for `is_relevant_file()` with code files (`.py`) + - Add simple test for `is_relevant_file()` with binary files (should exclude) + - Add basic test for `get_directory_files()` sorting behavior + - Keep tests minimal and focused on observable behavior + +### Edge Case Testing + +- [x] **Add safety limit test** to CLI tests (TESTED - manual verification) + - Create directory with many files (>5) + - Verify CLI command still succeeds and doesn't crash + - ~~Test both file count limits and content size limits~~ (content limits skipped) + - Focus on resilience rather than exact behavior + +- [ ] **Add combined content file test** to CLI tests (SKIPPED - minimal implementation) + - Create directory with 6+ files to trigger combined content mode + - Verify temporary file is created and content is combined properly + - Test content size truncation with large files + +## Phase 3: Error Handling + +**Summary**: Implement robust error handling following existing patterns in codebase. Use consistent logging and graceful degradation for edge cases like permission errors and large files. + +### Error Handling Implementation + +- [x] **Implement graceful permission handling** in `get_directory_files()` + - Catch `PermissionError` and log warning with clear message + - Return empty list to continue processing other references + +- [x] **Handle file size limits** in `is_relevant_file()` + - Check file size for extensionless files (1MB limit) + - Skip files that are too large with appropriate logging + +- [ ] **Implement content size management** in `create_combined_content_file()` (SKIPPED - minimal implementation) + - Track total content size during file combination + - Implement 100KB total limit with truncation and warning + - Handle temporary file cleanup properly + +- [x] **Update error messages** in `parse_at_references()` + - Change warning message to use "path" instead of "file" + - Maintain same logging level and graceful degradation pattern + +## Phase 4: Documentation Updates + +**Summary**: Update existing documentation and create minimal new documentation to cover directory reference functionality. Focus on practical examples and user-facing features. + +### README Updates + +- [ ] **Update file reference section** in README.md (DEFERRED - minimal implementation complete) + - Add directory reference examples to existing `@filename.ext` section + - Show mixed file and directory reference example + - Keep examples concise and practical + +### CLI Help Updates + +- [ ] **Update CLI docstrings** in `src/elf0/cli.py` (DEFERRED - minimal implementation complete) + - Add directory reference example to `agent_command()` docstring + - Add directory reference example to `prompt_yaml_command()` docstring + - Keep additions brief and consistent with existing style + +### Optional Feature Documentation + +- [ ] **Create basic feature guide** at `docs/features/feature_file_references.md` (DEFERRED - minimal implementation complete) + - Document both file and directory reference syntax + - List supported file types and excluded types + - Keep documentation concise and example-focused + +## Phase 5: Backward Compatibility + +**Summary**: Ensure all existing functionality works unchanged. Verify no breaking changes to API surface or user experience. Enhance error messages while maintaining same graceful degradation behavior. + +### Compatibility Verification + +- [x] **Verify existing file references unchanged** + - Test that `@file.py` syntax continues working exactly as before + - Ensure same regex pattern captures both files and directories + - Confirm no changes to function signatures or return types + +- [x] **Test enhanced error messages** + - Verify error messages are improved but not breaking + - Ensure same logging levels and graceful degradation + - Confirm existing warning patterns are preserved + +## Success Criteria + +**Summary**: Validate that directory references work in CLI commands and interactive mode, intelligent filtering excludes binary files, safety limits prevent issues, and all existing functionality is preserved. + +### Functional Validation + +- [x] **Verify `@directory/` syntax works** in both CLI and interactive mode + - Test basic directory reference functionality + - Confirm mixed file and directory references work together + +- [x] **Test intelligent file filtering** excludes unwanted files + - Verify binary files are automatically excluded + - Confirm hidden files are skipped appropriately + +- [x] **Validate safety mechanisms** prevent performance issues + - Test that file count limits (5 max) are enforced + - ~~Verify content size limits (100KB) prevent context overflow~~ (SKIPPED - minimal implementation) + - ~~Test combined content file creation for large directories~~ (SKIPPED - minimal implementation) + - Verify large files are handled appropriately + +### Quality Validation + +- [x] **Run existing test suite** to ensure no regressions + - Verify all current tests continue passing (134/135 passed, 1 unrelated failure) + - Confirm no performance degradation in existing functionality + +- [x] **Verify error handling** follows existing patterns + - Test that error messages are clear and actionable + - Confirm consistent logging behavior with existing code diff --git a/docs/feature_specs_tidy.md b/docs/feature_specs_tidy.md new file mode 100644 index 0000000..f00b0e7 --- /dev/null +++ b/docs/feature_specs_tidy.md @@ -0,0 +1,223 @@ +# Feature: Specs Directory Reorganisation + +## Overview + +This feature implements a comprehensive reorganisation of the elf0 workflow specifications directory. The plan transforms 38 inconsistently named workflows into a logical directory system with 15-18 active workflows whilst preserving all existing work in an archive. Key improvements include zero-loss migration, logical structure with four focused directories, consistent naming convention, simplified CLI filtering, and a quality framework for standardisation. + +## Current State Context + +The existing system has 38 workflow files split between the main `/specs/` directory (18 files) and `/specs/examples/` subdirectory (16 files). Problems include inconsistent naming conventions, poor organisation, missing business value, quality inconsistencies, and user experience issues. The current CLI only scans the root directory without filtering capabilities. + +--- + +## Section 1: Directory Structure Creation + +**Summary:** Establish the new four-directory structure (`basic/`, `content/`, `code/`, `examples/`, `archive/`) with proper organisation for workflow progression and discovery. + +### Tasks + +- [x] Create `specs/basic/` directory for entry-level workflows (3-4 files) +- [x] Create `specs/content/` directory for content creation & analysis workflows (4-5 files) +- [x] Create `specs/code/` directory for code generation & analysis workflows (3-4 files) +- [x] Create `specs/examples/` directory for advanced patterns & integrations (4-5 files) +- [x] Create `specs/archive/` directory to preserve all existing workflows unchanged +- [x] Verify directory structure matches the planned layout with appropriate README files for each category + +--- + +## Section 2: Workflow Migration and Archiving + +**Summary:** Move all 38 existing workflows to the archive directory unchanged, then create curated active workflows in the new structure using the `{subcategory}_{context}_{version}.yaml` naming convention. + +### Archive Migration Tasks + +- [x] Copy all 18 files from main `/specs/` directory to `specs/archive/` unchanged +- [x] Copy all 16 files from `/specs/examples/` directory to `specs/archive/` unchanged +- [x] Verify all 38 workflows are preserved exactly as-is in archive +- [x] Test that archived workflows remain functional and accessible + +### Active Workflow Creation Tasks + +#### Basic Category +- [x] Migrate `basic_chat.yaml` β†’ `basic/chat_simple_v1.yaml` with updated metadata +- [x] Migrate `basic_reasoning-01.yaml` β†’ `basic/reasoning_structured_v1.yaml` with updated metadata +- [ ] Create new `basic/content_basic_v1.yaml` as simplified content creation workflow + +#### Content Category +- [x] Create new `content/content_basic_v1.yaml` as general content creation workflow +- [ ] Migrate `agent-twitter-01_improved.yaml` β†’ `content/social_twitter_v2.yaml` with updated metadata +- [ ] Migrate `agent-linkedin-01.yaml` β†’ `content/social_linkedin_v1.yaml` with updated metadata +- [ ] Migrate `youtube_analyzer.yaml` β†’ `content/analysis_youtube_v1.yaml` with updated metadata +- [ ] Migrate `agent-tech_doc_creator.yaml` β†’ `content/documentation_technical_v1.yaml` with updated metadata + +#### Code Category +- [ ] Migrate `examples/ollama_coder.yaml` β†’ `code/generator_python_v1.yaml` with updated metadata +- [ ] Create new `code/analyzer_review_v1.yaml` for code analysis and review functionality +- [ ] Migrate `examples/claude_code_example.yaml` β†’ `code/integration_claude_code_v1.yaml` with updated metadata + +#### Examples Category +- [ ] Migrate `prompt_optimizer.yaml` β†’ `examples/automation_prompt_optimizer_v1.yaml` with updated metadata +- [ ] Migrate `examples/orchestration_workers.yaml` β†’ `examples/orchestration_workers_v1.yaml` with updated metadata +- [ ] Migrate `examples/mcp_workflow.yaml` β†’ `examples/integration_mcp_calculator_v1.yaml` with updated metadata +- [ ] Create new `examples/workflow_chaining_v1.yaml` for workflow composition patterns +- [ ] Create new `examples/evaluation_judge_v1.yaml` for evaluation workflow patterns + +--- + +## Section 3: CLI Enhancement Implementation + +**Summary:** Modify the CLI to support recursive directory scanning and simple filtering with `elf0 list-specs [directory]` syntax, updating both the command interface and underlying file discovery logic. + +### CLI Command Updates + +- [x] Modify `src/elf0/cli.py` line 545 `list_specs_command()` function signature +- [x] Add optional positional `directory` argument using `typer.Argument()` +- [x] Update help text to reflect new filtering capabilities +- [x] Implement directory validation with helpful error messages for invalid directories +- [x] Ensure consistent output formatting across all directory filters + +### File Discovery Updates + +- [x] Modify `src/elf0/utils/file_utils.py` `list_spec_files()` function at line 139 +- [x] Add `directory_filter: str | None = None` parameter to function signature +- [x] Implement recursive scanning logic when `directory_filter` is None +- [x] Implement single directory scanning when `directory_filter` is specified +- [x] Add directory existence validation and error handling +- [x] Maintain consistent sorting and output formatting +- [x] Update function docstring with new parameter and behavior documentation + +### CLI Usage Examples Verification + +- [x] Test `elf0 list-specs` shows all specs across all directories +- [x] Test `elf0 list-specs basic` shows only basic/ directory contents +- [x] Test `elf0 list-specs content` shows only content/ directory contents (empty for now) +- [x] Test `elf0 list-specs code` shows only code/ directory contents (empty for now) +- [x] Test `elf0 list-specs examples` shows only examples/ directory contents (empty for now) +- [x] Test `elf0 list-specs archive` shows only archived workflows +- [x] Test error handling for invalid directory names + +--- + +## Section 4: Schema Compliance and Optional Metadata + +**Summary:** Ensure all workflows comply with the existing Pydantic schema and optionally add discovery metadata through the eval section for enhanced organisation without breaking existing validation. + +### Schema Compliance Tasks + +- [x] Ensure `version: "v1"` field is present in all active workflows (existing schema requirement) +- [x] Add clear, concise `description` field to all active workflows (existing optional field) +- [x] Verify `runtime: "langgraph"` field is present in all workflows (existing schema requirement) +- [x] Validate all workflows against existing Pydantic `Spec` model without adding new required fields + +### Optional Discovery Metadata (via eval section) + +- [x] Add optional discovery metadata in `eval` section for workflows that benefit from it: + ```yaml + eval: + tags: ["content", "social-media", "automation"] + use_cases: ["Marketing campaigns", "Content planning"] + prerequisites: ["Basic understanding of social media"] + estimated_runtime: "30-60 seconds" + ``` +- [ ] Keep all discovery metadata optional to maintain backwards compatibility +- [ ] Use directory structure for primary categorisation instead of metadata fields + +### Schema Validation + +- [ ] Verify all migrated workflows validate against existing Pydantic `Spec` model +- [ ] Test that no existing workflows are broken by migrations +- [ ] Ensure CLI and file discovery work with existing schema structure + +--- + +## Section 5: Testing and Validation + +**Summary:** Ensure all existing pytest tests continue to pass whilst adding new test coverage for directory filtering, recursive scanning, and metadata validation functionality. + +### Existing Test Compatibility + +- [ ] Run full pytest test suite to verify no regressions +- [ ] Fix any broken tests due to file location changes +- [ ] Update test file paths to reference new directory structure +- [ ] Verify workflow functionality remains intact after migration + +### New Test Cases + +- [ ] Add test for CLI `list-specs` command with no directory argument (shows all specs) +- [ ] Add test for CLI `list-specs` command with valid directory argument (shows filtered specs) +- [ ] Add test for CLI `list-specs` command with invalid directory argument (returns helpful error) +- [ ] Add test for `list_spec_files()` function with recursive directory scanning + +### Validation Testing + +- [ ] Test all migrated workflows execute successfully in new locations +- [ ] Validate metadata consistency across all active workflows +- [ ] Test CLI functionality with various directory filter combinations +- [ ] Verify error messages are helpful and user-friendly + +--- + +## Section 6: Documentation Updates + +**Summary:** Create comprehensive documentation for the new structure including a single well-structured specs README, updated CLI help, and migration guides for users. + +### Comprehensive Documentation + +- [ ] Create `specs/README.md` with clear, structured documentation including: + - Overview of the workflow directory structure and organization + - Directory-by-directory explanation (basic/, content/, code/, examples/, archive/) + - Progression path from entry-level to advanced workflows + - Usage instructions with CLI examples (`elf0 list-specs`, `elf0 agent`) + - Naming convention explanation with examples + - How to find the right workflow for specific use cases + - Archive access instructions for legacy workflows + +### CLI Documentation Updates + +- [ ] Update CLI help text for `list-specs` command with new filtering options +- [ ] Add usage examples to command help documentation +- [ ] Update any references to old workflow paths in help text +- [ ] Ensure error messages provide clear guidance for users + +### User Migration Guide + +- [ ] Create migration guide explaining new directory structure +- [ ] Document how to find equivalent workflows in new structure +- [ ] Provide examples of using new CLI filtering capabilities +- [ ] Explain archive access for legacy workflow usage + +--- + +## Success Criteria + +### Immediate Validation +- [ ] All 38 existing workflows preserved unchanged in archive directory +- [ ] 15-18 curated active workflows properly organized in new directory structure +- [ ] CLI filtering works correctly for all directories (basic, content, code, examples, archive) +- [ ] All pytest tests pass without regressions +- [ ] All active workflows validate against existing Pydantic `Spec` model + +### Quality Verification +- [ ] Naming convention `{subcategory}_{context}_{version}.yaml` applied consistently +- [ ] Directory structure provides clear categorisation without requiring metadata fields +- [ ] Documentation provides clear guidance for workflow discovery and usage +- [ ] Optional discovery metadata used judiciously through eval section only + +### Functional Testing +- [ ] `elf0 list-specs` shows all workflows across directories +- [ ] Directory-specific filtering works for each category +- [ ] All migrated workflows execute successfully +- [ ] Error handling provides helpful user feedback +- [ ] Archive workflows remain accessible and functional +- [ ] No schema validation errors for any workflow + +--- + +## Implementation Notes + +- Preserve all existing workflow functionality during migration +- Test each migrated workflow individually before proceeding +- Maintain backwards compatibility where possible +- Focus on minimal, incremental changes to reduce risk +- Validate CLI changes thoroughly before committing +- Document any deviations from the original plan with clear rationale \ No newline at end of file diff --git a/docs/feature_youtube_transcript_mcp.md b/docs/feature_youtube_transcript_mcp.md new file mode 100644 index 0000000..be9765b --- /dev/null +++ b/docs/feature_youtube_transcript_mcp.md @@ -0,0 +1,176 @@ +# Feature: YouTube Transcript MCP Server + +## Overview +This feature implements a Model Context Protocol (MCP) server for extracting YouTube video transcripts. The server provides tools for transcript extraction, video metadata retrieval, and language detection. It integrates with Elf0 workflows to enable automated video content analysis, supporting use cases from educational note-taking to competitive intelligence. + +## πŸ“ Project Structure + +**Summary**: Creates a standalone MCP server project using uv package manager with proper Python project structure, including main server file, configuration, tests, and documentation. + +### Tasks: +- [x] Create `mcps/` directory if it doesn't exist +- [x] Initialize new uv project with `uv init mcp-youtube-transcript` +- [x] Create `pyproject.toml` with project metadata and dependencies +- [x] Create main `server.py` file for MCP server implementation +- [x] Create `README.md` with installation and usage instructions +- [ ] Create `tests/` directory with `test_youtube_transcript.py` +- [x] Add core dependencies: `youtube-transcript-api`, `pytube` (minimal approach) +- [ ] Add development dependencies: `pytest`, `pytest-asyncio` + +## πŸ› οΈ Core MCP Tools Implementation + +**Summary**: Implements 7 main MCP tools for YouTube transcript extraction and metadata retrieval, each with specific input parameters and return types for comprehensive video content analysis. + +### Tasks: +- [x] Implement `extract_transcript(url: str, language: str = "en")` tool returning clean transcript text +- [x] Implement `get_video_metadata(url: str)` tool returning title, channel, duration, description +- [ ] Implement `list_available_languages(url: str)` tool returning available caption languages +- [ ] Implement `extract_with_timestamps(url: str, language: str = "en")` tool returning timestamped segments +- [x] Implement `validate_youtube_url(url: str)` tool for URL validation +- [ ] Implement `extract_chapters(url: str)` tool returning video chapters if available +- [ ] Implement `get_captions_info(url: str)` tool returning caption availability and types +- [x] Use JSON-RPC over stdin/stdout (minimal approach instead of @mcp.tool decorators) +- [x] Add proper docstrings with parameter descriptions for each tool + +## πŸ“Š Data Models (Pydantic) + +**Summary**: Creates Pydantic models for structured data validation and serialization of all tool responses, ensuring consistent data formats across the MCP interface. + +### Tasks: +- [x] Create transcript result structure (using plain dicts for minimal implementation) +- [x] Create video metadata structure (using plain dicts for minimal implementation) +- [ ] Create `TimestampedSegment` model with fields: text, start_time, end_time, confidence_score +- [ ] Create `LanguageOption` model with fields: code, name, auto_generated, translatable +- [ ] Create `ChapterInfo` model with fields: title, start_time, end_time +- [ ] Create `CaptionInfo` model with fields: available_languages, has_manual, has_auto_generated +- [x] Add basic validation (URL format, required parameters) +- [x] Add proper type hints for function parameters + +## ⚠️ Error Handling Strategy + +**Summary**: Implements comprehensive error handling with custom exception classes, detailed error messages, fallback mechanisms, and logging for robust operation across various failure scenarios. + +### Tasks: +- [x] Implement basic error handling with try/catch blocks and meaningful messages +- [x] Handle URL validation errors with descriptive messages +- [x] Handle video access errors (private, deleted, etc.) +- [x] Handle transcript extraction failures with fallback to auto-generated +- [ ] Create custom exception classes for specific error types +- [x] Implement fallback mechanism to use auto-generated captions when manual ones unavailable +- [x] Add graceful degradation for partial failures (e.g., metadata without transcript) +- [x] Add detailed error messages with actionable suggestions for users +- [ ] Implement comprehensive logging with appropriate log levels for debugging +- [x] Add exception handling for network timeouts and connection errors + +## ⚑ Performance Optimizations + +**Summary**: Implements caching, async operations, and memory optimization to ensure fast response times and efficient resource usage for high-volume transcript extraction. + +### Tasks: +- [ ] Implement TTL cache for transcript results with configurable expiration +- [ ] Implement TTL cache for video metadata with configurable expiration +- [ ] Use async/await for all network calls to YouTube API +- [ ] Implement connection pooling for efficient network resource usage +- [ ] Add configurable timeouts for all external requests +- [ ] Implement retry strategies with exponential backoff for transient failures +- [ ] Add batch processing support for multiple URL requests +- [ ] Implement memory-efficient streaming for very large transcripts +- [ ] Add cache size limits to prevent memory exhaustion +- [ ] Monitor and log cache hit rates for performance tuning + +## πŸ”’ Security & Validation + +**Summary**: Implements comprehensive input validation, rate limiting, and security measures to prevent abuse and ensure safe operation of the MCP server. + +### Tasks: +- [ ] Create strict YouTube URL pattern matching for youtube.com domains +- [ ] Add support for youtu.be short URL format validation +- [ ] Add support for YouTube playlist and channel URL validation +- [ ] Implement input sanitization for all URL parameters +- [ ] Add language code validation against ISO 639-1 standards +- [ ] Implement rate limiting per client IP to prevent abuse +- [ ] Add request size limits to prevent DoS attacks +- [ ] Ensure no persistent storage of user data for privacy +- [ ] Add parameter length limits to prevent buffer overflow attacks +- [ ] Implement user agent rotation to avoid detection as bot + +## πŸ§ͺ Comprehensive Testing + +**Summary**: Creates thorough test coverage including unit tests, integration tests, edge cases, performance tests, and security validation to ensure reliable operation. + +### Tasks: +- [ ] Write unit tests for each MCP tool function with mocked responses +- [ ] Create integration tests with real YouTube URLs across content types +- [ ] Add edge case tests for private videos and deleted content +- [ ] Add edge case tests for invalid URLs and malformed inputs +- [ ] Add edge case tests for geo-restricted content +- [ ] Create performance tests for large transcripts (>1 hour videos) +- [ ] Add concurrent request testing for multiple simultaneous users +- [ ] Test memory usage with large transcript processing +- [ ] Verify all exception paths and error recovery scenarios +- [ ] Add security tests for input validation and injection attempts +- [ ] Test rate limiting functionality with burst requests +- [ ] Add tests for cache effectiveness and TTL expiration + +## πŸ”Œ Elf0 Integration + +**Summary**: Creates example workflow YAML files and documentation showing how to use the YouTube transcript MCP server within Elf0 workflows for video content analysis. + +### Tasks: +- [x] Create example workflow YAML for basic transcript extraction and analysis +- [x] Add MCP node configuration with proper server command +- [x] Implement multi-step workflow: extract transcript β†’ get metadata β†’ analyze content β†’ create summary +- [x] Add parameter binding examples using `${state.input}` syntax +- [ ] Create workflow for batch processing multiple YouTube URLs +- [ ] Add language-specific extraction workflow examples +- [ ] Document proper error handling in Elf0 workflows +- [ ] Create example workflows for different use cases (education, business, research) +- [x] Add CLI usage examples with `uv run elf0` commands +- [ ] Test end-to-end integration with real Elf0 workflows + +## πŸ“š Documentation & Configuration + +**Summary**: Creates comprehensive documentation including setup instructions, API reference, troubleshooting guide, and configurable options for deployment and operation. + +### Tasks: +- [x] Write detailed README.md with installation and setup instructions +- [x] Document all MCP tools with parameter descriptions and return types +- [x] Add troubleshooting section for common errors and solutions +- [x] Create API reference documentation for all tools and models +- [ ] Document configuration options for cache size and TTL settings +- [ ] Add configuration for request timeout values +- [ ] Document rate limiting parameters and recommendations +- [ ] Add preferred language fallback chain configuration +- [ ] Document chapter detection sensitivity settings +- [ ] Add logging level configuration options +- [ ] Create deployment guide for production environments +- [ ] Add performance tuning recommendations and benchmarks + +## πŸ“Š Performance Targets + +**Summary**: Defines specific performance benchmarks and monitoring metrics to ensure the MCP server meets production-grade requirements for speed, reliability, and scalability. + +### Tasks: +- [ ] Achieve < 5 seconds response time for transcript extraction +- [ ] Maintain < 100MB memory usage for typical transcripts +- [ ] Achieve > 80% cache hit rate for repeated requests +- [ ] Maintain < 1% error rate for valid YouTube URLs +- [ ] Support 50+ concurrent users without performance degradation +- [ ] Add response time monitoring and alerting +- [ ] Implement memory usage tracking and limits +- [ ] Add cache performance metrics and reporting +- [ ] Monitor error rates and implement automatic recovery +- [ ] Add load testing for concurrent user scenarios +- [ ] Implement performance regression testing +- [ ] Add automated performance benchmarking in CI/CD + +## βœ… Success Criteria + +The feature is complete when: +- [ ] All MCP tools are implemented and tested +- [ ] Error handling covers all edge cases with graceful fallbacks +- [ ] Performance targets are met under load testing +- [ ] Security validation prevents common attack vectors +- [ ] Integration with Elf0 workflows functions correctly +- [ ] Documentation is comprehensive and up-to-date +- [ ] Test coverage exceeds 90% for all critical code paths \ No newline at end of file diff --git a/docs/features/feature_functions-01.md b/docs/features/feature_functions-01.md index 99b60b2..ff68f06 100644 --- a/docs/features/feature_functions-01.md +++ b/docs/features/feature_functions-01.md @@ -1,4 +1,4 @@ -# docs_ai/features/feature_functions-01.md +# docs/features/feature_functions-01.md # Python Function Calling - MVP Implementation Guide diff --git a/docs/notes/notes_function_calling.md b/docs/notes/notes_function_calling.md index aef25e1..b3e8033 100644 --- a/docs/notes/notes_function_calling.md +++ b/docs/notes/notes_function_calling.md @@ -1,4 +1,4 @@ -# docs_ai/notes/notes_function_calling.md +# docs/notes/notes_function_calling.md # Python Function Calling Implementation Plan diff --git a/docs/notes/notes_specs_tidy.md b/docs/notes/notes_specs_tidy.md new file mode 100644 index 0000000..f291418 --- /dev/null +++ b/docs/notes/notes_specs_tidy.md @@ -0,0 +1,305 @@ +# Specs Directory Reorganization Notes + +## Current State Analysis + +### Existing Specs Inventory +**Main `/specs/` directory:** +- `basic_chat.yaml` - Simple conversational agent +- `basic_reasoning-01.yaml` - Structured thinking with Phi-4 system prompt +- `basic_reasoning-02.yaml` - (needs assessment) +- `agent-twitter-01.yaml` - Original Twitter post generator +- `agent-twitter-01_improved.yaml` - Enhanced Twitter/thread generator +- `agent-linkedin-01.yaml` - LinkedIn post optimization +- `youtube_analyzer.yaml` - Video transcript analysis +- `prompt_optimizer.yaml` - Iterative prompt improvement +- `agent-tech_doc_creator.yaml` - Comprehensive documentation generator +- `agent-creator-01.yaml` - (needs assessment) +- `agent-creator-02.yaml` - (needs assessment) +- `agent-simulation.yaml` - (needs assessment) +- `agent-optimizer.yaml` - (needs assessment) + +**Examples subdirectory:** +- `claude_code_example.yaml` - Multi-stage code generation with analysis +- `claude_code_self_improvement.yaml` - (needs assessment) +- `claude_sonnet_example.yaml` - (needs assessment) +- `interactive_assistant.yaml` - (needs assessment) +- `mcp_workflow.yaml` - MCP tool integration demo +- `ollama_chat.yaml` - (needs assessment) +- `ollama_coder.yaml` - Iterative code generation with evaluation +- `ollama_optimizer.yaml` - (needs assessment) +- `orchestration_workers.yaml` - Multi-agent worker pattern +- `prompt_chaining.yaml` - (needs assessment) +- `prompt_routing.yaml` - (needs assessment) +- `prompt_routing_with_reference.yaml` - (needs assessment) +- `python_calculator.yaml` - (needs assessment) +- `python_function_test.yaml` - (needs assessment) +- `python_text_processor.yaml` - (needs assessment) +- `simple_mcp.yaml` - (needs assessment) + +## Proposed Naming Convention + +### Structure: `{category}_{subcategory}_{context}_{version}.yaml` + +**Categories:** +1. **basic** - Foundational workflows for getting started +2. **content** - Content creation, analysis, and optimization +3. **automation** - Multi-step business automation workflows +4. **integration** - External tools, MCP, and API integrations +5. **development** - Code generation, analysis, and technical workflows + +### Naming Examples: +- `basic_chat_simple_v1.yaml` +- `basic_reasoning_structured_v1.yaml` +- `content_social_twitter_v2.yaml` +- `content_social_linkedin_v1.yaml` +- `content_analysis_youtube_v1.yaml` +- `content_documentation_technical_v1.yaml` +- `automation_workflow_orchestrator_v1.yaml` +- `automation_prompt_optimizer_v1.yaml` +- `integration_mcp_calculator_v1.yaml` +- `integration_mcp_youtube_v1.yaml` +- `development_code_generator_v1.yaml` +- `development_code_evaluator_v1.yaml` + +## Reorganization Strategy + +### Tier 1: Basic/Getting Started +**Purpose:** Simple workflows for new users to understand elf0 capabilities +- `basic_chat_simple_v1.yaml` ← `basic_chat.yaml` +- `basic_reasoning_structured_v1.yaml` ← `basic_reasoning-01.yaml` +- `basic_content_generator_v1.yaml` ← NEW (simplified content creation) + +### Tier 2: Content Creation & Analysis +**Purpose:** Practical content workflows for business/entrepreneurial users +- `content_social_twitter_v2.yaml` ← `agent-twitter-01_improved.yaml` +- `content_social_linkedin_v1.yaml` ← `agent-linkedin-01.yaml` +- `content_analysis_youtube_v1.yaml` ← `youtube_analyzer.yaml` +- `content_documentation_technical_v1.yaml` ← `agent-tech_doc_creator.yaml` (simplified) +- `content_blog_post_v1.yaml` ← NEW +- `content_email_marketing_v1.yaml` ← NEW + +### Tier 3: Advanced Automation +**Purpose:** Sophisticated multi-agent and iterative workflows +- `automation_prompt_optimizer_v1.yaml` ← `prompt_optimizer.yaml` +- `automation_workflow_orchestrator_v1.yaml` ← `examples/orchestration_workers.yaml` +- `automation_meeting_summarizer_v1.yaml` ← NEW +- `automation_report_generator_v1.yaml` ← NEW + +### Tier 4: Integration & External Tools +**Purpose:** MCP integrations and external service workflows +- `integration_mcp_youtube_v1.yaml` ← `youtube_analyzer.yaml` (MCP parts) +- `integration_mcp_calculator_v1.yaml` ← `examples/mcp_workflow.yaml` +- `integration_claude_code_v1.yaml` ← `examples/claude_code_example.yaml` +- `integration_api_research_v1.yaml` ← NEW (web search, data gathering) + +### Tier 5: Development & Technical +**Purpose:** Code generation, analysis, and technical workflows +- `development_code_generator_v1.yaml` ← `examples/ollama_coder.yaml` +- `development_code_analyzer_v1.yaml` ← NEW (code review, documentation) +- `development_testing_generator_v1.yaml` ← NEW (test case generation) + +## Files to Remove/Archive + +### Outdated/Duplicate: +- `agent-twitter-01.yaml` (superseded by improved version) +- `basic_reasoning-02.yaml` (if less useful than 01) + +### Assess for Value: +- `agent-creator-01.yaml` & `agent-creator-02.yaml` +- `agent-simulation.yaml` +- `agent-optimizer.yaml` +- Most files in `/examples/` subdirectory + +### Examples Directory: +- Promote valuable workflows to main specs +- Archive or remove redundant examples +- Keep only essential demonstration files + +## New Workflows Needed + +### Business/Entrepreneurial Focus: +1. **Email Marketing Generator** - Create email campaigns, newsletters +2. **Meeting Summarizer** - Process meeting transcripts into action items +3. **Competitive Analysis** - Research and analyze competitors +4. **Blog Post Generator** - SEO-optimized blog content creation +5. **Social Media Scheduler** - Multi-platform content planning +6. **Customer Support Responses** - Template-based response generation +7. **Proposal Writer** - Business proposal and contract generation +8. **Market Research Analyzer** - Process and synthesize market data + +### Technical/Development Focus: +1. **API Documentation Generator** - From code to comprehensive docs +2. **Test Case Generator** - Automated test creation from requirements +3. **Code Review Assistant** - Systematic code analysis and feedback +4. **Database Schema Designer** - Entity relationship modeling +5. **DevOps Automation** - CI/CD pipeline configuration +6. **Security Audit Workflows** - Code security analysis + +## Quality Standards + +### Each Workflow Must Include: +- Clear description of purpose and use case +- Appropriate LLM model selection and temperature settings +- Well-structured prompts with specific instructions +- Proper error handling and edge cases +- Example usage in comments +- Version control for iterative improvements + +### Documentation Requirements: +- Target audience clearly defined +- Prerequisites and dependencies listed +- Expected input/output formats specified +- Troubleshooting guidance included +- Integration possibilities noted + +## Implementation Roadmap + +### Phase 1: Assessment & Cleanup +1. Review all files in `/examples/` for value and functionality +2. Test existing workflows to ensure they work correctly +3. Identify duplicate or outdated specifications +4. Document current functionality and use cases + +### Phase 2: Reorganization +1. Apply new naming convention to valuable existing files +2. Simplify overly complex workflows for better usability +3. Move promoted files from `/examples/` to main `/specs/` +4. Archive or remove redundant/outdated files + +### Phase 3: Enhancement +1. Create missing business-focused workflows +2. Add comprehensive documentation and examples +3. Ensure consistent quality and formatting across all specs +4. Test all workflows for functionality and user experience + +### Phase 4: Documentation +1. Create comprehensive README with workflow descriptions +2. Provide clear progression guide from basic to advanced +3. Include use case examples and business applications +4. Document best practices for creating custom workflows + +## Success Criteria + +### User Experience: +- New users can quickly find and use basic workflows +- Clear progression path from simple to complex use cases +- Business users find immediately valuable automation workflows +- Technical users have sophisticated development tools available + +### Maintenance: +- Consistent naming makes finding and organizing workflows easy +- Version control allows for iterative improvements +- Clear categorization supports future expansion +- Documentation enables community contributions + +### Business Value: +- Workflows address real entrepreneurial and technical needs +- Examples demonstrate clear ROI and time savings +- Integration possibilities expand use case scenarios +- Quality standards ensure reliable, professional results + +## Target Audience Analysis + +### Primary Users: +1. **Technical Entrepreneurs** - Need automation for business processes +2. **Software Developers** - Require code generation and analysis tools +3. **Content Creators** - Want efficient content production workflows +4. **Business Analysts** - Need data processing and report generation +5. **Consultants** - Require client deliverable automation + +### User Journey Mapping: +1. **Discovery** - Find elf0, explore basic workflows +2. **Learning** - Progress from simple to complex use cases +3. **Adoption** - Integrate workflows into daily work +4. **Expansion** - Create custom workflows, contribute back +5. **Mastery** - Build sophisticated automation systems + +## Workflow Complexity Levels + +### Level 1: Basic (Single Agent) +- Simple input β†’ LLM β†’ output workflows +- One-step transformations +- Clear, predictable results +- Examples: basic chat, simple content generation + +### Level 2: Structured (Multi-Node Sequential) +- Multiple LLM calls in sequence +- State passing between nodes +- Basic reasoning and analysis +- Examples: content optimization, structured analysis + +### Level 3: Advanced (Conditional/Branching) +- Decision points and conditional logic +- Dynamic workflow paths +- Error handling and fallbacks +- Examples: content routing, adaptive responses + +### Level 4: Complex (Multi-Agent/Iterative) +- Multiple specialized LLMs +- Feedback loops and iteration +- Quality evaluation and improvement +- Examples: code generation with review, prompt optimization + +### Level 5: Expert (Integration/Orchestration) +- External tool integration (MCP) +- Multi-system coordination +- Complex business logic +- Examples: full automation pipelines, API integrations + +## Metadata Standards + +### Required Fields for All Specs: +```yaml +version: "v1" +description: "Clear, concise description of purpose and use case" +runtime: "langgraph" +complexity_level: "basic|structured|advanced|complex|expert" +target_audience: ["entrepreneurs", "developers", "content_creators", "analysts"] +use_cases: ["specific use case 1", "specific use case 2"] +prerequisites: ["required knowledge", "required setup"] +estimated_runtime: "quick|medium|long" +``` + +### Optional Metadata: +```yaml +tags: ["content", "automation", "analysis"] +related_workflows: ["workflow1.yaml", "workflow2.yaml"] +integration_requirements: ["mcp", "external_api", "file_system"] +example_prompts: + - "Example user input 1" + - "Example user input 2" +``` + +## Quality Assurance Checklist + +### Before Publishing Any Workflow: +- [ ] Tested with multiple example inputs +- [ ] Error cases handled gracefully +- [ ] Documentation is clear and complete +- [ ] Naming follows convention +- [ ] Version is appropriate +- [ ] Target audience is clearly defined +- [ ] Use cases are practical and valuable +- [ ] LLM selection is optimal for task +- [ ] Temperature and parameters are tuned +- [ ] Prompts are clear and unambiguous +- [ ] Output format is consistent +- [ ] Integration points are documented + +## Community Contribution Guidelines + +### Workflow Submission Process: +1. Follow naming convention +2. Include comprehensive metadata +3. Provide example usage +4. Test thoroughly +5. Submit with documentation +6. Respond to review feedback + +### Review Criteria: +- Technical correctness +- Business value +- Code quality +- Documentation completeness +- User experience +- Maintenance considerations \ No newline at end of file diff --git a/docs/notes/notes_user_feedback.md b/docs/notes/notes_user_feedback.md index 1da4921..b903741 100644 --- a/docs/notes/notes_user_feedback.md +++ b/docs/notes/notes_user_feedback.md @@ -2,7 +2,7 @@ **Prompt:** ``` - Create a prompt that will allow a YAML spec agent @docs_ai/project_overview.md @docs_specs/spec_schema.md to prompt the user for input, we need a node that allows the LLM can ask the user a question and wait for the answer, and then incorporate this in the context of the overall process. + Create a prompt that will allow a YAML spec agent @docs/project_overview.md @docs_specs/spec_schema.md to prompt the user for input, we need a node that allows the LLM can ask the user a question and wait for the answer, and then incorporate this in the context of the overall process. ``` ## Overview diff --git a/docs/notes_elf_sdk.md b/docs/notes_elf_sdk.md new file mode 100644 index 0000000..824dc83 --- /dev/null +++ b/docs/notes_elf_sdk.md @@ -0,0 +1,485 @@ +# Elf0 SDK Design Notes + +## Current State Analysis + +Elf0 is currently a CLI-first application with a well-structured core that can be transformed into a programmable Python SDK. The existing architecture provides: + +- **Core workflow engine** (`src/elf0/core/`) - LangGraph compilation, LLM clients, spec loading +- **CLI interface** (`src/elf0/cli.py`) - Command-line wrapper around core functionality +- **YAML workflow specifications** (`specs/`) - Declarative workflow definitions +- **Rich integrations** - OpenAI, Anthropic, Ollama, MCP, Claude Code SDK + +## SDK Conversion Goals + +Transform Elf0 into a **dual-purpose package**: +1. **Python SDK** for embedding AI workflows in applications +2. **CLI tool** (existing functionality preserved) + +### Design Principles +- **Minimal core changes** - Leverage existing robust architecture +- **Backward compatibility** - All CLI commands continue working +- **Progressive enhancement** - Gradual adoption path from CLI to SDK +- **Clean separation** - SDK layer wraps core, CLI becomes SDK consumer + +## Proposed Architecture + +### New SDK Module Structure +``` +src/elf0/ +β”œβ”€β”€ __init__.py # Main SDK exports and version +β”œβ”€β”€ sdk/ # New SDK interface layer +β”‚ β”œβ”€β”€ __init__.py # SDK exports +β”‚ β”œβ”€β”€ workflow.py # Primary Workflow class +β”‚ β”œβ”€β”€ client.py # Session management and high-level API +β”‚ β”œβ”€β”€ builder.py # Programmatic workflow construction +β”‚ β”œβ”€β”€ config.py # SDK configuration management +β”‚ └── integrations/ # Framework-specific helpers +β”‚ β”œβ”€β”€ __init__.py +β”‚ β”œβ”€β”€ fastapi.py # FastAPI middleware +β”‚ β”œβ”€β”€ django.py # Django integration +β”‚ └── jupyter.py # Jupyter notebook helpers +β”œβ”€β”€ core/ # Existing engine (minimal changes) +β”‚ β”œβ”€β”€ compiler.py # Workflow β†’ LangGraph compiler +β”‚ β”œβ”€β”€ spec.py # YAML spec models +β”‚ β”œβ”€β”€ runner.py # Low-level execution +β”‚ β”œβ”€β”€ config.py # Enhanced with SDK support +β”‚ └── ... +β”œβ”€β”€ cli.py # Refactored to use SDK internally +└── utils/ # Existing utilities +``` + +## SDK Public API Design + +### 1. Primary Workflow Class + +```python +from elf0 import ElfWorkflow + +# Load from YAML +workflow = ElfWorkflow.from_yaml("path/to/workflow.yaml") +result = workflow.run("Your prompt here", session_id="my_session") + +# Load from dict/spec +spec_dict = {"name": "my_workflow", "llms": {...}, "workflow": {...}} +workflow = ElfWorkflow.from_dict(spec_dict) + +# Access workflow metadata +print(workflow.name) +print(workflow.description) +print(workflow.tags) +``` + +### 2. Client-Based API (Session Management) + +```python +from elf0 import ElfClient + +# Create client with configuration +client = ElfClient( + openai_api_key="sk-...", + anthropic_api_key="sk-ant-...", + default_session_id="app_session" +) + +# Run workflows +result = client.run_workflow( + "specs/basic/chat_simple_v1.yaml", + "Hello, world!", + session_id="user_123" +) + +# Manage sessions +client.clear_session("user_123") +client.list_sessions() +``` + +### 3. Programmatic Workflow Builder + +```python +from elf0 import WorkflowBuilder + +# Fluent API for creating workflows +workflow = WorkflowBuilder() \ + .name("dynamic_chat") \ + .description("Programmatically created workflow") \ + .add_llm("gpt4", type="openai", model="gpt-4", temperature=0.7) \ + .add_agent_node( + id="chat", + llm="gpt4", + prompt="You are a helpful assistant. User: {input}" + ) \ + .set_stop_node("chat") \ + .build() + +result = workflow.run("What's the weather like?") +``` + +### 4. Configuration Management + +```python +import elf0 + +# Global configuration +elf0.configure( + openai_api_key="sk-...", + anthropic_api_key="sk-ant-...", + default_temperature=0.8, + default_session_id="global" +) + +# Per-workflow configuration +workflow.configure(temperature=0.5, max_tokens=1000) + +# Environment-based configuration (existing .env support) +elf0.configure_from_env() +``` + +## Implementation Plan + +### Phase 1: Core SDK Foundation + +#### 1.1 Create SDK Interface Layer +- **`src/elf0/sdk/workflow.py`** - Main Workflow class wrapping core functionality +- **`src/elf0/sdk/client.py`** - Client with session management and configuration +- **`src/elf0/__init__.py`** - Export main SDK classes and version + +```python +# src/elf0/__init__.py +__version__ = "0.1.0" + +from .sdk.workflow import ElfWorkflow +from .sdk.client import ElfClient +from .sdk.builder import WorkflowBuilder +from .sdk.config import configure, configure_from_env + +__all__ = [ + "ElfWorkflow", + "ElfClient", + "WorkflowBuilder", + "configure", + "configure_from_env" +] +``` + +#### 1.2 Enhance Core Engine +- **Modify `core/config.py`** - Add programmatic configuration support +- **Update `core/runner.py`** - Support SDK calls with better error handling +- **Add result formatting** - Consistent output format for SDK consumers + +#### 1.3 Package Configuration +- **Update `pyproject.toml`** - Add library classifiers, maintain CLI entry point +- **Version management** - Use `__version__` in `__init__.py` + +### Phase 2: Advanced SDK Features + +#### 2.1 Programmatic Workflow Builder +```python +# src/elf0/sdk/builder.py +class WorkflowBuilder: + def __init__(self): + self._spec = { + "name": "", + "llms": {}, + "workflow": {"type": "sequential", "nodes": [], "edges": []} + } + + def name(self, name: str) -> "WorkflowBuilder": + self._spec["name"] = name + return self + + def add_llm(self, id: str, type: str, model: str, **kwargs) -> "WorkflowBuilder": + self._spec["llms"][id] = {"type": type, "model_name": model, **kwargs} + return self + + def add_agent_node(self, id: str, llm: str, prompt: str, **kwargs) -> "WorkflowBuilder": + node = { + "id": id, + "kind": "agent", + "ref": llm, + "config": {"prompt": prompt}, + **kwargs + } + self._spec["workflow"]["nodes"].append(node) + return self + + def build(self) -> "ElfWorkflow": + return ElfWorkflow.from_dict(self._spec) +``` + +#### 2.2 Enhanced Session Management +- **Persistent sessions** - Store session state across runs +- **Context management** - Automatic cleanup and resource management +- **Async support** - `async def run_async()` methods + +### Phase 3: CLI as SDK Consumer + +#### 3.1 Refactor CLI to Use SDK +```python +# Modified src/elf0/cli.py +from elf0 import ElfClient + +def agent_command(spec_path: Path, prompt: str, session_id: str, ...): + """Execute an agent workflow - now using SDK internally.""" + client = ElfClient() + + try: + result = client.run_workflow(spec_path, prompt, session_id) + display_workflow_result(result) + except Exception as e: + # Enhanced error handling + handle_sdk_error(e) +``` + +## Usage Examples + +### 1. Web Application Integration + +```python +from flask import Flask, request, jsonify +from elf0 import ElfClient + +app = Flask(__name__) +elf_client = ElfClient() + +@app.route('/ai/chat', methods=['POST']) +def ai_chat(): + data = request.json + prompt = data['prompt'] + session_id = data.get('session_id', f"web_{request.remote_addr}") + + try: + result = elf_client.run_workflow( + "specs/basic/chat_simple_v1.yaml", + prompt, + session_id=session_id + ) + return jsonify({ + 'success': True, + 'response': result['output'], + 'session_id': session_id + }) + except Exception as e: + return jsonify({'success': False, 'error': str(e)}), 500 + +@app.route('/ai/sessions/', methods=['DELETE']) +def clear_session(session_id): + elf_client.clear_session(session_id) + return jsonify({'success': True}) +``` + +### 2. Jupyter Notebook Integration + +```python +# In a Jupyter cell +from elf0 import ElfWorkflow +import IPython.display as display + +workflow = ElfWorkflow.from_yaml("specs/content/content_basic_v1.yaml") + +# Interactive widget for workflow execution +def run_content_generator(prompt): + result = workflow.run(prompt, session_id="notebook") + display.Markdown(result['output']) + +# Use with ipywidgets +import ipywidgets as widgets +prompt_widget = widgets.Text(placeholder="Enter your content prompt...") +output_widget = widgets.Output() + +def on_submit(change): + with output_widget: + output_widget.clear_output() + run_content_generator(prompt_widget.value) + +prompt_widget.observe(on_submit, names='value') +display.VBox([prompt_widget, output_widget]) +``` + +### 3. Batch Processing + +```python +from elf0 import ElfWorkflow +import asyncio +from concurrent.futures import ThreadPoolExecutor + +workflow = ElfWorkflow.from_yaml("specs/content/linkedin_post.yaml") + +def process_single(prompt): + return workflow.run(prompt, session_id=f"batch_{id(prompt)}") + +async def process_batch(prompts, max_workers=5): + loop = asyncio.get_event_loop() + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + tasks = [ + loop.run_in_executor(executor, process_single, prompt) + for prompt in prompts + ] + results = await asyncio.gather(*tasks) + + return results + +# Usage +prompts = [ + "Write about AI in healthcare", + "Write about renewable energy", + "Write about remote work trends" +] + +results = asyncio.run(process_batch(prompts)) +for i, result in enumerate(results): + print(f"Result {i+1}: {result['output'][:100]}...") +``` + +### 4. Dynamic Workflow Creation + +```python +from elf0 import WorkflowBuilder + +def create_analysis_workflow(model_type="openai", complexity="basic"): + builder = WorkflowBuilder() \ + .name(f"{complexity}_analysis") \ + .description(f"Dynamic {complexity} analysis workflow") + + # Add appropriate LLM based on complexity + if complexity == "basic": + builder.add_llm("analyzer", type=model_type, model="gpt-4o-mini", temperature=0.3) + else: + builder.add_llm("analyzer", type=model_type, model="gpt-4", temperature=0.5) + + # Build analysis chain + builder.add_agent_node( + id="analyze", + llm="analyzer", + prompt=f"Perform {complexity} analysis of: {{input}}" + ).set_stop_node("analyze") + + return builder.build() + +# Create different workflows for different use cases +basic_analyzer = create_analysis_workflow("openai", "basic") +advanced_analyzer = create_analysis_workflow("anthropic", "advanced") + +# Use them +basic_result = basic_analyzer.run("Analyze this simple dataset") +advanced_result = advanced_analyzer.run("Perform deep analysis of market trends") +``` + +## Migration Strategy + +### Backward Compatibility +- **All CLI commands work unchanged** - `uv run elf0 agent workflow.yaml --prompt "..."` +- **YAML workflow format preserved** - Existing specs continue working +- **Environment variables supported** - `.env` files and `export` commands + +### Gradual Adoption Path +1. **Install**: Same package (`pip install elf0` or `uv pip install elf0`) +2. **Use CLI**: Continue using CLI as before +3. **Import SDK**: Start with `from elf0 import ElfWorkflow` +4. **Migrate incrementally**: Replace CLI subprocess calls with SDK calls + +### Breaking Changes (None in Phase 1) +- No breaking changes to existing functionality +- CLI remains primary interface initially +- SDK additive, not replacement + +## Testing Strategy + +### Unit Tests +- SDK classes and methods +- Configuration management +- Workflow builder validation +- Error handling and edge cases + +### Integration Tests +- SDK + LLM providers (OpenAI, Anthropic, Ollama) +- SDK + MCP servers +- SDK + Claude Code integration +- Session management across multiple workflows + +### Compatibility Tests +- CLI commands still work after SDK implementation +- YAML workflow specs continue functioning +- Environment variable configuration preserved + +### Example Tests +```python +# tests/sdk/test_workflow.py +import pytest +from elf0 import ElfWorkflow + +def test_workflow_from_yaml(): + workflow = ElfWorkflow.from_yaml("tests/fixtures/simple_chat.yaml") + assert workflow.name == "simple_chat" + +def test_workflow_run(): + workflow = ElfWorkflow.from_yaml("tests/fixtures/simple_chat.yaml") + result = workflow.run("Hello", session_id="test") + assert "output" in result + assert isinstance(result["output"], str) +``` + +## Package Configuration Updates + +### pyproject.toml Changes +```toml +[project] +name = "elf0" +description = "AI workflow engine and SDK for building agent-powered applications" +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.13", + "Topic :: Software Development :: Libraries :: Python Modules", # SDK + "Topic :: System :: Systems Administration", # CLI + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] + +# Maintain CLI entry point +[project.scripts] +elf0 = "elf0.cli:app" + +# Optional dependencies for integrations +[project.optional-dependencies] +web = ["fastapi>=0.104.0", "uvicorn>=0.24.0"] +django = ["django>=4.2.0"] +jupyter = ["ipywidgets>=8.0.0", "jupyter>=1.0.0"] +all = ["elf0[web,django,jupyter]"] +``` + +## Documentation Structure + +### SDK Documentation +- **API Reference** - Auto-generated from docstrings +- **Quickstart Guide** - Basic SDK usage +- **Integration Examples** - Flask, Django, FastAPI, Jupyter +- **Migration Guide** - CLI to SDK transition + +### Enhanced CLI Documentation +- **Existing CLI docs** - Preserved and enhanced +- **SDK Integration** - How to use SDK alongside CLI +- **Best Practices** - When to use CLI vs SDK + +## Benefits of This Approach + +### For Developers +- **Embed AI workflows** in existing applications +- **Programmatic control** over workflow execution +- **Session management** for user interactions +- **Dynamic workflow creation** based on runtime conditions + +### For Organizations +- **Integration flexibility** - Works with existing tech stacks +- **Gradual adoption** - No need to rewrite existing CLI usage +- **Scalable architecture** - SDK supports high-throughput applications +- **Consistent interface** - Same underlying engine for CLI and SDK + +### For the Elf0 Project +- **Broader adoption** - Appeals to both CLI users and Python developers +- **Ecosystem growth** - Enables third-party integrations and extensions +- **Maintainability** - Single codebase for both CLI and SDK +- **Future-proof** - Foundation for advanced features (web UI, cloud deployment) + +This design transforms Elf0 from a CLI tool into a comprehensive AI workflow platform while preserving everything that makes it powerful today. \ No newline at end of file diff --git a/docs/notes_fix_agent_principles.md b/docs/notes_fix_agent_principles.md new file mode 100644 index 0000000..f3d58dd --- /dev/null +++ b/docs/notes_fix_agent_principles.md @@ -0,0 +1,217 @@ +# Agent Workflow Design Principles: Learning from LangGraph Channel Conflicts + +## Problem Analysis: The `simulate_salary.yaml` Issue + +### What Went Wrong + +The `simulate_salary.yaml` workflow failed with `INVALID_CONCURRENT_GRAPH_UPDATE` errors due to several design anti-patterns: + +1. **Fan-out Anti-pattern**: The `scenario_processor` node fanned out to 4 nodes simultaneously (`ben_agent`, `patrick_agent`, `lily_agent`, `context_injector`), causing multiple concurrent writes to the input channel. + +2. **Input Channel Violations**: LangGraph received multiple identical input values simultaneously, violating the "one value per step" rule for input channels. + +3. **Complex Dependency Web**: The `outcome_synthesizer` node received inputs from 6 different sources, creating a complex dependency graph that was difficult to manage. + +4. **Incomplete State References**: Several nodes had malformed prompts with empty `Scenario:` sections instead of proper `{state.scenario_input}` references. + +5. **Runtime Mismatch**: Used `custom_graph` workflow type with patterns that don't align with LangGraph's execution model. + +### Root Technical Issue + +LangGraph expects either: +- Sequential execution with clear single-path flow +- Custom graphs with proper state management using annotated types and reducers +- Explicit handling of concurrent updates through reducer functions + +The workflow violated these constraints by creating implicit concurrent state access without proper coordination. + +## Prevention Principles + +### 1. Input Isolation Principle + +**Rule**: Only ONE node should consume the raw `{input}` variable. + +```yaml +# βœ… CORRECT: Single input consumer +- id: "entry_node" + prompt: "Process input: {input}" + output_key: "processed_input" + +- id: "next_node" + prompt: "Continue with: {state.processed_input}" +``` + +```yaml +# ❌ WRONG: Multiple input consumers +- id: "node_a" + prompt: "Process: {input}" +- id: "node_b" + prompt: "Also process: {input}" # Violates single input rule +``` + +### 2. Linear Flow Preference + +**Rule**: Prefer sequential workflows over complex custom graphs unless parallelism is absolutely necessary. + +```yaml +# βœ… CORRECT: Linear sequential flow +workflow: + type: "sequential" + nodes: + - id: "step1" + - id: "step2" + - id: "step3" + edges: + - source: "step1" + target: "step2" + - source: "step2" + target: "step3" +``` + +```yaml +# ❌ RISKY: Complex fan-out pattern +workflow: + type: "custom_graph" + edges: + - source: "step1" + target: "step2a" + - source: "step1" + target: "step2b" # Fan-out creates complexity + - source: "step1" + target: "step2c" +``` + +### 3. State Reference Completeness + +**Rule**: Every node that needs scenario/context data should explicitly reference state variables. + +```yaml +# βœ… CORRECT: Complete state reference +prompt: | + You are Agent X analyzing this scenario. + + Scenario: + {state.scenario_input} + + Provide your analysis... +``` + +```yaml +# ❌ WRONG: Missing state reference +prompt: | + You are Agent X analyzing this scenario. + + Scenario: + + Provide your analysis... # Empty scenario reference +``` + +### 4. Explicit Dependency Management + +**Rule**: Make data dependencies explicit in graph structure rather than relying on implicit state sharing. + +```yaml +# βœ… CORRECT: Clear dependency chain +edges: + - source: "data_collector" + target: "analyzer" + - source: "analyzer" + target: "synthesizer" # Clear who feeds whom +``` + +```yaml +# ❌ UNCLEAR: Implicit dependencies +edges: + - source: "step1" + target: "final_step" # Missing intermediate dependencies + - source: "step2" + target: "final_step" + - source: "step3" + target: "final_step" # Complex convergence without clear data flow +``` + +### 5. Runtime Constraint Awareness + +**Rule**: Understand and design within the constraints of your target runtime. + +**LangGraph Constraints:** +- Input channels can only receive one value per step +- Concurrent updates require explicit reducer functions +- Fan-out patterns need careful state management +- Custom graphs require more sophisticated error handling + +**Design Accordingly:** +- Use sequential workflows for linear processes +- Use custom graphs only when true parallelism is needed +- Test complex patterns incrementally + +### 6. Incremental Testing Principle + +**Rule**: Build and test workflows incrementally rather than creating complex structures all at once. + +**Progressive Development:** +1. Start with 2-3 nodes in sequential flow +2. Test basic input β†’ processing β†’ output flow +3. Add one node at a time +4. Test after each addition +5. Only add complexity (fan-out, convergence) after basics work + +### 7. State Key Uniqueness + +**Rule**: Ensure each node outputs to a unique state key to avoid conflicts. + +```yaml +# βœ… CORRECT: Unique output keys +nodes: + - id: "agent_a" + output_key: "agent_a_response" + - id: "agent_b" + output_key: "agent_b_response" +``` + +```yaml +# ❌ WRONG: Conflicting output keys +nodes: + - id: "agent_a" + output_key: "response" + - id: "agent_b" + output_key: "response" # Potential conflict +``` + +### 8. Error Message Analysis + +**Rule**: When LangGraph errors occur, analyse the technical details rather than just fixing symptoms. + +**For `INVALID_CONCURRENT_GRAPH_UPDATE`:** +- Check for fan-out patterns +- Verify input isolation +- Look for concurrent state writes +- Consider switching to sequential workflow +- Add explicit reducers if concurrent updates are needed + +## Implementation Checklist + +Before deploying a complex workflow: + +- [ ] Only one node consumes `{input}` +- [ ] All other nodes use `{state.variable_name}` references +- [ ] No empty or incomplete prompt templates +- [ ] Clear linear data flow (prefer sequential over custom_graph) +- [ ] Unique output keys for all nodes +- [ ] Incremental testing completed +- [ ] Runtime constraints understood and respected +- [ ] Error handling considered for complex patterns + +## Recovery Strategies + +When facing channel conflicts: + +1. **Simplify First**: Convert custom_graph to sequential workflow +2. **Eliminate Fan-out**: Create linear execution chains +3. **Fix State References**: Ensure all nodes properly reference state +4. **Test Minimally**: Start with 2-3 nodes and build up +5. **Consider Alternatives**: Sometimes the workflow design needs fundamental changes + +## Key Takeaway + +**Complex multi-agent simulations can be achieved with simple linear workflows.** The illusion of parallelism in AI agent interactions can often be maintained through sequential execution where each agent builds upon the previous agent's output, creating rich interactive dynamics without LangGraph's concurrency complications. \ No newline at end of file diff --git a/docs/notes_fix_agent_principles_cust_graph.md b/docs/notes_fix_agent_principles_cust_graph.md new file mode 100644 index 0000000..fcff705 --- /dev/null +++ b/docs/notes_fix_agent_principles_cust_graph.md @@ -0,0 +1,313 @@ +# Custom Graph Agent Design Principles: Preventing Channel Conflicts + +## Overview + +While sequential workflows are safer and simpler, there are legitimate use cases for `custom_graph` workflows in LangGraph. This document outlines how to properly design custom graph YAML agents that avoid channel conflicts while leveraging parallel execution and complex routing. + +## When to Use Custom Graphs + +**Use custom graphs when you need:** +- True parallel execution of independent agents +- Complex conditional routing based on state +- Dynamic workflow patterns that can't be expressed linearly +- Multi-agent collaboration with different interaction patterns + +**Stick with sequential when you have:** +- Linear processing workflows +- Simple multi-step simulations +- Sequential agent interactions +- Single-path decision flows + +## LangGraph Custom Graph Architecture + +### State Management Foundation + +LangGraph uses a shared state system where: +- **State**: A shared data structure (TypedDict or Pydantic model) +- **Nodes**: Functions that read and update state +- **Edges**: Define execution flow and routing +- **Reducers**: Functions that handle concurrent state updates + +### Key Technical Concepts + +#### 1. State Channels and Reducers + +```yaml +# Problem: Multiple nodes updating same key without reducer +state_key: "value" # Default override behavior + +# Solution: Use distinct keys or understand reducer behavior +node_a_output: "value_a" +node_b_output: "value_b" +``` + +#### 2. Concurrent Updates + +LangGraph executes nodes in "super-steps" where: +- All nodes ready to execute run in parallel +- State updates are collected and applied via reducers +- Next super-step begins with updated state + +## Custom Graph Design Patterns + +### 1. Fan-out β†’ Fan-in Pattern (SAFE) + +```yaml +workflow: + type: "custom_graph" + nodes: + - id: "input_processor" + prompt: "Process input: {input}" + output_key: "processed_input" + + - id: "agent_a" + prompt: "Analyze: {state.processed_input}" + output_key: "analysis_a" + + - id: "agent_b" + prompt: "Evaluate: {state.processed_input}" + output_key: "analysis_b" + + - id: "synthesizer" + prompt: | + Combine insights: + Analysis A: {state.analysis_a} + Analysis B: {state.analysis_b} + output_key: "final_result" + + edges: + - source: "input_processor" + target: "agent_a" + - source: "input_processor" + target: "agent_b" + - source: "agent_a" + target: "synthesizer" + - source: "agent_b" + target: "synthesizer" +``` + +**Why this works:** +- Single input consumer (`input_processor`) +- Parallel nodes use same state key (`processed_input`) but different output keys +- Convergence node explicitly references all parallel outputs +- No concurrent writes to same state keys + +### 2. Conditional Routing Pattern (SAFE) + +```yaml +workflow: + type: "custom_graph" + nodes: + - id: "decision_maker" + prompt: "Analyze and route: {input}" + output_key: "routing_decision" + + - id: "path_a_processor" + prompt: "Handle path A: {state.routing_decision}" + output_key: "path_a_result" + + - id: "path_b_processor" + prompt: "Handle path B: {state.routing_decision}" + output_key: "path_b_result" + + - id: "final_processor" + prompt: "Complete processing: {state.path_a_result}{state.path_b_result}" + output_key: "final_result" + + edges: + - source: "decision_maker" + target: "path_a_processor" + condition: "'path_a' in {state.routing_decision}.lower()" + - source: "decision_maker" + target: "path_b_processor" + condition: "'path_b' in {state.routing_decision}.lower()" + - source: "path_a_processor" + target: "final_processor" + - source: "path_b_processor" + target: "final_processor" +``` + +### 3. Multi-Agent Collaboration Pattern (SAFE) + +```yaml +workflow: + type: "custom_graph" + nodes: + - id: "coordinator" + prompt: "Coordinate multi-agent task: {input}" + output_key: "task_breakdown" + + - id: "specialist_a" + prompt: | + Handle specialist task A: + Task: {state.task_breakdown} + output_key: "specialist_a_work" + + - id: "specialist_b" + prompt: | + Handle specialist task B: + Task: {state.task_breakdown} + output_key: "specialist_b_work" + + - id: "reviewer" + prompt: | + Review all work: + Specialist A: {state.specialist_a_work} + Specialist B: {state.specialist_b_work} + output_key: "review_feedback" + + - id: "final_integrator" + prompt: | + Integrate final result: + Original task: {state.task_breakdown} + Specialist A work: {state.specialist_a_work} + Specialist B work: {state.specialist_b_work} + Review feedback: {state.review_feedback} + output_key: "integrated_result" + + edges: + - source: "coordinator" + target: "specialist_a" + - source: "coordinator" + target: "specialist_b" + - source: "specialist_a" + target: "reviewer" + - source: "specialist_b" + target: "reviewer" + - source: "reviewer" + target: "final_integrator" +``` + +## Anti-Patterns That Cause Conflicts + +### 1. Multiple Input Consumers (DANGEROUS) + +```yaml +# ❌ WRONG: Multiple nodes consuming {input} +nodes: + - id: "agent_a" + prompt: "Process: {input}" # Bad! + - id: "agent_b" + prompt: "Analyze: {input}" # Bad! +``` + +### 2. Concurrent Writes to Same State Key (DANGEROUS) + +```yaml +# ❌ WRONG: Both nodes writing to 'analysis' +nodes: + - id: "agent_a" + output_key: "analysis" # Conflict! + - id: "agent_b" + output_key: "analysis" # Conflict! +``` + +### 3. Uncontrolled Fan-out Without Proper Convergence (RISKY) + +```yaml +# ❌ RISKY: No clear convergence strategy +edges: + - source: "start" + target: "agent_a" + - source: "start" + target: "agent_b" + - source: "start" + target: "agent_c" + # No clear path to final node +``` + +## Custom Graph Safety Checklist + +Before deploying custom graph workflows: + +### Input Management +- [ ] Only ONE node consumes `{input}` +- [ ] All other nodes reference `{state.variable_name}` +- [ ] No hardcoded input values in prompts + +### State Key Management +- [ ] Each node has unique `output_key` +- [ ] No concurrent writes to same state key +- [ ] State references are complete (no empty sections) + +### Graph Structure +- [ ] Clear convergence points for parallel paths +- [ ] All edges lead to reachable nodes +- [ ] No orphaned nodes or infinite loops +- [ ] Final node has `stop: true` + +### Parallel Execution Safety +- [ ] Parallel nodes read from same state keys, write to different keys +- [ ] Convergence nodes explicitly reference all parallel outputs +- [ ] No dependencies between parallel nodes + +### Error Prevention +- [ ] Test with minimal 2-3 node versions first +- [ ] Verify all edge sources/targets exist in nodes +- [ ] Ensure deterministic routing in conditional edges + +## Advanced Patterns + +### Using State Transformation + +```yaml +# Transform state for downstream processing +- id: "state_transformer" + prompt: | + Transform data for next stage: + Raw input: {state.raw_data} + Analysis: {state.analysis_results} + + Provide structured data for final processing. + output_key: "transformed_state" +``` + +### Conditional Convergence + +```yaml +# Different convergence based on conditions +- id: "smart_converger" + prompt: | + Intelligently combine results based on content: + {% if state.path_a_result %} + Path A Result: {state.path_a_result} + {% endif %} + {% if state.path_b_result %} + Path B Result: {state.path_b_result} + {% endif %} + + Synthesize appropriate response. + output_key: "conditional_synthesis" +``` + +## Migration Strategy: Sequential β†’ Custom Graph + +When converting sequential workflows to custom graphs: + +1. **Start with working sequential version** +2. **Identify true parallelization opportunities** +3. **Design state key strategy** +4. **Convert one parallel section at a time** +5. **Test thoroughly at each step** + +## Performance Considerations + +Custom graphs are more powerful but: +- **Higher complexity** = more potential failure points +- **Parallel execution** can be faster but uses more resources +- **State management** overhead increases with complexity +- **Debugging** is more challenging than sequential flows + +## When to Avoid Custom Graphs + +**Avoid custom graphs if:** +- Sequential workflow meets requirements +- Team lacks LangGraph expertise +- Debugging complexity outweighs benefits +- Simple fan-out can be achieved with sequential + state accumulation + +## Key Takeaway + +**Custom graphs are powerful but require careful design.** The complexity trade-off is only worthwhile when you need true parallel execution, complex routing, or dynamic workflows that can't be expressed sequentially. Most multi-agent simulations work perfectly with sequential workflows where agents build upon each other's outputs. + +Start simple, add complexity only when necessary, and always prioritize maintainability over architectural elegance. \ No newline at end of file diff --git a/docs/notes_fix_function_principles.md b/docs/notes_fix_function_principles.md new file mode 100644 index 0000000..4eae170 --- /dev/null +++ b/docs/notes_fix_function_principles.md @@ -0,0 +1,218 @@ +# Function Integration Principles - Debugging Guide + +## Problem Analysis: Why Interactive Functions Fail + +This document analyzes the common issues when integrating Python functions in Elf0 workflows and provides principles for reliable function integration. + +## Root Cause Analysis + +### Primary Issues Identified + +#### 1. **Parameter Passing Confusion** +**Problem**: Inconsistent syntax for passing state variables to function parameters +- Tried `"${state.variable}"` (template substitution syntax) +- Tried `"{state.variable}"` (agent prompt syntax) +- Reality: Functions receive state directly, no template syntax needed + +**Solution**: Remove parameter specification and let functions access state directly +```yaml +# ❌ Wrong - trying to pass state as parameters +- id: ask_user + kind: tool + ref: get_input + config: + parameters: + prompt: "${state.question}" + +# βœ… Right - let function access state directly +- id: ask_user + kind: tool + ref: get_input +``` + +#### 2. **State Variable Access Patterns** +**Problem**: Confusion about how functions access workflow state +- Functions receive the entire `WorkflowState` as first parameter +- State contains accumulated data from previous nodes +- Output from previous node is typically in `state["output"]` + +**Solution**: Design functions to intelligently access state data +```python +def get_user_input(state: WorkflowState, prompt: str = "Please provide input:") -> WorkflowState: + # Smart state access - use what's available + if prompt == "Please provide input:": + if "question" in state: + prompt = state["question"] + elif "output" in state: + prompt = state["output"] +``` + +#### 3. **Hardcoded vs Generic Logic** +**Problem**: Workflows written for specific use cases instead of generic patterns +- Hardcoded "ask for name" instead of generic question generation +- Hardcoded "create poem" instead of generic response formation + +**Solution**: Design prompts and functions to be reusable across scenarios +```yaml +# ❌ Wrong - hardcoded for specific scenario +prompt: | + The user wants you to ask their name and then create a poem about it. + Since you don't know their name yet, ask them directly for their name. + +# βœ… Right - generic pattern +prompt: | + Based on the user's initial input: "{input}" + + Analyze what the user is asking for and determine what additional information you need. + Generate ONE specific follow-up question that will help you better understand their needs. +``` + +#### 4. **Output Key vs State Variable Confusion** +**Problem**: Misunderstanding how `output_key` affects state management +- `output_key: question` was expected to create `state.question` +- Reality: Node output goes to `state["output"]`, `output_key` is metadata + +**Solution**: Use `state["output"]` to access previous node results +```yaml +# The output_key is for internal tracking, not state variable naming +- id: generate_question + kind: agent + ref: assistant + config: + prompt: "Generate a question..." + output_key: question # Metadata only + +# Access via state.output in subsequent nodes +- id: final_response + config: + prompt: | + Follow-up question you asked: {state.output} +``` + +#### 5. **Workflow Complexity vs Simplicity** +**Problem**: Adding unnecessary intermediate steps that complicate state flow +- Added word counting step that wasn't needed +- Multiple processing steps that didn't add value + +**Solution**: Keep workflows minimal and focused +```yaml +# ❌ Wrong - unnecessary complexity +nodes: + - id: generate_question + - id: ask_user + - id: process_response # Unnecessary + - id: final_response + +# βœ… Right - minimal necessary steps +nodes: + - id: generate_question + - id: ask_user + - id: final_response +``` + +## Function Integration Principles + +### 1. **State-First Design** +- Functions should be designed to work with `WorkflowState` directly +- Avoid complex parameter passing when state access is simpler +- Make functions intelligent about what data they can use from state + +### 2. **Generic Over Specific** +- Write prompts and functions that work across multiple scenarios +- Avoid hardcoding specific use cases in workflow logic +- Let the LLM determine context-specific behavior + +### 3. **Minimal Parameter Interface** +- Only pass parameters when truly necessary +- Prefer state access over parameter passing for workflow data +- Use parameters for configuration, not data transfer + +### 4. **Clear State Flow** +- Understand that `state["output"]` contains the previous node's result +- Use meaningful variable names in state updates +- Document what each function adds to or expects from state + +### 5. **Fallback and Robustness** +- Design functions to handle missing state variables gracefully +- Provide sensible defaults when expected data isn't available +- Include debug information during development + +## Best Practices for Function Integration + +### Function Design Patterns + +```python +def generic_user_input(state: WorkflowState, prompt: str = "Please provide input:") -> WorkflowState: + """Generic pattern for user input functions.""" + + # 1. Smart prompt resolution from state + if prompt == "Please provide input:": + prompt = state.get("output", state.get("question", prompt)) + + # 2. Perform function logic + user_response = collect_user_input(prompt) + + # 3. Return enriched state + return { + **state, + "user_input": user_response, + "output": f"User provided: {user_response}" + } +``` + +### Workflow Design Patterns + +```yaml +# Generic interactive workflow pattern +workflow: + type: sequential + nodes: + # 1. Analyze input and generate question + - id: generate_question + kind: agent + ref: assistant + config: + prompt: | + Based on: "{input}" + What additional information do you need? + Output only the question. + + # 2. Collect user input (no parameters needed) + - id: ask_user + kind: tool + ref: get_input + + # 3. Generate final response with all context + - id: final_response + kind: agent + ref: assistant + config: + prompt: | + Original: "{input}" + Question: {state.output} + Response: {state.user_input} + Provide complete answer. + stop: true +``` + +## Debugging Checklist + +When function integration fails, check: + +1. **State Access**: Is the function accessing state variables correctly? +2. **Parameter Necessity**: Do you really need to pass parameters? +3. **State Flow**: Is each node properly updating state for the next? +4. **Generic Design**: Are prompts too specific to one use case? +5. **Workflow Simplicity**: Can you remove unnecessary steps? + +## Common Anti-Patterns to Avoid + +- ❌ Overusing parameter passing for state data +- ❌ Hardcoding specific scenarios in generic workflows +- ❌ Adding unnecessary processing steps +- ❌ Assuming specific state variable names exist +- ❌ Complex template substitution when direct access works + +## Summary + +The key insight is that **functions should be designed to work with state directly rather than through complex parameter passing**. This leads to simpler, more robust, and more reusable workflows that can handle a variety of interactive scenarios without modification. \ No newline at end of file diff --git a/docs/plan_dir_glob.md b/docs/plan_dir_glob.md new file mode 100644 index 0000000..2c08b1a --- /dev/null +++ b/docs/plan_dir_glob.md @@ -0,0 +1,464 @@ +# Plan: Directory Reference Support (@directory/) + +## Overview + +Extend Elf0's existing file reference system (`@file.py`) to support directory references (`@src/`) with intelligent file discovery and filtering. + +## Requirements Analysis + +### Current State +- **File references**: `@path/to/file.py` works perfectly +- **Implementation**: `src/elf0/utils/file_utils.py:parse_at_references()` +- **Usage**: CLI commands and interactive mode +- **Flow**: Parse β†’ Validate β†’ Read β†’ Include in prompt + +### New Requirements +- **Directory references**: `@path/to/directory/` syntax +- **Intelligent filtering**: Include relevant files (code, config, docs) +- **Safety limits**: Prevent overwhelming LLM with too many files +- **Backward compatibility**: All existing functionality preserved + +### Design Decisions +- **Syntax**: `@directory/` (no asterisk) - clean and consistent with existing pattern +- **Recursion**: Single level only (non-recursive) for performance and clarity +- **File filtering**: Smart detection of relevant file types +- **Ordering**: Alphabetical sorting for consistent, predictable output + +## Implementation Plan + +### Phase 1: Core Logic Enhancement + +#### File Locations and Changes + +**Primary file**: `src/elf0/utils/file_utils.py` + +**Function modifications**: +1. **`parse_at_references()` (lines 102-136)** - Extend to handle directories +2. **Add new helper functions** after `is_valid_file()` (after line 11) +3. **Enhance `read_files_content()` (lines 13-30)** - Better headers for directory files + +#### New Helper Functions + +```python +def is_valid_directory(path: Path) -> bool: + """Check if a path exists and is a directory.""" + return path.exists() and path.is_dir() + +def is_relevant_file(path: Path) -> bool: + """Check if a file should be included in directory scanning. + + Includes code, configuration, and documentation files. + Excludes binary files, hidden files, and generated files. + """ + # Define relevant file extensions + code_exts = {'.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h', + '.rs', '.go', '.rb', '.php', '.sh', '.sql', '.r', '.scala', '.kt'} + config_exts = {'.json', '.yaml', '.yml', '.xml', '.toml', '.ini', '.env', '.cfg'} + doc_exts = {'.md', '.rst', '.txt', '.adoc'} + + # Skip hidden files and directories + if path.name.startswith('.'): + return False + + # Skip common binary/generated files + skip_exts = {'.pyc', '.pyo', '.class', '.exe', '.dll', '.so', '.o', '.a', + '.zip', '.tar', '.gz', '.7z', '.rar', '.pdf', '.jpg', '.jpeg', + '.png', '.gif', '.svg', '.mp4', '.avi', '.mp3', '.wav'} + + suffix = path.suffix.lower() + + # Include relevant files + if suffix in code_exts or suffix in config_exts or suffix in doc_exts: + return True + + # Exclude known binary files + if suffix in skip_exts: + return False + + # Handle files without extension (Makefile, Dockerfile, etc.) + if not suffix and path.is_file(): + try: + # Size limit check (1MB) + if path.stat().st_size > 1024 * 1024: + return False + # Basic text file detection + with path.open('rb') as f: + sample = f.read(min(1024, path.stat().st_size)) + if not sample: + return False + # Check if mostly printable characters + printable = sum(1 for b in sample if 32 <= b <= 126 or b in (9, 10, 13)) + return printable / len(sample) > 0.7 + except (OSError, UnicodeDecodeError): + return False + + return False + +def get_directory_files(directory: Path, max_files: int = 50) -> list[Path]: + """Get relevant files from a directory (non-recursive). + + Args: + directory: Directory to scan + max_files: Maximum number of files to return (safety limit) + + Returns: + List of relevant file paths, sorted alphabetically + """ + relevant_files = [] + + try: + for item in directory.iterdir(): + if item.is_file() and is_relevant_file(item): + relevant_files.append(item) + + # Safety limit to prevent overwhelming LLM + if len(relevant_files) >= max_files: + logger.warning(f"Directory '@{directory}' contains many files. " + f"Only including first {max_files} relevant files.") + break + + # Sort for consistent, predictable ordering + return sorted(relevant_files, key=lambda p: p.name.lower()) + + except PermissionError: + logger.warning(f"Permission denied accessing directory '@{directory}'") + return [] + except OSError as e: + logger.warning(f"Could not read directory '@{directory}': {e}") + return [] +``` + +#### Enhanced parse_at_references() + +**Current logic** (lines 121-126): +```python +for match in matches: + path = Path(match) + if is_valid_file(path): + referenced_files_set.add(path) + else: + logger.warning(f"Referenced file '@{match}' not found or is not a file. Skipping.") +``` + +**New logic**: +```python +for match in matches: + path = Path(match) + if is_valid_file(path): + # Existing file behavior - unchanged + referenced_files_set.add(path) + elif is_valid_directory(path): + # New directory behavior + directory_files = get_directory_files(path) + referenced_files_set.update(directory_files) + if directory_files: + logger.info(f"Directory '@{match}' expanded to {len(directory_files)} files") + else: + logger.warning(f"Referenced path '@{match}' not found. Skipping.") +``` + +#### Enhanced read_files_content() + +**Update file headers** to show directory context: +```python +# Current: f"Content of {current_path.name}:\n{f.read()}\n---" +# New: Show directory context when relevant +if len(str(current_path.parent)) > 1: # Not just "." + header = f"Content of {current_path.parent}/{current_path.name}" +else: + header = f"Content of {current_path.name}" +content_parts.append(f"{header}:\n{f.read()}\n---") +``` + +### Phase 2: Testing Strategy + +Following **@docs/testing_principles.md**: + +#### Test at CLI Level (Primary) +**File**: `tests/cli/test_cli.py` (extend existing) + +```python +def test_directory_reference_cli(tmp_path, runner): + """Test CLI command with directory reference.""" + # Arrange: Create test directory with files + test_dir = tmp_path / "test_src" + test_dir.mkdir() + (test_dir / "main.py").write_text("print('hello')") + (test_dir / "config.json").write_text('{"key": "value"}') + (test_dir / "README.md").write_text("# Test project") + + spec_file = tmp_path / "test_spec.yaml" + spec_file.write_text(create_minimal_spec()) + + # Act: Run CLI with directory reference + result = runner.invoke(app, [ + "agent", str(spec_file), + "--prompt", f"Analyze @{test_dir}/" + ]) + + # Assert: Command succeeds and includes all relevant files + assert result.exit_code == 0 + # Don't test exact output format (implementation detail) + # Test that all files were processed (observable behavior) + +def test_mixed_file_and_directory_references(tmp_path, runner): + """Test CLI with both file and directory references.""" + # Test real use case: mixed references + # Arrange, Act, Assert pattern + +def test_directory_reference_safety_limits(tmp_path, runner): + """Test directory with many files respects safety limits.""" + # Test edge case: too many files +``` + +#### Unit Tests for New Functions +**File**: `tests/utils/test_file_utils.py` (new file) + +```python +class TestDirectoryFileDiscovery: + """Test directory file discovery functionality.""" + + def test_is_relevant_file_code_files(self, tmp_path): + """Test that code files are identified as relevant.""" + # Arrange + code_file = tmp_path / "test.py" + code_file.write_text("print('hello')") + + # Act & Assert + assert is_relevant_file(code_file) is True + + def test_is_relevant_file_binary_files(self, tmp_path): + """Test that binary files are excluded.""" + # Test real use case: avoid binary files + + def test_get_directory_files_sorts_output(self, tmp_path): + """Test that directory files are returned in sorted order.""" + # Test observable behavior: consistent ordering + + def test_get_directory_files_respects_limits(self, tmp_path): + """Test that file count limits are enforced.""" + # Test edge case: safety limits +``` + +#### Integration Tests +**File**: `tests/integration/test_directory_references.py` (new file) + +```python +def test_end_to_end_directory_workflow(tmp_path): + """Test complete workflow execution with directory references.""" + # Test high-level behavior: full workflow with directory refs + # Focus on user-facing functionality, not implementation +``` + +### Phase 3: Edge Cases and Error Handling + +#### Edge Cases to Handle +1. **Empty directories** - Return empty list, log info message +2. **Permission denied** - Graceful fallback, clear error message +3. **Symbolic links** - Follow links but detect cycles +4. **Very large files** - Size limits and warnings +5. **Binary files** - Detection and exclusion +6. **Many files** - Safety limits and truncation +7. **Non-existent paths** - Clear error messages +8. **Mixed valid/invalid references** - Process valid ones, warn about invalid + +#### Error Handling Patterns +Follow existing patterns in codebase: +- Use `logger.warning()` for non-fatal issues +- Use `logger.info()` for informational messages +- Graceful degradation (skip problematic files, continue processing) +- Clear, actionable error messages + +### Phase 4: Documentation Updates + +#### Files to Update + +**1. README.md** +- Update file reference section with directory examples +- Add new examples in usage sections +- Update quick start guide + +**2. CLI Help Text** +**File**: `src/elf0/cli.py` +- Update docstrings for `agent_command()` and `prompt_yaml_command()` +- Add directory reference examples + +**3. New Documentation** +**File**: `docs/features/feature_file_references.md` (new) +- Comprehensive guide to file and directory references +- Examples and best practices +- Troubleshooting common issues + +#### Documentation Examples + +```markdown +### File and Directory References + +Include files and directories in your prompts using `@` syntax: + +```bash +# Single file +uv run elf0 agent workflow.yaml --prompt "Review @src/main.py" + +# Directory (all relevant files) +uv run elf0 agent workflow.yaml --prompt "Analyze all code in @src/" + +# Mixed references +uv run elf0 agent workflow.yaml --prompt "Review @config.json and all files in @src/" + +# Multiple directories +uv run elf0 agent workflow.yaml --prompt "Compare @src/ and @tests/" +``` + +**Supported file types in directories:** +- **Code**: `.py`, `.js`, `.ts`, `.java`, `.cpp`, `.rs`, `.go`, etc. +- **Configuration**: `.json`, `.yaml`, `.xml`, `.toml`, `.ini`, etc. +- **Documentation**: `.md`, `.rst`, `.txt`, etc. +- **Scripts**: `.sh`, `.sql`, Makefile, Dockerfile, etc. + +**Automatically excluded:** +- Hidden files (starting with `.`) +- Binary files (images, executables, archives) +- Generated files (`.pyc`, `.class`, etc.) +``` + +### Phase 5: Backward Compatibility and Migration + +#### No Breaking Changes +- All existing `@file.py` syntax continues working +- Same regex pattern captures both files and directories +- Function signatures preserved +- Error handling enhanced, not changed + +#### Enhanced User Experience +- Better error messages distinguish files vs directories +- Informational logging shows directory expansion +- Consistent file ordering reduces prompt variation +- Safety limits prevent performance issues + +## Testing Strategy Details + +### Following Testing Principles + +#### 1. Test High-Level Behavior +- **Focus**: CLI commands work with directory references +- **Avoid**: Testing internal file filtering logic details +- **Example**: Test that `uv run elf0 agent spec.yaml --prompt "Review @src/"` succeeds + +#### 2. Test Real Use Cases +- **Common scenarios**: Mixed file and directory references +- **Edge cases**: Empty directories, permission errors +- **User workflows**: Code review, documentation generation + +#### 3. Maintain Test Independence +- **Setup**: Each test creates its own temporary directory structure +- **Cleanup**: Automatic with `tmp_path` fixture +- **Isolation**: No shared state between tests + +#### 4. CLI-Level Testing +- **Primary approach**: Use `CliRunner` for command testing +- **Focus**: Parameter handling and output behavior +- **Avoid**: Direct function calls for CLI features + +#### 5. Simple and Clear Tests +- **Naming**: `test_directory_reference_expands_files` +- **Structure**: Clear Arrange-Act-Assert sections +- **Assertions**: Test observable behavior, not implementation + +### Test File Organization + +``` +tests/ +β”œβ”€β”€ cli/ +β”‚ └── test_cli.py # Extended with directory tests +β”œβ”€β”€ utils/ +β”‚ └── test_file_utils.py # New: unit tests for helpers +β”œβ”€β”€ integration/ +β”‚ └── test_directory_references.py # New: end-to-end tests +└── fixtures/ + └── test_directory_structures/ # New: test data +``` + +## Implementation Benefits + +### For Users +- **Natural syntax**: `@src/` instead of listing individual files +- **Intelligent filtering**: Only relevant files included automatically +- **Consistent behavior**: Predictable file ordering and inclusion +- **Safety features**: Protection against too many files or binary content + +### For Developers +- **Minimal changes**: Extends existing architecture cleanly +- **Reusable code**: New helpers useful for other features +- **Testable design**: Clear separation of concerns +- **Error resilience**: Graceful handling of edge cases + +### For Maintainers +- **Backward compatibility**: No disruption to existing workflows +- **Clear logging**: Easy to debug and troubleshoot +- **Extensible design**: Easy to add new file types or filters +- **Well-tested**: Comprehensive test coverage + +## Risk Mitigation + +### Performance Risks +- **File count limits**: Maximum 50 files per directory +- **File size limits**: Skip files larger than 1MB +- **Memory management**: Stream file reading where possible + +### Security Risks +- **Permission handling**: Graceful permission denied handling +- **Path traversal**: No recursive directory scanning +- **Binary file detection**: Prevent including binary content + +### User Experience Risks +- **Clear feedback**: Informative messages about file inclusion/exclusion +- **Predictable behavior**: Consistent ordering and filtering +- **Error recovery**: Continue processing when some files fail + +## Implementation Timeline + +### Phase 1: Core Implementation (Week 1) +- Implement helper functions +- Extend `parse_at_references()` +- Basic manual testing + +### Phase 2: Testing (Week 1) +- Create comprehensive test suite +- CLI integration tests +- Edge case testing + +### Phase 3: Documentation (Week 1) +- Update README and CLI help +- Create feature documentation +- Usage examples + +### Phase 4: Validation (Week 1) +- End-to-end testing +- Performance validation +- User acceptance testing + +## Success Criteria + +### Functional Requirements +- βœ… `@directory/` syntax works in CLI commands +- βœ… `@directory/` syntax works in interactive mode +- βœ… Mixed file and directory references work +- βœ… Intelligent file filtering excludes binary files +- βœ… Safety limits prevent performance issues +- βœ… All existing functionality preserved + +### Quality Requirements +- βœ… Comprehensive test coverage (>90%) +- βœ… No performance regression +- βœ… Clear error messages and logging +- βœ… Documentation updated and accurate +- βœ… Code style consistent with existing codebase + +### User Experience Requirements +- βœ… Intuitive syntax matching existing patterns +- βœ… Predictable and consistent behavior +- βœ… Helpful feedback and error messages +- βœ… Smooth migration from existing workflows + +This plan ensures a robust, well-tested implementation that extends Elf0's powerful file reference system while maintaining the high quality and reliability users expect. \ No newline at end of file diff --git a/docs/plan_opensource-min.md b/docs/plan_opensource-min.md index 575526a..4224dc4 100644 --- a/docs/plan_opensource-min.md +++ b/docs/plan_opensource-min.md @@ -79,15 +79,15 @@ This plan outlines the comprehensive steps needed to transform ELF from a privat - [ ] `output-prompt-qwen32.py` (contains experimental prompts) ### Task 3.2: Documentation Organization -**Current structure**: `docs_ai/` contains internal development notes +**Current structure**: `docs/` contains internal development notes **Target structure**: Move to standard `docs/` directory with public-appropriate content **Actions**: -1. Review `docs_ai/notes/` for content suitable for public consumption +1. Review `docs/notes/` for content suitable for public consumption 2. Restructure as user-facing documentation 3. Remove or sanitize internal development notes 4. Maintain technical architecture documentation -- [ ] Rename `docs_ai` to `docs` +- [ ] Rename `docs` to `docs` ## πŸš€ Phase 4: Infrastructure Setup diff --git a/docs/plan_opensource.md b/docs/plan_opensource.md index 5f9dcf0..02edf46 100644 --- a/docs/plan_opensource.md +++ b/docs/plan_opensource.md @@ -84,11 +84,11 @@ This plan outlines the comprehensive steps needed to transform ELF from a privat - `output-prompt-qwen32.py` (contains experimental prompts) ### Task 3.2: Documentation Organization -**Current structure**: `docs_ai/` contains internal development notes +**Current structure**: `docs/` contains internal development notes **Target structure**: Move to standard `docs/` directory with public-appropriate content **Actions**: -1. Review `docs_ai/notes/` for content suitable for public consumption +1. Review `docs/notes/` for content suitable for public consumption 2. Restructure as user-facing documentation 3. Remove or sanitize internal development notes 4. Maintain technical architecture documentation diff --git a/docs/plan_specs_tidy.md b/docs/plan_specs_tidy.md new file mode 100644 index 0000000..1cef799 --- /dev/null +++ b/docs/plan_specs_tidy.md @@ -0,0 +1,475 @@ +# Specs Directory Reorganisation Plan + +## Executive Summary + +This plan outlines a comprehensive reorganisation of the elf0 workflow specifications directory that preserves all existing work whilst creating a curated, well-organized structure. The approach transforms 38 inconsistently named workflows into a logical directory system with 15-18 active workflows and full preservation of all existing work in an archive. + +**Key changes:** +- **Zero-loss migration:** All 38 existing workflows preserved in `archive/` directory +- **Logical structure:** Four focused directories (`basic/`, `content/`, `code/`, `examples/`) +- **Consistent naming:** Restored `{subcategory}_{context}_{version}.yaml` pattern +- **Simplified CLI:** Minimal `elf0 list-specs [directory]` filtering approach +- **Quality framework:** Standardization approach for prompt engineering and metadata +- **Future-ready:** Lean, flexible architecture supporting continuous improvement + +## Current State Analysis + +### File Locations + +**Main `/specs/` directory (18 files):** +- `src/elf0/specs/basic_chat.yaml` +- `src/elf0/specs/basic_reasoning-01.yaml` +- `src/elf0/specs/basic_reasoning-02.yaml` +- `src/elf0/specs/agent-twitter-01.yaml` +- `src/elf0/specs/agent-twitter-01_improved.yaml` +- `src/elf0/specs/agent-linkedin-01.yaml` +- `src/elf0/specs/youtube_analyzer.yaml` +- `src/elf0/specs/prompt_optimizer.yaml` +- `src/elf0/specs/agent-tech_doc_creator.yaml` +- `src/elf0/specs/agent-creator-01.yaml` +- `src/elf0/specs/agent-creator-02.yaml` +- `src/elf0/specs/agent-simulation.yaml` +- `src/elf0/specs/agent-optimizer.yaml` + +**Examples subdirectory `/specs/examples/` (16 files):** +- `src/elf0/specs/examples/claude_code_example.yaml` +- `src/elf0/specs/examples/claude_code_self_improvement.yaml` +- `src/elf0/specs/examples/claude_sonnet_example.yaml` +- `src/elf0/specs/examples/interactive_assistant.yaml` +- `src/elf0/specs/examples/mcp_workflow.yaml` +- `src/elf0/specs/examples/ollama_chat.yaml` +- `src/elf0/specs/examples/ollama_coder.yaml` +- `src/elf0/specs/examples/ollama_optimizer.yaml` +- `src/elf0/specs/examples/orchestration_workers.yaml` +- `src/elf0/specs/examples/prompt_chaining.yaml` +- `src/elf0/specs/examples/prompt_routing.yaml` +- `src/elf0/specs/examples/prompt_routing_with_reference.yaml` +- `src/elf0/specs/examples/python_calculator.yaml` +- `src/elf0/specs/examples/python_function_test.yaml` +- `src/elf0/specs/examples/python_text_processor.yaml` +- `src/elf0/specs/examples/simple_mcp.yaml` + +### Technical Architecture Analysis + +**Workflow Types Supported:** +- `sequential` - Linear execution flow +- `react` - Reasoning and acting pattern +- `evaluator_optimizer` - Iterative improvement cycles +- `custom_graph` - Complex conditional workflows + +**Node Types Available:** +- `agent` - LLM-powered reasoning nodes +- `tool` - External function calls +- `judge` - Evaluation and scoring nodes +- `branch` - Conditional routing nodes +- `mcp` - Model Context Protocol integrations +- `claude_code` - Code generation nodes + +**LLM Integrations:** +- OpenAI (GPT-4, GPT-4.1-mini, O3) +- Anthropic (Claude Sonnet 4, Claude Haiku) +- Ollama (Local models) + +**Current Workflow Complexity Distribution:** +- Level 1 (Basic): 6 workflows +- Level 2 (Structured): 8 workflows +- Level 3 (Advanced): 12 workflows +- Level 4 (Complex): 8 workflows +- Level 5 (Expert): 4 workflows + +## Problems Identified + +### 1. Inconsistent Naming Conventions +- Mixed prefixes: `basic_`, `agent-`, no prefix +- Inconsistent versioning: `-01`, `-02`, `_improved` +- No clear category indication in filenames + +### 2. Poor Organisation Structure +- Flat directory structure makes discovery difficult +- `/examples/` subdirectory contains production-ready workflows +- No clear progression path for users + +### 3. Missing Business Value +- Lack of practical business automation workflows +- Limited entrepreneurial use cases +- No clear ROI demonstration + +### 4. Quality Inconsistencies +- Inconsistent metadata standards +- Variable prompt quality +- Missing documentation within workflows + +### 5. User Experience Issues +- No clear entry points for new users +- Difficult to find appropriate workflows for specific needs +- No guidance on workflow progression + +## Proposed Solution + +### 1. Enhanced Directory Structure + +Organized by use case with clear progression paths: + +``` +specs/ +β”œβ”€β”€ basic/ # Entry-level workflows (3-4 files) +β”‚ β”œβ”€β”€ chat_simple_v1.yaml +β”‚ β”œβ”€β”€ reasoning_structured_v1.yaml +β”‚ └── content_basic_v1.yaml +β”œβ”€β”€ content/ # Content creation & analysis (4-5 files) +β”‚ β”œβ”€β”€ social_twitter_v2.yaml +β”‚ β”œβ”€β”€ social_linkedin_v1.yaml +β”‚ β”œβ”€β”€ analysis_youtube_v1.yaml +β”‚ └── documentation_technical_v1.yaml +β”œβ”€β”€ code/ # Code generation & analysis (3-4 files) +β”‚ β”œβ”€β”€ generator_python_v1.yaml +β”‚ β”œβ”€β”€ analyzer_review_v1.yaml +β”‚ └── integration_claude_code_v1.yaml +β”œβ”€β”€ examples/ # Advanced patterns & integrations (4-5 files) +β”‚ β”œβ”€β”€ automation_prompt_optimizer_v1.yaml +β”‚ β”œβ”€β”€ orchestration_workers_v1.yaml +β”‚ β”œβ”€β”€ integration_mcp_calculator_v1.yaml +β”‚ └── workflow_chaining_v1.yaml +└── archive/ # All existing workflows preserved + β”œβ”€β”€ agent-twitter-01.yaml + β”œβ”€β”€ basic_reasoning-02.yaml + β”œβ”€β”€ agent-creator-01.yaml + └── [... all 38 current files] +``` + +**Total: ~15-18 active files + 38 archived files** + +### 2. Naming Convention + +**Structure:** `{subcategory}_{context}_{version}.yaml` + +**Examples:** +- `social_twitter_v2.yaml` - Twitter/thread generator +- `integration_mcp_calculator_v1.yaml` - MCP calculator integration +- `generator_python_v1.yaml` - Python code generation workflow +- `automation_prompt_optimizer_v1.yaml` - Prompt optimization workflow + +**Rationale:** Consistent naming enables: +- **Clear categorization** - Subcategory prefix indicates workflow type +- **Version management** - Explicit versioning supports iterative improvement +- **Predictable discovery** - Pattern-based naming aids CLI filtering +- **Future-proofing** - Structure accommodates expansion + +### 3. Essential Metadata Standards + +Every workflow must include: + +```yaml +version: "0.1" +description: "Clear, concise description of purpose and use case" +runtime: "langgraph" +# Optional metadata for enhanced discovery: +tags: ["content", "automation", "analysis"] # For filtering +complexity: "basic|intermediate|advanced" # For user progression +``` + +**Rationale:** Minimal required metadata reduces maintenance overhead whilst supporting essential discovery features. + +### 4. Minimal CLI Design + +**Current `list-specs` command:** +- Scans only root `./specs` directory +- No filtering capabilities + +**Enhanced design philosophy:** +- **Default to all** - Show all specs across directories by default +- **Simple filtering** - Optional directory name as argument +- **Minimal complexity** - Single parameter, intuitive usage + +**Proposed CLI signature:** +```bash +elf0 list-specs [directory] +``` + +**Usage examples:** +```bash +elf0 list-specs # Show all specs (default) +elf0 list-specs basic # Show only basic/ directory +elf0 list-specs content # Show only content/ directory +elf0 list-specs code # Show only code/ directory +elf0 list-specs archive # Show archived workflows +``` + +**Implementation approach:** +- Single optional positional argument +- Directory validation with helpful error messages +- Consistent output formatting across all directories + +## Priority-Based Implementation Plan + +### Priority 1: Core Structure (Immediate) + +**Goals:** Establish basic organisation without breaking existing functionality + +**Tasks:** +1. **Audit and consolidate** - Identify the 12-15 most valuable workflows from current 38 +2. **Create directory structure** - Establish `basic/`, `content/`, `examples/` folders +3. **Initial migration** - Move and rename selected workflows +4. **Update CLI** - Modify `list-specs` to support subdirectories + +**Outcome:** Functional organised structure with reduced file count + +### Priority 2: CLI Enhancement (Next) + +**Goals:** Improve workflow discovery and filtering + +**Tasks:** +1. **Enhance list-specs command** - Add category filtering (`--category basic|content|examples|all`) +2. **Update file discovery** - Modify `list_spec_files()` for recursive scanning +3. **Test integration** - Ensure pytest tests continue to pass +4. **Documentation updates** - Update CLI help and examples + +**Outcome:** Enhanced discovery through CLI filtering + +### Priority 3: Quality Standardisation (Then) + +**Goals:** Consistent metadata and prompt quality + +**Tasks:** +1. **Standardise metadata** - Apply essential metadata to all workflows +2. **Improve prompts** - Enhance prompt engineering for consistency +3. **Add quality checks** - Implement validation for required fields +4. **Documentation** - Create README for each category + +**Outcome:** Professional, consistent workflow library + +### Priority 4: Gradual Expansion (Later) + +**Goals:** Add new workflows based on user needs + +**Tasks:** +1. **Monitor usage patterns** - Identify gaps in workflow coverage +2. **Create missing workflows** - Develop high-value business automations +3. **Community guidelines** - Enable external contributions +4. **Advanced features** - Add versioning, tagging, search functionality + +**Outcome:** Comprehensive workflow ecosystem + +## File Migration Strategy + +### Preserve All Workflows - Zero Loss Approach + +**Archive Directory:** +All 38 existing workflows moved to `archive/` directory unchanged: +- Preserves all existing work and investment +- Enables reference and potential future promotion +- Maintains backwards compatibility for any external references +- No workflow is lost or deleted + +### Curated Active Workflows (15-18 total) + +**Basic Category (3 files):** +- `basic_chat.yaml` β†’ `basic/chat_simple_v1.yaml` +- `basic_reasoning-01.yaml` β†’ `basic/reasoning_structured_v1.yaml` +- NEW: `basic/content_basic_v1.yaml` (simplified content creation) + +**Content Category (4 files):** +- `agent-twitter-01_improved.yaml` β†’ `content/social_twitter_v2.yaml` +- `agent-linkedin-01.yaml` β†’ `content/social_linkedin_v1.yaml` +- `youtube_analyzer.yaml` β†’ `content/analysis_youtube_v1.yaml` +- `agent-tech_doc_creator.yaml` β†’ `content/documentation_technical_v1.yaml` + +**Code Category (3 files):** +- `examples/ollama_coder.yaml` β†’ `code/generator_python_v1.yaml` +- NEW: `code/analyzer_review_v1.yaml` (code analysis and review) +- `examples/claude_code_example.yaml` β†’ `code/integration_claude_code_v1.yaml` + +**Examples Category (5 files):** +- `prompt_optimizer.yaml` β†’ `examples/automation_prompt_optimizer_v1.yaml` +- `examples/orchestration_workers.yaml` β†’ `examples/orchestration_workers_v1.yaml` +- `examples/mcp_workflow.yaml` β†’ `examples/integration_mcp_calculator_v1.yaml` +- NEW: `examples/workflow_chaining_v1.yaml` (workflow composition patterns) +- NEW: `examples/evaluation_judge_v1.yaml` (evaluation workflow patterns) + +**Migration Philosophy:** +- **Curate, don't delete** - Select best examples for active use +- **Preserve everything** - Archive maintains all existing work +- **Enhance selectively** - Improve chosen workflows with standardized metadata +- **Enable discovery** - Clear organization aids workflow discovery + +## CLI Technical Requirements + +### Required Changes to `src/elf0/cli.py` + +**Current `list-specs` command (line 545):** +```python +@app.command("list-specs", help="List all YAML workflow spec files in the ./specs directory.") +def list_specs_command() -> None: +``` + +**Enhanced `list-specs` command:** +```python +@app.command("list-specs", help="List YAML workflow spec files, optionally filtered by directory.") +def list_specs_command( + directory: str = typer.Argument( + None, + help="Optional directory filter (basic, content, code, examples, archive). Shows all if not specified." + ) +) -> None: +``` + +### Required Changes to `src/elf0/utils/file_utils.py` + +**Current `list_spec_files()` function (line 139):** +- Only scans root directory +- Ignores subdirectories + +**Enhanced function signature:** +```python +def list_spec_files(specs_dir: Path, directory_filter: str | None = None) -> list[Path]: + """Lists YAML spec files with optional directory filtering. + + Args: + specs_dir: The Path to the specs directory + directory_filter: None for all directories, or specific subdirectory name + + Returns: + List of Path objects for matching spec files + """ +``` + +**Implementation approach:** +- If `directory_filter` is None: recursively scan all subdirectories +- If `directory_filter` specified: scan only that subdirectory +- Validate directory exists and provide helpful error messages +- Maintain consistent sorting and output formatting + +### Test Considerations + +**Existing pytest tests:** Ensure all current tests continue to pass +**New test cases needed:** +- Directory filtering functionality +- Recursive directory scanning +- Invalid directory handling +- Empty directory handling + +## Standardization and Enhancement Strategy + +### Prompt Quality Framework + +**Current challenges:** +- Inconsistent prompt engineering quality +- Variable output formats +- Missing error handling +- Unclear instructions + +**Standardization approach:** +```yaml +# Template for consistent prompt structure +prompt: | + ROLE: [Clear role definition] + + CONTEXT: + [Relevant context and background] + + TASK: + [Specific, actionable instructions] + + CONSTRAINTS: + - [Clear limitations and boundaries] + - [Output format requirements] + + EXAMPLES: + [Input/output examples when helpful] + + OUTPUT FORMAT: + [Explicit format specification] +``` + +### Metadata Standardization + +**Required fields for all workflows:** +```yaml +version: "v1" # Semantic versioning +description: "..." # One-line purpose description +runtime: "langgraph" # Target runtime +complexity: "basic" # basic|intermediate|advanced +category: "content" # Directory category +subcategory: "social" # Workflow type +``` + +**Optional enhancement fields:** +```yaml +tags: ["twitter", "social-media", "content-creation"] +use_cases: ["Marketing campaigns", "Content planning"] +prerequisites: ["Twitter API access"] +estimated_runtime: "30-60 seconds" +example_inputs: + - "Write a thread about AI safety" + - "Create tweets for product launch" +``` + +### Enhancement Methodology + +**1. Iterative Improvement Process:** +- Version workflows when making significant changes +- Test with multiple example inputs before promoting +- Document improvement rationale in commit messages +- Maintain backwards compatibility within major versions + +**2. Quality Gates:** +- **Syntax validation** - YAML schema compliance +- **Prompt clarity** - Clear, unambiguous instructions +- **Error handling** - Graceful failure modes +- **Output consistency** - Predictable result formats + +**3. Best Practice Patterns:** +- **Temperature tuning** - Appropriate creativity levels per use case +- **Model selection** - Optimal LLM choice for task requirements +- **Token management** - Efficient prompt design for cost control +- **Retry logic** - Built-in resilience for API failures + +### Future Enhancement Pipeline + +**Phase 1: Foundation (Current)** +- Establish directory structure and naming convention +- Migrate core workflows with basic metadata +- Implement CLI filtering functionality + +**Phase 2: Quality (Next)** +- Apply prompt standardization template +- Add comprehensive metadata to all workflows +- Implement validation tooling + +**Phase 3: Intelligence (Later)** +- Usage analytics to identify improvement opportunities +- Automated prompt optimization suggestions +- Community contribution review process +- Performance benchmarking framework + +### Lean Implementation Principles + +**Minimal viable changes:** +- Single responsibility - Each workflow does one thing well +- Composability - Workflows can be chained together +- Configurability - Parameters exposed for customization +- Observability - Clear logging and error reporting + +**Flexible architecture:** +- Plugin-based enhancement system +- Configuration-driven behavior +- Minimal coupling between components +- Future-proof metadata schema + +## Implementation Benefits + +### Immediate Benefits +- **Zero workflow loss** - All existing work preserved in archive +- **Clear organization** - Logical directory structure with versioning +- **Improved discovery** - Simple CLI filtering by directory +- **Reduced maintenance** - Focus on curated, high-quality workflows + +### Long-term Benefits +- **Quality consistency** - Standardized prompt engineering patterns +- **Version management** - Clear upgrade and iteration paths +- **Community scaling** - Structure supports external contributions +- **Performance optimization** - Framework for continuous improvement + +## Conclusion + +This enhanced reorganisation plan balances immediate organizational benefits with long-term quality and maintainability goals. By preserving all existing workflows whilst curating a focused active set, we maintain backwards compatibility whilst improving user experience. The standardization framework ensures consistent quality improvements over time, following lean principles that minimize complexity whilst maximizing flexibility. \ No newline at end of file diff --git a/docs/plan_youtube_transcript_mcp.md b/docs/plan_youtube_transcript_mcp.md new file mode 100644 index 0000000..fe512a0 --- /dev/null +++ b/docs/plan_youtube_transcript_mcp.md @@ -0,0 +1,238 @@ +# YouTube Transcript MCP Server Implementation Plan + +## πŸ“ File Structure +``` +mcps/ +β”œβ”€β”€ mcp-youtube-transcript/ +β”‚ β”œβ”€β”€ pyproject.toml # uv project configuration +β”‚ β”œβ”€β”€ server.py # Main MCP server +β”‚ β”œβ”€β”€ README.md # Documentation +β”‚ └── tests/ +β”‚ └── test_youtube_transcript.py +``` + +## πŸ”§ Project Setup with uv +```bash +cd mcps/ +uv init mcp-youtube-transcript +cd mcp-youtube-transcript +uv add "mcp[cli]" youtube-transcript-api pytube pydantic validators cachetools +uv add --dev pytest pytest-asyncio +``` + +## 🎯 Real-World Use Cases + +### πŸ“š Educational & Learning +- **Students**: Extract lecture transcripts for note-taking and study guides +- **Researchers**: Analyze educational content and extract key concepts +- **Language Learners**: Get transcripts with timestamps for pronunciation practice +- **Accessibility**: Provide text versions for hearing-impaired users + +### πŸ’Ό Business & Content Creation +- **Content Marketers**: Analyze competitor videos for strategy insights +- **SEO Specialists**: Extract keywords and topics from trending videos +- **Podcast Producers**: Repurpose YouTube content into written articles +- **Social Media Managers**: Create quote graphics from video content +- **Copywriters**: Extract compelling phrases and messaging from successful videos + +### πŸ“° Journalism & Research +- **Journalists**: Extract quotes and verify claims from video interviews +- **Fact Checkers**: Analyze political speeches and public statements +- **Academic Researchers**: Process large volumes of video content for analysis +- **Legal Professionals**: Extract testimony or evidence from recorded proceedings + +### πŸ” Analysis & Intelligence +- **Market Researchers**: Analyze customer feedback from review videos +- **Trend Analysts**: Process viral content to identify emerging topics +- **Brand Monitors**: Track brand mentions across video content +- **Competitive Intelligence**: Monitor competitor announcements and presentations + +### πŸ› οΈ Technical & Development +- **Documentation Teams**: Convert technical video tutorials into written guides +- **Training Departments**: Create searchable knowledge bases from training videos +- **Quality Assurance**: Review video content for compliance and accuracy +- **AI Training**: Generate datasets for natural language processing models + +## πŸ› οΈ Core Components + +### 1. MCP Tools to Implement +- `extract_transcript(url: str, language: str = "en")` β†’ Clean transcript text +- `get_video_metadata(url: str)` β†’ Title, channel, duration, description +- `list_available_languages(url: str)` β†’ Available caption languages +- `extract_with_timestamps(url: str, language: str = "en")` β†’ Timestamped segments +- `validate_youtube_url(url: str)` β†’ URL validation helper +- `extract_chapters(url: str)` β†’ Video chapters if available +- `get_captions_info(url: str)` β†’ Caption availability and types + +### 2. Data Models (Pydantic) +- `TranscriptResult`: transcript_text, language, duration, word_count, source_type +- `VideoMetadata`: title, channel, duration, description, upload_date, view_count, thumbnail_url +- `TimestampedSegment`: text, start_time, end_time, confidence_score +- `LanguageOption`: code, name, auto_generated, translatable +- `ChapterInfo`: title, start_time, end_time +- `CaptionInfo`: available_languages, has_manual, has_auto_generated + +### 3. Error Handling Strategy +- Custom exception classes: `TranscriptNotAvailableError`, `InvalidURLError`, `VideoPrivateError`, `VideoNotFoundError`, `RateLimitError` +- Detailed error messages with actionable suggestions +- Fallback mechanisms (auto-generated if manual not available) +- Graceful degradation for partial failures +- Comprehensive logging for debugging and monitoring + +### 4. Performance Optimizations +- TTL cache for metadata and transcript results (configurable expiration) +- Async operations for all network calls with proper connection pooling +- Configurable timeouts and retry strategies with exponential backoff +- Batch processing support for multiple URLs in single request +- Memory-efficient streaming for very large transcripts + +### 5. Security & Validation +- Strict YouTube URL pattern matching (youtube.com, youtu.be, all variants) +- Input sanitization for all parameters (language codes, URLs) +- Rate limiting per client to prevent abuse +- No persistent storage of user data (privacy by design) +- Request size limits to prevent DoS attacks + +## πŸš€ Implementation Steps + +1. **Initialize uv project** with proper pyproject.toml configuration +2. **Implement URL validation** with comprehensive YouTube URL patterns +3. **Create core transcript extraction** with robust error handling +4. **Add metadata fetching** with fallback mechanisms +5. **Implement intelligent caching** with TTL and memory limits +6. **Add timestamp and chapter support** for detailed content analysis +7. **Create comprehensive test suite** covering all edge cases +8. **Write detailed documentation** with usage examples and troubleshooting +9. **Integration testing** with real Elf0 workflows +10. **Performance benchmarking** and optimization + +## πŸ§ͺ Test Strategy +- **Unit tests**: Each tool function with mocked responses +- **Integration tests**: Real YouTube URLs across different content types +- **Edge case testing**: Private videos, deleted content, invalid URLs, geo-restrictions +- **Performance tests**: Large transcripts, concurrent requests, memory usage +- **Error handling verification**: All exception paths and recovery scenarios +- **Security tests**: Input validation, injection attempts, rate limiting + +## πŸ“‹ Configuration Options +- Cache size and TTL settings +- Request timeout configurations +- Rate limiting parameters +- Preferred language fallback chains +- Chapter detection sensitivity +- Logging levels and output formats + +## πŸ”Œ Integration with Elf0 + +### Example Workflow YAML +```yaml +version: "0.1" +description: "YouTube transcript analysis with key points and summary" +runtime: "langgraph" + +llms: + analyzer_llm: + type: anthropic + model_name: claude-3-5-haiku-latest + temperature: 0.3 + +workflow: + type: custom_graph + nodes: + - id: extract_transcript + kind: mcp + config: + server: + command: ["uv", "run", "python", "mcps/mcp-youtube-transcript/server.py"] + tool: "extract_transcript" + parameters: + url: "${state.input}" + language: "en" + + - id: get_metadata + kind: mcp + config: + server: + command: ["uv", "run", "python", "mcps/mcp-youtube-transcript/server.py"] + tool: "get_video_metadata" + parameters: + url: "${state.input}" + + - id: find_key_points + kind: agent + ref: analyzer_llm + config: + prompt: | + Video: {metadata} + + Analyze this YouTube transcript and identify the 5 most interesting and important points. + Focus on actionable insights, surprising facts, or key concepts. + + Transcript: {transcript_text} + + Return as numbered list with brief explanations. + + - id: create_summary + kind: agent + ref: analyzer_llm + config: + prompt: | + Create a concise summary of this YouTube video. + Include: main topic, key insights, and practical takeaways. + + Video Info: {metadata} + Key Points: {output} + + Format as a well-structured summary. + stop: true + + edges: + - source: extract_transcript + target: get_metadata + - source: get_metadata + target: find_key_points + - source: find_key_points + target: create_summary +``` + +### Usage Examples +```bash +# Basic transcript extraction +uv run elf0 agent youtube_analyzer.yaml --prompt "https://youtube.com/watch?v=example" + +# Extract with specific language +uv run elf0 agent youtube_analyzer.yaml --prompt "https://youtube.com/watch?v=example&lang=es" + +# Batch processing multiple videos +uv run elf0 agent youtube_batch.yaml --prompt "Video URLs: url1, url2, url3" +``` + +## πŸ”’ Security Considerations + +### Data Privacy +- No persistent storage of transcript data +- Cache data expires automatically +- No tracking or analytics collection +- User URLs are not logged in production + +### Rate Limiting +- Per-client request limits to prevent abuse +- Exponential backoff for retries +- Respect YouTube's API guidelines +- Circuit breaker pattern for service protection + +### Input Validation +- Strict URL pattern matching +- Language code validation against ISO standards +- Parameter sanitization for all inputs +- Request size limits to prevent memory exhaustion + +## πŸ“Š Performance Targets + +- **Response Time**: < 5 seconds for transcript extraction +- **Memory Usage**: < 100MB for typical transcripts +- **Cache Hit Rate**: > 80% for repeated requests +- **Error Rate**: < 1% for valid URLs +- **Concurrent Users**: Support 50+ simultaneous requests + +This comprehensive plan creates a production-ready MCP server that handles the full spectrum of YouTube transcript extraction needs while maintaining performance, security, and reliability standards suitable for enterprise use. \ No newline at end of file diff --git a/docs_specs/spec_principles.md b/docs_specs/spec_principles.md new file mode 100644 index 0000000..45e195f --- /dev/null +++ b/docs_specs/spec_principles.md @@ -0,0 +1,703 @@ +# Elf0 YAML Specification Principles: Complete LLM Guide + +## Overview + +This document provides comprehensive principles for creating robust, maintainable, and error-free Elf0 YAML specification files. It synthesizes lessons learned from common failures and provides patterns that work reliably across different use cases. + +**Target Audience**: LLMs generating YAML specs, developers creating workflows, and anyone debugging specification issues. + +## Core Foundation Principles + +### 1. **Mandatory Fields First** +Always start with the absolutely required fields in exactly this order: + +```yaml +version: "0.1" # REQUIRED: Always use quotes +description: "Brief description" # OPTIONAL but recommended +runtime: "langgraph" # REQUIRED: langgraph | agentiq +``` + +**Critical Rule**: The `workflow.type` field is **ABSOLUTELY MANDATORY** and commonly forgotten. This field determines how the runtime processes the workflow. + +### 2. **Progressive Validation Strategy** +Build specs incrementally to catch errors early: + +1. **Minimal Valid Spec**: Start with single agent node +2. **Add Components**: One LLM, function, or node at a time +3. **Test Frequently**: Validate after each addition +4. **Complex Patterns Last**: Only add parallelism/custom graphs after basics work + +## LLM and Resource Management + +### 3. **LLM Definition Principles** + +```yaml +# βœ… CORRECT: Complete LLM definition +llms: + main_agent: # Use descriptive names + type: openai # openai | anthropic | ollama + model_name: gpt-4o-mini # Verify model availability + temperature: 0.3 # 0.0-1.0 range + params: # Flat key-value pairs only + max_tokens: 1000 + +# ❌ WRONG: Common mistakes +llms: + agent1: # Non-descriptive name + type: openai + model: gpt-4 # Wrong field name (should be model_name) + params: + response_format: # Complex nested objects not supported + type: "json_object" +``` + +**Model Availability**: Always use commonly available models: +- **OpenAI**: `gpt-4o-mini`, `gpt-4o`, `gpt-3.5-turbo` +- **Anthropic**: `claude-3-haiku-20240307`, `claude-3-sonnet-20240229` +- **Ollama**: `llama2`, `mistral`, `codellama` + +### 4. **Function Integration Patterns** + +#### State-First Design (Preferred) +```yaml +# βœ… PREFERRED: Let functions access state directly +functions: + user_input: + type: python + name: "User Input Collector" + entrypoint: "elf0.functions.utils.get_user_input" + +workflow: + nodes: + - id: collect_input + kind: tool + ref: user_input + # No config/parameters needed - function reads state +``` + +#### Parameter-Based Design (When Necessary) +```yaml +# βœ… ACCEPTABLE: Explicit parameters for configuration +- id: process_data + kind: tool + ref: data_processor + config: + parameters: + operation: "transform" # Static configuration + format: "json" # Not workflow state data +``` + +**Anti-Pattern**: Avoid complex state variable passing through parameters: +```yaml +# ❌ WRONG: Complex state parameter passing +config: + parameters: + prompt: "${state.question}" # Fragile template substitution + data: "{state.user_input}" # Inconsistent syntax +``` + +## Workflow Design Patterns + +### 5. **Sequential Workflow Pattern (Default Choice)** + +**Use for**: 90% of workflows including multi-agent interactions, data processing pipelines, and user interactions. + +```yaml +workflow: + type: sequential # MANDATORY field + nodes: + - id: input_processor + kind: agent + ref: main_llm + config: + prompt: | + Process user input: "{input}" + Generate structured output for next step. + + - id: task_executor + kind: agent + ref: main_llm + config: + prompt: | + Execute task based on: {state.output} + Provide detailed results. + + - id: final_formatter + kind: agent + ref: main_llm + config: + prompt: | + Format final response: {state.output} + Make it user-friendly and complete. + stop: true + + edges: # Optional for sequential - auto-generated + - source: input_processor + target: task_executor + - source: task_executor + target: final_formatter +``` + +### 6. **Input Isolation Principle (Critical)** + +**Rule**: Only ONE node should ever consume the raw `{input}` variable. + +```yaml +# βœ… CORRECT: Single input consumer +nodes: + - id: entry_point + prompt: "Analyze input: {input}" + output_key: processed_input + + - id: next_step + prompt: "Continue with: {state.processed_input}" + +# ❌ WRONG: Multiple input consumers (causes channel conflicts) +nodes: + - id: agent_a + prompt: "Process: {input}" + - id: agent_b + prompt: "Also process: {input}" # VIOLATION! +``` + +### 7. **State Variable Management** + +#### State Flow Rules +1. **Previous Node Output**: Access via `{state.output}` +2. **Specific Keys**: Use `output_key` for custom state variables +3. **State Accumulation**: Each node adds to state, doesn't replace it + +```yaml +# βœ… CORRECT: Proper state management +- id: data_analyzer + kind: agent + ref: analyzer_llm + config: + prompt: "Analyze: {input}" + output_key: analysis_result + +- id: decision_maker + kind: agent + ref: decision_llm + config: + prompt: | + Based on analysis: {state.analysis_result} + Make a decision about next steps. + output_key: decision + +- id: action_executor + kind: agent + ref: executor_llm + config: + prompt: | + Execute this decision: {state.decision} + Original analysis: {state.analysis_result} + Provide execution results. + stop: true +``` + +### 8. **Custom Graph Patterns (Advanced)** + +**Use only when you need**: True parallelism, complex routing, or dynamic workflows. + +#### Safe Fan-out β†’ Fan-in Pattern +```yaml +workflow: + type: custom_graph + nodes: + - id: input_distributor + kind: agent + ref: coordinator_llm + config: + prompt: "Distribute task: {input}" + output_key: distributed_task + + - id: specialist_a + kind: agent + ref: specialist_llm + config: + prompt: "Handle aspect A: {state.distributed_task}" + output_key: result_a + + - id: specialist_b + kind: agent + ref: specialist_llm + config: + prompt: "Handle aspect B: {state.distributed_task}" + output_key: result_b + + - id: result_synthesizer + kind: agent + ref: synthesizer_llm + config: + prompt: | + Combine results: + Specialist A: {state.result_a} + Specialist B: {state.result_b} + Original task: {state.distributed_task} + stop: true + + edges: + - source: input_distributor + target: specialist_a + - source: input_distributor + target: specialist_b + - source: specialist_a + target: result_synthesizer + - source: specialist_b + target: result_synthesizer +``` + +## Advanced Integration Patterns + +### 9. **MCP Integration (Modern Pattern)** + +```yaml +# βœ… MODERN: Direct MCP nodes (preferred) +workflow: + nodes: + - id: calculator + kind: mcp + config: + server: + command: ["python", "mcp/calculator/server.py"] + cwd: "/path/to/project" + tool: "calculate" + parameters: + a: "${state.json.number_a}" + b: "${state.json.number_b}" + operation: "${state.json.operation}" + +# ❌ LEGACY: MCP functions (deprecated) +functions: + calc_tool: + type: mcp + entrypoint: "mcp://localhost:3000/calculate" +``` + +### 10. **Interactive User Input Patterns** + +```yaml +# βœ… GENERIC INTERACTIVE PATTERN +workflow: + type: sequential + nodes: + - id: generate_question + kind: agent + ref: assistant + config: + prompt: | + Based on: "{input}" + What additional information do you need? + Generate ONE specific question. + Output only the question text. + + - id: collect_user_response + kind: tool + ref: get_user_input + # No parameters - function uses state.output for question + + - id: generate_final_response + kind: agent + ref: assistant + config: + prompt: | + Original request: "{input}" + Question asked: {state.output} + User response: {state.user_input} + Provide comprehensive final answer. + stop: true +``` + +## Error Prevention and Edge Cases + +### 11. **Validation Checklist** + +Before creating any YAML spec, verify: + +#### Required Fields +- [ ] `version` is present and quoted +- [ ] `runtime` is specified +- [ ] `workflow.type` is specified (most common error) +- [ ] At least one LLM is defined in `llms` section +- [ ] All node `ref` fields point to existing LLMs/functions +- [ ] At least one node has `stop: true` + +#### State Management +- [ ] Only one node consumes `{input}` +- [ ] All `output_key` values are unique +- [ ] State references use consistent syntax: `{state.variable_name}` +- [ ] No empty prompt template sections + +#### Graph Structure +- [ ] All edge `source` and `target` refer to existing node IDs +- [ ] No unreachable nodes (orphans) +- [ ] No infinite loops in edge definitions +- [ ] Custom graphs have proper convergence points + +### 12. **Common Error Patterns and Fixes** + +#### Channel Conflict Errors +```yaml +# ERROR: INVALID_CONCURRENT_GRAPH_UPDATE +# CAUSE: Multiple nodes receiving input simultaneously + +# ❌ PROBLEMATIC PATTERN +edges: + - source: start_node + target: agent_a + - source: start_node # Fan-out without proper state management + target: agent_b + +# βœ… SOLUTION: Sequential with state passing +edges: + - source: start_node + target: coordinator + - source: coordinator + target: agent_a + - source: agent_a + target: agent_b +``` + +#### Reference Errors +```yaml +# ERROR: Node references unknown LLM/function +# ❌ WRONG: Undefined reference +nodes: + - id: my_agent + kind: agent + ref: undefined_llm # Not in llms section + +# βœ… CORRECT: Valid reference +llms: + main_llm: + type: openai + model_name: gpt-4o-mini + +nodes: + - id: my_agent + kind: agent + ref: main_llm +``` + +#### Template Variable Errors +```yaml +# ❌ WRONG: Malformed template variables +prompt: | + Process input: {input} + Previous result: {state.previous_result + Context: {state.} + +# βœ… CORRECT: Proper template syntax +prompt: | + Process input: {input} + Previous result: {state.previous_result} + Context: {state.context_data} +``` + +### 13. **Performance and Resource Principles** + +#### Model Selection Strategy +```yaml +# Fast + Cheap for simple tasks +model_name: gpt-4o-mini +temperature: 0.1 + +# Powerful for complex reasoning +model_name: gpt-4o +temperature: 0.3 + +# Local for privacy/offline +type: ollama +model_name: llama2 +``` + +#### Workflow Optimization +```yaml +# βœ… EFFICIENT: Minimal necessary steps +nodes: + - id: process + - id: validate + - id: output + +# ❌ INEFFICIENT: Unnecessary complexity +nodes: + - id: pre_process + - id: process + - id: post_process + - id: validate + - id: re_validate + - id: format + - id: post_format + - id: output +``` + +## Security and Safety Principles + +### 14. **Input Sanitization** +```yaml +# βœ… SAFE: Validate and sanitize input +- id: input_validator + kind: agent + ref: validator_llm + config: + prompt: | + Validate and sanitize this input: "{input}" + Ensure it contains no harmful content. + Output cleaned version or reject if unsafe. +``` + +### 15. **Function Security** +```yaml +# βœ… SAFE: Use only trusted function entrypoints +functions: + safe_processor: + type: python + entrypoint: "elf0.functions.verified.safe_processor" + +# ❌ RISKY: Arbitrary or untrusted code +functions: + risky_tool: + entrypoint: "random_package.untrusted_function" +``` + +## Testing and Debugging Strategies + +### 16. **Progressive Testing Pattern** + +```yaml +# Step 1: Minimal working spec +version: "0.1" +runtime: "langgraph" +llms: + test_llm: + type: openai + model_name: gpt-4o-mini +workflow: + type: sequential + nodes: + - id: simple_test + kind: agent + ref: test_llm + stop: true + +# Step 2: Add one component at a time +# Step 3: Test after each addition +# Step 4: Add complexity only when basics work +``` + +### 17. **Debug Information Strategy** + +```yaml +# Add debug nodes during development +- id: debug_state + kind: agent + ref: debug_llm + config: + prompt: | + Debug state inspection: + Current state keys: {state.keys()} + Input: {input} + Previous output: {state.output} + Generate summary of current workflow state. + output_key: debug_info +``` + +## Specialized Use Case Patterns + +### 18. **Multi-Agent Simulation** +```yaml +# βœ… EFFECTIVE: Sequential simulation creating agent interaction illusion +workflow: + type: sequential + nodes: + - id: scenario_setup + prompt: "Setup scenario: {input}" + + - id: agent_alice + prompt: | + You are Alice in this scenario: {state.output} + React and respond naturally. + + - id: agent_bob + prompt: | + You are Bob. Alice just said: {state.output} + In scenario: {state.scenario_setup} + Respond to Alice naturally. + + - id: agent_alice_response + prompt: | + You are Alice. Bob responded: {state.output} + Continue the conversation naturally. + + - id: simulation_summary + prompt: | + Summarize this agent interaction: + Scenario: {state.scenario_setup} + Full conversation context from previous exchanges. + stop: true +``` + +### 19. **Data Processing Pipeline** +```yaml +workflow: + type: sequential + nodes: + - id: data_ingestion + kind: agent + ref: processor_llm + config: + prompt: "Ingest and structure: {input}" + output_key: structured_data + + - id: data_validation + kind: tool + ref: validator_function + + - id: data_transformation + kind: agent + ref: transformer_llm + config: + prompt: | + Transform validated data: {state.output} + Original structure: {state.structured_data} + output_key: transformed_data + + - id: output_formatting + kind: agent + ref: formatter_llm + config: + prompt: "Format final output: {state.transformed_data}" + stop: true +``` + +### 20. **Conditional Workflow** +```yaml +workflow: + type: custom_graph + nodes: + - id: decision_point + kind: agent + ref: decision_llm + config: + prompt: | + Analyze: {input} + Route to: "technical" or "creative" or "general" + Output only the routing decision. + output_key: routing_decision + + - id: technical_handler + kind: agent + ref: technical_llm + config: + prompt: "Handle technical query: {state.routing_decision}" + output_key: technical_result + + - id: creative_handler + kind: agent + ref: creative_llm + config: + prompt: "Handle creative query: {state.routing_decision}" + output_key: creative_result + + - id: general_handler + kind: agent + ref: general_llm + config: + prompt: "Handle general query: {state.routing_decision}" + output_key: general_result + + edges: + - source: decision_point + target: technical_handler + condition: "'technical' in state.routing_decision.lower()" + - source: decision_point + target: creative_handler + condition: "'creative' in state.routing_decision.lower()" + - source: decision_point + target: general_handler + condition: "'general' in state.routing_decision.lower()" +``` + +## Anti-Patterns and What to Avoid + +### 21. **Critical Anti-Patterns** + +```yaml +# ❌ NEVER DO: Multiple input consumers +nodes: + - id: agent_a + prompt: "{input}" + - id: agent_b + prompt: "{input}" + +# ❌ NEVER DO: Conflicting output keys +nodes: + - id: agent_a + output_key: "result" + - id: agent_b + output_key: "result" + +# ❌ NEVER DO: Empty or incomplete references +nodes: + - id: broken_node + prompt: | + Context: {state.} + Data: + Process: {state.nonexistent} + +# ❌ NEVER DO: Complex parameter passing +config: + parameters: + complex_data: "${state.nested.deep.value}" + template_string: "Process {state.var} with {state.other}" + +# ❌ NEVER DO: Missing required fields +workflow: + # Missing type field! + nodes: + - id: node1 + kind: agent + # Missing ref field! +``` + +## Quick Reference Checklist + +### Pre-Creation Checklist +- [ ] Understand the use case (sequential vs parallel needs) +- [ ] Choose appropriate LLM models for the task +- [ ] Design state flow before writing YAML +- [ ] Plan testing strategy + +### During Creation Checklist +- [ ] Start with minimal working spec +- [ ] Add required fields first +- [ ] Use descriptive IDs and names +- [ ] Follow input isolation principle +- [ ] Design unique output keys +- [ ] Test incrementally + +### Post-Creation Validation +- [ ] All required fields present +- [ ] No orphaned or unreachable nodes +- [ ] State references are complete +- [ ] Edge sources/targets exist +- [ ] No anti-patterns present +- [ ] Test with real inputs + +## Summary: Golden Rules for LLMs + +When generating YAML specifications: + +1. **Start Simple**: Begin with sequential workflow pattern +2. **Required Fields First**: `version`, `runtime`, `workflow.type` are mandatory +3. **One Input Consumer**: Only first node uses `{input}` +4. **State Flow**: Use `{state.output}` for previous node results +5. **Unique Keys**: Every `output_key` must be unique +6. **Valid References**: All `ref` fields must point to defined LLMs/functions +7. **Complete Templates**: No empty `{state.}` or malformed variables +8. **Test Incrementally**: Build and test in small steps +9. **Avoid Complexity**: Use custom graphs only when truly needed +10. **Follow Patterns**: Use proven patterns from this document + +**Remember**: A working simple specification is infinitely better than a broken complex one. Start minimal, test frequently, and add complexity only when the basics work perfectly. \ No newline at end of file diff --git a/docs_specs/spec_structure.md b/docs_specs/spec_structure.md index 5cdb128..f5e8dca 100644 --- a/docs_specs/spec_structure.md +++ b/docs_specs/spec_structure.md @@ -10,7 +10,7 @@ workflow: max_iterations: # Optional: For workflows with loops, max times to repeat ``` -**CRITICAL**: The `workflow.type` field is **mandatory**. It determines how the runtime engine processes the nodes and edges. Refer to `docs_ai/flow_rules.md` for guidance on choosing the correct type based on your edge patterns (e.g., sequential vs. fan-out/fan-in for custom_graph). +**CRITICAL**: The `workflow.type` field is **mandatory**. It determines how the runtime engine processes the nodes and edges. Refer to `docs/flow_rules.md` for guidance on choosing the correct type based on your edge patterns (e.g., sequential vs. fan-out/fan-in for custom_graph). --- @@ -28,7 +28,7 @@ Nodes are the building blocks of your workflow, representing individual tasks or - **Required** for `mcp` nodes. - Commonly used in `agent` and `judge` nodes for `prompt` and `output_key`. - Used in `tool` nodes if the tool itself requires specific parameters not covered by its `Function` definition. -- `stop` (bool, Optional, default: `false`): If `true`, the workflow (or the current path in a `custom_graph`) will terminate after this node executes. See Rule 5 in `docs_ai/flow_rules.md`. +- `stop` (bool, Optional, default: `false`): If `true`, the workflow (or the current path in a `custom_graph`) will terminate after this node executes. See Rule 5 in `docs/flow_rules.md`. ### Node Kinds @@ -37,8 +37,8 @@ Nodes are the building blocks of your workflow, representing individual tasks or - **Purpose**: For all LLM-based processing. This includes tasks like analyzing input, generating text, making decisions based on prompts, simulating personas, or any operation that involves sending a prompt to an LLM. - **`ref`**: **Required**. Must point to a valid LLM name defined in the top-level `llms` section. - **`config`**: - - `prompt` (string, **Required**): The prompt template to be sent to the LLM. Can include placeholders like `{workflow_initial_input}` (for the very first agent node in a workflow - see Rule 1 in `docs_ai/flow_rules.md`) or `{state.variable_name}` to inject data from previous nodes (see Rule 3 in `docs_ai/flow_rules.md`). - - `output_key` (string, **Required if output is used by other nodes**): The key under which this agent's output (the LLM's response) will be stored in the workflow state, accessible by subsequent nodes via `{state.your_output_key}`. See Rule 2 in `docs_ai/flow_rules.md`. + - `prompt` (string, **Required**): The prompt template to be sent to the LLM. Can include placeholders like `{workflow_initial_input}` (for the very first agent node in a workflow - see Rule 1 in `docs/flow_rules.md`) or `{state.variable_name}` to inject data from previous nodes (see Rule 3 in `docs/flow_rules.md`). + - `output_key` (string, **Required if output is used by other nodes**): The key under which this agent's output (the LLM's response) will be stored in the workflow state, accessible by subsequent nodes via `{state.your_output_key}`. See Rule 2 in `docs/flow_rules.md`. ```yaml - id: analyze_request @@ -121,7 +121,7 @@ Nodes are the building blocks of your workflow, representing individual tasks or #### 5. `mcp` (Model Context Protocol) -- **Purpose**: For direct integration with external tools and services via the Model Context Protocol. This is the recommended way to call external, potentially long-running or stateful, tools like calculators, database query engines, file system tools, etc. See Rule 8 in `docs_ai/flow_rules.md` for when to choose `mcp` over `agent`. +- **Purpose**: For direct integration with external tools and services via the Model Context Protocol. This is the recommended way to call external, potentially long-running or stateful, tools like calculators, database query engines, file system tools, etc. See Rule 8 in `docs/flow_rules.md` for when to choose `mcp` over `agent`. - **`ref`**: **Not used**. All MCP configuration is within the `config` block. - **`config`** (**Required**, and has specific sub-fields): - `server` (object, **Required**): Defines how to start and manage the MCP server. @@ -183,4 +183,4 @@ edges: # condition: "state.get('status') == 'error'" # Or simply no condition if it's the last edge ``` -This detailed structure, in conjunction with `docs_specs/spec_schema.md` and `docs_ai/flow_rules.md`, should provide a solid foundation for LLMs to understand how to construct valid and meaningful workflow YAML files. \ No newline at end of file +This detailed structure, in conjunction with `docs_specs/spec_schema.md` and `docs/flow_rules.md`, should provide a solid foundation for LLMs to understand how to construct valid and meaningful workflow YAML files. \ No newline at end of file diff --git a/mcp/youtube-transcript/README.md b/mcp/youtube-transcript/README.md new file mode 100644 index 0000000..5083302 --- /dev/null +++ b/mcp/youtube-transcript/README.md @@ -0,0 +1,127 @@ +# YouTube Transcript MCP Server + +A minimal Model Context Protocol (MCP) server for extracting YouTube video transcripts and metadata, designed for integration with Elf0 workflows. + +## Features + +- **Extract transcripts** from YouTube videos with language support +- **Get video metadata** including title, channel, duration, view count +- **Validate YouTube URLs** for proper format checking +- **Simple integration** with Elf0 MCP workflows + +## Installation + +From the project root directory, install the required dependency: + +```bash +uv pip install youtube-transcript-api +``` + +## Starting the Server + +Start the MCP server from the project root: + +```bash +uv run python mcp/youtube-transcript/server.py +``` + +The server will start and wait for JSON-RPC requests via stdin. This is normal behaviour - it communicates with Elf0 workflows automatically. + +## Testing the Server + +### Quick Test with Elf0 +Test the server with a real YouTube video using the example workflow: + +```bash +uv run elf0 agent specs/content/youtube_analyzer.yaml --prompt "Analyse this youtube video https://www.youtube.com/watch?v=9tOmppsiO2w" +``` + +### Manual JSON-RPC Testing + +```bash +# Test server connectivity +echo '{"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}' | uv run python mcp/youtube-transcript/server.py + +# Test transcript extraction +echo '{"jsonrpc": "2.0", "id": 2, "method": "tools/call", "params": {"name": "extract_transcript", "arguments": {"url": "https://www.youtube.com/watch?v=9tOmppsiO2w"}}}' | uv run python mcp/youtube-transcript/server.py +``` + +### With Elf0 Workflows + +The server integrates seamlessly with Elf0 workflows. Here's an example workflow configuration: + +```yaml +version: "0.1" +description: "YouTube transcript analysis" +runtime: "langgraph" + +llms: + analyzer: + type: openai + model_name: gpt-4o-mini + temperature: 0.3 + +workflow: + type: sequential + nodes: + - id: extract_transcript + kind: mcp + config: + server: + command: ["uv", "run", "python", "mcp/youtube-transcript/server.py"] + cwd: "/Users/benemson/Dropbox/devel/projects/ai/elf0" + tool: "extract_transcript" + parameters: + url: "{input}" + language: "en" + + - id: analyze_content + kind: agent + ref: analyzer + config: + prompt: | + Analyze this YouTube transcript and provide key insights: + + {state.output} + stop: true + + edges: + - source: extract_transcript + target: analyze_content +``` + +Run with Elf0: + +```bash +uv run elf0 agent specs/content/youtube_analyzer.yaml --prompt "https://youtube.com/watch?v=example" +``` + +## Available Tools + +### extract_transcript +- **Input**: `url` (YouTube URL), `language` (optional, default: "en") +- **Output**: Transcript text with metadata (word count, language, etc.) + +### get_video_metadata +- **Input**: `url` (YouTube URL) +- **Output**: Video title, channel, duration, description, view count, thumbnail + +### validate_youtube_url +- **Input**: `url` (URL to validate) +- **Output**: Boolean validation result + +## Supported URL Formats + +- `https://youtube.com/watch?v=VIDEO_ID` +- `https://youtu.be/VIDEO_ID` +- Direct video IDs: `VIDEO_ID` + +## Error Handling + +The server handles common errors gracefully: +- Invalid URLs +- Private or deleted videos +- Missing transcripts +- Network failures + +Errors are returned as JSON-RPC error responses with descriptive messages. \ No newline at end of file diff --git a/mcp/youtube-transcript/pyproject.toml b/mcp/youtube-transcript/pyproject.toml new file mode 100644 index 0000000..a17e403 --- /dev/null +++ b/mcp/youtube-transcript/pyproject.toml @@ -0,0 +1,16 @@ +[project] +name = "youtube-transcript-mcp" +version = "0.1.0" +description = "YouTube Transcript MCP Server for Elf0" +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ + "youtube-transcript-api>=1.1.0", +] + +[project.scripts] +youtube-transcript-mcp = "server:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/mcp/youtube-transcript/server.py b/mcp/youtube-transcript/server.py new file mode 100644 index 0000000..0ded194 --- /dev/null +++ b/mcp/youtube-transcript/server.py @@ -0,0 +1,256 @@ +#!/usr/bin/env python3 +# mcp/youtube-transcript/server.py +""" +YouTube Transcript MCP Server +Minimal implementation for extracting YouTube video transcripts and metadata +Compatible with Elf0's MCP client implementation +""" +import json +import sys +import re +from urllib.parse import urlparse, parse_qs +from youtube_transcript_api import YouTubeTranscriptApi + +try: + from rich.console import Console + console = Console(stderr=True) +except ImportError: + console = None + +def log_message(message: str, style: str = None): + """Log message using rich if available, otherwise print to stderr""" + if console: + console.print(message, style=style) + else: + print(f"[YouTube MCP] {message}", file=sys.stderr) + + +def extract_video_id(url: str) -> str: + """Extract YouTube video ID from URL""" + # Handle youtube.com/watch?v=ID + if 'youtube.com/watch' in url: + parsed = urlparse(url) + params = parse_qs(parsed.query) + if 'v' in params: + return params['v'][0] + + # Handle youtu.be/ID + if 'youtu.be/' in url: + return url.split('youtu.be/')[-1].split('?')[0] + + # Handle direct video ID + if re.match(r'^[a-zA-Z0-9_-]{11}$', url): + return url + + raise ValueError(f"Could not extract video ID from URL: {url}") + + +def validate_youtube_url(url: str) -> bool: + """Validate if URL is a YouTube video URL""" + try: + extract_video_id(url) + return True + except ValueError: + return False + + +def extract_transcript(url: str, language: str = "en") -> dict: + """Extract transcript from YouTube video""" + try: + video_id = extract_video_id(url) + log_message(f"πŸ“Ί Fetching transcript for video: {video_id}", "blue") + + # Try to get transcript in specified language + try: + transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=[language]) + except Exception: + # Fallback to auto-generated or any available language + transcript_list = YouTubeTranscriptApi.get_transcript(video_id) + + # Join all transcript segments + transcript_text = ' '.join([item['text'] for item in transcript_list]) + word_count = len(transcript_text.split()) + + log_message(f"βœ… Transcript extracted: {word_count} words, {len(transcript_list)} segments", "green") + + return { + "transcript_text": transcript_text, + "language": language, + "word_count": word_count, + "segment_count": len(transcript_list) + } + + except Exception as e: + raise Exception(f"Failed to extract transcript: {str(e)}") + + +def get_video_metadata(url: str) -> dict: + """Get basic YouTube video metadata from URL""" + try: + video_id = extract_video_id(url) + log_message(f"πŸ“‹ Getting metadata for video: {video_id}", "cyan") + + return { + "video_id": video_id, + "url": url, + "platform": "YouTube", + "note": "Full metadata extraction temporarily disabled due to YouTube API restrictions" + } + + except Exception as e: + raise Exception(f"Failed to get video metadata: {str(e)}") + + +def main(): + """Handle MCP requests via stdin/stdout""" + log_message("πŸš€ YouTube Transcript MCP Server starting...", "bold green") + log_message("πŸ’‘ Waiting for MCP requests via stdin", "dim") + + tools = [ + { + "name": "extract_transcript", + "description": "Extract transcript text from a YouTube video", + "inputSchema": { + "type": "object", + "properties": { + "url": {"type": "string", "description": "YouTube video URL or video ID"}, + "language": {"type": "string", "description": "Language code (default: 'en')", "default": "en"} + }, + "required": ["url"] + } + }, + { + "name": "get_video_metadata", + "description": "Get metadata for a YouTube video (title, channel, duration, etc.)", + "inputSchema": { + "type": "object", + "properties": { + "url": {"type": "string", "description": "YouTube video URL or video ID"} + }, + "required": ["url"] + } + }, + { + "name": "validate_youtube_url", + "description": "Validate if a URL is a valid YouTube video URL", + "inputSchema": { + "type": "object", + "properties": { + "url": {"type": "string", "description": "URL to validate"} + }, + "required": ["url"] + } + } + ] + + for line in sys.stdin: + try: + request = json.loads(line.strip()) + method = request.get("method") + params = request.get("params", {}) + + if method == "initialize": + log_message("πŸ”Œ Client connected and initialized", "green") + response = { + "jsonrpc": "2.0", + "id": request.get("id"), + "result": { + "protocolVersion": "2024-11-05", + "capabilities": {"tools": {}}, + "serverInfo": {"name": "youtube-transcript", "version": "0.1.0"} + } + } + elif method == "tools/list": + response = { + "jsonrpc": "2.0", + "id": request.get("id"), + "result": {"tools": tools} + } + elif method == "tools/call": + tool_name = params.get("name") + args = params.get("arguments", {}) + + try: + if tool_name == "extract_transcript": + url = args.get("url") + language = args.get("language", "en") + + if not url: + raise ValueError("URL is required") + + result = extract_transcript(url, language) + + response = { + "jsonrpc": "2.0", + "id": request.get("id"), + "result": { + "content": [{"type": "text", "text": json.dumps(result, indent=2)}] + } + } + + elif tool_name == "get_video_metadata": + url = args.get("url") + + if not url: + raise ValueError("URL is required") + + result = get_video_metadata(url) + + response = { + "jsonrpc": "2.0", + "id": request.get("id"), + "result": { + "content": [{"type": "text", "text": json.dumps(result, indent=2)}] + } + } + + elif tool_name == "validate_youtube_url": + url = args.get("url") + + if not url: + raise ValueError("URL is required") + + is_valid = validate_youtube_url(url) + result = {"valid": is_valid, "url": url} + + response = { + "jsonrpc": "2.0", + "id": request.get("id"), + "result": { + "content": [{"type": "text", "text": json.dumps(result, indent=2)}] + } + } + + else: + response = { + "jsonrpc": "2.0", + "id": request.get("id"), + "error": {"code": -1, "message": f"Unknown tool: {tool_name}"} + } + + except Exception as e: + response = { + "jsonrpc": "2.0", + "id": request.get("id"), + "error": {"code": -1, "message": str(e)} + } + else: + response = { + "jsonrpc": "2.0", + "id": request.get("id"), + "error": {"code": -32601, "message": f"Method not found: {method}"} + } + + print(json.dumps(response), flush=True) + + except Exception as e: + error_response = { + "jsonrpc": "2.0", + "id": request.get("id") if 'request' in locals() else None, + "error": {"code": -32603, "message": str(e)} + } + print(json.dumps(error_response), flush=True) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index c6385d2..1c857fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,3 +147,6 @@ path = "src/elf0/__init__.py" [tool.hatch.build.targets.wheel] packages = ["src/elf0"] + +[tool.uv.workspace] +members = ["mcp/youtube-transcript"] diff --git a/specs/agent-creator-01.yaml b/specs/archive/agent-creator-01.yaml similarity index 99% rename from specs/agent-creator-01.yaml rename to specs/archive/agent-creator-01.yaml index 35cbccf..3bd01f9 100644 --- a/specs/agent-creator-01.yaml +++ b/specs/archive/agent-creator-01.yaml @@ -20,7 +20,7 @@ llms: **Key Resources:** - Schema rules: docs_specs/spec_schema.md (required fields, types, validation) - - Design patterns: docs_ai/agent_patterns.md (8 proven patterns with use cases) + - Design patterns: docs/agent_patterns.md (8 proven patterns with use cases) - Reference implementation: specs/yaml_optimizer.yaml (evaluator-optimizer example) **Analysis Framework:** diff --git a/specs/agent-creator-02.yaml b/specs/archive/agent-creator-02.yaml similarity index 98% rename from specs/agent-creator-02.yaml rename to specs/archive/agent-creator-02.yaml index 8c6b0ae..280229c 100644 --- a/specs/agent-creator-02.yaml +++ b/specs/archive/agent-creator-02.yaml @@ -35,7 +35,7 @@ workflow: config: prompt: | Given the rewritten prompt below, reason about the best workflow pattern to process it. - Consider agent patterns in @docs_ai/agent_patterns.md + Consider agent patterns in @docs/agent_patterns.md Output your reasoning and specify the recommended workflow graph pattern (sequential, custom_graph, react, evaluator_optimizer) as a mermaid diagram so that the edges in the new YAML file can be correctly generated. diff --git a/specs/agent-linkedin-01.yaml b/specs/archive/agent-linkedin-01.yaml similarity index 100% rename from specs/agent-linkedin-01.yaml rename to specs/archive/agent-linkedin-01.yaml diff --git a/specs/agent-optimizer.yaml b/specs/archive/agent-optimizer.yaml similarity index 100% rename from specs/agent-optimizer.yaml rename to specs/archive/agent-optimizer.yaml diff --git a/specs/agent-simulation.yaml b/specs/archive/agent-simulation.yaml similarity index 97% rename from specs/agent-simulation.yaml rename to specs/archive/agent-simulation.yaml index 370bac6..f63f585 100644 --- a/specs/agent-simulation.yaml +++ b/specs/archive/agent-simulation.yaml @@ -39,7 +39,7 @@ workflow: ref: rule_llm config: prompt: | - You have access to @docs_ai/spec_schema.md and @docs_ai/flow_rules.md. + You have access to @docs/spec_schema.md and @docs/flow_rules.md. Extract the non-negotiable requirements for a valid LangGraph YAML spec: β€’ mandatory top-level keys β€’ one and only one use of the reserved input placeholder (curly braces around *input*) in the first node @@ -86,7 +86,7 @@ workflow: ref: strategist_llm config: prompt: | - Using @docs_ai/agent_patterns.md plus the element list below, design the optimal + Using @docs/agent_patterns.md plus the element list below, design the optimal graph structure for a simulation. {state.scenario_breakdown} diff --git a/specs/agent-tech_doc_creator.yaml b/specs/archive/agent-tech_doc_creator.yaml similarity index 100% rename from specs/agent-tech_doc_creator.yaml rename to specs/archive/agent-tech_doc_creator.yaml diff --git a/specs/agent-twitter-01.yaml b/specs/archive/agent-twitter-01.yaml similarity index 100% rename from specs/agent-twitter-01.yaml rename to specs/archive/agent-twitter-01.yaml diff --git a/specs/agent-twitter-01_improved.yaml b/specs/archive/agent-twitter-01_improved.yaml similarity index 97% rename from specs/agent-twitter-01_improved.yaml rename to specs/archive/agent-twitter-01_improved.yaml index 8e4163e..e12c7b5 100644 --- a/specs/agent-twitter-01_improved.yaml +++ b/specs/archive/agent-twitter-01_improved.yaml @@ -32,7 +32,7 @@ workflow: INSTRUCTIONS: 1. Carefully read the following input text: - {workflow_initial_input} + {input} 2. Identify and clearly describe the primary audience for this text. 3. Decide if the content is best expressed as a "single" tweet or a "thread": - Choose "thread" if the input contains multiple key points, detailed explanations, or a narrative that benefits from multiple tweets. @@ -43,7 +43,7 @@ workflow: Tweet Format: <"single" or "thread"> Do NOT include any extra commentary or explanation. - output_key: analysis_and_format + output_key: analysis_and_format stop: false - id: reason_structure @@ -81,7 +81,7 @@ workflow: Thread Outline: Output ONLY as specified above, no extra text. - output_key: workflow_reasoning + output_key: workflow_reasoning stop: false - id: decide_path @@ -108,7 +108,7 @@ workflow: - Do NOT use emojis or dashes (except for web links). - Replace web links like mysite.com with mysite[dot]com. - Output ONLY the final tweet text, no explanations or extra text. - output_key: final_tweet + output_key: final_tweet stop: true - id: rewrite_thread @@ -139,7 +139,7 @@ workflow: - Supporting detail 3/5 Another insight... ... - output_key: final_thread + output_key: final_thread stop: true edges: diff --git a/specs/archive/agent_simulation.yaml b/specs/archive/agent_simulation.yaml new file mode 100644 index 0000000..055a4b9 --- /dev/null +++ b/specs/archive/agent_simulation.yaml @@ -0,0 +1,212 @@ +version: "0.1" +description: "Meta-agent that turns any scenario description into a complete LangGraph simulation spec. The generated spec role-plays each discovered element and aggregates an outcome report." + +runtime: "langgraph" + +llms: + rule_llm: + type: openai + model_name: gpt-4.1-mini + temperature: 0.2 + params: {} + decomposer_llm: + type: openai + model_name: gpt-4.1-mini + temperature: 0.3 + params: {} + strategist_llm: + type: openai + model_name: gpt-4.1-mini + temperature: 0.5 + params: {} + element_llm: + type: openai + model_name: gpt-4.1-mini + temperature: 0.4 + params: {} + yaml_llm: + type: openai + model_name: gpt-4.1-mini + temperature: 0.1 + params: {} + +workflow: + type: sequential + nodes: + # 1. Summarise the schema and flow rules + - id: gather_rules + kind: agent + ref: rule_llm + config: + prompt: | + You have access to @docs_specs/spec_schema.md and @docs/flow_rules.md. + Extract the non-negotiable requirements for a valid LangGraph YAML spec: + β€’ mandatory top-level keys + β€’ one and only one use of the reserved input placeholder {input} in the first node + β€’ all other data references via `{{state.xxx}}` + β€’ node field rules (`id`, `kind`, `ref`, `config`, `output_key`, valid `workflow.type`, etc.) + + OUTPUT under: + ## YAML_RULES + - + output_key: yaml_rules + + # 2. Break down the user's scenario and rewrite it clearly + - id: analyse_scenario + kind: agent + ref: decomposer_llm + config: + prompt: | + You are an expert at analysing prompts and breaking them down into their constituent elements, in order to create a graph simulation. + Your job is to dissect the raw simulation prompt into its constituent elements, and rewrite it in your own words, using: + (actors, objects, subsystems, environmental factors, constraints). + + ORIGINAL SCENARIO PROMPT + ------------------------ + {input} + ------------------------ + + Provide: + 1. **Element List** - bullet list with a short behavioural note for each element. + 2. **Rewritten Scenario** - your own concise, unambiguous restatement. + + Format exactly: + + ## ELEMENTS + - : + - ... + + ## REWRITTEN_SIMULATION_PROMPT + + output_key: scenario_breakdown + + # 3. Choose patterns and overall graph structure + - id: map_interactions + kind: agent + ref: strategist_llm + config: + prompt: | + Using @docs/agent_patterns.md plus the element list below, design the optimal + graph structure for a simulation. + + {state.scenario_breakdown} + + Produce: + + BEGIN REASONING + + [why particular patterns fit] + + END REASONING + + BEGIN WORKFLOW_TYPE + [sequential | custom_graph | react | evaluator_optimizer] + END WORKFLOW_TYPE + + BEGIN GRAPH_PLAN + - node list with brief role (e.g. scenario_input, _agent, aggregator) + - edge plan (who feeds whom) + END GRAPH_PLAN + output_key: graph_plan + + # 4. Craft a detailed simulation prompt for each element + - id: make_element_prompts + kind: agent + ref: element_llm + config: + prompt: | + Build a role-play prompt for each element so that, when given the scenario text, + it responds exactly as that element would. + + ELEMENT LIST AND REWRITTEN SCENARIO + ----------------------------------- + {state.scenario_breakdown} + + For each element, output: + + ### + Prompt: + """ + You are . . + about the typical characteristics of this element, its behaviours and triggers. + Imagine you are this element and incorporate these values and make a prediction of what happens in this scenario, based on your behaviours and traits. + + Scenario: + {{state.scenario_input}} + Provide your detailed reaction... + """ + + Combine all into a single block labelled: + ## ELEMENT_PROMPTS + output_key: element_prompts + + # 5. Generate the final simulation YAML + - id: build_simulation_yaml + kind: agent + ref: yaml_llm + stop: true + config: + format: yaml + prompt: | + Use the information below to produce a *schema-compliant* simulation spec as YAML output. + Think hard about the correct structure, going step by step implementing the graph. + + REQUIRED STRUCTURE: + - `version` (string): "1.0" + - `description` (string): Descriptive text about the simulation + - `runtime` (string): "langgraph" + - `llms` (object): Dictionary with one LLM definition + - Key should be descriptive (e.g., "sim_llm") + - Value must have: `type` (string), `model_name` (string) + - `workflow` (object): + - `type` (string): One of "sequential", "custom_graph", "react", "evaluator_optimizer" + - `nodes` (array): Array of node objects, each with: + - `id` (string): Unique identifier + - `kind` (string): "agent" for LLM nodes + - `ref` (string): Reference to LLM key + - `config` (object): Contains `prompt` (string) + - `output_key` (string): State variable name for output + - `stop` (boolean): true only for final node + - `edges` (array): Array of edge objects, each with: + - `source` (string): Source node id + - `target` (string): Target node id + + ## YAML RULES + {state.yaml_rules} + + ## GRAPH PLAN + {state.graph_plan} + + ## ELEMENT PROMPTS + {state.element_prompts} + + CRITICAL REQUIREMENTS: + - The first node must reference the reserved input placeholder `{input}` **once, within its prompt**. This node should store the input as `scenario_input` in state. The prompt must actually USE the {input} variable, not be hardcoded. No other node may reference `{input}`. + - All subsequent nodes must reference data via `{{state.variable_name}}` format only (e.g., `{{state.scenario_input}}` for the scenario text). + - Follow the node/edge layout from GRAPH_PLAN and the workflow type specified. + - Define exactly one LLM with model `gpt-4.1-mini`. + - Each element gets its own `agent` node using its crafted prompt. + - Add an `aggregate_outcomes` node that summarizes all element responses into a final predictive narrative. + - Use `stop: true` only on `aggregate_outcomes`. + - Ensure all node IDs in edges exist in the nodes array. + - DATA FLOW: {input} β†’ first_node(output_key: scenario_input) β†’ element_nodes(use: {{state.scenario_input}}) β†’ aggregator + - MANDATORY: The first node MUST have this EXACT prompt text: "Process this scenario for simulation: {input}. Extract key elements and provide comprehensive scenario context." + - FORBIDDEN: NO hardcoded scenario descriptions in the first node prompt. The prompt MUST contain {input} and ONLY {input} for scenario data. + - EXAMPLE VIOLATION (DO NOT DO): prompt: "Process this scenario for simulation: Simulate a agent engineer called Ben..." + - CORRECT PATTERN: prompt: "Process this scenario for simulation: {input}. Extract key elements and provide comprehensive scenario context." + - CRITICAL: NO "__start__" references anywhere in YAML. NO edges with source "__start__". + - ALL OTHER NODES must use {state.scenario_input} NEVER {input} + - EDGES: All edge sources must be actual node IDs from the nodes array + + Generate the complete specification as valid YAML. Do NOT include markdown fences, preamble, summary or commentary. + output_key: simulation_yaml + + edges: + - source: gather_rules + target: analyse_scenario + - source: analyse_scenario + target: map_interactions + - source: map_interactions + target: make_element_prompts + - source: make_element_prompts + target: build_simulation_yaml diff --git a/specs/basic_chat.yaml b/specs/archive/basic_chat.yaml similarity index 100% rename from specs/basic_chat.yaml rename to specs/archive/basic_chat.yaml diff --git a/specs/basic_reasoning-01.yaml b/specs/archive/basic_reasoning-01.yaml similarity index 100% rename from specs/basic_reasoning-01.yaml rename to specs/archive/basic_reasoning-01.yaml diff --git a/specs/basic_reasoning-02.yaml b/specs/archive/basic_reasoning-02.yaml similarity index 100% rename from specs/basic_reasoning-02.yaml rename to specs/archive/basic_reasoning-02.yaml diff --git a/specs/archive/claude_code_example.yaml b/specs/archive/claude_code_example.yaml new file mode 100644 index 0000000..f1e9bac --- /dev/null +++ b/specs/archive/claude_code_example.yaml @@ -0,0 +1,123 @@ +version: "0.1" +description: "Example workflow demonstrating Claude Code SDK integration for code generation, analysis, and self-improvement" +runtime: langgraph + +llms: + anthropic_sonnet: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.1 + max_tokens: 4096 + +workflow: + type: sequential + nodes: + # Step 1: Analyze user requirements using regular agent + - id: requirements_analyzer + kind: agent + ref: anthropic_sonnet + config: + prompt: | + Analyze the following software requirements and break them down into specific technical tasks: + + Requirements: {input} + + Please provide: + 1. A clear understanding of what needs to be built + 2. Key technical components needed + 3. Programming language and framework recommendations + 4. File structure suggestions + 5. Implementation approach + + Format your response in a structured way that can guide code generation. + + # Step 2: Generate code using Claude Code SDK + - id: code_generator + kind: claude_code + config: + task: "generate_code" + prompt: | + Based on the following analysis, generate the complete code implementation: + + ${state.output} + + Please create: + 1. Complete, working code files + 2. Proper error handling + 3. Clear documentation and comments + 4. Test cases if applicable + 5. README with setup instructions + + Focus on creating production-ready, maintainable code. + output_format: "text" + tools: ["filesystem", "bash"] + temperature: 0.2 + + # Step 3: Analyze the generated code for improvements + - id: code_analyzer + kind: claude_code + config: + task: "analyze_code" + prompt: | + Analyze the code that was just generated and provide a comprehensive review: + + Generated Code: + ${state.output} + + Please analyze: + 1. Code quality and best practices + 2. Potential security issues + 3. Performance considerations + 4. Maintainability aspects + 5. Test coverage + 6. Documentation quality + + Provide specific recommendations for improvements. + output_format: "json" + temperature: 0.1 + + # Step 4: Improve the code based on analysis + - id: code_improver + kind: claude_code + config: + task: "modify_code" + prompt: | + Improve the previously generated code based on this analysis: + + Analysis: ${state.output} + + Please: + 1. Apply the recommended improvements + 2. Fix any identified issues + 3. Enhance code quality and performance + 4. Add missing tests or documentation + 5. Ensure best practices are followed + + Provide the improved, production-ready code. + output_format: "text" + tools: ["filesystem", "bash"] + temperature: 0.15 + + # Step 5: Create final summary with regular agent + - id: summary_generator + kind: agent + ref: anthropic_sonnet + config: + prompt: | + Create a final summary of the code generation and improvement process: + + Original Requirements: {input} + + Final Code Output: {output} + + Please provide: + 1. Summary of what was built + 2. Key features implemented + 3. Quality improvements made + 4. Usage instructions + 5. Next steps for deployment or further development + + Make this summary suitable for both technical and non-technical stakeholders. + stop: true + + edges: [] # Sequential workflow - no custom edges needed diff --git a/specs/archive/claude_code_self_improvement.yaml b/specs/archive/claude_code_self_improvement.yaml new file mode 100644 index 0000000..0e972ac --- /dev/null +++ b/specs/archive/claude_code_self_improvement.yaml @@ -0,0 +1,126 @@ +version: "0.1" +description: "Self-improvement workflow using Claude Code to enhance ELF's own capabilities" +runtime: langgraph + +llms: + anthropic_sonnet: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.1 + max_tokens: 4096 + +workflow: + type: sequential + nodes: + # Step 1: Analyze improvement request + - id: improvement_analyzer + kind: agent + ref: anthropic_sonnet + config: + prompt: | + Analyze this request for improving the ELF platform: + + Request: {input} + + Please identify: + 1. Which ELF components need modification + 2. What new capabilities should be added + 3. Technical approach for implementation + 4. Files that need to be modified or created + 5. Testing requirements + + Focus on improvements that enhance ELF's workflow orchestration, agent patterns, or self-improvement capabilities. + + # Step 2: Generate implementation using Claude Code + - id: implementation_generator + kind: claude_code + config: + task: "generate_code" + prompt: | + Implement the ELF platform improvements based on this analysis: + + ${state.output} + + Please: + 1. Generate Python code following ELF's architecture patterns + 2. Create new node types, workflow patterns, or utilities as needed + 3. Follow ELF's coding standards (see CLAUDE.md) + 4. Add proper type hints and documentation + 5. Create example workflows demonstrating the new capabilities + + Ensure all code integrates seamlessly with ELF's existing architecture. + files: ["src/elf/core/", "specs/examples/"] + output_format: "text" + tools: ["filesystem"] + working_directory: "/Users/benemson/Dropbox/devel/projects/ai/elf" + temperature: 0.2 + + # Step 3: Review and optimize the implementation + - id: code_reviewer + kind: claude_code + config: + task: "analyze_code" + prompt: | + Review this ELF platform improvement implementation: + + ${state.output} + + Check for: + 1. Compliance with ELF's architecture patterns + 2. Proper error handling and logging + 3. Integration with existing components + 4. Code quality and maintainability + 5. Security considerations + 6. Performance implications + + Suggest specific improvements for production readiness. + output_format: "json" + temperature: 0.1 + + # Step 4: Create improved implementation + - id: final_implementation + kind: claude_code + config: + task: "modify_code" + prompt: | + Create the final, production-ready implementation incorporating this review: + + Review: {output} + + Please: + 1. Apply all recommended improvements + 2. Ensure robust error handling + 3. Add comprehensive logging + 4. Include unit tests following ELF's testing patterns + 5. Create documentation and examples + + The result should be ready for integration into ELF. + files: ["src/elf/", "tests/", "specs/examples/"] + output_format: "text" + tools: ["filesystem", "bash"] + working_directory: "/Users/benemson/Dropbox/devel/projects/ai/elf" + temperature: 0.15 + + # Step 5: Generate integration summary + - id: integration_summary + kind: agent + ref: anthropic_sonnet + config: + prompt: | + Create an integration summary for this ELF platform improvement: + + Original Request: {input} + Final Implementation: {output} + + Provide: + 1. Overview of new capabilities added + 2. Files created or modified + 3. Integration instructions + 4. Testing checklist + 5. Documentation updates needed + 6. Example usage scenarios + + This summary should help the development team integrate the improvements. + stop: true + + edges: [] diff --git a/specs/examples/claude_sonnet_example.yaml b/specs/archive/claude_sonnet_example.yaml similarity index 100% rename from specs/examples/claude_sonnet_example.yaml rename to specs/archive/claude_sonnet_example.yaml diff --git a/specs/archive/interactive_assistant.yaml b/specs/archive/interactive_assistant.yaml new file mode 100644 index 0000000..72eeb06 --- /dev/null +++ b/specs/archive/interactive_assistant.yaml @@ -0,0 +1,64 @@ +version: "0.1" +description: "Interactive assistant that asks follow-up questions and processes user responses" +runtime: "langgraph" + +llms: + assistant: + type: openai + model_name: gpt-4.1-mini + temperature: 0.3 + +functions: + get_input: + type: python + name: "User Input" + entrypoint: "elf0.functions.utils.get_user_input" + + process_text: + type: python + name: "Text Processor" + entrypoint: "elf0.functions.utils.text_processor" + +workflow: + type: sequential + nodes: + - id: generate_question + kind: agent + ref: assistant + config: + prompt: | + Based on the user's initial input: "${state.input}" + + Generate a thoughtful follow-up question to better understand what they need. + Ask only ONE specific question that will help you provide better assistance. + + Don't provide any answers yet - just ask the question. + + - id: ask_user + kind: tool + ref: get_input + config: + parameters: + prompt: "${state.output}" + + - id: process_response + kind: tool + ref: process_text + config: + parameters: + operation: "count_words" + + - id: final_response + kind: agent + ref: assistant + config: + prompt: | + Original request: "${state.input}" + Follow-up question asked: "${state.output}" + User's response: "${state.user_input}" + Response analysis: Word count is ${state.word_count} + + Now provide a comprehensive and helpful response that addresses their original request, + taking into account their follow-up answer. Be specific and actionable. + stop: true + edges: [] \ No newline at end of file diff --git a/specs/examples/mcp_workflow.yaml b/specs/archive/mcp_workflow.yaml similarity index 100% rename from specs/examples/mcp_workflow.yaml rename to specs/archive/mcp_workflow.yaml diff --git a/specs/archive/ollama_chat.yaml b/specs/archive/ollama_chat.yaml new file mode 100644 index 0000000..f5ae834 --- /dev/null +++ b/specs/archive/ollama_chat.yaml @@ -0,0 +1,44 @@ +# Location: workflows/basic_chat.yaml +version: "0.1" +description: "Ollama chat workflow that takes user input and returns a response" +runtime: "langgraph" + +# LLM definitions +llms: + chat_llm: + type: "ollama" + model_name: "huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct-GGUF:latest" + temperature: 0.5 + params: + max_tokens: 4000 + base_url: "http://localhost:11434" # Default Ollama URL + system_prompt: | + You are a highly trained Language Model to help users. + Your role as an assistant involves thoroughly exploring questions + through a systematic thinking process before providing the final + precise and accurate solutions. This requires engaging in a comprehensive + cycle of analysis, summarizing, exploration, reassessment, reflection, + backtracing, and iteration to develop well-considered thinking process. + Please structure your response into two main sections: Thought and Solution. + In the Thought section, detail your reasoning process in steps. + Each step should include detailed considerations such as analysing questions, + summarizing relevant findings, brainstorming new ideas, verifying the accuracy + of the current steps, refining any errors, and revisiting previous steps. + In the Solution section, based on various attempts, explorations, and reflections + from the Thought section, systematically present the final solution that you + deem correct. The Solution section should be logical, accurate, and concise. + +# No retrievers, memory or functions needed for basic chat +retrievers: {} +memory: {} +functions: {} + +# Workflow definition +workflow: + type: "sequential" + nodes: + - id: "chat_step" + kind: "agent" + ref: "chat_llm" + stop: true + edges: [] diff --git a/specs/examples/ollama_coder.yaml b/specs/archive/ollama_coder.yaml similarity index 100% rename from specs/examples/ollama_coder.yaml rename to specs/archive/ollama_coder.yaml diff --git a/specs/examples/ollama_optimizer.yaml b/specs/archive/ollama_optimizer.yaml similarity index 100% rename from specs/examples/ollama_optimizer.yaml rename to specs/archive/ollama_optimizer.yaml diff --git a/specs/examples/orchestration_workers.yaml b/specs/archive/orchestration_workers.yaml similarity index 100% rename from specs/examples/orchestration_workers.yaml rename to specs/archive/orchestration_workers.yaml diff --git a/specs/archive/prompt_chaining.yaml b/specs/archive/prompt_chaining.yaml new file mode 100644 index 0000000..98466c5 --- /dev/null +++ b/specs/archive/prompt_chaining.yaml @@ -0,0 +1,61 @@ +version: "1.0" +description: "A sequential workflow that first generates a detailed outline for a given topic and then refines that outline into a polished LinkedIn post." +runtime: "langgraph" # Specifies the execution engine + +llms: + # Define the LLM client configuration to be used by agent nodes + # The 'ref' in the nodes will point to 'default_chat_llm' + default_chat_llm: + type: "openai" # Corrected type name for schema validation + model_name: "gpt-4.1-mini" # Corrected field name, was 'model' + temperature: 0.7 + # A system message could be part of the LLM config if always applicable + # system_message: "You are an expert content creator." + +workflow: + type: sequential # Indicates nodes process data in sequence + nodes: + - id: outline_generator + kind: agent # This node is an LLM-powered agent + ref: default_chat_llm # Uses the LLM configuration defined above + config: + # The prompt template for this node. + # It's assumed that {input} will be replaced with the initial user prompt. + prompt: | + Topic: {input} + + Based on the topic provided above, please generate a comprehensive and well-structured outline. + The outline should clearly delineate: + 1. Main sections with descriptive headings. + 2. Key sub-points or arguments under each main section. + 3. A logical progression of ideas from introduction to conclusion. + + The output should consist solely of the generated outline text. + # No 'stop: true' here, as it's not the final node + + - id: linkedin_post_refiner + kind: agent + ref: default_chat_llm + config: + # This prompt template will use the output of the 'outline_generator'. + # It's assumed that {output} will be replaced with state['output'] from the previous node. + prompt: | + Original Outline: + {output} + + Transform the above outline into a polished, engaging, and professional LinkedIn post. + The post should: + - Adopt a natural and human-like tone. + - Be suitable for a professional LinkedIn audience. + - Encourage engagement (e.g., by posing a thoughtful question or a call to discussion). + - Be well-structured and easy to read (consider using short paragraphs, bullet points if appropriate). + - Include 2-3 relevant hashtags. + + The output should consist solely of the LinkedIn post content. + stop: true # Marks this as the final node in the workflow + + edges: + # Defines the explicit sequence of execution from one node to the next. + - source: outline_generator + target: linkedin_post_refiner + # No condition means it's a direct, unconditional transition. \ No newline at end of file diff --git a/specs/archive/prompt_optimizer.yaml b/specs/archive/prompt_optimizer.yaml new file mode 100644 index 0000000..447f338 --- /dev/null +++ b/specs/archive/prompt_optimizer.yaml @@ -0,0 +1,100 @@ +version: "0.1" +description: "Enhanced Evaluator-Optimizer workflow for iteratively refining prompts for high-quality code generation" +runtime: "langgraph" + +llms: + generator: + type: "openai" + model_name: "o3" + temperature: 1 + # model_name: "gpt-4.1" + # temperature: 0.5 + params: + # max_tokens: 4000 + system_prompt: | + You are a world-class prompt engineering specialist with deep expertise in crafting prompts that elicit high-quality code from large language models. + When given an initial user prompt, follow these steps: + 1. **Analysis**: Identify ambiguities, missing context, and improvement opportunities. + 2. **Refinement**: Clarify objectives, add necessary context and constraints, and structure the prompt clearly. + 3. **Enhancement**: Embed examples or input/output templates to guide the model’s code generation. + 4. **Output**: Provide only the final improved prompt, formatted as a complete instruction. Do not include explanations or summaries. + Use clear sections (e.g., Task, Context, Requirements, Examples, Constraints) where appropriate. + + evaluator: + type: "openai" + model_name: "gpt-4.1" + temperature: 0 + params: + max_tokens: 1000 + system_prompt: | + You are an expert prompt evaluator specialising in coding tasks. + You will receive a proposed improved prompt. Evaluate it on: + - **Clarity**: Is the language unambiguous and objectives clear? + - **Completeness**: Are all necessary context and constraints included? + - **Structure**: Is it organised logically for readability? + - **Specificity**: Are the instructions detailed enough for precise code generation? + - **Effectiveness**: How likely is it to produce correct, concise, maintainable code? + For each criterion, assign an integer score from 1 (poor) to 5 (excellent), and provide a short comment. Then calculate the average of these five scores as `evaluation_score`. + **Output only** a JSON object matching this schema: + ```json + { + "evaluation_score": , + "scores": { + "Clarity": , + "Completeness": , + "Structure": , + "Specificity": , + "Effectiveness": + }, + "feedback": { + "Clarity": "", + "Completeness": "", + "Structure": "", + "Specificity": "", + "Effectiveness": "" + } + } + ``` + No additional text. + +retrievers: {} +memory: {} +functions: {} + +workflow: + type: "evaluator_optimizer" + max_iterations: 7 + nodes: + - id: "generate" + kind: "agent" + ref: "generator" + stop: false + + - id: "evaluate" + kind: "judge" + ref: "evaluator" + stop: false + + - id: "finalize" + kind: "agent" + ref: "generator" + stop: true + + edges: + - source: "generate" + target: "evaluate" + condition: "True" + + - source: "evaluate" + target: "finalize" + condition: "state.get('evaluation_score', 0) >= 4.0 or state.get('iteration_count', 0) >= 7" + + - source: "evaluate" + target: "generate" + condition: "state.get('evaluation_score', 0) < 4.0 and state.get('iteration_count', 0) < 7" + +eval: + metrics: + - quality + - iterations + dataset_path: "data/prompt_improvement_test.jsonl" diff --git a/specs/archive/prompt_routing.yaml b/specs/archive/prompt_routing.yaml new file mode 100644 index 0000000..baa4550 --- /dev/null +++ b/specs/archive/prompt_routing.yaml @@ -0,0 +1,81 @@ +version: "1.0" +description: "A workflow that uses a prompt classifier to route user queries to different handler prompts based on the input prompt." +runtime: "langgraph" # Specifies the execution engine + +llms: + classifier_llm: + type: "openai" + model_name: "gpt-4.1-mini" + temperature: 0.2 + # System prompt for classifier: "You are an expert text classifier. Your task is to categorize the user query into 'general_chat' or 'deep_reasoning'. Respond with only the category name." + general_chat_llm: + type: "openai" + model_name: "gpt-4.1-mini" + temperature: 0.7 + deep_reasoning_llm: + type: "openai" + model_name: "gpt-4.1-mini" # Or a more capable model like gpt-4.1-mini if preferred + temperature: 0.5 + +workflow: + type: custom_graph # Using custom_graph for explicit edge definition + # entry_point: prompt_classifier # Optional: usually inferred if it's the first node + nodes: + - id: prompt_classifier + kind: agent + ref: classifier_llm # References the LLM configuration for classification + config: + prompt: | + Analyze the following user query and classify it into one of these categories: 'general_chat' or 'deep_reasoning'. + Your response must consist of ONLY the category name as a single string (e.g., "general_chat" or "deep_reasoning"). + Do not add any other text, explanation, or formatting. + + User Query: + {input} + stop: false # Output of this node is used for routing, so it's not a terminal node + + - id: general_chat_handler + kind: agent + ref: general_chat_llm # References the LLM configuration for general conversation + config: + prompt: | + User Query: {input} + + Engage in a friendly and helpful conversation based on the user query. + stop: true # This node concludes the 'general_chat' path + + - id: deep_reasoning_handler + kind: agent + ref: deep_reasoning_llm # References the LLM configuration for complex tasks + config: + prompt: | + System: | + You are a highly trained Language Model to help users. + Your role as an assistant involves thoroughly exploring questions + through a systematic thinking process before providing the final + precise and accurate solutions. This requires engaging in a comprehensive + cycle of analysis, summarizing, exploration, reassessment, reflection, + backtracing, and iteration to develop well-considered thinking process. + Please structure your response into two main sections: Thought and Solution. + In the Thought section, detail your reasoning process in steps. + Each step should include detailed considerations such as analysing questions, + summarizing relevant findings, brainstorming new ideas, verifying the accuracy + of the current steps, refining any errors, and revisiting previous steps. + In the Solution section, based on various attempts, explorations, and reflections + from the Thought section, systematically present the final solution that you + deem correct. The Solution section should be logical, accurate, and concise. + User Query: {input} + + Provide a comprehensive, insightful, and well-structured analytical response to the user query. Break down complex problems if necessary. + stop: true # This node concludes the 'deep_reasoning' path + + edges: + - source: prompt_classifier + target: general_chat_handler + # This condition checks the direct string output of the classifier agent + condition: "state.get('output') == 'general_chat'" + + - source: prompt_classifier + target: deep_reasoning_handler + # This condition checks the direct string output of the classifier agent + condition: "state.get('output') == 'deep_reasoning'" \ No newline at end of file diff --git a/specs/examples/prompt_routing_with_reference.yaml b/specs/archive/prompt_routing_with_reference.yaml similarity index 100% rename from specs/examples/prompt_routing_with_reference.yaml rename to specs/archive/prompt_routing_with_reference.yaml diff --git a/specs/examples/python_calculator.yaml b/specs/archive/python_calculator.yaml similarity index 100% rename from specs/examples/python_calculator.yaml rename to specs/archive/python_calculator.yaml diff --git a/specs/archive/python_function_test.yaml b/specs/archive/python_function_test.yaml new file mode 100644 index 0000000..fb89ee7 --- /dev/null +++ b/specs/archive/python_function_test.yaml @@ -0,0 +1,39 @@ +version: "0.1" +description: "Simple Python function workflow with user input and text processing" +runtime: "langgraph" + +llms: + test_llm: + type: openai + model_name: gpt-4.1-mini + temperature: 0.0 + +functions: + get_input: + type: python + name: "User Input" + entrypoint: "elf0.functions.utils.get_user_input" + + process_text: + type: python + name: "Text Processor" + entrypoint: "elf0.functions.utils.text_processor" + +workflow: + type: sequential + nodes: + - id: ask_user + kind: tool + ref: get_input + config: + parameters: + prompt: "What would you like me to help you with today?" + + - id: process + kind: tool + ref: process_text + config: + parameters: + operation: "count_words" + stop: true + edges: [] \ No newline at end of file diff --git a/specs/examples/python_text_processor.yaml b/specs/archive/python_text_processor.yaml similarity index 100% rename from specs/examples/python_text_processor.yaml rename to specs/archive/python_text_processor.yaml diff --git a/specs/examples/simple_mcp.yaml b/specs/archive/simple_mcp.yaml similarity index 100% rename from specs/examples/simple_mcp.yaml rename to specs/archive/simple_mcp.yaml diff --git a/specs/archive/simulation_scenario_v1.yaml b/specs/archive/simulation_scenario_v1.yaml new file mode 100644 index 0000000..e821e79 --- /dev/null +++ b/specs/archive/simulation_scenario_v1.yaml @@ -0,0 +1,264 @@ +# src/specs/utils/simulation_scenario_v1.yaml +version: "v1" +description: "Meta-utility for creating simulation workflows that role-play scenario elements and generate outcome reports" +runtime: "langgraph" + +llms: + rule_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.2 + params: {} + decomposer_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.3 + params: {} + strategist_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.5 + params: {} + element_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.4 + params: {} + yaml_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.1 + params: {} + +workflow: + type: sequential + nodes: + # 1. Summarise the schema and flow rules + - id: gather_rules + kind: agent + ref: rule_llm + config: + prompt: | + You have access to @docs_specs/spec_schema.md, @docs/flow_rules.md, and @docs/notes_fix_agent_principles.md. + Extract the non-negotiable requirements for LangGraph YAML specs that prevent channel conflicts: + + PRIORITY RULES FROM CHANNEL CONFLICT ANALYSIS: + β€’ ONLY sequential workflow type (never custom_graph for simulations) + β€’ ONE and only ONE node may reference {input} (the first node only) + β€’ ALL other nodes use {{state.variable_name}} format + β€’ LINEAR execution chains only (no fan-out patterns) + β€’ Complete state references (no empty "Scenario:" sections) + β€’ Unique output keys for all nodes + + STANDARD SPEC REQUIREMENTS: + β€’ mandatory top-level keys (version, description, runtime, llms, workflow) + β€’ node field rules (`id`, `kind`, `ref`, `config`, `output_key`, `stop`) + β€’ edge structure requirements + + OUTPUT under: + ## YAML_RULES + - + output_key: yaml_rules + + # 2. Break down the user's scenario and rewrite it clearly + - id: analyse_scenario + kind: agent + ref: decomposer_llm + config: + prompt: | + You are an expert at analysing prompts and breaking them down into their constituent elements, in order to create a graph simulation. + Your job is to dissect the raw simulation prompt into its constituent elements, and rewrite it in your own words, using: + (actors, objects, subsystems, environmental factors, constraints). + + ORIGINAL SCENARIO PROMPT + ------------------------ + {input} + ------------------------ + + Provide: + 1. **Element List** - bullet list with a short behavioural note for each element. + 2. **Rewritten Scenario** - your own concise, unambiguous restatement. + + Format exactly: + + ## ELEMENTS + - : + - ... + + ## REWRITTEN_SIMULATION_PROMPT + + output_key: scenario_breakdown + + # 3. Choose patterns and overall graph structure + - id: map_interactions + kind: agent + ref: strategist_llm + config: + prompt: | + Based on the LangGraph Channel Conflict principles from @docs/notes_fix_agent_principles.md, + design a LINEAR SEQUENTIAL simulation structure for the elements below. + + {state.scenario_breakdown} + + MANDATORY DESIGN PRINCIPLES: + - Use ONLY sequential workflow type (never custom_graph) + - Create LINEAR execution chain: scenario_processor β†’ element1 β†’ element2 β†’ ... β†’ aggregator + - NO fan-out patterns (one node feeds exactly one next node) + - Each element becomes one sequential node in the chain + - Each node has unique output_key + + Produce: + + BEGIN REASONING + + Sequential linear flow prevents LangGraph channel conflicts. Each element will process + the scenario in sequence, building upon previous elements' outputs for rich interaction. + + END REASONING + + BEGIN WORKFLOW_TYPE + sequential + END WORKFLOW_TYPE + + BEGIN GRAPH_PLAN + - scenario_processor (consumes {input}, outputs scenario_input) + - _agent (uses {state.scenario_input}, outputs _response) + - _agent (uses {state.scenario_input} + {state._response}, outputs _response) + - ... (continue for all elements) + - aggregate_outcomes (synthesizes all responses into final narrative) + + EDGES: Linear chain scenario_processor β†’ element1 β†’ element2 β†’ ... β†’ aggregate_outcomes + END GRAPH_PLAN + output_key: graph_plan + + # 4. Craft a detailed simulation prompt for each element + - id: make_element_prompts + kind: agent + ref: element_llm + config: + prompt: | + Build role-play prompts for each element following LangGraph Channel Conflict principles. + Each element will execute sequentially and can reference previous elements' outputs. + + ELEMENT LIST AND REWRITTEN SCENARIO + ----------------------------------- + {state.scenario_breakdown} + + PROMPT REQUIREMENTS: + - First element only uses {{state.scenario_input}} + - Subsequent elements can use {{state.scenario_input}} AND previous elements' outputs + - ALL prompts must have complete state references (no empty "Scenario:" sections) + - Each element gets unique output_key: _response + + For each element, output: + + ### + Prompt: + """ + You are . . + about the typical characteristics of this element, its behaviours and triggers. + Imagine you are this element and incorporate these values and make a prediction of what happens in this scenario, based on your behaviours and traits. + + Scenario: + {{state.scenario_input}} + + [If not first element, add previous context:] + Previous Element Perspectives: + {{state._response}} + + Provide your detailed reaction as ... + """ + + Combine all into a single block labelled: + ## ELEMENT_PROMPTS + output_key: element_prompts + + # 5. Generate the final simulation YAML + - id: build_simulation_yaml + kind: agent + ref: yaml_llm + stop: true + config: + format: yaml + prompt: | + Generate a LangGraph-compliant simulation spec following Channel Conflict Prevention Principles. + The spec MUST use sequential workflow with linear execution to prevent channel conflicts. + + REQUIRED STRUCTURE: + - `version` (string): "1.0" + - `description` (string): Descriptive text about the simulation + - `runtime` (string): "langgraph" + - `llms` (object): Dictionary with ONE LLM definition + - Key should be descriptive (e.g., "sim_llm") + - Value must have: `type` (string), `model_name` (string) + - `workflow` (object): + - `type` (string): MUST be "sequential" (never custom_graph) + - `nodes` (array): Array of node objects in LINEAR execution order: + - `id` (string): Unique identifier + - `kind` (string): "agent" for LLM nodes + - `ref` (string): Reference to LLM key + - `config` (object): Contains `prompt` (string) + - `output_key` (string): State variable name for output (must be unique) + - `stop` (boolean): true only for final node + - `edges` (array): LINEAR chain of edge objects: + - `source` (string): Source node id + - `target` (string): Target node id (forms chain: Aβ†’Bβ†’Cβ†’D...) + + ## YAML RULES + {state.yaml_rules} + + ## GRAPH PLAN + {state.graph_plan} + + ## ELEMENT PROMPTS + {state.element_prompts} + + CHANNEL CONFLICT PREVENTION REQUIREMENTS (CRITICAL): + + 1. INPUT ISOLATION PRINCIPLE: + - ONLY the first node (scenario_processor) may reference `{input}` + - The first node MUST have this EXACT prompt: "Process this scenario for simulation: {input}. Extract key elements and provide comprehensive scenario context." + - DO NOT hardcode any scenario text in the first node prompt + - The first node prompt must use ONLY {input} for scenario data + - ALL other nodes MUST use `{{state.variable_name}}` format NEVER `{input}` + + 2. LINEAR FLOW PRINCIPLE: + - workflow.type MUST be "sequential" + - Create LINEAR execution chain: scenario_processor β†’ element1 β†’ element2 β†’ ... β†’ aggregate_outcomes + - NO fan-out patterns (one source β†’ multiple targets) + - Each edge connects exactly one source to one target + + 3. STATE REFERENCE COMPLETENESS: + - ALL element nodes must have complete `{{state.scenario_input}}` references + - NO empty "Scenario:" sections in prompts + - Each node has unique output_key ending in "_response" + + 4. GRAPH STRUCTURE: + - Define exactly one LLM with model `gpt-4.1-mini` + - Each element gets its own sequential `agent` node + - Final `aggregate_outcomes` node with `stop: true` + - All edge sources must be actual node IDs from nodes array + + 5. DATA FLOW PATTERN: + {input} β†’ scenario_processor(β†’scenario_input) β†’ element1(β†’element1_response) β†’ element2(β†’element2_response) β†’ ... β†’ aggregate_outcomes + + 6. FORBIDDEN PATTERNS: + - NO custom_graph workflow type + - NO fan-out edges (one source to multiple targets) + - NO "__start__" references + - NO hardcoded scenario text in first node + - NO empty state references + - NO concurrent state writes + + Generate the complete specification as valid YAML. Do NOT include markdown fences, preamble, summary or commentary. + output_key: simulation_yaml + + edges: + - source: gather_rules + target: analyse_scenario + - source: analyse_scenario + target: map_interactions + - source: map_interactions + target: make_element_prompts + - source: make_element_prompts + target: build_simulation_yaml \ No newline at end of file diff --git a/specs/basic/chat_simple_v1.yaml b/specs/basic/chat_simple_v1.yaml new file mode 100644 index 0000000..280a5a6 --- /dev/null +++ b/specs/basic/chat_simple_v1.yaml @@ -0,0 +1,39 @@ +# src/specs/basic/chat_simple_v1.yaml +version: "v1" +description: "Basic chat workflow for simple conversational interactions" +runtime: "langgraph" + +# LLM definitions +llms: + chat_llm: + type: "openai" + model_name: "gpt-4.1-mini" + temperature: 0.7 + params: + max_tokens: 1000 + max_retries: 3 + retry_delay: 1.0 + max_retry_delay: 30.0 + retry_backoff_factor: 2.0 + system_prompt: | + You are a friendly assistant who always ends a conversation with an emoji. + +# No retrievers, memory or functions needed for basic chat +retrievers: {} +memory: {} +functions: {} + +# Workflow definition +workflow: + type: "sequential" + nodes: + - id: "chat_step" + kind: "agent" + ref: "chat_llm" + stop: true + edges: [] + +eval: + tags: ["basic", "chat", "conversational"] + use_cases: ["Simple Q&A", "Getting started with elf0", "Basic interaction testing"] + estimated_runtime: "5-15 seconds" diff --git a/specs/basic/reasoning_structured_v1.yaml b/specs/basic/reasoning_structured_v1.yaml new file mode 100644 index 0000000..1f22d58 --- /dev/null +++ b/specs/basic/reasoning_structured_v1.yaml @@ -0,0 +1,52 @@ +# src/specs/basic/reasoning_structured_v1.yaml +version: "v1" +description: "Structured reasoning workflow using systematic thinking process for complex questions" +runtime: "langgraph" + +# LLM definitions +llms: + reasoning_llm: + type: "openai" + model_name: "gpt-4.1-mini" + temperature: 0.7 + params: + max_tokens: 1500 + max_retries: 3 + retry_delay: 1.0 + max_retry_delay: 30.0 + retry_backoff_factor: 2.0 + system_prompt: | + You are a highly trained Language Model to help users. + Your role as an assistant involves thoroughly exploring questions + through a systematic thinking process before providing the final + precise and accurate solutions. This requires engaging in a comprehensive + cycle of analysis, summarizing, exploration, reassessment, reflection, + backtracing, and iteration to develop well-considered thinking process. + Please structure your response into two main sections: Thought and Solution. + In the Thought section, detail your reasoning process in steps. + Each step should include detailed considerations such as analysing questions, + summarizing relevant findings, brainstorming new ideas, verifying the accuracy + of the current steps, refining any errors, and revisiting previous steps. + In the Solution section, based on various attempts, explorations, and reflections + from the Thought section, systematically present the final solution that you + deem correct. The Solution section should be logical, accurate, and concise. + +# No retrievers, memory or functions needed for basic reasoning +retrievers: {} +memory: {} +functions: {} + +# Workflow definition +workflow: + type: "sequential" + nodes: + - id: "reasoning_step" + kind: "agent" + ref: "reasoning_llm" + stop: true + edges: [] + +eval: + tags: ["basic", "reasoning", "structured-thinking"] + use_cases: ["Complex problem solving", "Multi-step analysis", "Logical reasoning tasks"] + estimated_runtime: "15-45 seconds" diff --git a/specs/content/content_basic_v1.yaml b/specs/content/content_basic_v1.yaml new file mode 100644 index 0000000..1dbf354 --- /dev/null +++ b/specs/content/content_basic_v1.yaml @@ -0,0 +1,41 @@ +# src/specs/content/content_basic_v1.yaml +version: "v1" +description: "Basic content creation workflow for generating various types of written content" +runtime: "langgraph" + +# LLM definitions +llms: + content_llm: + type: "openai" + model_name: "gpt-4.1-mini" + temperature: 0.8 + params: + max_tokens: 2000 + max_retries: 3 + retry_delay: 1.0 + max_retry_delay: 30.0 + retry_backoff_factor: 2.0 + system_prompt: | + You are a skilled content creator who produces engaging, clear, and well-structured written content. + Focus on creating content that is informative, engaging, and tailored to the target audience. + Always consider the purpose, tone, and format requirements when creating content. + +# No retrievers, memory or functions needed for basic content creation +retrievers: {} +memory: {} +functions: {} + +# Workflow definition +workflow: + type: "sequential" + nodes: + - id: "content_creation_step" + kind: "agent" + ref: "content_llm" + stop: true + edges: [] + +eval: + tags: ["content", "writing", "creation"] + use_cases: ["Blog posts", "Articles", "Marketing copy", "Documentation"] + estimated_runtime: "20-60 seconds" diff --git a/specs/content/linkedin_post.yaml b/specs/content/linkedin_post.yaml new file mode 100644 index 0000000..116e472 --- /dev/null +++ b/specs/content/linkedin_post.yaml @@ -0,0 +1,100 @@ +version: "0.1" +description: "Generate an optimized LinkedIn post by analyzing input text, identifying the target audience, reasoning about the key question to address, and rewriting with minimal changes in a curious, authoritative friend style without emojis." +runtime: "langgraph" + +llms: + main_llm: + type: openai + model_name: gpt-4.1-mini + temperature: 0.5 + writer_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.5 + params: + max_tokens: 4000 + +workflow: + type: sequential + nodes: + - id: analyze_and_identify_audience + kind: agent + ref: main_llm + config: + prompt: | + You are professional content editor, who is an expert at reading and analysing content. + Provided with the following input text, read it deeply and imagine what resonates with this audience: + + {workflow_initial_input} + + Task: + 1. Carefully read and understand the text. + 2. Identify and clearly describe the primary target audience for this text. + 3. Provide a concise analysis explaining why this audience is appropriate. + + Format your output as: + Audience: + Analysis: + output_key: audience_analysis + stop: false + + - id: reason_about_structure + kind: agent + ref: main_llm + config: + prompt: | + You are a professional content editor, who deeply understands how content affects an audience. + Given the audience analysis below, determine the single most important question this content should answer to effectively engage the audience. Also think about the smaller subquestions that will help answer this main question. + Output in eloquent and engaging narrative that is interesting and has value, to this audience. + + Audience Analysis: + {state.audience_analysis} + + Task: + 1. Identify the key question that the post must address to resonate with the audience. + 2. Identify sub-questions that help answer the key question (see Task 1). + 3. Provide detailed reasoning on how structuring the content around these questions will improve engagement and clarity. + + Format your response as: + Key Question: + Sub Questions: + Reasoning: + output_key: workflow_reasoning + stop: false + + - id: rewrite_post + kind: agent + ref: writer_llm + config: + prompt: | + You are a professional writer, who writes eloquently, taking initial concepts and an audience and writes engaging and valuable content that appeals to this audience. + Using the original text and the reasoning below, rewrite this content to answer the key question and sub-questions effectively. + Specify the key question at the beginning in the style of a curious and authoritative friend. + At the end ask "I wonder if you could help me with this?", rewrite this final question to naturally fit the content. + + Requirements: + - Eloquently enhance the content and preserve original meaning. + - Write in the style of an eloquent, curious and authoritative friend. + - Avoid emojis and the dash character `β€”`. + - Use a natural, human tone suitable for content. + + Original Text: + {workflow_initial_input} + + Reasoning: + {state.workflow_reasoning} + output_key: rewritten_post + stop: true + + edges: + - source: analyze_and_identify_audience + target: reason_about_structure + - source: reason_about_structure + target: rewrite_post + +# Comments: +# - Lowered temperature values for more focused and consistent outputs. +# - Added explicit formatting instructions in prompts to improve output parsing. +# - Switched final rewriting to writer_llm (Claude) to leverage its strengths in style and tone. +# - Clarified reasoning node to focus on identifying one key question to guide rewriting. +# - Improved description for clarity and conciseness. diff --git a/specs/content/twitter_post.yaml b/specs/content/twitter_post.yaml new file mode 100644 index 0000000..e12c7b5 --- /dev/null +++ b/specs/content/twitter_post.yaml @@ -0,0 +1,165 @@ +version: "0.4" +description: "Analyzes input text to identify audience and key question, then generates a well-structured, informative Twitter post or a numbered tweet thread in UK English with a curious, authoritative, and honest tone, avoiding emojis and dashes in text (except for markdown list formatting)." +runtime: "langgraph" + +llms: + main_llm: + type: openai + model_name: gpt-4.1-mini + temperature: 0.3 # Focused, consistent analysis and reasoning + params: + max_tokens: 512 + writer_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.7 # More creative, engaging writing + params: + max_tokens: 2048 # Increased for longer, well-structured threads + +retrievers: {} +memory: {} +functions: {} + +workflow: + type: custom_graph + nodes: + - id: analyze_input + kind: agent + ref: main_llm + config: + prompt: | + ROLE: Expert social media strategist. + + INSTRUCTIONS: + 1. Carefully read the following input text: + {input} + 2. Identify and clearly describe the primary audience for this text. + 3. Decide if the content is best expressed as a "single" tweet or a "thread": + - Choose "thread" if the input contains multiple key points, detailed explanations, or a narrative that benefits from multiple tweets. + - Otherwise, choose "single". + 4. Output ONLY the following, clearly labeled: + + Audience Analysis: + Tweet Format: <"single" or "thread"> + + Do NOT include any extra commentary or explanation. + output_key: analysis_and_format + stop: false + + - id: reason_structure + kind: agent + ref: main_llm + config: + prompt: | + ROLE: Expert Twitter content strategist. + + INPUT: + {state.analysis_and_format} + + TASKS: + 1. Identify the single most important question this Twitter content should answer to engage the specified audience. + 2. Reason step-by-step about how to structure the content to answer this question honestly and openly. + 3. If Tweet Format is "single": + - Write a draft tweet (max 280 characters) that answers the key question. + 4. If Tweet Format is "thread": + - Outline a thread structure: + a. Main statement or valuable insight as the first tweet. + b. Each following tweet should present a key takeaway, fact, or opinion, each backed up with supporting details or points, with context of the previous thread. + c. Each tweet should be under 280 characters. + d. For supporting points within a tweet, use markdown '-' dashes for bullet points. + e. Specify the total number of tweets in the thread. + f. Vary sentence lengths and rhythm so the thread feels spontaneous rather than rigid. + 5. Tailor every tweet to the chosen audience to boost their skills, provide value or entertain them. + 6. Explain points using concrete examples, facts or quotations ideally taken from SOURCE_INPUT or prompt. + 7. Maintain a warm, ethical tone, avoid hype, knowledgable guide + 8. Forbidden: emojis, β€œgame-changer”, en-dashes/hyphens connecting words (e.g. no β€œmust-read”). + + OUTPUT FORMAT: + Key Question or Insight: 1/n + Reasoning: + Draft Tweet: + Thread Outline: + + Output ONLY as specified above, no extra text. + output_key: workflow_reasoning + stop: false + + - id: decide_path + kind: branch + config: + condition: | + 'tweet format: thread' in {state.analysis_and_format}.lower() + stop: false + + - id: rewrite_single + kind: agent + ref: writer_llm + config: + prompt: | + ROLE: Twitter copywriter. + + BASED ON: + {state.workflow_reasoning} + + INSTRUCTIONS: + - Write a single tweet in UK English, max 280 characters. + - Use a curious, authoritative, and honest tone, like a knowledgeable friend. + - Use thoughtful word choices and express an opinion that evokes emotion (e.g., vulnerability, humour, excitement). + - Do NOT use emojis or dashes (except for web links). + - Replace web links like mysite.com with mysite[dot]com. + - Output ONLY the final tweet text, no explanations or extra text. + output_key: final_tweet + stop: true + + - id: rewrite_thread + kind: agent + ref: writer_llm + config: + prompt: | + ROLE: Twitter thread specialist. + + BASED ON: + {state.workflow_reasoning} + + INSTRUCTIONS: + - Write a well-structured Twitter thread in UK English. + - The first tweet should be a main statement or valuable insight. + - Each subsequent tweet should present a key takeaway, fact, or opinion, backed up with supporting details or points. + - For supporting points within a tweet, use markdown '-' dashes for bullet points. + - Each tweet must be under 280 characters. + - Number each tweet in the format "n/N" (e.g., "3/10") at the start of each tweet, where N is the total number of tweets in the thread. + - Use a curious, authoritative, and honest tone, like a knowledgeable friend. + - Do NOT use emojis or dashes (except for markdown list formatting and web links). + - Replace web links like mysite.com with mysite[dot]com. + - Output ONLY the final thread as a numbered list of tweets, each on its own line, no extra text. + + EXAMPLE OUTPUT: + 1/5 Main statement... + 2/5 - Key point one + - Supporting detail + 3/5 Another insight... + ... + output_key: final_thread + stop: true + + edges: + - source: analyze_input + target: reason_structure + - source: reason_structure + target: decide_path + - source: decide_path + target: rewrite_thread + condition: "'tweet format: thread' in {state.analysis_and_format}.lower()" + - source: decide_path + target: rewrite_single + condition: "'tweet format: single' in {state.analysis_and_format}.lower()" + +eval: {} + +# Comments: +# - Prompts are role-based and step-by-step for clarity and optimal LLM output. +# - Thread output is structured with markdown dashes for bullet points and tweet numbering as "n/N". +# - All nodes and edges are schema-compliant and mutually exclusive. +# - Output is strictly limited to required content for downstream reliability. +# - LLM parameters are tuned for analysis (main_llm) and creative writing (writer_llm). +# - Maintains maintainability and clarity with inline comments. diff --git a/specs/content/youtube_analyzer.yaml b/specs/content/youtube_analyzer.yaml new file mode 100644 index 0000000..f617ea3 --- /dev/null +++ b/specs/content/youtube_analyzer.yaml @@ -0,0 +1,90 @@ +version: "0.1" +description: "YouTube transcript analysis with key points and summary" +runtime: "langgraph" + +llms: + gpt_model: + type: openai + model_name: gpt-4.1-mini + temperature: 0.2 + +workflow: + type: custom_graph + nodes: + - id: extract_url + kind: agent + ref: gpt_model + config: + prompt: | + Extract YouTube URL from user input and return as JSON. + + Input: {input} + + If YouTube URL found: {"youtube_url": "FULL_URL_HERE"} + If no URL found: {"error": "No YouTube URL found"} + + Return only the JSON object. + output_key: url_extraction + + - id: get_transcript + kind: mcp + config: + server: + command: ["python", "mcp/youtube-transcript/server.py"] + tool: "extract_transcript" + parameters: + url: "${state.json.youtube_url}" + language: "en" + + - id: analyze_and_summarize + kind: agent + ref: gpt_model + config: + prompt: | + Analyze this YouTube transcript and create a comprehensive markdown summary. + + Video URL: {state.json.youtube_url} + Transcript Data: {output} + + Create a well-structured markdown document that includes: + + ## Video Summary + + **URL:** [Include the video URL here] + + **Main Topic:** [Brief description of what the video is about] + + ## Key Concepts & Insights + + Identify and list the 5-7 most interesting and important concepts from the transcript: + + 1. **[Concept Name]** - [Brief explanation of the concept and why it's important] + 2. **[Concept Name]** - [Brief explanation] + [Continue for all key concepts...] + + ## Actionable Takeaways + + List 3-5 practical actions or insights viewers can apply: + + - [Actionable item 1] + - [Actionable item 2] + [Continue as appropriate...] + + ## Summary + + [2-3 paragraph comprehensive summary of the video's content, connecting the key concepts and explaining the overall message or purpose] + + --- + + Focus on: + - Extracting genuine insights and surprising facts + - Making complex ideas accessible + - Highlighting practical value for the reader + - Using clear, engaging markdown formatting + stop: true + + edges: + - source: extract_url + target: get_transcript + - source: get_transcript + target: analyze_and_summarize \ No newline at end of file diff --git a/specs/examples/interactive_assistant.yaml b/specs/examples/interactive_assistant.yaml index 72eeb06..706e2ba 100644 --- a/specs/examples/interactive_assistant.yaml +++ b/specs/examples/interactive_assistant.yaml @@ -13,11 +13,6 @@ functions: type: python name: "User Input" entrypoint: "elf0.functions.utils.get_user_input" - - process_text: - type: python - name: "Text Processor" - entrypoint: "elf0.functions.utils.text_processor" workflow: type: sequential @@ -27,38 +22,33 @@ workflow: ref: assistant config: prompt: | - Based on the user's initial input: "${state.input}" + Based on the user's initial input: "{input}" - Generate a thoughtful follow-up question to better understand what they need. - Ask only ONE specific question that will help you provide better assistance. + Analyze what the user is asking for and determine what additional information you need to provide a complete and helpful response. + Generate ONE specific follow-up question that will help you better understand their needs or gather missing information. - Don't provide any answers yet - just ask the question. + Output only the question text, nothing else. + output_key: question - id: ask_user kind: tool ref: get_input - config: - parameters: - prompt: "${state.output}" - - - id: process_response - kind: tool - ref: process_text - config: - parameters: - operation: "count_words" - id: final_response kind: agent ref: assistant config: prompt: | - Original request: "${state.input}" - Follow-up question asked: "${state.output}" - User's response: "${state.user_input}" - Response analysis: Word count is ${state.word_count} + Original request: "{input}" + Follow-up question you asked: {state.output} + User's response: {state.user_input} - Now provide a comprehensive and helpful response that addresses their original request, - taking into account their follow-up answer. Be specific and actionable. + Now provide a comprehensive and helpful response that addresses the user's original request, + taking into account the additional information they provided. Be specific and actionable. stop: true - edges: [] \ No newline at end of file + + edges: + - source: generate_question + target: ask_user + - source: ask_user + target: final_response \ No newline at end of file diff --git a/specs/examples/prompt_chaining.yaml b/specs/examples/prompt_chaining.yaml index 7634a52..98466c5 100644 --- a/specs/examples/prompt_chaining.yaml +++ b/specs/examples/prompt_chaining.yaml @@ -6,7 +6,7 @@ llms: # Define the LLM client configuration to be used by agent nodes # The 'ref' in the nodes will point to 'default_chat_llm' default_chat_llm: - type: "openai_chat" # Placeholder: actual type depends on LLMClient implementation + type: "openai" # Corrected type name for schema validation model_name: "gpt-4.1-mini" # Corrected field name, was 'model' temperature: 0.7 # A system message could be part of the LLM config if always applicable diff --git a/specs/utils/creator_workflow_v1.yaml b/specs/utils/creator_workflow_v1.yaml new file mode 100644 index 0000000..155fbef --- /dev/null +++ b/specs/utils/creator_workflow_v1.yaml @@ -0,0 +1,108 @@ +# src/specs/utils/creator_workflow_v1.yaml +version: "0.1" +description: "Utility workflow for analyzing technical requirements and generating new YAML agent spec file." +runtime: "langgraph" + +llms: + rewriter_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.3 + strategist_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.5 + worker_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.4 + +workflow: + type: sequential + nodes: + - id: rewrite_input + kind: agent + ref: rewriter_llm + config: + prompt: | + Rewrite the following user prompt clearly and unambiguously: + + {workflow_initial_input} + output_key: rewritten_prompt + + - id: select_workflow + kind: agent + ref: strategist_llm + config: + prompt: | + Given the rewritten prompt below, reason about the best workflow pattern to process it. + Consider agent patterns in @docs/agent_patterns.md + Output your reasoning and specify the recommended workflow graph pattern (sequential, custom_graph, react, evaluator_optimizer) + as a mermaid diagram so that the edges in the new YAML file can be correctly generated. + + Prompt: {state.rewritten_prompt} + output_key: workflow_reasoning + + - id: generate_spec + kind: agent + ref: worker_llm + config: + format: yaml + prompt: | + Using the following input and reasoning, generate a complete LangGraph YAML spec that implements the workflow: + + Rewritten prompt: {state.rewritten_prompt} + Workflow reasoning: {state.workflow_reasoning} + + CRITICAL: Follow this EXACT structure (all fields shown are REQUIRED unless marked optional): + + ``` + version: "0.1" # REQUIRED + description: "" # REQUIRED - describe the workflow purpose + runtime: "langgraph" # REQUIRED + + llms: # REQUIRED section + : # e.g., "main_llm" + type: openai # Must be: openai, anthropic, or ollama + model_name: gpt-4.1-mini # Model identifier + temperature: 0.7 # Optional, 0.0-1.0 + + workflow: # REQUIRED section + type: sequential # REQUIRED - or: custom_graph, react, evaluator_optimizer + nodes: # REQUIRED - list of nodes + - id: # Unique identifier + kind: agent # Usually "agent" for LLM nodes + ref: # Must match a key in llms section + config: # Optional + prompt: | # The prompt template + + output_key: # Optional - state variable name + stop: false # true only for the final node + + edges: # REQUIRED - can be empty list for sequential + - source: + target: + ``` + + RULES: + 1. The FIRST node's prompt must contain {workflow_initial_input} exactly once + 2. Subsequent nodes reference data via {state.variable_name} + 3. Node IDs in edges must match actual node IDs + 4. The LAST node must have stop: true + 5. For the workflow add the correct edges that represent the workflow logic (see agent design patterns/mermaid diagram) + 6. LLM type must be one of: openai, anthropic, ollama (NOT "openai/chat") + + Output the complete YAML specification. Do NOT include markdown fences, preamble, summary or commentary. + output_key: final_spec + stop: true + + edges: + - source: rewrite_input + target: select_workflow + - source: select_workflow + target: generate_spec + +eval: + tags: ["utils", "creation", "meta-workflow"] + use_cases: ["Creating new workflows", "Automating workflow generation", "Rapid prototyping"] + estimated_runtime: "60-120 seconds" diff --git a/specs/utils/optimizer_prompt_v1.yaml b/specs/utils/optimizer_prompt_v1.yaml new file mode 100644 index 0000000..97f3395 --- /dev/null +++ b/specs/utils/optimizer_prompt_v1.yaml @@ -0,0 +1,100 @@ +# src/specs/utils/optimizer_prompt_v1.yaml +version: "0.1" +description: "Utility workflow for iteratively refining prompts to achieve high-quality outputs" +runtime: "langgraph" + +llms: + generator: + type: "anthropic" + model_name: "claude-sonnet-4-20250514" + temperature: 0.8 + # model_name: "gpt-4.1" + # temperature: 0.5 + params: + # max_tokens: 4000 + system_prompt: | + You are a world-class prompt engineering specialist with deep expertise in crafting prompts that elicit high-quality code from large language models. + When given an initial user prompt, follow these steps: + 1. **Analysis**: Identify ambiguities, missing context, and improvement opportunities. + 2. **Refinement**: Clarify objectives, add necessary context and constraints, and structure the prompt clearly. + 3. **Enhancement**: Embed examples or input/output templates to guide the model’s code generation. + 4. **Output**: Provide only the final improved prompt, formatted as a complete instruction. Do not include explanations or summaries. + Use clear sections (e.g., Task, Context, Requirements, Examples, Constraints) where appropriate. + + evaluator: + type: "openai" + model_name: "gpt-4.1" + temperature: 0 + params: + max_tokens: 1000 + system_prompt: | + You are an expert prompt evaluator specialising in coding tasks. + You will receive a proposed improved prompt. Evaluate it on: + - **Clarity**: Is the language unambiguous and objectives clear? + - **Completeness**: Are all necessary context and constraints included? + - **Structure**: Is it organised logically for readability? + - **Specificity**: Are the instructions detailed enough for precise code generation? + - **Effectiveness**: How likely is it to produce correct, concise, maintainable code? + For each criterion, assign an integer score from 1 (poor) to 5 (excellent), and provide a short comment. Then calculate the average of these five scores as `evaluation_score`. + **Output only** a JSON object matching this schema: + ```json + { + "evaluation_score": , + "scores": { + "Clarity": , + "Completeness": , + "Structure": , + "Specificity": , + "Effectiveness": + }, + "feedback": { + "Clarity": "", + "Completeness": "", + "Structure": "", + "Specificity": "", + "Effectiveness": "" + } + } + ``` + No additional text. + +retrievers: {} +memory: {} +functions: {} + +workflow: + type: "evaluator_optimizer" + max_iterations: 7 + nodes: + - id: "generate" + kind: "agent" + ref: "generator" + stop: false + + - id: "evaluate" + kind: "judge" + ref: "evaluator" + stop: false + + - id: "finalize" + kind: "agent" + ref: "generator" + stop: true + + edges: + - source: "generate" + target: "evaluate" + condition: "True" + + - source: "evaluate" + target: "finalize" + condition: "state.get('evaluation_score', 0) >= 4.0 or state.get('iteration_count', 0) >= 7" + + - source: "evaluate" + target: "generate" + condition: "state.get('evaluation_score', 0) < 4.0 and state.get('iteration_count', 0) < 7" + +eval: + tags: ["utils", "prompt-engineering", "optimization"] + use_cases: ["Improving prompt quality", "Iterative prompt refinement", "Code generation prompts"] + estimated_runtime: "180-420 seconds" diff --git a/specs/prompt_optimizer.yaml b/specs/utils/optimizer_prompt_v2.yaml similarity index 100% rename from specs/prompt_optimizer.yaml rename to specs/utils/optimizer_prompt_v2.yaml diff --git a/specs/utils/optimizer_yaml_v1.yaml b/specs/utils/optimizer_yaml_v1.yaml new file mode 100644 index 0000000..e656a6a --- /dev/null +++ b/specs/utils/optimizer_yaml_v1.yaml @@ -0,0 +1,206 @@ +# src/specs/utils/optimizer_yaml_v1.yaml +version: "0.1" +description: "Utility workflow for optimizing and improving existing YAML agent workflow specs" +runtime: "langgraph" + +llms: + spec_generator: + type: "anthropic" + model_name: "claude-sonnet-4-20250514" + temperature: 0.2 + params: + max_tokens: 4000 + stream: true + top_p: 0.9 + presence_penalty: 0.0 + frequency_penalty: 0.0 + system_prompt: | + You are a world-class prompt and YAML spec engineer. + + **TASK**: + You will receive a workflow spec in YAML. Your job is to produce an improved, schema-compliant version of that spec. + You may also receive feedback on a previous attempt. If so, incorporate that feedback into your new version. + + **IMPROVEMENT STEPS**: + 1. **Parse & Validate**: Analyze the original spec. Ensure it has all required fields (`version`, `runtime`, `llms`, `workflow`). Check for schema consistency. + 2. **Refine Metadata**: Bump the `version` (e.g., "0.1" -> "0.2"). Make the `description` more clear and concise. + 3. **Optimize Prompts**: Improve all prompts in the workflow. Make them clearer, more specific, and structured for better LLM performance. Add roles and step-by-step instructions where needed. + 4. **Harden Workflow**: Adjust workflow logic if necessary. Ensure node `ref`s are correct. + 5. **Format & Annotate**: Add inline YAML comments (`#`) to explain significant changes or complex parts of the workflow. + + **CRITICAL OUTPUT STRUCTURE**: + You MUST generate a complete YAML file that follows this EXACT structure (all fields shown are REQUIRED unless marked optional): + + ``` + version: "0.2" # REQUIRED - Bump from original + description: "" # REQUIRED - clear, one-line description + runtime: "langgraph" # REQUIRED + + llms: # REQUIRED section + : # e.g., "main_llm", "analyzer_llm" + type: openai # Must be: openai, anthropic, or ollama + model_name: # e.g., gpt-4.1-mini + temperature: # 0.0-1.0 + params: # Optional + max_tokens: + # other provider-specific params + + retrievers: {} # Optional - can be empty dict + memory: {} # Optional - can be empty dict + functions: {} # Optional - can be empty dict + + workflow: # REQUIRED section + type: # REQUIRED - sequential, custom_graph, react, or evaluator_optimizer + max_iterations: # Optional - for evaluator_optimizer type + nodes: # REQUIRED - list of nodes + - id: # Unique identifier + kind: agent # agent, tool, judge, branch, or mcp + ref: # Must match a key in llms section + config: # Optional but usually needed + prompt: | # The improved prompt + + output_key: # Optional - state variable name + stop: false # true only for the final node + + edges: # REQUIRED - list of edges + - source: + target: + condition: # Optional - Python expression + + eval: # Optional section + metrics: # Optional + - + dataset_path: # Optional + ``` + + **STRICT RULES**: + 1. The FIRST node's prompt must contain {workflow_initial_input} exactly once + 2. Subsequent nodes reference data via {state.variable_name} + 3. All node IDs in edges must match actual node IDs in the nodes list + 4. The LAST node must have stop: true + 5. LLM type must be one of: openai, anthropic, ollama + 6. All node refs must match keys defined in the llms section + 7. For evaluator_optimizer workflows, include proper conditions on edges + 8. For the workflow add the correct edges that represent the workflow logic + + **OUTPUT REQUIREMENTS**: + - Generate ONLY the complete YAML content + - Do NOT include markdown fences (```yaml or ```) + - Do NOT include any explanations, preamble, or commentary + - The output must be valid YAML that can be parsed directly + + spec_evaluator: + type: "openai" + model_name: "gpt-4.1" + temperature: 0.0 + params: + max_tokens: 1500 + system_prompt: | + You are an expert YAML spec auditor. + **Input**: + - `original_spec`: the user's initial YAML. + - `improved_spec`: the generator's proposed YAML. + **Task**: Evaluate `improved_spec` against `original_spec` and the official schema. + Score each criterion 1–5, then average into `evaluation_score`: + - **Compliance**: Adherence to the required schema (fields, types, order). + - **Completeness**: Inclusion of all logical sections and metadata. + - **Clarity**: Readability, comments, and descriptive text. + - **Robustness**: Workflow consistency (no broken nodes/edges, sensible defaults). + - **Precision**: Prompts' specificity and technical correctness. + **Output only** a JSON object: + ```json + { + "evaluation_score": , + "scores": { + "Compliance": , + "Completeness": , + "Clarity": , + "Robustness": , + "Precision": + }, + "feedback": { + "Compliance": "", + "Completeness": "", + "Clarity": "", + "Robustness": "", + "Precision": "" + } + } + ``` + +retrievers: {} +memory: {} +functions: {} + +workflow: + type: "evaluator_optimizer" + max_iterations: 5 + nodes: + - id: generate + kind: agent + ref: spec_generator + stop: false + config: + format: yaml + prompt: | + Generate an improved version of the following workflow spec: + + {workflow_initial_input} + + Follow the improvement steps and output structure defined in your system prompt. + Ensure the output is a complete, valid YAML specification. + + - id: evaluate + kind: judge + ref: spec_evaluator + stop: false + config: + prompt: | + Evaluate the improved spec against the original: + + Original spec: + {workflow_initial_input} + + Improved spec: + {state.generate} + + Provide your evaluation as specified in your system prompt. + + - id: finalize + kind: agent + ref: spec_generator + stop: true + config: + format: yaml + prompt: | + Based on the evaluation feedback, produce the final optimized version of the spec. + + Original spec: + {workflow_initial_input} + + Previous attempt: + {state.generate} + + Evaluation feedback: + {state.evaluate} + + Incorporate the feedback and generate the final, optimized YAML specification. + Ensure all issues identified in the evaluation are addressed. + + edges: + - source: generate + target: evaluate + condition: "True" + + - source: evaluate + target: finalize + condition: "state.get('evaluation_score', 0) >= 4.0 or state.get('iteration_count', 0) >= 5" + + - source: evaluate + target: generate + condition: "state.get('evaluation_score', 0) < 4.0 and state.get('iteration_count', 0) < 5" + +eval: + tags: ["utils", "optimization", "meta-workflow"] + use_cases: ["Improving existing workflows", "Schema compliance checking", "Quality enhancement"] + estimated_runtime: "120-300 seconds" diff --git a/specs/utils/simulation_scenario_v1.yaml b/specs/utils/simulation_scenario_v1.yaml new file mode 100644 index 0000000..24471e7 --- /dev/null +++ b/specs/utils/simulation_scenario_v1.yaml @@ -0,0 +1,264 @@ +# src/specs/utils/simulation_scenario_v1.yaml +version: "0.1" +description: "Meta-utility for creating simulation workflows that role-play scenario elements and generate outcome reports" +runtime: "langgraph" + +llms: + rule_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.2 + params: {} + decomposer_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.3 + params: {} + strategist_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.5 + params: {} + element_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.4 + params: {} + yaml_llm: + type: anthropic + model_name: claude-sonnet-4-20250514 + temperature: 0.1 + params: {} + +workflow: + type: sequential + nodes: + # 1. Summarise the schema and flow rules + - id: gather_rules + kind: agent + ref: rule_llm + config: + prompt: | + You have access to @docs_specs/spec_schema.md, @docs/flow_rules.md, and @docs/notes_fix_agent_principles.md. + Extract the non-negotiable requirements for LangGraph YAML specs that prevent channel conflicts: + + PRIORITY RULES FROM CHANNEL CONFLICT ANALYSIS: + β€’ ONLY sequential workflow type (never custom_graph for simulations) + β€’ ONE and only ONE node may reference {input} (the first node only) + β€’ ALL other nodes use {{state.variable_name}} format + β€’ LINEAR execution chains only (no fan-out patterns) + β€’ Complete state references (no empty "Scenario:" sections) + β€’ Unique output keys for all nodes + + STANDARD SPEC REQUIREMENTS: + β€’ mandatory top-level keys (version, description, runtime, llms, workflow) + β€’ node field rules (`id`, `kind`, `ref`, `config`, `output_key`, `stop`) + β€’ edge structure requirements + + OUTPUT under: + ## YAML_RULES + - + output_key: yaml_rules + + # 2. Break down the user's scenario and rewrite it clearly + - id: analyse_scenario + kind: agent + ref: decomposer_llm + config: + prompt: | + You are an expert at analysing prompts and breaking them down into their constituent elements, in order to create a graph simulation. + Your job is to dissect the raw simulation prompt into its constituent elements, and rewrite it in your own words, using: + (actors, objects, subsystems, environmental factors, constraints). + + ORIGINAL SCENARIO PROMPT + ------------------------ + {input} + ------------------------ + + Provide: + 1. **Element List** - bullet list with a short behavioural note for each element. + 2. **Rewritten Scenario** - your own concise, unambiguous restatement. + + Format exactly: + + ## ELEMENTS + - : + - ... + + ## REWRITTEN_SIMULATION_PROMPT + + output_key: scenario_breakdown + + # 3. Choose patterns and overall graph structure + - id: map_interactions + kind: agent + ref: strategist_llm + config: + prompt: | + Based on the LangGraph Channel Conflict principles from @docs/notes_fix_agent_principles.md, + design a LINEAR SEQUENTIAL simulation structure for the elements below. + + {state.scenario_breakdown} + + MANDATORY DESIGN PRINCIPLES: + - Use ONLY sequential workflow type (never custom_graph) + - Create LINEAR execution chain: scenario_processor β†’ element1 β†’ element2 β†’ ... β†’ aggregator + - NO fan-out patterns (one node feeds exactly one next node) + - Each element becomes one sequential node in the chain + - Each node has unique output_key + + Produce: + + BEGIN REASONING + + Sequential linear flow prevents LangGraph channel conflicts. Each element will process + the scenario in sequence, building upon previous elements' outputs for rich interaction. + + END REASONING + + BEGIN WORKFLOW_TYPE + sequential + END WORKFLOW_TYPE + + BEGIN GRAPH_PLAN + - scenario_processor (consumes {input}, outputs scenario_input) + - _agent (uses {state.scenario_input}, outputs _response) + - _agent (uses {state.scenario_input} + {state._response}, outputs _response) + - ... (continue for all elements) + - aggregate_outcomes (synthesizes all responses into final narrative) + + EDGES: Linear chain scenario_processor β†’ element1 β†’ element2 β†’ ... β†’ aggregate_outcomes + END GRAPH_PLAN + output_key: graph_plan + + # 4. Craft a detailed simulation prompt for each element + - id: make_element_prompts + kind: agent + ref: element_llm + config: + prompt: | + Build role-play prompts for each element following LangGraph Channel Conflict principles. + Each element will execute sequentially and can reference previous elements' outputs. + + ELEMENT LIST AND REWRITTEN SCENARIO + ----------------------------------- + {state.scenario_breakdown} + + PROMPT REQUIREMENTS: + - First element only uses {{state.scenario_input}} + - Subsequent elements can use {{state.scenario_input}} AND previous elements' outputs + - ALL prompts must have complete state references (no empty "Scenario:" sections) + - Each element gets unique output_key: _response + + For each element, output: + + ### + Prompt: + """ + You are . . + about the typical characteristics of this element, its behaviours and triggers. + Imagine you are this element and incorporate these values and make a prediction of what happens in this scenario, based on your behaviours and traits. + + Scenario: + {{state.scenario_input}} + + [If not first element, add previous context:] + Previous Element Perspectives: + {{state._response}} + + Provide your detailed reaction as ... + """ + + Combine all into a single block labelled: + ## ELEMENT_PROMPTS + output_key: element_prompts + + # 5. Generate the final simulation YAML + - id: build_simulation_yaml + kind: agent + ref: yaml_llm + stop: true + config: + format: yaml + prompt: | + Generate a LangGraph-compliant simulation spec following Channel Conflict Prevention Principles. + The spec MUST use sequential workflow with linear execution to prevent channel conflicts. + + REQUIRED STRUCTURE: + - `version` (string): "1.0" + - `description` (string): Descriptive text about the simulation + - `runtime` (string): "langgraph" + - `llms` (object): Dictionary with ONE LLM definition + - Key should be descriptive (e.g., "sim_llm") + - Value must have: `type` (string), `model_name` (string) + - `workflow` (object): + - `type` (string): MUST be "sequential" (never custom_graph) + - `nodes` (array): Array of node objects in LINEAR execution order: + - `id` (string): Unique identifier + - `kind` (string): "agent" for LLM nodes + - `ref` (string): Reference to LLM key + - `config` (object): Contains `prompt` (string) + - `output_key` (string): State variable name for output (must be unique) + - `stop` (boolean): true only for final node + - `edges` (array): LINEAR chain of edge objects: + - `source` (string): Source node id + - `target` (string): Target node id (forms chain: Aβ†’Bβ†’Cβ†’D...) + + ## YAML RULES + {state.yaml_rules} + + ## GRAPH PLAN + {state.graph_plan} + + ## ELEMENT PROMPTS + {state.element_prompts} + + CHANNEL CONFLICT PREVENTION REQUIREMENTS (CRITICAL): + + 1. INPUT ISOLATION PRINCIPLE: + - ONLY the first node (scenario_processor) may reference `{input}` + - The first node MUST have this EXACT prompt: "Process this scenario for simulation: {input}. Extract key elements and provide comprehensive scenario context." + - DO NOT hardcode any scenario text in the first node prompt + - The first node prompt must use ONLY {input} for scenario data + - ALL other nodes MUST use `{{state.variable_name}}` format NEVER `{input}` + + 2. LINEAR FLOW PRINCIPLE: + - workflow.type MUST be "sequential" + - Create LINEAR execution chain: scenario_processor β†’ element1 β†’ element2 β†’ ... β†’ aggregate_outcomes + - NO fan-out patterns (one source β†’ multiple targets) + - Each edge connects exactly one source to one target + + 3. STATE REFERENCE COMPLETENESS: + - ALL element nodes must have complete `{{state.scenario_input}}` references + - NO empty "Scenario:" sections in prompts + - Each node has unique output_key ending in "_response" + + 4. GRAPH STRUCTURE: + - Define exactly one LLM with model `gpt-4.1-mini` + - Each element gets its own sequential `agent` node + - Final `aggregate_outcomes` node with `stop: true` + - All edge sources must be actual node IDs from nodes array + + 5. DATA FLOW PATTERN: + {input} β†’ scenario_processor(β†’scenario_input) β†’ element1(β†’element1_response) β†’ element2(β†’element2_response) β†’ ... β†’ aggregate_outcomes + + 6. FORBIDDEN PATTERNS: + - NO custom_graph workflow type + - NO fan-out edges (one source to multiple targets) + - NO "__start__" references + - NO hardcoded scenario text in first node + - NO empty state references + - NO concurrent state writes + + Generate the complete specification as valid YAML. Do NOT include markdown fences, preamble, summary or commentary. + output_key: simulation_yaml + + edges: + - source: gather_rules + target: analyse_scenario + - source: analyse_scenario + target: map_interactions + - source: map_interactions + target: make_element_prompts + - source: make_element_prompts + target: build_simulation_yaml diff --git a/src/elf0/cli.py b/src/elf0/cli.py index 5d81271..ff4ed5d 100644 --- a/src/elf0/cli.py +++ b/src/elf0/cli.py @@ -19,7 +19,6 @@ from rich.live import Live from rich.logging import RichHandler # Added from rich.markdown import Markdown -from rich.rule import Rule # Added import from rich.spinner import Spinner import typer @@ -168,6 +167,65 @@ def format_workflow_result(result: object) -> tuple[str, bool]: typer.secho(f"Error: Could not serialize result to JSON: {e}", fg=typer.colors.RED) raise typer.Exit(code=1) from e +def _display_spec_file(spec_file_path: Path, show_full_path: bool = False) -> None: + """Display a single spec file with its description.""" + description = extract_spec_description(spec_file_path) + + if show_full_path: + full_path = str(spec_file_path.relative_to(Path())) + rich.console.print(f"[bold bright_green]{full_path}[/bold bright_green]") + else: + rich.console.print(f"[bold bright_green]{spec_file_path.name}[/bold bright_green]") + + if description == "No description available.": + rich.console.print(f" [dim italic]{description}[/dim italic]") + elif "Error:" in description: + rich.console.print(f" [red]{description}[/red]") + else: + rich.console.print(f" {description}") + +def _display_grouped_specs(grouped_files: dict, directory_order: list[str]) -> None: + """Display specs grouped by directory.""" + first_group = True + for dir_name in directory_order: + if dir_name in grouped_files: + files_in_dir = grouped_files[dir_name] + + # Add spacing between groups (except before first group) + if not first_group: + rich.console.print() + rich.console.print() + + # Directory header + rich.console.print(f"[bold blue]── {dir_name.title()} ──[/bold blue]") + rich.console.print() + + # Files in this directory + for i, spec_file_path in enumerate(files_in_dir): + # Add subtle separator between files in same directory + if i > 0: + rich.console.print() + + _display_spec_file(spec_file_path, show_full_path=True) + + first_group = False + + # Final spacing + rich.console.print() + +def _display_single_directory_specs(spec_files: list[Path]) -> None: + """Display specs for a single directory.""" + for i, spec_file_path in enumerate(spec_files): + # Add spacing between entries except the first one + if i > 0: + rich.console.print() + + _display_spec_file(spec_file_path, show_full_path=True) + + # Add a blank line after the last item for spacing before the next shell prompt + if i == len(spec_files) - 1: + rich.console.print() + def validate_output_path(output_path: Path) -> None: """Validate output path and permissions.""" if not output_path.parent.exists(): @@ -542,8 +600,13 @@ def prompt_yaml_command( # Session ended message is essential UI feedback. rich.console.print("\n[yellow]Session ended.[/yellow]") -@app.command("list-specs", help="List all YAML workflow spec files in the ./specs directory.") -def list_specs_command() -> None: +@app.command("list-specs", help="List YAML workflow spec files, optionally filtered by directory.") +def list_specs_command( + directory: str = typer.Argument( + None, + help="Optional directory filter (basic, content, code, examples, utils, archive). Shows all except archive if not specified." + ) +) -> None: """Scans the ./specs directory for YAML workflow specification files (.yaml or .yml) and displays them with their descriptions. @@ -555,32 +618,28 @@ def list_specs_command() -> None: rich.console.print(f"[yellow]Warning:[/] Specs directory '{specs_dir}' not found.") return - spec_files = list_spec_files(specs_dir) + spec_files = list_spec_files(specs_dir, directory) if not spec_files: rich.console.print(f"No spec files (.yaml or .yml) found in '{specs_dir}'.") return - for i, spec_file_path in enumerate(spec_files): - description = extract_spec_description(spec_file_path) - - # Add a rule before each entry except the first one - if i > 0: - rich.console.print(Rule(style="dim black")) # Using a very subtle rule - rich.console.print() # Add a blank line for more spacing after the rule + if directory is None: + # Group by directory when showing all specs + from collections import defaultdict - rich.console.print(f"[bold bright_green]{spec_file_path.name}[/bold bright_green]") + # Group files by directory + grouped_files = defaultdict(list) + for spec_file_path in spec_files: + dir_name = spec_file_path.parent.name + grouped_files[dir_name].append(spec_file_path) - if description == "No description available.": - rich.console.print(f" [dim italic]{description}[/dim italic]") - elif "Error:" in description: - rich.console.print(f" [red]{description}[/red]") - else: - rich.console.print(f" {description}") + # Define directory order (archive last, excluded from 'all') + directory_order = ["specs", "basic", "content", "code", "examples", "utils"] - # Add a blank line after the last item for spacing before the next shell prompt - if i == len(spec_files) - 1: - rich.console.print() + _display_grouped_specs(grouped_files, directory_order) + else: + _display_single_directory_specs(spec_files) # Add subcommands to improve app improve_app.command("yaml", help="Improve a YAML workflow specification using AI optimization")(improve_yaml_command) diff --git a/src/elf0/core/compiler.py b/src/elf0/core/compiler.py index 21d868b..a0051cc 100644 --- a/src/elf0/core/compiler.py +++ b/src/elf0/core/compiler.py @@ -26,16 +26,68 @@ def __init__(self, data: dict[str, Any]): self._data = {k: v for k, v in data.items() if isinstance(k, str)} def __getattr__(self, name: str) -> Any: - return self._data.get(name, "") + # First check regular state fields + if name in self._data: + return self._data[name] + # Then check dynamic_state for output_key fields + dynamic_state = self._data.get("dynamic_state", {}) + if dynamic_state and isinstance(dynamic_state, dict) and name in dynamic_state: + return dynamic_state[name] + # Special handling for 'json' - parse JSON from output_key fields + if name == "json": + return self._create_json_namespace() + return "" + + def _create_json_namespace(self) -> "SafeNamespace": + """Create a namespace that parses JSON from dynamic_state fields.""" + import json + json_data = {} + dynamic_state = self._data.get("dynamic_state", {}) + if dynamic_state and isinstance(dynamic_state, dict): + for value in dynamic_state.values(): + if isinstance(value, str): + try: + # Clean the string first - remove markdown fences and quotes + cleaned_value = value.strip() + if cleaned_value.startswith("```json"): + cleaned_value = cleaned_value[7:] + if cleaned_value.startswith("```"): + cleaned_value = cleaned_value[3:] + if cleaned_value.endswith("```"): + cleaned_value = cleaned_value[:-3] + cleaned_value = cleaned_value.strip() + + # Handle case where LLM returns just "error" instead of JSON + if cleaned_value in {'"error"', "error"}: + json_data["error"] = "JSON parsing failed" + continue + + # Try to parse as JSON + parsed = json.loads(cleaned_value) + if isinstance(parsed, dict): + json_data.update(parsed) + elif isinstance(parsed, str): + # If it's a string, treat it as an error + json_data["error"] = parsed + except (json.JSONDecodeError, ValueError): + # Not valid JSON, treat as error message + json_data["error"] = f"Invalid JSON: {value[:50]}..." + return SafeNamespace(json_data) def __contains__(self, key: str) -> bool: - return key in self._data + if key in self._data: + return True + dynamic_state = self._data.get("dynamic_state", {}) + if dynamic_state and isinstance(dynamic_state, dict) and key in dynamic_state: + return True + # Special handling for 'json' + return key == "json" # Always return True for json namespace def __getitem__(self, key: str) -> Any: - return self._data.get(key, "") + return self.__getattr__(key) def get(self, key: str, default: Any = "") -> Any: - return self._data.get(key, default) + return self.__getattr__(key) if self.__contains__(key) else default # Get a logger specific to elf.core.compiler. The CLI's --quiet flag will target 'elf.core'. logger = logging.getLogger(__name__) # This will be 'elf.core.compiler' @@ -63,6 +115,8 @@ class WorkflowState(TypedDict): format_error: str | None # Claude Code integration fields claude_code_result: dict[str, Any] | None + # Dynamic fields for output_key support - stores custom node outputs + dynamic_state: dict[str, Any] | None class NodeFunction(Protocol): """Protocol defining the interface for node functions.""" @@ -147,6 +201,136 @@ def make_llm_node(spec: Spec, node: WorkflowNode) -> NodeFunction: elif potential_prompt is not None: # 'prompt' key exists in config but its value is not a string logger.warning(f"[yellow]⚠ [Node: {node.id}] Invalid prompt type - ignored[/yellow]") + def _prepare_prompt_template(state: WorkflowState, prompt_template_str: str, user_provided_input: str) -> str: + """Prepare the final prompt to send to LLM.""" + if not prompt_template_str: + return user_provided_input if user_provided_input else "" + + # Support multiple state fields in template + template_kwargs = { + "input": user_provided_input, + "output": state.get("output", ""), + "iteration_count": state.get("iteration_count", 0), + "evaluation_score": state.get("evaluation_score", 0.0), + # Allow attribute-style access such as {state.output} with safe fallback + "state": SafeNamespace(state), + } + + try: + return prompt_template_str.format(**template_kwargs) + except KeyError as e: + logger.warning(f"[yellow]⚠ [Node: {node.id}] Template variable {e} not found in state, using partial formatting[/yellow]") + return _handle_template_error(prompt_template_str, user_provided_input, str(e).strip("'\"")) + + def _handle_template_error(prompt_template_str: str, user_provided_input: str, error_key: str) -> str: + """Handle template formatting errors.""" + # If the error key looks like a malformed JSON key, try to clean the prompt + if error_key.startswith('"') and error_key.endswith('"'): + # This is likely a malformed LLM output - remove the problematic template + cleaned_prompt = prompt_template_str + # Remove the malformed template variable + import re + malformed_pattern = r'\{["\'][^"\']*["\']\}' + cleaned_prompt = re.sub(malformed_pattern, "[MALFORMED_OUTPUT]", cleaned_prompt) + logger.warning(f"[yellow]⚠ [Node: {node.id}] Detected malformed template variable, cleaned prompt[/yellow]") + return cleaned_prompt + # Fall back to just input formatting for compatibility + if "{input}" in prompt_template_str: + try: + return prompt_template_str.format(input=user_provided_input) + except KeyError: + # Even input formatting failed, use raw prompt + final_prompt = prompt_template_str + if user_provided_input: + final_prompt += "\n\nUser Input: " + user_provided_input + return final_prompt + else: + final_prompt = prompt_template_str + if user_provided_input: + final_prompt += "\n\nUser Input: " + user_provided_input + return final_prompt + + def _clean_json_response(response: str) -> str: + """Clean malformed JSON responses from LLMs.""" + cleaned = response.strip() + + # If response is just a quoted string (common LLM error), try to fix it + QUOTED_STRING_COUNT = 2 + if cleaned.startswith('"') and cleaned.endswith('"') and cleaned.count('"') == QUOTED_STRING_COUNT: + # This looks like: "youtube_url" instead of {"youtube_url": "value"} + logger.warning(f"[yellow]⚠ [Node: {node.id}] Detected malformed JSON response, attempting to fix[/yellow]") + return '{"error": "Malformed LLM output - expected JSON object"}' + if not cleaned.startswith("{"): + # Try to extract JSON from response if it's wrapped in text + import re + json_match = re.search(r"\{[^{}]*\}", cleaned) + if json_match: + return json_match.group(0) + # If no JSON found and response looks like it should be JSON, return error + if any(keyword in cleaned.lower() for keyword in ["youtube", "url", "error"]): + logger.warning(f"[yellow]⚠ [Node: {node.id}] No valid JSON found in response, returning error[/yellow]") + return '{"error": "Invalid response format - expected JSON"}' + + return response + + def _handle_structured_output(response: str, output_format: str, state: WorkflowState) -> WorkflowState: + """Handle structured output processing.""" + logger.info(f"[blue][Node: {node.id}] Processing {output_format} format[/blue]") + try: + if output_format == "json": + # Handle JSON structured output for Spec generation + spec_instance = Spec.from_structured_json(response) + yaml_output = spec_instance.to_yaml_string() + + logger.info(f"[green]βœ“ [Node: {node.id}] JSON validation passed[/green]") + return WorkflowState({ + **state, + "output": yaml_output, # Clean YAML output + "structured_data": spec_instance.model_dump(exclude_none=True), + "raw_json": response, + "format_status": "converted", + "current_node": node.id, + "error_context": None + }) + if output_format == "yaml": + # Handle YAML format (existing logic) + structured_output = Spec.create_structured_output(response) + + if structured_output["validation"]["is_valid"]: + logger.info(f"[green]βœ“ [Node: {node.id}] YAML validation passed[/green]") + return WorkflowState({ + **state, + "output": structured_output["yaml_content"], + "structured_output": structured_output, + "validation_status": "valid", + "current_node": node.id, + "error_context": None + }) + error_msg = structured_output["validation"]["error"] + logger.error(f"[red]βœ— [Node: {node.id}] YAML validation failed: {error_msg}[/red]") + return WorkflowState({ + **state, + "output": response, + "structured_output": structured_output, + "validation_status": "invalid", + "validation_error": error_msg, + "current_node": node.id, + "error_context": f"YAML validation failed: {error_msg}" + }) + logger.warning(f"[yellow]⚠ [Node: {node.id}] Unknown format: {output_format}[/yellow]") + return None # Continue with normal processing + + except Exception as e: + logger.exception(f"[red]βœ— [Node: {node.id}] Structured output error: {e!s}[/red]") + return WorkflowState({ + **state, + "output": response, + "format_status": "error", + "format_error": str(e), + "current_node": node.id, + "error_context": f"Structured output error: {e!s}" + }) + def node_fn(state: WorkflowState) -> WorkflowState: try: current_iter_display = (state.get("iteration_count") or 0) + 1 @@ -155,32 +339,10 @@ def node_fn(state: WorkflowState) -> WorkflowState: user_provided_input = state.get("input", "") - final_prompt_to_llm: str - if prompt_template_str: - # Support multiple state fields in template - template_kwargs = { - "input": user_provided_input, - "output": state.get("output", ""), - "iteration_count": state.get("iteration_count", 0), - "evaluation_score": state.get("evaluation_score", 0.0), - # Allow attribute-style access such as {state.output} with safe fallback - "state": SafeNamespace(state), - } + # Prepare prompt using helper function + final_prompt_to_llm = _prepare_prompt_template(state, prompt_template_str, user_provided_input) - try: - final_prompt_to_llm = prompt_template_str.format(**template_kwargs) - except KeyError as e: - logger.warning(f"[yellow]⚠ [Node: {node.id}] Template variable {e} not found in state, using partial formatting[/yellow]") - # Fall back to just input formatting for compatibility - if "{input}" in prompt_template_str: - final_prompt_to_llm = prompt_template_str.format(input=user_provided_input) - else: - final_prompt_to_llm = prompt_template_str - if user_provided_input: - final_prompt_to_llm += "\n\nUser Input: " + user_provided_input - elif user_provided_input: - final_prompt_to_llm = user_provided_input - else: + if not final_prompt_to_llm and not user_provided_input: error_msg = f"Node {node.id} (type: {node.kind}) has no prompt template in config and no 'input' in state. Cannot proceed." logger.error(f"[red]βœ— [Node: {node.id}] {error_msg}[/red]") return WorkflowState({ @@ -194,63 +356,17 @@ def node_fn(state: WorkflowState) -> WorkflowState: response = llm_client.generate(final_prompt_to_llm) logger.info(f"[dim][Node: {node.id}] Response: {response[:50]}...[/dim]") + # Clean and validate response for nodes that expect JSON + output_key = node.config.get("output_key") + if output_key and "json" in str(node.config.get("prompt", "")).lower(): + response = _clean_json_response(response) + # Check if this node has a structured output format output_format = node.config.get("format") if output_format: - logger.info(f"[blue][Node: {node.id}] Processing {output_format} format[/blue]") - try: - if output_format == "json": - # Handle JSON structured output for Spec generation - spec_instance = Spec.from_structured_json(response) - yaml_output = spec_instance.to_yaml_string() - - logger.info(f"[green]βœ“ [Node: {node.id}] JSON validation passed[/green]") - return WorkflowState({ - **state, - "output": yaml_output, # Clean YAML output - "structured_data": spec_instance.model_dump(exclude_none=True), - "raw_json": response, - "format_status": "converted", - "current_node": node.id, - "error_context": None - }) - if output_format == "yaml": - # Handle YAML format (existing logic) - structured_output = Spec.create_structured_output(response) - - if structured_output["validation"]["is_valid"]: - logger.info(f"[green]βœ“ [Node: {node.id}] YAML validation passed[/green]") - return WorkflowState({ - **state, - "output": structured_output["yaml_content"], - "structured_output": structured_output, - "validation_status": "valid", - "current_node": node.id, - "error_context": None - }) - error_msg = structured_output["validation"]["error"] - logger.error(f"[red]βœ— [Node: {node.id}] YAML validation failed: {error_msg}[/red]") - return WorkflowState({ - **state, - "output": response, - "structured_output": structured_output, - "validation_status": "invalid", - "validation_error": error_msg, - "current_node": node.id, - "error_context": f"YAML validation failed: {error_msg}" - }) - logger.warning(f"[yellow]⚠ [Node: {node.id}] Unknown format: {output_format}[/yellow]") - - except Exception as e: - logger.exception(f"[red]βœ— [Node: {node.id}] Structured output error: {e!s}[/red]") - return WorkflowState({ - **state, - "output": response, - "format_status": "error", - "format_error": str(e), - "current_node": node.id, - "error_context": f"Structured output error: {e!s}" - }) + structured_result = _handle_structured_output(response, output_format, state) + if structured_result is not None: + return structured_result if node.id == "breakdown_worker": current_iteration_for_node = state.get("iteration_count") or 0 @@ -262,12 +378,22 @@ def node_fn(state: WorkflowState) -> WorkflowState: "error_context": None }) - return WorkflowState({ + # Handle output_key for custom state field assignment + result_state = { **state, # Preserve all existing state "output": response, "current_node": node.id, "error_context": None - }) + } + + # If node has output_key, store response in dynamic_state + output_key = node.config.get("output_key") + if output_key and isinstance(output_key, str): + if "dynamic_state" not in result_state or result_state["dynamic_state"] is None: + result_state["dynamic_state"] = {} + result_state["dynamic_state"][output_key] = response + + return WorkflowState(result_state) except Exception as e: logger.exception(f"[red]βœ— [Node: {node.id}] LLM error: {e!s}[/red]") # Preserve original state from before this node's execution on error @@ -646,22 +772,23 @@ def node_fn(state: WorkflowState) -> WorkflowState: def make_branch_node(node: Any) -> NodeFunction: """Creates a node function for implementing branching logic within the workflow. - Note: This function currently returns a placeholder implementation. - A proper implementation would involve evaluating conditions based on `WorkflowState` - and returning a string that dictates the next node or path in the graph. + Branch nodes are pass-through nodes that preserve the previous node's output + while allowing conditional routing based on their configuration. The actual + routing logic is handled by the graph edges, not the node function itself. Args: node: The node configuration object. Specific attributes relevant to branching (e.g., conditions, target nodes) would be defined here in a full implementation. Returns: - A node function that, when implemented, performs branching based on `WorkflowState`. + A node function that passes through the previous node's output unchanged. """ def node_fn(state: WorkflowState) -> WorkflowState: - # TODO: Implement actual branching logic + # Branch nodes are pass-through - preserve previous output for routing return { **state, - "output": f"Branch result for: {state['input']}" + "current_node": getattr(node, "id", "branch_node"), + "error_context": None } return node_fn @@ -1161,6 +1288,8 @@ class WorkflowStateSchema(BaseModel): raw_json: str | None = None format_status: str | None = None # 'converted', 'error', or None format_error: str | None = None + # Dynamic fields for output_key support + dynamic_state: dict[str, Any] | None = None # Create a new graph with explicit state schema graph = StateGraph( diff --git a/src/elf0/core/nodes/mcp_node.py b/src/elf0/core/nodes/mcp_node.py index c67ab9e..60db00a 100644 --- a/src/elf0/core/nodes/mcp_node.py +++ b/src/elf0/core/nodes/mcp_node.py @@ -52,9 +52,82 @@ async def execute(self, state: dict[str, Any]) -> dict[str, Any]: finally: await self.client.disconnect() + def _extract_json_from_dynamic_state(self, dynamic_state: dict, json_key: str) -> tuple[Any, bool]: + """Extract JSON value from dynamic_state.""" + import json + if not dynamic_state or not isinstance(dynamic_state, dict): + return None, False + + for dyn_value in dynamic_state.values(): + if isinstance(dyn_value, str): + try: + # Try to parse as JSON + cleaned_value = dyn_value.strip() + if cleaned_value.startswith("```json"): + cleaned_value = cleaned_value[7:] + if cleaned_value.startswith("```"): + cleaned_value = cleaned_value[3:] + if cleaned_value.endswith("```"): + cleaned_value = cleaned_value[:-3] + cleaned_value = cleaned_value.strip() + + # Handle malformed responses + QUOTED_STRING_COUNT = 2 + if cleaned_value.startswith('"') and cleaned_value.endswith('"') and cleaned_value.count('"') == QUOTED_STRING_COUNT: + # Likely malformed: "youtube_url" instead of {"youtube_url": "value"} + continue + + parsed = json.loads(cleaned_value) + if isinstance(parsed, dict) and json_key in parsed: + return parsed[json_key], True + except (json.JSONDecodeError, ValueError): + continue + return None, False + + def _extract_json_from_output(self, output: str, json_key: str) -> tuple[Any, bool]: + """Extract JSON value from output string.""" + import json + if not isinstance(output, str): + return None, False + + # Try to find JSON in the output + start = output.find("{") + end = output.rfind("}") + 1 + if start != -1 and end != 0: + try: + json_str = output[start:end] + parsed = json.loads(json_str) + return parsed.get(json_key), True + except (json.JSONDecodeError, ValueError): + pass + return None, False + + def _handle_json_parameter(self, key: str, json_key: str, state: dict[str, Any]) -> Any: + """Handle JSON parameter extraction with fallbacks.""" + import json + try: + # First try to find JSON in dynamic_state (new system) + dynamic_state = state.get("dynamic_state", {}) + value, found = self._extract_json_from_dynamic_state(dynamic_state, json_key) + if found: + return value + + # Fallback to old output-based parsing if not found in dynamic_state + output = state.get("output", "{}") + value, found = self._extract_json_from_output(output, json_key) + if found: + return value + + # Final fallback - if still no JSON found, use placeholder + logger.warning(f"[yellow]⚠ MCP parameter {key}: Could not extract {json_key} from JSON, using placeholder[/yellow]") + return f"MISSING_{json_key.upper()}" + + except (json.JSONDecodeError, AttributeError) as e: + logger.warning(f"[yellow]⚠ MCP parameter {key}: JSON extraction failed ({e}), using placeholder[/yellow]") + return f"MISSING_{json_key.upper()}" + def _bind_parameters(self, state: dict[str, Any]) -> dict[str, Any]: """Enhanced parameter binding from state with JSON parsing support.""" - import json bound = {} for key, value in self.parameters.items(): if isinstance(value, str) and value.startswith("${"): @@ -63,25 +136,9 @@ def _bind_parameters(self, state: dict[str, Any]) -> dict[str, Any]: # Special handling for JSON extraction from previous output if var_name.startswith("json."): - # Extract from JSON in output field + # Extract from JSON in output field or dynamic_state json_key = var_name[5:] # Remove "json." prefix - try: - output = state.get("output", "{}") - # Clean the output to extract just JSON - if isinstance(output, str): - # Try to find JSON in the output - start = output.find("{") - end = output.rfind("}") + 1 - if start != -1 and end != 0: - json_str = output[start:end] - parsed = json.loads(json_str) - bound[key] = parsed.get(json_key, value) - else: - bound[key] = value - else: - bound[key] = value - except (json.JSONDecodeError, AttributeError): - bound[key] = value + bound[key] = self._handle_json_parameter(key, json_key, state) else: bound[key] = state.get(var_name, value) else: diff --git a/src/elf0/functions/utils.py b/src/elf0/functions/utils.py index 73f18bf..7036f76 100644 --- a/src/elf0/functions/utils.py +++ b/src/elf0/functions/utils.py @@ -121,6 +121,13 @@ def get_user_input(state: WorkflowState, prompt: str = "Please provide input:") """ console = Console(stderr=True) + # Use question from state if available and no custom prompt provided + if prompt == "Please provide input:": + if "question" in state: + prompt = state["question"] + elif "output" in state: + prompt = state["output"] + # Display the LLM's question with professional styling console.print("\n[bold blue]Assistant:[/bold blue]") console.print(prompt) diff --git a/src/elf0/utils/file_utils.py b/src/elf0/utils/file_utils.py index a9eab0f..f6cde8c 100644 --- a/src/elf0/utils/file_utils.py +++ b/src/elf0/utils/file_utils.py @@ -10,6 +10,88 @@ def is_valid_file(path: Path) -> bool: """Check if a path exists and is a file.""" return path.exists() and path.is_file() +def is_valid_directory(path: Path) -> bool: + """Check if a path exists and is a directory.""" + return path.exists() and path.is_dir() + +def is_relevant_file(path: Path) -> bool: + """Check if a file should be included in directory scanning.""" + # Skip hidden files + if path.name.startswith("."): + return False + + # Define relevant extensions + relevant_exts = { + ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h", + ".rs", ".go", ".rb", ".php", ".sh", ".sql", ".r", ".scala", ".kt", + ".json", ".yaml", ".yml", ".xml", ".toml", ".ini", ".env", ".cfg", + ".md", ".rst", ".txt", ".adoc" + } + + # Skip known binary extensions + binary_exts = { + ".pyc", ".pyo", ".class", ".exe", ".dll", ".so", ".o", ".a", + ".zip", ".tar", ".gz", ".7z", ".rar", ".pdf", ".jpg", ".jpeg", + ".png", ".gif", ".svg", ".mp4", ".avi", ".mp3", ".wav" + } + + suffix = path.suffix.lower() + + if suffix in relevant_exts: + return True + if suffix in binary_exts: + return False + + # Handle extensionless files + if not suffix and path.is_file(): + try: + # Simple size check + if path.stat().st_size > 1024 * 1024: # 1MB + return False + # Basic text detection + with path.open("rb") as f: + sample = f.read(min(1024, path.stat().st_size)) + if not sample: + return False + # Check if mostly printable ASCII characters + ascii_printable_start = 32 # Space character + ascii_printable_end = 126 # Tilde character + ascii_tab = 9 + ascii_newline = 10 + ascii_carriage_return = 13 + text_threshold = 0.7 + + printable_chars = sum(1 for b in sample + if ascii_printable_start <= b <= ascii_printable_end + or b in (ascii_tab, ascii_newline, ascii_carriage_return)) + return printable_chars / len(sample) > text_threshold + except (OSError, UnicodeDecodeError): + return False + + return False + +def get_directory_files(directory: Path, max_files: int = 5) -> list[Path]: + """Get relevant files from a directory (non-recursive).""" + relevant_files = [] + + try: + for item in directory.iterdir(): + if item.is_file() and is_relevant_file(item): + relevant_files.append(item) + if len(relevant_files) >= max_files: + logger.warning(f"Directory '@{directory}' contains many files. " + f"Only including first {max_files} relevant files.") + break + + return sorted(relevant_files, key=lambda p: p.name.lower()) + + except PermissionError: + logger.warning(f"Permission denied accessing directory '@{directory}'") + return [] + except OSError as e: + logger.warning(f"Could not read directory '@{directory}': {e}") + return [] + def read_files_content(files: list[Path]) -> str: """Read content from a list of files. @@ -24,7 +106,12 @@ def read_files_content(files: list[Path]) -> str: try: current_path = Path(file_path) # Ensure it's a Path object with current_path.open(encoding="utf-8") as f: - content_parts.append(f"Content of {current_path.name}:\n{f.read()}\n---") + # Show directory context for files from directories + if len(str(current_path.parent)) > 1: # Not just "." + header = f"Content of {current_path.parent}/{current_path.name}" + else: + header = f"Content of {current_path.name}" + content_parts.append(f"{header}:\n{f.read()}\n---") except OSError as e: logger.warning(f"Could not read context file '{file_path}': {e}. Skipping.") return "\n".join(content_parts) @@ -122,8 +209,13 @@ def parse_at_references(prompt: str) -> tuple[str, list[Path]]: path = Path(match) if is_valid_file(path): referenced_files_set.add(path) + elif is_valid_directory(path): + directory_files = get_directory_files(path) + referenced_files_set.update(directory_files) + if directory_files: + logger.info(f"Directory '@{match}' expanded to {len(directory_files)} files") else: - logger.warning(f"Referenced file '@{match}' not found or is not a file. Skipping.") + logger.warning(f"Referenced path '@{match}' not found. Skipping.") # Convert set to list for consistent return type, sort for deterministic order if needed referenced_files = sorted(referenced_files_set, key=lambda p: str(p)) @@ -136,26 +228,42 @@ def parse_at_references(prompt: str) -> tuple[str, list[Path]]: return cleaned_prompt, referenced_files # Helper function to list spec files -def list_spec_files(specs_dir: Path) -> list[Path]: - """Lists all YAML spec files (.yaml or .yml) directly in the given directory. - - Ignores subdirectories. +def list_spec_files(specs_dir: Path, directory_filter: str | None = None) -> list[Path]: + """Lists YAML spec files with optional directory filtering. Args: - specs_dir: The Path object representing the directory to scan. + specs_dir: The Path to the specs directory + directory_filter: None for all directories (excluding archive), or specific subdirectory name Returns: - A list of Path objects for spec files, sorted alphabetically. - Returns an empty list if the directory doesn't exist or is not a directory. + List of Path objects for matching spec files """ if not specs_dir.exists() or not specs_dir.is_dir(): logger.debug(f"Specs directory '{specs_dir}' does not exist or is not a directory.") return [] - spec_files = [ - item for item in specs_dir.iterdir() - if item.is_file() and item.suffix.lower() in (".yaml", ".yml") - ] + spec_files = [] + + if directory_filter is None: + # Recursive scan - get files from all subdirectories and root, excluding archive + spec_files_set = set() + for item in specs_dir.rglob("*.yaml"): + if item.is_file() and "archive" not in item.parts: + spec_files_set.add(item) + for item in specs_dir.rglob("*.yml"): + if item.is_file() and "archive" not in item.parts: + spec_files_set.add(item) + spec_files = list(spec_files_set) + else: + # Single directory scan + target_dir = specs_dir / directory_filter + if target_dir.exists() and target_dir.is_dir(): + for item in target_dir.iterdir(): + if item.is_file() and item.suffix.lower() in (".yaml", ".yml"): + spec_files.append(item) + else: + logger.warning(f"Directory filter '{directory_filter}' not found in '{specs_dir}'") + return [] return sorted(spec_files, key=lambda p: p.name) diff --git a/uv.lock b/uv.lock index 8bb35fd..d17e89b 100644 --- a/uv.lock +++ b/uv.lock @@ -1,6 +1,12 @@ version = 1 requires-python = ">=3.13" +[manifest] +members = [ + "elf0", + "youtube-transcript-mcp", +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -204,6 +210,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/08/b8/7ddd1e8ba9701dea08ce22029917140e6f66a859427406579fd8d0ca7274/coverage-7.9.1-py3-none-any.whl", hash = "sha256:66b974b145aa189516b6bf2d8423e888b742517d37872f6ee4c5be0073bd9a3c", size = 204000 }, ] +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 }, +] + [[package]] name = "distlib" version = "0.3.9" @@ -1440,6 +1455,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/ee/518b72faa2073f5aa8e3262408d284892cb79cf2754ba0c3a5870645ef73/xxhash-3.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:4811336f1ce11cac89dcbd18f3a25c527c16311709a89313c3acaf771def2d4b", size = 26801 }, ] +[[package]] +name = "youtube-transcript-api" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "defusedxml" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/dd/10d413b20a2d14fa483853d0f6d920a0a0a6887d7c60167e4641733f99fb/youtube_transcript_api-1.1.0.tar.gz", hash = "sha256:786d9e64bd7fffee0dbc1471a61a798cebdc379b9cf8f7661d3664e831fcc1a5", size = 470144 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/69/63f1b9f96a9d3b6bd35288fe27f987c41bd157e47b3d07ca025549e3f8e6/youtube_transcript_api-1.1.0-py3-none-any.whl", hash = "sha256:876ac42b1e3f8cc99b81d8fd810bd74ed07511e51dff5db50e714e3156ad3595", size = 485739 }, +] + +[[package]] +name = "youtube-transcript-mcp" +version = "0.1.0" +source = { editable = "mcp/youtube-transcript" } +dependencies = [ + { name = "youtube-transcript-api" }, +] + +[package.metadata] +requires-dist = [{ name = "youtube-transcript-api", specifier = ">=1.1.0" }] + [[package]] name = "zstandard" version = "0.23.0"