From 5e028194f8d3dd7967feae37a6fc910e9eff07a1 Mon Sep 17 00:00:00 2001
From: LM
Date: Wed, 5 Nov 2025 17:23:52 +0100
Subject: [PATCH 1/5] feat(bench): new benchmarking tool

---
 .github/workflows/bench-command.yml   | 620 ++++++++++++++++++++
 .github/workflows/benchmark.yml       |  19 -
 .github/workflows/cd.yml              |   3 +-
 .github/workflows/ci.yml              |   4 +-
 .gitignore                            |   6 +
 Cargo.lock                            | 204 ++++++-
 Cargo.toml                            |  10 +
 README.md                             |   1 -
 docs/benchmarking.md                  | 783 --------------------------
 docs/command-line-options.md          |   2 -
 docs/template-system.md               |  11 +-
 scripts/README.md                     | 346 ++++++++++++
 scripts/analyze_all_templates.sh      | 205 +++++++
 scripts/compare_benchmark_versions.sh | 230 ++++++++
 scripts/compare_template_results.py   | 274 +++++++++
 scripts/compile_benchmark_versions.sh | 348 ++++++++++++
 src/bin/bench-throughput.rs           | 694 +++++++++++++++++++++++
 17 files changed, 2936 insertions(+), 824 deletions(-)
 create mode 100644 .github/workflows/bench-command.yml
 delete mode 100644 .github/workflows/benchmark.yml
 delete mode 100644 docs/benchmarking.md
 create mode 100644 scripts/README.md
 create mode 100755 scripts/analyze_all_templates.sh
 create mode 100755 scripts/compare_benchmark_versions.sh
 create mode 100755 scripts/compare_template_results.py
 create mode 100755 scripts/compile_benchmark_versions.sh
 create mode 100644 src/bin/bench-throughput.rs

diff --git a/.github/workflows/bench-command.yml b/.github/workflows/bench-command.yml
new file mode 100644
index 0000000..6123465
--- /dev/null
+++ b/.github/workflows/bench-command.yml
@@ -0,0 +1,620 @@
+name: Benchmark Command
+# Trigger on-demand benchmarks via PR comments
+# Usage: /bench <ref1> <ref2> [size] [warmup] [runs]
+# Examples:
+#   /bench main v0.13.0
+#   /bench abc12345 def56789 50000
+#   /bench main HEAD 50000 5 20
+# Only the repository owner can trigger this command
+
+on:
+  issue_comment:
+    types: [created]
+
+# Prevent concurrent benchmark runs on the same PR
+concurrency:
+  group: bench-${{ github.event.issue.number }}
+  cancel-in-progress: true
+
+jobs:
+  check-permission:
+    name: Check Command Permission
+    # Only run on PR comments (not regular issues)
+    if: |
+      github.event.issue.pull_request &&
+      startsWith(github.event.comment.body, '/bench ')
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    outputs:
+      authorized: ${{ steps.check.outputs.authorized }}
+      ref1: ${{ steps.parse.outputs.ref1 }}
+      ref2: ${{ steps.parse.outputs.ref2 }}
+      size: ${{ steps.parse.outputs.size }}
+      warmup: ${{ steps.parse.outputs.warmup }}
+      runs: ${{ steps.parse.outputs.runs }}
+    steps:
+      - name: Check if commenter is repo owner
+        id: check
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const commenter = context.payload.comment.user.login;
+            const owner = context.payload.repository.owner.login;
+            const isOwner = commenter === owner;
+
+            console.log(`Commenter: ${commenter}`);
+            console.log(`Repository owner: ${owner}`);
+            console.log(`Is owner: ${isOwner}`);
+
+            if (!isOwner) {
+              await github.rest.reactions.createForIssueComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: context.payload.comment.id,
+                content: '-1'
+              });
+
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body: `❌ **Permission denied**: Only @${owner} can trigger benchmark comparisons.`
+              });
+            } else {
+              await github.rest.reactions.createForIssueComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: context.payload.comment.id,
+                content: 'eyes'
+              });
+            }
+
+            core.setOutput('authorized', isOwner);
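+      # The parse step below tokenizes the comment body with awk, so any
+      # amount of whitespace between parameters works. Illustration only
+      # (hypothetical values), showing the equivalent extraction in a shell:
+      #   PARAMS="main HEAD 50000"
+      #   echo "$PARAMS" | awk '{print $1}'   # -> main   (ref1)
+      #   echo "$PARAMS" | awk '{print $2}'   # -> HEAD   (ref2)
+      #   echo "$PARAMS" | awk '{print $3}'   # -> 50000  (size; warmup/runs fall back to 5/50)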
+      - name: Parse benchmark command
+        id: parse
+        if: steps.check.outputs.authorized == 'true'
+        continue-on-error: true
+        run: |
+          set -euo pipefail
+          COMMENT="${{ github.event.comment.body }}"
+
+          # Parse command: /bench ref1 ref2 [size] [warmup] [runs]
+          # Remove /bench prefix and extract parameters
+          PARAMS=$(echo "$COMMENT" | sed 's|^/bench[[:space:]]*||')
+
+          # Extract parameters
+          REF1=$(echo "$PARAMS" | awk '{print $1}')
+          REF2=$(echo "$PARAMS" | awk '{print $2}')
+          SIZE=$(echo "$PARAMS" | awk '{print $3}')
+          WARMUP=$(echo "$PARAMS" | awk '{print $4}')
+          RUNS=$(echo "$PARAMS" | awk '{print $5}')
+
+          # Validate required parameters
+          if [ -z "$REF1" ] || [ -z "$REF2" ]; then
+            echo "error=Invalid format. Missing required parameters." >> $GITHUB_OUTPUT
+            echo "parse_failed=true" >> $GITHUB_OUTPUT
+            exit 1
+          fi
+
+          # Set defaults for optional parameters
+          if [ -z "$SIZE" ] || ! [[ "$SIZE" =~ ^[0-9]+$ ]]; then
+            SIZE=10000
+          fi
+
+          if [ -z "$WARMUP" ] || ! [[ "$WARMUP" =~ ^[0-9]+$ ]]; then
+            WARMUP=5
+          fi
+
+          if [ -z "$RUNS" ] || ! [[ "$RUNS" =~ ^[0-9]+$ ]]; then
+            RUNS=50
+          fi
+
+          echo "ref1=$REF1" >> $GITHUB_OUTPUT
+          echo "ref2=$REF2" >> $GITHUB_OUTPUT
+          echo "size=$SIZE" >> $GITHUB_OUTPUT
+          echo "warmup=$WARMUP" >> $GITHUB_OUTPUT
+          echo "runs=$RUNS" >> $GITHUB_OUTPUT
+          echo "parse_failed=false" >> $GITHUB_OUTPUT
+
+          echo "Parsed parameters:"
+          echo "  ref1: $REF1"
+          echo "  ref2: $REF2"
+          echo "  size: $SIZE"
+          echo "  warmup: $WARMUP"
+          echo "  runs: $RUNS"
+
+      - name: Post parse error
+        if: steps.check.outputs.authorized == 'true' && steps.parse.outcome == 'failure'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: context.payload.comment.id,
+              content: 'confused'
+            });
+
+            const errorMessage = [
+              '❌ **Invalid command format**',
+              '',
+              '**Usage:** `/bench <ref1> <ref2> [size] [warmup] [runs]`',
+              '',
+              '**Examples:**',
+              '```',
+              '/bench main v0.13.0',
+              '/bench abc12345 def56789 50000',
+              '/bench main HEAD 50000 5',
+              '/bench main HEAD 50000 5 20',
+              '```',
+              '',
+              '**Parameters:**',
+              '- `ref1` (required): Baseline git reference',
+              '- `ref2` (required): Current git reference',
+              '- `size` (optional): Input size (default: 10000)',
+              '- `warmup` (optional): Warmup runs (default: 5)',
+              '- `runs` (optional): Benchmark runs (default: 50)',
+              '',
+              '**Note:** This runs all 28 predefined templates with a single input size.',
+              'For detailed per-template analysis with hyperfine, use the local tools.'
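+              // Note: ref order does not matter; the validate step below orders
+              // the two refs by commit timestamp and uses the older as baseline.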
+ ].join('\n'); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: errorMessage + }); + + - name: Post acknowledgment + if: steps.check.outputs.authorized == 'true' && steps.parse.outcome == 'success' + uses: actions/github-script@v7 + with: + script: | + const ref1 = '${{ steps.parse.outputs.ref1 }}'; + const ref2 = '${{ steps.parse.outputs.ref2 }}'; + const size = '${{ steps.parse.outputs.size }}'; + const warmup = '${{ steps.parse.outputs.warmup }}'; + const runs = '${{ steps.parse.outputs.runs }}'; + + const message = [ + 'πŸš€ **Benchmark comparison started**', + '', + '**Comparing:**', + `- **Baseline**: \`${ref1}\``, + `- **Current**: \`${ref2}\``, + '', + '**Parameters:**', + `- **Size**: ${size} paths`, + `- **Warmup**: ${warmup} runs`, + `- **Runs**: ${runs} measurements`, + `- **Templates**: All 28 predefined templates`, + '', + 'Results will be posted here when complete...' + ].join('\n'); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: message + }); + + run-benchmarks: + name: Run Benchmark Comparison + needs: check-permission + if: needs.check-permission.outputs.authorized == 'true' && needs.check-permission.outputs.ref1 != '' + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + issues: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Need full history to access all refs + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + + - name: Install hyperfine + run: | + wget https://github.com/sharkdp/hyperfine/releases/download/v1.18.0/hyperfine_1.18.0_amd64.deb + sudo dpkg -i hyperfine_1.18.0_amd64.deb + hyperfine --version + + - name: Fetch refs from remote + run: | + set -euo pipefail + REF1="${{ needs.check-permission.outputs.ref1 }}" + REF2="${{ needs.check-permission.outputs.ref2 }}" + + echo "Fetching ref1: $REF1" + git fetch origin "$REF1" || git fetch origin "refs/tags/$REF1" || git fetch origin "refs/heads/$REF1" || true + + echo "Fetching ref2: $REF2" + git fetch origin "$REF2" || git fetch origin "refs/tags/$REF2" || git fetch origin "refs/heads/$REF2" || true + + # Update remote refs + git fetch origin --tags + + - name: Validate and order refs + id: validate + run: | + set -euo pipefail + REF1="${{ needs.check-permission.outputs.ref1 }}" + REF2="${{ needs.check-permission.outputs.ref2 }}" + + # Validate both refs exist + if ! git rev-parse --verify "$REF1" >/dev/null 2>&1; then + echo "error=Ref '$REF1' not found" >> $GITHUB_OUTPUT + exit 1 + fi + + if ! 
git rev-parse --verify "$REF2" >/dev/null 2>&1; then + echo "error=Ref '$REF2' not found" >> $GITHUB_OUTPUT + exit 1 + fi + + # Resolve to full SHAs + SHA1=$(git rev-parse "$REF1") + SHA2=$(git rev-parse "$REF2") + + # Check if both refs resolve to the same commit + if [ "$SHA1" = "$SHA2" ]; then + echo "same_commit=true" >> $GITHUB_OUTPUT + echo "ref1_sha=$(git rev-parse --short=8 $REF1)" >> $GITHUB_OUTPUT + echo "ref2_sha=$(git rev-parse --short=8 $REF2)" >> $GITHUB_OUTPUT + exit 0 + fi + + echo "same_commit=false" >> $GITHUB_OUTPUT + + # Determine which is older (baseline) and newer (current) + # Get commit timestamps + TIMESTAMP1=$(git log -1 --format=%ct "$SHA1") + TIMESTAMP2=$(git log -1 --format=%ct "$SHA2") + + if [ "$TIMESTAMP1" -le "$TIMESTAMP2" ]; then + # REF1 is older or same age -> baseline + BASELINE_REF="$REF1" + BASELINE_SHA="$SHA1" + CURRENT_REF="$REF2" + CURRENT_SHA="$SHA2" + else + # REF2 is older -> baseline + BASELINE_REF="$REF2" + BASELINE_SHA="$SHA2" + CURRENT_REF="$REF1" + CURRENT_SHA="$SHA1" + fi + + # Output the determined ordering + echo "baseline_ref=$BASELINE_REF" >> $GITHUB_OUTPUT + echo "baseline_sha=$(git rev-parse --short=8 $BASELINE_SHA)" >> $GITHUB_OUTPUT + echo "current_ref=$CURRENT_REF" >> $GITHUB_OUTPUT + echo "current_sha=$(git rev-parse --short=8 $CURRENT_SHA)" >> $GITHUB_OUTPUT + + # Keep original refs for display + echo "ref1_sha=$(git rev-parse --short=8 $REF1)" >> $GITHUB_OUTPUT + echo "ref2_sha=$(git rev-parse --short=8 $REF2)" >> $GITHUB_OUTPUT + + echo "Determined ordering:" + echo " Baseline (older): $BASELINE_REF ($BASELINE_SHA)" + echo " Current (newer): $CURRENT_REF ($CURRENT_SHA)" + + - name: Handle same commit case + if: steps.validate.outputs.same_commit == 'true' + uses: actions/github-script@v7 + with: + script: | + const ref1 = '${{ needs.check-permission.outputs.ref1 }}'; + const ref2 = '${{ needs.check-permission.outputs.ref2 }}'; + const sha = '${{ steps.validate.outputs.ref1_sha }}'; + + const message = [ + '⚠️ **Same commit detected**', + '', + `Both \`${ref1}\` and \`${ref2}\` resolve to the same commit: \`${sha}\``, + '', + 'No benchmark comparison needed - the refs are identical.', + '', + '**Tip:** To compare different versions, use refs that point to different commits.' + ].join('\n'); + + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: ${{ github.event.comment.id }}, + content: 'eyes' + }); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: message + }); + + - name: Check benchmark tool exists in baseline + if: steps.validate.outputs.same_commit == 'false' + id: check_baseline_tool + run: | + set -euo pipefail + BASELINE_REF="${{ steps.validate.outputs.baseline_ref }}" + echo "Checking out $BASELINE_REF..." + git checkout "$BASELINE_REF" + + # Check if bench-throughput binary is defined in Cargo.toml + if ! grep -q 'name = "bench-throughput"' Cargo.toml 2>/dev/null; then + echo "exists=false" >> $GITHUB_OUTPUT + echo "❌ Benchmark tool 'bench-throughput' not found in $BASELINE_REF" + exit 0 + fi + + # Check if the source file exists + if ! 
grep -A 2 'name = "bench-throughput"' Cargo.toml | grep -q 'path.*='; then + echo "exists=false" >> $GITHUB_OUTPUT + echo "❌ Benchmark tool source file not found in $BASELINE_REF" + exit 0 + fi + + echo "exists=true" >> $GITHUB_OUTPUT + echo "βœ“ Benchmark tool found in $BASELINE_REF" + + - name: Check benchmark tool exists in current + if: steps.validate.outputs.same_commit == 'false' + id: check_current_tool + run: | + set -euo pipefail + CURRENT_REF="${{ steps.validate.outputs.current_ref }}" + echo "Checking out $CURRENT_REF..." + git checkout "$CURRENT_REF" + + # Check if bench-throughput binary is defined in Cargo.toml + if ! grep -q 'name = "bench-throughput"' Cargo.toml 2>/dev/null; then + echo "exists=false" >> $GITHUB_OUTPUT + echo "❌ Benchmark tool 'bench-throughput' not found in $CURRENT_REF" + exit 0 + fi + + # Check if the source file exists + if ! grep -A 2 'name = "bench-throughput"' Cargo.toml | grep -q 'path.*='; then + echo "exists=false" >> $GITHUB_OUTPUT + echo "❌ Benchmark tool source file not found in $CURRENT_REF" + exit 0 + fi + + echo "exists=true" >> $GITHUB_OUTPUT + echo "βœ“ Benchmark tool found in $CURRENT_REF" + + - name: Post missing tool error + if: steps.validate.outputs.same_commit == 'false' && (steps.check_baseline_tool.outputs.exists == 'false' || steps.check_current_tool.outputs.exists == 'false') + uses: actions/github-script@v7 + with: + script: | + const baseline_ref = '${{ steps.validate.outputs.baseline_ref }}'; + const current_ref = '${{ steps.validate.outputs.current_ref }}'; + const baseline_exists = '${{ steps.check_baseline_tool.outputs.exists }}' === 'true'; + const current_exists = '${{ steps.check_current_tool.outputs.exists }}' === 'true'; + + let message = '❌ **Benchmark comparison failed**\n\n'; + message += '**Reason**: The benchmark tool (`bench-throughput`) does not exist in '; + + if (!baseline_exists && !current_exists) { + message += `both refs:\n- \`${baseline_ref}\` (baseline/older)\n- \`${current_ref}\` (current/newer)`; + } else if (!baseline_exists) { + message += `baseline ref: \`${baseline_ref}\` (older commit)`; + } else { + message += `current ref: \`${current_ref}\` (newer commit)`; + } + + message += '\n\n**Solution**: Ensure both refs contain the benchmark tool.'; + message += '\n\n**Example**: `/bench main HEAD`'; + + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: ${{ github.event.comment.id }}, + content: 'confused' + }); + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: message + }); + + - name: Build baseline benchmark tool + if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true' + run: | + set -euo pipefail + BASELINE_REF="${{ steps.validate.outputs.baseline_ref }}" + + echo "Checking out baseline: $BASELINE_REF..." + git checkout "$BASELINE_REF" + + echo "Building benchmark tool..." + if ! 
cargo build --release --bin bench-throughput 2>&1 | tee build_baseline.log; then + echo "❌ Failed to build benchmark tool for $BASELINE_REF" + exit 1 + fi + + # Save binary with unique name + cp target/release/bench-throughput bench-throughput-baseline + echo "βœ“ Built baseline benchmark tool" + + - name: Build current benchmark tool + if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true' + run: | + set -euo pipefail + CURRENT_REF="${{ steps.validate.outputs.current_ref }}" + + echo "Checking out current: $CURRENT_REF..." + git checkout "$CURRENT_REF" + + # Rebuild in case dependencies changed + echo "Building benchmark tool..." + if ! cargo build --release --bin bench-throughput 2>&1 | tee build_current.log; then + echo "❌ Failed to build benchmark tool for $CURRENT_REF" + exit 1 + fi + + # Save binary with unique name + cp target/release/bench-throughput bench-throughput-current + echo "βœ“ Built current benchmark tool" + + - name: Run benchmarks + if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true' + run: | + set -euo pipefail + SIZE="${{ needs.check-permission.outputs.size }}" + WARMUP="${{ needs.check-permission.outputs.warmup }}" + RUNS="${{ needs.check-permission.outputs.runs }}" + BASELINE_SHA="${{ steps.validate.outputs.baseline_sha }}" + CURRENT_SHA="${{ steps.validate.outputs.current_sha }}" + + echo "Running benchmark comparison..." + echo " Baseline: $BASELINE_SHA" + echo " Current: $CURRENT_SHA" + echo " Size: $SIZE paths" + echo " Warmup: $WARMUP runs" + echo " Runs: $RUNS measurements" + echo "" + + # Run hyperfine with markdown export + hyperfine \ + --warmup "$WARMUP" \ + --runs "$RUNS" \ + --export-markdown comparison_results.md \ + --command-name "baseline ($BASELINE_SHA)" \ + "./bench-throughput-baseline --template all --size $SIZE --output /dev/null" \ + --command-name "current ($CURRENT_SHA)" \ + "./bench-throughput-current --template all --size $SIZE --output /dev/null" + + echo "βœ“ Benchmark comparison complete" + + - name: Post results to PR + if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const comparison_results = fs.readFileSync('comparison_results.md', 'utf8'); + const baseline_sha = '${{ steps.validate.outputs.baseline_sha }}'; + const current_sha = '${{ steps.validate.outputs.current_sha }}'; + const size = '${{ needs.check-permission.outputs.size }}'; + const warmup = '${{ needs.check-permission.outputs.warmup }}'; + const runs = '${{ needs.check-permission.outputs.runs }}'; + + const body = [ + '## πŸ”¬ Benchmark Comparison Report', + '', + '**Requested by:** @${{ github.event.comment.user.login }}', + '', + '**Configuration:**', + `- **Baseline (older):** \`${baseline_sha}\``, + `- **Current (newer):** \`${current_sha}\``, + `- **Test:** All 28 predefined templates`, + `- **Input size:** ${size} paths per run`, + `- **Warmup:** ${warmup} runs`, + `- **Measurements:** ${runs} runs`, + '', + '**Results:**', + '', + comparison_results.trim(), + '', + '> **Interpretation:**', + '> - **Mean**: Average execution time across all runs', + '> - **Min/Max**: Fastest and slowest runs observed', + '> - **Relative**: Speed comparison (1.00 = baseline, <1.00 = faster, >1.00 
= slower)',
+              `> - Each run processes all 28 templates on ${size} generated paths`,
+              '',
+              '---',
+              '',
+              'Triggered by [/bench command](${{ github.event.comment.html_url }})',
+              '',
+              '**Note:** Build logs are available in the [workflow artifacts](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}).'
+            ].join('\n');
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: body
+            });
+
+      - name: Upload benchmark artifacts
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-comparison-${{ github.event.comment.id }}
+          path: |
+            comparison_results.md
+            build_baseline.log
+            build_current.log
+          retention-days: 30
+
+      - name: Add success reaction
+        if: steps.validate.outputs.same_commit == 'false' && steps.check_baseline_tool.outputs.exists == 'true' && steps.check_current_tool.outputs.exists == 'true'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: ${{ github.event.comment.id }},
+              content: 'rocket'
+            });
+
+  handle-error:
+    name: Handle Errors
+    needs: [check-permission, run-benchmarks]
+    if: failure() && needs.check-permission.outputs.authorized == 'true'
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+    steps:
+      - name: Post error message
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const ref1 = '${{ needs.check-permission.outputs.ref1 }}';
+            const ref2 = '${{ needs.check-permission.outputs.ref2 }}';
+
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: ${{ github.event.comment.id }},
+              content: 'confused'
+            });
+
+            const errorBody = [
+              '❌ **Benchmark comparison failed**',
+              '',
+              `Failed to compare \`${ref1}\` and \`${ref2}\`.`,
+              '',
+              '**Please check:**',
+              '- Both refs exist and are valid git references (branches, tags, or commits)',
+              '- The benchmark tool exists in both refs',
+              '- The code at those refs compiles successfully',
+              '- Parameters are in the correct format: `/bench <ref1> <ref2> [size] [warmup] [runs]`',
+              '',
+              '**See the [workflow run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.**'
+            ].join('\n');
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: errorBody
+            });
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
deleted file mode 100644
index 459499d..0000000
--- a/.github/workflows/benchmark.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-name: Performance Benchmarks
-on: [push, pull_request]
-
-jobs:
-  benchmark:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: dtolnay/rust-toolchain@stable
-      - name: Build benchmark tool
-        run: cargo build --release --bin string-pipeline-bench
-      - name: Run benchmarks
-        run: |
-          ./target/release/string-pipeline-bench --iterations 5000 > benchmark_results.txt
-      - name: Upload results
-        uses: actions/upload-artifact@v4
-        with:
-          name: benchmark-results
-          path: benchmark_results.json
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
index 570e3ca..943063c 100644
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -217,9 +217,8 @@ jobs:
           zsh xz-utils
liblz4-tool musl-tools brotli zstd - name: Install Rust - uses: dtolnay/rust-toolchain@master + uses: dtolnay/rust-toolchain@stable with: - toolchain: nightly target: ${{ matrix.target }} # for some reason, the above action doesn't seem to set the target correctly diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0c0ce61..67746bc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@nightly + uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - name: Run tests run: cargo test --locked --all-features --workspace -- --nocapture @@ -28,7 +28,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@nightly + uses: dtolnay/rust-toolchain@stable with: components: rustfmt - uses: Swatinem/rust-cache@v2 diff --git a/.gitignore b/.gitignore index ea8c4bf..7a9e023 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,7 @@ /target + +# Benchmark results +bench_results.json +benchmark_results.json +benchmark_results.txt +comparison.md diff --git a/Cargo.lock b/Cargo.lock index e3af432..211cbad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -53,7 +53,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -64,7 +64,7 @@ checksum = "6680de5231bd6ee4c6191b8a1325daa282b415391ec9d3a37bd34f2060dc73fa" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -189,6 +189,17 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "comfy-table" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" +dependencies = [ + "crossterm 0.29.0", + "unicode-segmentation", + "unicode-width", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -256,6 +267,45 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags", + "crossterm_winapi", + "mio", + "parking_lot", + "rustix 0.38.44", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" +dependencies = [ + "bitflags", + "crossterm_winapi", + "document-features", + "parking_lot", + "rustix 1.0.7", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + [[package]] name = "crunchy" version = "0.2.3" @@ -296,6 +346,15 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "document-features" +version = "0.2.12" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + [[package]] name = "either" version = "1.15.0" @@ -309,7 +368,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -346,7 +405,7 @@ dependencies = [ "cfg-if", "libc", "r-efi", - "wasi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -423,12 +482,24 @@ version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +[[package]] +name = "litrs" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" + [[package]] name = "lock_api" version = "0.4.13" @@ -451,6 +522,18 @@ version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +[[package]] +name = "mio" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" +dependencies = [ + "libc", + "log", + "wasi 0.11.1+wasi-snapshot-preview1", + "windows-sys 0.61.2", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -662,6 +745,19 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88f8660c1ff60292143c98d08fc6e2f654d722db50410e3f3797d40baaf9d8f3" +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + [[package]] name = "rustix" version = "1.0.7" @@ -671,8 +767,8 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys", - "windows-sys", + "linux-raw-sys 0.9.4", + "windows-sys 0.59.0", ] [[package]] @@ -745,6 +841,36 @@ dependencies = [ "digest", ] +[[package]] +name = "signal-hook" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-mio" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc" +dependencies = [ + "libc", + "mio", + "signal-hook", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +dependencies = [ + "libc", +] + [[package]] name = "smallvec" version = "1.15.1" @@ -757,7 +883,9 @@ version = "0.13.0" dependencies = [ 
"clap", "clap_mangen", + "comfy-table", "criterion", + "crossterm 0.28.1", "dashmap", "fast-strip-ansi", "memchr", @@ -766,8 +894,11 @@ dependencies = [ "pest", "pest_derive", "regex", + "serde", + "serde_json", "smallvec", "tempfile", + "unicode-width", ] [[package]] @@ -796,8 +927,8 @@ dependencies = [ "fastrand", "getrandom", "once_cell", - "rustix", - "windows-sys", + "rustix 1.0.7", + "windows-sys 0.59.0", ] [[package]] @@ -848,6 +979,18 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "utf8parse" version = "0.2.2" @@ -880,6 +1023,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "wasi" version = "0.14.2+wasi-0.2.4" @@ -957,15 +1106,43 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-sys" version = "0.59.0" @@ -975,6 +1152,15 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-targets" version = "0.52.6" diff --git a/Cargo.toml b/Cargo.toml index 772ee29..cb98f59 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,11 @@ parking_lot = "0.12.3" dashmap = "6.1.0" smallvec = "1.15.0" memchr = "2.7.4" +crossterm = "0.28" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +comfy-table = "7.1" +unicode-width = "0.2" [build-dependencies] clap = { version = "4.5.39", features = ["derive", "cargo"] } @@ -38,6 +43,11 @@ path = "src/main.rs" name = "string-pipeline-bench" path = 
"src/bin/bench.rs" +[[bin]] +bench = false +name = "bench-throughput" +path = "src/bin/bench-throughput.rs" + [profile.staging] inherits = "dev" opt-level = 3 diff --git a/README.md b/README.md index 2ded8a0..43142f0 100644 --- a/README.md +++ b/README.md @@ -225,7 +225,6 @@ cargo build --release --bin bench [πŸ“– Template System](docs/template-system.md) [βš™οΈ CLI Options & Usage](docs/command-line-options.md) [πŸ” Comprehensive Debug System Guide](docs/debug-system.md) -[πŸ“Š Performance Benchmarking Guide](docs/benchmarking.md) ## 🀝 Contributing diff --git a/docs/benchmarking.md b/docs/benchmarking.md deleted file mode 100644 index c8c41c8..0000000 --- a/docs/benchmarking.md +++ /dev/null @@ -1,783 +0,0 @@ -# πŸ† String Pipeline Benchmarking Tool - -_NOTE: what follows has mostly been assembled using AI as an experiment and as a basis for further improvements._ - -A simple benchmarking tool that helps measure performance of string pipeline operations and provides timing information in both text and JSON formats. - -## πŸ“‹ Table of Contents - -- [πŸš€ Quick Start](#-quick-start) -- [✨ Features Overview](#-features-overview) -- [πŸ“– Usage Guide](#-usage-guide) - - [Basic Usage](#basic-usage) - - [Command Line Options](#command-line-options) - - [Output Formats](#output-formats) -- [πŸ§ͺ Benchmark Categories](#-benchmark-categories) - - [Single Operations](#1--single-operations) - - [Multiple Simple Operations](#2--multiple-simple-operations) - - [Map Operations](#3-️-map-operations) - - [Complex Operations](#4--complex-operations) -- [πŸ“Š Test Data & Methodology](#-test-data--methodology) -- [πŸ“ˆ Performance Analysis](#-performance-analysis) - - [Basic Methods](#basic-methods) - - [Timing Precision](#timing-precision) - - [Metrics Explanation](#metrics-explanation) -- [πŸ’Ό Automated Usage](#-automated-usage) - - [Script Integration](#script-integration) - - [Performance Comparison](#performance-comparison) -- [πŸ”§ Development Guide](#-development-guide) - - [Adding New Benchmarks](#adding-new-benchmarks) - - [Performance Considerations](#performance-considerations) - - [Best Practices](#best-practices) -- [πŸ“‹ Example Results](#-example-results) -- [⚠️ Troubleshooting](#️-troubleshooting) - -## πŸš€ Quick Start - -```bash -# Run with default settings (1000 iterations, text output) -cargo run --bin bench - -# Run in release mode for better performance -cargo run --release --bin bench - -# Quick test with fewer iterations -cargo run --bin bench -- --iterations 100 -``` - -## ✨ Features Overview - -- πŸ§ͺ **Test Coverage**: Tests single operations, multiple operations, map operations, and complex nested operations -- πŸ“Š **Basic Statistics**: Runs configurable iterations (default 1000) and calculates averages with outlier removal -- πŸ‹οΈ **Warmup Phase**: Runs warmup iterations (10% of measurements) to help get consistent timing -- 🎯 **Outlier Removal**: Removes top and bottom 5% of measurements to reduce noise -- πŸ“„ **Multiple Output Formats**: Supports both human-readable text and machine-readable JSON output -- πŸ—οΈ **Performance Categories**: Groups results by operation type for easier analysis -- πŸ“ˆ **Basic Metrics**: Provides average, minimum, maximum times from the filtered measurements -- ⚑ **Automation Support**: Works well in CI/CD and automated scripts -- πŸ” **Debug Integration**: Works with the existing debug system's timing capabilities - -## πŸ“– Usage Guide - -### Basic Usage - -| Command | Description | Use Case | -|---------|-------------|----------| 
-| `cargo run --bin bench` | Default run (1000 iterations, text) | Development testing | -| `cargo run --release --bin bench` | Optimized build | Better performance measurements | -| `./target/release/bench.exe` | Direct binary execution | Scripts and automation | - -```bash -# πŸš€ Development workflow -cargo run --bin bench -- --iterations 100 # Quick test - -# πŸ”„ More thorough testing -cargo build --release --bin bench -./target/release/bench --iterations 5000 --format json > results.json -``` - -### Command Line Options - -| Option | Short | Default | Description | -|--------|-------|---------|-------------| -| `--iterations` | `-n` | `1000` | Number of iterations per benchmark | -| `--format` | `-f` | `text` | Output format: `text` or `json` | -| `--help` | `-h` | - | Show help information | -| `--version` | `-V` | - | Show version information | - -**Examples:** - -```bash -# πŸ“Š Better accuracy (more iterations) -cargo run --bin bench -- --iterations 2000 - -# πŸ€– Machine processing (JSON output) -cargo run --bin bench -- --format json - -# πŸš€ Quick development test -cargo run --bin bench -- --iterations 50 --format text - -# πŸ” Help and version info -cargo run --bin bench -- --help -cargo run --bin bench -- --version -``` - -### Output Formats - -#### πŸ“„ Text Output (Default) - -Good for **reading results** and **development workflows**: - -- βœ… **Progress indicators** during execution with real-time feedback -- βœ… **Formatted tables** with aligned columns and readable timing units -- βœ… **Performance summary** by category with fastest/slowest identification -- βœ… **Basic statistics** including total execution time and outlier counts -- βœ… **Color-coded** output (when terminal supports it) - -```text -πŸ”Έ Running single operation benchmarks... - Single: upper ... βœ“ avg: 295ns - Single: lower ... βœ“ avg: 149ns - -πŸ“Š Summary: -β€’ Total benchmarks run: 33 -β€’ Total execution time: 392.17ms -``` - -#### πŸ€– JSON Output - -Good for **automation**, **scripts**, and **data processing**: - -- βœ… **Machine-readable** structured data -- βœ… **Timestamps** and version information for tracking -- βœ… **Timing metrics** for each benchmark -- βœ… **Categorized results** for easier filtering -- βœ… **Works well** with tools like `jq`, `python`, etc. - -```json -{ - "summary": { - "total_benchmarks": 33, - "total_execution_time_ns": 392170000, - "iterations_per_benchmark": 1000 - }, - "categories": { - "single_operations": [...], - "map_operations": [...] - }, - "timestamp": "2024-01-15T10:30:45Z", - "version": "0.13.0" -} -``` - -## πŸ§ͺ Benchmark Categories - -The benchmark suite is organized into **four distinct categories** that test different aspects of the pipeline system, from basic operations to complex nested transformations. - -### 1. 
πŸ”§ Single Operations - -Tests **individual pipeline operations** to establish baseline performance: - -| Operation | Template | Purpose | Expected Performance | -|-----------|----------|---------|---------------------| -| `split` | `{split:,:..\|join:,}` | Text splitting capability | ~3-4ΞΌs | -| `upper` | `{upper}` | Case conversion | ~200-300ns | -| `lower` | `{lower}` | Case conversion | ~150-200ns | -| `trim` | `{trim}` | Whitespace removal | ~100-150ns | -| `reverse` | `{reverse}` | String/list reversal | ~600-700ns | -| `sort` | `{split:,:..\|sort\|join:,}` | Alphabetical sorting | ~3-4ΞΌs | -| `unique` | `{split:,:..\|unique\|join:,}` | Duplicate removal | ~5-6ΞΌs | -| `replace` | `{replace:s/a/A/g}` | Pattern replacement | ~2-3ΞΌs | -| `filter` | `{split:,:..\|filter:^[a-m]\|join:,}` | Pattern filtering | ~14-16ΞΌs | - -> πŸ’‘ **Baseline Importance:** These measurements establish the **fundamental performance characteristics** of each operation and serve as building blocks for understanding more complex pipeline performance. - -### 2. πŸ”— Multiple Simple Operations - -Tests **chains of basic operations** to measure composition overhead: - -| Pipeline | Template | Purpose | Performance Range | -|----------|----------|---------|------------------| -| Split + Join | `{split:,:..\|join: }` | Basic transformation | ~3ΞΌs | -| Split + Sort + Join | `{split:,:..\|sort\|join:;}` | Sorting pipeline | ~3-4ΞΌs | -| Split + Unique + Join | `{split:,:..\|unique\|join:,}` | Deduplication | ~5-6ΞΌs | -| Split + Reverse + Join | `{split:,:..\|reverse\|join:-}` | Reversal pipeline | ~3ΞΌs | -| Split + Filter + Join | `{split:,:..\|filter:^[a-m]\|join:,}` | Filtering pipeline | ~16-17ΞΌs | -| Split + Slice + Join | `{split:,:..\|slice:0..5\|join:&}` | Range extraction | ~4ΞΌs | -| Upper + Trim + Replace | `{upper\|trim\|replace:s/,/ /g}` | String transformations | ~3-4ΞΌs | -| Split + Sort + Unique + Join | `{split:,:..\|sort\|unique\|join:+}` | Multi-step processing | ~5-6ΞΌs | - -> 🎯 **Composition Analysis:** These tests reveal how **operation chaining affects performance** and whether there are significant overhead costs in pipeline composition. - -### 3. πŸ—ΊοΈ Map Operations - -Tests **operations applied to each list item** via the map function: - -| Operation Type | Template | Purpose | Performance Range | -|----------------|----------|---------|------------------| -| Map(Upper) | `{split:,:..\|map:{upper}\|join:,}` | Case conversion mapping | ~8-9ΞΌs | -| Map(Trim+Upper) | `{split:,:..\|map:{trim\|upper}\|join: }` | Chained operations in map | ~9-10ΞΌs | -| Map(Prepend) | `{split:,:..\|map:{prepend:item}\|join:,}` | Text prefix addition | ~9-10ΞΌs | -| Map(Append) | `{split:,:..\|map:{append:-fruit}\|join:;}` | Text suffix addition | ~10-11ΞΌs | -| Map(Reverse) | `{split:,:..\|map:{reverse}\|join:,}` | String reversal per item | ~8-9ΞΌs | -| Map(Substring) | `{split:,:..\|map:{substring:0..3}\|join: }` | Text extraction per item | ~8-9ΞΌs | -| Map(Pad) | `{split:,:..\|map:{pad:10:_}\|join:,}` | Text padding per item | ~10-11ΞΌs | -| Map(Replace) | `{split:,:..\|map:{replace:s/e/E/g}\|join:,}` | Pattern replacement per item | ~49-60ΞΌs | - -> πŸ” **Map Performance:** Map operations show **scaling behavior** based on list size and the complexity of the inner operation. Replace operations are notably slower due to regex processing. - -### 4. 
πŸš€ Complex Operations - -Tests **sophisticated nested operations** and real-world transformation scenarios: - -| Complexity Level | Template | Purpose | Performance Range | -|------------------|----------|---------|------------------| -| Nested Split+Join | `{split:,:..\|map:{split:_:..\|join:-}\|join: }` | Multi-level parsing | ~15-16ΞΌs | -| Combined Transform | `{split:,:..\|map:{upper\|substring:0..5}\|join:,}` | Chained transformations | ~10ΞΌs | -| Filter+Map Chain | `{split:,:..\|filter:^[a-m]\|map:{reverse}\|join:&}` | Conditional processing | ~16-17ΞΌs | -| Replace+Transform | `{split:,:..\|map:{upper\|replace:s/A/a/g}\|join:;}` | Pattern + transformation | ~50-60ΞΌs | -| Unique+Map | `{split:,:..\|unique\|map:{upper}\|join:,}` | Dedup + transformation | ~10-11ΞΌs | -| Multi-Replace | `{split:,:..\|map:{replace:s/a/A/g\|upper}\|join:,}` | Complex pattern work | ~51-60ΞΌs | -| Substring+Pad | `{split:,:..\|map:{substring:0..3\|pad:5:_}\|join:+}` | Text formatting pipeline | ~10-11ΞΌs | -| Multi-Level Filter | `{split:,:..\|filter:^[a-z]\|map:{upper}\|sort\|join: }` | Comprehensive processing | ~17-18ΞΌs | - -> πŸ† **Real-World Scenarios:** Complex operations represent **typical production use cases** and help identify performance bottlenecks in sophisticated data transformation pipelines. - -## πŸ“Š Test Data & Methodology - -### 🍎 Test Dataset - -The benchmark uses a **carefully designed test dataset** that provides realistic performance characteristics: - -| Property | Value | Purpose | -|----------|-------|---------| -| **Content** | Comma-separated fruit names | Real-world data structure | -| **Length** | 208 characters | Moderate size for consistent timing | -| **Items** | 26 distinct fruits | Good sample size | -| **Unicode** | ASCII + Unicode safe | Comprehensive character handling | -| **Separators** | Commas, underscores, pipes | Multiple parsing scenarios | - -**Actual Test Data:** - -```text -"apple,banana,cherry,date,elderberry,fig,grape,honeydew,ice_fruit,jackfruit,kiwi,lemon,mango,nectarine,orange,papaya,quince,raspberry,strawberry,tomato,ugli_fruit,vanilla,watermelon,xigua,yellow_apple,zucchini" -``` - -> 🎯 **Why This Dataset?** This data provides **realistic performance characteristics** without being too large to cause timing inconsistencies or too small to provide meaningful measurements. - -## πŸ“ˆ Performance Analysis - -### Basic Methods - -#### πŸ‹οΈ Warmup Phase - -The benchmark includes a **warmup phase** to help get more consistent measurements by reducing cold-start effects: - -| Step | Process | Rationale | -|------|---------|-----------| -| 1. **Warmup Calculation** | Calculate 10% of measurement iterations | Proportional to test size | -| 2. **Cache Warming** | Run operations without timing measurement | Prime CPU caches and memory | -| 3. **System Stabilization** | Allow CPU frequency scaling to settle | More consistent conditions | -| 4. **Memory Allocation** | Pre-allocate common data structures | Reduce allocation overhead | - -```rust -// Warmup phase implementation -fn benchmark_template(&self, name: &str, template_str: &str) -> BenchmarkResult { - let template = Template::parse(template_str)?; - - // Warmup phase - run operations without timing - for _ in 0..self.warmup_iterations { - let _ = template.format(&self.test_data)?; - } - - // Actual measurement phase begins here... -} -``` - -> 🎯 **Warmup Benefits:** Helps reduce timing variations by reducing cold cache effects and system instability. 
- -#### 🎯 Outlier Removal - -The benchmark uses a **simple approach** to reduce measurement noise: - -| Step | Process | Rationale | -|------|---------|-----------| -| 1. **Data Collection** | Collect all timing measurements | Raw performance data | -| 2. **Sorting** | Sort measurements by duration | Prepare for filtering | -| 3. **Filtering** | Remove top & bottom 5% | Remove timing outliers | -| 4. **Average Calculation** | Calculate mean of remaining 90% | More stable average | -| 5. **Reporting** | Report outliers removed count | Show what was filtered | - -```rust -// Simplified outlier removal algorithm -fn remove_outliers(mut times: Vec) -> (Vec, usize) { - times.sort(); - let len = times.len(); - let outlier_count = (len as f64 * 0.05).ceil() as usize; - - let start_idx = outlier_count; - let end_idx = len - outlier_count; - - let filtered = times[start_idx..end_idx].to_vec(); - let outliers_removed = times.len() - filtered.len(); - - (filtered, outliers_removed) -} -``` - -> πŸ“Š **Simple Approach:** This basic filtering helps reduce noise in timing measurements, similar to what other benchmarking tools do. - -### Timing Precision - -#### ⚑ Timing Details - -| Feature | Implementation | Benefit | -|---------|----------------|---------| -| **Resolution** | Nanosecond precision via `std::time::Instant` | Good for fast operations | -| **Overhead** | Small timing overhead (~10-20ns) | Minimal impact on results | -| **Platform** | Cross-platform timing support | Works across systems | -| **Formatting** | Automatic unit selection (ns/ΞΌs/ms/s) | Easy to read output | - -#### πŸ“ Unit Formatting Algorithm - -```rust -fn format_duration(duration: Duration) -> String { - let nanos = duration.as_nanos(); - if nanos < 1_000 { - format!("{}ns", nanos) - } else if nanos < 1_000_000 { - format!("{:.2}ΞΌs", nanos as f64 / 1_000.0) - } else if nanos < 1_000_000_000 { - format!("{:.2}ms", nanos as f64 / 1_000_000.0) - } else { - format!("{:.2}s", duration.as_secs_f64()) - } -} -``` - -### Metrics Explanation - -#### πŸ“Š Core Metrics - -| Metric | Description | Interpretation | -|--------|-------------|----------------| -| **Average** | Mean time after outlier removal | Main performance indicator | -| **Min** | Fastest measurement after outlier removal | Best-case timing | -| **Max** | Slowest measurement after outlier removal | Worst-case timing | -| **Iterations** | Number of measurement runs performed | How many times we measured | -| **Warmup** | Number of pre-measurement runs | System preparation cycles | - -#### 🎯 Performance Ranges - -| Performance Level | Time Range | Operations | -|------------------|------------|------------| -| **Ultra Fast** | < 1ΞΌs | `upper`, `lower`, `trim` | -| **Fast** | 1-10ΞΌs | `split`, `join`, `sort`, basic chains | -| **Moderate** | 10-50ΞΌs | `map` operations, complex chains | -| **Intensive** | > 50ΞΌs | `replace` operations, regex processing | - -> πŸ’‘ **Iteration Guidelines:** -> -> - **Development**: 50-100 iterations for quick feedback -> - **Automation**: 500-1000 iterations for better reliability -> - **Thorough testing**: 2000-5000 iterations for more stable results - -## πŸ“‹ Example Results - -### πŸ“Š Text Output Sample - -```text -πŸ”Έ Running single operation benchmarks... - Single: split ... βœ“ avg: 3.53ΞΌs - Single: upper ... βœ“ avg: 295ns - Single: lower ... βœ“ avg: 149ns - -πŸ”Έ Running multiple simple operations benchmarks... - Multi: split + join ... βœ“ avg: 3.12ΞΌs - Multi: split + sort + join ... 
βœ“ avg: 3.47ΞΌs - -================================================================================ - BENCHMARK RESULTS -================================================================================ - -πŸ“Š Summary: -β€’ Total benchmarks run: 33 -β€’ Total execution time: 392.17ms -β€’ Measurement iterations per benchmark: 1000 -β€’ Warmup iterations per benchmark: 100 (10% of measurements) - -πŸ“ˆ Detailed Results: -Benchmark Average Min Max ----------------------------------------------------------------------------------------- -Single: upper 295ns 200ns 380ns -Single: lower 149ns 120ns 180ns -Map: split + map(replace) + join 49.16ΞΌs 42.90ΞΌs 55.80ΞΌs - -πŸ“‹ Performance by Category: -πŸ”Ή Single Operations (9 tests) - Average: 3.31ΞΌs | Fastest: 136ns (trim) | Slowest: 14.03ΞΌs (filter) - -πŸ”Ή Map Operations (8 tests) - Average: 14.22ΞΌs | Fastest: 8.35ΞΌs (map(upper)) | Slowest: 49.16ΞΌs (map(replace)) -``` - -### πŸ€– JSON Output Sample - -```json -{ - "summary": { - "total_benchmarks": 33, - "total_execution_time_ns": 392170000, - "total_execution_time_formatted": "392.17ms", - "iterations_per_benchmark": 1000, - "outlier_removal_method": "Top and bottom 5% removed", - "warmup_iterations_per_benchmark": 100 - }, - "categories": { - "single_operations": [ - { - "name": "Single: upper", - "iterations": 1000, - "average_time_ns": 295000, - "average_time_formatted": "295ns", - "min_time_ns": 200000, - "min_time_formatted": "200ns", - "max_time_ns": 9100000, - "max_time_formatted": "9.10ΞΌs", - "outliers_removed": 100, - "total_raw_measurements": 1000 - } - ] - }, - "timestamp": "2024-01-15T10:30:45Z", - "version": "0.13.0" -} -``` - -## πŸ’Ό Automated Usage - -### Script Integration - -#### πŸš€ GitHub Actions Example - -```yaml -name: Performance Benchmarks -on: [push, pull_request] - -jobs: - benchmark: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - name: Build benchmark tool - run: cargo build --release --bin bench - - name: Run benchmarks - run: | - ./target/release/bench --iterations 5000 --format json > benchmark_results.json - - name: Upload results - uses: actions/upload-artifact@v4 - with: - name: benchmark-results - path: benchmark_results.json -``` - -#### πŸ” Processing Results with jq - -```bash -# Extract summary information -cat benchmark_results.json | jq '.summary' - -# Get average times for single operations -cat benchmark_results.json | jq '.categories.single_operations[].average_time_formatted' - -# Find slowest operations -cat benchmark_results.json | jq -r '.categories[] | .[] | "\(.name): \(.average_time_formatted)"' | sort -V - -# Performance alerts (fail if any operation > 100ΞΌs) -SLOW_OPS=$(cat benchmark_results.json | jq '.categories[][] | select(.average_time_ns > 100000000)') -if [ ! -z "$SLOW_OPS" ]; then - echo "Performance regression detected!" - exit 1 -fi -``` - -### Performance Comparison - -#### πŸ“Š Simple Comparison Script - -```bash -#!/bin/bash -# compare_benchmarks.sh - -BASELINE="baseline.json" -CURRENT="current.json" -THRESHOLD=1.1 # 10% regression threshold - -# Run current benchmark -./target/release/bench --format json > "$CURRENT" - -# Compare with baseline (if exists) -if [ -f "$BASELINE" ]; then - echo "πŸ” Checking for performance changes..." 
- - # Extract and compare key metrics - jq -r '.categories[][] | "\(.name) \(.average_time_ns)"' "$BASELINE" > baseline_times.txt - jq -r '.categories[][] | "\(.name) \(.average_time_ns)"' "$CURRENT" > current_times.txt - - # Performance regression analysis - python3 << 'EOF' -import json -import sys - -with open('baseline.json') as f: - baseline = json.load(f) -with open('current.json') as f: - current = json.load(f) - -threshold = 1.1 -regressions = [] - -for category in baseline['categories']: - for i, bench in enumerate(baseline['categories'][category]): - current_bench = current['categories'][category][i] - ratio = current_bench['average_time_ns'] / bench['average_time_ns'] - - if ratio > threshold: - regressions.append({ - 'name': bench['name'], - 'baseline': bench['average_time_formatted'], - 'current': current_bench['average_time_formatted'], - 'ratio': f"{ratio:.2f}x" - }) - -if regressions: - print("⚠️ Performance changes detected:") - for reg in regressions: - print(f" {reg['name']}: {reg['baseline']} β†’ {reg['current']} ({reg['ratio']})") - sys.exit(1) -else: - print("βœ… No significant performance changes") -EOF -else - echo "πŸ“ No baseline found, creating baseline from current run..." - cp "$CURRENT" "$BASELINE" -fi -``` - -## πŸ”§ Development Guide - -### Adding New Benchmarks - -#### πŸ“ Step-by-Step Process - -1. **🎯 Identify the Operation Category** - - ```rust - // Choose the appropriate method in src/bin/bench.rs - fn run_single_operation_benchmarks() // Individual operations - fn run_multiple_simple_benchmarks() // Operation chains - fn run_multiple_map_benchmarks() // Map operations - fn run_complex_benchmarks() // Complex scenarios - ``` - -2. **✍️ Follow the Naming Convention** - - ```rust - // Pattern: "Category: descriptive_name" - ("Single: operation_name", "{template}") - ("Multi: operation1 + operation2", "{template}") - ("Map: split + map(operation)", "{template}") - ("Complex: detailed_description", "{template}") - ``` - -3. **πŸ§ͺ Create Valid Templates** - - ```rust - // βœ… Good examples - ("Single: upper", "{upper}"), - ("Multi: split + sort + join", "{split:,:..|sort|join:,}"), - ("Map: split + map(trim)", "{split:,:..|map:{trim}|join:,}"), - - // ❌ Avoid these patterns - ("Single: split", "{split:,}"), // Missing range/join - ("Map: nested", "{split:,:..|map:{map:{upper}}}"), // Nested maps not supported - ``` - -4. **πŸ” Test with Small Iterations** - - ```bash - # Test new benchmarks first - cargo run --bin bench -- --iterations 10 - ``` - -### Performance Considerations - -#### ⚑ Basic Guidelines - -| Consideration | Impact | Recommendation | -|---------------|--------|----------------| -| **Build Mode** | 3-10x performance difference | Use `--release` for better measurements | -| **Iteration Count** | Result stability | 1000+ for automation, 2000+ for comparison | -| **Data Size** | Timing consistency | Current 208-char dataset works well | -| **System Load** | Measurement variance | Run on quiet systems when possible | -| **Memory** | Allocation overhead | Consider memory usage for intensive operations | - -#### πŸ—οΈ Architecture Insights - -```rust -// Performance-critical path in benchmark execution -fn benchmark_template(&self, name: &str, template_str: &str) -> BenchmarkResult { - // 1. Template compilation (one-time cost) - let template = Template::parse(template_str, None).unwrap(); - - // 2. 
Hot loop (measured operations) - for _ in 0..self.iterations { - let start = Instant::now(); - let _ = template.format(&self.test_data).unwrap(); // Core measurement - let duration = start.elapsed(); - times.push(duration); - } - - // 3. Basic analysis (post-processing) - BenchmarkResult::new(name.to_string(), times) -} -``` - -### Best Practices - -#### βœ… Do's - -1. **🏭 Use Release Builds for Better Measurements** - - ```bash - # Development/testing - cargo run --bin bench -- --iterations 100 - - # More accurate benchmarks - cargo build --release --bin bench - ./target/release/bench --iterations 2000 - ``` - -2. **πŸ“Š Choose Appropriate Iteration Counts** - - ```bash - # Quick development feedback (30-60 seconds) - --iterations 50 - - # Automated scripts (2-5 minutes) - --iterations 1000 - - # Thorough analysis (5-15 minutes) - --iterations 5000 - ``` - -3. **πŸ” Validate Templates Before Adding** - - ```bash - # Test individual templates - cargo run --bin string-pipeline -- "{new_template}" "test_data" - ``` - -4. **πŸ“ˆ Monitor Trends, Not Just Absolutes** - - ```bash - # Track performance over time - git log --oneline | head -10 | while read commit; do - git checkout $commit - ./target/release/bench --format json >> performance_history.jsonl - done - ``` - -#### ❌ Don'ts - -1. **🚫 Don't Mix Debug and Release Results** - - ```bash - # Wrong: Comparing different build modes - cargo run --bin bench > debug_results.txt - cargo run --release --bin bench > release_results.txt - # These results are not comparable! - ``` - -2. **🚫 Don't Ignore System Conditions** - - ```bash - # Wrong: Running during high system load - # Make sure system is idle before benchmarking - - # Right: Check system load - top -bn1 | grep "load average" - ``` - -3. **🚫 Don't Skip Outlier Analysis** - - ```bash - # Wrong: Assuming outliers are always noise - # High outlier counts may indicate: - # - System interference - # - Memory allocation issues - # - Template complexity problems - ``` - -## ⚠️ Troubleshooting - -### Common Issues - -#### πŸ› Build Problems - -**Problem:** `error: failed to remove file benchmark.exe` - -```bash -# Solution: Process is still running -taskkill /F /IM bench.exe # Windows -killall bench # Linux/macOS - -# Wait a moment, then rebuild -cargo build --release --bin bench -``` - -**Problem:** `Parse error: Expected operation` - -```bash -# Check template syntax -cargo run --bin string-pipeline -- "{your_template}" "test" - -# Common fixes: -"{split:,}" β†’ "{split:,:..|join:,}" -"{map:{map:{upper}}}" β†’ "{split:,:..|map:{upper}}" -``` - -#### ⚑ Performance Issues - -**Problem:** Benchmarks taking too long - -```bash -# Reduce iterations for development -cargo run --bin bench -- --iterations 100 - -# Check system resources -htop # Linux/macOS -taskmgr # Windows -``` - -**Problem:** Inconsistent results - -```bash -# Possible causes and solutions: -# 1. System load β†’ Run on idle system -# 2. Debug build β†’ Use --release -# 3. Too few iterations β†’ Increase --iterations -# 4. Background processes β†’ Close unnecessary applications -``` - -#### πŸ“Š Data Analysis Issues - -**Problem:** JSON parsing errors - -```bash -# Validate JSON output -./target/release/bench --format json | jq '.' - -# Check for truncated output -./target/release/bench --format json > results.json -jq '.' 
results.json # Should not error -``` - -**Problem:** Unexpected performance patterns - -```bash -# Debug with template analysis -cargo run --bin string-pipeline -- "{!your_template}" "test_data" - -# Profile memory usage -valgrind --tool=massif ./target/release/bench --iterations 100 -``` - -> πŸ’‘ **Need More Help?** -> -> πŸ” **Template Issues**: Check the [Template System Documentation](template-system.md) for syntax help -> -> πŸ› **Debug Mode**: Use `{!template}` syntax to see step-by-step execution -> -> πŸ“Š **Performance Analysis**: Consider using `cargo flamegraph` for detailed profiling diff --git a/docs/command-line-options.md b/docs/command-line-options.md index 61fa99a..fe78591 100644 --- a/docs/command-line-options.md +++ b/docs/command-line-options.md @@ -730,8 +730,6 @@ DEBUG: Total execution time: 18.7456ms '{split:,:..|map:{trim|upper|append:!}}' ``` -> πŸ“Š **Comprehensive Guide:** For detailed benchmarking methodology, performance analysis, automation scripts, and optimization strategies, see the [πŸ† Performance Benchmarking Guide](benchmarking.md). - ## πŸ”§ Troubleshooting ### πŸ› Common Issues and Solutions diff --git a/docs/template-system.md b/docs/template-system.md index 63d115c..de85f88 100644 --- a/docs/template-system.md +++ b/docs/template-system.md @@ -653,7 +653,7 @@ Converts text to uppercase. | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| *(none)* | - | - | No parameters required | +| _(none)_ | - | - | No parameters required | **Examples:** @@ -671,7 +671,7 @@ Converts text to lowercase. | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| *(none)* | - | - | No parameters required | +| _(none)_ | - | - | No parameters required | **Examples:** @@ -784,7 +784,7 @@ Reverses the order of list items or characters in a string. | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| *(none)* | - | - | No parameters required | +| _(none)_ | - | - | No parameters required | **Behavior on Different Input Types:** @@ -807,7 +807,7 @@ Removes duplicate items from a list, preserving order. | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| *(none)* | - | - | No parameters required | +| _(none)_ | - | - | No parameters required | **Order Preservation:** The first occurrence of each item is kept, maintaining the original order. @@ -873,7 +873,7 @@ Removes ANSI escape sequences (colors, formatting) from text. | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| *(none)* | - | - | No parameters required | +| _(none)_ | - | - | No parameters required | **Sequence Types Removed:** Color codes, cursor movement, text formatting, and other ANSI escape sequences. 
@@ -1431,5 +1431,4 @@ string-pipeline '{split:,:..|map:{prepend:β€’ |append: βœ“}}' 'First item,Second
 πŸ“š **Essential Resources:**
 
 - πŸ› **[Debug System Guide](debug-system.md)** - Master debugging techniques and error diagnosis
-- πŸ† **[Performance Benchmarking Guide](benchmarking.md)** - Optimize templates for production use
 - πŸ”— **[Command Line Interface Guide](command-line-options.md)** - CLI features and automation tips
diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 0000000..bb3f069
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,346 @@
+# Benchmark CI/CD Scripts
+
+This directory contains scripts used by the GitHub Actions CI/CD pipeline to track and compare performance benchmarks.
+
+## Overview
+
+The benchmark system uses an **on-demand approach** triggered via PR comments. There are no automatic benchmark runs;
+all comparisons are triggered manually by the repository owner using the `/bench` command.
+
+## The `/bench` Command
+
+### Command Syntax
+
+```bash
+/bench <ref1> <ref2> [size] [warmup] [runs]
+```
+
+**Parameters:**
+
+- `ref1` (required): First git reference (commit, branch, or tag)
+- `ref2` (required): Second git reference to compare
+- `size` (optional): Number of paths to process per run (default: 10000)
+- `warmup` (optional): Number of warmup runs (default: 5)
+- `runs` (optional): Number of measurement runs (default: 50)
+
+**Auto-Ordering:** The workflow automatically determines which ref is older (baseline) and which is newer (current)
+based on commit timestamps. You don't need to worry about parameter order - `/bench main feature` and
+`/bench feature main` produce the same comparison with correct labeling.
+
+### Examples
+
+```bash
+# Basic comparison with all defaults (size=10000, warmup=5, runs=50)
+/bench main v0.13.0
+
+# Compare two commits with custom size
+/bench abc12345 def56789 50000
+
+# Custom size and warmup
+/bench main HEAD 50000 10
+
+# Full custom parameters: size=50000, warmup=10, runs=100
+/bench main HEAD 50000 10 100
+
+# Compare feature branch vs main (order doesn't matter)
+/bench feature-branch main
+```
+
+### Workflow
+
+1. **Post command** in a PR comment: `/bench main HEAD`
+2. **Bot acknowledges** with πŸ‘€ reaction and status message
+3. **Validation** checks:
+   - User is repository owner
+   - Both refs exist
+   - Benchmark tool exists in both refs
+   - Parameters are valid
+4. **Install hyperfine** in CI environment
+5. **Build** benchmark binaries for both refs
+6. **Run with hyperfine**:
+   - 5 warmup runs
+   - 50 measurement runs
+   - Tests all templates in each run
+7. **Results posted** as PR comment with hyperfine comparison
+   - Mean execution time for each version
+   - Standard deviation, min/max ranges
+   - Relative speed comparison (e.g., "1.05x faster")
+8. **Success reaction** πŸš€ (or πŸ˜• on failure)
+9. **Artifacts uploaded** for 30 days
+
+## Files
+
+### `analyze_all_templates.sh`
+
+Benchmarks all templates by running hyperfine twice (once per version).
+
+**Usage:**
+
+```bash
+./scripts/analyze_all_templates.sh <baseline-sha> <current-sha> [options]
+
+Options:
+  --size          Input size in paths (default: 10000)
+  --warmup        Warmup runs (default: 5)
+  --runs          Benchmark runs (default: 50)
+  --export-dir    Output directory (default: ./template_analysis)
+```
+
+**Output:**
+
+- Hyperfine JSON files
+- Markdown report with per-template comparison
+- Highlights regressions and improvements
+
+**Workflow integration:**
+
+```bash
+# 1. Compile versions
+./scripts/compile_benchmark_versions.sh abc12345 def56789
+
+# 2. 
Run comprehensive analysis +./scripts/analyze_all_templates.sh abc12345 def56789 --runs 100 + +# 3. View results +cat template_analysis/comparison_report.md +``` + +### `compare_template_results.py` + +Parses hyperfine JSON outputs and generates per-template comparison reports. + +Called automatically by `analyze_all_templates.sh`. + +## GitHub Actions Workflow + +### Benchmark Command (`.github/workflows/bench-command.yml`) + +The single workflow that handles all benchmark comparisons. + +**Triggers:** + +- PR comments starting with `/bench` + +**What it does:** + +1. **Validates** user permissions and parameters +2. **Installs** hyperfine +3. **Checks** both refs for benchmark tool existence +4. **Builds** the benchmark tool for each ref +5. **Runs** benchmarks with hyperfine directly + - 5 warmup runs + 50 measurement runs + - All templates mode (single execution time per run) + - Results exported as markdown table +6. **Posts** detailed report to PR with markdown table +7. **Uploads** artifacts (markdown results + build logs) + +**Artifacts:** + +- **benchmark-comparison-** + - Hyperfine comparison results (markdown table) + - Build logs for both refs (baseline and current) + - Retained for 30 days + +## Running Benchmarks Locally + +### Quick Single-Template Test + +```bash +cargo build --release --bin bench-throughput + +# Single template, single run (quick smoke test) +./target/release/bench-throughput --template "{split:/:-1}" --size 10000 + +# With JSON output for inspection +./target/release/bench-throughput --template all --size 10000 --output my_benchmark.json +``` + +### Analysis with Hyperfine + +```bash +# Quick overall check (all templates in one run) +hyperfine --warmup 5 --runs 50 \ + './target/release/bench-throughput --template all --size 10000 --output /dev/null' + +# Detailed analysis of specific template +hyperfine --warmup 10 --runs 100 \ + './target/release/bench-throughput --template "{split:/:-1}" --size 10000 --output /dev/null' +``` + +### Per-Template Detailed Analysis + +Analyze all templates using a single command: + +```bash +# First, compile the versions you want to compare +./scripts/compile_benchmark_versions.sh abc1234 def5678 + +# Run comprehensive per-template analysis +./scripts/analyze_all_templates.sh abc1234 def5678 + +# With custom parameters +./scripts/analyze_all_templates.sh abc1234 def5678 \ + --size 50000 \ + --runs 100 \ + --export-dir ./my_analysis +``` + +**What it does:** + +1. Runs hyperfine with `--parameter-list` on all templates (baseline version) +2. Runs hyperfine with `--parameter-list` on all templates (current version) +3. Generates report comparing each template + +**Output:** + +- `template_analysis/baseline_results.json` +- `template_analysis/current_results.json` +- `template_analysis/comparison_report.md` + +## Version Comparison Workflow + +For comparing performance across multiple commits (e.g., to find when a regression was introduced), use the +`compile_benchmark_versions.sh` script. + +### `compile_benchmark_versions.sh` + +This script compiles the benchmark tool for every commit in a range, making it easy to run performance comparisons +across different versions. 
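+
+For instance, after compiling a range, each stored binary can be listed and invoked directly. The SHAs
+below are illustrative; the flags shown are the ones accepted by `bench-throughput`:
+
+```bash
+BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks"
+ls "$BENCH_DIR"
+# bench_throughput_abc12345  bench_throughput_def56789
+
+# Smoke-test one compiled version against a single template
+"$BENCH_DIR/bench_throughput_abc12345" --template "{split:/:-1}" --size 10000
+```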
+ +**Features:** + +- **Idempotent**: Only compiles versions that don't already exist +- **Safe**: Uses git worktrees in temporary directories (doesn't affect your working directory) +- **Convenient**: Stores binaries with commit SHA for easy identification +- **Non-intrusive**: Works even with uncommitted changes in your main working directory +- **Storage**: Uses `$XDG_DATA_HOME/string_pipeline/benchmarks/` (typically `~/.local/share/string_pipeline/benchmarks/`) + +**Usage:** + +```bash +# Compile all versions since the introduction of the benchmark tool +./scripts/compile_benchmark_versions.sh + +# Compile specific range +./scripts/compile_benchmark_versions.sh --start abc1234 --end def5678 + +# See what would be compiled (dry run) +./scripts/compile_benchmark_versions.sh --dry-run + +# List already compiled versions +./scripts/compile_benchmark_versions.sh --list + +# Remove all compiled versions +./scripts/compile_benchmark_versions.sh --clean + +# Verbose output for debugging +./scripts/compile_benchmark_versions.sh --verbose +``` + +**Example Workflow - Finding a Performance Regression:** + +```bash +# 1. Compile all versions +./scripts/compile_benchmark_versions.sh + +# 2. Set up benchmark directory path +BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks" + +# 3. Quick overall comparison with hyperfine +./scripts/compare_benchmark_versions.sh abc12345 def56789 --all + +# 4. If regression detected, run detailed per-template analysis +./scripts/analyze_all_templates.sh abc12345 def56789 --runs 100 + +# 5. Or analyze a specific template +./scripts/compare_benchmark_versions.sh abc12345 def56789 \ + --template "{split:/:-1}" --runs 100 +``` + +### `compare_benchmark_versions.sh` + +After compiling benchmark binaries, use this script to quickly compare performance between two versions using hyperfine. + +**Requirements:** + +- hyperfine must be installed (`apt install hyperfine` or `brew install hyperfine`) + +**Usage:** + +```bash +# Specific template mode (default) +./scripts/compare_benchmark_versions.sh abc12345 def56789 + +# Custom template +./scripts/compare_benchmark_versions.sh abc12345 def56789 --template "{upper}" + +# All templates mode +./scripts/compare_benchmark_versions.sh abc12345 def56789 --all + +# Custom parameters +./scripts/compare_benchmark_versions.sh abc12345 def56789 \ + --template "{split:/:-1}" \ + --warmup 10 --runs 100 --size 50000 +``` + +**Example Workflow - Performance Comparison:** + +```bash +# 1. Compile the versions you want to compare +./scripts/compile_benchmark_versions.sh --start abc12345 --end def56789 + +# 2. Run hyperfine comparison on specific template +./scripts/compare_benchmark_versions.sh abc12345 def56789 \ + --template "{split:/:-1}" \ + --warmup 10 --runs 100 + +# 3. 
For comprehensive check, use all-templates mode
+./scripts/compare_benchmark_versions.sh abc12345 def56789 --all --runs 20
+```
+
+## Configuration
+
+### Benchmark Parameters
+
+Default parameters:
+
+- **Input size:** 10,000 paths
+- **Templates:** All predefined templates
+- **Hyperfine warmup:** 5 runs (CI only)
+- **Hyperfine runs:** 50 runs (CI only)
+
+These can be overridden:
+
+```bash
+# Custom size
+/bench main HEAD 50000
+
+# Local: Custom hyperfine parameters
+hyperfine --warmup 20 --runs 200 \
+  './bench-throughput --template "{upper}" --size 100000'
+```
+
+## Offline vs CI Benchmarking
+
+**CI/CD (Quick check):**
+
+- Uses hyperfine with 5 warmup + 50 runs
+- Tests all templates at once
+- Provides overall execution time + per-template breakdown
+- Good for regression detection
+- Fast feedback (~3-5 minutes)
+
+**Offline (Comprehensive analysis):**
+
+- Use `compare_benchmark_versions.sh` locally
+- Full control over hyperfine parameters (warmup, runs)
+- Focus on specific templates
+- Export results in multiple formats
+- Ideal for performance investigation
+
+**Recommended workflow:**
+
+1. CI detects potential regression via `/bench`
+2. Investigate offline with hyperfine + specific templates
+3. Narrow down the problematic operation
+4. Fix and verify with both CI and offline tools
diff --git a/scripts/analyze_all_templates.sh b/scripts/analyze_all_templates.sh
new file mode 100755
index 0000000..6c4d274
--- /dev/null
+++ b/scripts/analyze_all_templates.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+set -euo pipefail
+
+# Analyze all predefined templates
+# Uses hyperfine's --parameter-list to run efficiently
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks"
+
+usage() {
+    cat <<EOF
+Usage: $(basename "$0") <baseline-sha> <current-sha> [options]
+
+Analyze all predefined templates.
+
+Arguments:
+    baseline-sha    Git SHA/ref for baseline version
+    current-sha     Git SHA/ref for current version
+
+Options:
+    --size          Input size in paths (default: 10000)
+    --warmup        Number of warmup runs (default: 5)
+    --runs          Number of benchmark runs (default: 50)
+    --export-dir    Directory for output files (default: ./template_analysis)
+
+Examples:
+    $(basename "$0") abc12345 def56789
+    $(basename "$0") main HEAD --size 50000 --runs 100
+    $(basename "$0") main feature-branch --export-dir ./results
+
+Output:
+    - Hyperfine JSON for baseline and current versions
+    - Markdown comparison report with per-template analysis
+EOF
+    exit 1
+}
+
+# Default values
+SIZE="10000"
+WARMUP=5
+RUNS=50
+EXPORT_DIR="./template_analysis"
+
+# Parse arguments
+if [ $# -lt 2 ]; then
+    usage
+fi
+
+BASELINE_SHA="$1"
+CURRENT_SHA="$2"
+shift 2
+
+while [ $# -gt 0 ]; do
+    case "$1" in
+        --size)
+            SIZE="$2"
+            shift 2
+            ;;
+        --warmup)
+            WARMUP="$2"
+            shift 2
+            ;;
+        --runs)
+            RUNS="$2"
+            shift 2
+            ;;
+        --export-dir)
+            EXPORT_DIR="$2"
+            shift 2
+            ;;
+        -h | --help)
+            usage
+            ;;
+        *)
+            echo "Error: Unknown option $1"
+            usage
+            ;;
+    esac
+done
+
+# Check binaries exist
+BASELINE_BIN="$BENCH_DIR/bench_throughput_$BASELINE_SHA"
+CURRENT_BIN="$BENCH_DIR/bench_throughput_$CURRENT_SHA"
+
+if [ ! -f "$BASELINE_BIN" ]; then
+    echo "Error: Baseline binary not found: $BASELINE_BIN"
+    echo "Run compile_benchmark_versions.sh first"
+    exit 1
+fi
+
+if [ ! 
-f "$CURRENT_BIN" ]; then + echo "Error: Current binary not found: $CURRENT_BIN" + echo "Run compile_benchmark_versions.sh first" + exit 1 +fi + +# Create export directory +mkdir -p "$EXPORT_DIR" + +# Comprehensive template set covering all operation types +TEMPLATES=( + # String Operations (direct, no split needed) + "{upper}" + "{lower}" + "{reverse}" + "{trim}" + "{trim:left}" + "{trim:right}" + "{substring:0..10}" + "{substring:-5..}" + "{append:.bak}" + "{prepend:backup_}" + "{surround:\"}" + "{pad:80: :right}" + "{pad:80:0:left}" + "{replace:s/\\.txt$/.md/}" + "{replace:s/\\/\\/+/\\//g}" + "{regex_extract:[^/]+$}" + "{strip_ansi}" + # Split Operations + "{split:/:..}" + "{split:/:-1}" + "{split:/:0..-1}" + "{split:/:0..3}" + # List Operations (with split) + "{split:/:..|join:/}" + "{split:/:..|filter:^[a-z]+$}" + "{split:/:..|filter_not:^\\.}" + "{split:/:..|sort}" + "{split:/:..|sort:desc}" + "{split:/:..|reverse}" + "{split:/:..|unique}" + "{split:/:..|slice:2..5}" + "{split:/:..|slice:-3..}" + "{split:/:..|map:{upper}}" + "{split:/:..|map:{trim}}" + # Complex Chains + "{trim|upper|pad:20}" + "{split:/:..|filter:^[a-z]+$|sort|join:-}" + "{split:/:-1|split:.:0}" + "{split:/:..|map:{upper}|join:/}" +) + +# Convert array to comma-separated list for hyperfine +TEMPLATE_LIST=$( + IFS=, + echo "${TEMPLATES[*]}" +) + +echo "=========================================" +echo "Per-Template Benchmark Analysis" +echo "=========================================" +echo "Baseline: $BASELINE_SHA" +echo "Current: $CURRENT_SHA" +echo "Templates: All predefined templates" +echo "Input size: $SIZE paths" +echo "Warmup: $WARMUP runs" +echo "Runs: $RUNS measurements" +echo "Output dir: $EXPORT_DIR" +echo "=========================================" +echo "" + +# Run hyperfine for baseline version (all templates) +echo "Phase 1/3: Benchmarking baseline version ($BASELINE_SHA)..." +hyperfine \ + --warmup "$WARMUP" \ + --runs "$RUNS" \ + --parameter-list template "$TEMPLATE_LIST" \ + --export-json "$EXPORT_DIR/baseline_results.json" \ + --style basic \ + "$BASELINE_BIN --template {template} --size $SIZE --output /dev/null" + +echo "" +echo "Phase 2/3: Benchmarking current version ($CURRENT_SHA)..." +# Run hyperfine for current version (all templates) +hyperfine \ + --warmup "$WARMUP" \ + --runs "$RUNS" \ + --parameter-list template "$TEMPLATE_LIST" \ + --export-json "$EXPORT_DIR/current_results.json" \ + --style basic \ + "$CURRENT_BIN --template {template} --size $SIZE --output /dev/null" + +echo "" +echo "Phase 3/3: Generating comparison report..." + +# Generate comparison report using Python +python3 "$SCRIPT_DIR/compare_template_results.py" \ + "$EXPORT_DIR/baseline_results.json" \ + "$EXPORT_DIR/current_results.json" \ + --baseline-name "$BASELINE_SHA" \ + --current-name "$CURRENT_SHA" \ + --size "$SIZE" \ + >"$EXPORT_DIR/comparison_report.md" + +echo "" +echo "βœ“ Analysis complete!" 
+echo "" +echo "Results:" +echo " - Baseline data: $EXPORT_DIR/baseline_results.json" +echo " - Current data: $EXPORT_DIR/current_results.json" +echo " - Report: $EXPORT_DIR/comparison_report.md" +echo "" +echo "View report:" +echo " cat $EXPORT_DIR/comparison_report.md" diff --git a/scripts/compare_benchmark_versions.sh b/scripts/compare_benchmark_versions.sh new file mode 100755 index 0000000..57fa5d9 --- /dev/null +++ b/scripts/compare_benchmark_versions.sh @@ -0,0 +1,230 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Script to compare two compiled benchmark binaries using hyperfine +# Supports both "all templates" mode and specific template mode + +BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Default values +WARMUP=5 +RUNS=50 +SIZE="10000" +TEMPLATE="{split:/:-1}" +ALL_MODE=false +STYLE="" + +# Usage information +usage() { + cat < [OPTIONS] + +Compare performance of two compiled benchmark binaries using hyperfine. + +ARGUMENTS: + Short SHA of first benchmark version (baseline) + Short SHA of second benchmark version (current) + +OPTIONS: + --warmup N Number of warmup runs (default: $WARMUP) + --runs N Number of benchmark runs (default: $RUNS) + --size SIZE Input size (default: $SIZE) + --template TPL Template to benchmark (default: "$TEMPLATE") + --all Compare using all templates mode + --style STYLE Hyperfine output style (basic|full|nocolor|color|none) + -h, --help Show this help message + +EXAMPLES: + # Compare specific template with hyperfine (default) + $(basename "$0") abc12345 def56789 + + # Compare with custom template + $(basename "$0") abc12345 def56789 --template "{split:/:..|join:/}" + + # Compare all templates mode (single run each, summary output) + $(basename "$0") abc12345 def56789 --all + + # Custom settings for specific template + $(basename "$0") abc12345 def56789 --template "{upper}" --warmup 10 --runs 100 --size 50000 + +MODES: + Specific template mode (default): + - Uses hyperfine to benchmark a single template + - Multiple runs with statistical analysis from hyperfine + - Best for detailed performance comparison of one template + + All templates mode (--all): + - Runs all predefined templates once + - Hyperfine measures total execution time + - Best for overall performance regression testing + +NOTES: + - Binaries must be compiled first using compile_benchmark_versions.sh + - hyperfine must be installed (https://github.com/sharkdp/hyperfine) +EOF +} + +# Print colored message +log_info() { + echo -e "${BLUE}β„Ή${NC} $*" +} + +log_success() { + echo -e "${GREEN}βœ“${NC} $*" +} + +log_error() { + echo -e "${RED}βœ—${NC} $*" >&2 +} + +# Check if hyperfine is installed +check_hyperfine() { + if ! command -v hyperfine &>/dev/null; then + log_error "hyperfine is not installed" + echo "" + echo "Install hyperfine:" + echo " - Debian/Ubuntu: apt install hyperfine" + echo " - macOS: brew install hyperfine" + echo " - Cargo: cargo install hyperfine" + echo " - GitHub: https://github.com/sharkdp/hyperfine" + echo "" + exit 1 + fi +} + +# Check if binary exists +check_binary() { + local sha=$1 + local binary_path="$BENCH_DIR/bench_throughput_$sha" + + if [ ! -f "$binary_path" ]; then + log_error "Benchmark binary not found: bench_throughput_$sha" + echo "" + echo "The binary for commit $sha has not been compiled yet." 
+ echo "" + echo "Compile it first using:" + echo -e " ${YELLOW}./scripts/compile_benchmark_versions.sh --start $sha --end $sha${NC}" + echo "" + echo "Or compile a range of versions:" + echo -e " ${YELLOW}./scripts/compile_benchmark_versions.sh${NC}" + echo "" + exit 1 + fi +} + +# Parse command line arguments +if [ $# -lt 2 ]; then + usage + exit 1 +fi + +SHA1=$1 +SHA2=$2 +shift 2 + +while [ $# -gt 0 ]; do + case $1 in + --warmup) + WARMUP="$2" + shift 2 + ;; + --runs) + RUNS="$2" + shift 2 + ;; + --size) + SIZE="$2" + shift 2 + ;; + --template) + TEMPLATE="$2" + shift 2 + ;; + --all) + ALL_MODE=true + shift + ;; + --style) + STYLE="$2" + shift 2 + ;; + -h | --help) + usage + exit 0 + ;; + *) + log_error "Unknown option: $1" + echo "" + usage + exit 1 + ;; + esac +done + +# Validate inputs +check_hyperfine +check_binary "$SHA1" +check_binary "$SHA2" + +BINARY1="$BENCH_DIR/bench_throughput_$SHA1" +BINARY2="$BENCH_DIR/bench_throughput_$SHA2" + +# Print comparison info +echo "" +log_info "Comparing benchmark versions using hyperfine" +echo "" +echo " Baseline: $SHA1" +echo " Current: $SHA2" +echo "" + +if [ "$ALL_MODE" = true ]; then + echo "Mode: All templates" + echo " Size: $SIZE" + echo "" + echo "Hyperfine parameters:" + echo " Warmup runs: $WARMUP" + echo " Benchmark runs: $RUNS" + echo "" + + # All templates mode - benchmark complete tool execution + HYPERFINE_ARGS=(--warmup "$WARMUP" --runs "$RUNS") + [ -n "$STYLE" ] && HYPERFINE_ARGS+=(--style "$STYLE") + + hyperfine \ + "${HYPERFINE_ARGS[@]}" \ + --command-name "$SHA1" \ + "$BINARY1 --template all --size $SIZE --output /dev/null" \ + --command-name "$SHA2" \ + "$BINARY2 --template all --size $SIZE --output /dev/null" +else + echo "Mode: Specific template" + echo " Template: $TEMPLATE" + echo " Size: $SIZE" + echo "" + echo "Hyperfine parameters:" + echo " Warmup runs: $WARMUP" + echo " Benchmark runs: $RUNS" + echo "" + + # Specific template mode - hyperfine orchestrates multiple runs + HYPERFINE_ARGS=(--warmup "$WARMUP" --runs "$RUNS") + [ -n "$STYLE" ] && HYPERFINE_ARGS+=(--style "$STYLE") + + hyperfine \ + "${HYPERFINE_ARGS[@]}" \ + --command-name "$SHA1" \ + "$BINARY1 --template '$TEMPLATE' --size $SIZE" \ + --command-name "$SHA2" \ + "$BINARY2 --template '$TEMPLATE' --size $SIZE" +fi + +echo "" +log_success "Comparison complete!" diff --git a/scripts/compare_template_results.py b/scripts/compare_template_results.py new file mode 100755 index 0000000..45c5cfc --- /dev/null +++ b/scripts/compare_template_results.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python3 +""" +Compare hyperfine JSON results for per-template analysis. + +This script parses two hyperfine JSON files (baseline and current) where each +file contains results from running all templates. It generates a markdown +comparison report. 
+""" + +import json +import sys +import argparse +from typing import Dict, Tuple + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Compare hyperfine per-template results" + ) + parser.add_argument("baseline_json", help="Baseline hyperfine JSON results") + parser.add_argument("current_json", help="Current hyperfine JSON results") + parser.add_argument( + "--baseline-name", default="baseline", help="Name for baseline version" + ) + parser.add_argument( + "--current-name", default="current", help="Name for current version" + ) + parser.add_argument("--size", type=int, help="Input size used") + return parser.parse_args() + + +def load_hyperfine_json(filepath: str) -> Dict: + """Load hyperfine JSON results.""" + with open(filepath, "r") as f: + return json.load(f) + + +def extract_template_from_command(command: str) -> str: + """Extract template string from hyperfine command. + + Command format: 'binary --template {template} --size N --output /dev/null' + """ + parts = command.split("--template ") + if len(parts) < 2: + return "unknown" + + template_part = parts[1].split(" ")[0] + return template_part + + +def format_time_ms(seconds: float) -> str: + """Format time in seconds to human-readable string.""" + ms = seconds * 1000 + if ms < 1: + return f"{ms * 1000:.2f}ΞΌs" + elif ms < 1000: + return f"{ms:.2f}ms" + else: + return f"{ms / 1000:.2f}s" + + +def calculate_change(baseline: float, current: float) -> Tuple[float, str]: + """Calculate percentage change and return emoji indicator. + + For timing metrics, lower is better: + - Negative change = improvement (faster) + - Positive change = regression (slower) + """ + if baseline == 0: + return 0.0, "βž–" + + change_pct = ((current - baseline) / baseline) * 100 + + if abs(change_pct) < 2: # Less than 2% change is noise + emoji = "βž–" + elif change_pct < -5: # >5% faster is significant improvement + emoji = "🟒" + elif change_pct < -2: # 2-5% faster is improvement + emoji = "βœ…" + elif change_pct > 10: # >10% slower is regression + emoji = "πŸ”΄" + elif change_pct > 5: # 5-10% slower is warning + emoji = "⚠️" + else: # 2-5% slower is caution + emoji = "🟑" + + return change_pct, emoji + + +def generate_comparison_report( + baseline_data: Dict, + current_data: Dict, + baseline_name: str, + current_name: str, + input_size: int | None = None, +) -> str: + """Generate markdown comparison report from hyperfine JSON data.""" + + # Build lookup by template + baseline_by_template = {} + for result in baseline_data["results"]: + template = extract_template_from_command(result["command"]) + baseline_by_template[template] = result + + current_by_template = {} + for result in current_data["results"]: + template = extract_template_from_command(result["command"]) + current_by_template[template] = result + + # Find common templates + common_templates = sorted( + set(baseline_by_template.keys()) & set(current_by_template.keys()) + ) + + if not common_templates: + return "Error: No common templates found between baseline and current results." 
+ + # Generate report + lines = [] + lines.append("# πŸ“Š Per-Template Benchmark Analysis\n") + lines.append(f"**Baseline:** `{baseline_name}`") + lines.append(f"**Current:** `{current_name}`") + if input_size: + lines.append(f"**Input size:** {input_size:,} paths per run") + lines.append(f"**Templates analyzed:** {len(common_templates)}\n") + + # Summary statistics + regressions = [] + improvements = [] + neutral = [] + + # Build comparison table + lines.append("## Performance Comparison\n") + lines.append( + "| Template | Baseline Mean | Current Mean | Change | Min | Max | StdDev | Notes |" + ) + lines.append( + "|----------|---------------|--------------|--------|-----|-----|--------|-------|" + ) + + for template in common_templates: + baseline = baseline_by_template[template] + current = current_by_template[template] + + # Extract timing statistics (all in seconds from hyperfine) + baseline_mean = baseline["mean"] + current_mean = current["mean"] + current_min = current["min"] + current_max = current["max"] + current_stddev = current["stddev"] + + # Calculate change + change_pct, emoji = calculate_change(baseline_mean, current_mean) + + # Track significant changes + if change_pct > 10: + regressions.append((template, change_pct)) + elif change_pct < -5: + improvements.append((template, change_pct)) + else: + neutral.append(template) + + # Build notes (check if variation is high) + notes = [] + cv = (current_stddev / current_mean * 100) if current_mean > 0 else 0 + if cv > 10: + notes.append("high variance") + + # Format timing data + baseline_str = format_time_ms(baseline_mean) + current_str = format_time_ms(current_mean) + min_str = format_time_ms(current_min) + max_str = format_time_ms(current_max) + stddev_str = format_time_ms(current_stddev) + + notes_str = ", ".join(notes) if notes else "β€”" + + lines.append( + f"| `{template}` " + f"| {baseline_str} " + f"| {current_str} " + f"| {emoji} {change_pct:+.1f}% " + f"| {min_str} " + f"| {max_str} " + f"| Β±{stddev_str} " + f"| {notes_str} |" + ) + + lines.append("") + + # Summary section + lines.append("## Summary\n") + lines.append(f"- **Total templates:** {len(common_templates)}") + lines.append(f"- **Improvements:** {len(improvements)} 🟒") + lines.append(f"- **Regressions:** {len(regressions)} πŸ”΄") + lines.append(f"- **Neutral:** {len(neutral)} βž–\n") + + # Highlight significant changes + if regressions: + lines.append("### ⚠️ Performance Regressions\n") + for template, change in sorted(regressions, key=lambda x: x[1], reverse=True): + baseline = baseline_by_template[template] + current = current_by_template[template] + lines.append( + f"- **`{template}`**: {change:+.1f}% slower " + f"({format_time_ms(baseline['mean'])} β†’ {format_time_ms(current['mean'])})" + ) + lines.append("") + + if improvements: + lines.append("### ✨ Performance Improvements\n") + for template, change in sorted(improvements, key=lambda x: x[1]): + baseline = baseline_by_template[template] + current = current_by_template[template] + lines.append( + f"- **`{template}`**: {abs(change):.1f}% faster " + f"({format_time_ms(baseline['mean'])} β†’ {format_time_ms(current['mean'])})" + ) + lines.append("") + + # Measurement details + lines.append("## Measurement Details\n") + lines.append("Hyperfine metrics:") + lines.append("- **Mean**: Average execution time across all runs") + lines.append("- **Min/Max**: Fastest and slowest runs observed") + lines.append("- **StdDev**: Standard deviation (measure of consistency)") + lines.append("- **High variance**: 
Templates with coefficient of variation >10%\n")
+
+    # Legend
+    lines.append("---\n")
+    lines.append("### Legend")
+    lines.append("- 🟒 Significant improvement (>5% faster)")
+    lines.append("- βœ… Improvement (2-5% faster)")
+    lines.append("- βž– Neutral (<2% change)")
+    lines.append("- 🟑 Caution (2-5% slower)")
+    lines.append("- ⚠️ Warning (5-10% slower)")
+    lines.append("- πŸ”΄ Regression (>10% slower)")
+
+    return "\n".join(lines)
+
+
+def main():
+    args = parse_args()
+
+    try:
+        baseline_data = load_hyperfine_json(args.baseline_json)
+        current_data = load_hyperfine_json(args.current_json)
+
+        report = generate_comparison_report(
+            baseline_data,
+            current_data,
+            args.baseline_name,
+            args.current_name,
+            args.size,
+        )
+
+        print(report)
+
+    except FileNotFoundError as e:
+        print(f"Error: File not found: {e}", file=sys.stderr)
+        sys.exit(1)
+    except json.JSONDecodeError as e:
+        print(f"Error: Invalid JSON: {e}", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        import traceback
+
+        traceback.print_exc()
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/compile_benchmark_versions.sh b/scripts/compile_benchmark_versions.sh
new file mode 100755
index 0000000..2a6fb7f
--- /dev/null
+++ b/scripts/compile_benchmark_versions.sh
@@ -0,0 +1,348 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Script to compile benchmark binaries for multiple git commits
+# This makes it easy to compare performance across different versions
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks"
+DEFAULT_START_COMMIT="HEAD~10"
+VERBOSE=0
+DRY_RUN=0
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Usage information
+usage() {
+    cat <<EOF
+Usage: $(basename "$0") [OPTIONS]
+
+Compile benchmark binaries for a range of git commits.
+
+Binaries are stored as: bench_throughput_<short-sha>
+Location: \$XDG_DATA_HOME/string_pipeline/benchmarks/
+
+OPTIONS:
+    --start COMMIT    Starting commit (default: $DEFAULT_START_COMMIT)
+    --end COMMIT      Ending commit (default: HEAD)
+    --list            List already compiled versions and exit
+    --dry-run         Show what would be compiled without doing it
+    --clean           Remove all compiled benchmarks and exit
+    --verbose         Show detailed output
+    -h, --help        Show this help message
+
+EXAMPLES:
+    # Compile all versions from $DEFAULT_START_COMMIT to HEAD
+    $(basename "$0")
+
+    # Compile specific range
+    $(basename "$0") --start abc12345 --end def56789
+
+    # List available compiled versions
+    $(basename "$0") --list
+
+    # See what would be compiled
+    $(basename "$0") --dry-run
+
+    # Clean up old compiled versions
+    $(basename "$0") --clean
+
+USAGE AFTER COMPILATION:
+    # Quick overall comparison with hyperfine
+    ./scripts/compare_benchmark_versions.sh abc12345 def56789 --all
+
+    # Detailed per-template analysis
+    ./scripts/analyze_all_templates.sh abc12345 def56789 --runs 100
+
+    # Analyze specific template
+    ./scripts/compare_benchmark_versions.sh abc12345 def56789 \\
+        --template "{split:/:-1}" --runs 100
+EOF
+}
+
+# Print colored message
+log_info() {
+    echo -e "${BLUE}β„Ή${NC} $*"
+}
+
+log_success() {
+    echo -e "${GREEN}βœ“${NC} $*"
+}
+
+log_warning() {
+    echo -e "${YELLOW}⚠${NC} $*"
+}
+
+log_error() {
+    echo -e "${RED}βœ—${NC} $*" >&2
+}
+
+log_verbose() {
+    if [ "$VERBOSE" -eq 1 ]; then
+        echo -e "${BLUE}[verbose]${NC} $*"
+    fi
+}
+
+# List compiled versions
+list_versions() {
+    if [ ! 
-d "$BENCH_DIR" ]; then + log_warning "No benchmark directory found at: $BENCH_DIR" + return + fi + + local count=0 + log_info "Compiled benchmark versions in: $BENCH_DIR" + echo "" + + while IFS= read -r -d '' binary; do + local filename + filename=$(basename "$binary") + local sha="${filename#bench_throughput_}" + local size + size=$(du -h "$binary" | cut -f1) + local date + date=$(stat -c '%y' "$binary" 2>/dev/null || stat -f '%Sm' "$binary" 2>/dev/null || echo "unknown") + + echo " $sha ($size, compiled: ${date%.*})" + count=$((count + 1)) + done < <(find "$BENCH_DIR" -type f -name "bench_throughput_*" -print0 2>/dev/null | sort -z) + + if [ "$count" -eq 0 ]; then + log_warning "No compiled benchmarks found" + else + echo "" + log_success "Found $count compiled version(s)" + fi +} + +# Clean compiled versions +clean_versions() { + if [ ! -d "$BENCH_DIR" ]; then + log_warning "No benchmark directory found at: $BENCH_DIR" + return + fi + + local count=0 + while IFS= read -r -d '' binary; do + log_verbose "Removing: $binary" + rm -f "$binary" + count=$((count + 1)) + done < <(find "$BENCH_DIR" -type f -name "bench_throughput_*" -print0 2>/dev/null) + + if [ "$count" -eq 0 ]; then + log_info "No compiled benchmarks to clean" + else + log_success "Removed $count compiled version(s)" + fi +} + +# Get short SHA for a commit +get_short_sha() { + local commit=$1 + git rev-parse --short=8 "$commit" 2>/dev/null +} + +# Check if binary exists for a commit +binary_exists() { + local short_sha=$1 + [ -f "$BENCH_DIR/bench_throughput_$short_sha" ] +} + +# Compile benchmark for a commit using git worktree +compile_for_commit() { + local commit=$1 + local short_sha=$2 + local binary_path="$BENCH_DIR/bench_throughput_$short_sha" + + if binary_exists "$short_sha"; then + log_verbose "Skipping $short_sha (already compiled)" + return 0 + fi + + log_info "Compiling $short_sha..." + + if [ "$DRY_RUN" -eq 1 ]; then + echo " [DRY RUN] Would create worktree for $commit and compile" + return 0 + fi + + # Create temporary directory for worktree + local worktree_dir + worktree_dir=$(mktemp -d -t "bench_compile_${short_sha}_XXXXXX") + + log_verbose "Created worktree directory: $worktree_dir" + + # Add worktree for this commit + if ! git worktree add -q --detach "$worktree_dir" "$commit" 2>/dev/null; then + log_error "Failed to create worktree for $commit" + rm -rf "$worktree_dir" + return 1 + fi + + # Try to compile in the worktree + local compile_success=0 + if (cd "$worktree_dir" && cargo build --release --bin bench-throughput >/dev/null 2>&1); then + # Copy binary to benchmark directory + if [ -f "$worktree_dir/target/release/bench-throughput" ]; then + cp "$worktree_dir/target/release/bench-throughput" "$binary_path" + log_success "Compiled $short_sha" + compile_success=1 + else + log_error "Binary not found after compilation for $short_sha" + fi + else + log_warning "Compilation failed for $short_sha" + fi + + # Cleanup worktree + log_verbose "Cleaning up worktree for $short_sha" + git worktree remove --force "$worktree_dir" 2>/dev/null || true + rm -rf "$worktree_dir" + + [ "$compile_success" -eq 1 ] +} + +# Main compilation logic +compile_versions() { + local start_commit=$1 + local end_commit=$2 + + # Verify commits exist + if ! git rev-parse "$start_commit" >/dev/null 2>&1; then + log_error "Invalid start commit: $start_commit" + exit 1 + fi + + if ! 
git rev-parse "$end_commit" >/dev/null 2>&1; then + log_error "Invalid end commit: $end_commit" + exit 1 + fi + + # Create benchmark directory + mkdir -p "$BENCH_DIR" + + # Get list of commits + log_info "Collecting commits from $start_commit to $end_commit..." + local commits + mapfile -t commits < <(git rev-list --reverse "$start_commit^..$end_commit") + + local total=${#commits[@]} + log_info "Found $total commit(s) to process" + echo "" + + # Counters + local compiled=0 + local skipped=0 + local failed=0 + + # Process each commit + for commit in "${commits[@]}"; do + local short_sha + short_sha=$(get_short_sha "$commit") + + if binary_exists "$short_sha"; then + log_success "[$((compiled + skipped + failed + 1))/$total] $short_sha (already exists)" + skipped=$((skipped + 1)) + else + echo -n "[$((compiled + skipped + failed + 1))/$total] " + if compile_for_commit "$commit" "$short_sha"; then + compiled=$((compiled + 1)) + else + failed=$((failed + 1)) + fi + fi + done + + # Print summary + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "Summary:" + echo " Total commits: $total" + echo " Newly compiled: $compiled" + echo " Already compiled: $skipped" + echo " Failed: $failed" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" + log_info "Binaries location: $BENCH_DIR" + + if [ "$compiled" -gt 0 ] || [ "$skipped" -gt 0 ]; then + echo "" + log_success "Ready for version comparison!" + echo "" + echo "Example usage:" + echo " # Run benchmark with a specific version" + local example_sha + example_sha=$(get_short_sha "$end_commit") + echo " $BENCH_DIR/bench_throughput_$example_sha \\" + echo " --template all --size 10000 \\" + echo " --output results.json" + fi +} + +# Parse command line arguments +START_COMMIT="$DEFAULT_START_COMMIT" +END_COMMIT="HEAD" +ACTION="compile" + +while [ $# -gt 0 ]; do + case $1 in + --start) + START_COMMIT="$2" + shift 2 + ;; + --end) + END_COMMIT="$2" + shift 2 + ;; + --list) + ACTION="list" + shift + ;; + --clean) + ACTION="clean" + shift + ;; + --dry-run) + DRY_RUN=1 + shift + ;; + --verbose) + VERBOSE=1 + shift + ;; + -h | --help) + usage + exit 0 + ;; + *) + log_error "Unknown option: $1" + echo "" + usage + exit 1 + ;; + esac +done + +# Change to project root +cd "$PROJECT_ROOT" + +# Execute action +case $ACTION in +list) + list_versions + ;; +clean) + clean_versions + ;; +compile) + compile_versions "$START_COMMIT" "$END_COMMIT" + ;; +esac diff --git a/src/bin/bench-throughput.rs b/src/bin/bench-throughput.rs new file mode 100644 index 0000000..f9e7742 --- /dev/null +++ b/src/bin/bench-throughput.rs @@ -0,0 +1,694 @@ +use clap::{Arg, Command}; +use comfy_table::{ + Attribute as TableAttribute, Cell, Color as TableColor, ContentArrangement, Table, + presets::UTF8_FULL, +}; +use crossterm::{ + cursor, execute, queue, + style::{Attribute, Color, Print, ResetColor, SetAttribute, SetForegroundColor}, + terminal::{Clear, ClearType}, +}; +use serde::{Serialize, Serializer}; +use std::io::{self, Write}; +use std::time::{Duration, Instant}; +use string_pipeline::Template; +use unicode_width::UnicodeWidthStr; + +const TOOL_VERSION: &str = "2.0.0"; + +// Helper to serialize Duration as nanoseconds +fn serialize_duration(duration: &Duration, serializer: S) -> Result +where + S: Serializer, +{ + serializer.serialize_u128(duration.as_nanos()) +} + +/// Represents the results of a throughput benchmark for a specific input size +#[derive(Debug, Clone, Serialize)] +struct BenchmarkResult { + input_size: usize, + 
#[serde(serialize_with = "serialize_duration")]
+    parse_time: Duration,
+    #[serde(serialize_with = "serialize_duration")]
+    total_format_time: Duration,
+    #[serde(serialize_with = "serialize_duration")]
+    avg_time_per_path: Duration,
+    throughput_paths_per_sec: f64,
+}
+
+impl BenchmarkResult {
+    fn new(
+        input_size: usize,
+        parse_time: Duration,
+        total_format_time: Duration,
+    ) -> Self {
+        let avg_time_per_path = total_format_time / input_size as u32;
+        let throughput_paths_per_sec = input_size as f64 / total_format_time.as_secs_f64();
+
+        BenchmarkResult {
+            input_size,
+            parse_time,
+            total_format_time,
+            avg_time_per_path,
+            throughput_paths_per_sec,
+        }
+    }
+}
+
+/// Generates realistic absolute path strings for benchmarking
+struct PathGenerator {
+    directories: Vec<&'static str>,
+    filenames: Vec<&'static str>,
+    extensions: Vec<&'static str>,
+}
+
+impl PathGenerator {
+    fn new() -> Self {
+        PathGenerator {
+            directories: vec![
+                "home", "usr", "var", "opt", "etc", "lib", "bin", "sbin", "tmp", "dev",
+                "projects", "workspace", "repos", "src", "tests", "docs", "config", "data",
+                "cache", "logs", "build", "dist", "target", "node_modules", "vendor",
+                "components", "services", "models", "controllers", "views", "utils",
+            ],
+            filenames: vec![
+                "main", "lib", "index", "app", "server", "client", "config", "utils",
+                "helper", "handler", "service", "model", "controller", "router",
+                "middleware", "test", "spec", "readme", "license", "changelog", "makefile",
+                "dockerfile", "package", "cargo", "mod", "types", "constants", "errors",
+                "validation",
+            ],
+            extensions: vec![
+                "rs", "txt", "md", "json", "toml", "yaml", "yml", "js", "ts", "py", "go", "c",
+                "cpp", "h", "sh",
+            ],
+        }
+    }
+
+    /// Generate a single path with specified seed and depth
+    fn generate_path(&self, seed: usize, depth: usize) -> String {
+        let mut parts = vec![];
+
+        // Generate directory components
+        for i in 0..depth {
+            let idx = (seed + i * 7) % self.directories.len();
+            parts.push(self.directories[idx]);
+        }
+
+        // Add filename with extension
+        let filename_idx = (seed * 13) % self.filenames.len();
+        let ext_idx = (seed * 17) % self.extensions.len();
+        let filename = format!(
+            "{}.{}",
+            self.filenames[filename_idx], self.extensions[ext_idx]
+        );
+        parts.push(&filename);
+
+        format!("/{}", parts.join("/"))
+    }
+
+    /// Generate N unique paths with varying depths
+    fn generate_paths(&self, count: usize) -> Vec<String> {
+        (0..count)
+            .map(|i| {
+                let depth = 2 + (i % 9); // Depths from 2 to 10
+                self.generate_path(i, depth)
+            })
+            .collect()
+    }
+}
+
+/// Comprehensive template set with proper coverage for all operation types.
+///
+/// Organizes templates into three categories:
+/// - String operations (direct, no split needed)
+/// - Split operations
+/// - List operations (require split first, use map:{upper} for secondary ops)
+struct TemplateSet;
+
+impl TemplateSet {
+    fn get_templates() -> Vec<(&'static str, &'static str)> {
+        vec![
+            // ===== String Operations (direct, no split needed) =====
+            ("Upper", "{upper}"),
+            ("Lower", "{lower}"),
+            ("Reverse", "{reverse}"),
+            ("Trim", "{trim}"),
+            ("Trim left", "{trim:left}"),
+            ("Trim right", "{trim:right}"),
+            ("Substring range", "{substring:0..10}"),
+            ("Substring negative", "{substring:-5..}"),
+            ("Append", "{append:.bak}"),
+            ("Prepend", "{prepend:backup_}"),
+            ("Surround", "{surround:\"}"),
+            ("Pad right", "{pad:80: :right}"),
+            ("Pad left", "{pad:80:0:left}"),
+            ("Replace simple", "{replace:s/\\.txt$/.md/}"),
+            ("Replace global", "{replace:s/\\/\\/+/\\//g}"),
+            ("Regex extract", "{regex_extract:[^/]+$}"),
+            ("Strip ANSI", "{strip_ansi}"),
+            // ===== Split Operations =====
+            ("Split all", "{split:/:..}"),
+            ("Split last", "{split:/:-1}"),
+            ("Split range", "{split:/:0..-1}"),
+            ("Split first 3", "{split:/:0..3}"),
+            // ===== List Operations (with split) =====
+            ("Join", "{split:/:..|join:/}"),
+            ("Filter", "{split:/:..|filter:^[a-z]+$}"),
+            ("Filter not", "{split:/:..|filter_not:^\\.}"),
+            ("Sort", "{split:/:..|sort}"),
+            ("Sort desc", "{split:/:..|sort:desc}"),
+            ("Reverse list", "{split:/:..|reverse}"),
+            ("Unique", "{split:/:..|unique}"),
+            ("Slice", "{split:/:..|slice:2..5}"),
+            ("Slice negative", "{split:/:..|slice:-3..}"),
+            ("Map upper", "{split:/:..|map:{upper}}"),
+            ("Map trim", "{split:/:..|map:{trim}}"),
+            // ===== Complex Chains =====
+            ("Chain string ops", "{trim|upper|pad:20}"),
+            ("Chain list ops", "{split:/:..|filter:^[a-z]+$|sort|join:-}"),
+            ("Nested split", "{split:/:-1|split:.:0}"),
+            ("Map + join", "{split:/:..|map:{upper}|join:/}"),
+        ]
+    }
+}
+
+/// Runs a benchmark for a single template with a single input size
+fn benchmark_template(
+    template_str: &str,
+    size: usize,
+) -> Result<BenchmarkResult, Box<dyn std::error::Error>> {
+    let generator = PathGenerator::new();
+
+    // Time template parsing
+    let parse_start = Instant::now();
+    let template = Template::parse(template_str)?;
+    let parse_time = parse_start.elapsed();
+
+    // Generate paths
+    let paths = generator.generate_paths(size);
+
+    // Time formatting
+    let format_start = Instant::now();
+    for path in &paths {
+        let _ = template.format(path)?;
+    }
+    let total_format_time = format_start.elapsed();
+
+    Ok(BenchmarkResult::new(size, parse_time, total_format_time))
+}
+
+/// Execute a template without timing (for hyperfine integration)
+fn execute_template(
+    template_str: &str,
+    size: usize,
+) -> Result<(), Box<dyn std::error::Error>> {
+    // Parse template
+    let template = Template::parse(template_str)?;
+
+    // Generate paths
+    let generator = PathGenerator::new();
+    let paths = generator.generate_paths(size);
+
+    // Format all paths
+    for path in &paths {
+        let _ = template.format(path)?;
+    }
+
+    Ok(())
+}
+
+fn format_duration(duration: Duration) -> String {
+    let nanos = duration.as_nanos();
+    if nanos < 1_000 {
+        format!("{nanos}ns")
+    } else if nanos < 1_000_000 {
+        format!("{:.2}ΞΌs", nanos as f64 / 1_000.0)
+    } else if nanos < 1_000_000_000 {
+        format!("{:.2}ms", nanos as f64 / 1_000_000.0)
+    } else {
+        format!("{:.2}s", duration.as_secs_f64())
+    }
+}
+
+fn format_throughput(paths_per_sec: f64) -> String {
+    if paths_per_sec >= 1_000_000.0 {
+        format!("{:.2}M/s", paths_per_sec / 1_000_000.0)
+    } else if paths_per_sec >= 1_000.0 {
+        
format!("{:.2}K/s", paths_per_sec / 1_000.0) + } else { + format!("{:.2}/s", paths_per_sec) + } +} + +fn format_size(size: usize) -> String { + if size >= 1_000_000 { + format!("{}M", size / 1_000_000) + } else if size >= 1_000 { + format!("{}K", size / 1_000) + } else { + size.to_string() + } +} + +// Styled output helpers +fn print_header(text: &str) { + let mut stdout = io::stdout(); + let text_width = text.width(); + let _ = execute!( + stdout, + SetForegroundColor(Color::Cyan), + SetAttribute(Attribute::Bold), + Print("β•”"), + Print("═".repeat(78)), + Print("β•—\nβ•‘ "), + Print(text), + Print(" ".repeat(77 - text_width)), + Print("β•‘\nβ•š"), + Print("═".repeat(78)), + Print("╝\n"), + ResetColor + ); +} + +fn print_section_header(text: &str) { + let mut stdout = io::stdout(); + let _ = execute!( + stdout, + Print("\n"), + SetForegroundColor(Color::Cyan), + SetAttribute(Attribute::Bold), + Print(text), + ResetColor, + Print("\n"), + SetForegroundColor(Color::DarkGrey), + Print("─".repeat(80)), + ResetColor + ); +} + +fn print_error(msg: &str) { + let mut stdout = io::stdout(); + let _ = execute!( + stdout, + SetForegroundColor(Color::Red), + Print("βœ— "), + ResetColor, + Print(msg), + Print("\n") + ); +} + +fn print_progress_bar(current: usize, total: usize, template_name: &str) { + let mut stdout = io::stdout(); + let progress = (current as f64 / total as f64) * 100.0; + let filled = ((progress / 100.0) * 40.0) as usize; + let _ = queue!( + stdout, + cursor::MoveToColumn(0), + Clear(ClearType::CurrentLine), + SetForegroundColor(Color::Cyan), + Print("["), + SetForegroundColor(Color::Green), + Print("β–ˆ".repeat(filled)), + SetForegroundColor(Color::DarkGrey), + Print("β–‘".repeat(40 - filled)), + SetForegroundColor(Color::Cyan), + Print("]"), + ResetColor, + Print(format!(" {:.0}% ({}/{}) - ", progress, current, total)), + SetAttribute(Attribute::Dim), + Print(template_name), + ResetColor + ); + stdout.flush().ok(); +} + +fn print_template_result(template_name: &str, result: &BenchmarkResult) { + print_section_header(&format!("Template: {}", template_name)); + + // Create results table with comfy-table + let mut table = Table::new(); + table + .load_preset(UTF8_FULL) + .set_content_arrangement(ContentArrangement::Dynamic) + .set_header(vec![ + Cell::new("Input Size") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + Cell::new("Parse Time") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + Cell::new("Total Time") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + Cell::new("Avg/Path") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + Cell::new("Throughput") + .add_attribute(TableAttribute::Bold) + .fg(TableColor::Yellow), + ]); + + table.add_row(vec![ + Cell::new(format_size(result.input_size)), + Cell::new(format_duration(result.parse_time)), + Cell::new(format_duration(result.total_format_time)), + Cell::new(format_duration(result.avg_time_per_path)), + Cell::new(format_throughput(result.throughput_paths_per_sec)), + ]); + + println!("\n{}\n", table); +} + +fn print_summary(all_results: &[(&str, BenchmarkResult)]) { + let size = all_results[0].1.input_size; + let header_text = format!("πŸ“Š SUMMARY - Performance at {}", format_size(size)); + print_header(&header_text); + + // Collect results for sorting + let mut summary_data: Vec<(&str, Duration, f64)> = all_results + .iter() + .map(|(name, result)| (*name, result.avg_time_per_path, result.throughput_paths_per_sec)) + .collect(); + + // Sort by throughput 
(highest first)
+    summary_data.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap());
+
+    // Create summary table with comfy-table
+    let mut table = Table::new();
+    table
+        .load_preset(UTF8_FULL)
+        .set_content_arrangement(ContentArrangement::Dynamic)
+        .set_header(vec![
+            Cell::new("Template")
+                .add_attribute(TableAttribute::Bold)
+                .fg(TableColor::Yellow),
+            Cell::new("Avg/Path")
+                .add_attribute(TableAttribute::Bold)
+                .fg(TableColor::Yellow),
+            Cell::new("Throughput")
+                .add_attribute(TableAttribute::Bold)
+                .fg(TableColor::Yellow),
+        ]);
+
+    for (idx, (template_name, avg_time, throughput)) in summary_data.iter().enumerate() {
+        // Highlight fastest (green) and slowest (yellow)
+        let color = if idx == 0 {
+            TableColor::Green
+        } else if idx == summary_data.len() - 1 {
+            TableColor::Yellow
+        } else {
+            TableColor::Reset
+        };
+
+        table.add_row(vec![
+            Cell::new(template_name).fg(color),
+            Cell::new(format_duration(*avg_time)).fg(color),
+            Cell::new(format_throughput(*throughput)).fg(color),
+        ]);
+    }
+
+    println!("{}", table);
+}
+
+/// Output results in JSON format for tracking over time
+#[derive(Serialize)]
+struct BenchmarkOutput<'a> {
+    version: String,
+    timestamp: u64,
+    benchmarks: Vec<TemplateBenchmark<'a>>,
+}
+
+#[derive(Serialize)]
+struct TemplateBenchmark<'a> {
+    template_name: &'a str,
+    result: &'a BenchmarkResult,
+}
+
+fn output_json(
+    all_results: &[(&str, BenchmarkResult)],
+    output_path: Option<&str>,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let timestamp = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)?
+        .as_secs();
+
+    let benchmarks: Vec<TemplateBenchmark> = all_results
+        .iter()
+        .map(|(name, result)| TemplateBenchmark {
+            template_name: name,
+            result,
+        })
+        .collect();
+
+    let output = BenchmarkOutput {
+        version: TOOL_VERSION.to_string(),
+        timestamp,
+        benchmarks,
+    };
+
+    let json_string = serde_json::to_string_pretty(&output)?;
+
+    if let Some(path) = output_path {
+        std::fs::write(path, json_string)?;
+        let mut stdout = io::stdout();
+        let _ = execute!(
+            stdout,
+            Print("\n"),
+            SetForegroundColor(Color::Green),
+            Print("βœ“ JSON output written to: "),
+            ResetColor,
+            Print(format!("{}\n", path))
+        );
+    } else {
+        println!("\n{}", json_string);
+    }
+
+    Ok(())
+}
+
+fn get_default_output_path() -> Result<String, Box<dyn std::error::Error>> {
+    let data_home = std::env::var("XDG_DATA_HOME").unwrap_or_else(|_| {
+        let home = std::env::var("HOME").expect("HOME environment variable not set");
+        format!("{}/.local/share", home)
+    });
+
+    let benchmark_dir = format!("{}/string-pipeline/benchmarks", data_home);
+    std::fs::create_dir_all(&benchmark_dir)?;
+
+    let timestamp = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)?
+        .as_secs();
+
+    Ok(format!("{}/bench-{}.json", benchmark_dir, timestamp))
+}
+
+fn list_templates() {
+    println!("Available predefined templates:\n");
+    for (name, template) in TemplateSet::get_templates() {
+        println!("  {:<30} {}", name, template);
+    }
+}
+
+fn execute_all_templates_mode(size: usize, output_path: Option<&str>, verbose: bool) {
+    print_header(&format!(
+        "String Pipeline Throughput Benchmark {}",
+        TOOL_VERSION
+    ));
+
+    let mut stdout = io::stdout();
+    let _ = execute!(
+        stdout,
+        Print("Measuring template processing performance\n\n"),
+        SetForegroundColor(Color::Cyan),
+        Print("Input size: "),
+        ResetColor,
+        Print(format!("{}\n", format_size(size)))
+    );
+
+    let templates = TemplateSet::get_templates();
+    let mut all_results = Vec::new();
+    let total_templates = templates.len();
+
+    for (idx, (template_name, template_str)) in templates.iter().enumerate() {
+        print_progress_bar(idx + 1, total_templates, template_name);
+
+        match benchmark_template(template_str, size) {
+            Ok(result) => {
+                let mut stdout = io::stdout();
+                let _ = execute!(
+                    stdout,
+                    cursor::MoveToColumn(0),
+                    Clear(ClearType::CurrentLine)
+                );
+                if verbose {
+                    print_template_result(template_name, &result);
+                }
+                all_results.push((*template_name, result));
+            }
+            Err(e) => {
+                let mut stdout = io::stdout();
+                let _ = execute!(
+                    stdout,
+                    cursor::MoveToColumn(0),
+                    Clear(ClearType::CurrentLine)
+                );
+                print_error(&format!("Failed to benchmark '{}': {}", template_name, e));
+            }
+        }
+    }
+
+    print_summary(&all_results);
+
+    if let Some(path) = output_path
+        && let Err(e) = output_json(&all_results, Some(path))
+    {
+        eprintln!("Error writing JSON output: {}", e);
+        std::process::exit(1);
+    }
+
+    let mut stdout = io::stdout();
+    let _ = execute!(
+        stdout,
+        SetForegroundColor(Color::Green),
+        SetAttribute(Attribute::Bold),
+        Print("\nβœ“ Benchmark complete!\n"),
+        ResetColor
+    );
+}
+
+fn execute_specific_template_mode(template_str: &str, size: usize) {
+    match execute_template(template_str, size) {
+        Ok(_) => std::process::exit(0),
+        Err(e) => {
+            eprintln!("Error: {}", e);
+            std::process::exit(1);
+        }
+    }
+}
+
+fn main() {
+    let matches = Command::new("String Pipeline Throughput Benchmark")
+        .version(TOOL_VERSION)
+        .about("Benchmarks template processing performance")
+        .arg(
+            Arg::new("template")
+                .short('t')
+                .long("template")
+                .value_name("TEMPLATE")
+                .help("Template to benchmark: 'all' for predefined set, or template string")
+                .default_value("all"),
+        )
+        .arg(
+            Arg::new("size")
+                .short('s')
+                .long("size")
+                .value_name("COUNT")
+                .help("Number of paths to process")
+                .default_value("10000"),
+        )
+        .arg(
+            Arg::new("output")
+                .short('o')
+                .long("output")
+                .value_name("FILE")
+                .help("JSON output file (only for --template all)"),
+        )
+        .arg(
+            Arg::new("verbose")
+                .short('v')
+                .long("verbose")
+                .action(clap::ArgAction::SetTrue)
+                .help("Show detailed per-template results (only for --template all)"),
+        )
+        .arg(
+            Arg::new("list")
+                .long("list-templates")
+                .action(clap::ArgAction::SetTrue)
+                .help("List available predefined templates and exit"),
+        )
+        .get_matches();
+
+    // Parse arguments
+    let template_arg = matches.get_one::<String>("template").unwrap();
+    let size: usize = matches
+        .get_one::<String>("size")
+        .unwrap()
+        .parse()
+        .expect("Invalid size value");
+    let output_path = matches
+        .get_one::<String>("output")
+        .map(|s| s.to_string())
+        .or_else(|| get_default_output_path().ok());
+    let verbose = matches.get_flag("verbose");
+    let list = matches.get_flag("list");
+
+    // List templates
+    if 
From 6033d04fda3a029e2ff3f098baee0169f7c440f7 Mon Sep 17 00:00:00 2001
From: LM
Date: Sun, 9 Nov 2025 14:01:46 +0100
Subject: [PATCH 2/5] fix(bench): pin default start commit to the benchmark
 tool's initial commit

---
 scripts/compile_benchmark_versions.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/compile_benchmark_versions.sh b/scripts/compile_benchmark_versions.sh
index 2a6fb7f..e2f8904 100755
--- a/scripts/compile_benchmark_versions.sh
+++ b/scripts/compile_benchmark_versions.sh
@@ -8,7 +8,7 @@ set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
 BENCH_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/string_pipeline/benchmarks"
-DEFAULT_START_COMMIT="HEAD~10"
+DEFAULT_START_COMMIT="5e028194"
 
 VERBOSE=0
 DRY_RUN=0

From e1fca2f4532aa17669f9b481a324be786feda363 Mon Sep 17 00:00:00 2001
From: LM
Date: Fri, 1 Aug 2025 09:47:03 +0200
Subject: [PATCH 3/5] refactor(pest)!: greatly simplify grammar, removing
 expensive look-ahead that offered no real benefit; expect some changes in
 templates that relied on smart escaping, which is now more intentional

---
 docs/template-system.md            |  18 ++---
 src/pipeline/parser.rs             |   3 +-
 src/pipeline/template.pest         | 105 ++++++++----------------
 tests/multi_template_tests.rs      |   2 +-
 tests/template/complex_pipeline.rs |   2 +-
 tests/template/simple_pipeline.rs  |   2 +-
 6 files changed, 37 insertions(+), 95 deletions(-)

diff --git a/docs/template-system.md b/docs/template-system.md
index de85f88..228b1d1 100644
--- a/docs/template-system.md
+++ b/docs/template-system.md
@@ -942,9 +942,11 @@ The range system includes robust edge case handling:
 
 ### When is Escaping Required?
 
-Different argument types have different escaping requirements:
+The template parser uses a unified argument parsing system where all operations follow the same escaping rules for consistency and maintainability:
 
-### Simple Arguments (append, prepend, join, etc.)
+### Operation Arguments
+
+All operations use the same argument parsing rules. The following characters require escaping:
 
 | Character | Escape | Reason               |
 |-----------|--------|----------------------|
 | `:`       | `\:`   | Separates arguments  |
 | `|`       | `\|`   | Separates operations |
 | `}`       | `\}`   | Ends template        |
 | `{`       | `\{`   | Starts template      |
 | `\`       | `\\`   | Escape character     |
 
-### Regex Arguments (filter, regex_extract)
-
-Regex patterns can contain most characters naturally.
-
-### Split Arguments
-
-Split separators can contain most characters. Only escape:
-
-| Character | Escape | Reason |
-|-----------|--------|--------|
-| `:` | `\:` | Visual helper |
-
 ### Special Sequences
 
 | Sequence | Result | Description |

diff --git a/src/pipeline/parser.rs b/src/pipeline/parser.rs
index 0ac12f0..a2f179e 100644
--- a/src/pipeline/parser.rs
+++ b/src/pipeline/parser.rs
@@ -5,8 +5,7 @@
 //! Pest parser generator for robust syntax handling with comprehensive error reporting.
 //!
 //! The parser supports the full template syntax including operations, ranges,
-//! escape sequences, and debug flags, with intelligent handling of special
-//! characters in different contexts.
+//! escape sequences, and debug flags.
 //!
 
 use pest::Parser;
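Note on the user-visible change: with the look-ahead gone, characters
such as `|` and `:` inside operation arguments must now be escaped
explicitly. The test updates below show the migration:

    {split:|:0}            ->  {split:\|:0}
    {regex_extract:Version: (\d+\.\d+\.\d+):1}
                           ->  {regex_extract:Version\: (\d+\.\d+\.\d+):1}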
diff --git a/src/pipeline/template.pest b/src/pipeline/template.pest
index 0aee8ea..3c16beb 100644
--- a/src/pipeline/template.pest
+++ b/src/pipeline/template.pest
@@ -40,33 +40,38 @@ shorthand_range = {
   | range_full
 }
 
+// Argument parsing - requires escaping for special characters
+argument = { (escaped_char | normal_char)* }
+normal_char = { !("|" | "}" | "{" | ":" | "\\") ~ ANY }
+escaped_char = { "\\" ~ ANY }
+
 // Main operations - using specific arg types where needed
-regex_extract = { "regex_extract" ~ ":" ~ regex_arg ~ (":" ~ number)? }
-filter_not = { "filter_not" ~ ":" ~ regex_arg }
-filter = { "filter" ~ ":" ~ regex_arg }
+regex_extract = { "regex_extract" ~ ":" ~ argument ~ (":" ~ number)? }
+filter_not = { "filter_not" ~ ":" ~ argument }
+filter = { "filter" ~ ":" ~ argument }
 strip_ansi = @{ "strip_ansi" }
-map = { "map" ~ ":" ~ map_operation }
-split = { "split" ~ ":" ~ split_arg ~ ":" ~ range_spec? }
-substring = { "substring" ~ ":" ~ range_spec }
-replace = { "replace" ~ ":" ~ sed_string }
-append = { "append" ~ ":" ~ simple_arg }
-prepend = { "prepend" ~ ":" ~ simple_arg }
-surround = { "surround" ~ ":" ~ simple_arg }
-quote = { "quote" ~ ":" ~ simple_arg }
+map = { "map" ~ ":" ~ map_operation }
+split = { "split" ~ ":" ~ argument ~ ":" ~ range_spec? }
+substring = { "substring" ~ ":" ~ range_spec }
+replace = { "replace" ~ ":" ~ sed_string }
+append = { "append" ~ ":" ~ argument }
+prepend = { "prepend" ~ ":" ~ argument }
+surround = { "surround" ~ ":" ~ argument }
+quote = { "quote" ~ ":" ~ argument }
 upper = @{ "upper" }
 lower = @{ "lower" }
-trim = { "trim" ~ (":" ~ simple_arg)? ~ (":" ~ direction)? }
-join = { "join" ~ ":" ~ simple_arg }
-slice = { "slice" ~ ":" ~ range_spec }
-sort = { "sort" ~ (":" ~ sort_direction)? }
+trim = { "trim" ~ (":" ~ argument)? ~ (":" ~ direction)? }
+join = { "join" ~ ":" ~ argument }
+slice = { "slice" ~ ":" ~ range_spec }
+sort = { "sort" ~ (":" ~ sort_direction)? }
 reverse = @{ "reverse" }
 unique = @{ "unique" }
-pad = { "pad" ~ ":" ~ number ~ (":" ~ pad_char)? ~ (":" ~ direction)? }
+pad = { "pad" ~ ":" ~ number ~ (":" ~ pad_char)? ~ (":" ~ direction)? }
 
 // Direction specifiers
 direction = @{ "left" | "right" | "both" }
 sort_direction = @{ "asc" | "desc" }
-pad_char = @{ simple_arg_content+ }
+pad_char = @{ argument }
 
 // Map operation
 map_operation = { "{" ~ map_operation_list ~ "}" }
@@ -95,68 +100,16 @@ map_inner_operation = {
 }
 
 // Map-specific operations that need special handling
-map_split = { "split" ~ ":" ~ split_arg ~ (":" ~ range_spec)? }
-map_join = { "join" ~ ":" ~ simple_arg }
-map_slice = { "slice" ~ ":" ~ range_spec }
-map_sort = { "sort" ~ (":" ~ sort_direction)? }
+map_split = { "split" ~ ":" ~ argument ~ (":" ~ range_spec)? }
+map_join = { "join" ~ ":" ~ argument }
+map_slice = { "slice" ~ ":" ~ range_spec }
+map_sort = { "sort" ~ (":" ~ sort_direction)? }
 map_unique = @{ "unique" }
-map_filter = { "filter" ~ ":" ~ map_regex_arg }
-map_filter_not = { "filter_not" ~ ":" ~ map_regex_arg }
+map_filter = { "filter" ~ ":" ~ argument }
+map_filter_not = { "filter_not" ~ ":" ~ argument }
 
 // Map-specific regex extract
-map_regex_extract = { "regex_extract" ~ ":" ~ map_regex_arg ~ (":" ~ number)? }
-
-// Simplified argument handling - three types to handle specific cases
-simple_arg = @{ simple_arg_content* }
-simple_arg_content = { escaped_char | simple_normal_char }
-simple_normal_char = { !(":" | "|" | "}" | "{" | "\\") ~ ANY }
-
-// Split args - need to handle pipes that aren't operations
-split_arg = @{ (split_escaped_char | split_content)* }
-split_content = { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
-split_escaped_char = { "\\" ~ ANY }
-
-// Regex args - need to handle pipes and braces in regex patterns
-regex_arg = @{ (regex_escaped_char | regex_content)* }
-regex_content = { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
-regex_escaped_char = { "\\" ~ ANY }
-
-// Map regex args - handle braces in regex patterns
-map_regex_arg = @{ (map_regex_escaped_char | map_regex_brace | map_regex_content)* }
-map_regex_brace = { "{" ~ (!"}" ~ ANY)* ~ "}" }
-map_regex_content = { !(":" ~ number) ~ !("|" ~ operation_keyword) ~ !("{" | ("}" ~ ("|" | "}" | EOI))) ~ ANY }
-map_regex_escaped_char = { "\\" ~ ANY }
-
-// Common escaped character handling
-escaped_char = { "\\" ~ ANY }
-
-// Operation keywords for lookahead (simplified list)
-operation_keyword = _{
-    "split"
-  | "upper"
-  | "lower"
-  | "trim"
-  | "append"
-  | "prepend"
-  | "surround"
-  | "quote"
-  | "join"
-  | "substring"
-  | "replace"
-  | "map"
-  | "filter"
-  | "filter_not"
-  | "slice"
-  | "sort"
-  | "reverse"
-  | "unique"
-  | "regex_extract"
-  | "strip_ansi"
-  | "pad"
-}
-
-// Range parts for lookahead
-range_part = _{ ".." | "..=" }
+map_regex_extract = { "regex_extract" ~ ":" ~ argument ~ (":" ~ number)? }
 
 // Sed strings
 sed_string = { "s/" ~ sed_pattern ~ "/" ~ sed_replacement ~ "/" ~ sed_flags? }
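For code that builds templates programmatically, the new rules are
mechanical to apply. A small helper along these lines (hypothetical,
not part of this patch) escapes exactly the five characters excluded by
the `normal_char` rule:

    /// Escape a literal value so it can be spliced into an operation
    /// argument under the unified grammar.
    fn escape_arg(s: &str) -> String {
        let mut out = String::with_capacity(s.len() + 4);
        for c in s.chars() {
            // `normal_char` rejects exactly these five characters
            if matches!(c, '|' | '}' | '{' | ':' | '\\') {
                out.push('\\');
            }
            out.push(c);
        }
        out
    }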
diff --git a/tests/multi_template_tests.rs b/tests/multi_template_tests.rs
index 786a1d7..f396d6e 100644
--- a/tests/multi_template_tests.rs
+++ b/tests/multi_template_tests.rs
@@ -55,7 +55,7 @@ fn test_multi_template_caching_optimization() {
 fn test_multi_template_different_separators() {
     // Test multiple template sections with different separators
     let template =
-        MultiTemplate::parse("Comma: {split:,:0} Space: {split: :1} Pipe: {split:|:0}").unwrap();
+        MultiTemplate::parse("Comma: {split:,:0} Space: {split: :1} Pipe: {split:\\|:0}").unwrap();
     let result = template.format("a,b c|d").unwrap();
     assert_eq!(result, "Comma: a Space: c|d Pipe: a,b c");
 }

diff --git a/tests/template/complex_pipeline.rs b/tests/template/complex_pipeline.rs
index 6e281bf..8853e83 100644
--- a/tests/template/complex_pipeline.rs
+++ b/tests/template/complex_pipeline.rs
@@ -395,7 +395,7 @@ fn test_special_chars_pipeline() {
 
 #[test]
 fn test_escaped_pipes_pipeline() {
-    let result = process("test", r"{replace:s/test/a|b/|split:|:..|join:-}");
+    let result = process("test", r"{replace:s/test/a|b/|split:\|:..|join:-}");
     assert_eq!(result.unwrap(), "a-b");
 }

diff --git a/tests/template/simple_pipeline.rs b/tests/template/simple_pipeline.rs
index c2a480d..f98e154 100644
--- a/tests/template/simple_pipeline.rs
+++ b/tests/template/simple_pipeline.rs
@@ -1290,7 +1290,7 @@ pub mod regex_extract_operations {
         assert_eq!(
             process(
                 "Version: 1.2.3-beta",
-                r"{regex_extract:Version: (\d+\.\d+\.\d+):1}"
+                r"{regex_extract:Version\: (\d+\.\d+\.\d+):1}"
             )
             .unwrap(),
             "1.2.3"

From cd5a766f01a90d3dc4efabb69dc368d5d4fb3766 Mon Sep 17 00:00:00 2001
From: Claude
Date: Fri, 7 Nov 2025 18:46:11 +0000
Subject: [PATCH 4/5] perf(grammar): reorder pest grammar operations by usage
 frequency

Reorganized operation alternatives based on actual benchmark usage
patterns to optimize PEG parser performance. Most frequently used
operations (split, join, upper, lower, trim, substring, reverse) are
now tested first.
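For example, under ordered choice a `join` operation previously had to
fail ten alternatives (shorthand_range through quote) before matching;
after this reordering it is the fourth alternative tried. The set of
accepted templates should be unchanged; only the order in which
alternatives are attempted changes.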
---
 src/pipeline/template.pest | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/pipeline/template.pest b/src/pipeline/template.pest
index 3c16beb..3d0d72a 100644
--- a/src/pipeline/template.pest
+++ b/src/pipeline/template.pest
@@ -4,30 +4,31 @@ debug_flag = @{ "!" }
 
 operation_list = { operation ~ ("|" ~ operation)* }
 
+// Ordered by frequency based on benchmark usage patterns
 operation = {
     shorthand_range
   | shorthand_index
   | split
+  | join
   | upper
   | lower
   | trim
-  | append
-  | prepend
-  | surround
-  | quote
-  | join
   | substring
+  | reverse
   | replace
-  | map
   | filter
   | filter_not
-  | slice
   | sort
-  | reverse
   | unique
+  | map
+  | slice
+  | append
+  | prepend
+  | surround
+  | quote
+  | pad
   | regex_extract
   | strip_ansi
-  | pad
 }
 
 shorthand_index = { number }
@@ -76,27 +77,28 @@ pad_char = @{ argument }
 
 // Map operation
 map_operation = { "{" ~ map_operation_list ~ "}" }
 map_operation_list = { map_inner_operation ~ ("|" ~ map_inner_operation)* }
+// Ordered by frequency for map operations
 map_inner_operation = {
-    strip_ansi
+    upper
+  | lower
+  | trim
   | substring
+  | reverse
   | replace
+  | map_split
+  | map_join
   | append
   | prepend
   | surround
   | quote
-  | upper
-  | lower
-  | trim
   | pad
-  | reverse
-  | map_split
-  | map_join
   | map_slice
   | map_sort
   | map_unique
   | map_filter
   | map_filter_not
   | map_regex_extract
+  | strip_ansi
 }
 
 // Map-specific operations that need special handling

From 92ab5d17930fb8d691f20a4ab8e73199afa1e584 Mon Sep 17 00:00:00 2001
From: Claude
Date: Fri, 7 Nov 2025 19:03:35 +0000
Subject: [PATCH 5/5] perf(operations): add literal string fast paths for
 filter and replace

Add fast-path optimizations for filter, filter_not, and replace
operations when patterns contain no regex metacharacters. This avoids
unnecessary regex compilation and matching for simple literal string
operations.
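For example, `{filter:error}` now reduces to a plain substring test and
`{replace:s/foo/bar/g}` to str::replace, with no regex compilation.
Patterns containing any character from the checked metacharacter set,
and replace flags other than `g`, still take the cached-regex path, so
regex semantics are unchanged.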
---
 src/pipeline/mod.rs | 92 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 74 insertions(+), 18 deletions(-)

diff --git a/src/pipeline/mod.rs b/src/pipeline/mod.rs
index 83e5cde..565112c 100644
--- a/src/pipeline/mod.rs
+++ b/src/pipeline/mod.rs
@@ -1354,21 +1354,69 @@ fn apply_single_operation(
             apply_list_operation(val, |list| apply_range(&list, range), "Slice")
         }
         StringOp::Filter { pattern } => {
-            let re = get_cached_regex(pattern)?;
+            // Fast path for literal string matching (no regex metacharacters)
+            let is_literal = !pattern.contains([
+                '\\', '.', '*', '+', '?', '^', '$', '|', '[', ']', '(', ')', '{', '}',
+            ]);
+
             match val {
-                Value::List(list) => Ok(Value::List(
-                    list.into_iter().filter(|s| re.is_match(s)).collect(),
-                )),
-                Value::Str(s) => Ok(Value::Str(if re.is_match(&s) { s } else { String::new() })),
+                Value::List(list) => {
+                    if is_literal {
+                        Ok(Value::List(
+                            list.into_iter().filter(|s| s.contains(pattern)).collect(),
+                        ))
+                    } else {
+                        let re = get_cached_regex(pattern)?;
+                        Ok(Value::List(
+                            list.into_iter().filter(|s| re.is_match(s)).collect(),
+                        ))
+                    }
+                }
+                Value::Str(s) => {
+                    if is_literal {
+                        Ok(Value::Str(if s.contains(pattern) {
+                            s
+                        } else {
+                            String::new()
+                        }))
+                    } else {
+                        let re = get_cached_regex(pattern)?;
+                        Ok(Value::Str(if re.is_match(&s) { s } else { String::new() }))
+                    }
+                }
             }
         }
         StringOp::FilterNot { pattern } => {
-            let re = get_cached_regex(pattern)?;
+            // Fast path for literal string matching (no regex metacharacters)
+            let is_literal = !pattern.contains([
+                '\\', '.', '*', '+', '?', '^', '$', '|', '[', ']', '(', ')', '{', '}',
+            ]);
+
             match val {
-                Value::List(list) => Ok(Value::List(
-                    list.into_iter().filter(|s| !re.is_match(s)).collect(),
-                )),
-                Value::Str(s) => Ok(Value::Str(if re.is_match(&s) { String::new() } else { s })),
+                Value::List(list) => {
+                    if is_literal {
+                        Ok(Value::List(
+                            list.into_iter().filter(|s| !s.contains(pattern)).collect(),
+                        ))
+                    } else {
+                        let re = get_cached_regex(pattern)?;
+                        Ok(Value::List(
+                            list.into_iter().filter(|s| !re.is_match(s)).collect(),
+                        ))
+                    }
+                }
+                Value::Str(s) => {
+                    if is_literal {
+                        Ok(Value::Str(if s.contains(pattern) {
+                            String::new()
+                        } else {
+                            s
+                        }))
+                    } else {
+                        let re = get_cached_regex(pattern)?;
+                        Ok(Value::Str(if re.is_match(&s) { String::new() } else { s }))
+                    }
+                }
             }
         }
         StringOp::Sort { direction } => {
@@ -1429,16 +1477,24 @@
             flags,
         } => {
             if let Value::Str(s) = val {
-                // Early exit for simple string patterns (not regex)
-                if !flags.contains('g')
-                    && !pattern.contains([
-                        '\\', '.', '*', '+', '?', '^', '$', '|', '[', ']', '(', ')', '{', '}',
-                    ])
-                    && !s.contains(pattern)
-                {
-                    return Ok(Value::Str(s));
+                // Fast path for literal string replacement (no regex metacharacters or special flags)
+                let is_literal = !pattern.contains([
+                    '\\', '.', '*', '+', '?', '^', '$', '|', '[', ']', '(', ')', '{', '}',
+                ]);
+
+                // Only use fast path if no special regex flags (case-insensitive, multiline, etc.)
+                let has_special_flags = flags.chars().any(|c| c != 'g');
+
+                if is_literal && !has_special_flags {
+                    let result = if flags.contains('g') {
+                        s.replace(pattern, replacement)
+                    } else {
+                        s.replacen(pattern, replacement, 1)
+                    };
+                    return Ok(Value::Str(result));
                 }
 
+                // Regex path for complex patterns
                 let pattern_to_use = if flags.is_empty() {
                     pattern.clone()
                 } else {