From 2bdefef60b0f0e9fd4db2f154d0610432c746363 Mon Sep 17 00:00:00 2001 From: Jared Pleva Date: Wed, 1 Apr 2026 00:47:29 +0000 Subject: [PATCH 1/4] =?UTF-8?q?feat(ralph):=20add=20Ralph=20Loop=20?= =?UTF-8?q?=E2=80=94=20stateless-iterative=20task=20execution=20engine?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements PICK -> IMPLEMENT -> VALIDATE -> COMMIT -> RESET cycle. Each task gets a fresh RunLoop call with isolated context, preventing cross-task context pollution. Includes task file parser, priority-based picker, JSONL append-only log, dry-run mode, and validation commands. 21 tests passing. Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/ralph/log.go | 91 ++++++++++++++++ internal/ralph/log_test.go | 97 +++++++++++++++++ internal/ralph/picker.go | 43 ++++++++ internal/ralph/picker_test.go | 84 +++++++++++++++ internal/ralph/ralph.go | 190 ++++++++++++++++++++++++++++++++++ internal/ralph/ralph_test.go | 174 +++++++++++++++++++++++++++++++ internal/ralph/task.go | 78 ++++++++++++++ internal/ralph/task_test.go | 114 ++++++++++++++++++++ 8 files changed, 871 insertions(+) create mode 100644 internal/ralph/log.go create mode 100644 internal/ralph/log_test.go create mode 100644 internal/ralph/picker.go create mode 100644 internal/ralph/picker_test.go create mode 100644 internal/ralph/ralph.go create mode 100644 internal/ralph/ralph_test.go create mode 100644 internal/ralph/task.go create mode 100644 internal/ralph/task_test.go diff --git a/internal/ralph/log.go b/internal/ralph/log.go new file mode 100644 index 0000000..c9eca3a --- /dev/null +++ b/internal/ralph/log.go @@ -0,0 +1,91 @@ +package ralph + +import ( + "encoding/json" + "fmt" + "os" + "time" +) + +// TaskLogEntry records the result of a single task execution. 
+type TaskLogEntry struct { + TaskID string `json:"task_id"` + Description string `json:"description"` + Status TaskStatus `json:"status"` + Output string `json:"output,omitempty"` + Error string `json:"error,omitempty"` + Turns int `json:"turns"` + ToolCalls int `json:"tool_calls"` + DurationMs int64 `json:"duration_ms"` + Timestamp string `json:"timestamp"` +} + +// TaskLog is an append-only JSONL log of task results. +type TaskLog struct { + Path string +} + +// NewTaskLog creates a TaskLog that writes to the given file path. +func NewTaskLog(path string) *TaskLog { + return &TaskLog{Path: path} +} + +// Append writes a single log entry as a JSONL line. +func (tl *TaskLog) Append(entry TaskLogEntry) error { + if entry.Timestamp == "" { + entry.Timestamp = time.Now().UTC().Format(time.RFC3339) + } + data, err := json.Marshal(entry) + if err != nil { + return fmt.Errorf("marshal log entry: %w", err) + } + f, err := os.OpenFile(tl.Path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("open log file: %w", err) + } + defer f.Close() + _, err = f.Write(append(data, '\n')) + if err != nil { + return fmt.Errorf("write log entry: %w", err) + } + return nil +} + +// Read returns all log entries from the file. 
+func (tl *TaskLog) Read() ([]TaskLogEntry, error) { + data, err := os.ReadFile(tl.Path) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read log file: %w", err) + } + + var entries []TaskLogEntry + // Split by newlines and parse each line + start := 0 + for i := 0; i < len(data); i++ { + if data[i] == '\n' { + line := data[start:i] + start = i + 1 + if len(line) == 0 { + continue + } + var entry TaskLogEntry + if err := json.Unmarshal(line, &entry); err != nil { + continue // skip malformed lines + } + entries = append(entries, entry) + } + } + // Handle last line without trailing newline + if start < len(data) { + line := data[start:] + var entry TaskLogEntry + if err := json.Unmarshal(line, &entry); err == nil { + entries = append(entries, entry) + } + } + + return entries, nil +} diff --git a/internal/ralph/log_test.go b/internal/ralph/log_test.go new file mode 100644 index 0000000..928dad0 --- /dev/null +++ b/internal/ralph/log_test.go @@ -0,0 +1,97 @@ +package ralph + +import ( + "path/filepath" + "testing" +) + +func TestTaskLog_AppendAndRead(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "ralph.jsonl") + + log := NewTaskLog(path) + + // Append two entries + err := log.Append(TaskLogEntry{ + TaskID: "1", + Description: "First task", + Status: StatusCompleted, + Output: "done", + Turns: 3, + ToolCalls: 5, + DurationMs: 1200, + Timestamp: "2026-03-31T10:00:00Z", + }) + if err != nil { + t.Fatalf("Append 1: %v", err) + } + + err = log.Append(TaskLogEntry{ + TaskID: "2", + Description: "Second task", + Status: StatusFailed, + Error: "timeout", + Turns: 10, + ToolCalls: 8, + DurationMs: 5000, + Timestamp: "2026-03-31T10:01:00Z", + }) + if err != nil { + t.Fatalf("Append 2: %v", err) + } + + // Read back + entries, err := log.Read() + if err != nil { + t.Fatalf("Read: %v", err) + } + if len(entries) != 2 { + t.Fatalf("expected 2 entries, got %d", len(entries)) + } + + if entries[0].TaskID != "1" || 
entries[0].Status != StatusCompleted { + t.Errorf("entry 0: %+v", entries[0]) + } + if entries[1].TaskID != "2" || entries[1].Status != StatusFailed { + t.Errorf("entry 1: %+v", entries[1]) + } + if entries[1].Error != "timeout" { + t.Errorf("entry 1 error: expected 'timeout', got %q", entries[1].Error) + } +} + +func TestTaskLog_Read_NonExistent(t *testing.T) { + log := NewTaskLog("/nonexistent/ralph.jsonl") + entries, err := log.Read() + if err != nil { + t.Fatalf("Read non-existent: %v", err) + } + if len(entries) != 0 { + t.Errorf("expected 0 entries, got %d", len(entries)) + } +} + +func TestTaskLog_Append_SetsTimestamp(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "ralph.jsonl") + + log := NewTaskLog(path) + + // Append without explicit timestamp + err := log.Append(TaskLogEntry{ + TaskID: "1", + Description: "Auto-timestamp", + Status: StatusCompleted, + }) + if err != nil { + t.Fatalf("Append: %v", err) + } + + entries, _ := log.Read() + if len(entries) != 1 { + t.Fatalf("expected 1 entry, got %d", len(entries)) + } + if entries[0].Timestamp == "" { + t.Error("expected auto-generated timestamp") + } +} diff --git a/internal/ralph/picker.go b/internal/ralph/picker.go new file mode 100644 index 0000000..b0ffb49 --- /dev/null +++ b/internal/ralph/picker.go @@ -0,0 +1,43 @@ +package ralph + +// Picker selects the next task to execute from a task source. +type Picker interface { + // Pick returns the next task to execute, or nil if none are available. + Pick() (*Task, error) + // Update persists a status change for a task. + Update(task Task) error +} + +// FilePicker reads tasks from a JSON file on disk. +type FilePicker struct { + Path string +} + +// NewFilePicker creates a Picker backed by a task file. +func NewFilePicker(path string) *FilePicker { + return &FilePicker{Path: path} +} + +// Pick reads the task file and returns the highest-priority pending task. 
+func (fp *FilePicker) Pick() (*Task, error) { + tasks, err := ParseTaskFile(fp.Path) + if err != nil { + return nil, err + } + return NextPending(tasks), nil +} + +// Update reads the task file, updates the matching task, and writes it back. +func (fp *FilePicker) Update(task Task) error { + tasks, err := ParseTaskFile(fp.Path) + if err != nil { + return err + } + for i, t := range tasks { + if t.ID == task.ID { + tasks[i] = task + return WriteTaskFile(fp.Path, tasks) + } + } + return nil +} diff --git a/internal/ralph/picker_test.go b/internal/ralph/picker_test.go new file mode 100644 index 0000000..f91332f --- /dev/null +++ b/internal/ralph/picker_test.go @@ -0,0 +1,84 @@ +package ralph + +import ( + "os" + "path/filepath" + "testing" +) + +func TestFilePicker_Pick(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + + content := `[ + {"id": "1", "description": "First task", "status": "pending", "priority": 2}, + {"id": "2", "description": "Second task", "status": "pending", "priority": 1}, + {"id": "3", "description": "Done task", "status": "completed", "priority": 0} +]` + os.WriteFile(path, []byte(content), 0o644) + + picker := NewFilePicker(path) + task, err := picker.Pick() + if err != nil { + t.Fatalf("Pick: %v", err) + } + if task == nil { + t.Fatal("expected a task") + } + if task.ID != "2" { + t.Errorf("expected task 2, got %s", task.ID) + } +} + +func TestFilePicker_Pick_NoPending(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + + content := `[{"id": "1", "description": "Done", "status": "completed", "priority": 1}]` + os.WriteFile(path, []byte(content), 0o644) + + picker := NewFilePicker(path) + task, err := picker.Pick() + if err != nil { + t.Fatalf("Pick: %v", err) + } + if task != nil { + t.Errorf("expected nil, got task %s", task.ID) + } +} + +func TestFilePicker_Update(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + + content := `[ + {"id": "1", 
"description": "Task one", "status": "pending", "priority": 1}, + {"id": "2", "description": "Task two", "status": "pending", "priority": 2} +]` + os.WriteFile(path, []byte(content), 0o644) + + picker := NewFilePicker(path) + + // Update task 1 to completed + err := picker.Update(Task{ + ID: "1", + Description: "Task one", + Status: StatusCompleted, + Priority: 1, + }) + if err != nil { + t.Fatalf("Update: %v", err) + } + + // Verify the update persisted + tasks, err := ParseTaskFile(path) + if err != nil { + t.Fatalf("ParseTaskFile after update: %v", err) + } + if tasks[0].Status != StatusCompleted { + t.Errorf("expected completed, got %s", tasks[0].Status) + } + if tasks[1].Status != StatusPending { + t.Errorf("task 2 should still be pending, got %s", tasks[1].Status) + } +} diff --git a/internal/ralph/ralph.go b/internal/ralph/ralph.go new file mode 100644 index 0000000..37c5017 --- /dev/null +++ b/internal/ralph/ralph.go @@ -0,0 +1,190 @@ +package ralph + +import ( + "fmt" + "os/exec" + "strings" + "time" + + "github.com/AgentGuardHQ/shellforge/internal/agent" + "github.com/AgentGuardHQ/shellforge/internal/governance" +) + +// TaskSourceType defines how tasks are sourced. +type TaskSourceType string + +const ( + SourceFile TaskSourceType = "file" + SourceMCP TaskSourceType = "mcp" +) + +// RalphConfig configures a Ralph Loop execution. +type RalphConfig struct { + TaskSource TaskSourceType + TaskFile string + MCPEndpoint string + LogFile string + Validate []string // shell commands to run for validation + AutoCommit bool + MaxTasks int // 0 = unlimited + LoopConfig agent.LoopConfig + DryRun bool +} + +// RalphResult summarizes the outcome of a Ralph Loop run. +type RalphResult struct { + Completed int + Failed int + Skipped int + Total int + Entries []TaskLogEntry +} + +// RunRalph executes the Ralph Loop: PICK -> IMPLEMENT -> VALIDATE -> COMMIT -> RESET. 
+// Each iteration picks the next pending task, runs agent.RunLoop with a fresh context, +// validates the result, optionally commits, then resets for the next task. +func RunRalph(cfg RalphConfig, engine *governance.Engine) (*RalphResult, error) { + picker, err := makePicker(cfg) + if err != nil { + return nil, fmt.Errorf("create picker: %w", err) + } + + logFile := cfg.LogFile + if logFile == "" { + logFile = "ralph-log.jsonl" + } + taskLog := NewTaskLog(logFile) + + result := &RalphResult{} + processed := 0 + + for { + // Check task limit + if cfg.MaxTasks > 0 && processed >= cfg.MaxTasks { + break + } + + // ── PICK ── + task, err := picker.Pick() + if err != nil { + return result, fmt.Errorf("pick task: %w", err) + } + if task == nil { + break // no more pending tasks + } + result.Total++ + + // Mark as running + task.Status = StatusRunning + picker.Update(*task) + + if cfg.DryRun { + fmt.Printf("[ralph] DRY RUN — would implement task %s: %s\n", task.ID, task.Description) + task.Status = StatusCompleted // mark completed so we don't pick it again + picker.Update(*task) + result.Skipped++ + processed++ + continue + } + + // ── IMPLEMENT ── + loopCfg := cfg.LoopConfig + loopCfg.UserPrompt = task.Description + loopCfg.Agent = fmt.Sprintf("ralph-task-%s", task.ID) + + start := time.Now() + runResult, runErr := agent.RunLoop(loopCfg, engine) + + entry := TaskLogEntry{ + TaskID: task.ID, + Description: task.Description, + Timestamp: time.Now().UTC().Format(time.RFC3339), + } + + if runErr != nil { + task.Status = StatusFailed + task.Error = runErr.Error() + entry.Status = StatusFailed + entry.Error = runErr.Error() + entry.DurationMs = time.Since(start).Milliseconds() + picker.Update(*task) + taskLog.Append(entry) + result.Failed++ + result.Entries = append(result.Entries, entry) + processed++ + continue + } + + entry.Output = runResult.Output + entry.Turns = runResult.Turns + entry.ToolCalls = runResult.ToolCalls + entry.DurationMs = runResult.DurationMs + + // ── 
VALIDATE ── + validated := true + if len(cfg.Validate) > 0 && runResult.Success { + for _, cmdStr := range cfg.Validate { + parts := strings.Fields(cmdStr) + if len(parts) == 0 { + continue + } + cmd := exec.Command(parts[0], parts[1:]...) + out, verr := cmd.CombinedOutput() + if verr != nil { + validated = false + task.Error = fmt.Sprintf("validation failed (%s): %s", cmdStr, string(out)) + break + } + } + } + + if !runResult.Success || !validated { + task.Status = StatusFailed + if task.Error == "" { + task.Error = fmt.Sprintf("agent exit: %s", runResult.ExitReason) + } + entry.Status = StatusFailed + entry.Error = task.Error + picker.Update(*task) + taskLog.Append(entry) + result.Failed++ + result.Entries = append(result.Entries, entry) + processed++ + continue + } + + // ── COMMIT ── + if cfg.AutoCommit { + commitMsg := fmt.Sprintf("ralph: task %s — %s", task.ID, task.Description) + addCmd := exec.Command("git", "add", "-A") + addCmd.Run() + commitCmd := exec.Command("git", "commit", "-m", commitMsg, "--allow-empty") + commitCmd.Run() + } + + // ── RESET ── (implicit: next iteration creates a fresh RunLoop) + task.Status = StatusCompleted + entry.Status = StatusCompleted + picker.Update(*task) + taskLog.Append(entry) + result.Completed++ + result.Entries = append(result.Entries, entry) + processed++ + } + + return result, nil +} + +func makePicker(cfg RalphConfig) (Picker, error) { + switch cfg.TaskSource { + case SourceFile, "": + if cfg.TaskFile == "" { + return nil, fmt.Errorf("task file path required for file source") + } + return NewFilePicker(cfg.TaskFile), nil + case SourceMCP: + return nil, fmt.Errorf("MCP task source not yet implemented") + default: + return nil, fmt.Errorf("unknown task source: %s", cfg.TaskSource) + } +} diff --git a/internal/ralph/ralph_test.go b/internal/ralph/ralph_test.go new file mode 100644 index 0000000..0df0f48 --- /dev/null +++ b/internal/ralph/ralph_test.go @@ -0,0 +1,174 @@ +package ralph + +import ( + "os" + 
"path/filepath" + "testing" + + "github.com/AgentGuardHQ/shellforge/internal/agent" +) + +func TestRalphConfig_Defaults(t *testing.T) { + cfg := RalphConfig{ + TaskSource: SourceFile, + TaskFile: "tasks.json", + } + if cfg.TaskSource != SourceFile { + t.Errorf("expected file source, got %s", cfg.TaskSource) + } +} + +func TestMakePicker_File(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + os.WriteFile(path, []byte("[]"), 0o644) + + cfg := RalphConfig{TaskSource: SourceFile, TaskFile: path} + picker, err := makePicker(cfg) + if err != nil { + t.Fatalf("makePicker: %v", err) + } + if picker == nil { + t.Fatal("expected non-nil picker") + } +} + +func TestMakePicker_NoFile(t *testing.T) { + cfg := RalphConfig{TaskSource: SourceFile, TaskFile: ""} + _, err := makePicker(cfg) + if err == nil { + t.Fatal("expected error for empty task file") + } +} + +func TestMakePicker_MCP(t *testing.T) { + cfg := RalphConfig{TaskSource: SourceMCP} + _, err := makePicker(cfg) + if err == nil { + t.Fatal("expected error for unimplemented MCP source") + } +} + +func TestMakePicker_Unknown(t *testing.T) { + cfg := RalphConfig{TaskSource: "unknown"} + _, err := makePicker(cfg) + if err == nil { + t.Fatal("expected error for unknown source type") + } +} + +func TestRunRalph_DryRun(t *testing.T) { + dir := t.TempDir() + taskPath := filepath.Join(dir, "tasks.json") + logPath := filepath.Join(dir, "ralph.jsonl") + + tasks := `[ + {"id": "1", "description": "Task one", "status": "pending", "priority": 1}, + {"id": "2", "description": "Task two", "status": "pending", "priority": 2} +]` + os.WriteFile(taskPath, []byte(tasks), 0o644) + + cfg := RalphConfig{ + TaskSource: SourceFile, + TaskFile: taskPath, + LogFile: logPath, + DryRun: true, + MaxTasks: 10, + LoopConfig: agent.LoopConfig{ + Agent: "test-agent", + System: "test", + MaxTurns: 5, + TimeoutMs: 10000, + OutputDir: dir, + TokenBudget: 1000, + }, + } + + // In dry-run, we don't need a real governance 
engine + result, err := RunRalph(cfg, nil) + if err != nil { + t.Fatalf("RunRalph dry-run: %v", err) + } + if result.Skipped != 2 { + t.Errorf("expected 2 skipped, got %d", result.Skipped) + } + if result.Total != 2 { + t.Errorf("expected 2 total, got %d", result.Total) + } + + // Verify tasks are marked completed (dry-run advances past them) + readTasks, _ := ParseTaskFile(taskPath) + for _, task := range readTasks { + if task.Status != StatusCompleted { + t.Errorf("task %s should be completed after dry-run, got %s", task.ID, task.Status) + } + } +} + +func TestRunRalph_MaxTasks(t *testing.T) { + dir := t.TempDir() + taskPath := filepath.Join(dir, "tasks.json") + logPath := filepath.Join(dir, "ralph.jsonl") + + tasks := `[ + {"id": "1", "description": "Task one", "status": "pending", "priority": 1}, + {"id": "2", "description": "Task two", "status": "pending", "priority": 2}, + {"id": "3", "description": "Task three", "status": "pending", "priority": 3} +]` + os.WriteFile(taskPath, []byte(tasks), 0o644) + + cfg := RalphConfig{ + TaskSource: SourceFile, + TaskFile: taskPath, + LogFile: logPath, + DryRun: true, + MaxTasks: 2, // only process 2 + LoopConfig: agent.LoopConfig{ + Agent: "test-agent", + System: "test", + MaxTurns: 5, + TimeoutMs: 10000, + OutputDir: dir, + TokenBudget: 1000, + }, + } + + result, err := RunRalph(cfg, nil) + if err != nil { + t.Fatalf("RunRalph max-tasks: %v", err) + } + if result.Total != 2 { + t.Errorf("expected 2 total (max limit), got %d", result.Total) + } +} + +func TestRunRalph_NoPendingTasks(t *testing.T) { + dir := t.TempDir() + taskPath := filepath.Join(dir, "tasks.json") + logPath := filepath.Join(dir, "ralph.jsonl") + + tasks := `[{"id": "1", "description": "Done", "status": "completed", "priority": 1}]` + os.WriteFile(taskPath, []byte(tasks), 0o644) + + cfg := RalphConfig{ + TaskSource: SourceFile, + TaskFile: taskPath, + LogFile: logPath, + LoopConfig: agent.LoopConfig{ + Agent: "test-agent", + System: "test", + MaxTurns: 5, 
+ TimeoutMs: 10000, + OutputDir: dir, + TokenBudget: 1000, + }, + } + + result, err := RunRalph(cfg, nil) + if err != nil { + t.Fatalf("RunRalph no-pending: %v", err) + } + if result.Total != 0 { + t.Errorf("expected 0 total, got %d", result.Total) + } +} diff --git a/internal/ralph/task.go b/internal/ralph/task.go new file mode 100644 index 0000000..c907590 --- /dev/null +++ b/internal/ralph/task.go @@ -0,0 +1,78 @@ +// Package ralph implements the Ralph Loop — a stateless-iterative +// execution pattern for autonomous agent task processing. Each cycle: +// PICK → IMPLEMENT → VALIDATE → COMMIT → RESET. +// +// The loop is stateless across iterations: each task gets a fresh +// RunLoop call with no prior message history, preventing context +// pollution between tasks. +package ralph + +import ( + "encoding/json" + "fmt" + "os" + "sort" +) + +// TaskStatus represents the lifecycle state of a task. +type TaskStatus string + +const ( + StatusPending TaskStatus = "pending" + StatusRunning TaskStatus = "running" + StatusCompleted TaskStatus = "completed" + StatusFailed TaskStatus = "failed" +) + +// Task is a single unit of work for the Ralph Loop. +type Task struct { + ID string `json:"id"` + Description string `json:"description"` + Status TaskStatus `json:"status"` + Priority int `json:"priority"` + Error string `json:"error,omitempty"` +} + +// ParseTaskFile reads a JSON task file and returns the task list. +func ParseTaskFile(path string) ([]Task, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read task file: %w", err) + } + var tasks []Task + if err := json.Unmarshal(data, &tasks); err != nil { + return nil, fmt.Errorf("parse task file: %w", err) + } + return tasks, nil +} + +// WriteTaskFile writes the task list back to disk as JSON. 
+func WriteTaskFile(path string, tasks []Task) error { + data, err := json.MarshalIndent(tasks, "", " ") + if err != nil { + return fmt.Errorf("marshal tasks: %w", err) + } + if err := os.WriteFile(path, data, 0o644); err != nil { + return fmt.Errorf("write task file: %w", err) + } + return nil +} + +// NextPending returns the highest-priority pending task (lowest priority number). +// Returns nil if no pending tasks remain. +func NextPending(tasks []Task) *Task { + var pending []Task + for _, t := range tasks { + if t.Status == StatusPending { + pending = append(pending, t) + } + } + if len(pending) == 0 { + return nil + } + sort.Slice(pending, func(i, j int) bool { + return pending[i].Priority < pending[j].Priority + }) + result := pending[0] + return &result +} diff --git a/internal/ralph/task_test.go b/internal/ralph/task_test.go new file mode 100644 index 0000000..2886760 --- /dev/null +++ b/internal/ralph/task_test.go @@ -0,0 +1,114 @@ +package ralph + +import ( + "os" + "path/filepath" + "testing" +) + +func TestParseTaskFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + + content := `[ + {"id": "1", "description": "Add input validation", "status": "pending", "priority": 1}, + {"id": "2", "description": "Write tests", "status": "pending", "priority": 2}, + {"id": "3", "description": "Fix bug", "status": "completed", "priority": 0} +]` + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatalf("write test file: %v", err) + } + + tasks, err := ParseTaskFile(path) + if err != nil { + t.Fatalf("ParseTaskFile: %v", err) + } + if len(tasks) != 3 { + t.Fatalf("expected 3 tasks, got %d", len(tasks)) + } + + if tasks[0].ID != "1" || tasks[0].Status != StatusPending { + t.Errorf("task 0: got id=%s status=%s", tasks[0].ID, tasks[0].Status) + } + if tasks[2].Status != StatusCompleted { + t.Errorf("task 2: expected completed, got %s", tasks[2].Status) + } +} + +func TestParseTaskFile_NotFound(t *testing.T) { + _, 
err := ParseTaskFile("/nonexistent/tasks.json") + if err == nil { + t.Fatal("expected error for missing file") + } +} + +func TestParseTaskFile_InvalidJSON(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + os.WriteFile(path, []byte("not json"), 0o644) + + _, err := ParseTaskFile(path) + if err == nil { + t.Fatal("expected error for invalid JSON") + } +} + +func TestWriteTaskFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + + tasks := []Task{ + {ID: "1", Description: "Test task", Status: StatusPending, Priority: 1}, + } + if err := WriteTaskFile(path, tasks); err != nil { + t.Fatalf("WriteTaskFile: %v", err) + } + + // Read back + readTasks, err := ParseTaskFile(path) + if err != nil { + t.Fatalf("ParseTaskFile after write: %v", err) + } + if len(readTasks) != 1 { + t.Fatalf("expected 1 task, got %d", len(readTasks)) + } + if readTasks[0].ID != "1" || readTasks[0].Description != "Test task" { + t.Errorf("round-trip mismatch: %+v", readTasks[0]) + } +} + +func TestNextPending(t *testing.T) { + tasks := []Task{ + {ID: "1", Description: "Low priority", Status: StatusPending, Priority: 3}, + {ID: "2", Description: "High priority", Status: StatusPending, Priority: 1}, + {ID: "3", Description: "Already done", Status: StatusCompleted, Priority: 0}, + {ID: "4", Description: "Medium priority", Status: StatusPending, Priority: 2}, + } + + next := NextPending(tasks) + if next == nil { + t.Fatal("expected a pending task") + } + if next.ID != "2" { + t.Errorf("expected task 2 (highest priority), got task %s", next.ID) + } +} + +func TestNextPending_NoPending(t *testing.T) { + tasks := []Task{ + {ID: "1", Status: StatusCompleted, Priority: 1}, + {ID: "2", Status: StatusFailed, Priority: 2}, + } + + next := NextPending(tasks) + if next != nil { + t.Errorf("expected nil, got task %s", next.ID) + } +} + +func TestNextPending_EmptyList(t *testing.T) { + next := NextPending(nil) + if next != nil { + t.Error("expected 
nil for empty list") + } +} From 220ac3670feb78c58e82cef2778636c5ac0ba505 Mon Sep 17 00:00:00 2001 From: Jared Pleva Date: Wed, 1 Apr 2026 00:47:36 +0000 Subject: [PATCH 2/4] feat(orchestrator): add sub-agent orchestration with concurrency control Adds SubTask/SubResult/TaskHandle types, Orchestrator with SpawnSync (blocking) and SpawnAsync (goroutine+channel) execution, semaphore-based concurrency limiting, and CompressResult for output truncation. Each sub-agent gets its own RunLoop call with isolated context. 12 tests passing. Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/orchestrator/compress.go | 54 +++++++++ internal/orchestrator/compress_test.go | 83 +++++++++++++ internal/orchestrator/handle.go | 37 ++++++ internal/orchestrator/handle_test.go | 53 +++++++++ internal/orchestrator/orchestrator.go | 129 +++++++++++++++++++++ internal/orchestrator/orchestrator_test.go | 34 ++++++ 6 files changed, 390 insertions(+) create mode 100644 internal/orchestrator/compress.go create mode 100644 internal/orchestrator/compress_test.go create mode 100644 internal/orchestrator/handle.go create mode 100644 internal/orchestrator/handle_test.go create mode 100644 internal/orchestrator/orchestrator.go create mode 100644 internal/orchestrator/orchestrator_test.go diff --git a/internal/orchestrator/compress.go b/internal/orchestrator/compress.go new file mode 100644 index 0000000..4efe9a2 --- /dev/null +++ b/internal/orchestrator/compress.go @@ -0,0 +1,54 @@ +package orchestrator + +// tokenThreshold is the maximum output size (in estimated tokens) that +// passes through without compression. Outputs below this are returned as-is. +const tokenThreshold = 750 + +// CompressResult compresses a sub-agent output if it exceeds the token threshold. +// Strategy: +// 1. If output < 750 tokens (estimated), return as-is +// 2. Otherwise truncate to the threshold with a marker +// +// A future version will use LLM summarization when a Provider is available. 
+func CompressResult(output string) string { + estimated := estimateTokens(output) + if estimated <= tokenThreshold { + return output + } + + // Truncate to approximately tokenThreshold tokens (4 chars per token estimate) + maxChars := tokenThreshold * 4 + if maxChars >= len(output) { + return output + } + return output[:maxChars] + "\n\n[... output truncated — " + itoa(estimated-tokenThreshold) + " tokens omitted]" +} + +// estimateTokens provides a rough token count (1 token ~ 4 chars). +func estimateTokens(s string) int { + return len(s) / 4 +} + +// itoa converts an int to a string without importing strconv (Go 1.18 compat). +func itoa(n int) string { + if n == 0 { + return "0" + } + neg := false + if n < 0 { + neg = true + n = -n + } + var buf [20]byte + i := len(buf) + for n > 0 { + i-- + buf[i] = byte('0' + n%10) + n /= 10 + } + if neg { + i-- + buf[i] = '-' + } + return string(buf[i:]) +} diff --git a/internal/orchestrator/compress_test.go b/internal/orchestrator/compress_test.go new file mode 100644 index 0000000..f68d15a --- /dev/null +++ b/internal/orchestrator/compress_test.go @@ -0,0 +1,83 @@ +package orchestrator + +import ( + "strings" + "testing" +) + +func TestCompressResult_ShortOutput(t *testing.T) { + short := "This is a short output." 
+ result := CompressResult(short) + if result != short { + t.Errorf("short output should pass through unchanged, got %q", result) + } +} + +func TestCompressResult_ExactThreshold(t *testing.T) { + // 750 tokens * 4 chars = 3000 chars + exact := strings.Repeat("a", 3000) + result := CompressResult(exact) + if result != exact { + t.Error("output at exact threshold should pass through unchanged") + } +} + +func TestCompressResult_OverThreshold(t *testing.T) { + // 4000 tokens * 4 chars = 16000 chars + long := strings.Repeat("x", 16000) + result := CompressResult(long) + + if len(result) >= len(long) { + t.Errorf("compressed result should be shorter than original (%d >= %d)", len(result), len(long)) + } + if !strings.Contains(result, "truncated") { + t.Error("compressed result should contain truncation marker") + } + if !strings.Contains(result, "omitted") { + t.Error("compressed result should indicate omitted tokens") + } +} + +func TestCompressResult_Empty(t *testing.T) { + result := CompressResult("") + if result != "" { + t.Errorf("empty input should return empty, got %q", result) + } +} + +func TestEstimateTokens(t *testing.T) { + tests := []struct { + input string + expected int + }{ + {"", 0}, + {"abcd", 1}, + {"12345678", 2}, + {strings.Repeat("a", 100), 25}, + } + for _, tt := range tests { + got := estimateTokens(tt.input) + if got != tt.expected { + t.Errorf("estimateTokens(%d chars): expected %d, got %d", len(tt.input), tt.expected, got) + } + } +} + +func TestItoa(t *testing.T) { + tests := []struct { + input int + expected string + }{ + {0, "0"}, + {1, "1"}, + {42, "42"}, + {-5, "-5"}, + {1000, "1000"}, + } + for _, tt := range tests { + got := itoa(tt.input) + if got != tt.expected { + t.Errorf("itoa(%d): expected %q, got %q", tt.input, tt.expected, got) + } + } +} diff --git a/internal/orchestrator/handle.go b/internal/orchestrator/handle.go new file mode 100644 index 0000000..4f0f463 --- /dev/null +++ b/internal/orchestrator/handle.go @@ -0,0 +1,37 @@ 
+package orchestrator + +// SubTask describes a unit of work to be executed by a sub-agent. +// Each sub-agent gets its own RunLoop call with isolated context. +type SubTask struct { + ID string + Description string + System string // system prompt for the sub-agent + Model string + MaxTurns int + TimeoutMs int + TokenBudget int +} + +// SubResult captures the outcome of a sub-agent execution. +type SubResult struct { + TaskID string + Success bool + Output string + Turns int + ToolCalls int + DurationMs int64 + Error string +} + +// TaskHandle is a reference to an in-flight async sub-agent task. +// Use Collect() to block until the result is available. +type TaskHandle struct { + TaskID string + done chan *asyncResult +} + +// asyncResult wraps a SubResult with an optional error from the agent call. +type asyncResult struct { + result *SubResult + err error +} diff --git a/internal/orchestrator/handle_test.go b/internal/orchestrator/handle_test.go new file mode 100644 index 0000000..6d54b68 --- /dev/null +++ b/internal/orchestrator/handle_test.go @@ -0,0 +1,53 @@ +package orchestrator + +import ( + "testing" +) + +func TestSubTask_Fields(t *testing.T) { + task := SubTask{ + ID: "sub-1", + Description: "Analyze code quality", + System: "You are a QA agent.", + Model: "test-model", + MaxTurns: 5, + TimeoutMs: 30000, + TokenBudget: 2000, + } + + if task.ID != "sub-1" { + t.Errorf("expected id sub-1, got %s", task.ID) + } + if task.MaxTurns != 5 { + t.Errorf("expected 5 max turns, got %d", task.MaxTurns) + } +} + +func TestSubResult_Fields(t *testing.T) { + result := SubResult{ + TaskID: "sub-1", + Success: true, + Output: "All tests pass", + Turns: 3, + ToolCalls: 5, + DurationMs: 1500, + } + + if !result.Success { + t.Error("expected success") + } + if result.TaskID != "sub-1" { + t.Errorf("expected task id sub-1, got %s", result.TaskID) + } +} + +func TestTaskHandle_Fields(t *testing.T) { + handle := TaskHandle{ + TaskID: "sub-1", + done: make(chan *asyncResult, 
1), + } + + if handle.TaskID != "sub-1" { + t.Errorf("expected task id sub-1, got %s", handle.TaskID) + } +} diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go new file mode 100644 index 0000000..7f78d82 --- /dev/null +++ b/internal/orchestrator/orchestrator.go @@ -0,0 +1,129 @@ +package orchestrator + +import ( + "fmt" + "time" + + "github.com/AgentGuardHQ/shellforge/internal/agent" + "github.com/AgentGuardHQ/shellforge/internal/governance" + "github.com/AgentGuardHQ/shellforge/internal/llm" +) + +// Orchestrator manages sub-agent execution with concurrency control. +// Each sub-agent gets its own RunLoop call with isolated context. +type Orchestrator struct { + provider llm.Provider + governance *governance.Engine + maxParallel int + slots chan struct{} +} + +// NewOrchestrator creates an Orchestrator with the given concurrency limit. +func NewOrchestrator(provider llm.Provider, gov *governance.Engine, maxParallel int) *Orchestrator { + if maxParallel < 1 { + maxParallel = 1 + } + slots := make(chan struct{}, maxParallel) + for i := 0; i < maxParallel; i++ { + slots <- struct{}{} + } + return &Orchestrator{ + provider: provider, + governance: gov, + maxParallel: maxParallel, + slots: slots, + } +} + +// SpawnSync executes a sub-agent synchronously, blocking until completion. +// Acquires a concurrency slot before running. +func (o *Orchestrator) SpawnSync(task SubTask) (*SubResult, error) { + // Acquire slot + <-o.slots + defer func() { o.slots <- struct{}{} }() + + return o.executeTask(task) +} + +// SpawnAsync launches a sub-agent in a goroutine and returns a handle. +// The handle can be passed to Collect() to retrieve the result. 
+func (o *Orchestrator) SpawnAsync(task SubTask) (TaskHandle, error) { + handle := TaskHandle{ + TaskID: task.ID, + done: make(chan *asyncResult, 1), + } + + go func() { + // Acquire slot + <-o.slots + defer func() { o.slots <- struct{}{} }() + + result, err := o.executeTask(task) + handle.done <- &asyncResult{result: result, err: err} + }() + + return handle, nil +} + +// Collect blocks until the async task completes or the timeout expires. +func (o *Orchestrator) Collect(h TaskHandle, timeout time.Duration) (*SubResult, error) { + select { + case ar := <-h.done: + if ar.err != nil { + return nil, ar.err + } + return ar.result, nil + case <-time.After(timeout): + return nil, fmt.Errorf("collect timeout for task %s after %s", h.TaskID, timeout) + } +} + +// executeTask runs a single sub-agent via agent.RunLoop. +func (o *Orchestrator) executeTask(task SubTask) (*SubResult, error) { + cfg := agent.LoopConfig{ + Agent: fmt.Sprintf("sub-agent-%s", task.ID), + System: task.System, + UserPrompt: task.Description, + Model: task.Model, + MaxTurns: task.MaxTurns, + TimeoutMs: task.TimeoutMs, + OutputDir: "", + TokenBudget: task.TokenBudget, + Provider: o.provider, + } + + if cfg.System == "" { + cfg.System = "You are a sub-agent. Complete the requested task precisely." 
+ } + if cfg.MaxTurns == 0 { + cfg.MaxTurns = 10 + } + if cfg.TimeoutMs == 0 { + cfg.TimeoutMs = 60000 + } + if cfg.TokenBudget == 0 { + cfg.TokenBudget = 3000 + } + + start := time.Now() + runResult, err := agent.RunLoop(cfg, o.governance) + if err != nil { + return &SubResult{ + TaskID: task.ID, + Success: false, + Error: err.Error(), + DurationMs: time.Since(start).Milliseconds(), + }, err + } + + output := CompressResult(runResult.Output) + + return &SubResult{ + TaskID: task.ID, + Success: runResult.Success, + Output: output, + Turns: runResult.Turns, + ToolCalls: runResult.ToolCalls, + DurationMs: runResult.DurationMs, + }, nil +} diff --git a/internal/orchestrator/orchestrator_test.go b/internal/orchestrator/orchestrator_test.go new file mode 100644 index 0000000..6113eaf --- /dev/null +++ b/internal/orchestrator/orchestrator_test.go @@ -0,0 +1,34 @@ +package orchestrator + +import ( + "testing" +) + +func TestNewOrchestrator_MinParallel(t *testing.T) { + // maxParallel < 1 should default to 1 + o := NewOrchestrator(nil, nil, 0) + if o.maxParallel != 1 { + t.Errorf("expected maxParallel=1, got %d", o.maxParallel) + } +} + +func TestNewOrchestrator_SetsFields(t *testing.T) { + o := NewOrchestrator(nil, nil, 4) + if o.maxParallel != 4 { + t.Errorf("expected maxParallel=4, got %d", o.maxParallel) + } + if cap(o.slots) != 4 { + t.Errorf("expected slots capacity=4, got %d", cap(o.slots)) + } + // All slots should be available initially + if len(o.slots) != 4 { + t.Errorf("expected 4 available slots, got %d", len(o.slots)) + } +} + +func TestNewOrchestrator_NegativeParallel(t *testing.T) { + o := NewOrchestrator(nil, nil, -5) + if o.maxParallel != 1 { + t.Errorf("expected maxParallel=1 for negative input, got %d", o.maxParallel) + } +} From 5d25816c747d174870c3a43bd0a5270785e57784 Mon Sep 17 00:00:00 2001 From: Jared Pleva Date: Wed, 1 Apr 2026 00:47:42 +0000 Subject: [PATCH 3/4] feat(cli): add 'shellforge ralph' command for task loop execution Wires the Ralph 
Loop into the CLI with full flag support: --tasks, --log, --provider, --validate, --dry-run, --auto-commit, --max-tasks. Supports both Anthropic API and legacy Ollama providers. Co-Authored-By: Claude Opus 4.6 (1M context) --- cmd/shellforge/main.go | 119 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/cmd/shellforge/main.go b/cmd/shellforge/main.go index b895c92..b4fa52d 100644 --- a/cmd/shellforge/main.go +++ b/cmd/shellforge/main.go @@ -21,6 +21,7 @@ import ( "github.com/AgentGuardHQ/shellforge/internal/llm" "github.com/AgentGuardHQ/shellforge/internal/logger" "github.com/AgentGuardHQ/shellforge/internal/ollama" +"github.com/AgentGuardHQ/shellforge/internal/ralph" "github.com/AgentGuardHQ/shellforge/internal/scheduler" ) @@ -84,6 +85,8 @@ os.Exit(1) } cmdAgent(strings.Join(filtered, " "), providerName, thinkingBudget) } +case "ralph": +cmdRalph() case "swarm": cmdSwarm() case "serve": @@ -120,6 +123,7 @@ Usage: shellforge scan [dir] DefenseClaw supply chain scan shellforge version Print version + shellforge ralph [flags] Run Ralph Loop (stateless-iterative task execution) shellforge serve [config] Simple daemon mode (built-in scheduler) shellforge swarm Setup Dagu orchestration (DAG workflows + web UI) @@ -724,6 +728,121 @@ printResult("prototype-agent", result) saveReport("outputs/logs", "prototype", result) } +func cmdRalph() { +engine := mustGovernance() + +// Parse flags +taskFile := "tasks.json" +logFile := "ralph-log.jsonl" +providerName := "" +dryRun := false +autoCommit := false +maxTasks := 0 +var validate []string + +remaining := os.Args[2:] +for i := 0; i < len(remaining); i++ { + switch remaining[i] { + case "--tasks": + if i+1 < len(remaining) { + taskFile = remaining[i+1] + i++ + } + case "--log": + if i+1 < len(remaining) { + logFile = remaining[i+1] + i++ + } + case "--provider": + if i+1 < len(remaining) { + providerName = remaining[i+1] + i++ + } + case "--validate": + if i+1 < len(remaining) { + 
validate = append(validate, remaining[i+1]) + i++ + } + case "--dry-run": + dryRun = true + case "--auto-commit": + autoCommit = true + case "--max-tasks": + if i+1 < len(remaining) { + fmt.Sscanf(remaining[i+1], "%d", &maxTasks) + i++ + } + default: + fmt.Fprintf(os.Stderr, "Unknown ralph flag: %s\n", remaining[i]) + fmt.Fprintln(os.Stderr, "Usage: shellforge ralph [--tasks file] [--log file] [--provider name] [--validate cmd] [--dry-run] [--auto-commit] [--max-tasks N]") + os.Exit(1) + } +} + +var provider llm.Provider +switch providerName { +case "anthropic": + apiKey := os.Getenv("ANTHROPIC_API_KEY") + if apiKey == "" { + fmt.Fprintln(os.Stderr, "Error: ANTHROPIC_API_KEY environment variable not set") + os.Exit(1) + } + model := os.Getenv("ANTHROPIC_MODEL") + if model == "" { + model = "claude-haiku-4-5-20251001" + } + provider = llm.NewAnthropicProvider(apiKey, model) + fmt.Fprintf(os.Stderr, "[ralph] Using Anthropic API (model: %s)\n", model) +case "": + // Legacy Ollama path + mustOllama() +default: + fmt.Fprintf(os.Stderr, "Unknown provider: %s\n", providerName) + os.Exit(1) +} + +cfg := ralph.RalphConfig{ + TaskSource: ralph.SourceFile, + TaskFile: taskFile, + LogFile: logFile, + Validate: validate, + AutoCommit: autoCommit, + MaxTasks: maxTasks, + DryRun: dryRun, + LoopConfig: agent.LoopConfig{ + Agent: "ralph-agent", + System: "You are a senior engineer. Complete the requested task using available tools. Read files, write files, run commands. 
Be precise and thorough.", + Model: ollama.Model, + MaxTurns: 15, + TimeoutMs: 180_000, + OutputDir: "outputs/logs", + TokenBudget: 3000, + Provider: provider, + }, +} + +fmt.Printf("[ralph] Starting Ralph Loop — tasks: %s, dry-run: %v\n", taskFile, dryRun) + +result, err := ralph.RunRalph(cfg, engine) +if err != nil { + fmt.Fprintf(os.Stderr, "ERROR: ralph loop: %s\n", err) + os.Exit(1) +} + +fmt.Println() +fmt.Printf("[ralph] Complete — %d completed, %d failed, %d skipped (of %d total)\n", + result.Completed, result.Failed, result.Skipped, result.Total) + +for _, entry := range result.Entries { + status := "completed" + if entry.Status == ralph.StatusFailed { + status = "FAILED" + } + fmt.Printf(" [%s] task %s: %s (%d turns, %dms)\n", + status, entry.TaskID, entry.Description, entry.Turns, entry.DurationMs) +} +} + func cmdSwarm() { fmt.Println("=== ShellForge Swarm Setup (Dagu) ===") fmt.Println() From e4fc2f66ee2139a871865f8800b54646c4edd0bb Mon Sep 17 00:00:00 2001 From: Jared Pleva Date: Wed, 1 Apr 2026 01:04:41 +0000 Subject: [PATCH 4/4] docs: update README, architecture, and roadmap for v0.7.0 - Fix headline to reflect Anthropic API provider option - Add --provider anthropic and --thinking-budget to CLI commands - Add llm/ and agent/drift.go to Go project layout - Mark Phase 11 (workspace swarm migration) as complete - Add v0.7.0 completed items (LLM provider, prompt caching, drift detection) Co-Authored-By: Claude Sonnet 4.6 --- README.md | 10 ++++++---- docs/architecture.md | 7 ++++++- docs/roadmap.md | 20 ++++++++++++++------ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 6b64a12..4f2b154 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,14 @@ # ShellForge -**Governed local AI agents — one Go binary, zero cloud.** +**Governed AI agent runtime — one Go binary, local or cloud.** [![Go](https://img.shields.io/badge/Go-1.18+-00ADD8?style=for-the-badge&logo=go&logoColor=white)](https://go.dev) [![GitHub 
Pages](https://img.shields.io/badge/Live_Site-agentguardhq.github.io/shellforge-ff6b2b?style=for-the-badge)](https://agentguardhq.github.io/shellforge) [![License: MIT](https://img.shields.io/badge/License-MIT-blue?style=for-the-badge)](LICENSE) [![AgentGuard](https://img.shields.io/badge/Governed_by-AgentGuard-green?style=for-the-badge)](https://github.com/AgentGuardHQ/agentguard) -*Run autonomous AI agents on your machine with policy enforcement on every tool call. No cloud. No API keys. No data leaves your laptop.* +*Run autonomous AI agents with policy enforcement on every tool call. Local via Ollama or cloud via Anthropic API — your choice.* [Website](https://agentguardhq.github.io/shellforge) · [Docs](docs/architecture.md) · [Roadmap](docs/roadmap.md) · [AgentGuard](https://github.com/AgentGuardHQ/agentguard) @@ -112,9 +112,11 @@ shellforge status | Command | Description | |---------|-------------| -| `shellforge run "prompt"` | Run a governed agent (goose, claude, copilot, codex, gemini) | +| `shellforge agent "prompt"` | Run a governed agent (Ollama, default) | +| `shellforge agent --provider anthropic "prompt"` | Run via Anthropic API (Haiku/Sonnet/Opus, prompt caching) | +| `shellforge agent --thinking-budget 8000 "prompt"` | Enable extended thinking (Sonnet/Opus) | +| `shellforge run "prompt"` | Run a governed CLI driver (goose, claude, copilot, codex, gemini) | | `shellforge setup` | Install Ollama, create governance config, verify stack | -| `shellforge agent "prompt"` | Run a governed agent — every tool call checked | | `shellforge qa [dir]` | QA analysis — find test gaps and issues | | `shellforge report [repo]` | Generate a status report from git + logs | | `shellforge serve agents.yaml` | Daemon mode — run a 24/7 agent swarm | diff --git a/docs/architecture.md b/docs/architecture.md index 017ca82..44aefb5 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -123,9 +123,14 @@ cmd/shellforge/ └── status.go # Ecosystem health check 
internal/
+├── llm/ # LLM provider interface
+│ ├── provider.go # Provider interface (Chat, Name) + Message/Response types
+│ └── anthropic.go # Anthropic API adapter (stdlib HTTP, prompt caching, tool_use)
+├── agent/ # Agentic loop
+│ ├── loop.go # runProviderLoop (Anthropic) + runOllamaLoop, drift detection wiring
+│ └── drift.go # Drift detector — self-score every 5 calls, steer/kill on low scores
├── governance/ # agentguard.yaml parser + policy engine
├── ollama/ # Ollama HTTP client (chat, generate)
-├── agent/ # Native fallback agentic loop
├── tools/ # 5 tool implementations + RTK wrapper
├── engine/ # Pluggable engine interface (Goose, OpenClaw, OpenCode)
├── logger/ # Structured JSON logging
diff --git a/docs/roadmap.md b/docs/roadmap.md
index 283e75e..1bfe4d6 100644
--- a/docs/roadmap.md
+++ b/docs/roadmap.md
@@ -33,13 +33,22 @@
- [x] Tested Aider (file editing only, no shell execution)
- [x] Evaluated Goose (Block) — native Ollama, actually executes tools

-### v0.6.0 — Goose + Governed Shell ← CURRENT
+### v0.6.0 — Goose + Governed Shell
- [x] Goose as local model driver (`shellforge run goose`)
- [x] `govern-shell.sh` — shell wrapper that evaluates every command through AgentGuard
- [x] `shellforge run goose` sets SHELL to governed wrapper automatically
- [x] Fixed catch-all deny bug (bounded-execution policy was denying everything)
- [x] Dagu DAG templates (sdlc-swarm, studio-swarm, workspace-swarm, multi-driver)

+### v0.7.0 — Anthropic API Provider ← CURRENT
+- [x] LLM provider interface (`llm.Provider`) — pluggable Ollama vs Anthropic backends
+- [x] Anthropic API adapter — stdlib HTTP, structured `tool_use` blocks, multi-turn history
+- [x] Prompt caching — `cache_control: ephemeral` on system + tools, ~90% savings on cached tokens
+- [x] Extended thinking budget (`--thinking-budget` flag)
+- [x] Model cascading via Octi Pulpo (Haiku→Sonnet→Opus by `TaskComplexity` score)
+- [x] Drift detection — self-score every 5 tool calls, steer below 7, kill 
below 5 twice +- [x] RTK token compression wired into `runShellWithRTK()` (70-90% savings on shell output) + --- ## In Progress @@ -110,11 +119,10 @@ ShellForge orchestrates, Octi Pulpo coordinates, AgentGuard governs. This phase - [ ] Move `internal/` types to `pkg/` for external import - [ ] Cloud telemetry opt-in (AgentGuard Cloud) -### Phase 11 — Replace Workspace Bash Swarm -- [ ] Dagu replaces `server/deploy.sh` + cron + queue.txt -- [ ] Multi-driver DAGs: Claude Code + Copilot + Codex on Linux box -- [ ] Same governance policy across all drivers -- [ ] ShellForge as the runtime for agentguard-workspace swarm +### Phase 11 — Replace Workspace Bash Swarm ✅ DONE +- [x] Migrated to API-driven dispatch: Octi Pulpo → ShellForge → Anthropic API +- [x] GH Actions Copilot Agent workflow (`dispatch-agent.yml`) for free-tier automation +- [x] ShellForge is now the execution harness for the agentguard-workspace swarm ---