From 2bdefef60b0f0e9fd4db2f154d0610432c746363 Mon Sep 17 00:00:00 2001 From: Jared Pleva Date: Wed, 1 Apr 2026 00:47:29 +0000 Subject: [PATCH 1/4] =?UTF-8?q?feat(ralph):=20add=20Ralph=20Loop=20?= =?UTF-8?q?=E2=80=94=20stateless-iterative=20task=20execution=20engine?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements PICK -> IMPLEMENT -> VALIDATE -> COMMIT -> RESET cycle. Each task gets a fresh RunLoop call with isolated context, preventing cross-task context pollution. Includes task file parser, priority-based picker, JSONL append-only log, dry-run mode, and validation commands. 21 tests passing. Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/ralph/log.go | 91 ++++++++++++++++ internal/ralph/log_test.go | 97 +++++++++++++++++ internal/ralph/picker.go | 43 ++++++++ internal/ralph/picker_test.go | 84 +++++++++++++++ internal/ralph/ralph.go | 190 ++++++++++++++++++++++++++++++++++ internal/ralph/ralph_test.go | 174 +++++++++++++++++++++++++++++++ internal/ralph/task.go | 78 ++++++++++++++ internal/ralph/task_test.go | 114 ++++++++++++++++++++ 8 files changed, 871 insertions(+) create mode 100644 internal/ralph/log.go create mode 100644 internal/ralph/log_test.go create mode 100644 internal/ralph/picker.go create mode 100644 internal/ralph/picker_test.go create mode 100644 internal/ralph/ralph.go create mode 100644 internal/ralph/ralph_test.go create mode 100644 internal/ralph/task.go create mode 100644 internal/ralph/task_test.go diff --git a/internal/ralph/log.go b/internal/ralph/log.go new file mode 100644 index 0000000..c9eca3a --- /dev/null +++ b/internal/ralph/log.go @@ -0,0 +1,91 @@ +package ralph + +import ( + "encoding/json" + "fmt" + "os" + "time" +) + +// TaskLogEntry records the result of a single task execution. 
+type TaskLogEntry struct { + TaskID string `json:"task_id"` + Description string `json:"description"` + Status TaskStatus `json:"status"` + Output string `json:"output,omitempty"` + Error string `json:"error,omitempty"` + Turns int `json:"turns"` + ToolCalls int `json:"tool_calls"` + DurationMs int64 `json:"duration_ms"` + Timestamp string `json:"timestamp"` +} + +// TaskLog is an append-only JSONL log of task results. +type TaskLog struct { + Path string +} + +// NewTaskLog creates a TaskLog that writes to the given file path. +func NewTaskLog(path string) *TaskLog { + return &TaskLog{Path: path} +} + +// Append writes a single log entry as a JSONL line. +func (tl *TaskLog) Append(entry TaskLogEntry) error { + if entry.Timestamp == "" { + entry.Timestamp = time.Now().UTC().Format(time.RFC3339) + } + data, err := json.Marshal(entry) + if err != nil { + return fmt.Errorf("marshal log entry: %w", err) + } + f, err := os.OpenFile(tl.Path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("open log file: %w", err) + } + defer f.Close() + _, err = f.Write(append(data, '\n')) + if err != nil { + return fmt.Errorf("write log entry: %w", err) + } + return nil +} + +// Read returns all log entries from the file. 
+func (tl *TaskLog) Read() ([]TaskLogEntry, error) { + data, err := os.ReadFile(tl.Path) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read log file: %w", err) + } + + var entries []TaskLogEntry + // Split by newlines and parse each line + start := 0 + for i := 0; i < len(data); i++ { + if data[i] == '\n' { + line := data[start:i] + start = i + 1 + if len(line) == 0 { + continue + } + var entry TaskLogEntry + if err := json.Unmarshal(line, &entry); err != nil { + continue // skip malformed lines + } + entries = append(entries, entry) + } + } + // Handle last line without trailing newline + if start < len(data) { + line := data[start:] + var entry TaskLogEntry + if err := json.Unmarshal(line, &entry); err == nil { + entries = append(entries, entry) + } + } + + return entries, nil +} diff --git a/internal/ralph/log_test.go b/internal/ralph/log_test.go new file mode 100644 index 0000000..928dad0 --- /dev/null +++ b/internal/ralph/log_test.go @@ -0,0 +1,97 @@ +package ralph + +import ( + "path/filepath" + "testing" +) + +func TestTaskLog_AppendAndRead(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "ralph.jsonl") + + log := NewTaskLog(path) + + // Append two entries + err := log.Append(TaskLogEntry{ + TaskID: "1", + Description: "First task", + Status: StatusCompleted, + Output: "done", + Turns: 3, + ToolCalls: 5, + DurationMs: 1200, + Timestamp: "2026-03-31T10:00:00Z", + }) + if err != nil { + t.Fatalf("Append 1: %v", err) + } + + err = log.Append(TaskLogEntry{ + TaskID: "2", + Description: "Second task", + Status: StatusFailed, + Error: "timeout", + Turns: 10, + ToolCalls: 8, + DurationMs: 5000, + Timestamp: "2026-03-31T10:01:00Z", + }) + if err != nil { + t.Fatalf("Append 2: %v", err) + } + + // Read back + entries, err := log.Read() + if err != nil { + t.Fatalf("Read: %v", err) + } + if len(entries) != 2 { + t.Fatalf("expected 2 entries, got %d", len(entries)) + } + + if entries[0].TaskID != "1" || 
entries[0].Status != StatusCompleted { + t.Errorf("entry 0: %+v", entries[0]) + } + if entries[1].TaskID != "2" || entries[1].Status != StatusFailed { + t.Errorf("entry 1: %+v", entries[1]) + } + if entries[1].Error != "timeout" { + t.Errorf("entry 1 error: expected 'timeout', got %q", entries[1].Error) + } +} + +func TestTaskLog_Read_NonExistent(t *testing.T) { + log := NewTaskLog("/nonexistent/ralph.jsonl") + entries, err := log.Read() + if err != nil { + t.Fatalf("Read non-existent: %v", err) + } + if len(entries) != 0 { + t.Errorf("expected 0 entries, got %d", len(entries)) + } +} + +func TestTaskLog_Append_SetsTimestamp(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "ralph.jsonl") + + log := NewTaskLog(path) + + // Append without explicit timestamp + err := log.Append(TaskLogEntry{ + TaskID: "1", + Description: "Auto-timestamp", + Status: StatusCompleted, + }) + if err != nil { + t.Fatalf("Append: %v", err) + } + + entries, _ := log.Read() + if len(entries) != 1 { + t.Fatalf("expected 1 entry, got %d", len(entries)) + } + if entries[0].Timestamp == "" { + t.Error("expected auto-generated timestamp") + } +} diff --git a/internal/ralph/picker.go b/internal/ralph/picker.go new file mode 100644 index 0000000..b0ffb49 --- /dev/null +++ b/internal/ralph/picker.go @@ -0,0 +1,43 @@ +package ralph + +// Picker selects the next task to execute from a task source. +type Picker interface { + // Pick returns the next task to execute, or nil if none are available. + Pick() (*Task, error) + // Update persists a status change for a task. + Update(task Task) error +} + +// FilePicker reads tasks from a JSON file on disk. +type FilePicker struct { + Path string +} + +// NewFilePicker creates a Picker backed by a task file. +func NewFilePicker(path string) *FilePicker { + return &FilePicker{Path: path} +} + +// Pick reads the task file and returns the highest-priority pending task. 
+func (fp *FilePicker) Pick() (*Task, error) { + tasks, err := ParseTaskFile(fp.Path) + if err != nil { + return nil, err + } + return NextPending(tasks), nil +} + +// Update reads the task file, updates the matching task, and writes it back. +func (fp *FilePicker) Update(task Task) error { + tasks, err := ParseTaskFile(fp.Path) + if err != nil { + return err + } + for i, t := range tasks { + if t.ID == task.ID { + tasks[i] = task + return WriteTaskFile(fp.Path, tasks) + } + } + return nil +} diff --git a/internal/ralph/picker_test.go b/internal/ralph/picker_test.go new file mode 100644 index 0000000..f91332f --- /dev/null +++ b/internal/ralph/picker_test.go @@ -0,0 +1,84 @@ +package ralph + +import ( + "os" + "path/filepath" + "testing" +) + +func TestFilePicker_Pick(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + + content := `[ + {"id": "1", "description": "First task", "status": "pending", "priority": 2}, + {"id": "2", "description": "Second task", "status": "pending", "priority": 1}, + {"id": "3", "description": "Done task", "status": "completed", "priority": 0} +]` + os.WriteFile(path, []byte(content), 0o644) + + picker := NewFilePicker(path) + task, err := picker.Pick() + if err != nil { + t.Fatalf("Pick: %v", err) + } + if task == nil { + t.Fatal("expected a task") + } + if task.ID != "2" { + t.Errorf("expected task 2, got %s", task.ID) + } +} + +func TestFilePicker_Pick_NoPending(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + + content := `[{"id": "1", "description": "Done", "status": "completed", "priority": 1}]` + os.WriteFile(path, []byte(content), 0o644) + + picker := NewFilePicker(path) + task, err := picker.Pick() + if err != nil { + t.Fatalf("Pick: %v", err) + } + if task != nil { + t.Errorf("expected nil, got task %s", task.ID) + } +} + +func TestFilePicker_Update(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + + content := `[ + {"id": "1", 
"description": "Task one", "status": "pending", "priority": 1}, + {"id": "2", "description": "Task two", "status": "pending", "priority": 2} +]` + os.WriteFile(path, []byte(content), 0o644) + + picker := NewFilePicker(path) + + // Update task 1 to completed + err := picker.Update(Task{ + ID: "1", + Description: "Task one", + Status: StatusCompleted, + Priority: 1, + }) + if err != nil { + t.Fatalf("Update: %v", err) + } + + // Verify the update persisted + tasks, err := ParseTaskFile(path) + if err != nil { + t.Fatalf("ParseTaskFile after update: %v", err) + } + if tasks[0].Status != StatusCompleted { + t.Errorf("expected completed, got %s", tasks[0].Status) + } + if tasks[1].Status != StatusPending { + t.Errorf("task 2 should still be pending, got %s", tasks[1].Status) + } +} diff --git a/internal/ralph/ralph.go b/internal/ralph/ralph.go new file mode 100644 index 0000000..37c5017 --- /dev/null +++ b/internal/ralph/ralph.go @@ -0,0 +1,190 @@ +package ralph + +import ( + "fmt" + "os/exec" + "strings" + "time" + + "github.com/AgentGuardHQ/shellforge/internal/agent" + "github.com/AgentGuardHQ/shellforge/internal/governance" +) + +// TaskSourceType defines how tasks are sourced. +type TaskSourceType string + +const ( + SourceFile TaskSourceType = "file" + SourceMCP TaskSourceType = "mcp" +) + +// RalphConfig configures a Ralph Loop execution. +type RalphConfig struct { + TaskSource TaskSourceType + TaskFile string + MCPEndpoint string + LogFile string + Validate []string // shell commands to run for validation + AutoCommit bool + MaxTasks int // 0 = unlimited + LoopConfig agent.LoopConfig + DryRun bool +} + +// RalphResult summarizes the outcome of a Ralph Loop run. +type RalphResult struct { + Completed int + Failed int + Skipped int + Total int + Entries []TaskLogEntry +} + +// RunRalph executes the Ralph Loop: PICK -> IMPLEMENT -> VALIDATE -> COMMIT -> RESET. 
+// Each iteration picks the next pending task, runs agent.RunLoop with a fresh context, +// validates the result, optionally commits, then resets for the next task. +func RunRalph(cfg RalphConfig, engine *governance.Engine) (*RalphResult, error) { + picker, err := makePicker(cfg) + if err != nil { + return nil, fmt.Errorf("create picker: %w", err) + } + + logFile := cfg.LogFile + if logFile == "" { + logFile = "ralph-log.jsonl" + } + taskLog := NewTaskLog(logFile) + + result := &RalphResult{} + processed := 0 + + for { + // Check task limit + if cfg.MaxTasks > 0 && processed >= cfg.MaxTasks { + break + } + + // ── PICK ── + task, err := picker.Pick() + if err != nil { + return result, fmt.Errorf("pick task: %w", err) + } + if task == nil { + break // no more pending tasks + } + result.Total++ + + // Mark as running + task.Status = StatusRunning + picker.Update(*task) + + if cfg.DryRun { + fmt.Printf("[ralph] DRY RUN — would implement task %s: %s\n", task.ID, task.Description) + task.Status = StatusCompleted // mark completed so we don't pick it again + picker.Update(*task) + result.Skipped++ + processed++ + continue + } + + // ── IMPLEMENT ── + loopCfg := cfg.LoopConfig + loopCfg.UserPrompt = task.Description + loopCfg.Agent = fmt.Sprintf("ralph-task-%s", task.ID) + + start := time.Now() + runResult, runErr := agent.RunLoop(loopCfg, engine) + + entry := TaskLogEntry{ + TaskID: task.ID, + Description: task.Description, + Timestamp: time.Now().UTC().Format(time.RFC3339), + } + + if runErr != nil { + task.Status = StatusFailed + task.Error = runErr.Error() + entry.Status = StatusFailed + entry.Error = runErr.Error() + entry.DurationMs = time.Since(start).Milliseconds() + picker.Update(*task) + taskLog.Append(entry) + result.Failed++ + result.Entries = append(result.Entries, entry) + processed++ + continue + } + + entry.Output = runResult.Output + entry.Turns = runResult.Turns + entry.ToolCalls = runResult.ToolCalls + entry.DurationMs = runResult.DurationMs + + // ── 
VALIDATE ── + validated := true + if len(cfg.Validate) > 0 && runResult.Success { + for _, cmdStr := range cfg.Validate { + parts := strings.Fields(cmdStr) + if len(parts) == 0 { + continue + } + cmd := exec.Command(parts[0], parts[1:]...) + out, verr := cmd.CombinedOutput() + if verr != nil { + validated = false + task.Error = fmt.Sprintf("validation failed (%s): %s", cmdStr, string(out)) + break + } + } + } + + if !runResult.Success || !validated { + task.Status = StatusFailed + if task.Error == "" { + task.Error = fmt.Sprintf("agent exit: %s", runResult.ExitReason) + } + entry.Status = StatusFailed + entry.Error = task.Error + picker.Update(*task) + taskLog.Append(entry) + result.Failed++ + result.Entries = append(result.Entries, entry) + processed++ + continue + } + + // ── COMMIT ── + if cfg.AutoCommit { + commitMsg := fmt.Sprintf("ralph: task %s — %s", task.ID, task.Description) + addCmd := exec.Command("git", "add", "-A") + addCmd.Run() + commitCmd := exec.Command("git", "commit", "-m", commitMsg, "--allow-empty") + commitCmd.Run() + } + + // ── RESET ── (implicit: next iteration creates a fresh RunLoop) + task.Status = StatusCompleted + entry.Status = StatusCompleted + picker.Update(*task) + taskLog.Append(entry) + result.Completed++ + result.Entries = append(result.Entries, entry) + processed++ + } + + return result, nil +} + +func makePicker(cfg RalphConfig) (Picker, error) { + switch cfg.TaskSource { + case SourceFile, "": + if cfg.TaskFile == "" { + return nil, fmt.Errorf("task file path required for file source") + } + return NewFilePicker(cfg.TaskFile), nil + case SourceMCP: + return nil, fmt.Errorf("MCP task source not yet implemented") + default: + return nil, fmt.Errorf("unknown task source: %s", cfg.TaskSource) + } +} diff --git a/internal/ralph/ralph_test.go b/internal/ralph/ralph_test.go new file mode 100644 index 0000000..0df0f48 --- /dev/null +++ b/internal/ralph/ralph_test.go @@ -0,0 +1,174 @@ +package ralph + +import ( + "os" + 
"path/filepath" + "testing" + + "github.com/AgentGuardHQ/shellforge/internal/agent" +) + +func TestRalphConfig_Defaults(t *testing.T) { + cfg := RalphConfig{ + TaskSource: SourceFile, + TaskFile: "tasks.json", + } + if cfg.TaskSource != SourceFile { + t.Errorf("expected file source, got %s", cfg.TaskSource) + } +} + +func TestMakePicker_File(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + os.WriteFile(path, []byte("[]"), 0o644) + + cfg := RalphConfig{TaskSource: SourceFile, TaskFile: path} + picker, err := makePicker(cfg) + if err != nil { + t.Fatalf("makePicker: %v", err) + } + if picker == nil { + t.Fatal("expected non-nil picker") + } +} + +func TestMakePicker_NoFile(t *testing.T) { + cfg := RalphConfig{TaskSource: SourceFile, TaskFile: ""} + _, err := makePicker(cfg) + if err == nil { + t.Fatal("expected error for empty task file") + } +} + +func TestMakePicker_MCP(t *testing.T) { + cfg := RalphConfig{TaskSource: SourceMCP} + _, err := makePicker(cfg) + if err == nil { + t.Fatal("expected error for unimplemented MCP source") + } +} + +func TestMakePicker_Unknown(t *testing.T) { + cfg := RalphConfig{TaskSource: "unknown"} + _, err := makePicker(cfg) + if err == nil { + t.Fatal("expected error for unknown source type") + } +} + +func TestRunRalph_DryRun(t *testing.T) { + dir := t.TempDir() + taskPath := filepath.Join(dir, "tasks.json") + logPath := filepath.Join(dir, "ralph.jsonl") + + tasks := `[ + {"id": "1", "description": "Task one", "status": "pending", "priority": 1}, + {"id": "2", "description": "Task two", "status": "pending", "priority": 2} +]` + os.WriteFile(taskPath, []byte(tasks), 0o644) + + cfg := RalphConfig{ + TaskSource: SourceFile, + TaskFile: taskPath, + LogFile: logPath, + DryRun: true, + MaxTasks: 10, + LoopConfig: agent.LoopConfig{ + Agent: "test-agent", + System: "test", + MaxTurns: 5, + TimeoutMs: 10000, + OutputDir: dir, + TokenBudget: 1000, + }, + } + + // In dry-run, we don't need a real governance 
engine + result, err := RunRalph(cfg, nil) + if err != nil { + t.Fatalf("RunRalph dry-run: %v", err) + } + if result.Skipped != 2 { + t.Errorf("expected 2 skipped, got %d", result.Skipped) + } + if result.Total != 2 { + t.Errorf("expected 2 total, got %d", result.Total) + } + + // Verify tasks are marked completed (dry-run advances past them) + readTasks, _ := ParseTaskFile(taskPath) + for _, task := range readTasks { + if task.Status != StatusCompleted { + t.Errorf("task %s should be completed after dry-run, got %s", task.ID, task.Status) + } + } +} + +func TestRunRalph_MaxTasks(t *testing.T) { + dir := t.TempDir() + taskPath := filepath.Join(dir, "tasks.json") + logPath := filepath.Join(dir, "ralph.jsonl") + + tasks := `[ + {"id": "1", "description": "Task one", "status": "pending", "priority": 1}, + {"id": "2", "description": "Task two", "status": "pending", "priority": 2}, + {"id": "3", "description": "Task three", "status": "pending", "priority": 3} +]` + os.WriteFile(taskPath, []byte(tasks), 0o644) + + cfg := RalphConfig{ + TaskSource: SourceFile, + TaskFile: taskPath, + LogFile: logPath, + DryRun: true, + MaxTasks: 2, // only process 2 + LoopConfig: agent.LoopConfig{ + Agent: "test-agent", + System: "test", + MaxTurns: 5, + TimeoutMs: 10000, + OutputDir: dir, + TokenBudget: 1000, + }, + } + + result, err := RunRalph(cfg, nil) + if err != nil { + t.Fatalf("RunRalph max-tasks: %v", err) + } + if result.Total != 2 { + t.Errorf("expected 2 total (max limit), got %d", result.Total) + } +} + +func TestRunRalph_NoPendingTasks(t *testing.T) { + dir := t.TempDir() + taskPath := filepath.Join(dir, "tasks.json") + logPath := filepath.Join(dir, "ralph.jsonl") + + tasks := `[{"id": "1", "description": "Done", "status": "completed", "priority": 1}]` + os.WriteFile(taskPath, []byte(tasks), 0o644) + + cfg := RalphConfig{ + TaskSource: SourceFile, + TaskFile: taskPath, + LogFile: logPath, + LoopConfig: agent.LoopConfig{ + Agent: "test-agent", + System: "test", + MaxTurns: 5, 
+ TimeoutMs: 10000, + OutputDir: dir, + TokenBudget: 1000, + }, + } + + result, err := RunRalph(cfg, nil) + if err != nil { + t.Fatalf("RunRalph no-pending: %v", err) + } + if result.Total != 0 { + t.Errorf("expected 0 total, got %d", result.Total) + } +} diff --git a/internal/ralph/task.go b/internal/ralph/task.go new file mode 100644 index 0000000..c907590 --- /dev/null +++ b/internal/ralph/task.go @@ -0,0 +1,78 @@ +// Package ralph implements the Ralph Loop — a stateless-iterative +// execution pattern for autonomous agent task processing. Each cycle: +// PICK → IMPLEMENT → VALIDATE → COMMIT → RESET. +// +// The loop is stateless across iterations: each task gets a fresh +// RunLoop call with no prior message history, preventing context +// pollution between tasks. +package ralph + +import ( + "encoding/json" + "fmt" + "os" + "sort" +) + +// TaskStatus represents the lifecycle state of a task. +type TaskStatus string + +const ( + StatusPending TaskStatus = "pending" + StatusRunning TaskStatus = "running" + StatusCompleted TaskStatus = "completed" + StatusFailed TaskStatus = "failed" +) + +// Task is a single unit of work for the Ralph Loop. +type Task struct { + ID string `json:"id"` + Description string `json:"description"` + Status TaskStatus `json:"status"` + Priority int `json:"priority"` + Error string `json:"error,omitempty"` +} + +// ParseTaskFile reads a JSON task file and returns the task list. +func ParseTaskFile(path string) ([]Task, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read task file: %w", err) + } + var tasks []Task + if err := json.Unmarshal(data, &tasks); err != nil { + return nil, fmt.Errorf("parse task file: %w", err) + } + return tasks, nil +} + +// WriteTaskFile writes the task list back to disk as JSON. 
+func WriteTaskFile(path string, tasks []Task) error { + data, err := json.MarshalIndent(tasks, "", " ") + if err != nil { + return fmt.Errorf("marshal tasks: %w", err) + } + if err := os.WriteFile(path, data, 0o644); err != nil { + return fmt.Errorf("write task file: %w", err) + } + return nil +} + +// NextPending returns the highest-priority pending task (lowest priority number). +// Returns nil if no pending tasks remain. +func NextPending(tasks []Task) *Task { + var pending []Task + for _, t := range tasks { + if t.Status == StatusPending { + pending = append(pending, t) + } + } + if len(pending) == 0 { + return nil + } + sort.Slice(pending, func(i, j int) bool { + return pending[i].Priority < pending[j].Priority + }) + result := pending[0] + return &result +} diff --git a/internal/ralph/task_test.go b/internal/ralph/task_test.go new file mode 100644 index 0000000..2886760 --- /dev/null +++ b/internal/ralph/task_test.go @@ -0,0 +1,114 @@ +package ralph + +import ( + "os" + "path/filepath" + "testing" +) + +func TestParseTaskFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + + content := `[ + {"id": "1", "description": "Add input validation", "status": "pending", "priority": 1}, + {"id": "2", "description": "Write tests", "status": "pending", "priority": 2}, + {"id": "3", "description": "Fix bug", "status": "completed", "priority": 0} +]` + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatalf("write test file: %v", err) + } + + tasks, err := ParseTaskFile(path) + if err != nil { + t.Fatalf("ParseTaskFile: %v", err) + } + if len(tasks) != 3 { + t.Fatalf("expected 3 tasks, got %d", len(tasks)) + } + + if tasks[0].ID != "1" || tasks[0].Status != StatusPending { + t.Errorf("task 0: got id=%s status=%s", tasks[0].ID, tasks[0].Status) + } + if tasks[2].Status != StatusCompleted { + t.Errorf("task 2: expected completed, got %s", tasks[2].Status) + } +} + +func TestParseTaskFile_NotFound(t *testing.T) { + _, 
err := ParseTaskFile("/nonexistent/tasks.json") + if err == nil { + t.Fatal("expected error for missing file") + } +} + +func TestParseTaskFile_InvalidJSON(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + os.WriteFile(path, []byte("not json"), 0o644) + + _, err := ParseTaskFile(path) + if err == nil { + t.Fatal("expected error for invalid JSON") + } +} + +func TestWriteTaskFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tasks.json") + + tasks := []Task{ + {ID: "1", Description: "Test task", Status: StatusPending, Priority: 1}, + } + if err := WriteTaskFile(path, tasks); err != nil { + t.Fatalf("WriteTaskFile: %v", err) + } + + // Read back + readTasks, err := ParseTaskFile(path) + if err != nil { + t.Fatalf("ParseTaskFile after write: %v", err) + } + if len(readTasks) != 1 { + t.Fatalf("expected 1 task, got %d", len(readTasks)) + } + if readTasks[0].ID != "1" || readTasks[0].Description != "Test task" { + t.Errorf("round-trip mismatch: %+v", readTasks[0]) + } +} + +func TestNextPending(t *testing.T) { + tasks := []Task{ + {ID: "1", Description: "Low priority", Status: StatusPending, Priority: 3}, + {ID: "2", Description: "High priority", Status: StatusPending, Priority: 1}, + {ID: "3", Description: "Already done", Status: StatusCompleted, Priority: 0}, + {ID: "4", Description: "Medium priority", Status: StatusPending, Priority: 2}, + } + + next := NextPending(tasks) + if next == nil { + t.Fatal("expected a pending task") + } + if next.ID != "2" { + t.Errorf("expected task 2 (highest priority), got task %s", next.ID) + } +} + +func TestNextPending_NoPending(t *testing.T) { + tasks := []Task{ + {ID: "1", Status: StatusCompleted, Priority: 1}, + {ID: "2", Status: StatusFailed, Priority: 2}, + } + + next := NextPending(tasks) + if next != nil { + t.Errorf("expected nil, got task %s", next.ID) + } +} + +func TestNextPending_EmptyList(t *testing.T) { + next := NextPending(nil) + if next != nil { + t.Error("expected 
nil for empty list") + } +} From 220ac3670feb78c58e82cef2778636c5ac0ba505 Mon Sep 17 00:00:00 2001 From: Jared Pleva Date: Wed, 1 Apr 2026 00:47:36 +0000 Subject: [PATCH 2/4] feat(orchestrator): add sub-agent orchestration with concurrency control Adds SubTask/SubResult/TaskHandle types, Orchestrator with SpawnSync (blocking) and SpawnAsync (goroutine+channel) execution, semaphore-based concurrency limiting, and CompressResult for output truncation. Each sub-agent gets its own RunLoop call with isolated context. 12 tests passing. Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/orchestrator/compress.go | 54 +++++++++ internal/orchestrator/compress_test.go | 83 +++++++++++++ internal/orchestrator/handle.go | 37 ++++++ internal/orchestrator/handle_test.go | 53 +++++++++ internal/orchestrator/orchestrator.go | 129 +++++++++++++++++++++ internal/orchestrator/orchestrator_test.go | 34 ++++++ 6 files changed, 390 insertions(+) create mode 100644 internal/orchestrator/compress.go create mode 100644 internal/orchestrator/compress_test.go create mode 100644 internal/orchestrator/handle.go create mode 100644 internal/orchestrator/handle_test.go create mode 100644 internal/orchestrator/orchestrator.go create mode 100644 internal/orchestrator/orchestrator_test.go diff --git a/internal/orchestrator/compress.go b/internal/orchestrator/compress.go new file mode 100644 index 0000000..4efe9a2 --- /dev/null +++ b/internal/orchestrator/compress.go @@ -0,0 +1,54 @@ +package orchestrator + +// tokenThreshold is the maximum output size (in estimated tokens) that +// passes through without compression. Outputs below this are returned as-is. +const tokenThreshold = 750 + +// CompressResult compresses a sub-agent output if it exceeds the token threshold. +// Strategy: +// 1. If output < 750 tokens (estimated), return as-is +// 2. Otherwise truncate to the threshold with a marker +// +// A future version will use LLM summarization when a Provider is available. 
+func CompressResult(output string) string { + estimated := estimateTokens(output) + if estimated <= tokenThreshold { + return output + } + + // Truncate to approximately tokenThreshold tokens (4 chars per token estimate) + maxChars := tokenThreshold * 4 + if maxChars >= len(output) { + return output + } + return output[:maxChars] + "\n\n[... output truncated — " + itoa(estimated-tokenThreshold) + " tokens omitted]" +} + +// estimateTokens provides a rough token count (1 token ~ 4 chars). +func estimateTokens(s string) int { + return len(s) / 4 +} + +// itoa converts an int to a string without importing strconv (Go 1.18 compat). +func itoa(n int) string { + if n == 0 { + return "0" + } + neg := false + if n < 0 { + neg = true + n = -n + } + var buf [20]byte + i := len(buf) + for n > 0 { + i-- + buf[i] = byte('0' + n%10) + n /= 10 + } + if neg { + i-- + buf[i] = '-' + } + return string(buf[i:]) +} diff --git a/internal/orchestrator/compress_test.go b/internal/orchestrator/compress_test.go new file mode 100644 index 0000000..f68d15a --- /dev/null +++ b/internal/orchestrator/compress_test.go @@ -0,0 +1,83 @@ +package orchestrator + +import ( + "strings" + "testing" +) + +func TestCompressResult_ShortOutput(t *testing.T) { + short := "This is a short output." 
+ result := CompressResult(short) + if result != short { + t.Errorf("short output should pass through unchanged, got %q", result) + } +} + +func TestCompressResult_ExactThreshold(t *testing.T) { + // 750 tokens * 4 chars = 3000 chars + exact := strings.Repeat("a", 3000) + result := CompressResult(exact) + if result != exact { + t.Error("output at exact threshold should pass through unchanged") + } +} + +func TestCompressResult_OverThreshold(t *testing.T) { + // 4000 tokens * 4 chars = 16000 chars + long := strings.Repeat("x", 16000) + result := CompressResult(long) + + if len(result) >= len(long) { + t.Errorf("compressed result should be shorter than original (%d >= %d)", len(result), len(long)) + } + if !strings.Contains(result, "truncated") { + t.Error("compressed result should contain truncation marker") + } + if !strings.Contains(result, "omitted") { + t.Error("compressed result should indicate omitted tokens") + } +} + +func TestCompressResult_Empty(t *testing.T) { + result := CompressResult("") + if result != "" { + t.Errorf("empty input should return empty, got %q", result) + } +} + +func TestEstimateTokens(t *testing.T) { + tests := []struct { + input string + expected int + }{ + {"", 0}, + {"abcd", 1}, + {"12345678", 2}, + {strings.Repeat("a", 100), 25}, + } + for _, tt := range tests { + got := estimateTokens(tt.input) + if got != tt.expected { + t.Errorf("estimateTokens(%d chars): expected %d, got %d", len(tt.input), tt.expected, got) + } + } +} + +func TestItoa(t *testing.T) { + tests := []struct { + input int + expected string + }{ + {0, "0"}, + {1, "1"}, + {42, "42"}, + {-5, "-5"}, + {1000, "1000"}, + } + for _, tt := range tests { + got := itoa(tt.input) + if got != tt.expected { + t.Errorf("itoa(%d): expected %q, got %q", tt.input, tt.expected, got) + } + } +} diff --git a/internal/orchestrator/handle.go b/internal/orchestrator/handle.go new file mode 100644 index 0000000..4f0f463 --- /dev/null +++ b/internal/orchestrator/handle.go @@ -0,0 +1,37 @@ 
+package orchestrator + +// SubTask describes a unit of work to be executed by a sub-agent. +// Each sub-agent gets its own RunLoop call with isolated context. +type SubTask struct { + ID string + Description string + System string // system prompt for the sub-agent + Model string + MaxTurns int + TimeoutMs int + TokenBudget int +} + +// SubResult captures the outcome of a sub-agent execution. +type SubResult struct { + TaskID string + Success bool + Output string + Turns int + ToolCalls int + DurationMs int64 + Error string +} + +// TaskHandle is a reference to an in-flight async sub-agent task. +// Use Collect() to block until the result is available. +type TaskHandle struct { + TaskID string + done chan *asyncResult +} + +// asyncResult wraps a SubResult with an optional error from the agent call. +type asyncResult struct { + result *SubResult + err error +} diff --git a/internal/orchestrator/handle_test.go b/internal/orchestrator/handle_test.go new file mode 100644 index 0000000..6d54b68 --- /dev/null +++ b/internal/orchestrator/handle_test.go @@ -0,0 +1,53 @@ +package orchestrator + +import ( + "testing" +) + +func TestSubTask_Fields(t *testing.T) { + task := SubTask{ + ID: "sub-1", + Description: "Analyze code quality", + System: "You are a QA agent.", + Model: "test-model", + MaxTurns: 5, + TimeoutMs: 30000, + TokenBudget: 2000, + } + + if task.ID != "sub-1" { + t.Errorf("expected id sub-1, got %s", task.ID) + } + if task.MaxTurns != 5 { + t.Errorf("expected 5 max turns, got %d", task.MaxTurns) + } +} + +func TestSubResult_Fields(t *testing.T) { + result := SubResult{ + TaskID: "sub-1", + Success: true, + Output: "All tests pass", + Turns: 3, + ToolCalls: 5, + DurationMs: 1500, + } + + if !result.Success { + t.Error("expected success") + } + if result.TaskID != "sub-1" { + t.Errorf("expected task id sub-1, got %s", result.TaskID) + } +} + +func TestTaskHandle_Fields(t *testing.T) { + handle := TaskHandle{ + TaskID: "sub-1", + done: make(chan *asyncResult, 
1), + } + + if handle.TaskID != "sub-1" { + t.Errorf("expected task id sub-1, got %s", handle.TaskID) + } +} diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go new file mode 100644 index 0000000..7f78d82 --- /dev/null +++ b/internal/orchestrator/orchestrator.go @@ -0,0 +1,129 @@ +package orchestrator + +import ( + "fmt" + "time" + + "github.com/AgentGuardHQ/shellforge/internal/agent" + "github.com/AgentGuardHQ/shellforge/internal/governance" + "github.com/AgentGuardHQ/shellforge/internal/llm" +) + +// Orchestrator manages sub-agent execution with concurrency control. +// Each sub-agent gets its own RunLoop call with isolated context. +type Orchestrator struct { + provider llm.Provider + governance *governance.Engine + maxParallel int + slots chan struct{} +} + +// NewOrchestrator creates an Orchestrator with the given concurrency limit. +func NewOrchestrator(provider llm.Provider, gov *governance.Engine, maxParallel int) *Orchestrator { + if maxParallel < 1 { + maxParallel = 1 + } + slots := make(chan struct{}, maxParallel) + for i := 0; i < maxParallel; i++ { + slots <- struct{}{} + } + return &Orchestrator{ + provider: provider, + governance: gov, + maxParallel: maxParallel, + slots: slots, + } +} + +// SpawnSync executes a sub-agent synchronously, blocking until completion. +// Acquires a concurrency slot before running. +func (o *Orchestrator) SpawnSync(task SubTask) (*SubResult, error) { + // Acquire slot + <-o.slots + defer func() { o.slots <- struct{}{} }() + + return o.executeTask(task) +} + +// SpawnAsync launches a sub-agent in a goroutine and returns a handle. +// The handle can be passed to Collect() to retrieve the result. 
+func (o *Orchestrator) SpawnAsync(task SubTask) (TaskHandle, error) { + handle := TaskHandle{ + TaskID: task.ID, + done: make(chan *asyncResult, 1), + } + + go func() { + // Acquire slot + <-o.slots + defer func() { o.slots <- struct{}{} }() + + result, err := o.executeTask(task) + handle.done <- &asyncResult{result: result, err: err} + }() + + return handle, nil +} + +// Collect blocks until the async task completes or the timeout expires. +func (o *Orchestrator) Collect(h TaskHandle, timeout time.Duration) (*SubResult, error) { + select { + case ar := <-h.done: + if ar.err != nil { + return nil, ar.err + } + return ar.result, nil + case <-time.After(timeout): + return nil, fmt.Errorf("collect timeout for task %s after %s", h.TaskID, timeout) + } +} + +// executeTask runs a single sub-agent via agent.RunLoop. +func (o *Orchestrator) executeTask(task SubTask) (*SubResult, error) { + cfg := agent.LoopConfig{ + Agent: fmt.Sprintf("sub-agent-%s", task.ID), + System: task.System, + UserPrompt: task.Description, + Model: task.Model, + MaxTurns: task.MaxTurns, + TimeoutMs: task.TimeoutMs, + OutputDir: "", + TokenBudget: task.TokenBudget, + Provider: o.provider, + } + + if cfg.System == "" { + cfg.System = "You are a sub-agent. Complete the requested task precisely." 
+ } + if cfg.MaxTurns == 0 { + cfg.MaxTurns = 10 + } + if cfg.TimeoutMs == 0 { + cfg.TimeoutMs = 60000 + } + if cfg.TokenBudget == 0 { + cfg.TokenBudget = 3000 + } + + start := time.Now() + runResult, err := agent.RunLoop(cfg, o.governance) + if err != nil { + return &SubResult{ + TaskID: task.ID, + Success: false, + Error: err.Error(), + DurationMs: time.Since(start).Milliseconds(), + }, err + } + + output := CompressResult(runResult.Output) + + return &SubResult{ + TaskID: task.ID, + Success: runResult.Success, + Output: output, + Turns: runResult.Turns, + ToolCalls: runResult.ToolCalls, + DurationMs: runResult.DurationMs, + }, nil +} diff --git a/internal/orchestrator/orchestrator_test.go b/internal/orchestrator/orchestrator_test.go new file mode 100644 index 0000000..6113eaf --- /dev/null +++ b/internal/orchestrator/orchestrator_test.go @@ -0,0 +1,34 @@ +package orchestrator + +import ( + "testing" +) + +func TestNewOrchestrator_MinParallel(t *testing.T) { + // maxParallel < 1 should default to 1 + o := NewOrchestrator(nil, nil, 0) + if o.maxParallel != 1 { + t.Errorf("expected maxParallel=1, got %d", o.maxParallel) + } +} + +func TestNewOrchestrator_SetsFields(t *testing.T) { + o := NewOrchestrator(nil, nil, 4) + if o.maxParallel != 4 { + t.Errorf("expected maxParallel=4, got %d", o.maxParallel) + } + if cap(o.slots) != 4 { + t.Errorf("expected slots capacity=4, got %d", cap(o.slots)) + } + // All slots should be available initially + if len(o.slots) != 4 { + t.Errorf("expected 4 available slots, got %d", len(o.slots)) + } +} + +func TestNewOrchestrator_NegativeParallel(t *testing.T) { + o := NewOrchestrator(nil, nil, -5) + if o.maxParallel != 1 { + t.Errorf("expected maxParallel=1 for negative input, got %d", o.maxParallel) + } +} From 5d25816c747d174870c3a43bd0a5270785e57784 Mon Sep 17 00:00:00 2001 From: Jared Pleva Date: Wed, 1 Apr 2026 00:47:42 +0000 Subject: [PATCH 3/4] feat(cli): add 'shellforge ralph' command for task loop execution Wires the Ralph 
Loop into the CLI with full flag support: --tasks, --log, --provider, --validate, --dry-run, --auto-commit, --max-tasks. Supports both Anthropic API and legacy Ollama providers. Co-Authored-By: Claude Opus 4.6 (1M context) --- cmd/shellforge/main.go | 119 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/cmd/shellforge/main.go b/cmd/shellforge/main.go index b895c92..b4fa52d 100644 --- a/cmd/shellforge/main.go +++ b/cmd/shellforge/main.go @@ -21,6 +21,7 @@ import ( "github.com/AgentGuardHQ/shellforge/internal/llm" "github.com/AgentGuardHQ/shellforge/internal/logger" "github.com/AgentGuardHQ/shellforge/internal/ollama" +"github.com/AgentGuardHQ/shellforge/internal/ralph" "github.com/AgentGuardHQ/shellforge/internal/scheduler" ) @@ -84,6 +85,8 @@ os.Exit(1) } cmdAgent(strings.Join(filtered, " "), providerName, thinkingBudget) } +case "ralph": +cmdRalph() case "swarm": cmdSwarm() case "serve": @@ -120,6 +123,7 @@ Usage: shellforge scan [dir] DefenseClaw supply chain scan shellforge version Print version + shellforge ralph [flags] Run Ralph Loop (stateless-iterative task execution) shellforge serve [config] Simple daemon mode (built-in scheduler) shellforge swarm Setup Dagu orchestration (DAG workflows + web UI) @@ -724,6 +728,121 @@ printResult("prototype-agent", result) saveReport("outputs/logs", "prototype", result) } +func cmdRalph() { +engine := mustGovernance() + +// Parse flags +taskFile := "tasks.json" +logFile := "ralph-log.jsonl" +providerName := "" +dryRun := false +autoCommit := false +maxTasks := 0 +var validate []string + +remaining := os.Args[2:] +for i := 0; i < len(remaining); i++ { + switch remaining[i] { + case "--tasks": + if i+1 < len(remaining) { + taskFile = remaining[i+1] + i++ + } + case "--log": + if i+1 < len(remaining) { + logFile = remaining[i+1] + i++ + } + case "--provider": + if i+1 < len(remaining) { + providerName = remaining[i+1] + i++ + } + case "--validate": + if i+1 < len(remaining) { + 
validate = append(validate, remaining[i+1]) + i++ + } + case "--dry-run": + dryRun = true + case "--auto-commit": + autoCommit = true + case "--max-tasks": + if i+1 < len(remaining) { + fmt.Sscanf(remaining[i+1], "%d", &maxTasks) + i++ + } + default: + fmt.Fprintf(os.Stderr, "Unknown ralph flag: %s\n", remaining[i]) + fmt.Fprintln(os.Stderr, "Usage: shellforge ralph [--tasks file] [--log file] [--provider name] [--validate cmd] [--dry-run] [--auto-commit] [--max-tasks N]") + os.Exit(1) + } +} + +var provider llm.Provider +switch providerName { +case "anthropic": + apiKey := os.Getenv("ANTHROPIC_API_KEY") + if apiKey == "" { + fmt.Fprintln(os.Stderr, "Error: ANTHROPIC_API_KEY environment variable not set") + os.Exit(1) + } + model := os.Getenv("ANTHROPIC_MODEL") + if model == "" { + model = "claude-haiku-4-5-20251001" + } + provider = llm.NewAnthropicProvider(apiKey, model) + fmt.Fprintf(os.Stderr, "[ralph] Using Anthropic API (model: %s)\n", model) +case "": + // Legacy Ollama path + mustOllama() +default: + fmt.Fprintf(os.Stderr, "Unknown provider: %s\n", providerName) + os.Exit(1) +} + +cfg := ralph.RalphConfig{ + TaskSource: ralph.SourceFile, + TaskFile: taskFile, + LogFile: logFile, + Validate: validate, + AutoCommit: autoCommit, + MaxTasks: maxTasks, + DryRun: dryRun, + LoopConfig: agent.LoopConfig{ + Agent: "ralph-agent", + System: "You are a senior engineer. Complete the requested task using available tools. Read files, write files, run commands. 
Be precise and thorough.", + Model: ollama.Model, + MaxTurns: 15, + TimeoutMs: 180_000, + OutputDir: "outputs/logs", + TokenBudget: 3000, + Provider: provider, + }, +} + +fmt.Printf("[ralph] Starting Ralph Loop — tasks: %s, dry-run: %v\n", taskFile, dryRun) + +result, err := ralph.RunRalph(cfg, engine) +if err != nil { + fmt.Fprintf(os.Stderr, "ERROR: ralph loop: %s\n", err) + os.Exit(1) +} + +fmt.Println() +fmt.Printf("[ralph] Complete — %d completed, %d failed, %d skipped (of %d total)\n", + result.Completed, result.Failed, result.Skipped, result.Total) + +for _, entry := range result.Entries { + status := "completed" + if entry.Status == ralph.StatusFailed { + status = "FAILED" + } + fmt.Printf(" [%s] task %s: %s (%d turns, %dms)\n", + status, entry.TaskID, entry.Description, entry.Turns, entry.DurationMs) +} +} + func cmdSwarm() { fmt.Println("=== ShellForge Swarm Setup (Dagu) ===") fmt.Println() From e4fc2f66ee2139a871865f8800b54646c4edd0bb Mon Sep 17 00:00:00 2001 From: Jared Pleva Date: Wed, 1 Apr 2026 01:04:41 +0000 Subject: [PATCH 4/4] docs: update README, architecture, and roadmap for v0.7.0 - Fix headline to reflect Anthropic API provider option - Add --provider anthropic and --thinking-budget to CLI commands - Add llm/ and agent/drift.go to Go project layout - Mark Phase 11 (workspace swarm migration) as complete - Add v0.7.0 completed items (LLM provider, prompt caching, drift detection) Co-Authored-By: Claude Sonnet 4.6 --- README.md | 10 ++++++---- docs/architecture.md | 7 ++++++- docs/roadmap.md | 20 ++++++++++++++------ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 6b64a12..4f2b154 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,14 @@ # ShellForge -**Governed local AI agents — one Go binary, zero cloud.** +**Governed AI agent runtime — one Go binary, local or cloud.** [![Go](https://img.shields.io/badge/Go-1.18+-00ADD8?style=for-the-badge&logo=go&logoColor=white)](https://go.dev) [![GitHub 
Pages](https://img.shields.io/badge/Live_Site-agentguardhq.github.io/shellforge-ff6b2b?style=for-the-badge)](https://agentguardhq.github.io/shellforge) [![License: MIT](https://img.shields.io/badge/License-MIT-blue?style=for-the-badge)](LICENSE) [![AgentGuard](https://img.shields.io/badge/Governed_by-AgentGuard-green?style=for-the-badge)](https://github.com/AgentGuardHQ/agentguard) -*Run autonomous AI agents on your machine with policy enforcement on every tool call. No cloud. No API keys. No data leaves your laptop.* +*Run autonomous AI agents with policy enforcement on every tool call. Local via Ollama or cloud via Anthropic API — your choice.* [Website](https://agentguardhq.github.io/shellforge) · [Docs](docs/architecture.md) · [Roadmap](docs/roadmap.md) · [AgentGuard](https://github.com/AgentGuardHQ/agentguard) @@ -112,9 +112,11 @@ shellforge status | Command | Description | |---------|-------------| -| `shellforge run "prompt"` | Run a governed agent (goose, claude, copilot, codex, gemini) | +| `shellforge agent "prompt"` | Run a governed agent (Ollama, default) | +| `shellforge agent --provider anthropic "prompt"` | Run via Anthropic API (Haiku/Sonnet/Opus, prompt caching) | +| `shellforge agent --thinking-budget 8000 "prompt"` | Enable extended thinking (Sonnet/Opus) | +| `shellforge run "prompt"` | Run a governed CLI driver (goose, claude, copilot, codex, gemini) | | `shellforge setup` | Install Ollama, create governance config, verify stack | -| `shellforge agent "prompt"` | Run a governed agent — every tool call checked | | `shellforge qa [dir]` | QA analysis — find test gaps and issues | | `shellforge report [repo]` | Generate a status report from git + logs | | `shellforge serve agents.yaml` | Daemon mode — run a 24/7 agent swarm | diff --git a/docs/architecture.md b/docs/architecture.md index 017ca82..44aefb5 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -123,9 +123,14 @@ cmd/shellforge/ └── status.go # Ecosystem health check 
internal/
+├── llm/ # LLM provider interface
+│ ├── provider.go # Provider interface (Chat, Name) + Message/Response types
+│ └── anthropic.go # Anthropic API adapter (stdlib HTTP, prompt caching, tool_use)
+├── agent/ # Agentic loop
+│ ├── loop.go # runProviderLoop (Anthropic) + runOllamaLoop, drift detection wiring
+│ └── drift.go # Drift detector — self-score every 5 calls, steer/kill on low scores
├── governance/ # agentguard.yaml parser + policy engine
├── ollama/ # Ollama HTTP client (chat, generate)
-├── agent/ # Native fallback agentic loop
├── tools/ # 5 tool implementations + RTK wrapper
├── engine/ # Pluggable engine interface (Goose, OpenClaw, OpenCode)
├── logger/ # Structured JSON logging
diff --git a/docs/roadmap.md b/docs/roadmap.md
index 283e75e..1bfe4d6 100644
--- a/docs/roadmap.md
+++ b/docs/roadmap.md
@@ -33,13 +33,22 @@
- [x] Tested Aider (file editing only, no shell execution)
- [x] Evaluated Goose (Block) — native Ollama, actually executes tools

-### v0.6.0 — Goose + Governed Shell ← CURRENT
+### v0.6.0 — Goose + Governed Shell
- [x] Goose as local model driver (`shellforge run goose`)
- [x] `govern-shell.sh` — shell wrapper that evaluates every command through AgentGuard
- [x] `shellforge run goose` sets SHELL to governed wrapper automatically
- [x] Fixed catch-all deny bug (bounded-execution policy was denying everything)
- [x] Dagu DAG templates (sdlc-swarm, studio-swarm, workspace-swarm, multi-driver)

+### v0.7.0 — Anthropic API Provider ← CURRENT
+- [x] LLM provider interface (`llm.Provider`) — pluggable Ollama vs Anthropic backends
+- [x] Anthropic API adapter — stdlib HTTP, structured `tool_use` blocks, multi-turn history
+- [x] Prompt caching — `cache_control: ephemeral` on system + tools, ~90% savings on cached tokens
+- [x] Extended thinking budget (`--thinking-budget` flag)
+- [x] Model cascading via Octi Pulpo (Haiku→Sonnet→Opus by `TaskComplexity` score)
+- [x] Drift detection — self-score every 5 tool calls, steer below 7, kill 
below 5 twice +- [x] RTK token compression wired into `runShellWithRTK()` (70-90% savings on shell output) + --- ## In Progress @@ -110,11 +119,10 @@ ShellForge orchestrates, Octi Pulpo coordinates, AgentGuard governs. This phase - [ ] Move `internal/` types to `pkg/` for external import - [ ] Cloud telemetry opt-in (AgentGuard Cloud) -### Phase 11 — Replace Workspace Bash Swarm -- [ ] Dagu replaces `server/deploy.sh` + cron + queue.txt -- [ ] Multi-driver DAGs: Claude Code + Copilot + Codex on Linux box -- [ ] Same governance policy across all drivers -- [ ] ShellForge as the runtime for agentguard-workspace swarm +### Phase 11 — Replace Workspace Bash Swarm ✅ DONE +- [x] Migrated to API-driven dispatch: Octi Pulpo → ShellForge → Anthropic API +- [x] GH Actions Copilot Agent workflow (`dispatch-agent.yml`) for free-tier automation +- [x] ShellForge is now the execution harness for the agentguard-workspace swarm ---