AgentGuardHQ · jpleva91 · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026
diff --git a/README.md b/README.md
@@ -2,14 +2,14 @@
 
 # ShellForge
 
-**Governed local AI agents — one Go binary, zero cloud.**
+**Governed AI agent runtime — one Go binary, local or cloud.**
 
 [![Go](https://img.shields.io/badge/Go-1.18+-00ADD8?style=for-the-badge&logo=go&logoColor=white)](https://go.dev)
 [![GitHub Pages](https://img.shields.io/badge/Live_Site-agentguardhq.github.io/shellforge-ff6b2b?style=for-the-badge)](https://agentguardhq.github.io/shellforge)
 [![License: MIT](https://img.shields.io/badge/License-MIT-blue?style=for-the-badge)](LICENSE)
 [![AgentGuard](https://img.shields.io/badge/Governed_by-AgentGuard-green?style=for-the-badge)](https://github.com/AgentGuardHQ/agentguard)
 
-*Run autonomous AI agents on your machine with policy enforcement on every tool call. No cloud. No API keys. No data leaves your laptop.*
+*Run autonomous AI agents with policy enforcement on every tool call. Local via Ollama or cloud via Anthropic API — your choice.*
 
 [Website](https://agentguardhq.github.io/shellforge) · [Docs](docs/architecture.md) · [Roadmap](docs/roadmap.md) · [AgentGuard](https://github.com/AgentGuardHQ/agentguard)
 
@@ -112,9 +112,11 @@ shellforge status
 
 | Command | Description |
 |---------|-------------|
-| `shellforge run <driver> "prompt"` | Run a governed agent (goose, claude, copilot, codex, gemini) |
+| `shellforge agent "prompt"` | Run a governed agent (Ollama, default) |
+| `shellforge agent --provider anthropic "prompt"` | Run via Anthropic API (Haiku/Sonnet/Opus, prompt caching) |
+| `shellforge agent --thinking-budget 8000 "prompt"` | Enable extended thinking (Sonnet/Opus) |
+| `shellforge run <driver> "prompt"` | Run a governed CLI driver (goose, claude, copilot, codex, gemini) |
 | `shellforge setup` | Install Ollama, create governance config, verify stack |
-| `shellforge agent "prompt"` | Run a governed agent — every tool call checked |
 | `shellforge qa [dir]` | QA analysis — find test gaps and issues |
 | `shellforge report [repo]` | Generate a status report from git + logs |
 | `shellforge serve agents.yaml` | Daemon mode — run a 24/7 agent swarm |

diff --git a/cmd/shellforge/main.go b/cmd/shellforge/main.go
@@ -21,6 +21,7 @@ import (
 "github.com/AgentGuardHQ/shellforge/internal/llm"
 "github.com/AgentGuardHQ/shellforge/internal/logger"
 "github.com/AgentGuardHQ/shellforge/internal/ollama"
+"github.com/AgentGuardHQ/shellforge/internal/ralph"
 "github.com/AgentGuardHQ/shellforge/internal/scheduler"
 )
 
@@ -84,6 +85,8 @@ os.Exit(1)
 }
 cmdAgent(strings.Join(filtered, " "), providerName, thinkingBudget)
 }
+case "ralph":
+cmdRalph()
 case "swarm":
 cmdSwarm()
 case "serve":
@@ -120,6 +123,7 @@ Usage:
   shellforge scan [dir]             DefenseClaw supply chain scan
   shellforge version                Print version
 
+  shellforge ralph [flags]          Run Ralph Loop (stateless-iterative task execution)
   shellforge serve [config]        Simple daemon mode (built-in scheduler)
   shellforge swarm                 Setup Dagu orchestration (DAG workflows + web UI)
 
@@ -724,6 +728,121 @@ printResult("prototype-agent", result)
 saveReport("outputs/logs", "prototype", result)
 }
 
+// cmdRalph implements the `shellforge ralph` subcommand. It obtains the
+// governance engine, parses the ralph flags from os.Args[2:], selects the LLM
+// provider, runs the Ralph Loop via ralph.RunRalph, and prints a summary line
+// plus one line per task entry.
+//
+// Usage errors (unknown flag, flag missing its value, non-numeric --max-tasks,
+// unknown provider, missing ANTHROPIC_API_KEY) and a RunRalph error are
+// reported on stderr and end the process with exit code 1.
+func cmdRalph() {
+	const usage = "Usage: shellforge ralph [--tasks file] [--log file] [--provider name] [--validate cmd] [--dry-run] [--auto-commit] [--max-tasks N]"
+
+	engine := mustGovernance()
+
+	// Flag defaults.
+	taskFile := "tasks.json"
+	logFile := "ralph-log.jsonl"
+	providerName := ""
+	dryRun := false
+	autoCommit := false
+	maxTasks := 0
+	var validate []string
+
+	args := os.Args[2:]
+
+	// valueOf returns the argument that follows the flag at args[i]. A flag
+	// given without a value is a usage error instead of being silently dropped.
+	valueOf := func(i int) string {
+		if i+1 >= len(args) {
+			fmt.Fprintf(os.Stderr, "Missing value for ralph flag: %s\n", args[i])
+			fmt.Fprintln(os.Stderr, usage)
+			os.Exit(1)
+		}
+		return args[i+1]
+	}
+
+	for i := 0; i < len(args); i++ {
+		switch args[i] {
+		case "--tasks":
+			taskFile = valueOf(i)
+			i++
+		case "--log":
+			logFile = valueOf(i)
+			i++
+		case "--provider":
+			providerName = valueOf(i)
+			i++
+		case "--validate":
+			// Repeatable: every occurrence adds one validation command.
+			validate = append(validate, valueOf(i))
+			i++
+		case "--dry-run":
+			dryRun = true
+		case "--auto-commit":
+			autoCommit = true
+		case "--max-tasks":
+			raw := valueOf(i)
+			i++
+			// Surface a bad number instead of silently falling back to 0.
+			if _, err := fmt.Sscanf(raw, "%d", &maxTasks); err != nil {
+				fmt.Fprintf(os.Stderr, "Invalid --max-tasks value: %q\n", raw)
+				fmt.Fprintln(os.Stderr, usage)
+				os.Exit(1)
+			}
+		default:
+			fmt.Fprintf(os.Stderr, "Unknown ralph flag: %s\n", args[i])
+			fmt.Fprintln(os.Stderr, usage)
+			os.Exit(1)
+		}
+	}
+
+	// provider stays nil on the Ollama path; only Anthropic sets it here.
+	var provider llm.Provider
+	switch providerName {
+	case "anthropic":
+		apiKey := os.Getenv("ANTHROPIC_API_KEY")
+		if apiKey == "" {
+			fmt.Fprintln(os.Stderr, "Error: ANTHROPIC_API_KEY environment variable not set")
+			os.Exit(1)
+		}
+		model := os.Getenv("ANTHROPIC_MODEL")
+		if model == "" {
+			model = "claude-haiku-4-5-20251001"
+		}
+		provider = llm.NewAnthropicProvider(apiKey, model)
+		fmt.Fprintf(os.Stderr, "[ralph] Using Anthropic API (model: %s)\n", model)
+	case "":
+		// Legacy Ollama path
+		mustOllama()
+	default:
+		fmt.Fprintf(os.Stderr, "Unknown provider: %s\n", providerName)
+		os.Exit(1)
+	}
+
+	cfg := ralph.RalphConfig{
+		TaskSource: ralph.SourceFile,
+		TaskFile:   taskFile,
+		LogFile:    logFile,
+		Validate:   validate,
+		AutoCommit: autoCommit,
+		MaxTasks:   maxTasks,
+		DryRun:     dryRun,
+		LoopConfig: agent.LoopConfig{
+			Agent:       "ralph-agent",
+			System:      "You are a senior engineer. Complete the requested task using available tools. Read files, write files, run commands. Be precise and thorough.",
+			Model:       ollama.Model,
+			MaxTurns:    15,
+			TimeoutMs:   180_000,
+			OutputDir:   "outputs/logs",
+			TokenBudget: 3000,
+			Provider:    provider,
+		},
+	}
+
+	fmt.Printf("[ralph] Starting Ralph Loop — tasks: %s, dry-run: %v\n", taskFile, dryRun)
+
+	result, err := ralph.RunRalph(cfg, engine)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "ERROR: ralph loop: %s\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Println()
+	fmt.Printf("[ralph] Complete — %d completed, %d failed, %d skipped (of %d total)\n",
+		result.Completed, result.Failed, result.Skipped, result.Total)
+
+	// Per-task lines: only failed entries get the FAILED label.
+	for _, entry := range result.Entries {
+		status := "completed"
+		if entry.Status == ralph.StatusFailed {
+			status = "FAILED"
+		}
+		fmt.Printf("  [%s] task %s: %s (%d turns, %dms)\n",
+			status, entry.TaskID, entry.Description, entry.Turns, entry.DurationMs)
+	}
+}
+
 func cmdSwarm() {
 fmt.Println("=== ShellForge Swarm Setup (Dagu) ===")
 fmt.Println()

diff --git a/docs/architecture.md b/docs/architecture.md
@@ -123,9 +123,14 @@ cmd/shellforge/
 └── status.go       # Ecosystem health check
 
 internal/
+├── llm/            # LLM provider interface
+│   ├── provider.go  # Provider interface (Chat, Name) + Message/Response types
+│   └── anthropic.go # Anthropic API adapter (stdlib HTTP, prompt caching, tool_use)
+├── agent/          # Agentic loop
+│   ├── loop.go     # runProviderLoop (Anthropic) + runOllamaLoop, drift detection wiring
+│   └── drift.go    # Drift detector — self-score every 5 calls, steer/kill on low scores
 ├── governance/     # agentguard.yaml parser + policy engine
 ├── ollama/         # Ollama HTTP client (chat, generate)
-├── agent/          # Native fallback agentic loop
 ├── tools/          # 5 tool implementations + RTK wrapper
 ├── engine/         # Pluggable engine interface (Goose, OpenClaw, OpenCode)
 ├── logger/         # Structured JSON logging

diff --git a/docs/roadmap.md b/docs/roadmap.md
@@ -33,13 +33,22 @@
 - [x] Tested Aider (file editing only, no shell execution)
 - [x] Evaluated Goose (Block) — native Ollama, actually executes tools
 
-### v0.6.0 — Goose + Governed Shell ← CURRENT
+### v0.6.0 — Goose + Governed Shell
 - [x] Goose as local model driver (`shellforge run goose`)
 - [x] `govern-shell.sh` — shell wrapper that evaluates every command through AgentGuard
 - [x] `shellforge run goose` sets SHELL to governed wrapper automatically
 - [x] Fixed catch-all deny bug (bounded-execution policy was denying everything)
 - [x] Dagu DAG templates (sdlc-swarm, studio-swarm, workspace-swarm, multi-driver)
 
+### v0.7.0 — Anthropic API Provider ← CURRENT
+- [x] LLM provider interface (`llm.Provider`) — pluggable Ollama vs Anthropic backends
+- [x] Anthropic API adapter — stdlib HTTP, structured `tool_use` blocks, multi-turn history
+- [x] Prompt caching — `cache_control: ephemeral` on system + tools, ~90% savings on cached tokens
+- [x] Extended thinking budget (`--thinking-budget` flag)
+- [x] Model cascading via Octi Pulpo (Haiku→Sonnet→Opus by `TaskComplexity` score)
+- [x] Drift detection — self-score every 5 tool calls; steer when the score drops below 7, kill after two scores below 5
+- [x] RTK token compression wired into `runShellWithRTK()` (70-90% savings on shell output)
+
 ---
 
 ## In Progress
@@ -110,11 +119,10 @@ ShellForge orchestrates, Octi Pulpo coordinates, AgentGuard governs. This phase
 - [ ] Move `internal/` types to `pkg/` for external import
 - [ ] Cloud telemetry opt-in (AgentGuard Cloud)
 
-### Phase 11 — Replace Workspace Bash Swarm
-- [ ] Dagu replaces `server/deploy.sh` + cron + queue.txt
-- [ ] Multi-driver DAGs: Claude Code + Copilot + Codex on Linux box
-- [ ] Same governance policy across all drivers
-- [ ] ShellForge as the runtime for agentguard-workspace swarm
+### Phase 11 — Replace Workspace Bash Swarm ✅ DONE
+- [x] Migrated to API-driven dispatch: Octi Pulpo → ShellForge → Anthropic API
+- [x] GH Actions Copilot Agent workflow (`dispatch-agent.yml`) for free-tier automation
+- [x] ShellForge is now the execution harness for the agentguard-workspace swarm
 
 ---
 

diff --git a/internal/orchestrator/compress.go b/internal/orchestrator/compress.go
@@ -0,0 +1,54 @@
+package orchestrator
+
+// tokenThreshold is the maximum output size (in estimated tokens) that
+// passes through without compression. Outputs at or below this are returned
+// as-is; larger outputs are truncated by CompressResult.
+const tokenThreshold = 750
+
+// CompressResult shortens a sub-agent output that exceeds the token threshold.
+// Strategy:
+//  1. If output is at most tokenThreshold tokens (estimated), return it as-is.
+//  2. Otherwise keep roughly the first tokenThreshold tokens and append a
+//     marker stating how many estimated tokens were dropped.
+//
+// The cut is moved back off any UTF-8 continuation byte, so valid UTF-8 input
+// never has a multi-byte character split in half.
+//
+// A future version will use LLM summarization when a Provider is available.
+func CompressResult(output string) string {
+	estimated := estimateTokens(output)
+	if estimated <= tokenThreshold {
+		return output
+	}
+
+	// Truncate to approximately tokenThreshold tokens (4 bytes per token estimate).
+	cut := tokenThreshold * 4
+	if cut >= len(output) {
+		// Defensive: not reachable while estimateTokens is len/4, but keeps the
+		// indexing below in range if the estimator ever changes.
+		return output
+	}
+
+	// A byte of the form 10xxxxxx is a UTF-8 continuation byte: cutting in
+	// front of it would split a character. Step back to the lead byte; a
+	// character has at most 3 continuation bytes, so 3 steps always suffice.
+	for back := 0; back < 3 && output[cut]&0xC0 == 0x80; back++ {
+		cut--
+	}
+
+	kept := output[:cut]
+	omitted := estimated - estimateTokens(kept)
+	return kept + "\n\n[... output truncated — " + itoa(omitted) + " tokens omitted]"
+}
+
+// estimateTokens approximates the token count of s as its byte length
+// divided by four; integer division drops any remainder.
+func estimateTokens(s string) int {
+	const bytesPerToken = 4
+	byteLen := len(s)
+	return byteLen / bytesPerToken
+}
+
+// itoa formats n as a base-10 string. It is hand-rolled so this file needs no
+// imports; strconv.Itoa is the standard-library equivalent.
+func itoa(n int) string {
+	if n == 0 {
+		return "0"
+	}
+	// Work on the unsigned magnitude. Negating the minimum int as a signed
+	// value overflows back to itself, which would leave no digits to emit;
+	// unsigned negation yields the correct magnitude for every input.
+	mag := uint(n)
+	if n < 0 {
+		mag = -mag
+	}
+	var buf [20]byte // up to 19 digits for a 64-bit magnitude, plus the sign
+	i := len(buf)
+	for mag > 0 {
+		i--
+		buf[i] = byte('0' + mag%10)
+		mag /= 10
+	}
+	if n < 0 {
+		i--
+		buf[i] = '-'
+	}
+	return string(buf[i:])
+}
diff --git a/internal/orchestrator/compress_test.go b/internal/orchestrator/compress_test.go
@@ -0,0 +1,83 @@
+package orchestrator
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestCompressResult_ShortOutput checks that an input far below the token
+// threshold is returned unmodified.
+func TestCompressResult_ShortOutput(t *testing.T) {
+	const input = "This is a short output."
+	if got := CompressResult(input); got != input {
+		t.Errorf("short output should pass through unchanged, got %q", got)
+	}
+}
+
+// TestCompressResult_ExactThreshold checks that an input of exactly 750
+// estimated tokens (3000 bytes at 4 bytes per token) is returned unmodified.
+func TestCompressResult_ExactThreshold(t *testing.T) {
+	input := strings.Repeat("a", 750*4)
+	if CompressResult(input) != input {
+		t.Error("output at exact threshold should pass through unchanged")
+	}
+}
+
+// TestCompressResult_OverThreshold checks that a 16000-byte input (4000
+// estimated tokens) comes back shorter and carries the truncation marker.
+func TestCompressResult_OverThreshold(t *testing.T) {
+	input := strings.Repeat("x", 16000)
+	got := CompressResult(input)
+
+	if gotLen, inLen := len(got), len(input); gotLen >= inLen {
+		t.Errorf("compressed result should be shorter than original (%d >= %d)", gotLen, inLen)
+	}
+
+	// Each marker fragment is checked independently so both can be reported.
+	checks := []struct{ needle, msg string }{
+		{"truncated", "compressed result should contain truncation marker"},
+		{"omitted", "compressed result should indicate omitted tokens"},
+	}
+	for _, c := range checks {
+		if !strings.Contains(got, c.needle) {
+			t.Error(c.msg)
+		}
+	}
+}
+
+// TestCompressResult_Empty checks that the empty string maps to itself.
+func TestCompressResult_Empty(t *testing.T) {
+	if got := CompressResult(""); got != "" {
+		t.Errorf("empty input should return empty, got %q", got)
+	}
+}
+
+// TestEstimateTokens is a table-driven check of the 4-bytes-per-token estimate.
+func TestEstimateTokens(t *testing.T) {
+	type tokenCase struct {
+		input    string
+		expected int
+	}
+	cases := []tokenCase{
+		{input: "", expected: 0},
+		{input: "abcd", expected: 1},
+		{input: "12345678", expected: 2},
+		{input: strings.Repeat("a", 100), expected: 25},
+	}
+	for _, tc := range cases {
+		if got := estimateTokens(tc.input); got != tc.expected {
+			t.Errorf("estimateTokens(%d chars): expected %d, got %d", len(tc.input), tc.expected, got)
+		}
+	}
+}
+
+// TestItoa is a table-driven check of itoa for zero, positive, and negative
+// inputs.
+func TestItoa(t *testing.T) {
+	type itoaCase struct {
+		input    int
+		expected string
+	}
+	cases := []itoaCase{
+		{input: 0, expected: "0"},
+		{input: 1, expected: "1"},
+		{input: 42, expected: "42"},
+		{input: -5, expected: "-5"},
+		{input: 1000, expected: "1000"},
+	}
+	for _, tc := range cases {
+		if got := itoa(tc.input); got != tc.expected {
+			t.Errorf("itoa(%d): expected %q, got %q", tc.input, tc.expected, got)
+		}
+	}
+}