AgentGuardHQ · jpleva91 · Apr 1, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/.agentguard/live-viewer.json b/.agentguard/live-viewer.json
@@ -0,0 +1 @@
+{"port":36841,"pid":853811,"startedAt":1774998051503}
diff --git a/.agentguard/squads/shellforge/blockers.md b/.agentguard/squads/shellforge/blockers.md
@@ -1,7 +1,7 @@
 # ShellForge Squad — Blockers
 
-**Updated:** 2026-03-31T00:00Z
-**Reported by:** EM run 9 (claude-code:opus:shellforge:em)
+**Updated:** 2026-03-31T08:30Z
+**Reported by:** EM run 11 (claude-code:opus:shellforge:em)
 
 ---
 
@@ -13,49 +13,36 @@
 
 ## P1 — Active Work
 
-**None.** All P1 issues closed (PR #89 merged — closes #68 + #66).
+**None.** All P1 issues closed.
 
 ---
 
-## Incident (Resolved)
-
-### Broken worktree — incomplete WIP fix for #51
-**Detected:** Run 9 (2026-03-31)
-**Resolved:** Yes
-**Description:** The worktree had uncommitted partial changes to `cmd/shellforge/main.go`:
-- `import (` was replaced with `import "log"`, breaking the multi-package import block syntax
-- `run()` was partially refactored to call a non-existent `executeCommand()` function, leaving the old body orphaned outside any function
-- Build failure: `syntax error: non-declaration statement outside function body`
+## P2 — Active Blockers
 
-**Resolution:** Stashed the WIP changes, created `fix/run-silent-errors-51` branch from `origin/main`, implemented the fix correctly (add `"log"` to imports, log error in `run()` via `if err := cmd.Run(); err != nil`). PR #93 open.
+### PR Review Queue (budget: 3/3)
 
----
+| PR | Title | CI | Status |
+|----|-------|----|--------|
+| #93 | fix run() silent errors (closes #51) | ✅ 5/5 | REVIEW REQUIRED |
+| #95 | fix scheduler WriteFile silent error (closes #65) | ✅ 5/5 | REVIEW REQUIRED |
+| #96 | fix cmdScan Glob→WalkDir (closes #52) | ⏳ pending | REVIEW REQUIRED |
 
-## P2 — Active Blockers
+**Action Required:** @jpleva91 review and merge PRs #93, #95, #96 to clear budget for remaining P2 sweep.
 
-### PR Review Queue (budget: 2/3)
-| PR | Title | Status |
-|----|-------|--------|
-| #91 | EM state update run 8 | CI green — REVIEW REQUIRED |
-| #93 | fix run() silent errors (closes #51) | CI pending — REVIEW REQUIRED |
+### #76 — Dogfood: setup.sh doesn't support remote Ollama (4th escalation)
 
-**Action Required:** @jpleva91 review and merge PR #91 and PR #93.
-
-### #76 — Dogfood: setup.sh doesn't support remote Ollama (3rd escalation)
 **Severity:** Medium — dogfood on jared-box (headless WSL2 + RunPod GPU) blocked
 **Root cause:** `shellforge setup` detects `isServer=true` on headless Linux and skips Goose + Ollama entirely, with no option to configure `OLLAMA_HOST` for a remote GPU endpoint.
 **Fix needed:** setup.sh should offer remote Ollama config when `isServer=true` — set `OLLAMA_HOST`, skip local Ollama install, keep Goose setup.
 **URL:** https://github.com/AgentGuardHQ/shellforge/issues/76
 
 ---
 
-## P2 — Queued (unassigned)
+## P2 — Queued (unassigned, after budget clears)
 
 | # | Issue | Notes |
 |---|-------|-------|
 | #92 | Bundle Preflight in Goose bootstrap | Blocked on Preflight v1 ship |
-| #65 | scheduler.go silent os.WriteFile error | Next EM fix after PR budget clears |
-| #52 | filepath.Glob ** never matches Go files | Next EM fix — needs filepath.Walk |
 | #53 | README stale ./shellforge commands | Docs rot |
 | #50 | kernel version comparison lexicographic | setup.sh version gate broken |
 | #49 | InferenceQueue not priority-aware | Documented but unimplemented |
@@ -65,14 +52,17 @@
 
 ---
 
-## Resolved (this cycle)
+## Resolved (this cycle — run 11)
 
-- **#68** — zero test coverage → merged PR #89 (25 tests for normalizer/governance/intent)
-- **#66** — dead code in flattenParams() → fixed in PR #89
-- **#51** — run() helper silently ignores errors → PR #93 open
+- **#52** — filepath.Glob ** never matches Go files → fixed with WalkDir in PR #96
+- **PR #94** — stale EM state PR (run 9 state was already on master at 832cb58) → closed
 
 ## Resolved (prior cycles)
 
+- **#65** — scheduler.go silent WriteFile error → PR #95 open
+- **#51** — run() helper silently ignores errors → PR #93 open
+- **#68** — zero test coverage → merged PR #89 (25 tests)
+- **#66** — dead code in flattenParams() → merged PR #89
 - **#28** → PR #86 merged
 - **#63** → PR #88 merged
 - **#58, #62, #75, #67, #69** → PR #83 merged
@@ -86,10 +76,11 @@
 |------|--------|
 | P0 issues | ✅ All closed |
 | P1 issues | ✅ All closed |
-| PR #91 (EM state run 8) | 🟡 CI green — REVIEW REQUIRED |
-| PR #93 (fix #51) | 🟡 CI pending — REVIEW REQUIRED |
-| Sprint goal | 🔵 Active — P2 sweep in progress |
-| PR budget | 2/3 |
-| Dogfood (#76) | 🔴 Blocked — setup.sh remote Ollama gap (3rd escalation) |
+| PR #93 (fix #51 run() errors) | 🟡 CI green — REVIEW REQUIRED |
+| PR #95 (fix #65 WriteFile) | 🟡 CI green — REVIEW REQUIRED |
+| PR #96 (fix #52 Glob→WalkDir) | 🟡 CI pending — REVIEW REQUIRED |
+| Sprint goal | 🔵 Active — P2 sweep: 3/3 targeted bug fixes implemented, all awaiting PR review |
+| PR budget | 3/3 (full — merge needed before new work) |
+| Dogfood (#76) | 🔴 Blocked — setup.sh remote Ollama gap (4th escalation) |
 | Retry loops | None |
 | Blast radius | Low |
diff --git a/.claude/settings.json b/.claude/settings.json
@@ -0,0 +1,37 @@
+{
+  "hooks": {
+    "PreToolUse": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash -c 'W=${AGENTGUARD_WORKSPACE:-$HOME/agentguard-workspace}; BIN=${AGENTGUARD_BIN:-node $W/agent-guard/apps/cli/dist/bin.js}; $BIN claude-hook pre --store sqlite'"
+          }
+        ]
+      }
+    ],
+    "PostToolUse": [
+      {
+        "matcher": "Bash",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash -c 'W=${AGENTGUARD_WORKSPACE:-$HOME/agentguard-workspace}; BIN=${AGENTGUARD_BIN:-node $W/agent-guard/apps/cli/dist/bin.js}; $BIN claude-hook post --store sqlite'"
+          }
+        ]
+      }
+    ],
+    "Stop": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash -c 'W=${AGENTGUARD_WORKSPACE:-$HOME/agentguard-workspace}; BIN=${AGENTGUARD_BIN:-node $W/agent-guard/apps/cli/dist/bin.js}; $BIN claude-hook stop --store sqlite'",
+            "timeout": 15000,
+            "blocking": false
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/cmd/shellforge/main.go b/cmd/shellforge/main.go
@@ -7,6 +7,7 @@ import (
 "encoding/json"
 "fmt"
 "io"
+"io/fs"
 "log"
 "os"
 "os/exec"
@@ -17,6 +18,7 @@ import (
 
 "github.com/AgentGuardHQ/shellforge/internal/agent"
 "github.com/AgentGuardHQ/shellforge/internal/governance"
+"github.com/AgentGuardHQ/shellforge/internal/llm"
 "github.com/AgentGuardHQ/shellforge/internal/logger"
 "github.com/AgentGuardHQ/shellforge/internal/ollama"
 "github.com/AgentGuardHQ/shellforge/internal/scheduler"
@@ -60,11 +62,28 @@ cmdRun(driver, prompt)
 case "evaluate":
 cmdEvaluate()
 case "agent":
-if len(os.Args) < 3 {
-fmt.Fprintln(os.Stderr, "Usage: shellforge agent \"your prompt\"")
+{
+providerName := ""
+thinkingBudget := 0
+remaining := os.Args[2:]
+filtered := remaining[:0]
+for i := 0; i < len(remaining); i++ {
+if remaining[i] == "--provider" && i+1 < len(remaining) {
+providerName = remaining[i+1]
+i++
+} else if remaining[i] == "--thinking-budget" && i+1 < len(remaining) {
+fmt.Sscanf(remaining[i+1], "%d", &thinkingBudget)
+i++
+} else {
+filtered = append(filtered, remaining[i])
+}
+}
+if len(filtered) == 0 {
+fmt.Fprintln(os.Stderr, "Usage: shellforge agent [--provider <name>] [--thinking-budget <tokens>] \"your prompt\"")
 os.Exit(1)
 }
-cmdAgent(strings.Join(os.Args[2:], " "))
+cmdAgent(strings.Join(filtered, " "), providerName, thinkingBudget)
+}
 case "swarm":
 cmdSwarm()
 case "serve":
@@ -656,11 +675,35 @@ printResult("report-agent", result)
 saveReport("outputs/reports", "report", result)
 }
 
-func cmdAgent(prompt string) {
+func cmdAgent(prompt, providerName string, thinkingBudget int) {
 engine := mustGovernance()
+
+var provider llm.Provider
+switch providerName {
+case "anthropic":
+apiKey := os.Getenv("ANTHROPIC_API_KEY")
+if apiKey == "" {
+fmt.Fprintln(os.Stderr, "Error: ANTHROPIC_API_KEY environment variable not set")
+os.Exit(1)
+}
+model := os.Getenv("ANTHROPIC_MODEL")
+if model == "" {
+model = "claude-haiku-4-5-20251001"
+}
+p := llm.NewAnthropicProvider(apiKey, model)
+if thinkingBudget > 0 {
+p.ThinkingBudget = thinkingBudget
+fmt.Fprintf(os.Stderr, "Using Anthropic API (model: %s, thinking budget: %d tokens)\n", model, thinkingBudget)
+} else {
+fmt.Fprintf(os.Stderr, "Using Anthropic API (model: %s)\n", model)
+}
+provider = p
+default:
+// Legacy Ollama path
 mustOllama()
+}
 
-result, err := agent.RunLoop(agent.LoopConfig{
+cfg := agent.LoopConfig{
 Agent:       "prototype-agent",
 System:      "You are a senior engineer. Complete the requested task using available tools. Read files, write files, run commands, search code. Be precise.",
 UserPrompt:  prompt,
@@ -669,7 +712,10 @@ MaxTurns:    15,
 TimeoutMs:   180_000,
 OutputDir:   "outputs/logs",
 TokenBudget: 3000,
-}, engine)
+Provider:    provider,
+}
+
+result, err := agent.RunLoop(cfg, engine)
 if err != nil {
 logger.Error("prototype-agent", err.Error())
 os.Exit(1)
@@ -864,7 +910,13 @@ if _, err := os.Stat("agentguard.yaml"); err == nil {
 fmt.Println("  ✓ agentguard.yaml found")
 }
 entries, _ := filepath.Glob(filepath.Join(dir, "agents", "*.ts"))
-goEntries, _ := filepath.Glob(filepath.Join(dir, "internal", "**", "*.go"))
+var goEntries []string
+filepath.WalkDir(filepath.Join(dir, "internal"), func(p string, d fs.DirEntry, err error) error {
+if err == nil && !d.IsDir() && strings.HasSuffix(p, ".go") {
+goEntries = append(goEntries, p)
+}
+return nil
+})
 fmt.Printf("  Found %d TS agents, %d Go files\n", len(entries), len(goEntries))
 fmt.Println("  Install defenseclaw for full supply chain scanning")
 }
@@ -907,7 +959,7 @@ func printResult(name string, r *agent.RunResult) {
 fmt.Println()
 status := "✓ success"
 if !r.Success {
-status = "✗ failed"
+status = fmt.Sprintf("✗ %s", r.ExitReason)
 }
 fmt.Printf("[%s] %s — %d turns, %d tool calls, %d denials\n", name, status, r.Turns, r.ToolCalls, r.Denials)
 fmt.Printf("  tokens: %d prompt + %d response | %dms\n", r.PromptTok, r.ResponseTok, r.DurationMs)
@@ -921,8 +973,8 @@ func saveReport(dir, prefix string, r *agent.RunResult) {
 os.MkdirAll(dir, 0o755)
 ts := time.Now().Format("2006-01-02T15-04-05")
 path := filepath.Join(dir, fmt.Sprintf("%s-%s.md", prefix, ts))
-content := fmt.Sprintf("# %s — %s\n\n**Turns:** %d | **Tool calls:** %d | **Denials:** %d\n**Tokens:** %d+%d | **Duration:** %dms\n\n%s\n",
-prefix, time.Now().Format(time.RFC3339), r.Turns, r.ToolCalls, r.Denials, r.PromptTok, r.ResponseTok, r.DurationMs, r.Output)
+content := fmt.Sprintf("# %s — %s\n\n**Exit:** %s | **Turns:** %d | **Tool calls:** %d | **Denials:** %d\n**Tokens:** %d+%d | **Duration:** %dms\n\n%s\n",
+prefix, time.Now().Format(time.RFC3339), r.ExitReason, r.Turns, r.ToolCalls, r.Denials, r.PromptTok, r.ResponseTok, r.DurationMs, r.Output)
 os.WriteFile(path, []byte(content), 0o644)
 fmt.Printf("\n→ Saved to %s\n", path)
 }

diff --git a/cmd/shellforge/shellforge b/cmd/shellforge/shellforge
diff --git a/internal/agent/drift.go b/internal/agent/drift.go
@@ -0,0 +1,107 @@
+package agent
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Tuning constants for drift detection.
+const (
+	// driftCheckInterval is the number of tool calls between drift checks.
+	driftCheckInterval = 5
+	// driftWarnThreshold is the score below which a steering warning is issued.
+	driftWarnThreshold = 7
+	// driftKillThreshold is the score below which a result counts toward
+	// termination; the second such result ends the run.
+	driftKillThreshold = 5
+)
+
+// driftDetector holds the state used to judge whether the agent is still
+// working on the task it was originally given.
+type driftDetector struct {
+	// taskSpec is the original user prompt, i.e. the task specification.
+	taskSpec string
+	// actionLog holds one summary line per recorded tool call.
+	actionLog []string
+	// warnings counts how many drift warnings have been issued.
+	warnings int
+	// lowScores counts scores below the kill threshold; evaluate maintains it.
+	lowScores int
+}
+
+// newDriftDetector returns a detector for the given task specification with
+// an empty action log and zeroed counters.
+func newDriftDetector(taskSpec string) *driftDetector {
+	detector := new(driftDetector)
+	detector.taskSpec = taskSpec
+	return detector
+}
+
+// record appends a one-line summary of a tool call to the action log.
+//
+// The summary is the tool name, followed by " → <target>" when params has a
+// "path", "command", or "directory" entry. The keys are tried in that order
+// and only the first one present is used.
+func (d *driftDetector) record(toolName string, params map[string]string) {
+	entry := toolName
+	for _, key := range []string{"path", "command", "directory"} {
+		if value, found := params[key]; found {
+			entry += " → " + value
+			break
+		}
+	}
+	d.actionLog = append(d.actionLog, entry)
+}
+
+// shouldCheck reports whether a drift check is due: totalToolCalls must be
+// positive and an exact multiple of driftCheckInterval.
+func (d *driftDetector) shouldCheck(totalToolCalls int) bool {
+	if totalToolCalls <= 0 {
+		return false
+	}
+	return totalToolCalls%driftCheckInterval == 0
+}
+
+// buildCheckPrompt renders the drift-check message for the model. It embeds
+// the original task and at most the last driftCheckInterval entries of the
+// action log, one per line, and asks for a single 1-10 alignment score.
+func (d *driftDetector) buildCheckPrompt() string {
+	const promptFormat = `DRIFT CHECK — Score your alignment with the original task.
+
+Original task: %s
+
+Your last %d actions:
+%s
+
+Rate your alignment 1-10 (10 = perfectly on task, 1 = completely off topic).
+Respond with ONLY a single number.`
+
+	// Keep only the tail of the log when it is longer than one check window.
+	window := d.actionLog
+	if surplus := len(window) - driftCheckInterval; surplus > 0 {
+		window = window[surplus:]
+	}
+
+	actions := strings.Join(window, "\n")
+	return fmt.Sprintf(promptFormat, d.taskSpec, len(window), actions)
+}
+
+// parseScore extracts the drift score from the model's response.
+//
+// It reads the first run of consecutive decimal digits in content as one
+// number, so "10" is parsed as 10 rather than as its first digit, and
+// "8/10" is parsed as 8. Values above the top of the 1-10 scale are capped
+// at 10. If content contains no digit at all, 10 ("on task") is returned so
+// that an unparseable reply never counts against the agent.
+func parseScore(content string) int {
+	const maxScore = 10
+
+	score := 0
+	seenDigit := false
+	for _, c := range content {
+		if c < '0' || c > '9' {
+			if seenDigit {
+				break // the first number has ended
+			}
+			continue // still skipping leading non-digit text
+		}
+		seenDigit = true
+		score = score*10 + int(c-'0')
+		if score > maxScore {
+			// Off the scale; capping here also rules out overflow on
+			// very long digit runs.
+			return maxScore
+		}
+	}
+
+	if !seenDigit {
+		return maxScore // default to "on task" if unparseable
+	}
+	return score
+}
+
+// evaluate processes the drift score and returns the action to take.
+//
+//   - score >= driftWarnThreshold: the agent is on task; the low-score
+//     streak is cleared and driftOK is returned.
+//   - score < driftKillThreshold: the low-score streak grows; on the second
+//     consecutive such score driftKill is returned, otherwise a warning is
+//     counted and driftWarn is returned.
+//   - anything in between: a warning is counted and driftWarn is returned.
+//     The streak is cleared, because lowScores tracks consecutive scores
+//     below the kill threshold and this score is not one of them.
+func (d *driftDetector) evaluate(score int) driftAction {
+	switch {
+	case score >= driftWarnThreshold:
+		d.lowScores = 0
+		return driftOK
+	case score < driftKillThreshold:
+		d.lowScores++
+		if d.lowScores >= 2 {
+			return driftKill
+		}
+	default:
+		// Warn-only band: drifting, but not badly enough to count toward
+		// termination, so the streak is broken.
+		d.lowScores = 0
+	}
+
+	d.warnings++
+	return driftWarn
+}
+
+// steeringMessage renders the warning text used when drift is detected. It
+// restates the original task and includes the current warning count.
+func (d *driftDetector) steeringMessage() string {
+	const warningFormat = `⚠️ DRIFT DETECTED — You are going off-task.
+
+Original task: %s
+
+Refocus on the original task. Do not continue with unrelated work.
+Warning %d — task will be terminated if drift continues.`
+
+	return fmt.Sprintf(warningFormat, d.taskSpec, d.warnings)
+}
+
+// driftAction is the outcome of evaluating a drift score.
+type driftAction int
+
+// Outcomes returned by driftDetector.evaluate.
+const (
+	// driftOK is returned when the score is at or above the warn threshold.
+	driftOK driftAction = iota
+	// driftWarn is returned for a low score that does not end the run.
+	driftWarn
+	// driftKill is returned once the low-score count reaches two.
+	driftKill
+)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"port":36841,"pid":853811,"startedAt":1774998051503}