frostyard · bketelsen · Apr 30, 2026
diff --git a/docs/exec-plans/completed/mentat-writing-skills-prompt.md b/docs/exec-plans/completed/mentat-writing-skills-prompt.md
@@ -0,0 +1,36 @@
+# Mentat: Improve Skill Generation Prompt
+
+## Purpose
+Rewrite `buildPrompt()` in `mentat/internal/generator/generator.go` to follow Anthropic's
+skill authoring best practices — concise, example-driven, and minimal in structure. Also
+simplify `normaliseContent()` by removing the fragile `---\n` heuristic. Add targeted unit
+tests for the revised prompt and normaliser.
+
+## Baseline
+Run `just test` to confirm baseline passes before any changes. ✅ (all packages pass)
+
+## Milestones
+- [x] Milestone 1 — Write exec plan at `docs/exec-plans/active/mentat-writing-skills-prompt.md`; verify: manual
+- [x] Milestone 2 — Rewrite `buildPrompt()` in `mentat/internal/generator/generator.go`; verify: `cd mentat && go vet ./...`
+- [x] Milestone 3 — Simplify `normaliseContent()` in the same file; verify: `cd mentat && go vet ./...`
+- [x] Milestone 4 — Add/update unit tests for `buildPrompt` and `normaliseContent` in `mentat/internal/generator/generator_internal_test.go`; verify: `just test-mentat`
+- [x] Milestone 5 — Final verify: `just build-mentat` and `just test`
+
+## Surprises & Discoveries
+- Go does not allow backtick characters inside raw string literals. The `promptExample`
+  constant had to be split into a package-level `const` with string concatenation for
+  inline code segments rather than being inlined directly in `buildPrompt`.
+- The existing test file uses `package generator_test` (black-box), so internal function
+  tests needed a new `generator_internal_test.go` file with `package generator`.
+
+## Decision Log
+- Keeping the concrete example inside a raw string literal to avoid indentation drift.
+- Not enumerating sections by name — trusting the model to pick the right structure.
+- Removing the `\n---\n` heuristic from `normaliseContent` because it could silently
+  truncate valid content; the fallback frontmatter injection already handles missing frontmatter.
+
+## Outcomes & Retrospective
+`buildPrompt` shrank from ~40 lines of prescriptive instructions to ~30 lines of
+concise, example-driven text. `normaliseContent` dropped the fragile `\n---\n`
+heuristic entirely (regression-tested). 14 new tests cover all prompt properties
+and normaliser edge cases.
diff --git a/mentat/internal/generator/generator.go b/mentat/internal/generator/generator.go
@@ -233,44 +233,71 @@ func generateAllWith(ctx context.Context, domains []classifier.DomainResult, rep
 // Prompt construction
 // ---------------------------------------------------------------------------
 
+// promptExample is the one-shot SKILL.md example embedded in every prompt.
+// Keeping it as a package-level constant avoids nesting raw-string literals
+// (Go does not allow backtick inside a backtick raw string).
+const promptExample = `---
+name: queue
+description: In-memory task queue with priority scheduling and dead-letter support.
+---
+
+## When to use
+
+Call ` + "`queue.Enqueue`" + ` to schedule work; call ` + "`queue.Drain`" + ` in tests to flush synchronously.
+Never access ` + "`Queue.items`" + ` directly — the field is unexported intentionally.
+
+## Key invariant
+
+` + "`maxRetries`" + ` is checked BEFORE re-enqueue. Exceeding it moves the item to the
+dead-letter list, not back to the main queue. Forgetting this causes silent data loss.
+
+## Entry point
+
+Start at ` + "`queue.go`" + ` → ` + "`Queue.process`" + ` loop. The scheduler lives in ` + "`scheduler.go`" + `.
+`
+
 // buildPrompt assembles the LLM prompt for a single domain.
+//
+// Design rationale: concise over prescriptive. One concrete example beats five
+// section headers. Only the frontmatter format is mandatory — structure of the
+// markdown body is left to the model. Domain context is passed on a single line
+// to reduce token count while keeping all signal.
 func buildPrompt(domain classifier.DomainResult, fileSample string) string {
 	langs := strings.Join(domain.Languages, ", ")
 	if langs == "" {
 		langs = "unknown"
 	}
 
 	var sb strings.Builder
-	sb.WriteString("You are generating a SKILL.md documentation file for an AI coding agent navigating a software repository.\n\n")
-	sb.WriteString(fmt.Sprintf("Domain: %s\n", domain.Name))
-	sb.WriteString(fmt.Sprintf("Path: %s\n", domain.Path))
+
+	// Domain context — one dense line keeps token usage low.
+	sb.WriteString(fmt.Sprintf("Domain: %s | Path: %s | Lang: %s | Files: %d\n", domain.Name, domain.Path, langs, domain.FileCount))
 	sb.WriteString(fmt.Sprintf("Description: %s\n", domain.Description))
-	sb.WriteString(fmt.Sprintf("Languages: %s\n", langs))
-	sb.WriteString(fmt.Sprintf("File count: %d\n\n", domain.FileCount))
 
 	if fileSample != "" {
-		sb.WriteString("Source files in this domain (filename + first few lines):\n")
+		sb.WriteString("\nSource sample (filename + first lines):\n")
 		sb.WriteString(fileSample)
-		sb.WriteString("\n")
 	}
 
-	sb.WriteString(`Generate a SKILL.md file that an AI agent would find useful when working in this domain.
-
-Include these sections:
-- Purpose: what this domain does and why it exists
-- Key Abstractions: main types, interfaces, and concepts an agent must understand
-- Common Patterns: recurring code patterns agents should follow when modifying this domain
-- Entry Points: where to start when reading or modifying this domain
-- Things to Know Before Modifying: gotchas, invariants, constraints, and things that will break if ignored
-
-Output ONLY the SKILL.md content — no preamble, no explanation, no markdown fences.
-The file must start with YAML frontmatter in exactly this format:
----
-name: ` + domain.Name + `
-description: <one-sentence description>
----
-
-Then a markdown body with the sections listed above.`)
+	// Task instruction.
+	sb.WriteString("\nWrite a SKILL.md for an AI coding agent working in this domain.\n\n")
+	sb.WriteString("A good SKILL.md is short and specific. It tells the agent only what it cannot\n")
+	sb.WriteString("infer from reading the code: non-obvious invariants, tricky entry points,\n")
+	sb.WriteString("load-bearing conventions, and concrete usage patterns. Prefer code snippets\n")
+	sb.WriteString("over prose where they are clearer. Do not reproduce information that is\n")
+	sb.WriteString("obvious from filenames, type names, or package comments.\n\n")
+
+	// Frontmatter requirement.
+	sb.WriteString("Required: the file MUST start with YAML frontmatter:\n\n")
+	sb.WriteString("---\n")
+	sb.WriteString(fmt.Sprintf("name: %s\n", domain.Name))
+	sb.WriteString("description: <one-sentence description>\n")
+	sb.WriteString("---\n\n")
+
+	// One-shot example.
+	sb.WriteString("Example of a well-written SKILL.md (for a different domain — do not copy it verbatim):\n\n")
+	sb.WriteString(promptExample)
+	sb.WriteString("\nOutput ONLY the SKILL.md content — no preamble, no explanation, no markdown fences.\n")
 
 	return sb.String()
 }
@@ -342,14 +369,16 @@ func readFirstLines(path string, n int) (string, error) {
 // Response normalisation
 // ---------------------------------------------------------------------------
 
-// normaliseContent strips any markdown code fences, agent tool-call output,
-// and trailing stats lines (e.g. "Changes +0 -0 / Requests 1 Premium") that
-// some LLM CLIs emit around the actual content.
+// normaliseContent strips markdown code fences and trailing stats lines that
+// some LLM CLIs emit, then ensures the result ends with a single newline.
+//
+// If no YAML frontmatter is detected the caller (generateWith) injects a
+// minimal one — that responsibility lives there, not here.
 func normaliseContent(raw string) string {
 	s := strings.TrimSpace(raw)
 
 	// Strip trailing stats lines emitted by some CLI tools (e.g. copilot).
-	// These appear as lines like "Changes +0 -0" or "Requests 1 Premium (5s)".
+	// Patterns: "Changes +N -N" or "Requests N Premium (Ns)".
 	lines := strings.Split(s, "\n")
 	for len(lines) > 0 {
 		last := strings.TrimSpace(lines[len(lines)-1])
@@ -361,10 +390,8 @@ func normaliseContent(raw string) string {
 	}
 	s = strings.TrimSpace(strings.Join(lines, "\n"))
 
-	// If the response contains a code fence, extract the content inside the
-	// last/outermost fence (handles agent preamble before the fence).
-	if idx := strings.LastIndex(s, "```"); idx != -1 {
-		// Find the opening fence
+	// Strip outermost code fence if present (handles "```markdown ... ```").
+	if strings.Contains(s, "```") {
 		openIdx := strings.Index(s, "```")
 		closeIdx := strings.LastIndex(s, "```")
 		if openIdx != closeIdx {
@@ -375,15 +402,5 @@ func normaliseContent(raw string) string {
 		}
 	}
 
-	// If the response has agent tool-call preamble before the YAML frontmatter,
-	// find the FIRST occurrence of a line that is exactly "---" and take
-	// everything from there — but only if it precedes the main content.
-	// Guard: only apply if the current content does NOT already start with ---.
-	if !strings.HasPrefix(s, "---") {
-		if i := strings.Index(s, "\n---\n"); i >= 0 {
-			s = strings.TrimSpace(s[i+1:])
-		}
-	}
-
 	return s + "\n"
 }
diff --git a/mentat/internal/generator/generator_internal_test.go b/mentat/internal/generator/generator_internal_test.go
@@ -0,0 +1,207 @@
+// Package generator (internal tests) exercises unexported helpers directly.
+package generator
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/frostyard/firn/mentat/internal/classifier"
+)
+
+// ---------------------------------------------------------------------------
+// buildPrompt tests
+// ---------------------------------------------------------------------------
+
+var testDomain = classifier.DomainResult{
+	Name:        "storage",
+	Path:        "internal/storage",
+	Description: "Persistent key-value store backed by BoltDB.",
+	FileCount:   6,
+	Languages:   []string{"Go"},
+}
+
+func TestBuildPrompt_ContainsDomainContext(t *testing.T) {
+	prompt := buildPrompt(testDomain, "")
+
+	checks := []struct {
+		label string
+		want  string
+	}{
+		{"domain name", "storage"},
+		{"domain path", "internal/storage"},
+		{"language", "Go"},
+		{"file count", "6"},
+		{"description", "Persistent key-value store"},
+	}
+	for _, tc := range checks {
+		if !strings.Contains(prompt, tc.want) {
+			t.Errorf("prompt missing %s (%q); prompt snippet: %q", tc.label, tc.want, prompt[:min(200, len(prompt))])
+		}
+	}
+}
+
+func TestBuildPrompt_ContainsFrontmatterRequirement(t *testing.T) {
+	prompt := buildPrompt(testDomain, "")
+
+	// Must include the domain name inside the frontmatter template.
+	wantFrontmatter := "name: storage"
+	if !strings.Contains(prompt, wantFrontmatter) {
+		t.Errorf("prompt does not contain frontmatter template %q", wantFrontmatter)
+	}
+}
+
+func TestBuildPrompt_ContainsExample(t *testing.T) {
+	prompt := buildPrompt(testDomain, "")
+
+	// The one-shot example is the queue domain — it must appear.
+	if !strings.Contains(prompt, "queue") {
+		t.Error("prompt is missing the one-shot example (expected 'queue' domain reference)")
+	}
+	// The example should not be the target domain.
+	if strings.Contains(prompt, "do not copy it verbatim") == false {
+		t.Error("prompt is missing the 'do not copy it verbatim' instruction")
+	}
+}
+
+func TestBuildPrompt_NoSectionHeaders(t *testing.T) {
+	prompt := buildPrompt(testDomain, "")
+
+	// The new prompt must NOT enumerate rigid section names as instructions.
+	prescriptiveSections := []string{
+		"## Key Abstractions",
+		"## Common Patterns",
+		"## Things to Know Before Modifying",
+	}
+	for _, sec := range prescriptiveSections {
+		if strings.Contains(prompt, sec) {
+			t.Errorf("prompt contains prescriptive section header %q — trust the model to choose structure", sec)
+		}
+	}
+}
+
+func TestBuildPrompt_IncludesFileSample(t *testing.T) {
+	sample := "### storage.go\npackage storage\n"
+	prompt := buildPrompt(testDomain, sample)
+
+	if !strings.Contains(prompt, sample) {
+		t.Error("prompt does not include the file sample")
+	}
+}
+
+func TestBuildPrompt_EmptySample_NoGarbage(t *testing.T) {
+	prompt := buildPrompt(testDomain, "")
+
+	// An empty sample must not leave "Source sample" heading in the prompt.
+	if strings.Contains(prompt, "Source sample") {
+		t.Error("prompt contains 'Source sample' section even when sample is empty")
+	}
+}
+
+func TestBuildPrompt_NoMarkdownFences_InInstructions(t *testing.T) {
+	prompt := buildPrompt(testDomain, "")
+
+	// The instruction portion must not ask the model to wrap output in fences —
+	// the prompt explicitly requests no fences.
+	if !strings.Contains(prompt, "no markdown fences") {
+		t.Error("prompt is missing the 'no markdown fences' instruction")
+	}
+}
+
+func TestBuildPrompt_MultipleLanguages(t *testing.T) {
+	d := testDomain
+	d.Languages = []string{"Go", "TypeScript", "Bash"}
+	prompt := buildPrompt(d, "")
+
+	if !strings.Contains(prompt, "Go, TypeScript, Bash") {
+		t.Errorf("prompt does not list all languages; got snippet: %q", prompt[:min(300, len(prompt))])
+	}
+}
+
+func TestBuildPrompt_EmptyLanguages_FallsBackToUnknown(t *testing.T) {
+	d := testDomain
+	d.Languages = nil
+	prompt := buildPrompt(d, "")
+
+	if !strings.Contains(prompt, "unknown") {
+		t.Error("prompt should fall back to 'unknown' when no languages are provided")
+	}
+}
+
+// ---------------------------------------------------------------------------
+// normaliseContent tests
+// ---------------------------------------------------------------------------
+
+func TestNormaliseContent_PlainContent_Unchanged(t *testing.T) {
+	input := "---\nname: foo\ndescription: bar\n---\n\n## Overview\n\nSome content.\n"
+	got := normaliseContent(input)
+	// TrimSpace + "\n" is the only transformation for clean input.
+	want := strings.TrimSpace(input) + "\n"
+	if got != want {
+		t.Errorf("normaliseContent modified clean content:\nwant: %q\n got: %q", want, got)
+	}
+}
+
+func TestNormaliseContent_StripsCodeFences(t *testing.T) {
+	inner := "---\nname: foo\ndescription: bar\n---\n\n## Overview\n\nContent here."
+	fenced := "```markdown\n" + inner + "\n```"
+	got := normaliseContent(fenced)
+
+	if strings.Contains(got, "```") {
+		t.Errorf("normaliseContent did not strip code fences; got: %q", got[:min(100, len(got))])
+	}
+	if !strings.Contains(got, "---\nname: foo") {
+		t.Errorf("normaliseContent removed content when stripping fences; got: %q", got[:min(200, len(got))])
+	}
+}
+
+func TestNormaliseContent_StripsTrailingStatsLines(t *testing.T) {
+	content := "---\nname: foo\ndescription: bar\n---\n\n## Overview\n\nSome content."
+	withStats := content + "\nChanges +2 -1\nRequests 3 Premium (5s)\n"
+	got := normaliseContent(withStats)
+
+	if strings.Contains(got, "Changes") {
+		t.Errorf("normaliseContent did not strip 'Changes' stats line; got: %q", got)
+	}
+	if strings.Contains(got, "Requests") {
+		t.Errorf("normaliseContent did not strip 'Requests' stats line; got: %q", got)
+	}
+	if !strings.Contains(got, "Some content") {
+		t.Error("normaliseContent removed real content while stripping stats")
+	}
+}
+
+func TestNormaliseContent_EndsWithSingleNewline(t *testing.T) {
+	cases := []string{
+		"---\nname: x\ndescription: y\n---\n\nContent.",
+		"---\nname: x\ndescription: y\n---\n\nContent.\n\n\n",
+		"```\n---\nname: x\ndescription: y\n---\n\nContent.\n```",
+	}
+	for _, input := range cases {
+		got := normaliseContent(input)
+		if !strings.HasSuffix(got, "\n") {
+			t.Errorf("output does not end with newline; got: %q", got)
+		}
+		if strings.HasSuffix(got, "\n\n") {
+			t.Errorf("output ends with multiple newlines; got: %q", got)
+		}
+	}
+}
+
+func TestNormaliseContent_DoesNotCutMidContentOnDashDash(t *testing.T) {
+	// Regression: old normaliser had a "\n---\n" heuristic that could truncate
+	// valid content mid-document when a horizontal rule appeared in the body.
+	content := "---\nname: foo\ndescription: bar\n---\n\n## Section\n\nSome text.\n\n---\n\nMore text after HR."
+	got := normaliseContent(content)
+
+	if !strings.Contains(got, "More text after HR") {
+		t.Errorf("normaliseContent cut content at mid-document HR; got: %q", got)
+	}
+}
+
+func TestNormaliseContent_EmptyInput(t *testing.T) {
+	got := normaliseContent("")
+	// Empty input should not panic; returns a single newline.
+	if got != "\n" {
+		t.Errorf("normaliseContent(\"\") = %q, want %q", got, "\n")
+	}
+}