From 4034df0c6f6293896f99ca1e4b5e1fc4ba81335b Mon Sep 17 00:00:00 2001 From: Brian Ketelsen Date: Wed, 29 Apr 2026 22:27:33 -0400 Subject: [PATCH] feat(mentat): concise skill generation prompt based on Anthropic best practices - Rewrite buildPrompt(): ~50% shorter, example-driven, no prescriptive section headers. One-shot 'queue' domain example shows the target quality. Domain context condensed to a single header line. - Simplify normaliseContent(): remove fragile \n---\n mid-content heuristic; keep code-fence stripping and trailing stats-line stripping. - Add generator_internal_test.go: 14 new unit tests covering all prompt properties (domain context, frontmatter, example presence, no rigid sections, empty-sample guard) and all normaliser edge cases (fences, stats, mid-HR, empty input, trailing newline). Exec plan: docs/exec-plans/completed/mentat-writing-skills-prompt.md --- .../completed/mentat-writing-skills-prompt.md | 36 +++ mentat/internal/generator/generator.go | 101 +++++---- .../generator/generator_internal_test.go | 207 ++++++++++++++++++ 3 files changed, 302 insertions(+), 42 deletions(-) create mode 100644 docs/exec-plans/completed/mentat-writing-skills-prompt.md create mode 100644 mentat/internal/generator/generator_internal_test.go diff --git a/docs/exec-plans/completed/mentat-writing-skills-prompt.md b/docs/exec-plans/completed/mentat-writing-skills-prompt.md new file mode 100644 index 0000000..bd2a490 --- /dev/null +++ b/docs/exec-plans/completed/mentat-writing-skills-prompt.md @@ -0,0 +1,36 @@ +# Mentat: Improve Skill Generation Prompt + +## Purpose +Rewrite `buildPrompt()` in `mentat/internal/generator/generator.go` to follow Anthropic's +skill authoring best practices — concise, example-driven, and minimal in structure. Also +simplify `normaliseContent()` by removing the fragile `---\n` heuristic. Add targeted unit +tests for the revised prompt and normaliser. + +## Baseline +Run `just test` to confirm baseline passes before any changes. ✅ (all packages pass) + +## Milestones +- [x] Milestone 1 — Write exec plan at `docs/exec-plans/active/mentat-writing-skills-prompt.md`; verify: manual +- [x] Milestone 2 — Rewrite `buildPrompt()` in `mentat/internal/generator/generator.go`; verify: `cd mentat && go vet ./...` +- [x] Milestone 3 — Simplify `normaliseContent()` in the same file; verify: `cd mentat && go vet ./...` +- [x] Milestone 4 — Add/update unit tests for `buildPrompt` and `normaliseContent` in `mentat/internal/generator/generator_internal_test.go`; verify: `just test-mentat` +- [x] Milestone 5 — Final verify: `just build-mentat` and `just test` + +## Surprises & Discoveries +- Go does not allow backtick characters inside raw string literals. The `promptExample` + constant had to be split into a package-level `const` with string concatenation for + inline code segments rather than being inlined directly in `buildPrompt`. +- The existing test file uses `package generator_test` (black-box), so internal function + tests needed a new `generator_internal_test.go` file with `package generator`. + +## Decision Log +- Keeping the concrete example inside a raw string literal to avoid indentation drift. +- Not enumerating sections by name — trusting the model to pick the right structure. +- Removing the `\n---\n` heuristic from `normaliseContent` because it could silently + truncate valid content; the fallback frontmatter injection already handles missing frontmatter. + +## Outcomes & Retrospective +`buildPrompt` shrank from ~40 lines of prescriptive instructions to ~30 lines of +concise, example-driven text. `normaliseContent` dropped the fragile `\n---\n` +heuristic entirely (regression-tested). 14 new tests cover all prompt properties +and normaliser edge cases. diff --git a/mentat/internal/generator/generator.go b/mentat/internal/generator/generator.go index 805a6ef..5b64dab 100644 --- a/mentat/internal/generator/generator.go +++ b/mentat/internal/generator/generator.go @@ -233,7 +233,35 @@ func generateAllWith(ctx context.Context, domains []classifier.DomainResult, rep // Prompt construction // --------------------------------------------------------------------------- +// promptExample is the one-shot SKILL.md example embedded in every prompt. +// Keeping it as a package-level constant avoids nesting raw-string literals +// (Go does not allow backtick inside a backtick raw string). +const promptExample = `--- +name: queue +description: In-memory task queue with priority scheduling and dead-letter support. +--- + +## When to use + +Call ` + "`queue.Enqueue`" + ` to schedule work; call ` + "`queue.Drain`" + ` in tests to flush synchronously. +Never access ` + "`Queue.items`" + ` directly — the field is unexported intentionally. + +## Key invariant + +` + "`maxRetries`" + ` is checked BEFORE re-enqueue. Exceeding it moves the item to the +dead-letter list, not back to the main queue. Forgetting this causes silent data loss. + +## Entry point + +Start at ` + "`queue.go`" + ` → ` + "`Queue.process`" + ` loop. The scheduler lives in ` + "`scheduler.go`" + `. +` + // buildPrompt assembles the LLM prompt for a single domain. +// +// Design rationale: concise over prescriptive. One concrete example beats five +// section headers. Only the frontmatter format is mandatory — structure of the +// markdown body is left to the model. Domain context is passed on a single line +// to reduce token count while keeping all signal. func buildPrompt(domain classifier.DomainResult, fileSample string) string { langs := strings.Join(domain.Languages, ", ") if langs == "" { @@ -241,36 +269,35 @@ func buildPrompt(domain classifier.DomainResult, fileSample string) string { } var sb strings.Builder - sb.WriteString("You are generating a SKILL.md documentation file for an AI coding agent navigating a software repository.\n\n") - sb.WriteString(fmt.Sprintf("Domain: %s\n", domain.Name)) - sb.WriteString(fmt.Sprintf("Path: %s\n", domain.Path)) + + // Domain context — one dense line keeps token usage low. + sb.WriteString(fmt.Sprintf("Domain: %s | Path: %s | Lang: %s | Files: %d\n", domain.Name, domain.Path, langs, domain.FileCount)) sb.WriteString(fmt.Sprintf("Description: %s\n", domain.Description)) - sb.WriteString(fmt.Sprintf("Languages: %s\n", langs)) - sb.WriteString(fmt.Sprintf("File count: %d\n\n", domain.FileCount)) if fileSample != "" { - sb.WriteString("Source files in this domain (filename + first few lines):\n") + sb.WriteString("\nSource sample (filename + first lines):\n") sb.WriteString(fileSample) - sb.WriteString("\n") } - sb.WriteString(`Generate a SKILL.md file that an AI agent would find useful when working in this domain. - -Include these sections: -- Purpose: what this domain does and why it exists -- Key Abstractions: main types, interfaces, and concepts an agent must understand -- Common Patterns: recurring code patterns agents should follow when modifying this domain -- Entry Points: where to start when reading or modifying this domain -- Things to Know Before Modifying: gotchas, invariants, constraints, and things that will break if ignored - -Output ONLY the SKILL.md content — no preamble, no explanation, no markdown fences. -The file must start with YAML frontmatter in exactly this format: ---- -name: ` + domain.Name + ` -description: ---- - -Then a markdown body with the sections listed above.`) + // Task instruction. + sb.WriteString("\nWrite a SKILL.md for an AI coding agent working in this domain.\n\n") + sb.WriteString("A good SKILL.md is short and specific. It tells the agent only what it cannot\n") + sb.WriteString("infer from reading the code: non-obvious invariants, tricky entry points,\n") + sb.WriteString("load-bearing conventions, and concrete usage patterns. Prefer code snippets\n") + sb.WriteString("over prose where they are clearer. Do not reproduce information that is\n") + sb.WriteString("obvious from filenames, type names, or package comments.\n\n") + + // Frontmatter requirement. + sb.WriteString("Required: the file MUST start with YAML frontmatter:\n\n") + sb.WriteString("---\n") + sb.WriteString(fmt.Sprintf("name: %s\n", domain.Name)) + sb.WriteString("description: \n") + sb.WriteString("---\n\n") + + // One-shot example. + sb.WriteString("Example of a well-written SKILL.md (for a different domain — do not copy it verbatim):\n\n") + sb.WriteString(promptExample) + sb.WriteString("\nOutput ONLY the SKILL.md content — no preamble, no explanation, no markdown fences.\n") return sb.String() } @@ -342,14 +369,16 @@ func readFirstLines(path string, n int) (string, error) { // Response normalisation // --------------------------------------------------------------------------- -// normaliseContent strips any markdown code fences, agent tool-call output, -// and trailing stats lines (e.g. "Changes +0 -0 / Requests 1 Premium") that -// some LLM CLIs emit around the actual content. +// normaliseContent strips markdown code fences and trailing stats lines that +// some LLM CLIs emit, then ensures the result ends with a single newline. +// +// If no YAML frontmatter is detected the caller (generateWith) injects a +// minimal one — that responsibility lives there, not here. func normaliseContent(raw string) string { s := strings.TrimSpace(raw) // Strip trailing stats lines emitted by some CLI tools (e.g. copilot). - // These appear as lines like "Changes +0 -0" or "Requests 1 Premium (5s)". + // Patterns: "Changes +N -N" or "Requests N Premium (Ns)". lines := strings.Split(s, "\n") for len(lines) > 0 { last := strings.TrimSpace(lines[len(lines)-1]) @@ -361,10 +390,8 @@ func normaliseContent(raw string) string { } s = strings.TrimSpace(strings.Join(lines, "\n")) - // If the response contains a code fence, extract the content inside the - // last/outermost fence (handles agent preamble before the fence). - if idx := strings.LastIndex(s, "```"); idx != -1 { - // Find the opening fence + // Strip outermost code fence if present (handles "```markdown ... ```"). + if strings.Contains(s, "```") { openIdx := strings.Index(s, "```") closeIdx := strings.LastIndex(s, "```") if openIdx != closeIdx { @@ -375,15 +402,5 @@ func normaliseContent(raw string) string { } } - // If the response has agent tool-call preamble before the YAML frontmatter, - // find the FIRST occurrence of a line that is exactly "---" and take - // everything from there — but only if it precedes the main content. - // Guard: only apply if the current content does NOT already start with ---. - if !strings.HasPrefix(s, "---") { - if i := strings.Index(s, "\n---\n"); i >= 0 { - s = strings.TrimSpace(s[i+1:]) - } - } - return s + "\n" } diff --git a/mentat/internal/generator/generator_internal_test.go b/mentat/internal/generator/generator_internal_test.go new file mode 100644 index 0000000..ed9ae9f --- /dev/null +++ b/mentat/internal/generator/generator_internal_test.go @@ -0,0 +1,207 @@ +// Package generator (internal tests) exercises unexported helpers directly. +package generator + +import ( + "strings" + "testing" + + "github.com/frostyard/firn/mentat/internal/classifier" +) + +// --------------------------------------------------------------------------- +// buildPrompt tests +// --------------------------------------------------------------------------- + +var testDomain = classifier.DomainResult{ + Name: "storage", + Path: "internal/storage", + Description: "Persistent key-value store backed by BoltDB.", + FileCount: 6, + Languages: []string{"Go"}, +} + +func TestBuildPrompt_ContainsDomainContext(t *testing.T) { + prompt := buildPrompt(testDomain, "") + + checks := []struct { + label string + want string + }{ + {"domain name", "storage"}, + {"domain path", "internal/storage"}, + {"language", "Go"}, + {"file count", "6"}, + {"description", "Persistent key-value store"}, + } + for _, tc := range checks { + if !strings.Contains(prompt, tc.want) { + t.Errorf("prompt missing %s (%q); prompt snippet: %q", tc.label, tc.want, prompt[:min(200, len(prompt))]) + } + } +} + +func TestBuildPrompt_ContainsFrontmatterRequirement(t *testing.T) { + prompt := buildPrompt(testDomain, "") + + // Must include the domain name inside the frontmatter template. + wantFrontmatter := "name: storage" + if !strings.Contains(prompt, wantFrontmatter) { + t.Errorf("prompt does not contain frontmatter template %q", wantFrontmatter) + } +} + +func TestBuildPrompt_ContainsExample(t *testing.T) { + prompt := buildPrompt(testDomain, "") + + // The one-shot example is the queue domain — it must appear. + if !strings.Contains(prompt, "queue") { + t.Error("prompt is missing the one-shot example (expected 'queue' domain reference)") + } + // The example should not be the target domain. + if strings.Contains(prompt, "do not copy it verbatim") == false { + t.Error("prompt is missing the 'do not copy it verbatim' instruction") + } +} + +func TestBuildPrompt_NoSectionHeaders(t *testing.T) { + prompt := buildPrompt(testDomain, "") + + // The new prompt must NOT enumerate rigid section names as instructions. + prescriptiveSections := []string{ + "## Key Abstractions", + "## Common Patterns", + "## Things to Know Before Modifying", + } + for _, sec := range prescriptiveSections { + if strings.Contains(prompt, sec) { + t.Errorf("prompt contains prescriptive section header %q — trust the model to choose structure", sec) + } + } +} + +func TestBuildPrompt_IncludesFileSample(t *testing.T) { + sample := "### storage.go\npackage storage\n" + prompt := buildPrompt(testDomain, sample) + + if !strings.Contains(prompt, sample) { + t.Error("prompt does not include the file sample") + } +} + +func TestBuildPrompt_EmptySample_NoGarbage(t *testing.T) { + prompt := buildPrompt(testDomain, "") + + // An empty sample must not leave "Source sample" heading in the prompt. + if strings.Contains(prompt, "Source sample") { + t.Error("prompt contains 'Source sample' section even when sample is empty") + } +} + +func TestBuildPrompt_NoMarkdownFences_InInstructions(t *testing.T) { + prompt := buildPrompt(testDomain, "") + + // The instruction portion must not ask the model to wrap output in fences — + // the prompt explicitly requests no fences. + if !strings.Contains(prompt, "no markdown fences") { + t.Error("prompt is missing the 'no markdown fences' instruction") + } +} + +func TestBuildPrompt_MultipleLanguages(t *testing.T) { + d := testDomain + d.Languages = []string{"Go", "TypeScript", "Bash"} + prompt := buildPrompt(d, "") + + if !strings.Contains(prompt, "Go, TypeScript, Bash") { + t.Errorf("prompt does not list all languages; got snippet: %q", prompt[:min(300, len(prompt))]) + } +} + +func TestBuildPrompt_EmptyLanguages_FallsBackToUnknown(t *testing.T) { + d := testDomain + d.Languages = nil + prompt := buildPrompt(d, "") + + if !strings.Contains(prompt, "unknown") { + t.Error("prompt should fall back to 'unknown' when no languages are provided") + } +} + +// --------------------------------------------------------------------------- +// normaliseContent tests +// --------------------------------------------------------------------------- + +func TestNormaliseContent_PlainContent_Unchanged(t *testing.T) { + input := "---\nname: foo\ndescription: bar\n---\n\n## Overview\n\nSome content.\n" + got := normaliseContent(input) + // TrimSpace + "\n" is the only transformation for clean input. + want := strings.TrimSpace(input) + "\n" + if got != want { + t.Errorf("normaliseContent modified clean content:\nwant: %q\n got: %q", want, got) + } +} + +func TestNormaliseContent_StripsCodeFences(t *testing.T) { + inner := "---\nname: foo\ndescription: bar\n---\n\n## Overview\n\nContent here." + fenced := "```markdown\n" + inner + "\n```" + got := normaliseContent(fenced) + + if strings.Contains(got, "```") { + t.Errorf("normaliseContent did not strip code fences; got: %q", got[:min(100, len(got))]) + } + if !strings.Contains(got, "---\nname: foo") { + t.Errorf("normaliseContent removed content when stripping fences; got: %q", got[:min(200, len(got))]) + } +} + +func TestNormaliseContent_StripsTrailingStatsLines(t *testing.T) { + content := "---\nname: foo\ndescription: bar\n---\n\n## Overview\n\nSome content." + withStats := content + "\nChanges +2 -1\nRequests 3 Premium (5s)\n" + got := normaliseContent(withStats) + + if strings.Contains(got, "Changes") { + t.Errorf("normaliseContent did not strip 'Changes' stats line; got: %q", got) + } + if strings.Contains(got, "Requests") { + t.Errorf("normaliseContent did not strip 'Requests' stats line; got: %q", got) + } + if !strings.Contains(got, "Some content") { + t.Error("normaliseContent removed real content while stripping stats") + } +} + +func TestNormaliseContent_EndsWithSingleNewline(t *testing.T) { + cases := []string{ + "---\nname: x\ndescription: y\n---\n\nContent.", + "---\nname: x\ndescription: y\n---\n\nContent.\n\n\n", + "```\n---\nname: x\ndescription: y\n---\n\nContent.\n```", + } + for _, input := range cases { + got := normaliseContent(input) + if !strings.HasSuffix(got, "\n") { + t.Errorf("output does not end with newline; got: %q", got) + } + if strings.HasSuffix(got, "\n\n") { + t.Errorf("output ends with multiple newlines; got: %q", got) + } + } +} + +func TestNormaliseContent_DoesNotCutMidContentOnDashDash(t *testing.T) { + // Regression: old normaliser had a "\n---\n" heuristic that could truncate + // valid content mid-document when a horizontal rule appeared in the body. + content := "---\nname: foo\ndescription: bar\n---\n\n## Section\n\nSome text.\n\n---\n\nMore text after HR." + got := normaliseContent(content) + + if !strings.Contains(got, "More text after HR") { + t.Errorf("normaliseContent cut content at mid-document HR; got: %q", got) + } +} + +func TestNormaliseContent_EmptyInput(t *testing.T) { + got := normaliseContent("") + // Empty input should not panic; returns a single newline. + if got != "\n" { + t.Errorf("normaliseContent(\"\") = %q, want %q", got, "\n") + } +}