From d1d8e2e9e0bdee227c0b8b2bc018f415800729fa Mon Sep 17 00:00:00 2001 From: dacharyc Date: Sun, 19 Apr 2026 17:07:19 -0400 Subject: [PATCH] fix: keyword stuffing detection checks for minimum average words per segment --- structure/frontmatter.go | 12 ++++++++++++ structure/frontmatter_test.go | 8 ++++++++ 2 files changed, 20 insertions(+) diff --git a/structure/frontmatter.go b/structure/frontmatter.go index 83ffa05..8e20eaf 100644 --- a/structure/frontmatter.go +++ b/structure/frontmatter.go @@ -117,6 +117,11 @@ const ( // maxShortSegmentPct is the percentage of comma segments that must be // "short" (≤3 words) for the comma-list heuristic to fire. maxShortSegmentPct = 60 + + // minAvgWordsPerSegment is the minimum average words per comma-separated + // segment. Sentences at or above this density are considered prose with + // inline lists rather than keyword dumps, even if many segments are short. + minAvgWordsPerSegment = 3 ) func checkDescriptionKeywordStuffing(ctx types.ResultContext, desc string) []types.Result { @@ -165,12 +170,19 @@ func checkDescriptionKeywordStuffing(ctx types.ResultContext, desc string) []typ } if len(segments) >= minCommaSegments { shortCount := 0 + totalWords := 0 for _, seg := range segments { words := strings.Fields(strings.TrimSpace(seg)) + totalWords += len(words) if len(words) <= 3 { shortCount++ } } + // Sentences with enough prose density are not keyword dumps, + // even if many individual segments are short. + if totalWords >= minAvgWordsPerSegment*len(segments) { + continue + } if shortCount*100/len(segments) >= maxShortSegmentPct { return []types.Result{ctx.Warnf( "description has %d comma-separated segments, most very short — "+ diff --git a/structure/frontmatter_test.go b/structure/frontmatter_test.go index 6b96866..d10fb76 100644 --- a/structure/frontmatter_test.go +++ b/structure/frontmatter_test.go @@ -237,6 +237,14 @@ func TestCheckFrontmatter_KeywordStuffing(t *testing.T) { requireResultContaining(t, results, types.Warning, "comma-separated segments") }) + t.Run("prose with inline enumeration is fine (issue #71)", func(t *testing.T) { + desc := "Helps agents write and edit interface copy (microcopy) for digital products — buttons, labels, error messages, forms, onboarding flows, empty states, and help text. Use this skill whenever you need to produce or improve any text that appears in an app, website, or software UI. It applies four core quality standards (purposeful, concise, conversational, and clear) and ships with accessibility guidelines, research-backed readability benchmarks, error-message patterns, tone adaptation frameworks, and fillable templates." + s := makeSkill("/tmp/my-skill", "my-skill", desc) + results := CheckFrontmatter(s, Options{}) + requireNoResultContaining(t, results, types.Warning, "keyword") + requireNoResultContaining(t, results, types.Warning, "comma-separated") + }) + t.Run("description with abbreviations splits correctly", func(t *testing.T) { desc := "Use for e.g. vector search and embedding workflows. Supports multiple backends, distributed indexing, and query optimization." s := makeSkill("/tmp/my-skill", "my-skill", desc)