From cc6263dd8b2bbc02b5afb41cd69305be01d31607 Mon Sep 17 00:00:00 2001 From: dacharyc Date: Fri, 10 Apr 2026 18:37:47 -0400 Subject: [PATCH] Focus contamination warn on PLC potential --- README.md | 8 +-- cmd/root.go | 2 +- contamination/contamination.go | 52 ++++++++++++++- contamination/contamination_test.go | 89 ++++++++++++++++++++------ orchestrate/orchestrate_test.go | 48 ++++++++++++++ report/json_test.go | 6 +- report/report_test.go | 14 ++-- testdata/auxiliary-only-skill/SKILL.md | 44 +++++++++++++ 8 files changed, 224 insertions(+), 39 deletions(-) create mode 100644 testdata/auxiliary-only-skill/SKILL.md diff --git a/README.md b/README.md index 94ad4ab..15d339c 100644 --- a/README.md +++ b/README.md @@ -273,7 +273,7 @@ Detects cross-language contamination — skills where code examples in one langu Contamination Analysis Contamination level: medium (score: 0.35) Primary language category: javascript - ⚠ Language mismatch: python, shell (2 categories differ from primary) + ⚠ Language mismatch: python (1 category differ from primary) ℹ Multi-interface tool detected: mongodb Scope breadth: 4 @@ -282,7 +282,7 @@ References Contamination Analysis Scope breadth: 0 ``` -Contamination scoring considers three factors: multi-interface tools (0.3 weight), language mismatch across code blocks (0.4 weight), and scope breadth (0.3 weight). Reference files in `references/` are analyzed in aggregate. Use `--per-file` to see a breakdown by individual reference file. +Contamination scoring considers three factors: multi-interface tools (0.3 weight), application language mismatch across code blocks (0.4 weight), and scope breadth (0.3 weight). Auxiliary languages (shell, config formats, query languages, markup) are excluded from the mismatch calculation since they don't cause syntactic confusion with application languages. Reference files in `references/` are analyzed in aggregate. Use `--per-file` to see a breakdown by individual reference file. ### check @@ -790,10 +790,10 @@ Detects cross-language contamination — where code examples in one language cou - **Multi-interface tools**: detects tools with many language bindings (MongoDB, AWS, Docker, Kubernetes, Redis, etc.) by scanning the skill name and content - **Language categories**: maps code block languages to broad categories (shell, javascript, python, java, systems, config, etc.) -- **Language mismatch**: code blocks spanning different language categories +- **Language mismatch**: code blocks spanning different application language categories (auxiliary categories like shell, config, query, and markup are excluded) - **Technology references**: framework/runtime mentions (Node.js, Django, Flask, Spring, Rails, etc.) - **Scope breadth**: number of distinct technology categories referenced -- **Contamination score**: 3-factor formula — multi_interface (0.3) + mismatch (0.4) + breadth (0.3), capped at 1.0 +- **Contamination score**: 3-factor formula — multi_interface (0.3) + application language mismatch (0.4) + breadth (0.3), capped at 1.0 - **Contamination level**: high (≥0.5), medium (≥0.2), low (<0.2) ### LLM scoring (`score evaluate`) diff --git a/cmd/root.go b/cmd/root.go index 91e0db8..613481b 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -11,7 +11,7 @@ import ( "github.com/agent-ecosystem/skill-validator/types" ) -const version = "v1.5.2" +const version = "v1.5.3" var ( outputFormat string diff --git a/contamination/contamination.go b/contamination/contamination.go index e5900d5..1675760 100644 --- a/contamination/contamination.go +++ b/contamination/contamination.go @@ -138,12 +138,19 @@ func Analyze(name, content string, codeLanguages []string) *types.ContaminationR } scopeBreadth := len(allScopes) - // Detect language mismatch + // Detect language mismatch (application↔application only). + // Auxiliary categories (shell, config, query, markup) are expected alongside + // any primary language and do not cause syntactic confusion, so they are + // excluded from the mismatch set. The primary application category is + // determined separately from the overall primary, since a skill's most + // common language may be auxiliary (e.g. bash) while still mixing multiple + // application languages. primaryCategory := findPrimaryCategory(codeLanguages) + primaryAppCategory := findPrimaryApplicationCategory(codeLanguages) mismatchedCategories := make(map[string]bool) - if primaryCategory != "" { + if primaryAppCategory != "" { for cat := range langCategories { - if cat != primaryCategory { + if cat != primaryAppCategory && applicationCategories[cat] { mismatchedCategories[cat] = true } } @@ -283,3 +290,42 @@ func findPrimaryCategory(codeLanguages []string) string { } return primary } + +// findPrimaryApplicationCategory returns the most common application language +// category (ignoring auxiliary categories like shell, config, query, markup). +func findPrimaryApplicationCategory(codeLanguages []string) string { + if len(codeLanguages) == 0 { + return "" + } + + counts := make(map[string]int) + var order []string + seen := make(map[string]bool) + for _, lang := range codeLanguages { + langLower := strings.ToLower(lang) + for category, members := range languageCategories { + if members[langLower] && applicationCategories[category] { + counts[category]++ + if !seen[category] { + seen[category] = true + order = append(order, category) + } + break + } + } + } + + if len(counts) == 0 { + return "" + } + + maxCount := 0 + primary := "" + for _, cat := range order { + if counts[cat] > maxCount { + maxCount = counts[cat] + primary = cat + } + } + return primary +} diff --git a/contamination/contamination_test.go b/contamination/contamination_test.go index 938c45f..8865e3f 100644 --- a/contamination/contamination_test.go +++ b/contamination/contamination_test.go @@ -42,8 +42,9 @@ func TestAnalyze_LanguageMismatch(t *testing.T) { if r.PrimaryCategory != "python" { t.Errorf("expected primary category python, got %s", r.PrimaryCategory) } - if len(r.MismatchedCategories) == 0 { - t.Error("expected mismatched categories") + // Only application↔application mismatches are reported; bash (shell) is excluded + if len(r.MismatchedCategories) != 1 || r.MismatchedCategories[0] != "javascript" { + t.Errorf("expected mismatched categories [javascript], got %v", r.MismatchedCategories) } } @@ -172,24 +173,21 @@ func TestMismatchWeight(t *testing.T) { } func TestAnalyze_AuxiliaryOnlyMismatches(t *testing.T) { - // python + bash + yaml: auxiliary mismatches should score low + // python + bash + yaml: auxiliary categories are not mismatches languages := []string{"python", "python", "bash", "yaml"} r := Analyze("deploy-skill", "Deploy with bash and config.", languages) - if !r.LanguageMismatch { - t.Error("expected language mismatch") + if r.LanguageMismatch { + t.Error("expected no language mismatch when only auxiliary categories differ") } - // 2 auxiliary mismatches × 0.25 = 0.50 weighted → 0.4 × (0.50/3) ≈ 0.067 - // No multi-interface tool, scope breadth = 3 → factor3 = 0.3 * (1/4) = 0.075 - // Total ≈ 0.142, should be low - if r.ContaminationLevel != "low" { - t.Errorf("expected low contamination for python+bash+yaml, got %s (score=%f)", r.ContaminationLevel, r.ContaminationScore) + if len(r.MismatchedCategories) != 0 { + t.Errorf("expected no mismatched categories, got %v", r.MismatchedCategories) } - // Verify weights are populated - if w, ok := r.MismatchWeights["shell"]; !ok || w != 0.25 { - t.Errorf("expected shell weight 0.25, got %f (ok=%v)", w, ok) + if len(r.MismatchWeights) != 0 { + t.Errorf("expected no mismatch weights, got %v", r.MismatchWeights) } - if w, ok := r.MismatchWeights["config"]; !ok || w != 0.25 { - t.Errorf("expected config weight 0.25, got %f (ok=%v)", w, ok) + // Score should be very low with no mismatches + if r.ContaminationLevel != "low" { + t.Errorf("expected low contamination for python+bash+yaml, got %s (score=%f)", r.ContaminationLevel, r.ContaminationScore) } } @@ -213,16 +211,65 @@ func TestAnalyze_ApplicationOnlyMismatches(t *testing.T) { } func TestAnalyze_MixedMismatches(t *testing.T) { - // java + config + shell + markup: 3 auxiliary mismatches + // java + config + shell + markup: all auxiliary, no app↔app mismatch languages := []string{"java", "java", "yaml", "bash", "html"} r := Analyze("spring-boot", "Spring Boot app with config.", languages) + if r.LanguageMismatch { + t.Error("expected no language mismatch when only auxiliary categories differ from primary") + } + if len(r.MismatchedCategories) != 0 { + t.Errorf("expected no mismatched categories, got %v", r.MismatchedCategories) + } +} + +func TestAnalyze_AppAndAuxMixed(t *testing.T) { + // python + javascript + bash + yaml: only javascript is an app mismatch + languages := []string{"python", "python", "javascript", "bash", "yaml"} + r := Analyze("mixed-skill", "Some content.", languages) if !r.LanguageMismatch { - t.Error("expected language mismatch") + t.Error("expected language mismatch for app↔app") + } + if len(r.MismatchedCategories) != 1 || r.MismatchedCategories[0] != "javascript" { + t.Errorf("expected mismatched categories [javascript], got %v", r.MismatchedCategories) + } + // bash and yaml should not appear in weights + if _, ok := r.MismatchWeights["shell"]; ok { + t.Error("shell should not be in mismatch weights") + } + if _, ok := r.MismatchWeights["config"]; ok { + t.Error("config should not be in mismatch weights") + } +} + +func TestAnalyze_AuxPrimaryWithAppMismatch(t *testing.T) { + // bash appears most often (overall primary is shell/auxiliary), + // but javascript and python are both present → app↔app mismatch + languages := []string{"bash", "bash", "bash", "javascript", "python"} + r := Analyze("scripty-skill", "Some content.", languages) + if r.PrimaryCategory != "shell" { + t.Errorf("expected overall primary category shell, got %s", r.PrimaryCategory) + } + if !r.LanguageMismatch { + t.Error("expected language mismatch between javascript and python") + } + // Primary app category should be first-encountered app (javascript) + if len(r.MismatchedCategories) != 1 || r.MismatchedCategories[0] != "python" { + t.Errorf("expected mismatched categories [python], got %v", r.MismatchedCategories) + } +} + +func TestAnalyze_PurelyAuxiliary(t *testing.T) { + // Only auxiliary languages — no application languages at all + languages := []string{"bash", "yaml", "json", "sh"} + r := Analyze("config-skill", "Just config and shell.", languages) + if r.LanguageMismatch { + t.Error("expected no language mismatch with only auxiliary languages") + } + if len(r.MismatchedCategories) != 0 { + t.Errorf("expected no mismatched categories, got %v", r.MismatchedCategories) } - // 3 auxiliary mismatches × 0.25 = 0.75 weighted → 0.4 × (0.75/3) = 0.1 - // Should be significantly lower than old score of 0.4 × (3/3) = 0.4 - if r.ContaminationScore >= 0.4 { - t.Errorf("expected score < 0.4 for java+config+shell+markup, got %f", r.ContaminationScore) + if r.ContaminationScore >= 0.2 { + t.Errorf("expected low score for purely auxiliary languages, got %f", r.ContaminationScore) } } diff --git a/orchestrate/orchestrate_test.go b/orchestrate/orchestrate_test.go index fa089d5..64d7432 100644 --- a/orchestrate/orchestrate_test.go +++ b/orchestrate/orchestrate_test.go @@ -435,6 +435,54 @@ func TestRunContaminationAnalysis_RichSkill(t *testing.T) { } } +func TestRunContaminationAnalysis_RichSkill_AppMismatch(t *testing.T) { + dir := fixtureDir(t, "rich-skill") + r := RunContaminationAnalysis(dir) + cr := r.ContaminationReport + if cr == nil { + t.Fatal("expected ContaminationReport") + } + // rich-skill has bash+javascript+python+yaml: javascript↔python is app↔app mismatch + if !cr.LanguageMismatch { + t.Error("expected language mismatch for javascript↔python") + } + // Auxiliary categories (shell, config) should NOT appear in mismatches + for _, cat := range cr.MismatchedCategories { + if cat == "shell" || cat == "config" { + t.Errorf("auxiliary category %q should not be in mismatched categories", cat) + } + } + + var buf bytes.Buffer + report.Print(&buf, r, false) + output := buf.String() + if !strings.Contains(output, "Language mismatch") { + t.Error("expected Language mismatch warning in output for app↔app mismatch") + } +} + +func TestRunContaminationAnalysis_AuxiliaryOnly_NoMismatch(t *testing.T) { + dir := fixtureDir(t, "auxiliary-only-skill") + r := RunContaminationAnalysis(dir) + cr := r.ContaminationReport + if cr == nil { + t.Fatal("expected ContaminationReport") + } + if cr.LanguageMismatch { + t.Error("expected no language mismatch for auxiliary-only languages") + } + if len(cr.MismatchedCategories) != 0 { + t.Errorf("expected no mismatched categories, got %v", cr.MismatchedCategories) + } + + var buf bytes.Buffer + report.Print(&buf, r, false) + output := buf.String() + if strings.Contains(output, "Language mismatch") { + t.Error("Language mismatch warning should not appear for auxiliary-only languages") + } +} + func TestRunContaminationAnalysis_BrokenDir(t *testing.T) { dir := t.TempDir() r := RunContaminationAnalysis(dir) diff --git a/report/json_test.go b/report/json_test.go index a356eb7..338698b 100644 --- a/report/json_test.go +++ b/report/json_test.go @@ -400,7 +400,7 @@ func TestPrintJSON_ContaminationAnalysis(t *testing.T) { CodeLanguages: []string{"python", "javascript", "bash"}, LanguageCategories: []string{"python", "javascript", "shell"}, PrimaryCategory: "python", - MismatchedCategories: []string{"javascript", "shell"}, + MismatchedCategories: []string{"javascript"}, LanguageMismatch: true, TechReferences: []string{"javascript", "python"}, ScopeBreadth: 4, @@ -446,8 +446,8 @@ func TestPrintJSON_ContaminationAnalysis(t *testing.T) { } mismatched := ca["mismatched_categories"].([]any) - if len(mismatched) != 2 { - t.Errorf("mismatched_categories length = %d, want 2", len(mismatched)) + if len(mismatched) != 1 { + t.Errorf("mismatched_categories length = %d, want 1", len(mismatched)) } } diff --git a/report/report_test.go b/report/report_test.go index a0999c5..d355bd6 100644 --- a/report/report_test.go +++ b/report/report_test.go @@ -556,10 +556,10 @@ func TestPrint_ContaminationAnalysis_Medium(t *testing.T) { ContaminationLevel: "medium", ContaminationScore: 0.35, ScopeBreadth: 3, - CodeLanguages: []string{"python", "bash"}, - LanguageCategories: []string{"python", "shell"}, + CodeLanguages: []string{"python", "javascript"}, + LanguageCategories: []string{"python", "javascript"}, PrimaryCategory: "python", - MismatchedCategories: []string{"shell"}, + MismatchedCategories: []string{"javascript"}, LanguageMismatch: true, }, } @@ -571,8 +571,8 @@ func TestPrint_ContaminationAnalysis_Medium(t *testing.T) { if !strings.Contains(output, colorYellow+"medium") { t.Error("expected yellow color for medium level") } - if !strings.Contains(output, "Language mismatch: shell") { - t.Error("expected language mismatch warning with shell") + if !strings.Contains(output, "Language mismatch: javascript") { + t.Error("expected language mismatch warning with javascript") } if !strings.Contains(output, "1 category differ from primary") { t.Error("expected singular category count") @@ -590,7 +590,7 @@ func TestPrint_ContaminationAnalysis_High(t *testing.T) { CodeLanguages: []string{"python", "javascript", "bash", "ruby"}, LanguageCategories: []string{"python", "javascript", "shell", "ruby"}, PrimaryCategory: "python", - MismatchedCategories: []string{"javascript", "ruby", "shell"}, + MismatchedCategories: []string{"javascript", "ruby"}, LanguageMismatch: true, MultiInterfaceTools: []string{"mongodb"}, }, @@ -606,7 +606,7 @@ func TestPrint_ContaminationAnalysis_High(t *testing.T) { if !strings.Contains(output, "Multi-interface tool detected: mongodb") { t.Error("expected multi-interface tool warning") } - if !strings.Contains(output, "3 categories differ") { + if !strings.Contains(output, "2 categories differ") { t.Error("expected plural categories count") } if !strings.Contains(output, "Scope breadth: 5") { diff --git a/testdata/auxiliary-only-skill/SKILL.md b/testdata/auxiliary-only-skill/SKILL.md new file mode 100644 index 0000000..5e17b96 --- /dev/null +++ b/testdata/auxiliary-only-skill/SKILL.md @@ -0,0 +1,44 @@ +--- +name: auxiliary-only-skill +description: A skill that uses only shell and config code blocks for testing contamination. +--- +# Auxiliary Only Skill + +This skill uses bash commands and config files but no application languages. + +## Setup + +Install the required tools: + +```bash +brew install jq yq +``` + +## Configuration + +Create a config file: + +```yaml +server: + host: localhost + port: 8080 +``` + +Alternatively use JSON: + +```json +{ + "server": { + "host": "localhost", + "port": 8080 + } +} +``` + +## Running + +Start the service: + +```sh +./start.sh --config config.yaml +```