Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ Detects cross-language contamination — skills where code examples in one langu
Contamination Analysis
Contamination level: medium (score: 0.35)
Primary language category: javascript
⚠ Language mismatch: python, shell (2 categories differ from primary)
⚠ Language mismatch: python (1 category differ from primary)
ℹ Multi-interface tool detected: mongodb
Scope breadth: 4

Expand All @@ -282,7 +282,7 @@ References Contamination Analysis
Scope breadth: 0
```

Contamination scoring considers three factors: multi-interface tools (0.3 weight), language mismatch across code blocks (0.4 weight), and scope breadth (0.3 weight). Reference files in `references/` are analyzed in aggregate. Use `--per-file` to see a breakdown by individual reference file.
Contamination scoring considers three factors: multi-interface tools (0.3 weight), application language mismatch across code blocks (0.4 weight), and scope breadth (0.3 weight). Auxiliary languages (shell, config formats, query languages, markup) are excluded from the mismatch calculation since they don't cause syntactic confusion with application languages. Reference files in `references/` are analyzed in aggregate. Use `--per-file` to see a breakdown by individual reference file.

### check

Expand Down Expand Up @@ -790,10 +790,10 @@ Detects cross-language contamination — where code examples in one language cou

- **Multi-interface tools**: detects tools with many language bindings (MongoDB, AWS, Docker, Kubernetes, Redis, etc.) by scanning the skill name and content
- **Language categories**: maps code block languages to broad categories (shell, javascript, python, java, systems, config, etc.)
- **Language mismatch**: code blocks spanning different language categories
- **Language mismatch**: code blocks spanning different application language categories (auxiliary categories like shell, config, query, and markup are excluded)
- **Technology references**: framework/runtime mentions (Node.js, Django, Flask, Spring, Rails, etc.)
- **Scope breadth**: number of distinct technology categories referenced
- **Contamination score**: 3-factor formula — multi_interface (0.3) + mismatch (0.4) + breadth (0.3), capped at 1.0
- **Contamination score**: 3-factor formula — multi_interface (0.3) + application language mismatch (0.4) + breadth (0.3), capped at 1.0
- **Contamination level**: high (≥0.5), medium (≥0.2), low (<0.2)

### LLM scoring (`score evaluate`)
Expand Down
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"github.com/agent-ecosystem/skill-validator/types"
)

const version = "v1.5.2"
const version = "v1.5.3"

var (
outputFormat string
Expand Down
52 changes: 49 additions & 3 deletions contamination/contamination.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,19 @@ func Analyze(name, content string, codeLanguages []string) *types.ContaminationR
}
scopeBreadth := len(allScopes)

// Detect language mismatch
// Detect language mismatch (application↔application only).
// Auxiliary categories (shell, config, query, markup) are expected alongside
// any primary language and do not cause syntactic confusion, so they are
// excluded from the mismatch set. The primary application category is
// determined separately from the overall primary, since a skill's most
// common language may be auxiliary (e.g. bash) while still mixing multiple
// application languages.
primaryCategory := findPrimaryCategory(codeLanguages)
primaryAppCategory := findPrimaryApplicationCategory(codeLanguages)
mismatchedCategories := make(map[string]bool)
if primaryCategory != "" {
if primaryAppCategory != "" {
for cat := range langCategories {
if cat != primaryCategory {
if cat != primaryAppCategory && applicationCategories[cat] {
mismatchedCategories[cat] = true
}
}
Expand Down Expand Up @@ -283,3 +290,42 @@ func findPrimaryCategory(codeLanguages []string) string {
}
return primary
}

// findPrimaryApplicationCategory picks the application language category that
// occurs most often in codeLanguages. Each language is lower-cased and looked
// up in languageCategories; a hit is only counted when its category is flagged
// in applicationCategories, so auxiliary categories (shell, config, query,
// markup) can never be selected. When several categories share the highest
// count, the one that appeared first in codeLanguages wins. The result is ""
// when codeLanguages contains no application language.
func findPrimaryApplicationCategory(codeLanguages []string) string {
	tally := make(map[string]int)
	var firstSeen []string // categories in order of first appearance, for stable tie-breaking

	for _, raw := range codeLanguages {
		key := strings.ToLower(raw)
		for name, set := range languageCategories {
			if !set[key] || !applicationCategories[name] {
				continue
			}
			if tally[name] == 0 {
				firstSeen = append(firstSeen, name)
			}
			tally[name]++
			// Stop at the first matching application category for this language.
			break
		}
	}

	best, bestCount := "", 0
	for _, name := range firstSeen {
		// Strict comparison keeps the earliest category on a tie.
		if n := tally[name]; n > bestCount {
			best, bestCount = name, n
		}
	}
	return best
}
89 changes: 68 additions & 21 deletions contamination/contamination_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ func TestAnalyze_LanguageMismatch(t *testing.T) {
if r.PrimaryCategory != "python" {
t.Errorf("expected primary category python, got %s", r.PrimaryCategory)
}
if len(r.MismatchedCategories) == 0 {
t.Error("expected mismatched categories")
// Only application↔application mismatches are reported; bash (shell) is excluded
if len(r.MismatchedCategories) != 1 || r.MismatchedCategories[0] != "javascript" {
t.Errorf("expected mismatched categories [javascript], got %v", r.MismatchedCategories)
}
}

Expand Down Expand Up @@ -172,24 +173,21 @@ func TestMismatchWeight(t *testing.T) {
}

func TestAnalyze_AuxiliaryOnlyMismatches(t *testing.T) {
// python + bash + yaml: auxiliary mismatches should score low
// python + bash + yaml: auxiliary categories are not mismatches
languages := []string{"python", "python", "bash", "yaml"}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thoughts re this:

  1. "python" is listed twice
  2. According to our skill corpus sh and shell appear as the codefenced language in 649 of the original skills. So, < 1% but…

r := Analyze("deploy-skill", "Deploy with bash and config.", languages)
if !r.LanguageMismatch {
t.Error("expected language mismatch")
if r.LanguageMismatch {
t.Error("expected no language mismatch when only auxiliary categories differ")
}
// 2 auxiliary mismatches × 0.25 = 0.50 weighted → 0.4 × (0.50/3) ≈ 0.067
// No multi-interface tool, scope breadth = 3 → factor3 = 0.3 * (1/4) = 0.075
// Total ≈ 0.142, should be low
if r.ContaminationLevel != "low" {
t.Errorf("expected low contamination for python+bash+yaml, got %s (score=%f)", r.ContaminationLevel, r.ContaminationScore)
if len(r.MismatchedCategories) != 0 {
t.Errorf("expected no mismatched categories, got %v", r.MismatchedCategories)
}
// Verify weights are populated
if w, ok := r.MismatchWeights["shell"]; !ok || w != 0.25 {
t.Errorf("expected shell weight 0.25, got %f (ok=%v)", w, ok)
if len(r.MismatchWeights) != 0 {
t.Errorf("expected no mismatch weights, got %v", r.MismatchWeights)
}
if w, ok := r.MismatchWeights["config"]; !ok || w != 0.25 {
t.Errorf("expected config weight 0.25, got %f (ok=%v)", w, ok)
// Score should be very low with no mismatches
if r.ContaminationLevel != "low" {
t.Errorf("expected low contamination for python+bash+yaml, got %s (score=%f)", r.ContaminationLevel, r.ContaminationScore)
}
}

Expand All @@ -213,16 +211,65 @@ func TestAnalyze_ApplicationOnlyMismatches(t *testing.T) {
}

func TestAnalyze_MixedMismatches(t *testing.T) {
// java + config + shell + markup: 3 auxiliary mismatches
// java + config + shell + markup: every non-java category is auxiliary, so there is no app↔app mismatch
languages := []string{"java", "java", "yaml", "bash", "html"}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"java", "java"

r := Analyze("spring-boot", "Spring Boot app with config.", languages)
if r.LanguageMismatch {
t.Error("expected no language mismatch when only auxiliary categories differ from primary")
}
if len(r.MismatchedCategories) != 0 {
t.Errorf("expected no mismatched categories, got %v", r.MismatchedCategories)
}
}

func TestAnalyze_AppAndAuxMixed(t *testing.T) {
// python + javascript + bash + yaml: only javascript is an app mismatch
languages := []string{"python", "python", "javascript", "bash", "yaml"}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"python", "python"

r := Analyze("mixed-skill", "Some content.", languages)
if !r.LanguageMismatch {
t.Error("expected language mismatch")
t.Error("expected language mismatch for app↔app")
}
if len(r.MismatchedCategories) != 1 || r.MismatchedCategories[0] != "javascript" {
t.Errorf("expected mismatched categories [javascript], got %v", r.MismatchedCategories)
}
// bash and yaml should not appear in weights
if _, ok := r.MismatchWeights["shell"]; ok {
t.Error("shell should not be in mismatch weights")
}
if _, ok := r.MismatchWeights["config"]; ok {
t.Error("config should not be in mismatch weights")
}
}

func TestAnalyze_AuxPrimaryWithAppMismatch(t *testing.T) {
// bash appears most often (overall primary is shell/auxiliary),
// but javascript and python are both present → app↔app mismatch
languages := []string{"bash", "bash", "bash", "javascript", "python"}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"bash", "bash", "bash",

r := Analyze("scripty-skill", "Some content.", languages)
if r.PrimaryCategory != "shell" {
t.Errorf("expected overall primary category shell, got %s", r.PrimaryCategory)
}
if !r.LanguageMismatch {
t.Error("expected language mismatch between javascript and python")
}
// javascript and python tie at one block each; the first-encountered one (javascript) becomes the primary app category, so python is the only mismatch
if len(r.MismatchedCategories) != 1 || r.MismatchedCategories[0] != "python" {
t.Errorf("expected mismatched categories [python], got %v", r.MismatchedCategories)
}
}

func TestAnalyze_PurelyAuxiliary(t *testing.T) {
// Only auxiliary languages — no application languages at all
languages := []string{"bash", "yaml", "json", "sh"}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sh is here but not shell and neither is present above.

r := Analyze("config-skill", "Just config and shell.", languages)
if r.LanguageMismatch {
t.Error("expected no language mismatch with only auxiliary languages")
}
if len(r.MismatchedCategories) != 0 {
t.Errorf("expected no mismatched categories, got %v", r.MismatchedCategories)
}
// 3 auxiliary mismatches × 0.25 = 0.75 weighted → 0.4 × (0.75/3) = 0.1
// Should be significantly lower than old score of 0.4 × (3/3) = 0.4
if r.ContaminationScore >= 0.4 {
t.Errorf("expected score < 0.4 for java+config+shell+markup, got %f", r.ContaminationScore)
if r.ContaminationScore >= 0.2 {
t.Errorf("expected low score for purely auxiliary languages, got %f", r.ContaminationScore)
}
}

Expand Down
48 changes: 48 additions & 0 deletions orchestrate/orchestrate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,54 @@ func TestRunContaminationAnalysis_RichSkill(t *testing.T) {
}
}

func TestRunContaminationAnalysis_RichSkill_AppMismatch(t *testing.T) {
dir := fixtureDir(t, "rich-skill")
r := RunContaminationAnalysis(dir)
cr := r.ContaminationReport
if cr == nil {
t.Fatal("expected ContaminationReport")
}
// rich-skill has bash+javascript+python+yaml: javascript↔python is app↔app mismatch
if !cr.LanguageMismatch {
t.Error("expected language mismatch for javascript↔python")
}
// Auxiliary categories (shell, config) should NOT appear in mismatches
for _, cat := range cr.MismatchedCategories {
if cat == "shell" || cat == "config" {
t.Errorf("auxiliary category %q should not be in mismatched categories", cat)
}
}

var buf bytes.Buffer
report.Print(&buf, r, false)
output := buf.String()
if !strings.Contains(output, "Language mismatch") {
t.Error("expected Language mismatch warning in output for app↔app mismatch")
}
}

func TestRunContaminationAnalysis_AuxiliaryOnly_NoMismatch(t *testing.T) {
dir := fixtureDir(t, "auxiliary-only-skill")
r := RunContaminationAnalysis(dir)
cr := r.ContaminationReport
if cr == nil {
t.Fatal("expected ContaminationReport")
}
if cr.LanguageMismatch {
t.Error("expected no language mismatch for auxiliary-only languages")
}
if len(cr.MismatchedCategories) != 0 {
t.Errorf("expected no mismatched categories, got %v", cr.MismatchedCategories)
}

var buf bytes.Buffer
report.Print(&buf, r, false)
output := buf.String()
if strings.Contains(output, "Language mismatch") {
t.Error("Language mismatch warning should not appear for auxiliary-only languages")
}
}

func TestRunContaminationAnalysis_BrokenDir(t *testing.T) {
dir := t.TempDir()
r := RunContaminationAnalysis(dir)
Expand Down
6 changes: 3 additions & 3 deletions report/json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ func TestPrintJSON_ContaminationAnalysis(t *testing.T) {
CodeLanguages: []string{"python", "javascript", "bash"},
LanguageCategories: []string{"python", "javascript", "shell"},
PrimaryCategory: "python",
MismatchedCategories: []string{"javascript", "shell"},
MismatchedCategories: []string{"javascript"},
LanguageMismatch: true,
TechReferences: []string{"javascript", "python"},
ScopeBreadth: 4,
Expand Down Expand Up @@ -446,8 +446,8 @@ func TestPrintJSON_ContaminationAnalysis(t *testing.T) {
}

mismatched := ca["mismatched_categories"].([]any)
if len(mismatched) != 2 {
t.Errorf("mismatched_categories length = %d, want 2", len(mismatched))
if len(mismatched) != 1 {
t.Errorf("mismatched_categories length = %d, want 1", len(mismatched))
}
}

Expand Down
14 changes: 7 additions & 7 deletions report/report_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -556,10 +556,10 @@ func TestPrint_ContaminationAnalysis_Medium(t *testing.T) {
ContaminationLevel: "medium",
ContaminationScore: 0.35,
ScopeBreadth: 3,
CodeLanguages: []string{"python", "bash"},
LanguageCategories: []string{"python", "shell"},
CodeLanguages: []string{"python", "javascript"},
LanguageCategories: []string{"python", "javascript"},
PrimaryCategory: "python",
MismatchedCategories: []string{"shell"},
MismatchedCategories: []string{"javascript"},
LanguageMismatch: true,
},
}
Expand All @@ -571,8 +571,8 @@ func TestPrint_ContaminationAnalysis_Medium(t *testing.T) {
if !strings.Contains(output, colorYellow+"medium") {
t.Error("expected yellow color for medium level")
}
if !strings.Contains(output, "Language mismatch: shell") {
t.Error("expected language mismatch warning with shell")
if !strings.Contains(output, "Language mismatch: javascript") {
t.Error("expected language mismatch warning with javascript")
}
if !strings.Contains(output, "1 category differ from primary") {
t.Error("expected singular category count")
Expand All @@ -590,7 +590,7 @@ func TestPrint_ContaminationAnalysis_High(t *testing.T) {
CodeLanguages: []string{"python", "javascript", "bash", "ruby"},
LanguageCategories: []string{"python", "javascript", "shell", "ruby"},
PrimaryCategory: "python",
MismatchedCategories: []string{"javascript", "ruby", "shell"},
MismatchedCategories: []string{"javascript", "ruby"},
LanguageMismatch: true,
MultiInterfaceTools: []string{"mongodb"},
},
Expand All @@ -606,7 +606,7 @@ func TestPrint_ContaminationAnalysis_High(t *testing.T) {
if !strings.Contains(output, "Multi-interface tool detected: mongodb") {
t.Error("expected multi-interface tool warning")
}
if !strings.Contains(output, "3 categories differ") {
if !strings.Contains(output, "2 categories differ") {
t.Error("expected plural categories count")
}
if !strings.Contains(output, "Scope breadth: 5") {
Expand Down
44 changes: 44 additions & 0 deletions testdata/auxiliary-only-skill/SKILL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
---
name: auxiliary-only-skill
description: A skill that uses only shell and config code blocks for testing contamination.
---
# Auxiliary Only Skill

This skill uses bash commands and config files but no application languages.

## Setup

Install the required tools:

```bash
brew install jq yq
```

## Configuration

Create a config file:

```yaml
server:
host: localhost
port: 8080
```

Alternatively use JSON:

```json
{
"server": {
"host": "localhost",
"port": 8080
}
}
```

## Running

Start the service:

```sh
./start.sh --config config.yaml
```
Loading