From 2c31d9aaab7c4ceb9a7df80d132ad3a11d467029 Mon Sep 17 00:00:00 2001 From: Bianca Lisle Date: Thu, 16 Apr 2026 19:20:09 +0100 Subject: [PATCH 1/4] Add --ignore-link flag to skip URLs in link validation Adds an IgnorePatterns option to links.Options that skips any URL containing a matching substring (case-insensitive). Useful for private repositories or localhost URLs that are valid locally but unreachable from CI environments. skill-validator check --ignore-link=github.com/myorg,localhost . skill-validator validate links --ignore-link=github.com/myorg . Closes #68 --- cmd/check.go | 5 +++++ cmd/validate_links.go | 10 ++++++++-- links/check.go | 32 +++++++++++++++++++++++++++++--- orchestrate/orchestrate.go | 7 ++++--- 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/cmd/check.go b/cmd/check.go index a11f82a..e23d993 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -7,6 +7,7 @@ import ( "github.com/spf13/cobra" + "github.com/agent-ecosystem/skill-validator/links" "github.com/agent-ecosystem/skill-validator/orchestrate" "github.com/agent-ecosystem/skill-validator/structure" "github.com/agent-ecosystem/skill-validator/types" @@ -15,6 +16,7 @@ import ( var ( checkOnly []string checkSkip []string + checkIgnoreLinks []string perFileCheck bool checkSkipOrphans bool strictCheck bool @@ -44,6 +46,8 @@ func init() { "allow files at the skill root without warnings and treat them as standard content for token counting") checkCmd.Flags().StringSliceVar(&checkAllowDirs, "allow-dirs", nil, "comma-separated list of directory names to accept without warnings (e.g. --allow-dirs=evals,testing)") + checkCmd.Flags().StringSliceVar(&checkIgnoreLinks, "ignore-link", nil, + "URL patterns to skip in link validation (case-insensitive substring match; repeatable or comma-separated, e.g. 
--ignore-link=github.com/myorg,localhost)") rootCmd.AddCommand(checkCmd) } @@ -77,6 +81,7 @@ func runCheck(cmd *cobra.Command, args []string) error { AllowFlatLayouts: checkAllowFlatLayouts, AllowDirs: checkAllowDirs, }, + LinksOpts: links.Options{IgnorePatterns: checkIgnoreLinks}, } eopts := exitOpts{strict: strictCheck} ctx := context.Background() diff --git a/cmd/validate_links.go b/cmd/validate_links.go index 5d547a7..3f99261 100644 --- a/cmd/validate_links.go +++ b/cmd/validate_links.go @@ -5,10 +5,13 @@ import ( "github.com/spf13/cobra" + "github.com/agent-ecosystem/skill-validator/links" "github.com/agent-ecosystem/skill-validator/orchestrate" "github.com/agent-ecosystem/skill-validator/types" ) +var validateLinksIgnore []string + var validateLinksCmd = &cobra.Command{ Use: "links ", Short: "Check external link validity (HTTP/HTTPS)", @@ -18,6 +21,8 @@ var validateLinksCmd = &cobra.Command{ } func init() { + validateLinksCmd.Flags().StringSliceVar(&validateLinksIgnore, "ignore-link", nil, + "URL patterns to skip (case-insensitive substring match; repeatable or comma-separated, e.g. 
--ignore-link=github.com/myorg,localhost)") validateCmd.AddCommand(validateLinksCmd) } @@ -28,15 +33,16 @@ func runValidateLinks(cmd *cobra.Command, args []string) error { } ctx := context.Background() + lopts := links.Options{IgnorePatterns: validateLinksIgnore} switch mode { case types.SingleSkill: - r := orchestrate.RunLinkChecks(ctx, dirs[0]) + r := orchestrate.RunLinkChecks(ctx, dirs[0], lopts) return outputReport(r) case types.MultiSkill: mr := &types.MultiReport{} for _, dir := range dirs { - r := orchestrate.RunLinkChecks(ctx, dir) + r := orchestrate.RunLinkChecks(ctx, dir, lopts) mr.Skills = append(mr.Skills, r) mr.Errors += r.Errors mr.Warnings += r.Warnings diff --git a/links/check.go b/links/check.go index df8d0ba..078dcc4 100644 --- a/links/check.go +++ b/links/check.go @@ -9,13 +9,23 @@ import ( "github.com/agent-ecosystem/skill-validator/types" ) +// Options configures external link validation. +type Options struct { + // IgnorePatterns is a list of substrings. Any URL that contains one of + // these strings (case-insensitive) is skipped without being checked. + // Useful for private repositories or localhost URLs that are unreachable + // from CI but valid in a local development context. + // Example: --ignore-link=github.com/myorg --ignore-link=localhost + IgnorePatterns []string +} + type linkResult struct { url string result types.Result } // CheckLinks validates external (HTTP/HTTPS) links in the skill body. 
-func CheckLinks(ctx context.Context, dir, body string) []types.Result { +func CheckLinks(ctx context.Context, dir, body string, opts Options) []types.Result { rctx := types.ResultContext{Category: "Links", File: "SKILL.md"} allLinks := ExtractLinks(body) if len(allLinks) == 0 { @@ -35,9 +45,13 @@ func CheckLinks(ctx context.Context, dir, body string) []types.Result { if strings.Contains(link, "{") { continue } - if strings.HasPrefix(link, "http://") || strings.HasPrefix(link, "https://") { - httpLinks = append(httpLinks, link) + if !strings.HasPrefix(link, "http://") && !strings.HasPrefix(link, "https://") { + continue + } + if isIgnored(link, opts.IgnorePatterns) { + continue } + httpLinks = append(httpLinks, link) } if len(httpLinks) == 0 { @@ -111,6 +125,18 @@ func checkHTTPLinkGET(rctx types.ResultContext, client *http.Client, url string) return classifyResponse(rctx, url, resp.StatusCode) } +// isIgnored reports whether url contains any of the given patterns +// (case-insensitive substring match). 
+func isIgnored(url string, patterns []string) bool { + lower := strings.ToLower(url) + for _, p := range patterns { + if strings.Contains(lower, strings.ToLower(p)) { + return true + } + } + return false +} + func classifyResponse(rctx types.ResultContext, url string, statusCode int) types.Result { if statusCode >= 200 && statusCode < 300 { return rctx.Passf("%s (HTTP %d)", url, statusCode) diff --git a/orchestrate/orchestrate.go b/orchestrate/orchestrate.go index 623bcf0..85212e3 100644 --- a/orchestrate/orchestrate.go +++ b/orchestrate/orchestrate.go @@ -47,6 +47,7 @@ func AllGroups() map[CheckGroup]bool { type Options struct { Enabled map[CheckGroup]bool StructOpts structure.Options + LinksOpts links.Options } // RunAllChecks runs all enabled check groups against a single skill directory @@ -85,7 +86,7 @@ func RunAllChecks(ctx context.Context, dir string, opts Options) *types.Report { // Link checks require a fully parsed skill if skillLoaded && opts.Enabled[GroupLinks] { - rpt.Results = append(rpt.Results, links.CheckLinks(ctx, dir, body)...) + rpt.Results = append(rpt.Results, links.CheckLinks(ctx, dir, body, opts.LinksOpts)...) } // Content analysis works on raw content (no frontmatter parsing needed) @@ -179,7 +180,7 @@ func RunContaminationAnalysis(dir string) *types.Report { } // RunLinkChecks validates external HTTP/HTTPS links in a single skill directory. -func RunLinkChecks(ctx context.Context, dir string) *types.Report { +func RunLinkChecks(ctx context.Context, dir string, opts links.Options) *types.Report { rpt := &types.Report{SkillDir: dir} s, err := skill.Load(dir) @@ -190,7 +191,7 @@ func RunLinkChecks(ctx context.Context, dir string) *types.Report { return rpt } - rpt.Results = append(rpt.Results, links.CheckLinks(ctx, dir, s.Body)...) + rpt.Results = append(rpt.Results, links.CheckLinks(ctx, dir, s.Body, opts)...) 
// If no results at all, add a pass result if len(rpt.Results) == 0 { From 3cb4505b31ae98e93ecca9eef91f7c1e82fe3f2e Mon Sep 17 00:00:00 2001 From: Bianca Lisle Date: Thu, 16 Apr 2026 19:32:58 +0100 Subject: [PATCH 2/4] add tests for --ignore-link flag - Update all CheckLinks and RunLinkChecks call sites to pass the new Options parameter - Add TestIsIgnored with 8 table-driven cases covering empty patterns, exact match, case-insensitive matching, partial substrings, multi-pattern scenarios - Add TestCheckLinks_IgnorePatterns covering ignored URLs skipped entirely, non-ignored URLs still checked, mixed scenarios, and case-insensitive matching - Add TestRunLinkChecks_WithIgnorePatterns and TestRunAllChecks_WithIgnoreLinks in the orchestrate package to verify patterns propagate end-to-end --- cmd/cmd_test.go | 4 +- links/check_test.go | 148 +++++++++++++++++++++++++++++--- links/safenet_test.go | 4 +- orchestrate/orchestrate_test.go | 100 ++++++++++++++++++++- 4 files changed, 238 insertions(+), 18 deletions(-) diff --git a/cmd/cmd_test.go b/cmd/cmd_test.go index bb1a468..c45ca15 100644 --- a/cmd/cmd_test.go +++ b/cmd/cmd_test.go @@ -112,7 +112,7 @@ func TestValidateLinks_ValidSkill(t *testing.T) { } // External link checks: valid-skill has no HTTP links, so no results - linkResults := links.CheckLinks(t.Context(), dir, s.Body) + linkResults := links.CheckLinks(t.Context(), dir, s.Body, links.Options{}) if linkResults != nil { t.Errorf("expected nil for skill with no HTTP links, got %d results", len(linkResults)) } @@ -139,7 +139,7 @@ func TestValidateLinks_InvalidSkill(t *testing.T) { } // External link checks: invalid-skill has an HTTP link - linkResults := links.CheckLinks(t.Context(), dir, s.Body) + linkResults := links.CheckLinks(t.Context(), dir, s.Body, links.Options{}) if len(linkResults) == 0 { t.Error("expected at least one external link check result") } diff --git a/links/check_test.go b/links/check_test.go index 3d05a72..0e095e2 100644 --- 
a/links/check_test.go +++ b/links/check_test.go @@ -50,7 +50,7 @@ func TestCheckLinks_SkipsRelative(t *testing.T) { t.Run("relative-only links returns nil", func(t *testing.T) { dir := t.TempDir() body := "See [guide](references/guide.md)." - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) if results != nil { t.Errorf("expected nil for relative-only links, got %v", results) } @@ -59,7 +59,7 @@ func TestCheckLinks_SkipsRelative(t *testing.T) { t.Run("mailto and anchors are skipped", func(t *testing.T) { dir := t.TempDir() body := "[email](mailto:user@example.com) and [section](#heading)" - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) if results != nil { t.Errorf("expected nil for mailto/anchor links, got %v", results) } @@ -68,7 +68,7 @@ func TestCheckLinks_SkipsRelative(t *testing.T) { t.Run("template URLs are skipped", func(t *testing.T) { dir := t.TempDir() body := "[PR](https://github.com/{OWNER}/{REPO}/pull/{PR}) and https://api.example.com/{version}/users/{id}" - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) if results != nil { t.Errorf("expected nil for template URLs, got %v", results) } @@ -77,7 +77,7 @@ func TestCheckLinks_SkipsRelative(t *testing.T) { t.Run("no links returns nil", func(t *testing.T) { dir := t.TempDir() body := "No links here." 
- results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) if results != nil { t.Errorf("expected nil for no links, got %v", results) } @@ -124,42 +124,42 @@ func TestCheckLinks_HTTP(t *testing.T) { t.Run("successful HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[ok](" + server.URL + "/ok)" - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) requireResultContaining(t, results, types.Pass, "HTTP 200") }) t.Run("404 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[missing](" + server.URL + "/not-found)" - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) requireResultContaining(t, results, types.Error, "HTTP 404") }) t.Run("403 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[blocked](" + server.URL + "/forbidden)" - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) requireResultContaining(t, results, types.Info, "HTTP 403") }) t.Run("500 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[error](" + server.URL + "/server-error)" - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) requireResultContaining(t, results, types.Error, "HTTP 500") }) t.Run("HEAD 404 falls back to GET 200", func(t *testing.T) { dir := t.TempDir() body := "[spa](" + server.URL + "/head-404-get-200)" - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) requireResultContaining(t, results, types.Pass, "HTTP 200") }) t.Run("HEAD 405 falls back to GET 200", func(t *testing.T) { dir := t.TempDir() body := "[nohead](" + server.URL + "/head-405-get-200)" - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) requireResultContaining(t, results, types.Pass, "HTTP 200") }) @@ -167,7 
+167,7 @@ func TestCheckLinks_HTTP(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/guide.md", "content") body := "[guide](references/guide.md) and [site](" + server.URL + "/ok)" - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) if len(results) != 1 { t.Fatalf("expected 1 result (HTTP only), got %d", len(results)) } @@ -325,3 +325,129 @@ func TestCheckHTTPLink(t *testing.T) { requireContains(t, result.Message, "invalid URL") }) } + +func TestIsIgnored(t *testing.T) { + tests := []struct { + name string + url string + patterns []string + want bool + }{ + { + name: "empty patterns never ignores", + url: "https://example.com/page", + patterns: nil, + want: false, + }, + { + name: "exact domain match", + url: "https://github.com/myorg/private-repo", + patterns: []string{"github.com/myorg"}, + want: true, + }, + { + name: "case-insensitive match", + url: "https://GitHub.COM/MyOrg/Repo", + patterns: []string{"github.com/myorg"}, + want: true, + }, + { + name: "partial URL substring match", + url: "https://internal.company.com/api/v1/resource", + patterns: []string{"internal.company.com"}, + want: true, + }, + { + name: "no match", + url: "https://public.example.com/page", + patterns: []string{"private.example.com"}, + want: false, + }, + { + name: "first of multiple patterns matches", + url: "https://github.com/10gen/secret", + patterns: []string{"github.com/10gen", "localhost"}, + want: true, + }, + { + name: "second of multiple patterns matches", + url: "http://localhost:8080/health", + patterns: []string{"github.com/10gen", "localhost"}, + want: true, + }, + { + name: "no pattern matches among multiple", + url: "https://public.docs.com/guide", + patterns: []string{"github.com/10gen", "localhost", "internal.corp"}, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isIgnored(tt.url, tt.patterns) + if got != tt.want { + t.Errorf("isIgnored(%q, %v) 
= %v, want %v", tt.url, tt.patterns, got, tt.want) + } + }) + } +} + +func TestCheckLinks_IgnorePatterns(t *testing.T) { + orig := newHTTPClient + newHTTPClient = func() *http.Client { return testHTTPClient() } + t.Cleanup(func() { newHTTPClient = orig }) + + mux := http.NewServeMux() + mux.HandleFunc("/ok", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + mux.HandleFunc("/not-found", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + }) + server := httptest.NewServer(mux) + defer server.Close() + + t.Run("ignored URL is not checked", func(t *testing.T) { + dir := t.TempDir() + body := "[broken](" + server.URL + "/not-found)" + opts := Options{IgnorePatterns: []string{server.URL}} + results := CheckLinks(t.Context(), dir, body, opts) + // All HTTP links were ignored, so CheckLinks returns nil + if results != nil { + t.Errorf("expected nil when all links are ignored, got %v", results) + } + }) + + t.Run("non-ignored URL is still checked", func(t *testing.T) { + dir := t.TempDir() + body := "[ok](" + server.URL + "/ok)" + opts := Options{IgnorePatterns: []string{"some-other-host.example.com"}} + results := CheckLinks(t.Context(), dir, body, opts) + requireResultContaining(t, results, types.Pass, "HTTP 200") + }) + + t.Run("mixed: ignored URL skipped, other URL checked", func(t *testing.T) { + dir := t.TempDir() + // Two links: one that would fail (ignored) and one that passes (not ignored) + body := "[broken](" + server.URL + "/not-found) and [ok](" + server.URL + "/ok)" + // Ignore only the not-found path; the /ok path will still be checked. + // We use a pattern that only matches /not-found. 
+ opts := Options{IgnorePatterns: []string{server.URL + "/not-found"}} + results := CheckLinks(t.Context(), dir, body, opts) + if len(results) != 1 { + t.Fatalf("expected exactly 1 result (only non-ignored link checked), got %d: %v", len(results), results) + } + requireResultContaining(t, results, types.Pass, "HTTP 200") + }) + + t.Run("case-insensitive pattern matching", func(t *testing.T) { + dir := t.TempDir() + body := "[broken](" + server.URL + "/not-found)" + opts := Options{IgnorePatterns: []string{strings.ToUpper(server.URL)}} + results := CheckLinks(t.Context(), dir, body, opts) + if results != nil { + t.Errorf("expected nil when link matches case-insensitive pattern, got %v", results) + } + }) +} diff --git a/links/safenet_test.go b/links/safenet_test.go index e0c672e..775921f 100644 --- a/links/safenet_test.go +++ b/links/safenet_test.go @@ -51,7 +51,7 @@ func TestSafeTransportBlocksPrivateIPs(t *testing.T) { // We don't need a server running; the dialer should refuse before connecting. 
dir := t.TempDir() body := "[metadata](http://169.254.169.254/latest/meta-data/)" - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) if len(results) == 0 { t.Fatal("expected a result for blocked private IP link") } @@ -65,7 +65,7 @@ func TestSafeTransportBlocksPrivateIPs(t *testing.T) { func TestSafeTransportBlocksLocalhost(t *testing.T) { dir := t.TempDir() body := "[local](http://127.0.0.1:8080/admin)" - results := CheckLinks(t.Context(), dir, body) + results := CheckLinks(t.Context(), dir, body, Options{}) if len(results) == 0 { t.Fatal("expected a result for blocked localhost link") } diff --git a/orchestrate/orchestrate_test.go b/orchestrate/orchestrate_test.go index 64d7432..fddaba1 100644 --- a/orchestrate/orchestrate_test.go +++ b/orchestrate/orchestrate_test.go @@ -8,6 +8,7 @@ import ( "strings" "testing" + "github.com/agent-ecosystem/skill-validator/links" "github.com/agent-ecosystem/skill-validator/report" "github.com/agent-ecosystem/skill-validator/skillcheck" "github.com/agent-ecosystem/skill-validator/structure" @@ -605,7 +606,7 @@ func TestRunContentAnalysis_NoReferencesContamination(t *testing.T) { func TestRunLinkChecks_ValidSkill(t *testing.T) { dir := fixtureDir(t, "valid-skill") - r := RunLinkChecks(t.Context(), dir) + r := RunLinkChecks(t.Context(), dir, links.Options{}) if r.Errors != 0 { t.Errorf("expected 0 errors, got %d", r.Errors) for _, res := range r.Results { @@ -627,7 +628,7 @@ func TestRunLinkChecks_ValidSkill(t *testing.T) { func TestRunLinkChecks_InvalidSkill(t *testing.T) { dir := fixtureDir(t, "invalid-skill") - r := RunLinkChecks(t.Context(), dir) + r := RunLinkChecks(t.Context(), dir, links.Options{}) if r.Errors == 0 { t.Error("expected errors for invalid skill with broken links") } @@ -635,7 +636,7 @@ func TestRunLinkChecks_InvalidSkill(t *testing.T) { func TestRunLinkChecks_BrokenDir(t *testing.T) { dir := t.TempDir() - r := RunLinkChecks(t.Context(), dir) + r := 
RunLinkChecks(t.Context(), dir, links.Options{}) if r.Errors != 1 { t.Errorf("expected 1 error, got %d", r.Errors) } @@ -853,3 +854,96 @@ func TestOutputJSON_PerFile_ValidSkill(t *testing.T) { t.Error("expected contamination_analysis in per-file report") } } + +// writeSkillFile creates a minimal valid SKILL.md in dir with the given body content. +func writeSkillFile(t *testing.T, dir, body string) { + t.Helper() + content := "---\nname: test-skill\ndescription: Test skill for link ignore patterns.\n---\n" + body + if err := os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte(content), 0o644); err != nil { + t.Fatal(err) + } +} + +func TestRunLinkChecks_WithIgnorePatterns(t *testing.T) { + t.Run("ignored URL produces no error", func(t *testing.T) { + dir := t.TempDir() + // Use an .invalid domain that would fail DNS resolution if actually requested. + writeSkillFile(t, dir, "See [private](https://private-repo.corp.invalid/secret) for details.") + opts := links.Options{IgnorePatterns: []string{"private-repo.corp.invalid"}} + r := RunLinkChecks(t.Context(), dir, opts) + if r.Errors != 0 { + t.Errorf("expected 0 errors when all links are ignored, got %d", r.Errors) + for _, res := range r.Results { + if res.Level == types.Error { + t.Logf(" error: %s: %s", res.Category, res.Message) + } + } + } + }) + + t.Run("empty ignore patterns checks links normally", func(t *testing.T) { + dir := t.TempDir() + // Use an .invalid domain — DNS will fail, which is an error. 
+ writeSkillFile(t, dir, "See [broken](https://definitely-unreachable.invalid/path) for details.") + opts := links.Options{IgnorePatterns: nil} + r := RunLinkChecks(t.Context(), dir, opts) + if r.Errors == 0 { + t.Error("expected errors for unreachable link with no ignore patterns") + } + }) + + t.Run("multiple ignore patterns, all links matched", func(t *testing.T) { + dir := t.TempDir() + writeSkillFile(t, dir, + "[A](https://internal.corp.invalid/a) and [B](https://private.corp.invalid/b)") + opts := links.Options{IgnorePatterns: []string{"internal.corp.invalid", "private.corp.invalid"}} + r := RunLinkChecks(t.Context(), dir, opts) + if r.Errors != 0 { + t.Errorf("expected 0 errors when all links are ignored, got %d", r.Errors) + } + }) +} + +func TestRunAllChecks_WithIgnoreLinks(t *testing.T) { + t.Run("ignored link produces no error in full check", func(t *testing.T) { + dir := t.TempDir() + writeSkillFile(t, dir, "See [private](https://private-repo.corp.invalid/secret) for details.") + opts := Options{ + Enabled: AllGroups(), + LinksOpts: links.Options{ + IgnorePatterns: []string{"private-repo.corp.invalid"}, + }, + } + r := RunAllChecks(t.Context(), dir, opts) + for _, res := range r.Results { + if res.Category == "Links" && res.Level == types.Error { + t.Errorf("unexpected Links error (should have been ignored): %s", res.Message) + } + } + }) + + t.Run("ignore patterns propagate through RunAllChecks", func(t *testing.T) { + dir := t.TempDir() + writeSkillFile(t, dir, + "[A](https://internal.corp.invalid/a) and [B](https://other.corp.invalid/b)") + // Ignore only one; the other would fail if actually requested. + // Since both are .invalid domains, both would fail DNS — so ignore both. 
+ opts := Options{ + Enabled: map[CheckGroup]bool{ + GroupStructure: false, + GroupLinks: true, + GroupContent: false, + GroupContamination: false, + }, + LinksOpts: links.Options{ + IgnorePatterns: []string{"internal.corp.invalid", "other.corp.invalid"}, + }, + } + r := RunAllChecks(t.Context(), dir, opts) + for _, res := range r.Results { + if res.Category == "Links" && res.Level == types.Error { + t.Errorf("unexpected Links error: %s", res.Message) + } + } + }) +} From 804eb90afd67176470dd2fd4918a53cc8f2511b0 Mon Sep 17 00:00:00 2001 From: Bianca Lisle Date: Thu, 16 Apr 2026 19:53:44 +0100 Subject: [PATCH 3/4] use fixture dirs and add integration tests for --ignore-link Replace the ad-hoc writeSkillFile helper with the existing invalid-skill fixture (which has httpstat.us/404 as its only HTTP link). Tests now follow the same pattern as the rest of the orchestrate package. Add --ignore-link integration tests to TestSliceFlags covering the repeated flag form, the comma-separated form, and the flag on the check subcommand, all consistent with how --only/--skip/--allow-dirs are tested. 
--- cmd/exitcode_integration_test.go | 18 ++++++++ orchestrate/orchestrate_test.go | 72 +++++++------------------------- 2 files changed, 32 insertions(+), 58 deletions(-) diff --git a/cmd/exitcode_integration_test.go b/cmd/exitcode_integration_test.go index 8b0d234..7315871 100644 --- a/cmd/exitcode_integration_test.go +++ b/cmd/exitcode_integration_test.go @@ -172,6 +172,24 @@ func TestSliceFlags(t *testing.T) { wantStdout: "unknown directory: testing/", noStdout: "unknown directory: evals/", }, + // --ignore-link on validate links: repeated flag + { + name: "ignore-link repeated flag skips matched URL", + args: []string{"validate", "links", "--ignore-link=httpstat.us", fixture(t, "invalid-skill")}, + wantCode: 0, + }, + // --ignore-link on validate links: comma-separated + { + name: "ignore-link comma-separated skips matched URL", + args: []string{"validate", "links", "--ignore-link=httpstat.us,other.example.com", fixture(t, "invalid-skill")}, + wantCode: 0, + }, + // --ignore-link on check --only=links + { + name: "check --only=links --ignore-link skips matched URL", + args: []string{"check", "--only=links", "--ignore-link=httpstat.us", fixture(t, "invalid-skill")}, + wantCode: 0, + }, } for _, tt := range tests { diff --git a/orchestrate/orchestrate_test.go b/orchestrate/orchestrate_test.go index fddaba1..0060d7d 100644 --- a/orchestrate/orchestrate_test.go +++ b/orchestrate/orchestrate_test.go @@ -855,24 +855,15 @@ func TestOutputJSON_PerFile_ValidSkill(t *testing.T) { } } -// writeSkillFile creates a minimal valid SKILL.md in dir with the given body content. 
-func writeSkillFile(t *testing.T, dir, body string) { - t.Helper() - content := "---\nname: test-skill\ndescription: Test skill for link ignore patterns.\n---\n" + body - if err := os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte(content), 0o644); err != nil { - t.Fatal(err) - } -} - func TestRunLinkChecks_WithIgnorePatterns(t *testing.T) { - t.Run("ignored URL produces no error", func(t *testing.T) { - dir := t.TempDir() - // Use an .invalid domain that would fail DNS resolution if actually requested. - writeSkillFile(t, dir, "See [private](https://private-repo.corp.invalid/secret) for details.") - opts := links.Options{IgnorePatterns: []string{"private-repo.corp.invalid"}} + // invalid-skill has https://httpstat.us/404 as its only HTTP link. + dir := fixtureDir(t, "invalid-skill") + + t.Run("ignored URL produces no errors", func(t *testing.T) { + opts := links.Options{IgnorePatterns: []string{"httpstat.us"}} r := RunLinkChecks(t.Context(), dir, opts) if r.Errors != 0 { - t.Errorf("expected 0 errors when all links are ignored, got %d", r.Errors) + t.Errorf("expected 0 errors when matching URL is ignored, got %d", r.Errors) for _, res := range r.Results { if res.Level == types.Error { t.Logf(" error: %s: %s", res.Category, res.Message) @@ -881,53 +872,20 @@ func TestRunLinkChecks_WithIgnorePatterns(t *testing.T) { } }) - t.Run("empty ignore patterns checks links normally", func(t *testing.T) { - dir := t.TempDir() - // Use an .invalid domain — DNS will fail, which is an error. 
- writeSkillFile(t, dir, "See [broken](https://definitely-unreachable.invalid/path) for details.") - opts := links.Options{IgnorePatterns: nil} + t.Run("non-matching pattern leaves link checked", func(t *testing.T) { + opts := links.Options{IgnorePatterns: []string{"some-other-host.example.com"}} r := RunLinkChecks(t.Context(), dir, opts) if r.Errors == 0 { - t.Error("expected errors for unreachable link with no ignore patterns") - } - }) - - t.Run("multiple ignore patterns, all links matched", func(t *testing.T) { - dir := t.TempDir() - writeSkillFile(t, dir, - "[A](https://internal.corp.invalid/a) and [B](https://private.corp.invalid/b)") - opts := links.Options{IgnorePatterns: []string{"internal.corp.invalid", "private.corp.invalid"}} - r := RunLinkChecks(t.Context(), dir, opts) - if r.Errors != 0 { - t.Errorf("expected 0 errors when all links are ignored, got %d", r.Errors) + t.Error("expected errors when ignore pattern does not match the broken link") } }) } func TestRunAllChecks_WithIgnoreLinks(t *testing.T) { - t.Run("ignored link produces no error in full check", func(t *testing.T) { - dir := t.TempDir() - writeSkillFile(t, dir, "See [private](https://private-repo.corp.invalid/secret) for details.") - opts := Options{ - Enabled: AllGroups(), - LinksOpts: links.Options{ - IgnorePatterns: []string{"private-repo.corp.invalid"}, - }, - } - r := RunAllChecks(t.Context(), dir, opts) - for _, res := range r.Results { - if res.Category == "Links" && res.Level == types.Error { - t.Errorf("unexpected Links error (should have been ignored): %s", res.Message) - } - } - }) + // invalid-skill has https://httpstat.us/404 as its only HTTP link. + dir := fixtureDir(t, "invalid-skill") - t.Run("ignore patterns propagate through RunAllChecks", func(t *testing.T) { - dir := t.TempDir() - writeSkillFile(t, dir, - "[A](https://internal.corp.invalid/a) and [B](https://other.corp.invalid/b)") - // Ignore only one; the other would fail if actually requested. 
- // Since both are .invalid domains, both would fail DNS — so ignore both. + t.Run("ignored link produces no Links errors", func(t *testing.T) { opts := Options{ Enabled: map[CheckGroup]bool{ GroupStructure: false, @@ -935,14 +893,12 @@ func TestRunAllChecks_WithIgnoreLinks(t *testing.T) { GroupContent: false, GroupContamination: false, }, - LinksOpts: links.Options{ - IgnorePatterns: []string{"internal.corp.invalid", "other.corp.invalid"}, - }, + LinksOpts: links.Options{IgnorePatterns: []string{"httpstat.us"}}, } r := RunAllChecks(t.Context(), dir, opts) for _, res := range r.Results { if res.Category == "Links" && res.Level == types.Error { - t.Errorf("unexpected Links error: %s", res.Message) + t.Errorf("unexpected Links error (should have been ignored): %s", res.Message) } } }) From 334f0a62ffb64285178117a3e99de3271b582625 Mon Sep 17 00:00:00 2001 From: Bianca Lisle Date: Thu, 16 Apr 2026 20:09:27 +0100 Subject: [PATCH 4/4] document --ignore-link flag in README --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 15d339c..c6cced2 100644 --- a/README.md +++ b/README.md @@ -227,10 +227,16 @@ Result: passed ``` skill-validator validate links +skill-validator validate links --ignore-link=github.com/myorg +skill-validator validate links --ignore-link=github.com/myorg --ignore-link=localhost ``` Validates external (HTTP/HTTPS) links in SKILL.md. Internal (relative) links are checked by `validate structure`. +| Flag | Effect | +|---|---| +| `--ignore-link=<pattern>` | Skip URLs that contain the pattern (case-insensitive substring match). Repeatable or comma-separated. Useful for private repos and localhost URLs that are unreachable from CI. 
| ### analyze content ``` @@ -296,6 +302,7 @@ skill-validator check --strict skill-validator check --allow-extra-frontmatter skill-validator check --allow-flat-layouts skill-validator check --allow-dirs=evals,testing +skill-validator check --ignore-link=github.com/myorg,localhost ``` Runs all checks (structure + links + content + contamination). @@ -310,6 +317,7 @@ Runs all checks (structure + links + content + contamination). | `--allow-extra-frontmatter` | Suppress warnings for non-spec frontmatter fields | | `--allow-flat-layouts` | Allow files at the skill root without warnings (see [Flat skill layouts](#flat-skill-layouts)) | | `--allow-dirs=evals,testing` | Accept specific non-standard directories without warnings (see [Allowing non-standard directories](#allowing-non-standard-directories)) | +| `--ignore-link=<pattern>` | Skip URLs that contain the pattern (case-insensitive substring match) during link validation. Repeatable or comma-separated (e.g. `--ignore-link=github.com/myorg,localhost`) | Valid check groups: `structure`, `links`, `content`, `contamination`. @@ -764,6 +772,7 @@ Directories not in the allow list still produce the standard warning with file c - Checks external (HTTP/HTTPS) links only -- internal (relative) links are validated by `validate structure` - HTTP/HTTPS links are verified with a HEAD request (10s timeout, concurrent checks) - Template URLs using [RFC 6570](https://www.rfc-editor.org/rfc/rfc6570) syntax are skipped (e.g. `https://github.com/{OWNER}/{REPO}/pull/{PR}`) +- Use `--ignore-link=<pattern>` to skip URLs matching a case-insensitive substring (e.g. `--ignore-link=github.com/myorg` skips all links to that org's repos). The flag is repeatable and also accepts comma-separated patterns. This is useful for private repositories or localhost URLs that are unreachable from CI but valid in local development. > [!TIP] > HTTP 403 responses are reported as `info` rather than errors, since many sites (e.g. 
doi.org, science.org, mathworks.com) block automated HEAD requests while working fine in browsers. A 403 doesn't necessarily mean the link is broken -- but it does mean the validator couldn't verify it. If your skill includes 403-flagged links, keep in mind that sites blocking the validator's requests may also block requests from LLM agents. If an agent can't access a linked resource, the link wastes context without providing value. Where possible, consider providing the content directly in `references/` rather than linking to it, or offer an alternate source that doesn't restrict automated access. If the links are for human readers rather than agent use, consider removing them from the skill entirely.