Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -227,10 +227,16 @@ Result: passed

```
skill-validator validate links <path>
skill-validator validate links --ignore-link=github.com/myorg <path>
skill-validator validate links --ignore-link=github.com/myorg --ignore-link=localhost <path>
```

Validates external (HTTP/HTTPS) links in SKILL.md. Internal (relative) links are checked by `validate structure`.

| Flag | Effect |
|---|---|
| `--ignore-link=<pattern>` | Skip URLs that contain the pattern (case-insensitive substring match). Repeatable or comma-separated. Useful for private repos and localhost URLs that are unreachable from CI. |

### analyze content

```
Expand Down Expand Up @@ -296,6 +302,7 @@ skill-validator check --strict <path>
skill-validator check --allow-extra-frontmatter <path>
skill-validator check --allow-flat-layouts <path>
skill-validator check --allow-dirs=evals,testing <path>
skill-validator check --ignore-link=github.com/myorg,localhost <path>
```

Runs all checks (structure + links + content + contamination).
Expand All @@ -310,6 +317,7 @@ Runs all checks (structure + links + content + contamination).
| `--allow-extra-frontmatter` | Suppress warnings for non-spec frontmatter fields |
| `--allow-flat-layouts` | Allow files at the skill root without warnings (see [Flat skill layouts](#flat-skill-layouts)) |
| `--allow-dirs=evals,testing` | Accept specific non-standard directories without warnings (see [Allowing non-standard directories](#allowing-non-standard-directories)) |
| `--ignore-link=<pattern>` | Skip URLs that contain the pattern (case-insensitive substring match) during link validation. Repeatable or comma-separated (e.g. `--ignore-link=github.com/myorg,localhost`) |

Valid check groups: `structure`, `links`, `content`, `contamination`.

Expand Down Expand Up @@ -764,6 +772,7 @@ Directories not in the allow list still produce the standard warning with file c
- Checks external (HTTP/HTTPS) links only -- internal (relative) links are validated by `validate structure`
- HTTP/HTTPS links are verified with a HEAD request (10s timeout, concurrent checks)
- Template URLs using [RFC 6570](https://www.rfc-editor.org/rfc/rfc6570) syntax are skipped (e.g. `https://github.com/{OWNER}/{REPO}/pull/{PR}`)
- Use `--ignore-link=<pattern>` to skip URLs matching a case-insensitive substring (e.g. `--ignore-link=github.com/myorg` skips all links to that org's repos). The flag is repeatable and also accepts comma-separated patterns. This is useful for private repositories or localhost URLs that are unreachable from CI but valid in local development.

> [!TIP]
> HTTP 403 responses are reported as `info` rather than errors, since many sites (e.g. doi.org, science.org, mathworks.com) block automated HEAD requests while working fine in browsers. A 403 doesn't necessarily mean the link is broken -- but it does mean the validator couldn't verify it. If your skill includes 403-flagged links, keep in mind that sites blocking the validator's requests may also block requests from LLM agents. If an agent can't access a linked resource, the link wastes context without providing value. Where possible, consider providing the content directly in `references/` rather than linking to it, or offer an alternate source that doesn't restrict automated access. If the links are for human readers rather than agent use, consider removing them from the skill entirely.
Expand Down
5 changes: 5 additions & 0 deletions cmd/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/spf13/cobra"

"github.com/agent-ecosystem/skill-validator/links"
"github.com/agent-ecosystem/skill-validator/orchestrate"
"github.com/agent-ecosystem/skill-validator/structure"
"github.com/agent-ecosystem/skill-validator/types"
Expand All @@ -15,6 +16,7 @@ import (
var (
checkOnly []string
checkSkip []string
checkIgnoreLinks []string
perFileCheck bool
checkSkipOrphans bool
strictCheck bool
Expand Down Expand Up @@ -44,6 +46,8 @@ func init() {
"allow files at the skill root without warnings and treat them as standard content for token counting")
checkCmd.Flags().StringSliceVar(&checkAllowDirs, "allow-dirs", nil,
"comma-separated list of directory names to accept without warnings (e.g. --allow-dirs=evals,testing)")
checkCmd.Flags().StringSliceVar(&checkIgnoreLinks, "ignore-link", nil,
"URL patterns to skip in link validation (case-insensitive substring match; repeatable or comma-separated, e.g. --ignore-link=github.com/myorg,localhost)")
rootCmd.AddCommand(checkCmd)
}

Expand Down Expand Up @@ -77,6 +81,7 @@ func runCheck(cmd *cobra.Command, args []string) error {
AllowFlatLayouts: checkAllowFlatLayouts,
AllowDirs: checkAllowDirs,
},
LinksOpts: links.Options{IgnorePatterns: checkIgnoreLinks},
}
eopts := exitOpts{strict: strictCheck}
ctx := context.Background()
Expand Down
4 changes: 2 additions & 2 deletions cmd/cmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func TestValidateLinks_ValidSkill(t *testing.T) {
}

// External link checks: valid-skill has no HTTP links, so no results
linkResults := links.CheckLinks(t.Context(), dir, s.Body)
linkResults := links.CheckLinks(t.Context(), dir, s.Body, links.Options{})
if linkResults != nil {
t.Errorf("expected nil for skill with no HTTP links, got %d results", len(linkResults))
}
Expand All @@ -139,7 +139,7 @@ func TestValidateLinks_InvalidSkill(t *testing.T) {
}

// External link checks: invalid-skill has an HTTP link
linkResults := links.CheckLinks(t.Context(), dir, s.Body)
linkResults := links.CheckLinks(t.Context(), dir, s.Body, links.Options{})
if len(linkResults) == 0 {
t.Error("expected at least one external link check result")
}
Expand Down
18 changes: 18 additions & 0 deletions cmd/exitcode_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,24 @@ func TestSliceFlags(t *testing.T) {
wantStdout: "unknown directory: testing/",
noStdout: "unknown directory: evals/",
},
// --ignore-link on validate links: repeated flag
{
name: "ignore-link repeated flag skips matched URL",
args: []string{"validate", "links", "--ignore-link=httpstat.us", fixture(t, "invalid-skill")},
wantCode: 0,
},
// --ignore-link on validate links: comma-separated
{
name: "ignore-link comma-separated skips matched URL",
args: []string{"validate", "links", "--ignore-link=httpstat.us,other.example.com", fixture(t, "invalid-skill")},
wantCode: 0,
},
// --ignore-link on check --only=links
{
name: "check --only=links --ignore-link skips matched URL",
args: []string{"check", "--only=links", "--ignore-link=httpstat.us", fixture(t, "invalid-skill")},
wantCode: 0,
},
}

for _, tt := range tests {
Expand Down
10 changes: 8 additions & 2 deletions cmd/validate_links.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@ import (

"github.com/spf13/cobra"

"github.com/agent-ecosystem/skill-validator/links"
"github.com/agent-ecosystem/skill-validator/orchestrate"
"github.com/agent-ecosystem/skill-validator/types"
)

var validateLinksIgnore []string

var validateLinksCmd = &cobra.Command{
Use: "links <path>",
Short: "Check external link validity (HTTP/HTTPS)",
Expand All @@ -18,6 +21,8 @@ var validateLinksCmd = &cobra.Command{
}

// init registers the --ignore-link flag on the links subcommand and then
// attaches that subcommand to the validate command.
func init() {
	flags := validateLinksCmd.Flags()
	flags.StringSliceVar(
		&validateLinksIgnore,
		"ignore-link",
		nil,
		"URL patterns to skip (case-insensitive substring match; repeatable or comma-separated, e.g. --ignore-link=github.com/myorg,localhost)",
	)

	validateCmd.AddCommand(validateLinksCmd)
}

Expand All @@ -28,15 +33,16 @@ func runValidateLinks(cmd *cobra.Command, args []string) error {
}

ctx := context.Background()
lopts := links.Options{IgnorePatterns: validateLinksIgnore}

switch mode {
case types.SingleSkill:
r := orchestrate.RunLinkChecks(ctx, dirs[0])
r := orchestrate.RunLinkChecks(ctx, dirs[0], lopts)
return outputReport(r)
case types.MultiSkill:
mr := &types.MultiReport{}
for _, dir := range dirs {
r := orchestrate.RunLinkChecks(ctx, dir)
r := orchestrate.RunLinkChecks(ctx, dir, lopts)
mr.Skills = append(mr.Skills, r)
mr.Errors += r.Errors
mr.Warnings += r.Warnings
Expand Down
32 changes: 29 additions & 3 deletions links/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,23 @@ import (
"github.com/agent-ecosystem/skill-validator/types"
)

// Options configures external link validation. The zero value applies no
// ignore patterns.
type Options struct {
	// IgnorePatterns is a list of substrings. Any URL that contains one of
	// these strings (compared case-insensitively) is dropped from the set of
	// links to check, so no request is made for it.
	//
	// This is useful for private repositories or localhost URLs that are
	// unreachable from CI but valid in a local development context.
	//
	// Example: --ignore-link=github.com/myorg --ignore-link=localhost
	IgnorePatterns []string
}

// linkResult pairs a URL with a validation result.
// NOTE(review): presumably the result is the outcome of checking that URL;
// confirm against the code that populates it.
type linkResult struct {
	url    string       // the link the result belongs to
	result types.Result // the result recorded for url
}

// CheckLinks validates external (HTTP/HTTPS) links in the skill body.
func CheckLinks(ctx context.Context, dir, body string) []types.Result {
func CheckLinks(ctx context.Context, dir, body string, opts Options) []types.Result {
rctx := types.ResultContext{Category: "Links", File: "SKILL.md"}
allLinks := ExtractLinks(body)
if len(allLinks) == 0 {
Expand All @@ -35,9 +45,13 @@ func CheckLinks(ctx context.Context, dir, body string) []types.Result {
if strings.Contains(link, "{") {
continue
}
if strings.HasPrefix(link, "http://") || strings.HasPrefix(link, "https://") {
httpLinks = append(httpLinks, link)
if !strings.HasPrefix(link, "http://") && !strings.HasPrefix(link, "https://") {
continue
}
if isIgnored(link, opts.IgnorePatterns) {
continue
}
httpLinks = append(httpLinks, link)
}

if len(httpLinks) == 0 {
Expand Down Expand Up @@ -111,6 +125,18 @@ func checkHTTPLinkGET(rctx types.ResultContext, client *http.Client, url string)
return classifyResponse(rctx, url, resp.StatusCode)
}

// isIgnored reports whether url contains any of the given patterns
// (case-insensitive substring match).
//
// Each pattern is trimmed of surrounding whitespace before matching, and
// patterns that are empty after trimming are skipped. Every string contains
// the empty string, so honoring an empty pattern (for example one produced by
// a trailing comma in a comma-separated flag value) would silently ignore
// every link instead of none.
//
// It returns false when patterns is nil or empty.
func isIgnored(url string, patterns []string) bool {
	if len(patterns) == 0 {
		return false
	}
	lower := strings.ToLower(url)
	for _, p := range patterns {
		p = strings.ToLower(strings.TrimSpace(p))
		if p == "" {
			// An empty pattern carries no intent; never let it match.
			continue
		}
		if strings.Contains(lower, p) {
			return true
		}
	}
	return false
}

func classifyResponse(rctx types.ResultContext, url string, statusCode int) types.Result {
if statusCode >= 200 && statusCode < 300 {
return rctx.Passf("%s (HTTP %d)", url, statusCode)
Expand Down
Loading
Loading