From 4d56e208e30edbce3b6725d7e3df83e5a127e5c0 Mon Sep 17 00:00:00 2001 From: TristanInSec Date: Fri, 10 Apr 2026 17:34:46 -0400 Subject: [PATCH] fix(datasource): restrict maven registries parsed from pom.xml Registries added through MavenRegistryAPIClient.AddRegistry typically originate from blocks inside pom.xml files, which are attacker-controlled input when scanning third-party projects. Two tightenings: - Validate untrusted registry URLs: require http/https, and reject hosts that resolve to loopback, private, link-local, unspecified, or multicast addresses. The default registry passed to NewMavenRegistryAPIClient is exempt so users can still point osv-scanner at internal Artifactory/Nexus mirrors. - Only attach settings.xml credentials to the default registry. Registries added later via AddRegistry are marked TrustedForAuth=false and receive no auth, so a pom.xml-declared repository cannot pull credentials scoped to an unrelated host. Unit tests cover the scheme/host rejection paths and the credential-gating behaviour in getProject, getVersionMetadata, and getArtifactMetadata. --- internal/datasource/maven_registry.go | 86 +++++++++++++++++++++- internal/datasource/maven_registry_test.go | 86 ++++++++++++++++++++++ 2 files changed, 169 insertions(+), 3 deletions(-) diff --git a/internal/datasource/maven_registry.go b/internal/datasource/maven_registry.go index 0f67d148c07..0b49a4d9cc6 100644 --- a/internal/datasource/maven_registry.go +++ b/internal/datasource/maven_registry.go @@ -7,7 +7,9 @@ import ( "errors" "fmt" "io" + "net" "net/http" + "net/netip" "net/url" "slices" "strings" @@ -22,6 +24,10 @@ import ( const MavenCentral = "https://repo.maven.apache.org/maven2" var errAPIFailed = errors.New("API query failed") +var errUntrustedRegistry = errors.New("untrusted Maven registry URL") + +// lookupHost is indirected so tests can stub DNS resolution. +var lookupHost = net.LookupHost type MavenRegistryAPIClient struct { defaultRegistry MavenRegistry // The default registry that we are making requests @@ -47,6 +53,50 @@ type MavenRegistry struct { ID string ReleasesEnabled bool SnapshotsEnabled bool + + // TrustedForAuth indicates the registry URL is from a trusted source + // (e.g. a user-supplied CLI flag) and is allowed to receive credentials + // looked up from settings.xml. Registries sourced from parsed pom.xml + // files must never set this, otherwise a hostile pom.xml can redirect + // requests and steal the matching credentials. + TrustedForAuth bool +} + +// validateUntrustedRegistryURL rejects URLs that either use a non-http(s) +// scheme or point at private/loopback/link-local/unspecified/multicast +// addresses. It is only applied to registries added from pom.xml files +// (via AddRegistry). The default registry passed to +// NewMavenRegistryAPIClient is intentionally exempt so enterprise users +// can still point osv-scanner at internal Artifactory/Nexus instances. +func validateUntrustedRegistryURL(u *url.URL) error { + if u == nil { + return fmt.Errorf("%w: nil URL", errUntrustedRegistry) + } + scheme := strings.ToLower(u.Scheme) + if scheme != "http" && scheme != "https" { + return fmt.Errorf("%w: scheme %q not allowed", errUntrustedRegistry, u.Scheme) + } + host := u.Hostname() + if host == "" { + return fmt.Errorf("%w: empty host", errUntrustedRegistry) + } + // Resolve and reject any address that is not publicly routable. + addrs, err := lookupHost(host) + if err != nil { + return fmt.Errorf("%w: resolving %q: %w", errUntrustedRegistry, host, err) + } + for _, a := range addrs { + ip, err := netip.ParseAddr(a) + if err != nil { + return fmt.Errorf("%w: parsing resolved address %q: %w", errUntrustedRegistry, a, err) + } + if ip.IsLoopback() || ip.IsPrivate() || ip.IsLinkLocalUnicast() || + ip.IsLinkLocalMulticast() || ip.IsUnspecified() || ip.IsMulticast() { + return fmt.Errorf("%w: host %q resolves to non-public address %s", errUntrustedRegistry, host, a) + } + } + + return nil } func NewMavenRegistryAPIClient(registry MavenRegistry) (*MavenRegistryAPIClient, error) { @@ -59,6 +109,9 @@ func NewMavenRegistryAPIClient(registry MavenRegistry) (*MavenRegistryAPIClient, return nil, fmt.Errorf("invalid Maven registry %s: %w", registry.URL, err) } registry.Parsed = u + // The default registry is user-configured (CLI flag or library caller) + // and is the only registry trusted to receive settings.xml credentials. + registry.TrustedForAuth = true // TODO: allow for manual specification of settings files globalSettings := ParseMavenSettings(globalMavenSettingsFile()) @@ -84,6 +137,14 @@ func (m *MavenRegistryAPIClient) WithoutRegistries() *MavenRegistryAPIClient { } // AddRegistry adds the given registry to the list of registries if it has not been added. +// +// Registries added through this path are treated as untrusted input: they +// typically originate from entries in parsed pom.xml files, +// which an attacker can control. The URL is validated against the +// untrusted-registry policy (http(s) only, no private/loopback targets) and +// the TrustedForAuth flag is force-cleared so that credentials from +// settings.xml are never attached to requests made against these +// registries. func (m *MavenRegistryAPIClient) AddRegistry(registry MavenRegistry) error { for _, reg := range m.registries { if reg.ID == registry.ID { @@ -96,7 +157,14 @@ func (m *MavenRegistryAPIClient) AddRegistry(registry MavenRegistry) error { return err } + if err := validateUntrustedRegistryURL(u); err != nil { + return fmt.Errorf("refusing to add Maven registry %q: %w", registry.URL, err) + } + registry.Parsed = u + // Credentials looked up from settings.xml must never flow to a + // registry defined by untrusted pom.xml content. + registry.TrustedForAuth = false m.registries = append(m.registries, registry) return nil @@ -179,7 +247,7 @@ func (m *MavenRegistryAPIClient) getProject(ctx context.Context, registry MavenR u := registry.Parsed.JoinPath(strings.ReplaceAll(groupID, ".", "/"), artifactID, version, fmt.Sprintf("%s-%s.pom", artifactID, snapshot)).String() var project maven.Project - if err := m.get(ctx, m.registryAuths[registry.ID], u, &project); err != nil { + if err := m.get(ctx, m.authFor(registry), u, &project); err != nil { return maven.Project{}, err } @@ -191,7 +259,7 @@ func (m *MavenRegistryAPIClient) getVersionMetadata(ctx context.Context, registr u := registry.Parsed.JoinPath(strings.ReplaceAll(groupID, ".", "/"), artifactID, version, "maven-metadata.xml").String() var metadata maven.Metadata - if err := m.get(ctx, m.registryAuths[registry.ID], u, &metadata); err != nil { + if err := m.get(ctx, m.authFor(registry), u, &metadata); err != nil { return maven.Metadata{}, err } @@ -203,13 +271,25 @@ func (m *MavenRegistryAPIClient) getArtifactMetadata(ctx context.Context, regist u := registry.Parsed.JoinPath(strings.ReplaceAll(groupID, ".", "/"), artifactID, "maven-metadata.xml").String() var metadata maven.Metadata - if err := m.get(ctx, m.registryAuths[registry.ID], u, &metadata); err != nil { + if err := m.get(ctx, m.authFor(registry), u, &metadata); err != nil { return maven.Metadata{}, err } return metadata, nil } +// authFor returns the configured HTTP credentials for a registry only if +// the registry was marked trusted by NewMavenRegistryAPIClient. Registries +// added from parsed pom.xml files get nil, which makes auth.Get perform an +// unauthenticated request. +func (m *MavenRegistryAPIClient) authFor(registry MavenRegistry) *HTTPAuthentication { + if !registry.TrustedForAuth { + return nil + } + + return m.registryAuths[registry.ID] +} + func (m *MavenRegistryAPIClient) get(ctx context.Context, auth *HTTPAuthentication, apiURL string, dst any) error { resp, err := m.responses.Get(apiURL, func() (response, error) { resp, err := auth.Get(ctx, http.DefaultClient, apiURL) diff --git a/internal/datasource/maven_registry_test.go b/internal/datasource/maven_registry_test.go index 420383d4b55..e94d9fb09bd 100644 --- a/internal/datasource/maven_registry_test.go +++ b/internal/datasource/maven_registry_test.go @@ -243,6 +243,12 @@ func TestMultipleRegistry(t *testing.T) { `)) srv := testutility.NewMockHTTPServer(t) + // The untrusted-registry URL validator normally rejects loopback hosts, + // so stub the resolver to pretend 127.0.0.1 is a public address for the + // duration of this test. + origLookup := lookupHost + lookupHost = func(string) ([]string, error) { return []string{"203.0.113.1"}, nil } + t.Cleanup(func() { lookupHost = origLookup }) if err := client.AddRegistry(MavenRegistry{URL: srv.URL, ReleasesEnabled: true}); err != nil { t.Fatalf("failed to add registry %s: %v", srv.URL, err) } @@ -299,3 +305,83 @@ func TestMultipleRegistry(t *testing.T) { t.Errorf("GetVersions(%s, %s):\ngot %v\nwant %v\n", "org.example", "x.y.z", gotVersions, wantVersions) } } + +func TestAddRegistry_RejectsUntrustedURL(t *testing.T) { + t.Parallel() + + origLookup := lookupHost + t.Cleanup(func() { lookupHost = origLookup }) + + cases := []struct { + name string + url string + resolveTo []string + }{ + {name: "non-http scheme", url: "file:///etc/passwd", resolveTo: nil}, + {name: "ftp scheme", url: "ftp://example.com/repo", resolveTo: []string{"203.0.113.10"}}, + {name: "loopback literal", url: "http://127.0.0.1/repo", resolveTo: []string{"127.0.0.1"}}, + {name: "rfc1918 literal", url: "http://10.0.0.1/repo", resolveTo: []string{"10.0.0.1"}}, + {name: "link-local literal", url: "http://169.254.169.254/repo", resolveTo: []string{"169.254.169.254"}}, + {name: "dns-rebind to private", url: "http://evil.example.com/repo", resolveTo: []string{"192.168.1.1"}}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + lookupHost = func(string) ([]string, error) { return tc.resolveTo, nil } + client, err := NewMavenRegistryAPIClient(MavenRegistry{URL: "https://repo.maven.apache.org/maven2", ReleasesEnabled: true}) + if err != nil { + t.Fatalf("NewMavenRegistryAPIClient: %v", err) + } + err = client.AddRegistry(MavenRegistry{URL: tc.url, ID: "hostile"}) + if err == nil { + t.Fatalf("AddRegistry(%q) = nil, want error", tc.url) + } + if got := client.GetRegistries(); len(got) != 0 { + t.Errorf("registry was added despite validation failure: %+v", got) + } + }) + } +} + +func TestAddRegistry_ClearsTrustedForAuth(t *testing.T) { + t.Parallel() + + origLookup := lookupHost + lookupHost = func(string) ([]string, error) { return []string{"203.0.113.42"}, nil } + t.Cleanup(func() { lookupHost = origLookup }) + + client, err := NewMavenRegistryAPIClient(MavenRegistry{URL: "https://repo.maven.apache.org/maven2", ReleasesEnabled: true}) + if err != nil { + t.Fatalf("NewMavenRegistryAPIClient: %v", err) + } + + // The caller tries to smuggle in TrustedForAuth=true; AddRegistry must drop it. + if err := client.AddRegistry(MavenRegistry{URL: "https://attacker.example/repo", ID: "central", TrustedForAuth: true}); err != nil { + t.Fatalf("AddRegistry: %v", err) + } + regs := client.GetRegistries() + if len(regs) != 1 { + t.Fatalf("expected 1 added registry, got %d", len(regs)) + } + if regs[0].TrustedForAuth { + t.Errorf("AddRegistry left TrustedForAuth=true for an untrusted registry") + } +} + +func TestAuthFor_OnlyTrustedRegistriesReceiveCredentials(t *testing.T) { + t.Parallel() + + m := &MavenRegistryAPIClient{ + registryAuths: map[string]*HTTPAuthentication{ + "central": {Username: "u", Password: "p"}, + }, + } + trusted := MavenRegistry{ID: "central", TrustedForAuth: true} + if got := m.authFor(trusted); got == nil { + t.Errorf("authFor(trusted) = nil, want credentials") + } + untrusted := MavenRegistry{ID: "central", TrustedForAuth: false} + if got := m.authFor(untrusted); got != nil { + t.Errorf("authFor(untrusted) returned credentials, leak") + } +}