Skip to content
34 changes: 32 additions & 2 deletions bun/lib/dependabot/bun/metadata_finder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# frozen_string_literal: true

require "excon"
require "cgi"
require "sorbet-runtime"
require "time"

Expand All @@ -16,6 +17,13 @@ module Bun
class MetadataFinder < Dependabot::MetadataFinders::Base
extend T::Sig

# Matches any character that is NOT an RFC 3986 "unreserved" character
# (A-Z, a-z, 0-9, ".", "_", "~", "-"), i.e. any character that has to be
# percent-encoded before a releaser name is placed in an npm releaser profile URL.
# The ASCII set is spelled out explicitly rather than built from a Unicode-aware
# POSIX class such as [[:alnum:]], which would also accept non-ASCII letters.
# (Ruby's \w is ASCII-only by default, but it does not cover ".", "~" or "-".)
CHARS_REQUIRING_ENCODING = T.let(/[^A-Za-z0-9._~-]/, Regexp)
private_constant :CHARS_REQUIRING_ENCODING

sig { override.returns(T.nilable(String)) }
def homepage_url
# Attempt to use version_listing first, as fetching the entire listing
Expand All @@ -32,13 +40,27 @@ def maintainer_changes
return unless npm_listing.dig("time", dependency.version)
return if previous_releasers&.include?(npm_releaser)

# Safe: npm_releaser is non-nil after the guard clause above
encoded_releaser = encode_npm_releaser(T.must(npm_releaser))
"This version was pushed to npm by " \
"[#{npm_releaser}](https://www.npmjs.com/~#{npm_releaser}), a new " \
"[#{npm_releaser}](https://www.npmjs.com/~#{encoded_releaser}), a new " \
"releaser for #{dependency.name} since your current version."
end

private

# Encodes npm releaser names for safe inclusion in npmjs.com profile URLs.
# Optimization: Returns unmodified if all characters are RFC 3986 unreserved.
# Names with special characters (spaces, @, +, etc.) are percent-encoded.
sig { params(releaser: String).returns(String) }
def encode_npm_releaser(releaser)
  # Fast path: a name made up solely of RFC 3986 unreserved characters is
  # already URL-safe, so the very same string is handed back untouched.
  needs_escaping = CHARS_REQUIRING_ENCODING.match?(releaser)
  return releaser unless needs_escaping

  # CGI.escape produces form encoding: a space becomes "+" while a literal "+"
  # becomes "%2B". Every "+" left in the output therefore stands for a space
  # and is rewritten to "%20".
  form_encoded = CGI.escape(releaser)
  form_encoded.gsub("+", "%20")
end

sig { override.returns(T.nilable(Dependabot::Source)) }
def look_up_source
return find_source_from_registry if new_source.nil?
Expand Down Expand Up @@ -216,7 +238,13 @@ def dependency_url
new_source&.fetch(:url)
end

# Remove trailing slashes and escape spaces for proper URL formatting
# TODO: Remove URI::DEFAULT_PARSER.escape in favor of explicit space encoding (like npm_and_yarn).
# Currently, normalize_registry_url safely handles spaces for configured registries (new_source.nil?),
# but URI::DEFAULT_PARSER.escape remains here for the new_source case. This should be addressed in a
# separate concern when standardizing URL handling across all ecosystems.
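# NOTE: URI::DEFAULT_PARSER.escape is a legacy RFC 2396 helper and should eventually be replaced.
# A component encoder such as URI.encode_uri_component is not a drop-in replacement for a whole URL,
# because it also escapes ":" and "/"; it (or a similar approach) must be applied per path segment.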
# URI::DEFAULT_PARSER.escape encodes many characters; then remove trailing slashes
registry_url = URI::DEFAULT_PARSER.escape(registry_url)&.gsub(%r{/+$}, "")

# NPM registries expect slashes to be escaped
Expand All @@ -236,6 +264,8 @@ def configured_registry_from_credentials
sig { params(registry: T.nilable(String)).returns(T.nilable(String)) }
def normalize_registry_url(registry)
return nil unless registry

registry = registry.strip.gsub(/\s+/, "%20")
return registry if registry.start_with?("http")

"https://#{registry}"
Expand Down
198 changes: 190 additions & 8 deletions bun/spec/dependabot/bun/metadata_finder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
requirements: [
{ file: "package.json", requirement: "^1.0", groups: [], source: nil }
],
package_manager: "npm_and_yarn"
package_manager: "bun"
)
end

Expand Down Expand Up @@ -72,7 +72,7 @@
ref: "master"
}
}],
package_manager: "npm_and_yarn"
package_manager: "bun"
)
end

Expand Down Expand Up @@ -298,7 +298,7 @@
url: "https://npm.fury.io/dependabot"
}
}],
package_manager: "npm_and_yarn"
package_manager: "bun"
)
end

Expand Down Expand Up @@ -395,7 +395,7 @@
}
}
],
package_manager: "npm_and_yarn"
package_manager: "bun"
)
end

Expand Down Expand Up @@ -437,7 +437,7 @@
}
}
],
package_manager: "npm_and_yarn"
package_manager: "bun"
)
end

Expand Down Expand Up @@ -513,7 +513,7 @@
groups: [],
source: nil
}],
package_manager: "npm_and_yarn"
package_manager: "bun"
)
end

Expand All @@ -525,6 +525,36 @@
)
end
end

context "when the maintainer name contains spaces" do
  let(:dependency_name) { "npm-package-json-lint" }
  let(:npm_url) { "https://registry.npmjs.org/npm-package-json-lint" }
  let(:npm_all_versions_response) { fixture("npm_responses", "npm-package-json-lint.json") }
  let(:dependency) do
    package_json_requirement = { file: "package.json", requirement: "^10.0", groups: [], source: nil }

    Dependabot::Dependency.new(
      name: dependency_name,
      version: "10.0.0",
      previous_version: "9.0.0",
      requirements: [package_json_requirement],
      package_manager: "bun"
    )
  end

  # The link text keeps the raw releaser name; only the URL part carries the
  # percent-encoded form.
  it "properly URL-encodes the maintainer name in the link" do
    profile_link = "[GitHub Actions](https://www.npmjs.com/~GitHub%20Actions)"

    expect(maintainer_changes).to eq(
      "This version was pushed to npm by #{profile_link}, a new releaser " \
      "for npm-package-json-lint since your current version."
    )
  end
end
end

describe "#dependency_url" do
Expand All @@ -538,7 +568,7 @@
requirements: [
{ file: "package.json", requirement: "^1.0", groups: [], source: nil }
],
package_manager: "npm_and_yarn"
package_manager: "bun"
)
end

Expand Down Expand Up @@ -646,7 +676,7 @@
source: { type: "registry", url: "https://npm.fury.io/dependabot" }
}
],
package_manager: "npm_and_yarn"
package_manager: "bun"
)
end

Expand All @@ -666,4 +696,156 @@
end
end
end

describe "#encode_npm_releaser (private helper)" do
  subject(:encoded) { finder.send(:encode_npm_releaser, releaser_name) }

  # One row per scenario:
  #   [context description, example description, raw releaser name, expected result]
  # Rows are expanded below into one context with a single example each.
  [
    ["with safe characters only (no encoding needed)", "returns the name unmodified",
     "dougwilson", "dougwilson"],
    ["with safe characters including dot, underscore, dash", "returns the name unmodified",
     "user.name_test-pkg", "user.name_test-pkg"],
    ["with space character", "encodes space as %20",
     "GitHub Actions", "GitHub%20Actions"],
    ["with @ symbol (common in email-like usernames)", "encodes @ as %40",
     "user@domain", "user%40domain"],
    ["with + symbol", "encodes + as %2B",
     "user+admin", "user%2Badmin"],
    ["with / symbol", "encodes / as %2F",
     "scope/user", "scope%2Fuser"],
    ["with mixed special characters", "encodes all unsafe characters",
     "user@host+admin", "user%40host%2Badmin"],
    ["with empty string", "returns empty string unchanged",
     "", ""],
    ["with numeric-only name", "returns unchanged (digits are safe)",
     "12345", "12345"]
  ].each do |scenario, expectation, raw_name, encoded_name|
    context scenario do
      let(:releaser_name) { raw_name }

      it expectation do
        expect(encoded).to eq(encoded_name)
      end
    end
  end
end

describe "#normalize_registry_url (private helper)" do
  subject(:normalized) { finder.send(:normalize_registry_url, registry_url) }

  # The nil case stays separate because it is checked with be_nil rather than eq.
  context "with nil input" do
    let(:registry_url) { nil }

    it "returns nil" do
      expect(normalized).to be_nil
    end
  end

  # One row per scenario:
  #   [context description, example description, input registry, expected URL]
  # Rows are expanded below into one context with a single example each.
  [
    ["with registry URL without protocol", "adds https:// prefix",
     "my.registry.com", "https://my.registry.com"],
    ["with registry URL with https protocol", "returns unchanged",
     "https://my.registry.com", "https://my.registry.com"],
    ["with registry URL with http protocol", "returns unchanged",
     "http://my.registry.com", "http://my.registry.com"],
    ["with registry URL containing spaces", "encodes spaces as %20",
     "https://my registry.com", "https://my%20registry.com"],
    ["with registry URL containing multiple spaces", "encodes consecutive spaces as single %20",
     "https://my registry com", "https://my%20registry%20com"],
    ["with registry URL with leading/trailing whitespace", "strips whitespace and returns URL unchanged",
     " https://my.registry.com ", "https://my.registry.com"],
    ["with registry URL with mixed whitespace (spaces and tabs)", "encodes consecutive whitespace as single %20",
     "https://my\t registry.com", "https://my%20registry.com"],
    ["with registry URL with nested spaces and no protocol", "encodes spaces and adds https://",
     "my registry.com", "https://my%20registry.com"]
  ].each do |scenario, expectation, raw_registry, expected_url|
    context scenario do
      let(:registry_url) { raw_registry }

      it expectation do
        expect(normalized).to eq(expected_url)
      end
    end
  end
end
end
Loading
Loading