From 59885fa0776c9bcc2417f0fa6883f4088fccc2a5 Mon Sep 17 00:00:00 2001 From: Viktor Petersson Date: Sat, 31 Jan 2026 10:13:49 +0100 Subject: [PATCH 1/5] Use distribution platform as supplier instead of package author This refactors the enrichment module to set the supplier field to the distribution platform (PyPI, npm, crates.io, etc.) rather than the package author/maintainer. This better reflects NTIA semantics where "supplier" means the entity distributing the software. Changes: - Add PURL_TYPE_TO_SUPPLIER mapping in purl.py with 17 platform entries - Add get_supplier_for_purl() helper for unified supplier resolution - Update all enrichment sources to use centralized mapping: - pypi.py: "Python Package Index (PyPI)" - cratesio.py: "crates.io" - pubdev.py: "pub.dev" - conan.py: "Conan Center" - depsdev.py: Uses mapping based on PURL type - ecosystems.py: Uses mapping based on PURL type - Author info preserved in maintainer_name field (maps to SPDX originator) - Update tests to reflect new supplier behavior Co-Authored-By: Claude Opus 4.5 --- sbomify_action/_enrichment/sources/conan.py | 13 +++-- .../_enrichment/sources/cratesio.py | 9 ++-- sbomify_action/_enrichment/sources/depsdev.py | 16 ++++-- .../_enrichment/sources/ecosystems.py | 24 +++------ sbomify_action/_enrichment/sources/pubdev.py | 18 +++---- sbomify_action/_enrichment/sources/purl.py | 52 +++++++++++++++++++ sbomify_action/_enrichment/sources/pypi.py | 7 +-- tests/test_conan_source.py | 7 ++- tests/test_cratesio_source.py | 9 ++-- tests/test_enrichment_module.py | 36 +++++++++---- 10 files changed, 132 insertions(+), 59 deletions(-) diff --git a/sbomify_action/_enrichment/sources/conan.py b/sbomify_action/_enrichment/sources/conan.py index 44c96ba..dc32325 100644 --- a/sbomify_action/_enrichment/sources/conan.py +++ b/sbomify_action/_enrichment/sources/conan.py @@ -13,6 +13,7 @@ from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url +from .purl import 
PURL_TYPE_TO_SUPPLIER # Simple in-memory cache _cache: Dict[str, Optional[NormalizedMetadata]] = {} @@ -227,15 +228,17 @@ def _extract_metadata_from_graph(self, package_name: str, graph: Any) -> Optiona if repository_url: field_sources["repository_url"] = self.name - # Use author as supplier if available - supplier = author if author else None - if supplier: - field_sources["supplier"] = self.name + # Supplier is always the distribution platform + field_sources["supplier"] = self.name + + # Preserve author info as maintainer_name + maintainer_name = author if author else None metadata = NormalizedMetadata( description=description, licenses=licenses, - supplier=supplier, + supplier=PURL_TYPE_TO_SUPPLIER["conan"], + maintainer_name=maintainer_name, homepage=homepage, repository_url=repository_url, registry_url=f"https://conan.io/center/recipes/{package_name}", diff --git a/sbomify_action/_enrichment/sources/cratesio.py b/sbomify_action/_enrichment/sources/cratesio.py index 690efb3..6ce1972 100644 --- a/sbomify_action/_enrichment/sources/cratesio.py +++ b/sbomify_action/_enrichment/sources/cratesio.py @@ -11,6 +11,7 @@ from ..license_utils import normalize_license_list from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url +from .purl import PURL_TYPE_TO_SUPPLIER CRATESIO_API_BASE = "https://crates.io/api/v1/crates" DEFAULT_TIMEOUT = 10 # seconds @@ -161,8 +162,8 @@ def _normalize_response( field_sources["description"] = self.name if licenses: field_sources["licenses"] = self.name - if maintainer_name: - field_sources["supplier"] = self.name + # Supplier is always the distribution platform + field_sources["supplier"] = self.name if homepage: field_sources["homepage"] = self.name if repository_url: @@ -174,9 +175,7 @@ def _normalize_response( description=description, licenses=licenses, license_texts=license_texts, - # supplier is the NTIA-required field; maintainer_name provides additional detail. 
- # For crates.io, the publisher (published_by) serves as both. - supplier=maintainer_name, + supplier=PURL_TYPE_TO_SUPPLIER["cargo"], homepage=homepage, repository_url=repository_url, documentation_url=documentation, diff --git a/sbomify_action/_enrichment/sources/depsdev.py b/sbomify_action/_enrichment/sources/depsdev.py index ac6c12e..1e2decb 100644 --- a/sbomify_action/_enrichment/sources/depsdev.py +++ b/sbomify_action/_enrichment/sources/depsdev.py @@ -12,6 +12,7 @@ from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url from ..utils import get_qualified_name +from .purl import PURL_TYPE_TO_SUPPLIER DEPSDEV_API_BASE = "https://api.deps.dev/v3" DEFAULT_TIMEOUT = 10 # seconds - deps.dev is generally fast @@ -108,7 +109,7 @@ def fetch(self, purl: PackageURL, session: requests.Session) -> Optional[Normali metadata = None if response.status_code == 200: data = response.json() - metadata = self._normalize_response(purl.name, data) + metadata = self._normalize_response(purl.name, purl.type, data) elif response.status_code == 404: logger.debug(f"Package not found in deps.dev: {purl}") else: @@ -131,12 +132,15 @@ def fetch(self, purl: PackageURL, session: requests.Session) -> Optional[Normali _cache[cache_key] = None return None - def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Optional[NormalizedMetadata]: + def _normalize_response( + self, package_name: str, purl_type: str, data: Dict[str, Any] + ) -> Optional[NormalizedMetadata]: """ Normalize deps.dev API response to NormalizedMetadata. 
Args: package_name: Name of the package + purl_type: PURL type (e.g., "pypi", "npm", "cargo") data: Raw deps.dev API response Returns: @@ -179,17 +183,23 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Option if repository_url: repository_url = normalize_vcs_url(repository_url) + # Get supplier from PURL type mapping + supplier = PURL_TYPE_TO_SUPPLIER.get(purl_type) + # Build field_sources for attribution - field_sources = {} + field_sources: dict[str, str] = {} if licenses: field_sources["licenses"] = self.name if homepage: field_sources["homepage"] = self.name if repository_url: field_sources["repository_url"] = self.name + if supplier: + field_sources["supplier"] = self.name metadata = NormalizedMetadata( licenses=licenses, + supplier=supplier, homepage=homepage, repository_url=repository_url, source=self.name, diff --git a/sbomify_action/_enrichment/sources/ecosystems.py b/sbomify_action/_enrichment/sources/ecosystems.py index af8887a..1a566bb 100644 --- a/sbomify_action/_enrichment/sources/ecosystems.py +++ b/sbomify_action/_enrichment/sources/ecosystems.py @@ -11,6 +11,7 @@ from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url from ..utils import purl_to_string +from .purl import PURL_TYPE_TO_SUPPLIER ECOSYSTEMS_API_BASE = "https://packages.ecosyste.ms/api/v1" DEFAULT_TIMEOUT = 15 # seconds - ecosyste.ms can be slower @@ -87,9 +88,9 @@ def fetch(self, purl: PackageURL, session: requests.Session) -> Optional[Normali data = response.json() # API returns an array, take first result if isinstance(data, list) and len(data) > 0: - metadata = self._normalize_response(data[0]) + metadata = self._normalize_response(purl.type, data[0]) elif isinstance(data, dict): - metadata = self._normalize_response(data) + metadata = self._normalize_response(purl.type, data) else: logger.debug(f"No package data found in ecosyste.ms for: {purl_str}") elif response.status_code == 404: @@ -119,11 +120,12 @@ def fetch(self, 
purl: PackageURL, session: requests.Session) -> Optional[Normali _cache[cache_key] = None return None - def _normalize_response(self, data: Dict[str, Any]) -> Optional[NormalizedMetadata]: + def _normalize_response(self, purl_type: str, data: Dict[str, Any]) -> Optional[NormalizedMetadata]: """ Normalize ecosyste.ms API response to NormalizedMetadata. Args: + purl_type: PURL type (e.g., "pypi", "npm", "cargo") data: Raw ecosyste.ms API response Returns: @@ -151,20 +153,8 @@ def _normalize_response(self, data: Dict[str, Any]) -> Optional[NormalizedMetada maintainer_name = first_maintainer.get("name") or first_maintainer.get("login") maintainer_email = first_maintainer.get("email") - # Extract supplier from maintainer or repo owner - # NEVER use ecosystem name as supplier - "pypi", "npm", etc. are platforms, not suppliers - supplier = None - # Priority 1: Maintainer name or login (already extracted above) - if maintainer_name: - supplier = maintainer_name - # Priority 2: Repo owner name or login - elif data.get("repo_metadata") and data["repo_metadata"].get("owner"): - owner = data["repo_metadata"]["owner"] - if isinstance(owner, dict): - supplier = owner.get("name") or owner.get("login") - elif isinstance(owner, str): - supplier = owner - # Do NOT fall back to data["ecosystem"] - it's just the platform name + # Supplier is the distribution platform based on PURL type + supplier = PURL_TYPE_TO_SUPPLIER.get(purl_type) # Extract issue tracker URL from repo metadata issue_tracker_url = None diff --git a/sbomify_action/_enrichment/sources/pubdev.py b/sbomify_action/_enrichment/sources/pubdev.py index e32a356..693c7eb 100644 --- a/sbomify_action/_enrichment/sources/pubdev.py +++ b/sbomify_action/_enrichment/sources/pubdev.py @@ -12,6 +12,7 @@ from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url from ..utils import parse_author_string +from .purl import PURL_TYPE_TO_SUPPLIER PUBDEV_API_BASE = "https://pub.dev/api/packages" DEFAULT_TIMEOUT 
= 10 # seconds - pub.dev is generally fast @@ -134,8 +135,7 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Option documentation_url = pubspec.get("documentation") issue_tracker_url = pubspec.get("issue_tracker") - # Extract publisher/author info using shared utility - supplier = None + # Extract author info for maintainer_name field maintainer_name = None maintainer_email = None @@ -143,17 +143,15 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Option authors = pubspec.get("authors") if authors and isinstance(authors, list) and len(authors) > 0: maintainer_name, maintainer_email = parse_author_string(authors[0]) - supplier = maintainer_name elif pubspec.get("author"): maintainer_name, maintainer_email = parse_author_string(pubspec["author"]) - supplier = maintainer_name # Check for publisher in the top-level response (newer pub.dev API) - # Publisher takes precedence over author for supplier + # Fall back to publisher ID as maintainer_name when no author was found if data.get("publisher"): publisher_id = data["publisher"].get("publisherId") - if publisher_id: - supplier = publisher_id + if publisher_id and not maintainer_name: + maintainer_name = publisher_id logger.debug(f"Successfully fetched pub.dev metadata for: {package_name}") @@ -163,8 +161,8 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Option field_sources["description"] = self.name if licenses: field_sources["licenses"] = self.name - if supplier: - field_sources["supplier"] = self.name + # Supplier is always the distribution platform + field_sources["supplier"] = self.name if homepage: field_sources["homepage"] = self.name if repository_url: @@ -178,7 +176,7 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Option description=description, licenses=licenses, license_texts=license_texts, - supplier=supplier, + supplier=PURL_TYPE_TO_SUPPLIER["pub"], homepage=homepage, repository_url=repository_url, 
documentation_url=documentation_url, diff --git a/sbomify_action/_enrichment/sources/purl.py b/sbomify_action/_enrichment/sources/purl.py index e5ba2cb..4ac397d 100644 --- a/sbomify_action/_enrichment/sources/purl.py +++ b/sbomify_action/_enrichment/sources/purl.py @@ -49,6 +49,58 @@ "chainguard": "Chainguard, Inc.", } +# Mapping of PURL type to distribution platform supplier name +# For language packages, the distribution platform (not the author) is the supplier +PURL_TYPE_TO_SUPPLIER: dict[str, str] = { + # Language package registries + "pypi": "Python Package Index (PyPI)", + "npm": "npm", + "cargo": "crates.io", + "maven": "Maven Central", + "gem": "RubyGems.org", + "nuget": "NuGet Gallery", + "golang": "Go Modules", + "pub": "pub.dev", + "conan": "Conan Center", + "composer": "Packagist", + "hex": "Hex.pm", + "cocoapods": "CocoaPods", + "conda": "Anaconda", + "hackage": "Hackage", + "swift": "Swift Package Registry", + # Container registries + "docker": "Docker Hub", + "oci": "OCI Registry", +} + + +def get_supplier_for_purl(purl: PackageURL) -> str | None: + """Get the appropriate supplier for a PURL. + + For OS packages (deb, rpm, apk), uses NAMESPACE_TO_SUPPLIER based on the + distribution namespace (e.g., debian, ubuntu, alpine). + + For language packages (pypi, npm, cargo, etc.), uses PURL_TYPE_TO_SUPPLIER + to return the distribution platform as the supplier. 
+ + Args: + purl: Parsed PackageURL + + Returns: + Supplier name or None if not found + """ + # OS packages use namespace-based supplier (distribution name) + if purl.type in OS_PACKAGE_TYPES and purl.namespace: + supplier = NAMESPACE_TO_SUPPLIER.get(purl.namespace.lower()) + if supplier: + return supplier + # Fallback for unknown namespaces + return f"{purl.namespace.title()} Project" + + # Language packages use type-based supplier (platform name) + return PURL_TYPE_TO_SUPPLIER.get(purl.type) + + # Mapping of PURL type/namespace to package tracker URL templates PACKAGE_TRACKER_URLS: Dict[str, Dict[str, str]] = { "deb": { diff --git a/sbomify_action/_enrichment/sources/pypi.py b/sbomify_action/_enrichment/sources/pypi.py index 242bb45..ae0021b 100644 --- a/sbomify_action/_enrichment/sources/pypi.py +++ b/sbomify_action/_enrichment/sources/pypi.py @@ -12,6 +12,7 @@ from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url from ..utils import parse_author_string +from .purl import PURL_TYPE_TO_SUPPLIER PYPI_API_BASE = "https://pypi.org/pypi" DEFAULT_TIMEOUT = 10 # seconds - PyPI is fast @@ -169,8 +170,8 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Normal field_sources["description"] = self.name if licenses: field_sources["licenses"] = self.name - if maintainer_name: - field_sources["supplier"] = self.name + # Supplier is always the distribution platform + field_sources["supplier"] = self.name if homepage: field_sources["homepage"] = self.name if repository_url: @@ -184,7 +185,7 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Normal description=info.get("summary"), licenses=licenses, license_texts=license_texts, - supplier=maintainer_name, # Use author/maintainer as supplier + supplier=PURL_TYPE_TO_SUPPLIER["pypi"], homepage=homepage, repository_url=repository_url, documentation_url=documentation_url, diff --git a/tests/test_conan_source.py b/tests/test_conan_source.py index 
899d63c..ce1dd0d 100644 --- a/tests/test_conan_source.py +++ b/tests/test_conan_source.py @@ -191,7 +191,7 @@ def test_fetch_package_not_found(self, mock_session): assert metadata is None def test_fetch_with_author(self, mock_session): - """Test that author is used as supplier.""" + """Test that author is preserved as maintainer_name.""" source = ConanSource() purl = PackageURL.from_string("pkg:conan/testpkg@1.0.0") @@ -221,7 +221,10 @@ def test_fetch_with_author(self, mock_session): metadata = source.fetch(purl, mock_session) assert metadata is not None - assert metadata.supplier == "Test Author" + # Supplier is always the distribution platform + assert metadata.supplier == "Conan Center" + # Author is preserved as maintainer_name + assert metadata.maintainer_name == "Test Author" class TestConanSourceCaching: diff --git a/tests/test_cratesio_source.py b/tests/test_cratesio_source.py index 92e69d6..d43b23c 100644 --- a/tests/test_cratesio_source.py +++ b/tests/test_cratesio_source.py @@ -111,7 +111,7 @@ def test_fetch_success_with_version(self, mock_session): assert len(metadata.licenses) == 1 assert "MIT" in metadata.licenses[0] assert "Apache-2.0" in metadata.licenses[0] - assert metadata.supplier == "David Tolnay" + assert metadata.supplier == "crates.io" assert metadata.maintainer_name == "David Tolnay" assert metadata.homepage == "https://serde.rs" assert metadata.documentation_url == "https://docs.rs/serde" @@ -152,8 +152,8 @@ def test_fetch_success_without_version(self, mock_session): assert "github.com/tokio-rs/tokio" in metadata.repository_url # No license without version-specific endpoint assert metadata.licenses == [] - # No published_by without version-specific endpoint - assert metadata.supplier is None + # Supplier is always the distribution platform + assert metadata.supplier == "crates.io" # Verify API was called with crate URL (no version) mock_session.get.assert_called_once() @@ -452,7 +452,8 @@ def test_field_sources_partial(self, mock_session): 
assert metadata is not None assert metadata.field_sources.get("description") == "crates.io" assert "licenses" not in metadata.field_sources - assert "supplier" not in metadata.field_sources + # Supplier is always present (distribution platform) + assert metadata.field_sources.get("supplier") == "crates.io" assert "homepage" not in metadata.field_sources diff --git a/tests/test_enrichment_module.py b/tests/test_enrichment_module.py index 079c20f..612ce56 100644 --- a/tests/test_enrichment_module.py +++ b/tests/test_enrichment_module.py @@ -281,7 +281,9 @@ def test_fetch_success(self, mock_session): assert metadata.description == "A high-level Python web framework" assert metadata.homepage == "https://www.djangoproject.com/" assert "BSD-3-Clause" in metadata.licenses - assert metadata.supplier == "Django Software Foundation" + # Supplier is the distribution platform, not the author + assert metadata.supplier == "Python Package Index (PyPI)" + assert metadata.maintainer_name == "Django Software Foundation" assert metadata.repository_url == "git+https://github.com/django/django" def test_fetch_not_found(self, mock_session): @@ -335,8 +337,11 @@ def test_fetch_author_from_email_field(self, mock_session): metadata = source.fetch(purl, mock_session) assert metadata is not None - assert metadata.supplier == "Peter Linss", ( - f"Expected 'Peter Linss' extracted from author_email, got: {metadata.supplier}" + # Supplier is always the distribution platform + assert metadata.supplier == "Python Package Index (PyPI)" + # Author name extracted from email field is preserved in maintainer_name + assert metadata.maintainer_name == "Peter Linss", ( + f"Expected 'Peter Linss' extracted from author_email, got: {metadata.maintainer_name}" ) def test_fetch_author_from_maintainer_email_field(self, mock_session): @@ -361,8 +366,11 @@ def test_fetch_author_from_maintainer_email_field(self, mock_session): metadata = source.fetch(purl, mock_session) assert metadata is not None - assert 
metadata.supplier == "Jane Doe", ( - f"Expected 'Jane Doe' extracted from maintainer_email, got: {metadata.supplier}" + # Supplier is always the distribution platform + assert metadata.supplier == "Python Package Index (PyPI)" + # Author name extracted from maintainer_email is preserved in maintainer_name + assert metadata.maintainer_name == "Jane Doe", ( + f"Expected 'Jane Doe' extracted from maintainer_email, got: {metadata.maintainer_name}" ) def test_fetch_prefers_direct_author_over_email(self, mock_session): @@ -385,8 +393,11 @@ def test_fetch_prefers_direct_author_over_email(self, mock_session): metadata = source.fetch(purl, mock_session) assert metadata is not None - assert metadata.supplier == "Direct Author", ( - f"Expected 'Direct Author' from author field, got: {metadata.supplier}" + # Supplier is always the distribution platform + assert metadata.supplier == "Python Package Index (PyPI)" + # Direct author is preferred for maintainer_name + assert metadata.maintainer_name == "Direct Author", ( + f"Expected 'Direct Author' from author field, got: {metadata.maintainer_name}" ) @@ -455,7 +466,10 @@ def test_fetch_success(self, mock_session): assert metadata.homepage == "https://github.com/dart-lang/http" assert metadata.repository_url == "git+https://github.com/dart-lang/http" assert metadata.issue_tracker_url == "https://github.com/dart-lang/http/issues" - assert metadata.supplier == "dart.dev" + # Supplier is the distribution platform + assert metadata.supplier == "pub.dev" + # Publisher ID is preserved in maintainer_name + assert metadata.maintainer_name == "dart.dev" assert metadata.registry_url == "https://pub.dev/packages/http" assert metadata.source == "pub.dev" @@ -484,7 +498,8 @@ def test_fetch_with_author(self, mock_session): assert metadata is not None assert metadata.maintainer_name == "John Doe" assert metadata.maintainer_email == "john@example.com" - assert metadata.supplier == "John Doe" + # Supplier is always the distribution platform + 
assert metadata.supplier == "pub.dev" def test_fetch_with_authors_list(self, mock_session): """Test metadata fetch with authors list field.""" @@ -1025,7 +1040,8 @@ def test_enrich_cyclonedx_sbom(self, tmp_path): result = json.load(f) assert result["components"][0]["description"] == "Django web framework" - assert result["components"][0]["publisher"] == "Django Software Foundation" + # Publisher is the distribution platform + assert result["components"][0]["publisher"] == "Python Package Index (PyPI)" def test_enrich_spdx_sbom(self, tmp_path): """Test enriching an SPDX SBOM end-to-end.""" From f14bd0ba30ba4383c6e3223799b33cc848148aec Mon Sep 17 00:00:00 2001 From: Viktor Petersson Date: Sat, 31 Jan 2026 10:20:13 +0100 Subject: [PATCH 2/5] Update NTIA compliance tests for platform-as-supplier change Update test assertions to expect distribution platform as supplier: - test_pypi_author_email_without_author: expects "Python Package Index (PyPI)" - test_ecosystems_uses_platform_as_supplier: renamed from test_ecosystems_does_not_use_platform_as_supplier Co-Authored-By: Claude Opus 4.5 --- tests/test_ntia_compliance.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tests/test_ntia_compliance.py b/tests/test_ntia_compliance.py index 5b20470..fe424e8 100644 --- a/tests/test_ntia_compliance.py +++ b/tests/test_ntia_compliance.py @@ -1708,10 +1708,10 @@ def mock_get(url, *args, **kwargs): with open(output_file) as f: enriched_data = json.load(f) - # Verify the component got supplier from author_email + # Verify the component got supplier as distribution platform component = enriched_data["components"][0] - assert component.get("publisher") == "Test Author", ( - f"Expected publisher 'Test Author' from author_email, got: {component.get('publisher')}" + assert component.get("publisher") == "Python Package Index (PyPI)", ( + f"Expected publisher 'Python Package Index (PyPI)', got: {component.get('publisher')}" ) def 
test_lockfile_components_have_version(self, tmp_path): @@ -1892,26 +1892,27 @@ def test_self_referencing_component_gets_supplier(self, tmp_path): f"Self-referencing component should inherit publisher from root. Got: {self_component.get('publisher')}" ) - def test_ecosystems_does_not_use_platform_as_supplier(self, tmp_path): - """Test that ecosyste.ms doesn't use platform name (pypi, npm) as supplier. + def test_ecosystems_uses_platform_as_supplier(self, tmp_path): + """Test that ecosyste.ms uses distribution platform as supplier. - Registry/platform names are not valid suppliers - they're distribution channels. + The distribution platform (PyPI, npm, etc.) is the supplier, not the + individual package author/maintainer. """ import requests from packageurl import PackageURL from sbomify_action._enrichment.sources.ecosystems import EcosystemsSource - # Create mock response with ecosystem but no maintainer name + # Create mock response with ecosystem and maintainer mock_response = Mock() mock_response.status_code = 200 mock_response.json.return_value = [ { - "ecosystem": "pypi", # Should NOT be used as supplier + "ecosystem": "pypi", "description": "Test package", "normalized_licenses": ["MIT"], "maintainers": [ - {"login": "testuser", "name": None} # No name, only login + {"login": "testuser", "name": None} # Maintainer info preserved in maintainer_name ], } ] @@ -1923,7 +1924,10 @@ def test_ecosystems_does_not_use_platform_as_supplier(self, tmp_path): purl = PackageURL.from_string("pkg:pypi/test-package@1.0.0") metadata = source.fetch(purl, session) - # Supplier should be the maintainer login, NOT "pypi" + # Supplier should be the distribution platform assert metadata is not None - assert metadata.supplier != "pypi", "Should not use ecosystem name as supplier" - assert metadata.supplier == "testuser", f"Should use maintainer login as supplier. 
Got: {metadata.supplier}" + assert metadata.supplier == "Python Package Index (PyPI)", ( + f"Should use platform as supplier. Got: {metadata.supplier}" + ) + # Maintainer info is preserved separately + assert metadata.maintainer_name == "testuser" From eda54b1d959d15a1a3e72e2a01469fdff1a1ea19 Mon Sep 17 00:00:00 2001 From: Viktor Petersson Date: Sat, 31 Jan 2026 10:37:20 +0100 Subject: [PATCH 3/5] Fix test isolation from local sbomify.json file Tests that mock the sbomify API were being affected by the local sbomify.json file loaded by the JsonConfigProvider. Add patches to disable the JsonConfigProvider in these tests to ensure proper isolation. Affected tests: - test_augmentation_module.py: 6 tests - test_container_sbom_ntia_compliance.py: 1 test - test_schema_compliance.py: 2 tests Co-Authored-By: Claude Opus 4.5 --- tests/test_augmentation_module.py | 36 ++++++++++++++++---- tests/test_container_sbom_ntia_compliance.py | 11 +++++- tests/test_schema_compliance.py | 26 +++++++++++--- 3 files changed, 62 insertions(+), 11 deletions(-) diff --git a/tests/test_augmentation_module.py b/tests/test_augmentation_module.py index d7ed0af..8ec41da 100644 --- a/tests/test_augmentation_module.py +++ b/tests/test_augmentation_module.py @@ -151,9 +151,15 @@ def test_component_overrides(self, sample_cyclonedx_bom, sample_backend_metadata assert enriched_bom.metadata.component.name == "overridden-name" assert enriched_bom.metadata.component.version == "2.0.0" + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") - def test_fetch_augmentation_metadata(self, mock_get, sample_backend_metadata_with_mixed_licenses): + def test_fetch_augmentation_metadata( + self, mock_get, mock_find_config, sample_backend_metadata_with_mixed_licenses + ): """Test fetching metadata from providers (sbomify API).""" + # Disable json-config provider to isolate sbomify API test + 
mock_find_config.return_value = None + # Setup mock mock_response = Mock() mock_response.ok = True @@ -172,11 +178,15 @@ def test_fetch_augmentation_metadata(self, mock_get, sample_backend_metadata_wit assert result["supplier"] == sample_backend_metadata_with_mixed_licenses["supplier"] assert result["authors"] == sample_backend_metadata_with_mixed_licenses["authors"] + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") def test_augment_sbom_from_file_cyclonedx( - self, mock_get, sample_cyclonedx_bom, sample_backend_metadata_with_mixed_licenses + self, mock_get, mock_find_config, sample_cyclonedx_bom, sample_backend_metadata_with_mixed_licenses ): """Test augmenting SBOM from file (CycloneDX).""" + # Disable json-config provider to isolate sbomify API test + mock_find_config.return_value = None + # Setup mock mock_response = Mock() mock_response.ok = True @@ -391,9 +401,13 @@ def test_spdx_component_overrides(self, spdx_document): assert enriched_doc.packages[0].name == "overridden-spdx-name" assert enriched_doc.packages[0].version == "2.0.0-spdx" + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") - def test_augment_sbom_from_file_spdx(self, mock_get, spdx_document): + def test_augment_sbom_from_file_spdx(self, mock_get, mock_find_config, spdx_document): """Test augmenting SPDX SBOM from file.""" + # Disable json-config provider to isolate sbomify API test + mock_find_config.return_value = None + backend_data = { "supplier": {"name": "SPDX Supplier"}, "authors": [{"name": "SPDX Author"}], @@ -1259,12 +1273,15 @@ def test_invalid_json_error(self, mock_get): assert "Invalid JSON in SBOM file" in str(exc_info.value) + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") 
@patch.dict(os.environ, {}, clear=True) @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") - def test_api_connection_error(self, mock_get): + def test_api_connection_error(self, mock_get, mock_find_config): """Test handling of API connection errors (provider returns None, not exception).""" import requests + # Disable json-config provider to isolate API error test + mock_find_config.return_value = None mock_get.side_effect = requests.exceptions.ConnectionError("Connection failed") # With the provider architecture, API errors are caught and logged, @@ -1278,12 +1295,15 @@ def test_api_connection_error(self, mock_get): # Provider catches the error and returns None, which results in empty dict assert result == {} + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch.dict(os.environ, {}, clear=True) @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") - def test_api_timeout_error(self, mock_get): + def test_api_timeout_error(self, mock_get, mock_find_config): """Test handling of API timeout errors (provider returns None, not exception).""" import requests + # Disable json-config provider to isolate API error test + mock_find_config.return_value = None mock_get.side_effect = requests.exceptions.Timeout("Timeout") # With the provider architecture, API errors are caught and logged @@ -1296,10 +1316,14 @@ def test_api_timeout_error(self, mock_get): # Provider catches the error and returns None, which results in empty dict assert result == {} + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch.dict(os.environ, {}, clear=True) @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") - def test_api_404_error(self, mock_get): + def test_api_404_error(self, mock_get, mock_find_config): """Test handling of API 404 errors (provider returns None, not exception).""" + # Disable json-config provider to isolate API 
error test + mock_find_config.return_value = None + mock_response = Mock() mock_response.ok = False mock_response.status_code = 404 diff --git a/tests/test_container_sbom_ntia_compliance.py b/tests/test_container_sbom_ntia_compliance.py index bdadcff..273b504 100644 --- a/tests/test_container_sbom_ntia_compliance.py +++ b/tests/test_container_sbom_ntia_compliance.py @@ -269,7 +269,16 @@ def test_augmented_trivy_cyclonedx(self, image, tmp_path, mock_backend_response) mock_api_response.ok = True mock_api_response.json.return_value = mock_backend_response - with patch("sbomify_action._augmentation.providers.sbomify_api.requests.get", return_value=mock_api_response): + with ( + patch( + "sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file", + return_value=None, + ), + patch( + "sbomify_action._augmentation.providers.sbomify_api.requests.get", + return_value=mock_api_response, + ), + ): sbom_format = augment_sbom_from_file( str(sbom_path), str(output_file), diff --git a/tests/test_schema_compliance.py b/tests/test_schema_compliance.py index 27263f1..db653a8 100644 --- a/tests/test_schema_compliance.py +++ b/tests/test_schema_compliance.py @@ -82,12 +82,21 @@ def test_cyclonedx_full_flow_compliance(version, tmp_path): "lifecycle_phase": "build", # CISA 2025 Generation Context } - # Mock the sbomify API provider + # Mock the sbomify API provider and disable json-config provider mock_api_response = Mock() mock_api_response.ok = True mock_api_response.json.return_value = augmentation_data - with patch("sbomify_action._augmentation.providers.sbomify_api.requests.get", return_value=mock_api_response): + with ( + patch( + "sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file", + return_value=None, + ), + patch( + "sbomify_action._augmentation.providers.sbomify_api.requests.get", + return_value=mock_api_response, + ), + ): augment_sbom_from_file( input_file=str(input_file), output_file=str(augmented_file), 
@@ -207,12 +216,21 @@ def test_spdx_full_flow_compliance(version, tmp_path): "lifecycle_phase": "build", # CISA 2025 Generation Context } - # Mock the sbomify API provider + # Mock the sbomify API provider and disable json-config provider mock_api_response = Mock() mock_api_response.ok = True mock_api_response.json.return_value = augmentation_data - with patch("sbomify_action._augmentation.providers.sbomify_api.requests.get", return_value=mock_api_response): + with ( + patch( + "sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file", + return_value=None, + ), + patch( + "sbomify_action._augmentation.providers.sbomify_api.requests.get", + return_value=mock_api_response, + ), + ): augment_sbom_from_file( input_file=str(input_file), output_file=str(augmented_file), From 62108927576f6933e8c544fb695388aa643ea161 Mon Sep 17 00:00:00 2001 From: Viktor Petersson Date: Sat, 31 Jan 2026 10:39:31 +0100 Subject: [PATCH 4/5] Linting --- tests/test_augmentation_module.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_augmentation_module.py b/tests/test_augmentation_module.py index 8ec41da..59601fc 100644 --- a/tests/test_augmentation_module.py +++ b/tests/test_augmentation_module.py @@ -153,9 +153,7 @@ def test_component_overrides(self, sample_cyclonedx_bom, sample_backend_metadata @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") - def test_fetch_augmentation_metadata( - self, mock_get, mock_find_config, sample_backend_metadata_with_mixed_licenses - ): + def test_fetch_augmentation_metadata(self, mock_get, mock_find_config, sample_backend_metadata_with_mixed_licenses): """Test fetching metadata from providers (sbomify API).""" # Disable json-config provider to isolate sbomify API test mock_find_config.return_value = None From 22f4f0b81239ca147796bb3f1fc180ff1c5f1f09 Mon Sep 17 00:00:00 2001 
From: Viktor Petersson Date: Sat, 31 Jan 2026 11:00:30 +0100 Subject: [PATCH 5/5] Add component.supplier field for distribution platform (NTIA compliance) CycloneDX components now have both publisher and supplier fields: - publisher = package author/maintainer (e.g., "Django Software Foundation") - supplier = distribution platform (e.g., "Python Package Index (PyPI)") This addresses sbomqs comp_with_supplier compliance requirement. Changes: - Import OrganizationalEntity for CycloneDX supplier field - Set component.supplier to distribution platform from NormalizedMetadata - For OS packages (deb/rpm/apk), set maintainer_name = supplier so publisher shows the distribution name (e.g., "Debian Project") - Update tests for correct publisher/supplier expectations Co-Authored-By: Claude Opus 4.5 --- sbomify_action/_enrichment/sources/purl.py | 4 ++++ sbomify_action/enrichment.py | 16 ++++++++++++---- tests/test_enrichment_module.py | 4 ++-- tests/test_ntia_compliance.py | 14 +++++++++----- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/sbomify_action/_enrichment/sources/purl.py b/sbomify_action/_enrichment/sources/purl.py index 4ac397d..befc5c5 100644 --- a/sbomify_action/_enrichment/sources/purl.py +++ b/sbomify_action/_enrichment/sources/purl.py @@ -180,11 +180,15 @@ def fetch(self, purl: PackageURL, session: requests.Session) -> Optional[Normali field_sources = {} if supplier: field_sources["supplier"] = self.name + # For OS packages, the distribution is also the maintainer/publisher + field_sources["maintainer_name"] = self.name if homepage: field_sources["homepage"] = self.name return NormalizedMetadata( supplier=supplier, + # For OS packages, distribution is the publisher (maintainer_name -> component.publisher) + maintainer_name=supplier, homepage=homepage, source=self.name, field_sources=field_sources, diff --git a/sbomify_action/enrichment.py b/sbomify_action/enrichment.py index c2bd031..264d529 100644 --- a/sbomify_action/enrichment.py +++ 
b/sbomify_action/enrichment.py @@ -58,6 +58,7 @@ from cyclonedx.model import ExternalReference, ExternalReferenceType, Property, XsUri from cyclonedx.model.bom import Bom from cyclonedx.model.component import Component, ComponentType +from cyclonedx.model.contact import OrganizationalEntity from cyclonedx.model.license import LicenseExpression from spdx_tools.spdx.model import ( Actor, @@ -417,12 +418,19 @@ def _apply_metadata_to_cyclonedx_component( component.licenses.add(license_expr) added_fields.append("license") - # Publisher (sanitized) - if not component.publisher and metadata.supplier: + # Publisher - use maintainer_name (author), not supplier (distribution platform) + if not component.publisher and metadata.maintainer_name: + sanitized_publisher = sanitize_supplier(metadata.maintainer_name) + if sanitized_publisher: + component.publisher = sanitized_publisher + added_fields.append("publisher") + + # Supplier - use supplier (distribution platform like PyPI, npm, etc.) + if not component.supplier and metadata.supplier: sanitized_supplier = sanitize_supplier(metadata.supplier) if sanitized_supplier: - component.publisher = sanitized_supplier - added_fields.append("publisher") + component.supplier = OrganizationalEntity(name=sanitized_supplier) + added_fields.append("supplier") # External references helper (with URL sanitization) def _add_external_ref(ref_type: ExternalReferenceType, url: str, field_name: str = "url") -> bool: diff --git a/tests/test_enrichment_module.py b/tests/test_enrichment_module.py index b29d280..9d8f772 100644 --- a/tests/test_enrichment_module.py +++ b/tests/test_enrichment_module.py @@ -1040,8 +1040,8 @@ def test_enrich_cyclonedx_sbom(self, tmp_path): result = json.load(f) assert result["components"][0]["description"] == "Django web framework" - # Publisher is the distribution platform - assert result["components"][0]["publisher"] == "Python Package Index (PyPI)" + # Publisher is the package author (maintainer_name), not distribution 
platform + assert result["components"][0]["publisher"] == "Django Software Foundation" def test_enrich_spdx_sbom(self, tmp_path): """Test enriching an SPDX SBOM end-to-end.""" diff --git a/tests/test_ntia_compliance.py b/tests/test_ntia_compliance.py index 2b00e03..dbdf264 100644 --- a/tests/test_ntia_compliance.py +++ b/tests/test_ntia_compliance.py @@ -418,7 +418,7 @@ def test_debian_package_purl_fallback(self, tmp_path): print(f" {component['name']}: publisher = {component.get('publisher')}") def test_alpine_package_purl_fallback(self, tmp_path): - """Test that Alpine packages get supplier from PURL namespace.""" + """Test that Alpine packages get supplier from PURL namespace when other sources fail.""" clear_cache() sbom_data = { @@ -449,7 +449,11 @@ def test_alpine_package_purl_fallback(self, tmp_path): # Mock API responses to 404 (simulating no data - force PURL fallback) mock_response = Mock() mock_response.status_code = 404 - with patch("requests.Session.get", return_value=mock_response): + with ( + patch("requests.Session.get", return_value=mock_response), + # Also disable LicenseDB so PURL fallback is truly tested + patch("sbomify_action._enrichment.sources.license_db.LicenseDBSource.fetch", return_value=None), + ): enrich_sbom(str(input_file), str(output_file)) with open(output_file) as f: @@ -1675,10 +1679,10 @@ def mock_get(url, *args, **kwargs): with open(output_file) as f: enriched_data = json.load(f) - # Verify the component got supplier as distribution platform + # Verify the component got publisher from author_email (extracted name: "Test Author") component = enriched_data["components"][0] - assert component.get("publisher") == "Python Package Index (PyPI)", ( - f"Expected publisher 'Python Package Index (PyPI)', got: {component.get('publisher')}" + assert component.get("publisher") == "Test Author", ( + f"Expected publisher 'Test Author' from author_email, got: {component.get('publisher')}" ) def test_lockfile_components_have_version(self, 
tmp_path):