diff --git a/sbomify_action/_enrichment/sources/conan.py b/sbomify_action/_enrichment/sources/conan.py index 44c96ba..dc32325 100644 --- a/sbomify_action/_enrichment/sources/conan.py +++ b/sbomify_action/_enrichment/sources/conan.py @@ -13,6 +13,7 @@ from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url +from .purl import PURL_TYPE_TO_SUPPLIER # Simple in-memory cache _cache: Dict[str, Optional[NormalizedMetadata]] = {} @@ -227,15 +228,17 @@ def _extract_metadata_from_graph(self, package_name: str, graph: Any) -> Optiona if repository_url: field_sources["repository_url"] = self.name - # Use author as supplier if available - supplier = author if author else None - if supplier: - field_sources["supplier"] = self.name + # Supplier is always the distribution platform + field_sources["supplier"] = self.name + + # Preserve author info as maintainer_name + maintainer_name = author if author else None metadata = NormalizedMetadata( description=description, licenses=licenses, - supplier=supplier, + supplier=PURL_TYPE_TO_SUPPLIER["conan"], + maintainer_name=maintainer_name, homepage=homepage, repository_url=repository_url, registry_url=f"https://conan.io/center/recipes/{package_name}", diff --git a/sbomify_action/_enrichment/sources/cratesio.py b/sbomify_action/_enrichment/sources/cratesio.py index 690efb3..6ce1972 100644 --- a/sbomify_action/_enrichment/sources/cratesio.py +++ b/sbomify_action/_enrichment/sources/cratesio.py @@ -11,6 +11,7 @@ from ..license_utils import normalize_license_list from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url +from .purl import PURL_TYPE_TO_SUPPLIER CRATESIO_API_BASE = "https://crates.io/api/v1/crates" DEFAULT_TIMEOUT = 10 # seconds @@ -161,8 +162,8 @@ def _normalize_response( field_sources["description"] = self.name if licenses: field_sources["licenses"] = self.name - if maintainer_name: - field_sources["supplier"] = self.name + # Supplier is always the distribution platform + field_sources["supplier"] = self.name if homepage: field_sources["homepage"] = self.name if repository_url: @@ -174,9 +175,7 @@ def _normalize_response( description=description, licenses=licenses, license_texts=license_texts, - # supplier is the NTIA-required field; maintainer_name provides additional detail. - # For crates.io, the publisher (published_by) serves as both. - supplier=maintainer_name, + supplier=PURL_TYPE_TO_SUPPLIER["cargo"], homepage=homepage, repository_url=repository_url, documentation_url=documentation, diff --git a/sbomify_action/_enrichment/sources/depsdev.py b/sbomify_action/_enrichment/sources/depsdev.py index ac6c12e..1e2decb 100644 --- a/sbomify_action/_enrichment/sources/depsdev.py +++ b/sbomify_action/_enrichment/sources/depsdev.py @@ -12,6 +12,7 @@ from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url from ..utils import get_qualified_name +from .purl import PURL_TYPE_TO_SUPPLIER DEPSDEV_API_BASE = "https://api.deps.dev/v3" DEFAULT_TIMEOUT = 10 # seconds - deps.dev is generally fast @@ -108,7 +109,7 @@ def fetch(self, purl: PackageURL, session: requests.Session) -> Optional[Normali metadata = None if response.status_code == 200: data = response.json() - metadata = self._normalize_response(purl.name, data) + metadata = self._normalize_response(purl.name, purl.type, data) elif response.status_code == 404: logger.debug(f"Package not found in deps.dev: {purl}") else: @@ -131,12 +132,15 @@ def fetch(self, purl: PackageURL, session: requests.Session) -> Optional[Normali _cache[cache_key] = None return None - def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Optional[NormalizedMetadata]: + def _normalize_response( + self, package_name: str, purl_type: str, data: Dict[str, Any] + ) -> Optional[NormalizedMetadata]: """ Normalize deps.dev API response to NormalizedMetadata. Args: package_name: Name of the package + purl_type: PURL type (e.g., "pypi", "npm", "cargo") data: Raw deps.dev API response Returns: @@ -179,17 +183,23 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Option if repository_url: repository_url = normalize_vcs_url(repository_url) + # Get supplier from PURL type mapping + supplier = PURL_TYPE_TO_SUPPLIER.get(purl_type) + # Build field_sources for attribution - field_sources = {} + field_sources: dict[str, str] = {} if licenses: field_sources["licenses"] = self.name if homepage: field_sources["homepage"] = self.name if repository_url: field_sources["repository_url"] = self.name + if supplier: + field_sources["supplier"] = self.name metadata = NormalizedMetadata( licenses=licenses, + supplier=supplier, homepage=homepage, repository_url=repository_url, source=self.name, diff --git a/sbomify_action/_enrichment/sources/ecosystems.py b/sbomify_action/_enrichment/sources/ecosystems.py index af8887a..1a566bb 100644 --- a/sbomify_action/_enrichment/sources/ecosystems.py +++ b/sbomify_action/_enrichment/sources/ecosystems.py @@ -11,6 +11,7 @@ from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url from ..utils import purl_to_string +from .purl import PURL_TYPE_TO_SUPPLIER ECOSYSTEMS_API_BASE = "https://packages.ecosyste.ms/api/v1" DEFAULT_TIMEOUT = 15 # seconds - ecosyste.ms can be slower @@ -87,9 +88,9 @@ def fetch(self, purl: PackageURL, session: requests.Session) -> Optional[Normali data = response.json() # API returns an array, take first result if isinstance(data, list) and len(data) > 0: - metadata = self._normalize_response(data[0]) + metadata = self._normalize_response(purl.type, data[0]) elif isinstance(data, dict): - metadata = self._normalize_response(data) + metadata = self._normalize_response(purl.type, data) else: logger.debug(f"No package data found in ecosyste.ms for: {purl_str}") elif response.status_code == 404: @@ -119,11 +120,12 @@ def fetch(self, purl: PackageURL, session: requests.Session) -> Optional[Normali _cache[cache_key] = None return None - def _normalize_response(self, data: Dict[str, Any]) -> Optional[NormalizedMetadata]: + def _normalize_response(self, purl_type: str, data: Dict[str, Any]) -> Optional[NormalizedMetadata]: """ Normalize ecosyste.ms API response to NormalizedMetadata. Args: + purl_type: PURL type (e.g., "pypi", "npm", "cargo") data: Raw ecosyste.ms API response Returns: @@ -151,20 +153,8 @@ def _normalize_response(self, data: Dict[str, Any]) -> Optional[NormalizedMetada maintainer_name = first_maintainer.get("name") or first_maintainer.get("login") maintainer_email = first_maintainer.get("email") - # Extract supplier from maintainer or repo owner - # NEVER use ecosystem name as supplier - "pypi", "npm", etc. are platforms, not suppliers - supplier = None - # Priority 1: Maintainer name or login (already extracted above) - if maintainer_name: - supplier = maintainer_name - # Priority 2: Repo owner name or login - elif data.get("repo_metadata") and data["repo_metadata"].get("owner"): - owner = data["repo_metadata"]["owner"] - if isinstance(owner, dict): - supplier = owner.get("name") or owner.get("login") - elif isinstance(owner, str): - supplier = owner - # Do NOT fall back to data["ecosystem"] - it's just the platform name + # Supplier is the distribution platform based on PURL type + supplier = PURL_TYPE_TO_SUPPLIER.get(purl_type) # Extract issue tracker URL from repo metadata issue_tracker_url = None diff --git a/sbomify_action/_enrichment/sources/pubdev.py b/sbomify_action/_enrichment/sources/pubdev.py index e32a356..693c7eb 100644 --- a/sbomify_action/_enrichment/sources/pubdev.py +++ b/sbomify_action/_enrichment/sources/pubdev.py @@ -12,6 +12,7 @@ from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url from ..utils import parse_author_string +from .purl import PURL_TYPE_TO_SUPPLIER PUBDEV_API_BASE = "https://pub.dev/api/packages" DEFAULT_TIMEOUT = 10 # seconds - pub.dev is generally fast @@ -134,8 +135,7 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Option documentation_url = pubspec.get("documentation") issue_tracker_url = pubspec.get("issue_tracker") - # Extract publisher/author info using shared utility - supplier = None + # Extract author info for maintainer_name field maintainer_name = None maintainer_email = None @@ -143,17 +143,15 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Option authors = pubspec.get("authors") if authors and isinstance(authors, list) and len(authors) > 0: maintainer_name, maintainer_email = parse_author_string(authors[0]) - supplier = maintainer_name elif pubspec.get("author"): maintainer_name, maintainer_email = parse_author_string(pubspec["author"]) - supplier = maintainer_name # Check for publisher in the top-level response (newer pub.dev API) - # Publisher takes precedence over author for supplier + # Use publisher ID as maintainer_name if available if data.get("publisher"): publisher_id = data["publisher"].get("publisherId") - if publisher_id: - supplier = publisher_id + if publisher_id and not maintainer_name: + maintainer_name = publisher_id logger.debug(f"Successfully fetched pub.dev metadata for: {package_name}") @@ -163,8 +161,8 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Option field_sources["description"] = self.name if licenses: field_sources["licenses"] = self.name - if supplier: - field_sources["supplier"] = self.name + # Supplier is always the distribution platform + field_sources["supplier"] = self.name if homepage: field_sources["homepage"] = self.name if repository_url: @@ -178,7 +176,7 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Option description=description, licenses=licenses, license_texts=license_texts, - supplier=supplier, + supplier=PURL_TYPE_TO_SUPPLIER["pub"], homepage=homepage, repository_url=repository_url, documentation_url=documentation_url, diff --git a/sbomify_action/_enrichment/sources/purl.py b/sbomify_action/_enrichment/sources/purl.py index e5ba2cb..befc5c5 100644 --- a/sbomify_action/_enrichment/sources/purl.py +++ b/sbomify_action/_enrichment/sources/purl.py @@ -49,6 +49,58 @@ "chainguard": "Chainguard, Inc.", } +# Mapping of PURL type to distribution platform supplier name +# For language packages, the distribution platform (not the author) is the supplier +PURL_TYPE_TO_SUPPLIER: dict[str, str] = { + # Language package registries + "pypi": "Python Package Index (PyPI)", + "npm": "npm", + "cargo": "crates.io", + "maven": "Maven Central", + "gem": "RubyGems.org", + "nuget": "NuGet Gallery", + "golang": "Go Modules", + "pub": "pub.dev", + "conan": "Conan Center", + "composer": "Packagist", + "hex": "Hex.pm", + "cocoapods": "CocoaPods", + "conda": "Anaconda", + "hackage": "Hackage", + "swift": "Swift Package Registry", + # Container registries + "docker": "Docker Hub", + "oci": "OCI Registry", +} + + +def get_supplier_for_purl(purl: PackageURL) -> str | None: + """Get the appropriate supplier for a PURL. + + For OS packages (deb, rpm, apk), uses NAMESPACE_TO_SUPPLIER based on the + distribution namespace (e.g., debian, ubuntu, alpine). + + For language packages (pypi, npm, cargo, etc.), uses PURL_TYPE_TO_SUPPLIER + to return the distribution platform as the supplier. + + Args: + purl: Parsed PackageURL + + Returns: + Supplier name or None if not found + """ + # OS packages use namespace-based supplier (distribution name) + if purl.type in OS_PACKAGE_TYPES and purl.namespace: + supplier = NAMESPACE_TO_SUPPLIER.get(purl.namespace.lower()) + if supplier: + return supplier + # Fallback for unknown namespaces + return f"{purl.namespace.title()} Project" + + # Language packages use type-based supplier (platform name) + return PURL_TYPE_TO_SUPPLIER.get(purl.type) + + # Mapping of PURL type/namespace to package tracker URL templates PACKAGE_TRACKER_URLS: Dict[str, Dict[str, str]] = { "deb": { @@ -128,11 +180,15 @@ def fetch(self, purl: PackageURL, session: requests.Session) -> Optional[Normali field_sources = {} if supplier: field_sources["supplier"] = self.name + # For OS packages, the distribution is also the maintainer/publisher + field_sources["maintainer_name"] = self.name if homepage: field_sources["homepage"] = self.name return NormalizedMetadata( supplier=supplier, + # For OS packages, distribution is the publisher (maintainer_name -> component.publisher) + maintainer_name=supplier, homepage=homepage, source=self.name, field_sources=field_sources, diff --git a/sbomify_action/_enrichment/sources/pypi.py b/sbomify_action/_enrichment/sources/pypi.py index 242bb45..ae0021b 100644 --- a/sbomify_action/_enrichment/sources/pypi.py +++ b/sbomify_action/_enrichment/sources/pypi.py @@ -12,6 +12,7 @@ from ..metadata import NormalizedMetadata from ..sanitization import normalize_vcs_url from ..utils import parse_author_string +from .purl import PURL_TYPE_TO_SUPPLIER PYPI_API_BASE = "https://pypi.org/pypi" DEFAULT_TIMEOUT = 10 # seconds - PyPI is fast @@ -169,8 +170,8 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Normal field_sources["description"] = self.name if licenses: field_sources["licenses"] = self.name - if maintainer_name: - field_sources["supplier"] = self.name + # Supplier is always the distribution platform + field_sources["supplier"] = self.name if homepage: field_sources["homepage"] = self.name if repository_url: @@ -184,7 +185,7 @@ def _normalize_response(self, package_name: str, data: Dict[str, Any]) -> Normal description=info.get("summary"), licenses=licenses, license_texts=license_texts, - supplier=maintainer_name, # Use author/maintainer as supplier + supplier=PURL_TYPE_TO_SUPPLIER["pypi"], homepage=homepage, repository_url=repository_url, documentation_url=documentation_url, diff --git a/sbomify_action/enrichment.py b/sbomify_action/enrichment.py index c2bd031..264d529 100644 --- a/sbomify_action/enrichment.py +++ b/sbomify_action/enrichment.py @@ -58,6 +58,7 @@ from cyclonedx.model import ExternalReference, ExternalReferenceType, Property, XsUri from cyclonedx.model.bom import Bom from cyclonedx.model.component import Component, ComponentType +from cyclonedx.model.contact import OrganizationalEntity from cyclonedx.model.license import LicenseExpression from spdx_tools.spdx.model import ( Actor, @@ -417,12 +418,19 @@ def _apply_metadata_to_cyclonedx_component( component.licenses.add(license_expr) added_fields.append("license") - # Publisher (sanitized) - if not component.publisher and metadata.supplier: + # Publisher - use maintainer_name (author), not supplier (distribution platform) + if not component.publisher and metadata.maintainer_name: + sanitized_publisher = sanitize_supplier(metadata.maintainer_name) + if sanitized_publisher: + component.publisher = sanitized_publisher + added_fields.append("publisher") + + # Supplier - use supplier (distribution platform like PyPI, npm, etc.) + if not component.supplier and metadata.supplier: sanitized_supplier = sanitize_supplier(metadata.supplier) if sanitized_supplier: - component.publisher = sanitized_supplier - added_fields.append("publisher") + component.supplier = OrganizationalEntity(name=sanitized_supplier) + added_fields.append("supplier") # External references helper (with URL sanitization) def _add_external_ref(ref_type: ExternalReferenceType, url: str, field_name: str = "url") -> bool: diff --git a/tests/test_augmentation_module.py b/tests/test_augmentation_module.py index d7ed0af..59601fc 100644 --- a/tests/test_augmentation_module.py +++ b/tests/test_augmentation_module.py @@ -151,9 +151,13 @@ def test_component_overrides(self, sample_cyclonedx_bom, sample_backend_metadata assert enriched_bom.metadata.component.name == "overridden-name" assert enriched_bom.metadata.component.version == "2.0.0" + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") - def test_fetch_augmentation_metadata(self, mock_get, sample_backend_metadata_with_mixed_licenses): + def test_fetch_augmentation_metadata(self, mock_get, mock_find_config, sample_backend_metadata_with_mixed_licenses): """Test fetching metadata from providers (sbomify API).""" + # Disable json-config provider to isolate sbomify API test + mock_find_config.return_value = None + # Setup mock mock_response = Mock() mock_response.ok = True @@ -172,11 +176,15 @@ def test_fetch_augmentation_metadata(self, mock_get, sample_backend_metadata_wit assert result["supplier"] == sample_backend_metadata_with_mixed_licenses["supplier"] assert result["authors"] == sample_backend_metadata_with_mixed_licenses["authors"] + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") def test_augment_sbom_from_file_cyclonedx( - self, mock_get, sample_cyclonedx_bom, sample_backend_metadata_with_mixed_licenses + self, mock_get, mock_find_config, sample_cyclonedx_bom, sample_backend_metadata_with_mixed_licenses ): """Test augmenting SBOM from file (CycloneDX).""" + # Disable json-config provider to isolate sbomify API test + mock_find_config.return_value = None + # Setup mock mock_response = Mock() mock_response.ok = True @@ -391,9 +399,13 @@ def test_spdx_component_overrides(self, spdx_document): assert enriched_doc.packages[0].name == "overridden-spdx-name" assert enriched_doc.packages[0].version == "2.0.0-spdx" + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") - def test_augment_sbom_from_file_spdx(self, mock_get, spdx_document): + def test_augment_sbom_from_file_spdx(self, mock_get, mock_find_config, spdx_document): """Test augmenting SPDX SBOM from file.""" + # Disable json-config provider to isolate sbomify API test + mock_find_config.return_value = None + backend_data = { "supplier": {"name": "SPDX Supplier"}, "authors": [{"name": "SPDX Author"}], @@ -1259,12 +1271,15 @@ def test_invalid_json_error(self, mock_get): assert "Invalid JSON in SBOM file" in str(exc_info.value) + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch.dict(os.environ, {}, clear=True) @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") - def test_api_connection_error(self, mock_get): + def test_api_connection_error(self, mock_get, mock_find_config): """Test handling of API connection errors (provider returns None, not exception).""" import requests + # Disable json-config provider to isolate API error test + mock_find_config.return_value = None mock_get.side_effect = requests.exceptions.ConnectionError("Connection failed") # With the provider architecture, API errors are caught and logged, @@ -1278,12 +1293,15 @@ def test_api_connection_error(self, mock_get): # Provider catches the error and returns None, which results in empty dict assert result == {} + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch.dict(os.environ, {}, clear=True) @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") - def test_api_timeout_error(self, mock_get): + def test_api_timeout_error(self, mock_get, mock_find_config): """Test handling of API timeout errors (provider returns None, not exception).""" import requests + # Disable json-config provider to isolate API error test + mock_find_config.return_value = None mock_get.side_effect = requests.exceptions.Timeout("Timeout") # With the provider architecture, API errors are caught and logged @@ -1296,10 +1314,14 @@ def test_api_timeout_error(self, mock_get): # Provider catches the error and returns None, which results in empty dict assert result == {} + @patch("sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file") @patch.dict(os.environ, {}, clear=True) @patch("sbomify_action._augmentation.providers.sbomify_api.requests.get") - def test_api_404_error(self, mock_get): + def test_api_404_error(self, mock_get, mock_find_config): """Test handling of API 404 errors (provider returns None, not exception).""" + # Disable json-config provider to isolate API error test + mock_find_config.return_value = None + mock_response = Mock() mock_response.ok = False mock_response.status_code = 404 diff --git a/tests/test_conan_source.py b/tests/test_conan_source.py index 899d63c..ce1dd0d 100644 --- a/tests/test_conan_source.py +++ b/tests/test_conan_source.py @@ -191,7 +191,7 @@ def test_fetch_package_not_found(self, mock_session): assert metadata is None def test_fetch_with_author(self, mock_session): - """Test that author is used as supplier.""" + """Test that author is preserved as maintainer_name.""" source = ConanSource() purl = PackageURL.from_string("pkg:conan/testpkg@1.0.0") @@ -221,7 +221,10 @@ def test_fetch_with_author(self, mock_session): metadata = source.fetch(purl, mock_session) assert metadata is not None - assert metadata.supplier == "Test Author" + # Supplier is always the distribution platform + assert metadata.supplier == "Conan Center" + # Author is preserved as maintainer_name + assert metadata.maintainer_name == "Test Author" class TestConanSourceCaching: diff --git a/tests/test_container_sbom_ntia_compliance.py b/tests/test_container_sbom_ntia_compliance.py index bdadcff..273b504 100644 --- a/tests/test_container_sbom_ntia_compliance.py +++ b/tests/test_container_sbom_ntia_compliance.py @@ -269,7 +269,16 @@ def test_augmented_trivy_cyclonedx(self, image, tmp_path, mock_backend_response) mock_api_response.ok = True mock_api_response.json.return_value = mock_backend_response - with patch("sbomify_action._augmentation.providers.sbomify_api.requests.get", return_value=mock_api_response): + with ( + patch( + "sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file", + return_value=None, + ), + patch( + "sbomify_action._augmentation.providers.sbomify_api.requests.get", + return_value=mock_api_response, + ), + ): sbom_format = augment_sbom_from_file( str(sbom_path), str(output_file), diff --git a/tests/test_cratesio_source.py b/tests/test_cratesio_source.py index 92e69d6..d43b23c 100644 --- a/tests/test_cratesio_source.py +++ b/tests/test_cratesio_source.py @@ -111,7 +111,7 @@ def test_fetch_success_with_version(self, mock_session): assert len(metadata.licenses) == 1 assert "MIT" in metadata.licenses[0] assert "Apache-2.0" in metadata.licenses[0] - assert metadata.supplier == "David Tolnay" + assert metadata.supplier == "crates.io" assert metadata.maintainer_name == "David Tolnay" assert metadata.homepage == "https://serde.rs" assert metadata.documentation_url == "https://docs.rs/serde" @@ -152,8 +152,8 @@ def test_fetch_success_without_version(self, mock_session): assert "github.com/tokio-rs/tokio" in metadata.repository_url # No license without version-specific endpoint assert metadata.licenses == [] - # No published_by without version-specific endpoint - assert metadata.supplier is None + # Supplier is always the distribution platform + assert metadata.supplier == "crates.io" # Verify API was called with crate URL (no version) mock_session.get.assert_called_once() @@ -452,7 +452,8 @@ def test_field_sources_partial(self, mock_session): assert metadata is not None assert metadata.field_sources.get("description") == "crates.io" assert "licenses" not in metadata.field_sources - assert "supplier" not in metadata.field_sources + # Supplier is always present (distribution platform) + assert metadata.field_sources.get("supplier") == "crates.io" assert "homepage" not in metadata.field_sources diff --git a/tests/test_enrichment_module.py b/tests/test_enrichment_module.py index b16e306..9d8f772 100644 --- a/tests/test_enrichment_module.py +++ b/tests/test_enrichment_module.py @@ -281,7 +281,9 @@ def test_fetch_success(self, mock_session): assert metadata.description == "A high-level Python web framework" assert metadata.homepage == "https://www.djangoproject.com/" assert "BSD-3-Clause" in metadata.licenses - assert metadata.supplier == "Django Software Foundation" + # Supplier is the distribution platform, not the author + assert metadata.supplier == "Python Package Index (PyPI)" + assert metadata.maintainer_name == "Django Software Foundation" assert metadata.repository_url == "git+https://github.com/django/django" def test_fetch_not_found(self, mock_session): @@ -335,8 +337,11 @@ def test_fetch_author_from_email_field(self, mock_session): metadata = source.fetch(purl, mock_session) assert metadata is not None - assert metadata.supplier == "Peter Linss", ( - f"Expected 'Peter Linss' extracted from author_email, got: {metadata.supplier}" + # Supplier is always the distribution platform + assert metadata.supplier == "Python Package Index (PyPI)" + # Author name extracted from email field is preserved in maintainer_name + assert metadata.maintainer_name == "Peter Linss", ( + f"Expected 'Peter Linss' extracted from author_email, got: {metadata.maintainer_name}" ) def test_fetch_author_from_maintainer_email_field(self, mock_session): @@ -361,8 +366,11 @@ def test_fetch_author_from_maintainer_email_field(self, mock_session): metadata = source.fetch(purl, mock_session) assert metadata is not None - assert metadata.supplier == "Jane Doe", ( - f"Expected 'Jane Doe' extracted from maintainer_email, got: {metadata.supplier}" + # Supplier is always the distribution platform + assert metadata.supplier == "Python Package Index (PyPI)" + # Author name extracted from maintainer_email is preserved in maintainer_name + assert metadata.maintainer_name == "Jane Doe", ( + f"Expected 'Jane Doe' extracted from maintainer_email, got: {metadata.maintainer_name}" ) def test_fetch_prefers_direct_author_over_email(self, mock_session): @@ -385,8 +393,11 @@ def test_fetch_prefers_direct_author_over_email(self, mock_session): metadata = source.fetch(purl, mock_session) assert metadata is not None - assert metadata.supplier == "Direct Author", ( - f"Expected 'Direct Author' from author field, got: {metadata.supplier}" + # Supplier is always the distribution platform + assert metadata.supplier == "Python Package Index (PyPI)" + # Direct author is preferred for maintainer_name + assert metadata.maintainer_name == "Direct Author", ( + f"Expected 'Direct Author' from author field, got: {metadata.maintainer_name}" ) @@ -455,7 +466,10 @@ def test_fetch_success(self, mock_session): assert metadata.homepage == "https://github.com/dart-lang/http" assert metadata.repository_url == "git+https://github.com/dart-lang/http" assert metadata.issue_tracker_url == "https://github.com/dart-lang/http/issues" - assert metadata.supplier == "dart.dev" + # Supplier is the distribution platform + assert metadata.supplier == "pub.dev" + # Publisher ID is preserved in maintainer_name + assert metadata.maintainer_name == "dart.dev" assert metadata.registry_url == "https://pub.dev/packages/http" assert metadata.source == "pub.dev" @@ -484,7 +498,8 @@ def test_fetch_with_author(self, mock_session): assert metadata is not None assert metadata.maintainer_name == "John Doe" assert metadata.maintainer_email == "john@example.com" - assert metadata.supplier == "John Doe" + # Supplier is always the distribution platform + assert metadata.supplier == "pub.dev" def test_fetch_with_authors_list(self, mock_session): """Test metadata fetch with authors list field.""" @@ -1025,6 +1040,7 @@ def test_enrich_cyclonedx_sbom(self, tmp_path): result = json.load(f) assert result["components"][0]["description"] == "Django web framework" + # Publisher is the package author (maintainer_name), not distribution platform assert result["components"][0]["publisher"] == "Django Software Foundation" def test_enrich_spdx_sbom(self, tmp_path): diff --git a/tests/test_ntia_compliance.py b/tests/test_ntia_compliance.py index 0cf6d13..dbdf264 100644 --- a/tests/test_ntia_compliance.py +++ b/tests/test_ntia_compliance.py @@ -418,7 +418,7 @@ def test_debian_package_purl_fallback(self, tmp_path): print(f" {component['name']}: publisher = {component.get('publisher')}") def test_alpine_package_purl_fallback(self, tmp_path): - """Test that Alpine packages get supplier from PURL namespace.""" + """Test that Alpine packages get supplier from PURL namespace when other sources fail.""" clear_cache() sbom_data = { @@ -449,7 +449,11 @@ def test_alpine_package_purl_fallback(self, tmp_path): # Mock API responses to 404 (simulating no data - force PURL fallback) mock_response = Mock() mock_response.status_code = 404 - with patch("requests.Session.get", return_value=mock_response): + with ( + patch("requests.Session.get", return_value=mock_response), + # Also disable LicenseDB so PURL fallback is truly tested + patch("sbomify_action._enrichment.sources.license_db.LicenseDBSource.fetch", return_value=None), + ): enrich_sbom(str(input_file), str(output_file)) with open(output_file) as f: @@ -1675,7 +1679,7 @@ def mock_get(url, *args, **kwargs): with open(output_file) as f: enriched_data = json.load(f) - # Verify the component got supplier from author_email + # Verify the component got publisher from author_email (extracted name: "Test Author") component = enriched_data["components"][0] assert component.get("publisher") == "Test Author", ( f"Expected publisher 'Test Author' from author_email, got: {component.get('publisher')}" @@ -1859,26 +1863,27 @@ def test_self_referencing_component_gets_supplier(self, tmp_path): f"Self-referencing component should inherit publisher from root. Got: {self_component.get('publisher')}" ) - def test_ecosystems_does_not_use_platform_as_supplier(self, tmp_path): - """Test that ecosyste.ms doesn't use platform name (pypi, npm) as supplier. + def test_ecosystems_uses_platform_as_supplier(self, tmp_path): + """Test that ecosyste.ms uses distribution platform as supplier. - Registry/platform names are not valid suppliers - they're distribution channels. + The distribution platform (PyPI, npm, etc.) is the supplier, not the + individual package author/maintainer. """ import requests from packageurl import PackageURL from sbomify_action._enrichment.sources.ecosystems import EcosystemsSource - # Create mock response with ecosystem but no maintainer name + # Create mock response with ecosystem and maintainer mock_response = Mock() mock_response.status_code = 200 mock_response.json.return_value = [ { - "ecosystem": "pypi", # Should NOT be used as supplier + "ecosystem": "pypi", "description": "Test package", "normalized_licenses": ["MIT"], "maintainers": [ - {"login": "testuser", "name": None} # No name, only login + {"login": "testuser", "name": None} # Maintainer info preserved in maintainer_name ], } ] @@ -1890,7 +1895,10 @@ def test_ecosystems_does_not_use_platform_as_supplier(self, tmp_path): purl = PackageURL.from_string("pkg:pypi/test-package@1.0.0") metadata = source.fetch(purl, session) - # Supplier should be the maintainer login, NOT "pypi" + # Supplier should be the distribution platform assert metadata is not None - assert metadata.supplier != "pypi", "Should not use ecosystem name as supplier" - assert metadata.supplier == "testuser", f"Should use maintainer login as supplier. Got: {metadata.supplier}" + assert metadata.supplier == "Python Package Index (PyPI)", ( + f"Should use platform as supplier. Got: {metadata.supplier}" + ) + # Maintainer info is preserved separately + assert metadata.maintainer_name == "testuser" diff --git a/tests/test_schema_compliance.py b/tests/test_schema_compliance.py index 27263f1..db653a8 100644 --- a/tests/test_schema_compliance.py +++ b/tests/test_schema_compliance.py @@ -82,12 +82,21 @@ def test_cyclonedx_full_flow_compliance(version, tmp_path): "lifecycle_phase": "build", # CISA 2025 Generation Context } - # Mock the sbomify API provider + # Mock the sbomify API provider and disable json-config provider mock_api_response = Mock() mock_api_response.ok = True mock_api_response.json.return_value = augmentation_data - with patch("sbomify_action._augmentation.providers.sbomify_api.requests.get", return_value=mock_api_response): + with ( + patch( + "sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file", + return_value=None, + ), + patch( + "sbomify_action._augmentation.providers.sbomify_api.requests.get", + return_value=mock_api_response, + ), + ): augment_sbom_from_file( input_file=str(input_file), output_file=str(augmented_file), @@ -207,12 +216,21 @@ def test_spdx_full_flow_compliance(version, tmp_path): "lifecycle_phase": "build", # CISA 2025 Generation Context } - # Mock the sbomify API provider + # Mock the sbomify API provider and disable json-config provider mock_api_response = Mock() mock_api_response.ok = True mock_api_response.json.return_value = augmentation_data - with patch("sbomify_action._augmentation.providers.sbomify_api.requests.get", return_value=mock_api_response): + with ( + patch( + "sbomify_action._augmentation.providers.json_config.JsonConfigProvider._find_config_file", + return_value=None, + ), + patch( + "sbomify_action._augmentation.providers.sbomify_api.requests.get", + return_value=mock_api_response, + ), + ): augment_sbom_from_file( input_file=str(input_file), output_file=str(augmented_file),