Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ dependencies = [
"packageurl-python>=0.17.6",
"conan>=2.0,<3",
"questionary>=2.0.1,<3",
"pipdeptree>=2.0.0",
]

[project.urls]
Expand Down
50 changes: 50 additions & 0 deletions sbomify_action/_dependency_expansion/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Transitive dependency expansion for SBOMs.

This module provides functionality to discover transitive dependencies
that are missing from lockfiles (particularly requirements.txt) and
add them to SBOMs. It uses ecosystem-specific tools like pipdeptree
to inspect installed packages.

Supported lockfiles:
- Python: requirements.txt (using pipdeptree)

Example usage:
from sbomify_action._dependency_expansion import expand_sbom_dependencies

result = expand_sbom_dependencies(
sbom_file="sbom.json",
lock_file="requirements.txt",
)
print(f"Added {result.added_count} transitive dependencies")

Note:
For pipdeptree to work, packages from requirements.txt must be
installed in the current Python environment. If packages are not
installed, expansion is skipped gracefully.
"""

from .enricher import (
DependencyEnricher,
create_default_registry,
expand_sbom_dependencies,
supports_dependency_expansion,
)
from .models import DiscoveredDependency, ExpansionResult, normalize_python_package_name
from .protocol import DependencyExpander
from .registry import ExpanderRegistry

__all__ = [
# Main API
"expand_sbom_dependencies",
"supports_dependency_expansion",
# Classes for advanced usage
"DependencyEnricher",
"ExpanderRegistry",
"DependencyExpander",
# Models
"DiscoveredDependency",
"ExpansionResult",
"normalize_python_package_name",
# Factory
"create_default_registry",
]
313 changes: 313 additions & 0 deletions sbomify_action/_dependency_expansion/enricher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@
"""Dependency expansion orchestration for SBOMs."""

import json
from pathlib import Path
from typing import Any

from cyclonedx.model.bom import Bom
from cyclonedx.model.component import Component, ComponentType
from packageurl import PackageURL

from ..console import get_audit_trail
from ..logging_config import logger
from ..serialization import serialize_cyclonedx_bom
from .expanders.pipdeptree import PipdeptreeExpander
from .models import DiscoveredDependency, ExpansionResult, normalize_python_package_name
from .registry import ExpanderRegistry


def create_default_registry() -> ExpanderRegistry:
"""Create registry with default expanders."""
registry = ExpanderRegistry()
registry.register(PipdeptreeExpander())
return registry


class DependencyEnricher:
"""Orchestrates dependency expansion for SBOMs.

Uses registered expanders to discover transitive dependencies
and adds them to the SBOM. Records all additions to the audit trail.
"""

def __init__(self, registry: ExpanderRegistry | None = None) -> None:
self._registry = registry or create_default_registry()

def expand_sbom(
self,
sbom_file: str,
lock_file: str,
) -> ExpansionResult:
"""Expand SBOM with discovered transitive dependencies.

Args:
sbom_file: Path to SBOM file (modified in place)
lock_file: Path to lockfile that was used for generation

Returns:
ExpansionResult with statistics
"""
sbom_path = Path(sbom_file)
lock_path = Path(lock_file)

# Find applicable expander
expander = self._registry.get_expander_for(lock_path)
if not expander:
logger.debug(f"No expander supports {lock_path.name}")
return ExpansionResult(
original_count=0,
discovered_count=0,
added_count=0,
dependencies=[],
source="none",
)

# Check if expansion is possible
if not expander.can_expand():
logger.info(
f"Dependency expansion skipped: {expander.name} cannot run "
"(packages may not be installed in environment)"
)
return ExpansionResult(
original_count=0,
discovered_count=0,
added_count=0,
dependencies=[],
source=expander.name,
)

# Discover transitive dependencies
logger.info(f"Discovering transitive dependencies with {expander.name}...")
discovered = expander.expand(lock_path)

if not discovered:
logger.info("No transitive dependencies discovered")
return ExpansionResult(
original_count=0,
discovered_count=0,
added_count=0,
dependencies=[],
source=expander.name,
)

# Load SBOM and detect format
with sbom_path.open("r") as f:
sbom_data = json.load(f)

if sbom_data.get("bomFormat") == "CycloneDX":
result = self._enrich_cyclonedx(sbom_path, sbom_data, discovered, expander.name)
elif sbom_data.get("spdxVersion"):
result = self._enrich_spdx(sbom_path, sbom_data, discovered, expander.name)
else:
logger.warning("Unknown SBOM format, skipping dependency expansion")
result = ExpansionResult(
original_count=0,
discovered_count=len(discovered),
added_count=0,
dependencies=discovered,
source=expander.name,
)

return result

def _enrich_cyclonedx(
self,
sbom_path: Path,
sbom_data: dict[str, Any],
discovered: list[DiscoveredDependency],
source: str,
) -> ExpansionResult:
"""Add discovered dependencies to CycloneDX SBOM."""
bom = Bom.from_json(sbom_data)
original_count = len(bom.components) if bom.components else 0

# Build set of existing PURLs for deduplication
existing_purls: set[str] = set()
if bom.components:
for comp in bom.components:
if comp.purl:
existing_purls.add(str(comp.purl).lower())

# Add new components
added_count = 0
audit = get_audit_trail()

for dep in discovered:
# Skip if already exists
if dep.purl.lower() in existing_purls:
continue

# Create new component
try:
purl_obj = PackageURL.from_string(dep.purl)
except Exception as e:
logger.warning(f"Invalid PURL {dep.purl}: {e}")
continue

component = Component(
type=ComponentType.LIBRARY,
name=dep.name,
version=dep.version,
purl=purl_obj,
)

bom.components.add(component)
added_count += 1

# Record to audit trail
parent_info = f"discovered via {dep.parent}" if dep.parent else "discovered as transitive"
audit.record_enrichment(
component=dep.purl,
field="transitive_dependency",
value=parent_info,
source=source,
)

# Serialize back
spec_version = sbom_data.get("specVersion", "1.6")
serialized = serialize_cyclonedx_bom(bom, spec_version)
with sbom_path.open("w") as f:
f.write(serialized)

logger.info(
f"Dependency expansion: added {added_count} transitive dependencies "
f"(discovered {len(discovered)}, {original_count} original)"
)

return ExpansionResult(
original_count=original_count,
discovered_count=len(discovered),
added_count=added_count,
dependencies=discovered,
source=source,
)

def _enrich_spdx(
self,
sbom_path: Path,
sbom_data: dict[str, Any],
discovered: list[DiscoveredDependency],
source: str,
) -> ExpansionResult:
"""Add discovered dependencies to SPDX SBOM."""
packages = sbom_data.get("packages", [])
original_count = len(packages)

# Build set of existing package identifiers (normalized for comparison)
existing: set[str] = set()
existing_spdx_ids: set[str] = set()
for pkg in packages:
name = pkg.get("name", "")
version = pkg.get("versionInfo", "")
normalized = normalize_python_package_name(name)
existing.add(f"{normalized}@{version}")
if pkg.get("SPDXID"):
existing_spdx_ids.add(pkg["SPDXID"])

# Add new packages
added_count = 0
audit = get_audit_trail()

for dep in discovered:
normalized_name = normalize_python_package_name(dep.name)
key = f"{normalized_name}@{dep.version}"
if key in existing:
continue

# Create new SPDX package
# Generate a unique SPDXID. SPDX IDs must contain only letters, numbers,
# dots, and hyphens, and cannot start with a number. We normalize
# separators to hyphens; collision handling below ensures uniqueness
# within this document if two packages still normalize to the same ID.
safe_name = dep.name.replace("_", "-").replace(".", "-")
safe_version = dep.version.replace("_", "-")
base_spdx_id = f"SPDXRef-Package-{safe_name}-{safe_version}"

# Handle potential collisions by adding a suffix
spdx_id = base_spdx_id
suffix = 1
while spdx_id in existing_spdx_ids:
spdx_id = f"{base_spdx_id}-{suffix}"
suffix += 1
existing_spdx_ids.add(spdx_id)

new_package = {
"SPDXID": spdx_id,
"name": dep.name,
"versionInfo": dep.version,
"downloadLocation": "NOASSERTION",
"filesAnalyzed": False,
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": dep.purl,
}
],
}

packages.append(new_package)
added_count += 1

# Record to audit trail
parent_info = f"discovered via {dep.parent}" if dep.parent else "discovered as transitive"
audit.record_enrichment(
component=dep.purl,
field="transitive_dependency",
value=parent_info,
source=source,
)

sbom_data["packages"] = packages

# Write back
with sbom_path.open("w") as f:
json.dump(sbom_data, f, indent=2)

logger.info(
f"Dependency expansion: added {added_count} transitive dependencies "
f"(discovered {len(discovered)}, {original_count} original)"
)

return ExpansionResult(
original_count=original_count,
discovered_count=len(discovered),
added_count=added_count,
dependencies=discovered,
source=source,
)


def expand_sbom_dependencies(
sbom_file: str,
lock_file: str,
) -> ExpansionResult:
"""Expand SBOM with transitive dependencies from installed packages.

This is the main public API for dependency expansion.

Args:
sbom_file: Path to SBOM file (modified in place)
lock_file: Path to lockfile used for generation

Returns:
ExpansionResult with statistics
"""
enricher = DependencyEnricher()
return enricher.expand_sbom(sbom_file, lock_file)


def supports_dependency_expansion(lock_file: str) -> bool:
"""Check if dependency expansion is supported for this lockfile.

Uses the registry to check if any expander supports the lockfile type.

Args:
lock_file: Path to lockfile

Returns:
True if an expander supports this lockfile type
"""
registry = create_default_registry()
expander = registry.get_expander_for(Path(lock_file))
return expander is not None
5 changes: 5 additions & 0 deletions sbomify_action/_dependency_expansion/expanders/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Dependency expander implementations."""

from .pipdeptree import PipdeptreeExpander

__all__ = ["PipdeptreeExpander"]
Loading