diff --git a/.github/workflows/sbomify.yaml b/.github/workflows/sbomify.yaml index 14c8f32..52ca2ea 100644 --- a/.github/workflows/sbomify.yaml +++ b/.github/workflows/sbomify.yaml @@ -1133,6 +1133,12 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Cache sbomify data + uses: actions/cache@v4 + with: + path: .sbomify-cache + key: sbomify-${{ runner.os }} + - name: Determine version id: version uses: ./.github/actions/determine-version @@ -1153,6 +1159,9 @@ jobs: ENRICH: true UPLOAD: true OUTPUT_FILE: ${{ matrix.output_file }} + SBOMIFY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache + TRIVY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/trivy + SYFT_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/syft - name: Attest SBOM uses: actions/attest-build-provenance@v1 @@ -1211,6 +1220,12 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Cache sbomify data + uses: actions/cache@v4 + with: + path: .sbomify-cache + key: sbomify-${{ runner.os }} + - name: Determine version id: version uses: ./.github/actions/determine-version @@ -1230,6 +1245,9 @@ jobs: ENRICH: true UPLOAD: true OUTPUT_FILE: ${{ matrix.output_file }} + SBOMIFY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache + TRIVY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/trivy + SYFT_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/syft - name: Attest SBOM uses: actions/attest-build-provenance@v1 @@ -1268,6 +1286,12 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Cache sbomify data + uses: actions/cache@v4 + with: + path: .sbomify-cache + key: sbomify-${{ runner.os }} + - name: Generate additional packages from Dockerfile run: ./scripts/generate_additional_packages.sh > container_additional_packages.txt @@ -1292,6 +1316,9 @@ jobs: ENRICH: true UPLOAD: true OUTPUT_FILE: ${{ matrix.output_file }} + SBOMIFY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache + TRIVY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/trivy + SYFT_CACHE_DIR: ${{ 
github.workspace }}/.sbomify-cache/syft - name: Attest SBOM uses: actions/attest-build-provenance@v1 @@ -1330,6 +1357,12 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Cache sbomify data + uses: actions/cache@v4 + with: + path: .sbomify-cache + key: sbomify-${{ runner.os }} + - name: Generate additional packages from Dockerfile run: ./scripts/generate_additional_packages.sh > container_additional_packages.txt @@ -1353,6 +1386,9 @@ jobs: ENRICH: true UPLOAD: true OUTPUT_FILE: ${{ matrix.output_file }} + SBOMIFY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache + TRIVY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/trivy + SYFT_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/syft - name: Attest SBOM uses: actions/attest-build-provenance@v1 diff --git a/README.md b/README.md index fdc1844..ff8e868 100644 --- a/README.md +++ b/README.md @@ -204,6 +204,9 @@ All timestamps are in UTC (ISO 8601 format with Z suffix). | `ADDITIONAL_PACKAGES_FILE` | No | Custom path to additional packages file | | `ADDITIONAL_PACKAGES` | No | Inline PURLs to inject (comma or newline separated) | | `DISABLE_VCS_AUGMENTATION` | No | Set to `true` to disable auto-detection of VCS info from CI environment | +| `SBOMIFY_CACHE_DIR` | No | Base directory for the sbomify license database cache (stored in a `license-db` subdirectory) | +| `TRIVY_CACHE_DIR` | No | Directory for Trivy cache | +| `SYFT_CACHE_DIR` | No | Directory for Syft cache | † **One** of `LOCK_FILE`, `SBOM_FILE`, or `DOCKER_IMAGE` is required (pick one) ‡ Required when uploading to sbomify or using sbomify features (`AUGMENT`, `PRODUCT_RELEASE`) @@ -262,6 +265,39 @@ When uploading to Dependency Track (`UPLOAD_DESTINATIONS=dependency-track`), con ENRICH: true ``` +## Caching + +The sbomify action caches data internally to speed up runs: + +- **License databases** (~20-50MB) - Pre-computed metadata for Linux distro packages +- **Trivy cache** - SBOM generation metadata and package databases +- **Syft cache** - Package metadata for SBOM generation + +To 
persist caches across CI runs, configure your CI platform's caching mechanism. + +### GitHub Actions + +Use `actions/cache` before calling the sbomify action: + +```yaml +- name: Cache sbomify data + uses: actions/cache@v4 + with: + path: .sbomify-cache + key: sbomify-${{ runner.os }} + +- uses: sbomify/github-action@master + env: + SBOMIFY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache + TRIVY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/trivy + SYFT_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/syft + LOCK_FILE: requirements.txt + ENRICH: true + UPLOAD: false +``` + +For caching in other CI environments (GitLab, Bitbucket, Docker), see [Other CI/CD Platforms](#other-cicd-platforms). + ## Product Releases Tag your SBOMs with product releases for version tracking and release management in sbomify. @@ -381,13 +417,20 @@ Append packages across multiple steps: ```yaml generate-sbom: image: sbomifyhub/sbomify-action + cache: + key: sbomify-cache + paths: + - .sbomify-cache/ variables: + SBOMIFY_CACHE_DIR: "${CI_PROJECT_DIR}/.sbomify-cache/sbomify" + TRIVY_CACHE_DIR: "${CI_PROJECT_DIR}/.sbomify-cache/trivy" + SYFT_CACHE_DIR: "${CI_PROJECT_DIR}/.sbomify-cache/syft" LOCK_FILE: poetry.lock OUTPUT_FILE: sbom.cdx.json UPLOAD: "false" ENRICH: "true" script: - - /sbomify.sh + - sbomify-action ``` ### Bitbucket @@ -396,22 +439,41 @@ generate-sbom: pipelines: default: - step: + caches: + - sbomify script: - pipe: docker://sbomifyhub/sbomify-action:latest variables: + SBOMIFY_CACHE_DIR: "${BITBUCKET_CLONE_DIR}/.sbomify-cache/sbomify" + TRIVY_CACHE_DIR: "${BITBUCKET_CLONE_DIR}/.sbomify-cache/trivy" + SYFT_CACHE_DIR: "${BITBUCKET_CLONE_DIR}/.sbomify-cache/syft" LOCK_FILE: poetry.lock OUTPUT_FILE: sbom.cdx.json UPLOAD: "false" ENRICH: "true" + +definitions: + caches: + sbomify: .sbomify-cache ``` ### Docker ```bash -docker run --rm -v $(pwd):/code \ - -e LOCK_FILE=/code/requirements.txt \ - -e OUTPUT_FILE=/code/sbom.cdx.json \ +# Create persistent cache volume +docker 
volume create sbomify-cache + +docker run --rm \ + -v $(pwd):/github/workspace \ + -v sbomify-cache:/cache \ + -w /github/workspace \ + -e SBOMIFY_CACHE_DIR=/cache/sbomify \ + -e TRIVY_CACHE_DIR=/cache/trivy \ + -e SYFT_CACHE_DIR=/cache/syft \ + -e LOCK_FILE=/github/workspace/requirements.txt \ + -e OUTPUT_FILE=/github/workspace/sbom.cdx.json \ -e UPLOAD=false \ + -e ENRICH=true \ sbomifyhub/sbomify-action ``` diff --git a/sbomify_action/_enrichment/sources/license_db.py b/sbomify_action/_enrichment/sources/license_db.py index b4b6c6c..0225c0f 100644 --- a/sbomify_action/_enrichment/sources/license_db.py +++ b/sbomify_action/_enrichment/sources/license_db.py @@ -114,8 +114,19 @@ def clear_cache() -> None: def get_cache_dir() -> Path: - """Get the cache directory, creating it if needed.""" - cache_dir = DEFAULT_CACHE_DIR + """Get the cache directory, creating it if needed. + + Priority: + 1. SBOMIFY_CACHE_DIR/license-db (explicit location from the environment variable) + 2. XDG_CACHE_HOME/sbomify/license-db (XDG standard) + 3. 
~/.cache/sbomify/license-db (fallback) + """ + explicit_cache = os.environ.get("SBOMIFY_CACHE_DIR") + if explicit_cache: + cache_dir = Path(explicit_cache) / "license-db" + else: + cache_dir = DEFAULT_CACHE_DIR + cache_dir.mkdir(parents=True, exist_ok=True) return cache_dir diff --git a/tests/test_license_db_cache.py b/tests/test_license_db_cache.py new file mode 100644 index 0000000..6490b86 --- /dev/null +++ b/tests/test_license_db_cache.py @@ -0,0 +1,88 @@ +"""Tests for license database cache directory configuration.""" + +import os +from pathlib import Path +from unittest.mock import patch + +from sbomify_action._enrichment.sources.license_db import get_cache_dir + + +class TestLicenseDBCacheDir: + """Test cache directory configuration.""" + + def test_default_cache_dir_when_no_env_vars(self, tmp_path: Path): + """Test default cache directory when no environment variables are set.""" + with patch.dict(os.environ, {}, clear=True): + # Also need to patch home directory to avoid side effects + with patch.object(Path, "home", return_value=tmp_path): + # DEFAULT_CACHE_DIR is computed at import time, so we need to test + # the function behavior + cache_dir = get_cache_dir() + # It should be under the home directory's .cache + assert "license-db" in str(cache_dir) + + def test_sbomify_cache_dir_takes_precedence(self, tmp_path: Path): + """Test that SBOMIFY_CACHE_DIR environment variable is used when set.""" + custom_cache = tmp_path / "custom-cache" + with patch.dict(os.environ, {"SBOMIFY_CACHE_DIR": str(custom_cache)}, clear=False): + cache_dir = get_cache_dir() + assert cache_dir == custom_cache / "license-db" + # Verify directory was created + assert cache_dir.exists() + + def test_sbomify_cache_dir_creates_subdirectory(self, tmp_path: Path): + """Test that license-db subdirectory is created under SBOMIFY_CACHE_DIR.""" + custom_cache = tmp_path / "my-cache" + with patch.dict(os.environ, {"SBOMIFY_CACHE_DIR": str(custom_cache)}, clear=False): + cache_dir = 
get_cache_dir() + assert cache_dir.name == "license-db" + assert cache_dir.parent == custom_cache + assert cache_dir.exists() + + def test_xdg_cache_home_respected_when_sbomify_cache_dir_not_set(self, tmp_path: Path): + """Test that XDG_CACHE_HOME is respected as fallback.""" + import importlib + + import sbomify_action._enrichment.sources.license_db as license_db_module + + xdg_cache = tmp_path / "xdg-cache" + with patch.dict(os.environ, {"XDG_CACHE_HOME": str(xdg_cache)}, clear=False): + # Clear SBOMIFY_CACHE_DIR to test fallback + env = os.environ.copy() + env.pop("SBOMIFY_CACHE_DIR", None) + with patch.dict(os.environ, env, clear=True): + # Reload the module to pick up new XDG_CACHE_HOME + importlib.reload(license_db_module) + try: + cache_dir = license_db_module.get_cache_dir() + assert "sbomify" in str(cache_dir) + assert "license-db" in str(cache_dir) + finally: + # Restore module to original state + importlib.reload(license_db_module) + + def test_cache_directory_is_created_if_not_exists(self, tmp_path: Path): + """Test that cache directory is created if it doesn't exist.""" + custom_cache = tmp_path / "new-cache-dir" + assert not custom_cache.exists() + with patch.dict(os.environ, {"SBOMIFY_CACHE_DIR": str(custom_cache)}, clear=False): + cache_dir = get_cache_dir() + assert cache_dir.exists() + assert cache_dir.is_dir() + + def test_sbomify_cache_dir_works_with_nested_path(self, tmp_path: Path): + """Test SBOMIFY_CACHE_DIR works with deeply nested paths.""" + nested_cache = tmp_path / "a" / "b" / "c" / "cache" + with patch.dict(os.environ, {"SBOMIFY_CACHE_DIR": str(nested_cache)}, clear=False): + cache_dir = get_cache_dir() + assert cache_dir.exists() + assert cache_dir == nested_cache / "license-db" + + def test_empty_sbomify_cache_dir_falls_back_to_default(self, tmp_path: Path): + """Test that empty string SBOMIFY_CACHE_DIR falls back to default.""" + with patch.dict(os.environ, {"SBOMIFY_CACHE_DIR": ""}, clear=False): + cache_dir = get_cache_dir() + # 
An empty string is falsy, so get_cache_dir() should fall back to DEFAULT_CACHE_DIR + assert "license-db" in str(cache_dir) + # Must NOT be a path built from the empty string + assert str(cache_dir) != "/license-db"