From 370e031860649fd91625a0125c33836d35e3f90e Mon Sep 17 00:00:00 2001 From: Viktor Petersson Date: Thu, 29 Jan 2026 16:13:51 +0000 Subject: [PATCH] Add caching support for license DBs, Trivy, and Syft Enable persistent caching across CI runs by: - Adding SBOMIFY_CACHE_DIR environment variable support to license_db.py - Adding cache steps to all SBOM generation jobs in the workflow - Documenting caching configuration for GitHub Actions, GitLab CI, Bitbucket Pipelines, and direct Docker usage This speeds up subsequent runs by reusing cached license databases, Trivy vulnerability databases, and Syft metadata. Co-Authored-By: Claude Opus 4.5 --- .github/workflows/sbomify.yaml | 36 ++++++++ README.md | 70 ++++++++++++++- .../_enrichment/sources/license_db.py | 15 +++- tests/test_license_db_cache.py | 88 +++++++++++++++++++ 4 files changed, 203 insertions(+), 6 deletions(-) create mode 100644 tests/test_license_db_cache.py diff --git a/.github/workflows/sbomify.yaml b/.github/workflows/sbomify.yaml index 14c8f32..52ca2ea 100644 --- a/.github/workflows/sbomify.yaml +++ b/.github/workflows/sbomify.yaml @@ -1133,6 +1133,12 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Cache sbomify data + uses: actions/cache@v4 + with: + path: .sbomify-cache + key: sbomify-${{ runner.os }} + - name: Determine version id: version uses: ./.github/actions/determine-version @@ -1153,6 +1159,9 @@ jobs: ENRICH: true UPLOAD: true OUTPUT_FILE: ${{ matrix.output_file }} + SBOMIFY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache + TRIVY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/trivy + SYFT_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/syft - name: Attest SBOM uses: actions/attest-build-provenance@v1 @@ -1211,6 +1220,12 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Cache sbomify data + uses: actions/cache@v4 + with: + path: .sbomify-cache + key: sbomify-${{ runner.os }} + - name: Determine version id: version uses: 
./.github/actions/determine-version @@ -1230,6 +1245,9 @@ jobs: ENRICH: true UPLOAD: true OUTPUT_FILE: ${{ matrix.output_file }} + SBOMIFY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache + TRIVY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/trivy + SYFT_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/syft - name: Attest SBOM uses: actions/attest-build-provenance@v1 @@ -1268,6 +1286,12 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Cache sbomify data + uses: actions/cache@v4 + with: + path: .sbomify-cache + key: sbomify-${{ runner.os }} + - name: Generate additional packages from Dockerfile run: ./scripts/generate_additional_packages.sh > container_additional_packages.txt @@ -1292,6 +1316,9 @@ jobs: ENRICH: true UPLOAD: true OUTPUT_FILE: ${{ matrix.output_file }} + SBOMIFY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache + TRIVY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/trivy + SYFT_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/syft - name: Attest SBOM uses: actions/attest-build-provenance@v1 @@ -1330,6 +1357,12 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Cache sbomify data + uses: actions/cache@v4 + with: + path: .sbomify-cache + key: sbomify-${{ runner.os }} + - name: Generate additional packages from Dockerfile run: ./scripts/generate_additional_packages.sh > container_additional_packages.txt @@ -1353,6 +1386,9 @@ jobs: ENRICH: true UPLOAD: true OUTPUT_FILE: ${{ matrix.output_file }} + SBOMIFY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache + TRIVY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/trivy + SYFT_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/syft - name: Attest SBOM uses: actions/attest-build-provenance@v1 diff --git a/README.md b/README.md index fdc1844..ff8e868 100644 --- a/README.md +++ b/README.md @@ -204,6 +204,9 @@ All timestamps are in UTC (ISO 8601 format with Z suffix). 
| `ADDITIONAL_PACKAGES_FILE` | No | Custom path to additional packages file | | `ADDITIONAL_PACKAGES` | No | Inline PURLs to inject (comma or newline separated) | | `DISABLE_VCS_AUGMENTATION` | No | Set to `true` to disable auto-detection of VCS info from CI environment | +| `SBOMIFY_CACHE_DIR` | No | Directory for sbomify license database cache | +| `TRIVY_CACHE_DIR` | No | Directory for Trivy cache | +| `SYFT_CACHE_DIR` | No | Directory for Syft cache | † **One** of `LOCK_FILE`, `SBOM_FILE`, or `DOCKER_IMAGE` is required (pick one) ‡ Required when uploading to sbomify or using sbomify features (`AUGMENT`, `PRODUCT_RELEASE`) @@ -262,6 +265,39 @@ When uploading to Dependency Track (`UPLOAD_DESTINATIONS=dependency-track`), con ENRICH: true ``` +## Caching + +The sbomify action caches data internally to speed up runs: + +- **License databases** (~20-50MB) - Pre-computed metadata for Linux distro packages +- **Trivy cache** - SBOM generation metadata and package databases +- **Syft cache** - Package metadata for SBOM generation + +To persist caches across CI runs, configure your CI platform's caching mechanism. + +### GitHub Actions + +Use `actions/cache` before calling the sbomify action: + +```yaml +- name: Cache sbomify data + uses: actions/cache@v4 + with: + path: .sbomify-cache + key: sbomify-${{ runner.os }} + +- uses: sbomify/github-action@master + env: + SBOMIFY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache + TRIVY_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/trivy + SYFT_CACHE_DIR: ${{ github.workspace }}/.sbomify-cache/syft + LOCK_FILE: requirements.txt + ENRICH: true + UPLOAD: false +``` + +For caching in other CI environments (GitLab, Bitbucket, Docker), see [Other CI/CD Platforms](#other-cicd-platforms). + ## Product Releases Tag your SBOMs with product releases for version tracking and release management in sbomify. 
@@ -381,13 +417,20 @@ Append packages across multiple steps: ```yaml generate-sbom: image: sbomifyhub/sbomify-action + cache: + key: sbomify-cache + paths: + - .sbomify-cache/ variables: + SBOMIFY_CACHE_DIR: "${CI_PROJECT_DIR}/.sbomify-cache/sbomify" + TRIVY_CACHE_DIR: "${CI_PROJECT_DIR}/.sbomify-cache/trivy" + SYFT_CACHE_DIR: "${CI_PROJECT_DIR}/.sbomify-cache/syft" LOCK_FILE: poetry.lock OUTPUT_FILE: sbom.cdx.json UPLOAD: "false" ENRICH: "true" script: - - /sbomify.sh + - sbomify-action ``` ### Bitbucket @@ -396,22 +439,41 @@ generate-sbom: pipelines: default: - step: + caches: + - sbomify script: - pipe: docker://sbomifyhub/sbomify-action:latest variables: + SBOMIFY_CACHE_DIR: "${BITBUCKET_CLONE_DIR}/.sbomify-cache/sbomify" + TRIVY_CACHE_DIR: "${BITBUCKET_CLONE_DIR}/.sbomify-cache/trivy" + SYFT_CACHE_DIR: "${BITBUCKET_CLONE_DIR}/.sbomify-cache/syft" LOCK_FILE: poetry.lock OUTPUT_FILE: sbom.cdx.json UPLOAD: "false" ENRICH: "true" + +definitions: + caches: + sbomify: .sbomify-cache ``` ### Docker ```bash -docker run --rm -v $(pwd):/code \ - -e LOCK_FILE=/code/requirements.txt \ - -e OUTPUT_FILE=/code/sbom.cdx.json \ +# Create persistent cache volume +docker volume create sbomify-cache + +docker run --rm \ + -v $(pwd):/github/workspace \ + -v sbomify-cache:/cache \ + -w /github/workspace \ + -e SBOMIFY_CACHE_DIR=/cache/sbomify \ + -e TRIVY_CACHE_DIR=/cache/trivy \ + -e SYFT_CACHE_DIR=/cache/syft \ + -e LOCK_FILE=/github/workspace/requirements.txt \ + -e OUTPUT_FILE=/github/workspace/sbom.cdx.json \ -e UPLOAD=false \ + -e ENRICH=true \ sbomifyhub/sbomify-action ``` diff --git a/sbomify_action/_enrichment/sources/license_db.py b/sbomify_action/_enrichment/sources/license_db.py index b4b6c6c..0225c0f 100644 --- a/sbomify_action/_enrichment/sources/license_db.py +++ b/sbomify_action/_enrichment/sources/license_db.py @@ -114,8 +114,19 @@ def clear_cache() -> None: def get_cache_dir() -> Path: - """Get the cache directory, creating it if needed.""" - cache_dir = 
DEFAULT_CACHE_DIR + """Get the cache directory, creating it if needed. + + Priority: + 1. SBOMIFY_CACHE_DIR environment variable (explicit cache location) + 2. XDG_CACHE_HOME/sbomify/license-db (XDG standard) + 3. ~/.cache/sbomify/license-db (fallback) + """ + explicit_cache = os.environ.get("SBOMIFY_CACHE_DIR") + if explicit_cache: + cache_dir = Path(explicit_cache) / "license-db" + else: + cache_dir = DEFAULT_CACHE_DIR + cache_dir.mkdir(parents=True, exist_ok=True) return cache_dir diff --git a/tests/test_license_db_cache.py b/tests/test_license_db_cache.py new file mode 100644 index 0000000..6490b86 --- /dev/null +++ b/tests/test_license_db_cache.py @@ -0,0 +1,88 @@ +"""Tests for license database cache directory configuration.""" + +import os +from pathlib import Path +from unittest.mock import patch + +from sbomify_action._enrichment.sources.license_db import get_cache_dir + + +class TestLicenseDBCacheDir: + """Test cache directory configuration.""" + + def test_default_cache_dir_when_no_env_vars(self, tmp_path: Path): + """Test default cache directory when no environment variables are set.""" + with patch.dict(os.environ, {}, clear=True): + # Also need to patch home directory to avoid side effects + with patch.object(Path, "home", return_value=tmp_path): + # DEFAULT_CACHE_DIR is computed at import time, so we need to test + # the function behavior + cache_dir = get_cache_dir() + # It should be under the home directory's .cache + assert "license-db" in str(cache_dir) + + def test_sbomify_cache_dir_takes_precedence(self, tmp_path: Path): + """Test that SBOMIFY_CACHE_DIR environment variable is used when set.""" + custom_cache = tmp_path / "custom-cache" + with patch.dict(os.environ, {"SBOMIFY_CACHE_DIR": str(custom_cache)}, clear=False): + cache_dir = get_cache_dir() + assert cache_dir == custom_cache / "license-db" + # Verify directory was created + assert cache_dir.exists() + + def test_sbomify_cache_dir_creates_subdirectory(self, tmp_path: Path): + """Test 
that license-db subdirectory is created under SBOMIFY_CACHE_DIR."""
+        custom_cache = tmp_path / "my-cache"
+        with patch.dict(os.environ, {"SBOMIFY_CACHE_DIR": str(custom_cache)}, clear=False):
+            cache_dir = get_cache_dir()
+            assert cache_dir.name == "license-db"
+            assert cache_dir.parent == custom_cache
+            assert cache_dir.exists()
+
+    def test_xdg_cache_home_respected_when_sbomify_cache_dir_not_set(self, tmp_path: Path):
+        """Test that XDG_CACHE_HOME is respected as fallback."""
+        import importlib
+
+        import sbomify_action._enrichment.sources.license_db as license_db_module
+
+        xdg_cache = tmp_path / "xdg-cache"
+        try:
+            with patch.dict(os.environ, {"XDG_CACHE_HOME": str(xdg_cache)}, clear=False):
+                # Clear SBOMIFY_CACHE_DIR to test fallback
+                env = os.environ.copy()
+                env.pop("SBOMIFY_CACHE_DIR", None)
+                with patch.dict(os.environ, env, clear=True):
+                    # Reload the module to pick up new XDG_CACHE_HOME
+                    importlib.reload(license_db_module)
+                    cache_dir = license_db_module.get_cache_dir()
+                    assert "sbomify" in str(cache_dir)
+                    assert "license-db" in str(cache_dir)
+        finally:
+            # Restore the module only after os.environ is un-patched, so the reload sees the real env
+            importlib.reload(license_db_module)
+
+    def test_cache_directory_is_created_if_not_exists(self, tmp_path: Path):
+        """Test that cache directory is created if it doesn't exist."""
+        custom_cache = tmp_path / "new-cache-dir"
+        assert not custom_cache.exists()
+        with patch.dict(os.environ, {"SBOMIFY_CACHE_DIR": str(custom_cache)}, clear=False):
+            cache_dir = get_cache_dir()
+            assert cache_dir.exists()
+            assert cache_dir.is_dir()
+
+    def test_sbomify_cache_dir_works_with_nested_path(self, tmp_path: Path):
+        """Test SBOMIFY_CACHE_DIR works with deeply nested paths."""
+        nested_cache = tmp_path / "a" / "b" / "c" / "cache"
+        with patch.dict(os.environ, {"SBOMIFY_CACHE_DIR": str(nested_cache)}, clear=False):
+            cache_dir = get_cache_dir()
+            assert cache_dir.exists()
+            assert cache_dir == nested_cache / "license-db"
+
+    def
test_empty_sbomify_cache_dir_falls_back_to_default(self, tmp_path: Path):
+        """Test that empty string SBOMIFY_CACHE_DIR falls back to default."""
+        with patch.dict(os.environ, {"SBOMIFY_CACHE_DIR": ""}, clear=False):
+            cache_dir = get_cache_dir()
+            # Empty string is falsy, so should use DEFAULT_CACHE_DIR
+            assert "license-db" in str(cache_dir)
+            # Path("") / "license-db" is the relative path "license-db"; that is what must NOT come back
+            assert cache_dir != Path("license-db")