From bd5c1f2ca7a85e8c1ee7b2c0016b46d46a679709 Mon Sep 17 00:00:00 2001
From: Michael Ehab Mikhail
Date: Fri, 25 Jul 2025 15:52:50 +0300
Subject: [PATCH 1/2] Add PyPa live importer #1953

* Add PyPa live pipeline importer to fetch advisories affecting a single PURL
* Add tests for PyPa live importer

Signed-off-by: Michael Ehab Mikhail
---
Reviewer note: pypa_live_importer.py was revised during review; the blob id on
its "index" line predates that revision.

 vulnerabilities/importers/__init__.py         |   7 +
 .../v2_importers/pypa_live_importer.py        | 206 ++++++++++++++++++
 .../test_pypa_v2_live_importer_pipeline.py    | 134 ++++++++++++
 3 files changed, 347 insertions(+)
 create mode 100644 vulnerabilities/pipelines/v2_importers/pypa_live_importer.py
 create mode 100644 vulnerabilities/tests/pipelines/test_pypa_v2_live_importer_pipeline.py

diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py
index 706ca3c07..e8b19c20e 100644
--- a/vulnerabilities/importers/__init__.py
+++ b/vulnerabilities/importers/__init__.py
@@ -55,6 +55,7 @@
 from vulnerabilities.pipelines.v2_importers import oss_fuzz as oss_fuzz_v2
 from vulnerabilities.pipelines.v2_importers import postgresql_importer as postgresql_importer_v2
 from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2
+from vulnerabilities.pipelines.v2_importers import pypa_live_importer as pypa_live_importer_v2
 from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2
 from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2
 from vulnerabilities.pipelines.v2_importers import xen_importer as xen_importer_v2
@@ -113,3 +114,9 @@
         oss_fuzz.OSSFuzzImporter,
     ]
 )
+
+LIVE_IMPORTERS_REGISTRY = create_registry(
+    [
+        pypa_live_importer_v2.PyPaLiveImporterPipeline,
+    ]
+)
diff --git a/vulnerabilities/pipelines/v2_importers/pypa_live_importer.py b/vulnerabilities/pipelines/v2_importers/pypa_live_importer.py
new file mode 100644
index 000000000..4760df2a3
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/pypa_live_importer.py
@@ -0,0 +1,206 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+
+from typing import Iterable
+
+import requests
+import saneyaml
+from packageurl import PackageURL
+from univers.versions import PypiVersion
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+
+# Seconds to wait for GitHub before giving up, so a stalled connection cannot hang a run.
+REQUEST_TIMEOUT = 30
+
+
+class PyPaLiveImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+    """
+    Pypa Live Importer Pipeline
+
+    Collect advisories from PyPA GitHub repository for a single PURL.
+    """
+
+    pipeline_id = "pypa_live_importer_v2"
+    supported_types = ["pypi"]
+    spdx_license_expression = "CC-BY-4.0"
+    license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE"
+
+    @classmethod
+    def steps(cls):
+        return (
+            cls.get_purl_inputs,
+            cls.fetch_package_advisories,
+            cls.collect_and_store_advisories,
+        )
+
+    def get_purl_inputs(self):
+        """
+        Validate the "purl" pipeline input and store it as ``self.purl``.
+
+        Raise a ValueError if the PURL is empty, is not a PackageURL or PURL string, has
+        an unsupported package type or has no version.
+        """
+        purl = self.inputs["purl"]
+        if not purl:
+            raise ValueError("PURL is required for PyPaLiveImporterPipeline")
+
+        if isinstance(purl, str):
+            purl = PackageURL.from_string(purl)
+
+        if not isinstance(purl, PackageURL):
+            raise ValueError(f"Object of type {type(purl)} {purl!r} is not a PackageURL instance")
+
+        if purl.type not in self.supported_types:
+            raise ValueError(
+                f"PURL: {purl!s} is not among the supported package types {self.supported_types!r}"
+            )
+
+        if not purl.version:
+            raise ValueError(f"PURL: {purl!s} is expected to have a version")
+
+        self.purl = purl
+
+    @staticmethod
+    def _parse_version(version_string):
+        """
+        Return a PypiVersion for ``version_string`` or None if it cannot be parsed.
+        """
+        try:
+            return PypiVersion(str(version_string))
+        except Exception:
+            # Best effort: a version that univers rejects simply matches nothing.
+            return None
+
+    def _is_in_range(self, candidate, events):
+        """
+        Return True if the ``candidate`` PypiVersion is vulnerable according to the
+        "introduced", "fixed" and "last_affected" ``events`` of a single OSV range.
+
+        Events are replayed in ascending version order so that a range holding several
+        introduced/fixed pairs is evaluated pair by pair rather than on its last pair only.
+        """
+        boundaries = []
+        for event in events:
+            for kind in ("introduced", "fixed", "last_affected"):
+                if kind not in event:
+                    continue
+                boundary = self._parse_version(event[kind])
+                if boundary is None:
+                    # An unparsable boundary makes the whole range unusable: skip it.
+                    return False
+                boundaries.append((boundary, kind))
+
+        vulnerable = False
+        for boundary, kind in sorted(boundaries, key=lambda item: item[0]):
+            if kind == "introduced":
+                if candidate >= boundary:
+                    vulnerable = True
+            elif kind == "fixed":
+                if candidate >= boundary:
+                    vulnerable = False
+            elif boundary < candidate:
+                # "last_affected" is inclusive: only later versions are unaffected.
+                vulnerable = False
+        return vulnerable
+
+    def _is_version_affected(self, advisory_dict, version):
+        """
+        Return True if ``version`` is affected according to the OSV "affected" entries of
+        ``advisory_dict``.
+
+        A version is affected when an entry lists it in "versions" or when it falls inside
+        one of the entry's non-GIT "ranges". Return False if ``version`` cannot be parsed.
+        """
+        candidate = self._parse_version(version)
+        if candidate is None:
+            return False
+
+        for entry in advisory_dict.get("affected") or []:
+            for listed_version in entry.get("versions") or []:
+                listed = self._parse_version(listed_version)
+                if listed is not None and listed == candidate:
+                    return True
+
+            for version_range in entry.get("ranges") or []:
+                if version_range.get("type") == "GIT":
+                    # GIT ranges hold commit hashes, not PyPI versions.
+                    continue
+                if self._is_in_range(candidate, version_range.get("events") or []):
+                    return True
+        return False
+
+    def fetch_package_advisories(self):
+        """
+        Download the PyPA advisory files of the ``self.purl`` package and keep the ones
+        affecting its version in ``self.package_advisories``.
+        """
+        # Define the attribute up front so that every early return leaves a usable list.
+        self.package_advisories = []
+
+        if self.purl.type not in self.supported_types:
+            return
+
+        search_path = f"vulns/{self.purl.name}"
+        api_url = f"https://api.github.com/repos/pypa/advisory-database/contents/{search_path}"
+        response = requests.get(api_url, timeout=REQUEST_TIMEOUT)
+
+        if response.status_code == 404:
+            self.log(f"No advisories found for package {self.purl.name}")
+            return
+
+        if response.status_code != 200:
+            self.log(f"Failed to fetch advisories: {response.status_code} {response.text}")
+            return
+
+        for item in response.json():
+            if item["type"] != "file" or not item["name"].endswith(".yaml"):
+                continue
+
+            self.log("Fetching advisory file: " + item["name"])
+            file_response = requests.get(item["download_url"], timeout=REQUEST_TIMEOUT)
+            if file_response.status_code != 200:
+                # Report the skipped file instead of dropping it silently.
+                self.log(f"Failed to fetch {item['name']}: {file_response.status_code}")
+                continue
+
+            advisory_text = file_response.text
+            advisory_dict = saneyaml.load(advisory_text)
+            if not isinstance(advisory_dict, dict):
+                # An empty or non-mapping YAML document is not an OSV advisory.
+                self.log(f"Skipping malformed advisory file: {item['name']}")
+                continue
+
+            if self.purl.version and not self._is_version_affected(
+                advisory_dict, self.purl.version
+            ):
+                continue
+
+            self.package_advisories.append(
+                {"text": advisory_text, "dict": advisory_dict, "url": item["html_url"]}
+            )
+
+    def advisories_count(self):
+        return len(getattr(self, "package_advisories", []))
+
+    def collect_advisories(self) -> Iterable[AdvisoryData]:
+        """
+        Yield an AdvisoryData for each advisory kept by fetch_package_advisories().
+        """
+        from vulnerabilities.importers.osv import parse_advisory_data_v2
+
+        for advisory in getattr(self, "package_advisories", []):
+            yield parse_advisory_data_v2(
+                raw_data=advisory["dict"],
+                supported_ecosystems=self.supported_types,
+                advisory_url=advisory["url"],
+                advisory_text=advisory["text"],
+            )
diff --git a/vulnerabilities/tests/pipelines/test_pypa_v2_live_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_pypa_v2_live_importer_pipeline.py
new file mode 100644
index 000000000..8b07624de
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_pypa_v2_live_importer_pipeline.py
@@ -0,0 +1,134 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+import pytest
+import saneyaml
+from packageurl import PackageURL
+
+from vulnerabilities.importer import AdvisoryData
+
+
+@pytest.fixture
+def mock_github_api_response():
+    return {
+        "status_code": 200,
+        "json": [
+            {
+                "type": "file",
+                "name": "CVE-2022-1234.yaml",
+                "download_url": "https://raw.githubusercontent.com/pypa/advisory-database/main/vulns/package1/CVE-2022-1234.yaml",
+                "html_url": "https://github.com/pypa/advisory-database/blob/main/vulns/package1/CVE-2022-1234.yaml",
+            },
+            {
+                "type": "file",
+                "name": "CVE-2022-5678.yaml",
+                "download_url": "https://raw.githubusercontent.com/pypa/advisory-database/main/vulns/package1/CVE-2022-5678.yaml",
+                "html_url": "https://github.com/pypa/advisory-database/blob/main/vulns/package1/CVE-2022-5678.yaml",
+            },
+        ],
+    }
+
+
+@pytest.fixture
+def mock_advisory_files():
+    advisory1 = {
+        "id": "CVE-2022-1234",
+        "summary": "A vulnerability in package1",
+        "affected": [
+            {
+                "package": {"name": "package1", "ecosystem": "PyPI"},
+                "ranges": [
+                    {"type": "ECOSYSTEM", "events": [{"introduced": "1.0.0"}, {"fixed": "1.2.0"}]}
+                ],
+            }
+        ],
+    }
+
+    advisory2 = {
+        "id": "CVE-2022-5678",
+        "summary": "Another vulnerability in package1",
+        "affected": [
+            {
+                "package": {"name": "package1", "ecosystem": "PyPI"},
+                "ranges": [
+                    {"type": "ECOSYSTEM", "events": [{"introduced": "1.5.0"}, {"fixed": "1.7.0"}]}
+                ],
+            }
+        ],
+    }
+
+    return {
+        "https://raw.githubusercontent.com/pypa/advisory-database/main/vulns/package1/CVE-2022-1234.yaml": advisory1,
+        "https://raw.githubusercontent.com/pypa/advisory-database/main/vulns/package1/CVE-2022-5678.yaml": advisory2,
+    }
+
+
+def test_package_with_version_affected(mock_github_api_response, mock_advisory_files):
+    from vulnerabilities.pipelines.v2_importers.pypa_live_importer import PyPaLiveImporterPipeline
+
+    purl = PackageURL(type="pypi", name="package1", version="1.1.0")
+
+    with patch("requests.get") as mock_get:
+        mock_api_response = MagicMock()
+        mock_api_response.status_code = mock_github_api_response["status_code"]
+        mock_api_response.json.return_value = mock_github_api_response["json"]
+
+        def mock_get_side_effect(url, *args, **kwargs):
+            if "api.github.com" in url:
+                return mock_api_response
+
+            mock_file_response = MagicMock()
+            mock_file_response.status_code = 200
+            mock_file_response.text = saneyaml.dump(mock_advisory_files[url])
+            return mock_file_response
+
+        mock_get.side_effect = mock_get_side_effect
+
+        with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse:
+
+            def side_effect(raw_data, supported_ecosystems, advisory_url, advisory_text):
+                return AdvisoryData(
+                    advisory_id=raw_data["id"],
+                    summary=raw_data["summary"],
+                    references_v2=[{"url": advisory_url}],
+                    affected_packages=[],
+                    weaknesses=[],
+                    url=advisory_url,
+                )
+
+            mock_parse.side_effect = side_effect
+
+            pipeline = PyPaLiveImporterPipeline(selected_groups=["package_first"], purl=purl)
+            pipeline.get_purl_inputs()
+            pipeline.fetch_package_advisories()
+            advisories = list(pipeline.collect_advisories())
+
+            assert len(advisories) == 1
+            assert advisories[0].advisory_id == "CVE-2022-1234"
+
+
+def test_nonexistent_package():
+    from vulnerabilities.pipelines.v2_importers.pypa_live_importer import PyPaLiveImporterPipeline
+
+    purl = PackageURL(type="pypi", name="nonexistent_package", version="1.0.0")
+
+    with patch("requests.get") as mock_get:
+        mock_response = MagicMock()
+        mock_response.status_code = 404
+        mock_get.return_value = mock_response
+
+        pipeline = PyPaLiveImporterPipeline(selected_groups=["package_first"], purl=purl)
+        pipeline.get_purl_inputs()
+        pipeline.fetch_package_advisories()
+        advisories = list(pipeline.collect_advisories())
+
+        assert len(advisories) == 0

From d1a7cc12fa1d99ec78d9fc6b5949f6a283e6f1d6 Mon Sep 17 00:00:00 2001
From: Michael Ehab Mikhail
Date: Wed, 30 Jul 2025 15:53:40 +0300
Subject: [PATCH 2/2] Add Live Evaluation API endpoint #1902

* Add a new API endpoint to run live evaluation importers
* Add tests for the live evaluation API endpoint

Signed-off-by: Michael Ehab Mikhail
---
 vulnerabilities/api_v2.py            | 84 ++++++++++++++++++++++++++++
 vulnerabilities/tests/test_api_v2.py | 65 +++++++++++++++++++++
 vulnerablecode/urls.py               |  2 +
 3 files changed, 151 insertions(+)

diff --git a/vulnerabilities/api_v2.py b/vulnerabilities/api_v2.py
index c45dbfebe..066651ff7 100644
--- a/vulnerabilities/api_v2.py
+++ b/vulnerabilities/api_v2.py
@@ -8,6 +8,9 @@
 #
 
 
+from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import as_completed
+
 from django.db.models import Prefetch
 from django_filters import rest_framework as filters
 from drf_spectacular.utils import OpenApiParameter
@@ -25,6 +28,7 @@
 from rest_framework.reverse import reverse
 from rest_framework.throttling import AnonRateThrottle
 
+from vulnerabilities.importers import LIVE_IMPORTERS_REGISTRY
 from vulnerabilities.models import AdvisoryReference
 from vulnerabilities.models import AdvisorySeverity
 from vulnerabilities.models import AdvisoryV2
@@ -1225,3 +1229,83 @@ def lookup(self, request):
         return Response(
             AdvisoryPackageV2Serializer(qs, many=True, context={"request": request}).data
         )
+
+
+class LiveEvaluationSerializer(serializers.Serializer):
+    purl_string = serializers.CharField(help_text="PackageURL to evaluate")
+    no_threading = serializers.BooleanField(required=False, default=False)
+
+
+class LiveEvaluationViewSet(viewsets.GenericViewSet):
+    serializer_class = LiveEvaluationSerializer
+
+    @extend_schema(
+        request=LiveEvaluationSerializer,
+        responses={
+            202: {"description": "Live evaluation done successfully"},
+            400: {"description": "Invalid request"},
+            500: {"description": "Internal server error"},
+        },
+    )
+    @action(detail=False, methods=["post"])
+    def evaluate(self, request):
+        serializer = self.get_serializer(data=request.data)
+        if not serializer.is_valid():
+            return Response(
+                serializer.errors,
+                status=status.HTTP_400_BAD_REQUEST,
+            )
+
+        purl_string = serializer.validated_data.get("purl_string")
+        no_threading = serializer.validated_data.get("no_threading", False)
+
+        try:
+            purl = PackageURL.from_string(purl_string) if purl_string else None
+            if not purl:
+                return Response({"error": "Invalid PackageURL"}, status=status.HTTP_400_BAD_REQUEST)
+        except Exception as e:
+            return Response(
+                {"error": f"Invalid PackageURL: {str(e)}"}, status=status.HTTP_400_BAD_REQUEST
+            )
+
+        importers = [
+            importer
+            for importer in LIVE_IMPORTERS_REGISTRY.values()
+            if hasattr(importer, "supported_types")
+            and purl.type in getattr(importer, "supported_types", [])
+        ]
+
+        if not importers:
+            return Response(
+                {"error": f"No live importers found for purl type '{purl.type}'"},
+                status=status.HTTP_400_BAD_REQUEST,
+            )
+
+        results = []
+
+        def run_importer(importer):
+            importer_name = getattr(importer, "pipeline_id", importer.__name__)
+            response_data = {"importer": importer_name, "purl": purl_string, "steps_completed": []}
+            try:
+                pipeline_instance = importer(purl=purl)
+                status_code, error = pipeline_instance.execute()
+                if status_code != 0:
+                    response_data["error"] = f"Importer {importer_name} failed: {error}"
+                else:
+                    response_data["steps_completed"].append("import")
+            except Exception as e:
+                response_data["error"] = f"Error running importer {importer_name}: {str(e)}"
+            return response_data
+
+        if not no_threading and len(importers) > 1:
+            with ThreadPoolExecutor(max_workers=len(importers)) as executor:
+                future_to_importer = {
+                    executor.submit(run_importer, importer): importer for importer in importers
+                }
+                for future in as_completed(future_to_importer):
+                    results.append(future.result())
+        else:
+            for importer in importers:
+                results.append(run_importer(importer))
+
+        return Response(results, status=status.HTTP_202_ACCEPTED)
diff --git a/vulnerabilities/tests/test_api_v2.py b/vulnerabilities/tests/test_api_v2.py
index 432c7c10f..eab8425ec 100644
--- a/vulnerabilities/tests/test_api_v2.py
+++ b/vulnerabilities/tests/test_api_v2.py
@@ -905,3 +905,68 @@ def test_get_all_vulnerable_purls(self):
         response = self.client.get(url)
         assert response.status_code == 200
         assert "pkg:pypi/sample@1.0.0" in response.data
+
+
+class LiveEvaluationAPITest(APITestCase):
+    def setUp(self):
+        self.client = APIClient(enforce_csrf_checks=True)
+        self.url = "/api/v2/live-evaluation/evaluate"
+
+    @patch("vulnerabilities.api_v2.LIVE_IMPORTERS_REGISTRY")
+    def test_evaluate_success(self, mock_registry):
+        class MockImporter:
+            pipeline_id = "dummy"
+            supported_types = ["pypi"]
+
+            def __init__(self, purl=None):
+                pass
+
+            def execute(self):
+                return 0, None
+
+        mock_registry.values.return_value = [MockImporter]
+        data = {"purl_string": "pkg:pypi/django@3.2"}
+        response = self.client.post(self.url, data, format="json")
+        assert response.status_code == 202
+        assert isinstance(response.data, list)
+        assert response.data[0]["importer"] == "dummy"
+        assert response.data[0]["purl"] == "pkg:pypi/django@3.2"
+        assert "steps_completed" in response.data[0]
+        assert "import" in response.data[0]["steps_completed"]
+
+    @patch("vulnerabilities.api_v2.LIVE_IMPORTERS_REGISTRY")
+    def test_evaluate_no_importer_found(self, mock_registry):
+        class MockImporter:
+            pipeline_id = "dummy"
+            supported_types = ["npm"]
+
+        mock_registry.values.return_value = [MockImporter]
+        data = {"purl_string": "pkg:pypi/django@3.2"}
+        response = self.client.post(self.url, data, format="json")
+        assert response.status_code == 400
+        assert "No live importers found" in response.data["error"]
+
+    def test_evaluate_invalid_purl(self):
+        data = {"purl_string": "not_a_valid_purl"}
+        response = self.client.post(self.url, data, format="json")
+        assert response.status_code == 400
+        assert "Invalid PackageURL" in response.data["error"]
+
+    @patch("vulnerabilities.api_v2.LIVE_IMPORTERS_REGISTRY")
+    def test_evaluate_no_threading(self, mock_registry):
+        class MockImporter:
+            pipeline_id = "dummy"
+            supported_types = ["pypi"]
+
+            def __init__(self, purl=None):
+                pass
+
+            def execute(self):
+                return 0, None
+
+        mock_registry.values.return_value = [MockImporter]
+        data = {"purl_string": "pkg:pypi/django@3.2", "no_threading": True}
+        response = self.client.post(self.url, data, format="json")
+        assert response.status_code == 202
+        assert isinstance(response.data, list)
+        assert response.data[0]["importer"] == "dummy"
diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py
index 8d170678a..7140965fe 100644
--- a/vulnerablecode/urls.py
+++ b/vulnerablecode/urls.py
@@ -23,6 +23,7 @@
 from vulnerabilities.api_v2 import AdvisoriesPackageV2ViewSet
 from vulnerabilities.api_v2 import CodeFixV2ViewSet
 from vulnerabilities.api_v2 import CodeFixViewSet
+from vulnerabilities.api_v2 import LiveEvaluationViewSet
 from vulnerabilities.api_v2 import PackageV2ViewSet
 from vulnerabilities.api_v2 import PipelineScheduleV2ViewSet
 from vulnerabilities.api_v2 import VulnerabilityV2ViewSet
@@ -69,6 +70,7 @@ def __init__(self, *args, **kwargs):
 api_v2_router.register("codefixes", CodeFixViewSet, basename="codefix")
 api_v2_router.register("pipelines", PipelineScheduleV2ViewSet, basename="pipelines")
 api_v2_router.register("advisory-codefixes", CodeFixV2ViewSet, basename="advisory-codefix")
+api_v2_router.register("live-evaluation", LiveEvaluationViewSet, basename="live-evaluation")
 
 
 urlpatterns = [