Skip to content

Commit c2e47ca

Browse files
committed
Add GitHub OSV Live V2 Importer Pipeline #1904
* Add GitHub OSV Live V2 Importer
* Add tests for the GitHub OSV Live V2 Importer
* Tested functionally using the Live Evaluation API in #1969

Signed-off-by: Michael Ehab Mikhail <michael.ehab@hotmail.com>
1 parent dcb0511 commit c2e47ca

File tree

3 files changed

+253
-2
lines changed

3 files changed

+253
-2
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@
4848
elixir_security_importer as elixir_security_importer_v2,
4949
)
5050
from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2
51+
from vulnerabilities.pipelines.v2_importers import (
52+
github_osv_live_importer as github_osv_live_importer_v2,
53+
)
5154
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
5255
from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
5356
from vulnerabilities.pipelines.v2_importers import mozilla_importer as mozilla_importer_v2
@@ -64,7 +67,6 @@
6467

6568
IMPORTERS_REGISTRY = create_registry(
6669
[
67-
archlinux_importer_v2.ArchLinuxImporterPipeline,
6870
nvd_importer_v2.NVDImporterPipeline,
6971
elixir_security_importer_v2.ElixirSecurityImporterPipeline,
7072
npm_importer_v2.NpmImporterPipeline,
@@ -80,7 +82,6 @@
8082
postgresql_importer_v2.PostgreSQLImporterPipeline,
8183
mozilla_importer_v2.MozillaImporterPipeline,
8284
github_osv_importer_v2.GithubOSVImporterPipeline,
83-
redhat_importer_v2.RedHatImporterPipeline,
8485
nvd_importer.NVDImporterPipeline,
8586
github_importer.GitHubAPIImporterPipeline,
8687
gitlab_importer.GitLabImporterPipeline,
@@ -117,3 +118,9 @@
117118
oss_fuzz.OSSFuzzImporter,
118119
]
119120
)
121+
122+
# Importer pipelines registered separately from IMPORTERS_REGISTRY; the one
# listed here collects advisories for a single PURL.
_LIVE_IMPORTER_PIPELINES = [
    github_osv_live_importer_v2.GithubOSVLiveImporterPipeline,
]
LIVE_IMPORTERS_REGISTRY = create_registry(_LIVE_IMPORTER_PIPELINES)
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
import json
2+
from typing import Iterable
3+
4+
import requests
5+
from packageurl import PackageURL
6+
from univers.version_range import RANGE_CLASS_BY_SCHEMES
7+
8+
from vulnerabilities.importer import AdvisoryData
9+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
10+
11+
12+
class GithubOSVLiveImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    GithubOSV Live Importer Pipeline

    Collect advisories from GitHub Advisory Database for a single PURL.

    The PURL is read from ``self.inputs["purl"]`` and must have one of the
    ``supported_types`` and a version. Only advisories that list the same
    package and whose affected version range contains that version are yielded.
    """

    pipeline_id = "github_osv_live_importer_v2"
    spdx_license_expression = "CC-BY-4.0"
    license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md"
    # NOTE: "golang" was commented out of the ecosystem list in the original code.
    supported_types = ["pypi", "npm", "maven", "composer", "hex", "gem", "nuget", "cargo"]

    @classmethod
    def steps(cls):
        """Return the pipeline steps: validate the input PURL, then collect and store."""
        # collect_and_store_advisories is not defined in this class; presumably
        # inherited from VulnerableCodeBaseImporterPipelineV2.
        return (
            cls.get_purl_inputs,
            cls.collect_and_store_advisories,
        )

    def get_purl_inputs(self):
        """
        Validate the "purl" input and store it as ``self.purl`` (a PackageURL).

        Accept either a PackageURL or a purl string.
        Raise ValueError when the purl is missing or empty, is not a PackageURL,
        has an unsupported type, or has no version.
        """
        try:
            purl = self.inputs["purl"]
        except (KeyError, TypeError):
            # Missing key (or no inputs at all): report it with the same
            # explicit error as an empty value instead of a bare lookup error.
            purl = None

        if not purl:
            raise ValueError("PURL is required for GithubOSVLiveImporterPipeline")

        if isinstance(purl, str):
            purl = PackageURL.from_string(purl)

        if not isinstance(purl, PackageURL):
            raise ValueError(f"Object of type {type(purl)} {purl!r} is not a PackageURL instance")

        if purl.type not in self.supported_types:
            raise ValueError(
                f"PURL: {purl!s} is not among the supported package types {self.supported_types!r}"
            )

        if not purl.version:
            raise ValueError(f"PURL: {purl!s} is expected to have a version")

        self.purl = purl

    def advisories_count(self):
        """Fetch the advisories for ``self.purl``, cache them on ``self.advisories``, return their count."""
        self.advisories = fetch_github_osv_advisories_for_purl(self.purl)
        return len(self.advisories)

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield an AdvisoryData for each fetched advisory that affects ``self.purl``.

        An advisory is yielded when, after keeping only the affected packages with
        the same type, namespace and name as ``self.purl``, at least one of them
        has an affected version range containing the version of ``self.purl``.
        """
        # Imported locally as in the original code (presumably to avoid a circular import).
        from vulnerabilities.importers.osv import parse_advisory_data_v2

        # self.advisories is set by advisories_count(); fetch here when that has
        # not run so this method also works on its own.
        advisories = getattr(self, "advisories", None)
        if advisories is None:
            advisories = self.advisories = fetch_github_osv_advisories_for_purl(self.purl)

        # Single source of truth: the ecosystems to parse are the supported purl types.
        supported_ecosystems = list(self.supported_types)

        purl = self.purl
        version_range_class = RANGE_CLASS_BY_SCHEMES[purl.type]
        version_obj = version_range_class.version_class(purl.version)
        purl_namespace = purl.namespace or ""

        for adv in advisories:
            adv_id = adv.get("id")
            advisory = parse_advisory_data_v2(
                raw_data=adv,
                supported_ecosystems=supported_ecosystems,
                advisory_url=build_github_repo_advisory_url(adv, adv_id),
                advisory_text=json.dumps(adv, ensure_ascii=False),
            )
            # Defensive: skip if the parser produced nothing for this record
            # (assumes it may return None -- TODO confirm against the parser).
            if not advisory:
                continue

            # Keep only the affected packages that are the requested package.
            advisory.affected_packages = [
                ap
                for ap in advisory.affected_packages
                if ap.package
                and ap.package.type == purl.type
                and ap.package.name == purl.name
                and (ap.package.namespace or "") == purl_namespace
            ]

            if not advisory.affected_packages:
                continue

            if any(
                ap.affected_version_range and version_obj in ap.affected_version_range
                for ap in advisory.affected_packages
            ):
                yield advisory
103+
104+
105+
# Ecosystem name to send in the OSV query "package.ecosystem" field, keyed by purl type.
ECOSYSTEM_BY_PURL_TYPE = dict(
    (
        ("pypi", "PyPI"),
        ("npm", "npm"),
        ("maven", "Maven"),
        ("composer", "Packagist"),
        ("hex", "Hex"),
        ("gem", "RubyGems"),
        ("nuget", "NuGet"),
        ("cargo", "crates.io"),
    )
)
115+
116+
# Map purl.type to directory names used in the advisory-database repository.
# Most directory names equal the purl type; only the exceptions are spelled out.
# NOTE(review): not referenced by the other code visible in this module -- confirm it is needed.
REPO_DIR_BY_PURL_TYPE = {
    purl_type: {"gem": "rubygems", "cargo": "crates.io"}.get(purl_type, purl_type)
    for purl_type in ("pypi", "npm", "maven", "composer", "hex", "gem", "nuget", "cargo")
}
127+
128+
129+
def build_github_repo_advisory_url(adv: dict, adv_id: str | None) -> str:
    """
    Return the advisory JSON URL in the GitHub advisory-database repo, using the GHSA path:
    advisories/github-reviewed/YYYY/MM/GHSA-ID/GHSA-ID.json

    YYYY and MM come from the advisory "published" timestamp, or from "modified"
    when "published" is missing or empty.

    Return the base ``github-reviewed/`` directory URL instead when ``adv_id`` is
    empty or when no parseable ISO 8601 timestamp is available.
    """
    from datetime import datetime

    base = "https://github.com/github/advisory-database/blob/main/advisories/github-reviewed"
    fallback_url = f"{base}/"
    if not adv_id:
        return fallback_url

    date_str = adv.get("published") or adv.get("modified")
    if not isinstance(date_str, str):
        # Missing, empty or non-text timestamp: nothing to derive the path from.
        return fallback_url

    try:
        # fromisoformat() only accepts a trailing "Z" from Python 3.11 on,
        # so spell the UTC offset explicitly.
        dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
    except ValueError:
        # Unparseable timestamp: deliberately fall back rather than fail the import.
        return fallback_url

    return f"{base}/{dt:%Y}/{dt:%m}/{adv_id}/{adv_id}.json"
153+
154+
155+
def _osv_package_name(purl: PackageURL) -> str:
    """
    Return the package name to use in an OSV query for ``purl``.

    Without a namespace this is the bare name. With a namespace, Maven joins
    namespace and name as "groupId:artifactId"; every other type uses
    "namespace/name".
    """
    namespace, name = purl.namespace, purl.name
    if not namespace:
        return name

    separator = ":" if purl.type == "maven" else "/"
    return f"{namespace}{separator}{name}"
162+
163+
164+
def fetch_github_osv_advisories_for_purl(purl: PackageURL):
    """
    Return a list of OSV advisory dicts from the OSV API for a given PURL,
    filtered to only GitHub advisories (GHSA-*).

    The query is by package only (ecosystem and name, no version). Result pages
    are followed through the ``next_page_token`` field of the response so that
    packages with many advisories are not truncated to the first page.

    This is best-effort and does not raise on service problems:
    - return an empty list when the purl type has no known OSV ecosystem;
    - on a non-200 response, a network error or an invalid JSON payload, stop and
      return the advisories collected so far (an empty list if the first
      request fails).
    """
    ecosystem = ECOSYSTEM_BY_PURL_TYPE.get(purl.type)
    if not ecosystem:
        return []

    # Query by package to get all advisories for that package; we filter GHSA below.
    body = {"package": {"ecosystem": ecosystem, "name": _osv_package_name(purl)}}

    # Upper bound on followed pages so a misbehaving server cannot keep us looping.
    max_pages = 100
    ghsa_advisories = []

    for _ in range(max_pages):
        try:
            resp = requests.post("https://api.osv.dev/v1/query", json=body, timeout=30)
            if resp.status_code != 200:
                break
            data = resp.json()
        except (requests.RequestException, ValueError):
            # Network failure or a body that is not valid JSON.
            break

        if not isinstance(data, dict):
            break

        # Keep only GHSA advisories which correspond to GitHub Advisory Database
        for vuln in data.get("vulns") or []:
            vuln_id = vuln.get("id") if isinstance(vuln, dict) else None
            if isinstance(vuln_id, str) and vuln_id.startswith("GHSA-"):
                ghsa_advisories.append(vuln)

        page_token = data.get("next_page_token")
        if not page_token:
            break
        # Ask for the next page of the same query.
        body["page_token"] = page_token

    return ghsa_advisories
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import json
2+
from unittest import mock
3+
4+
from packageurl import PackageURL
5+
6+
from vulnerabilities.importer import AdvisoryData
7+
from vulnerabilities.pipelines.v2_importers.github_osv_live_importer import (
8+
GithubOSVLiveImporterPipeline,
9+
)
10+
11+
# Minimal OSV-format advisory used as the mocked fetch result: a single PyPI
# package "sample" with an ECOSYSTEM range introduced at 1.0.0 and fixed at 1.2.0.
SAMPLE_OSV = dict(
    id="GHSA-xxxx-yyyy-zzzz",
    summary="Sample summary",
    details="Sample details",
    aliases=["CVE-2021-99999"],
    affected=[
        dict(
            package=dict(name="sample", ecosystem="PyPI"),
            ranges=[
                dict(type="ECOSYSTEM", events=[dict(introduced="1.0.0"), dict(fixed="1.2.0")]),
            ],
            versions=["1.0.0", "1.1.0"],
        ),
    ],
    database_specific=dict(cwe_ids=["CWE-79"]),
)
27+
28+
29+
@mock.patch(
    "vulnerabilities.pipelines.v2_importers.github_osv_live_importer.fetch_github_osv_advisories_for_purl"
)
def test_github_osv_live_importer_found_with_version(mock_fetch):
    """A version inside the affected range (1.1.0) yields the sample advisory."""
    # JSON round-trip gives the pipeline its own deep copy of the sample record.
    sample_copy = json.loads(json.dumps(SAMPLE_OSV))
    mock_fetch.return_value = [sample_copy]

    live_pipeline = GithubOSVLiveImporterPipeline(
        purl=PackageURL(type="pypi", name="sample", version="1.1.0")
    )
    live_pipeline.get_purl_inputs()
    live_pipeline.advisories_count()
    collected = [*live_pipeline.collect_advisories()]

    assert len(collected) == 1
    first = collected[0]
    assert isinstance(first, AdvisoryData)
    assert first.advisory_id == "GHSA-xxxx-yyyy-zzzz"
    assert "CVE-2021-99999" in first.aliases
    assert first.summary.startswith("Sample")
    assert first.affected_packages
    first_affected = first.affected_packages[0]
    assert first_affected.package.type == "pypi"
47+
48+
49+
@mock.patch(
    "vulnerabilities.pipelines.v2_importers.github_osv_live_importer.fetch_github_osv_advisories_for_purl"
)
def test_github_osv_live_importer_none_found_with_version(mock_fetch):
    """The fixed version (1.2.0) is outside the affected range, so nothing is yielded."""
    # JSON round-trip gives the pipeline its own deep copy of the sample record.
    sample_copy = json.loads(json.dumps(SAMPLE_OSV))
    mock_fetch.return_value = [sample_copy]

    live_pipeline = GithubOSVLiveImporterPipeline(
        purl=PackageURL(type="pypi", name="sample", version="1.2.0")
    )
    live_pipeline.get_purl_inputs()
    live_pipeline.advisories_count()
    collected = [*live_pipeline.collect_advisories()]

    assert collected == []

0 commit comments

Comments
 (0)