Skip to content

Commit 8fe34e5

Browse files
authored
fix: improve build tool detection (#1169)
This PR refactors the build tool detection logic by reusing the repo verifier’s path filtering approach. This change ensures that paths that are likely used for testing are excluded during the build tool detection process. Additionally, this PR adds an integration test for a project that uses Maven, with Gradle used as an example to demonstrate the filtering mechanism. Signed-off-by: behnazh-w <behnaz.hassanshahi@oracle.com>
1 parent 6aa7a4c commit 8fe34e5

File tree

29 files changed

+480
-125
lines changed

29 files changed

+480
-125
lines changed

src/macaron/config/defaults.ini

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,17 @@ hostname = gitlab.com
9292
# [git_service.local_repo]
9393
# hostname = example.org
9494

95+
[builder]
96+
# Skip detecting build tool configuration files in paths containing the following keywords.
97+
build_tool_path_filters =
98+
test
99+
example
100+
sample
101+
doc
102+
demo
103+
spec
104+
mock
105+
95106
# This is the spec for trusted Maven build tools.
96107
[builder.maven]
97108
entry_conf = settings.xml

src/macaron/repo_verifier/repo_verifier.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def verify_repo(
8282
version=version,
8383
reported_repo_url=reported_repo_url,
8484
reported_repo_fs=reported_repo_fs,
85+
build_tool=build_tool,
8586
provenance_repo_url=provenance_repo_url,
8687
)
8788

src/macaron/repo_verifier/repo_verifier_base.py

Lines changed: 4 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -4,66 +4,14 @@
44
"""This module contains the base class and core data models for repository verification."""
55
import abc
66
import logging
7-
import os
8-
from collections import deque
97
from dataclasses import dataclass
108
from enum import Enum
11-
from pathlib import Path
129

1310
from macaron.slsa_analyzer.build_tool import BaseBuildTool
1411

1512
logger = logging.getLogger(__name__)
1613

1714

18-
def find_file_in_repo(root_dir: Path, filename: str) -> Path | None:
19-
"""Find the highest level file with a given name in a local repository.
20-
21-
This function ignores certain paths that are not under the main source code directories.
22-
23-
Parameters
24-
----------
25-
root_dir : Path
26-
The root directory of the repository.
27-
filename : str
28-
The name of the file to search for.
29-
30-
Returns
31-
-------
32-
Path | None
33-
The path to the file if it exists, otherwise None.
34-
"""
35-
# TODO: Consider using BaseBuildTool.get_build_dirs.
36-
# + Refactor 'get_build_dirs' to skip certain directories
37-
# that are most likely not part of the main codebase (e.g., sample).
38-
# + Need to find a way to look for other
39-
# files (e.g., gradle.properties) for the purpose of repo verification
40-
# without breaking the current logic of finding build directories.
41-
# + Add the capability to return the content/path of the file.
42-
if not os.path.isdir(root_dir):
43-
return None
44-
45-
queue: deque[Path] = deque()
46-
queue.append(Path(root_dir))
47-
while queue:
48-
current_dir = queue.popleft()
49-
50-
# Don't look through non-main directories.
51-
if any(
52-
keyword in current_dir.name.lower()
53-
for keyword in ["test", "example", "sample", "doc", "demo", "spec", "mock"]
54-
):
55-
continue
56-
57-
if Path(current_dir, filename).exists():
58-
return Path(current_dir, filename)
59-
60-
# Ignore symlinks to prevent potential infinite loop.
61-
sub_dirs = [Path(it) for it in current_dir.iterdir() if it.is_dir() and not it.is_symlink()]
62-
queue.extend(sub_dirs)
63-
64-
return None
65-
66-
6715
class RepositoryVerificationStatus(str, Enum):
6816
"""A class to store the status of the repo verification."""
6917

@@ -167,18 +115,14 @@ class RepoVerifierToolSpecific(RepoVerifierFromProvenance, abc.ABC):
167115
From-provenance verification is inherited from the parent class.
168116
"""
169117

170-
@property
171-
@abc.abstractmethod
172-
def specific_tool(self) -> BaseBuildTool:
173-
"""Define the build tool used to build the package."""
174-
175118
def __init__(
176119
self,
177120
namespace: str | None,
178121
name: str,
179122
version: str,
180123
reported_repo_url: str,
181124
reported_repo_fs: str,
125+
build_tool: BaseBuildTool,
182126
provenance_repo_url: str | None,
183127
):
184128
"""Instantiate the class.
@@ -195,12 +139,12 @@ def __init__(
195139
The URL of the repository reported by the publisher.
196140
reported_repo_fs : str
197141
The file system path of the reported repository.
142+
build_tool : BaseBuildTool
143+
The build tool used to build the package.
198144
provenance_repo_url : str | None
199145
The URL of the repository from a provenance file, or None if it, or the provenance, is not present.
200146
"""
201-
super().__init__(
202-
namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url, self.specific_tool
203-
)
147+
super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url, build_tool)
204148

205149
def verify_repo(self) -> RepositoryVerificationResult:
206150
"""Verify the repository as per the base class method."""

src/macaron/repo_verifier/repo_verifier_gradle.py

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,9 @@
1010
RepositoryVerificationResult,
1111
RepositoryVerificationStatus,
1212
RepoVerifierToolSpecific,
13-
find_file_in_repo,
1413
)
1514
from macaron.repo_verifier.repo_verifier_maven import RepoVerifierMaven
16-
from macaron.slsa_analyzer.build_tool import Gradle
15+
from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, file_exists
1716
from macaron.slsa_analyzer.package_registry.maven_central_registry import same_organization
1817

1918
logger = logging.getLogger(__name__)
@@ -22,15 +21,14 @@
2221
class RepoVerifierGradle(RepoVerifierToolSpecific):
2322
"""A class to verify whether a repository with Gradle build tool links back to the artifact."""
2423

25-
specific_tool = Gradle()
26-
2724
def __init__(
2825
self,
2926
namespace: str,
3027
name: str,
3128
version: str,
3229
reported_repo_url: str,
3330
reported_repo_fs: str,
31+
build_tool: BaseBuildTool,
3432
provenance_repo_url: str | None,
3533
):
3634
"""Initialize a RepoVerifierGradle instance.
@@ -47,17 +45,20 @@ def __init__(
4745
The URL of the repository reported by the publisher.
4846
reported_repo_fs : str
4947
The file system path of the reported repository.
48+
build_tool : BaseBuildTool
49+
The build tool used to build the package.
5050
provenance_repo_url : str | None
5151
The URL of the repository from a provenance file, or None if it, or the provenance, is not present.
5252
"""
53-
super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, provenance_repo_url)
53+
super().__init__(namespace, name, version, reported_repo_url, reported_repo_fs, build_tool, provenance_repo_url)
5454

5555
self.maven_verifier = RepoVerifierMaven(
5656
namespace=namespace,
5757
name=name,
5858
version=version,
5959
reported_repo_url=reported_repo_url,
6060
reported_repo_fs=reported_repo_fs,
61+
build_tool=build_tool,
6162
provenance_repo_url=provenance_repo_url,
6263
)
6364

@@ -81,11 +82,11 @@ def verify_by_tool(self) -> RepositoryVerificationResult:
8182
if recognized_services_verification_result.status == RepositoryVerificationStatus.PASSED:
8283
return recognized_services_verification_result
8384

84-
gradle_group_id = self._extract_group_id_from_properties()
85+
gradle_group_id = self.extract_group_id_from_properties()
8586
if not gradle_group_id:
86-
gradle_group_id = self._extract_group_id_from_build_groovy()
87+
gradle_group_id = self.extract_group_id_from_build_groovy()
8788
if not gradle_group_id:
88-
gradle_group_id = self._extract_group_id_from_build_kotlin()
89+
gradle_group_id = self.extract_group_id_from_build_kotlin()
8990
if not gradle_group_id:
9091
logger.debug("Could not find group from gradle manifests for %s", self.reported_repo_url)
9192
return RepositoryVerificationResult(
@@ -149,17 +150,37 @@ def _extract_group_id_from_gradle_manifest(
149150

150151
return None
151152

152-
def _extract_group_id_from_properties(self) -> str | None:
153-
"""Extract the group id from the gradle.properties file."""
154-
gradle_properties = find_file_in_repo(Path(self.reported_repo_fs), "gradle.properties")
153+
def extract_group_id_from_properties(self) -> str | None:
154+
"""Extract the group id from the gradle.properties file.
155+
156+
Returns
157+
-------
158+
str | None
159+
The extracted group id if found, otherwise None.
160+
"""
161+
gradle_properties = file_exists(
162+
self.reported_repo_fs, "gradle.properties", filters=self.build_tool.path_filters
163+
)
155164
return self._extract_group_id_from_gradle_manifest(gradle_properties)
156165

157-
def _extract_group_id_from_build_groovy(self) -> str | None:
158-
"""Extract the group id from the build.gradle file."""
159-
build_gradle = find_file_in_repo(Path(self.reported_repo_fs), "build.gradle")
160-
return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={"'", '"'}, delimiter=" ")
166+
def extract_group_id_from_build_groovy(self) -> str | None:
167+
"""Extract the group id from the build.gradle file.
161168
162-
def _extract_group_id_from_build_kotlin(self) -> str | None:
163-
"""Extract the group id from the build.gradle.kts file."""
164-
build_gradle = find_file_in_repo(Path(self.reported_repo_fs), "build.gradle.kts")
169+
Returns
170+
-------
171+
str | None
172+
The extracted group id if found, otherwise None.
173+
"""
174+
build_gradle = file_exists(self.reported_repo_fs, "build.gradle", filters=self.build_tool.path_filters)
175+
return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={"'", '"'}, delimiter="=")
176+
177+
def extract_group_id_from_build_kotlin(self) -> str | None:
178+
"""Extract the group id from the build.gradle.kts file.
179+
180+
Returns
181+
-------
182+
str | None
183+
The extracted group id if found, otherwise None.
184+
"""
185+
build_gradle = file_exists(self.reported_repo_fs, "build.gradle.kts", filters=self.build_tool.path_filters)
165186
return self._extract_group_id_from_gradle_manifest(build_gradle, quote_chars={'"'}, delimiter="=")

src/macaron/repo_verifier/repo_verifier_maven.py

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,15 @@
33

44
"""This module contains code to verify whether a reported Maven-based repository can be linked back to the artifact."""
55
import logging
6-
from pathlib import Path
76
from urllib.parse import urlparse
87

98
from macaron.parsers.pomparser import parse_pom_string
109
from macaron.repo_verifier.repo_verifier_base import (
1110
RepositoryVerificationResult,
1211
RepositoryVerificationStatus,
1312
RepoVerifierToolSpecific,
14-
find_file_in_repo,
1513
)
16-
from macaron.slsa_analyzer.build_tool import Maven
14+
from macaron.slsa_analyzer.build_tool.base_build_tool import file_exists
1715
from macaron.slsa_analyzer.package_registry.maven_central_registry import (
1816
RECOGNIZED_CODE_HOSTING_SERVICES,
1917
same_organization,
@@ -25,8 +23,6 @@
2523
class RepoVerifierMaven(RepoVerifierToolSpecific):
2624
"""A class to verify whether a repository with Maven build tool links back to the artifact."""
2725

28-
specific_tool = Maven()
29-
3026
def verify_by_tool(self) -> RepositoryVerificationResult:
3127
"""Verify whether the reported repository links back to the Maven artifact.
3228
@@ -45,43 +41,52 @@ def verify_by_tool(self) -> RepositoryVerificationResult:
4541
if recognized_services_verification_result.status == RepositoryVerificationStatus.PASSED:
4642
return recognized_services_verification_result
4743

44+
pom_group_id = self.extract_group_id_from_pom()
45+
if pom_group_id is None:
46+
logger.debug("Could not find groupId from the pom.xml in %s", self.reported_repo_url)
47+
return RepositoryVerificationResult(
48+
status=RepositoryVerificationStatus.UNKNOWN, reason="no_group_id_in_pom", build_tool=self.build_tool
49+
)
50+
if not same_organization(pom_group_id, self.namespace):
51+
logger.debug("Group id in pom.xml does not match the provided group id for: %s", self.reported_repo_url)
52+
return RepositoryVerificationResult(
53+
status=RepositoryVerificationStatus.FAILED, reason="group_id_mismatch", build_tool=self.build_tool
54+
)
55+
56+
return RepositoryVerificationResult(
57+
status=RepositoryVerificationStatus.PASSED, reason="group_id_match", build_tool=self.build_tool
58+
)
59+
60+
def extract_group_id_from_pom(self) -> str | None:
61+
"""Extract the group id from the pom.xml file.
62+
63+
Returns
64+
-------
65+
str | None
66+
The extracted group id if found, otherwise None.
67+
"""
4868
# TODO: check other pom files. Think about how to decide in case of contradicting evidence.
4969
# Check if repo contains pom.xml.
50-
pom_file = find_file_in_repo(Path(self.reported_repo_fs), "pom.xml")
70+
pom_file = file_exists(self.reported_repo_fs, "pom.xml", filters=self.build_tool.path_filters)
5171
if not pom_file:
5272
logger.debug("Could not find any pom.xml in the repository: %s", self.reported_repo_url)
53-
return RepositoryVerificationResult(
54-
status=RepositoryVerificationStatus.UNKNOWN, reason="no_pom", build_tool=self.build_tool
55-
)
73+
return None
5674

5775
pom_content = pom_file.read_text(encoding="utf-8")
5876
pom_root = parse_pom_string(pom_content)
5977

60-
if not pom_root:
78+
if pom_root is None:
6179
logger.debug("Could not parse pom.xml: %s", pom_file.as_posix())
62-
return RepositoryVerificationResult(
63-
status=RepositoryVerificationStatus.UNKNOWN, reason="not_parsed_pom", build_tool=self.build_tool
64-
)
80+
return None
6581

6682
# Find the group id in the pom (project/groupId).
6783
# The closing curly brace represents the end of the XML namespace.
6884
pom_group_id_elem = next((ch for ch in pom_root if ch.tag.endswith("}groupId")), None)
6985
if pom_group_id_elem is None or not pom_group_id_elem.text:
7086
logger.debug("Could not find groupId in pom.xml: %s", pom_file)
71-
return RepositoryVerificationResult(
72-
status=RepositoryVerificationStatus.UNKNOWN, reason="no_group_id_in_pom", build_tool=self.build_tool
73-
)
87+
return None
7488

75-
pom_group_id = pom_group_id_elem.text.strip()
76-
if not same_organization(pom_group_id, self.namespace):
77-
logger.debug("Group id in pom.xml does not match the provided group id: %s", pom_file)
78-
return RepositoryVerificationResult(
79-
status=RepositoryVerificationStatus.FAILED, reason="group_id_mismatch", build_tool=self.build_tool
80-
)
81-
82-
return RepositoryVerificationResult(
83-
status=RepositoryVerificationStatus.PASSED, reason="group_id_match", build_tool=self.build_tool
84-
)
89+
return pom_group_id_elem.text.strip()
8590

8691
def verify_domains_from_recognized_code_hosting_services(self) -> RepositoryVerificationResult:
8792
"""Verify repository link by comparing the maven domain name and the account on code hosting services.

0 commit comments

Comments
 (0)