Skip to content

Commit 2d9e829

Browse files
committed
chore: improve the pypi spec for default case
Signed-off-by: behnazh-w <behnaz.hassanshahi@oracle.com>
1 parent 82e8f92 commit 2d9e829

File tree

7 files changed

+148
-112
lines changed

7 files changed

+148
-112
lines changed

src/macaron/build_spec_generator/common_spec/base_spec.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,13 @@ class BaseBuildSpecDict(TypedDict, total=False):
7373
#: Entry point script, class, or binary for running the project.
7474
entry_point: NotRequired[str | None]
7575

76+
#: The build_requires are the packages that need to be available in the build environment.
77+
build_requires: NotRequired[dict[str, str]]
78+
7679
#: A "back end" is a tool that a "front end" (such as pip/build) would call to
7780
#: package the source distribution into the wheel format. build_backends would
7881
#: be a list of these that were used in building the wheel.
79-
build_backends: NotRequired[dict[str, str]]
82+
build_backends: NotRequired[list[str]]
8083

8184

8285
class BaseBuildSpec(ABC):

src/macaron/build_spec_generator/common_spec/pypi_spec.py

Lines changed: 72 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import tomli
1111
from packageurl import PackageURL
1212
from packaging.requirements import InvalidRequirement, Requirement
13+
from packaging.specifiers import InvalidSpecifier
1314
from packaging.utils import InvalidWheelFilename, parse_wheel_filename
1415

1516
from macaron.build_spec_generator.build_command_patcher import CLI_COMMAND_PATCHES, patch_commands
@@ -110,14 +111,15 @@ def resolve_fields(self, purl: PackageURL) -> None:
110111

111112
pypi_package_json = pypi_registry.find_or_create_pypi_asset(purl.name, purl.version, registry_info)
112113
patched_build_commands: list[list[str]] = []
114+
build_requires_set: set[str] = set()
115+
build_backends_set: set[str] = set()
116+
parsed_build_requires: dict[str, str] = {}
117+
python_version_set: set[str] = set()
118+
wheel_name_python_version_list: list[str] = []
119+
wheel_name_platforms: set[str] = set()
113120

114121
if pypi_package_json is not None:
115122
if pypi_package_json.package_json or pypi_package_json.download(dest=""):
116-
requires_array: list[str] = []
117-
build_backends: dict[str, str] = {}
118-
python_version_set: set[str] = set()
119-
wheel_name_python_version_list: list[str] = []
120-
wheel_name_platforms: set[str] = set()
121123

122124
# Get the Python constraints from the PyPI JSON response.
123125
json_releases = pypi_package_json.get_releases()
@@ -135,59 +137,62 @@ def resolve_fields(self, purl: PackageURL) -> None:
135137
wheel_contents, metadata_contents = self.read_directory(pypi_package_json.wheel_path, purl)
136138
generator, version = self.read_generator_line(wheel_contents)
137139
if generator != "":
138-
build_backends[generator] = "==" + version
139-
if generator != "setuptools":
140-
# Apply METADATA heuristics to determine setuptools version.
141-
if "License-File" in metadata_contents:
142-
build_backends["setuptools"] = "==" + defaults.get(
143-
"heuristic.pypi", "setuptools_version_emitting_license"
144-
)
145-
elif "Platform: UNKNOWN" in metadata_contents:
146-
build_backends["setuptools"] = "==" + defaults.get(
147-
"heuristic.pypi", "setuptools_version_emitting_platform_unknown"
148-
)
149-
else:
150-
build_backends["setuptools"] = "==" + defaults.get(
151-
"heuristic.pypi", "default_setuptools"
152-
)
140+
parsed_build_requires[generator] = "==" + version.replace(" ", "")
141+
# Apply METADATA heuristics to determine setuptools version.
142+
elif "License-File" in metadata_contents:
143+
parsed_build_requires["setuptools"] = "==" + defaults.get(
144+
"heuristic.pypi", "setuptools_version_emitting_license"
145+
)
146+
elif "Platform: UNKNOWN" in metadata_contents:
147+
parsed_build_requires["setuptools"] = "==" + defaults.get(
148+
"heuristic.pypi", "setuptools_version_emitting_platform_unknown"
149+
)
153150
except SourceCodeError:
154151
logger.debug("Could not find pure wheel matching this PURL")
155152

156153
logger.debug("From .dist_info:")
157-
logger.debug(build_backends)
154+
logger.debug(parsed_build_requires)
158155

159156
try:
160157
with pypi_package_json.sourcecode():
161158
try:
162159
pyproject_content = pypi_package_json.get_sourcecode_file_contents("pyproject.toml")
163160
content = tomli.loads(pyproject_content.decode("utf-8"))
164-
build_system: dict[str, list[str]] = content.get("build-system", {})
165-
requires_array = build_system.get("requires", [])
161+
requires = json_extract(content, ["build-system", "requires"], list)
162+
if requires:
163+
build_requires_set.update(elem.replace(" ", "") for elem in requires)
164+
backend = json_extract(content, ["build-system", "build-backend"], str)
165+
if backend:
166+
build_backends_set.add(backend.replace(" ", ""))
166167

167168
python_version_constraint = json_extract(content, ["project", "requires-python"], str)
168169
if python_version_constraint:
169170
python_version_set.add(python_version_constraint.replace(" ", ""))
170-
logger.debug("From pyproject.toml:")
171-
logger.debug(requires_array)
172-
except SourceCodeError:
173-
logger.debug("No pyproject.toml found")
174-
except SourceCodeError:
175-
logger.debug("No source distribution found")
176-
177-
# Merge in pyproject.toml information only when the wheel dist_info does not contain the same
171+
logger.debug(
172+
"After analyzing pyproject.toml from the sdist: build-requires: %s, build_backend: %s",
173+
build_requires_set,
174+
build_backends_set,
175+
)
176+
except TypeError as error:
177+
logger.debug(
178+
"Found a type error while reading the pyproject.toml file from the sdist: %s", error
179+
)
180+
except tomli.TOMLDecodeError as error:
181+
logger.debug("Failed to read the pyproject.toml file from the sdist: %s", error)
182+
except SourceCodeError as error:
183+
logger.debug("No pyproject.toml found: %s", error)
184+
except SourceCodeError as error:
185+
logger.debug("No source distribution found: %s", error)
186+
187+
# Merge in pyproject.toml information only when the wheel dist_info does not contain the same.
178188
# Hatch is an interesting example of this merge being required.
179-
for requirement in requires_array:
189+
for requirement in build_requires_set:
180190
try:
181191
parsed_requirement = Requirement(requirement)
182-
if parsed_requirement.name not in build_backends:
183-
build_backends[parsed_requirement.name] = str(parsed_requirement.specifier)
184-
except InvalidRequirement:
185-
logger.debug("Malformed requirement encountered:")
186-
logger.debug(requirement)
187-
188-
logger.debug("Combined:")
189-
logger.debug(build_backends)
190-
self.data["build_backends"] = build_backends
192+
if parsed_requirement.name not in parsed_build_requires:
193+
parsed_build_requires[parsed_requirement.name] = str(parsed_requirement.specifier)
194+
except (InvalidRequirement, InvalidSpecifier) as error:
195+
logger.debug("Malformed requirement encountered %s : %s", requirement, error)
191196

192197
try:
193198
# Get information from the wheel file name.
@@ -206,23 +211,33 @@ def resolve_fields(self, purl: PackageURL) -> None:
206211
if "any" in wheel_name_platforms:
207212
patched_build_commands = self.get_default_build_commands(self.data["build_tools"])
208213

209-
if not patched_build_commands:
210-
# Resolve and patch build commands.
211-
selected_build_commands = self.data["build_commands"] or self.get_default_build_commands(
212-
self.data["build_tools"]
213-
)
214+
# If we were not able to find any build requirements or build backends, use the default setuptools.
215+
if not parsed_build_requires:
216+
parsed_build_requires["setuptools"] = "==" + defaults.get("heuristic.pypi", "default_setuptools")
217+
if not build_backends_set:
218+
build_backends_set.add("setuptools.build_meta")
214219

215-
patched_build_commands = (
216-
patch_commands(
217-
cmds_sequence=selected_build_commands,
218-
patches=CLI_COMMAND_PATCHES,
219-
)
220-
or []
220+
logger.debug("Combined build-requires: %s", parsed_build_requires)
221+
self.data["build_requires"] = parsed_build_requires
222+
self.data["build_backends"] = list(build_backends_set)
223+
224+
if not patched_build_commands:
225+
# Resolve and patch build commands.
226+
selected_build_commands = self.data["build_commands"] or self.get_default_build_commands(
227+
self.data["build_tools"]
228+
)
229+
230+
patched_build_commands = (
231+
patch_commands(
232+
cmds_sequence=selected_build_commands,
233+
patches=CLI_COMMAND_PATCHES,
221234
)
222-
if not patched_build_commands:
223-
raise GenerateBuildSpecError(f"Failed to patch command sequences {selected_build_commands}.")
235+
or []
236+
)
237+
if not patched_build_commands:
238+
raise GenerateBuildSpecError(f"Failed to patch command sequences {selected_build_commands}.")
224239

225-
self.data["build_commands"] = patched_build_commands
240+
self.data["build_commands"] = patched_build_commands
226241

227242
def read_directory(self, wheel_path: str, purl: PackageURL) -> tuple[str, str]:
228243
"""
@@ -286,5 +301,6 @@ def read_generator_line(self, wheel_contents: str) -> tuple[str, str]:
286301
for line in wheel_contents.splitlines():
287302
if line.startswith("Generator:"):
288303
split_line = line.split(" ")
289-
return split_line[1], split_line[2]
304+
if len(split_line) > 2:
305+
return split_line[1], split_line[2]
290306
return "", ""

src/macaron/slsa_analyzer/build_tool/pyproject.py

Lines changed: 52 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,37 @@
66
import logging
77
import tomllib
88
from pathlib import Path
9+
from typing import Any
10+
11+
from tomli import TOMLDecodeError
12+
13+
from macaron.json_tools import json_extract
914

1015
logger: logging.Logger = logging.getLogger(__name__)
1116

1217

18+
def get_content(pyproject_path: Path) -> dict[str, Any] | None:
19+
"""
20+
Return the pyproject.toml content.
21+
22+
Parameters
23+
----------
24+
pyproject_path : Path
25+
The file path to the pyproject.toml file.
26+
27+
Returns
28+
-------
29+
dict[str, Any] | None
30+
The [build-system] section as a dict, or None otherwise.
31+
"""
32+
try:
33+
with open(pyproject_path, "rb") as toml_file:
34+
return tomllib.load(toml_file)
35+
except (FileNotFoundError, TypeError, TOMLDecodeError) as error:
36+
logger.debug("Failed to read the %s file: %s", pyproject_path, error)
37+
return None
38+
39+
1340
def contains_build_tool(tool_name: str, pyproject_path: Path) -> bool:
1441
"""
1542
Check if a given build tool is present in the [tool] section of a pyproject.toml file.
@@ -26,22 +53,16 @@ def contains_build_tool(tool_name: str, pyproject_path: Path) -> bool:
2653
bool
2754
True if the build tool is found in the [tool] section, False otherwise.
2855
"""
29-
try:
30-
# Parse the pyproject.toml file.
31-
with open(pyproject_path, "rb") as toml_file:
32-
try:
33-
data = tomllib.load(toml_file)
34-
# Check for the existence of a [tool.<tool_name>] section.
35-
if ("tool" in data) and (tool_name in data["tool"]):
36-
return True
37-
except tomllib.TOMLDecodeError:
38-
logger.debug("Failed to read the %s file: invalid toml file.", pyproject_path)
39-
return False
40-
return False
41-
except FileNotFoundError:
42-
logger.debug("Failed to read the %s file.", pyproject_path)
56+
content = get_content(pyproject_path)
57+
if not content:
4358
return False
4459

60+
# Check for the existence of a [tool.<tool_name>] section.
61+
tools = json_extract(content, ["tool"], dict)
62+
if tools and tool_name in tools:
63+
return True
64+
return False
65+
4566

4667
def build_system_contains_tool(tool_name: str, pyproject_path: Path) -> bool:
4768
"""
@@ -59,27 +80,21 @@ def build_system_contains_tool(tool_name: str, pyproject_path: Path) -> bool:
5980
bool
6081
True if the tool is found in either the 'build-backend' or 'requires' of the [build-system] section, False otherwise.
6182
"""
62-
try:
63-
with open(pyproject_path, "rb") as toml_file:
64-
try:
65-
data = tomllib.load(toml_file)
66-
build_system = data.get("build-system", {})
67-
backend = build_system.get("build-backend", "")
68-
requires = build_system.get("requires", [])
69-
# Check in 'build-backend'.
70-
if tool_name in backend:
71-
return True
72-
# Check in 'requires' list.
73-
if any(tool_name in req for req in requires):
74-
return True
75-
except tomllib.TOMLDecodeError:
76-
logger.debug("Failed to read the %s file: invalid toml file.", pyproject_path)
77-
return False
78-
return False
79-
except FileNotFoundError:
80-
logger.debug("Failed to read the %s file.", pyproject_path)
83+
content = get_content(pyproject_path)
84+
if not content:
8185
return False
8286

87+
# Check in 'build-backend'.
88+
backend = json_extract(content, ["build-system", "build-backend"], str)
89+
if backend and tool_name in backend:
90+
return True
91+
# Check in 'requires' list.
92+
requires = json_extract(content, ["build-system", "requires"], list)
93+
if requires and any(tool_name in req for req in requires):
94+
return True
95+
96+
return False
97+
8398

8499
def get_build_system(pyproject_path: Path) -> dict[str, str] | None:
85100
"""
@@ -95,14 +110,8 @@ def get_build_system(pyproject_path: Path) -> dict[str, str] | None:
95110
dict[str, str] | None
96111
The [build-system] section as a dict, or None otherwise.
97112
"""
98-
try:
99-
with open(pyproject_path, "rb") as toml_file:
100-
try:
101-
data = tomllib.load(toml_file)
102-
return data.get("build-system", {}) or None
103-
except tomllib.TOMLDecodeError:
104-
logger.debug("Failed to read the %s file: invalid toml file.", pyproject_path)
105-
return None
106-
except FileNotFoundError:
107-
logger.debug("Failed to read the %s file.", pyproject_path)
113+
content = get_content(pyproject_path)
114+
if not content:
108115
return None
116+
117+
return json_extract(content, ["build-system"], dict)

src/macaron/slsa_analyzer/package_registry/pypi_registry.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -555,13 +555,13 @@ class PyPIPackageJsonAsset:
555555
#: The asset content.
556556
package_json: dict
557557

558-
#: the source code temporary location name
558+
#: The source code temporary location name.
559559
package_sourcecode_path: str
560560

561-
#: the wheel temporary location name
561+
#: The wheel temporary location name.
562562
wheel_path: str
563563

564-
#: name of the wheel file
564+
#: Name of the wheel file.
565565
wheel_filename: str
566566

567567
#: The size of the asset (in bytes). This attribute is added to match the AssetLocator

tests/integration/cases/pypi_cachetools/expected_default.buildspec

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,11 @@
2222
"build"
2323
]
2424
],
25-
"build_backends": {
25+
"build_requires": {
2626
"setuptools": "==(80.9.0)",
2727
"wheel": ""
28-
}
28+
},
29+
"build_backends": [
30+
"setuptools.build_meta"
31+
]
2932
}

tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@
2121
"build"
2222
]
2323
],
24-
"build_backends": {
24+
"build_requires": {
2525
"flit": "==3.12.0",
26-
"setuptools": "==56.2.0",
2726
"flit_core": "<4,>=3.4"
28-
}
27+
},
28+
"build_backends": [
29+
"flit_core.buildapi"
30+
]
2931
}

tests/integration/cases/pypi_toga/expected_default.buildspec

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,12 @@
2222
"build"
2323
]
2424
],
25-
"build_backends": {
25+
"build_requires": {
2626
"setuptools": "==(80.3.1)",
27-
"setuptools_scm": "==8.3.1",
28-
"setuptools_dynamic_dependencies": "==1.0.0"
29-
}
27+
"setuptools_dynamic_dependencies": "==1.0.0",
28+
"setuptools_scm": "==8.3.1"
29+
},
30+
"build_backends": [
31+
"setuptools.build_meta"
32+
]
3033
}

0 commit comments

Comments
 (0)