diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl index 58d35f2681..7cf60ff85f 100644 --- a/python/private/pypi/hub_builder.bzl +++ b/python/private/pypi/hub_builder.bzl @@ -566,8 +566,13 @@ def _whl_repo( for p in src.target_platforms ] + # TODO @aignas 2025-11-02: once we have pipstar enabled we can add extra + # targets to each hub for each extra combination and solve this more cleanly as opposed to + # duplicating whl_library repositories. + target_platforms = src.target_platforms if is_multiple_versions else [] + return struct( - repo_name = whl_repo_name(src.filename, src.sha256), + repo_name = whl_repo_name(src.filename, src.sha256, *target_platforms), args = args, config_setting = whl_config_setting( version = python_version, diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl index acf3b0c6ae..7d210abbaa 100644 --- a/python/private/pypi/parse_requirements.bzl +++ b/python/private/pypi/parse_requirements.bzl @@ -88,6 +88,7 @@ def parse_requirements( evaluate_markers = evaluate_markers or (lambda _ctx, _requirements: {}) options = {} requirements = {} + reqs_with_env_markers = {} for file, plats in requirements_by_platform.items(): logger.trace(lambda: "Using {} for {}".format(file, plats)) contents = ctx.read(file) @@ -96,16 +97,41 @@ def parse_requirements( # needed for the whl_library declarations later. parse_result = parse_requirements_txt(contents) + tokenized_options = [] + for opt in parse_result.options: + for p in opt.split(" "): + tokenized_options.append(p) + + pip_args = tokenized_options + extra_pip_args + for plat in plats: + requirements[plat] = parse_result.requirements + for entry in parse_result.requirements: + requirement_line = entry[1] + + # output all of the requirement lines that have a marker + if ";" in requirement_line: + reqs_with_env_markers.setdefault(requirement_line, []).append(plat) + options[plat] = pip_args + + # This may call to Python, so execute it early (before calling to the + # internet below) and ensure that we call it only once. + resolved_marker_platforms = evaluate_markers(ctx, reqs_with_env_markers) + logger.trace(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format( + reqs_with_env_markers, + resolved_marker_platforms, + )) + + requirements_by_platform = {} + for plat, parse_results in requirements.items(): # Replicate a surprising behavior that WORKSPACE builds allowed: # Defining a repo with the same name multiple times, but only the last # definition is respected. # The requirement lines might have duplicate names because lines for extras # are returned as just the base package name. e.g., `foo[bar]` results # in an entry like `("foo", "foo[bar] == 1.0 ...")`. - # Lines with different markers are not condidered duplicates. requirements_dict = {} for entry in sorted( - parse_result.requirements, + parse_results, # Get the longest match and fallback to original WORKSPACE sorting, # which should get us the entry with most extras. # @@ -114,33 +140,22 @@ def parse_requirements( # should do this now. key = lambda x: (len(x[1].partition("==")[0]), x), ): - req = requirement(entry[1]) - requirements_dict[(req.name, req.version, req.marker)] = entry + req_line = entry[1] + req = requirement(req_line) - tokenized_options = [] - for opt in parse_result.options: - for p in opt.split(" "): - tokenized_options.append(p) + if req.marker and plat not in resolved_marker_platforms.get(req_line, []): + continue - pip_args = tokenized_options + extra_pip_args - for plat in plats: - requirements[plat] = requirements_dict.values() - options[plat] = pip_args + requirements_dict[req.name] = entry - requirements_by_platform = {} - reqs_with_env_markers = {} - for target_platform, reqs_ in requirements.items(): - extra_pip_args = options[target_platform] + extra_pip_args = options[plat] - for distribution, requirement_line in reqs_: + for distribution, requirement_line in requirements_dict.values(): for_whl = requirements_by_platform.setdefault( normalize_name(distribution), {}, ) - if ";" in requirement_line: - reqs_with_env_markers.setdefault(requirement_line, []).append(target_platform) - for_req = for_whl.setdefault( (requirement_line, ",".join(extra_pip_args)), struct( @@ -151,20 +166,7 @@ def parse_requirements( extra_pip_args = extra_pip_args, ), ) - for_req.target_platforms.append(target_platform) - - # This may call to Python, so execute it early (before calling to the - # internet below) and ensure that we call it only once. - # - # NOTE @aignas 2024-07-13: in the future, if this is something that we want - # to do, we could use Python to parse the requirement lines and infer the - # URL of the files to download things from. This should be important for - # VCS package references. - env_marker_target_platforms = evaluate_markers(ctx, reqs_with_env_markers) - logger.trace(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format( - reqs_with_env_markers, - env_marker_target_platforms, - )) + for_req.target_platforms.append(plat) index_urls = {} if get_index_urls: @@ -183,8 +185,7 @@ def parse_requirements( for name, reqs in sorted(requirements_by_platform.items()): requirement_target_platforms = {} for r in reqs.values(): - target_platforms = env_marker_target_platforms.get(r.requirement_line, r.target_platforms) - for p in target_platforms: + for p in r.target_platforms: requirement_target_platforms[p] = None item = struct( @@ -197,7 +198,6 @@ def parse_requirements( reqs = reqs, index_urls = index_urls, platforms = platforms, - env_marker_target_platforms = env_marker_target_platforms, extract_url_srcs = extract_url_srcs, logger = logger, ), @@ -221,18 +221,13 @@ def _package_srcs( index_urls, platforms, logger, - env_marker_target_platforms, extract_url_srcs): """A function to return sources for a particular package.""" srcs = {} for r in sorted(reqs.values(), key = lambda r: r.requirement_line): - if ";" in r.requirement_line: - target_platforms = env_marker_target_platforms.get(r.requirement_line, []) - else: - target_platforms = r.target_platforms extra_pip_args = tuple(r.extra_pip_args) - for target_platform in target_platforms: + for target_platform in r.target_platforms: if platforms and target_platform not in platforms: fail("The target platform '{}' could not be found in {}".format( target_platform, diff --git a/python/private/pypi/whl_repo_name.bzl b/python/private/pypi/whl_repo_name.bzl index 2b3b5418aa..29d774c361 100644 --- a/python/private/pypi/whl_repo_name.bzl +++ b/python/private/pypi/whl_repo_name.bzl @@ -18,12 +18,14 @@ load("//python/private:normalize_name.bzl", "normalize_name") load(":parse_whl_name.bzl", "parse_whl_name") -def whl_repo_name(filename, sha256): +def whl_repo_name(filename, sha256, *target_platforms): """Return a valid whl_library repo name given a distribution filename. Args: filename: {type}`str` the filename of the distribution. sha256: {type}`str` the sha256 of the distribution. + *target_platforms: {type}`list[str]` the extra suffixes to append. + Only used when we need to support different extras per version. Returns: a string that can be used in {obj}`whl_library`. @@ -59,6 +61,8 @@ def whl_repo_name(filename, sha256): elif version: parts.insert(1, version) + parts.extend([p.partition("_")[-1] for p in target_platforms]) + return "_".join(parts) def pypi_repo_name(whl_name, *target_platforms): diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl index ee6200a70a..6d061f4d56 100644 --- a/tests/pypi/hub_builder/hub_builder_tests.bzl +++ b/tests/pypi/hub_builder/hub_builder_tests.bzl @@ -203,6 +203,142 @@ def _test_simple_multiple_requirements(env): _tests.append(_test_simple_multiple_requirements) +def _test_simple_extras_vs_no_extras(env): + builder = hub_builder(env) + builder.pip_parse( + _mock_mctx( + read = lambda x: { + "darwin.txt": "simple[foo]==0.0.1 --hash=sha256:deadbeef", + "win.txt": "simple==0.0.1 --hash=sha256:deadbeef", + }[x], + ), + _parse( + hub_name = "pypi", + python_version = "3.15", + requirements_darwin = "darwin.txt", + requirements_windows = "win.txt", + ), + ) + pypi = builder.build() + + pypi.exposed_packages().contains_exactly(["simple"]) + pypi.group_map().contains_exactly({}) + pypi.whl_map().contains_exactly({ + "simple": { + "pypi_315_simple_osx_aarch64": [ + whl_config_setting( + target_platforms = [ + "cp315_osx_aarch64", + ], + version = "3.15", + ), + ], + "pypi_315_simple_windows_aarch64": [ + whl_config_setting( + target_platforms = [ + "cp315_windows_aarch64", + ], + version = "3.15", + ), + ], + }, + }) + pypi.whl_libraries().contains_exactly({ + "pypi_315_simple_osx_aarch64": { + "config_load": "@pypi//:config.bzl", + "dep_template": "@pypi//{name}:{target}", + "python_interpreter_target": "unit_test_interpreter_target", + "requirement": "simple[foo]==0.0.1 --hash=sha256:deadbeef", + }, + "pypi_315_simple_windows_aarch64": { + "config_load": "@pypi//:config.bzl", + "dep_template": "@pypi//{name}:{target}", + "python_interpreter_target": "unit_test_interpreter_target", + "requirement": "simple==0.0.1 --hash=sha256:deadbeef", + }, + }) + pypi.extra_aliases().contains_exactly({}) + +_tests.append(_test_simple_extras_vs_no_extras) + +def _test_simple_extras_vs_no_extras_simpleapi(env): + def mocksimpleapi_download(*_, **__): + return { + "simple": parse_simpleapi_html( + url = "https://example.com", + content = """\ + simple-0.0.1-py3-none-any.whl
+""", + ), + } + + builder = hub_builder( + env, + simpleapi_download_fn = mocksimpleapi_download, + ) + builder.pip_parse( + _mock_mctx( + read = lambda x: { + "darwin.txt": "simple[foo]==0.0.1 --hash=sha256:deadbeef", + "win.txt": "simple==0.0.1 --hash=sha256:deadbeef", + }[x], + ), + _parse( + hub_name = "pypi", + python_version = "3.15", + requirements_darwin = "darwin.txt", + requirements_windows = "win.txt", + experimental_index_url = "example.com", + ), + ) + pypi = builder.build() + + pypi.exposed_packages().contains_exactly(["simple"]) + pypi.group_map().contains_exactly({}) + pypi.whl_map().contains_exactly({ + "simple": { + "pypi_315_simple_py3_none_any_deadbeef_osx_aarch64": [ + whl_config_setting( + target_platforms = [ + "cp315_osx_aarch64", + ], + version = "3.15", + ), + ], + "pypi_315_simple_py3_none_any_deadbeef_windows_aarch64": [ + whl_config_setting( + target_platforms = [ + "cp315_windows_aarch64", + ], + version = "3.15", + ), + ], + }, + }) + pypi.whl_libraries().contains_exactly({ + "pypi_315_simple_py3_none_any_deadbeef_osx_aarch64": { + "config_load": "@pypi//:config.bzl", + "dep_template": "@pypi//{name}:{target}", + "filename": "simple-0.0.1-py3-none-any.whl", + "python_interpreter_target": "unit_test_interpreter_target", + "requirement": "simple[foo]==0.0.1", + "sha256": "deadbeef", + "urls": ["https://example.com/simple-0.0.1-py3-none-any.whl"], + }, + "pypi_315_simple_py3_none_any_deadbeef_windows_aarch64": { + "config_load": "@pypi//:config.bzl", + "dep_template": "@pypi//{name}:{target}", + "filename": "simple-0.0.1-py3-none-any.whl", + "python_interpreter_target": "unit_test_interpreter_target", + "requirement": "simple==0.0.1", + "sha256": "deadbeef", + "urls": ["https://example.com/simple-0.0.1-py3-none-any.whl"], + }, + }) + pypi.extra_aliases().contains_exactly({}) + +_tests.append(_test_simple_extras_vs_no_extras_simpleapi) + def _test_simple_multiple_python_versions(env): builder = hub_builder( env, @@ -496,34 +632,34 @@ torch==2.4.1+cpu ; platform_machine == 'x86_64' \ pypi.group_map().contains_exactly({}) pypi.whl_map().contains_exactly({ "torch": { - "pypi_312_torch_cp312_cp312_linux_x86_64_8800deef": [ + "pypi_312_torch_cp312_cp312_linux_x86_64_8800deef_linux_x86_64": [ whl_config_setting( - target_platforms = ("cp312_linux_x86_64",), + target_platforms = ["cp312_linux_x86_64"], version = "3.12", ), ], - "pypi_312_torch_cp312_cp312_manylinux_2_17_aarch64_36109432": [ + "pypi_312_torch_cp312_cp312_manylinux_2_17_aarch64_36109432_linux_aarch64": [ whl_config_setting( - target_platforms = ("cp312_linux_aarch64",), + target_platforms = ["cp312_linux_aarch64"], version = "3.12", ), ], - "pypi_312_torch_cp312_cp312_win_amd64_3a570e5c": [ + "pypi_312_torch_cp312_cp312_win_amd64_3a570e5c_windows_x86_64": [ whl_config_setting( - target_platforms = ("cp312_windows_x86_64",), + target_platforms = ["cp312_windows_x86_64"], version = "3.12", ), ], - "pypi_312_torch_cp312_none_macosx_11_0_arm64_72b484d5": [ + "pypi_312_torch_cp312_none_macosx_11_0_arm64_72b484d5_osx_aarch64": [ whl_config_setting( - target_platforms = ("cp312_osx_aarch64",), + target_platforms = ["cp312_osx_aarch64"], version = "3.12", ), ], }, }) pypi.whl_libraries().contains_exactly({ - "pypi_312_torch_cp312_cp312_linux_x86_64_8800deef": { + "pypi_312_torch_cp312_cp312_linux_x86_64_8800deef_linux_x86_64": { "config_load": "@pypi//:config.bzl", "dep_template": "@pypi//{name}:{target}", "filename": "torch-2.4.1+cpu-cp312-cp312-linux_x86_64.whl", @@ -532,7 +668,7 @@ torch==2.4.1+cpu ; platform_machine == 'x86_64' \ "sha256": "8800deef0026011d502c0c256cc4b67d002347f63c3a38cd8e45f1f445c61364", "urls": ["https://torch.index/whl/cpu/torch-2.4.1%2Bcpu-cp312-cp312-linux_x86_64.whl"], }, - "pypi_312_torch_cp312_cp312_manylinux_2_17_aarch64_36109432": { + "pypi_312_torch_cp312_cp312_manylinux_2_17_aarch64_36109432_linux_aarch64": { "config_load": "@pypi//:config.bzl", "dep_template": "@pypi//{name}:{target}", "filename": "torch-2.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", @@ -541,7 +677,7 @@ torch==2.4.1+cpu ; platform_machine == 'x86_64' \ "sha256": "36109432b10bd7163c9b30ce896f3c2cca1b86b9765f956a1594f0ff43091e2a", "urls": ["https://torch.index/whl/cpu/torch-2.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"], }, - "pypi_312_torch_cp312_cp312_win_amd64_3a570e5c": { + "pypi_312_torch_cp312_cp312_win_amd64_3a570e5c_windows_x86_64": { "config_load": "@pypi//:config.bzl", "dep_template": "@pypi//{name}:{target}", "filename": "torch-2.4.1+cpu-cp312-cp312-win_amd64.whl", @@ -550,7 +686,7 @@ torch==2.4.1+cpu ; platform_machine == 'x86_64' \ "sha256": "3a570e5c553415cdbddfe679207327b3a3806b21c6adea14fba77684d1619e97", "urls": ["https://torch.index/whl/cpu/torch-2.4.1%2Bcpu-cp312-cp312-win_amd64.whl"], }, - "pypi_312_torch_cp312_none_macosx_11_0_arm64_72b484d5": { + "pypi_312_torch_cp312_none_macosx_11_0_arm64_72b484d5_osx_aarch64": { "config_load": "@pypi//:config.bzl", "dep_template": "@pypi//{name}:{target}", "filename": "torch-2.4.1-cp312-none-macosx_11_0_arm64.whl", diff --git a/tests/pypi/parse_requirements/parse_requirements_tests.bzl b/tests/pypi/parse_requirements/parse_requirements_tests.bzl index bd0078bfa4..63755d2edd 100644 --- a/tests/pypi/parse_requirements/parse_requirements_tests.bzl +++ b/tests/pypi/parse_requirements/parse_requirements_tests.bzl @@ -22,12 +22,6 @@ load("//python/private/pypi:pep508_env.bzl", pep508_env = "env") # buildifier: def _mock_ctx(): testdata = { - "requirements_different_package_version": """\ -foo==0.0.1+local \ - --hash=sha256:deadbeef -foo==0.0.1 \ - --hash=sha256:deadb00f -""", "requirements_direct": """\ foo[extra] @ https://some-url/package.whl """, @@ -39,6 +33,14 @@ foo @ https://github.com/org/foo/downloads/foo-1.1.tar.gz foo[extra]==0.0.1 \ --hash=sha256:deadbeef +""", + "requirements_foo": """\ +foo==0.0.1 \ + --hash=sha256:deadb00f +""", + "requirements_foo_local": """\ +foo==0.0.1+local \ + --hash=sha256:deadbeef """, "requirements_git": """ foo @ git+https://github.com/org/foo.git@deadbeef @@ -75,7 +77,7 @@ foo==0.0.2; python_full_version >= '3.10.0' \ --hash=sha256:deadb11f """, "requirements_optional_hash": """ -foo==0.0.4 @ https://example.org/foo-0.0.4.whl +bar==0.0.4 @ https://example.org/bar-0.0.4.whl foo==0.0.5 @ https://example.org/foo-0.0.5.whl --hash=sha256:deadbeef """, "requirements_osx": """\ @@ -441,7 +443,8 @@ _tests.append(_test_env_marker_resolution) def _test_different_package_version(env): got = parse_requirements( requirements_by_platform = { - "requirements_different_package_version": ["linux_x86_64"], + "requirements_foo": ["linux_aarch64"], + "requirements_foo_local": ["linux_x86_64"], }, ) env.expect.that_collection(got).contains_exactly([ @@ -454,7 +457,7 @@ def _test_different_package_version(env): distribution = "foo", extra_pip_args = [], requirement_line = "foo==0.0.1 --hash=sha256:deadb00f", - target_platforms = ["linux_x86_64"], + target_platforms = ["linux_aarch64"], url = "", filename = "", sha256 = "", @@ -476,10 +479,11 @@ def _test_different_package_version(env): _tests.append(_test_different_package_version) -def _test_optional_hash(env): +def _test_different_package_extras(env): got = parse_requirements( requirements_by_platform = { - "requirements_optional_hash": ["linux_x86_64"], + "requirements_foo": ["linux_aarch64"], + "requirements_lock": ["linux_x86_64"], }, ) env.expect.that_collection(got).contains_exactly([ @@ -491,13 +495,58 @@ def _test_optional_hash(env): struct( distribution = "foo", extra_pip_args = [], - requirement_line = "foo==0.0.4", + requirement_line = "foo==0.0.1 --hash=sha256:deadb00f", + target_platforms = ["linux_aarch64"], + url = "", + filename = "", + sha256 = "", + yanked = False, + ), + struct( + distribution = "foo", + extra_pip_args = [], + requirement_line = "foo[extra]==0.0.1 --hash=sha256:deadbeef", target_platforms = ["linux_x86_64"], - url = "https://example.org/foo-0.0.4.whl", - filename = "foo-0.0.4.whl", + url = "", + filename = "", sha256 = "", yanked = False, ), + ], + ), + ]) + +_tests.append(_test_different_package_extras) + +def _test_optional_hash(env): + got = parse_requirements( + requirements_by_platform = { + "requirements_optional_hash": ["linux_x86_64"], + }, + ) + env.expect.that_collection(got).contains_exactly([ + struct( + name = "bar", + is_exposed = True, + is_multiple_versions = False, + srcs = [ + struct( + distribution = "bar", + extra_pip_args = [], + requirement_line = "bar==0.0.4", + target_platforms = ["linux_x86_64"], + url = "https://example.org/bar-0.0.4.whl", + filename = "bar-0.0.4.whl", + sha256 = "", + yanked = False, + ), + ], + ), + struct( + name = "foo", + is_exposed = True, + is_multiple_versions = False, + srcs = [ struct( distribution = "foo", extra_pip_args = [], @@ -686,7 +735,6 @@ def _test_get_index_urls_different_versions(env): ), }, ), - debug = True, ) env.expect.that_collection(got).contains_exactly([ @@ -760,13 +808,12 @@ def _test_get_index_urls_single_py_version(env): ), }, ), - debug = True, ) env.expect.that_collection(got).contains_exactly([ struct( is_exposed = True, - is_multiple_versions = True, + is_multiple_versions = False, name = "foo", srcs = [ struct(