diff --git a/MODULE.bazel b/MODULE.bazel index 80c7ab1d99..790a4d4642 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -220,7 +220,7 @@ bazel_dep(name = "rules_testing", version = "0.6.0", dev_dependency = True) bazel_dep(name = "rules_shell", version = "0.3.0", dev_dependency = True) bazel_dep(name = "rules_multirun", version = "0.9.0", dev_dependency = True) bazel_dep(name = "bazel_ci_rules", version = "1.0.0", dev_dependency = True) -bazel_dep(name = "rules_pkg", version = "1.0.1", dev_dependency = True) +bazel_dep(name = "rules_pkg", version = "1.2.0", dev_dependency = True) bazel_dep(name = "other", version = "0", dev_dependency = True) bazel_dep(name = "another_module", version = "0", dev_dependency = True) diff --git a/python/private/py_executable.bzl b/python/private/py_executable.bzl index 9084454c65..cf0b0ed5a0 100644 --- a/python/private/py_executable.bzl +++ b/python/private/py_executable.bzl @@ -513,17 +513,13 @@ def _create_zip_main(ctx, *, stage2_bootstrap, runtime_details, venv): # * https://github.com/python/cpython/blob/main/Modules/getpath.py # * https://github.com/python/cpython/blob/main/Lib/site.py def _create_venv(ctx, output_prefix, imports, runtime_details, add_runfiles_root_to_sys_path): - create_full_venv = BootstrapImplFlag.get_value(ctx) == BootstrapImplFlag.SCRIPT venv = "_{}.venv".format(output_prefix.lstrip("_")) - if create_full_venv: - # The pyvenv.cfg file must be present to trigger the venv site hooks. - # Because it's paths are expected to be absolute paths, we can't reliably - # put much in it. See https://github.com/python/cpython/issues/83650 - pyvenv_cfg = ctx.actions.declare_file("{}/pyvenv.cfg".format(venv)) - ctx.actions.write(pyvenv_cfg, "") - else: - pyvenv_cfg = None + # The pyvenv.cfg file must be present to trigger the venv site hooks. + # Because it's paths are expected to be absolute paths, we can't reliably + # put much in it. 
See https://github.com/python/cpython/issues/83650 + pyvenv_cfg = ctx.actions.declare_file("{}/pyvenv.cfg".format(venv)) + ctx.actions.write(pyvenv_cfg, "") runtime = runtime_details.effective_runtime @@ -539,40 +535,38 @@ def _create_venv(ctx, output_prefix, imports, runtime_details, add_runfiles_root bin_dir = "{}/bin".format(venv) - if create_full_venv: - # Some wrappers around the interpreter (e.g. pyenv) use the program - # name to decide what to do, so preserve the name. - py_exe_basename = paths.basename(interpreter_actual_path) - - if not venvs_use_declare_symlink_enabled or not runtime.supports_build_time_venv: - recreate_venv_at_runtime = True - - # When the venv symlinks are disabled, the $venv/bin/python3 file isn't - # needed or used at runtime. However, the zip code uses the interpreter - # File object to figure out some paths. - interpreter = ctx.actions.declare_file("{}/{}".format(bin_dir, py_exe_basename)) - ctx.actions.write(interpreter, "actual:{}".format(interpreter_actual_path)) - - elif runtime.interpreter: - # Even though ctx.actions.symlink() is used, using - # declare_symlink() is required to ensure that the resulting file - # in runfiles is always a symlink. An RBE implementation, for example, - # may choose to write what symlink() points to instead. - interpreter = ctx.actions.declare_symlink("{}/{}".format(bin_dir, py_exe_basename)) - - rel_path = relative_path( - # dirname is necessary because a relative symlink is relative to - # the directory the symlink resides within. - from_ = paths.dirname(runfiles_root_path(ctx, interpreter.short_path)), - to = interpreter_actual_path, - ) + # Some wrappers around the interpreter (e.g. pyenv) use the program + # name to decide what to do, so preserve the name. 
+ py_exe_basename = paths.basename(interpreter_actual_path) - ctx.actions.symlink(output = interpreter, target_path = rel_path) - else: - interpreter = ctx.actions.declare_symlink("{}/{}".format(bin_dir, py_exe_basename)) - ctx.actions.symlink(output = interpreter, target_path = runtime.interpreter_path) + if not venvs_use_declare_symlink_enabled or not runtime.supports_build_time_venv: + recreate_venv_at_runtime = True + + # When the venv symlinks are disabled, the $venv/bin/python3 file isn't + # needed or used at runtime. However, the zip code uses the interpreter + # File object to figure out some paths. + interpreter = ctx.actions.declare_file("{}/{}".format(bin_dir, py_exe_basename)) + + ctx.actions.write(interpreter, "actual:{}".format(interpreter_actual_path)) + + elif runtime.interpreter: + # Even though ctx.actions.symlink() is used, using + # declare_symlink() is required to ensure that the resulting file + # in runfiles is always a symlink. An RBE implementation, for example, + # may choose to write what symlink() points to instead. + interpreter = ctx.actions.declare_symlink("{}/{}".format(bin_dir, py_exe_basename)) + + rel_path = relative_path( + # dirname is necessary because a relative symlink is relative to + # the directory the symlink resides within. 
+ from_ = paths.dirname(runfiles_root_path(ctx, interpreter.short_path)), + to = interpreter_actual_path, + ) + + ctx.actions.symlink(output = interpreter, target_path = rel_path) else: - interpreter = None + interpreter = ctx.actions.declare_symlink("{}/{}".format(bin_dir, py_exe_basename)) + ctx.actions.symlink(output = interpreter, target_path = runtime.interpreter_path) if runtime.interpreter_version_info: version = "{}.{}".format( diff --git a/python/private/python_bootstrap_template.txt b/python/private/python_bootstrap_template.txt index 9717756036..2bf1e6a091 100644 --- a/python/private/python_bootstrap_template.txt +++ b/python/private/python_bootstrap_template.txt @@ -8,19 +8,21 @@ from __future__ import print_function import sys import os +from os.path import dirname, join, basename import subprocess import uuid +import shutil # runfiles-relative path STAGE2_BOOTSTRAP="%stage2_bootstrap%" -# runfiles-relative path to venv's python interpreter +# runfiles-root-relative path to venv's python interpreter # Empty string if a venv is not setup. PYTHON_BINARY = '%python_binary%' # The path to the actual interpreter that is used. # Typically PYTHON_BINARY is a symlink pointing to this. -# runfiles-relative path, absolute path, or single word. +# runfiles-root-relative path, absolute path, or single word. # Used to create a venv at runtime, or when a venv isn't setup. PYTHON_BINARY_ACTUAL = "%python_binary_actual%" @@ -30,7 +32,14 @@ IS_ZIPFILE = "%is_zipfile%" == "1" # 0 or 1. # If 1, then a venv will be created at runtime that replicates what would have # been the build-time structure. -RECREATE_VENV_AT_RUNTIME="%recreate_venv_at_runtime%" +RECREATE_VENV_AT_RUNTIME = "%recreate_venv_at_runtime%" == "1" +# 0 or 1 +# If 1, then the path to python will be resolved by running +# PYTHON_BINARY_ACTUAL to determine the actual underlying interpreter. 
+RESOLVE_PYTHON_BINARY_AT_RUNTIME = "%resolve_python_binary_at_runtime%" == "1" +# venv-relative path to the site-packages +# e.g. lib/python3.12t/site-packages +VENV_REL_SITE_PACKAGES = "%venv_rel_site_packages%" WORKSPACE_NAME = "%workspace_name%" @@ -40,6 +49,7 @@ INTERPRETER_ARGS = [ ] ADDITIONAL_INTERPRETER_ARGS = os.environ.get("RULES_PYTHON_ADDITIONAL_INTERPRETER_ARGS", "") +EXTRACT_ROOT = os.environ.get("RULES_PYTHON_EXTRACT_ROOT") def IsRunningFromZip(): return IS_ZIPFILE @@ -111,12 +121,12 @@ def SearchPath(name): return path return None -def FindPythonBinary(module_space): +def FindPythonBinary(runfiles_root): """Finds the real Python binary if it's not a normal absolute path.""" if PYTHON_BINARY: - return FindBinary(module_space, PYTHON_BINARY) + return FindBinary(runfiles_root, PYTHON_BINARY) else: - return FindBinary(module_space, PYTHON_BINARY_ACTUAL) + return FindBinary(runfiles_root, PYTHON_BINARY_ACTUAL) def print_verbose(*args, mapping=None, values=None): @@ -124,7 +134,7 @@ def print_verbose(*args, mapping=None, values=None): if mapping is not None: for key, value in sorted((mapping or {}).items()): print( - "bootstrap: stage 1: ", + "bootstrap: stage 1:", *(list(args) + ["{}={}".format(key, repr(value))]), file=sys.stderr, flush=True @@ -140,7 +150,7 @@ def print_verbose(*args, mapping=None, values=None): else: print("bootstrap: stage 1:", *args, file=sys.stderr, flush=True) -def FindBinary(module_space, bin_name): +def FindBinary(runfiles_root, bin_name): """Finds the real binary if it's not a normal absolute path.""" if not bin_name: return None @@ -155,7 +165,7 @@ def FindBinary(module_space, bin_name): # Use normpath() to convert slashes to os.sep on Windows. elif os.sep in os.path.normpath(bin_name): # Case 3: Path is relative to the repo root. - return os.path.join(module_space, bin_name) + return os.path.join(runfiles_root, bin_name) else: # Case 4: Path has to be looked up in the search path. 
return SearchPath(bin_name) @@ -189,9 +199,9 @@ def FindModuleSpace(main_rel_path): stub_filename = os.path.join(os.getcwd(), stub_filename) while True: - module_space = stub_filename + ('.exe' if IsWindows() else '') + '.runfiles' - if os.path.isdir(module_space): - return module_space + runfiles_root = stub_filename + ('.exe' if IsWindows() else '') + '.runfiles' + if os.path.isdir(runfiles_root): + return runfiles_root runfiles_pattern = r'(.*\.runfiles)' + (r'\\' if IsWindows() else '/') + '.*' matchobj = re.match(runfiles_pattern, stub_filename) @@ -239,11 +249,62 @@ def ExtractZip(zip_path, dest_dir): def CreateModuleSpace(): temp_dir = tempfile.mkdtemp('', 'Bazel.runfiles_') ExtractZip(os.path.dirname(__file__), temp_dir) - # IMPORTANT: Later code does `rm -fr` on dirname(module_space) -- it's + # IMPORTANT: Later code does `rm -fr` on dirname(runfiles_root) -- it's # important that deletion code be in sync with this directory structure return os.path.join(temp_dir, 'runfiles') -def RunfilesEnvvar(module_space): +def _create_venv(runfiles_root): + runfiles_venv = join(runfiles_root, dirname(dirname(PYTHON_BINARY))) + if EXTRACT_ROOT: + venv = join(EXTRACT_ROOT, runfiles_venv) + os.makedirs(venv, exist_ok=True) + cleanup_dir = None + else: + import tempfile + venv = tempfile.mkdtemp("", f"bazel.{basename(runfiles_venv)}.") + cleanup_dir = venv + + python_exe_actual = FindBinary(runfiles_root, PYTHON_BINARY_ACTUAL) + + # See stage1_bootstrap_template.sh for details on this code path. In short, + # this handles when the build-time python version doesn't match runtime + # and if the initially resolved python_exe_actual is a wrapper script. 
+ if RESOLVE_PYTHON_BINARY_AT_RUNTIME: + src = f""" +import sys, site +print(sys.executable) +print(site.getsitepackages([{venv!r}])[-1]) + """ + output = subprocess.check_output([python_exe_actual, "-I"], + encoding = "utf8", input=src) # argv list without a shell: with shell=True on POSIX, "-I" is handed to sh instead of python + output = output.strip().split("\n") + python_exe_actual = output[0] + venv_site_packages = output[1] + os.makedirs(dirname(venv_site_packages), exist_ok=True) + runfiles_venv_site_packages = join(runfiles_venv, VENV_REL_SITE_PACKAGES) + else: + python_exe_actual = FindBinary(runfiles_root, PYTHON_BINARY_ACTUAL) + venv_site_packages = join(venv, "lib") + runfiles_venv_site_packages = join(runfiles_venv, "lib") + + if python_exe_actual is None: + raise AssertionError('Could not find python binary: ' + repr(PYTHON_BINARY_ACTUAL)) + + venv_bin = join(venv, "bin") + try: + os.mkdir(venv_bin) + except FileExistsError: + pass + + # Match the basename; some tools, e.g. pyenv, key off the executable name + venv_python_exe = join(venv_bin, os.path.basename(python_exe_actual)) + _symlink_exist_ok(from_=venv_python_exe, to=python_exe_actual) + _symlink_exist_ok(from_=join(venv, "lib"), to=join(runfiles_venv, "lib")) + _symlink_exist_ok(from_=venv_site_packages, to=runfiles_venv_site_packages) + _symlink_exist_ok(from_=join(venv, "pyvenv.cfg"), to=join(runfiles_venv, "pyvenv.cfg")) + return cleanup_dir, venv_python_exe + +def RunfilesEnvvar(runfiles_root): """Finds the runfiles manifest or the runfiles directory. Returns: @@ -263,10 +324,10 @@ def RunfilesEnvvar(module_space): # If running from a zip, there's no manifest file. 
if IsRunningFromZip(): - return ('RUNFILES_DIR', module_space) + return ('RUNFILES_DIR', runfiles_root) # Look for the runfiles "output" manifest, argv[0] + ".runfiles_manifest" - runfiles = module_space + '_manifest' + runfiles = runfiles_root + '_manifest' if os.path.exists(runfiles): return ('RUNFILES_MANIFEST_FILE', runfiles) @@ -274,19 +335,19 @@ def RunfilesEnvvar(module_space): # Normally .runfiles_manifest and MANIFEST are both present, but the # former will be missing for zip-based builds or if someone copies the # runfiles tree elsewhere. - runfiles = os.path.join(module_space, 'MANIFEST') + runfiles = os.path.join(runfiles_root, 'MANIFEST') if os.path.exists(runfiles): return ('RUNFILES_MANIFEST_FILE', runfiles) # If running in a sandbox and no environment variables are set, then # Look for the runfiles next to the binary. - if module_space.endswith('.runfiles') and os.path.isdir(module_space): - return ('RUNFILES_DIR', module_space) + if runfiles_root.endswith('.runfiles') and os.path.isdir(runfiles_root): + return ('RUNFILES_DIR', runfiles_root) return (None, None) -def ExecuteFile(python_program, main_filename, args, env, module_space, - workspace, delete_module_space): +def ExecuteFile(python_program, main_filename, args, env, runfiles_root, + workspace, delete_dirs): # type: (str, str, list[str], dict[str, str], str, str|None, str|None) -> ... """Executes the given Python file using the various environment settings. @@ -298,11 +359,11 @@ def ExecuteFile(python_program, main_filename, args, env, module_space, main_filename: (str) The Python file to execute args: (list[str]) Additional args to pass to the Python file env: (dict[str, str]) A dict of environment variables to set for the execution - module_space: (str) Path to the module space/runfiles tree directory + runfiles_root: (str) Path to the module space/runfiles tree directory workspace: (str|None) Name of the workspace to execute in. This is expected to be a directory under the runfiles tree. 
- delete_module_space: (bool), True if the module space should be deleted - after a successful (exit code zero) program run, False if not. + delete_dirs: (list[str]) directories that should be deleted after the user + program has finished running. """ argv = [python_program] argv.extend(INTERPRETER_ARGS) @@ -326,20 +387,22 @@ def ExecuteFile(python_program, main_filename, args, env, module_space, # can't execv because we need control to return here. This only # happens for targets built in the host config. # - if not (IsWindows() or workspace or delete_module_space): + if not (IsWindows() or workspace or delete_dirs): _RunExecv(python_program, argv, env) + print_verbose("run: subproc: environ:", mapping=os.environ) + print_verbose("run: subproc: cwd:", workspace) + print_verbose("run: subproc: argv:", values=argv) ret_code = subprocess.call( argv, env=env, cwd=workspace ) - if delete_module_space: - # NOTE: dirname() is called because CreateModuleSpace() creates a - # sub-directory within a temporary directory, and we want to remove the - # whole temporary directory. 
- shutil.rmtree(os.path.dirname(module_space), True) + if delete_dirs: + for delete_dir in delete_dirs: + print_verbose("rmtree:", delete_dir) + shutil.rmtree(delete_dir, True) sys.exit(ret_code) def _RunExecv(python_program, argv, env): @@ -349,9 +412,32 @@ def _RunExecv(python_program, argv, env): print_verbose("RunExecv: environ:", mapping=os.environ) print_verbose("RunExecv: python:", python_program) print_verbose("RunExecv: argv:", values=argv) - os.execv(python_program, argv) + try: + os.execv(python_program, argv) + except: + with open(python_program, 'rb') as f: + print_verbose("pyprog head:" + str(f.read(50))) + raise + +def _symlink_exist_ok(*, from_, to): + try: + os.symlink(to, from_) + except FileExistsError: + pass + + def Main(): + print_verbose("STAGE2_BOOTSTRAP:", STAGE2_BOOTSTRAP) + print_verbose("PYTHON_BINARY:", PYTHON_BINARY) + print_verbose("PYTHON_BINARY_ACTUAL:", PYTHON_BINARY_ACTUAL) + print_verbose("RECREATE_VENV_AT_RUNTIME:", RECREATE_VENV_AT_RUNTIME) + print_verbose("RESOLVE_PYTHON_BINARY_AT_RUNTIME:", RESOLVE_PYTHON_BINARY_AT_RUNTIME) + print_verbose("bootstrap sys.executable:", sys.executable) + print_verbose("bootstrap sys._base_executable:", sys._base_executable) + print_verbose("bootstrap sys.version:", sys.version) + + print_verbose("sys.version:", sys.version) print_verbose("initial argv:", values=sys.argv) print_verbose("initial cwd:", os.getcwd()) print_verbose("initial environ:", mapping=os.environ) @@ -367,17 +453,21 @@ def Main(): # is packaged and needs no artifacts from the main repo) main_rel_path = os.path.normpath(STAGE2_BOOTSTRAP) + delete_dirs = [] + if IsRunningFromZip(): - module_space = CreateModuleSpace() - delete_module_space = True + runfiles_root = CreateModuleSpace() + # NOTE: dirname() is called because CreateModuleSpace() creates a + # sub-directory within a temporary directory, and we want to remove the + # whole temporary directory. 
+ delete_dirs.append(dirname(runfiles_root)) else: - module_space = FindModuleSpace(main_rel_path) - delete_module_space = False + runfiles_root = FindModuleSpace(main_rel_path) if os.environ.get("RULES_PYTHON_TESTING_TELL_MODULE_SPACE"): - new_env["RULES_PYTHON_TESTING_MODULE_SPACE"] = module_space + new_env["RULES_PYTHON_TESTING_MODULE_SPACE"] = runfiles_root - runfiles_envkey, runfiles_envvalue = RunfilesEnvvar(module_space) + runfiles_envkey, runfiles_envvalue = RunfilesEnvvar(runfiles_root) if runfiles_envkey: new_env[runfiles_envkey] = runfiles_envvalue @@ -385,17 +475,25 @@ def Main(): # See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH new_env['PYTHONSAFEPATH'] = '1' - main_filename = os.path.join(module_space, main_rel_path) + main_filename = os.path.join(runfiles_root, main_rel_path) main_filename = GetWindowsPathWithUNCPrefix(main_filename) assert os.path.exists(main_filename), \ 'Cannot exec() %r: file not found.' % main_filename assert os.access(main_filename, os.R_OK), \ 'Cannot exec() %r: file not readable.' % main_filename - program = python_program = FindPythonBinary(module_space) - if python_program is None: + python_exe = FindPythonBinary(runfiles_root) + if python_exe is None: raise AssertionError('Could not find python binary: ' + repr(PYTHON_BINARY)) + if RECREATE_VENV_AT_RUNTIME: + # When the venv is created at runtime, python_exe is PYTHON_BINARY_ACTUAL + # so we have to re-point it to the symlink in the venv + cleanup_dir, python_exe = _create_venv(runfiles_root) + # _create_venv returns None as the cleanup dir when it used RULES_PYTHON_EXTRACT_ROOT: nothing to delete + if cleanup_dir: + delete_dirs.append(cleanup_dir) + # Some older Python versions on macOS (namely Python 3.7) may unintentionally # leave this environment variable set after starting the interpreter, which # causes problems with Python subprocesses correctly locating sys.executable, @@ -411,15 +509,15 @@ def Main(): # change directory to the right runfiles directory. 
# (So that the data files are accessible) if os.environ.get('RUN_UNDER_RUNFILES') == '1': - workspace = os.path.join(module_space, WORKSPACE_NAME) + workspace = os.path.join(runfiles_root, WORKSPACE_NAME) try: sys.stdout.flush() # NOTE: ExecuteFile may call execve() and lines after this will never run. ExecuteFile( - python_program, main_filename, args, new_env, module_space, + python_exe, main_filename, args, new_env, runfiles_root, workspace, - delete_module_space = delete_module_space, + delete_dirs = delete_dirs, ) except EnvironmentError: @@ -427,7 +525,7 @@ def Main(): e = sys.exc_info()[1] # This exception occurs when os.execv() fails for some reason. if not getattr(e, 'filename', None): - e.filename = program # Add info to error message + e.filename = python_exe # Add info to error message raise if __name__ == '__main__': diff --git a/python/private/stage1_bootstrap_template.sh b/python/private/stage1_bootstrap_template.sh index a984344647..5a85b9f7d4 100644 --- a/python/private/stage1_bootstrap_template.sh +++ b/python/private/stage1_bootstrap_template.sh @@ -6,14 +6,14 @@ if [[ -n "${RULES_PYTHON_BOOTSTRAP_VERBOSE:-}" ]]; then set -x fi -# runfiles-relative path +# runfiles-root-relative path STAGE2_BOOTSTRAP="%stage2_bootstrap%" -# runfiles-relative path to python interpreter to use. +# runfiles-root-relative path to python interpreter to use. # This is the `bin/python3` path in the binary's venv. PYTHON_BINARY='%python_binary%' # The path that PYTHON_BINARY should symlink to. -# runfiles-relative path, absolute path, or single word. +# runfiles-root-relative path, absolute path, or single word. # Only applicable for zip files or when venv is recreated at runtime. PYTHON_BINARY_ACTUAL="%python_binary_actual%" @@ -211,7 +211,7 @@ elif [[ "$RECREATE_VENV_AT_RUNTIME" == "1" ]]; then read -r resolved_py_exe read -r resolved_site_packages } < <("$python_exe_actual" -I <