diff --git a/utils/update_checkout/tests/test_locked_repository.py b/utils/update_checkout/tests/test_locked_repository.py index e81166b533c79..80773d2703ef2 100644 --- a/utils/update_checkout/tests/test_locked_repository.py +++ b/utils/update_checkout/tests/test_locked_repository.py @@ -16,7 +16,7 @@ def _update_arguments_with_fake_path(repo_name: str, path: str) -> UpdateArgumen reset_to_remote=False, clean=False, stash=False, - cross_repos_pr=False, + cross_repos_pr={}, output_prefix="", verbose=False, ) diff --git a/utils/update_checkout/update_checkout/git_command.py b/utils/update_checkout/update_checkout/git_command.py index e6c09fb9cdf0d..1634aa5836573 100644 --- a/utils/update_checkout/update_checkout/git_command.py +++ b/utils/update_checkout/update_checkout/git_command.py @@ -1,7 +1,44 @@ +import os import shlex import subprocess import sys -from typing import List, Any, Optional, Dict +from typing import List, Any, Optional, Dict, Tuple + + +class GitException(Exception): + """ + Exception raised when a Git command execution fails. + + Attributes + ---------- + returncode : int + The return code from the failed Git command. + command : List[str] + The Git command that was executed. + repo_name : str + The name of the Git repository. + stderr : str + The output of the failed Git command. + """ + + def __init__( + self, + returncode: int, + command: List[str], + repo_name: str, + output: str, + ): + super().__init__() + self.returncode = returncode + self.command = command + self.repo_name = repo_name + self.stderr = output + + def __str__(self): + return ( + f"[{self.repo_name}] '{Git._quote_command(self.command)}' " + f"returned ({self.returncode}) with the following {self.stderr}." + ) class Git: @@ -15,9 +52,9 @@ def run( allow_non_zero_exit: bool = False, fatal: bool = False, **kwargs, - ): + ) -> Tuple[str, int, List[str]]: command = Git._build_command(args) - + output = "" try: result = subprocess.run( command, @@ -38,20 +75,15 @@ def run( if fatal: sys.exit( f"command `{command}` terminated with a non-zero exit " - f"status {str(e.returncode)}, aborting") - eout = Exception( - f"[{repo_path}] '{Git._quote_command(command)}' failed with '{output}'" + f"status {str(e.returncode)}, aborting" + ) + raise GitException( + e.returncode, command, os.path.dirname(repo_path), output ) - eout.ret = e.returncode - eout.arguments = command - eout.repo_path = repo_path - eout.stderr = output - raise eout except OSError as e: if fatal: sys.exit( - f"could not execute '{Git._quote_command(command)}': " - f"{e.strerror}" + f"could not execute '{Git._quote_command(command)}': {e.strerror}" ) return (output.strip(), result.returncode, command) @@ -73,7 +105,7 @@ def _echo_command( print(f"{prefix}+ {' '.join(command_str)}", file=sys.stderr) if output: for line in output.splitlines(): - print(prefix+line) + print(prefix + line) sys.stdout.flush() sys.stderr.flush() @@ -86,5 +118,5 @@ def _quote(arg: Any) -> str: return shlex.quote(str(arg)) @staticmethod - def _quote_command(command: Any) -> str: + def _quote_command(command: List[Any]) -> str: return " ".join(Git._quote(arg) for arg in command) diff --git a/utils/update_checkout/update_checkout/parallel_runner.py b/utils/update_checkout/update_checkout/parallel_runner.py index d5bcbd953ca38..edee4145463a3 100644 --- a/utils/update_checkout/update_checkout/parallel_runner.py +++ b/utils/update_checkout/update_checkout/parallel_runner.py @@ -1,12 +1,14 @@ import sys from multiprocessing import cpu_count import time -from typing import Callable, List, Any, Tuple, Union +from typing import Callable, List, Any, Optional, Tuple, Union from threading import Lock, Thread, Event from concurrent.futures import ThreadPoolExecutor import shutil -from .runner_arguments import RunnerArguments, AdditionalSwiftSourcesArguments +from .git_command import GitException + +from .runner_arguments import RunnerArguments, AdditionalSwiftSourcesArguments, UpdateArguments class TaskTracker: @@ -50,10 +52,10 @@ def done_task_counter(self) -> int: class MonitoredFunction: def __init__( self, - fn: Callable, + fn: Callable[..., Union[Exception]], task_tracker: TaskTracker, ): - self.fn = fn + self._fn = fn self._task_tracker = task_tracker def __call__(self, *args: Union[RunnerArguments, AdditionalSwiftSourcesArguments]): @@ -61,7 +63,7 @@ def __call__(self, *args: Union[RunnerArguments, AdditionalSwiftSourcesArguments self._task_tracker.mark_task_as_running(task_name) result = None try: - result = self.fn(*args) + result = self._fn(*args) except Exception as e: print(e) finally: @@ -69,13 +71,19 @@ def __call__(self, *args: Union[RunnerArguments, AdditionalSwiftSourcesArguments return result -class ParallelRunner: +class ParallelRunner(): def __init__( self, - fn: Callable, - pool_args: List[Union[RunnerArguments, AdditionalSwiftSourcesArguments]], + fn: Callable[..., None], + pool_args: Union[List[UpdateArguments], List[AdditionalSwiftSourcesArguments]], n_threads: int = 0, ): + def run_safely(*args, **kwargs): + try: + fn(*args, **kwargs) + except GitException as e: + return e + if n_threads == 0: # Limit the number of threads as the performance regresses if the # number is too high. @@ -84,7 +92,8 @@ def __init__( self._monitor_polling_period = 0.1 self._terminal_width = shutil.get_terminal_size().columns self._pool_args = pool_args - self._fn = fn + self._fn_name = fn.__name__ + self._fn = run_safely self._output_prefix = pool_args[0].output_prefix self._nb_repos = len(pool_args) self._stop_event = Event() @@ -93,8 +102,8 @@ def __init__( self._task_tracker = TaskTracker() self._monitored_fn = MonitoredFunction(self._fn, self._task_tracker) - def run(self) -> List[Any]: - print(f"Running ``{self._fn.__name__}`` with up to {self._n_threads} processes.") + def run(self) -> List[Union[None, Exception]]: + print(f"Running ``{self._fn_name}`` with up to {self._n_threads} processes.") if self._verbose: with ThreadPoolExecutor(max_workers=self._n_threads) as pool: results = list(pool.map(self._fn, self._pool_args, timeout=1800)) @@ -129,13 +138,10 @@ def _monitor(self): sys.stdout.flush() @staticmethod - def check_results(results, op) -> int: - """Function used to check the results of ParallelRunner. - - NOTE: This function was originally located in the shell module of - swift_build_support and should eventually be replaced with a better - parallel implementation. - """ + def check_results( + results: Optional[List[Union[GitException, Exception, Any]]], operation: str + ) -> int: + """Check the results of ParallelRunner and print the failures.""" fail_count = 0 if results is None: @@ -144,15 +150,10 @@ def check_results(results, op) -> int: if r is None: continue if fail_count == 0: - print("======%s FAILURES======" % op) + print(f"======{operation} FAILURES======") fail_count += 1 - if isinstance(r, str): + if isinstance(r, (GitException, Exception)): print(r) continue - if not hasattr(r, "repo_path"): - # TODO: create a proper Exception class with these attributes - continue - print("%s failed (ret=%d): %s" % (r.repo_path, r.ret, r)) - if r.stderr: - print(r.stderr.decode()) + print(r) return fail_count diff --git a/utils/update_checkout/update_checkout/runner_arguments.py b/utils/update_checkout/update_checkout/runner_arguments.py index ea1129acfac83..2c9cc1a52aaf7 100644 --- a/utils/update_checkout/update_checkout/runner_arguments.py +++ b/utils/update_checkout/update_checkout/runner_arguments.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from .cli_arguments import CliArguments @@ -15,12 +15,12 @@ class UpdateArguments(RunnerArguments): source_root: str config: Dict[str, Any] scheme_map: Any - tag: str + tag: Optional[str] timestamp: Any reset_to_remote: bool clean: bool stash: bool - cross_repos_pr: bool + cross_repos_pr: Dict[str, str] @dataclass class AdditionalSwiftSourcesArguments(RunnerArguments): diff --git a/utils/update_checkout/update_checkout/update_checkout.py b/utils/update_checkout/update_checkout/update_checkout.py index 10fb865109ce2..8204da145e6db 100755 --- a/utils/update_checkout/update_checkout/update_checkout.py +++ b/utils/update_checkout/update_checkout/update_checkout.py @@ -15,10 +15,10 @@ import sys import traceback from multiprocessing import freeze_support -from typing import Any, Dict, Optional, Set, List, Union +from typing import Any, Dict, Hashable, Optional, Set, List, Union from .cli_arguments import CliArguments -from .git_command import Git +from .git_command import Git, GitException from .runner_arguments import AdditionalSwiftSourcesArguments, UpdateArguments from .parallel_runner import ParallelRunner @@ -27,6 +27,20 @@ SCRIPT_DIR = os.path.dirname(SCRIPT_FILE) +class SkippedReason: + def __init__(self, repo_name: str, reason: str): + self.repo_name = repo_name + self.reason = reason + + @staticmethod + def print_skipped_repositories(skipped_reasons: List["SkippedReason"], step: str): + if not skipped_reasons: + return + print(f"Skipped {step}:") + for reason in skipped_reasons: + print(f" '{reason.repo_name}' - {reason.reason}") + + def confirm_tag_in_repo(repo_path: str, tag: str, repo_name: str) -> Optional[str]: """Confirm that a given tag exists in a git repository. This function assumes that the repository is already a current working directory before @@ -74,7 +88,7 @@ def get_branch_for_repo( config: Dict[str, Any], repo_name: str, scheme_name: str, - scheme_map: Dict[str, str], + scheme_map: Optional[Dict[str, str]], cross_repos_pr: Dict[str, str], ): """Infer, fetch, and return a branch corresponding to a given PR, otherwise @@ -85,7 +99,7 @@ def get_branch_for_repo( config (Dict[str, Any]): deserialized `update-checkout-config.json` repo_name (str): name of the repository for checking out the branch scheme_name (str): name of the scheme to look up in the config - scheme_map (Dict[str, str]): map of repo names to branches to check out + scheme_map (Dict[str, str] | None): map of repo names to branches to check out cross_repos_pr (Dict[str, str]): map of repo ids to PRs to check out Returns: @@ -239,8 +253,8 @@ def run_for_repo_and_each_submodule_rec(args: List[str]): # Otherwise there was some other error, and we need to handle # it like other command errors. Git.run(repo_path, ["symbolic-ref", "-q", "HEAD"]) - except Exception as e: - if e.ret == 1: + except GitException as e: + if e.returncode == 1: detached_head = True else: raise # Pass this error up the chain. @@ -268,19 +282,17 @@ def run_for_repo_and_each_submodule_rec(args: List[str]): prefix=prefix, ) except Exception: - (type, value, tb) = sys.exc_info() if verbose: print('Error on repo "%s": %s' % (repo_path, traceback.format_exc())) - return value + raise -def get_timestamp_to_match(match_timestamp, source_root): - # type: (str | None, str) -> str | None +def get_timestamp_to_match(match_timestamp: bool, source_root: str): """Computes a timestamp of the last commit on the current branch in the `swift` repository. Args: - match_timestamp (str | None): value of `--match-timestamp` to check. + match_timestamp (bool): value of `--match-timestamp` to check. source_root (str): directory that contains sources of the Swift project. Returns: @@ -295,7 +307,7 @@ def get_timestamp_to_match(match_timestamp, source_root): return output -def get_scheme_map(config: Dict[str, Any], scheme_name: str): +def get_scheme_map(config: Dict[str, Any], scheme_name: str) -> Optional[Dict[str, str]]: """Find a mapping from repository IDs to branches in the config. Args: @@ -342,7 +354,7 @@ def _is_any_repository_locked(pool_args: List[UpdateArguments]) -> Set[str]: locked_repositories.add(repo_name) return locked_repositories -def _move_llvm_project_to_first_index(pool_args: List[Union[UpdateArguments, AdditionalSwiftSourcesArguments]]): +def _move_llvm_project_to_first_index(pool_args: Union[List[UpdateArguments], List[AdditionalSwiftSourcesArguments]]): llvm_project_idx = None for i in range(len(pool_args)): if pool_args[i].repo_name == "llvm-project": @@ -351,25 +363,26 @@ def _move_llvm_project_to_first_index(pool_args: List[Union[UpdateArguments, Add if llvm_project_idx is not None: pool_args.insert(0, pool_args.pop(llvm_project_idx)) -def update_all_repositories(args: CliArguments, config, scheme_name, scheme_map, cross_repos_pr): +def update_all_repositories( + args: CliArguments, + config: Dict[str, Any], + scheme_name: str, + scheme_map: Optional[Dict[str, Any]], + cross_repos_pr: Dict[str, str], +): + skipped_repositories = [] pool_args: List[UpdateArguments] = [] timestamp = get_timestamp_to_match(args.match_timestamp, args.source_root) for repo_name in config['repos'].keys(): if repo_name in args.skip_repository_list: - print("Skipping update of '" + repo_name + "', requested by user") + skipped_repositories.append(SkippedReason(repo_name, "requested by user",)) continue # If the repository is not listed in the branch-scheme, skip it. if scheme_map and repo_name not in scheme_map: # If the repository exists locally, notify we are skipping it. if os.path.isdir(os.path.join(args.source_root, repo_name)): - print( - "Skipping update of '" - + repo_name - + "', repository not listed in the '" - + scheme_name - + "' branch-scheme" - ) + skipped_repositories.append(SkippedReason(repo_name, f"repository not listed in the {scheme_name} branch-scheme")) continue my_args = UpdateArguments( @@ -392,11 +405,11 @@ def update_all_repositories(args: CliArguments, config, scheme_name, scheme_map, locked_repositories: set[str] = _is_any_repository_locked(pool_args) if len(locked_repositories) > 0: return [ - f"'{repo_name}' is locked by git. Cannot update it." + Exception(f"'{repo_name}' is locked by git. Cannot update it.") for repo_name in locked_repositories ] _move_llvm_project_to_first_index(pool_args) - return ParallelRunner(update_single_repository, pool_args, args.n_processes).run() + return skipped_repositories, ParallelRunner(update_single_repository, pool_args, args.n_processes).run() def obtain_additional_swift_sources(pool_args: AdditionalSwiftSourcesArguments): @@ -447,12 +460,12 @@ def obtain_all_additional_swift_sources( scheme_name: str, skip_repository_list: List[str], ): + skipped_repositories = [] pool_args = [] for repo_name, repo_info in config['repos'].items(): repo_path = os.path.join(args.source_root, repo_name) if repo_name in skip_repository_list: - print("Skipping clone of '" + repo_name + "', requested by " - "user") + skipped_repositories.append(SkippedReason(repo_name, "requested by user")) continue if args.use_submodules: @@ -468,8 +481,7 @@ def obtain_all_additional_swift_sources( repo_exists = os.path.isdir(os.path.join(repo_path, ".git")) if repo_exists: - print("Skipping clone of '" + repo_name + "', directory " - "already exists") + skipped_repositories.append(SkippedReason(repo_name, "directory already exists")) continue # If we have a url override, use that url instead of @@ -484,7 +496,7 @@ def obtain_all_additional_swift_sources( else: remote = config['https-clone-pattern'] % remote_repo_id - repo_branch = None + repo_branch: Optional[str] = None repo_not_in_scheme = False if scheme_name: for v in config['branch-schemes'].values(): @@ -500,6 +512,9 @@ def obtain_all_additional_swift_sources( repo_branch = scheme_name if repo_not_in_scheme: continue + + if repo_branch is None: + raise RuntimeError("repo_branch is None") new_args = AdditionalSwiftSourcesArguments( args=args, @@ -521,13 +536,13 @@ def obtain_all_additional_swift_sources( # Only use `ParallelRunner` when submodules are not used, since `.git` dir # can't be accessed concurrently. if args.use_submodules: - return + return [], None if not pool_args: print("Not cloning any repositories.") - return + return [], None _move_llvm_project_to_first_index(pool_args) - return ParallelRunner( + return skipped_repositories, ParallelRunner( obtain_additional_swift_sources, pool_args, args.n_processes ).run() @@ -570,7 +585,7 @@ def print_repo_hashes(args: CliArguments, config: Dict[str, Any]): print("{:<35}: {:<35}".format(repo_name, repo_hash)) -def merge_no_duplicates(a: dict, b: dict) -> dict: +def merge_no_duplicates(a: Dict[Hashable, Any], b: Dict[Hashable, Any]) -> Dict[Hashable, Any]: result = {**a} for key, value in b.items(): if key in a: @@ -580,7 +595,7 @@ def merge_no_duplicates(a: dict, b: dict) -> dict: return result -def merge_config(config: dict, new_config: dict) -> dict: +def merge_config(config: Dict[str, Any], new_config: Dict[str, Any]) -> Dict[str, Any]: """ Merge two configs, with a 'last-wins' strategy. @@ -619,7 +634,7 @@ def validate_config(config: Dict[str, Any]): 'too.'.format(scheme_name)) # Then make sure the alias names used by our branches are unique. - seen = dict() + seen: Dict[str, Any] = dict() for (scheme_name, scheme) in config['branch-schemes'].items(): aliases = scheme['aliases'] for alias in aliases: @@ -631,7 +646,7 @@ def validate_config(config: Dict[str, Any]): seen[alias] = scheme_name -def full_target_name(repo_path, repository, target): +def full_target_name(repo_path: str, repository: str, target: str) -> str: tag, _, _ = Git.run(repo_path, ["tag", "-l", target], fatal=True) if tag == target: return tag @@ -645,13 +660,13 @@ def full_target_name(repo_path, repository, target): raise RuntimeError('Cannot determine if %s is a branch or a tag' % target) -def skip_list_for_platform(config: Dict[str, Any], all_repos: List[str]) -> List[str]: +def skip_list_for_platform(config: Dict[str, Any], all_repos: bool) -> List[str]: """Computes a list of repositories to skip when updating or cloning, if not overridden by `--all-repositories` CLI argument. Args: config (Dict[str, Any]): deserialized `update-checkout-config.json` - all_repos (List[str]): repositories not required for current platform. + all_repos (bool): include all repositories. Returns: List[str]: a resulting list of repositories to skip or empty list if @@ -677,7 +692,7 @@ def skip_list_for_platform(config: Dict[str, Any], all_repos: List[str]) -> List return skip_list -def main(): +def main() -> int: freeze_support() args = CliArguments.parse_args() @@ -704,7 +719,7 @@ def main(): config = merge_config(config, json.load(f)) validate_config(config) - cross_repos_pr = {} + cross_repos_pr: Dict[str, str] = {} if args.github_comment: regex_pr = r'(apple/[-a-zA-Z0-9_]+/pull/\d+'\ r'|apple/[-a-zA-Z0-9_]+#\d+'\ @@ -728,9 +743,10 @@ def main(): if args.clone or args.clone_with_ssh: skip_repo_list = skip_list_for_platform(config, args.all_repositories) skip_repo_list.extend(args.skip_repository_list) - clone_results = obtain_all_additional_swift_sources(args, config, - scheme_name, - skip_repo_list) + skipped_repositories, clone_results = obtain_all_additional_swift_sources( + args, config, scheme_name, skip_repo_list + ) + SkippedReason.print_skipped_repositories(skipped_repositories, "clone") swift_repo_path = os.path.join(args.source_root, 'swift') if 'swift' not in skip_repo_list and os.path.exists(swift_repo_path): @@ -755,11 +771,11 @@ def main(): if args.dump_hashes: dump_repo_hashes(args, config) - return (None, None) + return 0 if args.dump_hashes_config: dump_repo_hashes(args, config, args.dump_hashes_config) - return (None, None) + return 0 # Quick check whether somebody is calling update in an empty directory directory_contents = os.listdir(args.source_root) @@ -769,8 +785,10 @@ def main(): print("You don't have all swift sources. " "Call this script with --clone to get them.") - update_results = update_all_repositories(args, config, scheme_name, + skipped_repositories, update_results = update_all_repositories(args, config, scheme_name, scheme_map, cross_repos_pr) + SkippedReason.print_skipped_repositories(skipped_repositories, "update") + fail_count = 0 fail_count += ParallelRunner.check_results(clone_results, "CLONE") fail_count += ParallelRunner.check_results(update_results, "UPDATE")