From 87a6bf7c3462bd03e03abd35f24628a0963eb9ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kami=C5=84ski?= Date: Thu, 18 Sep 2025 14:59:55 +0000 Subject: [PATCH] Add option for shallow clones To speed up cloning big repositories, add an option for a shallow clone. --- devops/scripts/benchmarks/git_project.py | 109 ++++++++++++++---- .../benchmarks/utils/compute_runtime.py | 3 + 2 files changed, 92 insertions(+), 20 deletions(-) diff --git a/devops/scripts/benchmarks/git_project.py b/devops/scripts/benchmarks/git_project.py index 84fe722d2974c..1c1f52b26c2cd 100644 --- a/devops/scripts/benchmarks/git_project.py +++ b/devops/scripts/benchmarks/git_project.py @@ -20,6 +20,7 @@ def __init__( name: str, force_rebuild: bool = False, no_suffix_src: bool = False, + shallow_clone: bool = True, ) -> None: self._url = url self._ref = ref @@ -27,7 +28,8 @@ def __init__( self._name = name self._force_rebuild = force_rebuild self._no_suffix_src = no_suffix_src - self._rebuild_needed = self._git_clone() + self._shallow_clone = shallow_clone + self._rebuild_needed = self._setup_repo() @property def src_dir(self) -> Path: @@ -128,42 +130,109 @@ def install(self) -> None: """Installs the project.""" run(f"cmake --install {self.build_dir}") - def _git_clone(self) -> bool: + def _can_shallow_clone_ref(self, ref: str) -> bool: + """Check if we can do a shallow clone with this ref using git ls-remote.""" + try: + result = run(f"git ls-remote --heads --tags {self._url} {ref}") + output = result.stdout.decode().strip() + + if output: + # Found the ref as a branch or tag + log.debug( + f"Ref {ref} found as branch/tag via ls-remote, can shallow clone" + ) + return True + else: + # Not found as branch/tag, likely a SHA commit + log.debug( + f"Ref {ref} not found as branch/tag via ls-remote, likely SHA commit" + ) + return False + except Exception as e: + log.debug( + f"Could not check ref {ref} via ls-remote: {e}, assuming SHA commit" + ) + return False + + def _git_clone(self) -> None: + """Clone the git repository.""" + try: + log.debug(f"Cloning {self._url} into {self.src_dir} at commit {self._ref}") + git_clone_cmd = f"git clone --recursive {self._url} {self.src_dir}" + if self._shallow_clone: + if self._can_shallow_clone_ref(self._ref): + # Shallow clone for branches and tags only + git_clone_cmd = f"git clone --recursive --depth 1 --branch {self._ref} {self._url} {self.src_dir}" + else: + log.debug(f"Cannot shallow clone SHA {self._ref}, using full clone") + + run(git_clone_cmd) + run(f"git checkout {self._ref}", cwd=self.src_dir) + log.debug(f"Cloned {self._url} into {self.src_dir} at commit {self._ref}") + except Exception as e: + log.error(f"Failed to clone repository {self._url}: {e}") + raise + + def _git_fetch(self) -> None: + """Fetch the latest changes from the remote repository.""" + try: + log.debug(f"Fetching latest changes for {self._url} in {self.src_dir}") + run("git fetch", cwd=self.src_dir) + run("git reset --hard", cwd=self.src_dir) + run(f"git checkout {self._ref}", cwd=self.src_dir) + log.debug(f"Fetched latest changes for {self._url} in {self.src_dir}") + except Exception as e: + log.error(f"Failed to fetch updates for repository {self._url}: {e}") + raise + + def _setup_repo(self) -> bool: """Clone a git repository into a specified directory at a specific commit. Returns: bool: True if the repository was cloned or updated, False if it was already up-to-date. """ - log.debug(f"Cloning {self._url} into {self.src_dir} at commit {self._ref}") - if self.src_dir.exists() and Path(self.src_dir, ".git").exists(): + if not self.src_dir.exists(): + self._git_clone() + return True + elif Path(self.src_dir, ".git").exists(): log.debug( f"Repository {self._url} already exists at {self.src_dir}, checking for updates." ) - run("git fetch", cwd=self.src_dir) - target_commit = ( - run(f"git rev-parse {self._ref}", cwd=self.src_dir) + current_commit = ( + run("git rev-parse HEAD^{commit}", cwd=self.src_dir) .stdout.decode() .strip() ) - current_commit = ( - run("git rev-parse HEAD", cwd=self.src_dir).stdout.decode().strip() - ) - if current_commit != target_commit: - log.debug( - f"Current commit {current_commit} does not match target {target_commit}, checking out {self._ref}." + try: + target_commit = ( + run(f"git rev-parse {self._ref}^{{commit}}", cwd=self.src_dir) + .stdout.decode() + .strip() + ) + if current_commit != target_commit: + log.debug( + f"Current commit {current_commit} does not match target {target_commit}, checking out {self._ref}." + ) + run("git reset --hard", cwd=self.src_dir) + run(f"git checkout {self._ref}", cwd=self.src_dir) + return True + except Exception: + log.error( + f"Failed to resolve target commit {self._ref}. Fetching updates." ) - run("git reset --hard", cwd=self.src_dir) - run(f"git checkout {self._ref}", cwd=self.src_dir) + if self._shallow_clone: + log.debug(f"Cloning a clean shallow copy.") + shutil.rmtree(self.src_dir) + self._git_clone() + return True + else: + self._git_fetch() + return True else: log.debug( f"Current commit {current_commit} matches target {target_commit}, no update needed." ) return False - elif not self.src_dir.exists(): - run(f"git clone --recursive {self._url} {self.src_dir}") - run(f"git checkout {self._ref}", cwd=self.src_dir) else: raise Exception( f"The directory {self.src_dir} exists but is not a git repository." ) - log.debug(f"Cloned {self._url} into {self.src_dir} at commit {self._ref}") - return True diff --git a/devops/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py index 074457bef415e..ad7eb7b74719e 100644 --- a/devops/scripts/benchmarks/utils/compute_runtime.py +++ b/devops/scripts/benchmarks/utils/compute_runtime.py @@ -96,6 +96,7 @@ def build_igc(self, repo, commit) -> tuple[Path, bool]: Path(options.workdir), "vc-intrinsics", no_suffix_src=True, + shallow_clone=False, ) llvm_project = GitProject( "https://github.com/llvm/llvm-project", @@ -125,6 +126,7 @@ def build_igc(self, repo, commit) -> tuple[Path, bool]: Path(options.workdir), "SPIRV-Tools", no_suffix_src=True, + shallow_clone=False, ) GitProject( "https://github.com/KhronosGroup/SPIRV-Headers.git", @@ -132,6 +134,7 @@ def build_igc(self, repo, commit) -> tuple[Path, bool]: Path(options.workdir), "SPIRV-Headers", no_suffix_src=True, + shallow_clone=False, ) configure_args = [