From 1b19623947d0d5d2701671e27584637f1aa765c4 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 3 Feb 2026 11:15:17 +0000 Subject: [PATCH 1/5] further tidies to git extraction --- github_scripts/get_git_sources.py | 42 ++++++++++++++++++++++ github_scripts/merge_sources.py | 34 ++---------------- github_scripts/rose_stem_extract_source.py | 33 ++--------------- 3 files changed, 48 insertions(+), 61 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 1cde083..fbcd551 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -87,6 +87,48 @@ def datetime_str() -> str: return datetime.now().strftime("%Y-%m-%d %H:%M:%S") +def clone_and_merge( + dependency: str, opts: list | dict, loc: Path, use_mirrors: bool, mirror_loc: Path +) -> None: + """ + Wrapper script for calling get_source and merge_source for a single dependency + + dependency: name of the dependency + opts: dict or list of dicts for a dependency in the dependencies file + loc: path to location to clone to + use_mirrors: bool, use local git mirrors if true + mirror_loc: path to local git mirrors + """ + + if not isinstance(opts, list): + opts = [opts] + + for i, values in enumerate(opts): + if values["ref"] is None: + values["ref"] = "" + + # Clone the first provided source + if i == 0: + get_source( + values["source"], + values["ref"], + loc, + dependency, + use_mirrors, + mirror_loc, + ) + # For all other sources, attempt to merge into the first + else: + merge_source( + values["source"], + values["ref"], + loc, + dependency, + use_mirrors, + mirror_loc, + ) + + def get_source( source: str, ref: str, diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index fafde37..87d7464 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -12,7 +12,7 @@ import os import yaml from pathlib import Path -from get_git_sources import get_source, merge_source, set_https, validate_dependencies +from get_git_sources import clone_and_merge, set_https, validate_dependencies import logging @@ -80,37 +80,9 @@ def main(): if args.tokens: dependencies = set_https(dependencies) - for dependency, opts in dependencies.items(): + for dependency, sources in dependencies.items(): dest = args.path / dependency - - if not isinstance(opts, list): - opts = [opts] - - for i, values in enumerate(opts): - if values["ref"] is None: - values["ref"] = "" - - # Clone the first provided source - if i == 0: - get_source( - values["source"], - values["ref"], - dest, - dependency, - args.mirrors, - args.mirror_loc, - ) - continue - - # For all other sources, attempt to merge into the first - merge_source( - values["source"], - values["ref"], - dest, - dependency, - args.mirrors, - args.mirror_loc, - ) + clone_and_merge(dependency, sources, dest, args.mirrors, args.mirror_loc) if __name__ == "__main__": diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index b3c5a86..d7bbf55 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -14,7 +14,7 @@ import os from pathlib import Path from ast import literal_eval -from get_git_sources import get_source, merge_source, set_https, validate_dependencies +from get_git_sources import clone_and_merge, set_https, validate_dependencies import logging @@ -43,36 +43,9 @@ def main() -> None: use_mirrors = os.environ.get("USE_MIRRORS", "false").lower() == "true" mirror_loc = Path(os.getenv("GIT_MIRROR_LOC", "")) - for dependency, opts in dependencies.items(): + for dependency, sources in dependencies.items(): loc = clone_loc / dependency - - if not isinstance(opts, list): - opts = [opts] - - for i, values in enumerate(opts): - if values["ref"] is None: - values["ref"] = "" - - # Clone the first provided source - if i == 0: - get_source( - values["source"], - values["ref"], - loc, - dependency, - use_mirrors, - mirror_loc, - ) - continue - # For all other sources, attempt to merge into the first - merge_source( - values["source"], - values["ref"], - loc, - dependency, - use_mirrors, - mirror_loc, - ) + clone_and_merge(dependency, sources, loc, use_mirrors, mirror_loc) if __name__ == "__main__": From 68b7f2798582030ebf90ed83f2f6e77dc5a328d4 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 3 Feb 2026 11:41:49 +0000 Subject: [PATCH 2/5] copy over rsync changes --- github_scripts/get_git_sources.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index fbcd551..d9bc1b8 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -346,7 +346,12 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: loc.mkdir(parents=True) exclude_dirs = [] - host, path = repo_source.split(":", 1) + try: + host, path = repo_source.split(":", 1) + result = run_command(f"ssh {host} git -C {path} status --ignored -s") + except ValueError: + # In case the path does not contain `host:` - see if it can be accessed locally + result = run_command(f"git -C {path} status --ignored -s") result = run_command(f"ssh {host} git -C {path} status --ignored -s") for ignore_file in result.stdout.split("\n"): ignore_file = ignore_file.strip() From 8bb95752728e7ba88daf04795a137c67fa625791 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 3 Feb 2026 11:42:22 +0000 Subject: [PATCH 3/5] delete unused line --- github_scripts/get_git_sources.py | 1 - 1 file changed, 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index d9bc1b8..47dd5c5 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -352,7 +352,6 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: except ValueError: # In case the path does not contain `host:` - see if it can be accessed locally result = run_command(f"git -C {path} status --ignored -s") - result = run_command(f"ssh {host} git -C {path} status --ignored -s") for ignore_file in result.stdout.split("\n"): ignore_file = ignore_file.strip() if not ignore_file.startswith("!!"): From 531ffdccd1e764e8e46db56d43d0b8f8eedf74ad Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 3 Feb 2026 11:43:57 +0000 Subject: [PATCH 4/5] bug --- github_scripts/get_git_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 47dd5c5..827ec66 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -351,7 +351,7 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: result = run_command(f"ssh {host} git -C {path} status --ignored -s") except ValueError: # In case the path does not contain `host:` - see if it can be accessed locally - result = run_command(f"git -C {path} status --ignored -s") + result = run_command(f"git -C {repo_source} status --ignored -s") for ignore_file in result.stdout.split("\n"): ignore_file = ignore_file.strip() if not ignore_file.startswith("!!"): From 5979471312c07e1e938691f8fc5e871d918ef34f Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 3 Feb 2026 14:51:50 +0000 Subject: [PATCH 5/5] fix for jules internal --- github_scripts/get_git_sources.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 827ec66..f467ed8 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -146,7 +146,10 @@ def get_source( logger.info( f"[{datetime_str()}] Cloning {repo} from {mirror_loc} at ref {ref}" ) - mirror_loc = Path(mirror_loc) / "MetOffice" / repo + mirror_repo = repo + if "jules-internal" in source: + mirror_repo = "jules-internal" + mirror_loc = Path(mirror_loc) / "MetOffice" / mirror_repo clone_repo_mirror(source, ref, mirror_loc, dest) else: logger.info(f"[{datetime_str()}] Cloning {repo} from {source} at ref {ref}")