From def930791feb3592a985cb09c4598536eb58237f Mon Sep 17 00:00:00 2001 From: hoangtrann Date: Fri, 10 Oct 2025 07:05:24 +0700 Subject: [PATCH 1/2] [imp] add sparse checkout --- README.rst | 21 ++++++++++ git_aggregator/config.py | 9 +++++ git_aggregator/repo.py | 18 ++++++++- tests/test_config.py | 52 +++++++++++++++++++++++++ tests/test_repo.py | 84 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 183 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index d4fd885..04bbe65 100644 --- a/README.rst +++ b/README.rst @@ -206,6 +206,27 @@ A real life example: applying a patch shell_command_after: - git am "$(git format-patch -1 XXXXXX -o ../patches)" +Sparse Checkout +--------------- + +Git provides sparse-checkout to check out only a set of files/directories, which is +very useful for more granular control over what we should keep. It is especially useful +when a repository is getting big, or when you want to automatically install only +specific modules from the directory. + +Looking at the example below, only ``product_brand`` will be checked out from the remote. + +.. code-block:: yaml + + ./product_attribute: + remotes: + oca: https://github.com/OCA/product-attribute.git + merges: + - oca 8.0 + target: oca 8.0 + sparse-checkout: + - product_brand + Command line Usage ================== diff --git a/git_aggregator/config.py b/git_aggregator/config.py index 1f95b66..17084f1 100644 --- a/git_aggregator/config.py +++ b/git_aggregator/config.py @@ -126,6 +126,15 @@ def get_repos(config, force=False): cmds = [cmds] commands = cmds repo_dict['shell_command_after'] = commands + # Handle sparse-checkout configuration + sparse_checkout = repo_data.get('sparse-checkout', None) + if sparse_checkout: + if isinstance(sparse_checkout, string_types): + sparse_checkout = [sparse_checkout] + elif not isinstance(sparse_checkout, list): + raise ConfigException( + '%s: sparse-checkout must be a string or list of strings.' 
% directory) + repo_dict['sparse_checkout'] = sparse_checkout repo_list.append(repo_dict) return repo_list diff --git a/git_aggregator/repo.py b/git_aggregator/repo.py index 3801434..0b93a0f 100644 --- a/git_aggregator/repo.py +++ b/git_aggregator/repo.py @@ -36,7 +36,7 @@ class Repo: def __init__(self, cwd, remotes, merges, target, shell_command_after=None, fetch_all=False, defaults=None, - force=False): + force=False, sparse_checkout=None): """Initialize a git repository aggregator :param cwd: path to the directory where to initialize the repository @@ -55,6 +55,7 @@ def __init__(self, cwd, remotes, merges, target, Collection of default parameters to be passed to git. :param bool force: When ``False``, it will stop if repo is dirty. + :param sparse_checkout: list of paths to include in sparse-checkout """ self.cwd = cwd self.remotes = remotes @@ -67,6 +68,7 @@ def __init__(self, cwd, remotes, merges, target, self.shell_command_after = shell_command_after or [] self.defaults = defaults or dict() self.force = force + self.sparse_checkout = sparse_checkout @property def git_version(self): @@ -226,6 +228,9 @@ def init_repository(self, target_dir): # Speeds up cloning by functioning without a complete copy of # repository cmd += ('--filter=blob:none',) + # Enable sparse-checkout if configured + if self.sparse_checkout: + cmd += ('--no-checkout',) # Try to clone target branch, if it exists rtype, _sha = self.query_remote_ref(repository, branch) if rtype in {'branch', 'tag'}: @@ -234,6 +239,17 @@ def init_repository(self, target_dir): cmd += self._fetch_options({}) cmd += (repository, target_dir) self.log_call(cmd) + + # Configure and apply sparse-checkout if specified + if self.sparse_checkout: + logger.info('Configuring sparse-checkout for %s', self.sparse_checkout) + # Enable sparse-checkout + self.log_call(['git', 'sparse-checkout', 'init', '--cone'], cwd=target_dir) + # Set the paths to include + self.log_call(['git', 'sparse-checkout', 'set'] + 
self.sparse_checkout, cwd=target_dir) + # Checkout the files + self.log_call(['git', 'checkout'], cwd=target_dir) + return True def fetch(self): diff --git a/tests/test_config.py b/tests/test_config.py index ab2eba2..c835aa6 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -435,3 +435,55 @@ def test_fetch_all_true(self): config_yaml = dedent(config_yaml) repos = config.get_repos(self._parse_config(config_yaml)) self.assertIs(repos[0]["fetch_all"], True) + + def test_sparse_checkout_string(self): + """Test sparse-checkout with a single string path.""" + config_yaml = """ + ./test: + remotes: + oca: https://github.com/test/test.git + merges: + - oca 8.0 + target: oca aggregated_branch_name + sparse-checkout: src/module1 + """ + config_yaml = dedent(config_yaml) + repos = config.get_repos(self._parse_config(config_yaml)) + self.assertEqual(repos[0]["sparse_checkout"], ["src/module1"]) + + def test_sparse_checkout_list(self): + """Test sparse-checkout with a list of paths.""" + config_yaml = """ + ./test: + remotes: + oca: https://github.com/test/test.git + merges: + - oca 8.0 + target: oca aggregated_branch_name + sparse-checkout: + - src/module1 + - src/module2 + - docs + """ + config_yaml = dedent(config_yaml) + repos = config.get_repos(self._parse_config(config_yaml)) + self.assertEqual(repos[0]["sparse_checkout"], ["src/module1", "src/module2", "docs"]) + + def test_sparse_checkout_invalid_type(self): + """Test sparse-checkout with invalid type raises ConfigException.""" + config_yaml = """ + /test: + remotes: + oca: https://github.com/test/test.git + merges: + - oca 8.0 + target: oca aggregated_branch_name + sparse-checkout: 123 + """ + config_yaml = dedent(config_yaml) + with self.assertRaises(ConfigException) as ex: + config.get_repos(self._parse_config(config_yaml)) + self.assertEqual( + ex.exception.args[0], + '/test: sparse-checkout must be a string or list of strings.' 
+ ) diff --git a/tests/test_repo.py b/tests/test_repo.py index 91c303e..ba0784a 100644 --- a/tests/test_repo.py +++ b/tests/test_repo.py @@ -444,3 +444,87 @@ def test_multithreading(self): self.assertTrue(os.path.isfile(os.path.join(repo3_dir, 'tracked'))) self.assertTrue(os.path.isfile(os.path.join(repo3_dir, 'tracked2'))) + + def test_sparse_checkout_single_path(self): + """Test sparse-checkout with a single path.""" + # Create a directory structure in remote1 + with WorkingDirectoryKeeper(): + os.chdir(self.remote1) + os.makedirs('src/module1', exist_ok=True) + os.makedirs('src/module2', exist_ok=True) + git_write_commit(self.remote1, 'src/module1/file1.txt', + 'content1', msg='add module1 file') + git_write_commit(self.remote1, 'src/module2/file2.txt', + 'content2', msg='add module2 file') + + remotes = [{ + 'name': 'r1', + 'url': self.url_remote1 + }] + merges = [{ + 'remote': 'r1', + 'ref': 'main' + }] + target = { + 'remote': 'r1', + 'branch': 'agg' + } + + # Test with sparse-checkout for only module1 + repo = Repo(self.cwd, remotes, merges, target, + sparse_checkout=['src/module1']) + repo.aggregate() + + # module1 should be present + self.assertTrue(os.path.isfile( + os.path.join(self.cwd, 'src/module1/file1.txt'))) + # module2 should not be checked out + self.assertFalse(os.path.exists( + os.path.join(self.cwd, 'src/module2'))) + + def test_sparse_checkout_multiple_paths(self): + """Test sparse-checkout with multiple paths.""" + # Create a directory structure in remote1 + with WorkingDirectoryKeeper(): + os.chdir(self.remote1) + os.makedirs('docs', exist_ok=True) + os.makedirs('src/core', exist_ok=True) + os.makedirs('src/utils', exist_ok=True) + os.makedirs('tests', exist_ok=True) + git_write_commit(self.remote1, 'docs/readme.md', + 'docs content', msg='add docs') + git_write_commit(self.remote1, 'src/core/main.py', + 'core code', msg='add core') + git_write_commit(self.remote1, 'src/utils/helpers.py', + 'utils code', msg='add utils') + 
git_write_commit(self.remote1, 'tests/test_main.py', + 'test code', msg='add tests') + + remotes = [{ + 'name': 'r1', + 'url': self.url_remote1 + }] + merges = [{ + 'remote': 'r1', + 'ref': 'main' + }] + target = { + 'remote': 'r1', + 'branch': 'agg' + } + + # Test with sparse-checkout for docs and src/core only + repo = Repo(self.cwd, remotes, merges, target, + sparse_checkout=['docs', 'src/core']) + repo.aggregate() + + # docs and src/core should be present + self.assertTrue(os.path.isfile( + os.path.join(self.cwd, 'docs/readme.md'))) + self.assertTrue(os.path.isfile( + os.path.join(self.cwd, 'src/core/main.py'))) + # src/utils and tests should not be checked out + self.assertFalse(os.path.exists( + os.path.join(self.cwd, 'src/utils'))) + self.assertFalse(os.path.exists( + os.path.join(self.cwd, 'tests'))) From 7d522a4910208d3dc25e68c5246fb2c380ec79a9 Mon Sep 17 00:00:00 2001 From: hoangtrann Date: Mon, 22 Dec 2025 01:53:36 +0700 Subject: [PATCH 2/2] [imp] add flag to skip sparse-checkout --- README.rst | 6 +++ git_aggregator/config.py | 9 ++-- git_aggregator/main.py | 10 ++++- git_aggregator/repo.py | 12 ++++-- tests/test_config.py | 36 ++++++++++++++++ tests/test_repo.py | 91 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 157 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 04bbe65..0104ecf 100644 --- a/README.rst +++ b/README.rst @@ -227,6 +227,12 @@ Looking at the example below, only ``product_brand`` will be checkout from remot sparse-checkout: - product_brand +You can skip sparse-checkout for all repositories using the ``--no-sparse-checkout`` flag: + +.. 
code-block:: bash + + $ gitaggregate -c repos.yaml --no-sparse-checkout + Command line Usage ================== diff --git a/git_aggregator/config.py b/git_aggregator/config.py index 17084f1..967fed7 100644 --- a/git_aggregator/config.py +++ b/git_aggregator/config.py @@ -13,10 +13,11 @@ log = logging.getLogger(__name__) -def get_repos(config, force=False): +def get_repos(config, force=False, no_sparse_checkout=False): """Return a :py:obj:`list` list of repos from config file. :param config: the repos config in :py:class:`dict` format. :param bool force: Force aggregate dirty repos or not. + :param bool no_sparse_checkout: Disable sparse checkout regardless of config. :type config: dict :rtype: list """ @@ -28,6 +29,7 @@ def get_repos(config, force=False): 'cwd': directory, 'defaults': repo_data.get('defaults', dict()), 'force': force, + 'no_sparse_checkout': no_sparse_checkout, } remote_names = set() if 'remotes' in repo_data: @@ -139,7 +141,7 @@ def get_repos(config, force=False): return repo_list -def load_config(config, expand_env=False, env_file=None, force=False): +def load_config(config, expand_env=False, env_file=None, force=False, no_sparse_checkout=False): """Return repos from a directory and fnmatch. Not recursive. :param config: paths to config file @@ -149,6 +151,7 @@ def load_config(config, expand_env=False, env_file=None, force=False): :param env_file: path to file with variables to add to the environment. :type env_file: str or None :param bool force: True to aggregate even if repo is dirty. + :param bool no_sparse_checkout: True to disable sparse checkout regardless of config. 
:returns: expanded config dict item :rtype: iter(dict) """ @@ -182,4 +185,4 @@ def load_config(config, expand_env=False, env_file=None, force=False): conf = yaml.load(config, Loader=yaml.SafeLoader) - return get_repos(conf or {}, force) + return get_repos(conf or {}, force, no_sparse_checkout) diff --git a/git_aggregator/main.py b/git_aggregator/main.py index 1c922a6..2465864 100644 --- a/git_aggregator/main.py +++ b/git_aggregator/main.py @@ -122,6 +122,14 @@ def get_parser(): help='Force cleanup and aggregation on dirty repositories.', ) + main_parser.add_argument( + '--no-sparse-checkout', + dest='no_sparse_checkout', + default=False, + action='store_true', + help='Skip sparse-checkout for all repositories.', + ) + main_parser.add_argument( '-j', '--jobs', dest='jobs', @@ -245,7 +253,7 @@ def run(args): in args.command""" repos = load_config( - args.config, args.expand_env, args.env_file, args.force) + args.config, args.expand_env, args.env_file, args.force, args.no_sparse_checkout) jobs = max(args.jobs, 1) threads = [] diff --git a/git_aggregator/repo.py b/git_aggregator/repo.py index 0b93a0f..0f53e0a 100644 --- a/git_aggregator/repo.py +++ b/git_aggregator/repo.py @@ -36,7 +36,7 @@ class Repo: def __init__(self, cwd, remotes, merges, target, shell_command_after=None, fetch_all=False, defaults=None, - force=False, sparse_checkout=None): + force=False, sparse_checkout=None, no_sparse_checkout=False): """Initialize a git repository aggregator :param cwd: path to the directory where to initialize the repository @@ -56,6 +56,8 @@ def __init__(self, cwd, remotes, merges, target, :param bool force: When ``False``, it will stop if repo is dirty. :param sparse_checkout: list of paths to include in sparse-checkout + :param bool no_sparse_checkout: + When ``True``, disable sparse checkout regardless of configuration. 
""" self.cwd = cwd self.remotes = remotes @@ -68,6 +70,7 @@ def __init__(self, cwd, remotes, merges, target, self.shell_command_after = shell_command_after or [] self.defaults = defaults or dict() self.force = force + self.no_sparse_checkout = no_sparse_checkout self.sparse_checkout = sparse_checkout @property @@ -221,6 +224,9 @@ def init_repository(self, target_dir): repository, target_dir, ) + if self.no_sparse_checkout and self.sparse_checkout: + logger.info('Sparse checkout is disabled (ignoring config: %s)', + self.sparse_checkout) cmd = ('git', 'clone') if self.git_version >= (2, 17): # Git added support for partial clone in 2.17 @@ -229,7 +235,7 @@ def init_repository(self, target_dir): # repository cmd += ('--filter=blob:none',) # Enable sparse-checkout if configured - if self.sparse_checkout: + if self.sparse_checkout and not self.no_sparse_checkout: cmd += ('--no-checkout',) # Try to clone target branch, if it exists rtype, _sha = self.query_remote_ref(repository, branch) @@ -241,7 +247,7 @@ def init_repository(self, target_dir): self.log_call(cmd) # Configure and apply sparse-checkout if specified - if self.sparse_checkout: + if self.sparse_checkout and not self.no_sparse_checkout: logger.info('Configuring sparse-checkout for %s', self.sparse_checkout) # Enable sparse-checkout self.log_call(['git', 'sparse-checkout', 'init', '--cone'], cwd=target_dir) diff --git a/tests/test_config.py b/tests/test_config.py index c835aa6..f6eee52 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -487,3 +487,39 @@ def test_sparse_checkout_invalid_type(self): ex.exception.args[0], '/test: sparse-checkout must be a string or list of strings.' 
) + + def test_sparse_checkout_none(self): + """Test sparse-checkout with None value (not configured).""" + config_yaml = """ + ./test: + remotes: + oca: https://github.com/test/test.git + merges: + - oca 8.0 + target: oca aggregated_branch_name + """ + config_yaml = dedent(config_yaml) + repos = config.get_repos(self._parse_config(config_yaml)) + self.assertIsNone(repos[0]["sparse_checkout"]) + + def test_no_sparse_checkout_parameter(self): + """Test no_sparse_checkout parameter is passed through get_repos.""" + config_yaml = """ + ./test: + remotes: + oca: https://github.com/test/test.git + merges: + - oca 8.0 + target: oca aggregated_branch_name + sparse-checkout: src/module1 + """ + config_yaml = dedent(config_yaml) + repos = config.get_repos( + self._parse_config(config_yaml), + force=False, + no_sparse_checkout=True + ) + # Verify no_sparse_checkout is passed to the repo configuration + self.assertEqual(repos[0]["no_sparse_checkout"], True) + # Verify sparse_checkout is still in config (not removed) + self.assertEqual(repos[0]["sparse_checkout"], ["src/module1"]) diff --git a/tests/test_repo.py b/tests/test_repo.py index ba0784a..2a27c56 100644 --- a/tests/test_repo.py +++ b/tests/test_repo.py @@ -426,6 +426,7 @@ def test_multithreading(self): expand_env=False, env_file=None, force=False, + no_sparse_checkout=False, ) with working_directory_keeper: @@ -528,3 +529,93 @@ def test_sparse_checkout_multiple_paths(self): os.path.join(self.cwd, 'src/utils'))) self.assertFalse(os.path.exists( os.path.join(self.cwd, 'tests'))) + + def test_sparse_checkout_disabled_by_flag(self): + """Test that no_sparse_checkout flag overrides sparse-checkout config.""" + # Create a directory structure in remote1 + with WorkingDirectoryKeeper(): + os.chdir(self.remote1) + os.makedirs('src/module1', exist_ok=True) + os.makedirs('src/module2', exist_ok=True) + git_write_commit(self.remote1, 'src/module1/file1.txt', + 'content1', msg='add module1 file') + git_write_commit(self.remote1, 
'src/module2/file2.txt', + 'content2', msg='add module2 file') + + remotes = [{ + 'name': 'r1', + 'url': self.url_remote1 + }] + merges = [{ + 'remote': 'r1', + 'ref': 'main' + }] + target = { + 'remote': 'r1', + 'branch': 'agg' + } + + # Configure sparse-checkout but disable it with the flag + repo = Repo(self.cwd, remotes, merges, target, + sparse_checkout=['src/module1'], + no_sparse_checkout=True) + repo.aggregate() + + # Both modules should be present despite sparse-checkout config + # because no_sparse_checkout=True overrides the config + self.assertTrue(os.path.isfile( + os.path.join(self.cwd, 'src/module1/file1.txt'))) + self.assertTrue(os.path.isfile( + os.path.join(self.cwd, 'src/module2/file2.txt'))) + + def test_no_sparse_checkout_integration(self): + """Integration test: --no-sparse-checkout flag through main.run().""" + # Create a directory structure in remote1 + with WorkingDirectoryKeeper(): + os.chdir(self.remote1) + os.makedirs('src/module1', exist_ok=True) + os.makedirs('src/module2', exist_ok=True) + git_write_commit(self.remote1, 'src/module1/file1.txt', + 'content1', msg='add module1 file') + git_write_commit(self.remote1, 'src/module2/file2.txt', + 'content2', msg='add module2 file') + + config_yaml = os.path.join(self.sandbox, 'config_sparse.yaml') + with open(config_yaml, 'w') as f: + f.write(dedent(""" + ./repo_sparse: + remotes: + r1: %(r1_remote_url)s + merges: + - r1 main + target: r1 agg + sparse-checkout: + - src/module1 + """ % { + 'r1_remote_url': self.url_remote1, + })) + + # Test with no_sparse_checkout=True + args = argparse.Namespace( + command='aggregate', + config=config_yaml, + jobs=1, + dirmatch=None, + do_push=False, + expand_env=False, + env_file=None, + force=False, + no_sparse_checkout=True, + ) + + with working_directory_keeper: + os.chdir(self.sandbox) + main.run(args) + + repo_dir = os.path.join(self.sandbox, 'repo_sparse') + + # Both modules should be present because --no-sparse-checkout overrides config + 
self.assertTrue(os.path.isfile( + os.path.join(repo_dir, 'src/module1/file1.txt'))) + self.assertTrue(os.path.isfile( + os.path.join(repo_dir, 'src/module2/file2.txt')))