diff --git a/src/deadline/client/cli/_groups/manifest_group.py b/src/deadline/client/cli/_groups/manifest_group.py index 4d2872053..143685bbe 100644 --- a/src/deadline/client/cli/_groups/manifest_group.py +++ b/src/deadline/client/cli/_groups/manifest_group.py @@ -75,14 +75,14 @@ def cli_manifest(): "-i", "--include", default=None, - help="Glob syntax of files and directories to include in the manifest. Can be provided multiple times.", + help="Glob pattern for files to include. Matches dotfiles. Can be provided multiple times", multiple=True, ) @click.option( "-e", "--exclude", default=None, - help="Glob syntax of files and directories to exclude in the manifest. Can be provided multiple times.", + help="Glob pattern for files to exclude. Matches dotfiles. Can be provided multiple times", multiple=True, ) @click.option( @@ -117,6 +117,11 @@ def manifest_snapshot( BETA - Generates a snapshot of files in a directory root as a job attachment manifest. + By default all files are included, including dotfiles (files and + directories starting with `.`). Use `--include` and `--exclude` to + filter. Patterns follow Python pathlib glob syntax where wildcards + match dotfiles, similar to `.gitignore` behavior. + \b Learn more about [job attachments](https://docs.aws.amazon.com/deadline-cloud/latest/userguide/storage-job-attachments.html) """ @@ -173,14 +178,14 @@ def manifest_snapshot( "-i", "--include", default=None, - help="Glob syntax of files and directories to include in the manifest. Can be provided multiple times.", + help="Glob pattern for files to include. Matches dotfiles. Can be provided multiple times", multiple=True, ) @click.option( "-e", "--exclude", default=None, - help="Glob syntax of files and directories to exclude in the manifest. Can be provided multiple times.", + help="Glob pattern for files to exclude. Matches dotfiles. Can be provided multiple times", multiple=True, ) @click.option( @@ -211,6 +216,9 @@ def manifest_diff( BETA - Compute the file difference of a root directory against an existing job attachment manifest for new, modified or deleted files. + Patterns follow Python pathlib glob syntax where wildcards match + dotfiles (files and directories starting with `.`). + \b Learn more about [job attachments](https://docs.aws.amazon.com/deadline-cloud/latest/userguide/storage-job-attachments.html) """ diff --git a/src/deadline/job_attachments/_glob.py b/src/deadline/job_attachments/_glob.py index 598e87b7a..31472e2b3 100644 --- a/src/deadline/job_attachments/_glob.py +++ b/src/deadline/job_attachments/_glob.py @@ -1,7 +1,16 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +""" +Glob utilities for file matching in job attachment manifests. + +Uses pathlib.Path.glob which, unlike the glob module, treats dotfiles +(files and directories starting with '.') the same as any other file. +The '*' wildcard matches dotfiles without requiring a leading dot in +the pattern. This is consistent with .gitignore behavior and with the +os.walk-based file discovery used by the main job submission path. +""" + import os -import glob import json from pathlib import Path from typing import List, Optional @@ -55,17 +64,17 @@ def _match_files_with_pattern(base_path: str, patterns: List[str]) -> set: Set of normalized file paths that match the patterns """ matched_files = set() + root = Path(base_path) for pattern in patterns: - # Make pattern relative to base path - full_pattern = os.path.join(base_path, pattern) - - # Use recursive glob for directory matching - for matched_path in glob.glob(full_pattern, recursive=True): - # Only add files, not directories - if os.path.isfile(matched_path): - # Convert to proper path format - normalized_path = os.path.normpath(matched_path) - matched_files.add(normalized_path) + for matched_path in root.glob(pattern): + if matched_path.is_file(): + matched_files.add(os.path.normpath(str(matched_path))) + # On Python <3.13, a trailing "**" only yields directories. + # Append "/*" so files inside those directories are also matched. + if pattern.endswith("**"): + for matched_path in root.glob(pattern + "/*"): + if matched_path.is_file(): + matched_files.add(os.path.normpath(str(matched_path))) return matched_files diff --git a/test/unit/deadline_job_attachments/data/glob/.dotfile b/test/unit/deadline_job_attachments/data/glob/.dotfile new file mode 100644 index 000000000..e69de29bb diff --git a/test/unit/deadline_job_attachments/data/glob/.hidden_dir/inside_hidden.txt b/test/unit/deadline_job_attachments/data/glob/.hidden_dir/inside_hidden.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/unit/deadline_job_attachments/test_glob.py b/test/unit/deadline_job_attachments/test_glob.py index c55e2618a..6b3ebd471 100644 --- a/test/unit/deadline_job_attachments/test_glob.py +++ b/test/unit/deadline_job_attachments/test_glob.py @@ -43,12 +43,16 @@ def test_glob_path_default(test_glob_folder: str): """ globbed_files: List[str] = _glob_paths(path=test_glob_folder) - # There are 4 files - assert len(globbed_files) == 4 + # There are 6 files (4 original + .dotfile + .hidden_dir/inside_hidden.txt) + assert len(globbed_files) == 6 assert os.path.join(os.sep, test_glob_folder, "include.txt") in globbed_files assert os.path.join(os.sep, test_glob_folder, "exclude.txt") in globbed_files assert os.path.join(os.sep, test_glob_folder, "nested", "nested_include.txt") in globbed_files assert os.path.join(os.sep, test_glob_folder, "nested", "nested_exclude.txt") in globbed_files + assert os.path.join(os.sep, test_glob_folder, ".dotfile") in globbed_files + assert ( + os.path.join(os.sep, test_glob_folder, ".hidden_dir", "inside_hidden.txt") in globbed_files + ) def test_glob_path_default_include(test_glob_folder: str): @@ -73,8 +77,8 @@ def test_glob_path_exclude(test_glob_folder: str): path=test_glob_folder, exclude=["*exclude.txt", "*/*exclude.txt"] ) - # There are 4 files - assert len(globbed_files) == 2 + # 6 total - 2 excluded = 4 remaining + assert len(globbed_files) == 4 assert os.path.join(os.sep, test_glob_folder, "include.txt") in globbed_files assert os.path.join(os.sep, test_glob_folder, "nested", "nested_include.txt") in globbed_files @@ -107,8 +111,8 @@ def test_glob_path_exclude_subdir(test_glob_folder: str): """ globbed_files: List[str] = _glob_paths(path=test_glob_folder, exclude=["nested/**"]) - # There are 2 files - assert len(globbed_files) == 2 + # 6 total - 2 nested = 4 remaining + assert len(globbed_files) == 4 assert os.path.join(os.sep, test_glob_folder, "include.txt") in globbed_files assert os.path.join(os.sep, test_glob_folder, "exclude.txt") in globbed_files @@ -119,5 +123,33 @@ def test_glob_path_exclude_nonexistent(test_glob_folder: str): """ globbed_files: List[str] = _glob_paths(path=test_glob_folder, exclude=["nonexistent/**"]) - # There are 2 files - assert len(globbed_files) == 4 + # All 6 files remain since the exclude pattern matches nothing + assert len(globbed_files) == 6 + + +def test_glob_path_default_includes_dotfiles(test_glob_folder: str): + """ + Test that _glob_paths includes dotfiles and files inside dot-directories, + matching the behavior of pathlib.Path.glob("**/*"). + """ + globbed_files: List[str] = _glob_paths(path=test_glob_folder) + + assert os.path.join(os.sep, test_glob_folder, ".dotfile") in globbed_files + assert ( + os.path.join(os.sep, test_glob_folder, ".hidden_dir", "inside_hidden.txt") in globbed_files + ) + + +def test_glob_path_exclude_dotfiles(test_glob_folder: str): + """ + Test that dotfiles can be explicitly excluded via exclude patterns. + """ + globbed_files: List[str] = _glob_paths(path=test_glob_folder, exclude=["**/.*", "**/.*/**"]) + + assert os.path.join(os.sep, test_glob_folder, ".dotfile") not in globbed_files + assert ( + os.path.join(os.sep, test_glob_folder, ".hidden_dir", "inside_hidden.txt") + not in globbed_files + ) + # Non-dotfiles should still be present + assert os.path.join(os.sep, test_glob_folder, "include.txt") in globbed_files