Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions src/deadline/client/cli/_groups/manifest_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,14 @@ def cli_manifest():
"-i",
"--include",
default=None,
help="Glob syntax of files and directories to include in the manifest. Can be provided multiple times.",
help="Glob pattern for files to include. Matches dotfiles. Can be provided multiple times",
multiple=True,
)
@click.option(
"-e",
"--exclude",
default=None,
help="Glob syntax of files and directories to exclude in the manifest. Can be provided multiple times.",
help="Glob pattern for files to exclude. Matches dotfiles. Can be provided multiple times",
multiple=True,
)
@click.option(
Expand Down Expand Up @@ -117,6 +117,11 @@ def manifest_snapshot(
BETA - Generates a snapshot of files in a directory root as a job
attachment manifest.

By default all files are included, including dotfiles (files and
directories starting with `.`). Use `--include` and `--exclude` to
filter. Patterns follow Python pathlib glob syntax where wildcards
match dotfiles, similar to `.gitignore` behavior.

\b
Learn more about [job attachments](https://docs.aws.amazon.com/deadline-cloud/latest/userguide/storage-job-attachments.html)
"""
Expand Down Expand Up @@ -173,14 +178,14 @@ def manifest_snapshot(
"-i",
"--include",
default=None,
help="Glob syntax of files and directories to include in the manifest. Can be provided multiple times.",
help="Glob pattern for files to include. Matches dotfiles. Can be provided multiple times",
multiple=True,
)
@click.option(
"-e",
"--exclude",
default=None,
help="Glob syntax of files and directories to exclude in the manifest. Can be provided multiple times.",
help="Glob pattern for files to exclude. Matches dotfiles. Can be provided multiple times",
multiple=True,
)
@click.option(
Expand Down Expand Up @@ -211,6 +216,9 @@ def manifest_diff(
BETA - Compute the file difference of a root directory against an existing
job attachment manifest for new, modified or deleted files.

Patterns follow Python pathlib glob syntax where wildcards match
dotfiles (files and directories starting with `.`).

\b
Learn more about [job attachments](https://docs.aws.amazon.com/deadline-cloud/latest/userguide/storage-job-attachments.html)
"""
Expand Down
31 changes: 20 additions & 11 deletions src/deadline/job_attachments/_glob.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

"""
Glob utilities for file matching in job attachment manifests.

Uses pathlib.Path.glob which, unlike the glob module, treats dotfiles
(files and directories starting with '.') the same as any other file.
The '*' wildcard matches dotfiles without requiring a leading dot in
the pattern. This is consistent with .gitignore behavior and with the
os.walk-based file discovery used by the main job submission path.
"""

import os
import glob
import json
from pathlib import Path
from typing import List, Optional
Expand Down Expand Up @@ -55,17 +64,17 @@ def _match_files_with_pattern(base_path: str, patterns: List[str]) -> set:
Set of normalized file paths that match the patterns
"""
matched_files = set()
root = Path(base_path)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that this will work, but do we want to change this behavior? What if the customer's job checks out a git clone and they now get a whole new hidden git tree uploaded?

for pattern in patterns:
# Make pattern relative to base path
full_pattern = os.path.join(base_path, pattern)

# Use recursive glob for directory matching
for matched_path in glob.glob(full_pattern, recursive=True):
# Only add files, not directories
if os.path.isfile(matched_path):
# Convert to proper path format
normalized_path = os.path.normpath(matched_path)
matched_files.add(normalized_path)
for matched_path in root.glob(pattern):
if matched_path.is_file():
matched_files.add(os.path.normpath(str(matched_path)))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did a quick search; it seems that on Python 3.11+ the default would support hidden folders, but on versions below 3.11 the behavior depends. So I don't think we want the behavior to vary across different versions of Python.

# On Python <3.13, a trailing "**" only yields directories.
# Append "/*" so files inside those directories are also matched.
if pattern.endswith("**"):
for matched_path in root.glob(pattern + "/*"):
if matched_path.is_file():
matched_files.add(os.path.normpath(str(matched_path)))

return matched_files

Expand Down
Empty file.
48 changes: 40 additions & 8 deletions test/unit/deadline_job_attachments/test_glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,16 @@ def test_glob_path_default(test_glob_folder: str):
"""
globbed_files: List[str] = _glob_paths(path=test_glob_folder)

# There are 4 files
assert len(globbed_files) == 4
# There are 6 files (4 original + .dotfile + .hidden_dir/inside_hidden.txt)
assert len(globbed_files) == 6
assert os.path.join(os.sep, test_glob_folder, "include.txt") in globbed_files
assert os.path.join(os.sep, test_glob_folder, "exclude.txt") in globbed_files
assert os.path.join(os.sep, test_glob_folder, "nested", "nested_include.txt") in globbed_files
assert os.path.join(os.sep, test_glob_folder, "nested", "nested_exclude.txt") in globbed_files
assert os.path.join(os.sep, test_glob_folder, ".dotfile") in globbed_files
assert (
os.path.join(os.sep, test_glob_folder, ".hidden_dir", "inside_hidden.txt") in globbed_files
)


def test_glob_path_default_include(test_glob_folder: str):
Expand All @@ -73,8 +77,8 @@ def test_glob_path_exclude(test_glob_folder: str):
path=test_glob_folder, exclude=["*exclude.txt", "*/*exclude.txt"]
)

# There are 4 files
assert len(globbed_files) == 2
# 6 total - 2 excluded = 4 remaining
assert len(globbed_files) == 4
assert os.path.join(os.sep, test_glob_folder, "include.txt") in globbed_files
assert os.path.join(os.sep, test_glob_folder, "nested", "nested_include.txt") in globbed_files

Expand Down Expand Up @@ -107,8 +111,8 @@ def test_glob_path_exclude_subdir(test_glob_folder: str):
"""
globbed_files: List[str] = _glob_paths(path=test_glob_folder, exclude=["nested/**"])

# There are 2 files
assert len(globbed_files) == 2
# 6 total - 2 nested = 4 remaining
assert len(globbed_files) == 4
assert os.path.join(os.sep, test_glob_folder, "include.txt") in globbed_files
assert os.path.join(os.sep, test_glob_folder, "exclude.txt") in globbed_files

Expand All @@ -119,5 +123,33 @@ def test_glob_path_exclude_nonexistent(test_glob_folder: str):
"""
globbed_files: List[str] = _glob_paths(path=test_glob_folder, exclude=["nonexistent/**"])

# There are 2 files
assert len(globbed_files) == 4
# All 6 files remain since the exclude pattern matches nothing
assert len(globbed_files) == 6


def test_glob_path_default_includes_dotfiles(test_glob_folder: str):
    """
    Verify that a default _glob_paths call returns dotfiles as well as files
    located inside dot-directories, matching the behavior of
    pathlib.Path.glob("**/*").
    """
    discovered: List[str] = _glob_paths(path=test_glob_folder)

    # Each entry is the path, relative to the test folder, of a hidden file
    # that must show up in the default results.
    expected_hidden = (
        (".dotfile",),
        (".hidden_dir", "inside_hidden.txt"),
    )
    for relative_parts in expected_hidden:
        assert os.path.join(os.sep, test_glob_folder, *relative_parts) in discovered


def test_glob_path_exclude_dotfiles(test_glob_folder: str):
    """
    Verify that explicit exclude patterns remove dotfiles and the contents of
    dot-directories from the results while leaving regular files in place.
    """
    remaining: List[str] = _glob_paths(path=test_glob_folder, exclude=["**/.*", "**/.*/**"])

    # Hidden entries that the exclude patterns are expected to filter out.
    hidden_paths = (
        os.path.join(os.sep, test_glob_folder, ".dotfile"),
        os.path.join(os.sep, test_glob_folder, ".hidden_dir", "inside_hidden.txt"),
    )
    for hidden_path in hidden_paths:
        assert hidden_path not in remaining

    # Non-dotfiles should still be present
    visible_path = os.path.join(os.sep, test_glob_folder, "include.txt")
    assert visible_path in remaining
Loading