From 2ee1e483fdfe527bb454f5bccb18e386ad89a636 Mon Sep 17 00:00:00 2001 From: larrygao Date: Tue, 21 Apr 2026 19:05:53 +0000 Subject: [PATCH 01/14] feat(cli): Add --include-path selective filtering to download-output Add selective download support for job output attachments: - Add --include-path and --include-path-stdin options to download-output for downloading specific files or directory prefixes - Add _filter_paths() and _matches_any_filter() in download.py - Add path_filters parameter to OutputDownloader - Add _validate_and_normalize_include_paths() with path traversal rejection, backslash-to-forward-slash conversion, and normalization - Fix bug in get_job_input_paths_by_asset_root() where S3 root prefix was not prepended to input manifest keys - Update job_attachments_guide.md to document --include-path - Add unit tests for path filtering and validation Signed-off-by: larrygao --- docs/job_attachments_guide.md | 2 +- src/deadline/client/cli/_groups/job_group.py | 55 ++++++++- src/deadline/job_attachments/download.py | 49 +++++++- .../cli/test_cli_handle_web_url.py | 2 + test/unit/deadline_client/cli/test_cli_job.py | 8 ++ .../cli/test_cli_path_filters.py | 70 +++++++++++ .../unit/deadline_job_attachments/conftest.py | 2 +- .../test_path_filtering.py | 109 ++++++++++++++++++ 8 files changed, 292 insertions(+), 5 deletions(-) create mode 100644 test/unit/deadline_client/cli/test_cli_path_filters.py create mode 100644 test/unit/deadline_job_attachments/test_path_filtering.py diff --git a/docs/job_attachments_guide.md b/docs/job_attachments_guide.md index 8f8af8951..ff35dc61c 100644 --- a/docs/job_attachments_guide.md +++ b/docs/job_attachments_guide.md @@ -4,7 +4,7 @@ Job attachments uses your configured S3 bucket as a [content-addressable storage](https://en.wikipedia.org/wiki/Content-addressable_storage), which creates a snapshot of the files used in your job submission in [asset manifests](#asset-manifests), only uploading files that aren't already in S3. 
This saves you time and bandwidth when iterating on jobs. When an [AWS Deadline Cloud worker agent][worker-agent] starts working on a job with job attachments, it recreates the file system snapshot in the worker agent session directory, and uploads any outputs back to your S3 bucket. -You can then easily download your outputs with the [deadline job download-output] command, or using the [protocol handler](#protocol-handler) to download from a click of a button in the [AWS Deadline Cloud monitor][monitor]. +You can then easily download your outputs with the [deadline job download-output] command, or using the [protocol handler](#protocol-handler) to download from a click of a button in the [AWS Deadline Cloud monitor][monitor]. The command supports `--include-path` for downloading specific files or directories. Job attachments also works as an auxiliary storage when used with [AWS Deadline Cloud storage profiles][shared-storage], allowing you to flexibly upload files to your Amazon S3 bucket that aren't on your configured shared storage. diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index 9db533091..58d5df667 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -499,6 +499,7 @@ def _download_job_output( task_id: Optional[str], is_json_format: bool = False, ignore_storage_profiles: bool = False, + path_filters: Optional[list[str]] = None, ): """ Starts the download of job output and handles the progress reporting callback. 
@@ -557,6 +558,7 @@ def _download_job_output( task_id=task_id, session_action_id=session_action_id, session=queue_role_session, + path_filters=path_filters, ) def _check_and_warn_long_output_paths( @@ -959,6 +961,28 @@ def _assert_valid_path(path: str) -> None: raise ValueError(f"Path {path} is not an absolute path.") +def _validate_and_normalize_include_paths(filters: list[str]) -> list[str]: + """ + Validates and normalizes include paths. + - Rejects filters containing '..' (path traversal prevention) + - Converts backslashes to forward slashes (Windows compatibility) + - Strips leading './' + - Normalizes '//' to '/' + """ + normalized = [] + for f in filters: + if ".." in f: + raise click.BadParameter(f"Path filter must not contain '..': {f}") + f = f.replace("\\", "/") + if f.startswith("./"): + f = f[2:] + while "//" in f: + f = f.replace("//", "/") + if f: + normalized.append(f) + return normalized + + @cli_job.command(name="download-output") @click.option("--profile", help="The AWS profile to use.") @click.option("--farm-id", help="The farm to use.") @@ -966,6 +990,16 @@ def _assert_valid_path(path: str) -> None: @click.option("--job-id", help="The job to use.") @click.option("--step-id", help="The step to use.") @click.option("--task-id", help="The task to use.") +@click.option( + "--include-path", + multiple=True, + help="Download only files matching this relative path or directory prefix (trailing /). 
Repeatable.", +) +@click.option( + "--include-path-stdin", + is_flag=True, + help="Read include paths from stdin, one per line.", +) @click.option( "--ignore-storage-profiles", is_flag=True, @@ -1007,7 +1041,9 @@ def _assert_valid_path(path: str) -> None: "parsed/consumed by custom scripts.", ) @_handle_error -def job_download_output(step_id, task_id, output, ignore_storage_profiles, **args): +def job_download_output( + step_id, task_id, output, ignore_storage_profiles, include_path, include_path_stdin, **args +): """ Download the output of a Deadline Cloud job that was saved as job attachments. @@ -1018,6 +1054,22 @@ def job_download_output(step_id, task_id, output, ignore_storage_profiles, **arg if task_id and not step_id: raise click.UsageError("Missing option '--step-id' required with '--task-id'") + filters = list(include_path) + if include_path_stdin: + for line in sys.stdin: + stripped = line.strip() + if not stripped: + break + filters.append(stripped) + try: + tty_path = "CON" if sys.platform == "win32" else "/dev/tty" + sys.stdin = open(tty_path) # noqa: SIM115 + except OSError: + pass + if filters: + filters = _validate_and_normalize_include_paths(filters) + path_filters = filters or None + # Get a temporary config object with the standard options handled config = _apply_cli_options_to_config( required_options={"farm_id", "queue_id", "job_id"}, **args @@ -1038,6 +1090,7 @@ def job_download_output(step_id, task_id, output, ignore_storage_profiles, **arg task_id=task_id, is_json_format=is_json_format, ignore_storage_profiles=ignore_storage_profiles, + path_filters=path_filters, ) except Exception as e: if is_json_format: diff --git a/src/deadline/job_attachments/download.py b/src/deadline/job_attachments/download.py index 7e33b93a6..89bcecfc0 100644 --- a/src/deadline/job_attachments/download.py +++ b/src/deadline/job_attachments/download.py @@ -73,7 +73,6 @@ from ._utils import ( _get_long_path_compatible_path, _is_relative_to, - _join_s3_paths, ) from 
threading import Lock @@ -325,7 +324,9 @@ def get_job_input_paths_by_asset_root( for manifest_properties in attachments.manifests: if manifest_properties.inputManifestPath: - key = _join_s3_paths(manifest_properties.inputManifestPath) + key = s3_settings.add_root_and_manifest_folder_prefix( + manifest_properties.inputManifestPath + ) _, asset_manifest = get_asset_root_and_manifest_from_s3( manifest_key=key, s3_bucket=s3_settings.s3BucketName, @@ -1243,6 +1244,47 @@ def mount_vfs_from_manifests( vfs_manager.start(session_dir=session_dir) +def _matches_any_filter(file_path: str, filters: list[str]) -> bool: + """ + Check if a file path matches any of the given filters. + - Exact match: filter "renders/frame_001.exr" matches only that path + - Directory prefix: filter "renders/" matches all paths starting with "renders/" + """ + for f in filters: + if f.endswith("/"): + if file_path.startswith(f): + return True + else: + if file_path == f: + return True + return False + + +def _filter_paths( + paths_by_root: dict[str, ManifestPathGroup], + path_filters: list[str], +) -> dict[str, ManifestPathGroup]: + """ + Filter ManifestPathGroups to only include files matching the given filters. + Each filter is an exact relative file path or a directory prefix (ending with '/'). + Filters are matched against file paths across all asset roots. 
+ """ + filtered: dict[str, ManifestPathGroup] = {} + for root, group in paths_by_root.items(): + filtered_group = ManifestPathGroup() + for hash_alg, file_list in group.files_by_hash_alg.items(): + matching = [f for f in file_list if _matches_any_filter(f.path, path_filters)] + if matching: + if hash_alg not in filtered_group.files_by_hash_alg: + filtered_group.files_by_hash_alg[hash_alg] = matching + else: + filtered_group.files_by_hash_alg[hash_alg].extend(matching) + filtered_group.total_bytes += sum(f.size for f in matching) + if filtered_group.files_by_hash_alg: + filtered[root] = filtered_group + return filtered + + def _ensure_paths_within_directory(root_path: str, paths_relative_to_root: list[str]) -> None: """ Validates the given paths to ensure that they are within the given root path. @@ -1281,6 +1323,7 @@ def __init__( task_id: Optional[str] = None, session_action_id: Optional[str] = None, session: Optional[boto3.Session] = None, + path_filters: Optional[list[str]] = None, ) -> None: self.s3_settings = s3_settings self.session = session @@ -1294,6 +1337,8 @@ def __init__( session_action_id=session_action_id, session=session, ) + if path_filters: + self.outputs_by_root = _filter_paths(self.outputs_by_root, path_filters) def get_output_paths_by_root(self) -> dict[str, list[str]]: """ diff --git a/test/unit/deadline_client/cli/test_cli_handle_web_url.py b/test/unit/deadline_client/cli/test_cli_handle_web_url.py index 9c231fb9f..8a50638f1 100644 --- a/test/unit/deadline_client/cli/test_cli_handle_web_url.py +++ b/test/unit/deadline_client/cli/test_cli_handle_web_url.py @@ -306,6 +306,7 @@ def test_cli_handle_web_url_download_output_only_required_input(fresh_deadline_c task_id=None, session_action_id=None, session=ANY, + path_filters=None, ) mock_download.assert_called_once_with( file_conflict_resolution=FileConflictResolution.CREATE_COPY, @@ -371,6 +372,7 @@ def test_cli_handle_web_url_download_output_with_optional_input(fresh_deadline_c 
task_id=MOCK_TASK_ID, session_action_id=MOCK_SESSION_ACTION_ID, session=ANY, + path_filters=None, ) mock_download.assert_called_once_with( file_conflict_resolution=FileConflictResolution.CREATE_COPY, diff --git a/test/unit/deadline_client/cli/test_cli_job.py b/test/unit/deadline_client/cli/test_cli_job.py index a1d08da29..7c1675fa9 100644 --- a/test/unit/deadline_client/cli/test_cli_job.py +++ b/test/unit/deadline_client/cli/test_cli_job.py @@ -377,6 +377,7 @@ def test_cli_job_download_output_stdout_with_only_required_input( task_id=None, session_action_id=None, session=ANY, + path_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -490,6 +491,7 @@ def test_cli_job_download_output_stdout_with_mismatching_path_format( task_id=None, session_action_id=None, session=ANY, + path_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -590,6 +592,7 @@ def test_cli_job_download_output_handles_unc_path_on_windows(fresh_deadline_conf task_id=None, session_action_id=None, session=ANY, + path_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -673,6 +676,7 @@ def test_cli_job_download_no_output_stdout(fresh_deadline_config, tmp_path: Path task_id=None, session_action_id=None, session=ANY, + path_filters=None, ) assert ( @@ -774,6 +778,7 @@ def test_cli_job_download_output_stdout_with_json_format( task_id=None, session_action_id=None, session=ANY, + path_filters=None, ) expected_json_title = {"messageType": "title", "value": "Mock Job"} @@ -1390,6 +1395,7 @@ def test_cli_job_download_output_handle_web_url_with_optional_input( task_id="task-2", session_action_id=MOCK_SESSION_ACTION_ID, session=ANY, + path_filters=None, ) mock_download.assert_called_once_with( file_conflict_resolution=FileConflictResolution.CREATE_COPY, @@ -1476,6 +1482,7 @@ def test_cli_job_download_output_with_different_asset_root_path_format_than_job( task_id=None, session_action_id=None, session=ANY, + path_filters=None, ) 
path_separator = "/" if sys.platform != "win32" else "\\" @@ -1707,6 +1714,7 @@ def test_cli_job_download_output_with_session_action_id(fresh_deadline_config): task_id=MOCK_TASK_ID, session_action_id=MOCK_SESSION_ACTION_ID, session=ANY, + path_filters=None, ) diff --git a/test/unit/deadline_client/cli/test_cli_path_filters.py b/test/unit/deadline_client/cli/test_cli_path_filters.py new file mode 100644 index 000000000..f2d2f1fce --- /dev/null +++ b/test/unit/deadline_client/cli/test_cli_path_filters.py @@ -0,0 +1,70 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + +"""Tests for _validate_and_normalize_include_paths.""" + +import sys + +import pytest +import click + +from deadline.client.cli._groups.job_group import _validate_and_normalize_include_paths + + +class TestValidateAndNormalizePathFilters: + def test_rejects_double_dot(self): + with pytest.raises(click.BadParameter, match="must not contain '..'"): + _validate_and_normalize_include_paths(["../../etc/passwd"]) + + def test_rejects_double_dot_in_middle(self): + with pytest.raises(click.BadParameter, match="must not contain '..'"): + _validate_and_normalize_include_paths(["renders/../secret"]) + + def test_converts_backslashes(self): + result = _validate_and_normalize_include_paths(["renders\\frame_001.exr"]) + assert result == ["renders/frame_001.exr"] + + def test_strips_leading_dot_slash(self): + result = _validate_and_normalize_include_paths(["./renders/frame.exr"]) + assert result == ["renders/frame.exr"] + + def test_collapses_double_slashes(self): + result = _validate_and_normalize_include_paths(["renders//frame.exr"]) + assert result == ["renders/frame.exr"] + + def test_empty_filter_removed(self): + result = _validate_and_normalize_include_paths(["", "a.txt"]) + assert result == ["a.txt"] + + def test_passthrough_normal_paths(self): + result = _validate_and_normalize_include_paths( + ["renders/frame_001.exr", "textures/", "scripts/setup.mel"] + ) + assert result == 
["renders/frame_001.exr", "textures/", "scripts/setup.mel"] + + def test_backslash_with_double_dot_rejected(self): + """Backslash path traversal like 'renders\\..\\..' should be rejected.""" + with pytest.raises(click.BadParameter, match="must not contain '..'"): + _validate_and_normalize_include_paths(["renders\\..\\secret"]) + + def test_multiple_normalizations(self): + result = _validate_and_normalize_include_paths([".\\renders\\\\frame.exr"]) + # .\ -> ./ after backslash conversion, then ./ stripped, // collapsed + assert result == ["renders/frame.exr"] + + +class TestStdinReadsUntilEmptyLine: + def test_stdin_stops_at_empty_line(self): + """Verify that --include-path-stdin reads until an empty line.""" + from io import StringIO + from unittest.mock import patch + + stdin_data = "renders/frame_001.exr\nrenders/frame_002.exr\n\nextra_ignored\n" + collected = [] + with patch("sys.stdin", StringIO(stdin_data)): + for line in sys.stdin: + stripped = line.strip() + if not stripped: + break + collected.append(stripped) + + assert collected == ["renders/frame_001.exr", "renders/frame_002.exr"] diff --git a/test/unit/deadline_job_attachments/conftest.py b/test/unit/deadline_job_attachments/conftest.py index 272e377c0..ce5e41270 100644 --- a/test/unit/deadline_job_attachments/conftest.py +++ b/test/unit/deadline_job_attachments/conftest.py @@ -112,7 +112,7 @@ def fixture_default_attachments(farm_id, queue_id): ManifestProperties( rootPath="/tmp", rootPathFormat=PathFormat.POSIX, - inputManifestPath=f"assetRoot/Manifests/{farm_id}/{queue_id}/Inputs/0000/manifest_input", + inputManifestPath=f"{farm_id}/{queue_id}/Inputs/0000/manifest_input", inputManifestHash="manifesthash", outputRelativeDirectories=["test/outputs"], ) diff --git a/test/unit/deadline_job_attachments/test_path_filtering.py b/test/unit/deadline_job_attachments/test_path_filtering.py new file mode 100644 index 000000000..8f1e9d76d --- /dev/null +++ b/test/unit/deadline_job_attachments/test_path_filtering.py 
@@ -0,0 +1,109 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + +"""Tests for path filtering in download.py""" + +from typing import List + +from deadline.job_attachments.download import ( + _matches_any_filter, + _filter_paths, +) +from deadline.job_attachments.models import ManifestPathGroup +from deadline.job_attachments.asset_manifests.hash_algorithms import HashAlgorithm +from deadline.job_attachments.asset_manifests.v2023_03_03 import ( + ManifestPath as ManifestPathv2023_03_03, +) + + +class TestMatchesAnyFilter: + def test_exact_match(self): + assert _matches_any_filter("renders/frame_001.exr", ["renders/frame_001.exr"]) is True + + def test_exact_no_match(self): + assert _matches_any_filter("renders/frame_002.exr", ["renders/frame_001.exr"]) is False + + def test_directory_prefix_match(self): + assert _matches_any_filter("renders/frame_001.exr", ["renders/"]) is True + + def test_directory_prefix_no_match(self): + assert _matches_any_filter("textures/wood.exr", ["renders/"]) is False + + def test_directory_prefix_does_not_match_similar_names(self): + """'renders/' should NOT match 'renders_v2/file.exr'""" + assert _matches_any_filter("renders_v2/file.exr", ["renders/"]) is False + + def test_multiple_filters_or(self): + assert ( + _matches_any_filter("textures/wood.exr", ["renders/frame_001.exr", "textures/"]) is True + ) + + def test_empty_filters(self): + assert _matches_any_filter("renders/frame_001.exr", []) is False + + def test_nested_directory_prefix(self): + assert _matches_any_filter("a/b/c/file.txt", ["a/b/"]) is True + + def test_exact_match_no_trailing_slash(self): + """Exact filter without trailing slash should not match subdirectory files.""" + assert _matches_any_filter("renders/frame_001.exr", ["renders"]) is False + + +class TestFilterPaths: + def _make_group(self, paths: List[str]) -> ManifestPathGroup: + group = ManifestPathGroup() + group.files_by_hash_alg[HashAlgorithm.XXH128] = [ + 
ManifestPathv2023_03_03(path=p, hash="abc123", size=100, mtime=1234000000) + for p in paths + ] + group.total_bytes = len(paths) * 100 + return group + + def test_exact_filter(self): + paths_by_root = {"/root": self._make_group(["a.txt", "b.txt", "c.txt"])} + result = _filter_paths(paths_by_root, ["b.txt"]) + assert list(result.keys()) == ["/root"] + assert [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] == [ + "b.txt" + ] + assert result["/root"].total_bytes == 100 + + def test_directory_prefix_filter(self): + paths_by_root = { + "/root": self._make_group(["renders/a.exr", "renders/b.exr", "textures/c.png"]) + } + result = _filter_paths(paths_by_root, ["renders/"]) + files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] + assert files == ["renders/a.exr", "renders/b.exr"] + + def test_no_matches_returns_empty(self): + paths_by_root = {"/root": self._make_group(["a.txt"])} + result = _filter_paths(paths_by_root, ["nonexistent.txt"]) + assert result == {} + + def test_multiple_asset_roots(self): + paths_by_root = { + "/root1": self._make_group(["shared/file.txt", "other.txt"]), + "/root2": self._make_group(["shared/file.txt", "different.txt"]), + } + result = _filter_paths(paths_by_root, ["shared/file.txt"]) + assert "/root1" in result + assert "/root2" in result + + def test_mixed_filters(self): + paths_by_root = { + "/root": self._make_group( + ["renders/a.exr", "renders/b.exr", "textures/c.png", "scripts/setup.mel"] + ) + } + result = _filter_paths(paths_by_root, ["renders/", "scripts/setup.mel"]) + files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] + assert set(files) == {"renders/a.exr", "renders/b.exr", "scripts/setup.mel"} + + def test_empty_root_removed(self): + paths_by_root = { + "/has_match": self._make_group(["a.txt"]), + "/no_match": self._make_group(["b.txt"]), + } + result = _filter_paths(paths_by_root, ["a.txt"]) + assert "/has_match" in result + assert "/no_match" 
not in result From 4e93020f2cf5c88f783aaaffa3a5e6bb51137b57 Mon Sep 17 00:00:00 2001 From: larrygao Date: Tue, 21 Apr 2026 19:56:33 +0000 Subject: [PATCH 02/14] fix: add explanatory comment for empty except clause Signed-off-by: larrygao --- src/deadline/client/cli/_groups/job_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index 58d5df667..069423869 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -1065,7 +1065,7 @@ def job_download_output( tty_path = "CON" if sys.platform == "win32" else "/dev/tty" sys.stdin = open(tty_path) # noqa: SIM115 except OSError: - pass + pass # Non-interactive environment (CI, Tauri) — no TTY to reopen if filters: filters = _validate_and_normalize_include_paths(filters) path_filters = filters or None From 4cc2547e9bb53a92d421c4272a704a07985c26b5 Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:05:46 -0700 Subject: [PATCH 03/14] fix: apply --include-path filters to storage profile download path The --include-path filters were only applied in the OutputDownloader code path. When storage profiles are configured, downloads go through _transform_manifests_to_absolute_paths and _download_mapped_manifests, which bypassed the filters entirely. Add _filter_manifests() to filter BaseAssetManifest objects by path, and apply it before path transformation in the storage profile flow. Add unit tests for the new function. 
Signed-off-by: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> --- src/deadline/client/cli/_groups/job_group.py | 3 + src/deadline/job_attachments/download.py | 22 +++++++ .../test_path_filtering.py | 58 +++++++++++++++++++ 3 files changed, 83 insertions(+) diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index 069423869..f5b1ec466 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -67,6 +67,7 @@ from ....job_attachments._path_mapping import _generate_path_mapping_rules from ....job_attachments.download import ( OutputDownloader, + _filter_manifests, get_output_manifests_by_asset_root, ) @@ -605,6 +606,8 @@ def _check_and_warn_long_output_paths( session_action_id=session_action_id, session=queue_role_session, ) + if path_filters: + manifests_by_root = _filter_manifests(manifests_by_root, path_filters) mapped_manifests = _transform_manifests_to_absolute_paths( manifests_by_root, rules, resolved.job_profile.osFamily ) diff --git a/src/deadline/job_attachments/download.py b/src/deadline/job_attachments/download.py index 89bcecfc0..fd184b88c 100644 --- a/src/deadline/job_attachments/download.py +++ b/src/deadline/job_attachments/download.py @@ -1285,6 +1285,28 @@ def _filter_paths( return filtered +def _filter_manifests( + manifests_by_root: dict[str, list[BaseAssetManifest]], + path_filters: list[str], +) -> dict[str, list[BaseAssetManifest]]: + """ + Filter BaseAssetManifest objects to only include files matching the given filters. + Each filter is an exact relative file path or a directory prefix (ending with '/'). + Returns a new dict with manifests whose paths have been filtered; empty manifests are removed. 
+ """ + filtered: dict[str, list[BaseAssetManifest]] = {} + for root, manifest_list in manifests_by_root.items(): + filtered_manifests = [] + for manifest in manifest_list: + matching = [p for p in manifest.paths if _matches_any_filter(p.path, path_filters)] + if matching: + manifest.paths = matching + filtered_manifests.append(manifest) + if filtered_manifests: + filtered[root] = filtered_manifests + return filtered + + def _ensure_paths_within_directory(root_path: str, paths_relative_to_root: list[str]) -> None: """ Validates the given paths to ensure that they are within the given root path. diff --git a/test/unit/deadline_job_attachments/test_path_filtering.py b/test/unit/deadline_job_attachments/test_path_filtering.py index 8f1e9d76d..56890ff01 100644 --- a/test/unit/deadline_job_attachments/test_path_filtering.py +++ b/test/unit/deadline_job_attachments/test_path_filtering.py @@ -7,10 +7,16 @@ from deadline.job_attachments.download import ( _matches_any_filter, _filter_paths, + _filter_manifests, ) from deadline.job_attachments.models import ManifestPathGroup +from deadline.job_attachments.asset_manifests.base_manifest import ( + BaseAssetManifest, + BaseManifestPath, +) from deadline.job_attachments.asset_manifests.hash_algorithms import HashAlgorithm from deadline.job_attachments.asset_manifests.v2023_03_03 import ( + AssetManifest as AssetManifestv2023_03_03, ManifestPath as ManifestPathv2023_03_03, ) @@ -107,3 +113,55 @@ def test_empty_root_removed(self): result = _filter_paths(paths_by_root, ["a.txt"]) assert "/has_match" in result assert "/no_match" not in result + + +class TestFilterManifests: + def _make_manifest(self, paths: List[str]) -> BaseAssetManifest: + manifest_paths: List[BaseManifestPath] = [ + ManifestPathv2023_03_03(path=p, hash="abc123", size=100, mtime=1234000000) + for p in paths + ] + return AssetManifestv2023_03_03( + hash_alg=HashAlgorithm.XXH128, + paths=manifest_paths, + total_size=len(paths) * 100, + ) + + def 
test_exact_filter(self): + manifests_by_root = {"/root": [self._make_manifest(["a.txt", "b.txt", "c.txt"])]} + result = _filter_manifests(manifests_by_root, ["b.txt"]) + assert "/root" in result + assert [p.path for p in result["/root"][0].paths] == ["b.txt"] + + def test_directory_prefix_filter(self): + manifests_by_root = { + "/root": [self._make_manifest(["renders/a.exr", "renders/b.exr", "textures/c.png"])] + } + result = _filter_manifests(manifests_by_root, ["renders/"]) + assert [p.path for p in result["/root"][0].paths] == ["renders/a.exr", "renders/b.exr"] + + def test_no_matches_returns_empty(self): + manifests_by_root = {"/root": [self._make_manifest(["a.txt"])]} + result = _filter_manifests(manifests_by_root, ["nonexistent.txt"]) + assert result == {} + + def test_empty_root_removed(self): + manifests_by_root = { + "/has_match": [self._make_manifest(["a.txt"])], + "/no_match": [self._make_manifest(["b.txt"])], + } + result = _filter_manifests(manifests_by_root, ["a.txt"]) + assert "/has_match" in result + assert "/no_match" not in result + + def test_multiple_manifests_per_root(self): + manifests_by_root = { + "/root": [ + self._make_manifest(["a.txt", "b.txt"]), + self._make_manifest(["c.txt", "d.txt"]), + ] + } + result = _filter_manifests(manifests_by_root, ["a.txt", "c.txt"]) + assert len(result["/root"]) == 2 + assert [p.path for p in result["/root"][0].paths] == ["a.txt"] + assert [p.path for p in result["/root"][1].paths] == ["c.txt"] From 8476651301a4ae56d17409941a0b11c0f472013c Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:41:27 -0700 Subject: [PATCH 04/14] refactor: address review feedback from leongdl - Simplify _matches_any_filter() to use any() expression - Move _validate_and_normalize_include_paths() to _job_download_helpers.py - Skip TTY stdin reopen in JSON mode since there are no interactive prompts Signed-off-by: Nathy MacKinlay 
<61921733+waninggibbon@users.noreply.github.com> --- .../cli/_groups/_job_download_helpers.py | 22 ++++++++++++ src/deadline/client/cli/_groups/job_group.py | 34 ++++--------------- src/deadline/job_attachments/download.py | 9 +---- .../cli/test_cli_path_filters.py | 2 +- 4 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/deadline/client/cli/_groups/_job_download_helpers.py b/src/deadline/client/cli/_groups/_job_download_helpers.py index 62ca31005..339925f1c 100644 --- a/src/deadline/client/cli/_groups/_job_download_helpers.py +++ b/src/deadline/client/cli/_groups/_job_download_helpers.py @@ -305,3 +305,25 @@ def _on_progress_json(download_metadata: ProgressReportMetadata) -> bool: return True return _do_download(on_downloading_files=_on_progress_json) + + +def _validate_and_normalize_include_paths(filters: list[str]) -> list[str]: + """ + Validates and normalizes include paths. + - Rejects filters containing '..' (path traversal prevention) + - Converts backslashes to forward slashes (Windows compatibility) + - Strips leading './' + - Normalizes '//' to '/' + """ + normalized = [] + for f in filters: + if ".." 
in f: + raise click.BadParameter(f"Path filter must not contain '..': {f}") + f = f.replace("\\", "/") + if f.startswith("./"): + f = f[2:] + while "//" in f: + f = f.replace("//", "/") + if f: + normalized.append(f) + return normalized diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index f5b1ec466..520d06964 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -63,6 +63,7 @@ _resolve_conflict_resolution, _resolve_storage_profiles, _transform_manifests_to_absolute_paths, + _validate_and_normalize_include_paths, ) from ....job_attachments._path_mapping import _generate_path_mapping_rules from ....job_attachments.download import ( @@ -964,28 +965,6 @@ def _assert_valid_path(path: str) -> None: raise ValueError(f"Path {path} is not an absolute path.") -def _validate_and_normalize_include_paths(filters: list[str]) -> list[str]: - """ - Validates and normalizes include paths. - - Rejects filters containing '..' (path traversal prevention) - - Converts backslashes to forward slashes (Windows compatibility) - - Strips leading './' - - Normalizes '//' to '/' - """ - normalized = [] - for f in filters: - if ".." 
in f: - raise click.BadParameter(f"Path filter must not contain '..': {f}") - f = f.replace("\\", "/") - if f.startswith("./"): - f = f[2:] - while "//" in f: - f = f.replace("//", "/") - if f: - normalized.append(f) - return normalized - - @cli_job.command(name="download-output") @click.option("--profile", help="The AWS profile to use.") @click.option("--farm-id", help="The farm to use.") @@ -1064,11 +1043,12 @@ def job_download_output( if not stripped: break filters.append(stripped) - try: - tty_path = "CON" if sys.platform == "win32" else "/dev/tty" - sys.stdin = open(tty_path) # noqa: SIM115 - except OSError: - pass # Non-interactive environment (CI, Tauri) — no TTY to reopen + if output != "json": + try: + tty_path = "CON" if sys.platform == "win32" else "/dev/tty" + sys.stdin = open(tty_path) # noqa: SIM115 + except OSError: + pass # Non-interactive environment — no TTY to reopen if filters: filters = _validate_and_normalize_include_paths(filters) path_filters = filters or None diff --git a/src/deadline/job_attachments/download.py b/src/deadline/job_attachments/download.py index fd184b88c..ab8d1abdf 100644 --- a/src/deadline/job_attachments/download.py +++ b/src/deadline/job_attachments/download.py @@ -1250,14 +1250,7 @@ def _matches_any_filter(file_path: str, filters: list[str]) -> bool: - Exact match: filter "renders/frame_001.exr" matches only that path - Directory prefix: filter "renders/" matches all paths starting with "renders/" """ - for f in filters: - if f.endswith("/"): - if file_path.startswith(f): - return True - else: - if file_path == f: - return True - return False + return any(file_path.startswith(f) if f.endswith("/") else file_path == f for f in filters) def _filter_paths( diff --git a/test/unit/deadline_client/cli/test_cli_path_filters.py b/test/unit/deadline_client/cli/test_cli_path_filters.py index f2d2f1fce..9438a158b 100644 --- a/test/unit/deadline_client/cli/test_cli_path_filters.py +++ 
b/test/unit/deadline_client/cli/test_cli_path_filters.py @@ -7,7 +7,7 @@ import pytest import click -from deadline.client.cli._groups.job_group import _validate_and_normalize_include_paths +from deadline.client.cli._groups._job_download_helpers import _validate_and_normalize_include_paths class TestValidateAndNormalizePathFilters: From 6c93c4e006f3d2d833164e2310d40deaddf3fcba Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Thu, 23 Apr 2026 09:38:41 -0700 Subject: [PATCH 05/14] refactor: Address review feedback on --include-path filtering - Remove path traversal (..) rejection from path filter normalization; filters only select which files to download so .. is harmless - Remove TTY stdin reopen hack; force auto_accept when --include-path-stdin is used so no interactive prompts are attempted - Rename _validate_and_normalize_include_paths to _normalize_include_paths - Add e2e tests using mock HTTP server pattern: directory prefix, exact file, multiple filters, no-match, stdin, and stdin+json (DCM pattern) - Extract _seed_output_job helper to reduce e2e test boilerplate Signed-off-by: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> --- .../cli/_groups/_job_download_helpers.py | 7 +- src/deadline/client/cli/_groups/job_group.py | 14 +- .../cli/test_cli_attachment_e2e.py | 310 ++++++++++++++++++ .../cli/test_cli_path_filters.py | 32 +- 4 files changed, 325 insertions(+), 38 deletions(-) diff --git a/src/deadline/client/cli/_groups/_job_download_helpers.py b/src/deadline/client/cli/_groups/_job_download_helpers.py index 339925f1c..75daedd4e 100644 --- a/src/deadline/client/cli/_groups/_job_download_helpers.py +++ b/src/deadline/client/cli/_groups/_job_download_helpers.py @@ -307,18 +307,15 @@ def _on_progress_json(download_metadata: ProgressReportMetadata) -> bool: return _do_download(on_downloading_files=_on_progress_json) -def _validate_and_normalize_include_paths(filters: list[str]) -> list[str]: 
+def _normalize_include_paths(filters: list[str]) -> list[str]: """ - Validates and normalizes include paths. - - Rejects filters containing '..' (path traversal prevention) + Normalizes include paths. - Converts backslashes to forward slashes (Windows compatibility) - Strips leading './' - Normalizes '//' to '/' """ normalized = [] for f in filters: - if ".." in f: - raise click.BadParameter(f"Path filter must not contain '..': {f}") f = f.replace("\\", "/") if f.startswith("./"): f = f[2:] diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index 520d06964..166fc36bc 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -63,7 +63,7 @@ _resolve_conflict_resolution, _resolve_storage_profiles, _transform_manifests_to_absolute_paths, - _validate_and_normalize_include_paths, + _normalize_include_paths, ) from ....job_attachments._path_mapping import _generate_path_mapping_rules from ....job_attachments.download import ( @@ -502,13 +502,14 @@ def _download_job_output( is_json_format: bool = False, ignore_storage_profiles: bool = False, path_filters: Optional[list[str]] = None, + force_auto_accept: bool = False, ): """ Starts the download of job output and handles the progress reporting callback. 
""" deadline = api.get_boto3_client("deadline", config=config) - auto_accept = config_file.str2bool( + auto_accept = force_auto_accept or config_file.str2bool( config_file.get_setting("settings.auto_accept", config=config) ) conflict_resolution = config_file.get_setting("settings.conflict_resolution", config=config) @@ -1043,14 +1044,8 @@ def job_download_output( if not stripped: break filters.append(stripped) - if output != "json": - try: - tty_path = "CON" if sys.platform == "win32" else "/dev/tty" - sys.stdin = open(tty_path) # noqa: SIM115 - except OSError: - pass # Non-interactive environment — no TTY to reopen if filters: - filters = _validate_and_normalize_include_paths(filters) + filters = _normalize_include_paths(filters) path_filters = filters or None # Get a temporary config object with the standard options handled @@ -1074,6 +1069,7 @@ def job_download_output( is_json_format=is_json_format, ignore_storage_profiles=ignore_storage_profiles, path_filters=path_filters, + force_auto_accept=include_path_stdin, ) except Exception as e: if is_json_format: diff --git a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py index a29241824..2002873a2 100644 --- a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py +++ b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py @@ -195,6 +195,67 @@ def _s3_client(endpoint_url: str): ) +# ---- helpers ----------------------------------------------------------------- + + +def _seed_output_job( + backend: MockDeadlineBackend, + s3_endpoint: str, + farm_id: str, + queue_id: str, + job_id: str, + asset_root: str, + files: dict[str, bytes], + step_id: str = "step-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa0", + task_id: str = "task-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa0-0", +) -> None: + """Seed S3 with CAS objects + output manifest and register the job in the mock backend.""" + s3 = _s3_client(s3_endpoint) + manifest_paths = [] + for rel_path, content in files.items(): + 
file_hash = hash_data(content, HashAlgorithm.XXH128) + s3.put_object(Bucket=BUCKET, Key=f"{ROOT_PREFIX}/Data/{file_hash}.xxh128", Body=content) + manifest_paths.append( + {"hash": file_hash, "mtime": 1234000000, "path": rel_path, "size": len(content)} + ) + + manifest_body = json.dumps( + { + "hashAlg": "xxh128", + "manifestVersion": "2023-03-03", + "paths": manifest_paths, + "totalSize": sum(len(c) for c in files.values()), + } + ).encode() + manifest_key = ( + f"{ROOT_PREFIX}/Manifests/{farm_id}/{queue_id}/{job_id}/{step_id}/{task_id}/" + f"sessionaction-0/outputmanifestv2023-03-03_output" + ) + s3.put_object( + Bucket=BUCKET, Key=manifest_key, Body=manifest_body, Metadata={"asset-root": asset_root} + ) + + backend.jobs[(farm_id, queue_id, job_id)] = { + "jobId": job_id, + "name": f"test-job-{job_id[-4:]}", + "lifecycleStatus": "CREATE_COMPLETE", + "lifecycleStatusMessage": "", + "priority": 50, + "createdAt": backend._now(), + "createdBy": "tester", + "taskRunStatus": "READY", + "attachments": { + "manifests": [ + { + "rootPath": asset_root, + "rootPathFormat": "windows" if os.name == "nt" else "posix", + } + ], + "fileSystem": "COPIED", + }, + } + + # ---- tests ------------------------------------------------------------------ @@ -597,6 +658,255 @@ def test_cli_job_download_output(deadline_setup, tmp_path): assert (Path(asset_root) / "result.txt").read_text() == "rendered-output" +def test_cli_job_download_output_include_path(deadline_setup, tmp_path): + """ + `deadline job download-output --include-path` with a directory prefix + downloads only files under that directory. 
+ """ + backend, farm_id, queue_id, env = deadline_setup + _configure_defaults(env, farm_id, queue_id) + + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1" + asset_root = str(tmp_path / "filtered_outputs") + Path(asset_root).mkdir() + + files = { + "renders/frame_001.exr": b"frame-one", + "renders/frame_002.exr": b"frame-two", + "logs/render.log": b"log-data", + } + _seed_output_job( + backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files + ) + + r = _run( + env, + "job", + "download-output", + "--job-id", + job_id, + "--include-path", + "renders/", + "--conflict-resolution", + "OVERWRITE", + "--yes", + ) + assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" + + assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" + assert (Path(asset_root) / "renders" / "frame_002.exr").read_bytes() == b"frame-two" + assert not (Path(asset_root) / "logs" / "render.log").exists() + + +def test_cli_job_download_output_include_path_exact_file(deadline_setup, tmp_path): + """ + `deadline job download-output --include-path` with an exact file path + downloads only that single file. 
+ """ + backend, farm_id, queue_id, env = deadline_setup + _configure_defaults(env, farm_id, queue_id) + + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa2" + asset_root = str(tmp_path / "exact_outputs") + Path(asset_root).mkdir() + + files = { + "renders/frame_001.exr": b"frame-one", + "renders/frame_002.exr": b"frame-two", + } + _seed_output_job( + backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files + ) + + r = _run( + env, + "job", + "download-output", + "--job-id", + job_id, + "--include-path", + "renders/frame_001.exr", + "--conflict-resolution", + "OVERWRITE", + "--yes", + ) + assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" + + assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" + assert not (Path(asset_root) / "renders" / "frame_002.exr").exists() + + +def test_cli_job_download_output_include_path_multiple(deadline_setup, tmp_path): + """ + Multiple --include-path values are OR'd: files matching any filter are downloaded. 
+ """ + backend, farm_id, queue_id, env = deadline_setup + _configure_defaults(env, farm_id, queue_id) + + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa3" + asset_root = str(tmp_path / "multi_outputs") + Path(asset_root).mkdir() + + files = { + "renders/frame_001.exr": b"frame-one", + "logs/render.log": b"log-data", + "scripts/setup.mel": b"mel-script", + } + _seed_output_job( + backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files + ) + + r = _run( + env, + "job", + "download-output", + "--job-id", + job_id, + "--include-path", + "renders/frame_001.exr", + "--include-path", + "scripts/", + "--conflict-resolution", + "OVERWRITE", + "--yes", + ) + assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" + + assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" + assert (Path(asset_root) / "scripts" / "setup.mel").read_bytes() == b"mel-script" + assert not (Path(asset_root) / "logs" / "render.log").exists() + + +def test_cli_job_download_output_include_path_no_match(deadline_setup, tmp_path): + """ + --include-path with a filter that matches nothing reports no output files. 
+ """ + backend, farm_id, queue_id, env = deadline_setup + _configure_defaults(env, farm_id, queue_id) + + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4" + asset_root = str(tmp_path / "nomatch_outputs") + Path(asset_root).mkdir() + + files = {"renders/frame_001.exr": b"frame-one"} + _seed_output_job( + backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files + ) + + r = _run( + env, + "job", + "download-output", + "--job-id", + job_id, + "--include-path", + "nonexistent.txt", + "--conflict-resolution", + "OVERWRITE", + "--yes", + ) + assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" + assert "no output files available" in r.stdout.lower() + + assert not (Path(asset_root) / "renders" / "frame_001.exr").exists() + + +def test_cli_job_download_output_include_path_stdin(deadline_setup, tmp_path): + """ + --include-path-stdin reads paths from stdin (one per line, empty line terminates). + This mirrors DCM's usage pattern where the desktop app pipes paths to the CLI. + Interactive prompts are skipped when stdin is consumed. 
+ """ + backend, farm_id, queue_id, env = deadline_setup + _configure_defaults(env, farm_id, queue_id) + + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa5" + asset_root = str(tmp_path / "stdin_outputs") + Path(asset_root).mkdir() + + files = { + "renders/frame_001.exr": b"frame-one", + "renders/frame_002.exr": b"frame-two", + "logs/render.log": b"log-data", + } + _seed_output_job( + backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files + ) + + # Pipe paths via stdin with empty-line sentinel, like DCM does + stdin_data = "renders/frame_001.exr\nrenders/frame_002.exr\n\n" + r = subprocess.run( + [ + "deadline", + "job", + "download-output", + "--job-id", + job_id, + "--include-path-stdin", + "--conflict-resolution", + "OVERWRITE", + ], + env=env, + input=stdin_data, + capture_output=True, + text=True, + timeout=120, + ) + assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" + + assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" + assert (Path(asset_root) / "renders" / "frame_002.exr").read_bytes() == b"frame-two" + assert not (Path(asset_root) / "logs" / "render.log").exists() + + +def test_cli_job_download_output_include_path_stdin_json(deadline_setup, tmp_path): + """ + --include-path-stdin with --output json mirrors DCM's exact invocation pattern. + Verifies JSON progress output and no interactive prompts. 
+ """ + backend, farm_id, queue_id, env = deadline_setup + _configure_defaults(env, farm_id, queue_id) + + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa6" + asset_root = str(tmp_path / "stdin_json_outputs") + Path(asset_root).mkdir() + + files = { + "renders/frame_001.exr": b"frame-one", + "logs/render.log": b"log-data", + } + _seed_output_job( + backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files + ) + + stdin_data = "renders/frame_001.exr\n\n" + r = subprocess.run( + [ + "deadline", + "job", + "download-output", + "--job-id", + job_id, + "--include-path-stdin", + "--output", + "json", + ], + env=env, + input=stdin_data, + capture_output=True, + text=True, + timeout=120, + ) + assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" + + assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" + assert not (Path(asset_root) / "logs" / "render.log").exists() + + # Verify output is JSON lines (DCM parses these) + for line in r.stdout.strip().splitlines(): + json.loads(line) # Should not raise + + @pytest.mark.skipif( sys.version_info < (3, 9), reason="MockDeadlineBackend.create_job requires openjd-model, which requires Python >= 3.9", diff --git a/test/unit/deadline_client/cli/test_cli_path_filters.py b/test/unit/deadline_client/cli/test_cli_path_filters.py index 9438a158b..5633f951b 100644 --- a/test/unit/deadline_client/cli/test_cli_path_filters.py +++ b/test/unit/deadline_client/cli/test_cli_path_filters.py @@ -1,53 +1,37 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-"""Tests for _validate_and_normalize_include_paths.""" +"""Tests for _normalize_include_paths.""" import sys -import pytest -import click - -from deadline.client.cli._groups._job_download_helpers import _validate_and_normalize_include_paths +from deadline.client.cli._groups._job_download_helpers import _normalize_include_paths class TestValidateAndNormalizePathFilters: - def test_rejects_double_dot(self): - with pytest.raises(click.BadParameter, match="must not contain '..'"): - _validate_and_normalize_include_paths(["../../etc/passwd"]) - - def test_rejects_double_dot_in_middle(self): - with pytest.raises(click.BadParameter, match="must not contain '..'"): - _validate_and_normalize_include_paths(["renders/../secret"]) - def test_converts_backslashes(self): - result = _validate_and_normalize_include_paths(["renders\\frame_001.exr"]) + result = _normalize_include_paths(["renders\\frame_001.exr"]) assert result == ["renders/frame_001.exr"] def test_strips_leading_dot_slash(self): - result = _validate_and_normalize_include_paths(["./renders/frame.exr"]) + result = _normalize_include_paths(["./renders/frame.exr"]) assert result == ["renders/frame.exr"] def test_collapses_double_slashes(self): - result = _validate_and_normalize_include_paths(["renders//frame.exr"]) + result = _normalize_include_paths(["renders//frame.exr"]) assert result == ["renders/frame.exr"] def test_empty_filter_removed(self): - result = _validate_and_normalize_include_paths(["", "a.txt"]) + result = _normalize_include_paths(["", "a.txt"]) assert result == ["a.txt"] def test_passthrough_normal_paths(self): - result = _validate_and_normalize_include_paths( + result = _normalize_include_paths( ["renders/frame_001.exr", "textures/", "scripts/setup.mel"] ) assert result == ["renders/frame_001.exr", "textures/", "scripts/setup.mel"] - def test_backslash_with_double_dot_rejected(self): - """Backslash path traversal like 'renders\\..\\..' 
should be rejected.""" - with pytest.raises(click.BadParameter, match="must not contain '..'"): - _validate_and_normalize_include_paths(["renders\\..\\secret"]) - def test_multiple_normalizations(self): - result = _validate_and_normalize_include_paths([".\\renders\\\\frame.exr"]) + result = _normalize_include_paths([".\\renders\\\\frame.exr"]) # .\ -> ./ after backslash conversion, then ./ stripped, // collapsed assert result == ["renders/frame.exr"] From dd8bdee63570ff9248c1596916da45f1039ccf63 Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Thu, 23 Apr 2026 11:51:00 -0700 Subject: [PATCH 06/14] refactor: Replace --include-path with glob-based --include/--exclude Align download-output filtering with the manifest CLI pattern: - Replace --include-path/--include-path-stdin with -i/--include and -e/--exclude options supporting glob patterns via fnmatch - Add -ie/--include-exclude-config for JSON string or file input, reusing _process_glob_inputs from the manifest CLI - Default to include-all when only --exclude is provided - Rename OutputDownloader param path_filters to include_filters - Add e2e tests: exclude, glob patterns, config file, and large inline JSON blob (200 files, 150 selected) Signed-off-by: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> --- docs/job_attachments_guide.md | 2 +- .../cli/_groups/_job_download_helpers.py | 30 ++- src/deadline/client/cli/_groups/job_group.py | 63 +++--- src/deadline/job_attachments/download.py | 36 ++-- .../cli/test_cli_attachment_e2e.py | 188 ++++++++++++------ .../cli/test_cli_handle_web_url.py | 6 +- test/unit/deadline_client/cli/test_cli_job.py | 24 ++- .../cli/test_cli_path_filters.py | 81 +++++--- .../test_path_filtering.py | 40 +++- 9 files changed, 333 insertions(+), 137 deletions(-) diff --git a/docs/job_attachments_guide.md b/docs/job_attachments_guide.md index ff35dc61c..2ff1231b8 100644 --- a/docs/job_attachments_guide.md +++ 
b/docs/job_attachments_guide.md @@ -4,7 +4,7 @@ Job attachments uses your configured S3 bucket as a [content-addressable storage](https://en.wikipedia.org/wiki/Content-addressable_storage), which creates a snapshot of the files used in your job submission in [asset manifests](#asset-manifests), only uploading files that aren't already in S3. This saves you time and bandwidth when iterating on jobs. When an [AWS Deadline Cloud worker agent][worker-agent] starts working on a job with job attachments, it recreates the file system snapshot in the worker agent session directory, and uploads any outputs back to your S3 bucket. -You can then easily download your outputs with the [deadline job download-output] command, or using the [protocol handler](#protocol-handler) to download from a click of a button in the [AWS Deadline Cloud monitor][monitor]. The command supports `--include-path` for downloading specific files or directories. +You can then easily download your outputs with the [deadline job download-output] command, or using the [protocol handler](#protocol-handler) to download from a click of a button in the [AWS Deadline Cloud monitor][monitor]. The command supports `--include` and `--exclude` glob patterns for downloading specific files or directories, and `--include-exclude-config` for passing patterns via a JSON file. Job attachments also works as an auxiliary storage when used with [AWS Deadline Cloud storage profiles][shared-storage], allowing you to flexibly upload files to your Amazon S3 bucket that aren't on your configured shared storage. 
diff --git a/src/deadline/client/cli/_groups/_job_download_helpers.py b/src/deadline/client/cli/_groups/_job_download_helpers.py index 75daedd4e..85bdc6972 100644 --- a/src/deadline/client/cli/_groups/_job_download_helpers.py +++ b/src/deadline/client/cli/_groups/_job_download_helpers.py @@ -307,9 +307,9 @@ def _on_progress_json(download_metadata: ProgressReportMetadata) -> bool: return _do_download(on_downloading_files=_on_progress_json) -def _normalize_include_paths(filters: list[str]) -> list[str]: +def _normalize_filters(filters: list[str]) -> list[str]: """ - Normalizes include paths. + Normalizes path filter patterns. - Converts backslashes to forward slashes (Windows compatibility) - Strips leading './' - Normalizes '//' to '/' @@ -324,3 +324,29 @@ def _normalize_include_paths(filters: list[str]) -> list[str]: if f: normalized.append(f) return normalized + + +def _parse_include_exclude( + include: tuple[str, ...], + exclude: tuple[str, ...], + include_exclude_config: Optional[str], +) -> tuple[Optional[list[str]], Optional[list[str]]]: + """ + Parse --include, --exclude, and --include-exclude-config into normalized filter lists. + --include/--exclude take precedence over --include-exclude-config. + Returns (include_filters, exclude_filters) where either may be None. 
+ """ + if include or exclude: + include_filters = _normalize_filters(list(include)) if include else ["*"] + exclude_filters = _normalize_filters(list(exclude)) if exclude else None + return include_filters, exclude_filters + + if include_exclude_config: + from ....job_attachments._glob import _process_glob_inputs + + glob_config = _process_glob_inputs(include_exclude_config) + parsed_include = _normalize_filters(glob_config.include_glob) or None + parsed_exclude = _normalize_filters(glob_config.exclude_glob) or None + return parsed_include, parsed_exclude + + return None, None diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index 166fc36bc..70b499af0 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -60,10 +60,10 @@ from ._job_download_helpers import ( JSON_MSG_TYPE_PROGRESS, _download_mapped_manifests, + _parse_include_exclude, _resolve_conflict_resolution, _resolve_storage_profiles, _transform_manifests_to_absolute_paths, - _normalize_include_paths, ) from ....job_attachments._path_mapping import _generate_path_mapping_rules from ....job_attachments.download import ( @@ -501,15 +501,15 @@ def _download_job_output( task_id: Optional[str], is_json_format: bool = False, ignore_storage_profiles: bool = False, - path_filters: Optional[list[str]] = None, - force_auto_accept: bool = False, + include_filters: Optional[list[str]] = None, + exclude_filters: Optional[list[str]] = None, ): """ Starts the download of job output and handles the progress reporting callback. 
""" deadline = api.get_boto3_client("deadline", config=config) - auto_accept = force_auto_accept or config_file.str2bool( + auto_accept = config_file.str2bool( config_file.get_setting("settings.auto_accept", config=config) ) conflict_resolution = config_file.get_setting("settings.conflict_resolution", config=config) @@ -561,7 +561,8 @@ def _download_job_output( task_id=task_id, session_action_id=session_action_id, session=queue_role_session, - path_filters=path_filters, + include_filters=include_filters, + exclude_filters=exclude_filters, ) def _check_and_warn_long_output_paths( @@ -608,8 +609,10 @@ def _check_and_warn_long_output_paths( session_action_id=session_action_id, session=queue_role_session, ) - if path_filters: - manifests_by_root = _filter_manifests(manifests_by_root, path_filters) + if include_filters: + manifests_by_root = _filter_manifests( + manifests_by_root, include_filters, exclude_filters + ) mapped_manifests = _transform_manifests_to_absolute_paths( manifests_by_root, rules, resolved.job_profile.osFamily ) @@ -974,14 +977,24 @@ def _assert_valid_path(path: str) -> None: @click.option("--step-id", help="The step to use.") @click.option("--task-id", help="The task to use.") @click.option( - "--include-path", + "-i", + "--include", multiple=True, - help="Download only files matching this relative path or directory prefix (trailing /). Repeatable.", + help="Glob pattern for files to download. Supports *, ?, [seq]. " + "A trailing / matches all files under that directory. Repeatable", ) @click.option( - "--include-path-stdin", - is_flag=True, - help="Read include paths from stdin, one per line.", + "-e", + "--exclude", + multiple=True, + help="Glob pattern for files to exclude from download. Applied after --include. Repeatable", +) +@click.option( + "-ie", + "--include-exclude-config", + default=None, + help="JSON string or file path with include/exclude patterns, " + 'e.g. 
\'{"include": ["renders/*.exr"], "exclude": ["renders/draft/"]}\'', ) @click.option( "--ignore-storage-profiles", @@ -1025,7 +1038,14 @@ def _assert_valid_path(path: str) -> None: ) @_handle_error def job_download_output( - step_id, task_id, output, ignore_storage_profiles, include_path, include_path_stdin, **args + step_id, + task_id, + output, + ignore_storage_profiles, + include, + exclude, + include_exclude_config, + **args, ): """ Download the output of a Deadline Cloud job that was saved as job @@ -1037,16 +1057,9 @@ def job_download_output( if task_id and not step_id: raise click.UsageError("Missing option '--step-id' required with '--task-id'") - filters = list(include_path) - if include_path_stdin: - for line in sys.stdin: - stripped = line.strip() - if not stripped: - break - filters.append(stripped) - if filters: - filters = _normalize_include_paths(filters) - path_filters = filters or None + include_filters, exclude_filters = _parse_include_exclude( + include, exclude, include_exclude_config + ) # Get a temporary config object with the standard options handled config = _apply_cli_options_to_config( @@ -1068,8 +1081,8 @@ def job_download_output( task_id=task_id, is_json_format=is_json_format, ignore_storage_profiles=ignore_storage_profiles, - path_filters=path_filters, - force_auto_accept=include_path_stdin, + include_filters=include_filters, + exclude_filters=exclude_filters, ) except Exception as e: if is_json_format: diff --git a/src/deadline/job_attachments/download.py b/src/deadline/job_attachments/download.py index ab8d1abdf..e1e0ba836 100644 --- a/src/deadline/job_attachments/download.py +++ b/src/deadline/job_attachments/download.py @@ -1246,27 +1246,34 @@ def mount_vfs_from_manifests( def _matches_any_filter(file_path: str, filters: list[str]) -> bool: """ - Check if a file path matches any of the given filters. 
- - Exact match: filter "renders/frame_001.exr" matches only that path - - Directory prefix: filter "renders/" matches all paths starting with "renders/" + Check if a file path matches any of the given filters using glob-style matching. + Uses fnmatch for pattern matching (supports *, ?, [seq], [!seq]). + A filter ending with '/' is treated as a directory prefix (matches all files under it). """ - return any(file_path.startswith(f) if f.endswith("/") else file_path == f for f in filters) + from fnmatch import fnmatch + + return any( + file_path.startswith(f) if f.endswith("/") else fnmatch(file_path, f) for f in filters + ) def _filter_paths( paths_by_root: dict[str, ManifestPathGroup], path_filters: list[str], + exclude_filters: Optional[list[str]] = None, ) -> dict[str, ManifestPathGroup]: """ - Filter ManifestPathGroups to only include files matching the given filters. - Each filter is an exact relative file path or a directory prefix (ending with '/'). - Filters are matched against file paths across all asset roots. + Filter ManifestPathGroups using glob-style include/exclude patterns. + Include filters select files; exclude filters remove from the included set. + Filters are matched against relative file paths across all asset roots. 
""" filtered: dict[str, ManifestPathGroup] = {} for root, group in paths_by_root.items(): filtered_group = ManifestPathGroup() for hash_alg, file_list in group.files_by_hash_alg.items(): matching = [f for f in file_list if _matches_any_filter(f.path, path_filters)] + if exclude_filters: + matching = [f for f in matching if not _matches_any_filter(f.path, exclude_filters)] if matching: if hash_alg not in filtered_group.files_by_hash_alg: filtered_group.files_by_hash_alg[hash_alg] = matching @@ -1281,10 +1288,10 @@ def _filter_paths( def _filter_manifests( manifests_by_root: dict[str, list[BaseAssetManifest]], path_filters: list[str], + exclude_filters: Optional[list[str]] = None, ) -> dict[str, list[BaseAssetManifest]]: """ - Filter BaseAssetManifest objects to only include files matching the given filters. - Each filter is an exact relative file path or a directory prefix (ending with '/'). + Filter BaseAssetManifest objects using glob-style include/exclude patterns. Returns a new dict with manifests whose paths have been filtered; empty manifests are removed. 
""" filtered: dict[str, list[BaseAssetManifest]] = {} @@ -1292,6 +1299,8 @@ def _filter_manifests( filtered_manifests = [] for manifest in manifest_list: matching = [p for p in manifest.paths if _matches_any_filter(p.path, path_filters)] + if exclude_filters: + matching = [p for p in matching if not _matches_any_filter(p.path, exclude_filters)] if matching: manifest.paths = matching filtered_manifests.append(manifest) @@ -1338,7 +1347,8 @@ def __init__( task_id: Optional[str] = None, session_action_id: Optional[str] = None, session: Optional[boto3.Session] = None, - path_filters: Optional[list[str]] = None, + include_filters: Optional[list[str]] = None, + exclude_filters: Optional[list[str]] = None, ) -> None: self.s3_settings = s3_settings self.session = session @@ -1352,8 +1362,10 @@ def __init__( session_action_id=session_action_id, session=session, ) - if path_filters: - self.outputs_by_root = _filter_paths(self.outputs_by_root, path_filters) + if include_filters: + self.outputs_by_root = _filter_paths( + self.outputs_by_root, include_filters, exclude_filters + ) def get_output_paths_by_root(self) -> dict[str, list[str]]: """ diff --git a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py index 2002873a2..d9620b98a 100644 --- a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py +++ b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py @@ -685,7 +685,7 @@ def test_cli_job_download_output_include_path(deadline_setup, tmp_path): "download-output", "--job-id", job_id, - "--include-path", + "--include", "renders/", "--conflict-resolution", "OVERWRITE", @@ -724,7 +724,7 @@ def test_cli_job_download_output_include_path_exact_file(deadline_setup, tmp_pat "download-output", "--job-id", job_id, - "--include-path", + "--include", "renders/frame_001.exr", "--conflict-resolution", "OVERWRITE", @@ -762,9 +762,9 @@ def test_cli_job_download_output_include_path_multiple(deadline_setup, tmp_path) 
"download-output", "--job-id", job_id, - "--include-path", + "--include", "renders/frame_001.exr", - "--include-path", + "--include", "scripts/", "--conflict-resolution", "OVERWRITE", @@ -799,7 +799,7 @@ def test_cli_job_download_output_include_path_no_match(deadline_setup, tmp_path) "download-output", "--job-id", job_id, - "--include-path", + "--include", "nonexistent.txt", "--conflict-resolution", "OVERWRITE", @@ -811,100 +811,176 @@ def test_cli_job_download_output_include_path_no_match(deadline_setup, tmp_path) assert not (Path(asset_root) / "renders" / "frame_001.exr").exists() -def test_cli_job_download_output_include_path_stdin(deadline_setup, tmp_path): +def test_cli_job_download_output_exclude(deadline_setup, tmp_path): """ - --include-path-stdin reads paths from stdin (one per line, empty line terminates). - This mirrors DCM's usage pattern where the desktop app pipes paths to the CLI. - Interactive prompts are skipped when stdin is consumed. + --exclude removes files from the download set after --include is applied. 
""" backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa5" - asset_root = str(tmp_path / "stdin_outputs") + asset_root = str(tmp_path / "exclude_outputs") Path(asset_root).mkdir() files = { "renders/frame_001.exr": b"frame-one", - "renders/frame_002.exr": b"frame-two", - "logs/render.log": b"log-data", + "renders/draft/frame_002.exr": b"frame-two-draft", + "renders/frame_003.exr": b"frame-three", } _seed_output_job( backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files ) - # Pipe paths via stdin with empty-line sentinel, like DCM does - stdin_data = "renders/frame_001.exr\nrenders/frame_002.exr\n\n" - r = subprocess.run( - [ - "deadline", - "job", - "download-output", - "--job-id", - job_id, - "--include-path-stdin", - "--conflict-resolution", - "OVERWRITE", - ], - env=env, - input=stdin_data, - capture_output=True, - text=True, - timeout=120, + r = _run( + env, + "job", + "download-output", + "--job-id", + job_id, + "--include", + "renders/", + "--exclude", + "renders/draft/", + "--conflict-resolution", + "OVERWRITE", + "--yes", ) assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" - assert (Path(asset_root) / "renders" / "frame_002.exr").read_bytes() == b"frame-two" - assert not (Path(asset_root) / "logs" / "render.log").exists() + assert (Path(asset_root) / "renders" / "frame_003.exr").read_bytes() == b"frame-three" + assert not (Path(asset_root) / "renders" / "draft" / "frame_002.exr").exists() -def test_cli_job_download_output_include_path_stdin_json(deadline_setup, tmp_path): +def test_cli_job_download_output_glob_pattern(deadline_setup, tmp_path): """ - --include-path-stdin with --output json mirrors DCM's exact invocation pattern. - Verifies JSON progress output and no interactive prompts. + --include with glob patterns (e.g. 
*.exr) filters using fnmatch. """ backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa6" - asset_root = str(tmp_path / "stdin_json_outputs") + asset_root = str(tmp_path / "glob_outputs") Path(asset_root).mkdir() files = { "renders/frame_001.exr": b"frame-one", + "renders/frame_002.png": b"frame-two-png", + "renders/frame_003.exr": b"frame-three", + } + _seed_output_job( + backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files + ) + + r = _run( + env, + "job", + "download-output", + "--job-id", + job_id, + "--include", + "renders/*.exr", + "--conflict-resolution", + "OVERWRITE", + "--yes", + ) + assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" + + assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" + assert (Path(asset_root) / "renders" / "frame_003.exr").read_bytes() == b"frame-three" + assert not (Path(asset_root) / "renders" / "frame_002.png").exists() + + +def test_cli_job_download_output_include_exclude_config(deadline_setup, tmp_path): + """ + --include-exclude-config accepts a JSON file with include/exclude patterns, + matching the manifest CLI pattern. 
+ """ + backend, farm_id, queue_id, env = deadline_setup + _configure_defaults(env, farm_id, queue_id) + + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa7" + asset_root = str(tmp_path / "config_outputs") + Path(asset_root).mkdir() + + files = { + "renders/frame_001.exr": b"frame-one", + "renders/draft/frame_002.exr": b"draft", "logs/render.log": b"log-data", } _seed_output_job( backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files ) - stdin_data = "renders/frame_001.exr\n\n" - r = subprocess.run( - [ - "deadline", - "job", - "download-output", - "--job-id", - job_id, - "--include-path-stdin", - "--output", - "json", - ], - env=env, - input=stdin_data, - capture_output=True, - text=True, - timeout=120, + config_file = tmp_path / "filters.json" + config_file.write_text(json.dumps({"include": ["renders/"], "exclude": ["renders/draft/"]})) + + r = _run( + env, + "job", + "download-output", + "--job-id", + job_id, + "--include-exclude-config", + str(config_file), + "--conflict-resolution", + "OVERWRITE", + "--yes", ) assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" + assert not (Path(asset_root) / "renders" / "draft" / "frame_002.exr").exists() assert not (Path(asset_root) / "logs" / "render.log").exists() - # Verify output is JSON lines (DCM parses these) - for line in r.stdout.strip().splitlines(): - json.loads(line) # Should not raise + +def test_cli_job_download_output_include_exclude_config_large_inline_json(deadline_setup, tmp_path): + """ + --include-exclude-config with a large inline JSON blob containing many paths. + This mirrors DCM's usage where the desktop app may pass hundreds of file paths. 
+    """
+    backend, farm_id, queue_id, env = deadline_setup
+    _configure_defaults(env, farm_id, queue_id)
+
+    job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa8"
+    asset_root = str(tmp_path / "large_config_outputs")
+    Path(asset_root).mkdir()
+
+    # Generate 200 files, select 150 of them via include config
+    num_total = 200
+    num_selected = 150
+    files = {f"renders/frame_{i:04d}.exr": f"content-{i}".encode() for i in range(num_total)}
+    _seed_output_job(
+        backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files
+    )
+
+    # Build a large inline JSON with 150 exact paths
+    selected_paths = [f"renders/frame_{i:04d}.exr" for i in range(num_selected)]
+    config_json = json.dumps({"include": selected_paths})
+
+    r = _run(
+        env,
+        "job",
+        "download-output",
+        "--job-id",
+        job_id,
+        "--include-exclude-config",
+        config_json,
+        "--conflict-resolution",
+        "OVERWRITE",
+        "--yes",
+    )
+    assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}"
+
+    # Verify exactly the 150 selected files were downloaded
+    for i in range(num_selected):
+        assert (
+            Path(asset_root) / "renders" / f"frame_{i:04d}.exr"
+        ).read_bytes() == f"content-{i}".encode()
+
+    # Verify the rest were NOT downloaded
+    for i in range(num_selected, num_total):
+        assert not (Path(asset_root) / "renders" / f"frame_{i:04d}.exr").exists()


 @pytest.mark.skipif(
diff --git a/test/unit/deadline_client/cli/test_cli_handle_web_url.py b/test/unit/deadline_client/cli/test_cli_handle_web_url.py
index 8a50638f1..9b0e8321c 100644
--- a/test/unit/deadline_client/cli/test_cli_handle_web_url.py
+++ b/test/unit/deadline_client/cli/test_cli_handle_web_url.py
@@ -306,7 +306,8 @@ def test_cli_handle_web_url_download_output_only_required_input(fresh_deadline_c
             task_id=None,
             session_action_id=None,
             session=ANY,
-            path_filters=None,
+            include_filters=None,
+            exclude_filters=None,
         )
         mock_download.assert_called_once_with(
             file_conflict_resolution=FileConflictResolution.CREATE_COPY,
@@ -372,7
+373,8 @@ def test_cli_handle_web_url_download_output_with_optional_input(fresh_deadline_c task_id=MOCK_TASK_ID, session_action_id=MOCK_SESSION_ACTION_ID, session=ANY, - path_filters=None, + include_filters=None, + exclude_filters=None, ) mock_download.assert_called_once_with( file_conflict_resolution=FileConflictResolution.CREATE_COPY, diff --git a/test/unit/deadline_client/cli/test_cli_job.py b/test/unit/deadline_client/cli/test_cli_job.py index 7c1675fa9..76f8ba7a6 100644 --- a/test/unit/deadline_client/cli/test_cli_job.py +++ b/test/unit/deadline_client/cli/test_cli_job.py @@ -377,7 +377,8 @@ def test_cli_job_download_output_stdout_with_only_required_input( task_id=None, session_action_id=None, session=ANY, - path_filters=None, + include_filters=None, + exclude_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -491,7 +492,8 @@ def test_cli_job_download_output_stdout_with_mismatching_path_format( task_id=None, session_action_id=None, session=ANY, - path_filters=None, + include_filters=None, + exclude_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -592,7 +594,8 @@ def test_cli_job_download_output_handles_unc_path_on_windows(fresh_deadline_conf task_id=None, session_action_id=None, session=ANY, - path_filters=None, + include_filters=None, + exclude_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -676,7 +679,8 @@ def test_cli_job_download_no_output_stdout(fresh_deadline_config, tmp_path: Path task_id=None, session_action_id=None, session=ANY, - path_filters=None, + include_filters=None, + exclude_filters=None, ) assert ( @@ -778,7 +782,8 @@ def test_cli_job_download_output_stdout_with_json_format( task_id=None, session_action_id=None, session=ANY, - path_filters=None, + include_filters=None, + exclude_filters=None, ) expected_json_title = {"messageType": "title", "value": "Mock Job"} @@ -1395,7 +1400,8 @@ def test_cli_job_download_output_handle_web_url_with_optional_input( 
task_id="task-2", session_action_id=MOCK_SESSION_ACTION_ID, session=ANY, - path_filters=None, + include_filters=None, + exclude_filters=None, ) mock_download.assert_called_once_with( file_conflict_resolution=FileConflictResolution.CREATE_COPY, @@ -1482,7 +1488,8 @@ def test_cli_job_download_output_with_different_asset_root_path_format_than_job( task_id=None, session_action_id=None, session=ANY, - path_filters=None, + include_filters=None, + exclude_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -1714,7 +1721,8 @@ def test_cli_job_download_output_with_session_action_id(fresh_deadline_config): task_id=MOCK_TASK_ID, session_action_id=MOCK_SESSION_ACTION_ID, session=ANY, - path_filters=None, + include_filters=None, + exclude_filters=None, ) diff --git a/test/unit/deadline_client/cli/test_cli_path_filters.py b/test/unit/deadline_client/cli/test_cli_path_filters.py index 5633f951b..2eed11c06 100644 --- a/test/unit/deadline_client/cli/test_cli_path_filters.py +++ b/test/unit/deadline_client/cli/test_cli_path_filters.py @@ -1,54 +1,79 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-"""Tests for _normalize_include_paths.""" +"""Tests for _normalize_filters and _parse_include_exclude.""" -import sys +from deadline.client.cli._groups._job_download_helpers import ( + _normalize_filters, + _parse_include_exclude, +) -from deadline.client.cli._groups._job_download_helpers import _normalize_include_paths - -class TestValidateAndNormalizePathFilters: +class TestNormalizeFilters: def test_converts_backslashes(self): - result = _normalize_include_paths(["renders\\frame_001.exr"]) + result = _normalize_filters(["renders\\frame_001.exr"]) assert result == ["renders/frame_001.exr"] def test_strips_leading_dot_slash(self): - result = _normalize_include_paths(["./renders/frame.exr"]) + result = _normalize_filters(["./renders/frame.exr"]) assert result == ["renders/frame.exr"] def test_collapses_double_slashes(self): - result = _normalize_include_paths(["renders//frame.exr"]) + result = _normalize_filters(["renders//frame.exr"]) assert result == ["renders/frame.exr"] def test_empty_filter_removed(self): - result = _normalize_include_paths(["", "a.txt"]) + result = _normalize_filters(["", "a.txt"]) assert result == ["a.txt"] def test_passthrough_normal_paths(self): - result = _normalize_include_paths( - ["renders/frame_001.exr", "textures/", "scripts/setup.mel"] - ) + result = _normalize_filters(["renders/frame_001.exr", "textures/", "scripts/setup.mel"]) assert result == ["renders/frame_001.exr", "textures/", "scripts/setup.mel"] + def test_passthrough_glob_patterns(self): + result = _normalize_filters(["renders/*.exr", "**/*.png", "textures/wood[0-9].jpg"]) + assert result == ["renders/*.exr", "**/*.png", "textures/wood[0-9].jpg"] + def test_multiple_normalizations(self): - result = _normalize_include_paths([".\\renders\\\\frame.exr"]) - # .\ -> ./ after backslash conversion, then ./ stripped, // collapsed + result = _normalize_filters([".\\renders\\\\frame.exr"]) assert result == ["renders/frame.exr"] -class TestStdinReadsUntilEmptyLine: - def 
test_stdin_stops_at_empty_line(self): - """Verify that --include-path-stdin reads until an empty line.""" - from io import StringIO - from unittest.mock import patch +class TestParseIncludeExclude: + def test_include_only(self): + inc, exc = _parse_include_exclude(("renders/",), (), None) + assert inc == ["renders/"] + assert exc is None + + def test_exclude_only(self): + inc, exc = _parse_include_exclude((), ("*.log",), None) + assert inc == ["*"] + assert exc == ["*.log"] + + def test_include_and_exclude(self): + inc, exc = _parse_include_exclude(("renders/",), ("renders/draft/",), None) + assert inc == ["renders/"] + assert exc == ["renders/draft/"] + + def test_no_filters(self): + inc, exc = _parse_include_exclude((), (), None) + assert inc is None + assert exc is None + + def test_config_json_string(self, tmp_path): + inc, exc = _parse_include_exclude( + (), (), '{"include": ["renders/*.exr"], "exclude": ["renders/draft/"]}' + ) + assert inc == ["renders/*.exr"] + assert exc == ["renders/draft/"] - stdin_data = "renders/frame_001.exr\nrenders/frame_002.exr\n\nextra_ignored\n" - collected = [] - with patch("sys.stdin", StringIO(stdin_data)): - for line in sys.stdin: - stripped = line.strip() - if not stripped: - break - collected.append(stripped) + def test_config_file(self, tmp_path): + config_file = tmp_path / "filters.json" + config_file.write_text('{"include": ["**/*.exr"]}') + inc, exc = _parse_include_exclude((), (), str(config_file)) + assert inc == ["**/*.exr"] + assert exc is None - assert collected == ["renders/frame_001.exr", "renders/frame_002.exr"] + def test_cli_args_take_precedence_over_config(self): + inc, exc = _parse_include_exclude(("renders/",), (), '{"include": ["textures/"]}') + assert inc == ["renders/"] + assert exc is None diff --git a/test/unit/deadline_job_attachments/test_path_filtering.py b/test/unit/deadline_job_attachments/test_path_filtering.py index 56890ff01..142cc041f 100644 --- 
a/test/unit/deadline_job_attachments/test_path_filtering.py +++ b/test/unit/deadline_job_attachments/test_path_filtering.py @@ -49,9 +49,18 @@ def test_empty_filters(self): def test_nested_directory_prefix(self): assert _matches_any_filter("a/b/c/file.txt", ["a/b/"]) is True - def test_exact_match_no_trailing_slash(self): - """Exact filter without trailing slash should not match subdirectory files.""" - assert _matches_any_filter("renders/frame_001.exr", ["renders"]) is False + def test_glob_wildcard(self): + assert _matches_any_filter("renders/frame_001.exr", ["renders/*.exr"]) is True + + def test_glob_wildcard_no_match(self): + assert _matches_any_filter("renders/frame_001.png", ["renders/*.exr"]) is False + + def test_glob_question_mark(self): + assert _matches_any_filter("renders/frame_00?.exr", ["renders/frame_00?.exr"]) is True + + def test_glob_recursive(self): + """fnmatch does not support ** recursion — it matches as a literal wildcard.""" + assert _matches_any_filter("a/b/c.txt", ["a/*/c.txt"]) is True class TestFilterPaths: @@ -114,6 +123,22 @@ def test_empty_root_removed(self): assert "/has_match" in result assert "/no_match" not in result + def test_exclude_filters(self): + paths_by_root = { + "/root": self._make_group(["renders/a.exr", "renders/draft/b.exr", "renders/c.exr"]) + } + result = _filter_paths(paths_by_root, ["renders/"], ["renders/draft/"]) + files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] + assert set(files) == {"renders/a.exr", "renders/c.exr"} + + def test_glob_pattern(self): + paths_by_root = { + "/root": self._make_group(["renders/a.exr", "renders/b.png", "textures/c.exr"]) + } + result = _filter_paths(paths_by_root, ["renders/*.exr"]) + files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] + assert files == ["renders/a.exr"] + class TestFilterManifests: def _make_manifest(self, paths: List[str]) -> BaseAssetManifest: @@ -165,3 +190,12 @@ def 
test_multiple_manifests_per_root(self): assert len(result["/root"]) == 2 assert [p.path for p in result["/root"][0].paths] == ["a.txt"] assert [p.path for p in result["/root"][1].paths] == ["c.txt"] + + def test_exclude_filters(self): + manifests_by_root = { + "/root": [ + self._make_manifest(["renders/a.exr", "renders/draft/b.exr", "renders/c.exr"]) + ] + } + result = _filter_manifests(manifests_by_root, ["renders/"], ["renders/draft/"]) + assert [p.path for p in result["/root"][0].paths] == ["renders/a.exr", "renders/c.exr"] From bba6ea59632edbbfb6f86b96991b9a608b188b67 Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Thu, 23 Apr 2026 21:39:32 -0700 Subject: [PATCH 07/14] refactor: Replace --include/--exclude with full-path glob filtering Replace the --include/--exclude/--include-exclude-config options on download-output with --include, --include-config, and --submission-path. Key changes: - Match glob patterns against the full path (root + relative) instead of just the relative path, enabling patterns like "*/renders/*.png" - Filter against workstation paths by default; add --submission-path flag to filter against original submission paths instead - Remove --exclude and --include-exclude-config; rename to --include-config (include-only) - Normalize Windows backslash root paths for cross-OS glob matching - Add OutputDownloader.apply_include_filters() to avoid reaching into internals from the CLI layer - Extract _full_path() helper to DRY up path joining in filter functions Signed-off-by: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> --- .../cli/_groups/_job_download_helpers.py | 29 ++--- src/deadline/client/cli/_groups/job_group.py | 60 ++++++---- src/deadline/job_attachments/download.py | 45 +++++--- .../cli/test_cli_attachment_e2e.py | 108 ++++++++++++------ .../cli/test_cli_handle_web_url.py | 2 - test/unit/deadline_client/cli/test_cli_job.py | 8 -- .../cli/test_cli_path_filters.py | 49 
+++----- .../test_path_filtering.py | 105 +++++++++++------ 8 files changed, 237 insertions(+), 169 deletions(-) diff --git a/src/deadline/client/cli/_groups/_job_download_helpers.py b/src/deadline/client/cli/_groups/_job_download_helpers.py index 85bdc6972..43bdec677 100644 --- a/src/deadline/client/cli/_groups/_job_download_helpers.py +++ b/src/deadline/client/cli/_groups/_job_download_helpers.py @@ -326,27 +326,22 @@ def _normalize_filters(filters: list[str]) -> list[str]: return normalized -def _parse_include_exclude( +def _parse_include_config( include: tuple[str, ...], - exclude: tuple[str, ...], - include_exclude_config: Optional[str], -) -> tuple[Optional[list[str]], Optional[list[str]]]: + include_config: Optional[str], +) -> Optional[list[str]]: """ - Parse --include, --exclude, and --include-exclude-config into normalized filter lists. - --include/--exclude take precedence over --include-exclude-config. - Returns (include_filters, exclude_filters) where either may be None. + Parse --include and --include-config into a normalized filter list. + --include takes precedence over --include-config. + Returns include_filters or None if no filters specified. 
""" - if include or exclude: - include_filters = _normalize_filters(list(include)) if include else ["*"] - exclude_filters = _normalize_filters(list(exclude)) if exclude else None - return include_filters, exclude_filters + if include: + return _normalize_filters(list(include)) or None - if include_exclude_config: + if include_config: from ....job_attachments._glob import _process_glob_inputs - glob_config = _process_glob_inputs(include_exclude_config) - parsed_include = _normalize_filters(glob_config.include_glob) or None - parsed_exclude = _normalize_filters(glob_config.exclude_glob) or None - return parsed_include, parsed_exclude + glob_config = _process_glob_inputs(include_config) + return _normalize_filters(glob_config.include_glob) or None - return None, None + return None diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index 70b499af0..fef37a4e9 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -60,7 +60,7 @@ from ._job_download_helpers import ( JSON_MSG_TYPE_PROGRESS, _download_mapped_manifests, - _parse_include_exclude, + _parse_include_config, _resolve_conflict_resolution, _resolve_storage_profiles, _transform_manifests_to_absolute_paths, @@ -502,7 +502,7 @@ def _download_job_output( is_json_format: bool = False, ignore_storage_profiles: bool = False, include_filters: Optional[list[str]] = None, - exclude_filters: Optional[list[str]] = None, + submission_path: bool = False, ): """ Starts the download of job output and handles the progress reporting callback. @@ -552,6 +552,8 @@ def _download_job_output( for manifest in job_attachments_manifests: root_path_format_mapping[manifest["rootPath"]] = manifest["rootPathFormat"] + # When --submission-path is set, filter against the submission (S3) paths. + # Otherwise, filtering happens later against workstation paths. 
job_output_downloader = OutputDownloader( s3_settings=JobAttachmentS3Settings(**queue["jobAttachmentSettings"]), farm_id=farm_id, @@ -561,8 +563,7 @@ def _download_job_output( task_id=task_id, session_action_id=session_action_id, session=queue_role_session, - include_filters=include_filters, - exclude_filters=exclude_filters, + include_filters=include_filters if submission_path else None, ) def _check_and_warn_long_output_paths( @@ -609,13 +610,13 @@ def _check_and_warn_long_output_paths( session_action_id=session_action_id, session=queue_role_session, ) - if include_filters: - manifests_by_root = _filter_manifests( - manifests_by_root, include_filters, exclude_filters - ) + if include_filters and submission_path: + manifests_by_root = _filter_manifests(manifests_by_root, include_filters) mapped_manifests = _transform_manifests_to_absolute_paths( manifests_by_root, rules, resolved.job_profile.osFamily ) + if include_filters and not submission_path: + mapped_manifests = _filter_manifests(mapped_manifests, include_filters) if mapped_manifests: download_summary = _download_mapped_manifests( mapped_manifests=mapped_manifests, @@ -723,6 +724,15 @@ def _check_and_warn_long_output_paths( output_paths_by_root = job_output_downloader.get_output_paths_by_root() _check_and_warn_long_output_paths(output_paths_by_root) + # Apply include filters against workstation paths (default behavior). + # When --submission-path is set, filtering was already applied at the S3/submission level. 
+ if include_filters and not submission_path: + job_output_downloader.apply_include_filters(include_filters) + output_paths_by_root = job_output_downloader.get_output_paths_by_root() + if output_paths_by_root == {}: + click.echo(_get_no_output_message(is_json_format)) + return + if not is_json_format: # Create and print a summary of all the paths to download all_output_paths: set[str] = set() @@ -980,21 +990,23 @@ def _assert_valid_path(path: str) -> None: "-i", "--include", multiple=True, - help="Glob pattern for files to download. Supports *, ?, [seq]. " - "A trailing / matches all files under that directory. Repeatable", + help="Glob pattern for files to include in download. Matched against the full path " + "(root + relative). Supports *, ?, [seq]. A trailing / matches all files under " + "that directory. Repeatable", ) @click.option( - "-e", - "--exclude", - multiple=True, - help="Glob pattern for files to exclude from download. Applied after --include. Repeatable", + "-ic", + "--include-config", + default=None, + help="JSON string or file path with include patterns, " + 'e.g. \'{"include": ["*/renders/*.exr"]}\'', ) @click.option( - "-ie", - "--include-exclude-config", - default=None, - help="JSON string or file path with include/exclude patterns, " - 'e.g. \'{"include": ["renders/*.exr"], "exclude": ["renders/draft/"]}\'', + "--submission-path", + is_flag=True, + default=False, + help="Match include filters against the original submission paths instead of " + "the local workstation paths. 
By default, filters match against workstation paths.", ) @click.option( "--ignore-storage-profiles", @@ -1043,8 +1055,8 @@ def job_download_output( output, ignore_storage_profiles, include, - exclude, - include_exclude_config, + include_config, + submission_path, **args, ): """ @@ -1057,9 +1069,7 @@ def job_download_output( if task_id and not step_id: raise click.UsageError("Missing option '--step-id' required with '--task-id'") - include_filters, exclude_filters = _parse_include_exclude( - include, exclude, include_exclude_config - ) + include_filters = _parse_include_config(include, include_config) # Get a temporary config object with the standard options handled config = _apply_cli_options_to_config( @@ -1082,7 +1092,7 @@ def job_download_output( is_json_format=is_json_format, ignore_storage_profiles=ignore_storage_profiles, include_filters=include_filters, - exclude_filters=exclude_filters, + submission_path=submission_path, ) except Exception as e: if is_json_format: diff --git a/src/deadline/job_attachments/download.py b/src/deadline/job_attachments/download.py index e1e0ba836..bda760487 100644 --- a/src/deadline/job_attachments/download.py +++ b/src/deadline/job_attachments/download.py @@ -1244,11 +1244,21 @@ def mount_vfs_from_manifests( vfs_manager.start(session_dir=session_dir) +def _full_path(root: str, relative: str) -> str: + """Join root and relative path using forward slashes for consistent matching.""" + normalized_root = root.replace("\\", "/") + if normalized_root.endswith("/"): + return normalized_root + relative + return normalized_root + "/" + relative + + def _matches_any_filter(file_path: str, filters: list[str]) -> bool: """ Check if a file path matches any of the given filters using glob-style matching. Uses fnmatch for pattern matching (supports *, ?, [seq], [!seq]). A filter ending with '/' is treated as a directory prefix (matches all files under it). 
+ The file_path should be the full path (root + relative) to support patterns like + '*/renders/*.png'. """ from fnmatch import fnmatch @@ -1260,20 +1270,19 @@ def _matches_any_filter(file_path: str, filters: list[str]) -> bool: def _filter_paths( paths_by_root: dict[str, ManifestPathGroup], path_filters: list[str], - exclude_filters: Optional[list[str]] = None, ) -> dict[str, ManifestPathGroup]: """ - Filter ManifestPathGroups using glob-style include/exclude patterns. - Include filters select files; exclude filters remove from the included set. - Filters are matched against relative file paths across all asset roots. + Filter ManifestPathGroups using glob-style include patterns. + Filters are matched against the full path (root + relative) to support + patterns like '*/renders/*.png'. """ filtered: dict[str, ManifestPathGroup] = {} for root, group in paths_by_root.items(): filtered_group = ManifestPathGroup() for hash_alg, file_list in group.files_by_hash_alg.items(): - matching = [f for f in file_list if _matches_any_filter(f.path, path_filters)] - if exclude_filters: - matching = [f for f in matching if not _matches_any_filter(f.path, exclude_filters)] + matching = [ + f for f in file_list if _matches_any_filter(_full_path(root, f.path), path_filters) + ] if matching: if hash_alg not in filtered_group.files_by_hash_alg: filtered_group.files_by_hash_alg[hash_alg] = matching @@ -1288,19 +1297,22 @@ def _filter_paths( def _filter_manifests( manifests_by_root: dict[str, list[BaseAssetManifest]], path_filters: list[str], - exclude_filters: Optional[list[str]] = None, ) -> dict[str, list[BaseAssetManifest]]: """ - Filter BaseAssetManifest objects using glob-style include/exclude patterns. + Filter BaseAssetManifest objects using glob-style include patterns. + Filters are matched against the full path (root + relative) to support + patterns like '*/renders/*.png'. Returns a new dict with manifests whose paths have been filtered; empty manifests are removed. 
""" filtered: dict[str, list[BaseAssetManifest]] = {} for root, manifest_list in manifests_by_root.items(): filtered_manifests = [] for manifest in manifest_list: - matching = [p for p in manifest.paths if _matches_any_filter(p.path, path_filters)] - if exclude_filters: - matching = [p for p in matching if not _matches_any_filter(p.path, exclude_filters)] + matching = [ + p + for p in manifest.paths + if _matches_any_filter(_full_path(root, p.path), path_filters) + ] if matching: manifest.paths = matching filtered_manifests.append(manifest) @@ -1348,7 +1360,6 @@ def __init__( session_action_id: Optional[str] = None, session: Optional[boto3.Session] = None, include_filters: Optional[list[str]] = None, - exclude_filters: Optional[list[str]] = None, ) -> None: self.s3_settings = s3_settings self.session = session @@ -1363,9 +1374,7 @@ def __init__( session=session, ) if include_filters: - self.outputs_by_root = _filter_paths( - self.outputs_by_root, include_filters, exclude_filters - ) + self.outputs_by_root = _filter_paths(self.outputs_by_root, include_filters) def get_output_paths_by_root(self) -> dict[str, list[str]]: """ @@ -1377,6 +1386,10 @@ def get_output_paths_by_root(self) -> dict[str, list[str]]: output_paths_by_root[root] = path_group.get_all_paths() return output_paths_by_root + def apply_include_filters(self, include_filters: list[str]) -> None: + """Apply glob-style include filters against the current workstation paths.""" + self.outputs_by_root = _filter_paths(self.outputs_by_root, include_filters) + def set_root_path(self, original_root: str, new_root: str) -> None: """ Changes the root path for downloading output files, (which is the root path diff --git a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py index d9620b98a..f4a3de386 100644 --- a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py +++ b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py @@ -660,8 +660,8 @@ def 
test_cli_job_download_output(deadline_setup, tmp_path): def test_cli_job_download_output_include_path(deadline_setup, tmp_path): """ - `deadline job download-output --include-path` with a directory prefix - downloads only files under that directory. + `deadline job download-output --include` with a glob pattern + downloads only files matching the pattern against the full path. """ backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) @@ -686,7 +686,7 @@ def test_cli_job_download_output_include_path(deadline_setup, tmp_path): "--job-id", job_id, "--include", - "renders/", + "*/renders/*", "--conflict-resolution", "OVERWRITE", "--yes", @@ -700,7 +700,7 @@ def test_cli_job_download_output_include_path(deadline_setup, tmp_path): def test_cli_job_download_output_include_path_exact_file(deadline_setup, tmp_path): """ - `deadline job download-output --include-path` with an exact file path + `deadline job download-output --include` with an exact file glob downloads only that single file. """ backend, farm_id, queue_id, env = deadline_setup @@ -725,7 +725,7 @@ def test_cli_job_download_output_include_path_exact_file(deadline_setup, tmp_pat "--job-id", job_id, "--include", - "renders/frame_001.exr", + "*/frame_001.exr", "--conflict-resolution", "OVERWRITE", "--yes", @@ -738,7 +738,7 @@ def test_cli_job_download_output_include_path_exact_file(deadline_setup, tmp_pat def test_cli_job_download_output_include_path_multiple(deadline_setup, tmp_path): """ - Multiple --include-path values are OR'd: files matching any filter are downloaded. + Multiple --include values are OR'd: files matching any filter are downloaded. 
""" backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) @@ -763,9 +763,9 @@ def test_cli_job_download_output_include_path_multiple(deadline_setup, tmp_path) "--job-id", job_id, "--include", - "renders/frame_001.exr", + "*/frame_001.exr", "--include", - "scripts/", + "*/scripts/*", "--conflict-resolution", "OVERWRITE", "--yes", @@ -811,26 +811,27 @@ def test_cli_job_download_output_include_path_no_match(deadline_setup, tmp_path) assert not (Path(asset_root) / "renders" / "frame_001.exr").exists() -def test_cli_job_download_output_exclude(deadline_setup, tmp_path): +def test_cli_job_download_output_include_matches_full_workstation_path(deadline_setup, tmp_path): """ - --exclude removes files from the download set after --include is applied. + --include patterns match against the full workstation path (root + relative) + by default, so a pattern containing part of the root directory name works. """ backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa5" - asset_root = str(tmp_path / "exclude_outputs") + asset_root = str(tmp_path / "fullpath_outputs") Path(asset_root).mkdir() files = { "renders/frame_001.exr": b"frame-one", - "renders/draft/frame_002.exr": b"frame-two-draft", - "renders/frame_003.exr": b"frame-three", + "logs/render.log": b"log-data", } _seed_output_job( backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files ) + # Use a pattern that includes the workstation root directory name r = _run( env, "job", @@ -838,9 +839,7 @@ def test_cli_job_download_output_exclude(deadline_setup, tmp_path): "--job-id", job_id, "--include", - "renders/", - "--exclude", - "renders/draft/", + "*fullpath_outputs/renders/*", "--conflict-resolution", "OVERWRITE", "--yes", @@ -848,18 +847,57 @@ def test_cli_job_download_output_exclude(deadline_setup, tmp_path): assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: 
{r.stdout}" assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" - assert (Path(asset_root) / "renders" / "frame_003.exr").read_bytes() == b"frame-three" - assert not (Path(asset_root) / "renders" / "draft" / "frame_002.exr").exists() + assert not (Path(asset_root) / "logs" / "render.log").exists() -def test_cli_job_download_output_glob_pattern(deadline_setup, tmp_path): +def test_cli_job_download_output_submission_path_flag(deadline_setup, tmp_path): """ - --include with glob patterns (e.g. *.exr) filters using fnmatch. + --submission-path causes --include to filter against the original submission + paths (asset roots from S3) rather than the workstation paths. """ backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa6" + asset_root = str(tmp_path / "subpath_outputs") + Path(asset_root).mkdir() + + files = { + "renders/frame_001.exr": b"frame-one", + "logs/render.log": b"log-data", + } + _seed_output_job( + backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files + ) + + # Pattern uses the submission root path (same as asset_root in this case) + r = _run( + env, + "job", + "download-output", + "--job-id", + job_id, + "--include", + "*subpath_outputs/renders/*", + "--submission-path", + "--conflict-resolution", + "OVERWRITE", + "--yes", + ) + assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" + + assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" + assert not (Path(asset_root) / "logs" / "render.log").exists() + + +def test_cli_job_download_output_glob_pattern(deadline_setup, tmp_path): + """ + --include with glob patterns (e.g. *.exr) filters using fnmatch against full paths. 
+ """ + backend, farm_id, queue_id, env = deadline_setup + _configure_defaults(env, farm_id, queue_id) + + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa7" asset_root = str(tmp_path / "glob_outputs") Path(asset_root).mkdir() @@ -879,7 +917,7 @@ def test_cli_job_download_output_glob_pattern(deadline_setup, tmp_path): "--job-id", job_id, "--include", - "renders/*.exr", + "*.exr", "--conflict-resolution", "OVERWRITE", "--yes", @@ -891,15 +929,14 @@ def test_cli_job_download_output_glob_pattern(deadline_setup, tmp_path): assert not (Path(asset_root) / "renders" / "frame_002.png").exists() -def test_cli_job_download_output_include_exclude_config(deadline_setup, tmp_path): +def test_cli_job_download_output_include_config(deadline_setup, tmp_path): """ - --include-exclude-config accepts a JSON file with include/exclude patterns, - matching the manifest CLI pattern. + --include-config accepts a JSON file with include patterns. """ backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) - job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa7" + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa8" asset_root = str(tmp_path / "config_outputs") Path(asset_root).mkdir() @@ -913,7 +950,7 @@ def test_cli_job_download_output_include_exclude_config(deadline_setup, tmp_path ) config_file = tmp_path / "filters.json" - config_file.write_text(json.dumps({"include": ["renders/"], "exclude": ["renders/draft/"]})) + config_file.write_text(json.dumps({"include": ["*/renders/*"]})) r = _run( env, @@ -921,8 +958,9 @@ def test_cli_job_download_output_include_exclude_config(deadline_setup, tmp_path "download-output", "--job-id", job_id, - "--include-exclude-config", + "--include-config", str(config_file), + "--submission-path", "--conflict-resolution", "OVERWRITE", "--yes", @@ -930,19 +968,20 @@ def test_cli_job_download_output_include_exclude_config(deadline_setup, tmp_path assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" assert 
(Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" - assert not (Path(asset_root) / "renders" / "draft" / "frame_002.exr").exists() + # renders/draft/frame_002.exr matches */renders/* via fnmatch (single * matches path segments) + assert (Path(asset_root) / "renders" / "draft" / "frame_002.exr").read_bytes() == b"draft" assert not (Path(asset_root) / "logs" / "render.log").exists() -def test_cli_job_download_output_include_exclude_config_large_inline_json(deadline_setup, tmp_path): +def test_cli_job_download_output_include_config_large_inline_json(deadline_setup, tmp_path): """ - --include-exclude-config with a large inline JSON blob containing many paths. + --include-config with a large inline JSON blob containing many paths. This mirrors DCM's usage where the desktop app may pass hundreds of file paths. """ backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) - job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa8" + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa9" asset_root = str(tmp_path / "large_config_outputs") Path(asset_root).mkdir() @@ -954,8 +993,8 @@ def test_cli_job_download_output_include_exclude_config_large_inline_json(deadli backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files ) - # Build a large inline JSON with 150 exact paths - selected_paths = [f"renders/frame_{i:04d}.exr" for i in range(num_selected)] + # Build a large inline JSON with 150 exact paths (using glob to match full paths) + selected_paths = [f"*/renders/frame_{i:04d}.exr" for i in range(num_selected)] config_json = json.dumps({"include": selected_paths}) r = _run( @@ -964,8 +1003,9 @@ def test_cli_job_download_output_include_exclude_config_large_inline_json(deadli "download-output", "--job-id", job_id, - "--include-exclude-config", + "--include-config", config_json, + "--submission-path", "--conflict-resolution", "OVERWRITE", "--yes", diff --git 
a/test/unit/deadline_client/cli/test_cli_handle_web_url.py b/test/unit/deadline_client/cli/test_cli_handle_web_url.py index 9b0e8321c..2bf4ebce8 100644 --- a/test/unit/deadline_client/cli/test_cli_handle_web_url.py +++ b/test/unit/deadline_client/cli/test_cli_handle_web_url.py @@ -307,7 +307,6 @@ def test_cli_handle_web_url_download_output_only_required_input(fresh_deadline_c session_action_id=None, session=ANY, include_filters=None, - exclude_filters=None, ) mock_download.assert_called_once_with( file_conflict_resolution=FileConflictResolution.CREATE_COPY, @@ -374,7 +373,6 @@ def test_cli_handle_web_url_download_output_with_optional_input(fresh_deadline_c session_action_id=MOCK_SESSION_ACTION_ID, session=ANY, include_filters=None, - exclude_filters=None, ) mock_download.assert_called_once_with( file_conflict_resolution=FileConflictResolution.CREATE_COPY, diff --git a/test/unit/deadline_client/cli/test_cli_job.py b/test/unit/deadline_client/cli/test_cli_job.py index 76f8ba7a6..3a514c627 100644 --- a/test/unit/deadline_client/cli/test_cli_job.py +++ b/test/unit/deadline_client/cli/test_cli_job.py @@ -378,7 +378,6 @@ def test_cli_job_download_output_stdout_with_only_required_input( session_action_id=None, session=ANY, include_filters=None, - exclude_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -493,7 +492,6 @@ def test_cli_job_download_output_stdout_with_mismatching_path_format( session_action_id=None, session=ANY, include_filters=None, - exclude_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -595,7 +593,6 @@ def test_cli_job_download_output_handles_unc_path_on_windows(fresh_deadline_conf session_action_id=None, session=ANY, include_filters=None, - exclude_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -680,7 +677,6 @@ def test_cli_job_download_no_output_stdout(fresh_deadline_config, tmp_path: Path session_action_id=None, session=ANY, include_filters=None, - 
exclude_filters=None, ) assert ( @@ -783,7 +779,6 @@ def test_cli_job_download_output_stdout_with_json_format( session_action_id=None, session=ANY, include_filters=None, - exclude_filters=None, ) expected_json_title = {"messageType": "title", "value": "Mock Job"} @@ -1401,7 +1396,6 @@ def test_cli_job_download_output_handle_web_url_with_optional_input( session_action_id=MOCK_SESSION_ACTION_ID, session=ANY, include_filters=None, - exclude_filters=None, ) mock_download.assert_called_once_with( file_conflict_resolution=FileConflictResolution.CREATE_COPY, @@ -1489,7 +1483,6 @@ def test_cli_job_download_output_with_different_asset_root_path_format_than_job( session_action_id=None, session=ANY, include_filters=None, - exclude_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -1722,7 +1715,6 @@ def test_cli_job_download_output_with_session_action_id(fresh_deadline_config): session_action_id=MOCK_SESSION_ACTION_ID, session=ANY, include_filters=None, - exclude_filters=None, ) diff --git a/test/unit/deadline_client/cli/test_cli_path_filters.py b/test/unit/deadline_client/cli/test_cli_path_filters.py index 2eed11c06..ea04d483b 100644 --- a/test/unit/deadline_client/cli/test_cli_path_filters.py +++ b/test/unit/deadline_client/cli/test_cli_path_filters.py @@ -1,10 +1,10 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-"""Tests for _normalize_filters and _parse_include_exclude.""" +"""Tests for _normalize_filters and _parse_include_config.""" from deadline.client.cli._groups._job_download_helpers import ( _normalize_filters, - _parse_include_exclude, + _parse_include_config, ) @@ -38,42 +38,29 @@ def test_multiple_normalizations(self): assert result == ["renders/frame.exr"] -class TestParseIncludeExclude: +class TestParseIncludeConfig: def test_include_only(self): - inc, exc = _parse_include_exclude(("renders/",), (), None) - assert inc == ["renders/"] - assert exc is None - - def test_exclude_only(self): - inc, exc = _parse_include_exclude((), ("*.log",), None) - assert inc == ["*"] - assert exc == ["*.log"] - - def test_include_and_exclude(self): - inc, exc = _parse_include_exclude(("renders/",), ("renders/draft/",), None) - assert inc == ["renders/"] - assert exc == ["renders/draft/"] + result = _parse_include_config(("renders/",), None) + assert result == ["renders/"] def test_no_filters(self): - inc, exc = _parse_include_exclude((), (), None) - assert inc is None - assert exc is None + result = _parse_include_config((), None) + assert result is None - def test_config_json_string(self, tmp_path): - inc, exc = _parse_include_exclude( - (), (), '{"include": ["renders/*.exr"], "exclude": ["renders/draft/"]}' - ) - assert inc == ["renders/*.exr"] - assert exc == ["renders/draft/"] + def test_config_json_string(self): + result = _parse_include_config((), '{"include": ["renders/*.exr"]}') + assert result == ["renders/*.exr"] def test_config_file(self, tmp_path): config_file = tmp_path / "filters.json" config_file.write_text('{"include": ["**/*.exr"]}') - inc, exc = _parse_include_exclude((), (), str(config_file)) - assert inc == ["**/*.exr"] - assert exc is None + result = _parse_include_config((), str(config_file)) + assert result == ["**/*.exr"] def test_cli_args_take_precedence_over_config(self): - inc, exc = _parse_include_exclude(("renders/",), (), '{"include": 
["textures/"]}') - assert inc == ["renders/"] - assert exc is None + result = _parse_include_config(("renders/",), '{"include": ["textures/"]}') + assert result == ["renders/"] + + def test_multiple_include_patterns(self): + result = _parse_include_config(("*.exr", "*/renders/*.png"), None) + assert result == ["*.exr", "*/renders/*.png"] diff --git a/test/unit/deadline_job_attachments/test_path_filtering.py b/test/unit/deadline_job_attachments/test_path_filtering.py index 142cc041f..27f7c5cce 100644 --- a/test/unit/deadline_job_attachments/test_path_filtering.py +++ b/test/unit/deadline_job_attachments/test_path_filtering.py @@ -6,6 +6,7 @@ from deadline.job_attachments.download import ( _matches_any_filter, + _full_path, _filter_paths, _filter_manifests, ) @@ -62,6 +63,32 @@ def test_glob_recursive(self): """fnmatch does not support ** recursion — it matches as a literal wildcard.""" assert _matches_any_filter("a/b/c.txt", ["a/*/c.txt"]) is True + def test_glob_full_path_wildcard(self): + """Patterns like '*/renders/*.png' should match against full paths.""" + assert _matches_any_filter("/home/user/renders/frame.png", ["*/renders/*.png"]) is True + + def test_glob_extension_only(self): + """Simple extension patterns like '*.png' should match full paths.""" + assert _matches_any_filter("/root/renders/frame.png", ["*.png"]) is True + + +class TestFullPath: + def test_unix_root(self): + assert _full_path("/home/user", "renders/frame.png") == "/home/user/renders/frame.png" + + def test_windows_root_backslashes(self): + """Windows root paths with backslashes are normalized to forward slashes.""" + assert ( + _full_path("C:\\Users\\artist\\project", "renders/frame.png") + == "C:/Users/artist/project/renders/frame.png" + ) + + def test_trailing_slash_root(self): + assert _full_path("/root/", "file.txt") == "/root/file.txt" + + def test_trailing_backslash_root(self): + assert _full_path("C:\\root\\", "file.txt") == "C:/root/file.txt" + class TestFilterPaths: def 
_make_group(self, paths: List[str]) -> ManifestPathGroup: @@ -73,20 +100,29 @@ def _make_group(self, paths: List[str]) -> ManifestPathGroup: group.total_bytes = len(paths) * 100 return group - def test_exact_filter(self): - paths_by_root = {"/root": self._make_group(["a.txt", "b.txt", "c.txt"])} - result = _filter_paths(paths_by_root, ["b.txt"]) - assert list(result.keys()) == ["/root"] - assert [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] == [ - "b.txt" + def test_glob_against_full_path(self): + """Filters match against root + relative path.""" + paths_by_root = { + "/home/user/project": self._make_group(["renders/a.exr", "textures/b.png"]) + } + result = _filter_paths(paths_by_root, ["*/renders/*.exr"]) + files = [ + f.path for f in result["/home/user/project"].files_by_hash_alg[HashAlgorithm.XXH128] ] - assert result["/root"].total_bytes == 100 + assert files == ["renders/a.exr"] + + def test_extension_filter_against_full_path(self): + """Simple extension patterns match against full paths.""" + paths_by_root = {"/root": self._make_group(["a.exr", "b.png"])} + result = _filter_paths(paths_by_root, ["*.exr"]) + files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] + assert files == ["a.exr"] def test_directory_prefix_filter(self): paths_by_root = { "/root": self._make_group(["renders/a.exr", "renders/b.exr", "textures/c.png"]) } - result = _filter_paths(paths_by_root, ["renders/"]) + result = _filter_paths(paths_by_root, ["/root/renders/"]) files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] assert files == ["renders/a.exr", "renders/b.exr"] @@ -100,7 +136,7 @@ def test_multiple_asset_roots(self): "/root1": self._make_group(["shared/file.txt", "other.txt"]), "/root2": self._make_group(["shared/file.txt", "different.txt"]), } - result = _filter_paths(paths_by_root, ["shared/file.txt"]) + result = _filter_paths(paths_by_root, ["*/shared/file.txt"]) assert "/root1" in result assert 
"/root2" in result @@ -110,7 +146,7 @@ def test_mixed_filters(self): ["renders/a.exr", "renders/b.exr", "textures/c.png", "scripts/setup.mel"] ) } - result = _filter_paths(paths_by_root, ["renders/", "scripts/setup.mel"]) + result = _filter_paths(paths_by_root, ["/root/renders/", "*/setup.mel"]) files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] assert set(files) == {"renders/a.exr", "renders/b.exr", "scripts/setup.mel"} @@ -119,24 +155,28 @@ def test_empty_root_removed(self): "/has_match": self._make_group(["a.txt"]), "/no_match": self._make_group(["b.txt"]), } - result = _filter_paths(paths_by_root, ["a.txt"]) + result = _filter_paths(paths_by_root, ["*/a.txt"]) assert "/has_match" in result assert "/no_match" not in result - def test_exclude_filters(self): + def test_glob_pattern(self): paths_by_root = { - "/root": self._make_group(["renders/a.exr", "renders/draft/b.exr", "renders/c.exr"]) + "/root": self._make_group(["renders/a.exr", "renders/b.png", "textures/c.exr"]) } - result = _filter_paths(paths_by_root, ["renders/"], ["renders/draft/"]) + result = _filter_paths(paths_by_root, ["*/renders/*.exr"]) files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] - assert set(files) == {"renders/a.exr", "renders/c.exr"} + assert files == ["renders/a.exr"] - def test_glob_pattern(self): + def test_windows_root_path(self): + """Windows backslash roots are normalized so forward-slash patterns match.""" paths_by_root = { - "/root": self._make_group(["renders/a.exr", "renders/b.png", "textures/c.exr"]) + "C:\\Users\\artist\\project": self._make_group(["renders/a.exr", "logs/b.log"]) } - result = _filter_paths(paths_by_root, ["renders/*.exr"]) - files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] + result = _filter_paths(paths_by_root, ["*/renders/*.exr"]) + files = [ + f.path + for f in result["C:\\Users\\artist\\project"].files_by_hash_alg[HashAlgorithm.XXH128] + ] assert files == 
["renders/a.exr"] @@ -152,17 +192,19 @@ def _make_manifest(self, paths: List[str]) -> BaseAssetManifest: total_size=len(paths) * 100, ) - def test_exact_filter(self): - manifests_by_root = {"/root": [self._make_manifest(["a.txt", "b.txt", "c.txt"])]} - result = _filter_manifests(manifests_by_root, ["b.txt"]) - assert "/root" in result - assert [p.path for p in result["/root"][0].paths] == ["b.txt"] + def test_glob_against_full_path(self): + """Filters match against root + relative path.""" + manifests_by_root = { + "/home/user": [self._make_manifest(["renders/a.exr", "textures/b.png"])] + } + result = _filter_manifests(manifests_by_root, ["*/renders/*.exr"]) + assert [p.path for p in result["/home/user"][0].paths] == ["renders/a.exr"] def test_directory_prefix_filter(self): manifests_by_root = { "/root": [self._make_manifest(["renders/a.exr", "renders/b.exr", "textures/c.png"])] } - result = _filter_manifests(manifests_by_root, ["renders/"]) + result = _filter_manifests(manifests_by_root, ["/root/renders/"]) assert [p.path for p in result["/root"][0].paths] == ["renders/a.exr", "renders/b.exr"] def test_no_matches_returns_empty(self): @@ -175,7 +217,7 @@ def test_empty_root_removed(self): "/has_match": [self._make_manifest(["a.txt"])], "/no_match": [self._make_manifest(["b.txt"])], } - result = _filter_manifests(manifests_by_root, ["a.txt"]) + result = _filter_manifests(manifests_by_root, ["*/a.txt"]) assert "/has_match" in result assert "/no_match" not in result @@ -186,16 +228,7 @@ def test_multiple_manifests_per_root(self): self._make_manifest(["c.txt", "d.txt"]), ] } - result = _filter_manifests(manifests_by_root, ["a.txt", "c.txt"]) + result = _filter_manifests(manifests_by_root, ["*/a.txt", "*/c.txt"]) assert len(result["/root"]) == 2 assert [p.path for p in result["/root"][0].paths] == ["a.txt"] assert [p.path for p in result["/root"][1].paths] == ["c.txt"] - - def test_exclude_filters(self): - manifests_by_root = { - "/root": [ - 
self._make_manifest(["renders/a.exr", "renders/draft/b.exr", "renders/c.exr"]) - ] - } - result = _filter_manifests(manifests_by_root, ["renders/"], ["renders/draft/"]) - assert [p.path for p in result["/root"][0].paths] == ["renders/a.exr", "renders/c.exr"] From 7bc6204924f7feace0f4255cc82eb4b2269f84fa Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Thu, 23 Apr 2026 21:48:47 -0700 Subject: [PATCH 08/14] docs: Update job attachments guide for renamed CLI options Signed-off-by: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> --- docs/job_attachments_guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/job_attachments_guide.md b/docs/job_attachments_guide.md index 2ff1231b8..2f0025e30 100644 --- a/docs/job_attachments_guide.md +++ b/docs/job_attachments_guide.md @@ -4,7 +4,7 @@ Job attachments uses your configured S3 bucket as a [content-addressable storage](https://en.wikipedia.org/wiki/Content-addressable_storage), which creates a snapshot of the files used in your job submission in [asset manifests](#asset-manifests), only uploading files that aren't already in S3. This saves you time and bandwidth when iterating on jobs. When an [AWS Deadline Cloud worker agent][worker-agent] starts working on a job with job attachments, it recreates the file system snapshot in the worker agent session directory, and uploads any outputs back to your S3 bucket. -You can then easily download your outputs with the [deadline job download-output] command, or using the [protocol handler](#protocol-handler) to download from a click of a button in the [AWS Deadline Cloud monitor][monitor]. The command supports `--include` and `--exclude` glob patterns for downloading specific files or directories, and `--include-exclude-config` for passing patterns via a JSON file. 
+You can then easily download your outputs with the [deadline job download-output] command, or using the [protocol handler](#protocol-handler) to download from a click of a button in the [AWS Deadline Cloud monitor][monitor]. The command supports `--include` glob patterns for downloading specific files or directories (matched against the full path), and `--include-config` for passing patterns via a JSON string or file. Job attachments also works as an auxiliary storage when used with [AWS Deadline Cloud storage profiles][shared-storage], allowing you to flexibly upload files to your Amazon S3 bucket that aren't on your configured shared storage. From 1c9f2560bc002d0ea6b0d74cc06763dddba394f3 Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Fri, 24 Apr 2026 07:08:32 -0700 Subject: [PATCH 09/14] feat: Support relative path filters as suffix matches Plain relative paths (no glob characters, not absolute) are now matched as a suffix against the full path. This allows filters like "renders/frame_001.exr" to match "/home/user/project/renders/frame_001.exr" without requiring glob prefixes. This supports the DCM use case where the frontend has relative paths from manifests and passes them directly as include filters. 
Signed-off-by: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> --- src/deadline/job_attachments/download.py | 21 ++++- .../cli/test_cli_attachment_e2e.py | 84 +++++++++++++++++++ .../test_path_filtering.py | 24 ++++++ 3 files changed, 126 insertions(+), 3 deletions(-) diff --git a/src/deadline/job_attachments/download.py b/src/deadline/job_attachments/download.py index bda760487..fb6b3dbff 100644 --- a/src/deadline/job_attachments/download.py +++ b/src/deadline/job_attachments/download.py @@ -1252,19 +1252,34 @@ def _full_path(root: str, relative: str) -> str: return normalized_root + "/" + relative +def _is_relative_filter(f: str) -> bool: + """Check if a filter is a plain relative path (no glob characters, not absolute).""" + return ( + not any(c in f for c in ("*", "?", "[")) and not f.startswith("/") and not f.endswith("/") + ) + + def _matches_any_filter(file_path: str, filters: list[str]) -> bool: """ Check if a file path matches any of the given filters using glob-style matching. Uses fnmatch for pattern matching (supports *, ?, [seq], [!seq]). A filter ending with '/' is treated as a directory prefix (matches all files under it). + A plain relative path (no globs, not absolute) matches as a suffix of the full path. The file_path should be the full path (root + relative) to support patterns like '*/renders/*.png'. 
""" from fnmatch import fnmatch - return any( - file_path.startswith(f) if f.endswith("/") else fnmatch(file_path, f) for f in filters - ) + for f in filters: + if f.endswith("/"): + if file_path.startswith(f): + return True + elif _is_relative_filter(f): + if file_path.endswith("/" + f) or file_path == f: + return True + elif fnmatch(file_path, f): + return True + return False def _filter_paths( diff --git a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py index f4a3de386..ad8ade8d7 100644 --- a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py +++ b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py @@ -890,6 +890,90 @@ def test_cli_job_download_output_submission_path_flag(deadline_setup, tmp_path): assert not (Path(asset_root) / "logs" / "render.log").exists() +def test_cli_job_download_output_relative_path_filter(deadline_setup, tmp_path): + """ + --include with a plain relative path (no globs) matches as a suffix + against the full workstation path. This is the DCM use case. 
+ """ + backend, farm_id, queue_id, env = deadline_setup + _configure_defaults(env, farm_id, queue_id) + + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaab01" + asset_root = str(tmp_path / "relpath_outputs") + Path(asset_root).mkdir() + + files = { + "renders/frame_001.exr": b"frame-one", + "renders/frame_002.exr": b"frame-two", + "logs/render.log": b"log-data", + } + _seed_output_job( + backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files + ) + + r = _run( + env, + "job", + "download-output", + "--job-id", + job_id, + "--include", + "renders/frame_001.exr", + "--conflict-resolution", + "OVERWRITE", + "--yes", + ) + assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" + + assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" + assert not (Path(asset_root) / "renders" / "frame_002.exr").exists() + assert not (Path(asset_root) / "logs" / "render.log").exists() + + +def test_cli_job_download_output_relative_paths_via_include_config(deadline_setup, tmp_path): + """ + --include-config with relative paths in JSON and --submission-path. + This is the DCM integration path: relative paths from manifests passed + as JSON, filtered against submission paths. 
+ """ + backend, farm_id, queue_id, env = deadline_setup + _configure_defaults(env, farm_id, queue_id) + + job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaab02" + asset_root = str(tmp_path / "relconfig_outputs") + Path(asset_root).mkdir() + + files = { + "renders/frame_001.exr": b"frame-one", + "renders/frame_002.exr": b"frame-two", + "logs/render.log": b"log-data", + } + _seed_output_job( + backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files + ) + + config_json = json.dumps({"include": ["renders/frame_001.exr", "logs/render.log"]}) + + r = _run( + env, + "job", + "download-output", + "--job-id", + job_id, + "--include-config", + config_json, + "--submission-path", + "--conflict-resolution", + "OVERWRITE", + "--yes", + ) + assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" + + assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" + assert (Path(asset_root) / "logs" / "render.log").read_bytes() == b"log-data" + assert not (Path(asset_root) / "renders" / "frame_002.exr").exists() + + def test_cli_job_download_output_glob_pattern(deadline_setup, tmp_path): """ --include with glob patterns (e.g. *.exr) filters using fnmatch against full paths. 
diff --git a/test/unit/deadline_job_attachments/test_path_filtering.py b/test/unit/deadline_job_attachments/test_path_filtering.py index 27f7c5cce..5ac204581 100644 --- a/test/unit/deadline_job_attachments/test_path_filtering.py +++ b/test/unit/deadline_job_attachments/test_path_filtering.py @@ -71,6 +71,21 @@ def test_glob_extension_only(self): """Simple extension patterns like '*.png' should match full paths.""" assert _matches_any_filter("/root/renders/frame.png", ["*.png"]) is True + def test_relative_path_suffix_match(self): + """Plain relative paths match as a suffix of the full path.""" + assert _matches_any_filter("/home/user/renders/frame.exr", ["renders/frame.exr"]) is True + + def test_relative_path_no_partial_match(self): + """Relative path must match complete path segments.""" + assert _matches_any_filter("/home/user/xrenders/frame.exr", ["renders/frame.exr"]) is False + + def test_relative_path_exact_file(self): + """Single filename matches as suffix.""" + assert _matches_any_filter("/root/renders/frame.exr", ["frame.exr"]) is True + + def test_relative_path_no_match(self): + assert _matches_any_filter("/root/renders/frame.exr", ["other.exr"]) is False + class TestFullPath: def test_unix_root(self): @@ -179,6 +194,15 @@ def test_windows_root_path(self): ] assert files == ["renders/a.exr"] + def test_relative_path_filter(self): + """Plain relative paths match as suffix against full path.""" + paths_by_root = {"/home/user/project": self._make_group(["renders/a.exr", "logs/b.log"])} + result = _filter_paths(paths_by_root, ["renders/a.exr"]) + files = [ + f.path for f in result["/home/user/project"].files_by_hash_alg[HashAlgorithm.XXH128] + ] + assert files == ["renders/a.exr"] + class TestFilterManifests: def _make_manifest(self, paths: List[str]) -> BaseAssetManifest: From 73173a3e8667a35da09ce8516e707790e7d249ab Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Fri, 24 Apr 2026 08:49:19 -0700 Subject: 
[PATCH 10/14] refactor: Simplify to --include only, drop --include-config and stdin Remove --include-config and stdin (--include -) options, keeping only --include as the single filtering mechanism. This simplifies the CLI surface while covering all use cases: - Human CLI: --include "*.exr" or --include "renders/frame_001.exr" - DCM: multiple --include args via Tauri shell with args:true scope - Web copy/paste: repeated --include flags in the command The --include-config and stdin approaches can be added back later if the Windows 32KB CreateProcess limit becomes a practical concern. Signed-off-by: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> --- docs/job_attachments_guide.md | 2 +- .../cli/_groups/_job_download_helpers.py | 15 +-- src/deadline/client/cli/_groups/job_group.py | 18 +-- .../cli/test_cli_attachment_e2e.py | 111 ++---------------- .../cli/test_cli_path_filters.py | 34 +++--- 5 files changed, 30 insertions(+), 150 deletions(-) diff --git a/docs/job_attachments_guide.md b/docs/job_attachments_guide.md index 2f0025e30..630f8f93f 100644 --- a/docs/job_attachments_guide.md +++ b/docs/job_attachments_guide.md @@ -4,7 +4,7 @@ Job attachments uses your configured S3 bucket as a [content-addressable storage](https://en.wikipedia.org/wiki/Content-addressable_storage), which creates a snapshot of the files used in your job submission in [asset manifests](#asset-manifests), only uploading files that aren't already in S3. This saves you time and bandwidth when iterating on jobs. When an [AWS Deadline Cloud worker agent][worker-agent] starts working on a job with job attachments, it recreates the file system snapshot in the worker agent session directory, and uploads any outputs back to your S3 bucket. -You can then easily download your outputs with the [deadline job download-output] command, or using the [protocol handler](#protocol-handler) to download from a click of a button in the [AWS Deadline Cloud monitor][monitor]. 
The command supports `--include` glob patterns for downloading specific files or directories (matched against the full path), and `--include-config` for passing patterns via a JSON string or file. +You can then easily download your outputs with the [deadline job download-output] command, or using the [protocol handler](#protocol-handler) to download from a click of a button in the [AWS Deadline Cloud monitor][monitor]. The command supports `--include` glob patterns and relative paths for downloading specific files or directories. Job attachments also works as an auxiliary storage when used with [AWS Deadline Cloud storage profiles][shared-storage], allowing you to flexibly upload files to your Amazon S3 bucket that aren't on your configured shared storage. diff --git a/src/deadline/client/cli/_groups/_job_download_helpers.py b/src/deadline/client/cli/_groups/_job_download_helpers.py index 43bdec677..7c9a04744 100644 --- a/src/deadline/client/cli/_groups/_job_download_helpers.py +++ b/src/deadline/client/cli/_groups/_job_download_helpers.py @@ -326,22 +326,11 @@ def _normalize_filters(filters: list[str]) -> list[str]: return normalized -def _parse_include_config( - include: tuple[str, ...], - include_config: Optional[str], -) -> Optional[list[str]]: +def _parse_include_filters(include: tuple[str, ...]) -> Optional[list[str]]: """ - Parse --include and --include-config into a normalized filter list. - --include takes precedence over --include-config. + Parse --include into a normalized filter list. Returns include_filters or None if no filters specified. 
""" if include: return _normalize_filters(list(include)) or None - - if include_config: - from ....job_attachments._glob import _process_glob_inputs - - glob_config = _process_glob_inputs(include_config) - return _normalize_filters(glob_config.include_glob) or None - return None diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index fef37a4e9..92d92a409 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -60,7 +60,7 @@ from ._job_download_helpers import ( JSON_MSG_TYPE_PROGRESS, _download_mapped_manifests, - _parse_include_config, + _parse_include_filters, _resolve_conflict_resolution, _resolve_storage_profiles, _transform_manifests_to_absolute_paths, @@ -990,16 +990,9 @@ def _assert_valid_path(path: str) -> None: "-i", "--include", multiple=True, - help="Glob pattern for files to include in download. Matched against the full path " - "(root + relative). Supports *, ?, [seq]. A trailing / matches all files under " - "that directory. Repeatable", -) -@click.option( - "-ic", - "--include-config", - default=None, - help="JSON string or file path with include patterns, " - 'e.g. \'{"include": ["*/renders/*.exr"]}\'', + help="Glob pattern or relative path for files to include in download. Matched against " + "the full path (root + relative). Supports *, ?, [seq]. A trailing / matches all " + "files under that directory. 
Repeatable", ) @click.option( "--submission-path", @@ -1055,7 +1048,6 @@ def job_download_output( output, ignore_storage_profiles, include, - include_config, submission_path, **args, ): @@ -1069,7 +1061,7 @@ def job_download_output( if task_id and not step_id: raise click.UsageError("Missing option '--step-id' required with '--task-id'") - include_filters = _parse_include_config(include, include_config) + include_filters = _parse_include_filters(include) # Get a temporary config object with the standard options handled config = _apply_cli_options_to_config( diff --git a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py index ad8ade8d7..340785e63 100644 --- a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py +++ b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py @@ -930,17 +930,16 @@ def test_cli_job_download_output_relative_path_filter(deadline_setup, tmp_path): assert not (Path(asset_root) / "logs" / "render.log").exists() -def test_cli_job_download_output_relative_paths_via_include_config(deadline_setup, tmp_path): +def test_cli_job_download_output_relative_paths_with_submission_path(deadline_setup, tmp_path): """ - --include-config with relative paths in JSON and --submission-path. - This is the DCM integration path: relative paths from manifests passed - as JSON, filtered against submission paths. + --include with relative paths and --submission-path filters against + submission paths. This is the DCM integration path. 
""" backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaab02" - asset_root = str(tmp_path / "relconfig_outputs") + asset_root = str(tmp_path / "relsubmit_outputs") Path(asset_root).mkdir() files = { @@ -952,16 +951,16 @@ def test_cli_job_download_output_relative_paths_via_include_config(deadline_setu backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files ) - config_json = json.dumps({"include": ["renders/frame_001.exr", "logs/render.log"]}) - r = _run( env, "job", "download-output", "--job-id", job_id, - "--include-config", - config_json, + "--include", + "renders/frame_001.exr", + "--include", + "logs/render.log", "--submission-path", "--conflict-resolution", "OVERWRITE", @@ -1013,100 +1012,6 @@ def test_cli_job_download_output_glob_pattern(deadline_setup, tmp_path): assert not (Path(asset_root) / "renders" / "frame_002.png").exists() -def test_cli_job_download_output_include_config(deadline_setup, tmp_path): - """ - --include-config accepts a JSON file with include patterns. 
- """ - backend, farm_id, queue_id, env = deadline_setup - _configure_defaults(env, farm_id, queue_id) - - job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa8" - asset_root = str(tmp_path / "config_outputs") - Path(asset_root).mkdir() - - files = { - "renders/frame_001.exr": b"frame-one", - "renders/draft/frame_002.exr": b"draft", - "logs/render.log": b"log-data", - } - _seed_output_job( - backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files - ) - - config_file = tmp_path / "filters.json" - config_file.write_text(json.dumps({"include": ["*/renders/*"]})) - - r = _run( - env, - "job", - "download-output", - "--job-id", - job_id, - "--include-config", - str(config_file), - "--submission-path", - "--conflict-resolution", - "OVERWRITE", - "--yes", - ) - assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" - - assert (Path(asset_root) / "renders" / "frame_001.exr").read_bytes() == b"frame-one" - # renders/draft/frame_002.exr matches */renders/* via fnmatch (single * matches path segments) - assert (Path(asset_root) / "renders" / "draft" / "frame_002.exr").read_bytes() == b"draft" - assert not (Path(asset_root) / "logs" / "render.log").exists() - - -def test_cli_job_download_output_include_config_large_inline_json(deadline_setup, tmp_path): - """ - --include-config with a large inline JSON blob containing many paths. - This mirrors DCM's usage where the desktop app may pass hundreds of file paths. 
- """ - backend, farm_id, queue_id, env = deadline_setup - _configure_defaults(env, farm_id, queue_id) - - job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa9" - asset_root = str(tmp_path / "large_config_outputs") - Path(asset_root).mkdir() - - # Generate 200 files, select 150 of them via include config - num_total = 200 - num_selected = 150 - files = {f"renders/frame_{i:04d}.exr": f"content-{i}".encode() for i in range(num_total)} - _seed_output_job( - backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files - ) - - # Build a large inline JSON with 150 exact paths (using glob to match full paths) - selected_paths = [f"*/renders/frame_{i:04d}.exr" for i in range(num_selected)] - config_json = json.dumps({"include": selected_paths}) - - r = _run( - env, - "job", - "download-output", - "--job-id", - job_id, - "--include-config", - config_json, - "--submission-path", - "--conflict-resolution", - "OVERWRITE", - "--yes", - ) - assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}" - - # Verify exactly the 50 selected files were downloaded - for i in range(num_selected): - assert ( - Path(asset_root) / "renders" / f"frame_{i:04d}.exr" - ).read_bytes() == f"content-{i}".encode() - - # Verify the rest were NOT downloaded - for i in range(num_selected, num_total): - assert not (Path(asset_root) / "renders" / f"frame_{i:04d}.exr").exists() - - @pytest.mark.skipif( sys.version_info < (3, 9), reason="MockDeadlineBackend.create_job requires openjd-model, which requires Python >= 3.9", diff --git a/test/unit/deadline_client/cli/test_cli_path_filters.py b/test/unit/deadline_client/cli/test_cli_path_filters.py index ea04d483b..82bc6edce 100644 --- a/test/unit/deadline_client/cli/test_cli_path_filters.py +++ b/test/unit/deadline_client/cli/test_cli_path_filters.py @@ -1,10 +1,10 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-"""Tests for _normalize_filters and _parse_include_config.""" +"""Tests for _normalize_filters and _parse_include_filters.""" from deadline.client.cli._groups._job_download_helpers import ( _normalize_filters, - _parse_include_config, + _parse_include_filters, ) @@ -38,29 +38,23 @@ def test_multiple_normalizations(self): assert result == ["renders/frame.exr"] -class TestParseIncludeConfig: +class TestParseIncludeFilters: def test_include_only(self): - result = _parse_include_config(("renders/",), None) + result = _parse_include_filters(("renders/",)) assert result == ["renders/"] def test_no_filters(self): - result = _parse_include_config((), None) + result = _parse_include_filters(()) assert result is None - def test_config_json_string(self): - result = _parse_include_config((), '{"include": ["renders/*.exr"]}') - assert result == ["renders/*.exr"] - - def test_config_file(self, tmp_path): - config_file = tmp_path / "filters.json" - config_file.write_text('{"include": ["**/*.exr"]}') - result = _parse_include_config((), str(config_file)) - assert result == ["**/*.exr"] - - def test_cli_args_take_precedence_over_config(self): - result = _parse_include_config(("renders/",), '{"include": ["textures/"]}') - assert result == ["renders/"] - def test_multiple_include_patterns(self): - result = _parse_include_config(("*.exr", "*/renders/*.png"), None) + result = _parse_include_filters(("*.exr", "*/renders/*.png")) assert result == ["*.exr", "*/renders/*.png"] + + def test_relative_paths(self): + result = _parse_include_filters(("renders/frame_001.exr", "logs/render.log")) + assert result == ["renders/frame_001.exr", "logs/render.log"] + + def test_mixed_globs_and_relative(self): + result = _parse_include_filters(("*.exr", "renders/frame_001.exr")) + assert result == ["*.exr", "renders/frame_001.exr"] From ef78dedd8644c3e037715d06bb5961cf0356673d Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Fri, 24 Apr 2026 13:06:38 
-0700 Subject: [PATCH 11/14] refactor: Address review feedback on filtering implementation - Remove _is_relative_filter and suffix matching; instead auto-prepend */ to relative patterns so all non-directory filters use fnmatch. This enables relative globs like "renders/*.exr" to work naturally. - Remove _parse_include_filters wrapper; inline _normalize_filters at the call site. - Rename include_filters to include_patterns in _download_job_output for clarity (OutputDownloader keeps include_filters as its API). - Fix unit tests to use full paths as file_path input, matching real usage through _full_path(). Signed-off-by: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> --- .../cli/_groups/_job_download_helpers.py | 10 ----- src/deadline/client/cli/_groups/job_group.py | 24 +++++------ src/deadline/job_attachments/download.py | 20 +++------ .../cli/test_cli_path_filters.py | 27 +++--------- .../test_path_filtering.py | 41 ++++++++++++------- 5 files changed, 49 insertions(+), 73 deletions(-) diff --git a/src/deadline/client/cli/_groups/_job_download_helpers.py b/src/deadline/client/cli/_groups/_job_download_helpers.py index 7c9a04744..85506bc04 100644 --- a/src/deadline/client/cli/_groups/_job_download_helpers.py +++ b/src/deadline/client/cli/_groups/_job_download_helpers.py @@ -324,13 +324,3 @@ def _normalize_filters(filters: list[str]) -> list[str]: if f: normalized.append(f) return normalized - - -def _parse_include_filters(include: tuple[str, ...]) -> Optional[list[str]]: - """ - Parse --include into a normalized filter list. - Returns include_filters or None if no filters specified. 
- """ - if include: - return _normalize_filters(list(include)) or None - return None diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index 92d92a409..5dd87b364 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -60,7 +60,7 @@ from ._job_download_helpers import ( JSON_MSG_TYPE_PROGRESS, _download_mapped_manifests, - _parse_include_filters, + _normalize_filters, _resolve_conflict_resolution, _resolve_storage_profiles, _transform_manifests_to_absolute_paths, @@ -501,7 +501,7 @@ def _download_job_output( task_id: Optional[str], is_json_format: bool = False, ignore_storage_profiles: bool = False, - include_filters: Optional[list[str]] = None, + include_patterns: Optional[list[str]] = None, submission_path: bool = False, ): """ @@ -552,7 +552,7 @@ def _download_job_output( for manifest in job_attachments_manifests: root_path_format_mapping[manifest["rootPath"]] = manifest["rootPathFormat"] - # When --submission-path is set, filter against the submission (S3) paths. + # When --submission-path is set, filter against the submission paths. # Otherwise, filtering happens later against workstation paths. 
job_output_downloader = OutputDownloader( s3_settings=JobAttachmentS3Settings(**queue["jobAttachmentSettings"]), @@ -563,7 +563,7 @@ def _download_job_output( task_id=task_id, session_action_id=session_action_id, session=queue_role_session, - include_filters=include_filters if submission_path else None, + include_filters=include_patterns if submission_path else None, ) def _check_and_warn_long_output_paths( @@ -610,13 +610,13 @@ def _check_and_warn_long_output_paths( session_action_id=session_action_id, session=queue_role_session, ) - if include_filters and submission_path: - manifests_by_root = _filter_manifests(manifests_by_root, include_filters) + if include_patterns and submission_path: + manifests_by_root = _filter_manifests(manifests_by_root, include_patterns) mapped_manifests = _transform_manifests_to_absolute_paths( manifests_by_root, rules, resolved.job_profile.osFamily ) - if include_filters and not submission_path: - mapped_manifests = _filter_manifests(mapped_manifests, include_filters) + if include_patterns and not submission_path: + mapped_manifests = _filter_manifests(mapped_manifests, include_patterns) if mapped_manifests: download_summary = _download_mapped_manifests( mapped_manifests=mapped_manifests, @@ -726,8 +726,8 @@ def _check_and_warn_long_output_paths( # Apply include filters against workstation paths (default behavior). # When --submission-path is set, filtering was already applied at the S3/submission level. 
- if include_filters and not submission_path: - job_output_downloader.apply_include_filters(include_filters) + if include_patterns and not submission_path: + job_output_downloader.apply_include_filters(include_patterns) output_paths_by_root = job_output_downloader.get_output_paths_by_root() if output_paths_by_root == {}: click.echo(_get_no_output_message(is_json_format)) @@ -1061,7 +1061,7 @@ def job_download_output( if task_id and not step_id: raise click.UsageError("Missing option '--step-id' required with '--task-id'") - include_filters = _parse_include_filters(include) + include_patterns = _normalize_filters(list(include)) or None # Get a temporary config object with the standard options handled config = _apply_cli_options_to_config( @@ -1083,7 +1083,7 @@ def job_download_output( task_id=task_id, is_json_format=is_json_format, ignore_storage_profiles=ignore_storage_profiles, - include_filters=include_filters, + include_patterns=include_patterns, submission_path=submission_path, ) except Exception as e: diff --git a/src/deadline/job_attachments/download.py b/src/deadline/job_attachments/download.py index fb6b3dbff..3dbd5f6d3 100644 --- a/src/deadline/job_attachments/download.py +++ b/src/deadline/job_attachments/download.py @@ -1252,21 +1252,14 @@ def _full_path(root: str, relative: str) -> str: return normalized_root + "/" + relative -def _is_relative_filter(f: str) -> bool: - """Check if a filter is a plain relative path (no glob characters, not absolute).""" - return ( - not any(c in f for c in ("*", "?", "[")) and not f.startswith("/") and not f.endswith("/") - ) - - def _matches_any_filter(file_path: str, filters: list[str]) -> bool: """ Check if a file path matches any of the given filters using glob-style matching. Uses fnmatch for pattern matching (supports *, ?, [seq], [!seq]). A filter ending with '/' is treated as a directory prefix (matches all files under it). - A plain relative path (no globs, not absolute) matches as a suffix of the full path. 
- The file_path should be the full path (root + relative) to support patterns like - '*/renders/*.png'. + Relative filters (not starting with '/' or '*') are auto-prepended with '*/' so they + match anywhere under the root — e.g. 'renders/*.exr' matches '*/renders/*.exr'. + The file_path should be the full path (root + relative). """ from fnmatch import fnmatch @@ -1274,11 +1267,10 @@ def _matches_any_filter(file_path: str, filters: list[str]) -> bool: if f.endswith("/"): if file_path.startswith(f): return True - elif _is_relative_filter(f): - if file_path.endswith("/" + f) or file_path == f: + else: + pattern = f if f.startswith(("/", "*")) else "*/" + f + if fnmatch(file_path, pattern): return True - elif fnmatch(file_path, f): - return True return False diff --git a/test/unit/deadline_client/cli/test_cli_path_filters.py b/test/unit/deadline_client/cli/test_cli_path_filters.py index 82bc6edce..db40b9245 100644 --- a/test/unit/deadline_client/cli/test_cli_path_filters.py +++ b/test/unit/deadline_client/cli/test_cli_path_filters.py @@ -1,10 +1,9 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-"""Tests for _normalize_filters and _parse_include_filters.""" +"""Tests for _normalize_filters.""" from deadline.client.cli._groups._job_download_helpers import ( _normalize_filters, - _parse_include_filters, ) @@ -37,24 +36,8 @@ def test_multiple_normalizations(self): result = _normalize_filters([".\\renders\\\\frame.exr"]) assert result == ["renders/frame.exr"] + def test_empty_input_returns_empty(self): + assert _normalize_filters([]) == [] -class TestParseIncludeFilters: - def test_include_only(self): - result = _parse_include_filters(("renders/",)) - assert result == ["renders/"] - - def test_no_filters(self): - result = _parse_include_filters(()) - assert result is None - - def test_multiple_include_patterns(self): - result = _parse_include_filters(("*.exr", "*/renders/*.png")) - assert result == ["*.exr", "*/renders/*.png"] - - def test_relative_paths(self): - result = _parse_include_filters(("renders/frame_001.exr", "logs/render.log")) - assert result == ["renders/frame_001.exr", "logs/render.log"] - - def test_mixed_globs_and_relative(self): - result = _parse_include_filters(("*.exr", "renders/frame_001.exr")) - assert result == ["*.exr", "renders/frame_001.exr"] + def test_all_empty_returns_empty(self): + assert _normalize_filters(["", ""]) == [] diff --git a/test/unit/deadline_job_attachments/test_path_filtering.py b/test/unit/deadline_job_attachments/test_path_filtering.py index 5ac204581..7cd2a1585 100644 --- a/test/unit/deadline_job_attachments/test_path_filtering.py +++ b/test/unit/deadline_job_attachments/test_path_filtering.py @@ -24,44 +24,48 @@ class TestMatchesAnyFilter: def test_exact_match(self): - assert _matches_any_filter("renders/frame_001.exr", ["renders/frame_001.exr"]) is True + assert _matches_any_filter("/root/renders/frame_001.exr", ["renders/frame_001.exr"]) is True def test_exact_no_match(self): - assert _matches_any_filter("renders/frame_002.exr", ["renders/frame_001.exr"]) is False + assert ( + 
_matches_any_filter("/root/renders/frame_002.exr", ["renders/frame_001.exr"]) is False + ) def test_directory_prefix_match(self): - assert _matches_any_filter("renders/frame_001.exr", ["renders/"]) is True + assert _matches_any_filter("/root/renders/frame_001.exr", ["/root/renders/"]) is True def test_directory_prefix_no_match(self): - assert _matches_any_filter("textures/wood.exr", ["renders/"]) is False + assert _matches_any_filter("/root/textures/wood.exr", ["/root/renders/"]) is False def test_directory_prefix_does_not_match_similar_names(self): """'renders/' should NOT match 'renders_v2/file.exr'""" - assert _matches_any_filter("renders_v2/file.exr", ["renders/"]) is False + assert _matches_any_filter("/root/renders_v2/file.exr", ["/root/renders/"]) is False def test_multiple_filters_or(self): assert ( - _matches_any_filter("textures/wood.exr", ["renders/frame_001.exr", "textures/"]) is True + _matches_any_filter( + "/root/textures/wood.exr", ["renders/frame_001.exr", "/root/textures/"] + ) + is True ) def test_empty_filters(self): - assert _matches_any_filter("renders/frame_001.exr", []) is False + assert _matches_any_filter("/root/renders/frame_001.exr", []) is False def test_nested_directory_prefix(self): - assert _matches_any_filter("a/b/c/file.txt", ["a/b/"]) is True + assert _matches_any_filter("/root/a/b/c/file.txt", ["/root/a/b/"]) is True def test_glob_wildcard(self): - assert _matches_any_filter("renders/frame_001.exr", ["renders/*.exr"]) is True + assert _matches_any_filter("/root/renders/frame_001.exr", ["renders/*.exr"]) is True def test_glob_wildcard_no_match(self): - assert _matches_any_filter("renders/frame_001.png", ["renders/*.exr"]) is False + assert _matches_any_filter("/root/renders/frame_001.png", ["renders/*.exr"]) is False def test_glob_question_mark(self): - assert _matches_any_filter("renders/frame_00?.exr", ["renders/frame_00?.exr"]) is True + assert _matches_any_filter("/root/renders/frame_00x.exr", ["renders/frame_00?.exr"]) is 
True def test_glob_recursive(self): - """fnmatch does not support ** recursion — it matches as a literal wildcard.""" - assert _matches_any_filter("a/b/c.txt", ["a/*/c.txt"]) is True + assert _matches_any_filter("/root/a/b/c.txt", ["*/a/*/c.txt"]) is True def test_glob_full_path_wildcard(self): """Patterns like '*/renders/*.png' should match against full paths.""" @@ -72,7 +76,7 @@ def test_glob_extension_only(self): assert _matches_any_filter("/root/renders/frame.png", ["*.png"]) is True def test_relative_path_suffix_match(self): - """Plain relative paths match as a suffix of the full path.""" + """Relative paths are auto-prepended with */ and matched via fnmatch.""" assert _matches_any_filter("/home/user/renders/frame.exr", ["renders/frame.exr"]) is True def test_relative_path_no_partial_match(self): @@ -80,12 +84,19 @@ def test_relative_path_no_partial_match(self): assert _matches_any_filter("/home/user/xrenders/frame.exr", ["renders/frame.exr"]) is False def test_relative_path_exact_file(self): - """Single filename matches as suffix.""" + """Single filename matches anywhere under root.""" assert _matches_any_filter("/root/renders/frame.exr", ["frame.exr"]) is True def test_relative_path_no_match(self): assert _matches_any_filter("/root/renders/frame.exr", ["other.exr"]) is False + def test_relative_glob(self): + """Relative globs like 'renders/*.exr' match without needing a leading */.""" + assert _matches_any_filter("/home/user/renders/frame.exr", ["renders/*.exr"]) is True + + def test_relative_glob_no_match(self): + assert _matches_any_filter("/home/user/renders/frame.png", ["renders/*.exr"]) is False + class TestFullPath: def test_unix_root(self): From 6713c0ed2ec7a00d97f292b1e4f5820a0e6a1014 Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Fri, 24 Apr 2026 14:40:01 -0700 Subject: [PATCH 12/14] refactor: Rename --submission-path to --match-paths-by enum Replace the --submission-path boolean flag with 
--match-paths-by that accepts SOURCE or WORKSTATION (default). This makes the option self-documenting about the two filtering modes. Also clean up comments to reference "source" instead of "S3". Signed-off-by: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> --- src/deadline/client/cli/_groups/job_group.py | 29 ++++++++++--------- .../cli/test_cli_attachment_e2e.py | 22 ++++++++------ 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index 5dd87b364..acf28aad0 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -502,7 +502,7 @@ def _download_job_output( is_json_format: bool = False, ignore_storage_profiles: bool = False, include_patterns: Optional[list[str]] = None, - submission_path: bool = False, + match_paths_by: str = "WORKSTATION", ): """ Starts the download of job output and handles the progress reporting callback. @@ -552,7 +552,7 @@ def _download_job_output( for manifest in job_attachments_manifests: root_path_format_mapping[manifest["rootPath"]] = manifest["rootPathFormat"] - # When --submission-path is set, filter against the submission paths. + # When --match-paths-by SOURCE is set, filter against the source paths. # Otherwise, filtering happens later against workstation paths. 
job_output_downloader = OutputDownloader( s3_settings=JobAttachmentS3Settings(**queue["jobAttachmentSettings"]), @@ -563,7 +563,7 @@ def _download_job_output( task_id=task_id, session_action_id=session_action_id, session=queue_role_session, - include_filters=include_patterns if submission_path else None, + include_filters=include_patterns if match_paths_by == "SOURCE" else None, ) def _check_and_warn_long_output_paths( @@ -610,12 +610,12 @@ def _check_and_warn_long_output_paths( session_action_id=session_action_id, session=queue_role_session, ) - if include_patterns and submission_path: + if include_patterns and match_paths_by == "SOURCE": manifests_by_root = _filter_manifests(manifests_by_root, include_patterns) mapped_manifests = _transform_manifests_to_absolute_paths( manifests_by_root, rules, resolved.job_profile.osFamily ) - if include_patterns and not submission_path: + if include_patterns and match_paths_by != "SOURCE": mapped_manifests = _filter_manifests(mapped_manifests, include_patterns) if mapped_manifests: download_summary = _download_mapped_manifests( @@ -725,8 +725,8 @@ def _check_and_warn_long_output_paths( _check_and_warn_long_output_paths(output_paths_by_root) # Apply include filters against workstation paths (default behavior). - # When --submission-path is set, filtering was already applied at the S3/submission level. - if include_patterns and not submission_path: + # When --match-paths-by SOURCE is set, filtering was already applied at the source level. + if include_patterns and match_paths_by != "SOURCE": job_output_downloader.apply_include_filters(include_patterns) output_paths_by_root = job_output_downloader.get_output_paths_by_root() if output_paths_by_root == {}: @@ -995,11 +995,12 @@ def _assert_valid_path(path: str) -> None: "files under that directory. 
Repeatable", ) @click.option( - "--submission-path", - is_flag=True, - default=False, - help="Match include filters against the original submission paths instead of " - "the local workstation paths. By default, filters match against workstation paths.", + "--match-paths-by", + type=click.Choice(["SOURCE", "WORKSTATION"], case_sensitive=False), + default="WORKSTATION", + help="Control which paths --include filters are matched against. " + "SOURCE matches against the original paths from the submitting machine. " + "WORKSTATION matches against the local download paths (the default).", ) @click.option( "--ignore-storage-profiles", @@ -1048,7 +1049,7 @@ def job_download_output( output, ignore_storage_profiles, include, - submission_path, + match_paths_by, **args, ): """ @@ -1084,7 +1085,7 @@ def job_download_output( is_json_format=is_json_format, ignore_storage_profiles=ignore_storage_profiles, include_patterns=include_patterns, - submission_path=submission_path, + match_paths_by=match_paths_by, ) except Exception as e: if is_json_format: diff --git a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py index 340785e63..c29519bb7 100644 --- a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py +++ b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py @@ -850,10 +850,10 @@ def test_cli_job_download_output_include_matches_full_workstation_path(deadline_ assert not (Path(asset_root) / "logs" / "render.log").exists() -def test_cli_job_download_output_submission_path_flag(deadline_setup, tmp_path): +def test_cli_job_download_output_match_paths_by_source_flag(deadline_setup, tmp_path): """ - --submission-path causes --include to filter against the original submission - paths (asset roots from S3) rather than the workstation paths. + --match-paths-by SOURCE causes --include to filter against the original source + paths rather than the workstation paths. 
""" backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) @@ -870,7 +870,7 @@ def test_cli_job_download_output_submission_path_flag(deadline_setup, tmp_path): backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files ) - # Pattern uses the submission root path (same as asset_root in this case) + # Pattern uses the source root path (same as asset_root in this case) r = _run( env, "job", @@ -879,7 +879,8 @@ def test_cli_job_download_output_submission_path_flag(deadline_setup, tmp_path): job_id, "--include", "*subpath_outputs/renders/*", - "--submission-path", + "--match-paths-by", + "SOURCE", "--conflict-resolution", "OVERWRITE", "--yes", @@ -930,10 +931,12 @@ def test_cli_job_download_output_relative_path_filter(deadline_setup, tmp_path): assert not (Path(asset_root) / "logs" / "render.log").exists() -def test_cli_job_download_output_relative_paths_with_submission_path(deadline_setup, tmp_path): +def test_cli_job_download_output_relative_paths_with_match_paths_by_source( + deadline_setup, tmp_path +): """ - --include with relative paths and --submission-path filters against - submission paths. This is the DCM integration path. + --include with relative paths and --match-paths-by SOURCE filters against + source paths. This is the DCM integration path. 
""" backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) @@ -961,7 +964,8 @@ def test_cli_job_download_output_relative_paths_with_submission_path(deadline_se "renders/frame_001.exr", "--include", "logs/render.log", - "--submission-path", + "--match-paths-by", + "SOURCE", "--conflict-resolution", "OVERWRITE", "--yes", From 01151ae786da3970feb8f7889a6730d56286c491 Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Fri, 24 Apr 2026 16:06:29 -0700 Subject: [PATCH 13/14] refactor: Rename --match-paths-by values to JOB and LOCAL Rename SOURCE to JOB and WORKSTATION to LOCAL for clarity: - JOB matches against the paths recorded at job submission - LOCAL matches against the local download paths (default) Also update help text to be more user-friendly. Signed-off-by: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> --- src/deadline/client/cli/_groups/job_group.py | 22 +++++++++---------- .../cli/test_cli_attachment_e2e.py | 18 +++++++-------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index acf28aad0..18c07bf80 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -502,7 +502,7 @@ def _download_job_output( is_json_format: bool = False, ignore_storage_profiles: bool = False, include_patterns: Optional[list[str]] = None, - match_paths_by: str = "WORKSTATION", + match_paths_by: str = "LOCAL", ): """ Starts the download of job output and handles the progress reporting callback. @@ -552,7 +552,7 @@ def _download_job_output( for manifest in job_attachments_manifests: root_path_format_mapping[manifest["rootPath"]] = manifest["rootPathFormat"] - # When --match-paths-by SOURCE is set, filter against the source paths. + # When --match-paths-by JOB is set, filter against the job paths. 
# Otherwise, filtering happens later against workstation paths. job_output_downloader = OutputDownloader( s3_settings=JobAttachmentS3Settings(**queue["jobAttachmentSettings"]), @@ -563,7 +563,7 @@ def _download_job_output( task_id=task_id, session_action_id=session_action_id, session=queue_role_session, - include_filters=include_patterns if match_paths_by == "SOURCE" else None, + include_filters=include_patterns if match_paths_by == "JOB" else None, ) def _check_and_warn_long_output_paths( @@ -610,12 +610,12 @@ def _check_and_warn_long_output_paths( session_action_id=session_action_id, session=queue_role_session, ) - if include_patterns and match_paths_by == "SOURCE": + if include_patterns and match_paths_by == "JOB": manifests_by_root = _filter_manifests(manifests_by_root, include_patterns) mapped_manifests = _transform_manifests_to_absolute_paths( manifests_by_root, rules, resolved.job_profile.osFamily ) - if include_patterns and match_paths_by != "SOURCE": + if include_patterns and match_paths_by != "JOB": mapped_manifests = _filter_manifests(mapped_manifests, include_patterns) if mapped_manifests: download_summary = _download_mapped_manifests( @@ -725,8 +725,8 @@ def _check_and_warn_long_output_paths( _check_and_warn_long_output_paths(output_paths_by_root) # Apply include filters against workstation paths (default behavior). - # When --match-paths-by SOURCE is set, filtering was already applied at the source level. - if include_patterns and match_paths_by != "SOURCE": + # When --match-paths-by JOB is set, filtering was already applied at the job level. 
+ if include_patterns and match_paths_by != "JOB": job_output_downloader.apply_include_filters(include_patterns) output_paths_by_root = job_output_downloader.get_output_paths_by_root() if output_paths_by_root == {}: @@ -996,11 +996,11 @@ def _assert_valid_path(path: str) -> None: ) @click.option( "--match-paths-by", - type=click.Choice(["SOURCE", "WORKSTATION"], case_sensitive=False), - default="WORKSTATION", + type=click.Choice(["JOB", "LOCAL"], case_sensitive=False), + default="LOCAL", help="Control which paths --include filters are matched against. " - "SOURCE matches against the original paths from the submitting machine. " - "WORKSTATION matches against the local download paths (the default).", + "JOB matches against the paths recorded at job submission. " + "LOCAL matches against the local download paths (the default).", ) @click.option( "--ignore-storage-profiles", diff --git a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py index c29519bb7..3acc521bb 100644 --- a/test/unit/deadline_client/cli/test_cli_attachment_e2e.py +++ b/test/unit/deadline_client/cli/test_cli_attachment_e2e.py @@ -850,9 +850,9 @@ def test_cli_job_download_output_include_matches_full_workstation_path(deadline_ assert not (Path(asset_root) / "logs" / "render.log").exists() -def test_cli_job_download_output_match_paths_by_source_flag(deadline_setup, tmp_path): +def test_cli_job_download_output_match_paths_by_job_flag(deadline_setup, tmp_path): """ - --match-paths-by SOURCE causes --include to filter against the original source + --match-paths-by JOB causes --include to filter against the original source paths rather than the workstation paths. 
""" backend, farm_id, queue_id, env = deadline_setup @@ -870,7 +870,7 @@ def test_cli_job_download_output_match_paths_by_source_flag(deadline_setup, tmp_ backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, asset_root, files ) - # Pattern uses the source root path (same as asset_root in this case) + # Pattern uses the job root path (same as asset_root in this case) r = _run( env, "job", @@ -880,7 +880,7 @@ def test_cli_job_download_output_match_paths_by_source_flag(deadline_setup, tmp_ "--include", "*subpath_outputs/renders/*", "--match-paths-by", - "SOURCE", + "JOB", "--conflict-resolution", "OVERWRITE", "--yes", @@ -931,12 +931,10 @@ def test_cli_job_download_output_relative_path_filter(deadline_setup, tmp_path): assert not (Path(asset_root) / "logs" / "render.log").exists() -def test_cli_job_download_output_relative_paths_with_match_paths_by_source( - deadline_setup, tmp_path -): +def test_cli_job_download_output_relative_paths_with_match_paths_by_job(deadline_setup, tmp_path): """ - --include with relative paths and --match-paths-by SOURCE filters against - source paths. This is the DCM integration path. + --include with relative paths and --match-paths-by JOB filters against + job paths. This is the DCM integration path. 
""" backend, farm_id, queue_id, env = deadline_setup _configure_defaults(env, farm_id, queue_id) @@ -965,7 +963,7 @@ def test_cli_job_download_output_relative_paths_with_match_paths_by_source( "--include", "logs/render.log", "--match-paths-by", - "SOURCE", + "JOB", "--conflict-resolution", "OVERWRITE", "--yes", From a361c5e660c2bfd07b34befb0a6cffea8c1800cd Mon Sep 17 00:00:00 2001 From: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> Date: Fri, 24 Apr 2026 16:56:02 -0700 Subject: [PATCH 14/14] refactor: Address review feedback on filtering and path handling - Add MatchPathsBy enum (JOB/LOCAL) replacing raw strings - Use posixpath.join in _full_path for consistency with _transform_manifests_to_absolute_paths - Use fnmatch for directory prefix filters instead of startswith - Handle Windows drive letter paths as absolute in _matches_any_filter - Move fnmatch import to top of download.py - Add Windows path separator tests for _matches_any_filter Signed-off-by: Nathy MacKinlay <61921733+waninggibbon@users.noreply.github.com> --- .../cli/_groups/_job_download_helpers.py | 8 ++++++ src/deadline/client/cli/_groups/job_group.py | 13 +++++----- src/deadline/job_attachments/download.py | 24 ++++++++++------- .../test_path_filtering.py | 26 +++++++++++++++++++ 4 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/deadline/client/cli/_groups/_job_download_helpers.py b/src/deadline/client/cli/_groups/_job_download_helpers.py index 85506bc04..6b9e790b8 100644 --- a/src/deadline/client/cli/_groups/_job_download_helpers.py +++ b/src/deadline/client/cli/_groups/_job_download_helpers.py @@ -15,6 +15,7 @@ import posixpath from configparser import ConfigParser from dataclasses import dataclass +from enum import Enum from typing import Any, Optional import click @@ -42,6 +43,13 @@ JSON_MSG_TYPE_PROGRESS = "progress" +class MatchPathsBy(str, Enum): + """Which paths --include filters are matched against.""" + + JOB = "JOB" + LOCAL = "LOCAL" + + @dataclass 
class ResolvedStorageProfiles: """The result of resolving storage profiles for a download operation.""" diff --git a/src/deadline/client/cli/_groups/job_group.py b/src/deadline/client/cli/_groups/job_group.py index 18c07bf80..e2103008b 100644 --- a/src/deadline/client/cli/_groups/job_group.py +++ b/src/deadline/client/cli/_groups/job_group.py @@ -59,6 +59,7 @@ ) from ._job_download_helpers import ( JSON_MSG_TYPE_PROGRESS, + MatchPathsBy, _download_mapped_manifests, _normalize_filters, _resolve_conflict_resolution, @@ -502,7 +503,7 @@ def _download_job_output( is_json_format: bool = False, ignore_storage_profiles: bool = False, include_patterns: Optional[list[str]] = None, - match_paths_by: str = "LOCAL", + match_paths_by: MatchPathsBy = MatchPathsBy.LOCAL, ): """ Starts the download of job output and handles the progress reporting callback. @@ -563,7 +564,7 @@ def _download_job_output( task_id=task_id, session_action_id=session_action_id, session=queue_role_session, - include_filters=include_patterns if match_paths_by == "JOB" else None, + include_filters=include_patterns if match_paths_by == MatchPathsBy.JOB else None, ) def _check_and_warn_long_output_paths( @@ -610,12 +611,12 @@ def _check_and_warn_long_output_paths( session_action_id=session_action_id, session=queue_role_session, ) - if include_patterns and match_paths_by == "JOB": + if include_patterns and match_paths_by == MatchPathsBy.JOB: manifests_by_root = _filter_manifests(manifests_by_root, include_patterns) mapped_manifests = _transform_manifests_to_absolute_paths( manifests_by_root, rules, resolved.job_profile.osFamily ) - if include_patterns and match_paths_by != "JOB": + if include_patterns and match_paths_by != MatchPathsBy.JOB: mapped_manifests = _filter_manifests(mapped_manifests, include_patterns) if mapped_manifests: download_summary = _download_mapped_manifests( @@ -726,7 +727,7 @@ def _check_and_warn_long_output_paths( # Apply include filters against workstation paths (default behavior). 
# When --match-paths-by JOB is set, filtering was already applied at the job level. - if include_patterns and match_paths_by != "JOB": + if include_patterns and match_paths_by != MatchPathsBy.JOB: job_output_downloader.apply_include_filters(include_patterns) output_paths_by_root = job_output_downloader.get_output_paths_by_root() if output_paths_by_root == {}: @@ -1085,7 +1086,7 @@ def job_download_output( is_json_format=is_json_format, ignore_storage_profiles=ignore_storage_profiles, include_patterns=include_patterns, - match_paths_by=match_paths_by, + match_paths_by=MatchPathsBy(match_paths_by), ) except Exception as e: if is_json_format: diff --git a/src/deadline/job_attachments/download.py b/src/deadline/job_attachments/download.py index 3dbd5f6d3..8e95be22c 100644 --- a/src/deadline/job_attachments/download.py +++ b/src/deadline/job_attachments/download.py @@ -7,10 +7,12 @@ import concurrent.futures import json import os +import posixpath import re import time from collections import defaultdict from datetime import datetime +from fnmatch import fnmatch from itertools import chain from logging import Logger, LoggerAdapter, getLogger from pathlib import Path @@ -1245,30 +1247,34 @@ def mount_vfs_from_manifests( def _full_path(root: str, relative: str) -> str: - """Join root and relative path using forward slashes for consistent matching.""" - normalized_root = root.replace("\\", "/") - if normalized_root.endswith("/"): - return normalized_root + relative - return normalized_root + "/" + relative + """Join root and relative path, normalizing to forward slashes for consistent matching. + + Uses posixpath.join for consistency with _transform_manifests_to_absolute_paths + in _job_download_helpers.py, which joins roots and manifest paths the same way. + """ + return posixpath.join(root.replace("\\", "/"), relative) def _matches_any_filter(file_path: str, filters: list[str]) -> bool: """ Check if a file path matches any of the given filters using glob-style matching. 
Uses fnmatch for pattern matching (supports *, ?, [seq], [!seq]). - A filter ending with '/' is treated as a directory prefix (matches all files under it). + A filter ending with '/' matches all files under that directory. Relative filters (not starting with '/' or '*') are auto-prepended with '*/' so they match anywhere under the root — e.g. 'renders/*.exr' matches '*/renders/*.exr'. The file_path should be the full path (root + relative). """ - from fnmatch import fnmatch + + def _is_absolute(p: str) -> bool: + return p.startswith(("/", "*")) or (len(p) >= 2 and p[1] == ":") for f in filters: if f.endswith("/"): - if file_path.startswith(f): + pattern = f + "*" if _is_absolute(f) else "*/" + f + "*" + if fnmatch(file_path, pattern): return True else: - pattern = f if f.startswith(("/", "*")) else "*/" + f + pattern = f if _is_absolute(f) else "*/" + f if fnmatch(file_path, pattern): return True return False diff --git a/test/unit/deadline_job_attachments/test_path_filtering.py b/test/unit/deadline_job_attachments/test_path_filtering.py index 7cd2a1585..104c47223 100644 --- a/test/unit/deadline_job_attachments/test_path_filtering.py +++ b/test/unit/deadline_job_attachments/test_path_filtering.py @@ -97,6 +97,32 @@ def test_relative_glob(self): def test_relative_glob_no_match(self): assert _matches_any_filter("/home/user/renders/frame.png", ["renders/*.exr"]) is False + def test_windows_full_path_with_glob(self): + """Windows full paths (normalized) match glob patterns.""" + assert ( + _matches_any_filter("C:/Users/artist/project/renders/frame.exr", ["*/renders/*.exr"]) + is True + ) + + def test_windows_full_path_with_relative_filter(self): + """Relative filters match against normalized Windows full paths.""" + assert ( + _matches_any_filter("C:/Users/artist/project/renders/frame.exr", ["renders/frame.exr"]) + is True + ) + + def test_windows_full_path_with_directory_filter(self): + """Directory filters match against normalized Windows full paths.""" + assert ( + 
_matches_any_filter( + "C:/Users/artist/project/renders/frame.exr", ["C:/Users/artist/project/renders/"] + ) + is True + ) + + def test_windows_full_path_extension_glob(self): + assert _matches_any_filter("C:/Users/artist/project/frame.exr", ["*.exr"]) is True + class TestFullPath: def test_unix_root(self):