diff --git a/docs/job_attachments_guide.md b/docs/job_attachments_guide.md index 8f8af8951..630f8f93f 100644 --- a/docs/job_attachments_guide.md +++ b/docs/job_attachments_guide.md @@ -4,7 +4,7 @@ Job attachments uses your configured S3 bucket as a [content-addressable storage](https://en.wikipedia.org/wiki/Content-addressable_storage), which creates a snapshot of the files used in your job submission in [asset manifests](#asset-manifests), only uploading files that aren't already in S3. This saves you time and bandwidth when iterating on jobs. When an [AWS Deadline Cloud worker agent][worker-agent] starts working on a job with job attachments, it recreates the file system snapshot in the worker agent session directory, and uploads any outputs back to your S3 bucket. -You can then easily download your outputs with the [deadline job download-output] command, or using the [protocol handler](#protocol-handler) to download from a click of a button in the [AWS Deadline Cloud monitor][monitor]. +You can then easily download your outputs with the [deadline job download-output] command, or using the [protocol handler](#protocol-handler) to download from a click of a button in the [AWS Deadline Cloud monitor][monitor]. The command supports `--include` glob patterns and relative paths for downloading specific files or directories. Job attachments also works as an auxiliary storage when used with [AWS Deadline Cloud storage profiles][shared-storage], allowing you to flexibly upload files to your Amazon S3 bucket that aren't on your configured shared storage. 
class MatchPathsBy(str, Enum):
    """Which paths --include filters are matched against."""

    # Match against the paths recorded at job submission.
    JOB = "JOB"
    # Match against the local download paths (the CLI default).
    LOCAL = "LOCAL"


def _normalize_filters(filters: list[str]) -> list[str]:
    """
    Normalizes path filter patterns.

    - Converts backslashes to forward slashes (Windows compatibility)
    - Normalizes '//' to '/', except for a leading UNC prefix ('//server/...'),
      which is kept so the filter still lines up with UNC root paths
    - Strips every leading './' (after slashes are collapsed, so './/x' stays relative)
    - Drops filters that end up empty

    Args:
        filters: Raw patterns as supplied by the user.

    Returns:
        A new list with the normalized, non-empty patterns in their original order.
    """
    normalized: list[str] = []
    for raw in filters:
        pattern = raw.replace("\\", "/")

        # Exactly two leading slashes followed by a name is a UNC root
        # (\\server\share). Remember it before collapsing, otherwise the
        # pattern turns into '/server/share' and can never match.
        is_unc = pattern.startswith("//") and pattern[2:3] not in ("", "/")

        while "//" in pattern:
            pattern = pattern.replace("//", "/")

        # Strip after collapsing: stripping first would turn './/renders' into
        # the absolute-looking '/renders'.
        while pattern.startswith("./"):
            pattern = pattern[2:]

        if is_unc:
            pattern = "/" + pattern

        if pattern:
            normalized.append(pattern)
    return normalized
job_output_downloader = OutputDownloader( s3_settings=JobAttachmentS3Settings(**queue["jobAttachmentSettings"]), farm_id=farm_id, @@ -557,6 +564,7 @@ def _download_job_output( task_id=task_id, session_action_id=session_action_id, session=queue_role_session, + include_filters=include_patterns if match_paths_by == MatchPathsBy.JOB else None, ) def _check_and_warn_long_output_paths( @@ -603,9 +611,13 @@ def _check_and_warn_long_output_paths( session_action_id=session_action_id, session=queue_role_session, ) + if include_patterns and match_paths_by == MatchPathsBy.JOB: + manifests_by_root = _filter_manifests(manifests_by_root, include_patterns) mapped_manifests = _transform_manifests_to_absolute_paths( manifests_by_root, rules, resolved.job_profile.osFamily ) + if include_patterns and match_paths_by != MatchPathsBy.JOB: + mapped_manifests = _filter_manifests(mapped_manifests, include_patterns) if mapped_manifests: download_summary = _download_mapped_manifests( mapped_manifests=mapped_manifests, @@ -713,6 +725,15 @@ def _check_and_warn_long_output_paths( output_paths_by_root = job_output_downloader.get_output_paths_by_root() _check_and_warn_long_output_paths(output_paths_by_root) + # Apply include filters against workstation paths (default behavior). + # When --match-paths-by JOB is set, filtering was already applied at the job level. 
+ if include_patterns and match_paths_by != MatchPathsBy.JOB: + job_output_downloader.apply_include_filters(include_patterns) + output_paths_by_root = job_output_downloader.get_output_paths_by_root() + if output_paths_by_root == {}: + click.echo(_get_no_output_message(is_json_format)) + return + if not is_json_format: # Create and print a summary of all the paths to download all_output_paths: set[str] = set() @@ -966,6 +987,22 @@ def _assert_valid_path(path: str) -> None: @click.option("--job-id", help="The job to use.") @click.option("--step-id", help="The step to use.") @click.option("--task-id", help="The task to use.") +@click.option( + "-i", + "--include", + multiple=True, + help="Glob pattern or relative path for files to include in download. Matched against " + "the full path (root + relative). Supports *, ?, [seq]. A trailing / matches all " + "files under that directory. Repeatable", +) +@click.option( + "--match-paths-by", + type=click.Choice(["JOB", "LOCAL"], case_sensitive=False), + default="LOCAL", + help="Control which paths --include filters are matched against. " + "JOB matches against the paths recorded at job submission. " + "LOCAL matches against the local download paths (the default).", +) @click.option( "--ignore-storage-profiles", is_flag=True, @@ -1007,7 +1044,15 @@ def _assert_valid_path(path: str) -> None: "parsed/consumed by custom scripts.", ) @_handle_error -def job_download_output(step_id, task_id, output, ignore_storage_profiles, **args): +def job_download_output( + step_id, + task_id, + output, + ignore_storage_profiles, + include, + match_paths_by, + **args, +): """ Download the output of a Deadline Cloud job that was saved as job attachments. 
def _full_path(root: str, relative: str) -> str:
    """Join root and relative path, normalizing to forward slashes for consistent matching.

    Only the root has its backslashes converted; the relative path is used as given.
    Uses posixpath.join for consistency with _transform_manifests_to_absolute_paths
    in _job_download_helpers.py, which joins roots and manifest paths the same way.
    """
    return posixpath.join(root.replace("\\", "/"), relative)


def _matches_any_filter(file_path: str, filters: list[str]) -> bool:
    """
    Check if a file path matches any of the given filters using glob-style matching.

    Uses fnmatch for pattern matching (supports *, ?, [seq], [!seq]).
    A filter ending with '/' matches all files under that directory.
    Relative filters (not starting with '/' or '*', and without a drive letter) are
    auto-prepended with '*/' so they match anywhere under the root, e.g.
    'renders/*.exr' is matched as '*/renders/*.exr'.

    Args:
        file_path: The full path (root + relative), using forward slashes.
        filters: Normalized include patterns; an empty list matches nothing.

    Returns:
        True if at least one filter matches, otherwise False.
    """

    def _is_absolute(p: str) -> bool:
        # '/...' and '*...' are already anchored; 'C:...' is a Windows drive path.
        return p.startswith(("/", "*")) or (len(p) >= 2 and p[1] == ":")

    for f in filters:
        pattern = f if _is_absolute(f) else "*/" + f
        if f.endswith("/"):
            # Directory filter: keep the trailing '/' so 'renders/' does not
            # also match 'renders_v2/...', then accept anything below it.
            pattern += "*"
        if fnmatch(file_path, pattern):
            return True
    return False


def _filter_paths(
    paths_by_root: dict[str, ManifestPathGroup],
    path_filters: list[str],
) -> dict[str, ManifestPathGroup]:
    """
    Filter ManifestPathGroups using glob-style include patterns.

    Filters are matched against the full path (root + relative) to support
    patterns like '*/renders/*.png'.

    Returns:
        A new dict of new ManifestPathGroup objects holding only the matching
        files, with total_bytes recomputed from them. Roots with no matching
        files are omitted. The input groups are not modified.
    """
    filtered: dict[str, ManifestPathGroup] = {}
    for root, group in paths_by_root.items():
        filtered_group = ManifestPathGroup()
        for hash_alg, file_list in group.files_by_hash_alg.items():
            matching = [
                f for f in file_list if _matches_any_filter(_full_path(root, f.path), path_filters)
            ]
            if matching:
                # Keys of the source dict are unique and filtered_group is fresh,
                # so each hash algorithm is assigned exactly once.
                filtered_group.files_by_hash_alg[hash_alg] = matching
                filtered_group.total_bytes += sum(f.size for f in matching)
        if filtered_group.files_by_hash_alg:
            filtered[root] = filtered_group
    return filtered


def _filter_manifests(
    manifests_by_root: dict[str, list[BaseAssetManifest]],
    path_filters: list[str],
) -> dict[str, list[BaseAssetManifest]]:
    """
    Filter BaseAssetManifest objects using glob-style include patterns.

    Filters are matched against the full path (root + relative) to support
    patterns like '*/renders/*.png'.

    Returns:
        A new dict with shallow copies of the manifests whose paths have been
        filtered; empty manifests and empty roots are removed. The manifests
        passed in are left untouched.
    """
    import copy

    filtered: dict[str, list[BaseAssetManifest]] = {}
    for root, manifest_list in manifests_by_root.items():
        filtered_manifests = []
        for manifest in manifest_list:
            matching = [
                p
                for p in manifest.paths
                if _matches_any_filter(_full_path(root, p.path), path_filters)
            ]
            if matching:
                # Rebind `paths` on a copy so callers that still hold the
                # original manifest keep seeing its full, unfiltered contents.
                # NOTE(review): any aggregate size stored on the manifest is
                # copied as-is and is not recomputed here - confirm whether
                # downstream progress reporting relies on it.
                narrowed = copy.copy(manifest)
                narrowed.paths = matching
                filtered_manifests.append(narrowed)
        if filtered_manifests:
            filtered[root] = filtered_manifests
    return filtered
# ---- helpers -----------------------------------------------------------------


def _seed_output_job(
    backend: MockDeadlineBackend,
    s3_endpoint: str,
    farm_id: str,
    queue_id: str,
    job_id: str,
    asset_root: str,
    files: dict[str, bytes],
    step_id: str = "step-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa0",
    task_id: str = "task-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa0-0",
) -> None:
    """Seed S3 with CAS objects + output manifest and register the job in the mock backend.

    Args:
        backend: Mock backend whose ``jobs`` mapping receives the new job entry.
        s3_endpoint: Endpoint URL of the S3 service the objects are written to.
        farm_id, queue_id, job_id: Identifiers used in the manifest key and job entry.
        asset_root: Root path stored as the manifest's ``asset-root`` metadata and
            as the job attachment ``rootPath``.
        files: Mapping of manifest-relative path to file content.
        step_id, task_id: Identifiers used in the output manifest key.
    """
    s3 = _s3_client(s3_endpoint)
    manifest_paths = []
    for rel_path, content in files.items():
        # Each file is stored under its content hash, and listed in the manifest.
        file_hash = hash_data(content, HashAlgorithm.XXH128)
        s3.put_object(Bucket=BUCKET, Key=f"{ROOT_PREFIX}/Data/{file_hash}.xxh128", Body=content)
        manifest_paths.append(
            {"hash": file_hash, "mtime": 1234000000, "path": rel_path, "size": len(content)}
        )

    manifest_body = json.dumps(
        {
            "hashAlg": "xxh128",
            "manifestVersion": "2023-03-03",
            "paths": manifest_paths,
            "totalSize": sum(len(c) for c in files.values()),
        }
    ).encode()
    manifest_key = (
        f"{ROOT_PREFIX}/Manifests/{farm_id}/{queue_id}/{job_id}/{step_id}/{task_id}/"
        f"sessionaction-0/outputmanifestv2023-03-03_output"
    )
    s3.put_object(
        Bucket=BUCKET, Key=manifest_key, Body=manifest_body, Metadata={"asset-root": asset_root}
    )

    backend.jobs[(farm_id, queue_id, job_id)] = {
        "jobId": job_id,
        "name": f"test-job-{job_id[-4:]}",
        "lifecycleStatus": "CREATE_COMPLETE",
        "lifecycleStatusMessage": "",
        "priority": 50,
        "createdAt": backend._now(),
        "createdBy": "tester",
        "taskRunStatus": "READY",
        "attachments": {
            "manifests": [
                {
                    "rootPath": asset_root,
                    "rootPathFormat": "windows" if os.name == "nt" else "posix",
                }
            ],
            "fileSystem": "COPIED",
        },
    }


def _prepare_output_job(deadline_setup, tmp_path, job_id: str, root_name: str, files):
    """Configure CLI defaults, create ``tmp_path / root_name`` and seed a job with ``files``.

    Returns the subprocess environment and the asset root as a ``Path``.
    """
    backend, farm_id, queue_id, env = deadline_setup
    _configure_defaults(env, farm_id, queue_id)

    asset_root = tmp_path / root_name
    asset_root.mkdir()
    _seed_output_job(
        backend, env["AWS_ENDPOINT_URL_S3"], farm_id, queue_id, job_id, str(asset_root), files
    )
    return env, asset_root


def _download_output(env, job_id: str, *filter_args: str):
    """Run ``deadline job download-output`` with ``filter_args`` and assert it exits with 0.

    Returns the result object produced by ``_run`` so callers can inspect stdout.
    """
    r = _run(
        env,
        "job",
        "download-output",
        "--job-id",
        job_id,
        *filter_args,
        "--conflict-resolution",
        "OVERWRITE",
        "--yes",
    )
    assert r.returncode == 0, f"download-output failed: {r.stderr}\nstdout: {r.stdout}"
    return r


# ---- tests ------------------------------------------------------------------


def test_cli_job_download_output_include_path(deadline_setup, tmp_path):
    """
    `deadline job download-output --include` with a glob pattern
    downloads only files matching the pattern against the full path.
    """
    job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1"
    files = {
        "renders/frame_001.exr": b"frame-one",
        "renders/frame_002.exr": b"frame-two",
        "logs/render.log": b"log-data",
    }
    env, asset_root = _prepare_output_job(
        deadline_setup, tmp_path, job_id, "filtered_outputs", files
    )

    _download_output(env, job_id, "--include", "*/renders/*")

    assert (asset_root / "renders" / "frame_001.exr").read_bytes() == b"frame-one"
    assert (asset_root / "renders" / "frame_002.exr").read_bytes() == b"frame-two"
    assert not (asset_root / "logs" / "render.log").exists()


def test_cli_job_download_output_include_path_exact_file(deadline_setup, tmp_path):
    """
    `deadline job download-output --include` with an exact file glob
    downloads only that single file.
    """
    job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa2"
    files = {
        "renders/frame_001.exr": b"frame-one",
        "renders/frame_002.exr": b"frame-two",
    }
    env, asset_root = _prepare_output_job(deadline_setup, tmp_path, job_id, "exact_outputs", files)

    _download_output(env, job_id, "--include", "*/frame_001.exr")

    assert (asset_root / "renders" / "frame_001.exr").read_bytes() == b"frame-one"
    assert not (asset_root / "renders" / "frame_002.exr").exists()


def test_cli_job_download_output_include_path_multiple(deadline_setup, tmp_path):
    """
    Multiple --include values are OR'd: files matching any filter are downloaded.
    """
    job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa3"
    files = {
        "renders/frame_001.exr": b"frame-one",
        "logs/render.log": b"log-data",
        "scripts/setup.mel": b"mel-script",
    }
    env, asset_root = _prepare_output_job(deadline_setup, tmp_path, job_id, "multi_outputs", files)

    _download_output(env, job_id, "--include", "*/frame_001.exr", "--include", "*/scripts/*")

    assert (asset_root / "renders" / "frame_001.exr").read_bytes() == b"frame-one"
    assert (asset_root / "scripts" / "setup.mel").read_bytes() == b"mel-script"
    assert not (asset_root / "logs" / "render.log").exists()


def test_cli_job_download_output_include_path_no_match(deadline_setup, tmp_path):
    """
    --include with a filter that matches nothing reports no output files.
    """
    job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4"
    files = {"renders/frame_001.exr": b"frame-one"}
    env, asset_root = _prepare_output_job(
        deadline_setup, tmp_path, job_id, "nomatch_outputs", files
    )

    r = _download_output(env, job_id, "--include", "nonexistent.txt")
    assert "no output files available" in r.stdout.lower()

    assert not (asset_root / "renders" / "frame_001.exr").exists()


def test_cli_job_download_output_include_matches_full_workstation_path(deadline_setup, tmp_path):
    """
    --include patterns match against the full workstation path (root + relative)
    by default, so a pattern containing part of the root directory name works.
    """
    job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa5"
    files = {
        "renders/frame_001.exr": b"frame-one",
        "logs/render.log": b"log-data",
    }
    env, asset_root = _prepare_output_job(
        deadline_setup, tmp_path, job_id, "fullpath_outputs", files
    )

    # Use a pattern that includes the workstation root directory name
    _download_output(env, job_id, "--include", "*fullpath_outputs/renders/*")

    assert (asset_root / "renders" / "frame_001.exr").read_bytes() == b"frame-one"
    assert not (asset_root / "logs" / "render.log").exists()


def test_cli_job_download_output_match_paths_by_job_flag(deadline_setup, tmp_path):
    """
    --match-paths-by JOB causes --include to filter against the original source
    paths rather than the workstation paths.
    """
    job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa6"
    files = {
        "renders/frame_001.exr": b"frame-one",
        "logs/render.log": b"log-data",
    }
    env, asset_root = _prepare_output_job(
        deadline_setup, tmp_path, job_id, "subpath_outputs", files
    )

    # Pattern uses the job root path (same as asset_root in this case)
    _download_output(
        env, job_id, "--include", "*subpath_outputs/renders/*", "--match-paths-by", "JOB"
    )

    assert (asset_root / "renders" / "frame_001.exr").read_bytes() == b"frame-one"
    assert not (asset_root / "logs" / "render.log").exists()


def test_cli_job_download_output_relative_path_filter(deadline_setup, tmp_path):
    """
    --include with a plain relative path (no globs) matches as a suffix
    against the full workstation path. This is the DCM use case.
    """
    job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaab01"
    files = {
        "renders/frame_001.exr": b"frame-one",
        "renders/frame_002.exr": b"frame-two",
        "logs/render.log": b"log-data",
    }
    env, asset_root = _prepare_output_job(
        deadline_setup, tmp_path, job_id, "relpath_outputs", files
    )

    _download_output(env, job_id, "--include", "renders/frame_001.exr")

    assert (asset_root / "renders" / "frame_001.exr").read_bytes() == b"frame-one"
    assert not (asset_root / "renders" / "frame_002.exr").exists()
    assert not (asset_root / "logs" / "render.log").exists()


def test_cli_job_download_output_relative_paths_with_match_paths_by_job(deadline_setup, tmp_path):
    """
    --include with relative paths and --match-paths-by JOB filters against
    job paths. This is the DCM integration path.
    """
    job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaab02"
    files = {
        "renders/frame_001.exr": b"frame-one",
        "renders/frame_002.exr": b"frame-two",
        "logs/render.log": b"log-data",
    }
    env, asset_root = _prepare_output_job(
        deadline_setup, tmp_path, job_id, "relsubmit_outputs", files
    )

    _download_output(
        env,
        job_id,
        "--include",
        "renders/frame_001.exr",
        "--include",
        "logs/render.log",
        "--match-paths-by",
        "JOB",
    )

    assert (asset_root / "renders" / "frame_001.exr").read_bytes() == b"frame-one"
    assert (asset_root / "logs" / "render.log").read_bytes() == b"log-data"
    assert not (asset_root / "renders" / "frame_002.exr").exists()


def test_cli_job_download_output_glob_pattern(deadline_setup, tmp_path):
    """
    --include with glob patterns (e.g. *.exr) filters using fnmatch against full paths.
    """
    job_id = "job-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa7"
    files = {
        "renders/frame_001.exr": b"frame-one",
        "renders/frame_002.png": b"frame-two-png",
        "renders/frame_003.exr": b"frame-three",
    }
    env, asset_root = _prepare_output_job(deadline_setup, tmp_path, job_id, "glob_outputs", files)

    _download_output(env, job_id, "--include", "*.exr")

    assert (asset_root / "renders" / "frame_001.exr").read_bytes() == b"frame-one"
    assert (asset_root / "renders" / "frame_003.exr").read_bytes() == b"frame-three"
    assert not (asset_root / "renders" / "frame_002.png").exists()
--git a/test/unit/deadline_client/cli/test_cli_job.py b/test/unit/deadline_client/cli/test_cli_job.py index a1d08da29..3a514c627 100644 --- a/test/unit/deadline_client/cli/test_cli_job.py +++ b/test/unit/deadline_client/cli/test_cli_job.py @@ -377,6 +377,7 @@ def test_cli_job_download_output_stdout_with_only_required_input( task_id=None, session_action_id=None, session=ANY, + include_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -490,6 +491,7 @@ def test_cli_job_download_output_stdout_with_mismatching_path_format( task_id=None, session_action_id=None, session=ANY, + include_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -590,6 +592,7 @@ def test_cli_job_download_output_handles_unc_path_on_windows(fresh_deadline_conf task_id=None, session_action_id=None, session=ANY, + include_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -673,6 +676,7 @@ def test_cli_job_download_no_output_stdout(fresh_deadline_config, tmp_path: Path task_id=None, session_action_id=None, session=ANY, + include_filters=None, ) assert ( @@ -774,6 +778,7 @@ def test_cli_job_download_output_stdout_with_json_format( task_id=None, session_action_id=None, session=ANY, + include_filters=None, ) expected_json_title = {"messageType": "title", "value": "Mock Job"} @@ -1390,6 +1395,7 @@ def test_cli_job_download_output_handle_web_url_with_optional_input( task_id="task-2", session_action_id=MOCK_SESSION_ACTION_ID, session=ANY, + include_filters=None, ) mock_download.assert_called_once_with( file_conflict_resolution=FileConflictResolution.CREATE_COPY, @@ -1476,6 +1482,7 @@ def test_cli_job_download_output_with_different_asset_root_path_format_than_job( task_id=None, session_action_id=None, session=ANY, + include_filters=None, ) path_separator = "/" if sys.platform != "win32" else "\\" @@ -1707,6 +1714,7 @@ def test_cli_job_download_output_with_session_action_id(fresh_deadline_config): task_id=MOCK_TASK_ID, 
class TestNormalizeFilters:
    """Checks for the normalization applied to --include patterns before matching."""

    def test_converts_backslashes(self):
        """Windows separators are rewritten to forward slashes."""
        assert _normalize_filters(["renders\\frame_001.exr"]) == ["renders/frame_001.exr"]

    def test_strips_leading_dot_slash(self):
        """A leading './' is removed."""
        assert _normalize_filters(["./renders/frame.exr"]) == ["renders/frame.exr"]

    def test_collapses_double_slashes(self):
        """A doubled separator inside a pattern becomes a single one."""
        assert _normalize_filters(["renders//frame.exr"]) == ["renders/frame.exr"]

    def test_empty_filter_removed(self):
        """Empty strings are dropped while the other entries are kept."""
        assert _normalize_filters(["", "a.txt"]) == ["a.txt"]

    def test_passthrough_normal_paths(self):
        """Already-normalized paths come back unchanged and in order."""
        patterns = ["renders/frame_001.exr", "textures/", "scripts/setup.mel"]
        expected = ["renders/frame_001.exr", "textures/", "scripts/setup.mel"]
        assert _normalize_filters(patterns) == expected

    def test_passthrough_glob_patterns(self):
        """Glob metacharacters are left untouched."""
        patterns = ["renders/*.exr", "**/*.png", "textures/wood[0-9].jpg"]
        expected = ["renders/*.exr", "**/*.png", "textures/wood[0-9].jpg"]
        assert _normalize_filters(patterns) == expected

    def test_multiple_normalizations(self):
        """Backslashes, a leading dot segment and a doubled separator are all handled."""
        assert _normalize_filters([".\\renders\\\\frame.exr"]) == ["renders/frame.exr"]

    def test_empty_input_returns_empty(self):
        """No filters in, no filters out."""
        assert _normalize_filters([]) == []

    def test_all_empty_returns_empty(self):
        """A list of only empty strings normalizes to an empty list."""
        assert _normalize_filters(["", ""]) == []
class TestMatchesAnyFilter:
    """Checks for glob-style include matching against full (root + relative) paths."""

    def test_exact_match(self):
        matched = _matches_any_filter("/root/renders/frame_001.exr", ["renders/frame_001.exr"])
        assert matched is True

    def test_exact_no_match(self):
        matched = _matches_any_filter("/root/renders/frame_002.exr", ["renders/frame_001.exr"])
        assert matched is False

    def test_directory_prefix_match(self):
        matched = _matches_any_filter("/root/renders/frame_001.exr", ["/root/renders/"])
        assert matched is True

    def test_directory_prefix_no_match(self):
        matched = _matches_any_filter("/root/textures/wood.exr", ["/root/renders/"])
        assert matched is False

    def test_directory_prefix_does_not_match_similar_names(self):
        """'renders/' should NOT match 'renders_v2/file.exr'"""
        matched = _matches_any_filter("/root/renders_v2/file.exr", ["/root/renders/"])
        assert matched is False

    def test_multiple_filters_or(self):
        filters = ["renders/frame_001.exr", "/root/textures/"]
        matched = _matches_any_filter("/root/textures/wood.exr", filters)
        assert matched is True

    def test_empty_filters(self):
        matched = _matches_any_filter("/root/renders/frame_001.exr", [])
        assert matched is False

    def test_nested_directory_prefix(self):
        matched = _matches_any_filter("/root/a/b/c/file.txt", ["/root/a/b/"])
        assert matched is True

    def test_glob_wildcard(self):
        matched = _matches_any_filter("/root/renders/frame_001.exr", ["renders/*.exr"])
        assert matched is True

    def test_glob_wildcard_no_match(self):
        matched = _matches_any_filter("/root/renders/frame_001.png", ["renders/*.exr"])
        assert matched is False

    def test_glob_question_mark(self):
        matched = _matches_any_filter("/root/renders/frame_00x.exr", ["renders/frame_00?.exr"])
        assert matched is True

    def test_glob_recursive(self):
        matched = _matches_any_filter("/root/a/b/c.txt", ["*/a/*/c.txt"])
        assert matched is True

    def test_glob_full_path_wildcard(self):
        """Patterns like '*/renders/*.png' should match against full paths."""
        matched = _matches_any_filter("/home/user/renders/frame.png", ["*/renders/*.png"])
        assert matched is True

    def test_glob_extension_only(self):
        """Simple extension patterns like '*.png' should match full paths."""
        matched = _matches_any_filter("/root/renders/frame.png", ["*.png"])
        assert matched is True

    def test_relative_path_suffix_match(self):
        """Relative paths are auto-prepended with */ and matched via fnmatch."""
        matched = _matches_any_filter("/home/user/renders/frame.exr", ["renders/frame.exr"])
        assert matched is True

    def test_relative_path_no_partial_match(self):
        """Relative path must match complete path segments."""
        matched = _matches_any_filter("/home/user/xrenders/frame.exr", ["renders/frame.exr"])
        assert matched is False

    def test_relative_path_exact_file(self):
        """Single filename matches anywhere under root."""
        matched = _matches_any_filter("/root/renders/frame.exr", ["frame.exr"])
        assert matched is True

    def test_relative_path_no_match(self):
        matched = _matches_any_filter("/root/renders/frame.exr", ["other.exr"])
        assert matched is False

    def test_relative_glob(self):
        """Relative globs like 'renders/*.exr' match without needing a leading */."""
        matched = _matches_any_filter("/home/user/renders/frame.exr", ["renders/*.exr"])
        assert matched is True

    def test_relative_glob_no_match(self):
        matched = _matches_any_filter("/home/user/renders/frame.png", ["renders/*.exr"])
        assert matched is False

    def test_windows_full_path_with_glob(self):
        """Windows full paths (normalized) match glob patterns."""
        path = "C:/Users/artist/project/renders/frame.exr"
        assert _matches_any_filter(path, ["*/renders/*.exr"]) is True

    def test_windows_full_path_with_relative_filter(self):
        """Relative filters match against normalized Windows full paths."""
        path = "C:/Users/artist/project/renders/frame.exr"
        assert _matches_any_filter(path, ["renders/frame.exr"]) is True

    def test_windows_full_path_with_directory_filter(self):
        """Directory filters match against normalized Windows full paths."""
        path = "C:/Users/artist/project/renders/frame.exr"
        assert _matches_any_filter(path, ["C:/Users/artist/project/renders/"]) is True

    def test_windows_full_path_extension_glob(self):
        matched = _matches_any_filter("C:/Users/artist/project/frame.exr", ["*.exr"])
        assert matched is True
_full_path("C:\\root\\", "file.txt") == "C:/root/file.txt" + + +class TestFilterPaths: + def _make_group(self, paths: List[str]) -> ManifestPathGroup: + group = ManifestPathGroup() + group.files_by_hash_alg[HashAlgorithm.XXH128] = [ + ManifestPathv2023_03_03(path=p, hash="abc123", size=100, mtime=1234000000) + for p in paths + ] + group.total_bytes = len(paths) * 100 + return group + + def test_glob_against_full_path(self): + """Filters match against root + relative path.""" + paths_by_root = { + "/home/user/project": self._make_group(["renders/a.exr", "textures/b.png"]) + } + result = _filter_paths(paths_by_root, ["*/renders/*.exr"]) + files = [ + f.path for f in result["/home/user/project"].files_by_hash_alg[HashAlgorithm.XXH128] + ] + assert files == ["renders/a.exr"] + + def test_extension_filter_against_full_path(self): + """Simple extension patterns match against full paths.""" + paths_by_root = {"/root": self._make_group(["a.exr", "b.png"])} + result = _filter_paths(paths_by_root, ["*.exr"]) + files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] + assert files == ["a.exr"] + + def test_directory_prefix_filter(self): + paths_by_root = { + "/root": self._make_group(["renders/a.exr", "renders/b.exr", "textures/c.png"]) + } + result = _filter_paths(paths_by_root, ["/root/renders/"]) + files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] + assert files == ["renders/a.exr", "renders/b.exr"] + + def test_no_matches_returns_empty(self): + paths_by_root = {"/root": self._make_group(["a.txt"])} + result = _filter_paths(paths_by_root, ["nonexistent.txt"]) + assert result == {} + + def test_multiple_asset_roots(self): + paths_by_root = { + "/root1": self._make_group(["shared/file.txt", "other.txt"]), + "/root2": self._make_group(["shared/file.txt", "different.txt"]), + } + result = _filter_paths(paths_by_root, ["*/shared/file.txt"]) + assert "/root1" in result + assert "/root2" in result + + def 
test_mixed_filters(self): + paths_by_root = { + "/root": self._make_group( + ["renders/a.exr", "renders/b.exr", "textures/c.png", "scripts/setup.mel"] + ) + } + result = _filter_paths(paths_by_root, ["/root/renders/", "*/setup.mel"]) + files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] + assert set(files) == {"renders/a.exr", "renders/b.exr", "scripts/setup.mel"} + + def test_empty_root_removed(self): + paths_by_root = { + "/has_match": self._make_group(["a.txt"]), + "/no_match": self._make_group(["b.txt"]), + } + result = _filter_paths(paths_by_root, ["*/a.txt"]) + assert "/has_match" in result + assert "/no_match" not in result + + def test_glob_pattern(self): + paths_by_root = { + "/root": self._make_group(["renders/a.exr", "renders/b.png", "textures/c.exr"]) + } + result = _filter_paths(paths_by_root, ["*/renders/*.exr"]) + files = [f.path for f in result["/root"].files_by_hash_alg[HashAlgorithm.XXH128]] + assert files == ["renders/a.exr"] + + def test_windows_root_path(self): + """Windows backslash roots are normalized so forward-slash patterns match.""" + paths_by_root = { + "C:\\Users\\artist\\project": self._make_group(["renders/a.exr", "logs/b.log"]) + } + result = _filter_paths(paths_by_root, ["*/renders/*.exr"]) + files = [ + f.path + for f in result["C:\\Users\\artist\\project"].files_by_hash_alg[HashAlgorithm.XXH128] + ] + assert files == ["renders/a.exr"] + + def test_relative_path_filter(self): + """Plain relative paths match as suffix against full path.""" + paths_by_root = {"/home/user/project": self._make_group(["renders/a.exr", "logs/b.log"])} + result = _filter_paths(paths_by_root, ["renders/a.exr"]) + files = [ + f.path for f in result["/home/user/project"].files_by_hash_alg[HashAlgorithm.XXH128] + ] + assert files == ["renders/a.exr"] + + +class TestFilterManifests: + def _make_manifest(self, paths: List[str]) -> BaseAssetManifest: + manifest_paths: List[BaseManifestPath] = [ + ManifestPathv2023_03_03(path=p, 
hash="abc123", size=100, mtime=1234000000) + for p in paths + ] + return AssetManifestv2023_03_03( + hash_alg=HashAlgorithm.XXH128, + paths=manifest_paths, + total_size=len(paths) * 100, + ) + + def test_glob_against_full_path(self): + """Filters match against root + relative path.""" + manifests_by_root = { + "/home/user": [self._make_manifest(["renders/a.exr", "textures/b.png"])] + } + result = _filter_manifests(manifests_by_root, ["*/renders/*.exr"]) + assert [p.path for p in result["/home/user"][0].paths] == ["renders/a.exr"] + + def test_directory_prefix_filter(self): + manifests_by_root = { + "/root": [self._make_manifest(["renders/a.exr", "renders/b.exr", "textures/c.png"])] + } + result = _filter_manifests(manifests_by_root, ["/root/renders/"]) + assert [p.path for p in result["/root"][0].paths] == ["renders/a.exr", "renders/b.exr"] + + def test_no_matches_returns_empty(self): + manifests_by_root = {"/root": [self._make_manifest(["a.txt"])]} + result = _filter_manifests(manifests_by_root, ["nonexistent.txt"]) + assert result == {} + + def test_empty_root_removed(self): + manifests_by_root = { + "/has_match": [self._make_manifest(["a.txt"])], + "/no_match": [self._make_manifest(["b.txt"])], + } + result = _filter_manifests(manifests_by_root, ["*/a.txt"]) + assert "/has_match" in result + assert "/no_match" not in result + + def test_multiple_manifests_per_root(self): + manifests_by_root = { + "/root": [ + self._make_manifest(["a.txt", "b.txt"]), + self._make_manifest(["c.txt", "d.txt"]), + ] + } + result = _filter_manifests(manifests_by_root, ["*/a.txt", "*/c.txt"]) + assert len(result["/root"]) == 2 + assert [p.path for p in result["/root"][0].paths] == ["a.txt"] + assert [p.path for p in result["/root"][1].paths] == ["c.txt"]