-
Notifications
You must be signed in to change notification settings - Fork 70
feat(cli): Add --include-path selective filtering to download-output #1108
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: mainline
Are you sure you want to change the base?
Changes from all commits
2ee1e48
4e93020
6dbdd41
4cc2547
8476651
6c93c4e
dd8bdee
bba6ea5
7bc6204
1c9f256
73173a3
ef78ded
6713c0e
01151ae
a361c5e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,10 +7,12 @@ | |
| import concurrent.futures | ||
| import json | ||
| import os | ||
| import posixpath | ||
| import re | ||
| import time | ||
| from collections import defaultdict | ||
| from datetime import datetime | ||
| from fnmatch import fnmatch | ||
| from itertools import chain | ||
| from logging import Logger, LoggerAdapter, getLogger | ||
| from pathlib import Path | ||
|
|
@@ -73,7 +75,6 @@ | |
| from ._utils import ( | ||
| _get_long_path_compatible_path, | ||
| _is_relative_to, | ||
| _join_s3_paths, | ||
| ) | ||
| from threading import Lock | ||
|
|
||
|
|
@@ -325,7 +326,9 @@ def get_job_input_paths_by_asset_root( | |
|
|
||
| for manifest_properties in attachments.manifests: | ||
| if manifest_properties.inputManifestPath: | ||
| key = _join_s3_paths(manifest_properties.inputManifestPath) | ||
| key = s3_settings.add_root_and_manifest_folder_prefix( | ||
| manifest_properties.inputManifestPath | ||
| ) | ||
| _, asset_manifest = get_asset_root_and_manifest_from_s3( | ||
| manifest_key=key, | ||
| s3_bucket=s3_settings.s3BucketName, | ||
|
|
@@ -1243,6 +1246,94 @@ def mount_vfs_from_manifests( | |
| vfs_manager.start(session_dir=session_dir) | ||
|
|
||
|
|
||
| def _full_path(root: str, relative: str) -> str: | ||
|
Contributor
There's other code that joins the roots and manifest paths. Can we use the same mechanism here to make sure we don't have differences?
Contributor
WARNING - we cannot merge this now.
Contributor
Thanks for the heads up, I'll raise a new PR against the new repo.
||
| """Join root and relative path, normalizing to forward slashes for consistent matching. | ||
|
|
||
| Uses posixpath.join for consistency with _transform_manifests_to_absolute_paths | ||
| in _job_download_helpers.py, which joins roots and manifest paths the same way. | ||
| """ | ||
| return posixpath.join(root.replace("\\", "/"), relative) | ||
|
|
||
|
|
||
| def _matches_any_filter(file_path: str, filters: list[str]) -> bool: | ||
| """ | ||
| Check if a file path matches any of the given filters using glob-style matching. | ||
| Uses fnmatch for pattern matching (supports *, ?, [seq], [!seq]). | ||
| A filter ending with '/' matches all files under that directory. | ||
| Relative filters (not starting with '/' or '*') are auto-prepended with '*/' so they | ||
| match anywhere under the root — e.g. 'renders/*.exr' matches '*/renders/*.exr'. | ||
| The file_path should be the full path (root + relative). | ||
| """ | ||
|
|
||
| def _is_absolute(p: str) -> bool: | ||
| return p.startswith(("/", "*")) or (len(p) >= 2 and p[1] == ":") | ||
|
|
||
| for f in filters: | ||
| if f.endswith("/"): | ||
| pattern = f + "*" if _is_absolute(f) else "*/" + f + "*" | ||
| if fnmatch(file_path, pattern): | ||
| return True | ||
| else: | ||
| pattern = f if _is_absolute(f) else "*/" + f | ||
| if fnmatch(file_path, pattern): | ||
| return True | ||
| return False | ||
|
|
||
|
|
||
def _filter_paths(
    paths_by_root: dict[str, ManifestPathGroup],
    path_filters: list[str],
) -> dict[str, ManifestPathGroup]:
    """
    Filter ManifestPathGroups using glob-style include patterns.

    Filters are matched against the full path (root + relative) to support
    patterns like '*/renders/*.png'.

    Args:
        paths_by_root: Mapping of root path to the group of files under it.
        path_filters: Include patterns; a file is kept when it matches any one.

    Returns:
        A new dict holding a freshly built ManifestPathGroup per root. Each
        group contains only the matching files, and its ``total_bytes`` is the
        sum of their sizes. Roots with no matching files are omitted.
    """
    filtered: dict[str, ManifestPathGroup] = {}
    for root, group in paths_by_root.items():
        filtered_group = ManifestPathGroup()
        for hash_alg, file_list in group.files_by_hash_alg.items():
            matching = [
                f for f in file_list if _matches_any_filter(_full_path(root, f.path), path_filters)
            ]
            if not matching:
                continue
            # filtered_group is new and each hash_alg key is visited once per
            # group, so a plain assignment is enough (no merge case exists).
            filtered_group.files_by_hash_alg[hash_alg] = matching
            filtered_group.total_bytes += sum(f.size for f in matching)
        if filtered_group.files_by_hash_alg:
            filtered[root] = filtered_group
    return filtered
|
|
||
|
|
||
def _filter_manifests(
    manifests_by_root: dict[str, list[BaseAssetManifest]],
    path_filters: list[str],
) -> dict[str, list[BaseAssetManifest]]:
    """
    Filter BaseAssetManifest objects using glob-style include patterns.

    Filters are matched against the full path (root + relative) to support
    patterns like '*/renders/*.png'.

    The input manifests are left untouched: each manifest with at least one
    matching path is shallow-copied, and only the copy's ``paths`` is replaced
    with the filtered list.

    Args:
        manifests_by_root: Mapping of root path to the manifests under it.
        path_filters: Include patterns; a path is kept when it matches any one.

    Returns:
        A new dict of filtered manifest copies. Manifests with no matching
        paths, and roots left with no manifests, are omitted.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import block.
    import copy

    filtered: dict[str, list[BaseAssetManifest]] = {}
    for root, manifest_list in manifests_by_root.items():
        filtered_manifests = []
        for manifest in manifest_list:
            matching = [
                p
                for p in manifest.paths
                if _matches_any_filter(_full_path(root, p.path), path_filters)
            ]
            if not matching:
                continue
            # Filter a copy so the caller's manifest keeps its full path list
            # and can be filtered again with different patterns.
            trimmed = copy.copy(manifest)
            trimmed.paths = matching
            filtered_manifests.append(trimmed)
        if filtered_manifests:
            filtered[root] = filtered_manifests
    return filtered
|
|
||
|
|
||
| def _ensure_paths_within_directory(root_path: str, paths_relative_to_root: list[str]) -> None: | ||
| """ | ||
| Validates the given paths to ensure that they are within the given root path. | ||
|
|
@@ -1281,6 +1372,7 @@ def __init__( | |
| task_id: Optional[str] = None, | ||
| session_action_id: Optional[str] = None, | ||
| session: Optional[boto3.Session] = None, | ||
| include_filters: Optional[list[str]] = None, | ||
| ) -> None: | ||
| self.s3_settings = s3_settings | ||
| self.session = session | ||
|
|
@@ -1294,6 +1386,8 @@ def __init__( | |
| session_action_id=session_action_id, | ||
| session=session, | ||
| ) | ||
| if include_filters: | ||
| self.outputs_by_root = _filter_paths(self.outputs_by_root, include_filters) | ||
|
|
||
| def get_output_paths_by_root(self) -> dict[str, list[str]]: | ||
| """ | ||
|
|
@@ -1305,6 +1399,10 @@ def get_output_paths_by_root(self) -> dict[str, list[str]]: | |
| output_paths_by_root[root] = path_group.get_all_paths() | ||
| return output_paths_by_root | ||
|
|
||
def apply_include_filters(self, include_filters: list[str]) -> None:
    """
    Apply glob-style include filters against the current workstation paths.

    Replaces ``self.outputs_by_root`` with the result of ``_filter_paths``.
    An empty (or otherwise falsy) ``include_filters`` is a no-op, matching the
    constructor, which only filters when include filters are supplied.

    Args:
        include_filters: Include patterns; outputs matching none are dropped.
    """
    if not include_filters:
        # No filters means "keep everything", not "match nothing".
        return
    self.outputs_by_root = _filter_paths(self.outputs_by_root, include_filters)
|
|
||
| def set_root_path(self, original_root: str, new_root: str) -> None: | ||
| """ | ||
| Changes the root path for downloading output files, (which is the root path | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.