diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index d26338e..1ecc22a 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -14,7 +14,8 @@ convert_item_or_items_into_compiled_regexes_else_none, get_id, type_is_subclass_of_type_group, type_in_type_group, number_to_string, datetime_normalize, KEY_TO_VAL_STR, - get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel) + get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel, + separate_wildcard_and_exact_paths) from deepdiff.base import Base @@ -189,6 +190,7 @@ def __init__(self, custom_operators: Optional[List[Any]] = None, default_timezone: Union[datetime.timezone, "BaseTzInfo"] = datetime.timezone.utc, encodings: Optional[List[str]] = None, + exclude_glob_paths: Optional[List[Any]] = None, exclude_obj_callback: Optional[Callable[[Any, str], bool]] = None, exclude_paths: Optional[PathType] = None, exclude_regex_paths: Optional[RegexType] = None, @@ -205,6 +207,7 @@ def __init__(self, ignore_type_in_groups: Any = None, ignore_type_subclasses: bool = False, ignore_uuid_types: bool = False, + include_glob_paths: Optional[List[Any]] = None, include_paths: Optional[PathType] = None, number_format_notation: str = "f", number_to_string_func: Optional[NumberToStringFunc] = None, @@ -231,8 +234,14 @@ def __init__(self, exclude_types = set() if exclude_types is None else set(exclude_types) self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance self.ignore_repetition = ignore_repetition - self.exclude_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(exclude_paths)) - self.include_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(include_paths)) + _exclude_set = convert_item_or_items_into_set_else_none(exclude_paths) + _exclude_exact, _exclude_globs = separate_wildcard_and_exact_paths(_exclude_set) + self.exclude_paths = add_root_to_paths(_exclude_exact) + self.exclude_glob_paths = exclude_glob_paths or _exclude_globs + 
_include_set = convert_item_or_items_into_set_else_none(include_paths) + _include_exact, _include_globs = separate_wildcard_and_exact_paths(_include_set) + self.include_paths = add_root_to_paths(_include_exact) + self.include_glob_paths = include_glob_paths or _include_globs self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) self.hasher = default_hasher if hasher is None else hasher self.hashes[UNPROCESSED_KEY] = [] # type: ignore @@ -461,11 +470,21 @@ def _skip_this(self, obj: Any, parent: str) -> bool: skip = False if self.exclude_paths and parent in self.exclude_paths: skip = True - if self.include_paths and parent != 'root': - if parent not in self.include_paths: - skip = True - for prefix in self.include_paths: - if parent.startswith(prefix): + elif self.exclude_glob_paths and any(gp.match(parent) for gp in self.exclude_glob_paths): + skip = True + if (self.include_paths or self.include_glob_paths) and parent != 'root': + skip = True + if self.include_paths: + if parent in self.include_paths: + skip = False + else: + for prefix in self.include_paths: + if parent.startswith(prefix): + skip = False + break + if skip and self.include_glob_paths: + for gp in self.include_glob_paths: + if gp.match_or_is_ancestor(parent): skip = False break elif self.exclude_regex_paths and any( diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 2931cef..4a1314e 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -29,7 +29,8 @@ TEXT_VIEW, TREE_VIEW, DELTA_VIEW, COLORED_VIEW, COLORED_COMPACT_VIEW, detailed__dict__, add_root_to_paths, np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS, - PydanticBaseModel, Opcode, SetOrdered, ipranges) + PydanticBaseModel, Opcode, SetOrdered, ipranges, + separate_wildcard_and_exact_paths) from deepdiff.serialization import SerializationMixin from deepdiff.distance import DistanceMixin, logarithmic_similarity from deepdiff.model import ( @@ -102,7 +103,9 @@ def 
_report_progress(_stats: Dict[str, Any], progress_logger: Callable[[str], No DEEPHASH_PARAM_KEYS = ( 'exclude_types', 'exclude_paths', + 'exclude_glob_paths', 'include_paths', + 'include_glob_paths', 'exclude_regex_paths', 'hasher', 'significant_digits', @@ -198,6 +201,10 @@ def __init__(self, _shared_parameters: Optional[Dict[str, Any]]=None, **kwargs): super().__init__() + # Defaults for glob path attributes — needed for non-root instances + # that may receive _parameters without these keys. + self.exclude_glob_paths = None + self.include_glob_paths = None if kwargs: raise ValueError(( "The following parameter(s) are not valid: %s\n" @@ -245,8 +252,12 @@ def __init__(self, ignore_type_subclasses=ignore_type_subclasses, ignore_uuid_types=ignore_uuid_types) self.report_repetition = report_repetition - self.exclude_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(exclude_paths)) - self.include_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(include_paths)) + _exclude_set = convert_item_or_items_into_set_else_none(exclude_paths) + _exclude_exact, self.exclude_glob_paths = separate_wildcard_and_exact_paths(_exclude_set) + self.exclude_paths = add_root_to_paths(_exclude_exact) + _include_set = convert_item_or_items_into_set_else_none(include_paths) + _include_exact, self.include_glob_paths = separate_wildcard_and_exact_paths(_include_set) + self.include_paths = add_root_to_paths(_include_exact) self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) self.exclude_types = set(exclude_types) if exclude_types else None self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance @@ -402,7 +413,7 @@ def _group_by_sort_key(x): self.__dict__.clear() def _get_deephash_params(self): - result = {key: self._parameters[key] for key in DEEPHASH_PARAM_KEYS} + result = {key: self._parameters.get(key) for key in DEEPHASH_PARAM_KEYS} 
result['ignore_repetition'] = not self.report_repetition result['number_to_string_func'] = self.number_to_string return result @@ -421,6 +432,8 @@ def _report_result(self, report_type, change_level, local_tree=None): """ if not self._skip_this(change_level): + if self._skip_report_for_include_glob(change_level): + return change_level.report_type = report_type tree = self.tree if local_tree is None else local_tree tree[report_type].add(change_level) @@ -440,10 +453,33 @@ def custom_report_result(self, report_type, level, extra_info=None): """ if not self._skip_this(level): + if self._skip_report_for_include_glob(level): + return level.report_type = report_type level.additional[CUSTOM_FIELD] = extra_info self.tree[report_type].add(level) + def _skip_report_for_include_glob(self, level): + """When include_glob_paths is set, _skip_this allows ancestors through for traversal. + This method does a stricter check at report time: only report if the path + actually matches a glob pattern or is a descendant of a matching path, + or if it already matches an exact include_path.""" + if not self.include_glob_paths: + return False + level_path = level.path() + # If exact include_paths already matched, don't skip + if self.include_paths: + if level_path in self.include_paths: + return False + for prefix in self.include_paths: + if prefix in level_path: + return False + # Check glob patterns: match or descendant + for gp in self.include_glob_paths: + if gp.match_or_is_descendant(level_path): + return False + return True + @staticmethod def _dict_from_slots(object: Any) -> Dict[str, Any]: def unmangle(attribute: str) -> str: @@ -531,11 +567,21 @@ def _skip_this(self, level: Any) -> bool: skip = False if self.exclude_paths and level_path in self.exclude_paths: skip = True - if self.include_paths and level_path != 'root': - if level_path not in self.include_paths: - skip = True - for prefix in self.include_paths: - if prefix in level_path or level_path in prefix: + elif 
self.exclude_glob_paths and any(gp.match(level_path) for gp in self.exclude_glob_paths): + skip = True + if not skip and (self.include_paths or self.include_glob_paths) and level_path != 'root': + skip = True + if self.include_paths: + if level_path in self.include_paths: + skip = False + else: + for prefix in self.include_paths: + if prefix in level_path or level_path in prefix: + skip = False + break + if skip and self.include_glob_paths: + for gp in self.include_glob_paths: + if gp.match_or_is_ancestor(level_path): skip = False break elif self.exclude_regex_paths and any( @@ -565,28 +611,34 @@ def _skip_this(self, level: Any) -> bool: def _skip_this_key(self, level: Any, key: Any) -> bool: # if include_paths is not set, than treet every path as included - if self.include_paths is None: - return False - if "{}['{}']".format(level.path(), key) in self.include_paths: - return False - if level.path() in self.include_paths: - # matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']"] + if self.include_paths is None and self.include_glob_paths is None: return False - for prefix in self.include_paths: - if "{}['{}']".format(level.path(), key) in prefix: - # matches as long the prefix is longer than this object key - # eg.: level+key root['foo']['bar'] matches prefix root['foo']['bar'] from include paths - # level+key root['foo'] matches prefix root['foo']['bar'] from include_paths - # level+key root['foo']['bar'] DOES NOT match root['foo'] from include_paths This needs to be handled afterwards + key_path = "{}['{}']".format(level.path(), key) + if self.include_paths: + if key_path in self.include_paths: return False - # check if a higher level is included as a whole (=without any sublevels specified) - # matches e.g. 
level+key root['foo']['bar']['veg'] include_paths ["root['foo']"] - # but does not match, if it is level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']['fruits']"] - up = level.up - while up is not None: - if up.path() in self.include_paths: + if level.path() in self.include_paths: + # matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']"] return False - up = up.up + for prefix in self.include_paths: + if key_path in prefix: + # matches as long the prefix is longer than this object key + # eg.: level+key root['foo']['bar'] matches prefix root['foo']['bar'] from include paths + # level+key root['foo'] matches prefix root['foo']['bar'] from include_paths + # level+key root['foo']['bar'] DOES NOT match root['foo'] from include_paths This needs to be handled afterwards + return False + # check if a higher level is included as a whole (=without any sublevels specified) + # matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']"] + # but does not match, if it is level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']['fruits']"] + up = level.up + while up is not None: + if up.path() in self.include_paths: + return False + up = up.up + if self.include_glob_paths: + for gp in self.include_glob_paths: + if gp.match_or_is_ancestor(key_path): + return False return True def _get_clean_to_keys_mapping(self, keys: Any, level: Any) -> Dict[Any, Any]: @@ -680,9 +732,13 @@ def _diff_dict( t_keys_removed = t1_keys - t_keys_intersect if self.threshold_to_diff_deeper: - if self.exclude_paths: + if self.exclude_paths or self.exclude_glob_paths: t_keys_union = {f"{level.path()}[{repr(key)}]" for key in (t2_keys | t1_keys)} - t_keys_union -= self.exclude_paths + if self.exclude_paths: + t_keys_union -= self.exclude_paths + if self.exclude_glob_paths: + t_keys_union = {k for k in t_keys_union + if not any(gp.match(k) for gp in self.exclude_glob_paths)} t_keys_union_len = len(t_keys_union) else: 
def separate_wildcard_and_exact_paths(paths):
    """Partition *paths* into literal paths and compiled wildcard matchers.

    Parameters:
        paths: an iterable of path strings, or a falsy value (``None`` or an
            empty collection).

    Returns:
        A pair ``(exact_set_or_none, wildcard_list_or_none)``. The first item
        is a ``set`` of the paths without wildcard segments. The second is
        whatever ``compile_glob_paths`` returns for the wildcard paths. Either
        item is ``None`` when there is nothing of that kind.

    Raises:
        ValueError: if a wildcard path does not start with ``root``.
    """
    if not paths:
        return None, None

    # Imported inside the function, as in the original; presumably this avoids
    # a circular import between helper and path -- confirm before hoisting.
    from deepdiff.path import path_has_wildcard, compile_glob_paths

    literal_paths = set()
    glob_patterns = []
    for candidate in paths:
        if not path_has_wildcard(candidate):
            literal_paths.add(candidate)
            continue
        if not candidate.startswith('root'):
            raise ValueError(
                "Wildcard paths must start with 'root'. Got: {}".format(candidate))
        glob_patterns.append(candidate)

    return (
        literal_paths or None,
        compile_glob_paths(glob_patterns) if glob_patterns else None,
    )
# Regex to detect wildcard segments in a raw path string.
# Matches [*], [**], .*, .** -- it is applied only after quoted bracket keys
# have been blanked out (see path_has_wildcard), so text inside quotes is
# never mistaken for a wildcard.
_WILDCARD_RE = re.compile(
    r'\[\*\*?\]'             # [*] or [**]
    r'|\.\*\*?(?=[.\[]|$)'   # .* or .** followed by . or [ or end of string
)

# A bracketed, quoted key such as ['a[*]b'] or ["x.*.y"]. The lazy body stops
# at the first matching quote that is directly followed by ']'.
_QUOTED_KEY_RE = re.compile(r"""\[(['"]).*?\1\]""")


def path_has_wildcard(path):
    """Check if a path string contains wildcard segments (* or **).

    Only unquoted segments count: ``root[*]`` and ``root.**.x`` are wildcard
    paths, while ``root['*']``, ``root['a[*]b']`` and ``root["x.*.y"]`` are
    literal keys and yield ``False``.
    """
    # Blank out quoted keys first so wildcard-looking text inside a literal
    # key cannot trigger a match.
    unquoted = _QUOTED_KEY_RE.sub('[]', path)
    return bool(_WILDCARD_RE.search(unquoted))


class GlobPathMatcher:
    """Pre-compiled matcher for a single glob pattern path.

    Parses a pattern like ``root['users'][*]['password']`` into segments
    and matches concrete path strings against it.

    ``*`` matches exactly one path segment (any key, index, or attribute).
    ``**`` matches zero or more path segments.

    Literal segments are compared by value only; the access kind stored next
    to each value (item vs. attribute) is not compared.
    """

    def __init__(self, pattern_path):
        # Kept verbatim so callers can see which pattern a matcher came from.
        self.original_pattern = pattern_path
        self._pattern = self._segments(pattern_path)

    def __repr__(self):
        return "{}({!r})".format(type(self).__name__, self.original_pattern)

    @staticmethod
    def _segments(path_string):
        """Parse *path_string* and drop the leading root element."""
        elements = _path_to_elements(path_string, root_element=('root', GETATTR))
        return elements[1:]

    def match(self, path_string):
        """Return True if *path_string* matches this pattern exactly."""
        return self._match_segments(self._pattern, self._segments(path_string), 0, 0)

    def match_or_is_ancestor(self, path_string):
        """Return True if *path_string* matches OR is an ancestor of a potential match.

        This is needed for ``include_paths``: we must not prune a path that
        could lead to a matching descendant.
        """
        target = self._segments(path_string)
        return (self._match_segments(self._pattern, target, 0, 0) or
                self._could_match_descendant(self._pattern, target, 0, 0))

    def match_or_is_descendant(self, path_string):
        """Return True if *path_string* matches OR is a descendant of a matching path.

        This checks whether the pattern matches *path_string* itself or any
        prefix of it, meaning the path is "inside" a matched subtree.
        """
        target = self._segments(path_string)
        # Length len(target) is the exact match; shorter lengths are ancestors.
        return any(
            self._match_segments(self._pattern, target[:length], 0, 0)
            for length in range(len(target) + 1)
        )

    @staticmethod
    def _match_segments(pattern, target, pi, ti):
        """Recursive segment matcher with backtracking for ``**``.

        *pattern* and *target* are sequences of ``(element, action)`` pairs;
        *pi* and *ti* are the positions to resume from. Returns True only when
        both sequences are consumed completely.
        """
        while pi < len(pattern) and ti < len(target):
            pat_elem = pattern[pi][0]

            if pat_elem == MULTI_WILDCARD:
                # ** matches zero or more segments -- try every suffix
                for k in range(ti, len(target) + 1):
                    if GlobPathMatcher._match_segments(pattern, target, pi + 1, k):
                        return True
                return False
            if pat_elem != SINGLE_WILDCARD and pat_elem != target[ti][0]:
                # A literal segment has to equal the target value; * accepts
                # any single segment.
                return False
            pi += 1
            ti += 1

        # Consume any trailing ** (they can match zero segments)
        while pi < len(pattern) and pattern[pi][0] == MULTI_WILDCARD:
            pi += 1

        return pi == len(pattern) and ti == len(target)

    @staticmethod
    def _could_match_descendant(pattern, target, pi, ti):
        """Check if *target* is a prefix that could lead to a match deeper down."""
        while ti < len(target):
            if pi >= len(pattern):
                # Pattern used up while target segments remain: too deep.
                return False
            pat_elem = pattern[pi][0]
            if pat_elem == MULTI_WILDCARD:
                # ** can absorb every remaining target segment and is still
                # pending afterwards, so a deeper match is always possible.
                return True
            if pat_elem != SINGLE_WILDCARD and pat_elem != target[ti][0]:
                return False
            pi += 1
            ti += 1
        # Target exhausted -- it's an ancestor if pattern has remaining segments
        return pi < len(pattern)


def compile_glob_paths(paths):
    """Compile a list of glob pattern strings into GlobPathMatcher objects.

    Returns a list of ``GlobPathMatcher`` or ``None`` if *paths* is empty/None.
    """
    if not paths:
        return None
    return [GlobPathMatcher(p) for p in paths]
= tuple( @@ -193,6 +195,8 @@ def __skip_this(self, item: Any, parent: str) -> bool: skip = False if parent in self.exclude_paths: skip = True + elif self.exclude_glob_paths and any(gp.match(parent) for gp in self.exclude_glob_paths): + skip = True elif self.exclude_regex_paths and any( [exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]): skip = True diff --git a/docs/deephash_doc.rst b/docs/deephash_doc.rst index da271b7..7039281 100644 --- a/docs/deephash_doc.rst +++ b/docs/deephash_doc.rst @@ -32,10 +32,12 @@ exclude_types: list, default = None exclude_paths: list, default = None List of paths to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one path. + Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. include_paths: list, default = None List of the only paths to include in the report. If only one item, you can pass it as a string. + Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. exclude_regex_paths: list, default = None diff --git a/docs/diff_doc.rst b/docs/diff_doc.rst index e01dab2..1fc18db 100644 --- a/docs/diff_doc.rst +++ b/docs/diff_doc.rst @@ -55,7 +55,8 @@ encodings: List, default = None exclude_paths: list, default = None :ref:`exclude_paths_label` - List of paths to exclude from the report. If only one item, you can path it as a string. + List of paths to exclude from the report. If only one item, you can pass it as a string. + Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. exclude_regex_paths: list, default = None :ref:`exclude_regex_paths_label` @@ -77,6 +78,7 @@ exclude_obj_callback_strict: function, default = None include_paths: list, default = None :ref:`include_paths_label` List of the only paths to include in the report. If only one item is in the list, you can pass it as a string. 
+ Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth. include_obj_callback: function, default = None :ref:`include_obj_callback_label` diff --git a/docs/exclude_paths.rst b/docs/exclude_paths.rst index 2de453b..0c9b78a 100644 --- a/docs/exclude_paths.rst +++ b/docs/exclude_paths.rst @@ -59,6 +59,49 @@ Example {'values_changed': {"root['foo']['bar']": {'new_value': 'banana', 'old_value': 'potato'}}} +.. _wildcard_paths_label: + +Wildcard (Glob) Paths +--------------------- + +Both ``exclude_paths`` and ``include_paths`` support wildcard patterns for matching multiple paths at once: + +- ``[*]`` or ``.*`` matches exactly **one** path segment (any key, index, or attribute). +- ``[**]`` or ``.**`` matches **zero or more** path segments at any depth. + +Wildcard patterns must use the full ``root`` prefix (shorthand keys are not supported for wildcards). + +Exclude all ``password`` fields regardless of the parent key: + >>> t1 = {"users": {"alice": {"name": "Alice", "password": "s1"}, "bob": {"name": "Bob", "password": "s2"}}} + >>> t2 = {"users": {"alice": {"name": "Alice", "password": "x1"}, "bob": {"name": "Bob", "password": "x2"}}} + >>> DeepDiff(t1, t2, exclude_paths=["root['users'][*]['password']"]) + {} + +Include only ``name`` fields at any depth: + >>> t1 = {"a": {"name": "A", "secret": 1}, "b": {"name": "B", "secret": 2}} + >>> t2 = {"a": {"name": "X", "secret": 1}, "b": {"name": "Y", "secret": 2}} + >>> result = DeepDiff(t1, t2, include_paths=["root[*]['name']"]) + >>> set(result.get('values_changed', {}).keys()) == {"root['a']['name']", "root['b']['name']"} + True + +Use ``[**]`` to match at any depth: + >>> t1 = {"config": {"db": {"password": "old"}, "cache": {"password": "old"}}} + >>> t2 = {"config": {"db": {"password": "new"}, "cache": {"password": "new"}}} + >>> DeepDiff(t1, t2, exclude_paths=["root[**]['password']"]) + {} + +Literal keys named ``*`` or ``**`` are not treated as wildcards when quoted: + 
>>> t1 = {"*": 1, "a": 2} + >>> t2 = {"*": 10, "a": 20} + >>> result = DeepDiff(t1, t2, exclude_paths=["root['*']"]) + >>> "root['a']" in result.get('values_changed', {}) + True + +When both ``exclude_paths`` and ``include_paths`` apply to the same path, exclusion takes precedence. + +Wildcards also work with ``DeepHash`` and ``DeepSearch`` exclude_paths. + + .. _exclude_regex_paths_label: Exclude Regex Paths diff --git a/docs/search_doc.rst b/docs/search_doc.rst index 0b26873..89ef333 100644 --- a/docs/search_doc.rst +++ b/docs/search_doc.rst @@ -14,6 +14,7 @@ verbose_level : int >= 0, default = 1. exclude_paths: list, default = None. List of paths to exclude from the report. + Supports wildcard patterns: use ``[*]`` to match one segment or ``[**]`` to match any depth. exclude_types: list, default = None. List of object types to exclude from the report. diff --git a/tests/test_glob_paths.py b/tests/test_glob_paths.py new file mode 100644 index 0000000..d97ead2 --- /dev/null +++ b/tests/test_glob_paths.py @@ -0,0 +1,719 @@ +import pytest +from deepdiff import DeepDiff, DeepHash, DeepSearch, grep +from deepdiff.path import ( + GlobPathMatcher, compile_glob_paths, path_has_wildcard, + _path_to_elements, SINGLE_WILDCARD, MULTI_WILDCARD, +) +from deepdiff.helper import separate_wildcard_and_exact_paths + + +# ── path_has_wildcard detection ────────────────────────────────────── + + +class TestPathHasWildcard: + + @pytest.mark.parametrize("path, expected", [ + ("root[*]", True), + ("root[**]", True), + ("root.*", True), + ("root.**", True), + ("root['users'][*]['name']", True), + ("root[**]['password']", True), + ("root['*']", False), # literal key named '*' + ("root['**']", False), # literal key named '**' + ("root['foo']['bar']", False), + ("root[0][1]", False), + ("root.foo.bar", False), + ("root[*][*]", True), # multiple wildcards + ("root[**][**]", True), + ("root.*.bar.*", True), # multiple dot wildcards + ]) + def test_detection(self, path, expected): + assert 
path_has_wildcard(path) is expected + + +# ── _path_to_elements parsing of wildcards ─────────────────────────── + + +class TestWildcardParsing: + + @pytest.mark.parametrize("path, expected", [ + ("root[*]", (('root', 'GETATTR'), (SINGLE_WILDCARD, 'GET'))), + ("root[**]", (('root', 'GETATTR'), (MULTI_WILDCARD, 'GET'))), + ("root['users'][*]['password']", ( + ('root', 'GETATTR'), ('users', 'GET'), (SINGLE_WILDCARD, 'GET'), ('password', 'GET'), + )), + ("root[**]['secret']", ( + ('root', 'GETATTR'), (MULTI_WILDCARD, 'GET'), ('secret', 'GET'), + )), + ("root.*.name", ( + ('root', 'GETATTR'), (SINGLE_WILDCARD, 'GETATTR'), ('name', 'GETATTR'), + )), + ("root[*][*]", ( + ('root', 'GETATTR'), (SINGLE_WILDCARD, 'GET'), (SINGLE_WILDCARD, 'GET'), + )), + ]) + def test_parsing(self, path, expected): + assert _path_to_elements(path) == expected + + def test_literal_star_key_not_wildcard(self): + """root['*'] should parse as a literal string '*', not a wildcard token.""" + elems = _path_to_elements("root['*']") + # The element should be a plain string, not a _WildcardToken + assert elems[1][0] == '*' + assert elems[1][0] != SINGLE_WILDCARD + assert isinstance(elems[1][0], str) + + def test_literal_double_star_key_not_wildcard(self): + """root['**'] should parse as a literal string '**', not a wildcard token.""" + elems = _path_to_elements("root['**']") + assert elems[1][0] == '**' + assert elems[1][0] != MULTI_WILDCARD + assert isinstance(elems[1][0], str) + + def test_wildcard_token_repr(self): + """_WildcardToken repr should return the symbol string.""" + assert repr(SINGLE_WILDCARD) == '*' + assert repr(MULTI_WILDCARD) == '**' + + def test_wildcard_token_hash(self): + """_WildcardToken instances should be hashable and usable in sets/dicts.""" + s = {SINGLE_WILDCARD, MULTI_WILDCARD} + assert len(s) == 2 + assert SINGLE_WILDCARD in s + d = {SINGLE_WILDCARD: 'one', MULTI_WILDCARD: 'many'} + assert d[SINGLE_WILDCARD] == 'one' + + +# ── separate_wildcard_and_exact_paths 
──────────────────────────────── + + +class TestSeparateWildcardPaths: + + def test_none_input(self): + exact, globs = separate_wildcard_and_exact_paths(None) + assert exact is None + assert globs is None + + def test_empty_input(self): + exact, globs = separate_wildcard_and_exact_paths(set()) + assert exact is None + assert globs is None + + def test_all_exact(self): + exact, globs = separate_wildcard_and_exact_paths({"root['foo']", "root['bar']"}) + assert exact == {"root['foo']", "root['bar']"} + assert globs is None + + def test_all_wildcards(self): + exact, globs = separate_wildcard_and_exact_paths({"root[*]", "root[**]['x']"}) + assert exact is None + assert len(globs) == 2 + + def test_mixed(self): + exact, globs = separate_wildcard_and_exact_paths( + {"root['foo']", "root[*]['bar']"} + ) + assert exact == {"root['foo']"} + assert len(globs) == 1 + assert globs[0].original_pattern == "root[*]['bar']" + + def test_wildcard_must_start_with_root(self): + with pytest.raises(ValueError, match="Wildcard paths must start with 'root'"): + separate_wildcard_and_exact_paths({"[*]['foo']"}) + + +# ── GlobPathMatcher.match ──────────────────────────────────────────── + + +class TestGlobPathMatcherMatch: + + # ── single wildcard [*] ── + + @pytest.mark.parametrize("target, expected", [ + ("root['a']", True), + ("root[0]", True), + ("root[99]", True), + ("root", False), # too short + ("root['a']['b']", False), # too long + ]) + def test_single_wildcard_basic(self, target, expected): + m = GlobPathMatcher("root[*]") + assert m.match(target) is expected + + @pytest.mark.parametrize("target, expected", [ + ("root['users']['alice']['password']", True), + ("root['users'][0]['password']", True), + ("root['users'][99]['password']", True), + ("root['users']['password']", False), # missing middle segment + ("root['users']['a']['b']['password']", False), # too many middle segments + ("root['users']['alice']['email']", False), # wrong last segment + ]) + def 
test_single_wildcard_in_middle(self, target, expected):
+        # `target` / `expected` are test parameters; presumably supplied by a
+        # parametrize decorator above this method — confirm against the full file.
+        m = GlobPathMatcher("root['users'][*]['password']")
+        assert m.match(target) is expected
+
+    def test_multiple_single_wildcards(self):
+        """root[*][*] matches exactly two segments after root."""
+        m = GlobPathMatcher("root[*][*]")
+        assert m.match("root['a']['b']") is True
+        assert m.match("root[0][1]") is True
+        assert m.match("root['a']") is False
+        assert m.match("root['a']['b']['c']") is False
+
+    # ── double wildcard [**] ──
+
+    @pytest.mark.parametrize("target, expected", [
+        ("root", True),  # zero segments
+        ("root['a']", True),  # one segment
+        ("root['a']['b']['c']", True),  # many segments
+        ("root[0][1][2]", True),  # numeric indices
+    ])
+    def test_double_wildcard_standalone(self, target, expected):
+        """A bare root[**] pattern is expected to match every listed path, including root itself."""
+        m = GlobPathMatcher("root[**]")
+        assert m.match(target) is expected
+
+    @pytest.mark.parametrize("target, expected", [
+        ("root['password']", True),  # ** matches zero
+        ("root['a']['password']", True),  # ** matches one
+        ("root['a']['b']['c']['password']", True),  # ** matches many
+        ("root['a']['b']", False),  # doesn't end with password
+        ("root['password']['extra']", False),  # extra after password
+    ])
+    def test_double_wildcard_before_key(self, target, expected):
+        """[**] followed by a literal key: the path must end at that key, at any depth."""
+        m = GlobPathMatcher("root[**]['password']")
+        assert m.match(target) is expected
+
+    def test_double_wildcard_both_ends(self):
+        """Two [**] segments, each followed by a literal key, in one pattern."""
+        m = GlobPathMatcher("root[**]['config'][**]['value']")
+        assert m.match("root['config']['value']") is True
+        assert m.match("root['a']['config']['value']") is True
+        assert m.match("root['a']['config']['b']['c']['value']") is True
+        assert m.match("root['config']['x']") is False
+        assert m.match("root['value']") is False
+
+    def test_double_wildcard_zero_match_in_middle(self):
+        """** between two fixed segments can match zero segments."""
+        m = GlobPathMatcher("root['a'][**]['b']")
+        assert m.match("root['a']['b']") is True  # ** matches zero
+        assert m.match("root['a']['x']['b']") is True  # ** matches one
+        assert m.match("root['a']['x']['y']['b']") is True  # ** matches two
+
+    def test_adjacent_double_wildcards(self):
+        """Two consecutive [**] segments are expected to behave like one for these paths."""
+        m = GlobPathMatcher("root[**][**]['x']")
+        assert m.match("root['x']") is True
+        assert m.match("root['a']['x']") is True
+        assert m.match("root['a']['b']['x']") is True
+
+    # ── dot notation wildcards ──
+
+    def test_dot_single_wildcard(self):
+        """Dot-notation `*` must consume exactly one attribute segment."""
+        m = GlobPathMatcher("root.*.name")
+        assert m.match("root.user.name") is True
+        assert m.match("root.name") is False
+
+    def test_dot_double_wildcard(self):
+        """Dot-notation `**` may consume zero, one or several attribute segments."""
+        m = GlobPathMatcher("root.**.name")
+        assert m.match("root.name") is True
+        assert m.match("root.a.name") is True
+        assert m.match("root.a.b.name") is True
+
+    # ── mixed bracket and dot ──
+
+    def test_mixed_bracket_and_dot_wildcard(self):
+        """A bracket wildcard followed by a dot attribute matches both key and index paths."""
+        m = GlobPathMatcher("root[*].name")
+        assert m.match("root['user'].name") is True
+        assert m.match("root[0].name") is True
+
+
+# ── GlobPathMatcher.match_or_is_ancestor ─────────────────────────────
+
+
+class TestGlobPathMatcherAncestor:
+    """Expectations for GlobPathMatcher.match_or_is_ancestor()."""
+
+    def test_ancestor_of_double_wildcard(self):
+        """Paths shorter than a [**] pattern's matches are reported as ancestors."""
+        m = GlobPathMatcher("root[**]['password']")
+        assert m.match_or_is_ancestor("root['users']") is True
+        assert m.match_or_is_ancestor("root") is True
+
+    def test_match_also_returns_true(self):
+        """A path that matches the pattern outright also returns True."""
+        m = GlobPathMatcher("root[**]['password']")
+        assert m.match_or_is_ancestor("root['password']") is True
+
+    def test_any_path_is_ancestor_with_double_wildcard(self):
+        """With ** in the pattern, any intermediate path could lead to a match."""
+        m = GlobPathMatcher("root[**]['password']")
+        assert m.match_or_is_ancestor("root['x']") is True
+        assert m.match_or_is_ancestor("root['x']['y']['z']") is True
+
+    def test_single_wildcard_ancestor_positive(self):
+        """Prefixes of a single-wildcard pattern are reported as ancestors."""
+        m = GlobPathMatcher("root['users'][*]['password']")
+        assert m.match_or_is_ancestor("root['users']") is True
+        assert m.match_or_is_ancestor("root") is True
+
+    def test_single_wildcard_ancestor_negative(self):
+        """A path that diverges from a single-wildcard pattern is not an ancestor."""
+        m = GlobPathMatcher("root['users'][*]['password']")
+        assert m.match_or_is_ancestor("root['other']") is False
+
+
+# ── GlobPathMatcher.match_or_is_descendant ───────────────────────────
+
+
+class TestGlobPathMatcherDescendant:
+    """Expectations for GlobPathMatcher.match_or_is_descendant()."""
+
+    def test_descendant_of_match(self):
+        """Paths that extend a matching path are reported as descendants."""
+        m = GlobPathMatcher("root[**]['config']")
+        assert m.match_or_is_descendant("root['config']['value']") is True
+        assert m.match_or_is_descendant("root['config']['a']['b']") is True
+
+    def test_exact_match(self):
+        """A path that matches the pattern outright also returns True."""
+        m = GlobPathMatcher("root[**]['config']")
+        assert m.match_or_is_descendant("root['config']") is True
+
+    def test_not_descendant_or_match(self):
+        """A path that never passes through the pattern's final key returns False."""
+        m = GlobPathMatcher("root[**]['secret']")
+        assert m.match_or_is_descendant("root['config']['db']['host']") is False
+
+    def test_ancestor_is_not_descendant(self):
+        """A strict prefix of the pattern is an ancestor, so it must return False here."""
+        m = GlobPathMatcher("root['users'][*]['password']")
+        assert m.match_or_is_descendant("root['users']") is False
+
+    def test_descendant_of_single_wildcard_match(self):
+        """Anything nested below a root[*] match counts as a descendant."""
+        m = GlobPathMatcher("root[*]")
+        assert m.match_or_is_descendant("root['a']['nested']") is True
+
+
+# ── compile_glob_paths ───────────────────────────────────────────────
+
+
+class TestCompileGlobPaths:
+    """Expectations for the compile_glob_paths() helper."""
+
+    def test_none_returns_none(self):
+        assert compile_glob_paths(None) is None
+
+    def test_empty_returns_none(self):
+        # An empty list is expected to collapse to None, same as passing None.
+        assert compile_glob_paths([]) is None
+
+    def test_compiles_list(self):
+        """Each pattern string yields one GlobPathMatcher instance."""
+        result = compile_glob_paths(["root[*]", "root[**]['x']"])
+        assert len(result) == 2
+        assert all(isinstance(r, GlobPathMatcher) for r in result)
+
+
+# ── DeepDiff integration: exclude_paths with wildcards ───────────────
+
+
+class TestDeepDiffExcludeGlob:
+    """DeepDiff runs where exclude_paths contains wildcard patterns."""
+
+    def test_exclude_single_wildcard(self):
+        """[*] excludes 'pw' under every user while other changes are still reported."""
+        t1 = {'users': {'alice': {'name': 'Alice', 'pw': 's1'}, 'bob': {'name': 'Bob', 'pw': 's2'}}}
+        t2 = {'users': {'alice': {'name': 'Alice', 'pw': 'c1'}, 'bob': {'name': 'Bobby', 'pw': 'c2'}}}
+        diff = DeepDiff(t1, t2, exclude_paths=["root['users'][*]['pw']"])
+        changed = diff.get('values_changed', {})
+        assert "root['users']['bob']['name']" in changed
+        assert "root['users']['alice']['pw']" not in changed
+        assert "root['users']['bob']['pw']" not in changed
+
+    def test_exclude_double_wildcard(self):
+        """[**] excludes 'secret' at two different nesting depths."""
+        t1 = {
+            'config': {'db': {'host': 'localhost', 'secret': 'abc'},
+                       'api': {'nested': {'secret': 'xyz'}}},
+            'name': 'app'
+        }
+        t2 = {
+            'config': {'db': {'host': 'remotehost', 'secret': 'def'},
+                       'api': {'nested': {'secret': 'uvw'}}},
+            'name': 'app2'
+        }
+        diff = DeepDiff(t1, t2, exclude_paths=["root[**]['secret']"])
+        changed = diff.get('values_changed', {})
+        assert "root['config']['db']['host']" in changed
+        assert "root['name']" in changed
+        assert "root['config']['db']['secret']" not in changed
+        assert "root['config']['api']['nested']['secret']" not in changed
+
+    def test_exclude_wildcard_with_list(self):
+        """[*] also stands in for list indices."""
+        t1 = [{'name': 'Alice', 'age': 30}, {'name': 'Bob', 'age': 25}]
+        t2 = [{'name': 'Alice', 'age': 31}, {'name': 'Bobby', 'age': 26}]
+        diff = DeepDiff(t1, t2, exclude_paths=["root[*]['age']"])
+        changed = diff.get('values_changed', {})
+        assert "root[1]['name']" in changed
+        assert "root[0]['age']" not in changed
+        assert "root[1]['age']" not in changed
+
+    def test_exclude_mix_exact_and_wildcard(self):
+        """An exact path and a wildcard path can be passed in the same exclude_paths list."""
+        t1 = {'a': 1, 'b': 2, 'c': {'d': 3, 'e': 4}}
+        t2 = {'a': 10, 'b': 20, 'c': {'d': 30, 'e': 40}}
+        diff = DeepDiff(t1, t2, exclude_paths=["root['a']", "root['c'][*]"])
+        changed = diff.get('values_changed', {})
+        assert "root['b']" in changed
+        assert "root['a']" not in changed
+        assert "root['c']['d']" not in changed
+        assert "root['c']['e']" not in changed
+
+    def test_exclude_nested_list_of_dicts(self):
+        """Excluding a whole subtree ('meta') hides the changes nested inside it."""
+        t1 = {'data': [{'id': 1, 'meta': {'ts': 100}}, {'id': 2, 'meta': {'ts': 200}}]}
+        t2 = {'data': [{'id': 1, 'meta': {'ts': 999}}, {'id': 2, 'meta': {'ts': 888}}]}
+        diff = DeepDiff(t1, t2, exclude_paths=["root['data'][*]['meta']"])
+        assert diff == {}
+
+    def test_exclude_with_type_changes(self):
+        """The excluded key changes type (int -> str); no type_changes entry may appear."""
+        t1 = {'a': {'x': 1, 'y': 'hello'}}
+        t2 = {'a': {'x': 'changed_type', 'y': 'world'}}
+        diff = DeepDiff(t1, t2, exclude_paths=["root[*]['x']"])
+        changed = diff.get('values_changed', {})
+        assert "root['a']['y']" in changed
+        assert 'type_changes' not in diff
+
+
+# ── DeepDiff integration: include_paths with wildcards ───────────────
+
+
+class TestDeepDiffIncludeGlob:
+    """DeepDiff runs where include_paths contains wildcard patterns."""
+
+    def test_include_single_wildcard(self):
+        """Only 'name' under each user is reported; 'pw' changes are left out."""
+        t1 = {'users': {'alice': {'name': 'Alice', 'pw': 's1'}, 'bob': {'name': 'Bob', 'pw': 's2'}}}
+        t2 = {'users': {'alice': {'name': 'Alice2', 'pw': 'c1'}, 'bob': {'name': 'Bobby', 'pw': 'c2'}}}
+        diff = DeepDiff(t1, t2, include_paths=["root['users'][*]['name']"])
+        changed = diff.get('values_changed', {})
+        assert "root['users']['alice']['name']" in changed
+        assert "root['users']['bob']['name']" in changed
+        assert "root['users']['alice']['pw']" not in changed
+        assert "root['users']['bob']['pw']" not in changed
+
+    def test_include_double_wildcard(self):
+        """Only 'secret' keys, at any depth, are reported."""
+        t1 = {
+            'config': {'db': {'host': 'localhost', 'secret': 'abc'},
+                       'api': {'url': 'http://api', 'nested': {'secret': 'xyz'}}},
+            'name': 'app'
+        }
+        t2 = {
+            'config': {'db': {'host': 'remotehost', 'secret': 'def'},
+                       'api': {'url': 'http://api2', 'nested': {'secret': 'uvw'}}},
+            'name': 'app2'
+        }
+        diff = DeepDiff(t1, t2, include_paths=["root[**]['secret']"])
+        changed = diff.get('values_changed', {})
+        assert "root['config']['db']['secret']" in changed
+        assert "root['config']['api']['nested']['secret']" in changed
+        assert "root['config']['db']['host']" not in changed
+        assert "root['config']['api']['url']" not in changed
+        assert "root['name']" not in changed
+
+    def test_include_mix_exact_and_wildcard(self):
+        """An exact path and a wildcard path can be passed in the same include_paths list."""
+        t1 = {
+            'config': {'db': {'host': 'localhost', 'secret': 'abc'}},
+            'name': 'app'
+        }
+        t2 = {
+            'config': {'db': {'host': 'remotehost', 'secret': 'def'}},
+            'name': 'app2'
+        }
+        diff = DeepDiff(t1, t2, include_paths=["root[**]['secret']", "root['name']"])
+        changed = diff.get('values_changed', {})
+        assert "root['config']['db']['secret']" in changed
+        assert "root['name']" in changed
+        assert "root['config']['db']['host']" not in changed
+
+    def test_include_wildcard_no_changes(self):
+        """Only 'y' differs, but only 'x' is included, so the diff must be empty."""
+        t1 = {'a': {'x': 1, 'y': 2}, 'b': {'x': 3, 'y': 4}}
+        t2 = {'a': {'x': 1, 'y': 20}, 'b': {'x': 3, 'y': 40}}
+        diff = DeepDiff(t1, t2, include_paths=["root[*]['x']"])
+        assert diff == {}
+
+    def test_include_wildcard_with_added_keys(self):
+        """When a new key is added, include_paths restricts reporting to matching paths only."""
+        t1 = {'a': {'name': 'x'}}
+        t2 = {'a': {'name': 'y'}, 'b': {'name': 'z'}}
+        diff = DeepDiff(t1, t2, include_paths=["root[*]['name']"])
+        changed = diff.get('values_changed', {})
+        assert "root['a']['name']" in changed
+        # root['b'] addition is not reported because the add is at root['b'],
+        # not at root[*]['name']
+        assert 'dictionary_item_added' not in diff
+
+    def test_include_double_wildcard_with_nested_list(self):
+        """A change below the included key (a list element) is reported as a descendant."""
+        t1 = {'data': [{'scores': [1, 2]}, {'scores': [3, 4]}]}
+        t2 = {'data': [{'scores': [1, 2]}, {'scores': [3, 5]}]}
+        diff = DeepDiff(t1, t2, include_paths=["root[**]['scores']"])
+        changed = diff.get('values_changed', {})
+        assert "root['data'][1]['scores'][1]" in changed
+        assert len(changed) == 1
+
+
+# ── Backward compatibility ───────────────────────────────────────────
+
+
+class TestBackwardCompatibility:
+    """Wildcard-free path options are expected to keep producing these results."""
+
+    def test_exact_exclude_paths_unchanged(self):
+        t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs"]}
+        t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu"]}
+        ddiff = DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})
+        assert ddiff == {}
+
+    def test_exact_include_paths_unchanged(self):
+        t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs"]}
+        t2 = {"for life": "vegan2", "ingredients": ["veggies", "tofu"]}
+        ddiff = DeepDiff(t1, t2, include_paths={"root['for life']"})
+        changed = ddiff.get('values_changed', {})
+        assert "root['for life']" in changed
+        assert len(changed) == 1
+
+    def test_exclude_regex_paths_unchanged(self):
+        # The regex contains escaped brackets only; it goes through
+        # exclude_regex_paths, not the glob-aware exclude_paths.
+        t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}]
+        t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}]
+        ddiff = DeepDiff(t1, t2, exclude_regex_paths=[r"root\[\d+\]\['b'\]"])
+        assert ddiff == {}
+
+    def test_shorthand_paths_unchanged(self):
+        # "ingredients" is given without the root[...] prefix.
+        t1 = {"for life": "vegan", "ingredients": ["no meat"]}
+        t2 = {"for life": "vegan", "ingredients": ["veggies"]}
+        ddiff = DeepDiff(t1, t2, exclude_paths={"ingredients"})
+        assert ddiff == {}
+
+    def test_include_paths_with_nested_prefix(self):
+        """Existing prefix-based include logic must still work."""
+        t1 = {"foo": {"bar": {"veg": "potato", "fruit": "apple"}}}
+        t2 = {"foo": {"bar": {"veg": "potato", "fruit": "peach"}}}
+        ddiff = DeepDiff(t1, t2, include_paths="root['foo']['bar']")
+        changed = ddiff.get('values_changed', {})
+        assert "root['foo']['bar']['fruit']" in changed
+
+
+# ── DeepSearch integration ───────────────────────────────────────────
+
+
+class TestDeepSearchGlob:
+    """DeepSearch / grep runs where exclude_paths contains wildcard patterns."""
+
+    def test_exclude_glob_in_search(self):
+        """Every occurrence of the needle sits under an excluded path, so nothing is found."""
+        obj = {'a': {'secret': 'find_me', 'name': 'x'}, 'b': {'secret': 'find_me', 'name': 'y'}}
+        result = DeepSearch(obj, 'find_me', exclude_paths=["root[*]['secret']"])
+        assert result == {}
+
+    def test_exclude_deep_glob_in_search(self):
+        """[**] hides the nested 'target' hit while the sibling 'other' hit is kept."""
+        obj = {'level1': {'level2': {'target': 'needle', 'other': 'needle'}}}
+        result = DeepSearch(obj, 'needle', exclude_paths=["root[**]['target']"])
+        matched = result.get('matched_values', {})
+        assert "root['level1']['level2']['other']" in matched
+        assert "root['level1']['level2']['target']" not in matched
+
+    def test_exclude_glob_via_grep(self):
+        """Same exclusion, reached through the `obj | grep(...)` operator form."""
+        obj = [{'secret': 'findme', 'name': 'x'}, {'secret': 'findme', 'name': 'y'}]
+        result = obj | grep('findme', exclude_paths=["root[*]['secret']"])
+        assert result == {}
+
+    def test_exclude_deep_glob_in_list_search(self):
+        # NOTE(review): despite "deep" in the name, the pattern uses a single
+        # [*] followed by a literal index, not [**].
+        obj = [[1, 2, 'target'], [3, 'target', 4]]
+        result = DeepSearch(obj, 'target', exclude_paths=["root[*][2]"])
+        matched = result.get('matched_values', {})
+        assert 'root[1][1]' in matched
+        assert 'root[0][2]' not in matched
+
+    def test_search_with_mixed_exact_and_glob_exclude(self):
+        """An exact path and a [**] pattern can be combined in DeepSearch exclude_paths."""
+        obj = {'a': 'val', 'b': {'c': 'val'}, 'd': {'e': {'f': 'val'}}}
+        result = DeepSearch(obj, 'val', exclude_paths=["root['a']", "root[**]['f']"])
+        matched = result.get('matched_values', {})
+        assert "root['b']['c']" in matched
+        assert "root['a']" not in matched
+        assert "root['d']['e']['f']" not in matched
+
+
+# ── DeepHash integration ─────────────────────────────────────────────
+
+
+class TestDeepHashGlob:
+    """DeepHash runs where exclude_paths contains exact or wildcard patterns."""
+
+    def test_exclude_exact_makes_hash_equal(self):
+        """Baseline: excluding the only differing key by exact path equalises the hashes."""
+        t1 = {'name': 'app', 'secret': 'abc'}
+        t2 = {'name': 'app', 'secret': 'def'}
+        h1 = DeepHash(t1, exclude_paths=["root['secret']"])
+        h2 = DeepHash(t2, exclude_paths=["root['secret']"])
+        assert h1[t1] == h2[t2]
+
+    def test_exclude_glob_wildcard_makes_hash_equal(self):
+        """[*] excludes 'secret' under every top-level key, so the hashes agree."""
+        t1 = {'a': {'secret': 'x', 'name': 'n1'}, 'b': {'secret': 'y', 'name': 'n2'}}
+        t2 = {'a': {'secret': 'X', 'name': 'n1'}, 'b': {'secret': 'Y', 'name': 'n2'}}
+        h1 = DeepHash(t1, exclude_paths=["root[*]['secret']"])
+        h2 = DeepHash(t2, exclude_paths=["root[*]['secret']"])
+        assert h1[t1] == h2[t2]
+
+    def test_exclude_deep_glob_makes_hash_equal(self):
+        """[**] excludes a 'secret' nested two levels down, so the hashes agree."""
+        t1 = {'a': {'b': {'secret': 1, 'val': 2}}}
+        t2 = {'a': {'b': {'secret': 99, 'val': 2}}}
+        h1 = DeepHash(t1, exclude_paths=["root[**]['secret']"])
+        h2 = DeepHash(t2, exclude_paths=["root[**]['secret']"])
+        assert h1[t1] == h2[t2]
+
+    def test_exclude_glob_hash_not_equal_when_included_part_differs(self):
+        """Guard against over-exclusion: a difference outside the pattern must change the hash."""
+        t1 = {'a': {'secret': 'x', 'name': 'n1'}}
+        t2 = {'a': {'secret': 'x', 'name': 'DIFFERENT'}}
+        h1 = DeepHash(t1, exclude_paths=["root[*]['secret']"])
+        h2 = DeepHash(t2, exclude_paths=["root[*]['secret']"])
+        assert h1[t1] != h2[t2]
+
+
+# ── Edge cases ───────────────────────────────────────────────────────
+
+
+class TestEdgeCases:
+    """Wildcard paths combined with other DeepDiff options and unusual inputs."""
+
+    def test_wildcard_with_ignore_order(self):
+        """With ignore_order=True and 'pw' excluded, the reordered lists compare equal."""
+        t1 = [{'name': 'a', 'pw': '1'}, {'name': 'b', 'pw': '2'}]
+        t2 = [{'name': 'b', 'pw': '20'}, {'name': 'a', 'pw': '10'}]
+        diff = DeepDiff(t1, t2, ignore_order=True, exclude_paths=["root[*]['pw']"])
+        assert diff == {}
+
+    def test_include_wildcard_with_ignore_order(self):
+        """With ignore_order=True and only 'name' included, the reordered lists compare equal."""
+        t1 = [{'name': 'a', 'pw': '1'}, {'name': 'b', 'pw': '2'}]
+        t2 = [{'name': 'b', 'pw': '20'}, {'name': 'a', 'pw': '10'}]
+        diff = DeepDiff(t1, t2, ignore_order=True, include_paths=["root[*]['name']"])
+        assert diff == {}
+
+    def test_wildcard_with_added_removed_keys(self):
+        """A newly added user is still reported while the wildcard hides 'pw' changes."""
+        t1 = {'users': {'alice': {'name': 'Alice', 'pw': 'a'}}}
+        t2 = {'users': {'alice': {'name': 'Alice', 'pw': 'b'}, 'bob': {'name': 'Bob', 'pw': 'c'}}}
+        diff = DeepDiff(t1, t2, exclude_paths=["root['users'][*]['pw']"])
+        # Compare full path strings rather than searching for the substring
+        # "bob", so an unrelated path containing "bob" cannot satisfy the test.
+        added = {str(p) for p in diff.get('dictionary_item_added', [])}
+        assert "root['users']['bob']" in added
+        # The exclusion itself must also be observable: alice's changed 'pw'
+        # matches the wildcard and therefore must not be reported.
+        assert "root['users']['alice']['pw']" not in diff.get('values_changed', {})
+
+    def test_empty_diff_with_wildcard(self):
+        """Identical inputs stay an empty diff when a wildcard exclude is supplied."""
+        t1 = {'a': 1}
+        t2 = {'a': 1}
+        diff = DeepDiff(t1, t2, exclude_paths=["root[*]"])
+        assert diff == {}
+
+    def test_root_double_wildcard_excludes_everything(self):
+        """root[**] is expected to exclude every path, leaving an empty diff."""
+        t1 = {'a': 1, 'b': {'c': 2}}
+        t2 = {'a': 10, 'b': {'c': 20}}
+        diff = DeepDiff(t1, t2, exclude_paths=["root[**]"])
+        assert diff == {}
+
+    def test_wildcard_with_custom_object(self):
+        """Attribute (dot-notation) paths on a plain object can be excluded."""
+        # NOTE(review): the exclude path below is an exact attribute path; no
+        # wildcard is exercised despite the test name. Consider adding a
+        # nested-object case with a dot wildcard once that is confirmed to work.
+        class Obj:
+            def __init__(self, name, secret):
+                self.name = name
+                self.secret = secret
+        o1 = Obj('a', 's1')
+        o2 = Obj('b', 's2')
+        diff = DeepDiff(o1, o2, exclude_paths=["root.secret"])
+        changed = diff.get('values_changed', {})
+        assert 'root.name' in changed
+        assert 'root.secret' not in changed
+
+    def test_exclude_wildcard_with_removed_items(self):
+        """Removed list items are still reported when a key inside them is excluded."""
+        t1 = [{'x': 1, 'y': 2}, {'x': 3, 'y': 4}, {'x': 5, 'y': 6}]
+        t2 = [{'x': 1, 'y': 2}]
+        diff = DeepDiff(t1, t2, exclude_paths=["root[*]['y']"])
+        removed = diff.get('iterable_item_removed', {})
+        assert len(removed) == 2
+
+    def test_wildcard_verbose_level_2(self):
+        """The wildcard exclusion gives an empty diff at verbose_level=2 as well."""
+        t1 = {'a': {'x': 1}, 'b': {'x': 2}}
+        t2 = {'a': {'x': 10}, 'b': {'x': 20}}
+        diff = DeepDiff(t1, t2, exclude_paths=["root[*]['x']"], verbose_level=2)
+        assert diff == {}
+
+    def test_multiple_wildcards_in_one_pattern(self):
+        """Three chained [*] segments exclude every three-level-deep leaf."""
+        t1 = {'a': {'b': {'c': 1}}, 'x': {'y': {'z': 2}}}
+        t2 = {'a': {'b': {'c': 10}}, 'x': {'y': {'z': 20}}}
+        diff = DeepDiff(t1, t2, exclude_paths=["root[*][*][*]"])
+        assert diff == {}
+
+    def test_wildcard_does_not_affect_identical_objects(self):
+        """Diffing an object against itself with root[**] excluded gives an empty diff."""
+        t1 = {'a': [1, 2, 3], 'b': {'c': 'd'}}
+        diff = DeepDiff(t1, t1, exclude_paths=["root[**]"])
+        assert diff == {}
+
+    def test_wildcard_as_single_exclude_path_string(self):
+        """exclude_paths accepts a single string, not just a list."""
+        t1 = {'a': {'x': 1}, 'b': {'x': 2}}
+        t2 = {'a': {'x': 10}, 'b': {'x': 20}}
+        diff = DeepDiff(t1, t2, exclude_paths="root[*]['x']")
+        assert diff == {}
+
+    def test_include_wildcard_as_single_string(self):
+        """include_paths accepts a single string, not just a list."""
+        t1 = {'a': {'x': 1, 'y': 2}, 'b': {'x': 3, 'y': 4}}
+        t2 = {'a': {'x': 10, 'y': 2}, 'b': {'x': 30, 'y': 4}}
+        diff = DeepDiff(t1, t2, include_paths="root[*]['x']")
+        changed = diff.get('values_changed', {})
+        assert len(changed) == 2
+        assert "root['a']['y']" not in changed
+
+    def test_literal_star_key_not_treated_as_wildcard(self):
+        """A dict key named '*' should be treated literally, not as a wildcard."""
+        t1 = {'*': 1, 'a': 2, 'b': 3}
+        t2 = {'*': 10, 'a': 20, 'b': 30}
+        # Exclude only the literal '*' key, not all keys
+        diff = DeepDiff(t1, t2, exclude_paths=["root['*']"])
+        changed = diff.get('values_changed', {})
+        # '*' key should be excluded, but 'a' and 'b' should still show changes
+        assert "root['*']" not in changed
+        assert "root['a']" in changed
+        assert "root['b']" in changed
+
+    def test_glob_matcher_literal_star_vs_wildcard(self):
+        """GlobPathMatcher(root['*']) should only match literal '*' key."""
+        matcher = GlobPathMatcher("root['*']")
+        # Identity checks (not truthiness) keep this test as strict as the
+        # other GlobPathMatcher.match() tests, which pin the exact bool.
+        # Should NOT match arbitrary keys (that's what root[*] is for)
+        assert matcher.match("root['hello']") is False
+        assert matcher.match("root['a']") is False
+        # Should match the literal '*' key
+        assert matcher.match("root['*']") is True
+
+    def test_exclude_takes_precedence_over_include(self):
+        """When a path matches both include and exclude, exclude should win."""
+        t1 = {'x': 1, 'y': 2}
+        t2 = {'x': 10, 'y': 20}
+        diff = DeepDiff(t1, t2, include_paths=["root['x']"], exclude_paths=["root['x']"])
+        assert diff == {}
+
+    def test_exclude_glob_takes_precedence_over_include_glob(self):
+        """Exclude glob should take precedence over include glob for same path."""
+        t1 = {'a': {'x': 1}, 'b': {'x': 2}}
+        t2 = {'a': {'x': 10}, 'b': {'x': 20}}
+        diff = DeepDiff(t1, t2, include_paths=["root[*]['x']"], exclude_paths=["root['a'][*]"])
+        changed = diff.get('values_changed', {})
+        assert "root['a']['x']" not in changed
+        assert "root['b']['x']" in changed
+
+    def test_include_glob_with_custom_operator(self):
+        """include_glob_paths should filter custom operator reports to only matching paths."""
+        from deepdiff.operator import BaseOperator
+
+        class AlwaysReport(BaseOperator):
+            """Reports on dict-level comparisons, which are ancestors of the glob target."""
+            def give_up_diffing(self, level, diff_instance):
+                diff_instance.custom_report_result(
+                    'custom_report', level, {'message': 'custom'})
+                return True
+
+        t1 = {'a': {'x': 1}, 'b': {'x': 2}}
+        t2 = {'a': {'x': 10}, 'b': {'x': 20}}
+        # Operator fires on dict type — so it reports at root['a'] and root['b'] level
+        # NOTE(review): `root` itself is also a dict and give_up_diffing returns
+        # True, so the operator may stop the walk at `root` before root['a'] is
+        # visited — confirm the assertions below are not vacuously true.
+        op = AlwaysReport(types=[dict])
+        diff = DeepDiff(t1, t2, include_paths=["root[*]['x']"], custom_operators=[op])
+        custom = diff.get('custom_report', set())
+        # root['a'] and root['b'] are ancestors of the glob pattern, not matches
+        # or descendants — _skip_report_for_include_glob should filter them out
+        assert "root['a']" not in custom
+        assert "root['b']" not in custom
+
+    def test_mixed_exact_include_and_glob_include(self):
+        """When both exact include_paths and glob include_paths are used together,
+        exact matches should pass through without glob filtering."""
+        t1 = {'a': {'x': 1, 'y': 2}, 'b': {'x': 3, 'y': 4}}
+        t2 = {'a': {'x': 10, 'y': 20}, 'b': {'x': 30, 'y': 40}}
+        diff = DeepDiff(
+            t1, t2,
+            include_paths=["root['a']", "root[*]['x']"],
+        )
+        changed = diff.get('values_changed', {})
+        # root['a']['y'] is covered by exact include root['a']
+        assert "root['a']['y']" in changed
+        # root['b']['x'] is covered by glob root[*]['x']
+        assert "root['b']['x']" in changed
+        # root['b']['y'] is NOT covered by either
+        assert "root['b']['y']" not in changed