diff --git a/deepdiff/base.py b/deepdiff/base.py index 56a70b1c..d3b24fb8 100644 --- a/deepdiff/base.py +++ b/deepdiff/base.py @@ -1,4 +1,4 @@ -from typing import Protocol, Any +from typing import Any from deepdiff.helper import strings, numbers, SetOrdered @@ -6,16 +6,7 @@ TYPE_STABILIZATION_MSG = 'Unable to stabilize the Numpy array {} due to {}. Please set ignore_order=False.' -class BaseProtocol(Protocol): - t1: Any - t2: Any - cutoff_distance_for_pairs: float - use_log_scale: bool - log_scale_similarity_threshold: float - view: str - - -class Base(BaseProtocol): +class Base: numbers = numbers strings = strings diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index dd8080c1..47b900e5 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -8,7 +8,7 @@ from pathlib import Path from enum import Enum from deepdiff.helper import (strings, numbers, times, unprocessed, not_hashed, add_to_frozen_set, - convert_item_or_items_into_set_else_none, get_doc, + convert_item_or_items_into_set_else_none, get_doc, ipranges, convert_item_or_items_into_compiled_regexes_else_none, get_id, type_is_subclass_of_type_group, type_in_type_group, number_to_string, datetime_normalize, KEY_TO_VAL_STR, @@ -142,7 +142,7 @@ class DeepHash(Base): __doc__ = doc def __init__(self, - obj, + obj: Any, *, apply_hash=True, custom_operators: Optional[List[Any]] =None, @@ -484,6 +484,11 @@ def _prep_number(self, obj): number_format_notation=self.number_format_notation) return KEY_TO_VAL_STR.format(type_, obj) + def _prep_ipranges(self, obj): + type_ = 'iprange' + obj = str(obj) + return KEY_TO_VAL_STR.format(type_, obj) + def _prep_datetime(self, obj): type_ = 'datetime' obj = datetime_normalize(self.truncate_datetime, obj, default_timezone=self.default_timezone) @@ -558,6 +563,9 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): elif isinstance(obj, numbers): # type: ignore result = self._prep_number(obj) + elif isinstance(obj, ipranges): + result = self._prep_ipranges(obj) + elif isinstance(obj, MutableMapping): result, counts = self._prep_dict(obj=obj, parent=parent, parents_ids=parents_ids) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 82bc4021..d84ecc7e 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -12,7 +12,7 @@ from enum import Enum from copy import deepcopy from math import isclose as is_close -from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional, Set, FrozenSet, TYPE_CHECKING +from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional, Set, FrozenSet, TYPE_CHECKING, Protocol from collections.abc import Mapping, Iterable, Sequence from collections import defaultdict from inspect import getmembers @@ -27,7 +27,7 @@ np_ndarray, np_floating, get_numpy_ndarray_rows, RepeatedTimer, TEXT_VIEW, TREE_VIEW, DELTA_VIEW, detailed__dict__, add_root_to_paths, np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS, - PydanticBaseModel, Opcode, SetOrdered) + PydanticBaseModel, Opcode, SetOrdered, ipranges) from deepdiff.serialization import SerializationMixin from deepdiff.distance import DistanceMixin, logarithmic_similarity from deepdiff.model import ( @@ -119,7 +119,17 @@ def _report_progress(_stats, progress_logger, duration): ) -class DeepDiff(ResultDict, SerializationMixin, DistanceMixin, Base): +class DeepDiffProtocol(Protocol): + t1: Any + t2: Any + cutoff_distance_for_pairs: float + use_log_scale: bool + log_scale_similarity_threshold: float + view: str + + + +class DeepDiff(ResultDict, SerializationMixin, DistanceMixin, DeepDiffProtocol, Base): __doc__ = doc CACHE_AUTO_ADJUST_THRESHOLD = 0.25 @@ -1501,6 +1511,11 @@ def _diff_numbers(self, level, local_tree=None, report_type_change=True): if t1_s != t2_s: self._report_result('values_changed', level, local_tree=local_tree) + def _diff_ipranges(self, level, local_tree=None): + """Diff IP ranges""" + if str(level.t1) != str(level.t2): + self._report_result('values_changed', level, local_tree=local_tree) + def _diff_datetime(self, level, local_tree=None): """Diff DateTimes""" level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone) @@ -1695,6 +1710,9 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree= elif isinstance(level.t1, datetime.datetime): self._diff_datetime(level, local_tree=local_tree) + elif isinstance(level.t1, ipranges): + self._diff_ipranges(level, local_tree=local_tree) + elif isinstance(level.t1, (datetime.date, datetime.timedelta, datetime.time)): self._diff_time(level, local_tree=local_tree) diff --git a/deepdiff/distance.py b/deepdiff/distance.py index 789fe445..adaf5045 100644 --- a/deepdiff/distance.py +++ b/deepdiff/distance.py @@ -1,6 +1,6 @@ import math import datetime -from deepdiff.base import BaseProtocol +from typing import TYPE_CHECKING, Callable, Protocol, Any from deepdiff.deephash import DeepHash from deepdiff.helper import ( DELTA_VIEW, numbers, strings, add_to_frozen_set, not_found, only_numbers, np, np_float64, time_to_seconds, @@ -8,15 +8,38 @@ CannotCompare) from collections.abc import Mapping, Iterable +if TYPE_CHECKING: + from deepdiff.diff import DeepDiffProtocol -DISTANCE_CALCS_NEEDS_CACHE = "Distance calculation can not happen once the cache is purged. Try with _cache='keep'" + class DistanceProtocol(DeepDiffProtocol, Protocol): + hashes: dict + deephash_parameters: dict + iterable_compare_func: Callable | None + math_epsilon: float + cutoff_distance_for_pairs: float + + def __get_item_rough_length(self, item, parent:str="root") -> float: + ... + def _to_delta_dict( + self, + directed: bool = True, + report_repetition_required: bool = True, + always_include_values: bool = False, + ) -> dict: + ... + def __calculate_item_deephash(self, item: Any) -> None: + ... -class DistanceMixin(BaseProtocol): - def _get_rough_distance(self): +DISTANCE_CALCS_NEEDS_CACHE = "Distance calculation can not happen once the cache is purged. Try with _cache='keep'" + + +class DistanceMixin: + + def _get_rough_distance(self: "DistanceProtocol"): """ Gives a numeric value for the distance of t1 and t2 based on how many operations are needed to convert one to the other. @@ -51,7 +74,7 @@ def _get_rough_distance(self): return diff_length / (t1_len + t2_len) - def __get_item_rough_length(self, item, parent='root'): + def __get_item_rough_length(self: "DistanceProtocol", item, parent='root'): """ Get the rough length of an item. It is used as a part of calculating the rough distance between objects. @@ -69,7 +92,7 @@ def __get_item_rough_length(self, item, parent='root'): length = DeepHash.get_key(self.hashes, key=item, default=None, extract_index=1) return length - def __calculate_item_deephash(self, item): + def __calculate_item_deephash(self: "DistanceProtocol", item: Any) -> None: DeepHash( item, hashes=self.hashes, @@ -79,8 +102,7 @@ def __calculate_item_deephash(self, item): ) def _precalculate_distance_by_custom_compare_func( - self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type): - + self: "DistanceProtocol", hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type): pre_calced_distances = dict_() for added_hash in hashes_added: for removed_hash in hashes_removed: @@ -99,7 +121,7 @@ def _precalculate_distance_by_custom_compare_func( return pre_calced_distances def _precalculate_numpy_arrays_distance( - self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type): + self: "DistanceProtocol", hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type): # We only want to deal with 1D arrays. if isinstance(t2_hashtable[next(iter(hashes_added))].item, (np_ndarray, list)): @@ -203,7 +225,7 @@ def _get_numbers_distance(num1, num2, max_=1, use_log_scale=False, log_scale_sim return 0 if use_log_scale: distance = logarithmic_distance(num1, num2) - if distance < logarithmic_distance: + if distance < 0: return 0 return distance if not isinstance(num1, float): @@ -246,7 +268,7 @@ def numpy_apply_log_keep_sign(array, offset=MATH_LOG_OFFSET): return signed_log_values -def logarithmic_similarity(a: numbers, b: numbers, threshold: float=0.1): +def logarithmic_similarity(a: numbers, b: numbers, threshold: float=0.1) -> float: """ A threshold of 0.1 translates to about 10.5% difference. A threshold of 0.5 translates to about 65% difference. @@ -255,7 +277,7 @@ def logarithmic_similarity(a: numbers, b: numbers, threshold: float=0.1): return logarithmic_distance(a, b) < threshold -def logarithmic_distance(a: numbers, b: numbers): +def logarithmic_distance(a: numbers, b: numbers) -> float: # Apply logarithm to the absolute values and consider the sign a = float(a) b = float(b) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index b1975580..63a4e315 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -8,7 +8,8 @@ import string import time import enum -from typing import NamedTuple, Any, List, Optional, Dict, Union, TYPE_CHECKING +import ipaddress +from typing import NamedTuple, Any, List, Optional, Dict, Union, TYPE_CHECKING, Tuple from ast import literal_eval from decimal import Decimal, localcontext, InvalidOperation as InvalidDecimalOperation from itertools import repeat @@ -184,9 +185,10 @@ def get_semvar_as_integer(version): only_complex_number = (complex,) + numpy_complex_numbers only_numbers = (int, float, complex, Decimal) + numpy_numbers datetimes = (datetime.datetime, datetime.date, datetime.timedelta, datetime.time) +ipranges = (ipaddress.IPv4Interface, ipaddress.IPv6Interface, ipaddress.IPv4Network, ipaddress.IPv6Network) uuids = (uuid.UUID, ) times = (datetime.datetime, datetime.time) -numbers = only_numbers + datetimes +numbers: Tuple = only_numbers + datetimes booleans = (bool, np_bool_) basic_types = strings + numbers + uuids + booleans + (type(None), ) diff --git a/deepdiff/search.py b/deepdiff/search.py index ae86ce09..007c566c 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -5,7 +5,7 @@ import logging from deepdiff.helper import ( - strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE + strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE, ipranges ) logger = logging.getLogger(__name__) @@ -115,7 +115,7 @@ def __init__(self, matched_values=self.__set_or_dict(), unprocessed=[]) self.use_regexp = use_regexp - if not strict_checking and isinstance(item, numbers): + if not strict_checking and (isinstance(item, numbers) or isinstance(item, ipranges)): item = str(item) if self.use_regexp: try: @@ -312,6 +312,9 @@ def __search(self, obj, item, parent="root", parents_ids=frozenset()): elif isinstance(obj, strings) and isinstance(item, numbers): return + elif isinstance(obj, ipranges): + self.__search_str(str(obj), item, parent) + elif isinstance(obj, numbers): self.__search_numbers(obj, item, parent) diff --git a/deepdiff/serialization.py b/deepdiff/serialization.py index 4a471ed3..c148aadf 100644 --- a/deepdiff/serialization.py +++ b/deepdiff/serialization.py @@ -32,6 +32,7 @@ pydantic_base_model_type, PydanticBaseModel, NotPresent, + ipranges, ) from deepdiff.model import DeltaResult @@ -112,7 +113,8 @@ class UnsupportedFormatErr(TypeError): 'SetOrdered': SetOrdered, 'namedtuple': collections.namedtuple, 'OrderedDict': collections.OrderedDict, - 'Pattern': re.Pattern, + 'Pattern': re.Pattern, + 'iprange': str, } diff --git a/deepdiff/summarize.py b/deepdiff/summarize.py index 06dc69c4..f911b84c 100644 --- a/deepdiff/summarize.py +++ b/deepdiff/summarize.py @@ -22,7 +22,10 @@ def calculate_weights(node): weight = 0 children_weights = {} for k, v in node.items(): - edge_weight = len(k) + try: + edge_weight = len(k) + except TypeError: + edge_weight = 1 child_weight, child_structure = calculate_weights(v) total_weight = edge_weight + child_weight weight += total_weight @@ -133,6 +136,9 @@ def greedy_tree_summarization_balanced(json_data: JSON, max_weight: int, balance def summarize(data: JSON, max_length:int=200, balance_threshold:float=0.6) -> str: - return json_dumps( - greedy_tree_summarization_balanced(data, max_length, balance_threshold) - ) + try: + return json_dumps( + greedy_tree_summarization_balanced(data, max_length, balance_threshold) + ) + except Exception: + return str(data) diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 9b426044..10fbdb21 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -2252,3 +2252,9 @@ def test_affected_root_keys_when_dict_empty(self): diff2 = DeepDiff({}, {1:1, 2:2}) assert [] == diff2.affected_root_keys + + def test_range1(self): + range1 = range(0, 10) + range2 = range(0, 8) + diff = DeepDiff(range1, range2) + assert {'iterable_item_removed': {'root[8]': 8, 'root[9]': 9}} == diff diff --git a/tests/test_hash.py b/tests/test_hash.py index 43900c0b..c57afee8 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -4,11 +4,13 @@ import pytz import logging import datetime +import ipaddress +from typing import Union from pathlib import Path from collections import namedtuple from functools import partial from enum import Enum -from deepdiff import DeepHash +from deepdiff import DeepDiff, DeepHash from deepdiff.deephash import ( prepare_string_for_hashing, unprocessed, UNPROCESSED_KEY, BoolObj, HASH_LOOKUP_ERR_MSG, combine_hashes_lists) @@ -999,10 +1001,39 @@ def test_combine_hashes_lists(self, items, prefix, expected): (7, b"First have a cup of potatos. Then \xc3\x28 cup of flour", None, False, UnicodeDecodeError, EXPECTED_MESSAGE3), ]) def test_hash_encodings(self, test_num, item, encodings, ignore_encoding_errors, expected_result, expected_message): - if UnicodeDecodeError == expected_result: + if UnicodeDecodeError == expected_result: # NOQA with pytest.raises(expected_result) as exc_info: DeepHash(item, encodings=encodings, ignore_encoding_errors=ignore_encoding_errors) assert expected_message == str(exc_info.value), f"test_encodings test #{test_num} failed." else: result = DeepHash(item, encodings=encodings, ignore_encoding_errors=ignore_encoding_errors) assert expected_result == result, f"test_encodings test #{test_num} failed." + + def test_ip_addresses(self): + + class ClassWithIp: + """Class containing single data member to demonstrate deepdiff infinite iterate over IPv6Interface""" + + def __init__(self, addr: str): + self.field: Union[ + ipaddress.IPv4Network, + ipaddress.IPv6Network, + ipaddress.IPv4Interface, + ipaddress.IPv6Interface, + ] = ipaddress.IPv6Network(addr) + + + obj1 = ClassWithIp("2002:db8::/30") + obj1_hash = DeepHashPrep(obj1) + repr(obj1_hash) # shouldn't raise error + assert r"objClassWithIp:{str:field:iprange:2002:db8::/30}" == obj1_hash[obj1] + obj2 = ClassWithIp("2001:db8::/32") + diff = DeepDiff(obj1, obj2) + assert { + "values_changed": { + "root.field": { + "new_value": ipaddress.IPv6Network("2001:db8::/32"), + "old_value": ipaddress.IPv6Network("2002:db8::/30"), + } + } + } == diff diff --git a/tests/test_search.py b/tests/test_search.py index b8075c2a..3984349a 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -1,8 +1,10 @@ #!/usr/bin/env python import pytest +import ipaddress +import logging +from typing import Union from deepdiff import DeepSearch, grep from datetime import datetime -import logging logging.disable(logging.CRITICAL) item = "somewhere" @@ -20,6 +22,19 @@ def __repr__(self): return self.__str__() +class ClassWithIp: + """Class containing single data member to demonstrate deepdiff infinite iterate over IPv6Interface""" + + def __init__(self, addr: str): + self.field: Union[ + ipaddress.IPv4Network, + ipaddress.IPv6Network, + ipaddress.IPv4Interface, + ipaddress.IPv6Interface, + ] = ipaddress.IPv6Network(addr) + + + class TestDeepSearch: """DeepSearch Tests.""" @@ -501,3 +516,7 @@ def test_grep_regex_in_string_in_tuple(self): item = "some.*" result = {"matched_values": {"root[3]"}} assert obj | grep(item, verbose_level=1, use_regexp=True) == result + + def test_search_ip_addresses(self): + obj1 = [ClassWithIp("2002:db8::/30"), ClassWithIp("2002:db8::/32")] + assert obj1 | grep("2002:db8::/32") == {'matched_values': ['root[1].field']}