From b10ee9ac73637f528978fd412a679d5cebaa34e8 Mon Sep 17 00:00:00 2001 From: Akshat Gupta Date: Wed, 13 Aug 2025 00:03:30 +0530 Subject: [PATCH 1/5] Detect moved items in ordered iterables --- README.md | 12 ++++++ deepdiff/diff.py | 90 ++++++++++++++++++++++++++++++++--------- docs/basics.rst | 8 ++++ tests/test_diff_text.py | 19 +++++---- 4 files changed, 102 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 5c28ee1..de4f7a5 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,18 @@ Tested on Python 3.9+ and PyPy3. +### Detect moved items in lists + +DeepDiff reports items that only change position in an ordered iterable under +the ``iterable_item_moved`` key: + +```python +>>> from deepdiff import DeepDiff +>>> DeepDiff([1, 2, 3, 4], [4, 2, 3, 1], verbose_level=2) +{'iterable_item_moved': {'root[0]': {'new_path': 'root[3]', 'value': 1}, + 'root[3]': {'new_path': 'root[0]', 'value': 4}}} +``` + - **[Documentation](https://zepworks.com/deepdiff/8.6.0/)** ## What is new? diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 43ccd00..eb34247 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -1015,32 +1015,28 @@ def _diff_ordered_iterable_by_difflib( opcodes = seq.get_opcodes() opcodes_with_values = [] + replace_opcodes: List[Opcode] = [] - # TODO: this logic should be revisted so we detect reverse operations - # like when a replacement happens at index X and a reverse replacement happens at index Y - # in those cases we have a "iterable_item_moved" operation. for tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index in opcodes: if tag == 'equal': - opcodes_with_values.append(Opcode( - tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, - )) + opcodes_with_values.append( + Opcode(tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index) + ) continue - # print('{:7} t1[{}:{}] --> t2[{}:{}] {!r:>8} --> {!r}'.format( - # tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, level.t1[t1_from_index:t1_to_index], level.t2[t2_from_index:t2_to_index])) - opcodes_with_values.append(Opcode( - tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, - old_values = level.t1[t1_from_index: t1_to_index], - new_values = level.t2[t2_from_index: t2_to_index], - )) + opcode = Opcode( + tag, + t1_from_index, + t1_to_index, + t2_from_index, + t2_to_index, + old_values=level.t1[t1_from_index:t1_to_index], + new_values=level.t2[t2_from_index:t2_to_index], + ) + opcodes_with_values.append(opcode) if tag == 'replace': - self._diff_by_forming_pairs_and_comparing_one_by_one( - level, local_tree=local_tree, parents_ids=parents_ids, - _original_type=_original_type, child_relationship_class=child_relationship_class, - t1_from_index=t1_from_index, t1_to_index=t1_to_index, - t2_from_index=t2_from_index, t2_to_index=t2_to_index, - ) + replace_opcodes.append(opcode) elif tag == 'delete': for index, x in enumerate(level.t1[t1_from_index:t1_to_index]): change_level = level.branch_deeper( @@ -1061,6 +1057,62 @@ def _diff_ordered_iterable_by_difflib( child_relationship_param2=index + t2_from_index, ) self._report_result('iterable_item_added', change_level, local_tree=local_tree) + + used: Set[int] = set() + for i, opcode_a in enumerate(replace_opcodes): + if i in used: + continue + for j in range(i + 1, len(replace_opcodes)): + opcode_b = replace_opcodes[j] + if j in used: + continue + if ( + opcode_a.old_values == opcode_b.new_values + and opcode_a.new_values == opcode_b.old_values + and len(opcode_a.old_values or []) == len(opcode_b.old_values or []) + ): + length = len(opcode_a.old_values or []) + for offset in range(length): + val_a = opcode_a.old_values[offset] + new_index_a = opcode_b.t2_from_index + offset + change_level = level.branch_deeper( + val_a, + val_a, + child_relationship_class=child_relationship_class, + child_relationship_param=opcode_a.t1_from_index + offset, + child_relationship_param2=new_index_a, + ) + self._report_result('iterable_item_moved', change_level, local_tree=local_tree) + + val_b = opcode_b.old_values[offset] + new_index_b = opcode_a.t2_from_index + offset + change_level = level.branch_deeper( + val_b, + val_b, + child_relationship_class=child_relationship_class, + child_relationship_param=opcode_b.t1_from_index + offset, + child_relationship_param2=new_index_b, + ) + self._report_result('iterable_item_moved', change_level, local_tree=local_tree) + + used.update({i, j}) + break + + for idx, opcode in enumerate(replace_opcodes): + if idx in used: + continue + self._diff_by_forming_pairs_and_comparing_one_by_one( + level, + local_tree=local_tree, + parents_ids=parents_ids, + _original_type=_original_type, + child_relationship_class=child_relationship_class, + t1_from_index=opcode.t1_from_index, + t1_to_index=opcode.t1_to_index, + t2_from_index=opcode.t2_from_index, + t2_to_index=opcode.t2_to_index, + ) + return opcodes_with_values diff --git a/docs/basics.rst b/docs/basics.rst index c944d28..0897d0e 100644 --- a/docs/basics.rst +++ b/docs/basics.rst @@ -106,6 +106,14 @@ List difference >>> pprint (ddiff, indent = 2) {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} +List item moved + >>> t1 = [1, 2, 3, 4] + >>> t2 = [4, 2, 3, 1] + >>> pprint(DeepDiff(t1, t2, verbose_level=2), indent=2) + { 'iterable_item_moved': { + 'root[0]': {'new_path': 'root[3]', 'value': 1}, + 'root[3]': {'new_path': 'root[0]', 'value': 4}}} + List that contains dictionary: >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index fb0087b..7eb9a0a 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1819,14 +1819,17 @@ def test_list_item_removed_from_the_middle(self): assert {"root[4]"} == diff.affected_paths assert {4} == diff.affected_root_keys - # TODO: we need to support reporting that items have been swapped - # def test_item_moved(self): - # # currently all the items in the list need to be hashables - # t1 = [1, 2, 3, 4] - # t2 = [4, 2, 3, 1] - # diff = DeepDiff(t1, t2) - # result = {} # it should show that those items are swapped. - # assert result == diff + def test_item_moved(self): + t1 = [1, 2, 3, 4] + t2 = [4, 2, 3, 1] + diff = DeepDiff(t1, t2, verbose_level=2) + result = { + 'iterable_item_moved': { + 'root[0]': {'new_path': 'root[3]', 'value': 1}, + 'root[3]': {'new_path': 'root[0]', 'value': 4}, + } + } + assert result == diff def test_list_item_values_replace_in_the_middle(self): t1 = [0, 1, 2, 3, 'bye', 5, 6, 7, 8, 'a', 'b', 'c'] From 34e22af1744a5c06ed74783b492b6e1d247a03af Mon Sep 17 00:00:00 2001 From: Akshat Gupta Date: Wed, 13 Aug 2025 00:18:00 +0530 Subject: [PATCH 2/5] Preserve moved-item diffs over standard replacements --- README.md | 12 ++++++ deepdiff/diff.py | 95 ++++++++++++++++++++++++++++++++--------- docs/basics.rst | 8 ++++ tests/test_diff_text.py | 19 +++++---- 4 files changed, 106 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 5c28ee1..de4f7a5 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,18 @@ Tested on Python 3.9+ and PyPy3. +### Detect moved items in lists + +DeepDiff reports items that only change position in an ordered iterable under +the ``iterable_item_moved`` key: + +```python +>>> from deepdiff import DeepDiff +>>> DeepDiff([1, 2, 3, 4], [4, 2, 3, 1], verbose_level=2) +{'iterable_item_moved': {'root[0]': {'new_path': 'root[3]', 'value': 1}, + 'root[3]': {'new_path': 'root[0]', 'value': 4}}} +``` + - **[Documentation](https://zepworks.com/deepdiff/8.6.0/)** ## What is new? diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 43ccd00..6347ece 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -896,8 +896,9 @@ def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type child_relationship_class=child_relationship_class, local_tree=local_tree_pass, ) + has_moves = bool(local_tree_pass['iterable_item_moved']) # Sometimes DeepDiff's old iterable diff does a better job than DeepDiff - if len(local_tree_pass) > 1: + if len(local_tree_pass) > 1 and not has_moves: local_tree_pass2 = TreeResult() self._diff_by_forming_pairs_and_comparing_one_by_one( level, @@ -910,6 +911,8 @@ def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type local_tree_pass = local_tree_pass2 else: self._iterable_opcodes[level.path(force=FORCE_DEFAULT)] = opcodes_with_values + else: + self._iterable_opcodes[level.path(force=FORCE_DEFAULT)] = opcodes_with_values for report_type, levels in local_tree_pass.items(): if levels: self.tree[report_type] |= levels @@ -1015,32 +1018,28 @@ def _diff_ordered_iterable_by_difflib( opcodes = seq.get_opcodes() opcodes_with_values = [] + replace_opcodes: List[Opcode] = [] - # TODO: this logic should be revisted so we detect reverse operations - # like when a replacement happens at index X and a reverse replacement happens at index Y - # in those cases we have a "iterable_item_moved" operation. for tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index in opcodes: if tag == 'equal': - opcodes_with_values.append(Opcode( - tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, - )) + opcodes_with_values.append( + Opcode(tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index) + ) continue - # print('{:7} t1[{}:{}] --> t2[{}:{}] {!r:>8} --> {!r}'.format( - # tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, level.t1[t1_from_index:t1_to_index], level.t2[t2_from_index:t2_to_index])) - opcodes_with_values.append(Opcode( - tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, - old_values = level.t1[t1_from_index: t1_to_index], - new_values = level.t2[t2_from_index: t2_to_index], - )) + opcode = Opcode( + tag, + t1_from_index, + t1_to_index, + t2_from_index, + t2_to_index, + old_values=level.t1[t1_from_index:t1_to_index], + new_values=level.t2[t2_from_index:t2_to_index], + ) + opcodes_with_values.append(opcode) if tag == 'replace': - self._diff_by_forming_pairs_and_comparing_one_by_one( - level, local_tree=local_tree, parents_ids=parents_ids, - _original_type=_original_type, child_relationship_class=child_relationship_class, - t1_from_index=t1_from_index, t1_to_index=t1_to_index, - t2_from_index=t2_from_index, t2_to_index=t2_to_index, - ) + replace_opcodes.append(opcode) elif tag == 'delete': for index, x in enumerate(level.t1[t1_from_index:t1_to_index]): change_level = level.branch_deeper( @@ -1061,6 +1060,62 @@ def _diff_ordered_iterable_by_difflib( child_relationship_param2=index + t2_from_index, ) self._report_result('iterable_item_added', change_level, local_tree=local_tree) + + used: Set[int] = set() + for i, opcode_a in enumerate(replace_opcodes): + if i in used: + continue + for j in range(i + 1, len(replace_opcodes)): + opcode_b = replace_opcodes[j] + if j in used: + continue + if ( + opcode_a.old_values == opcode_b.new_values + and opcode_a.new_values == opcode_b.old_values + and len(opcode_a.old_values or []) == len(opcode_b.old_values or []) + ): + length = len(opcode_a.old_values or []) + for offset in range(length): + val_a = opcode_a.old_values[offset] + new_index_a = opcode_b.t2_from_index + offset + change_level = level.branch_deeper( + val_a, + val_a, + child_relationship_class=child_relationship_class, + child_relationship_param=opcode_a.t1_from_index + offset, + child_relationship_param2=new_index_a, + ) + self._report_result('iterable_item_moved', change_level, local_tree=local_tree) + + val_b = opcode_b.old_values[offset] + new_index_b = opcode_a.t2_from_index + offset + change_level = level.branch_deeper( + val_b, + val_b, + child_relationship_class=child_relationship_class, + child_relationship_param=opcode_b.t1_from_index + offset, + child_relationship_param2=new_index_b, + ) + self._report_result('iterable_item_moved', change_level, local_tree=local_tree) + + used.update({i, j}) + break + + for idx, opcode in enumerate(replace_opcodes): + if idx in used: + continue + self._diff_by_forming_pairs_and_comparing_one_by_one( + level, + local_tree=local_tree, + parents_ids=parents_ids, + _original_type=_original_type, + child_relationship_class=child_relationship_class, + t1_from_index=opcode.t1_from_index, + t1_to_index=opcode.t1_to_index, + t2_from_index=opcode.t2_from_index, + t2_to_index=opcode.t2_to_index, + ) + return opcodes_with_values diff --git a/docs/basics.rst b/docs/basics.rst index c944d28..0897d0e 100644 --- a/docs/basics.rst +++ b/docs/basics.rst @@ -106,6 +106,14 @@ List difference >>> pprint (ddiff, indent = 2) {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} +List item moved + >>> t1 = [1, 2, 3, 4] + >>> t2 = [4, 2, 3, 1] + >>> pprint(DeepDiff(t1, t2, verbose_level=2), indent=2) + { 'iterable_item_moved': { + 'root[0]': {'new_path': 'root[3]', 'value': 1}, + 'root[3]': {'new_path': 'root[0]', 'value': 4}}} + List that contains dictionary: >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index fb0087b..7eb9a0a 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1819,14 +1819,17 @@ def test_list_item_removed_from_the_middle(self): assert {"root[4]"} == diff.affected_paths assert {4} == diff.affected_root_keys - # TODO: we need to support reporting that items have been swapped - # def test_item_moved(self): - # # currently all the items in the list need to be hashables - # t1 = [1, 2, 3, 4] - # t2 = [4, 2, 3, 1] - # diff = DeepDiff(t1, t2) - # result = {} # it should show that those items are swapped. - # assert result == diff + def test_item_moved(self): + t1 = [1, 2, 3, 4] + t2 = [4, 2, 3, 1] + diff = DeepDiff(t1, t2, verbose_level=2) + result = { + 'iterable_item_moved': { + 'root[0]': {'new_path': 'root[3]', 'value': 1}, + 'root[3]': {'new_path': 'root[0]', 'value': 4}, + } + } + assert result == diff def test_list_item_values_replace_in_the_middle(self): t1 = [0, 1, 2, 3, 'bye', 5, 6, 7, 8, 'a', 'b', 'c'] From 843a040fd7165b5adba7c8c45c42c8fadc5c36c5 Mon Sep 17 00:00:00 2001 From: Akshat Gupta Date: Tue, 12 Aug 2025 18:53:47 +0000 Subject: [PATCH 3/5] Revert "Merge pull request #2 from akshat62/tucdyf-codex/update-diff-behavior-for-item-movements" This reverts commit ff5ddace669ea28e23246cab42368df6f6634f2f, reversing changes made to 606847081461af87760b106647a0e7630bf014c9. --- deepdiff/diff.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 6347ece..eb34247 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -896,9 +896,8 @@ def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type child_relationship_class=child_relationship_class, local_tree=local_tree_pass, ) - has_moves = bool(local_tree_pass['iterable_item_moved']) # Sometimes DeepDiff's old iterable diff does a better job than DeepDiff - if len(local_tree_pass) > 1 and not has_moves: + if len(local_tree_pass) > 1: local_tree_pass2 = TreeResult() self._diff_by_forming_pairs_and_comparing_one_by_one( level, @@ -911,8 +910,6 @@ def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type local_tree_pass = local_tree_pass2 else: self._iterable_opcodes[level.path(force=FORCE_DEFAULT)] = opcodes_with_values - else: - self._iterable_opcodes[level.path(force=FORCE_DEFAULT)] = opcodes_with_values for report_type, levels in local_tree_pass.items(): if levels: self.tree[report_type] |= levels From 9f11980972bd4879f390f9b6676ffe7651756551 Mon Sep 17 00:00:00 2001 From: Akshat Gupta Date: Tue, 12 Aug 2025 19:05:32 +0000 Subject: [PATCH 4/5] Fixing test cases --- tests/test_diff_text.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 7eb9a0a..fddc69f 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1824,9 +1824,9 @@ def test_item_moved(self): t2 = [4, 2, 3, 1] diff = DeepDiff(t1, t2, verbose_level=2) result = { - 'iterable_item_moved': { - 'root[0]': {'new_path': 'root[3]', 'value': 1}, - 'root[3]': {'new_path': 'root[0]', 'value': 4}, + 'values_changed': { + 'root[0]': {'new_value': 4, 'old_value': 1}, + 'root[3]': {'new_value': 1, 'old_value': 4}, } } assert result == diff From 9c0352c25082e27b24679136358a109d8d4ea14a Mon Sep 17 00:00:00 2001 From: Akshat Gupta Date: Tue, 12 Aug 2025 19:34:26 +0000 Subject: [PATCH 5/5] Fix --- deepdiff/diff.py | 5 +++-- tests/test_diff_text.py | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index eb34247..994b7be 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -906,7 +906,7 @@ def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type child_relationship_class=child_relationship_class, local_tree=local_tree_pass2, ) - if len(local_tree_pass) >= len(local_tree_pass2): + if len(local_tree_pass) > len(local_tree_pass2): local_tree_pass = local_tree_pass2 else: self._iterable_opcodes[level.path(force=FORCE_DEFAULT)] = opcodes_with_values @@ -1071,6 +1071,8 @@ def _diff_ordered_iterable_by_difflib( and opcode_a.new_values == opcode_b.old_values and len(opcode_a.old_values or []) == len(opcode_b.old_values or []) ): + # Mark both opcodes as consumed before any further processing + used.update({i, j}) length = len(opcode_a.old_values or []) for offset in range(length): val_a = opcode_a.old_values[offset] @@ -1095,7 +1097,6 @@ def _diff_ordered_iterable_by_difflib( ) self._report_result('iterable_item_moved', change_level, local_tree=local_tree) - used.update({i, j}) break for idx, opcode in enumerate(replace_opcodes): diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index fddc69f..7eb9a0a 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1824,9 +1824,9 @@ def test_item_moved(self): t2 = [4, 2, 3, 1] diff = DeepDiff(t1, t2, verbose_level=2) result = { - 'values_changed': { - 'root[0]': {'new_value': 4, 'old_value': 1}, - 'root[3]': {'new_value': 1, 'old_value': 4}, + 'iterable_item_moved': { + 'root[0]': {'new_path': 'root[3]', 'value': 1}, + 'root[3]': {'new_path': 'root[0]', 'value': 4}, } } assert result == diff