From 93833e36a7542632e019d2de16876d7b83957779 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Sep 2025 17:17:14 -0700 Subject: [PATCH 01/17] Fix drop_conflicts to handle non-bool returns from __eq__ When merging attrs with drop_conflicts, objects whose __eq__ returns non-bool values (e.g., numpy arrays) would raise ValueError. Now these are caught and treated as non-equivalent, causing the attribute to be dropped rather than raising an error. Fixes regression from #10726 --- xarray/structure/merge.py | 23 +++++-- xarray/tests/test_merge.py | 125 +++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+), 5 deletions(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 58168ddb024..6024be4c607 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -641,11 +641,24 @@ def merge_attrs(variable_attrs, combine_attrs, context=None): if key not in result and key not in dropped_keys } ) - result = { - key: value - for key, value in result.items() - if key not in attrs or equivalent(attrs[key], value) - } + # Filter out attributes that have conflicts + filtered_result = {} + for key, value in result.items(): + if key not in attrs: + # No conflict, keep the attribute + filtered_result[key] = value + else: + # Check if values are equivalent + try: + if equivalent(attrs[key], value): + # Values are equivalent, keep the attribute + filtered_result[key] = value + # else: Values differ, drop the attribute (don't add to filtered_result) + except ValueError: + # Likely an ambiguous truth value from numpy array comparison + # Treat as non-equivalent and drop the attribute + pass + result = filtered_result dropped_keys |= {key for key in attrs if key not in result} return result elif combine_attrs == "identical": diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 8ae05fbb261..e6e4b85afae 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -235,6 +235,131 @@ def test_merge_attrs_drop_conflicts(self): expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0}) assert_identical(actual, expected) + def test_merge_attrs_drop_conflicts_non_bool_eq(self): + """Test drop_conflicts behavior when __eq__ returns non-bool values. + + When comparing attribute values, the _equivalent_drop_conflicts() function + uses == which can return non-bool values. The new behavior treats ambiguous + or falsy equality results as non-equivalent, dropping the attribute rather + than raising an error. + """ + import numpy as np + + # Test with numpy arrays (which return arrays from ==) + arr1 = np.array([1, 2, 3]) + arr2 = np.array([1, 2, 3]) + arr3 = np.array([4, 5, 6]) + + ds1 = xr.Dataset(attrs={"arr": arr1, "scalar": 1}) + ds2 = xr.Dataset(attrs={"arr": arr2, "scalar": 1}) # Same array values + ds3 = xr.Dataset(attrs={"arr": arr3, "other": 2}) # Different array values + + # Arrays are considered equivalent if they have the same values + actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts") + assert "arr" in actual.attrs # Should keep the array since they're equivalent + assert actual.attrs["scalar"] == 1 + + # Different arrays cause the attribute to be dropped + actual = xr.merge([ds1, ds3], combine_attrs="drop_conflicts") + assert "arr" not in actual.attrs # Should drop due to conflict + assert "other" in actual.attrs + + # Test with custom objects that return non-bool from __eq__ + class CustomEq: + """Object whose __eq__ returns a non-bool value.""" + + def __init__(self, value): + self.value = value + + def __eq__(self, other): + if not isinstance(other, CustomEq): + return False + # Return a numpy array (truthy if all elements are non-zero) + return np.array([self.value == other.value]) + + def __repr__(self): + return f"CustomEq({self.value})" + + obj1 = CustomEq(42) + obj2 = CustomEq(42) # Same value + obj3 = CustomEq(99) # Different value + + ds4 = xr.Dataset(attrs={"custom": obj1, "x": 1}) + ds5 = xr.Dataset(attrs={"custom": obj2, "x": 1}) + ds6 = xr.Dataset(attrs={"custom": obj3, "y": 2}) + + # Objects with same value (returning truthy array [True]) + actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts") + assert "custom" in actual.attrs + assert actual.attrs["x"] == 1 + + # Objects with different values (returning falsy array [False]) + actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts") + assert "custom" not in actual.attrs # Dropped due to conflict + assert actual.attrs["x"] == 1 + assert actual.attrs["y"] == 2 + + # Test edge case: object whose __eq__ returns empty array (ambiguous truth value) + class EmptyArrayEq: + def __eq__(self, other): + if not isinstance(other, EmptyArrayEq): + return False + return np.array([]) # Empty array has ambiguous truth value + + def __repr__(self): + return "EmptyArrayEq()" + + empty_obj1 = EmptyArrayEq() + empty_obj2 = EmptyArrayEq() + + ds7 = xr.Dataset(attrs={"empty": empty_obj1}) + ds8 = xr.Dataset(attrs={"empty": empty_obj2}) + + # With new behavior: ambiguous truth values are treated as non-equivalent + # So the attribute is dropped instead of raising an error + actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts") + assert "empty" not in actual.attrs # Dropped due to ambiguous comparison + + # Test with object that returns multi-element array (also ambiguous) + class MultiArrayEq: + def __eq__(self, other): + if not isinstance(other, MultiArrayEq): + return False + return np.array([True, False]) # Multi-element array is ambiguous + + def __repr__(self): + return "MultiArrayEq()" + + multi_obj1 = MultiArrayEq() + multi_obj2 = MultiArrayEq() + + ds9 = xr.Dataset(attrs={"multi": multi_obj1}) + ds10 = xr.Dataset(attrs={"multi": multi_obj2}) + + # With new behavior: ambiguous arrays are treated as non-equivalent + actual = xr.merge([ds9, ds10], combine_attrs="drop_conflicts") + assert "multi" not in actual.attrs # Dropped due to ambiguous comparison + + # Test with all-True multi-element array (unambiguous truthy) + class AllTrueArrayEq: + def __eq__(self, other): + if not isinstance(other, AllTrueArrayEq): + return False + return np.array([True, True, True]) # All True, but still multi-element + + def __repr__(self): + return "AllTrueArrayEq()" + + alltrue1 = AllTrueArrayEq() + alltrue2 = AllTrueArrayEq() + + ds11 = xr.Dataset(attrs={"alltrue": alltrue1}) + ds12 = xr.Dataset(attrs={"alltrue": alltrue2}) + + # Multi-element arrays are ambiguous even if all True + actual = xr.merge([ds11, ds12], combine_attrs="drop_conflicts") + assert "alltrue" not in actual.attrs # Dropped due to ambiguous comparison + def test_merge_attrs_no_conflicts_compat_minimal(self): """make sure compat="minimal" does not silence errors""" ds1 = xr.Dataset({"a": ("x", [], {"a": 0})}) From ed18a1358affd9eb68e9ef638a40e547a37ae313 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Sep 2025 17:18:22 -0700 Subject: [PATCH 02/17] Add changelog entry --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index af47a096697..19e36dc1cc9 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,9 @@ Bug fixes `_. - Propagation coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`). By `Justus Magin `_. +- Fix ``combine_attrs="drop_conflicts"`` to handle objects with ``__eq__`` methods that return + non-bool values (e.g., numpy arrays) without raising ``ValueError`` (:pull:`10726`). + By `Maximilian Roos `_. Documentation ~~~~~~~~~~~~~ From 0fad4c8200617c8b18a63941d96f2185dc3916b2 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Sep 2025 19:29:27 -0700 Subject: [PATCH 03/17] Suppress DeprecationWarning for ambiguous array truth values The CI was failing because NumPy raises a DeprecationWarning before ValueError when evaluating ambiguous truth values. Since we're properly handling the ValueError, we suppress the warning in this specific context. --- xarray/structure/merge.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 6024be4c607..c8e79793081 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -650,10 +650,16 @@ def merge_attrs(variable_attrs, combine_attrs, context=None): else: # Check if values are equivalent try: - if equivalent(attrs[key], value): - # Values are equivalent, keep the attribute - filtered_result[key] = value - # else: Values differ, drop the attribute (don't add to filtered_result) + import warnings + + # Suppress DeprecationWarning about ambiguous truth values + # since we handle the resulting ValueError appropriately + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + if equivalent(attrs[key], value): + # Values are equivalent, keep the attribute + filtered_result[key] = value + # else: Values differ, drop the attribute (don't add to filtered_result) except ValueError: # Likely an ambiguous truth value from numpy array comparison # Treat as non-equivalent and drop the attribute From 7adaa40d0761abd007e26d7df5a93542b832962f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 14 Sep 2025 02:29:52 +0000 Subject: [PATCH 04/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/structure/merge.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index c8e79793081..6c2d9fe6bf4 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -655,7 +655,9 @@ def merge_attrs(variable_attrs, combine_attrs, context=None): # Suppress DeprecationWarning about ambiguous truth values # since we handle the resulting ValueError appropriately with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) + warnings.filterwarnings( + "ignore", category=DeprecationWarning + ) if equivalent(attrs[key], value): # Values are equivalent, keep the attribute filtered_result[key] = value From b87933f39c64239953061b3922f29309908ced16 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Sep 2025 19:45:27 -0700 Subject: [PATCH 05/17] Move DeprecationWarning suppression to equivalent function Rather than suppressing the warning in merge_attrs, handle it at the source in equivalent() where the 'or' operation happens. This is cleaner and includes a clear comment about when the suppression can be removed (when minimum numpy version >= 2.0). The warning only occurs in numpy < 2.0; numpy 2.0+ raises ValueError directly, which we already handle properly in merge_attrs. --- xarray/core/utils.py | 9 ++++++++- xarray/structure/merge.py | 16 ++++------------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index e490fc05c2f..66441b7718e 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -249,7 +249,14 @@ def equivalent(first: T, second: T) -> bool: return duck_array_ops.array_equiv(first, second) if isinstance(first, list) or isinstance(second, list): return list_equiv(first, second) # type: ignore[arg-type] - return (first == second) or (pd.isnull(first) and pd.isnull(second)) # type: ignore[call-overload] + + # Suppress DeprecationWarning about ambiguous truth values from numpy < 2.0 + # In numpy 2.0+, this will raise ValueError directly, which we handle in callers + # Can remove this suppression when minimum numpy version >= 2.0 + import warnings + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + return (first == second) or (pd.isnull(first) and pd.isnull(second)) # type: ignore[call-overload] def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool: diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 6c2d9fe6bf4..6024be4c607 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -650,18 +650,10 @@ def merge_attrs(variable_attrs, combine_attrs, context=None): else: # Check if values are equivalent try: - import warnings - - # Suppress DeprecationWarning about ambiguous truth values - # since we handle the resulting ValueError appropriately - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", category=DeprecationWarning - ) - if equivalent(attrs[key], value): - # Values are equivalent, keep the attribute - filtered_result[key] = value - # else: Values differ, drop the attribute (don't add to filtered_result) + if equivalent(attrs[key], value): + # Values are equivalent, keep the attribute + filtered_result[key] = value + # else: Values differ, drop the attribute (don't add to filtered_result) except ValueError: # Likely an ambiguous truth value from numpy array comparison # Treat as non-equivalent and drop the attribute From 116a8f09bdfa78314e1b3e358a971f04d7267d5f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 14 Sep 2025 02:45:52 +0000 Subject: [PATCH 06/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 66441b7718e..0c5334104bb 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -254,6 +254,7 @@ def equivalent(first: T, second: T) -> bool: # In numpy 2.0+, this will raise ValueError directly, which we handle in callers # Can remove this suppression when minimum numpy version >= 2.0 import warnings + with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning) return (first == second) or (pd.isnull(first) and pd.isnull(second)) # type: ignore[call-overload] From b1cba12de9e2870b5e115f8321b8b9883203db85 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Sep 2025 20:40:24 -0700 Subject: [PATCH 07/17] Move DeprecationWarning suppression from library to test The warning only occurs in our specific test case with custom objects that return numpy arrays from __eq__. This is a very edge case scenario, so it's more appropriate to suppress the warning in the test rather than in the library code. --- xarray/core/utils.py | 9 +-------- xarray/tests/test_merge.py | 36 +++++++++++++++++++++++------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 0c5334104bb..d969fa8ea8c 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -250,14 +250,7 @@ def equivalent(first: T, second: T) -> bool: if isinstance(first, list) or isinstance(second, list): return list_equiv(first, second) # type: ignore[arg-type] - # Suppress DeprecationWarning about ambiguous truth values from numpy < 2.0 - # In numpy 2.0+, this will raise ValueError directly, which we handle in callers - # Can remove this suppression when minimum numpy version >= 2.0 - import warnings - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - return (first == second) or (pd.isnull(first) and pd.isnull(second)) # type: ignore[call-overload] + return (first == second) or (pd.isnull(first) and pd.isnull(second)) # type: ignore[call-overload] def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool: diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index e6e4b85afae..efdee93a7ca 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -244,6 +244,7 @@ def test_merge_attrs_drop_conflicts_non_bool_eq(self): than raising an error. """ import numpy as np + import warnings # Test with numpy arrays (which return arrays from ==) arr1 = np.array([1, 2, 3]) @@ -288,16 +289,21 @@ def __repr__(self): ds5 = xr.Dataset(attrs={"custom": obj2, "x": 1}) ds6 = xr.Dataset(attrs={"custom": obj3, "y": 2}) - # Objects with same value (returning truthy array [True]) - actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts") - assert "custom" in actual.attrs - assert actual.attrs["x"] == 1 + # Suppress DeprecationWarning from numpy < 2.0 about ambiguous truth values + # when our custom __eq__ returns arrays that are evaluated in boolean context + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) - # Objects with different values (returning falsy array [False]) - actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts") - assert "custom" not in actual.attrs # Dropped due to conflict - assert actual.attrs["x"] == 1 - assert actual.attrs["y"] == 2 + # Objects with same value (returning truthy array [True]) + actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts") + assert "custom" in actual.attrs + assert actual.attrs["x"] == 1 + + # Objects with different values (returning falsy array [False]) + actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts") + assert "custom" not in actual.attrs # Dropped due to conflict + assert actual.attrs["x"] == 1 + assert actual.attrs["y"] == 2 # Test edge case: object whose __eq__ returns empty array (ambiguous truth value) class EmptyArrayEq: @@ -317,8 +323,10 @@ def __repr__(self): # With new behavior: ambiguous truth values are treated as non-equivalent # So the attribute is dropped instead of raising an error - actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts") - assert "empty" not in actual.attrs # Dropped due to ambiguous comparison + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts") + assert "empty" not in actual.attrs # Dropped due to ambiguous comparison # Test with object that returns multi-element array (also ambiguous) class MultiArrayEq: @@ -337,8 +345,10 @@ def __repr__(self): ds10 = xr.Dataset(attrs={"multi": multi_obj2}) # With new behavior: ambiguous arrays are treated as non-equivalent - actual = xr.merge([ds9, ds10], combine_attrs="drop_conflicts") - assert "multi" not in actual.attrs # Dropped due to ambiguous comparison + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + actual = xr.merge([ds9, ds10], combine_attrs="drop_conflicts") + assert "multi" not in actual.attrs # Dropped due to ambiguous comparison # Test with all-True multi-element array (unambiguous truthy) class AllTrueArrayEq: From be07f64824bbbf4e919ce379540c297937dd6174 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 14 Sep 2025 03:40:49 +0000 Subject: [PATCH 08/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_merge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index efdee93a7ca..066d64d2a6d 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -243,9 +243,10 @@ def test_merge_attrs_drop_conflicts_non_bool_eq(self): or falsy equality results as non-equivalent, dropping the attribute rather than raising an error. """ - import numpy as np import warnings + import numpy as np + # Test with numpy arrays (which return arrays from ==) arr1 = np.array([1, 2, 3]) arr2 = np.array([1, 2, 3]) From ce92f94faff5e03fdf3367ee6f7e657f851e4647 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Sep 2025 20:53:02 -0700 Subject: [PATCH 09/17] Simplify drop_conflicts implementation Based on code review, simplified the logic to: - Process attributes in a single loop instead of nested operations - Eliminate intermediate dictionary creation - Make the control flow clearer and more efficient --- xarray/structure/merge.py | 42 ++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 6024be4c607..1a142010f62 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -633,33 +633,29 @@ def merge_attrs(variable_attrs, combine_attrs, context=None): elif combine_attrs == "drop_conflicts": result = {} dropped_keys = set() + for attrs in variable_attrs: - result.update( - { - key: value - for key, value in attrs.items() - if key not in result and key not in dropped_keys - } - ) - # Filter out attributes that have conflicts - filtered_result = {} - for key, value in result.items(): - if key not in attrs: - # No conflict, keep the attribute - filtered_result[key] = value + # Process each attribute in the current attrs dict + for key, value in attrs.items(): + if key in dropped_keys: + continue # Already marked as conflicted + + if key not in result: + # New key, add it + result[key] = value else: - # Check if values are equivalent + # Existing key, check for conflict try: - if equivalent(attrs[key], value): - # Values are equivalent, keep the attribute - filtered_result[key] = value - # else: Values differ, drop the attribute (don't add to filtered_result) + if not equivalent(result[key], value): + # Values are different, drop the key + result.pop(key, None) + dropped_keys.add(key) except ValueError: - # Likely an ambiguous truth value from numpy array comparison - # Treat as non-equivalent and drop the attribute - pass - result = filtered_result - dropped_keys |= {key for key in attrs if key not in result} + # equivalent() failed (likely ambiguous truth value) + # Treat as conflict and drop + result.pop(key, None) + dropped_keys.add(key) + return result elif combine_attrs == "identical": result = dict(variable_attrs[0]) From c259f93369bad3ac59929598dc7fd06a0b4c0a1b Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 14 Sep 2025 08:08:18 -0700 Subject: [PATCH 10/17] Update whats-new.rst Co-authored-by: Justus Magin --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 19e36dc1cc9..235544a7536 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,7 +36,7 @@ Bug fixes `_. - Propagation coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`). By `Justus Magin `_. -- Fix ``combine_attrs="drop_conflicts"`` to handle objects with ``__eq__`` methods that return +- Allow ``combine_attrs="drop_conflicts"`` to handle objects with ``__eq__`` methods that return non-bool values (e.g., numpy arrays) without raising ``ValueError`` (:pull:`10726`). By `Maximilian Roos `_. From 2b85c4347943ca89494a8206218bb6fb9cbcfab9 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 14 Sep 2025 08:08:29 -0700 Subject: [PATCH 11/17] Update whats-new.rst Co-authored-by: Justus Magin --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 235544a7536..b034d37746b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,7 +34,7 @@ Bug fixes - Fix error when encoding an empty :py:class:`numpy.datetime64` array (:issue:`10722`, :pull:`10723`). By `Spencer Clark `_. -- Propagation coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`). +- Propagate coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`). By `Justus Magin `_. - Allow ``combine_attrs="drop_conflicts"`` to handle objects with ``__eq__`` methods that return non-bool values (e.g., numpy arrays) without raising ``ValueError`` (:pull:`10726`). From b7c05aa2ffae04fc572dde7ab7458d403958fd83 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 14 Sep 2025 17:18:56 -0700 Subject: [PATCH 12/17] Implement equivalent_attrs function as suggested by @keewis - Create dedicated equivalent_attrs function that wraps equivalent() - Returns False when ValueError is raised (e.g., for numpy arrays) - Simplifies merge_attrs logic by encapsulating error handling - Makes future behavior changes easier --- xarray/structure/merge.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 1a142010f62..3f51bc2f04c 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -607,6 +607,17 @@ def merge_coords( return variables, out_indexes +def equivalent_attrs(a: Any, b: Any) -> bool: + """Check if two attribute values are equivalent. + + Returns False if the comparison raises ValueError (e.g., for numpy arrays). + """ + try: + return equivalent(a, b) + except ValueError: + return False + + def merge_attrs(variable_attrs, combine_attrs, context=None): """Combine attributes from different variables according to combine_attrs""" if not variable_attrs: @@ -635,26 +646,15 @@ def merge_attrs(variable_attrs, combine_attrs, context=None): dropped_keys = set() for attrs in variable_attrs: - # Process each attribute in the current attrs dict for key, value in attrs.items(): if key in dropped_keys: - continue # Already marked as conflicted + continue if key not in result: - # New key, add it result[key] = value - else: - # Existing key, check for conflict - try: - if not equivalent(result[key], value): - # Values are different, drop the key - result.pop(key, None) - dropped_keys.add(key) - except ValueError: - # equivalent() failed (likely ambiguous truth value) - # Treat as conflict and drop - result.pop(key, None) - dropped_keys.add(key) + elif not equivalent_attrs(result[key], value): + result.pop(key, None) + dropped_keys.add(key) return result elif combine_attrs == "identical": From d4c55229b624b47c67f77c0519acad151fb0fa0f Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Mon, 15 Sep 2025 07:48:58 -0700 Subject: [PATCH 13/17] Add pathological test cases suggested by @shoyer - Test NumPy object arrays with nested arrays - Test xarray.Dataset objects as attributes - Test Pandas Series as attributes - Update equivalent_attrs to catch TypeError in addition to ValueError - Ensure equivalent_attrs always returns a boolean (not Dataset/array-like) These cases all properly get dropped when they can't be reliably compared. --- xarray/structure/merge.py | 14 ++++-- xarray/tests/test_merge.py | 95 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 102 insertions(+), 7 deletions(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 3f51bc2f04c..c2bab18d35c 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -610,11 +610,19 @@ def merge_coords( def equivalent_attrs(a: Any, b: Any) -> bool: """Check if two attribute values are equivalent. - Returns False if the comparison raises ValueError (e.g., for numpy arrays). + Returns False if the comparison raises ValueError or TypeError, + or if the result is not a boolean. This handles cases like: + - numpy arrays with ambiguous truth values + - xarray Datasets which can't be directly converted to numpy arrays + - pandas Series which return Series from comparisons """ try: - return equivalent(a, b) - except ValueError: + result = equivalent(a, b) + # Ensure we have a boolean result, not an array-like or Dataset + if not isinstance(result, bool): + return False + return result + except (ValueError, TypeError): return False diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 066d64d2a6d..da946d492a5 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -295,14 +295,15 @@ def __repr__(self): with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning) - # Objects with same value (returning truthy array [True]) + # Objects with custom __eq__ that returns non-bool are dropped + # even if they're "equal" because the comparison result is ambiguous actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts") - assert "custom" in actual.attrs + assert "custom" not in actual.attrs # Dropped due to non-bool comparison assert actual.attrs["x"] == 1 - # Objects with different values (returning falsy array [False]) + # Objects with different values also get dropped actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts") - assert "custom" not in actual.attrs # Dropped due to conflict + assert "custom" not in actual.attrs # Dropped due to non-bool comparison assert actual.attrs["x"] == 1 assert actual.attrs["y"] == 2 @@ -371,6 +372,92 @@ def __repr__(self): actual = xr.merge([ds11, ds12], combine_attrs="drop_conflicts") assert "alltrue" not in actual.attrs # Dropped due to ambiguous comparison + def test_merge_attrs_drop_conflicts_pathological_cases(self): + """Test drop_conflicts with pathological cases suggested by @shoyer. + + These test cases ensure we handle various problematic attribute types + that can raise exceptions during equality comparison. + """ + import warnings + + import numpy as np + import pandas as pd + + # Test 1: NumPy object arrays with nested arrays + # These can have complex comparison behavior + x = np.array([None], dtype=object) + x[0] = np.arange(3) + y = np.array([None], dtype=object) + y[0] = np.arange(10, 13) + + ds1 = xr.Dataset(attrs={"nested_array": x, "common": 1}) + ds2 = xr.Dataset(attrs={"nested_array": y, "common": 1}) + + # Different nested arrays should cause attribute to be dropped + actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts") + assert ( + "nested_array" not in actual.attrs + ) # Dropped due to different nested arrays + assert actual.attrs["common"] == 1 + + # Test with identical nested arrays + # Note: Even identical nested arrays will be dropped because comparison + # raises ValueError due to ambiguous truth value + z = np.array([None], dtype=object) + z[0] = np.arange(3) # Same as x + ds3 = xr.Dataset(attrs={"nested_array": z, "other": 2}) + + actual = xr.merge([ds1, ds3], combine_attrs="drop_conflicts") + assert ( + "nested_array" not in actual.attrs + ) # Dropped due to ValueError in comparison + assert actual.attrs["other"] == 2 + + # Test 2: xarray.Dataset objects as attributes (raises TypeError in equivalent) + attr_ds1 = xr.Dataset({"foo": 1}) + attr_ds2 = xr.Dataset({"bar": 1}) # Different dataset + attr_ds3 = xr.Dataset({"foo": 1}) # Same as attr_ds1 + + ds4 = xr.Dataset(attrs={"dataset_attr": attr_ds1, "scalar": 42}) + ds5 = xr.Dataset(attrs={"dataset_attr": attr_ds2, "scalar": 42}) + ds6 = xr.Dataset(attrs={"dataset_attr": attr_ds3, "other": 99}) + + # Different datasets raise TypeError and should be dropped + actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts") + assert "dataset_attr" not in actual.attrs # Dropped due to TypeError + assert actual.attrs["scalar"] == 42 + + # Even identical datasets cause TypeError in equivalent() and get dropped + # because equivalent() tries to convert them to numpy arrays + actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts") + assert "dataset_attr" not in actual.attrs # Dropped due to TypeError + assert actual.attrs["other"] == 99 + + # Test 3: Pandas Series (raises ValueError due to ambiguous truth value) + series1 = pd.Series([1, 2]) + series2 = pd.Series([3, 4]) # Different values + series3 = pd.Series([1, 2]) # Same as series1 + + ds7 = xr.Dataset(attrs={"series": series1, "value": "a"}) + ds8 = xr.Dataset(attrs={"series": series2, "value": "a"}) + ds9 = xr.Dataset(attrs={"series": series3, "value": "a"}) + + # Suppress potential warnings from pandas comparisons + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + warnings.filterwarnings("ignore", category=FutureWarning) + + # Different series raise ValueError and get dropped + actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts") + assert "series" not in actual.attrs # Dropped due to ValueError + assert actual.attrs["value"] == "a" + + # Even identical series raise ValueError in equivalent() and get dropped + # because Series comparison returns another Series with ambiguous truth value + actual = xr.merge([ds7, ds9], combine_attrs="drop_conflicts") + assert "series" not in actual.attrs # Dropped due to ValueError + assert actual.attrs["value"] == "a" + def test_merge_attrs_no_conflicts_compat_minimal(self): """make sure compat="minimal" does not silence errors""" ds1 = xr.Dataset({"a": ("x", [], {"a": 0})}) From e2731251e23e58a939f43cca8063f63ba2a8cb55 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Mon, 15 Sep 2025 09:32:43 -0700 Subject: [PATCH 14/17] Use truthiness in equivalent_attrs for consistency with Python - Choose truthiness over strict boolean checking for consistency with Python's standard 'if a == b:' behavior - Accept edge cases with non-standard __eq__ methods as a deliberate tradeoff - Add comprehensive tests documenting the behavior and edge cases - Add clear documentation explaining the design choice and its implications This approach is more permissive and works better with numpy scalars and similar types, while still catching truly ambiguous cases (ValueError/TypeError). --- xarray/structure/merge.py | 37 +++++++++++++++----- xarray/tests/test_merge.py | 71 +++++++++++++++++++++++++++++++++----- 2 files changed, 91 insertions(+), 17 deletions(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index c2bab18d35c..24d5b4e9120 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -610,19 +610,38 @@ def merge_coords( def equivalent_attrs(a: Any, b: Any) -> bool: """Check if two attribute values are equivalent. - Returns False if the comparison raises ValueError or TypeError, - or if the result is not a boolean. This handles cases like: - - numpy arrays with ambiguous truth values - - xarray Datasets which can't be directly converted to numpy arrays - - pandas Series which return Series from comparisons + Returns False if the comparison raises ValueError or TypeError. + This handles cases like numpy arrays with ambiguous truth values + and xarray Datasets which can't be directly converted to numpy arrays. + + For non-boolean results, we use truthiness (consistent with `if a == b`). + This is an imperfect but pragmatic choice: + + Pros of truthiness: + - Consistent with Python's normal `if a == b:` behavior + - Preserves numpy scalars (np.bool_(True)) and similar types + - More permissive for common use cases + + Cons of truthiness: + - Keeps attrs when __eq__ returns truthy non-bool (e.g., "error") + - Drops attrs when __eq__ returns falsy non-bool (e.g., 0, []) + + The alternative (strict bool checking) would be safer but would drop + many legitimate comparisons. We choose consistency with Python's + standard behavior, accepting edge cases with pathological __eq__ methods. + + TODO: Revisit this behavior in the future - consider strict type checking + or a more sophisticated approach to handling non-boolean comparisons. """ try: result = equivalent(a, b) - # Ensure we have a boolean result, not an array-like or Dataset - if not isinstance(result, bool): - return False - return result + # Use truthiness, consistent with `if a == b:` behavior + # Note: This means non-boolean returns are interpreted by truthiness, + # which can lead to false positives/negatives but is more permissive + return bool(result) except (ValueError, TypeError): + # These exceptions indicate the comparison is truly ambiguous + # (e.g., numpy arrays that would raise "ambiguous truth value") return False diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index da946d492a5..06069b75ebe 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -295,15 +295,14 @@ def __repr__(self): with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning) - # Objects with custom __eq__ that returns non-bool are dropped - # even if they're "equal" because the comparison result is ambiguous + # With truthiness: objects returning [True] are kept (truthy) actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts") - assert "custom" not in actual.attrs # Dropped due to non-bool comparison + assert "custom" in actual.attrs # Kept - [True] is truthy assert actual.attrs["x"] == 1 - # Objects with different values also get dropped + # Objects with different values: equivalent returns False (bool), dropped actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts") - assert "custom" not in actual.attrs # Dropped due to non-bool comparison + assert "custom" not in actual.attrs # Dropped - different values assert actual.attrs["x"] == 1 assert actual.attrs["y"] == 2 @@ -427,10 +426,10 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self): assert "dataset_attr" not in actual.attrs # Dropped due to TypeError assert actual.attrs["scalar"] == 42 - # Even identical datasets cause TypeError in equivalent() and get dropped - # because equivalent() tries to convert them to numpy arrays + # With truthiness: identical datasets are kept + # The comparison returns a truthy Dataset, so they're treated as equal actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts") - assert "dataset_attr" not in actual.attrs # Dropped due to TypeError + assert "dataset_attr" in actual.attrs # Kept with truthiness approach assert actual.attrs["other"] == 99 # Test 3: Pandas Series (raises ValueError due to ambiguous truth value) @@ -458,6 +457,62 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self): assert "series" not in actual.attrs # Dropped due to ValueError assert actual.attrs["value"] == "a" + def test_merge_attrs_drop_conflicts_truthiness_edge_cases(self): + """Test edge cases demonstrating the truthiness tradeoff. + + We deliberately use truthiness for consistency with Python's `if a == b:` + behavior. This test documents the implications of this design choice + with objects that have non-standard __eq__ methods. + """ + + # Case 1: Objects whose __eq__ returns truthy non-booleans + # These are kept because we respect truthiness + class ReturnsString: + def __init__(self, value): + self.value = value + + def __eq__(self, other): + # Always returns a string (truthy if non-empty) + return "comparison result" + + obj1 = ReturnsString("A") + obj2 = ReturnsString("B") # Different object + + ds1 = xr.Dataset(attrs={"obj": obj1}) + ds2 = xr.Dataset(attrs={"obj": obj2}) + + actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts") + + # Truthiness behavior: keeps attribute because "comparison result" is truthy + # This is the expected behavior when respecting truthiness + assert "obj" in actual.attrs + + # Case 2: Objects whose __eq__ returns falsy non-booleans + # These are dropped because we respect truthiness + class ReturnsZero: + def __init__(self, value): + self.value = value + + def __eq__(self, other): + # Always returns 0 (falsy) even if values match + return 0 + + obj3 = ReturnsZero("same") + obj4 = ReturnsZero("same") # Different object, same value + + ds3 = xr.Dataset(attrs={"zero": obj3}) + ds4 = xr.Dataset(attrs={"zero": obj4}) + + actual = xr.merge([ds3, ds4], combine_attrs="drop_conflicts") + + # Truthiness behavior: drops attribute because 0 is falsy + # This is the expected behavior when respecting truthiness + assert "zero" not in actual.attrs + + # Note: These edge cases demonstrate the tradeoff of using truthiness. + # Well-behaved __eq__ methods return booleans and work correctly. + # We accept these edge cases for consistency with Python's standard behavior. + def test_merge_attrs_no_conflicts_compat_minimal(self): """make sure compat="minimal" does not silence errors""" ds1 = xr.Dataset({"a": ("x", [], {"a": 0})}) From 5557193e9f4c7403e699b165d4afe40927bcf522 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Mon, 15 Sep 2025 10:04:10 -0700 Subject: [PATCH 15/17] Implement @shoyer's stricter equivalent() function - Rewrite equivalent() to reject non-boolean comparison results - Accept numpy bool scalars (np.bool_) but reject other non-bool types - Simplify equivalent_attrs() since equivalent() now handles non-bool cases - Update tests to reflect stricter behavior with non-standard __eq__ methods This makes comparisons more predictable by rejecting ambiguous cases like Dataset comparisons, custom objects with weird __eq__, etc. The tradeoff is being less permissive than Python's standard 'if a == b:' behavior. --- xarray/core/utils.py | 18 ++++++++++++- xarray/structure/merge.py | 27 +++----------------- xarray/tests/test_merge.py | 52 ++++++++++++-------------------------- 3 files changed, 37 insertions(+), 60 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index d969fa8ea8c..d44aff9ff36 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -239,18 +239,34 @@ def equivalent(first: T, second: T) -> bool: """Compare two objects for equivalence (identity or equality), using array_equiv if either object is an ndarray. If both objects are lists, equivalent is sequentially called on all the elements. + + Returns False for any comparison that doesn't return a boolean, + making this function safer to use with objects that have non-standard + __eq__ implementations. """ # TODO: refactor to avoid circular import from xarray.core import duck_array_ops if first is second: return True + if isinstance(first, np.ndarray) or isinstance(second, np.ndarray): return duck_array_ops.array_equiv(first, second) + if isinstance(first, list) or isinstance(second, list): return list_equiv(first, second) # type: ignore[arg-type] - return (first == second) or (pd.isnull(first) and pd.isnull(second)) # type: ignore[call-overload] + # For non-array/list types, use == but require boolean result + result = first == second + if not isinstance(result, bool): + # Accept numpy bool scalars as well + if isinstance(result, np.bool_): + return bool(result) + # Reject any other non-boolean type (Dataset, Series, custom objects, etc.) + return False + + # Check for NaN equivalence + return result or (pd.isnull(first) and pd.isnull(second)) # type: ignore[call-overload] def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool: diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 24d5b4e9120..ca39e794035 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -614,31 +614,12 @@ def equivalent_attrs(a: Any, b: Any) -> bool: This handles cases like numpy arrays with ambiguous truth values and xarray Datasets which can't be directly converted to numpy arrays. - For non-boolean results, we use truthiness (consistent with `if a == b`). - This is an imperfect but pragmatic choice: - - Pros of truthiness: - - Consistent with Python's normal `if a == b:` behavior - - Preserves numpy scalars (np.bool_(True)) and similar types - - More permissive for common use cases - - Cons of truthiness: - - Keeps attrs when __eq__ returns truthy non-bool (e.g., "error") - - Drops attrs when __eq__ returns falsy non-bool (e.g., 0, []) - - The alternative (strict bool checking) would be safer but would drop - many legitimate comparisons. We choose consistency with Python's - standard behavior, accepting edge cases with pathological __eq__ methods. - - TODO: Revisit this behavior in the future - consider strict type checking - or a more sophisticated approach to handling non-boolean comparisons. + Since equivalent() now handles non-boolean returns by returning False, + this wrapper mainly catches exceptions from comparisons that can't be + evaluated at all. """ try: - result = equivalent(a, b) - # Use truthiness, consistent with `if a == b:` behavior - # Note: This means non-boolean returns are interpreted by truthiness, - # which can lead to false positives/negatives but is more permissive - return bool(result) + return equivalent(a, b) except (ValueError, TypeError): # These exceptions indicate the comparison is truly ambiguous # (e.g., numpy arrays that would raise "ambiguous truth value") diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 06069b75ebe..9c02d26ba15 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -236,13 +236,7 @@ def test_merge_attrs_drop_conflicts(self): assert_identical(actual, expected) def test_merge_attrs_drop_conflicts_non_bool_eq(self): - """Test drop_conflicts behavior when __eq__ returns non-bool values. - - When comparing attribute values, the _equivalent_drop_conflicts() function - uses == which can return non-bool values. The new behavior treats ambiguous - or falsy equality results as non-equivalent, dropping the attribute rather - than raising an error. - """ + """Test drop_conflicts behavior when __eq__ returns non-bool values.""" import warnings import numpy as np @@ -295,14 +289,14 @@ def __repr__(self): with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning) - # With truthiness: objects returning [True] are kept (truthy) + # Objects returning arrays are dropped (non-boolean return) actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts") - assert "custom" in actual.attrs # Kept - [True] is truthy + assert "custom" not in actual.attrs # Dropped - returns array, not bool assert actual.attrs["x"] == 1 - # Objects with different values: equivalent returns False (bool), dropped + # Different values also dropped (returns array, not bool) actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts") - assert "custom" not in actual.attrs # Dropped - different values + assert "custom" not in actual.attrs # Dropped - returns non-boolean assert actual.attrs["x"] == 1 assert actual.attrs["y"] == 2 @@ -426,10 +420,9 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self): assert "dataset_attr" not in actual.attrs # Dropped due to TypeError assert actual.attrs["scalar"] == 42 - # With truthiness: identical datasets are kept - # The comparison returns a truthy Dataset, so they're treated as equal + # Identical datasets are also dropped (comparison returns Dataset, not bool) actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts") - assert "dataset_attr" in actual.attrs # Kept with truthiness approach + assert "dataset_attr" not in actual.attrs # Dropped - returns Dataset, not bool assert actual.attrs["other"] == 99 # Test 3: Pandas Series (raises ValueError due to ambiguous truth value) @@ -457,22 +450,16 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self): assert "series" not in actual.attrs # Dropped due to ValueError assert actual.attrs["value"] == "a" - def test_merge_attrs_drop_conflicts_truthiness_edge_cases(self): - """Test edge cases demonstrating the truthiness tradeoff. + def test_merge_attrs_drop_conflicts_non_boolean_eq_returns(self): + """Test objects with non-boolean __eq__ returns are dropped.""" - We deliberately use truthiness for consistency with Python's `if a == b:` - behavior. This test documents the implications of this design choice - with objects that have non-standard __eq__ methods. - """ - - # Case 1: Objects whose __eq__ returns truthy non-booleans - # These are kept because we respect truthiness + # Case 1: Objects whose __eq__ returns non-boolean strings class ReturnsString: def __init__(self, value): self.value = value def __eq__(self, other): - # Always returns a string (truthy if non-empty) + # Always returns a string (non-boolean) return "comparison result" obj1 = ReturnsString("A") @@ -483,18 +470,16 @@ def __eq__(self, other): actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts") - # Truthiness behavior: keeps attribute because "comparison result" is truthy - # This is the expected behavior when respecting truthiness - assert "obj" in actual.attrs + # Strict behavior: drops attribute because __eq__ returns non-boolean + assert "obj" not in actual.attrs - # Case 2: Objects whose __eq__ returns falsy non-booleans - # These are dropped because we respect truthiness + # Case 2: Objects whose __eq__ returns numbers class ReturnsZero: def __init__(self, value): self.value = value def __eq__(self, other): - # Always returns 0 (falsy) even if values match + # Always returns 0 (non-boolean) return 0 obj3 = ReturnsZero("same") @@ -505,14 +490,9 @@ def __eq__(self, other): actual = xr.merge([ds3, ds4], combine_attrs="drop_conflicts") - # Truthiness behavior: drops attribute because 0 is falsy - # This is the expected behavior when respecting truthiness + # Strict behavior: drops attribute because __eq__ returns non-boolean assert "zero" not in actual.attrs - # Note: These edge cases demonstrate the tradeoff of using truthiness. - # Well-behaved __eq__ methods return booleans and work correctly. - # We accept these edge cases for consistency with Python's standard behavior. - def test_merge_attrs_no_conflicts_compat_minimal(self): """make sure compat="minimal" does not silence errors""" ds1 = xr.Dataset({"a": ("x", [], {"a": 0})}) From 786285cb5396463a8028a13f680e07086359f749 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Mon, 15 Sep 2025 14:39:24 -0700 Subject: [PATCH 16/17] Address @shoyer's review comments - Fix comment to say 'nested numpy arrays' instead of 'numpy arrays' - Use del instead of pop() to avoid creating unused return value - Split long test methods into smaller, focused test methods - Move all imports (warnings, pandas) to top of file Co-authored-by: Claude --- xarray/structure/merge.py | 4 ++-- xarray/tests/test_merge.py | 38 +++++++++++++++++++------------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index ca39e794035..5bb53036042 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -622,7 +622,7 @@ def equivalent_attrs(a: Any, b: Any) -> bool: return equivalent(a, b) except (ValueError, TypeError): # These exceptions indicate the comparison is truly ambiguous - # (e.g., numpy arrays that would raise "ambiguous truth value") + # (e.g., nested numpy arrays that would raise "ambiguous truth value") return False @@ -661,7 +661,7 @@ def merge_attrs(variable_attrs, combine_attrs, context=None): if key not in result: result[key] = value elif not equivalent_attrs(result[key], value): - result.pop(key, None) + del result[key] dropped_keys.add(key) return result diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 9c02d26ba15..7b2e46d03a0 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -1,6 +1,9 @@ from __future__ import annotations +import warnings + import numpy as np +import pandas as pd import pytest import xarray as xr @@ -235,12 +238,8 @@ def test_merge_attrs_drop_conflicts(self): expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0}) assert_identical(actual, expected) - def test_merge_attrs_drop_conflicts_non_bool_eq(self): - """Test drop_conflicts behavior when __eq__ returns non-bool values.""" - import warnings - - import numpy as np - + def test_merge_attrs_drop_conflicts_numpy_arrays(self): + """Test drop_conflicts with numpy arrays.""" # Test with numpy arrays (which return arrays from ==) arr1 = np.array([1, 2, 3]) arr2 = np.array([1, 2, 3]) @@ -260,6 +259,9 @@ def test_merge_attrs_drop_conflicts_non_bool_eq(self): assert "arr" not in actual.attrs # Should drop due to conflict assert "other" in actual.attrs + def test_merge_attrs_drop_conflicts_custom_eq_returns_array(self): + """Test drop_conflicts with custom objects that return arrays from __eq__.""" + # Test with custom objects that return non-bool from __eq__ class CustomEq: """Object whose __eq__ returns a non-bool value.""" @@ -300,6 +302,9 @@ def __repr__(self): assert actual.attrs["x"] == 1 assert actual.attrs["y"] == 2 + def test_merge_attrs_drop_conflicts_ambiguous_array_returns(self): + """Test drop_conflicts with objects returning ambiguous arrays from __eq__.""" + # Test edge case: object whose __eq__ returns empty array (ambiguous truth value) class EmptyArrayEq: def __eq__(self, other): @@ -365,17 +370,8 @@ def __repr__(self): actual = xr.merge([ds11, ds12], combine_attrs="drop_conflicts") assert "alltrue" not in actual.attrs # Dropped due to ambiguous comparison - def test_merge_attrs_drop_conflicts_pathological_cases(self): - """Test drop_conflicts with pathological cases suggested by @shoyer. - - These test cases ensure we handle various problematic attribute types - that can raise exceptions during equality comparison. - """ - import warnings - - import numpy as np - import pandas as pd - + def test_merge_attrs_drop_conflicts_nested_arrays(self): + """Test drop_conflicts with NumPy object arrays containing nested arrays.""" # Test 1: NumPy object arrays with nested arrays # These can have complex comparison behavior x = np.array([None], dtype=object) @@ -406,7 +402,9 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self): ) # Dropped due to ValueError in comparison assert actual.attrs["other"] == 2 - # Test 2: xarray.Dataset objects as attributes (raises TypeError in equivalent) + def test_merge_attrs_drop_conflicts_dataset_attrs(self): + """Test drop_conflicts with xarray.Dataset objects as attributes.""" + # xarray.Dataset objects as attributes (raises TypeError in equivalent) attr_ds1 = xr.Dataset({"foo": 1}) attr_ds2 = xr.Dataset({"bar": 1}) # Different dataset attr_ds3 = xr.Dataset({"foo": 1}) # Same as attr_ds1 @@ -425,7 +423,9 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self): assert "dataset_attr" not in actual.attrs # Dropped - returns Dataset, not bool assert actual.attrs["other"] == 99 - # Test 3: Pandas Series (raises ValueError due to ambiguous truth value) + def test_merge_attrs_drop_conflicts_pandas_series(self): + """Test drop_conflicts with Pandas Series as attributes.""" + # Pandas Series (raises ValueError due to ambiguous truth value) series1 = pd.Series([1, 2]) series2 = pd.Series([3, 4]) # Different values series3 = pd.Series([1, 2]) # Same as series1 From 23d693db52b9489a0b42f7e7f9c27e8f91ef1cc3 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Mon, 15 Sep 2025 21:09:26 -0700 Subject: [PATCH 17/17] Address @shoyer's review comments - Split long test methods into smaller, focused test cases: - test_merge_attrs_drop_conflicts_ambiguous_array_returns split to add test_merge_attrs_drop_conflicts_all_true_array - test_merge_attrs_drop_conflicts_non_boolean_eq_returns split into test_merge_attrs_drop_conflicts_eq_returns_string and test_merge_attrs_drop_conflicts_eq_returns_number - Ran formatter which removed a blank line Co-authored-by: Claude --- xarray/tests/test_merge.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 7b2e46d03a0..01bab8ca8b2 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -350,6 +350,9 @@ def __repr__(self): actual = xr.merge([ds9, ds10], combine_attrs="drop_conflicts") assert "multi" not in actual.attrs # Dropped due to ambiguous comparison + def test_merge_attrs_drop_conflicts_all_true_array(self): + """Test drop_conflicts with all-True multi-element array from __eq__.""" + # Test with all-True multi-element array (unambiguous truthy) class AllTrueArrayEq: def __eq__(self, other): @@ -450,8 +453,8 @@ def test_merge_attrs_drop_conflicts_pandas_series(self): assert "series" not in actual.attrs # Dropped due to ValueError assert actual.attrs["value"] == "a" - def test_merge_attrs_drop_conflicts_non_boolean_eq_returns(self): - """Test objects with non-boolean __eq__ returns are dropped.""" + def test_merge_attrs_drop_conflicts_eq_returns_string(self): + """Test objects whose __eq__ returns strings are dropped.""" # Case 1: Objects whose __eq__ returns non-boolean strings class ReturnsString: @@ -473,6 +476,9 @@ def __eq__(self, other): # Strict behavior: drops attribute because __eq__ returns non-boolean assert "obj" not in actual.attrs + def test_merge_attrs_drop_conflicts_eq_returns_number(self): + """Test objects whose __eq__ returns numbers are dropped.""" + # Case 2: Objects whose __eq__ returns numbers class ReturnsZero: def __init__(self, value):