From 93833e36a7542632e019d2de16876d7b83957779 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sat, 13 Sep 2025 17:17:14 -0700
Subject: [PATCH 01/17] Fix drop_conflicts to handle non-bool returns from
 __eq__

When merging attrs with drop_conflicts, objects whose __eq__ returns
non-bool values (e.g., numpy arrays) would raise ValueError. Now these
are caught and treated as non-equivalent, causing the attribute to be
dropped rather than raising an error.

Fixes regression from #10726
---
 xarray/structure/merge.py  |  23 +++++--
 xarray/tests/test_merge.py | 125 +++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+), 5 deletions(-)

diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index 58168ddb024..6024be4c607 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -641,11 +641,24 @@ def merge_attrs(variable_attrs, combine_attrs, context=None):
                     if key not in result and key not in dropped_keys
                 }
             )
-            result = {
-                key: value
-                for key, value in result.items()
-                if key not in attrs or equivalent(attrs[key], value)
-            }
+            # Filter out attributes that have conflicts
+            filtered_result = {}
+            for key, value in result.items():
+                if key not in attrs:
+                    # No conflict, keep the attribute
+                    filtered_result[key] = value
+                else:
+                    # Check if values are equivalent
+                    try:
+                        if equivalent(attrs[key], value):
+                            # Values are equivalent, keep the attribute
+                            filtered_result[key] = value
+                        # else: Values differ, drop the attribute (don't add to filtered_result)
+                    except ValueError:
+                        # Likely an ambiguous truth value from numpy array comparison
+                        # Treat as non-equivalent and drop the attribute
+                        pass
+            result = filtered_result
             dropped_keys |= {key for key in attrs if key not in result}
         return result
     elif combine_attrs == "identical":
diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
index 8ae05fbb261..e6e4b85afae 100644
--- a/xarray/tests/test_merge.py
+++ b/xarray/tests/test_merge.py
@@ -235,6 +235,131 @@ def test_merge_attrs_drop_conflicts(self):
         expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0})
         assert_identical(actual, expected)
 
+    def test_merge_attrs_drop_conflicts_non_bool_eq(self):
+        """Test drop_conflicts behavior when __eq__ returns non-bool values.
+
+        When comparing attribute values, the _equivalent_drop_conflicts() function
+        uses == which can return non-bool values. The new behavior treats ambiguous
+        or falsy equality results as non-equivalent, dropping the attribute rather
+        than raising an error.
+        """
+        import numpy as np
+
+        # Test with numpy arrays (which return arrays from ==)
+        arr1 = np.array([1, 2, 3])
+        arr2 = np.array([1, 2, 3])
+        arr3 = np.array([4, 5, 6])
+
+        ds1 = xr.Dataset(attrs={"arr": arr1, "scalar": 1})
+        ds2 = xr.Dataset(attrs={"arr": arr2, "scalar": 1})  # Same array values
+        ds3 = xr.Dataset(attrs={"arr": arr3, "other": 2})  # Different array values
+
+        # Arrays are considered equivalent if they have the same values
+        actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
+        assert "arr" in actual.attrs  # Should keep the array since they're equivalent
+        assert actual.attrs["scalar"] == 1
+
+        # Different arrays cause the attribute to be dropped
+        actual = xr.merge([ds1, ds3], combine_attrs="drop_conflicts")
+        assert "arr" not in actual.attrs  # Should drop due to conflict
+        assert "other" in actual.attrs
+
+        # Test with custom objects that return non-bool from __eq__
+        class CustomEq:
+            """Object whose __eq__ returns a non-bool value."""
+
+            def __init__(self, value):
+                self.value = value
+
+            def __eq__(self, other):
+                if not isinstance(other, CustomEq):
+                    return False
+                # Return a numpy array (truthy if all elements are non-zero)
+                return np.array([self.value == other.value])
+
+            def __repr__(self):
+                return f"CustomEq({self.value})"
+
+        obj1 = CustomEq(42)
+        obj2 = CustomEq(42)  # Same value
+        obj3 = CustomEq(99)  # Different value
+
+        ds4 = xr.Dataset(attrs={"custom": obj1, "x": 1})
+        ds5 = xr.Dataset(attrs={"custom": obj2, "x": 1})
+        ds6 = xr.Dataset(attrs={"custom": obj3, "y": 2})
+
+        # Objects with same value (returning truthy array [True])
+        actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
+        assert "custom" in actual.attrs
+        assert actual.attrs["x"] == 1
+
+        # Objects with different values (returning falsy array [False])
+        actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
+        assert "custom" not in actual.attrs  # Dropped due to conflict
+        assert actual.attrs["x"] == 1
+        assert actual.attrs["y"] == 2
+
+        # Test edge case: object whose __eq__ returns empty array (ambiguous truth value)
+        class EmptyArrayEq:
+            def __eq__(self, other):
+                if not isinstance(other, EmptyArrayEq):
+                    return False
+                return np.array([])  # Empty array has ambiguous truth value
+
+            def __repr__(self):
+                return "EmptyArrayEq()"
+
+        empty_obj1 = EmptyArrayEq()
+        empty_obj2 = EmptyArrayEq()
+
+        ds7 = xr.Dataset(attrs={"empty": empty_obj1})
+        ds8 = xr.Dataset(attrs={"empty": empty_obj2})
+
+        # With new behavior: ambiguous truth values are treated as non-equivalent
+        # So the attribute is dropped instead of raising an error
+        actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts")
+        assert "empty" not in actual.attrs  # Dropped due to ambiguous comparison
+
+        # Test with object that returns multi-element array (also ambiguous)
+        class MultiArrayEq:
+            def __eq__(self, other):
+                if not isinstance(other, MultiArrayEq):
+                    return False
+                return np.array([True, False])  # Multi-element array is ambiguous
+
+            def __repr__(self):
+                return "MultiArrayEq()"
+
+        multi_obj1 = MultiArrayEq()
+        multi_obj2 = MultiArrayEq()
+
+        ds9 = xr.Dataset(attrs={"multi": multi_obj1})
+        ds10 = xr.Dataset(attrs={"multi": multi_obj2})
+
+        # With new behavior: ambiguous arrays are treated as non-equivalent
+        actual = xr.merge([ds9, ds10], combine_attrs="drop_conflicts")
+        assert "multi" not in actual.attrs  # Dropped due to ambiguous comparison
+
+        # Test with all-True multi-element array (unambiguous truthy)
+        class AllTrueArrayEq:
+            def __eq__(self, other):
+                if not isinstance(other, AllTrueArrayEq):
+                    return False
+                return np.array([True, True, True])  # All True, but still multi-element
+
+            def __repr__(self):
+                return "AllTrueArrayEq()"
+
+        alltrue1 = AllTrueArrayEq()
+        alltrue2 = AllTrueArrayEq()
+
+        ds11 = xr.Dataset(attrs={"alltrue": alltrue1})
+        ds12 = xr.Dataset(attrs={"alltrue": alltrue2})
+
+        # Multi-element arrays are ambiguous even if all True
+        actual = xr.merge([ds11, ds12], combine_attrs="drop_conflicts")
+        assert "alltrue" not in actual.attrs  # Dropped due to ambiguous comparison
+
     def test_merge_attrs_no_conflicts_compat_minimal(self):
         """make sure compat="minimal" does not silence errors"""
         ds1 = xr.Dataset({"a": ("x", [], {"a": 0})})

From ed18a1358affd9eb68e9ef638a40e547a37ae313 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sat, 13 Sep 2025 17:18:22 -0700
Subject: [PATCH 02/17] Add changelog entry

---
 doc/whats-new.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index af47a096697..19e36dc1cc9 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -36,6 +36,9 @@ Bug fixes
   <https://github.com/spencerkclark>`_.
 - Propagation coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`).
   By `Justus Magin <https://github.com/keewis>`_.
+- Fix ``combine_attrs="drop_conflicts"`` to handle objects with ``__eq__`` methods that return
+  non-bool values (e.g., numpy arrays) without raising ``ValueError`` (:pull:`10726`).
+  By `Maximilian Roos <https://github.com/max-sixty>`_.
 
 Documentation
 ~~~~~~~~~~~~~

From 0fad4c8200617c8b18a63941d96f2185dc3916b2 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sat, 13 Sep 2025 19:29:27 -0700
Subject: [PATCH 03/17] Suppress DeprecationWarning for ambiguous array truth
 values

The CI was failing because NumPy raises a DeprecationWarning before
ValueError when evaluating ambiguous truth values. Since we're properly
handling the ValueError, we suppress the warning in this specific context.
---
 xarray/structure/merge.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index 6024be4c607..c8e79793081 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -650,10 +650,16 @@ def merge_attrs(variable_attrs, combine_attrs, context=None):
                 else:
                     # Check if values are equivalent
                     try:
-                        if equivalent(attrs[key], value):
-                            # Values are equivalent, keep the attribute
-                            filtered_result[key] = value
-                        # else: Values differ, drop the attribute (don't add to filtered_result)
+                        import warnings
+
+                        # Suppress DeprecationWarning about ambiguous truth values
+                        # since we handle the resulting ValueError appropriately
+                        with warnings.catch_warnings():
+                            warnings.filterwarnings("ignore", category=DeprecationWarning)
+                            if equivalent(attrs[key], value):
+                                # Values are equivalent, keep the attribute
+                                filtered_result[key] = value
+                            # else: Values differ, drop the attribute (don't add to filtered_result)
                     except ValueError:
                         # Likely an ambiguous truth value from numpy array comparison
                         # Treat as non-equivalent and drop the attribute

From 7adaa40d0761abd007e26d7df5a93542b832962f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 14 Sep 2025 02:29:52 +0000
Subject: [PATCH 04/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 xarray/structure/merge.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index c8e79793081..6c2d9fe6bf4 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -655,7 +655,9 @@ def merge_attrs(variable_attrs, combine_attrs, context=None):
                         # Suppress DeprecationWarning about ambiguous truth values
                         # since we handle the resulting ValueError appropriately
                         with warnings.catch_warnings():
-                            warnings.filterwarnings("ignore", category=DeprecationWarning)
+                            warnings.filterwarnings(
+                                "ignore", category=DeprecationWarning
+                            )
                             if equivalent(attrs[key], value):
                                 # Values are equivalent, keep the attribute
                                 filtered_result[key] = value

From b87933f39c64239953061b3922f29309908ced16 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sat, 13 Sep 2025 19:45:27 -0700
Subject: [PATCH 05/17] Move DeprecationWarning suppression to equivalent
 function

Rather than suppressing the warning in merge_attrs, handle it at the
source in equivalent() where the 'or' operation happens. This is cleaner
and includes a clear comment about when the suppression can be removed
(when minimum numpy version >= 2.0).

The warning only occurs in numpy < 2.0; numpy 2.0+ raises ValueError
directly, which we already handle properly in merge_attrs.
---
 xarray/core/utils.py      |  9 ++++++++-
 xarray/structure/merge.py | 16 ++++------------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index e490fc05c2f..66441b7718e 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -249,7 +249,14 @@ def equivalent(first: T, second: T) -> bool:
         return duck_array_ops.array_equiv(first, second)
     if isinstance(first, list) or isinstance(second, list):
         return list_equiv(first, second)  # type: ignore[arg-type]
-    return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
+
+    # Suppress DeprecationWarning about ambiguous truth values from numpy < 2.0
+    # In numpy 2.0+, this will raise ValueError directly, which we handle in callers
+    # Can remove this suppression when minimum numpy version >= 2.0
+    import warnings
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=DeprecationWarning)
+        return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
 
 
 def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index 6c2d9fe6bf4..6024be4c607 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -650,18 +650,10 @@ def merge_attrs(variable_attrs, combine_attrs, context=None):
                 else:
                     # Check if values are equivalent
                     try:
-                        import warnings
-
-                        # Suppress DeprecationWarning about ambiguous truth values
-                        # since we handle the resulting ValueError appropriately
-                        with warnings.catch_warnings():
-                            warnings.filterwarnings(
-                                "ignore", category=DeprecationWarning
-                            )
-                            if equivalent(attrs[key], value):
-                                # Values are equivalent, keep the attribute
-                                filtered_result[key] = value
-                            # else: Values differ, drop the attribute (don't add to filtered_result)
+                        if equivalent(attrs[key], value):
+                            # Values are equivalent, keep the attribute
+                            filtered_result[key] = value
+                        # else: Values differ, drop the attribute (don't add to filtered_result)
                     except ValueError:
                         # Likely an ambiguous truth value from numpy array comparison
                         # Treat as non-equivalent and drop the attribute

From 116a8f09bdfa78314e1b3e358a971f04d7267d5f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 14 Sep 2025 02:45:52 +0000
Subject: [PATCH 06/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 xarray/core/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 66441b7718e..0c5334104bb 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -254,6 +254,7 @@ def equivalent(first: T, second: T) -> bool:
     # In numpy 2.0+, this will raise ValueError directly, which we handle in callers
     # Can remove this suppression when minimum numpy version >= 2.0
     import warnings
+
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", category=DeprecationWarning)
         return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]

From b1cba12de9e2870b5e115f8321b8b9883203db85 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sat, 13 Sep 2025 20:40:24 -0700
Subject: [PATCH 07/17] Move DeprecationWarning suppression from library to
 test

The warning only occurs in our specific test case with custom objects that
return numpy arrays from __eq__. This is a very edge case scenario, so it's
more appropriate to suppress the warning in the test rather than in the
library code.
---
 xarray/core/utils.py       |  9 +--------
 xarray/tests/test_merge.py | 36 +++++++++++++++++++++++-------------
 2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 0c5334104bb..d969fa8ea8c 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -250,14 +250,7 @@ def equivalent(first: T, second: T) -> bool:
     if isinstance(first, list) or isinstance(second, list):
         return list_equiv(first, second)  # type: ignore[arg-type]
 
-    # Suppress DeprecationWarning about ambiguous truth values from numpy < 2.0
-    # In numpy 2.0+, this will raise ValueError directly, which we handle in callers
-    # Can remove this suppression when minimum numpy version >= 2.0
-    import warnings
-
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", category=DeprecationWarning)
-        return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
+    return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
 
 
 def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
index e6e4b85afae..efdee93a7ca 100644
--- a/xarray/tests/test_merge.py
+++ b/xarray/tests/test_merge.py
@@ -244,6 +244,7 @@ def test_merge_attrs_drop_conflicts_non_bool_eq(self):
         than raising an error.
         """
         import numpy as np
+        import warnings
 
         # Test with numpy arrays (which return arrays from ==)
         arr1 = np.array([1, 2, 3])
@@ -288,16 +289,21 @@ def __repr__(self):
         ds5 = xr.Dataset(attrs={"custom": obj2, "x": 1})
         ds6 = xr.Dataset(attrs={"custom": obj3, "y": 2})
 
-        # Objects with same value (returning truthy array [True])
-        actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
-        assert "custom" in actual.attrs
-        assert actual.attrs["x"] == 1
+        # Suppress DeprecationWarning from numpy < 2.0 about ambiguous truth values
+        # when our custom __eq__ returns arrays that are evaluated in boolean context
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=DeprecationWarning)
 
-        # Objects with different values (returning falsy array [False])
-        actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
-        assert "custom" not in actual.attrs  # Dropped due to conflict
-        assert actual.attrs["x"] == 1
-        assert actual.attrs["y"] == 2
+            # Objects with same value (returning truthy array [True])
+            actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
+            assert "custom" in actual.attrs
+            assert actual.attrs["x"] == 1
+
+            # Objects with different values (returning falsy array [False])
+            actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
+            assert "custom" not in actual.attrs  # Dropped due to conflict
+            assert actual.attrs["x"] == 1
+            assert actual.attrs["y"] == 2
 
         # Test edge case: object whose __eq__ returns empty array (ambiguous truth value)
         class EmptyArrayEq:
@@ -317,8 +323,10 @@ def __repr__(self):
 
         # With new behavior: ambiguous truth values are treated as non-equivalent
         # So the attribute is dropped instead of raising an error
-        actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts")
-        assert "empty" not in actual.attrs  # Dropped due to ambiguous comparison
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=DeprecationWarning)
+            actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts")
+            assert "empty" not in actual.attrs  # Dropped due to ambiguous comparison
 
         # Test with object that returns multi-element array (also ambiguous)
         class MultiArrayEq:
@@ -337,8 +345,10 @@ def __repr__(self):
         ds10 = xr.Dataset(attrs={"multi": multi_obj2})
 
         # With new behavior: ambiguous arrays are treated as non-equivalent
-        actual = xr.merge([ds9, ds10], combine_attrs="drop_conflicts")
-        assert "multi" not in actual.attrs  # Dropped due to ambiguous comparison
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=DeprecationWarning)
+            actual = xr.merge([ds9, ds10], combine_attrs="drop_conflicts")
+            assert "multi" not in actual.attrs  # Dropped due to ambiguous comparison
 
         # Test with all-True multi-element array (unambiguous truthy)
         class AllTrueArrayEq:

From be07f64824bbbf4e919ce379540c297937dd6174 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 14 Sep 2025 03:40:49 +0000
Subject: [PATCH 08/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 xarray/tests/test_merge.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
index efdee93a7ca..066d64d2a6d 100644
--- a/xarray/tests/test_merge.py
+++ b/xarray/tests/test_merge.py
@@ -243,9 +243,10 @@ def test_merge_attrs_drop_conflicts_non_bool_eq(self):
         or falsy equality results as non-equivalent, dropping the attribute rather
         than raising an error.
         """
-        import numpy as np
         import warnings
 
+        import numpy as np
+
         # Test with numpy arrays (which return arrays from ==)
         arr1 = np.array([1, 2, 3])
         arr2 = np.array([1, 2, 3])

From ce92f94faff5e03fdf3367ee6f7e657f851e4647 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sat, 13 Sep 2025 20:53:02 -0700
Subject: [PATCH 09/17] Simplify drop_conflicts implementation

Based on code review, simplified the logic to:
- Process attributes in a single loop instead of nested operations
- Eliminate intermediate dictionary creation
- Make the control flow clearer and more efficient
---
 xarray/structure/merge.py | 42 ++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index 6024be4c607..1a142010f62 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -633,33 +633,29 @@ def merge_attrs(variable_attrs, combine_attrs, context=None):
     elif combine_attrs == "drop_conflicts":
         result = {}
         dropped_keys = set()
+
         for attrs in variable_attrs:
-            result.update(
-                {
-                    key: value
-                    for key, value in attrs.items()
-                    if key not in result and key not in dropped_keys
-                }
-            )
-            # Filter out attributes that have conflicts
-            filtered_result = {}
-            for key, value in result.items():
-                if key not in attrs:
-                    # No conflict, keep the attribute
-                    filtered_result[key] = value
+            # Process each attribute in the current attrs dict
+            for key, value in attrs.items():
+                if key in dropped_keys:
+                    continue  # Already marked as conflicted
+
+                if key not in result:
+                    # New key, add it
+                    result[key] = value
                 else:
-                    # Check if values are equivalent
+                    # Existing key, check for conflict
                     try:
-                        if equivalent(attrs[key], value):
-                            # Values are equivalent, keep the attribute
-                            filtered_result[key] = value
-                        # else: Values differ, drop the attribute (don't add to filtered_result)
+                        if not equivalent(result[key], value):
+                            # Values are different, drop the key
+                            result.pop(key, None)
+                            dropped_keys.add(key)
                     except ValueError:
-                        # Likely an ambiguous truth value from numpy array comparison
-                        # Treat as non-equivalent and drop the attribute
-                        pass
-            result = filtered_result
-            dropped_keys |= {key for key in attrs if key not in result}
+                        # equivalent() failed (likely ambiguous truth value)
+                        # Treat as conflict and drop
+                        result.pop(key, None)
+                        dropped_keys.add(key)
+
         return result
     elif combine_attrs == "identical":
         result = dict(variable_attrs[0])

From c259f93369bad3ac59929598dc7fd06a0b4c0a1b Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Sun, 14 Sep 2025 08:08:18 -0700
Subject: [PATCH 10/17] Update whats-new.rst

Co-authored-by: Justus Magin <keewis@users.noreply.github.com>
---
 doc/whats-new.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 19e36dc1cc9..235544a7536 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -36,7 +36,7 @@ Bug fixes
   <https://github.com/spencerkclark>`_.
 - Propagation coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`).
   By `Justus Magin <https://github.com/keewis>`_.
-- Fix ``combine_attrs="drop_conflicts"`` to handle objects with ``__eq__`` methods that return
+- Allow ``combine_attrs="drop_conflicts"`` to handle objects with ``__eq__`` methods that return
   non-bool values (e.g., numpy arrays) without raising ``ValueError`` (:pull:`10726`).
   By `Maximilian Roos <https://github.com/max-sixty>`_.
 

From 2b85c4347943ca89494a8206218bb6fb9cbcfab9 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Sun, 14 Sep 2025 08:08:29 -0700
Subject: [PATCH 11/17] Update whats-new.rst

Co-authored-by: Justus Magin <keewis@users.noreply.github.com>
---
 doc/whats-new.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 235544a7536..b034d37746b 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -34,7 +34,7 @@ Bug fixes
 - Fix error when encoding an empty :py:class:`numpy.datetime64` array
   (:issue:`10722`, :pull:`10723`). By `Spencer Clark
   <https://github.com/spencerkclark>`_.
-- Propagation coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`).
+- Propagate coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`).
   By `Justus Magin <https://github.com/keewis>`_.
 - Allow ``combine_attrs="drop_conflicts"`` to handle objects with ``__eq__`` methods that return
   non-bool values (e.g., numpy arrays) without raising ``ValueError`` (:pull:`10726`).

From b7c05aa2ffae04fc572dde7ab7458d403958fd83 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Sun, 14 Sep 2025 17:18:56 -0700
Subject: [PATCH 12/17] Implement equivalent_attrs function as suggested by
 @keewis

- Create dedicated equivalent_attrs function that wraps equivalent()
- Returns False when ValueError is raised (e.g., for numpy arrays)
- Simplifies merge_attrs logic by encapsulating error handling
- Makes future behavior changes easier
---
 xarray/structure/merge.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index 1a142010f62..3f51bc2f04c 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -607,6 +607,17 @@ def merge_coords(
     return variables, out_indexes
 
 
+def equivalent_attrs(a: Any, b: Any) -> bool:
+    """Check if two attribute values are equivalent.
+
+    Returns False if the comparison raises ValueError (e.g., for numpy arrays).
+    """
+    try:
+        return equivalent(a, b)
+    except ValueError:
+        return False
+
+
 def merge_attrs(variable_attrs, combine_attrs, context=None):
     """Combine attributes from different variables according to combine_attrs"""
     if not variable_attrs:
@@ -635,26 +646,15 @@ def merge_attrs(variable_attrs, combine_attrs, context=None):
         dropped_keys = set()
 
         for attrs in variable_attrs:
-            # Process each attribute in the current attrs dict
             for key, value in attrs.items():
                 if key in dropped_keys:
-                    continue  # Already marked as conflicted
+                    continue
 
                 if key not in result:
-                    # New key, add it
                     result[key] = value
-                else:
-                    # Existing key, check for conflict
-                    try:
-                        if not equivalent(result[key], value):
-                            # Values are different, drop the key
-                            result.pop(key, None)
-                            dropped_keys.add(key)
-                    except ValueError:
-                        # equivalent() failed (likely ambiguous truth value)
-                        # Treat as conflict and drop
-                        result.pop(key, None)
-                        dropped_keys.add(key)
+                elif not equivalent_attrs(result[key], value):
+                    result.pop(key, None)
+                    dropped_keys.add(key)
 
         return result
     elif combine_attrs == "identical":

From d4c55229b624b47c67f77c0519acad151fb0fa0f Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Mon, 15 Sep 2025 07:48:58 -0700
Subject: [PATCH 13/17] Add pathological test cases suggested by @shoyer

- Test NumPy object arrays with nested arrays
- Test xarray.Dataset objects as attributes
- Test Pandas Series as attributes
- Update equivalent_attrs to catch TypeError in addition to ValueError
- Ensure equivalent_attrs always returns a boolean (not Dataset/array-like)

These cases all properly get dropped when they can't be reliably compared.
---
 xarray/structure/merge.py  | 14 ++++--
 xarray/tests/test_merge.py | 95 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 102 insertions(+), 7 deletions(-)

diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index 3f51bc2f04c..c2bab18d35c 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -610,11 +610,19 @@ def merge_coords(
 def equivalent_attrs(a: Any, b: Any) -> bool:
     """Check if two attribute values are equivalent.
 
-    Returns False if the comparison raises ValueError (e.g., for numpy arrays).
+    Returns False if the comparison raises ValueError or TypeError,
+    or if the result is not a boolean. This handles cases like:
+    - numpy arrays with ambiguous truth values
+    - xarray Datasets which can't be directly converted to numpy arrays
+    - pandas Series which return Series from comparisons
     """
     try:
-        return equivalent(a, b)
-    except ValueError:
+        result = equivalent(a, b)
+        # Ensure we have a boolean result, not an array-like or Dataset
+        if not isinstance(result, bool):
+            return False
+        return result
+    except (ValueError, TypeError):
         return False
 
 
diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
index 066d64d2a6d..da946d492a5 100644
--- a/xarray/tests/test_merge.py
+++ b/xarray/tests/test_merge.py
@@ -295,14 +295,15 @@ def __repr__(self):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", category=DeprecationWarning)
 
-            # Objects with same value (returning truthy array [True])
+            # Objects with custom __eq__ that returns non-bool are dropped
+            # even if they're "equal" because the comparison result is ambiguous
             actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
-            assert "custom" in actual.attrs
+            assert "custom" not in actual.attrs  # Dropped due to non-bool comparison
             assert actual.attrs["x"] == 1
 
-            # Objects with different values (returning falsy array [False])
+            # Objects with different values also get dropped
             actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
-            assert "custom" not in actual.attrs  # Dropped due to conflict
+            assert "custom" not in actual.attrs  # Dropped due to non-bool comparison
             assert actual.attrs["x"] == 1
             assert actual.attrs["y"] == 2
 
@@ -371,6 +372,92 @@ def __repr__(self):
         actual = xr.merge([ds11, ds12], combine_attrs="drop_conflicts")
         assert "alltrue" not in actual.attrs  # Dropped due to ambiguous comparison
 
+    def test_merge_attrs_drop_conflicts_pathological_cases(self):
+        """Test drop_conflicts with pathological cases suggested by @shoyer.
+
+        These test cases ensure we handle various problematic attribute types
+        that can raise exceptions during equality comparison.
+        """
+        import warnings
+
+        import numpy as np
+        import pandas as pd
+
+        # Test 1: NumPy object arrays with nested arrays
+        # These can have complex comparison behavior
+        x = np.array([None], dtype=object)
+        x[0] = np.arange(3)
+        y = np.array([None], dtype=object)
+        y[0] = np.arange(10, 13)
+
+        ds1 = xr.Dataset(attrs={"nested_array": x, "common": 1})
+        ds2 = xr.Dataset(attrs={"nested_array": y, "common": 1})
+
+        # Different nested arrays should cause attribute to be dropped
+        actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
+        assert (
+            "nested_array" not in actual.attrs
+        )  # Dropped due to different nested arrays
+        assert actual.attrs["common"] == 1
+
+        # Test with identical nested arrays
+        # Note: Even identical nested arrays will be dropped because comparison
+        # raises ValueError due to ambiguous truth value
+        z = np.array([None], dtype=object)
+        z[0] = np.arange(3)  # Same as x
+        ds3 = xr.Dataset(attrs={"nested_array": z, "other": 2})
+
+        actual = xr.merge([ds1, ds3], combine_attrs="drop_conflicts")
+        assert (
+            "nested_array" not in actual.attrs
+        )  # Dropped due to ValueError in comparison
+        assert actual.attrs["other"] == 2
+
+        # Test 2: xarray.Dataset objects as attributes (raises TypeError in equivalent)
+        attr_ds1 = xr.Dataset({"foo": 1})
+        attr_ds2 = xr.Dataset({"bar": 1})  # Different dataset
+        attr_ds3 = xr.Dataset({"foo": 1})  # Same as attr_ds1
+
+        ds4 = xr.Dataset(attrs={"dataset_attr": attr_ds1, "scalar": 42})
+        ds5 = xr.Dataset(attrs={"dataset_attr": attr_ds2, "scalar": 42})
+        ds6 = xr.Dataset(attrs={"dataset_attr": attr_ds3, "other": 99})
+
+        # Different datasets raise TypeError and should be dropped
+        actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
+        assert "dataset_attr" not in actual.attrs  # Dropped due to TypeError
+        assert actual.attrs["scalar"] == 42
+
+        # Even identical datasets cause TypeError in equivalent() and get dropped
+        # because equivalent() tries to convert them to numpy arrays
+        actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
+        assert "dataset_attr" not in actual.attrs  # Dropped due to TypeError
+        assert actual.attrs["other"] == 99
+
+        # Test 3: Pandas Series (raises ValueError due to ambiguous truth value)
+        series1 = pd.Series([1, 2])
+        series2 = pd.Series([3, 4])  # Different values
+        series3 = pd.Series([1, 2])  # Same as series1
+
+        ds7 = xr.Dataset(attrs={"series": series1, "value": "a"})
+        ds8 = xr.Dataset(attrs={"series": series2, "value": "a"})
+        ds9 = xr.Dataset(attrs={"series": series3, "value": "a"})
+
+        # Suppress potential warnings from pandas comparisons
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=DeprecationWarning)
+            warnings.filterwarnings("ignore", category=FutureWarning)
+
+            # Different series raise ValueError and get dropped
+            actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts")
+            assert "series" not in actual.attrs  # Dropped due to ValueError
+            assert actual.attrs["value"] == "a"
+
+            # Even identical series raise ValueError in equivalent() and get dropped
+            # because Series comparison returns another Series with ambiguous truth value
+            actual = xr.merge([ds7, ds9], combine_attrs="drop_conflicts")
+            assert "series" not in actual.attrs  # Dropped due to ValueError
+            assert actual.attrs["value"] == "a"
+
     def test_merge_attrs_no_conflicts_compat_minimal(self):
         """make sure compat="minimal" does not silence errors"""
         ds1 = xr.Dataset({"a": ("x", [], {"a": 0})})

From e2731251e23e58a939f43cca8063f63ba2a8cb55 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Mon, 15 Sep 2025 09:32:43 -0700
Subject: [PATCH 14/17] Use truthiness in equivalent_attrs for consistency with
 Python

- Choose truthiness over strict boolean checking for consistency with
  Python's standard 'if a == b:' behavior
- Accept edge cases with non-standard __eq__ methods as a deliberate tradeoff
- Add comprehensive tests documenting the behavior and edge cases
- Add clear documentation explaining the design choice and its implications

This approach is more permissive and works better with numpy scalars and
similar types, while still catching truly ambiguous cases (ValueError/TypeError).
---
 xarray/structure/merge.py  | 37 +++++++++++++++-----
 xarray/tests/test_merge.py | 71 +++++++++++++++++++++++++++++++++-----
 2 files changed, 91 insertions(+), 17 deletions(-)

diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index c2bab18d35c..24d5b4e9120 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -610,19 +610,38 @@ def merge_coords(
 def equivalent_attrs(a: Any, b: Any) -> bool:
     """Check if two attribute values are equivalent.
 
-    Returns False if the comparison raises ValueError or TypeError,
-    or if the result is not a boolean. This handles cases like:
-    - numpy arrays with ambiguous truth values
-    - xarray Datasets which can't be directly converted to numpy arrays
-    - pandas Series which return Series from comparisons
+    Returns False if the comparison raises ValueError or TypeError.
+    This handles cases like numpy arrays with ambiguous truth values
+    and xarray Datasets which can't be directly converted to numpy arrays.
+
+    For non-boolean results, we use truthiness (consistent with `if a == b`).
+    This is an imperfect but pragmatic choice:
+
+    Pros of truthiness:
+    - Consistent with Python's normal `if a == b:` behavior
+    - Preserves numpy scalars (np.bool_(True)) and similar types
+    - More permissive for common use cases
+
+    Cons of truthiness:
+    - Keeps attrs when __eq__ returns truthy non-bool (e.g., "error")
+    - Drops attrs when __eq__ returns falsy non-bool (e.g., 0, [])
+
+    The alternative (strict bool checking) would be safer but would drop
+    many legitimate comparisons. We choose consistency with Python's
+    standard behavior, accepting edge cases with pathological __eq__ methods.
+
+    TODO: Revisit this behavior in the future - consider strict type checking
+    or a more sophisticated approach to handling non-boolean comparisons.
     """
     try:
         result = equivalent(a, b)
-        # Ensure we have a boolean result, not an array-like or Dataset
-        if not isinstance(result, bool):
-            return False
-        return result
+        # Use truthiness, consistent with `if a == b:` behavior
+        # Note: This means non-boolean returns are interpreted by truthiness,
+        # which can lead to false positives/negatives but is more permissive
+        return bool(result)
     except (ValueError, TypeError):
+        # These exceptions indicate the comparison is truly ambiguous
+        # (e.g., numpy arrays that would raise "ambiguous truth value")
         return False
 
 
diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
index da946d492a5..06069b75ebe 100644
--- a/xarray/tests/test_merge.py
+++ b/xarray/tests/test_merge.py
@@ -295,15 +295,14 @@ def __repr__(self):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", category=DeprecationWarning)
 
-            # Objects with custom __eq__ that returns non-bool are dropped
-            # even if they're "equal" because the comparison result is ambiguous
+            # With truthiness: objects returning [True] are kept (truthy)
             actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
-            assert "custom" not in actual.attrs  # Dropped due to non-bool comparison
+            assert "custom" in actual.attrs  # Kept - [True] is truthy
             assert actual.attrs["x"] == 1
 
-            # Objects with different values also get dropped
+            # Objects with different values: equivalent returns False (bool), dropped
             actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
-            assert "custom" not in actual.attrs  # Dropped due to non-bool comparison
+            assert "custom" not in actual.attrs  # Dropped - different values
             assert actual.attrs["x"] == 1
             assert actual.attrs["y"] == 2
 
@@ -427,10 +426,10 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self):
         assert "dataset_attr" not in actual.attrs  # Dropped due to TypeError
         assert actual.attrs["scalar"] == 42
 
-        # Even identical datasets cause TypeError in equivalent() and get dropped
-        # because equivalent() tries to convert them to numpy arrays
+        # With truthiness: identical datasets are kept
+        # The comparison returns a truthy Dataset, so they're treated as equal
         actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
-        assert "dataset_attr" not in actual.attrs  # Dropped due to TypeError
+        assert "dataset_attr" in actual.attrs  # Kept with truthiness approach
         assert actual.attrs["other"] == 99
 
         # Test 3: Pandas Series (raises ValueError due to ambiguous truth value)
@@ -458,6 +457,62 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self):
             assert "series" not in actual.attrs  # Dropped due to ValueError
             assert actual.attrs["value"] == "a"
 
+    def test_merge_attrs_drop_conflicts_truthiness_edge_cases(self):
+        """Test edge cases demonstrating the truthiness tradeoff.
+
+        We deliberately use truthiness for consistency with Python's `if a == b:`
+        behavior. This test documents the implications of this design choice
+        with objects that have non-standard __eq__ methods.
+        """
+
+        # Case 1: Objects whose __eq__ returns truthy non-booleans
+        # These are kept because we respect truthiness
+        class ReturnsString:
+            def __init__(self, value):
+                self.value = value
+
+            def __eq__(self, other):
+                # Always returns a string (truthy if non-empty)
+                return "comparison result"
+
+        obj1 = ReturnsString("A")
+        obj2 = ReturnsString("B")  # Different object
+
+        ds1 = xr.Dataset(attrs={"obj": obj1})
+        ds2 = xr.Dataset(attrs={"obj": obj2})
+
+        actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
+
+        # Truthiness behavior: keeps attribute because "comparison result" is truthy
+        # This is the expected behavior when respecting truthiness
+        assert "obj" in actual.attrs
+
+        # Case 2: Objects whose __eq__ returns falsy non-booleans
+        # These are dropped because we respect truthiness
+        class ReturnsZero:
+            def __init__(self, value):
+                self.value = value
+
+            def __eq__(self, other):
+                # Always returns 0 (falsy) even if values match
+                return 0
+
+        obj3 = ReturnsZero("same")
+        obj4 = ReturnsZero("same")  # Different object, same value
+
+        ds3 = xr.Dataset(attrs={"zero": obj3})
+        ds4 = xr.Dataset(attrs={"zero": obj4})
+
+        actual = xr.merge([ds3, ds4], combine_attrs="drop_conflicts")
+
+        # Truthiness behavior: drops attribute because 0 is falsy
+        # This is the expected behavior when respecting truthiness
+        assert "zero" not in actual.attrs
+
+        # Note: These edge cases demonstrate the tradeoff of using truthiness.
+        # Well-behaved __eq__ methods return booleans and work correctly.
+        # We accept these edge cases for consistency with Python's standard behavior.
+
     def test_merge_attrs_no_conflicts_compat_minimal(self):
         """make sure compat="minimal" does not silence errors"""
         ds1 = xr.Dataset({"a": ("x", [], {"a": 0})})

From 5557193e9f4c7403e699b165d4afe40927bcf522 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Mon, 15 Sep 2025 10:04:10 -0700
Subject: [PATCH 15/17] Implement @shoyer's stricter equivalent() function

- Rewrite equivalent() to reject non-boolean comparison results
- Accept numpy bool scalars (np.bool_) but reject other non-bool types
- Simplify equivalent_attrs() since equivalent() now handles non-bool cases
- Update tests to reflect stricter behavior with non-standard __eq__ methods

This makes comparisons more predictable by rejecting ambiguous cases like
Dataset comparisons, custom objects with weird __eq__, etc. The tradeoff is
being less permissive than Python's standard 'if a == b:' behavior.
---
 xarray/core/utils.py       | 18 ++++++++++++-
 xarray/structure/merge.py  | 27 +++-----------------
 xarray/tests/test_merge.py | 52 ++++++++++++--------------------------
 3 files changed, 37 insertions(+), 60 deletions(-)

diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index d969fa8ea8c..d44aff9ff36 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -239,18 +239,34 @@ def equivalent(first: T, second: T) -> bool:
     """Compare two objects for equivalence (identity or equality), using
     array_equiv if either object is an ndarray. If both objects are lists,
     equivalent is sequentially called on all the elements.
+
+    Returns False for any comparison that doesn't return a boolean,
+    making this function safer to use with objects that have non-standard
+    __eq__ implementations.
     """
     # TODO: refactor to avoid circular import
     from xarray.core import duck_array_ops
 
     if first is second:
         return True
+
     if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
         return duck_array_ops.array_equiv(first, second)
+
     if isinstance(first, list) or isinstance(second, list):
         return list_equiv(first, second)  # type: ignore[arg-type]
 
-    return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
+    # For non-array/list types, use == but require boolean result
+    result = first == second
+    if not isinstance(result, bool):
+        # Accept numpy bool scalars as well
+        if isinstance(result, np.bool_):
+            return bool(result)
+        # Reject any other non-boolean type (Dataset, Series, custom objects, etc.)
+        return False
+
+    # Check for NaN equivalence
+    return result or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
 
 
 def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index 24d5b4e9120..ca39e794035 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -614,31 +614,12 @@ def equivalent_attrs(a: Any, b: Any) -> bool:
     This handles cases like numpy arrays with ambiguous truth values
     and xarray Datasets which can't be directly converted to numpy arrays.
 
-    For non-boolean results, we use truthiness (consistent with `if a == b`).
-    This is an imperfect but pragmatic choice:
-
-    Pros of truthiness:
-    - Consistent with Python's normal `if a == b:` behavior
-    - Preserves numpy scalars (np.bool_(True)) and similar types
-    - More permissive for common use cases
-
-    Cons of truthiness:
-    - Keeps attrs when __eq__ returns truthy non-bool (e.g., "error")
-    - Drops attrs when __eq__ returns falsy non-bool (e.g., 0, [])
-
-    The alternative (strict bool checking) would be safer but would drop
-    many legitimate comparisons. We choose consistency with Python's
-    standard behavior, accepting edge cases with pathological __eq__ methods.
-
-    TODO: Revisit this behavior in the future - consider strict type checking
-    or a more sophisticated approach to handling non-boolean comparisons.
+    Since equivalent() now handles non-boolean returns by returning False,
+    this wrapper mainly catches exceptions from comparisons that can't be
+    evaluated at all.
     """
     try:
-        result = equivalent(a, b)
-        # Use truthiness, consistent with `if a == b:` behavior
-        # Note: This means non-boolean returns are interpreted by truthiness,
-        # which can lead to false positives/negatives but is more permissive
-        return bool(result)
+        return equivalent(a, b)
     except (ValueError, TypeError):
         # These exceptions indicate the comparison is truly ambiguous
         # (e.g., numpy arrays that would raise "ambiguous truth value")
diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
index 06069b75ebe..9c02d26ba15 100644
--- a/xarray/tests/test_merge.py
+++ b/xarray/tests/test_merge.py
@@ -236,13 +236,7 @@ def test_merge_attrs_drop_conflicts(self):
         assert_identical(actual, expected)
 
     def test_merge_attrs_drop_conflicts_non_bool_eq(self):
-        """Test drop_conflicts behavior when __eq__ returns non-bool values.
-
-        When comparing attribute values, the _equivalent_drop_conflicts() function
-        uses == which can return non-bool values. The new behavior treats ambiguous
-        or falsy equality results as non-equivalent, dropping the attribute rather
-        than raising an error.
-        """
+        """Test drop_conflicts behavior when __eq__ returns non-bool values."""
         import warnings
 
         import numpy as np
@@ -295,14 +289,14 @@ def __repr__(self):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", category=DeprecationWarning)
 
-            # With truthiness: objects returning [True] are kept (truthy)
+            # Objects returning arrays are dropped (non-boolean return)
             actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
-            assert "custom" in actual.attrs  # Kept - [True] is truthy
+            assert "custom" not in actual.attrs  # Dropped - returns array, not bool
             assert actual.attrs["x"] == 1
 
-            # Objects with different values: equivalent returns False (bool), dropped
+            # Different values also dropped (returns array, not bool)
             actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
-            assert "custom" not in actual.attrs  # Dropped - different values
+            assert "custom" not in actual.attrs  # Dropped - returns non-boolean
             assert actual.attrs["x"] == 1
             assert actual.attrs["y"] == 2
 
@@ -426,10 +420,9 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self):
         assert "dataset_attr" not in actual.attrs  # Dropped due to TypeError
         assert actual.attrs["scalar"] == 42
 
-        # With truthiness: identical datasets are kept
-        # The comparison returns a truthy Dataset, so they're treated as equal
+        # Identical datasets are also dropped (comparison returns Dataset, not bool)
         actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
-        assert "dataset_attr" in actual.attrs  # Kept with truthiness approach
+        assert "dataset_attr" not in actual.attrs  # Dropped - returns Dataset, not bool
         assert actual.attrs["other"] == 99
 
         # Test 3: Pandas Series (raises ValueError due to ambiguous truth value)
@@ -457,22 +450,16 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self):
             assert "series" not in actual.attrs  # Dropped due to ValueError
             assert actual.attrs["value"] == "a"
 
-    def test_merge_attrs_drop_conflicts_truthiness_edge_cases(self):
-        """Test edge cases demonstrating the truthiness tradeoff.
+    def test_merge_attrs_drop_conflicts_non_boolean_eq_returns(self):
+        """Test objects with non-boolean __eq__ returns are dropped."""
 
-        We deliberately use truthiness for consistency with Python's `if a == b:`
-        behavior. This test documents the implications of this design choice
-        with objects that have non-standard __eq__ methods.
-        """
-
-        # Case 1: Objects whose __eq__ returns truthy non-booleans
-        # These are kept because we respect truthiness
+        # Case 1: Objects whose __eq__ returns non-boolean strings
         class ReturnsString:
             def __init__(self, value):
                 self.value = value
 
             def __eq__(self, other):
-                # Always returns a string (truthy if non-empty)
+                # Always returns a string (non-boolean)
                 return "comparison result"
 
         obj1 = ReturnsString("A")
@@ -483,18 +470,16 @@ def __eq__(self, other):
 
         actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
 
-        # Truthiness behavior: keeps attribute because "comparison result" is truthy
-        # This is the expected behavior when respecting truthiness
-        assert "obj" in actual.attrs
+        # Strict behavior: drops attribute because __eq__ returns non-boolean
+        assert "obj" not in actual.attrs
 
-        # Case 2: Objects whose __eq__ returns falsy non-booleans
-        # These are dropped because we respect truthiness
+        # Case 2: Objects whose __eq__ returns numbers
         class ReturnsZero:
             def __init__(self, value):
                 self.value = value
 
             def __eq__(self, other):
-                # Always returns 0 (falsy) even if values match
+                # Always returns 0 (non-boolean)
                 return 0
 
         obj3 = ReturnsZero("same")
@@ -505,14 +490,9 @@ def __eq__(self, other):
 
         actual = xr.merge([ds3, ds4], combine_attrs="drop_conflicts")
 
-        # Truthiness behavior: drops attribute because 0 is falsy
-        # This is the expected behavior when respecting truthiness
+        # Strict behavior: drops attribute because __eq__ returns non-boolean
         assert "zero" not in actual.attrs
 
-        # Note: These edge cases demonstrate the tradeoff of using truthiness.
-        # Well-behaved __eq__ methods return booleans and work correctly.
-        # We accept these edge cases for consistency with Python's standard behavior.
-
     def test_merge_attrs_no_conflicts_compat_minimal(self):
         """make sure compat="minimal" does not silence errors"""
         ds1 = xr.Dataset({"a": ("x", [], {"a": 0})})

From 786285cb5396463a8028a13f680e07086359f749 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Mon, 15 Sep 2025 14:39:24 -0700
Subject: [PATCH 16/17] Address @shoyer's review comments

- Fix comment to say 'nested numpy arrays' instead of 'numpy arrays'
- Use del instead of pop() to avoid creating unused return value
- Split long test methods into smaller, focused test methods
- Move all imports (warnings, pandas) to top of file

Co-authored-by: Claude <claude@anthropic.com>
---
 xarray/structure/merge.py  |  4 ++--
 xarray/tests/test_merge.py | 38 +++++++++++++++++++-------------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index ca39e794035..5bb53036042 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -622,7 +622,7 @@ def equivalent_attrs(a: Any, b: Any) -> bool:
         return equivalent(a, b)
     except (ValueError, TypeError):
         # These exceptions indicate the comparison is truly ambiguous
-        # (e.g., numpy arrays that would raise "ambiguous truth value")
+        # (e.g., nested numpy arrays that would raise "ambiguous truth value")
         return False
 
 
@@ -661,7 +661,7 @@ def merge_attrs(variable_attrs, combine_attrs, context=None):
                 if key not in result:
                     result[key] = value
                 elif not equivalent_attrs(result[key], value):
-                    result.pop(key, None)
+                    del result[key]
                     dropped_keys.add(key)
 
         return result
diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
index 9c02d26ba15..7b2e46d03a0 100644
--- a/xarray/tests/test_merge.py
+++ b/xarray/tests/test_merge.py
@@ -1,6 +1,9 @@
 from __future__ import annotations
 
+import warnings
+
 import numpy as np
+import pandas as pd
 import pytest
 
 import xarray as xr
@@ -235,12 +238,8 @@ def test_merge_attrs_drop_conflicts(self):
         expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0})
         assert_identical(actual, expected)
 
-    def test_merge_attrs_drop_conflicts_non_bool_eq(self):
-        """Test drop_conflicts behavior when __eq__ returns non-bool values."""
-        import warnings
-
-        import numpy as np
-
+    def test_merge_attrs_drop_conflicts_numpy_arrays(self):
+        """Test drop_conflicts with numpy arrays."""
         # Test with numpy arrays (which return arrays from ==)
         arr1 = np.array([1, 2, 3])
         arr2 = np.array([1, 2, 3])
@@ -260,6 +259,9 @@ def test_merge_attrs_drop_conflicts_non_bool_eq(self):
         assert "arr" not in actual.attrs  # Should drop due to conflict
         assert "other" in actual.attrs
 
+    def test_merge_attrs_drop_conflicts_custom_eq_returns_array(self):
+        """Test drop_conflicts with custom objects that return arrays from __eq__."""
+
         # Test with custom objects that return non-bool from __eq__
         class CustomEq:
             """Object whose __eq__ returns a non-bool value."""
@@ -300,6 +302,9 @@ def __repr__(self):
             assert actual.attrs["x"] == 1
             assert actual.attrs["y"] == 2
 
+    def test_merge_attrs_drop_conflicts_ambiguous_array_returns(self):
+        """Test drop_conflicts with objects returning ambiguous arrays from __eq__."""
+
         # Test edge case: object whose __eq__ returns empty array (ambiguous truth value)
         class EmptyArrayEq:
             def __eq__(self, other):
@@ -365,17 +370,8 @@ def __repr__(self):
         actual = xr.merge([ds11, ds12], combine_attrs="drop_conflicts")
         assert "alltrue" not in actual.attrs  # Dropped due to ambiguous comparison
 
-    def test_merge_attrs_drop_conflicts_pathological_cases(self):
-        """Test drop_conflicts with pathological cases suggested by @shoyer.
-
-        These test cases ensure we handle various problematic attribute types
-        that can raise exceptions during equality comparison.
-        """
-        import warnings
-
-        import numpy as np
-        import pandas as pd
-
+    def test_merge_attrs_drop_conflicts_nested_arrays(self):
+        """Test drop_conflicts with NumPy object arrays containing nested arrays."""
         # Test 1: NumPy object arrays with nested arrays
         # These can have complex comparison behavior
         x = np.array([None], dtype=object)
@@ -406,7 +402,9 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self):
         )  # Dropped due to ValueError in comparison
         assert actual.attrs["other"] == 2
 
-        # Test 2: xarray.Dataset objects as attributes (raises TypeError in equivalent)
+    def test_merge_attrs_drop_conflicts_dataset_attrs(self):
+        """Test drop_conflicts with xarray.Dataset objects as attributes."""
+        # xarray.Dataset objects as attributes (raises TypeError in equivalent)
         attr_ds1 = xr.Dataset({"foo": 1})
         attr_ds2 = xr.Dataset({"bar": 1})  # Different dataset
         attr_ds3 = xr.Dataset({"foo": 1})  # Same as attr_ds1
@@ -425,7 +423,9 @@ def test_merge_attrs_drop_conflicts_pathological_cases(self):
         assert "dataset_attr" not in actual.attrs  # Dropped - returns Dataset, not bool
         assert actual.attrs["other"] == 99
 
-        # Test 3: Pandas Series (raises ValueError due to ambiguous truth value)
+    def test_merge_attrs_drop_conflicts_pandas_series(self):
+        """Test drop_conflicts with Pandas Series as attributes."""
+        # Pandas Series (raises ValueError due to ambiguous truth value)
         series1 = pd.Series([1, 2])
         series2 = pd.Series([3, 4])  # Different values
         series3 = pd.Series([1, 2])  # Same as series1

From 23d693db52b9489a0b42f7e7f9c27e8f91ef1cc3 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <m@maxroos.com>
Date: Mon, 15 Sep 2025 21:09:26 -0700
Subject: [PATCH 17/17] Address @shoyer's review comments

- Split long test methods into smaller, focused test cases:
  - test_merge_attrs_drop_conflicts_ambiguous_array_returns split to add test_merge_attrs_drop_conflicts_all_true_array
  - test_merge_attrs_drop_conflicts_non_boolean_eq_returns split into test_merge_attrs_drop_conflicts_eq_returns_string and test_merge_attrs_drop_conflicts_eq_returns_number
- Ran formatter which removed a blank line

Co-authored-by: Claude <claude@anthropic.com>
---
 xarray/tests/test_merge.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
index 7b2e46d03a0..01bab8ca8b2 100644
--- a/xarray/tests/test_merge.py
+++ b/xarray/tests/test_merge.py
@@ -350,6 +350,9 @@ def __repr__(self):
             actual = xr.merge([ds9, ds10], combine_attrs="drop_conflicts")
             assert "multi" not in actual.attrs  # Dropped due to ambiguous comparison
 
+    def test_merge_attrs_drop_conflicts_all_true_array(self):
+        """Test drop_conflicts with all-True multi-element array from __eq__."""
+
         # Test with all-True multi-element array (unambiguous truthy)
         class AllTrueArrayEq:
             def __eq__(self, other):
@@ -450,8 +453,8 @@ def test_merge_attrs_drop_conflicts_pandas_series(self):
             assert "series" not in actual.attrs  # Dropped due to ValueError
             assert actual.attrs["value"] == "a"
 
-    def test_merge_attrs_drop_conflicts_non_boolean_eq_returns(self):
-        """Test objects with non-boolean __eq__ returns are dropped."""
+    def test_merge_attrs_drop_conflicts_eq_returns_string(self):
+        """Test objects whose __eq__ returns strings are dropped."""
 
         # Case 1: Objects whose __eq__ returns non-boolean strings
         class ReturnsString:
@@ -473,6 +476,9 @@ def __eq__(self, other):
         # Strict behavior: drops attribute because __eq__ returns non-boolean
         assert "obj" not in actual.attrs
 
+    def test_merge_attrs_drop_conflicts_eq_returns_number(self):
+        """Test objects whose __eq__ returns numbers are dropped."""
+
         # Case 2: Objects whose __eq__ returns numbers
         class ReturnsZero:
             def __init__(self, value):