From b7b9d363bd193d82e74eaba78172da6b79456f59 Mon Sep 17 00:00:00 2001 From: Rich Signell Date: Wed, 8 Apr 2026 08:31:51 -0400 Subject: [PATCH 1/5] Fix multi-coordinate indexes dropped in _replace_maybe_drop_dims and _copy_listed When a custom Index spans multiple coordinates across different dimensions (e.g. a UGRID topology index covering both `nodes` and `faces`), it was incorrectly dropped during: - DataArray dimension reduction (e.g. `.mean("extra_dim")`) via `_replace_maybe_drop_dims`, which filtered coords by strict dim subset without consulting `Index.should_add_coord_to_array()` - Dataset subsetting by variable names (e.g. `ds[["node_data"]]`) via `_copy_listed`, with the same issue Both paths now use `should_add_coord_to_array()` for index-backed coordinates, consistent with how `_construct_dataarray` already works. Fixes #11215 Co-authored-by: Claude --- xarray/core/dataarray.py | 10 +++++++--- xarray/core/dataset.py | 8 +++++++- xarray/tests/test_dataarray.py | 35 +++++++++++++++++++++++++++++++++ xarray/tests/test_dataset.py | 36 ++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index c49a41de108..668daa71992 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -538,9 +538,13 @@ def _replace_maybe_drop_dims( indexes = filter_indexes_from_coords(self._indexes, set(coords)) else: allowed_dims = set(variable.dims) - coords = { - k: v for k, v in self._coords.items() if set(v.dims) <= allowed_dims - } + coords = {} + for k, v in self._coords.items(): + if k in self._indexes: + if self._indexes[k].should_add_coord_to_array(k, v, allowed_dims): + coords[k] = v + elif set(v.dims) <= allowed_dims: + coords[k] = v indexes = filter_indexes_from_coords(self._indexes, set(coords)) return self._replace(variable, coords, name, indexes=indexes) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 78a897b8891..c784426ba4f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1221,7 +1221,13 @@ def _copy_listed(self, names: Iterable[Hashable]) -> Self: if k not in self._coord_names: continue - if set(self.variables[k].dims) <= needed_dims: + if k in self._indexes: + if self._indexes[k].should_add_coord_to_array( + k, self._variables[k], needed_dims + ): + variables[k] = self._variables[k] + coord_names.add(k) + elif set(self.variables[k].dims) <= needed_dims: variables[k] = self._variables[k] coord_names.add(k) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5e514cc9767..af2f9c896e5 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -555,6 +555,41 @@ def should_add_coord_to_array(self, name, var, dims): assert_identical(actual.coords, coords, check_default_indexes=False) assert "x_bnds" not in actual.dims + def test_replace_maybe_drop_dims_preserves_multi_coord_index(self) -> None: + # Regression test for https://github.com/pydata/xarray/issues/11215 + # Multi-coordinate indexes spanning multiple dims should be preserved + # after reducing over an unrelated dimension. + class MultiDimIndex(Index): + def should_add_coord_to_array(self, name, var, dims): + return True + + idx = MultiDimIndex() + coords = Coordinates( + coords={ + "node_x": ("nodes", [0.0, 1.0, 2.0]), + "node_y": ("nodes", [0.0, 0.0, 1.0]), + "face_x": ("faces", [0.5, 1.5]), + "face_y": ("faces", [0.5, 0.5]), + }, + indexes={k: idx for k in ["node_x", "node_y", "face_x", "face_y"]}, + ) + node_da = DataArray( + np.random.rand(3, 4), dims=("nodes", "extra"), coords=coords + ) + face_da = DataArray( + np.random.rand(2, 4), dims=("faces", "extra"), coords=coords + ) + + reduced_node = node_da.mean("extra") + reduced_face = face_da.mean("extra") + + assert isinstance( + next(iter(reduced_node.xindexes.values())), MultiDimIndex + ), "Index dropped from node DataArray after reduction" + assert isinstance( + next(iter(reduced_face.xindexes.values())), MultiDimIndex + ), "Index dropped from face DataArray after reduction" + def test_equals_and_identical(self) -> None: orig = DataArray(np.arange(5.0), {"a": 42}, dims="x") diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 6be6c75dc5f..a3cc2e9cf45 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4553,6 +4553,42 @@ def should_add_coord_to_array(self, name, var, dims): assert_identical(actual.coords, coords, check_default_indexes=False) assert "x_bnds" not in actual.dims + def test_copy_listed_preserves_multi_coord_index(self) -> None: + # Regression test for https://github.com/pydata/xarray/issues/11215 + # Multi-coordinate indexes spanning multiple dims should be preserved + # when subsetting a Dataset by variable names via ds[["var"]]. + class MultiDimIndex(Index): + def should_add_coord_to_array(self, name, var, dims): + return True + + idx = MultiDimIndex() + coords = Coordinates( + coords={ + "node_x": ("nodes", [0.0, 1.0, 2.0]), + "node_y": ("nodes", [0.0, 0.0, 1.0]), + "face_x": ("faces", [0.5, 1.5]), + "face_y": ("faces", [0.5, 0.5]), + }, + indexes={k: idx for k in ["node_x", "node_y", "face_x", "face_y"]}, + ) + ds = Dataset( + { + "node_data": (("nodes",), [1.0, 2.0, 3.0]), + "face_data": (("faces",), [10.0, 20.0]), + }, + coords=coords, + ) + + node_subset = ds[["node_data"]] + face_subset = ds[["face_data"]] + + assert isinstance( + next(iter(node_subset.xindexes.values())), MultiDimIndex + ), "Index dropped from Dataset when subsetting to node variable" + assert isinstance( + next(iter(face_subset.xindexes.values())), MultiDimIndex + ), "Index dropped from Dataset when subsetting to face variable" + def test_virtual_variables_default_coords(self) -> None: dataset = Dataset({"foo": ("x", range(10))}) expected1 = DataArray(range(10), dims="x", name="x") From a661575e9b51b3894707b3abf6d574774d92c324 Mon Sep 17 00:00:00 2001 From: Rich Signell Date: Wed, 8 Apr 2026 08:49:04 -0400 Subject: [PATCH 2/5] Fix mypy type error and ruff style issues - Cast OrderedSet to set when calling should_add_coord_to_array to satisfy the expected set[Hashable] type annotation - Replace dict comprehensions with dict.fromkeys (ruff C416) - Reformat assert messages to ruff-preferred style Co-authored-by: Claude --- xarray/core/dataset.py | 2 +- xarray/tests/test_dataarray.py | 14 +++++++------- xarray/tests/test_dataset.py | 14 +++++++------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c784426ba4f..11c7bf5b51a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1223,7 +1223,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> Self: if k in self._indexes: if self._indexes[k].should_add_coord_to_array( - k, self._variables[k], needed_dims + k, self._variables[k], set(needed_dims) ): variables[k] = self._variables[k] coord_names.add(k) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index af2f9c896e5..b7fa843ba0e 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -571,7 +571,7 @@ def should_add_coord_to_array(self, name, var, dims): "face_x": ("faces", [0.5, 1.5]), "face_y": ("faces", [0.5, 0.5]), }, - indexes={k: idx for k in ["node_x", "node_y", "face_x", "face_y"]}, + indexes=dict.fromkeys(["node_x", "node_y", "face_x", "face_y"], idx), ) node_da = DataArray( np.random.rand(3, 4), dims=("nodes", "extra"), coords=coords @@ -583,12 +583,12 @@ def should_add_coord_to_array(self, name, var, dims): reduced_node = node_da.mean("extra") reduced_face = face_da.mean("extra") - assert isinstance( - next(iter(reduced_node.xindexes.values())), MultiDimIndex - ), "Index dropped from node DataArray after reduction" - assert isinstance( - next(iter(reduced_face.xindexes.values())), MultiDimIndex - ), "Index dropped from face DataArray after reduction" + assert isinstance(next(iter(reduced_node.xindexes.values())), MultiDimIndex), ( + "Index dropped from node DataArray after reduction" + ) + assert isinstance(next(iter(reduced_face.xindexes.values())), MultiDimIndex), ( + "Index dropped from face DataArray after reduction" + ) def test_equals_and_identical(self) -> None: orig = DataArray(np.arange(5.0), {"a": 42}, dims="x") diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index a3cc2e9cf45..2319b137927 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4569,7 +4569,7 @@ def should_add_coord_to_array(self, name, var, dims): "face_x": ("faces", [0.5, 1.5]), "face_y": ("faces", [0.5, 0.5]), }, - indexes={k: idx for k in ["node_x", "node_y", "face_x", "face_y"]}, + indexes=dict.fromkeys(["node_x", "node_y", "face_x", "face_y"], idx), ) ds = Dataset( { @@ -4582,12 +4582,12 @@ def should_add_coord_to_array(self, name, var, dims): node_subset = ds[["node_data"]] face_subset = ds[["face_data"]] - assert isinstance( - next(iter(node_subset.xindexes.values())), MultiDimIndex - ), "Index dropped from Dataset when subsetting to node variable" - assert isinstance( - next(iter(face_subset.xindexes.values())), MultiDimIndex - ), "Index dropped from Dataset when subsetting to face variable" + assert isinstance(next(iter(node_subset.xindexes.values())), MultiDimIndex), ( + "Index dropped from Dataset when subsetting to node variable" + ) + assert isinstance(next(iter(face_subset.xindexes.values())), MultiDimIndex), ( + "Index dropped from Dataset when subsetting to face variable" + ) def test_virtual_variables_default_coords(self) -> None: dataset = Dataset({"foo": ("x", range(10))}) From 526105df6c3f6c10fc986c65fc6a8dd1bca034a9 Mon Sep 17 00:00:00 2001 From: Rich Signell Date: Fri, 10 Apr 2026 15:40:14 -0400 Subject: [PATCH 3/5] Address reviewer feedback: fix to_dataarray() and simplify tests - Apply should_add_coord_to_array() in Dataset.to_dataarray() for consistent handling of multi-coordinate indexes (per benbovy review) - Add test_to_dataarray_preserves_multi_coord_index to cover this path - Simplify isinstance(next(iter(...))) checks to cleaner for-loops in both test_dataarray.py and test_dataset.py (per benbovy suggestion) Co-authored-by: Claude --- xarray/core/dataset.py | 9 ++++++++- xarray/tests/test_dataarray.py | 10 ++++----- xarray/tests/test_dataset.py | 37 ++++++++++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 11c7bf5b51a..b0dd3dffc80 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7160,7 +7160,14 @@ def to_dataarray( dims = (dim,) + broadcast_vars[0].dims variable = Variable(dims, data, self.attrs, fastpath=True) - coords = {k: v.variable for k, v in self.coords.items()} + broadcast_dims = set(broadcast_vars[0].dims) + coords = {} + for k, v in self.coords.items(): + if k in self._indexes: + if self._indexes[k].should_add_coord_to_array(k, v.variable, broadcast_dims): + coords[k] = v.variable + elif set(v.dims) <= broadcast_dims: + coords[k] = v.variable indexes = filter_indexes_from_coords(self._indexes, set(coords)) new_dim_index = PandasIndex(list(self.data_vars), dim) indexes[dim] = new_dim_index diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b7fa843ba0e..8eb52046a31 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -583,12 +583,10 @@ def should_add_coord_to_array(self, name, var, dims): reduced_node = node_da.mean("extra") reduced_face = face_da.mean("extra") - assert isinstance(next(iter(reduced_node.xindexes.values())), MultiDimIndex), ( - "Index dropped from node DataArray after reduction" - ) - assert isinstance(next(iter(reduced_face.xindexes.values())), MultiDimIndex), ( - "Index dropped from face DataArray after reduction" - ) + for da in [reduced_node, reduced_face]: + for name in ["node_x", "node_y", "face_x", "face_y"]: + assert name in da.coords + assert isinstance(da.xindexes[name], MultiDimIndex) def test_equals_and_identical(self) -> None: orig = DataArray(np.arange(5.0), {"a": 42}, dims="x") diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 2319b137927..36655819a97 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4582,13 +4582,42 @@ def should_add_coord_to_array(self, name, var, dims): node_subset = ds[["node_data"]] face_subset = ds[["face_data"]] - assert isinstance(next(iter(node_subset.xindexes.values())), MultiDimIndex), ( - "Index dropped from Dataset when subsetting to node variable" + for ds_sub in [node_subset, face_subset]: + for name in ["node_x", "node_y", "face_x", "face_y"]: + assert name in ds_sub.coords + assert isinstance(ds_sub.xindexes[name], MultiDimIndex) + + def test_to_dataarray_preserves_multi_coord_index(self) -> None: + # Regression test for https://github.com/pydata/xarray/issues/11215 + # Multi-coordinate indexes spanning multiple dims should be preserved + # when converting a Dataset to a DataArray via to_dataarray(). + class MultiDimIndex(Index): + def should_add_coord_to_array(self, name, var, dims): + return True + + idx = MultiDimIndex() + coords = Coordinates( + coords={ + "node_x": ("nodes", [0.0, 1.0, 2.0]), + "node_y": ("nodes", [0.0, 0.0, 1.0]), + "face_x": ("faces", [0.5, 1.5]), + "face_y": ("faces", [0.5, 0.5]), + }, + indexes=dict.fromkeys(["node_x", "node_y", "face_x", "face_y"], idx), ) - assert isinstance(next(iter(face_subset.xindexes.values())), MultiDimIndex), ( - "Index dropped from Dataset when subsetting to face variable" + ds = Dataset( + { + "node_data": (("nodes",), [1.0, 2.0, 3.0]), + }, + coords=coords, ) + da = ds.to_dataarray() + + for name in ["node_x", "node_y", "face_x", "face_y"]: + assert name in da.coords + assert isinstance(da.xindexes[name], MultiDimIndex) + def test_virtual_variables_default_coords(self) -> None: dataset = Dataset({"foo": ("x", range(10))}) expected1 = DataArray(range(10), dims="x", name="x") From e1cea469470fd335a8075cecb083f27ffd9db7f3 Mon Sep 17 00:00:00 2001 From: Rich Signell Date: Mon, 13 Apr 2026 12:36:08 -0400 Subject: [PATCH 4/5] Simplify to_dataarray(): keep all coords and indexes without filtering Per reviewer feedback, no fix was needed in to_dataarray() since Dataset variables are broadcasted and all coordinates are kept anyway. Replace the should_add_coord_to_array loop and filter_indexes_from_coords call with a simple dict copy of all coords and indexes. Co-authored-by: Claude --- xarray/core/dataset.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b0dd3dffc80..e0a81106dca 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7160,15 +7160,8 @@ def to_dataarray( dims = (dim,) + broadcast_vars[0].dims variable = Variable(dims, data, self.attrs, fastpath=True) - broadcast_dims = set(broadcast_vars[0].dims) - coords = {} - for k, v in self.coords.items(): - if k in self._indexes: - if self._indexes[k].should_add_coord_to_array(k, v.variable, broadcast_dims): - coords[k] = v.variable - elif set(v.dims) <= broadcast_dims: - coords[k] = v.variable - indexes = filter_indexes_from_coords(self._indexes, set(coords)) + coords = {k: v.variable for k, v in self.coords.items()} + indexes = dict(self._indexes) new_dim_index = PandasIndex(list(self.data_vars), dim) indexes[dim] = new_dim_index coords.update(new_dim_index.create_variables()) From 85fb34fc1c08645097d3ab0a5571a111b5b42d32 Mon Sep 17 00:00:00 2001 From: Rich Signell Date: Mon, 13 Apr 2026 12:40:36 -0400 Subject: [PATCH 5/5] Add whats-new entry for multi-coord index fix (#11215) Co-authored-by: Claude --- doc/whats-new.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2b1e1c354eb..c40ddd771b2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -107,6 +107,14 @@ Deprecations Bug Fixes ~~~~~~~~~ +- Fix multi-coordinate indexes being dropped in :py:meth:`DataArray._replace_maybe_drop_dims` + (e.g. after reducing over an unrelated dimension) and in :py:meth:`Dataset._copy_listed` + (e.g. when subsetting a Dataset by variable names). Both paths now consult + :py:meth:`Index.should_add_coord_to_array`, consistent with + :py:meth:`Dataset._construct_dataarray`. Also simplify :py:meth:`Dataset.to_dataarray` + to keep all coordinates and indexes directly, since variables are broadcast and all + coords are retained (:issue:`11215`, :pull:`11286`). + By `Rich Signell `_. - Allow writing ``StringDType`` variables to netCDF files (:issue:`11199`). By `Kristian KollsgÄrd `_. - Fix ``Source`` link in api docs (:pull:`11187`)