diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 96d965ed347..81d370766ab 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -50,6 +50,9 @@ Bug fixes By `Deepak Cherian `_. - Fix detection of the ``h5netcdf`` backend. Xarray now selects ``h5netcdf`` if the default ``netCDF4`` engine is not available (:issue:`10401`, :pull:`10557`). By `Scott Staniewicz `_. +- Ensure ``unlimited_dims`` passed to :py:meth:`xarray.DataArray.to_netcdf`, :py:meth:`xarray.Dataset.to_netcdf` or :py:meth:`xarray.DataTree.to_netcdf` only contains dimensions present in the object; raise ``ValueError`` otherwise (:issue:`10549`, :pull:`10608`). + By `Kai Mühlbauer `_. + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index b20af13e628..28639c242d5 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -244,6 +244,28 @@ def check_attr(name, value, valid_types): check_attr(k, v, valid_types) +def _sanitize_unlimited_dims(dataset, unlimited_dims): + msg_origin = "unlimited_dims-kwarg" + if unlimited_dims is None: + unlimited_dims = dataset.encoding.get("unlimited_dims", None) + msg_origin = "dataset.encoding" + if unlimited_dims is not None: + if isinstance(unlimited_dims, str) or not isinstance(unlimited_dims, Iterable): + unlimited_dims = [unlimited_dims] + else: + unlimited_dims = list(unlimited_dims) + dataset_dims = set(dataset.dims) + unlimited_dims = set(unlimited_dims) + if undeclared_dims := (unlimited_dims - dataset_dims): + msg = ( + f"Unlimited dimension(s) {undeclared_dims!r} declared in {msg_origin!r}, " + f"but not part of current dataset dimensions. " + f"Consider removing {undeclared_dims!r} from {msg_origin!r}." + ) + raise ValueError(msg) + return unlimited_dims + + def _resolve_decoders_kwargs(decode_cf, open_backend_dataset_parameters, **decoders): for d in list(decoders): if decode_cf is False and d in open_backend_dataset_parameters: @@ -2007,6 +2029,8 @@ def to_netcdf( # validate Dataset keys, DataArray names, and attr keys/values _validate_dataset_names(dataset) _validate_attrs(dataset, engine, invalid_netcdf) + # sanitize unlimited_dims + unlimited_dims = _sanitize_unlimited_dims(dataset, unlimited_dims) try: store_open = WRITEABLE_STORES[engine] @@ -2045,14 +2069,6 @@ def to_netcdf( store = store_open(target, mode, format, group, **kwargs) - if unlimited_dims is None: - unlimited_dims = dataset.encoding.get("unlimited_dims", None) - if unlimited_dims is not None: - if isinstance(unlimited_dims, str) or not isinstance(unlimited_dims, Iterable): - unlimited_dims = [unlimited_dims] - else: - unlimited_dims = list(unlimited_dims) - writer = ArrayWriter() # TODO: figure out how to refactor this logic (here and in save_mfdataset) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ef68a959dfc..ab90723d1aa 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1324,6 +1324,47 @@ def test_encoding_kwarg(self) -> None: with self.roundtrip(ds, save_kwargs=kwargs) as actual: pass + def test_encoding_unlimited_dims(self) -> None: + if isinstance(self, ZarrBase): + pytest.skip("No unlimited_dims handled in zarr.") + ds = Dataset({"x": ("y", np.arange(10.0))}) + with self.roundtrip(ds, save_kwargs=dict(unlimited_dims=["y"])) as actual: + assert actual.encoding["unlimited_dims"] == set("y") + assert_equal(ds, actual) + + # Regression test for https://github.com/pydata/xarray/issues/2134 + with self.roundtrip(ds, save_kwargs=dict(unlimited_dims="y")) as actual: + assert actual.encoding["unlimited_dims"] == set("y") + assert_equal(ds, actual) + + ds.encoding = {"unlimited_dims": ["y"]} + with self.roundtrip(ds) as actual: + assert actual.encoding["unlimited_dims"] == set("y") + assert_equal(ds, actual) + + # Regression test for https://github.com/pydata/xarray/issues/2134 + ds.encoding = {"unlimited_dims": "y"} + with self.roundtrip(ds) as actual: + assert actual.encoding["unlimited_dims"] == set("y") + assert_equal(ds, actual) + + # test unlimited_dims validation + # https://github.com/pydata/xarray/issues/10549 + ds.encoding = {"unlimited_dims": "z"} + with pytest.raises( + ValueError, + match=r"Unlimited dimension\(s\) .* declared in 'dataset.encoding'", + ): + with self.roundtrip(ds) as _: + pass + ds.encoding = {} + with pytest.raises( + ValueError, + match=r"Unlimited dimension\(s\) .* declared in 'unlimited_dims-kwarg'", + ): + with self.roundtrip(ds, save_kwargs=dict(unlimited_dims=["z"])) as _: + pass + def test_encoding_kwarg_dates(self) -> None: ds = Dataset({"t": pd.date_range("2000-01-01", periods=3)}) units = "days since 1900-01-01" @@ -1918,16 +1959,6 @@ def test_read_variable_len_strings(self) -> None: with open_dataset(tmp_file, **cast(dict, kwargs)) as actual: assert_identical(expected, actual) - def test_encoding_unlimited_dims(self) -> None: - ds = Dataset({"x": ("y", np.arange(10.0))}) - with self.roundtrip(ds, save_kwargs=dict(unlimited_dims=["y"])) as actual: - assert actual.encoding["unlimited_dims"] == set("y") - assert_equal(ds, actual) - ds.encoding = {"unlimited_dims": ["y"]} - with self.roundtrip(ds) as actual: - assert actual.encoding["unlimited_dims"] == set("y") - assert_equal(ds, actual) - def test_raise_on_forward_slashes_in_names(self) -> None: # test for forward slash in variable names and dimensions # see GH 7943 @@ -4393,16 +4424,6 @@ def test_read_byte_attrs_as_unicode(self) -> None: expected = Dataset(attrs={"foo": "bar"}) assert_identical(expected, actual) - def test_encoding_unlimited_dims(self) -> None: - ds = Dataset({"x": ("y", np.arange(10.0))}) - with self.roundtrip(ds, save_kwargs=dict(unlimited_dims=["y"])) as actual: - assert actual.encoding["unlimited_dims"] == set("y") - assert_equal(ds, actual) - ds.encoding = {"unlimited_dims": ["y"]} - with self.roundtrip(ds) as actual: - assert actual.encoding["unlimited_dims"] == set("y") - assert_equal(ds, actual) - def test_compression_encoding_h5py(self) -> None: ENCODINGS: tuple[tuple[dict[str, Any], dict[str, Any]], ...] = ( # h5py style compression with gzip codec will be converted to diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 4b3729e2724..a958d5b3d19 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -606,6 +606,10 @@ def test_encoding_kwarg_fixed_width_string(self) -> None: # CFEncodedInMemoryStore doesn't support explicit string encodings. pass + def test_encoding_unlimited_dims(self) -> None: + # CFEncodedInMemoryStore doesn't support unlimited_dims. + pass + class TestDecodeCFVariableWithArrayUnits: def test_decode_cf_variable_with_array_units(self) -> None: