Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
ad3f63f
dev
davidhassell Aug 22, 2025
1179c8e
dev
davidhassell Aug 22, 2025
e1d4c97
first successful write
davidhassell Aug 23, 2025
d009cc6
dev
davidhassell Aug 24, 2025
0af9c76
dev
davidhassell Aug 26, 2025
31d1c83
dev
davidhassell Aug 26, 2025
76736e8
dev
davidhassell Aug 27, 2025
b0de4b3
dev
davidhassell Aug 27, 2025
b0d2fc5
dev
davidhassell Aug 28, 2025
27f0b35
dev
davidhassell Aug 29, 2025
1bccf28
dev
davidhassell Aug 29, 2025
ed5804d
dev
davidhassell Aug 29, 2025
0ab6a87
dev
davidhassell Aug 30, 2025
b72fb6a
dev
davidhassell Aug 31, 2025
e96375a
dev
davidhassell Aug 31, 2025
22d182e
dev
davidhassell Sep 1, 2025
5ac1dde
dev
davidhassell Sep 1, 2025
2158977
dev
davidhassell Sep 2, 2025
59303f5
dev
davidhassell Sep 3, 2025
308a991
dev
davidhassell Sep 3, 2025
ccf933b
dev
davidhassell Sep 3, 2025
548d039
dev
davidhassell Sep 15, 2025
4451f99
dev
davidhassell Oct 6, 2025
2c90245
dev
davidhassell Oct 6, 2025
3b4993c
dev
davidhassell Oct 8, 2025
1deb8c0
main merge
davidhassell Oct 8, 2025
33dc03b
deprecated netcdf_flatten
davidhassell Oct 13, 2025
fef739f
dev
davidhassell Oct 13, 2025
32b8b55
upstream merge
davidhassell Oct 20, 2025
cd9aec7
dev
davidhassell Oct 20, 2025
8616dcf
dev
davidhassell Oct 20, 2025
1b95e5c
dev
davidhassell Oct 27, 2025
48fd32a
dev
davidhassell Oct 29, 2025
cdafa4b
dev
davidhassell Nov 5, 2025
2eb45be
dev
davidhassell Nov 13, 2025
e02c0b7
Doc string and comment fixes and improvements
davidhassell Jan 8, 2026
a07f9d6
Typo
davidhassell Jan 8, 2026
c7c71b7
netcdf_flatten deprecation
davidhassell Jan 8, 2026
1e4968d
rename NETCDF4 variable to is_netcdf4
davidhassell Jan 8, 2026
0cc7aad
zarr no append
davidhassell Jan 8, 2026
f66f3a8
Typo
davidhassell Jan 8, 2026
769c2a9
fix no append with zarr
davidhassell Jan 8, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions Changelog.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
Version NEXTVERSION
-------------------

**2025-12-??**

**2026-01-??**

* Write Zarr v3 datasets with `cfdm.write`, and allow the reading of
grouped Zarr v2 and v3 datasets with `cfdm.read`
(https://github.com/NCAS-CMS/cfdm/issues/354)
* Read Zarr v2 and v3 datasets that contain a group hierarchy with
`cfdm.read` (https://github.com/NCAS-CMS/cfdm/issues/355)
* New function `cfdm.dataset_flatten` that replaces the deprecated
`cfdm.netcdf_flatten` (https://github.com/NCAS-CMS/cfdm/issues/355)
* New optional dependency: ``zarr>=3.1.3``
* Removed dependency (now optional): ``zarr>=3.0.8``
* Reduce the time taken to import `cfdm`
(https://github.com/NCAS-CMS/cfdm/issues/361)

Expand All @@ -29,7 +37,6 @@ Version 1.12.3.0

**2025-08-18**


* Fix `cfdm.Data.reshape` when the underlying data originate on disk
(https://github.com/NCAS-CMS/cfdm/issues/348)
* New keyword parameter to `cfdm.Field.dump`: ``data``
Expand Down Expand Up @@ -57,7 +64,7 @@ Version 1.12.2.0
retrieved from disk (https://github.com/NCAS-CMS/cfdm/issues/313)
* New keyword parameter to `cfdm.write`: ``chunk_cache``
(https://github.com/NCAS-CMS/cfdm/issues/328)
* Read Zarr datasets with `cfdm.read`
* Read Zarr v2 and v3 datasets with `cfdm.read`
(https://github.com/NCAS-CMS/cfdm/issues/335)
* Read multiple datasets simultaneously with `cfdm.read`
(https://github.com/NCAS-CMS/cfdm/issues/336)
Expand Down
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,12 @@ inspecting it:
The ``cfdm`` package can:

* read field and domain constructs from netCDF, CDL, and Zarr datasets
with a choice of netCDF backends, and in local, http, and s3 locations,
* be fully flexible with respect to HDF5 chunking,
with a choice of netCDF backends, and in local, http, and s3
locations,
* be fully flexible with respect to dataset storage chunking,
* create new field and domain constructs in memory,
* write and append field and domain constructs to netCDF datasets on disk,
* write and append field and domain constructs to netCDF and Zarr v3
datasets on disk,
* read, write, and manipulate UGRID mesh topologies,
* read, write, and create coordinates defined by geometry cells,
* read and write netCDF4 string data-type variables,
Expand Down
3 changes: 2 additions & 1 deletion cfdm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
is_log_level_debug,
is_log_level_detail,
is_log_level_info,
netcdf_flatten,
)

# Though these are internal-use methods, include them in the namespace
Expand Down Expand Up @@ -161,7 +162,7 @@
from .cfdmimplementation import CFDMImplementation, implementation

from .read_write import read, write
from .read_write.netcdf.flatten import netcdf_flatten
from .read_write.netcdf.flatten import dataset_flatten

from .examplefield import example_field, example_fields, example_domain

Expand Down
41 changes: 41 additions & 0 deletions cfdm/cfdmimplementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,25 @@ def nc_get_dataset_chunksizes(self, data):
"""
return data.nc_dataset_chunksizes()

def nc_get_dataset_shards(self, data):
    """Get the Zarr dataset sharding strategy for the data.

    .. versionadded:: (cfdm) NEXTVERSION

    .. seealso:: `nc_set_dataset_shards`

    :Parameters:

        data: `Data`
            The data whose sharding strategy is required.

    :Returns:

        `int` or `tuple` or `None`
            The dataset sharding strategy.

    """
    # Delegate to the Data object's own accessor
    shards = data.nc_dataset_shards()
    return shards

def nc_get_sample_dimension(self, count, default=None):
"""Return the name of the netCDF sample dimension.

Expand Down Expand Up @@ -1245,6 +1264,28 @@ def nc_set_hdf5_chunksizes(self, data, chunksizes):
"Use 'nc_set_dataset_chunksizes' instead."
)

def nc_set_dataset_shards(self, data, shards):
    """Set the Zarr dataset sharding strategy for the data.

    .. versionadded:: (cfdm) NEXTVERSION

    .. seealso:: `nc_get_dataset_shards`

    :Parameters:

        data: `Data`
            The data whose sharding strategy is to be set.

        shards: `None` or `int` or sequence of `int`
            The sharding strategy to apply when writing to a Zarr
            dataset.

    :Returns:

        `None`

    """
    # Delegate to the Data object's own mutator, passing its
    # return value (documented as `None`) straight through
    out = data.nc_set_dataset_shards(shards)
    return out

def parameters(self, parent):
"""Return all parameters from a component.

Expand Down
10 changes: 9 additions & 1 deletion cfdm/data/aggregatedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,17 @@ def _parse_fragment_array(self, aggregated_filename, fragment_array):
if not scalar:
identifier = fa_identifiers[index].item()

uri = fa_uris[index]
try:
# 'uri' is scalar numpy string type
uri = uri.item()
except AttributeError:
# E.g. 'uri' is already a `str` instance
pass

parsed_fragment_array[index] = {
"map": shape,
"uri": fa_uris[index].item(),
"uri": uri,
"identifier": identifier,
}
else:
Expand Down
6 changes: 4 additions & 2 deletions cfdm/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
)
from ..mixin.container import Container
from ..mixin.files import Files
from ..mixin.netcdf import NetCDFAggregation, NetCDFChunks
from ..mixin.netcdf import NetCDFAggregation, NetCDFChunks, NetCDFShards
from ..units import Units
from .abstract import Array
from .creation import to_dask
Expand All @@ -50,7 +50,9 @@
logger = logging.getLogger(__name__)


class Data(Container, NetCDFAggregation, NetCDFChunks, Files, core.Data):
class Data(
Container, NetCDFAggregation, NetCDFChunks, NetCDFShards, Files, core.Data
):
"""An N-dimensional data array with units and masked values.

* Contains an N-dimensional, indexable and broadcastable array with
Expand Down
1 change: 1 addition & 0 deletions cfdm/data/fragment/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .fragmenth5netcdfarray import FragmentH5netcdfArray
from .fragmentnetcdf4array import FragmentNetCDF4Array
from .fragmentuniquevaluearray import FragmentUniqueValueArray
from .fragmentzarrarray import FragmentZarrArray
7 changes: 6 additions & 1 deletion cfdm/data/fragment/fragmentfilearray.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,17 @@ def __new__(cls, *args, **kwargs):
"""
# Import fragment classes. Do this here (as opposed to outside
# the class) to aid subclassing.
from . import FragmentH5netcdfArray, FragmentNetCDF4Array
from . import (
FragmentH5netcdfArray,
FragmentNetCDF4Array,
FragmentZarrArray,
)

instance = super().__new__(cls)
instance._FragmentArrays = (
FragmentNetCDF4Array,
FragmentH5netcdfArray,
FragmentZarrArray,
)
return instance

Expand Down
10 changes: 10 additions & 0 deletions cfdm/data/fragment/fragmentzarrarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from ..zarrarray import ZarrArray
from .mixin import FragmentFileArrayMixin


class FragmentZarrArray(FragmentFileArrayMixin, ZarrArray):
    """A fragment of aggregated data in a file accessed with `zarr`.

    .. versionadded:: (cfdm) NEXTVERSION

    """
11 changes: 6 additions & 5 deletions cfdm/data/h5netcdfarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,14 +194,15 @@ def get_groups(self, address):
return out[:-1], out[-1]

def open(self, **kwargs):
"""Return a dataset file object and address.

When multiple files have been provided an attempt is made to
open each one, in the order stored, and a file object is
returned from the first file that exists.
"""Return a dataset object and address.

.. versionadded:: (cfdm) 1.11.2.0

:Parameters:

kwargs: optional
Extra keyword arguments to `h5netcdf.File`.

:Returns:

(`h5netcdf.File`, `str`)
Expand Down
13 changes: 7 additions & 6 deletions cfdm/data/netcdf4array.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,13 @@ def get_groups(self, address):
out = address.split("/")[1:]
return out[:-1], out[-1]

def open(self):
"""Return a dataset file object and address.
def open(self, **kwargs):
"""Return a dataset object and address.

When multiple files have been provided an attempt is made to
open each one, in the order stored, and a file object is
returned from the first file that exists.
:Parameters:

kwargs: optional
Extra keyword arguments to `netCDF4.Dataset`.

:Returns:

Expand All @@ -221,4 +222,4 @@ def open(self):
"""
import netCDF4

return super().open(netCDF4.Dataset, mode="r")
return super().open(netCDF4.Dataset, mode="r", **kwargs)
15 changes: 12 additions & 3 deletions cfdm/data/netcdfindexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
class netcdf_indexer:
"""A data indexer that also applies netCDF masking and unpacking.

Here "netCDF4" refers to the API of the netCDF data model, rather
Here "netCDF" refers to the API of the netCDF data model, rather
than any particular dataset encoding or software library API.

Indexing may be orthogonal or non-orthogonal. Orthogonal indexing
Expand Down Expand Up @@ -395,9 +395,15 @@ def _default_FillValue(self, dtype):
"""
from netCDF4 import default_fillvals

if dtype.kind in "OS":
kind = dtype.kind
if kind in "OS":
return default_fillvals["S1"]

if kind == "T":
# np.dtypes.StringDType, which stores variable-width
# string data in a UTF-8 encoding, as used by `zarr`
return ""

return default_fillvals[dtype.str[1:]]

def _index(self, index, data=None):
Expand Down Expand Up @@ -623,7 +629,10 @@ def _mask(self, data, dtype, attributes, dtype_unsigned_int):
if fvalisnan:
mask = np.isnan(data)
else:
mask = data == fval
# Must use `np.asanyarray` here, to ensure that
# 'mask' is never a `bool`, which would make the
# following 'mask.any' call fail.
mask = np.asanyarray(data == fval)

if mask.any():
if fill_value is None:
Expand Down
22 changes: 17 additions & 5 deletions cfdm/data/zarrarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,18 +98,30 @@ def close(self, dataset):
# `zarr.Group` objects don't need closing
pass

def open(self):
"""Return a dataset file object and address.
def open(self, **kwargs):
"""Return a dataset object and address.

.. versionadded:: (cfdm) 1.12.2.0

:Parameters:

kwargs: optional
Extra keyword arguments to `zarr.open`.

:Returns:

(`zarr.Group`, `str`)
The dataset object open in read-only mode, and the
variable name of the data within the dataset.

"""
import zarr

return super().open(zarr.open, mode="r")
try:
import zarr
except ModuleNotFoundError as error:
error.msg += (
". Install the 'zarr' package "
"(https://pypi.org/project/zarr) to read Zarr datasets"
)
raise

return super().open(zarr.open, mode="r", **kwargs)
Loading