4 changes: 4 additions & 0 deletions changes/3422.bugfix.rst
@@ -0,0 +1,4 @@
+Fix a potential race condition when using :func:`zarr.create_array` with the ``data`` parameter
+set to a NumPy array. Previously Zarr iterated over the newly created array at too fine a
+granularity (per chunk rather than per stored object), so concurrent writes could target the same
+stored object. Now Zarr iterates at a granularity that matches the stored objects for that array.
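
As an illustration (not part of this pull request), here is a minimal sketch of the scenario the entry describes, mirroring the new test added below: an in-memory NumPy array streamed into a sharded Zarr array whose 1-element chunks are grouped into 100-element shards, so each shard is one stored object.

```python
# Minimal sketch of the affected call path: streaming NumPy data into a
# sharded array. With chunks=(1,) and shards=(100,), the copy now proceeds
# one shard (one stored object) at a time instead of one chunk at a time.
import numpy as np
import zarr
from zarr.storage import MemoryStore

data = np.arange(300, dtype="int64")
arr = zarr.create_array(
    MemoryStore(),
    data=data,      # source data; shape and dtype are taken from it
    chunks=(1,),    # 1-element chunks ...
    shards=(100,),  # ... grouped into 100-element stored objects (shards)
    fill_value=-1,
)
print(arr.shape)  # (300,)
```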
2 changes: 1 addition & 1 deletion src/zarr/core/array.py
@@ -4421,7 +4421,7 @@ async def _copy_arraylike_region(chunk_coords: slice, _data: NDArrayLike) -> None:
 
 # Stream data from the source array to the new array
 await concurrent_map(
-    [(region, data) for region in result._iter_chunk_regions()],
+    [(region, data) for region in result._iter_shard_regions()],
     _copy_arraylike_region,
     zarr.core.config.config.get("async.concurrency"),
 )
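
The one-line change above swaps the iterator driving `concurrent_map` from per-chunk regions to per-shard regions. As a rough illustration (using a hypothetical `iter_regions` helper, not Zarr's private `_iter_chunk_regions`/`_iter_shard_regions` methods): with 1-element chunks grouped into 100-element shards, per-chunk iteration schedules many concurrent writes into each stored object, while per-shard iteration schedules exactly one writer per stored object, avoiding the overlapping read-modify-write updates behind the reported race.

```python
# Hypothetical helper (not Zarr's implementation) illustrating the difference
# in region granularity for a 1-D array of length 300.
from collections.abc import Iterator


def iter_regions(length: int, step: int) -> Iterator[tuple[slice, ...]]:
    # Yield contiguous 1-D regions of at most `step` elements each.
    for start in range(0, length, step):
        yield (slice(start, min(start + step, length)),)


chunk_regions = list(iter_regions(300, 1))    # 300 regions -> many writers per shard
shard_regions = list(iter_regions(300, 100))  # 3 regions -> one writer per stored object
```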
26 changes: 26 additions & 0 deletions tests/test_array.py
@@ -74,6 +74,7 @@
     ZarrUserWarning,
 )
 from zarr.storage import LocalStore, MemoryStore, StorePath
+from zarr.storage._logging import LoggingStore
 
 from .test_dtype.conftest import zdtype_examples
 
@@ -2119,3 +2120,28 @@ def test_iter_chunk_regions(
     assert observed == expected
     assert observed == tuple(arr._iter_chunk_regions())
     assert observed == tuple(arr._async_array._iter_chunk_regions())
+
+
+@pytest.mark.parametrize("num_shards", [1, 3])
+@pytest.mark.parametrize("array_type", ["numpy", "zarr"])
+def test_create_array_with_data_num_gets(
+    num_shards: int, array_type: Literal["numpy", "zarr"]
+) -> None:
+    """
+    Test that creating an array with data only invokes a single get request per stored object
+    """
+    store = LoggingStore(store=MemoryStore())
+
+    chunk_shape = (1,)
+    shard_shape = (100,)
+    shape = (shard_shape[0] * num_shards,)
+    data: Array | npt.NDArray[np.int64]
+    if array_type == "numpy":
+        data = np.zeros(shape[0], dtype="int64")
+    else:
+        data = zarr.zeros(shape, dtype="int64")
+
+    zarr.create_array(store, data=data, chunks=chunk_shape, shards=shard_shape, fill_value=-1)  # type: ignore[arg-type]
+    # one get for the metadata and one per shard.
+    # Note: we don't actually need one get per shard, but this is the current behavior
+    assert store.counter["get"] == 1 + num_shards
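
The assertion relies on `LoggingStore`, an internal wrapper that records how often each method of the wrapped store is called. A hedged sketch of that pattern outside pytest, using the same layout as the `num_shards=3` case; the expected count follows from the assertion above:

```python
# Count store-level "get" requests issued while creating an array from NumPy data.
import numpy as np
import zarr
from zarr.storage import MemoryStore
from zarr.storage._logging import LoggingStore  # internal module, as in the test

store = LoggingStore(store=MemoryStore())
zarr.create_array(
    store,
    data=np.zeros(300, dtype="int64"),
    chunks=(1,),
    shards=(100,),
    fill_value=-1,
)
# Per the test's assertion: one get for the metadata plus one per shard.
print(store.counter["get"])  # 4 with three 100-element shards
```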