Skip to content

Commit 61a17ea

Browse files
committed
feature: support zep8 strings when opening groups and arrays
1 parent 926a52f commit 61a17ea

File tree

14 files changed

+2382
-17
lines changed

14 files changed

+2382
-17
lines changed

examples/zep8_url_demo.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
# /// script
2+
# dependencies = [
3+
# "zarr",
4+
# "numpy",
5+
# "fsspec",
6+
# ]
7+
# ///
8+
"""
9+
ZEP 8 URL Syntax Demo
10+
11+
This script demonstrates ZEP 8 URL syntax for chained store access in zarr-python.
12+
ZEP 8 URLs allow you to chain different storage adapters using pipe (|) syntax.
13+
14+
Examples of ZEP 8 URLs:
15+
- "memory:" - Simple in-memory store
16+
- "file:/path/data.zip|zip:" - ZIP file access
17+
- "s3://bucket/data.zip|zip:|zarr3:" - Cloud ZIP with zarr3 format
18+
- "file:/path/repo|icechunk:branch:main" - Icechunk repository (if available)
19+
20+
For comprehensive Icechunk integration examples, see the icechunk repository tests.
21+
"""
22+
23+
import tempfile
24+
from pathlib import Path
25+
26+
import numpy as np
27+
28+
import zarr
29+
from zarr.storage import ZipStore
30+
from zarr.storage._zep8 import URLParser, is_zep8_url
31+
32+
33+
def demo_basic_zep8() -> None:
    """Show the two simplest ZEP 8 URLs: an in-memory store and a local directory.

    Each URL string is handed directly to ``zarr.open_group`` in write mode;
    a small array is then created and filled through the returned group.
    """
    print("=== Basic ZEP 8 URL Demo ===")

    print("📝 Testing basic ZEP 8 URL formats")

    # 1) In-memory store addressed by the bare "memory:" URL.
    print("\n1. Memory store:")
    url = "memory:"
    mem_group = zarr.open_group(url, mode="w")
    mem_array = mem_group.create_array("test_data", shape=(10,), dtype="f4")
    mem_array[:] = np.random.random(10)
    print(f"✅ Created array via {url}")
    print(f" Data shape: {mem_array.shape}, dtype: {mem_array.dtype}")

    # 2) Directory store addressed by a "file:" URL; the directory lives in a
    #    temporary location that is removed when the ``with`` block exits.
    print("\n2. File store:")
    with tempfile.TemporaryDirectory() as scratch_dir:
        url = f"file:{scratch_dir}/test.zarr"
        file_group = zarr.open_group(url, mode="w")
        file_array = file_group.create_array(
            "persistent_data", shape=(20,), dtype="i4"
        )
        file_array[:] = range(20)
        print(f"✅ Created array via {url}")
        head = list(file_array[:5])
        print(f" Data: {head}... (first 5 elements)")
57+
58+
59+
def demo_zip_chaining() -> None:
    """Demonstrate ZIP file chaining with ZEP 8 URLs.

    The demo runs in two steps inside a temporary directory:

    1. Write a small zarr hierarchy (a ``temperatures`` array plus a
       ``metadata/info`` array) into a ZIP archive using ``ZipStore`` directly.
    2. Re-open the same archive read-only through the chained URL
       ``file:<path>|zip:`` and print what was read back.
    """
    print("\n=== ZIP Chaining Demo ===")

    print("📝 Creating ZIP file with zarr data, then accessing via ZEP 8 URL")

    with tempfile.TemporaryDirectory() as tmpdir:
        zip_path = Path(tmpdir) / "data.zip"

        # Step 1: Create ZIP file with zarr data
        print(f"Creating ZIP file at: {zip_path}")
        with ZipStore(str(zip_path), mode="w") as zip_store:
            root = zarr.open_group(zip_store, mode="w")

            # Create sample datasets: one year of synthetic daily temperatures
            # (a sine wave around 20 with Gaussian noise).
            temps = root.create_array("temperatures", shape=(365,), dtype="f4")
            temp_data = (
                20
                + 10 * np.sin(np.arange(365) * 2 * np.pi / 365)
                + np.random.normal(0, 2, 365)
            )
            temps[:] = temp_data
            temps.attrs["units"] = "celsius"
            temps.attrs["description"] = "Daily temperature readings"

            metadata = root.create_group("metadata")
            info = metadata.create_array("info", shape=(1,), dtype="U50")
            info[0] = "Weather data from ZIP demo"

            print("✅ Created temperature data in ZIP file")
            print(f" Temperature range: {temps[:].min():.1f}°C to {temps[:].max():.1f}°C")

        # Step 2: Access via ZEP 8 URL syntax
        print("\nAccessing ZIP data via ZEP 8 URL")
        zip_url = f"file:{zip_path}|zip:"
        root_read = zarr.open_group(zip_url, mode="r")

        temps_read = root_read["temperatures"]
        info_read = root_read["metadata/info"]

        print(f"✅ Successfully read via URL: {zip_url}")
        print(f" Temperature units: {temps_read.attrs['units']}")
        print(f" Description: {temps_read.attrs['description']}")
        print(f" Metadata: {info_read[0]}")

        # The array was created with dtype "f4", whereas temp_data is float64.
        # np.array_equal is an exact comparison, so comparing the raw float64
        # values against the float32 values read back would report False purely
        # because of rounding. Compare against what was actually stored.
        expected = temp_data.astype("f4")
        print(f" Data integrity: {np.array_equal(expected, temps_read[:])}")
102+
103+
104+
def demo_url_parsing() -> None:
    """Run a handful of sample strings through the ZEP 8 URL detector and parser.

    For every sample the result of ``is_zep8_url`` is printed. Strings that
    are detected as ZEP 8 URLs are additionally parsed with ``URLParser`` and
    each resulting segment's non-empty ``scheme`` / ``adapter`` / ``path``
    fields are listed. Parse failures are reported, not raised.
    """
    print("\n=== URL Parsing Demo ===")

    url_parser = URLParser()

    samples = [
        "memory:",
        "file:/tmp/data.zarr",
        "file:/tmp/data.zip|zip:",
        "s3://bucket/data.zip|zip:|zarr3:",
        "memory:|icechunk:branch:main",  # This would be rejected by icechunk adapter
        "/regular/file/path",  # Not a ZEP 8 URL
    ]

    print("📝 Testing URL parsing:")

    for url in samples:
        detected = is_zep8_url(url)
        print(f"\n URL: {url}")
        print(f" ZEP 8: {detected}")

        # Only strings recognised as ZEP 8 URLs are handed to the parser.
        if not detected:
            continue

        try:
            segments = url_parser.parse(url)
            print(f" Segments: {len(segments)}")
            for index, segment in enumerate(segments):
                # Collect only the fields that are actually set on the segment.
                described = []
                if segment.scheme:
                    described.append(f"scheme={segment.scheme}")
                if segment.adapter:
                    described.append(f"adapter={segment.adapter}")
                if segment.path:
                    described.append(f"path='{segment.path}'")
                summary = ", ".join(described)
                print(f" {index}: {summary}")
        except Exception as e:
            # Demo code: report whatever the parser raised and carry on.
            print(f" Parse error: {e}")
138+
139+
140+
def demo_error_cases() -> None:
    """Demonstrate common error cases and their handling.

    Three scenarios are exercised:

    1. Malformed URLs (leading/trailing/double pipe, empty string) passed to
       ``zarr.open_group``; each is expected to raise.
    2. A URL that chains to an adapter name that is not registered.
    3. Plain single-segment URLs (``memory:`` and ``file:``), which are
       expected to open successfully.

    Exceptions are caught broadly on purpose: this is a demo that reports the
    exception type and does not depend on a specific one.
    """
    print("\n=== Error Handling Demo ===")

    print("🚫 Testing error cases:")

    # Test 1: Invalid URL format
    print("\n1. Invalid URL formats:")
    invalid_urls = [
        "|invalid:start",  # Starts with pipe
        "memory:|",  # Ends with pipe
        "memory:||zip:",  # Double pipe
        "",  # Empty URL
    ]

    for url in invalid_urls:
        try:
            zarr.open_group(url, mode="r")
            print(f"❌ Should have failed: {url}")
        except Exception as e:
            print(f"✅ Correctly rejected: {url} -> {type(e).__name__}")

    # Test 2: Unknown adapters
    print("\n2. Unknown adapters:")
    try:
        zarr.open_group("memory:|unknown_adapter:", mode="r")
        print("❌ Should have failed: unknown adapter")
    except Exception as e:
        print(f"✅ Correctly rejected unknown adapter -> {type(e).__name__}")

    # Test 3: Fallback behavior
    print("\n3. Fallback to regular stores:")
    # Use a self-cleaning temporary directory so the demo does not leave a
    # stray directory behind on every run (tempfile.mkdtemp() never removes
    # what it creates).
    with tempfile.TemporaryDirectory() as tmpdir:
        regular_urls = ["memory:", f"file:{tmpdir}/fallback.zarr"]

        for url in regular_urls:
            try:
                root = zarr.open_group(url, mode="w")
                arr = root.create_array("data", shape=(5,), dtype="i4")
                arr[:] = [1, 2, 3, 4, 5]
                print(f"✅ Fallback works: {url}")
            except Exception as e:
                print(f"❌ Fallback failed: {url} -> {e}")
182+
183+
184+
if __name__ == "__main__":
    # Script entry point: print a banner, run every demo in order, then
    # print a closing summary.
    banner = "=" * 30

    print("ZEP 8 URL Syntax Demo")
    print(banner)

    for run_demo in (
        demo_basic_zep8,
        demo_zip_chaining,
        demo_url_parsing,
        demo_error_cases,
    ):
        run_demo()

    print("\n" + banner)
    print("Demo completed!")
    print("\nZEP 8 URL syntax enables flexible chaining of storage adapters.")
    print("For adapter-specific examples (like Icechunk), see the respective")
    print("package repositories and their test suites.")

pyproject.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,3 +428,12 @@ ignore-words-list = "astroid"
428428

429429
[project.entry-points.pytest11]
430430
zarr = "zarr.testing"
431+
432+
[project.entry-points."zarr.stores"]
433+
file = "zarr.storage._builtin_adapters:FileSystemAdapter"
434+
memory = "zarr.storage._builtin_adapters:MemoryAdapter"
435+
https = "zarr.storage._builtin_adapters:HttpsAdapter"
436+
s3 = "zarr.storage._builtin_adapters:S3Adapter"
437+
gcs = "zarr.storage._builtin_adapters:GCSAdapter"
438+
gs = "zarr.storage._builtin_adapters:GSAdapter"
439+
zip = "zarr.storage._zip:ZipStoreAdapter"

0 commit comments

Comments
 (0)