|
| 1 | +# /// script |
| 2 | +# dependencies = [ |
| 3 | +# "zarr", |
| 4 | +# "numpy", |
| 5 | +# "fsspec", |
| 6 | +# ] |
| 7 | +# /// |
| 8 | +""" |
| 9 | +ZEP 8 URL Syntax Demo |
| 10 | +
|
| 11 | +This script demonstrates ZEP 8 URL syntax for chained store access in zarr-python. |
| 12 | +ZEP 8 URLs allow you to chain different storage adapters using pipe (|) syntax. |
| 13 | +
|
| 14 | +Examples of ZEP 8 URLs: |
| 15 | + - "memory:" - Simple in-memory store |
| 16 | + - "file:/path/data.zip|zip:" - ZIP file access |
| 17 | + - "s3://bucket/data.zip|zip:|zarr3:" - Cloud ZIP with zarr3 format |
| 18 | + - "file:/path/repo|icechunk:branch:main" - Icechunk repository (if available) |
| 19 | +
|
| 20 | +For comprehensive Icechunk integration examples, see the icechunk repository tests. |
| 21 | +""" |
| 22 | + |
| 23 | +import tempfile |
| 24 | +from pathlib import Path |
| 25 | + |
| 26 | +import numpy as np |
| 27 | + |
| 28 | +import zarr |
| 29 | +from zarr.storage import ZipStore |
| 30 | +from zarr.storage._zep8 import URLParser, is_zep8_url |
| 31 | + |
| 32 | + |
def demo_basic_zep8() -> None:
    """Show the two simplest ZEP 8 URLs: an in-memory store and a local file store.

    Each URL string is passed directly to ``zarr.open_group``; a small array
    is written through the resulting group and a short summary is printed.
    """
    print("=== Basic ZEP 8 URL Demo ===")
    print("📝 Testing basic ZEP 8 URL formats")

    # --- in-memory store ---
    print("\n1. Memory store:")
    mem_url = "memory:"
    mem_group = zarr.open_group(mem_url, mode="w")
    samples = mem_group.create_array("test_data", shape=(10,), dtype="f4")
    samples[:] = np.random.random(10)
    print(f"✅ Created array via {mem_url}")
    print(f" Data shape: {samples.shape}, dtype: {samples.dtype}")

    # --- local file store; the scratch directory is removed on block exit ---
    print("\n2. File store:")
    with tempfile.TemporaryDirectory() as scratch:
        disk_url = f"file:{scratch}/test.zarr"
        disk_group = zarr.open_group(disk_url, mode="w")
        counters = disk_group.create_array("persistent_data", shape=(20,), dtype="i4")
        counters[:] = range(20)
        print(f"✅ Created array via {disk_url}")
        head = list(counters[:5])
        print(f" Data: {head}... (first 5 elements)")
| 57 | + |
| 58 | + |
def demo_zip_chaining() -> None:
    """Write zarr data into a ZIP archive, then read it back via a chained ZEP 8 URL.

    The archive is produced with ``ZipStore`` directly. The read side goes
    through ``zarr.open_group`` using a ``file:<path>|zip:`` URL. All files
    live in a temporary directory that is removed when the function returns.
    """
    print("\n=== ZIP Chaining Demo ===")

    print("📝 Creating ZIP file with zarr data, then accessing via ZEP 8 URL")

    # dtype used for the temperature array inside the archive. The synthetic
    # data below is generated as float64, so it must be cast to this dtype
    # before it can be compared exactly with the values read back.
    stored_dtype = "f4"

    with tempfile.TemporaryDirectory() as tmpdir:
        zip_path = Path(tmpdir) / "data.zip"

        # Step 1: Create ZIP file with zarr data
        print(f"Creating ZIP file at: {zip_path}")
        with ZipStore(str(zip_path), mode="w") as zip_store:
            root = zarr.open_group(zip_store, mode="w")

            # Create sample datasets: a noisy one-year sine wave around 20 degrees.
            temps = root.create_array("temperatures", shape=(365,), dtype=stored_dtype)
            temp_data = (
                20 + 10 * np.sin(np.arange(365) * 2 * np.pi / 365) + np.random.normal(0, 2, 365)
            )
            temps[:] = temp_data
            temps.attrs["units"] = "celsius"
            temps.attrs["description"] = "Daily temperature readings"

            metadata = root.create_group("metadata")
            info = metadata.create_array("info", shape=(1,), dtype="U50")
            info[0] = "Weather data from ZIP demo"

            print("✅ Created temperature data in ZIP file")
            written = temps[:]  # read once, reuse for both min and max
            print(f" Temperature range: {written.min():.1f}°C to {written.max():.1f}°C")

        # Step 2: Access via ZEP 8 URL syntax (the ZipStore above is closed by now)
        print("\nAccessing ZIP data via ZEP 8 URL")
        zip_url = f"file:{zip_path}|zip:"
        root_read = zarr.open_group(zip_url, mode="r")

        temps_read = root_read["temperatures"]
        info_read = root_read["metadata/info"]

        # Compare against the data as it was actually stored (float32). Comparing
        # the raw float64 values would report a mismatch purely because of the
        # precision lost in the float64 -> float32 cast on write.
        expected = temp_data.astype(stored_dtype)

        print(f"✅ Successfully read via URL: {zip_url}")
        print(f" Temperature units: {temps_read.attrs['units']}")
        print(f" Description: {temps_read.attrs['description']}")
        print(f" Metadata: {info_read[0]}")
        print(f" Data integrity: {np.array_equal(expected, temps_read[:])}")
| 102 | + |
| 103 | + |
def demo_url_parsing() -> None:
    """Run a fixed set of sample URLs through ``is_zep8_url`` and ``URLParser``.

    For every sample the detection result is printed. Samples detected as
    ZEP 8 URLs are then parsed, and each segment's non-empty ``scheme``,
    ``adapter`` and ``path`` fields are listed. Parse failures are reported
    instead of raised so that the remaining samples are still shown.
    """
    print("\n=== URL Parsing Demo ===")

    url_parser = URLParser()

    candidates = (
        "memory:",
        "file:/tmp/data.zarr",
        "file:/tmp/data.zip|zip:",
        "s3://bucket/data.zip|zip:|zarr3:",
        "memory:|icechunk:branch:main",  # This would be rejected by icechunk adapter
        "/regular/file/path",  # Not a ZEP 8 URL
    )

    print("📝 Testing URL parsing:")

    for candidate in candidates:
        recognised = is_zep8_url(candidate)
        print(f"\n URL: {candidate}")
        print(f" ZEP 8: {recognised}")

        if not recognised:
            continue

        try:
            segments = url_parser.parse(candidate)
            print(f" Segments: {len(segments)}")
            for index, segment in enumerate(segments):
                # Only mention the fields that are actually populated.
                described = []
                if segment.scheme:
                    described.append(f"scheme={segment.scheme}")
                if segment.adapter:
                    described.append(f"adapter={segment.adapter}")
                if segment.path:
                    described.append(f"path='{segment.path}'")
                print(f" {index}: {', '.join(described)}")
        except Exception as exc:
            print(f" Parse error: {exc}")
| 138 | + |
| 139 | + |
def demo_error_cases() -> None:
    """Exercise failure paths and print whether each one behaved as expected.

    Three groups are covered: malformed URLs, a chain naming an unknown
    adapter, and plain single-segment URLs that should open normally.
    Exceptions are caught broadly on purpose: this is a demo that reports
    the exception type rather than propagating it.
    """
    print("\n=== Error Handling Demo ===")

    print("🚫 Testing error cases:")

    # Test 1: Invalid URL format
    print("\n1. Invalid URL formats:")
    invalid_urls = [
        "|invalid:start",  # Starts with pipe
        "memory:|",  # Ends with pipe
        "memory:||zip:",  # Double pipe
        "",  # Empty URL
    ]

    for url in invalid_urls:
        try:
            zarr.open_group(url, mode="r")
        except Exception as e:
            print(f"✅ Correctly rejected: {url} -> {type(e).__name__}")
        else:
            print(f"❌ Should have failed: {url}")

    # Test 2: Unknown adapters
    print("\n2. Unknown adapters:")
    try:
        zarr.open_group("memory:|unknown_adapter:", mode="r")
    except Exception as e:
        print(f"✅ Correctly rejected unknown adapter -> {type(e).__name__}")
    else:
        print("❌ Should have failed: unknown adapter")

    # Test 3: Fallback behavior
    print("\n3. Fallback to regular stores:")
    # Use a self-cleaning temporary directory so the demo leaves nothing
    # behind on disk (``mkdtemp`` would require manual removal).
    with tempfile.TemporaryDirectory() as tmpdir:
        regular_urls = ["memory:", f"file:{tmpdir}/fallback.zarr"]

        for url in regular_urls:
            try:
                root = zarr.open_group(url, mode="w")
                arr = root.create_array("data", shape=(5,), dtype="i4")
                arr[:] = [1, 2, 3, 4, 5]
                print(f"✅ Fallback works: {url}")
            except Exception as e:
                print(f"❌ Fallback failed: {url} -> {e}")
| 182 | + |
| 183 | + |
if __name__ == "__main__":
    # Script entry point: print a banner, run every demo in order, print a footer.
    rule = "=" * 30

    print("ZEP 8 URL Syntax Demo")
    print(rule)

    for run_demo in (
        demo_basic_zep8,
        demo_zip_chaining,
        demo_url_parsing,
        demo_error_cases,
    ):
        run_demo()

    print("\n" + rule)
    print("Demo completed!")
    print("\nZEP 8 URL syntax enables flexible chaining of storage adapters.")
    print("For adapter-specific examples (like Icechunk), see the respective")
    print("package repositories and their test suites.")
0 commit comments