Skip to content

Commit a38b626

Browse files
authored
Merge pull request #55 from thewtex/multi-scale-generation
2 parents d1cc20d + 5ba2065 commit a38b626

File tree

1 file changed

+74
-12
lines changed

1 file changed

+74
-12
lines changed

server/scripts/compress_encode.py

100755100644
Lines changed: 74 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,61 @@
66
import zarr
77
from pathlib import Path
88
import numpy as np
9+
import zipfile
10+
import json
11+
12+
def zip_zchunkstore(zip_file, url=None):
    """Returns a reference description for ReferenceFileSystem from an
    uncompressed Zarr zip file store.

    https://github.com/intake/fsspec-reference-maker

    Zarr JSON metadata members (``.zattrs``, ``.zgroup``, ``.zmetadata``,
    ``.zarray``) are inlined as text. Every other member is described as
    ``[url, offset, size]``, where ``offset`` is the byte position of the
    member's data inside the zip file and ``size`` is its stored length.

    Parameters
    ----------

    zip_file: str
        Path to the zip file.
    url: str, optional
        URL where the zip file will be served. Defaults to zip_file.

    Returns
    -------

    JSON-serializable reference description.

    Raises
    ------

    RuntimeError
        If any member of the archive is stored compressed; byte ranges into
        the archive are only meaningful for stored (uncompressed) members.
    """
    import struct

    zarr_json_files = ('.zattrs', '.zgroup', '.zmetadata', '.zarray')
    data_url = zip_file if url is None else url

    zchunkstore = {}
    with zipfile.ZipFile(zip_file) as zf, open(zip_file, 'rb') as raw:
        for info in zf.infolist():
            # ZipFile.compression is only the default used when *writing*;
            # the compression of an existing archive is recorded per member.
            if info.compress_type != zipfile.ZIP_STORED:
                raise RuntimeError("Compressed zip's are not supported.")

            if info.filename.endswith(zarr_json_files):
                zchunkstore[info.filename] = zf.read(info).decode('utf-8')
                continue

            # The local file header is 30 fixed bytes followed by the file
            # name and an optional extra field. Both variable lengths are
            # stored as little-endian uint16 values at bytes 26-29 of the
            # header, so read them from the file instead of assuming a UTF-8
            # name and an empty extra field.
            raw.seek(info.header_offset + 26)
            name_len, extra_len = struct.unpack('<HH', raw.read(4))
            offset = info.header_offset + 30 + name_len + extra_len
            zchunkstore[info.filename] = [data_url, offset, info.compress_size]

    return zchunkstore
54+
955

1056
def compress_encode(input_filepath,
1157
output_directory,
1258
multiscale=True,
1359
chunk_size=64,
1460
cname='zstd',
1561
clevel=5,
16-
shuffle=True):
62+
shuffle=True,
63+
zip_chunk_store=True):
1764
image = itk.imread(input_filepath)
1865
image_da = itk.xarray_from_image(image)
1966
dataset_name = str(Path(input_filepath))
@@ -29,6 +76,7 @@ def compress_encode(input_filepath,
2976

3077
image_ds.to_zarr(store,
3178
mode='w',
79+
group=f'0',
3280
compute=True,
3381
encoding={dataset_name: {'chunks': [chunk_size]*image.GetImageDimension(), 'compressor': compressor}})
3482

@@ -40,30 +88,42 @@ def compress_encode(input_filepath,
4088
reduced = image
4189
while not np.all(np.array(itk.size(reduced)) < 64):
4290
scale = len(pyramid)
43-
shrink_factors = [2**scale]*3
44-
reduced = itk.bin_shrink_image_filter(image, shrink_factors=shrink_factors)
45-
reduced_da = itk.xarray_from_image(reduced)
91+
shrink_factors = [2]*3
92+
for i, s in enumerate(itk.size(reduced)):
93+
if s < 4:
94+
shrink_factors[i] = 1
95+
reduced = itk.bin_shrink_image_filter(reduced, shrink_factors=shrink_factors)
96+
reduced_da = itk.xarray_from_image(reduced).copy()
4697
pyramid.append(reduced_da)
4798

48-
pyramid_group_paths = [""]
49-
for scale in range(1, len(pyramid)):
50-
pyramid_group_paths.append('scale_{0}'.format(scale))
51-
5299
for scale in range(1, len(pyramid)):
53100
ds = pyramid[scale].to_dataset(name=dataset_name)
54101
ds.to_zarr(store,
55102
mode='w',
56-
group=pyramid_group_paths[scale],
103+
group=f'{scale}',
57104
compute=True,
58105
encoding={dataset_name: {'chunks': [chunk_size]*3, 'compressor': compressor}})
59106

107+
datasets = [ { 'path': f'{scale}/{dataset_name}' } for scale in range(len(pyramid)) ]
108+
with zarr.open(store) as z:
109+
z.attrs['multiscales'] = [{ 'version': '0.1', 'name': dataset_name, 'datasets': datasets }]
110+
60111
# Re-consolidate entire dataset
61112
zarr.consolidate_metadata(store)
62-
for scale in range(1, len(pyramid)):
63-
store = zarr.DirectoryStore(str(Path(store_name) / pyramid_group_paths[scale]))
113+
for scale in range(0, len(pyramid)):
114+
store = zarr.DirectoryStore(str(Path(store_name) / f'{scale}'))
64115
# Also consolidate the metadata on the pyramid scales so they can be used independently
65116
zarr.consolidate_metadata(store)
66117

118+
if zip_chunk_store:
119+
store = zarr.DirectoryStore(store_name)
120+
zip_store_path = str(Path(output_directory)) + '.zip'
121+
with zarr.storage.ZipStore(zip_store_path, mode='w', compression=0) as zip_store:
122+
zarr.copy_store(store, zip_store)
123+
zchunkstore = zip_zchunkstore(zip_store_path)
124+
with open(zip_store_path + '.zchunkstore', 'w') as fp:
125+
json.dump(zchunkstore, fp)
126+
67127

68128
if __name__ == '__main__':
69129
parser = argparse.ArgumentParser('Convert and encode a medical image file in a compressed Zarr directory store.')
@@ -75,6 +135,7 @@ def compress_encode(input_filepath,
75135
parser.add_argument('--cname', default='zstd', help='Base compression codec.')
76136
parser.add_argument('--clevel', default=5, type=int, help='Compression level.')
77137
parser.add_argument('--no-multi-scale', action='store_true', help='Do not generate a multi-scale pyramid.')
138+
parser.add_argument('--no-zip-chunk-store', action='store_true', help='Do not generate a zip file and corresponding chunk store.')
78139

79140
args = parser.parse_args()
80141

@@ -84,4 +145,5 @@ def compress_encode(input_filepath,
84145
chunk_size=args.chunk_size,
85146
cname=args.cname,
86147
clevel=args.clevel,
87-
shuffle=not args.no_shuffle)
148+
shuffle=not args.no_shuffle,
149+
zip_chunk_store=not args.no_zip_chunk_store)

0 commit comments

Comments
 (0)