66import zarr
77from pathlib import Path
88import numpy as np
9+ import zipfile
10+ import json
11+
def zip_zchunkstore(zip_file, url=None):
    """Return a reference description for ReferenceFileSystem from an
    uncompressed Zarr zip file store.

    https://github.com/intake/fsspec-reference-maker

    Zarr JSON metadata files (.zattrs, .zgroup, .zmetadata, .zarray) are
    inlined as text; every other member is referenced as
    ``[url, byte_offset, byte_length]`` into the zip file itself. This is
    only valid because stored (uncompressed) members are raw byte ranges.

    Parameters
    ----------
    zip_file: str
        Path to the zip file.
    url: str, optional
        URL where the zip file will be served. Defaults to zip_file.

    Returns
    -------
    dict
        JSON-serializable reference description.

    Raises
    ------
    RuntimeError
        If any archive member is compressed: byte-range references are
        only meaningful for ZIP_STORED members.
    """
    zarr_json_files = ('.zattrs', '.zgroup', '.zmetadata', '.zarray')
    data_url = zip_file if url is None else url

    zchunkstore = {}
    # Open the raw file alongside the ZipFile so each member's *local*
    # header can be read: its extra-field length can differ from the
    # central-directory entry that zipfile exposes (e.g. zip64), and a
    # wrong length would corrupt every byte offset.
    with zipfile.ZipFile(zip_file) as zf, open(zip_file, 'rb') as raw:
        for info in zf.infolist():
            # NOTE: ZipFile.compression reflects the constructor argument
            # (default ZIP_STORED when opened for reading), not the archive
            # contents, so the compression check must be per-member.
            if info.compress_type != zipfile.ZIP_STORED:
                raise RuntimeError("Compressed zip's are not supported.")

            # Local file header layout: 26 bytes of fixed fields, then a
            # 2-byte filename length and a 2-byte extra-field length,
            # followed by the filename, the extra field, and the data.
            raw.seek(info.header_offset + 26)
            name_len, extra_len = struct.unpack('<HH', raw.read(4))
            offset = info.header_offset + 30 + name_len + extra_len
            # compress_size equals file_size for stored members.
            size = info.compress_size

            if info.filename.endswith(zarr_json_files):
                content = zipfile.Path(zf, at=info.filename).read_text(encoding='utf-8')
                zchunkstore[info.filename] = content
            else:
                zchunkstore[info.filename] = [data_url, offset, size]

    return zchunkstore
54+
955
1056def compress_encode (input_filepath ,
1157 output_directory ,
1258 multiscale = True ,
1359 chunk_size = 64 ,
1460 cname = 'zstd' ,
1561 clevel = 5 ,
16- shuffle = True ):
62+ shuffle = True ,
63+ zip_chunk_store = True ):
1764 image = itk .imread (input_filepath )
1865 image_da = itk .xarray_from_image (image )
1966 dataset_name = str (Path (input_filepath ))
@@ -29,6 +76,7 @@ def compress_encode(input_filepath,
2976
3077 image_ds .to_zarr (store ,
3178 mode = 'w' ,
79+ group = f'0' ,
3280 compute = True ,
3381 encoding = {dataset_name : {'chunks' : [chunk_size ]* image .GetImageDimension (), 'compressor' : compressor }})
3482
@@ -40,30 +88,42 @@ def compress_encode(input_filepath,
4088 reduced = image
4189 while not np .all (np .array (itk .size (reduced )) < 64 ):
4290 scale = len (pyramid )
43- shrink_factors = [2 ** scale ]* 3
44- reduced = itk .bin_shrink_image_filter (image , shrink_factors = shrink_factors )
45- reduced_da = itk .xarray_from_image (reduced )
91+ shrink_factors = [2 ]* 3
92+ for i , s in enumerate (itk .size (reduced )):
93+ if s < 4 :
94+ shrink_factors [i ] = 1
95+ reduced = itk .bin_shrink_image_filter (reduced , shrink_factors = shrink_factors )
96+ reduced_da = itk .xarray_from_image (reduced ).copy ()
4697 pyramid .append (reduced_da )
4798
48- pyramid_group_paths = ["" ]
49- for scale in range (1 , len (pyramid )):
50- pyramid_group_paths .append ('scale_{0}' .format (scale ))
51-
5299 for scale in range (1 , len (pyramid )):
53100 ds = pyramid [scale ].to_dataset (name = dataset_name )
54101 ds .to_zarr (store ,
55102 mode = 'w' ,
56- group = pyramid_group_paths [ scale ] ,
103+ group = f' { scale } ' ,
57104 compute = True ,
58105 encoding = {dataset_name : {'chunks' : [chunk_size ]* 3 , 'compressor' : compressor }})
59106
107+ datasets = [ { 'path' : f'{ scale } /{ dataset_name } ' } for scale in range (len (pyramid )) ]
108+ with zarr .open (store ) as z :
109+ z .attrs ['multiscales' ] = [{ 'version' : '0.1' , 'name' : dataset_name , 'datasets' : datasets }]
110+
60111 # Re-consolidate entire dataset
61112 zarr .consolidate_metadata (store )
62- for scale in range (1 , len (pyramid )):
63- store = zarr .DirectoryStore (str (Path (store_name ) / pyramid_group_paths [ scale ] ))
113+ for scale in range (0 , len (pyramid )):
114+ store = zarr .DirectoryStore (str (Path (store_name ) / f' { scale } ' ))
64115 # Also consolidate the metadata on the pyramid scales so they can be used independently
65116 zarr .consolidate_metadata (store )
66117
118+ if zip_chunk_store :
119+ store = zarr .DirectoryStore (store_name )
120+ zip_store_path = str (Path (output_directory )) + '.zip'
121+ with zarr .storage .ZipStore (zip_store_path , mode = 'w' , compression = 0 ) as zip_store :
122+ zarr .copy_store (store , zip_store )
123+ zchunkstore = zip_zchunkstore (zip_store_path )
124+ with open (zip_store_path + '.zchunkstore' , 'w' ) as fp :
125+ json .dump (zchunkstore , fp )
126+
67127
68128if __name__ == '__main__' :
69129 parser = argparse .ArgumentParser ('Convert and encode a medical image file in a compressed Zarr directory store.' )
@@ -75,6 +135,7 @@ def compress_encode(input_filepath,
75135 parser .add_argument ('--cname' , default = 'zstd' , help = 'Base compression codec.' )
76136 parser .add_argument ('--clevel' , default = 5 , type = int , help = 'Compression level.' )
77137 parser .add_argument ('--no-multi-scale' , action = 'store_true' , help = 'Do not generate a multi-scale pyramid.' )
138+ parser .add_argument ('--no-zip-chunk-store' , action = 'store_true' , help = 'Do not generate a zip file and corresponding chunk store.' )
78139
79140 args = parser .parse_args ()
80141
@@ -84,4 +145,5 @@ def compress_encode(input_filepath,
84145 chunk_size = args .chunk_size ,
85146 cname = args .cname ,
86147 clevel = args .clevel ,
87- shuffle = not args .no_shuffle )
148+ shuffle = not args .no_shuffle ,
149+ zip_chunk_store = not args .no_zip_chunk_store )
0 commit comments