diff --git a/datasets/datasets.json b/datasets/datasets.json index 0edb7ff..2366c42 100644 --- a/datasets/datasets.json +++ b/datasets/datasets.json @@ -40,21 +40,6 @@ "node": 160000 } }, - "cora_icechunk": { - "path": "s3://nextgen-dmac-icechunk-test/cora-main-individual", - "type": "virtual-icechunk", - "drop_variables": ["adcirc_mesh", "ibtype", "ibtypee", "nbvv", "nvdll", "nvell"], - "storage_options": { - "anonymous": true, - "virtual_chunk_container": { - "type": "s3", - "store": { - "anonymous": true, - "region": "us-east-1" - } - } - } - }, "dbofs": { "path": "s3://noaa-nodd-kerchunk-pds/nos/dbofs/dbofs.fields.best.nc.zarr", "type": "zarr", diff --git a/datasets/icechunk_datasets.json b/datasets/icechunk_datasets.json new file mode 100644 index 0000000..ec5f41b --- /dev/null +++ b/datasets/icechunk_datasets.json @@ -0,0 +1,82 @@ +{ + "cbofs": { + "path": "s3://noaa-nodd-kerchunk-pds/nos/cbofs/cbofs.fields.best.nc.zarr", + "type": "zarr", + "chunks": "auto", + "drop_variables": ["dstart"], + "extensions": { + "vdatum": { + "path": "s3://noaa-nodd-kerchunk-pds/nos_vdatums/cbofs_vdatums.nc.zarr", + "water_level_var": "zeta", + "vdatum_var": "mllwtomsl", + "vdatum_name": "mllw", + "multiplier": -1.0 + }, + "roms": {} + } + }, + "cbofs_icechunk_30d_static": { + "path": "s3://nextgen-dmac-icechunk-test/cbofs-30day-static", + "type": "virtual-icechunk", + "drop_variables": ["dstart"], + "storage_options": { + "anonymous": true, + "virtual_chunk_container": { + "type": "s3", + "store": { + "path": "s3://noaa-nos-ofs-pds/", + "anonymous": true, + "region": "us-east-1" + } + } + } + }, + "cbofs_overwrite": { + "path": "s3://nextgen-dmac-icechunk-test/cbofs-overwrite", + "type": "virtual-icechunk", + "drop_variables": ["dstart"], + "storage_options": { + "anonymous": true, + "virtual_chunk_container": { + "type": "s3", + "store": { + "path": "s3://noaa-nos-ofs-pds/", + "anonymous": true, + "region": "us-east-1" + } + } + } + }, + "dbofs": { + "path": "s3://noaa-nodd-kerchunk-pds/nos/dbofs/dbofs.fields.best.nc.zarr", + "type": "zarr", + "chunks": "auto", + "drop_variables": ["dstart"], + "extensions": { + "vdatum": { + "path": "s3://noaa-nodd-kerchunk-pds/nos_vdatums/dbofs_vdatums.nc.zarr", + "water_level_var": "zeta", + "vdatum_var": "mllwtomsl", + "vdatum_name": "mllw", + "multiplier": -1.0 + }, + "roms": {} + } + }, + "dbofs_icechunk_30d_static": { + "path": "s3://nextgen-dmac-icechunk-test/dbofs-30day-static", + "type": "virtual-icechunk", + "drop_variables": ["dstart"], + "storage_options": { + "anonymous": true, + "virtual_chunk_container": { + "type": "s3", + "store": { + "path": "s3://noaa-nos-ofs-pds/", + "anonymous": true, + "region": "us-east-1" + } + } + } + } +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 2e69caf..21c7299 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,12 +35,12 @@ setuptools~=75.8.0 uvicorn~=0.34.0 xarray~=2025.3.1 zarr~=3.0.5 -kerchunk@git+https://github.com/ndellicarpini/kerchunk@main +virtualizarr~=1.3.2 +icechunk~=1.1.10 +kerchunk~=0.2.9 +xpublish-wms@git+https://github.com/xpublish-community/xpublish-wms@main redis-fsspec-cache@git+https://github.com/asascience-open/redis-fsspec-cache@main xarray-subset-grid@git+https://github.com/asascience-open/xarray-subset-grid@main xpublish@git+https://github.com/xpublish-community/xpublish@main xpublish-opendap@git+https://github.com/xpublish-community/xpublish-opendap@main -xpublish-wms@git+https://github.com/ndellicarpini/xpublish-wms@main -xpublish-edr@git+https://github.com/xpublish-community/xpublish-edr@main -virtualizarr@git+https://github.com/zarr-developers/VirtualiZarr@develop -icechunk@git+https://github.com/earth-mover/icechunk.git#subdirectory=icechunk-python +xpublish-edr@git+https://github.com/xpublish-community/xpublish-edr@main \ No newline at end of file diff --git a/xreds/dataset_utils.py b/xreds/dataset_utils.py index 32fe259..cb6d550 100644 --- a/xreds/dataset_utils.py +++ b/xreds/dataset_utils.py @@ -212,7 +212,7 @@ def _load_zarr( ) def _load_virtual_icechunk( - dataset_path: str, + dataset_path: str, chunks: Optional[str | dict], drop_variables: Optional[str | list[str]], storage_options: dict, @@ -222,13 +222,17 @@ def _load_virtual_icechunk( if "virtual_chunk_container" in storage_options: chunk_params = storage_options.pop("virtual_chunk_container", {}) if chunk_params.get("type", "s3").lower() == "s3": + store = chunk_params.get("store", {}) + print('Store: ', store) ic_config.set_virtual_chunk_container( icechunk.VirtualChunkContainer( - "s3", "s3://", icechunk.s3_store(**chunk_params.get("store", {})) + url_prefix=store.get("path", ""), + store=icechunk.s3_store(region=store.get("region", "us-east-1"), + anonymous=store.get("anonymous", True)) ) ) ic_creds = icechunk.containers_credentials( - s3=icechunk.s3_credentials(**chunk_params.get("credentials", {"anonymous": True})) + {store.get("path", ""): icechunk.s3_anonymous_credentials()} ) repo_type = storage_options.pop( @@ -240,12 +244,12 @@ def _load_virtual_icechunk( ic_storage = None if repo_type == "s3": parsed_bucket = dataset_path.replace("s3://", "").split("/")[0] - parsed_prefix = dataset_path.replace("s3://", "").split("/")[-1] + parsed_prefix = '/'.join(dataset_path.replace("s3://", "").split("/")[1:]) ic_storage = icechunk.s3_storage( bucket=storage_options.pop("bucket", parsed_bucket), prefix=storage_options.pop("prefix", parsed_prefix), - **storage_options + anonymous=storage_options.pop("anonymous", True) ) if ic_storage is None or not icechunk.Repository.exists(ic_storage): @@ -253,17 +257,21 @@ def _load_virtual_icechunk( repo = icechunk.Repository.open(ic_storage, ic_config, ic_creds) - branch = storage_options.get("branch", dataset_path.split("@")[-1] if "@" in dataset_path else None) + branch = storage_options.get("branch", None) if branch is None: all_branches = list(repo.list_branches()) branch = ("main" if "main" in all_branches else "master" if "master" in all_branches else all_branches[0]) - - return xr.open_zarr( + + ds = xr.open_zarr( repo.readonly_session(branch).store, chunks=chunks, drop_variables=drop_variables, consolidated=False, zarr_format=3 ) + + ds_sorted = ds.sortby('ocean_time') + + return ds_sorted