15 changes: 0 additions & 15 deletions datasets/datasets.json
@@ -40,21 +40,6 @@
"node": 160000
}
},
"cora_icechunk": {
"path": "s3://nextgen-dmac-icechunk-test/cora-main-individual",
"type": "virtual-icechunk",
"drop_variables": ["adcirc_mesh", "ibtype", "ibtypee", "nbvv", "nvdll", "nvell"],
"storage_options": {
"anonymous": true,
"virtual_chunk_container": {
"type": "s3",
"store": {
"anonymous": true,
"region": "us-east-1"
}
}
}
},
"dbofs": {
"path": "s3://noaa-nodd-kerchunk-pds/nos/dbofs/dbofs.fields.best.nc.zarr",
"type": "zarr",
82 changes: 82 additions & 0 deletions datasets/icechunk_datasets.json
@@ -0,0 +1,82 @@
{
"cbofs": {
"path": "s3://noaa-nodd-kerchunk-pds/nos/cbofs/cbofs.fields.best.nc.zarr",
"type": "zarr",
"chunks": "auto",
"drop_variables": ["dstart"],
"extensions": {
"vdatum": {
"path": "s3://noaa-nodd-kerchunk-pds/nos_vdatums/cbofs_vdatums.nc.zarr",
"water_level_var": "zeta",
"vdatum_var": "mllwtomsl",
"vdatum_name": "mllw",
"multiplier": -1.0
},
"roms": {}
}
},
"cbofs_icechunk_30d_static": {
"path": "s3://nextgen-dmac-icechunk-test/cbofs-30day-static",
"type": "virtual-icechunk",
"drop_variables": ["dstart"],
"storage_options": {
"anonymous": true,
"virtual_chunk_container": {
"type": "s3",
"store": {
"path": "s3://noaa-nos-ofs-pds/",
"anonymous": true,
"region": "us-east-1"
}
}
}
},
"cbofs_overwrite": {
"path": "s3://nextgen-dmac-icechunk-test/cbofs-overwrite",
"type": "virtual-icechunk",
"drop_variables": ["dstart"],
"storage_options": {
"anonymous": true,
"virtual_chunk_container": {
"type": "s3",
"store": {
"path": "s3://noaa-nos-ofs-pds/",
"anonymous": true,
"region": "us-east-1"
}
}
}
},
"dbofs": {
"path": "s3://noaa-nodd-kerchunk-pds/nos/dbofs/dbofs.fields.best.nc.zarr",
"type": "zarr",
"chunks": "auto",
"drop_variables": ["dstart"],
"extensions": {
"vdatum": {
"path": "s3://noaa-nodd-kerchunk-pds/nos_vdatums/dbofs_vdatums.nc.zarr",
"water_level_var": "zeta",
"vdatum_var": "mllwtomsl",
"vdatum_name": "mllw",
"multiplier": -1.0
},
"roms": {}
}
},
"dbofs_icechunk_30d_static": {
"path": "s3://nextgen-dmac-icechunk-test/dbofs-30day-static",
"type": "virtual-icechunk",
"drop_variables": ["dstart"],
"storage_options": {
"anonymous": true,
"virtual_chunk_container": {
"type": "s3",
"store": {
"path": "s3://noaa-nos-ofs-pds/",
"anonymous": true,
"region": "us-east-1"
}
}
}
}
}
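The new file pairs each kerchunk-backed `zarr` entry (`cbofs`, `dbofs`) with a `virtual-icechunk` variant of the same model, so both loaders can be exercised against the same source data. A minimal sketch of reading the schema, run from the repo root — the field access mirrors the entries above, but the summary script itself is illustrative:

```python
import json

# Illustrative only: list each configured dataset and, for icechunk entries,
# the bucket prefix that actually holds the virtual chunks.
with open("datasets/icechunk_datasets.json") as f:
    datasets = json.load(f)

for name, spec in datasets.items():
    container = (
        spec.get("storage_options", {})
        .get("virtual_chunk_container", {})
        .get("store", {})
        .get("path", "-")
    )
    print(f"{name:30s} type={spec['type']:17s} virtual chunks: {container}")
```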
10 changes: 5 additions & 5 deletions requirements.txt
@@ -35,12 +35,12 @@ setuptools~=75.8.0
uvicorn~=0.34.0
xarray~=2025.3.1
zarr~=3.0.5
kerchunk@git+https://github.com/ndellicarpini/kerchunk@main
virtualizarr~=1.3.2
icechunk~=1.1.10
kerchunk~=0.2.9
xpublish-wms@git+https://github.com/xpublish-community/xpublish-wms@main
redis-fsspec-cache@git+https://github.com/asascience-open/redis-fsspec-cache@main
xarray-subset-grid@git+https://github.com/asascience-open/xarray-subset-grid@main
xpublish@git+https://github.com/xpublish-community/xpublish@main
xpublish-opendap@git+https://github.com/xpublish-community/xpublish-opendap@main
xpublish-wms@git+https://github.com/ndellicarpini/xpublish-wms@main
virtualizarr@git+https://github.com/zarr-developers/VirtualiZarr@develop
icechunk@git+https://github.com/earth-mover/icechunk.git#subdirectory=icechunk-python
xpublish-edr@git+https://github.com/xpublish-community/xpublish-edr@main
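The moving-target git installs of kerchunk, virtualizarr, and icechunk are replaced with released pins, and xpublish-wms moves from the ndellicarpini fork back to the upstream xpublish-community repo. A quick runtime check that the pins resolved as expected (standard-library only):

```python
# Sanity check: print the versions that pip actually resolved.
from importlib.metadata import version

for pkg in ("kerchunk", "virtualizarr", "icechunk", "zarr", "xarray"):
    print(f"{pkg}=={version(pkg)}")
```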
24 changes: 16 additions & 8 deletions xreds/dataset_utils.py
@@ -212,7 +212,7 @@ def _load_zarr(
)

def _load_virtual_icechunk(
dataset_path: str,
chunks: Optional[str | dict],
drop_variables: Optional[str | list[str]],
storage_options: dict,
@@ -222,13 +222,17 @@ def _load_virtual_icechunk(
if "virtual_chunk_container" in storage_options:
chunk_params = storage_options.pop("virtual_chunk_container", {})
if chunk_params.get("type", "s3").lower() == "s3":
store = chunk_params.get("store", {})
print('Store: ', store)
ic_config.set_virtual_chunk_container(
icechunk.VirtualChunkContainer(
"s3", "s3://", icechunk.s3_store(**chunk_params.get("store", {}))
url_prefix=store.get("path", ""),
store=icechunk.s3_store(region=store.get("region", "us-east-1"),
anonymous=store.get("anonymous", True))
)
)
ic_creds = icechunk.containers_credentials(
s3=icechunk.s3_credentials(**chunk_params.get("credentials", {"anonymous": True}))
{store.get("path", ""): icechunk.s3_anonymous_credentials()}
)

repo_type = storage_options.pop(
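The container setup now registers the virtual chunk store under the `path` prefix from the config (e.g. `s3://noaa-nos-ofs-pds/`) instead of the old catch-all `"s3://"`, and the credentials mapping is keyed by that same prefix. A condensed sketch of just this step, using the `cbofs_icechunk_30d_static` values; the keyword names follow the diff, while `RepositoryConfig.default()` is an assumption about how `ic_config` is built above this hunk:

```python
import icechunk

# Mirrors the "virtual_chunk_container" block in icechunk_datasets.json.
store_cfg = {"path": "s3://noaa-nos-ofs-pds/", "anonymous": True, "region": "us-east-1"}

ic_config = icechunk.RepositoryConfig.default()  # assumed constructor
ic_config.set_virtual_chunk_container(
    icechunk.VirtualChunkContainer(
        url_prefix=store_cfg["path"],  # virtual chunk refs must start with this
        store=icechunk.s3_store(
            region=store_cfg["region"],
            anonymous=store_cfg["anonymous"],
        ),
    )
)

# Credentials are looked up by url_prefix, not by a container name.
ic_creds = icechunk.containers_credentials(
    {store_cfg["path"]: icechunk.s3_anonymous_credentials()}
)
```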
@@ -240,30 +244,34 @@
ic_storage = None
if repo_type == "s3":
parsed_bucket = dataset_path.replace("s3://", "").split("/")[0]
parsed_prefix = dataset_path.replace("s3://", "").split("/")[-1]
parsed_prefix = '/'.join(dataset_path.replace("s3://", "").split("/")[1:])

ic_storage = icechunk.s3_storage(
bucket=storage_options.pop("bucket", parsed_bucket),
prefix=storage_options.pop("prefix", parsed_prefix),
**storage_options
anonymous=storage_options.pop("anonymous", True)
)

if ic_storage is None or not icechunk.Repository.exists(ic_storage):
raise Exception(f"Could not open icechunk repository for {dataset_path}")

repo = icechunk.Repository.open(ic_storage, ic_config, ic_creds)

branch = storage_options.get("branch", dataset_path.split("@")[-1] if "@" in dataset_path else None)
branch = storage_options.get("branch", None)
if branch is None:
all_branches = list(repo.list_branches())
branch = ("main" if "main" in all_branches
else "master" if "master" in all_branches
else all_branches[0])

return xr.open_zarr(
ds = xr.open_zarr(
repo.readonly_session(branch).store,
chunks=chunks,
drop_variables=drop_variables,
consolidated=False,
zarr_format=3
)

ds_sorted = ds.sortby('ocean_time')

return ds_sorted
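Three behavioral changes land in this hunk. The prefix parse now keeps everything after the bucket (`'/'.join(...[1:])`), where the old `split("/")[-1]` kept only the final segment and broke nested prefixes such as `s3://bucket/a/b` (old: `b`, new: `a/b`). Branch selection no longer parses an `@` suffix out of the dataset path; it reads `storage_options["branch"]` and falls back to `main`, then `master`, then the first listed branch. And the opened dataset is sorted by `ocean_time` before being returned, so appended forecast cycles come back in time order. A hedged end-to-end sketch — repository config and virtual-chunk credentials are omitted for brevity, and `ocean_time` assumes a ROMS-style dataset like CBOFS:

```python
import icechunk
import xarray as xr

dataset_path = "s3://nextgen-dmac-icechunk-test/cbofs-30day-static"

# New parsing: keep the full key prefix after the bucket.
parts = dataset_path.replace("s3://", "").split("/")
bucket, prefix = parts[0], "/".join(parts[1:])

storage = icechunk.s3_storage(bucket=bucket, prefix=prefix, anonymous=True)
repo = icechunk.Repository.open(storage)  # config/credentials omitted here

# Fall back to main/master (or any branch) when none is configured.
branches = list(repo.list_branches())
branch = "main" if "main" in branches else "master" if "master" in branches else branches[0]

ds = xr.open_zarr(
    repo.readonly_session(branch).store,
    consolidated=False,
    zarr_format=3,
)
# Appends can land out of order, so sort the time axis before serving.
ds = ds.sortby("ocean_time")
```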