From d503966ae80beae6228776cf64d2ced3eb427f9d Mon Sep 17 00:00:00 2001
From: Maarten Pronk
Date: Tue, 26 Aug 2025 17:32:22 +0200
Subject: [PATCH] Add extension on HDF5.

---
 Project.toml   |   6 ++-
 ext/HDF5Ext.jl | 149 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 153 insertions(+), 2 deletions(-)
 create mode 100644 ext/HDF5Ext.jl

diff --git a/Project.toml b/Project.toml
index fa7fb8d..7f61db1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,14 +1,13 @@
 name = "YAXArrayBase"
 uuid = "90b8fcef-0c2d-428d-9c56-5f86629e9d14"
 authors = ["Fabian Gans "]
-version = "0.7.6"
+version = "0.7.7"
 
 [deps]
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
 
-
 [compat]
 DataStructures = "0.17,0.18"
 julia = "1.9"
@@ -16,6 +15,7 @@ ArchGDAL = "0.10"
 AxisArrays = "0.4"
 AxisKeys = "0.2"
 DimensionalData = "0.27, 0.28, 0.29"
+HDF5 = "0.17"
 NetCDF = "0.11, 0.12"
 Zarr = "0.8, 0.9"
 
@@ -24,6 +24,7 @@ ArchGDALExt = "ArchGDAL"
 AxisArraysExt = "AxisArrays"
 AxisKeysExt = "AxisKeys"
 DimensionalDataExt = "DimensionalData"
+HDF5Ext = "HDF5"
 NamedDimsExt = "NamedDims"
 NetCDFExt = "NetCDF"
 ZarrExt = "Zarr"
@@ -33,6 +34,7 @@ ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
 AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
 AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
+HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 NamedDims = "356022a1-0364-5f58-8944-0da4b18d706f"
 NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
 Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"
diff --git a/ext/HDF5Ext.jl b/ext/HDF5Ext.jl
new file mode 100644
index 0000000..55aa828
--- /dev/null
+++ b/ext/HDF5Ext.jl
@@ -0,0 +1,149 @@
+module HDF5Ext
+import YAXArrayBase: YAXArrayBase as YAB
+using HDF5
+
+"""
+    HDF5Dataset
+
+Dataset backend to read HDF5 files using HDF5.jl
+
+`handle` caches the `HDF5.File` that is open while inside
+`YAB.open_dataset_handle`; it is `nothing` otherwise.
+"""
+struct HDF5Dataset
+    filename::String
+    mode::String
+    handle::Base.RefValue{Union{Nothing,HDF5.File}}
+end
+function HDF5Dataset(filename; mode="r")
+    HDF5Dataset(filename, mode, Ref{Union{Nothing,HDF5.File}}(nothing))
+end
+
+"Return `true` if `ds` currently holds an open file handle."
+function hasopenhandle(ds::HDF5Dataset)
+    handle = ds.handle[]
+    return handle !== nothing && isopen(handle)
+end
+
+"""
+    dsopen(f, ds::HDF5Dataset)
+
+Apply `f` to the open `HDF5.File` of `ds`. The cached handle is reused when
+it is open; otherwise the file is opened for this call only and closed again
+afterwards.
+"""
+function dsopen(f, ds::HDF5Dataset)
+    if hasopenhandle(ds)
+        return f(ds.handle[])
+    else
+        return HDF5.h5open(f, ds.filename, ds.mode)
+    end
+end
+
+"""
+    YAB.open_dataset_handle(f, ds::HDF5Dataset)
+
+Call `f(ds)` while `ds` holds an open file handle. A handle opened here is
+closed and cleared again once `f` returns or throws.
+"""
+function YAB.open_dataset_handle(f, ds::HDF5Dataset)
+    hasopenhandle(ds) && return f(ds)
+    file = HDF5.h5open(ds.filename, ds.mode)
+    ds.handle[] = file
+    try
+        return f(ds)
+    finally
+        # Clear the cache first so a failing `close` cannot leave a stale handle.
+        ds.handle[] = nothing
+        close(file)
+    end
+end
+
+function __init__()
+    @debug "new driver key :HDF5, updating backendlist."
+    YAB.backendlist[:HDF5] = HDF5Dataset
+    # The dot is escaped so that only a literal `.h5` extension matches.
+    push!(YAB.backendregex, r"\.h5$" => HDF5Dataset)
+end
+
+"""
+    get_all_paths(file, prefix="")
+
+Return the paths of all datasets below `file`, descending into groups.
+Paths are relative to `file` and joined with `/`.
+"""
+function get_all_paths(file, prefix="")
+    paths = String[]
+    for key in keys(file)
+        full_path = isempty(prefix) ? key : "$prefix/$key"
+        obj = file[key]
+        if obj isa HDF5.Dataset
+            push!(paths, full_path)
+        elseif obj isa HDF5.Group
+            append!(paths, get_all_paths(obj, full_path))
+        end
+    end
+    return paths
+end
+
+"""
+    get_dims(f, var)
+
+Return the names of the objects referenced by the `DIMENSION_LIST` attribute
+of dataset `var` in `f`, or an empty vector if the attribute is absent.
+"""
+function get_dims(f, var)
+    dims = String[]
+    ds = f[var]
+    haskey(ds, "DIMENSION_LIST") || return dims
+    # NOTE(review): names are returned in the order stored in the attribute;
+    # confirm this matches the axis order of the array returned by HDF5.jl.
+    for dimensions in read_attribute(ds, "DIMENSION_LIST")
+        for dim_ref in dimensions
+            push!(dims, HDF5.name(f[dim_ref]))
+        end
+    end
+    return dims
+end
+
+"Return a list of variable names"
+YAB.get_varnames(ds::HDF5Dataset) = dsopen(get_all_paths, ds)
+
+"Return a list of dimension names for a given variable"
+YAB.get_var_dims(ds::HDF5Dataset, name) = dsopen(x -> get_dims(x, name), ds)
+
+"Return the attributes of `name` in `file` as a `Dict`, without `DIMENSION_LIST`."
+function get_var_attrs(file, name)
+    attributes = Dict(attrs(file[name]))
+    # DIMENSION_LIST is reported through `get_dims` instead.
+    pop!(attributes, "DIMENSION_LIST", nothing)
+    return attributes
+end
+
+"Return a dict with the attributes for a given variable"
+YAB.get_var_attrs(ds::HDF5Dataset, name) = dsopen(v -> get_var_attrs(v, name), ds)
+
+"Return a dict with global attributes for the dataset"
+YAB.get_global_attrs(ds::HDF5Dataset) = dsopen(h5 -> Dict(attrs(h5)), ds)
+
+"""
+    YAB.get_var_handle(ds::HDF5Dataset, i; persist=true)
+
+Return the `HDF5.Dataset` stored under `i`.
+
+With `persist=false` and an open cached handle the dataset is taken from that
+handle. Otherwise the file is opened separately and left open, so the returned
+dataset is not tied to the cached handle.
+"""
+function YAB.get_var_handle(ds::HDF5Dataset, i; persist=true)
+    if !persist && hasopenhandle(ds)
+        return ds.handle[][i]
+    end
+    # NOTE(review): this file is intentionally not closed here; confirm that
+    # callers release it (e.g. by closing the returned dataset's file).
+    file = HDF5.h5open(ds.filename, ds.mode)
+    return file[i]
+end
+Base.haskey(ds::HDF5Dataset, k) = dsopen(h5 -> haskey(h5, k), ds)
+
+end