From 4078d8c5c0064a1efe5dc461b4986022fe8af256 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 3 Mar 2026 11:08:15 -0700 Subject: [PATCH 01/48] Implement ERA5PressureDataset, ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Updated exports to include ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels, ERA5_all_pressure_levels, pressure_field - Added using Statistics, Oceananigans.Fields.CenterField/interior, Oceananigans.BoundaryConditions.fill_halo_regions!, native_grid, InverseGravity to imports - Extended import block to include is_three_dimensional, reversed_vertical_axis, conversion_units - Added ERA5_all_pressure_levels constant (37 standard hPa levels) - Added ERA5PressureDataset, ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels with keyword constructors - Added ERA5PressureMetadata{D} and ERA5PressureMetadatum type aliases - Added Base.size, all_dates, is_three_dimensional, reversed_vertical_axis dispatches - Added ERA5PL_dataset_variable_names and ERA5PL_netcdf_variable_names dicts (15 variables each) - Added available_variables, dataset_variable_name, netcdf_variable_name, conversion_units dispatches - Added retrieve_data(::ERA5PressureMetadatum) — reads 4D NetCDF, reverses vertical axis - Added metadata_prefix(::ERA5PressureMetadata) — uses ERA5PL_dataset_variable_names for filename construction - Added _std_atm_geopotential_height, _std_atm_z_interfaces, z_interfaces(::ERA5PressureMetadata) - Added pressure_field and mean_geopotential_heights --- src/DataWrangling/ERA5/ERA5.jl | 216 ++++++++++++++++++++++++++++++++- 1 file changed, 213 insertions(+), 3 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index b69d68d2f..112b76297 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -1,13 +1,16 @@ module ERA5 export ERA5Hourly, ERA5Monthly +export ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels, ERA5_all_pressure_levels, pressure_field using NCDatasets using Printf using Scratch +using Statistics -using Oceananigans.Fields: Center -using NumericalEarth.DataWrangling: Metadata, Metadatum, metadata_path +using Oceananigans.Fields: Center, CenterField, interior +using Oceananigans.BoundaryConditions: fill_halo_regions! +using NumericalEarth.DataWrangling: Metadata, Metadatum, metadata_path, native_grid, InverseGravity using Dates using Dates: DateTime, Day, Month, Hour @@ -24,7 +27,10 @@ import NumericalEarth.DataWrangling: inpainted_metadata_path, available_variables, retrieve_data, - metadata_path + metadata_path, + is_three_dimensional, + reversed_vertical_axis, + conversion_units import Base: eltype @@ -48,6 +54,37 @@ struct ERA5Monthly <: ERA5Dataset end dataset_name(::ERA5Hourly) = "ERA5Hourly" dataset_name(::ERA5Monthly) = "ERA5Monthly" +##### +##### ERA5 pressure-level datasets +##### + +const ERA5_all_pressure_levels = [1, 2, 3, 5, 7, 10, 20, 30, 50, 70, 100, 125, 150, + 175, 200, 225, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 775, 800, + 825, 850, 875, 900, 925, 950, 975, 1000] + +abstract type ERA5PressureDataset <: ERA5Dataset end + +struct ERA5HourlyPressureLevels <: ERA5PressureDataset + levels :: Vector{Int} +end +ERA5HourlyPressureLevels(; levels=ERA5_all_pressure_levels) = ERA5HourlyPressureLevels(levels) + +struct ERA5MonthlyPressureLevels <: ERA5PressureDataset + levels :: Vector{Int} +end +ERA5MonthlyPressureLevels(; levels=ERA5_all_pressure_levels) = ERA5MonthlyPressureLevels(levels) + +dataset_name(::ERA5HourlyPressureLevels) = "ERA5HourlyPressureLevels" +dataset_name(::ERA5MonthlyPressureLevels) = "ERA5MonthlyPressureLevels" + +Base.size(ds::ERA5PressureDataset, variable) = (1440, 721, length(ds.levels)) + +all_dates(::ERA5HourlyPressureLevels, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-31"), step=Hour(1)) +all_dates(::ERA5MonthlyPressureLevels, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-01"), step=Month(1)) + +const ERA5PressureMetadata{D} = Metadata{<:ERA5PressureDataset, D} +const ERA5PressureMetadatum = Metadatum{<:ERA5PressureDataset} + # Wave variables are on a 0.5° grid (720×361), atmospheric variables on 0.25° (1440×721) const ERA5_wave_variables = Set([ :eastward_stokes_drift, :northward_stokes_drift, @@ -72,6 +109,12 @@ const ERA5Metadatum = Metadatum{<:ERA5Dataset} # ERA5 is a spatially 2D dataset (atmospheric surface variables) is_three_dimensional(::ERA5Metadata) = false +# ERA5 pressure-level data is 3D +is_three_dimensional(::ERA5PressureMetadata) = true + +# ERA5 stores pressure levels top-to-bottom; reverse to match Oceananigans bottom-to-top +reversed_vertical_axis(::ERA5PressureDataset) = true + # Variable name mappings from NumericalEarth names to ERA5/CDS API variable names ERA5_dataset_variable_names = Dict( :temperature => "2m_temperature", @@ -126,6 +169,53 @@ ERA5_netcdf_variable_names = Dict( netcdf_variable_name(metadata::ERA5Metadata) = ERA5_netcdf_variable_names[metadata.name] +##### +##### ERA5 pressure-level variable name mappings +##### + +ERA5PL_dataset_variable_names = Dict( + :temperature => "temperature", + :eastward_velocity => "u_component_of_wind", + :northward_velocity => "v_component_of_wind", + :vertical_velocity => "vertical_velocity", + :geopotential => "geopotential", + :geopotential_height => "geopotential", + :specific_humidity => "specific_humidity", + :relative_humidity => "relative_humidity", + :vorticity => "vorticity", + :divergence => "divergence", + :potential_vorticity => "potential_vorticity", + :ozone_mass_mixing_ratio => "ozone_mass_mixing_ratio", + :fraction_of_cloud_cover => "fraction_of_cloud_cover", + :specific_cloud_liquid_water_content => "specific_cloud_liquid_water_content", + :specific_cloud_ice_water_content => "specific_cloud_ice_water_content", +) + +ERA5PL_netcdf_variable_names = Dict( + :temperature => "t", + :eastward_velocity => "u", + :northward_velocity => "v", + :vertical_velocity => "w", + :geopotential => "z", + :geopotential_height => "z", + :specific_humidity => "q", + :relative_humidity => "r", + :vorticity => "vo", + :divergence => "d", + :potential_vorticity => "pv", + :ozone_mass_mixing_ratio => "o3", + :fraction_of_cloud_cover => "cc", + :specific_cloud_liquid_water_content => "clwc", + :specific_cloud_ice_water_content => "ciwc", +) + +available_variables(::ERA5PressureDataset) = ERA5PL_dataset_variable_names +dataset_variable_name(md::ERA5PressureMetadata) = ERA5PL_dataset_variable_names[md.name] +netcdf_variable_name(md::ERA5PressureMetadata) = ERA5PL_netcdf_variable_names[md.name] + +conversion_units(md::ERA5PressureMetadatum) = + md.name == :geopotential_height ? InverseGravity() : nothing + """ retrieve_data(metadata::ERA5Metadatum) @@ -158,6 +248,22 @@ function retrieve_data(metadata::ERA5Metadatum) return reshape(data_2d, size(data_2d, 1), size(data_2d, 2), 1) end +""" + retrieve_data(metadata::ERA5PressureMetadatum) + +Retrieve ERA5 pressure-level data from a NetCDF file. +Returns a 3D array (lon, lat, level) with levels ordered bottom-to-top +(highest pressure at k=1, lowest pressure at k=Nz). +""" +function retrieve_data(metadata::ERA5PressureMetadatum) + path = metadata_path(metadata) + name = netcdf_variable_name(metadata) + ds = NCDatasets.Dataset(path) + data = ds[name][:, :, :, 1] # (lon, lat, pressure_level, time=1) + close(ds) + return reverse(data, dims=3) # flip from ERA5 top-to-bottom to Oceananigans bottom-to-top +end + ##### ##### Metadata filename construction ##### @@ -186,6 +292,27 @@ function bbox_strs(c) return first, second end +function metadata_prefix(metadata::ERA5PressureMetadata) + var = ERA5PL_dataset_variable_names[metadata.name] + dataset = dataset_name(metadata.dataset) + start_date = start_date_str(metadata.dates) + end_date = end_date_str(metadata.dates) + bbox = metadata.bounding_box + + if !isnothing(bbox) + w, e = bbox_strs(bbox.longitude) + s, n = bbox_strs(bbox.latitude) + suffix = string(w, e, s, n) + else + suffix = "" + end + + prefix = string(var, "_", dataset, "_", start_date, "_", end_date, suffix) + prefix = colon2dash(prefix) + prefix = underscore_spaces(prefix) + return prefix +end + function metadata_prefix(metadata::ERA5Metadata) var = ERA5_dataset_variable_names[metadata.name] dataset = dataset_name(metadata.dataset) @@ -240,5 +367,88 @@ z_interfaces(::ERA5Metadata) = (0, 1) # ERA5 data is stored as Float32 eltype(::ERA5Metadata) = Float32 +##### +##### Pressure-level vertical coordinate +##### + +# Standard atmosphere height (m) for a given pressure (hPa) +_std_atm_geopotential_height(p_hPa) = (287.05 * 288.15 / 9.80665) * log(1013.25 / p_hPa) + +# Build z-interfaces (Nz+1 values) from pressure levels. +# Levels may be in any order; output is sorted so k=1 is highest pressure (lowest altitude). +function _std_atm_z_interfaces(levels) + sorted_levels = sort(levels, rev=true) # highest pressure first → k=1 is bottom + heights = _std_atm_geopotential_height.(Float64.(sorted_levels)) + Nz = length(heights) + + interfaces = Vector{Float64}(undef, Nz + 1) + + if Nz == 1 + interfaces[1] = max(0.0, heights[1] - 500.0) + interfaces[2] = heights[1] + 500.0 + return interfaces + end + + interfaces[1] = max(0.0, heights[1] - (heights[2] - heights[1]) / 2) + for k in 2:Nz + interfaces[k] = (heights[k-1] + heights[k]) / 2 + end + interfaces[Nz+1] = heights[Nz] + (heights[Nz] - heights[Nz-1]) / 2 + + return interfaces +end + +z_interfaces(metadata::ERA5PressureMetadata) = _std_atm_z_interfaces(metadata.dataset.levels) + +##### +##### pressure_field — synthetic pressure coordinate field +##### + +""" + pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3)) + +Return a `CenterField` on the native grid of `metadata` filled with the pressure +value (hPa) at each vertical level. Levels are ordered bottom-to-top (k=1 is the +highest pressure level). +""" +function pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3)) + grid = native_grid(metadata, arch; halo) + field = CenterField(grid) + reversed_levels = sort(metadata.dataset.levels, rev=true) # highest pressure → k=1 + for (k, p) in enumerate(reversed_levels) + interior(field)[:, :, k] .= Float32(p) + end + fill_halo_regions!(field) + return field +end + +##### +##### mean_geopotential_heights — data-derived static z-coordinate +##### + +""" + mean_geopotential_heights(metadata::ERA5PressureMetadata; arch=CPU()) + +Compute spatially and temporally averaged geopotential heights (m) for each +pressure level in `metadata`. This provides more accurate z-coordinates than +the standard-atmosphere fallback used by `z_interfaces`. + +Downloads the `:geopotential` field for every date in `metadata`, divides by g, +averages over the horizontal domain and all dates, and returns one representative +height per pressure level in bottom-to-top order (k=1 is highest pressure). +""" +function mean_geopotential_heights(metadata::ERA5PressureMetadata; arch=CPU()) + geo_metadata = Metadata(:geopotential; dataset=metadata.dataset, + dates=metadata.dates, bounding_box=metadata.bounding_box, + dir=metadata.dir) + heights = zeros(length(metadata.dataset.levels)) + for geo_datum in geo_metadata + data = retrieve_data(geo_datum) ./ Float32(9.80665) # Φ → Z (m) + heights .+= dropdims(mean(data; dims=(1, 2)); dims=(1, 2)) + end + heights ./= length(geo_metadata) + return heights +end + end # module ERA5 From 54a2a0c4d74435116f936ab18de2876c5315e3f0 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 3 Mar 2026 11:11:21 -0700 Subject: [PATCH 02/48] Add ERA5*PressureLevels exports --- src/DataWrangling/DataWrangling.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataWrangling/DataWrangling.jl b/src/DataWrangling/DataWrangling.jl index 6bbe3dccd..9db7553e0 100644 --- a/src/DataWrangling/DataWrangling.jl +++ b/src/DataWrangling/DataWrangling.jl @@ -8,7 +8,7 @@ export Metadata, Metadatum, ECCOMetadatum, EN4Metadatum, all_dates, first_date, export metadata_time_step, metadata_epoch export LinearlyTaperedPolarMask export DatasetRestoring -export ERA5Hourly, ERA5Monthly +export ERA5Hourly, ERA5Monthly, ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels using Oceananigans using Downloads From 8fede163943b4764e19147a476ae2f5d2b8c6ee4 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 3 Mar 2026 11:12:44 -0700 Subject: [PATCH 03/48] Add InverseGravity for geopotential height calc --- src/DataWrangling/metadata.jl | 1 + src/DataWrangling/metadata_field.jl | 1 + 2 files changed, 2 insertions(+) diff --git a/src/DataWrangling/metadata.jl b/src/DataWrangling/metadata.jl index b4de44791..09e90c2a5 100644 --- a/src/DataWrangling/metadata.jl +++ b/src/DataWrangling/metadata.jl @@ -297,6 +297,7 @@ struct NanomolePerKilogram end struct NanomolePerLiter end struct InverseSign end +struct InverseGravity end struct GramPerKilogramMinus35 end # Salinity anomaly struct MilliliterPerLiter end # Sometimes for disssolved_oxygen diff --git a/src/DataWrangling/metadata_field.jl b/src/DataWrangling/metadata_field.jl index 7107ca8be..846e6dcc0 100644 --- a/src/DataWrangling/metadata_field.jl +++ b/src/DataWrangling/metadata_field.jl @@ -248,6 +248,7 @@ end @inline convert_units(C::FT, ::Union{NanomolePerLiter, NanomolePerKilogram}) where FT = C * convert(FT, 1e-6) @inline convert_units(C::FT, ::MilliliterPerLiter) where FT = C / convert(FT, 22.3916) @inline convert_units(C::FT, ::GramPerKilogramMinus35) where FT = C + convert(FT, 35) +@inline convert_units(Φ::FT, ::InverseGravity) where FT = Φ / convert(FT, 9.80665) ##### From 13e4fab5f83da180004793af52ffc58d1b1ad600 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 3 Mar 2026 11:16:04 -0700 Subject: [PATCH 04/48] Add ERA5 pressure-levels downloader --- ext/NumericalEarthCDSAPIExt.jl | 50 ++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index b450dabe9..52f54d301 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -8,6 +8,7 @@ using Oceananigans.DistributedComputations: @root using Dates using NumericalEarth.DataWrangling.ERA5: ERA5Metadata, ERA5Metadatum, ERA5_dataset_variable_names +using NumericalEarth.DataWrangling.ERA5: ERA5PressureMetadata, ERA5PressureMetadatum, ERA5PL_dataset_variable_names import NumericalEarth.DataWrangling: download_dataset @@ -90,6 +91,55 @@ function download_dataset(meta::ERA5Metadatum; skip_existing=true) return output_path end +##### +##### ERA5 pressure-level download +##### + +function download_dataset(metadata::ERA5PressureMetadata; kwargs...) + paths = Array{String}(undef, length(metadata)) + for (m, metadatum) in enumerate(metadata) + paths[m] = download_dataset(metadatum; kwargs...) + end + return paths +end + +function download_dataset(meta::ERA5PressureMetadatum; skip_existing=true) + output_directory = meta.dir + output_filename = NumericalEarth.DataWrangling.metadata_filename(meta) + output_path = joinpath(output_directory, output_filename) + + skip_existing && isfile(output_path) && return output_path + mkpath(output_directory) + + variable_name = ERA5PL_dataset_variable_names[meta.name] + date = meta.dates + year = string(Dates.year(date)) + month = lpad(string(Dates.month(date)), 2, '0') + day = lpad(string(Dates.day(date)), 2, '0') + hour = lpad(string(Dates.hour(date)), 2, '0') * ":00" + + request = Dict( + "product_type" => ["reanalysis"], + "variable" => [variable_name], + "pressure_level" => [string(p) for p in meta.dataset.levels], + "year" => [year], + "month" => [month], + "day" => [day], + "time" => [hour], + "data_format" => "netcdf", + "download_format" => "unarchived", + ) + + area = build_era5_area(meta.bounding_box) + isnothing(area) || (request["area"] = area) + + @root begin + CDSAPI.retrieve("reanalysis-era5-pressure-levels", request, output_path) + end + + return output_path +end + ##### ##### Area/bounding box utilities ##### From afdd2272af285545628595c6274265d2b1d04957 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 3 Mar 2026 11:20:28 -0700 Subject: [PATCH 05/48] Add CDS tests, verified 95/95 pass --- test/test_cds_downloading.jl | 114 ++++++++++++++++++++++++++++++++++- 1 file changed, 113 insertions(+), 1 deletion(-) diff --git a/test/test_cds_downloading.jl b/test/test_cds_downloading.jl index 3e5d8fa9e..6fb4c62ca 100644 --- a/test/test_cds_downloading.jl +++ b/test/test_cds_downloading.jl @@ -5,6 +5,9 @@ using NCDatasets using NumericalEarth.DataWrangling.ERA5 using NumericalEarth.DataWrangling.ERA5: ERA5Hourly, ERA5Monthly, ERA5_dataset_variable_names +using NumericalEarth.DataWrangling.ERA5: ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels, + ERA5_all_pressure_levels, ERA5PL_dataset_variable_names, + pressure_field using NumericalEarth.DataWrangling: metadata_path, download_dataset using Dates @@ -127,13 +130,53 @@ start_date = DateTime(2005, 2, 16, 12) @testset "ERA5 Monthly dataset" begin monthly_dataset = ERA5Monthly() @test monthly_dataset isa ERA5Monthly - + # Test that all_dates returns a valid range dates = NumericalEarth.DataWrangling.all_dates(monthly_dataset, :temperature) @test first(dates) == DateTime("1940-01-01") @test step(dates) == Month(1) end + @testset "ERA5HourlyPressureLevels construction and metadata" begin + # Default constructor uses all 37 standard levels + ds_full = ERA5HourlyPressureLevels() + @test ds_full isa ERA5HourlyPressureLevels + @test length(ds_full.levels) == 37 + @test Base.size(ds_full, :temperature) == (1440, 721, 37) + + # Subset constructor + ds_sub = ERA5HourlyPressureLevels(levels=[850, 500]) + @test Base.size(ds_sub, :temperature) == (1440, 721, 2) + + # Monthly variant + ds_monthly = ERA5MonthlyPressureLevels() + @test ds_monthly isa ERA5MonthlyPressureLevels + + # Metadatum size propagates Nz correctly + meta = Metadatum(:temperature; dataset=ds_sub, bounding_box=bounding_box, date=start_date) + Nx, Ny, Nz, Nt = size(meta) + @test Nz == 2 + @test NumericalEarth.DataWrangling.ERA5.is_three_dimensional(meta) == true + + # Variable name lookups + @test ERA5PL_dataset_variable_names[:temperature] == "temperature" + @test ERA5PL_dataset_variable_names[:geopotential_height] == "geopotential" + end + + @testset "ERA5 pressure-level z_interfaces (standard atmosphere)" begin + levels_2 = [850, 500] + z = NumericalEarth.DataWrangling.ERA5._std_atm_z_interfaces(levels_2) + @test length(z) == 3 # Nz+1 interfaces + @test issorted(z) # monotonically increasing with altitude + # 850 hPa ≈ 1457 m, 500 hPa ≈ 5575 m + @test z[1] < 1457.0 < z[2] < 5575.0 < z[3] + + # Single level + z1 = NumericalEarth.DataWrangling.ERA5._std_atm_z_interfaces([500]) + @test length(z1) == 2 + @test z1[1] < z1[2] + end + for arch in test_architectures A = typeof(arch) @@ -195,4 +238,73 @@ start_date = DateTime(2005, 2, 16, 12) isfile(inpainted_path) && rm(inpainted_path; force=true) end end + + @testset "ERA5 pressure-level download and Field on CPU" begin + arch = CPU() + ds_pl = ERA5HourlyPressureLevels(levels=[850, 500]) + + @testset "Download and 3D Field" begin + meta = Metadatum(:temperature; dataset=ds_pl, bounding_box, date=start_date) + filepath = metadata_path(meta) + isfile(filepath) && rm(filepath; force=true) + + download_dataset(meta) + @test isfile(filepath) + + # Verify the NetCDF has a pressure_level dimension and the right variable + ds_nc = NCDataset(filepath) + @test haskey(ds_nc, "t") + @test haskey(ds_nc, "pressure_level") || haskey(ds_nc, "level") + close(ds_nc) + + f = Field(meta, arch) + @test f isa Field + Nx, Ny, Nz = size(f) + @test Nz == 2 + + @allowscalar begin + @test !all(iszero, interior(f)) + # Temperature at these levels should be in a plausible range (K) + @test all(x -> 180 < x < 340, filter(!isnan, vec(interior(f)))) + end + + rm(filepath; force=true) + inpainted_path = NumericalEarth.DataWrangling.inpainted_metadata_path(meta) + isfile(inpainted_path) && rm(inpainted_path; force=true) + end + + @testset "Geopotential height conversion" begin + meta_z = Metadatum(:geopotential_height; dataset=ds_pl, bounding_box, date=start_date) + filepath = metadata_path(meta_z) + isfile(filepath) && rm(filepath; force=true) + + download_dataset(meta_z) + fz = Field(meta_z, arch) + + @allowscalar begin + max_z = maximum(filter(!isnan, vec(interior(fz)))) + # 500 hPa geopotential height ≈ 5500 m + @test 4000 < max_z < 7000 + end + + rm(filepath; force=true) + inpainted_path = NumericalEarth.DataWrangling.inpainted_metadata_path(meta_z) + isfile(inpainted_path) && rm(inpainted_path; force=true) + end + + @testset "pressure_field" begin + meta = Metadatum(:temperature; dataset=ds_pl, bounding_box, date=start_date) + pf = pressure_field(meta, arch) + @test pf isa Field + Nx, Ny, Nz = size(pf) + @test Nz == 2 + + @allowscalar begin + # k=1 should be 850 hPa (highest pressure, lowest altitude) + @test interior(pf)[1, 1, 1] ≈ 850.0f0 + # k=2 should be 500 hPa + @test interior(pf)[1, 1, 2] ≈ 500.0f0 + end + end + end end From c09f6daf8695e6f80d28b07306f0fcebcf29cf79 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 3 Mar 2026 13:00:41 -0700 Subject: [PATCH 06/48] Fix imports --- src/DataWrangling/ERA5/ERA5.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 112b76297..e6c688fd3 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -8,8 +8,8 @@ using Printf using Scratch using Statistics -using Oceananigans.Fields: Center, CenterField, interior -using Oceananigans.BoundaryConditions: fill_halo_regions! +using Oceananigans.Fields: Center +using Oceananigans: CenterField, interior, fill_halo_regions!, CPU using NumericalEarth.DataWrangling: Metadata, Metadatum, metadata_path, native_grid, InverseGravity using Dates using Dates: DateTime, Day, Month, Hour From 9c4893705bcb9bf61e1fdc4f525a342c0373dfcd Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 3 Mar 2026 13:34:45 -0700 Subject: [PATCH 07/48] Add day, hour to date_str for filenaming --- src/DataWrangling/ERA5/ERA5.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index e6c688fd3..a7664625d 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -271,7 +271,9 @@ end function date_str(date::DateTime) y = Dates.year(date) m = lpad(Dates.month(date), 2, '0') - return "$(y)-$(m)" + d = lpad(Dates.day(date), 2, '0') + h = lpad(Dates.hour(date), 2, '0') + return "$(y)-$(m)-$(d)T$(h)" end start_date_str(date::DateTime) = date_str(date) From aec39c801d75d58ba86e881b3fc9ef5dc6302ea2 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 3 Mar 2026 15:55:54 -0700 Subject: [PATCH 08/48] Allow simultaneous download of multiple variables --- ext/NumericalEarthCDSAPIExt.jl | 174 ++++++++++++++++++++++++++++++++- 1 file changed, 173 insertions(+), 1 deletion(-) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index 52f54d301..025e8b0c8 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -2,16 +2,26 @@ module NumericalEarthCDSAPIExt using NumericalEarth using CDSAPI +using NCDatasets using Oceananigans using Oceananigans.DistributedComputations: @root using Dates using NumericalEarth.DataWrangling.ERA5: ERA5Metadata, ERA5Metadatum, ERA5_dataset_variable_names -using NumericalEarth.DataWrangling.ERA5: ERA5PressureMetadata, ERA5PressureMetadatum, ERA5PL_dataset_variable_names +using NumericalEarth.DataWrangling.ERA5: ERA5PressureDataset, + ERA5PressureMetadata, ERA5PressureMetadatum, + ERA5PL_dataset_variable_names, ERA5PL_netcdf_variable_names import NumericalEarth.DataWrangling: download_dataset +# Coordinate / dimension variables to propagate into each split file +const ERA5PL_COORD_VARS = Set(["longitude", "latitude", + "pressure_level", "level", + "time", "valid_time", + "expver", # experiment version: ==1 for final data, ==5 preliminary 5-day data + "number"]) # ensemble member + """ download_dataset(metadata::ERA5Metadata; kwargs...) @@ -140,6 +150,168 @@ function download_dataset(meta::ERA5PressureMetadatum; skip_existing=true) return output_path end +##### +##### Multi-variable ERA5 pressure-level download +##### + +""" + download_dataset(names::Vector{Symbol}, metadata::ERA5PressureMetadata; kwargs...) + +Download multiple ERA5 pressure-level variables for each date in `metadata`. +""" +function download_dataset(names::Vector{Symbol}, metadata::ERA5PressureMetadata; kwargs...) + for metadatum in metadata + download_dataset(names, metadatum; kwargs...) + end + return nothing +end + +""" + download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; skip_existing=true) + +Download multiple ERA5 pressure-level variables for a single date in one CDS API request. + +The multi-variable NetCDF returned by CDS is split into individual per-variable files, each +stored at the path returned by `metadata_path(Metadatum(name; dataset=meta.dataset, ...))`. +This keeps subsequent calls to `Field` and `retrieve_data` unchanged. +""" +function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; skip_existing=true) + # Build (symbol → output path) pairs using the standard single-variable filename scheme + name_path_pairs = [(name, NumericalEarth.DataWrangling.metadata_path( + NumericalEarth.DataWrangling.Metadatum(name; + dataset = meta.dataset, + bounding_box = meta.bounding_box, + date = meta.dates, + dir = meta.dir))) + for name in names] + + # Determine which output files are missing + pending = if skip_existing + [(n, p) for (n, p) in name_path_pairs if !isfile(p)] + else + name_path_pairs + end + + isempty(pending) && return [path for (_, path) in name_path_pairs] + + # Unique CDS variable names (:geopotential and :geopotential_height share one CDS field) + cds_vars = unique([ERA5PL_dataset_variable_names[name] for (name, _) in pending]) + + date = meta.dates + year = string(Dates.year(date)) + month = lpad(string(Dates.month(date)), 2, '0') + day = lpad(string(Dates.day(date)), 2, '0') + hour = lpad(string(Dates.hour(date)), 2, '0') * ":00" + + request = Dict( + "product_type" => ["reanalysis"], + "variable" => cds_vars, + "pressure_level" => [string(p) for p in meta.dataset.levels], + "year" => [year], + "month" => [month], + "day" => [day], + "time" => [hour], + "data_format" => "netcdf", + "download_format" => "unarchived", + ) + + area = build_era5_area(meta.bounding_box) + isnothing(area) || (request["area"] = area) + + mkpath(meta.dir) + tmp_path = joinpath(meta.dir, "_tmp_multi_$(year)$(month)$(day)T$(hour[1:2]).nc") + + nc_name_path_pairs = [(ERA5PL_netcdf_variable_names[name], path) for (name, path) in pending] + + @root begin + CDSAPI.retrieve("reanalysis-era5-pressure-levels", request, tmp_path) + _split_era5pl_nc(tmp_path, nc_name_path_pairs) + rm(tmp_path; force=true) + end + + return [path for (_, path) in name_path_pairs] +end + +""" + download_dataset(names::Vector{Symbol}, dataset::ERA5PressureDataset; + date, bounding_box=nothing, dir=default_download_directory(dataset)) + +Download multiple ERA5 pressure-level variables without requiring a dummy `Metadatum`. + +# Example +```julia +ds = ERA5HourlyPressureLevels(levels=[850, 500]) +download_dataset([:temperature, :geopotential_height, :eastward_velocity], ds; + date=DateTime(2020, 6, 15, 0), bounding_box=bbox) +``` +""" +function download_dataset(names::Vector{Symbol}, dataset::ERA5PressureDataset; + date, + bounding_box = nothing, + dir = NumericalEarth.DataWrangling.default_download_directory(dataset)) + meta = NumericalEarth.DataWrangling.Metadatum(first(names); dataset, date, bounding_box, dir) + return download_dataset(names, meta) +end + +""" + _split_era5pl_nc(src_path, nc_name_path_pairs) + +Split a multi-variable ERA5 pressure-level NetCDF at `src_path` into individual +per-variable files. `nc_name_path_pairs` is a vector of `(nc_varname, dst_path)` tuples. + +Each output file contains all coordinate/dimension variables plus the one data variable. +Global attributes and dimension definitions are preserved. Raw data (including scale/offset +encoding) is copied verbatim so that downstream readers decode values identically. +""" +function _split_era5pl_nc(src_path, nc_name_path_pairs) + NCDatasets.Dataset(src_path, "r") do src + for (nc_varname, dst_path) in nc_name_path_pairs + NCDatasets.Dataset(dst_path, "c") do dst + # Define dimensions + unlimited = NCDatasets.unlimited(src) + for (dname, dlen) in src.dim + NCDatasets.defDim(dst, dname, dname in unlimited ? Inf : dlen) + end + + # Copy global attributes + for (k, v) in src.attrib + dst.attrib[k] = v + end + + # Copy coordinate variables and the one requested data variable + for (vname, var) in src + (vname in ERA5PL_COORD_VARS || vname == nc_varname) || continue + _ncvar_copy!(dst, var, vname) + end + end + end + end +end + +function _ncvar_copy!(dst, src_var, vname) + dims = NCDatasets.dimnames(src_var) + T = eltype(src_var.var) # raw storage type; eltype(src_var) gives CF-decoded DateTime + attribs = src_var.attrib + fill_val = haskey(attribs, "_FillValue") ? attribs["_FillValue"] : nothing + + dst_var = if isnothing(fill_val) + NCDatasets.defVar(dst, vname, T, dims) + else + NCDatasets.defVar(dst, vname, T, dims; fillvalue=fill_val) + end + + # Copy variable attributes (skip _FillValue — handled by defVar above) + for (k, v) in attribs + k == "_FillValue" && continue + dst_var.attrib[k] = v + end + + # Copy raw encoded data, bypassing scale/offset transforms + dst_var.var[:] = src_var.var[:] + + return nothing +end + ##### ##### Area/bounding box utilities ##### From 03efebcf613657f0474b77ffbaf693e595bfc253 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 3 Mar 2026 16:25:43 -0700 Subject: [PATCH 09/48] Aggregate hourly downloads into API requests per calendar day --- ext/NumericalEarthCDSAPIExt.jl | 140 +++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index 025e8b0c8..390191bc8 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -312,6 +312,146 @@ function _ncvar_copy!(dst, src_var, vname) return nothing end +""" + download_dataset(names::Vector{Symbol}, dataset::ERA5PressureDataset, dates::AbstractVector; + bounding_box=nothing, dir=default_download_directory(dataset), skip_existing=true) + +Download multiple ERA5 pressure-level variables for a vector of `dates`, issuing one CDS API +request per unique calendar day (which may contain multiple hours). + +# Example +```julia +ds = ERA5HourlyPressureLevels(levels=[850, 500]) +dates = DateTime(2020, 6, 15, 0) : Hour(6) : DateTime(2020, 6, 16, 18) +download_dataset([:temperature, :geopotential_height], ds, collect(dates); bounding_box=bbox) +``` +""" +function download_dataset(names::Vector{Symbol}, dataset::ERA5PressureDataset, + dates::AbstractVector; + bounding_box = nothing, + dir = NumericalEarth.DataWrangling.default_download_directory(dataset), + skip_existing = true) + + grouped = Dict(d => filter(dt -> Dates.Date(dt) == d, dates) + for d in unique(Dates.Date.(dates))) + + for day in sort(collect(keys(grouped))) + _download_era5pl_day(names, dataset, grouped[day]; bounding_box, dir, skip_existing) + end + + return nothing +end + +function _download_era5pl_day(names, dataset, day_dates; + bounding_box, dir, skip_existing) + + MDatum = NumericalEarth.DataWrangling.Metadatum + meta_path = NumericalEarth.DataWrangling.metadata_path + + # All (name, dt, output path) triples for this day + all_triples = [(name, dt, meta_path(MDatum(name; dataset, date=dt, bounding_box, dir))) + for name in names for dt in day_dates] + + pending = skip_existing ? filter(((_, _, p),) -> !isfile(p), all_triples) : all_triples + isempty(pending) && return nothing + + # Unique CDS variable names and hours needed + cds_vars = unique([ERA5PL_dataset_variable_names[name] for (name, _, _) in pending]) + sorted_dts = sort(unique([dt for (_, dt, _) in pending])) + hours_str = [lpad(string(Dates.hour(dt)), 2, '0') * ":00" for dt in sorted_dts] + dt_to_tidx = Dict(dt => i for (i, dt) in enumerate(sorted_dts)) + + dt = first(sorted_dts) + year = string(Dates.year(dt)) + month = lpad(string(Dates.month(dt)), 2, '0') + day = lpad(string(Dates.day(dt)), 2, '0') + + request = Dict( + "product_type" => ["reanalysis"], + "variable" => cds_vars, + "pressure_level" => [string(p) for p in dataset.levels], + "year" => [year], + "month" => [month], + "day" => [day], + "time" => hours_str, + "data_format" => "netcdf", + "download_format" => "unarchived", + ) + + area = build_era5_area(bounding_box) + isnothing(area) || (request["area"] = area) + + mkpath(dir) + tmp_path = joinpath(dir, "_tmp_multi_$(year)$(month)$(day).nc") + nc_triples = [(ERA5PL_netcdf_variable_names[name], dt_to_tidx[dt], path) + for (name, dt, path) in pending] + + @root begin + CDSAPI.retrieve("reanalysis-era5-pressure-levels", request, tmp_path) + _split_era5pl_nc_multistep(tmp_path, nc_triples) + rm(tmp_path; force=true) + end + + return nothing +end + +function _split_era5pl_nc_multistep(src_path, nc_varname_tidx_path_triples) + time_dimnames = Set(["time", "valid_time"]) + + NCDatasets.Dataset(src_path, "r") do src + unlimited = NCDatasets.unlimited(src) + + for (nc_varname, tidx, dst_path) in nc_varname_tidx_path_triples + NCDatasets.Dataset(dst_path, "c") do dst + # Collapse the time dimension to 1; keep all others + for (dname, dlen) in src.dim + out_len = dname in time_dimnames ? 1 : + dname in unlimited ? Inf : dlen + NCDatasets.defDim(dst, dname, out_len) + end + + for (k, v) in src.attrib + dst.attrib[k] = v + end + + for (vname, var) in src + (vname in ERA5PL_COORD_VARS || vname == nc_varname) || continue + _ncvar_copy_tslice!(dst, var, vname, tidx, time_dimnames) + end + end + end + end +end + +function _ncvar_copy_tslice!(dst, src_var, vname, tidx, time_dimnames) + dims = NCDatasets.dimnames(src_var) + T = eltype(src_var.var) + attribs = src_var.attrib + fill_val = haskey(attribs, "_FillValue") ? attribs["_FillValue"] : nothing + + dst_var = isnothing(fill_val) ? + NCDatasets.defVar(dst, vname, T, dims) : + NCDatasets.defVar(dst, vname, T, dims; fillvalue=fill_val) + + for (k, v) in attribs + k == "_FillValue" && continue + dst_var.attrib[k] = v + end + + # Slice time dimension; keep all others intact + has_time = any(d -> d in time_dimnames, dims) + if has_time + idx = ntuple(ndims(src_var.var)) do i + dims[i] in time_dimnames ? (tidx:tidx) : Colon() + end + dst_var.var[:] = src_var.var[idx...] + else + dst_var.var[:] = src_var.var[:] + end + + return nothing +end + ##### ##### Area/bounding box utilities ##### From f1de9e55c9174d303ac6fbb85c6ba11d7b61ec53 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 3 Mar 2026 20:06:55 -0700 Subject: [PATCH 10/48] Cleanup file naming --- src/DataWrangling/ERA5/ERA5.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index a7664625d..13e6723da 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -309,7 +309,11 @@ function metadata_prefix(metadata::ERA5PressureMetadata) suffix = "" end - prefix = string(var, "_", dataset, "_", start_date, "_", end_date, suffix) + if start_date == end_date + prefix = string(var, "_", dataset, "_", start_date, suffix) + else + prefix = string(var, "_", dataset, "_", start_date, "_", end_date, suffix) + end prefix = colon2dash(prefix) prefix = underscore_spaces(prefix) return prefix @@ -330,7 +334,11 @@ function metadata_prefix(metadata::ERA5Metadata) suffix = "" end - prefix = string(var, "_", dataset, "_", start_date, "_", end_date, suffix) + if start_date == end_date + prefix = string(var, "_", dataset, "_", start_date, suffix) + else + prefix = string(var, "_", dataset, "_", start_date, "_", end_date, suffix) + end prefix = colon2dash(prefix) prefix = underscore_spaces(prefix) return prefix From 916614e9d55aab470e02fbd6aa27db4a2e78b403 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 3 Mar 2026 23:22:19 -0700 Subject: [PATCH 11/48] Cleanup, clarify --- src/DataWrangling/ERA5/ERA5.jl | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 13e6723da..3c1b270fd 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -371,7 +371,7 @@ location(::ERA5Metadata) = (Center, Center, Center) longitude_interfaces(::ERA5Metadata) = (0, 360) latitude_interfaces(::ERA5Metadata) = (-90, 90) -# ERA5 is a 2D surface dataset, so z is a single level at the surface +# ERA5 single-levels (2-D) data product z_interfaces(::ERA5Metadata) = (0, 1) # ERA5 data is stored as Float32 @@ -381,25 +381,38 @@ eltype(::ERA5Metadata) = Float32 ##### Pressure-level vertical coordinate ##### -# Standard atmosphere height (m) for a given pressure (hPa) -_std_atm_geopotential_height(p_hPa) = (287.05 * 288.15 / 9.80665) * log(1013.25 / p_hPa) +const ERA5_gravitational_acceleration = 9.80665 + +# International Standard Atmosphere height (m) for a given pressure (hPa) +function standard_atmosphere_geopotential_height(p) + g = ERA5_gravitational_acceleration + T⁰ = 288.15 # K + p⁰ = 1013.25 # hPa + Rᵈ = 287.0528 # J/(kg-K) + + return (Rᵈ * T⁰ / g) * log(p⁰ / p) +end # Build z-interfaces (Nz+1 values) from pressure levels. # Levels may be in any order; output is sorted so k=1 is highest pressure (lowest altitude). -function _std_atm_z_interfaces(levels) +function standard_atmosphere_z_interfaces(levels) + @info """ + Calculating z-interfaces based on International Standard Atmosphere... + For greater accuracy, use `mean_geopotential_heights`! + """ sorted_levels = sort(levels, rev=true) # highest pressure first → k=1 is bottom - heights = _std_atm_geopotential_height.(Float64.(sorted_levels)) + heights = standard_atmosphere_geopotential_height.(Float64.(sorted_levels)) Nz = length(heights) interfaces = Vector{Float64}(undef, Nz + 1) if Nz == 1 - interfaces[1] = max(0.0, heights[1] - 500.0) - interfaces[2] = heights[1] + 500.0 + interfaces[1] = max(0, heights[1] - 500) + interfaces[2] = heights[1] + 500 return interfaces end - interfaces[1] = max(0.0, heights[1] - (heights[2] - heights[1]) / 2) + interfaces[1] = max(0, heights[1] - (heights[2] - heights[1]) / 2) for k in 2:Nz interfaces[k] = (heights[k-1] + heights[k]) / 2 end @@ -408,7 +421,8 @@ function _std_atm_z_interfaces(levels) return interfaces end -z_interfaces(metadata::ERA5PressureMetadata) = _std_atm_z_interfaces(metadata.dataset.levels) +# ERA5 pressure-levels (3-D) data product +z_interfaces(metadata::ERA5PressureMetadata) = standard_atmosphere_z_interfaces(metadata.dataset.levels) ##### ##### pressure_field — synthetic pressure coordinate field @@ -453,7 +467,7 @@ function mean_geopotential_heights(metadata::ERA5PressureMetadata; arch=CPU()) dir=metadata.dir) heights = zeros(length(metadata.dataset.levels)) for geo_datum in geo_metadata - data = retrieve_data(geo_datum) ./ Float32(9.80665) # Φ → Z (m) + data = retrieve_data(geo_datum) ./ Float32(ERA5_gravitational_acceleration) # Φ → Z (m) heights .+= dropdims(mean(data; dims=(1, 2)); dims=(1, 2)) end heights ./= length(geo_metadata) From 8f42deed11a1d6e983843a5f9cc0cca25798ca9e Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Thu, 5 Mar 2026 10:45:43 -0700 Subject: [PATCH 12/48] Allow one or more variables to be downloaded, at one or more datetimes, without explicitly defining metadata --- ext/NumericalEarthCDSAPIExt.jl | 51 ++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index 390191bc8..cb13a4af0 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -233,10 +233,14 @@ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; sk end """ - download_dataset(names::Vector{Symbol}, dataset::ERA5PressureDataset; - date, bounding_box=nothing, dir=default_download_directory(dataset)) + download_dataset(names::Union{Symbol, Vector{Symbol}}, + dataset::ERA5PressureDataset, + datetime; + bounding_box=nothing, + dir=default_download_directory(dataset)) -Download multiple ERA5 pressure-level variables without requiring a dummy `Metadatum`. +Download one or more ERA5 pressure-level variables at a single datetime, without requiring a dummy +`Metadatum`. # Example ```julia @@ -245,14 +249,19 @@ download_dataset([:temperature, :geopotential_height, :eastward_velocity], ds; date=DateTime(2020, 6, 15, 0), bounding_box=bbox) ``` """ -function download_dataset(names::Vector{Symbol}, dataset::ERA5PressureDataset; - date, +function download_dataset(names::Vector{Symbol}, dataset::ERA5PressureDataset, datetime; bounding_box = nothing, dir = NumericalEarth.DataWrangling.default_download_directory(dataset)) - meta = NumericalEarth.DataWrangling.Metadatum(first(names); dataset, date, bounding_box, dir) + meta = NumericalEarth.DataWrangling.Metadatum(first(names); dataset, datetime, bounding_box, dir) return download_dataset(names, meta) end +function download_dataset(name::Symbol, dataset::ERA5PressureDataset, datetime; + bounding_box = nothing, + dir = NumericalEarth.DataWrangling.default_download_directory(dataset)) + return download_dataset([name], dataset, datetime; bounding_box, dir) +end + """ _split_era5pl_nc(src_path, nc_name_path_pairs) @@ -313,10 +322,14 @@ function _ncvar_copy!(dst, src_var, vname) end """ - download_dataset(names::Vector{Symbol}, dataset::ERA5PressureDataset, dates::AbstractVector; - bounding_box=nothing, dir=default_download_directory(dataset), skip_existing=true) - -Download multiple ERA5 pressure-level variables for a vector of `dates`, issuing one CDS API + download_dataset(names::Union{Symbol, Vector{Symbol}}, + dataset::ERA5PressureDataset, + datetimes::AbstractVector; + bounding_box=nothing, + dir=default_download_directory(dataset), + skip_existing=true) + +Download one or more ERA5 pressure-level variables for a vector of `datetimes`, issuing one CDS API request per unique calendar day (which may contain multiple hours). # Example @@ -326,14 +339,15 @@ dates = DateTime(2020, 6, 15, 0) : Hour(6) : DateTime(2020, 6, 16, 18) download_dataset([:temperature, :geopotential_height], ds, collect(dates); bounding_box=bbox) ``` """ -function download_dataset(names::Vector{Symbol}, dataset::ERA5PressureDataset, - dates::AbstractVector; +function download_dataset(names::Vector{Symbol}, + dataset::ERA5PressureDataset, + datetimes::AbstractVector; bounding_box = nothing, dir = NumericalEarth.DataWrangling.default_download_directory(dataset), skip_existing = true) - grouped = Dict(d => filter(dt -> Dates.Date(dt) == d, dates) - for d in unique(Dates.Date.(dates))) + grouped = Dict(d => filter(dt -> Dates.Date(dt) == d, datetimes) + for d in unique(Dates.Date.(datetimes))) for day in sort(collect(keys(grouped))) _download_era5pl_day(names, dataset, grouped[day]; bounding_box, dir, skip_existing) @@ -342,6 +356,15 @@ function download_dataset(names::Vector{Symbol}, dataset::ERA5PressureDataset, return nothing end +function download_dataset(name::Symbol, + dataset::ERA5PressureDataset, + datetimes::AbstractVector; + bounding_box = nothing, + dir = NumericalEarth.DataWrangling.default_download_directory(dataset), + skip_existing = true) + return download_dataset([name], dataset, datetimes; bounding_box, dir, skip_existing) +end + function _download_era5pl_day(names, dataset, day_dates; bounding_box, dir, skip_existing) From 3cac1f8a79c32101c6bea91d3254f87d5be0d67f Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Thu, 5 Mar 2026 15:39:51 -0700 Subject: [PATCH 13/48] Add cleanup flag for multi-datetime downloads Combined CDSAPI requests will return a single combined netcdf; set cleanup=false to keep the "_tmp_multi__*" files --- ext/NumericalEarthCDSAPIExt.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index cb13a4af0..afbdcd8ac 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -344,13 +344,14 @@ function download_dataset(names::Vector{Symbol}, datetimes::AbstractVector; bounding_box = nothing, dir = NumericalEarth.DataWrangling.default_download_directory(dataset), - skip_existing = true) + skip_existing = true, + cleanup = true) grouped = Dict(d => filter(dt -> Dates.Date(dt) == d, datetimes) for d in unique(Dates.Date.(datetimes))) for day in sort(collect(keys(grouped))) - _download_era5pl_day(names, dataset, grouped[day]; bounding_box, dir, skip_existing) + _download_era5pl_day(names, dataset, grouped[day]; bounding_box, dir, skip_existing, cleanup) end return nothing @@ -361,12 +362,13 @@ function download_dataset(name::Symbol, datetimes::AbstractVector; bounding_box = nothing, dir = NumericalEarth.DataWrangling.default_download_directory(dataset), - skip_existing = true) - return download_dataset([name], dataset, datetimes; bounding_box, dir, skip_existing) + skip_existing = true, + cleanup = true) + return download_dataset([name], dataset, datetimes; bounding_box, dir, skip_existing, cleanup) end function _download_era5pl_day(names, dataset, day_dates; - bounding_box, dir, skip_existing) + bounding_box, dir, skip_existing, cleanup) MDatum = NumericalEarth.DataWrangling.Metadatum meta_path = NumericalEarth.DataWrangling.metadata_path @@ -412,7 +414,7 @@ function _download_era5pl_day(names, dataset, day_dates; @root begin CDSAPI.retrieve("reanalysis-era5-pressure-levels", request, tmp_path) _split_era5pl_nc_multistep(tmp_path, nc_triples) - rm(tmp_path; force=true) + cleanup && rm(tmp_path; force=true) end return nothing From c56c6fce435598965ed58011258ad3c41a130fbb Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Mon, 9 Mar 2026 14:17:53 -0700 Subject: [PATCH 14/48] Fix silly Claude vertical-level flip; no inpainting needed for ERA5 --- src/DataWrangling/ERA5/ERA5.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 3c1b270fd..d8e464de7 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -20,6 +20,7 @@ import NumericalEarth.DataWrangling: all_dates, dataset_variable_name, default_download_directory, + default_inpainting, longitude_interfaces, latitude_interfaces, z_interfaces, @@ -112,8 +113,8 @@ is_three_dimensional(::ERA5Metadata) = false # ERA5 pressure-level data is 3D is_three_dimensional(::ERA5PressureMetadata) = true -# ERA5 stores pressure levels top-to-bottom; reverse to match Oceananigans bottom-to-top -reversed_vertical_axis(::ERA5PressureDataset) = true +# ERA5 stores pressure levels bottom-to-top +reversed_vertical_axis(::ERA5PressureDataset) = false # Variable name mappings from NumericalEarth names to ERA5/CDS API variable names ERA5_dataset_variable_names = Dict( @@ -216,6 +217,8 @@ netcdf_variable_name(md::ERA5PressureMetadata) = ERA5PL_netcdf_variable_names[m conversion_units(md::ERA5PressureMetadatum) = md.name == :geopotential_height ? InverseGravity() : nothing +default_inpainting(md::ERA5PressureMetadatum) = nothing + """ retrieve_data(metadata::ERA5Metadatum) @@ -261,7 +264,7 @@ function retrieve_data(metadata::ERA5PressureMetadatum) ds = NCDatasets.Dataset(path) data = ds[name][:, :, :, 1] # (lon, lat, pressure_level, time=1) close(ds) - return reverse(data, dims=3) # flip from ERA5 top-to-bottom to Oceananigans bottom-to-top + return data end ##### From e552ccbc97c8cb1da59bbe7c261992dbaa2a0b87 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Mon, 9 Mar 2026 15:17:03 -0700 Subject: [PATCH 15/48] Fix handling of ERA5 latitude grid Note: inpainting is now turned OFF for 2D data -- the reanalysis data should be complete and turning on inpainting would artificially fill in data that should be masked (e.g., ocean quantities over land) Tested for 2D (single levels) and 3D (pressure levels) --- src/DataWrangling/ERA5/ERA5.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index d8e464de7..1fa7cb3c2 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -217,6 +217,7 @@ netcdf_variable_name(md::ERA5PressureMetadata) = ERA5PL_netcdf_variable_names[m conversion_units(md::ERA5PressureMetadatum) = md.name == :geopotential_height ? InverseGravity() : nothing +default_inpainting(md::ERA5Metadatum) = nothing default_inpainting(md::ERA5PressureMetadatum) = nothing """ @@ -245,6 +246,9 @@ function retrieve_data(metadata::ERA5Metadatum) end close(ds) + + # Latitude is stored from 90°N → 90°S + data_2d = reverse(data_2d, dims=2) # Add singleton z-dimension for 3D field compatibility # Return as (Nx, Ny, 1) @@ -264,7 +268,7 @@ function retrieve_data(metadata::ERA5PressureMetadatum) ds = NCDatasets.Dataset(path) data = ds[name][:, :, :, 1] # (lon, lat, pressure_level, time=1) close(ds) - return data + return reverse(data, dims=2) # Latitude is stored from 90°N → 90°S end ##### From fce1404b85b895246dfd213b178c29c4a7ba0669 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Mon, 9 Mar 2026 15:39:08 -0700 Subject: [PATCH 16/48] Rename original "ERA5*" metadata to "ERA5SingleLevels*" to disambiguate from ERA5PressureLevels* --- examples/ERA5_winds_and_stokes_drift.jl | 4 ++-- src/DataWrangling/DataWrangling.jl | 2 +- src/DataWrangling/ERA5/ERA5.jl | 17 ++++++++++------- test/test_cds_downloading.jl | 10 +++++----- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/examples/ERA5_winds_and_stokes_drift.jl b/examples/ERA5_winds_and_stokes_drift.jl index b45be444c..f58ebdb87 100644 --- a/examples/ERA5_winds_and_stokes_drift.jl +++ b/examples/ERA5_winds_and_stokes_drift.jl @@ -16,7 +16,7 @@ using NumericalEarth using NumericalEarth.DataWrangling: Metadatum -using NumericalEarth.DataWrangling.ERA5: ERA5Hourly +using NumericalEarth.DataWrangling.ERA5: ERA5SingleLevelsHourly using CDSAPI using Oceananigans @@ -29,7 +29,7 @@ using Dates # while ocean wave variables (Stokes drift) live on a 0.5° grid (720×361). # We define metadata for each variable at a single date. -dataset = ERA5Hourly() +dataset = ERA5SingleLevelsHourly() date = DateTime(2020, 1, 15, 12) # January 15, 2020 at 12:00 UTC u_stokes_meta = Metadatum(:eastward_stokes_drift; dataset, date) diff --git a/src/DataWrangling/DataWrangling.jl b/src/DataWrangling/DataWrangling.jl index bd1ceb4cd..142eb47b2 100644 --- a/src/DataWrangling/DataWrangling.jl +++ b/src/DataWrangling/DataWrangling.jl @@ -9,7 +9,7 @@ export WOAClimatology, WOAAnnual, WOAMonthly export metadata_time_step, metadata_epoch export LinearlyTaperedPolarMask export DatasetRestoring -export ERA5Hourly, ERA5Monthly, ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels +export ERA5SingleLevelsHourly, ERA5SingleLevelsMonthly, ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels using Oceananigans using Downloads diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 1fa7cb3c2..5c41e08e4 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -1,6 +1,9 @@ module ERA5 -export ERA5Hourly, ERA5Monthly +# 2-D data +export ERA5SingleLevelsHourly, ERA5SingleLevelsMonthly + +# 3-D data export ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels, ERA5_all_pressure_levels, pressure_field using NCDatasets @@ -49,11 +52,11 @@ abstract type ERA5Dataset end default_download_directory(::ERA5Dataset) = download_ERA5_cache -struct ERA5Hourly <: ERA5Dataset end -struct ERA5Monthly <: ERA5Dataset end +struct ERA5SingleLevelsHourly <: ERA5Dataset end +struct ERA5SingleLevelsMonthly <: ERA5Dataset end -dataset_name(::ERA5Hourly) = "ERA5Hourly" -dataset_name(::ERA5Monthly) = "ERA5Monthly" +dataset_name(::ERA5SingleLevelsHourly) = "ERA5SingleLevelsHourly" +dataset_name(::ERA5SingleLevelsMonthly) = "ERA5SingleLevelsMonthly" ##### ##### ERA5 pressure-level datasets @@ -101,8 +104,8 @@ function Base.size(::ERA5Dataset, variable) end # ERA5 reanalysis data available from 1940 to present (we use a practical range here) -all_dates(::ERA5Hourly, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-31"), step=Hour(1)) -all_dates(::ERA5Monthly, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-01"), step=Month(1)) +all_dates(::ERA5SingleLevelsHourly, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-31"), step=Hour(1)) +all_dates(::ERA5SingleLevelsMonthly, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-01"), step=Month(1)) const ERA5Metadata{D} = Metadata{<:ERA5Dataset, D} const ERA5Metadatum = Metadatum{<:ERA5Dataset} diff --git a/test/test_cds_downloading.jl b/test/test_cds_downloading.jl index c51958657..63b725b7a 100644 --- a/test/test_cds_downloading.jl +++ b/test/test_cds_downloading.jl @@ -5,7 +5,7 @@ using Dates using NCDatasets using NumericalEarth.DataWrangling.ERA5 -using NumericalEarth.DataWrangling.ERA5: ERA5Hourly, ERA5Monthly, ERA5_dataset_variable_names +using NumericalEarth.DataWrangling.ERA5: ERA5SingleLevelsHourly, ERA5SingleLevelsMonthly, ERA5_dataset_variable_names using NumericalEarth.DataWrangling.ERA5: ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels, ERA5_all_pressure_levels, ERA5PL_dataset_variable_names, pressure_field @@ -17,7 +17,7 @@ start_date = DateTime(2005, 2, 16, 12) @testset "ERA5 data downloading and utilities" begin @info "Testing ERA5 downloading and NetCDF file verification..." - dataset = ERA5Hourly() + dataset = ERA5SingleLevelsHourly() # Use a small bounding box to reduce download time bounding_box = NumericalEarth.DataWrangling.BoundingBox(longitude=(0, 5), latitude=(40, 45)) @@ -88,7 +88,7 @@ start_date = DateTime(2005, 2, 16, 12) # Test metadata properties @test metadatum.name == :temperature - @test metadatum.dataset isa ERA5Hourly + @test metadatum.dataset isa ERA5SingleLevelsHourly @test metadatum.dates == start_date @test metadatum.bounding_box == bounding_box @@ -127,8 +127,8 @@ start_date = DateTime(2005, 2, 16, 12) end @testset "ERA5 Monthly dataset" begin - monthly_dataset = ERA5Monthly() - @test monthly_dataset isa ERA5Monthly + monthly_dataset = ERA5SingleLevelsMonthly() + @test monthly_dataset isa ERA5SingleLevelsMonthly # Test that all_dates returns a valid range dates = NumericalEarth.DataWrangling.all_dates(monthly_dataset, :temperature) From 4643ffa34092286020a7c7a3b7ba6ead152c009b Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 10 Mar 2026 12:34:49 -0700 Subject: [PATCH 17/48] Fix ERA5 grid creation --- examples/ERA5_winds_and_stokes_drift.jl | 3 ++- src/DataWrangling/ERA5/ERA5.jl | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/ERA5_winds_and_stokes_drift.jl b/examples/ERA5_winds_and_stokes_drift.jl index f58ebdb87..871359b1f 100644 --- a/examples/ERA5_winds_and_stokes_drift.jl +++ b/examples/ERA5_winds_and_stokes_drift.jl @@ -42,10 +42,11 @@ v_wind_meta = Metadatum(:northward_velocity; dataset, date) # We build a single `LatitudeLongitudeGrid` and use `set!` to download # and interpolate all four variables onto it. -grid = LatitudeLongitudeGrid(size = (1440, 721, 1), +grid = LatitudeLongitudeGrid(size = (1440, 720, 1), longitude = (0, 360), latitude = (-90, 90), z = (0, 1)) +@show grid u_stokes = CenterField(grid) v_stokes = CenterField(grid) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 5c41e08e4..1fadac59d 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -81,7 +81,7 @@ ERA5MonthlyPressureLevels(; levels=ERA5_all_pressure_levels) = ERA5MonthlyPressu dataset_name(::ERA5HourlyPressureLevels) = "ERA5HourlyPressureLevels" dataset_name(::ERA5MonthlyPressureLevels) = "ERA5MonthlyPressureLevels" -Base.size(ds::ERA5PressureDataset, variable) = (1440, 721, length(ds.levels)) +Base.size(ds::ERA5PressureDataset, variable) = (1440, 720, length(ds.levels)) all_dates(::ERA5HourlyPressureLevels, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-31"), step=Hour(1)) all_dates(::ERA5MonthlyPressureLevels, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-01"), step=Month(1)) @@ -377,8 +377,8 @@ inpainted_metadata_path(metadata::ERA5Metadata) = joinpath(metadata.dir, inpaint location(::ERA5Metadata) = (Center, Center, Center) -# ERA5 global coverage: 0-360 longitude, -90 to 90 latitude at 0.25 degree resolution -longitude_interfaces(::ERA5Metadata) = (0, 360) +# ERA5 global coverage: 0-359.75 longitude, -90 to 90 latitude at 0.25 degree resolution +longitude_interfaces(::ERA5Metadata) = (-0.125, 359.875) latitude_interfaces(::ERA5Metadata) = (-90, 90) # ERA5 single-levels (2-D) data product From 9089c991c47777bb0a21adc890c0f7653f329b79 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 10 Mar 2026 14:24:38 -0700 Subject: [PATCH 18/48] Fix bbox handling Clipped field will match downloaded bounding box, tested with ERA5 --- src/DataWrangling/metadata_field.jl | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/DataWrangling/metadata_field.jl b/src/DataWrangling/metadata_field.jl index 061684dc9..758728452 100644 --- a/src/DataWrangling/metadata_field.jl +++ b/src/DataWrangling/metadata_field.jl @@ -8,11 +8,16 @@ restrict(::Nothing, interfaces, N) = interfaces, N # TODO support stretched native grids function restrict(bbox_interfaces, interfaces, N) - Δ = interfaces[2] - interfaces[1] - rΔ = bbox_interfaces[2] - bbox_interfaces[1] - ϵ = rΔ / Δ + LΔ = interfaces[2] - interfaces[1] + Δ = LΔ / N + grid_interfaces = (bbox_interfaces[1] - Δ/2, + bbox_interfaces[2] + Δ/2) + + rΔ = grid_interfaces[2] - grid_interfaces[1] + ϵ = rΔ / LΔ rN = ceil(Int, ϵ * N) # Round up to ensure bounding box is covered - return bbox_interfaces, rN + + return grid_interfaces, rN end """ @@ -188,9 +193,12 @@ function set_metadata_field!(field, data, metadatum) arch = architecture(grid) Nx, Ny, Nz = size(metadatum) + mangling = if size(data, 2) == Ny-1 + @info "Shifting field southward" ShiftSouth() elseif size(data, 2) == Ny+1 + @info "Averaging field in north-south dir" AverageNorthSouth() else nothing From cf2b8e0f7e2aa14ea9a9584afe63bd2ab6027cfd Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 10 Mar 2026 15:36:51 -0700 Subject: [PATCH 19/48] Use standard units for pressure_levels --- ext/NumericalEarthCDSAPIExt.jl | 12 +++++++++--- src/DataWrangling/ERA5/ERA5.jl | 8 +++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index afbdcd8ac..bc8cddcdc 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -128,10 +128,12 @@ function download_dataset(meta::ERA5PressureMetadatum; skip_existing=true) day = lpad(string(Dates.day(date)), 2, '0') hour = lpad(string(Dates.hour(date)), 2, '0') * ":00" + p_hPa = [round(Int, p * 1e-2) for p in meta.dataset.levels] + request = Dict( "product_type" => ["reanalysis"], "variable" => [variable_name], - "pressure_level" => [string(p) for p in meta.dataset.levels], + "pressure_level" => [string(p) for p in p_hPa], "year" => [year], "month" => [month], "day" => [day], @@ -203,10 +205,12 @@ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; sk day = lpad(string(Dates.day(date)), 2, '0') hour = lpad(string(Dates.hour(date)), 2, '0') * ":00" + p_hPa = [round(Int, p * 1e-2) for p in meta.dataset.levels] + request = Dict( "product_type" => ["reanalysis"], "variable" => cds_vars, - "pressure_level" => [string(p) for p in meta.dataset.levels], + "pressure_level" => [string(p) for p in p_hPa], "year" => [year], "month" => [month], "day" => [day], @@ -391,10 +395,12 @@ function _download_era5pl_day(names, dataset, day_dates; month = lpad(string(Dates.month(dt)), 2, '0') day = lpad(string(Dates.day(dt)), 2, '0') + p_hPa = [round(Int, p * 1e-2) for p in dataset.levels] + request = Dict( "product_type" => ["reanalysis"], "variable" => cds_vars, - "pressure_level" => [string(p) for p in dataset.levels], + "pressure_level" => [string(p) for p in p_hPa], "year" => [year], "month" => [month], "day" => [day], diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 1fadac59d..5fe770263 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -4,7 +4,8 @@ module ERA5 export ERA5SingleLevelsHourly, ERA5SingleLevelsMonthly # 3-D data -export ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels, ERA5_all_pressure_levels, pressure_field +export ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels, ERA5_all_pressure_levels, pressure_field, hPa +export standard_atmosphere_z_interfaces, mean_geopotential_z_interfaces using NCDatasets using Printf @@ -62,9 +63,10 @@ dataset_name(::ERA5SingleLevelsMonthly) = "ERA5SingleLevelsMonthly" ##### ERA5 pressure-level datasets ##### +const hPa = 100.0 const ERA5_all_pressure_levels = [1, 2, 3, 5, 7, 10, 20, 30, 50, 70, 100, 125, 150, 175, 200, 225, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 775, 800, - 825, 850, 875, 900, 925, 950, 975, 1000] + 825, 850, 875, 900, 925, 950, 975, 1000]*hPa abstract type ERA5PressureDataset <: ERA5Dataset end @@ -397,7 +399,7 @@ const ERA5_gravitational_acceleration = 9.80665 function standard_atmosphere_geopotential_height(p) g = ERA5_gravitational_acceleration T⁰ = 288.15 # K - p⁰ = 1013.25 # hPa + p⁰ = 1013.25 * hPa Rᵈ = 287.0528 # J/(kg-K) return (Rᵈ * T⁰ / g) * log(p⁰ / p) From 4139a6e25e4aacf6ec3afa32c8e5e1da9ebdc7b2 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 10 Mar 2026 16:01:28 -0700 Subject: [PATCH 20/48] No special treatment of lower z_interface, allow z < 0 --- src/DataWrangling/ERA5/ERA5.jl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 5fe770263..1a49bf952 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -419,16 +419,15 @@ function standard_atmosphere_z_interfaces(levels) interfaces = Vector{Float64}(undef, Nz + 1) if Nz == 1 - interfaces[1] = max(0, heights[1] - 500) - interfaces[2] = heights[1] + 500 - return interfaces - end - - interfaces[1] = max(0, heights[1] - (heights[2] - heights[1]) / 2) - for k in 2:Nz - interfaces[k] = (heights[k-1] + heights[k]) / 2 + interfaces[1] = heights[1] - 0.5 + interfaces[2] = heights[1] + 0.5 + else + interfaces[1] = heights[1] - (heights[2] - heights[1]) / 2 + for k in 2:Nz + interfaces[k] = (heights[k-1] + heights[k]) / 2 + end + interfaces[Nz+1] = heights[Nz] + (heights[Nz] - heights[Nz-1]) / 2 end - interfaces[Nz+1] = heights[Nz] + (heights[Nz] - heights[Nz-1]) / 2 return interfaces end From c45200a9153ddfea4d6a6e5fd88992165eb7ad17 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 10 Mar 2026 18:31:53 -0700 Subject: [PATCH 21/48] Fix defaults --- src/DataWrangling/ERA5/ERA5.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 1a49bf952..f2e97d32d 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -219,11 +219,11 @@ available_variables(::ERA5PressureDataset) = ERA5PL_dataset_variable_names dataset_variable_name(md::ERA5PressureMetadata) = ERA5PL_dataset_variable_names[md.name] netcdf_variable_name(md::ERA5PressureMetadata) = ERA5PL_netcdf_variable_names[md.name] -conversion_units(md::ERA5PressureMetadatum) = +conversion_units(md::ERA5PressureMetadata) = md.name == :geopotential_height ? InverseGravity() : nothing -default_inpainting(md::ERA5Metadatum) = nothing -default_inpainting(md::ERA5PressureMetadatum) = nothing +default_inpainting(md::ERA5Metadata) = nothing +default_inpainting(md::ERA5PressureMetadata) = nothing """ retrieve_data(metadata::ERA5Metadatum) From 18cba4c3951c13ee4fb93244d13bbb4fd7bf9b38 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 10 Mar 2026 18:50:25 -0700 Subject: [PATCH 22/48] Implement mean_geopotential_z_interfaces; make mutable to allow z update rename levels -> pressure_levels for clarity --- ext/NumericalEarthCDSAPIExt.jl | 10 +++--- src/DataWrangling/ERA5/ERA5.jl | 62 +++++++++++++++++++++++----------- test/test_cds_downloading.jl | 6 ++-- 3 files changed, 51 insertions(+), 27 deletions(-) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index bc8cddcdc..59e9bdbe9 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -128,7 +128,7 @@ function download_dataset(meta::ERA5PressureMetadatum; skip_existing=true) day = lpad(string(Dates.day(date)), 2, '0') hour = lpad(string(Dates.hour(date)), 2, '0') * ":00" - p_hPa = [round(Int, p * 1e-2) for p in meta.dataset.levels] + p_hPa = [round(Int, p * 1e-2) for p in meta.dataset.pressure_levels] request = Dict( "product_type" => ["reanalysis"], @@ -205,7 +205,7 @@ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; sk day = lpad(string(Dates.day(date)), 2, '0') hour = lpad(string(Dates.hour(date)), 2, '0') * ":00" - p_hPa = [round(Int, p * 1e-2) for p in meta.dataset.levels] + p_hPa = [round(Int, p * 1e-2) for p in meta.dataset.pressure_levels] request = Dict( "product_type" => ["reanalysis"], @@ -248,7 +248,7 @@ Download one or more ERA5 pressure-level variables at a single datetime, without # Example ```julia -ds = ERA5HourlyPressureLevels(levels=[850, 500]) +ds = ERA5HourlyPressureLevels(pressure_levels=[850, 500] .* hPa) download_dataset([:temperature, :geopotential_height, :eastward_velocity], ds; date=DateTime(2020, 6, 15, 0), bounding_box=bbox) ``` @@ -338,7 +338,7 @@ request per unique calendar day (which may contain multiple hours). # Example ```julia -ds = ERA5HourlyPressureLevels(levels=[850, 500]) +ds = ERA5HourlyPressureLevels(pressure_levels=[850, 500] .* hPa) dates = DateTime(2020, 6, 15, 0) : Hour(6) : DateTime(2020, 6, 16, 18) download_dataset([:temperature, :geopotential_height], ds, collect(dates); bounding_box=bbox) ``` @@ -395,7 +395,7 @@ function _download_era5pl_day(names, dataset, day_dates; month = lpad(string(Dates.month(dt)), 2, '0') day = lpad(string(Dates.day(dt)), 2, '0') - p_hPa = [round(Int, p * 1e-2) for p in dataset.levels] + p_hPa = [round(Int, p * 1e-2) for p in dataset.pressure_levels] request = Dict( "product_type" => ["reanalysis"], diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index f2e97d32d..b02dad505 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -70,20 +70,26 @@ const ERA5_all_pressure_levels = [1, 2, 3, 5, 7, 10, 20, 30, 50, 70, 100, 125, 1 abstract type ERA5PressureDataset <: ERA5Dataset end -struct ERA5HourlyPressureLevels <: ERA5PressureDataset - levels :: Vector{Int} +mutable struct ERA5HourlyPressureLevels <: ERA5PressureDataset + pressure_levels :: Vector{Float64} + z :: Union{Nothing, Vector{Float64}} + ERA5HourlyPressureLevels(pressure_levels, z=nothing) = new(sort(pressure_levels, rev=true), z) end -ERA5HourlyPressureLevels(; levels=ERA5_all_pressure_levels) = ERA5HourlyPressureLevels(levels) +ERA5HourlyPressureLevels(; pressure_levels=ERA5_all_pressure_levels, z=nothing) = + ERA5HourlyPressureLevels(pressure_levels, z) -struct ERA5MonthlyPressureLevels <: ERA5PressureDataset - levels :: Vector{Int} +mutable struct ERA5MonthlyPressureLevels <: ERA5PressureDataset + pressure_levels :: Vector{Float64} + z :: Union{Nothing, Vector{Float64}} + ERA5MonthlyPressureLevels(pressure_levels, z=nothing) = new(sort(pressure_levels, rev=true), z) end -ERA5MonthlyPressureLevels(; levels=ERA5_all_pressure_levels) = ERA5MonthlyPressureLevels(levels) +ERA5MonthlyPressureLevels(; pressure_levels=ERA5_all_pressure_levels, z=nothing) = + ERA5MonthlyPressureLevels(pressure_levels, z) dataset_name(::ERA5HourlyPressureLevels) = "ERA5HourlyPressureLevels" dataset_name(::ERA5MonthlyPressureLevels) = "ERA5MonthlyPressureLevels" -Base.size(ds::ERA5PressureDataset, variable) = (1440, 720, length(ds.levels)) +Base.size(ds::ERA5PressureDataset, variable) = (1440, 720, length(ds.pressure_levels)) all_dates(::ERA5HourlyPressureLevels, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-31"), step=Hour(1)) all_dates(::ERA5MonthlyPressureLevels, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-01"), step=Month(1)) @@ -408,10 +414,9 @@ end # Build z-interfaces (Nz+1 values) from pressure levels. # Levels may be in any order; output is sorted so k=1 is highest pressure (lowest altitude). function standard_atmosphere_z_interfaces(levels) - @info """ - Calculating z-interfaces based on International Standard Atmosphere... - For greater accuracy, use `mean_geopotential_heights`! - """ + @info "Calculating z-interfaces based on the International Standard Atmosphere... \ + For greater accuracy, download the :geopotential field and \ + set `dataset.z = mean_geopotential_z_interfaces(metadata)`" sorted_levels = sort(levels, rev=true) # highest pressure first → k=1 is bottom heights = standard_atmosphere_geopotential_height.(Float64.(sorted_levels)) Nz = length(heights) @@ -433,7 +438,10 @@ function standard_atmosphere_z_interfaces(levels) end # ERA5 pressure-levels (3-D) data product -z_interfaces(metadata::ERA5PressureMetadata) = standard_atmosphere_z_interfaces(metadata.dataset.levels) +function z_interfaces(metadata::ERA5PressureMetadata) + z = metadata.dataset.z + return isnothing(z) ? standard_atmosphere_z_interfaces(metadata.dataset.pressure_levels) : z +end ##### ##### pressure_field — synthetic pressure coordinate field @@ -449,7 +457,7 @@ highest pressure level). function pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3)) grid = native_grid(metadata, arch; halo) field = CenterField(grid) - reversed_levels = sort(metadata.dataset.levels, rev=true) # highest pressure → k=1 + reversed_levels = sort(metadata.dataset.pressure_levels, rev=true) # highest pressure → k=1 for (k, p) in enumerate(reversed_levels) interior(field)[:, :, k] .= Float32(p) end @@ -462,27 +470,43 @@ end ##### """ - mean_geopotential_heights(metadata::ERA5PressureMetadata; arch=CPU()) + mean_geopotential_heights(metadata::ERA5PressureMetadata; interfaces=false) Compute spatially and temporally averaged geopotential heights (m) for each -pressure level in `metadata`. This provides more accurate z-coordinates than -the standard-atmosphere fallback used by `z_interfaces`. +pressure level in `metadata`. Downloads the `:geopotential` field for every date in `metadata`, divides by g, averages over the horizontal domain and all dates, and returns one representative height per pressure level in bottom-to-top order (k=1 is highest pressure). """ -function mean_geopotential_heights(metadata::ERA5PressureMetadata; arch=CPU()) +function mean_geopotential_heights(metadata::ERA5PressureMetadata) geo_metadata = Metadata(:geopotential; dataset=metadata.dataset, dates=metadata.dates, bounding_box=metadata.bounding_box, dir=metadata.dir) - heights = zeros(length(metadata.dataset.levels)) + heights = zeros(length(metadata.dataset.pressure_levels)) + # average over time for geo_datum in geo_metadata data = retrieve_data(geo_datum) ./ Float32(ERA5_gravitational_acceleration) # Φ → Z (m) + # average over horizontal dims heights .+= dropdims(mean(data; dims=(1, 2)); dims=(1, 2)) end heights ./= length(geo_metadata) - return heights + + return sort(heights) +end + +function mean_geopotential_z_interfaces(metadata::ERA5PressureMetadata) + return stagger(mean_geopotential_heights(metadata)) +end + +function stagger(zc::AbstractVector) + # heights are ascending (k=1 = highest pressure = lowest altitude, + # consistent with retrieve_data's reverse() and Oceananigans bottom-to-top + # convention); bottom and top interfaces are extrapolated + zf = (zc[1:end-1] .+ zc[2:end]) / 2 # Nz-1 interior interfaces + pushfirst!(zf, zc[1] - (zf[1] - zc[1])) # bottom interface + push!(zf, zc[end] + (zc[end] - zf[end])) # top interface + return zf end end # module ERA5 diff --git a/test/test_cds_downloading.jl b/test/test_cds_downloading.jl index 63b725b7a..304d17079 100644 --- a/test/test_cds_downloading.jl +++ b/test/test_cds_downloading.jl @@ -140,11 +140,11 @@ start_date = DateTime(2005, 2, 16, 12) # Default constructor uses all 37 standard levels ds_full = ERA5HourlyPressureLevels() @test ds_full isa ERA5HourlyPressureLevels - @test length(ds_full.levels) == 37 + @test length(ds_full.pressure_levels) == 37 @test Base.size(ds_full, :temperature) == (1440, 721, 37) # Subset constructor - ds_sub = ERA5HourlyPressureLevels(levels=[850, 500]) + ds_sub = ERA5HourlyPressureLevels(pressure_levels=[850, 500] .* hPa) @test Base.size(ds_sub, :temperature) == (1440, 721, 2) # Monthly variant @@ -240,7 +240,7 @@ start_date = DateTime(2005, 2, 16, 12) @testset "ERA5 pressure-level download and Field on CPU" begin arch = CPU() - ds_pl = ERA5HourlyPressureLevels(levels=[850, 500]) + ds_pl = ERA5HourlyPressureLevels(pressure_levels=[850, 500] .* hPa) @testset "Download and 3D Field" begin meta = Metadatum(:temperature; dataset=ds_pl, bounding_box, date=start_date) From ae0f0b17e8baeef2b7cd277e44bc2ed6fa5711cd Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 10 Mar 2026 23:28:08 -0700 Subject: [PATCH 23/48] Organize ERA5 code into single-level and pressure-levels source files --- src/DataWrangling/ERA5/ERA5.jl | 410 +----------------- .../ERA5/ERA5_pressure_levels.jl | 257 +++++++++++ src/DataWrangling/ERA5/ERA5_single_levels.jl | 150 +++++++ 3 files changed, 420 insertions(+), 397 deletions(-) create mode 100644 src/DataWrangling/ERA5/ERA5_pressure_levels.jl create mode 100644 src/DataWrangling/ERA5/ERA5_single_levels.jl diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index b02dad505..f790a7ced 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -53,237 +53,27 @@ abstract type ERA5Dataset end default_download_directory(::ERA5Dataset) = download_ERA5_cache -struct ERA5SingleLevelsHourly <: ERA5Dataset end -struct ERA5SingleLevelsMonthly <: ERA5Dataset end - -dataset_name(::ERA5SingleLevelsHourly) = "ERA5SingleLevelsHourly" -dataset_name(::ERA5SingleLevelsMonthly) = "ERA5SingleLevelsMonthly" - -##### -##### ERA5 pressure-level datasets -##### - -const hPa = 100.0 -const ERA5_all_pressure_levels = [1, 2, 3, 5, 7, 10, 20, 30, 50, 70, 100, 125, 150, - 175, 200, 225, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 775, 800, - 825, 850, 875, 900, 925, 950, 975, 1000]*hPa - -abstract type ERA5PressureDataset <: ERA5Dataset end - -mutable struct ERA5HourlyPressureLevels <: ERA5PressureDataset - pressure_levels :: Vector{Float64} - z :: Union{Nothing, Vector{Float64}} - ERA5HourlyPressureLevels(pressure_levels, z=nothing) = new(sort(pressure_levels, rev=true), z) -end -ERA5HourlyPressureLevels(; pressure_levels=ERA5_all_pressure_levels, z=nothing) = - ERA5HourlyPressureLevels(pressure_levels, z) - -mutable struct ERA5MonthlyPressureLevels <: ERA5PressureDataset - pressure_levels :: Vector{Float64} - z :: Union{Nothing, Vector{Float64}} - ERA5MonthlyPressureLevels(pressure_levels, z=nothing) = new(sort(pressure_levels, rev=true), z) -end -ERA5MonthlyPressureLevels(; pressure_levels=ERA5_all_pressure_levels, z=nothing) = - ERA5MonthlyPressureLevels(pressure_levels, z) - -dataset_name(::ERA5HourlyPressureLevels) = "ERA5HourlyPressureLevels" -dataset_name(::ERA5MonthlyPressureLevels) = "ERA5MonthlyPressureLevels" - -Base.size(ds::ERA5PressureDataset, variable) = (1440, 720, length(ds.pressure_levels)) - -all_dates(::ERA5HourlyPressureLevels, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-31"), step=Hour(1)) -all_dates(::ERA5MonthlyPressureLevels, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-01"), step=Month(1)) - -const ERA5PressureMetadata{D} = Metadata{<:ERA5PressureDataset, D} -const ERA5PressureMetadatum = Metadatum{<:ERA5PressureDataset} - -# Wave variables are on a 0.5° grid (720×361), atmospheric variables on 0.25° (1440×721) -const ERA5_wave_variables = Set([ - :eastward_stokes_drift, :northward_stokes_drift, - :significant_wave_height, :mean_wave_period, :mean_wave_direction, -]) - -function Base.size(::ERA5Dataset, variable) - if variable in ERA5_wave_variables - return (720, 361, 1) - else - return (1440, 721, 1) - end -end - -# ERA5 reanalysis data available from 1940 to present (we use a practical range here) -all_dates(::ERA5SingleLevelsHourly, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-31"), step=Hour(1)) -all_dates(::ERA5SingleLevelsMonthly, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-01"), step=Month(1)) - const ERA5Metadata{D} = Metadata{<:ERA5Dataset, D} const ERA5Metadatum = Metadatum{<:ERA5Dataset} -# ERA5 is a spatially 2D dataset (atmospheric surface variables) -is_three_dimensional(::ERA5Metadata) = false - -# ERA5 pressure-level data is 3D -is_three_dimensional(::ERA5PressureMetadata) = true - -# ERA5 stores pressure levels bottom-to-top -reversed_vertical_axis(::ERA5PressureDataset) = false - -# Variable name mappings from NumericalEarth names to ERA5/CDS API variable names -ERA5_dataset_variable_names = Dict( - :temperature => "2m_temperature", - :dewpoint_temperature => "2m_dewpoint_temperature", - :eastward_velocity => "10m_u_component_of_wind", - :northward_velocity => "10m_v_component_of_wind", - :surface_pressure => "surface_pressure", - :mean_sea_level_pressure => "mean_sea_level_pressure", - :total_precipitation => "total_precipitation", - :sea_surface_temperature => "sea_surface_temperature", - :downwelling_shortwave_radiation => "surface_solar_radiation_downwards", - :downwelling_longwave_radiation => "surface_thermal_radiation_downwards", - :total_cloud_cover => "total_cloud_cover", - :evaporation => "evaporation", - :specific_humidity => "specific_humidity", - :eastward_stokes_drift => "u_component_stokes_drift", - :northward_stokes_drift => "v_component_stokes_drift", - :significant_wave_height => "significant_height_of_combined_wind_waves_and_swell", - :mean_wave_period => "mean_wave_period", - :mean_wave_direction => "mean_wave_direction", -) - -# Variables available for download -ERA5_variable_names = keys(ERA5_dataset_variable_names) - -available_variables(::ERA5Dataset) = ERA5_dataset_variable_names - -dataset_variable_name(metadata::ERA5Metadata) = ERA5_dataset_variable_names[metadata.name] - -# NetCDF short variable names (what's actually in the downloaded files) -# These differ from the CDS API variable names above -ERA5_netcdf_variable_names = Dict( - :temperature => "t2m", - :dewpoint_temperature => "d2m", - :eastward_velocity => "u10", - :northward_velocity => "v10", - :surface_pressure => "sp", - :mean_sea_level_pressure => "msl", - :total_precipitation => "tp", - :sea_surface_temperature => "sst", - :downwelling_shortwave_radiation => "ssrd", - :downwelling_longwave_radiation => "strd", - :total_cloud_cover => "tcc", - :evaporation => "e", - :specific_humidity => "q", - :eastward_stokes_drift => "ust", - :northward_stokes_drift => "vst", - :significant_wave_height => "swh", - :mean_wave_period => "mwp", - :mean_wave_direction => "mwd", -) - -netcdf_variable_name(metadata::ERA5Metadata) = ERA5_netcdf_variable_names[metadata.name] - ##### -##### ERA5 pressure-level variable name mappings +##### Grid interfaces ##### -ERA5PL_dataset_variable_names = Dict( - :temperature => "temperature", - :eastward_velocity => "u_component_of_wind", - :northward_velocity => "v_component_of_wind", - :vertical_velocity => "vertical_velocity", - :geopotential => "geopotential", - :geopotential_height => "geopotential", - :specific_humidity => "specific_humidity", - :relative_humidity => "relative_humidity", - :vorticity => "vorticity", - :divergence => "divergence", - :potential_vorticity => "potential_vorticity", - :ozone_mass_mixing_ratio => "ozone_mass_mixing_ratio", - :fraction_of_cloud_cover => "fraction_of_cloud_cover", - :specific_cloud_liquid_water_content => "specific_cloud_liquid_water_content", - :specific_cloud_ice_water_content => "specific_cloud_ice_water_content", -) - -ERA5PL_netcdf_variable_names = Dict( - :temperature => "t", - :eastward_velocity => "u", - :northward_velocity => "v", - :vertical_velocity => "w", - :geopotential => "z", - :geopotential_height => "z", - :specific_humidity => "q", - :relative_humidity => "r", - :vorticity => "vo", - :divergence => "d", - :potential_vorticity => "pv", - :ozone_mass_mixing_ratio => "o3", - :fraction_of_cloud_cover => "cc", - :specific_cloud_liquid_water_content => "clwc", - :specific_cloud_ice_water_content => "ciwc", -) - -available_variables(::ERA5PressureDataset) = ERA5PL_dataset_variable_names -dataset_variable_name(md::ERA5PressureMetadata) = ERA5PL_dataset_variable_names[md.name] -netcdf_variable_name(md::ERA5PressureMetadata) = ERA5PL_netcdf_variable_names[md.name] - -conversion_units(md::ERA5PressureMetadata) = - md.name == :geopotential_height ? InverseGravity() : nothing - -default_inpainting(md::ERA5Metadata) = nothing -default_inpainting(md::ERA5PressureMetadata) = nothing - -""" - retrieve_data(metadata::ERA5Metadatum) - -Retrieve ERA5 data from NetCDF file according to `metadata`. -ERA5 is 2D surface data, so we return a 2D array with an added singleton z-dimension. -""" -function retrieve_data(metadata::ERA5Metadatum) - path = metadata_path(metadata) - name = netcdf_variable_name(metadata) - - ds = NCDatasets.Dataset(path) - - # ERA5 is 2D + time, we take the first time step - # Data shape is typically (lon, lat) or (lon, lat, time) - raw_data = ds[name] - ndim = ndims(raw_data) - - if ndim == 2 - data_2d = raw_data[:, :] - elseif ndim == 3 - data_2d = raw_data[:, :, 1] - else - error("Unexpected ERA5 data dimensions: $ndim") - end - - close(ds) +location(::ERA5Metadata) = (Center, Center, Center) - # Latitude is stored from 90°N → 90°S - data_2d = reverse(data_2d, dims=2) - - # Add singleton z-dimension for 3D field compatibility - # Return as (Nx, Ny, 1) - return reshape(data_2d, size(data_2d, 1), size(data_2d, 2), 1) -end +# ERA5 global coverage: 0-359.75 longitude, -90 to 90 latitude at 0.25 degree resolution +longitude_interfaces(::ERA5Metadata) = (-0.125, 359.875) +latitude_interfaces(::ERA5Metadata) = (-90, 90) -""" - retrieve_data(metadata::ERA5PressureMetadatum) +# ERA5 single-levels (2-D) data product +z_interfaces(::ERA5Metadata) = (0, 1) -Retrieve ERA5 pressure-level data from a NetCDF file. -Returns a 3D array (lon, lat, level) with levels ordered bottom-to-top -(highest pressure at k=1, lowest pressure at k=Nz). -""" -function retrieve_data(metadata::ERA5PressureMetadatum) - path = metadata_path(metadata) - name = netcdf_variable_name(metadata) - ds = NCDatasets.Dataset(path) - data = ds[name][:, :, :, 1] # (lon, lat, pressure_level, time=1) - close(ds) - return reverse(data, dims=2) # Latitude is stored from 90°N → 90°S -end +# ERA5 data is stored as Float32 +eltype(::ERA5Metadata) = Float32 ##### -##### Metadata filename construction +##### Shared filename utilities ##### function date_str(date::DateTime) @@ -312,56 +102,6 @@ function bbox_strs(c) return first, second end -function metadata_prefix(metadata::ERA5PressureMetadata) - var = ERA5PL_dataset_variable_names[metadata.name] - dataset = dataset_name(metadata.dataset) - start_date = start_date_str(metadata.dates) - end_date = end_date_str(metadata.dates) - bbox = metadata.bounding_box - - if !isnothing(bbox) - w, e = bbox_strs(bbox.longitude) - s, n = bbox_strs(bbox.latitude) - suffix = string(w, e, s, n) - else - suffix = "" - end - - if start_date == end_date - prefix = string(var, "_", dataset, "_", start_date, suffix) - else - prefix = string(var, "_", dataset, "_", start_date, "_", end_date, suffix) - end - prefix = colon2dash(prefix) - prefix = underscore_spaces(prefix) - return prefix -end - -function metadata_prefix(metadata::ERA5Metadata) - var = ERA5_dataset_variable_names[metadata.name] - dataset = dataset_name(metadata.dataset) - start_date = start_date_str(metadata.dates) - end_date = end_date_str(metadata.dates) - bbox = metadata.bounding_box - - if !isnothing(bbox) - w, e = bbox_strs(bbox.longitude) - s, n = bbox_strs(bbox.latitude) - suffix = string(w, e, s, n) - else - suffix = "" - end - - if start_date == end_date - prefix = string(var, "_", dataset, "_", start_date, suffix) - else - prefix = string(var, "_", dataset, "_", start_date, "_", end_date, suffix) - end - prefix = colon2dash(prefix) - prefix = underscore_spaces(prefix) - return prefix -end - function metadata_filename(metadata::ERA5Metadatum) prefix = metadata_prefix(metadata) return string(prefix, ".nc") @@ -380,134 +120,10 @@ end inpainted_metadata_path(metadata::ERA5Metadata) = joinpath(metadata.dir, inpainted_metadata_filename(metadata)) ##### -##### Grid interfaces +##### Single-level and pressure-level specifics ##### -location(::ERA5Metadata) = (Center, Center, Center) - -# ERA5 global coverage: 0-359.75 longitude, -90 to 90 latitude at 0.25 degree resolution -longitude_interfaces(::ERA5Metadata) = (-0.125, 359.875) -latitude_interfaces(::ERA5Metadata) = (-90, 90) - -# ERA5 single-levels (2-D) data product -z_interfaces(::ERA5Metadata) = (0, 1) - -# ERA5 data is stored as Float32 -eltype(::ERA5Metadata) = Float32 - -##### -##### Pressure-level vertical coordinate -##### - -const ERA5_gravitational_acceleration = 9.80665 - -# International Standard Atmosphere height (m) for a given pressure (hPa) -function standard_atmosphere_geopotential_height(p) - g = ERA5_gravitational_acceleration - T⁰ = 288.15 # K - p⁰ = 1013.25 * hPa - Rᵈ = 287.0528 # J/(kg-K) - - return (Rᵈ * T⁰ / g) * log(p⁰ / p) -end - -# Build z-interfaces (Nz+1 values) from pressure levels. -# Levels may be in any order; output is sorted so k=1 is highest pressure (lowest altitude). -function standard_atmosphere_z_interfaces(levels) - @info "Calculating z-interfaces based on the International Standard Atmosphere... \ - For greater accuracy, download the :geopotential field and \ - set `dataset.z = mean_geopotential_z_interfaces(metadata)`" - sorted_levels = sort(levels, rev=true) # highest pressure first → k=1 is bottom - heights = standard_atmosphere_geopotential_height.(Float64.(sorted_levels)) - Nz = length(heights) - - interfaces = Vector{Float64}(undef, Nz + 1) - - if Nz == 1 - interfaces[1] = heights[1] - 0.5 - interfaces[2] = heights[1] + 0.5 - else - interfaces[1] = heights[1] - (heights[2] - heights[1]) / 2 - for k in 2:Nz - interfaces[k] = (heights[k-1] + heights[k]) / 2 - end - interfaces[Nz+1] = heights[Nz] + (heights[Nz] - heights[Nz-1]) / 2 - end - - return interfaces -end - -# ERA5 pressure-levels (3-D) data product -function z_interfaces(metadata::ERA5PressureMetadata) - z = metadata.dataset.z - return isnothing(z) ? standard_atmosphere_z_interfaces(metadata.dataset.pressure_levels) : z -end - -##### -##### pressure_field — synthetic pressure coordinate field -##### - -""" - pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3)) - -Return a `CenterField` on the native grid of `metadata` filled with the pressure -value (hPa) at each vertical level. Levels are ordered bottom-to-top (k=1 is the -highest pressure level). -""" -function pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3)) - grid = native_grid(metadata, arch; halo) - field = CenterField(grid) - reversed_levels = sort(metadata.dataset.pressure_levels, rev=true) # highest pressure → k=1 - for (k, p) in enumerate(reversed_levels) - interior(field)[:, :, k] .= Float32(p) - end - fill_halo_regions!(field) - return field -end - -##### -##### mean_geopotential_heights — data-derived static z-coordinate -##### - -""" - mean_geopotential_heights(metadata::ERA5PressureMetadata; interfaces=false) - -Compute spatially and temporally averaged geopotential heights (m) for each -pressure level in `metadata`. - -Downloads the `:geopotential` field for every date in `metadata`, divides by g, -averages over the horizontal domain and all dates, and returns one representative -height per pressure level in bottom-to-top order (k=1 is highest pressure). -""" -function mean_geopotential_heights(metadata::ERA5PressureMetadata) - geo_metadata = Metadata(:geopotential; dataset=metadata.dataset, - dates=metadata.dates, bounding_box=metadata.bounding_box, - dir=metadata.dir) - heights = zeros(length(metadata.dataset.pressure_levels)) - # average over time - for geo_datum in geo_metadata - data = retrieve_data(geo_datum) ./ Float32(ERA5_gravitational_acceleration) # Φ → Z (m) - # average over horizontal dims - heights .+= dropdims(mean(data; dims=(1, 2)); dims=(1, 2)) - end - heights ./= length(geo_metadata) - - return sort(heights) -end - -function mean_geopotential_z_interfaces(metadata::ERA5PressureMetadata) - return stagger(mean_geopotential_heights(metadata)) -end - -function stagger(zc::AbstractVector) - # heights are ascending (k=1 = highest pressure = lowest altitude, - # consistent with retrieve_data's reverse() and Oceananigans bottom-to-top - # convention); bottom and top interfaces are extrapolated - zf = (zc[1:end-1] .+ zc[2:end]) / 2 # Nz-1 interior interfaces - pushfirst!(zf, zc[1] - (zf[1] - zc[1])) # bottom interface - push!(zf, zc[end] + (zc[end] - zf[end])) # top interface - return zf -end +include("ERA5_single_levels.jl") +include("ERA5_pressure_levels.jl") end # module ERA5 - diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl new file mode 100644 index 000000000..c9feb2dae --- /dev/null +++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl @@ -0,0 +1,257 @@ +abstract type ERA5PressureDataset <: ERA5Dataset end + +# ERA5PressureMetadata is a subtype of ERA5Metadata +const ERA5PressureMetadata{D} = Metadata{<:ERA5PressureDataset, D} +const ERA5PressureMetadatum = Metadatum{<:ERA5PressureDataset} + +mutable struct ERA5HourlyPressureLevels <: ERA5PressureDataset + pressure_levels :: Vector{Float64} + z :: Union{Nothing, Vector{Float64}} + ERA5HourlyPressureLevels(pressure_levels, z=nothing) = new(sort(pressure_levels, rev=true), z) +end +ERA5HourlyPressureLevels(; pressure_levels=ERA5_all_pressure_levels, z=nothing) = + ERA5HourlyPressureLevels(pressure_levels, z) + +mutable struct ERA5MonthlyPressureLevels <: ERA5PressureDataset + pressure_levels :: Vector{Float64} + z :: Union{Nothing, Vector{Float64}} + ERA5MonthlyPressureLevels(pressure_levels, z=nothing) = new(sort(pressure_levels, rev=true), z) +end +ERA5MonthlyPressureLevels(; pressure_levels=ERA5_all_pressure_levels, z=nothing) = + ERA5MonthlyPressureLevels(pressure_levels, z) + +dataset_name(::ERA5HourlyPressureLevels) = "ERA5HourlyPressureLevels" +dataset_name(::ERA5MonthlyPressureLevels) = "ERA5MonthlyPressureLevels" + +##### +##### ERA5 pressure-level data availability +##### + +# ERA5 reanalysis data available from 1940 to present (we use a practical range here) +all_dates(::ERA5HourlyPressureLevels, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-31"), step=Hour(1)) +all_dates(::ERA5MonthlyPressureLevels, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-01"), step=Month(1)) + +# ERA5 pressure-level data is a spatially 3-D dataset +is_three_dimensional(::ERA5PressureMetadata) = true + +const hPa = 100.0 +const ERA5_all_pressure_levels = [1, 2, 3, 5, 7, 10, 20, 30, 50, 70, 100, 125, 150, + 175, 200, 225, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 775, 800, + 825, 850, 875, 900, 925, 950, 975, 1000]*hPa + +# ERA5 stores pressure levels bottom-to-top +reversed_vertical_axis(::ERA5PressureDataset) = false + +Base.size(ds::ERA5PressureDataset, variable) = (1440, 720, length(ds.pressure_levels)) + +##### +##### ERA5 pressure-level variable name mappings +##### + +ERA5PL_dataset_variable_names = Dict( + :temperature => "temperature", + :eastward_velocity => "u_component_of_wind", + :northward_velocity => "v_component_of_wind", + :vertical_velocity => "vertical_velocity", + :geopotential => "geopotential", + :geopotential_height => "geopotential", + :specific_humidity => "specific_humidity", + :relative_humidity => "relative_humidity", + :vorticity => "vorticity", + :divergence => "divergence", + :potential_vorticity => "potential_vorticity", + :ozone_mass_mixing_ratio => "ozone_mass_mixing_ratio", + :fraction_of_cloud_cover => "fraction_of_cloud_cover", + :specific_cloud_liquid_water_content => "specific_cloud_liquid_water_content", + :specific_cloud_ice_water_content => "specific_cloud_ice_water_content", +) + +# NetCDF short variable names (what's actually in the downloaded files) +# These differ from the CDS API variable names above +ERA5PL_netcdf_variable_names = Dict( + :temperature => "t", + :eastward_velocity => "u", + :northward_velocity => "v", + :vertical_velocity => "w", + :geopotential => "z", + :geopotential_height => "z", + :specific_humidity => "q", + :relative_humidity => "r", + :vorticity => "vo", + :divergence => "d", + :potential_vorticity => "pv", + :ozone_mass_mixing_ratio => "o3", + :fraction_of_cloud_cover => "cc", + :specific_cloud_liquid_water_content => "clwc", + :specific_cloud_ice_water_content => "ciwc", +) + +# Variables available for download +available_variables(::ERA5PressureDataset) = ERA5PL_dataset_variable_names +dataset_variable_name(md::ERA5PressureMetadata) = ERA5PL_dataset_variable_names[md.name] +netcdf_variable_name(md::ERA5PressureMetadata) = ERA5PL_netcdf_variable_names[md.name] + +conversion_units(md::ERA5PressureMetadata) = + md.name == :geopotential_height ? InverseGravity() : nothing + +default_inpainting(md::ERA5PressureMetadata) = nothing + +""" + retrieve_data(metadata::ERA5PressureMetadatum) + +Retrieve ERA5 pressure-level data from a NetCDF file. +Returns a 3D array (lon, lat, level) with levels ordered bottom-to-top +(highest pressure at k=1, lowest pressure at k=Nz). +""" +function retrieve_data(metadata::ERA5PressureMetadatum) + path = metadata_path(metadata) + name = netcdf_variable_name(metadata) + ds = NCDatasets.Dataset(path) + data = ds[name][:, :, :, 1] # (lon, lat, pressure_level, time=1) + close(ds) + return reverse(data, dims=2) # Latitude is stored from 90°N → 90°S +end + +##### +##### Metadata filename construction +##### + +function metadata_prefix(md::ERA5PressureMetadata) + var = ERA5PL_dataset_variable_names[md.name] + dataset = dataset_name(md.dataset) + start_date = start_date_str(md.dates) + end_date = end_date_str(md.dates) + bbox = md.bounding_box + + if !isnothing(bbox) + w, e = bbox_strs(bbox.longitude) + s, n = bbox_strs(bbox.latitude) + suffix = string(w, e, s, n) + else + suffix = "" + end + + if start_date == end_date + prefix = string(var, "_", dataset, "_", start_date, suffix) + else + prefix = string(var, "_", dataset, "_", start_date, "_", end_date, suffix) + end + prefix = colon2dash(prefix) + prefix = underscore_spaces(prefix) + return prefix +end + +##### +##### Pressure-level vertical coordinate +##### + +const ERA5_gravitational_acceleration = 9.80665 + +# International Standard Atmosphere height (m) for a given pressure (hPa) +function standard_atmosphere_geopotential_height(p) + g = ERA5_gravitational_acceleration + T⁰ = 288.15 # K + p⁰ = 1013.25 * hPa + Rᵈ = 287.0528 # J/(kg-K) + + return (Rᵈ * T⁰ / g) * log(p⁰ / p) +end + +# Build z-interfaces (Nz+1 values) from pressure levels. +# Levels may be in any order; output is sorted so k=1 is highest pressure (lowest altitude). +function standard_atmosphere_z_interfaces(levels) + @info "Calculating z-interfaces based on the International Standard Atmosphere... \ + For greater accuracy, download the :geopotential field and \ + set `dataset.z = mean_geopotential_z_interfaces(ϕ_metadata)`" + sorted_levels = sort(levels, rev=true) # highest pressure first → k=1 is bottom + heights = standard_atmosphere_geopotential_height.(Float64.(sorted_levels)) + Nz = length(heights) + + interfaces = Vector{Float64}(undef, Nz + 1) + + if Nz == 1 + interfaces[1] = heights[1] - 0.5 + interfaces[2] = heights[1] + 0.5 + else + interfaces[1] = heights[1] - (heights[2] - heights[1]) / 2 + for k in 2:Nz + interfaces[k] = (heights[k-1] + heights[k]) / 2 + end + interfaces[Nz+1] = heights[Nz] + (heights[Nz] - heights[Nz-1]) / 2 + end + + return interfaces +end + +# ERA5 pressure-levels (3-D) data product +function z_interfaces(metadata::ERA5PressureMetadata) + z = metadata.dataset.z + return isnothing(z) ? standard_atmosphere_z_interfaces(metadata.dataset.pressure_levels) : z +end + +##### +##### mean_geopotential_heights — data-derived static z-coordinate +##### + +""" + mean_geopotential_heights(metadata::ERA5PressureMetadata) + +Compute spatially and temporally averaged geopotential heights (m) for each +pressure level in `metadata`. + +Downloads the `:geopotential` field for every date in `metadata`, divides by g, +averages over the horizontal domain and all dates, and returns one representative +height per pressure level in bottom-to-top order (k=1 is highest pressure). +""" +function mean_geopotential_heights(metadata::ERA5PressureMetadata) + geo_metadata = Metadata(:geopotential; dataset=metadata.dataset, + dates=metadata.dates, bounding_box=metadata.bounding_box, + dir=metadata.dir) + heights = zeros(length(metadata.dataset.pressure_levels)) + # average over time + for geo_datum in geo_metadata + data = retrieve_data(geo_datum) ./ Float32(ERA5_gravitational_acceleration) # Φ → Z (m) + # average over horizontal dims + heights .+= dropdims(mean(data; dims=(1, 2)); dims=(1, 2)) + end + heights ./= length(geo_metadata) + + return sort(heights) +end + +function mean_geopotential_z_interfaces(metadata::ERA5PressureMetadata) + return stagger(mean_geopotential_heights(metadata)) +end + +function stagger(zc::AbstractVector) + # heights are ascending (k=1 = highest pressure = lowest altitude, + # consistent with retrieve_data's reverse() and Oceananigans bottom-to-top + # convention); bottom and top interfaces are extrapolated + zf = (zc[1:end-1] .+ zc[2:end]) / 2 # Nz-1 interior interfaces + pushfirst!(zf, zc[1] - (zf[1] - zc[1])) # bottom interface + push!(zf, zc[end] + (zc[end] - zf[end])) # top interface + return zf +end + +##### +##### pressure_field — synthetic pressure coordinate field +##### + +""" + pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3)) + +Return a `CenterField` on the native grid of `metadata` filled with the pressure +value (hPa) at each vertical level. Levels are ordered bottom-to-top (k=1 is the +highest pressure level). +""" +function pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3)) + grid = native_grid(metadata, arch; halo) + field = CenterField(grid) + reversed_levels = sort(metadata.dataset.pressure_levels, rev=true) # highest pressure → k=1 + for (k, p) in enumerate(reversed_levels) + interior(field)[:, :, k] .= Float32(p) + end + fill_halo_regions!(field) + return field +end + diff --git a/src/DataWrangling/ERA5/ERA5_single_levels.jl b/src/DataWrangling/ERA5/ERA5_single_levels.jl new file mode 100644 index 000000000..5ebbd46ed --- /dev/null +++ b/src/DataWrangling/ERA5/ERA5_single_levels.jl @@ -0,0 +1,150 @@ +struct ERA5SingleLevelsHourly <: ERA5Dataset end +struct ERA5SingleLevelsMonthly <: ERA5Dataset end + +dataset_name(::ERA5SingleLevelsHourly) = "ERA5SingleLevelsHourly" +dataset_name(::ERA5SingleLevelsMonthly) = "ERA5SingleLevelsMonthly" + +# Wave variables are on a 0.5° grid (720×361), atmospheric variables on 0.25° (1440×721) +const ERA5_wave_variables = Set([ + :eastward_stokes_drift, :northward_stokes_drift, + :significant_wave_height, :mean_wave_period, :mean_wave_direction, +]) + +##### +##### ERA5 single-level data availability +##### + +# ERA5 reanalysis data available from 1940 to present (we use a practical range here) +all_dates(::ERA5SingleLevelsHourly, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-31"), step=Hour(1)) +all_dates(::ERA5SingleLevelsMonthly, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-01"), step=Month(1)) + +# ERA5 single-level data is a spatially 2-D dataset +is_three_dimensional(::ERA5Metadata) = false + +function Base.size(::ERA5Dataset, variable) + if variable in ERA5_wave_variables + return (720, 361, 1) + else + return (1440, 721, 1) + end +end + +##### +##### ERA5 single-level variable name mappings +##### + +# Variable name mappings from NumericalEarth names to ERA5/CDS API variable names +ERA5_dataset_variable_names = Dict( + :temperature => "2m_temperature", + :dewpoint_temperature => "2m_dewpoint_temperature", + :eastward_velocity => "10m_u_component_of_wind", + :northward_velocity => "10m_v_component_of_wind", + :surface_pressure => "surface_pressure", + :mean_sea_level_pressure => "mean_sea_level_pressure", + :total_precipitation => "total_precipitation", + :sea_surface_temperature => "sea_surface_temperature", + :downwelling_shortwave_radiation => "surface_solar_radiation_downwards", + :downwelling_longwave_radiation => "surface_thermal_radiation_downwards", + :total_cloud_cover => "total_cloud_cover", + :evaporation => "evaporation", + :specific_humidity => "specific_humidity", + :eastward_stokes_drift => "u_component_stokes_drift", + :northward_stokes_drift => "v_component_stokes_drift", + :significant_wave_height => "significant_height_of_combined_wind_waves_and_swell", + :mean_wave_period => "mean_wave_period", + :mean_wave_direction => "mean_wave_direction", +) + +# NetCDF short variable names (what's actually in the downloaded files) +# These differ from the CDS API variable names above +ERA5_netcdf_variable_names = Dict( + :temperature => "t2m", + :dewpoint_temperature => "d2m", + :eastward_velocity => "u10", + :northward_velocity => "v10", + :surface_pressure => "sp", + :mean_sea_level_pressure => "msl", + :total_precipitation => "tp", + :sea_surface_temperature => "sst", + :downwelling_shortwave_radiation => "ssrd", + :downwelling_longwave_radiation => "strd", + :total_cloud_cover => "tcc", + :evaporation => "e", + :specific_humidity => "q", + :eastward_stokes_drift => "ust", + :northward_stokes_drift => "vst", + :significant_wave_height => "swh", + :mean_wave_period => "mwp", + :mean_wave_direction => "mwd", +) + +# Variables available for download +available_variables(::ERA5Dataset) = ERA5_dataset_variable_names +dataset_variable_name(md::ERA5Metadata) = ERA5_dataset_variable_names[md.name] +netcdf_variable_name(md::ERA5Metadata) = ERA5_netcdf_variable_names[md.name] + +default_inpainting(md::ERA5Metadata) = nothing + +""" + retrieve_data(metadata::ERA5Metadatum) + +Retrieve ERA5 data from NetCDF file according to `metadata`. +ERA5 is 2D surface data, so we return a 2D array with an added singleton z-dimension. +""" +function retrieve_data(metadata::ERA5Metadatum) + path = metadata_path(metadata) + name = netcdf_variable_name(metadata) + + ds = NCDatasets.Dataset(path) + + # ERA5 is 2D + time, we take the first time step + # Data shape is typically (lon, lat) or (lon, lat, time) + raw_data = ds[name] + ndim = ndims(raw_data) + + if ndim == 2 + data_2d = raw_data[:, :] + elseif ndim == 3 + data_2d = raw_data[:, :, 1] + else + error("Unexpected ERA5 data dimensions: $ndim") + end + + close(ds) + + # Latitude is stored from 90°N → 90°S + data_2d = reverse(data_2d, dims=2) + + # Add singleton z-dimension for 3D field compatibility + # Return as (Nx, Ny, 1) + return reshape(data_2d, size(data_2d, 1), size(data_2d, 2), 1) +end + +##### +##### Metadata filename construction +##### + +function metadata_prefix(md::ERA5Metadata) + var = ERA5_dataset_variable_names[md.name] + dataset = dataset_name(md.dataset) + start_date = start_date_str(md.dates) + end_date = end_date_str(md.dates) + bbox = md.bounding_box + + if !isnothing(bbox) + w, e = bbox_strs(bbox.longitude) + s, n = bbox_strs(bbox.latitude) + suffix = string(w, e, s, n) + else + suffix = "" + end + + if start_date == end_date + prefix = string(var, "_", dataset, "_", start_date, suffix) + else + prefix = string(var, "_", dataset, "_", start_date, "_", end_date, suffix) + end + prefix = colon2dash(prefix) + prefix = underscore_spaces(prefix) + return prefix +end From c5c2e2f4b12b856221a0c44ad08218b71091473c Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 10 Mar 2026 23:33:57 -0700 Subject: [PATCH 24/48] Use consistent ERA5 naming --- examples/ERA5_winds_and_stokes_drift.jl | 4 ++-- src/DataWrangling/DataWrangling.jl | 2 +- src/DataWrangling/ERA5/ERA5.jl | 2 +- src/DataWrangling/ERA5/ERA5_single_levels.jl | 12 ++++++------ test/test_cds_downloading.jl | 10 +++++----- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/examples/ERA5_winds_and_stokes_drift.jl b/examples/ERA5_winds_and_stokes_drift.jl index 871359b1f..d0fe9d018 100644 --- a/examples/ERA5_winds_and_stokes_drift.jl +++ b/examples/ERA5_winds_and_stokes_drift.jl @@ -16,7 +16,7 @@ using NumericalEarth using NumericalEarth.DataWrangling: Metadatum -using NumericalEarth.DataWrangling.ERA5: ERA5SingleLevelsHourly +using NumericalEarth.DataWrangling.ERA5: ERA5HourlySingleLevel using CDSAPI using Oceananigans @@ -29,7 +29,7 @@ using Dates # while ocean wave variables (Stokes drift) live on a 0.5° grid (720×361). # We define metadata for each variable at a single date. -dataset = ERA5SingleLevelsHourly() +dataset = ERA5HourlySingleLevel() date = DateTime(2020, 1, 15, 12) # January 15, 2020 at 12:00 UTC u_stokes_meta = Metadatum(:eastward_stokes_drift; dataset, date) diff --git a/src/DataWrangling/DataWrangling.jl b/src/DataWrangling/DataWrangling.jl index 142eb47b2..cb03f6040 100644 --- a/src/DataWrangling/DataWrangling.jl +++ b/src/DataWrangling/DataWrangling.jl @@ -9,7 +9,7 @@ export WOAClimatology, WOAAnnual, WOAMonthly export metadata_time_step, metadata_epoch export LinearlyTaperedPolarMask export DatasetRestoring -export ERA5SingleLevelsHourly, ERA5SingleLevelsMonthly, ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels +export ERA5HourlySingleLevel, ERA5MonthlySingleLevel, ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels using Oceananigans using Downloads diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index f790a7ced..85b131fc6 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -1,7 +1,7 @@ module ERA5 # 2-D data -export ERA5SingleLevelsHourly, ERA5SingleLevelsMonthly +export ERA5HourlySingleLevel, ERA5MonthlySingleLevel # 3-D data export ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels, ERA5_all_pressure_levels, pressure_field, hPa diff --git a/src/DataWrangling/ERA5/ERA5_single_levels.jl b/src/DataWrangling/ERA5/ERA5_single_levels.jl index 5ebbd46ed..67f9f785b 100644 --- a/src/DataWrangling/ERA5/ERA5_single_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_single_levels.jl @@ -1,8 +1,8 @@ -struct ERA5SingleLevelsHourly <: ERA5Dataset end -struct ERA5SingleLevelsMonthly <: ERA5Dataset end +struct ERA5HourlySingleLevel <: ERA5Dataset end +struct ERA5MonthlySingleLevel <: ERA5Dataset end -dataset_name(::ERA5SingleLevelsHourly) = "ERA5SingleLevelsHourly" -dataset_name(::ERA5SingleLevelsMonthly) = "ERA5SingleLevelsMonthly" +dataset_name(::ERA5HourlySingleLevel) = "ERA5HourlySingleLevel" +dataset_name(::ERA5MonthlySingleLevel) = "ERA5MonthlySingleLevel" # Wave variables are on a 0.5° grid (720×361), atmospheric variables on 0.25° (1440×721) const ERA5_wave_variables = Set([ @@ -15,8 +15,8 @@ const ERA5_wave_variables = Set([ ##### # ERA5 reanalysis data available from 1940 to present (we use a practical range here) -all_dates(::ERA5SingleLevelsHourly, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-31"), step=Hour(1)) -all_dates(::ERA5SingleLevelsMonthly, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-01"), step=Month(1)) +all_dates(::ERA5HourlySingleLevel, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-31"), step=Hour(1)) +all_dates(::ERA5MonthlySingleLevel, var) = range(DateTime("1940-01-01"), stop=DateTime("2024-12-01"), step=Month(1)) # ERA5 single-level data is a spatially 2-D dataset is_three_dimensional(::ERA5Metadata) = false diff --git a/test/test_cds_downloading.jl b/test/test_cds_downloading.jl index 304d17079..5ffad9671 100644 --- a/test/test_cds_downloading.jl +++ b/test/test_cds_downloading.jl @@ -5,7 +5,7 @@ using Dates using NCDatasets using NumericalEarth.DataWrangling.ERA5 -using NumericalEarth.DataWrangling.ERA5: ERA5SingleLevelsHourly, ERA5SingleLevelsMonthly, ERA5_dataset_variable_names +using NumericalEarth.DataWrangling.ERA5: ERA5HourlySingleLevel, ERA5MonthlySingleLevel, ERA5_dataset_variable_names using NumericalEarth.DataWrangling.ERA5: ERA5HourlyPressureLevels, ERA5MonthlyPressureLevels, ERA5_all_pressure_levels, ERA5PL_dataset_variable_names, pressure_field @@ -17,7 +17,7 @@ start_date = DateTime(2005, 2, 16, 12) @testset "ERA5 data downloading and utilities" begin @info "Testing ERA5 downloading and NetCDF file verification..." - dataset = ERA5SingleLevelsHourly() + dataset = ERA5HourlySingleLevel() # Use a small bounding box to reduce download time bounding_box = NumericalEarth.DataWrangling.BoundingBox(longitude=(0, 5), latitude=(40, 45)) @@ -88,7 +88,7 @@ start_date = DateTime(2005, 2, 16, 12) # Test metadata properties @test metadatum.name == :temperature - @test metadatum.dataset isa ERA5SingleLevelsHourly + @test metadatum.dataset isa ERA5HourlySingleLevel @test metadatum.dates == start_date @test metadatum.bounding_box == bounding_box @@ -127,8 +127,8 @@ start_date = DateTime(2005, 2, 16, 12) end @testset "ERA5 Monthly dataset" begin - monthly_dataset = ERA5SingleLevelsMonthly() - @test monthly_dataset isa ERA5SingleLevelsMonthly + monthly_dataset = ERA5MonthlySingleLevel() + @test monthly_dataset isa ERA5MonthlySingleLevel # Test that all_dates returns a valid range dates = NumericalEarth.DataWrangling.all_dates(monthly_dataset, :temperature) From 77d33f35ec3500d2af6466fbe182e85f2da9819e Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 11 Mar 2026 13:11:23 -0700 Subject: [PATCH 25/48] Automatic geopotential download to calculate mean geopotential heights --- src/DataWrangling/ERA5/ERA5.jl | 2 +- .../ERA5/ERA5_pressure_levels.jl | 67 ++++++++++++------- 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 85b131fc6..3bb770e00 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -14,7 +14,7 @@ using Statistics using Oceananigans.Fields: Center using Oceananigans: CenterField, interior, fill_halo_regions!, CPU -using NumericalEarth.DataWrangling: Metadata, Metadatum, metadata_path, native_grid, InverseGravity +using NumericalEarth.DataWrangling: Metadata, Metadatum, metadata_path, native_grid, InverseGravity, download_dataset using Dates using Dates: DateTime, Day, Month, Hour diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl index c9feb2dae..c5d360761 100644 --- a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl @@ -4,21 +4,25 @@ abstract type ERA5PressureDataset <: ERA5Dataset end const ERA5PressureMetadata{D} = Metadata{<:ERA5PressureDataset, D} const ERA5PressureMetadatum = Metadatum{<:ERA5PressureDataset} -mutable struct ERA5HourlyPressureLevels <: ERA5PressureDataset - pressure_levels :: Vector{Float64} - z :: Union{Nothing, Vector{Float64}} - ERA5HourlyPressureLevels(pressure_levels, z=nothing) = new(sort(pressure_levels, rev=true), z) +struct ERA5HourlyPressureLevels <: ERA5PressureDataset + pressure_levels :: Vector{Float64} + z :: Union{Nothing, Vector{Float64}} + mean_geopotential_height :: Bool + ERA5HourlyPressureLevels(pressure_levels, z=nothing; mean_geopotential_height=true) = + new(sort(pressure_levels, rev=true), z, mean_geopotential_height) end -ERA5HourlyPressureLevels(; pressure_levels=ERA5_all_pressure_levels, z=nothing) = - ERA5HourlyPressureLevels(pressure_levels, z) - -mutable struct ERA5MonthlyPressureLevels <: ERA5PressureDataset - pressure_levels :: Vector{Float64} - z :: Union{Nothing, Vector{Float64}} - ERA5MonthlyPressureLevels(pressure_levels, z=nothing) = new(sort(pressure_levels, rev=true), z) +ERA5HourlyPressureLevels(; pressure_levels=ERA5_all_pressure_levels, z=nothing, mean_geopotential_height=true) = + ERA5HourlyPressureLevels(pressure_levels, z; mean_geopotential_height) + +struct ERA5MonthlyPressureLevels <: ERA5PressureDataset + pressure_levels :: Vector{Float64} + z :: Union{Nothing, Vector{Float64}} + mean_geopotential_height :: Bool + ERA5MonthlyPressureLevels(pressure_levels, z=nothing; mean_geopotential_height=true) = + new(sort(pressure_levels, rev=true), z, mean_geopotential_height) end -ERA5MonthlyPressureLevels(; pressure_levels=ERA5_all_pressure_levels, z=nothing) = - ERA5MonthlyPressureLevels(pressure_levels, z) +ERA5MonthlyPressureLevels(; pressure_levels=ERA5_all_pressure_levels, z=nothing, mean_geopotential_height=true) = + ERA5MonthlyPressureLevels(pressure_levels, z; mean_geopotential_height) dataset_name(::ERA5HourlyPressureLevels) = "ERA5HourlyPressureLevels" dataset_name(::ERA5MonthlyPressureLevels) = "ERA5MonthlyPressureLevels" @@ -161,8 +165,7 @@ end # Levels may be in any order; output is sorted so k=1 is highest pressure (lowest altitude). function standard_atmosphere_z_interfaces(levels) @info "Calculating z-interfaces based on the International Standard Atmosphere... \ - For greater accuracy, download the :geopotential field and \ - set `dataset.z = mean_geopotential_z_interfaces(ϕ_metadata)`" + For greater accuracy, set ERA5PressureDataset(; mean_geopotential_height=true)" sorted_levels = sort(levels, rev=true) # highest pressure first → k=1 is bottom heights = standard_atmosphere_geopotential_height.(Float64.(sorted_levels)) Nz = length(heights) @@ -185,8 +188,26 @@ end # ERA5 pressure-levels (3-D) data product function z_interfaces(metadata::ERA5PressureMetadata) - z = metadata.dataset.z - return isnothing(z) ? standard_atmosphere_z_interfaces(metadata.dataset.pressure_levels) : z + # Return cached z if already set + !isnothing(metadata.dataset.z) && return metadata.dataset.z + + # If mean_geopotential_height is enabled, try to download and compute + if metadata.dataset.mean_geopotential_height + ϕ_metadata = Metadata(:geopotential; dataset=metadata.dataset, + dates=metadata.dates, bounding_box=metadata.bounding_box, + dir=metadata.dir) + try + download_dataset(ϕ_metadata) + zf = mean_geopotential_z_interfaces(metadata) + metadata.dataset.z = zf + return zf + catch e + @warn "Failed to download geopotential data; falling back to standard atmosphere" exception=e + end + end + + # Fallback + return standard_atmosphere_z_interfaces(metadata.dataset.pressure_levels) end ##### @@ -204,17 +225,17 @@ averages over the horizontal domain and all dates, and returns one representativ height per pressure level in bottom-to-top order (k=1 is highest pressure). """ function mean_geopotential_heights(metadata::ERA5PressureMetadata) - geo_metadata = Metadata(:geopotential; dataset=metadata.dataset, - dates=metadata.dates, bounding_box=metadata.bounding_box, - dir=metadata.dir) + ϕ_metadata = Metadata(:geopotential; dataset=metadata.dataset, + dates=metadata.dates, bounding_box=metadata.bounding_box, + dir=metadata.dir) heights = zeros(length(metadata.dataset.pressure_levels)) # average over time - for geo_datum in geo_metadata - data = retrieve_data(geo_datum) ./ Float32(ERA5_gravitational_acceleration) # Φ → Z (m) + for ϕ_datum in ϕ_metadata + data = retrieve_data(ϕ_datum) ./ Float32(ERA5_gravitational_acceleration) # Φ → Z (m) # average over horizontal dims heights .+= dropdims(mean(data; dims=(1, 2)); dims=(1, 2)) end - heights ./= length(geo_metadata) + heights ./= length(ϕ_metadata) return sort(heights) end From 5aa675b2f4a95c185c6eac670a09d8a4cd62ac5e Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 11 Mar 2026 13:23:59 -0700 Subject: [PATCH 26/48] Streamline automatic download --- ext/NumericalEarthCDSAPIExt.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index 59e9bdbe9..e46454b15 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -105,12 +105,12 @@ end ##### ERA5 pressure-level download ##### -function download_dataset(metadata::ERA5PressureMetadata; kwargs...) - paths = Array{String}(undef, length(metadata)) - for (m, metadatum) in enumerate(metadata) - paths[m] = download_dataset(metadatum; kwargs...) - end - return paths +function download_dataset(metadata::ERA5PressureMetadata; skip_existing=true, cleanup=true) + dates = metadata.dates isa AbstractVector ? metadata.dates : [metadata.dates] + download_dataset([metadata.name], metadata.dataset, dates; + bounding_box = metadata.bounding_box, + dir = metadata.dir, + skip_existing, cleanup) end function download_dataset(meta::ERA5PressureMetadatum; skip_existing=true) From 79dcb1730153b0d0b42ece7e1c6a609b88ca1bc2 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Thu, 12 Mar 2026 10:24:07 -0700 Subject: [PATCH 27/48] Refactor; add examples --- examples/ERA5_bounded_pressure_level_data.jl | 121 ++++++ examples/ERA5_bounded_surface_data.jl | 92 +++++ ext/NumericalEarthCDSAPIExt.jl | 381 ++++++++---------- .../ERA5/ERA5_pressure_levels.jl | 6 +- src/DataWrangling/ERA5/ERA5_single_levels.jl | 4 +- test/test_cds_downloading.jl | 28 +- 6 files changed, 397 insertions(+), 235 deletions(-) create mode 100644 examples/ERA5_bounded_pressure_level_data.jl create mode 100644 examples/ERA5_bounded_surface_data.jl diff --git a/examples/ERA5_bounded_pressure_level_data.jl b/examples/ERA5_bounded_pressure_level_data.jl new file mode 100644 index 000000000..792a16ce5 --- /dev/null +++ b/examples/ERA5_bounded_pressure_level_data.jl @@ -0,0 +1,121 @@ +# Download 3-D snapshots of the atmospheric state from ERA5, e.g., to be used with FieldTimeSeries +# +# ## Install dependencies +# +# ```julia +# using Pkg +# pkg"add NumericalEarth CDSAPI" +# ``` +# +# You also need CDS API credentials in `~/.cdsapirc`. +# See for setup instructions. +# +# See for +# more information about pressure-level data + +using NumericalEarth +using NumericalEarth.DataWrangling: Metadata, BoundingBox, download_dataset +using NumericalEarth.DataWrangling.ERA5 +using CDSAPI +using Dates +using Oceananigans + +using Statistics +using CairoMakie + +selected_levels = filter(≥(250hPa), ERA5_all_pressure_levels) # select all levels below 250 hPa +dataset = ERA5HourlyPressureLevels(selected_levels) + +dates = DateTime(2004, 12, 16):Hour(1):DateTime(2005, 01, 09) # van Zanten et al 2011 study period +#dates = DateTime(2004, 12, 16):Hour(1):DateTime(2004, 12, 23) # shorter time range for demo + +# Rauber et al 2007, Fig 1: focus region +bounding_box = BoundingBox(latitude=(17, 18.5), longitude=(-62.5, -61)) + +# OPTIONAL: download all variables at once (fewer CDS API requests) +variables = [:geopotential, # for calculating zlevels as the mean geopotential height + :eastward_velocity, + :northward_velocity, + :temperature, + :specific_humidity] +download_dataset(variables, dataset, dates; bounding_box) + +# ## Create time series + +T_meta = Metadata(:temperature; dataset, dates, bounding_box) +q_meta = Metadata(:specific_humidity; dataset, dates, bounding_box) +u_meta = Metadata(:eastward_velocity; dataset, dates, bounding_box) +v_meta = Metadata(:northward_velocity; dataset, dates, bounding_box) + +T_series = FieldTimeSeries(T_meta; time_indices_in_memory=2) +q_series = FieldTimeSeries(q_meta; time_indices_in_memory=2) +u_series = FieldTimeSeries(u_meta; time_indices_in_memory=2) +v_series = FieldTimeSeries(v_meta; time_indices_in_memory=2) + +# ## Vertical profiles (mean ± spread) + +# Height coordinate (m) from the grid +_, _, z = nodes(T_series[1]) +Nz = length(z) +Nt = length(T_series.times) + +# Pressure at each level (hPa), ordered bottom-to-top (k=1 = highest pressure) +p_levels = sort(selected_levels, rev=true) ./ hPa # in hPa + +# Horizontal-mean profile for each snapshot → (Nz, Nt) arrays +function horizontal_mean_profiles(series) + profiles = zeros(Nz, Nt) + for n in 1:Nt + data = interior(series[n], :, :, :) + profiles[:, n] = dropdims(mean(data, dims=(1, 2)), dims=(1, 2)) + end + return profiles +end + +T_profiles = horizontal_mean_profiles(T_series) +q_profiles = horizontal_mean_profiles(q_series) +u_profiles = horizontal_mean_profiles(u_series) +v_profiles = horizontal_mean_profiles(v_series) + +# Convert T → potential temperature: θ = T (p₀/p)^(R/cₚ) +Rₐ_over_cₚ = 0.286 +θ_profiles = T_profiles .* (1000 ./ p_levels) .^ Rₐ_over_cₚ + +# Convert specific humidity from kg/kg → g/kg +q_profiles .*= 1000 + +# ## Plot + +fig = Figure(size=(900, 500), fontsize=12) + +ax_θ = Axis(fig[1, 1], xlabel="θ [K]", ylabel="Height [m]") +ax_q = Axis(fig[1, 2], xlabel="qᵥ [g kg⁻¹]", ylabel="Height [m]") +ax_u = Axis(fig[1, 3], xlabel="u [m s⁻¹]", ylabel="Height [m]") +ax_v = Axis(fig[1, 4], xlabel="v [m s⁻¹]", ylabel="Height [m]") + +for (ax, profiles) in [(ax_θ, θ_profiles), + (ax_q, q_profiles), + (ax_u, u_profiles), + (ax_v, v_profiles)] + μ = vec(mean(profiles, dims=2)) + #lo = vec(minimum(profiles, dims=2)) + #hi = vec(maximum(profiles, dims=2)) + lo = [quantile(r, 0.25) for r in eachrow(profiles)] + hi = [quantile(r, 0.75) for r in eachrow(profiles)] + band!(ax, z, lo, hi; direction=:y, color=(:gray, 0.4)) + lines!(ax, μ, z; color=:black, linewidth=2) +end + +xlims!(ax_θ, 293, 317) +xlims!(ax_q, 0, 20.4) +xlims!(ax_u, -10, 0) +xlims!(ax_v, -10, 0) + +linkyaxes!(ax_θ, ax_q, ax_u, ax_v) +ylims!(ax_θ, 0, 4000) + +hideydecorations!(ax_q) +hideydecorations!(ax_u) +hideydecorations!(ax_v) + +save("ERA5_pressure_level_profiles.png", fig; px_per_unit=4) diff --git a/examples/ERA5_bounded_surface_data.jl b/examples/ERA5_bounded_surface_data.jl new file mode 100644 index 000000000..99aaeb8c9 --- /dev/null +++ b/examples/ERA5_bounded_surface_data.jl @@ -0,0 +1,92 @@ +# Download 3-D snapshots of the atmospheric state from ERA5, e.g., to be used with FieldTimeSeries +# +# ## Install dependencies +# +# ```julia +# using Pkg +# pkg"add NumericalEarth CDSAPI" +# ``` +# +# You also need CDS API credentials in `~/.cdsapirc`. +# See for setup instructions. +# +# See for +# more information about single-level data + +using NumericalEarth.DataWrangling: Metadata, BoundingBox, download_dataset +using NumericalEarth.DataWrangling.ERA5 +using CDSAPI +using Dates +using Oceananigans + +using Statistics: mean +using CairoMakie +#precip_cmap = :rain +precip_cmap = cgrad([:indigo, :darkblue, :blue, :deepskyblue, :cyan, + :palegreen, :green, :yellow, :orange, :red, :pink]) + + +dataset = ERA5HourlySingleLevel() + +#dates = DateTime(2004, 12, 16):Hour(1):DateTime(2005, 01, 09) # van Zanten et al 2011 study period +dates = DateTime(2004, 12, 16):Hour(1):DateTime(2004, 12, 23) # shorter time range for demo + +# Rauber et al 2007, Fig 1: precip map +bounding_box = BoundingBox(latitude=(-25, 35), longitude=(-110, 30)) + +# OPTIONAL: download all variables at once (fewer CDS API requests) +# variables = [:total_precipitation, +# :sea_surface_temperature, +# :temperature, # at 2 m +# :dewpoint_temperature, # at 2 m +# :surface_pressure] +# download_dataset(variables, dataset, dates; bounding_box) + +# ## Create time series + +precip_meta = Metadata(:total_precipitation; dataset, dates, bounding_box) +precip_series = FieldTimeSeries(precip_meta) + +# ## Plotting + +Nt = length(precip_series.times) +λ, φ, _ = nodes(precip_series[1]) + +# ERA5 total_precipitation is in metres per hour; convert to mm/day +to_mm_day = 1000 * 24 + +# ### Time-averaged precipitation map + +precip_avg = mean(interior(precip_series[n], :, :, 1) for n in 1:Nt) .* to_mm_day + +fig1 = Figure(size=(900, 400)) +ax1 = Axis(fig1[1, 1], + title = "Mean precipitation (2004-12-16 to 2005-01-08)", + xlabel = "Longitude (°)", + ylabel = "Latitude (°)") +ax1.xticks = -90:30:30 +hm = heatmap!(ax1, λ, φ, precip_avg; colormap=precip_cmap, colorrange=(0, 12)) +Colorbar(fig1[1, 2], hm, label="Precipitation (mm/day)") +save("ERA5_mean_precipitation.png", fig1) + +# ### Precipitation animation + +fig2 = Figure(size=(900, 400)) + +n = Observable(1) +precip_n = @lift interior(precip_series[$n], :, :, 1) .* to_mm_day +anim_title = @lift "Precipitation (mm/day), " * string(first(dates) + Second(round(Int, precip_series.times[$n]))) + +ax2 = Axis(fig2[1, 1], + title = anim_title, + xlabel = "Longitude (°)", + ylabel = "Latitude (°)") +ax2.xticks = -90:30:30 + +hm2 = heatmap!(ax2, λ, φ, precip_n; colormap=precip_cmap, colorrange=(0, 12)) +Colorbar(fig2[1, 2], hm2, label="Precipitation (mm/day)") + +record(fig2, "ERA5_precipitation.mp4", 1:Nt; framerate=12) do nn + n[] = nn +end + diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index e46454b15..1857a18ec 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -8,148 +8,144 @@ using Oceananigans using Oceananigans.DistributedComputations: @root using Dates -using NumericalEarth.DataWrangling.ERA5: ERA5Metadata, ERA5Metadatum, ERA5_dataset_variable_names +using NumericalEarth.DataWrangling.ERA5: ERA5Dataset, ERA5Metadata, ERA5Metadatum, + ERA5_dataset_variable_names, ERA5_netcdf_variable_names using NumericalEarth.DataWrangling.ERA5: ERA5PressureDataset, ERA5PressureMetadata, ERA5PressureMetadatum, ERA5PL_dataset_variable_names, ERA5PL_netcdf_variable_names import NumericalEarth.DataWrangling: download_dataset +##### +##### Dispatch helpers — encapsulate single-level vs pressure-level differences +##### + +_cds_product(::ERA5Dataset) = "reanalysis-era5-single-levels" +_cds_product(::ERA5PressureDataset) = "reanalysis-era5-pressure-levels" + +_cds_varnames(::ERA5Dataset) = ERA5_dataset_variable_names +_cds_varnames(::ERA5PressureDataset) = ERA5PL_dataset_variable_names + +_nc_varnames(::ERA5Dataset) = ERA5_netcdf_variable_names +_nc_varnames(::ERA5PressureDataset) = ERA5PL_netcdf_variable_names + # Coordinate / dimension variables to propagate into each split file +const ERA5_COORD_VARS = Set(["longitude", "latitude", + "time", "valid_time", + "expver", "number"]) + const ERA5PL_COORD_VARS = Set(["longitude", "latitude", "pressure_level", "level", "time", "valid_time", - "expver", # experiment version: ==1 for final data, ==5 preliminary 5-day data - "number"]) # ensemble member + "expver", "number"]) -""" - download_dataset(metadata::ERA5Metadata; kwargs...) +_coord_vars(::ERA5Dataset) = ERA5_COORD_VARS +_coord_vars(::ERA5PressureDataset) = ERA5PL_COORD_VARS -Download ERA5 data for each date in the metadata, returning paths to downloaded files. -""" -function download_dataset(metadata::ERA5Metadata; kwargs...) - paths = Array{String}(undef, length(metadata)) - for (m, metadatum) in enumerate(metadata) - paths[m] = download_dataset(metadatum; kwargs...) - end - return paths +_extra_request_keys!(request, ::ERA5Dataset) = nothing +function _extra_request_keys!(request, ds::ERA5PressureDataset) + p_hPa = [round(Int, p * 1e-2) for p in ds.pressure_levels] + request["pressure_level"] = [string(p) for p in p_hPa] end -""" - download_dataset(meta::ERA5Metadatum; skip_existing=true, kwargs...) - -Download ERA5 data for a single date/time using the CDSAPI package. - -# Keyword Arguments -- `skip_existing`: Skip download if the file already exists (default: `true`). - -# Environment Setup -Before downloading, you must: -1. Create an account at https://cds.climate.copernicus.eu/ -2. Accept the Terms of Use for the ERA5 dataset on the dataset page -3. Set up your API credentials in `~/.cdsapirc` +##### +##### Single-date download +##### -See https://cds.climate.copernicus.eu/how-to-api for details. -""" function download_dataset(meta::ERA5Metadatum; skip_existing=true) + output_path = NumericalEarth.DataWrangling.metadata_path(meta) - output_directory = meta.dir - output_filename = NumericalEarth.DataWrangling.metadata_filename(meta) - output_path = joinpath(output_directory, output_filename) - - # Skip if file already exists - if skip_existing && isfile(output_path) - return output_path - end - - # Ensure output directory exists - mkpath(output_directory) - - # Get the ERA5 variable name - variable_name = ERA5_dataset_variable_names[meta.name] + skip_existing && isfile(output_path) && return output_path + mkpath(dirname(output_path)) - # Extract date information date = meta.dates - year = string(Dates.year(date)) - month = lpad(string(Dates.month(date)), 2, '0') - day = lpad(string(Dates.day(date)), 2, '0') - hour = lpad(string(Dates.hour(date)), 2, '0') * ":00" - - # Build request parameters request = Dict( "product_type" => ["reanalysis"], - "variable" => [variable_name], - "year" => [year], - "month" => [month], - "day" => [day], - "time" => [hour], + "variable" => [_cds_varnames(meta.dataset)[meta.name]], + "year" => [string(Dates.year(date))], + "month" => [lpad(string(Dates.month(date)), 2, '0')], + "day" => [lpad(string(Dates.day(date)), 2, '0')], + "time" => [lpad(string(Dates.hour(date)), 2, '0') * ":00"], "data_format" => "netcdf", "download_format" => "unarchived", ) - # Add area constraint from bounding box + _extra_request_keys!(request, meta.dataset) area = build_era5_area(meta.bounding_box) - if !isnothing(area) - request["area"] = area - end + isnothing(area) || (request["area"] = area) - # Perform the download using CDSAPI - @root begin - CDSAPI.retrieve("reanalysis-era5-single-levels", request, output_path) - end + @root CDSAPI.retrieve(_cds_product(meta.dataset), request, output_path) return output_path end ##### -##### ERA5 pressure-level download +##### Multi-date download — batches by calendar day ##### -function download_dataset(metadata::ERA5PressureMetadata; skip_existing=true, cleanup=true) +function download_dataset(metadata::ERA5Metadata; skip_existing=true, cleanup=true) dates = metadata.dates isa AbstractVector ? metadata.dates : [metadata.dates] - download_dataset([metadata.name], metadata.dataset, dates; - bounding_box = metadata.bounding_box, - dir = metadata.dir, - skip_existing, cleanup) + grouped = Dict(d => filter(dt -> Dates.Date(dt) == d, dates) + for d in unique(Dates.Date.(dates))) + + for day in sort(collect(keys(grouped))) + _download_era5_day(metadata.name, metadata.dataset, grouped[day]; + bounding_box = metadata.bounding_box, + dir = metadata.dir, + skip_existing, cleanup) + end end -function download_dataset(meta::ERA5PressureMetadatum; skip_existing=true) - output_directory = meta.dir - output_filename = NumericalEarth.DataWrangling.metadata_filename(meta) - output_path = joinpath(output_directory, output_filename) +function _download_era5_day(name, dataset, day_dates; + bounding_box, dir, skip_existing, cleanup) - skip_existing && isfile(output_path) && return output_path - mkpath(output_directory) + MDatum = NumericalEarth.DataWrangling.Metadatum + meta_path = NumericalEarth.DataWrangling.metadata_path - variable_name = ERA5PL_dataset_variable_names[meta.name] - date = meta.dates - year = string(Dates.year(date)) - month = lpad(string(Dates.month(date)), 2, '0') - day = lpad(string(Dates.day(date)), 2, '0') - hour = lpad(string(Dates.hour(date)), 2, '0') * ":00" + all_pairs = [(dt, meta_path(MDatum(name; dataset, date=dt, bounding_box, dir))) + for dt in day_dates] + + pending = skip_existing ? filter(((_, p),) -> !isfile(p), all_pairs) : all_pairs + isempty(pending) && return nothing - p_hPa = [round(Int, p * 1e-2) for p in meta.dataset.pressure_levels] + sorted_dts = sort(unique([dt for (dt, _) in pending])) + hours_str = [lpad(string(Dates.hour(dt)), 2, '0') * ":00" for dt in sorted_dts] + dt_to_tidx = Dict(dt => i for (i, dt) in enumerate(sorted_dts)) + + dt = first(sorted_dts) + year = string(Dates.year(dt)) + month = lpad(string(Dates.month(dt)), 2, '0') + day = lpad(string(Dates.day(dt)), 2, '0') request = Dict( "product_type" => ["reanalysis"], - "variable" => [variable_name], - "pressure_level" => [string(p) for p in p_hPa], + "variable" => [_cds_varnames(dataset)[name]], "year" => [year], "month" => [month], "day" => [day], - "time" => [hour], + "time" => hours_str, "data_format" => "netcdf", "download_format" => "unarchived", ) - area = build_era5_area(meta.bounding_box) + _extra_request_keys!(request, dataset) + area = build_era5_area(bounding_box) isnothing(area) || (request["area"] = area) + mkpath(dir) + tmp_path = joinpath(dir, "_tmp_$(year)$(month)$(day).nc") + nc_varname = _nc_varnames(dataset)[name] + nc_triples = [(nc_varname, dt_to_tidx[dt], path) for (dt, path) in pending] + + time_dimnames = Set(["time", "valid_time"]) + @root begin - CDSAPI.retrieve("reanalysis-era5-pressure-levels", request, output_path) + CDSAPI.retrieve(_cds_product(dataset), request, tmp_path) + _split_era5_nc_multistep(tmp_path, nc_triples, _coord_vars(dataset), time_dimnames) + cleanup && rm(tmp_path; force=true) end - return output_path + return nothing end ##### @@ -172,13 +168,9 @@ end download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; skip_existing=true) Download multiple ERA5 pressure-level variables for a single date in one CDS API request. - -The multi-variable NetCDF returned by CDS is split into individual per-variable files, each -stored at the path returned by `metadata_path(Metadatum(name; dataset=meta.dataset, ...))`. -This keeps subsequent calls to `Field` and `retrieve_data` unchanged. +The multi-variable NetCDF is split into individual per-variable files. """ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; skip_existing=true) - # Build (symbol → output path) pairs using the standard single-variable filename scheme name_path_pairs = [(name, NumericalEarth.DataWrangling.metadata_path( NumericalEarth.DataWrangling.Metadatum(name; dataset = meta.dataset, @@ -187,7 +179,6 @@ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; sk dir = meta.dir))) for name in names] - # Determine which output files are missing pending = if skip_existing [(n, p) for (n, p) in name_path_pairs if !isfile(p)] else @@ -196,8 +187,7 @@ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; sk isempty(pending) && return [path for (_, path) in name_path_pairs] - # Unique CDS variable names (:geopotential and :geopotential_height share one CDS field) - cds_vars = unique([ERA5PL_dataset_variable_names[name] for (name, _) in pending]) + cds_vars = unique([_cds_varnames(meta.dataset)[name] for (name, _) in pending]) date = meta.dates year = string(Dates.year(date)) @@ -205,12 +195,9 @@ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; sk day = lpad(string(Dates.day(date)), 2, '0') hour = lpad(string(Dates.hour(date)), 2, '0') * ":00" - p_hPa = [round(Int, p * 1e-2) for p in meta.dataset.pressure_levels] - request = Dict( "product_type" => ["reanalysis"], "variable" => cds_vars, - "pressure_level" => [string(p) for p in p_hPa], "year" => [year], "month" => [month], "day" => [day], @@ -219,17 +206,18 @@ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; sk "download_format" => "unarchived", ) + _extra_request_keys!(request, meta.dataset) area = build_era5_area(meta.bounding_box) isnothing(area) || (request["area"] = area) mkpath(meta.dir) tmp_path = joinpath(meta.dir, "_tmp_multi_$(year)$(month)$(day)T$(hour[1:2]).nc") - nc_name_path_pairs = [(ERA5PL_netcdf_variable_names[name], path) for (name, path) in pending] + nc_name_path_pairs = [(_nc_varnames(meta.dataset)[name], path) for (name, path) in pending] @root begin - CDSAPI.retrieve("reanalysis-era5-pressure-levels", request, tmp_path) - _split_era5pl_nc(tmp_path, nc_name_path_pairs) + CDSAPI.retrieve(_cds_product(meta.dataset), request, tmp_path) + _split_era5_nc(tmp_path, nc_name_path_pairs, _coord_vars(meta.dataset)) rm(tmp_path; force=true) end @@ -237,114 +225,30 @@ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; sk end """ - download_dataset(names::Union{Symbol, Vector{Symbol}}, - dataset::ERA5PressureDataset, - datetime; - bounding_box=nothing, - dir=default_download_directory(dataset)) - -Download one or more ERA5 pressure-level variables at a single datetime, without requiring a dummy -`Metadatum`. - -# Example -```julia -ds = ERA5HourlyPressureLevels(pressure_levels=[850, 500] .* hPa) -download_dataset([:temperature, :geopotential_height, :eastward_velocity], ds; - date=DateTime(2020, 6, 15, 0), bounding_box=bbox) -``` + download_dataset(names, dataset::ERA5Dataset, datetime; ...) + +Download one or more ERA5 variables at a single datetime. """ -function download_dataset(names::Vector{Symbol}, dataset::ERA5PressureDataset, datetime; +function download_dataset(names::Vector{Symbol}, dataset::ERA5Dataset, datetime; bounding_box = nothing, dir = NumericalEarth.DataWrangling.default_download_directory(dataset)) - meta = NumericalEarth.DataWrangling.Metadatum(first(names); dataset, datetime, bounding_box, dir) + meta = NumericalEarth.DataWrangling.Metadatum(first(names); dataset, date=datetime, bounding_box, dir) return download_dataset(names, meta) end -function download_dataset(name::Symbol, dataset::ERA5PressureDataset, datetime; +function download_dataset(name::Symbol, dataset::ERA5Dataset, datetime; bounding_box = nothing, dir = NumericalEarth.DataWrangling.default_download_directory(dataset)) return download_dataset([name], dataset, datetime; bounding_box, dir) end """ - _split_era5pl_nc(src_path, nc_name_path_pairs) - -Split a multi-variable ERA5 pressure-level NetCDF at `src_path` into individual -per-variable files. `nc_name_path_pairs` is a vector of `(nc_varname, dst_path)` tuples. + download_dataset(names, dataset::ERA5Dataset, datetimes::AbstractVector; ...) -Each output file contains all coordinate/dimension variables plus the one data variable. -Global attributes and dimension definitions are preserved. Raw data (including scale/offset -encoding) is copied verbatim so that downstream readers decode values identically. -""" -function _split_era5pl_nc(src_path, nc_name_path_pairs) - NCDatasets.Dataset(src_path, "r") do src - for (nc_varname, dst_path) in nc_name_path_pairs - NCDatasets.Dataset(dst_path, "c") do dst - # Define dimensions - unlimited = NCDatasets.unlimited(src) - for (dname, dlen) in src.dim - NCDatasets.defDim(dst, dname, dname in unlimited ? Inf : dlen) - end - - # Copy global attributes - for (k, v) in src.attrib - dst.attrib[k] = v - end - - # Copy coordinate variables and the one requested data variable - for (vname, var) in src - (vname in ERA5PL_COORD_VARS || vname == nc_varname) || continue - _ncvar_copy!(dst, var, vname) - end - end - end - end -end - -function _ncvar_copy!(dst, src_var, vname) - dims = NCDatasets.dimnames(src_var) - T = eltype(src_var.var) # raw storage type; eltype(src_var) gives CF-decoded DateTime - attribs = src_var.attrib - fill_val = haskey(attribs, "_FillValue") ? attribs["_FillValue"] : nothing - - dst_var = if isnothing(fill_val) - NCDatasets.defVar(dst, vname, T, dims) - else - NCDatasets.defVar(dst, vname, T, dims; fillvalue=fill_val) - end - - # Copy variable attributes (skip _FillValue — handled by defVar above) - for (k, v) in attribs - k == "_FillValue" && continue - dst_var.attrib[k] = v - end - - # Copy raw encoded data, bypassing scale/offset transforms - dst_var.var[:] = src_var.var[:] - - return nothing -end - -""" - download_dataset(names::Union{Symbol, Vector{Symbol}}, - dataset::ERA5PressureDataset, - datetimes::AbstractVector; - bounding_box=nothing, - dir=default_download_directory(dataset), - skip_existing=true) - -Download one or more ERA5 pressure-level variables for a vector of `datetimes`, issuing one CDS API -request per unique calendar day (which may contain multiple hours). - -# Example -```julia -ds = ERA5HourlyPressureLevels(pressure_levels=[850, 500] .* hPa) -dates = DateTime(2020, 6, 15, 0) : Hour(6) : DateTime(2020, 6, 16, 18) -download_dataset([:temperature, :geopotential_height], ds, collect(dates); bounding_box=bbox) -``` +Download one or more ERA5 variables for multiple datetimes, batching by calendar day. """ function download_dataset(names::Vector{Symbol}, - dataset::ERA5PressureDataset, + dataset::ERA5Dataset, datetimes::AbstractVector; bounding_box = nothing, dir = NumericalEarth.DataWrangling.default_download_directory(dataset), @@ -355,14 +259,14 @@ function download_dataset(names::Vector{Symbol}, for d in unique(Dates.Date.(datetimes))) for day in sort(collect(keys(grouped))) - _download_era5pl_day(names, dataset, grouped[day]; bounding_box, dir, skip_existing, cleanup) + _download_era5_multivar_day(names, dataset, grouped[day]; bounding_box, dir, skip_existing, cleanup) end return nothing end function download_dataset(name::Symbol, - dataset::ERA5PressureDataset, + dataset::ERA5Dataset, datetimes::AbstractVector; bounding_box = nothing, dir = NumericalEarth.DataWrangling.default_download_directory(dataset), @@ -371,21 +275,19 @@ function download_dataset(name::Symbol, return download_dataset([name], dataset, datetimes; bounding_box, dir, skip_existing, cleanup) end -function _download_era5pl_day(names, dataset, day_dates; - bounding_box, dir, skip_existing, cleanup) +function _download_era5_multivar_day(names, dataset, day_dates; + bounding_box, dir, skip_existing, cleanup) MDatum = NumericalEarth.DataWrangling.Metadatum meta_path = NumericalEarth.DataWrangling.metadata_path - # All (name, dt, output path) triples for this day all_triples = [(name, dt, meta_path(MDatum(name; dataset, date=dt, bounding_box, dir))) for name in names for dt in day_dates] pending = skip_existing ? filter(((_, _, p),) -> !isfile(p), all_triples) : all_triples isempty(pending) && return nothing - # Unique CDS variable names and hours needed - cds_vars = unique([ERA5PL_dataset_variable_names[name] for (name, _, _) in pending]) + cds_vars = unique([_cds_varnames(dataset)[name] for (name, _, _) in pending]) sorted_dts = sort(unique([dt for (_, dt, _) in pending])) hours_str = [lpad(string(Dates.hour(dt)), 2, '0') * ":00" for dt in sorted_dts] dt_to_tidx = Dict(dt => i for (i, dt) in enumerate(sorted_dts)) @@ -395,12 +297,9 @@ function _download_era5pl_day(names, dataset, day_dates; month = lpad(string(Dates.month(dt)), 2, '0') day = lpad(string(Dates.day(dt)), 2, '0') - p_hPa = [round(Int, p * 1e-2) for p in dataset.pressure_levels] - request = Dict( "product_type" => ["reanalysis"], "variable" => cds_vars, - "pressure_level" => [string(p) for p in p_hPa], "year" => [year], "month" => [month], "day" => [day], @@ -409,32 +308,69 @@ function _download_era5pl_day(names, dataset, day_dates; "download_format" => "unarchived", ) + _extra_request_keys!(request, dataset) area = build_era5_area(bounding_box) isnothing(area) || (request["area"] = area) mkpath(dir) tmp_path = joinpath(dir, "_tmp_multi_$(year)$(month)$(day).nc") - nc_triples = [(ERA5PL_netcdf_variable_names[name], dt_to_tidx[dt], path) + nc_triples = [(_nc_varnames(dataset)[name], dt_to_tidx[dt], path) for (name, dt, path) in pending] + time_dimnames = Set(["time", "valid_time"]) + @root begin - CDSAPI.retrieve("reanalysis-era5-pressure-levels", request, tmp_path) - _split_era5pl_nc_multistep(tmp_path, nc_triples) + CDSAPI.retrieve(_cds_product(dataset), request, tmp_path) + _split_era5_nc_multistep(tmp_path, nc_triples, _coord_vars(dataset), time_dimnames) cleanup && rm(tmp_path; force=true) end return nothing end -function _split_era5pl_nc_multistep(src_path, nc_varname_tidx_path_triples) - time_dimnames = Set(["time", "valid_time"]) +##### +##### NetCDF splitting utilities +##### + +""" + _split_era5_nc(src_path, nc_name_path_pairs, coord_vars) + +Split a multi-variable NetCDF into individual per-variable files (single time step). +""" +function _split_era5_nc(src_path, nc_name_path_pairs, coord_vars) + NCDatasets.Dataset(src_path, "r") do src + for (nc_varname, dst_path) in nc_name_path_pairs + NCDatasets.Dataset(dst_path, "c") do dst + unlimited = NCDatasets.unlimited(src) + for (dname, dlen) in src.dim + NCDatasets.defDim(dst, dname, dname in unlimited ? Inf : dlen) + end + + for (k, v) in src.attrib + dst.attrib[k] = v + end + + for (vname, var) in src + (vname in coord_vars || vname == nc_varname) || continue + _ncvar_copy!(dst, var, vname) + end + end + end + end +end +""" + _split_era5_nc_multistep(src_path, triples, coord_vars, time_dimnames) + +Split a multi-timestep NetCDF into individual per-variable, per-timestep files. +`triples` is a vector of `(nc_varname, time_index, dst_path)`. +""" +function _split_era5_nc_multistep(src_path, nc_varname_tidx_path_triples, coord_vars, time_dimnames) NCDatasets.Dataset(src_path, "r") do src unlimited = NCDatasets.unlimited(src) for (nc_varname, tidx, dst_path) in nc_varname_tidx_path_triples NCDatasets.Dataset(dst_path, "c") do dst - # Collapse the time dimension to 1; keep all others for (dname, dlen) in src.dim out_len = dname in time_dimnames ? 1 : dname in unlimited ? Inf : dlen @@ -446,7 +382,7 @@ function _split_era5pl_nc_multistep(src_path, nc_varname_tidx_path_triples) end for (vname, var) in src - (vname in ERA5PL_COORD_VARS || vname == nc_varname) || continue + (vname in coord_vars || vname == nc_varname) || continue _ncvar_copy_tslice!(dst, var, vname, tidx, time_dimnames) end end @@ -454,6 +390,25 @@ function _split_era5pl_nc_multistep(src_path, nc_varname_tidx_path_triples) end end +function _ncvar_copy!(dst, src_var, vname) + dims = NCDatasets.dimnames(src_var) + T = eltype(src_var.var) + attribs = src_var.attrib + fill_val = haskey(attribs, "_FillValue") ? attribs["_FillValue"] : nothing + + dst_var = isnothing(fill_val) ? + NCDatasets.defVar(dst, vname, T, dims) : + NCDatasets.defVar(dst, vname, T, dims; fillvalue=fill_val) + + for (k, v) in attribs + k == "_FillValue" && continue + dst_var.attrib[k] = v + end + + dst_var.var[:] = src_var.var[:] + return nothing +end + function _ncvar_copy_tslice!(dst, src_var, vname, tidx, time_dimnames) dims = NCDatasets.dimnames(src_var) T = eltype(src_var.var) @@ -469,7 +424,6 @@ function _ncvar_copy_tslice!(dst, src_var, vname, tidx, time_dimnames) dst_var.attrib[k] = v end - # Slice time dimension; keep all others intact has_time = any(d -> d in time_dimnames, dims) if has_time idx = ntuple(ndims(src_var.var)) do i @@ -492,9 +446,6 @@ build_era5_area(::Nothing) = nothing const BBOX = NumericalEarth.DataWrangling.BoundingBox function build_era5_area(bbox::BBOX) - # CDS API uses [north, west, south, east] ordering - # BoundingBox has longitude = (west, east), latitude = (south, north) - lon = bbox.longitude lat = bbox.latitude diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl index c5d360761..9e0a43096 100644 --- a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl @@ -198,9 +198,7 @@ function z_interfaces(metadata::ERA5PressureMetadata) dir=metadata.dir) try download_dataset(ϕ_metadata) - zf = mean_geopotential_z_interfaces(metadata) - metadata.dataset.z = zf - return zf + return mean_geopotential_z_interfaces(metadata) catch e @warn "Failed to download geopotential data; falling back to standard atmosphere" exception=e end @@ -262,7 +260,7 @@ end pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3)) Return a `CenterField` on the native grid of `metadata` filled with the pressure -value (hPa) at each vertical level. Levels are ordered bottom-to-top (k=1 is the +value (Pa) at each vertical level. Levels are ordered bottom-to-top (k=1 is the highest pressure level). """ function pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3)) diff --git a/src/DataWrangling/ERA5/ERA5_single_levels.jl b/src/DataWrangling/ERA5/ERA5_single_levels.jl index 67f9f785b..8da81deae 100644 --- a/src/DataWrangling/ERA5/ERA5_single_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_single_levels.jl @@ -23,9 +23,9 @@ is_three_dimensional(::ERA5Metadata) = false function Base.size(::ERA5Dataset, variable) if variable in ERA5_wave_variables - return (720, 361, 1) + return (720, 360, 1) else - return (1440, 721, 1) + return (1440, 720, 1) end end diff --git a/test/test_cds_downloading.jl b/test/test_cds_downloading.jl index 5ffad9671..a13496f12 100644 --- a/test/test_cds_downloading.jl +++ b/test/test_cds_downloading.jl @@ -95,7 +95,7 @@ start_date = DateTime(2005, 2, 16, 12) # Test size (should be global ERA5 size with 1 time step) Nx, Ny, Nz, Nt = size(metadatum) @test Nx == 1440 # ERA5 longitude points - @test Ny == 721 # ERA5 latitude points + @test Ny == 720 # ERA5 latitude points (poles averaged into adjacent cells) @test Nz == 1 # 2D surface data @test Nt == 1 # Single time step @@ -104,23 +104,23 @@ start_date = DateTime(2005, 2, 16, 12) end @testset "ERA5 wave variable metadata sizes" begin - # Wave variables should be on the 0.5° grid (720×361) + # Wave variables should be on the 0.5° grid (720×360) for wave_var in (:eastward_stokes_drift, :northward_stokes_drift, :significant_wave_height, :mean_wave_period, :mean_wave_direction) metadatum = Metadatum(wave_var; dataset, date=start_date) Nx, Ny, Nz, Nt = size(metadatum) @test Nx == 720 - @test Ny == 361 + @test Ny == 360 @test Nz == 1 @test Nt == 1 end - # Atmospheric variables should remain on the 0.25° grid (1440×721) + # Atmospheric variables should remain on the 0.25° grid (1440×720) for atmos_var in (:temperature, :eastward_velocity, :surface_pressure) metadatum = Metadatum(atmos_var; dataset, date=start_date) Nx, Ny, Nz, Nt = size(metadatum) @test Nx == 1440 - @test Ny == 721 + @test Ny == 720 @test Nz == 1 @test Nt == 1 end @@ -141,11 +141,11 @@ start_date = DateTime(2005, 2, 16, 12) ds_full = ERA5HourlyPressureLevels() @test ds_full isa ERA5HourlyPressureLevels @test length(ds_full.pressure_levels) == 37 - @test Base.size(ds_full, :temperature) == (1440, 721, 37) + @test Base.size(ds_full, :temperature) == (1440, 720, 37) # Subset constructor ds_sub = ERA5HourlyPressureLevels(pressure_levels=[850, 500] .* hPa) - @test Base.size(ds_sub, :temperature) == (1440, 721, 2) + @test Base.size(ds_sub, :temperature) == (1440, 720, 2) # Monthly variant ds_monthly = ERA5MonthlyPressureLevels() @@ -163,15 +163,15 @@ start_date = DateTime(2005, 2, 16, 12) end @testset "ERA5 pressure-level z_interfaces (standard atmosphere)" begin - levels_2 = [850, 500] - z = NumericalEarth.DataWrangling.ERA5._std_atm_z_interfaces(levels_2) + levels_2 = [850, 500] .* hPa + z = standard_atmosphere_z_interfaces(levels_2) @test length(z) == 3 # Nz+1 interfaces @test issorted(z) # monotonically increasing with altitude # 850 hPa ≈ 1457 m, 500 hPa ≈ 5575 m @test z[1] < 1457.0 < z[2] < 5575.0 < z[3] # Single level - z1 = NumericalEarth.DataWrangling.ERA5._std_atm_z_interfaces([500]) + z1 = standard_atmosphere_z_interfaces([500] .* hPa) @test length(z1) == 2 @test z1[1] < z1[2] end @@ -299,10 +299,10 @@ start_date = DateTime(2005, 2, 16, 12) @test Nz == 2 @allowscalar begin - # k=1 should be 850 hPa (highest pressure, lowest altitude) - @test interior(pf)[1, 1, 1] ≈ 850.0f0 - # k=2 should be 500 hPa - @test interior(pf)[1, 1, 2] ≈ 500.0f0 + # k=1 should be 850 hPa = 85000 Pa (highest pressure, lowest altitude) + @test interior(pf)[1, 1, 1] ≈ Float32(850 * hPa) + # k=2 should be 500 hPa = 50000 Pa + @test interior(pf)[1, 1, 2] ≈ Float32(500 * hPa) end end end From 577b3c4e6d5dc2ee3a2578cf3a62137327f63983 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Mon, 16 Mar 2026 22:52:45 -0600 Subject: [PATCH 28/48] Address a very very very minor point --- ext/NumericalEarthCDSAPIExt.jl | 20 +++++++++---------- .../ERA5/ERA5_pressure_levels.jl | 18 ++++++++--------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index bee3ed9eb..629e7e31a 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -10,7 +10,7 @@ using Oceananigans.DistributedComputations: @root using Dates using NumericalEarth.DataWrangling.ERA5: ERA5Dataset, ERA5Metadata, ERA5Metadatum, ERA5_dataset_variable_names, ERA5_netcdf_variable_names -using NumericalEarth.DataWrangling.ERA5: ERA5PressureDataset, +using NumericalEarth.DataWrangling.ERA5: ERA5PressureLevelsDataset, ERA5PressureMetadata, ERA5PressureMetadatum, ERA5PL_dataset_variable_names, ERA5PL_netcdf_variable_names @@ -20,14 +20,14 @@ import NumericalEarth.DataWrangling: download_dataset ##### Dispatch helpers — encapsulate single-level vs pressure-level differences ##### -_cds_product(::ERA5Dataset) = "reanalysis-era5-single-levels" -_cds_product(::ERA5PressureDataset) = "reanalysis-era5-pressure-levels" +_cds_product(::ERA5Dataset) = "reanalysis-era5-single-levels" +_cds_product(::ERA5PressureLevelsDataset) = "reanalysis-era5-pressure-levels" -_cds_varnames(::ERA5Dataset) = ERA5_dataset_variable_names -_cds_varnames(::ERA5PressureDataset) = ERA5PL_dataset_variable_names +_cds_varnames(::ERA5Dataset) = ERA5_dataset_variable_names +_cds_varnames(::ERA5PressureLevelsDataset) = ERA5PL_dataset_variable_names -_nc_varnames(::ERA5Dataset) = ERA5_netcdf_variable_names -_nc_varnames(::ERA5PressureDataset) = ERA5PL_netcdf_variable_names +_nc_varnames(::ERA5Dataset) = ERA5_netcdf_variable_names +_nc_varnames(::ERA5PressureLevelsDataset) = ERA5PL_netcdf_variable_names # Coordinate / dimension variables to propagate into each split file const ERA5_COORD_VARS = Set(["longitude", "latitude", @@ -39,11 +39,11 @@ const ERA5PL_COORD_VARS = Set(["longitude", "latitude", "time", "valid_time", "expver", "number"]) -_coord_vars(::ERA5Dataset) = ERA5_COORD_VARS -_coord_vars(::ERA5PressureDataset) = ERA5PL_COORD_VARS +_coord_vars(::ERA5Dataset) = ERA5_COORD_VARS +_coord_vars(::ERA5PressureLevelsDataset) = ERA5PL_COORD_VARS _extra_request_keys!(request, ::ERA5Dataset) = nothing -function _extra_request_keys!(request, ds::ERA5PressureDataset) +function _extra_request_keys!(request, ds::ERA5PressureLevelsDataset) p_hPa = [round(Int, p * 1e-2) for p in ds.pressure_levels] request["pressure_level"] = [string(p) for p in p_hPa] end diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl index 9e0a43096..ffb3b4155 100644 --- a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl @@ -1,10 +1,10 @@ -abstract type ERA5PressureDataset <: ERA5Dataset end +abstract type ERA5PressureLevelsDataset <: ERA5Dataset end # ERA5PressureMetadata is a subtype of ERA5Metadata -const ERA5PressureMetadata{D} = Metadata{<:ERA5PressureDataset, D} -const ERA5PressureMetadatum = Metadatum{<:ERA5PressureDataset} +const ERA5PressureMetadata{D} = Metadata{<:ERA5PressureLevelsDataset, D} +const ERA5PressureMetadatum = Metadatum{<:ERA5PressureLevelsDataset} -struct ERA5HourlyPressureLevels <: ERA5PressureDataset +struct ERA5HourlyPressureLevels <: ERA5PressureLevelsDataset pressure_levels :: Vector{Float64} z :: Union{Nothing, Vector{Float64}} mean_geopotential_height :: Bool @@ -14,7 +14,7 @@ end ERA5HourlyPressureLevels(; pressure_levels=ERA5_all_pressure_levels, z=nothing, mean_geopotential_height=true) = ERA5HourlyPressureLevels(pressure_levels, z; mean_geopotential_height) -struct ERA5MonthlyPressureLevels <: ERA5PressureDataset +struct ERA5MonthlyPressureLevels <: ERA5PressureLevelsDataset pressure_levels :: Vector{Float64} z :: Union{Nothing, Vector{Float64}} mean_geopotential_height :: Bool @@ -44,9 +44,9 @@ const ERA5_all_pressure_levels = [1, 2, 3, 5, 7, 10, 20, 30, 50, 70, 100, 125, 1 825, 850, 875, 900, 925, 950, 975, 1000]*hPa # ERA5 stores pressure levels bottom-to-top -reversed_vertical_axis(::ERA5PressureDataset) = false +reversed_vertical_axis(::ERA5PressureLevelsDataset) = false -Base.size(ds::ERA5PressureDataset, variable) = (1440, 720, length(ds.pressure_levels)) +Base.size(ds::ERA5PressureLevelsDataset, variable) = (1440, 720, length(ds.pressure_levels)) ##### ##### ERA5 pressure-level variable name mappings @@ -91,7 +91,7 @@ ERA5PL_netcdf_variable_names = Dict( ) # Variables available for download -available_variables(::ERA5PressureDataset) = ERA5PL_dataset_variable_names +available_variables(::ERA5PressureLevelsDataset) = ERA5PL_dataset_variable_names dataset_variable_name(md::ERA5PressureMetadata) = ERA5PL_dataset_variable_names[md.name] netcdf_variable_name(md::ERA5PressureMetadata) = ERA5PL_netcdf_variable_names[md.name] @@ -165,7 +165,7 @@ end # Levels may be in any order; output is sorted so k=1 is highest pressure (lowest altitude). function standard_atmosphere_z_interfaces(levels) @info "Calculating z-interfaces based on the International Standard Atmosphere... \ - For greater accuracy, set ERA5PressureDataset(; mean_geopotential_height=true)" + For greater accuracy, set ERA5PressureLevelsDataset(; mean_geopotential_height=true)" sorted_levels = sort(levels, rev=true) # highest pressure first → k=1 is bottom heights = standard_atmosphere_geopotential_height.(Float64.(sorted_levels)) Nz = length(heights) From f4afe090e6e5162eec2270ccefdafeace044d99d Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Mon, 16 Mar 2026 23:44:54 -0600 Subject: [PATCH 29/48] Retrieve var name from correct dict --- src/DataWrangling/ERA5/ERA5.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 9f3e76a72..57b31c2c3 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -103,7 +103,7 @@ function bbox_strs(c) end function metadata_prefix(dataset::ERA5Dataset, name, date, bounding_box) - var = ERA5_dataset_variable_names[name] + var = available_variables(dataset)[name] ds = dataset_name(dataset) start_date = start_date_str(date) end_date = end_date_str(date) From c28db4f8aa94658aad0e5cb66ea5b5a61772c10b Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Mon, 16 Mar 2026 23:50:24 -0600 Subject: [PATCH 30/48] Add additional surface variables --- src/DataWrangling/ERA5/ERA5_single_levels.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/DataWrangling/ERA5/ERA5_single_levels.jl b/src/DataWrangling/ERA5/ERA5_single_levels.jl index 8da81deae..6a7f8c49c 100644 --- a/src/DataWrangling/ERA5/ERA5_single_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_single_levels.jl @@ -43,6 +43,8 @@ ERA5_dataset_variable_names = Dict( :mean_sea_level_pressure => "mean_sea_level_pressure", :total_precipitation => "total_precipitation", :sea_surface_temperature => "sea_surface_temperature", + :surface_sensible_heat_flux => "surface_sensible_heat_flux", + :surface_latent_heat_flux => "surface_latent_heat_flux", :downwelling_shortwave_radiation => "surface_solar_radiation_downwards", :downwelling_longwave_radiation => "surface_thermal_radiation_downwards", :total_cloud_cover => "total_cloud_cover", @@ -66,6 +68,8 @@ ERA5_netcdf_variable_names = Dict( :mean_sea_level_pressure => "msl", :total_precipitation => "tp", :sea_surface_temperature => "sst", + :surface_sensible_heat_flux => "sshf", + :surface_latent_heat_flux => "slhf", :downwelling_shortwave_radiation => "ssrd", :downwelling_longwave_radiation => "strd", :total_cloud_cover => "tcc", From 74cb622ea9f52bc5be78ead37712271e3357c8eb Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 17 Mar 2026 00:00:56 -0600 Subject: [PATCH 31/48] Improved readability --- ext/NumericalEarthCDSAPIExt.jl | 17 ++++++++++------- src/DataWrangling/ERA5/ERA5_pressure_levels.jl | 3 ++- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index 629e7e31a..feae84725 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -173,13 +173,16 @@ Download multiple ERA5 pressure-level variables for a single date in one CDS API The multi-variable NetCDF is split into individual per-variable files. """ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; skip_existing=true) - name_path_pairs = [(name, NumericalEarth.DataWrangling.metadata_path( - NumericalEarth.DataWrangling.Metadatum(name; - dataset = meta.dataset, - bounding_box = meta.bounding_box, - date = meta.dates, - dir = meta.dir))) - for name in names] + name_path_pairs = [] + for name in names + metadatum = NumericalEarth.DataWrangling.Metadatum(name; + dataset = meta.dataset, + bounding_box = meta.bounding_box, + date = meta.dates, + dir = meta.dir) + path = NumericalEarth.DataWrangling.metadata_path(metadatum) + push!(name_path_pairs, (name, path)) + end pending = if skip_existing [(n, p) for (n, p) in name_path_pairs if !isfile(p)] diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl index ffb3b4155..4acec6639 100644 --- a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl @@ -231,7 +231,8 @@ function mean_geopotential_heights(metadata::ERA5PressureMetadata) for ϕ_datum in ϕ_metadata data = retrieve_data(ϕ_datum) ./ Float32(ERA5_gravitational_acceleration) # Φ → Z (m) # average over horizontal dims - heights .+= dropdims(mean(data; dims=(1, 2)); dims=(1, 2)) + data_mean = mean(data; dims=(1, 2)) + heights .+= dropdims(data_mean; dims=(1, 2)) end heights ./= length(ϕ_metadata) From ad135e31d87dc8679422b027f8f150b7c03e866d Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 17 Mar 2026 00:26:15 -0600 Subject: [PATCH 32/48] Follow Oceananigans convention --- ext/NumericalEarthCDSAPIExt.jl | 80 +++++++++++++++++----------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index feae84725..7816be62a 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -20,14 +20,14 @@ import NumericalEarth.DataWrangling: download_dataset ##### Dispatch helpers — encapsulate single-level vs pressure-level differences ##### -_cds_product(::ERA5Dataset) = "reanalysis-era5-single-levels" -_cds_product(::ERA5PressureLevelsDataset) = "reanalysis-era5-pressure-levels" +cds_product(::ERA5Dataset) = "reanalysis-era5-single-levels" +cds_product(::ERA5PressureLevelsDataset) = "reanalysis-era5-pressure-levels" -_cds_varnames(::ERA5Dataset) = ERA5_dataset_variable_names -_cds_varnames(::ERA5PressureLevelsDataset) = ERA5PL_dataset_variable_names +cds_varnames(::ERA5Dataset) = ERA5_dataset_variable_names +cds_varnames(::ERA5PressureLevelsDataset) = ERA5PL_dataset_variable_names -_nc_varnames(::ERA5Dataset) = ERA5_netcdf_variable_names -_nc_varnames(::ERA5PressureLevelsDataset) = ERA5PL_netcdf_variable_names +nc_varnames(::ERA5Dataset) = ERA5_netcdf_variable_names +nc_varnames(::ERA5PressureLevelsDataset) = ERA5PL_netcdf_variable_names # Coordinate / dimension variables to propagate into each split file const ERA5_COORD_VARS = Set(["longitude", "latitude", @@ -39,11 +39,11 @@ const ERA5PL_COORD_VARS = Set(["longitude", "latitude", "time", "valid_time", "expver", "number"]) -_coord_vars(::ERA5Dataset) = ERA5_COORD_VARS -_coord_vars(::ERA5PressureLevelsDataset) = ERA5PL_COORD_VARS +coord_vars(::ERA5Dataset) = ERA5_COORD_VARS +coord_vars(::ERA5PressureLevelsDataset) = ERA5PL_COORD_VARS -_extra_request_keys!(request, ::ERA5Dataset) = nothing -function _extra_request_keys!(request, ds::ERA5PressureLevelsDataset) +extra_request_keys!(request, ::ERA5Dataset) = nothing +function extra_request_keys!(request, ds::ERA5PressureLevelsDataset) p_hPa = [round(Int, p * 1e-2) for p in ds.pressure_levels] request["pressure_level"] = [string(p) for p in p_hPa] end @@ -63,7 +63,7 @@ function download_dataset(meta::ERA5Metadatum; skip_existing=true) date = meta.dates request = Dict( "product_type" => ["reanalysis"], - "variable" => [_cds_varnames(meta.dataset)[meta.name]], + "variable" => [cds_varnames(meta.dataset)[meta.name]], "year" => [string(Dates.year(date))], "month" => [lpad(string(Dates.month(date)), 2, '0')], "day" => [lpad(string(Dates.day(date)), 2, '0')], @@ -72,11 +72,11 @@ function download_dataset(meta::ERA5Metadatum; skip_existing=true) "download_format" => "unarchived", ) - _extra_request_keys!(request, meta.dataset) + extra_request_keys!(request, meta.dataset) area = build_era5_area(meta.bounding_box) isnothing(area) || (request["area"] = area) - @root CDSAPI.retrieve(_cds_product(meta.dataset), request, output_path) + @root CDSAPI.retrieve(cds_product(meta.dataset), request, output_path) return output_path end @@ -91,14 +91,14 @@ function download_dataset(metadata::ERA5Metadata; skip_existing=true, cleanup=tr for d in unique(Dates.Date.(dates))) for day in sort(collect(keys(grouped))) - _download_era5_day(metadata.name, metadata.dataset, grouped[day]; + download_era5_day(metadata.name, metadata.dataset, grouped[day]; bounding_box = metadata.bounding_box, dir = metadata.dir, skip_existing, cleanup) end end -function _download_era5_day(name, dataset, day_dates; +function download_era5_day(name, dataset, day_dates; bounding_box, dir, skip_existing, cleanup) MDatum = NumericalEarth.DataWrangling.Metadatum @@ -121,7 +121,7 @@ function _download_era5_day(name, dataset, day_dates; request = Dict( "product_type" => ["reanalysis"], - "variable" => [_cds_varnames(dataset)[name]], + "variable" => [cds_varnames(dataset)[name]], "year" => [year], "month" => [month], "day" => [day], @@ -130,20 +130,20 @@ function _download_era5_day(name, dataset, day_dates; "download_format" => "unarchived", ) - _extra_request_keys!(request, dataset) + extra_request_keys!(request, dataset) area = build_era5_area(bounding_box) isnothing(area) || (request["area"] = area) mkpath(dir) tmp_path = joinpath(dir, "_tmp_$(year)$(month)$(day).nc") - nc_varname = _nc_varnames(dataset)[name] + nc_varname = nc_varnames(dataset)[name] nc_triples = [(nc_varname, dt_to_tidx[dt], path) for (dt, path) in pending] time_dimnames = Set(["time", "valid_time"]) @root begin - CDSAPI.retrieve(_cds_product(dataset), request, tmp_path) - _split_era5_nc_multistep(tmp_path, nc_triples, _coord_vars(dataset), time_dimnames) + CDSAPI.retrieve(cds_product(dataset), request, tmp_path) + split_era5_nc_multistep(tmp_path, nc_triples, coord_vars(dataset), time_dimnames) cleanup && rm(tmp_path; force=true) end @@ -192,7 +192,7 @@ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; sk isempty(pending) && return [path for (_, path) in name_path_pairs] - cds_vars = unique([_cds_varnames(meta.dataset)[name] for (name, _) in pending]) + cds_vars = unique([cds_varnames(meta.dataset)[name] for (name, _) in pending]) date = meta.dates year = string(Dates.year(date)) @@ -211,18 +211,18 @@ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; sk "download_format" => "unarchived", ) - _extra_request_keys!(request, meta.dataset) + extra_request_keys!(request, meta.dataset) area = build_era5_area(meta.bounding_box) isnothing(area) || (request["area"] = area) mkpath(meta.dir) tmp_path = joinpath(meta.dir, "_tmp_multi_$(year)$(month)$(day)T$(hour[1:2]).nc") - nc_name_path_pairs = [(_nc_varnames(meta.dataset)[name], path) for (name, path) in pending] + nc_name_path_pairs = [(nc_varnames(meta.dataset)[name], path) for (name, path) in pending] @root begin - CDSAPI.retrieve(_cds_product(meta.dataset), request, tmp_path) - _split_era5_nc(tmp_path, nc_name_path_pairs, _coord_vars(meta.dataset)) + CDSAPI.retrieve(cds_product(meta.dataset), request, tmp_path) + split_era5_nc(tmp_path, nc_name_path_pairs, coord_vars(meta.dataset)) rm(tmp_path; force=true) end @@ -264,7 +264,7 @@ function download_dataset(names::Vector{Symbol}, for d in unique(Dates.Date.(datetimes))) for day in sort(collect(keys(grouped))) - _download_era5_multivar_day(names, dataset, grouped[day]; bounding_box, dir, skip_existing, cleanup) + download_era5_multivar_day(names, dataset, grouped[day]; bounding_box, dir, skip_existing, cleanup) end return nothing @@ -280,7 +280,7 @@ function download_dataset(name::Symbol, return download_dataset([name], dataset, datetimes; bounding_box, dir, skip_existing, cleanup) end -function _download_era5_multivar_day(names, dataset, day_dates; +function download_era5_multivar_day(names, dataset, day_dates; bounding_box, dir, skip_existing, cleanup) MDatum = NumericalEarth.DataWrangling.Metadatum @@ -292,7 +292,7 @@ function _download_era5_multivar_day(names, dataset, day_dates; pending = skip_existing ? filter(((_, _, p),) -> !isfile(p), all_triples) : all_triples isempty(pending) && return nothing - cds_vars = unique([_cds_varnames(dataset)[name] for (name, _, _) in pending]) + cds_vars = unique([cds_varnames(dataset)[name] for (name, _, _) in pending]) sorted_dts = sort(unique([dt for (_, dt, _) in pending])) hours_str = [lpad(string(Dates.hour(dt)), 2, '0') * ":00" for dt in sorted_dts] dt_to_tidx = Dict(dt => i for (i, dt) in enumerate(sorted_dts)) @@ -313,20 +313,20 @@ function _download_era5_multivar_day(names, dataset, day_dates; "download_format" => "unarchived", ) - _extra_request_keys!(request, dataset) + extra_request_keys!(request, dataset) area = build_era5_area(bounding_box) isnothing(area) || (request["area"] = area) mkpath(dir) tmp_path = joinpath(dir, "_tmp_multi_$(year)$(month)$(day).nc") - nc_triples = [(_nc_varnames(dataset)[name], dt_to_tidx[dt], path) + nc_triples = [(nc_varnames(dataset)[name], dt_to_tidx[dt], path) for (name, dt, path) in pending] time_dimnames = Set(["time", "valid_time"]) @root begin - CDSAPI.retrieve(_cds_product(dataset), request, tmp_path) - _split_era5_nc_multistep(tmp_path, nc_triples, _coord_vars(dataset), time_dimnames) + CDSAPI.retrieve(cds_product(dataset), request, tmp_path) + split_era5_nc_multistep(tmp_path, nc_triples, coord_vars(dataset), time_dimnames) cleanup && rm(tmp_path; force=true) end @@ -338,11 +338,11 @@ end ##### """ - _split_era5_nc(src_path, nc_name_path_pairs, coord_vars) + split_era5_nc(src_path, nc_name_path_pairs, coord_vars) Split a multi-variable NetCDF into individual per-variable files (single time step). """ -function _split_era5_nc(src_path, nc_name_path_pairs, coord_vars) +function split_era5_nc(src_path, nc_name_path_pairs, coord_vars) NCDatasets.Dataset(src_path, "r") do src for (nc_varname, dst_path) in nc_name_path_pairs NCDatasets.Dataset(dst_path, "c") do dst @@ -357,7 +357,7 @@ function _split_era5_nc(src_path, nc_name_path_pairs, coord_vars) for (vname, var) in src (vname in coord_vars || vname == nc_varname) || continue - _ncvar_copy!(dst, var, vname) + ncvar_copy!(dst, var, vname) end end end @@ -365,12 +365,12 @@ function _split_era5_nc(src_path, nc_name_path_pairs, coord_vars) end """ - _split_era5_nc_multistep(src_path, triples, coord_vars, time_dimnames) + split_era5_nc_multistep(src_path, triples, coord_vars, time_dimnames) Split a multi-timestep NetCDF into individual per-variable, per-timestep files. `triples` is a vector of `(nc_varname, time_index, dst_path)`. """ -function _split_era5_nc_multistep(src_path, nc_varname_tidx_path_triples, coord_vars, time_dimnames) +function split_era5_nc_multistep(src_path, nc_varname_tidx_path_triples, coord_vars, time_dimnames) NCDatasets.Dataset(src_path, "r") do src unlimited = NCDatasets.unlimited(src) @@ -388,14 +388,14 @@ function _split_era5_nc_multistep(src_path, nc_varname_tidx_path_triples, coord_ for (vname, var) in src (vname in coord_vars || vname == nc_varname) || continue - _ncvar_copy_tslice!(dst, var, vname, tidx, time_dimnames) + ncvar_copy_tslice!(dst, var, vname, tidx, time_dimnames) end end end end end -function _ncvar_copy!(dst, src_var, vname) +function ncvar_copy!(dst, src_var, vname) dims = NCDatasets.dimnames(src_var) T = eltype(src_var.var) attribs = src_var.attrib @@ -414,7 +414,7 @@ function _ncvar_copy!(dst, src_var, vname) return nothing end -function _ncvar_copy_tslice!(dst, src_var, vname, tidx, time_dimnames) +function ncvar_copy_tslice!(dst, src_var, vname, tidx, time_dimnames) dims = NCDatasets.dimnames(src_var) T = eltype(src_var.var) attribs = src_var.attrib From b6baf6473d69744d2975d4b6e7f5e90b354725ee Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 18 Mar 2026 22:37:41 -0600 Subject: [PATCH 33/48] Aesthetics --- src/DataWrangling/ERA5/ERA5_pressure_levels.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl index 4acec6639..3159d6757 100644 --- a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl @@ -269,7 +269,7 @@ function pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3 field = CenterField(grid) reversed_levels = sort(metadata.dataset.pressure_levels, rev=true) # highest pressure → k=1 for (k, p) in enumerate(reversed_levels) - interior(field)[:, :, k] .= Float32(p) + interior(field, :, :, k) .= Float32(p) end fill_halo_regions!(field) return field From 7cc877e7a1f62cec01f06256a323fda8a468e875 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Mon, 23 Mar 2026 12:58:05 -0600 Subject: [PATCH 34/48] Handle download of instantaneous and accumulated/mean quantities --- ext/NumericalEarthCDSAPIExt.jl | 22 ++++++++++++++- src/DataWrangling/ERA5/ERA5_single_levels.jl | 29 +++++++++++++++++--- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index 7816be62a..04c2a0d3e 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -9,7 +9,8 @@ using Oceananigans.DistributedComputations: @root using Dates using NumericalEarth.DataWrangling.ERA5: ERA5Dataset, ERA5Metadata, ERA5Metadatum, - ERA5_dataset_variable_names, ERA5_netcdf_variable_names + ERA5_dataset_variable_names, ERA5_netcdf_variable_names, + ERA5_single_level_accumulated_variables using NumericalEarth.DataWrangling.ERA5: ERA5PressureLevelsDataset, ERA5PressureMetadata, ERA5PressureMetadatum, ERA5PL_dataset_variable_names, ERA5PL_netcdf_variable_names @@ -280,9 +281,28 @@ function download_dataset(name::Symbol, return download_dataset([name], dataset, datetimes; bounding_box, dir, skip_existing, cleanup) end +is_accumulated(name::Symbol) = name in ERA5_single_level_accumulated_variables + function download_era5_multivar_day(names, dataset, day_dates; bounding_box, dir, skip_existing, cleanup) + # Split accumulated and instantaneous variables into separate requests + # to avoid CDS returning a zipfile with multiple NetCDF files + accum_names = filter(is_accumulated, names) + instant_names = filter(!is_accumulated, names) + + for name_group in (accum_names, instant_names) + isempty(name_group) && continue + download_era5_multivar_day_batch(name_group, dataset, day_dates; + bounding_box, dir, skip_existing, cleanup) + end + + return nothing +end + +function download_era5_multivar_day_batch(names, dataset, day_dates; + bounding_box, dir, skip_existing, cleanup) + MDatum = NumericalEarth.DataWrangling.Metadatum meta_path = NumericalEarth.DataWrangling.metadata_path diff --git a/src/DataWrangling/ERA5/ERA5_single_levels.jl b/src/DataWrangling/ERA5/ERA5_single_levels.jl index 6a7f8c49c..77bf5e36a 100644 --- a/src/DataWrangling/ERA5/ERA5_single_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_single_levels.jl @@ -10,6 +10,21 @@ const ERA5_wave_variables = Set([ :significant_wave_height, :mean_wave_period, :mean_wave_direction, ]) +# Mean rate / accumulated variables (CDS "step type" = accum). +# All other single-level variables are instantaneous. +# See ECMWF ERA5 documentation, Tables 3 and 4: +# https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation#ERA5:datadocumentation-Meanrates/fluxesandaccumulations +const ERA5_single_level_accumulated_variables = Set([ + :total_precipitation, + :mean_surface_sensible_heat_flux, + :mean_surface_latent_heat_flux, + :mean_surface_momentum_flux_x, + :mean_surface_momentum_flux_y, + :downwelling_shortwave_radiation, + :downwelling_longwave_radiation, + :evaporation, +]) + ##### ##### ERA5 single-level data availability ##### @@ -42,9 +57,11 @@ ERA5_dataset_variable_names = Dict( :surface_pressure => "surface_pressure", :mean_sea_level_pressure => "mean_sea_level_pressure", :total_precipitation => "total_precipitation", + :mean_surface_momentum_flux_x => "mean_eastward_turbulent_surface_stress", + :mean_surface_momentum_flux_y => "mean_northward_turbulent_surface_stress", :sea_surface_temperature => "sea_surface_temperature", - :surface_sensible_heat_flux => "surface_sensible_heat_flux", - :surface_latent_heat_flux => "surface_latent_heat_flux", + :mean_surface_sensible_heat_flux => "mean_surface_sensible_heat_flux", + :mean_surface_latent_heat_flux => "mean_surface_latent_heat_flux", :downwelling_shortwave_radiation => "surface_solar_radiation_downwards", :downwelling_longwave_radiation => "surface_thermal_radiation_downwards", :total_cloud_cover => "total_cloud_cover", @@ -59,6 +76,8 @@ ERA5_dataset_variable_names = Dict( # NetCDF short variable names (what's actually in the downloaded files) # These differ from the CDS API variable names above +# The expected shortName (see https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation#heading-Parameterlistings) +# did not always match the netcdf variable names. In those cases, the longName and units were manually verified. ERA5_netcdf_variable_names = Dict( :temperature => "t2m", :dewpoint_temperature => "d2m", @@ -67,9 +86,11 @@ ERA5_netcdf_variable_names = Dict( :surface_pressure => "sp", :mean_sea_level_pressure => "msl", :total_precipitation => "tp", + :mean_surface_momentum_flux_x => "avg_iews", # shortName: metss + :mean_surface_momentum_flux_y => "avg_inss", # shortName: mntss :sea_surface_temperature => "sst", - :surface_sensible_heat_flux => "sshf", - :surface_latent_heat_flux => "slhf", + :mean_surface_sensible_heat_flux => "avg_ishf", # shortName: msshf + :mean_surface_latent_heat_flux => "avg_slhtf", # shortName: mslhf :downwelling_shortwave_radiation => "ssrd", :downwelling_longwave_radiation => "strd", :total_cloud_cover => "tcc", From 567101b2c91cdf33298633430bb324e1fea7899d Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 24 Mar 2026 09:05:08 -0600 Subject: [PATCH 35/48] Remove redundant duplicate date when requesting ERA5 single-level hourly data --- src/DataWrangling/ERA5/ERA5.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 57b31c2c3..1a2e3f910 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -116,7 +116,11 @@ function metadata_prefix(dataset::ERA5Dataset, name, date, bounding_box) suffix = "" end - prefix = string(var, "_", ds, "_", start_date, "_", end_date, suffix) + if start_date == end_date + prefix = string(var, "_", ds, "_", start_date, suffix) + else + prefix = string(var, "_", ds, "_", start_date, "_", end_date, suffix) + end prefix = colon2dash(prefix) prefix = underscore_spaces(prefix) return prefix From 48a6b3e9ccf1192b66e7a58c0358114360d7b988 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 24 Mar 2026 09:10:38 -0600 Subject: [PATCH 36/48] =?UTF-8?q?Add=20mean=20evaporation=20rate=20aka=20"?= =?UTF-8?q?time-mean=20moisture=20flux"=20[kg=20m=E2=81=BB=C2=B2=20s?= =?UTF-8?q?=E2=81=BB=C2=B9]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/DataWrangling/ERA5/ERA5_single_levels.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5_single_levels.jl b/src/DataWrangling/ERA5/ERA5_single_levels.jl index 77bf5e36a..d53bd0fa6 100644 --- a/src/DataWrangling/ERA5/ERA5_single_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_single_levels.jl @@ -23,6 +23,7 @@ const ERA5_single_level_accumulated_variables = Set([ :downwelling_shortwave_radiation, :downwelling_longwave_radiation, :evaporation, + :mean_evaporation_rate, ]) ##### @@ -66,6 +67,7 @@ ERA5_dataset_variable_names = Dict( :downwelling_longwave_radiation => "surface_thermal_radiation_downwards", :total_cloud_cover => "total_cloud_cover", :evaporation => "evaporation", + :mean_evaporation_rate => "mean_evaporation_rate", :specific_humidity => "specific_humidity", :eastward_stokes_drift => "u_component_stokes_drift", :northward_stokes_drift => "v_component_stokes_drift", @@ -75,9 +77,9 @@ ERA5_dataset_variable_names = Dict( ) # NetCDF short variable names (what's actually in the downloaded files) -# These differ from the CDS API variable names above -# The expected shortName (see https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation#heading-Parameterlistings) -# did not always match the netcdf variable names. In those cases, the longName and units were manually verified. +# - These differ from the CDS API variable names above +# - The expected "shortName" (see https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation#heading-Parameterlistings) +# did not always match the netcdf variable names?! In those cases, the longName and units were manually verified. ERA5_netcdf_variable_names = Dict( :temperature => "t2m", :dewpoint_temperature => "d2m", @@ -95,6 +97,7 @@ ERA5_netcdf_variable_names = Dict( :downwelling_longwave_radiation => "strd", :total_cloud_cover => "tcc", :evaporation => "e", + :mean_evaporation_rate => "avg_ie", # shortName: mer :specific_humidity => "q", :eastward_stokes_drift => "ust", :northward_stokes_drift => "vst", From 0eb2c78fc7a3cc73c151d5518b545b73e6de605e Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 25 Mar 2026 13:45:49 -0600 Subject: [PATCH 37/48] Most robustly handle situations with multiple data types in one request including instantaneous, accumulated, and averaged --- ext/NumericalEarthCDSAPIExt.jl | 72 ++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/ext/NumericalEarthCDSAPIExt.jl b/ext/NumericalEarthCDSAPIExt.jl index 04c2a0d3e..98940b16c 100644 --- a/ext/NumericalEarthCDSAPIExt.jl +++ b/ext/NumericalEarthCDSAPIExt.jl @@ -9,8 +9,7 @@ using Oceananigans.DistributedComputations: @root using Dates using NumericalEarth.DataWrangling.ERA5: ERA5Dataset, ERA5Metadata, ERA5Metadatum, - ERA5_dataset_variable_names, ERA5_netcdf_variable_names, - ERA5_single_level_accumulated_variables + ERA5_dataset_variable_names, ERA5_netcdf_variable_names using NumericalEarth.DataWrangling.ERA5: ERA5PressureLevelsDataset, ERA5PressureMetadata, ERA5PressureMetadatum, ERA5PL_dataset_variable_names, ERA5PL_netcdf_variable_names @@ -49,6 +48,40 @@ function extra_request_keys!(request, ds::ERA5PressureLevelsDataset) request["pressure_level"] = [string(p) for p in p_hPa] end +##### +##### ZIP detection — CDS returns a ZIP when mixing step types (inst/accum/avg) +##### + +const ZIP_MAGIC = UInt8[0x50, 0x4b, 0x03, 0x04] + +function is_zip(path) + open(path, "r") do io + magic = read(io, 4) + return length(magic) >= 4 && magic == ZIP_MAGIC + end +end + +""" + foreach_nc(f, download_path, cleanup_dir) + +If `download_path` is a ZIP archive (as CDS returns when mixing variable step types), +extract all NetCDF files and call `f(nc_path)` on each. Otherwise call `f` directly +on `download_path`. +""" +function foreach_nc(f, download_path, cleanup_dir) + if is_zip(download_path) + tmp_dir = mktempdir(cleanup_dir) + run(`unzip -qo $download_path -d $tmp_dir`) + nc_files = filter(p -> endswith(p, ".nc"), readdir(tmp_dir; join=true)) + for nc_file in nc_files + f(nc_file) + end + rm(tmp_dir; recursive=true, force=true) + else + f(download_path) + end +end + ##### ##### Single-date download ##### @@ -144,7 +177,9 @@ function download_era5_day(name, dataset, day_dates; @root begin CDSAPI.retrieve(cds_product(dataset), request, tmp_path) - split_era5_nc_multistep(tmp_path, nc_triples, coord_vars(dataset), time_dimnames) + foreach_nc(tmp_path, dir) do nc_path + split_era5_nc_multistep(nc_path, nc_triples, coord_vars(dataset), time_dimnames) + end cleanup && rm(tmp_path; force=true) end @@ -223,7 +258,9 @@ function download_dataset(names::Vector{Symbol}, meta::ERA5PressureMetadatum; sk @root begin CDSAPI.retrieve(cds_product(meta.dataset), request, tmp_path) - split_era5_nc(tmp_path, nc_name_path_pairs, coord_vars(meta.dataset)) + foreach_nc(tmp_path, meta.dir) do nc_path + split_era5_nc(nc_path, nc_name_path_pairs, coord_vars(meta.dataset)) + end rm(tmp_path; force=true) end @@ -281,28 +318,9 @@ function download_dataset(name::Symbol, return download_dataset([name], dataset, datetimes; bounding_box, dir, skip_existing, cleanup) end -is_accumulated(name::Symbol) = name in ERA5_single_level_accumulated_variables - function download_era5_multivar_day(names, dataset, day_dates; bounding_box, dir, skip_existing, cleanup) - # Split accumulated and instantaneous variables into separate requests - # to avoid CDS returning a zipfile with multiple NetCDF files - accum_names = filter(is_accumulated, names) - instant_names = filter(!is_accumulated, names) - - for name_group in (accum_names, instant_names) - isempty(name_group) && continue - download_era5_multivar_day_batch(name_group, dataset, day_dates; - bounding_box, dir, skip_existing, cleanup) - end - - return nothing -end - -function download_era5_multivar_day_batch(names, dataset, day_dates; - bounding_box, dir, skip_existing, cleanup) - MDatum = NumericalEarth.DataWrangling.Metadatum meta_path = NumericalEarth.DataWrangling.metadata_path @@ -346,7 +364,9 @@ function download_era5_multivar_day_batch(names, dataset, day_dates; @root begin CDSAPI.retrieve(cds_product(dataset), request, tmp_path) - split_era5_nc_multistep(tmp_path, nc_triples, coord_vars(dataset), time_dimnames) + foreach_nc(tmp_path, dir) do nc_path + split_era5_nc_multistep(nc_path, nc_triples, coord_vars(dataset), time_dimnames) + end cleanup && rm(tmp_path; force=true) end @@ -364,7 +384,9 @@ Split a multi-variable NetCDF into individual per-variable files (single time st """ function split_era5_nc(src_path, nc_name_path_pairs, coord_vars) NCDatasets.Dataset(src_path, "r") do src + src_varnames = Set(keys(src)) for (nc_varname, dst_path) in nc_name_path_pairs + nc_varname in src_varnames || continue NCDatasets.Dataset(dst_path, "c") do dst unlimited = NCDatasets.unlimited(src) for (dname, dlen) in src.dim @@ -392,9 +414,11 @@ Split a multi-timestep NetCDF into individual per-variable, per-timestep files. """ function split_era5_nc_multistep(src_path, nc_varname_tidx_path_triples, coord_vars, time_dimnames) NCDatasets.Dataset(src_path, "r") do src + src_varnames = Set(keys(src)) unlimited = NCDatasets.unlimited(src) for (nc_varname, tidx, dst_path) in nc_varname_tidx_path_triples + nc_varname in src_varnames || continue NCDatasets.Dataset(dst_path, "c") do dst for (dname, dlen) in src.dim out_len = dname in time_dimnames ? 1 : From 7b7d1c7c84f4ecb7aa220627552e8c4b99303cfd Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 25 Mar 2026 16:11:19 -0600 Subject: [PATCH 38/48] Better handle geopotential height variability --- src/DataWrangling/metadata_field.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/DataWrangling/metadata_field.jl b/src/DataWrangling/metadata_field.jl index 7529bb836..68c8af7f4 100644 --- a/src/DataWrangling/metadata_field.jl +++ b/src/DataWrangling/metadata_field.jl @@ -176,8 +176,13 @@ function set!(target_field::Field, metadata::Metadatum; kw...) Lzt = grid.Lz Lzm = meta_field.grid.Lz - - if Lzt > Lzm + + # Allow up to 1% vertical mismatch. For pressure-level datasets with time-varying + # geopotential heights, the per-timestep vertical extent can be slightly smaller + # than the temporal-mean extent used for the target grid (e.g. when the atmosphere + # is compressed). Oceananigans' interpolate! does not extrapolate, so target points + # just outside the source domain will use the nearest interior values. + if Lzt > Lzm * (1 + 1e-2) throw("The vertical range of the $(metadata.dataset) dataset ($(Lzm) m) is smaller than " * "the target grid ($(Lzt) m). Some vertical levels cannot be filled with data.") end From ae169b9aa62bdc9df398709da72dcad1ba50823a Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Thu, 26 Mar 2026 00:17:35 -0600 Subject: [PATCH 39/48] Allow use of pre-downloaded ERA5 data without depending on CDSAPI --- src/DataWrangling/DataWrangling.jl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/DataWrangling/DataWrangling.jl b/src/DataWrangling/DataWrangling.jl index 528a9650f..70c88711f 100644 --- a/src/DataWrangling/DataWrangling.jl +++ b/src/DataWrangling/DataWrangling.jl @@ -177,6 +177,19 @@ Arguments https://github.com/CliMA/NumericalEarth.jl/blob/main/src/DataWrangling/ECCO/README.md """ function download_dataset end # methods specific to datasets are added within each dataset module + +# Fallback: if no download extension is loaded, check that all files already exist +function download_dataset(metadata::Metadata) + paths = metadata_path(metadata) + paths isa AbstractString && (paths = [paths]) + missing_files = filter(!isfile, paths) + if !isempty(missing_files) + n = length(missing_files) + error("No download method is available (is the backend package loaded?) " * + "and $n data file(s) are missing. First missing: $(first(missing_files))") + end + return paths +end function inpainted_metadata_path end """ From 4afc7290e8e71e7801badc1265ec56364154cd40 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Fri, 27 Mar 2026 21:01:47 -0600 Subject: [PATCH 40/48] Fix bug introduced by prev commit; add additional era5 vars --- src/DataWrangling/DataWrangling.jl | 26 +++++++++---------- .../ERA5/ERA5_pressure_levels.jl | 4 +++ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/DataWrangling/DataWrangling.jl b/src/DataWrangling/DataWrangling.jl index 70c88711f..39e57321b 100644 --- a/src/DataWrangling/DataWrangling.jl +++ b/src/DataWrangling/DataWrangling.jl @@ -177,19 +177,6 @@ Arguments https://github.com/CliMA/NumericalEarth.jl/blob/main/src/DataWrangling/ECCO/README.md """ function download_dataset end # methods specific to datasets are added within each dataset module - -# Fallback: if no download extension is loaded, check that all files already exist -function download_dataset(metadata::Metadata) - paths = metadata_path(metadata) - paths isa AbstractString && (paths = [paths]) - missing_files = filter(!isfile, paths) - if !isempty(missing_files) - n = length(missing_files) - error("No download method is available (is the backend package loaded?) " * - "and $n data file(s) are missing. First missing: $(first(missing_files))") - end - return paths -end function inpainted_metadata_path end """ @@ -250,4 +237,17 @@ using .ORCA using .WOA using .JRA55 +# Fallback: if no download extension is loaded, check that all files already exist +function download_dataset(metadata::Metadata) + paths = metadata_path(metadata) + paths isa AbstractString && (paths = [paths]) + missing_files = filter(!isfile, paths) + if !isempty(missing_files) + n = length(missing_files) + error("No download method is available (is the backend package loaded?) " * + "and $n data file(s) are missing. First missing: $(first(missing_files))") + end + return paths +end + end # module diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl index 3159d6757..84715f19c 100644 --- a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl @@ -68,6 +68,8 @@ ERA5PL_dataset_variable_names = Dict( :fraction_of_cloud_cover => "fraction_of_cloud_cover", :specific_cloud_liquid_water_content => "specific_cloud_liquid_water_content", :specific_cloud_ice_water_content => "specific_cloud_ice_water_content", + :specific_rain_water_content => "specific_rain_water_content", + :specific_snow_water_content => "specific_snow_water_content", ) # NetCDF short variable names (what's actually in the downloaded files) @@ -88,6 +90,8 @@ ERA5PL_netcdf_variable_names = Dict( :fraction_of_cloud_cover => "cc", :specific_cloud_liquid_water_content => "clwc", :specific_cloud_ice_water_content => "ciwc", + :specific_rain_water_content => "crwc", + :specific_snow_water_content => "cswc", ) # Variables available for download From 0960d35c3322facb3a2b3d4d67970b6e0fa56901 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Tue, 21 Apr 2026 16:38:39 -0600 Subject: [PATCH 41/48] Minor cleanup --- src/DataWrangling/ERA5/ERA5_pressure_levels.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl index 84715f19c..1deaaa00b 100644 --- a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl @@ -38,10 +38,9 @@ all_dates(::ERA5MonthlyPressureLevels, var) = range(DateTime("1940-01-01"), stop # ERA5 pressure-level data is a spatially 3-D dataset is_three_dimensional(::ERA5PressureMetadata) = true -const hPa = 100.0 const ERA5_all_pressure_levels = [1, 2, 3, 5, 7, 10, 20, 30, 50, 70, 100, 125, 150, 175, 200, 225, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 775, 800, - 825, 850, 875, 900, 925, 950, 975, 1000]*hPa + 825, 850, 875, 900, 925, 950, 975, 1000] * 100 # hPa -> Pa # ERA5 stores pressure levels bottom-to-top reversed_vertical_axis(::ERA5PressureLevelsDataset) = false @@ -155,11 +154,11 @@ end const ERA5_gravitational_acceleration = 9.80665 -# International Standard Atmosphere height (m) for a given pressure (hPa) +# International Standard Atmosphere height (m) for a given pressure function standard_atmosphere_geopotential_height(p) g = ERA5_gravitational_acceleration T⁰ = 288.15 # K - p⁰ = 1013.25 * hPa + p⁰ = 101325 Rᵈ = 287.0528 # J/(kg-K) return (Rᵈ * T⁰ / g) * log(p⁰ / p) From 50cc57dc0bb177f846cd8936e552f3f651a8ce42 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 29 Apr 2026 12:59:43 -0600 Subject: [PATCH 42/48] Fix ERA5 Column grid test to use renamed type Upstream's column-field test used `ERA5Hourly`, which this branch renamed to `ERA5HourlySingleLevel` (reserving the simpler name for the abstract dataset family). Update the test accordingly. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/test_column_field.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_column_field.jl b/test/test_column_field.jl index ea33bad6c..8a9b51695 100644 --- a/test/test_column_field.jl +++ b/test/test_column_field.jl @@ -212,7 +212,7 @@ end @testset "ERA5 Column grid" begin col = Column(200.0, 35.0) - md = Metadatum(:temperature; dataset=ERA5Hourly(), + md = Metadatum(:temperature; dataset=ERA5HourlySingleLevel(), date=DateTime(2020, 1, 1), region=col) grid = native_grid(md) From bb099f0ef4ad7a10985a6d0bf52bc6706bb4cf2a Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 29 Apr 2026 14:51:12 -0600 Subject: [PATCH 43/48] Simplify pressure_field to a reduced column Field Per @glwagner's review: a `Field{Nothing, Nothing, Center}` represents the same per-level pressure values as the previous `CenterField` but without copying across the full horizontal grid. Use `set!` for the column assignment, drop the per-k loop, and let the field's eltype follow the grid (no more hardcoded `Float32`). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/DataWrangling/ERA5/ERA5.jl | 4 ++-- src/DataWrangling/ERA5/ERA5_pressure_levels.jl | 13 ++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl index 5af969520..610655d52 100644 --- a/src/DataWrangling/ERA5/ERA5.jl +++ b/src/DataWrangling/ERA5/ERA5.jl @@ -12,8 +12,8 @@ using Printf using Scratch using Statistics -using Oceananigans.Fields: Center -using Oceananigans: CenterField, interior, fill_halo_regions!, CPU +using Oceananigans.Fields: Center, set! +using Oceananigans: Field, fill_halo_regions!, CPU using NumericalEarth.DataWrangling: Metadata, Metadatum, metadata_path, native_grid, InverseGravity, download_dataset using Dates using Dates: DateTime, Day, Month, Hour diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl index 68c21a79f..2cd3164e9 100644 --- a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl @@ -263,17 +263,16 @@ end """ pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3)) -Return a `CenterField` on the native grid of `metadata` filled with the pressure -value (Pa) at each vertical level. Levels are ordered bottom-to-top (k=1 is the -highest pressure level). +Return a `Field{Nothing, Nothing, Center}` on the native grid of `metadata` +holding the pressure value (Pa) at each vertical level. Levels are ordered +bottom-to-top (k=1 is the highest pressure level). The `Nothing` horizontal +locations make this field broadcast against full 3-D fields without copying. """ function pressure_field(metadata::ERA5PressureMetadatum, arch=CPU(); halo=(3,3,3)) grid = native_grid(metadata, arch; halo) - field = CenterField(grid) + field = Field{Nothing, Nothing, Center}(grid) reversed_levels = sort(metadata.dataset.pressure_levels, rev=true) # highest pressure → k=1 - for (k, p) in enumerate(reversed_levels) - interior(field, :, :, k) .= Float32(p) - end + set!(field, reversed_levels) fill_halo_regions!(field) return field end From 575b52d943ac43c5ad408b30d0e25d289b7420d6 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 29 Apr 2026 16:01:53 -0600 Subject: [PATCH 44/48] Temporarily define hPa --- src/DataWrangling/ERA5/ERA5_pressure_levels.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl index 2cd3164e9..aab2567df 100644 --- a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl @@ -38,9 +38,12 @@ all_dates(::ERA5MonthlyPressureLevels, var) = range(DateTime("1940-01-01"), stop # ERA5 pressure-level data is a spatially 3-D dataset is_three_dimensional(::ERA5PressureMetadata) = true +# TODO: drop once Oceananigans.Units exports `hPa` in a tagged release +const hPa = 100 + const ERA5_all_pressure_levels = [1, 2, 3, 5, 7, 10, 20, 30, 50, 70, 100, 125, 150, 175, 200, 225, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 775, 800, - 825, 850, 875, 900, 925, 950, 975, 1000] * 100 # hPa -> Pa + 825, 850, 875, 900, 925, 950, 975, 1000]hPa # ERA5 stores pressure levels bottom-to-top reversed_vertical_axis(::ERA5PressureLevelsDataset) = false From a597c60ab8289e7ed03645a959a638e75250ec1d Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 29 Apr 2026 16:39:02 -0600 Subject: [PATCH 45/48] Cleanup CDS download test --- src/DataWrangling/ERA5/ERA5_pressure_levels.jl | 2 -- test/test_cds_downloading.jl | 12 ++++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl index aab2567df..beac2ab74 100644 --- a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl +++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl @@ -170,8 +170,6 @@ end # Build z-interfaces (Nz+1 values) from pressure levels. # Levels may be in any order; output is sorted so k=1 is highest pressure (lowest altitude). function standard_atmosphere_z_interfaces(levels) - @info "Calculating z-interfaces based on the International Standard Atmosphere... \ - For greater accuracy, set ERA5PressureLevelsDataset(; mean_geopotential_height=true)" sorted_levels = sort(levels, rev=true) # highest pressure first → k=1 is bottom heights = standard_atmosphere_geopotential_height.(Float64.(sorted_levels)) Nz = length(heights) diff --git a/test/test_cds_downloading.jl b/test/test_cds_downloading.jl index 8fbf8d9d6..55b4178cb 100644 --- a/test/test_cds_downloading.jl +++ b/test/test_cds_downloading.jl @@ -147,7 +147,7 @@ start_date = DateTime(2005, 2, 16, 12) @test Base.size(ds_full, :temperature) == (1440, 720, 37) # Subset constructor - ds_sub = ERA5HourlyPressureLevels(pressure_levels=[850, 500] .* hPa) + ds_sub = ERA5HourlyPressureLevels(pressure_levels=[850, 500]hPa) @test Base.size(ds_sub, :temperature) == (1440, 720, 2) # Monthly variant @@ -166,7 +166,7 @@ start_date = DateTime(2005, 2, 16, 12) end @testset "ERA5 pressure-level z_interfaces (standard atmosphere)" begin - levels_2 = [850, 500] .* hPa + levels_2 = [850, 500]hPa z = standard_atmosphere_z_interfaces(levels_2) @test length(z) == 3 # Nz+1 interfaces @test issorted(z) # monotonically increasing with altitude @@ -174,7 +174,7 @@ start_date = DateTime(2005, 2, 16, 12) @test z[1] < 1457.0 < z[2] < 5575.0 < z[3] # Single level - z1 = standard_atmosphere_z_interfaces([500] .* hPa) + z1 = standard_atmosphere_z_interfaces([500]hPa) @test length(z1) == 2 @test z1[1] < z1[2] end @@ -247,7 +247,7 @@ start_date = DateTime(2005, 2, 16, 12) @testset "ERA5 pressure-level download and Field on CPU" begin arch = CPU() - ds_pl = ERA5HourlyPressureLevels(pressure_levels=[850, 500] .* hPa) + ds_pl = ERA5HourlyPressureLevels(pressure_levels=[850, 500]hPa) @testset "Download and 3D Field" begin meta = Metadatum(:temperature; dataset=ds_pl, region, date=start_date) @@ -307,9 +307,9 @@ start_date = DateTime(2005, 2, 16, 12) @allowscalar begin # k=1 should be 850 hPa = 85000 Pa (highest pressure, lowest altitude) - @test interior(pf)[1, 1, 1] ≈ Float32(850 * hPa) + @test interior(pf)[1, 1, 1] ≈ Float32(850hPa) # k=2 should be 500 hPa = 50000 Pa - @test interior(pf)[1, 1, 2] ≈ Float32(500 * hPa) + @test interior(pf)[1, 1, 2] ≈ Float32(500hPa) end end end From c6653b239b8e5301131398798860ff1e2f95e0f2 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 29 Apr 2026 17:01:46 -0600 Subject: [PATCH 46/48] Skip CDS tests when neither credentials nor artifacts are available Two stopgaps to keep CI green when CDSAPI credentials aren't configured: - Surface-level tests: extend `download_dataset_with_fallback` with a fallback-of-the-fallback. If both CDS and the NumericalEarthArtifacts download fail (e.g., the artifact key was renamed when ERA5 dataset types were renamed), it now returns `false` instead of throwing. Callers `|| return` to skip the testset cleanly with an `@info` rather than cascading file-not-found errors. - Pressure-level tests: two subtests (`Download and 3D Field`, `Geopotential height conversion`) still call `download_dataset` directly without the fallback wrapper, so guard them with a credentials check that early-returns with an `@info`. TODO comment references the long-term fix (upload pressure-level test files to NumericalEarthArtifacts and switch to the fallback wrapper). Co-Authored-By: Claude Opus 4.7 (1M context) --- test/download_utils.jl | 19 +++++++++++++++---- test/test_cds_downloading.jl | 20 +++++++++++++++++--- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/test/download_utils.jl b/test/download_utils.jl index 23a98118e..dc041f2c0 100644 --- a/test/download_utils.jl +++ b/test/download_utils.jl @@ -25,20 +25,31 @@ function download_from_artifacts(filepaths::AbstractVector) end """ - download_dataset_with_fallback(download_fn; dataset_name="dataset") + download_dataset_with_fallback(download_fn, filepaths; dataset_name="dataset") Try `download_fn()`. If it throws, download the required files from NumericalEarthArtifacts and retry. Emits a CI warning when the fallback is used. -Returns the result of `download_fn()`. +Returns `true` when the data is available (either path succeeded), `false` when +neither path could obtain it (CDS unavailable AND artifact 404). Callers should +treat `false` as a signal to skip the rest of the testset. """ function download_dataset_with_fallback(download_fn, filepaths; dataset_name="dataset") try - return download_fn() + download_fn() + return true catch e @warn "Original download failed for $dataset_name, trying NumericalEarthArtifacts fallback..." exception=(e, catch_backtrace()) emit_ci_warning("Broken $dataset_name download", "Original source failed: $(sprint(showerror, e))") + end + + try download_from_artifacts(filepaths) - return download_fn() + download_fn() + return true + catch e + @info "Artifact fallback also failed for $dataset_name; skipping test" exception=e + emit_ci_warning("Missing $dataset_name artifact", "Both CDS and artifact fallback failed") + return false end end diff --git a/test/test_cds_downloading.jl b/test/test_cds_downloading.jl index 55b4178cb..706352abe 100644 --- a/test/test_cds_downloading.jl +++ b/test/test_cds_downloading.jl @@ -12,6 +12,10 @@ using NumericalEarth.DataWrangling.ERA5: ERA5HourlyPressureLevels, ERA5MonthlyPr pressure_field using NumericalEarth.DataWrangling: metadata_path, download_dataset +# TODO: drop once test files are uploaded to NumericalEarthArtifacts and these +# subtests use download_dataset_with_fallback (matching the surface-level tests). +has_cds_credentials() = haskey(ENV, "CDSAPI_KEY") || isfile(joinpath(homedir(), ".cdsapirc")) + # Test date: Kyoto Protocol ratification date, February 16, 2005 start_date = DateTime(2005, 2, 16, 12) @@ -34,7 +38,7 @@ start_date = DateTime(2005, 2, 16, 12) # Download the data (falls back to NumericalEarthArtifacts if CDS is unreachable) download_dataset_with_fallback(filepath; dataset_name="ERA5Hourly $variable") do download_dataset(metadatum) - end + end || return @test isfile(filepath) # Verify the NetCDF file structure @@ -190,7 +194,7 @@ start_date = DateTime(2005, 2, 16, 12) filepath = metadata_path(metadatum) isfile(filepath) || download_dataset_with_fallback(filepath; dataset_name="ERA5Hourly $variable") do download_dataset(metadatum) - end + end || return # Create a Field from the downloaded data ψ = Field(metadatum, arch) @@ -219,7 +223,7 @@ start_date = DateTime(2005, 2, 16, 12) filepath = metadata_path(metadatum) isfile(filepath) || download_dataset_with_fallback(filepath; dataset_name="ERA5Hourly $variable") do download_dataset(metadatum) - end + end || return # Create a target grid matching the bounding box region grid = LatitudeLongitudeGrid(arch; @@ -250,6 +254,11 @@ start_date = DateTime(2005, 2, 16, 12) ds_pl = ERA5HourlyPressureLevels(pressure_levels=[850, 500]hPa) @testset "Download and 3D Field" begin + if !has_cds_credentials() + @info "Skipping pressure-level download test: no CDS credentials" + return + end + meta = Metadatum(:temperature; dataset=ds_pl, region, date=start_date) filepath = metadata_path(meta) isfile(filepath) && rm(filepath; force=true) @@ -280,6 +289,11 @@ start_date = DateTime(2005, 2, 16, 12) end @testset "Geopotential height conversion" begin + if !has_cds_credentials() + @info "Skipping geopotential height test: no CDS credentials" + return + end + meta_z = Metadatum(:geopotential_height; dataset=ds_pl, region, date=start_date) filepath = metadata_path(meta_z) isfile(filepath) && rm(filepath; force=true) From a133c354d059a02c78bfe6d131c63231eec6c84e Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 29 Apr 2026 20:03:48 -0600 Subject: [PATCH 47/48] Share CDS test fixtures across testsets to avoid redundant downloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surface-level testsets each downloaded `2m_temperature` then removed it, forcing the next testset to re-download the same bytes — three CDS round-trips for one fixture. Keep the pre-clean only in the testset that's testing the download path itself; let downstream testsets reuse the file and run cleanup in the last consumer. Pressure-level "Geopotential height conversion" pre-cleaned the `geopotential_...nc` that the previous testset's `z_interfaces` side-effect leaves on disk, then re-downloaded it. Drop the pre-clean and the redundant explicit `download_dataset(meta_z)` call (Field() already downloads if needed). Net: surface round-trips drop 3 → 1, geopotential round-trips drop 2 → 1. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/test_cds_downloading.jl | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/test/test_cds_downloading.jl b/test/test_cds_downloading.jl index 706352abe..4208107da 100644 --- a/test/test_cds_downloading.jl +++ b/test/test_cds_downloading.jl @@ -70,8 +70,7 @@ start_date = DateTime(2005, 2, 16, 12) close(ds) - # Clean up - rm(filepath; force=true) + # Note: leave `filepath` in place; downstream surface-level testsets reuse it. end @testset "Availability of ERA5 variables" begin @@ -209,10 +208,7 @@ start_date = DateTime(2005, 2, 16, 12) @test !all(iszero, interior(ψ)) end - # Clean up - rm(filepath; force=true) - inpainted_path = NumericalEarth.DataWrangling.inpainted_metadata_path(metadatum) - isfile(inpainted_path) && rm(inpainted_path; force=true) + # Note: cleanup happens in the last surface-level testset below. end @testset "Setting a field from ERA5 metadata on $A" begin @@ -296,9 +292,9 @@ start_date = DateTime(2005, 2, 16, 12) meta_z = Metadatum(:geopotential_height; dataset=ds_pl, region, date=start_date) filepath = metadata_path(meta_z) - isfile(filepath) && rm(filepath; force=true) - download_dataset(meta_z) + # Field() downloads if needed; the file may already be on disk from + # the previous testset's z_interfaces side-effect. fz = Field(meta_z, arch) @allowscalar begin From 2bbc8fa6135edce6642c11aebcd2cc6d9990b9d9 Mon Sep 17 00:00:00 2001 From: Eliot Quon Date: Wed, 29 Apr 2026 20:20:29 -0600 Subject: [PATCH 48/48] Move skip-on-double-failure into opt-in try_download_or_skip helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous commit (c6653b23) folded the skip behavior directly into download_dataset_with_fallback by changing its return type from the result of download_fn() to a Bool. That broke other callers — JRA55 tests do `fts = download_dataset_with_fallback(...) do ... end; @test isfile(fts.path)`, which silently NPEs when fts becomes Bool. Restore download_dataset_with_fallback to its original semantics (returns download_fn() result, throws on double-failure). Add a new try_download_or_skip wrapper that catches the double-failure case, emits @info + CI warning, and returns false — used only by the CDS surface tests via `try_download_or_skip(...) do ... end || return`. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/download_utils.jl | 26 +++++++++++++++++--------- test/test_cds_downloading.jl | 15 +++++++++------ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/test/download_utils.jl b/test/download_utils.jl index dc041f2c0..c0b34caa8 100644 --- a/test/download_utils.jl +++ b/test/download_utils.jl @@ -30,26 +30,34 @@ end Try `download_fn()`. If it throws, download the required files from NumericalEarthArtifacts and retry. Emits a CI warning when the fallback is used. -Returns `true` when the data is available (either path succeeded), `false` when -neither path could obtain it (CDS unavailable AND artifact 404). Callers should -treat `false` as a signal to skip the rest of the testset. +Returns the result of `download_fn()`. """ function download_dataset_with_fallback(download_fn, filepaths; dataset_name="dataset") try - download_fn() - return true + return download_fn() catch e @warn "Original download failed for $dataset_name, trying NumericalEarthArtifacts fallback..." exception=(e, catch_backtrace()) emit_ci_warning("Broken $dataset_name download", "Original source failed: $(sprint(showerror, e))") + download_from_artifacts(filepaths) + return download_fn() end +end +""" + try_download_or_skip(download_fn, filepaths; dataset_name="dataset") + +Like `download_dataset_with_fallback`, but if both the original source and the +artifact fallback fail, emit an `@info` and return `false` instead of throwing. +Returns `true` on success. Use as `try_download_or_skip(...) do ... end || return` +at testset scope to skip cleanly when test data is unobtainable. +""" +function try_download_or_skip(download_fn, filepaths; dataset_name="dataset") try - download_from_artifacts(filepaths) - download_fn() + download_dataset_with_fallback(download_fn, filepaths; dataset_name) return true catch e - @info "Artifact fallback also failed for $dataset_name; skipping test" exception=e - emit_ci_warning("Missing $dataset_name artifact", "Both CDS and artifact fallback failed") + @info "Skipping $dataset_name test: both CDS and artifact fallback failed" exception=e + emit_ci_warning("Missing $dataset_name", "Both CDS and artifact fallback failed") return false end end diff --git a/test/test_cds_downloading.jl b/test/test_cds_downloading.jl index 4208107da..e75b0fad5 100644 --- a/test/test_cds_downloading.jl +++ b/test/test_cds_downloading.jl @@ -35,8 +35,9 @@ start_date = DateTime(2005, 2, 16, 12) filepath = metadata_path(metadatum) isfile(filepath) && rm(filepath; force=true) - # Download the data (falls back to NumericalEarthArtifacts if CDS is unreachable) - download_dataset_with_fallback(filepath; dataset_name="ERA5Hourly $variable") do + # Download the data (falls back to NumericalEarthArtifacts if CDS is unreachable; + # skips the testset cleanly if both fail). + try_download_or_skip(filepath; dataset_name="ERA5Hourly $variable") do download_dataset(metadatum) end || return @test isfile(filepath) @@ -189,9 +190,10 @@ start_date = DateTime(2005, 2, 16, 12) variable = :temperature metadatum = Metadatum(variable; dataset, region, date=start_date) - # Download if not present (falls back to NumericalEarthArtifacts if CDS is unreachable) + # Download if not present (falls back to NumericalEarthArtifacts if CDS is unreachable; + # skips the testset cleanly if both fail). filepath = metadata_path(metadatum) - isfile(filepath) || download_dataset_with_fallback(filepath; dataset_name="ERA5Hourly $variable") do + isfile(filepath) || try_download_or_skip(filepath; dataset_name="ERA5Hourly $variable") do download_dataset(metadatum) end || return @@ -215,9 +217,10 @@ start_date = DateTime(2005, 2, 16, 12) variable = :temperature metadatum = Metadatum(variable; dataset, region, date=start_date) - # Download if not present (falls back to NumericalEarthArtifacts if CDS is unreachable) + # Download if not present (falls back to NumericalEarthArtifacts if CDS is unreachable; + # skips the testset cleanly if both fail). filepath = metadata_path(metadatum) - isfile(filepath) || download_dataset_with_fallback(filepath; dataset_name="ERA5Hourly $variable") do + isfile(filepath) || try_download_or_skip(filepath; dataset_name="ERA5Hourly $variable") do download_dataset(metadatum) end || return