From 66cd84dac638f3780c274e08e1c975e9a16be737 Mon Sep 17 00:00:00 2001 From: Elynn Wu Date: Tue, 3 Feb 2026 12:50:50 -0800 Subject: [PATCH 1/3] rename to era5 standard --- .../data_process/compute_dataset_e3smv2.py | 8 +- .../configs/e3sm-1deg-8layer-v3.yaml | 84 ++++++++++++++++--- 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/scripts/data_process/compute_dataset_e3smv2.py b/scripts/data_process/compute_dataset_e3smv2.py index edc49736a..930a653b0 100755 --- a/scripts/data_process/compute_dataset_e3smv2.py +++ b/scripts/data_process/compute_dataset_e3smv2.py @@ -82,7 +82,7 @@ "hybm", ], } - +GRAVITY = 9.80665 # m/s^2 def expand_names_by_level(variables: MutableMapping[str, List[str]]) -> List[str]: names = [] @@ -308,6 +308,10 @@ def compute_rad_fluxes( fluxes[output_name].attrs["units"] = ds[formula[1]].attrs["units"] return ds.assign(fluxes) +def sfc_phis_to_hgt(ds): + ds["HGTsfc"] = ds["PHIS"] / GRAVITY + ds = ds.drop_vars("PHIS") + return ds def construct_lazy_dataset( config: DatasetComputationConfig, @@ -422,6 +426,8 @@ def construct_lazy_dataset( "p_i pressure corresponds to the interface at the top of the i'th finite " "volume layer, counting down from the top of atmosphere." ) + ds = ds.rename(config.renaming) + ds = sfc_phis_to_hgt(ds) return ds diff --git a/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml b/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml index 03d17309c..b6cb1e1ca 100644 --- a/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml +++ b/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml @@ -1,18 +1,14 @@ runs: - 2025-04-03-e3smv3-1deg-ramped: "" -data_output_directory: /pscratch/sd/r/rebassoo/fme-preprocess/zarr/ + 2025-04-03-e3smv3-1deg-ramped: +data_output_directory: /pscratch/sd/r/elynnwu/fme-preprocess/zarr/ stats: - output_directory: /pscratch/sd/r/rebassoo/fme-preprocess/2025-04-03-e3smv3-1deg-ramped - start_date: "1920-01-01" - end_date: "2019-12-31" + output_directory: /pscratch/sd/r/elynnwu/fme-dataset/2026-02-03-E3SMv3-AMIP-1970-2020-1deg-8layer + start_date: 1970-01-01 + end_date: 2019-12-31 data_type: E3SMV2 beaker_dataset: e3sm-1deg-8layers-stats # this is not used in e3sm data processing dataset_computation: - chunking: - time_dim: 10 - latitude_dim: 180 - longitude_dim: 360 reference_vertical_coordinate_file: None time_invariant_dir: /global/cfs/cdirs/m4331/jpduncan/e3smv2/time_invariant vertical_coarsening_indices: @@ -27,7 +23,24 @@ dataset_computation: - [61, 69] - [69, 80] roundtrip_fraction_kept: 1.0 - n_split: 1600 + roundtrip_variables: + - PS + - TS + - T + - U + - V + - Q + - CLDLIQ + - CLDICE + - RAINQM + - TMQ + - TGCLDLWP + - TGCLDIWP + - QREFHT + - TREFHT + - U10 + - V10 + n_split: 200 variable_sources: time_invariant: - PHIS @@ -47,6 +60,10 @@ dataset_computation: - OCNFRAC - LANDFRAC - ICEFRAC + - QREFHT + - TREFHT + - U10 + - V10 6hourly/1yr: - PRECT - LHFLX @@ -61,6 +78,53 @@ dataset_computation: - PRECSC - PRECSL - QFLX + renaming:: + PS: PRESsfc + LHFLX: LHTFLsfc + SHFLX: SHTFLsfc + surface_precipitation_rate: PRATEsfc + FSDS: DSWRFsfc + surface_upward_shortwave_flux: USWRFsfc + FLDS: DLWRFsfc + surface_upward_longwave_flux: ULWRFsfc + FLUT: ULWRFtoa + SOLIN: DSWRFtoa + top_of_atmos_upward_shortwave_flux: USWRFtoa + T_0: air_temperature_0 + T_1: air_temperature_1 + T_2: air_temperature_2 + T_3: air_temperature_3 + T_4: air_temperature_4 + T_5: air_temperature_5 + T_6: air_temperature_6 + T_7: air_temperature_7 + T_8: air_temperature_8 + LANDFRAC: land_fraction + OCNFRAC: ocean_fraction + ICEFRAC: sea_ice_fraction + TS: surface_temperature + U_0: eastward_wind_0 + U_1: eastward_wind_1 + U_2: eastward_wind_2 + U_3: eastward_wind_3 + U_4: eastward_wind_4 + U_5: eastward_wind_5 + U_6: eastward_wind_6 + U_7: eastward_wind_7 + U_8: eastward_wind_8 + V_0: northward_wind_0 + V_1: northward_wind_1 + V_2: northward_wind_2 + V_3: northward_wind_3 + V_4: northward_wind_4 + V_5: northward_wind_5 + V_6: northward_wind_6 + V_7: northward_wind_7 + V_8: northward_wind_8 + QREFHT: Q2m + TREFHT: TMP2m + U10: UGRD10m + V10: VGRD10m standard_names: total_frozen_precip_rate: None surface_snow_rate: PRECSL From 7e755eef6aca6faaf6bf386f104c8ecd7ab1de13 Mon Sep 17 00:00:00 2001 From: Elynn Wu Date: Tue, 3 Feb 2026 13:08:08 -0800 Subject: [PATCH 2/3] 8 layers --- scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml b/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml index b6cb1e1ca..9ac152bc0 100644 --- a/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml +++ b/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml @@ -98,7 +98,6 @@ dataset_computation: T_5: air_temperature_5 T_6: air_temperature_6 T_7: air_temperature_7 - T_8: air_temperature_8 LANDFRAC: land_fraction OCNFRAC: ocean_fraction ICEFRAC: sea_ice_fraction @@ -111,7 +110,6 @@ dataset_computation: U_5: eastward_wind_5 U_6: eastward_wind_6 U_7: eastward_wind_7 - U_8: eastward_wind_8 V_0: northward_wind_0 V_1: northward_wind_1 V_2: northward_wind_2 @@ -120,7 +118,6 @@ dataset_computation: V_5: northward_wind_5 V_6: northward_wind_6 V_7: northward_wind_7 - V_8: northward_wind_8 QREFHT: Q2m TREFHT: TMP2m U10: UGRD10m From 9167f0950ab27a45515ab9fad817d9afa7d275f1 Mon Sep 17 00:00:00 2001 From: Elynn Wu Date: Tue, 3 Feb 2026 22:18:17 -0800 Subject: [PATCH 3/3] write co2 too --- scripts/data_process/compute_dataset.py | 3 +++ scripts/data_process/compute_dataset_e3smv2.py | 15 ++++++++++++--- .../data_process/configs/e3sm-1deg-8layer-v3.yaml | 14 ++++++-------- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/scripts/data_process/compute_dataset.py b/scripts/data_process/compute_dataset.py index d7b7c40f0..4e5ae3e66 100755 --- a/scripts/data_process/compute_dataset.py +++ b/scripts/data_process/compute_dataset.py @@ -271,6 +271,8 @@ class DatasetComputationConfig: mask_soil_moisture: (optional) whether to mask soil moisture content using soil temperature. This is useful for CM4 dataset, where soil moisture content is zero instead of NaN over the oceans. + CO2_concentration: (optional) CO2 concentration in volume mixing ratio. + This is used for E3SM dataset to add time-invariant CO2 concentration. """ reference_vertical_coordinate_file: str @@ -295,6 +297,7 @@ class DatasetComputationConfig: validate_vertical_coarsening_indices_land: bool = True reference_vertical_coordinate_file_land: Optional[str] = None mask_soil_moisture: bool = False + CO2_concentration: Optional[float] = None @dataclasses.dataclass diff --git a/scripts/data_process/compute_dataset_e3smv2.py b/scripts/data_process/compute_dataset_e3smv2.py index 930a653b0..54f8b9881 100755 --- a/scripts/data_process/compute_dataset_e3smv2.py +++ b/scripts/data_process/compute_dataset_e3smv2.py @@ -84,6 +84,7 @@ } GRAVITY = 9.80665 # m/s^2 + def expand_names_by_level(variables: MutableMapping[str, List[str]]) -> List[str]: names = [] for var_name, levels in variables.items(): @@ -178,6 +179,7 @@ def open_dataset( datasets[varname] = ds del var_paths["time_invariant"] for varname, paths in var_paths.items(): + print(varname) var_start = time.time() if varname in varnames_3D: drop_vars = DROP_VARIABLE_NAMES["3D"] @@ -186,9 +188,6 @@ def open_dataset( datasets[varname] = xr.open_mfdataset( paths, chunks=chunks, - data_vars="minimal", - coords="minimal", - parallel=True, ).drop(drop_vars, errors="ignore") logging.info(f"{varname} files opened in {time.time() - var_start:.2f} s...") logging.info(f"All files opened in {time.time() - start:.2f} s. Merging...") @@ -308,11 +307,20 @@ def compute_rad_fluxes( fluxes[output_name].attrs["units"] = ds[formula[1]].attrs["units"] return ds.assign(fluxes) + def sfc_phis_to_hgt(ds): ds["HGTsfc"] = ds["PHIS"] / GRAVITY ds = ds.drop_vars("PHIS") return ds + +def add_time_invariant_co2_concentration(ds, config): + if config.CO2_concentration is not None: + co2 = xr.DataArray(config.CO2_concentration, dims=["time"], coords=[ds["time"]]) + ds["global_mean_co2"] = co2 + return ds + + def construct_lazy_dataset( config: DatasetComputationConfig, dataset_dirs: MutableMapping[str, str], @@ -426,6 +434,7 @@ def construct_lazy_dataset( "p_i pressure corresponds to the interface at the top of the i'th finite " "volume layer, counting down from the top of atmosphere." ) + ds = add_time_invariant_co2_concentration(ds, config) ds = ds.rename(config.renaming) ds = sfc_phis_to_hgt(ds) return ds diff --git a/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml b/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml index 9ac152bc0..6b333b173 100644 --- a/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml +++ b/scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml @@ -1,10 +1,10 @@ runs: - 2025-04-03-e3smv3-1deg-ramped: + 2025-04-03-e3smv3-1deg-ramped: None data_output_directory: /pscratch/sd/r/elynnwu/fme-preprocess/zarr/ stats: output_directory: /pscratch/sd/r/elynnwu/fme-dataset/2026-02-03-E3SMv3-AMIP-1970-2020-1deg-8layer - start_date: 1970-01-01 - end_date: 2019-12-31 + start_date: "1970-01-01" + end_date: "2019-12-31" data_type: E3SMV2 beaker_dataset: e3sm-1deg-8layers-stats # this is not used in e3sm data processing @@ -39,8 +39,8 @@ dataset_computation: - QREFHT - TREFHT - U10 - - V10 n_split: 200 + CO2_concentration: 0.0003887 variable_sources: time_invariant: - PHIS @@ -63,7 +63,6 @@ dataset_computation: - QREFHT - TREFHT - U10 - - V10 6hourly/1yr: - PRECT - LHFLX @@ -78,7 +77,7 @@ dataset_computation: - PRECSC - PRECSL - QFLX - renaming:: + renaming: PS: PRESsfc LHFLX: LHTFLsfc SHFLX: SHTFLsfc @@ -120,8 +119,7 @@ dataset_computation: V_7: northward_wind_7 QREFHT: Q2m TREFHT: TMP2m - U10: UGRD10m - V10: VGRD10m + U10: wind_speed_10m standard_names: total_frozen_precip_rate: None surface_snow_rate: PRECSL