diff --git a/scripts/data_process/compute_dataset.py b/scripts/data_process/compute_dataset.py index d7b7c40f0..4e5ae3e66 100755 --- a/scripts/data_process/compute_dataset.py +++ b/scripts/data_process/compute_dataset.py @@ -271,6 +271,8 @@ class DatasetComputationConfig: mask_soil_moisture: (optional) whether to mask soil moisture content using soil temperature. This is useful for CM4 dataset, where soil moisture content is zero instead of NaN over the oceans. + CO2_concentration: (optional) CO2 concentration in volume mixing ratio. + This is used for E3SM dataset to add time-invariant CO2 concentration. """ reference_vertical_coordinate_file: str @@ -295,6 +297,7 @@ class DatasetComputationConfig: validate_vertical_coarsening_indices_land: bool = True reference_vertical_coordinate_file_land: Optional[str] = None mask_soil_moisture: bool = False + CO2_concentration: Optional[float] = None @dataclasses.dataclass diff --git a/scripts/data_process/compute_dataset_e3smv2.py b/scripts/data_process/compute_dataset_e3smv2.py index edc49736a..54f8b9881 100755 --- a/scripts/data_process/compute_dataset_e3smv2.py +++ b/scripts/data_process/compute_dataset_e3smv2.py @@ -82,6 +82,7 @@ "hybm", ], } +GRAVITY = 9.80665 # m/s^2 def expand_names_by_level(variables: MutableMapping[str, List[str]]) -> List[str]: @@ -178,6 +179,7 @@ def open_dataset( datasets[varname] = ds del var_paths["time_invariant"] for varname, paths in var_paths.items(): + print(varname) var_start = time.time() if varname in varnames_3D: drop_vars = DROP_VARIABLE_NAMES["3D"] @@ -186,9 +188,6 @@ def open_dataset( datasets[varname] = xr.open_mfdataset( paths, chunks=chunks, - data_vars="minimal", - coords="minimal", - parallel=True, ).drop(drop_vars, errors="ignore") logging.info(f"{varname} files opened in {time.time() - var_start:.2f} s...") logging.info(f"All files opened in {time.time() - start:.2f} s. 
def sfc_phis_to_hgt(ds):
    """Replace the ``PHIS`` variable with ``HGTsfc = PHIS / GRAVITY``.

    Args:
        ds: dataset containing a ``PHIS`` variable (presumably surface
            geopotential -- confirm against the E3SM output metadata).

    Returns:
        A new dataset with ``HGTsfc`` added and ``PHIS`` dropped. The input
        dataset is not modified.
    """
    # Use ``assign`` (as ``compute_rad_fluxes`` does) instead of item
    # assignment so the caller's dataset is not mutated as a side effect.
    return ds.assign(HGTsfc=ds["PHIS"] / GRAVITY).drop_vars("PHIS")


def add_time_invariant_co2_concentration(ds, config):
    """Add a constant ``global_mean_co2`` variable along the ``time`` dimension.

    Args:
        ds: dataset with a ``time`` coordinate.
        config: configuration object; its ``CO2_concentration`` attribute is
            either ``None`` or the scalar value to repeat at every time.

    Returns:
        ``ds`` unchanged when ``config.CO2_concentration`` is ``None``;
        otherwise a new dataset that additionally holds ``global_mean_co2``.
        The input dataset is not modified in either case.
    """
    if config.CO2_concentration is None:
        # Nothing configured: leave the dataset untouched.
        return ds
    co2 = xr.DataArray(
        config.CO2_concentration,
        dims=["time"],
        coords=[ds["time"]],
    )
    # ``assign`` returns a new dataset rather than mutating the caller's one.
    return ds.assign(global_mean_co2=co2)
69] - [69, 80] roundtrip_fraction_kept: 1.0 - n_split: 1600 + roundtrip_variables: + - PS + - TS + - T + - U + - V + - Q + - CLDLIQ + - CLDICE + - RAINQM + - TMQ + - TGCLDLWP + - TGCLDIWP + - QREFHT + - TREFHT + - U10 + n_split: 200 + CO2_concentration: 0.0003887 variable_sources: time_invariant: - PHIS @@ -47,6 +60,9 @@ dataset_computation: - OCNFRAC - LANDFRAC - ICEFRAC + - QREFHT + - TREFHT + - U10 6hourly/1yr: - PRECT - LHFLX @@ -61,6 +77,49 @@ dataset_computation: - PRECSC - PRECSL - QFLX + renaming: + PS: PRESsfc + LHFLX: LHTFLsfc + SHFLX: SHTFLsfc + surface_precipitation_rate: PRATEsfc + FSDS: DSWRFsfc + surface_upward_shortwave_flux: USWRFsfc + FLDS: DLWRFsfc + surface_upward_longwave_flux: ULWRFsfc + FLUT: ULWRFtoa + SOLIN: DSWRFtoa + top_of_atmos_upward_shortwave_flux: USWRFtoa + T_0: air_temperature_0 + T_1: air_temperature_1 + T_2: air_temperature_2 + T_3: air_temperature_3 + T_4: air_temperature_4 + T_5: air_temperature_5 + T_6: air_temperature_6 + T_7: air_temperature_7 + LANDFRAC: land_fraction + OCNFRAC: ocean_fraction + ICEFRAC: sea_ice_fraction + TS: surface_temperature + U_0: eastward_wind_0 + U_1: eastward_wind_1 + U_2: eastward_wind_2 + U_3: eastward_wind_3 + U_4: eastward_wind_4 + U_5: eastward_wind_5 + U_6: eastward_wind_6 + U_7: eastward_wind_7 + V_0: northward_wind_0 + V_1: northward_wind_1 + V_2: northward_wind_2 + V_3: northward_wind_3 + V_4: northward_wind_4 + V_5: northward_wind_5 + V_6: northward_wind_6 + V_7: northward_wind_7 + QREFHT: Q2m + TREFHT: TMP2m + U10: wind_speed_10m standard_names: total_frozen_precip_rate: None surface_snow_rate: PRECSL