Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scripts/data_process/compute_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ class DatasetComputationConfig:
mask_soil_moisture: (optional) whether to mask soil moisture content using soil
temperature. This is useful for CM4 dataset, where soil moisture content is
zero instead of NaN over the oceans.
CO2_concentration: (optional) CO2 concentration as a volume mixing ratio.
This is used for the E3SM dataset to add a time-invariant CO2 concentration.
"""

reference_vertical_coordinate_file: str
Expand All @@ -295,6 +297,7 @@ class DatasetComputationConfig:
validate_vertical_coarsening_indices_land: bool = True
reference_vertical_coordinate_file_land: Optional[str] = None
mask_soil_moisture: bool = False
CO2_concentration: Optional[float] = None


@dataclasses.dataclass
Expand Down
21 changes: 18 additions & 3 deletions scripts/data_process/compute_dataset_e3smv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
"hybm",
],
}
GRAVITY = 9.80665 # m/s^2


def expand_names_by_level(variables: MutableMapping[str, List[str]]) -> List[str]:
Expand Down Expand Up @@ -178,6 +179,7 @@ def open_dataset(
datasets[varname] = ds
del var_paths["time_invariant"]
for varname, paths in var_paths.items():
print(varname)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this print statement intentional? We probably don't need it.

var_start = time.time()
if varname in varnames_3D:
drop_vars = DROP_VARIABLE_NAMES["3D"]
Expand All @@ -186,9 +188,6 @@ def open_dataset(
datasets[varname] = xr.open_mfdataset(
paths,
chunks=chunks,
data_vars="minimal",
coords="minimal",
parallel=True,
).drop(drop_vars, errors="ignore")
logging.info(f"{varname} files opened in {time.time() - var_start:.2f} s...")
logging.info(f"All files opened in {time.time() - start:.2f} s. Merging...")
Expand Down Expand Up @@ -309,6 +308,19 @@ def compute_rad_fluxes(
return ds.assign(fluxes)


def sfc_phis_to_hgt(ds: "xr.Dataset") -> "xr.Dataset":
    """Replace the ``PHIS`` variable with ``HGTsfc = PHIS / GRAVITY``.

    The input dataset is left untouched; a new dataset is returned.

    Args:
        ds: Dataset containing a ``PHIS`` variable (presumably surface
            geopotential in m^2/s^2 -- confirm against the source data).

    Returns:
        A new dataset with ``HGTsfc`` added and ``PHIS`` removed.

    Raises:
        KeyError: If ``PHIS`` is not present in ``ds``.
    """
    # Use assign() instead of item assignment so the caller's dataset is not
    # left holding both PHIS and HGTsfc as a side effect of this call.
    return ds.assign(HGTsfc=ds["PHIS"] / GRAVITY).drop_vars("PHIS")


def add_time_invariant_co2_concentration(
    ds: "xr.Dataset", config: "DatasetComputationConfig"
) -> "xr.Dataset":
    """Add a constant-in-time ``global_mean_co2`` variable when configured.

    Args:
        ds: Dataset with a ``time`` coordinate.
        config: Dataset computation configuration. Its ``CO2_concentration``
            value is written at every time step; if it is ``None`` nothing is
            added.

    Returns:
        ``ds`` itself when ``config.CO2_concentration`` is ``None``;
        otherwise a new dataset that additionally contains
        ``global_mean_co2`` along the ``time`` dimension. The input dataset
        is not modified.
    """
    if config.CO2_concentration is None:
        return ds
    # A scalar combined with explicit coords is filled out to the coordinate's
    # shape, yielding one identical value per time step.
    co2 = xr.DataArray(
        config.CO2_concentration, dims=["time"], coords=[ds["time"]]
    )
    # assign() returns a new dataset rather than mutating the caller's object.
    return ds.assign(global_mean_co2=co2)


def construct_lazy_dataset(
config: DatasetComputationConfig,
dataset_dirs: MutableMapping[str, str],
Expand Down Expand Up @@ -422,6 +434,9 @@ def construct_lazy_dataset(
"p_i pressure corresponds to the interface at the top of the i'th finite "
"volume layer, counting down from the top of atmosphere."
)
ds = add_time_invariant_co2_concentration(ds, config)
ds = ds.rename(config.renaming)
ds = sfc_phis_to_hgt(ds)
return ds


Expand Down
77 changes: 68 additions & 9 deletions scripts/data_process/configs/e3sm-1deg-8layer-v3.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
runs:
2025-04-03-e3smv3-1deg-ramped: ""
data_output_directory: /pscratch/sd/r/rebassoo/fme-preprocess/zarr/
2025-04-03-e3smv3-1deg-ramped: None
data_output_directory: /pscratch/sd/r/elynnwu/fme-preprocess/zarr/
stats:
output_directory: /pscratch/sd/r/rebassoo/fme-preprocess/2025-04-03-e3smv3-1deg-ramped
start_date: "1920-01-01"
output_directory: /pscratch/sd/r/elynnwu/fme-dataset/2026-02-03-E3SMv3-AMIP-1970-2020-1deg-8layer
start_date: "1970-01-01"
end_date: "2019-12-31"
data_type: E3SMV2
beaker_dataset: e3sm-1deg-8layers-stats # this is not used in e3sm data processing

dataset_computation:
chunking:
time_dim: 10
latitude_dim: 180
longitude_dim: 360
reference_vertical_coordinate_file: None
time_invariant_dir: /global/cfs/cdirs/m4331/jpduncan/e3smv2/time_invariant
vertical_coarsening_indices:
Expand All @@ -27,7 +23,24 @@ dataset_computation:
- [61, 69]
- [69, 80]
roundtrip_fraction_kept: 1.0
n_split: 1600
roundtrip_variables:
- PS
- TS
- T
- U
- V
- Q
- CLDLIQ
- CLDICE
- RAINQM
- TMQ
- TGCLDLWP
- TGCLDIWP
- QREFHT
- TREFHT
- U10
n_split: 200
CO2_concentration: 0.0003887
variable_sources:
time_invariant:
- PHIS
Expand All @@ -47,6 +60,9 @@ dataset_computation:
- OCNFRAC
- LANDFRAC
- ICEFRAC
- QREFHT
- TREFHT
- U10
6hourly/1yr:
- PRECT
- LHFLX
Expand All @@ -61,6 +77,49 @@ dataset_computation:
- PRECSC
- PRECSL
- QFLX
renaming:
PS: PRESsfc
LHFLX: LHTFLsfc
SHFLX: SHTFLsfc
surface_precipitation_rate: PRATEsfc
FSDS: DSWRFsfc
surface_upward_shortwave_flux: USWRFsfc
FLDS: DLWRFsfc
surface_upward_longwave_flux: ULWRFsfc
FLUT: ULWRFtoa
SOLIN: DSWRFtoa
top_of_atmos_upward_shortwave_flux: USWRFtoa
T_0: air_temperature_0
T_1: air_temperature_1
T_2: air_temperature_2
T_3: air_temperature_3
T_4: air_temperature_4
T_5: air_temperature_5
T_6: air_temperature_6
T_7: air_temperature_7
LANDFRAC: land_fraction
OCNFRAC: ocean_fraction
ICEFRAC: sea_ice_fraction
TS: surface_temperature
U_0: eastward_wind_0
U_1: eastward_wind_1
U_2: eastward_wind_2
U_3: eastward_wind_3
U_4: eastward_wind_4
U_5: eastward_wind_5
U_6: eastward_wind_6
U_7: eastward_wind_7
V_0: northward_wind_0
V_1: northward_wind_1
V_2: northward_wind_2
V_3: northward_wind_3
V_4: northward_wind_4
V_5: northward_wind_5
V_6: northward_wind_6
V_7: northward_wind_7
QREFHT: Q2m
TREFHT: TMP2m
U10: wind_speed_10m
standard_names:
total_frozen_precip_rate: None
surface_snow_rate: PRECSL
Expand Down