diff --git a/configs/TCO95_CORE2_EP.yaml b/configs/TCO95_CORE2_EP.yaml new file mode 100644 index 0000000..ce01871 --- /dev/null +++ b/configs/TCO95_CORE2_EP.yaml @@ -0,0 +1,65 @@ +# OCP-Tool Configuration — Paleo Example (Early Pliocene) +# OpenIFS Coupling Preparation Tool with paleo reconstruction modifications + +# Atmosphere grid settings +atmosphere: + resolution_list: [95] + truncation_type: "cubic-octahedral" + experiment_name: "aack" # 4-digit ECMWF experiment code + +# Ocean grid settings +ocean: + grid_name: "CORE2" + has_ice_cavities: false + mesh_file: "/work/ab0246/a270092/input/fesom2/CORE2/mesh.nc" + +# Basin modifications for runoff +runoff: + manual_basin_removal: + - "caspian-sea" + - "black-sea" + +# Paths (relative to auto-detected root_dir, or absolute) +paths: + input: + fesom_mesh: "input/fesom_mesh/" + gaussian_grids_full: "input/gaussian_grids_full/" + gaussian_grids_octahedral_reduced: "input/gaussian_grids_octahedral_reduced/" + gaussian_grids_linear_reduced: "input/gaussian_grids_linear_reduced/" + openifs_default: "input/openifs_input_default/" + runoff_default: "input/runoff_map_default/" + lpj_guess: "input/lpj-guess/" + +# Paleo reconstruction settings +paleo: + enabled: true + experiment_id: "EP" # Used in filenames: EP_icemask_v1.0.nc, etc. + + # Directory containing paleo reconstruction NetCDF files + reconstruction_dir: "/work/ab0246/a270179/runtime/input/pliomip3/EP/" + + # Directory containing modern reference files + modern_reference_dir: "/work/ab0246/a270179/runtime/input/pliomip3/EP/" + + # Path to the ICMSH spectral topography reference file + icmsh_input_file: "/work/ab0246/a270179/runtime/input/pliomip3/EP/ICMSHaackINIT_CORE2_new3" + + # Path to compiled calnoro binary (set to null to skip SSO computation) + calnoro_binary: null # e.g. 
"/path/to/calnoro/a.out" + + # Reconstruction file name templates ({exp_id} is replaced by experiment_id) + # These are the defaults — override only if your files differ: + # ice_mask_file: "{exp_id}_icemask_v1.0.nc" + # topography_file: "{exp_id}_topo_v1.0.nc" + # lsm_file: "{exp_id}_LSM_v1.0.nc" + # lake_file: "{exp_id}_lake_v1.0.nc" + # soil_file: "{exp_id}_soil_v1.0.nc" + # biome_file: "{exp_id}_mbiome_v1.0.nc" + # modern_topo_file: "Modern_std_topo_v1.0.nc" + # modern_lake_file: "Modern_std_soil_lake_v1.0.nc" + +# Processing options +options: + verbose: true + parallel_workers: 4 + use_dask: true diff --git a/environment.yaml b/environment.yaml index eb7e4d7..daf1285 100644 --- a/environment.yaml +++ b/environment.yaml @@ -9,6 +9,8 @@ dependencies: - pandas - pyyaml - python-eccodes + - python-cdo + - scipy - git - pip - jupyterlab diff --git a/ocp_tool/__init__.py b/ocp_tool/__init__.py index e8854e2..282014a 100644 --- a/ocp_tool/__init__.py +++ b/ocp_tool/__init__.py @@ -10,6 +10,9 @@ - plotting: Visualization - co2_interpolation: 3D CO2 interpolation - field_interpolation: 2D field interpolation +- paleo_input: Paleo land surface modifications (ice, lakes, soils, vegetation) +- paleo_topo: Paleo topography modification (anomaly method) +- paleo_subgrid_oro: Subgrid-scale orography via calnoro """ -__version__ = "2.0.0" +__version__ = "2.1.0" diff --git a/ocp_tool/config.py b/ocp_tool/config.py index 958ad22..5aa9bdb 100644 --- a/ocp_tool/config.py +++ b/ocp_tool/config.py @@ -5,7 +5,7 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import List, Optional, Union +from typing import Dict, List, Optional, Union import yaml @@ -54,6 +54,38 @@ class OutputPaths: plots: Path +@dataclass +class PaleoConfig: + """Paleo reconstruction configuration.""" + enabled: bool + experiment_id: str # e.g. 
"EP", "LP" — used in filenames + reconstruction_dir: Path # directory containing reconstruction NetCDF files + modern_reference_dir: Path # directory containing modern reference files + icmsh_input_file: Path # path to ICMSHaackINIT_CORE2 (spectral topo) + calnoro_binary: Optional[Path] # path to compiled calnoro binary + # Reconstruction file names (relative to reconstruction_dir) + ice_mask_file: str = "{exp_id}_icemask_v1.0.nc" + topography_file: str = "{exp_id}_topo_v1.0.nc" + lsm_file: str = "{exp_id}_LSM_v1.0.nc" + lake_file: str = "{exp_id}_lake_v1.0.nc" + soil_file: str = "{exp_id}_soil_v1.0.nc" + biome_file: str = "{exp_id}_mbiome_v1.0.nc" + # Modern reference file names (relative to modern_reference_dir) + modern_topo_file: str = "Modern_std_topo_v1.0.nc" + modern_lake_file: str = "Modern_std_soil_lake_v1.0.nc" + + def get_reconstruction_file(self, file_attr: str) -> Path: + """Get full path to a reconstruction file, substituting experiment_id.""" + template = getattr(self, file_attr) + filename = template.format(exp_id=self.experiment_id) + return self.reconstruction_dir / filename + + def get_modern_file(self, file_attr: str) -> Path: + """Get full path to a modern reference file.""" + filename = getattr(self, file_attr) + return self.modern_reference_dir / filename + + @dataclass class ProcessingOptions: """Processing options.""" @@ -72,6 +104,7 @@ class OCPConfig: output_paths: OutputPaths options: ProcessingOptions root_dir: Path + paleo: Optional[PaleoConfig] = None @property def co2_grib_file(self) -> Path: @@ -96,6 +129,17 @@ def get_icmgg_output_file(self) -> Path: def get_icmgg_iniua_file(self) -> Path: """Get path to output ICMGG INIUA file.""" return self.output_paths.openifs_modified / f'ICMGG{self.atmosphere.experiment_name}INIUA' + + def get_icmsh_input_file(self) -> Path: + """Get path to input ICMSH file (spectral topography).""" + if self.paleo and self.paleo.icmsh_input_file: + return self.paleo.icmsh_input_file + return 
self.input_paths.openifs_default / f'ICMSH{self.atmosphere.experiment_name}INIT' + + def get_icmsh_output_file(self) -> Path: + """Get path to output ICMSH file.""" + suffix = f'_{self.paleo.experiment_id}' if self.paleo else f'_{self.ocean.grid_name}' + return self.output_paths.openifs_modified / f'ICMSH{self.atmosphere.experiment_name}INIT{suffix}' def load_config(config_path: Union[str, Path]) -> OCPConfig: @@ -186,6 +230,30 @@ def resolve_path(path_str: str) -> Path: use_dask=raw['options']['use_dask'], ), root_dir=root_dir, + paleo=_load_paleo_config(raw, resolve_path) if 'paleo' in raw else None, + ) + + +def _load_paleo_config(raw: dict, resolve_path) -> Optional[PaleoConfig]: + """Parse paleo section from raw YAML config.""" + paleo_raw = raw.get('paleo', {}) + if not paleo_raw.get('enabled', False): + return None + return PaleoConfig( + enabled=True, + experiment_id=paleo_raw['experiment_id'], + reconstruction_dir=resolve_path(paleo_raw['reconstruction_dir']), + modern_reference_dir=resolve_path(paleo_raw['modern_reference_dir']), + icmsh_input_file=resolve_path(paleo_raw['icmsh_input_file']), + calnoro_binary=resolve_path(paleo_raw['calnoro_binary']) if paleo_raw.get('calnoro_binary') else None, + ice_mask_file=paleo_raw.get('ice_mask_file', '{exp_id}_icemask_v1.0.nc'), + topography_file=paleo_raw.get('topography_file', '{exp_id}_topo_v1.0.nc'), + lsm_file=paleo_raw.get('lsm_file', '{exp_id}_LSM_v1.0.nc'), + lake_file=paleo_raw.get('lake_file', '{exp_id}_lake_v1.0.nc'), + soil_file=paleo_raw.get('soil_file', '{exp_id}_soil_v1.0.nc'), + biome_file=paleo_raw.get('biome_file', '{exp_id}_mbiome_v1.0.nc'), + modern_topo_file=paleo_raw.get('modern_topo_file', 'Modern_std_topo_v1.0.nc'), + modern_lake_file=paleo_raw.get('modern_lake_file', 'Modern_std_soil_lake_v1.0.nc'), ) diff --git a/ocp_tool/paleo_input.py b/ocp_tool/paleo_input.py new file mode 100644 index 0000000..4df3b12 --- /dev/null +++ b/ocp_tool/paleo_input.py @@ -0,0 +1,739 @@ +""" +Paleo land 
surface modification module for OCP-Tool. + +Replaces mod_input_oifs.sh: modifies ICMGG land surface variables +(ice, lakes, soils, soil water, vegetation type/cover/LAI, and remaining +fields) based on paleoclimate reconstruction data. + +All interpolation done in Python (scipy) — no CDO dependency. +Operates directly on the OpenIFS Gaussian grid. +""" + +import copy +import traceback +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import numpy as np +from netCDF4 import Dataset +from scipy.interpolate import griddata, NearestNDInterpolator +import eccodes + +from .config import OCPConfig, PaleoConfig +from .gaussian_grids import GaussianGrid +from .lsm import LSMData, read_grib_fields + +# --------------------------------------------------------------------------- +# Lookup tables: PlioMIP3 reconstruction → OpenIFS categories +# Derived from mod_input_oifs.sh +# --------------------------------------------------------------------------- + +# PlioMIP3 soil code → OpenIFS soil type (code 43) +SOIL_LOOKUP = { + 28: 1, 29: 1, # → coarse + 30: 2, 31: 2, # → medium + 32: 4, # → organic + 33: 2, + 34: 3, # → fine + 35: 4, + 36: 1, + 37: 3, + 38: 2, + 39: 2, + 40: 1, + 41: 3, + 42: 1, + 43: 3, +} + +# OpenIFS soil type → volumetric soil water content per layer +SWVL_LOOKUP = { + # layer: {soil_type: water_content} + 39: {1: 0.210, 2: 0.283, 3: 0.373, 4: 0.209}, # Layer 1, level 0 + 40: {1: 0.212, 2: 0.279, 3: 0.374, 4: 0.248}, # Layer 2, level 7 + 41: {1: 0.208, 2: 0.270, 3: 0.375, 4: 0.262}, # Layer 3, level 28 + 42: {1: 0.188, 2: 0.300, 3: 0.399, 4: 0.255}, # Layer 4, level 100 +} + +# PlioMIP3 biome → OpenIFS high vegetation type (code 30) +HIGH_VEG_TYPE_LOOKUP = {1: 6, 2: 3, 6: 5, 7: 4} + +# PlioMIP3 biome → OpenIFS low vegetation type (code 29) +LOW_VEG_TYPE_LOOKUP = {3: 7, 4: 2, 5: 11, 8: 9} + +# High vegetation type → cover fraction (code 28) +HIGH_VEG_COVER_LOOKUP = {3: 0.95, 4: 0.925, 5: 0.95, 6: 0.95} + +# Low vegetation type → 
cover fraction (code 27) +LOW_VEG_COVER_LOOKUP = {2: 0.87, 7: 0.90, 9: 0.40, 11: 0.10} + +# High vegetation type → LAI (code 67) +HIGH_VEG_LAI_LOOKUP = {3: 5.9, 4: 4.89, 5: 5.97, 6: 6.44} + +# Low vegetation type → LAI (code 66) +LOW_VEG_LAI_LOOKUP = {2: 2.92, 7: 3.79, 9: 2.95, 11: 3.11} + +# GRIB codes for remaining fields that need drowned/added interpolation +REMAINING_FIELD_CODES = [ + 139, 170, 183, 236, # soil temperatures + 198, # skin reservoir content + 235, # skin temperature + 31, # sea ice fraction + 8, 9, 10, 11, 12, 13, 14, # lake variables + 238, # snow layer temperature + 32, 33, 34, # snow albedo, density, SST + 35, 36, 37, 38, # ice temperatures + 148, # Charnock parameter + 174, # albedo + 15, 16, 17, 18, # UV/IR albedos + 74, # sdfor +] + + +# --------------------------------------------------------------------------- +# Helper functions +# --------------------------------------------------------------------------- + +def _read_netcdf_field(filepath: Path, var_name: Optional[str] = None) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Read a 2D field from a NetCDF file. + + Returns (values, lons, lats) on the file's native grid. + If var_name is None, reads the first non-coordinate variable. 
+ """ + ds = Dataset(str(filepath), 'r') + coord_names = {'lon', 'lat', 'longitude', 'latitude', 'x', 'y', 'time', + 'lon_bnds', 'lat_bnds', 'time_bnds', 'bounds_lon', 'bounds_lat'} + + if var_name is None: + for name in ds.variables: + if name.lower() not in coord_names: + var_name = name + break + if var_name is None: + ds.close() + raise ValueError(f"No data variable found in {filepath}") + + data = np.squeeze(np.array(ds.variables[var_name][:])) + + # Find lon/lat variables + lon_name = lat_name = None + for candidate in ['lon', 'longitude', 'x']: + if candidate in ds.variables: + lon_name = candidate + break + for candidate in ['lat', 'latitude', 'y']: + if candidate in ds.variables: + lat_name = candidate + break + + lons_1d = np.array(ds.variables[lon_name][:]) + lats_1d = np.array(ds.variables[lat_name][:]) + ds.close() + + # Build 2D coordinate arrays + if data.ndim == 2: + lons_2d, lats_2d = np.meshgrid(lons_1d, lats_1d) + else: + lons_2d = lons_1d + lats_2d = lats_1d + + return data, lons_2d, lats_2d + + +def _interp_to_gaussian( + src_data: np.ndarray, + src_lons: np.ndarray, + src_lats: np.ndarray, + tgt_lons: np.ndarray, + tgt_lats: np.ndarray, + method: str = 'nearest', + fill_value: float = 0.0, +) -> np.ndarray: + """ + Interpolate source data to the OpenIFS Gaussian grid points. 
+ + Parameters + ---------- + src_data : 2D source field + src_lons, src_lats : 2D coordinate arrays matching src_data + tgt_lons, tgt_lats : 1D target coordinate arrays (Gaussian grid) + method : 'nearest', 'linear', or 'cubic' + fill_value : value for points outside convex hull (linear/cubic only) + """ + src_pts = np.column_stack([src_lons.ravel(), src_lats.ravel()]) + src_vals = src_data.ravel() + + # Remove NaN / masked values + valid = np.isfinite(src_vals) + if hasattr(src_vals, 'mask'): + valid &= ~src_vals.mask + src_pts = src_pts[valid] + src_vals = src_vals[valid] + + tgt_pts = np.column_stack([tgt_lons, tgt_lats]) + + return griddata(src_pts, src_vals, tgt_pts, method=method, fill_value=fill_value) + + +def _distance_weighted_fill( + field: np.ndarray, + mask: np.ndarray, + lons: np.ndarray, + lats: np.ndarray, +) -> np.ndarray: + """ + Fill missing/masked points using nearest-neighbour from valid points. + + Replaces CDO's ``setmisstodis``. + + Parameters + ---------- + field : 1D array on Gaussian grid (values at valid points, anything at others) + mask : boolean, True where values are valid (known) + lons, lats : 1D coordinate arrays + """ + if np.all(mask) or np.sum(mask) == 0: + return field.copy() + + known_pts = np.column_stack([lons[mask], lats[mask]]) + known_vals = field[mask] + interp = NearestNDInterpolator(known_pts, known_vals) + + result = field.copy() + missing = ~mask + query_pts = np.column_stack([lons[missing], lats[missing]]) + result[missing] = interp(query_pts) + return result + + +def _apply_lookup(field: np.ndarray, lookup: Dict, default: float = 0.0) -> np.ndarray: + """Map integer categories through a lookup table.""" + result = np.full_like(field, default, dtype=np.float64) + for key, val in lookup.items(): + result[np.isclose(field, key, atol=0.5)] = val + return result + + +def _get_grib_field_by_code(grib_file: Path, param_code: int) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]: + """ + Read a 
single GRIB field by parameter code, returning (values, lats, lons). + """ + with open(grib_file, 'rb') as f: + while True: + gid = eccodes.codes_grib_new_from_file(f) + if gid is None: + break + try: + code = eccodes.codes_get(gid, 'paramId') + if code == param_code: + vals = eccodes.codes_get_array(gid, 'values') + lats = eccodes.codes_get_array(gid, 'latitudes') + lons = eccodes.codes_get_array(gid, 'longitudes') + return vals, lats, lons + finally: + eccodes.codes_release(gid) + return None, None, None + + +def _read_all_grib_fields(grib_file: Path) -> Dict[int, np.ndarray]: + """Read all fields from a GRIB file, keyed by paramId.""" + fields = {} + with open(grib_file, 'rb') as f: + while True: + gid = eccodes.codes_grib_new_from_file(f) + if gid is None: + break + try: + code = eccodes.codes_get(gid, 'paramId') + vals = eccodes.codes_get_array(gid, 'values') + fields[code] = vals + finally: + eccodes.codes_release(gid) + return fields + + +def _replace_grib_fields( + input_file: Path, + output_file: Path, + replacements: Dict[int, np.ndarray], + verbose: bool = False, +) -> None: + """ + Copy a GRIB file, replacing specific fields identified by paramId. 
+ + Parameters + ---------- + input_file : source GRIB + output_file : destination GRIB + replacements : dict mapping paramId → new values array + """ + # Read all messages into memory first + messages = [] + with open(input_file, 'rb') as f: + while True: + gid = eccodes.codes_grib_new_from_file(f) + if gid is None: + break + messages.append(gid) + + with open(output_file, 'wb') as f: + for gid in messages: + code = eccodes.codes_get(gid, 'paramId') + if code in replacements: + eccodes.codes_set_array(gid, 'values', replacements[code].astype(np.float64)) + if verbose: + print(f" Replaced paramId {code}") + eccodes.codes_write(gid, f) + eccodes.codes_release(gid) + + +# --------------------------------------------------------------------------- +# Main paleo input modification pipeline +# --------------------------------------------------------------------------- + +def create_paleo_masks( + config: OCPConfig, + grid: GaussianGrid, + lsm_data: LSMData, +) -> Dict[str, np.ndarray]: + """ + Create derivative masks on the Gaussian grid. + + Uses the pre-modification (CORE2) and post-modification (paleo) LSM + already available from the ocp-tool pipeline — no intermediate grid. 
+ + Returns dict with keys: 'land', 'ocean', 'added', 'drowned', 'reduced_ice' + """ + paleo = config.paleo + verbose = config.options.verbose + + print("\n" + "=" * 60) + print(" Creating paleo derivative masks") + print("=" * 60) + + tgt_lons = np.array(grid.lons_list) + tgt_lats = grid.center_lats.flatten() + + # Paleo LSM (post-modification) — already in lsm_data from step 3 + paleo_lsm = lsm_data.lsm_binary_atm.flatten() + + # Read CORE2 (pre-modification) LSM from original ICMGG file + core2_lsm, _, _ = _get_grib_field_by_code(config.get_icmgg_input_file(), 172) + if core2_lsm is None: + raise RuntimeError("Could not read LSM (code 172) from input ICMGG file") + + # Binary masks on Gaussian grid + land_mask = paleo_lsm >= 0.5 + ocean_mask = paleo_lsm < 0.5 + added_land = (core2_lsm < 0.5) & (paleo_lsm >= 0.5) # ocean → land + drowned = (core2_lsm >= 0.5) & (paleo_lsm < 0.5) # land → ocean + + print(f" Land points: {np.sum(land_mask)}") + print(f" Ocean points: {np.sum(ocean_mask)}") + print(f" Added land: {np.sum(added_land)}") + print(f" Drowned: {np.sum(drowned)}") + + # Reduced ice sheet mask + reduced_ice = np.zeros_like(paleo_lsm, dtype=bool) + ice_file = paleo.get_reconstruction_file('ice_mask_file') + if ice_file.exists(): + paleo_ice_data, ice_lons, ice_lats = _read_netcdf_field(ice_file) + paleo_ice = _interp_to_gaussian(paleo_ice_data, ice_lons, ice_lats, + tgt_lons, tgt_lats, method='nearest') + + # Read modern ice (snow depth, code 141) from ICMGG + modern_ice, _, _ = _get_grib_field_by_code(config.get_icmgg_input_file(), 141) + if modern_ice is not None: + # Modern ice > 0 but paleo ice absent → reduced ice sheet + modern_has_ice = modern_ice > 0.0 + paleo_has_ice = paleo_ice >= 2.0 # value 2 = ice sheet in PlioMIP3 + reduced_ice = modern_has_ice & ~paleo_has_ice & land_mask + print(f" Reduced ice: {np.sum(reduced_ice)}") + else: + print(f" Warning: ice mask file not found: {ice_file}") + + masks = { + 'land': land_mask, + 'ocean': ocean_mask, + 
'added': added_land, + 'drowned': drowned, + 'reduced_ice': reduced_ice, + 'paleo_lsm': paleo_lsm, + 'core2_lsm': core2_lsm, + } + return masks + + +def modify_ice_snow_depth( + config: OCPConfig, + grid: GaussianGrid, + masks: Dict[str, np.ndarray], +) -> np.ndarray: + """ + Modify snow depth (code 141) from reconstruction ice mask. + + PlioMIP3 convention: ice_mask value 1 → no ice (snow=0), + value 2 → ice sheet (snow depth = 10 m w.e.) + """ + paleo = config.paleo + + print("\n Modifying ice / snow depth (code 141)...") + + tgt_lons = np.array(grid.lons_list) + tgt_lats = grid.center_lats.flatten() + n_pts = len(tgt_lons) + + ice_file = paleo.get_reconstruction_file('ice_mask_file') + if not ice_file.exists(): + print(f" Warning: ice mask not found: {ice_file}, skipping") + return None + + ice_data, ice_lons, ice_lats = _read_netcdf_field(ice_file) + ice_on_gauss = _interp_to_gaussian(ice_data, ice_lons, ice_lats, + tgt_lons, tgt_lats, method='nearest') + + # Map: 1 → 0, 2 → 10 + snow_depth = np.zeros(n_pts, dtype=np.float64) + snow_depth[np.isclose(ice_on_gauss, 2, atol=0.5)] = 10.0 + + print(f" Ice sheet points (snow=10): {np.sum(snow_depth > 0)}") + return snow_depth + + +def modify_lakes( + config: OCPConfig, + grid: GaussianGrid, + masks: Dict[str, np.ndarray], + existing_lake: np.ndarray, +) -> np.ndarray: + """ + Modify lake cover (code 26) using anomaly method. + + PlioMIP3 convention: lake fraction indicated by value 1100. 
+ """ + paleo = config.paleo + + print("\n Modifying lake cover (code 26)...") + + tgt_lons = np.array(grid.lons_list) + tgt_lats = grid.center_lats.flatten() + + lake_file = paleo.get_reconstruction_file('lake_file') + modern_lake_file = paleo.get_modern_file('modern_lake_file') + + if not lake_file.exists() or not modern_lake_file.exists(): + print(f" Warning: lake files not found, skipping") + return None + + # Read and create binary lake masks (value 1100 = lake) + paleo_lake_data, plk_lons, plk_lats = _read_netcdf_field(lake_file) + modern_lake_data, mlk_lons, mlk_lats = _read_netcdf_field(modern_lake_file) + + paleo_lake_binary = (np.isclose(paleo_lake_data, 1100, atol=0.5)).astype(float) + modern_lake_binary = (np.isclose(modern_lake_data, 1100, atol=0.5)).astype(float) + + # Interpolate both to Gaussian grid + paleo_lake_gauss = _interp_to_gaussian(paleo_lake_binary, plk_lons, plk_lats, + tgt_lons, tgt_lats, method='nearest') + modern_lake_gauss = _interp_to_gaussian(modern_lake_binary, mlk_lons, mlk_lats, + tgt_lons, tgt_lats, method='nearest') + + # Anomaly method + lake_anom = paleo_lake_gauss - modern_lake_gauss + new_lake = existing_lake + lake_anom + + # Clamp to 0 or 1 + new_lake = np.where(new_lake >= 0.5, 1.0, 0.0) + + print(f" Lake points: {np.sum(new_lake > 0)}") + return new_lake + + +def modify_soils( + config: OCPConfig, + grid: GaussianGrid, + masks: Dict[str, np.ndarray], +) -> Optional[np.ndarray]: + """ + Modify soil type (code 43) from reconstruction data with lookup table. 
+ """ + paleo = config.paleo + + print("\n Modifying soil type (code 43)...") + + tgt_lons = np.array(grid.lons_list) + tgt_lats = grid.center_lats.flatten() + land_mask = masks['land'] + + soil_file = paleo.get_reconstruction_file('soil_file') + if not soil_file.exists(): + print(f" Warning: soil file not found: {soil_file}, skipping") + return None + + soil_data, s_lons, s_lats = _read_netcdf_field(soil_file) + + # Interpolate to Gaussian grid + soil_gauss = _interp_to_gaussian(soil_data, s_lons, s_lats, + tgt_lons, tgt_lats, method='nearest') + + # Distance-weighted fill over land (replace missing with code 31 = default) + soil_gauss[~np.isfinite(soil_gauss)] = 0 + soil_on_land = np.where(land_mask, soil_gauss, 0) + soil_on_land[land_mask & (soil_on_land == 0)] = 31 # default fill + soil_filled = _distance_weighted_fill(soil_on_land, land_mask & (soil_on_land > 0), + tgt_lons, tgt_lats) + + # Apply PlioMIP3 → OpenIFS lookup + oifs_soil = _apply_lookup(soil_filled, SOIL_LOOKUP, default=0.0) + + # Ensure integers and mask ocean + oifs_soil = np.round(oifs_soil).astype(np.float64) + oifs_soil[~land_mask] = 0 + # Clamp to valid range 1–4 on land + oifs_soil[land_mask & (oifs_soil < 1)] = 1 + oifs_soil[land_mask & (oifs_soil > 4)] = 1 + + print(f" Soil types on land — 1:{np.sum(oifs_soil==1)}, 2:{np.sum(oifs_soil==2)}, " + f"3:{np.sum(oifs_soil==3)}, 4:{np.sum(oifs_soil==4)}") + return oifs_soil + + +def modify_soil_water( + soil_type: np.ndarray, + masks: Dict[str, np.ndarray], +) -> Dict[int, np.ndarray]: + """ + Compute volumetric soil water layers (codes 39–42) from soil type. 
+ """ + print("\n Computing volumetric soil water layers (codes 39–42)...") + + results = {} + for code, lut in SWVL_LOOKUP.items(): + swvl = np.zeros_like(soil_type) + for stype, wval in lut.items(): + swvl[np.isclose(soil_type, stype, atol=0.5)] = wval + swvl[~masks['land']] = 0 + results[code] = swvl + + return results + + +def modify_vegetation( + config: OCPConfig, + grid: GaussianGrid, + masks: Dict[str, np.ndarray], +) -> Dict[int, np.ndarray]: + """ + Modify vegetation type, cover, and LAI from reconstruction biome data. + + Returns dict mapping GRIB code → values array: + 30 (tvh), 29 (tvl), 28 (cvh), 27 (cvl), 67 (lai_hv), 66 (lai_lv) + """ + paleo = config.paleo + + print("\n Modifying vegetation (codes 27–30, 66–67)...") + + tgt_lons = np.array(grid.lons_list) + tgt_lats = grid.center_lats.flatten() + land_mask = masks['land'] + + biome_file = paleo.get_reconstruction_file('biome_file') + if not biome_file.exists(): + print(f" Warning: biome file not found: {biome_file}, skipping") + return {} + + biome_data, b_lons, b_lats = _read_netcdf_field(biome_file) + + # Interpolate to Gaussian grid + biome_gauss = _interp_to_gaussian(biome_data, b_lons, b_lats, + tgt_lons, tgt_lats, method='nearest') + + # Distance-weighted fill over land + biome_gauss[~np.isfinite(biome_gauss)] = 0 + biome_on_land = np.where(land_mask, biome_gauss, 0) + biome_on_land[land_mask & (biome_on_land == 0)] = 8 # default biome + biome_filled = _distance_weighted_fill(biome_on_land, land_mask & (biome_on_land > 0), + tgt_lons, tgt_lats) + biome_filled[~land_mask] = 0 + + # High vegetation type (code 30) + thv = _apply_lookup(biome_filled, HIGH_VEG_TYPE_LOOKUP, default=0.0) + thv[~land_mask] = 0 + + # Low vegetation type (code 29) + tlv = _apply_lookup(biome_filled, LOW_VEG_TYPE_LOOKUP, default=0.0) + tlv[~land_mask] = 0 + + # High vegetation cover (code 28) + cvh = _apply_lookup(thv, HIGH_VEG_COVER_LOOKUP, default=0.0) + cvh[~land_mask] = 0 + + # Low vegetation cover (code 27) + cvl = 
_apply_lookup(tlv, LOW_VEG_COVER_LOOKUP, default=0.0) + cvl[~land_mask] = 0 + + # High vegetation LAI (code 67) + lai_hv = _apply_lookup(thv, HIGH_VEG_LAI_LOOKUP, default=0.0) + lai_hv[~land_mask] = 0 + + # Low vegetation LAI (code 66) + lai_lv = _apply_lookup(tlv, LOW_VEG_LAI_LOOKUP, default=0.0) + lai_lv[~land_mask] = 0 + + results = {30: thv, 29: tlv, 28: cvh, 27: cvl, 67: lai_hv, 66: lai_lv} + + for code, arr in results.items(): + nonzero = np.sum(arr > 0) + print(f" Code {code}: {nonzero} non-zero points") + + return results + + +def interpolate_remaining_fields( + config: OCPConfig, + grid: GaussianGrid, + masks: Dict[str, np.ndarray], +) -> Dict[int, np.ndarray]: + """ + Interpolate remaining GRIB fields for added/drowned grid points. + + For each field: + - Drowned points: fill from surrounding ocean values + - Added land + reduced ice sheet points: fill from surrounding land values + - Combine both contributions + """ + verbose = config.options.verbose + icmgg_file = config.get_icmgg_output_file() + + print("\n Interpolating remaining fields for added/drowned points...") + + tgt_lons = np.array(grid.lons_list) + tgt_lats = grid.center_lats.flatten() + land_mask = masks['land'] + ocean_mask = masks['ocean'] + added = masks['added'] + drowned = masks['drowned'] + reduced_ice = masks['reduced_ice'] + + # Points that need filling + needs_land_fill = added | reduced_ice + needs_ocean_fill = drowned + + if not np.any(needs_land_fill) and not np.any(needs_ocean_fill): + print(" No added/drowned points — nothing to interpolate") + return {} + + # Read all current fields from the (already LSM-adapted) ICMGG + all_fields = _read_all_grib_fields(icmgg_file) + + results = {} + for code in REMAINING_FIELD_CODES: + if code not in all_fields: + if verbose: + print(f" Code {code} not found in ICMGG, skipping") + continue + + field = all_fields[code].copy() + + # --- Ocean fill for drowned points --- + if np.any(needs_ocean_fill): + ocean_known = ocean_mask & ~drowned + if 
np.any(ocean_known):
+                filled_ocean = _distance_weighted_fill(field, ocean_known, tgt_lons, tgt_lats)
+                field[needs_ocean_fill] = filled_ocean[needs_ocean_fill]
+
+        # --- Land fill for added / reduced-ice points ---
+        if np.any(needs_land_fill):
+            land_known = land_mask & ~added & ~reduced_ice
+            if np.any(land_known):
+                filled_land = _distance_weighted_fill(field, land_known, tgt_lons, tgt_lats)
+                field[needs_land_fill] = filled_land[needs_land_fill]
+
+        results[code] = field
+        if verbose:
+            print(f"    Interpolated code {code}")
+
+    print(f"  Processed {len(results)} remaining fields")
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Top-level entry point
+# ---------------------------------------------------------------------------
+
+def modify_paleo_input(
+    config: OCPConfig,
+    grid: GaussianGrid,
+    lsm_data: LSMData,
+) -> Path:
+    """
+    Full paleo land-surface modification pipeline for ICMGG.
+
+    This runs after the standard ocp-tool LSM adaptation (steps 1–3).
+    It modifies the ICMGGaackINIT_ file in-place, producing the
+    final file with all reconstruction-based changes.
+
+    Parameters
+    ----------
+    config : OCPConfig with paleo section enabled
+    grid : Gaussian grid from step 1
+    lsm_data : LSMData from step 3 (land-sea mask processing)
+
+    Returns
+    -------
+    Path to the modified ICMGG file
+    """
+    paleo = config.paleo
+    if not paleo or not paleo.enabled:
+        raise ValueError("Paleo configuration is not enabled")
+
+    icmgg_file = config.get_icmgg_output_file()
+    verbose = config.options.verbose
+
+    print("\n" + "=" * 60)
+    print(f"  Paleo Input Modification: {paleo.experiment_id}")
+    print(f"  Target: {icmgg_file}")
+    print("=" * 60)
+
+    # 1. Create derivative masks
+    masks = create_paleo_masks(config, grid, lsm_data)
+
+    # 2. Collect all field replacements
+    replacements: Dict[int, np.ndarray] = {}
+
+    # 3. Ice / snow depth (code 141)
+    snow = modify_ice_snow_depth(config, grid, masks)
+    if snow is not None:
+        replacements[141] = snow
+
+    # 4. Lakes (code 26)
+    existing_lake, _, _ = _get_grib_field_by_code(icmgg_file, 26)
+    if existing_lake is not None:
+        new_lake = modify_lakes(config, grid, masks, existing_lake)
+        if new_lake is not None:
+            replacements[26] = new_lake
+
+    # 5. Soils (code 43)
+    soil_type = modify_soils(config, grid, masks)
+    if soil_type is not None:
+        replacements[43] = soil_type
+
+    # 6. Volumetric soil water (codes 39–42)
+    # FIX: modify_soils() returns None when the soil file is missing, and
+    # modify_soil_water() starts with np.zeros_like(soil_type), which raises
+    # on None. Guard the call the same way steps 3–5 guard theirs.
+    if soil_type is not None:
+        swvl = modify_soil_water(soil_type, masks)
+        replacements.update(swvl)
+
+    # 7. Vegetation type, cover, LAI (codes 27–30, 66–67)
+    veg = modify_vegetation(config, grid, masks)
+    replacements.update(veg)
+
+    # 8. Remaining fields (drowned / added interpolation)
+    remaining = interpolate_remaining_fields(config, grid, masks)
+    replacements.update(remaining)
+
+    # 9. Write all replacements into the ICMGG file
+    print(f"\n  Writing {len(replacements)} modified fields to {icmgg_file}...")
+    _replace_grib_fields(icmgg_file, icmgg_file, replacements, verbose=verbose)
+
+    print(f"\n  Paleo input modification complete: {icmgg_file}")
+    return icmgg_file
diff --git a/ocp_tool/paleo_subgrid_oro.py b/ocp_tool/paleo_subgrid_oro.py
new file mode 100644
index 0000000..983e97c
--- /dev/null
+++ b/ocp_tool/paleo_subgrid_oro.py
@@ -0,0 +1,368 @@
+"""
+Paleo subgrid-scale orography module for OCP-Tool.
+
+Replaces convert_topodata.sh + calnoro + create_subgrid_oro.sh + mod_oro_oifs.sh:
+1. Prepare topography input for calnoro (remap to r720x360, rename to OROMEA)
+2. Run compiled calnoro binary via subprocess
+3. Post-process calnoro output (SERVICE format → NetCDF, rename variables)
+4. Apply SSO parameters (sdor, isor, anor, slor) to ICMGGaackINIT
+
+All interpolation done in Python — no CDO dependency.
+Calnoro itself remains a Fortran binary called via subprocess.
+""" + +import os +import struct +import subprocess +import tempfile +from pathlib import Path +from typing import Dict, Optional, Tuple + +import numpy as np +from netCDF4 import Dataset +from scipy.interpolate import griddata +import eccodes + +from .config import OCPConfig, PaleoConfig +from .gaussian_grids import GaussianGrid +from .paleo_input import ( + _read_netcdf_field, + _interp_to_gaussian, + _replace_grib_fields, +) + + +# SSO variable mapping: calnoro output code → (OpenIFS GRIB code, name, scale_factor) +SSO_MAPPING = { + 52: (160, 'OROSTD', 1.0), # standard deviation of orography + 53: (163, 'OROSIG', 10.0), # slope of sub-gridscale orography (×10) + 54: (161, 'OROGAM', 1.0), # anisotropy of sub-gridscale orography + 55: (162, 'OROTHE', np.pi/180), # angle of sub-gridscale orography (deg→rad) +} + + +def _create_topodata_netcdf( + topo_data: np.ndarray, + topo_lons: np.ndarray, + topo_lats: np.ndarray, + output_path: Path, +) -> None: + """ + Create topodata.nc for calnoro on a regular 0.5° grid (r720x360). + + Replaces convert_topodata.sh: remap to r720x360 and rename variable to OROMEA. 
+ """ + # Define target regular grid: 0.5° resolution, 720x360 + tgt_lons_1d = np.linspace(0.25, 359.75, 720) + tgt_lats_1d = np.linspace(89.75, -89.75, 360) + tgt_lons_2d, tgt_lats_2d = np.meshgrid(tgt_lons_1d, tgt_lats_1d) + + # Interpolate topography to regular grid + src_pts = np.column_stack([topo_lons.ravel(), topo_lats.ravel()]) + src_vals = topo_data.ravel() + + # Remove NaN / masked values + valid = np.isfinite(src_vals) + if hasattr(src_vals, 'mask'): + valid &= ~src_vals.mask + src_pts = src_pts[valid] + src_vals = src_vals[valid] + + tgt_pts = np.column_stack([tgt_lons_2d.ravel(), tgt_lats_2d.ravel()]) + topo_regular = griddata(src_pts, src_vals, tgt_pts, method='nearest', fill_value=0) + topo_regular = topo_regular.reshape(360, 720) + + # Write NetCDF + ds = Dataset(str(output_path), 'w', format='NETCDF4') + ds.createDimension('lon', 720) + ds.createDimension('lat', 360) + + lon_var = ds.createVariable('lon', 'f8', ('lon',)) + lon_var[:] = tgt_lons_1d + lon_var.units = 'degrees_east' + + lat_var = ds.createVariable('lat', 'f8', ('lat',)) + lat_var[:] = tgt_lats_1d + lat_var.units = 'degrees_north' + + oromea = ds.createVariable('OROMEA', 'f8', ('lat', 'lon')) + oromea[:] = topo_regular + oromea.long_name = 'mean orography' + oromea.units = 'm' + + ds.close() + print(f" Created topodata.nc: {output_path}") + + +def _read_service_file(filepath: Path, nlat: int = 64, nlon: int = 128) -> Dict[int, np.ndarray]: + """ + Read a Fortran SERVICE format file (as produced by calnoro). + + SERVICE format: for each field: + - 8-int header (code, level, date, time, nlon, nlat, ?, ?) + - nlon*nlat float32 values + + Returns dict mapping variable code → 2D array. 
+ + Parameters + ---------- + filepath : path to .srv file + nlat, nlon : expected grid dimensions (T63 Gaussian = 64 lats × 128 lons) + """ + fields = {} + filesize = os.path.getsize(filepath) + + with open(filepath, 'rb') as f: + while f.tell() < filesize: + # Fortran record: 4-byte length prefix + rec_len_bytes = f.read(4) + if len(rec_len_bytes) < 4: + break + rec_len = struct.unpack('>i', rec_len_bytes)[0] + + # Read header (8 ints) + header = struct.unpack(f'>{rec_len // 4}i', f.read(rec_len)) + f.read(4) # trailing record length + + code = header[0] + grid_nlon = header[4] if len(header) > 4 else nlon + grid_nlat = header[5] if len(header) > 5 else nlat + + # Read data record + rec_len_bytes = f.read(4) + if len(rec_len_bytes) < 4: + break + rec_len = struct.unpack('>i', rec_len_bytes)[0] + n_vals = rec_len // 4 + data = np.array(struct.unpack(f'>{n_vals}f', f.read(rec_len))) + f.read(4) # trailing record length + + fields[code] = data.reshape(grid_nlat, grid_nlon) + + return fields + + +def prepare_calnoro_input( + config: OCPConfig, + work_dir: Path, +) -> Path: + """ + Prepare the topodata.nc file that calnoro expects as input. + + Returns path to topodata.nc. + """ + paleo = config.paleo + + print("\n Preparing calnoro input...") + + topo_file = paleo.get_reconstruction_file('topography_file') + if not topo_file.exists(): + raise FileNotFoundError(f"Paleo topography not found: {topo_file}") + + topo_data, topo_lons, topo_lats = _read_netcdf_field(topo_file) + + topodata_path = work_dir / 'topodata.nc' + _create_topodata_netcdf(topo_data, topo_lons, topo_lats, topodata_path) + + return topodata_path + + +def run_calnoro( + config: OCPConfig, + work_dir: Path, + truncation: int = 63, +) -> Path: + """ + Run the calnoro Fortran binary. 
+ + Parameters + ---------- + config : with paleo.calnoro_binary set + work_dir : directory containing topodata.nc, output goes here + truncation : spectral truncation (63 for T63) + + Returns path to the output sso_par_fil.srv file. + """ + paleo = config.paleo + + if paleo.calnoro_binary is None or not paleo.calnoro_binary.exists(): + raise FileNotFoundError( + f"Calnoro binary not found: {paleo.calnoro_binary}\n" + "Compile calnoro from esm_tools: ./install_calnoro.sh levante" + ) + + print(f"\n Running calnoro (truncation T{truncation})...") + print(f" Binary: {paleo.calnoro_binary}") + print(f" Working dir: {work_dir}") + + result = subprocess.run( + [str(paleo.calnoro_binary)], + input=f"{truncation}\n", + capture_output=True, + text=True, + cwd=str(work_dir), + timeout=600, + ) + + if result.returncode != 0: + print(f" calnoro stderr:\n{result.stderr}") + raise RuntimeError(f"calnoro failed with exit code {result.returncode}") + + sso_file = work_dir / 'sso_par_fil.srv' + if not sso_file.exists(): + raise FileNotFoundError(f"calnoro did not produce {sso_file}") + + print(f" calnoro output: {sso_file}") + return sso_file + + +def postprocess_calnoro_output( + sso_file: Path, + truncation: int = 63, + verbose: bool = False, +) -> Dict[int, np.ndarray]: + """ + Read and post-process calnoro SERVICE output. + + Returns dict mapping calnoro variable code → 2D array (lat-flipped). + Calnoro for T63 uses a 64-latitude × 128-longitude Gaussian grid. 
+ """ + print("\n Post-processing calnoro output...") + + # T63 Gaussian grid dimensions + nlat = truncation + 1 # approximate + nlon = nlat * 2 + + fields = _read_service_file(sso_file, nlat=nlat, nlon=nlon) + + # Flip latitudes (calnoro outputs S→N, OpenIFS expects N→S) + for code in fields: + fields[code] = fields[code][::-1, :] + if verbose: + print(f" Code {code}: shape {fields[code].shape}, " + f"range [{fields[code].min():.4f}, {fields[code].max():.4f}]") + + print(f" Read {len(fields)} SSO fields from calnoro output") + return fields + + +def apply_subgrid_orography( + config: OCPConfig, + grid: GaussianGrid, + masks: Dict[str, np.ndarray], + sso_fields: Dict[int, np.ndarray], + truncation: int = 63, +) -> Dict[int, np.ndarray]: + """ + Map calnoro SSO fields to OpenIFS GRIB codes and interpolate to Gaussian grid. + + Applies unit conversions: + - OROSIG (slope): ×10 + - OROTHE (angle): degrees → radians + + Returns dict mapping OpenIFS GRIB paramId → values array on Gaussian grid. 
+ """ + verbose = config.options.verbose + + print("\n Applying subgrid-scale orography to Gaussian grid...") + + tgt_lons = np.array(grid.lons_list) + tgt_lats = grid.center_lats.flatten() + land_mask = masks['land'] + + # Build T63 Gaussian grid coordinates for the calnoro output + nlat = truncation + 1 + nlon = nlat * 2 + # Simple regular grid approximation for the T63 output + sso_lons_1d = np.linspace(0, 360 - 360/nlon, nlon) + sso_lats_1d = np.linspace(90, -90, nlat) + sso_lons_2d, sso_lats_2d = np.meshgrid(sso_lons_1d, sso_lats_1d) + + results = {} + for calnoro_code, (oifs_code, name, scale) in SSO_MAPPING.items(): + if calnoro_code not in sso_fields: + if verbose: + print(f" Skipping {name} (code {calnoro_code} not in calnoro output)") + continue + + field_2d = sso_fields[calnoro_code] + + # Interpolate to Gaussian grid + field_gauss = _interp_to_gaussian( + field_2d, sso_lons_2d, sso_lats_2d, + tgt_lons, tgt_lats, + method='nearest', fill_value=0, + ) + + # Apply scale factor + field_gauss *= scale + + # Mask ocean to 0 + field_gauss[~land_mask] = 0 + + results[oifs_code] = field_gauss + print(f" {name} (code {oifs_code}): range [{field_gauss.min():.4f}, {field_gauss.max():.4f}]") + + return results + + +# --------------------------------------------------------------------------- +# Top-level entry point +# --------------------------------------------------------------------------- + +def modify_paleo_subgrid_orography( + config: OCPConfig, + grid: GaussianGrid, + masks: Dict[str, np.ndarray], +) -> Path: + """ + Full subgrid-scale orography pipeline. + + 1. Prepare calnoro input (topodata.nc) + 2. Run calnoro + 3. Post-process output + 4. 
Apply SSO fields to ICMGG + + Parameters + ---------- + config : OCPConfig with paleo section + grid : Gaussian grid + masks : derivative masks from create_paleo_masks() + + Returns + ------- + Path to the modified ICMGG file + """ + paleo = config.paleo + verbose = config.options.verbose + icmgg_file = config.get_icmgg_output_file() + + print("\n" + "=" * 60) + print(f" Paleo Subgrid-Scale Orography: {paleo.experiment_id}") + print("=" * 60) + + # Use a temporary working directory for calnoro + work_dir = config.output_paths.openifs_modified / f'calnoro_{paleo.experiment_id}' + work_dir.mkdir(parents=True, exist_ok=True) + + # 1. Prepare input + prepare_calnoro_input(config, work_dir) + + # 2. Run calnoro + sso_file = run_calnoro(config, work_dir) + + # 3. Post-process + sso_fields = postprocess_calnoro_output(sso_file, verbose=verbose) + + # 4. Apply to Gaussian grid + sso_replacements = apply_subgrid_orography(config, grid, masks, sso_fields) + + # 5. Write to ICMGG + if sso_replacements: + print(f"\n Writing {len(sso_replacements)} SSO fields to {icmgg_file}...") + _replace_grib_fields(icmgg_file, icmgg_file, sso_replacements, verbose=verbose) + print(f" Subgrid-scale orography applied: {icmgg_file}") + else: + print(" Warning: No SSO fields to write") + + return icmgg_file diff --git a/ocp_tool/paleo_topo.py b/ocp_tool/paleo_topo.py new file mode 100644 index 0000000..4ded193 --- /dev/null +++ b/ocp_tool/paleo_topo.py @@ -0,0 +1,249 @@ +""" +Paleo topography modification module for OCP-Tool. + +Replaces mod_topo_oifs.sh: modifies the ICMSH spectral topography file +using the anomaly method (paleo topography − modern topography) applied +to the existing OpenIFS spectral orography. + +Uses CDO (via Python bindings) for spectral ↔ gridpoint transforms +(sp2gpl / gp2spl), eccodes for GRIB I/O, and scipy for interpolation. 
+""" + +import os +import tempfile +from pathlib import Path +from shutil import copy2 +from typing import Dict, Tuple + +import numpy as np +from scipy.interpolate import griddata +import eccodes + +from .config import OCPConfig +from .gaussian_grids import GaussianGrid +from .paleo_input import ( + _read_netcdf_field, + _interp_to_gaussian, + _distance_weighted_fill, +) + + +# ── CDO helpers for spectral ↔ gridpoint ──────────────────────── + +def _sp2gp(spectral_grib: str, gridpoint_nc: str) -> None: + """Convert spectral GRIB → Gaussian gridpoint NetCDF using CDO sp2gpl.""" + from cdo import Cdo + cdo = Cdo() + cdo.sp2gpl(input=spectral_grib, output=gridpoint_nc, options='-f nc4 --eccodes') + + +def _gp2sp(gridpoint_nc: str, spectral_grib: str, grib_opts: str = '-f grb2 --eccodes') -> None: + """Convert Gaussian gridpoint NetCDF → spectral GRIB using CDO gp2spl.""" + from cdo import Cdo + cdo = Cdo() + cdo.gp2spl(input=gridpoint_nc, output=spectral_grib, options=grib_opts) + + +def _extract_z_grib(icmsh_file: Path, output_file: str) -> None: + """Extract the 'z' (geopotential / orography) message from an ICMSH file.""" + import subprocess + subprocess.run( + ['grib_copy', '-w', 'shortName=z', str(icmsh_file), output_file], + check=True, + ) + + +def _replace_z_in_icmsh(icmsh_input: Path, z_spectral_grib: str, icmsh_output: Path) -> None: + """Replace the 'z' field in ICMSH with a new spectral GRIB message.""" + import subprocess + with tempfile.NamedTemporaryFile(suffix='.grib', delete=False) as tmp: + filtered = tmp.name + try: + # Copy everything except 'z' + subprocess.run( + ['grib_copy', '-w', 'shortName!=z', str(icmsh_input), filtered], + check=True, + ) + # Merge: filtered (all except z) + new z → output + subprocess.run( + ['grib_copy', filtered, z_spectral_grib, str(icmsh_output)], + check=True, + ) + finally: + if os.path.exists(filtered): + os.remove(filtered) + + +# ── Topography reading helpers ────────────────────────────────── + +def 
_read_z_gridpoint(icmsh_file: Path, verbose: bool = False) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Read spectral 'z' from ICMSH, convert to gridpoint via CDO sp2gpl. + + Returns (topo_m, lats_1d, lons_1d) on a Gaussian grid. + """ + import xarray as xr + + with tempfile.TemporaryDirectory(prefix='ocp_topo_') as tmpdir: + z_grib = os.path.join(tmpdir, 'z_spectral.grib') + z_nc = os.path.join(tmpdir, 'z_gridpoint.nc') + + _extract_z_grib(icmsh_file, z_grib) + _sp2gp(z_grib, z_nc) + + ds = xr.open_dataset(z_nc) + # Variable may be called 'z' or 'orog' depending on CDO version + for vname in ['z', 'orog', 'Z']: + if vname in ds.data_vars: + data = ds[vname].values.squeeze() + break + else: + # Fallback: take the first data variable + vname = list(ds.data_vars)[0] + data = ds[vname].values.squeeze() + + lats = ds['lat'].values + lons = ds['lon'].values + + ds.close() + + topo_m = data / 9.80665 # geopotential → metres + if verbose: + print(f" sp2gp: {vname} → {data.shape}, " + f"topo range [{topo_m.min():.1f}, {topo_m.max():.1f}] m") + return topo_m, lats, lons + + +# ── Main entry point ──────────────────────────────────────────── + +def modify_topography( + config: OCPConfig, + grid: GaussianGrid, + masks: Dict[str, np.ndarray], +) -> Path: + """ + Modify ICMSH topography using the anomaly method. + + Steps: + 1. Read modern + paleo topography from reconstruction NetCDF files + 2. Clip below sea level to 0 + 3. Distance-weighted fill for land mask gaps + 4. Compute anomaly (paleo − modern) on Gaussian grid + 5. Read existing OpenIFS spectral topo via CDO sp2gpl → gridpoint + 6. Add anomaly on the gridpoint grid, mask by land, clip below 0 + 7. Convert back to spectral via CDO gp2spl, replace 'z' in ICMSH + + Returns path to the modified ICMSH file. 
+ """ + import xarray as xr + + paleo = config.paleo + verbose = config.options.verbose + + print("\n" + "=" * 60) + print(f" Paleo Topography Modification: {paleo.experiment_id}") + print("=" * 60) + + tgt_lons = np.array(grid.lons_list) + tgt_lats = grid.center_lats.flatten() + land_mask = masks['land'] + + # --- 1. Read reconstruction topographies --- + modern_topo_file = paleo.get_modern_file('modern_topo_file') + paleo_topo_file = paleo.get_reconstruction_file('topography_file') + + print(f" Modern topo: {modern_topo_file}") + print(f" Paleo topo: {paleo_topo_file}") + + modern_data, m_lons, m_lats = _read_netcdf_field(modern_topo_file) + paleo_data, p_lons, p_lats = _read_netcdf_field(paleo_topo_file) + + # --- 2. Clip below sea level --- + modern_data = np.where(modern_data < 0, 0, modern_data) + paleo_data = np.where(paleo_data < 0, 0, paleo_data) + + # --- 3. Interpolate to Gaussian grid --- + modern_gauss = _interp_to_gaussian(modern_data, m_lons, m_lats, + tgt_lons, tgt_lats, method='nearest', fill_value=0) + paleo_gauss = _interp_to_gaussian(paleo_data, p_lons, p_lats, + tgt_lons, tgt_lats, method='nearest', fill_value=0) + + # Distance-weighted fill for land points missing paleo topo + paleo_gauss[~np.isfinite(paleo_gauss)] = 0 + paleo_on_land = np.where(land_mask, paleo_gauss, 0) + paleo_on_land[land_mask & (paleo_on_land == 0)] = 200 # default elevation + paleo_filled = _distance_weighted_fill(paleo_on_land, land_mask & (paleo_on_land > 0), + tgt_lons, tgt_lats) + paleo_filled[~land_mask] = 0 + + # --- 4. Compute anomaly --- + topo_anom = paleo_filled - modern_gauss + topo_anom[~land_mask] = 0 + + print(f" Topo anomaly range: [{topo_anom.min():.1f}, {topo_anom.max():.1f}] m") + + # --- 5. 
Read existing spectral topo → gridpoint via CDO --- + icmsh_input = config.get_icmsh_input_file() + print(f" Reading spectral topo from: {icmsh_input}") + + oifs_topo, gp_lats, gp_lons = _read_z_gridpoint(icmsh_input, verbose=verbose) + # oifs_topo is 2D (lat, lon) + + gp_lons2d, gp_lats2d = np.meshgrid(gp_lons, gp_lats) + + # Interpolate anomaly from Gaussian grid to the gridpoint grid + anom_pts = np.column_stack([tgt_lons, tgt_lats]) + gp_pts = np.column_stack([gp_lons2d.ravel(), gp_lats2d.ravel()]) + anom_on_gp = griddata(anom_pts, topo_anom, gp_pts, method='nearest', fill_value=0) + anom_on_gp = anom_on_gp.reshape(oifs_topo.shape) + + # --- 6. Add anomaly, clip --- + new_topo = oifs_topo + anom_on_gp + new_topo = np.where(new_topo < 0, 0, new_topo) + + # Mask ocean (where the paleo LSM says ocean) + land_on_gp = griddata(anom_pts, land_mask.astype(float), gp_pts, + method='nearest', fill_value=0).reshape(oifs_topo.shape) + new_topo[land_on_gp < 0.5] = 0 + + print(f" New topo range: [{new_topo.min():.1f}, {new_topo.max():.1f}] m") + + # --- 7. 
Convert modified gridpoint → spectral, write ICMSH --- + icmsh_output = config.get_icmsh_output_file() + print(f" Writing modified ICMSH: {icmsh_output}") + + new_geopotential = new_topo * 9.80665 + + with tempfile.TemporaryDirectory(prefix='ocp_topo_') as tmpdir: + # Write modified gridpoint field as NetCDF + mod_nc = os.path.join(tmpdir, 'z_modified.nc') + + # Read the sp2gp output to get metadata, then overwrite data + z_grib_tmp = os.path.join(tmpdir, 'z_orig.grib') + z_nc_tmp = os.path.join(tmpdir, 'z_orig.nc') + _extract_z_grib(icmsh_input, z_grib_tmp) + _sp2gp(z_grib_tmp, z_nc_tmp) + + ds = xr.open_dataset(z_nc_tmp) + # Find the variable name + for vname in ['z', 'orog', 'Z']: + if vname in ds.data_vars: + break + else: + vname = list(ds.data_vars)[0] + + ds_mod = ds.copy(deep=True) + ds_mod[vname].values[:] = new_geopotential.reshape(ds_mod[vname].shape) + ds_mod.to_netcdf(mod_nc) + ds.close() + ds_mod.close() + + # gp2sp: gridpoint NetCDF → spectral GRIB + z_new_grib = os.path.join(tmpdir, 'z_new_spectral.grib') + _gp2sp(mod_nc, z_new_grib) + + # Replace 'z' in the original ICMSH + _replace_z_in_icmsh(icmsh_input, z_new_grib, icmsh_output) + + print(f" Topography modification complete: {icmsh_output}") + return icmsh_output diff --git a/run_ocp_tool.py b/run_ocp_tool.py index e7b4420..e137783 100644 --- a/run_ocp_tool.py +++ b/run_ocp_tool.py @@ -24,6 +24,9 @@ from ocp_tool.co2_interpolation import interpolate_co2_to_icmgg from ocp_tool.field_interpolation import interpolate_2d_fields_to_icmgg from ocp_tool.create_outputdirs import create_outputdirs +from ocp_tool.paleo_input import modify_paleo_input, create_paleo_masks +from ocp_tool.paleo_topo import modify_topography +from ocp_tool.paleo_subgrid_oro import modify_paleo_subgrid_orography def run_ocp_tool(config: OCPConfig) -> None: @@ -41,7 +44,8 @@ def run_ocp_tool(config: OCPConfig) -> None: print(f" Ocean grid: {config.ocean.grid_name}") print(f" Ice cavities: {config.ocean.has_ice_cavities}") 
print(f" Experiment: {config.atmosphere.experiment_name}") - + if config.paleo and config.paleo.enabled: + print(f" Paleo mode: ENABLED (experiment {config.paleo.experiment_id})") # Process each resolution for resolution in config.atmosphere.resolution_list: @@ -131,6 +135,24 @@ def run_ocp_tool(config: OCPConfig) -> None: # Step 12: Create SLT output for LPJ-GUESS print("\nStep 12: Creating SLT output for LPJ-GUESS...") create_slt_output_for_lpjg(config, resolution) + + # ---- Paleo modification steps (conditional) ---- + if config.paleo and config.paleo.enabled: + # Step 13: Modify ICMGG land surface variables from reconstructions + print("\nStep 13: Paleo land surface modifications (ICMGG)...") + modify_paleo_input(config, grid, lsm_data) + + # Step 14: Modify ICMSH topography + print("\nStep 14: Paleo topography modification (ICMSH)...") + masks = create_paleo_masks(config, grid, lsm_data) + modify_topography(config, grid, masks) + + # Step 15: Subgrid-scale orography via calnoro + if config.paleo.calnoro_binary: + print("\nStep 15: Paleo subgrid-scale orography (calnoro)...") + modify_paleo_subgrid_orography(config, grid, masks) + else: + print("\nStep 15: Skipped calnoro (no binary configured)") print("\n" + "=" * 60) print(" OCP-Tool processing complete!") diff --git a/setup.py b/setup.py index 7fd3247..0c5dc5f 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,8 @@ 'numpy', 'netcdf4', 'eccodes', + 'cdo', + 'scipy', 'pyfesom2', ], entry_points={