From b7fbc4c82a3d6aa22642a5752d2e63b32cf054f2 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 28 Feb 2026 11:20:32 -0500 Subject: [PATCH 01/40] Add GEFS atmospheric ensemble support for UFS-Coastal SECOFS Enable 6-member GEFS atmospheric ensemble for secofs_ufs with DATM forcing. Members use per-member DATM forcing directories instead of shared sflux files: - 000: GFS+HRRR blended control (existing prep output) - 001-004: GEFS pgrb2sp25 0.25 deg (no HRRR blend) - 005: RRFS (GFS fallback for now) Changes: - secofs_ufs.yaml: Add ensemble section with perturb_physics: false - nos_ofs_create_datm_forcing.sh: Add GEFS DBASE case + find_gefs_file() - nos_ofs_create_datm_forcing_blended.sh: Parameterize primary source via DATM_PRIMARY_SOURCE env var, add DATM_SKIP_UFS_CONFIG toggle - JNOS_OFS_ENSEMBLE_ATMOS_PREP: Add USE_DATM branch that generates per-member DATM forcing via blended orchestrator with cfp parallelism - nos_ofs_ensemble_run.sh: Read met_source_1 from params.json to stage member-specific DATM input, patch datm_in grid dims - New PBS scripts for UFS ensemble member, atmos prep, and launcher --- jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP | 242 +++++++++++-- parm/systems/secofs_ufs.yaml | 96 ++++- pbs/jnos_secofs_ufs_ensemble_atmos_prep.pbs | 109 ++++++ pbs/jnos_secofs_ufs_ensemble_member.pbs | 133 +++++++ pbs/launch_secofs_ufs_ensemble.sh | 330 ++++++++++++++++++ ush/nos_ofs_ensemble_run.sh | 54 ++- ush/nosofs/nos_ofs_create_datm_forcing.sh | 68 +++- .../nos_ofs_create_datm_forcing_blended.sh | 71 ++-- 8 files changed, 1038 insertions(+), 65 deletions(-) create mode 100644 pbs/jnos_secofs_ufs_ensemble_atmos_prep.pbs create mode 100644 pbs/jnos_secofs_ufs_ensemble_member.pbs create mode 100644 pbs/launch_secofs_ufs_ensemble.sh diff --git a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP index 5099c52..67fff2b 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP +++ b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP @@ -565,8 +565,156 @@ GEFSWRAPEOF else 
######################################################## - # COMF: Use nos_ofs_launch.sh + nos_ofs_create_forcing_met.sh - # (existing behavior for SECOFS and other COMF OFS) + # COMF: Two sub-branches depending on USE_DATM + ######################################################## + + if [ "${USE_DATM:-false}" == "true" ] || [ "${USE_DATM:-0}" == "1" ]; then + ######################################################## + # COMF + DATM: Generate per-member DATM forcing via the + # blended orchestrator with DATM_PRIMARY_SOURCE override. + # Each GEFS member gets its own datm_input directory in COMOUT. + # Control member (000) uses the existing GFS+HRRR blended + # forcing generated by the normal prep job. + ######################################################## + echo "=== COMF + DATM atmospheric ensemble prep ===" + + # Source nos_ofs_config.sh to set up YAML-derived env vars + if [ -f "${USHnos}/nos_ofs_config.sh" ]; then + source "${USHnos}/nos_ofs_config.sh" + load_ofs_config "${OFS_CONFIG}" "${OFS_FRAMEWORK}" + fi + + # Time variables for DATM forcing generation + export time_hotstart=${time_hotstart:-$($NDATE -6 ${PDY}${cyc})} + export time_nowcastend=${time_nowcastend:-${PDY}${cyc}} + export time_forecastend=${time_forecastend:-$($NDATE ${NHOURS_FCST:-48} ${PDY}${cyc})} + + echo "time_hotstart=${time_hotstart}" + echo "time_nowcastend=${time_nowcastend}" + echo "time_forecastend=${time_forecastend}" + + # wgrib2 path + export WGRIB2=${WGRIB2:-$(which wgrib2 2>/dev/null || echo wgrib2)} + + # Build cfp command file for parallel DATM forcing generation + DATM_CMDFILE=${DATA}/datm_ensemble_cmdfile + cat /dev/null > ${DATM_CMDFILE} + DATM_COUNT=0 + + # GEFS members: generate DATM forcing with GEFS as primary (no HRRR blend) + for GEFS_MEM in ${GEFS_MEMBERS}; do + GEFS_SOURCE="GEFS_${GEFS_MEM}" + MEM_SUFFIX="gefs_${GEFS_MEM}" + MEM_WORKDIR=${DATA}/datm_ensemble/${MEM_SUFFIX} + MEM_OUTDIR=${COMOUT}/${RUN}.${cycle}.datm_input_${MEM_SUFFIX} + mkdir -p ${MEM_WORKDIR} 
${MEM_OUTDIR} + + # Create wrapper script for this member + WRAPPER=${DATA}/datm_${MEM_SUFFIX}_wrapper.sh + cat > ${WRAPPER} << WRAPEOF +#!/bin/bash +set -x +export DATA=${MEM_WORKDIR} +export DATM_PRIMARY_SOURCE=${GEFS_SOURCE} +export DATM_BLEND_HRRR_GFS=false +export DATM_OUTPUT_SUFFIX=${MEM_SUFFIX} +export DATM_SKIP_UFS_CONFIG=true +mkdir -p \${DATA}/INPUT +${USHnos}/nosofs/nos_ofs_create_datm_forcing_blended.sh ${DATM_DOMAIN:-SECOFS} +rc=\$? +if [ \$rc -eq 0 ] && [ -d \${DATA}/INPUT ]; then + cp -p \${DATA}/INPUT/*.nc ${MEM_OUTDIR}/ 2>/dev/null + echo "Archived DATM input to ${MEM_OUTDIR}" +fi +exit \$rc +WRAPEOF + chmod 755 ${WRAPPER} + echo "${WRAPPER}" >> ${DATM_CMDFILE} + (( DATM_COUNT = DATM_COUNT + 1 )) + done + + # RRFS member: generate DATM forcing with RRFS as primary (no HRRR blend) + if [ "${RRFS_ENABLED}" = "true" ]; then + RRFS_WORKDIR=${DATA}/datm_ensemble/rrfs + RRFS_OUTDIR=${COMOUT}/${RUN}.${cycle}.datm_input_rrfs + mkdir -p ${RRFS_WORKDIR} ${RRFS_OUTDIR} + + # RRFS needs special handling: regrid from Lambert to lat/lon first. + # The existing nos_ofs_create_datm_forcing.sh does not support RRFS + # directly (it's a 3km Lambert Conformal grid, not regular lat/lon). + # For now, use the RRFS sflux pipeline to generate sflux files, then + # convert. If RRFS DATM support is needed, add RRFS case to the + # DATM forcing script. For MVP, fall back to GFS for RRFS member. + WRAPPER=${DATA}/datm_rrfs_wrapper.sh + cat > ${WRAPPER} << WRAPEOF +#!/bin/bash +set -x +echo "WARNING: RRFS DATM forcing not yet implemented — using GFS as fallback" +export DATA=${RRFS_WORKDIR} +export DATM_PRIMARY_SOURCE=GFS25 +export DATM_BLEND_HRRR_GFS=false +export DATM_OUTPUT_SUFFIX=rrfs +export DATM_SKIP_UFS_CONFIG=true +mkdir -p \${DATA}/INPUT +${USHnos}/nosofs/nos_ofs_create_datm_forcing_blended.sh ${DATM_DOMAIN:-SECOFS} +rc=\$? 
+if [ \$rc -eq 0 ] && [ -d \${DATA}/INPUT ]; then + cp -p \${DATA}/INPUT/*.nc ${RRFS_OUTDIR}/ 2>/dev/null + echo "Archived DATM input to ${RRFS_OUTDIR}" +fi +exit \$rc +WRAPEOF + chmod 755 ${WRAPPER} + echo "${WRAPPER}" >> ${DATM_CMDFILE} + (( DATM_COUNT = DATM_COUNT + 1 )) + fi + + if [ ${DATM_COUNT} -gt 0 ]; then + echo "" + echo "DATM ensemble command file (${DATM_COUNT} members):" + cat ${DATM_CMDFILE} + + chmod 755 ${DATM_CMDFILE} + DATM_NP=${DATM_COUNT} + DATM_PPN=${GEFS_PPN:-8} + if [ ${DATM_NP} -gt ${DATM_PPN} ]; then + DATM_NP=${DATM_PPN} + fi + echo "Launching cfp with ${DATM_NP} parallel tasks for ${DATM_COUNT} DATM members" + mpiexec -n ${DATM_NP} -ppn ${DATM_NP} cfp ${DATM_CMDFILE} + rc=$? + if [ $rc -ne 0 ]; then + echo "WARNING: cfp DATM ensemble processing returned exit code $rc" >&2 + echo "Some members may have failed. Continuing..." + fi + + # Verify DATM output directories + echo "" + echo "DATM ensemble member outputs:" + for GEFS_MEM in ${GEFS_MEMBERS}; do + MEM_OUTDIR=${COMOUT}/${RUN}.${cycle}.datm_input_gefs_${GEFS_MEM} + if [ -s "${MEM_OUTDIR}/datm_forcing.nc" ]; then + echo " OK: datm_input_gefs_${GEFS_MEM} ($(ls -lh ${MEM_OUTDIR}/datm_forcing.nc | awk '{print $5}'))" + else + echo " MISSING: datm_input_gefs_${GEFS_MEM}" + fi + done + if [ "${RRFS_ENABLED}" = "true" ]; then + RRFS_OUTDIR=${COMOUT}/${RUN}.${cycle}.datm_input_rrfs + if [ -s "${RRFS_OUTDIR}/datm_forcing.nc" ]; then + echo " OK: datm_input_rrfs ($(ls -lh ${RRFS_OUTDIR}/datm_forcing.nc | awk '{print $5}'))" + else + echo " MISSING: datm_input_rrfs" + fi + fi + else + echo "No DATM ensemble members to process." 
+ fi + + else + ######################################################## + # COMF (non-DATM): Use nos_ofs_launch.sh + nos_ofs_create_forcing_met.sh + # (existing behavior for standalone SECOFS and other COMF OFS) ######################################################## echo "=== Sourcing nos_ofs_launch.sh to set up environment ===" @@ -887,7 +1035,9 @@ GEFSWRAPEOF fi fi fi -fi + + fi # end USE_DATM if/else +fi # end OFS_FRAMEWORK if/else ######################################################## # Summary @@ -908,45 +1058,67 @@ if [ -n "${EXTRA_SOURCES}" ]; then fi if [ -n "${GEFS_MEMBERS}" ]; then - echo "GEFS member outputs:" - for GEFS_MEM in ${GEFS_MEMBERS}; do - if [ "${GEFS_MEM}" = "c00" ] || [ "${GEFS_MEM}" = "00" ]; then - _tag="gec00" - else - _tag="gep$(printf '%02d' "${GEFS_MEM}" 2>/dev/null || echo "${GEFS_MEM}")" - fi - # GEFS pgrb2sp25 produces air+prc+rad (all 8 sflux variables) - _ok=true - for var in air prc rad; do - _f="${COMOUTrerun:-${COMOUT}/rerun}/${RUN}.${cycle}.gefs_${GEFS_MEM}.${var}.nc" - [ ! 
-s "${_f}" ] && _ok=false + if [ "${USE_DATM:-false}" == "true" ] || [ "${USE_DATM:-0}" == "1" ]; then + echo "GEFS DATM member outputs:" + for GEFS_MEM in ${GEFS_MEMBERS}; do + MEM_OUTDIR=${COMOUT}/${RUN}.${cycle}.datm_input_gefs_${GEFS_MEM} + if [ -s "${MEM_OUTDIR}/datm_forcing.nc" ]; then + echo " OK: datm_input_gefs_${GEFS_MEM} ($(ls -lh ${MEM_OUTDIR}/datm_forcing.nc | awk '{print $5}'))" + else + echo " MISSING: datm_input_gefs_${GEFS_MEM}" + fi done - if [ "${_ok}" = true ]; then - echo " OK: gefs_${GEFS_MEM} (air/prc/rad in COMOUTrerun)" - else - echo " MISSING: gefs_${GEFS_MEM}" - fi - done + else + echo "GEFS member outputs:" + for GEFS_MEM in ${GEFS_MEMBERS}; do + if [ "${GEFS_MEM}" = "c00" ] || [ "${GEFS_MEM}" = "00" ]; then + _tag="gec00" + else + _tag="gep$(printf '%02d' "${GEFS_MEM}" 2>/dev/null || echo "${GEFS_MEM}")" + fi + # GEFS pgrb2sp25 produces air+prc+rad (all 8 sflux variables) + _ok=true + for var in air prc rad; do + _f="${COMOUTrerun:-${COMOUT}/rerun}/${RUN}.${cycle}.gefs_${GEFS_MEM}.${var}.nc" + [ ! 
-s "${_f}" ] && _ok=false + done + if [ "${_ok}" = true ]; then + echo " OK: gefs_${GEFS_MEM} (air/prc/rad in COMOUTrerun)" + else + echo " MISSING: gefs_${GEFS_MEM}" + fi + done + fi fi if [ "${RRFS_ENABLED}" = "true" ]; then - echo "RRFS forcing output:" - # RRFS only produces air+prc (radiation has incompatible variable naming; - # rad comes from GFS fallback during member staging) - _rrfs_ok=true - for var in air prc; do - _f="${COMOUTrerun:-${COMOUT}/rerun}/${RUN}.${cycle}.rrfs.${var}.nc" - if [ -s "${_f}" ]; then - echo " OK: $(basename ${_f}) ($(stat -c%s "${_f}" 2>/dev/null || echo '?') bytes)" + if [ "${USE_DATM:-false}" == "true" ] || [ "${USE_DATM:-0}" == "1" ]; then + echo "RRFS DATM forcing output:" + RRFS_OUTDIR=${COMOUT}/${RUN}.${cycle}.datm_input_rrfs + if [ -s "${RRFS_OUTDIR}/datm_forcing.nc" ]; then + echo " OK: datm_input_rrfs/datm_forcing.nc ($(ls -lh ${RRFS_OUTDIR}/datm_forcing.nc | awk '{print $5}'))" else - echo " MISSING: $(basename ${_f})" - _rrfs_ok=false + echo " MISSING: datm_input_rrfs/datm_forcing.nc" fi - done - if [ "${_rrfs_ok}" = true ]; then - echo " RRFS: OK (air+prc in COMOUTrerun; rad from GFS fallback)" else - echo " RRFS: INCOMPLETE" + echo "RRFS forcing output:" + # RRFS only produces air+prc (radiation has incompatible variable naming; + # rad comes from GFS fallback during member staging) + _rrfs_ok=true + for var in air prc; do + _f="${COMOUTrerun:-${COMOUT}/rerun}/${RUN}.${cycle}.rrfs.${var}.nc" + if [ -s "${_f}" ]; then + echo " OK: $(basename ${_f}) ($(stat -c%s "${_f}" 2>/dev/null || echo '?') bytes)" + else + echo " MISSING: $(basename ${_f})" + _rrfs_ok=false + fi + done + if [ "${_rrfs_ok}" = true ]; then + echo " RRFS: OK (air+prc in COMOUTrerun; rad from GFS fallback)" + else + echo " RRFS: INCOMPLETE" + fi fi fi diff --git a/parm/systems/secofs_ufs.yaml b/parm/systems/secofs_ufs.yaml index 498c4cc..6d6423c 100644 --- a/parm/systems/secofs_ufs.yaml +++ b/parm/systems/secofs_ufs.yaml @@ -371,9 +371,101 @@ 
shell_mappings: NX_GLOBAL: ufs_coastal.nx_global NY_GLOBAL: ufs_coastal.ny_global -# Ensemble disabled for initial UFS-Coastal integration +# GEFS atmospheric ensemble for UFS-Coastal (DATM forcing) +# 6-member atmospheric-only ensemble (no physics perturbation). +# Member 000 is the control run (GFS+HRRR blended DATM forcing + default params) +# Members 001-004 use GEFS perturbation members (GEFS-only DATM, no HRRR blend) +# Member 005 uses RRFS (3km regridded to regular lat/lon) ensemble: - enabled: false + enabled: true + n_members: 6 + method: gefs + seed: 42 # Reproducible sampling + # GEFS (Global Ensemble Forecast System) atmospheric forcing configuration + # Uses 0.25 deg pgrb2sp25 product (has all 8 sflux variables) + # Control member uses GFS deterministic (0.25 deg) + HRRR coastal blending + # Perturbed members use GEFS-only (no HRRR) to preserve ensemble spread at coast + gefs: + enabled: true + n_gefs_members: 4 + resolution: "0p25" + product: "pgrb2sp25" + control_member: "gec00" + perturbation_prefix: "gep" + members: + "000": + label: "control (GFS+HRRR)" + met_source_1: GFS + met_source_2: HRRR + "001": + label: "GEFS p01" + met_source_1: GEFS_01 + met_source_2: null + "002": + label: "GEFS p02" + met_source_1: GEFS_02 + met_source_2: null + "003": + label: "GEFS p03" + met_source_1: GEFS_03 + met_source_2: null + "004": + label: "GEFS p04" + met_source_1: GEFS_04 + met_source_2: null + "005": + label: "RRFS" + met_source_1: RRFS + met_source_2: null + extra_sources: + - GEFS_01 + - GEFS_02 + - GEFS_03 + - GEFS_04 + - RRFS + # RRFS (Rapid Refresh Forecast System) atmospheric forcing configuration + # 3km resolution, North America domain, available on WCOSS2 at para status + # Path: /lfs/h1/ops/para/com/rrfs/v1.0/rrfs.YYYYMMDD/HH/rrfs.tHHz.prslev.3km.fFFF.na.grib2 + # Note: RRFS does NOT have PRMSL — uses MSLET instead (forcing script handles this) + # SECOFS forecast is 48h — RRFS covers 84h at main cycles, no fallback needed. 
+ rrfs: + enabled: true + resolution: "3km" + domain: "na" # North America + max_forecast_hours: 84 # At main cycles (00/06/12/18Z) + fallback_source: null # 48h forecast fits within RRFS 84h coverage + version: "v1.0" + status: "para" # Not yet in prod + # No physics perturbation — atmospheric forcing differences only + perturb_physics: false + # Member 000 (control) always uses default physics values + parameters: + rdrg2: + min: 0.001 + max: 0.01 + default: 0.005 + distribution: uniform + description: "Bottom drag coefficient" + zob: + min: 0.0001 + max: 0.001 + default: 0.0005 + distribution: log_uniform + description: "Bottom roughness length (m)" + akt_bak: + min: 1.0e-6 + max: 1.0e-5 + default: 5.0e-6 + distribution: log_uniform + description: "Background tracer diffusivity (m2/s)" + # Ensemble-specific resources (per member) + resources: + select: "select=10:ncpus=128:mpiprocs=120" + walltime: "05:30:00" + # Ensemble post-processing resources + post_resources: + select: "select=1:ncpus=8:mpiprocs=8" + walltime: "01:00:00" # Legacy mode legacy: diff --git a/pbs/jnos_secofs_ufs_ensemble_atmos_prep.pbs b/pbs/jnos_secofs_ufs_ensemble_atmos_prep.pbs new file mode 100644 index 0000000..afc2d67 --- /dev/null +++ b/pbs/jnos_secofs_ufs_ensemble_atmos_prep.pbs @@ -0,0 +1,109 @@ +#!/bin/bash +#PBS -N secofs_ufs_atmos_prep +#PBS -A ESTOFS-DEV +#PBS -q dev +#PBS -o /lfs/h1/nos/ptmp/mansur.jisan/rpt/secofs_ufs/secofs_ufs_atmos_prep.out +#PBS -e /lfs/h1/nos/ptmp/mansur.jisan/rpt/secofs_ufs/secofs_ufs_atmos_prep.err +#PBS -l select=1:ncpus=8:mpiprocs=8 +#PBS -l place=vscatter +#PBS -l walltime=02:00:00 + +# ------------------------------------------------------------------ +# SECOFS UFS-Coastal Ensemble Atmospheric Prep PBS Job +# +# Creates DATM-format forcing files for GEFS/RRFS ensemble members. +# Runs between the regular prep job and ensemble member jobs. +# Each member gets its own datm_input directory in COMOUT. 
+# +# Submit via launch_secofs_ufs_ensemble.sh --gefs or manually: +# qsub -v CYC=00,PDY=20260213 jnos_secofs_ufs_ensemble_atmos_prep.pbs +# ------------------------------------------------------------------ + +CYC=${CYC:-00} + +# PDY: accept from qsub -v, fall back to today +export PDY=${PDY:-$(date +%Y%m%d)} + +PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages +. ${PACKAGEROOT}/nosofs.v3.7.0/versions/run.ver + +# Working directory +RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs +WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/secofs_ufs +mkdir -p -m 755 $RPTDIR $WORKDIR +cd ${WORKDIR} + +# Standard modules (NOT hpc-stack — this job runs Python/wgrib2, not UFS) +module purge +module load envvar/${envvars_ver} + +module load PrgEnv-intel/${PrgEnv_intel_ver} +module load craype/${craype_ver} +module load intel/${intel_ver} +module load cray-mpich/${cray_mpich_ver} +module load cray-pals/${cray_pals_ver} +module load netcdf/${netcdf_ver} +module load hdf5/${hdf5_ver} +module load python/${python_ver} +module load prod_envir/${prod_envir_ver} +module load prod_util/${prod_util_ver} +module load grib_util/${grib_util_ver} +module load cfp/${cfp_ver} +module load nco/${nco_ver} +module load wgrib2/${wgrib2_ver} + +module list + +# EXPORT list +set +x +export envir=dev +export OFS=secofs_ufs +export cyc=${CYC} +export NET=nosofs +export model=nosofs +export job=secofs_ufs_atmos_prep_${CYC}_$envir +export platform=ptmp + +# UFS-Coastal DATM coupling +export USE_DATM=true + +export KEEPDATA=YES +export SENDCOM=NO +export SENDDBN=NO +export SENDSMS=NO + +export nosofs_ver=v3.7.0 +export PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages + +# Data and COM paths +export COMPATH=/lfs/h1/ops/prod/com/nos +export com_ver=v3.7 +export COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com +export DCOMROOT=/lfs/h1/ops/prod/dcom +export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} +export COMIN=/lfs/h1/ops/prod/com + +# Input data paths +export 
COMINnam=/lfs/h1/ops/prod/com/nam/${nam_ver} +export COMINhrrr=/lfs/h1/ops/prod/com/hrrr/${hrrr_ver} +export COMINrap=/lfs/h1/ops/prod/com/rap/${rap_ver} +export COMINgfs=/lfs/h1/ops/prod/com/gfs/${gfs_ver} +export COMINrtma=/lfs/h1/ops/prod/com/rtma/${rtma_ver} +export COMINetss=/lfs/h1/ops/prod/com/petss/${petss_ver} +export COMINrtofs_2d=/lfs/h1/ops/prod/com/rtofs/${rtofs_ver} +export COMINrtofs_3d=/lfs/h1/ops/prod/com/rtofs/${rtofs_ver} +export COMINnwm=/lfs/h1/ops/prod/com/nwm/${nwm_ver} +export COMINgefs=/lfs/h1/ops/prod/com/gefs/${gefs_ver:-v12.3} +export COMINrrfs=/lfs/h1/ops/para/com/rrfs/${rrfs_ver:-v1.0} + +################################################ +# CALL executable job script here +export pbsid=${PBS_JOBID%%.*} +export job=${job:-$PBS_JOBNAME} +export jobid=${jobid:-$job.$PBS_JOBID} + +export HOMEnos=${PACKAGEROOT}/nosofs.${nosofs_ver} +export OFS_CONFIG=${HOMEnos}/parm/systems/${OFS}.yaml +export PYTHONPATH=${HOMEnos}/ush/python:${PYTHONPATH:-} + +${HOMEnos}/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP diff --git a/pbs/jnos_secofs_ufs_ensemble_member.pbs b/pbs/jnos_secofs_ufs_ensemble_member.pbs new file mode 100644 index 0000000..de3b663 --- /dev/null +++ b/pbs/jnos_secofs_ufs_ensemble_member.pbs @@ -0,0 +1,133 @@ +#!/bin/bash +#PBS -N secofs_ufs_ens_00 +#PBS -A ESTOFS-DEV +#PBS -q dev +#PBS -o /lfs/h1/nos/ptmp/mansur.jisan/rpt/secofs_ufs/secofs_ufs_ens_00.out +#PBS -e /lfs/h1/nos/ptmp/mansur.jisan/rpt/secofs_ufs/secofs_ufs_ens_00.err +#PBS -l place=vscatter:excl,select=10:ncpus=128:mpiprocs=120 +#PBS -l walltime=05:30:00 + +# ------------------------------------------------------------------ +# SECOFS UFS-Coastal Ensemble Member PBS Job +# +# Runs a single UFS-Coastal ensemble member (DATM+SCHISM coupled). 
+# Submit via launch_secofs_ufs_ensemble.sh or manually with: +# qsub -v MEMBER_ID=000,CYC=00,PDY=20260213 \ +# -N secofs_ufs_ens000_00 \ +# -o /lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs/secofs_ufs_ens000_00.out \ +# -e /lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs/secofs_ufs_ens000_00.err \ +# jnos_secofs_ufs_ensemble_member.pbs +# +# The launcher script overrides -N, -o, -e per member via qsub flags. +# The #PBS defaults above are fallbacks for direct submission. +# ------------------------------------------------------------------ + +# Validate required variables +if [ -z "${MEMBER_ID:-}" ]; then + echo "ERROR: MEMBER_ID not set. Use: qsub -v MEMBER_ID=001,CYC=00,PDY=20260213 $0" >&2 + exit 1 +fi +CYC=${CYC:-00} + +# PDY: accept from qsub -v, fall back to today +export PDY=${PDY:-$(date +%Y%m%d)} + +PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages +. ${PACKAGEROOT}/nosofs.v3.7.0/versions/run.ver + +# Working directory +RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs +WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/secofs_ufs +mkdir -p -m 755 $RPTDIR $WORKDIR +cd ${WORKDIR} + +# ---------------------------------------------------------------- +# Module setup: UFS-Coastal hpc-stack libraries +# ---------------------------------------------------------------- +# The UFS executable (fv3_coastalS.exe) requires hpc-stack libraries +# (hdf5-D/1.14.0, netcdf-D/4.9.2, esmf-D/8.8.0) that differ from +# standard WCOSS2 system modules (hdf5/1.10.6, netcdf/4.7.4). 
+module purge +module reset + +# Clear version env vars that would override modules.fv3.lua defaults +unset PrgEnv_intel_ver craype_ver cmake_ver g2_ver w3emc_ver 2>/dev/null || true + +# Load UFS-Coastal hpc-stack modules +module use ${PACKAGEROOT}/nosofs.${nosofs_ver}/modulefiles +module load modules.fv3 +module load cray-pals + +# Infrastructure modules for J-job scripting +module load python/${python_ver:-3.8.6} +module load prod_envir/${prod_envir_ver:-2.0.6} +module load prod_util/${prod_util_ver:-2.0.17} + +module list + +# MPI tuning for large-core SCHISM runs on Cray +export MPICH_OFI_STARTUP_CONNECT=1 +export MPICH_COLL_SYNC=MPI_Bcast +export MPICH_REDUCE_NO_SMP=1 +export FI_OFI_RXM_SAR_LIMIT=1572864 + +# EXPORT list +set +x +export envir=dev +export OFS=secofs_ufs +export cyc=${CYC} +export NET=nosofs +export model=nosofs +export MEMBER_ID=${MEMBER_ID} +export job=secofs_ufs_ens${MEMBER_ID}_${CYC}_$envir +export platform=ptmp + +# UFS-Coastal DATM coupling +export USE_DATM=true + +export KEEPDATA=YES +export SENDCOM=NO +export SENDDBN=NO +export SENDSMS=NO + +export nosofs_ver=v3.7.0 +export PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages + +# Data and COM paths +export COMPATH=/lfs/h1/ops/prod/com/nos +export COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com +export DCOMROOT=/lfs/h1/ops/prod/dcom +export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} +export COMIN=/lfs/h1/ops/prod/com + +# Input data paths (production WCOSS2) +export COMINnam=/lfs/h1/ops/prod/com/nam/${nam_ver} +export COMINhrrr=/lfs/h1/ops/prod/com/hrrr/${hrrr_ver} +export COMINrap=/lfs/h1/ops/prod/com/rap/${rap_ver} +export COMINgfs=/lfs/h1/ops/prod/com/gfs/${gfs_ver} +export COMINrtma=/lfs/h1/ops/prod/com/rtma/${rtma_ver} +export COMINetss=/lfs/h1/ops/prod/com/petss/${petss_ver} +export COMINrtofs_2d=/lfs/h1/ops/prod/com/rtofs/${rtofs_ver} +export COMINrtofs_3d=/lfs/h1/ops/prod/com/rtofs/${rtofs_ver} +export COMINnwm=/lfs/h1/ops/prod/com/nwm/${nwm_ver} + +# UFS-Coastal: 
MED+ATM share PETs 0-119, OCN PETs 120-1199 +# 10 nodes x 120 mpiprocs = 1200 tasks +export TOTAL_TASKS=1200 + +################################################ +# CALL executable job script here +export pbsid=${PBS_JOBID%%.*} +export job=${job:-$PBS_JOBNAME} +export jobid=${jobid:-$job.$PBS_JOBID} + +# YAML configuration for secofs_ufs +export HOMEnos=${PACKAGEROOT}/nosofs.${nosofs_ver} +export OFS_CONFIG=${HOMEnos}/parm/systems/${OFS}.yaml +export PYTHONPATH=${HOMEnos}/ush/python:${PYTHONPATH:-} + +# Pass GEFS ensemble flag if set by launcher +export GEFS_ENSEMBLE=${GEFS_ENSEMBLE:-false} +export GEFS_MEMBER_ID=${GEFS_MEMBER_ID:-} + +${HOMEnos}/jobs/JNOS_OFS_ENSEMBLE_MEMBER diff --git a/pbs/launch_secofs_ufs_ensemble.sh b/pbs/launch_secofs_ufs_ensemble.sh new file mode 100644 index 0000000..3a64f26 --- /dev/null +++ b/pbs/launch_secofs_ufs_ensemble.sh @@ -0,0 +1,330 @@ +#!/bin/bash +# ====================================================================== +# launch_secofs_ufs_ensemble.sh - Submit SECOFS UFS-Coastal ensemble via PBS +# +# Shell-based launcher for running the GEFS atmospheric ensemble on WCOSS2 +# with UFS-Coastal (DATM+SCHISM coupled). Chains jobs with qsub dependencies: +# +# prep --> atmos_prep --> member_000 \ +# --> member_001 | +# --> member_002 |--> post_ensemble +# --> member_003 | +# --> member_004 | +# --> member_005 / +# +# The deterministic workflow (nowcast/forecast) can optionally +# run in parallel alongside the ensemble. 
+# +# Usage: +# ./launch_secofs_ufs_ensemble.sh # Default: cyc=00, 6 members, PDY=today +# ./launch_secofs_ufs_ensemble.sh 12 # Cycle 12 +# ./launch_secofs_ufs_ensemble.sh 00 3 # Cycle 00, 3 members +# ./launch_secofs_ufs_ensemble.sh 00 6 --with-det # Also submit deterministic +# ./launch_secofs_ufs_ensemble.sh 00 --gefs # GEFS ensemble (6 members, auto) +# PDY=20260212 ./launch_secofs_ufs_ensemble.sh 00 3 # Explicit PDY +# ./launch_secofs_ufs_ensemble.sh 00 3 --pdy 20260212 # Explicit PDY (alt) +# ./launch_secofs_ufs_ensemble.sh 00 3 --skip-prep # Skip prep, submit members only +# ./launch_secofs_ufs_ensemble.sh 00 --det-only # Deterministic only +# +# Requirements: +# - Run from the pbs/ directory (or set PBS_DIR) +# - Prep job PBS script must exist: jnos_secofs_ufs_prep_${CYC}.pbs +# - UFS ensemble PBS scripts in same directory +# ====================================================================== +set -e + +# ---- Configuration --------------------------------------------------- +CYC=${1:-00} +OFS=secofs_ufs + +# Track whether user explicitly provided N_MEMBERS +_USER_SET_MEMBERS=false +if [[ "${2:-}" != --* ]] && [ -n "${2:-}" ]; then + _USER_SET_MEMBERS=true +fi +# If $2 is a flag (starts with --), don't consume it as N_MEMBERS +if [[ "${2:-}" == --* ]]; then + N_MEMBERS=6 + shift 1 2>/dev/null || true +else + N_MEMBERS=${2:-6} + shift 2 2>/dev/null || true +fi +WITH_DET=false +SKIP_PREP=false +ATMOS_ENSEMBLE=true # Always true for UFS ensemble (DATM forcing per member) +GEFS_ENSEMBLE=true # Always true — this launcher is GEFS-specific +DET_ONLY=false + +# Check for flags +for arg in "$@"; do + case "$arg" in + --with-det) WITH_DET=true ;; + --skip-prep) SKIP_PREP=true ;; + --gefs) GEFS_ENSEMBLE=true ;; + --det-only) DET_ONLY=true ;; + --pdy) _NEXT_IS_PDY=true ;; + *) + if [ "${_NEXT_IS_PDY:-}" = true ]; then + PDY="$arg" + _NEXT_IS_PDY=false + fi + ;; + esac +done +unset _NEXT_IS_PDY + +# PDY: use env var, --pdy flag, or default to today 
+PDY=${PDY:-$(date +%Y%m%d)} +export PDY + +PBS_DIR="$(cd "$(dirname "$0")" && pwd)" + +echo "==============================================" +echo " SECOFS UFS-Coastal Ensemble Launcher" +echo "==============================================" +echo " PDY: ${PDY}" +echo " Cycle: ${CYC}" +echo " OFS: ${OFS}" +if [ "${DET_ONLY}" = true ]; then +echo " Mode: DETERMINISTIC ONLY (prep->nowcast->forecast)" +else +echo " Members: ${N_MEMBERS} (1 control + $((N_MEMBERS - 1)) perturbed)" +echo " Det run: ${WITH_DET}" +echo " GEFS ens: true (0.25 deg pgrb2sp25)" +echo " DATM: true (per-member DATM forcing)" +fi +echo " Skip prep: ${SKIP_PREP}" +echo " PBS dir: ${PBS_DIR}" +echo "==============================================" + +# ---- Validate PBS scripts exist -------------------------------------- +PREP_PBS="${PBS_DIR}/jnos_secofs_ufs_prep_${CYC}.pbs" +MEMBER_PBS="${PBS_DIR}/jnos_secofs_ufs_ensemble_member.pbs" +POST_PBS="${PBS_DIR}/jnos_secofs_ensemble_post.pbs" +ATMOS_PREP_PBS="${PBS_DIR}/jnos_secofs_ufs_ensemble_atmos_prep.pbs" + +if [ "${DET_ONLY}" != true ]; then + for script in "${MEMBER_PBS}" "${ATMOS_PREP_PBS}"; do + if [ ! -f "${script}" ]; then + echo "ERROR: PBS script not found: ${script}" >&2 + exit 1 + fi + done +fi + +if [ "${SKIP_PREP}" != true ] && [ ! -f "${PREP_PBS}" ]; then + echo "ERROR: Prep PBS script not found: ${PREP_PBS}" >&2 + exit 1 +fi + +RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs +mkdir -p ${RPTDIR} 2>/dev/null || true + +# ---- Step 1: Submit prep job (unless --skip-prep) -------------------- +if [ "${SKIP_PREP}" = true ]; then + echo "" + echo ">>> Skipping prep (--skip-prep). Members will run without dependency." + PREP_JOBID_SHORT="" +else + echo "" + echo ">>> Submitting prep job..." 
+ PREP_JOBID=$(qsub -v "PDY=${PDY}" "${PREP_PBS}") + PREP_JOBID_SHORT=${PREP_JOBID%%.*} + echo " Prep job: ${PREP_JOBID}" +fi + +# ---- Deterministic-only mode: just prep -> nowcast -> forecast -------- +if [ "${DET_ONLY}" = true ]; then + echo "" + echo ">>> Deterministic-only mode: submitting nowcast + forecast..." + + NCST_PBS="${PBS_DIR}/jnos_secofs_ufs_nowcast_${CYC}.pbs" + FCST_PBS="${PBS_DIR}/jnos_secofs_ufs_forecast_${CYC}.pbs" + + if [ ! -f "${NCST_PBS}" ] || [ ! -f "${FCST_PBS}" ]; then + echo "ERROR: Deterministic PBS scripts not found:" >&2 + [ ! -f "${NCST_PBS}" ] && echo " Missing: ${NCST_PBS}" >&2 + [ ! -f "${FCST_PBS}" ] && echo " Missing: ${FCST_PBS}" >&2 + exit 1 + fi + + NCST_JOBID=$(qsub \ + -v "PDY=${PDY},cyc=${CYC}" \ + ${PREP_JOBID_SHORT:+-W depend=afterok:${PREP_JOBID_SHORT}} \ + "${NCST_PBS}") + NCST_SHORT=${NCST_JOBID%%.*} + echo " Nowcast: ${NCST_JOBID}" + + FCST_JOBID=$(qsub \ + -v "PDY=${PDY},cyc=${CYC}" \ + -W depend=afterok:${NCST_SHORT} \ + "${FCST_PBS}") + FCST_SHORT=${FCST_JOBID%%.*} + echo " Forecast: ${FCST_JOBID}" + + echo "" + echo "==============================================" + echo " Deterministic workflow submitted successfully" + echo "==============================================" + echo "" + echo " Dependency chain:" + echo " prep (${PREP_JOBID_SHORT:-skipped})" + echo " |-> nowcast (${NCST_SHORT})" + echo " |-> forecast (${FCST_SHORT})" + echo "" + echo " Monitor with: qstat -u $LOGNAME" + echo " Cancel all: qdel ${PREP_JOBID_SHORT:-} ${NCST_SHORT} ${FCST_SHORT}" + echo "" + exit 0 +fi + +# ---- Step 1b: Submit DATM atmos prep job ------------------------------ +echo "" +echo ">>> Submitting GEFS DATM atmospheric ensemble prep job..." 
+ATMOS_QSUB_ARGS=(-v "CYC=${CYC},PDY=${PDY},GEFS_ENSEMBLE=true,USE_DATM=true" \ + -N "secofs_ufs_gefs_prep_${CYC}" \ + -o "${RPTDIR}/secofs_ufs_gefs_prep_${CYC}.out" \ + -e "${RPTDIR}/secofs_ufs_gefs_prep_${CYC}.err") +if [ -n "${PREP_JOBID_SHORT}" ]; then + ATMOS_QSUB_ARGS+=(-W "depend=afterok:${PREP_JOBID_SHORT}") +fi +ATMOS_PREP_JOBID=$(qsub "${ATMOS_QSUB_ARGS[@]}" "${ATMOS_PREP_PBS}") +ATMOS_PREP_JOBID_SHORT=${ATMOS_PREP_JOBID%%.*} +echo " Atmos prep: ${ATMOS_PREP_JOBID}" + +# ---- Step 2a (optional): Submit deterministic nowcast before ensemble ---- +NCST_JOBID_SHORT="" +FCST_JOBID_SHORT="" +if [ "${WITH_DET}" = true ]; then + echo "" + echo ">>> Submitting deterministic nowcast (required for ensemble ihot=2)..." + + NCST_PBS="${PBS_DIR}/jnos_secofs_ufs_nowcast_${CYC}.pbs" + FCST_PBS="${PBS_DIR}/jnos_secofs_ufs_forecast_${CYC}.pbs" + + if [ ! -f "${NCST_PBS}" ]; then + echo "ERROR: Nowcast PBS script not found: ${NCST_PBS}" >&2 + exit 1 + fi + + NCST_JOBID=$(qsub \ + -v "PDY=${PDY},cyc=${CYC}" \ + ${PREP_JOBID_SHORT:+-W depend=afterok:${PREP_JOBID_SHORT}} \ + "${NCST_PBS}") + NCST_JOBID_SHORT=${NCST_JOBID%%.*} + echo " Nowcast: ${NCST_JOBID}" + + if [ -f "${FCST_PBS}" ]; then + FCST_JOBID=$(qsub \ + -v "PDY=${PDY},cyc=${CYC}" \ + -W depend=afterok:${NCST_JOBID_SHORT} \ + "${FCST_PBS}") + FCST_JOBID_SHORT=${FCST_JOBID%%.*} + echo " Forecast: ${FCST_JOBID} (parallel with ensemble)" + fi +fi + +# ---- Step 2b: Submit ensemble members -------------------------------- +echo "" +echo ">>> Submitting ${N_MEMBERS} ensemble members..." 
+MEMBER_JOBIDS=() + +# Build member dependency string +MEMBER_DEP_PARTS=() +if [ -n "${NCST_JOBID_SHORT}" ]; then + MEMBER_DEP_PARTS+=("${NCST_JOBID_SHORT}") +elif [ -n "${PREP_JOBID_SHORT}" ]; then + MEMBER_DEP_PARTS+=("${PREP_JOBID_SHORT}") +fi +if [ -n "${ATMOS_PREP_JOBID_SHORT}" ]; then + MEMBER_DEP_PARTS+=("${ATMOS_PREP_JOBID_SHORT}") +fi + +MEMBER_DEP_STR="" +if [ ${#MEMBER_DEP_PARTS[@]} -gt 0 ]; then + MEMBER_DEP_STR="afterok" + for dep in "${MEMBER_DEP_PARTS[@]}"; do + MEMBER_DEP_STR="${MEMBER_DEP_STR}:${dep}" + done +fi + +for i in $(seq 0 $((N_MEMBERS - 1))); do + MID=$(printf '%03d' $i) + GEFS_MEM_ID=$(printf '%02d' $i) + MEMBER_VARS="MEMBER_ID=${MID},CYC=${CYC},PDY=${PDY},GEFS_ENSEMBLE=true,GEFS_MEMBER_ID=${GEFS_MEM_ID},USE_DATM=true" + QSUB_ARGS=(-v "${MEMBER_VARS}" \ + -N "secofs_ufs_ens${MID}_${CYC}" \ + -o "${RPTDIR}/secofs_ufs_ens${MID}_${CYC}.out" \ + -e "${RPTDIR}/secofs_ufs_ens${MID}_${CYC}.err") + if [ -n "${MEMBER_DEP_STR}" ]; then + QSUB_ARGS+=(-W "depend=${MEMBER_DEP_STR}") + fi + MJOB=$(qsub "${QSUB_ARGS[@]}" "${MEMBER_PBS}") + MEMBER_JOBIDS+=("${MJOB}") + MJOB_SHORT=${MJOB%%.*} + if [ "${MID}" = "000" ]; then + echo " Member ${MID} (control, GFS+HRRR DATM): ${MJOB}" + else + echo " Member ${MID} (perturbed): ${MJOB}" + fi +done + +# ---- Step 3: Submit ensemble post (depend on ALL members) ------------ +echo "" +echo ">>> Submitting ensemble post-processing..." 
+ +DEP_STR="afterok" +for mjob in "${MEMBER_JOBIDS[@]}"; do + DEP_STR="${DEP_STR}:${mjob%%.*}" +done + +if [ -f "${POST_PBS}" ]; then + ENSPOST_JOBID=$(qsub \ + -v "N_MEMBERS=${N_MEMBERS},CYC=${CYC},PDY=${PDY}" \ + -N "secofs_ufs_enspost_${CYC}" \ + -W depend=${DEP_STR} \ + "${POST_PBS}") + echo " Ensemble post: ${ENSPOST_JOBID}" + ENSPOST_SHORT=${ENSPOST_JOBID%%.*} +else + echo " WARNING: Post PBS script not found: ${POST_PBS}" + echo " Ensemble post will need to be run manually" + ENSPOST_SHORT="(not submitted)" +fi + +# ---- Summary --------------------------------------------------------- +echo "" +echo "==============================================" +echo " UFS-Coastal Ensemble workflow submitted" +echo "==============================================" +echo "" +echo " Dependency chain:" +echo " prep (${PREP_JOBID_SHORT:-skipped})" +if [ -n "${NCST_JOBID_SHORT}" ]; then + echo " |-> nowcast (${NCST_JOBID_SHORT})" + if [ -n "${FCST_JOBID_SHORT}" ]; then + echo " | |-> forecast (${FCST_JOBID_SHORT})" + fi +fi +echo " |-> atmos_prep (${ATMOS_PREP_JOBID_SHORT})" +for i in $(seq 0 $((N_MEMBERS - 1))); do + MID=$(printf '%03d' $i) + MJOB_SHORT=${MEMBER_JOBIDS[$i]%%.*} + echo " |-> member_${MID} (${MJOB_SHORT})" +done +echo " |-> post_ensemble (${ENSPOST_SHORT})" +if [ -n "${NCST_JOBID_SHORT}" ]; then + echo "" + echo " Members wait for nowcast to finish (ihot=2 continuous staout)" +fi +echo "" +echo " Monitor with: qstat -u $LOGNAME" +ALL_JOBIDS="${PREP_JOBID_SHORT:-}" +[ -n "${NCST_JOBID_SHORT}" ] && ALL_JOBIDS="${ALL_JOBIDS} ${NCST_JOBID_SHORT}" +[ -n "${FCST_JOBID_SHORT}" ] && ALL_JOBIDS="${ALL_JOBIDS} ${FCST_JOBID_SHORT}" +ALL_JOBIDS="${ALL_JOBIDS} ${ATMOS_PREP_JOBID_SHORT}" +ALL_JOBIDS="${ALL_JOBIDS} ${MEMBER_JOBIDS[*]%%.*}" +[ "${ENSPOST_SHORT}" != "(not submitted)" ] && ALL_JOBIDS="${ALL_JOBIDS} ${ENSPOST_SHORT}" +echo " Cancel all: qdel ${ALL_JOBIDS}" +echo "" diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index a555c78..37c00bb 
100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -397,16 +397,66 @@ _ensemble_execute_ufs_coastal() { done # Stage DATM INPUT directory + # Determine member-specific DATM input based on met_source_1 from params.json mkdir -p ${MEMBER_DATA}/INPUT - local datm_dir="${COMOUT}/${RUN}.${cycle}.datm_input" + local datm_dir="" + local met_source="" + + if [ -f "${PARAM_FILE:-}" ]; then + met_source=$(python3 -c " +import json, sys +with open('${PARAM_FILE}') as f: + p = json.load(f) +print(p.get('met_source_1', '')) +" 2>/dev/null || echo "") + fi + + case "${met_source}" in + GEFS_*) + local gefs_id=$(echo "${met_source}" | sed 's/GEFS_//') + datm_dir="${COMOUT}/${RUN}.${cycle}.datm_input_gefs_${gefs_id}" + ;; + RRFS) + datm_dir="${COMOUT}/${RUN}.${cycle}.datm_input_rrfs" + ;; + *) + # Control member (GFS+HRRR) uses the standard prep output + datm_dir="${COMOUT}/${RUN}.${cycle}.datm_input" + ;; + esac + + echo "Met source: ${met_source:-GFS (control)}" + echo "DATM input dir: ${datm_dir}" + if [ -d "$datm_dir" ]; then cp -p ${datm_dir}/*.nc ${MEMBER_DATA}/INPUT/ 2>/dev/null || true - echo "Staged DATM INPUT files" + echo "Staged DATM INPUT files from ${datm_dir}" else echo "FATAL: DATM input directory not found: $datm_dir" >&2 return 1 fi + # Patch datm_in nx_global/ny_global if member forcing has different grid dims + if [ -s "${MEMBER_DATA}/INPUT/datm_forcing.nc" ] && [ -s "${MEMBER_DATA}/datm_in" ]; then + local mem_dims=$(python3 -c " +from netCDF4 import Dataset +ds = Dataset('${MEMBER_DATA}/INPUT/datm_forcing.nc', 'r') +# Try 'x'/'y' dims first (blended output), then 'longitude'/'latitude' (raw GFS/GEFS) +try: + print(len(ds.dimensions['x']), len(ds.dimensions['y'])) +except: + print(len(ds.dimensions['longitude']), len(ds.dimensions['latitude'])) +ds.close() +" 2>/dev/null || echo "") + if [ -n "$mem_dims" ]; then + local mem_nx=$(echo $mem_dims | awk '{print $1}') + local mem_ny=$(echo $mem_dims | awk '{print $2}') + sed -i 
"s/nx_global[[:space:]]*=.*/nx_global = ${mem_nx}/" ${MEMBER_DATA}/datm_in + sed -i "s/ny_global[[:space:]]*=.*/ny_global = ${mem_ny}/" ${MEMBER_DATA}/datm_in + echo "Patched datm_in: nx_global=${mem_nx}, ny_global=${mem_ny}" + fi + fi + # Patch stop_n for ensemble phase (typically forecast length) local nhours=${LEN_FORECAST:-48} if [ -s "${MEMBER_DATA}/model_configure" ]; then diff --git a/ush/nosofs/nos_ofs_create_datm_forcing.sh b/ush/nosofs/nos_ofs_create_datm_forcing.sh index 25e419f..da9b310 100644 --- a/ush/nosofs/nos_ofs_create_datm_forcing.sh +++ b/ush/nosofs/nos_ofs_create_datm_forcing.sh @@ -14,7 +14,7 @@ # ./nos_ofs_create_datm_forcing.sh DBASE OUTPUT_DIR [TIME_START] [TIME_END] # # Arguments: -# DBASE - Data source: GFS25 or HRRR +# DBASE - Data source: GFS25, HRRR, or GEFS_NN (e.g., GEFS_01) # OUTPUT_DIR - Directory for output files # TIME_START - Start time YYYYMMDDHH (optional, default: time_hotstart - 3h) # TIME_END - End time YYYYMMDDHH (optional, default: time_forecastend) @@ -161,9 +161,27 @@ elif [ "$DBASE_UPPER" == "HRRR" ]; then # HRRR uses MSLMA instead of PRMSL MATCH_PATTERN=":(UGRD|VGRD):10 m above ground:|:(TMP|SPFH):2 m above ground:|:MSLMA:mean sea level:|:(DSWRF|DLWRF|PRATE):surface:" OUTPUT_FILE=${OUTPUT_DIR}/hrrr_forcing.nc + +elif [[ "$DBASE_UPPER" == GEFS* ]]; then + # GEFS pgrb2sp25 at 0.25 deg — structurally identical to GFS 0.25 deg + # (same variables, same regular lat/lon grid, 3-hourly) + INTERVAL=3 + COMIN=${COMINgefs:-/lfs/h1/ops/prod/com/gefs/v12.3} + # Same variables as GFS 0.25 + MATCH_PATTERN=":(UGRD|VGRD):10 m above ground:|:(TMP|SPFH):2 m above ground:|:PRMSL:mean sea level:|:(DSWRF|DLWRF|PRATE):surface:" + # Parse member ID: GEFS_01 -> 01, GEFS_02 -> 02, etc. 
+  GEFS_MEM_NUM=$(echo "$DBASE_UPPER" | sed 's/GEFS_*//' | sed 's/^0*//' )
+  GEFS_MEM_ID=$(printf '%02d' "${GEFS_MEM_NUM:-0}")
+  GEFS_PREFIX="gep${GEFS_MEM_ID}"
+  GEFS_PRODUCT=${GEFS_PRODUCT:-pgrb2sp25}
+  OUTPUT_FILE=${OUTPUT_DIR}/gefs_${GEFS_MEM_ID}_forcing.nc
+  echo "GEFS member: ${GEFS_PREFIX} (product: ${GEFS_PRODUCT})"
+  # Override DBASE_LOWER for intermediate file naming
+  DBASE_LOWER="gefs_${GEFS_MEM_ID}"
+
 else
   echo "ERROR: Unknown DBASE: $DBASE"
-  echo "Supported values: GFS, GFS25, HRRR"
+  echo "Supported values: GFS, GFS25, HRRR, GEFS_NN"
   exit 1
 fi

@@ -211,6 +229,50 @@ find_gfs_file() {
   return 1
 }

+# =============================================================================
+# Function: Find GEFS pgrb2sp25 file for a valid time
+# GEFS path: ${COMINgefs}/gefs.YYYYMMDD/HH/atmos/pgrb2sp25/gepNN.tHHz.pgrb2s.0p25.fFFF
+# Strategy: Same as GFS — use base cycle with extended forecasts
+# =============================================================================
+find_gefs_file() {
+  local VALID_TIME=$1
+  local VALID_DATE=$(echo $VALID_TIME | cut -c1-8)
+  local VALID_HH=$(echo $VALID_TIME | cut -c9-10)
+
+  # Calculate initial cycle (6 hours before valid time, rounded down to 00/06/12/18)
+  local INIT_CYCLE_TIME=$($NDATE -6 $VALID_TIME)
+  local INIT_DATE=$(echo $INIT_CYCLE_TIME | cut -c1-8)
+  local INIT_HH=$(echo $INIT_CYCLE_TIME | cut -c9-10)
+  local INIT_HH_NUM=$((10#$INIT_HH))
+
+  local CYCLE_HH=$(printf "%02d" $((INIT_HH_NUM / 6 * 6)))
+  local CYCLE_DATE=$INIT_DATE
+  local CYCLE_TIME="${CYCLE_DATE}${CYCLE_HH}"
+
+  # Try up to 12 cycles (going back 72 hours)
+  for attempt in 1 2 3 4 5 6 7 8 9 10 11 12; do
+    local FHR=$($NHOUR $VALID_TIME $CYCLE_TIME 2>/dev/null || echo "-1")
+    case "$FHR" in (*[!0-9]*|"") local FHR_DEC=-1 ;; (*) local FHR_DEC=$((10#$FHR)) ;; esac
+    if [ "$FHR_DEC" -ge 0 ] && [ "$FHR_DEC" -le 384 ]; then
+      local FHR_STR=$(printf "%03d" $FHR_DEC)
+      local GRIB2_FILE="${COMIN}/gefs.${CYCLE_DATE}/${CYCLE_HH}/atmos/${GEFS_PRODUCT}/${GEFS_PREFIX}.t${CYCLE_HH}z.pgrb2s.0p25.f${FHR_STR}"
+
+      if [ -s 
"$GRIB2_FILE" ]; then + echo "$GRIB2_FILE" + return 0 + fi + fi + + # Try previous cycle + CYCLE_TIME=$($NDATE -6 $CYCLE_TIME) + CYCLE_DATE=$(echo $CYCLE_TIME | cut -c1-8) + CYCLE_HH=$(echo $CYCLE_TIME | cut -c9-10) + done + + echo "" + return 1 +} + # ============================================================================= # Function: Find HRRR files using Fortran executable (matches SFLUX exactly) # Uses nos_ofs_met_file_search for operational consistency @@ -636,6 +698,8 @@ while [ "$CURRENT_TIME" -le "$TIME_END" ]; do # Find the appropriate GRIB2 file if [ "$DBASE_UPPER" == "GFS" ] || [ "$DBASE_UPPER" == "GFS25" ]; then GRIB2_FILE=$(find_gfs_file $CURRENT_TIME) + elif [[ "$DBASE_UPPER" == GEFS* ]]; then + GRIB2_FILE=$(find_gefs_file $CURRENT_TIME) else GRIB2_FILE=$(find_hrrr_file $CURRENT_TIME) fi diff --git a/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh b/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh index 27d1179..548752a 100644 --- a/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh +++ b/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh @@ -78,6 +78,8 @@ done # Defaults DATM_BLEND_HRRR_GFS=${DATM_BLEND_HRRR_GFS:-true} +DATM_PRIMARY_SOURCE=${DATM_PRIMARY_SOURCE:-GFS25} +DATM_OUTPUT_SUFFIX=${DATM_OUTPUT_SUFFIX:-} BLEND_RESOLUTION=${BLEND_RESOLUTION:-0.025} NHOURS_FCST=${NHOURS_FCST:-48} DT_ATMOS=${DT_ATMOS:-720} @@ -140,7 +142,11 @@ TIME_START_BUFFERED=$($NDATE -3 $TIME_START) TIME_END=$($NDATE ${NHOURS_FCST} ${PDY}${cyc}) echo "Forcing time range: $TIME_START_BUFFERED to $TIME_END" +echo "Primary source: $DATM_PRIMARY_SOURCE" echo "HRRR blending: $DATM_BLEND_HRRR_GFS" +if [ -n "$DATM_OUTPUT_SUFFIX" ]; then + echo "Output suffix: $DATM_OUTPUT_SUFFIX" +fi # Create work directories DATM_WORK=${DATA}/datm_forcing @@ -152,22 +158,31 @@ mkdir -p ${DATA}/INPUT # ============================================================================= echo "" echo "============================================" -echo "Step 1/6: Extracting GFS forcing (native 0.25 
deg grid)..." +echo "Step 1/6: Extracting primary forcing (${DATM_PRIMARY_SOURCE})..." echo "============================================" -GFS_DIR=${DATM_WORK}/gfs -mkdir -p $GFS_DIR +PRIMARY_DIR=${DATM_WORK}/primary +mkdir -p $PRIMARY_DIR -${USHnos}/nosofs/nos_ofs_create_datm_forcing.sh GFS25 $GFS_DIR \ +${USHnos}/nosofs/nos_ofs_create_datm_forcing.sh ${DATM_PRIMARY_SOURCE} $PRIMARY_DIR \ $TIME_START_BUFFERED $TIME_END rc=$? -if [ $rc -ne 0 ] || [ ! -s ${GFS_DIR}/gfs_forcing.nc ]; then - echo "ERROR: GFS forcing extraction failed (rc=$rc)" +# Determine the output filename based on source type +PRIMARY_FORCING_FILE="" +for candidate in ${PRIMARY_DIR}/gfs_forcing.nc ${PRIMARY_DIR}/gefs_*_forcing.nc; do + if [ -s "$candidate" ]; then + PRIMARY_FORCING_FILE="$candidate" + break + fi +done + +if [ $rc -ne 0 ] || [ -z "$PRIMARY_FORCING_FILE" ]; then + echo "ERROR: Primary forcing extraction failed (rc=$rc, source=${DATM_PRIMARY_SOURCE})" exit 1 fi -echo "GFS forcing: ${GFS_DIR}/gfs_forcing.nc ($(ls -lh ${GFS_DIR}/gfs_forcing.nc | awk '{print $5}'))" +echo "Primary forcing: ${PRIMARY_FORCING_FILE} ($(ls -lh ${PRIMARY_FORCING_FILE} | awk '{print $5}'))" # ============================================================================= # Step 2: Extract HRRR Forcing (native grid) @@ -222,7 +237,7 @@ if [ "$DATM_BLEND_HRRR_GFS" == "true" ] || [ "$DATM_BLEND_HRRR_GFS" == "1" ]; th # 6. 
Generates SCRIP grid and ESMF mesh for the blended output ${USHnos}/nosofs/nos_ofs_blend_hrrr_gfs.sh \ ${HRRR_DIR}/hrrr_forcing.nc \ - ${GFS_DIR}/gfs_forcing.nc \ + ${PRIMARY_FORCING_FILE} \ ${BLEND_DIR}/datm_forcing.nc \ ${DOMAIN} \ ${BLEND_RESOLUTION} @@ -234,14 +249,14 @@ if [ "$DATM_BLEND_HRRR_GFS" == "true" ] || [ "$DATM_BLEND_HRRR_GFS" == "1" ]; th fi fi -# Fallback: use GFS only (no blending) +# Fallback: use primary source only (no blending) if [ "$DATM_BLEND_HRRR_GFS" != "true" ] && [ "$DATM_BLEND_HRRR_GFS" != "1" ]; then echo "" echo "============================================" - echo "Step 3/6: Using GFS only (no blending)..." + echo "Step 3/6: Using ${DATM_PRIMARY_SOURCE} only (no blending)..." echo "============================================" - cp -p ${GFS_DIR}/gfs_forcing.nc ${BLEND_DIR}/datm_forcing.nc - echo "Copied GFS as datm_forcing.nc" + cp -p ${PRIMARY_FORCING_FILE} ${BLEND_DIR}/datm_forcing.nc + echo "Copied ${DATM_PRIMARY_SOURCE} as datm_forcing.nc" fi # ============================================================================= @@ -330,19 +345,26 @@ ds.close() # ============================================================================= # Step 6: Generate UFS Config Files +# Skip when DATM_SKIP_UFS_CONFIG=true (e.g., ensemble atmos prep — config +# files are generated once by the control prep job, not per member) # ============================================================================= -echo "" -echo "============================================" -echo "Step 6/6: Generating UFS config files..." -echo "============================================" +if [ "${DATM_SKIP_UFS_CONFIG:-false}" != "true" ]; then + echo "" + echo "============================================" + echo "Step 6/6: Generating UFS config files..." + echo "============================================" -export NHOURS=${NHOURS_FCST} -${USHnos}/nosofs/nos_ofs_gen_ufs_config.sh --verbose -rc=$? 
+ export NHOURS=${NHOURS_FCST} + ${USHnos}/nosofs/nos_ofs_gen_ufs_config.sh --verbose + rc=$? -if [ $rc -ne 0 ]; then - echo "ERROR: UFS config generation failed" - exit 1 + if [ $rc -ne 0 ]; then + echo "ERROR: UFS config generation failed" + exit 1 + fi +else + echo "" + echo "Step 6/6: Skipping UFS config generation (DATM_SKIP_UFS_CONFIG=true)" fi # ============================================================================= @@ -356,10 +378,11 @@ echo "" echo "Domain: $DOMAIN" echo "Time range: $TIME_START_BUFFERED to $TIME_END" echo "Target grid: ${NX_TARGET} x ${NY_TARGET} at ${BLEND_RESOLUTION} deg" +echo "Primary src: ${DATM_PRIMARY_SOURCE}" if [ "$DATM_BLEND_HRRR_GFS" == "true" ] || [ "$DATM_BLEND_HRRR_GFS" == "1" ]; then - echo "Blending: HRRR+GFS (HRRR over CONUS, GFS global fill)" + echo "Blending: HRRR+${DATM_PRIMARY_SOURCE} (HRRR over CONUS, primary global fill)" else - echo "Blending: GFS only (HRRR unavailable)" + echo "Blending: ${DATM_PRIMARY_SOURCE} only (no HRRR blend)" fi echo "" echo "DATM dir: ${DATA}/${DATM_DIR}/" From d48179576a9aa9b9cc718ee1603ed741d582e7be Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 28 Feb 2026 11:32:05 -0500 Subject: [PATCH 02/40] Consolidate UFS ensemble into existing PBS scripts instead of separate files Remove the 3 separate UFS-specific PBS scripts and instead make the existing ensemble PBS scripts OFS-aware via --ufs flag: - launch_secofs_ensemble.sh: Add --ufs flag (sets OFS=secofs_ufs, UFS_MODE=true), pass OFS to all qsub calls, use UFS prep/nowcast/forecast PBS for det workflow - jnos_secofs_ensemble_member.pbs: Branch module loading on OFS name (*_ufs* loads hpc-stack modules.fv3, standalone loads standard WCOSS2 modules), auto-detect USE_DATM and TOTAL_TASKS for UFS - jnos_secofs_ensemble_atmos_prep.pbs: Accept OFS from launcher, auto-detect USE_DATM, add COMINgefs/COMINrrfs exports --- pbs/jnos_secofs_ensemble_atmos_prep.pbs | 34 +- pbs/jnos_secofs_ensemble_member.pbs | 97 ++++-- 
pbs/jnos_secofs_ufs_ensemble_atmos_prep.pbs | 109 ------- pbs/jnos_secofs_ufs_ensemble_member.pbs | 133 -------- pbs/launch_secofs_ensemble.sh | 79 +++-- pbs/launch_secofs_ufs_ensemble.sh | 330 -------------------- 6 files changed, 146 insertions(+), 636 deletions(-) delete mode 100644 pbs/jnos_secofs_ufs_ensemble_atmos_prep.pbs delete mode 100644 pbs/jnos_secofs_ufs_ensemble_member.pbs delete mode 100644 pbs/launch_secofs_ufs_ensemble.sh diff --git a/pbs/jnos_secofs_ensemble_atmos_prep.pbs b/pbs/jnos_secofs_ensemble_atmos_prep.pbs index 6941e2a..140190c 100644 --- a/pbs/jnos_secofs_ensemble_atmos_prep.pbs +++ b/pbs/jnos_secofs_ensemble_atmos_prep.pbs @@ -11,12 +11,15 @@ # ------------------------------------------------------------------ # SECOFS Ensemble Atmospheric Prep PBS Job # -# Creates sflux tar archives for extra atmospheric forcing sources -# (e.g., NAM) needed by atmospheric ensemble members. Runs between +# Creates atmospheric forcing for ensemble members. Runs between # the regular prep job and ensemble member jobs. # +# For standalone SECOFS: creates sflux tar archives +# For UFS-Coastal (OFS=secofs_ufs): creates per-member DATM forcing +# # Submit via launch_secofs_ensemble.sh --atmos-ensemble or manually: # qsub -v CYC=00,PDY=20260213 jnos_secofs_ensemble_atmos_prep.pbs +# qsub -v CYC=00,PDY=20260213,OFS=secofs_ufs jnos_secofs_ensemble_atmos_prep.pbs # ------------------------------------------------------------------ CYC=${CYC:-00} @@ -24,11 +27,15 @@ CYC=${CYC:-00} # PDY: accept from qsub -v, fall back to today export PDY=${PDY:-$(date +%Y%m%d)} -. /lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.v3.7.0/versions/run.ver +PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages +. 
${PACKAGEROOT}/nosofs.v3.7.0/versions/run.ver + +# OFS: accept from launcher (default: secofs) +export OFS=${OFS:-secofs} # Working directory RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/v3.7.0 -WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS:-secofs} +WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} if [ ! -r $WORKDIR ]; then mkdir -p -m 755 $WORKDIR fi @@ -63,19 +70,23 @@ module list # EXPORT list set +x export envir=dev -export OFS=secofs export cyc=${CYC} export NET=nosofs export model=nosofs -export job=secofs_atmos_prep_${CYC}_$envir +export job=${OFS}_atmos_prep_${CYC}_$envir export platform=ptmp +# UFS-Coastal DATM coupling (auto-detect from OFS name) +if [[ "${OFS}" == *_ufs* ]]; then + export USE_DATM=${USE_DATM:-true} +fi + export KEEPDATA=YES export SENDCOM=NO export SENDDBN=NO export SENDSMS=NO -export PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages +export PACKAGEROOT export COMPATH=/lfs/h1/ops/prod/com/nos export com_ver=v3.7 export COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com @@ -90,6 +101,8 @@ export COMINetss=/lfs/h1/ops/prod/com/petss/${petss_ver} export COMINrtofs_2d=/lfs/h1/ops/prod/com/rtofs/${rtofs_ver} export COMINrtofs_3d=/lfs/h1/ops/prod/com/rtofs/${rtofs_ver} export COMINnwm=/lfs/h1/ops/prod/com/nwm/${nwm_ver} +export COMINgefs=/lfs/h1/ops/prod/com/gefs/${gefs_ver:-v12.3} +export COMINrrfs=/lfs/h1/ops/para/com/rrfs/${rrfs_ver:-v1.0} export COMIN=/lfs/h1/ops/prod/com ################################################ @@ -99,7 +112,8 @@ export pbsid=${PBS_JOBID%%.*} export job=${job:-$PBS_JOBNAME} export jobid=${jobid:-$job.$PBS_JOBID} -export OFS_CONFIG=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.v3.7.0/parm/systems/${OFS}.yaml -export PYTHONPATH=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.v3.7.0/ush/python:${PYTHONPATH:-} +export HOMEnos=${PACKAGEROOT}/nosofs.${nosofs_ver} +export OFS_CONFIG=${HOMEnos}/parm/systems/${OFS}.yaml +export PYTHONPATH=${HOMEnos}/ush/python:${PYTHONPATH:-} 
-/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.${nosofs_ver}/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP +${HOMEnos}/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP diff --git a/pbs/jnos_secofs_ensemble_member.pbs b/pbs/jnos_secofs_ensemble_member.pbs index 38d47a0..e1dc2a0 100644 --- a/pbs/jnos_secofs_ensemble_member.pbs +++ b/pbs/jnos_secofs_ensemble_member.pbs @@ -18,6 +18,10 @@ # -e /lfs/h1/nos/ptmp/$LOGNAME/rpt/v3.7.0/secofs_ens000_00.err \ # jnos_secofs_ensemble_member.pbs # +# For UFS-Coastal (DATM+SCHISM), pass OFS=secofs_ufs: +# qsub -v MEMBER_ID=000,CYC=00,PDY=20260213,OFS=secofs_ufs \ +# jnos_secofs_ensemble_member.pbs +# # The launcher script overrides -N, -o, -e per member via qsub flags. # The #PBS defaults above are fallbacks for direct submission. # ------------------------------------------------------------------ @@ -32,40 +36,59 @@ CYC=${CYC:-00} # PDY: accept from qsub -v, fall back to today export PDY=${PDY:-$(date +%Y%m%d)} -. /lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.v3.7.0/versions/run.ver +PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages +. ${PACKAGEROOT}/nosofs.v3.7.0/versions/run.ver + +# OFS: accept from launcher (default: secofs for standalone SCHISM) +export OFS=${OFS:-secofs} # Working directory RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/v3.7.0 -WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS:-secofs} +WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} if [ ! 
-r $WORKDIR ]; then mkdir -p -m 755 $WORKDIR fi cd ${WORKDIR} -module purge -module load envvar/${envvars_ver} - -# Loading Intel Compiler Suite -module load PrgEnv-intel/${PrgEnv_intel_ver} -module load craype/${craype_ver} -module load intel/${intel_ver} -module load cray-mpich/${cray_mpich_ver} -module load cray-pals/${cray_pals_ver} -#Set other library variables -module load netcdf/${netcdf_ver} -module load hdf5/${hdf5_ver} -module load libjpeg/${libjpeg_ver} -module load subversion/${subversion_ver} -module load python/${python_ver} -module load prod_envir/${prod_envir_ver} -module load prod_util/${prod_util_ver} -module load grib_util/${grib_util_ver} -module load cfp/${cfp_ver} -module load gsl/${gsl_ver} -module load udunits/${udunits_ver} -module load nco/${nco_ver} -module load cdo/${cdo_ver} -module load metis/${metis_ver} +# ---------------------------------------------------------------- +# Module setup: UFS-Coastal requires hpc-stack libraries, standalone +# SCHISM uses standard WCOSS2 modules. 
+# ---------------------------------------------------------------- +if [[ "${OFS}" == *_ufs* ]]; then + # UFS-Coastal: hpc-stack modules (hdf5-D/1.14.0, netcdf-D/4.9.2, esmf-D/8.8.0) + module purge + module reset + unset PrgEnv_intel_ver craype_ver cmake_ver g2_ver w3emc_ver 2>/dev/null || true + module use ${PACKAGEROOT}/nosofs.${nosofs_ver}/modulefiles + module load modules.fv3 + module load cray-pals + module load python/${python_ver:-3.8.6} + module load prod_envir/${prod_envir_ver:-2.0.6} + module load prod_util/${prod_util_ver:-2.0.17} +else + # Standalone SCHISM: standard WCOSS2 modules + module purge + module load envvar/${envvars_ver} + module load PrgEnv-intel/${PrgEnv_intel_ver} + module load craype/${craype_ver} + module load intel/${intel_ver} + module load cray-mpich/${cray_mpich_ver} + module load cray-pals/${cray_pals_ver} + module load netcdf/${netcdf_ver} + module load hdf5/${hdf5_ver} + module load libjpeg/${libjpeg_ver} + module load subversion/${subversion_ver} + module load python/${python_ver} + module load prod_envir/${prod_envir_ver} + module load prod_util/${prod_util_ver} + module load grib_util/${grib_util_ver} + module load cfp/${cfp_ver} + module load gsl/${gsl_ver} + module load udunits/${udunits_ver} + module load nco/${nco_ver} + module load cdo/${cdo_ver} + module load metis/${metis_ver} +fi module list @@ -78,20 +101,25 @@ export FI_OFI_RXM_SAR_LIMIT=1572864 # EXPORT list set +x export envir=dev -export OFS=secofs export cyc=${CYC} export NET=nosofs export model=nosofs export MEMBER_ID=${MEMBER_ID} -export job=secofs_ens${MEMBER_ID}_${CYC}_$envir +export job=${OFS}_ens${MEMBER_ID}_${CYC}_$envir export platform=ptmp +# UFS-Coastal DATM coupling (auto-detect from OFS name) +if [[ "${OFS}" == *_ufs* ]]; then + export USE_DATM=${USE_DATM:-true} + export TOTAL_TASKS=${TOTAL_TASKS:-1200} +fi + export KEEPDATA=YES export SENDCOM=NO export SENDDBN=NO export SENDSMS=NO -export PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages 
+export PACKAGEROOT export COMPATH=/lfs/h1/ops/prod/com/nos export com_ver=v3.7 export COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com @@ -108,13 +136,18 @@ export COMINrtofs_3d=/lfs/h1/ops/prod/com/rtofs/${rtofs_ver} export COMINnwm=/lfs/h1/ops/prod/com/nwm/${nwm_ver} export COMIN=/lfs/h1/ops/prod/com +# Pass GEFS ensemble flags from launcher +export GEFS_ENSEMBLE=${GEFS_ENSEMBLE:-false} +export GEFS_MEMBER_ID=${GEFS_MEMBER_ID:-} + ################################################ # CALL executable job script here export pbsid=${PBS_JOBID%%.*} export job=${job:-$PBS_JOBNAME} export jobid=${jobid:-$job.$PBS_JOBID} -export OFS_CONFIG=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.v3.7.0/parm/systems/${OFS}.yaml -export PYTHONPATH=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.v3.7.0/ush/python:${PYTHONPATH:-} +export HOMEnos=${PACKAGEROOT}/nosofs.${nosofs_ver} +export OFS_CONFIG=${HOMEnos}/parm/systems/${OFS}.yaml +export PYTHONPATH=${HOMEnos}/ush/python:${PYTHONPATH:-} -/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.${nosofs_ver}/jobs/JNOS_OFS_ENSEMBLE_MEMBER +${HOMEnos}/jobs/JNOS_OFS_ENSEMBLE_MEMBER diff --git a/pbs/jnos_secofs_ufs_ensemble_atmos_prep.pbs b/pbs/jnos_secofs_ufs_ensemble_atmos_prep.pbs deleted file mode 100644 index afc2d67..0000000 --- a/pbs/jnos_secofs_ufs_ensemble_atmos_prep.pbs +++ /dev/null @@ -1,109 +0,0 @@ -#!/bin/bash -#PBS -N secofs_ufs_atmos_prep -#PBS -A ESTOFS-DEV -#PBS -q dev -#PBS -o /lfs/h1/nos/ptmp/mansur.jisan/rpt/secofs_ufs/secofs_ufs_atmos_prep.out -#PBS -e /lfs/h1/nos/ptmp/mansur.jisan/rpt/secofs_ufs/secofs_ufs_atmos_prep.err -#PBS -l select=1:ncpus=8:mpiprocs=8 -#PBS -l place=vscatter -#PBS -l walltime=02:00:00 - -# ------------------------------------------------------------------ -# SECOFS UFS-Coastal Ensemble Atmospheric Prep PBS Job -# -# Creates DATM-format forcing files for GEFS/RRFS ensemble members. -# Runs between the regular prep job and ensemble member jobs. 
-# Each member gets its own datm_input directory in COMOUT. -# -# Submit via launch_secofs_ufs_ensemble.sh --gefs or manually: -# qsub -v CYC=00,PDY=20260213 jnos_secofs_ufs_ensemble_atmos_prep.pbs -# ------------------------------------------------------------------ - -CYC=${CYC:-00} - -# PDY: accept from qsub -v, fall back to today -export PDY=${PDY:-$(date +%Y%m%d)} - -PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages -. ${PACKAGEROOT}/nosofs.v3.7.0/versions/run.ver - -# Working directory -RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs -WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/secofs_ufs -mkdir -p -m 755 $RPTDIR $WORKDIR -cd ${WORKDIR} - -# Standard modules (NOT hpc-stack — this job runs Python/wgrib2, not UFS) -module purge -module load envvar/${envvars_ver} - -module load PrgEnv-intel/${PrgEnv_intel_ver} -module load craype/${craype_ver} -module load intel/${intel_ver} -module load cray-mpich/${cray_mpich_ver} -module load cray-pals/${cray_pals_ver} -module load netcdf/${netcdf_ver} -module load hdf5/${hdf5_ver} -module load python/${python_ver} -module load prod_envir/${prod_envir_ver} -module load prod_util/${prod_util_ver} -module load grib_util/${grib_util_ver} -module load cfp/${cfp_ver} -module load nco/${nco_ver} -module load wgrib2/${wgrib2_ver} - -module list - -# EXPORT list -set +x -export envir=dev -export OFS=secofs_ufs -export cyc=${CYC} -export NET=nosofs -export model=nosofs -export job=secofs_ufs_atmos_prep_${CYC}_$envir -export platform=ptmp - -# UFS-Coastal DATM coupling -export USE_DATM=true - -export KEEPDATA=YES -export SENDCOM=NO -export SENDDBN=NO -export SENDSMS=NO - -export nosofs_ver=v3.7.0 -export PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages - -# Data and COM paths -export COMPATH=/lfs/h1/ops/prod/com/nos -export com_ver=v3.7 -export COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com -export DCOMROOT=/lfs/h1/ops/prod/dcom -export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} -export 
COMIN=/lfs/h1/ops/prod/com - -# Input data paths -export COMINnam=/lfs/h1/ops/prod/com/nam/${nam_ver} -export COMINhrrr=/lfs/h1/ops/prod/com/hrrr/${hrrr_ver} -export COMINrap=/lfs/h1/ops/prod/com/rap/${rap_ver} -export COMINgfs=/lfs/h1/ops/prod/com/gfs/${gfs_ver} -export COMINrtma=/lfs/h1/ops/prod/com/rtma/${rtma_ver} -export COMINetss=/lfs/h1/ops/prod/com/petss/${petss_ver} -export COMINrtofs_2d=/lfs/h1/ops/prod/com/rtofs/${rtofs_ver} -export COMINrtofs_3d=/lfs/h1/ops/prod/com/rtofs/${rtofs_ver} -export COMINnwm=/lfs/h1/ops/prod/com/nwm/${nwm_ver} -export COMINgefs=/lfs/h1/ops/prod/com/gefs/${gefs_ver:-v12.3} -export COMINrrfs=/lfs/h1/ops/para/com/rrfs/${rrfs_ver:-v1.0} - -################################################ -# CALL executable job script here -export pbsid=${PBS_JOBID%%.*} -export job=${job:-$PBS_JOBNAME} -export jobid=${jobid:-$job.$PBS_JOBID} - -export HOMEnos=${PACKAGEROOT}/nosofs.${nosofs_ver} -export OFS_CONFIG=${HOMEnos}/parm/systems/${OFS}.yaml -export PYTHONPATH=${HOMEnos}/ush/python:${PYTHONPATH:-} - -${HOMEnos}/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP diff --git a/pbs/jnos_secofs_ufs_ensemble_member.pbs b/pbs/jnos_secofs_ufs_ensemble_member.pbs deleted file mode 100644 index de3b663..0000000 --- a/pbs/jnos_secofs_ufs_ensemble_member.pbs +++ /dev/null @@ -1,133 +0,0 @@ -#!/bin/bash -#PBS -N secofs_ufs_ens_00 -#PBS -A ESTOFS-DEV -#PBS -q dev -#PBS -o /lfs/h1/nos/ptmp/mansur.jisan/rpt/secofs_ufs/secofs_ufs_ens_00.out -#PBS -e /lfs/h1/nos/ptmp/mansur.jisan/rpt/secofs_ufs/secofs_ufs_ens_00.err -#PBS -l place=vscatter:excl,select=10:ncpus=128:mpiprocs=120 -#PBS -l walltime=05:30:00 - -# ------------------------------------------------------------------ -# SECOFS UFS-Coastal Ensemble Member PBS Job -# -# Runs a single UFS-Coastal ensemble member (DATM+SCHISM coupled). 
-# Submit via launch_secofs_ufs_ensemble.sh or manually with: -# qsub -v MEMBER_ID=000,CYC=00,PDY=20260213 \ -# -N secofs_ufs_ens000_00 \ -# -o /lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs/secofs_ufs_ens000_00.out \ -# -e /lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs/secofs_ufs_ens000_00.err \ -# jnos_secofs_ufs_ensemble_member.pbs -# -# The launcher script overrides -N, -o, -e per member via qsub flags. -# The #PBS defaults above are fallbacks for direct submission. -# ------------------------------------------------------------------ - -# Validate required variables -if [ -z "${MEMBER_ID:-}" ]; then - echo "ERROR: MEMBER_ID not set. Use: qsub -v MEMBER_ID=001,CYC=00,PDY=20260213 $0" >&2 - exit 1 -fi -CYC=${CYC:-00} - -# PDY: accept from qsub -v, fall back to today -export PDY=${PDY:-$(date +%Y%m%d)} - -PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages -. ${PACKAGEROOT}/nosofs.v3.7.0/versions/run.ver - -# Working directory -RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs -WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/secofs_ufs -mkdir -p -m 755 $RPTDIR $WORKDIR -cd ${WORKDIR} - -# ---------------------------------------------------------------- -# Module setup: UFS-Coastal hpc-stack libraries -# ---------------------------------------------------------------- -# The UFS executable (fv3_coastalS.exe) requires hpc-stack libraries -# (hdf5-D/1.14.0, netcdf-D/4.9.2, esmf-D/8.8.0) that differ from -# standard WCOSS2 system modules (hdf5/1.10.6, netcdf/4.7.4). 
-module purge -module reset - -# Clear version env vars that would override modules.fv3.lua defaults -unset PrgEnv_intel_ver craype_ver cmake_ver g2_ver w3emc_ver 2>/dev/null || true - -# Load UFS-Coastal hpc-stack modules -module use ${PACKAGEROOT}/nosofs.${nosofs_ver}/modulefiles -module load modules.fv3 -module load cray-pals - -# Infrastructure modules for J-job scripting -module load python/${python_ver:-3.8.6} -module load prod_envir/${prod_envir_ver:-2.0.6} -module load prod_util/${prod_util_ver:-2.0.17} - -module list - -# MPI tuning for large-core SCHISM runs on Cray -export MPICH_OFI_STARTUP_CONNECT=1 -export MPICH_COLL_SYNC=MPI_Bcast -export MPICH_REDUCE_NO_SMP=1 -export FI_OFI_RXM_SAR_LIMIT=1572864 - -# EXPORT list -set +x -export envir=dev -export OFS=secofs_ufs -export cyc=${CYC} -export NET=nosofs -export model=nosofs -export MEMBER_ID=${MEMBER_ID} -export job=secofs_ufs_ens${MEMBER_ID}_${CYC}_$envir -export platform=ptmp - -# UFS-Coastal DATM coupling -export USE_DATM=true - -export KEEPDATA=YES -export SENDCOM=NO -export SENDDBN=NO -export SENDSMS=NO - -export nosofs_ver=v3.7.0 -export PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages - -# Data and COM paths -export COMPATH=/lfs/h1/ops/prod/com/nos -export COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com -export DCOMROOT=/lfs/h1/ops/prod/dcom -export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} -export COMIN=/lfs/h1/ops/prod/com - -# Input data paths (production WCOSS2) -export COMINnam=/lfs/h1/ops/prod/com/nam/${nam_ver} -export COMINhrrr=/lfs/h1/ops/prod/com/hrrr/${hrrr_ver} -export COMINrap=/lfs/h1/ops/prod/com/rap/${rap_ver} -export COMINgfs=/lfs/h1/ops/prod/com/gfs/${gfs_ver} -export COMINrtma=/lfs/h1/ops/prod/com/rtma/${rtma_ver} -export COMINetss=/lfs/h1/ops/prod/com/petss/${petss_ver} -export COMINrtofs_2d=/lfs/h1/ops/prod/com/rtofs/${rtofs_ver} -export COMINrtofs_3d=/lfs/h1/ops/prod/com/rtofs/${rtofs_ver} -export COMINnwm=/lfs/h1/ops/prod/com/nwm/${nwm_ver} - -# UFS-Coastal: 
MED+ATM share PETs 0-119, OCN PETs 120-1199 -# 10 nodes x 120 mpiprocs = 1200 tasks -export TOTAL_TASKS=1200 - -################################################ -# CALL executable job script here -export pbsid=${PBS_JOBID%%.*} -export job=${job:-$PBS_JOBNAME} -export jobid=${jobid:-$job.$PBS_JOBID} - -# YAML configuration for secofs_ufs -export HOMEnos=${PACKAGEROOT}/nosofs.${nosofs_ver} -export OFS_CONFIG=${HOMEnos}/parm/systems/${OFS}.yaml -export PYTHONPATH=${HOMEnos}/ush/python:${PYTHONPATH:-} - -# Pass GEFS ensemble flag if set by launcher -export GEFS_ENSEMBLE=${GEFS_ENSEMBLE:-false} -export GEFS_MEMBER_ID=${GEFS_MEMBER_ID:-} - -${HOMEnos}/jobs/JNOS_OFS_ENSEMBLE_MEMBER diff --git a/pbs/launch_secofs_ensemble.sh b/pbs/launch_secofs_ensemble.sh index a259de5..c4ad88a 100644 --- a/pbs/launch_secofs_ensemble.sh +++ b/pbs/launch_secofs_ensemble.sh @@ -33,6 +33,8 @@ # PDY=20260212 ./launch_secofs_ensemble.sh 00 3 # Explicit PDY # ./launch_secofs_ensemble.sh 00 3 --pdy 20260212 # Explicit PDY (alt) # ./launch_secofs_ensemble.sh 00 3 --skip-prep # Skip prep, submit members only +# ./launch_secofs_ensemble.sh 00 --ufs # UFS-Coastal (secofs_ufs) ensemble +# ./launch_secofs_ensemble.sh 00 --ufs --gefs # UFS-Coastal GEFS ensemble # ./launch_secofs_ensemble.sh 00 --det-only # Deterministic only (prep→nowcast→forecast) # ./launch_secofs_ensemble.sh 00 --det-only --pdy 20260216 # @@ -63,6 +65,7 @@ SKIP_PREP=false ATMOS_ENSEMBLE=false GEFS_ENSEMBLE=false DET_ONLY=false +UFS_MODE=false # Check for flags for arg in "$@"; do @@ -71,6 +74,7 @@ for arg in "$@"; do --skip-prep) SKIP_PREP=true ;; --atmos-ensemble) ATMOS_ENSEMBLE=true ;; --gefs) GEFS_ENSEMBLE=true; ATMOS_ENSEMBLE=true ;; + --ufs) UFS_MODE=true; ATMOS_ENSEMBLE=true ;; --det-only) DET_ONLY=true ;; --pdy) _NEXT_IS_PDY=true ;; *) @@ -83,6 +87,18 @@ for arg in "$@"; do done unset _NEXT_IS_PDY +# UFS mode implies GEFS ensemble by default +if [ "${UFS_MODE}" = true ]; then + GEFS_ENSEMBLE=true + ATMOS_ENSEMBLE=true +fi + 
+# OFS: secofs_ufs for UFS-Coastal, secofs for standalone +OFS=secofs +if [ "${UFS_MODE}" = true ]; then + OFS=secofs_ufs +fi + # GEFS defaults: 6 members for SECOFS (4 GEFS + 1 RRFS + 1 control) if [ "${GEFS_ENSEMBLE}" = true ] && [ "${_USER_SET_MEMBERS}" = false ]; then N_MEMBERS=6 @@ -100,12 +116,16 @@ echo " SECOFS Ensemble Launcher" echo "==============================================" echo " PDY: ${PDY}" echo " Cycle: ${CYC}" +echo " OFS: ${OFS}" if [ "${DET_ONLY}" = true ]; then -echo " Mode: DETERMINISTIC ONLY (prep→nowcast→forecast)" +echo " Mode: DETERMINISTIC ONLY (prep->nowcast->forecast)" else echo " Members: ${N_MEMBERS} (1 control + $((N_MEMBERS - 1)) perturbed)" echo " Det run: ${WITH_DET}" echo " Atmos ens: ${ATMOS_ENSEMBLE}" +if [ "${UFS_MODE}" = true ]; then +echo " UFS mode: true (DATM+SCHISM coupled, per-member DATM forcing)" +fi if [ "${GEFS_ENSEMBLE}" = true ]; then echo " GEFS ens: true (0.25 deg pgrb2sp25, members gep01-gep$(printf '%02d' $((N_MEMBERS - 1))))" fi @@ -115,7 +135,12 @@ echo " PBS dir: ${PBS_DIR}" echo "==============================================" # ---- Validate PBS scripts exist -------------------------------------- -PREP_PBS="${PBS_DIR}/jnos_secofs_prep_${CYC}.pbs" +# UFS mode uses UFS-specific prep scripts but the SAME ensemble scripts +if [ "${UFS_MODE}" = true ]; then + PREP_PBS="${PBS_DIR}/jnos_secofs_ufs_prep_${CYC}.pbs" +else + PREP_PBS="${PBS_DIR}/jnos_secofs_prep_${CYC}.pbs" +fi MEMBER_PBS="${PBS_DIR}/jnos_secofs_ensemble_member.pbs" POST_PBS="${PBS_DIR}/jnos_secofs_ensemble_post.pbs" ATMOS_PREP_PBS="${PBS_DIR}/jnos_secofs_ensemble_atmos_prep.pbs" @@ -162,8 +187,13 @@ if [ "${DET_ONLY}" = true ]; then echo "" echo ">>> Deterministic-only mode: submitting nowcast + forecast..." 
- NCST_PBS="${PBS_DIR}/jnos_secofs_nowcast_${CYC}.pbs" - FCST_PBS="${PBS_DIR}/jnos_secofs_forecast_${CYC}.pbs" + if [ "${UFS_MODE}" = true ]; then + NCST_PBS="${PBS_DIR}/jnos_secofs_ufs_nowcast_${CYC}.pbs" + FCST_PBS="${PBS_DIR}/jnos_secofs_ufs_forecast_${CYC}.pbs" + else + NCST_PBS="${PBS_DIR}/jnos_secofs_nowcast_${CYC}.pbs" + FCST_PBS="${PBS_DIR}/jnos_secofs_forecast_${CYC}.pbs" + fi if [ ! -f "${NCST_PBS}" ] || [ ! -f "${FCST_PBS}" ]; then echo "ERROR: Deterministic PBS scripts not found:" >&2 @@ -194,8 +224,8 @@ if [ "${DET_ONLY}" = true ]; then echo "" echo " Dependency chain:" echo " prep (${PREP_JOBID_SHORT:-skipped})" - echo " └─> nowcast (${NCST_SHORT})" - echo " └─> forecast (${FCST_SHORT})" + echo " |-> nowcast (${NCST_SHORT})" + echo " |-> forecast (${FCST_SHORT})" echo "" echo " Monitor with: qstat -u $LOGNAME" echo " Cancel all: qdel ${PREP_JOBID_SHORT:-} ${NCST_SHORT} ${FCST_SHORT}" @@ -211,16 +241,16 @@ if [ "${ATMOS_ENSEMBLE}" = true ]; then echo ">>> Submitting GEFS atmospheric ensemble prep job..." # Don't pass GEFS_MEMBERS — let the J-job read from YAML config. # YAML correctly distinguishes GEFS members from RRFS/other sources. - ATMOS_QSUB_ARGS=(-v "CYC=${CYC},PDY=${PDY},GEFS_ENSEMBLE=true" \ - -N "secofs_gefs_prep_${CYC}" \ - -o "${RPTDIR}/secofs_gefs_prep_${CYC}.out" \ - -e "${RPTDIR}/secofs_gefs_prep_${CYC}.err") + ATMOS_QSUB_ARGS=(-v "CYC=${CYC},PDY=${PDY},GEFS_ENSEMBLE=true,OFS=${OFS}" \ + -N "${OFS}_gefs_prep_${CYC}" \ + -o "${RPTDIR}/${OFS}_gefs_prep_${CYC}.out" \ + -e "${RPTDIR}/${OFS}_gefs_prep_${CYC}.err") else echo ">>> Submitting atmospheric ensemble prep job..." 
- ATMOS_QSUB_ARGS=(-v "CYC=${CYC},PDY=${PDY}" \ - -N "secofs_atmos_prep_${CYC}" \ - -o "${RPTDIR}/secofs_atmos_prep_${CYC}.out" \ - -e "${RPTDIR}/secofs_atmos_prep_${CYC}.err") + ATMOS_QSUB_ARGS=(-v "CYC=${CYC},PDY=${PDY},OFS=${OFS}" \ + -N "${OFS}_atmos_prep_${CYC}" \ + -o "${RPTDIR}/${OFS}_atmos_prep_${CYC}.out" \ + -e "${RPTDIR}/${OFS}_atmos_prep_${CYC}.err") fi if [ -n "${PREP_JOBID_SHORT}" ]; then ATMOS_QSUB_ARGS+=(-W "depend=afterok:${PREP_JOBID_SHORT}") @@ -241,8 +271,13 @@ if [ "${WITH_DET}" = true ]; then echo "" echo ">>> Submitting deterministic nowcast (required for ensemble ihot=2)..." - NCST_PBS="${PBS_DIR}/jnos_secofs_nowcast_${CYC}.pbs" - FCST_PBS="${PBS_DIR}/jnos_secofs_forecast_${CYC}.pbs" + if [ "${UFS_MODE}" = true ]; then + NCST_PBS="${PBS_DIR}/jnos_secofs_ufs_nowcast_${CYC}.pbs" + FCST_PBS="${PBS_DIR}/jnos_secofs_ufs_forecast_${CYC}.pbs" + else + NCST_PBS="${PBS_DIR}/jnos_secofs_nowcast_${CYC}.pbs" + FCST_PBS="${PBS_DIR}/jnos_secofs_forecast_${CYC}.pbs" + fi if [ ! 
-f "${NCST_PBS}" ]; then echo "ERROR: Nowcast PBS script not found: ${NCST_PBS}" >&2 @@ -299,16 +334,16 @@ fi for i in $(seq 0 $((N_MEMBERS - 1))); do MID=$(printf '%03d' $i) # Base variables for every member - MEMBER_VARS="MEMBER_ID=${MID},CYC=${CYC},PDY=${PDY}" + MEMBER_VARS="MEMBER_ID=${MID},CYC=${CYC},PDY=${PDY},OFS=${OFS}" # Pass GEFS_ENSEMBLE flag so J-job knows to stage GEFS sflux files if [ "${GEFS_ENSEMBLE}" = true ]; then GEFS_MEM_ID=$(printf '%02d' $i) MEMBER_VARS="${MEMBER_VARS},GEFS_ENSEMBLE=true,GEFS_MEMBER_ID=${GEFS_MEM_ID}" fi QSUB_ARGS=(-v "${MEMBER_VARS}" \ - -N "secofs_ens${MID}_${CYC}" \ - -o "${RPTDIR}/secofs_ens${MID}_${CYC}.out" \ - -e "${RPTDIR}/secofs_ens${MID}_${CYC}.err") + -N "${OFS}_ens${MID}_${CYC}" \ + -o "${RPTDIR}/${OFS}_ens${MID}_${CYC}.out" \ + -e "${RPTDIR}/${OFS}_ens${MID}_${CYC}.err") if [ -n "${MEMBER_DEP_STR}" ]; then QSUB_ARGS+=(-W "depend=${MEMBER_DEP_STR}") fi @@ -333,8 +368,8 @@ for mjob in "${MEMBER_JOBIDS[@]}"; do done ENSPOST_JOBID=$(qsub \ - -v "N_MEMBERS=${N_MEMBERS},CYC=${CYC},PDY=${PDY}" \ - -N "secofs_enspost_${CYC}" \ + -v "N_MEMBERS=${N_MEMBERS},CYC=${CYC},PDY=${PDY},OFS=${OFS}" \ + -N "${OFS}_enspost_${CYC}" \ -W depend=${DEP_STR} \ "${POST_PBS}") echo " Ensemble post: ${ENSPOST_JOBID}" diff --git a/pbs/launch_secofs_ufs_ensemble.sh b/pbs/launch_secofs_ufs_ensemble.sh deleted file mode 100644 index 3a64f26..0000000 --- a/pbs/launch_secofs_ufs_ensemble.sh +++ /dev/null @@ -1,330 +0,0 @@ -#!/bin/bash -# ====================================================================== -# launch_secofs_ufs_ensemble.sh - Submit SECOFS UFS-Coastal ensemble via PBS -# -# Shell-based launcher for running the GEFS atmospheric ensemble on WCOSS2 -# with UFS-Coastal (DATM+SCHISM coupled). 
Chains jobs with qsub dependencies: -# -# prep --> atmos_prep --> member_000 \ -# --> member_001 | -# --> member_002 |--> post_ensemble -# --> member_003 | -# --> member_004 | -# --> member_005 / -# -# The deterministic workflow (nowcast/forecast) can optionally -# run in parallel alongside the ensemble. -# -# Usage: -# ./launch_secofs_ufs_ensemble.sh # Default: cyc=00, 6 members, PDY=today -# ./launch_secofs_ufs_ensemble.sh 12 # Cycle 12 -# ./launch_secofs_ufs_ensemble.sh 00 3 # Cycle 00, 3 members -# ./launch_secofs_ufs_ensemble.sh 00 6 --with-det # Also submit deterministic -# ./launch_secofs_ufs_ensemble.sh 00 --gefs # GEFS ensemble (6 members, auto) -# PDY=20260212 ./launch_secofs_ufs_ensemble.sh 00 3 # Explicit PDY -# ./launch_secofs_ufs_ensemble.sh 00 3 --pdy 20260212 # Explicit PDY (alt) -# ./launch_secofs_ufs_ensemble.sh 00 3 --skip-prep # Skip prep, submit members only -# ./launch_secofs_ufs_ensemble.sh 00 --det-only # Deterministic only -# -# Requirements: -# - Run from the pbs/ directory (or set PBS_DIR) -# - Prep job PBS script must exist: jnos_secofs_ufs_prep_${CYC}.pbs -# - UFS ensemble PBS scripts in same directory -# ====================================================================== -set -e - -# ---- Configuration --------------------------------------------------- -CYC=${1:-00} -OFS=secofs_ufs - -# Track whether user explicitly provided N_MEMBERS -_USER_SET_MEMBERS=false -if [[ "${2:-}" != --* ]] && [ -n "${2:-}" ]; then - _USER_SET_MEMBERS=true -fi -# If $2 is a flag (starts with --), don't consume it as N_MEMBERS -if [[ "${2:-}" == --* ]]; then - N_MEMBERS=6 - shift 1 2>/dev/null || true -else - N_MEMBERS=${2:-6} - shift 2 2>/dev/null || true -fi -WITH_DET=false -SKIP_PREP=false -ATMOS_ENSEMBLE=true # Always true for UFS ensemble (DATM forcing per member) -GEFS_ENSEMBLE=true # Always true — this launcher is GEFS-specific -DET_ONLY=false - -# Check for flags -for arg in "$@"; do - case "$arg" in - --with-det) WITH_DET=true ;; - --skip-prep) 
SKIP_PREP=true ;; - --gefs) GEFS_ENSEMBLE=true ;; - --det-only) DET_ONLY=true ;; - --pdy) _NEXT_IS_PDY=true ;; - *) - if [ "${_NEXT_IS_PDY:-}" = true ]; then - PDY="$arg" - _NEXT_IS_PDY=false - fi - ;; - esac -done -unset _NEXT_IS_PDY - -# PDY: use env var, --pdy flag, or default to today -PDY=${PDY:-$(date +%Y%m%d)} -export PDY - -PBS_DIR="$(cd "$(dirname "$0")" && pwd)" - -echo "==============================================" -echo " SECOFS UFS-Coastal Ensemble Launcher" -echo "==============================================" -echo " PDY: ${PDY}" -echo " Cycle: ${CYC}" -echo " OFS: ${OFS}" -if [ "${DET_ONLY}" = true ]; then -echo " Mode: DETERMINISTIC ONLY (prep->nowcast->forecast)" -else -echo " Members: ${N_MEMBERS} (1 control + $((N_MEMBERS - 1)) perturbed)" -echo " Det run: ${WITH_DET}" -echo " GEFS ens: true (0.25 deg pgrb2sp25)" -echo " DATM: true (per-member DATM forcing)" -fi -echo " Skip prep: ${SKIP_PREP}" -echo " PBS dir: ${PBS_DIR}" -echo "==============================================" - -# ---- Validate PBS scripts exist -------------------------------------- -PREP_PBS="${PBS_DIR}/jnos_secofs_ufs_prep_${CYC}.pbs" -MEMBER_PBS="${PBS_DIR}/jnos_secofs_ufs_ensemble_member.pbs" -POST_PBS="${PBS_DIR}/jnos_secofs_ensemble_post.pbs" -ATMOS_PREP_PBS="${PBS_DIR}/jnos_secofs_ufs_ensemble_atmos_prep.pbs" - -if [ "${DET_ONLY}" != true ]; then - for script in "${MEMBER_PBS}" "${ATMOS_PREP_PBS}"; do - if [ ! -f "${script}" ]; then - echo "ERROR: PBS script not found: ${script}" >&2 - exit 1 - fi - done -fi - -if [ "${SKIP_PREP}" != true ] && [ ! -f "${PREP_PBS}" ]; then - echo "ERROR: Prep PBS script not found: ${PREP_PBS}" >&2 - exit 1 -fi - -RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs -mkdir -p ${RPTDIR} 2>/dev/null || true - -# ---- Step 1: Submit prep job (unless --skip-prep) -------------------- -if [ "${SKIP_PREP}" = true ]; then - echo "" - echo ">>> Skipping prep (--skip-prep). Members will run without dependency." 
- PREP_JOBID_SHORT="" -else - echo "" - echo ">>> Submitting prep job..." - PREP_JOBID=$(qsub -v "PDY=${PDY}" "${PREP_PBS}") - PREP_JOBID_SHORT=${PREP_JOBID%%.*} - echo " Prep job: ${PREP_JOBID}" -fi - -# ---- Deterministic-only mode: just prep -> nowcast -> forecast -------- -if [ "${DET_ONLY}" = true ]; then - echo "" - echo ">>> Deterministic-only mode: submitting nowcast + forecast..." - - NCST_PBS="${PBS_DIR}/jnos_secofs_ufs_nowcast_${CYC}.pbs" - FCST_PBS="${PBS_DIR}/jnos_secofs_ufs_forecast_${CYC}.pbs" - - if [ ! -f "${NCST_PBS}" ] || [ ! -f "${FCST_PBS}" ]; then - echo "ERROR: Deterministic PBS scripts not found:" >&2 - [ ! -f "${NCST_PBS}" ] && echo " Missing: ${NCST_PBS}" >&2 - [ ! -f "${FCST_PBS}" ] && echo " Missing: ${FCST_PBS}" >&2 - exit 1 - fi - - NCST_JOBID=$(qsub \ - -v "PDY=${PDY},cyc=${CYC}" \ - ${PREP_JOBID_SHORT:+-W depend=afterok:${PREP_JOBID_SHORT}} \ - "${NCST_PBS}") - NCST_SHORT=${NCST_JOBID%%.*} - echo " Nowcast: ${NCST_JOBID}" - - FCST_JOBID=$(qsub \ - -v "PDY=${PDY},cyc=${CYC}" \ - -W depend=afterok:${NCST_SHORT} \ - "${FCST_PBS}") - FCST_SHORT=${FCST_JOBID%%.*} - echo " Forecast: ${FCST_JOBID}" - - echo "" - echo "==============================================" - echo " Deterministic workflow submitted successfully" - echo "==============================================" - echo "" - echo " Dependency chain:" - echo " prep (${PREP_JOBID_SHORT:-skipped})" - echo " |-> nowcast (${NCST_SHORT})" - echo " |-> forecast (${FCST_SHORT})" - echo "" - echo " Monitor with: qstat -u $LOGNAME" - echo " Cancel all: qdel ${PREP_JOBID_SHORT:-} ${NCST_SHORT} ${FCST_SHORT}" - echo "" - exit 0 -fi - -# ---- Step 1b: Submit DATM atmos prep job ------------------------------ -echo "" -echo ">>> Submitting GEFS DATM atmospheric ensemble prep job..." 
-ATMOS_QSUB_ARGS=(-v "CYC=${CYC},PDY=${PDY},GEFS_ENSEMBLE=true,USE_DATM=true" \ - -N "secofs_ufs_gefs_prep_${CYC}" \ - -o "${RPTDIR}/secofs_ufs_gefs_prep_${CYC}.out" \ - -e "${RPTDIR}/secofs_ufs_gefs_prep_${CYC}.err") -if [ -n "${PREP_JOBID_SHORT}" ]; then - ATMOS_QSUB_ARGS+=(-W "depend=afterok:${PREP_JOBID_SHORT}") -fi -ATMOS_PREP_JOBID=$(qsub "${ATMOS_QSUB_ARGS[@]}" "${ATMOS_PREP_PBS}") -ATMOS_PREP_JOBID_SHORT=${ATMOS_PREP_JOBID%%.*} -echo " Atmos prep: ${ATMOS_PREP_JOBID}" - -# ---- Step 2a (optional): Submit deterministic nowcast before ensemble ---- -NCST_JOBID_SHORT="" -FCST_JOBID_SHORT="" -if [ "${WITH_DET}" = true ]; then - echo "" - echo ">>> Submitting deterministic nowcast (required for ensemble ihot=2)..." - - NCST_PBS="${PBS_DIR}/jnos_secofs_ufs_nowcast_${CYC}.pbs" - FCST_PBS="${PBS_DIR}/jnos_secofs_ufs_forecast_${CYC}.pbs" - - if [ ! -f "${NCST_PBS}" ]; then - echo "ERROR: Nowcast PBS script not found: ${NCST_PBS}" >&2 - exit 1 - fi - - NCST_JOBID=$(qsub \ - -v "PDY=${PDY},cyc=${CYC}" \ - ${PREP_JOBID_SHORT:+-W depend=afterok:${PREP_JOBID_SHORT}} \ - "${NCST_PBS}") - NCST_JOBID_SHORT=${NCST_JOBID%%.*} - echo " Nowcast: ${NCST_JOBID}" - - if [ -f "${FCST_PBS}" ]; then - FCST_JOBID=$(qsub \ - -v "PDY=${PDY},cyc=${CYC}" \ - -W depend=afterok:${NCST_JOBID_SHORT} \ - "${FCST_PBS}") - FCST_JOBID_SHORT=${FCST_JOBID%%.*} - echo " Forecast: ${FCST_JOBID} (parallel with ensemble)" - fi -fi - -# ---- Step 2b: Submit ensemble members -------------------------------- -echo "" -echo ">>> Submitting ${N_MEMBERS} ensemble members..." 
-MEMBER_JOBIDS=() - -# Build member dependency string -MEMBER_DEP_PARTS=() -if [ -n "${NCST_JOBID_SHORT}" ]; then - MEMBER_DEP_PARTS+=("${NCST_JOBID_SHORT}") -elif [ -n "${PREP_JOBID_SHORT}" ]; then - MEMBER_DEP_PARTS+=("${PREP_JOBID_SHORT}") -fi -if [ -n "${ATMOS_PREP_JOBID_SHORT}" ]; then - MEMBER_DEP_PARTS+=("${ATMOS_PREP_JOBID_SHORT}") -fi - -MEMBER_DEP_STR="" -if [ ${#MEMBER_DEP_PARTS[@]} -gt 0 ]; then - MEMBER_DEP_STR="afterok" - for dep in "${MEMBER_DEP_PARTS[@]}"; do - MEMBER_DEP_STR="${MEMBER_DEP_STR}:${dep}" - done -fi - -for i in $(seq 0 $((N_MEMBERS - 1))); do - MID=$(printf '%03d' $i) - GEFS_MEM_ID=$(printf '%02d' $i) - MEMBER_VARS="MEMBER_ID=${MID},CYC=${CYC},PDY=${PDY},GEFS_ENSEMBLE=true,GEFS_MEMBER_ID=${GEFS_MEM_ID},USE_DATM=true" - QSUB_ARGS=(-v "${MEMBER_VARS}" \ - -N "secofs_ufs_ens${MID}_${CYC}" \ - -o "${RPTDIR}/secofs_ufs_ens${MID}_${CYC}.out" \ - -e "${RPTDIR}/secofs_ufs_ens${MID}_${CYC}.err") - if [ -n "${MEMBER_DEP_STR}" ]; then - QSUB_ARGS+=(-W "depend=${MEMBER_DEP_STR}") - fi - MJOB=$(qsub "${QSUB_ARGS[@]}" "${MEMBER_PBS}") - MEMBER_JOBIDS+=("${MJOB}") - MJOB_SHORT=${MJOB%%.*} - if [ "${MID}" = "000" ]; then - echo " Member ${MID} (control, GFS+HRRR DATM): ${MJOB}" - else - echo " Member ${MID} (perturbed): ${MJOB}" - fi -done - -# ---- Step 3: Submit ensemble post (depend on ALL members) ------------ -echo "" -echo ">>> Submitting ensemble post-processing..." 
- -DEP_STR="afterok" -for mjob in "${MEMBER_JOBIDS[@]}"; do - DEP_STR="${DEP_STR}:${mjob%%.*}" -done - -if [ -f "${POST_PBS}" ]; then - ENSPOST_JOBID=$(qsub \ - -v "N_MEMBERS=${N_MEMBERS},CYC=${CYC},PDY=${PDY}" \ - -N "secofs_ufs_enspost_${CYC}" \ - -W depend=${DEP_STR} \ - "${POST_PBS}") - echo " Ensemble post: ${ENSPOST_JOBID}" - ENSPOST_SHORT=${ENSPOST_JOBID%%.*} -else - echo " WARNING: Post PBS script not found: ${POST_PBS}" - echo " Ensemble post will need to be run manually" - ENSPOST_SHORT="(not submitted)" -fi - -# ---- Summary --------------------------------------------------------- -echo "" -echo "==============================================" -echo " UFS-Coastal Ensemble workflow submitted" -echo "==============================================" -echo "" -echo " Dependency chain:" -echo " prep (${PREP_JOBID_SHORT:-skipped})" -if [ -n "${NCST_JOBID_SHORT}" ]; then - echo " |-> nowcast (${NCST_JOBID_SHORT})" - if [ -n "${FCST_JOBID_SHORT}" ]; then - echo " | |-> forecast (${FCST_JOBID_SHORT})" - fi -fi -echo " |-> atmos_prep (${ATMOS_PREP_JOBID_SHORT})" -for i in $(seq 0 $((N_MEMBERS - 1))); do - MID=$(printf '%03d' $i) - MJOB_SHORT=${MEMBER_JOBIDS[$i]%%.*} - echo " |-> member_${MID} (${MJOB_SHORT})" -done -echo " |-> post_ensemble (${ENSPOST_SHORT})" -if [ -n "${NCST_JOBID_SHORT}" ]; then - echo "" - echo " Members wait for nowcast to finish (ihot=2 continuous staout)" -fi -echo "" -echo " Monitor with: qstat -u $LOGNAME" -ALL_JOBIDS="${PREP_JOBID_SHORT:-}" -[ -n "${NCST_JOBID_SHORT}" ] && ALL_JOBIDS="${ALL_JOBIDS} ${NCST_JOBID_SHORT}" -[ -n "${FCST_JOBID_SHORT}" ] && ALL_JOBIDS="${ALL_JOBIDS} ${FCST_JOBID_SHORT}" -ALL_JOBIDS="${ALL_JOBIDS} ${ATMOS_PREP_JOBID_SHORT}" -ALL_JOBIDS="${ALL_JOBIDS} ${MEMBER_JOBIDS[*]%%.*}" -[ "${ENSPOST_SHORT}" != "(not submitted)" ] && ALL_JOBIDS="${ALL_JOBIDS} ${ENSPOST_SHORT}" -echo " Cancel all: qdel ${ALL_JOBIDS}" -echo "" From a8a677becdcac5a5f4ca6bc797a917798132c673 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 
28 Feb 2026 12:35:26 -0500 Subject: [PATCH 03/40] Fix UFS-Coastal ensemble: ihot=1, model_configure start time, nws/nscribes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three critical fixes for UFS-Coastal ensemble members: 1. ihot=2 → ihot=1 for UFS-Coastal (USE_DATM=true). ihot=2 causes SCHISM/ESMF clock desync and ghost node pressure transient. Standalone SCHISM ensemble still uses ihot=2 for continuous staout timeseries. 2. Patch model_configure start_year/month/day/hour from time_nowcastend, matching the deterministic workflow. Without this, ESMF clock starts at the wrong time for ensemble forecast members. 3. Force nws=4 and nscribes=0 in param.nml for UFS-Coastal members. Prevents mismatches if param.nml originated from standalone SCHISM (nws=2, nscribes=6/7). --- ush/nos_ofs_ensemble_run.sh | 52 ++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index 37c00bb..f7b5b89 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -92,8 +92,9 @@ ensemble_stage_files() { ################################################################################ # Step 3: Configure param.nml for this member # -# Copies the forecast param.nml, sets ihot=1, updates start_year/month/day/hour -# and rnday to match the forecast window, then applies LHS perturbations. +# Copies the forecast param.nml, sets ihot (1 for UFS, 2 for standalone), +# enforces nws/nscribes for UFS, updates start time and rnday, then applies +# LHS perturbations. ################################################################################ ensemble_configure_runtime() { echo "=== ensemble_configure_runtime: member ${MEMBER_ID} ===" @@ -105,14 +106,25 @@ ensemble_configure_runtime() { local rc=$? 
if [ $rc -ne 0 ]; then return $rc; fi - # 3b: Set ihot=2 for hot restart with continued output - # ihot=1: reads hotstart.nc but starts output from scratch (forecast-only staout) - # ihot=2: reads hotstart.nc AND continues output (appends forecast to nowcast staout) - # ihot=2 gives continuous nowcast+forecast timeseries in staout files. - # Requires real staout/mirror/flux files from the deterministic nowcast - # (restored in ensemble_prepare_restart). - sed -i 's/ihot *= *[0-9]*/ihot = 2/' ${MEMBER_DATA}/param.nml - echo "Set ihot=2 for hot restart with continued output (nowcast+forecast staout)" + # 3b: Set ihot for hot restart + # Standalone SCHISM: ihot=2 (continue from hotstart time, appends staout) + # UFS-Coastal: ihot=1 (NUOPC start_type=startup resets ESMF clock to t=0; + # ihot=2 causes SCHISM/ESMF clock desync → ghost node pressure transient) + if [ "${USE_DATM:-false}" == "true" ] || [ "${USE_DATM:-0}" == "1" ]; then + sed -i 's/ihot *= *[0-9]*/ihot = 1/' ${MEMBER_DATA}/param.nml + echo "Set ihot=1 for UFS-Coastal (NUOPC clock sync requires reset)" + else + sed -i 's/ihot *= *[0-9]*/ihot = 2/' ${MEMBER_DATA}/param.nml + echo "Set ihot=2 for hot restart with continued output (nowcast+forecast staout)" + fi + + # 3b2: Force UFS-Coastal param.nml overrides + # If param.nml came from standalone SCHISM (nws=2, nscribes>0), fix for NUOPC. 
+ if [ "${USE_DATM:-false}" == "true" ] || [ "${USE_DATM:-0}" == "1" ]; then + sed -i 's/nws *= *[0-9]*/nws = 4/' ${MEMBER_DATA}/param.nml + sed -i 's/nscribes *= *[0-9]*/nscribes = 0/' ${MEMBER_DATA}/param.nml + echo "Forced nws=4 (NUOPC coupling) and nscribes=0 (CMEPS I/O) for UFS-Coastal" + fi # 3c: Update start time and rnday for forecast period _ensemble_update_start_time @@ -457,10 +469,26 @@ ds.close() fi fi - # Patch stop_n for ensemble phase (typically forecast length) + # Patch model_configure for ensemble forecast: start time + duration local nhours=${LEN_FORECAST:-48} + + # Derive forecast start time from time_nowcastend (= nowcast end = forecast begin) + local _time_ncend="" + [ -f "${COMOUT}/time_nowcastend.${cycle}" ] && read _time_ncend < "${COMOUT}/time_nowcastend.${cycle}" + _time_ncend=${_time_ncend:-${PDY}$(printf '%02d' "${cyc}")} + + local sim_yyyy=${_time_ncend:0:4} + local sim_mm=${_time_ncend:4:2} + local sim_dd=${_time_ncend:6:2} + local sim_hh=${_time_ncend:8:2} + if [ -s "${MEMBER_DATA}/model_configure" ]; then - sed -i "s/nhours_fcst:.*/nhours_fcst: ${nhours}/" ${MEMBER_DATA}/model_configure + sed -i "s/nhours_fcst:.*/nhours_fcst: ${nhours}/" ${MEMBER_DATA}/model_configure + sed -i "s/start_year:.*/start_year: ${sim_yyyy}/" ${MEMBER_DATA}/model_configure + sed -i "s/start_month:.*/start_month: ${sim_mm}/" ${MEMBER_DATA}/model_configure + sed -i "s/start_day:.*/start_day: ${sim_dd}/" ${MEMBER_DATA}/model_configure + sed -i "s/start_hour:.*/start_hour: ${sim_hh}/" ${MEMBER_DATA}/model_configure + echo "Patched model_configure: start=${sim_yyyy}-${sim_mm}-${sim_dd}T${sim_hh}Z, nhours_fcst=${nhours}" fi if [ -s "${MEMBER_DATA}/ufs.configure" ]; then sed -i "s/stop_n = .*/stop_n = ${nhours}/" ${MEMBER_DATA}/ufs.configure From 812d11b6c8b46df93ad20e5631b41d5d32192644 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 28 Feb 2026 20:31:30 -0500 Subject: [PATCH 04/40] Fix PACKAGEROOT override by prod_envir module in ensemble PBS scripts 
prod_envir module sets PACKAGEROOT to /lfs/h1/ops/prod/packages, overriding the dev path. Save PACKAGEROOT before module loads and restore it after, so J-jobs resolve to the correct dev installation directory. --- pbs/jnos_secofs_ensemble_atmos_prep.pbs | 7 +++++++ pbs/jnos_secofs_ensemble_member.pbs | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/pbs/jnos_secofs_ensemble_atmos_prep.pbs b/pbs/jnos_secofs_ensemble_atmos_prep.pbs index 140190c..2335de0 100644 --- a/pbs/jnos_secofs_ensemble_atmos_prep.pbs +++ b/pbs/jnos_secofs_ensemble_atmos_prep.pbs @@ -41,6 +41,9 @@ if [ ! -r $WORKDIR ]; then fi cd ${WORKDIR} +# Save PACKAGEROOT before module loads (prod_envir overrides it) +_SAVED_PACKAGEROOT=${PACKAGEROOT} + module purge module load envvar/${envvars_ver} @@ -67,6 +70,10 @@ module load wgrib2/${wgrib2_ver} module list +# Restore PACKAGEROOT (prod_envir sets it to /lfs/h1/ops/prod/packages) +PACKAGEROOT=${_SAVED_PACKAGEROOT} +unset _SAVED_PACKAGEROOT + # EXPORT list set +x export envir=dev diff --git a/pbs/jnos_secofs_ensemble_member.pbs b/pbs/jnos_secofs_ensemble_member.pbs index e1dc2a0..5d2b03e 100644 --- a/pbs/jnos_secofs_ensemble_member.pbs +++ b/pbs/jnos_secofs_ensemble_member.pbs @@ -53,7 +53,10 @@ cd ${WORKDIR} # ---------------------------------------------------------------- # Module setup: UFS-Coastal requires hpc-stack libraries, standalone # SCHISM uses standard WCOSS2 modules. 
+# Save PACKAGEROOT before module loads (prod_envir overrides it) # ---------------------------------------------------------------- +_SAVED_PACKAGEROOT=${PACKAGEROOT} + if [[ "${OFS}" == *_ufs* ]]; then # UFS-Coastal: hpc-stack modules (hdf5-D/1.14.0, netcdf-D/4.9.2, esmf-D/8.8.0) module purge @@ -92,6 +95,10 @@ fi module list +# Restore PACKAGEROOT (prod_envir sets it to /lfs/h1/ops/prod/packages) +PACKAGEROOT=${_SAVED_PACKAGEROOT} +unset _SAVED_PACKAGEROOT + # MPI tuning for large-core SCHISM runs on Cray export MPICH_OFI_STARTUP_CONNECT=1 export MPICH_COLL_SYNC=MPI_Bcast From 47316dfdb99a5f38d1237ae32a84c04db20632dc Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 28 Feb 2026 21:43:52 -0500 Subject: [PATCH 05/40] Fix DATM staging: read met_source_1 from atmospheric_source in params.json params.json nests met_source_1 under atmospheric_source:{}, but the staging code was reading it as a top-level key, always getting empty string. This caused all ensemble members to fall through to the control datm_input/ directory (blended GFS+HRRR) instead of their member-specific GEFS-only DATM forcing. 
--- ush/nos_ofs_ensemble_run.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index f7b5b89..fb68182 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -419,7 +419,10 @@ _ensemble_execute_ufs_coastal() { import json, sys with open('${PARAM_FILE}') as f: p = json.load(f) -print(p.get('met_source_1', '')) +# met_source_1 is nested under atmospheric_source in params.json +atm = p.get('atmospheric_source', {}) +ms = atm.get('met_source_1', '') or p.get('met_source_1', '') +print(ms) " 2>/dev/null || echo "") fi From a815a98e6e6889578067648507f247530be8be75 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 28 Feb 2026 22:23:07 -0500 Subject: [PATCH 06/40] Fix DATM ensemble: mesh mismatch, explicit file checks, mesh regeneration Three fixes for UFS-Coastal GEFS ensemble member failures: 1. Explicit datm_forcing.nc check: Replace silent `cp *.nc || true` with explicit check for datm_forcing.nc and datm_esmf_mesh.nc. Fail early with clear error instead of launching model with missing input. 2. ESMF mesh regeneration: When member forcing grid (e.g., GEFS 1440x721) differs from control mesh (blended 1721x1721), regenerate ESMF mesh in-place from the member's forcing file using Python/netCDF4. 3. Atmos prep wrapper archival: Replace glob `cp *.nc` with explicit per-file copy with warnings, so missing files are visible in logs. --- jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP | 24 +++++-- ush/nos_ofs_ensemble_run.sh | 103 ++++++++++++++++++++++++++++-- 2 files changed, 117 insertions(+), 10 deletions(-) diff --git a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP index 67fff2b..ba5b705 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP +++ b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP @@ -622,9 +622,15 @@ export DATM_SKIP_UFS_CONFIG=true mkdir -p \${DATA}/INPUT ${USHnos}/nosofs/nos_ofs_create_datm_forcing_blended.sh ${DATM_DOMAIN:-SECOFS} rc=\$? 
-if [ \$rc -eq 0 ] && [ -d \${DATA}/INPUT ]; then - cp -p \${DATA}/INPUT/*.nc ${MEM_OUTDIR}/ 2>/dev/null - echo "Archived DATM input to ${MEM_OUTDIR}" +if [ \$rc -eq 0 ]; then + for f in datm_forcing.nc datm_esmf_mesh.nc; do + if [ -s "\${DATA}/INPUT/\${f}" ]; then + cp -p \${DATA}/INPUT/\${f} ${MEM_OUTDIR}/ + echo "Archived \${f} to ${MEM_OUTDIR}" + else + echo "WARNING: \${f} not found in \${DATA}/INPUT/" >&2 + fi + done fi exit \$rc WRAPEOF @@ -658,9 +664,15 @@ export DATM_SKIP_UFS_CONFIG=true mkdir -p \${DATA}/INPUT ${USHnos}/nosofs/nos_ofs_create_datm_forcing_blended.sh ${DATM_DOMAIN:-SECOFS} rc=\$? -if [ \$rc -eq 0 ] && [ -d \${DATA}/INPUT ]; then - cp -p \${DATA}/INPUT/*.nc ${RRFS_OUTDIR}/ 2>/dev/null - echo "Archived DATM input to ${RRFS_OUTDIR}" +if [ \$rc -eq 0 ]; then + for f in datm_forcing.nc datm_esmf_mesh.nc; do + if [ -s "\${DATA}/INPUT/\${f}" ]; then + cp -p \${DATA}/INPUT/\${f} ${RRFS_OUTDIR}/ + echo "Archived \${f} to ${RRFS_OUTDIR}" + else + echo "WARNING: \${f} not found in \${DATA}/INPUT/" >&2 + fi + done fi exit \$rc WRAPEOF diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index fb68182..24e6e93 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -443,14 +443,30 @@ print(ms) echo "Met source: ${met_source:-GFS (control)}" echo "DATM input dir: ${datm_dir}" - if [ -d "$datm_dir" ]; then - cp -p ${datm_dir}/*.nc ${MEMBER_DATA}/INPUT/ 2>/dev/null || true - echo "Staged DATM INPUT files from ${datm_dir}" - else + if [ ! 
-d "$datm_dir" ]; then echo "FATAL: DATM input directory not found: $datm_dir" >&2 return 1 fi + # Stage DATM files — check explicitly for datm_forcing.nc + if [ -s "${datm_dir}/datm_forcing.nc" ]; then + cp -p ${datm_dir}/datm_forcing.nc ${MEMBER_DATA}/INPUT/ + echo "Staged datm_forcing.nc from ${datm_dir}" + else + echo "FATAL: datm_forcing.nc not found in ${datm_dir}" >&2 + ls -la ${datm_dir}/ >&2 2>/dev/null + return 1 + fi + + # Stage ESMF mesh — use member-specific if available, else control + if [ -s "${datm_dir}/datm_esmf_mesh.nc" ]; then + cp -p ${datm_dir}/datm_esmf_mesh.nc ${MEMBER_DATA}/INPUT/ + echo "Staged datm_esmf_mesh.nc from ${datm_dir}" + elif [ -s "${COMOUT}/${RUN}.${cycle}.datm_input/datm_esmf_mesh.nc" ]; then + cp -p ${COMOUT}/${RUN}.${cycle}.datm_input/datm_esmf_mesh.nc ${MEMBER_DATA}/INPUT/ + echo "Staged datm_esmf_mesh.nc from control datm_input (member dir had none)" + fi + # Patch datm_in nx_global/ny_global if member forcing has different grid dims if [ -s "${MEMBER_DATA}/INPUT/datm_forcing.nc" ] && [ -s "${MEMBER_DATA}/datm_in" ]; then local mem_dims=$(python3 -c " @@ -469,6 +485,85 @@ ds.close() sed -i "s/nx_global[[:space:]]*=.*/nx_global = ${mem_nx}/" ${MEMBER_DATA}/datm_in sed -i "s/ny_global[[:space:]]*=.*/ny_global = ${mem_ny}/" ${MEMBER_DATA}/datm_in echo "Patched datm_in: nx_global=${mem_nx}, ny_global=${mem_ny}" + + # If grid dims differ from control, regenerate ESMF mesh + # Control mesh is built for blended grid (e.g., 1721x1721) + # GEFS raw forcing is on global 0.25 deg grid (1440x721) + local ctrl_mesh="${COMOUT}/${RUN}.${cycle}.datm_input/datm_esmf_mesh.nc" + if [ -s "$ctrl_mesh" ]; then + local ctrl_dims=$(python3 -c " +from netCDF4 import Dataset +ds = Dataset('${ctrl_mesh}', 'r') +# ESMF mesh nodeCount should match nx*ny +nc = len(ds.dimensions.get('nodeCount', ds.dimensions.get('node_count', []))) +print(nc) +ds.close() +" 2>/dev/null || echo "0") + local mem_total=$((mem_nx * mem_ny)) + if [ "${ctrl_dims}" != 
"${mem_total}" ] && [ "${ctrl_dims}" != "0" ]; then + echo "ESMF mesh mismatch: control=${ctrl_dims} nodes, member=${mem_total} (${mem_nx}x${mem_ny})" + echo "Regenerating ESMF mesh from member forcing file..." + python3 -c " +from netCDF4 import Dataset +import numpy as np + +ds = Dataset('${MEMBER_DATA}/INPUT/datm_forcing.nc', 'r') +try: + lons = ds.variables['longitude'][:] + lats = ds.variables['latitude'][:] + # 1D coordinate arrays -> 2D meshgrid + lon2d, lat2d = np.meshgrid(lons, lats) +except: + lon2d = ds.variables['longitude'][:] + lat2d = ds.variables['latitude'][:] +ds.close() + +ny, nx = lon2d.shape +n_nodes = ny * nx +n_elems = (ny - 1) * (nx - 1) + +out = Dataset('${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc', 'w') +out.createDimension('nodeCount', n_nodes) +out.createDimension('elementCount', n_elems) +out.createDimension('maxNodePElement', 4) +out.createDimension('coordDim', 2) + +nodeCoords = out.createVariable('nodeCoords', 'f8', ('nodeCount', 'coordDim')) +nodeCoords.units = 'degrees' +coords = np.column_stack([lon2d.ravel(), lat2d.ravel()]) +nodeCoords[:] = coords + +elemConn = out.createVariable('elementConn', 'i4', ('elementCount', 'maxNodePElement')) +elemConn.long_name = 'Node indices that define the element connectivity' +elemConn.start_index = 1 +conn = np.zeros((n_elems, 4), dtype=np.int32) +idx = 0 +for j in range(ny - 1): + for i in range(nx - 1): + n0 = j * nx + i + 1 + conn[idx] = [n0, n0 + 1, n0 + nx + 1, n0 + nx] + idx += 1 +elemConn[:] = conn + +numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) +numElemConn[:] = 4 + +centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) +centerCoords.units = 'degrees' +clon = 0.25 * (coords[conn[:, 0]-1, 0] + coords[conn[:, 1]-1, 0] + coords[conn[:, 2]-1, 0] + coords[conn[:, 3]-1, 0]) +clat = 0.25 * (coords[conn[:, 0]-1, 1] + coords[conn[:, 1]-1, 1] + coords[conn[:, 2]-1, 1] + coords[conn[:, 3]-1, 1]) +centerCoords[:] = np.column_stack([clon, 
clat]) + +out.title = 'ESMF unstructured mesh for DATM forcing' +out.gridType = 'unstructured mesh' +out.close() +print('Generated ESMF mesh: ${mem_nx}x${mem_ny} = {} nodes, {} elements'.format(n_nodes, n_elems)) +" 2>&1 + if [ $? -ne 0 ]; then + echo "WARNING: ESMF mesh regeneration failed — model may crash" >&2 + fi + fi + fi fi fi From cd98e57f17dcbd2d0c27dd7cc8d7050f8e2b3d49 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 28 Feb 2026 22:44:47 -0500 Subject: [PATCH 07/40] Fix ESMF mesh race condition: pre-generate once before cfp ESMF_Scrip2Unstruct fails when multiple cfp ranks run it concurrently (only rank 0 succeeds, ranks 1-4 produce empty mesh files). Fix: - Atmos prep: Generate GEFS 0.25-deg ESMF mesh once via Python/netCDF4 before cfp launches. Pass pre-generated mesh to all wrappers via DATM_ESMF_MESH env var. - Blended script: Check DATM_ESMF_MESH first in Step 4 priority chain, skipping ESMF_Scrip2Unstruct entirely when pre-generated mesh exists. - Remove broken heredoc attempt, keep only the python3 -c version. --- jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP | 67 +++++++++++++++++++ .../nos_ofs_create_datm_forcing_blended.sh | 6 +- 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP index ba5b705..e09771f 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP +++ b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP @@ -596,6 +596,71 @@ else # wgrib2 path export WGRIB2=${WGRIB2:-$(which wgrib2 2>/dev/null || echo wgrib2)} + # --------------------------------------------------------------- + # Pre-generate ESMF mesh for the GEFS 0.25-degree grid ONCE. + # All GEFS members share the same grid, so generate from the + # first available GEFS extraction sample using Python (not + # ESMF_Scrip2Unstruct which fails under parallel cfp). 
+ # --------------------------------------------------------------- + GEFS_MESH=${DATA}/datm_ensemble/gefs_esmf_mesh.nc + mkdir -p ${DATA}/datm_ensemble + echo "Pre-generating ESMF mesh for GEFS 0.25-degree grid..." + python3 -c " +import numpy as np +from netCDF4 import Dataset + +lons = np.arange(0, 360, 0.25) +lats = np.arange(-90, 90.25, 0.25) +nx, ny = len(lons), len(lats) +lon2d, lat2d = np.meshgrid(lons, lats) + +n_nodes = ny * nx +n_elems = (ny - 1) * (nx - 1) + +out = Dataset('${GEFS_MESH}', 'w') +out.createDimension('nodeCount', n_nodes) +out.createDimension('elementCount', n_elems) +out.createDimension('maxNodePElement', 4) +out.createDimension('coordDim', 2) + +nodeCoords = out.createVariable('nodeCoords', 'f8', ('nodeCount', 'coordDim')) +nodeCoords.units = 'degrees' +coords = np.column_stack([lon2d.ravel(), lat2d.ravel()]) +nodeCoords[:] = coords + +elemConn = out.createVariable('elementConn', 'i4', ('elementCount', 'maxNodePElement')) +elemConn.long_name = 'Node indices that define the element connectivity' +elemConn.start_index = 1 +conn = np.zeros((n_elems, 4), dtype=np.int32) +idx = 0 +for j in range(ny - 1): + for i in range(nx - 1): + n0 = j * nx + i + 1 + conn[idx] = [n0, n0 + 1, n0 + nx + 1, n0 + nx] + idx += 1 +elemConn[:] = conn + +numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) +numElemConn[:] = 4 + +centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) +centerCoords.units = 'degrees' +clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]-1,0] + coords[conn[:,3]-1,0]) +clat = 0.25 * (coords[conn[:,0]-1,1] + coords[conn[:,1]-1,1] + coords[conn[:,2]-1,1] + coords[conn[:,3]-1,1]) +centerCoords[:] = np.column_stack([clon, clat]) + +out.title = 'ESMF mesh for GEFS 0.25-degree global grid' +out.gridType = 'unstructured mesh' +out.close() +print('Generated GEFS ESMF mesh: {}x{} = {} nodes, {} elements'.format(nx, ny, n_nodes, n_elems)) +" + rc=$? 
+ if [ $rc -ne 0 ] || [ ! -s "${GEFS_MESH}" ]; then + echo "ERROR: Failed to pre-generate GEFS ESMF mesh" >&2 + exit 1 + fi + echo "Pre-generated GEFS ESMF mesh: ${GEFS_MESH}" + # Build cfp command file for parallel DATM forcing generation DATM_CMDFILE=${DATA}/datm_ensemble_cmdfile cat /dev/null > ${DATM_CMDFILE} @@ -619,6 +684,7 @@ export DATM_PRIMARY_SOURCE=${GEFS_SOURCE} export DATM_BLEND_HRRR_GFS=false export DATM_OUTPUT_SUFFIX=${MEM_SUFFIX} export DATM_SKIP_UFS_CONFIG=true +export DATM_ESMF_MESH=${GEFS_MESH} mkdir -p \${DATA}/INPUT ${USHnos}/nosofs/nos_ofs_create_datm_forcing_blended.sh ${DATM_DOMAIN:-SECOFS} rc=\$? @@ -661,6 +727,7 @@ export DATM_PRIMARY_SOURCE=GFS25 export DATM_BLEND_HRRR_GFS=false export DATM_OUTPUT_SUFFIX=rrfs export DATM_SKIP_UFS_CONFIG=true +export DATM_ESMF_MESH=${GEFS_MESH} mkdir -p \${DATA}/INPUT ${USHnos}/nosofs/nos_ofs_create_datm_forcing_blended.sh ${DATM_DOMAIN:-SECOFS} rc=\$? diff --git a/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh b/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh index 548752a..f6e8ede 100644 --- a/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh +++ b/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh @@ -269,7 +269,11 @@ echo "============================================" DATM_MESH_FILE=datm_esmf_mesh.nc -if [ -s "${BLEND_DIR}/datm_forcing_esmf_mesh.nc" ]; then +if [ -s "${DATM_ESMF_MESH:-}" ]; then + # Pre-generated mesh passed via env var (e.g., ensemble atmos prep) + cp -p ${DATM_ESMF_MESH} ${BLEND_DIR}/${DATM_MESH_FILE} + echo "Using pre-generated ESMF mesh: ${DATM_ESMF_MESH}" +elif [ -s "${BLEND_DIR}/datm_forcing_esmf_mesh.nc" ]; then # Blend script created the mesh cp -p ${BLEND_DIR}/datm_forcing_esmf_mesh.nc ${BLEND_DIR}/${DATM_MESH_FILE} echo "Using blended ESMF mesh" From 0c1f4b4996b0a024b2bf71d72d20e7d4751fc7b3 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 28 Feb 2026 23:30:03 -0500 Subject: [PATCH 08/40] Fix ESMF mesh: vectorize generation, check staged mesh not control mesh Two 
issues causing member failures: 1. Mesh mismatch check compared forcing dims against the control's mesh nodeCount. The control mesh (from ESMF_Scrip2Unstruct) has 2,965,284 nodes while the blended forcing is 1721x1721=2,961,841. This triggered unnecessary mesh regeneration for ALL members including control. Fix: compare against the member's own staged mesh instead. 2. Python for-loop for element connectivity was slow for ~3M elements. Replace with vectorized numpy (np.mgrid + column_stack) in both the atmos prep and ensemble run mesh generators. --- jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP | 12 +++---- ush/nos_ofs_ensemble_run.sh | 55 +++++++++++++++---------------- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP index e09771f..85c79ff 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP +++ b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP @@ -628,16 +628,14 @@ nodeCoords.units = 'degrees' coords = np.column_stack([lon2d.ravel(), lat2d.ravel()]) nodeCoords[:] = coords +# Vectorized element connectivity (no Python for-loop) +j_idx, i_idx = np.mgrid[0:ny-1, 0:nx-1] +n0 = (j_idx * nx + i_idx + 1).ravel() +conn = np.column_stack([n0, n0 + 1, n0 + nx + 1, n0 + nx]).astype(np.int32) + elemConn = out.createVariable('elementConn', 'i4', ('elementCount', 'maxNodePElement')) elemConn.long_name = 'Node indices that define the element connectivity' elemConn.start_index = 1 -conn = np.zeros((n_elems, 4), dtype=np.int32) -idx = 0 -for j in range(ny - 1): - for i in range(nx - 1): - n0 = j * nx + i + 1 - conn[idx] = [n0, n0 + 1, n0 + nx + 1, n0 + nx] - idx += 1 elemConn[:] = conn numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index 24e6e93..32ea328 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -486,24 +486,25 @@ ds.close() sed -i "s/ny_global[[:space:]]*=.*/ny_global = ${mem_ny}/" 
${MEMBER_DATA}/datm_in echo "Patched datm_in: nx_global=${mem_nx}, ny_global=${mem_ny}" - # If grid dims differ from control, regenerate ESMF mesh - # Control mesh is built for blended grid (e.g., 1721x1721) - # GEFS raw forcing is on global 0.25 deg grid (1440x721) - local ctrl_mesh="${COMOUT}/${RUN}.${cycle}.datm_input/datm_esmf_mesh.nc" - if [ -s "$ctrl_mesh" ]; then - local ctrl_dims=$(python3 -c " + # Verify ESMF mesh matches forcing dims. If the member's + # datm_input dir already had a matching mesh (e.g., from gefs_prep), + # no regeneration needed. Only regenerate if mesh node count != nx*ny. + local mem_total=$((mem_nx * mem_ny)) + local staged_mesh="${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc" + local mesh_nodes="0" + if [ -s "$staged_mesh" ]; then + mesh_nodes=$(python3 -c " from netCDF4 import Dataset -ds = Dataset('${ctrl_mesh}', 'r') -# ESMF mesh nodeCount should match nx*ny -nc = len(ds.dimensions.get('nodeCount', ds.dimensions.get('node_count', []))) -print(nc) +ds = Dataset('${staged_mesh}', 'r') +print(len(ds.dimensions.get('nodeCount', ds.dimensions.get('node_count', [])))) ds.close() " 2>/dev/null || echo "0") - local mem_total=$((mem_nx * mem_ny)) - if [ "${ctrl_dims}" != "${mem_total}" ] && [ "${ctrl_dims}" != "0" ]; then - echo "ESMF mesh mismatch: control=${ctrl_dims} nodes, member=${mem_total} (${mem_nx}x${mem_ny})" - echo "Regenerating ESMF mesh from member forcing file..." - python3 -c " + fi + + if [ "${mesh_nodes}" != "${mem_total}" ]; then + echo "ESMF mesh mismatch: mesh=${mesh_nodes} nodes, forcing=${mem_total} (${mem_nx}x${mem_ny})" + echo "Regenerating ESMF mesh (vectorized)..." 
+ python3 -c " from netCDF4 import Dataset import numpy as np @@ -511,7 +512,6 @@ ds = Dataset('${MEMBER_DATA}/INPUT/datm_forcing.nc', 'r') try: lons = ds.variables['longitude'][:] lats = ds.variables['latitude'][:] - # 1D coordinate arrays -> 2D meshgrid lon2d, lat2d = np.meshgrid(lons, lats) except: lon2d = ds.variables['longitude'][:] @@ -533,16 +533,14 @@ nodeCoords.units = 'degrees' coords = np.column_stack([lon2d.ravel(), lat2d.ravel()]) nodeCoords[:] = coords +# Vectorized element connectivity (no Python for-loop) +j_idx, i_idx = np.mgrid[0:ny-1, 0:nx-1] +n0 = (j_idx * nx + i_idx + 1).ravel() +conn = np.column_stack([n0, n0 + 1, n0 + nx + 1, n0 + nx]).astype(np.int32) + elemConn = out.createVariable('elementConn', 'i4', ('elementCount', 'maxNodePElement')) elemConn.long_name = 'Node indices that define the element connectivity' elemConn.start_index = 1 -conn = np.zeros((n_elems, 4), dtype=np.int32) -idx = 0 -for j in range(ny - 1): - for i in range(nx - 1): - n0 = j * nx + i + 1 - conn[idx] = [n0, n0 + 1, n0 + nx + 1, n0 + nx] - idx += 1 elemConn[:] = conn numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) @@ -550,8 +548,8 @@ numElemConn[:] = 4 centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) centerCoords.units = 'degrees' -clon = 0.25 * (coords[conn[:, 0]-1, 0] + coords[conn[:, 1]-1, 0] + coords[conn[:, 2]-1, 0] + coords[conn[:, 3]-1, 0]) -clat = 0.25 * (coords[conn[:, 0]-1, 1] + coords[conn[:, 1]-1, 1] + coords[conn[:, 2]-1, 1] + coords[conn[:, 3]-1, 1]) +clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]-1,0] + coords[conn[:,3]-1,0]) +clat = 0.25 * (coords[conn[:,0]-1,1] + coords[conn[:,1]-1,1] + coords[conn[:,2]-1,1] + coords[conn[:,3]-1,1]) centerCoords[:] = np.column_stack([clon, clat]) out.title = 'ESMF unstructured mesh for DATM forcing' @@ -559,10 +557,11 @@ out.gridType = 'unstructured mesh' out.close() print('Generated ESMF mesh: ${mem_nx}x${mem_ny} = {} 
nodes, {} elements'.format(n_nodes, n_elems)) " 2>&1 - if [ $? -ne 0 ]; then - echo "WARNING: ESMF mesh regeneration failed — model may crash" >&2 - fi + if [ $? -ne 0 ]; then + echo "WARNING: ESMF mesh regeneration failed — model may crash" >&2 fi + else + echo "ESMF mesh OK: ${mesh_nodes} nodes matches forcing ${mem_nx}x${mem_ny}" fi fi fi From 5347c9b28ad62089286a06924dc5887f419148c8 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 28 Feb 2026 23:50:38 -0500 Subject: [PATCH 09/40] Fix UFS ensemble crash: force OMP_NUM_THREADS=1, pre-fix control ESMF mesh Two fixes for UFS-Coastal ensemble failures: 1. Force OMP_NUM_THREADS=1 unconditionally in ensemble run. Cray PBS pre-sets OMP_NUM_THREADS to ncpus (128), causing massive thread oversubscription with 120 MPI ranks per node. All members (001-005) crashed at timestep 1 due to this. The deterministic model_run.sh already forces OMP_NUM_THREADS=1; ensemble was using conditional default which didn't override the PBS value. 2. Pre-generate correct control ESMF mesh during atmos prep. The det prep's ESMF_Scrip2Unstruct produces a 1723x1723 mesh (2,965,284 nodes) but the blended forcing is 1721x1721 (2,961,841 points). Member 000 was spending time regenerating the mesh at runtime. Now atmos prep checks and fixes the mesh before members launch. --- jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP | 94 +++++++++++++++++++++++++++++++ ush/nos_ofs_ensemble_run.sh | 6 +- 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP index 85c79ff..f971862 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP +++ b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP @@ -659,6 +659,100 @@ print('Generated GEFS ESMF mesh: {}x{} = {} nodes, {} elements'.format(nx, ny, n fi echo "Pre-generated GEFS ESMF mesh: ${GEFS_MESH}" + # --------------------------------------------------------------- + # Fix control (det prep) ESMF mesh if it doesn't match forcing dims. 
+ # ESMF_Scrip2Unstruct sometimes produces meshes with different node + # counts than the forcing grid (e.g., 1723x1723 for a 1721x1721 grid). + # Pre-generate the correct mesh here so member 000 never needs to + # regenerate at runtime. + # --------------------------------------------------------------- + CTRL_DATM_DIR=${COMOUT}/${RUN}.${cycle}.datm_input + if [ -s "${CTRL_DATM_DIR}/datm_forcing.nc" ]; then + echo "Checking control ESMF mesh against forcing dims..." + python3 -c " +import numpy as np +from netCDF4 import Dataset + +# Read forcing grid dims +ds = Dataset('${CTRL_DATM_DIR}/datm_forcing.nc', 'r') +try: + nx = len(ds.dimensions['x']) + ny = len(ds.dimensions['y']) + lons = ds.variables['longitude'][:] + lats = ds.variables['latitude'][:] + if lons.ndim == 1: + lon2d, lat2d = np.meshgrid(lons, lats) + else: + lon2d, lat2d = lons, lats +except: + nx = len(ds.dimensions['longitude']) + ny = len(ds.dimensions['latitude']) + lons = ds.variables['longitude'][:] + lats = ds.variables['latitude'][:] + lon2d, lat2d = np.meshgrid(lons, lats) +ds.close() + +forcing_total = nx * ny + +# Check existing mesh +mesh_path = '${CTRL_DATM_DIR}/datm_esmf_mesh.nc' +try: + mds = Dataset(mesh_path, 'r') + mesh_nodes = len(mds.dimensions.get('nodeCount', mds.dimensions.get('node_count', []))) + mds.close() +except: + mesh_nodes = 0 + +if mesh_nodes == forcing_total: + print('Control ESMF mesh OK: {} nodes matches forcing {}x{}'.format(mesh_nodes, nx, ny)) +else: + print('Control ESMF mesh mismatch: mesh={} vs forcing={}x{}={}'.format(mesh_nodes, nx, ny, forcing_total)) + print('Regenerating control ESMF mesh...') + n_nodes = ny * nx + n_elems = (ny - 1) * (nx - 1) + + out = Dataset(mesh_path, 'w') + out.createDimension('nodeCount', n_nodes) + out.createDimension('elementCount', n_elems) + out.createDimension('maxNodePElement', 4) + out.createDimension('coordDim', 2) + + nodeCoords = out.createVariable('nodeCoords', 'f8', ('nodeCount', 'coordDim')) + nodeCoords.units = 
'degrees' + coords = np.column_stack([lon2d.ravel(), lat2d.ravel()]) + nodeCoords[:] = coords + + j_idx, i_idx = np.mgrid[0:ny-1, 0:nx-1] + n0 = (j_idx * nx + i_idx + 1).ravel() + conn = np.column_stack([n0, n0+1, n0+nx+1, n0+nx]).astype(np.int32) + + elemConn = out.createVariable('elementConn', 'i4', ('elementCount', 'maxNodePElement')) + elemConn.long_name = 'Node indices that define the element connectivity' + elemConn.start_index = 1 + elemConn[:] = conn + + numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) + numElemConn[:] = 4 + + centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) + centerCoords.units = 'degrees' + clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]-1,0] + coords[conn[:,3]-1,0]) + clat = 0.25 * (coords[conn[:,0]-1,1] + coords[conn[:,1]-1,1] + coords[conn[:,2]-1,1] + coords[conn[:,3]-1,1]) + centerCoords[:] = np.column_stack([clon, clat]) + + out.title = 'ESMF mesh for control DATM forcing (corrected)' + out.gridType = 'unstructured mesh' + out.close() + print('Generated control ESMF mesh: {}x{} = {} nodes, {} elements'.format(nx, ny, n_nodes, n_elems)) +" 2>&1 + rc=$? 
+ if [ $rc -ne 0 ]; then + echo "WARNING: Control ESMF mesh check/fix failed (rc=$rc)" >&2 + fi + else + echo "WARNING: Control datm_forcing.nc not found at ${CTRL_DATM_DIR} — mesh not checked" + fi + # Build cfp command file for parallel DATM forcing generation DATM_CMDFILE=${DATA}/datm_ensemble_cmdfile cat /dev/null > ${DATM_CMDFILE} diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index 32ea328..bb70e01 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -624,8 +624,10 @@ print('Generated ESMF mesh: ${mem_nx}x${mem_ny} = {} nodes, {} elements'.format( # --- Set UFS-Coastal runtime environment --- export OMP_STACKSIZE=${OMP_STACKSIZE:-512M} - export OMP_NUM_THREADS=${OMP_NUM_THREADS:-1} - export OMP_PLACES=${OMP_PLACES:-cores} + # Force OMP_NUM_THREADS=1 — Cray PBS sets it to ncpus (128) which + # causes massive oversubscription with 120 MPI ranks per node + export OMP_NUM_THREADS=1 + export OMP_PLACES=cores export ESMF_RUNTIME_COMPLIANCECHECK=OFF:depth=4 export ESMF_RUNTIME_PROFILE=ON export ESMF_RUNTIME_PROFILE_OUTPUT="SUMMARY" From 0d8dff23b06c65b2727b7566882ac52bf7c755ee Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sun, 1 Mar 2026 08:35:50 -0500 Subject: [PATCH 10/40] Patch datm.streams per member to match actual forcing variables GEFS pgrb2sp25 product lacks SPFH_2maboveground and PRATE_surface, and uses PRMSL_meansealevel instead of MSLMA_meansealevel. The datm.streams template references all 8 variables from the GFS+HRRR blended control, causing DATM to crash at the first coupled timestep when variables aren't found in the forcing file. After staging datm.streams and datm_forcing.nc, read actual variable names from the forcing NetCDF and rebuild stream_data_variables01 to only include variables that exist. Maps both PRMSL and MSLMA to the same CMEPS field (Sa_pslv). 
--- ush/nos_ofs_ensemble_run.sh | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index bb70e01..bb5a419 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -566,6 +566,44 @@ print('Generated ESMF mesh: ${mem_nx}x${mem_ny} = {} nodes, {} elements'.format( fi fi + # Patch datm.streams to match actual forcing file variables. + # The template may reference variables (MSLMA, SPFH, PRATE) that + # don't exist in GEFS pgrb2sp25 forcing, or use MSLMA where the + # forcing has PRMSL. Build the stream_data_variables line from + # actual file contents. + if [ -s "${MEMBER_DATA}/INPUT/datm_forcing.nc" ] && [ -s "${MEMBER_DATA}/datm.streams" ]; then + echo "Patching datm.streams to match forcing variables..." + local new_vars=$(python3 -c " +from netCDF4 import Dataset +ds = Dataset('${MEMBER_DATA}/INPUT/datm_forcing.nc', 'r') +# Map forcing variable names to CMEPS field names +var_map = { + 'UGRD_10maboveground': 'Sa_u10m', + 'VGRD_10maboveground': 'Sa_v10m', + 'MSLMA_meansealevel': 'Sa_pslv', + 'PRMSL_meansealevel': 'Sa_pslv', + 'TMP_2maboveground': 'Sa_t2m', + 'SPFH_2maboveground': 'Sa_q2m', + 'DSWRF_surface': 'Faxa_swnet', + 'DLWRF_surface': 'Faxa_lwdn', + 'PRATE_surface': 'Faxa_rain', +} +parts = [] +for vname in ds.variables: + if vname in var_map: + parts.append('\"{} {}\"'.format(vname, var_map[vname])) +ds.close() +print(' '.join(parts)) +" 2>/dev/null || echo "") + if [ -n "$new_vars" ]; then + # Replace the stream_data_variables01 line + sed -i "s|^stream_data_variables01:.*|stream_data_variables01: ${new_vars}|" ${MEMBER_DATA}/datm.streams + echo "Patched datm.streams variables: ${new_vars}" + else + echo "WARNING: Could not read forcing variables — datm.streams unchanged" + fi + fi + # Patch model_configure for ensemble forecast: start time + duration local nhours=${LEN_FORECAST:-48} From 93faaff80f7b24d966cc1a5cde4004e2f2548d65 Mon Sep 17 
00:00:00 2001 From: mansurjisan Date: Sun, 1 Mar 2026 08:54:45 -0500 Subject: [PATCH 11/40] Add elementMask to Python-generated ESMF mesh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ESMF_Scrip2Unstruct (used by det prep) includes elementMask in the mesh file, but our Python mesh generator did not. DATM's InitializeRealize calls ESMF_MeshGet which requires element mask info. Without it: "mesh doesn't contain element mask information" → crash during ATM component initialization. Add elementMask (all zeros = unmasked) to all three Python mesh generation sites: GEFS mesh, control mesh (atmos prep), and runtime mesh regeneration (ensemble run). --- jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP | 3 +++ ush/nos_ofs_ensemble_run.sh | 3 +++ 2 files changed, 6 insertions(+) diff --git a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP index f971862..c886fb7 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP +++ b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP @@ -641,6 +641,9 @@ elemConn[:] = conn numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) numElemConn[:] = 4 +elementMask = out.createVariable('elementMask', 'i4', ('elementCount',)) +elementMask[:] = np.zeros(n_elems, dtype=np.int32) + centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) centerCoords.units = 'degrees' clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]-1,0] + coords[conn[:,3]-1,0]) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index bb5a419..99a38b8 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -546,6 +546,9 @@ elemConn[:] = conn numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) numElemConn[:] = 4 +elementMask = out.createVariable('elementMask', 'i4', ('elementCount',)) +elementMask[:] = np.zeros(n_elems, dtype=np.int32) + centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 
'coordDim')) centerCoords.units = 'degrees' clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]-1,0] + coords[conn[:,3]-1,0]) From bebedb714b5aa356e6d3a25317543298fd522666 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sun, 1 Mar 2026 09:25:32 -0500 Subject: [PATCH 12/40] Fix control member 000 elementMask: check mesh completeness not just nodeCount The runtime ESMF mesh validation only checked nodeCount, so a mesh with correct dimensions but missing elementMask (generated before the elementMask fix) was accepted as-is. Member 000 (GFS+HRRR control) used such a stale mesh and crashed with "mesh doesn't contain element mask information." Now also verify elementMask variable exists before skipping regeneration. --- ush/nos_ofs_ensemble_run.sh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index 99a38b8..222117f 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -501,8 +501,20 @@ ds.close() " 2>/dev/null || echo "0") fi - if [ "${mesh_nodes}" != "${mem_total}" ]; then - echo "ESMF mesh mismatch: mesh=${mesh_nodes} nodes, forcing=${mem_total} (${mem_nx}x${mem_ny})" + # Also check elementMask exists — meshes generated before the + # elementMask fix will pass the nodeCount check but still crash. + local has_emask="false" + if [ -s "$staged_mesh" ] && [ "${mesh_nodes}" = "${mem_total}" ]; then + has_emask=$(python3 -c " +from netCDF4 import Dataset +ds = Dataset('${staged_mesh}', 'r') +print('true' if 'elementMask' in ds.variables else 'false') +ds.close() +" 2>/dev/null || echo "false") + fi + + if [ "${mesh_nodes}" != "${mem_total}" ] || [ "${has_emask}" != "true" ]; then + echo "ESMF mesh needs regeneration: nodes=${mesh_nodes}/${mem_total}, elementMask=${has_emask}" echo "Regenerating ESMF mesh (vectorized)..." 
python3 -c " from netCDF4 import Dataset From 7be0133b458c62ed6b1c980a70e0c5034f868139 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sun, 1 Mar 2026 09:52:29 -0500 Subject: [PATCH 13/40] Fix control mesh OOM: patch elementMask in-place instead of full regeneration The 1721x1721 control mesh (3M nodes) full regeneration was killed by OOM (exit 137) on the PBS launch node, leaving the old mesh without elementMask. Split the fix into two paths: full regeneration only when nodeCount mismatches, lightweight in-place append of elementMask when the mesh geometry is correct but the variable is missing. --- ush/nos_ofs_ensemble_run.sh | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index 222117f..8a718bd 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -513,8 +513,9 @@ ds.close() " 2>/dev/null || echo "false") fi - if [ "${mesh_nodes}" != "${mem_total}" ] || [ "${has_emask}" != "true" ]; then - echo "ESMF mesh needs regeneration: nodes=${mesh_nodes}/${mem_total}, elementMask=${has_emask}" + if [ "${mesh_nodes}" != "${mem_total}" ]; then + # Node count mismatch — full regeneration needed + echo "ESMF mesh node mismatch: mesh=${mesh_nodes}, forcing=${mem_total} (${mem_nx}x${mem_ny})" echo "Regenerating ESMF mesh (vectorized)..." python3 -c " from netCDF4 import Dataset @@ -575,6 +576,22 @@ print('Generated ESMF mesh: ${mem_nx}x${mem_ny} = {} nodes, {} elements'.format( if [ $? -ne 0 ]; then echo "WARNING: ESMF mesh regeneration failed — model may crash" >&2 fi + elif [ "${has_emask}" != "true" ]; then + # Node count OK but elementMask missing — lightweight in-place patch + echo "ESMF mesh missing elementMask — patching in-place..." 
+ python3 -c " +from netCDF4 import Dataset +import numpy as np +ds = Dataset('${staged_mesh}', 'a') +n_elems = len(ds.dimensions['elementCount']) +em = ds.createVariable('elementMask', 'i4', ('elementCount',)) +em[:] = np.zeros(n_elems, dtype=np.int32) +ds.close() +print('Added elementMask ({} elements) to existing mesh'.format(n_elems)) +" 2>&1 + if [ $? -ne 0 ]; then + echo "WARNING: elementMask patch failed — model may crash" >&2 + fi else echo "ESMF mesh OK: ${mesh_nodes} nodes matches forcing ${mem_nx}x${mem_ny}" fi From 25a3ca7e911b9b48b12fdf6e21b30f508e7c8a2f Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sun, 1 Mar 2026 11:06:11 -0500 Subject: [PATCH 14/40] Fix wrong CMEPS field names in datm.streams template and ensemble patching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The datm.streams template and ensemble var_map used incorrect CMEPS field names that don't match the SCHISM NUOPC cap interface: - Sa_t2m → Sa_tbot (temperature) - Sa_q2m → Sa_shum (specific humidity) - Faxa_swnet → Faxa_swdn (shortwave radiation) The working deterministic scripts (modify_gfs_4_esmfmesh.py) already used the correct names. The wrong names caused SCHISM to receive zero values for all atmospheric fields including wind, resulting in zero staout_2/3/4 (pressure, wind_u, wind_v) in ensemble runs. 
--- fix/secofs_ufs/datm.streams.template | 2 +- ush/nos_ofs_ensemble_run.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fix/secofs_ufs/datm.streams.template b/fix/secofs_ufs/datm.streams.template index c4b96c3..3f5807c 100644 --- a/fix/secofs_ufs/datm.streams.template +++ b/fix/secofs_ufs/datm.streams.template @@ -12,4 +12,4 @@ stream_vectors01: null stream_mesh_file01: "@[DATM_INPUT_DIR]/@[DATM_MESH_FILE]" stream_lev_dimname01: null stream_data_files01: "@[DATM_INPUT_DIR]/@[DATM_FORCING_FILE]" -stream_data_variables01: "UGRD_10maboveground Sa_u10m" "VGRD_10maboveground Sa_v10m" "MSLMA_meansealevel Sa_pslv" "TMP_2maboveground Sa_t2m" "SPFH_2maboveground Sa_q2m" "DSWRF_surface Faxa_swnet" "DLWRF_surface Faxa_lwdn" "PRATE_surface Faxa_rain" +stream_data_variables01: "UGRD_10maboveground Sa_u10m" "VGRD_10maboveground Sa_v10m" "MSLMA_meansealevel Sa_pslv" "TMP_2maboveground Sa_tbot" "SPFH_2maboveground Sa_shum" "DSWRF_surface Faxa_swdn" "DLWRF_surface Faxa_lwdn" "PRATE_surface Faxa_rain" diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index 8a718bd..c1b152e 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -614,9 +614,9 @@ var_map = { 'VGRD_10maboveground': 'Sa_v10m', 'MSLMA_meansealevel': 'Sa_pslv', 'PRMSL_meansealevel': 'Sa_pslv', - 'TMP_2maboveground': 'Sa_t2m', - 'SPFH_2maboveground': 'Sa_q2m', - 'DSWRF_surface': 'Faxa_swnet', + 'TMP_2maboveground': 'Sa_tbot', + 'SPFH_2maboveground': 'Sa_shum', + 'DSWRF_surface': 'Faxa_swdn', 'DLWRF_surface': 'Faxa_lwdn', 'PRATE_surface': 'Faxa_rain', } From 9c9084285d5ab1984d7a9287b4c34f453a823b81 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sun, 1 Mar 2026 11:33:46 -0500 Subject: [PATCH 15/40] Fix ensemble wind: add missing GEFS vars instead of stripping datm.streams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverts the wrong CMEPS field name change (Sa_tbot→Sa_t2m etc) — the deterministic run 
confirms Sa_t2m/Sa_q2m/Faxa_swnet are correct. Root cause: the ensemble patching code stripped SPFH/PRATE from datm.streams and renamed MSLMA→PRMSL, making the config differ from the working deterministic (which has all 8 variables). This caused SCHISM NUOPC cap to not fully initialize atmospheric coupling, resulting in zero wind/pressure in staout. Fix: instead of stripping variables from datm.streams, add missing SPFH (zeros) and PRATE (zeros) to the forcing NetCDF and rename PRMSL→MSLMA, so the ensemble datm.streams stays identical to the deterministic with all 8 fields. --- fix/secofs_ufs/datm.streams.template | 2 +- ush/nos_ofs_ensemble_run.sh | 80 ++++++++++++++++------------ 2 files changed, 47 insertions(+), 35 deletions(-) diff --git a/fix/secofs_ufs/datm.streams.template b/fix/secofs_ufs/datm.streams.template index 3f5807c..c4b96c3 100644 --- a/fix/secofs_ufs/datm.streams.template +++ b/fix/secofs_ufs/datm.streams.template @@ -12,4 +12,4 @@ stream_vectors01: null stream_mesh_file01: "@[DATM_INPUT_DIR]/@[DATM_MESH_FILE]" stream_lev_dimname01: null stream_data_files01: "@[DATM_INPUT_DIR]/@[DATM_FORCING_FILE]" -stream_data_variables01: "UGRD_10maboveground Sa_u10m" "VGRD_10maboveground Sa_v10m" "MSLMA_meansealevel Sa_pslv" "TMP_2maboveground Sa_tbot" "SPFH_2maboveground Sa_shum" "DSWRF_surface Faxa_swdn" "DLWRF_surface Faxa_lwdn" "PRATE_surface Faxa_rain" +stream_data_variables01: "UGRD_10maboveground Sa_u10m" "VGRD_10maboveground Sa_v10m" "MSLMA_meansealevel Sa_pslv" "TMP_2maboveground Sa_t2m" "SPFH_2maboveground Sa_q2m" "DSWRF_surface Faxa_swnet" "DLWRF_surface Faxa_lwdn" "PRATE_surface Faxa_rain" diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index c1b152e..6394c4e 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -598,42 +598,54 @@ print('Added elementMask ({} elements) to existing mesh'.format(n_elems)) fi fi - # Patch datm.streams to match actual forcing file variables. 
- # The template may reference variables (MSLMA, SPFH, PRATE) that - # don't exist in GEFS pgrb2sp25 forcing, or use MSLMA where the - # forcing has PRMSL. Build the stream_data_variables line from - # actual file contents. - if [ -s "${MEMBER_DATA}/INPUT/datm_forcing.nc" ] && [ -s "${MEMBER_DATA}/datm.streams" ]; then - echo "Patching datm.streams to match forcing variables..." - local new_vars=$(python3 -c " + # Ensure forcing NetCDF has all 8 variables expected by datm.streams. + # GEFS pgrb2sp25 lacks SPFH_2maboveground, PRATE_surface, and uses + # PRMSL_meansealevel instead of MSLMA_meansealevel. Rather than + # stripping variables from datm.streams (which breaks SCHISM's + # atmospheric coupling), add missing variables as zeros and rename + # PRMSL→MSLMA so the config matches the working deterministic exactly. + if [ -s "${MEMBER_DATA}/INPUT/datm_forcing.nc" ]; then + echo "Checking forcing NetCDF for required variables..." + python3 -c " from netCDF4 import Dataset -ds = Dataset('${MEMBER_DATA}/INPUT/datm_forcing.nc', 'r') -# Map forcing variable names to CMEPS field names -var_map = { - 'UGRD_10maboveground': 'Sa_u10m', - 'VGRD_10maboveground': 'Sa_v10m', - 'MSLMA_meansealevel': 'Sa_pslv', - 'PRMSL_meansealevel': 'Sa_pslv', - 'TMP_2maboveground': 'Sa_tbot', - 'SPFH_2maboveground': 'Sa_shum', - 'DSWRF_surface': 'Faxa_swdn', - 'DLWRF_surface': 'Faxa_lwdn', - 'PRATE_surface': 'Faxa_rain', -} -parts = [] -for vname in ds.variables: - if vname in var_map: - parts.append('\"{} {}\"'.format(vname, var_map[vname])) +import numpy as np + +ds = Dataset('${MEMBER_DATA}/INPUT/datm_forcing.nc', 'a') +vnames = list(ds.variables.keys()) +time_dim = 'time' +lat_dim = [d for d in ds.dimensions if d in ('latitude','lat')][0] +lon_dim = [d for d in ds.dimensions if d in ('longitude','lon')][0] +shape_3d = (len(ds.dimensions[time_dim]), len(ds.dimensions[lat_dim]), len(ds.dimensions[lon_dim])) +changed = False + +# Rename PRMSL → MSLMA if needed +if 'PRMSL_meansealevel' in 
vnames and 'MSLMA_meansealevel' not in vnames: + ds.renameVariable('PRMSL_meansealevel', 'MSLMA_meansealevel') + print('Renamed PRMSL_meansealevel → MSLMA_meansealevel') + changed = True + +# Add missing SPFH as zeros +if 'SPFH_2maboveground' not in vnames: + v = ds.createVariable('SPFH_2maboveground', 'f4', (time_dim, lat_dim, lon_dim)) + v.long_name = '2m specific humidity' + v.units = 'kg/kg' + v[:] = np.zeros(shape_3d, dtype=np.float32) + print('Added SPFH_2maboveground (zeros) — GEFS pgrb2sp25 lacks this field') + changed = True + +# Add missing PRATE as zeros +if 'PRATE_surface' not in vnames: + v = ds.createVariable('PRATE_surface', 'f4', (time_dim, lat_dim, lon_dim)) + v.long_name = 'surface precipitation rate' + v.units = 'kg/m2/s' + v[:] = np.zeros(shape_3d, dtype=np.float32) + print('Added PRATE_surface (zeros) — GEFS pgrb2sp25 lacks this field') + changed = True + ds.close() -print(' '.join(parts)) -" 2>/dev/null || echo "") - if [ -n "$new_vars" ]; then - # Replace the stream_data_variables01 line - sed -i "s|^stream_data_variables01:.*|stream_data_variables01: ${new_vars}|" ${MEMBER_DATA}/datm.streams - echo "Patched datm.streams variables: ${new_vars}" - else - echo "WARNING: Could not read forcing variables — datm.streams unchanged" - fi +if not changed: + print('All 8 required variables present — no changes needed') +" 2>&1 fi # Patch model_configure for ensemble forecast: start time + duration From 65c6843a75bdcaf9bf3ff1a64d85619e5b209d24 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sun, 1 Mar 2026 13:32:59 -0500 Subject: [PATCH 16/40] Fix ESMF elementMask: use 1 (active) not 0 (masked out) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CMEPS uses srcMaskValues=(/0/) for ATM→OCN regridding, meaning elements with mask=0 are EXCLUDED from bilinear interpolation. The ensemble mesh code was setting elementMask to all zeros, causing every DATM element to be masked out. 
The mediator then produced zeros for all fields sent to SCHISM — confirmed by PET logs showing DATM sends real wind (Sa_u10m: -12.8 to 2.0) but SCHISM receives all zeros (inst_zonal_wind_height10m: 0.0). The DET mesh (from ESMF_Scrip2Unstruct) works because it has NO elementMask variable — ESMF defaults to all elements active. Fix: set elementMask=1 (active) in both full mesh regeneration and the lightweight in-place patch paths. --- ush/nos_ofs_ensemble_run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index 6394c4e..247c19d 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -560,7 +560,7 @@ numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) numElemConn[:] = 4 elementMask = out.createVariable('elementMask', 'i4', ('elementCount',)) -elementMask[:] = np.zeros(n_elems, dtype=np.int32) +elementMask[:] = np.ones(n_elems, dtype=np.int32) centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) centerCoords.units = 'degrees' @@ -585,7 +585,7 @@ import numpy as np ds = Dataset('${staged_mesh}', 'a') n_elems = len(ds.dimensions['elementCount']) em = ds.createVariable('elementMask', 'i4', ('elementCount',)) -em[:] = np.zeros(n_elems, dtype=np.int32) +em[:] = np.ones(n_elems, dtype=np.int32) ds.close() print('Added elementMask ({} elements) to existing mesh'.format(n_elems)) " 2>&1 From f1dad2af8355f307151bf24af094ca1ccee85b23 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sun, 1 Mar 2026 16:50:48 -0500 Subject: [PATCH 17/40] Fix elementMask=0 in ATMOS_PREP GEFS mesh generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix (65c6843) only fixed elementMask in the member job's mesh regeneration/patching code. But the actual mesh used by GEFS members is pre-generated in JNOS_OFS_ENSEMBLE_ATMOS_PREP and archived to $COMOUT/datm_input_gefs_NN/. 
The member job sees "ESMF mesh OK" (correct node count + elementMask exists) and uses the ATMOS_PREP mesh as-is — with elementMask=0 (all masked out). Fix: set elementMask=1 in both the GEFS global mesh generator (line 645) and the control mesh regenerator (line 740, also add elementMask which was previously missing). --- jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP index c886fb7..ef56a06 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP +++ b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP @@ -642,7 +642,7 @@ numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) numElemConn[:] = 4 elementMask = out.createVariable('elementMask', 'i4', ('elementCount',)) -elementMask[:] = np.zeros(n_elems, dtype=np.int32) +elementMask[:] = np.ones(n_elems, dtype=np.int32) centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) centerCoords.units = 'degrees' @@ -737,6 +737,9 @@ else: numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) numElemConn[:] = 4 + elementMask = out.createVariable('elementMask', 'i4', ('elementCount',)) + elementMask[:] = np.ones(n_elems, dtype=np.int32) + centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) centerCoords.units = 'degrees' clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]-1,0] + coords[conn[:,3]-1,0]) From be48a1951f02b3bb9979285b0534548791cf21cd Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Fri, 13 Mar 2026 02:59:11 -0400 Subject: [PATCH 18/40] Fix GEFS ensemble mesh: use corrected control mesh for all cfp members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pre-generated GEFS ESMF mesh used a global 0.25° grid (1440×721 = 1,038,240 nodes), but the actual GEFS forcing is extracted to the SECOFS subdomain (1721×1721 = 2,961,841 nodes). 
The node count mismatch caused nos_ofs_create_datm_forcing_blended.sh to fall through to ESMF_Scrip2Unstruct, and 4 parallel cfp tasks racing that tool caused members 02-04 to fail. Fix: after regenerating the control mesh with correct dimensions, copy it to GEFS_MESH so all cfp wrappers inherit the corrected mesh. --- jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP index ef56a06..ed7df92 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP +++ b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP @@ -755,6 +755,13 @@ else: if [ $rc -ne 0 ]; then echo "WARNING: Control ESMF mesh check/fix failed (rc=$rc)" >&2 fi + + # Update GEFS_MESH to use the corrected control mesh so all cfp + # wrappers get the right one (same SECOFS subdomain grid for all members) + if [ -s "${CTRL_DATM_DIR}/datm_esmf_mesh.nc" ]; then + cp -p ${CTRL_DATM_DIR}/datm_esmf_mesh.nc ${GEFS_MESH} + echo "Updated GEFS_MESH from corrected control mesh ($(python3 -c "from netCDF4 import Dataset; ds=Dataset('${GEFS_MESH}'); print(len(ds.dimensions['nodeCount'])); ds.close()" 2>/dev/null || echo '?') nodes)" + fi else echo "WARNING: Control datm_forcing.nc not found at ${CTRL_DATM_DIR} — mesh not checked" fi From c4df0630fae8539c26220acff3f66a36c324db73 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Fri, 13 Mar 2026 03:06:37 -0400 Subject: [PATCH 19/40] Add PDY to ensemble job log filenames for run distinction Log files now include the PDY date to distinguish runs on different days: secofs_ufs_ens000_12.20260311.out (was: secofs_ufs_ens000_12.out) secofs_ufs_gefs_prep_12.20260311.out secofs_ufs_enspost_12.20260311.out Also added explicit -o/-e for ensemble post job. 
--- pbs/launch_secofs_ensemble.sh | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pbs/launch_secofs_ensemble.sh b/pbs/launch_secofs_ensemble.sh index c4ad88a..75d5564 100644 --- a/pbs/launch_secofs_ensemble.sh +++ b/pbs/launch_secofs_ensemble.sh @@ -243,14 +243,14 @@ if [ "${ATMOS_ENSEMBLE}" = true ]; then # YAML correctly distinguishes GEFS members from RRFS/other sources. ATMOS_QSUB_ARGS=(-v "CYC=${CYC},PDY=${PDY},GEFS_ENSEMBLE=true,OFS=${OFS}" \ -N "${OFS}_gefs_prep_${CYC}" \ - -o "${RPTDIR}/${OFS}_gefs_prep_${CYC}.out" \ - -e "${RPTDIR}/${OFS}_gefs_prep_${CYC}.err") + -o "${RPTDIR}/${OFS}_gefs_prep_${CYC}.${PDY}.out" \ + -e "${RPTDIR}/${OFS}_gefs_prep_${CYC}.${PDY}.err") else echo ">>> Submitting atmospheric ensemble prep job..." ATMOS_QSUB_ARGS=(-v "CYC=${CYC},PDY=${PDY},OFS=${OFS}" \ -N "${OFS}_atmos_prep_${CYC}" \ - -o "${RPTDIR}/${OFS}_atmos_prep_${CYC}.out" \ - -e "${RPTDIR}/${OFS}_atmos_prep_${CYC}.err") + -o "${RPTDIR}/${OFS}_atmos_prep_${CYC}.${PDY}.out" \ + -e "${RPTDIR}/${OFS}_atmos_prep_${CYC}.${PDY}.err") fi if [ -n "${PREP_JOBID_SHORT}" ]; then ATMOS_QSUB_ARGS+=(-W "depend=afterok:${PREP_JOBID_SHORT}") @@ -340,10 +340,11 @@ for i in $(seq 0 $((N_MEMBERS - 1))); do GEFS_MEM_ID=$(printf '%02d' $i) MEMBER_VARS="${MEMBER_VARS},GEFS_ENSEMBLE=true,GEFS_MEMBER_ID=${GEFS_MEM_ID}" fi + _PBSID_TAG=".${PDY}" # PDY tag to distinguish runs on different dates QSUB_ARGS=(-v "${MEMBER_VARS}" \ -N "${OFS}_ens${MID}_${CYC}" \ - -o "${RPTDIR}/${OFS}_ens${MID}_${CYC}.out" \ - -e "${RPTDIR}/${OFS}_ens${MID}_${CYC}.err") + -o "${RPTDIR}/${OFS}_ens${MID}_${CYC}${_PBSID_TAG}.out" \ + -e "${RPTDIR}/${OFS}_ens${MID}_${CYC}${_PBSID_TAG}.err") if [ -n "${MEMBER_DEP_STR}" ]; then QSUB_ARGS+=(-W "depend=${MEMBER_DEP_STR}") fi @@ -370,6 +371,8 @@ done ENSPOST_JOBID=$(qsub \ -v "N_MEMBERS=${N_MEMBERS},CYC=${CYC},PDY=${PDY},OFS=${OFS}" \ -N "${OFS}_enspost_${CYC}" \ + -o "${RPTDIR}/${OFS}_enspost_${CYC}.${PDY}.out" \ + -e 
"${RPTDIR}/${OFS}_enspost_${CYC}.${PDY}.err" \ -W depend=${DEP_STR} \ "${POST_PBS}") echo " Ensemble post: ${ENSPOST_JOBID}" From 5e0cfd735c7607e851ecb8bf95a3174a2be0142c Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Fri, 13 Mar 2026 03:26:09 -0400 Subject: [PATCH 20/40] Fix GEFS ensemble: pre-copy mesh to avoid cfp race, fix IndexError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes for UFS-SECOFS ensemble GEFS atmos_prep failures: 1. Remove wrong control→GEFS mesh copy: control mesh is 1721x1721 (SECOFS subdomain) but GEFS forcing is 1440x721 (global 0.25deg). Copying control mesh over GEFS mesh broke all members. 2. Pre-copy GEFS mesh into each member's blend dir as datm_forcing_esmf_mesh.nc before calling blended script. The DATM_ESMF_MESH env var is set but not visible to the blended script under cfp parallel execution (unknown cause). Pre-copying uses the blended script's existing fallback path at line 276, completely avoiding ESMF_Scrip2Unstruct and eliminating the cfp race condition that caused 4/5 members to fail. 3. Fix IndexError in ensemble_run.sh variable checker: control forcing uses x/y dimensions (from blended output) but code only searched for latitude/longitude. Added y/x with graceful fallback. 
--- jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP | 13 +++++++------ ush/nos_ofs_ensemble_run.sh | 10 ++++++++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP index ed7df92..4bae3a2 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP +++ b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP @@ -756,12 +756,6 @@ else: echo "WARNING: Control ESMF mesh check/fix failed (rc=$rc)" >&2 fi - # Update GEFS_MESH to use the corrected control mesh so all cfp - # wrappers get the right one (same SECOFS subdomain grid for all members) - if [ -s "${CTRL_DATM_DIR}/datm_esmf_mesh.nc" ]; then - cp -p ${CTRL_DATM_DIR}/datm_esmf_mesh.nc ${GEFS_MESH} - echo "Updated GEFS_MESH from corrected control mesh ($(python3 -c "from netCDF4 import Dataset; ds=Dataset('${GEFS_MESH}'); print(len(ds.dimensions['nodeCount'])); ds.close()" 2>/dev/null || echo '?') nodes)" - fi else echo "WARNING: Control datm_forcing.nc not found at ${CTRL_DATM_DIR} — mesh not checked" fi @@ -791,6 +785,10 @@ export DATM_OUTPUT_SUFFIX=${MEM_SUFFIX} export DATM_SKIP_UFS_CONFIG=true export DATM_ESMF_MESH=${GEFS_MESH} mkdir -p \${DATA}/INPUT +# Pre-copy GEFS mesh into blend dir so blended script finds it at Step 4 +# (env var DATM_ESMF_MESH is unreliable under cfp parallel execution) +mkdir -p \${DATA}/datm_forcing/blended +cp -p ${GEFS_MESH} \${DATA}/datm_forcing/blended/datm_forcing_esmf_mesh.nc ${USHnos}/nosofs/nos_ofs_create_datm_forcing_blended.sh ${DATM_DOMAIN:-SECOFS} rc=\$? if [ \$rc -eq 0 ]; then @@ -834,6 +832,9 @@ export DATM_OUTPUT_SUFFIX=rrfs export DATM_SKIP_UFS_CONFIG=true export DATM_ESMF_MESH=${GEFS_MESH} mkdir -p \${DATA}/INPUT +# Pre-copy GEFS mesh into blend dir (same fix as GEFS wrappers above) +mkdir -p \${DATA}/datm_forcing/blended +cp -p ${GEFS_MESH} \${DATA}/datm_forcing/blended/datm_forcing_esmf_mesh.nc ${USHnos}/nosofs/nos_ofs_create_datm_forcing_blended.sh ${DATM_DOMAIN:-SECOFS} rc=\$? 
if [ \$rc -eq 0 ]; then diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index 247c19d..1cf6dd5 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -613,8 +613,14 @@ import numpy as np ds = Dataset('${MEMBER_DATA}/INPUT/datm_forcing.nc', 'a') vnames = list(ds.variables.keys()) time_dim = 'time' -lat_dim = [d for d in ds.dimensions if d in ('latitude','lat')][0] -lon_dim = [d for d in ds.dimensions if d in ('longitude','lon')][0] +lat_dims = [d for d in ds.dimensions if d in ('latitude','lat','y')] +lon_dims = [d for d in ds.dimensions if d in ('longitude','lon','x')] +if not lat_dims or not lon_dims: + print('Forcing dims not recognized: {} — skipping variable check'.format(list(ds.dimensions.keys()))) + ds.close() + import sys; sys.exit(0) +lat_dim = lat_dims[0] +lon_dim = lon_dims[0] shape_3d = (len(ds.dimensions[time_dim]), len(ds.dimensions[lat_dim]), len(ds.dimensions[lon_dim])) changed = False From 5541f2b593b6e36b188008efb320fbcdc3ee49d3 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Fri, 13 Mar 2026 07:37:07 -0400 Subject: [PATCH 21/40] Add per-member SCHISM output combining to ensemble post-processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix ensemble post PBS script: OFS now from qsub -v (was hardcoded secofs), add udunits before nco, use ${OFS} in paths, 2hr walltime - Add parameterized schism_combine_outputs.py that reads grid dimensions dynamically (replaces hardcoded schism_fields_station_redo.py) - JNOS_OFS_ENSEMBLE_POST Step 1: per-member combining produces fields.fNNN.nc + stations.forecast.nc matching deterministic format - FIX file staging handles _ufs prefix variants (secofs_ufs → secofs) --- jobs/JNOS_OFS_ENSEMBLE_POST | 104 ++++++- pbs/jnos_secofs_ensemble_post.pbs | 22 +- ush/nosofs/schism_combine_outputs.py | 424 +++++++++++++++++++++++++++ 3 files changed, 537 insertions(+), 13 deletions(-) create mode 100644 
ush/nosofs/schism_combine_outputs.py diff --git a/jobs/JNOS_OFS_ENSEMBLE_POST b/jobs/JNOS_OFS_ENSEMBLE_POST index 84a7f29..8e1a95f 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_POST +++ b/jobs/JNOS_OFS_ENSEMBLE_POST @@ -114,6 +114,11 @@ fi export COMOUTroot=${COMOUTroot:-${_comout_root:-${COMROOT}/${NET}/${_com_ver}}} export COMOUT=${COMOUT:-${COMOUTroot}/${RUN}.${PDY}} +############################################## +# FIX directory +############################################## +export FIXofs=${FIXofs:-${HOMEnos}/fix/${OFS}} + ############################################## # Build member directory list ############################################## @@ -137,12 +142,98 @@ echo "Found member directories:" echo "${MEMBER_DIRS}" | tr ' ' '\n' ######################################################## -# Run ensemble post-processing +# Step 1: Per-member SCHISM output combining +# +# Converts raw SCHISM split outputs (out2d_*.nc, temperature_*.nc, etc.) +# into CO-OPS standard per-timestep field files and station timeseries, +# matching the deterministic output format: +# {PREFIXNOS}.t{cyc}z.{PDY}.fields.fNNN.nc +# {PREFIXNOS}.t{cyc}z.{PDY}.stations.forecast.nc ######################################################## -echo "=== Computing ensemble statistics ===" +echo "=== Per-member SCHISM output combining ===" export PYTHONPATH=${HOMEnos}/ush/python:${PYTHONPATH:-} +# Determine forecast start time (= nowcast end) +TIMESTART=${PDY}$(printf '%02d' "${cyc}") +if [ -f "${COMOUT}/time_nowcastend.${cycle}" ]; then + read TIMESTART < "${COMOUT}/time_nowcastend.${cycle}" + echo " Forecast start time from nowcast: ${TIMESTART}" +fi + +COMBINE_SCRIPT="${USHnos}/nosofs/schism_combine_outputs.py" +if [ ! -f "${COMBINE_SCRIPT}" ]; then + echo "WARNING: ${COMBINE_SCRIPT} not found — skipping per-member combining" +else + for MDIR in ${MEMBER_DIRS}; do + MID=$(basename ${MDIR} | sed 's/member_//') + OUTPUTS_DIR="${MDIR}/outputs" + + if [ ! 
-d "${OUTPUTS_DIR}" ]; then + echo " Member ${MID}: no outputs/ directory, skipping" + continue + fi + + # Skip if already combined + if ls ${MDIR}/${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc 1>/dev/null 2>&1; then + echo " Member ${MID}: already combined, skipping" + continue + fi + + # Check for SCHISM combined outputs + if [ ! -f "${OUTPUTS_DIR}/out2d_1.nc" ]; then + echo " Member ${MID}: no out2d_1.nc, skipping" + continue + fi + + echo " Combining outputs for member ${MID}..." + + # Stage FIX files (handle UFS-Coastal prefix variants: + # secofs_ufs uses same grid as secofs, so FIX files may + # be named secofs.* rather than secofs_ufs.*) + for _ext in nv.nc sigma.dat station.lat.lon; do + _target="${OUTPUTS_DIR}/${PREFIXNOS}.${_ext}" + if [ ! -f "${_target}" ]; then + if [ -f "${FIXofs}/${PREFIXNOS}.${_ext}" ]; then + cp -p "${FIXofs}/${PREFIXNOS}.${_ext}" "${_target}" + else + _base=${PREFIXNOS%_ufs} + if [ "${_base}" != "${PREFIXNOS}" ] && \ + [ -f "${FIXofs}/${_base}.${_ext}" ]; then + cp -p "${FIXofs}/${_base}.${_ext}" "${_target}" + fi + fi + fi + done + + python3 ${COMBINE_SCRIPT} \ + --prefix ${PREFIXNOS} \ + --cyc $(printf '%02d' "${cyc}") \ + --pdy ${PDY} \ + --mode f \ + --timestart ${TIMESTART} \ + --workdir ${OUTPUTS_DIR} + combine_err=$? 
+ + if [ $combine_err -ne 0 ]; then + echo "WARNING: Combining failed for member ${MID} (rc=${combine_err})" + continue + fi + + # Move products from outputs/ to member directory + mv ${OUTPUTS_DIR}/${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc ${MDIR}/ 2>/dev/null + mv ${OUTPUTS_DIR}/${PREFIXNOS}.${cycle}.${PDY}.stations.forecast.nc ${MDIR}/ 2>/dev/null + + N_FIELDS=$(ls ${MDIR}/${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc 2>/dev/null | wc -l) + echo " Member ${MID}: ${N_FIELDS} field files + station file created" + done +fi + +######################################################## +# Step 2: Ensemble statistics (mean, spread, percentiles) +######################################################## +echo "=== Computing ensemble statistics ===" + STATS_DIR="${COMOUT}/ensemble/stats" # VARIABLES env var: space-separated list (e.g., "elevation temperature") @@ -159,11 +250,14 @@ python3 -m nos_ofs.ensemble.ensemble_post \ export err=$? if [ $err -ne 0 ]; then - echo "ERROR: Ensemble post-processing failed (exit code: ${err})" >&2 - exit $err + echo "WARNING: Ensemble statistics failed (exit code: ${err})" >&2 + echo "Per-member combined outputs may still be valid." 
fi -echo "Ensemble statistics written to ${STATS_DIR}" +echo "Ensemble post-processing complete" +if [ -d "${STATS_DIR}" ]; then + echo "Statistics written to ${STATS_DIR}" +fi ######################################################## # Post-processing diff --git a/pbs/jnos_secofs_ensemble_post.pbs b/pbs/jnos_secofs_ensemble_post.pbs index 0cdb4fa..0a8a158 100644 --- a/pbs/jnos_secofs_ensemble_post.pbs +++ b/pbs/jnos_secofs_ensemble_post.pbs @@ -6,14 +6,17 @@ #PBS -e /lfs/h1/nos/ptmp/mansur.jisan/rpt/v3.7.0/secofs_enspost_00.err #PBS -l select=1:ncpus=8:mpiprocs=8 #PBS -l place=vscatter -#PBS -l walltime=01:00:00 +#PBS -l walltime=02:00:00 # ------------------------------------------------------------------ # SECOFS Ensemble Post-Processing PBS Job # -# Computes ensemble statistics (mean, spread, percentiles) from all -# member outputs. Submit via launch_secofs_ensemble.sh or manually: -# qsub -v N_MEMBERS=5,CYC=00 jnos_secofs_ensemble_post.pbs +# Combines per-member SCHISM outputs into field/station products +# (same format as deterministic), then computes ensemble statistics +# (mean, spread, percentiles). +# +# Submit via launch_secofs_ensemble.sh or manually: +# qsub -v N_MEMBERS=5,CYC=00,OFS=secofs jnos_secofs_ensemble_post.pbs # ------------------------------------------------------------------ CYC=${CYC:-00} @@ -22,13 +25,16 @@ N_MEMBERS=${N_MEMBERS:-5} # PDY: accept from qsub -v, fall back to today export PDY=${PDY:-$(date +%Y%m%d)} +# OFS: accept from qsub -v (launcher passes this), fall back to secofs +export OFS=${OFS:-secofs} + # Source version file PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages . 
${PACKAGEROOT}/nosofs.v3.7.0/versions/run.ver # Working directory -RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs -WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/secofs/ensemble +RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/${OFS} +WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${OFS}/ensemble mkdir -p -m 755 $RPTDIR $WORKDIR cd ${WORKDIR} @@ -45,6 +51,7 @@ module load netcdf/${netcdf_ver:-4.7.4} module load python/${python_ver:-3.8.6} module load prod_envir/${prod_envir_ver:-2.0.6} module load prod_util/${prod_util_ver:-2.0.17} +module load udunits/${udunits_ver:-2.2.28} module load nco/${nco_ver:-5.0.6} module list @@ -52,12 +59,11 @@ module list # EXPORT list set +x export envir=dev -export OFS=secofs export cyc=${CYC} export NET=nosofs export model=nosofs export N_MEMBERS=${N_MEMBERS} -export job=secofs_enspost_${CYC}_$envir +export job=${OFS}_enspost_${CYC}_$envir export platform=ptmp export KEEPDATA=YES diff --git a/ush/nosofs/schism_combine_outputs.py b/ush/nosofs/schism_combine_outputs.py new file mode 100644 index 0000000..94cf731 --- /dev/null +++ b/ush/nosofs/schism_combine_outputs.py @@ -0,0 +1,424 @@ +#!/usr/bin/env python3 +"""Parameterized SCHISM Output Combiner + +Combines SCHISM split outputs (out2d_*.nc, temperature_*.nc, etc.) into +CO-OPS standard per-timestep field files and station timeseries NetCDF. + +Reads grid dimensions dynamically from output files — works for any +SCHISM-based OFS (SECOFS, STOFS-3D-ATL, etc.) without hardcoded values. + +Outputs: + {PREFIX}.t{cyc}z.{PDY}.fields.{mode}NNN.nc (one per timestep) + {PREFIX}.t{cyc}z.{PDY}.stations.{nowcast|forecast}.nc + +Based on nosofs.v3.7.0/ush/pysh/schism_fields_station_redo.py but +parameterized (no hardcoded dimensions, uses PREFIXNOS consistently). 
+""" + +import argparse +import os +import shutil +import subprocess +import sys + +import netCDF4 as nc +import numpy as np + + +def read_control_file(ctl_path="schism_standard_output.ctl"): + """Read the 5-line SCHISM standard output control file.""" + with open(ctl_path, 'r') as f: + lines = [line.strip() for line in f.readlines()] + return { + 'prefix': lines[0], + 'cyc': lines[1], + 'pdy': lines[2], + 'mode': lines[3], + 'timestart': lines[4], + } + + +def combine_schism_outputs(prefix, cyc, pdy, mode, timestart, workdir=None, + fixdir=None, compress=True): + """Combine SCHISM split outputs into field/station products. + + Args: + prefix: OFS prefix (e.g., 'secofs', 'secofs_ufs') + cyc: Cycle hour string (e.g., '12') + pdy: Date string YYYYMMDD + mode: 'n' for nowcast, 'f' for forecast + timestart: Start time YYYYMMDDHH + workdir: Directory containing SCHISM outputs (default: cwd) + fixdir: Directory with FIX files to copy (optional) + compress: Whether to compress output with ncks (default: True) + + Returns: + 0 on success, non-zero on error. 
+ """ + if workdir: + os.chdir(workdir) + + yyyy = timestart[:4] + mm = timestart[4:6] + dd = timestart[6:8] + hh = timestart[8:10] + + # --- Copy FIX files if fixdir provided --- + if fixdir: + for fname in [f"{prefix}.nv.nc", f"{prefix}.hgrid.gr3", + f"{prefix}.station.lat.lon", f"{prefix}.sigma.dat"]: + src = os.path.join(fixdir, fname) + if os.path.exists(src) and not os.path.exists(fname): + shutil.copy2(src, ".") + print(f" Copied {fname} from {fixdir}") + + # --- Read sigma data --- + sigma_file = f"{prefix}.sigma.dat" + if not os.path.exists(sigma_file): + print(f"ERROR: {sigma_file} not found in {os.getcwd()}", file=sys.stderr) + return 1 + sigma_data = np.loadtxt(sigma_file, dtype=float) + sigma1 = sigma_data.T + if sigma1.ndim == 2: + nver = sigma1.shape[1] + else: + nver = sigma1.shape[0] + print(f" Sigma levels (nver): {nver}") + + # --- Read nv connectivity --- + nvfile = f"{prefix}.nv.nc" + if not os.path.exists(nvfile): + print(f"ERROR: {nvfile} not found in {os.getcwd()}", file=sys.stderr) + return 1 + ds_nv = nc.Dataset(nvfile) + nv1 = ds_nv.variables["nv"][:] + nele_count = nv1.shape[1] if nv1.ndim == 2 else nv1.shape[0] + ds_nv.close() + + # --- Read node count from first output file --- + if not os.path.exists("out2d_1.nc"): + print("ERROR: out2d_1.nc not found in " + os.getcwd(), file=sys.stderr) + return 1 + ds_test = nc.Dataset("out2d_1.nc") + node_count = None + for dim_name in ["nSCHISM_hgrid_node", "node"]: + if dim_name in ds_test.dimensions: + node_count = len(ds_test.dimensions[dim_name]) + break + if node_count is None: + node_count = ds_test.variables["SCHISM_hgrid_node_x"].shape[0] + ds_test.close() + + print(f" Nodes: {node_count}, Elements: {nele_count}, Sigma: {nver}") + + # === FIELD FILES === + field_count = 0 + i = 1 + while True: + file2d = f"out2d_{i}.nc" + if not os.path.exists(file2d): + break + + filetemp = f"temperature_{i}.nc" + filesalt = f"salinity_{i}.nc" + fileu = f"horizontalVelX_{i}.nc" + filev = 
f"horizontalVelY_{i}.nc" + + print(f" Processing output set {i}: {file2d}") + + ds_grid = nc.Dataset(file2d) + ds_temp = nc.Dataset(filetemp) + ds_salt = nc.Dataset(filesalt) + ds_u = nc.Dataset(fileu) + ds_v = nc.Dataset(filev) + + time1 = ds_grid.variables["time"][:] + nstep = len(time1) + + h1 = ds_grid.variables["depth"][:] + lon1 = ds_grid.variables["SCHISM_hgrid_node_x"][:] + lat1 = ds_grid.variables["SCHISM_hgrid_node_y"][:] + zeta1 = ds_grid.variables["elevation"][:] + uwind1 = ds_grid.variables["windSpeedX"][:] + vwind1 = ds_grid.variables["windSpeedY"][:] + + temp1 = ds_temp.variables["temperature"][:] + salt1 = ds_salt.variables["salinity"][:] + u1 = ds_u.variables["horizontalVelX"][:] + v1 = ds_v.variables["horizontalVelY"][:] + + # Copy raw files with named format (nowcast only, first set) + if i == 1 and mode == "n": + for src, var in [("out2d_1.nc", "out2d_1"), + ("zCoordinates_1.nc", "zCoordinates_1"), + ("temperature_1.nc", "temperature_1"), + ("salinity_1.nc", "salinity_1"), + ("horizontalVelX_1.nc", "horizontalVelX_1"), + ("horizontalVelY_1.nc", "horizontalVelY_1")]: + if os.path.exists(src): + dst = f"{prefix}.t{cyc}z.{pdy}.{var}.nowcast.nc" + shutil.copyfile(src, dst) + for k in range(1, 9): + src = f"staout_{k}" + if os.path.exists(src): + dst = f"{prefix}.t{cyc}z.{pdy}.nowcast.staout_{k}" + shutil.copyfile(src, dst) + + # Create per-timestep field files + for k in range(nstep): + iii = (i - 1) * nstep + k + 1 + kkk = f"{iii:03d}" + + tmp_file = f"{prefix}.t{cyc}z.{pdy}.fields.{mode}{kkk}.nc.tmp" + final_file = f"{prefix}.t{cyc}z.{pdy}.fields.{mode}{kkk}.nc" + + ncfile = nc.Dataset(tmp_file, mode='w', format='NETCDF4_CLASSIC') + + ncfile.createDimension('node', node_count) + ncfile.createDimension('nele', nele_count) + ncfile.createDimension('nface', 3) + ncfile.createDimension('nv', nver) + ncfile.createDimension('time', None) + + lon_v = ncfile.createVariable('lon', np.float32, ('node',)) + lat_v = ncfile.createVariable('lat', np.float32, 
('node',)) + time_v = ncfile.createVariable('time', np.float32, ('time',)) + time_v.units = f"seconds since {yyyy}-{mm}-{dd} {hh}:00:00" + + ele_v = ncfile.createVariable('ele', 'i4', ('nface', 'nele')) + h_v = ncfile.createVariable('h', np.float32, ('node',)) + + zeta_v = ncfile.createVariable('zeta', np.float32, ('time', 'node')) + uwind_v = ncfile.createVariable('uwind_speed', np.float32, ('time', 'node')) + vwind_v = ncfile.createVariable('Vwind_speed', np.float32, ('time', 'node')) + + temp_v = ncfile.createVariable('temp', np.float32, ('time', 'nv', 'node')) + salt_v = ncfile.createVariable('salinity', np.float32, ('time', 'nv', 'node')) + u_v = ncfile.createVariable('u', np.float32, ('time', 'nv', 'node')) + v_v = ncfile.createVariable('v', np.float32, ('time', 'nv', 'node')) + + sigma_v = ncfile.createVariable('sigma', np.float32, ('node', 'nv')) + + h_v[:] = h1[:] + lon_v[:] = lon1[:] + lat_v[:] = lat1[:] + ele_v[:, :] = nv1[:, :] + sigma_v[:, :] = sigma1[:, :] + + time_v[:] = time1[k] + zeta_v[0, :] = zeta1[k, :] + uwind_v[0, :] = uwind1[k, :] + vwind_v[0, :] = vwind1[k, :] + + temp_v[0, :, :] = temp1[k, :, :].T + salt_v[0, :, :] = salt1[k, :, :].T + u_v[0, :, :] = u1[k, :, :].T + v_v[0, :, :] = v1[k, :, :].T + + ncfile.close() + + # Compress with ncks (deflation level 4) + if compress: + try: + subprocess.check_call( + ["ncks", "-4", "-L", "4", tmp_file, final_file], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) + os.remove(tmp_file) + except (subprocess.CalledProcessError, FileNotFoundError): + os.rename(tmp_file, final_file) + else: + os.rename(tmp_file, final_file) + + field_count += 1 + + ds_grid.close() + ds_temp.close() + ds_salt.close() + ds_u.close() + ds_v.close() + + i += 1 + + print(f" Created {field_count} field files") + + # === STATION FILE === + sta_file = f"{prefix}.station.lat.lon" + if not os.path.exists(sta_file): + print(f"WARNING: {sta_file} not found — skipping station output") + return 0 + + with open(sta_file, 'r') as 
f: + sta_lines = [line.strip() for line in f if line.strip()] + nsta = len(sta_lines) + nsta2 = nsta * 2 + print(f" Stations: {nsta}") + + modefull = "nowcast" if mode == "n" else "forecast" + + # Read 2D station outputs (elevation, wind u, wind v) + ele_values = uwind_values = vwind_values = time_values = None + + for ind in [1, 3, 4]: + fname = f"staout_{ind}" + if not os.path.exists(fname): + print(f"WARNING: {fname} not found — skipping station output") + return 0 + + with open(fname, 'r') as f: + all_numbers = [] + for line in f: + numbers = [float(x) for x in line.strip().split()] + all_numbers.append(numbers) + + arr = np.array(all_numbers) + time_values = arr[:, 0] + + if ind == 1: + ele_values = arr[:, 1:nsta + 1] + elif ind == 3: + uwind_values = arr[:, 1:nsta + 1] + elif ind == 4: + vwind_values = arr[:, 1:nsta + 1] + + # Read 3D station outputs (T, S, u, v) — every other line + temp_final = salt_final = u_final = v_final = None + + for ind in [5, 6, 7, 8]: + fname = f"staout_{ind}" + if not os.path.exists(fname): + print(f"WARNING: {fname} not found — skipping 3D station variables") + break + + with open(fname, 'r') as f: + all_numbers = [] + nline = 0 + for line in f: + nline += 1 + if nline % 2 == 0: + numbers = [float(x) for x in line.strip().split()] + all_numbers.append(numbers) + + arr = np.array(all_numbers) + nstep_sta = len(arr) + + data0 = arr[:, 1:] + data_reshape = data0.reshape(nstep_sta, nsta2, nver) + data_real = data_reshape[:, :nsta, :] + data_final = np.swapaxes(data_real, 1, 2) + + if ind == 5: + temp_final = data_final + elif ind == 6: + salt_final = data_final + elif ind == 7: + u_final = data_final + elif ind == 8: + v_final = data_final + + nstep_sta = len(time_values) + + # Create station NetCDF + sta_nc = f"{prefix}.t{cyc}z.{pdy}.stations.{modefull}.nc" + ncfile = nc.Dataset(sta_nc, mode='w', format='NETCDF4') + + name_length = 20 + ncfile.createDimension('station', nsta) + ncfile.createDimension('clen', name_length) + 
ncfile.createDimension('time', nstep_sta) + ncfile.createDimension('siglay', nver) + ncfile.createDimension('num_entries', nsta) + + time_v = ncfile.createVariable('time', np.float32, ('time',)) + time_v.units = f"seconds since {yyyy}-{mm}-{dd} {hh}:00:00" + + lon_v = ncfile.createVariable('lon', np.float32, ('station',)) + lat_v = ncfile.createVariable('lat', np.float32, ('station',)) + + name_v = ncfile.createVariable('name_station', 'S1', ('station', 'clen')) + + zeta_v = ncfile.createVariable('zeta', np.float32, ('time', 'station')) + uwind_v = ncfile.createVariable('uwind_speed', np.float32, ('time', 'station')) + vwind_v = ncfile.createVariable('vwind_speed', np.float32, ('time', 'station')) + + temp_v = ncfile.createVariable('temp', np.float32, ('time', 'siglay', 'station')) + salt_v = ncfile.createVariable('salinity', np.float32, ('time', 'siglay', 'station')) + u_v = ncfile.createVariable('u', np.float32, ('time', 'siglay', 'station')) + v_v = ncfile.createVariable('v', np.float32, ('time', 'siglay', 'station')) + + # Read station coordinates + sta_data = np.array([line.split() for line in sta_lines]) + lon_v[:] = sta_data[:, 1].astype(float) + lat_v[:] = sta_data[:, 2].astype(float) + + # Station names + station_names = [f'station_{prefix}_{i + 1:05d}' for i in range(nsta)] + names_char = nc.stringtochar(np.array(station_names, dtype=f'S{name_length}')) + name_v[:] = names_char + + time_v[:] = time_values[:] + zeta_v[:, :] = ele_values[:, :] + uwind_v[:, :] = uwind_values[:, :] + vwind_v[:, :] = vwind_values[:, :] + + if temp_final is not None: + temp_v[:, :, :] = temp_final[:, :, :] + if salt_final is not None: + salt_v[:, :, :] = salt_final[:, :, :] + if u_final is not None: + u_v[:, :, :] = u_final[:, :, :] + if v_final is not None: + v_v[:, :, :] = v_final[:, :, :] + + ncfile.close() + print(f" Created station file: {sta_nc}") + + return 0 + + +def main(): + parser = argparse.ArgumentParser( + description="Combine SCHISM split outputs into 
field/station products" + ) + parser.add_argument("--prefix", help="OFS prefix (e.g., secofs, secofs_ufs)") + parser.add_argument("--cyc", help="Cycle hour (e.g., 12)") + parser.add_argument("--pdy", help="Date YYYYMMDD") + parser.add_argument("--mode", choices=["n", "f"], help="n=nowcast, f=forecast") + parser.add_argument("--timestart", help="Start time YYYYMMDDHH") + parser.add_argument("--workdir", default=".", help="Directory with SCHISM outputs") + parser.add_argument("--fixdir", help="FIX directory for grid/station files") + parser.add_argument("--no-compress", action="store_true", + help="Skip ncks compression") + parser.add_argument("--ctl", help="Control file path (alternative to args)") + + args = parser.parse_args() + + if args.ctl: + cfg = read_control_file(args.ctl) + prefix = cfg['prefix'] + cyc = cfg['cyc'] + pdy = cfg['pdy'] + mode = cfg['mode'] + timestart = cfg['timestart'] + else: + prefix = args.prefix or os.environ.get('PREFIXNOS', 'secofs') + cyc = args.cyc or os.environ.get('cyc', '00') + pdy = args.pdy or os.environ.get('PDY', '') + mode = args.mode or 'f' + timestart = args.timestart or f"{pdy}{cyc}" + + if not pdy: + print("ERROR: --pdy or PDY env var required", file=sys.stderr) + return 1 + + print(f"SCHISM Output Combiner: prefix={prefix} cyc={cyc} pdy={pdy} " + f"mode={mode} timestart={timestart}") + + return combine_schism_outputs( + prefix=prefix, cyc=cyc, pdy=pdy, mode=mode, timestart=timestart, + workdir=args.workdir, fixdir=args.fixdir, compress=not args.no_compress, + ) + + +if __name__ == "__main__": + sys.exit(main()) From 296c74bea780324e98e50bc47f6d4ba0e11694be Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Fri, 13 Mar 2026 07:57:35 -0400 Subject: [PATCH 22/40] Use control-file combining approach from comf-schism-post branch - Replace CLI-args schism_combine_outputs.py with control-file version from feature/comf-schism-post (same pattern as nos_ofs_nowcast_forecast.sh) - Adds UFS-Coastal fallbacks: hgrid.gr3 for missing 
depth/coords, zeros for missing windSpeedX/Y in out2d - J-job now creates schism_standard_output.ctl per member (5-line format), copies FIX files to outputs dir, runs python script in CWD - Matches the established COMF inline post-processing approach --- jobs/JNOS_OFS_ENSEMBLE_POST | 53 ++- ush/nosofs/schism_combine_outputs.py | 610 +++++++++++++++------------ 2 files changed, 365 insertions(+), 298 deletions(-) diff --git a/jobs/JNOS_OFS_ENSEMBLE_POST b/jobs/JNOS_OFS_ENSEMBLE_POST index 8e1a95f..c8471b7 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_POST +++ b/jobs/JNOS_OFS_ENSEMBLE_POST @@ -144,9 +144,13 @@ echo "${MEMBER_DIRS}" | tr ' ' '\n' ######################################################## # Step 1: Per-member SCHISM output combining # -# Converts raw SCHISM split outputs (out2d_*.nc, temperature_*.nc, etc.) -# into CO-OPS standard per-timestep field files and station timeseries, -# matching the deterministic output format: +# Same approach as nos_ofs_nowcast_forecast.sh inline combining: +# 1. Copy FIX files to outputs directory +# 2. Create schism_standard_output.ctl (5-line control file) +# 3. Run python schism_combine_outputs.py +# 4. Move products to member COMOUT +# +# Produces deterministic-format outputs per member: # {PREFIXNOS}.t{cyc}z.{PDY}.fields.fNNN.nc # {PREFIXNOS}.t{cyc}z.{PDY}.stations.forecast.nc ######################################################## @@ -187,15 +191,17 @@ else fi echo " Combining outputs for member ${MID}..." 
- - # Stage FIX files (handle UFS-Coastal prefix variants: - # secofs_ufs uses same grid as secofs, so FIX files may - # be named secofs.* rather than secofs_ufs.*) - for _ext in nv.nc sigma.dat station.lat.lon; do - _target="${OUTPUTS_DIR}/${PREFIXNOS}.${_ext}" + cd ${OUTPUTS_DIR} + + # Step 1a: Copy FIX files to outputs dir + # (same as nos_ofs_nowcast_forecast.sh lines 1097-1101) + # Handle UFS-Coastal prefix variants: secofs_ufs grid files + # may be named secofs.* in FIXofs + for _ext in nv.nc hgrid.gr3 station.lat.lon sigma.dat; do + _target="${PREFIXNOS}.${_ext}" if [ ! -f "${_target}" ]; then if [ -f "${FIXofs}/${PREFIXNOS}.${_ext}" ]; then - cp -p "${FIXofs}/${PREFIXNOS}.${_ext}" "${_target}" + cp -p "${FIXofs}/${PREFIXNOS}.${_ext}" . else _base=${PREFIXNOS%_ufs} if [ "${_base}" != "${PREFIXNOS}" ] && \ @@ -206,23 +212,30 @@ else fi done - python3 ${COMBINE_SCRIPT} \ - --prefix ${PREFIXNOS} \ - --cyc $(printf '%02d' "${cyc}") \ - --pdy ${PDY} \ - --mode f \ - --timestart ${TIMESTART} \ - --workdir ${OUTPUTS_DIR} + # Step 1b: Create 5-line control file + # (same as nos_ofs_nowcast_forecast.sh lines 1103-1108) + rm -f schism_standard_output.ctl + echo ${PREFIXNOS} >> schism_standard_output.ctl + echo $(printf '%02d' "${cyc}") >> schism_standard_output.ctl + echo ${PDY} >> schism_standard_output.ctl + echo "f" >> schism_standard_output.ctl + echo ${TIMESTART} >> schism_standard_output.ctl + + # Step 1c: Run combining script + python3 ${COMBINE_SCRIPT} combine_err=$? 
if [ $combine_err -ne 0 ]; then echo "WARNING: Combining failed for member ${MID} (rc=${combine_err})" + cd ${DATA} continue fi - # Move products from outputs/ to member directory - mv ${OUTPUTS_DIR}/${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc ${MDIR}/ 2>/dev/null - mv ${OUTPUTS_DIR}/${PREFIXNOS}.${cycle}.${PDY}.stations.forecast.nc ${MDIR}/ 2>/dev/null + # Step 1d: Move products from outputs/ to member directory + mv ${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc ${MDIR}/ 2>/dev/null + mv ${PREFIXNOS}.${cycle}.${PDY}.stations.forecast.nc ${MDIR}/ 2>/dev/null + + cd ${DATA} N_FIELDS=$(ls ${MDIR}/${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc 2>/dev/null | wc -l) echo " Member ${MID}: ${N_FIELDS} field files + station file created" diff --git a/ush/nosofs/schism_combine_outputs.py b/ush/nosofs/schism_combine_outputs.py index 94cf731..19595a1 100644 --- a/ush/nosofs/schism_combine_outputs.py +++ b/ush/nosofs/schism_combine_outputs.py @@ -1,130 +1,151 @@ #!/usr/bin/env python3 -"""Parameterized SCHISM Output Combiner - -Combines SCHISM split outputs (out2d_*.nc, temperature_*.nc, etc.) into -CO-OPS standard per-timestep field files and station timeseries NetCDF. - -Reads grid dimensions dynamically from output files — works for any -SCHISM-based OFS (SECOFS, STOFS-3D-ATL, etc.) without hardcoded values. - -Outputs: - {PREFIX}.t{cyc}z.{PDY}.fields.{mode}NNN.nc (one per timestep) - {PREFIX}.t{cyc}z.{PDY}.stations.{nowcast|forecast}.nc - -Based on nosofs.v3.7.0/ush/pysh/schism_fields_station_redo.py but -parameterized (no hardcoded dimensions, uses PREFIXNOS consistently). +""" +schism_combine_outputs.py — Combine distributed SCHISM outputs into CO-OPS standard NetCDF products. + +Parameterized version of schism_fields_station_redo.py from nosofs.v3.7.0. +Reads grid dimensions dynamically from output files instead of hardcoding them. 
+ +Reads control file: schism_standard_output.ctl (5 lines) + Line 1: PREFIXNOS (e.g., secofs) + Line 2: cyc (e.g., 00) + Line 3: PDY (e.g., 20260228) + Line 4: mode ("n" for nowcast, "f" for forecast) + Line 5: timestart (e.g., 2026022806) + +Products: + - Per-timestep field files: {prefix}.t{cyc}z.{PDY}.fields.{n|f}{NNN}.nc + - Station timeseries: {prefix}.t{cyc}z.{PDY}.stations.{nowcast|forecast}.nc + - Renamed raw outputs: {prefix}.t{cyc}z.{PDY}.out2d_1.{stage}.nc, etc. + +Usage: + cd $DATA/outputs + python schism_combine_outputs.py """ -import argparse -import os import shutil +import netCDF4 as nc +from netCDF4 import Dataset import subprocess +import os import sys - -import netCDF4 as nc +import glob import numpy as np -def read_control_file(ctl_path="schism_standard_output.ctl"): - """Read the 5-line SCHISM standard output control file.""" - with open(ctl_path, 'r') as f: +def read_hgrid_gr3(prefixnos): + """Read node coordinates and depth from hgrid.gr3. + + Searches for {prefixnos}.hgrid.gr3 or hgrid.gr3 in current directory. + Returns (x, y, depth) as float32 arrays, or (None, None, None). 
+ """ + candidates = [f"{prefixnos}.hgrid.gr3", "hgrid.gr3"] + # Also try any *.hgrid.gr3 + candidates.extend(glob.glob("*.hgrid.gr3")) + + hgrid_path = None + for c in candidates: + if os.path.exists(c): + hgrid_path = c + break + + if hgrid_path is None: + return None, None, None + + print(f" Reading static grid from: {hgrid_path}") + with open(hgrid_path, 'r') as f: + f.readline() # comment + ne, np_global = map(int, f.readline().split()) + x = np.empty(np_global, dtype=np.float64) + y = np.empty(np_global, dtype=np.float64) + depth = np.empty(np_global, dtype=np.float64) + for i in range(np_global): + parts = f.readline().split() + x[i] = float(parts[1]) + y[i] = float(parts[2]) + depth[i] = float(parts[3]) + + return x.astype(np.float32), y.astype(np.float32), depth.astype(np.float32) + + +def read_control_file(cfile="schism_standard_output.ctl"): + """Read the 5-line control file.""" + with open(cfile, 'r') as f: lines = [line.strip() for line in f.readlines()] + if len(lines) < 5: + print(f"ERROR: Control file {cfile} must have 5 lines, got {len(lines)}") + sys.exit(1) return { - 'prefix': lines[0], + 'PREFIXNOS': lines[0], 'cyc': lines[1], - 'pdy': lines[2], + 'PDY': lines[2], 'mode': lines[3], 'timestart': lines[4], } -def combine_schism_outputs(prefix, cyc, pdy, mode, timestart, workdir=None, - fixdir=None, compress=True): - """Combine SCHISM split outputs into field/station products. 
- - Args: - prefix: OFS prefix (e.g., 'secofs', 'secofs_ufs') - cyc: Cycle hour string (e.g., '12') - pdy: Date string YYYYMMDD - mode: 'n' for nowcast, 'f' for forecast - timestart: Start time YYYYMMDDHH - workdir: Directory containing SCHISM outputs (default: cwd) - fixdir: Directory with FIX files to copy (optional) - compress: Whether to compress output with ncks (default: True) +def get_grid_dimensions(prefixnos): + """Read grid dimensions dynamically from output files and FIX files.""" + # Node and element counts from out2d_1.nc + ds = nc.Dataset("out2d_1.nc") + n_nodes = len(ds.dimensions['nSCHISM_hgrid_node']) + ds.close() - Returns: - 0 on success, non-zero on error. - """ - if workdir: - os.chdir(workdir) - - yyyy = timestart[:4] - mm = timestart[4:6] - dd = timestart[6:8] - hh = timestart[8:10] - - # --- Copy FIX files if fixdir provided --- - if fixdir: - for fname in [f"{prefix}.nv.nc", f"{prefix}.hgrid.gr3", - f"{prefix}.station.lat.lon", f"{prefix}.sigma.dat"]: - src = os.path.join(fixdir, fname) - if os.path.exists(src) and not os.path.exists(fname): - shutil.copy2(src, ".") - print(f" Copied {fname} from {fixdir}") - - # --- Read sigma data --- - sigma_file = f"{prefix}.sigma.dat" - if not os.path.exists(sigma_file): - print(f"ERROR: {sigma_file} not found in {os.getcwd()}", file=sys.stderr) - return 1 - sigma_data = np.loadtxt(sigma_file, dtype=float) - sigma1 = sigma_data.T - if sigma1.ndim == 2: - nver = sigma1.shape[1] - else: - nver = sigma1.shape[0] - print(f" Sigma levels (nver): {nver}") - - # --- Read nv connectivity --- - nvfile = f"{prefix}.nv.nc" - if not os.path.exists(nvfile): - print(f"ERROR: {nvfile} not found in {os.getcwd()}", file=sys.stderr) - return 1 + # Element count from nv.nc + nvfile = f"{prefixnos}.nv.nc" ds_nv = nc.Dataset(nvfile) - nv1 = ds_nv.variables["nv"][:] - nele_count = nv1.shape[1] if nv1.ndim == 2 else nv1.shape[0] + nv = ds_nv.variables["nv"][:] + n_elements = nv.shape[1] # nv is (nface, nele) ds_nv.close() - 
# --- Read node count from first output file --- - if not os.path.exists("out2d_1.nc"): - print("ERROR: out2d_1.nc not found in " + os.getcwd(), file=sys.stderr) - return 1 - ds_test = nc.Dataset("out2d_1.nc") - node_count = None - for dim_name in ["nSCHISM_hgrid_node", "node"]: - if dim_name in ds_test.dimensions: - node_count = len(ds_test.dimensions[dim_name]) - break - if node_count is None: - node_count = ds_test.variables["SCHISM_hgrid_node_x"].shape[0] - ds_test.close() + # Station count from station.lat.lon + sta_file = f"{prefixnos}.station.lat.lon" + with open(sta_file, 'r') as f: + n_stations = sum(1 for line in f if line.strip()) - print(f" Nodes: {node_count}, Elements: {nele_count}, Sigma: {nver}") + # Sigma levels from sigma.dat + sigma_data = np.loadtxt(f"{prefixnos}.sigma.dat", dtype=float) + sigma = sigma_data.T + n_levels = sigma.shape[1] if sigma.ndim == 2 else len(sigma) - # === FIELD FILES === - field_count = 0 - i = 1 - while True: - file2d = f"out2d_{i}.nc" - if not os.path.exists(file2d): - break + return { + 'n_nodes': n_nodes, + 'n_elements': n_elements, + 'n_stations': n_stations, + 'n_levels': n_levels, + 'nv': nv, + 'sigma': sigma, + } + + +def process_field_files(ctl, dims): + """Create per-timestep field files from distributed SCHISM outputs.""" + PREFIXNOS = ctl['PREFIXNOS'] + cyc = ctl['cyc'] + day = ctl['PDY'] + mode = ctl['mode'] + ts = ctl['timestart'] + yyyy, mm, dd, hh = ts[0:4], ts[4:6], ts[6:8], ts[8:10] + n_nodes = dims['n_nodes'] + n_elements = dims['n_elements'] + n_levels = dims['n_levels'] + nv1 = dims['nv'] + sigma1 = dims['sigma'] + + # Rename raw files for nowcast (only on first file iteration) + nowcast_renamed = False + + for i in range(1, 999): # iterate over output file groups + file2d = f"out2d_{i}.nc" filetemp = f"temperature_{i}.nc" filesalt = f"salinity_{i}.nc" fileu = f"horizontalVelX_{i}.nc" filev = f"horizontalVelY_{i}.nc" - print(f" Processing output set {i}: {file2d}") + if not os.path.exists(file2d): + 
break + + print(f"Processing output group {i}: {file2d}") ds_grid = nc.Dataset(file2d) ds_temp = nc.Dataset(filetemp) @@ -133,104 +154,123 @@ def combine_schism_outputs(prefix, cyc, pdy, mode, timestart, workdir=None, ds_v = nc.Dataset(filev) time1 = ds_grid.variables["time"][:] + zeta1 = ds_grid.variables["elevation"][:] nstep = len(time1) - h1 = ds_grid.variables["depth"][:] - lon1 = ds_grid.variables["SCHISM_hgrid_node_x"][:] - lat1 = ds_grid.variables["SCHISM_hgrid_node_y"][:] - zeta1 = ds_grid.variables["elevation"][:] - uwind1 = ds_grid.variables["windSpeedX"][:] - vwind1 = ds_grid.variables["windSpeedY"][:] + # Static variables: try out2d first, fall back to hgrid.gr3 + if "depth" in ds_grid.variables: + h1 = ds_grid.variables["depth"][:] + lon1 = ds_grid.variables["SCHISM_hgrid_node_x"][:] + lat1 = ds_grid.variables["SCHISM_hgrid_node_y"][:] + else: + if i == 1: + print(" depth/coordinates not in out2d, reading from hgrid.gr3...") + hlon, hlat, hdepth = read_hgrid_gr3(PREFIXNOS) + if hdepth is None: + print("ERROR: No depth in out2d and no hgrid.gr3 found") + sys.exit(1) + lon1, lat1, h1 = hlon, hlat, hdepth + + # Wind variables: optional (may not exist in UFS-Coastal outputs) + if "windSpeedX" in ds_grid.variables: + uwind1 = ds_grid.variables["windSpeedX"][:] + vwind1 = ds_grid.variables["windSpeedY"][:] + else: + uwind1 = np.zeros((nstep, n_nodes), dtype=np.float32) + vwind1 = np.zeros((nstep, n_nodes), dtype=np.float32) + if i == 1: + print(" WARNING: windSpeedX/Y not in out2d, using zeros") temp1 = ds_temp.variables["temperature"][:] salt1 = ds_salt.variables["salinity"][:] u1 = ds_u.variables["horizontalVelX"][:] v1 = ds_v.variables["horizontalVelY"][:] - # Copy raw files with named format (nowcast only, first set) - if i == 1 and mode == "n": - for src, var in [("out2d_1.nc", "out2d_1"), - ("zCoordinates_1.nc", "zCoordinates_1"), - ("temperature_1.nc", "temperature_1"), - ("salinity_1.nc", "salinity_1"), - ("horizontalVelX_1.nc", "horizontalVelX_1"), 
- ("horizontalVelY_1.nc", "horizontalVelY_1")]: - if os.path.exists(src): - dst = f"{prefix}.t{cyc}z.{pdy}.{var}.nowcast.nc" - shutil.copyfile(src, dst) + # Rename raw files for nowcast (first iteration only) + if mode == "n" and not nowcast_renamed: + modefull = "nowcast" + for raw_name, var_name in [ + ("out2d_1.nc", "out2d_1"), + ("zCoordinates_1.nc", "zCoordinates_1"), + ("temperature_1.nc", "temperature_1"), + ("salinity_1.nc", "salinity_1"), + ("horizontalVelX_1.nc", "horizontalVelX_1"), + ("horizontalVelY_1.nc", "horizontalVelY_1"), + ]: + if os.path.exists(raw_name): + dest = f"{PREFIXNOS}.t{cyc}z.{day}.{var_name}.{modefull}.nc" + shutil.copyfile(raw_name, dest) + print(f" Renamed: {raw_name} -> {dest}") + for k in range(1, 9): src = f"staout_{k}" if os.path.exists(src): - dst = f"{prefix}.t{cyc}z.{pdy}.nowcast.staout_{k}" - shutil.copyfile(src, dst) + dest = f"{PREFIXNOS}.t{cyc}z.{day}.{modefull}.staout_{k}" + shutil.copyfile(src, dest) + nowcast_renamed = True # Create per-timestep field files for k in range(nstep): iii = (i - 1) * nstep + k + 1 kkk = f"{iii:03d}" - tmp_file = f"{prefix}.t{cyc}z.{pdy}.fields.{mode}{kkk}.nc.tmp" - final_file = f"{prefix}.t{cyc}z.{pdy}.fields.{mode}{kkk}.nc" + nfields_tmp = f"{PREFIXNOS}.t{cyc}z.{day}.fields.{mode}{kkk}.nc.old" + nfields_out = f"{PREFIXNOS}.t{cyc}z.{day}.fields.{mode}{kkk}.nc" - ncfile = nc.Dataset(tmp_file, mode='w', format='NETCDF4_CLASSIC') + print(f" Creating field file: {nfields_out}") - ncfile.createDimension('node', node_count) - ncfile.createDimension('nele', nele_count) + ncfile = Dataset(nfields_tmp, mode='w', format='NETCDF4_CLASSIC') + + ncfile.createDimension('node', n_nodes) + ncfile.createDimension('nele', n_elements) ncfile.createDimension('nface', 3) - ncfile.createDimension('nv', nver) + ncfile.createDimension('nv', n_levels) ncfile.createDimension('time', None) - lon_v = ncfile.createVariable('lon', np.float32, ('node',)) - lat_v = ncfile.createVariable('lat', np.float32, ('node',)) - 
time_v = ncfile.createVariable('time', np.float32, ('time',)) - time_v.units = f"seconds since {yyyy}-{mm}-{dd} {hh}:00:00" - - ele_v = ncfile.createVariable('ele', 'i4', ('nface', 'nele')) - h_v = ncfile.createVariable('h', np.float32, ('node',)) + lon_var = ncfile.createVariable('lon', np.float32, ('node',)) + lat_var = ncfile.createVariable('lat', np.float32, ('node',)) + time_var = ncfile.createVariable('time', np.float32, ('time',)) + time_var.units = f"seconds since {yyyy}-{mm}-{dd} {hh}:00:00" - zeta_v = ncfile.createVariable('zeta', np.float32, ('time', 'node')) - uwind_v = ncfile.createVariable('uwind_speed', np.float32, ('time', 'node')) - vwind_v = ncfile.createVariable('Vwind_speed', np.float32, ('time', 'node')) + ele_var = ncfile.createVariable('ele', 'i4', ('nface', 'nele')) + h_var = ncfile.createVariable('h', np.float32, ('node',)) - temp_v = ncfile.createVariable('temp', np.float32, ('time', 'nv', 'node')) - salt_v = ncfile.createVariable('salinity', np.float32, ('time', 'nv', 'node')) - u_v = ncfile.createVariable('u', np.float32, ('time', 'nv', 'node')) - v_v = ncfile.createVariable('v', np.float32, ('time', 'nv', 'node')) + zeta_var = ncfile.createVariable('zeta', np.float32, ('time', 'node')) + uwind_var = ncfile.createVariable('uwind_speed', np.float32, ('time', 'node')) + vwind_var = ncfile.createVariable('Vwind_speed', np.float32, ('time', 'node')) - sigma_v = ncfile.createVariable('sigma', np.float32, ('node', 'nv')) + temp_var = ncfile.createVariable('temp', np.float32, ('time', 'nv', 'node')) + salt_var = ncfile.createVariable('salinity', np.float32, ('time', 'nv', 'node')) + u_var = ncfile.createVariable('u', np.float32, ('time', 'nv', 'node')) + v_var = ncfile.createVariable('v', np.float32, ('time', 'nv', 'node')) + sigma_var = ncfile.createVariable('sigma', np.float32, ('node', 'nv')) - h_v[:] = h1[:] - lon_v[:] = lon1[:] - lat_v[:] = lat1[:] - ele_v[:, :] = nv1[:, :] - sigma_v[:, :] = sigma1[:, :] + h_var[:] = h1[:] + lon_var[:] = 
lon1[:] + lat_var[:] = lat1[:] + ele_var[:, :] = nv1[:, :] + sigma_var[:, :] = sigma1[:, :] - time_v[:] = time1[k] - zeta_v[0, :] = zeta1[k, :] - uwind_v[0, :] = uwind1[k, :] - vwind_v[0, :] = vwind1[k, :] + time_var[:] = time1[k] + zeta_var[0, :] = zeta1[k, :] + uwind_var[0, :] = uwind1[k, :] + vwind_var[0, :] = vwind1[k, :] - temp_v[0, :, :] = temp1[k, :, :].T - salt_v[0, :, :] = salt1[k, :, :].T - u_v[0, :, :] = u1[k, :, :].T - v_v[0, :, :] = v1[k, :, :].T + temp_var[0, :, :] = temp1[k, :, :].T + salt_var[0, :, :] = salt1[k, :, :].T + u_var[0, :, :] = u1[k, :, :].T + v_var[0, :, :] = v1[k, :, :].T ncfile.close() - # Compress with ncks (deflation level 4) - if compress: - try: - subprocess.check_call( - ["ncks", "-4", "-L", "4", tmp_file, final_file], - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL - ) - os.remove(tmp_file) - except (subprocess.CalledProcessError, FileNotFoundError): - os.rename(tmp_file, final_file) - else: - os.rename(tmp_file, final_file) - - field_count += 1 + # Compress with ncks + try: + subprocess.check_call(["ncks", "-4", "-L", "4", nfields_tmp, nfields_out]) + os.remove(nfields_tmp) + except (subprocess.CalledProcessError, FileNotFoundError) as e: + print(f" WARNING: ncks compression failed ({e}), using uncompressed file") + if not os.path.exists(nfields_out): + os.rename(nfields_tmp, nfields_out) ds_grid.close() ds_temp.close() @@ -238,35 +278,34 @@ def combine_schism_outputs(prefix, cyc, pdy, mode, timestart, workdir=None, ds_u.close() ds_v.close() - i += 1 - print(f" Created {field_count} field files") +def process_station_files(ctl, dims): + """Create station timeseries NetCDF from staout text files.""" + PREFIXNOS = ctl['PREFIXNOS'] + cyc = ctl['cyc'] + day = ctl['PDY'] + mode = ctl['mode'] + ts = ctl['timestart'] + yyyy, mm, dd, hh = ts[0:4], ts[4:6], ts[6:8], ts[8:10] - # === STATION FILE === - sta_file = f"{prefix}.station.lat.lon" - if not os.path.exists(sta_file): - print(f"WARNING: {sta_file} not found — skipping 
station output") - return 0 - - with open(sta_file, 'r') as f: - sta_lines = [line.strip() for line in f if line.strip()] - nsta = len(sta_lines) + nsta = dims['n_stations'] + nver = dims['n_levels'] nsta2 = nsta * 2 - print(f" Stations: {nsta}") - - modefull = "nowcast" if mode == "n" else "forecast" # Read 2D station outputs (elevation, wind u, wind v) - ele_values = uwind_values = vwind_values = time_values = None + ele_values = None + uwind_values = None + vwind_values = None + time_values = None for ind in [1, 3, 4]: - fname = f"staout_{ind}" - if not os.path.exists(fname): - print(f"WARNING: {fname} not found — skipping station output") - return 0 + file_name = f"staout_{ind}" + if not os.path.exists(file_name): + print(f"WARNING: {file_name} not found, skipping") + continue - with open(fname, 'r') as f: - all_numbers = [] + all_numbers = [] + with open(file_name, 'r') as f: for line in f: numbers = [float(x) for x in line.strip().split()] all_numbers.append(numbers) @@ -281,18 +320,26 @@ def combine_schism_outputs(prefix, cyc, pdy, mode, timestart, workdir=None, elif ind == 4: vwind_values = arr[:, 1:nsta + 1] - # Read 3D station outputs (T, S, u, v) — every other line - temp_final = salt_final = u_final = v_final = None + if time_values is None: + print("ERROR: No 2D staout files found, cannot create station file") + return - for ind in [5, 6, 7, 8]: - fname = f"staout_{ind}" - if not os.path.exists(fname): - print(f"WARNING: {fname} not found — skipping 3D station variables") - break + # Read 3D station outputs (temp, salt, u, v) + # These files have alternating lines: odd=header, even=data + temp_final = None + salt_final = None + u_final = None + v_final = None - with open(fname, 'r') as f: - all_numbers = [] - nline = 0 + for ind in [5, 6, 7, 8]: + file_name = f"staout_{ind}" + if not os.path.exists(file_name): + print(f"WARNING: {file_name} not found, skipping 3D variable") + continue + + all_numbers = [] + nline = 0 + with open(file_name, 'r') as f: 
for line in f: nline += 1 if nline % 2 == 0: @@ -300,11 +347,11 @@ def combine_schism_outputs(prefix, cyc, pdy, mode, timestart, workdir=None, all_numbers.append(numbers) arr = np.array(all_numbers) - nstep_sta = len(arr) + nstep = len(arr) data0 = arr[:, 1:] - data_reshape = data0.reshape(nstep_sta, nsta2, nver) - data_real = data_reshape[:, :nsta, :] + data_reshaped = data0.reshape(nstep, nsta2, nver) + data_real = data_reshaped[:, 0:nsta, :] data_final = np.swapaxes(data_real, 1, 2) if ind == 5: @@ -316,109 +363,116 @@ def combine_schism_outputs(prefix, cyc, pdy, mode, timestart, workdir=None, elif ind == 8: v_final = data_final - nstep_sta = len(time_values) + nstep = len(time_values) + + # Determine mode full name + modefull = "nowcast" if mode == "n" else "forecast" + + # Read station locations + sta_file = f"{PREFIXNOS}.station.lat.lon" + all_from_file = [] + with open(sta_file, 'r') as f: + for line in f: + if line.strip(): + all_from_file.append(line.strip().split()) + + sta_arr = np.array(all_from_file) # Create station NetCDF - sta_nc = f"{prefix}.t{cyc}z.{pdy}.stations.{modefull}.nc" - ncfile = nc.Dataset(sta_nc, mode='w', format='NETCDF4') + filesta = f"{PREFIXNOS}.t{cyc}z.{day}.stations.{modefull}.nc" + print(f"Creating station file: {filesta}") name_length = 20 + ncfile = Dataset(filesta, mode='w', format='NETCDF4') + ncfile.createDimension('station', nsta) ncfile.createDimension('clen', name_length) - ncfile.createDimension('time', nstep_sta) + ncfile.createDimension('time', nstep) ncfile.createDimension('siglay', nver) ncfile.createDimension('num_entries', nsta) - time_v = ncfile.createVariable('time', np.float32, ('time',)) - time_v.units = f"seconds since {yyyy}-{mm}-{dd} {hh}:00:00" - - lon_v = ncfile.createVariable('lon', np.float32, ('station',)) - lat_v = ncfile.createVariable('lat', np.float32, ('station',)) + time_var = ncfile.createVariable('time', np.float32, ('time',)) + time_var.units = f"seconds since {yyyy}-{mm}-{dd} {hh}:00:00" - 
name_v = ncfile.createVariable('name_station', 'S1', ('station', 'clen')) + lon_var = ncfile.createVariable('lon', np.float32, ('station',)) + lat_var = ncfile.createVariable('lat', np.float32, ('station',)) - zeta_v = ncfile.createVariable('zeta', np.float32, ('time', 'station')) - uwind_v = ncfile.createVariable('uwind_speed', np.float32, ('time', 'station')) - vwind_v = ncfile.createVariable('vwind_speed', np.float32, ('time', 'station')) + name_station_var = ncfile.createVariable('name_station', 'S1', ('station', 'clen')) - temp_v = ncfile.createVariable('temp', np.float32, ('time', 'siglay', 'station')) - salt_v = ncfile.createVariable('salinity', np.float32, ('time', 'siglay', 'station')) - u_v = ncfile.createVariable('u', np.float32, ('time', 'siglay', 'station')) - v_v = ncfile.createVariable('v', np.float32, ('time', 'siglay', 'station')) + zeta_var = ncfile.createVariable('zeta', np.float32, ('time', 'station')) + uwind_var = ncfile.createVariable('uwind_speed', np.float32, ('time', 'station')) + vwind_var = ncfile.createVariable('vwind_speed', np.float32, ('time', 'station')) - # Read station coordinates - sta_data = np.array([line.split() for line in sta_lines]) - lon_v[:] = sta_data[:, 1].astype(float) - lat_v[:] = sta_data[:, 2].astype(float) + temp_var = ncfile.createVariable('temp', np.float32, ('time', 'siglay', 'station')) + salt_var = ncfile.createVariable('salinity', np.float32, ('time', 'siglay', 'station')) + u_var = ncfile.createVariable('u', np.float32, ('time', 'siglay', 'station')) + v_var = ncfile.createVariable('v', np.float32, ('time', 'siglay', 'station')) # Station names - station_names = [f'station_{prefix}_{i + 1:05d}' for i in range(nsta)] - names_char = nc.stringtochar(np.array(station_names, dtype=f'S{name_length}')) - name_v[:] = names_char - - time_v[:] = time_values[:] - zeta_v[:, :] = ele_values[:, :] - uwind_v[:, :] = uwind_values[:, :] - vwind_v[:, :] = vwind_values[:, :] - + station_names = 
[f'station_{PREFIXNOS}_{i+1:05d}' for i in range(nsta)] + names_char_array = nc.stringtochar(np.array(station_names, dtype=f'S{name_length}')) + name_station_var[:] = names_char_array + + # Station coordinates + lon_var[:] = sta_arr[:, 1].astype(float) + lat_var[:] = sta_arr[:, 2].astype(float) + + # Time and 2D variables + time_var[:] = time_values[:] + if ele_values is not None: + zeta_var[:, :] = ele_values[:, :] + if uwind_values is not None: + uwind_var[:, :] = uwind_values[:, :] + if vwind_values is not None: + vwind_var[:, :] = vwind_values[:, :] + + # 3D variables if temp_final is not None: - temp_v[:, :, :] = temp_final[:, :, :] + temp_var[:, :, :] = temp_final[:, :, :] if salt_final is not None: - salt_v[:, :, :] = salt_final[:, :, :] + salt_var[:, :, :] = salt_final[:, :, :] if u_final is not None: - u_v[:, :, :] = u_final[:, :, :] + u_var[:, :, :] = u_final[:, :, :] if v_final is not None: - v_v[:, :, :] = v_final[:, :, :] + v_var[:, :, :] = v_final[:, :, :] ncfile.close() - print(f" Created station file: {sta_nc}") - - return 0 + print(f"Station file created: {filesta} ({nsta} stations, {nstep} timesteps, {nver} levels)") def main(): - parser = argparse.ArgumentParser( - description="Combine SCHISM split outputs into field/station products" - ) - parser.add_argument("--prefix", help="OFS prefix (e.g., secofs, secofs_ufs)") - parser.add_argument("--cyc", help="Cycle hour (e.g., 12)") - parser.add_argument("--pdy", help="Date YYYYMMDD") - parser.add_argument("--mode", choices=["n", "f"], help="n=nowcast, f=forecast") - parser.add_argument("--timestart", help="Start time YYYYMMDDHH") - parser.add_argument("--workdir", default=".", help="Directory with SCHISM outputs") - parser.add_argument("--fixdir", help="FIX directory for grid/station files") - parser.add_argument("--no-compress", action="store_true", - help="Skip ncks compression") - parser.add_argument("--ctl", help="Control file path (alternative to args)") - - args = parser.parse_args() - - if 
args.ctl: - cfg = read_control_file(args.ctl) - prefix = cfg['prefix'] - cyc = cfg['cyc'] - pdy = cfg['pdy'] - mode = cfg['mode'] - timestart = cfg['timestart'] - else: - prefix = args.prefix or os.environ.get('PREFIXNOS', 'secofs') - cyc = args.cyc or os.environ.get('cyc', '00') - pdy = args.pdy or os.environ.get('PDY', '') - mode = args.mode or 'f' - timestart = args.timestart or f"{pdy}{cyc}" - - if not pdy: - print("ERROR: --pdy or PDY env var required", file=sys.stderr) - return 1 - - print(f"SCHISM Output Combiner: prefix={prefix} cyc={cyc} pdy={pdy} " - f"mode={mode} timestart={timestart}") - - return combine_schism_outputs( - prefix=prefix, cyc=cyc, pdy=pdy, mode=mode, timestart=timestart, - workdir=args.workdir, fixdir=args.fixdir, compress=not args.no_compress, - ) + print("=" * 60) + print("SCHISM Combine Outputs — CO-OPS Standard NetCDF Products") + print("=" * 60) + + # Read control file + ctl = read_control_file() + print(f" PREFIXNOS: {ctl['PREFIXNOS']}") + print(f" Cycle: {ctl['cyc']}") + print(f" PDY: {ctl['PDY']}") + print(f" Mode: {ctl['mode']} ({'nowcast' if ctl['mode'] == 'n' else 'forecast'})") + print(f" TimeStart: {ctl['timestart']}") + + # Get grid dimensions dynamically + print("\nReading grid dimensions...") + dims = get_grid_dimensions(ctl['PREFIXNOS']) + print(f" Nodes: {dims['n_nodes']}") + print(f" Elements: {dims['n_elements']}") + print(f" Stations: {dims['n_stations']}") + print(f" Levels: {dims['n_levels']}") + + # Process field files + print("\n--- Processing Field Files ---") + process_field_files(ctl, dims) + + # Process station files + print("\n--- Processing Station Files ---") + process_station_files(ctl, dims) + + print("\n" + "=" * 60) + print("SCHISM output combining completed successfully") + print("=" * 60) if __name__ == "__main__": - sys.exit(main()) + main() From 81fe1275622c5aba61781dba37eb1ee62dac595f Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Fri, 13 Mar 2026 08:40:35 -0400 Subject: [PATCH 23/40] Fix DET 
datm_in dimension mismatch and ensemble post stations-only mode - DET _comf_execute_ufs_coastal(): patch datm_in nx_global/ny_global to match actual datm_forcing.nc dimensions (was using stale template values 1881x1841 instead of actual 1721x1721 blended grid). Regenerate ESMF mesh from forcing file coordinates to guarantee consistency. - Ensemble post: accept staout_* as valid member output (UFS-Coastal ensemble produces only station text files, not out2d_*.nc field output). schism_combine_outputs.py now runs in stations-only mode when no out2d_*.nc files are present. - PBS script: add gsl module before nco (required dependency on WCOSS2). --- jobs/JNOS_OFS_ENSEMBLE_POST | 29 +++++--- pbs/jnos_secofs_ensemble_post.pbs | 1 + ush/nos_ofs_model_run.sh | 100 +++++++++++++++++++++++++++ ush/nosofs/schism_combine_outputs.py | 79 +++++++++++++++------ 4 files changed, 178 insertions(+), 31 deletions(-) diff --git a/jobs/JNOS_OFS_ENSEMBLE_POST b/jobs/JNOS_OFS_ENSEMBLE_POST index c8471b7..cd37d48 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_POST +++ b/jobs/JNOS_OFS_ENSEMBLE_POST @@ -178,26 +178,37 @@ else continue fi - # Skip if already combined - if ls ${MDIR}/${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc 1>/dev/null 2>&1; then + # Skip if already combined (check both field and station files) + if ls ${MDIR}/${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc 1>/dev/null 2>&1 || \ + ls ${MDIR}/${PREFIXNOS}.${cycle}.${PDY}.stations.forecast.nc 1>/dev/null 2>&1; then echo " Member ${MID}: already combined, skipping" continue fi - # Check for SCHISM combined outputs - if [ ! -f "${OUTPUTS_DIR}/out2d_1.nc" ]; then - echo " Member ${MID}: no out2d_1.nc, skipping" + # Check for ANY SCHISM outputs (combined out2d OR raw staout) + if [ ! -f "${OUTPUTS_DIR}/out2d_1.nc" ] && [ ! -f "${OUTPUTS_DIR}/staout_1" ]; then + echo " Member ${MID}: no out2d_1.nc or staout_1, skipping" continue fi echo " Combining outputs for member ${MID}..." 
+ if [ -f "${OUTPUTS_DIR}/out2d_1.nc" ]; then + echo " Mode: field + station files" + else + echo " Mode: station files only (no out2d_*.nc)" + fi cd ${OUTPUTS_DIR} # Step 1a: Copy FIX files to outputs dir - # (same as nos_ofs_nowcast_forecast.sh lines 1097-1101) + # station.lat.lon and sigma.dat always needed (stations) + # nv.nc and hgrid.gr3 needed only for field files (out2d_*.nc present) # Handle UFS-Coastal prefix variants: secofs_ufs grid files # may be named secofs.* in FIXofs - for _ext in nv.nc hgrid.gr3 station.lat.lon sigma.dat; do + _FIX_EXTS="station.lat.lon sigma.dat" + if [ -f "${OUTPUTS_DIR}/out2d_1.nc" ]; then + _FIX_EXTS="nv.nc hgrid.gr3 station.lat.lon sigma.dat" + fi + for _ext in ${_FIX_EXTS}; do _target="${PREFIXNOS}.${_ext}" if [ ! -f "${_target}" ]; then if [ -f "${FIXofs}/${PREFIXNOS}.${_ext}" ]; then @@ -234,11 +245,13 @@ else # Step 1d: Move products from outputs/ to member directory mv ${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc ${MDIR}/ 2>/dev/null mv ${PREFIXNOS}.${cycle}.${PDY}.stations.forecast.nc ${MDIR}/ 2>/dev/null + mv ${PREFIXNOS}.${cycle}.${PDY}.stations.nowcast.nc ${MDIR}/ 2>/dev/null cd ${DATA} N_FIELDS=$(ls ${MDIR}/${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc 2>/dev/null | wc -l) - echo " Member ${MID}: ${N_FIELDS} field files + station file created" + HAS_STA=$(ls ${MDIR}/${PREFIXNOS}.${cycle}.${PDY}.stations.*.nc 2>/dev/null | wc -l) + echo " Member ${MID}: ${N_FIELDS} field files, ${HAS_STA} station file(s) created" done fi diff --git a/pbs/jnos_secofs_ensemble_post.pbs b/pbs/jnos_secofs_ensemble_post.pbs index 0a8a158..6c5a1ce 100644 --- a/pbs/jnos_secofs_ensemble_post.pbs +++ b/pbs/jnos_secofs_ensemble_post.pbs @@ -52,6 +52,7 @@ module load python/${python_ver:-3.8.6} module load prod_envir/${prod_envir_ver:-2.0.6} module load prod_util/${prod_util_ver:-2.0.17} module load udunits/${udunits_ver:-2.2.28} +module load gsl/${gsl_ver:-2.7} module load nco/${nco_ver:-5.0.6} module list diff --git a/ush/nos_ofs_model_run.sh 
b/ush/nos_ofs_model_run.sh index 5b4ca70..c8ddb18 100644 --- a/ush/nos_ofs_model_run.sh +++ b/ush/nos_ofs_model_run.sh @@ -1015,6 +1015,106 @@ _comf_execute_ufs_coastal() { echo "WARNING: fd_ufs.yaml not found in ${DATA}/" fi + # ------------------------------------------------------------------ + # Sync datm_in and ESMF mesh with actual forcing file dimensions. + # The datm_in template may have stale nx_global/ny_global values + # (e.g., raw GFS 0.25° grid) that don't match the actual forcing + # file (e.g., blended HRRR+GFS grid). Mismatched dimensions cause + # CDEPS to scramble the atmospheric forcing distribution. + # ------------------------------------------------------------------ + local _forcing_file="${DATA}/${DATM_DIR}/datm_forcing.nc" + if [ -s "${_forcing_file}" ] && [ -s "${DATA}/datm_in" ]; then + local _fdims=$(python3 -c " +from netCDF4 import Dataset +ds = Dataset('${_forcing_file}', 'r') +try: + print(len(ds.dimensions['x']), len(ds.dimensions['y'])) +except: + print(len(ds.dimensions['longitude']), len(ds.dimensions['latitude'])) +ds.close() +" 2>/dev/null || echo "") + + if [ -n "$_fdims" ]; then + local _fnx=$(echo $_fdims | awk '{print $1}') + local _fny=$(echo $_fdims | awk '{print $2}') + local _old_nx=$(grep -oP 'nx_global\s*=\s*\K[0-9]+' ${DATA}/datm_in 2>/dev/null || echo "0") + local _old_ny=$(grep -oP 'ny_global\s*=\s*\K[0-9]+' ${DATA}/datm_in 2>/dev/null || echo "0") + + if [ "${_old_nx}" != "${_fnx}" ] || [ "${_old_ny}" != "${_fny}" ]; then + echo "Patching datm_in: nx_global ${_old_nx}->${_fnx}, ny_global ${_old_ny}->${_fny}" + fi + sed -i "s/nx_global[[:space:]]*=.*/nx_global = ${_fnx}/" ${DATA}/datm_in + sed -i "s/ny_global[[:space:]]*=.*/ny_global = ${_fny}/" ${DATA}/datm_in + + # Regenerate ESMF mesh from forcing file to guarantee consistency. + # This ensures mesh dimensions, elementMask, and coordinates all + # match the actual forcing data — no stale template meshes. 
+ local _ftotal=$((_fnx * _fny)) + echo "Generating ESMF mesh from forcing (${_fnx}x${_fny} = ${_ftotal} nodes)..." + python3 -c " +from netCDF4 import Dataset +import numpy as np + +ds = Dataset('${_forcing_file}', 'r') +try: + lons = ds.variables['longitude'][:] + lats = ds.variables['latitude'][:] + if lons.ndim == 1: + lon2d, lat2d = np.meshgrid(lons, lats) + else: + lon2d, lat2d = lons, lats +except: + lon2d = ds.variables['x'][:] + lat2d = ds.variables['y'][:] +ds.close() + +ny, nx = lon2d.shape +n_nodes = ny * nx +n_elems = (ny - 1) * (nx - 1) + +out = Dataset('${DATA}/${DATM_DIR}/datm_esmf_mesh.nc', 'w') +out.createDimension('nodeCount', n_nodes) +out.createDimension('elementCount', n_elems) +out.createDimension('maxNodePElement', 4) +out.createDimension('coordDim', 2) + +nodeCoords = out.createVariable('nodeCoords', 'f8', ('nodeCount', 'coordDim')) +nodeCoords.units = 'degrees' +coords = np.column_stack([lon2d.ravel(), lat2d.ravel()]) +nodeCoords[:] = coords + +j_idx, i_idx = np.mgrid[0:ny-1, 0:nx-1] +n0 = (j_idx * nx + i_idx + 1).ravel() +conn = np.column_stack([n0, n0 + 1, n0 + nx + 1, n0 + nx]).astype(np.int32) + +elemConn = out.createVariable('elementConn', 'i4', ('elementCount', 'maxNodePElement')) +elemConn.long_name = 'Node indices that define the element connectivity' +elemConn.start_index = 1 +elemConn[:] = conn + +numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) +numElemConn[:] = 4 + +elementMask = out.createVariable('elementMask', 'i4', ('elementCount',)) +elementMask[:] = np.ones(n_elems, dtype=np.int32) + +centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) +centerCoords.units = 'degrees' +clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]-1,0] + coords[conn[:,3]-1,0]) +clat = 0.25 * (coords[conn[:,0]-1,1] + coords[conn[:,1]-1,1] + coords[conn[:,2]-1,1] + coords[conn[:,3]-1,1]) +centerCoords[:] = np.column_stack([clon, clat]) + +out.title = 'ESMF mesh 
generated from DATM forcing file' +out.gridType = 'unstructured mesh' +out.close() +print('Generated ESMF mesh: {}x{} = {} nodes, {} elements'.format(nx, ny, n_nodes, n_elems)) +" 2>&1 + if [ $? -ne 0 ]; then + echo "WARNING: ESMF mesh generation failed — using existing mesh" >&2 + fi + fi + fi + # Determine executable local UFS_EXEC="" if [ -x "${DATA}/fv3_coastalS.exe" ]; then diff --git a/ush/nosofs/schism_combine_outputs.py b/ush/nosofs/schism_combine_outputs.py index 19595a1..afa814a 100644 --- a/ush/nosofs/schism_combine_outputs.py +++ b/ush/nosofs/schism_combine_outputs.py @@ -83,19 +83,29 @@ def read_control_file(cfile="schism_standard_output.ctl"): } -def get_grid_dimensions(prefixnos): - """Read grid dimensions dynamically from output files and FIX files.""" - # Node and element counts from out2d_1.nc - ds = nc.Dataset("out2d_1.nc") - n_nodes = len(ds.dimensions['nSCHISM_hgrid_node']) - ds.close() - - # Element count from nv.nc - nvfile = f"{prefixnos}.nv.nc" - ds_nv = nc.Dataset(nvfile) - nv = ds_nv.variables["nv"][:] - n_elements = nv.shape[1] # nv is (nface, nele) - ds_nv.close() +def get_grid_dimensions(prefixnos, fields_available=True): + """Read grid dimensions dynamically from output files and FIX files. + + If fields_available=False, skip reading out2d_1.nc and nv.nc (needed + only for field file creation). Station processing needs only station + count and sigma levels from FIX files. 
+ """ + n_nodes = 0 + n_elements = 0 + nv = None + + if fields_available: + # Node and element counts from out2d_1.nc + ds = nc.Dataset("out2d_1.nc") + n_nodes = len(ds.dimensions['nSCHISM_hgrid_node']) + ds.close() + + # Element count from nv.nc + nvfile = f"{prefixnos}.nv.nc" + ds_nv = nc.Dataset(nvfile) + nv = ds_nv.variables["nv"][:] + n_elements = nv.shape[1] # nv is (nface, nele) + ds_nv.close() # Station count from station.lat.lon sta_file = f"{prefixnos}.station.lat.lon" @@ -453,21 +463,44 @@ def main(): print(f" Mode: {ctl['mode']} ({'nowcast' if ctl['mode'] == 'n' else 'forecast'})") print(f" TimeStart: {ctl['timestart']}") + # Check what output files are available + has_fields = os.path.exists("out2d_1.nc") + has_stations = os.path.exists("staout_1") + + if not has_fields and not has_stations: + print("ERROR: No output files found (neither out2d_1.nc nor staout_1)") + sys.exit(1) + + if has_fields: + print("\n Field output files detected (out2d_*.nc)") + else: + print("\n No field output files (out2d_*.nc) — stations-only mode") + + if has_stations: + print(f" Station output files detected (staout_*)") + # Get grid dimensions dynamically print("\nReading grid dimensions...") - dims = get_grid_dimensions(ctl['PREFIXNOS']) - print(f" Nodes: {dims['n_nodes']}") - print(f" Elements: {dims['n_elements']}") + dims = get_grid_dimensions(ctl['PREFIXNOS'], fields_available=has_fields) + if has_fields: + print(f" Nodes: {dims['n_nodes']}") + print(f" Elements: {dims['n_elements']}") print(f" Stations: {dims['n_stations']}") print(f" Levels: {dims['n_levels']}") - # Process field files - print("\n--- Processing Field Files ---") - process_field_files(ctl, dims) - - # Process station files - print("\n--- Processing Station Files ---") - process_station_files(ctl, dims) + # Process field files (only if out2d_*.nc available) + if has_fields: + print("\n--- Processing Field Files ---") + process_field_files(ctl, dims) + else: + print("\n--- Skipping Field Files (no 
out2d_*.nc) ---") + + # Process station files (only if staout_* available) + if has_stations: + print("\n--- Processing Station Files ---") + process_station_files(ctl, dims) + else: + print("\n--- Skipping Station Files (no staout_*) ---") print("\n" + "=" * 60) print("SCHISM output combining completed successfully") From 7f8386dc99326016a2ba8f2ae55b2957ffcebcf8 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Fri, 13 Mar 2026 08:55:07 -0400 Subject: [PATCH 24/40] Add GEFS p05 as ensemble member 005, renumber RRFS to 006 Expands atmospheric ensemble from 6 to 7 members: - Members 001-005: GEFS perturbation forecasts (was 001-004) - Member 006: RRFS 3km (was member 005) --- parm/systems/secofs_ufs.yaml | 15 ++++++++++----- pbs/launch_secofs_ensemble.sh | 4 ++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/parm/systems/secofs_ufs.yaml b/parm/systems/secofs_ufs.yaml index 6d6423c..fbc0b26 100644 --- a/parm/systems/secofs_ufs.yaml +++ b/parm/systems/secofs_ufs.yaml @@ -372,13 +372,13 @@ shell_mappings: NY_GLOBAL: ufs_coastal.ny_global # GEFS atmospheric ensemble for UFS-Coastal (DATM forcing) -# 6-member atmospheric-only ensemble (no physics perturbation). +# 7-member atmospheric-only ensemble (no physics perturbation). 
# Member 000 is the control run (GFS+HRRR blended DATM forcing + default params) -# Members 001-004 use GEFS perturbation members (GEFS-only DATM, no HRRR blend) -# Member 005 uses RRFS (3km regridded to regular lat/lon) +# Members 001-005 use GEFS perturbation members (GEFS-only DATM, no HRRR blend) +# Member 006 uses RRFS (3km regridded to regular lat/lon) ensemble: enabled: true - n_members: 6 + n_members: 7 method: gefs seed: 42 # Reproducible sampling # GEFS (Global Ensemble Forecast System) atmospheric forcing configuration @@ -387,7 +387,7 @@ ensemble: # Perturbed members use GEFS-only (no HRRR) to preserve ensemble spread at coast gefs: enabled: true - n_gefs_members: 4 + n_gefs_members: 5 resolution: "0p25" product: "pgrb2sp25" control_member: "gec00" @@ -414,6 +414,10 @@ ensemble: met_source_1: GEFS_04 met_source_2: null "005": + label: "GEFS p05" + met_source_1: GEFS_05 + met_source_2: null + "006": label: "RRFS" met_source_1: RRFS met_source_2: null @@ -422,6 +426,7 @@ ensemble: - GEFS_02 - GEFS_03 - GEFS_04 + - GEFS_05 - RRFS # RRFS (Rapid Refresh Forecast System) atmospheric forcing configuration # 3km resolution, North America domain, available on WCOSS2 at para status diff --git a/pbs/launch_secofs_ensemble.sh b/pbs/launch_secofs_ensemble.sh index 75d5564..ba82e9a 100644 --- a/pbs/launch_secofs_ensemble.sh +++ b/pbs/launch_secofs_ensemble.sh @@ -99,9 +99,9 @@ if [ "${UFS_MODE}" = true ]; then OFS=secofs_ufs fi -# GEFS defaults: 6 members for SECOFS (4 GEFS + 1 RRFS + 1 control) +# GEFS defaults: 7 members for SECOFS (5 GEFS + 1 RRFS + 1 control) if [ "${GEFS_ENSEMBLE}" = true ] && [ "${_USER_SET_MEMBERS}" = false ]; then - N_MEMBERS=6 + N_MEMBERS=7 fi unset _USER_SET_MEMBERS From 1db3799f0b9aa05d2806c27854f09a200d98d664 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 14 Mar 2026 06:32:51 -0400 Subject: [PATCH 25/40] Fix ensemble: patch datm.streams to use member-specific forcing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit All ensemble members were copying datm.streams from the DET prep, which contains an absolute path to the DET's datm_forcing.nc. Even though each member staged its own GEFS-specific datm_forcing.nc to $MEMBER_DATA/INPUT/, DATM read from the DET path — producing identical output for all GEFS members. Fix: After staging member-specific forcing files, sed-replace the paths in datm.streams to point to $MEMBER_DATA/INPUT/ instead of the DET's datm_input directory. --- ush/nos_ofs_ensemble_run.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index 1cf6dd5..d4c3125 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -598,6 +598,19 @@ print('Added elementMask ({} elements) to existing mesh'.format(n_elems)) fi fi + # Patch datm.streams to point to member-specific forcing files. + # The datm.streams copied from COMOUT contains the DET prep's absolute + # path (e.g., $COMOUT/secofs_ufs.t12z.datm_input/datm_forcing.nc). + # Each ensemble member has its own datm_forcing.nc in $MEMBER_DATA/INPUT/ + # (from the GEFS-specific prep), so we must redirect datm.streams. + if [ -s "${MEMBER_DATA}/datm.streams" ] && [ -s "${MEMBER_DATA}/INPUT/datm_forcing.nc" ]; then + echo "Patching datm.streams to use member-specific INPUT/..." + sed -i "s|\"[^\"]*datm_forcing.nc\"|\"${MEMBER_DATA}/INPUT/datm_forcing.nc\"|g" ${MEMBER_DATA}/datm.streams + sed -i "s|\"[^\"]*datm_esmf_mesh.nc\"|\"${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc\"|g" ${MEMBER_DATA}/datm.streams + echo " stream_data_files01 -> ${MEMBER_DATA}/INPUT/datm_forcing.nc" + echo " stream_mesh_file01 -> ${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc" + fi + # Ensure forcing NetCDF has all 8 variables expected by datm.streams. # GEFS pgrb2sp25 lacks SPFH_2maboveground, PRATE_surface, and uses # PRMSL_meansealevel instead of MSLMA_meansealevel. 
Rather than From 5e6317e0ecf09aab82a3969ef7d06d12663b07b9 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 14 Mar 2026 09:14:56 -0400 Subject: [PATCH 26/40] Fix GEFS ensemble: patch datm_in model_meshfile for member-specific mesh All GEFS ensemble members produced identical output because datm_in model_meshfile and model_maskfile still pointed to the DET's ESMF mesh (1721x1721 GFS+HRRR grid) while the member's forcing uses 1440x721 GEFS grid. The nx_global/ny_global were correctly patched but the mesh file reference was not, causing DATM to decompose forcing incorrectly. Now patch both datm.streams (stream_data_files01, stream_mesh_file01) AND datm_in (model_meshfile, model_maskfile) to point to each member's own INPUT/datm_esmf_mesh.nc and INPUT/datm_forcing.nc. --- ush/nos_ofs_ensemble_run.sh | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index d4c3125..a96cce6 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -598,17 +598,30 @@ print('Added elementMask ({} elements) to existing mesh'.format(n_elems)) fi fi - # Patch datm.streams to point to member-specific forcing files. - # The datm.streams copied from COMOUT contains the DET prep's absolute - # path (e.g., $COMOUT/secofs_ufs.t12z.datm_input/datm_forcing.nc). + # Patch datm.streams AND datm_in to point to member-specific forcing files. + # The datm.streams and datm_in copied from COMOUT contain the DET prep's + # absolute path (e.g., $COMOUT/secofs_ufs.t12z.datm_input/datm_forcing.nc). # Each ensemble member has its own datm_forcing.nc in $MEMBER_DATA/INPUT/ - # (from the GEFS-specific prep), so we must redirect datm.streams. - if [ -s "${MEMBER_DATA}/datm.streams" ] && [ -s "${MEMBER_DATA}/INPUT/datm_forcing.nc" ]; then - echo "Patching datm.streams to use member-specific INPUT/..." 
- sed -i "s|\"[^\"]*datm_forcing.nc\"|\"${MEMBER_DATA}/INPUT/datm_forcing.nc\"|g" ${MEMBER_DATA}/datm.streams - sed -i "s|\"[^\"]*datm_esmf_mesh.nc\"|\"${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc\"|g" ${MEMBER_DATA}/datm.streams - echo " stream_data_files01 -> ${MEMBER_DATA}/INPUT/datm_forcing.nc" - echo " stream_mesh_file01 -> ${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc" + # (from the GEFS-specific prep), so we must redirect both files. + if [ -s "${MEMBER_DATA}/INPUT/datm_forcing.nc" ]; then + echo "Patching datm.streams and datm_in to use member-specific INPUT/..." + + # Patch datm.streams: stream_data_files01 and stream_mesh_file01 + if [ -s "${MEMBER_DATA}/datm.streams" ]; then + sed -i "s|\"[^\"]*datm_forcing.nc\"|\"${MEMBER_DATA}/INPUT/datm_forcing.nc\"|g" ${MEMBER_DATA}/datm.streams + sed -i "s|\"[^\"]*datm_esmf_mesh.nc\"|\"${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc\"|g" ${MEMBER_DATA}/datm.streams + echo " datm.streams: data -> ${MEMBER_DATA}/INPUT/datm_forcing.nc" + echo " datm.streams: mesh -> ${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc" + fi + + # Patch datm_in: model_meshfile and model_maskfile + # These also use the DET's @[DATM_INPUT_DIR] path and must point to + # the member's ESMF mesh (which matches the GEFS grid, not DET grid). + if [ -s "${MEMBER_DATA}/datm_in" ] && [ -s "${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc" ]; then + sed -i "s|model_meshfile[[:space:]]*=.*|model_meshfile = \"${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc\"|" ${MEMBER_DATA}/datm_in + sed -i "s|model_maskfile[[:space:]]*=.*|model_maskfile = \"${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc\"|" ${MEMBER_DATA}/datm_in + echo " datm_in: meshfile -> ${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc" + fi fi # Ensure forcing NetCDF has all 8 variables expected by datm.streams. 
From e62d8557d268c9e9825dbac4172ef75f559fdb20 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 14 Mar 2026 13:18:24 -0400 Subject: [PATCH 27/40] Cap GFS/GEFS cycle selection at model cycle time for reproducible forcing find_gfs_file() and find_gefs_file() now never use a GFS/GEFS cycle newer than ${PDY}${cyc}. This ensures datm_forcing.nc is identical regardless of when prep runs, matching operational SECOFS behavior. Previously, running prep later would find newer GFS cycles with shorter forecast hours, producing different forcing data and causing UFS-Coastal ensemble control to diverge from operational standalone SECOFS. Override via MAX_GFS_CYCLE env var if needed. --- ush/nosofs/nos_ofs_create_datm_forcing.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ush/nosofs/nos_ofs_create_datm_forcing.sh b/ush/nosofs/nos_ofs_create_datm_forcing.sh index da9b310..7eccf89 100644 --- a/ush/nosofs/nos_ofs_create_datm_forcing.sh +++ b/ush/nosofs/nos_ofs_create_datm_forcing.sh @@ -188,11 +188,14 @@ fi # ============================================================================= # Function: Find GFS file for a valid time # Strategy: Use base cycle with extended forecasts (up to f120 = 5 days) +# Note: MAX_GFS_CYCLE caps the newest GFS cycle to use (default: ${PDY}${cyc}). +# This ensures reproducible file selection regardless of when prep runs. 
# ============================================================================= find_gfs_file() { local VALID_TIME=$1 local VALID_DATE=$(echo $VALID_TIME | cut -c1-8) local VALID_HH=$(echo $VALID_TIME | cut -c9-10) + local _MAX_CYCLE=${MAX_GFS_CYCLE:-${PDY}${cyc}} # Calculate initial cycle (6 hours before valid time, rounded down to 00/06/12/18) local INIT_CYCLE_TIME=$($NDATE -6 $VALID_TIME) @@ -205,6 +208,14 @@ find_gfs_file() { local CYCLE_DATE=$INIT_DATE local CYCLE_TIME="${CYCLE_DATE}${CYCLE_HH}" + # Cap: never use a GFS cycle newer than the model cycle + if [ "$CYCLE_TIME" -gt "$_MAX_CYCLE" ]; then + local _CAP_HH_NUM=$((10#$(echo $_MAX_CYCLE | cut -c9-10))) + CYCLE_HH=$(printf "%02d" $((_CAP_HH_NUM / 6 * 6))) + CYCLE_DATE=$(echo $_MAX_CYCLE | cut -c1-8) + CYCLE_TIME="${CYCLE_DATE}${CYCLE_HH}" + fi + # Try up to 12 cycles (going back 72 hours) for attempt in 1 2 3 4 5 6 7 8 9 10 11 12; do local FHR=$($NHOUR $VALID_TIME $CYCLE_TIME 2>/dev/null || echo "-1") @@ -233,11 +244,13 @@ find_gfs_file() { # Function: Find GEFS pgrb2sp25 file for a valid time # GEFS path: ${COMINgefs}/gefs.YYYYMMDD/HH/atmos/pgrb2sp25/gepNN.tHHz.pgrb2s.0p25.fFFF # Strategy: Same as GFS — use base cycle with extended forecasts +# Note: MAX_GFS_CYCLE caps the newest cycle to use (default: ${PDY}${cyc}). 
# ============================================================================= find_gefs_file() { local VALID_TIME=$1 local VALID_DATE=$(echo $VALID_TIME | cut -c1-8) local VALID_HH=$(echo $VALID_TIME | cut -c9-10) + local _MAX_CYCLE=${MAX_GFS_CYCLE:-${PDY}${cyc}} # Calculate initial cycle (6 hours before valid time, rounded down to 00/06/12/18) local INIT_CYCLE_TIME=$($NDATE -6 $VALID_TIME) @@ -249,6 +262,14 @@ find_gefs_file() { local CYCLE_DATE=$INIT_DATE local CYCLE_TIME="${CYCLE_DATE}${CYCLE_HH}" + # Cap: never use a cycle newer than the model cycle + if [ "$CYCLE_TIME" -gt "$_MAX_CYCLE" ]; then + local _CAP_HH_NUM=$((10#$(echo $_MAX_CYCLE | cut -c9-10))) + CYCLE_HH=$(printf "%02d" $((_CAP_HH_NUM / 6 * 6))) + CYCLE_DATE=$(echo $_MAX_CYCLE | cut -c1-8) + CYCLE_TIME="${CYCLE_DATE}${CYCLE_HH}" + fi + # Try up to 12 cycles (going back 72 hours) for attempt in 1 2 3 4 5 6 7 8 9 10 11 12; do local FHR=$($NHOUR $VALID_TIME $CYCLE_TIME 2>/dev/null || echo "-1") @@ -684,6 +705,7 @@ print_files_summary() { # ============================================================================= echo "" echo "Step 1: Finding GRIB2 files for time range..." +echo " Max GFS/GEFS cycle: ${MAX_GFS_CYCLE:-${PDY}${cyc}}" echo "============================================" FILE_COUNT=0 From 54dad70ac5e37af09a30d00f00bcb639856217e2 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 14 Mar 2026 19:53:52 -0400 Subject: [PATCH 28/40] Skip ESMF mesh regeneration for control member to match DET The DET prep creates an ESMF mesh with 1722x1722 nodes (padding row/column) for a 1721x1721 forcing grid. The ensemble script's node count check (mesh_nodes != nx*ny) triggered full mesh regeneration with 1721x1721 nodes, producing different interpolation weights and causing member 000 to diverge from DET despite having identical datm_forcing.nc. Fix: detect control members (is_control_det flag) that stage from the DET prep's datm_input directory, and skip mesh regeneration. 
Only add elementMask if missing (required by CMEPS). Non-control members (GEFS, RRFS) continue to use mesh regeneration as before. --- ush/nos_ofs_ensemble_run.sh | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index a96cce6..3719932 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -426,6 +426,7 @@ print(ms) " 2>/dev/null || echo "") fi + local is_control_det=false case "${met_source}" in GEFS_*) local gefs_id=$(echo "${met_source}" | sed 's/GEFS_//') @@ -437,6 +438,7 @@ print(ms) *) # Control member (GFS+HRRR) uses the standard prep output datm_dir="${COMOUT}/${RUN}.${cycle}.datm_input" + is_control_det=true ;; esac @@ -489,6 +491,11 @@ ds.close() # Verify ESMF mesh matches forcing dims. If the member's # datm_input dir already had a matching mesh (e.g., from gefs_prep), # no regeneration needed. Only regenerate if mesh node count != nx*ny. + # + # EXCEPTION: Control members (is_control_det=true) use the DET prep's + # mesh+forcing pair, which is validated to match Op SECOFS. The DET's + # ESMF mesh may have a padding row/column (e.g., 1722x1722 nodes for + # 1721x1721 forcing) — this is correct and must NOT be regenerated. 
local mem_total=$((mem_nx * mem_ny)) local staged_mesh="${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc" local mesh_nodes="0" @@ -501,6 +508,32 @@ ds.close() " 2>/dev/null || echo "0") fi + # Control member: trust the DET prep's mesh — skip regen + if [ "${is_control_det}" = "true" ] && [ "${mesh_nodes}" != "0" ]; then + echo "Control member: using DET mesh as-is (${mesh_nodes} nodes, forcing ${mem_nx}x${mem_ny}=${mem_total})" + # Only ensure elementMask exists (required by CMEPS) + local has_emask_ctrl=$(python3 -c " +from netCDF4 import Dataset +ds = Dataset('${staged_mesh}', 'r') +print('true' if 'elementMask' in ds.variables else 'false') +ds.close() +" 2>/dev/null || echo "false") + if [ "${has_emask_ctrl}" != "true" ]; then + echo "Adding elementMask to DET mesh..." + python3 -c " +from netCDF4 import Dataset +import numpy as np +ds = Dataset('${staged_mesh}', 'a') +n_elems = len(ds.dimensions['elementCount']) +em = ds.createVariable('elementMask', 'i4', ('elementCount',)) +em[:] = np.ones(n_elems, dtype=np.int32) +ds.close() +print('Added elementMask ({} elements)'.format(n_elems)) +" 2>&1 + fi + else + # Non-control members: check mesh node count and regenerate if needed + # Also check elementMask exists — meshes generated before the # elementMask fix will pass the nodeCount check but still crash. local has_emask="false" @@ -595,6 +628,8 @@ print('Added elementMask ({} elements) to existing mesh'.format(n_elems)) else echo "ESMF mesh OK: ${mesh_nodes} nodes matches forcing ${mem_nx}x${mem_ny}" fi + + fi # end is_control_det check fi fi From cecc1f01dea0872294ecd3a35f72b761a57ee3eb Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 14 Mar 2026 20:01:21 -0400 Subject: [PATCH 29/40] Always regenerate ESMF mesh from forcing file in ensemble run The DET model run (_comf_execute_ufs_coastal) always regenerates the ESMF mesh from datm_forcing.nc at runtime. 
The ensemble run was using the prep's mesh from $COMOUT, which is generated by a different method and has different coordinate values. This caused member 000 to diverge from DET despite having identical forcing. Fix: always regenerate the mesh from the forcing file using the same Python code as the DET model run. This ensures identical interpolation weights for all members. The mesh generation code now also handles both coordinate conventions (1D lon/lat arrays and 2D x/y arrays) matching nos_ofs_model_run.sh exactly. Replaces the previous approach of skipping regeneration for control members, which was incorrect. --- ush/nos_ofs_ensemble_run.sh | 109 ++++++------------------------------ 1 file changed, 18 insertions(+), 91 deletions(-) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index 3719932..c605f84 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -488,69 +488,15 @@ ds.close() sed -i "s/ny_global[[:space:]]*=.*/ny_global = ${mem_ny}/" ${MEMBER_DATA}/datm_in echo "Patched datm_in: nx_global=${mem_nx}, ny_global=${mem_ny}" - # Verify ESMF mesh matches forcing dims. If the member's - # datm_input dir already had a matching mesh (e.g., from gefs_prep), - # no regeneration needed. Only regenerate if mesh node count != nx*ny. - # - # EXCEPTION: Control members (is_control_det=true) use the DET prep's - # mesh+forcing pair, which is validated to match Op SECOFS. The DET's - # ESMF mesh may have a padding row/column (e.g., 1722x1722 nodes for - # 1721x1721 forcing) — this is correct and must NOT be regenerated. + # Always regenerate ESMF mesh from the forcing file. + # This matches what _comf_execute_ufs_coastal() does for the DET + # model run (nos_ofs_model_run.sh:1049-1111). The prep may create + # a different mesh (different generation method or coordinate + # convention), so we must regenerate to ensure identical + # interpolation weights between DET and ensemble members. 
local mem_total=$((mem_nx * mem_ny)) - local staged_mesh="${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc" - local mesh_nodes="0" - if [ -s "$staged_mesh" ]; then - mesh_nodes=$(python3 -c " -from netCDF4 import Dataset -ds = Dataset('${staged_mesh}', 'r') -print(len(ds.dimensions.get('nodeCount', ds.dimensions.get('node_count', [])))) -ds.close() -" 2>/dev/null || echo "0") - fi - - # Control member: trust the DET prep's mesh — skip regen - if [ "${is_control_det}" = "true" ] && [ "${mesh_nodes}" != "0" ]; then - echo "Control member: using DET mesh as-is (${mesh_nodes} nodes, forcing ${mem_nx}x${mem_ny}=${mem_total})" - # Only ensure elementMask exists (required by CMEPS) - local has_emask_ctrl=$(python3 -c " -from netCDF4 import Dataset -ds = Dataset('${staged_mesh}', 'r') -print('true' if 'elementMask' in ds.variables else 'false') -ds.close() -" 2>/dev/null || echo "false") - if [ "${has_emask_ctrl}" != "true" ]; then - echo "Adding elementMask to DET mesh..." - python3 -c " -from netCDF4 import Dataset -import numpy as np -ds = Dataset('${staged_mesh}', 'a') -n_elems = len(ds.dimensions['elementCount']) -em = ds.createVariable('elementMask', 'i4', ('elementCount',)) -em[:] = np.ones(n_elems, dtype=np.int32) -ds.close() -print('Added elementMask ({} elements)'.format(n_elems)) -" 2>&1 - fi - else - # Non-control members: check mesh node count and regenerate if needed - - # Also check elementMask exists — meshes generated before the - # elementMask fix will pass the nodeCount check but still crash. 
- local has_emask="false" - if [ -s "$staged_mesh" ] && [ "${mesh_nodes}" = "${mem_total}" ]; then - has_emask=$(python3 -c " -from netCDF4 import Dataset -ds = Dataset('${staged_mesh}', 'r') -print('true' if 'elementMask' in ds.variables else 'false') -ds.close() -" 2>/dev/null || echo "false") - fi - - if [ "${mesh_nodes}" != "${mem_total}" ]; then - # Node count mismatch — full regeneration needed - echo "ESMF mesh node mismatch: mesh=${mesh_nodes}, forcing=${mem_total} (${mem_nx}x${mem_ny})" - echo "Regenerating ESMF mesh (vectorized)..." - python3 -c " + echo "Regenerating ESMF mesh from forcing (${mem_nx}x${mem_ny} = ${mem_total} nodes)..." + python3 -c " from netCDF4 import Dataset import numpy as np @@ -558,10 +504,13 @@ ds = Dataset('${MEMBER_DATA}/INPUT/datm_forcing.nc', 'r') try: lons = ds.variables['longitude'][:] lats = ds.variables['latitude'][:] - lon2d, lat2d = np.meshgrid(lons, lats) + if lons.ndim == 1: + lon2d, lat2d = np.meshgrid(lons, lats) + else: + lon2d, lat2d = lons, lats except: - lon2d = ds.variables['longitude'][:] - lat2d = ds.variables['latitude'][:] + lon2d = ds.variables['x'][:] + lat2d = ds.variables['y'][:] ds.close() ny, nx = lon2d.shape @@ -579,7 +528,6 @@ nodeCoords.units = 'degrees' coords = np.column_stack([lon2d.ravel(), lat2d.ravel()]) nodeCoords[:] = coords -# Vectorized element connectivity (no Python for-loop) j_idx, i_idx = np.mgrid[0:ny-1, 0:nx-1] n0 = (j_idx * nx + i_idx + 1).ravel() conn = np.column_stack([n0, n0 + 1, n0 + nx + 1, n0 + nx]).astype(np.int32) @@ -601,35 +549,14 @@ clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]- clat = 0.25 * (coords[conn[:,0]-1,1] + coords[conn[:,1]-1,1] + coords[conn[:,2]-1,1] + coords[conn[:,3]-1,1]) centerCoords[:] = np.column_stack([clon, clat]) -out.title = 'ESMF unstructured mesh for DATM forcing' +out.title = 'ESMF mesh generated from DATM forcing file' out.gridType = 'unstructured mesh' out.close() -print('Generated ESMF mesh: ${mem_nx}x${mem_ny} 
= {} nodes, {} elements'.format(n_nodes, n_elems)) +print('Generated ESMF mesh: {}x{} = {} nodes, {} elements'.format(nx, ny, n_nodes, n_elems)) " 2>&1 - if [ $? -ne 0 ]; then - echo "WARNING: ESMF mesh regeneration failed — model may crash" >&2 - fi - elif [ "${has_emask}" != "true" ]; then - # Node count OK but elementMask missing — lightweight in-place patch - echo "ESMF mesh missing elementMask — patching in-place..." - python3 -c " -from netCDF4 import Dataset -import numpy as np -ds = Dataset('${staged_mesh}', 'a') -n_elems = len(ds.dimensions['elementCount']) -em = ds.createVariable('elementMask', 'i4', ('elementCount',)) -em[:] = np.ones(n_elems, dtype=np.int32) -ds.close() -print('Added elementMask ({} elements) to existing mesh'.format(n_elems)) -" 2>&1 - if [ $? -ne 0 ]; then - echo "WARNING: elementMask patch failed — model may crash" >&2 - fi - else - echo "ESMF mesh OK: ${mesh_nodes} nodes matches forcing ${mem_nx}x${mem_ny}" + if [ $? -ne 0 ]; then + echo "WARNING: ESMF mesh generation failed — model may crash" >&2 fi - - fi # end is_control_det check fi fi From bbbaa2ac90c36980a70358dc9eea3458613b1e36 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 14 Mar 2026 20:27:42 -0400 Subject: [PATCH 30/40] Standardize on SCRIP-based ESMF mesh everywhere, stop regenerating All three mesh generation points now use the prep's SCRIP-based mesh (proc_scrip.py + ESMF_Scrip2Unstruct) instead of inline Python center- based mesh generation. This ensures DET and ensemble member 000 use identical interpolation weights, eliminating the divergence caused by different mesh coordinate conventions. 
Changes: - JNOS_OFS_ENSEMBLE_ATMOS_PREP: Use SCRIP pipeline for GEFS mesh; preserve control mesh from prep (only add elementMask if missing) - nos_ofs_model_run.sh: Remove Python mesh regeneration from _comf_execute_ufs_coastal(); keep datm_in nx/ny patching and add elementMask check only - nos_ofs_ensemble_run.sh: Remove "always regenerate" mesh block; use prep's staged mesh with elementMask check only --- jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP | 215 +++++++++++++++--------------- ush/nos_ofs_ensemble_run.sh | 81 +++-------- ush/nos_ofs_model_run.sh | 78 +++-------- 3 files changed, 139 insertions(+), 235 deletions(-) diff --git a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP index 4bae3a2..b6d9a6f 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP +++ b/jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP @@ -597,23 +597,95 @@ else export WGRIB2=${WGRIB2:-$(which wgrib2 2>/dev/null || echo wgrib2)} # --------------------------------------------------------------- - # Pre-generate ESMF mesh for the GEFS 0.25-degree grid ONCE. - # All GEFS members share the same grid, so generate from the - # first available GEFS extraction sample using Python (not - # ESMF_Scrip2Unstruct which fails under parallel cfp). + # Pre-generate ESMF mesh for the GEFS 0.25-degree grid ONCE + # using the standard SCRIP → ESMF_Scrip2Unstruct pipeline. + # All GEFS members share the same grid. This runs BEFORE cfp. # --------------------------------------------------------------- GEFS_MESH=${DATA}/datm_ensemble/gefs_esmf_mesh.nc mkdir -p ${DATA}/datm_ensemble - echo "Pre-generating ESMF mesh for GEFS 0.25-degree grid..." + + # Find ESMF_Scrip2Unstruct + ESMF_SCRIP2UNSTRUCT="${ESMF_SCRIP2UNSTRUCT:-}" + if [ -z "$ESMF_SCRIP2UNSTRUCT" ] || [ ! 
-x "$ESMF_SCRIP2UNSTRUCT" ]; then + if command -v ESMF_Scrip2Unstruct &> /dev/null; then + ESMF_SCRIP2UNSTRUCT=$(command -v ESMF_Scrip2Unstruct) + else + for esmf_dir in /apps/prod/hpc-stack/*/intel-*/cray-mpich-*/esmf/*/bin; do + if [ -x "${esmf_dir}/ESMF_Scrip2Unstruct" ]; then + ESMF_SCRIP2UNSTRUCT="${esmf_dir}/ESMF_Scrip2Unstruct" + break + fi + done + fi + fi + + # Setup LD_LIBRARY_PATH for ESMF's HDF5/NetCDF dependencies + if [ -n "$ESMF_SCRIP2UNSTRUCT" ] && [ -x "$ESMF_SCRIP2UNSTRUCT" ]; then + echo "Using ESMF_Scrip2Unstruct: $ESMF_SCRIP2UNSTRUCT" + ESMF_DIR=$(cd "$(dirname "$ESMF_SCRIP2UNSTRUCT")/.." 2>/dev/null && pwd) + MPI_DIR=$(cd "$ESMF_DIR/../.." 2>/dev/null && pwd) + COMPILER_DIR=$(cd "$MPI_DIR/.." 2>/dev/null && pwd) + for _lib_pkg in hdf5 netcdf; do + for _lib_dir in "$MPI_DIR"/${_lib_pkg}/*/lib "$COMPILER_DIR"/${_lib_pkg}/*/lib; do + if [ -d "$_lib_dir" ]; then + export LD_LIBRARY_PATH="${_lib_dir}:${LD_LIBRARY_PATH:-}" + break + fi + done + done + else + echo "WARNING: ESMF_Scrip2Unstruct not found — will fall back to Python mesh" >&2 + fi + + # proc_scrip.py location + PYnos=${PYnos:-${USHnos}/python/nos_ofs} + SCRIP_SCRIPT="${PYnos}/datm/proc_scrip.py" + + # Step 1: Create a synthetic GEFS 0.25° grid NetCDF for SCRIP input + GEFS_GRID_NC=${DATA}/datm_ensemble/gefs_grid.nc + GEFS_SCRIP_NC=${DATA}/datm_ensemble/gefs_scrip.nc + echo "Pre-generating ESMF mesh for GEFS 0.25-degree grid (SCRIP method)..." 
python3 -c " import numpy as np from netCDF4 import Dataset +lons = np.arange(0, 360, 0.25) +lats = np.arange(-90, 90.25, 0.25) + +out = Dataset('${GEFS_GRID_NC}', 'w') +out.createDimension('longitude', len(lons)) +out.createDimension('latitude', len(lats)) +v_lon = out.createVariable('longitude', 'f8', ('longitude',)) +v_lon[:] = lons +v_lon.units = 'degrees_east' +v_lat = out.createVariable('latitude', 'f8', ('latitude',)) +v_lat[:] = lats +v_lat.units = 'degrees_north' +out.close() +print('Created GEFS grid file: {}x{}'.format(len(lons), len(lats))) +" + # Step 2: Generate SCRIP file + if [ -s "$SCRIP_SCRIPT" ] && [ -s "$GEFS_GRID_NC" ]; then + python3 "$SCRIP_SCRIPT" --ifile "$GEFS_GRID_NC" \ + --ofile "$(basename $GEFS_SCRIP_NC)" --odir "$(dirname $GEFS_SCRIP_NC)" + fi + + # Step 3: Create ESMF mesh from SCRIP + if [ -n "$ESMF_SCRIP2UNSTRUCT" ] && [ -x "$ESMF_SCRIP2UNSTRUCT" ] && [ -s "$GEFS_SCRIP_NC" ]; then + $ESMF_SCRIP2UNSTRUCT "$GEFS_SCRIP_NC" "$GEFS_MESH" 0 + fi + + # Verify or fall back to Python method + if [ ! -s "${GEFS_MESH}" ]; then + echo "WARNING: SCRIP method failed for GEFS mesh, falling back to Python..." 
+ python3 -c " +import numpy as np +from netCDF4 import Dataset + lons = np.arange(0, 360, 0.25) lats = np.arange(-90, 90.25, 0.25) nx, ny = len(lons), len(lats) lon2d, lat2d = np.meshgrid(lons, lats) - n_nodes = ny * nx n_elems = (ny - 1) * (nx - 1) @@ -622,142 +694,65 @@ out.createDimension('nodeCount', n_nodes) out.createDimension('elementCount', n_elems) out.createDimension('maxNodePElement', 4) out.createDimension('coordDim', 2) - nodeCoords = out.createVariable('nodeCoords', 'f8', ('nodeCount', 'coordDim')) nodeCoords.units = 'degrees' coords = np.column_stack([lon2d.ravel(), lat2d.ravel()]) nodeCoords[:] = coords - -# Vectorized element connectivity (no Python for-loop) j_idx, i_idx = np.mgrid[0:ny-1, 0:nx-1] n0 = (j_idx * nx + i_idx + 1).ravel() -conn = np.column_stack([n0, n0 + 1, n0 + nx + 1, n0 + nx]).astype(np.int32) - +conn = np.column_stack([n0, n0+1, n0+nx+1, n0+nx]).astype(np.int32) elemConn = out.createVariable('elementConn', 'i4', ('elementCount', 'maxNodePElement')) elemConn.long_name = 'Node indices that define the element connectivity' elemConn.start_index = 1 elemConn[:] = conn - numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) numElemConn[:] = 4 - elementMask = out.createVariable('elementMask', 'i4', ('elementCount',)) elementMask[:] = np.ones(n_elems, dtype=np.int32) - centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) centerCoords.units = 'degrees' -clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]-1,0] + coords[conn[:,3]-1,0]) -clat = 0.25 * (coords[conn[:,0]-1,1] + coords[conn[:,1]-1,1] + coords[conn[:,2]-1,1] + coords[conn[:,3]-1,1]) +clon = 0.25*(coords[conn[:,0]-1,0]+coords[conn[:,1]-1,0]+coords[conn[:,2]-1,0]+coords[conn[:,3]-1,0]) +clat = 0.25*(coords[conn[:,0]-1,1]+coords[conn[:,1]-1,1]+coords[conn[:,2]-1,1]+coords[conn[:,3]-1,1]) centerCoords[:] = np.column_stack([clon, clat]) - -out.title = 'ESMF mesh for GEFS 0.25-degree global grid' 
+out.title = 'ESMF mesh for GEFS 0.25-degree global grid (fallback)' out.gridType = 'unstructured mesh' out.close() -print('Generated GEFS ESMF mesh: {}x{} = {} nodes, {} elements'.format(nx, ny, n_nodes, n_elems)) +print('Fallback: Generated GEFS ESMF mesh: {}x{} = {} nodes, {} elements'.format(nx, ny, n_nodes, n_elems)) " - rc=$? - if [ $rc -ne 0 ] || [ ! -s "${GEFS_MESH}" ]; then - echo "ERROR: Failed to pre-generate GEFS ESMF mesh" >&2 + fi + + if [ ! -s "${GEFS_MESH}" ]; then + echo "ERROR: Failed to generate GEFS ESMF mesh" >&2 exit 1 fi echo "Pre-generated GEFS ESMF mesh: ${GEFS_MESH}" # --------------------------------------------------------------- - # Fix control (det prep) ESMF mesh if it doesn't match forcing dims. - # ESMF_Scrip2Unstruct sometimes produces meshes with different node - # counts than the forcing grid (e.g., 1723x1723 for a 1721x1721 grid). - # Pre-generate the correct mesh here so member 000 never needs to - # regenerate at runtime. + # Control (DET prep) ESMF mesh: use the prep's SCRIP-based mesh + # as-is. The blended forcing pipeline already creates the correct + # mesh via proc_scrip.py + ESMF_Scrip2Unstruct. Do NOT overwrite + # it — the node count difference (e.g., 1722x1722 for 1721x1721 + # forcing) is correct ESMF behavior (corner-based nodes). + # Only add elementMask if missing (required by CMEPS). # --------------------------------------------------------------- CTRL_DATM_DIR=${COMOUT}/${RUN}.${cycle}.datm_input - if [ -s "${CTRL_DATM_DIR}/datm_forcing.nc" ]; then - echo "Checking control ESMF mesh against forcing dims..." 
+ if [ -s "${CTRL_DATM_DIR}/datm_esmf_mesh.nc" ]; then + echo "Control ESMF mesh: using prep's SCRIP-based mesh as-is" python3 -c " -import numpy as np from netCDF4 import Dataset - -# Read forcing grid dims -ds = Dataset('${CTRL_DATM_DIR}/datm_forcing.nc', 'r') -try: - nx = len(ds.dimensions['x']) - ny = len(ds.dimensions['y']) - lons = ds.variables['longitude'][:] - lats = ds.variables['latitude'][:] - if lons.ndim == 1: - lon2d, lat2d = np.meshgrid(lons, lats) - else: - lon2d, lat2d = lons, lats -except: - nx = len(ds.dimensions['longitude']) - ny = len(ds.dimensions['latitude']) - lons = ds.variables['longitude'][:] - lats = ds.variables['latitude'][:] - lon2d, lat2d = np.meshgrid(lons, lats) -ds.close() - -forcing_total = nx * ny - -# Check existing mesh -mesh_path = '${CTRL_DATM_DIR}/datm_esmf_mesh.nc' -try: - mds = Dataset(mesh_path, 'r') - mesh_nodes = len(mds.dimensions.get('nodeCount', mds.dimensions.get('node_count', []))) - mds.close() -except: - mesh_nodes = 0 - -if mesh_nodes == forcing_total: - print('Control ESMF mesh OK: {} nodes matches forcing {}x{}'.format(mesh_nodes, nx, ny)) +import numpy as np +ds = Dataset('${CTRL_DATM_DIR}/datm_esmf_mesh.nc', 'a') +if 'elementMask' not in ds.variables: + n_elems = len(ds.dimensions['elementCount']) + em = ds.createVariable('elementMask', 'i4', ('elementCount',)) + em[:] = np.ones(n_elems, dtype=np.int32) + print('Added elementMask ({} elements) to control mesh'.format(n_elems)) else: - print('Control ESMF mesh mismatch: mesh={} vs forcing={}x{}={}'.format(mesh_nodes, nx, ny, forcing_total)) - print('Regenerating control ESMF mesh...') - n_nodes = ny * nx - n_elems = (ny - 1) * (nx - 1) - - out = Dataset(mesh_path, 'w') - out.createDimension('nodeCount', n_nodes) - out.createDimension('elementCount', n_elems) - out.createDimension('maxNodePElement', 4) - out.createDimension('coordDim', 2) - - nodeCoords = out.createVariable('nodeCoords', 'f8', ('nodeCount', 'coordDim')) - nodeCoords.units = 'degrees' - 
coords = np.column_stack([lon2d.ravel(), lat2d.ravel()]) - nodeCoords[:] = coords - - j_idx, i_idx = np.mgrid[0:ny-1, 0:nx-1] - n0 = (j_idx * nx + i_idx + 1).ravel() - conn = np.column_stack([n0, n0+1, n0+nx+1, n0+nx]).astype(np.int32) - - elemConn = out.createVariable('elementConn', 'i4', ('elementCount', 'maxNodePElement')) - elemConn.long_name = 'Node indices that define the element connectivity' - elemConn.start_index = 1 - elemConn[:] = conn - - numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) - numElemConn[:] = 4 - - elementMask = out.createVariable('elementMask', 'i4', ('elementCount',)) - elementMask[:] = np.ones(n_elems, dtype=np.int32) - - centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) - centerCoords.units = 'degrees' - clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]-1,0] + coords[conn[:,3]-1,0]) - clat = 0.25 * (coords[conn[:,0]-1,1] + coords[conn[:,1]-1,1] + coords[conn[:,2]-1,1] + coords[conn[:,3]-1,1]) - centerCoords[:] = np.column_stack([clon, clat]) - - out.title = 'ESMF mesh for control DATM forcing (corrected)' - out.gridType = 'unstructured mesh' - out.close() - print('Generated control ESMF mesh: {}x{} = {} nodes, {} elements'.format(nx, ny, n_nodes, n_elems)) + print('Control mesh already has elementMask') +ds.close() " 2>&1 - rc=$? 
- if [ $rc -ne 0 ]; then - echo "WARNING: Control ESMF mesh check/fix failed (rc=$rc)" >&2 - fi - else - echo "WARNING: Control datm_forcing.nc not found at ${CTRL_DATM_DIR} — mesh not checked" + echo "WARNING: Control datm_esmf_mesh.nc not found at ${CTRL_DATM_DIR}" fi # Build cfp command file for parallel DATM forcing generation diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index c605f84..4f10f6f 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -488,74 +488,27 @@ ds.close() sed -i "s/ny_global[[:space:]]*=.*/ny_global = ${mem_ny}/" ${MEMBER_DATA}/datm_in echo "Patched datm_in: nx_global=${mem_nx}, ny_global=${mem_ny}" - # Always regenerate ESMF mesh from the forcing file. - # This matches what _comf_execute_ufs_coastal() does for the DET - # model run (nos_ofs_model_run.sh:1049-1111). The prep may create - # a different mesh (different generation method or coordinate - # convention), so we must regenerate to ensure identical - # interpolation weights between DET and ensemble members. - local mem_total=$((mem_nx * mem_ny)) - echo "Regenerating ESMF mesh from forcing (${mem_nx}x${mem_ny} = ${mem_total} nodes)..." - python3 -c " + # Ensure ESMF mesh has elementMask (required by CMEPS). + # The prep's SCRIP-based mesh (proc_scrip.py + ESMF_Scrip2Unstruct) + # is the authoritative mesh — do NOT regenerate it here. + # ESMF_Scrip2Unstruct does not add elementMask, so we add it if missing. 
+ local _mesh_file="${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc" + if [ -s "${_mesh_file}" ]; then + python3 -c " from netCDF4 import Dataset import numpy as np - -ds = Dataset('${MEMBER_DATA}/INPUT/datm_forcing.nc', 'r') -try: - lons = ds.variables['longitude'][:] - lats = ds.variables['latitude'][:] - if lons.ndim == 1: - lon2d, lat2d = np.meshgrid(lons, lats) - else: - lon2d, lat2d = lons, lats -except: - lon2d = ds.variables['x'][:] - lat2d = ds.variables['y'][:] +ds = Dataset('${_mesh_file}', 'a') +if 'elementMask' not in ds.variables: + n_elems = len(ds.dimensions['elementCount']) + em = ds.createVariable('elementMask', 'i4', ('elementCount',)) + em[:] = np.ones(n_elems, dtype=np.int32) + print('Added elementMask ({} elements) to ESMF mesh'.format(n_elems)) +else: + print('ESMF mesh already has elementMask') ds.close() - -ny, nx = lon2d.shape -n_nodes = ny * nx -n_elems = (ny - 1) * (nx - 1) - -out = Dataset('${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc', 'w') -out.createDimension('nodeCount', n_nodes) -out.createDimension('elementCount', n_elems) -out.createDimension('maxNodePElement', 4) -out.createDimension('coordDim', 2) - -nodeCoords = out.createVariable('nodeCoords', 'f8', ('nodeCount', 'coordDim')) -nodeCoords.units = 'degrees' -coords = np.column_stack([lon2d.ravel(), lat2d.ravel()]) -nodeCoords[:] = coords - -j_idx, i_idx = np.mgrid[0:ny-1, 0:nx-1] -n0 = (j_idx * nx + i_idx + 1).ravel() -conn = np.column_stack([n0, n0 + 1, n0 + nx + 1, n0 + nx]).astype(np.int32) - -elemConn = out.createVariable('elementConn', 'i4', ('elementCount', 'maxNodePElement')) -elemConn.long_name = 'Node indices that define the element connectivity' -elemConn.start_index = 1 -elemConn[:] = conn - -numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) -numElemConn[:] = 4 - -elementMask = out.createVariable('elementMask', 'i4', ('elementCount',)) -elementMask[:] = np.ones(n_elems, dtype=np.int32) - -centerCoords = out.createVariable('centerCoords', 'f8', 
('elementCount', 'coordDim')) -centerCoords.units = 'degrees' -clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]-1,0] + coords[conn[:,3]-1,0]) -clat = 0.25 * (coords[conn[:,0]-1,1] + coords[conn[:,1]-1,1] + coords[conn[:,2]-1,1] + coords[conn[:,3]-1,1]) -centerCoords[:] = np.column_stack([clon, clat]) - -out.title = 'ESMF mesh generated from DATM forcing file' -out.gridType = 'unstructured mesh' -out.close() -print('Generated ESMF mesh: {}x{} = {} nodes, {} elements'.format(nx, ny, n_nodes, n_elems)) " 2>&1 - if [ $? -ne 0 ]; then - echo "WARNING: ESMF mesh generation failed — model may crash" >&2 + else + echo "WARNING: ESMF mesh not found at ${_mesh_file}" >&2 fi fi fi diff --git a/ush/nos_ofs_model_run.sh b/ush/nos_ofs_model_run.sh index c8ddb18..d013ed1 100644 --- a/ush/nos_ofs_model_run.sh +++ b/ush/nos_ofs_model_run.sh @@ -1046,71 +1046,27 @@ ds.close() sed -i "s/nx_global[[:space:]]*=.*/nx_global = ${_fnx}/" ${DATA}/datm_in sed -i "s/ny_global[[:space:]]*=.*/ny_global = ${_fny}/" ${DATA}/datm_in - # Regenerate ESMF mesh from forcing file to guarantee consistency. - # This ensures mesh dimensions, elementMask, and coordinates all - # match the actual forcing data — no stale template meshes. - local _ftotal=$((_fnx * _fny)) - echo "Generating ESMF mesh from forcing (${_fnx}x${_fny} = ${_ftotal} nodes)..." - python3 -c " + # Ensure ESMF mesh has elementMask (required by CMEPS). + # The prep's SCRIP-based mesh (proc_scrip.py + ESMF_Scrip2Unstruct) + # is the authoritative mesh — do NOT regenerate it here. + # ESMF_Scrip2Unstruct does not add elementMask, so we add it if missing. 
+ local _mesh_file="${DATA}/${DATM_DIR}/datm_esmf_mesh.nc" + if [ -s "${_mesh_file}" ]; then + python3 -c " from netCDF4 import Dataset import numpy as np - -ds = Dataset('${_forcing_file}', 'r') -try: - lons = ds.variables['longitude'][:] - lats = ds.variables['latitude'][:] - if lons.ndim == 1: - lon2d, lat2d = np.meshgrid(lons, lats) - else: - lon2d, lat2d = lons, lats -except: - lon2d = ds.variables['x'][:] - lat2d = ds.variables['y'][:] +ds = Dataset('${_mesh_file}', 'a') +if 'elementMask' not in ds.variables: + n_elems = len(ds.dimensions['elementCount']) + em = ds.createVariable('elementMask', 'i4', ('elementCount',)) + em[:] = np.ones(n_elems, dtype=np.int32) + print('Added elementMask ({} elements) to ESMF mesh'.format(n_elems)) +else: + print('ESMF mesh already has elementMask') ds.close() - -ny, nx = lon2d.shape -n_nodes = ny * nx -n_elems = (ny - 1) * (nx - 1) - -out = Dataset('${DATA}/${DATM_DIR}/datm_esmf_mesh.nc', 'w') -out.createDimension('nodeCount', n_nodes) -out.createDimension('elementCount', n_elems) -out.createDimension('maxNodePElement', 4) -out.createDimension('coordDim', 2) - -nodeCoords = out.createVariable('nodeCoords', 'f8', ('nodeCount', 'coordDim')) -nodeCoords.units = 'degrees' -coords = np.column_stack([lon2d.ravel(), lat2d.ravel()]) -nodeCoords[:] = coords - -j_idx, i_idx = np.mgrid[0:ny-1, 0:nx-1] -n0 = (j_idx * nx + i_idx + 1).ravel() -conn = np.column_stack([n0, n0 + 1, n0 + nx + 1, n0 + nx]).astype(np.int32) - -elemConn = out.createVariable('elementConn', 'i4', ('elementCount', 'maxNodePElement')) -elemConn.long_name = 'Node indices that define the element connectivity' -elemConn.start_index = 1 -elemConn[:] = conn - -numElemConn = out.createVariable('numElementConn', 'i4', ('elementCount',)) -numElemConn[:] = 4 - -elementMask = out.createVariable('elementMask', 'i4', ('elementCount',)) -elementMask[:] = np.ones(n_elems, dtype=np.int32) - -centerCoords = out.createVariable('centerCoords', 'f8', ('elementCount', 'coordDim')) 
-centerCoords.units = 'degrees' -clon = 0.25 * (coords[conn[:,0]-1,0] + coords[conn[:,1]-1,0] + coords[conn[:,2]-1,0] + coords[conn[:,3]-1,0]) -clat = 0.25 * (coords[conn[:,0]-1,1] + coords[conn[:,1]-1,1] + coords[conn[:,2]-1,1] + coords[conn[:,3]-1,1]) -centerCoords[:] = np.column_stack([clon, clat]) - -out.title = 'ESMF mesh generated from DATM forcing file' -out.gridType = 'unstructured mesh' -out.close() -print('Generated ESMF mesh: {}x{} = {} nodes, {} elements'.format(nx, ny, n_nodes, n_elems)) " 2>&1 - if [ $? -ne 0 ]; then - echo "WARNING: ESMF mesh generation failed — using existing mesh" >&2 + else + echo "WARNING: ESMF mesh not found at ${_mesh_file}" >&2 fi fi fi From 7fb8b33366afb1b16967ec260acaa2ff65001594 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 14 Mar 2026 21:17:15 -0400 Subject: [PATCH 31/40] Fix ensemble post skip-check: require both fields AND stations The skip-check used || (OR), so having just stations.forecast.nc (from inline post) caused all members to skip combining. Changed to && (AND) so it only skips when both field and station files exist. 
--- jobs/JNOS_OFS_ENSEMBLE_POST | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jobs/JNOS_OFS_ENSEMBLE_POST b/jobs/JNOS_OFS_ENSEMBLE_POST index cd37d48..a270dc8 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_POST +++ b/jobs/JNOS_OFS_ENSEMBLE_POST @@ -178,8 +178,8 @@ else continue fi - # Skip if already combined (check both field and station files) - if ls ${MDIR}/${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc 1>/dev/null 2>&1 || \ + # Skip if already combined (require BOTH field AND station files) + if ls ${MDIR}/${PREFIXNOS}.${cycle}.${PDY}.fields.f*.nc 1>/dev/null 2>&1 && \ ls ${MDIR}/${PREFIXNOS}.${cycle}.${PDY}.stations.forecast.nc 1>/dev/null 2>&1; then echo " Member ${MID}: already combined, skipping" continue From 283c8f7aa87114c01b8f018cadc69f2807adf7a9 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 14 Mar 2026 21:22:14 -0400 Subject: [PATCH 32/40] Unify RPTDIR to rpt/${OFS} across all SECOFS PBS scripts Ensemble launcher, member, and atmos_prep PBS scripts used rpt/v3.7.0 while DET UFS scripts used rpt/secofs_ufs. Now all use rpt/${OFS} so logs are in the same directory. --- pbs/jnos_secofs_ensemble_atmos_prep.pbs | 2 +- pbs/jnos_secofs_ensemble_member.pbs | 2 +- pbs/jnos_secofs_forecast_00.pbs | 2 +- pbs/jnos_secofs_nowcast_00.pbs | 2 +- pbs/launch_secofs_ensemble.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pbs/jnos_secofs_ensemble_atmos_prep.pbs b/pbs/jnos_secofs_ensemble_atmos_prep.pbs index 2335de0..c1c27d9 100644 --- a/pbs/jnos_secofs_ensemble_atmos_prep.pbs +++ b/pbs/jnos_secofs_ensemble_atmos_prep.pbs @@ -34,7 +34,7 @@ PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages export OFS=${OFS:-secofs} # Working directory -RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/v3.7.0 +RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/${OFS} WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} if [ ! 
-r $WORKDIR ]; then mkdir -p -m 755 $WORKDIR diff --git a/pbs/jnos_secofs_ensemble_member.pbs b/pbs/jnos_secofs_ensemble_member.pbs index 5d2b03e..cf6fd0e 100644 --- a/pbs/jnos_secofs_ensemble_member.pbs +++ b/pbs/jnos_secofs_ensemble_member.pbs @@ -43,7 +43,7 @@ PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages export OFS=${OFS:-secofs} # Working directory -RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/v3.7.0 +RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/${OFS} WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} if [ ! -r $WORKDIR ]; then mkdir -p -m 755 $WORKDIR diff --git a/pbs/jnos_secofs_forecast_00.pbs b/pbs/jnos_secofs_forecast_00.pbs index 288f464..d90698e 100644 --- a/pbs/jnos_secofs_forecast_00.pbs +++ b/pbs/jnos_secofs_forecast_00.pbs @@ -10,7 +10,7 @@ . /lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.v3.7.0/versions/run.ver # Working directory -RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/v3.7.0 +RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/secofs if [ ! -r $WORKDIR ]; then mkdir -p -m 755 $WORKDIR diff --git a/pbs/jnos_secofs_nowcast_00.pbs b/pbs/jnos_secofs_nowcast_00.pbs index 071a853..a86d308 100644 --- a/pbs/jnos_secofs_nowcast_00.pbs +++ b/pbs/jnos_secofs_nowcast_00.pbs @@ -10,7 +10,7 @@ . /lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.v3.7.0/versions/run.ver # Working directory -RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/v3.7.0 +RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/secofs if [ ! -r $WORKDIR ]; then mkdir -p -m 755 $WORKDIR diff --git a/pbs/launch_secofs_ensemble.sh b/pbs/launch_secofs_ensemble.sh index ba82e9a..377acd9 100644 --- a/pbs/launch_secofs_ensemble.sh +++ b/pbs/launch_secofs_ensemble.sh @@ -166,7 +166,7 @@ if [ "${ATMOS_ENSEMBLE}" = true ] && [ ! 
-f "${ATMOS_PREP_PBS}" ]; then exit 1 fi -RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/v3.7.0 +RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/${OFS} mkdir -p ${RPTDIR} 2>/dev/null || true # ---- Step 1: Submit prep job (unless --skip-prep) -------------------- From 1094b202b97b628fa8da25dcc4fbae7e8a55ddd4 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 14 Mar 2026 21:25:49 -0400 Subject: [PATCH 33/40] Unify WORKDIR to work/${OFS} across all SECOFS PBS scripts Remove ${nosofs_ver} from WORKDIR paths so all jobs (DET, UFS, ensemble) use the same work directory structure: work/secofs_ufs for UFS, work/secofs for standalone. --- pbs/jnos_secofs_ensemble_atmos_prep.pbs | 2 +- pbs/jnos_secofs_ensemble_member.pbs | 2 +- pbs/jnos_secofs_forecast_00.pbs | 2 +- pbs/jnos_secofs_nowcast_00.pbs | 2 +- pbs/jnos_secofs_ufs_forecast_00.pbs | 2 +- pbs/jnos_secofs_ufs_nowcast_00.pbs | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pbs/jnos_secofs_ensemble_atmos_prep.pbs b/pbs/jnos_secofs_ensemble_atmos_prep.pbs index c1c27d9..b62f369 100644 --- a/pbs/jnos_secofs_ensemble_atmos_prep.pbs +++ b/pbs/jnos_secofs_ensemble_atmos_prep.pbs @@ -35,7 +35,7 @@ export OFS=${OFS:-secofs} # Working directory RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/${OFS} -WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} +WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${OFS} if [ ! -r $WORKDIR ]; then mkdir -p -m 755 $WORKDIR fi diff --git a/pbs/jnos_secofs_ensemble_member.pbs b/pbs/jnos_secofs_ensemble_member.pbs index cf6fd0e..da69971 100644 --- a/pbs/jnos_secofs_ensemble_member.pbs +++ b/pbs/jnos_secofs_ensemble_member.pbs @@ -44,7 +44,7 @@ export OFS=${OFS:-secofs} # Working directory RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/${OFS} -WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} +WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${OFS} if [ ! 
-r $WORKDIR ]; then mkdir -p -m 755 $WORKDIR fi diff --git a/pbs/jnos_secofs_forecast_00.pbs b/pbs/jnos_secofs_forecast_00.pbs index d90698e..04ff5cf 100644 --- a/pbs/jnos_secofs_forecast_00.pbs +++ b/pbs/jnos_secofs_forecast_00.pbs @@ -11,7 +11,7 @@ # Working directory RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs -WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/secofs +WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/secofs if [ ! -r $WORKDIR ]; then mkdir -p -m 755 $WORKDIR fi diff --git a/pbs/jnos_secofs_nowcast_00.pbs b/pbs/jnos_secofs_nowcast_00.pbs index a86d308..6f0582c 100644 --- a/pbs/jnos_secofs_nowcast_00.pbs +++ b/pbs/jnos_secofs_nowcast_00.pbs @@ -11,7 +11,7 @@ # Working directory RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs -WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/secofs +WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/secofs if [ ! -r $WORKDIR ]; then mkdir -p -m 755 $WORKDIR fi diff --git a/pbs/jnos_secofs_ufs_forecast_00.pbs b/pbs/jnos_secofs_ufs_forecast_00.pbs index 38e99b8..50df64d 100644 --- a/pbs/jnos_secofs_ufs_forecast_00.pbs +++ b/pbs/jnos_secofs_ufs_forecast_00.pbs @@ -12,7 +12,7 @@ PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages # Working directory RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs -WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/secofs_ufs +WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/secofs_ufs mkdir -p -m 755 $RPTDIR $WORKDIR cd ${WORKDIR} diff --git a/pbs/jnos_secofs_ufs_nowcast_00.pbs b/pbs/jnos_secofs_ufs_nowcast_00.pbs index bbd149f..3da243c 100644 --- a/pbs/jnos_secofs_ufs_nowcast_00.pbs +++ b/pbs/jnos_secofs_ufs_nowcast_00.pbs @@ -12,7 +12,7 @@ PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages # Working directory RPTDIR=/lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs_ufs -WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/secofs_ufs +WORKDIR=/lfs/h1/nos/ptmp/$LOGNAME/work/secofs_ufs mkdir -p -m 755 $RPTDIR $WORKDIR cd ${WORKDIR} From d80c98fe9295c21184d122cbcfd698ba74300da2 Mon Sep 17 00:00:00 2001 From: 
mansurjisan Date: Sat, 14 Mar 2026 21:33:11 -0400 Subject: [PATCH 34/40] Unify DATAROOT to work/${OFS} across all SECOFS PBS scripts Remove ${nosofs_ver} from DATAROOT paths to match prep scripts. All jobs now create DATA under work/${OFS}/ instead of some using work/v3.7.0/${OFS}/. --- pbs/jnos_secofs_ensemble_atmos_prep.pbs | 2 +- pbs/jnos_secofs_ensemble_member.pbs | 2 +- pbs/jnos_secofs_forecast_00.pbs | 2 +- pbs/jnos_secofs_nowcast_00.pbs | 2 +- pbs/jnos_secofs_ufs_forecast_00.pbs | 2 +- pbs/jnos_secofs_ufs_nowcast_00.pbs | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pbs/jnos_secofs_ensemble_atmos_prep.pbs b/pbs/jnos_secofs_ensemble_atmos_prep.pbs index b62f369..7fa825a 100644 --- a/pbs/jnos_secofs_ensemble_atmos_prep.pbs +++ b/pbs/jnos_secofs_ensemble_atmos_prep.pbs @@ -98,7 +98,7 @@ export COMPATH=/lfs/h1/ops/prod/com/nos export com_ver=v3.7 export COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com export DCOMROOT=/lfs/h1/ops/prod/dcom -export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} +export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${OFS} export COMINnam=/lfs/h1/ops/prod/com/nam/${nam_ver} export COMINhrrr=/lfs/h1/ops/prod/com/hrrr/${hrrr_ver} export COMINrap=/lfs/h1/ops/prod/com/rap/${rap_ver} diff --git a/pbs/jnos_secofs_ensemble_member.pbs b/pbs/jnos_secofs_ensemble_member.pbs index da69971..193b805 100644 --- a/pbs/jnos_secofs_ensemble_member.pbs +++ b/pbs/jnos_secofs_ensemble_member.pbs @@ -131,7 +131,7 @@ export COMPATH=/lfs/h1/ops/prod/com/nos export com_ver=v3.7 export COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com export DCOMROOT=/lfs/h1/ops/prod/dcom -export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} +export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${OFS} export COMINnam=/lfs/h1/ops/prod/com/nam/${nam_ver} export COMINhrrr=/lfs/h1/ops/prod/com/hrrr/${hrrr_ver} export COMINrap=/lfs/h1/ops/prod/com/rap/${rap_ver} diff --git a/pbs/jnos_secofs_forecast_00.pbs b/pbs/jnos_secofs_forecast_00.pbs index 04ff5cf..467520d 
100644 --- a/pbs/jnos_secofs_forecast_00.pbs +++ b/pbs/jnos_secofs_forecast_00.pbs @@ -62,7 +62,7 @@ export PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages export COMPATH=/lfs/h1/ops/prod/com/nos export COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com export DCOMROOT=/lfs/h1/ops/prod/dcom -export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} +export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${OFS} export COMIN=/lfs/h1/ops/prod/com # Input data paths (production WCOSS2) diff --git a/pbs/jnos_secofs_nowcast_00.pbs b/pbs/jnos_secofs_nowcast_00.pbs index 6f0582c..6dd6ef0 100644 --- a/pbs/jnos_secofs_nowcast_00.pbs +++ b/pbs/jnos_secofs_nowcast_00.pbs @@ -62,7 +62,7 @@ export PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages export COMPATH=/lfs/h1/ops/prod/com/nos export COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com export DCOMROOT=/lfs/h1/ops/prod/dcom -export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} +export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${OFS} export COMIN=/lfs/h1/ops/prod/com # Input data paths (production WCOSS2) diff --git a/pbs/jnos_secofs_ufs_forecast_00.pbs b/pbs/jnos_secofs_ufs_forecast_00.pbs index 50df64d..278193d 100644 --- a/pbs/jnos_secofs_ufs_forecast_00.pbs +++ b/pbs/jnos_secofs_ufs_forecast_00.pbs @@ -61,7 +61,7 @@ export PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages export COMPATH=/lfs/h1/ops/prod/com/nos export COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com export DCOMROOT=/lfs/h1/ops/prod/dcom -export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} +export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${OFS} export COMIN=/lfs/h1/ops/prod/com # Input data paths (production WCOSS2) diff --git a/pbs/jnos_secofs_ufs_nowcast_00.pbs b/pbs/jnos_secofs_ufs_nowcast_00.pbs index 3da243c..9313cd6 100644 --- a/pbs/jnos_secofs_ufs_nowcast_00.pbs +++ b/pbs/jnos_secofs_ufs_nowcast_00.pbs @@ -61,7 +61,7 @@ export PACKAGEROOT=/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages export COMPATH=/lfs/h1/ops/prod/com/nos export 
COMROOT=/lfs/h1/nos/ptmp/$LOGNAME/com export DCOMROOT=/lfs/h1/ops/prod/dcom -export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${nosofs_ver}/${OFS} +export DATAROOT=/lfs/h1/nos/ptmp/$LOGNAME/work/${OFS} export COMIN=/lfs/h1/ops/prod/com # Input data paths (production WCOSS2) From a4d04ab6b41edb7377a8e0ae258541b9df96effc Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 14 Mar 2026 22:51:34 -0400 Subject: [PATCH 35/40] Fix silent Python failure: use ncdump for dims, unset LD_PRELOAD The Python netCDF4 dimension reader was failing silently due to LD_PRELOAD=libnetcdff.so conflicts (memory item #6/#8). This caused the entire datm_in patching AND elementMask addition to be skipped. Fix: Use ncdump -h (always available, no LD_PRELOAD issues) to read forcing file dimensions. For elementMask Python, run in subshell with LD_PRELOAD unset. --- ush/nos_ofs_ensemble_run.sh | 52 ++++++++++++++++++++----------------- ush/nos_ofs_model_run.sh | 50 +++++++++++++++++++---------------- 2 files changed, 55 insertions(+), 47 deletions(-) diff --git a/ush/nos_ofs_ensemble_run.sh b/ush/nos_ofs_ensemble_run.sh index 4f10f6f..d27989a 100644 --- a/ush/nos_ofs_ensemble_run.sh +++ b/ush/nos_ofs_ensemble_run.sh @@ -471,30 +471,35 @@ print(ms) # Patch datm_in nx_global/ny_global if member forcing has different grid dims if [ -s "${MEMBER_DATA}/INPUT/datm_forcing.nc" ] && [ -s "${MEMBER_DATA}/datm_in" ]; then - local mem_dims=$(python3 -c " -from netCDF4 import Dataset -ds = Dataset('${MEMBER_DATA}/INPUT/datm_forcing.nc', 'r') -# Try 'x'/'y' dims first (blended output), then 'longitude'/'latitude' (raw GFS/GEFS) -try: - print(len(ds.dimensions['x']), len(ds.dimensions['y'])) -except: - print(len(ds.dimensions['longitude']), len(ds.dimensions['latitude'])) -ds.close() -" 2>/dev/null || echo "") - if [ -n "$mem_dims" ]; then - local mem_nx=$(echo $mem_dims | awk '{print $1}') - local mem_ny=$(echo $mem_dims | awk '{print $2}') + # Use ncdump (always available) instead of Python netCDF4 + # to 
avoid LD_PRELOAD conflicts with libnetcdff.so + local _ncdump_h=$(ncdump -h "${MEMBER_DATA}/INPUT/datm_forcing.nc" 2>/dev/null || true) + local mem_nx="" mem_ny="" + if [ -n "$_ncdump_h" ]; then + mem_nx=$(echo "$_ncdump_h" | grep -oP '\bx\s*=\s*\K\d+' | head -1) + mem_ny=$(echo "$_ncdump_h" | grep -oP '\by\s*=\s*\K\d+' | head -1) + if [ -z "$mem_nx" ]; then + mem_nx=$(echo "$_ncdump_h" | grep -oP '\blongitude\s*=\s*\K\d+' | head -1) + mem_ny=$(echo "$_ncdump_h" | grep -oP '\blatitude\s*=\s*\K\d+' | head -1) + fi + fi + + if [ -n "$mem_nx" ] && [ -n "$mem_ny" ]; then sed -i "s/nx_global[[:space:]]*=.*/nx_global = ${mem_nx}/" ${MEMBER_DATA}/datm_in sed -i "s/ny_global[[:space:]]*=.*/ny_global = ${mem_ny}/" ${MEMBER_DATA}/datm_in echo "Patched datm_in: nx_global=${mem_nx}, ny_global=${mem_ny}" + else + echo "WARNING: Could not read forcing dims — datm_in not patched" >&2 + fi - # Ensure ESMF mesh has elementMask (required by CMEPS). - # The prep's SCRIP-based mesh (proc_scrip.py + ESMF_Scrip2Unstruct) - # is the authoritative mesh — do NOT regenerate it here. - # ESMF_Scrip2Unstruct does not add elementMask, so we add it if missing. - local _mesh_file="${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc" - if [ -s "${_mesh_file}" ]; then - python3 -c " + # Ensure ESMF mesh has elementMask (required by CMEPS). + # The prep's SCRIP-based mesh (proc_scrip.py + ESMF_Scrip2Unstruct) + # is the authoritative mesh — do NOT regenerate it here. + # ESMF_Scrip2Unstruct does not add elementMask, so we add it if missing. + # Unset LD_PRELOAD to avoid libnetcdff.so conflicts with Python netCDF4. 
+ local _mesh_file="${MEMBER_DATA}/INPUT/datm_esmf_mesh.nc" + if [ -s "${_mesh_file}" ]; then + (unset LD_PRELOAD; python3 -c " from netCDF4 import Dataset import numpy as np ds = Dataset('${_mesh_file}', 'a') @@ -506,10 +511,9 @@ if 'elementMask' not in ds.variables: else: print('ESMF mesh already has elementMask') ds.close() -" 2>&1 - else - echo "WARNING: ESMF mesh not found at ${_mesh_file}" >&2 - fi +" 2>&1) + else + echo "WARNING: ESMF mesh not found at ${_mesh_file}" >&2 fi fi diff --git a/ush/nos_ofs_model_run.sh b/ush/nos_ofs_model_run.sh index d013ed1..dca1e58 100644 --- a/ush/nos_ofs_model_run.sh +++ b/ush/nos_ofs_model_run.sh @@ -1024,19 +1024,20 @@ _comf_execute_ufs_coastal() { # ------------------------------------------------------------------ local _forcing_file="${DATA}/${DATM_DIR}/datm_forcing.nc" if [ -s "${_forcing_file}" ] && [ -s "${DATA}/datm_in" ]; then - local _fdims=$(python3 -c " -from netCDF4 import Dataset -ds = Dataset('${_forcing_file}', 'r') -try: - print(len(ds.dimensions['x']), len(ds.dimensions['y'])) -except: - print(len(ds.dimensions['longitude']), len(ds.dimensions['latitude'])) -ds.close() -" 2>/dev/null || echo "") + # Use ncdump (always available) instead of Python netCDF4 + # to avoid LD_PRELOAD conflicts with libnetcdff.so + local _ncdump_h=$(ncdump -h "${_forcing_file}" 2>/dev/null || true) + local _fnx="" _fny="" + if [ -n "$_ncdump_h" ]; then + _fnx=$(echo "$_ncdump_h" | grep -oP '\bx\s*=\s*\K\d+' | head -1) + _fny=$(echo "$_ncdump_h" | grep -oP '\by\s*=\s*\K\d+' | head -1) + if [ -z "$_fnx" ]; then + _fnx=$(echo "$_ncdump_h" | grep -oP '\blongitude\s*=\s*\K\d+' | head -1) + _fny=$(echo "$_ncdump_h" | grep -oP '\blatitude\s*=\s*\K\d+' | head -1) + fi + fi - if [ -n "$_fdims" ]; then - local _fnx=$(echo $_fdims | awk '{print $1}') - local _fny=$(echo $_fdims | awk '{print $2}') + if [ -n "$_fnx" ] && [ -n "$_fny" ]; then local _old_nx=$(grep -oP 'nx_global\s*=\s*\K[0-9]+' ${DATA}/datm_in 2>/dev/null || echo "0") local 
_old_ny=$(grep -oP 'ny_global\s*=\s*\K[0-9]+' ${DATA}/datm_in 2>/dev/null || echo "0") @@ -1045,14 +1046,18 @@ ds.close() fi sed -i "s/nx_global[[:space:]]*=.*/nx_global = ${_fnx}/" ${DATA}/datm_in sed -i "s/ny_global[[:space:]]*=.*/ny_global = ${_fny}/" ${DATA}/datm_in + else + echo "WARNING: Could not read forcing dims from ${_forcing_file}" >&2 + fi - # Ensure ESMF mesh has elementMask (required by CMEPS). - # The prep's SCRIP-based mesh (proc_scrip.py + ESMF_Scrip2Unstruct) - # is the authoritative mesh — do NOT regenerate it here. - # ESMF_Scrip2Unstruct does not add elementMask, so we add it if missing. - local _mesh_file="${DATA}/${DATM_DIR}/datm_esmf_mesh.nc" - if [ -s "${_mesh_file}" ]; then - python3 -c " + # Ensure ESMF mesh has elementMask (required by CMEPS). + # The prep's SCRIP-based mesh (proc_scrip.py + ESMF_Scrip2Unstruct) + # is the authoritative mesh — do NOT regenerate it here. + # ESMF_Scrip2Unstruct does not add elementMask, so we add it if missing. + # Unset LD_PRELOAD to avoid libnetcdff.so conflicts with Python netCDF4. + local _mesh_file="${DATA}/${DATM_DIR}/datm_esmf_mesh.nc" + if [ -s "${_mesh_file}" ]; then + (unset LD_PRELOAD; python3 -c " from netCDF4 import Dataset import numpy as np ds = Dataset('${_mesh_file}', 'a') @@ -1064,10 +1069,9 @@ if 'elementMask' not in ds.variables: else: print('ESMF mesh already has elementMask') ds.close() -" 2>&1 - else - echo "WARNING: ESMF mesh not found at ${_mesh_file}" >&2 - fi +" 2>&1) + else + echo "WARNING: ESMF mesh not found at ${_mesh_file}" >&2 fi fi From c4aa63a77ec1a0c797c68afc86b4aba0424c62c5 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sat, 14 Mar 2026 23:12:06 -0400 Subject: [PATCH 36/40] Add DATM forcing/mesh verification diagnostic script Checks forcing dims, ESMF mesh consistency (node count, elementMask, coordinates), datm_in nx/ny match, and DET vs member 000 identity. 
--- ush/python/nos_ofs/ensemble/verify_datm.py | 359 +++++++++++++++++++++ 1 file changed, 359 insertions(+) create mode 100644 ush/python/nos_ofs/ensemble/verify_datm.py diff --git a/ush/python/nos_ofs/ensemble/verify_datm.py b/ush/python/nos_ofs/ensemble/verify_datm.py new file mode 100644 index 0000000..605c1a9 --- /dev/null +++ b/ush/python/nos_ofs/ensemble/verify_datm.py @@ -0,0 +1,359 @@ +#!/usr/bin/env python3 +""" +Verify DATM forcing files and ESMF meshes for UFS-Coastal ensemble. + +Checks: + 1. Forcing file dimensions and variables + 2. ESMF mesh consistency (node count, elementMask, coordinates) + 3. datm_in nx_global/ny_global match forcing dims + 4. Member 000 vs DET forcing/mesh identity + 5. GEFS member mesh matches their forcing grid + +Usage: + python3 verify_datm.py --comout /path/to/comout/secofs_ufs.YYYYMMDD + python3 verify_datm.py --comout /path/to/comout/secofs_ufs.YYYYMMDD --cycle t12z + python3 verify_datm.py --workdir /path/to/work/member_000 # check single member workdir +""" + +import argparse +import hashlib +import os +import re +import sys + +try: + from netCDF4 import Dataset + import numpy as np + HAS_NC = True +except ImportError: + HAS_NC = False + + +def md5sum(filepath): + """Compute MD5 checksum of a file.""" + h = hashlib.md5() + with open(filepath, "rb") as f: + for chunk in iter(lambda: f.read(1 << 20), b""): + h.update(chunk) + return h.hexdigest() + + +def get_forcing_dims(nc_path): + """Read forcing file dimensions. 
Returns (nx, ny, nt, dim_names).""" + ds = Dataset(nc_path, "r") + dims = list(ds.dimensions.keys()) + nx = ny = nt = None + xname = yname = None + + for xn, yn in [("x", "y"), ("longitude", "latitude")]: + if xn in ds.dimensions and yn in ds.dimensions: + nx = len(ds.dimensions[xn]) + ny = len(ds.dimensions[yn]) + xname, yname = xn, yn + break + + if "time" in ds.dimensions: + nt = len(ds.dimensions["time"]) + + variables = sorted(ds.variables.keys()) + ds.close() + return nx, ny, nt, xname, yname, dims, variables + + +def get_mesh_info(nc_path): + """Read ESMF mesh metadata.""" + ds = Dataset(nc_path, "r") + info = {} + info["nodeCount"] = len(ds.dimensions.get("nodeCount", [])) + info["elementCount"] = len(ds.dimensions.get("elementCount", [])) + info["has_elementMask"] = "elementMask" in ds.variables + info["has_centerCoords"] = "centerCoords" in ds.variables + info["has_nodeCoords"] = "nodeCoords" in ds.variables + + if info["has_elementMask"]: + mask = ds.variables["elementMask"][:] + info["mask_min"] = int(np.min(mask)) + info["mask_max"] = int(np.max(mask)) + info["mask_zeros"] = int(np.sum(mask == 0)) + + if info["has_nodeCoords"]: + coords = ds.variables["nodeCoords"][:] + info["lon_min"] = float(np.min(coords[:, 0])) + info["lon_max"] = float(np.max(coords[:, 0])) + info["lat_min"] = float(np.min(coords[:, 1])) + info["lat_max"] = float(np.max(coords[:, 1])) + + info["variables"] = sorted(ds.variables.keys()) + info["dimensions"] = {k: len(v) for k, v in ds.dimensions.items()} + ds.close() + return info + + +def parse_datm_in(filepath): + """Parse datm_in for nx_global and ny_global.""" + nx = ny = None + if not os.path.isfile(filepath): + return nx, ny + with open(filepath) as f: + for line in f: + m = re.search(r"nx_global\s*=\s*(\d+)", line) + if m: + nx = int(m.group(1)) + m = re.search(r"ny_global\s*=\s*(\d+)", line) + if m: + ny = int(m.group(1)) + return nx, ny + + +def check_dir(label, dirpath, expected_scrip_nodes=None): + """Check a single DATM 
input directory (forcing + mesh + datm_in).""" + print(f"\n{'='*60}") + print(f" {label}") + print(f"{'='*60}") + + forcing = os.path.join(dirpath, "datm_forcing.nc") + mesh = os.path.join(dirpath, "datm_esmf_mesh.nc") + datm_in_candidates = [ + os.path.join(dirpath, "datm_in"), + os.path.join(os.path.dirname(dirpath), "datm_in"), + ] + + errors = [] + info = {"dir": dirpath} + + # --- Forcing file --- + if not os.path.isfile(forcing): + print(f" FORCING: MISSING — {forcing}") + errors.append("forcing file missing") + else: + sz = os.path.getsize(forcing) / (1024 * 1024) + info["forcing_md5"] = md5sum(forcing) + print(f" FORCING: {forcing}") + print(f" Size: {sz:.1f} MB") + print(f" MD5: {info['forcing_md5']}") + + if HAS_NC: + nx, ny, nt, xn, yn, dims, variables = get_forcing_dims(forcing) + info["nx"] = nx + info["ny"] = ny + print(f" Dims: {xn}={nx}, {yn}={ny}, time={nt}") + print(f" Vars: {', '.join(variables)}") + + # Check expected variables for DATM + expected = { + "UGRD_10maboveground", "VGRD_10maboveground", + "TMP_2maboveground", "MSLMA_meansealevel", + } + missing_vars = expected - set(variables) + if missing_vars: + print(f" WARN: Missing expected vars: {missing_vars}") + + # --- ESMF mesh --- + if not os.path.isfile(mesh): + print(f" MESH: MISSING — {mesh}") + errors.append("mesh file missing") + else: + sz = os.path.getsize(mesh) / (1024 * 1024) + info["mesh_md5"] = md5sum(mesh) + print(f" MESH: {mesh}") + print(f" Size: {sz:.1f} MB") + print(f" MD5: {info['mesh_md5']}") + + if HAS_NC: + mi = get_mesh_info(mesh) + info["mesh_info"] = mi + print(f" Nodes: {mi['nodeCount']}") + print(f" Elems: {mi['elementCount']}") + + if mi["has_elementMask"]: + print(f" Mask: min={mi['mask_min']}, max={mi['mask_max']}, " + f"zeros={mi['mask_zeros']}") + if mi["mask_zeros"] > 0: + pct = 100.0 * mi["mask_zeros"] / mi["elementCount"] + print(f" WARN: {mi['mask_zeros']} masked elements " + f"({pct:.1f}%) — check if intentional") + if mi["mask_max"] == 0: + print(f" 
ERROR: ALL elements masked (mask=0) — " + f"ATM forcing will be zero!") + errors.append("all elements masked") + else: + print(f" Mask: ABSENT (ESMF defaults to all-active)") + + if mi["has_nodeCoords"]: + print(f" Lon: [{mi['lon_min']:.4f}, {mi['lon_max']:.4f}]") + print(f" Lat: [{mi['lat_min']:.4f}, {mi['lat_max']:.4f}]") + + # Check mesh vs forcing consistency + if "nx" in info and info["nx"] is not None: + fnx, fny = info["nx"], info["ny"] + f_total = fnx * fny + # SCRIP mesh: corner-based nodes = (nx+1)*(ny+1) + scrip_nodes = (fnx + 1) * (fny + 1) + # Center-based mesh: nodes = nx*ny + center_nodes = f_total + + if mi["nodeCount"] == scrip_nodes: + print(f" Match: SCRIP corner-based ({fnx+1}x{fny+1} = " + f"{scrip_nodes} nodes) ✓") + elif mi["nodeCount"] == center_nodes: + print(f" Match: Center-based ({fnx}x{fny} = " + f"{center_nodes} nodes)") + print(f" WARN: Not SCRIP method — may differ from prep mesh") + else: + print(f" WARN: Node count {mi['nodeCount']} doesn't match " + f"forcing {fnx}x{fny} (expected SCRIP={scrip_nodes} " + f"or center={center_nodes})") + errors.append("mesh/forcing dimension mismatch") + + # --- datm_in --- + for datm_in_path in datm_in_candidates: + if os.path.isfile(datm_in_path): + din_nx, din_ny = parse_datm_in(datm_in_path) + print(f" DATM_IN: {datm_in_path}") + print(f" nx_global={din_nx}, ny_global={din_ny}") + if HAS_NC and "nx" in info and info["nx"] is not None: + if din_nx != info["nx"] or din_ny != info["ny"]: + print(f" ERROR: Mismatch! 
forcing={info['nx']}x{info['ny']}, " + f"datm_in={din_nx}x{din_ny}") + errors.append("datm_in dims mismatch") + else: + print(f" Match: datm_in matches forcing dims ✓") + break + + if errors: + print(f"\n *** {len(errors)} ERROR(S): {', '.join(errors)}") + else: + print(f"\n All checks passed ✓") + + return info, errors + + +def main(): + parser = argparse.ArgumentParser( + description="Verify DATM forcing and ESMF mesh for UFS-Coastal ensemble") + parser.add_argument("--comout", help="COMOUT directory (e.g., .../secofs_ufs.20260311)") + parser.add_argument("--cycle", default="t12z", help="Cycle (default: t12z)") + parser.add_argument("--run", default="secofs_ufs", help="RUN prefix (default: secofs_ufs)") + parser.add_argument("--workdir", help="Check a single member work directory") + args = parser.parse_args() + + if not HAS_NC: + print("WARNING: netCDF4 not available — skipping NetCDF checks") + print(" Install: pip install netCDF4") + + all_errors = [] + + if args.workdir: + # Check a single work directory + input_dir = os.path.join(args.workdir, "INPUT") + if os.path.isdir(input_dir): + info, errs = check_dir(f"WorkDir: {args.workdir}", input_dir) + else: + info, errs = check_dir(f"WorkDir: {args.workdir}", args.workdir) + all_errors.extend(errs) + + elif args.comout: + if not os.path.isdir(args.comout): + print(f"ERROR: COMOUT directory not found: {args.comout}") + sys.exit(1) + + prefix = f"{args.run}.{args.cycle}" + + # DET (control prep output) + det_dir = os.path.join(args.comout, f"{prefix}.datm_input") + det_info = None + if os.path.isdir(det_dir): + det_info, errs = check_dir("DET (prep output)", det_dir) + all_errors.extend(errs) + else: + print(f"\nDET datm_input not found: {det_dir}") + + # Ensemble members + member_infos = {} + ens_dir = os.path.join(args.comout, "ensemble") + + # GEFS prep outputs + for suffix in sorted(os.listdir(args.comout)): + if suffix.startswith(f"{prefix}.datm_input_gefs_"): + gefs_id = suffix.split("_gefs_")[-1] + gefs_dir 
= os.path.join(args.comout, suffix) + info, errs = check_dir(f"GEFS {gefs_id} (prep output)", gefs_dir) + member_infos[f"gefs_{gefs_id}"] = info + all_errors.extend(errs) + + # RRFS prep output + rrfs_dir = os.path.join(args.comout, f"{prefix}.datm_input_rrfs") + if os.path.isdir(rrfs_dir): + info, errs = check_dir("RRFS (prep output)", rrfs_dir) + member_infos["rrfs"] = info + all_errors.extend(errs) + + # Member work directories (if ensemble ran) + if os.path.isdir(ens_dir): + for mdir in sorted(os.listdir(ens_dir)): + if mdir.startswith("member_"): + mid = mdir.replace("member_", "") + input_dir = os.path.join(ens_dir, mdir, "INPUT") + if os.path.isdir(input_dir) and \ + os.path.isfile(os.path.join(input_dir, "datm_forcing.nc")): + info, errs = check_dir( + f"Member {mid} (runtime INPUT)", input_dir) + member_infos[f"member_{mid}"] = info + all_errors.extend(errs) + + # --- Cross-checks --- + print(f"\n{'='*60}") + print(f" Cross-checks") + print(f"{'='*60}") + + # Member 000 vs DET forcing + m000_keys = [k for k in member_infos if k in ("member_000",)] + if det_info and m000_keys: + m000 = member_infos[m000_keys[0]] + if "forcing_md5" in det_info and "forcing_md5" in m000: + if det_info["forcing_md5"] == m000["forcing_md5"]: + print(f" DET vs member_000 forcing: IDENTICAL ✓") + else: + print(f" DET vs member_000 forcing: DIFFER") + print(f" DET: {det_info['forcing_md5']}") + print(f" 000: {m000['forcing_md5']}") + + if "mesh_md5" in det_info and "mesh_md5" in m000: + if det_info["mesh_md5"] == m000["mesh_md5"]: + print(f" DET vs member_000 mesh: IDENTICAL ✓") + else: + print(f" DET vs member_000 mesh: DIFFER") + print(f" DET: {det_info['mesh_md5']}") + print(f" 000: {m000['mesh_md5']}") + all_errors.append("DET and member_000 mesh differ") + + # Check all GEFS members share the same mesh + gefs_meshes = {k: v.get("mesh_md5") for k, v in member_infos.items() + if k.startswith("gefs_") and "mesh_md5" in v} + if len(set(gefs_meshes.values())) == 1 and gefs_meshes: 
+ print(f" GEFS member meshes: ALL IDENTICAL ✓ " + f"({len(gefs_meshes)} members)") + elif len(set(gefs_meshes.values())) > 1: + print(f" GEFS member meshes: DIFFER!") + for k, v in gefs_meshes.items(): + print(f" {k}: {v}") + all_errors.append("GEFS member meshes differ") + + else: + parser.print_help() + sys.exit(1) + + # Summary + print(f"\n{'='*60}") + if all_errors: + print(f" RESULT: {len(all_errors)} error(s) found") + for e in all_errors: + print(f" - {e}") + sys.exit(1) + else: + print(f" RESULT: All checks passed ✓") + sys.exit(0) + + +if __name__ == "__main__": + main() From b48ad31c9f8554ee7518d55569c25da245ee4c9d Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sun, 15 Mar 2026 05:20:43 -0400 Subject: [PATCH 37/40] Fix verify_datm: handle global wrapping grids and PRMSL/MSLMA alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SCRIP meshes for global grids (GEFS 0.25°) wrap in longitude, so node count = nx*(ny+1) not (nx+1)*(ny+1). Also accept PRMSL as equivalent to MSLMA (renamed at runtime by ensemble run script). 
--- ush/python/nos_ofs/ensemble/verify_datm.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/ush/python/nos_ofs/ensemble/verify_datm.py b/ush/python/nos_ofs/ensemble/verify_datm.py index 605c1a9..eb7b137 100644 --- a/ush/python/nos_ofs/ensemble/verify_datm.py +++ b/ush/python/nos_ofs/ensemble/verify_datm.py @@ -140,13 +140,18 @@ def check_dir(label, dirpath, expected_scrip_nodes=None): print(f" Vars: {', '.join(variables)}") # Check expected variables for DATM + # MSLMA_meansealevel OR PRMSL_meansealevel (renamed at runtime) expected = { "UGRD_10maboveground", "VGRD_10maboveground", - "TMP_2maboveground", "MSLMA_meansealevel", + "TMP_2maboveground", } missing_vars = expected - set(variables) if missing_vars: print(f" WARN: Missing expected vars: {missing_vars}") + has_mslp = ("MSLMA_meansealevel" in variables or + "PRMSL_meansealevel" in variables) + if not has_mslp: + print(f" WARN: Missing MSLMA/PRMSL mean sea level pressure") # --- ESMF mesh --- if not os.path.isfile(mesh): @@ -189,20 +194,26 @@ def check_dir(label, dirpath, expected_scrip_nodes=None): f_total = fnx * fny # SCRIP mesh: corner-based nodes = (nx+1)*(ny+1) scrip_nodes = (fnx + 1) * (fny + 1) + # SCRIP mesh for global wrapping grids: lon wraps so + # no extra column, only extra row for lat corners + scrip_global = fnx * (fny + 1) # Center-based mesh: nodes = nx*ny center_nodes = f_total if mi["nodeCount"] == scrip_nodes: print(f" Match: SCRIP corner-based ({fnx+1}x{fny+1} = " f"{scrip_nodes} nodes) ✓") + elif mi["nodeCount"] == scrip_global: + print(f" Match: SCRIP global-wrap ({fnx}x{fny+1} = " + f"{scrip_global} nodes) ✓") elif mi["nodeCount"] == center_nodes: print(f" Match: Center-based ({fnx}x{fny} = " f"{center_nodes} nodes)") print(f" WARN: Not SCRIP method — may differ from prep mesh") else: print(f" WARN: Node count {mi['nodeCount']} doesn't match " - f"forcing {fnx}x{fny} (expected SCRIP={scrip_nodes} " - f"or center={center_nodes})") + f"forcing 
{fnx}x{fny} (expected SCRIP={scrip_nodes}, " + f"global-wrap={scrip_global}, or center={center_nodes})") errors.append("mesh/forcing dimension mismatch") # --- datm_in --- From eca248b42dc414a6269f28cf02d216d4b7462baf Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sun, 15 Mar 2026 10:43:42 -0400 Subject: [PATCH 38/40] Add 3h buffer to DATM forcing TIME_END for CDEPS interpolation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CDEPS tintalgo=linear needs data records bracketing each model timestep. Without buffer, the last GEFS forcing record (20 of 20) coincided exactly with model end time, causing: shr_stream_findBounds ERROR: limit on and rDateIn gt rDategvd Adding 3h buffer ensures at least one extra record past forecast end. DET already had enough hourly records; this mainly fixes GEFS members which use 3-hourly data (20 records → 21 records). --- ush/nosofs/nos_ofs_create_datm_forcing_blended.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh b/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh index f6e8ede..d94439f 100644 --- a/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh +++ b/ush/nosofs/nos_ofs_create_datm_forcing_blended.sh @@ -138,8 +138,11 @@ echo "" TIME_START=${time_hotstart:-$($NDATE -6 ${PDY}${cyc})} # Add 3h buffer before start TIME_START_BUFFERED=$($NDATE -3 $TIME_START) -# End: cycle + forecast hours -TIME_END=$($NDATE ${NHOURS_FCST} ${PDY}${cyc}) +# End: cycle + forecast hours + 3h buffer for CDEPS linear interpolation +# CDEPS tintalgo=linear needs data records bracketing each model timestep. +# Without buffer, the last forcing record equals the model end time and +# CDEPS fails with "rDateIn gt rDategvd". 
+TIME_END=$($NDATE $((NHOURS_FCST + 3)) ${PDY}${cyc}) echo "Forcing time range: $TIME_START_BUFFERED to $TIME_END" echo "Primary source: $DATM_PRIMARY_SOURCE" From c975f22786536535cdbaa83f3faf8e06adea1554 Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Sun, 15 Mar 2026 21:12:36 -0400 Subject: [PATCH 39/40] Add schout_*.nc support to ensemble post-processing schism_combine_outputs.py: Add convert_schout_to_split() to auto-detect and convert combined schout_*.nc format to split files (out2d, temperature, salinity, horizontalVelX/Y) before processing. Existing split format (out2d_*.nc) continues to work unchanged. JNOS_OFS_ENSEMBLE_POST: Update file existence checks to also look for schout_1.nc when deciding whether a member has field outputs, and copy FIX files (nv.nc, hgrid.gr3) when schout format is detected. --- jobs/JNOS_OFS_ENSEMBLE_POST | 16 ++- ush/nosofs/schism_combine_outputs.py | 158 +++++++++++++++++++++++++-- 2 files changed, 160 insertions(+), 14 deletions(-) diff --git a/jobs/JNOS_OFS_ENSEMBLE_POST b/jobs/JNOS_OFS_ENSEMBLE_POST index a270dc8..6786492 100644 --- a/jobs/JNOS_OFS_ENSEMBLE_POST +++ b/jobs/JNOS_OFS_ENSEMBLE_POST @@ -185,17 +185,21 @@ else continue fi - # Check for ANY SCHISM outputs (combined out2d OR raw staout) - if [ ! -f "${OUTPUTS_DIR}/out2d_1.nc" ] && [ ! -f "${OUTPUTS_DIR}/staout_1" ]; then - echo " Member ${MID}: no out2d_1.nc or staout_1, skipping" + # Check for ANY SCHISM outputs (split out2d, combined schout, or raw staout) + if [ ! -f "${OUTPUTS_DIR}/out2d_1.nc" ] && \ + [ ! -f "${OUTPUTS_DIR}/schout_1.nc" ] && \ + [ ! -f "${OUTPUTS_DIR}/staout_1" ]; then + echo " Member ${MID}: no out2d_1.nc, schout_1.nc, or staout_1, skipping" continue fi echo " Combining outputs for member ${MID}..." 
if [ -f "${OUTPUTS_DIR}/out2d_1.nc" ]; then - echo " Mode: field + station files" + echo " Mode: field + station files (split format)" + elif [ -f "${OUTPUTS_DIR}/schout_1.nc" ]; then + echo " Mode: field + station files (combined schout format)" else - echo " Mode: station files only (no out2d_*.nc)" + echo " Mode: station files only (no out2d_*.nc or schout_*.nc)" fi cd ${OUTPUTS_DIR} @@ -205,7 +209,7 @@ else # Handle UFS-Coastal prefix variants: secofs_ufs grid files # may be named secofs.* in FIXofs _FIX_EXTS="station.lat.lon sigma.dat" - if [ -f "${OUTPUTS_DIR}/out2d_1.nc" ]; then + if [ -f "${OUTPUTS_DIR}/out2d_1.nc" ] || [ -f "${OUTPUTS_DIR}/schout_1.nc" ]; then _FIX_EXTS="nv.nc hgrid.gr3 station.lat.lon sigma.dat" fi for _ext in ${_FIX_EXTS}; do diff --git a/ush/nosofs/schism_combine_outputs.py b/ush/nosofs/schism_combine_outputs.py index afa814a..c8e2331 100644 --- a/ush/nosofs/schism_combine_outputs.py +++ b/ush/nosofs/schism_combine_outputs.py @@ -450,6 +450,140 @@ def process_station_files(ctl, dims): print(f"Station file created: {filesta} ({nsta} stations, {nstep} timesteps, {nver} levels)") +def convert_schout_to_split(): + """Convert combined schout_*.nc files to split format (out2d, temperature, etc.). + + SCHISM combined output (schout_*.nc) contains all variables in one file. 
+ This extracts them into the split format that process_field_files() expects: + - out2d_{i}.nc: elevation, windSpeedX/Y, depth, coordinates + - temperature_{i}.nc: temperature + - salinity_{i}.nc: salinity + - horizontalVelX_{i}.nc: horizontalVelX + - horizontalVelY_{i}.nc: horizontalVelY + """ + # Variable name mapping: schout name -> (split file prefix, split var name) + VAR_2D = { + 'elev': ('elevation', 'out2d'), + 'windSpeedX': ('windSpeedX', 'out2d'), + 'windSpeedY': ('windSpeedY', 'out2d'), + } + VAR_3D = { + 'temp': ('temperature', 'temperature'), + 'salt': ('salinity', 'salinity'), + 'horizontalVelX': ('horizontalVelX', 'horizontalVelX'), + 'horizontalVelY': ('horizontalVelY', 'horizontalVelY'), + } + + for i in range(1, 999): + schout_file = f"schout_{i}.nc" + if not os.path.exists(schout_file): + break + + print(f" Splitting {schout_file}...") + ds = Dataset(schout_file, 'r') + + # Identify available variables + schout_vars = list(ds.variables.keys()) + time_data = ds.variables['time'][:] + nstep = len(time_data) + + # Get node dimension name + node_dim = None + for dname in ['nSCHISM_hgrid_node', 'node', 'nod2']: + if dname in ds.dimensions: + node_dim = dname + break + if node_dim is None: + # Fallback: use first spatial dimension of elevation + for vn in ['elev', 'elevation']: + if vn in ds.variables: + node_dim = ds.variables[vn].dimensions[-1] + break + + n_nodes = len(ds.dimensions[node_dim]) if node_dim else 0 + + # --- Create out2d_{i}.nc --- + out2d_file = f"out2d_{i}.nc" + ds_out = Dataset(out2d_file, 'w', format='NETCDF4') + + # Copy dimensions + ds_out.createDimension('time', None) + ds_out.createDimension('nSCHISM_hgrid_node', n_nodes) + + # Time + tv = ds_out.createVariable('time', 'f8', ('time',)) + tv[:] = time_data + if hasattr(ds.variables['time'], 'units'): + tv.units = ds.variables['time'].units + + # Elevation + for src_name in ['elev', 'elevation']: + if src_name in ds.variables: + elev = ds_out.createVariable('elevation', 'f4', + 
('time', 'nSCHISM_hgrid_node')) + elev[:] = ds.variables[src_name][:] + break + + # Wind (optional) + for wvar in ['windSpeedX', 'windSpeedY']: + if wvar in ds.variables: + wv = ds_out.createVariable(wvar, 'f4', + ('time', 'nSCHISM_hgrid_node')) + wv[:] = ds.variables[wvar][:] + + # Coordinates and depth (from first schout only, or from schout if present) + for coord in ['SCHISM_hgrid_node_x', 'SCHISM_hgrid_node_y']: + if coord in ds.variables: + cv = ds_out.createVariable(coord, 'f8', ('nSCHISM_hgrid_node',)) + cv[:] = ds.variables[coord][:] + if 'depth' in ds.variables: + dv = ds_out.createVariable('depth', 'f4', ('nSCHISM_hgrid_node',)) + dv[:] = ds.variables['depth'][:] + + ds_out.close() + + # --- Create 3D split files --- + # Determine vertical dimension + vert_dim = None + for dname in ['nSCHISM_vgrid_layers', 'nVert', 'sigma']: + if dname in ds.dimensions: + vert_dim = dname + break + + n_vert = len(ds.dimensions[vert_dim]) if vert_dim else 0 + + for src_name, (split_var, split_prefix) in VAR_3D.items(): + if src_name not in ds.variables: + continue + split_file = f"{split_prefix}_{i}.nc" + ds_split = Dataset(split_file, 'w', format='NETCDF4') + + ds_split.createDimension('time', None) + ds_split.createDimension('nSCHISM_hgrid_node', n_nodes) + if n_vert > 0: + ds_split.createDimension('nSCHISM_vgrid_layers', n_vert) + + tv = ds_split.createVariable('time', 'f8', ('time',)) + tv[:] = time_data + + if n_vert > 0: + var = ds_split.createVariable( + split_var, 'f4', + ('time', 'nSCHISM_hgrid_layers', 'nSCHISM_hgrid_node') + if f'nSCHISM_hgrid_layers' in ds.variables[src_name].dimensions + else ('time', 'nSCHISM_vgrid_layers', 'nSCHISM_hgrid_node')) + var[:] = ds.variables[src_name][:] + else: + var = ds_split.createVariable(split_var, 'f4', + ('time', 'nSCHISM_hgrid_node')) + var[:] = ds.variables[src_name][:] + + ds_split.close() + + ds.close() + print(f" -> {out2d_file} + 3D split files created") + + def main(): print("=" * 60) print("SCHISM Combine Outputs 
— CO-OPS Standard NetCDF Products") @@ -464,32 +598,40 @@ def main(): print(f" TimeStart: {ctl['timestart']}") # Check what output files are available - has_fields = os.path.exists("out2d_1.nc") + has_split_fields = os.path.exists("out2d_1.nc") + has_schout = os.path.exists("schout_1.nc") + has_fields = has_split_fields or has_schout has_stations = os.path.exists("staout_1") if not has_fields and not has_stations: - print("ERROR: No output files found (neither out2d_1.nc nor staout_1)") + print("ERROR: No output files found (neither out2d_1.nc, schout_1.nc, nor staout_1)") sys.exit(1) - if has_fields: - print("\n Field output files detected (out2d_*.nc)") + if has_split_fields: + print("\n Field output files detected (out2d_*.nc — split format)") + elif has_schout: + print("\n Field output files detected (schout_*.nc — combined format)") + # Convert schout to split format for uniform processing + print(" Converting schout_*.nc to split format...") + convert_schout_to_split() + has_split_fields = True else: - print("\n No field output files (out2d_*.nc) — stations-only mode") + print("\n No field output files (out2d_*.nc / schout_*.nc) — stations-only mode") if has_stations: print(f" Station output files detected (staout_*)") # Get grid dimensions dynamically print("\nReading grid dimensions...") - dims = get_grid_dimensions(ctl['PREFIXNOS'], fields_available=has_fields) - if has_fields: + dims = get_grid_dimensions(ctl['PREFIXNOS'], fields_available=has_split_fields) + if has_split_fields: print(f" Nodes: {dims['n_nodes']}") print(f" Elements: {dims['n_elements']}") print(f" Stations: {dims['n_stations']}") print(f" Levels: {dims['n_levels']}") # Process field files (only if out2d_*.nc available) - if has_fields: + if has_split_fields: print("\n--- Processing Field Files ---") process_field_files(ctl, dims) else: From 41f034da5331c809ac0c70803caf527d237a8eac Mon Sep 17 00:00:00 2001 From: mansurjisan Date: Mon, 16 Mar 2026 09:50:30 -0400 Subject: [PATCH 40/40] 
Remove docs directory (local reference only, not for repo) --- docs/INTEGRATION_GUIDE.md | 643 ------------------------- docs/secofs_rrfs_implementation.md | 188 -------- docs/stofs3datl_rrfs_implementation.md | 365 -------------- docs/stofs_3d_atl_v311_changes.md | 399 --------------- 4 files changed, 1595 deletions(-) delete mode 100644 docs/INTEGRATION_GUIDE.md delete mode 100644 docs/secofs_rrfs_implementation.md delete mode 100644 docs/stofs3datl_rrfs_implementation.md delete mode 100644 docs/stofs_3d_atl_v311_changes.md diff --git a/docs/INTEGRATION_GUIDE.md b/docs/INTEGRATION_GUIDE.md deleted file mode 100644 index 17d490b..0000000 --- a/docs/INTEGRATION_GUIDE.md +++ /dev/null @@ -1,643 +0,0 @@ -# NOS OFS Unified Package Integration Guide - -This guide describes the recommended approach for integrating the Python-based YAML configuration system into the production nosofs.v3.7.0 workflow. - -## Overview - -The integration follows a **minimal-change approach** that: -- Adds YAML configuration support at the script level -- Maintains full backward compatibility with legacy `.ctl` files -- Requires no changes to ecFlow, J-jobs, or USH scripts -- Allows parallel testing of YAML vs .ctl configurations - ---- - -## Current Production Workflow - -### Flow Diagram - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ PRODUCTION FLOW (nosofs.v3.7.0) │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ pbs/jnos_secofs_prep_00.pbs │ -│ ├── Sources: versions/run.ver │ -│ ├── Loads modules (python, wgrib2, netcdf, etc.) │ -│ ├── Sets environment: OFS=secofs, cyc=00 │ -│ └── Calls: jobs/JNOS_OFS_PREP │ -│ │ │ -│ ▼ │ -│ jobs/JNOS_OFS_PREP │ -│ ├── Sets paths: HOMEnos, FIXofs, USHnos, etc. 
│ -│ ├── Creates directories: DATA, COMOUT │ -│ └── Calls: scripts/exnos_ofs_prep.sh │ -│ │ │ -│ ▼ │ -│ scripts/exnos_ofs_prep.sh │ -│ ├── Sources: ${FIXofs}/secofs.ctl ← CONFIGURATION LOADED HERE │ -│ ├── Calls: ush/nos_ofs_launch.sh │ -│ ├── Calls: ush/nos_ofs_create_forcing_met.sh │ -│ ├── Calls: ush/nos_ofs_create_forcing_river.sh │ -│ └── Calls: ush/nos_ofs_create_forcing_obc.sh │ -│ │ │ -│ ▼ │ -│ ush/nos_ofs_create_forcing_*.sh │ -│ └── Uses environment variables: $MINLON, $MAXLON, $DBASE_MET_NOW, etc. │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -### Key Insight - -The USH scripts don't care where variables come from - they just use environment variables like `$MINLON`, `$MAXLON`, `$DBASE_MET_NOW`. This means we only need to change HOW variables are loaded, not the scripts that USE them. - ---- - -## Recommended Integration Approach - -### Architecture - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ INTEGRATED FLOW (with YAML support) │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ PBS Script → OPTIONAL: Add OFS_CONFIG env var │ -│ J-Job → NO CHANGE │ -│ Script → ADD: YAML loading with .ctl fallback │ -│ USH Scripts → NO CHANGE (use env vars as before) │ -│ │ -│ Configuration Priority: │ -│ 1. OFS_CONFIG env var (YAML) │ -│ 2. ${FIXofs}/${PREFIXNOS}.yaml │ -│ 3. 
${FIXofs}/${PREFIXNOS}.ctl (legacy fallback) │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -### Components to Change - -| Component | Change Required | Risk Level | -|-----------|-----------------|------------| -| ecf/ | None | - | -| jobs/ | None | - | -| scripts/exnos_ofs_prep.sh | Add YAML loading | Low | -| scripts/exnos_ofs_nowcast_forecast.sh | Add YAML loading | Low | -| ush/*.sh | None | - | -| fix/secofs/secofs.ctl | None (keep for fallback) | - | - -### Components to Add - -| Component | Purpose | -|-----------|---------| -| ush/python/nos_ofs/ | Python package for YAML processing | -| ush/nos_ofs_config.sh | Helper script for YAML loading | -| parm/systems/secofs.yaml | YAML configuration for SECOFS | -| parm/base/schism.yaml | Base SCHISM configuration | - ---- - -## Complete File List for WCOSS2 Testing - -### Files to Copy - -The following files from the unified package need to be copied to your production installation: - -```bash -# Set your package locations -UNIFIED_PKG=/path/to/nos-workflow/nos_ofs # Cloned unified package -PROD_DIR=/path/to/nosofs.v3.7.0 # Production installation - -# 1. Python package (REQUIRED) -mkdir -p ${PROD_DIR}/ush/python -cp -r ${UNIFIED_PKG}/ush/python/nos_ofs ${PROD_DIR}/ush/python/ - -# 2. Helper script for config loading (REQUIRED) -cp ${UNIFIED_PKG}/ush/nosofs/nos_ofs_config.sh ${PROD_DIR}/ush/ - -# 3. YAML configuration files (REQUIRED) -mkdir -p ${PROD_DIR}/parm/systems -mkdir -p ${PROD_DIR}/parm/base -cp ${UNIFIED_PKG}/parm/systems/secofs.yaml ${PROD_DIR}/parm/systems/ -cp ${UNIFIED_PKG}/parm/base/schism.yaml ${PROD_DIR}/parm/base/ - -# 4. 
Modified shell scripts (BACKUP ORIGINALS FIRST) -cp ${PROD_DIR}/scripts/exnos_ofs_prep.sh ${PROD_DIR}/scripts/exnos_ofs_prep.sh.backup -cp ${PROD_DIR}/scripts/exnos_ofs_nowcast_forecast.sh ${PROD_DIR}/scripts/exnos_ofs_nowcast_forecast.sh.backup -cp ${PROD_DIR}/scripts/exnos_ofs_obs.sh ${PROD_DIR}/scripts/exnos_ofs_obs.sh.backup -cp ${PROD_DIR}/scripts/exnos_ofs_continue_forecast.sh ${PROD_DIR}/scripts/exnos_ofs_continue_forecast.sh.backup - -cp ${UNIFIED_PKG}/scripts/nosofs/exnos_ofs_prep.sh ${PROD_DIR}/scripts/ -cp ${UNIFIED_PKG}/scripts/nosofs/exnos_ofs_nowcast_forecast.sh ${PROD_DIR}/scripts/ -cp ${UNIFIED_PKG}/scripts/nosofs/exnos_ofs_obs.sh ${PROD_DIR}/scripts/ -cp ${UNIFIED_PKG}/scripts/nosofs/exnos_ofs_continue_forecast.sh ${PROD_DIR}/scripts/ -``` - -### YAML Loading Methods - -The modified scripts use two different approaches for loading YAML configuration: - -| Script | YAML Loading Method | Helper Used | -|--------|---------------------|-------------| -| `exnos_ofs_prep.sh` | Sources helper script | `${USHnos}/nos_ofs_config.sh` | -| `exnos_ofs_nowcast_forecast.sh` | Direct Python call | `python3 -m nos_ofs.cli export-env` | -| `exnos_ofs_obs.sh` | Direct Python call | `python3 -m nos_ofs.cli export-env` | -| `exnos_ofs_continue_forecast.sh` | Direct Python call | `python3 -m nos_ofs.cli export-env` | - -Both approaches: -- Follow the same priority: `OFS_CONFIG` env var → `FIXofs` YAML → `FIXofs` .ctl -- Fall back to legacy `.ctl` if YAML loading fails -- Export the same environment variables - -### Safety Note - -The `.ctl` fallback ensures that if YAML loading fails for any reason (missing Python package, parse error, etc.), the scripts will automatically fall back to the original `secofs.ctl` configuration. This makes testing safe - the workflow will continue to work even if YAML integration has issues. 
- ---- - -## Step-by-Step Integration - -### Step 1: Add Python Package - -Copy the `nos_ofs` Python package to the production installation: - -```bash -# From unified package -cp -r nos_ofs/ush/python/nos_ofs ${HOMEnos}/ush/python/ - -# Verify structure -ls ${HOMEnos}/ush/python/nos_ofs/ -# Should show: __init__.py, cli.py, config/, forcing/, models/, utils/, etc. -``` - -### Step 2: Add YAML Configurations - -```bash -# Create parm directories -mkdir -p ${HOMEnos}/parm/systems -mkdir -p ${HOMEnos}/parm/base - -# Copy YAML configs -cp nos_ofs/parm/systems/secofs.yaml ${HOMEnos}/parm/systems/ -cp nos_ofs/parm/base/schism.yaml ${HOMEnos}/parm/base/ -``` - -### Step 3: Update Script (exnos_ofs_prep.sh) - -Replace the configuration loading section in `scripts/exnos_ofs_prep.sh`: - -**Original (lines 24-36):** -```bash -# Control Files For Model Run -if [ -s ${FIXofs}/${PREFIXNOS}.ctl ] -then - . ${FIXofs}/${PREFIXNOS}.ctl -else - echo "${RUN} control file is not found" - ... -fi -``` - -**Updated:** -```bash -############################################################################## -# CONFIGURATION LOADING -# Priority: 1. OFS_CONFIG env var (YAML), 2. FIXofs YAML, 3. FIXofs .ctl -############################################################################## - -CONFIG_LOADED=0 - -# Find Python package location for YAML support -for search_path in \ - "${HOMEnos}/ush/python" \ - "$(dirname $0)/../ush/python" -do - if [ -d "$search_path/nos_ofs" ]; then - export PYnos_ofs="$search_path" - break - fi -done - -# Option 1: Load from OFS_CONFIG environment variable (YAML) -if [ "$CONFIG_LOADED" -eq 0 ] && [ -n "$OFS_CONFIG" ] && [ -f "$OFS_CONFIG" ]; then - echo "Attempting to load YAML config from OFS_CONFIG: $OFS_CONFIG" - if [ -n "$PYnos_ofs" ]; then - export PYTHONPATH="${PYnos_ofs}:${PYTHONPATH}" - yaml_exports=$(python3 -m nos_ofs.cli export-env --config "$OFS_CONFIG" --framework comf 2>/dev/null) - if [ $? 
-eq 0 ] && [ -n "$yaml_exports" ]; then - eval "$yaml_exports" - CONFIG_LOADED=1 - echo "Configuration loaded from YAML: $OFS_CONFIG" - else - echo "WARNING: Failed to parse YAML config: $OFS_CONFIG" - fi - fi -fi - -# Option 2: Try to find YAML config in FIXofs -if [ "$CONFIG_LOADED" -eq 0 ] && [ -n "$PYnos_ofs" ] && [ -f "${FIXofs}/${PREFIXNOS}.yaml" ]; then - echo "Attempting to load YAML config from FIXofs: ${FIXofs}/${PREFIXNOS}.yaml" - export PYTHONPATH="${PYnos_ofs}:${PYTHONPATH}" - yaml_exports=$(python3 -m nos_ofs.cli export-env --config "${FIXofs}/${PREFIXNOS}.yaml" --framework comf 2>/dev/null) - if [ $? -eq 0 ] && [ -n "$yaml_exports" ]; then - eval "$yaml_exports" - export OFS_CONFIG="${FIXofs}/${PREFIXNOS}.yaml" - CONFIG_LOADED=1 - echo "Configuration loaded from YAML: ${FIXofs}/${PREFIXNOS}.yaml" - fi -fi - -# Option 3: Fall back to legacy .ctl file -if [ "$CONFIG_LOADED" -eq 0 ]; then - if [ -s ${FIXofs}/${PREFIXNOS}.ctl ]; then - . ${FIXofs}/${PREFIXNOS}.ctl - CONFIG_LOADED=1 - echo "Configuration loaded from legacy .ctl: ${FIXofs}/${PREFIXNOS}.ctl" - fi -fi - -# Verify configuration was loaded -if [ "$CONFIG_LOADED" -eq 0 ]; then - echo "${RUN} control file is not found, FATAL ERROR!" - echo "please provide ${RUN} config: ${PREFIXNOS}.yaml or ${PREFIXNOS}.ctl in ${FIXofs}" - msg="${RUN} control file is not found, FATAL ERROR!" - postmsg "$jlogfile" "$msg" - err_chk -fi - -echo "Configuration source: $([ -n \"$OFS_CONFIG\" ] && echo 'YAML' || echo '.ctl')" -``` - -### Step 4: Create Test PBS Script (Optional) - -Create `pbs/jnos_secofs_prep_00_yaml.pbs` for testing: - -```bash -#!/bin/bash -#PBS -N secofs_prep_00_yaml -#PBS -A NOSOFS-DEV -#PBS -q dev -#PBS -o /lfs/h1/nos/ptmp/$LOGNAME/rpt/v3.7.0/secofs_prep_00_yaml.out -#PBS -e /lfs/h1/nos/ptmp/$LOGNAME/rpt/v3.7.0/secofs_prep_00_yaml.err -#PBS -l place=vscatter,select=1:ncpus=128:mpiprocs=128 -#PBS -l walltime=1:30:00 - -. 
/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.v3.7.0/versions/run.ver - -# ... (same module loads as original) ... - -# === YAML CONFIGURATION === -export PYnos=${PACKAGEROOT}/nosofs.${nosofs_ver}/ush/python -export PYTHONPATH=${PYnos}:${PYTHONPATH} -export OFS_CONFIG=${PACKAGEROOT}/nosofs.${nosofs_ver}/parm/systems/secofs.yaml -# === END YAML CONFIGURATION === - -export envir=dev -export OFS=secofs -export cyc=00 -# ... (rest same as original) ... - -/lfs/h1/nos/estofs/noscrub/$LOGNAME/packages/nosofs.${nosofs_ver}/jobs/JNOS_OFS_PREP -``` - ---- - -## Testing Procedure - -### Test 1: Verify Python Package - -```bash -module load python/3.12.0 -export PYTHONPATH=${HOMEnos}/ush/python:$PYTHONPATH - -# Test import -python3 -c "from nos_ofs.cli import main; print('OK')" - -# Test YAML export -python3 -m nos_ofs.cli export-env \ - --config ${HOMEnos}/parm/systems/secofs.yaml \ - --framework comf | head -20 -``` - -### Test 2: Verify Variable Values - -```bash -# Export and verify -eval $(python3 -m nos_ofs.cli export-env \ - --config ${HOMEnos}/parm/systems/secofs.yaml \ - --framework comf) - -# Check key variables match secofs.ctl -echo "MINLON=$MINLON" # Should be -88.0 -echo "MAXLON=$MAXLON" # Should be -63.0 -echo "OCEAN_MODEL=$OCEAN_MODEL" # Should be SCHISM -echo "TOTAL_TASKS=$TOTAL_TASKS" # Should be 1200 -``` - -### Test 3: Parallel Comparison - -```bash -# Run with .ctl (baseline) -unset OFS_CONFIG -qsub jnos_secofs_prep_00.pbs -# Save output to baseline/ - -# Run with YAML -qsub jnos_secofs_prep_00_yaml.pbs -# Save output to yaml_test/ - -# Compare forcing files -diff baseline/ yaml_test/ -``` - ---- - -## Directory Structure After Integration - -``` -nosofs.v3.7.0/ -├── ecf/ # NO CHANGE -├── jobs/ -│ └── JNOS_OFS_PREP # NO CHANGE -├── scripts/ -│ ├── exnos_ofs_prep.sh # UPDATED (YAML support) -│ ├── exnos_ofs_nowcast_forecast.sh # UPDATED (YAML support) -│ ├── exnos_ofs_obs.sh # UPDATED (YAML support) -│ └── exnos_ofs_continue_forecast.sh # UPDATED 
(YAML support) -├── ush/ -│ ├── nos_ofs_config.sh # NEW (helper for YAML loading) -│ ├── nos_ofs_create_forcing_*.sh # NO CHANGE -│ └── python/ # NEW -│ └── nos_ofs/ # Python package -│ ├── __init__.py -│ ├── cli.py -│ ├── config/ -│ ├── forcing/ -│ └── utils/ -├── parm/ # NEW/UPDATED -│ ├── base/ -│ │ └── schism.yaml -│ └── systems/ -│ ├── secofs.yaml -│ └── stofs_3d_atl.yaml -├── fix/ -│ └── secofs/ -│ └── secofs.ctl # KEEP (fallback) -├── pbs/ -│ ├── jnos_secofs_prep_00.pbs # ORIGINAL -│ └── jnos_secofs_prep_00_yaml.pbs # NEW (for testing) -└── versions/ - └── run.ver # NO CHANGE -``` - ---- - -## YAML vs .ctl Configuration Mapping - -| .ctl Variable | YAML Path | Value | -|---------------|-----------|-------| -| `MINLON` | `grid.domain.lon_min` | -88.0 | -| `MAXLON` | `grid.domain.lon_max` | -63.0 | -| `MINLAT` | `grid.domain.lat_min` | 17.0 | -| `MAXLAT` | `grid.domain.lat_max` | 40.0 | -| `OCEAN_MODEL` | `model.ocean_model` | SCHISM | -| `DBASE_MET_NOW` | `forcing.atmospheric.primary` | GFS | -| `DBASE_MET_FOR` | `forcing.atmospheric.forecast_source` | GFS | -| `TOTAL_TASKS` | `resources.nprocs` | 1200 | -| `DELT_MODEL` | `model.physics.dt` | 120.0 | -| `LEN_FORECAST` | `model.run.forecast_days × 24` | 48 | - ---- - -## Rollback Procedure - -If issues arise, rollback is simple: - -```bash -# Option 1: Unset OFS_CONFIG (falls back to .ctl) -unset OFS_CONFIG - -# Option 2: Restore original script -cp scripts/exnos_ofs_prep.sh.backup scripts/exnos_ofs_prep.sh - -# Option 3: Use original PBS script -qsub jnos_secofs_prep_00.pbs # Uses .ctl by default -``` - ---- - -## Benefits of YAML Integration - -| Benefit | Description | -|---------|-------------| -| **Structured Config** | Hierarchical organization vs flat exports | -| **Inheritance** | Base configs reduce duplication | -| **Validation** | Schema validation possible | -| **Documentation** | Self-documenting with comments | -| **Version Control** | Easier diff/merge than .ctl | -| **Future-Ready** | 
Foundation for Python-native processors | - ---- - -## Integration Approaches: Current vs Hybrid vs Pure Python - -There are three possible approaches for integrating the unified package: - -### Approach 1: YAML-to-Shell Bridge (Current/Short-term) - -``` -┌─────────────┐ ┌──────────────┐ ┌─────────────────┐ -│ YAML Config │ --> │ export-env │ --> │ Shell Scripts │ -│ (secofs.yaml) │ (Python CLI) │ │ (unchanged USH) │ -└─────────────┘ └──────────────┘ └─────────────────┘ -``` - -**How it works:** -- YAML configuration is converted to shell environment variables -- Existing USH scripts run unchanged, using env vars as before -- `.ctl` fallback ensures production safety - -**Pros:** -- Minimal risk - USH scripts unchanged -- Easy rollback to `.ctl` if issues arise -- Can be tested in parallel with production - -**Cons:** -- Shell scripts still do the heavy lifting -- Limited ability to leverage Python ecosystem (xarray, pandas) -- Two config systems to maintain during transition - -**Best for:** Initial WCOSS2 testing and validation - ---- - -### Approach 2: Hybrid (Recommended/Medium-term) - -``` -┌─────────────┐ ┌──────────────────────────────────────┐ -│ YAML Config │ --> │ Python Workflow Controller │ -└─────────────┘ │ ├── Native Python (new processors) │ - │ └── Shell calls (legacy scripts) │ - └──────────────────────────────────────┘ -``` - -**How it works:** -- Python workflow controller orchestrates all stages -- New forcing processors written in Python (GFS, HRRR, NWM) -- Legacy shell scripts called via subprocess where needed -- Gradual migration: replace one processor at a time - -**Migration Order (recommended):** -1. Atmospheric forcing (GFS/HRRR) - most complex, highest benefit -2. River forcing (NWM) - already has Python dependencies -3. Ocean boundary conditions (RTOFS) - depends on xarray -4. 
Tidal forcing - relatively simple - -**Pros:** -- Incremental migration reduces risk -- Can leverage Python ecosystem (xarray, dask, pandas) -- Better error handling and logging -- Easier unit testing - -**Cons:** -- Mixed shell/Python during transition -- Need to maintain both code paths temporarily - -**Best for:** Production deployment after YAML validation - ---- - -### Approach 3: Pure Python (Long-term) - -``` -┌─────────────┐ ┌──────────────────────────────────────┐ -│ YAML Config │ --> │ Pure Python Workflow │ -└─────────────┘ │ ├── GFSProcessor │ - │ ├── HRRRProcessor │ - │ ├── NWMProcessor │ - │ ├── RTOFSProcessor │ - │ └── TidalProcessor │ - └──────────────────────────────────────┘ -``` - -**How it works:** -- All forcing processors implemented in Python -- No shell script dependencies -- Full Python workflow from config to model input - -**Pros:** -- Single language, easier maintenance -- Full Python ecosystem benefits -- Better parallelization with dask/xarray -- Comprehensive unit testing - -**Cons:** -- Significant development effort -- Need to replicate Fortran executable functionality -- Higher risk during initial deployment - -**Best for:** Future development after hybrid approach is stable - ---- - -### Fortran Wrapper Module - -For the hybrid approach, we provide Python wrappers around the production Fortran executables. 
This ensures: -- **Validated outputs**: Same executables used in production -- **Easy comparison**: Can compare outputs with shell script runs -- **Gradual migration**: Replace wrappers with Python implementations over time - -**Available Wrappers** (`nos_ofs/ush/python/nos_ofs/forcing/fortran_wrapper.py`): - -| Wrapper Class | Fortran Executable | Purpose | -|--------------|-------------------|---------| -| `MetFileSearchWrapper` | `nos_ofs_met_file_search` | Search for available met files | -| `MetForcingWrapper` | `nos_ofs_create_forcing_met` | Create met forcing files | -| `RiverForcingWrapper` | `nos_ofs_create_forcing_river` | Create river forcing | -| `OBCForcingWrapper` | `nos_ofs_create_forcing_obc_schism` | Create ocean boundary conditions | -| `TidalForcingWrapper` | `nos_ofs_create_forcing_obc_tides` | Create tidal forcing | - -**Usage Example:** - -```python -from nos_ofs.forcing import get_fortran_wrappers - -# Get all wrappers configured for SECOFS -wrappers = get_fortran_wrappers( - model_type="SCHISM", - exec_dir="/path/to/nosofs.v3.7.0/exec", - work_dir="/path/to/DATA" -) - -# Search for available GFS files -result = wrappers["met_search"].search( - time_start="2025010100", - time_nowcast_end="2025010106", - time_end="2025010306", - available_files=gfs_file_list, -) - -if result.success: - filtered_files = wrappers["met_search"].get_filtered_files(result) - print(f"Found {len(filtered_files)} met files") - -# Create met forcing using Fortran executable -result = wrappers["met_forcing"].create_forcing( - dbase="GFS", - runtype="nowcast", - time_start="2025010100", - time_end="2025010106", - met_files=filtered_files, - grid_file="secofs.hgrid.gr3", - output_prefix="sflux", -) -``` - -**Environment Requirements:** - -The wrappers look for executables in this order: -1. `exec_dir` parameter (if provided) -2. `$EXECnos` environment variable -3. `$HOMEnos/exec` directory -4. 
Current working directory - ---- - -### Recommended Migration Path - -``` -Phase 1 (Now) Phase 2 (After WCOSS2 Test) Phase 3 (Future) -───────────────────── ──────────────────────────── ───────────────── -YAML → env → shell Python + Fortran wrappers Pure Python - -- Test on WCOSS2 - Python workflow controller - All processors -- Validate YAML output - Call Fortran via wrappers in Python -- Compare with .ctl - Same validated executables - Remove Fortran - - Parallel validation dependencies -``` - -**Phase 2 Details (Fortran Wrappers):** -- Python orchestrates the workflow -- Fortran wrappers call production executables (`nos_ofs_met_file_search`, `nos_ofs_create_forcing_met`, etc.) -- Output identical to shell scripts (same Fortran code) -- Enables Python-based logging, error handling, and parallelization - -**Current Focus:** Phase 1 - Validate YAML configuration produces identical results to `.ctl` on WCOSS2. - ---- - -## Next Steps After Successful Integration - -1. **Validate on WCOSS2** - Run parallel tests with both YAML and .ctl configurations -2. **Compare Outputs** - Ensure YAML produces identical forcing files to .ctl -3. **Verify All Scripts** - Test prep, nowcast_forecast, obs, and continue_forecast -4. **Develop Python Processors** - Replace shell scripts gradually with native Python -5. **Production Deployment** - After thorough validation on WCOSS2 - ---- - -## Support - -- Repository: https://github.com/mansurjisan/nos-workflow -- Branch: `feature/nos-ofs-unified-package` diff --git a/docs/secofs_rrfs_implementation.md b/docs/secofs_rrfs_implementation.md deleted file mode 100644 index 8e77fc6..0000000 --- a/docs/secofs_rrfs_implementation.md +++ /dev/null @@ -1,188 +0,0 @@ -# RRFS Atmospheric Forcing Implementation for SECOFS Ensemble - -## Overview - -Ensemble member 005 uses RRFS (Rapid Refresh Forecast System) 3km atmospheric forcing as an alternative to the GFS+HRRR control and GEFS perturbation members. 
RRFS provides the highest-resolution atmospheric forcing in the ensemble at hourly temporal resolution. - -| Property | Value | -|----------|-------| -| **Ensemble Member** | 005 | -| **Resolution** | 3km native (regridded to 0.03° lat/lon) | -| **Temporal** | Hourly | -| **Max Forecast** | 84h (extended cycles 00/06/12/18Z) | -| **WCOSS2 Status** | `para` (not yet operational) | -| **Source Path** | `/lfs/h1/ops/para/com/rrfs/v1.0/rrfs.YYYYMMDD/HH/` | -| **File Pattern** | `rrfs.tHHz.prslev.3km.fFFF.na.grib2` | - -## Configuration - -### YAML (`parm/systems/secofs.yaml`) - -```yaml -ensemble: - enabled: true - n_members: 6 - method: gefs - gefs: - n_gefs_members: 4 # Does NOT count RRFS - members: - "000": { label: "control (GFS+HRRR)", met_source_1: GFS, met_source_2: HRRR } - "001": { label: "GEFS p01", met_source_1: GEFS_01 } - "002": { label: "GEFS p02", met_source_1: GEFS_02 } - "003": { label: "GEFS p03", met_source_1: GEFS_03 } - "004": { label: "GEFS p04", met_source_1: GEFS_04 } - "005": { label: "RRFS", met_source_1: RRFS } - extra_sources: [GEFS_01, GEFS_02, GEFS_03, GEFS_04, RRFS] - rrfs: - enabled: true - resolution: "3km" - domain: "na" - max_forecast_hours: 84 - fallback_source: null # SECOFS 48h fits within 84h coverage - version: "v1.0" - status: "para" -``` - -## Data Flow - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ 1. ATMOS PREP (JNOS_OFS_ENSEMBLE_ATMOS_PREP) │ -│ - Reads YAML config, detects RRFS enabled │ -│ - Sets COMINrrfs, domain bounds, COMOUTrerun │ -│ - Calls shared RRFS forcing script │ -└──────────────────────────┬──────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ 2. RRFS FORCING SCRIPT │ -│ (ush/stofs_3d_atl/stofs_3d_atl_create_surface_forcing_rrfs.sh) │ -│ │ -│ a. Collect ~111 hourly RRFS GRIB2 files │ -│ b. 
Per timestep: │ -│ - Extract vars (TMP, SPFH, UGRD, VGRD, MSLET, PRATE) │ -│ - Regrid Lambert Conformal → regular lat/lon (0.03°) │ -│ - Convert GRIB2 → NetCDF │ -│ - Rename MSLET → PRMSL │ -│ - Fill missing PRATE with zeros │ -│ c. Merge all timesteps → single sflux file │ -│ d. Archive to COMOUTrerun/ │ -│ │ -│ Output: │ -│ ${COMOUTrerun}/secofs.t12z.rrfs.air.nc │ -│ ${COMOUTrerun}/secofs.t12z.rrfs.prc.nc │ -│ (no radiation — RRFS encoding issues) │ -└──────────────────────────┬──────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ 3. MEMBER STAGING (ush/nos_ofs_ensemble_run.sh) │ -│ │ -│ - Copy RRFS air+prc → member_005/sflux/sflux_{air,prc}_1.1.nc │ -│ - Fix time(0): 0.499999 → 0.0 (COMF compatibility) │ -│ - Stage GFS sflux_rad_1.1.nc as radiation fallback │ -└──────────────────────────┬──────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ 4. SCHISM EXECUTION (JNOS_OFS_ENSEMBLE_MEMBER) │ -│ │ -│ member_005/sflux/ │ -│ ├── sflux_air_1.1.nc ← RRFS (hourly, 0.03° regridded) │ -│ ├── sflux_prc_1.1.nc ← RRFS (hourly, 0.03° regridded) │ -│ ├── sflux_rad_1.1.nc ← GFS fallback │ -│ └── sflux_inputs.txt │ -└─────────────────────────────────────────────────────────────────────┘ -``` - -## Processing Details - -### Lambert Conformal Regridding - -RRFS native 3km grid is Lambert Conformal, which causes SCHISM's `get_weight()` to abort with "orientation not consistent" — LC quadrilaterals are non-convex in lon/lat space. 
- -**Fix**: Regrid to regular lat/lon using wgrib2: - -```bash -RRFS_REGRID_DX=0.03 # ~3.3km, preserves RRFS detail - -wgrib2 $input \ - -new_grid_winds earth \ - -new_grid latlon ${LONMIN}:${NLON}:${RRFS_REGRID_DX} \ - ${LATMIN}:${NLAT}:${RRFS_REGRID_DX} \ - $output -``` - -- `-new_grid_winds earth` rotates grid-relative winds to Earth-relative -- Reduces file size from ~6GB native to ~1.6GB regridded (SECOFS domain) - -### MSLET vs PRMSL - -RRFS uses MSLET (Mean Sea Level pressure from ETA model reduction) instead of PRMSL. The script renames after extraction: - -```bash -ncrename -O -v MSLET_meansealevel,PRMSL_meansealevel ${file} -``` - -### NCO Variable Mapping - -After regridding, RRFS data uses the same `[latitude,longitude]` dimensions as GFS/GEFS, so the shared NCO script applies: - -``` -# fix/stofs_3d_atl/stofs_3d_atl_gefs_input_nco_update_var.nco -lon[latitude,longitude] = float(longitude); -lat[latitude,longitude] = float(latitude); -stmp[time,latitude,longitude] = TMP_2maboveground; -spfh[time,latitude,longitude] = SPFH_2maboveground; -uwind[time,latitude,longitude] = UGRD_10maboveground; -vwind[time,latitude,longitude] = VGRD_10maboveground; -prmsl[time,latitude,longitude] = PRMSL_meansealevel; -prate[time,latitude,longitude] = PRATE_surface; -``` - -### Radiation Fallback - -RRFS does not provide usable DSWRF/DLWRF in its GRIB2 output. Member 005 uses GFS radiation: - -| sflux file | Source | Resolution | -|------------|--------|------------| -| `sflux_air_1.1.nc` | RRFS | 0.03° hourly | -| `sflux_prc_1.1.nc` | RRFS | 0.03° hourly | -| `sflux_rad_1.1.nc` | GFS | 0.25° 3-hourly | - -### Time Coverage - -SECOFS runs a 6h nowcast + 48h forecast = 54h total. RRFS extended cycles provide 84h of data, so **no GFS fallback is needed** for time coverage (unlike STOFS-3D-ATL which needs 108h). 
- -## RRFS vs Other Ensemble Forcing Sources - -| Property | GFS+HRRR (Control) | GEFS (001-004) | RRFS (005) | -|----------|-------------------|-----------------|------------| -| **Resolution** | 0.25° + 3km blend | 0.25° | 3km → 0.03° | -| **Temporal** | Hourly | 3-hourly | Hourly | -| **Radiation** | GFS | GEFS (own) | GFS (fallback) | -| **Variables** | All 8 sflux vars | All 8 sflux vars | 6 vars + GFS rad | -| **Forecast Length** | 384h | 384h | 84h | -| **Status** | Operational | Operational | Para | - -## Key Files - -| File | Purpose | -|------|---------| -| `parm/systems/secofs.yaml` | RRFS configuration (lines 369-391) | -| `jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP` | Orchestrates RRFS processing for COMF | -| `ush/stofs_3d_atl/stofs_3d_atl_create_surface_forcing_rrfs.sh` | Shared RRFS forcing script (585 lines) | -| `fix/stofs_3d_atl/stofs_3d_atl_gefs_input_nco_update_var.nco` | NCO variable mapping (shared after regridding) | -| `ush/nos_ofs_ensemble_run.sh` | Member staging with time(0) fix | -| `pbs/launch_secofs_ensemble.sh` | PBS launcher (6 members default with `--gefs`) | - -## WCOSS2 Launch - -```bash -# Full 6-member ensemble (GFS+HRRR control, 4×GEFS, RRFS) -./pbs/launch_secofs_ensemble.sh 12 --gefs --pdy 20260221 - -# Monitor -qstat -u $LOGNAME -tail -f /lfs/h1/nos/ptmp/$LOGNAME/rpt/secofs/secofs_ens005_12.out -``` diff --git a/docs/stofs3datl_rrfs_implementation.md b/docs/stofs3datl_rrfs_implementation.md deleted file mode 100644 index 0a35ce8..0000000 --- a/docs/stofs3datl_rrfs_implementation.md +++ /dev/null @@ -1,365 +0,0 @@ -# RRFS Atmospheric Forcing Implementation for STOFS-3D-ATL Ensemble - -## Overview - -Ensemble member 005 uses RRFS (Rapid Refresh Forecast System) 3km atmospheric forcing as an alternative to the GFS+HRRR control and GEFS perturbation members. RRFS provides the highest-resolution atmospheric forcing in the ensemble at hourly temporal resolution. 
- -| Property | Value | -|----------|-------| -| **Ensemble Member** | 005 | -| **Resolution** | 3km native (regridded to 0.03° lat/lon) | -| **Temporal** | Hourly | -| **Max Forecast** | 84h (extended cycles 00/06/12/18Z) | -| **WCOSS2 Status** | `para` (not yet operational) | -| **Source Path** | `/lfs/h1/ops/para/com/rrfs/v1.0/rrfs.YYYYMMDD/HH/` | -| **File Pattern** | `rrfs.tHHz.prslev.3km.fFFF.na.grib2` | - -## Configuration - -### YAML (`parm/systems/stofs_3d_atl.yaml`) - -```yaml -ensemble: - enabled: true - n_members: 6 - method: gefs - gefs: - n_gefs_members: 5 # Includes control in count - members: - "000": { label: "control (GFS+HRRR)", met_source_1: GFS, met_source_2: HRRR } - "001": { label: "GEFS p01", met_source_1: GEFS_01 } - "002": { label: "GEFS p02", met_source_1: GEFS_02 } - "003": { label: "GEFS p03", met_source_1: GEFS_03 } - "004": { label: "GEFS p04", met_source_1: GEFS_04 } - "005": { label: "RRFS", met_source_1: RRFS } - extra_sources: [GEFS_01, GEFS_02, GEFS_03, GEFS_04, RRFS] - rrfs: - enabled: true - resolution: "3km" - domain: "na" - max_forecast_hours: 84 - fallback_source: GFS # GFS backfill for hours 84-108 - version: "v1.0" - status: "para" -``` - -### Domain Bounds - -```yaml -grid: - domain: - lon_min: -98.5035 - lon_max: -52.4867 - lat_min: 7.347 - lat_max: 52.5904 -``` - -The STOFS-3D-ATL domain is significantly larger than SECOFS (-88 to -63°, 17-40°N), covering the full western Atlantic from the Gulf of Mexico to Newfoundland. - -## Data Flow - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ 1. ATMOS PREP (JNOS_OFS_ENSEMBLE_ATMOS_PREP) │ -│ - Reads YAML config, detects RRFS enabled │ -│ - STOFS framework branch: calls RRFS script directly │ -│ - Sets COMINrrfs, domain bounds, COMOUTrerun │ -└──────────────────────────┬──────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ 2. 
RRFS FORCING SCRIPT │ -│ (ush/stofs_3d_atl/stofs_3d_atl_create_surface_forcing_rrfs.sh) │ -│ │ -│ a. Collect ~111 hourly RRFS GRIB2 files (cycle-aware) │ -│ b. Per timestep: │ -│ - Extract vars (TMP, SPFH, UGRD, VGRD, MSLET, PRATE) │ -│ - Regrid Lambert Conformal → regular lat/lon (0.03°) │ -│ - Convert GRIB2 → NetCDF │ -│ - Rename MSLET → PRMSL │ -│ - Fill missing PRATE with zeros │ -│ c. Merge all timesteps → single sflux file │ -│ d. Archive to COMOUTrerun/ │ -│ │ -│ Output: │ -│ ${COMOUTrerun}/stofs_3d_atl.t12z.rrfs.air.nc │ -│ ${COMOUTrerun}/stofs_3d_atl.t12z.rrfs.prc.nc │ -│ (no radiation — RRFS encoding issues) │ -└──────────────────────────┬──────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ 3. MEMBER STAGING (ush/nos_ofs_ensemble_run.sh) │ -│ _ensemble_stofs_stage_atmos() │ -│ │ -│ - Copy RRFS air+prc → member_005/sflux/sflux_{air,prc}_1.0001.nc│ -│ - Stage GFS sflux_rad_1.0001.nc as radiation fallback │ -│ - STOFS uses .0001.nc naming (not .1.nc like COMF) │ -└──────────────────────────┬──────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ 4. SCHISM EXECUTION (JNOS_OFS_ENSEMBLE_MEMBER) │ -│ 34 nodes × 128 cores = 4352 (4314 compute + 6 I/O scribes) │ -│ Walltime: 6 hours │ -│ │ -│ member_005/sflux/ │ -│ ├── sflux_air_1.0001.nc ← RRFS (hourly, 0.03° regridded) │ -│ ├── sflux_prc_1.0001.nc ← RRFS (hourly, 0.03° regridded) │ -│ ├── sflux_rad_1.0001.nc ← GFS fallback │ -│ └── sflux_inputs.txt │ -└─────────────────────────────────────────────────────────────────────┘ -``` - -## Time Coverage and GFS Fallback - -### The 84h Gap Problem - -STOFS-3D-ATL runs a 24h nowcast + 108h forecast = **132h total**. RRFS extended cycles only provide **84h** of forecast data. 
- -``` - RRFS coverage (84h) - ├───────────────────────────────────────┤ - | nowcast (24h) | forecast (108h) | - ├─────────────────┼───────────────────────────────┤ - ├─────────┤ - GFS backfill - (hours 84-108) -``` - -### Fallback Configuration - -```yaml -rrfs: - max_forecast_hours: 84 - fallback_source: GFS # Backfill for hours beyond RRFS coverage -``` - -The RRFS forcing script collects files up to the 84h limit. For the remaining 24h (hours 84-108), the ensemble staging function falls back to GFS atmospheric forcing from the control member's prep output. - -### Comparison with SECOFS - -| | STOFS-3D-ATL | SECOFS | -|---|---|---| -| **Forecast length** | 108h | 48h | -| **RRFS coverage** | 84h | 84h | -| **Gap** | 24h (needs GFS backfill) | None (fully covered) | -| **`fallback_source`** | `GFS` | `null` | - -## Processing Details - -### Lambert Conformal Regridding - -RRFS native 3km grid is Lambert Conformal, which causes SCHISM's `get_weight()` to abort with "orientation not consistent" — LC quadrilaterals are non-convex in lon/lat space. 
- -```bash -RRFS_REGRID_DX=0.03 # ~3.3km, preserves RRFS detail - -wgrib2 $input \ - -new_grid_winds earth \ - -new_grid latlon ${LONMIN}:${NLON}:${RRFS_REGRID_DX} \ - ${LATMIN}:${NLAT}:${RRFS_REGRID_DX} \ - $output -``` - -- `-new_grid_winds earth` rotates grid-relative winds to Earth-relative -- STOFS-3D-ATL regrid target: ~1535 × 1508 grid points (vs SECOFS ~834 × 767) -- Reduces native ~6GB files to ~2GB (STOFS-3D-ATL domain) - -### Cycle-Aware File Collection - -The RRFS forcing script uses different file collection strategies depending on cycle time: - -**12Z cycle (primary STOFS-3D-ATL cycle):** -``` -Nowcast period (24h lookback): - prev-day 06Z f006 (1 file) - prev-day 12Z f001-f006 (6 files) - prev-day 18Z f001-f006 (6 files) - today 00Z f001-f006 (6 files) - today 06Z f001-f006 (6 files) - ───────── - 25 files - -Forecast period (84h ahead): - today 12Z extended f001-f084 (84 files) - ───────── -Total: ~109 hourly files -``` - -Each file entry stores `VALID_HOUR|FILEPATH` for proper time-ordering when stitching files from multiple cycles. Deduplication prevents duplicate valid times. 
- -### MSLET vs PRMSL - -RRFS uses MSLET (Mean Sea Level pressure from ETA model reduction) instead of PRMSL: - -```bash -ncrename -O -v MSLET_meansealevel,PRMSL_meansealevel ${file} -``` - -### NCO Variable Mapping - -After regridding, RRFS uses the same `[latitude,longitude]` dimensions as GFS/GEFS: - -``` -# fix/stofs_3d_atl/stofs_3d_atl_gefs_input_nco_update_var.nco -time[time] = float(tin/24.); -lon[latitude,longitude] = float(longitude); -lat[latitude,longitude] = float(latitude); -stmp[time,latitude,longitude] = TMP_2maboveground; -spfh[time,latitude,longitude] = SPFH_2maboveground; -uwind[time,latitude,longitude] = UGRD_10maboveground; -vwind[time,latitude,longitude] = VGRD_10maboveground; -prmsl[time,latitude,longitude] = PRMSL_meansealevel; -prate[time,latitude,longitude] = PRATE_surface; -``` - -### Radiation Fallback - -RRFS DSWRF/DLWRF variables have inconsistent wgrib2 `-netcdf` naming (averaged fields get suffixes that don't match the NCO rename script). Member 005 uses GFS radiation: - -| sflux file | Source | Resolution | -|------------|--------|------------| -| `sflux_air_1.0001.nc` | RRFS | 0.03° hourly | -| `sflux_prc_1.0001.nc` | RRFS | 0.03° hourly | -| `sflux_rad_1.0001.nc` | GFS | 0.25° 3-hourly | - -### QC Thresholds - -```bash -FILESIZE=500000000 # Minimum 500MB per RRFS GRIB2 file (native ~6GB) -N_dim_cr_min_cntList=30 # Minimum 30 timesteps for valid merged file -N_dim_cr_min=70 # Minimum 70 timesteps (~2.9 days) for archival QC -N_dim_cr_max=100 # Expected ~100 timesteps (~4.2 days) -list_fn_sz_cr=(2000000) # Merged file size threshold (~2GB for STOFS-ATL) -``` - -## Sflux File Naming Conventions - -### STOFS vs COMF - -| Convention | STOFS-3D-ATL | SECOFS (COMF) | -|------------|-------------|---------------| -| **Air forcing** | `sflux_air_1.0001.nc` | `sflux_air_1.1.nc` | -| **Precip** | `sflux_prc_1.0001.nc` | `sflux_prc_1.1.nc` | -| **Radiation** | `sflux_rad_1.0001.nc` | `sflux_rad_1.1.nc` | - -### COMOUTrerun Archive Names - -``` 
-${RUN}.${cycle}.rrfs.air.nc # e.g., stofs_3d_atl.t12z.rrfs.air.nc -${RUN}.${cycle}.rrfs.prc.nc # e.g., stofs_3d_atl.t12z.rrfs.prc.nc -``` - -No `.rad.nc` archived — radiation comes from GFS. - -## RRFS vs Other Ensemble Forcing Sources - -| Property | GFS+HRRR (Control) | GEFS (001-004) | RRFS (005) | -|----------|-------------------|-----------------|------------| -| **Resolution** | 0.25° + 3km blend | 0.25° | 3km → 0.03° | -| **Temporal** | Hourly | 3-hourly | Hourly | -| **Radiation** | GFS | GEFS (own) | GFS (fallback) | -| **Variables** | All 8 sflux vars | All 8 sflux vars | 6 vars + GFS rad | -| **Forecast Length** | 384h | 384h | 84h (+GFS backfill) | -| **Pressure Var** | PRMSL | PRMSL | MSLET → PRMSL | -| **Humidity Var** | RH (converted) | SPFH | SPFH | -| **Precip Var** | APCP (converted) | PRATE | PRATE | -| **Native Grid** | Gaussian | Gaussian | Lambert Conformal | -| **Status** | Operational | Operational | Para | - -## PBS Resources - -### Atmospheric Prep (`jnos_stofs3datl_ensemble_atmos_prep.pbs`) - -``` -#PBS -l select=1:ncpus=128:mpiprocs=8 -#PBS -l walltime=02:00:00 -``` - -- 8 MPI procs: wgrib2/NCO are serial; parallel only via `cfp` for GEFS members -- GEFS members processed in parallel, RRFS processed sequentially -- 2-hour walltime accounts for large RRFS GRIB2 files - -### Ensemble Member (`jnos_stofs3datl_ensemble_member.pbs`) - -``` -#PBS -l select=34:ncpus=128:mpiprocs=128 -#PBS -l place=vscatter:exclhost -#PBS -l walltime=06:00:00 -``` - -- 34 nodes × 128 cores = 4352 total (4314 compute + 6 I/O scribes) -- MPI tuning for WCOSS2: - ```bash - export MPICH_OFI_STARTUP_CONNECT=1 - export MPICH_COLL_SYNC=MPI_Bcast - export MPICH_REDUCE_NO_SMP=1 - export FI_OFI_RXM_SAR_LIMIT=1572864 - ``` - -## PBS Launcher Dependency Chain - -``` -./launch_stofs3datl_ensemble.sh 12 --gefs --pdy 20260221 -``` - -``` -prep ─────────┐ - ▼ - atmos_prep (GEFS 01-04 + RRFS) - │ - ├──→ member_000 (control GFS+HRRR) ──┐ - ├──→ member_001 (GEFS gep01) ──┤ - ├──→ 
member_002 (GEFS gep02) ──┤ - ├──→ member_003 (GEFS gep03) ──┼──→ post_ensemble - ├──→ member_004 (GEFS gep04) ──┤ - └──→ member_005 (RRFS 3km) ──┘ -``` - -All members run in parallel after atmos_prep completes. Post-processing waits for all members to finish. - -### Launcher Options - -```bash -# Full 6-member GEFS+RRFS ensemble -./launch_stofs3datl_ensemble.sh 12 --gefs --pdy 20260221 - -# Smaller test (3 members: control + 2 GEFS) -./launch_stofs3datl_ensemble.sh 12 3 --gefs --pdy 20260221 - -# Ensemble + deterministic in parallel -./launch_stofs3datl_ensemble.sh 12 --gefs --with-det --pdy 20260221 - -# Deterministic only (prep → nowcast → forecast) -./launch_stofs3datl_ensemble.sh 12 --det-only --pdy 20260221 -``` - -## Key Files - -| File | Purpose | -|------|---------| -| `parm/systems/stofs_3d_atl.yaml` | RRFS + ensemble configuration | -| `jobs/JNOS_OFS_ENSEMBLE_ATMOS_PREP` | Orchestrates RRFS + GEFS processing | -| `ush/stofs_3d_atl/stofs_3d_atl_create_surface_forcing_rrfs.sh` | RRFS forcing script (585 lines) | -| `fix/stofs_3d_atl/stofs_3d_atl_gefs_input_nco_update_var.nco` | NCO variable mapping (shared after regridding) | -| `ush/nos_ofs_ensemble_run.sh` | Member staging with STOFS `.0001.nc` naming | -| `scripts/stofs_3d_atl/exnos_ofs_ensemble_member.sh` | STOFS member execution script | -| `pbs/launch_stofs3datl_ensemble.sh` | PBS launcher with dependency chain | -| `pbs/jnos_stofs3datl_ensemble_member.pbs` | PBS resources (34 nodes, 6h) | -| `pbs/jnos_stofs3datl_ensemble_atmos_prep.pbs` | PBS resources (1 node, 2h) | - -## Differences from SECOFS RRFS Implementation - -| Aspect | STOFS-3D-ATL | SECOFS | -|--------|-------------|--------| -| **Framework** | STOFS | COMF | -| **Domain** | -98.5 to -52.5°, 7.3-52.6°N | -88 to -63°, 17-40°N | -| **Regrid size** | ~1535 × 1508 | ~834 × 767 | -| **Forecast** | 108h (exceeds RRFS 84h) | 48h (within RRFS 84h) | -| **GFS fallback** | Required (hours 84-108) | Not needed | -| **sflux naming** | `.0001.nc` | 
`.1.nc` | -| **time(0) fix** | Not needed (native STOFS) | Required (0.499999 → 0.0) | -| **Cores per member** | 4352 (34 nodes) | 480 (4 nodes) | -| **Walltime** | 6 hours | 1.5 hours | -| **n_gefs_members** | 5 (includes control) | 4 (excludes control) | diff --git a/docs/stofs_3d_atl_v311_changes.md b/docs/stofs_3d_atl_v311_changes.md deleted file mode 100644 index 37a60e3..0000000 --- a/docs/stofs_3d_atl_v311_changes.md +++ /dev/null @@ -1,399 +0,0 @@ -# STOFS-3D-ATL v3.1.0 → v3.1.1 Change Analysis - -**Date:** 2026-02-24 -**Source:** `/mnt/e/IT_STOFS_V.3.1/stofs_3d_atl/stofs_3d_atl/` -**Target:** `/mnt/d/NOS-Workflow-Project/nos_ofs_complete_package/nos_ofs/` (branch: `feature/unified-nowcast-forecast`) -**Para output for validation:** https://noaa-nos-stofs3d-pds.s3.amazonaws.com/index.html#STOFS-3D-Atl/para/stofs.v3.1.1_CMMB/ - ---- - -## Table of Contents - -1. [Unchanged Scripts](#unchanged-scripts) -2. [USH Forcing Script Changes](#ush-forcing-script-changes) -3. [Post-Processing Script Changes](#post-processing-script-changes) -4. [Architecture Changes](#architecture-changes) -5. [Python Script Changes](#python-script-changes) -6. [New Fix Files](#new-fix-files-needed-on-wcoss2) -7. [Ex-Script Changes](#ex-script-changes) -8. [J-Job Changes](#j-job-changes) -9. [Integration Strategy](#integration-strategy) - ---- - -## Unchanged Scripts - -These v3.1.0 scripts are byte-identical in v3.1.1 — no work needed: - -- `stofs_3d_atl_create_surface_forcing_gfs.sh` -- `stofs_3d_atl_create_surface_forcing_hrrr.sh` -- `stofs_3d_atl_create_obc_nudge.sh` -- `stofs_3d_atl_create_bctides_in.sh` -- `stofs_3d_atl_create_param_nml.sh` -- `make_ntc_file.pl` - ---- - -## USH Forcing Script Changes - -### 1. 
`stofs_3d_atl_create_river_forcing_nwm.sh` — MAJOR - -**VIMS v7 simplified river source/sink system:** - -- v3.1.0 used 7 fix files: `source_sink.in.before_relocate`, `sources_conus.json`, `source_scale.txt`, `sinks_conus.json`, `relocate_map.txt`, `source_sink.in`, plus `relocate_source_feeder_lean.py` -- v3.1.1 uses only `sources_conus.json` (renamed to `sources.json`) — no relocation, no sinks, no scale file -- The time-axis correction block (`if [ 1 -eq 1 ]`) is retained but immediately overwritten by `cp -f ${fn_river_th} ${fn_river_th_std}` — meaning the sed/cut time rewrite is effectively bypassed -- Added `sed 's/^ *//g' -i` to strip leading whitespace from river forcing output - -### 2. `stofs_3d_atl_create_river_st_lawrence.sh` — MAJOR - -**St. Lawrence River data source and processing changed:** - -- **Data source path:** `${DCOMROOT}/${date}/canadian_water/QC_02OA016_hourly_hydrometric.csv` → `${COMINlaw}/${date}/can_streamgauge/02OA016_hydrometric.csv` (new `COMINlaw` variable) -- **Symlink naming:** No longer creates `river_st_law_obs.csv` symlink; directly symlinks `02OA016_hydrometric.csv` -- **New datetime format:** Added `str_yyyy_mm_dd_hr_mm_ss_py_Law` variable in format `YYYY-MM-DD HH:00:00` (was `YYYY-MM-DD-HH`) -- Added `rm -f TEM_1.th` before TEM_1.th creation (cleanup of stale files) - -### 3. 
`stofs_3d_atl_create_obc_3d_th_non_adjust.sh` — MAJOR - -**ADT processing restored and output renamed:** - -- v3.1.0 had all ADT code commented out (workaround reading pre-processed `adt_aft_cvtz_cln.nc` from rerun dir) -- v3.1.1 restores the full ADT processing pipeline: 3 code paths for both/today/prev ADT data availability -- **ADT size validation:** New check `sz_adt_aft_cvtz_cln` — if ADT file < 5MB, falls back to previous day's rerun copy -- Archives `adt_aft_cvtz_cln.nc` to `${COMOUTrerun}` -- **Output renamed:** `elev2dth` → `elev2dth_non_adj` (differentiates from dynamically adjusted version) -- **Bug fix in backup fallback:** `cpreq -pf ${fn_std} ${COMOUTrerun}/${fn_std}` changed to `cpreq -pf ${fn_prev} ${COMOUTrerun}/${fn_std}` (was overwriting with empty current file instead of previous day's file) - -### 4. `stofs_3d_atl_create_obc_3d_th_dynamic_adjust.sh` — NEW - -**Entirely new: Dynamic bias correction for OBC water levels.** - -This is the single biggest scientific change in v3.1.1: - -1. Reads CO-OPS water level observations from 11 tide stations (Fort Pulaski to Newport) via `${DCOMROOT}/${date}/coops_waterlvlobs/${staID}.xml` -2. Creates NPZ database from observation CSVs via `create_npz_NOAA.py` -3. Derives model bias via `derive_bias.py` comparing SCHISM `staout_1` against observations over a 2-day window -4. Applies the bias correction to `elev2D.th.nc`: - - Time step 0: subtract `adj0` (previous cycle's bias) - - Time step 1: subtract average of `adj0` and `adj1` - - Time steps 2+: subtract `adj1` (today's bias) -5. Archives bias value (`avg_bias`) for use by next cycle -6. Uses `cdo inttime` to resample to hourly before applying corrections - -**Dependencies:** `pylibs/` (VIMS pylib library), `create_npz_NOAA.py`, `derive_bias.py`, `station.bp`, `diff.bp` (xGEOID-NAVD offsets) - -### 5. 
`stofs_3d_atl_create_restart_combine_rtofs_stofs.sh` — Minor - -- Threshold for RTOFS 2D/3D file count changed from `$cnt > 1` to `$cnt > 0` (allows processing with exactly 1 file) -- Stderr redirects renamed: `errfile` → `errfile_rtofs` and `errfile_python_combine` (avoids overwriting) - ---- - -## Post-Processing Script Changes - -### 6. `stofs_3d_atl_add_attr_2d_3d_nc.sh` — MAJOR - -**Two new output variables added (6 → 8 global outputs):** - -- Added `verticalVelocity` (units="m/s") -- Added `diffusivity` (units="m2/s") -- Added `nSCHISM_vgrid_layers` dimension (49 levels) to out2d files via `ncap2 -s 'defdim("nSCHISM_vgrid_layers",49);vgrid_dummy[nSCHISM_vgrid_layers]=0.'` - -### 7. `stofs_3d_atl_create_2d_field_nc.sh` — Rewritten - -- Working directory changed from `results/` to `dir_slab_2d/` -- Adds `vgrid.in` symlink (not present in v3.1.0) -- Python call changed: removed `--date` argument, added `--output_dir` argument -- Comment notes "lack of `import psutil`" as motivation - -### 8. `stofs_3d_atl_create_adcirc_nc.sh` — MAJOR (Parallelized) - -**Restructured for MPMD parallel execution:** - -- v3.1.0: Sequential loop over stacks 1-10, then merges pairs into 5 daily files -- v3.1.1: Takes 3 positional arguments (`idx_1_adc`, `idx_2_adc`, `idx_day_adc`), merges two stacks via `ncrcat` first, then runs Python on the merged file -- Designed for `mpiexec cfp` parallel dispatch from `post_1.sh` -- Fixed typo: `mstofs` → `msg` - -### 9. `stofs_3d_atl_create_awips_shef.sh` — MAJOR (Datum Shift) - -**Vertical datum changed from NAVD88 to MSL:** - -- All references to `navd` replaced with `msl` -- Fix file `stofs_3d_atl_sta_cwl_xgeoid_to_navd.nco` → `stofs_3d_atl_sta_cwl_xgeoid_to_msl.nco` -- Fix file `stofs_3d_atl_sta_awips_shef_navd88_mllw.txt` → `stofs_3d_atl_sta_awips_shef_msl_mllw.txt` -- SHEF station count: 107 → 150 (`ncks -O -C -d station,0,106,1` → `ncks -O -C -d station,0,149,1`) - -### 10. 
`stofs_3d_atl_create_station_profile_nc.sh` — MAJOR (Parallelized + Datum) - -**Restructured for MPMD + datum shift:** - -- v3.1.0: Two sequential Python calls (nowcast stacks 1-2, forecast stacks 3-10) -- v3.1.1: Takes single positional argument `stack_no_oi`, processes one stack per invocation (for MPMD dispatch) -- Output directory changed from `results/` to `dir_profile/` -- Adds `vgrid.in`, `hgrid.gr3`, `station.in` symlinks -- **Datum shift:** NAVD NCO file → MSL NCO file (`stofs_3d_atl_sta_cwl_xgeoid_to_msl.nco`) -- Removed SENDDBN alert calls (archival/merge moved to calling script) - -### 11. `stofs_3d_atl_create_awips_grib2.sh` — Changed - -- File list reduced from `schout_adcirc_{1..10}.nc` to `schout_adcirc_{1..5}.nc` (matches new daily-merged ADCIRC format) -- `ncrcat` merges 5 files instead of 10 -- Removed `SENDDBN_NTC` / `dbn_alert NTC_LOW` block (NTC dissemination dropped) - -### 12. `stofs_3d_atl_create_geopackage.sh` — Changed - -- v3.1.0 copied from `Dir_backup_2d3d/out2d_*.nc` -- v3.1.1 copies directly from `outputs/{horizontalVelX,horizontalVelY,out2d,salinity,temperature,zCoordinates}*.nc` -- Removed stray `export err=$?` at end - -### 13. `stofs_3d_atl_create_AWS_autoval_nc.sh` — Minor - -- File glob changed from `out2d_?.nc` to `out2d_{?,??}.nc` to match 2-digit stack numbers (stacks 1-10) - -### 14. `stofs_3d_atl_create_merged_hotstart_nc.sh` — NEW - -**Standalone hotstart merge script for MPMD parallel execution.** - -Previously inline in `exstofs_3d_atl_post_2.sh`, now extracted: -- Calls `stofs_3d_atl_combine_hotstart -i 576` -- Sets `time=0.0` in the merged file -- Archives to `${COMOUT}/${RUN}.${cycle}.hotstart.stofs3d.nc` - -### 15. 
`stofs_3d_atl_create_partition_prop.sh` — NEW - -**Dynamic mesh partitioning at runtime.** - -Handles PBS node allocation differing from pre-computed partition: -- Reads max processor ID from existing `partition.prop` -- Compares with `${NCPU_PBS} - n_scribes` (n_scribes now 8) -- If mismatch: runs `gpmetis` on `stofs_3d_atl_graphinfo.txt` to generate new partitioning -- Archives result to `${COMOUT}/rerun/stofs_3d_atl_partition.prop` - ---- - -## Architecture Changes - -### Watchdog Job (NEW) - -`exstofs_3d_atl_watchdog.sh` (357 lines) — the biggest architectural addition in v3.1.1. - -Runs as a **concurrent PBS job** alongside `now_forecast`: - -1. **Discovers NF run directory:** Reads PID and output path from `${COMOUT}/rerun/file_one_line_dir_NF_run_outputs` (written by now_forecast) -2. **Polls `mirror.out`:** Watches for `TIME STEP=` entries at 288-step intervals (288, 576, ..., 2880) -3. **Incrementally archives** to both `${COMOUT}/outputs_watchdog/` and `${TMPPATH}_${pid}`: - - `local_to_global_*` files (once, on first timestep) - - `hotstart_*_${step}.nc` subdomain files - - History NC files (8 variables: horizontalVelX/Y, out2d, salinity, temperature, zCoordinates, verticalVelocity, diffusivity) - - `staout_*` files, `mirror.out` -4. **Skips final timestep (2880):** Defers to now_forecast for final archival - -**Impact:** Eliminates the 5-hour polling loops in post_1/post_2. Post jobs read from `outputs_watchdog/` directly. - -> **Note for unified workflow:** Our split nowcast/forecast approach archives after each phase via `archive_outputs()`, so we do NOT need the watchdog. However, the post-processing scripts now expect `outputs_watchdog/` paths — we'll need to adjust these references. 
- -### Other Architecture Changes - -| Change | v3.1.0 | v3.1.1 | -|--------|--------|--------| -| I/O scribes | 6 | **8** | -| Global output variables | 6 | **8** (+ verticalVelocity, diffusivity) | -| param.nml template | `_param.nml_6globaloutput` | **`_param.nml_8globaloutput`** | -| sflux file naming | `.0001.nc` (4-digit padded) | **`.1.nc`** (non-padded, matches COMF!) | -| COLDSTART | Allowed (copies fix file) | **Forbidden** (`err_exit`) | -| Missing restart | Warning only | **Fatal** (`err_exit`) | -| Annual T/S restart date | April 5 | **April 15** | -| Post-processing data source | `$DATA/outputs/` | **`${COMOUT}/outputs_watchdog/`** | -| Station profiles | Sequential | **MPMD parallel** (10 tasks) | -| ADCIRC NC extraction | Sequential loop (10 stacks) | **MPMD parallel** (5 tasks, paired stacks) | -| Hotstart merge | Inline in post_2 | **Standalone script** via MPMD | - ---- - -## Python Script Changes - -### Changed Files - -| File | Severity | Key Changes | -|------|----------|-------------| -| `gen_sourcesink.py` | **Major rewrite** | VIMS v7: removed `relocate_source_feeder_lean.py`, `write_th_file`, `write_mth_file`, `get_aggregated_features`; simplified to direct numpy ops; uses `sources.json` only; no more sink/scale/relocate processing | -| `gen_fluxth_st_lawrence_riv.py` | **Rewrite** | Renamed `get_river_discharge()` → `get_river_hydrometric()`; extracts both flow AND temperature; new CSV column parsing; hardcoded `02OA016_hydrometric.csv`; time format `%.3f` → `%d` | -| `extract_slab_fcst_netcdf4.py` | **Complete rewrite** | New `VerticalInterpInfo` dataclass; `vertical_interp()` replaces `get_zcor_interp_coefficient()`; memory-save mode via `--mem_save_mode`; `--stack N` argument; imports `generate_adcirc.split_quads` | -| `generate_adcirc.py` | **Significant** | `split_quads()` vectorized with numpy; `--datum` CLI arg (default `xGEOID20B`); dry node masking via `dryFlagNode`; fixed depth coordinates attribute `"y x"` | -| 
`hotstart_proc.py` | **Significant** | `zdata` class imported from `pylib` (was inline); `self.dims` → `my_hot.sizes` (xarray compat); `infile/outfile` → `foreground_file/background_file`; sample functions extracted | -| `pylib.py` | **Major refactoring** | `pylibs/src/` submodule support; import restructuring (`src.mylib`, `src.schism_file`); many new functions imported; lazy imports for pandas/pyproj/netCDF4; `numpy._core` aliasing for newer numpy | -| `gen_geojson.py` | Changed | Disturbance threshold `< 0.3` → `< -5`; GeoPackage levels expanded `np.arange(0.3, 2.1, 0.1)` → `np.arange(-0.5, 2.1, 0.1)` with -5 min and 20 max (supports negative water levels) | -| `get_stations_profile.py` | Minor | Output filename `stofs_stations_forecast.nc` → `stofs_stations_profile_{stack_start}_{stack_end}.nc` | - -### New Files - -| File | Lines | Purpose | -|------|-------|---------| -| `create_npz_NOAA.py` | 60 | Converts CO-OPS XML water level obs to NPZ format for bias correction | -| `derive_bias.py` | 276 | Calculates model-observation bias at 11 Atlantic coast stations; outputs average bias correction value | -| `pylibs/` | (directory) | VIMS upstream `pylib` library (git submodule with `src/mylib.py`, `src/schism_file.py`, scripts, tutorials) | - -### Identical Python Files - -- `gen_temp_1_st_lawrence_riv.py` -- `generate_station_timeseries.py` -- `mylib.py` -- `relocate_source_feeder_lean.py` -- `river_th_extract2asci.py` -- `schism_file.py` -- `utils.py` - ---- - -## New Fix Files Needed on WCOSS2 - -These files exist in v3.1.1's `fix/stofs_3d_atl/` and must be staged on WCOSS2: - -### Dynamic OBC Bias Correction -- `stofs_3d_atl_obc_adjust_msl_geoid.bp` (639 B) — MSL-to-geoid adjustment for 11 OBC stations -- `stofs_3d_atl_obc_adjust_station.bp` (478 B) — OBC adjustment station definitions - -### VIMS v7 River Sources -- `stofs_3d_atl_river_msource.th` (135 KB) — River momentum source time history -- `stofs_3d_atl_river_source_sink.in` (15 MB) — River source/sink 
definitions -- `stofs_3d_atl_river_sources_conus.json` (130 KB) — CONUS river-to-NWM reach ID mapping (8575 lines) -- `stofs_3d_atl_river_vsink.th` (42 MB) — River volume sink time history - -### MSL Datum Products -- `stofs_3d_atl_sta_awips_shef_msl_mllw.txt` (2.3 KB) — Station MSL-to-MLLW offsets for SHEF (150 stations) -- `stofs_3d_atl_sta_cwl_xgeoid_to_msl.nco` (6.5 KB) — Per-station geoid-to-MSL NCO corrections (166 lines) - -### Station Metadata -- `stofs_3d_atl_staout_nc.csv` (11 KB) — Station info CSV (167 stations: ID, name, lat, lon) -- `stofs_3d_atl_staout_nc.json` (1.1 KB) — Output variable metadata JSON - -### Model Configuration -- `stofs_3d_atl_param.nml_8globaloutput` (62 KB) — Updated param.nml template with 8 global outputs - -### Unchanged Fix Files (already on WCOSS2) -All `.gr3` files (312 MB each), `vgrid.in` (1.6 GB), `graphinfo.txt` (1.4 GB), `partition.prop` (71 MB), etc. are unchanged. - ---- - -## Ex-Script Changes - -### `exstofs_3d_atl_prep_processing.sh` - -| Area | v3.1.0 | v3.1.1 | -|------|--------|--------| -| Debug mode | `set $setoff` | `set $seton` | -| param.nml template | `_param.nml_6globaloutput` | `_param.nml_8globaloutput` | -| OBC scripts | Single `_obc_3d_th.sh` | Split: `_non_adjust.sh` then `_dynamic_adjust.sh` | -| Script order | St. Lawrence → OBC | OBC → St. 
Lawrence | -| COLDSTART | Allowed (copy fix file) | **Forbidden** (`err_exit`) | -| Missing restart | Warning | **Fatal** (`err_exit`) | -| `source_sink.in` | Active symlink | Commented out | -| `postmsg` | `postmsg "$jlogfile" "$msg"` | `postmsg "$msg"` | - -### `exstofs_3d_atl_now_forecast.sh` - -| Area | v3.1.0 | v3.1.1 | -|------|--------|--------| -| n_scribes | 6 | **8** | -| sflux naming | `.0001.nc` | **`.1.nc`** | -| partition.prop | Static from fix | **Dynamic** via `create_partition_prop.sh` | -| Watchdog PID | Not present | **Exports** `pid_JOBS_NF_run` and writes to `file_one_line_dir_NF_run_outputs` | -| Post-run archival | None | **Copies** key outputs to `${COMOUT}/outputs_watchdog/` | -| Post-run sleep | None | **`sleep 180s`** | - -### `exstofs_3d_atl_post_1.sh` - -| Area | v3.1.0 | v3.1.1 | -|------|--------|--------| -| Polling loop | 30 iterations × 600s (5 hours max) | **Removed** — reads from `outputs_watchdog/` directly | -| Data source | `$DATA/outputs/` | **`${COMOUT}/outputs_watchdog/`** | -| Station profiles | Sequential Python calls | **MPMD parallel** (2 nowcast + 8 forecast tasks) | -| ADCIRC NC | Sequential loop over 10 stacks | **MPMD parallel** (5 tasks, paired stacks) | -| Error handling | Warning | **Fatal** (`err_exit`) | - -### `exstofs_3d_atl_post_2.sh` - -| Area | v3.1.0 | v3.1.1 | -|------|--------|--------| -| Polling loop | Present | **Removed** | -| Hotstart merge | Inline | **Standalone** `create_merged_hotstart_nc.sh` via MPMD | -| Attribute update | Only in post_1 | **Also runs here** (10-process MPMD) | -| Data source | `$DATA/outputs/` | **`${COMOUT}/outputs_watchdog/`** | - -### `exstofs_3d_atl_temp_salt_restart.sh` - -- Annual T/S update date: April 5 (`0405`) → **April 15** (`0415`) - -### `exstofs_3d_atl_hot_restart_prep.sh` - -| Area | v3.1.0 | v3.1.1 | -|------|--------|--------| -| NCPU_PBS_hot_restart | Hardcoded `4314` | **Dynamic** `NCPU_PBS - n_scribes` | -| Hotstart search | Starts at step 2880 | **Starts 
at 2592** (skips 2880, handled by now_forecast) | -| File source | `$DATA/outputs/` | **`${COMOUT}/outputs_watchdog/`** | -| param.nml backup | `_hot_restart` suffix | `_backup_ihot1` / `_back_ihot2` suffixes | - -### `exstofs_3d_atl_watchdog.sh` — NEW - -357-line companion job running concurrently with `now_forecast`. See [Architecture Changes](#watchdog-job-new) above. - ---- - -## J-Job Changes - -All v3.1.1 J-jobs have these common structural changes: - -- **Date as argument:** Accepts `$1` for `YMD_CURRENT_DATE` (no more hardcoded dates) -- **PID-based DATA directories:** `${DATAROOT}/prep_${YMD_CURRENT_DATE}.${pid}` (prevents collisions) -- **`COMROOT` version bump:** `com/stofs/v2.1` → `com/stofs/v3.1` -- **`NET` changed:** `stofs` → `stofs3d` -- **Removed `compath.py` calls:** Direct `${COMPATH}/...` paths instead -- **New path variables:** `COMPATH`, `COMPATH_DCOM` as base paths - -### Per-Job Changes - -| J-Job | v3.1.1 Changes | -|-------|----------------| -| `JSTOFS_3D_ATL_PREP` | New forcing source dirs: `COMINadt`, `COMINgfs`, `COMINhrrr`, `COMINrtofs`, `COMINnwm`, `COMINlaw`, `COMINwl` | -| `JSTOFS_3D_ATL_NOW_FORECAST` | Exports `pid_JOBS_NF_run=$$` for watchdog; `KEEPDATA="YES"` always | -| `JSTOFS_3D_ATL_POST_I` | `SENDCOM` defaulted to NO | -| `JSTOFS_3D_ATL_POST_II` | All `SEND*` defaulted to NO | -| `JSTOFS_3D_ATL_TEMP_SALT_RESTART` | `SORCstofs3d` path added; `COMINrtofs` defined | -| `JSTOFS_3D_ATL_WATCHDOG` | **NEW** — `TMPPATH` variable for temp archive | - ---- - -## Integration Strategy - -### Approach - -**Copy v3.1.1 USH scripts and Python exactly, then re-apply our YAML config loading and unified workflow adaptations on top.** - -### What We Update (STOFS-specific scripts) - -1. **USH scripts in `ush/stofs_3d_atl/`** — Replace with v3.1.1 versions, re-add YAML config loading blocks -2. **Python scripts in `ush/stofs_3d_atl/pysh/`** — Copy all v3.1.1 Python files (including new ones) -3. 
**STOFS ex-scripts in `scripts/stofs_3d_atl/`** — Update to match v3.1.1 logic (OBC split, 8globaloutput, MPMD, etc.) - -### What We Keep (unified framework) - -1. **Unified J-jobs** (`jobs/JNOS_OFS_*`) — Already framework-aware, just need new env vars (`COMINlaw`, `COMINwl`, etc.) -2. **Unified ex-scripts** (`scripts/nosofs/exnos_ofs_prep.sh`, etc.) — Update STOFS branch to call new script names -3. **Shared libraries** (`ush/nos_ofs_config.sh`, `nos_ofs_prep_run.sh`, `nos_ofs_model_run.sh`) — Minimal changes -4. **YAML config** (`parm/systems/stofs_3d_atl.yaml`) — Update for 8 globals, MSL datum, VIMS v7 rivers - -### What We Skip - -1. **Watchdog job** — Our split nowcast/forecast approach archives after each phase via `archive_outputs()`, eliminating the need for concurrent monitoring -2. **`outputs_watchdog/` paths in post-processing** — Adapt post scripts to read from our archive location instead -3. **STOFS-specific J-jobs** — We use unified `JNOS_OFS_*` jobs - -### Sflux Naming Alignment - -The sflux naming change (`.0001.nc` → `.1.nc`) is actually beneficial — it **aligns v3.1.1 with our COMF convention**, eliminating the naming mismatch documented in MEMORY.md lesson #11. - -### New Fix Files - -Must be copied from v3.1.1 `fix/` to WCOSS2's `$FIXstofs3d` directory before testing.