From 760ff9a460bd83c501e8962a90f63ff7fd732372 Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Thu, 5 Mar 2026 11:42:29 -0500 Subject: [PATCH 1/8] adds cron scripts for nightly tests --- cron-scripts/README.md | 102 +++++++ cron-scripts/launch_all.sh | 60 +++++ cron-scripts/machines/config_chrysalis.sh | 9 + cron-scripts/machines/config_frontier.sh | 16 ++ cron-scripts/machines/config_machine.sh | 72 +++++ cron-scripts/machines/config_pm-cpu.sh | 10 + cron-scripts/machines/config_pm-gpu.sh | 10 + .../job_chrysalis_omega_cdash.sbatch | 73 +++++ .../job_frontier_omega_cdash.sbatch | 72 +++++ .../omega_cdash/job_pm-cpu_omega_cdash.sbatch | 86 ++++++ .../omega_cdash/job_pm-gpu_omega_cdash.sbatch | 83 ++++++ .../tasks/omega_cdash/launch_omega_cdash.sh | 44 +++ .../tasks/polaris_cdash/CTestScript.txt | 55 ++++ .../polaris_cdash/launch_polaris_ctest.sh | 250 ++++++++++++++++++ .../tasks/polaris_cdash/polaris_cdash.py | 245 +++++++++++++++++ 15 files changed, 1187 insertions(+) create mode 100644 cron-scripts/README.md create mode 100755 cron-scripts/launch_all.sh create mode 100755 cron-scripts/machines/config_chrysalis.sh create mode 100755 cron-scripts/machines/config_frontier.sh create mode 100755 cron-scripts/machines/config_machine.sh create mode 100755 cron-scripts/machines/config_pm-cpu.sh create mode 100755 cron-scripts/machines/config_pm-gpu.sh create mode 100755 cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch create mode 100755 cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch create mode 100755 cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch create mode 100755 cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch create mode 100755 cron-scripts/tasks/omega_cdash/launch_omega_cdash.sh create mode 100644 cron-scripts/tasks/polaris_cdash/CTestScript.txt create mode 100755 cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh create mode 100644 cron-scripts/tasks/polaris_cdash/polaris_cdash.py diff --git 
a/cron-scripts/README.md b/cron-scripts/README.md new file mode 100644 index 0000000000..32ab933dbf --- /dev/null +++ b/cron-scripts/README.md @@ -0,0 +1,102 @@ +# OMEGA Cron Scripts + +Automated cron job scripts for continuous testing and CDash reporting of OMEGA ocean modeling projects across multiple HPC systems. + +## Overview + +This repository orchestrates the compilation, testing, and result submission to [CDash](https://my.cdash.org) for two major OMEGA ocean model components: + +- **Omega** - Next-generation ocean model +- **Polaris** - MPAS-Ocean model with Omega integration + +## Supported Systems + +| Machine | Location | Compilers | +|---------|----------|-----------| +| Frontier | ORNL | craygnu, craycray, crayamd (with mphipcc variants) | +| Chrysalis | ANL (LCRC) | gnu, intel | +| pm-gpu | NERSC (Perlmutter GPU) | gnugpu | +| pm-cpu | NERSC (Perlmutter CPU) | gnu | + +## Repository Structure + +``` +cron-scripts/ +├── launch_all.sh # Main entry point +├── machines/ # Machine-specific configurations +│ ├── config_machine.sh # Auto-detection dispatcher +│ ├── config_frontier.sh +│ ├── config_chrysalis.sh +│ ├── config_pm-gpu.sh +│ └── config_pm-cpu.sh +└── tasks/ # Scheduled job definitions + ├── omega_cdash/ # Omega model CDash testing + │ ├── launch_omega_cdash.sh + │ └── job_*.sbatch + └── polaris_cdash/ # Polaris model CDash testing + ├── launch_polaris_ctest.sh + ├── polaris_cdash.py + └── CTestScript.txt +``` + +## Usage + +### Run on auto-detected machine + +```bash +./launch_all.sh +``` + +### Run on a specific machine + +```bash +./launch_all.sh -m frontier +./launch_all.sh -m chrysalis +./launch_all.sh -m pm-gpu +./launch_all.sh -m pm-cpu +``` + +### Set up in crontab + +```bash +# Run daily at 1 AM +0 1 * * * /path/to/cron-scripts/launch_all.sh +``` + +## How It Works + +1. `launch_all.sh` auto-detects the machine via hostname or accepts a `-m` flag +2. Sources the appropriate machine configuration (compilers, paths, modules) +3. 
Uses file locking to prevent concurrent executions +4. Discovers and executes all `launch*.sh` scripts in task subdirectories +5. Each task clones/updates repos, submits SBATCH jobs, and reports to CDash + +## Environment Variables + +| Variable | Description | +|----------|-------------| +| `CRONJOB_BASEDIR` | Root directory for job outputs | +| `CRONJOB_MACHINE` | Detected/specified machine name | +| `CRONJOB_LOGDIR` | Log directory location | +| `E3SM_COMPILERS` | Space-separated list of compilers to test | + +## Adding a New Machine + +1. Create `machines/config_<machine>.sh` with: + - `CRONJOB_BASEDIR` path + - `E3SM_COMPILERS` list + - Module loads and environment setup +2. Add hostname pattern to `machines/config_machine.sh` +3. Create machine-specific SBATCH scripts in task directories if needed + +## Adding a New Task + +1. Create a new directory under `tasks/` +2. Add a `launch_<task>.sh` script +3. The script will be auto-discovered and executed by `launch_all.sh` + +## CDash Integration + +Test results are submitted to: +- E3SM project: https://my.cdash.org/submit.php?project=E3SM +- Omega project: https://my.cdash.org/submit.php?project=omega diff --git a/cron-scripts/launch_all.sh b/cron-scripts/launch_all.sh new file mode 100755 index 0000000000..12023564e6 --- /dev/null +++ b/cron-scripts/launch_all.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +set -eo pipefail + +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SCRIPT_NAME="$(basename "${BASH_SOURCE[0]}")" + +# --- Parse command-line arguments --- +CLI_MACHINE="" +while [[ $# -gt 0 ]]; do + case "$1" in + -m|--machine) + CLI_MACHINE="$2" + shift 2 + ;; + *) + echo "ERROR: Unknown option '$1'" >&2 + echo "Usage: $SCRIPT_NAME [-m|--machine MACHINE_NAME]" + exit 1 + ;; + esac +done + +echo "[$(date)] Starting $SCRIPT_NAME" + +# set CRONJOB_BASEDIR and machine-specific variables +# pass -m through so config_machine.sh uses CLI override if provided +if [[ -n "$CLI_MACHINE" ]]; then + source
"${HERE}/machines/config_machine.sh" -m "$CLI_MACHINE" +else + source "${HERE}/machines/config_machine.sh" +fi + +export CRONJOB_LOGDIR="${CRONJOB_BASEDIR}/logs" +mkdir -p "$CRONJOB_LOGDIR" + +export CRONJOB_DATE=$(date +"%d") +export CRONJOB_TIME=$(date +"%T") + +LOCKFILE="/tmp/${USER}_cronjob.lock" +exec 9>"$LOCKFILE" +if ! flock -n 9; then + echo "[$(date)] launch_all.sh is already running, exiting." + exit 0 +fi +#LOCKFILE="${HERE}/cronjob.lock" +#exec 9>"$LOCKFILE" +#if ! flock -n 9; then +# echo "[$(date)] launch_all.sh is already running, exiting." +# exit 0 +#fi + +# Run all launch*.sh scripts under immediate subdirectories of $HERE/tasks +while IFS= read -r script; do + /bin/bash "$script" +done < <( + find "$HERE/tasks" -mindepth 2 -maxdepth 2 \ + -type f -name 'launch*.sh' | sort +) + +echo "[$(date)] Finished $SCRIPT_NAME" diff --git a/cron-scripts/machines/config_chrysalis.sh b/cron-scripts/machines/config_chrysalis.sh new file mode 100755 index 0000000000..050a102063 --- /dev/null +++ b/cron-scripts/machines/config_chrysalis.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -eo pipefail + +source /etc/bashrc + +export CRONJOB_BASEDIR=/lcrc/globalscratch/ac.kimy/cronjobs +export E3SM_COMPILERS="gnu intel" + +mkdir -p "$CRONJOB_BASEDIR" diff --git a/cron-scripts/machines/config_frontier.sh b/cron-scripts/machines/config_frontier.sh new file mode 100755 index 0000000000..dedc560ac3 --- /dev/null +++ b/cron-scripts/machines/config_frontier.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +set -eo pipefail + +module load cray-python cmake + +export all_proxy=socks://proxy.ccs.ornl.gov:3128/ +export ftp_proxy=ftp://proxy.ccs.ornl.gov:3128/ +export http_proxy=http://proxy.ccs.ornl.gov:3128/ +export https_proxy=http://proxy.ccs.ornl.gov:3128/ +export no_proxy='localhost,127.0.0.0/8,*.ccs.ornl.gov' + +export CRONJOB_BASEDIR=/lustre/orion/cli115/scratch/grnydawn/cronjobs +export E3SM_COMPILERS="craygnu-mphipcc craycray-mphipcc crayamd-mphipcc craygnu craycray crayamd" + 
+mkdir -p "$CRONJOB_BASEDIR" diff --git a/cron-scripts/machines/config_machine.sh b/cron-scripts/machines/config_machine.sh new file mode 100755 index 0000000000..a0dc05a592 --- /dev/null +++ b/cron-scripts/machines/config_machine.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +set -eo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# --- Parse command-line arguments --- +usage() { + echo "Usage: $(basename "$0") [-m|--machine MACHINE_NAME] [-h|--help]" + echo " -m, --machine Override the auto-detected machine name" + echo " -h, --help Show this help message" + exit "${1:-0}" +} + +CLI_MACHINE="" +while [[ $# -gt 0 ]]; do + case "$1" in + -m|--machine) + CLI_MACHINE="$2" + shift 2 + ;; + -h|--help) + usage 0 + ;; + *) + echo "ERROR: Unknown option '$1'" >&2 + usage 1 + ;; + esac +done + +# --- Get a stable hostname / FQDN (try multiple methods) --- +get_fqdn() { + local fqdn="" + fqdn="$(hostname -f 2>/dev/null || true)" + if [[ -z "$fqdn" || "$fqdn" == "(none)" ]]; then + fqdn="$(hostname --fqdn 2>/dev/null || true)" + fi + if [[ -z "$fqdn" || "$fqdn" == "(none)" ]]; then + fqdn="$(hostname 2>/dev/null || true)" + fi + echo "$fqdn" +} + +FQDN="$(get_fqdn)" + +# --- Determine CRONJOB_MACHINE --- +if [[ -n "$CLI_MACHINE" ]]; then + # Command-line argument takes highest priority + CRONJOB_MACHINE="$CLI_MACHINE" +else + # Fall back to FQDN-based detection + CRONJOB_MACHINE="unknown" + case "$FQDN" in + *.frontier.olcf.ornl.gov) + CRONJOB_MACHINE="frontier" + ;; + *.polaris.alcf.anl.gov) + CRONJOB_MACHINE="polaris" + ;; + *.perlmutter.nersc.gov) + CRONJOB_MACHINE="pm-gpu" + ;; + *.lcrc.anl.gov) + CRONJOB_MACHINE="chrysalis" + ;; + esac +fi + +export CRONJOB_MACHINE +echo "FQDN=$FQDN" +echo "CRONJOB_MACHINE=$CRONJOB_MACHINE" + +source "${SCRIPT_DIR}/config_${CRONJOB_MACHINE}.sh" diff --git a/cron-scripts/machines/config_pm-cpu.sh b/cron-scripts/machines/config_pm-cpu.sh new file mode 100755 index 0000000000..07c06720f1 --- /dev/null +++ 
b/cron-scripts/machines/config_pm-cpu.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +set -eo pipefail + +module load cray-python cmake + +export CRONJOB_BASEDIR=/pscratch/sd/y/youngsun/omega/cronjobs_pm-cpu +export E3SM_COMPILERS="gnu" + +mkdir -p "$CRONJOB_BASEDIR" diff --git a/cron-scripts/machines/config_pm-gpu.sh b/cron-scripts/machines/config_pm-gpu.sh new file mode 100755 index 0000000000..dc98c8f367 --- /dev/null +++ b/cron-scripts/machines/config_pm-gpu.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +set -eo pipefail + +module load cray-python cmake + +export CRONJOB_BASEDIR=/pscratch/sd/y/youngsun/omega/cronjobs_pm-gpu +export E3SM_COMPILERS="gnugpu" + +mkdir -p "$CRONJOB_BASEDIR" diff --git a/cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch new file mode 100755 index 0000000000..3f091e40c5 --- /dev/null +++ b/cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch @@ -0,0 +1,73 @@ +#!/bin/bash +#SBATCH --nodes=1 +#SBATCH --qos=high +#SBATCH --time 02:00:00 + +source /etc/bashrc + +echo "Starting omega cdash job" + +if [[ "$CRONJOB_MACHINE" == "chrysalis" ]]; then + module load python cmake + PARMETIS_TPL="/lcrc/soft/climate/polaris/chrysalis/spack/dev_polaris_0_10_0_COMPILER_openmpi/var/spack/environments/dev_polaris_0_10_0_COMPILER_openmpi/.spack-env/view" + +elif [[ "$CRONJOB_MACHINE" == "frontier" ]]; then + module load cray-python cmake git-lfs + PARMETIS_TPL="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "$CRONJOB_MACHINE" == "unknown" ]]; then + echo "CRONJOB_MACHINE is not set." + exit -1 + +else + echo "It seems that the cron job is not configured with CRONJOB_MACHINE." 
+ exit -1 + +fi + +echo "Compilers: ${E3SM_COMPILERS}" +# Run Omega ctest +for COMPILER in ${E3SM_COMPILERS}; do + + WORKDIR=${TESTROOT}/${COMPILER}/${CRONJOB_DATE} + rm -rf ${WORKDIR} + mkdir -p ${WORKDIR} + + PARMETIS_HOME="${PARMETIS_TPL//COMPILER/$COMPILER}" + if [ ! -f "$PARMETIS_HOME" ]; then + if [[ "$CRONJOB_MACHINE" == "frontier" ]]; then + PARMETIS_HOME="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_craygnu-mphipcc_mpich/var/spack/environments/dev_polaris_0_10_0_craygnu-mphipcc_mpich/.spack-env/view" + fi + fi + + cmake \ + -DOMEGA_CIME_MACHINE=${CRONJOB_MACHINE} \ + -DOMEGA_CIME_COMPILER=${COMPILER} \ + -DOMEGA_ARCH=SERIAL \ + -DOMEGA_BUILD_TEST=ON \ + -DOMEGA_PARMETIS_ROOT=${PARMETIS_HOME} \ + -S ${OMEGA_HOME}/components/omega \ + -B ${WORKDIR} + + mkdir -p ${WORKDIR}/test + + ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc + ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc + ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc + + source ${WORKDIR}/omega_env.sh + + ctest \ + -S ${OMEGA_HOME}/components/omega/CTestScript.cmake \ + -DCTEST_SOURCE_DIRECTORY=${OMEGA_HOME}/components/omega \ + -DCTEST_BINARY_DIRECTORY=${WORKDIR} \ + -DCTEST_SITE=${CRONJOB_MACHINE} \ + -DCTEST_BUILD_GROUP="Omega Unit-test" \ + -DCTEST_BUILD_NAME="unitest-develop-${COMPILER}" \ + -DCTEST_NIGHTLY_START_TIME="06:00:00 UTC" \ + -DCTEST_BUILD_COMMAND="${WORKDIR}/omega_build.sh" \ + -DCTEST_BUILD_CONFIGURATION="Release" \ + -DCTEST_DROP_SITE_CDASH=TRUE \ + -DCTEST_SUBMIT_URL="https://my.cdash.org/submit.php?project=E3SM" + +done diff --git a/cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch new file mode 100755 index 0000000000..0dff8a78d9 --- /dev/null +++ b/cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch @@ -0,0 +1,72 @@ +#!/bin/bash -l +#SBATCH --nodes=1 +#SBATCH -q debug +#SBATCH --account=cli115 +#SBATCH 
--time 02:00:00 + +echo "Starting omega cdash job" + +if [[ "$CRONJOB_MACHINE" == "chrysalis" ]]; then + module load python cmake + PARMETIS_TPL=/lcrc/soft/climate/polaris/chrysalis/spack/dev_polaris_0_10_0_COMPILER_openmpi/var/spack/environments/dev_polaris_0_10_0_COMPILER_openmpi/.spack-env/view + +elif [[ "$CRONJOB_MACHINE" == "frontier" ]]; then + module load cray-python cmake git-lfs + PARMETIS_TPL="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "$CRONJOB_MACHINE" == "unknown" ]]; then + echo "CRONJOB_MACHINE is not set." + exit -1 + +else + echo "It seems that the cron job is not configured with CRONJOB_MACHINE." + exit -1 + +fi + +echo "Compilers: ${E3SM_COMPILERS}" +# Run Omega ctest +for COMPILER in ${E3SM_COMPILERS}; do + + WORKDIR=${TESTROOT}/${COMPILER}/${CRONJOB_DATE} + rm -rf ${WORKDIR} + mkdir -p ${WORKDIR} + + PARMETIS_HOME="${PARMETIS_TPL//COMPILER/$COMPILER}" + if [ ! 
-f "$PARMETIS_HOME" ]; then + if [[ "$CRONJOB_MACHINE" == "frontier" ]]; then + PARMETIS_HOME=/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_craygnu-mphipcc_mpich/var/spack/environments/dev_polaris_0_10_0_craygnu-mphipcc_mpich/.spack-env/view + fi + fi + + cmake \ + -DOMEGA_CIME_MACHINE=${CRONJOB_MACHINE} \ + -DOMEGA_CIME_COMPILER=${COMPILER} \ + -DOMEGA_ARCH=SERIAL \ + -DOMEGA_BUILD_TEST=ON \ + -DOMEGA_PARMETIS_ROOT=${PARMETIS_HOME} \ + -S ${OMEGA_HOME}/components/omega \ + -B ${WORKDIR} + + mkdir -p ${WORKDIR}/test + + ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc + ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc + ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc + + source ${WORKDIR}/omega_env.sh + + ctest \ + -S ${OMEGA_HOME}/components/omega/CTestScript.cmake \ + -DCTEST_SOURCE_DIRECTORY=${OMEGA_HOME}/components/omega \ + -DCTEST_BINARY_DIRECTORY=${WORKDIR} \ + -DCTEST_SITE=${CRONJOB_MACHINE} \ + -DCTEST_BUILD_GROUP="Omega Unit-test" \ + -DCTEST_BUILD_NAME="unitest-develop-${COMPILER}" \ + -DCTEST_NIGHTLY_START_TIME="06:00:00 UTC" \ + -DCTEST_BUILD_COMMAND="${WORKDIR}/omega_build.sh" \ + -DCTEST_BUILD_CONFIGURATION="Release" \ + -DCTEST_DROP_SITE_CDASH=TRUE \ + -DCTEST_SUBMIT_URL="https://my.cdash.org/submit.php?project=E3SM" + +done diff --git a/cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch new file mode 100755 index 0000000000..90fd973b2f --- /dev/null +++ b/cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch @@ -0,0 +1,86 @@ +#!/bin/bash -l +#SBATCH --job-name=OmegaSCron +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --output=/global/cfs/cdirs/e3sm/omega/cronjbos_pm-cpu/logs/OmegaSCronCPU_%j.out +#SBATCH --error=/global/cfs/cdirs/e3sm/omega/cronjobs_pm-cpu/logs/OmegaSCronCPU_%j.err +#SBATCH --constraint=cpu +#SBATCH --account=e3sm +#SBATCH --qos regular +#SBATCH --exclusive 
+#SBATCH --time 01:00:00 + +echo "Starting omega cdash job" + +if [[ "$CRONJOB_MACHINE" == "chrysalis" ]]; then + module load python cmake + PARMETIS_TPL=/lcrc/soft/climate/polaris/chrysalis/spack/dev_polaris_0_10_0_COMPILER_openmpi/var/spack/environments/dev_polaris_0_10_0_COMPILER_openmpi/.spack-env/view + +elif [[ "$CRONJOB_MACHINE" == "frontier" ]]; then + module load cray-python cmake git-lfs + PARMETIS_TPL="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "$CRONJOB_MACHINE" == "pm-gpu" ]]; then + module load cray-python cmake + PARMETIS_TPL="/global/cfs/cdirs/e3sm/software/polaris/pm-gpu/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "$CRONJOB_MACHINE" == "pm-cpu" ]]; then + module load cray-python cmake + PARMETIS_TPL="/global/cfs/cdirs/e3sm/software/polaris/pm-cpu/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "$CRONJOB_MACHINE" == "unknown" ]]; then + echo "CRONJOB_MACHINE is not set." + exit -1 + +else + echo "It seems that the cron job is not configured with CRONJOB_MACHINE." + exit -1 + +fi + +echo "Compilers: ${E3SM_COMPILERS}" +# Run Omega ctest +for COMPILER in ${E3SM_COMPILERS}; do + + WORKDIR=${TESTROOT}/${COMPILER}/${CRONJOB_DATE} + rm -rf ${WORKDIR} + mkdir -p ${WORKDIR} + + PARMETIS_HOME="${PARMETIS_TPL//COMPILER/$COMPILER}" + if [ ! 
-f "$PARMETIS_HOME" ]; then + if [[ "$CRONJOB_MACHINE" == "frontier" ]]; then + PARMETIS_HOME=/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_craygnu-mphipcc_mpich/var/spack/environments/dev_polaris_0_10_0_craygnu-mphipcc_mpich/.spack-env/view + fi + fi + + cmake \ + -DOMEGA_CIME_MACHINE=${CRONJOB_MACHINE} \ + -DOMEGA_CIME_COMPILER=${COMPILER} \ + -DOMEGA_ARCH=SERIAL \ + -DOMEGA_BUILD_TEST=ON \ + -DOMEGA_PARMETIS_ROOT=${PARMETIS_HOME} \ + -S ${OMEGA_HOME}/components/omega \ + -B ${WORKDIR} + + mkdir -p ${WORKDIR}/test + + ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc + ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc + ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc + + source ${WORKDIR}/omega_env.sh + + ctest \ + -S ${OMEGA_HOME}/components/omega/CTestScript.cmake \ + -DCTEST_SOURCE_DIRECTORY=${OMEGA_HOME}/components/omega \ + -DCTEST_BINARY_DIRECTORY=${WORKDIR} \ + -DCTEST_SITE=${CRONJOB_MACHINE} \ + -DCTEST_BUILD_GROUP="Omega Unit-test" \ + -DCTEST_BUILD_NAME="unitest-develop-${COMPILER}" \ + -DCTEST_NIGHTLY_START_TIME="06:00:00 UTC" \ + -DCTEST_BUILD_COMMAND="${WORKDIR}/omega_build.sh" \ + -DCTEST_BUILD_CONFIGURATION="Release" \ + -DCTEST_DROP_SITE_CDASH=TRUE \ + -DCTEST_SUBMIT_URL="https://my.cdash.org/submit.php?project=E3SM" + +done diff --git a/cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch new file mode 100755 index 0000000000..7a10dcc890 --- /dev/null +++ b/cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch @@ -0,0 +1,83 @@ +#!/bin/bash -l +#SBATCH --job-name=OmegaSCron +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=8 +#SBATCH --gpus-per-node=4 +#SBATCH --output=/global/cfs/cdirs/e3sm/omega/cronjobs_pm-gpu/logs/OmegaSCronGPU_%j.out +#SBATCH --error=/global/cfs/cdirs/e3sm/omega/cronjobs_pm-gpu/logs/OmegaSCronGPU_%j.err +#SBATCH --constraint=gpu +#SBATCH --account=e3sm_g +#SBATCH --qos 
regular +#SBATCH --exclusive +#SBATCH --time 01:00:00 + +echo "Starting omega cdash job" + +if [[ "$CRONJOB_MACHINE" == "chrysalis" ]]; then + module load python cmake + PARMETIS_TPL=/lcrc/soft/climate/polaris/chrysalis/spack/dev_polaris_0_10_0_COMPILER_openmpi/var/spack/environments/dev_polaris_0_10_0_COMPILER_openmpi/.spack-env/view + +elif [[ "$CRONJOB_MACHINE" == "frontier" ]]; then + module load cray-python cmake git-lfs + PARMETIS_TPL="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "$CRONJOB_MACHINE" == "pm-gpu" ]]; then + module load cray-python cmake + PARMETIS_TPL="/global/cfs/cdirs/e3sm/software/polaris/pm-gpu/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "$CRONJOB_MACHINE" == "unknown" ]]; then + echo "CRONJOB_MACHINE is not set." + exit -1 + +else + echo "It seems that the cron job is not configured with CRONJOB_MACHINE." + exit -1 + +fi + +echo "Compilers: ${E3SM_COMPILERS}" +# Run Omega ctest +for COMPILER in ${E3SM_COMPILERS}; do + + WORKDIR=${TESTROOT}/${COMPILER}/${CRONJOB_DATE} + rm -rf ${WORKDIR} + mkdir -p ${WORKDIR} + + PARMETIS_HOME="${PARMETIS_TPL//COMPILER/$COMPILER}" + if [ ! 
-f "$PARMETIS_HOME" ]; then + if [[ "$CRONJOB_MACHINE" == "frontier" ]]; then + PARMETIS_HOME=/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_craygnu-mphipcc_mpich/var/spack/environments/dev_polaris_0_10_0_craygnu-mphipcc_mpich/.spack-env/view + fi + fi + + cmake \ + -DOMEGA_CIME_MACHINE=${CRONJOB_MACHINE} \ + -DOMEGA_CIME_COMPILER=${COMPILER} \ + -DOMEGA_ARCH=SERIAL \ + -DOMEGA_BUILD_TEST=ON \ + -DOMEGA_PARMETIS_ROOT=${PARMETIS_HOME} \ + -S ${OMEGA_HOME}/components/omega \ + -B ${WORKDIR} + + mkdir -p ${WORKDIR}/test + + ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc + ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc + ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc + + source ${WORKDIR}/omega_env.sh + + ctest \ + -S ${OMEGA_HOME}/components/omega/CTestScript.cmake \ + -DCTEST_SOURCE_DIRECTORY=${OMEGA_HOME}/components/omega \ + -DCTEST_BINARY_DIRECTORY=${WORKDIR} \ + -DCTEST_SITE=${CRONJOB_MACHINE} \ + -DCTEST_BUILD_GROUP="Omega Unit-test" \ + -DCTEST_BUILD_NAME="unitest-develop-${COMPILER}" \ + -DCTEST_NIGHTLY_START_TIME="06:00:00 UTC" \ + -DCTEST_BUILD_COMMAND="${WORKDIR}/omega_build.sh" \ + -DCTEST_BUILD_CONFIGURATION="Release" \ + -DCTEST_DROP_SITE_CDASH=TRUE \ + -DCTEST_SUBMIT_URL="https://my.cdash.org/submit.php?project=E3SM" + +done diff --git a/cron-scripts/tasks/omega_cdash/launch_omega_cdash.sh b/cron-scripts/tasks/omega_cdash/launch_omega_cdash.sh new file mode 100755 index 0000000000..ec2e4f493a --- /dev/null +++ b/cron-scripts/tasks/omega_cdash/launch_omega_cdash.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +set -eo pipefail + +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SCRIPT_NAME="$(basename "${BASH_SOURCE[0]}")" +echo "[$(date)] Starting $SCRIPT_NAME" + +export OMEGA_CDASH_BASEDIR=${CRONJOB_BASEDIR}/tasks/omega_cdash +export TESTROOT="${OMEGA_CDASH_BASEDIR}/tests" +mkdir -p $OMEGA_CDASH_BASEDIR +mkdir -p $TESTROOT + +export 
OMEGA_HOME="${OMEGA_CDASH_BASEDIR}/Omega" + +if [[ ! -d $OMEGA_HOME ]]; then + cd ${OMEGA_CDASH_BASEDIR} + git clone https://github.com/E3SM-Project/Omega.git +fi + +cd ${OMEGA_HOME} +git checkout develop +git fetch origin +git reset --hard origin/develop +git submodule update --init --recursive || true + +if [[ ! -f ${TESTROOT}/OmegaMesh.nc ]]; then + wget -O ${TESTROOT}/OmegaMesh.nc https://web.lcrc.anl.gov/public/e3sm/inputdata/ocn/mpas-o/oQU240/ocean.QU.240km.151209.nc +fi + +if [[ ! -f ${TESTROOT}/OmegaSphereMesh.nc ]]; then + wget -O ${TESTROOT}/OmegaSphereMesh.nc https://web.lcrc.anl.gov/public/e3sm/polaris/ocean/polaris_cache/global_convergence/icos/cosine_bell/Icos480/init/initial_state.230220.nc +fi + +if [[ ! -f ${TESTROOT}/OmegaPlanarMesh.nc ]]; then + wget -O ${TESTROOT}/OmegaPlanarMesh.nc https://gist.github.com/mwarusz/f8caf260398dbe140d2102ec46a41268/raw/e3c29afbadc835797604369114321d93fd69886d/PlanarPeriodic48x48.nc +fi + +sbatch \ + --job-name=OmegaCdash \ + --output="$CRONJOB_LOGDIR/omega_cdash_%j.out" \ + --error="$CRONJOB_LOGDIR/omega_cdash_%j.err" \ + ${HERE}/job_${CRONJOB_MACHINE}_omega_cdash.sbatch + +echo "[$(date)] Finished $SCRIPT_NAME" diff --git a/cron-scripts/tasks/polaris_cdash/CTestScript.txt b/cron-scripts/tasks/polaris_cdash/CTestScript.txt new file mode 100644 index 0000000000..271a2c70c0 --- /dev/null +++ b/cron-scripts/tasks/polaris_cdash/CTestScript.txt @@ -0,0 +1,55 @@ +# CTestScript.txt to submit generated XMLs +set(CTEST_PROJECT_NAME omega) +set(CTEST_NIGHTLY_START_TIME "01:00:00 UTC") +set(CTEST_SITE "$ENV{CRONJOB_MACHINE}") +set(CTEST_BUILD_NAME "ExternalTest_Run") + +# Set source and binary directory to current (required for ctest_submit) +set(CTEST_SOURCE_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") +set(CTEST_BINARY_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +# Create Testing/Temporary directory manually for ctest log files +file(MAKE_DIRECTORY "${CTEST_BINARY_DIRECTORY}/Testing/Temporary") + +# Initialize CTest to ensure 
CTEST_BINARY_DIRECTORY is used for logging +ctest_start(Nightly) + +# CDash configuration +if(CMAKE_VERSION VERSION_GREATER 3.14) + set(CTEST_SUBMIT_URL "https://my.cdash.org/submit.php?project=omega") +else() + set(CTEST_DROP_METHOD "https") + set(CTEST_DROP_SITE "my.cdash.org") + set(CTEST_DROP_LOCATION "/submit.php?project=omega") +endif() + +set(CTEST_DROP_SITE_CDASH TRUE) + +# Define files to submit +set(FILES_TO_SUBMIT) + +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/Build.xml") + list(APPEND FILES_TO_SUBMIT "${CMAKE_CURRENT_LIST_DIR}/Build.xml") +else() + message(WARNING "Build.xml not found") +endif() + +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/Test.xml") + list(APPEND FILES_TO_SUBMIT "${CMAKE_CURRENT_LIST_DIR}/Test.xml") +else() + message(WARNING "Test.xml not found") +endif() + +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/Done.xml") + list(APPEND FILES_TO_SUBMIT "${CMAKE_CURRENT_LIST_DIR}/Done.xml") +else() + message(WARNING "Done.xml not found") +endif() + +# Submit files +if(FILES_TO_SUBMIT) + message(STATUS "Submitting files: ${FILES_TO_SUBMIT}") + ctest_submit(FILES ${FILES_TO_SUBMIT}) +else() + message(WARNING "No files to submit") +endif() diff --git a/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh b/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh new file mode 100755 index 0000000000..64eff3fe69 --- /dev/null +++ b/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh @@ -0,0 +1,250 @@ +#!/bin/bash -l +set -eo pipefail + +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SCRIPT_NAME="$(basename "${BASH_SOURCE[0]}")" +echo "[$(date)] Starting $SCRIPT_NAME" + +POLARIS_CDASH_BASEDIR=${CRONJOB_BASEDIR}/tasks/polaris_cdash +POLARIS_CDASH_TESTDIR="${POLARIS_CDASH_BASEDIR}/tests" +OMEGA_HOME="${POLARIS_CDASH_BASEDIR}/polaris/e3sm_submodules/Omega" +MINIFORGE3_HOME="${POLARIS_CDASH_BASEDIR}/miniforge3" + +mkdir -p $POLARIS_CDASH_BASEDIR +mkdir -p $POLARIS_CDASH_TESTDIR + +if [[ "$CRONJOB_MACHINE" == "chrysalis" ]]; then + module load python cmake + 
PARMETIS_TPL=/lcrc/soft/climate/polaris/chrysalis/spack/dev_polaris_0_10_0_COMPILER_openmpi/var/spack/environments/dev_polaris_0_10_0_COMPILER_openmpi/.spack-env/view + +elif [[ "$CRONJOB_MACHINE" == "frontier" ]]; then + module load cray-python cmake git-lfs + PARMETIS_TPL="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "$CRONJOB_MACHINE" == "pm-gpu" ]]; then + module load cray-python cmake + PARMETIS_TPL="/global/cfs/cdirs/e3sm/software/polaris/pm-gpu/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "$CRONJOB_MACHINE" == "pm-cpu" ]]; then + module load cray-python cmake + PARMETIS_TPL="/global/cfs/cdirs/e3sm/software/polaris/pm-cpu/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "$CRONJOB_MACHINE" == "unknown" ]]; then + echo "CRONJOB_MACHINE is not set." + exit -1 + +else + echo "It seems that the cron job is not configured with CRONJOB_MACHINE." + exit -1 + +fi + +# ============================================================================== +# Functions +# ============================================================================== + +install_miniforge3() { + +if [ ! -d "$MINIFORGE3_HOME" ]; then + echo "Installing Miniforge3..." 
+ pushd "$POLARIS_CDASH_BASEDIR" > /dev/null + wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh + bash Miniforge3-Linux-x86_64.sh -b -p $MINIFORGE3_HOME + popd > /dev/null +fi + +} + +setup_polaris_repo() { + echo "================================================================================" + echo "STEP 1: Setting up Polaris Repo (Baseline)" + echo "================================================================================" + cd "${POLARIS_CDASH_BASEDIR}" + + # Check if we are inside the 'polaris' folder or need to enter it + if [ ! -d "polaris" ]; then + echo "Cloning Polaris repository..." + git clone git@github.com:E3SM-Project/polaris.git + cd polaris + else + cd polaris + echo "Repository exists. Resetting to main branch..." + git fetch origin + git checkout main + git reset --hard origin/main + fi + + echo "Updating specific submodules (jigsaw-python, Omega)..." + git submodule update --init --recursive jigsaw-python + git submodule update --init --recursive e3sm_submodules/Omega +} + +configure_polaris() { + local compiler=$1 + + echo "--------------------------------------------------------------------------------" + echo "Configuring Polaris for $compiler" + echo "--------------------------------------------------------------------------------" + + cd "${POLARIS_CDASH_BASEDIR}/polaris" + + if [ ! -f "configure_polaris_envs.py" ]; then + echo "Error: configure_polaris_envs.py not found in $(pwd)" + exit 1 + fi + + if ! 
ls load_dev_polaris_*_${CRONJOB_MACHINE}_${compiler}_*.sh >/dev/null 2>&1; then + echo "Configuring Polaris Environment" + ./configure_polaris_envs.py --conda "${MINIFORGE3_HOME}" \ + -c "${compiler}" -m "${CRONJOB_MACHINE}" + fi +} + +build_omega_dev() { + local compiler=$1 + local omega_build=$2 + local parmetis_path=$3 + + echo "--------------------------------------------------------------------------------" + echo "Building Omega (dev) with $compiler in $omega_build" + echo "--------------------------------------------------------------------------------" + + + rm -rf "$omega_build" + mkdir -p "$omega_build" + pushd "$omega_build" > /dev/null + + cmake \ + -DOMEGA_CIME_MACHINE="${CRONJOB_MACHINE}" \ + -DOMEGA_CIME_COMPILER="${compiler}" \ + -DOMEGA_BUILD_TEST=ON \ + -DOMEGA_PARMETIS_ROOT="${parmetis_path}" \ + "${OMEGA_HOME}/components/omega" + + source ./omega_env.sh + + ctest -M Nightly -T Start + ctest -M Nightly -T Build + #./omega_build.sh + popd > /dev/null +} + +run_baseline_suite() { + local compiler=$1 + local omega_build=$2 + + local polaris_build="${POLARIS_CDASH_TESTDIR}/${compiler}/polaris_build" + + # Clean up previous baseline directory to avoid stale logs + if [ -d "$polaris_build" ]; then + echo "Removing previous polaris build directory: $polaris_build" + rm -rf "$polaris_build" + fi + + mkdir -p "$polaris_build" + + pushd "$polaris_build" > /dev/null + + echo "--------------------------------------------------------------------------------" + echo "Running Polaris Baseline Suite for $compiler" + echo "--------------------------------------------------------------------------------" + + cd "$polaris_build" + + local env_file=$(ls ${POLARIS_CDASH_BASEDIR}/polaris/load_dev_polaris_*_${CRONJOB_MACHINE}_${compiler}_*.sh | head -n 1) + if [ -f "$env_file" ]; then + echo "Sourcing $env_file" + source "$env_file" + else + echo "Warning: Environment file matching 'load_dev_polaris_*_${CRONJOB_MACHINE}_${compiler}_*.sh' not found."
+ fi + + # Set up baseline suite + polaris suite -c ocean -t omega_nightly --model omega \ + -w "$polaris_build" \ + -p "$omega_build" + +# --clean_build + + + # Submit baseline job + if [ -d "$polaris_build" ]; then + cd "$polaris_build" + echo "Submitting baseline job in $(pwd)..." + # Fire and forget / continue on error + sbatch --wait job_script.omega_pr.sh || true + else + echo "Error: Baseline directory $polaris_build was not created." + fi +} + +# ============================================================================== +# Main Execution +# ============================================================================== +install_miniforge3 +setup_polaris_repo + +for COMPILER in ${E3SM_COMPILERS}; do + echo "################################################################################" + echo "Processing Baseline for COMPILER: $COMPILER" + echo "################################################################################" + + MAIN_LOG="${CRONJOB_LOGDIR}/polaris_cdash_main_${CRONJOB_DATE}.log" + + echo "Starting $COMPILER... logging to $MAIN_LOG" + + DEVELOP_BUILD="${POLARIS_CDASH_TESTDIR}/${COMPILER}/omega_build" + + # Capture Block + { + configure_polaris "$COMPILER" + + PARMETIS_HOME="${PARMETIS_TPL//COMPILER/$COMPILER}" + if [ ! 
-f "$PARMETIS_HOME" ]; then + if [[ "$CRONJOB_MACHINE" == "frontier" ]]; then + PARMETIS_HOME=/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_craygnu-mphipcc_mpich/var/spack/environments/dev_polaris_0_10_0_craygnu-mphipcc_mpich/.spack-env/view + fi + fi + + build_omega_dev "$COMPILER" "$DEVELOP_BUILD" "$PARMETIS_HOME" + + run_baseline_suite "$COMPILER" "$DEVELOP_BUILD" + } 2>&1 | tee "$MAIN_LOG" + + # CDash Submission Logic + BUILD_ID=$(date +%s) + + CASE_OUTPUTS_DIR="${POLARIS_CDASH_TESTDIR}/${COMPILER}/polaris_build/case_outputs" + + CDASH_DIR="${POLARIS_CDASH_TESTDIR}/${COMPILER}/cdash" + echo "Creating CDash directory: $CDASH_DIR" + rm -rf "$CDASH_DIR" + mkdir -p "$CDASH_DIR" + + echo "Submitting results to CDash..." + if [ -f "${HERE}/polaris_cdash.py" ]; then + python3 "${HERE}/polaris_cdash.py" \ + --log-dir "$CASE_OUTPUTS_DIR" \ + --output-dir "$CDASH_DIR" \ + --results-dir "$DEVELOP_BUILD/Testing" \ + --site-name "$CRONJOB_MACHINE" \ + --build-name "Baseline_${COMPILER}" \ + --build-id "$BUILD_ID" + else + echo "Error: polaris_cdash.py not found at ${HERE}/polaris_cdash.py" + fi + + echo "Running CTest submission from $CDASH_DIR..." 
+ if [ -f "${HERE}/CTestScript.txt" ]; then + cp "${HERE}/CTestScript.txt" "$CDASH_DIR/" + pushd "$CDASH_DIR" > /dev/null + module load cmake && ctest -S CTestScript.txt -V + popd > /dev/null + else + echo "Warning: CTestScript.txt not found in ${HERE}" + fi + + echo "Finished Baseline processing for $COMPILER" +done + +echo "[$(date)] Finished $SCRIPT_NAME" diff --git a/cron-scripts/tasks/polaris_cdash/polaris_cdash.py b/cron-scripts/tasks/polaris_cdash/polaris_cdash.py new file mode 100644 index 0000000000..6040c21d38 --- /dev/null +++ b/cron-scripts/tasks/polaris_cdash/polaris_cdash.py @@ -0,0 +1,245 @@ +import argparse +import platform +import time +import xml.etree.ElementTree as ET +from xml.dom import minidom +import os +import sys +import glob +import re +import shutil + +# Shared Utilities + +def get_system_info(): + info = {} + info['OSName'] = platform.system() + info['Hostname'] = platform.node() + info['OSRelease'] = platform.release() + info['OSVersion'] = platform.version() + info['OSPlatform'] = platform.machine() + info['Is64Bits'] = "1" if sys.maxsize > 2**32 else "0" + + try: + import psutil + info['NumberOfLogicalCPU'] = str(psutil.cpu_count(logical=True)) + info['NumberOfPhysicalCPU'] = str(psutil.cpu_count(logical=False)) + info['TotalPhysicalMemory'] = str(int(psutil.virtual_memory().total / (1024 * 1024))) # MB + except ImportError: + info['NumberOfLogicalCPU'] = "1" + info['NumberOfPhysicalCPU'] = "1" + info['TotalPhysicalMemory'] = "1024" + + info['VendorString'] = "Unknown" + info['VendorID'] = "Unknown" + info['FamilyID'] = "0" + info['ModelID'] = "0" + info['ProcessorCacheSize'] = "0" + info['ProcessorClockFrequency'] = "0" + + return info + +def strip_ansi_codes(text): + ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') + return ansi_escape.sub('', text) + +def read_tag_file(results_dir): + tag_path = os.path.join(results_dir, "TAG") + if not os.path.exists(tag_path): + raise FileNotFoundError(f"TAG file not found at 
{tag_path}") + + with open(tag_path, 'r') as f: + lines = [line.strip() for line in f.readlines()] + + if len(lines) < 2: + raise ValueError(f"TAG file at {tag_path} contains fewer than 2 lines.") + + folder_name = lines[0] + group_name = lines[1] + + # "Joining the two lines of TAG with '-' is a BUILD_STAMP" + build_stamp = f"{folder_name}-{group_name}" + + return folder_name, build_stamp + +def process_build_xml(args, folder_name, build_stamp, sys_info): + # "there is a folder with the same name to the first line of TAG file. in the folder, there is Build.xml" + source_build_xml = os.path.join(args.results_dir, folder_name, "Build.xml") + + if not os.path.exists(source_build_xml): + print(f"Warning: Build.xml not found at {source_build_xml}. Generating minimal Build.xml instead.") + # Fallback or error? User said "Use it instead of generating it." + # I'll try to generate a minimal one if missing, but primarily we expect it. + # For now let's error if strictly required, but safer to warn. + # Actually user instructions imply it exists. I will error if not found to be explicit. + raise FileNotFoundError(f"Source Build.xml not found at {source_build_xml}") + + print(f"Reading Build.xml from {source_build_xml}") + tree = ET.parse(source_build_xml) + site = tree.getroot() + if site.tag != "Site": + # Check if root is Site, sometimes it might be different? XML usually + # CTest XMLs usually start with Site. + pass + + # "Modify BuildName of Site node with --build-name argument." + # "Also change Name of Site node with --site-name argument." + if args.build_name: + site.set("BuildName", args.build_name) + + if args.site_name: + site.set("Name", args.site_name) + + # Ensure BuildStamp is set to the one from TAG + site.set("BuildStamp", build_stamp) + + # Add system info updates if needed? + # The existing Build.xml might have system info. + # User didn't fetch system info explicitly for Build.xml, but we used to add it. 
+ # Let's preserve existing attributes unless we need to overwrite. + # But usually has OS info. We can update it if missing or just trust existing. + # User instruction: "In Build.xml modify [Names]... Also... use [Names]... of Test.xml... to the same data to Build.xml" + # Doesn't explicitly say "update OS info". I will leave OS info as is from the source file. + + xmlstr = minidom.parseString(ET.tostring(site)).toprettyxml(indent="\t") + + output_path = os.path.join(args.output_dir, "Build.xml") + with open(output_path, "w") as f: + f.write(xmlstr) + print(f"Generated {output_path} (copied and modified from source)") + + return site.attrib # Return attributes for Test.xml usage + +def generate_test_xml(args, site_attribs, sys_info): + # Same structure as before, but using site_attribs for the Site element + site = ET.Element("Site") + + # Copy attributes from Build.xml's Site element + for k, v in site_attribs.items(): + site.set(k, v) + + # Ensure our CLI args override if not already (process_build_xml updated the attribs, so they should be correct) + + testing = ET.SubElement(site, "Testing") + + start_time = int(time.time()) + formatted_start_time = time.strftime("%b %d %H:%M %Z", time.localtime(start_time)) + ET.SubElement(testing, "StartDateTime").text = formatted_start_time + ET.SubElement(testing, "StartTestTime").text = str(start_time) + + test_list = ET.SubElement(testing, "TestList") + + log_files = glob.glob(os.path.join(args.log_dir, "*.log")) + log_files.sort() + + tests = [] + + if not log_files: + print(f"Warning: No log files found in {args.log_dir}") + + for log_file in log_files: + filename = os.path.basename(log_file) + test_name = filename + + try: + with open(log_file, 'r', errors='replace') as f: + content = f.read() + content = strip_ansi_codes(content) + except Exception as e: + content = f"Error reading file: {e}" + + if "POLARIS TASK: PASS" not in content: + status = "failed" + elif "POLARIS BASELINE:" in content and "POLARIS BASELINE: 
PASS" not in content: + status = "failed" + else: + status = "passed" + + tests.append({ + 'name': test_name, + 'status': status, + 'output': content, + 'path': log_file + }) + + ET.SubElement(test_list, "Test").text = f"./{args.log_dir}/{test_name}" + + for test_data in tests: + test_elem = ET.SubElement(testing, "Test", Status=test_data['status']) + ET.SubElement(test_elem, "Name").text = test_data['name'] + ET.SubElement(test_elem, "Path").text = f"./{args.log_dir}" + ET.SubElement(test_elem, "FullName").text = f"./{args.log_dir}/{test_data['name']}" + ET.SubElement(test_elem, "FullCommandLine").text = f"cat {test_data['path']}" + + results = ET.SubElement(test_elem, "Results") + + named_meas_time = ET.SubElement(results, "NamedMeasurement", type="numeric/double", name="Execution Time") + ET.SubElement(named_meas_time, "Value").text = "1.0" + + named_meas_status = ET.SubElement(results, "NamedMeasurement", type="text/string", name="Completion Status") + ET.SubElement(named_meas_status, "Value").text = "Completed" + + named_meas_cmd = ET.SubElement(results, "NamedMeasurement", type="text/string", name="Command Line") + ET.SubElement(named_meas_cmd, "Value").text = f"cat {test_data['path']}" + + measurement = ET.SubElement(results, "Measurement") + ET.SubElement(measurement, "Value").text = test_data['output'] + + formatted_end_time = time.strftime("%b %d %H:%M %Z", time.localtime(int(time.time()))) + ET.SubElement(testing, "EndDateTime").text = formatted_end_time + ET.SubElement(testing, "EndTestTime").text = str(int(time.time())) + + output_path = os.path.join(args.output_dir, "Test.xml") + tree = ET.ElementTree(site) + tree.write(output_path, encoding="UTF-8", xml_declaration=True) + print(f"Generated {output_path}") + +def generate_done_xml(args, build_id): + root = ET.Element("Done") + ET.SubElement(root, "buildId").text = build_id + ET.SubElement(root, "time").text = str(int(time.time())) + + xmlstr = 
minidom.parseString(ET.tostring(root)).toprettyxml(indent="\t") + output_path = os.path.join(args.output_dir, "Done.xml") + with open(output_path, "w") as f: + f.write(xmlstr) + print(f"Generated {output_path}") + +def main(): + parser = argparse.ArgumentParser(description="Generate CDash XML files from log directory") + + parser.add_argument("--log-dir", required=True, help="Directory containing log files") + parser.add_argument("--results-dir", required=True, help="Directory containing TAG file and Build.xml subdirectory") + # Removed --build-stamp + parser.add_argument("--site-name", required=True, help="Name of the site") + # Build name defaults to log folder name, but can be overridden + parser.add_argument("--build-name", help="Name of the build") + parser.add_argument("--build-id", required=True, help="ID of the build") + + parser.add_argument("--output-dir", default=".", help="Directory to output XML files") + + args = parser.parse_args() + + if not os.path.exists(args.output_dir): + os.makedirs(args.output_dir) + + if not args.build_name: + args.build_name = os.path.basename(os.path.normpath(args.log_dir)) + + sys_info = get_system_info() + + # 1. Read TAG + folder_name, build_stamp = read_tag_file(args.results_dir) + print(f"Detected BuildStamp: {build_stamp} (from {args.results_dir}/TAG)") + + # 2. Process Build.xml + site_attribs = process_build_xml(args, folder_name, build_stamp, sys_info) + + # 3. Generate Test.xml using same Site attribs + generate_test_xml(args, site_attribs, sys_info) + + # 4. 
Generate Done.xml + generate_done_xml(args, args.build_id) + +if __name__ == "__main__": + main() From e0b6827e7a72ac0143d17c28cf4529a68981817b Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Fri, 6 Mar 2026 14:37:15 -0500 Subject: [PATCH 2/8] fix a bug of using old omega_pr instead of omega_nightly suite name --- cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh b/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh index 64eff3fe69..f2aa85361f 100755 --- a/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh +++ b/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh @@ -172,7 +172,7 @@ run_baseline_suite() { cd "$polaris_build" echo "Submitting baseline job in $(pwd)..." # Fire and forget / continue on error - sbatch --wait job_script.omega_pr.sh || true + sbatch --wait job_script.omega_nightly.sh || true else echo "Error: Baseline directory $polaris_build was not created." 
fi From bebd6c12cf29b6c33fcb9e42163dbe3599680c3c Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Tue, 10 Mar 2026 12:39:45 +0100 Subject: [PATCH 3/8] Lint code --- .../job_chrysalis_omega_cdash.sbatch | 2 +- .../job_frontier_omega_cdash.sbatch | 4 +- .../omega_cdash/job_pm-cpu_omega_cdash.sbatch | 2 +- .../omega_cdash/job_pm-gpu_omega_cdash.sbatch | 4 +- .../polaris_cdash/launch_polaris_ctest.sh | 20 +- .../tasks/polaris_cdash/polaris_cdash.py | 358 +++++++++++------- 6 files changed, 228 insertions(+), 162 deletions(-) diff --git a/cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch index 3f091e40c5..86f8ccaea3 100755 --- a/cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch +++ b/cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch @@ -52,7 +52,7 @@ for COMPILER in ${E3SM_COMPILERS}; do mkdir -p ${WORKDIR}/test ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc - ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc + ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc source ${WORKDIR}/omega_env.sh diff --git a/cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch index 0dff8a78d9..1d11c8721c 100755 --- a/cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch +++ b/cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch @@ -1,7 +1,7 @@ #!/bin/bash -l #SBATCH --nodes=1 #SBATCH -q debug -#SBATCH --account=cli115 +#SBATCH --account=cli115 #SBATCH --time 02:00:00 echo "Starting omega cdash job" @@ -51,7 +51,7 @@ for COMPILER in ${E3SM_COMPILERS}; do mkdir -p ${WORKDIR}/test ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc - ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc + ln -sf ${TESTROOT}/OmegaSphereMesh.nc 
${WORKDIR}/test/OmegaSphereMesh.nc ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc source ${WORKDIR}/omega_env.sh diff --git a/cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch index 90fd973b2f..f1a69cec6a 100755 --- a/cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch +++ b/cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch @@ -65,7 +65,7 @@ for COMPILER in ${E3SM_COMPILERS}; do mkdir -p ${WORKDIR}/test ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc - ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc + ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc source ${WORKDIR}/omega_env.sh diff --git a/cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch index 7a10dcc890..c49f80faf5 100755 --- a/cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch +++ b/cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch @@ -6,7 +6,7 @@ #SBATCH --output=/global/cfs/cdirs/e3sm/omega/cronjobs_pm-gpu/logs/OmegaSCronGPU_%j.out #SBATCH --error=/global/cfs/cdirs/e3sm/omega/cronjobs_pm-gpu/logs/OmegaSCronGPU_%j.err #SBATCH --constraint=gpu -#SBATCH --account=e3sm_g +#SBATCH --account=e3sm_g #SBATCH --qos regular #SBATCH --exclusive #SBATCH --time 01:00:00 @@ -62,7 +62,7 @@ for COMPILER in ${E3SM_COMPILERS}; do mkdir -p ${WORKDIR}/test ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc - ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc + ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc source ${WORKDIR}/omega_env.sh diff --git a/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh 
b/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh index f2aa85361f..8871a490a5 100755 --- a/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh +++ b/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh @@ -60,7 +60,7 @@ setup_polaris_repo() { echo "STEP 1: Setting up Polaris Repo (Baseline)" echo "================================================================================" cd "${POLARIS_CDASH_BASEDIR}" - + # Check if we are inside the 'polaris' folder or need to enter it if [ ! -d "polaris" ]; then echo "Cloning Polaris repository..." @@ -73,7 +73,7 @@ setup_polaris_repo() { git checkout main git reset --hard origin/main fi - + echo "Updating specific submodules (jigsaw-python, Omega)..." git submodule update --init --recursive jigsaw-python git submodule update --init --recursive e3sm_submodules/Omega @@ -85,7 +85,7 @@ configure_polaris() { echo "--------------------------------------------------------------------------------" echo "Configuring Polaris for $compiler" echo "--------------------------------------------------------------------------------" - + cd "${POLARIS_CDASH_BASEDIR}/polaris" if [ ! -f "configure_polaris_envs.py" ]; then @@ -148,7 +148,7 @@ run_baseline_suite() { echo "--------------------------------------------------------------------------------" echo "Running Polaris Baseline Suite for $compiler" echo "--------------------------------------------------------------------------------" - + cd "" local env_file=$(ls ${POLARIS_CDASH_BASEDIR}/polaris/load_dev_polaris_*_${CRONJOB_MACHINE}_${compiler}_*.sh | head -n 1) @@ -188,7 +188,7 @@ for COMPILER in ${E3SM_COMPILERS}; do echo "################################################################################" echo "Processing Baseline for COMPILER: $COMPILER" echo "################################################################################" - + MAIN_LOG="${CRONJOB_LOGDIR}/polaris_cdash_main_${CRONJOB_DATE}.log" echo "Starting $COMPILER... 
logging to $MAIN_LOG" @@ -207,13 +207,13 @@ for COMPILER in ${E3SM_COMPILERS}; do fi build_omega_dev "$COMPILER" "$DEVELOP_BUILD" "$PARMETIS_HOME" - + run_baseline_suite "$COMPILER" "$DEVELOP_BUILD" } 2>&1 | tee "$MAIN_LOG" - + # CDash Submission Logic BUILD_ID=$(date +%s) - + CASE_OUTPUTS_DIR="${POLARIS_CDASH_TESTDIR}/${COMPILER}/polaris_build/case_outputs" CDASH_DIR="${POLARIS_CDASH_TESTDIR}/${COMPILER}/cdash" @@ -233,7 +233,7 @@ for COMPILER in ${E3SM_COMPILERS}; do else echo "Error: polaris_cdash.py not found at ${HERE}/polaris_cdash.py" fi - + echo "Running CTest submission from $CDASH_DIR..." if [ -f "${HERE}/CTestScript.txt" ]; then cp "${HERE}/CTestScript.txt" "$CDASH_DIR/" @@ -243,7 +243,7 @@ for COMPILER in ${E3SM_COMPILERS}; do else echo "Warning: CTestScript.txt not found in ${HERE}" fi - + echo "Finished Baseline processing for $COMPILER" done diff --git a/cron-scripts/tasks/polaris_cdash/polaris_cdash.py b/cron-scripts/tasks/polaris_cdash/polaris_cdash.py index 6040c21d38..999e653aee 100644 --- a/cron-scripts/tasks/polaris_cdash/polaris_cdash.py +++ b/cron-scripts/tasks/polaris_cdash/polaris_cdash.py @@ -1,16 +1,16 @@ import argparse +import glob +import os import platform +import re +import sys import time import xml.etree.ElementTree as ET from xml.dom import minidom -import os -import sys -import glob -import re -import shutil # Shared Utilities + def get_system_info(): info = {} info['OSName'] = platform.system() @@ -18,228 +18,294 @@ def get_system_info(): info['OSRelease'] = platform.release() info['OSVersion'] = platform.version() info['OSPlatform'] = platform.machine() - info['Is64Bits'] = "1" if sys.maxsize > 2**32 else "0" - + info['Is64Bits'] = '1' if sys.maxsize > 2**32 else '0' + try: import psutil + info['NumberOfLogicalCPU'] = str(psutil.cpu_count(logical=True)) info['NumberOfPhysicalCPU'] = str(psutil.cpu_count(logical=False)) - info['TotalPhysicalMemory'] = str(int(psutil.virtual_memory().total / (1024 * 1024))) # MB + 
info['TotalPhysicalMemory'] = str( + int(psutil.virtual_memory().total / (1024 * 1024)) + ) # MB except ImportError: - info['NumberOfLogicalCPU'] = "1" - info['NumberOfPhysicalCPU'] = "1" - info['TotalPhysicalMemory'] = "1024" - - info['VendorString'] = "Unknown" - info['VendorID'] = "Unknown" - info['FamilyID'] = "0" - info['ModelID'] = "0" - info['ProcessorCacheSize'] = "0" - info['ProcessorClockFrequency'] = "0" - + info['NumberOfLogicalCPU'] = '1' + info['NumberOfPhysicalCPU'] = '1' + info['TotalPhysicalMemory'] = '1024' + + info['VendorString'] = 'Unknown' + info['VendorID'] = 'Unknown' + info['FamilyID'] = '0' + info['ModelID'] = '0' + info['ProcessorCacheSize'] = '0' + info['ProcessorClockFrequency'] = '0' + return info + def strip_ansi_codes(text): ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') return ansi_escape.sub('', text) + def read_tag_file(results_dir): - tag_path = os.path.join(results_dir, "TAG") + tag_path = os.path.join(results_dir, 'TAG') if not os.path.exists(tag_path): - raise FileNotFoundError(f"TAG file not found at {tag_path}") - + raise FileNotFoundError(f'TAG file not found at {tag_path}') + with open(tag_path, 'r') as f: lines = [line.strip() for line in f.readlines()] - + if len(lines) < 2: - raise ValueError(f"TAG file at {tag_path} contains fewer than 2 lines.") - + raise ValueError( + f'TAG file at {tag_path} contains fewer than 2 lines.' + ) + folder_name = lines[0] group_name = lines[1] - + # "Joining the two lines of TAG with '-' is a BUILD_STAMP" - build_stamp = f"{folder_name}-{group_name}" - + build_stamp = f'{folder_name}-{group_name}' + return folder_name, build_stamp + def process_build_xml(args, folder_name, build_stamp, sys_info): - # "there is a folder with the same name to the first line of TAG file. in the folder, there is Build.xml" - source_build_xml = os.path.join(args.results_dir, folder_name, "Build.xml") - + # "there is a folder with the same name to the first line of TAG file. 
+ # in the folder, there is Build.xml" + source_build_xml = os.path.join(args.results_dir, folder_name, 'Build.xml') + if not os.path.exists(source_build_xml): - print(f"Warning: Build.xml not found at {source_build_xml}. Generating minimal Build.xml instead.") + print( + f'Warning: Build.xml not found at {source_build_xml}. Generating ' + f'minimal Build.xml instead.' + ) # Fallback or error? User said "Use it instead of generating it." - # I'll try to generate a minimal one if missing, but primarily we expect it. + # I'll try to generate a minimal one if missing, but primarily we + # expect it. # For now let's error if strictly required, but safer to warn. - # Actually user instructions imply it exists. I will error if not found to be explicit. - raise FileNotFoundError(f"Source Build.xml not found at {source_build_xml}") + # Actually user instructions imply it exists. I will error if not found + # to be explicit. + raise FileNotFoundError( + f'Source Build.xml not found at {source_build_xml}' + ) - print(f"Reading Build.xml from {source_build_xml}") + print(f'Reading Build.xml from {source_build_xml}') tree = ET.parse(source_build_xml) site = tree.getroot() - if site.tag != "Site": - # Check if root is Site, sometimes it might be different? XML usually + if site.tag != 'Site': + # Check if root is Site, sometimes it might be different? XML usually + # # CTest XMLs usually start with Site. pass - + # "Modify BuildName of Site node with --build-name argument." # "Also change Name of Site node with --site-name argument." if args.build_name: - site.set("BuildName", args.build_name) - + site.set('BuildName', args.build_name) + if args.site_name: - site.set("Name", args.site_name) - + site.set('Name', args.site_name) + # Ensure BuildStamp is set to the one from TAG - site.set("BuildStamp", build_stamp) - - # Add system info updates if needed? + site.set('BuildStamp', build_stamp) + + # Add system info updates if needed? # The existing Build.xml might have system info. 
- # User didn't fetch system info explicitly for Build.xml, but we used to add it. + # User didn't fetch system info explicitly for Build.xml, but we used to + # add it. # Let's preserve existing attributes unless we need to overwrite. - # But usually has OS info. We can update it if missing or just trust existing. - # User instruction: "In Build.xml modify [Names]... Also... use [Names]... of Test.xml... to the same data to Build.xml" - # Doesn't explicitly say "update OS info". I will leave OS info as is from the source file. - - xmlstr = minidom.parseString(ET.tostring(site)).toprettyxml(indent="\t") - - output_path = os.path.join(args.output_dir, "Build.xml") - with open(output_path, "w") as f: + # But usually has OS info. We can update it if missing or just + # trust existing. + # User instruction: "In Build.xml modify [Names]... Also... use [Names]... + # of Test.xml... to the same data to Build.xml" + # Doesn't explicitly say "update OS info". I will leave OS info as is from + # the source file. 
+ + xmlstr = minidom.parseString(ET.tostring(site)).toprettyxml(indent='\t') + + output_path = os.path.join(args.output_dir, 'Build.xml') + with open(output_path, 'w') as f: f.write(xmlstr) - print(f"Generated {output_path} (copied and modified from source)") - - return site.attrib # Return attributes for Test.xml usage + print(f'Generated {output_path} (copied and modified from source)') + + return site.attrib # Return attributes for Test.xml usage + def generate_test_xml(args, site_attribs, sys_info): # Same structure as before, but using site_attribs for the Site element - site = ET.Element("Site") - + site = ET.Element('Site') + # Copy attributes from Build.xml's Site element for k, v in site_attribs.items(): site.set(k, v) - - # Ensure our CLI args override if not already (process_build_xml updated the attribs, so they should be correct) - - testing = ET.SubElement(site, "Testing") - + + # Ensure our CLI args override if not already (process_build_xml updated + # the attribs, so they should be correct) + + testing = ET.SubElement(site, 'Testing') + start_time = int(time.time()) - formatted_start_time = time.strftime("%b %d %H:%M %Z", time.localtime(start_time)) - ET.SubElement(testing, "StartDateTime").text = formatted_start_time - ET.SubElement(testing, "StartTestTime").text = str(start_time) - - test_list = ET.SubElement(testing, "TestList") - - log_files = glob.glob(os.path.join(args.log_dir, "*.log")) + formatted_start_time = time.strftime( + '%b %d %H:%M %Z', time.localtime(start_time) + ) + ET.SubElement(testing, 'StartDateTime').text = formatted_start_time + ET.SubElement(testing, 'StartTestTime').text = str(start_time) + + test_list = ET.SubElement(testing, 'TestList') + + log_files = glob.glob(os.path.join(args.log_dir, '*.log')) log_files.sort() - + tests = [] - + if not log_files: - print(f"Warning: No log files found in {args.log_dir}") - + print(f'Warning: No log files found in {args.log_dir}') + for log_file in log_files: filename = 
os.path.basename(log_file) test_name = filename - + try: with open(log_file, 'r', errors='replace') as f: content = f.read() content = strip_ansi_codes(content) except Exception as e: - content = f"Error reading file: {e}" - - if "POLARIS TASK: PASS" not in content: - status = "failed" - elif "POLARIS BASELINE:" in content and "POLARIS BASELINE: PASS" not in content: - status = "failed" + content = f'Error reading file: {e}' + + if 'POLARIS TASK: PASS' not in content: + status = 'failed' + elif ( + 'POLARIS BASELINE:' in content + and 'POLARIS BASELINE: PASS' not in content + ): + status = 'failed' else: - status = "passed" - - tests.append({ - 'name': test_name, - 'status': status, - 'output': content, - 'path': log_file - }) - - ET.SubElement(test_list, "Test").text = f"./{args.log_dir}/{test_name}" - + status = 'passed' + + tests.append( + { + 'name': test_name, + 'status': status, + 'output': content, + 'path': log_file, + } + ) + + ET.SubElement(test_list, 'Test').text = f'./{args.log_dir}/{test_name}' + for test_data in tests: - test_elem = ET.SubElement(testing, "Test", Status=test_data['status']) - ET.SubElement(test_elem, "Name").text = test_data['name'] - ET.SubElement(test_elem, "Path").text = f"./{args.log_dir}" - ET.SubElement(test_elem, "FullName").text = f"./{args.log_dir}/{test_data['name']}" - ET.SubElement(test_elem, "FullCommandLine").text = f"cat {test_data['path']}" - - results = ET.SubElement(test_elem, "Results") - - named_meas_time = ET.SubElement(results, "NamedMeasurement", type="numeric/double", name="Execution Time") - ET.SubElement(named_meas_time, "Value").text = "1.0" - - named_meas_status = ET.SubElement(results, "NamedMeasurement", type="text/string", name="Completion Status") - ET.SubElement(named_meas_status, "Value").text = "Completed" - - named_meas_cmd = ET.SubElement(results, "NamedMeasurement", type="text/string", name="Command Line") - ET.SubElement(named_meas_cmd, "Value").text = f"cat {test_data['path']}" - - measurement = 
ET.SubElement(results, "Measurement") - ET.SubElement(measurement, "Value").text = test_data['output'] - - formatted_end_time = time.strftime("%b %d %H:%M %Z", time.localtime(int(time.time()))) - ET.SubElement(testing, "EndDateTime").text = formatted_end_time - ET.SubElement(testing, "EndTestTime").text = str(int(time.time())) - - output_path = os.path.join(args.output_dir, "Test.xml") + test_elem = ET.SubElement(testing, 'Test', Status=test_data['status']) + ET.SubElement(test_elem, 'Name').text = test_data['name'] + ET.SubElement(test_elem, 'Path').text = f'./{args.log_dir}' + ET.SubElement( + test_elem, 'FullName' + ).text = f'./{args.log_dir}/{test_data["name"]}' + ET.SubElement( + test_elem, 'FullCommandLine' + ).text = f'cat {test_data["path"]}' + + results = ET.SubElement(test_elem, 'Results') + + named_meas_time = ET.SubElement( + results, + 'NamedMeasurement', + type='numeric/double', + name='Execution Time', + ) + ET.SubElement(named_meas_time, 'Value').text = '1.0' + + named_meas_status = ET.SubElement( + results, + 'NamedMeasurement', + type='text/string', + name='Completion Status', + ) + ET.SubElement(named_meas_status, 'Value').text = 'Completed' + + named_meas_cmd = ET.SubElement( + results, + 'NamedMeasurement', + type='text/string', + name='Command Line', + ) + ET.SubElement( + named_meas_cmd, 'Value' + ).text = f'cat {test_data["path"]}' + + measurement = ET.SubElement(results, 'Measurement') + ET.SubElement(measurement, 'Value').text = test_data['output'] + + formatted_end_time = time.strftime( + '%b %d %H:%M %Z', time.localtime(int(time.time())) + ) + ET.SubElement(testing, 'EndDateTime').text = formatted_end_time + ET.SubElement(testing, 'EndTestTime').text = str(int(time.time())) + + output_path = os.path.join(args.output_dir, 'Test.xml') tree = ET.ElementTree(site) - tree.write(output_path, encoding="UTF-8", xml_declaration=True) - print(f"Generated {output_path}") + tree.write(output_path, encoding='UTF-8', xml_declaration=True) + 
print(f'Generated {output_path}') + def generate_done_xml(args, build_id): - root = ET.Element("Done") - ET.SubElement(root, "buildId").text = build_id - ET.SubElement(root, "time").text = str(int(time.time())) + root = ET.Element('Done') + ET.SubElement(root, 'buildId').text = build_id + ET.SubElement(root, 'time').text = str(int(time.time())) - xmlstr = minidom.parseString(ET.tostring(root)).toprettyxml(indent="\t") - output_path = os.path.join(args.output_dir, "Done.xml") - with open(output_path, "w") as f: + xmlstr = minidom.parseString(ET.tostring(root)).toprettyxml(indent='\t') + output_path = os.path.join(args.output_dir, 'Done.xml') + with open(output_path, 'w') as f: f.write(xmlstr) - print(f"Generated {output_path}") + print(f'Generated {output_path}') + def main(): - parser = argparse.ArgumentParser(description="Generate CDash XML files from log directory") - - parser.add_argument("--log-dir", required=True, help="Directory containing log files") - parser.add_argument("--results-dir", required=True, help="Directory containing TAG file and Build.xml subdirectory") + parser = argparse.ArgumentParser( + description='Generate CDash XML files from log directory' + ) + + parser.add_argument( + '--log-dir', required=True, help='Directory containing log files' + ) + parser.add_argument( + '--results-dir', + required=True, + help='Directory containing TAG file and Build.xml subdirectory', + ) # Removed --build-stamp - parser.add_argument("--site-name", required=True, help="Name of the site") + parser.add_argument('--site-name', required=True, help='Name of the site') # Build name defaults to log folder name, but can be overridden - parser.add_argument("--build-name", help="Name of the build") - parser.add_argument("--build-id", required=True, help="ID of the build") - - parser.add_argument("--output-dir", default=".", help="Directory to output XML files") - + parser.add_argument('--build-name', help='Name of the build') + parser.add_argument('--build-id', 
required=True, help='ID of the build') + + parser.add_argument( + '--output-dir', default='.', help='Directory to output XML files' + ) + args = parser.parse_args() - + if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) if not args.build_name: args.build_name = os.path.basename(os.path.normpath(args.log_dir)) - + sys_info = get_system_info() - + # 1. Read TAG folder_name, build_stamp = read_tag_file(args.results_dir) - print(f"Detected BuildStamp: {build_stamp} (from {args.results_dir}/TAG)") - + print(f'Detected BuildStamp: {build_stamp} (from {args.results_dir}/TAG)') + # 2. Process Build.xml site_attribs = process_build_xml(args, folder_name, build_stamp, sys_info) - + # 3. Generate Test.xml using same Site attribs generate_test_xml(args, site_attribs, sys_info) - + # 4. Generate Done.xml generate_done_xml(args, args.build_id) -if __name__ == "__main__": + +if __name__ == '__main__': main() From b6c1ba0d04d106c008c2059c84741319bbe32de5 Mon Sep 17 00:00:00 2001 From: Carolyn Begeman Date: Thu, 12 Mar 2026 16:29:42 -0600 Subject: [PATCH 4/8] factor omega cdash logic into shared driver --- .../tasks/omega_cdash/run_omega_cdash.sh | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 cron-scripts/tasks/omega_cdash/run_omega_cdash.sh diff --git a/cron-scripts/tasks/omega_cdash/run_omega_cdash.sh b/cron-scripts/tasks/omega_cdash/run_omega_cdash.sh new file mode 100644 index 0000000000..a90b033d28 --- /dev/null +++ b/cron-scripts/tasks/omega_cdash/run_omega_cdash.sh @@ -0,0 +1,78 @@ +#!/bin/bash -l + +set -euo pipefail + +echo "Starting omega cdash job" + +if [[ "${CRONJOB_MACHINE:-unknown}" == "chrysalis" ]]; then + module load python cmake + PARMETIS_TPL="/lcrc/soft/climate/polaris/chrysalis/spack/dev_polaris_0_10_0_COMPILER_openmpi/var/spack/environments/dev_polaris_0_10_0_COMPILER_openmpi/.spack-env/view" + +elif [[ "${CRONJOB_MACHINE:-unknown}" == "frontier" ]]; then + module load cray-python cmake git-lfs + 
PARMETIS_TPL="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "${CRONJOB_MACHINE:-unknown}" == "pm-gpu" ]]; then + module load cray-python cmake + PARMETIS_TPL="/global/cfs/cdirs/e3sm/software/polaris/pm-gpu/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "${CRONJOB_MACHINE:-unknown}" == "pm-cpu" ]]; then + module load cray-python cmake + PARMETIS_TPL="/global/cfs/cdirs/e3sm/software/polaris/pm-cpu/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" + +elif [[ "${CRONJOB_MACHINE:-unknown}" == "unknown" ]]; then + echo "CRONJOB_MACHINE is not set." + exit 1 + +else + echo "It seems that the cron job is not configured with CRONJOB_MACHINE." + exit -1 + +fi + +echo "Compilers: ${E3SM_COMPILERS}" + +for COMPILER in ${E3SM_COMPILERS}; do + + WORKDIR=${TESTROOT}/${COMPILER}/${CRONJOB_DATE} + rm -rf ${WORKDIR} + mkdir -p ${WORKDIR} + + PARMETIS_HOME="${PARMETIS_TPL//COMPILER/$COMPILER}" + if [ ! 
-d "$PARMETIS_HOME" ]; then + if [[ "${CRONJOB_MACHINE:-unknown}" == "frontier" ]]; then + PARMETIS_HOME="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_craygnu-mphipcc_mpich/var/spack/environments/dev_polaris_0_10_0_craygnu-mphipcc_mpich/.spack-env/view" + fi + fi + + cmake \ + -DOMEGA_CIME_MACHINE=${CRONJOB_MACHINE} \ + -DOMEGA_CIME_COMPILER=${COMPILER} \ + -DOMEGA_ARCH=SERIAL \ + -DOMEGA_BUILD_TEST=ON \ + -DOMEGA_PARMETIS_ROOT=${PARMETIS_HOME} \ + -S ${OMEGA_HOME}/components/omega \ + -B ${WORKDIR}; + + mkdir -p ${WORKDIR}/test + + ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc + ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc + ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc + + source ${WORKDIR}/omega_env.sh + + ctest \ + -S ${OMEGA_HOME}/components/omega/CTestScript.cmake \ + -DCTEST_SOURCE_DIRECTORY=${OMEGA_HOME}/components/omega \ + -DCTEST_BINARY_DIRECTORY=${WORKDIR} \ + -DCTEST_SITE=${CRONJOB_MACHINE} \ + -DCTEST_BUILD_GROUP="Omega Unit-test" \ + -DCTEST_BUILD_NAME="unitest-develop-${COMPILER}" \ + -DCTEST_NIGHTLY_START_TIME="06:00:00 UTC" \ + -DCTEST_BUILD_COMMAND="${WORKDIR}/omega_build.sh" \ + -DCTEST_BUILD_CONFIGURATION="Release" \ + -DCTEST_DROP_SITE_CDASH=TRUE \ + -DCTEST_SUBMIT_URL="https://my.cdash.org/submit.php?project=E3SM"; + +done From 066616459f70dbfb67db43640994f7e34a0b3587 Mon Sep 17 00:00:00 2001 From: Carolyn Begeman Date: Mon, 16 Mar 2026 09:10:44 -0700 Subject: [PATCH 5/8] Use shared omega cdash driver --- .../job_chrysalis_omega_cdash.sbatch | 69 +---------------- .../job_frontier_omega_cdash.sbatch | 66 +---------------- .../omega_cdash/job_pm-cpu_omega_cdash.sbatch | 74 +------------------ .../omega_cdash/job_pm-gpu_omega_cdash.sbatch | 70 +----------------- 4 files changed, 8 insertions(+), 271 deletions(-) diff --git a/cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch 
index 86f8ccaea3..0bfbac48be 100755 --- a/cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch +++ b/cron-scripts/tasks/omega_cdash/job_chrysalis_omega_cdash.sbatch @@ -1,73 +1,8 @@ -#!/bin/bash +#!/bin/bash -l #SBATCH --nodes=1 #SBATCH --qos=high #SBATCH --time 02:00:00 source /etc/bashrc -echo "Starting omega cdash job" - -if [[ "$CRONJOB_MACHINE" == "chrysalis" ]]; then - module load python cmake - PARMETIS_TPL="/lcrc/soft/climate/polaris/chrysalis/spack/dev_polaris_0_10_0_COMPILER_openmpi/var/spack/environments/dev_polaris_0_10_0_COMPILER_openmpi/.spack-env/view" - -elif [[ "$CRONJOB_MACHINE" == "frontier" ]]; then - module load cray-python cmake git-lfs - PARMETIS_TPL="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" - -elif [[ "$CRONJOB_MACHINE" == "unknown" ]]; then - echo "CRONJOB_MACHINE is not set." - exit -1 - -else - echo "It seems that the cron job is not configured with CRONJOB_MACHINE." - exit -1 - -fi - -echo "Compilers: ${E3SM_COMPILERS}" -# Run Omega ctest -for COMPILER in ${E3SM_COMPILERS}; do - - WORKDIR=${TESTROOT}/${COMPILER}/${CRONJOB_DATE} - rm -rf ${WORKDIR} - mkdir -p ${WORKDIR} - - PARMETIS_HOME="${PARMETIS_TPL//COMPILER/$COMPILER}" - if [ ! 
-f "$PARMETIS_HOME" ]; then - if [[ "$CRONJOB_MACHINE" == "frontier" ]]; then - PARMETIS_HOME="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_craygnu-mphipcc_mpich/var/spack/environments/dev_polaris_0_10_0_craygnu-mphipcc_mpich/.spack-env/view" - fi - fi - - cmake \ - -DOMEGA_CIME_MACHINE=${CRONJOB_MACHINE} \ - -DOMEGA_CIME_COMPILER=${COMPILER} \ - -DOMEGA_ARCH=SERIAL \ - -DOMEGA_BUILD_TEST=ON \ - -DOMEGA_PARMETIS_ROOT=${PARMETIS_HOME} \ - -S ${OMEGA_HOME}/components/omega \ - -B ${WORKDIR} - - mkdir -p ${WORKDIR}/test - - ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc - ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc - ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc - - source ${WORKDIR}/omega_env.sh - - ctest \ - -S ${OMEGA_HOME}/components/omega/CTestScript.cmake \ - -DCTEST_SOURCE_DIRECTORY=${OMEGA_HOME}/components/omega \ - -DCTEST_BINARY_DIRECTORY=${WORKDIR} \ - -DCTEST_SITE=${CRONJOB_MACHINE} \ - -DCTEST_BUILD_GROUP="Omega Unit-test" \ - -DCTEST_BUILD_NAME="unitest-develop-${COMPILER}" \ - -DCTEST_NIGHTLY_START_TIME="06:00:00 UTC" \ - -DCTEST_BUILD_COMMAND="${WORKDIR}/omega_build.sh" \ - -DCTEST_BUILD_CONFIGURATION="Release" \ - -DCTEST_DROP_SITE_CDASH=TRUE \ - -DCTEST_SUBMIT_URL="https://my.cdash.org/submit.php?project=E3SM" - -done +exec bash $(dirname "$0")/run_omega_cdash.sh diff --git a/cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch index 1d11c8721c..28de743c4d 100755 --- a/cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch +++ b/cron-scripts/tasks/omega_cdash/job_frontier_omega_cdash.sbatch @@ -4,69 +4,7 @@ #SBATCH --account=cli115 #SBATCH --time 02:00:00 -echo "Starting omega cdash job" -if [[ "$CRONJOB_MACHINE" == "chrysalis" ]]; then - module load python cmake - 
PARMETIS_TPL=/lcrc/soft/climate/polaris/chrysalis/spack/dev_polaris_0_10_0_COMPILER_openmpi/var/spack/environments/dev_polaris_0_10_0_COMPILER_openmpi/.spack-env/view +source /etc/bashrc -elif [[ "$CRONJOB_MACHINE" == "frontier" ]]; then - module load cray-python cmake git-lfs - PARMETIS_TPL="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" - -elif [[ "$CRONJOB_MACHINE" == "unknown" ]]; then - echo "CRONJOB_MACHINE is not set." - exit -1 - -else - echo "It seems that the cron job is not configured with CRONJOB_MACHINE." - exit -1 - -fi - -echo "Compilers: ${E3SM_COMPILERS}" -# Run Omega ctest -for COMPILER in ${E3SM_COMPILERS}; do - - WORKDIR=${TESTROOT}/${COMPILER}/${CRONJOB_DATE} - rm -rf ${WORKDIR} - mkdir -p ${WORKDIR} - - PARMETIS_HOME="${PARMETIS_TPL//COMPILER/$COMPILER}" - if [ ! -f "$PARMETIS_HOME" ]; then - if [[ "$CRONJOB_MACHINE" == "frontier" ]]; then - PARMETIS_HOME=/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_craygnu-mphipcc_mpich/var/spack/environments/dev_polaris_0_10_0_craygnu-mphipcc_mpich/.spack-env/view - fi - fi - - cmake \ - -DOMEGA_CIME_MACHINE=${CRONJOB_MACHINE} \ - -DOMEGA_CIME_COMPILER=${COMPILER} \ - -DOMEGA_ARCH=SERIAL \ - -DOMEGA_BUILD_TEST=ON \ - -DOMEGA_PARMETIS_ROOT=${PARMETIS_HOME} \ - -S ${OMEGA_HOME}/components/omega \ - -B ${WORKDIR} - - mkdir -p ${WORKDIR}/test - - ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc - ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc - ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc - - source ${WORKDIR}/omega_env.sh - - ctest \ - -S ${OMEGA_HOME}/components/omega/CTestScript.cmake \ - -DCTEST_SOURCE_DIRECTORY=${OMEGA_HOME}/components/omega \ - -DCTEST_BINARY_DIRECTORY=${WORKDIR} \ - -DCTEST_SITE=${CRONJOB_MACHINE} \ - -DCTEST_BUILD_GROUP="Omega Unit-test" \ - -DCTEST_BUILD_NAME="unitest-develop-${COMPILER}" \ 
- -DCTEST_NIGHTLY_START_TIME="06:00:00 UTC" \ - -DCTEST_BUILD_COMMAND="${WORKDIR}/omega_build.sh" \ - -DCTEST_BUILD_CONFIGURATION="Release" \ - -DCTEST_DROP_SITE_CDASH=TRUE \ - -DCTEST_SUBMIT_URL="https://my.cdash.org/submit.php?project=E3SM" - -done +exec bash $(dirname "$0")/run_omega_cdash.sh diff --git a/cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch index f1a69cec6a..6c4314cc6d 100755 --- a/cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch +++ b/cron-scripts/tasks/omega_cdash/job_pm-cpu_omega_cdash.sbatch @@ -10,77 +10,7 @@ #SBATCH --exclusive #SBATCH --time 01:00:00 -echo "Starting omega cdash job" -if [[ "$CRONJOB_MACHINE" == "chrysalis" ]]; then - module load python cmake - PARMETIS_TPL=/lcrc/soft/climate/polaris/chrysalis/spack/dev_polaris_0_10_0_COMPILER_openmpi/var/spack/environments/dev_polaris_0_10_0_COMPILER_openmpi/.spack-env/view +source /etc/bashrc -elif [[ "$CRONJOB_MACHINE" == "frontier" ]]; then - module load cray-python cmake git-lfs - PARMETIS_TPL="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" - -elif [[ "$CRONJOB_MACHINE" == "pm-gpu" ]]; then - module load cray-python cmake - PARMETIS_TPL="/global/cfs/cdirs/e3sm/software/polaris/pm-gpu/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" - -elif [[ "$CRONJOB_MACHINE" == "pm-cpu" ]]; then - module load cray-python cmake - PARMETIS_TPL="/global/cfs/cdirs/e3sm/software/polaris/pm-cpu/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" - -elif [[ "$CRONJOB_MACHINE" == "unknown" ]]; then - echo "CRONJOB_MACHINE is not set." - exit -1 - -else - echo "It seems that the cron job is not configured with CRONJOB_MACHINE." 
- exit -1 - -fi - -echo "Compilers: ${E3SM_COMPILERS}" -# Run Omega ctest -for COMPILER in ${E3SM_COMPILERS}; do - - WORKDIR=${TESTROOT}/${COMPILER}/${CRONJOB_DATE} - rm -rf ${WORKDIR} - mkdir -p ${WORKDIR} - - PARMETIS_HOME="${PARMETIS_TPL//COMPILER/$COMPILER}" - if [ ! -f "$PARMETIS_HOME" ]; then - if [[ "$CRONJOB_MACHINE" == "frontier" ]]; then - PARMETIS_HOME=/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_craygnu-mphipcc_mpich/var/spack/environments/dev_polaris_0_10_0_craygnu-mphipcc_mpich/.spack-env/view - fi - fi - - cmake \ - -DOMEGA_CIME_MACHINE=${CRONJOB_MACHINE} \ - -DOMEGA_CIME_COMPILER=${COMPILER} \ - -DOMEGA_ARCH=SERIAL \ - -DOMEGA_BUILD_TEST=ON \ - -DOMEGA_PARMETIS_ROOT=${PARMETIS_HOME} \ - -S ${OMEGA_HOME}/components/omega \ - -B ${WORKDIR} - - mkdir -p ${WORKDIR}/test - - ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc - ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc - ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc - - source ${WORKDIR}/omega_env.sh - - ctest \ - -S ${OMEGA_HOME}/components/omega/CTestScript.cmake \ - -DCTEST_SOURCE_DIRECTORY=${OMEGA_HOME}/components/omega \ - -DCTEST_BINARY_DIRECTORY=${WORKDIR} \ - -DCTEST_SITE=${CRONJOB_MACHINE} \ - -DCTEST_BUILD_GROUP="Omega Unit-test" \ - -DCTEST_BUILD_NAME="unitest-develop-${COMPILER}" \ - -DCTEST_NIGHTLY_START_TIME="06:00:00 UTC" \ - -DCTEST_BUILD_COMMAND="${WORKDIR}/omega_build.sh" \ - -DCTEST_BUILD_CONFIGURATION="Release" \ - -DCTEST_DROP_SITE_CDASH=TRUE \ - -DCTEST_SUBMIT_URL="https://my.cdash.org/submit.php?project=E3SM" - -done +exec bash $(dirname "$0")/run_omega_cdash.sh diff --git a/cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch b/cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch index c49f80faf5..66c2a83819 100755 --- a/cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch +++ b/cron-scripts/tasks/omega_cdash/job_pm-gpu_omega_cdash.sbatch @@ -11,73 +11,7 @@ #SBATCH 
--exclusive #SBATCH --time 01:00:00 -echo "Starting omega cdash job" -if [[ "$CRONJOB_MACHINE" == "chrysalis" ]]; then - module load python cmake - PARMETIS_TPL=/lcrc/soft/climate/polaris/chrysalis/spack/dev_polaris_0_10_0_COMPILER_openmpi/var/spack/environments/dev_polaris_0_10_0_COMPILER_openmpi/.spack-env/view +source /etc/bashrc -elif [[ "$CRONJOB_MACHINE" == "frontier" ]]; then - module load cray-python cmake git-lfs - PARMETIS_TPL="/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" - -elif [[ "$CRONJOB_MACHINE" == "pm-gpu" ]]; then - module load cray-python cmake - PARMETIS_TPL="/global/cfs/cdirs/e3sm/software/polaris/pm-gpu/spack/dev_polaris_0_10_0_COMPILER_mpich/var/spack/environments/dev_polaris_0_10_0_COMPILER_mpich/.spack-env/view" - -elif [[ "$CRONJOB_MACHINE" == "unknown" ]]; then - echo "CRONJOB_MACHINE is not set." - exit -1 - -else - echo "It seems that the cron job is not configured with CRONJOB_MACHINE." - exit -1 - -fi - -echo "Compilers: ${E3SM_COMPILERS}" -# Run Omega ctest -for COMPILER in ${E3SM_COMPILERS}; do - - WORKDIR=${TESTROOT}/${COMPILER}/${CRONJOB_DATE} - rm -rf ${WORKDIR} - mkdir -p ${WORKDIR} - - PARMETIS_HOME="${PARMETIS_TPL//COMPILER/$COMPILER}" - if [ ! 
-f "$PARMETIS_HOME" ]; then - if [[ "$CRONJOB_MACHINE" == "frontier" ]]; then - PARMETIS_HOME=/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_10_0_craygnu-mphipcc_mpich/var/spack/environments/dev_polaris_0_10_0_craygnu-mphipcc_mpich/.spack-env/view - fi - fi - - cmake \ - -DOMEGA_CIME_MACHINE=${CRONJOB_MACHINE} \ - -DOMEGA_CIME_COMPILER=${COMPILER} \ - -DOMEGA_ARCH=SERIAL \ - -DOMEGA_BUILD_TEST=ON \ - -DOMEGA_PARMETIS_ROOT=${PARMETIS_HOME} \ - -S ${OMEGA_HOME}/components/omega \ - -B ${WORKDIR} - - mkdir -p ${WORKDIR}/test - - ln -sf ${TESTROOT}/OmegaMesh.nc ${WORKDIR}/test/OmegaMesh.nc - ln -sf ${TESTROOT}/OmegaSphereMesh.nc ${WORKDIR}/test/OmegaSphereMesh.nc - ln -sf ${TESTROOT}/OmegaPlanarMesh.nc ${WORKDIR}/test/OmegaPlanarMesh.nc - - source ${WORKDIR}/omega_env.sh - - ctest \ - -S ${OMEGA_HOME}/components/omega/CTestScript.cmake \ - -DCTEST_SOURCE_DIRECTORY=${OMEGA_HOME}/components/omega \ - -DCTEST_BINARY_DIRECTORY=${WORKDIR} \ - -DCTEST_SITE=${CRONJOB_MACHINE} \ - -DCTEST_BUILD_GROUP="Omega Unit-test" \ - -DCTEST_BUILD_NAME="unitest-develop-${COMPILER}" \ - -DCTEST_NIGHTLY_START_TIME="06:00:00 UTC" \ - -DCTEST_BUILD_COMMAND="${WORKDIR}/omega_build.sh" \ - -DCTEST_BUILD_CONFIGURATION="Release" \ - -DCTEST_DROP_SITE_CDASH=TRUE \ - -DCTEST_SUBMIT_URL="https://my.cdash.org/submit.php?project=E3SM" - -done +exec bash $(dirname "$0")/run_omega_cdash.sh From c12641db5230332c3af41875ea064ff290fa15c0 Mon Sep 17 00:00:00 2001 From: Carolyn Begeman Date: Mon, 16 Mar 2026 08:59:03 -0700 Subject: [PATCH 6/8] Use USER env variable for paths --- cron-scripts/machines/config_chrysalis.sh | 2 +- cron-scripts/machines/config_frontier.sh | 2 +- cron-scripts/machines/config_pm-cpu.sh | 2 +- cron-scripts/machines/config_pm-gpu.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cron-scripts/machines/config_chrysalis.sh b/cron-scripts/machines/config_chrysalis.sh index 050a102063..d342b5d18a 100755 --- a/cron-scripts/machines/config_chrysalis.sh 
+++ b/cron-scripts/machines/config_chrysalis.sh @@ -3,7 +3,7 @@ set -eo pipefail source /etc/bashrc -export CRONJOB_BASEDIR=/lcrc/globalscratch/ac.kimy/cronjobs +export CRONJOB_BASEDIR=/lcrc/globalscratch/${USER}/cronjobs export E3SM_COMPILERS="gnu intel" mkdir -p "$CRONJOB_BASEDIR" diff --git a/cron-scripts/machines/config_frontier.sh b/cron-scripts/machines/config_frontier.sh index dedc560ac3..a7db934cdc 100755 --- a/cron-scripts/machines/config_frontier.sh +++ b/cron-scripts/machines/config_frontier.sh @@ -10,7 +10,7 @@ export http_proxy=http://proxy.ccs.ornl.gov:3128/ export https_proxy=http://proxy.ccs.ornl.gov:3128/ export no_proxy='localhost,127.0.0.0/8,*.ccs.ornl.gov' -export CRONJOB_BASEDIR=/lustre/orion/cli115/scratch/grnydawn/cronjobs +export CRONJOB_BASEDIR=/lustre/orion/cli115/scratch/${USER}/cronjobs export E3SM_COMPILERS="craygnu-mphipcc craycray-mphipcc crayamd-mphipcc craygnu craycray crayamd" mkdir -p "$CRONJOB_BASEDIR" diff --git a/cron-scripts/machines/config_pm-cpu.sh b/cron-scripts/machines/config_pm-cpu.sh index 07c06720f1..35b4a958fa 100755 --- a/cron-scripts/machines/config_pm-cpu.sh +++ b/cron-scripts/machines/config_pm-cpu.sh @@ -4,7 +4,7 @@ set -eo pipefail module load cray-python cmake -export CRONJOB_BASEDIR=/pscratch/sd/y/youngsun/omega/cronjobs_pm-cpu +export CRONJOB_BASEDIR=/pscratch/sd/${USER:0:1}/${USER}/omega/cronjobs_pm-cpu export E3SM_COMPILERS="gnu" mkdir -p "$CRONJOB_BASEDIR" diff --git a/cron-scripts/machines/config_pm-gpu.sh b/cron-scripts/machines/config_pm-gpu.sh index dc98c8f367..e025b44dcf 100755 --- a/cron-scripts/machines/config_pm-gpu.sh +++ b/cron-scripts/machines/config_pm-gpu.sh @@ -4,7 +4,7 @@ set -eo pipefail module load cray-python cmake -export CRONJOB_BASEDIR=/pscratch/sd/y/youngsun/omega/cronjobs_pm-gpu +export CRONJOB_BASEDIR=/pscratch/sd/${USER:0:1}/${USER}/omega/cronjobs_pm-gpu export E3SM_COMPILERS="gnugpu" mkdir -p "$CRONJOB_BASEDIR" From d4e4c1effe2ed4744c418e6baa5d9282afc302ca Mon Sep 17 00:00:00 2001 
From: Youngsung Kim Date: Wed, 8 Apr 2026 07:17:34 -0700 Subject: [PATCH 7/8] updates README --- cron-scripts/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cron-scripts/README.md b/cron-scripts/README.md index 32ab933dbf..bad492a17e 100644 --- a/cron-scripts/README.md +++ b/cron-scripts/README.md @@ -4,10 +4,10 @@ Automated cron job scripts for continuous testing and CDash reporting of OMEGA o ## Overview -This repository orchestrates the compilation, testing, and result submission to [CDash](https://my.cdash.org) for two major OMEGA ocean model components: +This repository orchestrates the compilation, testing, and result submission to [CDash](https://my.cdash.org) for two types of OMEGA tests: -- **Omega** - Next-generation ocean model -- **Polaris** - MPAS-Ocean model with Omega integration +- **Omega CTests** +- **Polaris** - Omega tests on MPAS meshes ## Supported Systems From 86fee0e39b8f391ff8e2a6ec31a24e699c3559a4 Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Tue, 14 Apr 2026 10:06:15 -0700 Subject: [PATCH 8/8] updates tasks for being launched as a part of polaris --- cron-scripts/launch_all.sh | 6 -- .../tasks/omega_cdash/launch_omega_cdash.sh | 30 +++++++--- .../polaris_cdash/launch_polaris_ctest.sh | 59 +++++++++---------- 3 files changed, 48 insertions(+), 47 deletions(-) diff --git a/cron-scripts/launch_all.sh b/cron-scripts/launch_all.sh index 12023564e6..f4ee8f3c9c 100755 --- a/cron-scripts/launch_all.sh +++ b/cron-scripts/launch_all.sh @@ -42,12 +42,6 @@ if ! flock -n 9; then echo "[$(date)] launch_all.sh is already running, exiting." exit 0 fi -#LOCKFILE="${HERE}/cronjob.lock" -#exec 9>"$LOCKFILE" -#if ! flock -n 9; then -# echo "[$(date)] launch_all.sh is already running, exiting." 
-# exit 0 -#fi # Run all launch*.sh scripts under immediate subdirectories of $HERE/tasks while IFS= read -r script; do diff --git a/cron-scripts/tasks/omega_cdash/launch_omega_cdash.sh b/cron-scripts/tasks/omega_cdash/launch_omega_cdash.sh index ec2e4f493a..417deec99b 100755 --- a/cron-scripts/tasks/omega_cdash/launch_omega_cdash.sh +++ b/cron-scripts/tasks/omega_cdash/launch_omega_cdash.sh @@ -10,18 +10,30 @@ export TESTROOT="${OMEGA_CDASH_BASEDIR}/tests" mkdir -p $OMEGA_CDASH_BASEDIR mkdir -p $TESTROOT -export OMEGA_HOME="${OMEGA_CDASH_BASEDIR}/Omega" -if [[ ! -d $OMEGA_HOME ]]; then - cd ${OMEGA_CDASH_BASEDIR} - git clone https://github.com/E3SM-Project/Omega.git +# Configuration +export REPO_PATH="${OMEGA_CDASH_BASEDIR}/Omega" +REMOTE_URL="https://github.com/E3SM-Project/Omega.git" +BRANCH="develop" + +# 1. & 2. Check existence and handle repository state +if [ ! -d "$REPO_PATH/.git" ]; then + echo "Repository not found. Cloning..." + git clone -b "$BRANCH" "$REMOTE_URL" "$REPO_PATH" + cd "$REPO_PATH" || exit +else + echo "Repository exists. Updating to latest remote state..." + cd "$REPO_PATH" || exit + + # Ensure we are on the correct branch and sync with origin + git fetch origin + git checkout "$BRANCH" + git reset --hard "origin/$BRANCH" fi -cd ${OMEGA_HOME} -git checkout develop -git fetch origin -git reset --hard origin/develop -git submodule update --init --recursive || true +# 3. Update specific submodules recursively +echo "Updating submodules..." +git submodule update --init --recursive externals/ekat externals/scorpio cime components/omega/external if [[ ! 
-f ${TESTROOT}/OmegaMesh.nc ]]; then wget -O ${TESTROOT}/OmegaMesh.nc https://web.lcrc.anl.gov/public/e3sm/inputdata/ocn/mpas-o/oQU240/ocean.QU.240km.151209.nc diff --git a/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh b/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh index 8871a490a5..06b9a98c59 100755 --- a/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh +++ b/cron-scripts/tasks/polaris_cdash/launch_polaris_ctest.sh @@ -55,29 +55,29 @@ fi } -setup_polaris_repo() { - echo "================================================================================" - echo "STEP 1: Setting up Polaris Repo (Baseline)" - echo "================================================================================" - cd "${POLARIS_CDASH_BASEDIR}" - - # Check if we are inside the 'polaris' folder or need to enter it - if [ ! -d "polaris" ]; then - echo "Cloning Polaris repository..." - git clone git@github.com:E3SM-Project/polaris.git - cd polaris - else - cd polaris - echo "Repository exists. Resetting to main branch..." - git fetch origin - git checkout main - git reset --hard origin/main - fi - - echo "Updating specific submodules (jigsaw-python, Omega)..." - git submodule update --init --recursive jigsaw-python - git submodule update --init --recursive e3sm_submodules/Omega -} +#setup_polaris_repo() { +# echo "================================================================================" +# echo "STEP 1: Setting up Polaris Repo (Baseline)" +# echo "================================================================================" +# cd "${POLARIS_CDASH_BASEDIR}" +# +# # Check if we are inside the 'polaris' folder or need to enter it +# if [ ! -d "polaris" ]; then +# echo "Cloning Polaris repository..." +# git clone git@github.com:E3SM-Project/polaris.git +# cd polaris +# else +# cd polaris +# echo "Repository exists. Resetting to main branch..." 
+# git fetch origin +# git checkout main +# git reset --hard origin/main +# fi +# +# echo "Updating specific submodules (jigsaw-python, Omega)..." +# git submodule update --init --recursive jigsaw-python +# git submodule update --init --recursive e3sm_submodules/Omega +#} configure_polaris() { local compiler=$1 @@ -88,16 +88,11 @@ configure_polaris() { cd "${POLARIS_CDASH_BASEDIR}/polaris" - if [ ! -f "configure_polaris_envs.py" ]; then - echo "Error: configure_polaris_envs.py not found in $(pwd)" - exit 1 + if ! ls load_polaris_${CRONJOB_MACHINE}_${compiler}_*.sh >/dev/null 2>&1; then + ./deploy.py --machine ${CRONJOB_MACHINE} --compiler ${compiler} fi - if ! ls load_dev_polaris_*_${CRONJOB_MACHINE}_${compiler}_*.sh >/dev/null 2>&1; then - echo "Configuring Polaris Environment" - ./configure_polaris_envs.py --conda "${MINIFORGE3_HOME}" \ - -c "${compiler}" -m "${CRONJOB_MACHINE}" - fi + source ./load_polaris_${CRONJOB_MACHINE}_${compiler}_*.sh } build_omega_dev() { @@ -182,7 +177,7 @@ run_baseline_suite() { # Main Execution # ============================================================================== install_miniforge3 -setup_polaris_repo +#setup_polaris_repo for COMPILER in ${E3SM_COMPILERS}; do echo "################################################################################"