diff --git a/Cargo.lock b/Cargo.lock index 365553b..882fd59 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -787,7 +787,7 @@ dependencies = [ [[package]] name = "pharos" -version = "0.3.0" +version = "0.3.1" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index 61d34fe..3c143b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pharos" -version = "0.3.0" +version = "0.3.1" edition = "2024" [[bin]] diff --git a/components/config/src/nonmem.rs b/components/config/src/nonmem.rs index b41b8ef..0c4fb8f 100644 --- a/components/config/src/nonmem.rs +++ b/components/config/src/nonmem.rs @@ -1,3 +1,4 @@ +use std::cmp::Ordering; use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::process::Command; @@ -10,6 +11,28 @@ use which::which; const KNOWN_NONMEM_FOLDERS: [&str; 2] = ["/opt/nonmem", "/opt/NONMEM"]; +fn parse_nonmem_version_name(name: &str) -> Option { + let rest = name.strip_prefix("nm")?; + let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count(); + if digit_count == 0 { + return None; + } + + rest[..digit_count].parse::().ok() +} + +fn cmp_nonmem_version_names(a: &str, b: &str) -> Ordering { + let a_num = parse_nonmem_version_name(a); + let b_num = parse_nonmem_version_name(b); + + match (a_num, b_num) { + (Some(a_num), Some(b_num)) => b_num.cmp(&a_num).then_with(|| a.cmp(b)), + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (None, None) => a.cmp(b), + } +} + fn resolve_path_from_config_dir(path: Option<&PathBuf>, config_dir: &Path) -> Option { path.map(|p| { if p.is_relative() { @@ -432,7 +455,7 @@ impl NonmemConfig { } config.versions = find_nonmem_versions()?; let mut versions = config.versions.keys().collect::>(); - versions.sort(); + versions.sort_by(|a, b| cmp_nonmem_version_names(a, b)); config.default_version = versions[0].clone(); Ok(config) @@ -646,4 +669,33 @@ mod tests { let nonmem = config.nonmem.expect("Should have nonmem config"); assert_eq!(nonmem.files_to_copy().len(), 0); } + + #[test] + fn test_nonmem_versions_sorted_latest_first_with_lexical_fallback() { + let mut versions = [ + "nm74gf_nmfe", + "nm75", + "nm74", + "nm73", + "nm73gf", + "nm74gf", + "nm73gf_nmfe", + "nm76", + ]; + versions.sort_by(|a, b| cmp_nonmem_version_names(a, b)); + + assert_eq!( + versions, + [ + "nm76", + "nm75", + "nm74", + "nm74gf", + "nm74gf_nmfe", + "nm73", + "nm73gf", + "nm73gf_nmfe" + ] + ); + } } diff --git a/components/scheduler/src/lib.rs b/components/scheduler/src/lib.rs index 3349ad0..97db9ad 100644 --- a/components/scheduler/src/lib.rs +++ b/components/scheduler/src/lib.rs @@ -270,11 +270,24 @@ impl SchedulerType { .with_context( || format!("failed to execute {cmd_name} command for model {m:?}",), )?; + + // If SGE does not have a compute node ready during job submission + // qsub fails and gives this error: + // + // Unable to run job: warning: job is not allowed to run in any queue + // Your job ("") has been submitted + // Exiting. + // + // Checking stderr for this message to prevent bail! on non-zero exit code from qsub if !output.status.success() { - bail!( - "{cmd_name} failed: {}", - String::from_utf8_lossy(&output.stderr) - ); + let stderr = String::from_utf8_lossy(&output.stderr); + let sge_queuing_warning = stderr.contains("Unable to run job: warning") + && stderr.contains("job is not allowed to run in any queue") + && stderr.contains("has been submitted"); + if !sge_queuing_warning { + bail!("{cmd_name} failed: {stderr}"); + } + log::warn!("{cmd_name} reported a warning but the job was submitted: {stderr}"); } let stdout = String::from_utf8_lossy(&output.stdout); @@ -284,10 +297,34 @@ impl SchedulerType { num.parse() .map_err(|e| anyhow!("Failed to parse job ID '{stdout}': {e}"))? } - SchedulerType::Sge(_) => stdout - .trim() - .parse() - .map_err(|e| anyhow!("Failed to parse job ID '{stdout}': {e}"))?, + SchedulerType::Sge(_) => { + // If qsub failed due to no compute nodes being available, + // the job ID is not printed in stdout but rather in the + // error message given to stderr (see error message template above). + if !stdout.trim().is_empty() { + stdout + .trim() + .parse() + .map_err(|e| anyhow!("Failed to parse job ID '{stdout}': {e}"))? + } else { + // Need to isolate job ID from + // Your job ("") has been submitted + let stderr = String::from_utf8_lossy(&output.stderr); + let job_id_str = stderr + .lines() + .find_map(|line| { + line.trim() + .strip_prefix("Your job ") + .and_then(|rest| rest.split_whitespace().next()) + }) + .ok_or_else(|| { + anyhow!("Failed to find job ID in SGE output: {stderr}") + })?; + job_id_str + .parse() + .map_err(|e| anyhow!("Failed to parse job ID '{job_id_str}': {e}"))? + } + } }; out.push((m, job_id)); diff --git a/components/scheduler/src/sge.rs b/components/scheduler/src/sge.rs index bec9069..502c847 100644 --- a/components/scheduler/src/sge.rs +++ b/components/scheduler/src/sge.rs @@ -8,7 +8,7 @@ use tera::Tera; const DEFAULT_TEMPLATE: &str = r#"#!/bin/bash #$ -N {{job_name}} #$ -V -#$ -j oe +#$ -j y #$ -o {{log_path}} {% if parallel -%}#$ -pe orte {{num_mpi_cpus}}{% endif %}