Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,14 @@ pub enum Command {
#[arg(long)]
kinship_groups: Option<String>,

/// Phenotype column carrying per-sample time values for the
/// longitudinal random-slope LMM (GMMAT `glmmkin.R:174-183`).
/// Each input --kinship matrix expands into three variance
/// components (intercept, intercept-slope covariance, slope).
/// Requires --kinship; gaussian family only; single-trait only.
#[arg(long)]
random_slope: Option<String>,

/// Known loci file for conditional analysis (one chr:pos:ref:alt per line)
#[arg(long)]
known_loci: Option<PathBuf>,
Expand Down
78 changes: 45 additions & 33 deletions src/commands/staar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub struct StaarArgs {
pub scang_step: usize,
pub kinship: Vec<PathBuf>,
pub kinship_groups: Option<String>,
pub random_slope: Option<String>,
pub known_loci: Option<PathBuf>,
pub null_model: Option<PathBuf>,
pub emit_sumstats: bool,
Expand Down Expand Up @@ -78,11 +79,12 @@ fn build_config(args: StaarArgs) -> Result<StaarConfig, CohortError> {
));
}

// Multi-trait dispatch. Joint MultiSTAAR covers unrelated continuous
// traits with a shared covariate matrix; everything else (kinship-aware
// joint null, SPA for binary, AI-STAAR ancestry weighting, sumstats
// dump) is a separate track. Reject the combinations at config-build
// time so the pipeline never sees an invalid state.
// Multi-trait dispatch. Joint MultiSTAAR covers unrelated traits
// (gaussian or binomial) with a shared covariate matrix; kinship-aware
// joint null, single-trait SPA calibration, AI-STAAR ancestry
// weighting, and MetaSTAAR sumstats stay on their own tracks for now.
// Reject the combinations at config-build time so the pipeline never
// sees an invalid state.
let multi_trait = args.trait_names.len() > 1;
if multi_trait {
let n_traits = args.trait_names.len();
Expand All @@ -95,7 +97,9 @@ fn build_config(args: StaarArgs) -> Result<StaarConfig, CohortError> {
if args.spa {
return Err(reject(
"--spa",
"SPA applies to binary traits; joint MultiSTAAR is unrelated continuous only",
"SPA is a single-trait tail-calibration path; joint MultiSTAAR \
routes every per-variant common test through the joint χ²(k) \
kernel instead",
));
}
if blank_to_none(args.ancestry_col.clone()).is_some() {
Expand All @@ -104,23 +108,45 @@ fn build_config(args: StaarArgs) -> Result<StaarConfig, CohortError> {
"AI-STAAR ancestry weighting has not been composed with the joint multi-trait kernel",
));
}
if !args.kinship.is_empty() {
// Multi-trait kinship + `--kinship-groups` are supported via
// `MultiNull::fit_with_kinship` (port of GMMAT's
// `fit_null_glmmkin_multi`, dense-backend first cut). The
// `--ancestry-col` reject above already covers the AI-STAAR
// combination which has not been composed with the kinship
// kernel yet.
if args.emit_sumstats {
return Err(reject(
"--kinship",
"kinship-aware joint null (fit_null_glmmkin_multi) is not yet implemented; \
the current path assumes unrelated samples",
"--emit-sumstats",
"MetaSTAAR sumstats are single-trait; run each trait separately to dump U/K",
));
}
if blank_to_none(args.kinship_groups.clone()).is_some() {
if blank_to_none(args.random_slope.clone()).is_some() {
return Err(reject(
"--kinship-groups",
"kinship-aware joint null is not yet implemented",
"--random-slope",
"GMMAT `glmmkin.R:50` rejects multi-pheno + duplicated ids; the joint \
multi-trait null does not compose with random slopes",
));
}
if args.emit_sumstats {
return Err(reject(
"--emit-sumstats",
"MetaSTAAR sumstats are single-trait; run each trait separately to dump U/K",
}

// `--random-slope` requires kinship and gaussian family (single-trait
// binary runs through PQL which doesn't implement the random-slope
// expansion; GMMAT `glmmkin.R:30` enforces `method.optim == "AI"` for
// random.slope which is what our `fit_reml_random_slope` uses).
if blank_to_none(args.random_slope.clone()).is_some() {
if args.kinship.is_empty() {
return Err(CohortError::Input(
"--random-slope requires at least one --kinship matrix. GMMAT \
`glmmkin.R:64` warns and drops random.slope when samples are unrelated \
and there is no kinship."
.into(),
));
}
if args.spa {
return Err(CohortError::Input(
"--random-slope is gaussian-only. --spa applies to binary traits and is \
not compatible with the random-slope LMM."
.into(),
));
}
}
Expand Down Expand Up @@ -232,6 +258,7 @@ fn build_config(args: StaarArgs) -> Result<StaarConfig, CohortError> {
},
kinship: args.kinship,
kinship_groups: blank_to_none(args.kinship_groups),
random_slope: blank_to_none(args.random_slope),
known_loci: args.known_loci,
null_model_path: args.null_model,
run_mode: if multi_trait {
Expand Down Expand Up @@ -563,6 +590,7 @@ mod tests {
scang_step: 10,
kinship: Vec::new(),
kinship_groups: None,
random_slope: None,
known_loci: None,
null_model: None,
emit_sumstats: false,
Expand Down Expand Up @@ -621,22 +649,6 @@ mod tests {
assert_input_error_mentions(build_config(args), "--ancestry-col");
}

#[test]
fn multi_trait_rejects_kinship() {
let pheno = pheno_file();
let mut args = multi_trait_args(pheno.path().to_path_buf(), &["BMI", "HEIGHT"]);
args.kinship = vec![PathBuf::from("/no/such/kin.tsv")];
assert_input_error_mentions(build_config(args), "--kinship");
}

#[test]
fn multi_trait_rejects_kinship_groups() {
let pheno = pheno_file();
let mut args = multi_trait_args(pheno.path().to_path_buf(), &["BMI", "HEIGHT"]);
args.kinship_groups = Some("ancestry".into());
assert_input_error_mentions(build_config(args), "--kinship-groups");
}

#[test]
fn multi_trait_rejects_emit_sumstats() {
let pheno = pheno_file();
Expand Down
2 changes: 2 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ fn run(
scang_step,
kinship,
kinship_groups,
random_slope,
known_loci,
null_model,
emit_sumstats,
Expand Down Expand Up @@ -187,6 +188,7 @@ fn run(
scang_step,
kinship,
kinship_groups,
random_slope,
known_loci,
null_model,
emit_sumstats,
Expand Down
70 changes: 70 additions & 0 deletions src/staar/kinship/load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,76 @@ pub fn load_groups(
Ok(partition)
}

/// Load the per-sample time vector for the random-slope LMM. Ports the
/// `time.var <- data[idx, random.slope]` extraction at upstream
/// `GMMAT/R/glmmkin.R:113`. Rows are returned in the same compacted
/// order `load_phenotype` produces (i.e., filtered by `pheno_mask` and
/// aligned with `geno.sample_names`), so `time_var[i]` aligns with the
/// i-th row of `y` / `x`.
pub fn load_random_slope(
engine: &DfEngine,
phenotype: &Path,
time_col: &str,
geno: &GenotypeResult,
pheno_mask: &[bool],
column_map: &HashMap<String, String>,
out: &dyn Output,
) -> Result<Vec<f64>, CohortError> {
engine.register_csv("_pheno_rs", phenotype, b'\t')?;
let cols = engine.table_columns("_pheno_rs")?;

let resolved =
crate::staar::model::resolve_column(time_col, &cols, column_map, "RandomSlope")?;
let id_col = crate::staar::model::resolve_id_column(&cols, column_map);
let sample_list = geno
.sample_names
.iter()
.map(|s| format!("'{s}'"))
.collect::<Vec<_>>()
.join(",");

let sql = format!(
"SELECT \"{id_col}\", CAST(\"{resolved}\" AS DOUBLE) \
FROM _pheno_rs \
WHERE \"{id_col}\" IN ({sample_list}) AND \"{resolved}\" IS NOT NULL"
);
let batches = engine.collect(&sql)?;

let mut id_to_value: HashMap<String, f64> = HashMap::new();
for batch in &batches {
for row in 0..batch.num_rows() {
let id = crate::staar::model::arrow_str(batch.column(0).as_ref(), row);
let v = crate::staar::model::arrow_f64(batch.column(1).as_ref(), row);
if !v.is_finite() {
return Err(CohortError::Input(format!(
"random.slope column '{resolved}' has non-finite value for sample '{id}'"
)));
}
id_to_value.insert(id, v);
}
}

let mut out_vec: Vec<f64> = Vec::new();
for (vcf_idx, name) in geno.sample_names.iter().enumerate() {
if !pheno_mask[vcf_idx] {
continue;
}
let v = *id_to_value.get(name).ok_or_else(|| {
CohortError::Input(format!(
"Sample '{name}' has no value in random.slope column '{resolved}'"
))
})?;
out_vec.push(v);
}
out.status(&format!(
" Random-slope time values: n = {}, range = [{}, {}]",
out_vec.len(),
out_vec.iter().cloned().fold(f64::INFINITY, f64::min),
out_vec.iter().cloned().fold(f64::NEG_INFINITY, f64::max),
));
Ok(out_vec)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading
Loading