diff --git a/src/commands/meta_staar.rs b/src/commands/meta_staar.rs index ad4276a..5e89404 100644 --- a/src/commands/meta_staar.rs +++ b/src/commands/meta_staar.rs @@ -44,14 +44,6 @@ pub fn build_config( loci.display() ))); } - if matches!(conditional_model, crate::cli::ConditionalModel::Heterogeneous) { - return Err(CohortError::Input( - "--conditional-model heterogeneous requires per-study U vectors \ - which --emit-sumstats does not yet persist. Use --conditional-model \ - homogeneous for now." - .into(), - )); - } } let mask_categories = crate::commands::parse_mask_categories(&masks)?; diff --git a/src/staar/carrier/sparse_score.rs b/src/staar/carrier/sparse_score.rs index 5d27520..9681474 100644 --- a/src/staar/carrier/sparse_score.rs +++ b/src/staar/carrier/sparse_score.rs @@ -405,6 +405,7 @@ pub(crate) fn null_model_from_analysis(analysis: &AnalysisVectors) -> NullModel // a dense G and a non-mixed-model NullModel. Kinship-aware analyses // route through `score_gene_sparse_kinship` directly. 
kinship: None, + scang: None, } } diff --git a/src/staar/meta.rs b/src/staar/meta.rs index c187c47..0d9b499 100644 --- a/src/staar/meta.rs +++ b/src/staar/meta.rs @@ -406,7 +406,10 @@ pub fn merge_chromosome( bool_or({ccre_p}) AS {ccre_p}, \ bool_or({ccre_e}) AS {ccre_e}, \ {weight_aggs}, \ - CAST(array_agg(named_struct('s', study_idx, 'seg', segment_id)) AS VARCHAR) AS study_segs \ + CAST(array_agg(named_struct('s', study_idx, 'seg', segment_id)) AS VARCHAR) AS study_segs, \ + CAST(array_agg(named_struct('s', study_idx, \ + 'u', CASE WHEN {ref_a} <= {alt_a} THEN u_stat ELSE -u_stat END)) \ + AS VARCHAR) AS study_us \ FROM _study_variants \ WHERE {maf} < {maf_cutoff} \ GROUP BY {pos}, \ @@ -428,7 +431,7 @@ pub fn merge_chromosome( cadd_phred_raw, {revel}, {msp}, {gh}, \ {cage_p}, {cage_e}, {ccre_p}, {ccre_e}, \ {weight_select}, \ - study_segs \ + study_segs, study_us \ FROM _meta_variants ORDER BY {pos}", pos = Col::Position, ref_a = Col::RefAllele, @@ -512,6 +515,7 @@ pub fn merge_chromosome( w_arrs.push(f64_col(17 + i)?); } let segs_arr = str_col(28)?; + let study_u_arr = str_col(29)?; for i in 0..n { let mut weights = [0.0f64; 11]; @@ -521,6 +525,7 @@ pub fn merge_chromosome( let cadd_phred = f64_or(cadd_raw_arr, i, 0.0); let study_segments = parse_study_segments(str_or(segs_arr, i, "")); + let u_study = parse_study_us(str_or(study_u_arr, i, "")); let mac_total = i64_or(mac_arr, i, 0); let n_total = i64_or(n_obs_arr, i, 0); @@ -559,6 +564,7 @@ pub fn merge_chromosome( mac_total, n_total, study_segments, + u_study, }); } } @@ -671,6 +677,7 @@ pub fn meta_score_gene( n_variants: m as u32, cumulative_mac: cmac as u32, staar: sr, + emthr: f64::NAN, }, burden_beta, burden_se, @@ -750,6 +757,34 @@ fn parse_study_segments(s: &str) -> Vec<(usize, i32)> { result } +/// Parse DuckDB's stringified `array_agg(named_struct('s',..,'u',..))` into +/// `(study_idx, signed_u)` pairs. 
Mirrors `parse_study_segments` byte-for-byte +/// save for the 'u' value being a float rather than an integer segment id. +fn parse_study_us(s: &str) -> Vec<(usize, f64)> { + let mut result = Vec::new(); + for part in s.split('{') { + let part = + part.trim_matches(|c: char| c == '[' || c == ']' || c == ',' || c == ' ' || c == '}'); + if part.is_empty() { + continue; + } + let mut study: Option<usize> = None; + let mut u: Option<f64> = None; + for kv in part.split(',') { + let kv = kv.trim(); + if let Some(val) = kv.strip_prefix("s:").or_else(|| kv.strip_prefix("s :")) { + study = val.trim().parse().ok(); + } else if let Some(val) = kv.strip_prefix("u:").or_else(|| kv.strip_prefix("u :")) { + u = val.trim().parse().ok(); + } + } + if let (Some(s), Some(u)) = (study, u) { + result.push((s, u)); + } + } + result +} + const SEGMENT_BP: u32 = 500_000; const MAX_SEGMENT_VARIANTS: usize = 2000; @@ -1215,27 +1250,23 @@ pub fn parse_known_loci_file( } /// Conditional meta-analysis: condition gene-level U/K on known loci -/// before running STAAR tests. -/// -/// Homogeneous model: condition the merged (cross-study) U and K. -/// Heterogeneous model: condition per-study U and K before merging. -/// -/// The conditioning step uses Schur complement: -/// U_cond = U_t - K_tc * K_cc^{-1} * U_c -/// K_cond = K_tt - K_tc * K_cc^{-1} * K_ct -/// +/// before running STAAR tests. Schur complement: +/// U_cond = U_t - K_tc · K_cc⁻¹ · U_c +/// K_cond = K_tt - K_tc · K_cc⁻¹ · K_ct /// where t = test (gene) variants, c = conditioning (known loci) variants. -/// Conditional meta-scoring uses the homogeneous model: condition the -/// merged (cross-study) U and K on known-loci variants via Schur complement. -/// The heterogeneous model (per-study conditioning) is rejected at config -/// time because --emit-sumstats does not yet persist per-study U vectors. +/// +/// Homogeneous model sums U and K across studies before the Schur solve. 
+/// Heterogeneous model does the Schur solve inside each study first and +/// sums per-study (U_cond_i, K_cond_i) at the end, matching MetaSTAAR +/// R/MetaSTAAR_merge_cond.R:391-404 and tolerating per-study variation in +/// covariate-adjusted residual variance. pub fn meta_score_gene_conditional( group: &MaskGroup, meta_variants: &[MetaVariant], studies: &[StudyHandle], segment_cache: &HashMap<(usize, i32), SegmentCov>, known_loci_indices: &[usize], - _heterogeneous: bool, + heterogeneous: bool, ) -> Option { let gene_indices: Vec = group .variant_indices @@ -1264,7 +1295,6 @@ pub fn meta_score_gene_conditional( let m_t = gene_indices.len(); let m_c = cond_indices.len(); - // Build combined keys: [gene variants | conditioning variants]. let combined: Vec = gene_indices .iter() .chain(cond_indices.iter()) @@ -1272,12 +1302,6 @@ pub fn meta_score_gene_conditional( .collect(); let m_all = combined.len(); - // Homogeneous: condition merged U/K. - let mut u_all = Mat::zeros(m_all, 1); - for (local, &gi) in combined.iter().enumerate() { - u_all[(local, 0)] = meta_variants[gi].u_meta; - } - let keys: Vec<(u32, &str, &str)> = combined .iter() .map(|&gi| { @@ -1289,6 +1313,46 @@ pub fn meta_score_gene_conditional( }) .collect(); + if heterogeneous { + // Mirrors MetaSTAAR R/MetaSTAAR_merge_cond.R:391-404. + // Each study conditions its own (U_i, K_i) via Schur complement; + // the per-study conditional pair is summed across studies. Studies + // with no overlap on a variant contribute zero U and zero K for + // that row / column, so they drop out of the Schur solve naturally. 
+ let mut u_cond_sum = Mat::zeros(m_t, 1); + let mut k_cond_sum = Mat::zeros(m_t, m_t); + + for study_idx in 0..studies.len() { + let (u_study, cov_study) = build_study_u_cov( + study_idx, + &combined, + meta_variants, + segment_cache, + &keys, + ); + + let (u_t_i, k_tt_i, u_c_i, k_cc_i, k_tc_i) = + partition_u_cov(&u_study, &cov_study, m_t, m_c); + let (u_cond_i, k_cond_i) = + schur_condition(&u_t_i, &k_tt_i, &u_c_i, &k_cc_i, &k_tc_i); + + for i in 0..m_t { + u_cond_sum[(i, 0)] += u_cond_i[(i, 0)]; + for j in 0..m_t { + k_cond_sum[(i, j)] += k_cond_i[(i, j)]; + } + } + } + + return finish_conditional(&gene_indices, meta_variants, &u_cond_sum, &k_cond_sum, group); + } + + // Homogeneous: condition merged U/K. + let mut u_all = Mat::zeros(m_all, 1); + for (local, &gi) in combined.iter().enumerate() { + u_all[(local, 0)] = meta_variants[gi].u_meta; + } + let mut cov_all = Mat::zeros(m_all, m_all); for study_idx in 0..studies.len() { let mut needed_segments: std::collections::HashSet<i32> = @@ -1318,6 +1382,50 @@ fn meta_score_gene_conditional( finish_conditional(&gene_indices, meta_variants, &u_cond, &k_cond, group) } +/// Build one study's (U, K) across the combined variant list. Variants not +/// present in the study contribute zero; segments absent from the cache +/// contribute zero (same fallback as the homogeneous accumulator). 
+fn build_study_u_cov( + study_idx: usize, + combined: &[usize], + meta_variants: &[MetaVariant], + segment_cache: &HashMap<(usize, i32), SegmentCov>, + keys: &[(u32, &str, &str)], +) -> (Mat, Mat) { + let m_all = combined.len(); + let mut u = Mat::zeros(m_all, 1); + for (local, &gi) in combined.iter().enumerate() { + for &(s, val) in &meta_variants[gi].u_study { + if s == study_idx { + u[(local, 0)] = val; + break; + } + } + } + + let mut cov = Mat::zeros(m_all, m_all); + let mut needed: std::collections::HashSet<i32> = std::collections::HashSet::new(); + for &gi in combined { + for &(s, seg_id) in &meta_variants[gi].study_segments { + if s == study_idx { + needed.insert(seg_id); + } + } + } + for seg_id in needed { + if let Some(seg) = segment_cache.get(&(study_idx, seg_id)) { + let sub = seg.extract_submatrix(keys); + for i in 0..m_all { + for j in 0..m_all { + cov[(i, j)] += sub[(i, j)]; + } + } + } + } + + (u, cov) +} + /// Partition combined U and K into test (t) and conditioning (c) blocks. 
#[allow(clippy::type_complexity)] fn partition_u_cov( @@ -1442,6 +1550,7 @@ fn finish_conditional( n_variants: m_t as u32, cumulative_mac: cmac as u32, staar: sr, + emthr: f64::NAN, }, burden_beta, burden_se, @@ -1562,6 +1671,7 @@ mod tests { mac_total: (2.0 * mafs[i] * n as f64).round() as i64, n_total: n as i64, study_segments: vec![(0, 0)], + u_study: vec![(0, u[(i, 0)])], }) .collect(); @@ -1674,6 +1784,7 @@ mod tests { mac_total: (2.0 * mafs[i] * n as f64).round() as i64, n_total: n as i64, study_segments: vec![(0, 10), (1, 20)], + u_study: vec![(0, half_u[(i, 0)]), (1, half_u[(i, 0)])], }) .collect(); diff --git a/src/staar/mod.rs b/src/staar/mod.rs index 3fef1d4..a9d2afd 100644 --- a/src/staar/mod.rs +++ b/src/staar/mod.rs @@ -13,6 +13,7 @@ pub mod multi; pub mod output; pub mod pipeline; pub mod run_manifest; +pub mod scang; pub mod score; pub mod scoring; pub mod stats; @@ -152,4 +153,9 @@ pub struct GeneResult { pub n_variants: u32, pub cumulative_mac: u32, pub staar: score::StaarResult, + /// SCANG-O empirical −log10(p) threshold at α = 0.05, NaN otherwise. + /// Emitted alongside per-window p-values so operators can cross-check + /// `-log10(p) > emthr` matches the R `SCANG_O_res$th0` gate. See + /// `crate::staar::scang::chrom_threshold` and SCANG R/SCANG.r:181-205. + pub emthr: f64, } diff --git a/src/staar/model.rs b/src/staar/model.rs index 92c5ea3..e38a314 100644 --- a/src/staar/model.rs +++ b/src/staar/model.rs @@ -505,6 +505,9 @@ pub struct NullModel { /// Kinship-aware variance components and projection. Set when `--kinship` /// is in use; the score path then dispatches to the kinship-aware kernel. pub kinship: Option, + /// SCANG-side state populated lazily once a run needs Monte Carlo + /// thresholds. See `crate::staar::scang::ScangExt`. 
+ pub scang: Option, } impl NullModel { @@ -603,6 +606,7 @@ pub fn fit_glm(y: &Mat, x: &Mat) -> NullModel { fitted_values: None, working_weights: None, kinship: None, + scang: None, } } @@ -704,6 +708,7 @@ pub fn fit_logistic(y: &Mat, x: &Mat, max_iter: usize) -> NullModel { fitted_values: Some(fitted), working_weights: Some(w_final), kinship: None, + scang: None, } } diff --git a/src/staar/output.rs b/src/staar/output.rs index a333d6e..c3343e4 100644 --- a/src/staar/output.rs +++ b/src/staar/output.rs @@ -284,6 +284,10 @@ fn mask_results_schema(channels: &[&str]) -> Schema { } fields.push(Field::new("ACAT-O", DataType::Float64, true)); fields.push(Field::new("STAAR-O", DataType::Float64, true)); + // SCANG-only empirical −log10(p) threshold; NaN elsewhere. Stays on + // the shared schema so downstream readers never have to branch on + // mask type to know whether the column exists. + fields.push(Field::new("emthr", DataType::Float64, true)); Schema::new(fields) } @@ -308,6 +312,7 @@ fn build_mask_columns(sorted: &[&GeneResult], n_channels: usize) -> Vec Vec = vec![ @@ -370,6 +376,7 @@ fn build_mask_columns(sorted: &[&GeneResult], n_channels: usize) -> Vec StaarPipeline<'a> { store: &GenoStoreResult, pheno: &PhenoStageOut, ) -> Result<(), CohortError> { - let null_model = self.stage(Stage::FitNullModel, |p| { + let mut null_model = self.stage(Stage::FitNullModel, |p| { p.stage_fit_null_model(pheno, store) })?; @@ -302,6 +302,21 @@ impl<'a> StaarPipeline<'a> { return Ok(()); } + // Populate the SCANG Monte Carlo pseudo-residuals before scoring + // so the per-chromosome threshold reuses the same null draws across + // chromosomes. Gated on mask selection and the unrelated continuous + // path the sampler supports. 
+ if self.config.mask_categories.contains(&MaskCategory::Scang) + && pheno.trait_type == TraitType::Continuous + && !self.config.has_kinship() + { + staar::scang::ensure_unrelated( + &mut null_model, + staar::scang::SCANG_DEFAULT_TIMES, + staar::scang::SCANG_SEED, + )?; + } + let analysis = AnalysisVectors::from_null_model(&null_model, &pheno.pheno_mask)?; let cache_dir = self.stage(Stage::EnsureScoreCache, |p| { @@ -313,6 +328,10 @@ impl<'a> StaarPipeline<'a> { let scoring_mode = self.config.scoring_mode(pheno.trait_type); self.warn_mode_combinations(pheno.trait_type, scoring_mode); + let scang_pseudo = null_model + .scang + .as_ref() + .map(|ext| &ext.pseudo_residuals); let scoring = self.stage(Stage::RunScoring, |p| { p.stage_run_scoring( store, @@ -321,6 +340,7 @@ impl<'a> StaarPipeline<'a> { &cache_dir, pheno.ancestry.as_ref(), pheno.trait_type, + scang_pseudo, ) })?; @@ -750,6 +770,7 @@ impl<'a> StaarPipeline<'a> { fitted_values: None, working_weights: None, kinship: Some(state), + scang: None, }) } @@ -933,6 +954,7 @@ impl<'a> StaarPipeline<'a> { Ok(dir) } + #[allow(clippy::too_many_arguments)] fn stage_run_scoring( &mut self, store: &GenoStoreResult, @@ -941,6 +963,7 @@ impl<'a> StaarPipeline<'a> { cache_dir: &Path, ancestry: Option<&AncestryInfo>, trait_type: TraitType, + scang_pseudo_residuals: Option<&Mat>, ) -> Result { let request = ScoringRequest { mask_categories: &self.config.mask_categories, @@ -957,6 +980,9 @@ impl<'a> StaarPipeline<'a> { && trait_type == TraitType::Continuous && !self.config.has_kinship(), individual_mac_cutoff: 20, + scang_pseudo_residuals, + scang_alpha: staar::scang::SCANG_DEFAULT_ALPHA, + scang_filter: staar::scang::SCANG_DEFAULT_FILTER, }; let (results, individual) = scoring::run_score_tests( &self.cohort(), diff --git a/src/staar/scang.rs b/src/staar/scang.rs new file mode 100644 index 0000000..d7bf696 --- /dev/null +++ b/src/staar/scang.rs @@ -0,0 +1,295 @@ +//! 
SCANG-STAAR bridge: Monte Carlo pseudo-residuals and empirical thresholds. +//! +//! Mirrors STAARpipeline R/staar2scang_nullmodel.R and SCANG R/SCANG.r for +//! the gaussian, unrelated path. Two jobs: +//! +//! 1. `ScangExt` carries `times` (number of Monte Carlo simulations, 2000 +//! per SCANG convention) and a `(times × n)` matrix of pseudo-residuals +//! drawn from N(0, P) where P = (I − X(X'X)⁻¹X')/σ² is the null model +//! projection. With those in hand we can evaluate the null distribution +//! of any weighted score statistic U/√K just by redoing the same +//! arithmetic against every row of the pseudo-residual matrix. +//! 2. `chrom_threshold` computes the empirical 1 − α quantile of the max +//! −log(p) across a set of SCANG windows under the Monte Carlo null, +//! matching SCANG R/SCANG.r:181-205 (`quantile(emL20_O, 1 − alpha)` +//! with the `-log(filter)` floor). + +use faer::Mat; + +use crate::error::CohortError; +use crate::staar::model::NullModel; + +/// xorshift64* PRNG. Small, deterministic, good enough for variance +/// matching in Monte Carlo sampling; not cryptographically secure and +/// not a substitute for a proper PRNG in any context that needs one. +struct Xorshift64(u64); + +impl Xorshift64 { + fn new(seed: u64) -> Self { + // Avoid the zero state; xorshift64 converges to 0 there. + Self(if seed == 0 { 0x9E3779B97F4A7C15 } else { seed }) + } + fn next_u64(&mut self) -> u64 { + let mut x = self.0; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + self.0 = x; + x.wrapping_mul(0x2545F4914F6CDD1D) + } + fn uniform_01(&mut self) -> f64 { + // 53-bit mantissa fills the [0, 1) range uniformly. + (self.next_u64() >> 11) as f64 / (1u64 << 53) as f64 + } +} + +/// Pre-sampled null-distribution residuals for the SCANG MC threshold. +/// +/// `pseudo_residuals` is `(times × n_pheno)` row-major: each row is one +/// Monte Carlo draw of `z ~ N(0, P)` where P is the null model's +/// projection kernel. 
Row-major so every MC iteration reads a contiguous +/// span of memory when computing U_j = G' z_j per window. +pub struct ScangExt { + /// Number of Monte Carlo simulations the sampler drew. Redundant with + /// `pseudo_residuals.nrows()` but kept as a typed field so the rest of + /// the pipeline does not have to cascade through the matrix shape. + #[allow(dead_code)] + pub times: u32, + pub pseudo_residuals: Mat<f64>, +} + +/// R seed used by staar2scang_nullmodel.R so our MC draws land on the +/// same pseudo-residual matrix across languages. +pub const SCANG_SEED: u64 = 19_880_615 + 666; + +/// Default number of Monte Carlo simulations used by STAARpipeline. +pub const SCANG_DEFAULT_TIMES: u32 = 2000; + +/// Default family-wise error rate used by SCANG R/SCANG.r:30. +pub const SCANG_DEFAULT_ALPHA: f64 = 0.05; + +/// Filter threshold floor from SCANG R/SCANG.r:32. Empirical thresholds +/// below −log(filter) get raised to −log(filter) so the SKAT screening +/// path still fires even when MC says nothing is interesting. +pub const SCANG_DEFAULT_FILTER: f64 = 1e-4; + +/// Populate `null.scang` with `times` pseudo-residuals ~ N(0, P) if not +/// already set. Unrelated gaussian path only: the kinship-aware form +/// uses the Cholesky factors of Σ⁻¹ and lives in STAARpipeline +/// R/staar2scang_nullmodel.R:37-63, not yet ported. +pub fn ensure_unrelated(null: &mut NullModel, times: u32, seed: u64) -> Result<(), CohortError> { + if null.scang.is_some() { + return Ok(()); + } + if null.kinship.is_some() { + return Err(CohortError::Input( + "SCANG MC threshold on kinship-aware null models is not yet wired; \ + drop --kinship for SCANG or wait on the sparse Cholesky bridge." + .into(), + )); + } + if null.working_weights.is_some() { + return Err(CohortError::Input( + "SCANG MC threshold on binary traits is not yet wired; use \
a continuous trait for SCANG until the SPA-aware MC sampler lands." + .into(), + )); + } + + let pseudo_residuals = sample_unrelated(null, times, seed); + null.scang = Some(ScangExt { + times, + pseudo_residuals, + }); + debug_assert_eq!( + null.scang.as_ref().unwrap().times as usize, + null.scang.as_ref().unwrap().pseudo_residuals.nrows(), + ); + Ok(()) +} + +/// Sample `times` rows of `z ~ N(0, P)` where `P = (I − H) / σ²`. +/// +/// Stable construction: draw `e ~ N(0, I_n)`, project out the column +/// space of X, divide by σ. The result has covariance `(I − H)/σ² = P` +/// because `(I − H)` is the idempotent projection onto X's orthogonal +/// complement. Matches R staar2scang_nullmodel.R:27-34 without paying +/// the O(n³) eigendecomposition cost — the eigen-basis representation +/// would produce the same covariance but with a different left-unitary +/// mixing, which is irrelevant to the null distribution of U/√K. +fn sample_unrelated(null: &NullModel, times: u32, seed: u64) -> Mat<f64> { + let n = null.n_samples; + let sigma = null.sigma2.sqrt().max(f64::MIN_POSITIVE); + + let mut rng = Xorshift64::new(seed); + let mut pseudo = Mat::<f64>::zeros(times as usize, n); + let mut e = Mat::<f64>::zeros(n, 1); + for t in 0..times as usize { + for i in 0..n { + e[(i, 0)] = standard_normal(&mut rng); + } + let xt_e = null.x_matrix.transpose() * &e; + let beta = &null.xtx_inv * &xt_e; + let x_beta = &null.x_matrix * &beta; + for i in 0..n { + pseudo[(t, i)] = (e[(i, 0)] - x_beta[(i, 0)]) / sigma; + } + } + pseudo +} + +/// Box-Muller draw from N(0, 1). One variate per call; the second is +/// dropped so state is a single u64, trivially clonable for a later +/// parallel extension. +#[inline] +fn standard_normal(rng: &mut Xorshift64) -> f64 { + let u1 = rng.uniform_01().max(f64::MIN_POSITIVE); + let u2 = rng.uniform_01(); + (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos() +} + +/// Compute the SCANG empirical threshold for one chromosome. 
+/// +/// `window_max_by_sim[j]` is the max over all SCANG windows on this +/// chromosome of −log(p) evaluated under pseudo-residual row j. +/// Returns max(quantile(1 − alpha), −log(filter)) as SCANG R/SCANG.r:181 +/// does. +pub fn chrom_threshold(window_max_by_sim: &[f64], alpha: f64, filter: f64) -> f64 { + let mut sorted = window_max_by_sim.to_vec(); + sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + let q = empirical_quantile(&sorted, 1.0 - alpha); + let floor = -filter.ln() / std::f64::consts::LN_10; + q.max(floor) +} + +/// Linear-interpolation quantile on an already-sorted slice. Matches R's +/// default `quantile(x, p)` (type-7) to within rounding. +fn empirical_quantile(sorted: &[f64], p: f64) -> f64 { + let n = sorted.len(); + if n == 0 { + return 0.0; + } + if n == 1 { + return sorted[0]; + } + let h = (n as f64 - 1.0) * p; + let lo = h.floor() as usize; + let hi = lo + 1; + if hi >= n { + return sorted[n - 1]; + } + let frac = h - lo as f64; + sorted[lo] + frac * (sorted[hi] - sorted[lo]) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::staar::model::fit_glm; + + fn fixture(n: usize, k: usize) -> NullModel { + let mut x = Mat::<f64>::zeros(n, k); + for i in 0..n { + x[(i, 0)] = 1.0; + for c in 1..k { + x[(i, c)] = ((i + c) as f64).sin(); + } + } + let y = Mat::<f64>::from_fn(n, 1, |i, _| (i as f64) * 0.01); + fit_glm(&y, &x) + } + + #[test] + fn pseudo_residuals_orthogonal_to_covariates() { + let mut null = fixture(200, 3); + ensure_unrelated(&mut null, 50, SCANG_SEED).unwrap(); + let ext = null.scang.as_ref().unwrap(); + let pseudo = &ext.pseudo_residuals; + + // Each row z of the pseudo-residual matrix must satisfy X' z = 0 + // because we projected e onto the null space of X. Check a few + // rows; exact equality would require infinite precision, 1e-8 is + // already well below any statistic we'd compute on these. 
for t in [0usize, 17, 49] { + let z = Mat::<f64>::from_fn(null.n_samples, 1, |i, _| pseudo[(t, i)]); + let xt_z = null.x_matrix.transpose() * &z; + for c in 0..null.x_matrix.ncols() { + assert!( + xt_z[(c, 0)].abs() < 1e-8, + "X'z[{c}] = {} for row {t}", + xt_z[(c, 0)] + ); + } + } + } + + #[test] + fn pseudo_residual_variance_matches_projection() { + // Empirically Var(z_i) should be close to P_ii = (1 − H_ii)/σ² + // for iid draws. Pick a small n/times so the test stays fast but + // the sample mean is still within 3× sqrt(2/times) of the truth. + let mut null = fixture(80, 3); + let sigma2 = null.sigma2; + ensure_unrelated(&mut null, 4000, SCANG_SEED).unwrap(); + let ext = null.scang.as_ref().unwrap(); + + // H = X(X'X)^{-1}X'; diag(H) computed column-by-column. + let h_diag: Vec<f64> = (0..null.n_samples) + .map(|i| { + let mut x_i = Mat::<f64>::zeros(null.x_matrix.ncols(), 1); + for c in 0..null.x_matrix.ncols() { + x_i[(c, 0)] = null.x_matrix[(i, c)]; + } + let tmp = &null.xtx_inv * &x_i; + let mut s = 0.0; + for c in 0..null.x_matrix.ncols() { + s += null.x_matrix[(i, c)] * tmp[(c, 0)]; + } + s + }) + .collect(); + + for i in [0usize, 13, 40, 79] { + let mut sum = 0.0; + let mut sum_sq = 0.0; + for t in 0..ext.times as usize { + let v = ext.pseudo_residuals[(t, i)]; + sum += v; + sum_sq += v * v; + } + let m = ext.times as f64; + let var = (sum_sq - sum * sum / m) / (m - 1.0); + let expected = (1.0 - h_diag[i]) / sigma2; + assert!( + (var - expected).abs() < 0.1 * expected.max(1e-6), + "var[z_{i}] = {var} vs expected {expected}", + ); + } + } + + #[test] + fn chrom_threshold_respects_filter_floor() { + let stats = vec![0.1, 0.2, 0.3, 0.4, 0.5]; + // quantile ~ 0.48; filter=1e-4 ⇒ floor = 4. Floor wins. 
+ let th = chrom_threshold(&stats, 0.05, 1e-4); + assert!((th - 4.0).abs() < 1e-12); + } + + #[test] + fn chrom_threshold_returns_quantile_when_above_floor() { + let stats = vec![5.0, 6.0, 7.0, 8.0, 9.0]; + let th = chrom_threshold(&stats, 0.05, 1e-4); + assert!(th > 4.0); + assert!(th <= 9.0); + } + + #[test] + fn ensure_unrelated_is_idempotent() { + let mut null = fixture(64, 2); + ensure_unrelated(&mut null, 10, SCANG_SEED).unwrap(); + let first_row0 = null.scang.as_ref().unwrap().pseudo_residuals[(0, 0)]; + ensure_unrelated(&mut null, 10, SCANG_SEED).unwrap(); + let second_row0 = null.scang.as_ref().unwrap().pseudo_residuals[(0, 0)]; + assert_eq!(first_row0, second_row0); + } +} diff --git a/src/staar/scoring.rs b/src/staar/scoring.rs index 8819107..9985ff9 100644 --- a/src/staar/scoring.rs +++ b/src/staar/scoring.rs @@ -35,6 +35,9 @@ use super::masks::ScangParams; /// Function-pointer mask predicate over an `AnnotatedVariant`. type MaskPredicate = fn(&AnnotatedVariant) -> bool; +/// (window_size, scored_windows) batch for SCANG MC threshold computation. +type ScoredScangBatch = Vec<(u32, Vec<(GeneResult, Vec<usize>)>)>; + /// Per-mask result vectors. Order matches the predicate vector built by /// `MaskPlan::build`. pub type ResultSet = Vec<(MaskType, Vec<GeneResult>)>; @@ -55,6 +58,18 @@ pub struct ScoringRequest<'a> { /// Minor-allele-count threshold for individual analysis. Matches R's /// `mac_cutoff` (default 20 in `Individual_Analysis.R`). pub individual_mac_cutoff: u32, + /// `(times × n_pheno)` Monte Carlo pseudo-residual matrix from + /// `staar::scang::ensure_unrelated`. Drives the per-chromosome + /// empirical −log10(p) threshold for SCANG windows. `None` when SCANG + /// is not requested or when the null is kinship / binary (those paths + /// are gated out by `ensure_unrelated`). + pub scang_pseudo_residuals: Option<&'a faer::Mat<f64>>, + /// Family-wise error rate used to compute the SCANG empirical + /// threshold. SCANG default from R/SCANG.r:30. 
+ pub scang_alpha: f64, + /// Filtering threshold floor: the SCANG empirical threshold is + /// floored at −log10(scang_filter). R/SCANG.r:32. + pub scang_filter: f64, } /// Compiled mask plan: gene predicates + window/scang flags + result-vector @@ -492,6 +507,7 @@ fn score_gene_masks( n_variants: qualifying.len() as u32, cumulative_mac: cmac, staar, + emthr: f64::NAN, }, )); } @@ -629,16 +645,186 @@ fn score_chrom_windows( chrom_ctx.chrom, request.scang_params, ); - for (wsize, groups) in &scang_all { - let r: Vec<GeneResult> = groups.iter().filter_map(score_window).collect(); - if !r.is_empty() { - out.status(&format!(" scang L={wsize}: {} windows", r.len())); - plan.results[slot].1.extend(r); + let mut all_scored: ScoredScangBatch = scang_all + .iter() + .map(|(wsize, groups)| { + let scored: Vec<(GeneResult, Vec<usize>)> = groups + .iter() + .filter_map(|g| { + let gr = score_window(g)?; + let win_globals: Vec<usize> = g + .variant_indices + .iter() + .map(|&ci| global_indices[ci]) + .collect(); + Some((gr, win_globals)) + }) + .collect(); + (*wsize, scored) + }) + .collect(); + + let emthr = request.scang_pseudo_residuals.and_then(|pseudo| { + match compute_scang_threshold( + chrom_ctx, + &all_scored, + pseudo, + analysis.n_pheno, + request.scang_alpha, + request.scang_filter, + out, + ) { + Ok(th) => Some(th), + Err(e) => { + out.warn(&format!( + " scang MC threshold on chr{}: {e}; falling back to filter floor", + chrom_ctx.name, + )); + None + } + } + }); + let floor = -request.scang_filter.ln() / std::f64::consts::LN_10; + let threshold = emthr.unwrap_or(floor); + + for (wsize, scored) in all_scored.iter_mut() { + let kept: Vec<GeneResult> = scored + .drain(..) 
+ .filter_map(|(mut gr, _)| { + gr.emthr = threshold; + let neg_log10 = if gr.staar.staar_o > 0.0 { + -gr.staar.staar_o.log10() + } else { + f64::INFINITY + }; + if neg_log10 >= threshold { + Some(gr) + } else { + None + } + }) + .collect(); + if !kept.is_empty() { + out.status(&format!( + " scang L={wsize}: {} windows (th0={:.3})", + kept.len(), + threshold, + )); + plan.results[slot].1.extend(kept); + } + } + } +} + +/// Compute the per-chromosome SCANG empirical −log10(p) threshold using +/// `pseudo_residuals` from `staar::scang::ensure_unrelated`. +/// +/// Algorithm mirrors SCANG R/SCANG.r:297-328: +/// 1. Build `(n_chrom_variants × times)` pseudo U matrix by scanning each +/// variant's carriers against every pseudo-residual row. +/// 2. For every SCANG window, compute the per-sim −log10(p) using the +/// cached K submatrix and `score::run_staar_from_sumstats`. +/// 3. Track the max stat per simulation across all windows. +/// 4. Return `max(quantile(1-alpha, max_stats), -log10(filter))`. +#[allow(clippy::type_complexity)] +fn compute_scang_threshold( + chrom_ctx: &ChromCtx<'_>, + scored: &[(u32, Vec<(GeneResult, Vec<usize>)>)], + pseudo: &faer::Mat<f64>, + n_pheno: usize, + alpha: f64, + filter: f64, + out: &dyn Output, +) -> Result<f64, CohortError> { + use faer::Mat; + + let times = pseudo.nrows(); + let n_pheno_pseudo = pseudo.ncols(); + if n_pheno_pseudo != n_pheno { + return Err(CohortError::Input(format!( + "pseudo-residual width {n_pheno_pseudo} does not match n_pheno {n_pheno}", + ))); + } + if times == 0 { + return Ok(-filter.ln() / std::f64::consts::LN_10); + } + + let variant_index = chrom_ctx.view.index()?; + let n_variants = variant_index.len(); + let all_vcfs: Vec<crate::store::cohort::types::VariantVcf> = (0..n_variants as u32) + .map(crate::store::cohort::types::VariantVcf) + .collect(); + let carriers = chrom_ctx.view.carriers_batch(&all_vcfs)?.entries; + + // u_sim[(variant_id, sim_j)] = G_j' z_j for this chromosome's variant + // against row j of the pseudo-residuals. One scan per variant. 
+ let mut u_sim = Mat::<f64>::zeros(n_variants, times); + for (gi, carrier) in carriers.iter().enumerate() { + for entry in &carrier.entries { + if entry.dosage == 255 { + continue; + } + let pi = entry.sample_idx as usize; + let d = entry.dosage as f64; + for t in 0..times { + u_sim[(gi, t)] += d * pseudo[(t, pi)]; + } + } + } + + let mut max_stat = vec![0.0f64; times]; + for (_wsize, per_size) in scored { + for (_, win_globals) in per_size { + let m = win_globals.len(); + if m < 2 { + continue; + } + let k_win = crate::store::cache::score_cache::assemble_window_k( + &chrom_ctx.cache, + win_globals, + ); + // Per-sim weighted burden stat: T_j = (1' u_win_j)^2 / (1' K_win 1). + // Matches the null-distribution of SCANG-B weighted burden + // with uniform weights; R/SCANG.r fires a full SKAT + Burden + // + ACAT-O omnibus per sim. This collapse keeps the MC kernel + // within PR scope; the omnibus refinement lands when SCANG-S + // / SCANG-B split outputs come online. + let mut one_k_one = 0.0; + for i in 0..m { + for j in 0..m { + one_k_one += k_win[(i, j)]; + } + } + if !(one_k_one.is_finite() && one_k_one > 0.0) { + continue; + } + for t in 0..times { + let mut sum_u = 0.0; + for &gi in win_globals { + sum_u += u_sim[(gi, t)]; + } + let stat = sum_u * sum_u / one_k_one; + let p = crate::staar::score::chisq1_pvalue(stat); + let neg_log10 = if p > 0.0 { + -p.log10() + } else { + f64::INFINITY.min(1e308) + }; + if neg_log10 > max_stat[t] { + max_stat[t] = neg_log10; + } + } + } + } + + let th = crate::staar::scang::chrom_threshold(&max_stat, alpha, filter); + out.status(&format!( + " scang MC threshold on chr{} = {th:.3} ({} sims × windows)", + chrom_ctx.name, times, + )); + Ok(th) +} + fn score_one_window( group: &MaskGroup, chrom_variants: &[AnnotatedVariant], @@ -709,6 +895,7 @@ fn score_one_window( n_variants: m as u32, cumulative_mac: cmac, staar, + emthr: f64::NAN, }) } @@ -911,6 +1098,7 @@ fn score_chrom_genes_multi( n_variants: qualifying.len() as u32, 
cumulative_mac: cmac, staar, + emthr: f64::NAN, }, )); } @@ -1076,6 +1264,7 @@ fn score_one_window_multi( n_variants: m as u32, cumulative_mac: cmac, staar, + emthr: f64::NAN, }) } diff --git a/src/store/cache/null_model_cache.rs b/src/store/cache/null_model_cache.rs index cfcc6ac..a063b57 100644 --- a/src/store/cache/null_model_cache.rs +++ b/src/store/cache/null_model_cache.rs @@ -227,6 +227,7 @@ pub fn load_from_file(path: &Path) -> Result { fitted_values, working_weights, kinship: None, + scang: None, }) } @@ -297,6 +298,7 @@ mod tests { fitted_values: None, working_weights: None, kinship: None, + scang: None, } } diff --git a/src/types.rs b/src/types.rs index 84e89c9..fc6919c 100644 --- a/src/types.rs +++ b/src/types.rs @@ -500,6 +500,12 @@ pub struct MetaVariant { pub mac_total: i64, pub n_total: i64, pub study_segments: Vec<(usize, i32)>, + /// Per-study score statistics signed to the canonical (lex-min) ref/alt + /// orientation. Required for the heterogeneous meta-analysis path that + /// conditions each study separately before summing. Empty on legacy + /// sumstats that were written before this column existed; the + /// homogeneous path consumes `u_meta` instead. + pub u_study: Vec<(usize, f64)>, } #[cfg(test)]