From 178047fb6f3e847b7bc30dcf0e7094258e9689b4 Mon Sep 17 00:00:00 2001
From: Marc Balestreri
Date: Wed, 3 Dec 2025 21:34:41 -0800
Subject: [PATCH] fix(imputation): add missing CpGs from source in impute_from_standard

Fixes #86
---
 biolearn/imputation.py           | 1 +
 biolearn/test/test_imputation.py | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/biolearn/imputation.py b/biolearn/imputation.py
index cc34b14..62a67e8 100644
--- a/biolearn/imputation.py
+++ b/biolearn/imputation.py
@@ -16,6 +16,7 @@ def impute_from_standard(dnam, cpg_source, cpgs_to_impute=None):
         pd.DataFrame: DataFrame with missing values filled.
     """
     if cpgs_to_impute:
+        dnam = dnam.reindex(dnam.index.union(cpgs_to_impute))
         impute_rows = dnam.loc[cpgs_to_impute]
         impute_rows = impute_rows.apply(lambda col: col.fillna(cpg_source))
         df_filled = dnam.combine_first(impute_rows)
diff --git a/biolearn/test/test_imputation.py b/biolearn/test/test_imputation.py
index 28680c1..5e26c52 100644
--- a/biolearn/test/test_imputation.py
+++ b/biolearn/test/test_imputation.py
@@ -38,6 +38,15 @@ def test_impute_from_standard():
     assert df_filled.loc["cpg2", "Sample1"] == 2.5
 
 
+def test_impute_from_standard_adds_missing_cpgs():
+    cpgs_with_missing = ["cpg1", "cpg5"]
+    df_filled = impute_from_standard(
+        df_test, cpg_averages_test, cpgs_to_impute=cpgs_with_missing
+    )
+    assert "cpg5" in df_filled.index
+    assert df_filled.loc["cpg5", "Sample1"] == 5.5
+
+
 def test_impute_from_standard_specific_cpgs():
     specific_cpgs = ["cpg1", "cpg3"]
     df_filled = impute_from_standard(