From 54d1dbaa66cacad964b5fcce8c4466fcb7f6b012 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Fri, 29 Aug 2025 22:31:16 -0400
Subject: [PATCH 1/4] Update test_utils.R

add unite test for codecov reduce issue
---
 tests/testthat/test_utils.R | 487 +++++++++++++++++++++++++++++++++++-
 1 file changed, 486 insertions(+), 1 deletion(-)

diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R
index 11fda10..a622540 100644
--- a/tests/testthat/test_utils.R
+++ b/tests/testthat/test_utils.R
@@ -409,4 +409,489 @@ test_that("get_cos extracts CoS correctly with generated test results", {
     "No colocalization results in this region!"
   )
   expect_equal(result_empty, list("cos" = NULL, "cos_purity" = NULL))
-})
\ No newline at end of file
+})
+
+library(testthat)
+
+# Test for get_merge_ordered_with_indices function - Core case handling
+test_that("get_merge_ordered_with_indices handles conflicting variable orders correctly", {
+  
+  # Test Case 1: Conflicting orders (cycle detection)
+  vec1 <- c("A", "B", "C")
+  vec2 <- c("B", "A", "C")  # B comes before A (conflicting with vec1)
+  vec3 <- c("C", "A", "B")
+  
+  vector_list_cycle <- list(vec1, vec2, vec3)
+  
+  # Should trigger warning about different orders and use priority-based fallback
+  expect_warning(
+    result_cycle <- get_merge_ordered_with_indices(vector_list_cycle),
+    "Variable names in different datasets have different orders"
+  )
+  
+  # Should prioritize the order from earlier datasets
+  expect_equal(result_cycle, c("A", "B", "C"))  # Order from vec1 (first dataset)
+  
+  # Test Case 2: Simple cycle A->B, B->A
+  vec_simple1 <- c("A", "B")
+  vec_simple2 <- c("B", "A")
+  
+  expect_warning(
+    result_simple <- get_merge_ordered_with_indices(list(vec_simple1, vec_simple2)),
+    "Variable names in different datasets have different orders"
+  )
+  expect_equal(result_simple, c("A", "B"))  # Order from first dataset
+  
+  # Test Case 3: No conflicts (topological sort should work)
+  vec_topo1 <- c("A", "B", "C")
+  vec_topo2 <- c("B", "C", "D")  # B->C is consistent
+  vec_topo3 <- c("C", "D", "E")  # C->D is consistent
+  
+  expect_silent(
+    result_topo <- get_merge_ordered_with_indices(list(vec_topo1, vec_topo2, vec_topo3))
+  )
+  expect_equal(result_topo, c("A", "B", "C", "D", "E"))
+  
+  # Test Case 4: Complex cycle with multiple conflicts
+  vec_complex1 <- c("X", "Y", "Z")
+  vec_complex2 <- c("Y", "Z", "X")  # Creates Y->Z->X cycle
+  vec_complex3 <- c("Z", "X", "Y")  # Continues the cycle
+  
+  expect_warning(
+    result_complex <- get_merge_ordered_with_indices(list(vec_complex1, vec_complex2, vec_complex3)),
+    "Variable names in different datasets have different orders"
+  )
+  expect_equal(result_complex, c("X", "Y", "Z"))  # Order from first dataset
+  
+  # Test Case 5: Partial conflicts
+  vec_partial1 <- c("A", "B", "C", "D")
+  vec_partial2 <- c("B", "A", "E", "F")  # B->A conflicts, but E->F is new
+  
+  expect_warning(
+    result_partial <- get_merge_ordered_with_indices(list(vec_partial1, vec_partial2)),
+    "Variable names in different datasets have different orders"
+  )
+  expect_equal(result_partial, c("A", "B", "C", "D", "E", "F"))  # Priority to first dataset
+  
+  
+  # Test Case 7: All same elements but different orders
+  vec_same1 <- c("X", "Y", "Z")
+  vec_same2 <- c("Z", "Y", "X")
+  vec_same3 <- c("Y", "X", "Z")
+  
+  expect_warning(
+    result_same <- get_merge_ordered_with_indices(list(vec_same1, vec_same2, vec_same3)),
+    "Variable names in different datasets have different orders"
+  )
+  expect_equal(result_same, c("X", "Y", "Z"))  # Priority to first dataset
+})
+
+# Test for get_merge_ordered_with_indices edge cases
+test_that("get_merge_ordered_with_indices handles edge cases", {
+  
+  # Test with NULL inputs
+  expect_equal(get_merge_ordered_with_indices(list()), character(0))
+  
+  # Test with single vector
+  expect_equal(get_merge_ordered_with_indices(list(c("A", "B", "C"))), c("A", "B", "C"))
+  
+  # Test with identical vectors
+  vec <- c("A", "B", "C")
+  expect_silent(
+    result <- get_merge_ordered_with_indices(list(vec, vec, vec))
+  )
+  expect_equal(result, c("A", "B", "C"))
+  
+  # Test with numeric vectors (converted to character)
+  num_vec1 <- c(1, 2, 3)
+  num_vec2 <- c(2, 3, 4)
+  expect_silent(
+    result_num <- get_merge_ordered_with_indices(list(num_vec1, num_vec2))
+  )
+  expect_equal(result_num, c("1", "2", "3", "4"))
+})
+
+# Test for missing coverage in colocboost_init_data function
+test_that("colocboost_init_data handles complex dictionary mappings", {
+  
+  # Create test data with mixed individual and summary data
+  set.seed(123)
+  n <- 100
+  p_ind <- 50
+  p_sumstat <- 100
+  
+  # Individual level data
+  X_ind <- matrix(rnorm(n * p_ind), n, p_ind)
+  colnames(X_ind) <- paste0("SNP_IND_", 1:p_ind)
+  Y1 <- matrix(rnorm(n), n, 1)
+  Y2 <- matrix(rnorm(n), n, 1)
+  
+  # Summary statistics
+  sumstat1 <- data.frame(
+    beta = rnorm(p_sumstat),
+    sebeta = abs(rnorm(p_sumstat, 0, 0.1)),
+    n = n,
+    variant = paste0("SNP_SUM_", 1:p_sumstat)
+  )
+  
+  sumstat2 <- data.frame(
+    beta = rnorm(p_sumstat),
+    sebeta = abs(rnorm(p_sumstat, 0, 0.1)),
+    n = n,
+    variant = paste0("SNP_SUM_", 1:p_sumstat)
+  )
+  
+  # LD matrix
+  LD_matrix <- diag(p_sumstat)
+  colnames(LD_matrix) <- rownames(LD_matrix) <- paste0("SNP_SUM_", 1:p_sumstat)
+  
+  # Test case: Mixed individual and summary data with complex mappings
+  X_list <- list(X_ind)
+  Y_list <- list(Y1, Y2)
+  Z_list <- list(sumstat1$beta / sumstat1$sebeta, sumstat2$beta / sumstat2$sebeta)
+  LD_list <- list(LD_matrix)
+  N_sumstat <- list(n, n)
+  
+  dict_YX <- c(1, 1)  # Both Y outcomes use the same X
+  dict_sumstatLD <- c(1, 1)  # Both sumstats use the same LD
+  
+  keep_variables <- list(
+    colnames(X_ind),        # X variables
+    sumstat1$variant,       # Sumstat 1 variables  
+    sumstat2$variant        # Sumstat 2 variables
+  )
+  
+  # Test focal outcome mapping with mixed data
+  if (exists("colocboost_init_data")) {
+    # Test focal outcome index that maps to summary statistics
+    expect_error({
+      cb_data_mixed <- colocboost_init_data(
+        X = X_list,
+        Y = Y_list,
+        dict_YX = dict_YX,
+        Z = Z_list,
+        LD = LD_list,
+        N_sumstat = N_sumstat,
+        dict_sumstatLD = dict_sumstatLD,
+        Var_y = NULL,
+        SeBhat = NULL,
+        keep_variables = keep_variables,
+        focal_outcome_idx = 3,  # Should map to first summary statistic
+        focal_outcome_variables = TRUE,
+        overlap_variables = FALSE
+      )
+      
+      # Should use variables from the first summary statistic
+      expect_equal(length(cb_data_mixed$variable.names), p_sumstat)
+      expect_true(all(grepl("SNP_SUM_", cb_data_mixed$variable.names)))
+    }, NA)
+  } else {
+    skip("colocboost_init_data not directly accessible")
+  }
+})
+
+# Test for error handling in input validation
+test_that("colocboost input validation handles edge cases", {
+  
+  
+  # Test with very few overlapping variables
+  sumstat1 <- data.frame(
+    beta = rnorm(5),
+    sebeta = abs(rnorm(5, 0, 0.1)),
+    variant = paste0("SNP_A_", 1:5)
+  )
+  
+  sumstat2 <- data.frame(
+    beta = rnorm(5),
+    sebeta = abs(rnorm(5, 0, 0.1)),
+    variant = paste0("SNP_B_", 1:5)  # No overlap with sumstat1
+  )
+  
+warnings <- testthat::capture_warnings(
+  colocboost(sumstat = list(sumstat1, sumstat2))
+)
+expect_true(any(grepl("No or only 1 overlapping variables were found", warnings)))
+  
+  # Test with inconsistent effect matrices
+  effect_est_bad <- matrix(rnorm(10), 5, 2)
+  effect_se_bad <- matrix(abs(rnorm(12)), 6, 2)  # Different dimensions
+  
+  expect_warning(
+    colocboost(effect_est = effect_est_bad, effect_se = effect_se_bad),
+    "effect_est and effect_se should be the same dimension!"
+  )
+})
+
+# Test for LD matrix edge cases
+test_that("colocboost handles LD matrix edge cases", {
+  
+  # Test with singular LD matrix (rank deficient)
+  set.seed(123)
+  n <- 100
+  p <- 10
+  
+  # Create summary statistics
+  sumstat_sing <- data.frame(
+    beta = rnorm(p),
+    sebeta = abs(rnorm(p, 0, 0.1)),
+    n = n,
+    variant = paste0("SNP", 1:p)
+  )
+  
+  # Create singular LD matrix (first row = second row)
+  LD_singular <- matrix(0.5, p, p)
+  diag(LD_singular) <- 1
+  LD_singular[2, ] <- LD_singular[1, ]  # Make second row identical to first
+  LD_singular[, 2] <- LD_singular[, 1]  # Make second column identical to first
+  colnames(LD_singular) <- rownames(LD_singular) <- paste0("SNP", 1:p)
+  
+  # Should handle singular LD matrix without crashing
+  expect_error({
+    suppressWarnings(result_singular <- colocboost(
+      sumstat = list(sumstat_sing, sumstat_sing),
+      LD = LD_singular,
+      M = 1
+    ))
+  }, NA)
+  
+  # Test with identity LD matrix
+  LD_identity <- diag(p)
+  colnames(LD_identity) <- rownames(LD_identity) <- paste0("SNP", 1:p)
+  
+  expect_error({
+    suppressWarnings(result_identity <- colocboost(
+      sumstat = list(sumstat_sing, sumstat_sing),
+      LD = LD_identity,
+      M = 1
+    ))
+  }, NA)
+})
+
+# Test for outcome name handling edge cases
+test_that("colocboost handles outcome names correctly", {
+  
+  # Generate basic test data
+  set.seed(123)
+  X_basic <- matrix(rnorm(100 * 10), 100, 10)
+  colnames(X_basic) <- paste0("SNP", 1:10)
+  Y_basic <- matrix(rnorm(100 * 3), 100, 3)
+  
+  Y_basic_list <- list(Y_basic[,1], Y_basic[,2], Y_basic[,3])
+  X_basic_list <- list(X_basic, X_basic, X_basic)
+  
+
+  warnings <- testthat::capture_warnings(
+    result_mismatch <- colocboost(
+      X = X_basic_list, 
+      Y = Y_basic_list,
+      outcome_names = c("Trait1", "Trait2"),  # Only 2 names for 3 traits
+      M = 2
+    )
+  )
+  expect_true(any(grepl("There are 3 outcomes inputed, but only 2 provided.*Using default outcome_names as Y1,...,YL", warnings)))
+  
+  expect_equal(result_mismatch$data_info$outcome_info$outcome_names, c("Y1", "Y2", "Y3"))
+  
+  # Test with NULL outcome_names
+  expect_error({
+    suppressWarnings(result_null_names <- colocboost(
+      X = X_basic_list, 
+      Y = Y_basic_list,
+      outcome_names = NULL,
+      M = 2
+    ))
+    expect_equal(result_null_names$data_info$outcome_info$outcome_names, c("Y1", "Y2", "Y3"))
+  }, NA)
+})
+
+
+# Test for missing value handling edge cases
+test_that("colocboost handles missing values in different scenarios", {
+  
+  # Generate test data with systematic missing values
+  set.seed(123)
+  n <- 100
+  p <- 15
+  X_miss <- matrix(rnorm(n * p), n, p)
+  colnames(X_miss) <- paste0("SNP", 1:p)
+  Y_miss <- matrix(rnorm(n * 2), n, 2)
+  
+  # Introduce missing values in specific pattern
+  missing_indices <- sample(1:n, 10)
+  Y_miss[missing_indices, 1] <- NA
+  
+  # Different missing indices for second outcome
+  missing_indices2 <- sample(1:n, 8)
+  Y_miss[missing_indices2, 2] <- NA
+  
+  Y_miss_list <- list(Y_miss[,1], Y_miss[,2])
+  X_miss_list <- list(X_miss, X_miss)
+  
+  # Should handle missing values by creating separate X matrices
+  expect_error({
+    suppressWarnings(result_miss <- colocboost(
+      X = X_miss_list, 
+      Y = Y_miss_list,
+      M = 2
+    ))
+  }, NA)
+  
+  expect_s3_class(result_miss, "colocboost")
+})
+
+# Test for custom parameter combinations
+test_that("colocboost handles custom parameter combinations", {
+  
+  # Generate test data
+  set.seed(123)
+  X_param <- matrix(rnorm(200 * 25), 200, 25)
+  colnames(X_param) <- paste0("SNP", 1:25)
+  Y_param <- matrix(rnorm(200 * 2), 200, 2)
+  
+  Y_param_list <- list(Y_param[,1], Y_param[,2])
+  X_param_list <- list(X_param, X_param)
+  
+  # Test extreme parameter values
+  expect_error({
+    suppressWarnings(result_extreme <- colocboost(
+      X = X_param_list, 
+      Y = Y_param_list,
+      M = 2,
+      lambda = 0.99,           # Very high lambda
+      tau = 0.001,             # Very small tau
+      learning_rate_init = 0.001,  # Very small learning rate
+      jk_equiv_corr = 0.99,    # Very high correlation threshold
+      coverage = 0.99,         # Very high coverage
+      min_abs_corr = 0.1       # Low correlation threshold
+    ))
+  }, NA)
+  
+  expect_s3_class(result_extreme, "colocboost")
+})
+
+# Test for plotting edge cases with no colocalization
+test_that("colocboost_plot handles results with no colocalization", {
+  
+  # Create a colocboost result with no colocalization
+  set.seed(123)
+  X_no_coloc <- matrix(rnorm(100 * 20), 100, 20)
+  colnames(X_no_coloc) <- paste0("SNP", 1:20)
+  
+  # Independent traits (no colocalization)
+  Y_no_coloc <- matrix(rnorm(100 * 2) * 0.01, 100, 2)  # Very weak independent signals
+  
+  Y_no_coloc_list <- list(Y_no_coloc[,1], Y_no_coloc[,2])
+  X_no_coloc_list <- list(X_no_coloc, X_no_coloc)
+  
+  suppressWarnings({
+    result_no_coloc <- colocboost(
+      X = X_no_coloc_list, 
+      Y = Y_no_coloc_list,
+      M = 2
+    )
+  })
+  
+  # Should plot without error even with no colocalization
+  expect_error(suppressWarnings(colocboost_plot(result_no_coloc)), NA)
+  
+  # Test with different y-axis options
+  expect_error(suppressWarnings(colocboost_plot(result_no_coloc, y = "vcp")), NA)
+  expect_error(suppressWarnings(colocboost_plot(result_no_coloc, y = "cos_vcp")), NA)
+})
+
+
+# Test for complex coverage scenarios in get_cos
+test_that("get_cos handles complex coverage scenarios", {
+  
+  # Generate test colocboost result
+  set.seed(123)
+  cb_result_complex <- generate_test_result(n = 200, p = 30, L = 3)
+  
+  # Test with coverage = 1.0 (should include all variants)
+  expect_error({
+    result_full_coverage <- get_cos(cb_result_complex, coverage = 1.0)
+  }, NA)
+  
+  # Test with coverage = 0.5 (should include fewer variants)
+  expect_error({
+    result_half_coverage <- get_cos(cb_result_complex, coverage = 0.5)
+  }, NA)
+  
+  # Generate large genotype matrix for purity testing
+  set.seed(456)
+  N_large <- 150
+  P_large <- 100
+  sigma_large <- 0.7^abs(outer(1:P_large, 1:P_large, "-"))
+  X_large <- MASS::mvrnorm(N_large, rep(0, P_large), sigma_large)
+  colnames(X_large) <- paste0("SNP", 1:P_large)
+  
+  # Test with very stringent purity thresholds
+  expect_error({
+    result_stringent <- get_cos(cb_result_complex, 
+                               coverage = 0.95, 
+                               X = X_large, 
+                               min_abs_corr = 0.9,
+                               median_abs_corr = 0.95)
+  }, NA)
+  
+  # Should potentially filter out impure CoS
+  expect_type(result_stringent, "list")
+  expect_named(result_stringent, c("cos", "cos_purity"))
+})
+
+# Test for get_hierarchical_clusters with extreme correlation structures
+test_that("get_hierarchical_clusters handles extreme correlation structures", {
+  
+  # Test with perfect block structure
+  set.seed(789)
+  N <- 100
+  P <- 12
+  
+  # Create perfect block correlation structure
+  sigma_block <- matrix(0.1, P, P)
+  diag(sigma_block) <- 1
+  # Block 1: variables 1-4
+  sigma_block[1:4, 1:4] <- 0.95
+  # Block 2: variables 5-8  
+  sigma_block[5:8, 5:8] <- 0.95
+  # Block 3: variables 9-12
+  sigma_block[9:12, 9:12] <- 0.95
+  
+  X_block <- MASS::mvrnorm(N, rep(0, P), sigma_block)
+  cormat_block <- cor(X_block)
+  
+  # Should detect 3 clusters with high min_cluster_corr
+  result_block <- get_hierarchical_clusters(cormat_block, min_cluster_corr = 0.9)
+  
+  expect_equal(ncol(result_block$cluster), 3)
+  expect_equal(nrow(result_block$cluster), P)
+  
+  # Test with all low correlations (should result in many clusters)
+  cormat_low <- matrix(0.1, 8, 8)
+  diag(cormat_low) <- 1
+  
+  result_low <- get_hierarchical_clusters(cormat_low, min_cluster_corr = 0.5)
+  
+  # Should create multiple clusters due to low correlations
+  expect_gt(ncol(result_low$cluster), 1)
+  
+  # Test with mixed correlation structure
+  P_mixed <- 6
+  sigma_mixed <- matrix(0.2, P_mixed, P_mixed)
+  diag(sigma_mixed) <- 1
+  # High correlation pair: 1-2
+  sigma_mixed[1:2, 1:2] <- 0.9
+  # Medium correlation pair: 3-4  
+  sigma_mixed[3:4, 3:4] <- 0.6
+  # Low correlation singletons: 5, 6
+  
+  X_mixed <- MASS::mvrnorm(N, rep(0, P_mixed), sigma_mixed)
+  cormat_mixed <- cor(X_mixed)
+  
+  result_mixed <- get_hierarchical_clusters(cormat_mixed, min_cluster_corr = 0.7)
+  
+  expect_type(result_mixed, "list")
+  expect_named(result_mixed, c("cluster", "Q_modularity"))
+  expect_equal(nrow(result_mixed$cluster), P_mixed)
+})
+

From 3a1082aa3a0674d8dcf4aa126c207a4e69ecbc17 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Wed, 3 Sep 2025 01:32:24 -0400
Subject: [PATCH 2/4] Update announcements.Rmd

---
 vignettes/announcements.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/announcements.Rmd b/vignettes/announcements.Rmd
index 28941ac..01d2630 100644
--- a/vignettes/announcements.Rmd
+++ b/vignettes/announcements.Rmd
@@ -14,7 +14,7 @@ vignette: >
 - *May 2, 2025*: `colocboost` R package is available on [CRAN](https://cran.r-project.org/web/packages/colocboost/index.html).
 
 ## Software updates
-
+- `v1.0.5` Improve package and minor resolved bugs [CRAN](https://cran.r-project.org/web/packages/colocboost/index.html). For changelog, see [PR](https://github.com/StatFunGen/colocboost/pull/102).
 - `v1.0.4` Initial public stable release on [CRAN](https://cran.r-project.org/web/packages/colocboost/index.html). 
 Due to the file size limitation of CRAN release, all example data are the subset of the simulated data. 
 See full dataset in [colocboost paper repo](https://github.com/StatFunGen/colocboost-paper).

From 22823437446a49bf849d3202e1a0501d84fb0cd0 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Wed, 3 Sep 2025 21:30:22 -0400
Subject: [PATCH 3/4] Update test_inference.R

---
 tests/testthat/test_inference.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/testthat/test_inference.R b/tests/testthat/test_inference.R
index 0c1acd7..6123ad3 100644
--- a/tests/testthat/test_inference.R
+++ b/tests/testthat/test_inference.R
@@ -65,13 +65,13 @@ test_that("get_robust_colocalization filters results correctly", {
   cb_res <- generate_test_result()
   
   # Basic call
-  expect_error(get_robust_colocalization(cb_res), NA)
+  expect_error(suppressWarnings(get_robust_colocalization(cb_res)), NA)
   
   # With stricter thresholds
-  expect_error(get_robust_colocalization(cb_res, cos_npc_cutoff = 0.8), NA)
+  expect_error(suppressWarnings(get_robust_colocalization(cb_res, cos_npc_cutoff = 0.8)), NA)
   
   # With p-value threshold
-  expect_error(get_robust_colocalization(cb_res, pvalue_cutoff = 0.05), NA)
+  expect_error(suppressWarnings(get_robust_colocalization(cb_res, pvalue_cutoff = 0.05)), NA)
 })
 
 # Test for get_hierarchical_clusters

From 30a90db38693751386f5e6a2da5b27d353562fbe Mon Sep 17 00:00:00 2001
From: xueweic <xueweic@users.noreply.github.com>
Date: Sat, 6 Sep 2025 11:36:56 +0000
Subject: [PATCH 4/4] Update documentation

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9ca5d8f..5034204 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -20,7 +20,7 @@ Description: A multi-task learning approach to variable selection regression wit
 Encoding: UTF-8
 LazyDataCompression: xz
 LazyData: true
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 URL: https://github.com/StatFunGen/colocboost
 BugReports: https://github.com/StatFunGen/colocboost/issues
 Depends: