diff --git a/code/association_scan/quantile_models/qr_and_twas.ipynb b/code/association_scan/quantile_models/qr_and_twas.ipynb index 077369b76..d4ba9f4e9 100644 --- a/code/association_scan/quantile_models/qr_and_twas.ipynb +++ b/code/association_scan/quantile_models/qr_and_twas.ipynb @@ -125,7 +125,7 @@ "\n", "\n", "\n", - "Additionally, the matrix of quantile TWAS weights and pseudo R-squares is calculated and saved using Koenker and Machado's Pseudo R\u00b2 method, as described in their [Koenker and Machado, 1999: Inference in Quantile Regression](https://www.maths.usyd.edu.au/u/jchan/GLM/Koenker&Machado1999InferenceQuantileReg.pdf)" + "Additionally, the matrix of quantile TWAS weights and pseudo R-squares is calculated and saved using Koenker and Machado's Pseudo R² method, as described in their [Koenker and Machado, 1999: Inference in Quantile Regression](https://www.maths.usyd.edu.au/u/jchan/GLM/Koenker&Machado1999InferenceQuantileReg.pdf)" ] }, { @@ -356,13 +356,18 @@ "parameter: indel = True\n", "parameter: min_twas_maf = 0.01\n", "parameter: screen_threshold = 0.01\n", - "parameter: qrank_screen_calculate = True\n", - "parameter: marginal_beta_calculate = True\n", - "parameter: twas_weight_calculate = True\n", - "parameter: vqtl_calculate = True\n", + "parameter: screen_method = \"qvalue\"\n", + "parameter: screen_significant = True\n", + "parameter: pre_filter_by_pqr = False\n", + "parameter: initial_corr_filter_cutoff = 0.8\n", + "parameter: full_rank_corr_filter_cutoff = \"seq(0.75, 0.5, by = -0.05)\"\n", "parameter: ld_reference_meta_file = path()\n", "# Only focus on a subset of variants\n", "parameter: keep_variants = path()\n", + "parameter: marginal_beta_calculate = True\n", + "parameter: twas_weight_calculate = True\n", + "parameter: qrank_screen_calculate = True\n", + "parameter: vqtl_calculate = True\n", "# Analysis environment settings\n", "parameter: container = \"\"\n", "# For cluster jobs, number commands to run per job\n", @@ -716,9 +721,11 @@ " # Multi-column format: filter by context and molecular_trait_object_id\n", " # For genotype loading: use union of all matching variants across all contexts\n", " current_region = \"${_meta_info[2]}\"\n", + " original_region_ids = c(${\",\".join([(\"'%s'\" % x) if isinstance(x, str) else \",\".join([\"'%s'\" % i for i in x]) for x in _meta_info[3]])})\n", + " all_region_ids = unique(c(current_region, original_region_ids))\n", " current_contexts = conditions\n", " keep_variants_region_data = keep_variants_data[\n", - " keep_variants_data$molecular_trait_object_id == current_region &\n", + " keep_variants_data$molecular_trait_object_id %in% all_region_ids &\n", " keep_variants_data$context %in% current_contexts, ]\n", " keep_variants_list = unique(as.character(keep_variants_region_data$variant_id))\n", " keep_variants_list = keep_variants_list[keep_variants_list != \"\"]\n", @@ -727,9 +734,7 @@ " message(paste(length(keep_variants_list), \"variants matched for region\", current_region,\n", " \"across\", length(current_contexts), \"contexts (per-context filtering enabled)\"))\n", " if (length(keep_variants_list) == 0) {\n", - " message(\"No matching variants found, proceeding without variant filter\")\n", - " keep_variants_list = NULL\n", - " keep_variants_per_context = FALSE\n", + " message(\"No matching variants found for this region, marginal coef will be skipped for contexts without variants\")\n", " }\n", " } else {\n", " # Fallback: treat first column as variant_id list\n", @@ -811,8 +816,8 @@ " current_keep_variants = current_keep_variants[current_keep_variants != \"\"]\n", " message(paste(length(current_keep_variants), \"variants for context\", ctx_name))\n", " } else {\n", - " current_keep_variants = NULL\n", - " message(paste(\"No context-specific variants for\", ctx_name, \"- no variant filter applied\"))\n", + " current_keep_variants = character(0)\n", + " message(paste(\"No context-specific variants for\", ctx_name, \"- skipping marginal coef fitting\"))\n", " }\n", " }\n", "\n", @@ -826,7 +831,12 @@ " region_id = paste0(colnames(Y_col), \"_\", names(fdat$residual_Y)[r]),\n", " quantile_qtl_tau_list = seq(0.05, 0.95, by = 0.05),\n", " quantile_twas_tau_list = seq(0.01, 0.99, by = 0.01),\n", + " screen_method = \"${screen_method}\",\n", " screen_threshold = ${screen_threshold},\n", + " screen_significant = ${\"TRUE\" if screen_significant else \"FALSE\"},\n", + " pre_filter_by_pqr = ${\"TRUE\" if pre_filter_by_pqr else \"FALSE\"},\n", + " initial_corr_filter_cutoff = ${initial_corr_filter_cutoff},\n", + " full_rank_corr_filter_cutoff = ${full_rank_corr_filter_cutoff},\n", " keep_variants = current_keep_variants,\n", " marginal_beta_calculate = ${\"TRUE\" if marginal_beta_calculate else \"FALSE\"},\n", " twas_weight_calculate = ${\"TRUE\" if twas_weight_calculate else \"FALSE\"},\n", @@ -912,4 +922,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +}